├── .gitignore
├── Figures
    ├── Ext_Data_Fig_1
    │   ├── bar_layer_plots_simple.R
    │   └── cre_per_cluster_bar_plots.R
    ├── Ext_Data_Fig_10
    │   ├── 2018-05-25_markers.txt
    │   ├── cluster_violin_plot.R
    │   └── layer_and_violin_functions.R
    ├── Ext_Data_Fig_11
    │   └── v1_alm_cross_mapping_cor.R
    ├── Ext_Data_Fig_12
    │   ├── 20171030_inj_qc.tsv
    │   ├── alm_retrograde_inj_dotplots_actual_targets_dendrogram.R
    │   ├── prune_leaf_custom.R
    │   └── visp_retrograde_inj_dotplots_actual_targets_dendrogram.R
    ├── Ext_Data_Fig_15
    │   ├── huang_comparison_figure.R
    │   ├── huang_dendrogram.R
    │   ├── linnarsson_comparison.R
    │   ├── linnarsson_stats.R
    │   ├── prune_leaf_custom.R
    │   ├── sankey_functions.R
    │   ├── tolias_comparison_figure.R
    │   └── tolias_dendrogram.R
    ├── Ext_Data_Fig_16
    │   ├── cluster_bar_plot.R
    │   ├── cluster_violin_plot.R
    │   ├── coexpression_bar_plots.R
    │   ├── coexpression_functions.R
    │   ├── layer_and_violin_functions.R
    │   ├── marker_violins.R
    │   ├── river_plots.R
    │   └── sankey_functions.R
    ├── Ext_Data_Fig_18
    │   ├── cluster_violin_plot.R
    │   ├── coexpression_functions.R
    │   ├── color_functions.R
    │   ├── layer_and_violin_functions.R
    │   ├── prune_leaf_custom.R
    │   ├── tsne_plots.R
    │   └── unified_layer_distributions_and_markers.R
    ├── Ext_Data_Fig_2
    │   └── violin_plots.R
    ├── Ext_Data_Fig_20
    │   └── dend_plots.R
    ├── Ext_Data_Fig_3
    │   ├── all_cell_tsne_plot.R
    │   ├── coclustering_heatmaps_sparse_matrix.R
    │   ├── pairwise_cluster_heatmaps.R
    │   └── qc_jitter_plots.R
    ├── Ext_Data_Fig_4
    │   ├── intermediate_fraction_plot.R
    │   └── qc_jitter_plots.R
    ├── Ext_Data_Fig_5
    │   └── all_markers_heatmap.R
    ├── Ext_Data_Fig_6
    │   ├── cluster_marker_heatmaps.R
    │   └── markers.csv
    ├── Ext_Data_Fig_7
    │   ├── comparison_figure.R
    │   └── sankey_functions.R
    ├── Ext_Data_Fig_8
    │   └── saturation_plot.R
    ├── Ext_Data_Fig_9
    │   └── build_dotplot.R
    ├── Figure_1
    │   └── dendrogram_panel.R
    ├── Figure_2
    │   ├── class_comparison_colors_simplified.csv
    │   ├── color_functions.R
    │   ├── degenes_alm_visp_barplot.R
    │   ├── pairwise_DEGene_panels.R
    │   ├── region_marker_genes_panel.R
    │   └── tsne_panels.R
    ├── Figure_3
    │   ├── alm_retrograde_inj_dotplot_categories.R
    │   ├── inj_qc.tsv
    │   ├── prune_leaf_custom.R
    │   └── visp_retrograde_inj_dotplot_categories.R
    ├── Figure_4
    │   ├── color_functions.R
    │   ├── layer_and_violin_functions.R
    │   ├── prune_leaf_custom.R
    │   └── unified_layer_distributions_and_markers.R
    ├── Figure_5
    │   ├── color_functions.R
    │   ├── layer_and_violin_functions.R
    │   ├── prune_leaf_custom.R
    │   └── unified_layer_distributions_and_markers.R
    └── Figure_6
    │   ├── L4 IT VISp Rspo1 L5 IT VISp Batf3.rda
    │   ├── L4 IT VISp Rspo1 L5 IT VISp Hsd11b1 Endou.rda
    │   ├── L4.2016.dat.rda
    │   ├── L4.dat.rda
    │   ├── L4.df.rda
    │   ├── L4.sampled.df.rda
    │   ├── L4_comparison_plots.R
    │   ├── comparison_figure.R
    │   ├── custom_annotate_cat.R
    │   ├── gradient.df.list.rda
    │   ├── l4_gradient_heatmap.R
    │   ├── pairwise_median_lfc.R
    │   ├── plotting_functions.R
    │   ├── sankey_functions.R
    │   ├── sst150_coords.tsv
    │   ├── sst300_cl_anno.csv
    │   ├── sst300_coords.tsv
    │   ├── sst80_cl_anno.csv
    │   ├── sst80_coords.tsv
    │   ├── sst_constellation_positions.xlsx
    │   ├── sst_constellations.R
    │   └── sst_river_plot.R
├── LICENSE
├── README.md
├── RNA-seq Analysis
    ├── ALM.ex.analysis.R
    ├── DEG.R
    ├── V1_ALM_compare.R
    ├── VISp.ex.analysis.R
    ├── all.cell.markers.heatmap.R
    ├── annotate.cluster.R
    ├── annotate.markers.R
    ├── cl.transition.R
    ├── cluster.R
    ├── cluster_hetero.R
    ├── co_cluster.R
    ├── compare_1679.R
    ├── compare_Hrvatin.R
    ├── compare_JoshWang.R
    ├── compare_Linnarsson.R
    ├── compare_Tolias.R
    ├── consensus.clust.R
    ├── copy_bam.R
    ├── fast_tsne.R
    ├── gradient.R
    ├── init.R
    ├── intron.exon.ratio.R
    ├── markers.R
    ├── retrograde.R
    ├── saturation_test.R
    ├── subsample.all.R
    ├── subsample.reads.R
    └── tune_parameter.R
└── tasic2018analysis.Rproj


/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_10/2018-05-25_markers.txt:
--------------------------------------------------------------------------------
 1 | Aqp4
 2 | Sox10
 3 | Pdgfra
 4 | Grm5
 5 | Ccnb1
 6 | Rassf10
 7 | Enpp6
 8 | Opalin
 9 | Serpinb1a
10 | Synpr
11 | Cyba
12 | S100a11
13 | Col1a1
14 | Lum
15 | Dcn
16 | Aox3
17 | Bgn
18 | Osr1
19 | Gjb2
20 | Ccl19
21 | Cd74
22 | Slc47a1
23 | Lypd2
24 | Hs3st6
25 | Ctxn3
26 | Nnat
27 | Mc5r
28 | Spp1
29 | Col15a1
30 | Pdgfrb
31 | Cspg4
32 | Kcnj8
33 | Abcc9
34 | Atp13a5
35 | Art3
36 | Pla1a
37 | Ace2
38 | Acta2
39 | Angpt1
40 | Des
41 | Aoc3
42 | Slc38a5
43 | Pdgfb
44 | Nos3
45 | Eltd1
46 | Pecam1
47 | Ctla2a
48 | Emcn
49 | Sema3c
50 | Tek
51 | Cytl1
52 | Bmx
53 | Sema3f
54 | Gja5
55 | Fcgr3
56 | Cd14
57 | Mrc1
58 | Cd163
59 | Cd4
60 | Lyz2
61 | Siglech
62 | Ctss
63 | Cx3cr1
64 | P2ry12
65 | Tmem119
66 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_10/cluster_violin_plot.R:
--------------------------------------------------------------------------------
 1 | library(dplyr)
 2 | library(ggplot2)
 3 | library(feather)
 4 | library(scrattch.vis)
 5 | library(scrattch.io)
 6 | options(stringsAsFactors = F)
 7 | source("layer_and_violin_functions.R")
 8 | 
 9 | #genes <- unique(unlist(read.csv("markers.csv", header=F)))
10 | genes <- unique(unlist(read.table("2018-05-25_markers.txt", header=F)))
11 | 
12 | fdir <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/"
13 | 
14 | p <- group_violin_plot2(data_source = fdir,
15 |                         genes = genes,
16 |                         group_by = "dendcluster",
17 |                         clusters = 118:133,
18 |                         logscale = TRUE,
19 |                         labelheight = 10,
20 |                         max_width = 10,
21 |                         fontsize = 6,
22 |                         showcounts = FALSE)
23 | 
24 | p
25 | 
26 | ggsave("marker_violins.pdf", p, width = 4.1, height = 8.2, useDingbats = F)
27 | 
28 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_11/v1_alm_cross_mapping_cor.R:
--------------------------------------------------------------------------------
  1 | library(dendextend)
  2 | library(dplyr)
  3 | library(feather)
  4 | library(ggplot2)
  5 | options(stringsAsFactors = F)
  6 | 
  7 | values_to_colors <- function(x, minval = NULL, maxval = NULL, colorset = c("darkblue","dodgerblue","gray80","orangered","red")) {
  8 |   
  9 |   heat_colors <- colorRampPalette(colorset)(1001)
 10 |   
 11 |   if(is.null(maxval)) {
 12 |     maxval <- max(x)
 13 |   }
 14 |   if (is.null(minval)) {
 15 |     minval <- min(x)
 16 |   }
 17 |   
 18 |   heat_positions <- unlist(round((x - minval) / (maxval - minval) * 1000 + 1, 0))
 19 |   
 20 |   colors <- heat_colors[heat_positions]
 21 |   
 22 |   colors
 23 | }
 24 | 
 25 | color_sum <- function(col1,col2) {
 26 |   
 27 |   rgbmat1 <- col2rgb(col1)/255
 28 |   rgbmat2 <- col2rgb(col2)/255
 29 |   
 30 |   mix <- rgbmat1 + rgbmat2
 31 |   
 32 |   rgb(mix[1],mix[2],mix[3])
 33 |   
 34 | }
 35 | 
 36 | 
 37 | # Load Zizhen's V1 to ALM comparisons
 38 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/V1.ALM.compare.cor.rda")
 39 | 
 40 | # # Rotate the L5a types so that Ucma_2 is closer to L2/3
 41 | # rotate_labels <- labels(V1.dend)
 42 | # rotate_labels[4:12] <- rotate_labels[c(9:12,4:8)]
 43 | # V1.dend <- V1.dend %>% rotate(rotate_labels)
 44 | 
 45 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather")
 46 | cluster_anno <- anno %>% select(cl, cluster_id, cluster_label, cluster_color) %>% unique
 47 | 
 48 | labels(V1.dend) <- cluster_anno$cluster_label[match(labels(V1.dend), cluster_anno$cl)]
 49 | labels(ALM.dend) <- cluster_anno$cluster_label[match(labels(ALM.dend), cluster_anno$cl)]
 50 | 
 51 | comb.map$v1_label <- cluster_anno$cluster_label[match(comb.map$V1.cl, cluster_anno$cl)]
 52 | comb.map$alm_label <- cluster_anno$cluster_label[match(comb.map$ALM.cl, cluster_anno$cl)]
 53 | 
 54 | v1_ggdend <- as.ggdend(V1.dend)
 55 | alm_ggdend <- as.ggdend(ALM.dend)
 56 | 
 57 | # Invert and center dendrograms
 58 | # add 0.3 space between them
 59 | # 0.1 for leaves
 60 | # 0.1 on each side for labels
 61 | 
 62 | # V1: right-to-left
 63 | v1_seg <- v1_ggdend$segments
 64 | names(v1_seg)[1:4] <- c("y","x","yend","xend")
 65 | ymid <- max(v1_seg$y/2)
 66 | ymax <- max(v1_seg$y)
 67 | v1_seg <- v1_seg %>%
 68 |   mutate(y = (y - 2*(y - ymid))/ymax,
 69 |          yend = (yend - 2*(yend - ymid))/ymax,
 70 |          x = x + 0.15,
 71 |          xend = xend + 0.15)
 72 | 
 73 | v1_leaves <- v1_ggdend$labels
 74 | names(v1_leaves)[1:3] <- c("v1_y","v1_x","v1_label")
 75 | v1_leaves <- v1_leaves %>%
 76 |   mutate(v1_y = (v1_y - 2*(v1_y - ymid))/ymax,
 77 |          v1_x = v1_x + 0.05) %>%
 78 |   select(-col,-cex)
 79 | 
 80 | # ALM: left-to-right
 81 | alm_seg <- alm_ggdend$segments
 82 | names(alm_seg)[1:4] <- c("y","x","yend","xend")
 83 | ymid <- max(alm_seg$y/2)
 84 | ymax <- max(alm_seg$y)
 85 | alm_seg <- alm_seg %>%
 86 |   mutate(y = (y - 2*(y - ymid))/ymax,
 87 |          yend = (yend - 2*(yend - ymid))/ymax,
 88 |          x = -x - 0.15,
 89 |          xend = -xend - 0.15)
 90 | 
 91 | alm_leaves <- alm_ggdend$labels
 92 | names(alm_leaves)[1:3] <- c("alm_y","alm_x","alm_label")
 93 | alm_leaves <- alm_leaves %>%
 94 |   mutate(alm_y = (alm_y - 2*(alm_y - ymid))/ymax,
 95 |          alm_x = -alm_x - 0.05) %>%
 96 |   select(-col,-cex)
 97 | 
 98 | # build comparison segments
 99 | comb.map <- comb.map %>%
100 |   mutate(Prob = as.numeric(Prob)) %>%
101 |   left_join(alm_leaves) %>%
102 |   left_join(v1_leaves)
103 | 
104 | v1_to_alm <- comb.map %>%
105 |   filter(type == "V1.ref.ALM")
106 | alm_to_v1 <- comb.map %>%
107 |   filter(type == "ALM.ref.V1")
108 | 
109 | alm_v1_comp_plot <- ggplot() +
110 |   geom_segment(data = v1_seg,
111 |                aes(x = x, xend = xend,
112 |                    y = y, yend = yend),
113 |                lineend = "square") +
114 |   geom_segment(data = alm_seg,
115 |                aes(x = x, xend = xend,
116 |                    y = y, yend = yend),
117 |                lineend = "square") +
118 |   geom_curve(data = alm_to_v1,
119 |              aes(x = alm_x, xend = v1_x,
120 |                  y = alm_y, yend = v1_y,
121 |                  size = Prob),
122 |              color = "#212021",
123 |              curvature = -0.1,
124 |              alpha = 0.5) + #arrow = arrow(length = unit(0.25,"cm"))) +
125 |   geom_curve(data = v1_to_alm,
126 |              aes(x = v1_x, xend = alm_x,
127 |                  y = v1_y, yend = alm_y,
128 |                  size = Prob),
129 |              color = "#848EBC",
130 |              curvature = -0.1,
131 |              alpha = 0.5) +#, arrow = arrow(length = unit(0.25,"cm"))) +
132 |   geom_text(data = alm_leaves,
133 |             aes(x = alm_x - 0.01, y = alm_y,
134 |                 label = alm_label),
135 |             hjust = 1,
136 |             vjust = 0.3) +
137 |   geom_text(data = v1_leaves,
138 |             aes(x = v1_x + 0.01, y = v1_y,
139 |                 label = v1_label),
140 |             hjust = 0,
141 |             vjust = 0.3) +
142 |   theme_void() +
143 |   scale_size_continuous(range = c(0,3))
144 | 
145 | alm_v1_comp_plot
146 | 
147 | ggsave("v1_alm_crossmapping_cor.pdf",alm_v1_comp_plot,width = 12, height = 8)
148 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_12/prune_leaf_custom.R:
--------------------------------------------------------------------------------
  1 | prune.dendrogram <- function(dend, leaves, reindex_dend = TRUE, ...) {
  2 |   leaves <- as.character(leaves)
  3 |   
  4 |   for(i in seq_along(leaves))
  5 |   {
  6 |     # this function is probably not the fastest - but it works...
  7 |     dend <- prune_leaf(dend, leaves[i])	# move step by stem to remove all of these leaves...
  8 |   }
  9 |   
 10 |   if(reindex_dend) dend <- reindex_dend(dend)
 11 |   
 12 |   return(dend)
 13 | }
 14 | 
 15 | stats_midcache.dendrogram <- function (x, type = "hclust", quiet = FALSE) 
 16 | {
 17 |   type <- match.arg(type)
 18 |   stopifnot(inherits(x, "dendrogram"))
 19 |   setmid <- function(d, type) {
 20 |     if (is.leaf(d)) 
 21 |       return(d)
 22 |     k <- length(d)
 23 |     if (k < 1) 
 24 |       stop("dendrogram node with non-positive #{branches}")
 25 |     r <- d
 26 |     midS <- 0
 27 |     for (j in 1L:k) {
 28 |       r[[j]] <- unclass(setmid(d[[j]], type))
 29 |       midS <- midS + .midDend(r[[j]])
 30 |     }
 31 |     if (!quiet && type == "hclust" && k != 2) 
 32 |       warning("midcache() of non-binary dendrograms only partly implemented")
 33 |     attr(r, "midpoint") <- (.memberDend(d[[1L]]) + midS)/2
 34 |     r
 35 |   }
 36 |   setmid(x, type = type)
 37 | }
 38 | 
 39 | stats_.midDend <- function (x) {
 40 |   if (is.null(mp <- attr(x, "midpoint"))) 0 else mp
 41 | }
 42 | .midDend <- stats_.midDend # copied so that they would work inside the various functions here...
 43 | 
 44 | stats_.memberDend <- function (x) 
 45 | {
 46 |   r <- attr(x, "x.member")
 47 |   if (is.null(r)) {
 48 |     r <- attr(x, "members")
 49 |     if (is.null(r)) 
 50 |       r <- 1L
 51 |   }
 52 |   r
 53 | }
 54 | .memberDend <- stats_.memberDend
 55 | 
 56 | 
 57 | prune_leaf <- function(dend, leaf_name,...)
 58 | {
 59 |   labels_dend <- labels(dend)
 60 |   
 61 |   if(length(labels_dend) != length(unique(labels_dend)))	warning("Found dubplicate labels in the tree (this might indicate a problem in the tree you supplied)")
 62 |   
 63 |   if(!(leaf_name %in% labels_dend)) {	# what to do if there is no such leaf inside the tree
 64 |     warning(paste("There is no leaf with the label", leaf_name , "in the tree you supplied", "\n" , "Returning original tree", "\n" ))
 65 |     return(dend)
 66 |   }
 67 |   
 68 |   if(sum(labels_dend %in% leaf_name) > 1) {	# what to do if there is no such leaf inside the tree      
 69 |     warning(paste("There are multiple leaves by the name of '", leaf_name , "' in the tree you supplied.  Their locations is:",
 70 |                   paste(which(labels_dend %in% leaf_name), collapse = ","),"\n" , "Returning original tree", "\n" ))
 71 |     return(dend)
 72 |   }
 73 |   
 74 |   is.father.of.leaf.to.remove <- function(dend, leaf_name)
 75 |   {
 76 |     # this function checks if the leaf we wish to remove is the direct child of the current branch (dend) we entered the function
 77 |     is.father <- FALSE
 78 |     for(i in seq_len(length(dend)))
 79 |     {
 80 |       if(is.leaf(dend[[i]]) == TRUE  &&  labels(dend[[i]]) == leaf_name) is.father <- TRUE
 81 |     }
 82 |     return(is.father)
 83 |   }
 84 |   
 85 |   
 86 |   remove_leaf_if_child <- function(dend, leaf_name)
 87 |   {
 88 |     # print(labels(dend))
 89 |     if(all(labels(dend) != leaf_name))
 90 |     {	# if the leaf we want to remove is not in this branch, simply return the branch without going deeper intoit.
 91 |       return(dend)
 92 |     } else {	# but if the leaf we want to remove is here somewhere, go on searching
 93 |       attr(dend, "members") <- attr(dend, "members") - 1 
 94 |       
 95 |       if(!is.father.of.leaf.to.remove(dend, leaf_name))	# if you are not the father, then go on and make this function work on each child
 96 |       {
 97 |         for(i in seq_len(length(dend)))
 98 |         {
 99 |           dend[[i]] <- remove_leaf_if_child(dend[[i]], leaf_name)
100 |         }
101 |       } else { # we'll merge 
102 |         if(length(dend) == 2) {
103 |           leaf_location <- 1 
104 |           # if leaf location is 1, then move branch in leaf 2 to be the new x
105 |           if(is.leaf(dend[[leaf_location]]) == T  &&  labels(dend[[leaf_location]]) == leaf_name) {
106 | 
107 |             branch_to_bumpup <- 2
108 |             dend <- dend[[branch_to_bumpup]]
109 |           } else { # else - the leaf location must be located in position "2"
110 | 
111 |             branch_to_bumpup <- 1
112 |             dend <- dend[[branch_to_bumpup]]
113 |           }
114 |         } else if(length(dend) > 2) {
115 |           # If more than 2 branches, check if any are leaves
116 |           dend_leaves <- unlist(lapply(dend, is.leaf))
117 |           if(sum(dend_leaves) > 0) {
118 |             # If so, check for matching labels to the leaf to prune
119 |             dend_labels <- unlist(lapply(dend, function(x) attr(x, "label")))
120 |             dend_matches <- dend_labels == leaf_name
121 |             # Return a list containing the non-matching branches
122 |             dend[dend_leaves & dend_matches] <- NULL
123 |             # Note that in some cases, the following DOES NOT yield a correct result:
124 |             # dend <- dend[!(dend_leaves & dend_matches)]
125 |             
126 |             # If the length is now 1, it can be bumped up
127 |             if(length(dend) == 1) {
128 |               dend <- dend[[1]]
129 |             }
130 |             
131 |           }
132 |         }
133 |       }
134 |     }
135 |     return(dend)
136 |   }
137 |   
138 |   
139 |   new_dend <- remove_leaf_if_child(dend, leaf_name)
140 |   new_dend <- suppressWarnings(stats_midcache.dendrogram(new_dend)) # fixes the attributes
141 |   #   new_x <- fix_members_attr.dendrogram(new_x) # fix the number of memebers attr for each node
142 |   return(new_dend)
143 | }


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_15/huang_comparison_figure.R:
--------------------------------------------------------------------------------
 1 | library(dplyr)
 2 | library(ggplot2)
 3 | library(feather)
 4 | options(stringsAsFactors = F)
 5 | source("sankey_functions.R")
 6 | 
 7 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/external/mouse_GABA_Paul_2016_20171002/anno.feather")
 8 | 
 9 | # rearrange the target clusters based on which query cluster has the highest freq
10 | new_order <- anno %>%
11 |   group_by(cluster_id, cell_class_id) %>%
12 |   summarise(Freq = n()) %>%
13 |   ungroup() %>%
14 |   group_by(cluster_id) %>%
15 |   arrange(-Freq) %>%
16 |   filter(row_number() == 1) %>%
17 |   ungroup() %>%
18 |   arrange(cell_class_id, cluster_id) %>%
19 |   mutate(new_order = 1:n()) %>%
20 |   select(cluster_id, new_order)
21 | 
22 | huang_to_new <- left_join(anno, new_order) %>%
23 |   mutate(cluster_id = new_order)
24 | 
25 | # make_plot_nodes converts group_nodes to rectangles for plotting with geom_rect()
26 | h2n_nodes <- make_plot_nodes(make_group_nodes(huang_to_new, c("cell_class","cluster")))
27 | 
28 | h2n_new_nodes <- h2n_nodes %>%
29 |   filter(group == "cluster") %>%
30 |   mutate(name = sub("^[0-9]+ ","",name),
31 |          name = paste0(name, " (n = ",n,")"))
32 | h2n_huang_nodes <- h2n_nodes %>%
33 |   filter(group == "cell_class") %>%
34 |   mutate(name = paste0(name, " (n = ",n,")"))
35 | 
36 | huang_to_new_river <- build_river_plot(huang_to_new, 
37 |                                        c("cell_class","cluster"), 
38 |                                        fill_group = "cell_class") +
39 |   geom_text(data = h2n_new_nodes,
40 |             aes(x = xmax + 0.01,
41 |                 y = (ymin + ymax)/2,
42 |                 label = name,
43 |                 color = color),
44 |             hjust = 0) +
45 |   geom_text(data = h2n_huang_nodes,
46 |             aes(x = xmin - 0.01,
47 |                 y = (ymin + ymax)/2,
48 |                 label = name,
49 |                 color = color),
50 |             hjust = 1) +
51 |   scale_color_identity()
52 | 
53 | huang_to_new_river
54 | 
55 | ggsave("huang_to_new_river.pdf", huang_to_new_river, height = 10, width = 6)
56 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_15/huang_dendrogram.R:
--------------------------------------------------------------------------------
 1 | library(dplyr)
 2 | library(ggplot2)
 3 | library(dendextend)
 4 | options(stringsAsFactors = F)
 5 | 
 6 | source("prune_leaf_custom.R")
 7 | 
 8 | dend <- readRDS("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/dend.RData")
 9 | 
10 | dend <- dend %>%
11 |   prune(labels(dend)[c(1:55,116:133)])
12 | 
13 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/external/mouse_GABA_Paul_2016_20171002/anno.feather")
14 | 
15 | node_n <- anno %>%
16 |   group_by(cluster_label) %>%
17 |   summarize(n_cells = n())
18 | 
19 | nodes <- as.ggdend(dend)$nodes %>%
20 |   mutate(cluster_label = get_nodes_attr(dend,"label")) %>%
21 |   left_join(node_n)
22 | 
23 | plot_nodes <- nodes %>%
24 |   filter(!is.na(n_cells))
25 | 
26 | segments <- as.ggdend(dend)$segments
27 | 
28 | node_labels <- plot_nodes %>%
29 |   filter(y > 0)
30 | 
31 | leaf_labels <- nodes %>%
32 |   filter(y == 0) %>%
33 |   mutate(col = ifelse(is.na(n_cells),"#808080",col))
34 | 
35 | dendrogram_plot <- ggplot() +
36 |   geom_segment(data = segments,
37 |                aes(x = x, xend = xend,
38 |                    y = y, yend = yend)) +
39 |   geom_point(data = plot_nodes,
40 |              aes(x = x,
41 |                  y = y,
42 |                  size = n_cells),
43 |              pch = 21,
44 |              fill = "#00AEEF",
45 |              color = "#2E3192",
46 |              alpha = 0.7) +
47 |   geom_text(data = node_labels,
48 |             aes(x = x, y = y,
49 |                 label = cluster_label),
50 |             vjust = 1) +
51 |   geom_text(data = leaf_labels,
52 |             aes(x = x, y = y - 0.01,
53 |                 label = cluster_label,
54 |                 color = col),
55 |             angle = 90,
56 |             hjust = 1,
57 |             vjust = 0.3) +
58 |   scale_fill_identity() +
59 |   scale_color_identity() +
60 |   scale_size_area(breaks = c(10,25,50,100)) +
61 |   scale_y_continuous(limits = c(-0.1,1)) +
62 |   theme_void()
63 | 
64 | dendrogram_plot
65 | 
66 | ggsave("huang_dendrogram_plot.pdf",dendrogram_plot, width = 8, height = 6)
67 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_15/linnarsson_comparison.R:
--------------------------------------------------------------------------------
 1 | library(dplyr)
 2 | library(feather)
 3 | library(ggplot2)
 4 | options(stringsAsFactors = F)
 5 | 
 6 | source("sankey_functions.R")
 7 | 
 8 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process/WGCNA_result_anterograde/map.linnarsson.ss.df.rda")
 9 | 
10 | ss.map.df <- ss.map.df %>%
11 |   mutate(sample_id = rownames(.)) %>%
12 |   rename(ll_label = cl)
13 | 
14 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20170913/anno.feather")
15 | 
16 | cluster_anno <- anno %>%
17 |   select(cl, cluster_id, cluster_label, cluster_color) %>%
18 |   unique() %>%
19 |   mutate(pred.cl = as.character(cl))
20 | 
21 | 
22 | 
23 | ll_anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/external/mouse_SS_CA1_Zeisel_2015_20170620/anno.feather")
24 | ll_cluster_anno <- ll_anno %>%
25 |   select(cluster_id, cluster_label, cluster_color) %>%
26 |   unique()
27 | names(ll_cluster_anno) <- c("ll_id","ll_label","ll_color")
28 | 
29 | 
30 | map_anno <- ss.map.df %>%
31 |   left_join(cluster_anno) %>%
32 |   left_join(ll_cluster_anno) %>%
33 |   filter(complete.cases(.))
34 | 
35 | # rearrange the target clusters based on which query cluster has the highest freq
36 | new_order <- map_anno %>%
37 |   group_by(cluster_id, cluster_label, ll_id) %>%
38 |   summarise(Freq = n()) %>%
39 |   ungroup() %>%
40 |   group_by(cluster_id) %>%
41 |   arrange(-Freq) %>%
42 |   filter(row_number() == 1) %>%
43 |   ungroup() %>%
44 |   arrange(cluster_id,ll_id) %>%
45 |   mutate(new_order = 1:n()) %>%
46 |   select(cluster_id, cluster_label, Freq, new_order)
47 | 
48 | linnarsson_to_new <- left_join(map_anno, new_order) %>%
49 |   mutate(cluster_id = new_order) %>%
50 |   filter(!coarse_cl %in% c("interneurons","pyramidal SS"),
51 |          ll_label != "(none)")
52 | 
53 | # make_plot_nodes converts group_nodes to rectangles for plotting with geom_rect()
54 | l2n_nodes <- make_plot_nodes(make_group_nodes(linnarsson_to_new, c("ll","cluster")),
55 |                              pad = 0.2)
56 | 
57 | l2n_new_nodes <- l2n_nodes %>%
58 |   filter(group == "cluster") %>%
59 |   mutate(name = sub("^[0-9]+ ","",name),
60 |          name = paste0(name, " (n = ",n,")"))
61 | l2n_linnarsson_nodes <- l2n_nodes %>%
62 |   filter(group == "ll") %>%
63 |   mutate(name = paste0(name, " (n = ",n,")"))
64 | 
65 | linnarsson_to_new_river <- build_river_plot(linnarsson_to_new, 
66 |                                         c("ll","cluster"), 
67 |                                         fill_group = "ll",
68 |                                         pad = 0.2) +
69 |   geom_text(data = l2n_new_nodes,
70 |             aes(x = xmax + 0.01,
71 |                 y = (ymin + ymax)/2,
72 |                 label = name,
73 |                 color = color),
74 |             hjust = 0,
75 |             size = 2) +
76 |   geom_text(data = l2n_linnarsson_nodes,
77 |             aes(x = xmin - 0.01,
78 |                 y = (ymin + ymax)/2,
79 |                 label = name,
80 |                 color = "black"),
81 |             hjust = 1,
82 |             size = 2) +
83 |   scale_color_identity()
84 | 
85 | linnarsson_to_new_river
86 | 
87 | ggsave("l2n_glia.pdf",width = 4, height = 12)
88 | 
89 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_15/linnarsson_stats.R:
--------------------------------------------------------------------------------
 1 | library(dplyr)
 2 | library(feather)
 3 | library(ggplot2)
 4 | options(stringsAsFactors = F)
 5 | 
 6 | source("sankey_functions.R")
 7 | 
 8 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process/WGCNA_result_anterograde/map.linnarsson.ss.df.rda")
 9 | 
10 | ss.map.df <- ss.map.df %>%
11 |   mutate(sample_id = rownames(.)) %>%
12 |   rename(ll_label = cl)
13 | 
14 | ss.stats <- ss.map.df %>%
15 |   group_by(ll_label) %>%
16 |   mutate(ll_n = n()) %>%
17 |   ungroup() %>%
18 |   group_by(pred.cl, pred_cluster_label, ll_label) %>%
19 |   summarise(n_cells = n(),
20 |             ll_frac = n()/ll_n[1]) %>%
21 |   arrange(as.numeric(pred.cl),
22 |           -n_cells) %>%
23 |   filter(ll_frac > 0.1) %>%
24 |   filter(n_cells > 1)
25 | 
26 | write.csv(ss.stats,"filtered_linnarsson_mapping.csv", quote = F, row.names = F)
27 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_15/prune_leaf_custom.R:
--------------------------------------------------------------------------------
  1 | prune.dendrogram <- function(dend, leaves, reindex_dend = TRUE, ...) {
  2 |   leaves <- as.character(leaves)
  3 |   
  4 |   for(i in seq_along(leaves))
  5 |   {
  6 |     # this function is probably not the fastest - but it works...
  7 |     dend <- prune_leaf(dend, leaves[i])	# move step by stem to remove all of these leaves...
  8 |   }
  9 |   
 10 |   if(reindex_dend) dend <- reindex_dend(dend)
 11 |   
 12 |   return(dend)
 13 | }
 14 | 
 15 | stats_midcache.dendrogram <- function (x, type = "hclust", quiet = FALSE) 
 16 | {
 17 |   type <- match.arg(type)
 18 |   stopifnot(inherits(x, "dendrogram"))
 19 |   setmid <- function(d, type) {
 20 |     if (is.leaf(d)) 
 21 |       return(d)
 22 |     k <- length(d)
 23 |     if (k < 1) 
 24 |       stop("dendrogram node with non-positive #{branches}")
 25 |     r <- d
 26 |     midS <- 0
 27 |     for (j in 1L:k) {
 28 |       r[[j]] <- unclass(setmid(d[[j]], type))
 29 |       midS <- midS + .midDend(r[[j]])
 30 |     }
 31 |     if (!quiet && type == "hclust" && k != 2) 
 32 |       warning("midcache() of non-binary dendrograms only partly implemented")
 33 |     attr(r, "midpoint") <- (.memberDend(d[[1L]]) + midS)/2
 34 |     r
 35 |   }
 36 |   setmid(x, type = type)
 37 | }
 38 | 
 39 | stats_.midDend <- function (x) {
 40 |   if (is.null(mp <- attr(x, "midpoint"))) 0 else mp
 41 | }
 42 | .midDend <- stats_.midDend # copied so that they would work inside the various functions here...
 43 | 
 44 | stats_.memberDend <- function (x) 
 45 | {
 46 |   r <- attr(x, "x.member")
 47 |   if (is.null(r)) {
 48 |     r <- attr(x, "members")
 49 |     if (is.null(r)) 
 50 |       r <- 1L
 51 |   }
 52 |   r
 53 | }
 54 | .memberDend <- stats_.memberDend
 55 | 
 56 | 
 57 | prune_leaf <- function(dend, leaf_name,...)
 58 | {
 59 |   labels_dend <- labels(dend)
 60 |   
 61 |   if(length(labels_dend) != length(unique(labels_dend)))	warning("Found dubplicate labels in the tree (this might indicate a problem in the tree you supplied)")
 62 |   
 63 |   if(!(leaf_name %in% labels_dend)) {	# what to do if there is no such leaf inside the tree
 64 |     warning(paste("There is no leaf with the label", leaf_name , "in the tree you supplied", "\n" , "Returning original tree", "\n" ))
 65 |     return(dend)
 66 |   }
 67 |   
 68 |   if(sum(labels_dend %in% leaf_name) > 1) {	# what to do if there is no such leaf inside the tree      
 69 |     warning(paste("There are multiple leaves by the name of '", leaf_name , "' in the tree you supplied.  Their locations is:",
 70 |                   paste(which(labels_dend %in% leaf_name), collapse = ","),"\n" , "Returning original tree", "\n" ))
 71 |     return(dend)
 72 |   }
 73 |   
 74 |   is.father.of.leaf.to.remove <- function(dend, leaf_name)
 75 |   {
 76 |     # this function checks if the leaf we wish to remove is the direct child of the current branch (dend) we entered the function
 77 |     is.father <- FALSE
 78 |     for(i in seq_len(length(dend)))
 79 |     {
 80 |       if(is.leaf(dend[[i]]) == TRUE  &&  labels(dend[[i]]) == leaf_name) is.father <- TRUE
 81 |     }
 82 |     return(is.father)
 83 |   }
 84 |   
 85 |   
 86 |   remove_leaf_if_child <- function(dend, leaf_name)
 87 |   {
 88 |     # print(labels(dend))
 89 |     if(all(labels(dend) != leaf_name))
 90 |     {	# if the leaf we want to remove is not in this branch, simply return the branch without going deeper intoit.
 91 |       return(dend)
 92 |     } else {	# but if the leaf we want to remove is here somewhere, go on searching
 93 |       attr(dend, "members") <- attr(dend, "members") - 1 
 94 |       
 95 |       if(!is.father.of.leaf.to.remove(dend, leaf_name))	# if you are not the father, then go on and make this function work on each child
 96 |       {
 97 |         for(i in seq_len(length(dend)))
 98 |         {
 99 |           dend[[i]] <- remove_leaf_if_child(dend[[i]], leaf_name)
100 |         }
101 |       } else { # we'll merge 
102 |         if(length(dend) == 2) {
103 |           leaf_location <- 1 
104 |           # if leaf location is 1, then move branch in leaf 2 to be the new x
105 |           if(is.leaf(dend[[leaf_location]]) == T  &&  labels(dend[[leaf_location]]) == leaf_name) {
106 | 
107 |             branch_to_bumpup <- 2
108 |             dend <- dend[[branch_to_bumpup]]
109 |           } else { # else - the leaf location must be located in position "2"
110 | 
111 |             branch_to_bumpup <- 1
112 |             dend <- dend[[branch_to_bumpup]]
113 |           }
114 |         } else if(length(dend) > 2) {
115 |           # If more than 2 branches, check if any are leaves
116 |           dend_leaves <- unlist(lapply(dend, is.leaf))
117 |           if(sum(dend_leaves) > 0) {
118 |             # If so, check for matching labels to the leaf to prune
119 |             dend_labels <- unlist(lapply(dend, function(x) attr(x, "label")))
120 |             dend_matches <- dend_labels == leaf_name
121 |             # Return a list containing the non-matching branches
122 |             dend[dend_leaves & dend_matches] <- NULL
123 |             # Note that in some cases, the following DOES NOT yield a correct result:
124 |             # dend <- dend[!(dend_leaves & dend_matches)]
125 |             
126 |             # If the length is now 1, it can be bumped up
127 |             if(length(dend) == 1) {
128 |               dend <- dend[[1]]
129 |             }
130 |             
131 |           }
132 |         }
133 |       }
134 |     }
135 |     return(dend)
136 |   }
137 |   
138 |   
139 |   new_dend <- remove_leaf_if_child(dend, leaf_name)
140 |   new_dend <- suppressWarnings(stats_midcache.dendrogram(new_dend)) # fixes the attributes
141 |   #   new_x <- fix_members_attr.dendrogram(new_x) # fix the number of memebers attr for each node
142 |   return(new_dend)
143 | }


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_15/tolias_comparison_figure.R:
--------------------------------------------------------------------------------
  1 | library(dplyr)
  2 | library(ggplot2)
  3 | library(feather)
  4 | options(stringsAsFactors = F)
  5 | source("sankey_functions.R")
  6 | 
  7 | v1_alm_anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather")
  8 | v1_alm_cl_anno <- v1_alm_anno %>%
  9 |   select(cl, cluster_id, cluster_label, cluster_color) %>%
 10 |   unique() %>%
 11 |   mutate(cl = as.character(cl))
 12 | 
 13 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/map.tree.df.rda")
 14 | map.tree.df <- map.tree.df %>%
 15 |   mutate(sample_id = rownames(.))
 16 | 
 17 | map.dat <- map.tree.df %>%
 18 |   mutate(cluster_label = as.character(cluster_label)) %>%
 19 |   mutate(cluster_label = ifelse(is.na(cluster_label), as.character(cl), cluster_label)) %>%
 20 |   mutate(cl = as.character(cl))
 21 | 
 22 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/patch_seq/tolias_patchseq_20161102/anno.feather")
 23 | 
 24 | # Remove old mapping and add new mapping from map.tree.df
 25 | anno <- anno %>%
 26 |   select(-cluster_id, -cluster_label, -cluster_color) %>%
 27 |   left_join(map.dat) %>%
 28 |   rowwise() %>%
 29 |   mutate(cluster_color = ifelse(cl %in% v1_alm_cl_anno$cl,
 30 |                                 v1_alm_cl_anno$cluster_color[v1_alm_cl_anno$cl == cl],
 31 |                                 "#808080")) %>%
 32 | # %>%
 33 | #   mutate(cluster_id = ifelse(cl %in% v1_alm_cl_anno$cl,
 34 | #                                 v1_alm_cl_anno$cluster_id[v1_alm_cl_anno$cl == cl],
 35 | #                                 0)) %>%
 36 |   ungroup()
 37 | 
 38 | # fix cluster ids
 39 | anno_cluster_ids <- anno %>%
 40 |   select(cluster_label) %>%
 41 |   unique() %>%
 42 |   mutate(cluster_id = 1:n())
 43 | 
 44 | anno <- anno %>%
 45 |   left_join(anno_cluster_ids)
 46 | 
 47 | # rearrange the target clusters based on which query cluster has the highest freq
 48 | new_order <- anno %>%
 49 |   group_by(cluster_id, cluster_label, class_id, cluster_color) %>%
 50 |   summarise(Freq = n()) %>%
 51 |   ungroup() %>%
 52 |   group_by(cluster_id) %>%
 53 |   arrange(-Freq) %>%
 54 |   filter(row_number() == 1) %>%
 55 |   ungroup() %>%
 56 |   arrange(class_id, cluster_id) %>%
 57 |   mutate(new_order = 1:n()) %>%
 58 |   select(cluster_id, cluster_label, cluster_color, Freq, new_order) %>%
 59 |   mutate(manual_order = c(2,1,7,4,3,8,5,9,11,6,10,12,13,14))
 60 | 
 61 | #edited_new_order <- edit(new_order)
 62 | 
 63 | tolias_to_new <- left_join(anno, new_order) %>%
 64 |   mutate(cluster_id = manual_order) %>%
 65 |   # select(-cluster_color) %>%
 66 |   # left_join(v1_alm_cl_anno) %>%
 67 |   mutate(cluster_color = ifelse(is.na(cluster_color), "#808080", cluster_color))
 68 | 
 69 | # make_plot_nodes converts group_nodes to rectangles for plotting with geom_rect()
 70 | t2n_nodes <- make_plot_nodes(make_group_nodes(tolias_to_new, c("class","cluster")),
 71 |                              pad = 0.2)
 72 | 
 73 | t2n_new_nodes <- t2n_nodes %>%
 74 |   filter(group == "cluster") %>%
 75 |   mutate(name = sub("^[0-9]+ ","",name),
 76 |          name = paste0(name, " (n = ",n,")"))
 77 | t2n_tolias_nodes <- t2n_nodes %>%
 78 |   filter(group == "class") %>%
 79 |   mutate(name = paste0(name, " (n = ",n,")"))
 80 | 
 81 | tolias_to_new_river <- build_river_plot(tolias_to_new, 
 82 |                                        c("class","cluster"), 
 83 |                                        fill_group = "class",
 84 |                                        pad = 0.2) +
 85 |   geom_text(data = t2n_new_nodes,
 86 |             aes(x = xmax + 0.01,
 87 |                 y = (ymin + ymax)/2,
 88 |                 label = name,
 89 |                 color = color),
 90 |             hjust = 0,
 91 |             size = 2) +
 92 |   geom_text(data = t2n_tolias_nodes,
 93 |             aes(x = xmin - 0.01,
 94 |                 y = (ymin + ymax)/2,
 95 |                 label = name,
 96 |                 color = color),
 97 |             hjust = 1,
 98 |             size = 2) +
 99 |   scale_color_identity()
100 | 
101 | tolias_to_new_river
102 | 
103 | ggsave("tolias_to_new_river.pdf", tolias_to_new_river, height = 3, width = 2)
104 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_15/tolias_dendrogram.R:
--------------------------------------------------------------------------------
 1 | library(dplyr)
 2 | library(feather)
 3 | library(ggplot2)
 4 | library(dendextend)
 5 | options(stringsAsFactors = F)
 6 | 
 7 | source("prune_leaf_custom.R")
 8 | 
 9 | v1_alm_anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather")
10 | 
11 | v1_alm_anno <- v1_alm_anno %>%
12 |   filter(cluster_id %in% 1:133)
13 | # 
14 | # dend <- readRDS("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/dend.RData")
15 | # 
16 | # alm_clusters <- unique(v1_alm_anno$cluster_id[grepl("ALM",v1_alm_anno$cluster_label)])
17 | # #inh_clusters <- 1:60
18 | # #nn_clusters <- 116:133
19 | # 
20 | # non_visp_clusters <- c(alm_clusters)
21 | # visp_clusters <- setdiff(1:133, non_visp_clusters)
22 | # 
23 | # non_visp_cluster_labels <- unique(v1_alm_anno$cluster_label[v1_alm_anno$cluster_id %in% non_visp_clusters])
24 | # 
25 | # visp_dend <- dend %>%
26 | #   prune.dendrogram(non_visp_cluster_labels)
27 | 
28 | 
29 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/map.tree.df.rda")
30 | 
31 | v1_alm_cl_anno <- v1_alm_anno %>%
32 |   select(cl, cluster_id, cluster_label, cluster_color) %>%
33 |   unique() %>%
34 |   mutate(cl = as.character(cl))
35 | 
36 | labels(V1.dend) <- v1_alm_cl_anno$cluster_label[match(labels(V1.dend), v1_alm_cl_anno$cl)]
37 | 
38 | node_n <- map.tree.df %>%
39 |   mutate(cl = as.character(cl)) %>%
40 |   group_by(cl) %>%
41 |   summarise(n_cells = n()) %>%
42 |   left_join(v1_alm_cl_anno) %>%
43 |   mutate(cluster_label = ifelse(is.na(cluster_label), cl, cluster_label))
44 | 
45 | nodes <- as.ggdend(V1.dend)$nodes %>%
46 |   mutate(cluster_label = get_nodes_attr(V1.dend,"label")) %>%
47 |   left_join(node_n)
48 | 
49 | plot_nodes <- nodes %>%
50 |   filter(!is.na(n_cells))
51 | 
52 | segments <- as.ggdend(V1.dend)$segments
53 | 
54 | node_labels <- plot_nodes %>%
55 |   filter(y > 0)
56 | 
57 | leaf_labels <- nodes %>%
58 |   filter(y == 0) %>%
59 |   mutate(col = ifelse(is.na(n_cells),"#808080",col)) %>%
60 |   left_join(v1_alm_cl_anno)
61 | 
62 | dendrogram_plot <- ggplot() +
63 |   geom_segment(data = segments,
64 |                aes(x = x, xend = xend,
65 |                    y = y, yend = yend),
66 |                lineend = "square",
67 |                size = 0.5) +
68 |   geom_point(data = plot_nodes,
69 |              aes(x = x,
70 |                  y = y,
71 |                  size = n_cells),
72 |              pch = 21,
73 |              fill = "#00AEEF",
74 |              color = "#2E3192",
75 |              alpha = 1) +
76 |   geom_text(data = node_labels,
77 |             aes(x = x, y = y,
78 |                 label = cl),
79 |             vjust = 1,
80 |             size = 2*5/6) +
81 |   geom_text(data = leaf_labels,
82 |             aes(x = x, y = y - 0.01,
83 |                 label = cluster_label,
84 |                 color = col),
85 |             angle = 90,
86 |             hjust = 1,
87 |             vjust = 0.3,
88 |             size = 2*5/6) +
89 |   scale_fill_identity() +
90 |   scale_color_identity() +
91 |   scale_size_area(max_size = 2) +
92 |   scale_y_continuous(limits = c(-0.1,1)) +
93 |   theme_void()
94 | 
95 | ggsave("tolias_dendrogram_plot.pdf",dendrogram_plot, width = 8.75, height = 5, useDingbats = FALSE)
96 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_16/cluster_violin_plot.R:
--------------------------------------------------------------------------------
 1 | library(dplyr)
 2 | library(ggplot2)
 3 | library(feather)
 4 | library(scrattch.vis)
 5 | library(scrattch.io)
 6 | options(stringsAsFactors = F)
 7 | source("layer_and_violin_functions.R")
 8 | 
 9 | genes <- split_cst("Vipr2, Pvalb, Slc32a1")
10 | 
11 | fdir <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/"
12 | 
13 | p <- group_violin_plot2(data_source = fdir,
14 |                         genes = genes,
15 |                         group_by = "dendcluster",
16 |                         clusters = 1:133,
17 |                         logscale = FALSE,
18 |                         labelheight = 2,
19 |                         max_width = 10,
20 |                         fontsize = 6,
21 |                         showcounts = FALSE)
22 | 
23 | ggsave("marker_violins.pdf", width = 10, height = 1, useDingbats = F)
24 | 
25 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_16/coexpression_bar_plots.R:
--------------------------------------------------------------------------------
 1 | library(dplyr)
 2 | library(ggplot2)
 3 | library(feather)
 4 | library(reshape2)
 5 | 
 6 | setwd("C:/Users/thucn/Dropbox/AIBS/Transcriptomics/Manuscripts/V1_alm/Figures/Figure_3_Vipr2/scripts")
 7 | source("coexpression_functions.R")
 8 | 
 9 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180312/anno.feather")
10 | data <- feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180312/data.feather")
11 | 
12 | ## VISp
13 | anno <- anno %>%
14 |   group_by(dendcluster_id) %>%
15 |   # Filter annotations for only clusters with > 10% VISp cells
16 |   mutate(group_visp_fraction = sum(region_label == "VISp")/n()) %>%
17 |   filter(group_visp_fraction > 0.05) %>%
18 |   # Only keep cells from those clusters that are from VISp
19 |   filter(region_label == "VISp") %>%
20 |   ungroup()
21 | 
22 | # colors and ordering for different combinations
23 | frac_anno <- data.frame(frac_label = c("frac_1","frac_2","frac_3","frac_12","frac_23","frac_13","frac_123"),
24 |                         frac_id = 1:7,
25 |                         frac_color = c("#eb008b", "#006838", "cyan", "black", "magenta", "gray", "blue"))
26 | 
27 | keep_frac <- c("frac_1", "frac_12", "frac_2","frac_3","frac_12","frac_23","frac_13","frac_123")
28 | keep_frac <- c("frac_12", "frac_3","frac_12","frac_23","frac_13","frac_123")
29 | 
30 | coexpression_barplot(condition_1 = c("Vipr2"),
31 |                      condition_2 = c("Pvalb"),
32 |                      condition_3 = NULL,
33 |                      frac_anno = frac_anno,
34 |                      fpkm_cutoff = 1,
35 |                      group_by = "cluster",
36 |                      groups = c(1:125),
37 |                      anno = anno,
38 |                      keep_frac = keep_frac,
39 |                      data = data)
40 | 
41 | ggsave("coexpression_barplot_Crh_Sst.pdf", width = 8, height = 5, dpi = 120, useDingbats = FALSE)
42 | 
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_16/coexpression_functions.R:
--------------------------------------------------------------------------------
  1 | condition_to_summarise_call <- function(pos_condition, neg_condition, fpkm_cutoff) {
  2 |   call <- paste0("sum(")
  3 |   pos_condition_comparisons <- paste(paste0(pos_condition," > ",fpkm_cutoff), collapse = " & ")
  4 |   if(!is.null(neg_condition)) {
  5 |     neg_condition_comparisons <- paste(paste0(neg_condition," > ",fpkm_cutoff), collapse = " & ")
  6 |     call <- paste0(call, pos_condition_comparisons, " & !(", neg_condition_comparisons,")")
  7 |   } else {
  8 |     call <- paste0(call, pos_condition_comparisons)
  9 |   }
 10 |   call <- paste0(call,")/n()")
 11 |   call
 12 | }
 13 | 
 14 | 
 15 | coexpression_barplot <- function(condition_1, condition_2, condition_3,
 16 |                                  frac_anno, fpkm_cutoff = 1, group_by = "dendcluster",
 17 |                                  groups = 1:128,
 18 |                                  anno, keep_frac = frac_anno$frac_label, data) {
 19 |   
 20 |   
 21 |   group_id <- paste0(group_by,"_id")
 22 |   group_label <- paste0(group_by,"_label")
 23 |   
 24 |   group_filter <- paste0(group_id," %in% c(",paste0(groups,collapse = ","),")")
 25 |   
 26 |   anno <- anno %>%
 27 |     filter_(group_filter)
 28 |   
 29 |   # recompute filtered positions
 30 |   group_order <- data.frame(group = groups) %>%
 31 |     mutate(group_order = 1:n())
 32 |   names(group_order)[1] <- group_id
 33 |   
 34 |   group_anno <- anno %>%
 35 |     ungroup() %>%
 36 |     select(one_of(paste0(group_by,c("_id","_label","_color")))) %>%
 37 |     unique() %>%
 38 |     left_join(group_order) %>%
 39 |     arrange(group_order) %>%
 40 |     mutate(group_xpos = 1:n())
 41 |   
 42 |   anno <- left_join(anno,group_anno)
 43 |   
 44 |   # pull data for all genes used in conditions
 45 |   
 46 |   cond_data <- data[,c("sample_id",condition_1,condition_2,condition_3)]
 47 |   
 48 |   # join gene values to the annotations
 49 |   anno_data <- anno %>%
 50 |     left_join(cond_data, by = "sample_id")
 51 |   
 52 |   # for each group, calculate the fraction of cells that match the conditions
 53 |   
 54 |   if(!is.null(condition_3)) {
 55 |     frac_data <- anno_data %>%
 56 |       group_by(group_xpos) %>%
 57 |       summarise_(frac_1 = condition_to_summarise_call(condition_1, c(condition_2, condition_3), fpkm_cutoff),
 58 |                  frac_2 = condition_to_summarise_call(condition_2, c(condition_1, condition_3), fpkm_cutoff),
 59 |                  frac_3 = condition_to_summarise_call(condition_3, c(condition_1, condition_2), fpkm_cutoff),
 60 |                  frac_12 = condition_to_summarise_call(c(condition_1, condition_2), condition_3, fpkm_cutoff),
 61 |                  frac_23 = condition_to_summarise_call(c(condition_2, condition_3), condition_1, fpkm_cutoff),
 62 |                  frac_13 = condition_to_summarise_call(c(condition_1, condition_3), condition_2, fpkm_cutoff),
 63 |                  frac_123 = condition_to_summarise_call(c(condition_1, condition_2, condition_3), NULL, fpkm_cutoff))
 64 |   } else {
 65 |     frac_data <- anno_data %>%
 66 |       group_by(group_xpos) %>%
 67 |       summarise_(frac_1 = condition_to_summarise_call(condition_1, condition_2, fpkm_cutoff),
 68 |                  frac_2 = condition_to_summarise_call(condition_2, condition_1, fpkm_cutoff),
 69 |                  frac_12 = condition_to_summarise_call(c(condition_1, condition_2), NULL, fpkm_cutoff))
 70 |     
 71 |     frac_anno <- frac_anno %>%
 72 |       filter(!grepl("3",frac_label))
 73 |   }
 74 |   
 75 |   frac_anno <- frac_anno %>%
 76 |     filter(grepl(paste(paste0(keep_frac, "$"), collapse="|"), frac_label))
 77 |   
 78 |   plot_data <- melt(frac_data, "group_xpos") %>%
 79 |     filter(value > 0) %>%
 80 |     rename_("frac_label" = "variable") %>%
 81 |     left_join(frac_anno) %>%
 82 |     arrange(group_xpos, frac_id) %>%
 83 |     group_by(group_xpos) %>%
 84 |     mutate(xmin = group_xpos - 0.4,
 85 |            xmax = group_xpos + 0.4) %>%
 86 |     mutate(csum = cumsum(value),
 87 |            ymin = lag(csum, default = 0),
 88 |            ymax = csum)
 89 |   
 90 |   ggplot() + 
 91 |     geom_rect(data = plot_data,
 92 |               aes(xmin = xmin, xmax = xmax,
 93 |                   ymin = ymin, ymax = ymax,
 94 |                   fill = frac_color)) +
 95 |     scale_fill_identity(breaks = frac_anno$frac_color,
 96 |                         labels = frac_anno$frac_label,
 97 |                         guide = "legend") +
 98 |     scale_x_continuous(breaks = group_anno$group_xpos, 
 99 |                        labels = group_anno[[group_label]],
100 |                        limits = c(0.5, max(group_anno$group_xpos) + 0.5)) +
101 |     scale_y_continuous("Fraction of Cells in Cluster", expand = c(0,0)) +
102 |     theme_classic() +
103 |     theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.3, colour=group_anno$cluster_color))
104 | }
105 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_16/marker_violins.R:
--------------------------------------------------------------------------------
 1 | library(dplyr)
 2 | library(scrattch)
 3 | options(stringsAsFactors = F)
 4 | 
 5 | fdir <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20170405/"
 6 | 
 7 | vipr2_genes <- c("Vipr2")
 8 | vipr2_markers <- group_violin_plot(data_source = fdir, 
 9 |                    group_by = "dendcluster", 
10 |                    clusters = 1:112,
11 |                    logscale = FALSE,
12 |                    genes = vipr2_genes,
13 |                    labelheight = 50,
14 |                    showcounts = FALSE,
15 |                    fontsize = 10)
16 | 
17 | vipr2_markers
18 | 
19 | ggsave("vipr2_marker_violins.pdf", vipr2_markers, width = 16, height = 2, useDingbats = F)
20 | 
21 | vipr2_boxplot <- group_box_plot(data_source = fdir, 
22 |                                    group_by = "dendcluster", 
23 |                                    clusters = 1:112,
24 |                                    logscale = FALSE,
25 |                                    genes = vipr2_genes,
26 |                                    labelheight = 50,
27 |                                    showcounts = FALSE,
28 |                                    fontsize = 12)
29 | 
30 | vipr2_boxplot
31 | 
32 | ggsave("vipr2_boxplot.pdf", vipr2_boxplot, width = 16, height = 2, useDingbats = F)
33 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_16/river_plots.R:
--------------------------------------------------------------------------------
  1 | library(dplyr)
  2 | library(ggplot2)
  3 | library(feather)
  4 | options(stringsAsFactors = F)
  5 | source("sankey_functions.R")
  6 | 
  7 | 
  8 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20170405/anno.feather")
  9 | 
 10 | anno <- anno %>%
 11 |   filter(genotype_label == "Vipr2-IRES2-Cre/wt;Ai14(RCL-tdT)/wt")
 12 | 
 13 | # make_plot_nodes converts group_nodes to rectangles for plotting with geom_rect()
 14 | nodes <- make_plot_nodes(make_group_nodes(anno, c("genotype","cluster")))
 15 | 
 16 | left_nodes <- nodes %>%
 17 |   filter(group == "genotype") %>%
 18 |   mutate(name = paste0(name, " (n = ",n,")"))
 19 | right_nodes <- nodes %>%
 20 |   filter(group == "cluster") %>%
 21 |   mutate(name = paste0(name, " (n = ",n,")"))
 22 | 
 23 | 
 24 | river <- build_river_plot(anno, 
 25 |                           c("genotype","cluster"), 
 26 |                           fill_group = "cluster") +
 27 |   geom_text(data = right_nodes,
 28 |             aes(x = xmax + 0.01,
 29 |                 y = (ymin + ymax)/2,
 30 |                 label = name,
 31 |                 color = color),
 32 |             hjust = 0) +
 33 |   geom_text(data = left_nodes,
 34 |             aes(x = xmin - 0.01,
 35 |                 y = (ymin + ymax)/2,
 36 |                 label = name,
 37 |                 color = color),
 38 |             hjust = 1) +
 39 |   scale_color_identity()
 40 | 
 41 | river
 42 | 
 43 | ggsave("vipr2_ai14_river.pdf", river, height = 5, width = 5)
 44 | 
 45 | ## Simulations for placeholders
 46 | 
 47 | anno <- anno %>%
 48 |   filter(cluster_id %in% c(1,29:47))
 49 | 
 50 | nodes <- make_plot_nodes(make_group_nodes(anno, c("genotype","cluster")))
 51 | 
 52 | left_nodes <- nodes %>%
 53 |   filter(group == "genotype") %>%
 54 |   mutate(name = paste0(name, " (n = ",n,")"))
 55 | right_nodes <- nodes %>%
 56 |   filter(group == "cluster") %>%
 57 |   mutate(name = paste0(name, " (n = ",n,")"))
 58 | 
 59 | 
 60 | river <- build_river_plot(anno, 
 61 |                           c("genotype","cluster"), 
 62 |                           fill_group = "cluster") +
 63 |   geom_text(data = right_nodes,
 64 |             aes(x = xmax + 0.01,
 65 |                 y = (ymin + ymax)/2,
 66 |                 label = name,
 67 |                 color = color),
 68 |             hjust = 0) +
 69 |   geom_text(data = left_nodes,
 70 |             aes(x = xmin - 0.01,
 71 |                 y = (ymin + ymax)/2,
 72 |                 label = name,
 73 |                 color = color),
 74 |             hjust = 1) +
 75 |   scale_color_identity()
 76 | 
 77 | river
 78 | 
 79 | ggsave("SIMULATION_vipr2_slc32a1_river.pdf", river, height = 5, width = 5)
 80 | 
 81 | 
 82 | anno <- anno %>%
 83 |   filter(cluster_id %in% c(47))
 84 | 
 85 | nodes <- make_plot_nodes(make_group_nodes(anno, c("genotype","cluster")))
 86 | 
 87 | left_nodes <- nodes %>%
 88 |   filter(group == "genotype") %>%
 89 |   mutate(name = paste0(name, " (n = ",n,")"))
 90 | right_nodes <- nodes %>%
 91 |   filter(group == "cluster") %>%
 92 |   mutate(name = paste0(name, " (n = ",n,")"))
 93 | 
 94 | 
 95 | river <- build_river_plot(anno, 
 96 |                           c("genotype","cluster"), 
 97 |                           fill_group = "cluster") +
 98 |   geom_text(data = right_nodes,
 99 |             aes(x = xmax + 0.01,
100 |                 y = (ymin + ymax)/2,
101 |                 label = name,
102 |                 color = color),
103 |             hjust = 0) +
104 |   geom_text(data = left_nodes,
105 |             aes(x = xmin - 0.01,
106 |                 y = (ymin + ymax)/2,
107 |                 label = name,
108 |                 color = color),
109 |             hjust = 1) +
110 |   scale_color_identity()
111 | 
112 | river
113 | 
114 | ggsave("SIMULATION_vipr2_pvalb_river.pdf", river, height = 5, width = 5)
115 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_18/cluster_violin_plot.R:
--------------------------------------------------------------------------------
 1 | library(dplyr)
 2 | library(ggplot2)
 3 | library(feather)
 4 | library(scrattch)
 5 | options(stringsAsFactors = F)
 6 | 
 7 | genes <- split_cst("Rspo1, Scnn1a, Hsd11b1, Chrna6, Slc17a8, Batf3, Colq, Fam84b, Osr1, Foxp2, Slc17a7")
 8 | 
 9 | p <- group_violin_plot(data_source = "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20170913/",
10 |                   genes = genes,
11 |                   group_by = "dendcluster",
12 |                   clusters = 1:128,
13 |                   labelheight = 10)
14 | 
15 | ggsave("marker_violins.pdf", width = 10, height = 3, useDingbats = F)
16 | 
17 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_18/coexpression_functions.R:
--------------------------------------------------------------------------------
  1 | condition_to_summarise_call <- function(pos_condition, neg_condition, fpkm_cutoff) {
  2 |   call <- paste0("sum(")
  3 |   pos_condition_comparisons <- paste(paste0(pos_condition," > ",fpkm_cutoff), collapse = " & ")
  4 |   if(!is.null(neg_condition)) {
  5 |     neg_condition_comparisons <- paste(paste0(neg_condition," > ",fpkm_cutoff), collapse = " & ")
  6 |     call <- paste0(call, pos_condition_comparisons, " & !(", neg_condition_comparisons,")")
  7 |   } else {
  8 |     call <- paste0(call, pos_condition_comparisons)
  9 |   }
 10 |   call <- paste0(call,")/n()")
 11 |   call
 12 | }
 13 | 
 14 | 
 15 | coexpression_barplot <- function(condition_1, condition_2, condition_3,
 16 |                                  frac_anno, fpkm_cutoff = 1, group_by = "dendcluster",
 17 |                                  groups = 1:128,
 18 |                                  anno, keep_frac = frac_anno$frac_label, data) {
 19 |   
 20 |   
 21 |   group_id <- paste0(group_by,"_id")
 22 |   group_label <- paste0(group_by,"_label")
 23 |   
 24 |   group_filter <- paste0(group_id," %in% c(",paste0(groups,collapse = ","),")")
 25 |   
 26 |   anno <- anno %>%
 27 |     filter_(group_filter)
 28 |   
 29 |   # recompute filtered positions
 30 |   group_order <- data.frame(group = groups) %>%
 31 |     mutate(group_order = 1:n())
 32 |   names(group_order)[1] <- group_id
 33 |   
 34 |   group_anno <- anno %>%
 35 |     ungroup() %>%
 36 |     select(one_of(paste0(group_by,c("_id","_label","_color")))) %>%
 37 |     unique() %>%
 38 |     left_join(group_order) %>%
 39 |     arrange(group_order) %>%
 40 |     mutate(group_xpos = 1:n())
 41 |   
 42 |   anno <- left_join(anno,group_anno)
 43 |   
 44 |   # pull data for all genes used in conditions
 45 |   
 46 |   cond_data <- data[,c("sample_id",condition_1,condition_2,condition_3)]
 47 |   
 48 |   # join gene values to the annotations
 49 |   anno_data <- anno %>%
 50 |     left_join(cond_data, by = "sample_id")
 51 |   
 52 |   # for each group, calculate the fraction of cells that match the conditions
 53 |   
 54 |   if(!is.null(condition_3)) {
 55 |     frac_data <- anno_data %>%
 56 |       group_by(group_xpos) %>%
 57 |       summarise_(frac_1 = condition_to_summarise_call(condition_1, c(condition_2, condition_3), fpkm_cutoff),
 58 |                  frac_2 = condition_to_summarise_call(condition_2, c(condition_1, condition_3), fpkm_cutoff),
 59 |                  frac_3 = condition_to_summarise_call(condition_3, c(condition_1, condition_2), fpkm_cutoff),
 60 |                  frac_12 = condition_to_summarise_call(c(condition_1, condition_2), condition_3, fpkm_cutoff),
 61 |                  frac_23 = condition_to_summarise_call(c(condition_2, condition_3), condition_1, fpkm_cutoff),
 62 |                  frac_13 = condition_to_summarise_call(c(condition_1, condition_3), condition_2, fpkm_cutoff),
 63 |                  frac_123 = condition_to_summarise_call(c(condition_1, condition_2, condition_3), NULL, fpkm_cutoff))
 64 |   } else {
 65 |     frac_data <- anno_data %>%
 66 |       group_by(group_xpos) %>%
 67 |       summarise_(frac_1 = condition_to_summarise_call(condition_1, condition_2, fpkm_cutoff),
 68 |                  frac_2 = condition_to_summarise_call(condition_2, condition_1, fpkm_cutoff),
 69 |                  frac_12 = condition_to_summarise_call(c(condition_1, condition_2), NULL, fpkm_cutoff))
 70 |     
 71 |     frac_anno <- frac_anno %>%
 72 |       filter(!grepl("3",frac_label))
 73 |   }
 74 |   
 75 |   frac_anno <- frac_anno %>%
 76 |     filter(grepl(paste(paste0(keep_frac, "$"), collapse="|"), frac_label))
 77 |   
 78 |   plot_data <- melt(frac_data, "group_xpos") %>%
 79 |     filter(value > 0) %>%
 80 |     rename_("frac_label" = "variable") %>%
 81 |     left_join(frac_anno) %>%
 82 |     arrange(group_xpos, frac_id) %>%
 83 |     group_by(group_xpos) %>%
 84 |     mutate(xmin = group_xpos - 0.4,
 85 |            xmax = group_xpos + 0.4) %>%
 86 |     mutate(csum = cumsum(value),
 87 |            ymin = lag(csum, default = 0),
 88 |            ymax = csum)
 89 |   
 90 |   ggplot() + 
 91 |     geom_rect(data = plot_data,
 92 |               aes(xmin = xmin, xmax = xmax,
 93 |                   ymin = ymin, ymax = ymax,
 94 |                   fill = frac_color)) +
 95 |     scale_fill_identity(breaks = frac_anno$frac_color,
 96 |                         labels = frac_anno$frac_label,
 97 |                         guide = "legend") +
 98 |     scale_x_continuous(breaks = group_anno$group_xpos, 
 99 |                        labels = group_anno[[group_label]],
100 |                        limits = c(0.5, max(group_anno$group_xpos) + 0.5)) +
101 |     scale_y_continuous("Fraction of Cells in Cluster", expand = c(0,0)) +
102 |     theme_classic() +
103 |     theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.3, colour=group_anno$cluster_color))
104 | }
105 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_18/color_functions.R:
--------------------------------------------------------------------------------
 1 | hsv_palette <- function(n_colors, 
 2 |                         hue_start = 0, 
 3 |                         hue_end = max(1, n-1)/n,
 4 |                         sat_start = 0.55,
 5 |                         sat_end = 1,
 6 |                         sat_steps = 4,
 7 |                         val_start = 1,
 8 |                         val_end = 0.8,
 9 |                         val_steps = 3) {
10 |   sats <- rep_len(seq(sat_start, sat_end, length.out = sat_steps),length.out = n_colors)
11 |   vals <- rep_len(seq(val_start, val_end, length.out = val_steps),length.out = n_colors)
12 |   if(hue_end < hue_start) {
13 |     rev(sub("FF$","",rainbow(n_colors, s = sats, v = vals, start = hue_end, end = hue_start)))
14 |   } else {
15 |     sub("FF$","",rainbow(n_colors, s = sats, v = vals, start = hue_start, end = hue_end))
16 |   }
17 | }
18 | 
19 | color_mean <- function(x) {
20 |   library(grDevices)
21 |   
22 |   rgb_x <- col2rgb(x)
23 |   rgb_mean <- rowMeans(rgb_x)
24 |   new_hex <- rgb(rgb_mean["red"]/255,
25 |                  rgb_mean["green"]/255,
26 |                  rgb_mean["blue"]/255)
27 |   
28 |   new_hex
29 |   
30 | }
31 | 
32 | check_s <- function(x, min_sat = 0) {
33 |   library(grDevices)
34 |   
35 |   hsv_x <- rgb2hsv(col2rgb(x))
36 |   
37 |   hsv_x["s",] < min_sat
38 |   
39 | }
40 | 
41 | check_v <- function(x, min_val = 0) {
42 |   library(grDevices)
43 |   
44 |   hsv_x <- rgb2hsv(col2rgb(x))
45 |   
46 |   hsv_x["v",] < min_val
47 |   
48 | }
49 | 
50 | adjust_s <- function(x, shift_sat = 0.3) {
51 |   library(grDevices)
52 |   
53 |   hsv_x <- rgb2hsv(col2rgb(x))
54 |   
55 |   hsv_x["s", ] <- hsv_x["s", ] + shift_sat
56 |   if(hsv_x["s", ] > 1) {
57 |     hsv_x["s", ] <- 1
58 |   } else if (hsv_x["s",] <- 0) {
59 |     hsv_x["s", ] <- 0
60 |   }
61 |   
62 |   hsv(hsv_x[1,], hsv_x[2,], hsv_x[3,])
63 | }
64 | 
65 | adjust_v <- function(x, shift_val = 0.3) {
66 |   library(grDevices)
67 |   
68 |   hsv_x <- rgb2hsv(col2rgb(x))
69 |   
70 |   hsv_x["v", ] <- hsv_x["v", ] + shift_val
71 |   if(hsv_x["v", ] > 1) {
72 |     hsv_x["v", ] <- 1
73 |   } else if (hsv_x["v",] <- 0) {
74 |     hsv_x["v", ] <- 0
75 |   }
76 |   
77 |   hsv(hsv_x[1,], hsv_x[2,], hsv_x[3,])
78 | }
79 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_18/prune_leaf_custom.R:
--------------------------------------------------------------------------------
  1 | prune.dendrogram <- function(dend, leaves, reindex_dend = TRUE, ...) {
  2 |   leaves <- as.character(leaves)
  3 |   
  4 |   for(i in seq_along(leaves))
  5 |   {
  6 |     # this function is probably not the fastest - but it works...
  7 |     dend <- prune_leaf(dend, leaves[i])	# move step by stem to remove all of these leaves...
  8 |   }
  9 |   
 10 |   if(reindex_dend) dend <- reindex_dend(dend)
 11 |   
 12 |   return(dend)
 13 | }
 14 | 
 15 | stats_midcache.dendrogram <- function (x, type = "hclust", quiet = FALSE) 
 16 | {
 17 |   type <- match.arg(type)
 18 |   stopifnot(inherits(x, "dendrogram"))
 19 |   setmid <- function(d, type) {
 20 |     if (is.leaf(d)) 
 21 |       return(d)
 22 |     k <- length(d)
 23 |     if (k < 1) 
 24 |       stop("dendrogram node with non-positive #{branches}")
 25 |     r <- d
 26 |     midS <- 0
 27 |     for (j in 1L:k) {
 28 |       r[[j]] <- unclass(setmid(d[[j]], type))
 29 |       midS <- midS + .midDend(r[[j]])
 30 |     }
 31 |     if (!quiet && type == "hclust" && k != 2) 
 32 |       warning("midcache() of non-binary dendrograms only partly implemented")
 33 |     attr(r, "midpoint") <- (.memberDend(d[[1L]]) + midS)/2
 34 |     r
 35 |   }
 36 |   setmid(x, type = type)
 37 | }
 38 | 
 39 | stats_.midDend <- function (x) {
 40 |   if (is.null(mp <- attr(x, "midpoint"))) 0 else mp
 41 | }
 42 | .midDend <- stats_.midDend # copied so that they would work inside the various functions here...
 43 | 
 44 | stats_.memberDend <- function (x) 
 45 | {
 46 |   r <- attr(x, "x.member")
 47 |   if (is.null(r)) {
 48 |     r <- attr(x, "members")
 49 |     if (is.null(r)) 
 50 |       r <- 1L
 51 |   }
 52 |   r
 53 | }
 54 | .memberDend <- stats_.memberDend
 55 | 
 56 | 
 57 | prune_leaf <- function(dend, leaf_name,...)
 58 | {
 59 |   labels_dend <- labels(dend)
 60 |   
 61 |   if(length(labels_dend) != length(unique(labels_dend)))	warning("Found dubplicate labels in the tree (this might indicate a problem in the tree you supplied)")
 62 |   
 63 |   if(!(leaf_name %in% labels_dend)) {	# what to do if there is no such leaf inside the tree
 64 |     warning(paste("There is no leaf with the label", leaf_name , "in the tree you supplied", "\n" , "Returning original tree", "\n" ))
 65 |     return(dend)
 66 |   }
 67 |   
 68 |   if(sum(labels_dend %in% leaf_name) > 1) {	# what to do if there is no such leaf inside the tree      
 69 |     warning(paste("There are multiple leaves by the name of '", leaf_name , "' in the tree you supplied.  Their locations is:",
 70 |                   paste(which(labels_dend %in% leaf_name), collapse = ","),"\n" , "Returning original tree", "\n" ))
 71 |     return(dend)
 72 |   }
 73 |   
 74 |   is.father.of.leaf.to.remove <- function(dend, leaf_name)
 75 |   {
 76 |     # this function checks if the leaf we wish to remove is the direct child of the current branch (dend) we entered the function
 77 |     is.father <- FALSE
 78 |     for(i in seq_len(length(dend)))
 79 |     {
 80 |       if(is.leaf(dend[[i]]) == TRUE  &&  labels(dend[[i]]) == leaf_name) is.father <- TRUE
 81 |     }
 82 |     return(is.father)
 83 |   }
 84 |   
 85 |   
 86 |   remove_leaf_if_child <- function(dend, leaf_name)
 87 |   {
 88 |     # print(labels(dend))
 89 |     if(all(labels(dend) != leaf_name))
 90 |     {	# if the leaf we want to remove is not in this branch, simply return the branch without going deeper intoit.
 91 |       return(dend)
 92 |     } else {	# but if the leaf we want to remove is here somewhere, go on searching
 93 |       attr(dend, "members") <- attr(dend, "members") - 1 
 94 |       
 95 |       if(!is.father.of.leaf.to.remove(dend, leaf_name))	# if you are not the father, then go on and make this function work on each child
 96 |       {
 97 |         for(i in seq_len(length(dend)))
 98 |         {
 99 |           dend[[i]] <- remove_leaf_if_child(dend[[i]], leaf_name)
100 |         }
101 |       } else { # we'll merge 
102 |         if(length(dend) == 2) {
103 |           leaf_location <- 1 
104 |           # if leaf location is 1, then move branch in leaf 2 to be the new x
105 |           if(is.leaf(dend[[leaf_location]]) == T  &&  labels(dend[[leaf_location]]) == leaf_name) {
106 | 
107 |             branch_to_bumpup <- 2
108 |             dend <- dend[[branch_to_bumpup]]
109 |           } else { # else - the leaf location must be located in position "2"
110 | 
111 |             branch_to_bumpup <- 1
112 |             dend <- dend[[branch_to_bumpup]]
113 |           }
114 |         } else if(length(dend) > 2) {
115 |           # If more than 2 branches, check if any are leaves
116 |           dend_leaves <- unlist(lapply(dend, is.leaf))
117 |           if(sum(dend_leaves) > 0) {
118 |             # If so, check for matching labels to the leaf to prune
119 |             dend_labels <- unlist(lapply(dend, function(x) attr(x, "label")))
120 |             dend_matches <- dend_labels == leaf_name
121 |             # Return a list containing the non-matching branches
122 |             dend <- dend[!(dend_leaves & dend_matches)]
123 |           }
124 |         }
125 |       }
126 |     }
127 |     return(dend)
128 |   }
129 |   
130 |   
131 |   new_dend <- remove_leaf_if_child(dend, leaf_name)
132 |   new_dend <- suppressWarnings(stats_midcache.dendrogram(new_dend)) # fixes the attributes
133 |   #   new_x <- fix_members_attr.dendrogram(new_x) # fix the number of memebers attr for each node
134 |   return(new_dend)
135 | }


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_18/unified_layer_distributions_and_markers.R:
--------------------------------------------------------------------------------
 1 | library(dplyr)
 2 | library(ggplot2)
 3 | library(cowplot)
 4 | library(feather)
 5 | library(dendextend)
 6 | library(scrattch.vis)
 7 | library(scrattch.io)
 8 | options(stringsAsFactors = F)
 9 | 
10 | source("color_functions.R")
11 | source("prune_leaf_custom.R")
12 | source("layer_and_violin_functions.R")
13 | 
14 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather")
15 | 
16 | 
17 | plot_anno <- data.frame(plot_id = c(1:4),
18 |                         cluster_id = c(35,35,46,46),
19 |                         region_id = c(1,2,1,2),
20 |                         plot_color = rainbow(4),
21 |                         plot_label = c("ALM Calb2 Necab1","VISp Calb2 Necab1",
22 |                                        "ALM Esm1","VISp Esm1"))
23 | 
24 | sub_anno <- anno %>%
25 |   filter(cluster_id %in% c(35,46)) %>%
26 |   left_join(plot_anno) %>%
27 |   mutate(dendcluster_id = plot_id) %>%
28 |   mutate(cluster_color = plot_color)
29 | 
30 | 
31 | layer_jitters <- build_layer_plot(sub_anno,
32 |                               dendcluster_ids = 1:4)
33 | 
34 | ## Violin plots
35 | 
36 | fdir <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/"
37 | 
38 | sst_pvalb_genes <- unique(split_cst("Sst Crh Calb2"))
39 | 
40 | data <- feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/data.feather")
41 | gene_data <- data[,c("sample_id",sst_pvalb_genes)] %>%
42 |   filter(sample_id %in% sub_anno$sample_id) %>%
43 |   left_join(sub_anno) %>%
44 |   mutate(xpos = plot_id)
45 | 
46 | sst_pvalb_markers <- group_violin_plot2(data = gene_data,
47 |                                        group_by = "plot", 
48 |                                        clusters = 1:4,
49 |                                        genes = sst_pvalb_genes,
50 |                                        logscale = TRUE,
51 |                                        labelheight = 2,
52 |                                        max_width = 10,
53 |                                        fontsize = 5,
54 |                                        showcounts = FALSE)
55 | 
56 | all_plots <- plot_grid(layer_jitters,
57 |                        sst_pvalb_markers,
58 |                        align = "v",
59 |                        nrow = 2,
60 |                        rel_widths = 1,
61 |                        rel_heights = 1)
62 | 
63 | save_plot("layers_violins.pdf",
64 |           all_plots,
65 |           ncol = 2,
66 |           nrow = 2,
67 |           base_width = 1.5/2,
68 |           base_height = 4/2)
69 | 
70 | count_summary <- sub_anno %>%
71 |   group_by(plot_id) %>%
72 |   summarise(n_cells = n())
73 | 
74 | max_summary <- data.frame(sst_max = max(gene_data$Sst),
75 |                           crh_max = max(gene_data$Crh),
76 |                           calb2_max = max(gene_data$Calb2))
77 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_20/dend_plots.R:
--------------------------------------------------------------------------------
 1 | library(dendextend)
 2 | library(dplyr)
 3 | library(feather)
 4 | options(stringsAsFactors = F)
 5 | 
 6 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/dend.collapse.rda")
 7 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/dend.bak.rda")
 8 | 
 9 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather")
10 | 
11 | cl_anno <- anno %>%
12 |   select(cl, cluster_id, cluster_label, cluster_color) %>%
13 |   unique()
14 | 
15 | cl_to_cluster <- cl_anno %>%
16 |   select(cl, cluster_id)
17 | 
18 | format_dend <- function(dend, cl_anno, cl_to_cluster) {
19 |   # Rotate so that Glutamatergic samples are first:
20 |   original_labels <- as.numeric(labels(dend))
21 |   gaba_labels <- cl_to_cluster$cl[cl_to_cluster$cluster_id %in% 1:60]
22 |   gluta_labels <- cl_to_cluster$cl[cl_to_cluster$cluster_id %in% 61:115]
23 |   nn_labels <- cl_to_cluster$cl[cl_to_cluster$cluster_id %in% 116:133]
24 |   
25 |   dend <- dend %>%
26 |     rotate(as.character(c(gluta_labels, gaba_labels, nn_labels)))
27 |   
28 |   labels_colors(dend) <- cl_anno$cluster_color[match(labels(dend), cl_anno$cl)]
29 |   labels(dend) <- as.character(cl_anno$cluster_label[match(as.numeric(labels(dend)), cl_anno$cl)])
30 |   
31 |   labelDend <- function(dend,n=1)
32 |   {
33 |     if(is.null(attr(dend,"label"))){
34 |       attr(dend, "label") =paste0("n",n)
35 |       n= n +1
36 |     }
37 |     if(length(dend)>1){
38 |       for(i in 1:length(dend)){
39 |         tmp = labelDend(dend[[i]], n)
40 |         dend[[i]] = tmp[[1]]
41 |         n = tmp[[2]]
42 |       }
43 |     }
44 |     return(list(dend, n))
45 |   }
46 |   
47 |   
48 |   dend <- labelDend(dend)[[1]]
49 |   dend
50 | }
51 | 
52 | # Original, uncollapsed dendrogram
53 | dend_original <- format_dend(dend,
54 |                              cl_anno, cl_to_cluster)
55 | 
56 | pdf("dend_0.pdf",
57 |     width = 7.5, height = 4,
58 |     useDingbats = F)
59 | plot(dend_original)
60 | dev.off()
61 | 
62 | # version used for figures
63 | dend_0.4 <- readRDS("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/dend.RData")
64 | pdf("dend_0.4.pdf",
65 |     width = 7.5, height = 4,
66 |     useDingbats = F)
67 | plot(dend_0.4)
68 | dev.off()
69 | 
70 | # Collapsed further
71 | dend_0.8 <- format_dend(dend.collapse[[7]],
72 |                         cl_anno, cl_to_cluster)
73 | pdf("dend_0.8.pdf",
74 |     width = 7.5, height = 4,
75 |     useDingbats = F)
76 | plot(dend_0.8)
77 | dev.off()
78 | 
79 | # legend
80 | dend_dfs <- as.ggdend(dend)
81 | 
82 | library(scrattch.vis)
83 | 
84 | colorset <- c("#FFFFFF","#C5C5C5","#921C1C","#000000")
85 | 
86 | legend <- heatmap_legend_plot(minval = 0, maxval = 1,
87 |                     scale_name = "Bootstrapped Confidence",
88 |                     colorset = colorset)
89 | 
90 | ggsave("legend.pdf",
91 |        legend,
92 |        width = 1,
93 |        height = 1)
94 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_3/pairwise_cluster_heatmaps.R:
--------------------------------------------------------------------------------
  1 | library(ggplot2)
  2 | library(dplyr)
  3 | library(feather)
  4 | library(scrattch.vis)
  5 | library(reshape2)
  6 | library(pals)
  7 | options(stringsAsFactors = F)
  8 | 
  9 | dend <- readRDS("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/dend.RData")
 10 | 
 11 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather")
 12 | 
 13 | cl_anno <- anno %>%
 14 |   select(cl, dendcluster_id, cluster_id) %>%
 15 |   unique() %>%
 16 |   filter(cluster_id %in% 1:133) %>%
 17 |   arrange(dendcluster_id)
 18 | 
 19 | 
 20 | # Pairwise Pearson Correlation
 21 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/cl.cor.rda")
 22 | cl.cor2 <- cl.cor[as.character(cl_anno$cl),as.character(cl_anno$cl)]
 23 | rownames(cl.cor2) <- cl_anno$dendcluster_id
 24 | colnames(cl.cor2) <- cl_anno$dendcluster_id
 25 | 
 26 | #colorset <- sub("FF$","",plasma(20))
 27 | colorset <- jet(20)
 28 | 
 29 | cl.cor3 <- melt(cl.cor2) %>%
 30 |   mutate(color = values_to_colors(value, colorset = colorset))
 31 | 
 32 | cl.cor_plot <- ggplot() +
 33 |   geom_tile(data = cl.cor3,
 34 |             aes(x = Var1,
 35 |                 y = Var2,
 36 |                 fill = color)) +
 37 |   scale_fill_identity() +
 38 |   scale_y_reverse() +
 39 |   theme_void()
 40 | 
 41 | ggsave("cl.cor_plot.pdf",
 42 |        cl.cor_plot,
 43 |        width = 2.25,
 44 |        height = 2.25)
 45 | 
 46 | # Pairwise DEGene expression
 47 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/de.summary.rda")
 48 | de.summary <- de.summary %>%
 49 |   mutate(cl1 = as.numeric(as.character(cl1)),
 50 |          cl2 = as.numeric(as.character(cl2)))
 51 | 
 52 | cl_anno1 <- cl_anno
 53 | names(cl_anno1) <- paste0(names(cl_anno), "1")
 54 | cl_anno2 <- cl_anno
 55 | names(cl_anno2) <- paste0(names(cl_anno), "2")
 56 | 
 57 | de.summary2 <- de.summary %>%
 58 |   left_join(cl_anno1) %>%
 59 |   left_join(cl_anno2) %>%
 60 |   mutate(color = values_to_colors(log10(de.num+1), colorset = colorset))
 61 | 
 62 | diag_tiles <- data.frame(pos = 1:125,
 63 |                          color = colorset[1])
 64 | 
 65 | de.summary_plot <- ggplot() +
 66 |   geom_tile(data = de.summary2,
 67 |             aes(x = dendcluster_id1,
 68 |                 y = dendcluster_id2,
 69 |                 fill = color)) +
 70 |   geom_tile(data = de.summary2,
 71 |             aes(x = dendcluster_id2,
 72 |                 y = dendcluster_id1,
 73 |                 fill = color)) +
 74 |   geom_tile(data = diag_tiles,
 75 |             aes(x = pos,
 76 |                 y = pos,
 77 |                 fill = color))+
 78 |   scale_fill_identity() +
 79 |   scale_y_reverse() +
 80 |   theme_void()
 81 | 
 82 | ggsave("de.summary_plot.pdf",
 83 |        de.summary_plot,
 84 |        width = 2.25,
 85 |        height = 2.25)
 86 | 
 87 | # Pairwise coclustering
 88 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/co.stats.rda")
 89 | 
 90 | cl.co.ratio <- co.stats$cl.co.ratio
 91 | 
 92 | cl.co.ratio2 <- cl.co.ratio[as.character(cl_anno$cl),as.character(cl_anno$cl)]
 93 | rownames(cl.co.ratio2) <- cl_anno$dendcluster_id
 94 | colnames(cl.co.ratio2) <- cl_anno$dendcluster_id
 95 | 
 96 | cl.co.ratio3 <- melt(cl.co.ratio2) %>%
 97 |   mutate(color = values_to_colors(value, colorset = colorset))
 98 | 
 99 | cl.co.ratio_plot <- ggplot() +
100 |   geom_tile(data = cl.co.ratio3,
101 |             aes(x = Var1,
102 |                 y = Var2,
103 |                 fill = color)) +
104 |   scale_fill_identity() +
105 |   scale_y_reverse() +
106 |   theme_void()
107 | 
108 | ggsave("cl.co.ratio_plot.pdf",
109 |        cl.co.ratio_plot,
110 |        width = 2.25,
111 |        height = 2.25)
112 | 
113 | legend_colors <- heatmap_legend_plot(0, 1, colorset = colorset) +
114 |   theme_void()
115 | 
116 | ggsave("legend_colors.pdf",
117 |        legend_colors,
118 |        width = 1, height = 0.5)
119 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_3/qc_jitter_plots.R:
--------------------------------------------------------------------------------
  1 | library(dplyr)
  2 | library(ggplot2)
  3 | library(ggbeeswarm)
  4 | 
  5 | summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
  6 |                       conf.interval=.95, .drop=TRUE, roundall = F) {
  7 |   require(dplyr)
  8 |   # This does the summary. For each group return a vector with
  9 |   # N, mean, and sd
 10 |   
 11 |   names(data)[names(data) == measurevar] <- "measurevar"
 12 |   
 13 |   datac <- data %>%
 14 |     select(one_of(groupvars,"measurevar")) %>%
 15 |     filter(ifelse(na.rm == T, !is.na(measurevar), T)) %>%
 16 |     mutate(measurevar = as.numeric(measurevar)) %>%
 17 |     group_by_(c(groupvars)) %>%
 18 |     summarise(N = n(),
 19 |               median = median(measurevar),
 20 |               mean = mean(measurevar),
 21 |               max = max(measurevar),
 22 |               sd = ifelse(N == 1, 0, sd(measurevar)),
 23 |               q25 = as.numeric(quantile(measurevar, 0.25)),
 24 |               q75 = as.numeric(quantile(measurevar, 0.75))) %>%
 25 |     mutate(se = sd/sqrt(N))
 26 |   #%>%
 27 |   #  mutate(ci =  se * qt(conf.interval/2 + 0.5, N-1))
 28 |   
 29 |   
 30 |   if(roundall) {
 31 |     roundcols <- c("median","mean","max","sd","q25","q75","se","ci")
 32 |     datac[roundcols] <- round(datac[roundcols],3)
 33 |   }
 34 |   
 35 |   # datac <- datac %>%
 36 |   #   mutate(xpos = 1:n())
 37 |   
 38 |   return(datac)
 39 | }
 40 | 
 41 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather")
 42 | 
 43 | anno <- anno %>%
 44 |   filter(cluster_id %in% 1:133)
 45 | 
 46 | # dendcluster_id is the annotation for cluster ordering based on the current, bootstrapped dendrogram
 47 | genes_stats <- summarySE(data = anno,
 48 |                          measurevar = "confusion_label",
 49 |                          groupvars = "dendcluster_id")
 50 | 
 51 | dendcluster_anno <- anno %>%
 52 |   select(dendcluster_id, dendcluster_label, dendcluster_color) %>%
 53 |   unique()
 54 | 
 55 | confusion_plot <- ggplot() +
 56 |   # geom_quasirandom from the ggbeeswarm package
 57 |   # makes violin-shaped jittered point plots
 58 |   geom_quasirandom(data = anno,
 59 |                    aes(x = dendcluster_id,
 60 |                        y = confusion_label),
 61 |                    color = "skyblue",
 62 |                    # Need to set position_jitter height = 0 to prevent
 63 |                    # jitter on the y-axis, which changes data representation
 64 |                    position = position_jitter(width = .3,height = 0),
 65 |                    size = 0.1) +
 66 |   # Errorbars built using genes_stats values
 67 |   geom_errorbar(data = genes_stats,
 68 |                 aes(x = dendcluster_id,
 69 |                     ymin = q25,
 70 |                     ymax = q75),
 71 |                 size = 0.2) +
 72 |   # Median points from genes_stats
 73 |   geom_point(data = genes_stats,
 74 |              aes(x = dendcluster_id,
 75 |                  y = median),
 76 |              color = "red",
 77 |              size = 0.5) +
 78 |   # Cluster labels as text objects
 79 |   geom_text(data = dendcluster_anno,
 80 |             aes(x = dendcluster_id,
 81 |                 y = -0.1,
 82 |                 label = dendcluster_label,
 83 |                 color = dendcluster_color),
 84 |             angle = 90,
 85 |             hjust = 1,
 86 |             vjust = 0.3,
 87 |             size = 2) +
 88 |   # Median values next to cluster labels, since there's space there.
 89 |   geom_text(data = genes_stats,
 90 |             aes(x = dendcluster_id,
 91 |                 y = 0,
 92 |                 label = round(median,2)),
 93 |             angle = 90,
 94 |             hjust = 1,
 95 |             vjust = 0.3,
 96 |             size = 2) +
 97 |   scale_color_identity() +
 98 |   # Expand the y scale so that the labels are visible
 99 |   scale_y_continuous("Confusion Score",
100 |                      limits = c(-0.5, 1.2), 
101 |                      breaks = seq(0, 1.2, 0.2)) +
102 |   # Remove X-axis title
103 |   scale_x_continuous("") +
104 |   theme_bw() +
105 |   # Theme tuning
106 |   theme(axis.text.x = element_blank(),
107 |         axis.ticks = element_blank(),
108 |         panel.border = element_blank(),
109 |         panel.grid.major.x = element_blank(),
110 |         panel.grid.minor.x = element_blank())
111 | 
112 | ggsave("confusion_plot.pdf", confusion_plot, width = 12, height = 5, useDingbats = F)
113 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_4/intermediate_fraction_plot.R:
--------------------------------------------------------------------------------
 1 | library(dplyr)
 2 | library(ggplot2)
 3 | options(stringsAsFactors = F)
 4 | 
 5 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather")
 6 | 
 7 | anno <- anno %>%
 8 |   filter(cluster_id %in% 1:133) %>%
 9 |   mutate(core_int_label = ifelse(core_int_label == "core",
10 |                                  "core",
11 |                                  "int"))
12 | 
13 | plot_data <- anno %>%
14 |   group_by(dendcluster_id, dendcluster_label, dendcluster_color) %>%
15 |   summarise(n_core = sum(core_int_label == "core"),
16 |             n_int = sum(core_int_label == "int"),
17 |             n_cells = n()) %>%
18 |   mutate(frac_core = n_core/n_cells,
19 |          frac_int = n_int/n_cells)
20 | 
21 | hlines <- data.frame(yintercept = seq(0,1,by=0.2))
22 | 
23 | ggplot() +
24 |   geom_rect(data = plot_data,
25 |             aes(xmin = dendcluster_id - 0.5,
26 |                 xmax = dendcluster_id + 0.5,
27 |                 ymin = 0,
28 |                 ymax = frac_core,
29 |                 fill = dendcluster_color)) +
30 |   geom_rect(data = plot_data,
31 |             aes(xmin = dendcluster_id - 0.5,
32 |                 xmax = dendcluster_id + 0.5,
33 |                 ymin = frac_core,
34 |                 ymax = 1,
35 |                 fill = "#000000")) +
36 |   geom_text(data = plot_data,
37 |             aes(x = dendcluster_id,
38 |                 y = -0.02,
39 |                 color = dendcluster_color,
40 |                 label = dendcluster_label),
41 |             angle = 90,
42 |             vjust = 0.3,
43 |             hjust = 1,
44 |             size = 2) +
45 |   geom_hline(data = hlines,
46 |              aes(yintercept = yintercept),
47 |              linetype = "dashed",
48 |              size = 0.1) +
49 |   scale_fill_identity() +
50 |   scale_color_identity() +
51 |   theme_void() +
52 |   scale_y_continuous(limits = c(-1, 1))
53 | 
54 | ggsave("intermediate_fraction.pdf", width = 7.5, height = 2)
55 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_5/all_markers_heatmap.R:
--------------------------------------------------------------------------------
  1 | library(dplyr)
  2 | library(scrattch)
  3 | library(ggplot2)
  4 | library(feather)
  5 | library(reshape2)
  6 | options(stringsAsFactors = F)
  7 | 
  8 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/cell.marker.dat.rda")
  9 | 
 10 | anno_file <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather"
 11 | 
 12 | anno <- read_feather(anno_file)
 13 | 
 14 | cluster_anno <- anno %>%
 15 |   select(sample_id, dendcluster_id, dendcluster_color)
 16 | 
 17 | n_cells <- ncol(cell.marker.dat)
 18 | n_genes <- nrow(cell.marker.dat)
 19 | 
 20 | # remove combinatorial genes. ~ rows > 1110
 21 | #cell.marker.dat <- cell.marker.dat[c(966:1200,1:965),]
 22 | # shift pan markers. ~rows > 1040
 23 | 
 24 | gene_pos <- data.frame(gene = rownames(cell.marker.dat)) %>%
 25 |   mutate(ypos = n():1)
 26 | 
 27 | sample_pos <- data.frame(sample_id = colnames(cell.marker.dat)) %>%
 28 |   mutate(xpos = 1:n())
 29 | 
 30 | data_melt <- melt(cell.marker.dat)
 31 | names(data_melt)[1:2] <- c("gene","sample_id")
 32 | 
 33 | cluster_rects <- data.frame(sample_id = colnames(cell.marker.dat)) %>%
 34 |   mutate(xpos = 1:n()) %>%
 35 |   mutate(ymin = nrow(cell.marker.dat) + 5, ymax = nrow(cell.marker.dat) + 20) %>%
 36 |   left_join(cluster_anno)
 37 | 
 38 | plot_data <- data_melt %>%
 39 |   left_join(gene_pos) %>%
 40 |   left_join(sample_pos) %>%
 41 |   mutate(value = ifelse(is.na(value), 0, value)) %>%
 42 |   mutate(fill = values_to_colors(value, colorset = c("white","black")))
 43 | 
 44 | all_tile_plot <- ggplot() +
 45 |   geom_tile(data = plot_data,
 46 |             aes(x = xpos, y = ypos,
 47 |                 fill = fill)) +
 48 |   geom_rect(data = cluster_rects,
 49 |             aes(xmin = xpos - 0.5, xmax = xpos + 0.5,
 50 |                 ymin = ymin, ymax = ymax,
 51 |                 fill = dendcluster_color)) +
 52 |   scale_fill_identity() +
 53 |   scale_y_continuous(expand = c(0,0)) +
 54 |   scale_x_continuous(expand = c(0,0)) +
 55 |   theme_void()
 56 | 
 57 | ggsave("zy_100sampled_truth_bw_rect.png", all_tile_plot, height = (n_genes*3)/600 + 20/600 + 0.0365, width = n_cells/600 + 0.0365, dpi = 600)
 58 | 
 59 | # Rearrange to match dendrogram
 60 | cell.marker.dat2 <- cell.marker.dat[c(1:240,
 61 |                                      436:780,
 62 |                                      241:435,
 63 |                                      781:1000),]
 64 | 
 65 | 
 66 | gene_pos <- data.frame(gene = rownames(cell.marker.dat2)) %>%
 67 |   mutate(ypos = n():1)
 68 | 
 69 | sample_pos <- data.frame(sample_id = colnames(cell.marker.dat2)) %>%
 70 |   left_join(cluster_anno) %>%
 71 |   arrange(dendcluster_id) %>%
 72 |   mutate(xpos = 1:n())
 73 | 
 74 | cell.marker.dat2 <- cell.marker.dat2[,sample_pos$sample_id]
 75 | 
 76 | data_melt <- melt(cell.marker.dat2)
 77 | names(data_melt)[1:2] <- c("gene","sample_id")
 78 | 
 79 | cluster_rects <- data.frame(sample_id = colnames(cell.marker.dat2)) %>%
 80 |   mutate(xpos = 1:n()) %>%
 81 |   mutate(ymin = nrow(cell.marker.dat2) + 5, ymax = nrow(cell.marker.dat2) + 20) %>%
 82 |   left_join(cluster_anno)
 83 | 
 84 | plot_data <- data_melt %>%
 85 |   left_join(gene_pos) %>%
 86 |   left_join(sample_pos) %>%
 87 |   mutate(value = ifelse(is.na(value), 0, value)) %>%
 88 |   mutate(fill = values_to_colors(value, colorset = c("white","black")))
 89 | 
 90 | all_tile_plot <- ggplot() +
 91 |   geom_tile(data = plot_data,
 92 |             aes(x = xpos, y = ypos,
 93 |                 fill = fill)) +
 94 |   geom_rect(data = cluster_rects,
 95 |             aes(xmin = xpos - 0.5, xmax = xpos + 0.5,
 96 |                 ymin = ymin, ymax = ymax,
 97 |                 fill = dendcluster_color)) +
 98 |   scale_fill_identity() +
 99 |   scale_y_continuous(expand = c(0,0)) +
100 |   scale_x_continuous(expand = c(0,0)) +
101 |   theme_void()
102 | 
103 | ggsave("dend_order_zy_100sampled_truth_bw_rect.png", all_tile_plot, height = (n_genes*3)/600 + 20/600 + 0.0365, width = n_cells/600 + 0.0365, dpi = 600)
104 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_6/cluster_marker_heatmaps.R:
--------------------------------------------------------------------------------
 1 | library(dplyr)
 2 | library(ggplot2)
 3 | library(feather)
 4 | library(scrattch)
 5 | library(dendextend)
 6 | options(stringsAsFactors = F)
 7 | 
 8 | fdir <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/"
 9 | 
10 | anno <- read_feather(file.path(fdir,"anno.feather"))
11 | 
12 | markers <- read.csv("markers_2018-05-25.csv")
13 | 
14 | Global.markers <- unique(unlist(markers$Global[markers$Global != ""]))
15 | Inh.markers <- unique(unlist(markers$Inh[markers$Inh != ""]))
16 | Ex.markers <- unique(unlist(markers$Ex[markers$Ex != ""]))
17 | 
18 | all_clusters <- anno %>%
19 |   filter(cluster_id %in% 1:133) %>%
20 |   arrange(dendcluster_id) %>%
21 |   select(dendcluster_id) %>%
22 |   unique() %>% unlist()
23 | 
24 | inh_clusters <- anno %>%
25 |   filter(class_label == "GABAergic") %>%
26 |   arrange(dendcluster_id) %>%
27 |   select(dendcluster_id) %>%
28 |   unique() %>% unlist()
29 | 
30 | exc_clusters <- anno %>%
31 |   filter(class_label == "Glutamatergic") %>%
32 |   arrange(dendcluster_id) %>%
33 |   select(dendcluster_id) %>%
34 |   unique() %>% unlist()
35 | 
36 | all_plot <- group_heatmap_plot(data_source = fdir,
37 |                    genes = Global.markers,
38 |                    group_by = "dendcluster",
39 |                    clusters = all_clusters,
40 |                    calculation = "trimmed_mean",
41 |                    labelheight = 13,
42 |                    showcounts = F)
43 | 
44 | inh_plot <- group_heatmap_plot(data_source = fdir,
45 |                                genes = Inh.markers,
46 |                                group_by = "dendcluster",
47 |                                clusters = inh_clusters,
48 |                                calculation = "trimmed_mean",
49 |                                labelheight = 13,
50 |                                showcounts = F)
51 | 
52 | exc_plot <- group_heatmap_plot(data_source = fdir,
53 |                                genes = Ex.markers,
54 |                                group_by = "dendcluster",
55 |                                clusters = exc_clusters,
56 |                                calculation = "trimmed_mean",
57 |                                labelheight = 13,
58 |                                showcounts = F)
59 | 
60 | ggsave("broad_glia_markers.pdf",all_plot,height = 8, width = 15)
61 | ggsave("inh_markers.pdf",inh_plot,height = 8, width = 7.5)
62 | ggsave("exc_markers.pdf",exc_plot,height = 10, width = 7.5)
63 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_6/markers.csv:
--------------------------------------------------------------------------------
 1 | Global,Ex,Inh
 2 | Fez1,Slc17a7,Gad2
 3 | Phyhipl,Rtn4rl2,Slc32a1
 4 | Aplp1,Slc30a3,Adarb2
 5 | Gnao1,Cux2,Nfix
 6 | Caly,Stard8,Nfib
 7 | Snap25,Otof,Cxcl14
 8 | Atp1a3,Rrad,Tnfaip8l3
 9 | Camk2b,Penk,Cplx3
10 | Syt1,Col23a1,Lamp5
11 | Gabrg2,Agmat,Pax6
12 | Fabp3,Emx2,Krt73
13 | Stmn2,Hpgd,Sncg
14 | Kif5c,Macc1,Prox1
15 | Slc17a7,Olfr78,Ndnf
16 | Nrn1,Tcap,Ntn1
17 | Neurod2,Rorb,Pde11a
18 | Sv2b,Fgf17,Pdlim5
19 | Satb2,Rspo1,Lsp1
20 | Tbr1,Whrn,Slc35d3
21 | Vsig2,Scnn1a,Cxcl5
22 | Slc32a1,Endou,Nkx2-1
23 | Gad2,Fezf2,Pdlim3
24 | Dlx1,Hsd11b1,Vip
25 | Dlx5,Batf3,Serpinf1
26 | Dlx2,Colq,Slc10a4
27 | Dlx6os1,Pld5,Cldn10
28 | Slc6a1,Wfdc18,C1ql1
29 | Sox2,Htr2c,Etv1
30 | Adamts19,Wfdc17,Ptprk
31 | Trp73,Aldh1a7,C1ql3
32 | Lhx5,Rxfp2,C1ql1
33 | Lhx1,Cpa6,Crabp1
34 | S100a13,Gkn1,Crispld2
35 | S100a16,Tgfb1,Slc17a8
36 | Cmtm5,Prss35,Cyb5r2
37 | Kcnj10,Ctsc,Nr1h4
38 | Gja1,Osr1,Prss12
39 | Gjb6,Oprk1,Igfbp6
40 | Aqp4,Cd52,Calb2
41 | Lcat,Col18a1,Gpc3
42 | Acsbg1,Car3,Pthlh
43 | Olig1,Ctxn3,Sostdc1
44 | Sox10,Ctgf,Mab21l1
45 | Neu4,Fam84b,Tpbg
46 | Gpr17,Chrna6,Slc18a3
47 | Sapcd2,Fn1,Rspo1
48 | Ccnb1,Erg,Lmo1
49 | Brca1,Tac1,Myl1
50 | Plp1,Pvalb,Rspo4
51 | Cldn11,Dppa1,Chat
52 | Mag,Bmp5,Cbln4
53 | Mog,Depdc7,Cbln4
54 | Mog,Stac,Gsx2
55 | Opalin,C1ql2,Itih5
56 | Gjb1,Ptgfr,Mybpc1
57 | Hapln2,Slco2a1,Lhx6
58 | Cyba,Lrrc9,Sox6
59 | Ctsh,Pappa2,Sst
60 | Ifitm3,Foxp2,Calb1
61 | S100a11,Sla2,Chodl
62 | Vtn,Slc17a8,Gabrg1
63 | Igf2,Trh,Lgr6
64 | Slc6a13,Syt6,Tacr3
65 | Dcn,H60b,Etv1
66 | Col1a1,Gpr139,Il1rapl2
67 | Slc13a3,Nxph4,Edn1
68 | H2-Q7,Mup5,Hpse
69 | Six2,Fam150a,Igsf9
70 | Cxcl9,Fbxl7,Myh8
71 | Slc22a6,F2r,Myh13
72 | Col15a1,Olfr111,Chrna2
73 | Slc38a11,Olfr110,Ptgdr
74 | Atp13a5,Serpinb11,Crygs
75 | Ace2,P2ry12,Crhr2
76 | Kcnj8,Kynu,Col6a1
77 | Acta2,Crh,Tacstd2
78 | Myh11,Hsd17b2,Th
79 | Slco1a4,Mup3,Nts
80 | Cldn5,Lhx5,Il7
81 | Ctss,Trp73,Bche
82 | C1qa,Reln,Tac1
83 | C1qb,Cdh13,Pvalb
84 | C1qc,Cpne7,Syt2
85 | F13a1,Marcksl1,Ntf3
86 | Mrc1,Alcam,Ostn
87 | Mpeg1,Efr3a,Gpr149
88 | Siglech,,Vipr2
89 | ,,Meis2
90 | ,,Adamts19
91 | ,,Reln
92 | ,,Nr2f2
93 | ,,Cck
94 | ,,Npy
95 | ,,Crh
96 | ,,Tac2
97 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_8/saturation_plot.R:
--------------------------------------------------------------------------------
  1 | library(dplyr)
  2 | library(ggplot2)
  3 | library(feather)
  4 | #library(scrattch)
  5 | 
  6 | color_mean <- function(x) {
  7 |   library(grDevices)
  8 |   
  9 |   rgb_x <- col2rgb(x)
 10 |   rgb_mean <- rowMeans(rgb_x)
 11 |   new_hex <- rgb(rgb_mean["red"]/255,
 12 |                  rgb_mean["green"]/255,
 13 |                  rgb_mean["blue"]/255)
 14 |   
 15 |   new_hex
 16 |   
 17 | }
 18 | 
 19 | ramp_colors <- function (x, minval = NULL, maxval = NULL, colorset = c("darkblue", 
 20 |                                                         "dodgerblue", "gray80", "orangered", "red")) 
 21 | {
 22 |   heat_colors <- colorRampPalette(colorset)(1001)
 23 |   if (is.null(maxval)) {
 24 |     maxval <- max(x)
 25 |   } else {
 26 |     x[x > maxval] <- maxval
 27 |   }
 28 |   if (is.null(minval)) {
 29 |     minval <- min(x)
 30 |   } else {
 31 |     x[x < minval] <- minval
 32 |   }
 33 |   heat_positions <- unlist(round((x - minval)/(maxval - minval) * 
 34 |                                    1000 + 1, 0))
 35 |   colors <- heat_colors[heat_positions]
 36 |   colors
 37 | }
 38 | 
 39 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/saturation.df.rda")
 40 | 
 41 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather")
 42 | 
 43 | anno <- anno %>%
 44 |   filter(dendcluster_id < 134)
 45 | 
 46 | sat_xpos <- saturation.df %>%
 47 |   group_by(sample_size) %>%
 48 |   summarise(n_cl = sum(merge.cl == "nochange") + length(unique(merge.cl)) - 1) %>%
 49 |   mutate(sample_size = as.numeric(as.character(sample_size))) %>%
 50 |   arrange(desc(sample_size)) %>%
 51 |   mutate(xpos = 1:n() + 1) %>%
 52 |   mutate(xmin = xpos - 0.3,
 53 |          xmax = xpos + 0.3)
 54 | 
 55 | dendcluster_ypos <- anno %>%
 56 |   group_by(cl, dendcluster_id, dendcluster_label, dendcluster_color) %>%
 57 |   summarise(cl_size = n()) %>%
 58 |   ungroup() %>%
 59 |   arrange(-dendcluster_id) %>%
 60 |   mutate(ypos = 1:n()) %>%
 61 |   mutate(ymin = ypos - 0.5,
 62 |          ymax = ypos + 0.5)
 63 | 
 64 | full_pos <- dendcluster_ypos %>%
 65 |   mutate(xpos = 1,
 66 |          xmin = xpos - 0.3,
 67 |          xmax = xpos + 0.3) %>%
 68 |   mutate(cl_size = ifelse(is.na(cl_size), 0, cl_size)) %>%
 69 |   mutate(fill = ifelse(is.na(cl_size), "#FFFFFF", ramp_colors(cl_size, 0, 20, colorset = c("orange","white","skyblue"))))
 70 | 
 71 | sat_pos <- saturation.df %>%
 72 |   mutate(sample_size = as.numeric(as.character(sample_size))) %>%
 73 |   mutate(org.cl = as.numeric(as.character(org.cl))) %>%
 74 |   left_join(dendcluster_ypos, by = c("org.cl" = "cl")) %>%
 75 |   left_join(sat_xpos, by = "sample_size") %>%
 76 |   mutate(dendcluster_color = ifelse(merge.cl == "absent",NA,dendcluster_color)) %>%
 77 |   mutate(cl_size.x = ifelse(is.na(cl_size.x), 0, cl_size.x)) %>%
 78 |   mutate(outline = ifelse(merge.cl %in% c("absent","nochange"), NA, "#000000"),
 79 |          fill = ifelse(is.na(cl_size.x), "#FFFFFF", ramp_colors(cl_size.x, 0, 20, colorset = c("orange","white","skyblue"))))
 80 | 
 81 | sat_unmerged <- sat_pos %>%
 82 |   filter(merge.cl %in% c("absent","nochange"))
 83 | 
 84 | sat_merged <- sat_pos %>%
 85 |   filter(!merge.cl %in% c("absent","nochange")) %>%
 86 |   group_by(sample_size, merge.cl) %>%
 87 |   mutate(dendcluster_color = color_mean(dendcluster_color))
 88 | 
 89 | sat_curves <- sat_merged %>%
 90 |   group_by(sample_size, merge.cl) %>%
 91 |   arrange(ypos) %>%
 92 |   mutate(x = xmax, xend = xmax,
 93 |          y = ypos, yend = lead(ypos)) %>%
 94 |   
 95 |   ungroup() %>%
 96 |   filter(complete.cases(.))
 97 | 
 98 | merge_plot <- ggplot() +
 99 |   geom_rect(data = full_pos,
100 |             aes(xmin = xmin, xmax = xmax,
101 |                 ymin = ymin, ymax = ymax,
102 |                 fill = fill,
103 |                 color = "white"),
104 |             size = 0.1) +
105 |   geom_text(data = full_pos,
106 |             aes(x = xmax, y = ymax,
107 |                 label = cl_size),
108 |             hjust = 0, vjust = 1,
109 |             size = 2) +
110 |   geom_text(data = dendcluster_ypos,
111 |             aes(x = 0.5, y = ypos,
112 |                 label = dendcluster_label,
113 |                 color = dendcluster_color),
114 |             hjust = 0, vjust = 0.3,
115 |             size = 2*5/6) +
116 |   geom_rect(data = sat_unmerged,
117 |             aes(xmin = xmin, xmax = xmax,
118 |                 ymin = ymin, ymax = ymax,
119 |                 fill = fill,
120 |                 color = "white"),
121 |             size = 0.1) +
122 |   geom_text(data = sat_unmerged,
123 |             aes(x = xmax, y = ymax,
124 |                 label = cl_size.x),
125 |             hjust = 0, vjust = 1,
126 |             size = 2*5/6) +
127 |   geom_rect(data = sat_merged,
128 |             aes(xmin = xmin + 0.2, xmax = xmax + 0.2,
129 |                 ymin = ymin, ymax = ymax,
130 |                 fill = fill,
131 |                 color = outline)) +
132 |   geom_text(data = sat_merged,
133 |             aes(x = xmax + 0.2, y = ymax,
134 |                 label = cl_size.x),
135 |             hjust = 0, vjust = 1,
136 |             size = 2*5/6) +
137 |   geom_curve(data = sat_curves,
138 |              aes(x = x + 0.2, xend = xend + 0.2,
139 |                  y = y, yend = yend),
140 |              curvature = -1) +
141 |   scale_fill_identity() +
142 |   scale_color_identity() +
143 |   scale_x_reverse(limits = c(12, 0)) +
144 |   theme_void()
145 | 
146 | merge_plot
147 | 
148 | ggsave("merge_plot_numbers.pdf",merge_plot,width = 7.5, height = 10)
149 | 
150 | cluster_counts <- saturation.df %>%
151 |   group_by(sample_size) %>%
152 |   summarise(n_clusters = length(unique(as.character(merge.cl))) - 1 + sum(merge.cl == "nochange"))
153 | 
154 | cluster_counts
155 | 


--------------------------------------------------------------------------------
/Figures/Ext_Data_Fig_9/build_dotplot.R:
--------------------------------------------------------------------------------
  1 | library(dplyr)
  2 | library(feather)
  3 | library(ggplot2)
  4 | options(stringsAsFactors = F)
  5 | 
  6 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather")
  7 | 
  8 | anno <- anno %>%
  9 |   filter(cluster_id %in% 1:133)
 10 | 
 11 | cluster_anno <- anno %>%
 12 |   select(cluster_id, cluster_label, cluster_color) %>%
 13 |   unique()
 14 | 
 15 | cre_anno <- anno %>%
 16 |   select(cre_id, cre_label, cre_color) %>%
 17 |   unique()
 18 | 
 19 | plot_anno <- anno %>%
 20 |   filter(inj_type_label == "No Injection") %>%
 21 |   filter(facs_label != "RFP-negative")
 22 | 
 23 | plot_data <- plot_anno %>%
 24 |   group_by(cre_id, cre_label, cre_color,
 25 |            cluster_id, cluster_label, cluster_color) %>%
 26 |   summarise(n_cells = n())
 27 | 
 28 | max_cre <- max(anno$cre_id)
 29 | max_cluster <- max(anno$cluster_id)
 30 | 
 31 | bg_rects <- data.frame(xmin = 1:(max_cre/2)*2 - 0.5,
 32 |                        xmax = (1:(max_cre/2) + 1)*2 - 1.5,
 33 |                        ymin = 0.5,
 34 |                        ymax = max_cluster + 0.5)
 35 | 
 36 | class_breaks <- anno %>%
 37 |   group_by(subclass_id) %>%
 38 |   summarise(yintercept = max(cluster_id) + 0.5)
 39 | 
 40 | ggplot() +
 41 |   geom_rect(data = bg_rects,
 42 |             aes(xmin = xmin, xmax = xmax,
 43 |                 ymin = ymin, ymax = ymax),
 44 |             fill = "#CAD7D7") +
 45 |   geom_hline(data = class_breaks,
 46 |              aes(yintercept = yintercept),
 47 |              size = 0.2) +
 48 |   geom_point(data = plot_data,
 49 |              aes(x = cre_id, 
 50 |                  y = cluster_id,
 51 |                  size = n_cells,
 52 |                  fill = cluster_color,
 53 |                  color = "#FFFFFF"),
 54 |              pch = 21) +
 55 |   geom_text(data = cluster_anno,
 56 |             aes(x = 1,
 57 |                 y = cluster_id,
 58 |                 label = cluster_label,
 59 |                 color = cluster_color),
 60 |             hjust = 1,
 61 |             vjust = 0.3,
 62 |             size = 2*5/6) +
 63 |   scale_color_identity() +
 64 |   scale_fill_identity() +
 65 |   scale_y_reverse("",
 66 |                   limits = c(133.5, -0.5),
 67 |                   expand = c(0,0)) +
 68 |   scale_x_continuous("",
 69 |                      expand = c(0,0),
 70 |                      limits = c(-5, 57),
 71 |                      breaks = cre_anno$cre_id,
 72 |                      labels = cre_anno$cre_label,
 73 |                      position = "top") +
 74 |   scale_size_area(max_size = 5,
 75 |                   breaks = c(1,10,50,100,200,500)) +
 76 |   theme_classic(base_size = 7) +
 77 |   theme(axis.text.y = element_blank(),
 78 |         axis.ticks = element_blank(),
 79 |         axis.line = element_blank(),
 80 |         axis.text.x = element_text(angle = 90,
 81 |                                    hjust = 0))
 82 | 
 83 | ggsave("cre_dotplot.pdf",width = 8.25, height = 10.5, useDingbats = F)
 84 | 
 85 | 
 86 | # Pan-broad_specific ordering
 87 | pan_cre <- c("Gad2-IRES-Cre","Snap25-IRES2-Cre","Slc32a1-IRES-Cre","Slc17a7-IRES2-Cre")
 88 | broad_cre <- c("Rbp4-Cre_KL100","Pvalb-IRES-Cre","Vip-IRES-Cre","Sst-IRES-Cre","Ctgf-T2A-dgCre","Htr3a-Cre_NO152","Trib2-F2A-CreERT2","Ntsr1-Cre_GN220","Tlx3-Cre_PL56","Ndnf-IRES2-dgCre","Rorb-IRES2-Cre")
 89 | 
 90 | plot_anno2 <- plot_anno %>%
 91 |   mutate(cre_class_id = ifelse(cre_label %in% pan_cre, 1, 
 92 |                                ifelse(cre_label %in% broad_cre,2,3)))
 93 | 
 94 | 
 95 | cre_anno2 <- plot_anno2 %>%
 96 |   select(cre_id, cre_label, cre_color, cre_class_id) %>%
 97 |   unique() %>%
 98 |   arrange(cre_class_id, cre_id) %>%
 99 |   mutate(new_cre_id = 2:(n()+1))
100 | 
101 | plot_anno2 <- plot_anno2 %>%
102 |   left_join(cre_anno2)
103 | 
104 | plot_data2 <- plot_anno2 %>%
105 |   group_by(new_cre_id, cre_label, cre_color,
106 |            cluster_id, cluster_label, cluster_color) %>%
107 |   summarise(n_cells = n())
108 | 
109 | ggplot() +
110 |   geom_rect(data = bg_rects,
111 |             aes(xmin = xmin, xmax = xmax,
112 |                 ymin = ymin, ymax = ymax),
113 |             fill = "#CAD7D7") +
114 |   geom_point(data = plot_data2,
115 |              aes(x = new_cre_id, 
116 |                  y = cluster_id,
117 |                  size = n_cells,
118 |                  color = cluster_color)) +
119 |   geom_text(data = cluster_anno,
120 |             aes(x = 1,
121 |                 y = cluster_id,
122 |                 label = cluster_label,
123 |                 color = cluster_color),
124 |             hjust = 1,
125 |             vjust = 0.3,
126 |             size = 2*5/6) +
127 |   geom_hline(data = class_breaks,
128 |              aes(yintercept = yintercept),
129 |              size = 0.2) +
130 |   scale_color_identity() +
131 |   scale_y_reverse("",
132 |                   limits = c(116.5, -0.5),
133 |                   expand = c(0,0)) +
134 |   scale_x_continuous("",
135 |                      expand = c(0,0),
136 |                      limits = c(-5, 37),
137 |                      breaks = cre_anno2$new_cre_id,
138 |                      labels = cre_anno2$cre_label,
139 |                      position = "top") +
140 |   scale_size_area(max_size = 3,
141 |                   breaks = c(10,50,100,200,500)) +
142 |   theme_classic(base_size = 7) +
143 |   theme(axis.text.y = element_blank(),
144 |         axis.ticks = element_blank(),
145 |         axis.line = element_blank(),
146 |         axis.text.x = element_text(angle = 90,
147 |                                    hjust = 0))
148 | 
149 | ggsave("cre_grouped_dotplot.pdf",width = 8.25, height = 10.5, useDingbats = F)
150 | 


--------------------------------------------------------------------------------
/Figures/Figure_2/class_comparison_colors_simplified.csv:
--------------------------------------------------------------------------------
 1 | class_label1,class_label2,class_type,class_color
 2 | Glutamatergic,Glutamatergic,both_gluta,#00A809
 3 | Glutamatergic,GABAergic,gluta_gaba,#B03FAB
 4 | GABAergic,Glutamatergic,gluta_gaba,#B03FAB
 5 | GABAergic,GABAergic,both_gaba,#DA1C5C
 6 | Glutamatergic,Endothelial,neu_non,#8B5E3C
 7 | Endothelial,Glutamatergic,neu_non,#8B5E3C
 8 | GABAergic,Endothelial,neu_non,#8B5E3C
 9 | Endothelial,GABAergic,neu_non,#8B5E3C
10 | Non-Neuronal,Glutamatergic,neu_non,#8B5E3C
11 | Glutamatergic,Non-Neuronal,neu_non,#8B5E3C
12 | Non-Neuronal,GABAergic,neu_non,#8B5E3C
13 | GABAergic,Non-Neuronal,neu_non,#8B5E3C
14 | Non-Neuronal,Non-Neuronal,non_non,#726658
15 | Non-Neuronal,Endothelial,non_non,#726658
16 | Endothelial,Non-Neuronal,non_non,#726658
17 | 


--------------------------------------------------------------------------------
/Figures/Figure_2/color_functions.R:
--------------------------------------------------------------------------------
 1 | hsv_palette <- function(n_colors,
 2 |                         hue_start = 0,
 3 |                         hue_end = max(1, n-1)/n,
 4 |                         sat_start = 0.55,
 5 |                         sat_end = 1,
 6 |                         sat_steps = 4,
 7 |                         val_start = 1,
 8 |                         val_end = 0.8,
 9 |                         val_steps = 3) {
10 |   sats <- rep_len(seq(sat_start, sat_end, length.out = sat_steps),length.out = n_colors)
11 |   vals <- rep_len(seq(val_start, val_end, length.out = val_steps),length.out = n_colors)
12 |   if(hue_end < hue_start) {
13 |     rev(sub("FF$","",rainbow(n_colors, s = sats, v = vals, start = hue_end, end = hue_start)))
14 |   } else {
15 |     sub("FF$","",rainbow(n_colors, s = sats, v = vals, start = hue_start, end = hue_end))
16 |   }
17 | }
18 | 
19 | color_mean <- function(x) {
20 |   library(grDevices)
21 | 
22 |   rgb_x <- col2rgb(x)
23 |   rgb_mean <- rowMeans(rgb_x)
24 |   new_hex <- rgb(rgb_mean["red"]/255,
25 |                  rgb_mean["green"]/255,
26 |                  rgb_mean["blue"]/255)
27 | 
28 |   new_hex
29 | 
30 | }
31 | 
32 | check_s <- function(x, min_sat = 0) {
33 |   library(grDevices)
34 | 
35 |   hsv_x <- rgb2hsv(col2rgb(x))
36 | 
37 |   hsv_x["s",] < min_sat
38 | 
39 | }
40 | 
41 | check_v <- function(x, min_val = 0) {
42 |   library(grDevices)
43 | 
44 |   hsv_x <- rgb2hsv(col2rgb(x))
45 | 
46 |   hsv_x["v",] < min_val
47 | 
48 | }
49 | 
50 | adjust_s <- function(x, shift_sat = 0.3) {
51 |   library(grDevices)
52 | 
53 |   hsv_x <- rgb2hsv(col2rgb(x))
54 | 
55 |   hsv_x["s", ] <- hsv_x["s", ] + shift_sat
56 |   if(hsv_x["s", ] > 1) {
57 |     hsv_x["s", ] <- 1
58 |   } else if (hsv_x["s",] < 0) {
59 |     hsv_x["s", ] <- 0
60 |   }
61 | 
62 |   hsv(hsv_x[1,], hsv_x[2,], hsv_x[3,])
63 | }
64 | 
65 | adjust_v <- function(x, shift_val = 0.3) {
66 |   library(grDevices)
67 | 
68 |   hsv_x <- rgb2hsv(col2rgb(x))
69 | 
70 |   hsv_x["v", ] <- hsv_x["v", ] + shift_val
71 |   if(hsv_x["v", ] > 1) {
72 |     hsv_x["v", ] <- 1
73 |   } else if (hsv_x["v",] < 0) {
74 |     hsv_x["v", ] <- 0
75 |   }
76 | 
77 |   hsv(hsv_x[1,], hsv_x[2,], hsv_x[3,])
78 | }
79 | 


--------------------------------------------------------------------------------
/Figures/Figure_2/region_marker_genes_panel.R:
--------------------------------------------------------------------------------
  1 | library(dplyr)
  2 | library(feather)
  3 | library(ggplot2)
  4 | library(ggrepel)
  5 | options(stringsAsFactors = F)
  6 | 
  7 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/V1.ALM.diff.gene.rda")
  8 | exc.gene.df <- gene.df %>%
  9 |   mutate(class = "exc")
 10 | 
 11 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/V1.ALM.diff.inh.gene.rda")
 12 | inh.gene.df <- gene.df %>%
 13 |   mutate(class = "inh")
 14 | 
 15 | # Find DE genes for Inhibitory types
 16 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/region.de.df.rda")
 17 | region.de.df[] <- lapply(region.de.df, function(x) ifelse(is.factor(x), as.character(x), x))
 18 | 
 19 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather")
 20 | 
 21 | gene.df <- rbind(exc.gene.df, inh.gene.df) %>%
 22 |   filter(!grepl("^LOC",gene),
 23 |          !grepl("Rik$",gene))
 24 | 
 25 | alm_exc_label <- gene.df %>%
 26 |   filter(class == "exc") %>%
 27 |   arrange(-prop.diff) %>%
 28 |   head(25) %>%
 29 |   mutate(color = ifelse(class == "exc", "#212021", "#BE1E2D"),
 30 |          fill = "#E5E1E5") 
 31 | 
 32 | visp_exc_label <- gene.df %>%
 33 |   filter(class == "exc") %>%
 34 |   arrange(prop.diff) %>%
 35 |   head(25) %>%
 36 |   mutate(color = ifelse(class == "exc", "#848EBC", "#BE1E2D"),
 37 |          fill = "#D0D7EF") 
 38 | 
 39 | alm_inh_label <- gene.df %>%
 40 |   filter(class == "inh") %>%
 41 |   arrange(-prop.diff) %>%
 42 |   head(10) %>%
 43 |   mutate(color = ifelse(class == "exc", "#212021", "#DD6091"),
 44 |          fill = "#E5E1E5") 
 45 | 
 46 | visp_inh_label <- gene.df %>%
 47 |   filter(class == "inh") %>%
 48 |   arrange(prop.diff) %>%
 49 |   head(10) %>%
 50 |   mutate(color = ifelse(class == "exc", "#848EBC", "#DD6091"),
 51 |          fill = "#D0D7EF") 
 52 | 
 53 | 
 54 | all_labels <- rbind(alm_exc_label, visp_exc_label, alm_inh_label, visp_inh_label)
 55 | #all_labels <- rbind(alm_label, visp_label, inh_label)
 56 | 
 57 | no_label <- gene.df %>%
 58 |   filter(!gene %in% all_labels$gene) %>%
 59 |   mutate(color = ifelse(class == "exc", "#808080", "#EA9BA1"))
 60 | 
 61 | 
 62 | marker_plot <- ggplot() +
 63 |   geom_point(data = no_label,
 64 |              aes(x = -prop.diff,
 65 |                  y = log10(prop.max),
 66 |                  color = color),
 67 |              #alpha = 1,
 68 |              size = 0.2) +
 69 |   geom_point(data = all_labels,
 70 |              aes(x = -prop.diff,
 71 |                  y = log10(prop.max),
 72 |                  color = color),
 73 |              size = 1)+
 74 |   geom_label_repel(data = all_labels,
 75 |                    aes(x = -prop.diff,
 76 |                        y = log10(prop.max),
 77 |                        label = gene,
 78 |                        fill = fill,
 79 |                        color = color),
 80 |                    size = 2,
 81 |                    min.segment.length = 0) +
 82 |   scale_color_identity() +
 83 |   scale_fill_identity() +
 84 |   scale_x_continuous(limits = c(-1.5, 1.2), 
 85 |                      breaks = seq(-1,1,by=0.25), 
 86 |                      labels = c(1,0.75,0.5,0.25,0,0.25,0.5,0.75,1)) +
 87 |   scale_y_continuous(limits = c(-3,0.5),
 88 |                      breaks = log10(c(0.001,0.01,0.1,0.25,0.5,0.75,1)), 
 89 |                      labels = c(0.001,0.01, 0.1, 0.25, 0.5, 0.75, 1)) +
 90 |   theme_bw(7) +
 91 |   theme(panel.grid.minor = element_blank())
 92 | 
 93 | marker_plot
 94 | 
 95 | ggsave("marker_plot.pdf",marker_plot, width = 7.5, height = 3.5, useDingbats = FALSE)
 96 | 
 97 | # One layer, with marginals
 98 | library(ggExtra)
 99 | all_points <- rbind(all_labels, no_label %>% mutate(fill = color))
100 | 
101 | margin_points <- all_points %>%
102 |   mutate(color = ifelse(class == "exc", "#808080", "#EA9BA1"))
103 | 
104 | marker_plot2 <- ggplot() +
105 |   geom_point(data = margin_points,
106 |              aes(x = -prop.diff,
107 |                  y = log10(prop.max),
108 |                  color = color),
109 |              size = 0.5) +
110 |   geom_point(data = all_labels,
111 |              aes(x = -prop.diff,
112 |                  y = log10(prop.max),
113 |                  color = color),
114 |              size = 0.5) +
115 |   geom_label_repel(data = all_labels,
116 |                    aes(x = -prop.diff,
117 |                        y = log10(prop.max),
118 |                        label = gene,
119 |                        fill = fill,
120 |                        color = color),
121 |                    size = 2) +
122 |   scale_color_identity() +
123 |   scale_fill_identity() +
124 |   scale_x_continuous(limits = c(-1.5, 1.2), 
125 |                      breaks = seq(-1,1,by=0.25), 
126 |                      labels = c(1,0.75,0.5,0.25,0,0.25,0.5,0.75,1)) +
127 |   scale_y_continuous(limits = c(-3,0.5),
128 |                      breaks = log10(c(0.001,0.01,0.1,0.25,0.5,0.75,1)), 
129 |                      labels = c(0.001,0.01, 0.1, 0.25, 0.5, 0.75, 1)) +
130 |   theme_bw(7) +
131 |   theme(panel.grid.minor = element_blank())
132 | 
133 | marker_plot2 <- ggMarginal(marker_plot2, 
134 |            margins = "x", 
135 |            size = 10,
136 |            groupColour = TRUE, 
137 |            groupFill = TRUE, 
138 |            bw = 0.05)
139 | 
140 | ggsave("marker_plot_marginals.pdf",marker_plot2, width = 7.5, height = 3.5, useDingbats = FALSE)
141 | 


--------------------------------------------------------------------------------
/Figures/Figure_3/prune_leaf_custom.R:
--------------------------------------------------------------------------------
  1 | prune.dendrogram <- function(dend, leaves, reindex_dend = TRUE, ...) {
  2 |   leaves <- as.character(leaves)
  3 |   
  4 |   for(i in seq_along(leaves))
  5 |   {
  6 |     # this function is probably not the fastest - but it works...
  7 |     dend <- prune_leaf(dend, leaves[i])	# move step by stem to remove all of these leaves...
  8 |   }
  9 |   
 10 |   if(reindex_dend) dend <- reindex_dend(dend)
 11 |   
 12 |   return(dend)
 13 | }
 14 | 
 15 | stats_midcache.dendrogram <- function (x, type = "hclust", quiet = FALSE) 
 16 | {
 17 |   type <- match.arg(type)
 18 |   stopifnot(inherits(x, "dendrogram"))
 19 |   setmid <- function(d, type) {
 20 |     if (is.leaf(d)) 
 21 |       return(d)
 22 |     k <- length(d)
 23 |     if (k < 1) 
 24 |       stop("dendrogram node with non-positive #{branches}")
 25 |     r <- d
 26 |     midS <- 0
 27 |     for (j in 1L:k) {
 28 |       r[[j]] <- unclass(setmid(d[[j]], type))
 29 |       midS <- midS + .midDend(r[[j]])
 30 |     }
 31 |     if (!quiet && type == "hclust" && k != 2) 
 32 |       warning("midcache() of non-binary dendrograms only partly implemented")
 33 |     attr(r, "midpoint") <- (.memberDend(d[[1L]]) + midS)/2
 34 |     r
 35 |   }
 36 |   setmid(x, type = type)
 37 | }
 38 | 
 39 | stats_.midDend <- function (x) {
 40 |   if (is.null(mp <- attr(x, "midpoint"))) 0 else mp
 41 | }
 42 | .midDend <- stats_.midDend # copied so that they would work inside the various functions here...
 43 | 
 44 | stats_.memberDend <- function (x) 
 45 | {
 46 |   r <- attr(x, "x.member")
 47 |   if (is.null(r)) {
 48 |     r <- attr(x, "members")
 49 |     if (is.null(r)) 
 50 |       r <- 1L
 51 |   }
 52 |   r
 53 | }
 54 | .memberDend <- stats_.memberDend
 55 | 
 56 | 
 57 | prune_leaf <- function(dend, leaf_name,...)
 58 | {
 59 |   labels_dend <- labels(dend)
 60 |   
 61 |   if(length(labels_dend) != length(unique(labels_dend)))	warning("Found dubplicate labels in the tree (this might indicate a problem in the tree you supplied)")
 62 |   
 63 |   if(!(leaf_name %in% labels_dend)) {	# what to do if there is no such leaf inside the tree
 64 |     warning(paste("There is no leaf with the label", leaf_name , "in the tree you supplied", "\n" , "Returning original tree", "\n" ))
 65 |     return(dend)
 66 |   }
 67 |   
 68 |   if(sum(labels_dend %in% leaf_name) > 1) {	# what to do if there is no such leaf inside the tree      
 69 |     warning(paste("There are multiple leaves by the name of '", leaf_name , "' in the tree you supplied.  Their locations is:",
 70 |                   paste(which(labels_dend %in% leaf_name), collapse = ","),"\n" , "Returning original tree", "\n" ))
 71 |     return(dend)
 72 |   }
 73 |   
 74 |   is.father.of.leaf.to.remove <- function(dend, leaf_name)
 75 |   {
 76 |     # this function checks if the leaf we wish to remove is the direct child of the current branch (dend) we entered the function
 77 |     is.father <- FALSE
 78 |     for(i in seq_len(length(dend)))
 79 |     {
 80 |       if(is.leaf(dend[[i]]) == TRUE  &&  labels(dend[[i]]) == leaf_name) is.father <- TRUE
 81 |     }
 82 |     return(is.father)
 83 |   }
 84 |   
 85 |   
 86 |   remove_leaf_if_child <- function(dend, leaf_name)
 87 |   {
 88 |     # print(labels(dend))
 89 |     if(all(labels(dend) != leaf_name))
 90 |     {	# if the leaf we want to remove is not in this branch, simply return the branch without going deeper intoit.
 91 |       return(dend)
 92 |     } else {	# but if the leaf we want to remove is here somewhere, go on searching
 93 |       attr(dend, "members") <- attr(dend, "members") - 1 
 94 |       
 95 |       if(!is.father.of.leaf.to.remove(dend, leaf_name))	# if you are not the father, then go on and make this function work on each child
 96 |       {
 97 |         for(i in seq_len(length(dend)))
 98 |         {
 99 |           dend[[i]] <- remove_leaf_if_child(dend[[i]], leaf_name)
100 |         }
101 |       } else { # we'll merge 
102 |         if(length(dend) == 2) {
103 |           leaf_location <- 1 
104 |           # if leaf location is 1, then move branch in leaf 2 to be the new x
105 |           if(is.leaf(dend[[leaf_location]]) == T  &&  labels(dend[[leaf_location]]) == leaf_name) {
106 | 
107 |             branch_to_bumpup <- 2
108 |             dend <- dend[[branch_to_bumpup]]
109 |           } else { # else - the leaf location must be located in position "2"
110 | 
111 |             branch_to_bumpup <- 1
112 |             dend <- dend[[branch_to_bumpup]]
113 |           }
114 |         } else if(length(dend) > 2) {
115 |           # If more than 2 branches, check if any are leaves
116 |           dend_leaves <- unlist(lapply(dend, is.leaf))
117 |           if(sum(dend_leaves) > 0) {
118 |             # If so, check for matching labels to the leaf to prune
119 |             dend_labels <- unlist(lapply(dend, function(x) attr(x, "label")))
120 |             dend_matches <- dend_labels == leaf_name
121 |             # Return a list containing the non-matching branches
122 |             dend[dend_leaves & dend_matches] <- NULL
123 |             # Note that in some cases, the following DOES NOT yield a correct result:
124 |             # dend <- dend[!(dend_leaves & dend_matches)]
125 |             
126 |             # If the length is now 1, it can be bumped up
127 |             if(length(dend) == 1) {
128 |               dend <- dend[[1]]
129 |             }
130 |             
131 |           }
132 |         }
133 |       }
134 |     }
135 |     return(dend)
136 |   }
137 |   
138 |   
139 |   new_dend <- remove_leaf_if_child(dend, leaf_name)
140 |   new_dend <- suppressWarnings(stats_midcache.dendrogram(new_dend)) # fixes the attributes
141 |   #   new_x <- fix_members_attr.dendrogram(new_x) # fix the number of memebers attr for each node
142 |   return(new_dend)
143 | }


--------------------------------------------------------------------------------
/Figures/Figure_4/color_functions.R:
--------------------------------------------------------------------------------
 1 | hsv_palette <- function(n_colors, 
 2 |                         hue_start = 0, 
 3 |                         hue_end = max(1, n-1)/n,
 4 |                         sat_start = 0.55,
 5 |                         sat_end = 1,
 6 |                         sat_steps = 4,
 7 |                         val_start = 1,
 8 |                         val_end = 0.8,
 9 |                         val_steps = 3) {
10 |   sats <- rep_len(seq(sat_start, sat_end, length.out = sat_steps),length.out = n_colors)
11 |   vals <- rep_len(seq(val_start, val_end, length.out = val_steps),length.out = n_colors)
12 |   if(hue_end < hue_start) {
13 |     rev(sub("FF$","",rainbow(n_colors, s = sats, v = vals, start = hue_end, end = hue_start)))
14 |   } else {
15 |     sub("FF$","",rainbow(n_colors, s = sats, v = vals, start = hue_start, end = hue_end))
16 |   }
17 | }
18 | 
19 | color_mean <- function(x) {
20 |   library(grDevices)
21 |   
22 |   rgb_x <- col2rgb(x)
23 |   rgb_mean <- rowMeans(rgb_x)
24 |   new_hex <- rgb(rgb_mean["red"]/255,
25 |                  rgb_mean["green"]/255,
26 |                  rgb_mean["blue"]/255)
27 |   
28 |   new_hex
29 |   
30 | }
31 | 
32 | check_s <- function(x, min_sat = 0) {
33 |   library(grDevices)
34 |   
35 |   hsv_x <- rgb2hsv(col2rgb(x))
36 |   
37 |   hsv_x["s",] < min_sat
38 |   
39 | }
40 | 
41 | check_v <- function(x, min_val = 0) {
42 |   library(grDevices)
43 |   
44 |   hsv_x <- rgb2hsv(col2rgb(x))
45 |   
46 |   hsv_x["v",] < min_val
47 |   
48 | }
49 | 
50 | adjust_s <- function(x, shift_sat = 0.3) {
51 |   library(grDevices)
52 |   
53 |   hsv_x <- rgb2hsv(col2rgb(x))
54 |   
55 |   hsv_x["s", ] <- hsv_x["s", ] + shift_sat
56 |   if(hsv_x["s", ] > 1) {
57 |     hsv_x["s", ] <- 1
58 |   } else if (hsv_x["s",] <- 0) {
59 |     hsv_x["s", ] <- 0
60 |   }
61 |   
62 |   hsv(hsv_x[1,], hsv_x[2,], hsv_x[3,])
63 | }
64 | 
65 | adjust_v <- function(x, shift_val = 0.3) {
66 |   library(grDevices)
67 |   
68 |   hsv_x <- rgb2hsv(col2rgb(x))
69 |   
70 |   hsv_x["v", ] <- hsv_x["v", ] + shift_val
71 |   if(hsv_x["v", ] > 1) {
72 |     hsv_x["v", ] <- 1
73 |   } else if (hsv_x["v",] <- 0) {
74 |     hsv_x["v", ] <- 0
75 |   }
76 |   
77 |   hsv(hsv_x[1,], hsv_x[2,], hsv_x[3,])
78 | }
79 | 


--------------------------------------------------------------------------------
/Figures/Figure_4/prune_leaf_custom.R:
--------------------------------------------------------------------------------
  1 | prune.dendrogram <- function(dend, leaves, reindex_dend = TRUE, ...) {
  2 |   leaves <- as.character(leaves)
  3 |   
  4 |   for(i in seq_along(leaves))
  5 |   {
  6 |     # this function is probably not the fastest - but it works...
  7 |     dend <- prune_leaf(dend, leaves[i])	# move step by stem to remove all of these leaves...
  8 |   }
  9 |   
 10 |   if(reindex_dend) dend <- reindex_dend(dend)
 11 |   
 12 |   return(dend)
 13 | }
 14 | 
 15 | stats_midcache.dendrogram <- function (x, type = "hclust", quiet = FALSE) 
 16 | {
 17 |   type <- match.arg(type)
 18 |   stopifnot(inherits(x, "dendrogram"))
 19 |   setmid <- function(d, type) {
 20 |     if (is.leaf(d)) 
 21 |       return(d)
 22 |     k <- length(d)
 23 |     if (k < 1) 
 24 |       stop("dendrogram node with non-positive #{branches}")
 25 |     r <- d
 26 |     midS <- 0
 27 |     for (j in 1L:k) {
 28 |       r[[j]] <- unclass(setmid(d[[j]], type))
 29 |       midS <- midS + .midDend(r[[j]])
 30 |     }
 31 |     if (!quiet && type == "hclust" && k != 2) 
 32 |       warning("midcache() of non-binary dendrograms only partly implemented")
 33 |     attr(r, "midpoint") <- (.memberDend(d[[1L]]) + midS)/2
 34 |     r
 35 |   }
 36 |   setmid(x, type = type)
 37 | }
 38 | 
 39 | stats_.midDend <- function (x) {
 40 |   if (is.null(mp <- attr(x, "midpoint"))) 0 else mp
 41 | }
 42 | .midDend <- stats_.midDend # copied so that they would work inside the various functions here...
 43 | 
 44 | stats_.memberDend <- function (x) 
 45 | {
 46 |   r <- attr(x, "x.member")
 47 |   if (is.null(r)) {
 48 |     r <- attr(x, "members")
 49 |     if (is.null(r)) 
 50 |       r <- 1L
 51 |   }
 52 |   r
 53 | }
 54 | .memberDend <- stats_.memberDend
 55 | 
 56 | 
 57 | prune_leaf <- function(dend, leaf_name,...)
 58 | {
 59 |   labels_dend <- labels(dend)
 60 |   
 61 |   if(length(labels_dend) != length(unique(labels_dend)))	warning("Found dubplicate labels in the tree (this might indicate a problem in the tree you supplied)")
 62 |   
 63 |   if(!(leaf_name %in% labels_dend)) {	# what to do if there is no such leaf inside the tree
 64 |     warning(paste("There is no leaf with the label", leaf_name , "in the tree you supplied", "\n" , "Returning original tree", "\n" ))
 65 |     return(dend)
 66 |   }
 67 |   
 68 |   if(sum(labels_dend %in% leaf_name) > 1) {	# what to do if there is no such leaf inside the tree      
 69 |     warning(paste("There are multiple leaves by the name of '", leaf_name , "' in the tree you supplied.  Their locations is:",
 70 |                   paste(which(labels_dend %in% leaf_name), collapse = ","),"\n" , "Returning original tree", "\n" ))
 71 |     return(dend)
 72 |   }
 73 |   
 74 |   is.father.of.leaf.to.remove <- function(dend, leaf_name)
 75 |   {
 76 |     # this function checks if the leaf we wish to remove is the direct child of the current branch (dend) we entered the function
 77 |     is.father <- FALSE
 78 |     for(i in seq_len(length(dend)))
 79 |     {
 80 |       if(is.leaf(dend[[i]]) == TRUE  &&  labels(dend[[i]]) == leaf_name) is.father <- TRUE
 81 |     }
 82 |     return(is.father)
 83 |   }
 84 |   
 85 |   
 86 |   remove_leaf_if_child <- function(dend, leaf_name)
 87 |   {
 88 |     # print(labels(dend))
 89 |     if(all(labels(dend) != leaf_name))
 90 |     {	# if the leaf we want to remove is not in this branch, simply return the branch without going deeper intoit.
 91 |       return(dend)
 92 |     } else {	# but if the leaf we want to remove is here somewhere, go on searching
 93 |       attr(dend, "members") <- attr(dend, "members") - 1 
 94 |       
 95 |       if(!is.father.of.leaf.to.remove(dend, leaf_name))	# if you are not the father, then go on and make this function work on each child
 96 |       {
 97 |         for(i in seq_len(length(dend)))
 98 |         {
 99 |           dend[[i]] <- remove_leaf_if_child(dend[[i]], leaf_name)
100 |         }
101 |       } else { # we'll merge 
102 |         if(length(dend) == 2) {
103 |           leaf_location <- 1 
104 |           # if leaf location is 1, then move branch in leaf 2 to be the new x
105 |           if(is.leaf(dend[[leaf_location]]) == T  &&  labels(dend[[leaf_location]]) == leaf_name) {
106 | 
107 |             branch_to_bumpup <- 2
108 |             dend <- dend[[branch_to_bumpup]]
109 |           } else { # else - the leaf location must be located in position "2"
110 | 
111 |             branch_to_bumpup <- 1
112 |             dend <- dend[[branch_to_bumpup]]
113 |           }
114 |         } else if(length(dend) > 2) {
115 |           # If more than 2 branches, check if any are leaves
116 |           dend_leaves <- unlist(lapply(dend, is.leaf))
117 |           if(sum(dend_leaves) > 0) {
118 |             # If so, check for matching labels to the leaf to prune
119 |             dend_labels <- unlist(lapply(dend, function(x) attr(x, "label")))
120 |             dend_matches <- dend_labels == leaf_name
121 |             # Return a list containing the non-matching branches
122 |             dend <- dend[!(dend_leaves & dend_matches)]
123 |           }
124 |         }
125 |       }
126 |     }
127 |     return(dend)
128 |   }
129 |   
130 |   
131 |   new_dend <- remove_leaf_if_child(dend, leaf_name)
132 |   new_dend <- suppressWarnings(stats_midcache.dendrogram(new_dend)) # fixes the attributes
133 |   #   new_x <- fix_members_attr.dendrogram(new_x) # fix the number of memebers attr for each node
134 |   return(new_dend)
135 | }


--------------------------------------------------------------------------------
/Figures/Figure_4/unified_layer_distributions_and_markers.R:
--------------------------------------------------------------------------------
 1 | library(dplyr)
 2 | library(ggplot2)
 3 | library(cowplot)
 4 | library(feather)
 5 | library(dendextend)
 6 | library(scrattch.vis)
 7 | library(scrattch.io)
 8 | options(stringsAsFactors = F)
 9 | 
10 | source("color_functions.R")
11 | source("prune_leaf_custom.R")
12 | source("layer_and_violin_functions.R")
13 | 
14 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather")
15 | 
16 | cluster_anno <- anno %>%
17 |   select(cl, dendcluster_id, cluster_id, cluster_label, cluster_color) %>%
18 |   unique()
19 | 
20 | cocl_in <- read.csv("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/co.stats.csv")
21 | cocl_in$cluster_id.x <- cluster_anno$cluster_id[match(cocl_in$cl.x,cluster_anno$cl)]
22 | cocl_in$cluster_id.y <- cluster_anno$cluster_id[match(cocl_in$cl.y,cluster_anno$cl)]
23 | 
24 | cocl <- cocl_in %>%
25 |   select(cluster_id.x, cluster_id.y, co.ratio) %>%
26 |   filter(cluster_id.x != cluster_id.y)
27 | 
28 | dend <- readRDS("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/dend.RData")
29 | 
30 | gluta_layers <- build_layer_plot(anno,
31 |                                  dend,
32 |                                  cocl,
33 |                                  dendcluster_ids = 1:55)
34 | 
35 | ## Violin plots
36 | 
37 | fdir <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/"
38 | 
39 | #gluta_genes <- split_cst("Slc30a3 Cux2 Stard8 Rorb Deptor Scnn1a Rspo1 Hsd11b1 Batf3 Colq Postn Wnt7b Lemd1 Rxfp2 Oprk1 Tunar Osr1 Car3 Fam84b Chrna6 Pvalb Stac Ctxn3 Pappa2 Foxp2 Slc17a8 Trhr Tshz2 Rapgef3 Trh Gpr139 Mup5 Nxph4 Efr3a Rprm Crym")
40 | gluta_genes <- split_cst("Slc30a3 Cux2 Stard8 Rorb Deptor Scnn1a Rspo1 Hsd11b1 Batf3 Colq Postn Wnt7b Lemd1 Rxfp2 Oprk1 Tunar Osr1 Car3 Fam84b Chrna6 Pvalb Stac Ctxn3 Pappa2 Foxp2 Slc17a8 Trhr Tshz2 Rapgef3 Trh Gpr139 Mup5 Nxph4 Efr3a Rprm Crym")
41 | remove_genes <- c("Efr3a","Stard8","Colq","Postn","Wnt7b","Lemd1","Rxfp2","Tunar","Stac","Ctxn3","Mup5")
42 | gluta_genes <- setdiff(gluta_genes, remove_genes)
43 | gluta_markers <- group_violin_plot2(data_source = fdir, 
44 |                                        group_by = "dendcluster", 
45 |                                        clusters = 1:55,
46 |                                        genes = gluta_genes,
47 |                                        logscale = TRUE,
48 |                                        labelheight = 2,
49 |                                        max_width = 10,
50 |                                        fontsize = 5,
51 |                                        showcounts = FALSE)
52 | 
53 | all_plots <- plot_grid(gluta_layers,
54 |                        gluta_markers,
55 |                        align = "v",
56 |                        nrow = 2,
57 |                        rel_widths = 1,
58 |                        rel_heights = 1,
59 |                        labels = c("b","c"))
60 | 
61 | save_plot("panels_bc.pdf",
62 |           all_plots,
63 |           ncol = 2,
64 |           nrow = 2,
65 |           base_width = 7.5/2,
66 |           base_height = 6/2)
67 | 


--------------------------------------------------------------------------------
/Figures/Figure_5/color_functions.R:
--------------------------------------------------------------------------------
 1 | hsv_palette <- function(n_colors, 
 2 |                         hue_start = 0, 
 3 |                         hue_end = max(1, n-1)/n,
 4 |                         sat_start = 0.55,
 5 |                         sat_end = 1,
 6 |                         sat_steps = 4,
 7 |                         val_start = 1,
 8 |                         val_end = 0.8,
 9 |                         val_steps = 3) {
10 |   sats <- rep_len(seq(sat_start, sat_end, length.out = sat_steps),length.out = n_colors)
11 |   vals <- rep_len(seq(val_start, val_end, length.out = val_steps),length.out = n_colors)
12 |   if(hue_end < hue_start) {
13 |     rev(sub("FF$","",rainbow(n_colors, s = sats, v = vals, start = hue_end, end = hue_start)))
14 |   } else {
15 |     sub("FF$","",rainbow(n_colors, s = sats, v = vals, start = hue_start, end = hue_end))
16 |   }
17 | }
18 | 
19 | color_mean <- function(x) {
20 |   library(grDevices)
21 |   
22 |   rgb_x <- col2rgb(x)
23 |   rgb_mean <- rowMeans(rgb_x)
24 |   new_hex <- rgb(rgb_mean["red"]/255,
25 |                  rgb_mean["green"]/255,
26 |                  rgb_mean["blue"]/255)
27 |   
28 |   new_hex
29 |   
30 | }
31 | 
32 | check_s <- function(x, min_sat = 0) {
33 |   library(grDevices)
34 |   
35 |   hsv_x <- rgb2hsv(col2rgb(x))
36 |   
37 |   hsv_x["s",] < min_sat
38 |   
39 | }
40 | 
41 | check_v <- function(x, min_val = 0) {
42 |   library(grDevices)
43 |   
44 |   hsv_x <- rgb2hsv(col2rgb(x))
45 |   
46 |   hsv_x["v",] < min_val
47 |   
48 | }
49 | 
50 | adjust_s <- function(x, shift_sat = 0.3) {
51 |   library(grDevices)
52 |   
53 |   hsv_x <- rgb2hsv(col2rgb(x))
54 |   
55 |   hsv_x["s", ] <- hsv_x["s", ] + shift_sat
56 |   if(hsv_x["s", ] > 1) {
57 |     hsv_x["s", ] <- 1
58 |   } else if (hsv_x["s",] <- 0) {
59 |     hsv_x["s", ] <- 0
60 |   }
61 |   
62 |   hsv(hsv_x[1,], hsv_x[2,], hsv_x[3,])
63 | }
64 | 
65 | adjust_v <- function(x, shift_val = 0.3) {
66 |   library(grDevices)
67 |   
68 |   hsv_x <- rgb2hsv(col2rgb(x))
69 |   
70 |   hsv_x["v", ] <- hsv_x["v", ] + shift_val
71 |   if(hsv_x["v", ] > 1) {
72 |     hsv_x["v", ] <- 1
73 |   } else if (hsv_x["v",] <- 0) {
74 |     hsv_x["v", ] <- 0
75 |   }
76 |   
77 |   hsv(hsv_x[1,], hsv_x[2,], hsv_x[3,])
78 | }
79 | 


--------------------------------------------------------------------------------
/Figures/Figure_5/prune_leaf_custom.R:
--------------------------------------------------------------------------------
  1 | prune.dendrogram <- function(dend, leaves, reindex_dend = TRUE, ...) {
  2 |   leaves <- as.character(leaves)
  3 |   
  4 |   for(i in seq_along(leaves))
  5 |   {
  6 |     # this function is probably not the fastest - but it works...
  7 |     dend <- prune_leaf(dend, leaves[i])	# move step by stem to remove all of these leaves...
  8 |   }
  9 |   
 10 |   if(reindex_dend) dend <- reindex_dend(dend)
 11 |   
 12 |   return(dend)
 13 | }
 14 | 
 15 | stats_midcache.dendrogram <- function (x, type = "hclust", quiet = FALSE) 
 16 | {
 17 |   type <- match.arg(type)
 18 |   stopifnot(inherits(x, "dendrogram"))
 19 |   setmid <- function(d, type) {
 20 |     if (is.leaf(d)) 
 21 |       return(d)
 22 |     k <- length(d)
 23 |     if (k < 1) 
 24 |       stop("dendrogram node with non-positive #{branches}")
 25 |     r <- d
 26 |     midS <- 0
 27 |     for (j in 1L:k) {
 28 |       r[[j]] <- unclass(setmid(d[[j]], type))
 29 |       midS <- midS + .midDend(r[[j]])
 30 |     }
 31 |     if (!quiet && type == "hclust" && k != 2) 
 32 |       warning("midcache() of non-binary dendrograms only partly implemented")
 33 |     attr(r, "midpoint") <- (.memberDend(d[[1L]]) + midS)/2
 34 |     r
 35 |   }
 36 |   setmid(x, type = type)
 37 | }
 38 | 
 39 | stats_.midDend <- function (x) {
 40 |   if (is.null(mp <- attr(x, "midpoint"))) 0 else mp
 41 | }
 42 | .midDend <- stats_.midDend # copied so that they would work inside the various functions here...
 43 | 
 44 | stats_.memberDend <- function (x) 
 45 | {
 46 |   r <- attr(x, "x.member")
 47 |   if (is.null(r)) {
 48 |     r <- attr(x, "members")
 49 |     if (is.null(r)) 
 50 |       r <- 1L
 51 |   }
 52 |   r
 53 | }
 54 | .memberDend <- stats_.memberDend
 55 | 
 56 | 
 57 | prune_leaf <- function(dend, leaf_name,...)
 58 | {
 59 |   labels_dend <- labels(dend)
 60 |   
 61 |   if(length(labels_dend) != length(unique(labels_dend)))	warning("Found dubplicate labels in the tree (this might indicate a problem in the tree you supplied)")
 62 |   
 63 |   if(!(leaf_name %in% labels_dend)) {	# what to do if there is no such leaf inside the tree
 64 |     warning(paste("There is no leaf with the label", leaf_name , "in the tree you supplied", "\n" , "Returning original tree", "\n" ))
 65 |     return(dend)
 66 |   }
 67 |   
 68 |   if(sum(labels_dend %in% leaf_name) > 1) {	# what to do if there is no such leaf inside the tree      
 69 |     warning(paste("There are multiple leaves by the name of '", leaf_name , "' in the tree you supplied.  Their locations is:",
 70 |                   paste(which(labels_dend %in% leaf_name), collapse = ","),"\n" , "Returning original tree", "\n" ))
 71 |     return(dend)
 72 |   }
 73 |   
 74 |   is.father.of.leaf.to.remove <- function(dend, leaf_name)
 75 |   {
 76 |     # this function checks if the leaf we wish to remove is the direct child of the current branch (dend) we entered the function
 77 |     is.father <- FALSE
 78 |     for(i in seq_len(length(dend)))
 79 |     {
 80 |       if(is.leaf(dend[[i]]) == TRUE  &&  labels(dend[[i]]) == leaf_name) is.father <- TRUE
 81 |     }
 82 |     return(is.father)
 83 |   }
 84 |   
 85 |   
 86 |   remove_leaf_if_child <- function(dend, leaf_name)
 87 |   {
 88 |     # print(labels(dend))
 89 |     if(all(labels(dend) != leaf_name))
 90 |     {	# if the leaf we want to remove is not in this branch, simply return the branch without going deeper intoit.
 91 |       return(dend)
 92 |     } else {	# but if the leaf we want to remove is here somewhere, go on searching
 93 |       attr(dend, "members") <- attr(dend, "members") - 1 
 94 |       
 95 |       if(!is.father.of.leaf.to.remove(dend, leaf_name))	# if you are not the father, then go on and make this function work on each child
 96 |       {
 97 |         for(i in seq_len(length(dend)))
 98 |         {
 99 |           dend[[i]] <- remove_leaf_if_child(dend[[i]], leaf_name)
100 |         }
101 |       } else { # we'll merge 
102 |         if(length(dend) == 2) {
103 |           leaf_location <- 1 
104 |           # if leaf location is 1, then move branch in leaf 2 to be the new x
105 |           if(is.leaf(dend[[leaf_location]]) == T  &&  labels(dend[[leaf_location]]) == leaf_name) {
106 | 
107 |             branch_to_bumpup <- 2
108 |             dend <- dend[[branch_to_bumpup]]
109 |           } else { # else - the leaf location must be located in position "2"
110 | 
111 |             branch_to_bumpup <- 1
112 |             dend <- dend[[branch_to_bumpup]]
113 |           }
114 |         } else if(length(dend) > 2) {
115 |           # If more than 2 branches, check if any are leaves
116 |           dend_leaves <- unlist(lapply(dend, is.leaf))
117 |           if(sum(dend_leaves) > 0) {
118 |             # If so, check for matching labels to the leaf to prune
119 |             dend_labels <- unlist(lapply(dend, function(x) attr(x, "label")))
120 |             dend_matches <- dend_labels == leaf_name
121 |             # Return a list containing the non-matching branches
122 |             dend <- dend[!(dend_leaves & dend_matches)]
123 |           }
124 |         }
125 |       }
126 |     }
127 |     return(dend)
128 |   }
129 |   
130 |   
131 |   new_dend <- remove_leaf_if_child(dend, leaf_name)
132 |   new_dend <- suppressWarnings(stats_midcache.dendrogram(new_dend)) # fixes the attributes
133 |   #   new_x <- fix_members_attr.dendrogram(new_x) # fix the number of memebers attr for each node
134 |   return(new_dend)
135 | }


--------------------------------------------------------------------------------
/Figures/Figure_5/unified_layer_distributions_and_markers.R:
--------------------------------------------------------------------------------
 1 | library(dplyr)
 2 | library(ggplot2)
 3 | library(cowplot)
 4 | library(feather)
 5 | library(dendextend)
 6 | library(scrattch.vis)
 7 | library(scrattch.io)
 8 | options(stringsAsFactors = F)
 9 | 
10 | source("color_functions.R")
11 | source("prune_leaf_custom.R")
12 | source("layer_and_violin_functions.R")
13 | 
14 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather")
15 | 
16 | cluster_anno <- anno %>%
17 |   select(cl, dendcluster_id, cluster_id, cluster_label, cluster_color) %>%
18 |   unique()
19 | 
20 | cocl_in <- read.csv("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/co.stats.csv")
21 | cocl_in$cluster_id.x <- cluster_anno$cluster_id[match(cocl_in$cl.x,cluster_anno$cl)]
22 | cocl_in$cluster_id.y <- cluster_anno$cluster_id[match(cocl_in$cl.y,cluster_anno$cl)]
23 | 
24 | cocl <- cocl_in %>%
25 |   select(cluster_id.x, cluster_id.y, co.ratio) %>%
26 |   filter(cluster_id.x != cluster_id.y)
27 | 
28 | dend <- readRDS("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/dend.RData")
29 | 
30 | sst_pvalb_layers <- build_layer_plot(anno,
31 |                               dend,
32 |                               cocl,
33 |                               dendcluster_ids = 85:115)
34 | 
35 | lamp5_sncg_vip_layers <- build_layer_plot(anno,
36 |                                      dend,
37 |                                      cocl,
38 |                                      dendcluster_ids = 56:84)
39 | 
40 | ## Violin plots
41 | 
42 | fdir <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/"
43 | 
44 | sst_pvalb_genes <- c("Sst","Chodl","Nos1","Mme","Tac1","Tacr3","Calb2","Nr2f2","Myh8","Tac2","Hpse","Crhr2","Crh","Esm1","Rxfp1","Nts","Pvalb","Gabrg1","Th","Calb1","Akr1c18","Sema3e","Gpr149","Reln","Tpbg","Cpne5","Vipr2","Nkx2-1")
45 | #sst_pvalb_genes <- unique(split_cst("Sst Chodl Nos1 Etv1 Il1rapl2 Myh8 Chrna2 Tac2 Crhr2 Etv1 Calb2 Hpse C1ql3 Crh Nts Pvalb Gabrg1 Th Prdm8 Calb1 Reln Gpr149 Cpne5 Vipr2 Nkx2-1"))
46 | sst_pvalb_markers <- group_violin_plot2(data_source = fdir, 
47 |                                        group_by = "dendcluster", 
48 |                                        clusters = 85:115,
49 |                                        genes = sst_pvalb_genes,
50 |                                        logscale = TRUE,
51 |                                        labelheight = 2,
52 |                                        max_width = 10,
53 |                                        fontsize = 5,
54 |                                        showcounts = FALSE)
55 | 
56 | lamp5_sncg_vip_genes <- c("Lamp5","Ndnf","Krt73","Fam19a1","Pax6","Ntn1","Plch2","Lsp1","Lhx6","Nkx2-1","Vip","Sncg","Slc17a8","Nptx2","Gpr50","Itih5","Serpinf1","Igfbp6","Gpc3","Lmo1","Ptprt","Rspo4","Chat","Crispld2","Col15a1","Pde1a")
57 | #lamp5_sncg_vip_genes <- split_cst("Lamp5 Pax6 Ndnf Egln3 Pdlim5 Slc35d3 Vax1 Lhx6 Nkx2-1 Vip Calb2 Serpinf1 Col14a1 Sncg Ptprk Crispld2 Slc17a8 Igfbp6 Reln Gpc3 Lmo1 Cck Rspo4 Cbln4 Htr1f C1ql1 Itih5 ")
58 | lamp5_sncg_vip_markers <- group_violin_plot2(data_source = fdir, 
59 |                                         group_by = "dendcluster", 
60 |                                         clusters = 56:84,
61 |                                         genes = lamp5_sncg_vip_genes,
62 |                                         logscale = TRUE,
63 |                                         labelheight = 2,
64 |                                         max_width = 10,
65 |                                         fontsize = 5,
66 |                                         showcounts = FALSE)
67 | 
68 | all_plots <- plot_grid(sst_pvalb_layers,
69 |                        lamp5_sncg_vip_layers,
70 |                        sst_pvalb_markers,
71 |                        lamp5_sncg_vip_markers,
72 |                        align = "v",
73 |                        nrow = 2,
74 |                        rel_widths = 1,
75 |                        rel_heights = 1,
76 |                        labels = c("c","d","e","f"))
77 | 
78 | save_plot("panels_cdef.pdf",
79 |           all_plots,
80 |           ncol = 2,
81 |           nrow = 2,
82 |           base_width = 7.5/2,
83 |           base_height = 6/2)
84 | 


--------------------------------------------------------------------------------
/Figures/Figure_6/L4 IT VISp Rspo1 L5 IT VISp Batf3.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/tasic2018analysis/ae7857c520c675ca560b57ce152689df7bb21829/Figures/Figure_6/L4 IT VISp Rspo1 L5 IT VISp Batf3.rda


--------------------------------------------------------------------------------
/Figures/Figure_6/L4 IT VISp Rspo1 L5 IT VISp Hsd11b1 Endou.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/tasic2018analysis/ae7857c520c675ca560b57ce152689df7bb21829/Figures/Figure_6/L4 IT VISp Rspo1 L5 IT VISp Hsd11b1 Endou.rda


--------------------------------------------------------------------------------
/Figures/Figure_6/L4.2016.dat.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/tasic2018analysis/ae7857c520c675ca560b57ce152689df7bb21829/Figures/Figure_6/L4.2016.dat.rda


--------------------------------------------------------------------------------
/Figures/Figure_6/L4.dat.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/tasic2018analysis/ae7857c520c675ca560b57ce152689df7bb21829/Figures/Figure_6/L4.dat.rda


--------------------------------------------------------------------------------
/Figures/Figure_6/L4.df.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/tasic2018analysis/ae7857c520c675ca560b57ce152689df7bb21829/Figures/Figure_6/L4.df.rda


--------------------------------------------------------------------------------
/Figures/Figure_6/L4.sampled.df.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/tasic2018analysis/ae7857c520c675ca560b57ce152689df7bb21829/Figures/Figure_6/L4.sampled.df.rda


--------------------------------------------------------------------------------
/Figures/Figure_6/L4_comparison_plots.R:
--------------------------------------------------------------------------------
  1 | library(dplyr)
  2 | library(ggplot2)
  3 | library(cowplot)
  4 | options(stringsAsFactors = F)
  5 | 
  6 | load("gradient.df.list.rda")
  7 | 
  8 | load("L4 IT VISp Rspo1 L5 IT VISp Batf3.rda")
  9 | L4_vs_Batf3 <- df %>%
 10 |   mutate(sample_id = rownames(.))
 11 | load("L4 IT VISp Rspo1 L5 IT VISp Hsd11b1 Endou.rda")
 12 | L4_vs_Hsd11b1 <- df %>%
 13 |   mutate(sample_id = rownames(.)) %>%
 14 |   mutate(prob = 1-prob) %>%
 15 |   mutate(eigen = -1 * eigen)
 16 | load("L4.df.rda")
 17 | L4_vs_L4 <- L4.df %>%
 18 |   mutate(sample_id = rownames(.)) %>%
 19 |   mutate(color = "#000000",
 20 |          color = ifelse(prob < 0.2, "#1241FF" , color),
 21 |          color = ifelse(prob > 0.4 & prob < 0.6, "#6808FF" , color),
 22 |          color = ifelse(prob > 0.8, "#C80052", color)) %>%
 23 |   mutate(eigen = -1 * eigen)
 24 | 
 25 | this_theme <- theme_bw(4) +
 26 |   theme(panel.grid.minor = element_blank(),
 27 |         panel.grid.major = element_line(color = "#EBEBEB", size = 0.25),
 28 |         panel.border = element_blank(),
 29 |         axis.ticks = element_blank())
 30 | 
 31 | L4_vs_Batf3_points <- ggplot() +
 32 |   geom_point(data = L4_vs_Batf3,
 33 |              aes(x = prob,
 34 |                  y = eigen),
 35 |              size = 0.2) +
 36 |   ylab("") +
 37 |   scale_x_continuous("",
 38 |                      limits = c(-0.05, 1.05),
 39 |                      expand = c(0,0)) +
 40 |   this_theme
 41 | 
 42 | L4_vs_Batf3_ks_pval <- 0e0
 43 | 
 44 | L4_vs_Batf3_sampled <- L4_vs_Batf3 %>%
 45 |   filter(sample_id %in% rownames(df.list[["69_72"]]))
 46 | 
 47 | L4_vs_Batf3_bars <- ggplot(L4_vs_Batf3_sampled, aes(prob)) + 
 48 |   geom_histogram(aes(y=..count../sum(..count..)),
 49 |                  breaks = seq(0, 1 , by = 0.05),
 50 |                  binwidth = 0.5,
 51 |                  color = NA,
 52 |                  size = 0.25) +
 53 |   geom_hline(yintercept = 0.05,
 54 |              size = 0.5) +
 55 |   ylim(0, 0.3) +
 56 |   ylab("") +
 57 |   scale_x_continuous("Assignment probability",
 58 |                      limits = c(-0.05, 1.05),
 59 |                      expand = c(0,0)) +  
 60 |   this_theme
 61 | 
 62 | L4_vs_Hsd11b1_points <- ggplot() +
 63 |   geom_point(data = L4_vs_Hsd11b1,
 64 |              aes(x = prob,
 65 |                  y = eigen),
 66 |              size = 0.2) +
 67 |   ylab("") +
 68 |   scale_x_continuous("",
 69 |                      limits = c(-0.05, 1.05),
 70 |                      expand = c(0,0)) +  
 71 |   this_theme
 72 | 
 73 | L4_vs_Hsd11b1_ks_pval <- 4.84e-13
 74 | 
 75 | L4_vs_Hsd11b1_sampled <- L4_vs_Hsd11b1 %>%
 76 |   filter(sample_id %in% rownames(df.list[["69_70"]]))
 77 | 
 78 | L4_vs_Hsd11b1_bars <- ggplot(L4_vs_Hsd11b1_sampled, aes(prob)) + 
 79 |   geom_histogram(aes(y=..count../sum(..count..)),
 80 |                  breaks = seq(0, 1 , by = 0.05),
 81 |                  binwidth = 0.5,
 82 |                  color = NA,
 83 |                  size = 0.25) +
 84 |   geom_hline(yintercept = 0.05,
 85 |              size = 0.5) +
 86 |   ylim(0, 0.3) +
 87 |   ylab("") +
 88 |   scale_x_continuous("Assignment probability",
 89 |                      limits = c(-0.05, 1.05),
 90 |                      expand = c(0,0)) +  
 91 |   this_theme
 92 | 
 93 | L4_vs_L4_points <- ggplot() +
 94 |   geom_point(data = L4_vs_L4,
 95 |              aes(x = prob,
 96 |                  y = eigen,
 97 |                  color = color),
 98 |              size = 0.2) +
 99 |   scale_color_identity() +
100 |   ylab("Eigengenvector position") +
101 |   xlab("") +
102 |   this_theme
103 | 
104 | L4_vs_L4_ks_pval <- 4.08e-3
105 | 
106 | L4_vs_L4_sampled <- L4_vs_L4 %>%
107 |   filter(sample_id %in% rownames(df.list[["58"]]))
108 | 
109 | L4_vs_L4_bars <- ggplot(L4_vs_L4_sampled, aes(prob)) + 
110 |   geom_histogram(aes(y=..count../sum(..count..)),
111 |                  breaks = seq(0, 1 , by = 0.05),
112 |                  binwidth = 0.5,
113 |                  color = NA,
114 |                  size = 0.25) +
115 |   geom_hline(yintercept = 0.05,
116 |              size = 0.5) +
117 |   ylim(0, 0.3) +
118 |   ylab("Fraction of Cells") +
119 |   scale_x_continuous("Assignment probability",
120 |                      limits = c(-0.05, 1.05),
121 |                      expand = c(0,0)) +  
122 |   this_theme
123 | 
124 | all_plots <- plot_grid(L4_vs_L4_points,
125 |                        L4_vs_Hsd11b1_points,
126 |                        L4_vs_Batf3_points,
127 |                        L4_vs_L4_bars,
128 |                        L4_vs_Hsd11b1_bars,
129 |                        L4_vs_Batf3_bars,
130 |                        align = "v",
131 |                        nrow = 2,
132 |                        rel_widths = 1,
133 |                        rel_heights = c(1, 0.5))
134 | 
135 | save_plot("gradient_panels.pdf",
136 |           all_plots,
137 |           ncol = 3,
138 |           nrow = 2,
139 |           base_width = 3.5/3,
140 |           base_height = 1,
141 |           useDingbats = F)
142 | 


--------------------------------------------------------------------------------
/Figures/Figure_6/custom_annotate_cat.R:
--------------------------------------------------------------------------------
 1 | annotate_cat <- function(df,
 2 |                          col = NULL, base = NULL,
 3 |                          sort_label = T, na_val = "ZZ_Missing",
 4 |                          colorset = "varibow", color_order = "sort") {
 5 |   
 6 |   library(dplyr)
 7 |   library(viridis)
 8 |   
 9 |   if(class(try(is.character(col), silent = T)) == "try-error") {
10 |     col <- lazyeval::expr_text(col)
11 |   } else if(class(col) == "NULL") {
12 |     stop("Specify a column (col) to annotate.")
13 |   }
14 |   
15 |   if(class(try(is.character(base), silent = T)) == "try-error") {
16 |     base <- lazyeval::expr_text(base)
17 |   } else if(class(base) == "NULL") {
18 |     base <- col
19 |   }
20 |   
21 |   if(!is.character(df[[col]])) {
22 |     df[[col]] <- as.character(df[[col]])
23 |   }
24 |   
25 |   df[[col]][is.na(df[[col]])] <- na_val
26 |   
27 |   x <- df[[col]]
28 |   
29 |   annotations <- data.frame(label = unique(x), stringsAsFactors = F)
30 |   
31 |   if(sort_label == T) {
32 |     annotations <- annotations %>% arrange(label)
33 |   }
34 |   
35 |   annotations <- annotations %>%
36 |     mutate(id = 1:n())
37 |   
38 |   if(colorset == "varibow") {
39 |     colors <- varibow(nrow(annotations))
40 |   } else if(colorset == "rainbow") {
41 |     colors <- sub("FF$","",rainbow(nrow(annotations)))
42 |   } else if(colorset == "viridis") {
43 |     colors <- sub("FF$","",viridis(nrow(annotations)))
44 |   } else if(colorset == "magma") {
45 |     colors <- sub("FF$","",magma(nrow(annotations)))
46 |   } else if(colorset == "inferno") {
47 |     colors <- sub("FF$","",inferno(nrow(annotations)))
48 |   } else if(colorset == "plasma") {
49 |     colors <- sub("FF$","",plasma(nrow(annotations)))
50 |   } else if(colorset == "terrain") {
51 |     colors <- sub("FF$","",terrain.colors(nrow(annotations)))
52 |   } else if(is.character(colorset)) {
53 |     colors <- colorRampPalette(colorset)(nrow(annotations))
54 |   }
55 |   
56 |   if(color_order == "random") {
57 |     
58 |     colors <- sample(colors, length(colors))
59 |     
60 |   }
61 |   
62 |   annotations <- mutate(annotations, color = colors)
63 |   
64 |   names(annotations) <- paste0(base, c("_label","_id","_color"))
65 |   
66 |   names(df)[names(df) == col] <- paste0(base,"_label")
67 |   
68 |   df <- left_join(df, annotations, by = paste0(base, "_label"))
69 |   
70 |   df
71 | }
72 | 
73 | color_mean <- function(x) {
74 |   library(grDevices)
75 |   
76 |   rgb_x <- col2rgb(x)
77 |   rgb_mean <- rowMeans(rgb_x)
78 |   new_hex <- rgb(rgb_mean["red"]/255,
79 |                  rgb_mean["green"]/255,
80 |                  rgb_mean["blue"]/255)
81 |   
82 |   new_hex
83 |   
84 | }


--------------------------------------------------------------------------------
/Figures/Figure_6/gradient.df.list.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/tasic2018analysis/ae7857c520c675ca560b57ce152689df7bb21829/Figures/Figure_6/gradient.df.list.rda


--------------------------------------------------------------------------------
/Figures/Figure_6/l4_gradient_heatmap.R:
--------------------------------------------------------------------------------
 1 | library(feather)
 2 | library(ggplot2)
 3 | library(reshape2)
 4 | library(scrattch.vis)
 5 | library(viridisLite)
 6 | options(stringsAsFactors = F)
 7 | 
 8 | load("L4.df.rda")
 9 | 
10 | genes <- c("Shisa3","Tmem215","Gpr88","Slc35g2","Rab3b","Scnn1a","Thsd7a","Nrsn2","Endou","Ccdc3",
11 |            "Col8a1","Rassf2","Nog","Fmn1","Phactr2","Cnn3","Rreb1","Tshz2",
12 |            "9530059O14Rik","Kitl","Cnr1","Pde1a","LOC105245781","Clec18a","St6galnac5","Galnt14","Doc2a","Xkr6","Lmo3","Stac2","Adam33","Chrm3","Ctxn3")
13 | 
14 | gene_data <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/data.feather",
15 |                           columns = c("sample_id",genes))
16 | 
17 | l4_data <- L4.df %>%
18 |   mutate(sample_id = rownames(.)) %>%
19 |   left_join(gene_data) %>%
20 |   arrange(prob,eigen) %>%
21 |   mutate(xpos = 1:n()) %>%
22 |   mutate(color = values_to_colors(prob, colorset = viridis(10)))
23 | 
24 | first_instance <- function(x, vals) {
25 |   y <- numeric()
26 |   for(val in vals) {
27 |     y <- c(y, min(which(x >= val)))
28 |   }
29 |   y
30 | }
31 | 
32 | x_breaks <- data.frame(val = seq(0, 1, by = 0.2),
33 |                        xpos = first_instance(l4_data$prob, seq(0,1, by = 0.2)))
34 | 
35 | gene_ypos <- data.frame(gene = genes,
36 |                         ypos = 1:length(genes))
37 | 
38 | l4_plot_data <- melt(l4_data[,c("xpos",genes)], "xpos") %>%
39 |   mutate(color = values_to_colors(log10(value + 1))) %>%
40 |   rename(gene = variable) %>%
41 |   left_join(gene_ypos)
42 | 
43 | l4_heatmap <- ggplot() +
44 |   geom_tile(data = l4_plot_data,
45 |             aes(x = xpos,
46 |                 y = ypos,
47 |                 fill = color)) +
48 |   geom_tile(data = l4_data,
49 |             aes(x = xpos,
50 |                 y = -1,
51 |                 fill = color)) +
52 |   scale_fill_identity() +
53 |   scale_y_continuous("", expand = c(0,0),
54 |                    breaks = 1:length(genes),
55 |                    labels = genes) +
56 |   scale_x_continuous("", expand = c(0,0),
57 |                      breaks = x_breaks$xpos,
58 |                      labels = x_breaks$val) +
59 |   theme_bw(4) +
60 |   theme(axis.ticks.y = element_blank(),
61 |         panel.border = element_blank(),
62 |         panel.grid = element_blank())
63 | 
64 | ggsave("l4_heatmap.pdf",
65 |        l4_heatmap,
66 |        width = 2,
67 |        height = 2)
68 | 


--------------------------------------------------------------------------------
/Figures/Figure_6/sst150_coords.tsv:
--------------------------------------------------------------------------------
 1 | cl	sst150_cl	sst150_cluster_id	sst150_cluster_label	sst150_cluster_color	const_x	const_y
 2 | 31	1	30	Sst Chodl	#FFFF00	1.25	8
 3 | 36	2	31	Sst Mme Fam114a1	#FFBB33	5	4.5
 4 | 35	3	32	Sst Tac1 Htr1d	#804811	5	2.5
 5 | 32	4	33	Sst Tac1 Tacr3	#B06411	5	1.5
 6 | 38	5	34	Sst Calb2 Necab1	#BF480D	3	1.5
 7 | 37	6	35	Sst Calb2 Pdlim5	#CC6D3D	1	3.75
 8 | 33	7	36	Sst Nr2f2 Necab1	#FFDF11	1	4.5
 9 | 34	8	37	Sst Myh8 Etv1	#D6C300	1	6
10 | 41	9	38	Sst Myh8 Fibin	#FF8011	2	7.5
11 | 42	10	39	Sst Chrna2 Glra3	#FF9F2C	1	7
12 | 43	11	40	Sst Chrna2 Ptgdr	#FFB307	2	6
13 | 45	12	41	Sst Tac2 Myh4	#D9C566	3	6.75
14 | 39	13	42	Sst Hpse Sema3c	#BF9F00	2	4.5
15 | 40	14	43	Sst Hpse Cbln4	#806B19	3	3.75
16 | 47	15	44	Sst Crhr2 Efemp1	#B95541	4	4.5
17 | 48	16	45	Sst Crh 4930553C11Rik	#C77767	5	5.25
18 | 46	17	46	Sst Esm1	#C11331	5	6.75
19 | 44	18	47	Sst Tac2 Tacstd2	#BF8219	3	5
20 | 50	19	48	Sst Rxfp1 Eya1	#994C00	4	5
21 | 49	20	49	Sst Rxfp1 Prdm8	#802600	4	6
22 | 51	21	50	Sst Nts	#A81111	4.5	7.5
23 | 


--------------------------------------------------------------------------------
/Figures/Figure_6/sst300_cl_anno.csv:
--------------------------------------------------------------------------------
 1 | "","ref.cl","ref.cl","cluster_id","cluster_color","zy_cluster_label","cluster_label","class_id","class_label","class_color","subclass_id","subclass_label","subclass_color","size","ALM","VISp","gene.counts","class_region_label","region_label","subclass_region_label","specific.markers","within.class.markers","within.class.node.markers","sibling.markers","cl","Notes","X","X.1","sst300_cl"
 2 | "1","31",27,30,"#FFFF00","Sst Chodl","Sst Chodl",1,"GABAergic","#EF4136",4,"Sst","#F15A29",180,0.53,0.47,10182,"GABAergic","","Sst","Sit1,Chodl,Krt18,P2rx2","Chodl,Bmp3,Ntn1,Gpr126",NA,NA,31,"","","","1"
 3 | "2","35",30,32,"#804811","Sst Tac1","Sst Tac1 Htr1d",1,"GABAergic","#EF4136",4,"Sst","#F15A29",289,0.47,0.53,9278,"GABAergic","","Sst",NA,"Tac1,Htr1d,Edn1,Mmd2","Tac1,Thbs2,Necab1,Fstl4","Hsdl2,Chrna4,Zfp536,Plxdc2",35,"","","","2"
 4 | "3","36",31,31,"#FFBB33","Sst Etv1 Cdk15","Sst Mme Fam114a1",1,"GABAergic","#EF4136",4,"Sst","#F15A29",58,0.37,0.63,10218,"GABAergic","","Sst","Cryaa,Cdk15","Cdk15,Sox5,Fgf18,Akr1c18","Necab1,Ajap1","Adora1,Hsdl2,Zfp536,Plxdc2",36,"Need second opinion","","","3"
 5 | "4","37",32,35,"#BF480D","Sst Calb2 Slc9a2_1","Sst Calb2 Pdlim5",1,"GABAergic","#EF4136",4,"Sst","#F15A29",268,0.5,0.5,9935,"GABAergic","","Sst",NA,"Slc9a2,Pcsk5,Daam2,Calb2","Daam2,Pcsk5,Calb2,Cbln4","Pcsk5,Ptprt",37,"","","","4"
 6 | "5","33",29,36,"#FFDF11","Sst Etv1 Nr2f2_1","Sst Nr2f2 Necab1",1,"GABAergic","#EF4136",4,"Sst","#F15A29",101,0.35,0.65,10110,"GABAergic","","Sst","Rassf10,Pik3cg,Kl,Mstn","Pik3cg,Kl,LOC102633724,Mstn","Necab1,Fstl4,Ajap1","Adora1,Hsdl2",33,"","","","5"
 7 | "6","39",33,42,"#BF9F00","Sst Hpse Sema3c","Sst Hpse Sema3c",1,"GABAergic","#EF4136",4,"Sst","#F15A29",268,0.15,0.85,8621,"GABAergic","","Sst","Prdm1,C1qtnf7,2610028E06Rik,Hpse","2610028E06Rik,Prdm1,Hpse,C1qtnf7","Hpse,Prdm1,Ackr3","Pnoc,C1qtnf7",39,"","","","6"
 8 | "7","41",35,38,"#FF8011","Sst Myh8 Fibin","Sst Myh8 Fibin",1,"GABAergic","#EF4136",4,"Sst","#F15A29",82,0.2,0.8,8319,"GABAergic","","Sst",NA,"LOC105245003,Fibin,Hrh1,Igfbp5","Myh8,Kit,Ppapdc1a,Il1rapl2","Fam46a,Hs6st2",41,"","","","7"
 9 | "8","42",36,39,"#FF9F2C","Sst Chrna2 Fam46a","Sst Chrna2 Kit",1,"GABAergic","#EF4136",4,"Sst","#F15A29",654,0.56,0.44,8738,"GABAergic","","Sst","Chrna2,Myh8,Myh4,Myh13","Chrna2,Myh13,Ppapdc1a,Myh8","Myh8,Kit,Ppapdc1a,Chrna2","Fam46a,Hs6st2,Me3,Elmo1",42,"Need second opinion","","","8"
10 | "9","43",37,40,"#FFB307","Sst Myh8 Ptgdr","Sst Myh13 Ptgdr",1,"GABAergic","#EF4136",4,"Sst","#F15A29",213,0.34,0.66,8950,"GABAergic","","Sst","LOC101056159,Myh4,Ptgdr,Myh13","LOC101056159,Efemp1,Ptgdr,Gulp1","Efemp1,Ptgdr","Sema3c,Efemp1",43,"","","","9"
11 | "10","45",39,41,"#D9C566","Sst Tac2 Myh4","Sst Tac2 Myh4",1,"GABAergic","#EF4136",4,"Sst","#F15A29",39,0.7,0.3,9688,"GABAergic","","Sst","Crygs","Fam212b,Drd2,Bmp2,Tac2",NA,"Htr1a,Eya4",45,"","","","10"
12 | "11","44",38,47,"#BF8219","Sst Tac2 Slc24a4","Sst Tac2 Tacstd2",1,"GABAergic","#EF4136",4,"Sst","#F15A29",123,0.3,0.7,9640,"GABAergic","","Sst","Tacstd2","Slc24a4,Tac2,Ndnf,Lmo1","Tacstd2","Sema3c,Tmem196",44,"","","","11"
13 | "12","46",40,46,"#C11331","Sst Crhr2 Nr2f2","Sst Esm1",1,"GABAergic","#EF4136",4,"Sst","#F15A29",152,0.47,0.53,10370,"GABAergic","","Sst","Crygs,Crhr2,Esm1","Esm1,Grm8,Crhr2,Stat4","Grm8,Nmbr,Crhr2,3110035E14Rik","Man1a,Crhr2",46,"","","","12"
14 | "13","47",41,44,"#B95541","Sst Crhr2 Twist2_1","Sst Crhr2 Efemp1",1,"GABAergic","#EF4136",4,"Sst","#F15A29",227,0.35,0.65,10052,"GABAergic","","Sst","Crygs,Hpse","Twist2,Agtr2,Cckbr,Adra1d","Grm8,Nmbr,Crhr2,3110035E14Rik","Man1a,Crhr2,Cdh7,Zfp536",47,"","","","13"
15 | "14","49",42,49,"#802600","Sst Rxfp1 Prdm8","Sst Rxfp1 Prdm8",1,"GABAergic","#EF4136",4,"Sst","#F15A29",183,0.13,0.87,9828,"GABAergic","","Sst","Tacstd2,Pard3b,Th","Prdm8,Rxfp1,Pla2g4a,Chrnb3","Rxfp1,Ptgs2,Tacstd2,C1ql3","Rxfp1,Ptprk,Zmat4,Itm2a",49,"","","","14"
16 | "15","51",44,50,"#A81111","Sst Nts","Sst Nts",1,"GABAergic","#EF4136",4,"Sst","#F15A29",43,0.17,0.83,9383,"GABAergic","","Sst","Nts,Th,Tacstd2,Gabrq","Nts,Th,LOC105245834,Diap3",NA,NA,51,"","","","15"
17 | 


--------------------------------------------------------------------------------
/Figures/Figure_6/sst300_coords.tsv:
--------------------------------------------------------------------------------
 1 | sst300_cl	sst300_cluster_id	sst300_cluster_label	sst300_cluster_color	const_x	const_y
 2 | 1	1	Sst Chodl	#FFFF00	1.25	8
 3 | 2	2	Sst Tac1 Htr1d	#804811	5	2.5
 4 | 3	3	Sst Mme Fam114a1	#FFBB33	5	4.5
 5 | 4	4	Sst Calb2 Pdlim5	#BF480D	1	3.75
 6 | 5	5	Sst Nr2f2 Necab1	#FFDF11	1	4.5
 7 | 6	6	Sst Hpse Sema3c	#BF9F00	2	4.5
 8 | 7	7	Sst Myh8 Fibin	#FF8011	2	7.5
 9 | 8	8	Sst Chrna2 Glra3	#FF9F2C	1	7
10 | 9	9	Sst Chrna2 Ptgdr	#FFB307	2	6
11 | 10	10	Sst Tac2 Myh4	#D9C566	3	6.75
12 | 11	11	Sst Tac2 Tacstd2	#BF8219	3	5
13 | 12	12	Sst Esm1	#C11331	5	6.75
14 | 13	13	Sst Crhr2 Efemp1	#B95541	4	4.5
15 | 14	14	Sst Rxfp1 Prdm8	#802600	4	6
16 | 15	15	Sst Nts	#A81111	4.5	7.5
17 | 


--------------------------------------------------------------------------------
/Figures/Figure_6/sst80_coords.tsv:
--------------------------------------------------------------------------------
 1 | sst80_cl	sst80_cluster_id	sst80_cluster_label	sst80_cluster_color	const_x	const_y
 2 | 1	1	Sst Chodl_1	#FFFF00	1.25	8
 3 | 2	2	Sst Chodl_2	#FFFF20	1.75	8
 4 | 3	3	Sst Tac1 Tacr3	#B06411	5	1.5
 5 | 4	4	Sst Tac1 Htr1d	#804811	5	2.5
 6 | 5	5	Sst Mme Fam114a1	#FFBB33	5	4.5
 7 | 6	6	Sst Calb2 Pdlim5_1	#BF480D	1	2.5
 8 | 7	7	Sst Calb2 Pdlim5_2	#BF482D	1.5	3.75
 9 | 8	8	Sst Calb2 Pdlim5_3	#BF684D	1.5	3.25
10 | 9	9	Sst Calb2 Pdlim5_4	#BF680D	1	3.25
11 | 10	10	Sst Calb2 Necab1	#BF282D	3	1.5
12 | 11	11	Sst Nr2f2 Necab1	#FFDF11	1	4.5
13 | 12	12	Sst Myh8 Etv1 	#FFDF31	1	6
14 | 13	13	Sst Hpse Sema3c_1	#BF9F00	2	4.75
15 | 14	14	Sst Hpse Sema3c_2	#BF9F40	2	4.25
16 | 15	15	Sst Hpse Sema3c_3	#BF9F60	2.5	5.5
17 | 16	16	Sst Hpse Cbln4	#806B19	3	3.75
18 | 17	17	Sst Myh8 Fibin	#FF8031	2	7.5
19 | 18	18	Sst Chrna2 Glra3_1	#FF9F2C	0.5	7
20 | 19	19	Sst Chrna2 Glra3_2	#FF9F4C	1	6.75
21 | 20	20	Sst Chrna2 Glra3_3	#FF9F6C	1	7.5
22 | 21	21	Sst Chrna2 Ptgdr	#FFB307	2	6
23 | 22	22	Sst Tac2 Myh4	#D9C566	3	6.75
24 | 23	23	Sst Tac2 Tacstd2	#BF8219	3	5
25 | 24	24	Sst Esm1	#C11331	5	6.75
26 | 25	25	Sst Crh 4930553C11Rik_1	#B95541	4.5	5.5
27 | 26	26	Sst Crh 4930553C11Rik_2	#B95561	5	5
28 | 27	27	Sst Crhr2 Efemp1	#B95581	4	4.5
29 | 28	28	Sst Rxfp1 Prdm8	#802600	4	6
30 | 29	29	Sst Rxfp1 Eya1	#994C20	4	5
31 | 30	30	Sst Nts	#A81111	4.5	7.5
32 | 


--------------------------------------------------------------------------------
/Figures/Figure_6/sst_constellation_positions.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/tasic2018analysis/ae7857c520c675ca560b57ce152689df7bb21829/Figures/Figure_6/sst_constellation_positions.xlsx


--------------------------------------------------------------------------------
/Figures/Figure_6/sst_constellations.R:
--------------------------------------------------------------------------------
  1 | library(ggplot2)
  2 | library(dplyr)
  3 | source("plotting_functions.R")
  4 | 
  5 | 
  6 | 
  7 | cell.cl.map.df_to_transition.df.comb <- function(cell.cl.map.df, cl.df) {
  8 |   transition.df = with(cell.cl.map.df, as.data.frame(table(org.cl, transition.cl)))
  9 |   transition.df = transition.df[transition.df$Freq > 0,]
 10 |   transition.df$org.cl = as.character(transition.df$org.cl)
 11 |   transition.df$transition.cl = as.character(transition.df$transition.cl)
 12 |   
 13 |   ###combine transitions from both directions
 14 |   transition.df$cl.min = pmin(transition.df$org.cl, transition.df$transition.cl)
 15 |   transition.df$cl.max = pmax(transition.df$org.cl, transition.df$transition.cl)
 16 |   transition.df$cl.pair = paste(transition.df$cl.min, transition.df$cl.max)
 17 |   transition.df.comb= do.call("rbind",tapply(1:nrow(transition.df),transition.df$cl.pair, function(x){
 18 |     tmp = transition.df[x,][1,]
 19 |     tmp$Freq = sum(transition.df[x,"Freq"])
 20 |     tmp[,c(4,5,3)]
 21 |   }))
 22 |   cl.size = table(cell.cl.map.df$org.cl)
 23 |   transition.df.comb$cl.min.size = cl.size[transition.df.comb$cl.min]
 24 |   transition.df.comb$cl.max.size = cl.size[transition.df.comb$cl.max]
 25 |   transition.df.comb$ratio = with(transition.df.comb,Freq/pmin(cl.min.size,cl.max.size))
 26 |   transition.df.comb$cl1_label = cl.df[as.character(transition.df.comb$cl.min),"cluster_label"]
 27 |   transition.df.comb$cl2_label = cl.df[as.character(transition.df.comb$cl.max),"cluster_label"]
 28 |   colnames(transition.df.comb)[c(1:3,6)] = c("cl.x","cl.y","trans_n","trans_ratio")
 29 |   
 30 |   transition.df.comb
 31 | }
 32 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/Sst.cl.80.rda")
 33 | cl.df$sst80_cl <- rownames(cl.df)
 34 | 
 35 | write.csv(cl.df,"sst80_cl_anno.csv")
 36 | 
 37 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/Sst.cl.300.rda")
 38 | cl.df$sst300_cl <- rownames(cl.df)
 39 | 
 40 | write.csv(cl.df,"sst300_cl_anno.csv")
 41 | 
 42 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/Sst.cell.cl.map.80.rda")
 43 | samples <- rownames(cell.cl.map.df)
 44 | sst80_df <- cell.cl.map.df %>%
 45 |   mutate(org.cl = as.numeric(org.cl))
 46 | 
 47 | sst80_core_int <- sst80_df %>%
 48 |   group_by(org.cl) %>%
 49 |   summarise(n_core = sum(best.cl == org.cl))
 50 | names(sst80_core_int)[1] <- "cl"
 51 | 
 52 | sst80_cl_anno <- read.delim("sst80_coords.tsv", header = TRUE, sep = "\t")
 53 | rownames(sst80_cl_anno) <- sst80_cl_anno$sst80_cl
 54 | names(sst80_cl_anno)[1:4] <- c("cl","cluster_id","cluster_label","cluster_color")
 55 | 
 56 | sst80_cl_anno <- sst80_cl_anno %>%
 57 |   left_join(sst80_core_int)
 58 | 
 59 | sst80_transitions <- cell.cl.map.df_to_transition.df.comb(sst80_df, sst80_cl_anno) %>%
 60 |   mutate(cl.x = as.numeric(cl.x),
 61 |          cl.y = as.numeric(cl.y)) %>%
 62 |   filter(trans_ratio > 0.05) %>%
 63 |   select(cl.x, cl.y, trans_n)
 64 | 
 65 | sst80_interm <- net_coord_plot(sst80_cl_anno, sst80_transitions, sst80_cl_anno, cluster_ids = 1:30, split = F,
 66 |                                "trans_n", val_min = 0, link_color = "dodgerblue") +
 67 |   scale_y_reverse()
 68 | 
 69 | ggsave("sst80_constellation.pdf",
 70 |        sst80_interm,
 71 |        width = 4, height = 3,
 72 |        useDingbats = F)
 73 | 
 74 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/Sst.cell.cl.map.300.rda")
 75 | sst300_df <- cell.cl.map.df %>%
 76 |   mutate(org.cl = as.numeric(org.cl))
 77 | 
 78 | sst300_core_int <- sst300_df %>%
 79 |   group_by(org.cl) %>%
 80 |   summarise(n_core = sum(best.cl == org.cl))
 81 | names(sst300_core_int)[1] <- "cl"
 82 | 
 83 | sst300_cl_anno <- read.delim("sst300_coords.tsv", header = TRUE, sep = "\t")
 84 | rownames(sst300_cl_anno) <- sst300_cl_anno$sst300_cl
 85 | names(sst300_cl_anno)[1:4] <- c("cl","cluster_id","cluster_label","cluster_color")
 86 | 
 87 | sst300_cl_anno <- sst300_cl_anno %>%
 88 |   left_join(sst300_core_int)
 89 | 
 90 | sst300_transitions <- cell.cl.map.df_to_transition.df.comb(sst300_df, sst300_cl_anno) %>%
 91 |   mutate(cl.x = as.numeric(cl.x),
 92 |          cl.y = as.numeric(cl.y)) %>%
 93 |   filter(trans_ratio > 0.05)
 94 | 
 95 | sst300_interm <- net_coord_plot(sst300_cl_anno, sst300_transitions, sst300_cl_anno, cluster_ids = 1:13, split = F,
 96 |                                "trans_n", val_min = 0, link_color = "dodgerblue") +
 97 |   scale_y_reverse()
 98 | 
 99 | ggsave("sst300_constellation.pdf",
100 |        sst300_interm,
101 |        width = 4, height = 3,
102 |        useDingbats = F)
103 | 
104 | 
105 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/cell.cl.map.df.rda")
106 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather")
107 | sample_to_cl <- anno %>% select(sample_id, cl)
108 | 
109 | sst150_df <- cell.cl.map.df[samples,] %>%
110 |   mutate(sample_id = rownames(.)) %>%
111 |   mutate(org.cl = as.numeric(as.character(org.cl))) 
112 | 
113 | sst150_core_int <- sst150_df %>%
114 |   group_by(org.cl) %>%
115 |   summarise(n_core = sum(best.cl == org.cl))
116 | 
117 | sst150_cl_anno <- read.delim("sst150_coords.tsv", header = TRUE, sep = "\t")
118 | rownames(sst150_cl_anno) <- sst150_cl_anno$sst150_cl
119 | names(sst150_cl_anno)[1:5] <- c("cl","sst150_cl","cluster_id","cluster_label","cluster_color")
120 | 
121 | sst150_cl_anno <- sst150_cl_anno %>%
122 |   left_join(sst150_core_int, by = c("cl"="org.cl")) 
123 | 
124 | sst150_transitions <- cell.cl.map.df_to_transition.df.comb(sst150_df, sst150_cl_anno) %>%
125 |   mutate(cl.x = as.numeric(cl.x),
126 |          cl.y = as.numeric(cl.y)) %>%
127 |   filter(trans_ratio > 0.05)
128 | 
129 | sst150_interm <- net_coord_plot(sst150_cl_anno, sst150_transitions, sst150_cl_anno, cluster_ids = 30:50, split = F,
130 |                                 "trans_n", val_min = 0, link_color = "dodgerblue") +
131 |   scale_y_reverse()
132 | 
133 | ggsave("sst150_constellation.pdf",
134 |        sst150_interm,
135 |        width = 4, height = 3,
136 |        useDingbats = F)
137 | 


--------------------------------------------------------------------------------
/Figures/Figure_6/sst_river_plot.R:
--------------------------------------------------------------------------------
  1 | library(ggplot2)
  2 | library(dplyr)
  3 | library(scrattch.io)
  4 | library(feather)
  5 | options(stringsAsFactors = F)
  6 | 
  7 | source("custom_annotate_cat.R")
  8 | source("sankey_functions.R")
  9 | 
 10 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather")
 11 | 
 12 | sst_cluster_anno <- anno %>%
 13 |   filter(subclass_label == "Sst") %>%
 14 |   select(cl, cluster_id, cluster_label, cluster_color) %>%
 15 |   unique() %>%
 16 |   arrange(cluster_id) %>%
 17 |   mutate(cluster_id = 1:n())
 18 | 
 19 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/Sst.cell.cl.map.80.rda")
 20 | sst80 <- cell.cl.map.df %>%
 21 |   mutate(sample_id = rownames(.))
 22 | 
 23 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/Sst.cell.cl.map.300.rda")
 24 | sst300 <- cell.cl.map.df %>%
 25 |   mutate(sample_id = rownames(.))
 26 | 
 27 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/cell.cl.map.df.rda")
 28 | sst150 <- cell.cl.map.df[sst80$sample_id,] %>%
 29 |   mutate(sample_id = rownames(.)) %>%
 30 |   mutate(org.cl = as.numeric(as.character(org.cl))) %>%
 31 |   left_join(sst_cluster_anno, by = c("org.cl" = "cl"))
 32 | 
 33 | sst80_cl <- sst80 %>%
 34 |   select(sample_id, org.cl)
 35 | names(sst80_cl)[2] <- "sst80_cl"
 36 | sst150_cl <- sst150 %>%
 37 |   select(sample_id, org.cl, cluster_id, cluster_label, cluster_color)
 38 | names(sst150_cl)[2] <- "sst150_cl"
 39 | sst300_cl <- sst300 %>%
 40 |   select(sample_id, org.cl)
 41 | names(sst300_cl)[2] <- "sst300_cl"
 42 | 
 43 | 
 44 | river_data <- sst80_cl %>%
 45 |   left_join(sst150_cl) %>%
 46 |   left_join(sst300_cl) 
 47 | 
 48 | # Filter transitions of fewer than 3 cells
 49 | river_data2 <- river_data %>%
 50 |   group_by(sst80_cl, sst150_cl) %>%
 51 |   mutate(n_80_150 = n()) %>%
 52 |   group_by(sst150_cl, sst300_cl) %>%
 53 |   mutate(n_150_300 = n()) %>%
 54 |   ungroup() %>%
 55 |   filter(n_80_150 > 2 & n_150_300 > 2)
 56 | 
 57 | max_freq_name <- function(x) {
 58 |   names(table(x))[which(table(x) == max(table(x)))]
 59 | }
 60 | 
 61 | # Ordering:
 62 | # First, order 300 based on clusters
 63 | sst300_cl_anno <- river_data2 %>%
 64 |   group_by(sst300_cl) %>%
 65 |   summarise(sst300_cluster_weight = mean(cluster_id),
 66 |             sst300_cluster_color = color_mean(cluster_color),
 67 |             sst300_cluster_label = max_freq_name(cluster_label)) %>%
 68 |   arrange(sst300_cluster_weight) %>%
 69 |   mutate(sst300_cluster_id = 1:n())
 70 | 
 71 | river_data2 <- river_data2 %>%
 72 |   left_join(sst300_cl_anno)
 73 | 
 74 | # Then, order standard clustering based on 300
 75 | std_cl_anno <- river_data2 %>%
 76 |   group_by(cluster_id) %>%
 77 |   summarise(std_cluster_weight = mean(sst300_cluster_id),
 78 |             std_cluster_color = color_mean(sst300_cluster_color),
 79 |             std_cluster_label = cluster_label[1]) %>%
 80 |   arrange(std_cluster_weight) %>%
 81 |   mutate(std_cluster_id = 1:n())
 82 | 
 83 | river_data2 <- river_data2 %>%
 84 |   left_join(std_cl_anno)
 85 | 
 86 | sst80_cl_anno <- river_data2 %>%
 87 |   group_by(sst80_cl) %>%
 88 |   summarise(sst80_cluster_weight = mean(std_cluster_id),
 89 |             sst80_cluster_color = color_mean(std_cluster_color),
 90 |             sst80_cluster_label = max_freq_name(std_cluster_label)) %>%
 91 |   arrange(sst80_cluster_weight) %>%
 92 |   mutate(sst80_cluster_id = 1:n())
 93 | 
 94 | river_data2 <- river_data2 %>%
 95 |   left_join(sst80_cl_anno)
 96 | 
 97 | plot_nodes <- make_plot_nodes(make_group_nodes(river_data2, c("sst80_cluster","std_cluster","sst300_cluster")),pad = 0.2)
 98 | 
 99 | group_links <- make_group_links(river_data2, 
100 |                                 c("sst80_cluster","std_cluster","sst300_cluster"),
101 |                                 plot_nodes) %>%
102 |   filter(n > 4)
103 | 
104 | plot_links <- make_plot_links(group_links,
105 |                               fill = "std_cluster")
106 | 
107 | sst80_labels <- plot_nodes %>%
108 |   filter(group == "sst80_cluster") %>%
109 |   mutate(x = 1,
110 |          y = (ymin + ymax) / 2)
111 | std_labels <- plot_nodes %>%
112 |   filter(group == "std_cluster") %>%
113 |   mutate(x = 2,
114 |          y = (ymin + ymax) / 2)
115 | sst300_labels <- plot_nodes %>%
116 |   filter(group == "sst300_cluster") %>%
117 |   mutate(x = 3,
118 |          y = (ymin + ymax) / 2)
119 | 
120 | river_plot <- build_river_plot_predefined(plot_nodes, plot_links) +
121 |   geom_text(data = sst80_labels,
122 |             aes(x = x, y = y,
123 |                 label = name),
124 |             size = 2) +
125 |   geom_text(data = std_labels,
126 |             aes(x = x, y = y,
127 |                 label = name),
128 |             size = 2) +
129 |   geom_text(data = sst300_labels,
130 |             aes(x = x, y = y,
131 |                 label = name),
132 |             size = 2)
133 | 
134 | ggsave("sst_river_plot.pdf",
135 |        river_plot,
136 |        width = 6,
137 |        height = 3.5)
138 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Allen Institute Software License - This software license is the 2-clause BSD license 
 2 | plus a third clause that prohibits redistribution for commercial purposes without further permission.
 3 | 
 4 | Copyright (c) 2018. Allen Institute. All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
 7 | following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the 
10 | following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the 
13 | following disclaimer in the documentation and/or other materials provided with the distribution.
14 | 
15 | 3. Redistributions for commercial purposes are not permitted without the Allen Institute's written permission.
16 | For purposes of this license, commercial purposes is the incorporation of the Allen Institute's software into
17 | anything for which you will charge fees or other compensation. Contact terms@alleninstitute.org for commercial
18 | licensing opportunities.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
26 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # tasic2018analysis
 2 | 
 3 | Scripts used for Data processing and visualization for the paper Bosiljka Tasic, *et al.*, "Shared and distinct transcriptomic cell types across neocortical areas." (2018) Nature.  
 4 | 
 5 | These scripts were not written with portability in mind, and frequently refer to local file structures. They are provided as-is as a record of the analytical processes used in the paper.  
 6 | 
 7 | To perform similar analyses on your own data, we recommend utilizing the R packages built around this analysis, available at   https://github.com/AllenInstitute/scrattch/ .
 8 | 
 9 | ## Citation
10 | 
11 | To cite use of these scripts or parts thereof, please use:  
12 | Tasic, et al. "Shared and distinct transcriptomic cell types across neocortical areas." (2018) Nature.  
13 | 
14 | ## License
15 | 
16 | The license for use of these scripts is available on Github at: https://github.com/AllenInstitute/tasic2018analysis/blob/master/LICENSE 
17 | 
18 | ## Statement of Support
19 | 
20 | We are not currently supporting this code, but simply releasing it to the community AS IS but are not able to provide any guarantees of support. The community is welcome to submit issues, but you should not expect an active response. 
21 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/ALM.ex.analysis.R:
--------------------------------------------------------------------------------
  1 | ALM.ex.cl = droplevels(ex.cl[samp.dat[names(ex.cl),"Region"]=="ALM"])
  2 | ALM.ex.cl = ALM.ex.cl[cl.df[as.character(ALM.ex.cl),"region_label"]!="VISp"]
  3 | ALM.ex.markers = select_markers(norm.dat, ALM.ex.cl, n.markers=50, de.genes=de.genes)$markers
  4 | ALM.ex.tsne.result <- plot_tSNE_cl(norm.dat, ALM.ex.markers, ALM.ex.cl, cl.df, theta=0.05)
  5 | ALM.ex.tsne.df = ALM.ex.tsne.result$tsne.df
  6 | save(ALM.ex.tsne.df, file="ALM.ex.tsne.df.rda")
  7 | ggsave("ALM.ex.tsne.pdf", ALM.ex.tsne.result$g, height=10, width=10)
  8 | save(ALM.ex.cl, file="ALM.ex.cl.rda")
  9 | save(ALM.ex.markers, file="ALM.ex.markers.rda")
 10 | ALM.ex.cell.cl.map.df =  get_core_transition(norm.dat, ALM.ex.cl, ALM.ex.markers, n.bin=5, n.iter=100, mc.cores=10)
 11 | save(ALM.ex.cell.cl.map.df, file="ALM.ex.cell.cl.map.df.rda")
 12 | 
 13 | de.param$q1.th=0.4
 14 | ALM.ex.PT.cl = droplevels(ALM.ex.cl[ALM.ex.cl %in% row.names(cl.df)[cl.df$subclass_label=="L5 PT"]])
 15 | 
 16 | 
 17 | 
 18 | 
 19 | ALM.ex.cl.present.counts = get_cl_sums(norm.dat>0, droplevels(ALM.ex.cl))
 20 | cl.present.counts = get_cl_sums(norm.dat>0, cl.clean)
 21 | 
 22 | df = within_group_specific_markers(levels(ALM.ex.PT.cl), norm.dat, ALM.ex.PT.cl, de.param = de.param, cl.present.counts=ALM.ex.cl.present.counts)
 23 | 
 24 | 
 25 | PT = levels(ALM.ex.PT.cl)
 26 | all = levels(ALM.ex.cl)
 27 | pairs <- list(c1=list(95, PT), c2=list(96:97, PT), c3=list(PT, all), c4=list(95, all), c5=list(96:97,all), c6=list(96,96:97), c7=list(96, all), c8=list(97, 96:97),c9=list(97, all))
 28 |  
 29 | de.param$q1.th=0.4
 30 | ALM.PT.gene.list= sapply(pairs, function(x){
 31 |   print(x)
 32 |   df=group_specific_markers(as.character(x[[1]]), norm.dat, droplevels(cl.clean[cl.clean%in% as.character(x[[2]])]), de.param=de.param, n.markers=10, cl.present.counts = cl.present.counts)
 33 |   df=df[order(df$pval),]
 34 |   head(df,20)
 35 | },simplify=F)
 36 | save(ALM.PT.gene.list, file="ALM.PT.gene.list.top.20.rda")
 37 | PT.pairs = read.csv("PT_DE_pairs",row.names=1,header=FALSE)
 38 | 
 39 | names(ALM.PT.gene.list) = PT.pairs[[1]]
 40 | 
 41 | ALM.PT.gene.df = do.call("rbind", ALM.PT.gene.list)
 42 | ALM.PT.gene.df$pair = gsub("\\..*","",row.names(ALM.PT.gene.df))
 43 | ALM.PT.gene.df = ALM.PT.gene.df[,c(8,1:7)]
 44 | row.names(ALM.PT.gene.df) = NULL
 45 | write.csv(ALM.PT.gene.df, file="ALM.PT.gene.df.csv")
 46 | 
 47 | de.param = de_param(q1.th=0.4)
 48 | tmp=display_cl(ALM.ex.PT.cl,norm.dat, prefix="ALM.L5.PT", de.param = de.param, n.markers=30)
 49 | ALM.ex.PT.markers= tmp$markers
 50 | top.genes= unique(unlist(lapply(ALM.PT.gene.list, row.names)))
 51 | 
 52 | setdiff(unique(unlist(lapply(ALM.PT.gene.list, row.names))), ALM.ex.PT.markers)
 53 | tmp=display_cl(ALM.ex.PT.cl,norm.dat, prefix="ALM.L5.PT.tmp", de.param = de.param, markers=top.genes)
 54 | save(ALM.ex.PT.markers, file="ALM.ex.PT.markers.rda")
 55 | 
 56 | 
 57 | source("~/zizhen/My_R/map_river_plot.R")
 58 | source("~/zizhen/My_R/sankey_functions.R")
 59 | 
 60 | colnames(map.df) = c("map_cluster_id","map_prob","map_cluster_label", "cluster_id","sub_cluster","stim","cor")
 61 | map.df$map_cluster_color = cl.df[as.character(map.df$map_cluster_id),"cluster_color"]
 62 | 
 63 | tmp = compare_annotate(setNames(map.df$cluster_id, row.names(map.df)), setNames(map.df$map_cluster_id,row.names(map.df)), cl.df, reorder=FALSE)
 64 | tmp.cl.df = tmp$cl.df
 65 | map.df$cluster_label = map.df$cluster_id
 66 | map.df$cluster_id = as.integer(as.factor(map.df$cluster_label))
 67 | map.df$cluster_color = tmp.cl.df[as.character(map.df$cluster_label),"cluster_color"]
 68 | map.df$cluster_color=as.character(map.df$cluster_color)
 69 | map.df$map_cluster_color=as.character(map.df$map_cluster_color)
 70 | 
 71 | map.df$maintype = hrvatin.samp.dat$maintype
 72 | inh.map.df = droplevels(map.df[which(map.df$maintype=="Interneurons"),])
 73 | g=river_plot(inh.map.df, min.cells=4, min.frac=0.1)
 74 | ggsave(g, file="Hrvatin.inh.map.pdf")
 75 | 
 76 | ex.map.df = droplevels(map.df[which(map.df$maintype=="Excitatory" & !map.df$cluster_label %in% c("RSP","Hip","Sub")),])
 77 | g=river_plot(ex.map.df, min.cells=10, min.frac=0.1)
 78 | ggsave(g, file="Hrvatin.ex.map.pdf")
 79 | 
 80 | tb=with(ex.map.df, table(map_cluster_label, stim))
 81 | tb.df = as.data.frame(tb)
 82 | table(map.df$map_cluster_label)
 83 | stim.pval = sapply(names(de.num), function(p){
 84 |   x = de.num[[p]]
 85 |   n <- total.num - m
 86 |   k <- full.de.num[[p]]
 87 |   pval = phyper(x - 1, m, n, k, lower.tail = FALSE)
 88 | })
 89 | 
 90 | 
 91 | 
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 |                 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/VISp.ex.analysis.R:
--------------------------------------------------------------------------------
 1 | VISp.ex.cl = droplevels(ex.cl[samp.dat[names(ex.cl),"Region"]=="VISp"])
 2 | VISp.ex.cl = VISp.ex.cl[cl.df[as.character(VISp.ex.cl),"region_label"]!="ALM"]
 3 | VISp.ex.markers = select_markers(norm.dat, VISp.ex.cl, n.markers=50, de.genes=de.genes)$markers
 4 | VISp.ex.tsne.result <- plot_tSNE_cl(norm.dat, VISp.ex.markers, VISp.ex.cl, cl.df, theta=0.05)
 5 | VISp.ex.tsne.df = VISp.ex.tsne.result$tsne.df
 6 | save(VISp.ex.tsne.df, file="VISp.ex.tsne.df.rda")
 7 | ggsave("VISp.ex.tsne.pdf", VISp.ex.tsne.result$g, height=10, width=10)
 8 | save(VISp.ex.cl, file="VISp.ex.cl.rda")
 9 | save(VISp.ex.markers, file="VISp.ex.markers.rda")
10 | VISp.ex.cell.cl.map.df =  get_core_transition(norm.dat, VISp.ex.cl, VISp.ex.markers, n.bin=5, n.iter=100, mc.cores=10)
11 | save(VISp.ex.cell.cl.map.df, file="VISp.ex.cell.cl.map.df.rda")
12 | 
13 | 
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/all.cell.markers.heatmap.R:
--------------------------------------------------------------------------------
 1 | load("cl.final.rda")
 2 | library(feather)
 3 | anno <- read_feather("/data/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather")
 4 | tmp.cl.df = as.data.frame(unique(anno[,c("cl","dendcluster_id")]))
 5 | dend.id = with(tmp.cl.df, setNames(dendcluster_id, cl))
 6 | cl.ord = names(dend.id)[order(dend.id)]
 7 | cl.ord = cl.ord[cl.ord %in% levels(cl.clean)]
 8 | 
 9 | 
10 | load("de.genes.rda")
11 | load("cl.med.rda")
12 | 
13 | load("cl.clean.rda")
14 | cl.clean = setNames(factor(as.character(cl.clean), cl.ord), names(cl.clean))
15 | 
16 | 
17 | ####Compute heatmap at single cell level that cover all the clusters.
18 | max.cl.size=50
19 | all.markers= select_markers(norm.dat, cl.clean, de.genes=de.genes, n.markers=20)$markers
20 | core.cells=names(cl)[is.na(cell.cl.map.df[names(cl.clean),"transition.cl"])]
21 | tmp.cells = unlist(tapply(core.cells, cl.clean[core.cells], function(x){
22 |   x= sample(x, min(length(x),max.cl.size))
23 | },simplify=FALSE))
24 | tmp.cl = setNames(factor(as.character(cl.clean[tmp.cells]), cl.ord), tmp.cells)
25 | tmp.cl = sort(tmp.cl)
26 | 
27 | cl.med = cl.med[,levels(cl.clean)]
28 | all.markers= all.markers[rowMaxs(cl.med[all.markers,]) > 2]
29 | gene.min = apply(cl.med[all.markers,],1, function(x)min(which(x>2)))
30 | gene.sd= apply(cl.med[all.markers,],1, function(x)sd(which(x>2)))
31 | gene.sd[is.na(gene.sd)]=0
32 | 
33 | select.markers = all.markers[gene.sd < 20 | gene.sd > 20 & rowSums(cl.med[all.markers,] > 2) >  20]
34 | select = gene.sd[select.markers] < 10 | rowSums(cl.med[select.markers,]>2) < 15
35 | set1 = select.markers[select]
36 | set2 = select.markers[!select]
37 | 
38 | 
39 | tmp=setNames(rep(6, length(de.genes)),names(de.genes))
40 | set3=unique(unlist(select_markers_pair(norm.dat[set2,], add.genes=tmp, de.genes=de.genes,rm.genes=setdiff(row.names(norm.dat),set2))))
41 | set3=set3[hclust(dist(cl.med[set3,]),method="average")$order]
42 | 
43 | ord.genes = c(names(sort(gene.min[set1])),set3)
44 | tmp.dat = as.matrix(norm.dat[ord.genes,names(tmp.cl)])
45 | tmp.dat = tmp.dat/rowMaxs(tmp.dat)
46 | cl.col = as.character(cl.df[as.character(cl[colnames(tmp.dat)]),"cluster_color"])
47 | 
48 | gray.red <-colorRampPalette(c("gray", "red"))
49 | 
50 | pdf("all.heatmap.pdf",height=15,width=12)
51 | heatmap.3(tmp.dat,Rowv=NULL, Colv=NULL, ColSideColors=cl.col,trace="none",col=gray.red(100),labRow=FALSE, labCol=FALSE)
52 | dev.off()
53 | 
54 | darkblue.orange <-colorRampPalette(c("darkblue", "orange"))
55 | pdf("all.heatmap.2.pdf",height=15,width=12)
56 | heatmap.3(tmp.dat,Rowv=NULL, Colv=NULL, ColSideColors=cl.col,trace="none",col=darkblue.orange(100),labRow=FALSE, labCol=FALSE)
57 | dev.off()
58 | 
59 | cell.marker.dat = tmp.dat
60 | save(cell.marker.dat, file="cell.marker.dat.rda")
61 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/cl.transition.R:
--------------------------------------------------------------------------------
  1 | library(iterclust)
  2 | library(matrixStats)
  3 | 
  4 | cl.clean = droplevels(cl[cl %in% row.names(cl.df)[cl.df$class_label!="Low Quality"]])  
  5 |  
  6 | test_cv_cor <- function(norm.dat, cl, markers, n.bin=5,g.perc=1){
  7 |   bins=unlist(tapply(names(cl), cl, function(x){
  8 |     if(length(x) > n.bin){
  9 |       tmp=rep_len(1:n.bin, length(x))
 10 |     }else{
 11 |       tmp = sample(1:n.bin, length(x))
 12 |     }
 13 |     setNames(tmp[sample(length(tmp))], x)
 14 |   }))
 15 |   names(bins) = gsub(".*\\.", "", names(bins))
 16 |   bins= bins[names(cl)]
 17 |   pred.cl = setNames(rep(NA, length(cl)), names(cl))
 18 |   for(i in 1:n.bin){
 19 |     print(i)
 20 |     train.cells = names(cl)[bins!=i]
 21 |     test.cells =names(cl)[bins==i]
 22 |     select.markers=sample(markers, round(length(markers)*g.perc))
 23 |     map.result <- map_by_cor(norm.dat[select.markers,], cl[train.cells], norm.dat[select.markers, test.cells])$pred.df
 24 |     pred.cl[test.cells] = as.character(map.result[test.cells, "pred.cl"])
 25 |   }
 26 |   return(pred.cl)
 27 | }
 28 | 
 29 | 
 30 | get_core_transition <- function(norm.dat, cl, select.markers, n.bin=5, n.iter=100, mc.cores=10)
 31 |   {
 32 |     cl.cv <- mclapply(1:n.iter, function(i){
 33 |       tmp=test_cv_cor(norm.dat, cl, select.markers, n.bin=n.bin)
 34 |     }, mc.cores=mc.cores)
 35 |     
 36 |     cell.cl.cor.map = do.call("rbind",sapply(cl.cv, function(x){
 37 |       df = data.frame(cell=names(x),cl=x)
 38 |     },simplify=F))
 39 |     cell.cl.cor.map = table(cell.cl.cor.map[,1],cell.cl.cor.map[,2])
 40 |     cell.cl.cor.map = cell.cl.cor.map / rowSums(cell.cl.cor.map)
 41 | 
 42 |     cell.cl.map.df = data.frame(org.cl= as.character(cl[row.names(cell.cl.cor.map)]),best.score=rowMaxs(cell.cl.cor.map), best.cl = colnames(cell.cl.cor.map)[apply(cell.cl.cor.map, 1, which.max)], stringsAsFactors=FALSE)
 43 |     row.names(cell.cl.map.df) = row.names(cell.cl.cor.map)
 44 |     tmp=get_pair_matrix_coor(cell.cl.cor.map, row.names(cell.cl.map.df), as.character(cell.cl.map.df$best.cl))
 45 |     tmp1 = cell.cl.cor.map
 46 |     tmp1[tmp]= 0
 47 |     cell.cl.map.df$second.score = rowMaxs(tmp1)
 48 |     cell.cl.map.df$second.cl =colnames(tmp1)[apply(tmp1,1, which.max)]
 49 |     cell.cl.map.df$second.cl[cell.cl.map.df$second.score ==0] = NA
 50 |     
 51 |     cell.cl.map.df$transition.cl = NA
 52 |     tmp = with(cell.cl.map.df, org.cl!=best.cl | best.score < 0.9)
 53 |     cell.cl.map.df[tmp,"transition.cl"] = as.character(cell.cl.map.df[tmp,"best.cl"])
 54 |     tmp = with(cell.cl.map.df, which(org.cl==transition.cl))
 55 |     cell.cl.map.df$transition.cl[tmp] = as.character(cell.cl.map.df[tmp,"second.cl"])
 56 |     
 57 |     cl.med <- do.call("cbind",tapply(names(cl), cl, function(x){
 58 |       rowMedians(as.matrix(norm.dat[select.markers,x]))
 59 |     }))
 60 |     row.names(cl.med) = select.markers
 61 |     
 62 |     cell.cl.cor=cor(as.matrix(norm.dat[select.markers, row.names(cell.cl.map.df)]), cl.med[select.markers,])
 63 |     cell.cl.map.df$cor = with(cell.cl.map.df, get_pair_matrix(cell.cl.cor, row.names(cell.cl.map.df),as.character(org.cl)))
 64 |     cell.cl.map.df$core = is.na(cell.cl.map.df$transition.cl)
 65 |     return(cell.cl.map.df)
 66 |   }
 67 | 
 68 | load("select.markers.rda")
 69 | cell.cl.map.df = get_core_transition(norm.dat, cl.clean, select.markers, n.bin=5, n.iter=100, mc.cores=10)
 70 | 
 71 | corrected.cells = with(cell.cl.map.df, org.cl!=best.cl & best.score > 0.9)
 72 | cl.clean.correct = cl.clean
 73 | cl.clean.correct[corrected.cells] = cell.cl.map.df[corrected.cells, "best.cl"]
 74 | 
 75 | tmp.cells = intersect(names(cl.clean)[cl.clean=="104"], row.names(samp.dat)[samp.dat$Region=="VISp"])
 76 | 
 77 | tmp1.cells = intersect(names(cl.clean)[cl.clean=="104"], row.names(samp.dat)[samp.dat$Region=="ALM"])
 78 | 
 79 | 
 80 | 
 81 | save(cell.cl.map.df, file="cell.cl.map.df.rda")
 82 |  
 83 | transition.df = with(cell.cl.map.df, as.data.frame(table(org.cl, transition.cl)))
 84 | transition.df = transition.df[transition.df$Freq > 0,]
 85 | transition.df$org.cl = as.character(transition.df$org.cl)
 86 | transition.df$transition.cl = as.character(transition.df$transition.cl)
 87 | save(transition.df, file="transition.df.rda")
 88 | 
 89 | ###combine transitions from both directions
 90 | transition.df$cl.min = pmin(transition.df$org.cl, transition.df$transition.cl)
 91 | transition.df$cl.max = pmax(transition.df$org.cl, transition.df$transition.cl)
 92 | transition.df$cl.pair = paste(transition.df$cl.min, transition.df$cl.max)
 93 | transition.df.comb= do.call("rbind",tapply(1:nrow(transition.df),transition.df$cl.pair, function(x){
 94 |   tmp = transition.df[x,][1,]
 95 |   tmp$Freq = sum(transition.df[x,"Freq"])
 96 |   tmp[,c(4,5,3)]
 97 | }))
 98 | cl.size = table(cl.clean)
 99 | transition.df.comb$cl.min.size = cl.size[transition.df.comb$cl.min]
100 | transition.df.comb$cl.max.size = cl.size[transition.df.comb$cl.max]
101 | transition.df.comb$ratio = with(transition.df.comb,Freq/pmin(cl.min.size,cl.max.size))
102 | save(transition.df.comb, file="transition.df.comb.rda")
103 | transition.df.comb$cl1_label = cl.df[as.character(transition.df.comb$cl.min),"cluster_label"]
104 | transition.df.comb$cl2_label = cl.df[as.character(transition.df.comb$cl.max),"cluster_label"]
105 | transition.df.comb[transition.df.comb$ratio > 0.1 & transition.df.comb$Freq > 1,]
106 | colnames(transition.df.comb)[1:2] = c("cl1","cl2")
107 | save(transition.df.comb, file="transition.df.comb.rda")
108 | 
109 | 
110 | 
111 | 
112 | 
113 | 
114 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/cluster_hetero.R:
--------------------------------------------------------------------------------
  1 | #####internal heterogenity#######
  2 | pca_hetero <- function(norm.dat,select.cells, perm.num=10,vg.padj=0.5,rm.gene.mod= NULL, rm.eigen=NULL,rm.th=0.6)
  3 |   {
  4 |     vg= findVG(2^norm.dat[,select.cells]-1)
  5 |     select.genes = row.names(vg)[vg$loess.padj < vg.padj]
  6 |     if(length(select.genes)==0){
  7 |       return(0)
  8 |     }
  9 |     if(length(select.genes)==1){
 10 |       return(1)
 11 |     }
 12 |     pca = prcomp(t(as.matrix(norm.dat[select.genes, select.cells])),tol=0.01)
 13 |     if(!is.null(rm.gene.mod)|!is.null(rm.eigen)){
 14 |       if(is.null(rm.eigen)){
 15 |         rm.eigen = get_eigen(rm.gene.mod, norm.dat, select.cells)[[1]]
 16 |       }
 17 |       else{
 18 |         rm.eigen = rm.eigen[select.cells,]
 19 |       }
 20 |       rm.cor=cor(pca$x, rm.eigen)
 21 |       rm.score =rowMaxs(abs(rm.cor))
 22 |       select.pca = which(rm.score < rm.th)[1]
 23 |     }
 24 |     pca.var = summary(pca)$importance[2,select.pca]
 25 |     ###shuffle expression of every gene.
 26 |     perm.pca.var = sapply(1:10, function(x){
 27 |       perm.dat = norm.dat[select.genes, select.cells]
 28 |       for(i in 1:nrow(perm.dat)){
 29 |         perm.dat[i, ] = sample(perm.dat[i, ])
 30 |       }
 31 |       perm.pca = prcomp(t(as.matrix(perm.dat)),tol=0.5)
 32 |       summary(perm.pca)$importance[2,1]
 33 |     })
 34 |     vg = vg[select.genes,]
 35 |     vg = vg[order(vg$loess.padj),]
 36 |     list(pca.ratio=pca.var/mean(perm.pca.var), pca.var = pca.var, vg.num=length(select.genes), vg=vg)
 37 |   }
 38 | 
 39 | 
 40 | WGCNA_hetero <- function(norm.dat,select.cells, perm.num=10,vg.padj=0.5,rm.gene.mod=NULL)
 41 |   {
 42 |     vg= findVG(2^norm.dat[,select.cells]-1)
 43 |     select.genes = row.names(vg)[vg$loess.padj < vg.padj]
 44 |     dat = norm.dat[select.genes,select.cells]
 45 |     adj=adjacency(t(dat), power = 4,type="unsigned")
 46 |     adj[is.na(adj)]=0
 47 |     TOM = TOMsimilarity(adj,TOMType="unsigned")
 48 |     dissTOM = as.matrix(1-TOM)
 49 |     row.names(dissTOM)= colnames(dissTOM) = row.names(dat)
 50 |     geneTree = flashClust(as.dist(dissTOM), method =  "average")
 51 |     dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM, cutHeight=0.99, deepSplit = 2, pamRespectsDendro = FALSE,minClusterSize = 5)
 52 |     gene.mod = split(row.names(dissTOM), dynamicMods)
 53 |     gene.mod = gene.mod[setdiff(names(gene.mod),"0")]
 54 |     rm.eigen=NULL
 55 |     if(!is.null(rm.gene.mod)){
 56 |       rm.eigen = getEigen(rm.gene.mod, norm.dat, select.cells)
 57 |     }
 58 |     gm= filterGeneMod(norm.dat, select.cells, gene.mod, minModuleSize=5, rm.eigen=rm.eigen,min.padj = 40, padj.th=0.05,min.cells=3, ...)
 59 |     
 60 |     if(length(select.genes)==0){
 61 |       return(0)
 62 |     }
 63 |     if(length(select.genes)==1){
 64 |       return(1)
 65 |     }
 66 |     rd.WGCNA(norm.dat, select.genes, select.cells, minModuleSize=5,...)
 67 | 
 68 |     pca.var = summary(pca)$importance[2,1]
 69 |  
 70 |     ###shuffle expression of every gene.
 71 |     perm.pca.var = sapply(1:10, function(x){
 72 |       perm.dat = norm.dat[select.genes, select.cells]
 73 |       for(i in 1:nrow(perm.dat)){
 74 |         perm.dat[i, ] = sample(perm.dat[i, ])
 75 |       }
 76 |       perm.pca = prcomp(t(as.matrix(perm.dat)),tol=0.5)
 77 |       summary(perm.pca)$importance[2,1]
 78 |     })
 79 |     vg = vg[select.genes,]
 80 |     vg = vg[order(vg$loess.padj),]
 81 |     list(pca.ratio=pca.var/mean(perm.pca.var), pca.var = pca.var, vg.num=length(select.genes), vg=vg)
 82 |   }
 83 | 
 84 | 
 85 | qc.index = setNames(samp.dat$percent_reads_aligned_to_genome_only/samp.dat$percent_reads_aligned_to_mrna,row.names(samp.dat))
 86 | 
 87 | ###Add oligodendrocyte contamination genes
 88 | save(rm.gene.mod, file="rm.gene.mod.hetero.rda")
 89 | pca.hetero= sapply(levels(clean.cl),function(x){
 90 |   print(x)
 91 |   select.cells = names(clean.cl)[clean.cl==x]
 92 |   #tmp.norm = lm_normalize(norm.dat[,select.cells], qc.index[select.cells])[[1]]
 93 |   tmp=pca_hetero(norm.dat,select.cells, perm.num=10,vg.padj=0.5, rm.eigen=rm.eigen)
 94 | },simplify=F)
 95 | pca.ratio = unlist(sapply(pca.hetero, function(x)x[1]))
 96 | 
 97 | names(pca.ratio) = paste(names(pca.hetero), cl.df[names(pca.hetero),"cluster_label"])
 98 | 
 99 | 
100 | 
101 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/co_cluster.R:
--------------------------------------------------------------------------------
 1 | co.stats = get_cl_co_stats(cl, co.ratio)
 2 | save(co.stats, file="co.stats.rda")
 3 | 
 4 | select.cl= levels(cl.clean)
 5 | co.stats.df = as.data.frame(as.table(co.stats$cl.co.ratio[select.cl, select.cl]))
 6 | colnames(co.stats.df)=c("cl.x","cl.y","co.ratio")
 7 | co.stats.df = co.stats.df[co.stats.df[,3]>0.05,]
 8 | co.stats.df$cl.x.label = cl.df[as.character(co.stats.df$cl.x), "cluster_label"]
 9 | co.stats.df$cl.y.label = cl.df[as.character(co.stats.df$cl.y), "cluster_label"]
10 | co.stats.df = co.stats.df[as.integer(co.stats.df[,1]) <= as.integer(co.stats.df[,2]),]
11 | write.table(co.stats.df, file="co.stats.df.csv",sep=",",row.names=F,quote=F)
12 | 
13 | tmp.dat = co.stats$cl.co.ratio[select.cl, select.cl]
14 | row.names(tmp.dat)=colnames(tmp.dat) = cl.df[colnames(tmp.dat),"cluster_label"]
15 | pdf("cl.co.pdf",height=10,width=10)
16 | heatmap.3(tmp.dat, Rowv=as.dendrogram(dend), Colv=as.dendrogram(dend), trace="none",col=blue.red(100),cexRow=0.5, cexCol=0.5)
17 | dev.off()
18 | 
19 | select.cells = sample_cells(cl.clean, 50)
20 | 
21 | ord=order(match(as.character(cl.clean[select.cells]), labels(dend)), co.stats$cell.cl.confusion[select.cells])
22 | select.cells= select.cells[ord]
23 | 
24 | tmp.dat = co.stats$cell.cl.co.ratio[select.cells,labels(dend)]
25 | colnames(tmp.dat) = cl.df[colnames(tmp.dat),"cluster_label"]
26 | library(gplots)
27 | pdf("cell.cl.pdf")
28 | heatmap.2(tmp.dat, Rowv=NULL, Colv=as.dendrogram(dend), trace="none",col=blue.red(100), labRow=FALSE, labCol=FALSE)
29 | dev.off()
30 | 
31 | library(WGCNA)
32 | tom = TOMsimilarity(as.matrix(co.ratio[select.cells, select.cells]))
33 | colnames(tom)=row.names(tom)=select.cells
34 | all.hc = hclust(as.dist(1-tom), method="average")
35 | ord1 = all.hc$labels[all.hc$order]
36 | ord = names(cl.clean)[order(cl.clean,match(names(cl.clean), ord1))]
37 | sep = cl.clean[ord]
38 | sep=which(sep[-1]!=sep[-length(sep)])
39 | png("all.co.png",height=10,width=10)
40 | heatmap.3(as.matrix(co.ratio[ord,ord]), col = blue.red(100), trace="none", Rowv=NULL, Colv=NULL,colsep=sep,sepcolor="black")
41 | dev.off()
42 | 
43 | 
44 | select.cells = names(cl.clean)[cl.clean %in% row.names(cl.df)[cl.df$top=="Inh"]]
45 | select.cells = unlist(tapply(select.cells,cl.clean[select.cells],function(x)sample(x, min(length(x),100))))
46 | ord1 = all.hc$labels[all.hc$order]
47 | ord1 = ord1[ord1%in% select.cells]
48 | ord = select.cells[order(cl.clean[select.cells],match(select.cells, ord1))]
49 | Inh.co.ratio.100 = co.ratio[ord,ord]
50 | sep = cl.clean[ord]
51 | sep=which(sep[-1]!=sep[-length(sep)])
52 | pdf("Inh.co.100.pdf",height=10,width=10)
53 | #heatmap.3(Inh.co.ratio.100, col = blue.red(100), trace="none", Rowv=NULL, Colv=NULL,colsep=sep,sepcolor="black")
54 | heatmap.3(Inh.co.ratio.100, col = blue.red(100), trace="none", Rowv=NULL, Colv=NULL)
55 | dev.off()
56 | save(Inh.co.ratio.100, file="Inh.co.ratio.100.rda")
57 | 
58 | select.cells = names(cl.clean)[cl.clean %in% row.names(cl.df)[cl.df$top=="Ex"]]
59 | select.cells = unlist(tapply(select.cells,cl.clean[select.cells],function(x)sample(x, min(length(x),100))))
60 | ord1 = all.hc$labels[all.hc$order]
61 | ord1 = ord1[ord1%in% select.cells]
62 | ord = select.cells[order(cl.clean[select.cells],match(select.cells, ord1))]
63 | sep = cl.clean[ord]
64 | sep=which(sep[-1]!=sep[-length(sep)])
65 | Ex.co.ratio.100 = co.ratio[ord,ord]
66 | pdf("Ex.co.100.pdf",height=10,width=10)
67 | heatmap.3(Ex.co.ratio.100, col = blue.red(100), trace="none", Rowv=NULL, Colv=NULL,colsep=sep,sepcolor="black")
68 | dev.off()
69 | save(Ex.co.ratio.100, file="Ex.co.ratio.100.rda")
70 | 
71 | 
72 | 
73 | select.cells = names(cl.clean)[cl.clean %in% row.names(cl.df)[cl.df$top%in%c("Glia","Endo")]]
74 | select.cells = unlist(tapply(select.cells,cl.clean[select.cells],function(x)sample(x, min(length(x),100))))
75 | ord1 = all.hc$labels[all.hc$order]
76 | ord1 = ord1[ord1%in% select.cells]
77 | ord = select.cells[order(cl.clean[select.cells],match(select.cells, ord1))]
78 | sep = cl.clean[ord]
79 | sep=which(sep[-1]!=sep[-length(sep)])
80 | Noneuron.co.ratio.100 = co.ratio[ord,ord]
81 | pdf("Non.neuronal.co.100.pdf",height=10,width=10)
82 | heatmap.3(Noneuron.co.ratio.100, col = blue.red(100), trace="none", Rowv=NULL, Colv=NULL,colsep=sep,sepcolor="black")
83 | dev.off()
84 | save(Noneuron.co.ratio.100, file="Noneuron.co.ratio.100.rda")
85 | 
86 | select.cells = c(colnames(Inh.co.ratio.100),colnames(Ex.co.ratio.100),colnames(Noneuron.co.ratio.100))
87 | tmp.cl = cl.clean[select.cells]
88 | levels(tmp.cl) = cl.df[levels(tmp.cl),"cluster_label"]
89 | tmp.cl = setNames(factor(as.character(tmp.cl), levels=labels(dend)),names(tmp.cl))
90 | select.cells=names(tmp.cl)
91 | ord1 = all.hc$labels[all.hc$order]
92 | ord1 = ord1[ord1%in% select.cells]
93 | ord = select.cells[order(tmp.cl,match(select.cells, ord1))]
94 | co.ratio.100= co.ratio[ord,ord]
95 | save(co.ratio.100, file="co.ratio.100.rda")
96 | 
97 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/compare_1679.R:
--------------------------------------------------------------------------------
 1 | library(feather)
 2 | library(iterclust)
 3 | library(dplyr)
 4 | library(Matrix)
 5 | 
 6 | 
 7 | load("cl.final.rda")
 8 | load("cl.clean.rda")
 9 | load("norm.dat.rda")
10 | load("samp.dat.rda")
11 | load("V1.cl.rda")
12 | 
13 | 
14 | d = "/allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_VISp_SMV1_1679/"
15 | V1.2016.samp.dat = as.data.frame(read_feather(file.path(d, "anno.feather")))
16 | row.names(V1.2016.samp.dat)=V1.2016.samp.dat[["sample_id"]]
17 | 
18 | V1.2016.counts = as.matrix(round(read.csv("dat_1679/counts.csv",row.names=1,header=T)))
19 | V1.2016.norm.dat = log2(t(t(V1.2016.counts)*10^6/Matrix::colSums(V1.2016.counts))+1)
20 | V1.2016.cl = setNames(V1.2016.samp.dat[["final_label"]],V1.2016.samp.dat[["sample_id"]])
21 | V1.2016.cl = V1.2016.cl[V1.2016.cl!=""]
22 | tmp = unique(V1.2016.cl)
23 | tmp = tmp[order(as.integer(gsub(" f.*$","",tmp)))]
24 | V1.2016.cl = factor(V1.2016.cl,levels= tmp)
25 | tmp.cl = V1.2016.cl
26 | levels(tmp.cl) = 1:length(levels(tmp.cl))
27 | tmp = select_markers(V1.2016.norm.dat, tmp.cl, n.markers=50, de.param = de_param(q1.th=0.4, q.diff.th=0.7))
28 | V1.2016.de.genes  = tmp$de.genes
29 | V1.2016.markers=tmp$markers
30 | common.markers=intersect(V1.2016.markers, row.names(norm.dat))
31 | 
32 | 
33 | map.result = map_sampling(V1.2016.norm.dat[,names(V1.2016.cl)], V1.2016.cl, norm.dat[, V1.cells], markers = common.markers)
34 |  
35 | map.df = map.result$map.df
36 | map.df$org.cl = cl[row.names(map.df)]
37 | map.df$org.cl_label = cl.df[as.character(cl[row.names(map.df)]), "cluster_label"]
38 | tb = as.data.frame(with(map.df%>%filter(prob > 0.9), table(org.cl_label, pred.cl))) %>% filter(Freq > 1)
39 | save(map.df, file="V1.ref.49.df.rda")
40 | map.ref.2016.df = map.df
41 | 
42 | 
43 | load("de.genes.rda")
44 | tmp = select_markers(norm.dat, droplevels(cl[V1.cells]), de.genes=de.genes, n.markers=50)
45 | common.markers=intersect(tmp$markers, row.names(V1.2016.norm.dat))
46 | V1.markers= tmp$markers
47 | save(V1.markers, file="V1.markers.rda")
48 | map.result = map_sampling(as.matrix(norm.dat[common.markers,V1.cells]), droplevels(cl[V1.cells]), V1.2016.norm.dat, markers = common.markers)
49 | map.df = map.result$map.df
50 | map.df$org.cl = V1.2016.cl[row.names(map.df)]
51 | map.df$pred.cl_label = cl.df[as.character(map.df$pred.cl), "cluster_label"]
52 | map.df = map.df[!is.na(map.df$org.cl),]
53 | tb = as.data.frame(with(map.df%>%filter(prob > 0.9), table(org.cl, pred.cl_label))) %>% filter(Freq > 1)
54 | 
55 | save(map.df, file="V1.ref.101.df.rda")
56 | 
57 | 
58 | 
59 | 
60 | 
61 | 
62 | 
63 | 
64 |  
65 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/compare_JoshWang.R:
--------------------------------------------------------------------------------
 1 | library(feather)
 2 | library(iterclust)
 3 | 
 4 | load("cl.clean.rda")
 5 | load("norm.dat.rda")
 6 | load("samp.dat.rda")
 7 | load("cl.med.rda")
 8 | 
 9 | ###load mapping data and preprocessing
10 | d = "/allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/external/mouse_GABA_Paul_2016_20171002"
11 | Wang.samp.dat = as.data.frame(read_feather(file.path(d, "anno.feather")))
12 | row.names(Wang.samp.dat)=Wang.samp.dat[["sample_id"]]
13 | Wang.counts = as.data.frame(read_feather(file.path(d, "data_t.feather")))
14 | row.names(Wang.counts)= Wang.counts[,1]
15 | Wang.counts= as.matrix(Wang.counts[,-1])
16 | 
17 | ###compute overlapping genes present in both dataset, and normalize data on overlapping genes
18 | common.genes = intersect(row.names(Wang.counts), row.names(norm.dat))
19 | 
20 | Wang.counts=Wang.counts[common.genes,]
21 | Wang.dat = t(t(Wang.counts)*10^6/colSums(Wang.counts))
22 | Wang.dat = log2(Wang.dat+1)
23 | 
24 | ##Use our clusters as reference, compute markers
25 | load("de.genes.rda")
26 | inh.cl = droplevels(cl[cl %in% row.names(cl.df)[cl.df$class_label=="GABAergic"]])
27 | inh.cells=names(inh.cl)
28 | inh.markers = select_markers(norm.dat, inh.cl, de.genes = de.genes, n.markers=50)$markers
29 | inh.markers=intersect(inh.markers, common.genes)
30 | common.markers= inh.markers[rowMaxs(Wang.dat[inh.markers,])>0]
31 | 
32 | map.result = map_sampling(as.matrix(norm.dat[common.markers, inh.cells]), droplevels(cl[inh.cells]), Wang.dat, markers = common.markers)
33 | map.df = map.result$map.df
34 | map.freq = map.result$map.freq
35 | 
36 | tmp.cor = rowMaxs(cor(Wang.dat[common.markers,], cl.med[common.markers, droplevels(cl[inh.cells])]))
37 | tmp.cor[is.na(tmp.cor)]=0
38 | map.df$cor = tmp.cor
39 | 
40 | map.df$cluster_label = as.character(cl.df[as.character(map.df$pred.cl),"cluster_label"])
41 | tmp = is.na(map.df$cluster_label)
42 | map.df$cluster_label[tmp] = map.df$cl[tmp]
43 | map.df$Wang.cl = Wang.samp.dat[match(row.names(map.df),Wang.samp.dat$sample_id),"cell_class_label"]
44 | save(map.df, file="map.Wang.rda")
45 | 
46 |  
47 | 
48 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/compare_Linnarsson.R:
--------------------------------------------------------------------------------
 1 | library(feather)
 2 | library(iterclust)
 3 | 
 4 | load("cl.clean.rda")
 5 | load("norm.dat.rda")
 6 | load("samp.dat.rda")
 7 | load("cl.med.rda")
 8 | load("V1.cl.rda")
 9 | 
10 | ###load mapping data and preprocessing
11 | d="/allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/external/mouse_SS_CA1_Zeisel_2015_20170620"
12 | linnarsson.samp.dat = as.data.frame(read_feather(file.path(d, "anno.feather")))
13 | row.names(linnarsson.samp.dat)=linnarsson.samp.dat[["sample_id"]]
14 | linnarsson.counts = as.data.frame(read_feather(file.path(d, "data_t.feather")))
15 | row.names(linnarsson.counts)= linnarsson.counts[,1]
16 | linnarsson.counts= as.matrix(linnarsson.counts[,-1])
17 | 
18 | ###compute overlapping genes present in both dataset, and normalize data on overlapping genes
19 | common.genes = intersect(row.names(linnarsson.counts), row.names(norm.dat))
20 | cells= names(cl)
21 | linnarsson.counts=linnarsson.counts[common.genes,]
22 | linnarsson.dat = t(t(linnarsson.counts)*10^6/colSums(linnarsson.counts))
23 | linnarsson.dat = log2(linnarsson.dat+1)
24 | 
25 | ##Use our clusters as reference, compute markers
26 | select.markers=intersect(V1.markers, row.names(linnarsson.dat))
27 | select.markers = select.markers[rowMaxs(linnarsson.dat[select.markers,])>0]
28 | 
29 | map.result = map_sampling(norm.dat[,V1.cells], droplevels(cl[V1.cells]), linnarsson.dat, markers=select.markers)
30 | 
31 | map.df = map.result$map.df
32 | 
33 | map.df$pred_cluster_label = cl.df[as.character(map.df$pred.cl),"cluster_label"]
34 | map.df$cl = linnarsson.samp.dat[match(row.names(map.df),linnarsson.samp.dat$sample_id),"cluster_label"]
35 | map.df$coarse_cl= linnarsson.samp.dat[match(row.names(map.df),linnarsson.samp.dat$sample_id),"coarse_label"]
36 | map.df$group = linnarsson.samp.dat[match(row.names(map.df),linnarsson.samp.dat$sample_id),"group_label"]
37 | map.df$tissue = linnarsson.samp.dat[match(row.names(map.df),linnarsson.samp.dat$sample_id),"tissue_label"]
38 | 
39 | tmp.cor =  rowMaxs(cor(linnarsson.dat[select.markers,],cl.med[select.markers,]))
40 | tmp.cor[is.na(tmp.cor)] = 0
41 | map.df$cor = tmp.cor
42 | save(map.df, file="map.linnarsson.df.rda")
43 | 
44 | 
45 | ss.map.df = with(map.df,map.df[tissue == "sscortex" & cor > 0.4,])
46 | save(ss.map.df, file="map.linnarsson.ss.df.rda")
47 | 
48 | 
49 | tb = as.data.frame(with(ss.map.df%>%filter(prob > 0.9), table(coarse_cl, pred_cluster_label))) %>% filter(Freq > 1)
50 | 
51 | tb = as.data.frame(with(ss.map.df%>%filter(prob > 0.9), table(cl, pred_cluster_label))) %>% filter(Freq > 1)
52 | 
53 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/compare_Tolias.R:
--------------------------------------------------------------------------------
 1 | library(iterclust)
 2 | load("norm.dat.rda")
 3 | load("cl.final.rda")
 4 | tmp=load("V1.cl.rda")
 5 | load("cl.med.rda")
 6 | 
 7 | d = "/allen/programs/celltypes/workgroups/rnaseqanalysis/osnat/patchseq_simple_mapping/results/Tolias"
 8 | tmp=load(file.path(d, "quick_map.Tolias.rda"))
 9 | 
10 | shiny.d="/allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/patch_seq/tolias_patchseq_20161102"
11 | Tolias.df = as.data.frame(read_feather(file.path(shiny.d, "anno.feather")))
12 | row.names(Tolias.df) = Tolias.df$sample_id
13 | 
14 | 
15 | Tolias.counts = as.matrix(read.csv(file.path(d,"counts.csv"),row.names=1))
16 | 
17 | genes=row.names(Tolias.counts)
18 | genes[genes=="6330527O06Rik"]="Lamp5"
19 | genes[genes=="A930038C07Rik"]="Ndnf"
20 | genes[genes=="Tmem90b"]="Syndig1"
21 | genes[genes=="Cxcr7"]="Ackr3"
22 | row.names(Tolias.counts)=genes
23 | 
24 | 
25 | common.genes= intersect(genes, row.names(norm.dat))
26 | Tolias.counts <- as.matrix(Tolias.counts[common.genes,])
27 | Tolias.dat =log2(t(t(Tolias.counts)*10^6/colSums(Tolias.counts))+1)
28 | 
29 | 
30 | 
31 | common.markers= intersect(V1.markers, common.genes)
32 | map.result = map_sampling(as.matrix(norm.dat[common.markers,V1.cells]), droplevels(cl[V1.cells]), Tolias.dat, markers = common.markers)
33 | map.df = map.result$map.df
34 | map.freq = map.result$map.freq
35 | 
36 | tmp.cor = rowMaxs(cor(Tolias.dat[common.markers,], cl.med[common.markers, droplevels(cl[V1.cells])]))
37 | tmp.cor[is.na(tmp.cor)]=0
38 | map.df$cor = tmp.cor
39 | 
40 | map.df$cluster_label = as.character(cl.df[as.character(map.df$pred.cl),"cluster_label"])
41 | tmp = is.na(map.df$cluster_label)
42 | map.df$cluster_label[tmp] = map.df$cl[tmp]
43 | map.df$class_label = Tolias.df[row.names(map.df), "class_label"]
44 | 
45 | dend=readRDS("dend.RData")
46 | labels(dend) = row.names(cl.df)[match(labels(dend), cl.df$cluster_label)]
47 | V1.dend = prune_dend(dend, setdiff(labels(dend), levels(V1.cl)))
48 | 
49 | 
50 | summarize_cl <- function(dend, map.freq,conf.th=0.7)
51 |   {
52 |     node.height=setNames(get_nodes_attr(dend, "height"),get_nodes_attr(dend, "label"))
53 |     dend.list = dend_list(dend)
54 |     dend.list = dend.list[sapply(dend.list, length)>1]
55 |     memb = sapply(names(dend.list),function(x){
56 |       rowSums(map.freq[,intersect(labels(dend.list[[x]]),colnames(map.freq)),drop=F])
57 |     })
58 |     memb = cbind(memb,map.freq)
59 |     memb.th= lapply(row.names(memb),function(cell){
60 |       ###Check all the node with confidence > conf.th
61 |       x = memb[cell,]
62 |       mapped.node = colnames(memb)[which(x>conf.th)]
63 |                
64 |       ###mapped nodes not met the minimal gene number/ratio constraints 
65 |       ###Choose the deepest nodes that pass all the criteria. 
66 |       mapped.node=mapped.node[order(node.height[mapped.node])]
67 |       i=mapped.node[1]
68 |       ###Get the markers on every mapped nodes. 
69 |       c(cl=i, score=x[i])
70 |     })
71 |     memb.th = do.call("rbind",memb.th)
72 |     row.names(memb.th) = row.names(memb)
73 |     colnames(memb.th)=c("cl","score")
74 |     memb.df = as.data.frame(memb.th)
75 |     memb.df$resolution.index = 1- (node.height[memb.df$cl]/attr(dend,"height"))
76 |     return(memb.df) 
77 |   }
78 | 
79 |  
80 | map.tree.df = summarize_cl(V1.dend, map.freq/100, 0.7)
81 | map.tree.df$cluster_label = as.character(cl.df[as.character(map.tree.df$cl),"cluster_label"])
82 | tmp = is.na(map.tree.df$cluster_label)
83 | map.tree.df$cluster_label[tmp] = as.character(map.tree.df$cl[tmp])
84 | map.tree.df$class_label = Tolias.df[row.names(map.tree.df), "class_label"]
85 | with(map.tree.df, table(class_label, cl))
86 | 
87 | save(map.tree.df, V1.dend, file="map.tree.df.rda")
88 | 
89 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/consensus.clust.R:
--------------------------------------------------------------------------------
  1 | library(iterclust)
  2 | library(matrixStats)
  3 | 
  4 | load("norm.dat.rda")
  5 | load("select.cells.rda")
  6 | load("top.cl.rda")
  7 | load("rm.eigen.rda")
  8 | 
  9 | ###Use previous version of clustering for annotation. 
 10 | library(feather)
 11 | anno= read_feather("/allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180312/anno.feather")
 12 | ref.cl = setNames(anno$cluster_id, anno$sample_id)
 13 | ref.cl.df = unique(as.data.frame(anno[,c("cluster_id","cluster_color","cluster_label","class_id","class_label","class_color","subclass_id","subclass_label","subclass_color")]))
 14 | ref.cl.df = ref.cl.df[order(ref.cl.df$cluster_id),]
 15 | row.names(ref.cl.df) = ref.cl.df$cluster_id
 16 | load("../process_22679/select.markers.rda")
 17 | ref.markers= select.markers
 18 | 
 19 | 
 20 | ####Prepare parameters for clustering. 
 21 | all.cells=select.cells
 22 | top.result = list(cl=as.factor(top.cl[select.cells]),markers=NULL)
 23 | de.param = de_param(q1.th=0.5, q.diff.th=0.7, de.score.th=150, min.cells=4)
 24 | 
 25 | 
 26 | 
 27 | ####Computing PCA-based consensus clustering result
 28 | d = "subsample_pca/"
 29 | result.files=file.path(d, dir(d, "result.*.rda"))
 30 | co.result <- collect_co_matrix_sparseM(norm.dat, result.files, all.cells)
 31 | save(co.result, file="pca.co.result.rda")
 32 | co.ratio = co.result[[1]]
 33 |    
 34 | consensus.result = iter_consensus_clust(co.ratio, co.result$cl.list, norm.dat, select.cells=all.cells, de.param = de.param)
 35 | 
 36 | save(consensus.result, file="pca.consensus.result.rda")
 37 | cl = consensus.result$cl
 38 | 
 39 | 
 40 | refine.result = refine_cl(consensus.result$cl, co.ratio=co.ratio, tol.th=0.005, confusion.th=0.8)
 41 | cl = refine.result$cl
 42 | merge.result= merge_cl(norm.dat=norm.dat, cl=cl, rd.dat=t(norm.dat[consensus.result$markers,]), de.param = de.param,return.markers=FALSE)
 43 | compare.result = compare_annotate(merge.result$cl, ref.cl, ref.cl.df)
 44 | ggsave("map.PCA.pdf", compare.result$g)
 45 | cl = compare.result$cl
 46 | cl.df = compare.result$cl.df
 47 | cl = setNames(factor(as.character(cl),levels=row.names(cl.df)), names(cl))
 48 | save(cl, cl.df, file="pca.cl.final.rda")
 49 | 
 50 |  
 51 | ####Computing WGCNA-based consensus clustering result
 52 | d = "subsample_WGCNA/"
 53 | result.files=file.path(d, dir(d, "result.*.rda"))
 54 | co.result <- iterclust::collect_co_matrix_sparseM(norm.dat, result.files, all.cells)
 55 | save(co.result, file="WGCNA.co.result.rda")
 56 | co.ratio = co.result[[1]]
 57 | 
 58 | consensus.result = iter_consensus_clust(co.ratio, co.result$cl.list, norm.dat, select.cells=all.cells, de.param = de.param)
 59 | save(consensus.result, file="WGCNA.consensus.result.rda")
 60 | cl = consensus.result$cl
 61 | refine.result = refine_cl(consensus.result$cl, co.ratio=co.ratio, tol.th=0.005, confusion.th=0.8)
 62 | cl = refine.result$cl
 63 | 
 64 | merge.result= merge_cl(norm.dat=norm.dat, cl=cl, rd.dat=t(norm.dat[consensus.result$markers,]), de.param = de.param,return.markers=FALSE,verbose=1)
 65 | compare.result = compare_annotate(merge.result$cl, ref.cl, ref.cl.df)
 66 | ggsave("map.WGCNA.pdf", compare.result$g,height=10, width=10)
 67 | cl = compare.result$cl
 68 | cl.df = compare.result$cl.df
 69 | save(cl, cl.df, file="WGCNA.cl.final.rda")
 70 | 
 71 | 
 72 | WGCNA.cl = cl
 73 | WGCNA.cl.df = cl.df
 74 | 
 75 | load("pca.cl.final.rda")
 76 | write.csv(cl.df, "PCA.cl.df.csv")
 77 | PCA.cl = cl
 78 | PCA.cl.df = cl.df
 79 | 
 80 | compare.result = compare_annotate(WGCNA.cl, PCA.cl, PCA.cl.df)
 81 | ggsave("WGCNA.PCA.compare.pdf", compare.result$g, height=10, width=10)
 82 | 
 83 | 
 84 | ####Reach consensus clusters
 85 | load("pca.co.result.rda")
 86 | PCA.co.result = co.result
 87 | load("WGCNA.co.result.rda")
 88 | WGCNA.co.result = co.result
 89 | 
 90 | 
 91 | co.ratio.min = pmin(PCA.co.result$co.ratio, WGCNA.co.result$co.ratio)
 92 | save(co.ratio.min, file="co.ratio.min.rda")
 93 | 
 94 | cl.list = c(PCA.co.result$cl.list, WGCNA.co.result$cl.list)
 95 | cl.mat = cbind(PCA.co.result$cl.mat, WGCNA.co.result$cl.mat)
 96 | co.ratio.comb = crossprod(t(cl.mat))
 97 | co.ratio.comb@x = co.ratio.comb@x/length(cl.list)
 98 | save(co.ratio.comb, file="co.ratio.comb.rda")
 99 | 
100 | de.param = de_param(q1.th=0.5, q.diff.th=0.7, de.score.th=150, min.cells=4)
101 | consensus.result = iter_consensus_clust(co.ratio.min, cl.list, norm.dat, select.cells=all.cells, de.param = de.param)
102 | 
103 | refine.result = refine_cl(consensus.result$cl, co.ratio=co.ratio.min, tol.th=0.005, confusion.th=0.6)
104 | merge.result= merge_cl(norm.dat=norm.dat, cl=refine.result$cl, rd.dat=t(norm.dat[consensus.result$markers,]), de.param = de.param,return.markers=FALSE)
105 | save(merge.result, file="merge.result.rda")
106 | 
107 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/copy_bam.R:
--------------------------------------------------------------------------------
 1 | select.cl = c(121, 90, 85, 57, 42, 31, 18, 6)
 2 | tmp.cl = droplevels(cl[cl %in% as.character(select.cl)])
 3 | 
 4 | d = "select_bam"
 5 | dir.create(d)
 6 | 
 7 | 
 8 | 
 9 | 
10 | 
11 | select.cells = sample_cells(tmp.cl, 10)
12 | bam_dir1 = setNames(file.path("/allen/programs/celltypes/workgroups/rnaseqanalysis/STARforLIMS/Mouse/star_out/", select.cells, paste0(select.cells,"_Aligned.sortedByCoord.out.bam")), select.cells)
13 | tmp1=file.exists(bam_dir1)
14 | 
15 | bam_dir2 = setNames(with(samp.dat[select.cells,], file.path(fpkm_dir,paste0("ar_", ar_id, "_STAR_Aligned.sortedByCoord.out.bam"))), select.cells)
16 | tmp2= file.exists(bam_dir2)
17 | 
18 | bam_dir = bam_dir1
19 | bam_dir[!tmp1] = bam_dir2[!tmp1]
20 | table(file.exists(bam_dir))
21 | 
22 | 
23 | tmp=file.copy(bam_dir, file.path(d, paste0(select.cells,".bam")))
24 | write.csv(cl.df, file="cl.df.csv")
25 | write.csv(cl, file="cl.csv")
26 | 
27 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/fast_tsne.R:
--------------------------------------------------------------------------------
 1 | fast_tsne <- function(dat, ...)
 2 |   {
 3 |     result <- fftRtsne(dat,fast_tsne_path = "~/src/FIt-SNE/bin/fast_tsne", ...)
 4 |     df = as.data.frame(result)
 5 |     row.names(df) = row.names(dat)
 6 |     colnames(df)=c("Lim1","Lim2")
 7 |     df
 8 |   }
 9 | 
10 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/intron.exon.ratio.R:
--------------------------------------------------------------------------------
 1 | load(file.path(d, "20180228_RSC-11-142_mouse_star_exon.Rdata"))
 2 | load(file.path(d, "20180228_RSC-11-142_mouse_star_intron.Rdata"))
 3 | load(file.path(d, "20180228_RSC-11-142_mouse_star_samp.dat.Rdata"))
 4 | row.names(samp.dat) = samp.dat[[1]]
 5 | 
 6 | cell.cl = cl
 7 | cell.cl.df = cl.df
 8 | 
 9 | 
10 | load("~/zizhen/projects/Mouse/Nuclei/M1_V1/cl.final.rda")
11 | nuclei.cl = cl
12 | nuclei.cl.df = cl.df
13 | 
14 | L45.cells = names(cell.cl)[cell.cl %in% as.character(c(58:61,130,137))]
15 | L45.nuclei = names(nuclei.cl)[nuclei.cl %in% as.character(c(37:39,42))]
16 | 
17 | gene.cell.intron.ratio <- rowSums(intron[,L45.cells])/rowSums(exon[,L45.cells]+intron[,L45.cells])
18 | 
19 | gene.nuclei.intron.ratio <- rowSums(intron[,L45.nuclei])/rowSums(exon[,L45.nuclei]+intron[,L45.nuclei])
20 | 
21 | nuclei.ratio = gene.nuclei.intron.ratio/gene.cell.intron.ratio
22 | nuclei.ratio=nuclei.ratio[!is.na(nuclei.ratio)]
23 | 
24 | 
25 | exon.cpm = t(t(exon)*10^6/Matrix::colSums(exon))
26 | cell.exon = rowMeans(exon.cpm[,L45.cells])
27 | nuclei.exon = rowMeans(exon.cpm[,L45.nuclei])
28 | cell.nuclei.exon = data.frame(cell.exon, nuclei.exon)
29 | cell.nuclei.exon$cell.nuclei.ratio = cell.nuclei.exon[,1]/cell.nuclei.exon[,2]
30 |   
31 | exon.ratio = rowMeans(exon[,L45.cells])/
32 | 
33 | cell.norm.dat = norm.dat
34 | 
35 | tmp=load("~/zizhen/projects/Mouse/Nuclei/M1_V1/norm.dat.rda")
36 | tmp=load("~/zizhen/projects/Mouse/Nuclei/M1_V1/all.col.rda")
37 | all.col[is.na(all.col)]="black"
38 | nuclei.norm.dat= norm.dat
39 | nuclei.all.col = all.col[,L45.nuclei]
40 | tmp=display_cl(droplevels(nuclei.cl[L45.nuclei]), nuclei.norm.dat[,L45.nuclei],prefix="L45.nuclei.new",col=nuclei.all.col)
41 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/markers.R:
--------------------------------------------------------------------------------
 1 | Inh.markers <- c("Gad2","Slc32a1","Prox1","Adarb2","Nfix","Nfib","Cacna2d1",
 2 |                                      "Cxcl14","Tnfaip8l3","Cplx3","Lamp5","Cd34","Pax6","Krt73",
 3 |                                      "Scrg1","Egln3","Ndnf","Tmem182","Ntn1","Pde11a","Pdlim5",
 4 |                                      "Lsp1","Slc35d3","Nkx2-1","Serpinf1","Col14a1","Vip","Sncg",
 5 |                                      "Crabp1","Slc10a4","Cldn10","Bhlhe22","Crispld2","Slc17a8",
 6 |                                      "Cyb5r2","Nr1h4","Wnt7b","Prss12","Igfbp6","Calb2","Grpr",
 7 |                                      "Pthlh","Elfn1","Rspo1","Slc18a3","Lmo1","Rspo4","Sostdc1",
 8 |                                      "Chat","Cbln4","Gsx2","Gpc3","Mab21l1","C1ql1","Itih5","Mybpc1",
 9 |                                      "Myl1","Lhx6","Sox6","Sst","Chodl","Calb1","Cbln4","Etv1","Edn1",
10 |                                      "Kl","Il1rapl2","Myh8","Ptprk","Chrna2","Myh13","Ptgdr","Crhr2",
11 |                                      "Hpse","Igsf9","C1ql3","Tacstd2","Th","Col6a1","Nts","Tac1","Pvalb",
12 |                                      "Gabrg1","Il7","Bche","Prdm8","Syt2","Ostn","Pdlim3","C1ql1",
13 |                                      "Gpr149","Vipr2","Meis2","Adamts19","Cpa6","Lgr6")
14 | Inh.comb.markers <- c("Reln","Cnr1","Nr2f2","Cck","Npy","Crh","Tac2")
15 |                                      
16 |                  
17 | 
18 | Ex.markers <- unique(c("Slc17a7","Rtn4rl2","Slc30a3","Cux2","Stard8","Otof","Rrad","Penk","Agmat",
19 |                                 "Emx2","S100a3","Macc1","Rorb","Scnn1a","Whrn","Endou","Col26a1",
20 |                                 "Rspo1","Fezf2","Hsd11b1","Batf3","Arhgap25","Colq","Pld5","Olfr78",
21 |                                 "Tcap","Fgf17","Wfdc18","Wfdc17","Aldh1a7","Tgfb1","Ctsc","Rxfp2",
22 |                                 "Prss35","Rgs12","Osr1","Oprk1","Cd52","Col23a1","Col18a1","Car1",
23 |                                 "Car3","Fam84b","Chrna6","Chrnb3","Fn1","Tac1","Lce3c","Erg",
24 |                                 "Cdc42ep5","Bmp5","Pvalb","Depdc7","Stac","C1ql2","Ptgfr","Slco2a1",
25 |                                 "Pappa2","Dppa1","Npsr1","Htr2c","Hpgd","Nxph3","Sla2","Tshz2",
26 |                                 "Rapgef3","Slc17a8","Trh","Nxph2","Foxp2","Col12a1","Syt6","Col5a1",
27 |                                 "Gpr139","Ly6d","Sla","Cpa6","Ppp1r18","Faim3","Ctxn3","Nxph4",
28 |                                 "Cplx3","Ctgf","Col8a1","Mup5","Ngf","Fam150a","F2r","Serpinb11","Fbxl7","P2ry12",
29 |                                 "Crh","Kynu","Hsd17b2","Mup3","Tlcd1","Lhx5","Trp73","Cpa6","Gkn1","Col18a1","Lce3c","Erg","Bmp5","Stac","C1ql2","Slco2a1","Lrrc9","Trhr","Myzap","Krt80","H60b","Fam150a","Clic5","Kcnj5","Olfr110","Olfr111"))
30 | Ex.comb.markers <- c("Reln","Cdh13","Cpne7","Alcam","Rprm","Marcksl1")
31 |                                 
32 | 
33 | 
34 | Global.markers <- c("Fez1","Phyhipl","Aplp1","Gnao1","Caly","Snap25","Atp1a3","Camk2b",
35 |                                         "Syt1","Gabrg2","Fabp3","Stmn2","Kif5c","Slc32a1","Gad2","Dlx1","Dlx5","Dlx2","Dlx6os1",
36 |                                         "Slc6a1","Sox2","Slc17a7","Nrn1","Neurod2","Sv2b","Satb2","Tbr1","Vsig2","Cmtm5","Kcnj10",
37 |                                         "S100a16","S100a13","S1pr1","Gja1","Gjb6","Aqp4","Lcat","Acsbg1","Olig1","Sox10","Neu4",
38 |                                         "Sapcd2","Gpr17","Plp1","Cldn11","Mag","Mog","Nkx6-2","Enpp6","9630013A20Rik","Brca1",
39 |                                         "Mog","Opalin","Gjb1","Hapln2","Cyba","Ctsh","Ifitm3","Sparc","S100a11","Dcn","Col1a1",
40 |                                         "Pltp","Vtn","Slc6a13","Spp1","Slc13a3","Col15a1","Slc47a1","Tgtp2","Ifi47","Esam",
41 |                                         "Slco1a4","Slc38a5","Cldn5","H2-Q7","Slc38a11","Art3","Ace2","Acta2","Myh11","Pln",
42 |                                         "Gja5","Kcnj8","Atp13a5","Aoc3","Ctss","C1qb","C1qc","C1qa","Cbr2","F13a1","Pf4",
43 |                                         "Mrc1","Siglech","Selplg")
44 | 
45 | 
46 | 
47 | cl.present = get_cl_means(norm.dat > 1, cl.clean)
48 | Ex.markers=Ex.markers[order(apply(cl.present[Ex.markers, row.names(cl.df)[c(53:109,111)]]>0.4 , 1, function(x)which(x)[1]))]
49 | write.csv(Ex.markers, file="markers_plot2.csv")
50 | 
51 | Inh.markers=Inh.markers[order(apply(cl.present[Inh.markers, row.names(cl.df)[c(1:52,110)]]>0.4 , 1, function(x)which(x)[1]))]
52 | write.csv(Inh.markers, file="markers_plot3.csv")
53 | 
54 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/retrograde.R:
--------------------------------------------------------------------------------
 1 | tmp=with(samp.dat, is.na(Injection_type) | Injection_type == "" )
 2 | samp.dat[tmp,"Injection_type"]="0"
 3 | tb = table(ex.cl, samp.dat[names(ex.cl),"Injection_type"])
 4 | select.cl = row.names(tb)[rowMins(tb) > 20]
 5 | select.cells= names(cl)[cl %in% select.cl]
 6 | 
 7 | de.param = de_param(q1.th=0.5, q.diff.th=0.7, de.score.th=150, min.cells=4)
 8 | ###DEG between V1 and ALM within a mixed Inhibitory types.
 9 | 
10 | retro.cl = setNames(paste0(cl[select.cells], samp.dat[select.cells, "Injection_type"]), select.cells)
11 | 
12 | tmp= levels(droplevels(cl[select.cells]))
13 | retro.pairs = data.frame(null=paste0(tmp,"0"), retro=paste0(tmp,"retrograde"), stringsAsFactors=FALSE)
14 | 
15 | row.names(retro.pairs) = paste(retro.pairs[,1], retro.pairs[,2], sep="_")
16 | 
17 | load("select.genes.rda")
18 | retro.de.genes = de_score_pairs(norm.dat[select.genes,select.cells], cl=retro.cl, pairs=retro.pairs, de.param = de.param)$de.genes
19 | unlist(sapply(retro.de.genes, function(x)x$num))
20 | 
21 | tmp=sapply(retro.de.genes, function(x)x$genes)
22 | tmp[sapply(tmp,length)>0]
23 | tmp=sapply(retro.de.genes, function(x)x$up.genes)
24 | tmp[sapply(tmp,length)>0]
25 | tmp=sapply(retro.de.genes, function(x)x$down.genes)
26 | tmp[sapply(tmp,length)>0]
27 | 
28 | 
29 | retro.cells = intersect(row.names(samp.dat)[samp.dat$Injection_type=="retrograde"], names(cl.clean))
30 | tb=table(droplevels(cl.clean[retro.cells]), samp.dat[retro.cells, "injection_roi"])
31 | 
32 | 
33 |   
34 | tb = tb[rowSums(tb > 20) > 1,]
35 | select.cl= row.names(tb)
36 | de.df=sapply(select.cl, function(x){
37 |   select.roi = colnames(tb)[tb[x, ] > 20]
38 |   tmp.cells= retro.cells[as.character(cl.clean[retro.cells])==x & samp.dat[retro.cells,"injection_roi"] %in% select.roi]
39 |   select.roi = setNames(samp.dat[tmp.cells, "injection_roi"], tmp.cells)
40 |   de.df = DE_genes_pw(norm.dat, select.roi, counts=as.matrix(exon[select.genes, names(select.roi)], use.voom=TRUE))
41 |   de.df=sapply(de.df, function(x){
42 |     x=x[order(x$padj,-abs(x$lfc)),]
43 |     x=x[x$padj < 0.05 & abs(x$lfc)>1,]
44 |   },simplify=F)
45 | },simplify=F)
46 | save(de.df, file="retro.roi.de.df.rda")
47 | tmp=sapply(de.df, function(x)sapply(x, nrow))
48 | 
49 | cl.low = droplevels(cl[!names(cl) %in% names(cl.clean)])
50 | save(cl.low, file="cl.low.rda")
51 | 
52 | retro.cells = intersect(names(cl.clean), row.names(samp.dat)[samp.dat$Injection_type=="retrograde"]),
53 | non.retro.cells = setdiff(names(cl.clean),retro.cells)
54 |   
55 | clean.cells=split(non.retro.cells, paste(samp.dat[non.retro.cells,"Region"],cl.df[as.character(cl.clean[non.retro.cells]),"class_label"],sep="_"))
56 | clean.cells=split(non.retro.cells, paste(samp.dat[non.retro.cells,"Region"],samp.dat[non.retro.cells, "full_genotype"]))
57 | 
58 | cells.list = list(control=control.cells, fail.qc = fail.qc.cells, doublet = rm.cells, cl.low = names(cl.low), retro = retro.cells, non.retro.cells = non.retro.cells)
59 | save(cells.list, file="cells.list.rda")
60 | 
61 | 
62 | old.cl = as.data.frame(read_feather("/allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20170913/anno.feather"))
63 | row.names(old.cl) = old.cl[,1]
64 | old.cl[[1]]=NULL
65 | old.cl= old.cl[,c("class_label","cluster_label","region_label","inj_type_label")]
66 | 
67 | absent.cells= setdiff(row.names(old.cl), names(cl))
68 | tb= as.data.frame(table(old.cl[absent.cells, "cluster_label"]))
69 | tb2 = as.data.frame(table(old.cl[,"cluster_label"]))
70 | 
71 | absent.df = inner_join(tb, tb2, by="Var1")
72 | colnames(absent.df)=c("cluster","absent","total")
73 | write.csv(absent.df, "absent.cells.csv")
74 | 
75 | 
76 | 
77 | 
78 |   
79 | 
80 | 
81 | 
82 | 
83 | 
84 | 
85 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/saturation_test.R:
--------------------------------------------------------------------------------
 1 | library(iterclust)
 2 | 
 3 | subsample.de.genes=list()
 4 | load("norm.dat.rda")
 5 | load("cl.final.rda")
 6 | load("select.genes.rda")
 7 | load("cl.med.rda")
 8 | load("cl.clean.rda")
 9 | 
10 | dir.create("subsample_merge")
11 | load("de.param.rda")
12 | n.bin = 10
13 | bins = sample(1:n.bin, length(cl.clean),replace=TRUE)
14 | for(i in 1:n.bin){
15 |   ####subsample cells and recompute de.genes 
16 |   select.cells = names(cl.clean)[bins<= i]
17 |   print(length(select.cells))
18 |   rd.dat = t(as.matrix(norm.dat[select.genes, select.cells]))
19 |   merge.result= merge_cl(norm.dat, cl.clean[select.cells], rd.dat=rd.dat, de.param=de.param)
20 |   save(merge.result, file=file.path("subsample_merge/",paste0(i,".result.rda")))
21 | }
22 | 
23 | 
24 | 
25 | merge.files <- dir("subsample_merge/",pattern="result.rda")
26 | merge.stats <- do.call("rbind",sapply(merge.files, function(f){
27 |   load(file.path("subsample_merge/",f))
28 |   tmp.cl = merge.result$cl
29 |   cl.size = table(cl[names(tmp.cl)])
30 |   tb=table(tmp.cl, cl.clean[names(tmp.cl)])
31 |   tb.df = as.data.frame(tb)
32 |   tb.df = tb.df[tb.df$Freq>0,]
33 |   tmp=split(tb.df[,2],tb.df[,1])
34 |   merge.size=sapply(tmp, length)
35 |   merge.cl =names(merge.size)[merge.size>1]
36 |   if(length(merge.cl)>0){
37 |     tb.df = tb.df[tb.df[,1]%in% merge.cl,]
38 |     tb.df =tb.df[order(tb.df[,1]),]
39 |     #tb.df[,"mergeCl"] = cl.df[as.character(tb.df[,1]),"cluster_label"]
40 |     #tb.df[,"orgCl"] = cl.df[as.character(tb.df[,2]),"cluster_label"]
41 |     colnames(tb.df) = c("mergeCl", "orgCl")
42 |     absent.cl = colnames(tb)[colSums(tb)==0]
43 |     merge.df = data.frame(org.cl = as.character(tb.df$orgCl), merge.cl = as.character(tb.df$mergeCl),stringsAsFactors=FALSE)
44 |     merge.df = rbind(merge.df, data.frame(org.cl = as.character(absent.cl), merge.cl=rep("absent", length(absent.cl)),stringsAsFactors=FALSE))
45 | 
46 |     nochange.cl = setdiff(colnames(tb), c(absent.cl, as.character(tb.df[,2])))
47 |     merge.df = rbind(merge.df, data.frame(org.cl = as.character(nochange.cl,"cluster_label"), merge.cl=rep("nochange", length(nochange.cl)),stringsAsFactors=FALSE))
48 |     merge.df$sample_size = length(tmp.cl)
49 |     merge.df$cl_size = cl.size[as.character(merge.df$org.cl)]
50 |     return(merge.df)
51 |   }
52 |   return(NULL)
53 | },simplify=F))
54 | 
55 | 
56 | merge.stats$org.cl = factor(as.character(merge.stats$org.cl), labels(dend))
57 | merge.stats$merge.cl = droplevels(factor(merge.stats$merge.cl, levels=c("absent","nochange",labels(dend))))
58 | 
59 | merge.stats$org.cl_label = merge.stats$org.cl
60 | levels(merge.stats$org.cl_label) = cl.df[levels(merge.stats$org.cl_label),"cluster_label"]
61 | merge.stats$merge.cl_label = merge.stats$merge.cl
62 | levels(merge.stats$merge.cl_label) = c("absent", "nochange", as.character(cl.df[levels(merge.stats$merge.cl_label)[-(1:2)],"cluster_label"]))
63 | 
64 | merge.stats$sample_size = factor(merge.stats$sample_size)
65 | saturation.df = merge.stats
66 | save(saturation.df, file="saturation.df.rda")
67 | 
68 | cl.color = setNames(cl.df$cluster_color, cl.df$cluster_label)
69 | cl.color = c("black","white",as.character(cl.color[levels(merge.stats$merge.cl)[-(1:2)]]))
70 | 
71 | g=ggplot(saturation.df, aes(org.cl, sample_size, fill = merge.cl)) + geom_raster() + scale_fill_manual(values=cl.color)
72 | g = g + theme(axis.text.x = element_text(angle=90, hjust=1))
73 | pdf("saturation.pdf",height=6,width=14)
74 | g
75 | dev.off()
76 | 
77 | 
78 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/subsample.reads.R:
--------------------------------------------------------------------------------
 1 | load("exon.rda")
 2 | source("~/zizhen/My_R/subsample.R")
 3 | library(parallel)
 4 | frac= seq(0.1, 0.9, by=0.1)
 5 | tmp = mclapply(frac, function(f){
 6 |   print(f)
 7 |   dat = subsample_frac(exon, f)
 8 |   colSums(dat > 0)
 9 | },mc.cores=9)
10 | tmp[[10]] = colSums(exon>0)
11 | tmp.df = do.call("cbind",tmp)
12 | colnames(tmp.df) = seq(0.1,1,by=0.1)
13 | subsample.reads.df <- as.data.frame(as.table(tmp.df))
14 | colnames(subsample.reads.df)= c("sample_id","Frac","gene.counts")
15 | 
16 | subsample.reads.df$class = cl.df[as.character(cl[as.character(subsample.reads.df$sample_id)]),"class_label"]
17 | subsample.reads.df[subsample.reads.df$class=="Endothelial", "class"] = "Non-Neuronal"
18 | 
19 | subsample.reads.df= droplevels(subsample.reads.df)
20 | subsample.reads.df$Frac = factor(subsample.reads.df$Frac)
21 | 
22 | g=ggplot(subsample.reads.df,
23 |   aes(x = Frac,
24 |       y = gene.counts)) +
25 |   geom_violin(aes(fill = class),size = 0.1) +
26 |   stat_summary(aes(x = Frac,
27 |                    y = gene.counts),
28 |                fun.ymin = function(z) { quantile(z,0.25) },
29 |                fun.ymax = function(z) { quantile(z,0.75) },
30 |                fun.y = median,
31 |                color = "black",
32 |                size = 0.1) +
33 |   facet_wrap(~ class, nrow=3) + 
34 |   scale_fill_manual(values=c("dodgerblue", "skyblue","gray80")) +
35 |   scale_y_continuous(limits = c(0, 16100), breaks = c(0, 4000, 8000, 12000, 16000)) +
36 |   theme_bw(base_size = 5) +
37 |   theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.3),
38 |         panel.grid.major.x = element_blank(),
39 |         panel.grid.minor.y = element_blank(),
40 |         panel.border = element_blank(),
41 |         legend.position="none") +
42 |   xlab("Sampling rate") +
43 |   ylab("Gene counts")
44 | 
45 | ggsave("subsample.genes_detected.pdf", g, width=3, height=2.5, useDingbats=F)
46 | save(subsample.reads.df, file="subsample.reads.df.rda")
47 | 


--------------------------------------------------------------------------------
/RNA-seq Analysis/tune_parameter.R:
--------------------------------------------------------------------------------
  1 | library(iterclust)
  2 | load("cl.final.rda")
  3 | load("norm.dat.rda")
  4 | load("top.cl.rda")
  5 | load("rm.eigen.rda")
  6 | ref.cl = cl
  7 | ref.cl.df = cl.df
  8 | 
  9 | 
 10 | 
 11 | select.cells=names(cl)[cl  %in% row.names(cl.df)[cl.df$subclass_label=="Sst"]]
 12 | save(select.cells, file="Sst.select.cells.rda")
 13 | all.cells=select.cells
 14 | 
 15 | top.result = list(cl=as.factor(top.cl[select.cells]),markers=NULL)
 16 | de.param = de_param(q1.th=0.5, q.diff.th=0.7, min.cells=4)
 17 | 
 18 | library(parallel)
 19 | nodes <- makeCluster(12, type="FORK")
 20 | clusterExport(nodes,c("norm.dat", "all.cells","rm.eigen","top.result"))
 21 | 
 22 | for(de.score.th in c(80,300)){
 23 |   de.param$de.score.th = de.score.th 
 24 |   clusterExport(nodes,c("de.param"))
 25 |   d = paste0("subsample_pca_",de.score.th)
 26 |   dir.create(d)
 27 |   clusterExport(nodes,c("d"))
 28 |   tmp= parSapply(nodes, 1:100, function(i){
 29 |     library(iterclust)
 30 |     prefix = paste("iter",i,sep=".")
 31 |     outfile= file.path(d, paste0("result.",i,".rda"))
 32 |       if(file.exists(outfile)){
 33 |       return(NULL)
 34 |     }
 35 |     select.cells=sample(all.cells, round(length(all.cells)*0.8))
 36 |     save(select.cells, file=file.path(d, paste0("cells.",i,".rda")))
 37 |     result <- iter_clust(norm.dat=norm.dat, select.cells=select.cells,prefix=prefix, split.size = 10, de.param = de.param, dim.method="pca",result= top.result,rm.eigen=rm.eigen, rm.th=0.7)
 38 |     save(result, file=outfile)
 39 |   })
 40 |   result.files=file.path(d, dir(d, "result.*.rda"))
 41 |   PCA.co.result <- collect_co_matrix_sparseM(norm.dat, result.files, all.cells)
 42 |   
 43 |   d = paste0("subsample_WGCNA_", de.score.th)
 44 |   dir.create(d)
 45 |   clusterExport(nodes,c("d"))
 46 |   tmp= parSapply(nodes, 1:100, function(i){
 47 |     library(iterclust)
 48 |     prefix = paste("iter",i,sep=".")
 49 |     outfile= file.path(d, paste0("result.",i,".rda"))
 50 |     if(file.exists(outfile)){
 51 |       return(NULL)
 52 |     }
 53 |     select.cells=sample(all.cells, round(length(all.cells)*0.8))
 54 |     save(select.cells, file=file.path(d, paste0("cells.",i,".rda")))
 55 |     result <- iter_clust(norm.dat=norm.dat, select.cells=select.cells,prefix=prefix, split.size = 10, de.param = de.param, dim.method="WGCNA",result= top.result,rm.eigen=rm.eigen, rm.th=0.7)
 56 |     save(result, file=outfile)
 57 |   })
 58 |   result.files=file.path(d, dir(d, "result.*.rda"))
 59 |   WGCNA.co.result <- collect_co_matrix_sparseM(norm.dat, result.files, all.cells)
 60 | 
 61 |   cl.list = c(PCA.co.result$cl.list, WGCNA.co.result$cl.list)
 62 |   co.ratio.min = pmin(PCA.co.result$co.ratio, WGCNA.co.result$co.ratio)
 63 |   cl.mat = cbind(PCA.co.result$cl.mat, WGCNA.co.result$cl.mat)
 64 |   
 65 |   consensus.result = iter_consensus_clust(co.ratio.min, cl.list, norm.dat, select.cells=all.cells, de.param = de.param)
 66 | 
 67 |   refine.result = refine_cl(consensus.result$cl, co.ratio=co.ratio.min, tol.th=0.005, confusion.th=0.8)
 68 |   merge.result= merge_cl(norm.dat=norm.dat, cl=refine.result$cl, rd.dat=t(norm.dat[consensus.result$markers,]), de.param = de.param,return.markers=FALSE)
 69 |   compare.result = compare_annotate(merge.result$cl, ref.cl, ref.cl.df)
 70 |   cl = compare.result$cl
 71 |   cl.df = compare.result$cl.df
 72 |   cl = setNames(factor(as.character(cl),levels=row.names(cl.df)), names(cl))
 73 |   save(cl, cl.df, file=paste0("Sst.cl.", de.score.th, ".rda"))
 74 | }
 75 | 
 76 | ref.cl = droplevels(ref.cl[all.cells])
 77 | ref.cl.df = ref.cl.df[levels(ref.cl),]
 78 | 
 79 | 
 80 | 
 81 | 
 82 | source("~/zizhen/My_R/map_river_plot.R")
 83 | source("~/zizhen/My_R/sankey_functions.R")
 84 | map.df = ref.cl.df[as.character(ref.cl),c(2,3,5:11)]
 85 | row.names(map.df) = names(ref.cl)
 86 | colnames(map.df) = paste0("map_",colnames(map.df))
 87 | 
 88 | load("Sst.cl.80.rda")
 89 | cl.df$cluster_id = as.integer(row.names(cl.df))
 90 | compare.result = compare_annotate(cl, ref.cl, ref.cl.df)
 91 | 
 92 | ggsave("Sst.cl.80.map.pdf", compare.result$g)
 93 | map.80.df = cbind(map.df, cl.df[as.character(cl[row.names(map.80.df)]),])
 94 | save(map.80.df, file="map.80.df.rda")
 95 | 
 96 | 
 97 | g80 <- river_plot(map.80.df, min.cells=4, min.frac=0.15)
 98 | ggsave("Sst.80.river.pdf", g80)
 99 | 
100 | load("Sst.cl.300.rda")
101 | map.300.df = cbind(map.df, cl.df[as.character(cl[row.names(map.80.df)]),c(3:4,6:12)])
102 | 
103 | 
104 | g300 <- river_plot(map.300.df, min.cells=4, min.frac=0.1)
105 | ggsave("Sst.300.river.pdf", g300)
106 | save(map.300.df, file="map.300.df.rda")
107 | 
108 | 
109 | source("~/zizhen/My_R/hicat/R/cl.transition.R")
110 | for(de.score.th in c(80,300)){
111 |   load(paste0("Sst.cl.", de.score.th, ".rda"))
112 |   select.markers= select_markers(norm.dat, cl, n.markers=50, de.param = de.param)$markers
113 |   print(de.score.th)
114 |   print(length(cl))
115 |   print(length(select.markers))
116 |   cell.cl.map.df = get_core_transition(norm.dat, cl, select.markers, n.bin=5, n.iter=100, mc.cores=10)
117 |   save(cell.cl.map.df, file=paste0("Sst.cell.cl.map.", de.score.th, ".rda"))
118 | }
119 | 
120 |   
121 | 


--------------------------------------------------------------------------------
/tasic2018analysis.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 


--------------------------------------------------------------------------------