├── .gitignore
├── LICENSE
├── MTG_celltypes.Rproj
├── R
    ├── Code_0_prepareComparisonDataSets.Rmd
    ├── Code_1_buildDendrogram.r
    ├── Code_MatchHumanMouse.r
    ├── Code_QCplots.R
    ├── Code_correlationWithOtherDatasets.r
    ├── Code_metaNeighbor_human.r
    ├── Code_metaNeighbor_mouse.r
    ├── Code_neurosurgical_vs_postmortem.r
    ├── Code_quantifyFISH_L23.R
    ├── Code_violin_groupHeatmaps_jitterPlots.r
    ├── ED_Fig1_plot_facs_metadata.Rmd
    ├── ED_Fig2_tissue_de_genes.R
    ├── Fig1c.R
    ├── Support_2016-11-03-runMetaNeighbor.R
    ├── Support_code0_functions.R
    ├── Support_extraFunctions.r
    ├── Support_heatmap_functions.R
    └── Support_violin_functions.R
├── README.md
└── data
    ├── Boldog2018_clusters.csv
    ├── FinalHumanMTGclusterAnnotation_update.csv
    ├── Habib2017_clusters.csv
    ├── Lake2018_clusters.csv
    ├── Paul2017_Table S3.xlsx
    ├── Paul2017_Table S4.xlsx
    ├── clusterInfoMTG.RData
    ├── clusterInfoMTG.rda
    ├── human_mouse_homology.csv
    ├── mito_genes.txt
    ├── mouseClusters.txt
    ├── mtg_cluster_separability.csv
    ├── mtg_facs_metadata.csv
    ├── sex_genes.txt
    ├── smFISH_Human_L2-L3_2018_04_output.csv
    ├── smFISH_Human_L2-L3_Replicate2_output.csv
    └── smFISH_Human_L2-L3_Replicate3_output.csv


/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | .Ruserdata
 5 | .DS_Store
 6 | output/
 7 | *.html
 8 | MTG/
 9 | mouse/
10 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Allen Institute Software License – This software license is the 2-clause BSD 
 2 | license plus a third clause that prohibits redistribution and use for 
 3 | commercial purposes without further permission. 
 4 | 
 5 | Copyright © 2019. Allen Institute.  All rights reserved.
 6 | 
 7 | Redistribution and use in source and binary forms, with or without 
 8 | modification, are permitted provided that the following conditions are met:
 9 | 
10 | 1. Redistributions of source code must retain the above copyright notice, this 
11 | list of conditions and the following disclaimer.
12 | 
13 | 2. Redistributions in binary form must reproduce the above copyright notice, 
14 | this list of conditions and the following disclaimer in the documentation 
15 | and/or other materials provided with the distribution.
16 | 
17 | 3. Redistributions and use for commercial purposes are not permitted without 
18 | the Allen Institute’s written permission. For purposes of this license, 
19 | commercial purposes are the incorporation of the Allen Institute's software 
20 | into anything for which you will charge fees or other compensation or use of 
21 | the software to perform a commercial service for a third party. Contact 
22 | terms@alleninstitute.org for commercial licensing opportunities.
23 | 
24 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
25 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
26 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
27 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 
28 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
29 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
30 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
31 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
32 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
33 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 | 


--------------------------------------------------------------------------------
/MTG_celltypes.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 


--------------------------------------------------------------------------------
/R/Code_0_prepareComparisonDataSets.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Data input from website"
  3 | author: "Jeremy Miller"
  4 | date: "March 8, 2019"
  5 | output: html_notebook
  6 | ---
  7 | 
  8 | ```{r setup, include=FALSE}
  9 | knitr::opts_chunk$set(echo = TRUE)
 10 | ```
 11 | 
 12 | # Overview
 13 | 
 14 | This script reads converts data downloaded from the Allen Institute Cell Types Database (http://celltypes.brain-map.org/rnaseq) into a format compatible for these scripts.
 15 | 
 16 | ### Prior to running any code, download the data:
 17 |   
 18 | 1. If needed, set the working directory first: e.g., `setwd("C:/Users/yourname/Desktop/analysis/")`  
 19 | 2. Create a subfolder in your current working directory called `website_data`.  **This should be the same folder where you download everything in the `data` directory from `MTG_celltypes`.**
 20 | 3. Download and unzip the following files in `data`.  
 21 |     a. Human MTG:  http://celltypes.brain-map.org/api/v2/well_known_file_download/694416044  
 22 |     b. Mouse VISp: http://celltypes.brain-map.org/api/v2/well_known_file_download/694413985  
 23 |     c. Mouse ALM:  http://celltypes.brain-map.org/api/v2/well_known_file_download/694413179 
 24 | _NOTE: These links are accurate as of April 2019.  If these data are moved in later releases, this code document will be updated accordingly - please post an Issue if needed.  Similarly, note that the dates below may need to be updated._  
 25 | 
 26 | 
 27 | ```{r setwd}
 28 | homeFolder <- "../"  # UPDATE WITH CORRECT PATH
 29 | knitr::opts_knit$set(root.dir = homeFolder)
 30 | #setwd(homeFolder) # UPDATE WITH CORRECT PATH
 31 | ```
 32 | 
 33 | 
 34 | ### Load the relevant libraries
 35 | 
 36 | ```{r load libraries, warnings=FALSE}
 37 | source(paste0(homeFolder,"R/Support_code0_functions.R"))
 38 | suppressPackageStartupMessages({
 39 |   library(feather)
 40 |   library(matrixStats)
 41 |   library(dplyr)
 42 |   library(edgeR)
 43 |   library(data.table)
 44 | })
 45 | options(stringsAsFactors=FALSE)
 46 | ```
 47 |   
 48 |   
 49 | ## Process the human MTG data
 50 |   
 51 | First we need to read the data into R. 
 52 |   
 53 | ```{r read in MTG data}
 54 | tmp      <- fread(paste0(homeFolder,"website_data/human_MTG_2018-06-14_exon-matrix.csv"),header = T, sep = ',',verbose=FALSE)
 55 | exons    <- as.matrix(tmp[,2:dim(tmp)[2]])
 56 | rownames(exons) <- as.character(as.matrix(tmp[,1]))
 57 | tmp      <- fread(paste0(homeFolder,"website_data/human_MTG_2018-06-14_intron-matrix.csv"),header = T, sep = ',',verbose=FALSE)
 58 | introns  <- as.matrix(tmp[,2:dim(tmp)[2]])
 59 | rownames(introns) <- as.character(as.matrix(tmp[,1]))
 60 | geneInfo <- read.csv(paste0(homeFolder,"website_data/human_MTG_2018-06-14_genes-rows.csv"),row.names=1)
 61 | sampInfo <- read.csv(paste0(homeFolder,"website_data/human_MTG_2018-06-14_samples-columns.csv"),row.names=1)
 62 | ```
 63 |   
 64 | Second, convert the meta-data files into formats consistent with the rest of the analysis.  Note that the MTG cluster colors (and other info) which is stored as a data file in `VENcelltypes`.  
 65 |   
 66 | ```{r format MTG metadata}
 67 | # Omit cells with no class
 68 | kp <- sampInfo$cluster!="no class"
 69 | 
 70 | # Format the cluster info
 71 | anno <- auto_annotate(sampInfo[kp,])
 72 | anno$sample_id <- anno$sample_name
 73 | 
 74 | # Update the correct cluster colors and ids
 75 | load(paste0(homeFolder,"data/clusterInfoMTG.RData"))        #### May need to adjust location
 76 | anno$cluster_color <- clusterInfoMTG$cluster_color[match(anno$cluster_label,clusterInfoMTG$cluster_label)]
 77 | anno$cluster_id    <- clusterInfoMTG$cluster_id[match(anno$cluster_label,clusterInfoMTG$cluster_label)]
 78 | ```
 79 |   
 80 |   
 81 | Next, convert the data into CPM(exons+introns) and format appropriately.    
 82 |   
 83 | ```{r format MTG data}
 84 | ## Calculate CPM
 85 | CPM <- cpm(introns[, kp] + exons[, kp])
 86 | rownames(CPM) <- rownames(geneInfo)
 87 | colnames(CPM) <- anno$sample_id
 88 | 
 89 | ## Format appropriately
 90 | expr.data <- transpose(as.data.frame(CPM))
 91 | colnames(expr.data) <- rownames(CPM)
 92 | expr.data$sample_id <- anno$sample_id
 93 | ```
 94 |   
 95 | Finally, output the results to feather files in the `MTG` directory.  
 96 |   
 97 | ```{r output MTG data}
 98 | # Create MTG directory
 99 | dir.create(paste0(homeFolder,"MTG"))
100 | 
101 | # Write annotation file
102 | write_feather(anno,paste0(homeFolder,"MTG/anno.feather"))
103 | 
104 | # Write data file
105 | write_feather(expr.data,paste0(homeFolder,"MTG/data.feather"))
106 | ```
107 |   
108 | ## Process the mouse data
109 |   
110 | We are going to read in the vISp and ALM data separately, and then save them to a single folder location since they were combined for the mouse analysis.  
111 |   
112 | First we need to read the VISp data into R. *This step is slow.*  
113 |   
114 | ```{r read in VISp data}
115 | tmp       <- fread(paste0(homeFolder,"website_data/mouse_VISp_2018-06-14_exon-matrix.csv"),header = T, sep = ',',verbose=FALSE)
116 | exonsV    <- as.matrix(tmp[,2:dim(tmp)[2]])
117 | rownames(exonsV) <- as.character(as.matrix(tmp[,1]))
118 | tmp       <- fread(paste0(homeFolder,"website_data/mouse_VISp_2018-06-14_intron-matrix.csv"),header = T, sep = ',',verbose=FALSE)
119 | intronsV  <- as.matrix(tmp[,2:dim(tmp)[2]])
120 | rownames(intronsV) <- as.character(as.matrix(tmp[,1]))
121 | geneInfoV <- read.csv(paste0(homeFolder,"website_data/mouse_VISp_2018-06-14_genes-rows.csv"),row.names=1)
122 | sampInfoV <- read.csv(paste0(homeFolder,"website_data/mouse_VISp_2018-06-14_samples-columns.csv"),row.names=1)
123 | ```
124 |   
125 | Second, we need to read the ALM data into R.  *This step is slow.*  
126 |   
127 | ```{r read in ALM data}
128 | tmp       <- fread(paste0(homeFolder,"website_data/mouse_ALM_2018-06-14_exon-matrix.csv"),header = T, sep = ',',verbose=FALSE)
129 | exonsA    <- as.matrix(tmp[,2:dim(tmp)[2]])
130 | rownames(exonsA) <- as.character(as.matrix(tmp[,1]))
131 | tmp       <- fread(paste0(homeFolder,"website_data/mouse_ALM_2018-06-14_intron-matrix.csv"),header = T, sep = ',',verbose=FALSE)
132 | intronsA  <- as.matrix(tmp[,2:dim(tmp)[2]])
133 | rownames(intronsA) <- as.character(as.matrix(tmp[,1]))
134 | geneInfoA <- read.csv(paste0(homeFolder,"website_data/mouse_ALM_2018-06-14_genes-rows.csv"),row.names=1)
135 | sampInfoA <- read.csv(paste0(homeFolder,"website_data/mouse_ALM_2018-06-14_samples-columns.csv"),row.names=1)
136 | ```
137 |   
138 |   
139 | Third, we need to merge the ALM and VISp data.  
140 |   
141 | ```{r merge ALM and VISp}
142 | exons    <- cbind(exonsV,exonsA)
143 | introns  <- cbind(intronsV,intronsA)
144 | geneInfo <- geneInfoA
145 | sampInfo <- rbind(sampInfoV,sampInfoA)
146 | ```
147 |   
148 |   
149 | Fourth, convert the meta-data files into formats consistent with the rest of the analysis.  
150 |   
151 | ```{r format VISp metadata}
152 | sampInfo[is.na(sampInfo)]=0
153 | anno <- auto_annotate(sampInfo)
154 | anno$sample_id <- anno$sample_name
155 | ```
156 |   
157 | Next, convert the data into CPM(exons+introns) and format appropriately.  
158 |   
159 | ```{r format VISp data}
160 | ## Calculate CPM
161 | CPM <- cpm(introns+exons)
162 | rownames(CPM) <- rownames(geneInfo)
163 | colnames(CPM) <- anno$sample_id
164 | 
165 | ## Format appropriately
166 | expr.data <- transpose(as.data.frame(CPM))
167 | colnames(expr.data) <- rownames(CPM)
168 | expr.data$sample_id <- anno$sample_id
169 | ```
170 |   
171 | Finally, output the results to feather files in the `mouse` directory.  
172 |   
173 | ```{r output VISp data}
174 | # Create mouse directory
175 | dir.create(paste0(homeFolder, "mouse"))
176 | 
177 | # Write annotation file
178 | write_feather(anno,paste0(homeFolder,"mouse/anno.feather"))
179 | 
180 | # Write data file
181 | write_feather(expr.data,paste0(homeFolder,"mouse/data.feather"))
182 | ```
183 |   
184 |   
185 | Output session information.  
186 |   
187 | ```{r sessionInfo}
188 | sessionInfo()
189 | ```
190 | 


--------------------------------------------------------------------------------
/R/Code_1_buildDendrogram.r:
--------------------------------------------------------------------------------
  1 | # This code builds the dendrogram 
  2 | 
  3 | # FIRST NEED TO SET THE WORKING DIRECTORY TO THE MAIN FOLDER
  4 | # This is the folder where "Start.RData" is located
  5 | 
  6 | #-----------------------------------------------------------------------------------------------
  7 | print("Read in the data and load relevant libraries")
  8 | outputFolder  = "output/"
  9 | scriptsFolder = "R/"
 10 | inputFolder   = "data/"
 11 | dataFolder    = "MTG/"
 12 | 
 13 | # Load these libraries
 14 | library(beeswarm)
 15 | library(WGCNA);
 16 | library(edgeR)   
 17 | library(feather)
 18 | library(dendextend)
 19 | library(monocle)
 20 | library(ggplot2)
 21 | library(dplyr)
 22 | options(stringsAsFactors=FALSE)
 23 | 
 24 | # Read in the extra scripts
 25 | source(paste0(scriptsFolder,"Support_extraFunctions.r"))
 26 | source(paste0(scriptsFolder,"Support_violin_functions.R"))
 27 | source(paste0(scriptsFolder,"Support_heatmap_functions.R"))
 28 | 
 29 | # Create output folder
 30 | if(! dir.exists(outputFolder)) dir.create(outputFolder)
 31 | 
 32 | # Read in the data
 33 | Expr.dat <- feather(paste(dataFolder,"data.feather",sep=""))   # CPM
 34 | anno     <- read_feather(paste(dataFolder,"anno.feather",sep="")) 
 35 | exprData <- as.matrix(Expr.dat[,colnames(Expr.dat)[colnames(Expr.dat)!="sample_id"]])
 36 | dend     <- NULL
 37 | 
 38 | 
 39 | #---------------------------------------------------------------------------------------------
 40 | print("Determine top cluster marker genes here for building the tree and displaying in heatmaps and violin plots.")
 41 | 
 42 | # Save cluster info variable and excluded clusters
 43 | clusterType = anno$class_label 
 44 | cl          = anno$cluster_id
 45 | names(cl)   = Expr.dat$sample_id
 46 | includeClas = c("GABAergic","Glutamatergic","Non-neuronal")
 47 | excludeClas = sort(setdiff(clusterType,includeClas))
 48 | kpSamp      = !is.element(clusterType,excludeClas) 
 49 | 
 50 | # Get median expression per cluster and the proportions
 51 | normDat    = log2(exprData+1)
 52 | rownames(normDat) = Expr.dat$sample_id
 53 | normDat    = t(normDat)
 54 | exprThresh = 1
 55 | medianExpr = do.call("cbind", tapply(names(cl), cl, function(x) rowMedians(normDat[,x]))) 
 56 | propExpr   = do.call("cbind", tapply(names(cl), cl, function(x) rowMeans(normDat[,x]>exprThresh))) 
 57 | rownames(medianExpr) <- rownames(propExpr) <- rownames(normDat)  
 58 | clusterOrd = as.character(sort(unique(anno$cluster_id)))
 59 | clusterKp  = as.character(sort(unique(anno$cluster_id[kpSamp])))
 60 | medianExpr = medianExpr[,clusterOrd]
 61 | propExpr   = propExpr[,clusterOrd]
 62 | 
 63 | # Exclude non-informative genes from cluster names and make sure at least one informative gene is in the shiny
 64 | kpGn         = apply(medianExpr[,clusterKp],1,max)>0
 65 | shinyFail    = (grepl("\\-",rownames(medianExpr)))|is.element(substr(rownames(medianExpr),1,1),0:9)  # -'s crash the shiny, numbers add an "X"
 66 | excludeGenes = sort(rownames(medianExpr)[grepl("LOC",rownames(medianExpr))|grepl("LINC",rownames(medianExpr))|
 67 |   grepl("FAM",rownames(medianExpr))|grepl("ORF",rownames(medianExpr))|grepl("KIAA",rownames(medianExpr))|
 68 |   grepl("FLJ",rownames(medianExpr))|grepl("DKFZ",rownames(medianExpr))|
 69 |   grepl("RPS",rownames(medianExpr))|grepl("RPL",rownames(medianExpr))|shinyFail])  # Also exclude ribosomal genes
 70 | 
 71 | # Also exclude mitochondrial genes and genes on sex chromosomes  
 72 | sexGenes     = scan(paste0(inputFolder,"sex_genes.txt"),what="character",sep="\n")
 73 | mitoGenes    = scan(paste0(inputFolder,"mito_genes.txt"),what="character",sep="\n")
 74 | excludeGenes = sort(unique(c(excludeGenes,sexGenes,mitoGenes)))
 75 | 
 76 | keepGenes    = setdiff(rownames(medianExpr)[kpGn],excludeGenes)
 77 | topGenes     = getTopMarkersByProp(propExpr[keepGenes,clusterKp],1,NULL,medianExpr[keepGenes,clusterKp],fcThresh=0.5,minProp=0.5)
 78 | topGenesAll  = getTopMarkersByProp(propExpr[kpGn,clusterKp],1,NULL,medianExpr[kpGn,clusterKp],fcThresh=0.5,minProp=0.5)
 79 | print(paste("Fraction of LOCxxxxxx markers:",mean(substr(topGenesAll,1,3)=="LOC")))
 80 | # [1] "Fraction of LOCxxxxxx markers: 0.32"
 81 | 
 82 | 
 83 | #-----------------------------------------------------------------------------------------------
 84 | print("Build clustering trees for nuclei based on the cluster calls")
 85 | 
 86 | # Get cluster info
 87 | anno$layer_label = anno$brain_subregion_label
 88 | clusterInfoAll = updateAndOrderClusters(as.data.frame(anno),topGenes=topGenes, classLevels=c(includeClas,excludeClas),
 89 |   regionNameColumn=NULL,newColorNameColumn="cluster_color", classNameColumn="class_label",matchNameColumn="cluster_label")
 90 | sampleInfo  = as.data.frame(anno[kpSamp,])
 91 | 
 92 | sampleInfo$old_cluster_label = sampleInfo$cluster_label
 93 | sampleInfo$old_cluster_id    = sampleInfo$cluster_id
 94 | sampleInfo$old_cluster_color = sampleInfo$cluster_color
 95 | 
 96 | clusterInfo = clusterInfoAll[is.element(clusterInfoAll$class_label,includeClas),]
 97 | 
 98 | majorGenes  = c("GAD1","SLC17A7","TYROBP","AQP4","PDGFRA","OPALIN","NOSTRIN")  #CSPG4
 99 | majorLabels = c("Inh","Exc","Micro","Astro","OPC","Oligo","Endo")
100 | broadGenes  = c("PAX6","LAMP5","VIP","SST","PVALB","LINC00507","RORB","THEMIS","FEZF2","TYROBP","FGFR3","PDGFRA","OPALIN","NOSTRIN")
101 | 
102 | keepGenes2  = c(keepGenes,majorGenes,broadGenes)
103 | newCluster  = renameClusters(sampleInfo,clusterInfo, propExpr[keepGenes2,clusterKp], medianExpr[keepGenes2,clusterKp], 
104 |     propDiff = 0, propMin=0.4, medianFC = 1, propLayer=0.1,excludeGenes = excludeGenes,
105 |     majorGenes=majorGenes, majorLabels=majorLabels, broadGenes=broadGenes)
106 | clusterInfo$cluster_label = newCluster[clusterInfo[,1]]
107 | 
108 | # Reorder cluster information
109 | clustSplit = strsplit(clusterInfo$cluster_label," ")
110 | clusterInfo$broadGene = as.character(lapply(clustSplit,function(x) return(x[3])))
111 | clusterInfo$topGene = as.character(lapply(clustSplit,function(x) return(x[length(x)])))
112 | 
113 | # Read in and update new colors
114 | #newCols = read.csv(paste0(inputFolder,"clusterColors.csv"),row.names=1)
115 | #newCols = newCols[match(clusterInfo$old_cluster_label,rownames(newCols)),]
116 | #clusterInfo$cluster_color = newCols$final_color
117 | #clusterColor <- clusterInfo$cluster_color
118 | #names(clusterColor) <- clusterInfo$cluster_label
119 | #clusterInfo$lrank = newCols$mouse_order
120 | clusterInfo = clusterInfo[order(as.numeric(as.character(clusterInfo$cluster_id))),]
121 | clusterInfo$lrank = 1:dim(clusterInfo)[1]
122 | l.rank      = setNames(as.integer(clusterInfo$lrank), clusterInfo$cluster_label)
123 | 
124 | #write.csv(cbind(clusterInfo[,1:2],newCluster[clusterInfo[,1]]),"newClusterNames_Final2.csv",row.names=FALSE)
125 | 
126 | # Subset data to only include expressed genes and desired non-outlier cell types
127 | kpSamp2    <- as.character(clusterInfo$cluster_id)
128 | kpGn2      <- apply(medianExpr[,kpSamp2],1,max)>0
129 | medianExpr2 = medianExpr[kpGn2,kpSamp2]
130 | propExpr2   = propExpr[kpGn2,kpSamp2]
131 | colnames(medianExpr2) <- colnames(propExpr2) <- clusterInfo$cluster_label
132 | 
133 | # Determine a score for cell type specificity (marker/beta score)
134 | specificityScoreRank <- getBetaScore(propExpr2,FALSE)
135 | 
136 | # Build and reorder the dendrogram
137 | topNgenes  = 1200
138 | dend <- getDend(medianExpr2[specificityScoreRank<=topNgenes,])
139 | dend <- reorder.dend(dend,l.rank)
140 | dend <- collapse_branch(dend, 0.01)
141 | dend <- dend %>% set("leaves_pch", 19) %>% set("leaves_cex", 2) 
142 | save(dend, file=paste0(outputFolder,"dend.RData"))     
143 | save(dend, file=paste0(dataFolder,"dend.rda"))  
144 | 
145 | #-----------------------------------------------------------------------------------------------
146 | print("Update the annotation file with the new cluster names, ids, and colors")
147 | 
148 | clusterInfo = clusterInfo[match(labels(dend),clusterInfo$cluster_label),]
149 | # We read this in from the website, so writing out the same information is unnecessary
150 | #clusterId <- clusterInfo$cluster_id <- 1:length(clusterInfo$cluster_label)
151 | #names(clusterId) <- clusterLabel <- clusterInfo$cluster_label
152 | #names(clusterLabel) <- clusterInfo$old_cluster_label
153 | 
154 | #sampleInfo$cluster_label = clusterLabel[sampleInfo$cluster_label]
155 | #sampleInfo$cluster_id = clusterId[sampleInfo$cluster_label]
156 | #sampleInfo$cluster_color = clusterColor[sampleInfo$cluster_label]
157 | 
158 | # Plot the dendrogram
159 | pdf(paste0(outputFolder,"Fig1c_clusterDendrogramFinal.pdf"),height=12,width=20)
160 | main = paste("Based on top",topNgenes,"specificity genes, correlation distance")
161 | label_color2 = clusterInfo$cluster_color[match(dend %>% labels,clusterInfo$cluster_label)]
162 | rankCompleteDendPlot(dend=dend,label_color=label_color2,main=main,node_size=4)
163 | dev.off()
164 | 
165 | pdf(paste0(outputFolder,"Fig1c_clusterDendrogramFinal_small.pdf"),height=5,width=10)
166 | main = paste("Based on top",topNgenes,"specificity genes, correlation distance")
167 | label_color2 = clusterInfo$cluster_color[match(dend %>% labels,clusterInfo$cluster_label)]
168 | rankCompleteDendPlot(dend=dend,label_color=label_color2,main=main,node_size=0.0001)
169 | dev.off()
170 | 
171 | #-----------------------------------------------------------------------------------------------
172 | # Not needed since this information is written in a previous code document
173 | #print("Rewrite the updated data files to a new directory")
174 | 
175 | #newInputFolder = paste0(inputFolder,"humanNew")
176 | #dir.create(newInputFolder)
177 | 
178 | #exprOut = Expr.dat[match(sampleInfo$sample_id,Expr.dat$sample_id),]
179 | #desc <- read_feather(paste(inputFolder,"human/desc.feather",sep="")) 
180 | 
181 | #write_feather(exprOut,paste(newInputFolder,"/data.feather",sep=""))   # CPM
182 | #write_feather(sampleInfo,paste(newInputFolder,"/anno.feather",sep="")) 
183 | #write_feather(desc,paste(newInputFolder,"/desc.feather",sep="")) 
184 | #save(dend, file=paste0(newInputFolder,"/dend.rda")) 
185 | 
186 | save(clusterInfo,file=paste0(dataFolder,"/clusterInfo.rda"))
187 | ## WE MIGHT NEED "desc.feather"
188 | 
189 | 
190 | 
191 | 


--------------------------------------------------------------------------------
/R/Code_MatchHumanMouse.r:
--------------------------------------------------------------------------------
  1 | # This code identifies and plots genes with common patterns in mouse and human
  2 | 
  3 | # FIRST NEED TO SET THE WORKING DIRECTORY TO THE MAIN FOLDER
  4 | # This is the folder where "Start.RData" is located
  5 | 
  6 | outputFolder  = "output/"
  7 | scriptsFolder = "R/"
  8 | inputFolder   = "data/"
  9 | humanFolder   = "MTG/"
 10 | mouseFolder   = "mouse/"
 11 | 
 12 | library(Hmisc)
 13 | library(gplots)
 14 | library(dplyr);
 15 | library(feather)
 16 | 
 17 | source(paste0(scriptsFolder,"Support_heatmap_functions.R")) 
 18 | source(paste0(scriptsFolder,"Support_violin_functions.R"))
 19 | 
 20 | ########################################################################################
 21 | print("Read in the annotations and homology conversions.")
 22 | 
 23 | annoH <- read_feather(paste(humanFolder,"anno.feather",sep="")) 
 24 | annoM <- read_feather(paste(mouseFolder,"anno.feather",sep="")) 
 25 | mhConvert <- read.csv(paste(inputFolder,"human_mouse_homology.csv",sep="")) 
 26 | 
 27 | 
 28 | ########################################################################################
 29 | print("Update anno.feather files with homology information above.")
 30 | 
 31 | mhOut <- mhConvert[,c("homol_type","tree_order","cluster_color")]
 32 | colnames(mhOut) <- c("homology_cluster_label","homology_cluster_id","homology_cluster_color")
 33 | mhHuman <- as.data.frame(mhOut[match(annoH$cluster_label,mhConvert$cluster),])
 34 | mhMouse <- as.data.frame(mhOut[match(annoM$cluster_label,mhConvert$cluster),])
 35 | 
 36 | ## Update human
 37 | 
 38 | if(!file.exists(paste0(humanFolder,"anno_original.feather"))) 
 39 |   write_feather(annoH,paste0(humanFolder,"anno_original.feather"))
 40 | 
 41 | annoH2 <- cbind(annoH,as.data.frame(mhHuman))  
 42 | 
 43 | file.remove(paste0(humanFolder,"anno.feather"))
 44 | write_feather(annoH2,paste0(humanFolder,"anno.feather"))
 45 | 
 46 | ## Update mouse
 47 | 
 48 | if(!file.exists(paste0(mouseFolder,"anno_original.feather"))) 
 49 |   write_feather(annoM,paste0(mouseFolder,"anno_original.feather"))
 50 | 
 51 | annoM2 <- cbind(annoM,as.data.frame(mhMouse))  
 52 | 
 53 | file.remove(paste0(mouseFolder,"anno.feather"))
 54 | write_feather(annoM2,paste0(mouseFolder,"anno.feather"))
 55 | 
 56 | 
 57 | 
 58 | ########################################################################################
 59 | print("Plot serotonin receptors for Fig 6e.")
 60 | 
 61 | serotoninH = c("HTR1D","HTR1F","HTR2A","HTR2C","HTR4","HTR5A","HTR6","HTR7","HTR3A","HTR3B")
 62 |   
 63 | human_plot <- group_heatmap_plot(data_source = humanFolder,
 64 |                    genes = serotoninH,
 65 |                    group_by = "homology_cluster",
 66 |                    clusters = 1:37,
 67 |                    calculation = "trimmed_mean",
 68 |                    labelheight = 50,
 69 |                    showcounts = TRUE)
 70 | ggsave(paste0(outputFolder,"Fig6e_serotonin_genes_human.pdf"),human_plot,height = 4, width = 6)
 71 |  
 72 | mouse_plot <- group_heatmap_plot(data_source = mouseFolder,
 73 |                    genes = capitalize(tolower(serotoninH)),
 74 |                    group_by = "homology_cluster",
 75 |                    clusters = 1:37,
 76 |                    calculation = "trimmed_mean",
 77 |                    labelheight = 50,
 78 |                    showcounts = TRUE)
 79 | ggsave(paste0(outputFolder,"Fig6e_serotonin_genes_mouse.pdf"),mouse_plot,height = 4, width = 6)
 80 | 
 81 | 
 82 | 
 83 | 
 84 | ########################################################################################
 85 | print("Plot common marker genes in mouse and human (identified elsewhere) for Fig ED13.")
 86 | 
 87 | common_genes = c("SV2C", "KIRREL", "NOS1", "HCRTR2", "TGFB2", "SP8", "PAX6", "FOXO1", "ST3GAL4",
 88 |                  "MYLK", "NDNF", "NR2E1", "RXRG", "NDST4", "SALL1", "ANGPT1", "SHISA8", "PPAPDC1A", 
 89 | 				 "PTGDS", "STAC", "CDHR1", "NDRG1", "LDLR", "RNF152", "NTN1", "CHODL", "CUEDC1", "EOGT", 
 90 | 				 "TACR1", "CRHBP", "TMCC3", "ARHGEF40", "MOXD1", "B3GAT2", "PYGO1", "KLHL14", "GXYLT2", 
 91 | 				 "TMTC4", "CBLN4", "PVALB", "TAC1", "MYO5B", "WIF1", "HHATL", "BCAN", "MYO1E", "BACH1", 
 92 | 				 "FRMPD1", "SLC39A14", "C1QL1", "PTHLH", "MME", "PLEKHH2", "UNC5B", "GFRA1", "ZAK", 
 93 | 				 "TNNT2", "PRDM8", "RMST", "RORB", "S100A6", "PTER", "PTGFRN", "SNTG2", "PLXND1", "MET", 
 94 | 				 "RTKN2", "CLMN", "MYLIP", "LIG4", "CROT", "PLCG1", "DAPK2", "RMDN2", "ADAMTS3", "ACVR1C", 
 95 | 				 "SLC24A4", "MTHFR", "NR4A2", "FAP", "FGF10", "OPRK1", "CCDC134", "MEI1", "AMZ1", 
 96 | 				 "PLEKHA2", "MAPK12", "CACNA1H", "RGMA", "CHST8", "SCN4B", "AGPAT9", "APEH", "SYT2", 
 97 | 				 "KCNN1", "HYOU1", "TRPC3", "SHB", "NXPH4", "NXPH3", "CTGF", "MAOB", "RRP1B", "OLIG2", 
 98 | 				 "CSPG4", "ZCCHC24", "LRP4", "VCAN", "PDGFRA", "EMID1", "CALCRL", "AFAP1L2", "FGFR3", 
 99 | 				 "GJA1", "F3", "MLC1", "ETNPPL", "GRIN2C", "AQP4", "PRDM16", "SLC25A18", "OAF", "PPP1R14A", 
100 | 				 "ERMN", "OPALIN", "MOG", "MAL", "TRIM59", "MAG", "MYRF", "FA2H", "GPR37", "XAF1", "ITIH5", 
101 | 				 "LEF1", "EMCN", "UACA", "TNS1", "APCDD1", "EBF1", "CGNL1", "ATP10A", "C1QC", "LST1", 
102 | 				 "C1QB", "FGD2", "CX3CR1", "TYROBP", "LTC4S", "LY86", "LAPTM5", "CD86")
103 |   
104 | human_plot <- group_heatmap_plot(data_source = humanFolder,
105 |                    genes = common_genes,
106 |                    group_by = "homology_cluster",
107 |                    clusters = 1:37,
108 |                    calculation = "median",
109 |                    labelheight = 5,
110 |                    showcounts = TRUE)
111 | ggsave(paste0(outputFolder,"FigED13_common_genes_human.pdf"),human_plot,height = 20, width = 6)
112 | 
113 | 
114 |  
115 | mouse_plot <- group_heatmap_plot(data_source = mouseFolder,
116 |                    genes = capitalize(tolower(common_genes)),
117 |                    group_by = "homology_cluster",
118 |                    clusters = 1:37,
119 |                    calculation = "median",
120 |                    labelheight = 5,
121 |                    showcounts = TRUE)
122 | ggsave(paste0(outputFolder,"FigED13_common_genes_mouse.pdf"),mouse_plot,height = 20, width = 6)
123 | 
124 | 


--------------------------------------------------------------------------------
/R/Code_QCplots.R:
--------------------------------------------------------------------------------
  1 | # This code builds the QC jitterplots
  2 | 
  3 | # FIRST NEED TO SET THE WORKING DIRECTORY TO THE MAIN FOLDER
  4 | # This is the folder where "Start.RData" is located
  5 | 
  6 | #-----------------------------------------------------------------------------------------------
  7 | print("Read in the data and load relevant libraries")
  8 | outputFolder  = "output/"
  9 | scriptsFolder = "R/"
 10 | inputFolder   = "data/"
 11 | dataFolder    = "MTG/"
 12 | 
 13 | library(dplyr)
 14 | library(ggplot2)
 15 | library(ggbeeswarm)
 16 | library(feather)
 17 | 
 18 | ## Add a cluster order variable to the annotation feather for plotting
 19 | 
 20 | load(paste0(outputFolder,"dend.RData"))
 21 | clord <- 1:length(labels(dend))
 22 | names(clord) <- substr(labels(dend),3,4)
 23 | names(clord) <- gsub("_","",names(clord))
 24 | anno <- read_feather(paste0(dataFolder,"anno.feather"))
 25 | if(!file.exists(paste0(dataFolder,"anno_original.feather"))) 
 26 |   write_feather(anno,paste0(dataFolder,"anno_original.feather"))
 27 | 
 28 | anno$dendcluster_id    <- as.numeric(as.character(anno$cluster_id))
 29 | #anno$dendcluster_id[is.na(anno$dendcluster_id)] = max(anno$dendcluster_id)+5
 30 | anno$dendcluster_label <- paste0("cl",anno$cluster_id)
 31 | anno$dendcluster_color <- anno$cluster_color
 32 | file.remove(paste0(dataFolder,"anno.feather"))
 33 | 
 34 | 
 35 | ## Read in and add the cluster separability
 36 | 
 37 | cc.mean <- read.csv(paste0(inputFolder,"mtg_cluster_separability.csv"))
 38 | anno$cc.mean.within_label <- cc.mean[match(anno$seq_name_label,cc.mean$sample_id),"cc.min.diff"]
 39 | write_feather(anno,paste0(dataFolder,"anno.feather"))
 40 | 
 41 | 
 42 | dendcluster_anno <- anno %>%
 43 |   select(dendcluster_id, dendcluster_label, dendcluster_color) %>%
 44 |   unique()
 45 | 
 46 | ## Required functions
 47 | summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
 48 |                       conf.interval=.95, .drop=TRUE, roundall = F) {
 49 |   require(dplyr)
 50 |   # This does the summary. For each group return a vector with
 51 |   # N, mean, and sd
 52 |   
 53 |   names(data)[names(data) == measurevar] <- "measurevar"
 54 |   
 55 |   datac <- data %>%
 56 |     select(one_of(groupvars,"measurevar")) %>%
 57 |     filter(ifelse(na.rm == T, !is.na(measurevar), T)) %>%
 58 |     mutate(measurevar = as.numeric(measurevar)) %>%
 59 |     group_by_(c(groupvars)) %>%
 60 |     summarise(N = n(),
 61 |               median = median(measurevar),
 62 |               mean = mean(measurevar),
 63 |               max = max(measurevar),
 64 |               sd = ifelse(N == 1, 0, sd(measurevar)),
 65 |               q25 = as.numeric(quantile(measurevar, 0.25, na.rm=na.rm)),
 66 |               q75 = as.numeric(quantile(measurevar, 0.75, na.rm=na.rm))) %>%
 67 |     mutate(se = sd/sqrt(N))
 68 |   #%>%
 69 |   #  mutate(ci =  se * qt(conf.interval/2 + 0.5, N-1))
 70 |   
 71 |   
 72 |   if(roundall) {
 73 |     roundcols <- c("median","mean","max","sd","q25","q75","se","ci")
 74 |     datac[roundcols] <- round(datac[roundcols],3)
 75 |   }
 76 |   
 77 |   # datac <- datac %>%
 78 |   #   mutate(xpos = 1:n())
 79 |   
 80 |   return(datac)
 81 | }
 82 | 
 83 | 
 84 | # This function makes the actual plots!
 85 | qcPlot <- function(name,scaleLimits = c(-5000, 12000), 
 86 |                      scaleBreaks = seq(0, 12000, 2000),
 87 |                      scaleLabels = seq(0,12,2),
 88 | 					 ylab = "value",
 89 | 					 fileName = gsub("\\.","_",gsub("_label","",name)),
 90 | 					 na.rm = FALSE)
 91 | {
 92 | 
 93 | # dendcluster_id is the annotation for cluster ordering based on the current, bootstrapped dendrogram
 94 | stats <- summarySE(data = anno,
 95 |                          measurevar = name,
 96 |                          groupvars = "dendcluster_id",
 97 | 						 na.rm = na.rm)
 98 | 
 99 | 
100 | genes_plot <- ggplot() +
101 |   # geom_quasirandom from the ggbeeswarm package
102 |   # makes violin-shaped jittered point plots
103 |   geom_quasirandom(data = anno,
104 |                    aes(x = dendcluster_id,
105 |                        y = eval(parse(text=name)),  # might need eval(parse(text=name))
106 |                        color = dendcluster_color),  # "skyblue"
107 |                    # Need to set position_jitter height = 0 to prevent
108 |                    # jitter on the y-axis, which changes data representation
109 |                    position = position_jitter(width = .3,height = 0),
110 |                    size = 0.1) +
111 |   # Errorbars built using stats values
112 |   geom_errorbar(data = stats,
113 |                 aes(x = dendcluster_id,
114 |                     ymin = q25,
115 |                     ymax = q75),
116 |                 size = 0.2) +
117 |   # Median points from stats
118 |   geom_point(data = stats,
119 |              aes(x = dendcluster_id,
120 |                  y = median),
121 |              color = "red",
122 |              size = 0.5) +
123 |   # Cluster labels as text objects
124 |   geom_text(data = dendcluster_anno,
125 |             aes(x = dendcluster_id,
126 |                 y = 0,
127 |                 label = dendcluster_label,
128 |                 color = dendcluster_color),
129 |             angle = 90,
130 |             hjust = 2,
131 |             vjust = 0.3,
132 |             size = 2*5/6) +
133 |   scale_color_identity() +
134 |   # Expand the y scale so that the labels are visible
135 |   scale_y_continuous(ylab,
136 |                      limits = scaleLimits, 
137 |                      breaks = scaleBreaks,
138 |                      labels = scaleLabels) +
139 |   # Remove X-axis title
140 |   scale_x_continuous("") +
141 |   theme_bw() +
142 |   # Theme tuning
143 |   theme(axis.text.x = element_blank(),
144 |         axis.ticks = element_blank(),
145 |         panel.border = element_blank(),
146 |         panel.grid.major.x = element_blank(),
147 |         panel.grid.minor.x = element_blank())
148 | 
149 | ggsave(paste0(outputFolder,fileName,"_QC.pdf"), genes_plot, width = 8, height = 2, useDingbats = F)
150 | 
151 | }
152 | 
153 | 
154 | 
155 | ## Make the QC plots for numeric variables!
156 | 
157 | qcPlot("genes_detected_cpm_criterion_label",scaleLimits = c(-2000, 14000), scaleBreaks = seq(0, 14000, 2000), 
158 |   scaleLabels = seq(0,14,2),ylab="Genes Detected (thousands)",fileName="FigED1d_genes_detected")
159 | qcPlot("total_reads_label",scaleLimits = c(-2000000, 7000000), scaleBreaks = seq(0, 7000000, 1000000), 
160 |   scaleLabels = seq(0,7,1),ylab="Total reads (millions)",fileName="FigED1c_total_reads")
161 | qcPlot("cc.mean.within_label",scaleLimits = c(-0.25,1), scaleBreaks = seq(0, 1, 0.25), na.rm=TRUE, 
162 |   scaleLabels = seq(0,1,0.25),ylab="Mean w/in cluster CC",fileName="FigED3a_cluster_separability")
163 | 
164 | 
165 |   


--------------------------------------------------------------------------------
/R/Code_correlationWithOtherDatasets.r:
--------------------------------------------------------------------------------
  1 | # This code makes correlation plots comparing MTG with other data sets.
  2 | 
  3 | # FIRST NEED TO SET THE WORKING DIRECTORY TO THE MAIN FOLDER
  4 | # This is the folder where "Start.RData" is located
  5 | 
  6 | # SECOND, create a "comparison" directory, download the following files, and save them there:
  7 | # Lake et al 2016: NOTE, YOU NEED TO ASK PERMISSION TO ACCESS THESE DATA: https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/molecular.cgi?study_id=phs000833.v7.p1&phv=219211&phd=4779&pha=&pht=4360&phvf=&phdf=&phaf=&phtf=1&dssp=1&consent=&temp=1
  8 | # Lake et al 2018:  "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE97930&format=file&file=GSE97930%5FFrontalCortex%5FsnDrop%2Dseq%5FUMI%5FCount%5FMatrix%5F08%2D01%2D2017%2Etxt%2Egz"
  9 | # Habib et al: https://storage.googleapis.com/gtex_additional_datasets/single_cell_data/GTEx_droncseq_hip_pcf.tar
 10 | # Boldog et al: https://github.com/AllenInstitute/L1_rosehip/tree/master/data/human 
 11 | 
 12 | #-----------------------------------------------------------------------------------------------
 13 | print("Read in the data and load relevant libraries")
 14 | outputFolder  = "output/"
 15 | scriptsFolder = "R/"
 16 | dataFolder    = "data/"
 17 | mtgFolder     = "MTG/"
 18 | compFolder    = "comparison/"
 19 | 
 20 | #-----------------------------------------------------------------------------------------------
 21 | # Load these libraries
 22 | library(stringr)
 23 | library(lowcat)
 24 | library(feather)
 25 | library(dplyr)
 26 | library(scrattch.io)
 27 | library(matrixStats)
 28 | library(ggplot2)
 29 | options(stringsAsFactors = F)
 30 | 
 31 | # Read in the extra scripts
 32 | source(paste0(scriptsFolder,"Support_extraFunctions.r"))
 33 | 
 34 | #-----------------------------------------------------------------------------------------------
 35 | # Read in the MTG data
 36 | anno       <- read_feather(paste(mtgFolder,"anno.feather",sep="")) 
 37 | Expr.dat   <- feather(paste(mtgFolder,"data.feather",sep=""))   # FPKM
 38 | Expr.dat   <- Expr.dat[match(anno$sample_id,Expr.dat$sample_id),] # Make sure the expression matches the sample information
 39 | datIn      <- as.matrix(Expr.dat[,names(Expr.dat)!="sample_id"])
 40 | rownames(datIn) <- Expr.dat$sample_id
 41 | datIn      <- t(datIn)
 42 | norm.dat   <- log2(datIn+1)
 43 | 
 44 | 
 45 | # Get median expression per cluster and the proportions
 46 | clm        = anno$cluster_id
 47 | names(clm)  = Expr.dat$sample_id
 48 | exprThresh = 1
 49 | medianExpr = do.call("cbind", tapply(names(clm), clm, function(x) rowMedians(datIn[,x]))) 
 50 | propExpr   = do.call("cbind", tapply(names(clm), clm, function(x) rowMeans(norm.dat[,x]>exprThresh))) 
 51 | rownames(medianExpr) <- rownames(propExpr) <- rownames(norm.dat)  
 52 | colnames(medianExpr) <- colnames(propExpr) <- anno$cluster_label[match(colnames(medianExpr),as.character(anno$cluster_id))]
 53 | 
 54 | 
 55 | # Select features based on beta score >0.4
 56 | betaScore  = getBetaScore(propExpr)
 57 | marker_scores_genes_subset <- names(betaScore)[betaScore>0.4]
 58 | 
 59 | 
 60 | 
 61 | #-----------------------------------------------------------------------------------------------
 62 | # Define the study data
 63 | 
 64 | clusters <- list(Lake2018="Lake2018_clusters.csv",
 65 |                  Habib2017="Habib2017_clusters.csv",
 66 | 				 Boldog="Boldog2018_clusters.csv")
 67 | datas    <- list(Lake2018="GSE97930_FrontalCortex_snDrop-seq_UMI_Count_Matrix_08-01-2017.txt",
 68 |                  Habib2017="GTEx_droncseq_hip_pcf.umi_counts.txt",
 69 | 				 Boldog2018="data_boldog.txt")
 70 | corVal   <- list(Lake2018=0.4, Habib2017=0.3, Boldog2018=0)
 71 | 				 
 72 | # Habib is actually PFC and hippocampus and has N=11317 if we exclude cells mapping to hippocampus clusters
 73 | # Boldog has 872 non-outlier clusters
 74 | # There are different correlation cutoffs for the different data sets as currently presented
 75 | 				 
 76 | for (s in 1:length(datas)){
 77 |   # Read in the study data
 78 |   dataIn     <- read.csv(paste0(compFolder,datas[[s]]), sep = "\t", row.names=1)
 79 |   dataIn     <- as.matrix(dataIn)
 80 |   annotation <- read.csv(paste0(dataFolder,clusters[[s]]),row.names=1)
 81 |   rownames(annotation) <- make.names(rownames(annotation))
 82 |   data       <- dataIn[,rownames(annotation)] # make the order match
 83 |   
 84 |   # subsetting comparison (comp) data
 85 |   new_comp_frame <- log2(data[rownames(data) %in% marker_scores_genes_subset,]+1)
 86 |   new_ref_count  <- log2(medianExpr[rownames(new_comp_frame),]+1) 
 87 | 
 88 |   # Run Pearson correlation (from lowcat)
 89 |   comp_annotated <- max_column_correlation(new_comp_frame, new_ref_count, method = "pearson")
 90 |   names(comp_annotated) <- c("sample_id","cluster_cor","cluster_label")
 91 | 
 92 |   # Plot the results
 93 |   kpSamp  <- comp_annotated$cluster_cor >= corVal[[s]]
 94 |   comp.cl <- droplevels(factor(comp_annotated$cluster_label[kpSamp],levels <- colnames(medianExpr))) 
 95 |   ref.cl  <- droplevels(factor(annotation$cluster[kpSamp],levels=sort(unique(annotation$cluster),decreasing =TRUE)))
 96 |   plotOut <- plot_annotation_comparison(comp.cl,ref.cl,anno,names(datas)[s]) 
 97 |   plotOut
 98 | 
 99 |   # Save the plots
100 |   ggsave(paste0(outputFolder,"FigED5_",names(datas)[s],"_correlation.pdf"),plotOut)
101 | }
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 
108 | 
109 | 


--------------------------------------------------------------------------------
/R/Code_metaNeighbor_human.r:
--------------------------------------------------------------------------------
  1 | # This code runs meta-neighbor in human
  2 | 
  3 | # FIRST NEED TO SET THE WORKING DIRECTORY TO THE MAIN FOLDER
  4 | # This is the folder where "Start.RData" is located
  5 | 
  6 | print("-----Run MetaNeighbor: https://github.com/maggiecrow/MetaNeighbor")
  7 | 
  8 | ########################################################################################
  9 | print("Load relevant libraries and functions")
 10 | 
 11 | outputFolder  = "output/"
 12 | scriptsFolder = "R/"
 13 | inputFolder   = "data/"
 14 | dataFolder    = "MTG/"
 15 | 
 16 | library(gplots)
 17 | library(RColorBrewer)
 18 | library(dplyr);
 19 | library(feather)
 20 | library(readxl)
 21 | library(WGCNA)
 22 | 
 23 | source(paste0(scriptsFolder,"Support_2016-11-03-runMetaNeighbor.R"))  # MetaNeighbor code
 24 | 
 25 | ########################################################################################
 26 | print("Read in the data")
 27 | 
 28 | anno       <- read_feather(paste(dataFolder,"anno.feather",sep="")) 
 29 | Expr.dat   <- feather(paste(dataFolder,"data.feather",sep=""))   # FPKM
 30 | Expr.dat   <- Expr.dat[match(anno$sample_id,Expr.dat$sample_id),] # Make sure the expression matches the sample information
 31 | datIn      <- as.matrix(Expr.dat[,names(Expr.dat)!="sample_id"])
 32 | rownames(datIn) <- Expr.dat$sample_id
 33 | datIn      <- t(datIn)
 34 | 
 35 | kpSamp     <- is.element(anno$class_label,c("GABAergic","Glutamatergic","Non-neuronal"))
 36 | cl3        <- factor(anno$cluster_label,levels = sort(unique(anno$cluster_label)))
 37 | names(cl3) <- colnames(datIn)
 38 | clustersF  <- droplevels(cl3[kpSamp])
 39 | clusts     <- levels(clustersF)
 40 | clustTypes <- anno$class_label[kpSamp]
 41 | 
 42 | # Reorder clusters to match the tree
 43 | load(paste0(dataFolder,"dend.rda"))
 44 | ord = match(labels(dend),clusts)
 45 | clusts     <- levels(clustersF) <- clusts[ord]
 46 | clustCols  <- anno$cluster_color[match(clusts,anno$cluster_label)]
 47 | cl3        <- factor(anno$cluster_label,levels = clusts)
 48 | names(cl3) <- colnames(datIn)
 49 | clustersF  <- droplevels(cl3[kpSamp])
 50 | clusters   <- sort(unique(as.character(clustersF)))
 51 | 
 52 | # Subset the data to only include genes that are expressed
 53 | norm.dat   <- datIn[,kpSamp]
 54 | norm.dat   <- norm.dat[rowMeans(norm.dat)>1,]
 55 | 
 56 | 
 57 | ########################################################################################
 58 | print("Read in gene sets from Paul et al 2017 paper to directly compare results with previous data set.")
 59 | 
 60 | hgncS3   = read_excel(paste0(inputFolder,"Paul2017_Table S3.xlsx"), skip=3)
 61 | customS4 = read_excel(paste0(inputFolder,"Paul2017_Table S4.xlsx"), skip=2) # This is also not used in the current file I sent
 62 | 
 63 | genesets = list()
 64 | for (i in 1:dim(hgncS3)[1]){
 65 |  gs  = as.character(hgncS3[i,1])
 66 |  gns = as.character(hgncS3[i,])
 67 |  gns = toupper(gns[!is.na(gns)])
 68 |  gns = sort(intersect(gns,rownames(norm.dat)))
 69 |  if(length(gns)>=5) genesets[[gs]] = gns
 70 | }
 71 | for (i in 1:dim(customS4)[1]){
 72 |  gs  = as.character(customS4[i,1])
 73 |  gns = as.character(customS4[i,])
 74 |  gns = toupper(gns[!is.na(gns)])
 75 |  gns = sort(intersect(gns,rownames(norm.dat)))
 76 |  if(length(gns)>=5) genesets[[gs]] = gns
 77 | }
 78 | 
 79 | kpGene = NULL
 80 | for (set in names(genesets))   kpGene = c(kpGene,genesets[[set]])
 81 | kpGene = sort(unique(kpGene))
 82 | 
 83 | 
 84 | ########################################################################################
 85 | print("Subset into two groups of up to 20 random cells per group, 10 times (to get error bars on our values).")
 86 | 
 87 | subSamp  = 20
 88 | numIters = 10 
 89 | useMeL <- rand.labL <- list()
 90 | for (j in 1:numIters){
 91 |  seed = j
 92 |  rand.lab = rep(0,length(clustersF))
 93 |  names(rand.lab) = clustersF
 94 |  for (cli in clusters){
 95 |   set.seed(seed)
 96 |   seed   = seed+1*j
 97 |   kp     = which(clustersF==cli)
 98 |   ord    = sample(1:length(kp),min(length(kp),subSamp))
 99 |   ord1   = ord[1:round(length(ord)/2)]
100 |   ord2   = ord[(1+round(length(ord)/2)):length(ord)]
101 |   rand.lab[kp[ord1]] = 1
102 |   rand.lab[kp[ord2]] = 2
103 |  }
104 |  useMeL[[j]] = rand.lab>0
105 |  rand.labL[[j]]  = rand.lab
106 | }
107 | 
108 | ########################################################################################
109 | print("Build the variables and run MetaNeighbor (this takes **SEVERAL HOURS** to run)")
110 | 
111 | clType  = c("GABAergic","Glutamatergic","Non-neuronal")
112 | clTypes = NULL
113 | for (cli in clusters) clTypes = c(clTypes,names(sort(-table(anno$class_label[anno$cluster_label==cli])))[1])
114 | names(clTypes) = clusters
115 | 
116 | suppressWarnings(dir.create(paste0(outputFolder,"auroc")))
117 | aurocL = list()
118 | for (j in 1:numIters){
119 |  auroc = NULL
120 |  for (cli in clusters) {
121 |   print(paste(j,"-",cli))
122 |   sameClass    = clustTypes == clTypes[cli]
123 |   useMe        = useMeL[[j]]&sameClass
124 |   rand.lab     = rand.labL[[j]]
125 |   dataTmp      = norm.dat[kpGene,useMe]
126 |   randexp.lab  = as.character(rand.lab[useMe])
127 |   celltype.lab = t(t((clustersF[useMe]==cli)-1+1))
128 |   rownames(celltype.lab) = names(clustersF[useMe])
129 |  
130 |   fn = paste0(outputFolder,"auroc/MN_tmp")
131 |   AUROC.scores = suppressWarnings(run_MetaNeighbor(data = dataTmp, experiment_labels = randexp.lab, 
132 |       celltype_labels = celltype.lab, genesets = genesets, file_ext=fn))
133 |   auroc = rbind(auroc,AUROC.scores)
134 |  }
135 |  rownames(auroc) = clusters
136 |  aurocL[[j]] = auroc 
137 |  save(aurocL,file=paste0(outputFolder,"aurocValues.rda"))
138 | }
139 | 
140 | # Average out the permutation runs (may not make sense to do this here)
141 | auroc <- maxAuroc <- minAuroc <- aurocL[[1]]
142 | for (j in 2:length(aurocL)) {
143 |   auroc = aurocL[[j]] + auroc
144 |   minAuroc = pmin(minAuroc,aurocL[[j]])
145 |   maxAuroc = pmax(maxAuroc,aurocL[[j]])
146 | }
147 | auroc = auroc / length(aurocL)
148 | 
149 | # Estimate of expected range over 10 iterations 
150 | mean(maxAuroc-minAuroc)
151 | # [1] 0.2261661
152 | 
153 | 
154 | ########################################################################################
155 | print("Summarize results for different functional categories.")
156 | 
157 | getAurocSD <- function(aurocIn,kp){
158 |   tmp=NULL
159 |   for (i in 1:length(aurocIn))
160 | 	tmp = rbind(tmp,colMeans(aurocIn[[i]][kp,],na.rm=TRUE))
161 |   out = apply(tmp,2,sd,na.rm=TRUE)
162 |   return(out)
163 | }
164 | 
165 | meanAuroc <- sdAuroc <- NULL
166 | for (cli in clType){
167 |   meanAuroc = cbind(meanAuroc,colMeans(auroc[clTypes==cli,],na.rm=TRUE))
168 |   sdAuroc = cbind(sdAuroc,getAurocSD(aurocL,clTypes==cli))
169 | }
170 | colnames(meanAuroc) <- paste("Mean",clType)
171 | colnames(sdAuroc) <- paste("SD",clType)
172 | numGenes  = NULL
173 | for (nm in rownames(meanAuroc)) numGenes = c(numGenes,length(genesets[[nm]]))
174 | meanAuroc = cbind(meanAuroc,sdAuroc,numGenes)
175 | meanAuroc = meanAuroc[order(-rowMeans(meanAuroc[,1:2])),]
176 | write.csv(meanAuroc,paste0(outputFolder,"meanAuroc_human.csv"))
177 | 
178 | print("These values will be compared with mouse V1/ALM in the next code document.")
179 | 


--------------------------------------------------------------------------------
/R/Code_metaNeighbor_mouse.r:
--------------------------------------------------------------------------------
  1 | # This code runs meta-neighbor in mouse and compares the result to human
  2 | 
  3 | # FIRST NEED TO SET THE WORKING DIRECTORY TO THE MAIN FOLDER
  4 | # This is the folder where "Start.RData" is located
  5 | 
  6 | print("-----Run MetaNeighbor: https://github.com/maggiecrow/MetaNeighbor")
  7 | 
  8 | ########################################################################################
  9 | print("Load relevant libraries and functions")
 10 | 
 11 | outputFolder  = "output/"
 12 | scriptsFolder = "R/"
 13 | inputFolder   = "data/"
 14 | vispFolder    = "VISp/"
 15 | almFolder     = "ALM/"
 16 | 
 17 | library(gplots)
 18 | library(RColorBrewer)
 19 | library(dplyr);
 20 | library(feather)
 21 | library(readxl)
 22 | library(WGCNA)
 23 | 
 24 | source(paste0(scriptsFolder,"Support_2016-11-03-runMetaNeighbor.R"))  # MetaNeighbor code
 25 | 
 26 | ########################################################################################
 27 | print("Read in the data")
 28 | 
 29 | annoV <- read_feather(paste(vispFolder,"anno.feather",sep="")) 
 30 | ExprV <- feather(paste(vispFolder,"data.feather",sep=""))   
 31 | ExprV <- ExprV[match(annoV$sample_id,ExprV$sample_id),] # Make sure the expression matches the sample information
 32 | 
 33 | annoA <- read_feather(paste(almFolder,"anno.feather",sep="")) 
 34 | ExprA <- feather(paste(almFolder,"data.feather",sep=""))   
 35 | ExprA <- ExprA[match(annoA$sample_id,ExprA$sample_id),] # Make sure the expression matches the sample information
 36 | 
 37 | datV  <- as.matrix(ExprV[,colnames(ExprV)!="sample_id"])
 38 | datA  <- as.matrix(ExprA[,colnames(ExprA)!="sample_id"])
 39 | datM  <- log2(rbind(datV,datA)+1)
 40 | datM  <- t(datM)
 41 | 
 42 | annoM <- rbind(annoV,annoA)
 43 | 
 44 | # Subset to only include relevant clusters
 45 | clustersM <- scan(paste0(inputFolder,"mouseClusters.txt"),what="character",sep="\n")
 46 | kpSampM   <- is.element(annoM$cluster_label,clustersM)
 47 | datM      <- datM[,kpSampM]
 48 | annoM     <- annoM[kpSampM,]
 49 | 
 50 | 
 51 | #############################
 52 | print("Properly format the data")
 53 | 
 54 | kpSamp     <- is.element(annoM$class_label,c("GABAergic","Glutamatergic","Non-Neuronal","Endothelial"))
 55 | cl3        <- factor(annoM$cluster_label,levels = sort(unique(annoM$cluster_label)))
 56 | names(cl3) <- colnames(datM)
 57 | clustersF  <- droplevels(cl3[kpSamp])
 58 | clusts     <- levels(clustersF)
 59 | clustTypes <- annoM$class_label[kpSamp]
 60 | 
 61 | # Reorder clusters to match the tree
 62 | ord        <- match(clustersM,clusts)
 63 | clusts     <- levels(clustersF) <- clusts[ord]
 64 | clustCols  <- annoM$cluster_color[match(clusts,annoM$cluster_label)]
 65 | cl3        <- factor(annoM$cluster_label,levels = clusts)
 66 | names(cl3) <- colnames(datM)
 67 | clustersF  <- droplevels(cl3[kpSamp])
 68 | clusters   <- sort(unique(as.character(clustersF)))
 69 | 
 70 | # Subset the data to only include genes that are expressed
 71 | norm.dat   <- datM[,kpSamp]
 72 | norm.dat   <- norm.dat[rowMeans(norm.dat)>1,]
 73 | 
 74 | 
 75 | ########################################################################################
 76 | print("Read in gene sets from Paul et al 2017 paper to directly compare results with previous data set.")
 77 | 
 78 | hgncS3   = read_excel(paste0(inputFolder,"Paul2017_Table S3.xlsx"), skip=3)
 79 | customS4 = read_excel(paste0(inputFolder,"Paul2017_Table S4.xlsx"), skip=2) # This is also not used in the current file I sent
 80 | 
 81 | genesets = list()
 82 | for (i in 1:dim(hgncS3)[1]){
 83 |  gs  = as.character(hgncS3[i,1])
 84 |  gns = as.character(hgncS3[i,])
 85 |  gns = sort(intersect(gns,rownames(norm.dat)))
 86 |  if(length(gns)>=5) genesets[[gs]] = gns
 87 | }
 88 | for (i in 1:dim(customS4)[1]){
 89 |  gs  = as.character(customS4[i,1])
 90 |  gns = as.character(customS4[i,])
 91 |  gns = sort(intersect(gns,rownames(norm.dat)))
 92 |  if(length(gns)>=5) genesets[[gs]] = gns
 93 | }
 94 | 
 95 | kpGene = NULL
 96 | for (set in names(genesets))   kpGene = c(kpGene,genesets[[set]])
 97 | kpGene = sort(unique(kpGene))
 98 | 
 99 | 
100 | ########################################################################################
101 | print("Subset into two groups of up to 20 random cells per group, 10 times (to get error bars on our values).")
102 | 
103 | subSamp  = 20
104 | numIters = 10 
105 | useMeL <- rand.labL <- list()
106 | for (j in 1:numIters){
107 |  seed = j
108 |  rand.lab = rep(0,length(clustersF))
109 |  names(rand.lab) = clustersF
110 |  for (cli in clusters){
111 |   set.seed(seed)
112 |   seed   = seed+1*j
113 |   kp     = which(clustersF==cli)
114 |   ord    = sample(1:length(kp),min(length(kp),subSamp))
115 |   ord1   = ord[1:round(length(ord)/2)]
116 |   ord2   = ord[(1+round(length(ord)/2)):length(ord)]
117 |   rand.lab[kp[ord1]] = 1
118 |   rand.lab[kp[ord2]] = 2
119 |  }
120 |  useMeL[[j]] = rand.lab>0
121 |  rand.labL[[j]]  = rand.lab
122 | }
123 | 
124 | ########################################################################################
125 | print("Build the variables and run MetaNeighbor (this takes **APROXIMATELY THREE DAYS** to run)")
126 | 
127 | clType  = c("GABAergic","Glutamatergic","Non-Neuronal","Endothelial")
128 | clTypes = NULL
129 | for (cli in clusters) clTypes = c(clTypes,names(sort(-table(annoM$class_label[annoM$cluster_label==cli])))[1])
130 | names(clTypes) = clusters
131 | clType = c("GABAergic","Glutamatergic","Non-Neuronal")
132 | clTypes[clTypes=="Endothelial"] = "Non-Neuronal"
133 | 
134 | suppressWarnings(dir.create(paste0(outputFolder,"auroc")))
135 | aurocL = list()
136 | for (j in 1:numIters){
137 |  auroc = NULL
138 |  for (cli in clusters) {
139 |   print(paste(j,"-",cli))
140 |   sameClass    = clustTypes == clTypes[cli]
141 |   useMe        = useMeL[[j]]&sameClass
142 |   rand.lab     = rand.labL[[j]]
143 |   dataTmp      = norm.dat[kpGene,useMe]
144 |   randexp.lab  = as.character(rand.lab[useMe])
145 |   celltype.lab = t(t((clustersF[useMe]==cli)-1+1))
146 |   rownames(celltype.lab) = names(clustersF[useMe])
147 |  
148 |   fn = paste0(outputFolder,"auroc/MN_tmp")
149 |   AUROC.scores = suppressWarnings(run_MetaNeighbor(data = dataTmp, experiment_labels = randexp.lab, 
150 |       celltype_labels = celltype.lab, genesets = genesets, file_ext=fn))
151 |   auroc = rbind(auroc,AUROC.scores)
152 |  }
153 |  rownames(auroc) = clusters
154 |  aurocL[[j]] = auroc 
155 |  save(aurocL,file=paste0(outputFolder,"aurocValues.csv"))
156 | }
157 | 
158 | # Average out the permutation runs (may not make sense to do this here)
159 | auroc <- maxAuroc <- minAuroc <- aurocL[[1]]
160 | for (j in 2:length(aurocL)) {
161 |   auroc = aurocL[[j]] + auroc
162 |   minAuroc = pmin(minAuroc,aurocL[[j]])
163 |   maxAuroc = pmax(maxAuroc,aurocL[[j]])
164 | }
165 | auroc = auroc / length(aurocL)
166 | 
167 | # Estimate of expected range over 10 iterations 
168 | mean(maxAuroc-minAuroc,na.rm=TRUE)
169 | # [1] 0.2011957
170 | 
171 | 
172 | ########################################################################################
173 | print("Summarize results for different functional categories.")
174 | 
175 | getAurocSD <- function(aurocIn,kp){
176 |   tmp=NULL
177 |   for (i in 1:length(aurocIn))
178 | 	tmp = rbind(tmp,colMeans(aurocIn[[i]][kp,],na.rm=TRUE))
179 |   out = apply(tmp,2,sd,na.rm=TRUE)
180 |   return(out)
181 | }
182 | 
183 | meanAuroc <- sdAuroc <- NULL
184 | for (cli in clType){
185 |   meanAuroc = cbind(meanAuroc,colMeans(auroc[clTypes==cli,],na.rm=TRUE))
186 |   sdAuroc = cbind(sdAuroc,getAurocSD(aurocL,clTypes==cli))
187 | }
188 | colnames(meanAuroc) <- paste("Mean",clType)
189 | colnames(sdAuroc) <- paste("SD",clType)
190 | numGenes  = NULL
191 | for (nm in rownames(meanAuroc)) numGenes = c(numGenes,length(genesets[[nm]]))
192 | meanAuroc = cbind(meanAuroc,sdAuroc,numGenes)
193 | meanAuroc = meanAuroc[order(-rowMeans(meanAuroc[,1:2])),]
194 | write.csv(meanAuroc,paste0(outputFolder,"meanAuroc_mouse.csv"))
195 | 
196 | 
197 | 
198 | 
199 | 
200 | 
201 | 
202 | ########################################################################################
203 | ########################################################################################
204 | ########################################################################################
205 | ########################################################################################
206 | 
207 | 
208 | print("Compare mouse AUROC values for interneurons with human values calculated in previous code document.")
209 | 
210 | ## Required libraries
211 | library(pheatmap)
212 | library(grid)
213 | library(ggplot2)
214 | library(ggrepel)
215 | 
216 | ## Load AUROC
217 | auroc.sp <- list()
218 | aurocL <- read.csv(paste0(outputFolder,"meanAuroc_mouse.csv"))
219 | colnames(aurocL)[1] <- "hgnc"
220 | colnames(aurocL)[-1] <- paste0("mouse_", colnames(aurocL)[-1])
221 | auroc.sp[["mouse"]] <- aurocL
222 | aurocL <- read.csv(paste0(outputFolder,"meanAuroc_human.csv"))
223 | colnames(aurocL)[1] <- "hgnc"
224 | colnames(aurocL)[-1] <- paste0("human_", colnames(aurocL)[-1])
225 | auroc.sp[["human"]] <- aurocL
226 | 
227 | ## Merge AUROCs into one data frame
228 | auroc.df <- merge(auroc.sp[["mouse"]], auroc.sp[["human"]], by = "hgnc")
229 | # Fix mislabeled HGNC family
230 | auroc.df$hgnc <- sub("X5..nucleotidases", 
231 |                      "5-hydroxytryptamine receptors, G protein-coupled", 
232 |                      auroc.df$hgnc)
233 | write.csv(auroc.df, file = paste0(outputFolder,"human_mouse_auroc.csv"), row.names = FALSE)
234 | 
235 | 
236 | ## Plot the correlations between species and classes
237 | pw.cor <- cor(auroc.df[, grep("Mean", colnames(auroc.df))])
238 | ph1 <- pheatmap(pw.cor, clustering_method = "average", show_colnames = FALSE, 
239 |          color = colorRampPalette(brewer.pal(n = 9, "RdPu"))(100))
240 | pdf(paste0(outputFolder,"human_mouse_auroc_heatmap.pdf"), width = 5, height = 3)
241 | grid.newpage()
242 | grid.draw(ph1$gtable)
243 | dev.off()
244 | 
245 | print(cor(auroc.df$mouse_Mean.GABAergic, auroc.df$human_Mean.GABAergic))
246 | # [1] 0.9157582
247 | 
248 | 
249 | ## Plot the scatterplot for the paper
250 | plot.lab <- which(abs(auroc.df$human_Mean.GABAergic - auroc.df$mouse_Mean.GABAergic) > 0.2 | 
251 |                     (auroc.df$human_Mean.GABAergic > 0.85 & 
252 |                     auroc.df$mouse_Mean.GABAergic > 0.85 &
253 |                     auroc.df$human_numGenes < 50))
254 | g.inh <- ggplot(auroc.df, aes(x = mouse_Mean.GABAergic, y = human_Mean.GABAergic)) +
255 |   geom_abline(slope = 1, intercept = 0, color = "blue") +
256 |   geom_hline(yintercept = 0.5, linetype = "dashed", color = "grey") +
257 |   geom_vline(xintercept = 0.5, linetype = "dashed", color = "grey") +
258 |   geom_errorbarh(aes(xmin = mouse_Mean.GABAergic - mouse_SD.GABAergic, 
259 |                      xmax = mouse_Mean.GABAergic + mouse_SD.GABAergic), 
260 |                  color = "grey90") +
261 |   geom_errorbar(aes(ymin = human_Mean.GABAergic - human_SD.GABAergic, 
262 |                     ymax = human_Mean.GABAergic + human_SD.GABAergic),
263 |                 color = "grey90") +
264 |   geom_point(aes(size = human_numGenes)) +
265 |   scale_size_continuous(breaks = c(10, 40, 100)) +
266 |   #geom_text_repel(data = auroc.df[plot.lab, ], aes(label = hgnc), size = 2) +  # Current version omits text
267 |   xlab("Mouse classification accuracy (mean AUROC)") +
268 |   ylab("Human classification accuracy (mean AUROC)") +
269 |   theme_bw() +
270 |   theme(panel.grid.major = element_blank(),
271 |         panel.grid.minor = element_blank()) 
272 | plot(g.inh)
273 | 
274 | ggsave(g.inh, filename = paste0(outputFolder,"Fig6a_human_mouse_inh_auroc.pdf"), width = 6.75, height = 5)
275 | 


--------------------------------------------------------------------------------
/R/Code_neurosurgical_vs_postmortem.r:
--------------------------------------------------------------------------------
 1 | # This code makes some scatterplots comparing neurosurgical vs. postmortem of matched types.
 2 | 
 3 | # FIRST NEED TO SET THE WORKING DIRECTORY TO THE MAIN FOLDER
 4 | # This is the folder where "Start.RData" is located
 5 | 
 6 | 
 7 | #-----------------------------------------------------------------------------------------------
 8 | print("Read in the data and load relevant libraries")
 9 | outputFolder  = "output/"
10 | scriptsFolder = "R/"
11 | dataFolder    = "MTG/"
12 | 
13 | # Load these libraries
14 | library(feather)
15 | options(stringsAsFactors=FALSE)
16 | 
17 | # Read in the data
18 | anno       <- read_feather(paste(dataFolder,"anno.feather",sep="")) 
19 | Expr.dat   <- feather(paste(dataFolder,"data.feather",sep=""))   # FPKM
20 | Expr.dat   <- Expr.dat[match(anno$sample_id,Expr.dat$sample_id),] # Make sure the expression matches the sample information
21 | datIn      <- as.matrix(Expr.dat[,names(Expr.dat)!="sample_id"])
22 | rownames(datIn) <- Expr.dat$sample_id
23 | datIn      <- t(datIn)
24 | norm.dat   <- log2(datIn+1)
25 | 
26 | #-----------------------------------------------------------------------------------------------
27 | print("Compare neurosurgical and postmortem data.")
28 | 
29 | isPM  <- is.element(anno$donor_label,c("H16.24.010","H200.1023","H200.1025","H200.1030"))
30 | homs  <- list(
31 |   SST = paste("Sst",1:5),
32 |   Layer4 = "Exc L4/5 IT",
33 |   Layer5IT = paste("Exc L5/6 IT",1:3)
34 | )
35 | 
36 | pdf(paste0(outputFolder,"FigED2c_neurosurgicalVsPostmortem.pdf"))
37 | for (nm in c("Layer5IT", "Layer4", "SST")){
38 |   ns <- (!isPM)&is.element(anno$homology_cluster_label,homs[[nm]])
39 |   pm <- isPM&is.element(anno$homology_cluster_label,homs[[nm]])
40 |   nsDat <- rowMeans(norm.dat[,ns])
41 |   pmDat <- rowMeans(norm.dat[,pm])
42 |   
43 |   plot(nsDat,pmDat,pch=19,cex=0.3,main=paste(nm,"- R =",signif(cor(nsDat,pmDat),2)),
44 |        xlab=paste("Neurosurgical -",sum(ns)),ylab=paste("Postmortem -",sum(pm)))
45 | }
46 | dev.off()
47 | 


--------------------------------------------------------------------------------
/R/Code_quantifyFISH_L23.R:
--------------------------------------------------------------------------------
  1 | # This code performs the analysis of the mFISH data in the mansucript and plots the results
  2 | 
  3 | # FIRST NEED TO SET THE WORKING DIRECTORY TO THE MAIN FOLDER
  4 | # This is the folder where "Start.RData" is located
  5 | 
  6 | 
  7 | #-----------------------------------------------------------------------------------------------
  8 | print("Read in the data and load relevant libraries")
  9 | 
 10 | outputFolder  = "output/"
 11 | scriptsFolder = "R/"
 12 | inputFolder   = "data/"
 13 | dataFolder    = "MTG/"
 14 | 
 15 | library(dplyr);
 16 | library(feather)
 17 | library(pdist)
 18 | library(WGCNA)
 19 | library(gplots)
 20 | library(mfishtools)
 21 | library(dendextend)
 22 | library(tidyverse)
 23 | library(gridExtra)
 24 | library(viridis)
 25 | options(stringsAsFactors=FALSE)
 26 | 
 27 | # Read in the extra scripts
 28 | source(paste0(scriptsFolder,"Support_extraFunctions.r"))
 29 | 
 30 | anno     <- read_feather(paste(dataFolder,"anno.feather",sep="")) 
 31 | Expr.dat <- feather(paste(dataFolder,"data.feather",sep="")) 
 32 | Expr.dat <- Expr.dat[match(anno$sample_id,Expr.dat$sample_id),] # Make sure the expression matches the sample information
 33 | datIn    <- as.matrix(Expr.dat[,names(Expr.dat)!="sample_id"])
 34 | rownames(datIn) <- Expr.dat$sample_id
 35 | datIn    <- t(datIn)
 36 | 
 37 | load(paste0(dataFolder,"/dend.rda")) 
 38 | load(paste0(dataFolder,"clusterInfo.rda")) 
 39 | rownames(clusterInfo) <- clusterInfo$cluster_label
 40 | 
 41 | ##################################################################
 42 | 
 43 | print("Find the median expression in each non-outlier cluster")
 44 | 
 45 | kpSamp     <- 1:dim(anno)[1]
 46 | cl3        <- factor(anno$cluster_label,levels = sort(unique(anno$cluster_label)))
 47 | names(cl3) <- colnames(datIn)
 48 | clustersF  <- droplevels(cl3[kpSamp])
 49 | 
 50 | normDat    <- log2(datIn[,kpSamp]+1)
 51 | medianExpr <- do.call("cbind", tapply(names(clustersF), clustersF, function(x) apply(normDat[,x],1,median))) 
 52 | medianExpr <- medianExpr[,levels(clustersF)]
 53 | rownames(medianExpr) <- rownames(normDat) 
 54 | 
 55 | 
 56 | ##################################################################
 57 | print("Determine the predicted level of mappability using this gene panel based on FACS data.")
 58 | 
 59 | ## THESE ARE FILES WITH RESULTS FROM THE THREE EXPERIMENTS
 60 | nm = c("smFISH_Human_L2-L3_2018_04_output","smFISH_Human_L2-L3_Replicate2_output","smFISH_Human_L2-L3_Replicate3_output")
 61 | 
 62 | fn  = paste0(inputFolder,nm[1],".csv")
 63 | fishIn = read.csv(fn,row.names=1)
 64 | gns = intersect(rownames(medianExpr),colnames(fishIn))
 65 | 
 66 | kpSamp2 = colSums(normDat[gns,]>0)>1
 67 | facsCor = corTreeMapping(medianExpr,normDat[gns,kpSamp2])
 68 | facsCl  <- rownames(facsCor)[apply(facsCor,2,which.max)]
 69 | facsConf = table(factor(as.character(clustersF[kpSamp2]),levels=intersect(labels(dend),clustersF[kpSamp2])),
 70 |   factor(facsCl,levels=intersect(labels(dend),facsCl)))
 71 | facsFrac <- facsConf / rowSums(facsConf)
 72 | 
 73 | # Which leaves have which nodes?
 74 | has_any_labels <- function(sub_dend, the_labels) any(labels(sub_dend) %in% the_labels)
 75 | node_labels <- NULL
 76 | for (lab in labels(dend))
 77 |   node_labels <- cbind(node_labels,noded_with_condition(dend, has_any_labels,the_labels=lab))
 78 | rownames(node_labels) <- get_nodes_attr(dend,"label")
 79 | colnames(node_labels) <- labels(dend)
 80 | 
 81 | # Which clusters agree at the node level?
 82 | agreeNodes = apply(cbind(facsCl,as.character(clustersF[kpSamp2])),1, function(lab,node_labels){
 83 |   rowSums(node_labels[,lab])==2
 84 | },node_labels)
 85 | colnames(agreeNodes) = as.character(clustersF[kpSamp2])
 86 | 
 87 | # Which clusters are in each nodes?
 88 | isInNodes = t(apply(node_labels,1, function(node,cl,dend){
 89 |   is.element(cl,labels(dend)[node])
 90 | },as.character(clustersF[kpSamp2]),dend))
 91 | colnames(isInNodes) = as.character(clustersF[kpSamp2])
 92 | 
 93 | # For each node, what fraction of cells match?
 94 | fracAgree = rowSums(agreeNodes) / rowSums(isInNodes)
 95 | pdf(paste0(outputFolder,"FACS_mapping_accuracy.pdf"),height=8,width=20)
 96 | par(mar=c(12,5,5,5))
 97 | dend %>% set("nodes_cex",0) %>% set("branches_col", "grey") %>% plot
 98 | text(get_nodes_xy(dend)[,1],get_nodes_xy(dend)[,2],round(fracAgree*100))
 99 | dev.off()
100 | 
101 | # Based on results above above, build a new vector of clusters for FACS comparison
102 | classNew = c(rep("LAMP5- Inh",2),rep("LAMP5+ Inh",4),rep("LAMP5- Inh",39),labels(dend)[46:48],"L5-6",
103 |   labels(dend)[50],rep("RORB+ L3-5",4),rep("RORB+ L4-5",5),labels(dend)[60],rep("L5-6",9),rep("Non-Neuronal",6))
104 | names(classNew) = labels(dend)
105 | medianClass = findFromGroups(t(medianExpr[gns,labels(dend)]),classNew,median)
106 | medianClass = medianClass[,unique(classNew)[c(2,1,3,5,4,7,8,9,6,11)]]  # 9,10,6
107 |    # Omit Exc L4−5 FEZF2 SCN4B, which seems to be a catch-all bucket, and which we know is VERY rare in human
108 | 
109 | 
110 | ##################################################################
111 | print("Read in the data and order consistently.")
112 | 
113 | qprob  = 0.9    # For scaling mFISH to FACS
114 | thresh = 0       # Set counts less than or equal to thresh to 0
115 | fish   = list()
116 | par(mfrow=c(3,3))
117 | nm2 = NULL
118 | for (i in 1:length(nm)){
119 |   fn = paste0(inputFolder,nm[i],".csv")
120 |   fishIn  <- fishTmp <- read.csv(fn,row.names=1)
121 |   
122 |   fishTmp$area_um2 <- fishTmp$area / 100
123 |   fishTmp$total_density <- fishTmp$totalReads / fishTmp$area_um2
124 |   filter = fishTmp$area_um2 > 100 & fishTmp$total_density > 0 & fishTmp$total_density < 1 & fishTmp[,"layerData"]==1
125 |   isExc  = (fishTmp[,"SLC17A7"] >= quantile(fishTmp[filter,"SLC17A7"],0.95))
126 |   isExc  = isExc & fishTmp$area_um2 > 100 & fishTmp$total_density > 0 & fishTmp$total_density < 1 
127 |   
128 |   fishIn$area[fishIn$area==0] = round(mean(fishIn$area))  # To avoid errors later
129 |   fishDat <- as.matrix(t(fishIn[,intersect(colnames(fishIn),rownames(medianClass))]))
130 |   fishDat[fishDat<=thresh] = 0 
131 | 	
132 |   fishDat <- t(t(fishDat)/fishIn$area)*mean(fishIn$area) # Account for spot area in gene expression calculation
133 |   fishDat <- log2(fishDat+1)   # Do log transform (It looks worse if we don't)
134 | 
135 |    plot(density(fishDat["SLC17A7",]))
136 |    plot(density(fishDat["GAD2",]))
137 |    plot(density(fishIn$area))
138 |   
139 |   # Only map to excitatory types found in layers 2 and 3
140 |   isAll   <- colnames(medianClass)  
141 |   is23    <- colnames(medianClass)[3:9]
142 |   
143 |   ###############################################################
144 |   # Scale fish data to roughly match RNA-Seq values
145 |   rn = rownames(fishDat)
146 |   valFACS  = apply(medianClass[rn,is23],1,max)
147 |   qs = apply(fishDat[,isExc],1,quantile,qprob,na.rm=TRUE)
148 |   fishDat2 = fishDat
149 |   for (r in rn) fishDat2[r,] = fishDat2[r,]*valFACS[r]/pmax(qs[r],1)  # Value may need to be adjusted on a case by case basis.
150 |   fishScale=fishDat2
151 |   
152 |   # Scale all values to 0-1 before mapping
153 |   medianScale = medianClass/apply(medianClass,1,max)
154 |   ###############################################################
155 |   
156 |   fishCor <- corTreeMapping(medianScale,fishScale)
157 |   fishCor[is.na(fishCor)] = 0
158 |   fishCl  <- rownames(fishCor)[apply(fishCor,2,which.max)]
159 |   fishType<- ifelse(isExc,"Excitatory","Other")
160 |   fishMax <- apply(fishCor,2,max)
161 |   fishSec <- apply(fishCor,2,function(x,y){
162 |     v = which.max(x)[1]
163 | 	return(max(x[y!=y[v]]))
164 |   },isAll)
165 |   fish2 <- data.frame(cluster = fishCl, maxCor = fishMax, confidence = fishMax-fishSec, type = fishType)
166 |   fish2 <- cbind(fish2,fishIn)
167 |   fish2$x <- fishIn$lateralCoordinate
168 |   fish2$y <- fishIn$layerCoordinate
169 | 
170 |   # Scale to (0,1)
171 |   fish2$x = fish2$x-min(fish2$x)
172 |   fish2$x = fish2$x/max(fish2$x)
173 |   fish2$y = fish2$y-min(fish2$y)
174 |   fish2$y = fish2$y/max(fish2$y)
175 | 
176 |   # Save results
177 |   fish[[i]] = fish2
178 | }
179 | 
180 | # Use class colors rather than cluster colors
181 | cols = as.character(clusterInfo$cluster_color)
182 | names(cols) <- rownames(clusterInfo)
183 | 
184 | 
185 | 
186 | #################################################################################33
187 | ## Cluster-based distribution plots - EXTENDED DATA FIGURE 9D
188 | 
189 | pdf(paste0(outputFolder,"FigED9d_mFISH_classDistribution.pdf"),height=8,width=16)
190 | for (i in 1:length(fish)){
191 |  fishh = fish[[i]][fish[[i]]$type=="Excitatory",]
192 |  clusts = intersect(is23,fishh$cluster)
193 |  par(mfrow=c(1,length(clusts)))
194 |  for (j in clusts){
195 |   kp = fishh$cluster==j
196 |   plot(fishh$x[kp],fishh$y[kp],xlab="x",ylab=paste("y -",j),main="Human ISH",
197 |     xlim=c(0,1),ylim=c(1,0),col="white")
198 |   text(fishh$x[kp],fishh$y[kp],fishh$layerData[kp])
199 |  }
200 |  print(paste("Fraction of excitatory cells mapping to excitatory types:",signif(mean(is.element(fishh$cluster,is23)),3)))
201 | }
202 | dev.off()
203 | #[1] "Fraction of excitatory cells mapping to excitatory types: 0.984"
204 | #[1] "Fraction of excitatory cells mapping to excitatory types: 0.988"
205 | #[1] "Fraction of excitatory cells mapping to excitatory types: 0.996"
206 | 
207 | 
208 | #################################################################################33
209 | ## Heatmap
210 | 
211 | gnH = c("GAD2","SLC17A7","CUX2","LAMP5","CBLN2","PENK","COL5A2","CARTPT","RXFP1","confidence")
212 | cap = 0.001
213 | colorset = c("darkblue", "dodgerblue", "gray80", "orange", "orangered")
214 | heat_colors <- colorRampPalette(colorset)(1001)
215 | 
216 | pdf(paste0(outputFolder,"mFISH_heatmap.pdf"),width=12,height=4)
217 | for (i in 1:length(fish)){
218 |  fishh = fish[[i]][fish[[i]]$type=="Excitatory",]
219 |  datTmp = pmin(as.matrix(fishh[,gnH])/fishh[,"area"],cap)
220 |  datTmp[,"confidence"] = sqrt(fishh[,"confidence"])*cap
221 |  datTmp = datTmp[order(factor(fishh$cluster,levels = clusts),-fishh$x),]
222 |  seps   = cumsum(table(fishh$cluster)[clusts])
223 |  heatmap.2(t(datTmp),Rowv=FALSE,Colv=FALSE,dendrogram="none",trace="none",margins = c(3, 10),
224 |   rowsep=9,colsep=seps,key=TRUE,main=paste("Human (by layer), counts/area capped at",cap),col=heat_colors)
225 |  datTmp = pmin(as.matrix(fishh[,gnH])/fishh[,"area"],cap)
226 |  datTmp[,"confidence"] = sqrt(fishh[,"confidence"])*cap
227 |  datTmp = datTmp[order(factor(fishh$cluster,levels = clusts),fishh$confidence),]
228 |  heatmap.2(t(datTmp),Rowv=FALSE,Colv=FALSE,dendrogram="none",trace="none",margins = c(3, 10),
229 |   rowsep=9,colsep=seps,key=TRUE,main=paste("Human (by confidence), counts/area capped at",cap),col=heat_colors)
230 | }
231 | dev.off()
232 | 
233 | 
234 | 
235 | #################################################################################
236 | ## Comparison with RNA-Seq data
237 | 
238 | fishh     = rbind(fish[[1]],fish[[2]],fish[[3]])
239 | fishh     = fishh[(fishh$type=="Excitatory")&(is.element(fishh$cluster,is23)),]
240 | fishProps = table(fishh$cluster,fishh$layerData)[clusts,]
241 | colnames(fishProps) = paste0("L",colnames(fishProps))
242 | fishProps = fishProps[,-1]
243 | kpL23     = is.element(anno$cluster_label,names(classNew)[is.element(classNew,clusts)])
244 | rSeqProps = table(classNew[anno$cluster_label[kpL23]],paste0("L",anno$brain_subregion_id[kpL23]))[clusts,colnames(fishProps)]
245 | cols2     = clusterInfo[clusts,"final.cluster.color"]
246 | d1        = dim(fishProps)[1]
247 | d2        = dim(fishProps)[2]
248 | 
249 | pdf(paste0(outputFolder,"mFISH_classByLayerBarplot_human.pdf"),width=9,height=9)
250 | plot(fishProps,main="Human (HCR)",ylab="Layer",col=standardColors(),las=2)
251 | plot(rSeqProps,main="Human (RNA-Seq)",ylab="Layer",col=standardColors(),las=2)
252 | plot(t(fishProps),main="Human (HCR)",ylab="Layer",col=cols2,las=2)
253 | plot(t(rSeqProps),main="Human (RNA-Seq)",ylab="Layer",col=cols2,las=2)
254 | 
255 | barplot(t(t(t(fishProps))/colSums(t(fishProps)))[d2:1,],main="Human (HCR)",ylab="Layer",col=standardColors(),las=2,legend=TRUE,ylim=c(0,2))
256 | barplot(t(t(t(rSeqProps))/colSums(t(rSeqProps)))[d2:1,],main="Human (RNA-Seq)",ylab="Layer",col=standardColors(),las=2,legend=TRUE,ylim=c(0,2))
257 | barplot(t(t(fishProps)/colSums(fishProps))[d1:1,],main="Human (HCR)",ylab="Layer",col=cols2,las=2,legend=TRUE,ylim=c(0,2))
258 | barplot(t(t(rSeqProps)/colSums(rSeqProps))[d1:1,],main="Human (RNA-Seq)",ylab="Layer",col=cols2,las=2,legend=TRUE,ylim=c(0,2))
259 | dev.off()
260 | 
261 | 
262 | ##########################################
263 | print("Output the results")
264 | 
265 | fishOut = NULL
266 | for (i in 1:length(fish))
267 |   fishOut = rbind(fishOut,cbind(fish[[i]],nm[i]))
268 | colnames(fishOut) = c(colnames(fish[[1]]),"Experiment")
269 | write.csv(fishOut,paste0(outputFolder,"L23_FISH_output.csv"),row.names=FALSE)
270 | 
271 | 
272 | 
273 | ##########################################
274 | print("Re-read in the FISH data and scale spatial coordinates to experiment smFISH_Human_L2-L3_2018_04")
275 | 
276 | fish <- read_csv(file = paste0(outputFolder,"L23_FISH_output.csv"))
277 | 
278 | fix.exp <- which(fish$experimentName == "smFISH_Human_L2-L3_2018_04")
279 | fish$layerCoordinate[fix.exp] <- fish$layerCoordinate[fix.exp] - min(fish$layerCoordinate[fix.exp])
280 | for (exp1 in unique(fish$experimentName)) {
281 |   lat1 <- fish$lateralCoordinate[fish$experimentName == exp1]
282 |   fish$lateralCoordinate[fish$experimentName == exp1] <- lat1 - min(lat1)
283 | }
284 | fish$LateralPosition <- abs(fish$lateralCoordinate / 10)
285 | fish$CorticalDepth <- -fish$layerCoordinate / 10
286 | fish$area_um2 <- fish$area / 100
287 | fish$total_density <- fish$totalReads / fish$area_um2
288 | 
289 | 
290 | ##########################################
291 | print("Plot the FISH results for ALL clusters")
292 | 
293 | 
294 | plot_fish <- function(dat, grey.pal.rep = 0) {
295 |   dat.sort <- dat[order(dat$density), ]
296 |   density.breaks <- c(0, 1, 3, 10, 30)
297 |   ggplot(data = dat.sort) +
298 |     aes(x = LateralPosition, y = CorticalDepth, 
299 |         color = density, size = density) +
300 |     geom_point() +
301 |     scale_color_gradientn(colors = c(rep("grey90", grey.pal.rep), viridis(10)), 
302 |                           trans = "sqrt", breaks = density.breaks,
303 |                           name = expression(paste("Spots/100", mu, plain(m)^2))) +
304 |     scale_size_area(breaks = density.breaks, name = "") +
305 |     guides(color = guide_colorbar(order = 1),
306 |            size = guide_legend(order = 0)) +
307 |     ggtitle(first(dat$gene)) +
308 |     xlab(expression(paste("Lateral position (", mu, "m)"))) +
309 |     ylab(expression(paste("Cortical depth (", mu, "m)"))) +
310 |     theme_bw() +
311 |     theme(panel.grid.major = element_blank(), 
312 |           panel.grid.minor = element_blank())
313 | }
314 | 
315 | 
316 | fish.genes <- c("SLC17A7", "LAMP5", "PENK", 
317 |                 "CUX2", "COL5A2", "CARTPT",
318 |                 "CBLN2", "RXFP1", "GAD2")
319 | fish.subset <- fish %>%
320 |   filter(area_um2 > 100 & total_density > 0 & total_density < 1) %>%
321 |   gather(gene, count, CUX2:SLC17A7) %>%
322 |   mutate(gene = factor(gene, levels = fish.genes)) %>%  # Reorder plots
323 |   group_by(experimentName, gene) %>%
324 |   mutate(density = count/area_um2 * 100) # Probe density per 100um^2
325 |   
326 | g.all <- fish.subset %>% 
327 |   do(plots = plot_fish(., grey.pal.rep = 1))
328 | 
329 | 
330 | marrangeGrob(g.all$plots, nrow = 3, ncol = 3, top = "")
331 | 
332 | pdf(file = paste0(outputFolder,"FigED9cd_L23_fish_probes.pdf"), width = 10.5, height = 12)
333 | marrangeGrob(g.all$plots, nrow = 3, ncol = 3, top = "")
334 | dev.off()
335 | 
336 | 
337 | ##########################################
338 | print("Plot the data only for the FREM3 cluster.")
339 | 
340 | frem3.genes <- c("LAMP5", "COL5A2")
341 | 
342 | fish.frem3 <- fish %>%
343 |   filter(area_um2 > 100 & total_density > 0 & total_density < 1) %>%
344 |   filter(SLC17A7 > 9.75) %>% 
345 |   filter(cluster == "Exc L2-3 LINC00507 FREM3") %>% 
346 |   gather(gene, count, CUX2:SLC17A7) %>%
347 |   filter(gene %in% frem3.genes) %>% 
348 |   mutate(gene = factor(gene, levels = frem3.genes)) %>%  # Reorder plots
349 |   group_by(experimentName, gene) %>%
350 |   mutate(density = count/area_um2 * 100) # Probe density per 100um^2
351 | 
352 | g.frem3 <-  fish.frem3 %>%
353 |   group_by(experimentName, gene) %>% 
354 |   do(plots = plot_fish(., grey.pal.rep = 1) +
355 |        ylim(c(min(fish.subset$CorticalDepth), 
356 |               max(fish.subset$CorticalDepth)))) # 
357 | 
358 | 
359 | marrangeGrob(grobs = g.frem3$plots, nrow = 1, ncol = 1, top = "")
360 | 
361 | pdf(file = paste0(outputFolder,"Fig3c_L23_FREM3_fish_probes.pdf"), width = 3.5, height = 5)
362 | marrangeGrob(g.frem3$plots, nrow = 1, ncol = 1, top = "")
363 | dev.off()
364 | 
365 | ##########################################
366 | print("------------------------------------------------------------------------------")
367 | print("---------------- DO NOT CLOSE THIS R SESSION YET! ----------------------------")
368 | print("------------------------------------------------------------------------------")
369 | print(" ")
370 | print("If FigED9cd_L23_fish_probes.pdf and Fig3c_L23_FREM3_fish_probes.pdf are blank, ")
371 | print("  copy and paste the code in this script from lines 315 onward and then check")
372 | print("  again.  The first page will be blank, but remaining pages should have plots.")
373 | 
374 | 


--------------------------------------------------------------------------------
/R/Code_violin_groupHeatmaps_jitterPlots.r:
--------------------------------------------------------------------------------
  1 | # This code makes most of the violin plots and group-averaged heatmaps in the manuscript.
  2 | 
  3 | # FIRST NEED TO SET THE WORKING DIRECTORY TO THE MAIN FOLDER
  4 | # This is the folder where "Start.RData" is located
  5 | 
  6 | 
  7 | #-----------------------------------------------------------------------------------------------
  8 | print("Read in the data and load relevant libraries")
  9 | outputFolder  = "output/"
 10 | scriptsFolder = "R/"
 11 | dataFolder    = "MTG/"
 12 | mouseFolder   = "mouse/"
 13 | inputFolder   = "data/"
 14 | fdir          = dataFolder
 15 | 
 16 | # Load these libraries
 17 | library(beeswarm)
 18 | library(WGCNA);
 19 | library(edgeR)   
 20 | library(feather)
 21 | library(dendextend)
 22 | library(monocle)
 23 | library(ggplot2)
 24 | library(dplyr)
 25 | library(cowplot)
 26 | library(matrixStats)
 27 | options(stringsAsFactors=FALSE)
 28 | 
 29 | # Read in the extra scripts
 30 | source(paste0(scriptsFolder,"Support_extraFunctions.r"))
 31 | source(paste0(scriptsFolder,"Support_violin_functions.R"))
 32 | source(paste0(scriptsFolder,"Support_heatmap_functions.R"))
 33 | 
 34 | # Read in the data
 35 | Expr.dat <- feather(paste(dataFolder,"data.feather",sep=""))   # CPM
 36 | anno <- read_feather(paste(dataFolder,"anno.feather",sep="")) 
 37 | exprData <- as.matrix(Expr.dat[,colnames(Expr.dat)[colnames(Expr.dat)!="sample_id"]])
 38 | load(paste0(dataFolder,"/dend.rda")) 
 39 | load(paste0(dataFolder,"clusterInfo.rda")) 
 40 | all_clusters = as.numeric(clusterInfo$cluster_id)
 41 |   
 42 | #-----------------------------------------------------------------------------------------------
 43 | print("Make trimmed mean average heatmap for figure 1.")
 44 | 
 45 | broadGenes = c("GAD1","ADARB2","PAX6","LAMP5","VIP","LHX6","SST","PVALB","SLC17A7","LINC00507",
 46 |   "RORB","THEMIS","FEZF2","SLC1A3","PDGFRA","FGFR3","OPALIN","NOSTRIN","TYROBP")
 47 |   
 48 | broad_plot <- group_heatmap_plot(data_source = fdir,
 49 |                    genes = broadGenes,
 50 |                    group_by = "cluster",
 51 |                    clusters = all_clusters,
 52 |                    calculation = "trimmed_mean",
 53 |                    labelheight = 40,
 54 |                    showcounts = F)
 55 | ggsave(paste0(outputFolder,"Fig1_broad_markers.pdf"),broad_plot,height = 4, width = 10)
 56 | 
 57 | 
 58 | #-----------------------------------------------------------------------------------------------
 59 | print("Make trimmed mean average heatmap for figure 2D.")
 60 | 
 61 | RORB_Genes = c("SLC17A7","RORB","CNR1","PRSS12","ALCAM","MET","MME","NTNG1","HS3ST4","CUX2",
 62 |                "PCP4","GRIN3A","GRIK3","CRHR2","TPBG","POSTN","SMYD1")
 63 |   
 64 | RORB_plot <- group_heatmap_plot(data_source = fdir,
 65 |                    genes = RORB_Genes,
 66 |                    group_by = "cluster",
 67 |                    clusters = c(50:52,54:59,62),
 68 |                    calculation = "trimmed_mean",
 69 |                    labelheight = 40,
 70 |                    showcounts = F)
 71 | ggsave(paste0(outputFolder,"Fig7A_RORB_subtypes.pdf"),RORB_plot,height = 4, width = 6)
 72 | 
 73 | 
 74 | #-----------------------------------------------------------------------------------------------
 75 | ## NOTE: Additional heatmaps are plotted after the violin plots
 76 | #-----------------------------------------------------------------------------------------------
 77 | 
 78 | 
 79 | #-----------------------------------------------------------------------------------------------
 80 | print("Recalculate marker genes to see if any LOC genes would be better markers than the named genes.")
 81 | 
 82 | # Save cluster info variable and excluded clusters
 83 | clusterType = anno$class_label 
 84 | cl          = anno$cluster_id
 85 | names(cl)   = Expr.dat$sample_id
 86 | includeClas = c("inh","exc","glia")
 87 | excludeClas = sort(setdiff(clusterType,includeClas))
 88 | kpSamp      = !is.element(clusterType,excludeClas) 
 89 | 
 90 | 
 91 | # Get median expression per cluster and the proportions
 92 | normDat    = log2(exprData+1)
 93 | rownames(normDat) = Expr.dat$sample_id
 94 | normDat    = t(normDat)
 95 | exprThresh = 1
 96 | medianExpr = do.call("cbind", tapply(names(cl), cl, function(x) rowMedians(normDat[,x]))) 
 97 | propExpr   = do.call("cbind", tapply(names(cl), cl, function(x) rowMeans(normDat[,x]>exprThresh))) 
 98 | rownames(medianExpr) <- rownames(propExpr) <- rownames(normDat)  
 99 | clusterOrd = as.character(sort(unique(anno$cluster_id)))
100 | clusterKp  = as.character(sort(unique(anno$cluster_id[kpSamp])))
101 | medianExpr = medianExpr[,clusterOrd]
102 | propExpr   = propExpr[,clusterOrd]
103 | 
104 | kpGn       = rep(TRUE,dim(propExpr)[1])  #betaScore>=minBeta
105 | specGenes  = getTopMarkersByPropNew(propExpr=propExpr[kpGn,], medianExpr=medianExpr[kpGn,], propDiff = 0, propMin=0.4, medianFC = 1)
106 | topIsLoc   = (substr(specGenes,1,3)=="LOC")|(substr(specGenes,1,4)=="LINC")|(substr(specGenes,1,4)=="DKFZ")|     #DKFZp686K1684 = "PAX6-AS1"
107 |    (substr(specGenes,nchar(specGenes)-3,nchar(specGenes))=="-AS1")
108 | 
109 | # Function for getting the plotting genes (to avoid typing it out a bunch of times)
110 | getPlotGenes <-function(plotGenes,kp){
111 |  for (k in which (kp)){
112 |   plotGenes = c(plotGenes,ifelse(topIsLoc[k],specGenes[k],"none"))
113 |   if(specGenes[k]!=gsub("-","\\.",specGenes[k])) plotGenes=c(plotGenes,gsub("-","\\.",specGenes[k])) # Convert - to . for plotting
114 |   plotGenes = c(plotGenes,clusterInfo$topGene[k])
115 |  }
116 |  return(setdiff(plotGenes,"none"))
117 | }  
118 |    
119 | #-----------------------------------------------------------------------------------------------
120 | print("Make violin plots for figures 2-4.")
121 | 
122 | # Excitatory
123 | kp = clusterInfo$class_label=="Glutamatergic"
124 | plotGenes = getPlotGenes(c("LAMP5","LINC00507","RORB","THEMIS","FEZF2"),kp)
125 | group_violin_plot(data_source = dataFolder, clusters=all_clusters[kp], genes=plotGenes)
126 | ggsave(paste0(outputFolder,"Fig2_exc_violinPlots_specificGenes_withLocs.pdf"), width = 12, height = 15)
127 | 
128 | # Inhibitory 
129 | kp = is.element(1:length(clusterInfo$class_label),1:27)   # ADARB2
130 | plotGenes = getPlotGenes(c("ADARB2","PAX6","LAMP5","VIP","LHX6","SST","HTR3A"),kp)
131 | group_violin_plot(data_source = dataFolder, clusters=all_clusters[kp], genes=plotGenes)
132 | ggsave(paste0(outputFolder,"Fig3_inh_ADARB2_violinPlots_specificGenes_withLocs.pdf"), width = 14, height = 15)
133 | 
134 | kp = is.element(1:length(clusterInfo$class_label),28:45)  # LHX6
135 | plotGenes = getPlotGenes(c("ADARB2","LHX6","SST","PVALB"),kp)
136 | group_violin_plot(data_source = dataFolder, clusters=all_clusters[kp], genes=plotGenes)
137 | ggsave(paste0(outputFolder,"Fig3_inh_LHX6_violinPlots_specificGenes_withLocs.pdf"), width = 11, height = 12)
138 | 
139 | # Glia      # Expand genes to plot manually...
140 | kp    = clusterInfo$class_label=="Non-neuronal"
141 | opc   = c("PDGFRA","COL20A1","OLIG2","PRRX1")  #CSPG4
142 | astro = c("FGFR3","SLC14A1","DIO2","LINC00982","GFAP","AQP4","MT1F","ID3")  # WIF1
143 | oligo = c("OPALIN","PLP1","MAG","KLK6") # "TMEM235","CD22","KLK6"
144 | endo  = c("NOSTRIN","EBF1","ITIH5","EMCN") # "CLDN5","VIM","PALMD"
145 | micro = c("TYROBP","C3","CX3CR1","CSF1R")  # LST1
146 | plotGenes = c("SLC1A3",opc,astro,oligo,endo,micro)
147 | group_violin_plot(data_source = dataFolder, clusters=all_clusters[kp], genes=plotGenes, max_width = 15)
148 | ggsave(paste0(outputFolder,"Fig4_glia_violinPlots_specificGenes.pdf"), width = 4, height = 10)
149 | 
150 | 
151 | #-----------------------------------------------------------------------------------------------
152 | print("Select other violin plots for other figure panels.")
153 | 
154 | # Pvalb 
155 | plotGenes = c("GAD1","PVALB","UNC5B","NOG","COL15A1")
156 | group_violin_plot(data_source = dataFolder, clusters=all_clusters[39:45], genes=plotGenes)
157 | ggsave(paste0(outputFolder,"FigED10d_PVALB_chandelier_markers.pdf"), width = 7, height = 7)
158 | 
159 | # ET types 
160 | plotGenes = c("SLC17A7","NPTX1","FAM84B","POU3F1")
161 | group_violin_plot(data_source = dataFolder, clusters=all_clusters[46:69], genes=plotGenes)
162 | ggsave(paste0(outputFolder,"FigED12e_ET_cluster_markers.pdf"), width = 17, height = 6)
163 | 
164 | # Extended data figure on LOC genes being good markers
165 | plotGenes = c("RORB","CNR1","LINC01164")
166 | group_violin_plot(data_source = dataFolder, clusters=all_clusters[51:54], genes=plotGenes)
167 | ggsave(paste0(outputFolder,"FigED8a_RORB_LOCs.pdf"), width = 6, height = 6)
168 | 
169 | plotGenes = c("RORB","LOC105376081")
170 | group_violin_plot(data_source = dataFolder, clusters=all_clusters[51:54], genes=plotGenes)
171 | ggsave(paste0(outputFolder,"FigED8c_RORB_LOCs.pdf"), width = 6, height = 6)
172 | 
173 | plotGenes = c("RORB","CRYM","LOC401134")
174 | group_violin_plot(data_source = dataFolder, clusters=all_clusters[51:53], genes=plotGenes)
175 | ggsave(paste0(outputFolder,"FigED8d_RORB_LOCs.pdf"), width = 6, height = 6)
176 | 
177 | plotGenes = c("PVALB","LOC102723415")
178 | group_violin_plot(data_source = dataFolder, clusters=all_clusters[41:43], genes=plotGenes)
179 | ggsave(paste0(outputFolder,"FigED8e_PVALB_LOCs.pdf"), width = 6, height = 6)
180 | 
181 | 
182 | #-----------------------------------------------------------------------------------------------
183 | ## NOTE: JITTER PLOTS
184 | #-----------------------------------------------------------------------------------------------
185 | 
186 | #-----------------------------------------------------------------------------------------------
187 | print("Make layer jitter plots for figures 2-4.")
188 | 
189 | # Excitatory
190 | anno$layer_id     <- anno$brain_subregion_id
191 | anno$layer_color  <- anno$brain_subregion_color
192 | anno$layer_label  <- anno$brain_subregion_label
193 | exc_layers <- build_layer_plot(anno, dend, 0, cluster_ids = which(clusterInfo$class_label=="Glutamatergic"),textSize=1.8, maxPerCluster=100)  # fillColor =
194 | exc_plot <- plot_grid(exc_layers,align = "v",nrow = 1,rel_widths = 1,rel_heights = 1,labels = "")
195 | ggsave(paste0(outputFolder,"Fig2_exc_jitterLayerPlot.pdf"), width = 8, height = 6)
196 | 
197 | # Inhibitory 
198 | adarb2_layers <- build_layer_plot(anno,dend,0,cluster_ids = 1:27,textSize=1.8, maxPerCluster=100)
199 | adarb2_plot <- plot_grid(adarb2_layers,align = "v",nrow = 1,rel_widths = 1,rel_heights = 1,labels = "")
200 | ggsave(paste0(outputFolder,"Fig3_inh_ADARB2_jitterLayerPlot.pdf"), width = 8, height = 6)
201 | 
202 | pvalb_sst_layers <- build_layer_plot(anno,dend,0,cluster_ids = 28:45,textSize=1.8, maxPerCluster=100)
203 | ps_plot <- plot_grid(pvalb_sst_layers,align = "v",nrow = 1,rel_widths = 1,rel_heights = 1,labels = "")
204 | ggsave(paste0(outputFolder,"Fig3_inh_LHX6_jitterLayerPlot.pdf"), width = 8, height = 6)
205 | 
206 | # Glia
207 | glia_layers <- build_layer_plot(anno, dend, 0, cluster_ids = which(clusterInfo$class_label=="Non-neuronal"),textSize=1.8, maxPerCluster=100)  # fillColor =
208 | glia_plot <- plot_grid(glia_layers,align = "v",nrow = 1,rel_widths = 1,rel_heights = 1,labels = "")
209 | ggsave(paste0(outputFolder,"Fig4_glia_jitterLayerPlot.pdf"), width = 4, height = 6)
210 | 
211 | 
212 | #-----------------------------------------------------------------------------------------------
213 | ## NOTE: More heatmaps are here along with marker genes for Supplemental table 2
214 | #-----------------------------------------------------------------------------------------------
215 | 
216 | #-----------------------------------------------------------------------------------------------
217 | print("Find the top genes in every cluster and show them all in a heatmap.")
218 | 
219 | # Read in the levelInfo
220 | levelInfo  = read.csv(paste0(inputFolder,"FinalHumanMTGclusterAnnotation_update.csv"))
221 | levels     = list(all = levelInfo$final.tree.order)
222 | for (cn in c("level1","level2","level3")){
223 |   cols = table(levelInfo[,cn])
224 |   cols = names(cols)[cols>2]
225 |   for (cnn in cols){
226 |     levels[[cnn]] = levelInfo$final.tree.order[levelInfo[,cn]==cnn]
227 |   }
228 | }
229 | 
230 | #-----------------------------------------------------------------------------------------------
231 | print("Find the top genes in each cluster (overall and for all three levels).")
232 | 
233 | N = 5
234 | outGenesL <- list()
235 | 
236 | for (cn in names(levels)){
237 |   kp <- levels[[cn]]
238 |   outGenes <- NULL
239 |   for (i in 1:N) {
240 |     print(paste(cn,i))
241 |     outTmp <- getTopMarkersByPropNew(propExpr = propExpr[,kp], medianExpr = medianExpr[,kp], 
242 |                                           propDiff = 0, propMin=0.4, medianFC = 1, excludeGenes = outGenes)
243 |     outGenes <- c(outGenes,outTmp)
244 |   }
245 |   dim(outGenes) = c(length(outGenes)/N,N)
246 |   outTmp <- apply(outGenes,1,paste,collapse=", ")
247 |   outTmp <- gsub(", none","",outTmp)
248 |   names(outTmp) <- kp
249 |   outGenesL[[cn]] <- outTmp
250 | }
251 | 
252 | #-----------------------------------------------------------------------------------------------
253 | print("Save these genes to a table and output")
254 | 
255 | outTable <- levelInfo[,c("final.cluster","final.tree.order","level1","level2","level3")]
256 | outTable$specific_genes = outGenesL[["all"]]
257 | outTable$level1_genes = NA
258 | outTable$level2_genes = NA
259 | outTable$level3_genes = NA
260 | for (cn in c("level1","level2","level3")){
261 |   gn   = paste0(cn,"_genes")
262 |   cols = table(levelInfo[,cn])
263 |   cols = names(cols)[cols>2]
264 |   for (cnn in cols){
265 | 	outTable[match(names(outGenesL[[cnn]]),outTable$final.tree.order),gn] = outGenesL[[cnn]]
266 |   }
267 | }
268 | write.csv(outTable,paste0(outputFolder,"Table_S2_markerGenes.csv"))
269 | 
270 | 
271 | #-----------------------------------------------------------------------------------------------
272 | print("Plot these genes")
273 | 
274 | # Class genes defined using manual selection and NS forest.
275 | classRF <- c("GAD1","GAD2","ADARB2","CXCL14","LOC105375415","FBXL7","SV2C","VIP","LHX6","NXPH1","PLCH1","SST","SOX6","PVALB","TAC1","SLC17A7","SATB2","PDE7B","FAM19A2","GRIK3","PDZRN4","SLC1A3")
276 | 
277 | # The specific genes defined above, separated into inhibitory and other.
278 | isI <- substr(outTable$final.cluster,1,3)=="Inh"
279 | plotGenesI <- plotGenesEN <- classRF
280 | for (i in which(isI)){
281 |  gnTmp      <- strsplit(outTable[i,"specific_genes"],", ")[[1]]
282 |  plotGenesI <- c(plotGenesI, gnTmp)
283 | }
284 | for (i in which(!isI)){
285 |  gnTmp      <- strsplit(outTable[i,"specific_genes"],", ")[[1]]
286 |  plotGenesEN <- c(plotGenesEN, gnTmp)
287 | }
288 | 
289 | all_clusters <- 1:75
290 | markers_plotI <- group_heatmap_plot(data_source = dataFolder,
291 |                    genes = plotGenesI,
292 |                    group_by = "cluster",
293 |                    clusters = all_clusters,
294 |                    calculation = "median",
295 |                    labelheight = 7,
296 |                    showcounts = F)
297 | markers_plotEN <- group_heatmap_plot(data_source = dataFolder,
298 |                    genes = plotGenesEN,
299 |                    group_by = "cluster",
300 |                    clusters = all_clusters,
301 |                    calculation = "median",
302 |                    labelheight = 8,
303 |                    showcounts = F)
304 | 				   
305 | save_plot(paste0(outputFolder,"FigED4_top5_markers_Inh.pdf"),
306 |           markers_plotI,base_height = 17, base_width = 8)
307 | save_plot(paste0(outputFolder,"FigED4_top5_markers_ExNN.pdf"),
308 |           markers_plotEN,base_height = 15, base_width = 8)
309 | #
310 | 


--------------------------------------------------------------------------------
/R/ED_Fig1_plot_facs_metadata.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Human MTG nuclei FACS metadata"
 3 | output: html_notebook
 4 | ---
 5 |   
 6 |   
 7 | ```{r setup, include=FALSE}
 8 | knitr::opts_chunk$set(echo = TRUE)
 9 | ```
10 |   
11 |   
12 | Make sure to set the correct working directory path!  
13 |   
14 | ```{r setwd}
15 | homeFolder <- "../"  # UPDATE WITH CORRECT PATH
16 | knitr::opts_knit$set(root.dir = homeFolder)
17 | #setwd(homeFolder) # UPDATE WITH CORRECT PATH
18 | ```  
19 |   
20 |   
21 | ```{r init-workspace, echo=FALSE, message=FALSE, warning=FALSE}
22 | # Load libraries 
23 | library(reshape2)
24 | library(feather)
25 | library(tidyr)
26 | 
27 | ```
28 | 
29 | 
30 | ```{r load-data, message=FALSE}
31 | facs.meta <- read_csv(file = "../data/mtg_facs_metadata.csv")
32 | anno <- as.data.frame(read_feather(paste0(homeFolder,"MTG/anno.feather")))
33 | load(paste0(homeFolder,"MTG/dend.rda"))
34 | 
35 | human.cl <- unlist(dendrapply(dend, function(x) if (is.leaf(x)) attr(x, "label")))
36 | facs.meta$cluster <- factor(facs.meta$cluster, levels = human.cl)
37 | facs.meta$cluster_type <- factor(facs.meta$cluster_type, levels = c("inh", "exc", "glia"))
38 | facs.meta$cluster_color <- anno$cluster_color[match(facs.meta$cluster, anno$cluster_label)]
39 | 
40 | ```
41 | 
42 | 
43 | ```{r plot-facs, fig.width = 12, fig.height = 8}
44 | cl.cnt <- table(facs.meta$cluster)
45 | keep.cl <- names(cl.cnt[cl.cnt >= 5])
46 | 
47 | 
48 | plot_facs <- function(dat) {
49 |   cl.color <- setNames(dat$cluster_color, dat$cluster)
50 |   ggplot(data = dat) + 
51 |     aes(x = cluster, y = value, color = cluster) +
52 |     facet_grid(facs ~ ., scales = "free_y") +
53 |     # geom_point(position = position_jitter(w = 0.15, h = 0)) + 
54 |     stat_summary(fun.data = "mean_cl_boot") +
55 |     scale_x_discrete(drop = FALSE) +
56 |     scale_color_manual(values = cl.color, guide = FALSE) +
57 |     xlab("") +
58 |     ylab("") +
59 |     theme_bw() + 
60 |     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
61 | }
62 | 
63 | 
64 | g.facs <- facs.meta %>% 
65 |   filter(cluster %in% keep.cl) %>% 
66 |   rename(NeuN = `PE-A Mean`, FSC = `FSC-A Mean`, SSC =`SSC-A Mean`, DAPI = `DAPI-A Mean`) %>% 
67 |   gather(facs, value, NeuN:SSC) %>% 
68 |   mutate(facs = factor(facs, levels = c("NeuN", "FSC", "SSC"))) %>% 
69 |   filter(! is.na(value)) %>% 
70 |   do(plot = plot_facs(.))
71 | 
72 | plot(g.facs$plot[[1]])
73 | 
74 | ggsave(g.facs$plot[[1]], filename = "../output/facs.meta.by.cluster.pdf", width = 12, height = 8)
75 | 
76 | ```


--------------------------------------------------------------------------------
/R/ED_Fig2_tissue_de_genes.R:
--------------------------------------------------------------------------------
 1 | library(feather)
 2 | library(limma)
 3 | 
 4 | shiny.path <- "//molgen-shiny-server/share/shiny/human/MTG_paper_rev/"
 5 | anno <- as.data.frame(read_feather(paste0(shiny.path, "anno.feather")))
 6 | data <- as.data.frame(read_feather(paste0(shiny.path, "data_t.feather")))
 7 | row.names(data) <- data$gene
 8 | data <- log2(data[, -grep("gene", colnames(data))] + 1)
 9 | 
10 | surg.donors <- c("H16.06.002", "H16.06.008", "H16.06.009", "H16.03.004")
11 | pm.donors <- c("H200.1023", "H200.1025", "H200.1030", "H16.24.010")
12 | female.donors <- c("H200.1023", "H16.06.002", "H16.06.008", "H16.06.009")
13 | anno$donor_type <- factor(ifelse(anno$external_donor_name_label %in% surg.donors, 
14 |                           "surg", "pm"))
15 | anno$donor_sex <- factor(ifelse(anno$external_donor_name_label %in% female.donors, 
16 |                                 "female", "male"))
17 | cl.cnt <- table(anno$cluster_label, anno$donor_type)
18 | keep.cl <- row.names(cl.cnt)[apply(cl.cnt, 1, min) >= 10]
19 | anno.subset <- droplevels(subset(anno, cluster_label %in% keep.cl))
20 | keep.samp <- match(anno.subset$sample_id, colnames(data))
21 | data.subset <- data[, keep.samp]
22 | 
23 | 
24 | # Calc DE
25 | cl <- make.names(anno.subset$cluster_label)
26 | 
27 | design <- model.matrix(~0 + anno.subset$donor_type + anno.subset$donor_sex + cl)
28 | colnames(design) <- sub("anno.subset$", "", colnames(design), fixed = TRUE)
29 | fit1 <- lmFit(data.subset, design = design)
30 | cont.matrix <- makeContrasts(tissue_type = "donor.typesurg - donor.typepm", 
31 |                              levels = design) 
32 | fit2 <- contrasts.fit(fit1, cont.matrix)
33 | fit3 <- eBayes(fit2)
34 | top1 <- topTable(fit3, p.value = 0.01, lfc = 1, number = Inf)
35 | top2 <- data.frame(gene = row.names(top1), top1)
36 | 
37 | # Save tables
38 | de.surg <- subset(top2, logFC > 0)
39 | de.pm <- subset(top2, logFC <= 0)
40 | de.pm$logFC <- -de.pm$logFC
41 | write.csv(de.surg, "output/surg_de_genes.csv", row.names = FALSE)
42 | write.csv(de.pm, "output/pm_de_genes.csv", row.names = FALSE)
43 | 
44 | 
45 | 
46 | # QC metrics by tissue type
47 | with(anno.subset, 
48 |      data.frame(reads = tapply(total_reads_label, donor_type, median),
49 |                 aligned = tapply(star_percent_unique_reads_label, donor_type, median),
50 |                 genes = tapply(Genes.With.FPKM_label, donor_type, median))
51 | )
52 | 
53 | 


--------------------------------------------------------------------------------
/R/Fig1c.R:
--------------------------------------------------------------------------------
 1 | print("Read in the data and load relevant libraries")
 2 | outputFolder  = "output/"
 3 | scriptsFolder = "R/"
 4 | inputFolder   = "data/"
 5 | dataFolder    = "MTG/"
 6 | 
 7 | library(pheatmap)
 8 | library(RColorBrewer)
 9 | library(feather)
10 | 
11 | 
12 | anno     <- read_feather(paste(dataFolder,"anno.feather",sep="")) 
13 | anno.df <- unique(anno[, c("cluster_id", "cluster_label")])
14 | anno$cluster_label <- factor(anno$cluster_label, 
15 |                              levels = anno.df$cluster_label[order(anno.df$cluster_id)])
16 | 
17 | layer.cl <- as.matrix(table(anno$brain_subregion_label, anno$cluster_label))
18 | layer.cl.prop <- sweep(layer.cl, 2, colSums(layer.cl), "/")
19 | 
20 | 
21 | #### Figure 1c ####
22 | hm.colors <- colorRampPalette(c("white", brewer.pal(9, "YlOrRd")))(100)
23 | pheatmap(layer.cl.prop, cluster_rows = FALSE, cluster_cols = FALSE, color = hm.colors)
24 | 


--------------------------------------------------------------------------------
/R/Support_2016-11-03-runMetaNeighbor.R:
--------------------------------------------------------------------------------
  1 | neighbor.voting.LeaveOneExpOut <- function(exp.labels,cell.labels,network,means=T){
  2 |   
  3 |   # genes.label : needs to be in 1s and 0s
  4 |   l <- dim(cell.labels)[2]
  5 |   c <- dim(cell.labels)[1]
  6 |   e <- unique(exp.labels)
  7 |   
  8 |   #print("Make genes label CV matrix")
  9 |   test.cell.labels = matrix(cell.labels, nrow=c, ncol=length(e)*l)
 10 |   
 11 |   exp.cols=rep(e,each=l)
 12 |   
 13 |   for (i in 1:length(e)){
 14 |     d<-which(exp.labels==i)
 15 |     a<-which(exp.cols==i)
 16 |     test.cell.labels[d,a]<-0
 17 |   }
 18 |   
 19 |   #print("Get sums - mat. mul.")
 20 |   #sumin    = ( t(network) %*% test.genes.labels)
 21 |   sumin    = ( (network) %*% test.cell.labels)
 22 |   
 23 |   #print("Get sums - calc sumall")
 24 |   sumall   = matrix(apply(network,2,sum), ncol = dim(sumin)[2], nrow=dim(sumin)[1])
 25 |   
 26 |   #print("Get sums - calc predicts")
 27 |   predicts = sumin/sumall
 28 |   
 29 |   #print("Hide training data")
 30 |   nans = which(test.cell.labels == 1, arr.ind=T)
 31 |   predicts[nans] <- NA
 32 |   
 33 |   #Hide other experiment data 
 34 |   for (i in 1:length(e)){
 35 |     d<-which(exp.labels!=i)
 36 |     a<-which(exp.cols==i)
 37 |     predicts[d,a]<-NA
 38 |   }
 39 |   
 40 |   #print("Rank test data")
 41 |   predicts = apply(abs(predicts), 2, rank,na.last="keep",ties.method="average")
 42 |   
 43 |   filter = matrix(cell.labels, nrow=c, ncol=length(e)*l)
 44 |   for (i in 1:length(e)){
 45 |     d<-which(exp.labels!=i)
 46 |     a<-which(exp.cols==i)
 47 |     filter[d,a]<-NA
 48 |   }
 49 |   negatives = which(filter == 0, arr.ind=T)
 50 |   positives = which(filter == 1, arr.ind=T)
 51 |   
 52 |   predicts[negatives] <- 0
 53 |   
 54 |   #print("Calculate ROC - np")
 55 |   np = colSums(filter,na.rm=T) # Postives
 56 |   
 57 |   #print("Calculate ROC - nn")
 58 |   nn = apply(filter,2,function(x) sum(x==0,na.rm=T))     # Negatives
 59 |   
 60 |   #print("Calculate ROC - p")
 61 |   p =  apply(predicts,2,sum,na.rm=T)
 62 |   
 63 |   #print("Calculate ROC - rocN")
 64 |   rocNV = (p/np - (np+1)/2)/nn
 65 |   rocNV = matrix(rocNV, ncol=length(e), nrow=l)
 66 |   colnames(rocNV)=e
 67 |   rownames(rocNV)=colnames(cell.labels)
 68 |   
 69 |   if(means==T){
 70 |     scores=list(rowMeans(rocNV,na.rm=T))
 71 |   }
 72 |   else {
 73 |     scores = list(rocNV)      
 74 |   }
 75 | }
 76 | 
 77 | run_MetaNeighbor <- function(data, experiment_labels, celltype_labels, genesets, file_ext) {
 78 | 
 79 |     ROCs<-vector("list",length=length(genesets))
 80 |     names(ROCs)=names(genesets)
 81 |     nv.mat<-matrix(0,ncol=dim(celltype_labels)[2],nrow=length(genesets))
 82 |     rownames(nv.mat)=names(genesets)
 83 |     colnames(nv.mat)=colnames(celltype_labels)
 84 |     
 85 |     for (l in 1:length(genesets)){
 86 |       #print(l)
 87 |       geneset=genesets[[l]]
 88 |       m<-match(rownames(data),geneset)
 89 |       dat.sub=data[!is.na(m),]
 90 |       dat.sub=cor(dat.sub,method="s")
 91 |       dat.sub=as.matrix(dat.sub)
 92 |       rank.dat=dat.sub
 93 |       rank.dat[]=rank(dat.sub,ties.method="average",na.last="keep")
 94 |       rank.dat[is.na(rank.dat)]=0
 95 |       rank.dat=rank.dat/max(rank.dat)
 96 |       ROCs[[l]]=neighbor.voting.LeaveOneExpOut(experiment_labels,celltype_labels,rank.dat,means=F)
 97 |     }
 98 |     for(i in 1:length(ROCs)){
 99 |       nv.mat[i,]=rowMeans(ROCs[[i]][[1]],na.rm=T)
100 |     }
101 |     save(ROCs,file=paste(file_ext,"IDscore.list.Rdata",sep="."))
102 |     save(nv.mat, file=paste(file_ext,"IDscore.matrix.Rdata",sep="."))
103 |     return(rowMeans(nv.mat,na.rm=T))
104 | }


--------------------------------------------------------------------------------
/R/Support_code0_functions.R:
--------------------------------------------------------------------------------
  1 | #' Automatically format an annotation file.  From libary(scrattch.io)
  2 | #'
  3 | #' This takes an anno file as input at any stage and properly annotates it for compatability with
  4 | #'   shiny and other scrattch functions.  In particular, it ensures that columns have a label,
  5 | #'   an id, and a color, and that there are no factors.  It won't overwrite columns that have
  6 | #'   already been properly process.
  7 | #'
  8 | #' @param anno an existing annotation data frame
  9 | #' @param scale_num should color scaling of numeric values be "predicted" (default and highly recommended;
 10 | #'   will return either "linear" or "log10" depending on scaling), "linear","log10","log2", or "zscore".
 11 | #' @param na_val_num The value to use to replace NAs for numeric columns. default = 0.
 12 | #' @param colorset_num A vector of colors to use for the color gradient.
 13 | #'   default = c("darkblue","white","red")
 14 | #' @param sort_label_cat a logical value to determine if the data in category columns
 15 | #'   should be arranged alphanumerically before ids are assigned. default = T.
 16 | #' @param na_val_cat The value to use to replace NAs in category and factor variables.
 17 | #'   default = "ZZ_Missing".
 18 | #' @param colorset_cat The colorset to use for assigning category and factor colors.
 19 | #'   Options are "varibow" (default), "rainbow","viridis","inferno","magma", and "terrain"
 20 | #' @param color_order_cat The order in which colors should be assigned for cat and
 21 | #'   factor variables. Options are "sort" and "random". "sort" (default) assigns colors
 22 | #'   in order; "random" will randomly assign colors.
 23 | #'
 24 | #' @return an updated data frame that has been automatically annotated properly
 25 | #'
 26 | #' @export
 27 | auto_annotate <- function (anno, scale_num = "predicted", na_val_num = 0, colorset_num = c("darkblue", 
 28 |     "white", "red"), sort_label_cat = TRUE, na_val_cat = "ZZ_Missing", 
 29 |     colorset_cat = "varibow", color_order_cat = "sort") 
 30 | {
 31 |     anno_out <- anno
 32 |     if (!is.element("sample_name", colnames(anno_out))) {
 33 |         colnames(anno_out) <- gsub("sample_id", "sample_name", 
 34 |             colnames(anno_out))
 35 |     }
 36 |     cn <- colnames(anno_out)
 37 |     convertColumns <- cn[(!grepl("_label", cn)) & (!grepl("_id", 
 38 |         cn)) & (!grepl("_color", cn))]
 39 |     convertColumns <- setdiff(convertColumns, "sample_name")
 40 |     for (cc in convertColumns) {
 41 |         value <- anno_out[, cc]
 42 |         if (is.numeric(value)) {
 43 |             if (is.element(scale_num, c("linear", "log10", "log2", 
 44 |                 "zscore"))) {
 45 |                 anno_out <- annotate_num(df = anno_out, col = cc, 
 46 |                   scale = scale_num, na_val = na_val_num, colorset = colorset_num)
 47 |             }
 48 |             else {
 49 |                 scalePred <- ifelse(min(value) < 0, "linear", 
 50 |                   "log10")
 51 |                 if ((max(value + 1)/min(value + 1)) < 100) {
 52 |                   scalePred <- "linear"
 53 |                 }
 54 |                 if (mean((value - min(value))/diff(range(value))) < 
 55 |                   0.01) {
 56 |                   scalePred <- "log10"
 57 |                 }
 58 |                 anno_out <- annotate_num(df = anno_out, col = cc, 
 59 |                   scale = scalePred, na_val = na_val_num, colorset = colorset_num)
 60 |             }
 61 |         }
 62 |         else {
 63 |             if (is.factor(value)) {
 64 |                 anno_out <- annotate_factor(df = anno_out, col = cc, 
 65 |                   base = cc, na_val = na_val_cat, colorset = colorset_cat, 
 66 |                   color_order = color_order_cat)
 67 |             }
 68 |             else {
 69 |                 anno_out <- annotate_cat(df = anno_out, col = cc, 
 70 |                   base = cc, na_val = na_val_cat, colorset = colorset_cat, 
 71 |                   color_order = color_order_cat, sort_label = sort_label_cat)
 72 |             }
 73 |         }
 74 |     }
 75 |     anno_out <- group_annotations(anno_out)
 76 |     anno_out
 77 | }
 78 | 
 79 | 
 80 | #' Generate colors and ids for numeric annotations.  From library(scrattch.io)
 81 | #'
 82 | #' @param df data frame to annotate
 83 | #' @param col name of the numeric column to annotate
 84 | #' @param base base name for the annotation, which wil be used in the desc table. If not provided, will use col as base.
 85 | #' @param scale The scale to use for assigning colors. Options are "linear","log10","log2, and "zscore"
 86 | #' @param na_val The value to use to replace NAs. default = 0.
 87 | #' @param colorset A vector of colors to use for the color gradient. default = c("darkblue","white","red")
 88 | #'
 89 | #' @return A modified data frame: the annotated column will be renamed base_label, and base_id and base_color columns will be appended
 90 | #'
 91 | #' @export
 92 | annotate_num <- function (df, col = NULL, base = NULL, scale = "log10", na_val = 0, 
 93 |     colorset = c("darkblue", "white", "red")) 
 94 | {
 95 |     if (class(try(is.character(col), silent = T)) == "try-error") {
 96 |         col <- lazyeval::expr_text(col)
 97 |     }
 98 |     else if (class(col) == "NULL") {
 99 |         stop("Specify a column (col) to annotate.")
100 |     }
101 |     if (class(try(is.character(base), silent = T)) == "try-error") {
102 |         base <- lazyeval::expr_text(base)
103 |     }
104 |     else if (class(base) == "NULL") {
105 |         base <- col
106 |     }
107 |     if (!is.numeric(df[[col]])) {
108 |         df[[col]] <- as.numeric(df[[col]])
109 |     }
110 |     df[[col]][is.na(df[[col]])] <- na_val
111 |     x <- df[[col]]
112 |     annotations <- data.frame(label = unique(x)) %>% dplyr::arrange(label) %>% 
113 |         dplyr::mutate(id = 1:dplyr::n())
114 |     if (scale == "log10") {
115 |         colors <- values_to_colors(log10(annotations$label + 
116 |             1), colorset = colorset)
117 |     }
118 |     else if (scale == "log2") {
119 |         colors <- values_to_colors(log2(annotations$label + 1), 
120 |             colorset = colorset)
121 |     }
122 |     else if (scale == "zscore") {
123 |         colors <- values_to_colors(scale(annotations$label), 
124 |             colorset = colorset)
125 |     }
126 |     else if (scale == "linear") {
127 |         colors <- values_to_colors(annotations$label, colorset = colorset)
128 |     }
129 |     annotations <- mutate(annotations, color = colors)
130 |     names(annotations) <- paste0(base, c("_label", "_id", "_color"))
131 |     names(df)[names(df) == col] <- paste0(base, "_label")
132 |     df <- dplyr::left_join(df, annotations, by = paste0(base, 
133 |         "_label"))
134 |     df
135 | }
136 | 
137 | 
138 | #' Generate colors and ids for categorical annotations.  From library(scrattch.io)
139 | #'
140 | #' @param df data frame to annotate
141 | #' @param col name of the character column to annotate
142 | #' @param base base name for the annotation, which wil be used in the desc
143 | #'   table. If not provided, will use col as base.
144 | #' @param sort_label a logical value to determine if the data in col should be
145 | #'   arranged alphanumerically before ids are assigned. default = T.
146 | #' @param na_val The value to use to replace NAs. default = "ZZ_Missing".
147 | #' @param colorset The colorset to use for assigning category colors. Options
148 | #'   are "varibow" (default), "rainbow","viridis","inferno","magma", and "terrain"
149 | #' @param color_order The order in which colors should be assigned. Options are
150 | #'   "sort" and "random". "sort" assigns colors in order; "random" will randomly
151 | #'   assign colors.
152 | #'
153 | #' @return A modified data frame: the annotated column will be renamed
154 | #'   base_label, and base_id and base_color columns will be appended
155 | #'
156 | #' @export
157 | annotate_cat <- function (df, col = NULL, base = NULL, sort_label = T, na_val = "ZZ_Missing", 
158 |     colorset = "varibow", color_order = "sort") 
159 | {
160 |     if (class(try(is.character(col), silent = T)) == "try-error") {
161 |         col <- lazyeval::expr_text(col)
162 |     }
163 |     else if (class(col) == "NULL") {
164 |         stop("Specify a column (col) to annotate.")
165 |     }
166 |     if (class(try(is.character(base), silent = T)) == "try-error") {
167 |         base <- lazyeval::expr_text(base)
168 |     }
169 |     else if (class(base) == "NULL") {
170 |         base <- col
171 |     }
172 |     if (!is.character(df[[col]])) {
173 |         df[[col]] <- as.character(df[[col]])
174 |     }
175 |     df[[col]][is.na(df[[col]])] <- na_val
176 |     x <- df[[col]]
177 |     annotations <- data.frame(label = unique(x), stringsAsFactors = F)
178 |     if (sort_label) {
179 |         annotations <- annotations %>% dplyr::arrange(label)
180 |     }
181 |     annotations <- annotations %>% dplyr::mutate(id = 1:n())
182 |     if (colorset == "varibow") {
183 |         colors <- varibow(nrow(annotations))
184 |     }
185 |     else if (colorset == "rainbow") {
186 |         colors <- sub("FF$", "", grDevices::rainbow(nrow(annotations)))
187 |     }
188 |     else if (colorset == "viridis") {
189 |         colors <- sub("FF$", "", viridisLite::viridis(nrow(annotations)))
190 |     }
191 |     else if (colorset == "magma") {
192 |         colors <- sub("FF$", "", viridisLite::magma(nrow(annotations)))
193 |     }
194 |     else if (colorset == "inferno") {
195 |         colors <- sub("FF$", "", viridisLite::inferno(nrow(annotations)))
196 |     }
197 |     else if (colorset == "plasma") {
198 |         colors <- sub("FF$", "", viridisLite::plasma(nrow(annotations)))
199 |     }
200 |     else if (colorset == "terrain") {
201 |         colors <- sub("FF$", "", grDevices::terrain.colors(nrow(annotations)))
202 |     }
203 |     else if (is.character(colorset)) {
204 |         colors <- (grDevices::colorRampPalette(colorset))(nrow(annotations))
205 |     }
206 |     if (color_order == "random") {
207 |         colors <- sample(colors, length(colors))
208 |     }
209 |     annotations <- dplyr::mutate(annotations, color = colors)
210 |     names(annotations) <- paste0(base, c("_label", "_id", "_color"))
211 |     names(df)[names(df) == col] <- paste0(base, "_label")
212 |     df <- dplyr::left_join(df, annotations, by = paste0(base, 
213 |         "_label"))
214 |     df
215 | }
216 | 
217 | 
218 | #' Generate colors and ids for categorical annotations that are factors. From libary(scrattch.io)
219 | #'
220 | #' @param df data frame to annotate
221 | #' @param col name of the factor column to annotate
222 | #' @param base base name for the annotation, which wil be used in the desc
223 | #'   table. If not provided, will use col as base.
224 | #' @param na_val The value to use to replace NAs. default = "ZZ_Missing".
225 | #' @param colorset The colorset to use for assigning category colors. Options
226 | #'   are "varibow" (default), "rainbow","viridis","inferno","magma", and "terrain"
227 | #' @param color_order The order in which colors should be assigned. Options are
228 | #'   "sort" and "random". "sort" assigns colors in order; "random" will randomly
229 | #'   assign colors.
230 | #'
231 | #' @return A modified data frame: the annotated column will be renamed
232 | #'   base_label, and base_id and base_color columns will be appended
233 | #'
234 | #' @export
235 | annotate_factor <- function (df, col = NULL, base = NULL, na_val = "ZZ_Missing", 
236 |     colorset = "varibow", color_order = "sort") 
237 | {
238 |     if (class(try(is.character(col), silent = T)) == "try-error") {
239 |         col <- lazyeval::expr_text(col)
240 |     }
241 |     else if (class(col) == "NULL") {
242 |         stop("Specify a column (col) to annotate.")
243 |     }
244 |     if (class(try(is.character(base), silent = T)) == "try-error") {
245 |         base <- lazyeval::expr_text(base)
246 |     }
247 |     else if (class(base) == "NULL") {
248 |         base <- col
249 |     }
250 |     if (!is.factor(df[[col]])) {
251 |         df[[col]] <- as.factor(df[[col]])
252 |     }
253 |     if (sum(is.na(df[[col]])) > 0) {
254 |         lev <- c(levels(df[[col]]), na_val)
255 |         levels(df[[col]]) <- lev
256 |         df[[col]][is.na(df[[col]])] <- na_val
257 |     }
258 |     x <- df[[col]]
259 |     annotations <- data.frame(label = as.character(levels(x)), 
260 |         stringsAsFactors = F)
261 |     annotations <- annotations %>% dplyr::mutate(id = 1:n())
262 |     if (colorset == "varibow") {
263 |         colors <- varibow(nrow(annotations))
264 |     }
265 |     else if (colorset == "rainbow") {
266 |         colors <- sub("FF$", "", grDevices::rainbow(nrow(annotations)))
267 |     }
268 |     else if (colorset == "viridis") {
269 |         colors <- sub("FF$", "", viridisLite::viridis(nrow(annotations)))
270 |     }
271 |     else if (colorset == "magma") {
272 |         colors <- sub("FF$", "", viridisLite::magma(nrow(annotations)))
273 |     }
274 |     else if (colorset == "inferno") {
275 |         colors <- sub("FF$", "", viridisLite::inferno(nrow(annotations)))
276 |     }
277 |     else if (colorset == "plasma") {
278 |         colors <- sub("FF$", "", viridisLite::plasma(nrow(annotations)))
279 |     }
280 |     else if (colorset == "terrain") {
281 |         colors <- sub("FF$", "", grDevices::terrain.colors(nrow(annotations)))
282 |     }
283 |     else if (is.character(colorset)) {
284 |         colors <- (grDevices::colorRampPalette(colorset))(nrow(annotations))
285 |     }
286 |     if (color_order == "random") {
287 |         colors <- sample(colors, length(colors))
288 |     }
289 |     annotations <- dplyr::mutate(annotations, color = colors)
290 |     names(annotations) <- paste0(base, c("_label", "_id", "_color"))
291 |     names(df)[names(df) == col] <- paste0(base, "_label")
292 |     df[[col]] <- as.character(df[[col]])
293 |     df <- dplyr::left_join(df, annotations, by = paste0(base, 
294 |         "_label"))
295 |     df
296 | }
297 | 
298 | 
299 | #' Generate a rainbow palette with variation in saturation and value.  From library(scrattch.io)
300 | #'
301 | #' @param n_colors The number of colors to generate
302 | #'
303 | #' @export
304 | varibow <- function (n_colors) 
305 | {
306 |     sats <- rep_len(c(0.55, 0.7, 0.85, 1), length.out = n_colors)
307 |     vals <- rep_len(c(1, 0.8, 0.6), length.out = n_colors)
308 |     sub("FF$", "", grDevices::rainbow(n_colors, s = sats, v = vals))
309 | }
310 | 
311 | 
312 | #' Convert values to colors along a color ramp.  From library(scrattch.io)
313 | #'
314 | #' @param x a numeric vector to be converted to colors
315 | #' @param min_val a number that's used to set the low end of the color scale (default = 0)
316 | #' @param max_val a number that's used to set the high end of the color scale. If NULL (default),
317 | #' use the highest value in x
318 | #' @param colorset a set of colors to interpolate between using colorRampPalette
319 | #' (default = c("darkblue","dodgerblue","gray80","orangered","red"))
320 | #' @param missing_color a color to use for missing (NA) values.
321 | #' 
322 | #' @return a character vector of hex color values generated by colorRampPalette. Color values will
323 | #' remain in the same order as x.
324 | #' 
325 | #' @export
326 | values_to_colors <- function (x, min_val = NULL, max_val = NULL, colorset = c("darkblue", 
327 |     "dodgerblue", "gray80", "orange", "orangered"), missing_color = "black") 
328 | {
329 |     heat_colors <- (grDevices::colorRampPalette(colorset))(1001)
330 |     if (is.null(max_val)) {
331 |         max_val <- max(x, na.rm = T)
332 |     }
333 |     else {
334 |         x[x > max_val] <- max_val
335 |     }
336 |     if (is.null(min_val)) {
337 |         min_val <- min(x, na.rm = T)
338 |     }
339 |     else {
340 |         x[x < min_val] <- min_val
341 |     }
342 |     if (sum(x == min_val, na.rm = TRUE) == length(x)) {
343 |         colors <- rep(heat_colors[1], length(x))
344 |     }
345 |     else {
346 |         if (length(x) > 1) {
347 |             if (var(x, na.rm = TRUE) == 0) {
348 |                 colors <- rep(heat_colors[500], length(x))
349 |             }
350 |             else {
351 |                 heat_positions <- unlist(round((x - min_val)/(max_val - 
352 |                   min_val) * 1000 + 1, 0))
353 |                 colors <- heat_colors[heat_positions]
354 |             }
355 |         }
356 |         else {
357 |             colors <- heat_colors[500]
358 |         }
359 |     }
360 |     if (!is.null(missing_color)) {
361 |         colors[is.na(colors)] <- grDevices::rgb(t(grDevices::col2rgb(missing_color)/255))
362 |     }
363 |     colors
364 | }
365 | 
366 | 
367 | #' Group annotation columns.  From library(scrattch.io)
368 | #'
369 | #' @param df the annotation dataframe to arrange
370 | #' @param sample_col the column with unique sample ids. Default is "sample_name".
371 | #' @param keep_order a logical value. If FALSE, will sort the annotations alphanumerically by base.
372 | #'
373 | #' @return an annotation data frame with reordered columns
374 | #'
375 | #' @export
376 | group_annotations <- function (df, sample_col = "sample_name", keep_order = TRUE) 
377 | {
378 |     labels <- names(df)[grepl("_label", names(df))]
379 |     if (!keep_order) {
380 |         labels <- labels[order(labels)]
381 |     }
382 |     bases <- sub("_label", "", labels)
383 |     anno_cols <- c(paste0(rep(bases, each = 3), c("_id", "_label", 
384 |         "_color")))
385 |     extras <- setdiff(names(df), anno_cols)
386 |     anno <- select(df, one_of(c(sample_col, anno_cols, extras)))
387 | }
388 | 
389 | 
390 | 


--------------------------------------------------------------------------------
/R/Support_extraFunctions.r:
--------------------------------------------------------------------------------
  1 | plotTSNE_using_hicat <- function(norm.dat, anno, seed=123, useClusterInfoForDexGenes = FALSE, 
  2 |   n.markers=50, fn.size=3, cex=1, ...){
  3 | 
  4 |   library(ggplot2)
  5 |   library(M3Drop)
  6 |   library(scrattch.hicat)
  7 | 
  8 |   anno      <- anno[match(colnames(norm.dat),anno$sample_id),]
  9 |   ref.cl.df <- as.data.frame(unique(anno[, c("cluster_label", "cluster_id", "cluster_color")]))
 10 |   ref.cl.df <- ref.cl.df[order(ref.cl.df$cluster_id),]
 11 |   row.names(ref.cl.df) <- ref.cl.df$cluster_id
 12 |   ref.cl    <- droplevels(setNames(factor(anno$cluster_id), anno$sample_id))
 13 | 
 14 |   #### Select markers ####
 15 |   set.seed(seed)
 16 |   if((!useClusterInfoForDexGenes)|(length(levels(ref.cl))<2)){
 17 |     select.markers <- BrenneckeGetVariableGenes(2^norm.dat-1,suppress.plot=TRUE)
 18 |   } else {
 19 |     select.markers <- select_markers(norm.dat, ref.cl, n.markers=n.markers)$markers
 20 |   }
 21 | 
 22 |   #### Plot tSNE ####
 23 |   set.seed(seed+1)
 24 |   tsne.result <- plot_tsne_cl(norm.dat, select.markers, ref.cl, ref.cl.df, fn.size=fn.size, cex=cex, ...)
 25 |   tsne.result$g
 26 | 
 27 | }
 28 | 
 29 | 
 30 | findFromGroups <- function(datExpr,groupVector,fn="mean"){
 31 |   groups   = names(table(groupVector))
 32 |   fn       = match.fun(fn)
 33 |   datMeans = matrix(0,nrow=dim(datExpr)[2],ncol=length(groups))
 34 |   for (i in 1:length(groups)){
 35 |     datIn = datExpr[groupVector==groups[i],]
 36 |     if (is.null(dim(datIn)[1])) { datMeans[,i] = as.numeric(datIn)
 37 |     } else { datMeans[,i] = as.numeric(apply(datIn,2,fn)) }
 38 |   };    colnames(datMeans)  = groups;
 39 |   rownames(datMeans) = colnames(datExpr)
 40 |   return(datMeans)
 41 | }
 42 | 
 43 | 
 44 | error.bar <- function(x, y, upper, lower=upper, length=0.1,...){
 45 |  if(length(x) != length(y) | length(y) !=length(lower) | length(lower) != length(upper))
 46 |  stop("vectors must be same length")
 47 |  arrows(x,y+upper, x, y-lower, angle=90, code=3, length=length, ...)
 48 | }
 49 | 
 50 | 
 51 | errorBarPlot <- function(vals,sampleType,col=standardColors(),legend=TRUE,elwd=2,ylim=NA,xlim=NA,length=0.1,...){
 52 |  if(is.null(dim(vals))) vals = cbind(vals,vals)
 53 |  yy <- t(findFromGroups(vals,sampleType))
 54 |  col = col[1:dim(yy)[1]]
 55 |  ee <- t(findFromGroups(vals,sampleType,sd))
 56 |  if(is.na(ylim[1])) ylim = c(0,max(ee+yy))
 57 |  if(is.na(xlim[1])) xlim = c(0,((dim(ee)[2]+1)*dim(ee)[1])*1.4)
 58 |  barx <- barplot(yy, beside=TRUE,col=col,legend.text=legend,ylim=ylim,xlim=xlim,...)
 59 |  error.bar(barx,yy,ee,lwd=elwd,length=length)
 60 | }
 61 | 
 62 | mouse2human2 <- function (mouse, m2h){
 63 |  # Convert mouse to human symbols
 64 |  rownames(m2h) = m2h$Mou
 65 |  noHumn = which(!(mouse%in%m2h$Mouse_Symbol))
 66 |  humn = which((mouse%in%m2h$Mouse_Symbol))
 67 |  mouse[humn] = as.character(m2h[mouse[humn],1])
 68 |  mouse[noHumn] = toupper(mouse[noHumn])
 69 |  return(mouse)
 70 | }
 71 | 
 72 | t.test.l <- function(x){
 73 |   l  = length(x)
 74 |   tt = t.test(x[1:(l/2)],x[(l/2+1):l],paired=FALSE)
 75 |   out = c(tt$est,tt$stat,tt$p.val)
 76 |   if(is.na(out[2])) out[2] = 0
 77 |   if(is.na(out[3])) out[3] = 1
 78 |   return(out)
 79 | }
 80 | 
 81 | getAnovaPvalforApply <- function(x,varLabels,varWeights=NULL){
 82 |   anovadat  = as.data.frame(cbind(varLabels,x))
 83 |   aov.out   = summary(aov(as.numeric(anovadat[,2])~anovadat[,1],data=anovadat,weights=varWeights))  
 84 |   return(aov.out[[1]]$'Pr(>F)'[1])
 85 | }
 86 | 
 87 | 
 88 | 
 89 | meanEx <- function(x) {if(sum(x)==0) return(0); return(mean(x[x>0]));}
 90 | 
 91 | 
 92 | t.test.l.paired <- function(x){
 93 |   l  = length(x)
 94 |   tt = t.test(x[1:(l/2)],x[(l/2+1):l],paired=TRUE)
 95 |   out = c(tt$est,tt$stat,tt$p.val)
 96 |   if(is.na(out[2])) out[2] = 0
 97 |   if(is.na(out[3])) out[3] = 1
 98 |   return(out)
 99 | }
100 | 
101 |   
102 | calc_beta <- function(y, spec.exp = 2) {
103 |   d1 <- as.matrix(dist(y))
104 |   eps1 <- 1e-10
105 |   # Marker score is combination of specificity and sparsity
106 |   score1 <- sum(d1^spec.exp) / (sum(d1) + eps1)
107 |   return(score1)
108 | }
109 | 
110 | #####################################################################
111 | # FUNCTIONS FOR BUILDING AND PLOTTING THE TREE ARE BELOW
112 | 
113 | getDend <- function(input,distFun = function(x) return(as.dist(1-cor(x)))){
114 |  distCor  = distFun(input) 
115 |  avgClust = hclust(distCor,method="average")
116 |  dend = as.dendrogram(avgClust)
117 |  dend = labelDend(dend)[[1]]
118 | }
119 | 
120 | labelDend <- function(dend,n=1)
121 |   {  
122 |     if(is.null(attr(dend,"label"))){
123 |       attr(dend, "label") =paste0("n",n)
124 |       n= n +1
125 |     }
126 |     if(length(dend)>1){
127 |       for(i in 1:length(dend)){
128 |         tmp = labelDend(dend[[i]], n)
129 |         dend[[i]] = tmp[[1]]
130 |         n = tmp[[2]]
131 |       }
132 |     }
133 |     return(list(dend, n))
134 |   }
135 | 
136 | reorder.dend <- function(dend, l.rank,verbose=FALSE)
137 |   {
138 |     tmp.dend = dend
139 |     sc=sapply(1:length(dend), function(i){
140 |       l = dend[[i]] %>% labels
141 |       mean(l.rank[dend[[i]] %>% labels])
142 |     })
143 |     ord = order(sc)
144 | 	if(verbose){
145 |       print(sc)
146 | 	  print(ord)
147 |     }
148 | 	if(length(dend)>1){
149 |       for(i in 1:length(dend)){
150 |         if(ord[i]!=i){
151 |           dend[[i]]= tmp.dend[[ord[i]]]
152 |         }
153 |         if(length(dend[[i]])>1){
154 |           dend[[i]]=reorder.dend(dend[[i]],l.rank)
155 |         }
156 |       }
157 |     }
158 |     return(dend)
159 |   }
160 | 
161 |   
162 |   
163 | updateAndOrderClusters <- function(sampleInfo, # Subsetted Samp.dat file for samples from non-outlier clusters
164 |   classNameColumn = "cluster_type_label", # Will also be added to kpColumns (this is inh vs. exc. vs. glia)
165 |   layerNameColumn = "layer_label",    # Where is the layer info stored (NULL if none)
166 |   matchNameColumn = "cellmap_label",  # Where is the comparison info stored (e.g., closest mapping mouse type)
167 |   regionNameColumn = "Region_label",  # Where is the region info stored (NULL if none)
168 |   newColorNameColumn = "cellmap_color",  # Where is the column for selecting new cluster colors (e.g., color of closest mapping mouse type) [NULL keeps current colors]
169 |   otherColumns = NULL, # Additional columns to transfer from Samp.dat (Note: "cluster_id", "cluster_label", and "cluster_color" are required)
170 |   topGenes = NULL,                      #  A vector of the top genes for each cluster, with the vector names as cluster_id values (NULL if none)
171 |   classLevels = c("inh","exc","glia"), # A vector of the levels for broad classes.  Set to NULL for none.
172 |   getLayer = function(x) return(as.numeric(substr(as.character(x),2,2))),  # Function for converting layer text to numeric layer.  MAY NEED TO UPDATE
173 |   sep="_")  # For renaming
174 |   {
175 |   
176 |   ## Define clusterInfo variable to store cluster-level info
177 |   kpColumns = unique(c("cluster_id","cluster_label","cluster_color",classNameColumn,matchNameColumn,regionNameColumn,otherColumns))
178 |   clusterInfo = t(sampleInfo[,kpColumns])
179 |   colnames(clusterInfo) = clusterInfo["cluster_label",]
180 |   clusterInfo = clusterInfo[,unique(colnames(clusterInfo))]
181 |   clusterInfo = t(clusterInfo)
182 |   clusterInfo = as.data.frame(clusterInfo)
183 |   clusterInfo$old_cluster_label = clusterInfo$cluster_label 
184 |   rownames(clusterInfo) = 1:dim(clusterInfo)[1]
185 |   if(!is.null(classLevels)) clusterInfo[,classNameColumn] = factor(clusterInfo[,classNameColumn],levels=classLevels)
186 |   classLabel = clusterInfo[,classNameColumn]
187 |   if(is.factor(classLabel)) classLabel = droplevels(classLabel)
188 | 
189 |   
190 |   ## Add additional cluster information for renaming
191 |   cl3 = sampleInfo[,"cluster_id"]
192 |   names(cl3) <- sampleInfo$sample_id
193 |   cl3 = factor(cl3,levels=as.numeric(clusterInfo$cluster_id))
194 |   
195 |   if(is.null(newColorNameColumn)) newColorNameColumn = "cluster_color"
196 |   colorVec = as.character(tapply(names(cl3), cl3, function(x) { 
197 |    col = as.factor(sampleInfo[,newColorNameColumn])
198 |    names(col) = sampleInfo$sample_id
199 |    return(names(sort(-table(col[x])))[1])
200 |   }))
201 |   clusterInfo$cluster_color = colorVec
202 |     
203 |   if(!is.null(regionNameColumn)){
204 |    regionVec = as.character(tapply(names(cl3), cl3, function(x) { 
205 |     rg = as.factor(sampleInfo[,regionNameColumn])
206 | 	names(rg) = sampleInfo$sample_id
207 | 	rg = rg[x]
208 |     rg = table(rg)/table(sampleInfo[,regionNameColumn])
209 |     rg = -sort(-round(100*rg/sum(rg)))[1]
210 |     return(paste(names(rg),rg,sep="~"))
211 |    }))
212 |    clusterInfo$region = regionVec
213 |   }
214 |   clusterInfo$layer = 0
215 |   if(!is.null(layerNameColumn)){
216 |    clLayer <- getLayer(sampleInfo[,layerNameColumn])
217 |    names(clLayer) = names(cl3)
218 |    layerVec <- as.character(tapply(names(cl3), cl3, function(x) {
219 |     lyy    = factor(clLayer)[x] 
220 |     layTab = cbind(as.numeric(names(table(lyy))),table(lyy),table(clLayer))
221 | 	lyy    = clLayer[x] 
222 |     #ly     = signif(mean(lyy),4)  # Only use 1 decimal
223 | 	ly     = signif(mean(lyy),2)
224 |     ly     = paste(ly,paste(rep(0,3-nchar(ly)),collapse="",sep=""),sep="")
225 |     ly     = gsub("00",".0",ly)
226 |     ly     = paste("L",ly,sep="")
227 | 	
228 | 	return(ly)
229 |    }))
230 |    clusterInfo$layer = as.numeric(gsub("L","",layerVec))
231 |   }
232 |   if(!is.null(matchNameColumn)){
233 |    matchVec <- as.character(tapply(names(cl3), cl3, function(x) {
234 |     y  = is.element(sampleInfo$sample_id,x)
235 |     nm = -sort(-table(sampleInfo[y,matchNameColumn]))
236 |     return(names(nm)[1])
237 |    }))
238 |    clusterInfo$topMatch = matchVec
239 |   }
240 | 
241 |   ## Rename the clusters based on the above info
242 |   for (i in 1:dim(clusterInfo)[1]){
243 |     id  = as.character(as.numeric(clusterInfo[i,"cluster_id"]))
244 | 	id2 = paste0("cl",id)
245 | 	broad = ifelse(is.na(classNameColumn),"",substr(clusterInfo[i,classNameColumn],1,1))
246 |     cn  = paste0(broad,sum(cl3==id))
247 | 	lab = paste(id2,cn,sep=sep)
248 |     lab = ifelse(is.null(topGenes),lab,paste(lab,topGenes[as.character(id)],sep=sep))
249 |     lab = ifelse(is.null(matchNameColumn),lab,paste(lab,clusterInfo$topMatch[i],sep=sep))
250 |     lab = ifelse(is.null(regionNameColumn),lab,paste(lab,clusterInfo$region[i],sep=sep))
251 | 	lab = ifelse(is.null(layerNameColumn),lab,paste(lab,layerVec[i],sep=sep))
252 | 	lab = gsub("-","~",lab)               # To avoid shiny crashing
253 | 	clusterInfo[i,"cluster_label"] = lab
254 |   }
255 | 
256 |   ## Determine a new optimal order based on excitatory layer followed by mapped mouse type
257 |   classLabel2 = factor(classLabel,levels=unique(classLabel))
258 |   tmpLayer = clusterInfo[,"layer"]
259 |   tmpLayer[classLabel2=="inh"] = tmpLayer[classLabel2=="inh"]-100 #0
260 |   tmpLayer[classLabel2=="glia"] = tmpLayer[classLabel2=="glia"] +100 #10
261 |   tmpMouse = tmpLayer
262 |   if(!is.null(matchNameColumn)){
263 |    tmpMouse = as.character(clusterInfo[,"topMatch"])
264 |    tmpMouse = gsub("Vip","NVip",tmpMouse)
265 |    tmpMouse = gsub("Smad","ASmad",tmpMouse)
266 |   }
267 |   ordNew   = order(tmpLayer,tmpMouse,clusterInfo$cluster_id)
268 | 
269 |   clusterInfo  = clusterInfo[ordNew,]
270 |   rownames(clusterInfo) <- clusterInfo$lrank <- 1:dim(clusterInfo)[1]
271 |   clusterInfo$cluster_id = as.character(as.numeric(clusterInfo$cluster_id))
272 |   
273 |   ## Return clusterInfo 
274 |   return(clusterInfo)
275 | }
276 | 
277 | 
278 | # Newer function
279 | getTopMarkersByPropNew <- function(propExpr, medianExpr, propDiff = 0, propMin=0.5, medianFC = 1, 
280 |   excludeGenes = NULL, sortByMedian=TRUE){
281 |   specGenes = rep("none",dim(propExpr)[2])
282 |   names(specGenes) = colnames(propExpr)
283 |    propSort = t(apply(propExpr,1,function(x) return(-sort(-x))))
284 |   propWhich= t(apply(propExpr,1,function(x,y) return(y[order(-x)]),colnames(propExpr)))
285 |   medianDif= apply(cbind(as.numeric(propWhich[,1]),medianExpr),1,function(x,y) {
286 |    wIn = y==as.character(x[1])
287 |    mIn = x[2:length(x)][wIn]
288 |    mOut= max(x[2:length(x)][!wIn])
289 |    return(mIn-mOut)
290 |   }, colnames(propExpr))
291 |   keepProp = (propSort[,1]>=propMin)&((propSort[,1]-propSort[,2])>propDiff)&(medianDif>=medianFC)&(!is.element(rownames(propExpr),excludeGenes))
292 |   propSort = propSort[keepProp,]
293 |   propWhich= propWhich[keepProp,]
294 |   ord      = order(-medianDif[keepProp]*ifelse(sortByMedian,1,0), propSort[,2]-propSort[,1])
295 |   propSort = propSort[ord,]
296 |   propWhich= propWhich[ord,]
297 |   while(sum(keepProp)>1){
298 |    keepProp = !is.element(propWhich[,1],names(specGenes)[specGenes!="none"])
299 |    if(sum(keepProp)<=1) break
300 |    tmp = propWhich[keepProp,]
301 |    specGenes[tmp[1,1]] = rownames(tmp)[1]
302 |   }
303 |   return(specGenes)
304 | }
305 | 
306 | #newCluster10 = renameClusters(sampleInfo,clusterInfo, propExpr[keepGenes,], medianExpr[keepGenes,], 
307 | #    propDiff = 0, propMin=0.5, medianFC = 1, propLayer=0.1,excludeGenes = excludeGenes,
308 | #	majorGenes=majorGenes, majorLabels=majorLabels, broadGenes=broadGenes)
309 | 	
310 | 
311 | renameClusters <- function(sampleInfo,clusterInfo, propExpr, medianExpr, propDiff = 0, propMin=0.5, medianFC = 1, 
312 |                            excludeGenes = NULL, majorGenes=c("GAD1","SLC17A7","SLC1A3"), majorLabels= majorGenes, 
313 | 						   broadGenes = majorGenes, propLayer=0.3, layerNameColumn="layer_label", 
314 | 						   getLayer = function(x) return(as.numeric(substr(as.character(x),2,2)))){
315 |   # Layer determination
316 |   clLayer <- getLayer(sampleInfo[,layerNameColumn])
317 |   cl3 = sampleInfo[,"cluster_id"]
318 |   names(cl3) <- sampleInfo$sample_id
319 |   cl3 = factor(cl3,levels=sort(as.numeric(clusterInfo$cluster_id)))
320 |   names(clLayer) = names(cl3)
321 |   layerVec <- (tapply(names(cl3), cl3, function(x) {
322 |     lyy    = factor(clLayer)[x] 
323 |     layTab = cbind(as.numeric(names(table(lyy))),table(lyy),table(clLayer))
324 | 	return(((layTab[,2]/layTab[,3])/sum(layTab[,2]/layTab[,3])))  # replace max with sum?
325 |   }))
326 |   rn       = names(layerVec)
327 |   layerVec = matrix(unlist(layerVec), ncol = 6, byrow = TRUE)
328 |   rownames(layerVec) = rn
329 |   colnames(layerVec) = 1:6
330 |   layLab = apply(layerVec,1,function(x,y){
331 |      z = as.numeric(colnames(layerVec)[x>=y])
332 | 	 if(length(z)==1) return(z)
333 | 	 return(paste(range(z),collapse="-"))
334 |   }, propLayer)
335 |   layLab = paste0("L",layLab)
336 | 
337 |   # Genes determination
338 |   majorLab   = majorGenes[apply(propExpr[majorGenes,],2,which.max)]
339 |   names(majorLabels) <- majorGenes
340 |   
341 |   broadLab   = broadGenes[apply(propExpr[broadGenes,],2,which.max)]
342 |   broadProp  = apply(propExpr[broadGenes,],2,max)
343 |   broadLab[broadProp<propMin] = majorLab[broadProp<propMin]
344 |   names(broadLab) <- colnames(propExpr)
345 |   
346 |   betaScore  = getBetaScore(propExpr)
347 |   kpGn       = rep(TRUE,dim(propExpr)[1])  #betaScore>=minBeta
348 |   specGenes  = getTopMarkersByPropNew(propExpr=propExpr[kpGn,], medianExpr=medianExpr[kpGn,], propDiff = propDiff, propMin=propMin, 
349 |                medianFC = medianFC, excludeGenes = excludeGenes)
350 |   specGenes0 = getTopMarkersByPropNew(propExpr=propExpr[kpGn,], medianExpr=medianExpr[kpGn,], propDiff = 0, propMin=propMin, 
351 |                medianFC = 0, excludeGenes = excludeGenes)
352 |   for (s in colnames(propExpr)[(specGenes=="none")]){
353 |     kp = propExpr[broadLab[s],]>=propMin
354 | 	specGenesTmp = getTopMarkersByPropNew(propExpr=propExpr[kpGn,kp], medianExpr=medianExpr[kpGn,kp], propDiff = propDiff, propMin=propMin, 
355 |                medianFC = medianFC, excludeGenes = excludeGenes)
356 | 	if(specGenesTmp[s]!="none") specGenes[s] = specGenesTmp[s]
357 |     if((specGenes0[s]!="none")&(specGenes[s]=="none")){
358 | 	  kp = (propExpr[specGenes0[s],] > quantile(propExpr[specGenes0[s],],0.8)) & (broadLab==broadLab[s])
359 | 	  specGenesTmp = getTopMarkersByPropNew(propExpr=propExpr[kpGn,kp], medianExpr=medianExpr[kpGn,kp], propDiff = propDiff, propMin=propMin, 
360 |                medianFC = medianFC, excludeGenes = excludeGenes)
361 | 	  specGenes[s] = specGenes0[s]
362 | 	  if (length(grep(broadLab[s],broadGenes))==0) paste(specGenes[s],specGenesTmp[s])
363 | 	}
364 |   }
365 |   
366 |   # Name construction
367 |   nss       = names(specGenes)
368 |   specGenes = paste0(" ",specGenes)
369 |   specGenes[is.element(broadLab,intersect(majorGenes,broadGenes))] = ""
370 |   clNames   = paste(majorLabels[majorLab]," ",layLab," ",broadLab,specGenes,sep="")
371 |   names(clNames) = nss
372 |   clNames = gsub(" none","",clNames)
373 |   
374 |   # Ensure that no excitatory clusters are included in layer 1
375 |   clNames2 = clNames
376 |   clNames = gsub("Exc L1","Exc L2",clNames)
377 |   clNames = gsub("L2-2","L2",clNames)
378 |   
379 |   return(clNames)
380 | }
381 | 
382 | # This is nearly identical to getTopMarkersByPropNew
383 | 
384 | getTopMarkersByProp <- function(...) suppressWarnings(getTopMarkersByProp2(...)) # Warnings are not useful from this function
385 | 
386 | getTopMarkersByProp2 <- function(propExpr,n=1,excludeGenes = NULL,medianExpr=NA,fcThresh=0.5,minProp=0){
387 |  prop   = propExpr[!is.element(rownames(propExpr),excludeGenes),]
388 |  cn     = colnames(prop)
389 |  rn     = rownames(prop)
390 |  wmProp = cn[apply(prop,1,function(x) return(which.max(x)[1]))]
391 |  maxProp= rowMax(prop)
392 |  dfProp = apply(prop,1,function(x) return(max(x)-max(x[x<max(x)])))
393 |  dfProp[dfProp==Inf] = 0
394 |  nmProp = apply(prop,1,function(x) return(sum(x==max(x))))
395 |  dfMed  = dfProp*0+1
396 |  kpI <- kp <- rep(TRUE,length(dfProp))&(nmProp==1)&(maxProp>=minProp)
397 |  if(!is.na(medianExpr[1])){
398 |   med   = medianExpr[!is.element(rownames(medianExpr),excludeGenes),]
399 |   med   = med[rn,]
400 |   wmMed = colnames(med)[apply(med,1,function(x) return(which.max(x)[1]))]
401 |   dfMed = apply(med,1,function(x) return(max(x)-max(x[x<max(x)])))
402 |   kp    = (wmProp==wmMed)&(dfMed>=fcThresh)&kpI&(maxProp>=minProp)
403 |  }
404 |  markCount = table(factor(wmProp[kp],levels=colnames(propExpr)))
405 |  kpVal = kp+kpI
406 |  outGenes = matrix(nrow=length(cn),ncol=n)
407 |  colnames(outGenes) = paste0("Gene_",1:n)
408 |  rownames(outGenes) = cn
409 |  for (cc in cn){
410 |   kk  = wmProp==cc
411 |   ord = order(-kpVal[kk],-dfMed[kk])  #(dfMed[kk]*dfProp[kk]))
412 |   outGenes[cc,] = rn[kk][ord][1:n]
413 |  }
414 |  nm = rownames(outGenes)
415 |  outGenes = as.character(outGenes)
416 |  names(outGenes) = nm
417 |  return(outGenes)
418 | }
419 | 
420 | 
421 | 
422 | updateSampDat <- function(Samp.dat,clusterInfo){
423 |   lab = as.character(Samp.dat$cluster_label)
424 |   id  = as.numeric(Samp.dat$cluster_id)
425 |   for (i in 1:dim(clusterInfo)[1]){
426 |     kpId = as.numeric(clusterInfo[i,"cluster_id"])
427 | 	lab[id==kpId] = clusterInfo[i,"cluster_label"]
428 |   }  
429 |   Samp.dat$cluster_label = lab
430 |   return(Samp.dat)
431 | }
432 | 
433 | 
434 | getBetaScore <- function(propExpr,returnScore=TRUE,spec.exp = 2){
435 |   # Wrapper for calc_beta
436 |   # propExpr = proportions of cells in a given cluster with CPM/FPKM > 1 (or 0, HCT uses 1)
437 |   betaScore <- apply(propExpr, 1, calc_beta)
438 |   betaScore[is.na(betaScore)] <- 0
439 |   if(returnScore) return(betaScore)
440 |   scoreRank = rank(-betaScore)
441 |   return(scoreRank)
442 | }
443 | 
444 | rankCompleteDendPlot <- function(input=NULL,l.rank=NULL,dend=NULL,label_color=NULL,node_size=3,
445 |     main="Tree",distFun = function(x) return(dist(1-(1+cor(x))/2)),...){
446 |  if(is.null(dend)) dend = getRankedDend(input,l.rank,distFun)
447 |  plot_dend(dend,node_size=node_size,label_color=label_color,main=main)
448 | }
449 | 
450 | # Allows plot_dend to work properly in a for loop
451 | plot_dend <- function(...) print(plot_dend2(...))
452 |   
453 | plot_dend2 <- function(dend, dendro_data=NULL,node_size=1,r=NULL,label_color=NULL,main="",rMin=-0.6)  # r=c(-0.1,1)
454 |   {
455 |     require(dendextend)
456 |     require(ggplot2)
457 |     if(is.null(dendro_data)){
458 |       dendro_data = as.ggdend(dend)
459 |       dendro_data$nodes$label =get_nodes_attr(dend, "label")
460 |       dendro_data$nodes = dendro_data$nodes[is.na(dendro_data$nodes$leaf),]
461 |     }
462 |     node_data = dendro_data$nodes
463 |     label_data <- dendro_data$labels
464 |     segment_data <- dendro_data$segments
465 |     if(is.null(node_data$node_color)){
466 |       node_data$node_color="black"
467 |     }
468 | 	if(!is.null(label_color)){
469 |       label_data$col=label_color
470 |     }
471 | 	rMax = max(node_data$height)*1.05
472 | 	main = paste(main,"- Max height =",signif(rMax,3))  # Add/update the title
473 | 	if(is.null(r)) r=c(rMin*rMax,rMax)  # Dinamically update the height
474 |     ggplot() + ggtitle(main) + 
475 |       geom_text(data = node_data, aes(x = x, y = y, label = label,color=node_color),size=node_size,vjust = 1) +
476 |         geom_segment(data = segment_data,
477 |                      aes(x=x,xend=xend,y=y,yend=yend), color="gray50") +
478 |                        geom_text(data = label_data, aes(x = x, y = -0.01, label = label, color = col),size=node_size,angle = 90, hjust = 1) +
479 |                            scale_color_identity() +
480 |                              theme_dendro() +
481 |                                 scale_y_continuous(limits = r)
482 |     
483 |   }
484 | 
485 | 
486 | plot_annotation_comparison <- function (cl, ref.cl, anno, title="plot") 
487 | {
488 |     if(!is.factor(ref.cl)) ref.cl <- as.factor(ref.cl)
489 |     tb <- table(cl = cl, ref.cl = ref.cl)
490 | 	tb <- round(100*tb/rowSums(tb))
491 | 	
492 |     tb.df <- as.data.frame(tb)
493 |     tb.df <- tb.df[tb.df$Freq > 0, ]
494 | 	tb.df$Color <- anno$cluster_color[match(tb.df$cl,anno$cluster_label)]
495 |     g <- ggplot(tb.df, aes(x = cl, y = ref.cl)) + 
496 |          geom_point(aes(size = Freq, color = Color)) + 
497 |          theme(axis.text.x = element_text(vjust = 0.1, 
498 |             hjust = 0.2, angle = 90, size = 7), axis.text.y = element_text(size = 6)) + 
499 |          scale_size(range = c(0, 3)) +
500 | 		 ggtitle(title) +
501 | 		 scale_color_identity() +
502 | 		 theme(legend.position = "none") 
503 |     g
504 | }


--------------------------------------------------------------------------------
/R/Support_heatmap_functions.R:
--------------------------------------------------------------------------------
  1 | ######################################################################################
  2 | # CLUSTER HEATMAPS
  3 | 
  4 | group_heatmap_plot <- function (genes = c("Hspa8", "Snap25", "Gad2", "Vip"), clusters = 1:10, 
  5 |     group_by = "final", calculation = "mean", data_source = "internal", 
  6 |     normalize_rows = FALSE, logscale = T, fontsize = 7, labelheight = 25, 
  7 |     max_width = 10, showcounts = T, rotatecounts = F, maxval = "auto", 
  8 |     colorset = c("darkblue", "dodgerblue", "gray80", "orange", 
  9 |         "orangered")) 
 10 | {
 11 |     library(dplyr)
 12 |     library(ggplot2)
 13 |     genes <- rev(genes)
 14 |     if (data_source == "internal") {
 15 |         data <- get_internal_data(genes, group_by, clusters)
 16 |     }
 17 |     else if (is.list(data_source)) {
 18 |         data <- get_list_data(data_source, genes, group_by, clusters)
 19 |     }
 20 |     else if (grepl("\\\\.db$", data_source)) {
 21 |         data <- get_db_data(data_source, genes, group_by, clusters)
 22 |     }
 23 |     else if (file.exists(paste0(data_source, "/anno.feather"))) {
 24 |         data <- get_feather_data(data_source, genes, group_by, 
 25 |             clusters)
 26 |     }
 27 |     else {
 28 |         stop("Cannot identify data_source.")
 29 |     }
 30 |     data <- data %>% select(-xpos) %>% mutate(xpos = plot_id)
 31 |     genes <- sub("-", ".", genes)
 32 |     genes[grepl("^[0-9]", genes)] <- paste0("X", genes[grepl("^[0-9]", 
 33 |         genes)])
 34 |     names(data)[grepl("^[0-9]", genes)] <- paste0("X", names(data)[grepl("^[0-9]", 
 35 |         genes)])
 36 |     genes <- genes[genes %in% names(data)]
 37 |     ngenes <- length(genes)
 38 |     nclust <- length(unique(data$plot_id))
 39 |     nsamples <- nrow(data)
 40 |     header_labels <- build_header_labels(data = data, ngenes = ngenes, 
 41 |         nsamples = 1, nclust = nclust, labelheight = labelheight, 
 42 |         labeltype = "simple")
 43 |     heat_data <- data %>% select(plot_id, xpos)
 44 |     for (i in 1:length(genes)) {
 45 |         gene <- genes[i]
 46 |         if (calculation == "mean") {
 47 |             gene_func <- paste0("mean(", gene, ")")
 48 |         }
 49 |         else if (calculation == "trimmed_mean") {
 50 |             gene_func <- paste0("mean(", gene, ",trim = 0.25)")
 51 |         }
 52 |         else if (calculation == "percent") {
 53 |             gene_func <- paste0("sum(", gene, " > 0)/length(", 
 54 |                 gene, ")")
 55 |         }
 56 |         else if (calculation == "median") {
 57 |             gene_func <- paste0("stats::median(", gene, ")")
 58 |         }
 59 |         gene_data <- data %>% select(one_of(c("plot_id", gene))) %>% 
 60 |             group_by(plot_id) %>% summarize_(result = gene_func)
 61 |         names(gene_data)[2] <- gene
 62 |         heat_data <- heat_data %>% left_join(gene_data, by = "plot_id")
 63 |     }
 64 |     heat_data <- unique(heat_data)
 65 |     max_vals <- heat_data %>% select(one_of(genes)) %>% summarise_each(funs(max)) %>% 
 66 |         unlist()
 67 |     max_labels <- data.frame(x = (nclust + 0.5) * 1.01, y = 1:ngenes + 
 68 |         0.5, label = sci_label(max_vals))
 69 |     max_header <- data.frame(x = (nclust + 0.5) * 1.01, y = ngenes + 
 70 |         1, label = "Max value")
 71 |     max_width <- nclust * (max_width/100)/(1 - max_width/100)
 72 |     if (logscale) {
 73 |         heat_data[genes] <- log10(heat_data[genes] + 1)
 74 |     }
 75 |     heat_colors <- colorRampPalette(colorset)(1001)
 76 |     if (maxval == "auto") {
 77 |         data_max <- max(unlist(heat_data[genes]))
 78 |     }
 79 |     else {
 80 |         data_max <- maxval
 81 |     }
 82 |     for (gene in genes) {
 83 |         if (normalize_rows == T) {
 84 |             heat_data[gene] <- heat_colors[unlist(round(heat_data[gene]/max(heat_data[gene]) * 
 85 |                 1000 + 1, 0))]
 86 |         }
 87 |         else {
 88 |             color_pos <- unlist(round(heat_data[gene]/data_max * 
 89 |                 1000 + 1, 0))
 90 |             color_pos[color_pos > 1001] <- 1001
 91 |             heat_data[gene] <- heat_colors[color_pos]
 92 |         }
 93 |     }
 94 |     label_y_size <- max(header_labels$ymax) - min(header_labels$ymin)
 95 |     cluster_data <- data %>% group_by(plot_label, plot_color, 
 96 |         plot_id) %>% summarise(cn = n()) %>% as.data.frame(stringsAsFactors = F) %>% 
 97 |         arrange(plot_id) %>% mutate(labely = ngenes + label_y_size * 
 98 |         0.05, cny = max(header_labels$ymax) - 0.1 * label_y_size, 
 99 |         xpos = plot_id)
100 |     p <- ggplot() + scale_fill_identity() + scale_y_continuous("", 
101 |         breaks = 1:length(genes) + 0.5, labels = genes, expand = c(0, 
102 |             0)) + scale_x_continuous("", expand = c(0, 0)) + 
103 |         theme_classic(fontsize) + theme(axis.text = element_text(size = rel(1)), 
104 |         axis.text.x = element_blank(), axis.ticks.x = element_blank(), 
105 |         legend.position = "none")
106 |     for (i in 1:length(genes)) {
107 |         p <- p + geom_rect(data = heat_data, aes_string(xmin = "xpos - 0.5", 
108 |             xmax = "xpos + 0.5", ymin = i, ymax = i + 1, fill = genes[i]))
109 |     }
110 |     p <- p + geom_rect(data = header_labels, aes(xmin = xmin, 
111 |         ymin = ymin, xmax = xmax, ymax = ymax, fill = color)) + 
112 |         geom_text(data = header_labels, aes(x = (xmin + xmax)/2, 
113 |             y = ymin + 0.05, label = label), angle = 90, vjust = 0.35, 
114 |             hjust = 0, size = pt2mm(fontsize)) + geom_rect(aes(xmin = nclust + 
115 |         0.5, xmax = nclust + 0.5 + max_width, ymin = 1, ymax = max(header_labels$ymax)), 
116 |         fill = "white") + geom_text(data = max_header, aes(x = x, 
117 |         y = y, label = label), angle = 90, hjust = 0, vjust = 0.35, 
118 |         size = pt2mm(fontsize)) + geom_text(data = max_labels, 
119 |         aes(x = x, y = y, label = label), hjust = 0, vjust = 0.35, 
120 |         size = pt2mm(fontsize), parse = TRUE)
121 |     if (showcounts) {
122 |         if (rotatecounts) {
123 |             p <- p + geom_text(data = cluster_data, aes(y = cny, 
124 |                 x = xpos, label = cn), angle = 90, vjust = 0.35, 
125 |                 hjust = 1, size = pt2mm(fontsize))
126 |         }
127 |         else {
128 |             p <- p + geom_text(data = cluster_data, aes(y = cny, 
129 |                 x = xpos, label = cn), size = pt2mm(fontsize))
130 |         }
131 |     }
132 |     return(p)
133 | }
134 | 
135 | 
136 | 
137 | 
138 | ######################################################################################
139 | # CELL BY CELL HEATMAPS
140 | 
141 | sample_heatmap_plot <- function(data_source, 
142 |                                 genes = c("Hspa8","Snap25","Gad2","Vip"), 
143 |                                 group_by = "cluster", 
144 |                                 groups = 1:10,
145 |                                 top_labels = "layer",
146 |                                 sample_n = 0,
147 |                                 scale_mode = "scale.log",
148 |                                 showall = F,
149 |                                 autorange = "auto", 
150 |                                 minrange = 0, 
151 |                                 maxrange = 10,
152 |                                 pfontsize = 14,
153 |                                 expand = F,
154 |                                 rotatelabels = F,
155 |                                 showlines = F,
156 |                                 showids = T) {
157 |   
158 |   library(feather)
159 |   library(dplyr)
160 |   library(ggplot2)
161 |   
162 |   data <- read_feather(file.path(data_source, "data.feather"), columns = c("sample_id",genes))
163 |   anno <- read_feather(file.path(data_source, "anno.feather"))
164 |   desc_table <- read_feather(file.path(data_source, "desc.feather"))
165 |   
166 |   primary <- list(base = group_by,
167 |                   id = paste0(group_by,"_id"),
168 |                   label = paste0(group_by,"_label"),
169 |                   color = paste0(group_by,"_color"))
170 |   secondary <- list(base = top_labels,
171 |                     id = paste0(top_labels,"_id"),
172 |                     label = paste0(top_labels,"_label"),
173 |                     color = paste0(top_labels,"_color"))
174 |   
175 |   genes[grepl("^[0-9]",genes)] <- paste0("X",genes[grepl("^[0-9]",genes)])
176 |   
177 |   genes.df <- data
178 | 
179 |   ###############
180 |   ## Filtering ##
181 |   ###############
182 |   
183 |   # Join the annotation and genes data frames
184 |   sub.df <- anno %>% 
185 |     # Filter for the selected clusters
186 |     filter_(paste0(primary$id, " %in% c(", paste(groups, collapse = ","), ")")) %>%
187 |     left_join(genes.df)
188 |   
189 |   # Subsample data per primary group if Sample N is set to something other than 0
190 |   if(sample_n > 0) {
191 |     sub.df <- sub.df %>%
192 |       group_by_(primary$id) %>%
193 |       sample_n(sample_n, replace = T) %>%
194 |       unique() %>%
195 |       ungroup() %>%
196 |       as.data.frame()
197 |   }
198 |   
199 |   #############
200 |   ## Scaling ##
201 |   #############
202 | 
203 |   ## If the y-axis is plotted on a log scale, add 1 to the data values to plot data + 1
204 |   if(scale_mode == "scale.log") {
205 |     for(gene in genes) {
206 |       sub.df[,gene] <- log10(sub.df[,gene] + 1)
207 |     }
208 |   }
209 |   if(scale_mode == "scale.rel") {
210 |     for(gene in genes) {
211 |       sub.df[,gene] <- sub.df[,gene]/max(sub.df[,gene])
212 |     }
213 |   }
214 |   if(scale_mode == "scale.log.rel") {
215 |     for(gene in genes) {
216 |       sub.df[,gene] <- log10(sub.df[,gene]+1)/log10((max(sub.df[,gene]+1)))
217 |     }
218 |   }
219 |   
220 |   #############
221 |   ## Sorting ##
222 |   #############
223 |   
224 |   cluster_order <- data.frame(clust = groups,
225 |                               plot_order = 1:length(groups))
226 |   
227 |   names(cluster_order)[1] <- primary$id
228 |   
229 |   sub.df <- sub.df %>% 
230 |     left_join(cluster_order, by = primary$id)
231 |   
232 |   sort.df <- sub.df %>% 
233 |     arrange_(.dots = c("plot_order", secondary$id)) %>% 
234 |     mutate(xpos = 1:nrow(sub.df))
235 |   
236 |   
237 |   
238 |   # Start buildplot
239 |   genes <- sub("-", ".", genes)
240 |   genes[grepl("^[0-9]",genes)] <- paste0("X",genes[grepl("^[0-9]",genes)])
241 |   
242 |   names(sort.df) <- sub("-",".",names(sort.df))
243 |   
244 |   colors <- colorRampPalette(c("darkblue", "white", "red"))(1001)
245 |   
246 |   if(autorange == "auto") {
247 |     min.val <- 0
248 |     max.val <- max(unlist(sort.df[ ,genes]))
249 |   } else if (autorange == "manual") {
250 |     min.val <- as.numeric(minrange)
251 |     max.val <- as.numeric(maxrange)
252 |   }
253 |   
254 |   ## Convert data to geom_rect() compatible table
255 |   plot.df <- data.frame(xmin=numeric(),xmax=numeric(),ymin=numeric(),ymax=numeric(),fill=character())
256 |   
257 |   for(i in 1:length(genes)) {
258 |     
259 |     fill_ids <- unlist(round( (sort.df[,genes[i]] - min.val) / (max.val - min.val) * 1000 ) + 1)
260 |     fill_ids[fill_ids < 1] <- 1
261 |     fill_ids[fill_ids > 1001] <- 1001
262 |     
263 |     gene.plot <- data.frame(xmin = sort.df$xpos - 1,
264 |                             xmax = sort.df$xpos,
265 |                             ymin = length(genes) - i,
266 |                             ymax = length(genes) - i + 1,
267 |                             fill = colors[fill_ids])
268 |     
269 |     plot.df <- rbind(plot.df, gene.plot)
270 |     
271 |   }
272 |   
273 |   primary.plot <- data.frame(xmin = sort.df$xpos - 1,
274 |                              xmax = sort.df$xpos,
275 |                              ymin = -0.5,
276 |                              ymax = 0,
277 |                              fill = unlist(sort.df[ ,primary$color]))
278 |   
279 |   
280 |   
281 |   plot.df <- rbind(plot.df, primary.plot)
282 |   
283 |   ## add additional secondary color bars
284 |   all.desc <- desc_table
285 |   primary.name   <- all.desc$name[all.desc$base == primary$base]
286 |   secondary.name <- all.desc$name[all.desc$base %in% secondary$base]
287 |   
288 |   primary.desc <- all.desc[all.desc$base == primary$base,]
289 |   secondary.desc <- all.desc[all.desc$base %in% secondary$base,]
290 |   other.desc <- all.desc[!all.desc$base %in% c(primary$base,secondary$base),]
291 |   
292 |   sec.color <- paste0(secondary$base, "_color")
293 |   anno.color <- paste0(other.desc$base, "_color")
294 |   anno_y_labels <- data.frame(breaks = numeric(), labels = character())
295 |   
296 |   if(showall) {
297 |     
298 |     # scale the plot so it's evenly divided between annotations and genes
299 |     #s <- length(genes)/nrow(other.desc)*0.75
300 |     s <- 1
301 |     
302 |     for(j in 1:length(secondary$base)) {
303 |       anno.plot <- data.frame(xmin = sort.df$xpos - 1,
304 |                               xmax = sort.df$xpos,
305 |                               ymin = length(genes) + (j - 1) * s,
306 |                               ymax = length(genes) + (j - 1) * s + s,
307 |                               fill = unlist(sort.df[ ,sec.color[j]]))
308 |       plot.df <- rbind(plot.df, anno.plot)
309 |       
310 |       anno_y <- data.frame(breaks = length(genes) + (j - 1)*s + 0.5*s,
311 |                            labels = secondary.desc$name[secondary.desc$base == secondary$base[j]])
312 |       
313 |       anno_y_labels <- rbind(anno_y_labels,anno_y)
314 |     }
315 |     
316 |     sec_top <- length(secondary$base)
317 |     
318 |     for(j in 1:nrow(other.desc)) {
319 |       anno.plot <- data.frame(xmin = sort.df$xpos - 1,
320 |                               xmax = sort.df$xpos,
321 |                               ymin = length(genes) + (j + sec_top - 1) * s,
322 |                               ymax = length(genes) + (j + sec_top - 1) * s + s,
323 |                               fill = unlist(sort.df[ ,anno.color[j]]))
324 |       plot.df <- rbind(plot.df, anno.plot)
325 |       
326 |       anno_y <- data.frame(breaks = length(genes) + (j + sec_top - 1)*s + 0.5*s,
327 |                            labels = other.desc$name[j])
328 |       
329 |       anno_y_labels <- rbind(anno_y_labels,anno_y)
330 |     }
331 |   } else {
332 |     
333 |     if(length(secondary) > 0) {
334 |       
335 |       for(j in 1:length(secondary$base)) {
336 |         anno.plot <- data.frame(xmin = sort.df$xpos - 1,
337 |                                 xmax = sort.df$xpos,
338 |                                 ymin = length(genes) + (j - 1) * 0.5,
339 |                                 ymax = length(genes) + (j - 1) * 0.5 + 0.5,
340 |                                 fill = unlist(sort.df[,sec.color[j]]))
341 |         plot.df <- rbind(plot.df,anno.plot)
342 |         
343 |         anno_y <- data.frame(breaks = length(genes) + (j - 1) * 0.5 + 0.25,
344 |                              labels = secondary.desc$name[secondary.desc$base == secondary$base[j]])
345 |         anno_y_labels <- rbind(anno_y_labels,anno_y)
346 |       }
347 |     }
348 |   }
349 |   
350 |   ## build new, more complex y-axis labels
351 |   y_labels <- data.frame(breaks = (1:length(genes) - 0.5),
352 |                          labels = rev(genes))
353 |   primary_y_label <- data.frame(breaks = -0.25,
354 |                                 labels = primary.name)
355 |   # secondary_y_label <- data.frame(breaks = mean(c(anno.plot$ymin, secondary.plot$ymax)),
356 |   #                                 labels = secondary.name)
357 |   y_labels <- rbind(y_labels, 
358 |                     primary_y_label, 
359 |                     # secondary_y_label,
360 |                     anno_y_labels)
361 |   
362 |   hlines <- data.frame(yintercept = c(-0.5, max(plot.df$ymax)))
363 |   
364 |   sort.lab <- sort.df %>%
365 |     group_by_(primary$id, primary$label) %>%
366 |     summarise(xmean = mean(c(min(xpos) - 1, xpos)),
367 |               y = length(genes) + 1,
368 |               angle = 90) %>%
369 |     ungroup() %>%
370 |     select_(primary$id, primary$label,"xmean","y","angle")
371 |   names(sort.lab)[1:2] <- c("primary_id","primary_label")
372 |   
373 |   if(showids) {
374 |     sort.lab$primary_label <- paste(sort.lab$primary_id, sort.lab$primary_label)
375 |   }
376 |   
377 |   if(rotatelabels) {
378 |     sort.lab$primary_label <- gsub("[_|;| ]","\n",sort.lab$primary_label)
379 |     sort.lab$angle <- 0
380 |   }
381 |   
382 |   # Segments that divide groups
383 |   segment_lines <- sort.df %>%
384 |     group_by_(primary$id, primary$label) %>%
385 |     summarise(x = max(xpos)) %>%
386 |     mutate(xend = x,
387 |            y = -0.5,
388 |            yend = max(plot.df$ymax)) %>%
389 |     select(x, xend, y , yend) %>%
390 |     as.data.frame()
391 |   
392 |   ##############
393 |   ## Plotting ##
394 |   ##############
395 |   
396 |   p <- ggplot() + 
397 |     # Main heatmap
398 |     geom_rect(data = plot.df,
399 |               aes(xmin = xmin, 
400 |                   xmax = xmax, 
401 |                   ymin = ymin, 
402 |                   ymax = ymax, 
403 |                   fill = fill)) +
404 |     # Axis labels
405 |     scale_x_continuous(breaks = sort.lab$xmean,
406 |                        labels = sort.lab$primary_label,
407 |                        expand = c(0, 0)) +
408 |     scale_y_continuous(breaks = y_labels$breaks,
409 |                        labels = y_labels$labels,
410 |                        expand = c(0, 0)) +
411 |     # fill and theme
412 |     scale_fill_identity(guide=F) +
413 |     theme_classic(base_size=pfontsize) +
414 |     theme(axis.ticks = element_line(size=0.2),
415 |           legend.title = element_blank(),
416 |           axis.title.y = element_blank(),
417 |           axis.title.x = element_blank(),
418 |           axis.line = element_blank())
419 |   
420 |   
421 |   # cluster guide lines
422 |   if(showlines) {
423 |     
424 |     p <- p + 
425 |       geom_segment(data = segment_lines,
426 |                    aes(x = x, 
427 |                        xend = xend, 
428 |                        y = y, 
429 |                        yend = yend),
430 |                    size = 0.2) +
431 |       geom_vline(aes(xintercept = 0), 
432 |                  size = 0.2) + 
433 |       geom_hline(data = hlines,
434 |                  aes(yintercept = yintercept), 
435 |                  size = 0.2)
436 |     
437 |   }
438 |   
439 |   
440 |   # Rotate labels checkbox
441 |   if(rotatelabels) {
442 |     p <- p + 
443 |       theme(axis.text.x = element_text(angle = 0, 
444 |                                        hjust = 0.5, 
445 |                                        vjust=1))
446 |   } else {
447 |     p <- p + 
448 |       theme(axis.text.x = element_text(angle = 90, 
449 |                                        hjust = 1, 
450 |                                        vjust = 0.5))
451 |   }
452 |   
453 |   p
454 |   
455 | }
456 | 
457 | 
458 | build_legend_plot <- function(data_source, 
459 |                               genes = c("Hspa8","Snap25","Gad2","Vip"), 
460 |                               autorange = "auto", 
461 |                               minrange = 0, 
462 |                               maxrange = 10,
463 |                               scale_type = "scale.log",
464 |                               pfontsize = 14) {
465 |   
466 |   library(dplyr)
467 |   library(ggplot2)
468 |   library(feather)
469 |   
470 |   data <- read_feather(file.path(data_source,"data.feather"), columns = genes)
471 |   
472 |   genes <- sub("-", ".", genes)
473 |   genes[grepl("^[0-9]",genes)] <- paste0("X",genes[grepl("^[0-9]",genes)])
474 |   
475 |   names(data) <- sub("-",".",names(data))
476 |   colors <- colorRampPalette(c("darkblue","white","red"))(1001)
477 |   
478 |   if(autorange == "auto") {
479 |     min.val <- 0
480 |     max.val <- max(unlist(data[, genes]))
481 |   } else if (autorange == "manual") {
482 |     min.val <- as.numeric(minrange)
483 |     max.val <- as.numeric(maxrange)
484 |   }
485 |   
486 |   ## Build geom_rect() compatible table
487 |   legend_data <- data.frame(xmin = 1:1001,
488 |                             xmax = 1:1001+1,
489 |                             ymin = 0,
490 |                             ymax = 1,
491 |                             fill = colors)
492 |   
493 |   if(scale_type == "scale.abs") {
494 |     scale_name <- "RPKM"
495 |   } else if(scale_type == "scale.log") {
496 |     scale_name <- "log10(RPKM + 1)"
497 |     min.val <- log10(min.val + 1)
498 |     max.val <- log10(max.val + 1)
499 |   } else if(scale_type == "scale.rel") {
500 |     scale_name <- "RPKM/max(RPKM)"
501 |     min.val <- min.val/max.val
502 |     max.val <- 1
503 |   } else if(scale_type == "scale.log.rel") {
504 |     scale_name <- "log10(RPKM + 1)/max(log10(RPKM + 1))"
505 |     min.val <- log10(min.val + 1)
506 |     max.val <- log10(max.val + 1)
507 |     min.val <- min.val/max.val
508 |     max.val <- 1
509 |   }
510 |   
511 |   segment_data <- data.frame()
512 |   
513 |   legend_plot <- ggplot(legend_data) +
514 |     geom_rect(aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = fill)) +
515 |     geom_segment(aes(x = min(xmin), xend = max(xmax), y = 0, yend = 0)) +
516 |     scale_fill_identity() +
517 |     scale_y_continuous(expand = c(0,0)) +
518 |     scale_x_continuous(scale_name, breaks=c(0,250,500,750,1000),
519 |                        labels=round(seq(min.val, max.val, by = (max.val-min.val)/4),2)) +
520 |     theme_classic(base_size = pfontsize) +
521 |     theme(axis.text.y = element_blank(),
522 |           axis.ticks.y = element_blank(),
523 |           axis.line.y = element_blank(),
524 |           axis.title.y = element_blank(),
525 |           axis.line.x = element_blank())
526 |   
527 |   return(legend_plot)
528 | }  
529 | 
530 | 
531 | 
532 | 
533 | ##########################################################################################################
534 | ### FUNCTIONS FOR MAKING DOT PLOTS
535 | 
536 | 
537 | # Function to subset a dendrogram by retrieving a node with 
538 | # a given attribute.
539 | get_node_dend <- function(x, match_attr, match_value) {
540 |   
541 |   output <- NULL
542 |   
543 |   for(i in seq_len(length(x))) {
544 |     
545 |     if(attr(x[[i]], match_attr) == match_value) {
546 |       
547 |       output <- x[[i]]
548 |       
549 |     } else {
550 |       if(is.list(x[[i]])) {
551 |         nest <- get_node_dend(x[[i]], match_attr, match_value)
552 |         if(!is.null(nest)) {
553 |           output <- nest
554 |         }
555 |       }
556 |     }
557 |     
558 |   }
559 |   return(output)
560 |   
561 | }
562 | 
563 | # Function to subsample cells
564 | subsampleCells <- function(clustersF, subSamp=25, seed=5){
565 |   # Returns a vector of TRUE false for choosing a maximum of subsamp cells in each cluster 
566 |   # clustersF = vector of cluster labels in factor format
567 |   kpSamp = rep(FALSE,length(clustersF))
568 |   for (cli in unique(as.character(clustersF))){
569 |     set.seed(seed)
570 |     seed   = seed+1
571 |     kp     = which(clustersF==cli)
572 |     kpSamp[kp[sample(1:length(kp),min(length(kp),subSamp))]] = TRUE
573 |   }
574 |   return(kpSamp)
575 | }
576 | 
577 | # Function to build the jittered layer plot + dendrogram
578 | build_layer_plot <- function(anno,
579 |                              dend,
580 |                              cocl,
581 |                              cluster_ids,
582 |                              seed_val = 42,
583 |                              right_pad = 10,
584 | 							 fillColor = c("#ECE09C","#F7F2DA","#A7D7DF","#C1E5E7","#D4EDED","#B3E3E3"),
585 | 							 textSize = 2,
586 | 							 maxPerCluster = Inf,  # How many cells to include per cluster (default is all)
587 | 							 seed = 1) {
588 |   
589 |   # cluster annotations
590 |   anno$cl <- anno$dendcluster_id <- anno$cluster_id
591 |   #anno$layer_label <- as.numeric(substr(anno$layer_label,2,2))  # NOT HELPFUL
592 |   
593 |   cluster_anno <- anno %>%
594 |     # dendcluster_id = dendrogram order
595 |     select(cl, dendcluster_id, cluster_id, cluster_label, cluster_color) %>%
596 |     unique()
597 |   
598 |   # layer annotations to retain for the plot
599 |   keep_layers <- c("L1","L2","L3","L4","L5","L6")
600 |   
601 |   # padding between layers
602 |   xpad <- 0.1
603 |   ypad <- 0.05
604 |   
605 |   # ranges to use in the y-dimension for jittering
606 |   layer_ranges <- data.frame(layer_label = rev(keep_layers),
607 |                              ymin = (1:6 - 1) + ypad,
608 |                              ymax = 1:6 - ypad)
609 |   
610 |   # filter for cells with the selected layer annotations
611 |   filtered_anno <- anno %>%
612 |     filter(cluster_id %in% cluster_ids) %>%
613 |     filter(layer_label %in% keep_layers)
614 |   
615 |   # Subsample cells
616 |   subSamp <- subsampleCells(filtered_anno$cluster_id,maxPerCluster,seed)
617 |   filtered_anno <- filtered_anno[subSamp,]
618 |   
619 |   # ranges to use in the x-dimension for jittering
620 |   cluster_ranges <- filtered_anno %>%
621 |     select(cluster_id, cluster_color, cluster_label, dendcluster_id) %>%
622 |     unique() %>%
623 |     arrange(dendcluster_id) %>%
624 |     mutate(xmin = 1:n() - 1 + xpad,
625 |            xmax = 1:n()     - xpad,
626 |            xmid = 1:n() - 0.5)
627 |   
628 |   set.seed(seed_val)
629 |   
630 |   # Assign random positions within the x and y limits
631 |   # for each cell, as defined above.
632 |   plot_data <- filtered_anno %>%
633 |     select(sample_id,
634 |            dendcluster_id, cluster_color, cluster_label,
635 |            layer_id, layer_color, layer_label) %>%
636 |     left_join(layer_ranges) %>%
637 |     left_join(cluster_ranges) %>%
638 |     group_by(dendcluster_id, layer_id) %>%
639 |     mutate(x = runif(n(),xmin + xpad, xmax - xpad),
640 |            y = runif(n(),ymin + ypad, ymax - ypad),
641 |            fill_color = cluster_color)
642 |   
643 |   # Layer background rectangles
644 |   layer_rects <- layer_ranges %>%
645 |     mutate(xmin = min(cluster_ranges$xmin) - xpad, xmax = max(cluster_ranges$xmax) + xpad) %>%
646 |     mutate(fill = fillColor)  # LAST COLOR ADDED
647 |   
648 |   # Cluster color highlights at bottom of the plot
649 |   cluster_rects <- cluster_ranges %>%
650 |     mutate(ymin = -ypad, ymax = ypad)
651 |   
652 |   # Filter the dendrogram for just the clusters that are present
653 |   # in the plot
654 |   prune_dend_labels <- labels(dend)[!labels(dend) %in% filtered_anno$cluster_label]
655 |   filtered_dend <- dend %>%
656 |     prune(prune_dend_labels)
657 |   # Convert with ggdend to segment coordinates, and rescale the plot
658 |   dend_seg <- as.ggdend(filtered_dend)$segments %>%
659 |     mutate(y = (y/max(y))*3 + max(layer_rects$ymax) + ypad,
660 |            yend = (yend/max(yend))*3 + max(layer_rects$ymax) + ypad,
661 |            x = x - 0.5,
662 |            xend = xend - 0.5)
663 |   dend_seg$col = "black"
664 |   dend_seg$lwd = 1
665 |   dend_seg$lty = 1
666 |   
667 |   # padding rectangle to align with the violin plots
668 |   pad_rect <- data.frame(ymin = min(layer_rects$ymin),
669 |                          ymax = max(layer_rects$ymax),
670 |                          xmin = max(layer_rects$xmax),
671 |                          xmax = max(layer_rects$xmax)*(1 + right_pad/100))
672 |   
673 |   p <- ggplot() +
674 |     # right side padding for alignment
675 |     # with the violin plots
676 |     geom_rect(data = pad_rect,
677 |               aes(xmin = xmin, xmax = xmax,
678 |                   ymin = ymin, ymax = ymax,
679 |                   fill = "#FFFFFF",
680 |                   color = "#FFFFFF")) +
681 |     # dendrogram segments
682 |     geom_segment(data = dend_seg,
683 |                  aes(x = x, xend = xend,
684 |                      y = y, yend = yend,
685 |                      size = lwd,
686 |                      color = col),
687 |                  lineend = "square") +
688 |     # layer background rectangles
689 |     geom_rect(data = layer_rects,
690 |               aes(xmin = xmin, xmax = xmax,
691 |                   ymin = ymin, ymax = ymax,
692 |                   fill = fill)) +
693 |     # cluster label rectangles
694 |     geom_rect(data = cluster_rects,
695 |               aes(xmin = xmin, xmax = xmax,
696 |                   ymin = ymin, ymax = ymax,
697 |                   fill = cluster_color)) +
698 |     geom_rect(data = cluster_rects,
699 |               aes(xmin = xmin, xmax = xmax,
700 |                   ymin = -2 - ypad, ymax = -2,
701 |                   fill = cluster_color)) +
702 |     # jittered cell points
703 |     geom_point(data = plot_data,
704 |                aes(x = x,
705 |                    y = y,
706 |                    color = cluster_color),
707 |                size = 0.1) +
708 |     # cluster label tag rectangles
709 |     geom_rect(data = cluster_ranges,
710 |               aes(xmin = xmid - 0.5 + xpad/2,
711 |                   xmax = xmid + 0.5 - xpad/2,
712 |                   ymax = 0 - ypad,
713 |                   ymin = -2),
714 |               fill = "#CAD7D7")+
715 |     # cluster label text
716 |     geom_text(data = cluster_ranges,
717 |               aes(x = xmid,
718 |                   y = -2 + ypad,
719 |                   label = cluster_label),
720 |               angle = 90,
721 |               vjust = 0.3,
722 |               hjust = 0,
723 |               size = textSize) +
724 |     # Plot settings
725 |     scale_color_identity() +
726 |     scale_size(range = c(0.5,1), guide = FALSE) +
727 |     scale_fill_identity() +
728 |     scale_y_continuous(limits = c(-2.1, 9)) +
729 |     scale_x_continuous(expand = c(0,0)) +
730 |     theme_void(base_size = 7) +
731 |     theme(text = element_text(size = 8),
732 |           legend.box.spacing = unit(0,"pt"))
733 |   
734 |   p
735 | }
736 | 


--------------------------------------------------------------------------------
/R/Support_violin_functions.R:
--------------------------------------------------------------------------------
  1 | #' Read data from a directory of feather files
  2 | #' 
  3 | get_feather_data <- function(feather_dir, genes, group_by, group_ids) {
  4 |   
  5 |   library(dplyr)
  6 |   library(feather)
  7 |   
  8 |   data_file <- paste0(feather_dir, "/data.feather")
  9 |   anno_file <- paste0(feather_dir, "/anno.feather")
 10 |   
 11 |   data <- feather(data_file)
 12 |   
 13 |   # Read annotations and convert factors
 14 |   anno <- read_feather(anno_file) %>%
 15 |     mutate_if(is.factor, as.character)
 16 |   
 17 |   # If an _id column was a factor, it's now a character. Convert to numeric for sorting.
 18 |   id_cols <- names(anno)[grepl("_id$", names(anno)) & names(anno) != "sample_id"]
 19 |   anno[id_cols] <- lapply(anno[id_cols], as.numeric)
 20 |   
 21 |   # Check the provided genes against the column names in data_file
 22 |   data_names <- names(data)
 23 |   
 24 |   if(sum(genes %in% data_names) != length(genes)) {
 25 |     # Report if names don't match after ignorning case
 26 |     not_found <- genes[!toupper(genes) %in% toupper(data_names)]
 27 |     
 28 |     warning(paste(paste0(not_found, collapse = ", "), "not found in feather data!"))
 29 |     
 30 |     # Update genes to use names as formatted in data
 31 |     genes <- data_names[toupper(data_names) %in% toupper(genes)]
 32 |   }
 33 |   
 34 |   # Find column indexes for sample_id and the matched genes
 35 |   # This seems to be faster than calling data[,c("sample_id",genes)] directly
 36 |   data_cols <- which(data_names %in% c("sample_id", genes))
 37 |   
 38 |   # Read the data from the data feather file into memory
 39 |   gene_data <- data[,data_cols]
 40 |   
 41 |   # Change - to . in column names and genes
 42 |   colnames(gene_data) <- gsub("-",".",colnames(gene_data))
 43 |   genes <- gsub("-",".",genes)
 44 |   
 45 |   # rename the _id, _label, and _color for the group_by values for use in plotting
 46 |   all_anno <- anno %>%
 47 |     rename_("plot_id" = paste0(group_by,"_id"),
 48 |             "plot_label" = paste0(group_by,"_label"),
 49 |             "plot_color" = paste0(group_by,"_color"))
 50 |   
 51 |   # use the group_ids to retain the order provided by the group_ids argument
 52 |   cluster_order <- data.frame(group_ids = group_ids) %>%
 53 |     mutate(cluster_x = 1:n())
 54 |   
 55 |   # Filter and order the rows
 56 |   data <- left_join(all_anno, gene_data, by = "sample_id") %>%
 57 |     filter(plot_id %in% group_ids) %>%
 58 |     left_join(cluster_order, by = c("plot_id" = "group_ids")) %>%
 59 |     arrange(cluster_x) %>%
 60 |     mutate(xpos = 1:n()) %>%
 61 |     select(-plot_id) %>%
 62 |     rename_("plot_id" = "cluster_x")
 63 |   
 64 |   return(data)
 65 | }
 66 | 
 67 | #' Convert integers to scientific notation labels
 68 | #' 
 69 | #' @param in_num a numeric vector
 70 | #' @param sig_figs a number indicating how many significant figures should be displayed.
 71 | #' @return a character vector with numeric values reformatted in 1.2E3 format
 72 | #' 
 73 | #' @examples
 74 | #' my_numbers <- c(100,15.359,32687,.000468)
 75 | #' 
 76 | #' sci_label(my_numbers)
 77 | #' 
 78 | #' sci_label(my_numbers,sig_figs=3)
 79 | sci_label <- function(in_num, sig_figs = 2, type = "plot") {
 80 |   labels <- character()
 81 |   for(i in 1:length(in_num)) {
 82 |     x <- in_num[i]
 83 |     if(x == 0) {
 84 |       first <- paste0("0", ".", paste0(rep("0", sig_figs - 1), collapse="") )
 85 |     } else if(log10(x) %% 1 == 0) {
 86 |       first <- substr(x, 1, 1)
 87 |       if(sig_figs > 1) {
 88 |         first <- paste0(first, ".", paste0(rep("0", sig_figs - 1), collapse=""))
 89 |       }
 90 |     } else {
 91 |       first <- round(x / (10 ^ floor(log10(x))), sig_figs - 1)
 92 |     }
 93 |     if(x == 0) {
 94 |       if(type == "plot") {
 95 |         label <- paste0(first, "%*%10^0" )
 96 |       } else if(type == "datatable") {
 97 |         label <- paste0(first, "\u271510<sup>0</sup>" )
 98 |       }
 99 |     } else {
100 |       if(type == "plot") {
101 |         label <- paste0(first, "%*%10^", floor(log10(x)))
102 |       } else if(type == "datatable") {
103 |         label <- paste0(first, "\u271510<sup>", floor(log10(x)),"</sup>")
104 |       }
105 |     }
106 |     labels <- c(labels, label)
107 |   }
108 |   return(labels)
109 | }
110 | 
111 | #' Convert font sizes in pt to mm
112 | #' @param pt A numeric font size in pt.
113 | #' @return A numeric font size in mm.
114 | #' 
115 | #' @examples
116 | #' pt2mm(12)
117 | #' 
118 | #' ggplot(mtcars) +
119 | #'   geom_text(aes(x = mpg, y = wt, label = rownames(mtcars)),
120 | #'             size = pt2mm(7))
121 | pt2mm <- function(pt) {
122 |   mm <- pt / 2.834645669
123 |   return(mm)
124 | }
125 | 
126 | #' Build colorful, rectangular labels for plot headers in plot space
127 | #' 
128 | build_header_labels <- function(data, ngenes, nsamples, nclust, labelheight = 25, labeltype = "simple") {
129 |   
130 |   # Three label types: 
131 |   # simple, which is for use with cluster-based plots
132 |   # angle, for cell-based plots with "angle"-type polygonal labels
133 |   # square, for cell-based plots with "square"-type labels
134 |   
135 |   ## Note on plot dimensions
136 |   # The range of the plot area (not including labels) will be
137 |   # y-axis: 1:(ngenes + 1)
138 |   # x-axis: 0:(nsamples) (for cell-based plots)
139 |   # x-axis: 1:(nclust + 1) (for cluster-based plots)
140 |   
141 |   labheight <- ngenes*(labelheight/100)/(1-labelheight/100)
142 |   
143 |   data <- data %>%
144 |     group_by(plot_id,plot_label,plot_color) %>%
145 |     summarise(minx = min(xpos),
146 |               maxx = max(xpos))
147 |   
148 |   if(labeltype == "simple") {
149 |     xlab.rect <- data.frame(xmin = 1:nclust - 0.5,
150 |                             xmax = 1:nclust + 0.5,
151 |                             ymin = ngenes + 1,
152 |                             ymax = ngenes + 1 + labheight,
153 |                             color = data$plot_color,
154 |                             label = data$plot_label )
155 |   }
156 |   
157 |   if(labeltype == "angle") {
158 |     xlab.rect <- data.frame(xmin = (nsamples) * (1:nclust - 1) / nclust,
159 |                             xmax = (nsamples) * (1:nclust) / nclust,
160 |                             # 10% of the label height is reserved for angled polygons
161 |                             ymin = ngenes + 1 + labheight*0.1,
162 |                             ymax = ngenes + 1 + labheight,
163 |                             color = data$plot_color,
164 |                             label = data$plot_label )
165 |   }
166 |   if(labeltype == "square") {
167 |     xlab.rect <- data %>% 
168 |       group_by(plot_id) %>%
169 |       summarise(xmin = minx - 1,
170 |                 xmax = maxx,
171 |                 ymin = ngenes + 1 + labheight * 0.1,
172 |                 ymax = ngenes + 1 + labheight,
173 |                 color = plot_color[1],
174 |                 label = plot_label[1])
175 |   }
176 |   
177 |   xlab.rect  
178 | }
179 | 
180 | 
181 | #' Violin plots of gene expression for clusters
182 | #' 
183 | #' This function will generate plots similar to Figure 1c of Tasic, et al. (2015).
184 | #' Warning: this is currently only able to work with internally-supplied datasets (v1_data and v1_anno).
185 | #' Extension to user-supplied datasets will come soon.
186 | #' 
187 | #' @param genes A character vector containing gene symbols to be plotted
188 | #' @param clusters A numeric vector containing clusters to plot (for v1_anno, the range is 1:49)
189 | #' @param data_source A character object defining where the data is stored. Currently only works with "internal"
190 | #' @param logscale Logical object, determines if data is log scaled before plotting.
191 | #' @param fontsize numeric object, the font size (in pts) used to make the plot.
192 | #' @param labelheight numeric object, Percent of the plot height that should be used for the labels (0 to 100).
193 | #' 
194 | #' @return a ggplot2 plot object
195 | #' 
196 | #' @examples
197 | #' pottery_plot()
198 | #' 
199 | #' my_genes <- c("Ercc6","Ercc8","Trp53","Pgbd5")
200 | #' my_clusters <- c(1,5,9,10,24,37)
201 | #' pottery_plot(my_genes,my_clusters,logscale=T,fontsize=14)
202 | group_violin_plot <- function(genes = c("Hspa8","Snap25","Gad2","Vip"),
203 |                               group_by = "cluster", clusters = 1:10,
204 |                               data_source,
205 |                               sort = F, logscale = F, showcounts = T, rotatecounts = F,
206 |                               fontsize = 7, labelheight = 25,
207 |                               max_width = 10) {
208 |   library(dplyr)
209 |   library(feather)
210 |   library(ggplot2)
211 |   
212 |   genes <- rev(genes)
213 |   
214 |   # get_feather_data() from data_formatting.R
215 |   data <- get_feather_data(data_source,genes,group_by,clusters)
216 |   
217 |   genes <- genes[genes %in% names(data)]
218 |   
219 |   data <- data %>%
220 |     select(-xpos) %>%
221 |     mutate(xpos = plot_id)
222 |   
223 |   genes <- sub("-",".",genes)
224 |   genes[grepl("^[0-9]",genes)] <- paste0("X",genes[grepl("^[0-9]",genes)])
225 |   names(data)[grepl("^[0-9]",genes)] <- paste0("X",names(data)[grepl("^[0-9]",genes)])
226 |   
227 |   ngenes <- length(genes)
228 |   nclust <- length(unique(data$plot_id))
229 |   nsamples <- nrow(data)
230 |   
231 |   # Compute maximum values before scaling to plot space
232 |   max_vals <- data %>% 
233 |     select(one_of(genes)) %>% 
234 |     summarise_all(max) %>% 
235 |     unlist()
236 |   
237 |   # Variance injection
238 |   # geom_violin() requires some variance, so I add a vanishingly small random number to each data value
239 |   data[genes] <- data[genes] + runif(nrow(data),0,0.00001)
240 |   
241 |   # Scale the data between i and i + 0.9
242 |   for(i in 1:length(genes)) {
243 |     gene <- genes[i]
244 |     gene_max <- max_vals[i]
245 |     if(logscale) {
246 |       data[gene] <- log10(data[gene] + 1) / log10(gene_max + 1) * 0.9 + i
247 |     } else {
248 |       data[gene] <- data[gene] / gene_max * 0.9 + i
249 |     }
250 |   }
251 |   
252 |   header_labels <- build_header_labels(data = data, ngenes = ngenes, nsamples = 1, nclust = nclust, labelheight = labelheight, labeltype = "simple")
253 |   
254 |   # Build the maximum value labels for the right edge
255 |   max_labels <- data.frame(x = (nclust + 0.5) * 1.01,
256 |                            y = 1:ngenes + 0.5,
257 |                            label = sci_label(max_vals) )
258 |   max_header <- data.frame(x = (nclust + 0.5) * 1.01,
259 |                            y = ngenes + 1,
260 |                            label = "Max value")
261 |   max_width <- nclust*(max_width/100)/(1-max_width/100)
262 |   
263 |   label_y_size <- max(header_labels$ymax) - min(header_labels$ymin)
264 |   
265 |   cluster_data <- data %>%
266 |     group_by(plot_label,plot_color,plot_id) %>%
267 |     summarise(cn=n()) %>%
268 |     as.data.frame(stringsAsFactors=F) %>%
269 |     arrange(plot_id) %>%
270 |     mutate(labely = ngenes + label_y_size*0.05,
271 |            cny = max(header_labels$ymax) - 0.1*label_y_size,
272 |            xpos = plot_id)
273 |   
274 |   # Plot setup
275 |   p <- ggplot() +
276 |     scale_fill_identity() +
277 |     scale_y_continuous("", breaks = 1:length(genes) + 0.45, labels = genes, expand = c(0, 0)) +
278 |     scale_x_continuous("", expand = c(0, 0)) +
279 |     theme_classic(fontsize) +
280 |     theme(axis.text = element_text(size = rel(1)),
281 |           axis.text.x = element_blank(),
282 |           axis.ticks.x = element_blank(),
283 |           legend.position = "none") +
284 |     geom_hline(aes(yintercept = 1:(ngenes)), size = 0.2)
285 |   
286 |   # plot the violins for each gene
287 |   for(i in 1:length(genes)) {
288 |     p <- p + 
289 |       geom_violin(data = data,
290 |                   aes_string(x = "xpos", y = genes[i], fill = "plot_color"),
291 |                   scale = "width", adjust = 2) +
292 |       stat_summary(data = data,
293 |                    aes_string(x = "xpos", y = genes[i]),
294 |                    fun.y = "median", fun.ymin = "median", fun.ymax = "median", geom = "point", size = 0.7)
295 |   }
296 |   
297 |   # Cluster labels
298 |   p <- p +
299 |     geom_rect(data = header_labels, 
300 |               aes(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax, fill = color)) +
301 |     geom_text(data = header_labels,
302 |               aes(x = (xmin + xmax) / 2, y = ymin + 0.05, label = label),
303 |               angle = 90, vjust = 0.35, hjust = 0, size = pt2mm(fontsize)) +
304 |     # Maximum value labels on right side of plot
305 |     geom_rect(aes(xmin = nclust + 0.5, xmax = nclust + 0.5 + max_width, ymin = 1, ymax = max(header_labels$ymax)),
306 |               fill = "white") +
307 |     geom_text(data = max_header,
308 |               aes(x = x, y = y, label = label),
309 |               angle = 90, hjust = 0, vjust = 0.35, size = pt2mm(fontsize)) +
310 |     geom_text(data = max_labels,
311 |               aes(x = x, y = y, label = label),
312 |               hjust = 0, vjust = 0.35, size = pt2mm(fontsize), parse = TRUE)
313 |   
314 |   # Cluster counts
315 |   if(showcounts) {
316 |     if(rotatecounts) {
317 |       p <- p + geom_text(data = cluster_data,
318 |                          aes(y = cny, x = xpos, label = cn),
319 |                          angle = 90,
320 |                          vjust = 0.35,
321 |                          hjust = 1,
322 |                          size = pt2mm(fontsize))
323 |     } else {
324 |       p <- p + geom_text(data = cluster_data,
325 |                          aes(y = cny, x = xpos,label = cn),
326 |                          size = pt2mm(fontsize))
327 |     }
328 |   }
329 |   
330 |   p
331 | }
332 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Human cortical cell types
 2 | Code to generate figures for the manuscript "Conserved cell types with divergent features in human versus mouse cortex" describing transcriptomic cell types in adult human neocortex.
 3 | 
 4 | ## Level of support
 5 | We are not currently supporting this code, but simply releasing it to the community AS IS but are not able to provide any guarantees of support. The community is welcome to submit issues, but you should not expect an active response.
 6 | 
 7 | ## Notes
 8 | * Run "Code_0_prepareComparisonDataSets" and then "Code_1_buildDendrogram" prior to running other scripts, which can be run in any order (Support... functions do not need to be run)
 9 | * The two "Code_metaNeighbor" take several hours to several days to run, while the other scripts are relatively fast
10 | * Several files need to be downloaded and unzipped prior to running code, as explained in "Code_0_prepareComparisonDataSets" and "Code_correlationWithOtherDatasets.r"
11 | 


--------------------------------------------------------------------------------
/data/FinalHumanMTGclusterAnnotation_update.csv:
--------------------------------------------------------------------------------
 1 | final cluster,HGT,class_old,final tree order,final cluster color,id,name,size,type,layer_wt_mean,layer_enriched,tree order,level1,level1_color,level2,level2_color,level3,level3_color,mouse_cl_for_color,mouse_color,mouse_order
 2 | Inh L1-2 PAX6 CDH12,LAMP5/PAX6/Other,L1/2 ADARB2,1,#DDACC9,14,cl14_i90_GRPR_Ndnf.Cxcl14_L1.4,90,inh,1.4,1,1,GABAergic neuron,#C95170,ADARB2,#C456AA,LAMP5/PAX6/Other,#E37B97,Lamp5 Pax6,#DDACC9,1
 3 | Inh L1-2 PAX6 TNFAIP8L3,LAMP5/PAX6/Other,L1/2 ADARB2,2,#FF88AD,46,cl46_i16_DKFZp686K1684_Ndnf.Cxcl14_L2.0,16,inh,2,2,2,GABAergic neuron,#C95170,ADARB2,#C456AA,LAMP5/PAX6/Other,#E37B97,Lamp5 Egln3_1,#FF88AD,2
 4 | Inh L1 SST NMBR,LAMP5/PAX6/Other,L1/2 LAMP5/SST,3,#E67B73,3,cl3_i283_NMBR_Ndnf.Car4_L1.3,283,inh,1.3,1,3,GABAergic neuron,#C95170,ADARB2,#C456AA,LAMP5/PAX6/Other,#E37B97,Lamp5 Egln3_2,#E67B73,3
 5 | Inh L1-4 LAMP5 LCP2,LAMP5/PAX6/Other,LAMP5 (Rosehip),4,#FFA388,1,cl1_i356_LCP2_Ndnf.Car4_L3.2,356,inh,3.2,3b,5,GABAergic neuron,#C95170,ADARB2,#C456AA,LAMP5/PAX6/Other,#E37B97,Lamp5 Slc35d3,#FFA388,6
 6 | Inh L1-2 LAMP5 DBP,LAMP5/PAX6/Other,L1/2 LAMP5/SST,5,#FF7466,2,cl2_i21_DBP_Ndnf.Car4_L1.2,21,inh,1.2,1,4,GABAergic neuron,#C95170,ADARB2,#C456AA,LAMP5/PAX6/Other,#E37B97,Lamp5 Vax1,#FF7466,7
 7 | Inh L2-6 LAMP5 CA1,LAMP5/PAX6/Other,LAMP5/LHX6 (IGTP),6,#C77963,4,cl4_i256_TMEM255A_Ndnf.Car4_L4.7,256,inh,4.7,5a,6,GABAergic neuron,#C95170,ADARB2,#C456AA,LAMP5/PAX6/Other,#E37B97,Lamp5 Lhx6,#C77963,8
 8 | Inh L1 SST CHRNA4,LAMP5/PAX6/Other,L1/2 ADARB2,7,#DD6091,29,cl29_i52_CHRNA4_Ndnf.Cxcl14_L1.1,52,inh,1.1,1,7,GABAergic neuron,#C95170,ADARB2,#C456AA,LAMP5/PAX6/Other,#E37B97,Lamp5 Egln3_3,#DD6091,4
 9 | Inh L1-2 GAD1 MC4R,LAMP5/PAX6/Other,L1/2 ADARB2,8,#FF7290,13,cl13_i107_MC4R_Ndnf.Cxcl14_L1.4,107,inh,1.4,1,8,GABAergic neuron,#C95170,ADARB2,#C456AA,LAMP5/PAX6/Other,#E37B97,Lamp5 Pdlim5,#FF7290,5
10 | Inh L1-2 SST BAGE2,LAMP5/PAX6/Other,L1/2 ADARB2,9,#9440F3,12,cl12_i108_NPIPB11_Ndnf.Cxcl14_L1.5,108,inh,1.5,1,9,GABAergic neuron,#C95170,ADARB2,#C456AA,LAMP5/PAX6/Other,#E37B97,Sncg Slc17a8,#9440F3,13
11 | Inh L1-3 PAX6 SYT6,VIP,VIP/ADARB2,10,#9900B3,39,cl39_i29_SYT6_Ndnf.Cxcl14_L2.5,29,inh,2.5,2,12,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Sncg Ptprk,#9900B3,11
12 | Inh L1-2 VIP TSPAN12,VIP,VIP/ADARB2,11,#6C00BF,30,cl30_i42_TSPAN12_Vip.Mybpc1_L1.5,42,inh,1.5,1,10,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Sncg Col14a1,#6C00BF,10
13 | Inh L1-4 VIP CHRNA6,VIP,VIP/ADARB2,12,#7A0099,42,cl42_i25_CHRNA6_Vip.Mybpc1_L2.6,25,inh,2.6,3a,11,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Sncg Crispld2,#7A0099,12
14 | Inh L1-3 VIP ADAMTSL1,VIP,VIP/ADARB2,13,#7779BF,18,cl18_i72_ZCCHC12_Vip.Mybpc1_L2.7,72,inh,2.7,3a,13,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Vip Igfbp6,#7779BF,14
15 | Inh L1-4 VIP PENK,VIP,VIP/ADARB2,14,#3C3D73,47,cl47_i17_PENK_Vip.Mybpc1_L3.2,17,inh,3.2,3b,14,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Vip Reln,#3C3D73,15
16 | Inh L2-6 VIP QPCT,VIP,VIP/ADARB2,15,#A700FF,45,cl45_i37_GPX1_Vip.Chat_L4.3,37,inh,4.3,4,25,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Vip Serpinf1,#A700FF,9
17 | Inh L3-6 VIP HS3ST3A1,VIP,VIP/ADARB2,16,#FF00FF,26,cl26_i80_BSPRY_Vip.Chat_L4.0,80,inh,4,4,24,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Vip Rspo4,#FF00FF,19
18 | Inh L1-2 VIP PCDH20,VIP,VIP/ADARB2,17,#BD3D9A,22,cl22_i61_PCDH20_Vip.Mybpc1_L1.7,61,inh,1.7,2,23,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Vip C1ql1,#BD3D9A,23
19 | Inh L2-5 VIP SERPINF1,VIP,VIP/ADARB2,18,#B09FFF,24,cl24_i55_SERPINF1_Vip.Mybpc1_L3.6,55,inh,3.6,4,26,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Vip Gpc3,#B09FFF,16
20 | Inh L2-5 VIP TYR,VIP,VIP/ADARB2,19,#FF4DC1,23,cl23_i62_EREG_Vip.Mybpc1_L3.9,62,inh,3.9,4,27,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Vip Htr1f,#FF4DC1,22
21 | Inh L1-3 VIP CHRM2,VIP,VIP/ADARB2,20,#9FAAFF,9,cl9_i175_HRH3_Vip.Chat_L2.4,175,inh,2.4,2,17,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Vip Lmo1_1,#9FAAFF,17
22 | Inh L2-4 VIP CBLN1,VIP,VIP/ADARB2,21,#756FB3,20,cl20_i67_CBLN1_Vip.Chat_L3.0,67,inh,3,3b,18,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Vip Lmo1_2,#756FB3,18
23 | Inh L1-3 VIP CCDC184,VIP,VIP/ADARB2,22,#FF00B3,37,cl37_i64_CCDC184_Vip.Chat_L2.5,64,inh,2.5,2,16,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Vip Cbln4_2,#FF00B3,21
24 | Inh L1-3 VIP GGH,VIP,VIP/ADARB2,23,#B3128A,19,cl19_i68_GGH_Vip.Chat_L2.5,68,inh,2.5,2,15,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Vip Cbln4_1,#B3128A,20
25 | Inh L1-2 VIP LBH,VIP,VIP/ADARB2,24,#AF00E6,32,cl32_i47_LBH_Vip.Chat_L1.7,47,inh,1.7,2,19,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Vip Crispld2_1,#AF00E6,24
26 | Inh L2-3 VIP CASC6,VIP,VIP/ADARB2,25,#992E81,34,cl34_i45_CASC6_Vip.Mybpc1_L2.5,45,inh,2.5,2,20,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Vip Crispld2_2,#992E81,25
27 | Inh L2-4 VIP SPAG17,VIP,VIP/ADARB2,26,#A711C1,36,cl36_i33_LGI2_Vip.Chat_L2.8,33,inh,2.8,3a,21,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Vip Crispld2_1,#AF00E6,24
28 | Inh L1-4 VIP OPRM1,VIP,VIP/ADARB2,27,#9F219D,35,cl35_i52_YWHAZP2_Vip.Mybpc1_L3.0,52,inh,3,3b,22,GABAergic neuron,#C95170,ADARB2,#C456AA,VIP,#A531BD,Vip Crispld2_2,#992E81,25
29 | Inh L3-6 SST NPY,SST,SST/LHX6,28,#FFDA50,50,cl50_i15_NPY_Sst.Chodl_L5.0,15,inh,5,5a,45,GABAergic neuron,#C95170,LHX6,#CE4D36,SST,#C06829,Sst Chodl,#FFFF00,26
30 | Inh L3-6 SST HPGD,SST,SST/LHX6,29,#D9C566,25,cl25_i60_HPGD_Sst.Tacstd2_L4.9,60,inh,4.9,5a,32,GABAergic neuron,#C95170,LHX6,#CE4D36,SST,#C06829,Sst Etv1_1,#D9C566,31
31 | Inh L4-6 SST B3GAT2,SST,SST/LHX6,30,#FFBB33,17,cl17_i182_NIPAL3_Sst.Tacstd2_L4.8,182,inh,4.8,5a,34,GABAergic neuron,#C95170,LHX6,#CE4D36,SST,#C06829,Sst Etv1_2,#FFBB33,32
32 | Inh L5-6 SST KLHDC8A,SST,SST/LHX6,31,#C11331,16,cl16_i63_KLHDC8A_Sst.Tacstd2_L5.2,63,inh,5.2,5b,35,GABAergic neuron,#C95170,LHX6,#CE4D36,SST,#C06829,Sst Crhr2_1,#C11331,40
33 | Inh L5-6 SST NPM1P10,SST,SST/LHX6,32,#BF8219,27,cl27_i79_NPM1P10_Sst.Tacstd2_L5.4,79,inh,5.4,5b,37,GABAergic neuron,#C95170,LHX6,#CE4D36,SST,#C06829,Sst Tac2,#BF8219,36
34 | Inh L4-6 SST GXYLT2,SST,SST/LHX6,33,#802600,33,cl33_i41_IRS4_Sst.Tacstd2_L5.1,41,inh,5.1,5b,36,GABAergic neuron,#C95170,LHX6,#CE4D36,SST,#C06829,Sst C1ql3,#802600,37
35 | Inh L4-5 SST STK32A,SST,SST/LHX6,34,#806B19,15,cl15_i93_NMU_Sst.Tacstd2_L4.3,93,inh,4.3,4,33,GABAergic neuron,#C95170,LHX6,#CE4D36,SST,#C06829,Sst Hpse_2,#806B19,39
36 | Inh L1-3 SST CALB1,SST,SST/LHX6,35,#BF480D,10,cl10_i279_CALB1_Sst.Cbln4_L2.4,279,inh,2.4,2,29,GABAergic neuron,#C95170,LHX6,#CE4D36,SST,#C06829,Sst Etv1_3,#BF480D,33
37 | Inh L3-5 SST ADGRG6,SST,SST/LHX6,36,#804811,11,cl11_i132_ADGRG6_Sst.Cbln4_L3.6,132,inh,3.6,4,30,GABAergic neuron,#C95170,LHX6,#CE4D36,SST,#C06829,Sst Etv1_4,#804811,34
38 | Inh L2-4 SST FRZB,SST,SST/LHX6,37,#FF554D,6,cl6_i64_NOS1_Pvalb.Wt1_L3.2,64,inh,3.2,3b,31,GABAergic neuron,#C95170,LHX6,#CE4D36,SST,#C06829,Pvalb Prdm8,#FF554D,46
39 | Inh L5-6 SST TH,SST,SST/LHX6,38,#A81111,40,cl40_i27_TH_Sst.Tacstd2_L5.3,27,inh,5.3,5b,38,GABAergic neuron,#C95170,LHX6,#CE4D36,SST,#C06829,Sst Nts,#A81111,42
40 | Inh L5-6 GAD1 GLP1R,PVALB,L5/6 LHX6,39,#FF2F7E,41,cl41_i27_GLP1R_Pvalb.Wt1_L5.7,27,inh,5.7,6a,44,GABAergic neuron,#C95170,LHX6,#CE4D36,PVALB,#DB3143,Pvalb Th,#FF2F7E,44
41 | Inh L5-6 PVALB LGR5,PVALB,PVALB,40,#ED4C50,28,cl28_i52_CKMT2_Pvalb.Wt1_L5.4,52,inh,5.4,5b,43,GABAergic neuron,#C95170,LHX6,#CE4D36,PVALB,#DB3143,Pvalb Gabrg1,#ED4C50,43
42 | Inh L4-5 PVALB MEPE,PVALB,PVALB,41,#994C00,7,cl7_i64_MEPE_Pvalb.Wt1_L4.9,64,inh,4.9,5a,41,GABAergic neuron,#C95170,LHX6,#CE4D36,PVALB,#DB3143,Pvalb Il1rapl2,#994C00,35
43 | Inh L2-4 PVALB WFDC2,PVALB,PVALB,42,#BC2B11,8,cl8_i387_WFDC2_Pvalb.Wt1_L3.2,387,inh,3.2,3b,39,GABAergic neuron,#C95170,LHX6,#CE4D36,PVALB,#DB3143,Pvalb Calb1,#BC2B11,45
44 | Inh L4-6 PVALB SULF1,PVALB,PVALB,43,#E62A5D,5,cl5_i167_SULF1_Pvalb.Wt1_L4.8,167,inh,4.8,5a,40,GABAergic neuron,#C95170,LHX6,#CE4D36,PVALB,#DB3143,Pvalb Gpr149,#E62A5D,47
45 | Inh L5-6 SST MIR548F2,PVALB,SST/LHX6,44,#D6221D,31,cl31_i80_ACADL_Pvalb.Wt1_L5.3,80,inh,5.3,5b,42,GABAergic neuron,#C95170,LHX6,#CE4D36,PVALB,#DB3143,Pvalb Reln,#D6221D,48
46 | Inh L2-5 PVALB SCUBE3,PVALB,PVALB (Chandelier),45,#FF197F,38,cl38_i32_NOG_Pvalb.Wt1_L3.2,32,inh,3.2,3b,28,GABAergic neuron,#C95170,LHX6,#CE4D36,PVALB,#DB3143,Pvalb Vipr2,#FF197F,49
47 | Exc L2 LAMP5 LTK,Layer 2/3,Superficial Layers,46,#D9F077,52,cl52_e812_LTK_L5b.Cdh13_L2.0,812,exc,2,2,47,Glutamatergic neuron,#3EA185,Superficial Layers,#3ECB88,Superficial Layers,#3ECB88,VISp L2/3 IT Cdh13_1,#D9F077,50
48 | Exc L2-4 LINC00507 GLP2R,Layer 2/3,Superficial Layers,47,#A6E6A9,53,cl53_e170_GLP2R_L5b.Cdh13_L3.0,170,exc,3,3b,48,Glutamatergic neuron,#3EA185,Superficial Layers,#3ECB88,Superficial Layers,#3ECB88,VISp L2/3 IT Cdh13_2,#A6E6A9,51
49 | Exc L2-3 LINC00507 FREM3,Layer 2/3,Superficial Layers,48,#7AE6AB,55,cl55_e2284_FREM3_L5b.Cdh13_L2.8,2284,exc,2.8,3a,46,Glutamatergic neuron,#3EA185,Superficial Layers,#3ECB88,Superficial Layers,#3ECB88,VISp L2/3 IT Agmat,#7AE6AB,52
50 | Exc L5-6 THEMIS C1QL3,Layer 6 IT,Deep Layers,49,#A19922,60,cl60_e1537_KCTD4_L2.Ngb_L5.7,1537,exc,5.7,6a,49,Glutamatergic neuron,#3EA185,Deep Layers,#3E7782,Deep Layers,#3E7782,VISp L6 IT Cd52,#A19922,69
51 | Exc L3-4 RORB CARM1P1,Layer 4,Superficial Layers,50,#00979D,63,cl63_e280_CARM1P1_L5b.Samd3_L3.4,280,exc,3.4,3b,50,Glutamatergic neuron,#3EA185,Superficial Layers,#3ECB88,Superficial Layers,#3ECB88,VISp L4 Rspo1,#00979D,56
52 | Exc L3-5 RORB ESR1,Layer 4,Superficial Layers,51,#00DDC5,56,cl56_e1428_TPBG_L4.Sparcl1_L4.1,1428,exc,4.1,4,54,Glutamatergic neuron,#3EA185,Superficial Layers,#3ECB88,Superficial Layers,#3ECB88,VISp L5 Endou,#00DDC5,57
53 | Exc L3-5 RORB COL22A1,Layer 4,Superficial Layers,52,#00A79D,67,cl67_e160_GPR26_L6.Syt17_L4.0,160,exc,4,4,53,Glutamatergic neuron,#3EA185,Superficial Layers,#3ECB88,Superficial Layers,#3ECB88,ALM L5 IT Wfdc18,#00A79D,63
54 | Exc L3-5 RORB FILIP1L,Layer 4,Superficial Layers,53,#0094C2,62,cl62_e153_UBE2E3_L5a.Deptor_Pacsin2_L3.7,153,exc,3.7,4,51,Glutamatergic neuron,#3EA185,Superficial Layers,#3ECB88,Superficial Layers,#3ECB88,ALM L5 IT Olfr78,#0094C2,62
55 | Exc L3-5 RORB TWIST2,Layer 4,Superficial Layers,54,#77D9B7,72,cl72_e93_CENPCP1_L5a.Deptor_Pacsin2_L4.0,93,exc,4,4,52,Glutamatergic neuron,#3EA185,Superficial Layers,#3ECB88,Superficial Layers,#3ECB88,ALM L5 IT Wfdc17,#77D9B7,64
56 | Exc L4-5 RORB FOLH1B,Layer 5 IT,Superficial Layers,55,#00A809,58,cl58_e870_FOLH1B_L5a.Deptor_Pacsin2_L4.5,870,exc,4.5,4,55,Glutamatergic neuron,#3EA185,Superficial Layers,#3ECB88,Superficial Layers,#3ECB88,VISp L5 IT Batf3,#00A809,59
57 | Exc L4-6 RORB SEMA3E,Layer 5 IT,Deep Layers,56,#26FFF2,57,cl57_e777_MKX_L5a.Deptor_Pacsin2_L4.6,777,exc,4.6,5a,57,Glutamatergic neuron,#3EA185,Deep Layers,#3E7782,Deep Layers,#3E7782,VISp L5 IT Hsd11b1,#26FFF2,58
58 | Exc L4-5 RORB DAPK2,Layer 5 IT,Superficial Layers,57,#00FF00,71,cl71_e173_LNX2_L5a.Deptor_Pacsin2_L4.2,173,exc,4.2,4,56,Glutamatergic neuron,#3EA185,Superficial Layers,#3ECB88,Superficial Layers,#3ECB88,VISp L5 IT Col6a1,#00FF00,60
59 | Exc L5-6 RORB TTC12,Layer 5 IT,Deep Layers,58,#26BF64,66,cl66_e167_SPAG6_L5a.Deptor_Pacsin2_L5.1,167,exc,5.1,5b,59,Glutamatergic neuron,#3EA185,Deep Layers,#3E7782,Deep Layers,#3E7782,VISp L5 IT Pld5,#26BF64,61
60 | Exc L4-6 RORB C1R,Layer 5 IT,Deep Layers,59,#008F1F,68,cl68_e160_C1R_L5a.Deptor_Pacsin2_L5.0,160,exc,5,5a,58,Glutamatergic neuron,#3EA185,Deep Layers,#3E7782,Deep Layers,#3E7782,ALM L5 IT Aldh1a7_1,#008F1F,65
61 | Exc L4-5 FEZF2 SCN4B,Layer 5 PT,Deep Layers,60,#104F00,78,cl78_e25_SCN4B_L5b.Cdh13_L4.7,25,exc,4.7,5a,60,Glutamatergic neuron,#3EA185,Deep Layers,#3E7782,Deep Layers,#3E7782,VISp L5 PT Bmp5,#104F00,75
62 | Exc L5-6 THEMIS DCSTAMP,Layer 6 IT Car3,Deep Layers,61,#5100FF,75,cl75_e53_DCSTAMP_L2.Ngb_L5.3,53,exc,5.3,5b,61,Glutamatergic neuron,#3EA185,Deep Layers,#3E7782,Deep Layers,#3E7782,VISp L6 IT Car3,#5100FF,72
63 | Exc L5-6 THEMIS CRABP1,Layer 6 IT Car3,Deep Layers,62,#8046FF,69,cl69_e147_CRABP1_L2.Ngb_L5.3,147,exc,5.3,5b,62,Glutamatergic neuron,#3EA185,Deep Layers,#3E7782,Deep Layers,#3E7782,VISp L6 IT Car3,#5100FF,72
64 | Exc L5-6 THEMIS FGF10,Layer 6 IT Car3,Deep Layers,63,#B08BFF,70,cl70_e78_FGF10_L2.Ngb_L5.4,78,exc,5.4,5b,63,Glutamatergic neuron,#3EA185,Deep Layers,#3E7782,Deep Layers,#3E7782,VISp L6 IT Car3,#5100FF,72
65 | Exc L4-6 FEZF2 IL26,Layer 5 NP,Deep Layers,64,#73CA95,65,cl65_e344_CD200R1L_L6a.Plcxd3_L5.0,344,exc,5,5a,64,Glutamatergic neuron,#3EA185,Deep Layers,#3E7782,Deep Layers,#3E7782,VISp L5 NP Slc17a8,#73CA95,84
66 | Exc L5-6 FEZF2 ABO,Layer 6 CT,Deep Layers,65,#1F6666,61,cl61_e373_ABO_L6a.Plcxd3_L5.7,373,exc,5.7,6a,69,Glutamatergic neuron,#3EA185,Deep Layers,#3E7782,Deep Layers,#3E7782,VISp L6b Rprm,#1F6666,93
67 | Exc L6 FEZF2 SCUBE1,Layer 6b,Deep Layers,66,#388899,74,cl74_e52_UBQLN2_L6a.Plcxd3_L6.0,52,exc,6,6b,67,Glutamatergic neuron,#3EA185,Deep Layers,#3E7782,Deep Layers,#3E7782,VISp L6b Col8a1,#388899,95
68 | Exc L5-6 SLC17A7 IL15,Layer 6b,Deep Layers,67,#336D99,73,cl73_e56_IL15_L6a.Plcxd3_L5.6,56,exc,5.6,6a,66,Glutamatergic neuron,#3EA185,Deep Layers,#3E7782,Deep Layers,#3E7782,L6b P2ry12,#336D99,98
69 | Exc L6 FEZF2 OR2T8,Layer 6b,Deep Layers,68,#254566,80,cl80_e19_OR2T8_L6a.Plcxd3_L5.9,19,exc,5.9,6b,68,Glutamatergic neuron,#3EA185,Deep Layers,#3E7782,Deep Layers,#3E7782,VISp L6b Crh,#254566,99
70 | Exc L5-6 FEZF2 EFTUD1P1,Layer 6b,Deep Layers,69,#335280,64,cl64_e314_EFTUD1P1_L5a.Deptor_Pacsin2_L5.6,314,exc,5.6,6a,65,Glutamatergic neuron,#3EA185,Deep Layers,#3E7782,Deep Layers,#3E7782,L6b Mup3,#335280,100
71 | OPC L1-6 PDGFRA,OPC,Oligo/OPC,70,#6B998D,82,cl82_g238_GPNMB_OPC.Pdgfra_L3.7,238,glia,3.7,4,73,Non-neuronal,#6D7A67,Oligo/OPC,#598077,Oligo/OPC,#598077,OPC Pdgfra,#476655,104
72 | Astro L1-6 FGFR3 SLC14A1,Astrocyte,Astrocyte,71,#665C47,83,cl83_g230_TP53BP2_Astro.Gja1_L3.4,230,glia,3.4,3b,72,Non-neuronal,#6D7A67,Astrocyte,#696E50,Astrocyte,#696E50,Astro Aqp4,#665C47,103.5
73 | Astro L1-2 FGFR3 GFAP,Astrocyte,Astrocyte,72,#6B8059,85,cl85_g61_GFAP_Astro.Gja1_L1.9,61,glia,1.9,2,71,Non-neuronal,#6D7A67,Astrocyte,#696E50,Astrocyte,#696E50,Astro Aqp4,#665C47,103
74 | Oligo L1-6 OPALIN,Oligodendrocyte,Oligo/OPC,73,#476662,81,cl81_g313_SH3TC2_Oligo.Opalin_L4.7,313,glia,4.7,5a,74,Non-neuronal,#6D7A67,Oligo/OPC,#598077,Oligo/OPC,#598077,Oligo Opalin,#476662,106
75 | Endo L2-6 NOSTRIN,Endothelial,Endothelial,74,#99796B,87,cl87_g9_EPAS1_Endo.Myl9_L3.9,9,glia,3.9,4,75,Non-neuronal,#6D7A67,Endothelial,#99796B,Endothelial,#99796B,Endo Gja5,#99796B,112
76 | Micro L1-3 TYROBP,Microglia,Microglia,75,#598069,84,cl84_g63_IKZF1_Micro.Ctss_L3.2,63,glia,3.2,3b,70,Non-neuronal,#6D7A67,Microglia,#598069,Microglia,#598069,Micro Siglech,#598069,116
77 | 


--------------------------------------------------------------------------------
/data/Paul2017_Table S3.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/MTG_celltypes/1426cbe8d4ede02f4f394dd14d5ef03d8bfe0a57/data/Paul2017_Table S3.xlsx


--------------------------------------------------------------------------------
/data/Paul2017_Table S4.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/MTG_celltypes/1426cbe8d4ede02f4f394dd14d5ef03d8bfe0a57/data/Paul2017_Table S4.xlsx


--------------------------------------------------------------------------------
/data/clusterInfoMTG.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/MTG_celltypes/1426cbe8d4ede02f4f394dd14d5ef03d8bfe0a57/data/clusterInfoMTG.RData


--------------------------------------------------------------------------------
/data/clusterInfoMTG.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/MTG_celltypes/1426cbe8d4ede02f4f394dd14d5ef03d8bfe0a57/data/clusterInfoMTG.rda


--------------------------------------------------------------------------------
/data/human_mouse_homology.csv:
--------------------------------------------------------------------------------
  1 | homol_type,species,cluster,cluster_type,cluster_color,tree_order
  2 | Exc L2/3 IT,human,Exc L2 LAMP5 LTK,exc,#D9F077,26
  3 | Exc L2/3 IT,human,Exc L2-3 LINC00507 FREM3,exc,#D9F077,26
  4 | Exc L2/3 IT,human,Exc L2-4 LINC00507 GLP2R,exc,#D9F077,26
  5 | Exc L2/3 IT,mouse,L2/3 IT ALM Macc1 Lrg1,exc,#D9F077,26
  6 | Exc L2/3 IT,mouse,L2/3 IT ALM Ptrf,exc,#D9F077,26
  7 | Exc L2/3 IT,mouse,L2/3 IT ALM Sla,exc,#D9F077,26
  8 | Exc L2/3 IT,mouse,L2/3 IT VISp Adamts2,exc,#D9F077,26
  9 | Exc L2/3 IT,mouse,L2/3 IT VISp Agmat,exc,#D9F077,26
 10 | Exc L2/3 IT,mouse,L2/3 IT VISp Rrad,exc,#D9F077,26
 11 | Exc L3/5 IT,human,Exc L3-4 RORB CARM1P1,exc,#00979D,22
 12 | Exc L3/5 IT,mouse,L5 IT ALM Tmem163 Dmrtb1,exc,#00979D,22
 13 | Exc L3/5 IT,mouse,L5 IT VISp Hsd11b1 Endou,exc,#00979D,22
 14 | Exc L4/5 IT,human,Exc L3-5 RORB ESR1,exc,#00DDC5,24
 15 | Exc L4/5 IT,human,Exc L3-5 RORB FILIP1L,exc,#00DDC5,24
 16 | Exc L4/5 IT,human,Exc L3-5 RORB TWIST2,exc,#00DDC5,24
 17 | Exc L4/5 IT,mouse,L4 IT VISp Rspo1,exc,#00DDC5,24
 18 | Exc L4/5 IT,mouse,L5 IT ALM Cbln4 Fezf2,exc,#00DDC5,24
 19 | Exc L4/5 IT,mouse,L5 IT ALM Lypd1 Gpr88,exc,#00DDC5,24
 20 | Exc L4/5 IT,mouse,L5 IT ALM Npw,exc,#00DDC5,24
 21 | Exc L4/5 IT,mouse,L5 IT ALM Pld5,exc,#00DDC5,24
 22 | Exc L5 PT,human,Exc L4-5 FEZF2 SCN4B,exc,#104F00,29
 23 | Exc L5 PT,mouse,L5 PT ALM Hpgd,exc,#104F00,29
 24 | Exc L5 PT,mouse,L5 PT ALM Npsr1,exc,#104F00,29
 25 | Exc L5 PT,mouse,L5 PT VISp C1ql2 Cdh13,exc,#104F00,29
 26 | Exc L5 PT,mouse,L5 PT VISp C1ql2 Ptgfr,exc,#104F00,29
 27 | Exc L5 PT,mouse,L5 PT VISp Krt80,exc,#104F00,29
 28 | Exc L5 PT,mouse,L5 PT VISp Lgr5,exc,#104F00,29
 29 | Exc L5/6 IT 1,human,Exc L3-5 RORB COL22A1,exc,#00A79D,21
 30 | Exc L5/6 IT 1,mouse,L5 IT ALM Tnc,exc,#00A79D,21
 31 | Exc L5/6 IT 1,mouse,L6 IT VISp Col18a1,exc,#00A79D,21
 32 | Exc L5/6 IT 2,human,Exc L4-5 RORB DAPK2,exc,#00FF00,23
 33 | Exc L5/6 IT 2,human,Exc L4-5 RORB FOLH1B,exc,#00FF00,23
 34 | Exc L5/6 IT 2,human,Exc L4-6 RORB SEMA3E,exc,#00FF00,23
 35 | Exc L5/6 IT 2,mouse,L5 IT ALM Cpa6 Gpr88,exc,#00FF00,23
 36 | Exc L5/6 IT 2,mouse,L5 IT ALM Tmem163 Arhgap25,exc,#00FF00,23
 37 | Exc L5/6 IT 2,mouse,L5 IT VISp Batf3,exc,#00FF00,23
 38 | Exc L5/6 IT 2,mouse,L5 IT VISp Whrn Tox2,exc,#00FF00,23
 39 | Exc L5/6 IT 3,human,Exc L4-6 RORB C1R,exc,#008F1F,25
 40 | Exc L5/6 IT 3,human,Exc L5-6 RORB TTC12,exc,#008F1F,25
 41 | Exc L5/6 IT 3,mouse,L5 IT VISp Col27a1,exc,#008F1F,25
 42 | Exc L5/6 IT 3,mouse,L5 IT VISp Col6a1 Fezf2,exc,#008F1F,25
 43 | Exc L5/6 IT 3,mouse,L6 IT ALM Tgfb1,exc,#008F1F,25
 44 | Exc L5/6 NP,human,Exc L4-6 FEZF2 IL26,exc,#73CA95,30
 45 | Exc L5/6 NP,mouse,L5 NP ALM Trhr Nefl,exc,#73CA95,30
 46 | Exc L5/6 NP,mouse,L5 NP VISp Trhr Cpne7,exc,#73CA95,30
 47 | Exc L5/6 NP,mouse,L5 NP VISp Trhr Met,exc,#73CA95,30
 48 | Exc L5/6 NP,mouse,L6 NP ALM Trh,exc,#73CA95,30
 49 | Exc L6 CT,human,Exc L5-6 FEZF2 ABO,exc,#1F6666,31
 50 | Exc L6 CT,mouse,L6 CT ALM Cpa6,exc,#1F6666,31
 51 | Exc L6 CT,mouse,L6 CT ALM Nxph2 Sla,exc,#1F6666,31
 52 | Exc L6 CT,mouse,L6 CT VISp Ctxn3 Brinp3,exc,#1F6666,31
 53 | Exc L6 CT,mouse,L6 CT VISp Ctxn3 Sla,exc,#1F6666,31
 54 | Exc L6 CT,mouse,L6 CT VISp Gpr139,exc,#1F6666,31
 55 | Exc L6 CT,mouse,L6 CT VISp Krt80 Sla,exc,#1F6666,31
 56 | Exc L6 CT,mouse,L6 CT VISp Nxph2 Wls,exc,#1F6666,31
 57 | Exc L6 IT 1,human,Exc L5-6 THEMIS C1QL3,exc,#A19922,27
 58 | Exc L6 IT 1,mouse,L6 IT VISp Col23a1 Adamts2,exc,#A19922,27
 59 | Exc L6 IT 1,mouse,L6 IT VISp Penk Col27a1,exc,#A19922,27
 60 | Exc L6 IT 1,mouse,L6 IT VISp Penk Fst,exc,#A19922,27
 61 | Exc L6 IT 2,human,Exc L5-6 THEMIS CRABP1,exc,#8046FF,28
 62 | Exc L6 IT 2,human,Exc L5-6 THEMIS DCSTAMP,exc,#8046FF,28
 63 | Exc L6 IT 2,human,Exc L5-6 THEMIS FGF10,exc,#8046FF,28
 64 | Exc L6 IT 2,mouse,L6 IT ALM Oprk1,exc,#8046FF,28
 65 | Exc L6 IT 2,mouse,L6 IT VISp Car3,exc,#8046FF,28
 66 | Exc L6b,human,Exc L5-6 FEZF2 EFTUD1P1,exc,#335280,32
 67 | Exc L6b,human,Exc L5-6 SLC17A7 IL15,exc,#335280,32
 68 | Exc L6b,human,Exc L6 FEZF2 OR2T8,exc,#335280,32
 69 | Exc L6b,human,Exc L6 FEZF2 SCUBE1,exc,#335280,32
 70 | Exc L6b,mouse,L6b ALM Olfr111 Nxph1,exc,#335280,32
 71 | Exc L6b,mouse,L6b ALM Olfr111 Spon1,exc,#335280,32
 72 | Exc L6b,mouse,L6b VISp Col8a1 Rprm,exc,#335280,32
 73 | Exc L6b,mouse,L6b Hsd17b2,exc,#335280,32
 74 | Exc L6b,mouse,L6b P2ry12,exc,#335280,32
 75 | Exc L6b,mouse,L6b VISp Col8a1 Rxfp1,exc,#335280,32
 76 | Exc L6b,mouse,L6b VISp Crh,exc,#335280,32
 77 | Exc L6b,mouse,L6b VISp Mup5,exc,#335280,32
 78 | none,mouse,CR Lhx5,exc,#AAAAAA,38
 79 | none,mouse,L5 IT ALM Gkn1 Pcdh19,exc,#AAAAAA,38
 80 | none,mouse,L5 PT ALM Slco2a1,exc,#AAAAAA,38
 81 | none,mouse,L5 PT VISp Chrna6,exc,#AAAAAA,38
 82 | Astrocyte,human,Astro L1-2 FGFR3 GFAP,glia,#6B8059,34
 83 | Astrocyte,human,Astro L1-6 FGFR3 SLC14A1,glia,#6B8059,34
 84 | Astrocyte,mouse,Astro Aqp4,glia,#6B8059,34
 85 | Endothelial,human,Endo L2-6 NOSTRIN,glia,#99796B,36
 86 | Endothelial,mouse,Endo Ctla2a,glia,#99796B,36
 87 | Endothelial,mouse,Endo Cytl1,glia,#99796B,36
 88 | Microglia/PVM,human,Micro L1-3 TYROBP,glia,#598069,37
 89 | Microglia/PVM,mouse,Microglia Siglech,glia,#598069,37
 90 | Microglia/PVM,mouse,PVM Mrc1,glia,#598069,37
 91 | none,mouse,Peri Kcnj8,glia,#AAAAAA,38
 92 | none,mouse,SMC Acta2,glia,#AAAAAA,38
 93 | none,mouse,VLMC Osr1 Cd74,glia,#AAAAAA,38
 94 | none,mouse,VLMC Osr1 Mc5r,glia,#AAAAAA,38
 95 | none,mouse,VLMC Spp1 Col15a1,glia,#AAAAAA,38
 96 | none,mouse,VLMC Spp1 Hs3st6,glia,#AAAAAA,38
 97 | Oligo,human,Oligo L1-6 OPALIN,glia,#476662,35
 98 | Oligo,mouse,Oligo Rassf10,glia,#476662,35
 99 | Oligo,mouse,Oligo Serpinb1a,glia,#476662,35
100 | Oligo,mouse,Oligo Synpr,glia,#476662,35
101 | OPC,human,OPC L1-6 PDGFRA,glia,#6B998D,33
102 | OPC,mouse,OPC Pdgfra Grm5,glia,#6B998D,33
103 | OPC,mouse,OPC Pdgfra Ccnb1,glia,#6B998D,33
104 | Chandelier,human,Inh L2-5 PVALB SCUBE3,inh,#FF197F,20
105 | Chandelier,mouse,Pvalb Vipr2,inh,#FF197F,20
106 | Lamp5 1,human,Inh L1 SST NMBR,inh,#E67B73,5
107 | Lamp5 1,human,Inh L1-2 LAMP5 DBP,inh,#E67B73,5
108 | Lamp5 1,mouse,Lamp5 Plch2 Dock5,inh,#E67B73,5
109 | Lamp5 2,human,Inh L1 SST CHRNA4,inh,#DD6091,6
110 | Lamp5 2,human,Inh L1-2 GAD1 MC4R,inh,#DD6091,6
111 | Lamp5 2,mouse,Lamp5 Fam19a1 Pax6,inh,#DD6091,6
112 | Lamp5 2,mouse,Lamp5 Fam19a1 Tmem182,inh,#DD6091,6
113 | Lamp5 2,mouse,Lamp5 Ntn1 Npy2r,inh,#DD6091,6
114 | Lamp5 Lhx6,human,Inh L2-6 LAMP5 CA1,inh,#C77963,2
115 | Lamp5 Lhx6,mouse,Lamp5 Lhx6,inh,#C77963,2
116 | Lamp5 Rosehip,human,Inh L1-4 LAMP5 LCP2,inh,#FFA388,1
117 | Lamp5 Rosehip,mouse,Lamp5 Lsp1,inh,#FFA388,1
118 | none,human,Inh L2-4 SST FRZB,inh,#FF554D,38
119 | none,mouse,Meis2 Adamts19,inh,#AAAAAA,38
120 | none,mouse,Sst Tac2 Myh4,inh,#AAAAAA,38
121 | none,mouse,Vip Igfbp4 Mab21l1,inh,#AAAAAA,38
122 | Pax6,human,Inh L1-2 PAX6 CDH12,inh,#DDACC9,3
123 | Pax6,human,Inh L1-2 PAX6 TNFAIP8L3,inh,#DDACC9,3
124 | Pax6,mouse,Lamp5 Krt73,inh,#DDACC9,3
125 | Pvalb 1,human,Inh L4-5 PVALB MEPE,inh,#994C00,18
126 | Pvalb 1,human,Inh L5-6 GAD1 GLP1R,inh,#994C00,18
127 | Pvalb 1,human,Inh L5-6 PVALB LGR5,inh,#994C00,18
128 | Pvalb 1,human,Inh L5-6 SST MIR548F2,inh,#994C00,18
129 | Pvalb 1,human,Inh L5-6 SST TH,inh,#994C00,18
130 | Pvalb 1,mouse,Pvalb Akr1c18 Ntf3,inh,#994C00,18
131 | Pvalb 1,mouse,Pvalb Gabrg1,inh,#994C00,18
132 | Pvalb 1,mouse,Pvalb Gpr149 Islr,inh,#994C00,18
133 | Pvalb 1,mouse,Pvalb Th Sst,inh,#994C00,18
134 | Pvalb 1,mouse,Sst Nts,inh,#994C00,18
135 | Pvalb 2,human,Inh L2-4 PVALB WFDC2,inh,#BC2B11,19
136 | Pvalb 2,human,Inh L4-6 PVALB SULF1,inh,#BC2B11,19
137 | Pvalb 2,mouse,Pvalb Calb1 Sst,inh,#BC2B11,19
138 | Pvalb 2,mouse,Pvalb Reln Itm2a,inh,#BC2B11,19
139 | Pvalb 2,mouse,Pvalb Reln Tac1,inh,#BC2B11,19
140 | Pvalb 2,mouse,Pvalb Sema3e Kank4,inh,#BC2B11,19
141 | Pvalb 2,mouse,Pvalb Tpbg,inh,#BC2B11,19
142 | Sst 1,human,Inh L3-6 SST HPGD,inh,#D9C566,15
143 | Sst 1,human,Inh L4-6 SST B3GAT2,inh,#D9C566,15
144 | Sst 1,mouse,Sst Chrna2 Glra3,inh,#D9C566,15
145 | Sst 1,mouse,Sst Chrna2 Ptgdr,inh,#D9C566,15
146 | Sst 1,mouse,Sst Myh8 Etv1 ,inh,#D9C566,15
147 | Sst 1,mouse,Sst Myh8 Fibin,inh,#D9C566,15
148 | Sst 1,mouse,Sst Nr2f2 Necab1,inh,#D9C566,15
149 | Sst 2,human,Inh L5-6 SST KLHDC8A,inh,#C11331,13
150 | Sst 2,mouse,Sst Crh 4930553C11Rik ,inh,#C11331,13
151 | Sst 2,mouse,Sst Crhr2 Efemp1,inh,#C11331,13
152 | Sst 2,mouse,Sst Esm1,inh,#C11331,13
153 | Sst 3,human,Inh L4-6 SST GXYLT2,inh,#802600,14
154 | Sst 3,human,Inh L5-6 SST NPM1P10,inh,#802600,14
155 | Sst 3,mouse,Sst Rxfp1 Eya1,inh,#802600,14
156 | Sst 3,mouse,Sst Rxfp1 Prdm8,inh,#802600,14
157 | Sst 3,mouse,Sst Tac2 Tacstd2,inh,#802600,14
158 | Sst 4,human,Inh L3-5 SST ADGRG6,inh,#804811,16
159 | Sst 4,human,Inh L4-5 SST STK32A,inh,#804811,16
160 | Sst 4,mouse,Sst Hpse Cbln4,inh,#804811,16
161 | Sst 4,mouse,Sst Hpse Sema3c,inh,#804811,16
162 | Sst 5,human,Inh L1-3 SST CALB1,inh,#BF480D,17
163 | Sst 5,mouse,Sst Calb2 Necab1,inh,#BF480D,17
164 | Sst 5,mouse,Sst Calb2 Pdlim5,inh,#BF480D,17
165 | Sst 5,mouse,Sst Mme Fam114a1,inh,#BF480D,17
166 | Sst 5,mouse,Sst Tac1 Htr1d,inh,#BF480D,17
167 | Sst 5,mouse,Sst Tac1 Tacr3,inh,#BF480D,17
168 | Sst Chodl,human,Inh L3-6 SST NPY,inh,#FFDA50,12
169 | Sst Chodl,mouse,Sst Chodl,inh,#FFDA50,12
170 | Vip 1,human,Inh L1-2 SST BAGE2,inh,#9440F3,7
171 | Vip 1,human,Inh L1-3 VIP ADAMTSL1,inh,#9440F3,7
172 | Vip 1,human,Inh L1-4 VIP PENK,inh,#9440F3,7
173 | Vip 1,mouse,Vip Igfbp6 Car10,inh,#9440F3,7
174 | Vip 1,mouse,Vip Igfbp6 Pltp,inh,#9440F3,7
175 | Vip 2,human,Inh L2-6 VIP QPCT,inh,#A700FF,8
176 | Vip 2,human,Inh L3-6 VIP HS3ST3A1,inh,#A700FF,8
177 | Vip 2,mouse,Vip Arhgap36 Hmcn1,inh,#A700FF,8
178 | Vip 2,mouse,Vip Gpc3 Slc18a3,inh,#A700FF,8
179 | Vip 2,mouse,Vip Lmo1 Fam159b,inh,#A700FF,8
180 | Vip 2,mouse,Vip Lmo1 Myl1,inh,#A700FF,8
181 | Vip 3,human,Inh L1-2 VIP PCDH20,inh,#BD3D9A,9
182 | Vip 3,human,Inh L2-5 VIP SERPINF1,inh,#BD3D9A,9
183 | Vip 3,human,Inh L2-5 VIP TYR,inh,#BD3D9A,9
184 | Vip 3,mouse,Serpinf1 Aqp5 Vip,inh,#BD3D9A,9
185 | Vip 3,mouse,Vip Chat Htr1f,inh,#BD3D9A,9
186 | Vip 3,mouse,Vip Pygm C1ql1,inh,#BD3D9A,9
187 | Vip 4,human,Inh L1-3 VIP CCDC184,inh,#FF00B3,10
188 | Vip 4,human,Inh L1-3 VIP CHRM2,inh,#FF00B3,10
189 | Vip 4,human,Inh L1-3 VIP GGH,inh,#FF00B3,10
190 | Vip 4,human,Inh L2-4 VIP CBLN1,inh,#FF00B3,10
191 | Vip 4,mouse,Vip Lect1 Oxtr,inh,#FF00B3,10
192 | Vip 4,mouse,Vip Ptprt Pkp2,inh,#FF00B3,10
193 | Vip 4,mouse,Vip Rspo1 Itga4,inh,#FF00B3,10
194 | Vip 4,mouse,Vip Rspo4 Rxfp1 Chat,inh,#FF00B3,10
195 | Vip 5,human,Inh L1-2 VIP LBH,inh,#AF00E6,11
196 | Vip 5,human,Inh L1-4 VIP CHRNA6,inh,#AF00E6,11
197 | Vip 5,human,Inh L1-4 VIP OPRM1,inh,#AF00E6,11
198 | Vip 5,human,Inh L2-3 VIP CASC6,inh,#AF00E6,11
199 | Vip 5,human,Inh L2-4 VIP SPAG17,inh,#AF00E6,11
200 | Vip 5,mouse,Vip Crispld2 Htr2c,inh,#AF00E6,11
201 | Vip 5,mouse,Vip Crispld2 Kcne4,inh,#AF00E6,11
202 | Vip Sncg,human,Inh L1-2 VIP TSPAN12,inh,#6C00BF,4
203 | Vip Sncg,human,Inh L1-3 PAX6 SYT6,inh,#6C00BF,4
204 | Vip Sncg,mouse,Serpinf1 Clrn1,inh,#6C00BF,4
205 | Vip Sncg,mouse,Sncg Gpr50,inh,#6C00BF,4
206 | Vip Sncg,mouse,Sncg Slc17a8,inh,#6C00BF,4
207 | Vip Sncg,mouse,Sncg Vip Itih5,inh,#6C00BF,4
208 | Vip Sncg,mouse,Sncg Vip Nptx2,inh,#6C00BF,4
209 | Vip Sncg,mouse,Vip Col15a1 Pde1a,inh,#6C00BF,4
210 | 


--------------------------------------------------------------------------------
/data/mito_genes.txt:
--------------------------------------------------------------------------------
   1 | AADAT
   2 | AARS2
   3 | AASS
   4 | ABAT
   5 | ABCA13
   6 | ABCA9
   7 | ABCB10
   8 | ABCB6
   9 | ABCB7
  10 | ABCB8
  11 | ABCB9
  12 | ABCD1
  13 | ABCD2
  14 | ABCD3
  15 | ABCF2
  16 | ABHD10
  17 | ABHD11
  18 | ACAA1
  19 | ACAA2
  20 | ACACA
  21 | ACACB
  22 | ACAD10
  23 | ACAD11
  24 | ACAD8
  25 | ACAD9
  26 | ACADL
  27 | ACADM
  28 | ACADS
  29 | ACADSB
  30 | ACADVL
  31 | ACAT1
  32 | ACCS
  33 | ACLY
  34 | ACN9
  35 | ACO1
  36 | ACO2
  37 | ACOT13
  38 | ACOT2
  39 | ACOT7
  40 | ACOT9
  41 | ACOX1
  42 | ACOX3
  43 | ACP6
  44 | ACSF2
  45 | ACSF3
  46 | ACSL1
  47 | ACSL4
  48 | ACSL6
  49 | ACSM1
  50 | ACSM2A
  51 | ACSM3
  52 | ACSM5
  53 | ACSS1
  54 | ACSS3
  55 | ACYP2
  56 | ADCK1
  57 | ADCK2
  58 | ADCK3
  59 | ADCK4
  60 | ADCK5
  61 | ADHFE1
  62 | AFG3L2
  63 | AGK
  64 | AGMAT
  65 | AGPAT5
  66 | AGR2
  67 | AGXT
  68 | AGXT2
  69 | AHCYL1
  70 | AIFM1
  71 | AIFM2
  72 | AIFM3
  73 | AK2
  74 | AK3
  75 | AK4
  76 | AKAP1
  77 | AKAP10
  78 | AKR1B10
  79 | AKR7A2
  80 | ALAS1
  81 | ALAS2
  82 | ALDH18A1
  83 | ALDH1B1
  84 | ALDH1L1
  85 | ALDH1L2
  86 | ALDH2
  87 | ALDH3A2
  88 | ALDH4A1
  89 | ALDH5A1
  90 | ALDH6A1
  91 | ALDH7A1
  92 | ALDH9A1
  93 | ALKBH1
  94 | ALKBH3
  95 | ALKBH7
  96 | AMACR
  97 | AMT
  98 | ANGEL2
  99 | APEX2
 100 | APOA1BP
 101 | APOO
 102 | APOOL
 103 | APOPT1
 104 | ARF5
 105 | ARG2
 106 | ARL2
 107 | ARMC10
 108 | ARMS2
 109 | ASAH2
 110 | ATAD1
 111 | ATAD3A
 112 | ATAD3B
 113 | ATIC
 114 | ATP10D
 115 | ATP5A1
 116 | ATP5B
 117 | ATP5C1
 118 | ATP5D
 119 | ATP5E
 120 | ATP5F1
 121 | ATP5G1
 122 | ATP5G2
 123 | ATP5G3
 124 | ATP5H
 125 | ATP5I
 126 | ATP5J
 127 | ATP5J2
 128 | ATP5J2-PTCD1
 129 | ATP5L
 130 | ATP5O
 131 | ATP5S
 132 | ATP5SL
 133 | ATP6
 134 | ATP8
 135 | ATPAF1
 136 | ATPAF2
 137 | ATPIF1
 138 | ATXN2
 139 | AUH
 140 | AURKAIP1
 141 | BAD
 142 | BAK1
 143 | BAX
 144 | BCAT2
 145 | BCKDHA
 146 | BCKDHB
 147 | BCKDK
 148 | BCL2
 149 | BCL2L1
 150 | BCL2L13
 151 | BCL2L2
 152 | BCS1L
 153 | BDH1
 154 | BID
 155 | BLOC1S1
 156 | BNIP3
 157 | BNIP3L
 158 | BOLA1
 159 | BOLA3
 160 | BPHL
 161 | C10orf10
 162 | C10orf2
 163 | C12orf10
 164 | C12orf65
 165 | C14orf159
 166 | C14orf2
 167 | C15orf40
 168 | C15orf48
 169 | C15orf61
 170 | C16orf91
 171 | C17orf89
 172 | C19orf52
 173 | C19orf70
 174 | C1QBP
 175 | C20orf24
 176 | C21orf33
 177 | C2orf47
 178 | C2orf69
 179 | C3orf33
 180 | C5orf63
 181 | C6orf136
 182 | C6orf203
 183 | C6orf57
 184 | C7orf55
 185 | C8orf82
 186 | CA5A
 187 | CA5B
 188 | CARKD
 189 | CARS2
 190 | CASP8
 191 | CAT
 192 | CBR3
 193 | CBR4
 194 | CCBL2
 195 | CCDC109B
 196 | CCDC127
 197 | CCDC51
 198 | CCDC58
 199 | CCDC90B
 200 | CCT7
 201 | CDC25C
 202 | CECR5
 203 | CEP89
 204 | CHCHD1
 205 | CHCHD10
 206 | CHCHD2
 207 | CHCHD3
 208 | CHCHD4
 209 | CHCHD5
 210 | CHCHD6
 211 | CHCHD7
 212 | CHDH
 213 | CHPT1
 214 | CISD1
 215 | CISD2
 216 | CISD3
 217 | CKMT1A
 218 | CKMT1B
 219 | CKMT2
 220 | CLIC4
 221 | CLPB
 222 | CLPP
 223 | CLPX
 224 | CLYBL
 225 | CMC1
 226 | CMC2
 227 | CMC4
 228 | CMPK2
 229 | COA1
 230 | COA3
 231 | COA4
 232 | COA5
 233 | COA6
 234 | COA7
 235 | COASY
 236 | COMT
 237 | COMTD1
 238 | COQ10A
 239 | COQ10B
 240 | COQ2
 241 | COQ3
 242 | COQ4
 243 | COQ5
 244 | COQ6
 245 | COQ7
 246 | COQ9
 247 | COX1
 248 | COX10
 249 | COX11
 250 | COX14
 251 | COX15
 252 | COX16
 253 | COX17
 254 | COX18
 255 | COX19
 256 | COX2
 257 | COX20
 258 | COX3
 259 | COX4I1
 260 | COX4I2
 261 | COX5A
 262 | COX5B
 263 | COX6A1
 264 | COX6A2
 265 | COX6B1
 266 | COX6B2
 267 | COX6C
 268 | COX7A1
 269 | COX7A2
 270 | COX7A2L
 271 | COX7B
 272 | COX7C
 273 | COX8A
 274 | COX8C
 275 | CPOX
 276 | CPS1
 277 | CPT1A
 278 | CPT1B
 279 | CPT1C
 280 | CPT2
 281 | CRAT
 282 | CRLS1
 283 | CROT
 284 | CRY1
 285 | CRYZ
 286 | CS
 287 | CYB5A
 288 | CYB5B
 289 | CYB5R2
 290 | CYB5R3
 291 | CYC1
 292 | CYCS
 293 | CYP11A1
 294 | CYP11B2
 295 | CYP24A1
 296 | CYP27A1
 297 | CYP27B1
 298 | CYTB
 299 | D2HGDH
 300 | DAP3
 301 | DARS2
 302 | DBI
 303 | DBT
 304 | DCAKD
 305 | DCXR
 306 | DDAH1
 307 | DDX28
 308 | DECR1
 309 | DGUOK
 310 | DHCR24
 311 | DHODH
 312 | DHRS1
 313 | DHRS4
 314 | DHRS7B
 315 | DHRSX
 316 | DHTKD1
 317 | DHX30
 318 | DIABLO
 319 | DLAT
 320 | DLD
 321 | DLST
 322 | DMGDH
 323 | DMPK
 324 | DNA2
 325 | DNAJA3
 326 | DNAJC11
 327 | DNAJC15
 328 | DNAJC19
 329 | DNAJC28
 330 | DNAJC30
 331 | DNAJC4
 332 | DNLZ
 333 | DNM1L
 334 | DTYMK
 335 | DUS2
 336 | DUSP26
 337 | DUT
 338 | EARS2
 339 | ECH1
 340 | ECHDC1
 341 | ECHDC2
 342 | ECHDC3
 343 | ECHS1
 344 | ECI1
 345 | ECI2
 346 | ECSIT
 347 | EEFSEC
 348 | EFHD1
 349 | EHHADH
 350 | ELAC2
 351 | EMC2
 352 | ENDOG
 353 | EPHX2
 354 | ERAL1
 355 | ETFA
 356 | ETFB
 357 | ETFDH
 358 | ETHE1
 359 | EXOG
 360 | FABP1
 361 | FAHD1
 362 | FAHD2A
 363 | FAM136A
 364 | FAM162A
 365 | FAM185A
 366 | FAM195A
 367 | FAM210A
 368 | FAM210B
 369 | FAM213A
 370 | FARS2
 371 | FASN
 372 | FASTK
 373 | FASTKD1
 374 | FASTKD2
 375 | FASTKD3
 376 | FASTKD5
 377 | FBXL4
 378 | FDPS
 379 | FDX1
 380 | FDX1L
 381 | FDXR
 382 | FECH
 383 | FH
 384 | FHIT
 385 | FIS1
 386 | FKBP10
 387 | FKBP8
 388 | FLAD1
 389 | FOXRED1
 390 | FPGS
 391 | FTH1
 392 | FTMT
 393 | FTSJ2
 394 | FUNDC1
 395 | FUNDC2
 396 | FXN
 397 | GADD45GIP1
 398 | GAPDH
 399 | GARS
 400 | GATC
 401 | GATM
 402 | GBAS
 403 | GCAT
 404 | GCDH
 405 | GCSH
 406 | GDAP1
 407 | GFER
 408 | GFM1
 409 | GFM2
 410 | GHITM
 411 | GK
 412 | GLDC
 413 | GLOD4
 414 | GLRX2
 415 | GLRX5
 416 | GLS
 417 | GLS2
 418 | GLUD1
 419 | GLYAT
 420 | GLYCTK
 421 | GNG5
 422 | GOLPH3
 423 | GOT2
 424 | GPAM
 425 | GPD2
 426 | GPI
 427 | GPT2
 428 | GPX1
 429 | GPX4
 430 | GRHPR
 431 | GRPEL1
 432 | GRPEL2
 433 | GRSF1
 434 | GSR
 435 | GSTK1
 436 | GSTO1
 437 | GSTZ1
 438 | GTPBP10
 439 | GTPBP3
 440 | GTPBP6
 441 | GUF1
 442 | GUK1
 443 | HADH
 444 | HADHA
 445 | HADHB
 446 | HAGH
 447 | HAO2
 448 | HARS2
 449 | HCCS
 450 | HDHD3
 451 | HEBP1
 452 | HEMK1
 453 | HIBADH
 454 | HIBCH
 455 | HIGD1A
 456 | HIGD2A
 457 | HINT1
 458 | HINT2
 459 | HINT3
 460 | HK1
 461 | HK2
 462 | HMBS
 463 | HMGCL
 464 | HMGCS2
 465 | HOGA1
 466 | HRSP12
 467 | HSCB
 468 | HSD17B10
 469 | HSD17B4
 470 | HSD17B8
 471 | HSDL1
 472 | HSDL2
 473 | HSPA9
 474 | HSPB7
 475 | HSPD1
 476 | HSPE1
 477 | HTATIP2
 478 | HTRA2
 479 | IARS2
 480 | IBA57
 481 | ICT1
 482 | IDE
 483 | IDH1
 484 | IDH2
 485 | IDH3A
 486 | IDH3B
 487 | IDH3G
 488 | IDI1
 489 | IFI27
 490 | IMMP1L
 491 | IMMP2L
 492 | IMMT
 493 | ISCA1
 494 | ISCA2
 495 | ISCU
 496 | ISOC2
 497 | IVD
 498 | KARS
 499 | KIAA0100
 500 | KIAA0141
 501 | KIAA0391
 502 | KIF1B
 503 | KMO
 504 | KRT5
 505 | L2HGDH
 506 | LACE1
 507 | LACTB
 508 | LACTB2
 509 | LAMC1
 510 | LAP3
 511 | LARS2
 512 | LDHAL6B
 513 | LDHB
 514 | LDHD
 515 | LETM1
 516 | LETM2
 517 | LETMD1
 518 | LIAS
 519 | LIPT1
 520 | LIPT2
 521 | LONP1
 522 | LONP2
 523 | LRPPRC
 524 | LYPLA1
 525 | LYPLAL1
 526 | LYRM1
 527 | LYRM2
 528 | LYRM4
 529 | LYRM5
 530 | LYRM7
 531 | LYRM9
 532 | MACROD1
 533 | MALSU1
 534 | MAOA
 535 | MAOB
 536 | MARC1
 537 | MARC2
 538 | MARCH5
 539 | MARS2
 540 | MAVS
 541 | MCAT
 542 | MCCC1
 543 | MCCC2
 544 | MCEE
 545 | MCU
 546 | MCUR1
 547 | MDH1
 548 | MDH2
 549 | ME1
 550 | ME2
 551 | ME3
 552 | MECR
 553 | METAP1D
 554 | METTL15
 555 | METTL17
 556 | METTL5
 557 | METTL8
 558 | MFF
 559 | MFN1
 560 | MFN2
 561 | MGARP
 562 | MGME1
 563 | MGST1
 564 | MGST3
 565 | MICU1
 566 | MICU2
 567 | MIEF1
 568 | MINOS1
 569 | MIPEP
 570 | MLH1
 571 | MLYCD
 572 | MMAB
 573 | MMACHC
 574 | MMADHC
 575 | MOCS1
 576 | MPC1
 577 | MPC1L
 578 | MPC2
 579 | MPST
 580 | MPV17
 581 | MPV17L
 582 | MPV17L2
 583 | MRM1
 584 | MRPL1
 585 | MRPL10
 586 | MRPL11
 587 | MRPL12
 588 | MRPL13
 589 | MRPL14
 590 | MRPL15
 591 | MRPL16
 592 | MRPL17
 593 | MRPL18
 594 | MRPL19
 595 | MRPL2
 596 | MRPL20
 597 | MRPL21
 598 | MRPL22
 599 | MRPL23
 600 | MRPL24
 601 | MRPL27
 602 | MRPL28
 603 | MRPL3
 604 | MRPL30
 605 | MRPL32
 606 | MRPL33
 607 | MRPL34
 608 | MRPL35
 609 | MRPL36
 610 | MRPL37
 611 | MRPL38
 612 | MRPL39
 613 | MRPL4
 614 | MRPL40
 615 | MRPL41
 616 | MRPL42
 617 | MRPL43
 618 | MRPL44
 619 | MRPL45
 620 | MRPL46
 621 | MRPL47
 622 | MRPL48
 623 | MRPL49
 624 | MRPL50
 625 | MRPL51
 626 | MRPL52
 627 | MRPL53
 628 | MRPL54
 629 | MRPL55
 630 | MRPL57
 631 | MRPL9
 632 | MRPS10
 633 | MRPS11
 634 | MRPS12
 635 | MRPS14
 636 | MRPS15
 637 | MRPS16
 638 | MRPS17
 639 | MRPS18A
 640 | MRPS18B
 641 | MRPS18C
 642 | MRPS2
 643 | MRPS21
 644 | MRPS22
 645 | MRPS23
 646 | MRPS24
 647 | MRPS25
 648 | MRPS26
 649 | MRPS27
 650 | MRPS28
 651 | MRPS30
 652 | MRPS31
 653 | MRPS33
 654 | MRPS34
 655 | MRPS35
 656 | MRPS36
 657 | MRPS5
 658 | MRPS6
 659 | MRPS7
 660 | MRPS9
 661 | MRRF
 662 | MRS2
 663 | MSRA
 664 | MSRB2
 665 | MSRB3
 666 | MTCH1
 667 | MTCH2
 668 | MTCP1
 669 | MTERF
 670 | MTERFD1
 671 | MTERFD2
 672 | MTERFD3
 673 | MTFMT
 674 | MTFP1
 675 | MTFR1
 676 | MTFR1L
 677 | MTG1
 678 | MTG2
 679 | MTHFD1
 680 | MTHFD1L
 681 | MTHFD2
 682 | MTHFD2L
 683 | MTHFS
 684 | MTIF2
 685 | MTIF3
 686 | MTO1
 687 | MTPAP
 688 | MTRF1
 689 | MTRF1L
 690 | MTX1
 691 | MTX2
 692 | MUL1
 693 | MUT
 694 | MUTYH
 695 | NADK2
 696 | NAGS
 697 | NARS2
 698 | NBR1
 699 | NCEH1
 700 | NCOA4
 701 | ND1
 702 | ND2
 703 | ND3
 704 | ND4
 705 | ND4L
 706 | ND5
 707 | ND6
 708 | NDUFA1
 709 | NDUFA10
 710 | NDUFA11
 711 | NDUFA12
 712 | NDUFA13
 713 | NDUFA2
 714 | NDUFA3
 715 | NDUFA4
 716 | NDUFA5
 717 | NDUFA6
 718 | NDUFA7
 719 | NDUFA8
 720 | NDUFA9
 721 | NDUFAB1
 722 | NDUFAF1
 723 | NDUFAF2
 724 | NDUFAF3
 725 | NDUFAF4
 726 | NDUFAF5
 727 | NDUFAF6
 728 | NDUFAF7
 729 | NDUFB1
 730 | NDUFB10
 731 | NDUFB11
 732 | NDUFB2
 733 | NDUFB3
 734 | NDUFB4
 735 | NDUFB5
 736 | NDUFB6
 737 | NDUFB7
 738 | NDUFB8
 739 | NDUFB9
 740 | NDUFC1
 741 | NDUFC2
 742 | NDUFS1
 743 | NDUFS2
 744 | NDUFS3
 745 | NDUFS4
 746 | NDUFS5
 747 | NDUFS6
 748 | NDUFS7
 749 | NDUFS8
 750 | NDUFV1
 751 | NDUFV2
 752 | NDUFV3
 753 | NEU4
 754 | NFS1
 755 | NFU1
 756 | NGRN
 757 | NIF3L1
 758 | NIPSNAP1
 759 | NIPSNAP3A
 760 | NIPSNAP3B
 761 | NIT1
 762 | NIT2
 763 | NLN
 764 | NLRX1
 765 | NME3
 766 | NME4
 767 | NME6
 768 | NMNAT3
 769 | NNT
 770 | NOA1
 771 | NRD1
 772 | NSUN3
 773 | NSUN4
 774 | NT5C
 775 | NT5DC2
 776 | NT5DC3
 777 | NT5M
 778 | NTHL1
 779 | NUBPL
 780 | NUCB2
 781 | NUDT13
 782 | NUDT19
 783 | NUDT2
 784 | NUDT5
 785 | NUDT6
 786 | NUDT8
 787 | NUDT9
 788 | OAT
 789 | OBSCN
 790 | OCIAD1
 791 | OCIAD2
 792 | OGDH
 793 | OGDHL
 794 | OGG1
 795 | OMA1
 796 | OPA1
 797 | OPA3
 798 | OSBPL1A
 799 | OSGEPL1
 800 | OTC
 801 | OXA1L
 802 | OXCT1
 803 | OXLD1
 804 | OXNAD1
 805 | OXR1
 806 | OXSM
 807 | P4HB
 808 | PABPC5
 809 | PACSIN2
 810 | PAICS
 811 | PAK7
 812 | PAM16
 813 | PANK2
 814 | PARK7
 815 | PARL
 816 | PARS2
 817 | PC
 818 | PCBD2
 819 | PCCA
 820 | PCCB
 821 | PCK2
 822 | PDE12
 823 | PDF
 824 | PDHA1
 825 | PDHA2
 826 | PDHB
 827 | PDHX
 828 | PDK1
 829 | PDK2
 830 | PDK3
 831 | PDK4
 832 | PDP1
 833 | PDP2
 834 | PDPR
 835 | PDSS1
 836 | PDSS2
 837 | PET100
 838 | PET112
 839 | PET117
 840 | PEX11B
 841 | PGAM5
 842 | PGS1
 843 | PHB
 844 | PHB2
 845 | PHYH
 846 | PI4KA
 847 | PICK1
 848 | PIF1
 849 | PINK1
 850 | PISD
 851 | PITRM1
 852 | PKLR
 853 | PLGRKT
 854 | PMAIP1
 855 | PMPCA
 856 | PMPCB
 857 | PNPLA8
 858 | PNPO
 859 | PNPT1
 860 | POLDIP2
 861 | POLG
 862 | POLG2
 863 | POLRMT
 864 | PPA2
 865 | PPIF
 866 | PPM1K
 867 | PPOX
 868 | PPTC7
 869 | PPWD1
 870 | PRDX2
 871 | PRDX3
 872 | PRDX4
 873 | PRDX5
 874 | PRDX6
 875 | PRELID1
 876 | PRELID2
 877 | PREPL
 878 | PRODH
 879 | PRODH2
 880 | PROSC
 881 | PRSS35
 882 | PSMA6
 883 | PSTK
 884 | PTCD1
 885 | PTCD2
 886 | PTCD3
 887 | PTGES2
 888 | PTPMT1
 889 | PTPN4
 890 | PTRH1
 891 | PTRH2
 892 | PTS
 893 | PUS1
 894 | PUSL1
 895 | PXMP2
 896 | PXMP4
 897 | PYCR1
 898 | PYCR2
 899 | PYURF
 900 | QDPR
 901 | QRSL1
 902 | QTRT1
 903 | RAB11FIP5
 904 | RAB24
 905 | RAB32
 906 | RAB35
 907 | RARS
 908 | RARS2
 909 | RBFA
 910 | RCN2
 911 | RDH11
 912 | RDH13
 913 | RDH14
 914 | RECQL4
 915 | REXO2
 916 | RFK
 917 | RHOT1
 918 | RHOT2
 919 | RMDN1
 920 | RMDN3
 921 | RMND1
 922 | RNASEH1
 923 | RNMTL1
 924 | RNR1
 925 | RNR2
 926 | ROMO1
 927 | RPIA
 928 | RPL10A
 929 | RPL34
 930 | RPL35A
 931 | RPS14
 932 | RPS15A
 933 | RPS18
 934 | RPUSD3
 935 | RPUSD4
 936 | RSAD1
 937 | RTN4IP1
 938 | SAMM50
 939 | SARDH
 940 | SARS2
 941 | SCCPDH
 942 | SCO1
 943 | SCO2
 944 | SCP2
 945 | SDHA
 946 | SDHAF1
 947 | SDHAF2
 948 | SDHB
 949 | SDHC
 950 | SDHD
 951 | SDR39U1
 952 | SDSL
 953 | SECISBP2
 954 | SELO
 955 | SEPT4
 956 | SERAC1
 957 | SERHL2
 958 | SETD9
 959 | SFXN1
 960 | SFXN2
 961 | SFXN3
 962 | SFXN4
 963 | SFXN5
 964 | SHMT1
 965 | SHMT2
 966 | SIRT3
 967 | SIRT4
 968 | SIRT5
 969 | SLC16A1
 970 | SLC16A11
 971 | SLC16A7
 972 | SLC22A4
 973 | SLC25A1
 974 | SLC25A10
 975 | SLC25A11
 976 | SLC25A12
 977 | SLC25A13
 978 | SLC25A14
 979 | SLC25A15
 980 | SLC25A16
 981 | SLC25A17
 982 | SLC25A18
 983 | SLC25A19
 984 | SLC25A20
 985 | SLC25A21
 986 | SLC25A22
 987 | SLC25A23
 988 | SLC25A24
 989 | SLC25A25
 990 | SLC25A26
 991 | SLC25A27
 992 | SLC25A28
 993 | SLC25A29
 994 | SLC25A3
 995 | SLC25A30
 996 | SLC25A31
 997 | SLC25A32
 998 | SLC25A33
 999 | SLC25A34
1000 | SLC25A35
1001 | SLC25A36
1002 | SLC25A37
1003 | SLC25A38
1004 | SLC25A39
1005 | SLC25A4
1006 | SLC25A40
1007 | SLC25A41
1008 | SLC25A42
1009 | SLC25A43
1010 | SLC25A44
1011 | SLC25A45
1012 | SLC25A46
1013 | SLC25A47
1014 | SLC25A48
1015 | SLC25A5
1016 | SLC25A51
1017 | SLC25A53
1018 | SLC25A6
1019 | SLC30A6
1020 | SLC30A9
1021 | SLC37A4
1022 | SLIRP
1023 | SLMO1
1024 | SLMO2
1025 | SMDT1
1026 | SNAP29
1027 | SND1
1028 | SOD1
1029 | SOD2
1030 | SPATA19
1031 | SPATA20
1032 | SPG7
1033 | SPR
1034 | SPRYD4
1035 | SPTLC2
1036 | SQRDL
1037 | SSBP1
1038 | STAR
1039 | STARD7
1040 | STOM
1041 | STOML1
1042 | STOML2
1043 | STX17
1044 | SUCLA2
1045 | SUCLG1
1046 | SUCLG2
1047 | SUGCT
1048 | SUOX
1049 | SUPV3L1
1050 | SURF1
1051 | SYNJ2BP
1052 | TACO1
1053 | TAMM41
1054 | TARS
1055 | TARS2
1056 | TBRG4
1057 | TCAIM
1058 | TCHP
1059 | TCIRG1
1060 | TDRKH
1061 | TEFM
1062 | TFAM
1063 | TFB1M
1064 | TFB2M
1065 | THEM4
1066 | THG1L
1067 | THNSL1
1068 | TIMM10
1069 | TIMM10B
1070 | TIMM13
1071 | TIMM17A
1072 | TIMM17B
1073 | TIMM21
1074 | TIMM22
1075 | TIMM23
1076 | TIMM44
1077 | TIMM50
1078 | TIMM8A
1079 | TIMM8B
1080 | TIMM9
1081 | TIMMDC1
1082 | TK2
1083 | TKT
1084 | TMBIM4
1085 | TMEM11
1086 | TMEM126A
1087 | TMEM126B
1088 | TMEM143
1089 | TMEM14C
1090 | TMEM177
1091 | TMEM186
1092 | TMEM205
1093 | TMEM65
1094 | TMEM70
1095 | TMLHE
1096 | TOMM20
1097 | TOMM22
1098 | TOMM34
1099 | TOMM40
1100 | TOMM40L
1101 | TOMM5
1102 | TOMM6
1103 | TOMM7
1104 | TOMM70A
1105 | TOP1MT
1106 | TOP3A
1107 | TPI1
1108 | TRAP1
1109 | TRIAP1
1110 | TRIT1
1111 | TRMT1
1112 | TRMT10C
1113 | TRMT11
1114 | TRMT2B
1115 | TRMT61B
1116 | TRMU
1117 | TRNA
1118 | TRNC
1119 | TRND
1120 | TRNE
1121 | TRNF
1122 | TRNG
1123 | TRNH
1124 | TRNI
1125 | TRNK
1126 | TRNL1
1127 | TRNL2
1128 | TRNM
1129 | TRNN
1130 | TRNP
1131 | TRNQ
1132 | TRNR
1133 | TRNS1
1134 | TRNS2
1135 | TRNT
1136 | TRNT1
1137 | TRNV
1138 | TRNW
1139 | TRNY
1140 | TRUB2
1141 | TSFM
1142 | TSPO
1143 | TST
1144 | TSTD1
1145 | TSTD3
1146 | TTC19
1147 | TUBB3
1148 | TUFM
1149 | TXN2
1150 | TXNDC12
1151 | TXNRD1
1152 | TXNRD2
1153 | TYSND1
1154 | UCP1
1155 | UCP2
1156 | UCP3
1157 | UNG
1158 | UQCC1
1159 | UQCC2
1160 | UQCR10
1161 | UQCR11
1162 | UQCRB
1163 | UQCRC1
1164 | UQCRC2
1165 | UQCRFS1
1166 | UQCRH
1167 | UQCRQ
1168 | USMG5
1169 | VARS2
1170 | VDAC1
1171 | VDAC2
1172 | VDAC3
1173 | VWA8
1174 | WARS2
1175 | WBSCR16
1176 | WDR81
1177 | XPNPEP3
1178 | XRCC6BP1
1179 | YARS2
1180 | YBEY
1181 | YME1L1
1182 | ZADH2
1183 | 


--------------------------------------------------------------------------------
/data/mouseClusters.txt:
--------------------------------------------------------------------------------
  1 | Lamp5 Krt73
  2 | Lamp5 Fam19a1 Pax6
  3 | Lamp5 Fam19a1 Tmem182
  4 | Lamp5 Ntn1 Npy2r
  5 | Lamp5 Plch2 Dock5
  6 | Lamp5 Lsp1
  7 | Lamp5 Lhx6
  8 | Sncg Slc17a8
  9 | Sncg Vip Nptx2
 10 | Sncg Gpr50
 11 | Sncg Vip Itih5
 12 | Serpinf1 Clrn1
 13 | Serpinf1 Aqp5 Vip
 14 | Vip Igfbp6 Car10
 15 | Vip Igfbp6 Pltp
 16 | Vip Igfbp4 Mab21l1
 17 | Vip Arhgap36 Hmcn1
 18 | Vip Gpc3 Slc18a3
 19 | Vip Lmo1 Fam159b
 20 | Vip Lmo1 Myl1
 21 | Vip Ptprt Pkp2
 22 | Vip Rspo4 Rxfp1 Chat
 23 | Vip Lect1 Oxtr
 24 | Vip Rspo1 Itga4
 25 | Vip Chat Htr1f
 26 | Vip Pygm C1ql1
 27 | Vip Crispld2 Htr2c
 28 | Vip Crispld2 Kcne4
 29 | Vip Col15a1 Pde1a
 30 | Sst Chodl
 31 | Sst Mme Fam114a1
 32 | Sst Tac1 Htr1d
 33 | Sst Tac1 Tacr3
 34 | Sst Calb2 Necab1
 35 | Sst Calb2 Pdlim5
 36 | Sst Nr2f2 Necab1
 37 | Sst Myh8 Etv1 
 38 | Sst Myh8 Fibin
 39 | Sst Chrna2 Glra3
 40 | Sst Chrna2 Ptgdr
 41 | Sst Tac2 Myh4
 42 | Sst Hpse Sema3c
 43 | Sst Hpse Cbln4
 44 | Sst Crhr2 Efemp1
 45 | Sst Crh 4930553C11Rik 
 46 | Sst Esm1
 47 | Sst Tac2 Tacstd2
 48 | Sst Rxfp1 Eya1
 49 | Sst Rxfp1 Prdm8
 50 | Sst Nts
 51 | Pvalb Gabrg1
 52 | Pvalb Th Sst
 53 | Pvalb Akr1c18 Ntf3
 54 | Pvalb Calb1 Sst
 55 | Pvalb Sema3e Kank4
 56 | Pvalb Gpr149 Islr
 57 | Pvalb Reln Tac1
 58 | Pvalb Reln Itm2a
 59 | Pvalb Tpbg
 60 | Pvalb Vipr2
 61 | L2/3 IT VISp Rrad
 62 | L2/3 IT VISp Adamts2
 63 | L2/3 IT VISp Agmat
 64 | L2/3 IT ALM Sla
 65 | L2/3 IT ALM Ptrf
 66 | L2/3 IT ALM Macc1 Lrg1
 67 | L4 IT VISp Rspo1
 68 | L5 IT VISp Hsd11b1 Endou
 69 | L5 IT VISp Whrn Tox2
 70 | L5 IT VISp Batf3
 71 | L5 IT VISp Col6a1 Fezf2
 72 | L5 IT VISp Col27a1
 73 | L5 IT ALM Npw
 74 | L5 IT ALM Pld5
 75 | L5 IT ALM Cbln4 Fezf2
 76 | L5 IT ALM Lypd1 Gpr88
 77 | L5 IT ALM Tnc
 78 | L5 IT ALM Tmem163 Dmrtb1
 79 | L5 IT ALM Tmem163 Arhgap25
 80 | L5 IT ALM Cpa6 Gpr88
 81 | L5 IT ALM Gkn1 Pcdh19
 82 | L6 IT ALM Tgfb1
 83 | L6 IT ALM Oprk1
 84 | L6 IT VISp Penk Col27a1
 85 | L6 IT VISp Penk Fst
 86 | L6 IT VISp Col23a1 Adamts2
 87 | L6 IT VISp Col18a1
 88 | L6 IT VISp Car3
 89 | L5 PT VISp Chrna6
 90 | L5 PT VISp Lgr5
 91 | L5 PT VISp C1ql2 Ptgfr
 92 | L5 PT VISp C1ql2 Cdh13
 93 | L5 PT VISp Krt80
 94 | L5 PT ALM Slco2a1
 95 | L5 PT ALM Npsr1
 96 | L5 PT ALM Hpgd
 97 | L5 NP VISp Trhr Cpne7
 98 | L5 NP ALM Trhr Nefl
 99 | L5 NP VISp Trhr Met
100 | L6 NP ALM Trh
101 | L6 CT ALM Cpa6
102 | L6 CT ALM Nxph2 Sla
103 | L6 CT VISp Nxph2 Wls
104 | L6 CT VISp Gpr139
105 | L6 CT VISp Ctxn3 Brinp3
106 | L6 CT VISp Ctxn3 Sla
107 | L6 CT VISp Krt80 Sla
108 | L6b VISp Col8a1 Rprm
109 | L6b VISp Mup5
110 | L6b VISp Col8a1 Rxfp1
111 | L6b ALM Olfr111 Spon1
112 | L6b ALM Olfr111 Nxph1
113 | L6b P2ry12
114 | L6b VISp Crh
115 | L6b Hsd17b2
116 | Meis2 Adamts19
117 | CR Lhx5
118 | Astro Aqp4
119 | OPC Pdgfra Grm5
120 | OPC Pdgfra Ccnb1
121 | Oligo Rassf10
122 | Oligo Serpinb1a
123 | Oligo Synpr
124 | VLMC Osr1 Cd74
125 | VLMC Osr1 Mc5r
126 | VLMC Spp1 Hs3st6
127 | VLMC Spp1 Col15a1
128 | Peri Kcnj8
129 | SMC Acta2
130 | Endo Ctla2a
131 | Endo Cytl1
132 | PVM Mrc1
133 | Microglia Siglech


--------------------------------------------------------------------------------