├── .gitattributes
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── NucCellTypes.Rproj
├── R
    ├── Figure 2.Rmd
    ├── Figure 2.nb.html
    ├── Figure1.R
    ├── Figure2a_4ad_S4d.R
    ├── Figure3_4E.R
    ├── Figure4BF_S4ABC.Rmd
    ├── Figure4BF_S4ABC.nb.html
    ├── Figure5.R
    ├── Figure5DH_S5CD.nb.html
    ├── Figure5DH_S5CD.rmd
    ├── extraFunctions.R
    └── match_nuc_cells.R
├── README.md
├── data
    ├── 20170818_VISp_L5_cell
    │   ├── 20170818_VISp_L5_cell_iter_cl_data.rda
    │   ├── anno.feather
    │   ├── cl.cons.csv.gz
    │   ├── dend.RData
    │   └── prop.feather
    ├── 20170818_VISp_L5_cell_exon
    │   ├── 20170818_VISp_L5_cell_exon_iter_cl_data.rda
    │   ├── anno.feather
    │   └── cl.cons.csv.gz
    ├── 20170818_VISp_L5_cell_varE_clIE
    │   ├── anno.feather
    │   └── cl.cons.csv.gz
    ├── 20170818_VISp_L5_cell_varIE_clE
    │   ├── anno.feather
    │   └── cl.cons.csv.gz
    ├── 20170818_VISp_L5_nuc
    │   ├── 20170818_VISp_L5_nuc_iter_cl_data.rda
    │   ├── anno.feather
    │   ├── cl.cons.csv.gz
    │   ├── dend.RData
    │   └── prop.feather
    ├── 20170818_VISp_L5_nuc_exon
    │   ├── 20170818_VISp_L5_nuc_exon_iter_cl_data.rda
    │   ├── anno.feather
    │   └── cl.cons.csv.gz
    ├── 20170818_VISp_L5_nuc_varE_clIE
    │   ├── anno.feather
    │   └── cl.cons.csv.gz
    ├── 20170818_VISp_L5_nuc_varIE_clE
    │   ├── anno.feather
    │   └── cl.cons.csv.gz
    ├── Halpern2015_TableS2_Nuc_Cyto_gene_counts.csv
    ├── TableS6_Figure5_gene_info.csv
    ├── Tasic2016_cluster_name_conversion.csv
    ├── cell_ids.txt
    ├── expr_summary.rda
    ├── map_nuc_to_cells.rda
    ├── mouse_GRCm38_gene_len.csv
    ├── mouse_VISp_SMV1_1679
    │   ├── anno.feather
    │   └── prop.feather
    ├── nuc_ids.txt
    ├── nuc_soma_area_cre_lines.csv
    ├── nuc_soma_area_wt.csv
    ├── nuc_soma_probe_counts.csv
    └── start_data.rda
└── output
    └── .gitignore


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.rda filter=lfs diff=lfs merge=lfs -text
2 | *.RData filter=lfs diff=lfs merge=lfs -text
3 | *.gz filter=lfs diff=lfs merge=lfs -text
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Allen Institute Contribution Agreement
 2 | 
 3 | This document describes the terms under which you may make “Contributions” — 
 4 | which may include without limitation, software additions, revisions, bug fixes, configuration changes,
 5 | documentation, or any other materials — to any of the projects owned or managed by the Allen Institute.
 6 | If you have questions about these terms, please contact us at terms@alleninstitute.org.  
 7 | 
 8 | You certify that:
 9 | 
10 | •	Your Contributions are either:
11 | 
12 | 1.	Created in whole or in part by you and you have the right to submit them under the designated license 
13 | (described below); or
14 | 2.	Based upon previous work that, to the best of your knowledge, is covered under an appropriate 
15 | open source license and you have the right under that license to submit that work with modifications,
16 | whether created in whole or in part by you, under the designated license; or
17 | 
18 | 3.	Provided directly to you by some other person who certified (1) or (2) and you have not modified them.
19 | 
20 | •	You are granting your Contributions to the Allen Institute under the terms of the [2-Clause BSD license](https://opensource.org/licenses/BSD-2-Clause)
21 | (the “designated license”).
22 | 
23 | •	You understand and agree that the Allen Institute projects and your Contributions are public and that 
24 | a record of the Contributions (including all metadata and personal information you submit with them) is 
25 | maintained indefinitely and may be redistributed consistent with the Allen Institute’s mission and the 
26 | 2-Clause BSD license.
27 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Allen Institute Software License – This software license is the 2-clause BSD license 
 2 | plus a third clause that prohibits redistribution for commercial purposes without further permission.
 3 | 
 4 | Copyright © 2017. Allen Institute. All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
 7 | following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the 
10 | following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the 
13 | following disclaimer in the documentation and/or other materials provided with the distribution.
14 | 
15 | 3. Redistributions for commercial purposes are not permitted without the Allen Institute’s written permission.
16 | For purposes of this license, commercial purposes is the incorporation of the Allen Institute's software into
17 | anything for which you will charge fees or other compensation. Contact terms@alleninstitute.org for commercial
18 | licensing opportunities.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
26 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | 


--------------------------------------------------------------------------------
/NucCellTypes.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Yes
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 


--------------------------------------------------------------------------------
/R/Figure 2.Rmd:
--------------------------------------------------------------------------------
   1 | ---
   2 | title: "Nuclei vs Cells - Figure 2"
   3 | output: html_notebook
   4 | ---
   5 | 
   6 | 
   7 | ```{r init-workspace, message=FALSE, warning=FALSE}
   8 | # Load libraries 
   9 | library(reshape2)
  10 | library(ggplot2)
  11 | library(ggrepel)
  12 | library(treemap)
  13 | library(limma)
  14 | library(RColorBrewer)
  15 | 
  16 | # Load functions 
  17 | titlecase <- function (x) {
  18 |   # Lower-case x, then upper-case the first letter of each word of 2+ letters. TODO: use Rex package
  19 |   gsub("\\b([a-z])([a-z]+)", "\\U\\1\\L\\2", tolower(x), perl = TRUE)
  20 | }
  21 | 
  22 | ```
  23 | 
  24 | 
  25 | ##### Which low expressing genes are picked up by different methods?
  26 | ```{r load-data, message=FALSE}
  27 | # Exons + Introns
  28 | load("../data/20170818_VISp_L5_nuc/20170818_VISp_L5_nuc_iter_cl_data.rda", verbose = TRUE)
  29 | samp.dat.nuc <- samp.dat  # copied because the next load() presumably redefines samp.dat - confirm
  30 | nbt.data.nuc <- nbt.data  # copied because the next load() presumably redefines nbt.data - confirm
  31 | load("../data/20170818_VISp_L5_cell/20170818_VISp_L5_cell_iter_cl_data.rda", verbose = TRUE)
  32 | samp.dat.cell <- samp.dat
  33 | nbt.data.cell <- nbt.data
  34 | 
  35 | # Exons only
  36 | load("../data/20170818_VISp_L5_nuc_exon/20170818_VISp_L5_nuc_exon_iter_cl_data.rda", verbose = TRUE)
  37 | samp.dat.nuc.exon <- samp.dat
  38 | nbt.data.nuc.exon <- nbt.data
  39 | load("../data/20170818_VISp_L5_cell_exon/20170818_VISp_L5_cell_exon_iter_cl_data.rda", verbose = TRUE)
  40 | samp.dat.cell.exon <- samp.dat
  41 | nbt.data.cell.exon <- nbt.data
  42 | 
  43 | 
  44 | # Load correlation data (presumably defines cor.cell and cor.nuc.ordered used below - confirm)
  45 | load("../data/map_nuc_to_cells.rda")
  46 | 
  47 | # Load gene lengths; upper-case the symbols for merging, keeping the original in gene_orig
  48 | gene.len <- read.csv(file = "../data/mouse_GRCm38_gene_len.csv")
  49 | gene.len$gene_orig <- gene.len$gene
  50 | gene.len$gene <- toupper(gene.len$gene)
  51 | 
  52 | ```
  53 | 
  54 | 
  55 | ```{r process-data}
  56 | samp.dat <- rbind(samp.dat.nuc, samp.dat.cell)  # nuclei rows first, then cells
  57 | samp.dat$Samples <- as.factor(samp.dat$cell_prep_type)
  58 | # Keep genes present in both data sets; columns are nuclei first, then cells
  59 | nuc.cell.genes <- intersect(row.names(nbt.data.nuc), row.names(nbt.data.cell))
  60 | cpm.dat <- cbind(nbt.data.nuc[nuc.cell.genes, ], nbt.data.cell[nuc.cell.genes, ])
  61 | rownames(cpm.dat) <- toupper(rownames(cpm.dat))
  62 | 
  63 | ```
  64 | 
  65 | ## Cell vs. nuclear gene detection
  66 | ```{r Figure_2B-single-cell-detection, message=FALSE, warning=FALSE}
  67 | # Single Rbp4+ cell gene expression detection
  68 | keep.samp <- seq_len(nrow(samp.dat))  # all samples; seq_len() is also safe for 0 rows
  69 | samp.subset <- droplevels(samp.dat[keep.samp, ])
  70 | cpm.subset <- droplevels(cpm.dat[, keep.samp])
  71 | 
  72 | # Single cell gene detection: mean expression of each gene within each sample type
  73 | cpm.subset.mean <- t(apply(cpm.subset, 1, 
  74 |                             function(x) tapply(x, samp.subset$Samples, mean)))
  75 | # Genes with mean > 1 per sample type. sapply(simplify = FALSE) always returns a named
  76 | # list; apply() returns a matrix when all gene sets have equal length, which breaks `$`
  77 | genes <- sapply(colnames(cpm.subset.mean), simplify = FALSE,
  78 |                 function(x) rownames(cpm.subset.mean)[cpm.subset.mean[, x] > 1])
  79 | cell.genes <- setdiff(genes$Cells, genes$Nuclei)
  80 | nuc.genes <- setdiff(genes$Nuclei, genes$Cells)
  81 | write.csv(cell.genes, file="../output/cell.genes_gt1.csv", row.names=FALSE)
  82 | write.csv(nuc.genes, file="../output/nuc.genes_gt1.csv", row.names=FALSE)
  83 | write.csv(genes$Nuclei, file="../output/nuc.all.genes_gt1.csv", row.names=FALSE)
  84 | write.csv(genes$Cells, file="../output/cell.all.genes_gt1.csv", row.names=FALSE)
  85 | 
  86 | 
  87 | # Plot expression correlation within methods (single cell)
  88 | cor.all <- data.frame()
  89 | par(mfrow=c(2, 2))
  90 | for (i in 1:nlevels(samp.subset$Samples)) {
  91 |   method1 <- levels(samp.subset$Samples)[i]
  92 |   col.subset <- which(samp.subset$Samples == method1)
  93 |   corm <- cor(cpm.subset[, col.subset])
  94 |   cor.all <- rbind(cor.all, data.frame(Samples = method1, Cor_method = "Unweighted",
  95 |                                        Correlation = corm[upper.tri(corm)]))
  96 |   
  97 |   # Plot pair with max cor
  98 |   diag(corm) <- 0
  99 |   samp1 <- row(corm)[which.max(corm)]
 100 |   samp2 <- col(corm)[which.max(corm)]
 101 |   cor1 <- cor(cpm.subset[, col.subset[samp1]], cpm.subset[, col.subset[samp2]])
 102 |   samp.pair <- data.frame(samp1 = cpm.subset[, col.subset[samp1]], 
 103 |                           samp2 = cpm.subset[, col.subset[samp2]])
 104 |   g1 <- ggplot(samp.pair, aes(x = samp1, y = samp2)) +
 105 |     geom_abline(intercept = 0, slope = 1) +
 106 |     geom_point(shape = 1) +
 107 |     xlim(c(0, 10)) +
 108 |     ylim(c(0, 10)) +
 109 |     xlab(paste(method1, 1)) +
 110 |     ylab(paste(method1, 2)) +
 111 |     ggtitle(paste("r =", round(cor1, 2))) +
 112 |     theme_bw()
 113 |   plot(g1)
 114 |   ggsave(g1, width = 3, height = 3, dpi = 600,
 115 |          filename = paste0("../output/", method1, "_pair.png"))
 116 | }
 117 | 
 118 | # Add weighted pairwise cor (previously computed)
 119 | cor.all <- rbind(cor.all,
 120 |                  data.frame(Samples = "Cells", Cor_method = "Weighted",
 121 |                             Correlation = cor.cell[upper.tri(cor.cell)]),
 122 |                  data.frame(Samples = "Nuclei", Cor_method = "Weighted",
 123 |                             Correlation = cor.nuc.ordered[upper.tri(cor.nuc.ordered)]))
 124 | 
 125 | cor.all$Samples <- factor(cor.all$Samples, levels = c("Nuclei", "Cells"))
 126 | g0 <- ggplot(cor.all, aes(x = Correlation, color = Samples, linetype = Cor_method)) + 
 127 |   geom_density(size = 1) + 
 128 |   geom_hline(yintercept = 0, color = "grey80", size = 1) +
 129 |   scale_color_brewer(palette = "Set1") +
 130 |   ggtitle("Pairwise correlation between samples") +
 131 |   theme_bw()
 132 | plot(g0)
 133 | ggsave(g0, width = 4, height = 3, 
 134 |        filename = "../output/nuc_vs_cell_pw_cor.pdf")
 135 | 
 136 | 
 137 | # Count # cells expressing each gene. The TRUE pass randomly splits Cells into two
 138 | # halves ("Cells"/"Cellsb") to get within-cell quantile curves (prop.exp, prop.exp2);
 139 | # the final FALSE pass leaves samp.subset and gene.expr.prop as Cells vs. Nuclei
 140 | for (split.v4 in c(TRUE, FALSE)) {  # former leading FALSE pass was fully overwritten
 141 |   samp.subset <- droplevels(samp.dat[keep.samp, ])  # always start from unsplit labels
 142 |   if (split.v4) {
 143 |     set.seed(123)  # fixed seed so the random half-split is reproducible between runs
 144 |     samp.subset$Samples <- as.character(samp.subset$Samples)
 145 |     cell.ids <- which(samp.subset$Samples == "Cells")
 146 |     replace.names <- sample(cell.ids, round(length(cell.ids) / 2), replace = FALSE)
 147 |     samp.subset$Samples[replace.names] <- "Cellsb"
 148 |     samp.subset$Samples <- as.factor(samp.subset$Samples)
 149 |   }
 150 |   gene.cnt <- as.data.frame(matrix(NA, nrow(cpm.subset), 
 151 |                                    nlevels(samp.subset$Samples)))
 152 |   rownames(gene.cnt) <- rownames(cpm.subset)
 153 |   colnames(gene.cnt) <- levels(samp.subset$Samples)
 154 |   gene.expr <- as.data.frame(matrix(NA, nrow(cpm.subset), 
 155 |                                     nlevels(samp.subset$Samples)))
 156 |   rownames(gene.expr) <- rownames(cpm.subset)
 157 |   colnames(gene.expr) <- levels(samp.subset$Samples)
 158 |   for (i in seq_len(nlevels(samp.subset$Samples))) {
 159 |     method1 <- levels(samp.subset$Samples)[i]
 160 |     col.subset <- which(samp.subset$Samples == method1)
 161 |     # drop = FALSE keeps the column dimension even when a group has a single sample
 162 |     cnt1 <- apply(cpm.subset[, col.subset, drop = FALSE], 1, function(x) sum(x > 0))
 163 |     gene.cnt[, i] <- cnt1
 164 |     expr1 <- apply(cpm.subset[, col.subset, drop = FALSE], 1, function(x) mean(x[x > 0]))
 165 |     expr1[is.na(expr1)] <- 0
 166 |     gene.expr[, i] <- expr1
 167 |   }
 168 |   # Plot proportion of cells where gene is detected
 169 |   expr.mean <- apply(gene.expr, 1, mean)
 170 |   # pairs(gene.cnt, cex = expr.mean)
 171 |   # plot(gene.cnt$Nuclei, gene.cnt$Cells, type = "n")
 172 |   # text(gene.cnt$Nuclei, gene.cnt$Cells,
 173 |   #      rownames(gene.cnt), cex = 1)
 174 |   
 175 |   gene.prop <- sweep(gene.cnt, 2, apply(gene.cnt, 2, max), "/")
 176 |   # write.csv(gene.prop, file = "../output/gene.prop.csv")
 177 |   # keep.genes <- order(apply(gene.prop, 1, sd), decreasing = TRUE)[1:1000]
 178 |   keep.genes <- abs(gene.prop$Cells - gene.prop$Nuclei) > 0.4
 179 |   # pheatmap(gene.prop[keep.genes, ], fontsize_row = 2, fontsize_col = 36, 
 180 |   # cutree_rows = 3, clustering_distance_rows = "euclidean")
 181 |   
 182 |   # Plot expression vs. consistency
 183 |   gene.expr.prop <- data.frame(gene = rownames(gene.expr),
 184 |                                Cell_expr = gene.expr$Cells, 
 185 |                                Nuc_expr = gene.expr$Nuclei, 
 186 |                                Nucleus_prop = gene.prop$Nuclei,
 187 |                                Cell_prop = gene.prop$Cells,
 188 |                                Cell_Nuc_diff = gene.prop$Cells - gene.prop$Nuclei)
 189 |   rownames(gene.expr.prop) <- rownames(gene.expr)
 190 |   
 191 |   if (split.v4) {
 192 |     cut1 <- cut(gene.expr.prop$Cell_expr, c(0, seq(0.01, 12, by=0.2)), include.lowest = TRUE)
 193 |     prop.diff <- abs(gene.prop$Cells - gene.prop$Cellsb)
 194 |     prop.exp <- data.frame(Cell_expr = c(0, seq(0.11, 11.9, 0.2)), 
 195 |                            exprq = tapply(prop.diff, cut1, quantile, 0.975))
 196 |     cut2 <- cut(gene.prop$Cells, seq(0, 1, by=0.02), include.lowest = TRUE)
 197 |     prop.exp2 <- data.frame(Cell_expr = seq(0.01, 0.99, 0.02), 
 198 |                            exprq = tapply(gene.prop$Cellsb, cut2, quantile, 0.975))
 199 |   }
 200 | }
 201 | 
 202 | # Save list of genes
 203 | out.fn1 <- "../output/Cell_vs_nuc_gene_detection.csv"
 204 | if (! file.exists(out.fn1)) {
 205 |   write.csv(gene.expr.prop, file = out.fn1, row.names = FALSE)
 206 | }
 207 | ```
 208 | 
 209 | 
 210 | 
 211 | ```{r dex-genes, message = FALSE, warning = FALSE}
 212 | # DEX genes: linear model with one coefficient per sample type (no intercept)
 213 | design <- model.matrix(~ 0 + samp.subset$Samples)
 214 | colnames(design) <- gsub("samp.subset$Samples", "", colnames(design), fixed = TRUE)
 215 | # design <- design[sample(1:nrow(design), nrow(design), replace = FALSE), ]  # Perm control
 216 | 
 217 | fit <- lmFit(cpm.subset, design)
 218 | cont.matrix <- makeContrasts(contrasts="Cells-Nuclei", levels = design)
 219 | # Contrast is Cells - Nuclei, so a positive logFC means higher in Cells
 220 | fit2 <- eBayes(contrasts.fit(fit, cont.matrix))
 221 | dex.ie <- topTable(fit2, number = Inf, p.value = 1, adjust = "BH", sort.by = "none")
 222 | dex.ie$gene <- row.names(dex.ie)
 223 | # dex.ie <- subset(dex.ie, AveExpr > 0)
 224 | # Written only if the file does not exist yet; delete it to regenerate
 225 | out.fn2 <- "../output/Cell_vs_nuc_dex_genes_introns_exons.csv"
 226 | if (! file.exists(out.fn2)) {
 227 |   write.csv(dex.ie, file = out.fn2, row.names = FALSE)
 228 | }
 229 | ```
 230 | 
 231 | 
 232 | 
 233 | ## Cell differential detection vs. DEX
 234 | ```{r Figure_2C-gene-prop-vs-dex, message = FALSE, warning = FALSE, fig.width = 5, fig.height = 4}
 235 | # Keep genes with any nonzero numeric value. (apply() over all columns coerced each
 236 | # row to character via the gene column, so the old `! all(x == 0)` test kept every gene.)
 237 | genes.expr <- rowSums(gene.expr.prop[, sapply(gene.expr.prop, is.numeric)] != 0, na.rm = TRUE) > 0
 238 | gene.prop.dex <- merge(gene.expr.prop[genes.expr, ], dex.ie, by = "gene")
 239 | gene.annot <- subset(gene.prop.dex, gene %in% 
 240 |                        c("FOS", "ARC", "EGR1", "RBFOX3", "RBP4"))
 241 | 
 242 | pal.spectral <- colorRampPalette(rev(brewer.pal(11,'Spectral')))(100)
 243 | 
 244 | # Prop nuc vs. prop Cells
 245 | g5 <- ggplot(gene.prop.dex, aes(x = Nucleus_prop, y = Cell_prop)) +
 246 |   geom_hline(yintercept = 0) +
 247 |   stat_bin_hex(bins = 50) +
 248 |   scale_fill_gradientn(colours=pal.spectral, trans="log10", name="No. of genes") +
 249 |   xlab("Nuclei gene detection") + 
 250 |   ylab("Cells gene detection") + 
 251 |   geom_abline(intercept = 0, slope = 1, color = "grey") +
 252 |   geom_smooth(data = prop.exp2, aes(Cell_expr, exprq), 
 253 |               color = "grey", size = 1, se = FALSE) +
 254 |   geom_smooth(data = prop.exp2, aes(exprq, Cell_expr), 
 255 |               color = "grey", size = 1, se = FALSE) +
 256 |   theme_bw() + 
 257 |   theme(panel.grid.minor = element_blank(),
 258 |         axis.title = element_text(size=12))
 259 | plot(g5)
 260 | ggsave(g5, width = 4.25, height = 3, 
 261 |        filename = "../output/nuc_vs_cell_prop_scatter.pdf")
 262 | 
 263 | 
 264 | # DEX vs. sig
 265 | g2 <- ggplot(gene.prop.dex, aes(x = logFC, y = -log10(adj.P.Val))) +
 266 |   stat_bin_hex(bins = 50) +
 267 |   geom_hline(yintercept = -log10(0.05), color = "grey", size = 0.5) +
 268 |   geom_vline(xintercept = c(-log2(1.5), log2(1.5)), color = "grey", size = 0.5) +
 269 |   xlim(c(-6, 6)) +
 270 |   scale_fill_gradientn(colours=pal.spectral, trans="log10", name="No. of genes") +
 271 |   xlab("Cells vs. Nuclei (log2 fold change)") +
 272 |   ylab("Significance (-log10 P-value)") +
 273 |   theme_bw() +
 274 |   theme(panel.grid.major = element_blank(),
 275 |         panel.grid.minor = element_blank())
 276 | plot(g2)
 277 | ggsave(g2, width = 4.25, height = 3, 
 278 |        filename = "../output/nuc_vs_cell_dex_volcano.pdf")
 279 | 
 280 | 
 281 | # Prop vs. expr
 282 | g4 <- ggplot(gene.prop.dex, aes(Cell_expr, Cell_Nuc_diff)) +
 283 |   geom_hline(yintercept = 0) +
 284 |   stat_bin_hex(bins = 50) +
 285 |   scale_fill_gradientn(colours=pal.spectral, trans="log10", name="# Genes") +
 286 |   geom_smooth(data = prop.exp, aes(Cell_expr, exprq), 
 287 |               color = "grey", size = 2, se = FALSE) +
 288 |   geom_smooth(data = prop.exp, aes(Cell_expr, -exprq), 
 289 |               color = "grey", size = 2, se = FALSE) +
 290 |   # geom_text(data = gene.annot, aes(x = logFC, y = Cell_Nuc_diff, label = gene),
 291 |   #           fontface = 3, size = 3) +
 292 |   xlab("Cell mean expression (cells with RPKM > 0)") + 
 293 |   ylab("Difference in proportion of cells expressing gene\n(Cell - Nucleus)") + 
 294 |   ylim(c(-1, 1)) +
 295 |   theme_bw() + 
 296 |   theme(axis.title = element_text(size=12))
 297 | # plot(g4)
 298 | 
 299 | 
 300 | # DEX vs. Prop
 301 | g3 <- ggplot(gene.prop.dex, aes(x = logFC, y = Cell_Nuc_diff)) +
 302 |   geom_hline(yintercept = 0) +
 303 |   geom_vline(xintercept = 0) +
 304 |   stat_bin_hex(bins = 50) +
 305 |   scale_fill_gradientn(colours=pal.spectral, trans="log10") +
 306 |   geom_text(data = gene.annot, aes(x = logFC, y = Cell_Nuc_diff, label = gene), 
 307 |             fontface = 3, size = 3) +
 308 |   xlab("Cell vs. Nucleus expression (log2 fold change)") +
 309 |   ylab("Difference in proportion of cells expressing gene\n(Cell - Nucleus)") + 
 310 |   theme_bw()
 311 | # plot(g3)
 312 | 
 313 | # DEX vs. expr
 314 | g1 <- ggplot(gene.prop.dex, aes(x = Cell_expr, y = logFC)) +
 315 |   geom_hline(yintercept = 0) +
 316 |   stat_bin_hex(bins = 50) +
 317 |   scale_fill_gradientn(colours=pal.spectral, trans="log10") +
 318 |   xlab("Cell mean expression (cells with RPKM > 0)") +
 319 |   ylab("Cell vs. Nucleus expression (log2 fold change)") +
 320 |   theme_bw()
 321 | # plot(g1)
 322 | 
 323 | ```
 324 | 
 325 | 
 326 | 
 327 | ```{r Figure_2D-gene-distrib-cellvsnuc, fig.width = 4.25, fig.height = 4}
 328 | nuc.enriched.genes <- as.character(gene.prop.dex$gene[order(gene.prop.dex$logFC,
 329 |                                                            decreasing = FALSE)])
 330 | cell.enriched.genes <- as.character(gene.prop.dex$gene[order(gene.prop.dex$logFC,
 331 |                                                            decreasing = TRUE)])
 332 | 
 333 | plot.genes <- list()
 334 | plot.genes[["nuc_vs_wc"]] <- c("CACNA1C", "KCND2","KCNH7", # ion ch  (CACNA1D)
 335 |                                "GRIK2", "GRIK3", "GRM8",  # glut signal
 336 |                                "CNTNAP2","ROBO2","SEMA6D")  # axon guidance (CNTN4)
 337 | plot.genes[["wc_vs_nuc"]] <- c(cell.enriched.genes[grep("^PSM", cell.enriched.genes)][1],  # proteasome
 338 |                                cell.enriched.genes[grep("^RPL", cell.enriched.genes)][1:2],  # ribosome
 339 |                                c("POLR2J", "SNRPD1", "UBL5"),  # RNA processing
 340 |                                c("FOS", "EGR1", "ARC"))  # IEG
 341 | # plot.genes[["K_ch"]] <- nuc.enriched.genes[grep("^KCN", nuc.enriched.genes)[1:9]]
 342 | # plot.genes[["Ca_ch"]] <- nuc.enriched.genes[grep("^CACNA", nuc.enriched.genes)[1:9]]
 343 | plot.genes <- lapply(plot.genes, titlecase)
 344 | 
 345 | all.plot.genes <- unique(unlist(plot.genes))
 346 | rpkmw <- cpm.subset[toupper(all.plot.genes), ]
 347 | rpkmw$gene <- titlecase(rownames(rpkmw))
 348 | rpkmw$gene <- factor(rpkmw$gene, levels = all.plot.genes)
 349 | rpkml <- melt(rpkmw, id.vars = "gene")
 350 | rpkml$cellnuc <- rep(samp.subset$Samples, each = length(rpkmw$gene))
 351 | # rpkml$cellnuc <- factor(rpkml$cellnuc, levels = c("Nuclei", "Cells"))
 352 | 
 353 | expr.violins <- list()
 354 | for (gene.list in names(plot.genes)) {
 355 |   g1 <- ggplot(subset(rpkml, gene %in% plot.genes[[gene.list]]), 
 356 |                aes(x = cellnuc, y = value, fill = cellnuc)) + 
 357 |     facet_wrap(~ gene, nrow = 3, dir = "v") +
 358 |     geom_violin(show.legend = FALSE) +
 359 |     scale_fill_manual(values = brewer.pal(3, "Set1")[2:1]) +
 360 |     xlab("") +
 361 |     ylab("Expression (log2 CPM + 1)") +
 362 |     ggtitle(gene.list) +
 363 |     theme_bw() +
 364 |     theme(panel.grid.major = element_blank(),
 365 |           panel.grid.minor = element_blank(),
 366 |           axis.text.x=element_blank(),
 367 |           axis.ticks.x=element_blank(),
 368 |           strip.text = element_text(face = "italic"))
 369 |   plot(g1)
 370 |   expr.violins[[gene.list]] <- g1
 371 | }
 372 | 
 373 | 
 374 | ggsave(expr.violins[["nuc_vs_wc"]], width = 3.25, height = 4,
 375 |        filename = paste0("../output/nuc_vs_wc_violin.pdf"))
 376 | 
 377 | ggsave(expr.violins[["wc_vs_nuc"]], width = 3.25, height = 4,
 378 |        filename = paste0("../output/wc_vs_nuc_violin.pdf"))
 379 | 
 380 | 
 381 | 
 382 | ```
 383 | 
 384 | 
 385 | ```{r gene-length-vs-nuc-enrichment, fig.width = 5, fig.height = 3, warning=FALSE}
 386 | gene.prop.dex2 <- merge(gene.prop.dex, gene.len, by = "gene")
 387 | gene.prop.dex2$logFC_bin <- cut(gene.prop.dex2$logFC, breaks = seq(-2, 5.5, 0.5))
 388 | gene.prop.dex2 <- gene.prop.dex2[order(gene.prop.dex2$AveExpr), ]
 389 | 
 390 | nuc.short.genes <- subset(gene.prop.dex2, logFC < -log2(1.5) & gene_len < 1e5 & AveExpr > 10)
 391 | cell.long.genes <- subset(gene.prop.dex2, logFC > 0.5 & AveExpr > 7 &
 392 |                             gene_len > 1e6)
 393 | 
 394 | # write.csv(nuc.short.genes$gene, file = "../genes.csv")
 395 | # Genomic length vs. Cells - Nuclei log2 fold change, shaded by average expression
 396 | g1 <- ggplot(gene.prop.dex2, aes(x = logFC, y = gene_len, color = AveExpr)) +
 397 |   # geom_vline(xintercept = 0, size = 1, color = "grey") +
 398 |   geom_point() +
 399 |   # geom_text_repel(data=nuc.short.genes, aes(label = gene), color = "black") +
 400 |   # geom_vline(xintercept = c(-log2(1.5), log2(1.5)), size = 1, color = "grey") +
 401 |   # geom_text(data=cell.long.genes, aes(label = gene), 
 402 |   #           color = "black", hjust = 0, nudge_x = 0.1) +
 403 |   xlab("Cells vs. Nuclei (log2 fold change)") +  # logFC comes from the Cells-Nuclei contrast
 404 |   ylab("Genomic length (base pairs)") +
 405 |   scale_color_gradientn(colors = grey.colors(10, 0.9, 0, gamma = 1)) +
 406 |   theme_bw()
 407 | plot(g1)
 408 | ggsave(g1, width = 5, height = 3, dpi = 600,
 409 |        filename = "../output/gene_length_vs_nuc_enrichment.png")
 410 | 
 411 | 
 412 | ```
 413 | 
 414 | 
 415 | ```{r exon-only-dex-genes}
 416 | nuc.cell.exon.genes <- intersect(row.names(nbt.data.nuc.exon), 
 417 |                                  row.names(nbt.data.cell.exon))
 418 | cpm.dat.exon <- cbind(nbt.data.nuc.exon[nuc.cell.exon.genes, ], 
 419 |                       nbt.data.cell.exon[nuc.cell.exon.genes, ])
 420 | rownames(cpm.dat.exon) <- toupper(rownames(cpm.dat.exon))
 421 | 
 422 | # NOTE(review): the design is built from samp.subset, not from the exon-only sample
 423 | # tables - assumes cpm.dat.exon columns are the same samples in the same order (confirm)
 424 | design <- model.matrix(~ 0 + samp.subset$Samples)
 425 | colnames(design) <- gsub("samp.subset$Samples", "", colnames(design), fixed = TRUE)
 426 | 
 427 | fit <- lmFit(cpm.dat.exon, design)
 428 | cont.matrix <- makeContrasts(contrasts="Cells-Nuclei", levels = design)
 429 | 
 430 | fit2 <- eBayes(contrasts.fit(fit, cont.matrix))
 431 | dex.exons <- topTable(fit2, number = Inf, p.value = 1, adjust = "BH", sort.by = "none")
 432 | # dex.exons <- subset(dex.exons, AveExpr > 0)
 433 | dex.exons$gene <- row.names(dex.exons)
 434 | 
 435 | # Written only if the file does not exist yet; delete it to regenerate
 436 | out.fn3 <- "../output/Cell_vs_nuc_dex_genes_exons_only.csv"
 437 | if (! file.exists(out.fn3)) {
 438 |   write.csv(dex.exons, file = out.fn3, row.names = FALSE)
 439 | }
 440 | 
 441 | 
 442 | ```
 443 | 
 444 | 
 445 | ```{r Figure_S2B-compare-dex-byreads, fig.width = 8, fig.height=4}
 446 | gene.prop.dex3 <- merge(gene.prop.dex2, dex.exons, by = "gene", 
 447 |                         suffixes = c("_ie", "_ex"))
 448 | 
 449 | gdex <- ggplot(gene.prop.dex3, aes(x = logFC_ie, y = logFC_ex)) +
 450 |   geom_abline(intercept = 0, slope = 1) +
 451 |   # geom_text_repel() +
 452 |     # geom_hline(yintercept = c(-log2(1.5), log2(1.5))) +
 453 |   # geom_vline(xintercept = c(-log2(1.5), log2(1.5))) +
 454 |   geom_point() +
 455 |   theme_bw()
 456 | plot(gdex)
 457 | 
 458 | subset(gene.prop.dex3, logFC_ie > log2(1.5) & adj.P.Val_ex > 0.05)
 459 | 
 460 | subset(gene.prop.dex3, logFC_ie < -log2(1.5) & adj.P.Val_ex > 0.05)
 461 | subset(gene.prop.dex3, logFC_ex < -log2(1.5) & adj.P.Val_ie > 0.05)
 462 | 
 463 | subset(gene.prop.dex3, logFC_ie < -log2(1.5) & logFC_ex < -log2(1.5) &
 464 |          adj.P.Val_ie < 0.05 & adj.P.Val_ex < 0.05)
 465 | 
 466 | subset(gene.prop.dex3, (logFC_ie < -log2(1.5) & adj.P.Val_ie < 0.05) &
 467 |                         (logFC_ex < -log2(1.5) & adj.P.Val_ex < 0.05))
 468 | 
 469 | 
 470 | 
 471 | 
 472 | # Cumulative distributions of genomic and transcript length for genes with
 473 | # |logFC| > log2(1.5) in either analysis, compared with all expressed genes
 474 | fc.cut <- log2(1.5)
 475 | # Gene sets keyed by line color; positive logFC = higher in cells (Cells-Nuclei contrast)
 476 | enriched.sets <- list(
 477 |   "blue" = gene.prop.dex3$logFC_ie > fc.cut,
 478 |   "red" = gene.prop.dex3$logFC_ie < -fc.cut,
 479 |   "light blue" = gene.prop.dex3$logFC_ex > fc.cut,
 480 |   "pink" = gene.prop.dex3$logFC_ex < -fc.cut)
 481 | 
 482 | # Draw the ECDF of `len` for expressed genes (AveExpr_ie > 0), then overlay the ECDF
 483 | # of `len` within each enriched set. TRUE/FALSE are spelled out because T/F can be masked.
 484 | draw.len.ecdf <- function(len, main) {
 485 |   plot.ecdf(len[gene.prop.dex3$AveExpr_ie > 0],
 486 |             xlim = c(1e1, 1e7), log = "x", las = 1,
 487 |             xlab = "Length (base pairs)", ylab = "Cumulative proportion",
 488 |             main = main)
 489 |   for (col1 in names(enriched.sets)) {
 490 |     lines(ecdf(len[enriched.sets[[col1]]]), col = col1,
 491 |           verticals = TRUE, do.points = FALSE, col.01line = NULL)
 492 |   }
 493 | }
 494 | 
 495 | pdf(file = "../output/gene_length_ecdf.pdf",
 496 |     width = 8, height = 4)
 497 | 
 498 | par(mfrow = c(1, 2))
 499 | draw.len.ecdf(gene.prop.dex3$gene_len, main = "Genomic length")
 500 | draw.len.ecdf(gene.prop.dex3$transcript_len, main = "Transcript length")
 501 | 
 502 | # The legend is drawn on the second (transcript length) panel only
 503 | legend("bottomright", bty = "n", cex = 0.5, 
 504 |        fill = c("red", "pink", "blue", "light blue", "black"),
 505 |        legend = c("Nuclei (introns + exons)",
 506 |                   "Nuclei (exons)",
 507 |                   "Cells (introns + exons)",
 508 |                   "Cells (exons)",
 509 |                   "All expressed genes"))
 510 | 
 511 | dev.off()
 512 | 
 513 | 
 514 | 
 515 | ```
 516 | 
 517 | 
 518 | ```{r Figure_S2A-go-treemaps}
 519 | # A treemap R script produced by the REVIGO server at http://revigo.irb.hr/
 520 | # If you found REVIGO useful in your work, please cite the following reference:
 521 | # Supek F et al. "REVIGO summarizes and visualizes long lists of Gene Ontology
 522 | # terms" PLoS ONE 2011. doi:10.1371/journal.pone.0021800
 523 | 
 524 | # author: Anton Kratz <anton.kratz@gmail.com>, RIKEN Omics Science Center, Functional Genomics Technology Team, Japan
 525 | # created: Fri, Nov 02, 2012  7:25:52 PM
 526 | # last change: Fri, Nov 09, 2012  3:20:01 PM
 527 | 
 528 | revigo.data.list <- list()
 529 | # "CC_cell_ie" is a character matrix (c() coerces the numeric fields to strings); one row per GO term with 7 fields: GO id, term name, four numeric scores, representative term name. Presumably REVIGO's treemap columns (term_ID, description, freqInDbPercent, abslog10pvalue, uniqueness, dispensability, representative) -- TODO confirm against the code that names the columns.
 530 | #### Cell > 4*Nuc (introns + exons) ####
 531 | revigo.data.list[["CC_cell_ie"]] <- rbind(c("GO:0005925","focal adhesion",0.109,12.7773,0.968,0.000,"focal adhesion"),
 532 | c("GO:0022626","cytosolic ribosome",0.187,38.2480,0.522,0.000,"cytosolic ribosome"),
 533 | c("GO:0005694","chromosome",1.505,5.0625,0.646,0.633,"cytosolic ribosome"),
 534 | c("GO:0016272","prefoldin complex",0.056,3.5031,0.714,0.361,"cytosolic ribosome"),
 535 | c("GO:0005697","telomerase holoenzyme complex",0.025,1.9830,0.534,0.505,"cytosolic ribosome"),
 536 | c("GO:0090575","RNA polymerase II transcription factor complex",0.155,2.1349,0.470,0.585,"cytosolic ribosome"),
 537 | c("GO:0008023","transcription elongation factor complex",0.080,1.6253,0.473,0.662,"cytosolic ribosome"),
 538 | c("GO:0033290","eukaryotic 48S preinitiation complex",0.089,9.5331,0.593,0.419,"cytosolic ribosome"),
 539 | c("GO:0016281","eukaryotic translation initiation factor 4F complex",0.002,1.7852,0.664,0.296,"cytosolic ribosome"),
 540 | c("GO:0089701","U2AF",0.017,1.7375,0.534,0.491,"cytosolic ribosome"),
 541 | c("GO:0005730","nucleolus",0.664,33.6144,0.507,0.413,"cytosolic ribosome"),
 542 | c("GO:0005732","small nucleolar ribonucleoprotein complex",0.072,5.0625,0.658,0.412,"cytosolic ribosome"),
 543 | c("GO:0036513","Derlin-1 retrotranslocation complex",0.005,2.9508,0.546,0.656,"cytosolic ribosome"),
 544 | c("GO:0005635","nuclear envelope",0.283,2.4157,0.494,0.617,"cytosolic ribosome"),
 545 | c("GO:1990131","Gtr1-Gtr2 GTPase complex",0.001,1.4283,0.773,0.186,"cytosolic ribosome"),
 546 | c("GO:0005681","spliceosomal complex",0.250,18.6990,0.468,0.610,"cytosolic ribosome"),
 547 | c("GO:0000803","sex chromosome",0.007,1.4789,0.730,0.408,"cytosolic ribosome"),
 548 | c("GO:0097346","INO80-type complex",0.056,2.9393,0.497,0.538,"cytosolic ribosome"),
 549 | c("GO:0036019","endolysosome",0.001,2.3036,0.612,0.612,"cytosolic ribosome"),
 550 | c("GO:0036020","endolysosome membrane",0.000,1.8794,0.578,0.643,"cytosolic ribosome"),
 551 | c("GO:0000805","X chromosome",0.002,3.0137,0.745,0.273,"cytosolic ribosome"),
 552 | c("GO:0005834","heterotrimeric G-protein complex",0.046,1.8125,0.589,0.530,"cytosolic ribosome"),
 553 | c("GO:0005840","ribosome",4.198,22.7520,0.425,0.589,"cytosolic ribosome"),
 554 | c("GO:0005844","polysome",0.037,3.0013,0.672,0.392,"cytosolic ribosome"),
 555 | c("GO:0000407","pre-autophagosomal structure",0.033,3.1612,0.752,0.253,"cytosolic ribosome"),
 556 | c("GO:0036464","cytoplasmic ribonucleoprotein granule",0.127,6.3686,0.544,0.430,"cytosolic ribosome"),
 557 | c("GO:0000974","Prp19 complex",0.017,2.1891,0.733,0.333,"cytosolic ribosome"),
 558 | c("GO:0030904","retromer complex",0.032,3.9431,0.592,0.516,"cytosolic ribosome"),
 559 | c("GO:1903293","phosphatase complex",0.073,6.3969,0.689,0.545,"cytosolic ribosome"),
 560 | c("GO:0030906","retromer, cargo-selective complex",0.002,2.6021,0.641,0.434,"cytosolic ribosome"),
 561 | c("GO:0097422","tubular endosome",0.002,2.0969,0.582,0.580,"cytosolic ribosome"),
 562 | c("GO:0008180","COP9 signalosome",0.031,2.7423,0.519,0.513,"cytosolic ribosome"),
 563 | c("GO:0005850","eukaryotic translation initiation factor 2 complex",0.006,1.4283,0.649,0.311,"cytosolic ribosome"),
 564 | c("GO:0005851","eukaryotic translation initiation factor 2B complex",0.009,1.7375,0.642,0.319,"cytosolic ribosome"),
 565 | c("GO:0099568","cytoplasmic region",0.265,2.2104,0.717,0.307,"cytosolic ribosome"),
 566 | c("GO:0005852","eukaryotic translation initiation factor 3 complex",0.117,8.7932,0.586,0.277,"cytosolic ribosome"),
 567 | c("GO:0005853","eukaryotic translation elongation factor 1 complex",0.012,2.6021,0.636,0.325,"cytosolic ribosome"),
 568 | c("GO:0043596","nuclear replication fork",0.061,1.5421,0.557,0.695,"cytosolic ribosome"),
 569 | c("GO:0031461","cullin-RING ubiquitin ligase complex",0.159,6.7212,0.578,0.695,"cytosolic ribosome"),
 570 | c("GO:0015934","large ribosomal subunit",0.559,17.8447,0.468,0.666,"cytosolic ribosome"),
 571 | c("GO:0005768","endosome",0.319,12.2000,0.507,0.312,"cytosolic ribosome"),
 572 | c("GO:0005774","vacuolar membrane",0.290,15.4597,0.508,0.298,"cytosolic ribosome"),
 573 | c("GO:0005773","vacuole",0.455,11.6819,0.627,0.324,"cytosolic ribosome"),
 574 | c("GO:0070822","Sin3-type complex",0.010,1.6778,0.496,0.519,"cytosolic ribosome"),
 575 | c("GO:0031984","organelle subcompartment",0.269,2.4724,0.618,0.374,"cytosolic ribosome"),
 576 | c("GO:0048500","signal recognition particle",0.100,2.8013,0.594,0.422,"cytosolic ribosome"),
 577 | c("GO:0005785","signal recognition particle receptor complex",0.011,1.4283,0.509,0.690,"cytosolic ribosome"),
 578 | c("GO:0005786","signal recognition particle, endoplasmic reticulum targeting",0.034,2.8013,0.619,0.390,"cytosolic ribosome"),
 579 | c("GO:0048471","perinuclear region of cytoplasm",0.135,5.6799,0.730,0.280,"cytosolic ribosome"),
 580 | c("GO:0005793","endoplasmic reticulum-Golgi intermediate compartment",0.026,4.6459,0.688,0.249,"cytosolic ribosome"),
 581 | c("GO:0005789","endoplasmic reticulum membrane",0.761,8.9547,0.486,0.653,"cytosolic ribosome"),
 582 | c("GO:0048770","pigment granule",0.016,4.2907,0.613,0.674,"cytosolic ribosome"),
 583 | c("GO:0001650","fibrillar center",0.035,1.7852,0.581,0.690,"cytosolic ribosome"),
 584 | c("GO:0070993","translation preinitiation complex",0.093,8.7932,0.596,0.420,"cytosolic ribosome"),
 585 | c("GO:0097525","spliceosomal snRNP complex",0.128,9.9914,0.472,0.575,"cytosolic ribosome"),
 586 | c("GO:0034708","methyltransferase complex",0.080,6.6383,0.602,0.659,"cytosolic ribosome"),
 587 | c("GO:0072669","tRNA-splicing ligase complex",0.006,2.0381,0.698,0.312,"cytosolic ribosome"),
 588 | c("GO:0044452","nucleolar part",0.153,10.1911,0.547,0.642,"cytosolic ribosome"),
 589 | c("GO:0044445","cytosolic part",0.496,33.2403,0.695,0.617,"cytosolic ribosome"),
 590 | c("GO:0035770","ribonucleoprotein granule",0.131,6.0706,0.576,0.431,"cytosolic ribosome"),
 591 | c("GO:1990904","ribonucleoprotein complex",5.291,59.3706,0.669,0.543,"cytosolic ribosome"),
 592 | c("GO:0070971","endoplasmic reticulum exit site",0.006,2.9508,0.614,0.447,"cytosolic ribosome"),
 593 | c("GO:0072686","mitotic spindle",0.078,1.3979,0.622,0.693,"cytosolic ribosome"),
 594 | c("GO:0044427","chromosomal part",1.117,4.0357,0.548,0.608,"cytosolic ribosome"),
 595 | c("GO:0071541","eukaryotic translation initiation factor 3 complex, eIF3m",0.011,5.7399,0.637,0.324,"cytosolic ribosome"),
 596 | c("GO:0002199","zona pellucida receptor complex",0.003,2.9508,0.757,0.297,"cytosolic ribosome"),
 597 | c("GO:0071013","catalytic step 2 spliceosome",0.062,14.1397,0.466,0.541,"cytosolic ribosome"),
 598 | c("GO:0030117","membrane coat",0.264,6.7905,0.535,0.405,"cytosolic ribosome"),
 599 | c("GO:0034709","methylosome",0.006,8.9208,0.601,0.447,"cytosolic ribosome"),
 600 | c("GO:0034715","pICln-Sm protein complex",0.006,4.1355,0.649,0.312,"cytosolic ribosome"),
 601 | c("GO:0000502","proteasome complex",0.389,19.1752,0.540,0.636,"cytosolic ribosome"),
 602 | c("GO:0034719","SMN-Sm protein complex",0.008,3.8327,0.643,0.318,"cytosolic ribosome"),
 603 | c("GO:0008287","protein serine/threonine phosphatase complex",0.073,6.3969,0.652,0.545,"cytosolic ribosome"),
 604 | c("GO:0030532","small nuclear ribonucleoprotein complex",0.138,9.1669,0.487,0.579,"cytosolic ribosome"),
 605 | c("GO:0016592","mediator complex",0.155,3.7447,0.453,0.694,"cytosolic ribosome"),
 606 | c("GO:0033116","endoplasmic reticulum-Golgi intermediate compartment membrane",0.008,3.8827,0.626,0.574,"cytosolic ribosome"),
 607 | c("GO:0032588","trans-Golgi network membrane",0.008,1.3188,0.578,0.573,"cytosolic ribosome"),
 608 | c("GO:0034045","pre-autophagosomal structure membrane",0.018,1.8356,0.712,0.646,"cytosolic ribosome"),
 609 | c("GO:0016607","nuclear speck",0.091,4.3206,0.559,0.614,"cytosolic ribosome"),
 610 | c("GO:0033176","proton-transporting V-type ATPase complex",0.146,2.5768,0.675,0.579,"cytosolic ribosome"),
 611 | c("GO:0015630","microtubule cytoskeleton",0.900,10.3298,0.640,0.592,"cytosolic ribosome"),
 612 | c("GO:0030687","preribosome, large subunit precursor",0.067,4.2708,0.655,0.410,"cytosolic ribosome"),
 613 | c("GO:0033202","DNA helicase complex",0.093,2.0004,0.628,0.556,"cytosolic ribosome"),
 614 | c("GO:0030684","preribosome",0.223,6.8386,0.633,0.600,"cytosolic ribosome"),
 615 | c("GO:0070603","SWI/SNF superfamily-type complex",0.112,3.6882,0.484,0.569,"cytosolic ribosome"),
 616 | c("GO:0031201","SNARE complex",0.038,5.0768,0.597,0.523,"cytosolic ribosome"),
 617 | c("GO:0005885","Arp2/3 protein complex",0.063,5.0975,0.530,0.680,"cytosolic ribosome"),
 618 | c("GO:0000151","ubiquitin ligase complex",0.232,8.5086,0.575,0.604,"cytosolic ribosome"),
 619 | c("GO:0098796","membrane protein complex",2.473,5.8013,0.626,0.533,"cytosolic ribosome"),
 620 | c("GO:0030008","TRAPP complex",0.042,1.4461,0.664,0.354,"cytosolic ribosome"),
 621 | c("GO:0097255","R2TP complex",0.003,2.0969,0.756,0.299,"cytosolic ribosome"),
 622 | c("GO:1990234","transferase complex",1.223,26.7258,0.626,0.439,"cytosolic ribosome"),
 623 | c("GO:0042470","melanosome",0.016,4.2907,0.613,0.673,"cytosolic ribosome"),
 624 | c("GO:0035145","exon-exon junction complex",0.010,1.9830,0.546,0.473,"cytosolic ribosome"),
 625 | c("GO:1902493","acetyltransferase complex",0.152,3.8827,0.643,0.693,"cytosolic ribosome"),
 626 | c("GO:1902494","catalytic complex",3.734,27.8633,0.678,0.353,"cytosolic ribosome"),
 627 | c("GO:0005952","cAMP-dependent protein kinase complex",0.014,2.0381,0.631,0.476,"cytosolic ribosome"),
 628 | c("GO:0030054","cell junction",0.445,3.7235,0.993,0.000,"cell junction"),
 629 | c("GO:0045202","synapse",0.299,2.0969,0.993,0.000,"synapse"),
 630 | c("GO:0048475","coated membrane",0.264,6.7905,0.984,0.000,"coated membrane"),
 631 | c("GO:0030496","midbody",0.040,3.4001,0.921,0.044,"midbody"),
 632 | c("GO:0043209","myelin sheath",0.049,10.1701,0.920,0.044,"myelin sheath"),
 633 | c("GO:0031252","cell leading edge",0.086,3.7190,0.917,0.046,"cell leading edge"),
 634 | c("GO:0044297","cell body",0.087,3.7986,0.917,0.046,"cell body"),
 635 | c("GO:0030427","site of polarized growth",0.091,3.0137,0.917,0.046,"site of polarized growth"),
 636 | c("GO:0043005","neuron projection",0.190,10.4413,0.825,0.049,"neuron projection"),
 637 | c("GO:0032838","cell projection cytoplasm",0.014,2.9788,0.722,0.595,"neuron projection"),
 638 | c("GO:0001726","ruffle",0.035,2.2692,0.875,0.634,"neuron projection"),
 639 | c("GO:0097458","neuron part",0.320,9.3915,0.910,0.051,"neuron part"),
 640 | c("GO:0016234","inclusion body",0.019,2.0947,0.856,0.090,"inclusion body"));
 641 | 
 642 | # "BP_cell_ie" is a character matrix (c() coerces the numeric fields to strings); one row per GO term with 7 fields: GO id, term name, four numeric scores, representative term name. Presumably biological-process terms in REVIGO's treemap column layout -- TODO confirm against the code that names the columns.
 643 | revigo.data.list[["BP_cell_ie"]] <- rbind(c("GO:0002478","antigen processing and presentation of exogenous peptide antigen",0.004,12.4486,0.931,0.000,"antigen processing and presentation of exogenous peptide antigen"),
 644 | c("GO:0006289","nucleotide-excision repair",0.310,6.0031,0.796,0.565,"antigen processing and presentation of exogenous peptide antigen"),
 645 | c("GO:0042059","negative regulation of epidermal growth factor receptor signaling pathway",0.007,5.9914,0.783,0.559,"antigen processing and presentation of exogenous peptide antigen"),
 646 | c("GO:0019882","antigen processing and presentation",0.027,8.5575,0.944,0.561,"antigen processing and presentation of exogenous peptide antigen"),
 647 | c("GO:0032107","regulation of response to nutrient levels",0.011,7.7645,0.832,0.576,"antigen processing and presentation of exogenous peptide antigen"),
 648 | c("GO:0038093","Fc receptor signaling pathway",0.005,10.1911,0.771,0.505,"antigen processing and presentation of exogenous peptide antigen"),
 649 | c("GO:0080135","regulation of cellular response to stress",0.182,9.7670,0.761,0.548,"antigen processing and presentation of exogenous peptide antigen"),
 650 | c("GO:0015031","protein transport",2.251,39.7545,0.832,0.000,"protein transport"),
 651 | c("GO:0070199","establishment of protein localization to chromosome",0.006,3.4597,0.843,0.537,"protein transport"),
 652 | c("GO:0071166","ribonucleoprotein complex localization",0.097,4.6615,0.841,0.575,"protein transport"),
 653 | c("GO:0016192","vesicle-mediated transport",1.085,12.6402,0.907,0.344,"protein transport"),
 654 | c("GO:0016197","endosomal transport",0.131,13.7399,0.920,0.274,"protein transport"),
 655 | c("GO:1903827","regulation of cellular protein localization",0.132,7.1518,0.700,0.680,"protein transport"),
 656 | c("GO:0050657","nucleic acid transport",0.100,3.1296,0.905,0.533,"protein transport"),
 657 | c("GO:1904874","positive regulation of telomerase RNA localization to Cajal body",0.004,5.0680,0.738,0.484,"protein transport"),
 658 | c("GO:0007034","vacuolar transport",0.133,13.5421,0.920,0.275,"protein transport"),
 659 | c("GO:0007041","lysosomal transport",0.017,3.1972,0.926,0.229,"protein transport"),
 660 | c("GO:0006403","RNA localization",0.118,5.9957,0.871,0.643,"protein transport"),
 661 | c("GO:1990173","protein localization to nucleoplasm",0.002,3.2076,0.850,0.482,"protein transport"),
 662 | c("GO:0090672","telomerase RNA localization",0.004,4.6440,0.883,0.664,"protein transport"),
 663 | c("GO:0090670","RNA localization to Cajal body",0.004,4.6440,0.883,0.662,"protein transport"),
 664 | c("GO:0051656","establishment of organelle localization",0.180,7.7011,0.822,0.281,"protein transport"),
 665 | c("GO:0051668","localization within membrane",0.023,4.1938,0.854,0.519,"protein transport"),
 666 | c("GO:0006913","nucleocytoplasmic transport",0.237,7.6108,0.821,0.617,"protein transport"),
 667 | c("GO:0051640","organelle localization",0.223,7.0665,0.831,0.627,"protein transport"),
 668 | c("GO:0016482","cytosolic transport",0.076,6.5800,0.838,0.619,"protein transport"),
 669 | c("GO:0051169","nuclear transport",0.239,8.0670,0.825,0.677,"protein transport"),
 670 | c("GO:0016071","mRNA metabolic process",0.798,50.2774,0.847,0.000,"mRNA metabolism"),
 671 | c("GO:0045862","positive regulation of proteolysis",0.078,7.6108,0.668,0.609,"mRNA metabolism"),
 672 | c("GO:0061418","regulation of transcription from RNA polymerase II promoter in response to hypoxia",0.003,4.2815,0.753,0.476,"mRNA metabolism"),
 673 | c("GO:0031329","regulation of cellular catabolic process",0.093,18.5607,0.705,0.583,"mRNA metabolism"),
 674 | c("GO:0018205","peptidyl-lysine modification",0.355,3.1965,0.826,0.452,"mRNA metabolism"),
 675 | c("GO:0006369","termination of RNA polymerase II transcription",0.015,3.9957,0.867,0.526,"mRNA metabolism"),
 676 | c("GO:0006367","transcription initiation from RNA polymerase II promoter",0.109,4.0414,0.850,0.601,"mRNA metabolism"),
 677 | c("GO:0006368","transcription elongation from RNA polymerase II promoter",0.082,9.5638,0.852,0.300,"mRNA metabolism"),
 678 | c("GO:0016072","rRNA metabolic process",0.965,38.4179,0.832,0.382,"mRNA metabolism"),
 679 | c("GO:0016073","snRNA metabolic process",0.035,6.2204,0.868,0.518,"mRNA metabolism"),
 680 | c("GO:0070647","protein modification by small protein conjugation or removal",0.821,20.6440,0.818,0.331,"mRNA metabolism"),
 681 | c("GO:0006354","DNA-templated transcription, elongation",0.202,10.1203,0.850,0.280,"mRNA metabolism"),
 682 | c("GO:0006353","DNA-templated transcription, termination",0.185,6.4685,0.851,0.320,"mRNA metabolism"),
 683 | c("GO:0006352","DNA-templated transcription, initiation",0.766,5.3478,0.834,0.361,"mRNA metabolism"),
 684 | c("GO:0044265","cellular macromolecule catabolic process",1.268,44.0757,0.756,0.153,"mRNA metabolism"),
 685 | c("GO:0000338","protein deneddylation",0.017,5.2874,0.819,0.683,"mRNA metabolism"),
 686 | c("GO:0010608","posttranscriptional regulation of gene expression",0.719,23.2480,0.762,0.256,"mRNA metabolism"),
 687 | c("GO:1901566","organonitrogen compound biosynthetic process",14.064,13.0575,0.903,0.391,"mRNA metabolism"),
 688 | c("GO:0043603","cellular amide metabolic process",6.879,13.9830,0.890,0.189,"mRNA metabolism"),
 689 | c("GO:1903311","regulation of mRNA metabolic process",0.044,3.6576,0.745,0.299,"mRNA metabolism"),
 690 | c("GO:0000470","maturation of LSU-rRNA",0.047,4.5591,0.719,0.530,"mRNA metabolism"),
 691 | c("GO:0043543","protein acylation",0.202,4.1090,0.836,0.429,"mRNA metabolism"),
 692 | c("GO:0006259","DNA metabolic process",5.607,3.3526,0.833,0.383,"mRNA metabolism"),
 693 | c("GO:0050684","regulation of mRNA processing",0.035,3.2211,0.732,0.611,"mRNA metabolism"),
 694 | c("GO:0031400","negative regulation of protein modification process",0.151,12.3625,0.679,0.418,"mRNA metabolism"),
 695 | c("GO:0051439","regulation of ubiquitin-protein ligase activity involved in mitotic cell cycle",0.001,16.8996,0.683,0.611,"mRNA metabolism"),
 696 | c("GO:0006521","regulation of cellular amino acid metabolic process",0.018,12.3862,0.796,0.276,"mRNA metabolism"),
 697 | c("GO:0002183","cytoplasmic translational initiation",0.025,3.3420,0.836,0.399,"mRNA metabolism"),
 698 | c("GO:0043488","regulation of mRNA stability",0.018,12.7011,0.801,0.293,"mRNA metabolism"),
 699 | c("GO:0002181","cytoplasmic translation",0.064,10.4750,0.829,0.429,"mRNA metabolism"),
 700 | c("GO:0043487","regulation of RNA stability",0.021,12.1029,0.799,0.573,"mRNA metabolism"),
 701 | c("GO:0036260","RNA capping",0.112,4.0137,0.839,0.542,"mRNA metabolism"),
 702 | c("GO:0016567","protein ubiquitination",0.523,18.9626,0.783,0.470,"mRNA metabolism"),
 703 | c("GO:0006412","translation",5.686,25.1959,0.754,0.661,"mRNA metabolism"),
 704 | c("GO:0009894","regulation of catabolic process",0.146,18.8894,0.760,0.530,"mRNA metabolism"),
 705 | c("GO:0006413","translational initiation",0.518,49.8069,0.801,0.140,"mRNA metabolism"),
 706 | c("GO:0006396","RNA processing",3.210,47.1864,0.819,0.373,"mRNA metabolism"),
 707 | c("GO:0008380","RNA splicing",0.413,28.1379,0.823,0.299,"mRNA metabolism"),
 708 | c("GO:0031647","regulation of protein stability",0.070,3.2076,0.868,0.411,"mRNA metabolism"),
 709 | c("GO:1901361","organic cyclic compound catabolic process",1.164,15.6003,0.818,0.665,"mRNA metabolism"),
 710 | c("GO:0034248","regulation of cellular amide metabolic process",0.700,11.6925,0.752,0.488,"mRNA metabolism"),
 711 | c("GO:0033238","regulation of cellular amine metabolic process",0.021,10.7670,0.809,0.631,"mRNA metabolism"),
 712 | c("GO:0031123","RNA 3'-end processing",0.145,4.0788,0.836,0.554,"mRNA metabolism"),
 713 | c("GO:0098781","ncRNA transcription",0.045,7.4535,0.856,0.528,"mRNA metabolism"),
 714 | c("GO:0009452","7-methylguanosine RNA capping",0.112,4.0137,0.837,0.542,"mRNA metabolism"),
 715 | c("GO:0042795","snRNA transcription from RNA polymerase II promoter",0.003,5.1637,0.867,0.481,"mRNA metabolism"),
 716 | c("GO:0034660","ncRNA metabolic process",3.407,35.8125,0.826,0.453,"mRNA metabolism"),
 717 | c("GO:0022613","ribonucleoprotein complex biogenesis",1.614,52.3316,0.807,0.000,"ribonucleoprotein complex biogenesis"),
 718 | c("GO:0071824","protein-DNA complex subunit organization",0.238,3.8508,0.799,0.629,"ribonucleoprotein complex biogenesis"),
 719 | c("GO:0071826","ribonucleoprotein complex subunit organization",0.377,24.2984,0.792,0.463,"ribonucleoprotein complex biogenesis"),
 720 | c("GO:0016050","vesicle organization",0.130,9.5100,0.812,0.474,"ribonucleoprotein complex biogenesis"),
 721 | c("GO:0043248","proteasome assembly",0.049,4.3768,0.796,0.664,"ribonucleoprotein complex biogenesis"),
 722 | c("GO:0061024","membrane organization",0.759,24.4225,0.806,0.499,"ribonucleoprotein complex biogenesis"),
 723 | c("GO:0051276","chromosome organization",1.477,6.5686,0.775,0.684,"ribonucleoprotein complex biogenesis"),
 724 | c("GO:0006325","chromatin organization",0.668,3.5986,0.783,0.690,"ribonucleoprotein complex biogenesis"),
 725 | c("GO:0006900","membrane budding",0.030,16.1421,0.737,0.367,"ribonucleoprotein complex biogenesis"),
 726 | c("GO:0070271","protein complex biogenesis",0.963,7.3179,0.815,0.684,"ribonucleoprotein complex biogenesis"),
 727 | c("GO:0042147","retrograde transport, endosome to Golgi",0.055,4.6253,0.823,0.671,"ribonucleoprotein complex biogenesis"),
 728 | c("GO:0031468","nuclear envelope reassembly",0.003,3.3420,0.810,0.689,"ribonucleoprotein complex biogenesis"),
 729 | c("GO:1904896","ESCRT complex disassembly",0.000,3.9101,0.852,0.401,"ribonucleoprotein complex biogenesis"),
 730 | c("GO:0044087","regulation of cellular component biogenesis",0.404,6.9208,0.717,0.622,"ribonucleoprotein complex biogenesis"),
 731 | c("GO:1904903","ESCRT III complex disassembly",0.000,3.9101,0.852,0.247,"ribonucleoprotein complex biogenesis"),
 732 | c("GO:1901673","regulation of mitotic spindle assembly",0.005,3.3449,0.685,0.684,"ribonucleoprotein complex biogenesis"),
 733 | c("GO:0000028","ribosomal small subunit assembly",0.020,5.2125,0.791,0.518,"ribonucleoprotein complex biogenesis"),
 734 | c("GO:0070925","organelle assembly",0.571,8.8928,0.766,0.645,"ribonucleoprotein complex biogenesis"),
 735 | c("GO:0016570","histone modification",0.373,3.5017,0.679,0.654,"ribonucleoprotein complex biogenesis"),
 736 | c("GO:0007030","Golgi organization",0.039,3.6576,0.819,0.435,"ribonucleoprotein complex biogenesis"),
 737 | c("GO:0007033","vacuole organization",0.102,11.5702,0.815,0.428,"ribonucleoprotein complex biogenesis"),
 738 | c("GO:1990182","exosomal secretion",0.004,4.8697,0.766,0.493,"ribonucleoprotein complex biogenesis"),
 739 | c("GO:0010256","endomembrane system organization",0.189,7.3019,0.825,0.432,"ribonucleoprotein complex biogenesis"),
 740 | c("GO:0048193","Golgi vesicle transport",0.297,14.3635,0.883,0.643,"ribonucleoprotein complex biogenesis"),
 741 | c("GO:0043254","regulation of protein complex assembly",0.198,8.4001,0.668,0.619,"ribonucleoprotein complex biogenesis"),
 742 | c("GO:0051351","positive regulation of ligase activity",0.002,15.7033,0.863,0.000,"positive regulation of ligase activity"),
 743 | c("GO:0032781","positive regulation of ATPase activity",0.033,3.6556,0.844,0.543,"positive regulation of ligase activity"),
 744 | c("GO:0051340","regulation of ligase activity",0.002,14.3298,0.867,0.375,"positive regulation of ligase activity"),
 745 | c("GO:0051338","regulation of transferase activity",0.368,9.9586,0.821,0.646,"positive regulation of ligase activity"),
 746 | c("GO:0051348","negative regulation of transferase activity",0.088,13.5003,0.822,0.456,"positive regulation of ligase activity"),
 747 | c("GO:0044033","multi-organism metabolic process",0.025,35.6655,0.928,0.011,"multi-organism metabolism"),
 748 | c("GO:1902590","multi-organism organelle organization",0.006,5.4868,0.795,0.608,"multi-organism metabolism"),
 749 | c("GO:0044764","multi-organism cellular process",0.325,26.2132,0.892,0.692,"multi-organism metabolism"),
 750 | c("GO:0044803","multi-organism membrane organization",0.008,4.4989,0.790,0.621,"multi-organism metabolism"),
 751 | c("GO:0019083","viral transcription",0.012,33.5114,0.809,0.562,"multi-organism metabolism"),
 752 | c("GO:0042026","protein refolding",0.069,7.2269,0.959,0.027,"protein refolding"),
 753 | c("GO:0061077","chaperone-mediated protein folding",0.043,4.8697,0.960,0.621,"protein refolding"),
 754 | c("GO:0016236","macroautophagy",0.084,10.4895,0.944,0.027,"macroautophagy"),
 755 | c("GO:0006914","autophagy",0.230,13.1314,0.959,0.030,"autophagy"),
 756 | c("GO:0000278","mitotic cell cycle",0.561,17.3307,0.873,0.033,"mitotic cell cycle"),
 757 | c("GO:0034976","response to endoplasmic reticulum stress",0.100,9.0353,0.885,0.263,"mitotic cell cycle"),
 758 | c("GO:0035966","response to topologically incorrect protein",0.053,8.2248,0.914,0.407,"mitotic cell cycle"),
 759 | c("GO:0051301","cell division",1.230,4.1018,0.912,0.223,"mitotic cell cycle"),
 760 | c("GO:0060071","Wnt signaling pathway, planar cell polarity pathway",0.010,10.7620,0.771,0.130,"mitotic cell cycle"),
 761 | c("GO:0000920","cell separation after cytokinesis",0.013,4.2741,0.935,0.132,"mitotic cell cycle"),
 762 | c("GO:0007164","establishment of tissue polarity",0.019,9.2581,0.965,0.462,"mitotic cell cycle"),
 763 | c("GO:0070911","global genome nucleotide-excision repair",0.003,3.0980,0.844,0.624,"mitotic cell cycle"),
 764 | c("GO:0007049","cell cycle",1.885,15.0031,0.909,0.204,"mitotic cell cycle"),
 765 | c("GO:0007017","microtubule-based process",0.658,5.7852,0.916,0.208,"mitotic cell cycle"),
 766 | c("GO:0006986","response to unfolded protein",0.037,8.3116,0.908,0.605,"mitotic cell cycle"),
 767 | c("GO:0038061","NIK/NF-kappaB signaling",0.018,8.6556,0.821,0.284,"mitotic cell cycle"),
 768 | c("GO:0033209","tumor necrosis factor-mediated signaling pathway",0.016,5.6364,0.784,0.573,"mitotic cell cycle"),
 769 | c("GO:0000715","nucleotide-excision repair, DNA damage recognition",0.033,8.3458,0.701,0.457,"mitotic cell cycle"),
 770 | c("GO:0071495","cellular response to endogenous stimulus",0.402,4.4248,0.932,0.297,"mitotic cell cycle"),
 771 | c("GO:0006457","protein folding",0.903,14.9031,0.955,0.034,"protein folding"),
 772 | c("GO:0016311","dephosphorylation",1.250,3.7986,0.931,0.065,"dephosphorylation"));
 773 | 
 774 | 
 775 | #### Cell > 4*Nuc (exons only) ####
 776 | revigo.data.list[["CC_cell_exons"]] <- rbind(c("GO:0005730","nucleolus",0.664,36.1586,0.516,0.000,"nucleolus"),
 777 | c("GO:0005694","chromosome",1.505,5.5258,0.643,0.633,"nucleolus"),
 778 | c("GO:0005697","telomerase holoenzyme complex",0.025,1.7258,0.543,0.505,"nucleolus"),
 779 | c("GO:0090575","RNA polymerase II transcription factor complex",0.155,4.2510,0.482,0.585,"nucleolus"),
 780 | c("GO:0008023","transcription elongation factor complex",0.080,1.5171,0.487,0.662,"nucleolus"),
 781 | c("GO:0033290","eukaryotic 48S preinitiation complex",0.089,7.8297,0.601,0.419,"nucleolus"),
 782 | c("GO:0089701","U2AF",0.017,1.3063,0.545,0.491,"nucleolus"),
 783 | c("GO:0000346","transcription export complex",0.014,1.6819,0.550,0.483,"nucleolus"),
 784 | c("GO:0022626","cytosolic ribosome",0.187,30.0438,0.530,0.413,"nucleolus"),
 785 | c("GO:0005635","nuclear envelope",0.283,3.5072,0.504,0.617,"nucleolus"),
 786 | c("GO:0005681","spliceosomal complex",0.250,21.0164,0.479,0.610,"nucleolus"),
 787 | c("GO:0071204","histone pre-mRNA 3'end processing complex",0.001,2.2097,0.604,0.416,"nucleolus"),
 788 | c("GO:0000803","sex chromosome",0.007,1.8182,0.727,0.408,"nucleolus"),
 789 | c("GO:0005662","DNA replication factor A complex",0.009,2.3063,0.527,0.515,"nucleolus"),
 790 | c("GO:0097346","INO80-type complex",0.056,2.4935,0.511,0.538,"nucleolus"),
 791 | c("GO:0036019","endolysosome",0.001,1.5243,0.615,0.544,"nucleolus"),
 792 | c("GO:0036020","endolysosome membrane",0.000,1.9066,0.580,0.523,"nucleolus"),
 793 | c("GO:0000805","X chromosome",0.002,2.3224,0.742,0.295,"nucleolus"),
 794 | c("GO:0031428","box C/D snoRNP complex",0.012,1.3063,0.543,0.643,"nucleolus"),
 795 | c("GO:0005840","ribosome",4.198,15.4001,0.433,0.589,"nucleolus"),
 796 | c("GO:0000407","pre-autophagosomal structure",0.033,2.9431,0.751,0.247,"nucleolus"),
 797 | c("GO:0036464","cytoplasmic ribonucleoprotein granule",0.127,6.7878,0.548,0.430,"nucleolus"),
 798 | c("GO:0097422","tubular endosome",0.002,1.6421,0.582,0.574,"nucleolus"),
 799 | c("GO:0008180","COP9 signalosome",0.031,2.0101,0.530,0.513,"nucleolus"),
 800 | c("GO:0099568","cytoplasmic region",0.265,5.1599,0.716,0.288,"nucleolus"),
 801 | c("GO:0015934","large ribosomal subunit",0.559,12.4023,0.475,0.666,"nucleolus"),
 802 | c("GO:0005768","endosome",0.319,14.3391,0.508,0.617,"nucleolus"),
 803 | c("GO:0031965","nuclear membrane",0.100,1.3497,0.516,0.676,"nucleolus"),
 804 | c("GO:0005776","autophagosome",0.024,2.4283,0.668,0.256,"nucleolus"),
 805 | c("GO:0005773","vacuole",0.455,11.7620,0.629,0.329,"nucleolus"),
 806 | c("GO:0070822","Sin3-type complex",0.010,1.5243,0.506,0.581,"nucleolus"),
 807 | c("GO:0031984","organelle subcompartment",0.269,5.7375,0.620,0.402,"nucleolus"),
 808 | c("GO:0005801","cis-Golgi network",0.038,2.7055,0.721,0.266,"nucleolus"),
 809 | c("GO:0048500","signal recognition particle",0.100,2.2097,0.602,0.422,"nucleolus"),
 810 | c("GO:0005785","signal recognition particle receptor complex",0.011,2.1013,0.511,0.598,"nucleolus"),
 811 | c("GO:0048471","perinuclear region of cytoplasm",0.135,8.9281,0.728,0.114,"nucleolus"),
 812 | c("GO:0005793","endoplasmic reticulum-Golgi intermediate compartment",0.026,6.7235,0.690,0.258,"nucleolus"),
 813 | c("GO:0005794","Golgi apparatus",0.969,13.5243,0.532,0.684,"nucleolus"),
 814 | c("GO:0048770","pigment granule",0.016,3.4841,0.615,0.674,"nucleolus"),
 815 | c("GO:0070993","translation preinitiation complex",0.093,7.0985,0.603,0.420,"nucleolus"),
 816 | c("GO:0097525","spliceosomal snRNP complex",0.128,8.2565,0.486,0.575,"nucleolus"),
 817 | c("GO:0044452","nucleolar part",0.153,7.8827,0.556,0.642,"nucleolus"),
 818 | c("GO:0044445","cytosolic part",0.496,24.4283,0.695,0.617,"nucleolus"),
 819 | c("GO:0031519","PcG protein complex",0.017,1.9914,0.545,0.491,"nucleolus"),
 820 | c("GO:0035770","ribonucleoprotein granule",0.131,6.3215,0.583,0.431,"nucleolus"),
 821 | c("GO:0044427","chromosomal part",1.117,4.8928,0.548,0.608,"nucleolus"),
 822 | c("GO:0071013","catalytic step 2 spliceosome",0.062,17.1129,0.477,0.541,"nucleolus"),
 823 | c("GO:0030532","small nuclear ribonucleoprotein complex",0.138,8.5918,0.497,0.579,"nucleolus"),
 824 | c("GO:0016592","mediator complex",0.155,5.2899,0.468,0.694,"nucleolus"),
 825 | c("GO:0033116","endoplasmic reticulum-Golgi intermediate compartment membrane",0.008,5.1568,0.624,0.587,"nucleolus"),
 826 | c("GO:0032588","trans-Golgi network membrane",0.008,2.4283,0.577,0.679,"nucleolus"),
 827 | c("GO:0034045","pre-autophagosomal structure membrane",0.018,1.6946,0.708,0.237,"nucleolus"),
 828 | c("GO:0016607","nuclear speck",0.091,7.3788,0.571,0.614,"nucleolus"),
 829 | c("GO:0015630","microtubule cytoskeleton",0.900,11.7620,0.631,0.592,"nucleolus"),
 830 | c("GO:0000139","Golgi membrane",0.403,17.6576,0.475,0.418,"nucleolus"),
 831 | c("GO:0030684","preribosome",0.223,6.7235,0.644,0.600,"nucleolus"),
 832 | c("GO:0070603","SWI/SNF superfamily-type complex",0.112,3.2000,0.495,0.569,"nucleolus"),
 833 | c("GO:0005881","cytoplasmic microtubule",0.023,2.0799,0.604,0.628,"nucleolus"),
 834 | c("GO:0005885","Arp2/3 protein complex",0.063,4.0640,0.529,0.680,"nucleolus"),
 835 | c("GO:0042470","melanosome",0.016,3.4841,0.615,0.673,"nucleolus"),
 836 | c("GO:0035145","exon-exon junction complex",0.010,1.6326,0.557,0.473,"nucleolus"),
 837 | c("GO:0005925","focal adhesion",0.109,9.5031,0.972,0.000,"focal adhesion"),
 838 | c("GO:0030054","cell junction",0.445,3.5670,0.994,0.000,"cell junction"),
 839 | c("GO:0044456","synapse part",0.230,8.4306,0.949,0.000,"synapse part"),
 840 | c("GO:0045202","synapse",0.299,7.4522,0.994,0.000,"synapse"),
 841 | c("GO:0048475","coated membrane",0.264,9.5031,0.984,0.000,"coated membrane"),
 842 | c("GO:1902494","catalytic complex",3.734,36.0292,0.689,0.000,"catalytic complex"),
 843 | c("GO:0070765","gamma-secretase complex",0.003,1.4841,0.718,0.438,"catalytic complex"),
 844 | c("GO:0016272","prefoldin complex",0.056,2.7747,0.721,0.383,"catalytic complex"),
 845 | c("GO:0000307","cyclin-dependent protein kinase holoenzyme complex",0.027,2.2757,0.595,0.610,"catalytic complex"),
 846 | c("GO:0000306","extrinsic component of vacuolar membrane",0.005,2.3224,0.601,0.677,"catalytic complex"),
 847 | c("GO:0016281","eukaryotic translation initiation factor 4F complex",0.002,1.9872,0.668,0.243,"catalytic complex"),
 848 | c("GO:0005732","small nucleolar ribonucleoprotein complex",0.072,4.4283,0.668,0.321,"catalytic complex"),
 849 | c("GO:0032299","ribonuclease H2 complex",0.009,1.4841,0.697,0.279,"catalytic complex"),
 850 | c("GO:0036513","Derlin-1 retrotranslocation complex",0.005,3.0022,0.548,0.457,"catalytic complex"),
 851 | c("GO:0031248","protein acetyltransferase complex",0.152,3.2299,0.591,0.693,"catalytic complex"),
 852 | c("GO:1990124","messenger ribonucleoprotein complex",0.002,1.6968,0.725,0.243,"catalytic complex"),
 853 | c("GO:0090498","extrinsic component of Golgi membrane",0.001,1.4841,0.607,0.693,"catalytic complex"),
 854 | c("GO:0005834","heterotrimeric G-protein complex",0.046,2.3778,0.585,0.530,"catalytic complex"),
 855 | c("GO:0005844","polysome",0.037,2.3325,0.681,0.368,"catalytic complex"),
 856 | c("GO:0000974","Prp19 complex",0.017,4.7305,0.740,0.289,"catalytic complex"),
 857 | c("GO:0032937","SREBP-SCAP-Insig complex",0.001,1.4841,0.583,0.411,"catalytic complex"),
 858 | c("GO:0030904","retromer complex",0.032,4.1180,0.596,0.516,"catalytic complex"),
 859 | c("GO:1903293","phosphatase complex",0.073,9.3188,0.700,0.545,"catalytic complex"),
 860 | c("GO:0030906","retromer, cargo-selective complex",0.002,2.1013,0.645,0.434,"catalytic complex"),
 861 | c("GO:0005851","eukaryotic translation initiation factor 2B complex",0.009,1.3063,0.646,0.279,"catalytic complex"),
 862 | c("GO:0005852","eukaryotic translation initiation factor 3 complex",0.117,7.0985,0.590,0.405,"catalytic complex"),
 863 | c("GO:0005853","eukaryotic translation elongation factor 1 complex",0.012,2.1013,0.640,0.290,"catalytic complex"),
 864 | c("GO:0031461","cullin-RING ubiquitin ligase complex",0.159,9.0022,0.587,0.695,"catalytic complex"),
 865 | c("GO:0030877","beta-catenin destruction complex",0.004,1.5003,0.658,0.260,"catalytic complex"),
 866 | c("GO:0005786","signal recognition particle, endoplasmic reticulum targeting",0.034,2.2097,0.626,0.366,"catalytic complex"),
 867 | c("GO:0034708","methyltransferase complex",0.080,7.8297,0.612,0.659,"catalytic complex"),
 868 | c("GO:0072669","tRNA-splicing ligase complex",0.006,2.3224,0.703,0.273,"catalytic complex"),
 869 | c("GO:1990904","ribonucleoprotein complex",5.291,56.4260,0.680,0.543,"catalytic complex"),
 870 | c("GO:0070971","endoplasmic reticulum exit site",0.006,2.1746,0.614,0.486,"catalytic complex"),
 871 | c("GO:0031501","mannosyltransferase complex",0.014,1.5100,0.648,0.478,"catalytic complex"),
 872 | c("GO:0071541","eukaryotic translation initiation factor 3 complex, eIF3m",0.011,4.7423,0.641,0.273,"catalytic complex"),
 873 | c("GO:0002199","zona pellucida receptor complex",0.003,2.1746,0.764,0.244,"catalytic complex"),
 874 | c("GO:0030117","membrane coat",0.264,9.5031,0.534,0.366,"catalytic complex"),
 875 | c("GO:0034709","methylosome",0.006,7.4101,0.607,0.447,"catalytic complex"),
 876 | c("GO:1902911","protein kinase complex",0.089,1.7011,0.660,0.671,"catalytic complex"),
 877 | c("GO:0034715","pICln-Sm protein complex",0.006,3.3893,0.653,0.260,"catalytic complex"),
 878 | c("GO:0000502","proteasome complex",0.389,16.8894,0.551,0.636,"catalytic complex"),
 879 | c("GO:0034719","SMN-Sm protein complex",0.008,2.7399,0.647,0.278,"catalytic complex"),
 880 | c("GO:0008287","protein serine/threonine phosphatase complex",0.073,9.3188,0.665,0.545,"catalytic complex"),
 881 | c("GO:0034098","VCP-NPL4-UFD1 AAA ATPase complex",0.005,2.8665,0.548,0.491,"catalytic complex"),
 882 | c("GO:0033176","proton-transporting V-type ATPase complex",0.146,1.6326,0.681,0.579,"catalytic complex"),
 883 | c("GO:0033185","dolichol-phosphate-mannose synthase complex",0.002,1.4841,0.685,0.236,"catalytic complex"),
 884 | c("GO:0030687","preribosome, large subunit precursor",0.067,4.3233,0.663,0.383,"catalytic complex"),
 885 | c("GO:0030677","ribonuclease P complex",0.021,1.5100,0.631,0.492,"catalytic complex"),
 886 | c("GO:0033202","DNA helicase complex",0.093,1.8962,0.637,0.556,"catalytic complex"),
 887 | c("GO:0031201","SNARE complex",0.038,5.0635,0.600,0.523,"catalytic complex"),
 888 | c("GO:0000151","ubiquitin ligase complex",0.232,13.8665,0.586,0.604,"catalytic complex"),
 889 | c("GO:0098796","membrane protein complex",2.473,6.7932,0.635,0.533,"catalytic complex"),
 890 | c("GO:0097255","R2TP complex",0.003,2.7100,0.763,0.247,"catalytic complex"),
 891 | c("GO:1990234","transferase complex",1.223,33.5058,0.638,0.439,"catalytic complex"),
 892 | c("GO:0042788","polysomal ribosome",0.010,1.5100,0.611,0.339,"catalytic complex"),
 893 | c("GO:1902493","acetyltransferase complex",0.152,3.2299,0.655,0.693,"catalytic complex"),
 894 | c("GO:0019898","extrinsic component of membrane",0.249,2.7878,0.982,0.031,"extrinsic component of membrane"),
 895 | c("GO:0030496","midbody",0.040,4.4437,0.922,0.048,"midbody"),
 896 | c("GO:0043209","myelin sheath",0.049,8.7932,0.921,0.049,"myelin sheath"),
 897 | c("GO:0031252","cell leading edge",0.086,4.4828,0.918,0.051,"cell leading edge"),
 898 | c("GO:0044297","cell body",0.087,7.0964,0.918,0.051,"cell body"),
 899 | c("GO:0030427","site of polarized growth",0.091,7.6635,0.918,0.051,"site of polarized growth"),
 900 | c("GO:0043005","neuron projection",0.190,21.0575,0.820,0.055,"neuron projection"),
 901 | c("GO:0032838","cell projection cytoplasm",0.014,4.6696,0.705,0.595,"neuron projection"),
 902 | c("GO:0035253","ciliary rootlet",0.003,1.6968,0.640,0.543,"neuron projection"),
 903 | c("GO:0030027","lamellipodium",0.040,2.6144,0.867,0.640,"neuron projection"),
 904 | c("GO:0097458","neuron part",0.320,18.7959,0.911,0.057,"neuron part"),
 905 | c("GO:0016234","inclusion body",0.019,1.8182,0.857,0.098,"inclusion body"));
 906 | 
 907 |   
 908 | revigo.data.list[["BP_cell_exons"]] <- rbind(c("GO:0015031","protein transport",2.251,45.5258,0.845,0.000,"protein transport"),
 909 | c("GO:0071166","ribonucleoprotein complex localization",0.097,5.1805,0.841,0.587,"protein transport"),
 910 | c("GO:0006900","membrane budding",0.030,17.7100,0.738,0.643,"protein transport"),
 911 | c("GO:1990778","protein localization to cell periphery",0.063,4.3665,0.834,0.635,"protein transport"),
 912 | c("GO:0042147","retrograde transport, endosome to Golgi",0.055,4.3575,0.820,0.671,"protein transport"),
 913 | c("GO:0016192","vesicle-mediated transport",1.085,17.7799,0.909,0.344,"protein transport"),
 914 | c("GO:0016197","endosomal transport",0.131,13.7825,0.922,0.274,"protein transport"),
 915 | c("GO:1903827","regulation of cellular protein localization",0.132,7.6326,0.700,0.680,"protein transport"),
 916 | c("GO:0050657","nucleic acid transport",0.100,3.4776,0.906,0.533,"protein transport"),
 917 | c("GO:1904874","positive regulation of telomerase RNA localization to Cajal body",0.004,3.6990,0.741,0.484,"protein transport"),
 918 | c("GO:0048278","vesicle docking",0.080,3.4647,0.835,0.691,"protein transport"),
 919 | c("GO:0007034","vacuolar transport",0.133,14.1002,0.922,0.275,"protein transport"),
 920 | c("GO:0007041","lysosomal transport",0.017,3.8013,0.928,0.229,"protein transport"),
 921 | c("GO:0030705","cytoskeleton-dependent intracellular transport",0.056,5.1561,0.841,0.605,"protein transport"),
 922 | c("GO:0006403","RNA localization",0.118,6.8297,0.882,0.643,"protein transport"),
 923 | c("GO:0048193","Golgi vesicle transport",0.297,19.6234,0.887,0.298,"protein transport"),
 924 | c("GO:0051668","localization within membrane",0.023,4.7235,0.855,0.529,"protein transport"),
 925 | c("GO:0006913","nucleocytoplasmic transport",0.237,12.1273,0.817,0.291,"protein transport"),
 926 | c("GO:0051640","organelle localization",0.223,9.0083,0.831,0.627,"protein transport"),
 927 | c("GO:0051650","establishment of vesicle localization",0.072,9.4134,0.826,0.605,"protein transport"),
 928 | c("GO:0016482","cytosolic transport",0.076,8.6615,0.837,0.619,"protein transport"),
 929 | c("GO:0051169","nuclear transport",0.239,12.4486,0.824,0.677,"protein transport"),
 930 | c("GO:0016071","mRNA metabolic process",0.798,52.6326,0.851,0.000,"mRNA metabolism"),
 931 | c("GO:0045862","positive regulation of proteolysis",0.078,6.6345,0.654,0.609,"mRNA metabolism"),
 932 | c("GO:0031329","regulation of cellular catabolic process",0.093,17.0814,0.701,0.583,"mRNA metabolism"),
 933 | c("GO:0006369","termination of RNA polymerase II transcription",0.015,4.6925,0.871,0.526,"mRNA metabolism"),
 934 | c("GO:0006370","7-methylguanosine mRNA capping",0.022,6.0438,0.846,0.479,"mRNA metabolism"),
 935 | c("GO:0006367","transcription initiation from RNA polymerase II promoter",0.109,6.2596,0.854,0.601,"mRNA metabolism"),
 936 | c("GO:0006368","transcription elongation from RNA polymerase II promoter",0.082,11.7645,0.854,0.300,"mRNA metabolism"),
 937 | c("GO:0016073","snRNA metabolic process",0.035,5.4828,0.871,0.416,"mRNA metabolism"),
 938 | c("GO:0070647","protein modification by small protein conjugation or removal",0.821,30.0799,0.816,0.331,"mRNA metabolism"),
 939 | c("GO:0006354","DNA-templated transcription, elongation",0.202,11.5331,0.855,0.280,"mRNA metabolism"),
 940 | c("GO:0006353","DNA-templated transcription, termination",0.185,8.2388,0.856,0.320,"mRNA metabolism"),
 941 | c("GO:0006352","DNA-templated transcription, initiation",0.766,7.1701,0.839,0.361,"mRNA metabolism"),
 942 | c("GO:0044265","cellular macromolecule catabolic process",1.268,48.6421,0.761,0.153,"mRNA metabolism"),
 943 | c("GO:0000338","protein deneddylation",0.017,4.1057,0.815,0.683,"mRNA metabolism"),
 944 | c("GO:0043618","regulation of transcription from RNA polymerase II promoter in response to stress",0.026,3.8570,0.721,0.545,"mRNA metabolism"),
 945 | c("GO:0010608","posttranscriptional regulation of gene expression",0.719,23.5421,0.755,0.256,"mRNA metabolism"),
 946 | c("GO:1901566","organonitrogen compound biosynthetic process",14.064,8.4089,0.907,0.391,"mRNA metabolism"),
 947 | c("GO:0043603","cellular amide metabolic process",6.879,8.9318,0.892,0.189,"mRNA metabolism"),
 948 | c("GO:0010563","negative regulation of phosphorus metabolic process",0.182,4.4634,0.755,0.687,"mRNA metabolism"),
 949 | c("GO:0043543","protein acylation",0.202,3.8539,0.834,0.429,"mRNA metabolism"),
 950 | c("GO:0006259","DNA metabolic process",5.607,3.5986,0.836,0.383,"mRNA metabolism"),
 951 | c("GO:0051437","positive regulation of ubiquitin-protein ligase activity involved in regulation of mitotic cell cycle transition",0.001,18.0287,0.587,0.689,"mRNA metabolism"),
 952 | c("GO:0031400","negative regulation of protein modification process",0.151,15.5376,0.660,0.418,"mRNA metabolism"),
 953 | c("GO:0006521","regulation of cellular amino acid metabolic process",0.018,11.9747,0.787,0.276,"mRNA metabolism"),
 954 | c("GO:0043488","regulation of mRNA stability",0.018,15.0526,0.796,0.293,"mRNA metabolism"),
 955 | c("GO:0002181","cytoplasmic translation",0.064,9.2125,0.831,0.429,"mRNA metabolism"),
 956 | c("GO:0043487","regulation of RNA stability",0.021,14.2381,0.794,0.573,"mRNA metabolism"),
 957 | c("GO:0032535","regulation of cellular component size",0.179,4.5086,0.704,0.437,"mRNA metabolism"),
 958 | c("GO:0036260","RNA capping",0.112,5.3516,0.847,0.542,"mRNA metabolism"),
 959 | c("GO:0016567","protein ubiquitination",0.523,28.9957,0.777,0.470,"mRNA metabolism"),
 960 | c("GO:0006412","translation",5.686,18.7878,0.756,0.661,"mRNA metabolism"),
 961 | c("GO:0009894","regulation of catabolic process",0.146,18.7670,0.755,0.530,"mRNA metabolism"),
 962 | c("GO:0006413","translational initiation",0.518,37.7447,0.803,0.140,"mRNA metabolism"),
 963 | c("GO:0006396","RNA processing",3.210,47.2069,0.824,0.373,"mRNA metabolism"),
 964 | c("GO:0008380","RNA splicing",0.413,31.7167,0.832,0.299,"mRNA metabolism"),
 965 | c("GO:1901361","organic cyclic compound catabolic process",1.164,11.7773,0.825,0.665,"mRNA metabolism"),
 966 | c("GO:0034248","regulation of cellular amide metabolic process",0.700,12.3595,0.745,0.488,"mRNA metabolism"),
 967 | c("GO:0008334","histone mRNA metabolic process",0.005,3.6517,0.878,0.522,"mRNA metabolism"),
 968 | c("GO:0006470","protein dephosphorylation",0.585,4.9872,0.813,0.475,"mRNA metabolism"),
 969 | c("GO:0033238","regulation of cellular amine metabolic process",0.021,9.7905,0.803,0.631,"mRNA metabolism"),
 970 | c("GO:0031123","RNA 3'-end processing",0.145,3.5544,0.844,0.554,"mRNA metabolism"),
 971 | c("GO:0098781","ncRNA transcription",0.045,6.9914,0.860,0.287,"mRNA metabolism"),
 972 | c("GO:0000184","nuclear-transcribed mRNA catabolic process, nonsense-mediated decay",0.030,31.2807,0.770,0.619,"mRNA metabolism"),
 973 | c("GO:0031145","anaphase-promoting complex-dependent catabolic process",0.021,16.7773,0.774,0.600,"mRNA metabolism"),
 974 | c("GO:0042795","snRNA transcription from RNA polymerase II promoter",0.003,4.4535,0.871,0.481,"mRNA metabolism"),
 975 | c("GO:0034660","ncRNA metabolic process",3.407,28.5317,0.831,0.453,"mRNA metabolism"),
 976 | c("GO:0022613","ribonucleoprotein complex biogenesis",1.614,42.8297,0.809,0.000,"ribonucleoprotein complex biogenesis"),
 977 | c("GO:0016050","vesicle organization",0.130,13.2299,0.812,0.417,"ribonucleoprotein complex biogenesis"),
 978 | c("GO:0031346","positive regulation of cell projection organization",0.056,6.7670,0.655,0.637,"ribonucleoprotein complex biogenesis"),
 979 | c("GO:0043248","proteasome assembly",0.049,4.1561,0.801,0.510,"ribonucleoprotein complex biogenesis"),
 980 | c("GO:0006364","rRNA processing",0.952,32.3706,0.661,0.683,"ribonucleoprotein complex biogenesis"),
 981 | c("GO:0061024","membrane organization",0.759,24.2328,0.805,0.499,"ribonucleoprotein complex biogenesis"),
 982 | c("GO:0051276","chromosome organization",1.477,5.6198,0.774,0.673,"ribonucleoprotein complex biogenesis"),
 983 | c("GO:0070271","protein complex biogenesis",0.963,8.1141,0.817,0.684,"ribonucleoprotein complex biogenesis"),
 984 | c("GO:0008154","actin polymerization or depolymerization",0.153,4.3188,0.772,0.544,"ribonucleoprotein complex biogenesis"),
 985 | c("GO:0031468","nuclear envelope reassembly",0.003,3.8327,0.807,0.689,"ribonucleoprotein complex biogenesis"),
 986 | c("GO:0044087","regulation of cellular component biogenesis",0.404,7.9101,0.713,0.622,"ribonucleoprotein complex biogenesis"),
 987 | c("GO:0033043","regulation of organelle organization",0.495,8.2041,0.671,0.536,"ribonucleoprotein complex biogenesis"),
 988 | c("GO:0000028","ribosomal small subunit assembly",0.020,3.6478,0.800,0.540,"ribonucleoprotein complex biogenesis"),
 989 | c("GO:0007030","Golgi organization",0.039,5.7825,0.817,0.442,"ribonucleoprotein complex biogenesis"),
 990 | c("GO:0007033","vacuole organization",0.102,11.6635,0.815,0.474,"ribonucleoprotein complex biogenesis"),
 991 | c("GO:0030030","cell projection organization",0.608,4.9872,0.778,0.487,"ribonucleoprotein complex biogenesis"),
 992 | c("GO:0010256","endomembrane system organization",0.189,12.6904,0.824,0.432,"ribonucleoprotein complex biogenesis"),
 993 | c("GO:0044033","multi-organism metabolic process",0.025,27.3197,0.940,0.011,"multi-organism metabolism"),
 994 | c("GO:1902590","multi-organism organelle organization",0.006,4.4976,0.806,0.608,"multi-organism metabolism"),
 995 | c("GO:0044764","multi-organism cellular process",0.325,20.8097,0.905,0.692,"multi-organism metabolism"),
 996 | c("GO:0019083","viral transcription",0.012,25.1898,0.827,0.562,"multi-organism metabolism"),
 997 | c("GO:0038093","Fc receptor signaling pathway",0.005,16.2418,0.755,0.022,"Fc receptor signaling pathway"),
 998 | c("GO:0002478","antigen processing and presentation of exogenous peptide antigen",0.004,11.5986,0.922,0.505,"Fc receptor signaling pathway"),
 999 | c("GO:0034976","response to endoplasmic reticulum stress",0.100,11.2573,0.880,0.252,"Fc receptor signaling pathway"),
1000 | c("GO:0006283","transcription-coupled nucleotide-excision repair",0.038,9.9626,0.819,0.462,"Fc receptor signaling pathway"),
1001 | c("GO:0035966","response to topologically incorrect protein",0.053,9.4318,0.908,0.407,"Fc receptor signaling pathway"),
1002 | c("GO:0042059","negative regulation of epidermal growth factor receptor signaling pathway",0.007,6.8928,0.762,0.559,"Fc receptor signaling pathway"),
1003 | c("GO:0007264","small GTPase mediated signal transduction",0.485,4.9626,0.767,0.443,"Fc receptor signaling pathway"),
1004 | c("GO:0051340","regulation of ligase activity",0.002,14.4660,0.858,0.375,"Fc receptor signaling pathway"),
1005 | c("GO:0051338","regulation of transferase activity",0.368,11.8827,0.807,0.646,"Fc receptor signaling pathway"),
1006 | c("GO:0051348","negative regulation of transferase activity",0.088,14.1101,0.809,0.456,"Fc receptor signaling pathway"),
1007 | c("GO:0051351","positive regulation of ligase activity",0.002,15.7959,0.854,0.121,"Fc receptor signaling pathway"),
1008 | c("GO:0019882","antigen processing and presentation",0.027,7.6478,0.934,0.561,"Fc receptor signaling pathway"),
1009 | c("GO:0060071","Wnt signaling pathway, planar cell polarity pathway",0.010,9.6655,0.747,0.486,"Fc receptor signaling pathway"),
1010 | c("GO:0050690","regulation of defense response to virus by virus",0.005,4.1118,0.782,0.696,"Fc receptor signaling pathway"),
1011 | c("GO:0030518","intracellular steroid hormone receptor signaling pathway",0.024,4.1864,0.791,0.588,"Fc receptor signaling pathway"),
1012 | c("GO:0032107","regulation of response to nutrient levels",0.011,6.8729,0.820,0.576,"Fc receptor signaling pathway"),
1013 | c("GO:0006986","response to unfolded protein",0.037,8.9318,0.904,0.605,"Fc receptor signaling pathway"),
1014 | c("GO:0051098","regulation of binding",0.073,3.9547,0.828,0.560,"Fc receptor signaling pathway"),
1015 | c("GO:0006974","cellular response to DNA damage stimulus",2.360,7.8386,0.849,0.679,"Fc receptor signaling pathway"),
1016 | c("GO:0038061","NIK/NF-kappaB signaling",0.018,8.0931,0.811,0.273,"Fc receptor signaling pathway"),
1017 | c("GO:0080135","regulation of cellular response to stress",0.182,9.7670,0.748,0.548,"Fc receptor signaling pathway"),
1018 | c("GO:0033209","tumor necrosis factor-mediated signaling pathway",0.016,4.1765,0.774,0.573,"Fc receptor signaling pathway"),
1019 | c("GO:0042026","protein refolding",0.069,5.1959,0.958,0.027,"protein refolding"),
1020 | c("GO:0061077","chaperone-mediated protein folding",0.043,4.2396,0.959,0.621,"protein refolding"),
1021 | c("GO:0016236","macroautophagy",0.084,9.0353,0.943,0.027,"macroautophagy"),
1022 | c("GO:0006914","autophagy",0.230,10.5735,0.958,0.030,"autophagy"),
1023 | c("GO:0006457","protein folding",0.903,11.3233,0.954,0.034,"protein folding"),
1024 | c("GO:0007049","cell cycle",1.885,15.4976,0.900,0.059,"cell cycle"),
1025 | c("GO:0007017","microtubule-based process",0.658,5.1759,0.908,0.208,"cell cycle"),
1026 | c("GO:0007018","microtubule-based movement",0.287,4.3665,0.907,0.190,"cell cycle"),
1027 | c("GO:1903047","mitotic cell cycle process",0.514,14.9208,0.863,0.202,"cell cycle"),
1028 | c("GO:0022406","membrane docking",0.099,4.4962,0.919,0.172,"cell cycle"),
1029 | c("GO:0016311","dephosphorylation",1.250,5.4498,0.921,0.065,"dephosphorylation"),
1030 | c("GO:0007164","establishment of tissue polarity",0.019,7.8827,0.950,0.065,"establishment of tissue polarity"));
1031 |   
1032 |   
1033 |   
1034 | #### Nuc > 1.5*Cell (introns + exons) ####
1035 | revigo.data.list[["CC_nuc_ie"]] <- rbind(c("GO:0005911","cell-cell junction",0.222,1.4179,0.957,0.000,"cell-cell junction"),  # REVIGO rows; field order matches revigo.names (set before the treemap loop). c() mixes strings and numbers, so all fields are stored as character and converted back with as.numeric() when plotted
1036 | c("GO:0030054","cell junction",0.445,5.9747,0.957,0.000,"cell junction"),
1037 | c("GO:0045202","synapse",0.299,10.4597,0.957,0.000,"synapse"),
1038 | c("GO:0098589","membrane region",0.121,5.0731,0.912,0.000,"membrane region"),
1039 | c("GO:0002095","caveolar macromolecular signaling complex",0.000,1.4112,0.694,0.410,"membrane region"),
1040 | c("GO:0098794","postsynapse",0.133,10.4597,0.682,0.000,"postsynapse"),
1041 | c("GO:0060076","excitatory synapse",0.004,6.5406,0.769,0.696,"postsynapse"),
1042 | c("GO:1902495","transmembrane transporter complex",0.864,4.7011,0.671,0.041,"transmembrane transporter complex"),
1043 | c("GO:0008328","ionotropic glutamate receptor complex",0.012,1.5751,0.535,0.681,"transmembrane transporter complex"),
1044 | c("GO:1990351","transporter complex",0.885,4.6383,0.832,0.357,"transmembrane transporter complex"),
1045 | c("GO:1990454","L-type voltage-gated calcium channel complex",0.001,2.3298,0.558,0.586,"transmembrane transporter complex"),
1046 | c("GO:0031225","anchored component of membrane",0.078,2.3233,0.928,0.043,"anchored component of membrane"),
1047 | c("GO:0044297","cell body",0.087,3.3497,0.899,0.045,"cell body"),
1048 | c("GO:0097458","neuron part",0.320,9.2620,0.894,0.050,"neuron part"),
1049 | c("GO:0043005","neuron projection",0.190,9.4179,0.503,0.051,"neuron projection"),
1050 | c("GO:0044224","juxtaparanode region of axon",0.002,1.8962,0.550,0.674,"neuron projection"),
1051 | c("GO:0032839","dendrite cytoplasm",0.003,1.3516,0.554,0.680,"neuron projection"),
1052 | c("GO:0098590","plasma membrane region",0.239,5.3595,0.645,0.052,"plasma membrane region"),
1053 | c("GO:0005887","integral component of plasma membrane",1.211,5.0237,0.603,0.696,"plasma membrane region"),
1054 | c("GO:0044431","Golgi apparatus part",0.608,1.3615,0.871,0.057,"Golgi apparatus part"));
1055 |   
1056 | 
1057 | revigo.data.list[["BP_nuc_ie"]] <- rbind(c("GO:0006935","chemotaxis",0.475,1.5346,0.925,0.000,"chemotaxis"),  # REVIGO rows; field order matches revigo.names (set before the treemap loop); the last field is the group label used as the treemap's top-level index
1058 | c("GO:0007268","chemical synaptic transmission",0.187,4.9031,0.700,0.000,"chemical synaptic transmission"),
1059 | c("GO:0009214","cyclic nucleotide catabolic process",0.009,1.8996,0.829,0.698,"chemical synaptic transmission"),
1060 | c("GO:0008037","cell recognition",0.067,2.3507,0.844,0.137,"chemical synaptic transmission"),
1061 | c("GO:0031344","regulation of cell projection organization",0.123,3.4023,0.609,0.349,"chemical synaptic transmission"),
1062 | c("GO:0050808","synapse organization",0.070,4.7878,0.761,0.138,"chemical synaptic transmission"),
1063 | c("GO:0015014","heparan sulfate proteoglycan biosynthetic process, polysaccharide chain biosynthetic process",0.001,2.0655,0.864,0.105,"chemical synaptic transmission"),
1064 | c("GO:0007267","cell-cell signaling",0.407,2.6289,0.756,0.426,"chemical synaptic transmission"),
1065 | c("GO:0044089","positive regulation of cellular component biogenesis",0.193,1.6861,0.652,0.333,"chemical synaptic transmission"),
1066 | c("GO:0007196","adenylate cyclase-inhibiting G-protein coupled glutamate receptor signaling pathway",0.002,1.6696,0.765,0.297,"chemical synaptic transmission"),
1067 | c("GO:0051130","positive regulation of cellular component organization",0.374,1.5317,0.617,0.672,"chemical synaptic transmission"),
1068 | c("GO:0030030","cell projection organization",0.608,2.6364,0.727,0.413,"chemical synaptic transmission"),
1069 | c("GO:0006928","movement of cell or subcellular component",0.973,1.6073,0.815,0.171,"chemical synaptic transmission"),
1070 | c("GO:0006198","cAMP catabolic process",0.007,2.0186,0.826,0.247,"chemical synaptic transmission"),
1071 | c("GO:0007610","behavior",0.170,2.2757,0.983,0.000,"behavior"),
1072 | c("GO:0007626","locomotory behavior",0.061,1.6676,0.983,0.000,"locomotory behavior"),
1073 | c("GO:0022610","biological adhesion",0.550,3.4023,0.983,0.000,"biological adhesion"),
1074 | c("GO:0035418","protein localization to synapse",0.006,1.6861,0.959,0.000,"protein localization to synapse"),
1075 | c("GO:0097120","receptor localization to synapse",0.004,1.3401,0.959,0.138,"protein localization to synapse"),
1076 | c("GO:0034765","regulation of ion transmembrane transport",0.197,1.6778,0.760,0.579,"protein localization to synapse"),
1077 | c("GO:1901841","regulation of high voltage-gated calcium channel activity",0.000,1.4597,0.822,0.664,"protein localization to synapse"),
1078 | c("GO:0034220","ion transmembrane transport",3.528,1.5186,0.930,0.210,"protein localization to synapse"),
1079 | c("GO:0040011","locomotion",0.997,1.4522,0.983,0.000,"locomotion"),
1080 | c("GO:0050803","regulation of synapse structure or activity",0.034,6.6925,0.796,0.000,"regulation of synapse structure or activity"),
1081 | c("GO:0050905","neuromuscular process",0.023,1.5969,0.575,0.609,"regulation of synapse structure or activity"),
1082 | c("GO:0008038","neuron recognition",0.012,3.4841,0.441,0.664,"regulation of synapse structure or activity"),
1083 | c("GO:0048589","developmental growth",0.177,1.5186,0.638,0.553,"regulation of synapse structure or activity"),
1084 | c("GO:0051960","regulation of nervous system development",0.172,6.6696,0.340,0.176,"regulation of synapse structure or activity"),
1085 | c("GO:0061588","calcium activated phospholipid scrambling",0.004,1.9136,0.691,0.359,"regulation of synapse structure or activity"),
1086 | c("GO:0019226","transmission of nerve impulse",0.013,2.1624,0.491,0.514,"regulation of synapse structure or activity"),
1087 | c("GO:0010842","retina layer formation",0.006,1.8539,0.535,0.535,"regulation of synapse structure or activity"),
1088 | c("GO:0001508","action potential",0.024,2.0177,0.783,0.400,"regulation of synapse structure or activity"),
1089 | c("GO:0042391","regulation of membrane potential",0.135,3.2132,0.781,0.448,"regulation of synapse structure or activity"),
1090 | c("GO:0035637","multicellular organismal signaling",0.025,3.4023,0.536,0.536,"regulation of synapse structure or activity"),
1091 | c("GO:0031646","positive regulation of neurological system process",0.003,1.7190,0.461,0.630,"regulation of synapse structure or activity"),
1092 | c("GO:0060322","head development",0.151,2.0680,0.637,0.557,"regulation of synapse structure or activity"),
1093 | c("GO:1901078","negative regulation of relaxation of muscle",0.001,2.6459,0.493,0.556,"regulation of synapse structure or activity"),
1094 | c("GO:0032990","cell part morphogenesis",0.174,1.6073,0.547,0.563,"regulation of synapse structure or activity"),
1095 | c("GO:0097264","self proteolysis",0.040,3.2291,0.980,0.000,"self proteolysis"),
1096 | c("GO:0098742","cell-cell adhesion via plasma-membrane adhesion molecules",0.108,8.3468,0.928,0.000,"cell-cell adhesion via plasma-membrane adhesion molecules"));
1097 | 
1098 | 
1099 | #### Nuc > 1.5*Cell (exons) ####
1100 | revigo.data.list[["CC_nuc_exons"]] <- rbind(c("GO:0016607","nuclear speck",0.091,6.8697,0.217,0.000,"nuclear speck"),  # REVIGO rows; field order matches revigo.names (set before the treemap loop)
1101 | c("GO:0005681","spliceosomal complex",0.250,4.5031,0.153,0.517,"nuclear speck"),
1102 | c("GO:0097525","spliceosomal snRNP complex",0.128,1.4802,0.141,0.531,"nuclear speck"),
1103 | c("GO:0030532","small nuclear ribonucleoprotein complex",0.138,1.3737,0.171,0.534,"nuclear speck"),
1104 | c("GO:0071004","U2-type prespliceosome",0.029,2.7773,0.164,0.443,"nuclear speck"),
1105 | c("GO:0030529","intracellular ribonucleoprotein complex",5.291,1.6326,0.222,0.624,"nuclear speck"),
1106 | c("GO:1990904","ribonucleoprotein complex",5.291,1.6326,0.421,0.000,"ribonucleoprotein complex"));
1107 | 
1108 | 
1109 | revigo.data.list[["BP_nuc_exons"]] <- rbind(c("GO:0008380","RNA splicing",0.413,9.0996,0.149,0.000,"RNA splicing"),  # REVIGO rows; field order matches revigo.names (set before the treemap loop); all five rows share one group label
1110 | c("GO:0006396","RNA processing",3.210,4.4989,0.326,0.373,"RNA splicing"),
1111 | c("GO:0016071","mRNA metabolic process",0.798,4.7190,0.364,0.299,"RNA splicing"),
1112 | c("GO:0043484","regulation of RNA splicing",0.040,2.9914,0.089,0.500,"RNA splicing"),
1113 | c("GO:0006397","mRNA processing",0.561,6.1403,0.088,0.624,"RNA splicing"));
1114 | 
1115 | 
1116 | # Draw one REVIGO treemap per entry of revigo.data.list; all of them go into a single PDF
1117 | revigo.names <- c("term_ID","description","freqInDbPercent","abslog10pvalue","uniqueness","dispensability","representative")
1118 | numeric.cols <- c("abslog10pvalue", "freqInDbPercent", "uniqueness", "dispensability")
1119 | 
1120 | # pdf() takes width and height in inches
1121 | pdf(file = "../output/nuc_vs_cell_go_enrich_revigo_treemaps.pdf", width = 7, height = 4)
1122 | for (set.name in names(revigo.data.list)) {
1123 |   # Each entry is a matrix of REVIGO rows; label its columns
1124 |   go.df <- data.frame(revigo.data.list[[set.name]])
1125 |   names(go.df) <- revigo.names
1126 |   # Values were stored as text, so go through as.character() before as.numeric()
1127 |   for (col.name in numeric.cols) {
1128 |     go.df[[col.name]] <- as.numeric(as.character(go.df[[col.name]]))
1129 |   }
1130 | 
1131 |   # treemap() accepts many more parameters than are set here - see its documentation
1132 |   treemap(
1133 |     go.df,
1134 |     index                 = c("representative", "description"),
1135 |     vSize                 = "abslog10pvalue",
1136 |     type                  = "categorical",
1137 |     vColor                = "representative",
1138 |     title                 = set.name,
1139 |     inflate.labels        = FALSE,        # TRUE gives space-filling group labels (good for posters)
1140 |     lowerbound.cex.labels = 0,            # draw as many labels as possible; tiny squares may still go unlabelled
1141 |     bg.labels             = "#CCCCCCAA",  # semi-transparent grey; "#CCCCCC00" is fully transparent, NA is opaque
1142 |     position.legend       = "none"
1143 |   )
1144 | }
1145 | 
1146 | # Close the PDF device so the file is written out
1147 | dev.off()
1148 | 
1149 | ```


--------------------------------------------------------------------------------
/R/Figure1.R:
--------------------------------------------------------------------------------
 1 | library(pheatmap)
 2 | library(RColorBrewer)
 3 | library(ggplot2)
 4 | 
 5 | 
 6 | out.path <- "output"  # output directory; was assigned to `outpath`, but every pdf() call in this script reads `out.path`
 7 | 
 8 | # Load cell/nucleus matching pairs (presumably the source of max.cor.nuc / max.cor.all used below - confirm against the .rda)
 9 | load(file = "data/map_nuc_to_cells.rda")
10 | 
11 | #### Figure 1B ####
12 | pdf(file = paste0(out.path, "/map_nuc_to_cells.pdf"), height = 6, width = 6)
13 | plot(max.cor.nuc, max.cor.all, xlab = "Best matching nucleus",
14 |      ylab = "Best matching cell", main = "Expression correlation")
15 | abline(a = 0, b = 1)  # identity line (y = x)
16 | dev.off()  # close this device so the PDF is finalized before the next pdf() call opens a new one
18 | 
19 | #### SI Figure 1A,B ####
20 | # Tabulate the matched cells by Cre line and by dissected layer
21 | cell.subset <- which(samp.cell$exp_component_name %in% samp.ids[["cell"]])
22 | matched.names <- samp.cell$exp_component_name[cell.subset]
23 | 
24 | # Cre line: blank entries become "Virally labeled", one sample is relabelled by name, long names are shortened
25 | cre.info <- as.character(samp.cell$transgenic_recombinase[cell.subset])
26 | cre.info[cre.info == ""] <- "Virally labeled"
27 | cre.info[which(matched.names == "SM-D9E66_S95_E1-50")] <- "Sim1"
28 | cre.short <- c("Rorb_neo" = "Rorb", "Chrna2_Pvalb-T2A-Dre" = "Chrna2_Pvalb", "Pvalb-IRES-Cre" = "Pvalb")
29 | for (pat in names(cre.short)) cre.info <- sub(pat, cre.short[[pat]], cre.info)
30 | cre.cnt <- table(cre.info)
31 | 
32 | # Layer: strip the "VISp_" prefix, then collapse layer ranges; substitutions run in the listed order, longest range first
33 | roi.info <- as.character(samp.cell$roi[cell.subset])
34 | roi.short <- c("VISp_" = "", "L1-L2-3-L4-L5-L6" = "L1-6", "L1-L2-3-L4" = "L1-4",
35 |                "L2-3-L4" = "L2-4", "L4-L5-L6" = "L4-6", "L4-L5" = "L4-5", "L5-L6" = "L5-6")
36 | for (pat in names(roi.short)) roi.info <- sub(pat, roi.short[[pat]], roi.info)
37 | roi.cnt <- table(roi.info)
38 | # Reorder the table entries by position (the index vector addresses positions 1 to 11)
39 | roi.cnt <- roi.cnt[c(1, 4, 2, 5, 3, 6:11)]
40 | 
41 | 
42 | # Helper: write a horizontal barplot of sorted proportions to a 5 x 4 inch PDF under out.path
43 | save.prop.barplot <- function(cnt, pdf.suffix, plot.title) {
44 |   pdf(file = paste0(out.path, pdf.suffix), height = 5, width = 4)
45 |   par(mar = c(5, 7, 3, 3))  # bottom, left, top, right margins
46 |   prop.sorted <- sort(cnt / sum(cnt), decreasing = TRUE)
47 |   barplot(prop.sorted, xlab = "Proportion of matched cells", xlim = c(0, 0.3),
48 |           main = plot.title, col = "grey", horiz = TRUE, las = 1)
49 |   dev.off()
50 | }
51 | 
52 | # Proportion of matched cells per Cre line
53 | save.prop.barplot(cre.cnt, "/mapped_cell_cre.pdf", "Mouse Cre-line")
54 | 
55 | # Proportion of matched cells per dissected layer
56 | save.prop.barplot(roi.cnt, "/mapped_cell_layer.pdf", "Dissected layer(s)")
57 | 
58 | 


--------------------------------------------------------------------------------
/R/Figure2a_4ad_S4d.R:
--------------------------------------------------------------------------------
  1 | ###################################################################################################################
  2 | # This script needs to be run from a windows computer with write-access in R version 3.0 or higher.
  3 | 
  4 | print("Set working folders, read in data, and load libraries.")
  5 | # Every folder is built relative to mainFolder ("" resolves against the working directory)
  6 | mainFolder    <- ""
  7 | outputFolder  <- paste0(mainFolder, "output/")
  8 | scriptsFolder <- paste0(mainFolder, "R/")
  9 | inputFolder   <- paste0(mainFolder, "data/")
 10 | 
 11 | # Disable automatic string-to-factor conversion, then load these libraries
 12 | options(stringsAsFactors = FALSE)
 13 | 
 14 | library(beeswarm)
 15 | library(WGCNA)
 16 | library(edgeR)   
 17 | library(feather)
 18 | library(dendextend)
 19 | library(monocle)
 20 | library(ggplot2)
 21 | library(dplyr)
 22 | library(matrixStats)
 23 | 
 24 | # Read in the data and extra scripts
 25 | source(paste0(scriptsFolder, "extraFunctions.R"))  # the repository file is R/extraFunctions.R; the former ".r" spelling only resolved on case-insensitive file systems
 26 | load(paste0(inputFolder, "start_data.rda"))
 27 |  # This includes "annoCel", "annoNuc", "countsR", "erccTable", "exprAll", "introns", "sampAll"
 28 |  # the "All" samples include all cell and nuclei, and 30 "ContronTotalRNA" samples
 29 | cpmE = cpm(countsR)  # cpm() of the counts summed as ExonReads further down (cpm presumably comes from edgeR - confirm)
 30 | cpmI = cpm(introns)  # cpm() of the intron counts
 31 | 
 32 | 
 33 | #########################################################################################
 34 | ## FIGURE 2A ----------------------------------------------------------------------------
 35 | ## FIGURE 2A ----------------------------------------------------------------------------
 36 | 
 37 | print("Figure 2A: Plot overall statistics for nuclei, cells, and controls.")
 38 | # Alignment-statistic columns of sampAll used in this section.
 39 | kpAlignStats   = c("percent_reads_aligned_to_exons","percent_reads_aligned_to_rrna","percent_reads_aligned_to_trna","percent_reads_aligned_to_other_ncrna","percent_reads_aligned_to_mt_exons","percent_reads_aligned_to_mt_rrna","percent_reads_aligned_to_mt_trna","percent_reads_aligned_to_mt_other_ncrna","percent_reads_aligned_intron","percent_reads_intergenic","percent_reads_aligned_to_ecoli","percent_reads_aligned_to_synthetic_constructs")
 40 | # NOTE(review): otherVars and alignStats are assigned below but are not read again in this script.
 41 | otherVars      = c("cre_line","batch_vendor_name","Fill.Date",kpAlignStats,"ERCCLimit")
 42 | 
 43 | sampleType  = sampAll$Type
 44 | alignStatsA = sampAll[,c(kpAlignStats,"percent_unmapped_reads")]
 45 | kpAlignStats2 = c("percent_unmapped_reads","percent_reads_aligned_to_exons","percent_reads_aligned_intron",
 46 |     "percent_reads_aligned_to_other_ncrna","percent_reads_intergenic")
 47 | alignStats = alignStatsA[,kpAlignStats2]
 48 | alignStats$percent_reads_aligned_to_everything_else = 100-rowSums(alignStats,na.rm=TRUE)
 49 | # Categories subtracted from the total aligned percentage to obtain genomePerc.
 50 | nonGenome   = c("percent_reads_aligned_to_rrna","percent_reads_aligned_to_trna","percent_reads_aligned_to_mt_exons",
 51 |      "percent_reads_aligned_to_mt_rrna","percent_reads_aligned_to_mt_trna","percent_reads_aligned_to_mt_other_ncrna",
 52 | 	 "percent_reads_aligned_to_ecoli","percent_reads_aligned_to_synthetic_constructs")
 53 | 
 54 | genomePerc  = sampAll$percent_reads_aligned_total-rowSums(sampAll[,nonGenome])
 55 | genomeReads = round(sampAll$total_reads*(1/100)*genomePerc)
 56 | # Per-sample exon and intron read totals; intergenic reads are what remains of genomeReads.
 57 | EII = data.frame(ExonReads=round(colSums(countsR)),IntronReads = colSums(introns), 
 58 |   IntergenicReads = genomeReads-colSums(countsR)-colSums(introns))
 59 | readLocs   = 100*EII/rowSums(EII)   # convert the three read classes to row percentages
 60 | readLocs$PercentAlignedToGenome = genomePerc
 61 | readLocs = readLocs[,c(4,1:3)]      # move PercentAlignedToGenome to the first column
 62 | rownames(readLocs) <- colnames(exprAll)
 63 | # For each CPM threshold n: number of genes per sample above n in exons, introns, or either.
 64 | Ns = c(0,1,2,5,10,25,50,100,250,500,1000)
 65 | geneCounts = list()
 66 | for (n in Ns){
 67 |  N = as.character(n)
 68 |  geneCounts[[N]] = data.frame(AnyCounts = colSums((cpmE>n)|(cpmI>n)), 
 69 |    ExonCounts = colSums(cpmE>n), IntronCounts = colSums(cpmI>n))
 70 |  rownames(geneCounts[[N]]) <- colnames(exprAll)
 71 | }
 72 | # For the manuscript, CPM > 0 is shown.
 73 | # stats = the 11 gene-count tables plus the alignment table; yMax holds one y-axis limit per entry.
 74 | stats = geneCounts
 75 | stats[["ALIGNMENT_STATS"]] = readLocs
 76 | cols = c("#387EB8","#E21F26","grey")
 77 | yMax = c(rep(13000,6),rep(6000,2),rep(2000,3),100)
 78 | names(yMax) = names(stats)
 79 | # One PDF page per column of each stats entry: beeswarm by sample type with a box plot overlay.
 80 | pdf(paste0(outputFolder, "plotAllStats_beeswarm.pdf"),height=10,width=4.5)
 81 | for (N in names(stats)){
 82 |  for (cl in colnames(stats[[N]])){
 83 |   out = data.frame(values = stats[[N]][,cl],SampleType = as.factor(sampleType))
 84 |   beeswarm(values ~ SampleType, data = out, 
 85 |     col = cols, pch = 16, corral = "random", cex=0.9, method="center",
 86 |     main = paste(cl,N,sep="-"),ylim = c(0,yMax[N]),las=1,cex.axis=1.35,ylab="")
 87 |   bxplot(values ~ SampleType, data = out, add = TRUE)
 88 |   abline(h=0)
 89 |  }
 90 | }
 91 | dev.off()
 92 | # Pages 1-3 and 34-37 are in the manuscript
 93 | 
 94 | print("===== Determine statistical significance.")
 95 | # Per-column p-values via getAnovaPvalforApply (not defined in this script; presumably from extraFunctions).
 96 | pvalAnova <- cbind(readLocs,geneCounts[[1]])   # geneCounts[[1]] is the first threshold (n = 0)
 97 | pvalsAll  <- apply(pvalAnova,2,getAnovaPvalforApply,sampleType)
 98 | norc      <- is.element(sampleType,c("Cells","Nuclei"))   # restrict to cells and nuclei only
 99 | pvalsNorc <- apply(pvalAnova[norc,],2,getAnovaPvalforApply,sampleType[norc])
100 | cbind(pvalsAll,pvalsNorc)
101 | #                             pvalsAll     pvalsNorc
102 | # PercentAlignedToGenome  5.708036e-41  8.199823e-14
103 | # ExonReads               3.758943e-95  4.077768e-87
104 | # IntronReads            9.712306e-127 3.175153e-105
105 | # IntergenicReads        3.541679e-178 2.043684e-112
106 | # AnyCounts               0.000000e+00  0.000000e+00
107 | # ExonCounts              0.000000e+00  0.000000e+00
108 | # IntronCounts           2.794476e-148 6.719808e-109
109 | 
110 | 
111 | #########################################################################################
112 | ## FIGURE 4 -----------------------------------------------------------------------------
113 | ## FIGURE 4 -----------------------------------------------------------------------------
114 | 
115 | print("Figure 4A/D: Build clustering trees for cells, nuclei, introns, and exons.")
116 | # Sample masks, and a per-sample label made of the first letter of the sample type plus the cluster.
117 | kpCell    = sampleType=="Cells"
118 | kpNuc     = sampleType=="Nuclei"
119 | kpBoth    = kpCell|kpNuc
120 | cl        = paste(substr(sampleType,1,1),sampAll$cluster,sep="_")
121 | names(cl) = rownames(sampAll)
122 | # NOTE(review): the index vector below is a hard-coded permutation of 1:22, i.e. it assumes 22 unique labels.
123 | # For ordering the dendrograms as in the paper
124 | clusters= sort(unique(cl))[c(10:11,1:9,21:22,20,12:19)]  # Clusters, ordered by layer
125 | l.rank  = setNames(1:22, clusters)
126 | cluI    = c("C_Pvalb.Wt1","N_Pvalb.Wt1","C_Sst.Cbln4","N_Sst.Cbln4",
127 |    "C_L6a.Plcxd3","N_L6a.Plcxd3","N_L5b.Cdh13","N_L5b.Samd3",
128 |    "C_L5b.Cdh13","C_L5b.Samd3","C_L5.Chrna6","N_L5.Chrna6",
129 |    "C_L4.Arf5","N_L4.Arf5","C_L4.Hsd11b1","N_L4.Hsd11b1",
130 |    "C_L5a.Batf3","N_L5a.Batf3","C_L6a.Mgp","N_L6a.Mgp",
131 |    "C_L6.Car12","N_L6.Car12")
132 | cluE    = c("C_Pvalb.Wt1","C_Sst.Cbln4","C_L4.Arf5","C_L4.Hsd11b1",
133 |    "C_L5.Chrna6","C_L5b.Cdh13","C_L5b.Samd3","C_L6a.Plcxd3",
134 |    "C_L6a.Mgp","C_L5a.Batf3","C_L6.Car12","N_Pvalb.Wt1",
135 |    "N_Sst.Cbln4","N_L6a.Plcxd3","N_L5.Chrna6","N_L4.Arf5",
136 |    "N_L4.Hsd11b1","N_L5a.Batf3","N_L6.Car12","N_L6a.Mgp",
137 |    "N_L5b.Cdh13","N_L5b.Samd3")
138 | # The i-th entries of clL, exprDataL, lRankL and mains belong together (one dendrogram each).
139 | # Lists for the four dendrograms 
140 | clL       <- list(cl[kpNuc],cl[kpCell],cl[kpBoth],cl[kpBoth])
141 | exprDataL <- list(cpm(countsR[,kpNuc]+introns[,kpNuc]),cpm(countsR[,kpCell]+introns[,kpCell]),cpmI[,kpBoth],cpmE[,kpBoth])
142 | lRankL    <- list(l.rank[12:22]-11,l.rank[1:11],setNames(1:22, cluI),setNames(1:22, cluE))
143 | mains     <- c("Nuclei","Cells","Introns","Exons")
144 | # buildAndPlotTree is not defined in this script (presumably provided by extraFunctions).
145 | # Plot the dendrograms
146 | pdf(paste0(outputFolder, "clusterDendrograms.pdf"))
147 | for (i in 1:length(mains))
148 |    buildAndPlotTree(exprDataL[[i]],clL[[i]],lRankL[[i]],topNgenes=1200,main=mains[i])
149 | dev.off()
150 | # Note that some additional formatting was done in Illustrator (e.g., colored dots, higher-weight lines).
151 | 
152 | 
153 | #########################################################################################
154 | ## FIGURE S4 -----------------------------------------------------------------------------
155 | ## FIGURE S4 -----------------------------------------------------------------------------
156 | 
157 | print("Figure S4D: Compare intron and exon expression in cells and nuclei.")
158 | # Two pages: per-gene log2(mean CPM + 1) in nuclei vs cells, first from cpmE, then from cpmI.
159 | pdf(paste0(outputFolder, "intronExonCorrelation_cellsVsNuclei.pdf"))
160 | ne = log2(rowMeans(cpmE[,kpNuc])+1)
161 | ce = log2(rowMeans(cpmE[,kpCell])+1)
162 | verboseScatterplot(ne,ce,xlab="Mean log2_CPM(counts in nuclei)",ylab="Mean log2_CPM(counts in cells)",main="Exons",pch=19,cex=0.5)
163 | ne = log2(rowMeans(cpmI[,kpNuc])+1)   # ne/ce are reused for the intron page
164 | ce = log2(rowMeans(cpmI[,kpCell])+1)
165 | verboseScatterplot(ne,ce,xlab="Mean log2_CPM(counts in nuclei)",ylab="Mean log2_CPM(counts in cells)",main="Introns",pch=19,cex=0.5)
166 | dev.off()
167 | 
168 | 
169 | 
170 | 


--------------------------------------------------------------------------------
/R/Figure3_4E.R:
--------------------------------------------------------------------------------
  1 | library(feather)
  2 | library(plyr)
  3 | library(ggplot2)
  4 | library(reshape2)
  5 | library(pheatmap)
  6 | library(RColorBrewer)
  7 | 
  8 | # Load nuc/cell matching (NOTE(review): nuc.ids and cell.ids are not read again in this script)
  9 | nuc.ids <- scan("data/nuc_ids.txt", "character")
 10 | cell.ids <- scan("data/cell_ids.txt", "character")
 11 | 
 12 | # Load cc heatmaps
 13 | dirs <- c("nuc", "nuc_varE_clIE", "nuc_varIE_clE", "nuc_exon", 
 14 |           "cell", "cell_varE_clIE", "cell_varIE_clE", "cell_exon")
 15 | sets <- c("nuc_IE_IE", "nuc_E_IE", "nuc_IE_E", "nuc_E_E", 
 16 |           "cell_IE_IE", "cell_E_IE", "cell_IE_E", "cell_E_E")
 17 | names(dirs) <- sets
 18 | paths <- paste0("data/20170818_VISp_L5_", dirs)
 19 | names(paths) <- sets
 20 | 
 21 | # Per set: read the annotation and the co-clustering matrix, then reorder the matrix to annotation order.
 22 | cc.list <- list()
 23 | anno.list <- list()
 24 | for (set1 in sets) {
 25 |   anno.list[[set1]] <- as.data.frame(read_feather(paste0(paths[set1], "/anno.feather")))
 26 |   cc1 <- read.csv(file = paste0(paths[set1], "/cl.cons.csv.gz"),
 27 |                   row.names = 1)
 28 |   cc.ids <- sapply(row.names(cc1), function(x) strsplit(x, "~")[[1]][1])  # text before the first "~"
 29 |   cc.order <- match(anno.list[[set1]]$sample_id, cc.ids)
 30 |   cc.list[[set1]] <- cc1[cc.order, cc.order]
 31 | }
 32 | 
 33 | 
 34 | 
 35 | #### Figure 3A,B - Within/between co-clustering - all comparisons ####
 36 | hm.colors <- colorRampPalette(c("white", brewer.pal(9, "YlOrRd")))(100)
 37 | for (sample.type1 in c("nuc", "cell")) {
 38 |   for (set1 in sets[grep(sample.type1, sets)]) {
 39 |     if (set1 == sets[grep(sample.type1, sets)][1]) {  # first set of this sample type: compute the order once
 40 |       ph.ie <- pheatmap(cc.list[[set1]], silent = TRUE)
 41 |       ie.order <- ph.ie$tree_row$order
 42 |       ph.exon <- pheatmap(cc.list[[sets[grep(sample.type1, sets)][4]]], silent = TRUE)  # 4th set (*_E_E)
 43 |       exon.order <- ph.exon$tree_row$order
 44 |       dend1 <- as.dendrogram(ph.ie$tree_row)
 45 |       # Match exon only heatmap
 46 |       dend1.reordered <- reorder(dend1, match(ph.exon$tree_row$labels[exon.order], 
 47 |                                               ph.ie$tree_row$labels[ie.order]),
 48 |                                  FUN = "mean")
 49 |       hm.order2 <- as.hclust(dend1.reordered)$order
 50 |     }
 51 |     png(file = paste0("output/", set1, ".png"), 
 52 |         width = 1024, height = 1024, res = 300)
 53 |     pheatmap(cc.list[[set1]][hm.order2, hm.order2],   # same sample order reused for every set of this type
 54 |              cluster_rows = FALSE, cluster_cols = FALSE, main = set1, 
 55 |              color = hm.colors, show_rownames = FALSE, show_colnames = FALSE)
 56 |     dev.off()    
 57 |   }
 58 | }
 59 | 
 60 | 
 61 | 
 62 | # Within/between co-clustering stats
 63 | cc.stats <- data.frame()
 64 | for (sample.type1 in c("nuc", "cell")) {
 65 |   for (set1 in sets[grep(sample.type1, sets)]) {
 66 |     cl.lab <- anno.list[[sets[grep(sample.type1, sets)][1]]]$cluster_label  # labels from the first set of this type
 67 |     cc.mean1 <- apply(cc.list[[set1]], 1, function(x) tapply(x, cl.lab, mean))  # average by cluster within each row
 68 |     cc.mean <- apply(cc.mean1, 1, function(x) tapply(x, cl.lab, mean))          # then by cluster across samples
 69 |     within.cc <- diag(cc.mean)
 70 |     diag(cc.mean) <- 0   # exclude the within-cluster value before taking the row maximum
 71 |     max.between.cc <- apply(cc.mean, 1, max)
 72 |     cc.sep <- within.cc - max.between.cc
 73 |     cc.stats <- rbind(cc.stats, data.frame(sample.type = sample.type1, 
 74 |                                            read.set = sub(paste0(sample.type1, "_"), "", set1), 
 75 |                                            cluster = names(within.cc),
 76 |                                            within.cc, max.between.cc, cc.sep))
 77 |   }
 78 | }
 79 | cc.stats$read.set <- factor(cc.stats$read.set, 
 80 |                             levels = c("IE_IE", "IE_E", "E_IE", "E_E"))
 81 | cc.stats$sample.type <- factor(cc.stats$sample.type, 
 82 |                             levels = c("nuc", "cell"))
 83 | 
 84 | 
 85 | 
 86 | #### Figure 3C - Cluster cohesion/separation summary ####
 87 | # Convex hull of the (within.cc, cc.sep) points, computed per sample.type x read.set group
 88 | find_hull <- function(df) df[chull(df$within.cc, df$cc.sep), ]
 89 | cc.hulls <- ddply(cc.stats, c("sample.type", "read.set"), find_hull)
 90 | # cc.hulls <- subset(cc.hulls, read.set %in% c("IE_IE", "E_E"))
 91 | 
 92 | g1 <- ggplot(cc.stats, aes(x = within.cc, y = cc.sep, 
 93 |                            color = sample.type, fill = sample.type, 
 94 |                            shape = sample.type)) +
 95 |   facet_wrap(~ read.set, ncol = 2) +
 96 |   geom_polygon(data = cc.hulls, alpha = 0.1, color = NA) +
 97 |   geom_point(size = 2) +
 98 |   xlim(c(0.7, 1)) +
 99 |   xlab("Cluster cohesion") +
100 |   ylab("Cluster separation") +
101 |   scale_color_brewer(palette="Set1") +
102 |   theme_bw() +
103 |   theme(panel.grid.minor = element_blank(), 
104 |         strip.background = element_blank(),
105 |         strip.text.x = element_blank())
106 | plot(g1)
107 | 
108 | 
109 | 
110 | 
111 | #### Within/between co-clustering - paired by matching cluster ####
112 | # For every sample: the highest mean co-clustering with any cluster other than its own.
113 | for (set1 in c("nuc_IE_IE", "cell_IE_IE")) {
114 |   samp.dat <- anno.list[[set1]]
115 |   cl.lab1 <- as.character(samp.dat$cluster_label)  # character labels: no empty factor levels (NA) in tapply
116 |   cl.cons.ordered <- cc.list[[set1]]
117 |   diag(cl.cons.ordered) <- NA
118 |   max.mean.between.cc.all <- numeric(nrow(samp.dat))  # preallocated: one value per sample
119 |   for (i in seq_len(nrow(samp.dat))) {
120 |     cl.cons1 <- as.numeric(cl.cons.ordered[i, ])
121 |     samp.between <- which(cl.lab1 != cl.lab1[i])  # samples outside the cluster of sample i
122 |     max.mean.between.cc.all[i] <- max(tapply(cl.cons1[samp.between],
123 |                                              cl.lab1[samp.between], mean))
124 |   }
125 |   anno.list[[set1]]$cc.mean.max.between <- max.mean.between.cc.all
126 |   # Difference between the within-cluster mean and the best between-cluster mean.
127 |   anno.list[[set1]]$cc.mean.min.diff <- anno.list[[set1]]$cc.mean.within_label - anno.list[[set1]]$cc.mean.max.between
128 | }
129 | 
130 | 
131 | 
132 | keep.anno.cols <- c("cluster_label", "cc.mean.within_label", 
133 |                     "cc.mean.max.between", "cc.mean.min.diff")
134 | anno.nuc.cell <- rbind(data.frame(sample.type = "nuc", anno.list[["nuc_IE_IE"]][, keep.anno.cols]),
135 |                        data.frame(sample.type = "cell", anno.list[["cell_IE_IE"]][, keep.anno.cols]))
136 | colnames(anno.nuc.cell) <- sub("_label", "", colnames(anno.nuc.cell))
137 | anno.nuc.cell$sample.type <- factor(anno.nuc.cell$sample.type, levels = c("nuc", "cell"))
138 | anno.nuc.cell$cluster_pair <- sapply(anno.nuc.cell$cluster, 
139 |                                      function(x) strsplit(as.character(x), "_")[[1]][4])
140 | 
141 | # Cell v nuc cocl metrics: per cluster pair, fit metric ~ sample.type and keep the "nuc" coefficient row.
142 | # sample.type has "nuc" as first level (plot order), so "cell" is made the reference level before
143 | # fitting; otherwise the coefficient is named "sample.typecell" and the "sample.typenuc" lookup fails.
144 | cellvnuc.list <- list()
145 | for (cc.metric in c("cc.mean.within", "cc.mean.max.between", "cc.mean.min.diff")) {
146 |   cellvnuc.cl <- NULL
147 |   for (pair1 in unique(anno.nuc.cell$cluster_pair)) {
148 |     anno.nuc.cell.subset <- subset(anno.nuc.cell, cluster_pair == pair1)
149 |     anno.nuc.cell.subset$sample.type <- relevel(anno.nuc.cell.subset$sample.type, ref = "cell")
150 |     lm1 <- lm(as.formula(paste(cc.metric, "~ sample.type")), anno.nuc.cell.subset)
151 |     coef1 <- t(coef(summary(lm1))["sample.typenuc", ])
152 |     cellvnuc.cl <- rbind(cellvnuc.cl, data.frame(cluster_pair = pair1, coef1))
153 |   }
154 |   cellvnuc.cl$p_adj <- p.adjust(cellvnuc.cl$Pr...t.., "bonferroni")  # Pr(>|t|) after data.frame name mangling
155 |   cellvnuc.list[[cc.metric]] <- cellvnuc.cl
156 | }
157 | 
158 | # lm2 <- lm(cc.mean.min.diff ~ sample.type + cluster_pair, anno.nuc.cell)
159 | # summary(lm2)
160 | 
161 | 
162 | # Cell v nuc cocl plots (three box plots that differ only in the y variable, y label and output file)
163 | g2a <- ggplot(anno.nuc.cell, aes(x = cluster_pair, y = cc.mean.within, 
164 |                                  color = sample.type)) +
165 |   geom_point(position=position_jitterdodge(jitter.width = 0.15), 
166 |              size = 0.5, alpha = 0.1) +
167 |   geom_boxplot(outlier.shape = NA, size = 0.3) +
168 |   xlab("Matched clusters") +
169 |   ylab("Cluster cohesion") +
170 |   scale_color_brewer(palette="Set1", name = "", labels = c("Nuclei", "Cells")) +
171 |   theme_bw() +
172 |   theme(panel.grid.major = element_line(size = 0.1), 
173 |         panel.grid.minor = element_blank(), 
174 |         axis.text.x=element_text(angle=45,hjust=1))
175 | plot(g2a)
176 | ggsave(g2a, width = 6, height = 4, 
177 |        filename = "output/nuc_vs_cell_cc.mean.within.pdf")
178 | 
179 | # Same layout for cc.mean.max.between
180 | g2b <- ggplot(anno.nuc.cell, aes(x = cluster_pair, y = cc.mean.max.between, 
181 |                                  color = sample.type)) +
182 |   geom_point(position=position_jitterdodge(jitter.width = 0.15), 
183 |              size = 0.5, alpha = 0.1) +
184 |   geom_boxplot(outlier.shape = NA, size = 0.3) +
185 |   xlab("Matched clusters") +
186 |   ylab("Cluster relatedness") +
187 |   scale_color_brewer(palette="Set1", name = "", labels = c("Nuclei", "Cells")) +
188 |   theme_bw() +
189 |   theme(panel.grid.major = element_line(size = 0.1), 
190 |         panel.grid.minor = element_blank(), 
191 |         axis.text.x=element_text(angle=45,hjust=1))
192 | plot(g2b)
193 | ggsave(g2b, width = 6, height = 4, 
194 |        filename = "output/nuc_vs_cell_cc.mean.max.between.pdf")
195 | 
196 | # Same layout for cc.mean.min.diff
197 | #### Figure 4E - Nuclei vs cell cluster separation ####
198 | g2c <- ggplot(anno.nuc.cell, aes(x = cluster_pair, y = cc.mean.min.diff, 
199 |                                  color = sample.type)) +
200 |   geom_point(position=position_jitterdodge(jitter.width = 0.15), 
201 |              size = 0.5, alpha = 0.1) +
202 |   geom_boxplot(outlier.shape = NA, size = 0.3) +
203 |   xlab("Matched clusters") +
204 |   ylab("Cluster separation") +
205 |   scale_color_brewer(palette="Set1", name = "", labels = c("Nuclei", "Cells")) +
206 |   theme_bw() +
207 |   theme(panel.grid.major = element_line(size = 0.1), 
208 |         panel.grid.minor = element_blank(), 
209 |         axis.text.x=element_text(angle=45,hjust=1))
210 | plot(g2c)
211 | ggsave(g2c, width = 6, height = 4, 
212 |        filename = "output/nuc_vs_cell_cc.mean.min.diff.pdf")
213 | 
214 | 
215 | 


--------------------------------------------------------------------------------
/R/Figure4BF_S4ABC.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Figure 4 - Cell type homology"
  3 | output: html_notebook
  4 | ---
  5 |   
  6 |   
  7 | ```{r init-workspace, echo=FALSE, message=FALSE, warning=FALSE}
  8 | # Load libraries 
  9 | library(feather)
 10 | library(reshape2)
 11 | library(dplyr)
 12 | library(ggplot2)
 13 | library(ggrepel)
 14 | library(RColorBrewer)
 15 | library(pheatmap)
 16 | library(boot)
 17 | library(limma)
 18 | 
 19 | # Load functions
 20 | calc_beta <- function(y, spec.exp = 2) {
 21 |   # Marker score combining specificity and sparsity:
 22 |   # sum of pairwise distances raised to spec.exp, divided by the plain sum of distances.
 23 |   pair.dist <- as.matrix(dist(y))
 24 |   tiny <- 1e-10  # keeps the ratio defined when every distance is zero
 25 |   sum(pair.dist^spec.exp) / (sum(pair.dist) + tiny)
 26 | }
 27 | 
 28 | ```
 29 | 
 30 | 
 31 | ```{r load-data}
 32 | all.paths <- c("../data/20170818_VISp_L5_nuc/",
 33 |                "../data/20170818_VISp_L5_cell/",
 34 |                "../data/mouse_VISp_SMV1_1679/")
 35 | names(all.paths) <- c("Mouse_L5_Nuc", "Mouse_L5_Cell", "Mouse_V1_Tasic")
 36 | paths <- all.paths[c("Mouse_L5_Nuc", "Mouse_L5_Cell")]  # Figure 4B, S4B,C
 37 | # paths <- all.paths[c("Mouse_L5_Nuc", "Mouse_V1_Tasic")]  # Figure S4A
 38 | # paths <- all.paths[c("Mouse_L5_Cell", "Mouse_V1_Tasic")]  # Figure S4A
 39 | # Per data set: dendrogram (if present), sample annotation, cluster annotation, gene x cluster proportions.
 40 | dend <- list()
 41 | anno <- list()
 42 | anno.cl <- list()
 43 | prop.all <- list()
 44 | for (dat1 in names(paths)) {
 45 |   if (file.exists(paste0(paths[[dat1]], "dend.RData"))) {
 46 |     dend[[dat1]] <- readRDS(paste0(paths[[dat1]], "dend.RData"))
 47 |   }
 48 |   anno[[dat1]] <- as.data.frame(read_feather(paste0(paths[[dat1]], "anno.feather")))
 49 |   colnames(anno[[dat1]])[colnames(anno[[dat1]]) == "final_label"] <- "cluster_label"
 50 |   prop1 <- as.data.frame(t(read_feather(paste0(paths[[dat1]], "prop.feather"))))
 51 |   rownames(prop1) <- toupper(rownames(prop1))
 52 |   # keep.cl = clusters to retain: dendrogram leaves (L5 data) or the first 42 labels by final_id (Tasic).
 53 |   if (grepl("Mouse_L5", dat1)) {
 54 |     colnames(prop1) <- unique(anno[[dat1]]$cluster_label)
 55 |     keep.cl <- unlist(dendrapply(dend[[dat1]], function(x) if (is.leaf(x)) attr(x, "label")))
 56 |     anno.cl1 <- data.frame(cluster_label = unique(anno[[dat1]][, c("cluster_label")]))
 57 |   } else if (grepl("Mouse_V1_Tasic", dat1)) {
 58 |     anno[[dat1]]$cluster_label <- sub("^[0-9]+ f[0-9]+ ", "", anno[[dat1]]$cluster_label)
 59 |     colnames(prop1) <- unique(anno[[dat1]]$cluster_label)
 60 |     keep.cl <- unique(anno[[dat1]]$cluster_label[order(anno[[dat1]]$final_id)])
 61 |     keep.cl <- keep.cl[keep.cl != ""]
 62 |     keep.cl <- keep.cl[c(1:42)]  # Neuronal cell types
 63 |     anno[[dat1]] <- droplevels(subset(anno[[dat1]], cluster_label %in% keep.cl))
 64 |     anno.cl1 <- data.frame(cluster_label = unique(anno[[dat1]][, c("cluster_label")]))
 65 |   }
 66 |   
 67 |   # Common annotation; sizes are looked up by each row's own label (x = labels, table = names(cl.size))
 68 |   cl.size <- table(anno[[dat1]]$cluster_label)
 69 |   anno.cl1$size <- as.vector(cl.size[match(anno.cl1$cluster_label, names(cl.size))])
 70 |   anno.cl[[dat1]] <- anno.cl1[match(keep.cl, anno.cl1$cluster_label), ]
 71 |   anno[[dat1]] <- droplevels(subset(anno[[dat1]], cluster_label %in% keep.cl))
 72 |   prop1.subset <- prop1[, match(keep.cl, colnames(prop1))]
 73 |   prop.all[[dat1]] <- prop1.subset
 74 | }
 75 | 
 76 | ```
 77 | 
 78 | 
 79 | 
 80 | ```{r process-data, echo=FALSE}
 81 | prop <- prop.all
 82 | # Restrict both data sets to their shared row names (genes).
 83 | shared.genes <- intersect(rownames(prop[[1]]), rownames(prop[[2]]))
 84 | # cnt = per gene, the number of clusters whose proportion value exceeds 0.5.
 85 | cnt <- list()
 86 | for (dat1 in names(paths)) {
 87 |   prop[[dat1]] <- prop[[dat1]][shared.genes, ]
 88 |   cnt[[dat1]] <- apply(prop[[dat1]], 1, function(x) sum(x > 0.5))
 89 | }
 90 | 
 91 | ```
 92 | 
 93 | 
 94 | ```{r calc-marker-scores}
 95 | beta <- list()  # per data set: calc_beta applied to every row (gene) of prop
 96 | for (dat1 in names(paths)) {
 97 |   beta[[dat1]] <- apply(prop[[dat1]], 1, calc_beta)
 98 | }
 99 | 
100 | ```
101 | 
102 | 
103 | #### Select genes that mark clusters in both data sets.
104 | ```{r subset-genes}
105 | # Empirical trade-off between number of genes and cell type specificity
106 | cnt1.max <- ncol(prop[[1]]) / 2; cnt2.max <- ncol(prop[[2]]) / 2
107 | keep.genes <- which(cnt[[1]] < cnt1.max & cnt[[2]] < cnt2.max & 
108 |                       cnt[[1]] > 0 & cnt[[2]] > 0 & 
109 |                       beta[[1]] > 0.3 & beta[[2]] > 0.3)
110 | # Kept genes: above 0.5 in at least one but fewer than half of the clusters, and beta > 0.3, in both sets.
111 | 
112 | prop.subset <- list()
113 | cnt.subset <- list()
114 | score.subset <- list()
115 | for (dat1 in names(paths)) {
116 |   prop.subset[[dat1]] <- prop[[dat1]][keep.genes, ]  # grep("L5a", colnames(prop[[dat1]]))
117 |   cnt.subset[[dat1]] <- cnt[[dat1]][keep.genes]
118 |   score.subset[[dat1]] <- beta[[dat1]][keep.genes]
119 | }
120 | 
121 | ```
122 | 
123 | 
124 | 
125 | ```{r match-clusters}
126 | cl.pairs <- expand.grid(colnames(prop.subset[[1]]), colnames(prop.subset[[2]]))
127 | # Weighted correlation for every cluster pair; gene weights are the product of the two marker scores.
128 | cor.all <- NULL
129 | cor.wt <- score.subset[[1]] * score.subset[[2]]
130 | for (i in 1:nrow(cl.pairs)) {
131 |   cl1 <- cl.pairs[i, 1]
132 |   cl2 <- cl.pairs[i, 2]
133 |   cor1 <- cov.wt(cbind(prop.subset[[1]][, cl1], prop.subset[[2]][, cl2]), wt = cor.wt,
134 |                  center = TRUE, cor = TRUE)$cor[1, 2]  # Weighted cor
135 |   cor.all <- c(cor.all, cor1)
136 | }
137 | names(cor.all) <- apply(cl.pairs, 1, function(x) paste(x, collapse = "~"))
138 | 
139 | # tail(sort(cor.all), 20)
140 | 
141 | ```
142 | 
143 | 
144 | ### Compare expression of marker genes between data sets
145 | #### Correlate proportion of cells/nuclei in each cluster expressing marker genes with FPKM > 1.
146 | ```{r Figure_4B_S4A-plot-cor-heatmap, fig.width = 12.5, fig.height = 4.5}
147 | cor.m <- matrix(0, ncol(prop.subset[[1]]), ncol(prop.subset[[2]]), 
148 |                 dimnames = list(colnames(prop.subset[[1]]),
149 |                                 colnames(prop.subset[[2]])))
150 | # Fill the matrix from cor.all, which is in the same row order as cl.pairs.
151 | counter <- 0
152 | for (i in 1:nrow(cl.pairs)) {
153 |   counter <- counter + 1
154 |   cl1 <- cl.pairs[i, 1]
155 |   cl2 <- cl.pairs[i, 2]
156 |   cor.m[cl1, cl2] <- cor.all[counter]
157 | }
158 | 
159 | cor.ml <- melt(cor.m)
160 | # Tile heatmap of the correlations on a blue-white-red scale fixed to [-1, 1].
161 | g1 <- ggplot(cor.ml, aes(x = Var2, y = Var1, fill = value)) +
162 |   geom_tile(color = "grey80") +
163 |   scale_y_discrete(limits = rev(levels(cor.ml$Var1))) +
164 |   # scale_fill_gradient2(low = "dark blue", high = "red", mid = "white",
165 |   #  midpoint = 0, limit = c(min(cor.m), max(cor.m)), space = "Lab", name="Correlation") +
166 |   guides(fill=guide_colorbar("Correlation")) +
167 |   # scale_fill_gradientn(colors = rev(brewer.pal(n = 7, name = "RdYlBu")),
168 |   #                      guide = "colorbar") +
169 |   scale_fill_gradientn(colors = c(rev(brewer.pal(9, "Blues")), "white",
170 |                                   brewer.pal(9, "Reds")), 
171 |                        guide = "colorbar", limits = c(-1, 1)) +
172 |   theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
173 |   xlab(names(paths)[2]) +
174 |   ylab(names(paths)[1])
175 | plot(g1)
176 | ggsave(g1, filename = paste0("../output/", names(paths)[1],
177 |                              "_vs_", names(paths)[2], "_heatmap.pdf"), 
178 |        width = 12, height = 3.5)
179 | 
180 | ```
181 | 
182 | 
183 | #### Threshold correlation matrix to identify best matches for the first data set (light blue) and the second data set (pink), reciprocal best matches (light purple), and one-to-one matches (dark purple).
184 | ```{r find-cl-homol, fig.width = 20, fig.height = 12}
185 | row.max <- t(apply(cor.m, 1, function(x) ifelse(x == max(x), 1, 0)))
186 | col.max <- apply(cor.m, 2, function(x) ifelse(x == max(x), 2, 0))
187 | # Codes in cor.m2: 1 = row maximum, 2 = column maximum, 3 = both, 4 = both and the only nonzero entry in its row and column.
188 | cor.m2 <- row.max + col.max
189 | cl.match <- which(cor.m2 == 3)
190 | for (i in cl.match) {
191 |   row.val <- cor.m2[row(cor.m2)[i], ]
192 |   col.val <- cor.m2[, col(cor.m2)[i]]
193 |   if (sum(row.val) == 3 & sum(col.val) == 3) {
194 |     cor.m2[i] <- 4
195 |   }
196 | }
197 | 
198 | # Each entry minus the largest other value in its row/column (max taken after removing the first maximum).
199 | # Scale plot by difference in correlation
200 | cor.diff.row <- t(apply(cor.m, 1, function(x) x - max(x[-which.max(x)])))
201 | cor.diff.col <- apply(cor.m, 2, function(x) x - max(x[-which.max(x)]))
202 | cor.diff.rowl <- melt(cor.diff.row)
203 | cor.diff.coll <- melt(cor.diff.col)
204 | cor.diff <- apply(cbind(cor.diff.rowl$value, cor.diff.coll$value), 1, min)
205 | cor.diffm <- matrix(cor.diff, nrow(cor.m), ncol(cor.m))
206 | cor.m2l <- melt(cor.m2)
207 | cor.m2l$specificity <- ifelse(cor.diff < 0, 0, cor.diff) #- min(cor.diffl$value)
208 | cor.m2l$cor <- cor.ml$value
209 | 
210 | cor.m2l$homology_type <- as.factor(cor.m2l$value)
211 | homol.df <- data.frame(orig_lev = c("0", "1", "2", "3", "4"), 
212 |                        new_lev = c("None", paste(names(paths)[1], "best match"), 
213 |                                    paste(names(paths)[2], "best match"), 
214 |                                    "Reciprocal best match", "One-to-one"),
215 |                        lev_color = c("#FFFFFF00", "light blue", "pink", "darkorchid1", "darkorchid4"))
216 | levels(cor.m2l$homology_type) <- homol.df$new_lev[match(levels(cor.m2l$homology_type), homol.df$orig_lev)]
217 | homol.pal <- as.character(homol.df$lev_color[match(levels(cor.m2l$homology_type), homol.df$new_lev)])
218 | # Dot plot: colour = homology type, size = specificity.
219 | g2 <- ggplot(cor.m2l, aes(x = Var2, y = Var1, size = specificity, color = homology_type)) +
220 |   geom_point() +
221 |   scale_color_manual(values = homol.pal) +
222 |   theme_bw() +
223 |   scale_y_discrete(limits = rev(levels(cor.m2l$Var1))) +
224 |   scale_size(range = c(3, 8)) +
225 |   theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
226 |   xlab(names(paths)[2]) +
227 |   ylab(names(paths)[1])
228 | plot(g2)
229 | 
230 | ```
231 | 
232 | #### For unique best matching clusters, compare cluster proportions
233 | ```{r Figure_S4C-compare-cl-prop, fig.height = 5, fig.width = 5}
234 | cl.match <- cbind(rownames(cor.m2)[apply(cor.m2, 2, which.max)], 
235 |                   colnames(cor.m2))
236 | good.match <- which(apply(cor.m2, 2, max) >= 3 & apply(cor.diffm, 2, max) > 0.0)
237 | # drop = FALSE keeps a two-column matrix even when exactly one cluster pair passes the filter.
238 | cl.to.plot <- cl.match[good.match, , drop = FALSE]
239 | cl.to.plot.lab <- sapply(cl.to.plot[, 2], function(x) strsplit(x, "_")[[1]][4])
240 | # cl.to.plot <- cl.match  # Keep all cluster pairs
241 | # cl.to.plot <- cl.to.plot[order(cl.to.plot[, 2]), ]
242 | if (nrow(cl.to.plot) > 0) {
243 |   
244 |   # Cluster sizes
245 |   cl.prop <- list()
246 |   for (i in 1:2) { 
247 |     cl.cnt1 <- as.matrix(table(anno[[i]]$cluster_label))
248 |     cl.cnt.subset <- cl.cnt1[rownames(cl.cnt1) %in% unlist(cl.to.plot), ]
249 |     cl.prop[[i]] <- cl.cnt.subset / sum(cl.cnt.subset)
250 |   }
251 |   
252 |   cl1.prop.subset <- cl.prop[[1]][cl.to.plot[, 1]]
253 |   cl2.prop.subset <- cl.prop[[2]][cl.to.plot[, 2]]
254 |   cl.prop.df <- data.frame(cl.to.plot.lab, cl1.prop.subset, cl2.prop.subset)
255 |   
256 |   g1 <- ggplot(cl.prop.df, aes(x = cl1.prop.subset, y = cl2.prop.subset)) +
257 |     geom_abline(aes(intercept = 0, slope = 1), color = "grey90") +
258 |     geom_point() +
259 |     geom_text_repel(label = cl.to.plot.lab, size = 2) +
260 |     xlim(c(0, 0.25)) +
261 |     ylim(c(0, 0.3)) +
262 |     xlab("Proportion of nuclei in cluster") +
263 |     ylab("Proportion of cells in cluster") +
264 |     theme_bw() +
265 |     theme(panel.grid.major = element_blank(),
266 |           panel.grid.minor = element_blank())
267 |   plot(g1)
268 |   ggsave(g1, filename = "../output/cl_prop_scatter.pdf", 
269 |          width = 3, height = 3)
270 |   
271 |   # Summary table; built inside the if-block because it needs the proportion vectors defined above.
272 |   nuc.cell.prop <- data.frame(cluster_pair = cl.to.plot.lab,
273 |                               Nuclei_prop = round(cl1.prop.subset, 2),
274 |                               Cell_prop = round(cl2.prop.subset, 2))
275 |   print(nuc.cell.prop[order(nuc.cell.prop$cluster_pair), ])
276 |   write.csv(nuc.cell.prop[order(nuc.cell.prop$cluster_pair), ], 
277 |             file = "../output/nuc_cell_cl_prop_table.csv",
278 |             row.names = FALSE)
279 | }
280 | ```
281 | 
282 | 
283 | #### For unique best matching clusters, plot expression of marker genes.
284 | #### Label genes that are cell type specific in both data sets.
285 | ```{r Figure_S4B-plot-cl-pair, fig.height = 12, fig.width = 12}
286 | if (nrow(cl.to.plot) > 0) {
287 |   shared.marker.genes <- list()
288 |   pdf(file = "../output/nuc_cell_cl_match_cor.pdf", 
289 |       width = 8, height = 10)
290 |   par(mfrow = c(ceiling(sqrt(nrow(cl.to.plot))), ceiling(sqrt(nrow(cl.to.plot)))))
291 |   pair.idx <- seq_len(nrow(cl.to.plot))  # one index per plotted pair (previously hard-coded as 1:11)
292 |   cl1.labs <- paste0("N", pair.idx, "_", sapply(cl.to.plot[, 1], function(x) strsplit(x, "_")[[1]][4]))
293 |   cl2.labs <- paste0("C", pair.idx, "_", sapply(cl.to.plot[, 2], function(x) strsplit(x, "_")[[1]][4]))
294 |   # One panel per matched pair: per-gene proportions in data set 1 vs data set 2.
295 |   for (i in pair.idx) {
296 |     cl1 <- cl.to.plot[i, 1]
297 |     cl2 <- cl.to.plot[i, 2]
298 |     cl1.lab <- cl1.labs[i]
299 |     cl2.lab <- cl2.labs[i]
300 |     
301 |     cor1 <- cov.wt(cbind(prop.subset[[1]][, cl1], prop.subset[[2]][, cl2]), wt = cor.wt,
302 |                    center = TRUE, cor = TRUE)$cor[1, 2]  # Weighted cor
303 |     label.genes <- prop.subset[[1]][, cl1] > 0.5 & prop.subset[[2]][, cl2] > 0.5 &
304 |       cnt.subset[[1]] == 1 & cnt.subset[[2]] == 1
305 |     # label.genes: above 0.5 in both clusters of the pair and counted in exactly one cluster per data set
306 |     plot(jitter(prop.subset[[1]][, cl1]),
307 |          jitter(prop.subset[[2]][, cl2]), 
308 |          pch = 19, cex = 0.2, col = "grey", las = 1,
309 |          xlim = c(0, 1), ylim = c(0, 1), xlab = cl1.lab, ylab = cl2.lab, 
310 |          main = paste("r =", signif(cor1, 2)))
311 |     abline(0, 1, col = "grey")
312 |     if (sum(label.genes) > 0) {
313 |       text(jitter(prop.subset[[1]][label.genes, cl1]),
314 |            jitter(prop.subset[[2]][label.genes, cl2]), 
315 |            rownames(prop.subset[[1]])[label.genes], cex = 0.6)
316 |       cl.pair.name <- paste(cl1.lab, cl2.lab)
317 |       shared.marker.genes[[cl.pair.name]] <- paste0(rownames(prop.subset[[1]])[label.genes],
318 |                                                     collapse = ",")
319 |     }
320 |   }
321 |   dev.off()
322 | }
323 | 
324 | ```
325 | 
326 | ```{r Figure_4C-select-shared-markers}
327 | scores <- read.csv("../data/TableS6_Figure5_gene_info.csv")
328 | scores$total.clusters_nuc <- scores$inh.clusters_nuc + scores$exc.clusters_nuc
329 | scores$total.clusters_cell <- scores$inh.clusters_cell + scores$exc.clusters_cell
330 | # The pipeline result is printed, not assigned: the top gene by min_score per fpkm.max.cluster_cell group.
331 | # Select best shared markers of types
332 | as_tibble(scores) %>%
333 |   filter(total.clusters_nuc == 1 & 
334 |            fpkm.max_nuc > 1 &
335 |            !grepl("^Gm", gene) & !grepl("^LOC", gene) &
336 |            !grepl("^[0-9]", gene)) %>%
337 |   group_by(gene) %>%
338 |   mutate(min_score = pmin(marker.score_cell, marker.score_nuc)) %>%  # lower of the two marker scores
339 |   group_by(fpkm.max.cluster_cell) %>%
340 |   arrange(fpkm.max.cluster_cell) %>%
341 |   top_n(1, min_score) %>%
342 |   select(fpkm.max.cluster_cell, gene, min_score, marker.score_cell, marker.score_nuc)
343 | 
344 | 
345 | ```
346 | 
347 | 
348 | ```{r Figure_4F-compare-marker-scores}
# Compare nuclei and cell marker scores for genes whose total cluster counts
# are strictly between 0 and 11 in both data sets. `scores` is created by the
# Figure_4C chunk.
scores.subset <- subset(scores, total.clusters_nuc > 0 & total.clusters_nuc < 11 &
                          total.clusters_cell > 0 & total.clusters_cell < 11)
# Regression through the origin of cell score on nuclei score
summary(lm(marker.score_cell ~ 0 + marker.score_nuc, data = scores.subset))


# Scatter plot with the identity line (black) and the through-origin fit (blue).
# No colour aesthetic is mapped, so no colour scale is added.
g1 <- ggplot(scores.subset, aes(x = marker.score_nuc, y = marker.score_cell)) + 
  geom_point(alpha = 0.1) +
  geom_abline(intercept = 0, slope = 1) +
  geom_smooth(method='lm', se = FALSE, formula = y ~ 0 + x, 
              fullrange = TRUE, color = "blue") +
  xlim(c(0, 1)) +
  ylim(c(0, 1)) +
  xlab("Nuclei marker score") +
  ylab("Cells marker score") +
  theme_bw() +
  theme(panel.grid.minor = element_blank())
plot(g1)
ggsave(g1, width = 2.5, height = 2.5, 
       filename = "../output/nuc_vs_cell_marker_scores.pdf")
369 | 
370 | ```
371 | 
372 | 


--------------------------------------------------------------------------------
/R/Figure5.R:
--------------------------------------------------------------------------------
  1 | # Load libraries
library(plyr)
library(reshape2)
library(feather)
library(ggplot2)
library(ggrepel)
  6 | 
  7 | #### Calc nuclear fraction by type - Top 3 genes ####
  8 | load("../data/expr_summary.rda", verbose = TRUE)
  9 | 
 10 | top3.genes <- c("Malat1", "Meg3", "Snhg11")
 11 | nuc.ratio.df <- matrix(NA, ncol(meansIC), 6,
 12 |                        dimnames = list(colnames(meansIC), 
 13 |                                        paste0(top3.genes, c(rep("_mean", 3), rep("_sd", 3)))))
 14 | for (gene1 in top3.genes) {
 15 |   ratio.mean <- meansIC[gene1, ] / meansIN[gene1, ]
 16 |   ratio.var <- ratio.mean^2 * ((sdsIN[gene1, ] / meansIN[gene1, ])^2 +
 17 |                                  (sdsIC[gene1, ] / meansIC[gene1, ])^2)
 18 |   nuc.ratio.df[, paste0(gene1, "_mean")] <- ratio.mean
 19 |   nuc.ratio.df[, paste0(gene1, "_sd")] <- sqrt(ratio.var)
 20 | }
 21 | 
 22 | ratio.mean <- apply(nuc.ratio.df, 1, function(x) mean(x[1:3]))
 23 | ratio.var <- apply(nuc.ratio.df, 1, function(x) sum((x[4:6] / 3)^2))
 24 | nuc.ratio.df <- data.frame(cell_type = row.names(nuc.ratio.df), nuc.ratio.df, 
 25 |                            expr_ratio = ratio.mean, expr_ratio_sd = sqrt(ratio.var))
 26 | 
 27 | 
 28 | 
#### Calc nuclear fraction by type - Introns ####
# Load annotation data
# Nuclei and cell annotations are stacked (nuclei rows first) using only the
# columns present in both tables.
anno.nuc <- as.data.frame(read_feather("../data/20170818_VISp_L5_nuc/anno.feather"))
anno.cell <- as.data.frame(read_feather("../data/20170818_VISp_L5_cell/anno.feather"))
shared.anno.cols <- intersect(colnames(anno.nuc), colnames(anno.cell))
anno.all <- rbind(anno.nuc[, shared.anno.cols], anno.cell[, shared.anno.cols])
anno.all$cell_prep_type_label <- factor(anno.all$cell_prep_type_label, 
                                        levels = c("Nuclei", "Cells"))
# Cell type is the 4th "_"-separated field of the cluster label
anno.all$cell_type <- sapply(anno.all$cluster_label, 
                             function(x) strsplit(x, "_")[[1]][4])
anno.all$cell_type <- factor(anno.all$cell_type, levels = rev(nuc.ratio.df$cell_type))


# Per-cluster mean and sd of the intronic read percentage
nuc.intron <- tapply(anno.nuc$percent_reads_aligned_intron_label, anno.nuc$cluster_label, mean)
nuc.intron.sd <- tapply(anno.nuc$percent_reads_aligned_intron_label, anno.nuc$cluster_label, sd)
cell.intron <- tapply(anno.cell$percent_reads_aligned_intron_label, anno.cell$cluster_label, mean)
cell.intron.sd <- tapply(anno.cell$percent_reads_aligned_intron_label, anno.cell$cluster_label, sd)

nuc.ratio.df <- data.frame(nuc.ratio.df, intron_ratio = NA, intron_ratio_sd = NA)
for (i in 1:nrow(nuc.ratio.df)) {
  # NOTE(review): the cell-type name is used as a regular expression and is
  # assumed to match exactly one cluster label in each data set -- confirm
  nuc.idx <- grep(row.names(nuc.ratio.df)[i], names(nuc.intron))
  cell.idx <- grep(row.names(nuc.ratio.df)[i], names(cell.intron))
  # Cell/nuclei intron ratio, with variance from summed squared relative errors
  intron.ratio <- cell.intron[cell.idx] / nuc.intron[nuc.idx]
  intron.ratio.var <- intron.ratio^2 * ((cell.intron.sd[cell.idx] / cell.intron[cell.idx])^2 +
                                          (nuc.intron.sd[nuc.idx] / nuc.intron[nuc.idx])^2)
  nuc.ratio.df[i, c("intron_ratio", "intron_ratio_sd")] <- c(intron.ratio, sqrt(intron.ratio.var))
}

# Sort types by the average of the two ratio estimates and save the table
order.by.nucfrac <- order(rowMeans(nuc.ratio.df[, c("expr_ratio", "intron_ratio")]))
nuc.ratio.df <- nuc.ratio.df[order.by.nucfrac, ]
write.csv(nuc.ratio.df, file = "../output/nuc.ratio.estimate.csv",
          row.names = FALSE)
 61 | 
 62 | 
# Load expression data
# Each .rda file is expected to define `nbt.data`, which is copied right after
# loading because the second load() overwrites it.
load("../data/20170818_VISp_L5_nuc_exon/20170818_VISp_L5_nuc_exon_iter_cl_data.rda")
expr.nuc <- nbt.data
load("../data/20170818_VISp_L5_cell_exon/20170818_VISp_L5_cell_exon_iter_cl_data.rda")
expr.cell <- nbt.data

# One row per sample (nuclei first, then cells), one column per plotted gene,
# bound to the cell type / prep type annotation.
# NOTE(review): assumes expression columns are in the same sample order as the
# rows of anno.all -- confirm
nuc.plot.genes <- c("Malat1", "Meg3", "Snhg11")
expr.anno.df <- data.frame(rbind(t(expr.nuc[nuc.plot.genes, ]), 
                                 t(expr.cell[nuc.plot.genes, ])), 
                           anno.all[, c("cell_type", "cell_prep_type_label")])
 73 | 
 74 | 
 75 | 
#### Figure 5A - Plot intronic read comparison ####
# Jittered points with box plots on top, one nuclei/cells pair per cell type;
# outliers are hidden in the box layer because every sample is already drawn.
g.intron.box <- ggplot(anno.all, aes(x = cell_type, 
                                     y = percent_reads_aligned_intron_label,
                                     color = cell_prep_type_label)) +
  geom_point(position=position_jitterdodge(jitter.width = 0.15), 
             size = 0.5, alpha = 0.1) +
  geom_boxplot(outlier.shape = NA, size = 0.3) +
  scale_color_brewer(palette="Set1", name = "", labels = c("Nuclei", "Cells")) +
  xlab("") +
  ylab("Intron read percentage") +
  ggtitle("Intron read percentage") +
  theme_bw() + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.title = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
plot(g.intron.box)
ggsave(g.intron.box, filename = "../output/intron_reads_by_type.pdf", 
       width = 4, height = 3)
 95 | 
 96 | 
 97 | 
#### Figure 5B, S5A - Plot top 3 gene expression ####
# One PDF per gene. get(gene1) looks the column up by name when the plot is
# built, so each plot is drawn and saved inside the loop iteration.
for (gene1 in nuc.plot.genes) {
  g.expr.box <- ggplot(expr.anno.df, aes(x = cell_type, y = get(gene1),
                                         color = cell_prep_type_label)) +
    geom_point(position=position_jitterdodge(jitter.width = 0.15), 
               size = 0.5, alpha = 0.1) +
    geom_boxplot(outlier.shape = NA, size = 0.3) +
    scale_color_brewer(palette="Set1", name = "", labels = c("Nuclei", "Cells")) +
    xlab("") +
    ylab("log2(CPM + 1)") +
    ggtitle(paste(gene1, "expression")) +
    theme_bw() + 
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          legend.title = element_blank(),
          axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
  plot(g.expr.box)
  g.expr.box.fn <- paste0("../output/", gene1, "_expr_by_type.pdf")
  ggsave(g.expr.box, filename = g.expr.box.fn, width = 4, height = 3)
}
118 | 
119 | 
#### Figure 5C - Plot comparison of nuclear fraction estimates ####
# Expression-based ratio (x) against intron-based ratio (y), one labelled
# point per cell type, +/- 1 sd error bars in both directions and a
# through-origin linear fit.
g.nucfrac <- ggplot(nuc.ratio.df, aes(x = expr_ratio, y = intron_ratio, label = cell_type)) +
  geom_smooth(method='lm', se = FALSE, formula = y ~ 0 + x, 
              fullrange = TRUE, color = "light blue") +
  geom_errorbarh(aes(xmin = expr_ratio - expr_ratio_sd, 
                     xmax = expr_ratio + expr_ratio_sd,
                     height = 0), color = "grey90") +
  geom_errorbar(aes(ymin = intron_ratio - intron_ratio_sd, 
                    ymax = intron_ratio + intron_ratio_sd, 
                    width = 0), color = "grey90") +
  geom_point() +
  geom_text_repel(size = 3) +
  xlim(c(0, 0.7)) +
  ylim(c(0, 0.9)) +
  xlab("Nuclear gene expression ratio") +
  ylab("Intronic read ratio") +
  ggtitle("Nuclear fraction varies among neuron types") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
plot(g.nucfrac)
ggsave(g.nucfrac, filename = "../output/nuc_frac_by_type.pdf", 
       width = 3, height = 3)

# Correlation of the two estimates, then a test of whether adding an intercept
# improves on the through-origin model (lm.2p vs lm.1p)
cor(nuc.ratio.df$intron_ratio, nuc.ratio.df$expr_ratio)
lm.2p <- lm(intron_ratio ~ 1 + expr_ratio, nuc.ratio.df)
lm.1p <- lm(intron_ratio ~ 0 + expr_ratio, nuc.ratio.df)
anova(lm.2p, lm.1p)
coef(summary(lm.1p))
149 | 
150 | 
151 | 
152 | 
153 | 
154 | 
#### Nuc fraction individual genes - properties ####
# Load the per-gene table and add the summed inhibitory + excitatory cluster
# counts for nuclei and for cells.
all.gene.info <- read.csv("../data/TableS6_Figure5_gene_info.csv")
all.gene.info$total.clusters_nuc <- all.gene.info$inh.clusters_nuc + all.gene.info$exc.clusters_nuc
all.gene.info$total.clusters_cell <- all.gene.info$inh.clusters_cell + all.gene.info$exc.clusters_cell

# Process gene info
# Bin nuclear.prop into ten 0.1-wide bins and relabel them "lower-upper"
nucprop.bins <- seq(0, 1, 0.1)
all.gene.info$nuclear.prop.bin <- cut(all.gene.info$nuclear.prop, nucprop.bins, include.lowest = TRUE)
levels(all.gene.info$nuclear.prop.bin) <- paste0(nucprop.bins[-length(nucprop.bins)], 
                                            "-", nucprop.bins[-1])
# Keep genes with nuclear.prop <= 1 that pass the cluster-count and FPKM
# thresholds in nuclei or in cells
all.gene.info.subset <- subset(all.gene.info, nuclear.prop <= 1 &
                                 ((total.clusters_nuc > 0 & fpkm.max_nuc > 1) | 
                                    (total.clusters_cell > 0 & fpkm.max_cell > 1)))
# Display names for gene types (mapvalues() comes from plyr)
all.gene.info.subset$type_of_gene <- mapvalues(all.gene.info.subset$type_of_gene, 
                                               from = c("ncRNA", "protein-coding", "pseudo"),
                                               to = c("Non-coding", "Protein-coding", "Pseudogene"))



# Nuclear enriched cell type markers
paste(subset(all.gene.info.subset, marker.score_nuc > 0.4 & nuclear.prop > 0.8 & fpkm.max_nuc > 32)$gene, collapse = ",")

# Cytoplasm enriched cell type markers
paste(subset(all.gene.info.subset, marker.score_cell > 0.4 & nuclear.prop < 0.05 & fpkm.max_cell > 32)$gene, collapse = ",")
179 | 
180 | 
#### Figure 5E - Nuclear proportion by gene type ####
# Histogram of nuclear.prop in one stacked panel per gene type; y axes are
# free so each type is scaled to its own gene count.
gene.types <- c("Non-coding", "Protein-coding", "Pseudogene")
g.nuclear.prop.hist <- ggplot(subset(all.gene.info.subset, type_of_gene %in% gene.types), 
                         aes(x = nuclear.prop, fill = type_of_gene)) +
  facet_wrap(~ type_of_gene, scale = "free_y", ncol = 1) +
  geom_histogram(color = "black", lwd = 0.1, show.legend = FALSE) +
  scale_x_continuous(breaks = seq(0, 1, 0.2)) +
  scale_fill_brewer(palette = "Dark2") +
  xlab("Nuclear proportion") +
  ylab("Number of genes") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_text(size = 12), 
        panel.border = element_rect(colour = "black"))
plot(g.nuclear.prop.hist)
ggsave(g.nuclear.prop.hist, filename = "../output/nuclear.prop_hist_by_genetype5.pdf", 
       width = 2.5, height = 5)
200 | 
201 | 
202 | 
#### Figure 5F - Non-coding/pseudogenes better markers ####
# gene2 applies the same filters as all.gene.info.subset but keeps the
# original type_of_gene labels and only the three gene types tested below.
gene2 <- droplevels(subset(all.gene.info, type_of_gene %in% c("protein-coding", "pseudo", "ncRNA") & 
                             nuclear.prop <= 1 &
                             ((total.clusters_nuc > 0 & fpkm.max_nuc > 1) | 
                                (total.clusters_cell > 0 & fpkm.max_cell > 1))))

# Overall and pairwise (Bonferroni-adjusted) tests of marker score by gene
# type; results are stored in kt1 / pw.wt1 and not printed here
kt1 <- kruskal.test(gene2$marker.score_cell ~ gene2$type_of_gene)
pw.wt1 <- pairwise.wilcox.test(gene2$marker.score_cell, gene2$type_of_gene, 
                               paired = FALSE, p.adjust.method = "bonf")


# Violin per gene type; the near-zero-width box plot and the median point mark
# the centre of each distribution
g.marker.bygene <- ggplot(subset(all.gene.info.subset, type_of_gene %in% gene.types), 
                          aes(x = type_of_gene, y = marker.score_cell, fill = type_of_gene)) +
  geom_violin(lwd = 0.1, show.legend = FALSE) +
  geom_boxplot(width = 0.0001, outlier.shape = NA, coef = 0, show.legend = FALSE) +
  stat_summary(fun.y=median, geom="point", size=1, show.legend = FALSE) +
  scale_fill_brewer(palette = "Dark2") +
  xlab("") +
  ylab("Marker score") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_text(size = 12), 
        panel.border = element_rect(colour = "black"),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
plot(g.marker.bygene)
ggsave(g.marker.bygene, filename = "../output/marker_score_vs_genetype3.pdf", 
       width = 2, height = 2.5)
233 | 
234 | 
235 | 
#### Figure 5G - Cytoplasm enriched genes are less likely marker genes ####
# Bin gene2 by nuclear proportion (same bins and labels as all.gene.info) and
# test marker score across bins; kt2 / pw.wt2 are stored, not printed
nucprop.bins <- seq(0, 1, 0.1)
gene2$nuclear.prop.bin <- cut(gene2$nuclear.prop, nucprop.bins, include.lowest = TRUE)
levels(gene2$nuclear.prop.bin) <- paste0(nucprop.bins[-length(nucprop.bins)], 
                                         "-", nucprop.bins[-1])
kt2 <- kruskal.test(gene2$marker.score_cell ~ gene2$nuclear.prop.bin)
pw.wt2 <- pairwise.wilcox.test(gene2$marker.score_cell, gene2$nuclear.prop.bin, 
                               paired = FALSE, p.adjust.method = "bonf")
summary(lm(marker.score_cell ~ nuclear.prop, 
           data = subset(all.gene.info.subset, type_of_gene %in% gene.types)))
# Output recorded from a previous run:
# Coefficients:
#   Estimate Std. Error t value Pr(>|t|)    
# (Intercept) 0.214165   0.003018  70.963  < 2e-16 ***
#   nuclear.prop     0.040506   0.007241   5.594 2.27e-08 ***
# TukeyHSD(aov(gene2$marker.score_cell ~ gene2$nuclear.prop.bin))


# NOTE(review): despite its name this holds the median, and it is not used
# anywhere in the visible part of this script
mean.score <- median(subset(all.gene.info.subset, type_of_gene %in% gene.types)$marker.score_cell)
# Box plot per nuclear-proportion bin with one linear trend across bins
# (group = 1 makes geom_smooth fit a single line over the discrete x axis)
g.marker.bynucprop <- ggplot(subset(all.gene.info.subset, type_of_gene %in% gene.types), 
                             aes(x = nuclear.prop.bin, y = marker.score_cell)) +
  geom_boxplot(fill = "grey", outlier.color = "grey90",
               outlier.size = 0.2, lwd = 0.1, fatten = 8) +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, 
              aes(group = 1), col = "light blue") +
  xlab("Nuclear proportion") +
  ylab("Marker score") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_text(size = 12), 
        panel.border = element_rect(colour = "black"),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
plot(g.marker.bynucprop)
ggsave(g.marker.bynucprop, filename = "../output/marker_score_by_nucprop5.pdf", 
       width = 2.5, height = 2.5)
272 | 
273 | 
274 | 
275 | 
#### Compare nuclear proportion to literature (Halpern et al. 2015; 10.1016/j.celrep.2015.11.036) ####
nucprop.halpern <- read.csv("../data/Halpern2015_TableS2_Nuc_Cyto_gene_counts.csv")

# For each sample, nuclear proportion = nuc / (nuc + cyto); set to NA where
# both the nuclear and cytoplasmic values are below 1. One "nucprop.<sample>"
# column is appended per sample.
tissue.samps <- c("MIN6.1", "MIN6.2", "liver.1", "liver.2")
for (tissue1 in tissue.samps) {
  nuc1 <- nucprop.halpern[, paste0("Nuc.", tissue1)]
  cyto1 <- nucprop.halpern[, paste0("Cyto.", tissue1)]
  nucprop1 <- nuc1 / (nuc1 + cyto1)
  nucprop1[which(nuc1 < 1 & cyto1 < 1)] <- NA
  nucprop.halpern <- cbind(nucprop.halpern, nucprop1)
  colnames(nucprop.halpern)[ncol(nucprop.halpern)] <- paste0("nucprop.", tissue1)
}
nucprop.cols <- grep("nucprop", colnames(nucprop.halpern))
# Pairwise correlation between samples
cor(nucprop.halpern[, nucprop.cols], use = "pair")
nucprop.halpern$nucprop_mean <- apply(nucprop.halpern[, nucprop.cols], 1, 
                                      mean, na.rm = TRUE)
# NOTE(review): na.omit() drops every row with an NA in any column, so genes
# missing in a single sample are removed despite na.rm = TRUE above -- confirm
# this is intended
nucprop.halpern <- na.omit(nucprop.halpern)

# Inner join on gene symbol
all.gene.info.subset2 <- merge(all.gene.info.subset, nucprop.halpern, 
                               by.x = "gene", by.y = "Gene")
296 | 
297 | 
#### Figure S5E - Plot comparison of nuc proportions - scatter ####
# Correlation between the two nuclear-proportion estimates
cor1 <- round(cor(all.gene.info.subset2$nuclear.prop, 
                  all.gene.info.subset2$nucprop_mean, use = "pair"), 2)
# Through-origin fit in the same orientation as the plot (y = nucprop_mean,
# x = nuclear.prop), so the slope in the title is the slope of the drawn line.
lm1 <- lm(nucprop_mean ~ 0 + nuclear.prop, data = all.gene.info.subset2)
slope1 <- round(coef(summary(lm1))[1], 2)

# Scatter of all genes with the identity line, the through-origin fit and the
# three highlighted genes labelled
g.nucprop.compare <- ggplot(all.gene.info.subset2, 
                            aes(x = nuclear.prop, y = nucprop_mean)) +
  geom_point(alpha = 0.1) +
  geom_abline(intercept = 0, slope = 1) +
  geom_smooth(method='lm', se = FALSE, formula = y ~ 0 + x, 
              color = "light blue", fullrange = TRUE, size = 1) +
  geom_point(data = subset(all.gene.info.subset2, gene %in% c("Malat1", "Meg3", "Snhg11")),
             aes(x = nuclear.prop, y = nucprop_mean)) +
  geom_text_repel(data = subset(all.gene.info.subset2, gene %in% c("Malat1", "Meg3", "Snhg11")),
                  aes(x = nuclear.prop, y = nucprop_mean, label = gene, fontface = "italic")) +
  xlab("Estimated nuclear proportion (this study)") +
  ylab("Estimated nuclear proportion (Halpern et al. 2015)") +
  ggtitle(paste0("Correlation = ", cor1, "; Slope = ", slope1)) +
  coord_fixed() +
  theme_bw()
plot(g.nucprop.compare)
ggsave(g.nucprop.compare, width = 4, height = 4,
       filename = "../output/nucprop_compare_halpern2015.pdf")
322 | 
323 | 
#### Figure S5F - Plot comparison of nuc proportions - hist ####
# Two-sample Kolmogorov-Smirnov test between the two estimates (stored only)
distrib.diff <- ks.test(all.gene.info.subset2$nuclear.prop, all.gene.info.subset2$nucprop_mean)

# Long format: one row per gene x estimate. melt() is called through its
# namespace because this script does not otherwise attach reshape2.
all.gene.info.subset2l <- reshape2::melt(
  all.gene.info.subset2[, c("gene", "nuclear.prop", "nucprop_mean")],
  id.vars = "gene", value.name = "nucprop")

# Overlaid semi-transparent histograms of the two estimates
g.nucprop.hist.compare <- ggplot(all.gene.info.subset2l, 
                                 aes(x = nucprop, fill = variable)) +
  geom_histogram(binwidth = 0.02, alpha = 0.5, position = "identity") +
  scale_x_continuous(breaks = seq(0, 1, 0.2)) +
  scale_fill_brewer(palette="Set1", name = "", 
                    labels = c("This study", "Halpern et al. 2015")) +
  xlab("Estimated nuclear proportion") +
  ylab("Number of genes") +
  theme_bw(base_size = 8) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
plot(g.nucprop.hist.compare)
ggsave(g.nucprop.hist.compare, width = 5, height = 2,
       filename = "../output/nucprop_compare_hist_halpern2015.pdf")
344 | 


--------------------------------------------------------------------------------
/R/Figure5DH_S5CD.rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Figure 5 - Soma vs Nuclei Size"
  3 | output: html_notebook
  4 | ---
  5 | 
  6 | ```{r setup, include=FALSE}
  7 | knitr::opts_chunk$set(echo = TRUE)
  8 | library(reshape2)
  9 | library(ggplot2)
 10 | library(feather)
 11 | 
 12 | RotateCoords <- function(xy, rot.angle = 0) {
 13 |   rotm <- matrix(c(
 14 |       cos(rot.angle),
 15 |       sin(rot.angle),
 16 |       -sin(rot.angle),
 17 |       cos(rot.angle)), ncol = 2)
 18 |   xy.rot <- t(rotm %*% (t(xy) - c(xy[1, 1], xy[1, 2])) + c(xy[1, 1], xy[1, 2]))
 19 | }
 20 | ```
 21 | 
 22 | ## Size of nuclei and soma 
We aim to measure the size of the soma and nucleus for cells in different layers of the mouse visual cortex. Cells labeled by layer-enriched Cre lines crossed to Ai14 are imaged on a confocal microscope. Maximum intensity projections of 5-um optical z-stacks (at 1-um intervals) from the confocal images are analyzed with CellProfiler to segment somata and their associated nuclei.
 24 | 
 25 | ## Plotting soma area vs nuclei area for Cre-lines.
 26 | ```{r Figure_5D_S5C-plot-nuc-prop-vs-cre, echo=FALSE, fig.width=4, fig.height = 4}
# Load data
area.dat <- read.csv(file = "../data/nuc_soma_area_cre_lines.csv")

# Volume estimates from cross-sectional areas (area^(3/2)) and their ratio
area.dat$n_vol_est <- area.dat$n_area^(3/2)
area.dat$s_vol_est <- area.dat$s_area^(3/2)
area.dat$nuc_soma_ratio_vol <- area.dat$n_vol_est / area.dat$s_vol_est
area.dat$cre <- factor(area.dat$cre, levels = c("rbp4", "nr5a1", "scnn1a_tg3"))


# Per Cre line: through-origin slope of nucleus volume on soma volume
for (cre1 in unique(area.dat$cre)) {
  area.subset <- subset(area.dat, cre == cre1)
  lm1 <- lm(I(n_vol_est) ~ I(s_vol_est) + 0, data = area.subset)
  print(paste(cre1, round(coef(lm1), 2)))
}

# One-way ANOVA of the volume ratio across Cre lines, with Tukey post-hoc test
aov1 <- aov(nuc_soma_ratio_vol ~ cre, data = area.dat)
summary(aov1)
print(TukeyHSD(aov1))

# Soma area vs nucleus area, coloured by Cre line, with through-origin fits
g.scatter <- ggplot(area.dat, aes(x = s_area, y = n_area, color = cre)) +
  # facet_wrap(~ cre, ncol = 1) +
  geom_point(size = 0.5) +
  geom_smooth(method = "lm", formula = "y ~ 0 + x", se = FALSE, size = 0.3) + 
  xlab(expression(paste("Soma area (", mu*m^2, ")"))) +
  ylab(expression(paste("Nucleus area (", mu*m^2, ")"))) +
  theme_bw()
plot(g.scatter)
ggsave(g.scatter, file = "../output/nuc_prop_cre_scatter.pdf", 
       height = 2.5, width = 4)


# Cre-line nuclear proportions estimated based on cell type mixture
# (hard-coded values; drawn below as a blue segment spanning the two estimates)
nuc.prop.expected <- data.frame(cre = c("rbp4", "nr5a1", "scnn1a_tg3"),
                                expr_nuc_prop = c(0.29, 0.49, 0.50),
                                intron_nuc_prop = c(0.46, 0.72, 0.74))

# Violin + jittered points of the measured volume ratio per Cre line, with the
# group mean as a large point
g.vio <- ggplot(area.dat, aes(x = cre, y = nuc_soma_ratio_vol)) +
  geom_violin(size = 0.3) +
  geom_jitter(alpha = 0.2, width = 0.3, size = 0.5, show.legend = FALSE) +
  geom_segment(data = nuc.prop.expected, color = "blue", size = 0.8,   
               mapping = aes(x = cre, xend = cre, y = expr_nuc_prop, 
                             yend = intron_nuc_prop)) +
  stat_summary(fun.y = mean, geom = "point", size = 2.5, show.legend = FALSE) +
  xlab("Cre-line") +
  ylab("Nuclear proportion") +
  theme_bw()
plot(g.vio)
ggsave(g.vio, file = "../output/nuc_prop_cre_violin.pdf", 
       height = 2.5, width = 2.5)
 76 | 
 77 | ```
 78 | 
 79 | ```{r Figure_S5C-nuc_prop_vs_depth, fig.width=6, fig.height = 4}
# Load data
nuc.soma <- read.csv(file = "../data/nuc_soma_area_wt.csv")


# Rows are consumed in consecutive pairs: row i is used as the soma outline
# and row i + 1 as its nucleus.
# NOTE(review): assumes an even number of rows in that soma/nucleus order
nuc.ratio.df <- data.frame()
for (i in seq(1, nrow(nuc.soma), 2)) {
  x.mean <- mean(nuc.soma$CenterX[c(i, i + 1)])
  y.mean <- mean(nuc.soma$CenterY[c(i, i + 1)])
  # Check that nuc/soma outlines are paired (centers aligned)
  # (x.cv is stored for inspection; nothing below filters on it)
  x.cv <- sd(nuc.soma$CenterX[c(i, i + 1)]) / mean(nuc.soma$CenterX[c(i, i + 1)])
  nuc.area <- nuc.soma$Area[i + 1]
  soma.area <- nuc.soma$Area[i]
  nuc.area.ratio <- nuc.area / soma.area
  # Volume ratio estimated from the area ratio
  nuc.vol.ratio <- nuc.area.ratio^(3/2)
  nuc.ratio.df <- rbind(nuc.ratio.df, cbind(x.mean, y.mean, x.cv, nuc.area, soma.area,
                                            nuc.area.ratio, nuc.vol.ratio))
}

# Rotate coords
# NOTE(review): RotateCoords() passes this value straight to cos()/sin(),
# which take radians -- confirm that -68 is not meant as degrees
alpha = -68  # Measured rotation
xy <- cbind(nuc.ratio.df$x.mean, -nuc.ratio.df$y.mean)
xy.rot <- RotateCoords(xy, rot.angle = alpha)

# The rotated coordinates are appended as columns `1` and `2`; depth is
# measured down from the largest rotated y value, plus a fixed offset of 30
nuc.ratio.df <- cbind(nuc.ratio.df, xy.rot)
nuc.ratio.df$depth_from_pia <- max(nuc.ratio.df$`2`) - nuc.ratio.df$`2` + 30


# Layer annotation
# Cumulative layer proportions scaled to the maximum depth (+10) give the
# layer boundaries used to assign each cell to a layer
layer.prop <- c(0, 0.12, 0.36, 0.08, 0.23, 0.21)  # Nissl annotated
layer.depth <- cumsum(layer.prop) * (max(nuc.ratio.df$depth_from_pia) + 10)
nuc.ratio.df$layer <- cut(nuc.ratio.df$depth_from_pia, breaks = layer.depth,
                          labels = paste0("L", c(1, "2/3", 4, 5, 6)))
layer.depth.df <- data.frame(x1 = 0.2, x2 = 0.3,
                             y1 = -layer.depth, y2 = -layer.depth)
# Mean volume ratio per layer (not used in the plot below)
prop.bylayer <- tapply(nuc.ratio.df$nuc.vol.ratio, nuc.ratio.df$layer, mean)

# Midpoint depth of each layer (not used in the plot below)
layer.midpt <- NULL
for (i in 2:length(layer.depth)) {
  layer.width <- layer.depth[i] - layer.depth[i-1]
  layer.midpt <- c(layer.midpt, layer.depth[i] - layer.width / 2)
}

# Nuclear proportion (x) against depth (y, negated so pia is at the top);
# point size and shade follow soma.area^(3/2); short black ticks mark the
# layer boundaries
g.prop.v.depth <- ggplot(nuc.ratio.df, aes(y = -depth_from_pia, x = nuc.vol.ratio,
                               size = soma.area^(3/2), color = soma.area^(3/2))) +
  geom_point(show.legend = FALSE) +
  geom_segment(data = layer.depth.df, aes(x = x1, xend = x2, y = y1, yend = y2),
               color = "black", size = 1, alpha = 0.5) +
  scale_x_continuous(limits = c(0.2, 1)) +
  xlab("Nuclear proportion") +
  ylab(expression(paste("Depth from pia (", mu*m, ")"))) +
  scale_color_gradient(low = "grey80", high = "black") +
  theme_bw(base_size = 18) +
  theme(panel.grid.minor = element_blank())
plot(g.prop.v.depth)
ggsave(g.prop.v.depth, file = "../output/nuc_prop_vs_depth.pdf", 
       height = 4, width = 4)
136 | 
137 | ```
138 | 
139 | 
140 | ```{r Figure_5H-nuc-soma-probe-counts, fig.width=4, fig.height=3}
# Per-cell probe counts with <probe>_nuc and <probe>_cyto columns
probe.cnt <- read.csv(file = "../data/nuc_soma_probe_counts.csv")

probes <- c("Calb1", "Pvalb", "Grik1")
probe.cntl <- NULL
for (probe1 in probes) {
  # Pull this probe's columns and strip the "<probe>_" prefix so every probe
  # shares the column names nuc / cyto
  keep.cols <- c("Cell", "Layer", paste0(probe1, c("_nuc", "_cyto")))
  probe1.df <- probe.cnt[, keep.cols]
  probe.prefix <- paste0(probe1, "_")
  colnames(probe1.df) <- sub(probe.prefix, "", colnames(probe1.df))
  # Whole-cell count is cytoplasmic plus nuclear
  probe1.df$cell <- probe1.df$cyto + probe1.df$nuc
  # Through-origin slope of nuclear on whole-cell counts
  lm1 <- lm(nuc ~ 0 + cell, data = probe1.df)
  slope.txt <- round(coef(lm1), 2)
  print(paste(probe1, slope.txt))
  probe.cntl <- rbind(probe.cntl, cbind(Probe = probe1, probe1.df))
}

# Nuclear vs whole-cell counts with one through-origin fit per probe
g.counts <- ggplot(probe.cntl, aes(x = cell, y = nuc, color = Probe)) +
  geom_point(alpha = 0.2) +
  stat_smooth(method='lm', se = FALSE, formula = y ~ 0 + x, size = 0.5) +
  xlab("Cell counts") +
  ylab("Nuclear counts") +
  theme_bw(base_size = 18) +
  theme(panel.grid.minor = element_blank())
plot(g.counts)
ggsave(g.counts, file = "../output/nuc_cell_probe_counts.pdf", 
       height = 4, width = 5)
165 | 
166 | 
167 | ```
168 | 
169 | 


--------------------------------------------------------------------------------
/R/extraFunctions.R:
--------------------------------------------------------------------------------
  1 | findFromGroups <- function(datExpr,groupVector,fn="mean"){
  2 |   groups   = names(table(groupVector))
  3 |   fn       = match.fun(fn)
  4 |   datMeans = matrix(0,nrow=dim(datExpr)[2],ncol=length(groups))
  5 |   for (i in 1:length(groups)){
  6 |     datIn = datExpr[groupVector==groups[i],]
  7 |     if (is.null(dim(datIn)[1])) { datMeans[,i] = as.numeric(datIn)
  8 |     } else { datMeans[,i] = as.numeric(apply(datIn,2,fn)) }
  9 |   };    colnames(datMeans)  = groups;
 10 |   rownames(datMeans) = colnames(datExpr)
 11 |   return(datMeans)
 12 | }
 13 | 
 14 | 
 15 | error.bar <- function(x, y, upper, lower=upper, length=0.1,...){
 16 |  if(length(x) != length(y) | length(y) !=length(lower) | length(lower) != length(upper))
 17 |  stop("vectors must be same length")
 18 |  arrows(x,y+upper, x, y-lower, angle=90, code=3, length=length, ...)
 19 | }
 20 | 
 21 | 
 22 | errorBarPlot <- function(vals,sampleType,col=standardColors(),legend=TRUE,elwd=2,ylim=NA,xlim=NA,length=0.1,...){
 23 |  if(is.null(dim(vals))) vals = cbind(vals,vals)
 24 |  yy <- t(findFromGroups(vals,sampleType))
 25 |  col = col[1:dim(yy)[1]]
 26 |  ee <- t(findFromGroups(vals,sampleType,sd))
 27 |  if(is.na(ylim[1])) ylim = c(0,max(ee+yy))
 28 |  if(is.na(xlim[1])) xlim = c(0,((dim(ee)[2]+1)*dim(ee)[1])*1.4)
 29 |  barx <- barplot(yy, beside=TRUE,col=col,legend.text=legend,ylim=ylim,xlim=xlim,...)
 30 |  error.bar(barx,yy,ee,lwd=elwd,length=length)
 31 | }
 32 | 
 33 | mouse2human2 <- function (mouse, m2h){
 34 |  # Convert mouse to human symbols
 35 |  rownames(m2h) = m2h$Mou
 36 |  noHumn = which(!(mouse%in%m2h$Mouse_Symbol))
 37 |  humn = which((mouse%in%m2h$Mouse_Symbol))
 38 |  mouse[humn] = as.character(m2h[mouse[humn],1])
 39 |  mouse[noHumn] = toupper(mouse[noHumn])
 40 |  return(mouse)
 41 | }
 42 | 
 43 | t.test.l <- function(x){
 44 |   l  = length(x)
 45 |   tt = t.test(x[1:(l/2)],x[(l/2+1):l],paired=FALSE)
 46 |   out = c(tt$est,tt$stat,tt$p.val)
 47 |   if(is.na(out[2])) out[2] = 0
 48 |   if(is.na(out[3])) out[3] = 1
 49 |   return(out)
 50 | }
 51 | 
 52 | getAnovaPvalforApply <- function(x,varLabels,varWeights=NULL){
 53 |   anovadat  = as.data.frame(cbind(varLabels,x))
 54 |   aov.out   = summary(aov(as.numeric(anovadat[,2])~anovadat[,1],data=anovadat,weights=varWeights))  
 55 |   return(aov.out[[1]]$'Pr(>F)'[1])
 56 | }
 57 | 
 58 | 
 59 | 
 60 | meanEx <- function(x) {if(sum(x)==0) return(0); return(mean(x[x>0]));}
 61 | 
 62 | 
 63 | t.test.l.paired <- function(x){
 64 |   l  = length(x)
 65 |   tt = t.test(x[1:(l/2)],x[(l/2+1):l],paired=TRUE)
 66 |   out = c(tt$est,tt$stat,tt$p.val)
 67 |   if(is.na(out[2])) out[2] = 0
 68 |   if(is.na(out[3])) out[3] = 1
 69 |   return(out)
 70 | }
 71 | 
 72 |   
 73 | getSpecificityScore <- function(propExpr,returnScore=FALSE) {
 74 |   # GET THE SPECIFICITY SCORE TO DETERMINE GENES THAT WILL GO INTO DENDROGRAM
 75 |   # This function is very similar to the beta marker gene score and was what was used to build the trees
 76 |   # Marker "specificity" score is combination of specificity and sparsity
 77 |   # propExpr = proportions of cells in a given cluster with CPM/FPKM > 1 (or 0, HCT uses 1)
 78 |   keep.cl <- colnames(propExpr)
 79 |   max.scores <- sapply(1:(length(keep.cl) - 1), function(x) {
 80 |     y <- c(rep(1, x), rep(0, length(keep.cl) - x))
 81 |     d1 <- dist(y)
 82 |     sum(d1^2) * sd(d1) / mean(d1)
 83 |     })
 84 |   marker.score <- apply(propExpr, 1, function(x) {
 85 |     d1 <- dist(x)
 86 |     sum(d1^2) * sd(d1) / mean(d1) / max(max.scores)
 87 |   })
 88 |   marker.score[is.na(marker.score)] <- 0
 89 |   if(returnScore) return(marker.score)
 90 |   scoreRank = rank(-marker.score)
 91 |   return(scoreRank)
 92 | }
 93 | 
 94 | 
 95 | calc_beta <- function(y, spec.exp = 2) {
 96 |   d1 <- as.matrix(dist(y))
 97 |   eps1 <- 1e-10
 98 |   # Marker score is combination of specificity and sparsity
 99 |   score1 <- sum(d1^spec.exp) / (sum(d1) + eps1)
100 |   return(score1)
101 | }
102 | 
103 | #####################################################################
104 | # FUNCTIONS FOR BUILDING AND PLOTTING THE TREE ARE BELOW
105 | 
# Build an average-linkage dendrogram over the columns of `input` and label
# its unlabeled nodes with labelDend().
#   input   : matrix whose columns are the items to cluster
#   distFun : function returning a dist object (default: 1 - correlation)
# Returns the labelled dendrogram invisibly.
getDend <- function(input,distFun = function(x) return(as.dist(1-cor(x)))){
  tree <- hclust(distFun(input), method = "average")
  labelled <- labelDend(as.dendrogram(tree))
  # labelDend() returns list(dendrogram, next counter); keep the dendrogram
  invisible(labelled[[1]])
}
112 | 
# Give every node of a dendrogram that has no "label" attribute a label "n<k>".
#   dend : dendrogram (or sub-tree)
#   n    : number to use for the next unlabelled node
# Returns list(labelled dendrogram, next unused number). Nodes are numbered in
# depth-first order, parent before children.
labelDend <- function(dend,n=1)
  {
    unlabelled = is.null(attr(dend,"label"))
    if(unlabelled){
      attr(dend, "label") = paste0("n",n)
      n = n + 1
    }
    if(length(dend)>1){
      for(child in seq_along(dend)){
        res = labelDend(dend[[child]], n)
        dend[[child]] = res[[1]]
        # carry the running counter over to the next sibling
        n = res[[2]]
      }
    }
    return(list(dend, n))
  }
128 | 
# Recursively reorder the branches of a dendrogram by the mean rank of their leaves.
#   dend    : dendrogram
#   l.rank  : numeric vector of ranks, named by leaf label
#   verbose : if TRUE, print the branch scores and the resulting order at every level
# Returns the dendrogram with, at every node, its children sorted by increasing
# mean l.rank of the leaves beneath them.
reorder.dend <- function(dend, l.rank,verbose=FALSE)
  {
    tmp.dend = dend
    # Mean rank of the leaves under each child (labels() is computed once per child)
    sc = sapply(seq_along(dend), function(i){
      mean(l.rank[labels(dend[[i]])])
    })
    ord = order(sc)
    if(verbose){
      print(sc)
      print(ord)
    }
    if(length(dend)>1){
      for(i in seq_along(dend)){
        # Read from the untouched copy so earlier swaps do not affect later ones
        if(ord[i]!=i){
          dend[[i]]= tmp.dend[[ord[i]]]
        }
        if(length(dend[[i]])>1){
          # Pass verbose down so the flag also applies to sub-trees
          dend[[i]]=reorder.dend(dend[[i]],l.rank,verbose=verbose)
        }
      }
    }
    return(dend)
  }
153 | 
154 | 
# This function builds the tree and plots the dendrogram
#   exprData  : expression matrix (genes x samples); log2(x+1) is applied here
#   cl        : cluster assignment per sample, named by sample (column) name
#   l.rank    : ranks named by cluster label, used to order the branches
#   topNgenes : number of top-ranked genes (by specificity score) used for the tree
#   ...       : passed on to plot()
# Returns the value of plot().
buildAndPlotTree <- function(exprData,cl,l.rank,topNgenes = 1200,...){

  # Get median expression per cluster and the proportions
  # drop=FALSE keeps matrices as matrices when a cluster has a single sample
  # or only one gene survives a filter
  normDat    = log2(exprData+1)
  medianExpr = do.call("cbind", tapply(names(cl), cl, function(x) rowMedians(normDat[,x,drop=FALSE]))) 
  rownames(medianExpr) = rownames(normDat) 
  # Keep genes whose median is above zero in at least one cluster
  medianExpr = medianExpr[apply(medianExpr,1,max)>0,,drop=FALSE]
  normDat    = normDat[rownames(medianExpr),,drop=FALSE]
  propExpr   = do.call("cbind", tapply(names(cl), cl, function(x) rowMeans(normDat[,x,drop=FALSE]>1))) 
  propExpr   = propExpr[,colnames(medianExpr),drop=FALSE]
  rownames(propExpr) = rownames(medianExpr) 

  # Calculate the marker score and plot the dendrogram
  specificityScoreRank <- getSpecificityScore(propExpr)

  # Build and reorder the dendrogram
  dend <- getDend (medianExpr[specificityScoreRank<=topNgenes,,drop=FALSE])
  dend <- reorder.dend(dend,l.rank)
  dend <- collapse_branch(dend, 0.01)
  dend <- dend %>% set("leaves_pch", 19) %>% set("leaves_cex", 2) 

  # Plot the dendrogram
  plot(dend,...)

}
181 |   
182 | 
183 | 


--------------------------------------------------------------------------------
/R/match_nuc_cells.R:
--------------------------------------------------------------------------------
  1 | # Note: Data files are large (>1Gb) and have not been included in this GitHub repo
  2 | # A subset of data is available from http://celltypes.brain-map.org/download
  3 | # The following code is provided to show how nucleus/cell pairs were determined
  4 | # The code will not run without input data, but the output is provided in the data subfolder
# Input and output directories (relative paths)
input.path.cell <- "cell"
input.path.cell.nuc <- "nuc"
out.path <- "output"


#### Load V1 L5 whole cell data ####
# Each .Rdata file is load()-ed into the current environment. The code below
# expects the objects samp.dat, introns, countsR and zero.wt to exist afterwards.
# NOTE(review): this list names "intron" while the nuclei section names "introns" - confirm the file name.
for (file1 in c("samp.dat", "intron", "exon", "zero.wt")) { 
  load(paste0(input.path.cell, "/", file1, "_mouse_star_21913.Rdata"))
}

# Keep samples whose roi contains "VISp"
keep.cell <- which(grepl("VISp", samp.dat$roi))

samp.cell <- droplevels(samp.dat[keep.cell, ])
# Sum of the two count matrices (presumably intron + exon reads - confirm)
cd.cell <- introns[, keep.cell] + countsR[, keep.cell]
# Counts per million per sample, rounded to integers, then log2(x + 1)
cpm.cell <- round(sweep(cd.cell, 2, colSums(cd.cell), "/") * 1e6, 0)
expr.cell <- log2(cpm.cell + 1)
# NOTE(review): wt.cell is not subset by keep.cell, yet it is later indexed by column position in expr.cell - confirm its columns already match.
wt.cell <- zero.wt$matw  # Dropout weight matrix calculated using scde as described in Methods
 22 | 
 23 | 
#### Load V1 L5 nuclei data ####
# Same object names as in the cell section are used afterwards (samp.dat, introns,
# countsR, zero.wt); presumably these load() calls replace the cell versions.
for (file1 in c("samp.dat", "introns", "exon", "zero.wt")) { 
  load(paste0(input.path.cell.nuc, "/Nuc/", file1, "_Mouse_Star_VISp_L5_Nuc.Rdata"))
}

# QC criteria
# A sample is excluded when it has fewer than 500000 aligned reads (mRNA + genome only),
# less than 75 percent of reads aligned in total, or less than 50 percent unique reads.
excl.samp <- with(samp.dat, 
                  reads_aligned_to_mrna + reads_aligned_to_genome_only < 500000 |
                    percent_reads_aligned_total < 75 |
                    percent_unique_reads < 50 )
# Samples with missing QC values are not excluded
excl.samp[is.na(excl.samp)] <- FALSE
# Keep nuclei preparations that pass QC
keep.nuc <- with(samp.dat, which(cell_prep_type == "Nuclei" & ! excl.samp))
samp.nuc <- droplevels(samp.dat[keep.nuc, ])
cd.nuc <- introns[, keep.nuc] + countsR[, keep.nuc]
# Counts per million per sample, rounded to integers, then log2(x + 1)
cpm.nuc <- round(sweep(cd.nuc, 2, colSums(cd.nuc), "/") * 1e6, 0)
expr.nuc <- log2(cpm.nuc + 1)
# NOTE(review): wt.nuc is not subset by keep.nuc, yet it is later indexed by column position in expr.nuc - confirm its columns already match.
wt.nuc <- zero.wt$matw
 41 | 
 42 | 
 43 | 
 44 | #### Find nuc/cell correlations ####
 45 | expr.nuc.cell <- cbind(expr.nuc, expr.cell)
 46 | nnuc <- ncol(expr.nuc)
 47 | ncell <- ncol(expr.cell)
 48 | cor.all <- matrix(NA, nnuc, ncell,
 49 |                   dimnames = list(colnames(expr.nuc), colnames(expr.cell)))
 50 | for (nuc1 in 1:nnuc) {
 51 |   print(nuc1)
 52 |   for (cell1 in 1:ncell) {
 53 |     wt1 <- wt.nuc[, nuc1] * wt.cell[, cell1]
 54 |     cor1 <- cov.wt(expr.nuc.cell[, c(nuc1, nnuc + cell1)], wt = wt1, 
 55 |                    center = FALSE, cor = TRUE)$cor
 56 |     cor.all[nuc1, cell1] <- cor1[1, 2]
 57 |   }
 58 | }
 59 | 
 60 | # Select best matching cells
 61 | order.cell <- order(apply(cor.all, 1, max), decreasing = TRUE)
 62 | cor.all.ordered <- cor.all[order.cell, ]
 63 | max.cell.all <- NULL
 64 | max.cor.all <- NULL
 65 | for (i in 1:nrow(cor.all.ordered)) {
 66 |   max.cor <- max(cor.all.ordered[i, ])
 67 |   max.cell <- which.max(cor.all.ordered[i, ])
 68 |   max.cor.all <- c(max.cor.all, max.cor)
 69 |   max.cell.all <- c(max.cell.all, max.cell)
 70 |   cor.all.ordered[, max.cell] <- 0
 71 | }
 72 | 
 73 | 
 74 | #### Find nuc/nuc correlations ####
 75 | nnuc <- ncol(expr.nuc)
 76 | cor.nuc <- matrix(NA, nnuc, nnuc,
 77 |                   dimnames = list(colnames(expr.nuc), colnames(expr.nuc)))
 78 | for (nuc1 in 1:nnuc) {
 79 |   print(nuc1)
 80 |   for (nuc2 in 1:nnuc) {
 81 |     wt1 <- wt.nuc[, nuc1]^2
 82 |     cor1 <- cov.wt(expr.nuc[, c(nuc1, nuc2)], wt = wt1, 
 83 |                    center = FALSE, cor = TRUE)$cor
 84 |     cor.nuc[nuc1, nuc2] <- cor1[1, 2]
 85 |   }
 86 | }
 87 | diag(cor.nuc) <- 0
 88 | 
 89 | 
 90 | # Select best matching nuclei
 91 | cor.nuc.ordered <- cor.nuc[order.cell, order.cell]
 92 | max.nuc.all <- apply(cor.nuc.ordered, 1, which.max)
 93 | max.cor.nuc <- apply(cor.nuc.ordered, 1, max)
 94 | 
 95 | 
 96 | 
 97 | #### Find matching cell/cell correlations ####
 98 | expr.cell.subset <- expr.cell[, names(max.cell.all)]
 99 | ncell <- ncol(expr.cell.subset)
100 | cor.cell <- matrix(NA, ncell, ncell,
101 |                    dimnames = list(colnames(expr.cell.subset), 
102 |                                    colnames(expr.cell.subset)))
103 | for (cell1 in 1:ncell) {
104 |   print(cell1)
105 |   for (cell2 in 1:ncell) {
106 |     wt1 <- wt.cell[keep.genes, cell1] * wt.cell[keep.genes, cell2]
107 |     cor1 <- cov.wt(expr.cell.subset[keep.genes, c(cell1, cell2)], wt = wt1, 
108 |                    center = FALSE, cor = TRUE)$cor
109 |     cor.cell[cell1, cell2] <- cor1[1, 2]
110 |   }
111 | }
112 | diag(cor.cell) <- 0
113 | 
114 | 
115 | 
# Sample ids
# All three vectors follow the nucleus order given by order.cell:
#   nuc         - the nucleus itself
#   cell        - the cell selected for that nucleus
#   nuc_matched - the best matching other nucleus
nuc.ids.ordered <- samp.nuc$exp_component_name[order.cell]
samp.ids <- list()
samp.ids[["nuc"]] <- nuc.ids.ordered
samp.ids[["cell"]] <- names(max.cell.all)
# max.nuc.all holds column positions within cor.nuc.ordered, i.e. positions in
# the ordered nucleus list, so it must index the ordered ids (order.cell is
# applied first, then max.nuc.all).
samp.ids[["nuc_matched"]] <- nuc.ids.ordered[max.nuc.all]


# One id per line, no header
for (id.set in c("cell", "nuc", "nuc_matched")) {
  write.table(samp.ids[[id.set]], file = paste0(out.path, "/", id.set, "_ids.txt"),
              row.names = FALSE, col.names = FALSE)
}

# An existing result file is never overwritten
if (! file.exists(paste0(out.path, "/map_nuc_to_cells.rda"))) {
  save(cor.all.ordered, max.cell.all, max.cor.all, 
       cor.nuc.ordered, max.nuc.all, max.cor.nuc,
       samp.ids, cor.cell,
       file = paste0(out.path, "/map_nuc_to_cells.rda"))
}
134 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # NucCellTypes
2 | Code to generate figures for a manuscript describing cell type clustering resolution using single nucleus and single cell RNA-sequencing data. The preprint is available from bioRxiv: https://doi.org/10.1101/239749.
3 | 


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_cell/20170818_VISp_L5_cell_iter_cl_data.rda:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:d87fa56e937ead1ed9c587b8dbb98cb6c3b914f3e7a20faf68fc82e1482ab1ce
3 | size 64240375
4 | 


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_cell/anno.feather:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/NucCellTypes/e666329ffdb795d6367957f405667d82d4074805/data/20170818_VISp_L5_cell/anno.feather


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_cell/cl.cons.csv.gz:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:aea4029596728e940e1de51635f965d3e93e6ac954e8efd29c4ce7986e91a2d6
3 | size 173959
4 | 


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_cell/dend.RData:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:81078748033afab42465aa01a1557c061354302a382b23feffd5e4b4a8624a47
3 | size 6155
4 | 


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_cell/prop.feather:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/NucCellTypes/e666329ffdb795d6367957f405667d82d4074805/data/20170818_VISp_L5_cell/prop.feather


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_cell_exon/20170818_VISp_L5_cell_exon_iter_cl_data.rda:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:635bb58793a8c76f3ab785a1bab6a2673e97466ab95f0248bb412547f44c32cc
3 | size 61815809
4 | 


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_cell_exon/anno.feather:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/NucCellTypes/e666329ffdb795d6367957f405667d82d4074805/data/20170818_VISp_L5_cell_exon/anno.feather


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_cell_exon/cl.cons.csv.gz:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:66d2719704b04327d7b7182b2be99fca9bebadc822a02841be72232ed591179e
3 | size 183871
4 | 


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_cell_varE_clIE/anno.feather:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/NucCellTypes/e666329ffdb795d6367957f405667d82d4074805/data/20170818_VISp_L5_cell_varE_clIE/anno.feather


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_cell_varE_clIE/cl.cons.csv.gz:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:82b2fad04b7a344b6da3c8788135488c0acf3ed1e2041fda4db9fd2fee52fd00
3 | size 171628
4 | 


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_cell_varIE_clE/anno.feather:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/NucCellTypes/e666329ffdb795d6367957f405667d82d4074805/data/20170818_VISp_L5_cell_varIE_clE/anno.feather


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_cell_varIE_clE/cl.cons.csv.gz:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:6dca88285755f98fc8c8eadaff8d76ada5e783275885156958c6f8be54aa92d9
3 | size 166729
4 | 


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_nuc/20170818_VISp_L5_nuc_iter_cl_data.rda:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:8b68688e11c0b11d9210bb52905f3e6f2c947f5a40305d3a8d6d44b477b4abdb
3 | size 47924883
4 | 


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_nuc/anno.feather:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/NucCellTypes/e666329ffdb795d6367957f405667d82d4074805/data/20170818_VISp_L5_nuc/anno.feather


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_nuc/cl.cons.csv.gz:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:d877b198d1fda9ea5f952d7f7dcce15a7c1a94fb836e3853507dedd87f7b8841
3 | size 176708
4 | 


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_nuc/dend.RData:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:68bb11caf504a49eb1d226daf74a7e005d3ec38826811d33a31f1613b7d2b6e4
3 | size 5656
4 | 


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_nuc/prop.feather:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/NucCellTypes/e666329ffdb795d6367957f405667d82d4074805/data/20170818_VISp_L5_nuc/prop.feather


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_nuc_exon/20170818_VISp_L5_nuc_exon_iter_cl_data.rda:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:881ba4300d799d9175fcf3675ff55c324c38703a20e211ba8fbe87053fce3a64
3 | size 44535342
4 | 


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_nuc_exon/anno.feather:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/NucCellTypes/e666329ffdb795d6367957f405667d82d4074805/data/20170818_VISp_L5_nuc_exon/anno.feather


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_nuc_exon/cl.cons.csv.gz:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:916517cd6e56abbc3609c700c9f1fde5f1a101422b763a617d908de86b25be00
3 | size 454004
4 | 


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_nuc_varE_clIE/anno.feather:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/NucCellTypes/e666329ffdb795d6367957f405667d82d4074805/data/20170818_VISp_L5_nuc_varE_clIE/anno.feather


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_nuc_varE_clIE/cl.cons.csv.gz:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:bb51152f409338a69a17fa1c6219a7102dcfecef4bdc7292485ffc0817cc76a1
3 | size 318278
4 | 


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_nuc_varIE_clE/anno.feather:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/NucCellTypes/e666329ffdb795d6367957f405667d82d4074805/data/20170818_VISp_L5_nuc_varIE_clE/anno.feather


--------------------------------------------------------------------------------
/data/20170818_VISp_L5_nuc_varIE_clE/cl.cons.csv.gz:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:949ba15f4d0aff2cc1d217996b57a413bb25cc535c53c3ec43dc3b23658022be
3 | size 246835
4 | 


--------------------------------------------------------------------------------
/data/Tasic2016_cluster_name_conversion.csv:
--------------------------------------------------------------------------------
 1 | old,new
 2 | Pvalb.Wt1,Pvalb Wt1
 3 | Sst.Cbln4,Sst Cbln4
 4 | L4.Arf5,L4 Arf5
 5 | L4.Hsd11b1,L5a Hsd11b1
 6 | L5a.Batf3,L5a Batf3
 7 | L6.Car12,L6a Car12
 8 | L6a.Mgp,L6a Mgp
 9 | L5.Chrna6,L5 Chrna6
10 | L5b.Cdh13,L5b Cdh13
11 | L5b.Samd3,L5b Tph2
12 | L6a.Plcxd3,L6a Sla
13 | 


--------------------------------------------------------------------------------
/data/cell_ids.txt:
--------------------------------------------------------------------------------
  1 | "US-1250273_E1_S16"
  2 | "LS-15912_S53_E1-50"
  3 | "LS-15020_S08_E1-50"
  4 | "LS-15052_S18_E1-50"
  5 | "SM-D9CZF_S34_E1-50"
  6 | "LS-15351_S51_E1-50"
  7 | "LS-15015_S20_E1-50"
  8 | "LS-15912_S57_E1-50"
  9 | "LS-15031_S36_E1-50"
 10 | "US-1250275_E1_S72"
 11 | "US-1250275_E1_S73"
 12 | "LS-15025_S59_E1-50"
 13 | "LS-15365_S29_E1-50"
 14 | "LS-15366_S25_E1-50"
 15 | "US-1250275_E1_S40"
 16 | "LS-15021_S55_E1-50"
 17 | "LS-15365_S42_E1-50"
 18 | "LS-15082_S13_E1-50"
 19 | "SM-D9EPN_S50_E1-50"
 20 | "LS-15342_S89_E1-50"
 21 | "SM-D9EOS_S21_E1-50"
 22 | "LS-15016_S87_E1-50"
 23 | "US-1250273_E1_S02"
 24 | "LS-15360_S16_E1-50"
 25 | "US-1250275_E1_S29"
 26 | "LS-15351_S39_E1-50"
 27 | "LS-15020_S09_E1-50"
 28 | "SM-D9EQK_S88_E1-50"
 29 | "LS-15052_S24_E1-50"
 30 | "LS-15016_S96_E1-50"
 31 | "US-1250275_E1_S64"
 32 | "US-1250275_E1_S16"
 33 | "LS-15360_S23_E1-50"
 34 | "US-1250273_E1_S37"
 35 | "LS-15364_S12_E1-50"
 36 | "LS-15577_S25_E1-50"
 37 | "LS-15342_S96_E1-50"
 38 | "LS-15343_S03_E1-50"
 39 | "SM-D9EOT_S14_E1-50"
 40 | "LS-15050_S32_E1-50"
 41 | "US-1250273_E1_S23"
 42 | "US-1250273_E1_S01"
 43 | "SM-D9EP8_S58_E1-50"
 44 | "US-1250273_E1_S62"
 45 | "LS-15360_S04_E1-50"
 46 | "US-1250273_E1_S24"
 47 | "LS-15350_S36_E1-50"
 48 | "LS-15021_S33_E1-50"
 49 | "LS-14696_S75_E1-50"
 50 | "US-1250275_E1_S25"
 51 | "SM-D9EPX_S07_E1-50"
 52 | "LS-15911_S32_E1-50"
 53 | "SM-D9EOT_S12_E1-50"
 54 | "US-1250273_E1_S06"
 55 | "LS-15360_S10_E1-50"
 56 | "LS-15080_S63_E1-50"
 57 | "LS-15020_S02_E1-50"
 58 | "LS-15043_S32_E1-50"
 59 | "SM-D9EOT_S30_E1-50"
 60 | "US-1250273_E1_S74"
 61 | "US-1250275_E1_S08"
 62 | "US-1250273_E1_S07"
 63 | "LS-15050_S40_E1-50"
 64 | "LS-15359_S08_E1-50"
 65 | "LS-15359_S35_E1-50"
 66 | "LS-15364_S20_E1-50"
 67 | "SM-D9EP8_S69_E1-50"
 68 | "LS-15577_S33_E1-50"
 69 | "LS-15010_S01_E1-50"
 70 | "LS-15576_S53_E1-50"
 71 | "LS-15360_S17_E1-50"
 72 | "LS-15365_S08_E1-50"
 73 | "US-1250273_E1_S33"
 74 | "LS-15021_S56_E1-50"
 75 | "LS-15364_S96_E1-50"
 76 | "SM-D9EP8_S63_E1-50"
 77 | "LS-15363_S20_E1-50"
 78 | "US-1250275_E1_S05"
 79 | "LS-15069_S35_E1-50"
 80 | "SQ-80001_S04_E1-50"
 81 | "SM-D9CZE_S25_E1-50"
 82 | "SM-D9CZK_S23_E1-50"
 83 | "LS-15364_S28_E1-50"
 84 | "US-1250273_E1_S40"
 85 | "LS-14692_S12_E1-50"
 86 | "SM-D9CZ6_S53_E1-50"
 87 | "LS-15352_S59_E1-50"
 88 | "LS-15010_S25_E1-50"
 89 | "LS-15343_S06_E1-50"
 90 | "LS-15041_S08_E1-50"
 91 | "LS-15052_S31_E1-50"
 92 | "LS-15382_S23_E1-50"
 93 | "SM-D9CZ6_S64_E1-50"
 94 | "LS-15577_S27_E1-50"
 95 | "LS-15913_S55_E1-50"
 96 | "US-1250273_E1_S57"
 97 | "LS-15072_S76_E1-50"
 98 | "SM-D9EOT_S13_E1-50"
 99 | "US-1250273_E1_S04"
100 | "LS-15546_S38_E1-50"
101 | "US-1250273_E1_S13"
102 | "US-1250273_E1_S22"
103 | "LS-15052_S19_E1-50"
104 | "LS-15364_S14_E1-50"
105 | "US-1250273_E1_S55"
106 | "SM-D9CZM_S84_E1-50"
107 | "LS-15021_S59_E1-50"
108 | "SM-D9CZF_S19_E1-50"
109 | "LS-15363_S26_E1-50"
110 | "LS-15016_S83_E1-50"
111 | "LS-14696_S72_E1-50"
112 | "US-1250275_E1_S04"
113 | "US-1250275_E1_S84"
114 | "LS-15342_S92_E1-50"
115 | "LS-15365_S56_E1-50"
116 | "LS-15050_S39_E1-50"
117 | "LS-15341_S86_E1-50"
118 | "LS-15350_S90_E1-50"
119 | "LS-15016_S57_E1-50"
120 | "LS-15001_S38_E1-50"
121 | "LS-15081_S88_E1-50"
122 | "LS-15366_S42_E1-50"
123 | "LS-15366_S37_E1-50"
124 | "US-1250273_E1_S66"
125 | "LS-15020_S12_E1-50"
126 | "LS-15352_S33_E1-50"
127 | "US-1250273_E1_S49"
128 | "LS-15003_S42_E1-50"
129 | "SM-D9CZF_S35_E1-50"
130 | "LS-15046_S89_E1-50"
131 | "LS-15007_S72_E1-50"
132 | "LS-15010_S32_E1-50"
133 | "LS-15046_S96_E1-50"
134 | "LS-15072_S91_E1-50"
135 | "LS-15576_S50_E1-50"
136 | "LS-15360_S29_E1-50"
137 | "LS-15052_S05_E1-50"
138 | "SM-D9CY7_S63_E1-50"
139 | "US-1250275_E1_S90"
140 | "LS-15042_S49_E1-50"
141 | "LS-15545_S96_E1-50"
142 | "LS-15346_S78_E1-50"
143 | "LS-15095_S08_E1-50"
144 | "LS-15305_S95_E1-50"
145 | "SM-D9EP5_S46_E1-50"
146 | "LS-15011_S20_E1-50"
147 | "LS-15360_S30_E1-50"
148 | "SM-D9CZF_S25_E1-50"
149 | "SM-D9CZK_S21_E1-50"
150 | "LS-15020_S15_E1-50"
151 | "US-1250275_E1_S82"
152 | "LS-15912_S90_E1-50"
153 | "SM-DAIFD_S58_E1-50"
154 | "LS-14696_S74_E1-50"
155 | "LS-15069_S14_E1-50"
156 | "LS-15041_S17_E1-50"
157 | "LS-15515_S16_E1-50"
158 | "LS-15365_S05_E1-50"
159 | "LS-15029_S96_E1-50"
160 | "US-1250273_E1_S67"
161 | "US-1250273_E1_S21"
162 | "LS-15010_S57_E1-50"
163 | "LS-15546_S02_E1-50"
164 | "LS-15031_S35_E1-50"
165 | "LS-15366_S16_E1-50"
166 | "SM-D9CZE_S18_E1-50"
167 | "LS-15366_S08_E1-50"
168 | "LS-15046_S30_E1-50"
169 | "LS-15361_S72_E1-50"
170 | "LS-15382_S33_E1-50"
171 | "LS-15021_S54_E1-50"
172 | "LS-15058_S64_E1-50"
173 | "SM-D9EOT_S31_E1-50"
174 | "LS-15021_S61_E1-50"
175 | "SM-D9CZE_S24_E1-50"
176 | "SM-D9EP8_S78_E1-50"
177 | "LS-15577_S04_E1-50"
178 | "LS-15542_S45_E1-50"
179 | "LS-15363_S07_E1-50"
180 | "LS-15912_S67_E1-50"
181 | "LS-15032_S15_E1-50"
182 | "LS-15363_S06_E1-50"
183 | "LS-15577_S52_E1-50"
184 | "LS-15000_S30_E1-50"
185 | "LS-15362_S15_E1-50"
186 | "LS-14696_S12_E1-50"
187 | "LS-15021_S93_E1-50"
188 | "LS-15002_S61_E1-50"
189 | "SM-D9EOT_S09_E1-50"
190 | "SM-D9EP8_S70_E1-50"
191 | "LS-15382_S17_E1-50"
192 | "LS-15042_S95_E1-50"
193 | "LS-15365_S32_E1-50"
194 | "LS-15024_S33_E1-50"
195 | "LS-15070_S77_E1-50"
196 | "LS-15382_S38_E1-50"
197 | "LS-15062_S73_E1-50"
198 | "SM-D9CZF_S30_E1-50"
199 | "LS-15544_S32_E1-50"
200 | "SM-D9CZK_S11_E1-50"
201 | "LS-15341_S83_E1-50"
202 | "LS-15042_S42_E1-50"
203 | "LS-15082_S16_E1-50"
204 | "LS-15361_S94_E1-50"
205 | "LS-15059_S42_E1-50"
206 | "US-1250275_E1_S85"
207 | "LS-15041_S09_E1-50"
208 | "SM-D9CZE_S33_E1-50"
209 | "SM-D9CY7_S71_E1-50"
210 | "SM-DAIFF_S66_E1-50"
211 | "LS-15082_S05_E1-50"
212 | "LS-15359_S25_E1-50"
213 | "LS-15363_S29_E1-50"
214 | "LS-15041_S70_E1-50"
215 | "LS-15031_S31_E1-50"
216 | "LS-15002_S41_E1-50"
217 | "LS-15362_S92_E1-50"
218 | "LS-15351_S35_E1-50"
219 | "LS-15360_S22_E1-50"
220 | "US-1250275_E1_S31"
221 | "LS-15390_S94_E1-50"
222 | "LS-15020_S26_E1-50"
223 | "SM-D9CZ6_S40_E1-50"
224 | "LS-15576_S49_E1-50"
225 | "LS-15021_S67_E1-50"
226 | "US-1250275_E1_S61"
227 | "LS-15361_S96_E1-50"
228 | "LS-15024_S34_E1-50"
229 | "LS-15382_S56_E1-50"
230 | "LS-15033_S18_E1-50"
231 | "SM-D9CZF_S39_E1-50"
232 | "SM-DAIFD_S42_E1-50"
233 | "LS-15359_S38_E1-50"
234 | "LS-15049_S79_E1-50"
235 | "LS-15367_S82_E1-50"
236 | "LS-15366_S43_E1-50"
237 | "LS-15058_S24_E1-50"
238 | "LS-15346_S86_E1-50"
239 | "SM-D9CYB_S45_E1-50"
240 | "LS-15546_S46_E1-50"
241 | "LS-15514_S06_E1-50"
242 | "LS-15032_S56_E1-50"
243 | "LS-15021_S50_E1-50"
244 | "SM-D9EP5_S33_E1-50"
245 | "LS-15359_S31_E1-50"
246 | "LS-15032_S58_E1-50"
247 | "SM-D9EQK_S86_E1-50"
248 | "LS-15038_S10_E1-50"
249 | "LS-15041_S04_E1-50"
250 | "LS-15021_S51_E1-50"
251 | "US-1250275_E1_S45"
252 | "LS-15007_S63_E1-50"
253 | "LS-15021_S89_E1-50"
254 | "LS-15341_S90_E1-50"
255 | "LS-15361_S60_E1-50"
256 | "LS-15364_S33_E1-50"
257 | "LS-15341_S91_E1-50"
258 | "LS-15351_S65_E1-50"
259 | "LS-15090_S34_E1-50"
260 | "LS-15364_S11_E1-50"
261 | "SM-D9CYV_S63_E1-50"
262 | "LS-15080_S70_E1-50"
263 | "LS-15038_S21_E1-50"
264 | "LS-15025_S07_E1-50"
265 | "SM-D9CZE_S10_E1-50"
266 | "LS-15069_S05_E1-50"
267 | "LS-15008_S46_E1-50"
268 | "LS-15359_S29_E1-50"
269 | "US-1250275_E1_S74"
270 | "LS-15365_S20_E1-50"
271 | "LS-15016_S71_E1-50"
272 | "LS-15050_S50_E1-50"
273 | "LS-15048_S10_E1-50"
274 | "LS-15003_S55_E1-50"
275 | "LS-15920_S35_E1-50"
276 | "LS-15351_S54_E1-50"
277 | "LS-15912_S59_E1-50"
278 | "SM-D9EOS_S30_E1-50"
279 | "LS-15515_S23_E1-50"
280 | "LS-15010_S62_E1-50"
281 | "US-1250275_E1_S26"
282 | "LS-15025_S57_E1-50"
283 | "LS-15365_S53_E1-50"
284 | "LS-15032_S09_E1-50"
285 | "SM-D9CZE_S31_E1-50"
286 | "LS-15052_S27_E1-50"
287 | "US-1250273_E1_S03"
288 | "LS-15083_S03_E1-50"
289 | "SM-D9CZF_S16_E1-50"
290 | "LS-15074_S12_E1-50"
291 | "LS-15350_S22_E1-50"
292 | "LS-15008_S69_E1-50"
293 | "SM-D9EOT_S07_E1-50"
294 | "LS-15046_S26_E1-50"
295 | "SM-D9CZ6_S96_E1-50"
296 | "LS-15381_S25_E1-50"
297 | "LS-15360_S15_E1-50"
298 | "LS-15006_S26_E1-50"
299 | "SM-D9EP8_S66_E1-50"
300 | "LS-15095_S04_E1-50"
301 | "LS-15552_S51_E1-50"
302 | "LS-15068_S31_E1-50"
303 | "LS-15349_S46_E1-50"
304 | "LS-15050_S27_E1-50"
305 | "SM-D9EPN_S54_E1-50"
306 | "LS-15042_S29_E1-50"
307 | "LS-15351_S22_E1-50"
308 | "LS-15010_S03_E1-50"
309 | "LS-15032_S59_E1-50"
310 | "LS-15341_S88_E1-50"
311 | "LS-14696_S58_E1-50"
312 | "LS-15025_S08_E1-50"
313 | "LS-15052_S03_E1-50"
314 | "LS-15016_S86_E1-50"
315 | "SM-D9CZ6_S47_E1-50"
316 | "LS-15021_S87_E1-50"
317 | "LS-15025_S44_E1-50"
318 | "LS-15364_S41_E1-50"
319 | "LS-15032_S51_E1-50"
320 | "LS-15362_S25_E1-50"
321 | "SM-D9CZ6_S66_E1-50"
322 | "LS-15070_S11_E1-50"
323 | "LS-15020_S05_E1-50"
324 | "LS-15009_S62_E1-50"
325 | "LS-15021_S49_E1-50"
326 | "LS-15010_S26_E1-50"
327 | "LS-15352_S43_E1-50"
328 | "SM-D9CZF_S22_E1-50"
329 | "LS-15003_S39_E1-50"
330 | "LS-15007_S80_E1-50"
331 | "SM-D9D8J_S94_E1-50"
332 | "LS-15363_S04_E1-50"
333 | "LS-15021_S88_E1-50"
334 | "LS-15359_S05_E1-50"
335 | "LS-15382_S53_E1-50"
336 | "LS-15546_S14_E1-50"
337 | "LS-15381_S54_E1-50"
338 | "LS-15021_S60_E1-50"
339 | "LS-15911_S28_E1-50"
340 | "LS-15015_S47_E1-50"
341 | "LS-15346_S80_E1-50"
342 | "SM-D9CZE_S12_E1-50"
343 | "LS-15020_S01_E1-50"
344 | "LS-15011_S10_E1-50"
345 | "SM-D9E66_S95_E1-50"
346 | "SM-D9EOT_S04_E1-50"
347 | "LS-15360_S14_E1-50"
348 | "LS-15350_S17_E1-50"
349 | "LS-15371_S19_E1-50"
350 | "LS-15021_S96_E1-50"
351 | "LS-15008_S08_E1-50"
352 | "LS-15070_S73_E1-50"
353 | "LS-15007_S52_E1-50"
354 | "LS-15342_S93_E1-50"
355 | "LS-15010_S38_E1-50"
356 | "LS-15041_S02_E1-50"
357 | "LS-15010_S27_E1-50"
358 | "SM-D9EOT_S26_E1-50"
359 | "LS-15343_S08_E1-50"
360 | "LS-15349_S56_E1-50"
361 | "SM-D9EPN_S53_E1-50"
362 | "SM-D9EP5_S61_E1-50"
363 | "LS-15061_S82_E1-50"
364 | "LS-15544_S29_E1-50"
365 | "LS-15046_S56_E1-50"
366 | "LS-15038_S19_E1-50"
367 | "LS-15366_S39_E1-50"
368 | "LS-15021_S39_E1-50"
369 | "LS-15346_S93_E1-50"
370 | "LS-15016_S91_E1-50"
371 | "LS-15082_S83_E1-50"
372 | "LS-15032_S52_E1-50"
373 | "LS-15024_S37_E1-50"
374 | "LS-15010_S11_E1-50"
375 | "SM-D9CZE_S22_E1-50"
376 | "SM-DAIFF_S71_E1-50"
377 | "LS-14692_S42_E1-50"
378 | "LS-15521_S93_E1-50"
379 | "LS-15002_S46_E1-50"
380 | "LS-15032_S67_E1-50"
381 | "LS-15046_S21_E1-50"
382 | "LS-15011_S87_E1-50"
383 | "LS-15024_S08_E1-50"
384 | "LS-15024_S39_E1-50"
385 | "LS-15052_S67_E1-50"
386 | "US-1250275_E1_S79"
387 | "LS-15031_S73_E1-50"
388 | "LS-15009_S90_E1-50"
389 | "LS-15546_S20_E1-50"
390 | "LS-15095_S10_E1-50"
391 | "LS-15075_S44_E1-50"
392 | "LS-15044_S25_E1-50"
393 | "LS-15352_S15_E1-50"
394 | "LS-15010_S69_E1-50"
395 | "LS-15025_S69_E1-50"
396 | "LS-15353_S18_E1-50"
397 | "LS-15580_S23_E1-50"
398 | "SM-D9CZE_S20_E1-50"
399 | "LS-15001_S33_E1-50"
400 | "SM-D9D8J_S95_E1-50"
401 | "LS-15007_S73_E1-50"
402 | "LS-15346_S90_E1-50"
403 | "LS-15042_S09_E1-50"
404 | "SM-D9CYT_S03_E1-50"
405 | "LS-15008_S03_E1-50"
406 | "LS-15095_S01_E1-50"
407 | "SM-D9CZE_S21_E1-50"
408 | "LS-15350_S07_E1-50"
409 | "SM-D9EQL_S79_E1-50"
410 | "LS-15044_S71_E1-50"
411 | "SM-D9EP8_S75_E1-50"
412 | "LS-15343_S11_E1-50"
413 | "LS-15032_S63_E1-50"
414 | "LS-15021_S73_E1-50"
415 | "LS-15341_S82_E1-50"
416 | "SM-D9EQI_S84_E1-50"
417 | "LS-15379_S60_E1-50"
418 | "SM-D9CZL_S06_E1-50"
419 | "LS-15041_S66_E1-50"
420 | "LS-15042_S91_E1-50"
421 | "LS-15069_S32_E1-50"
422 | "LS-15032_S68_E1-50"
423 | "LS-15041_S54_E1-50"
424 | "LS-15025_S70_E1-50"
425 | "SM-D9CZ6_S52_E1-50"
426 | "SM-DAIFD_S61_E1-50"
427 | "LS-15016_S90_E1-50"
428 | "LS-15507_S48_E1-50_and_LS-15507_S48_E2-50"
429 | "SM-D9CZF_S27_E1-50"
430 | "SM-D9CZF_S26_E1-50"
431 | "LS-15382_S51_E1-50"
432 | "LS-15016_S26_E1-50"
433 | "LS-15082_S08_E1-50"
434 | "LS-15029_S71_E1-50"
435 | "LS-15010_S55_E1-50"
436 | "LS-15010_S56_E1-50"
437 | "LS-15020_S55_E1-50"
438 | "LS-15007_S66_E1-50"
439 | "LS-15068_S45_E1-50"
440 | "LS-15021_S35_E1-50"
441 | "SM-D9CZ6_S28_E1-50"
442 | "LS-15350_S19_E1-50"
443 | "LS-15032_S16_E1-50"
444 | "SM-D9CZ6_S31_E1-50"
445 | "LS-15396_S89_E1-50"
446 | "LS-15059_S54_E1-50"
447 | "LS-15050_S49_E1-50"
448 | "LS-15032_S62_E1-50"
449 | "SM-D9CZE_S38_E1-50"
450 | "SM-D9EPX_S01_E1-50"
451 | "LS-15083_S07_E1-50"
452 | "SM-D9CYB_S27_E1-50"
453 | "LS-14690_S17_E1-50"
454 | "LS-14696_S69_E1-50"
455 | "LS-15007_S45_E1-50"
456 | "LS-15359_S21_E1-50"
457 | "SM-D9CZL_S05_E1-50"
458 | "LS-14696_S70_E1-50"
459 | "SM-D9CY7_S78_E1-50"
460 | "SM-D9EPN_S57_E1-50"
461 | "LS-15003_S56_E1-50"
462 | "LS-15041_S63_E1-50"
463 | "LS-15365_S54_E1-50"
464 | 


--------------------------------------------------------------------------------
/data/expr_summary.rda:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:4b91b06af9ff1b11e6a194bd120f9562a93b23178826ad87af27194b474a5733
3 | size 8943830
4 | 


--------------------------------------------------------------------------------
/data/map_nuc_to_cells.rda:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:e70fb21f254cac99856dab894b3085935ad6391ddb767a717ce096e6db9ae9ea
3 | size 57769566
4 | 


--------------------------------------------------------------------------------
/data/mouse_VISp_SMV1_1679/anno.feather:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/NucCellTypes/e666329ffdb795d6367957f405667d82d4074805/data/mouse_VISp_SMV1_1679/anno.feather


--------------------------------------------------------------------------------
/data/mouse_VISp_SMV1_1679/prop.feather:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/NucCellTypes/e666329ffdb795d6367957f405667d82d4074805/data/mouse_VISp_SMV1_1679/prop.feather


--------------------------------------------------------------------------------
/data/nuc_ids.txt:
--------------------------------------------------------------------------------
  1 | "SQ-80005_S47_E1-50"
  2 | "LS-15543_S68_E1-50"
  3 | "LS-15541_S50_E1-50"
  4 | "LS-15541_S29_E1-50"
  5 | "LS-15071_S62_E1-50"
  6 | "SQ-80005_S66_E1-50"
  7 | "LS-15543_S32_E1-50"
  8 | "LS-15539_S73_E1-50"
  9 | "LS-15543_S29_E1-50"
 10 | "LS-15045_S73_E1-50"
 11 | "LS-15543_S15_E1-50"
 12 | "SQ-80005_S89_E1-50"
 13 | "LS-15541_S16_E1-50"
 14 | "LS-15045_S60_E1-50"
 15 | "LS-15045_S90_E1-50"
 16 | "LS-15045_S03_E1-50"
 17 | "LS-15543_S84_E1-50"
 18 | "LS-15539_S94_E1-50"
 19 | "LS-15539_S12_E1-50"
 20 | "LS-15541_S06_E1-50"
 21 | "LS-15543_S72_E1-50"
 22 | "LS-15541_S81_E1-50"
 23 | "LS-15045_S01_E1-50"
 24 | "SQ-80005_S60_E1-50"
 25 | "LS-15543_S45_E1-50"
 26 | "LS-15045_S10_E1-50"
 27 | "LS-15539_S23_E1-50"
 28 | "LS-15543_S87_E1-50"
 29 | "LS-15539_S67_E1-50"
 30 | "LS-15541_S51_E1-50"
 31 | "SQ-80005_S82_E1-50"
 32 | "LS-15071_S80_E1-50"
 33 | "LS-15543_S78_E1-50"
 34 | "LS-15541_S08_E1-50"
 35 | "LS-15539_S79_E1-50"
 36 | "LS-15071_S26_E1-50"
 37 | "LS-15543_S85_E1-50"
 38 | "LS-15539_S80_E1-50"
 39 | "LS-15071_S87_E1-50"
 40 | "LS-15543_S77_E1-50"
 41 | "LS-15045_S04_E1-50"
 42 | "LS-15543_S10_E1-50"
 43 | "LS-15543_S74_E1-50"
 44 | "LS-15543_S24_E1-50"
 45 | "LS-15071_S53_E1-50"
 46 | "LS-15543_S28_E1-50"
 47 | "LS-15539_S21_E1-50"
 48 | "LS-15071_S51_E1-50"
 49 | "SQ-80005_S53_E1-50"
 50 | "LS-15543_S30_E1-50"
 51 | "LS-15543_S19_E1-50"
 52 | "LS-15539_S37_E1-50"
 53 | "LS-15071_S63_E1-50"
 54 | "SQ-80005_S57_E1-50"
 55 | "LS-15045_S49_E1-50"
 56 | "SQ-80005_S48_E1-50"
 57 | "LS-15071_S71_E1-50"
 58 | "LS-15045_S92_E1-50"
 59 | "LS-15539_S49_E1-50"
 60 | "LS-15543_S31_E1-50"
 61 | "LS-15539_S82_E1-50"
 62 | "LS-15541_S91_E1-50"
 63 | "LS-15071_S96_E1-50"
 64 | "LS-15539_S68_E1-50"
 65 | "LS-15071_S19_E1-50"
 66 | "LS-15071_S33_E1-50"
 67 | "LS-15071_S20_E1-50"
 68 | "SQ-80005_S93_E1-50"
 69 | "LS-15045_S77_E1-50"
 70 | "LS-15541_S12_E1-50"
 71 | "LS-15045_S30_E1-50"
 72 | "LS-15071_S94_E1-50"
 73 | "LS-15543_S05_E1-50"
 74 | "LS-15543_S13_E1-50"
 75 | "LS-15045_S71_E1-50"
 76 | "LS-15071_S92_E1-50"
 77 | "SQ-80005_S80_E1-50"
 78 | "LS-15071_S64_E1-50"
 79 | "LS-15541_S53_E1-50"
 80 | "LS-15045_S76_E1-50"
 81 | "LS-15071_S38_E1-50"
 82 | "SQ-80005_S52_E1-50"
 83 | "SQ-80005_S76_E1-50"
 84 | "SQ-80005_S59_E1-50"
 85 | "LS-15071_S08_E1-50"
 86 | "LS-15541_S76_E1-50"
 87 | "LS-15541_S80_E1-50"
 88 | "LS-15045_S96_E1-50"
 89 | "LS-15071_S17_E1-50"
 90 | "LS-15543_S02_E1-50"
 91 | "LS-15045_S81_E1-50"
 92 | "LS-15045_S86_E1-50"
 93 | "LS-15543_S35_E1-50"
 94 | "LS-15045_S43_E1-50"
 95 | "LS-15539_S51_E1-50"
 96 | "LS-15539_S39_E1-50"
 97 | "LS-15539_S04_E1-50"
 98 | "LS-15539_S92_E1-50"
 99 | "SQ-80005_S56_E1-50"
100 | "LS-15541_S93_E1-50"
101 | "LS-15045_S78_E1-50"
102 | "LS-15539_S59_E1-50"
103 | "LS-15543_S79_E1-50"
104 | "LS-15045_S50_E1-50"
105 | "LS-15045_S94_E1-50"
106 | "LS-15543_S34_E1-50"
107 | "LS-15071_S06_E1-50"
108 | "LS-15539_S83_E1-50"
109 | "LS-15071_S82_E1-50"
110 | "LS-15071_S44_E1-50"
111 | "LS-15071_S60_E1-50"
112 | "LS-15045_S93_E1-50"
113 | "LS-15045_S20_E1-50"
114 | "LS-15045_S95_E1-50"
115 | "LS-15539_S63_E1-50"
116 | "LS-15045_S70_E1-50"
117 | "SQ-80005_S75_E1-50"
118 | "LS-15045_S55_E1-50"
119 | "LS-15045_S17_E1-50"
120 | "LS-15071_S18_E1-50"
121 | "LS-15541_S10_E1-50"
122 | "LS-15541_S54_E1-50"
123 | "LS-15045_S41_E1-50"
124 | "LS-15539_S71_E1-50"
125 | "LS-15539_S90_E1-50"
126 | "LS-15543_S52_E1-50"
127 | "LS-15543_S22_E1-50"
128 | "LS-15071_S32_E1-50"
129 | "LS-15071_S37_E1-50"
130 | "LS-15539_S66_E1-50"
131 | "LS-15045_S59_E1-50"
132 | "LS-15045_S68_E1-50"
133 | "LS-15541_S72_E1-50"
134 | "LS-15539_S60_E1-50"
135 | "SQ-80005_S90_E1-50"
136 | "LS-15541_S88_E1-50"
137 | "LS-15543_S33_E1-50"
138 | "LS-15541_S01_E1-50"
139 | "LS-15045_S06_E1-50"
140 | "LS-15539_S78_E1-50"
141 | "LS-15541_S21_E1-50"
142 | "LS-15071_S58_E1-50"
143 | "LS-15541_S94_E1-50"
144 | "LS-15541_S64_E1-50"
145 | "LS-15045_S69_E1-50"
146 | "LS-15045_S28_E1-50"
147 | "SQ-80005_S67_E1-50"
148 | "LS-15539_S75_E1-50"
149 | "SQ-80005_S81_E1-50"
150 | "LS-15539_S72_E1-50"
151 | "LS-15071_S83_E1-50"
152 | "SQ-80005_S43_E1-50"
153 | "LS-15541_S79_E1-50"
154 | "LS-15071_S57_E1-50"
155 | "LS-15541_S24_E1-50"
156 | "LS-15539_S58_E1-50"
157 | "LS-15543_S96_E1-50"
158 | "LS-15543_S40_E1-50"
159 | "LS-15541_S31_E1-50"
160 | "LS-15541_S28_E1-50"
161 | "LS-15071_S07_E1-50"
162 | "SQ-80005_S62_E1-50"
163 | "LS-15071_S95_E1-50"
164 | "LS-15543_S43_E1-50"
165 | "LS-15539_S50_E1-50"
166 | "LS-15539_S24_E1-50"
167 | "SQ-80005_S72_E1-50"
168 | "LS-15071_S49_E1-50"
169 | "LS-15045_S85_E1-50"
170 | "LS-15045_S52_E1-50"
171 | "LS-15071_S25_E1-50"
172 | "LS-15541_S70_E1-50"
173 | "LS-15539_S34_E1-50"
174 | "LS-15045_S91_E1-50"
175 | "LS-15543_S90_E1-50"
176 | "LS-15543_S51_E1-50"
177 | "LS-15071_S43_E1-50"
178 | "SQ-80005_S45_E1-50"
179 | "LS-15543_S07_E1-50"
180 | "LS-15543_S47_E1-50"
181 | "LS-15543_S76_E1-50"
182 | "SQ-80005_S50_E1-50"
183 | "SQ-80005_S64_E1-50"
184 | "LS-15045_S67_E1-50"
185 | "LS-15045_S26_E1-50"
186 | "LS-15543_S36_E1-50"
187 | "LS-15045_S88_E1-50"
188 | "LS-15539_S19_E1-50"
189 | "LS-15045_S82_E1-50"
190 | "LS-15071_S68_E1-50"
191 | "LS-15541_S14_E1-50"
192 | "LS-15543_S06_E1-50"
193 | "LS-15543_S17_E1-50"
194 | "SQ-80005_S41_E1-50"
195 | "LS-15045_S51_E1-50"
196 | "LS-15071_S84_E1-50"
197 | "LS-15539_S42_E1-50"
198 | "LS-15539_S56_E1-50"
199 | "LS-15045_S62_E1-50"
200 | "LS-15541_S89_E1-50"
201 | "LS-15541_S60_E1-50"
202 | "LS-15539_S70_E1-50"
203 | "LS-15543_S01_E1-50"
204 | "LS-15539_S02_E1-50"
205 | "LS-15541_S30_E1-50"
206 | "LS-15045_S83_E1-50"
207 | "LS-15541_S07_E1-50"
208 | "LS-15541_S34_E1-50"
209 | "LS-15541_S57_E1-50"
210 | "LS-15539_S95_E1-50"
211 | "LS-15543_S88_E1-50"
212 | "LS-15543_S65_E1-50"
213 | "LS-15045_S61_E1-50"
214 | "LS-15071_S47_E1-50"
215 | "LS-15045_S18_E1-50"
216 | "LS-15045_S19_E1-50"
217 | "LS-15539_S06_E1-50"
218 | "LS-15045_S05_E1-50"
219 | "LS-15071_S21_E1-50"
220 | "LS-15045_S12_E1-50"
221 | "LS-15541_S90_E1-50"
222 | "LS-15541_S33_E1-50"
223 | "LS-15543_S67_E1-50"
224 | "LS-15045_S66_E1-50"
225 | "LS-15071_S30_E1-50"
226 | "LS-15071_S73_E1-50"
227 | "LS-15541_S68_E1-50"
228 | "LS-15539_S07_E1-50"
229 | "LS-15539_S86_E1-50"
230 | "SQ-80005_S79_E1-50"
231 | "LS-15071_S76_E1-50"
232 | "LS-15071_S40_E1-50"
233 | "LS-15071_S79_E1-50"
234 | "LS-15071_S36_E1-50"
235 | "LS-15071_S03_E1-50"
236 | "LS-15045_S02_E1-50"
237 | "LS-15071_S86_E1-50"
238 | "LS-15541_S35_E1-50"
239 | "LS-15543_S95_E1-50"
240 | "LS-15045_S48_E1-50"
241 | "LS-15543_S42_E1-50"
242 | "LS-15045_S25_E1-50"
243 | "LS-15539_S65_E1-50"
244 | "SQ-80005_S69_E1-50"
245 | "LS-15539_S55_E1-50"
246 | "LS-15071_S23_E1-50"
247 | "LS-15541_S73_E1-50"
248 | "SQ-80005_S71_E1-50"
249 | "LS-15541_S71_E1-50"
250 | "LS-15543_S50_E1-50"
251 | "SQ-80005_S91_E1-50"
252 | "LS-15541_S36_E1-50"
253 | "SQ-80005_S55_E1-50"
254 | "LS-15543_S56_E1-50"
255 | "LS-15541_S56_E1-50"
256 | "LS-15541_S63_E1-50"
257 | "LS-15071_S22_E1-50"
258 | "LS-15541_S03_E1-50"
259 | "LS-15539_S01_E1-50"
260 | "LS-15539_S84_E1-50"
261 | "LS-15045_S08_E1-50"
262 | "LS-15541_S23_E1-50"
263 | "LS-15071_S01_E1-50"
264 | "LS-15045_S64_E1-50"
265 | "LS-15541_S52_E1-50"
266 | "LS-15071_S56_E1-50"
267 | "LS-15045_S45_E1-50"
268 | "LS-15539_S13_E1-50"
269 | "LS-15541_S11_E1-50"
270 | "LS-15543_S26_E1-50"
271 | "LS-15071_S85_E1-50"
272 | "LS-15543_S70_E1-50"
273 | "LS-15541_S38_E1-50"
274 | "LS-15045_S16_E1-50"
275 | "SQ-80005_S74_E1-50"
276 | "LS-15539_S87_E1-50"
277 | "LS-15543_S27_E1-50"
278 | "LS-15539_S09_E1-50"
279 | "LS-15541_S04_E1-50"
280 | "LS-15071_S52_E1-50"
281 | "LS-15071_S75_E1-50"
282 | "LS-15543_S80_E1-50"
283 | "LS-15539_S69_E1-50"
284 | "LS-15071_S78_E1-50"
285 | "SQ-80005_S94_E1-50"
286 | "LS-15539_S52_E1-50"
287 | "LS-15045_S47_E1-50"
288 | "LS-15543_S75_E1-50"
289 | "LS-15045_S29_E1-50"
290 | "SQ-80005_S78_E1-50"
291 | "LS-15539_S48_E1-50"
292 | "SQ-80005_S44_E1-50"
293 | "LS-15541_S67_E1-50"
294 | "LS-15539_S62_E1-50"
295 | "LS-15071_S72_E1-50"
296 | "LS-15045_S74_E1-50"
297 | "LS-15539_S85_E1-50"
298 | "LS-15543_S48_E1-50"
299 | "LS-15539_S64_E1-50"
300 | "LS-15541_S13_E1-50"
301 | "LS-15543_S09_E1-50"
302 | "LS-15541_S85_E1-50"
303 | "LS-15543_S71_E1-50"
304 | "LS-15539_S61_E1-50"
305 | "LS-15045_S72_E1-50"
306 | "LS-15543_S69_E1-50"
307 | "LS-15541_S49_E1-50"
308 | "LS-15045_S46_E1-50"
309 | "LS-15071_S24_E1-50"
310 | "LS-15045_S65_E1-50"
311 | "SQ-80005_S61_E1-50"
312 | "LS-15071_S05_E1-50"
313 | "LS-15541_S62_E1-50"
314 | "LS-15541_S27_E1-50"
315 | "LS-15045_S89_E1-50"
316 | "LS-15045_S87_E1-50"
317 | "LS-15543_S14_E1-50"
318 | "LS-15543_S46_E1-50"
319 | "LS-15045_S44_E1-50"
320 | "LS-15045_S80_E1-50"
321 | "LS-15071_S61_E1-50"
322 | "LS-15539_S18_E1-50"
323 | "LS-15071_S45_E1-50"
324 | "LS-15045_S75_E1-50"
325 | "LS-15539_S20_E1-50"
326 | "LS-15071_S04_E1-50"
327 | "SQ-80005_S88_E1-50"
328 | "LS-15071_S93_E1-50"
329 | "LS-15045_S09_E1-50"
330 | "SQ-80005_S87_E1-50"
331 | "LS-15541_S32_E1-50"
332 | "LS-15543_S54_E1-50"
333 | "LS-15071_S67_E1-50"
334 | "LS-15539_S11_E1-50"
335 | "LS-15045_S11_E1-50"
336 | "LS-15045_S58_E1-50"
337 | "LS-15071_S35_E1-50"
338 | "LS-15539_S36_E1-50"
339 | "LS-15071_S28_E1-50"
340 | "LS-15541_S18_E1-50"
341 | "LS-15071_S31_E1-50"
342 | "LS-15543_S73_E1-50"
343 | "LS-15541_S77_E1-50"
344 | "LS-15045_S14_E1-50"
345 | "LS-15541_S87_E1-50"
346 | "LS-15541_S95_E1-50"
347 | "SQ-80005_S42_E1-50"
348 | "LS-15541_S09_E1-50"
349 | "LS-15539_S57_E1-50"
350 | "LS-15071_S55_E1-50"
351 | "LS-15071_S77_E1-50"
352 | "LS-15539_S38_E1-50"
353 | "LS-15045_S53_E1-50"
354 | "SQ-80005_S92_E1-50"
355 | "LS-15071_S50_E1-50"
356 | "LS-15071_S46_E1-50"
357 | "LS-15541_S58_E1-50"
358 | "LS-15071_S65_E1-50"
359 | "LS-15539_S45_E1-50"
360 | "LS-15071_S39_E1-50"
361 | "SQ-80005_S96_E1-50"
362 | "LS-15541_S02_E1-50"
363 | "LS-15541_S59_E1-50"
364 | "LS-15539_S14_E1-50"
365 | "SQ-80005_S68_E1-50"
366 | "LS-15543_S92_E1-50"
367 | "LS-15045_S32_E1-50"
368 | "LS-15541_S25_E1-50"
369 | "LS-15541_S84_E1-50"
370 | "SQ-80005_S95_E1-50"
371 | "LS-15541_S55_E1-50"
372 | "LS-15071_S42_E1-50"
373 | "LS-15539_S76_E1-50"
374 | "LS-15045_S79_E1-50"
375 | "LS-15045_S13_E1-50"
376 | "LS-15539_S81_E1-50"
377 | "LS-15045_S24_E1-50"
378 | "LS-15541_S20_E1-50"
379 | "LS-15543_S20_E1-50"
380 | "LS-15071_S81_E1-50"
381 | "SQ-80005_S77_E1-50"
382 | "LS-15045_S27_E1-50"
383 | "LS-15071_S48_E1-50"
384 | "LS-15539_S96_E1-50"
385 | "LS-15543_S18_E1-50"
386 | "LS-15543_S53_E1-50"
387 | "LS-15541_S19_E1-50"
388 | "LS-15071_S27_E1-50"
389 | "LS-15543_S66_E1-50"
390 | "LS-15543_S41_E1-50"
391 | "LS-15539_S33_E1-50"
392 | "LS-15045_S15_E1-50"
393 | "LS-15539_S35_E1-50"
394 | "LS-15541_S74_E1-50"
395 | "LS-15539_S17_E1-50"
396 | "LS-15543_S82_E1-50"
397 | "LS-15541_S86_E1-50"
398 | "LS-15541_S83_E1-50"
399 | "LS-15539_S54_E1-50"
400 | "LS-15543_S55_E1-50"
401 | "LS-15539_S44_E1-50"
402 | "LS-15071_S70_E1-50"
403 | "LS-15045_S23_E1-50"
404 | "LS-15541_S15_E1-50"
405 | "SQ-80005_S51_E1-50"
406 | "LS-15045_S07_E1-50"
407 | "LS-15541_S65_E1-50"
408 | "LS-15543_S94_E1-50"
409 | "LS-15543_S86_E1-50"
410 | "LS-15543_S03_E1-50"
411 | "LS-15071_S02_E1-50"
412 | "LS-15045_S63_E1-50"
413 | "LS-15541_S78_E1-50"
414 | "LS-15541_S82_E1-50"
415 | "LS-15539_S77_E1-50"
416 | "LS-15543_S08_E1-50"
417 | "LS-15071_S91_E1-50"
418 | "LS-15539_S43_E1-50"
419 | "LS-15541_S26_E1-50"
420 | "LS-15071_S66_E1-50"
421 | "LS-15543_S83_E1-50"
422 | "LS-15071_S29_E1-50"
423 | "LS-15071_S54_E1-50"
424 | "SQ-80005_S49_E1-50"
425 | "LS-15539_S88_E1-50"
426 | "LS-15543_S23_E1-50"
427 | "LS-15541_S40_E1-50"
428 | "LS-15541_S69_E1-50"
429 | "LS-15045_S56_E1-50"
430 | "LS-15543_S21_E1-50"
431 | "LS-15539_S03_E1-50"
432 | "LS-15045_S54_E1-50"
433 | "LS-15539_S91_E1-50"
434 | "LS-15543_S37_E1-50"
435 | "LS-15071_S59_E1-50"
436 | "LS-15539_S41_E1-50"
437 | "LS-15543_S81_E1-50"
438 | "LS-15541_S22_E1-50"
439 | "LS-15045_S57_E1-50"
440 | "LS-15539_S16_E1-50"
441 | "SQ-80005_S63_E1-50"
442 | "LS-15071_S69_E1-50"
443 | "LS-15539_S40_E1-50"
444 | "LS-15543_S49_E1-50"
445 | "LS-15541_S66_E1-50"
446 | "LS-15539_S47_E1-50"
447 | "LS-15543_S91_E1-50"
448 | "LS-15071_S41_E1-50"
449 | "LS-15543_S11_E1-50"
450 | "LS-15539_S22_E1-50"
451 | "LS-15539_S15_E1-50"
452 | "LS-15045_S84_E1-50"
453 | "LS-15543_S25_E1-50"
454 | "LS-15543_S89_E1-50"
455 | "LS-15539_S74_E1-50"
456 | "LS-15541_S17_E1-50"
457 | "LS-15541_S37_E1-50"
458 | "SQ-80005_S70_E1-50"
459 | "LS-15539_S93_E1-50"
460 | "SQ-80005_S58_E1-50"
461 | "LS-15543_S38_E1-50"
462 | "LS-15541_S05_E1-50"
463 | "LS-15045_S22_E1-50"
464 | 


--------------------------------------------------------------------------------
/data/nuc_soma_area_cre_lines.csv:
--------------------------------------------------------------------------------
  1 | "cre","n_area","s_area","nuc_soma_ratio"
  2 | "rbp4",68.4656295596,101.283454896,0.675980392156863
  3 | "rbp4",126.852562382,193.381890598,0.65596919127086
  4 | "rbp4",91.0061631492,131.2713013456,0.693267776096823
  5 | "rbp4",85.5448003852,142.3926218832,0.600767085076709
  6 | "rbp4",90.7579193872,133.6544414608,0.679049034175334
  7 | "rbp4",105.6525451072,152.4713186204,0.692933897753175
  8 | "rbp4",102.9218637252,180.5725124788,0.569975254330492
  9 | "rbp4",110.46847409,202.2690172776,0.546146293568974
 10 | "rbp4",123.5757447236,247.6976257236,0.498897574664261
 11 | "rbp4",63.053915548,114.4900230344,0.550737207285343
 12 | "rbp4",133.7537389656,211.5533339764,0.632245951654541
 13 | "rbp4",118.164030712,207.9289750512,0.56829035339064
 14 | "rbp4",97.2122571992,189.3603416536,0.513371788148925
 15 | "rbp4",71.2956084464,129.335000002,0.551247600767754
 16 | "rbp4",102.9715124776,171.6853857992,0.599768652400231
 17 | "rbp4",93.8857907884,148.5987159332,0.631807550952222
 18 | "rbp4",67.1747619972,108.6314702512,0.618372943327239
 19 | "rbp4",53.3227600776,92.594923226,0.575871313672922
 20 | "rbp4",85.8426928996,117.0717581592,0.733248515691264
 21 | "rbp4",124.7176660288,202.7158560492,0.615233896644624
 22 | "rbp4",84.3035815752,142.3429731308,0.592256714335542
 23 | "rbp4",83.8567428036,140.9031593112,0.595137420718816
 24 | "rbp4",77.5017024964,132.9593589272,0.582897684839432
 25 | "rbp4",69.1607120932,148.1022284092,0.466979550787798
 26 | "rbp4",98.7017197712,178.7851573924,0.5520688697584
 27 | "rbp4",67.0754644924,134.6474165088,0.498156342182891
 28 | "rbp4",78.2464337824,146.712063342,0.533333333333333
 29 | "rbp4",95.822092132,164.6352629584,0.582026537997587
 30 | "rbp4",123.8239884856,202.31866603,0.612024539877301
 31 | "rbp4",59.4295566228,105.3050038404,0.564356435643564
 32 | "rbp4",98.9003147808,136.1865278332,0.726212176449143
 33 | "rbp4",74.0262898284,118.4619232264,0.624895222129086
 34 | "rbp4",86.388829176,120.6961170844,0.715754833401892
 35 | "rbp4",78.6436238016,122.3345259136,0.642857142857143
 36 | "rbp4",95.9710383892,167.2666468356,0.573760759869397
 37 | "rbp4",84.6014740896,118.2633282168,0.71536523929471
 38 | "rbp4",85.0979616136,122.88066219,0.692525252525253
 39 | "rbp4",83.8567428036,114.5893205392,0.731802426343154
 40 | "rbp4",94.33262956,163.7912341676,0.575932100636557
 41 | "rbp4",93.6375470264,147.4071458756,0.63523071741327
 42 | "rbp4",88.9705643008,132.0160326316,0.673937570515231
 43 | "rbp4",61.4651554712,111.8089904048,0.549733570159858
 44 | "rbp4",69.4586046076,109.5747965468,0.633892161304939
 45 | "rbp4",75.217859886,128.3916737064,0.58584686774942
 46 | "rbp4",92.5452744736,142.1443781212,0.651065316101991
 47 | "rbp4",92.7935182356,183.9982763944,0.504317323259579
 48 | "rbp4",90.5096756252,186.8779040336,0.48432518597237
 49 | "rbp4",80.0337888688,146.6624145896,0.545700744752877
 50 | "rbp4",105.8511401168,187.5233378148,0.56446915541435
 51 | "rbp4",85.4951516328,120.5968195796,0.708933717579251
 52 | "rbp4",64.4440806152,116.2277293684,0.554463904314396
 53 | "rbp4",91.7508944352,145.1233032652,0.632227163872733
 54 | "rbp4",62.309184262,98.6024222664,0.631923464249748
 55 | "rbp4",96.7654184276,173.4230921332,0.557973089035213
 56 | "rbp4",62.0609405,102.3260786964,0.606501698204755
 57 | "rbp4",109.6244452992,185.1401976996,0.592115848753017
 58 | "rbp4",69.260009598,110.4188253376,0.627248201438849
 59 | "rbp4",143.8820844552,243.8746717888,0.589983713355049
 60 | "rbp4",89.5663493296,135.541094052,0.660805860805861
 61 | "rbp4",138.4703704436,211.503685224,0.654694835680751
 62 | "rbp4",64.1958368532,93.6375470264,0.685577942735949
 63 | "rbp4",66.6782744732,112.702667948,0.591629955947137
 64 | "rbp4",65.1888119012,95.1270095984,0.685281837160752
 65 | "rbp4",97.559798466,137.030556624,0.71195652173913
 66 | "rbp4",124.121881,206.2409174696,0.601829561868079
 67 | "rbp4",64.9405681392,122.9303109424,0.52827140549273
 68 | "rbp4",74.0759385808,122.3345259136,0.605519480519481
 69 | "rbp4",131.6188426124,236.328061424,0.556932773109244
 70 | "rbp4",76.0618886768,122.384174666,0.621501014198783
 71 | "rbp4",71.1466621892,149.3434472192,0.476396276595745
 72 | "rbp4",86.5377754332,130.2286775452,0.664506290507053
 73 | "rbp4",70.6501746652,120.1996295604,0.587773647253201
 74 | "rbp4",65.4370556632,97.3612034564,0.672106068332483
 75 | "rbp4",84.2042840704,144.7757619984,0.58161865569273
 76 | "rbp4",95.6234971224,183.1542476036,0.522092708050962
 77 | "rbp4",71.8913934752,110.5677715948,0.650202065559048
 78 | "rbp4",119.3059520172,200.084472172,0.596277915632754
 79 | "rbp4",56.6492264884,104.9078138212,0.539990534784666
 80 | "rbp4",143.0380556644,240.6971516352,0.594265676567657
 81 | "rbp4",63.4014568148,105.8511401168,0.598968105065666
 82 | "rbp4",126.3064261056,220.688704418,0.572328458942632
 83 | "rbp4",70.0543896364,106.3972763932,0.658422771815212
 84 | "rbp4",77.452053744,106.2979788884,0.728631480616534
 85 | "rbp4",107.8370902128,160.5640652616,0.671614100185529
 86 | "rbp4",74.6220748572,118.0647332072,0.632043734230446
 87 | "rbp4",88.9705643008,136.4844203476,0.651873408512186
 88 | "rbp4",65.5860019204,106.7944664124,0.614132961413296
 89 | "rbp4",120.3485758176,190.2043704444,0.6327329678935
 90 | "rbp4",118.7101669884,204.6025086404,0.580198980829896
 91 | "rbp4",73.6787485616,108.0853339748,0.681672025723473
 92 | "rbp4",123.6750422284,188.4666641104,0.656217070600632
 93 | "rbp4",80.3813301356,119.0080595028,0.675427617855653
 94 | "rbp4",104.5602725544,175.1111497148,0.597108023816274
 95 | "rbp4",79.43800384,111.7593416524,0.710795202132386
 96 | "rbp4",68.018790788,90.6586218824,0.750273822562979
 97 | "rbp4",84.0553378132,148.7973109428,0.564898231564898
 98 | "rbp4",70.1040383888,107.3902514412,0.652797041146556
 99 | "rbp4",126.7532648772,220.192216894,0.57564825253664
100 | "rbp4",63.3518080624,80.0337888688,0.791563275434243
101 | "rbp4",119.405249522,191.5945355116,0.623218450375745
102 | "rbp4",102.6239712108,130.6258675644,0.785632839224629
103 | "rbp4",92.8928157404,111.5607466428,0.83266577659101
104 | "rbp4",105.4539500976,167.812783112,0.628402366863905
105 | "rbp4",79.8351938592,108.5321727464,0.735590118938701
106 | "rbp4",50.9396199624,90.4103781204,0.563426688632619
107 | "rbp4",75.71434741,120.7954145892,0.626798191533087
108 | "rbp4",80.7785201548,106.8441151648,0.756040892193309
109 | "rbp4",115.681593092,169.2525969316,0.683484892930478
110 | "rbp4",82.6155239936,129.6328925164,0.637303715051704
111 | "rbp4",98.9499635332,161.6563378144,0.612100737100737
112 | "rbp4",84.899366604,155.3509462596,0.546500479386385
113 | "rbp4",85.4951516328,132.2642763936,0.646396396396396
114 | "rbp4",129.1860537448,189.409990406,0.682044560943644
115 | "rbp4",67.1747619972,111.3125028808,0.603479036574487
116 | "rbp4",79.9841401164,139.3640479868,0.573922337014606
117 | "rbp4",74.5227773524,105.8014913644,0.704364148287189
118 | "rbp4",73.8773435712,118.908761998,0.621294363256785
119 | "rbp4",74.9199673716,98.801017276,0.758291457286432
120 | "rbp4",79.0408138208,127.0511573916,0.622118014849551
121 | "rbp4",70.9480671796,118.412274474,0.59916142557652
122 | "nr5a1",105.50359885,134.7467140136,0.782977155490052
123 | "nr5a1",55.7059001928,77.1045124772,0.722472633612363
124 | "nr5a1",68.7138733216,94.2829808076,0.728804634017904
125 | "nr5a1",73.6290998092,115.7312418444,0.636207636207636
126 | "nr5a1",49.2019136284,86.0909366616,0.571510957324106
127 | "nr5a1",85.0979616136,128.342024954,0.663056092843327
128 | "nr5a1",73.1326122852,109.22725528,0.669545454545455
129 | "nr5a1",64.7419731296,103.1204587348,0.62782859894078
130 | "nr5a1",53.2234625728,92.6942207308,0.574183181574719
131 | "nr5a1",63.4014568148,88.374779272,0.717415730337079
132 | "nr5a1",58.2379865652,91.5522994256,0.636117136659436
133 | "nr5a1",62.1602380048,86.7363704428,0.716657126502576
134 | "nr5a1",69.3096583504,105.4539500976,0.657250470809793
135 | "nr5a1",59.1316641084,89.2684568152,0.662402669632925
136 | "nr5a1",55.9541439548,87.7789942432,0.637443438914027
137 | "nr5a1",80.5302763928,131.1223550884,0.614161302536918
138 | "nr5a1",70.3522821508,85.9419904044,0.818601964182553
139 | "nr5a1",66.8272207304,113.199155472,0.590350877192982
140 | "nr5a1",70.7991209224,94.5312245696,0.748949579831933
141 | "nr5a1",76.707322458,105.9007888692,0.724331926863572
142 | "nr5a1",70.9480671796,97.9073397328,0.724645030425964
143 | "nr5a1",72.983666028,106.0993838788,0.687880205896116
144 | "nr5a1",77.8492437632,107.98603647,0.720919540229885
145 | "nr5a1",52.7766238012,91.9494894448,0.573974082073434
146 | "nr5a1",56.0534414596,77.7499462584,0.720945083014049
147 | "nr5a1",63.6000518244,93.09141075,0.6832
148 | "nr5a1",58.33728407,86.2398829188,0.676453655728267
149 | "nr5a1",59.9756928992,88.5733742816,0.677130044843049
150 | "nr5a1",61.9616429952,87.878291748,0.705084745762712
151 | "nr5a1",54.4646813828,75.962591172,0.716993464052288
152 | "nr5a1",73.1326122852,109.3265527848,0.668937329700273
153 | "nr5a1",60.9686679472,86.4384779284,0.705341757610569
154 | "nr5a1",63.4014568148,93.09141075,0.681066666666667
155 | "nr5a1",67.6712495212,96.4178771608,0.701853759011329
156 | "nr5a1",48.3578848376,68.26703455,0.708363636363636
157 | "nr5a1",65.78459693,96.566823418,0.681233933161954
158 | "nr5a1",80.67922265,124.4197735144,0.648443735035914
159 | "nr5a1",68.5649270644,108.6314702512,0.631170018281536
160 | "nr5a1",57.592552784,77.1541612296,0.746460746460746
161 | "nr5a1",59.4295566228,93.7368445312,0.634004237288136
162 | "nr5a1",51.7836487532,81.8211439552,0.632888349514563
163 | "nr5a1",69.4586046076,94.3822783124,0.735928458705944
164 | "scnn1a_tg3",88.0768867576,122.0862821516,0.721431476209841
165 | "scnn1a_tg3",69.2103608456,92.9424644928,0.74465811965812
166 | "scnn1a_tg3",75.962591172,94.9284145888,0.800209205020921
167 | "scnn1a_tg3",89.7649443392,116.426324378,0.771002132196162
168 | "scnn1a_tg3",80.0337888688,113.0005604624,0.708260105448155
169 | "scnn1a_tg3",83.658147794,115.7808905968,0.722555746140652
170 | "scnn1a_tg3",76.0618886768,107.6384952032,0.706642066420664
171 | "scnn1a_tg3",80.1330863736,103.9644875256,0.770773638968481
172 | "scnn1a_tg3",77.1541612296,98.9499635332,0.779729051680883
173 | "scnn1a_tg3",74.0262898284,93.7368445312,0.789724576271186
174 | "scnn1a_tg3",86.3391804236,109.1776065276,0.79081400636653
175 | "scnn1a_tg3",85.4951516328,109.5251477944,0.780598368087035
176 | "scnn1a_tg3",83.4595527844,117.2703531688,0.711685012701101
177 | "scnn1a_tg3",71.99069098,101.035211134,0.712530712530713
178 | "scnn1a_tg3",79.3387063352,106.1986813836,0.747078073866293
179 | "scnn1a_tg3",76.1611861816,101.531698658,0.750122249388753
180 | "scnn1a_tg3",80.3813301356,108.3335777368,0.741979835013749
181 | "scnn1a_tg3",99.9925873336,143.8324357028,0.695201933034173
182 | "scnn1a_tg3",76.459078696,95.325604608,0.802083333333333
183 | "scnn1a_tg3",85.0483128612,114.440374282,0.743167028199566
184 | "scnn1a_tg3",91.6515969304,118.5612207312,0.773031825795645
185 | "scnn1a_tg3",74.721372362,110.6174203472,0.675493716337522
186 | "scnn1a_tg3",82.1190364696,107.1420076792,0.766450417052827
187 | "scnn1a_tg3",75.217859886,83.0623627652,0.905558876270173
188 | "scnn1a_tg3",65.6356506728,86.6867216904,0.757159221076747
189 | "scnn1a_tg3",81.0764126692,102.9218637252,0.787747226242161
190 | "scnn1a_tg3",80.3813301356,100.0918848384,0.803075396825397
191 | "scnn1a_tg3",69.50825336,94.7794683316,0.733368255631221
192 | "scnn1a_tg3",85.5944491376,124.4694222668,0.687674511368169
193 | "scnn1a_tg3",81.423953936,110.7663666044,0.735096369341103
194 | "scnn1a_tg3",71.0970134368,97.8576909804,0.726534753932014
195 | "scnn1a_tg3",67.522303264,91.3040556636,0.739532354540511
196 | "scnn1a_tg3",62.7560230336,108.3832264892,0.579019697663765
197 | "scnn1a_tg3",65.3874069108,83.409904032,0.783928571428571
198 | "scnn1a_tg3",87.5803992336,112.206180424,0.780530973451327
199 | "scnn1a_tg3",75.9129424196,94.2829808076,0.805160610847815
200 | "scnn1a_tg3",74.2745335904,103.1701074872,0.719923002887392
201 | "scnn1a_tg3",75.2675086384,100.1415335908,0.751611303916708
202 | "scnn1a_tg3",68.7138733216,107.4399001936,0.639556377079482
203 | "scnn1a_tg3",76.707322458,101.4820499056,0.75587084148728
204 | "scnn1a_tg3",78.19678503,106.4469251456,0.734608208955224
205 | "scnn1a_tg3",69.1607120932,80.5799251452,0.858287122612446
206 | "scnn1a_tg3",73.3312072948,113.199155472,0.64780701754386
207 | "scnn1a_tg3",72.8843685232,92.5452744736,0.78755364806867
208 | "scnn1a_tg3",88.0272380052,102.028186182,0.862773722627737
209 | "scnn1a_tg3",75.1185623812,106.496573898,0.705361305361305
210 | "scnn1a_tg3",73.5298023044,104.8085163164,0.701563240170535
211 | "scnn1a_tg3",88.7223205388,116.8731631496,0.759133389974512
212 | "scnn1a_tg3",67.6216007688,93.3893032644,0.72408293460925
213 | "scnn1a_tg3",62.805671786,79.8351938592,0.786691542288557
214 | "scnn1a_tg3",80.2323838784,119.901737046,0.669151138716356
215 | "scnn1a_tg3",105.3546525928,127.9944836872,0.823118696664081
216 | "scnn1a_tg3",74.6220748572,107.9363877176,0.691352345906164
217 | "scnn1a_tg3",69.5579021124,102.0778349344,0.681420233463035
218 | "scnn1a_tg3",70.8487696748,113.7452917484,0.622872108249673
219 | "scnn1a_tg3",77.8988925156,99.9925873336,0.779046673286991
220 | "scnn1a_tg3",82.3672802316,102.6736199632,0.802224371373308
221 | "scnn1a_tg3",64.3447831104,88.5237255292,0.726864834548514
222 | "scnn1a_tg3",89.6656468344,131.817437622,0.680225988700565
223 | "scnn1a_tg3",81.8707927076,107.1420076792,0.764133456904541
224 | "scnn1a_tg3",74.9199673716,97.311554704,0.769897959183673
225 | "scnn1a_tg3",79.4876525924,106.0497351264,0.749531835205993
226 | "scnn1a_tg3",71.8417447228,114.8872130536,0.625324114088159
227 | "scnn1a_tg3",89.9635393488,123.625393476,0.727710843373494
228 | "scnn1a_tg3",72.487178504,100.6876698672,0.719921104536489
229 | "scnn1a_tg3",75.3171573908,96.9143646848,0.777151639344262
230 | "scnn1a_tg3",67.7208982736,117.667543188,0.575527426160338
231 | "scnn1a_tg3",68.0684395404,92.6942207308,0.734333154793787
232 | "scnn1a_tg3",97.2619059516,102.9715124776,0.944551591128255
233 | "scnn1a_tg3",75.8632936672,92.6445719784,0.818863879957128
234 | "scnn1a_tg3",76.1611861816,101.5813474104,0.749755620723363
235 | "scnn1a_tg3",68.515278312,102.7232687156,0.666988883518608
236 | "scnn1a_tg3",80.7288714024,110.8656641092,0.728168383340797
237 | "scnn1a_tg3",81.5729001932,106.4469251456,0.766324626865672
238 | "scnn1a_tg3",75.0689136288,110.9153128616,0.676812891674127
239 | "scnn1a_tg3",67.1251132448,91.3040556636,0.735182164219685
240 | "scnn1a_tg3",82.7148214984,109.7733915564,0.753505201266395
241 | "scnn1a_tg3",71.0473646844,104.510623802,0.679809976247031
242 | "scnn1a_tg3",81.8707927076,102.0778349344,0.80204280155642
243 | "scnn1a_tg3",74.5724261048,104.6595700592,0.712523719165085
244 | "scnn1a_tg3",84.4525278324,119.4548982744,0.706982543640898
245 | "scnn1a_tg3",79.3883550876,99.6450460668,0.796711509715994
246 | "scnn1a_tg3",82.2679827268,113.943886758,0.722004357298475
247 | "scnn1a_tg3",82.3176314792,111.6600441476,0.737216540684749
248 | "scnn1a_tg3",82.7148214984,105.9504376216,0.780693533270853
249 | "scnn1a_tg3",77.6010000012,97.1129596944,0.799079754601227
250 | "scnn1a_tg3",79.8848426116,102.772917468,0.777294685990338
251 | "scnn1a_tg3",72.1892859896,93.6375470264,0.770943796394486
252 | "scnn1a_tg3",82.5162264888,113.5963454912,0.726398601398601
253 | "scnn1a_tg3",73.480153552,97.0136621896,0.757420675537359
254 | "scnn1a_tg3",83.6084990416,106.2979788884,0.78654834189631
255 | "scnn1a_tg3",90.5593243776,111.6103953952,0.811387900355872
256 | "scnn1a_tg3",77.2534587344,88.2258330148,0.875633089476646
257 | "scnn1a_tg3",82.3672802316,107.241305184,0.768055555555556
258 | "scnn1a_tg3",92.594923226,122.9799596948,0.752926927735163
259 | "scnn1a_tg3",82.4665777364,109.723742804,0.75158371040724
260 | "scnn1a_tg3",70.7991209224,106.248330136,0.666355140186916
261 | "scnn1a_tg3",92.8928157404,145.3715470272,0.639002732240437
262 | "scnn1a_tg3",84.154635318,115.681593092,0.727467811158798
263 | "scnn1a_tg3",88.12653551,119.15700576,0.739583333333333
264 | "scnn1a_tg3",64.5930268724,95.2263071032,0.678310740354536
265 | "scnn1a_tg3",87.0342629572,123.0792571996,0.707139975796692
266 | "scnn1a_tg3",71.6927984656,100.4394261052,0.713791398912506
267 | 


--------------------------------------------------------------------------------
/data/nuc_soma_probe_counts.csv:
--------------------------------------------------------------------------------
 1 | Cell,Layer,Calb1_nuc,Pvalb_nuc,Grik1_nuc,Calb1_cyto,Pvalb_cyto,Grik1_cyto
 2 | 14,4,23,1,7,26,0,6
 3 | 15,4,15,1,7,8,2,3
 4 | 16,4,4,0,10,16,4,5
 5 | 17,4,16,3,11,27,2,8
 6 | 18,4,13,2,6,15,2,5
 7 | 19,4,11,4,7,13,1,3
 8 | 20,4,5,3,13,26,7,3
 9 | 21,4,1,17,12,0,41,3
10 | 22,4,1,3,2,0,75,4
11 | 23,4,1,4,11,2,45,2
12 | 24,4,1,34,9,0,34,0
13 | 25,4,1,31,7,2,78,4
14 | 26,4,9,2,5,10,3,1
15 | 27,4,4,1,15,4,2,4
16 | 28,4,11,3,11,14,7,2
17 | 29,4,5,1,4,2,7,6
18 | 30,4,6,7,10,4,6,11
19 | 31,4,2,5,7,11,4,4
20 | 32,4,7,2,6,10,4,3
21 | 33,4,9,1,6,9,1,2
22 | 34,4,4,3,6,7,2,1
23 | 35,4,6,1,8,28,0,1
24 | 84,4,1,2,3,21,4,6
25 | 1,5,2,31,18,3,46,7
26 | 2,5,1,17,15,1,29,3
27 | 3,5,1,14,7,4,54,7
28 | 4,5,0,11,4,1,14,2
29 | 5,5,1,28,11,0,77,8
30 | 6,5,1,23,10,1,80,12
31 | 7,5,3,29,7,0,45,2
32 | 8,5,6,18,5,20,53,2
33 | 9,5,3,25,24,11,36,6
34 | 10,5,2,14,14,1,12,6
35 | 11,5,1,20,17,2,46,8
36 | 12,5,20,10,11,27,47,7
37 | 13,5,37,5,18,59,1,9
38 | 36,5,5,1,3,23,12,3
39 | 37,5,0,26,16,0,93,13
40 | 38,5,1,10,8,0,31,5
41 | 39,5,15,3,11,23,5,9
42 | 40,5,7,3,5,40,11,7
43 | 41,5,17,0,18,28,2,13
44 | 42,5,6,2,8,37,0,6
45 | 43,5,1,0,30,0,2,18
46 | 44,5,10,2,6,30,9,10
47 | 45,5,0,2,25,0,0,18
48 | 46,5,4,2,9,13,9,8
49 | 47,5,18,6,7,39,8,5
50 | 48,5,18,10,10,43,46,14
51 | 49,5,0,5,8,2,6,10
52 | 50,5,7,1,5,12,8,8
53 | 51,5,3,3,7,19,10,6
54 | 52,5,0,15,9,0,40,9
55 | 53,5,11,7,6,40,55,11
56 | 54,5,11,0,15,34,0,11
57 | 55,5,0,6,5,1,19,11
58 | 56,5,3,3,8,0,24,13
59 | 57,5,0,9,6,10,29,10
60 | 58,5,1,4,17,3,30,14
61 | 59,5,2,1,9,2,3,12
62 | 60,5,0,3,15,0,19,18
63 | 61,5,0,12,11,1,37,12
64 | 62,5,3,1,3,19,9,13
65 | 63,5,7,0,12,16,1,9
66 | 64,5,13,4,9,28,10,15
67 | 65,5,16,18,13,81,78,10
68 | 66,5,8,2,9,25,10,16
69 | 67,5,11,2,14,24,4,9
70 | 68,5,6,2,9,10,3,15
71 | 69,5,0,42,11,1,79,10
72 | 70,5,13,0,21,23,8,18
73 | 71,5,3,9,6,32,75,3
74 | 72,5,5,4,9,6,4,2
75 | 73,5,3,3,5,2,6,2
76 | 74,5,3,0,11,28,3,5
77 | 75,5,1,1,10,3,8,11
78 | 76,5,1,1,13,1,2,15
79 | 77,5,0,1,13,0,3,39
80 | 78,5,0,1,31,0,5,18
81 | 79,5,11,2,10,12,8,13
82 | 80,5,1,4,13,10,5,4
83 | 81,5,1,3,13,4,7,11
84 | 82,5,3,4,14,7,4,7
85 | 83,5,0,3,10,1,53,7
86 | 85,5_6,0,5,3,0,41,3
87 | 86,5_6,7,1,8,1,9,33
88 | 87,5_6,0,8,6,0,60,6
89 | 88,5_6,0,6,5,0,62,23
90 | 89,5_6,0,27,7,0,91,10
91 | 90,5_6,33,0,6,82,3,10
92 | 91,5_6,1,0,2,0,90,4
93 | 92,5_6,4,7,7,35,5,3
94 | 93,5_6,3,1,8,23,4,3
95 | 94,5_6,1,5,10,3,190,17
96 | 95,5_6,3,51,7,3,218,8
97 | 96,5_6,1,22,5,3,146,12
98 | 97,5_6,2,17,9,2,179,10
99 | 


--------------------------------------------------------------------------------
/data/start_data.rda:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:09a7b517a143a83273d327dbc383d7a45fac774281fb208c878671ab0da683ee
3 | size 64923794
4 | 


--------------------------------------------------------------------------------
/output/.gitignore:
--------------------------------------------------------------------------------
1 | *.pdf
2 | *.png
3 | *.csv
4 | 


--------------------------------------------------------------------------------