├── 1_M1_ref.Rmd ├── 2_Seurat_obj_filter.Rmd ├── 3_Merge_remove_MT.Rmd ├── 4_SCTransform_integrate.Rmd ├── 5_Annotate.Rmd ├── 6_Filter.Rmd ├── 7_Dim_reduction.Rmd ├── 8_Cluster_annotation.Rmd ├── 9_DE_analysis.Rmd └── README.md /1_M1_ref.Rmd: --------------------------------------------------------------------------------
---
title: "M1_ref"
output: html_notebook
---

```{r}
#Builds the human M1 reference: loads the per-run Seurat objects, merges them, keeps only
#nuclei present in the published metadata, normalizes each run with SCTransform,
#integrates the runs and attaches broad cell type labels.

#Load in packages

library(Seurat)
library(sctransform)
library(glmGamPoi)

#Load in Seurat objects - these were created from original FASTQ files that were re-processed with Cell Ranger v4.0 (10X Genomics) and aligned to GRCh38
#One .rds file per 10X run. Note that no object is loaded for 10X160-4.
ref.dir <- '/gpfs/fs1/data/chiba-faleklab/Daniel/snATAC.snRNA.R01/R.objects/celltype.annotation/m1.10X.seurat.objects.added.suffix/'
run.ids <- c(paste0('10X159-', 1:7), paste0('10X160-', c(1:3, 5:8)))
ref.objects <- lapply(run.ids, function(run.id) {
  readRDS(paste0(ref.dir, 'seurat.obj_', run.id, '.rds'))
})

#Merge into 1 combined object (first run is x, all remaining runs are passed as a list in y)
ref.combined <- merge(x = ref.objects[[1]], y = ref.objects[-1])
ref.combined
head(ref.combined@meta.data)
saveRDS(ref.combined, '/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/human.m1.ref_combined.rds')

#Use metadata of published dataset to filter out nuclei that do not have cell type annotations
metadata <- read.csv('/gpfs/fs1/data/chiba-faleklab/Daniel/metadata_10X_M1.csv')
length(metadata$exp_component_name) #76533
length(colnames(ref.combined)) #101176
metadata.filtered <- metadata[metadata$exp_component_name %in% colnames(ref.combined), ]
#as.character() is a no-op for character columns and protects against read.csv returning factors
cells.use <- as.character(metadata.filtered$exp_component_name)
length(cells.use) #76519
ref.combined.subset <- subset(ref.combined, cells = cells.use)
ref.combined.subset
#An object of class Seurat
#36601 features across 76519 samples within 1 assay
#Active assay: RNA (36601 features, 0 variable features)
table(metadata.filtered$exp_component_name == colnames(ref.combined.subset))
#TRUE
#76519
saveRDS(ref.combined.subset, '/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/human.m1.ref_combined_filtered.rds')

#Create metadata column with run ID suffix
#The suffix is characters 18-38 of the cell name. The vector is named by cell and added to the
#FILTERED object (the unfiltered ref.combined has more cells than there are suffixes).
cellIDs <- as.character(metadata.filtered$exp_component_name)
suffixes <- substr(cellIDs, 18, 38)
names(suffixes) <- cellIDs
ref.combined.suffix <- AddMetaData(ref.combined.subset, metadata = suffixes, col.name = 'suffix')
ref.combined.suffix@meta.data[sample(nrow(ref.combined.suffix@meta.data), 20), ] #check work
saveRDS(ref.combined.suffix, '/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/human.m1.ref_combined_filtered_suffixes.rds')
write.csv(metadata.filtered, '/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/csv_files/human.m1.ref_metadata_filtered.csv')

#Run SCTransform iteratively for each run
ref.split <- SplitObject(ref.combined.suffix, split.by = "suffix")
ref.split
for (i in seq_along(ref.split)) {
  ref.split[[i]] <- SCTransform(ref.split[[i]], method = 'glmGamPoi', return.only.var.genes = FALSE, verbose = FALSE)
}
ref.split
saveRDS(ref.split, '/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/human.m1.ref_combined_sctransform.rds')

#Integrate runs using reference-based integration
int.features <- SelectIntegrationFeatures(object.list = ref.split, nfeatures = 10000)
ref.split <- PrepSCTIntegration(object.list = ref.split, anchor.features = int.features)
reference_dataset <- which(names(ref.split) == "21L8TX_180927_001_A01")
#Fail early if the reference run is missing instead of passing an empty index to the anchor search
stopifnot(length(reference_dataset) == 1)
ref.anchors <- FindIntegrationAnchors(object.list = ref.split, normalization.method = "SCT",
                                      anchor.features = int.features, reference = reference_dataset)
ref.integrated <- IntegrateData(anchorset = ref.anchors, normalization.method = "SCT")
m1.ref <- ref.integrated
m1.ref
#An object of class Seurat
#70712 features across 76519 samples within 3 assays
#Active assay: integrated (4878 features, 4878 variable features)
#2 other assays present: RNA, SCT
saveRDS(m1.ref, '/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/human.m1.ref_integrated.rds')

#Group cell type annotations in metadata into broader cell types
metadata <- read.csv('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/csv_files/human.m1.ref_metadata_filtered.csv')
CellType <- as.character(metadata$cluster_label)
#Every label starting with one of these prefixes is collapsed to the prefix itself
for (broad.type in c('Astro', 'Endo', 'Exc', 'Inh', 'Micro', 'Oligo', 'OPC', 'VLMC')) {
  CellType[startsWith(CellType, broad.type)] <- broad.type
}
CellType <- as.factor(CellType)
levels(CellType)
#Align labels to the integrated object by cell name rather than by position:
#the cell order after splitting/integration is not guaranteed to match the row order of the CSV
cell.idx <- match(colnames(m1.ref), as.character(metadata$exp_component_name))
stopifnot(!anyNA(cell.idx))
m1.ref$CellType <- CellType[cell.idx]
saveRDS(m1.ref, '/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/human.m1.ref_celltypes_10000.int.features.rds')
```

-------------------------------------------------------------------------------- /2_Seurat_obj_filter.Rmd:
--------------------------------------------------------------------------------
---
title: "Seurat_obj_filter"
output: html_notebook
---

```{r}
#Load in packages
library(dplyr)
library(Seurat)
library(ggplot2)
library(patchwork)

#Load in snRNA-seq data and create Seurat objects
#Add sex, age, PMI, and sample ID as metadata to each object

#Root of the raw data share and destination folder for the filtered objects
raw.root <- "Z:/RAW DATA/"
out.dir <- 'C:/Users/jeg51/Documents/10X_data/Parallel_ADRC_prelim/Seurat_objects/'

#One row per sample:
#  id      - numeric sample identifier; the sample ID stored in the object is "rna<id>"
#  project - Seurat project name (LOAD or Normal)
#  sex/age/pmi - donor metadata copied onto every nucleus of the sample
#  run_dir - Cell Ranger output folder relative to raw.root (parent of filtered_feature_bc_matrix)
samples <- read.csv(
  text = "id,project,sex,age,pmi,run_dir
1099,LOAD,Female,95,6.6,Dan/Parallel_snRNAseq_snATACseq/order6353rna_order6353atac/snRNA/Cellranger_v4.0.0/1099rna_count_CR4
273,LOAD,Female,92,14,Dan/Parallel_snRNAseq_snATACseq/order6393rna_order6394atac/snRNA/cellranger v4.0.0/273rna_count_CR4
730,LOAD,Female,86,12.2,Julia/snRNAseq_snATACseq/order6218/snRNA/730rna_count_CR4
430,LOAD,Female,81,16.5,Dan/Parallel_snRNAseq_snATACseq/order6410rna_order6411atac/snRNA/p1/430rna_count_CR4
601,LOAD,Female,76,10.5,Julia/snRNAseq_snATACseq/orders6789rna_6790atac/snRNA/601_matrix_files
781,LOAD,Female,79,3.75,Julia/snRNAseq_snATACseq/orders6789rna_6790atac/snRNA/781_matrix_files
963,Normal,Female,82,8.12,Dan/Parallel_snRNAseq_snATACseq/order6353rna_order6353atac/snRNA/Cellranger_v4.0.0/963rna_count_CR4
1690,Normal,Female,84,4.42,Dan/Parallel_snRNAseq_snATACseq/order6218rna_order6218atac/snRNA/1690rna_count_CR4
1545,Normal,Female,64,10.67,Dan/Parallel_snRNAseq_snATACseq/order6400rna_order6401atac/snRNA/cellranger v4.0.0/1545rna_count_CR4
984,Normal,Female,65,1.96,Dan/Parallel_snRNAseq_snATACseq/order6400rna_order6401atac/snRNA/cellranger v4.0.0/984rna_count_CR4
1600,Normal,Female,85,12.97,Dan/Parallel_snRNAseq_snATACseq/order6410rna_order6411atac/snRNA/p1/1600rna_count_CR4
1670,Normal,Female,87,18,Dan/Parallel_snRNAseq_snATACseq/order6410rna_order6411atac/snRNA/p2/1670rna_count_CR4
111,LOAD,Male,79,0.6167,Dan/Parallel_snRNAseq_snATACseq/order6393rna_order6394atac/snRNA/cellranger v4.0.0/111rna_count_CR4
191,LOAD,Male,82,1.5,Dan/Parallel_snRNAseq_snATACseq/order6410rna_order6411atac/snRNA/p1/191rna_count_CR4
127,LOAD,Male,66,3,Dan/Parallel_snRNAseq_snATACseq/order6400rna_order6401atac/snRNA/cellranger v4.0.0/127rna_count_CR4
347,LOAD,Male,77,16.83,Dan/Parallel_snRNAseq_snATACseq/order6410rna_order6411atac/snRNA/p2/347rna_count_CR4
357,LOAD,Male,75,3,Dan/Parallel_snRNAseq_snATACseq/order6410rna_order6411atac/snRNA/p2/357rna_count_CR4
372,LOAD,Male,87,1.33,Dan/Parallel_snRNAseq_snATACseq/order6538rna_order6537atac/rna/372rna_count_CR4_dan
1557,Normal,Male,90,4,Dan/Parallel_snRNAseq_snATACseq/order6353rna_order6353atac/snRNA/Cellranger_v4.0.0/1557rna_count_CR4
99,Normal,Male,85,2,Dan/Parallel_snRNAseq_snATACseq/order6393rna_order6394atac/snRNA/cellranger v4.0.0/99rna_count_CR4
196,Normal,Male,75,18.88,Dan/Parallel_snRNAseq_snATACseq/order6400rna_order6401atac/snRNA/cellranger v4.0.0/196rna_count_CR4
542,Normal,Male,82,3.25,Dan/Parallel_snRNAseq_snATACseq/order6410rna_order6411atac/snRNA/p1/542rna_count_CR4
676,Normal,Male,60,9.5,Dan/Parallel_snRNAseq_snATACseq/order6410rna_order6411atac/snRNA/p2/676rna_count_CR4
688,Normal,Male,63,7,Dan/Parallel_snRNAseq_snATACseq/order6538rna_order6537atac/rna/688rna_count_CR4_dan",
  colClasses = c("character", "character", "character", "numeric", "numeric", "character"),
  strip.white = TRUE,
  stringsAsFactors = FALSE
)

#Sample IDs are used later to split the merged object, so they must be unique
stopifnot(!anyDuplicated(samples$id))

#Process one sample at a time: load, annotate, QC filter, save
for (i in seq_len(nrow(samples))) {
  samp <- samples[i, ]
  #The sample ID is derived from the id column so it can never disagree with the data that was loaded
  samp.id <- paste0("rna", samp$id)

  counts <- Read10X(data.dir = paste0(raw.root, samp$run_dir, "/filtered_feature_bc_matrix/"))
  obj <- CreateSeuratObject(counts = counts, project = samp$project)
  obj <- AddMetaData(obj, metadata = as.factor(samp$sex), col.name = 'Sex')
  obj <- AddMetaData(obj, metadata = as.numeric(samp$age), col.name = 'age')
  obj <- AddMetaData(obj, metadata = as.numeric(samp$pmi), col.name = 'PMI')
  obj <- AddMetaData(obj, metadata = as.factor(samp.id), col.name = 'sampID')

  # QC filtering
  obj[["percent.mt"]] <- PercentageFeatureSet(obj, pattern = "^MT-")
  #print() is required because plots are not auto-printed inside a loop
  print(VlnPlot(obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1))
  #Thresholds are kept as literals inside the subset expression (identical for every sample)
  obj.sub <- subset(obj, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 17.4)

  #Save objects and transfer to remote high-performance computer for subsequent steps
  saveRDS(obj.sub, paste0(out.dir, samp.id, '.rds'))
}
```

-------------------------------------------------------------------------------- /3_Merge_remove_MT.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Merge_remove_MT" 3 | output: html_notebook 4 | --- 5 | 6 | ```{r} 7 | #Load packages 8 | library('Seurat') 9 | library('dplyr') 10 | 11 | #Load Seurat objects - these were previously filtered 12 | rna99 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna99.rds') 13 | rna111 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna111.rds') 14 | rna127 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna127.rds') 15 | rna191 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna191.rds') 16 | rna196 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna196.rds') 17 | rna273 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna273.rds') 18 | rna347 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna347.rds') 19 | rna357 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna357.rds') 20 | rna372 <- 
readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna372.rds') 21 | rna430 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna430.rds') 22 | rna542 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna542.rds') 23 | rna601 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna601.rds') 24 | rna676 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna676.rds') 25 | rna688 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna688.rds') 26 | rna730 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna730.rds') 27 | rna781 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna781.rds') 28 | rna963 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna963.rds') 29 | rna984 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna984.rds') 30 | rna1099 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna1099.rds') 31 | rna1545 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna1545.rds') 32 | rna1557 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna1557.rds') 33 | rna1600 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna1600.rds') 34 | rna1670 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna1670.rds') 35 | rna1690 <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_Prelim_RNA/Seurat_objects_not_norm/rna1690.rds') 36 | 37 | #Merge multiple Seurat objects 38 | list <- c(rna111, rna127, rna191, rna196, rna273, rna347, rna357, rna372, rna430, rna542, rna601, 
rna676, rna688, rna730, rna781, rna963, rna984, rna1099, rna1545, rna1557, rna1600, rna1670, rna1690) 39 | 40 | combined <- merge(x = rna99, y = list) 41 | #Warning message: 42 | #In CheckDuplicateCellNames(object.list = objects) : 43 | #Some cell names are duplicated across objects provided. Renaming to enforce unique cell names. 44 | 45 | combined 46 | #An object of class Seurat 47 | #36601 features across 209518 samples within 1 assay 48 | #Active assay: RNA (36601 features, 0 variable features) 49 | 50 | #Remove MT genes 51 | combined_data <- GetAssayData(combined, assay = "RNA") 52 | combined_data_noMT <- combined_data[-(which(rownames(combined_data) %in% c('MT-ND1','MT-ND2','MT-CO1','MT-CO2','MT-ATP8', 'MT-ATP6','MT-CO3', 'MT-ND3', 'MT-ND4L', 'MT-ND4', 'MT-ND5', 'MT-ND6', 'MT-CYB'))),] 53 | combined_noMT <- subset(combined, features = rownames(combined_data_noMT)) 54 | combined_noMT_test <- grep("^MT-", rownames(combined_noMT), value = TRUE) 55 | combined_noMT_test 56 | #character(0) 57 | combined <- combined_noMT 58 | combined 59 | #An object of class Seurat 60 | #36588 features across 209518 samples within 1 assay 61 | #Active assay: RNA (36588 features, 0 variable features) 62 | 63 | saveRDS(combined, '/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/combined.rds') 64 | ``` 65 | -------------------------------------------------------------------------------- /4_SCTransform_integrate.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "SCTransform_integrate" 3 | output: html_notebook 4 | --- 5 | 6 | ```{r} 7 | #Load in packages 8 | library('Seurat') 9 | library('dplyr') 10 | library('sctransform') 11 | library('glmGamPoi') 12 | 13 | #Select features for downstream integration and run PrepSCTIntegration, which ensures that all necessary Pearson residuals have been calculated 14 | combined.list <- SplitObject(combined, split.by = "sampID") 15 | combined.list 16 | 17 | for (i in 
1:length(combined.list)) { 18 | combined.list[[i]] <- SCTransform(combined.list[[i]], method = 'glmGamPoi', return.only.var.genes = FALSE, verbose = FALSE) 19 | } 20 | 21 | combined.features <- SelectIntegrationFeatures(object.list = combined.list, nfeatures = 10000) 22 | combined.list <- PrepSCTIntegration(object.list = combined.list, anchor.features = combined.features) 23 | 24 | reference_dataset <- which(names(combined.list) == "rna963") #this is for reference-based integration 25 | 26 | #Identify anchors and integrate the datasets with SCT normalization 27 | combined.anchors <- FindIntegrationAnchors(object.list = combined.list, normalization.method = "SCT", 28 | anchor.features = combined.features, reference = reference_dataset) 29 | saveRDS(combined.anchors, '/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/combined_anchors_ref_based.rds') 30 | combined.integrated <- IntegrateData(anchorset = combined.anchors, normalization.method = "SCT") 31 | saveRDS(combined.integrated, '/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/combined_integrated.rds') 32 | combined.integrated 33 | #An object of class Seurat 34 | #74395 features across 209518 samples within 3 assays 35 | #Active assay: integrated (6753 features, 6753 variable features) 36 | #2 other assays present: RNA, SCT 37 | ``` 38 | -------------------------------------------------------------------------------- /5_Annotate.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Annotate" 3 | output: html_notebook 4 | --- 5 | 6 | ```{r} 7 | #Load in packages 8 | library('Seurat') 9 | library('dplyr') 10 | library('sctransform') 11 | 12 | #Load in datasets 13 | reference_dataset <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/human.m1.ref_celltypes_10000.int.features.rds') 14 | combined.integrated <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/combined_integrated_updated.rds') 15 | 
#Make "integrated" the active assay of both the reference and the query before label transfer
DefaultAssay(reference_dataset) <- "integrated"
DefaultAssay(combined.integrated) <- "integrated"
reference_dataset
#An object of class Seurat
#70712 features across 76519 samples within 3 assays
#Active assay: integrated (4878 features, 4878 variable features)
#2 other assays present: RNA, SCT
combined.integrated
#An object of class Seurat
#74395 features across 209518 samples within 3 assays
#Active assay: integrated (6753 features, 6753 variable features)
#2 other assays present: RNA, SCT

#How many integrated features does each object have, and how many do they share?
nrow(reference_dataset) #4878
nrow(combined.integrated) #6753
shared.features <- intersect(rownames(reference_dataset), rownames(combined.integrated))
length(shared.features) #3574

#Label transfer: anchors are found on the shared features only, then the reference CellType labels
#are transferred to the query and stored as metadata (predicted.id and prediction.score.* columns)
transfer.anchors <- FindTransferAnchors(
  reference = reference_dataset,
  query = combined.integrated,
  normalization.method = "SCT",
  reference.assay = "integrated",
  query.assay = "integrated",
  project.query = FALSE,
  features = shared.features
)
predictions <- TransferData(anchorset = transfer.anchors, refdata = reference_dataset$CellType, dims = 1:30)
combined.integrated <- AddMetaData(object = combined.integrated, metadata = predictions)
```

--------------------------------------------------------------------------------
/6_Filter.Rmd:
--------------------------------------------------------------------------------
---
title: "Filter"
output: html_notebook
---

```{r}
#Load in packages
library('Seurat')
library('dplyr')

#Histogram of the maximum prediction score per nucleus from label transfer, written to a png file
png("pred_score_max_histogram_integrated.png")
hist(combined.integrated$prediction.score.max)
dev.off()

#Filter based on prediction score - keep only cells whose max score is greater than 0.5
combined.integrated <- subset(combined.integrated, subset = prediction.score.max > 0.5)
combined.integrated
#An object of class Seurat
#74395 features across 207778 samples within 3 assays
#Active assay: integrated (6753 features, 6753 variable features)
#2 other assays present: RNA, SCT
saveRDS(combined.integrated, '/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/combined_integrated_labeled_integrated.rds')

#Filter out 'hybrid' cells
#Adjusted score = (highest score - second-highest score) / highest score, per cell.
#values close to 1 are accurate
#Computed on the whole score matrix at once: assigning into the Seurat object one cell at a time
#modifies the object once per nucleus, which is very slow for ~200,000 nuclei
score.cols <- paste0("prediction.score.", c("Astro", "Endo", "Inh", "Exc", "Oligo", "Micro", "OPC", "VLMC"))
score.matrix <- as.matrix(combined.integrated@meta.data[, score.cols])
second.best.score <- apply(score.matrix, 1, function(scores) sort(scores, decreasing = TRUE)[2])
best.score <- combined.integrated$prediction.score.max
combined.integrated$adjusted.prediction.score.max <- unname((best.score - second.best.score) / best.score)

#Visualize distribution of adjusted prediction scores
png("adj_pred_score_max_histogram.png")
hist(combined.integrated$adjusted.prediction.score.max)
dev.off()

#Keep only cells with an adjusted prediction score greater than 0.65
combined.integrated <- subset(combined.integrated, subset = adjusted.prediction.score.max > 0.65)
combined.integrated
#An object of class Seurat
#74395 features across 202688 samples within 3 assays
#Active assay: integrated (6753 features, 6753 variable features)
#2 other assays present: RNA, SCT

saveRDS(combined.integrated, '/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/combined_integrated_labeled_filt.rds')

#Fix metadata - rna730 is sampID of sample 5 and 15 when it should only be sample 15. rna196 is missing and needs to replace rna730 as sampID for sample 5
#A separate csv with correct values was created
metadata_196 <- read.csv('combined_integrated_labeled_filt_metadata_rna196.csv')
#sampID is overwritten by position, so the csv must have exactly one row per nucleus.
#NOTE(review): this assumes the csv rows are in the same order as the cells of the object - confirm
stopifnot(nrow(metadata_196) == ncol(combined.integrated))
combined.integrated$sampID <- metadata_196$sampID
metadata_fixed <- combined.integrated@meta.data
write.csv(metadata_fixed, 'combined_integrated_labeled_metadata_fixed.csv')

#Remove rare cell types that did not form their own clusters in preliminary UMAP analysis (endo & VLMC)
#Count cell types
celltype_counts <- table(combined.integrated$orig.ident, combined.integrated$predicted.id)
write.csv(celltype_counts, 'celltypes_by_orig_ident_unfiltered.csv')
Idents(combined.integrated) <- "predicted.id"
combined.integrated.noEndoVLMC <- subset(combined.integrated, idents = c("Astro", "Exc", "Inh", "Micro", "Oligo", "OPC"))
combined.integrated.noEndoVLMC
#An object of class Seurat
#74395 features across 202223 samples within 3 assays
#Active assay: integrated (6753 features, 6753 variable features)
#2 other assays present: RNA, SCT

saveRDS(combined.integrated.noEndoVLMC, '/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/combined_integrated_labeled_filt_noEndoVLMC.rds')

#Check if any nuclei have NaN values in integrated data, as this error can occur for large datasets - none found
#is.nan() tests the numeric values directly; comparing against the string "NaN" first converts
#the entire matrix to character
int.data.matrix <- as.matrix(combined.integrated@assays$integrated@data)
which(is.nan(int.data.matrix), arr.ind = TRUE)
#row col
```

--------------------------------------------------------------------------------
/7_Dim_reduction.Rmd:
--------------------------------------------------------------------------------
---
title: "Dim_reduction"
output: html_notebook
---

```{r}
#Load in packages
library('Seurat')
library('dplyr')

#Read in the labeled, filtered object (Endo and VLMC nuclei already removed)
combined.integrated <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/combined_integrated_labeled_filt_noEndoVLMC.rds')

#PCA is run on the integrated assay
DefaultAssay(combined.integrated) <- "integrated"

#Run PCA
combined.integrated <- RunPCA(combined.integrated)
combined.integrated
#An object of class Seurat
#74395 features across 202223 samples within 3 assays
#Active assay: integrated (6753 features, 6753 variable features)
#2 other assays present: RNA, SCT
#1 dimensional reduction calculated: pca

saveRDS(combined.integrated, '/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/combined_integrated_PCA.rds')

#Elbow plot of the first 50 PCs, written to a png file
png("ElbowPlot.png")
ElbowPlot(combined.integrated, ndims = 50)
dev.off()

#Share of variance per PC: squared standard deviations divided by their sum.
#The sum runs over the stored PCs only, so these are proportions among the computed PCs
pca <- combined.integrated[["pca"]]
eigValues <- pca@stdev^2 ## EigenValues
varExplained <- eigValues / sum(eigValues)
varExplained

#UMAP on the first 30 PCs; the neighbor graph and clusters are then built on the 2 UMAP coordinates
combined.integrated <- RunUMAP(combined.integrated, dims = 1:30)
combined.integrated <- FindNeighbors(combined.integrated, reduction = "umap", dims = 1:2) #only 2 dims in UMAP reduction
combined.integrated <- FindClusters(combined.integrated, resolution = 0.1)

png("UMAP_d30_r01.png", width = 700, height = 650)
DimPlot(combined.integrated, reduction = "umap", label = TRUE)
dev.off()
```

--------------------------------------------------------------------------------
/8_Cluster_annotation.Rmd:
--------------------------------------------------------------------------------
---
title: "Cluster_annotation"
output: html_notebook
---

```{r}
#Load in packages
library('Seurat')
library('plyr') #attach plyr BEFORE dplyr: attaching it afterwards masks dplyr verbs such as summarise() and mutate()
library('dplyr')

#Count cell types in each cluster to determine which cell type is majority in each cluster
levels(combined.integrated$seurat_clusters)
cluster_celltype_counts <- table(combined.integrated$seurat_clusters, combined.integrated$predicted.id)
write.csv(cluster_celltype_counts, 'celltypes_in_clusters.csv')

#Plot cell type-specific markers to confirm predictions
DefaultAssay(combined.integrated) <- "SCT"
png("featplots_celltypemarkers.png", width = 1000, height = 1000)
FeaturePlot(combined.integrated, features = c("SLC1A2", "RBFOX3", "SLC17A7", "GAD1", "GAD2", "SLC6A1", "APBB1IP", "MOBP", "MEGF11"), slot = "data", pt.size = 0.1, reduction = "umap")
dev.off()

#Manually annotate clusters
#One label per cluster, in the order of levels(combined.integrated)
new.cluster.ids <- c("Exc1", "Oligo1", "Oligo2", "Astro1", "Oligo3", "Oligo4", "Oligo5", "OPC1", "Micro1", "Exc2", "Oligo6",
                     "Oligo7", "Inh1", "Oligo8", "Exc3", "Exc4", "Inh2", "Inh3", "Exc5", "Inh4", "Inh5", "Exc6",
                     "Micro2", "Micro3", "Exc7", "Exc8", "Exc9", "Exc10", "Inh6", "Exc11", "Inh7", "Inh8", "OPC2")
#Stop if the number of labels no longer matches the number of clusters (e.g. after re-clustering)
stopifnot(length(new.cluster.ids) == length(levels(combined.integrated)))
names(new.cluster.ids) <- levels(combined.integrated)
combined.integrated <- RenameIdents(combined.integrated, new.cluster.ids)
combined.integrated$cell.type.number <- Idents(combined.integrated) #Create new metadata column with cluster labels

#count cells in each cluster with new IDs and compute nuclei proportions per sample
cluster_celltype_counts <- table(combined.integrated$cell.type.number)
write.csv(cluster_celltype_counts, 'cell_type_number_counts.csv')
celltype_origident_counts <- table(combined.integrated$orig.ident, combined.integrated$cell.type.number)
write.csv(celltype_origident_counts, 'cell_type_number_counts_split.csv')
cluster_celltype_counts_sampID <- table(combined.integrated$sampID, combined.integrated$cell.type.number)
write.csv(cluster_celltype_counts_sampID, 'cell_type_number_counts_sampID.csv')

#Create new metadata column of cluster-based cell types
#Numbered cluster labels (e.g. "Exc10") are collapsed to their cell type ("Exc")
cell.type.number <- as.character(combined.integrated$cell.type.number)
for (cell.type.prefix in c('Astro', 'Exc', 'Inh', 'Micro', 'Oligo', 'OPC')) {
  cell.type.number[startsWith(cell.type.number, cell.type.prefix)] <- cell.type.prefix
}
cell.type.number <- as.factor(cell.type.number)
levels(cell.type.number)
combined.integrated$cell.type <- cell.type.number

#Count cells of each type and compute nuclei proportions per sample
cell_type_counts <- table(combined.integrated$cell.type)
write.csv(cell_type_counts, 'cell_type_counts.csv')
cell_type_counts_split <- table(combined.integrated$orig.ident, combined.integrated$cell.type)
write.csv(cell_type_counts_split, 'cell_type_counts_split.csv')
cell_type_counts_sampID <- table(combined.integrated$sampID, combined.integrated$cell.type)
write.csv(cell_type_counts_sampID, 'cell_type_counts_sampID.csv')

#Find all markers
integrated.markers <- FindAllMarkers(combined.integrated, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
saveRDS(integrated.markers, '/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/integrated_markers.rds')
write.csv(integrated.markers, 'cluster_markers.csv')
#Top 20 markers per cluster, ranked by average log2 fold change
cluster_markers <- integrated.markers %>% group_by(cluster) %>% top_n(n = 20, wt = avg_log2FC)
write.csv(cluster_markers,"cluster_markers_top20.csv")

#Add nuclei proportion to metadata
#Nuclei proportion is defined as follows: for each sample, the number of nuclei in a given cluster or of a given cell type divided by that sample's total nuclei count
#NOTE(review): the two *_proportions_sampID.csv files are read here but not written by this notebook -
#presumably derived from the count tables above; confirm how they were produced
#plyr::join is called with its namespace so the result does not depend on package attach order;
#with no `by`, it joins on all columns the two tables have in common and keeps the row order of the metadata
cell.type.proportions <- read.csv('cell_type_proportions_sampID.csv')
combined.integrated <- AddMetaData(combined.integrated, metadata = plyr::join(combined.integrated@meta.data, cell.type.proportions)$cell.type.proportion, col.name = 'cell.type.proportion')
combined.integrated@meta.data[sample(nrow(combined.integrated@meta.data), 20), ] #check work

cell.type.number.proportions <- read.csv('cell_type_number_proportions_sampID.csv')
combined.integrated <- AddMetaData(combined.integrated, metadata = plyr::join(combined.integrated@meta.data, cell.type.number.proportions)$cell.type.number.proportion, col.name = 'cell.type.number.proportion')
combined.integrated@meta.data[sample(nrow(combined.integrated@meta.data), 20), ] #check work
saveRDS(combined.integrated, '/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/combined_integrated_PCA_UMAP_metadata.rds')

#Save data for 1 example cluster to use for DE analysis
Idents(combined.integrated) <- "cell.type.number"
Micro1 <- subset(combined.integrated, idents = "Micro1")
saveRDS(Micro1, '/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/Micro1_for_DE_test.rds')
```

--------------------------------------------------------------------------------
/9_DE_analysis.Rmd:
--------------------------------------------------------------------------------
---
title: "DE_analysis"
output: html_notebook
---

```{r}
#Load in packages
library(ggplot2)
library(fitdistrplus)
library(MASS)
library(tidyr)
library(gdata)
library(Seurat)
library(data.table)
library(EnvStats)
library(purrr)
library(dplyr)
library(sn)
library(matrixStats)
library(fmsb)
library(lme4)

#####################################
### Format and transform the data ###
#####################################

Micro1_for_DE_test <- readRDS("~/Documents/10X_data/Parallel_LOAD_Normal_RNA_final/Micro1_for_DE_test.rds")
#Build a cells x genes data frame of raw counts and append the per-cell metadata columns
allgenes_JG <- t(as.data.frame(Micro1_for_DE_test@assays$RNA@counts))
allgenes_JG <- as.data.frame(allgenes_JG)
ngenes <- ncol(allgenes_JG)
allgenes_JG$orig.ident <- Micro1_for_DE_test@meta.data$orig.ident
allgenes_JG$sampID <- Micro1_for_DE_test@meta.data$sampID
allgenes_JG$wellKey <- paste(Micro1_for_DE_test@meta.data$sampID, rownames(Micro1_for_DE_test@meta.data), sep = "_")
rownames(allgenes_JG) <- allgenes_JG$wellKey
allgenes_JG$cell.type.number.proportion <- Micro1_for_DE_test@meta.data$cell.type.number.proportion
#Move the 4 metadata columns to the front: wellKey, sampID, orig.ident, cell.type.number.proportion
allgenes_JG <- allgenes_JG[, c(ngenes + c(3, 2, 1, 4), seq_len(ngenes))]
genecounts_JG <- as.matrix(t(allgenes_JG[, -(1:4)])) #genes x cells
genecounts_JG <- log2(genecounts_JG + 1) #log2 transform
coldata_JG <- allgenes_JG[, 1:4]
coldata_JG$orig.ident <- as.factor(coldata_JG$orig.ident)
nrow(genecounts_JG) #36588 genes

#####################################
###       Filter the data         ###
#####################################

#Calculate %cells expressing each gene in each group
LOAD.cells <- as.matrix(t(allgenes_JG[allgenes_JG$orig.ident == 'LOAD', -(1:4)]))
ncol(LOAD.cells) #4717
Normal.cells <- as.matrix(t(allgenes_JG[allgenes_JG$orig.ident == 'Normal', -(1:4)]))
ncol(Normal.cells) #4569

#Fraction of the values in x that are strictly greater than threshold
PercentAbove <- function(x, threshold){
  length(x = x[x > threshold]) / length(x = x)
}

pct.exp.LOAD <- apply(X = LOAD.cells, MARGIN = 1, FUN = PercentAbove, threshold = 0)
pct.exp.Normal <- apply(X = Normal.cells, MARGIN = 1, FUN = PercentAbove, threshold = 0)

# Keep genes expressed in >=10% of cells in at least one group,
# i.e. filter out genes expressed in <10% of cells in BOTH groups
# (despite its name, alpha.min holds the larger of the two per-group fractions)
alpha.min <- pmax(pct.exp.LOAD, pct.exp.Normal)
genes.to.keep <- names(x = which(x = alpha.min >= 0.1)) #6397 genes to keep
genecounts_JG <- genecounts_JG[rownames(genecounts_JG)%in%genes.to.keep,]

#####################################
###           Run MAST            ###
#####################################

genecounts_JG <- genecounts_JG[,rownames(coldata_JG)] #match order of data with metadata

fData_JG <- data.frame(primerid=rownames(genecounts_JG)) #feature data
sca_JG <- suppressMessages(MAST::FromMatrix(exprsArray=genecounts_JG, cData=coldata_JG, fData=fData_JG)) #makes a single-cell assay with expression data, metadata (e.g. Control_1_cell_1), and feature data
cdr2_JG <- colSums(SummarizedExperiment::assay(sca_JG)>0) #compute cellular detection rate (cdr)
SummarizedExperiment::colData(sca_JG)$ngeneson <- scale(cdr2_JG) #adds centered and scaled cdr to listData in colData of sca object
cond <- factor(SummarizedExperiment::colData(sca_JG)$orig.ident)
cond <- relevel(cond,"Normal") # set the reference level of orig.ident condition to be Normal
SummarizedExperiment::colData(sca_JG)$orig.ident <- cond
SummarizedExperiment::colData(sca_JG)$sampID <- factor(SummarizedExperiment::colData(sca_JG)$sampID) #set sampID to factor

#runs zero-inflated regression fitting a generalized linear mixed-effects model.
#ebayes is spelled FALSE rather than F, because F is an ordinary variable that can be reassigned
zlmCond_JG_prop <- suppressMessages(MAST::zlm(~ orig.ident + ngeneson + cell.type.number.proportion + (1 | sampID), sca_JG, method='glmer', ebayes = FALSE, fitArgsD = list(nAGQ = 0), strictConvergence = FALSE))

colnames(coef(zlmCond_JG_prop, 'D')) #check names of modeled coefficients

# Generate summary of results with likelihood ratio test
summaryCond_JG_prop <- suppressMessages(MAST::summary(zlmCond_JG_prop,
                                                      doLRT='orig.identLOAD'))

#format the results
summaryDt_JG_prop <- summaryCond_JG_prop$datatable

#Hurdle p-value (component 'H') joined to the logFC coefficient and its confidence interval, per gene
fcHurdle_JG_prop <- merge(summaryDt_JG_prop[contrast=='orig.identLOAD' & component=='H',.(primerid, `Pr(>Chisq)`)],
                          summaryDt_JG_prop[contrast=='orig.identLOAD' & component=='logFC', .(primerid, coef, ci.hi, ci.lo)],
                          by='primerid')

fcHurdle_JG_fdr_prop <- fcHurdle_JG_prop[,fdr:=p.adjust(`Pr(>Chisq)`, 'fdr')] #add fdr adjusted p-values

#Add % cells from each group expressing each gene
pct.cells.LOAD <- as.data.frame(pct.exp.LOAD)
pct.cells.LOAD <- subset(pct.cells.LOAD, rownames(pct.cells.LOAD)%in%fcHurdle_JG_fdr_prop$primerid)
pct.cells.LOAD <- tibble::rownames_to_column(pct.cells.LOAD, var = "primerid")
pct.cells.Normal <- as.data.frame(pct.exp.Normal)
pct.cells.Normal <- subset(pct.cells.Normal, rownames(pct.cells.Normal)%in%fcHurdle_JG_fdr_prop$primerid)
pct.cells.Normal <- tibble::rownames_to_column(pct.cells.Normal, var = "primerid")
fcHurdle_JG_fdr_prop_pct.cells <- merge(fcHurdle_JG_fdr_prop, pct.cells.LOAD, by.x = 'primerid', by.y = 'primerid')
fcHurdle_JG_fdr_prop_pct.cells <- merge(fcHurdle_JG_fdr_prop_pct.cells, pct.cells.Normal, by.x = 'primerid', by.y = 'primerid')

fcHurdle_JG_prop_na_omit <- stats::na.omit(as.data.frame(fcHurdle_JG_fdr_prop_pct.cells)) #2 genes omitted - NA values occur from convergence failures

hist(fcHurdle_JG_prop_na_omit$fdr) #examine distribution of adjusted p-values
hist(fcHurdle_JG_prop_na_omit$coef, breaks = 25) #examine distribution of logFC
fcHurdleSig_prop_JG <- subset(fcHurdle_JG_prop_na_omit, subset = fdr<0.05) #determine number of significant genes


############################################################################################
### For datasets of >30,000 cells per group, replace above formatting with the following ###
############################################################################################

Oligo <- readRDS('/gpfs/fs1/data/chiba-faleklab/Parallel_ADRC_RNA_final/objects/Oligo_for_DE.rds')

DefaultAssay(Oligo) <- "RNA"
Oligo

#Remove genes that are not expressed (zero total count across all nuclei)
counts <- Oligo@assays$RNA@counts
nrow(counts) #36588
genes.use <- rownames(counts)[Matrix::rowSums(counts) > 0]
head(genes.use)
length(genes.use) #31720

Oligo <- subset(Oligo, features=genes.use)
counts.subset <- Oligo@assays$RNA@counts
nrow(counts.subset) #31720

#Format one chunk of cells: returns a cells x (8 metadata columns + genes) data frame of raw counts.
#Metadata columns come first, in this order:
#wellKey, sampID, orig.ident, Sex, age, PMI, seq.sat, cell.type.number.proportion
FormatCellsForMAST <- function(chunk) {
  chunk.df <- as.data.frame(t(as.data.frame(chunk@assays$RNA@counts)))
  n.genes <- ncol(chunk.df)
  chunk.meta <- chunk@meta.data
  chunk.df$orig.ident <- chunk.meta$orig.ident
  chunk.df$sampID <- chunk.meta$sampID
  chunk.df$Sex <- chunk.meta$Sex
  chunk.df$age <- chunk.meta$age
  chunk.df$PMI <- chunk.meta$PMI
  chunk.df$seq.sat <- chunk.meta$seq.sat
  chunk.df$wellKey <- paste(chunk.meta$sampID, rownames(chunk.meta), sep = "_")
  rownames(chunk.df) <- chunk.df$wellKey
  chunk.df$cell.type.number.proportion <- chunk.meta$cell.type.number.proportion
  chunk.df[, c(n.genes + c(7, 2, 1, 3, 4, 5, 6, 8), seq_len(n.genes))]
}

#split matrix into submatrices, format, log transform, then re-merge
#Chunks of at most 45000 cells are converted one at a time (83945 cells -> columns 1:45000 and 45001:83945);
#the chunk boundaries are derived from ncol(Oligo) instead of being hard-coded
chunk.size <- 45000
chunk.starts <- seq(1, ncol(Oligo), by = chunk.size)
chunk.list <- lapply(chunk.starts, function(first.cell) {
  last.cell <- min(first.cell + chunk.size - 1, ncol(Oligo))
  FormatCellsForMAST(Oligo[, first.cell:last.cell])
})

#log2-transform each chunk separately (genes x cells), then bind the chunks column-wise
genecounts_JG <- do.call(cbind, lapply(chunk.list, function(chunk.df) {
  log2(as.matrix(t(chunk.df[, -(1:8)])) + 1) #log2 transform
}))
genecounts_JG[1:5, 1:5] #peek at a corner only; head() would print every one of the cell columns
length(rownames(genecounts_JG)) #31720
length(colnames(genecounts_JG)) #83945

allgenes_JG <- bind_rows(chunk.list)
allgenes_JG[1:5, 1:10] #peek at the metadata columns and the first genes
length(rownames(allgenes_JG)) #83945
length(colnames(allgenes_JG)) #31728 (31720 genes + 8 metadata columns); this check previously inspected genecounts_JG by mistake

coldata_JG <- allgenes_JG[,1:8]
coldata_JG$orig.ident <- as.factor(coldata_JG$orig.ident)
coldata_JG$Sex <- as.factor(coldata_JG$Sex)
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# snRNA-seq-workflow

This is the R code used to process and analyze single-nuclei (sn)RNA-seq data (10X 
Genomics) from 24 human post-mortem brain tissue samples derived from temporal cortex. In this pipeline, an annotated reference dataset from [human motor cortex (M1)](https://www.biorxiv.org/content/10.1101/2020.03.31.016972v2.full) is used to identify cell types in our query dataset using a label transfer method. The following steps were performed:

1. Process the M1 reference dataset
2. Generate 24 Seurat objects and perform QC filtering
3. Merge the 24 objects and remove mitochondrial genes
4. Perform SCT normalization and integrate the datasets
5. Use the processed M1 reference to annotate cell types
6. Filter out nuclei with poor annotation scores
7. Perform dimensional reduction (PCA & UMAP)
8. Manually annotate clusters by majority cell type
9. Conduct differential expression analyses

The method used to identify 'hybrid' nuclei in step 6 is taken from [Grubman et al., 2019](https://www.nature.com/articles/s41593-019-0539-4). For each nucleus, the difference between the highest and second-highest cell type scores was divided by the highest score; nuclei were filtered out if this difference was no more than 65% of the highest cell type score.

For differential expression analyses in step 9, we used MAST directly with a random effect for sample as in [Zimmerman et al., 2021](https://www.nature.com/articles/s41467-021-21038-1). This is done for 1 example cluster of microglia. Different formatting steps should be used for datasets with >30,000 cells per group, and an example of this is also shown.

Mitochondrial (MT) gene expression in snRNA-seq data was treated as background signal. Thus, after using % MT as a tool to filter out nuclei with high background, these genes were removed.

Upon merging the objects, a strange error occurred in which the sample ID for one of the samples replaced another in the metadata. This is fixed in step 6.

--------------------------------------------------------------------------------