├── 00_pkgs_docs_shared.R ├── 01_pbmc3k_duided_tutorial.R ├── 02_seurat_multimodal.R ├── 03_seurat_integration.R ├── 04_seurat_mapping.R ├── 05_seurat_integrate_tips.R ├── 05_seurat_rpca.R ├── 0X_cisTarget_dnload.R ├── README.md ├── advancedSingleCell.Rproj ├── git教程.md └── src ├── 00_pkgs_docs_shared.R ├── 01_mergeSample.R ├── 02.1_cisTarget_dwonload.R └── 02_senic.R /00_pkgs_docs_shared.R: -------------------------------------------------------------------------------- 1 | suppressMessages(library(tidyverse)) 2 | suppressMessages(library(pacman)) 3 | suppressMessages(library(data.table)) 4 | 5 | wkPath <- c('./result', './processData') 6 | for(i in wkPath){ 7 | wkPathi = i 8 | # wkPathi = paste0(sectionName, '/', i) 9 | # original note: every sub-project contains plot, result and input directories 10 | if (!dir.exists(wkPathi)) dir.create(wkPathi, recursive = TRUE) 11 | } 12 | rm(list=c('i', 'wkPathi', 'wkPath')) 13 | 14 | # install.packages('umap') 15 | # BiocManager::install("glmGamPoi") 16 | # remotes::install_github('satijalab/seurat-data') 17 | -------------------------------------------------------------------------------- /01_pbmc3k_duided_tutorial.R: -------------------------------------------------------------------------------- 1 | suppressMessages(library(tidyverse)) 2 | suppressMessages(library(pacman)) 3 | suppressMessages(library(data.table)) 4 | suppressMessages(library(Seurat)) 5 | suppressMessages(library(patchwork)) 6 | options(stringsAsFactors = FALSE) 7 | rm(list = ls()) 8 | 9 | inDir = '~/BioFiles/pbmc3k/' 10 | # Load the PBMC dataset 11 | inFile = paste0(inDir, 'filtered_gene_bc_matrices/hg19/') 12 | pbmc.data <- Read10X(data.dir = inFile) 13 | # Initialize the Seurat object with the raw (non-normalized data). 
14 | pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, min.features = 200) 15 | pbmc 16 | # Lets examine a few genes in the first thirty cells 17 | pbmc.data[c("CD3D", "TCL1A", "MS4A1"), 1:30] 18 | 19 | #-----Standard pre-processing workflow----------- 20 | # The steps below encompass the standard pre-processing workflow for scRNA-seq data in Seurat. 21 | # These represent the selection and filtration of cells based on QC metrics, data normalization and scaling, and the detection of highly variable features. 22 | 23 | #-----------QC and selecting cells----------- 24 | # The [[ operator can add columns to object metadata. This is a great place to stash QC stats 25 | pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-") 26 | pbmc[["percent.ribo"]] <- PercentageFeatureSet(pbmc, pattern = "^RP[SL]") 27 | # Visualize QC metrics as a violin plot 28 | VlnPlot(pbmc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) 29 | plot1 <- FeatureScatter(pbmc, feature1 = "nFeature_RNA", feature2 = "percent.mt") 30 | plot2 <- FeatureScatter(pbmc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 31 | FeatureScatter(pbmc, feature1 = "nFeature_RNA", feature2 = "percent.ribo") 32 | plot1 + plot2 33 | pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5) 34 | 35 | #----------Normalizing the data---------- 36 | # After removing unwanted cells from the dataset, the next step is to normalize the data. By default, 37 | # we employ a global-scaling normalization method, "LogNormalize", 38 | # that normalizes the feature expression measurements for each cell by the total expression, 39 | # multiplies this by a scale factor (10,000 by default), and log-transforms the result. 40 | # Normalized values are stored in pbmc[["RNA"]]@data. 41 | pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000) 42 | #---------Feature selection---------- 43 | pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000) 44 | # Identify the 10 most highly variable genes 45 | top10 <- head(VariableFeatures(pbmc), 10) 46 | # plot variable features with and without labels 47 | plot1 <- VariableFeaturePlot(pbmc) 48 | plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) 49 | plot1 + plot2 50 | 51 | #-----------Scaling the data-------- 52 | all.genes <- rownames(pbmc) 53 | pbmc <- ScaleData(pbmc, features = all.genes) 54 | 
# pbmc <- SCTransform(pbmc,variable.features.n = 3000, vars.to.regress = "percent.mt") 55 | pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc)) 56 | # Examine and visualize PCA results a few different ways 57 | print(pbmc[["pca"]], dims = 1:5, nfeatures = 5) 58 | VizDimLoadings(pbmc, dims = 1:2, reduction = "pca") 59 | DimPlot(pbmc, reduction = "pca") 60 | DimHeatmap(pbmc, dims = 1, cells = 500, balanced = TRUE) 61 | 62 | #--Determine the dimensionality of the dataset------ 63 | pbmc <- JackStraw(pbmc, num.replicate = 100) 64 | pbmc <- ScoreJackStraw(pbmc, dims = 1:20) 65 | JackStrawPlot(pbmc, dims = 1:15) 66 | ElbowPlot(pbmc) 67 | 68 | #---------Cluster the cells--------- 69 | pbmc <- FindNeighbors(pbmc, dims = 1:10) 70 | pbmc <- FindClusters(pbmc, resolution = 0.5) 71 | 72 | #---------Non-linear dimensional reduction---------- 73 | pbmc <- RunUMAP(pbmc, dims = 1:10) 74 | set.seed(123) 75 | DimPlot(pbmc, reduction = "umap") 76 | saveRDS(pbmc, file = "./processData/01_pbmc_tutorial.rds") 77 | 78 | #-------Cluster biomarkers-------- 79 | # find all markers of cluster 2 (ident.1 = 2 below; the result is nevertheless stored as cluster1.markers) 80 | cluster1.markers <- FindMarkers(pbmc, ident.1 = 2, min.pct = 0.25) 81 | head(cluster1.markers, n = 5) 82 | # find all markers distinguishing cluster 5 from clusters 0 and 3 83 | cluster5.markers <- FindMarkers(pbmc, ident.1 = 5, ident.2 = c(0, 3), min.pct = 0.25) 84 | head(cluster5.markers, n = 5) 85 | # find markers for every cluster compared to all remaining cells, report only the positive ones 86 | pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25) 87 | pbmc.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC) 88 | 89 | # Seurat has several tests for differential expression which can be set with the test.use parameter (see the DE vignette for details). 90 | # For example, the ROC test returns the "classification power" for any individual marker (ranging from 0 - random, to 1 - perfect). 91 | cluster1.markers <- FindMarkers(pbmc, ident.1 = 0, logfc.threshold = 0.25, test.use = "roc", only.pos = TRUE) 92 | VlnPlot(pbmc, features = c("MS4A1", "CD79A")) 93 | # you can plot raw counts as well 94 | VlnPlot(pbmc, features = c("NKG7", "PF4"), slot = "counts", log = TRUE) 95 | 
FeaturePlot(pbmc, features = c("MS4A1", "GNLY", "CD3E", "CD14", "FCER1A", "FCGR3A", "LYZ", "PPBP", 96 | "CD8A")) 97 | top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) 98 | DoHeatmap(pbmc, features = top10$gene) + NoLegend() 99 | 100 | FeaturePlot(pbmc, features = c("FCGR3A", "MS4A7", "FCER1A", "CST3", "PPBP")) 101 | #------------cell type identity------------- 102 | new.cluster.ids <- c("Naive CD4 T", "CD14+ Mono", "Memory CD4 T", "B", "CD8 T", "FCGR3A+ Mono", 103 | "NK", "DC", "Platelet") 104 | names(new.cluster.ids) <- levels(pbmc) 105 | pbmc <- RenameIdents(pbmc, new.cluster.ids) 106 | DimPlot(pbmc, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend() 107 | 108 | saveRDS(pbmc, file = "./processData/01_pbmc3k_final.rds") -------------------------------------------------------------------------------- /02_seurat_multimodal.R: -------------------------------------------------------------------------------- 1 | suppressMessages(library(tidyverse)) 2 | suppressMessages(library(pacman)) 3 | suppressMessages(library(data.table)) 4 | suppressMessages(library(Seurat)) 5 | suppressMessages(library(patchwork)) 6 | options(stringsAsFactors = FALSE) 7 | rm(list = ls()) 8 | # Load in the RNA UMI matrix 9 | 10 | # Note that this dataset also contains ~5% of mouse cells, which we can use as negative controls 11 | # for the protein measurements. For this reason, the gene expression matrix has HUMAN_ or MOUSE_ 12 | # appended to the beginning of each gene. 
13 | inFile = "~/BioFiles/GSE100866_CBMC/GSE100866_CBMC_8K_13AB_10X-RNA_umi.csv.gz" 14 | cbmc.rna <- as.sparse(read.csv(file = inFile, sep = ",", 15 | header = TRUE, row.names = 1)) 16 | # To make life a bit easier going forward, we're going to discard all but the top 100 most 17 | # highly expressed mouse genes, and remove the 'HUMAN_' from the CITE-seq prefix 18 | cbmc.rna <- CollapseSpeciesExpressionMatrix(cbmc.rna) 19 | # Load in the ADT UMI matrix 20 | inFile = "~/BioFiles/GSE100866_CBMC/GSE100866_CBMC_8K_13AB_10X-ADT_umi.csv.gz" 21 | cbmc.adt <- as.sparse(read.csv(file = inFile, sep = ",", 22 | header = TRUE, row.names = 1)) 23 | # Note that since measurements were made in the same cells, the two matrices have identical 24 | # column names 25 | all.equal(colnames(cbmc.rna), colnames(cbmc.adt)) 26 | 27 | #---------Seurat: adding RNA and protein data-------- 28 | # creates a Seurat object based on the scRNA-seq data 29 | cbmc <- CreateSeuratObject(counts = cbmc.rna) 30 | # We can see that by default, the cbmc object contains an assay storing RNA measurement 31 | Assays(cbmc) 32 | # create a new assay to store ADT information 33 | adt_assay <- CreateAssayObject(counts = cbmc.adt) 34 | # add this assay to the previously created Seurat object 35 | cbmc[["ADT"]] <- adt_assay 36 | Assays(cbmc) 37 | # Extract a list of features measured in the ADT assay 38 | rownames(cbmc[["ADT"]]) 39 | # List the current default assay 40 | DefaultAssay(cbmc) 41 | # Switch the default to ADT 42 | DefaultAssay(cbmc) <- "ADT" 43 | DefaultAssay(cbmc) 44 | #-------------Cluster cell----------------- 45 | # Note that all operations below are performed on the RNA assay Set and verify that the default 46 | # assay is RNA 47 | DefaultAssay(cbmc) <- "RNA" 48 | DefaultAssay(cbmc) 49 | # perform visualization and clustering steps 50 | cbmc <- NormalizeData(cbmc) 51 | cbmc <- FindVariableFeatures(cbmc) 52 | cbmc <- ScaleData(cbmc) 53 | cbmc <- RunPCA(cbmc, verbose = FALSE) 54 | cbmc <- FindNeighbors(cbmc, 
dims = 1:30) 55 | cbmc <- FindClusters(cbmc, resolution = 0.8, verbose = FALSE) 56 | cbmc <- RunUMAP(cbmc, dims = 1:30) 57 | DimPlot(cbmc, label = TRUE) 58 | 59 | #---------------Visualize multiple modalities side-by-side--------- 60 | # Normalize ADT data, 61 | DefaultAssay(cbmc) <- "ADT" 62 | cbmc <- NormalizeData(cbmc, normalization.method = "CLR", margin = 2) 63 | DefaultAssay(cbmc) <- "RNA" 64 | 65 | # Note that the following command is an alternative but returns the same result 66 | cbmc <- NormalizeData(cbmc, normalization.method = "CLR", margin = 2, assay = "ADT") 67 | 68 | # Now, we will visualize CD19 levels for RNA and protein By setting the default assay, we can 69 | # visualize one or the other 70 | DefaultAssay(cbmc) <- "ADT" 71 | p1 <- FeaturePlot(cbmc, "CD19", cols = c("lightgrey", "darkgreen")) + ggtitle("CD19 protein") 72 | DefaultAssay(cbmc) <- "RNA" 73 | p2 <- FeaturePlot(cbmc, "CD19") + ggtitle("CD19 RNA") 74 | # place plots side-by-side 75 | p1 | p2 76 | # for the RNA and protein assays 77 | Key(cbmc[["RNA"]]) 78 | Key(cbmc[["ADT"]]) 79 | # Now, we can include the key in the feature name, which overrides the default assay 80 | p1 <- FeaturePlot(cbmc, "adt_CD19", cols = c("lightgrey", "darkgreen")) + ggtitle("CD19 protein") 81 | p2 <- FeaturePlot(cbmc, "rna_CD19") + ggtitle("CD19 RNA") 82 | p1 | p2 83 | 84 | #-----------Identify cell surface markers----------- 85 | # surface 86 | VlnPlot(cbmc, "adt_CD19") 87 | # we can also identify alternative protein and RNA markers for this cluster through differential 88 | # expression 89 | adt_markers <- FindMarkers(cbmc, ident.1 = 5, assay = "ADT") 90 | rna_markers <- FindMarkers(cbmc, ident.1 = 5, assay = "RNA") 91 | head(adt_markers) 92 | head(rna_markers) 93 | #-----------Additional visualizations--------- 94 | # Draw ADT scatter plots (like biaxial plots for FACS). 
Note that you can even 'gate' cells if 95 | # desired by using HoverLocator and FeatureLocator 96 | FeatureScatter(cbmc, feature1 = "adt_CD19", feature2 = "adt_CD3") 97 | # view relationship between protein and RNA 98 | FeatureScatter(cbmc, feature1 = "adt_CD3", feature2 = "rna_CD3E") 99 | FeatureScatter(cbmc, feature1 = "adt_CD4", feature2 = "adt_CD8") 100 | # number in cells, which significantly reduces 'drop-out' in ADT data 101 | FeatureScatter(cbmc, feature1 = "adt_CD4", feature2 = "adt_CD8", slot = "counts") 102 | #----------------10X multimodal data--------------- 103 | pbmc10k.data <- Read10X(data.dir = "../data/pbmc10k/filtered_feature_bc_matrix/") 104 | rownames(x = pbmc10k.data[["Antibody Capture"]]) <- gsub(pattern = "_[control_]*TotalSeqB", replacement = "", 105 | x = rownames(x = pbmc10k.data[["Antibody Capture"]])) 106 | 107 | pbmc10k <- CreateSeuratObject(counts = pbmc10k.data[["Gene Expression"]], min.cells = 3, min.features = 200) 108 | pbmc10k <- NormalizeData(pbmc10k) 109 | pbmc10k[["ADT"]] <- CreateAssayObject(pbmc10k.data[["Antibody Capture"]][, colnames(x = pbmc10k)]) 110 | pbmc10k <- NormalizeData(pbmc10k, assay = "ADT", normalization.method = "CLR") 111 | 112 | plot1 <- FeatureScatter(pbmc10k, feature1 = "adt_CD19", feature2 = "adt_CD3", pt.size = 1) 113 | plot2 <- FeatureScatter(pbmc10k, feature1 = "adt_CD4", feature2 = "adt_CD8a", pt.size = 1) 114 | plot3 <- FeatureScatter(pbmc10k, feature1 = "adt_CD3", feature2 = "CD3E", pt.size = 1) 115 | (plot1 + plot2 + plot3) & NoLegend() -------------------------------------------------------------------------------- /03_seurat_integration.R: -------------------------------------------------------------------------------- 1 | suppressMessages(library(tidyverse)) 2 | suppressMessages(library(pacman)) 3 | suppressMessages(library(data.table)) 4 | suppressMessages(library(Seurat)) 5 | suppressMessages(library(SeuratData)) 6 | suppressMessages(library(SeuratWrappers)) 7 | suppressMessages(library(patchwork)) 8 | 
options(stringsAsFactors = FALSE) 9 | rm(list = ls()) 10 | 11 | # load dataset 12 | LoadData("ifnb") 13 | # split the dataset into a list of two seurat objects (stim and CTRL) 14 | ifnb.list <- SplitObject(ifnb, split.by = "stim") 15 | # normalize and identify variable features for each dataset independently 16 | ifnb.list <- lapply(X = ifnb.list, FUN = function(x) { 17 | x <- NormalizeData(x) 18 | x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000) 19 | }) 20 | # select features that are repeatedly variable across datasets for integration 21 | features <- SelectIntegrationFeatures(object.list = ifnb.list) 22 | 23 | # Integration 24 | immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, anchor.features = features) 25 | # this command creates an 'integrated' data assay 26 | immune.combined <- IntegrateData(anchorset = immune.anchors) 27 | # Integrated analysis 28 | # specify that we will perform downstream analysis on the corrected data note that the original 29 | # unmodified data still resides in the 'RNA' assay 30 | DefaultAssay(immune.combined) <- "integrated" 31 | 32 | # Run the standard workflow for visualization and clustering 33 | set.seed(457865) 34 | immune.combined <- ScaleData(immune.combined, verbose = FALSE) 35 | immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE) 36 | immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:30) 37 | immune.combined <- FindNeighbors(immune.combined, reduction = "pca", dims = 1:30) 38 | immune.combined <- FindClusters(immune.combined, resolution = 0.5) 39 | # Visualization 40 | p1 <- DimPlot(immune.combined, reduction = "umap", group.by = "stim") 41 | p2 <- DimPlot(immune.combined, reduction = "umap", label = TRUE, repel = TRUE) 42 | p1 + p2 43 | DimPlot(immune.combined, reduction = "umap", split.by = "stim") 44 | 45 | # Identify conserved cell type markers-------------- 46 | # For performing differential expression after integration, we switch back to the original data 47 | DefaultAssay(immune.combined) 
<- "RNA" 48 | nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 6, grouping.var = "stim", verbose = FALSE) 49 | head(nk.markers) 50 | 51 | 52 | FeaturePlot(immune.combined, features = c("CD3D", "SELL", "CREM", "CD8A", "GNLY", "CD79A", "FCGR3A", 53 | "CCL2", "PPBP"), min.cutoff = "q9") 54 | 55 | immune.combined <- RenameIdents(immune.combined, `0` = "CD14 Mono", `1` = "CD4 Naive T", `2` = "CD4 Memory T", 56 | `3` = "CD16 Mono", `4` = "B", `5` = "CD8 T", `6` = "NK", `7` = "T activated", `8` = "DC", `9` = "B Activated", 57 | `10` = "Mk", `11` = "pDC", `12` = "Eryth", `13` = "Mono/Mk Doublets", `14` = "HSPC") 58 | DimPlot(immune.combined, label = TRUE) 59 | 60 | Idents(immune.combined) <- factor(Idents(immune.combined), levels = c("HSPC", "Mono/Mk Doublets", 61 | "pDC", "Eryth", "Mk", "DC", "CD14 Mono", "CD16 Mono", "B Activated", "B", "CD8 T", "NK", "T activated", 62 | "CD4 Naive T", "CD4 Memory T")) 63 | markers.to.plot <- c("CD3D", "CREM", "HSPH1", "SELL", "GIMAP5", "CACYBP", "GNLY", "NKG7", "CCL5", 64 | "CD8A", "MS4A1", "CD79A", "MIR155HG", "NME1", "FCGR3A", "VMO1", "CCL2", "S100A9", "HLA-DQA1", 65 | "GPR183", "PPBP", "GNG11", "HBA2", "HBB", "TSPAN13", "IL3RA", "IGJ", "PRSS57") 66 | DotPlot(immune.combined, features = markers.to.plot, cols = c("blue", "red"), dot.scale = 8, split.by = "stim") + 67 | RotatedAxis() 68 | # Identify differentially expressed genes across conditions------------------ 69 | library(ggplot2) 70 | library(cowplot) 71 | theme_set(theme_cowplot()) 72 | t.cells <- subset(immune.combined, idents = "CD4 Naive T") 73 | Idents(t.cells) <- "stim" 74 | avg.t.cells <- as.data.frame(log1p(AverageExpression(t.cells, verbose = FALSE)$RNA)) 75 | avg.t.cells$gene <- rownames(avg.t.cells) 76 | 77 | cd14.mono <- subset(immune.combined, idents = "CD14 Mono") 78 | Idents(cd14.mono) <- "stim" 79 | avg.cd14.mono <- as.data.frame(log1p(AverageExpression(cd14.mono, verbose = FALSE)$RNA)) 80 | avg.cd14.mono$gene <- rownames(avg.cd14.mono) 81 | 82 | genes.to.label = c("ISG15", "LY6E", "IFI6", 
"ISG20", "MX1", "IFIT2", "IFIT1", "CXCL10", "CCL8") 83 | p1 <- ggplot(avg.t.cells, aes(CTRL, STIM)) + geom_point() + ggtitle("CD4 Naive T Cells") 84 | p1 <- LabelPoints(plot = p1, points = genes.to.label, repel = TRUE) 85 | p2 <- ggplot(avg.cd14.mono, aes(CTRL, STIM)) + geom_point() + ggtitle("CD14 Monocytes") 86 | p2 <- LabelPoints(plot = p2, points = genes.to.label, repel = TRUE) 87 | p1 + p2 88 | 89 | 90 | 91 | immune.combined$celltype.stim <- paste(Idents(immune.combined), immune.combined$stim, sep = "_") 92 | immune.combined$celltype <- Idents(immune.combined) 93 | Idents(immune.combined) <- "celltype.stim" 94 | b.interferon.response <- FindMarkers(immune.combined, ident.1 = "B_STIM", ident.2 = "B_CTRL", verbose = FALSE) 95 | head(b.interferon.response, n = 15) 96 | FeaturePlot(immune.combined, features = c("CD3D", "GNLY", "IFI6"), 97 | split.by = "stim", max.cutoff = 3,cols = c("grey", "red")) 98 | plots <- VlnPlot(immune.combined, features = c("LYZ", "ISG15", "CXCL10"), split.by = "stim", group.by = "celltype", 99 | pt.size = 0, combine = FALSE) 100 | wrap_plots(plots = plots, ncol = 1) 101 | 102 | # Integration of datasets normalized with SCTransform-------- 103 | LoadData("ifnb") 104 | ifnb.list <- SplitObject(ifnb, split.by = "stim") 105 | ifnb.list <- lapply(X = ifnb.list, FUN = SCTransform) 106 | features <- SelectIntegrationFeatures(object.list = ifnb.list, nfeatures = 3000) 107 | ifnb.list <- PrepSCTIntegration(object.list = ifnb.list, anchor.features = features) 108 | immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, normalization.method = "SCT", 109 | anchor.features = features) 110 | immune.combined.sct <- IntegrateData(anchorset = immune.anchors, normalization.method = "SCT") 111 | set.seed(123) 112 | immune.combined.sct <- RunPCA(immune.combined.sct, verbose = FALSE) 113 | immune.combined.sct <- RunUMAP(immune.combined.sct, reduction = "pca", dims = 1:30) 114 | p1 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = "stim") 115 | p2 <- 
DimPlot(immune.combined.sct, reduction = "umap", group.by = "seurat_annotations", label = TRUE, 116 | repel = TRUE) 117 | p1 + p2 118 | 119 | immune.combined.sct <- FindNeighbors(immune.combined.sct, reduction = "pca", dims = 1:30) 120 | immune.combined.sct <- FindClusters(immune.combined.sct, resolution = 0.5) 121 | # Visualization (p2 shows the new clusters on the UMAP embedding; "seurat_annotations" is a metadata column, not a reduction) 122 | p1 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = "stim") 123 | p2 <- DimPlot(immune.combined.sct, reduction = "umap", label = TRUE, repel = TRUE) 124 | p1 + p2 125 | 126 | DefaultAssay(immune.combined.sct) <- "RNA" 127 | nk.markers <- FindConservedMarkers(immune.combined.sct, ident.1 = 6, grouping.var = "stim", verbose = FALSE) 128 | head(nk.markers) 129 | FeaturePlot(immune.combined.sct, features = c('FCGR3A', 'LST1'), min.cutoff = "q9") 130 | 131 | 132 | immune.combined.sct <- RenameIdents(immune.combined.sct, `0` = "CD14 Mono", `1` = "CD4 Naive T", `2` = "CD4 Memory T", 133 | `3` = "CD16 Mono", `4` = "B", `5` = "CD8 T", `6` = "NK", `7` = "T activated", `8` = "DC", `9` = "B Activated", 134 | `10` = "Mk", `11` = "pDC", `12` = "Eryth", `13` = "Mono/Mk Doublets", `14` = "HSPC") 135 | DimPlot(immune.combined.sct, label = TRUE) 136 | -------------------------------------------------------------------------------- /04_seurat_mapping.R: -------------------------------------------------------------------------------- 1 | library(Seurat) 2 | library(SeuratData) 3 | library(tidyverse) 4 | library(cowplot) 5 | library(patchwork) 6 | rm(list = ls()) 7 | # InstallData("panc8") 8 | # install.packages("../tmp/panc8.SeuratData_3.0.2.tar.gz", repos = NULL) 9 | library(panc8.SeuratData) 10 | data("panc8") 11 | pancreas.list <- SplitObject(panc8, split.by = "tech") 12 | # pancreas.list <- pancreas.list[c("celseq", "celseq2", "fluidigmc1", "smartseq2")] 13 | for (i in seq_along(pancreas.list)) { 14 | pancreas.list[[i]] <- NormalizeData(pancreas.list[[i]], verbose = FALSE) 15 | pancreas.list[[i]] <- 
FindVariableFeatures(pancreas.list[[i]], selection.method = "vst", nfeatures = 2000, 16 | verbose = FALSE) 17 | } 18 | 19 | reference.list <- pancreas.list[c("celseq", "celseq2", "smartseq2")] 20 | pancreas.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30) 21 | pancreas.integrated <- IntegrateData(anchorset = pancreas.anchors, dims = 1:30) 22 | 23 | # switch to integrated assay. The variable features of this assay are automatically set during 24 | # IntegrateData 25 | DefaultAssay(pancreas.integrated) <- "integrated" 26 | # Run the standard workflow for visualization and clustering 27 | pancreas.integrated <- ScaleData(pancreas.integrated, verbose = FALSE) 28 | pancreas.integrated <- RunPCA(pancreas.integrated, npcs = 30, verbose = FALSE) 29 | pancreas.integrated <- RunUMAP(pancreas.integrated, reduction = "pca", dims = 1:30, verbose = FALSE) 30 | p1 <- DimPlot(pancreas.integrated, reduction = "umap", group.by = "tech") 31 | p2 <- DimPlot(pancreas.integrated, reduction = "umap", group.by = "celltype", label = TRUE, repel = TRUE) + 32 | NoLegend() 33 | p1 + p2 34 | 35 | # Cell type classification------------ 36 | pancreas.query <- pancreas.list[["indrop"]] 37 | pancreas.anchors <- FindTransferAnchors(reference = pancreas.integrated, query = pancreas.query, 38 | dims = 1:30) 39 | predictions <- TransferData(anchorset = pancreas.anchors, refdata = pancreas.integrated$celltype, 40 | dims = 1:30) 41 | pancreas.query <- AddMetaData(pancreas.query, metadata = predictions) 42 | pancreas.query$prediction.match <- pancreas.query$predicted.id == pancreas.query$celltype 43 | table(pancreas.query$prediction.match) 44 | table(pancreas.query$predicted.id) 45 | VlnPlot(pancreas.query, c("REG1A", "PPY", "SST", "GHRL", "VWF", "SOX10"), group.by = "predicted.id") 46 | 47 | pancreas.integrated <- RunUMAP(pancreas.integrated, dims = 1:30, reduction = "pca", return.model = TRUE) 48 | pancreas.query <- MapQuery(anchorset = pancreas.anchors, reference = pancreas.integrated, query = 
pancreas.query, 49 | refdata = list(celltype = "celltype"), reference.reduction = "pca", reduction.model = "umap") 50 | # plot the reference annotations next to the labels transferred onto the query 51 | p1 <- DimPlot(pancreas.integrated, reduction = "umap", group.by = "celltype", label = TRUE, label.size = 3, 52 | repel = TRUE) + NoLegend() + ggtitle("Reference annotations") 53 | p2 <- DimPlot(pancreas.query, reduction = "ref.umap", group.by = "predicted.celltype", label = TRUE, 54 | label.size = 3, repel = TRUE) + NoLegend() + ggtitle("Query transferred labels") 55 | p1 + p2 56 | -------------------------------------------------------------------------------- /05_seurat_integrate_tips.R: -------------------------------------------------------------------------------- 1 | library(Seurat) 2 | 3 | library(loomR) 4 | library(SeuratDisk) 5 | inFile = paste0('~/BioFiles/immuneCellAtlas/', 6 | 'cc95ff89-2e68-4a08-a234-480eca21ce79.homo_sapiens.loom') 7 | 8 | # open the loom file, convert it to a Seurat object, then close the loom handle 9 | bm280k.loom <- connect(filename = inFile, mode = "r+") 10 | bm280k.loom 11 | bm280k = as.Seurat(bm280k.loom) 12 | bm280k.loom$close_all() 13 | # bm280k.data <- Read10X_h5("../data/ica_bone_marrow_h5.h5") 14 | # bm280k <- CreateSeuratObject(counts = bm280k.data, min.cells = 100, min.features = 500) 15 | bm280k.list <- SplitObject(bm280k, split.by = "orig.ident") 16 | bm280k.list <- lapply(X = bm280k.list, FUN = function(x) { 17 | x <- NormalizeData(x, verbose = FALSE) 18 | x <- FindVariableFeatures(x, verbose = FALSE) 19 | }) 20 | # select integration features, then scale and run PCA on every dataset using those features 21 | features <- SelectIntegrationFeatures(object.list = bm280k.list) 22 | bm280k.list <- lapply(X = bm280k.list, FUN = function(x) { 23 | x <- ScaleData(x, features = features, verbose = FALSE) 24 | x <- RunPCA(x, features = features, verbose = FALSE) 25 | }) 26 | # find anchors with reduction = "rpca", using datasets 1 and 2 of the list as the reference 27 | anchors <- FindIntegrationAnchors(object.list = bm280k.list, reference = c(1, 2), reduction = "rpca", 28 | dims = 1:50) 29 | bm280k.integrated <- IntegrateData(anchorset = anchors, dims = 1:50) 30 | # scale, PCA and UMAP on the integrated object 31 | bm280k.integrated <- ScaleData(bm280k.integrated, verbose = FALSE) 32 | bm280k.integrated <- 
RunPCA(bm280k.integrated, verbose = FALSE) 33 | bm280k.integrated <- RunUMAP(bm280k.integrated, dims = 1:50) 34 | 35 | DimPlot(bm280k.integrated, group.by = "orig.ident") -------------------------------------------------------------------------------- /05_seurat_rpca.R: -------------------------------------------------------------------------------- 1 | library(SeuratData) 2 | library(Seurat) # SplitObject(), NormalizeData(), FindIntegrationAnchors(), ... used below come from Seurat 3 | rm(list = ls()) 4 | library(ifnb.SeuratData) 5 | LoadData("ifnb") 6 | # split the dataset into a list of two seurat objects (stim and CTRL) 7 | ifnb.list <- SplitObject(ifnb, split.by = "stim") 8 | # normalize and identify variable features for each dataset independently 9 | ifnb.list <- lapply(X = ifnb.list, FUN = function(x) { 10 | x <- NormalizeData(x) 11 | x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000) 12 | }) 13 | 14 | # select features that are repeatedly variable across datasets for integration run PCA on each 15 | # dataset using these features 16 | features <- SelectIntegrationFeatures(object.list = ifnb.list) 17 | ifnb.list <- lapply(X = ifnb.list, FUN = function(x) { 18 | x <- ScaleData(x, features = features, verbose = FALSE) 19 | x <- RunPCA(x, features = features, verbose = FALSE) 20 | }) 21 | 22 | # Perform integration----------------- 23 | immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, anchor.features = features, reduction = "rpca") 24 | # this command creates an 'integrated' data assay 25 | immune.combined <- IntegrateData(anchorset = immune.anchors) 26 | # Now we can run a single integrated analysis on all cells! 
27 | 28 | # specify that we will perform downstream analysis on the corrected data note that the original 29 | # unmodified data still resides in the 'RNA' assay 30 | DefaultAssay(immune.combined) <- "integrated" 31 | 32 | # Run the standard workflow for visualization and clustering 33 | immune.combined <- ScaleData(immune.combined, verbose = FALSE) 34 | immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE) 35 | #--Determine the dimensionality of the dataset------ 36 | immune.combined <- JackStraw(immune.combined, num.replicate = 100) 37 | immune.combined <- ScoreJackStraw(immune.combined, dims = 1:20) 38 | JackStrawPlot(immune.combined, dims = 1:20) 39 | ElbowPlot(immune.combined) 40 | 41 | 42 | immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:30) 43 | immune.combined <- FindNeighbors(immune.combined, reduction = "pca", dims = 1:30) 44 | immune.combined <- FindClusters(immune.combined, resolution = 0.5) 45 | # Visualization 46 | p1 <- DimPlot(immune.combined, reduction = "umap", group.by = "stim") 47 | p2 <- DimPlot(immune.combined, reduction = "umap", group.by = "seurat_annotations", label = TRUE, 48 | repel = TRUE) 49 | p1 + p2 50 | 51 | # Adjust the integration strength (k.anchor raised to 20)----------------- 52 | immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, anchor.features = features, reduction = "rpca", 53 | k.anchor = 20) 54 | immune.combined <- IntegrateData(anchorset = immune.anchors) 55 | immune.combined <- ScaleData(immune.combined, verbose = FALSE) 56 | immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE) 57 | immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:30) 58 | immune.combined <- FindNeighbors(immune.combined, reduction = "pca", dims = 1:30) 59 | immune.combined <- FindClusters(immune.combined, resolution = 0.5) 60 | # Visualization 61 | p1 <- DimPlot(immune.combined, reduction = "umap", group.by = "stim") 62 | p2 <- DimPlot(immune.combined, reduction = "umap", label = TRUE, repel = TRUE) 63 | p1 + p2 64 | 65 | # 
Integration with SCTransform normalization---------------------- 66 | ifnb.list <- SplitObject(ifnb, split.by = "stim") 67 | ifnb.list <- lapply(X = ifnb.list, FUN = SCTransform, method = "glmGamPoi") 68 | features <- SelectIntegrationFeatures(object.list = ifnb.list, nfeatures = 3000) 69 | ifnb.list <- PrepSCTIntegration(object.list = ifnb.list, anchor.features = features) 70 | ifnb.list <- lapply(X = ifnb.list, FUN = RunPCA, features = features) 71 | immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, normalization.method = "SCT", 72 | anchor.features = features, dims = 1:30, reduction = "rpca", k.anchor = 20) 73 | immune.combined.sct <- IntegrateData(anchorset = immune.anchors, normalization.method = "SCT", dims = 1:30) 74 | immune.combined.sct <- RunPCA(immune.combined.sct, verbose = FALSE) 75 | immune.combined.sct <- RunUMAP(immune.combined.sct, reduction = "pca", dims = 1:30) 76 | # Visualization 77 | p1 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = "stim") 78 | p2 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = "seurat_annotations", label = TRUE, 79 | repel = TRUE) 80 | p1 + p2 81 | 82 | -------------------------------------------------------------------------------- /0X_cisTarget_dnload.R: -------------------------------------------------------------------------------- 1 | ##1, For human: 2 | dbFiles1 <- c("https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg38/refseq_r80/mc9nr/gene_based/hg38__refseq-r80__500bp_up_and_100bp_down_tss.mc9nr.feather", 3 | "https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg38/refseq_r80/mc9nr/gene_based/hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.feather") 4 | ##2, For mouse: 5 | dbFiles2 <- c("https://resources.aertslab.org/cistarget/databases/mus_musculus/mm10/refseq_r80/mc9nr/gene_based/mm10__refseq-r80__500bp_up_and_100bp_down_tss.mc9nr.feather", 6 | 
"https://resources.aertslab.org/cistarget/databases/mus_musculus/mm10/refseq_r80/mc9nr/gene_based/mm10__refseq-r80__10kb_up_and_down_tss.mc9nr.feather") 7 | # mc9nr: Motif collection version 9: 24k motifs 8 | options(timeout = max(3600, getOption("timeout"))) # the databases are large downloads; R's default 60 s timeout would abort them 9 | ##4, download 10 | dir.create("~/database/cisTarget_databases", recursive = TRUE, showWarnings = FALSE); # create the folder that stores the databases (parent dirs included; silent when it already exists) 11 | setwd("~/database/cisTarget_databases") 12 | # to download all reference databases, run the code below each time after (re)setting dbFiles 13 | dbFiles = c(dbFiles1, dbFiles2) 14 | for(featherURL in dbFiles){ 15 | download.file(featherURL, destfile=basename(featherURL), mode = "wb") # saved in current dir; binary mode so the feather file is not altered on Windows 16 | } 17 | # mc9nr: Motif collection version 9: 24k motifs -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # advancedSingleCell 2 | 3 | 对单细胞测序的学习,跟着流程走‘CNS图表复现’走 4 | 5 | [jimmy的单细胞课程](https://mp.weixin.qq.com/s?__biz=MzAxMDkxODM1Ng%3D%3D&mid=2247496154&idx=3&sn=d3cfaa4a5b18235e0192619f64641635&scene=45#wechat_redirect) 6 | 7 | ### 高级分析 8 | 9 | - [单细胞转录组高级分析一:多样本合并与批次校正](http://mp.weixin.qq.com/s?__biz=MzI1Njk4ODE0MQ==&mid=2247488375&idx=1&sn=a8c73ea647254baab7125babba027071&chksm=ea1f15f5dd689ce3b7c90dd2aeed140b23b83543e7def6094af98c40de58a2d1b08e15e8d2fe&scene=21#wechat_redirect) 10 | - [单细胞转录组高级分析二:转录调控网络分析](http://mp.weixin.qq.com/s?__biz=MzI1Njk4ODE0MQ==&mid=2247488383&idx=1&sn=7b8504ed4449df3a707d1c83ec0b0a7a&chksm=ea1f15fddd689ceb5edf6635d2c74e9271eac0c30c4d1714403c9057cb3fa187a776e5a4f34b&scene=21#wechat_redirect) 11 | - [单细胞转录组高级分析三:细胞通讯分析](http://mp.weixin.qq.com/s?__biz=MzI1Njk4ODE0MQ==&mid=2247488392&idx=1&sn=e0aa3d50eb0b1f3251f1ae7cf62c9616&chksm=ea1f150add689c1c0c75f6b1e1e6bf4d3e1faaf230b6b2ef4466d8530f08958bd6196849d61d&scene=21#wechat_redirect) 12 | - [单细胞转录组高级分析四:scRNA数据推断CNV](http://mp.weixin.qq.com/s?__biz=MzI1Njk4ODE0MQ==&mid=2247488400&idx=1&sn=2cec23311fe972353dec8cbc24c6efbc&chksm=ea1f1512dd689c04ab0e822eabc96158cfd0d437e8cc8721dde77acf5834ccd3d7a26660f8f0&scene=21#wechat_redirect) 13 | - 
[单细胞转录组高级分析五:GSEA与GSVA分析](http://mp.weixin.qq.com/s?__biz=MzI1Njk4ODE0MQ==&mid=2247488442&idx=1&sn=cfa26b7e4ee68a6e5a7929a0d5b98595&chksm=ea1f1538dd689c2e2bdfff6bf6956531abd9eee0492efd256f20bcfc877f351646e7e5e74934&scene=21#wechat_redirect) 14 | - [单细胞转录组高级分析六:TCGA生存分析](http://mp.weixin.qq.com/s?__biz=MzI1Njk4ODE0MQ==&mid=2247488450&idx=1&sn=de7beeb1c144dee1197942cbc2cbe9fc&chksm=ea1f1540dd689c569c6d2a93ec7d4dd707c76a7290f2454ca61c7706799c72199bd5e60d892f&scene=21#wechat_redirect) 15 | - [单细胞转录组高级分析七:整合scATAC数据](http://mp.weixin.qq.com/s?__biz=MzI1Njk4ODE0MQ==&mid=2247488458&idx=1&sn=890de4c0c4f1e286406560e97c3bf356&chksm=ea1f1548dd689c5e112760efc79562b78fae3e0982f4c9a10fdffb507d35b7de86277f21e4f9&scene=21#wechat_redirect) 16 | - [单细胞转录组高级分析八:整合V(D)J数据](https://mp.weixin.qq.com/s?__biz=MzI1Njk4ODE0MQ==&mid=2247488467&idx=1&sn=96407b7817a64b270752792b5e775d34&scene=21#wechat_redirect) 17 | 18 | 19 | ## Tips for integrating large datasets 20 | https://satijalab.org/seurat/articles/integration_large_datasets.html 21 | 22 | 数据下载: 23 | https://data.humancellatlas.org/explore/projects/cc95ff89-2e68-4a08-a234-480eca21ce79/m/expression-matrices?catalog=dcp1 -------------------------------------------------------------------------------- /advancedSingleCell.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /git教程.md: -------------------------------------------------------------------------------- 1 | **代码版本控制是程序员必须要考虑的问题**,针对`Rstudio`可以利用`GitHub`进行版本控制,下面对整个操作过程进行说明: 2 | 3 | ## 设定目录 4 | 5 | 在`windows`系统下,选择`Tools` --> `Global Options`,然后选择`Git/SVN`,选择`Git executable`,前提是你已经安装了`Git`,如下图所示 6 | 7 
| ![image-20210329122736942](https://gitee.com/cystone2020/document/raw/master/image-20210329122736942.png) 8 | 9 | 选择git目录 10 | 11 | 12 | 然后`Create RSA Key` 13 | 14 | ![image-20210329122752182](https://gitee.com/cystone2020/document/raw/master/image-20210329122752182.png) 15 | 16 | ![image-20210329122816918](https://gitee.com/cystone2020/document/raw/master/image-20210329122816918.png) 17 | 18 | ![image-20210329122840750](https://gitee.com/cystone2020/document/raw/master/image-20210329122840750.png) 19 | 20 | 打开`Tools`,选择`shell`,输入命令: 21 | `git config --global user.email "youremail@gmail.com"` 22 | `git config --global user.name "yourname"` 23 | `ssh -T git@github.com` 24 | 使用`GitHub`上的名字 25 | 26 | ![image-20210329122854117](https://gitee.com/cystone2020/document/raw/master/image-20210329122854117.png)GitHub连接 27 | 28 | ## 新建一个工程 29 | 30 | 新建一个工程,选择`New Directory` 31 | 32 | ![image-20210329122906337](https://gitee.com/cystone2020/document/raw/master/image-20210329122906337.png) 33 | 34 | 新建工程 35 | 36 | 然后勾选`Create a git repository` 37 | 38 | ![image-20210329122936787](https://gitee.com/cystone2020/document/raw/master/image-20210329122936787.png) 39 | 40 | 创建Git 41 | 42 | 这个时候`Rstudio`会出现`git`栏,提交到本地,只需要在`git`栏下面点击`commit`,即可提交至本地 43 | 44 | ![image-20210329122958635](https://gitee.com/cystone2020/document/raw/master/image-20210329122958635.png)GitHub提交 45 | 46 | 47 | 48 | 可以将代码保存至`GitHub`上,并且创建分支,在`GitHub`上创建一个`New repository`,命名为`test` 49 | 50 | ![image-20210329123014956](https://gitee.com/cystone2020/document/raw/master/image-20210329123014956.png) 51 | 52 | GitHub上创建 53 | 54 | 打开`Rstudio`中的`Shell`窗口,输入`git`命令 55 | 56 | 57 | 58 | ```bash 59 | git remote rm origin 60 | git remote add origin https://github.com/cystone/advancedSingleCell.git 61 | git config remote.origin.url git@github.com:cystone/advancedSingleCell.git 62 | git pull origin master 63 | git push origin master 64 | ``` 65 | 66 | 将`origin`重新定向 67 | `git remote set-url origin 
https://github.com/chengfeifan/test.git` 68 | 69 | ## 在本地新建一个`GitHub`上已经存在的项目 70 | 71 | 首先在`Rstudio`上新建一个`project`,选择`version control`,然后选`Clone Git Respository`,将`GitHub`上`repository`的`url`加入到选项中 72 | 73 | ![image-20210329123033548](https://gitee.com/cystone2020/document/raw/master/image-20210329123033548.png) 74 | 75 | Paste_Image.png 76 | 77 | 然后在`shell`窗口输入 78 | `git config remote.origin.url git@github.com:ewenharrison/test.git` 79 | 80 | ## git中设置上游 81 | 82 | 在`git`的时候,我们会建立许多有特性的分支,建立分支的时候,如何使得远端也出现分支,需要用到下面的命令: 83 | 84 | ```bash 85 | git push --set-upstream origin master 86 | ``` 87 | 88 | ## 常见错误和解决办法 89 | 90 | $git push 91 | fatal: unable to access 'https://github.com/cystone/advancedSingleCell.git/': LibreSSL SSL_connect: SSL_ERROR_SYSCALL in connection to github.com:443 92 | 93 | ```bash 94 | git config --global --unset http.proxy 95 | git config --global http.sslVerify false 96 | ``` 97 | 98 | 如果有如下: 99 | Enter passphrase for key '~/.ssh/id_rsa': 100 | 101 | ```bash 102 | ssh-add ~/.ssh/id_rsa 103 | ``` 104 | 105 | 106 | 107 | 108 |
--------------------------------------------------------------------------------
/src/00_pkgs_docs_shared.R:
--------------------------------------------------------------------------------
suppressMessages(library(tidyverse))
suppressMessages(library(pacman))
suppressMessages(library(data.table))

# Make sure the project's working folders exist before any analysis script
# runs. The work happens inside local() so that no temporary variable is
# created in (or removed from) the caller's global environment; the original
# loop overwrote and then deleted the globals `i`, `wkPathi` and `wkPath`.
# (A per-section prefix, paste0(sectionName, '/', dir), was left commented out
# in the original and is still not applied.)
local({
  project_dirs <- c('./src', './output', './data/raw', './data/processed',
                    './reports')
  for (project_dir in project_dirs) {
    if (!dir.exists(project_dir)) {
      dir.create(project_dir, recursive = TRUE)
    }
  }
})
--------------------------------------------------------------------------------
/src/01_mergeSample.R:
--------------------------------------------------------------------------------
library(Seurat)
library(tidyverse)
library(patchwork)
library(data.table)
# Output folders for the three clustering runs produced by this script.
for (cluster_dir in paste0("./output/01_mergeSample/cluster", 1:3)) {
  dir.create(cluster_dir, recursive = TRUE, showWarnings = FALSE)
}
set.seed(123) # fix the random seed so that results are reproducible

#---------- Load data ----------
## Merge by passing a vector of sample directories to Read10X()
raw_dir <- "./data/raw/GSE139324_RAW"
dir <- file.path(raw_dir, c("GSM4138110", "GSM4138111", "GSM4138128",
                            "GSM4138129", "GSM4138148", "GSM4138149",
                            "GSM4138162", "GSM4138163", "GSM4138168",
                            "GSM4138169"))

# Move each sample's flat files from raw_dir into its own sub-folder, keeping
# only the 5th underscore-separated field of the file name as the new name.
for (i in dir) {
  ind <- basename(i)
  indP <- paste0(ind, ".")
  if (!dir.exists(i)) {
    dir.create(i, recursive = TRUE)
  }
  filern <- list.files(raw_dir, pattern = indP, include.dirs = FALSE)
  # The original condition was `length(filern == 3)`, which is true for any
  # non-empty listing; the intent is to require exactly three files.
  if (length(filern) == 3) {
    filname <- str_split(filern, "_", simplify = TRUE)[, 5]
    file.rename(file.path(raw_dir, filern), file.path(raw_dir, ind, filname))
  } else {
    print(paste0('please check ', ind))
  }
  # Rename genes.tsv.gz to features.tsv.gz (presumably the name Read10X()
  # looks for with gzipped input - confirm). Guarded so that a re-run, when the
  # file has already been renamed, does not emit a spurious warning.
  genes_file <- file.path(raw_dir, ind, "genes.tsv.gz")
  if (file.exists(genes_file)) {
    file.rename(genes_file, file.path(raw_dir, ind, "features.tsv.gz"))
  }
}

names(dir) <- c('HNC01PBMC', 'HNC01TIL', 'HNC10PBMC', 'HNC10TIL', 'HNC20PBMC',
                'HNC20TIL', 'PBMC1', 'PBMC2', 'Tonsil1', 'Tonsil2')
counts <- Read10X(data.dir = dir)

#------- Merge samples -------
scRNA1 <- CreateSeuratObject(counts, min.cells = 1)
dim(scRNA1) # number of genes and total number of cells
# recorded output: [1] 23603 19750
table(scRNA1@meta.data$orig.ident) # number of cells per sample
# recorded output:
# HNC01PBMC HNC01TIL HNC10PBMC HNC10TIL HNC20PBMC HNC20TIL PBMC1 PBMC2 Tonsil1 Tonsil2
#      1725     1298      1750     1384      1530     1148  2445  2436    3325    2709

# Alternative: build one Seurat object per sample and combine with merge()
scRNAlist <- vector("list", length(dir))
for (i in seq_along(dir)) {
  counts <- Read10X(data.dir = dir[i])
  scRNAlist[[i]] <- CreateSeuratObject(counts, min.cells = 1)
}
# Merge all per-sample objects into one; scRNAlist[-1] replaces the original
# hard-coded scRNAlist[[2]] ... scRNAlist[[10]] so the sample count can change.
scRNA2 <- merge(scRNAlist[[1]], y = scRNAlist[-1])
# dim(scRNA2)
# recorded output: [1] 23603 19750
table(scRNA2@meta.data$orig.ident)
# recorded output:
# HNC01PBMC HNC01TIL HNC10PBMC HNC10TIL HNC20PBMC HNC20TIL PBMC1 PBMC2 Tonsil1 Tonsil2
#      1725     1298      1750     1384      1530     1148  2445  2436    3325    2709

#---------- Principal components (scRNA1) ----------
scRNA1 <- NormalizeData(scRNA1)
scRNA1 <- FindVariableFeatures(scRNA1, selection.method = "vst")
scRNA1 <- ScaleData(scRNA1, features = VariableFeatures(scRNA1))
scRNA1 <- RunPCA(scRNA1, features = VariableFeatures(scRNA1))
plot1 <- DimPlot(scRNA1, reduction = "pca", group.by = "orig.ident")
plot2 <- ElbowPlot(scRNA1, ndims = 30, reduction = "pca")
plotc <- plot1 + plot2
ggsave("./output/01_mergeSample/cluster1/pca.png", plot = plotc, width = 8,
       height = 4)
print(c("请选择哪些pc轴用于后续分析?示例如下:","pc.num=1:15"))

# Principal components used downstream
pc.num <- 1:30

#---------- Clustering and non-linear embeddings (scRNA1) ----------
## Cell clustering
scRNA1 <- FindNeighbors(scRNA1, dims = pc.num)
scRNA1 <- FindClusters(scRNA1, resolution = 0.5)
table(scRNA1@meta.data$seurat_clusters)
metadata <- scRNA1@meta.data
cell_cluster <- data.frame(cell_ID = rownames(metadata),
                           cluster_ID = metadata$seurat_clusters)
write.csv(cell_cluster, './output/01_mergeSample/cluster1/cell_cluster.csv',
          row.names = FALSE)

## tSNE
scRNA1 <- RunTSNE(scRNA1, dims = pc.num)
embed_tsne <- Embeddings(scRNA1, 'tsne') # tSNE coordinates
write.csv(embed_tsne, './output/01_mergeSample/cluster1/embed_tsne.csv')
# coloured by cluster
plot1 <- DimPlot(scRNA1, reduction = "tsne", label = TRUE)
ggsave("./output/01_mergeSample/cluster1/tSNE.png", plot = plot1, width = 8,
       height = 7)
# coloured by sample
plot2 <- DimPlot(scRNA1, reduction = "tsne", group.by = 'orig.ident')
ggsave("./output/01_mergeSample/cluster1/tSNE_sample.png", plot = plot2,
       width = 8, height = 7)
# both side by side
plotc <- plot1 + plot2
ggsave("./output/01_mergeSample/cluster1/tSNE_cluster_sample.png",
       plot = plotc, width = 10, height = 5)

## UMAP
scRNA1 <- RunUMAP(scRNA1, dims = pc.num)
embed_umap <- Embeddings(scRNA1, 'umap') # UMAP coordinates
write.csv(embed_umap, './output/01_mergeSample/cluster1/embed_umap.csv')
# coloured by cluster
plot3 <- DimPlot(scRNA1, reduction = "umap", label = TRUE)
ggsave("./output/01_mergeSample/cluster1/UMAP.png", plot = plot3, width = 8,
       height = 7)
# coloured by sample; written to UMAP_sample.png (the original reused UMAP.png
# and silently overwrote the per-cluster figure saved just above)
plot4 <- DimPlot(scRNA1, reduction = "umap", group.by = 'orig.ident')
ggsave("./output/01_mergeSample/cluster1/UMAP_sample.png", plot = plot4,
       width = 8, height = 7)
# both side by side
plotc <- plot3 + plot4
ggsave("./output/01_mergeSample/cluster1/UMAP_cluster_sample.png",
       plot = plotc, width = 10, height = 5)

# tSNE and UMAP (by sample) in one figure with a shared legend
plotc <- plot2 + plot4 + plot_layout(guides = 'collect')
ggsave("./output/01_mergeSample/cluster1/tSNE_UMAP.png", plot = plotc,
       width = 10, height = 5)

## Dimensionality reduction / clustering of scRNA2 follows the scRNA1 code
#------------ Dataset integration ---------------

# scRNAlist holds the per-sample Seurat objects created above. Each one is
# normalised and has its variable features selected before integration.
scRNAlist <- lapply(scRNAlist, function(sample_obj) {
  sample_obj <- NormalizeData(sample_obj)
  FindVariableFeatures(sample_obj, selection.method = "vst")
})
## Find integration anchors based on the variable features (slow)
scRNA.anchors <- FindIntegrationAnchors(object.list = scRNAlist)
## Integrate the data using the anchors (slow)
scRNA3 <- IntegrateData(anchorset = scRNA.anchors)
dim(scRNA3)
# recorded output: [1] 2000 19750
# Note that only 2000 features are left: the integrated assay keeps just the
# 2000 highly variable genes used for integration.
# (Dimensionality reduction / clustering of scRNA3 is omitted here.)

#----------- Quality control ------------
scRNA <- scRNA3 # all later analysis uses the integrated object
## Add a constant project label to meta.data so that all cells can be drawn
## as a single group in the violin plots below
dir.create("./output/01_mergeSample/QC", recursive = TRUE,
           showWarnings = FALSE)
proj_name <- data.frame(proj_name = rep("demo2", ncol(scRNA)))
rownames(proj_name) <- row.names(scRNA@meta.data)
scRNA <- AddMetaData(scRNA, proj_name)

## Switch back to the RNA assay for the QC metrics
DefaultAssay(scRNA) <- "RNA"

## Percentage of mitochondrial counts per cell
scRNA[["percent.mt"]] <- PercentageFeatureSet(scRNA, pattern = "^MT-")
## Percentage of haemoglobin (red blood cell) gene counts per cell; only the
## genes actually present in the RNA assay are used
HB.genes <- c("HBA1", "HBA2", "HBB", "HBD", "HBE1", "HBG1", "HBG2", "HBM",
              "HBQ1", "HBZ")
HB_m <- match(HB.genes, rownames(scRNA@assays$RNA))
HB.genes <- rownames(scRNA@assays$RNA)[HB_m]
HB.genes <- HB.genes[!is.na(HB.genes)]
scRNA[["percent.HB"]] <- PercentageFeatureSet(scRNA, features = HB.genes)
# head(scRNA@meta.data)
col.num <- length(levels(as.factor(scRNA@meta.data$orig.ident)))

## Violin plots before filtering
# group.by = "proj_name" draws one violin for all samples;
# group.by = "orig.ident" draws one violin per sample.
# `&` (not `+`) applies the theme to every panel of the combined plot; with
# `+` only the last panel would be modified.
violin <- VlnPlot(scRNA, group.by = "proj_name",
                  features = c("nFeature_RNA", "nCount_RNA", "percent.mt",
                               "percent.HB"),
                  cols = rainbow(col.num),
                  pt.size = 0.01, # set pt.size = 0 to hide the points
                  ncol = 4) &
  theme(axis.title.x = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank())
ggsave("./output/01_mergeSample/QC/vlnplot_before_qc.pdf", plot = violin,
       width = 12, height = 6)
ggsave("./output/01_mergeSample/QC/vlnplot_before_qc.png", plot = violin,
       width = 12, height = 6)
plot1 <- FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot3 <- FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "percent.HB")
pearplot <- CombinePlots(plots = list(plot1, plot2, plot3), nrow = 1,
                         legend = "none")
ggsave("./output/01_mergeSample/QC/pearplot_before_qc.pdf", plot = pearplot,
       width = 12, height = 5)
ggsave("./output/01_mergeSample/QC/pearplot_before_qc.png", plot = pearplot,
       width = 12, height = 5)

## QC thresholds
# The prompt originally asked for a ribosomal percentage, but the threshold
# applied below is the mitochondrial percentage (percent.mt).
print(c("请输入允许基因数和线粒体比例,示例如下:", "minGene=500",
        "maxGene=4000", "pctMT=20"))
minGene <- 500
maxGene <- 3000
pctMT <- 10

## Filter cells
scRNA <- subset(scRNA,
                subset = nFeature_RNA > minGene & nFeature_RNA < maxGene &
                  percent.mt < pctMT)
col.num <- length(levels(as.factor(scRNA@meta.data$orig.ident)))
# `&` applies the theme to every panel; `+` would only change the last one.
violin <- VlnPlot(scRNA, group.by = "proj_name",
                  features = c("nFeature_RNA", "nCount_RNA",
                               "percent.mt", "percent.HB"),
                  cols = rainbow(col.num),
                  pt.size = 0.1,
                  ncol = 4) &
  theme(axis.title.x = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank())
ggsave("./output/01_mergeSample/QC/vlnplot_after_qc.pdf", plot = violin,
       width = 12, height = 6)
ggsave("./output/01_mergeSample/QC/vlnplot_after_qc.png", plot = violin,
       width = 12, height = 6)

#----------------- Cell type identification -------------------
library(SingleR)
dir.create("./output/01_mergeSample/CellType", recursive = TRUE,
           showWarnings = FALSE)

## Principal components on the filtered object
scRNA <- NormalizeData(scRNA)
scRNA <- FindVariableFeatures(scRNA, selection.method = "vst")
scRNA <- ScaleData(scRNA, features = VariableFeatures(scRNA))
scRNA <- RunPCA(scRNA, features = VariableFeatures(scRNA))
plot1 <- DimPlot(scRNA, reduction = "pca", group.by = "orig.ident")
plot2 <- ElbowPlot(scRNA, ndims = 30, reduction = "pca")
plotc <- plot1 + plot2
ggsave("./output/01_mergeSample/cluster3/pca.png", plot = plotc, width = 8,
       height = 4)
print(c("请选择哪些pc轴用于后续分析?示例如下:","pc.num=1:15"))

# Principal components used downstream
pc.num <- 1:30

## Cell clustering
scRNA <- FindNeighbors(scRNA, dims = pc.num)
scRNA <- FindClusters(scRNA, resolution = 0.5)
table(scRNA@meta.data$seurat_clusters)
metadata <- scRNA@meta.data
cell_cluster <- data.frame(cell_ID = rownames(metadata),
                           cluster_ID = metadata$seurat_clusters)
write.csv(cell_cluster, './output/01_mergeSample/cluster3/cell_cluster.csv',
          row.names = FALSE)

## tSNE
scRNA <- RunTSNE(scRNA, dims = pc.num)
embed_tsne <- Embeddings(scRNA, 'tsne') # tSNE coordinates
write.csv(embed_tsne, './output/01_mergeSample/cluster3/embed_tsne.csv')
# coloured by cluster
plot1 <- DimPlot(scRNA, reduction = "tsne", label = TRUE)
ggsave("./output/01_mergeSample/cluster3/tSNE.png", plot = plot1, width = 8,
       height = 7)
# coloured by sample
plot2 <- DimPlot(scRNA, reduction = "tsne", group.by = 'orig.ident')
ggsave("./output/01_mergeSample/cluster3/tSNE_sample.png", plot = plot2,
       width = 8, height = 7)
# both side by side
plotc <- plot1 + plot2
ggsave("./output/01_mergeSample/cluster3/tSNE_cluster_sample.png",
       plot = plotc, width = 10, height = 5)

## UMAP
scRNA <- RunUMAP(scRNA, dims = pc.num)
# Export the coordinates of the object just embedded; the original read them
# from scRNA1 (the unfiltered merge) by mistake.
embed_umap <- Embeddings(scRNA, 'umap')
write.csv(embed_umap, './output/01_mergeSample/cluster3/embed_umap.csv')
# coloured by cluster
plot3 <- DimPlot(scRNA, reduction = "umap", label = TRUE)
ggsave("./output/01_mergeSample/cluster3/UMAP.png", plot = plot3, width = 8,
       height = 7)
# coloured by sample; written to UMAP_sample.png (the original reused UMAP.png
# and overwrote the per-cluster figure saved just above)
plot4 <- DimPlot(scRNA, reduction = "umap", group.by = 'orig.ident')
ggsave("./output/01_mergeSample/cluster3/UMAP_sample.png", plot = plot4,
       width = 8, height = 7)
# both side by side
plotc <- plot3 + plot4
ggsave("./output/01_mergeSample/cluster3/UMAP_cluster_sample.png",
       plot = plotc, width = 10, height = 5)

# tSNE and UMAP (by sample) in one figure with a shared legend
plotc <- plot2 + plot4 + plot_layout(guides = 'collect')
ggsave("./output/01_mergeSample/cluster3/tSNE_UMAP.png", plot = plotc,
       width = 10, height = 5)

## Annotation with the Monaco reference
# refdata <- MonacoImmuneData()
# save(refdata, file = '~/database/SingleR_ref/ref_MonacoImmuneData.RData')
# load() is expected to restore an object named `refdata` (see the save()
# call above).
load(file = '~/database/SingleR_ref/ref_MonacoImmuneData.RData')
testdata <- GetAssayData(scRNA, slot = "data")
clusters <- scRNA@meta.data$seurat_clusters
cellpred <- SingleR(test = testdata, ref = refdata,
                    labels = refdata$label.main,
                    method = "cluster", clusters = clusters,
                    assay.type.test = "logcounts",
                    assay.type.ref = "logcounts")
celltype <- data.frame(ClusterID = rownames(cellpred),
                       celltype = cellpred$labels, stringsAsFactors = FALSE)
write.csv(celltype, "./output/01_mergeSample/CellType/celltype_Monaco.csv",
          row.names = FALSE)
# Map each cell's cluster to its predicted label; cells whose cluster has no
# prediction keep the literal string "NA", as in the original loop.
cluster_hit <- match(as.character(scRNA@meta.data$seurat_clusters),
                     celltype$ClusterID)
scRNA@meta.data$celltype_Monaco <- ifelse(is.na(cluster_hit), "NA",
                                          celltype$celltype[cluster_hit])
p1 <- DimPlot(scRNA, group.by = "celltype_Monaco", repel = TRUE, label = TRUE,
              label.size = 5, reduction = 'tsne')
p2 <- DimPlot(scRNA, group.by = "celltype_Monaco", repel = TRUE, label = TRUE,
              label.size = 5, reduction = 'umap')
p3 <- p1 + p2 + plot_layout(guides = 'collect')
ggsave("./output/01_mergeSample/CellType/tSNE_celltype_Monaco.png", p1,
       width = 7, height = 6)
ggsave("./output/01_mergeSample/CellType/UMAP_celltype_Monaco.png", p2,
       width = 7, height = 6)
ggsave("./output/01_mergeSample/CellType/celltype_Monaco.png", p3, width = 10,
       height = 5)

## Annotation with the DICE reference
# refdata <- DatabaseImmuneCellExpressionData()
# save(refdata, file='~/database/SingleR_ref/ref_DICE_1561s.RData')
load('~/database/SingleR_ref/ref_DICE_1561s.RData')
# refdata <- ref_DICE
# testdata / clusters from the Monaco step are reused unchanged. The original
# also re-ran RunTSNE() here with identical inputs; that redundant call is
# dropped so both annotations are drawn on the same tSNE embedding.
cellpred <- SingleR(test = testdata, ref = refdata,
                    labels = refdata$label.main,
                    method = "cluster", clusters = clusters,
                    assay.type.test = "logcounts",
                    assay.type.ref = "logcounts")
celltype <- data.frame(ClusterID = rownames(cellpred),
                       celltype = cellpred$labels, stringsAsFactors = FALSE)
write.csv(celltype, "./output/01_mergeSample/CellType/celltype_DICE.csv",
          row.names = FALSE)
cluster_hit <- match(as.character(scRNA@meta.data$seurat_clusters),
                     celltype$ClusterID)
scRNA@meta.data$celltype_DICE <- ifelse(is.na(cluster_hit), "NA",
                                        celltype$celltype[cluster_hit])
p4 <- DimPlot(scRNA, group.by = "celltype_DICE", repel = TRUE, label = TRUE,
              label.size = 5, reduction = 'tsne')
p5 <- DimPlot(scRNA, group.by = "celltype_DICE", repel = TRUE, label = TRUE,
              label.size = 5, reduction = 'umap')
# DICE tSNE + UMAP; the original combined p3 (the Monaco figure) with p4.
p6 <- p4 + p5 + plot_layout(guides = 'collect')
ggsave("./output/01_mergeSample/CellType/tSNE_celltype_DICE.png", p4, width = 7,
       height = 6)
ggsave("./output/01_mergeSample/CellType/UMAP_celltype_DICE.png", p5, width = 7,
       height = 6)
ggsave("./output/01_mergeSample/CellType/celltype_DICE.png", p6, width = 10,
       height = 5)
# Compare the two references on the tSNE embedding
p8 <- p1 + p4
ggsave("./output/01_mergeSample/CellType/Monaco_DICE.png", p8, width = 12,
       height = 5)

## Save the annotated object
saveRDS(scRNA, './output/01_mergeSample/scRNA.rds')
--------------------------------------------------------------------------------
/src/02.1_cisTarget_dwonload.R:
--------------------------------------------------------------------------------
##1, For human:
dbFiles1 <- c("https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg38/refseq_r80/mc9nr/gene_based/hg38__refseq-r80__500bp_up_and_100bp_down_tss.mc9nr.feather",
              "https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg38/refseq_r80/mc9nr/gene_based/hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.feather")
##2, For mouse:
dbFiles2 <- c("https://resources.aertslab.org/cistarget/databases/mus_musculus/mm10/refseq_r80/mc9nr/gene_based/mm10__refseq-r80__500bp_up_and_100bp_down_tss.mc9nr.feather",
              "https://resources.aertslab.org/cistarget/databases/mus_musculus/mm10/refseq_r80/mc9nr/gene_based/mm10__refseq-r80__10kb_up_and_down_tss.mc9nr.feather")
# mc9nr: Motif collection version 9: 24k motifs

##3, download
# Folder that stores the databases. recursive = TRUE also creates ~/database
# when it is missing (the original non-recursive call failed in that case and
# the following setwd() then stopped the script).
db_dir <- path.expand("~/database/cisTarget_databases")
dir.create(db_dir, recursive = TRUE, showWarnings = FALSE)
# Set dbFiles to whichever URL sets are wanted, then run the loop below.
dbFiles <- c(dbFiles1, dbFiles2)
# Raise the download timeout for these large files for the duration of the
# loop, then restore the previous setting.
old_opts <- options(timeout = max(3600, getOption("timeout")))
for (featherURL in dbFiles) {
  # Files are written straight into db_dir instead of changing the working
  # directory; mode = "wb" keeps the binary files intact on Windows.
  download.file(featherURL,
                destfile = file.path(db_dir, basename(featherURL)),
                mode = "wb")
}
options(old_opts)
# mc9nr: Motif collection version 9: 24k motifs
--------------------------------------------------------------------------------
/src/02_senic.R:
--------------------------------------------------------------------------------
library(Seurat)
library(tidyverse)
library(patchwork)
library(SCENIC)
# NOTE(review): clearing the workspace and hard-coding the project path are
# kept from the original because the relative paths below depend on them;
# consider running from the RStudio project instead.
rm(list = ls())

# Install arrow only when it is missing instead of on every run.
if (!requireNamespace("arrow", quietly = TRUE)) {
  BiocManager::install('arrow')
}

setwd('~/Desktop/advancedSingleCell')
##== Preparation ==##
dir.create("./output/02_SCENIC/int", recursive = TRUE, showWarnings = FALSE)
scRNA <- readRDS("./output/01_mergeSample/scRNA.rds")
## Cell-level metadata: sample, cluster and Monaco cell type
cellInfo <- data.frame(scRNA@meta.data)
cellInfo <- cellInfo[, c("orig.ident", "seurat_clusters", "celltype_Monaco")]
colnames(cellInfo) <- c("sample", "cluster", "celltype")
saveRDS(cellInfo, file = "./output/02_SCENIC/cellInfo.Rds")


## Expression matrix
# To save computing resources, work on a random subset of (at most) 1000
# cells. The seed makes the subset reproducible, and min() avoids an error
# when the object holds fewer than 1000 cells.
set.seed(123)
subcell <- sample(colnames(scRNA), min(1000, ncol(scRNA)))
scRNAsub <- scRNA[, subcell]
saveRDS(scRNAsub, "./output/02_SCENIC/scRNAsub.rds")
exprMat <- as.matrix(scRNAsub@assays$RNA@counts)
## Set up the analysis environment
# Location and file names of the cisTarget ranking databases; the vector
# names are the labels passed on to initializeScenic() via `dbs`.
mydbDIR <- "~/database/cisTarget_databases/"
mydbs <- c(
  "500bp" = "hg38__refseq-r80__500bp_up_and_100bp_down_tss.mc9nr.feather",
  "10kb" = "hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.feather"
)
scenicOptions <- initializeScenic(
  org = "hgnc",
  nCores = 8,
  dbDir = mydbDIR,
  dbs = mydbs,
  datasetTitle = "HNSCC"
)
saveRDS(scenicOptions, "./output/02_SCENIC/scenicOptions.rds")

#---------- Co-expression network ----------

##== Regulatory network inference ==##
## Gene filtering
# Keep genes whose total counts exceed 3% of the number of cells and that
# are detected in at least 1% of the cells.
min_total_counts <- 3 * 0.01 * ncol(exprMat)
min_cells_detected <- ncol(exprMat) * 0.01
genesKept <- geneFiltering(
  exprMat, scenicOptions,
  minCountsPerGene = min_total_counts,
  minSamples = min_cells_detected
)
exprMat_filtered <- exprMat[genesKept, ]

## Correlation matrix
runCorrelation(exprMat_filtered, scenicOptions)

## TF-target regression on log2(count + 1) values
exprMat_filtered_log <- log2(exprMat_filtered + 1)
# Very resource hungry: the original author notes several hours of run time
# on a personal computer.
runGenie3(exprMat_filtered_log, scenicOptions, nParts = 20)
--------------------------------------------------------------------------------