├── 1.Maintypes features.R ├── 10.Validation differences of tumor cells at A and R stage.R ├── 11.iTalk_Tex_Tumor.R ├── 12.multiCox.R ├── 13.Ligand and Receptors heatmap.R ├── 14.SignatureScore.R ├── 15.Survival analysis.R ├── 2.Epithelial cells features.R ├── 3.HNSCC_epi_tumor features.R ├── 4.Fibroblast and Myeloid subtypes features.R ├── 5.Cell-Cell Interaction.R ├── 6.Difference between LN-in and LN-out.R ├── 7.Interaction differences between Tex and Tumor cell at LN-in and LN-out.R.R ├── 8.Differences between stage A and stage R.R ├── 9.Monocle.R └── README.md /1.Maintypes features.R: -------------------------------------------------------------------------------- 1 | # Maintypes features at different stage 2 | HNSCC_Whole <- readRDS('/work/smy/Project/HNSCC/2.data/DeinfeTypes/HNSCC_26sampleAll_DefineTypes.rds.gz') 3 | HNSCC_Whole$Subcluster2 <- ifelse(HNSCC_Whole$Subcluster == 'CD4 Tcells'|HNSCC_Whole$Subcluster == 'CD8 T cells','T cells',HNSCC_Whole$Subcluster) 4 | ###Fig1======================================================= 5 | ##figure1B 6 | Idents(HNSCC_Whole) <- HNSCC_Whole$Subcluster2 7 | DimPlot(HNSCC_Whole,cols = c('#DE901C','#0D5EA4','#C9C9C9','#C64C1E','#BF6196','#158F61','#6B297B','#A8AC1C','#AC142E')) 8 | 9 | HNSCC_Whole$Subcluster2 <- factor(HNSCC_Whole$Subcluster2,levels = unique(HNSCC_Whole$Subcluster2)) 10 | p <- DimPlot(HNSCC_Whole,cols = RColorBrewer::brewer.pal(10,'Set1'),raster=FALSE) 11 | Umap <- HNSCC_Whole@reductions$umap@cell.embeddings[,c('UMAP_1','UMAP_2')] %>% as.data.frame() 12 | Umap$Subcluster <- HNSCC_Whole@meta.data$Subcluster2 13 | 14 | openxlsx::write.xlsx(p$data,file=paste(OutPath,"/",folder,"/Fig1B.xlsx",sep="")) 15 | 16 | ##figure1C 17 | p <- DotPlot(HNSCC_Whole,features = c('CDH5','PECAM1',"COL1A1","COL3A1",'GPM6B','S100B','MCAM','RGS5','CD19','MS4A1','SDC1','MZB1','CD14','FCGR3A', 18 | 'CD3E','CD3G','CD3D','EPCAM','CDH1'),cols = 'Spectral') + theme(axis.text.x = element_text(angle=90,vjust=0.5,hjust=1)) 19 | 20 | openxlsx::write.xlsx(p$data,file=paste(OutPath,"/",folder,"/Fig1C.xlsx",sep="")) 21 | 22 | ##figure1D 23 | p <- DimPlot(HNSCC_Whole,cols = RColorBrewer::brewer.pal(10,'Set1'),split.by = "stage") 24 | 25 | openxlsx::write.xlsx(p$data,file=paste(OutPath,"/",folder,"/Fig1D.xlsx",sep="")) 26 | 27 | ##figure1E(left panel) 28 | PPercentage <- table(HNSCC_Whole@meta.data[,c('Subcluster2',"stage")]) %>% data.frame %>% 29 | #Por:用一群细胞silent/expression占比(total:100%) 30 | dplyr::mutate(Por= Freq/apply(table(HNSCC_Whole@meta.data[,c('Subcluster2',"stage")]),1,sum)) 31 | openxlsx::write.xlsx(PPercentage,file=paste(OutPath,"/",folder,"/Fig1Ekleft.xlsx",sep="")) 32 | 33 | ##figure1E(right panel) 34 | number <- table(HNSCC_Whole$Subcluster2) %>% as.data.frame() 35 | number <- number[order(number$Freq,decreasing = T),] 36 | openxlsx::write.xlsx(number,file=paste(OutPath,"/",folder,"/Fig1Eright.xlsx",sep="")) 37 | 38 | ##figureS1B ====== 39 | subF <- HNSCC_Whole@meta.data 40 | SubCellsDisA <- table(subF[,c("stage","Subcluster2")]) %>% 41 | data.frame %>% set_colnames(c("Stage","CellTypes","Number")) 42 | 43 | SubCellsDisA_Tissue <- lapply(split(SubCellsDisA,SubCellsDisA$Stage),function(X){ 44 | X%>% dplyr::mutate(Per=100*Number/sum(Number)) 45 | }) %>% dplyr::bind_rows(.) 46 | 47 | SubCellsDisA_Tissue$Stage <- as.character(SubCellsDisA_Tissue$Stage) 48 | SubCellsDisA_Tissue$CellTypes <- factor(SubCellsDisA_Tissue$CellTypes,levels = names(table(HNSCC_Whole$Subcluster2))) 49 | 50 | SubCellsDisA_Tissue$CellTypes <- factor(SubCellsDisA_Tissue$CellTypes,levels = unique(HNSCC_Whole$Subcluster2)) 51 | 52 | SubCellsDisA_Tissues <- arrange(SubCellsDisA_Tissue, CellTypes) 53 | 54 | openxlsx::write.xlsx(SubCellsDisA_Tissues,file=paste(OutPath,"/",folder,"/SFig1B.xlsx",sep="")) 55 | -------------------------------------------------------------------------------- /10.Validation differences of tumor cells at A and R stage.R: -------------------------------------------------------------------------------- 1 | OutPath <- "/work/brj/Collaboration/2022/scRNA/HNSCC/Revise3/" 2 | folder <- "Sfig6" 3 | 4 | ## b /work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Code/26.inferCNV_DE.R 5 | library(data.table) 6 | WorkPath <- "/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Data/inferCNV/" 7 | finalDEG <- fread(paste0(WorkPath,"DEG_CNV.txt")) 8 | finalUp <- finalDEG[finalDEG$class == "Up",] 9 | finalDn <- finalDEG[finalDEG$class == "Down",] 10 | 11 | ## LB DEG 12 | LBgene <- fread("/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Data/LB/Gene_malignant-Sample_RvsA.txt",data.table = F) 13 | 14 | LBgene$class <- ifelse(LBgene$p_val < 0.05 & LBgene$avg_log2FC > 0.25,"Up",ifelse(LBgene$p_val < 0.05 & LBgene$avg_log2FC < -0.25,"Down","None")) 15 | LBUp <- LBgene[LBgene$class == "Up",] 16 | LBDn <- LBgene[LBgene$class == "Down",] 17 | CNV_UP = finalUp$symbol 18 | scRNA_UP = LBUp$Gene 19 | 20 | length_diff <- abs(length(CNV_UP) - length(scRNA_UP)) 21 | if(length(CNV_UP) < length(scRNA_UP)) { 22 | CNV_UP <- c(CNV_UP, rep(NA, length_diff)) 23 | } else if(length(CNV_UP) > length(scRNA_UP)) { # 如果scRNA_UP较短 24 | scRNA_UP <- c(scRNA_UP, rep(NA, length_diff)) 25 | } 26 | Up <- data.frame(CNV_UP, scRNA_UP) 27 | openxlsx::write.xlsx(Up,file=paste(OutPath,"/",folder,"/sFig6bUp.xlsx",sep="")) 28 | 29 | CNV_DOWN = finalDn$symbol 30 | scRNA_DOWN = LBDn$Gene 31 | 32 | length_diff <- abs(length(CNV_DOWN) - length(scRNA_DOWN)) 33 | if(length(CNV_DOWN) < length(scRNA_DOWN)) { 34 | CNV_DOWN <- c(CNV_DOWN, rep(NA, length_diff)) 35 | } else if(length(CNV_DOWN) > length(scRNA_DOWN)) { # 如果scRNA_DOWN较短 36 | scRNA_DOWN <- c(scRNA_DOWN, rep(NA, length_diff)) 37 | } 38 | DOWN <- data.frame(CNV_DOWN, scRNA_DOWN) 39 | openxlsx::write.xlsx(DOWN,file=paste(OutPath,"/",folder,"/sFig6bDOWN.xlsx",sep="")) 40 | 41 | 42 | 43 | ## d GSE234933 44 | #/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Code/80.GSE234933_OC_statistic.R 45 | ## GSE234933 TUMOR 46 | PRtumors <- readr::read_rds(paste0("/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Fig/Recurrent/GSE234933/OC/FeaturePlot/","/PRtumorsOC_Sele.rds.gz")) 47 | Sample <- PRtumors@meta.data %>% tibble::rownames_to_column("Sample") %>% dplyr::select(c(Sample,Tissue)) %>% set_colnames(c("Sample","Group")) 48 | 49 | ## fanjia cell proliferation 50 | PRtumors_Proliferation <- readr::read_rds("/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Fig/Recurrent/GSE234933/OC/Score/ssGSEA_PR_Proliferation.rds.gz") 51 | ProExp <- t(PRtumors_Proliferation) %>% as.data.frame %>% tibble::rownames_to_column("Sample") 52 | 53 | PRtumors_KEGG <- readr::read_rds("/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Fig/Recurrent/GSE234933/OC/Score/ssGSEA_PR_OC_KEGG.rds.gz") 54 | Candidatapwy <- c("Glycolysis / Gluconeogenesis","Oxidative phosphorylation","Cell cycle","Antigen processing and presentation") 55 | sobjlists <- t(PRtumors_KEGG) %>% as.data.frame %>% dplyr::select(Candidatapwy) %>% 56 | tibble::rownames_to_column("Sample") %>% inner_join(Sample) %>% 57 | inner_join(ProExp) %>% 58 | dplyr::select(c(Sample,Group,Fanjia_Proliferation,'Oxidative phosphorylation'))%>% 59 | dplyr::rename("Cell proliferation" = "Fanjia_Proliferation") 60 | openxlsx::write.xlsx(sobjlists,file=paste(OutPath,"/",folder,"/sFig6d.xlsx",sep="")) 61 | 62 | pp <- lapply(c(3:4), function(sub){ 63 | p1 <- ggplot(sobjlists,aes(x= Group, y = sobjlists[,sub])) + 64 | geom_violin(aes(fill=Group)) + 65 | geom_boxplot(aes(fill=Group),width=0.1)+#可用于将中位数点添加到箱线图中 66 | labs(y= as.character(names(sobjlists)[sub]), x = "Group") + 67 | scale_fill_manual(limits=c("P","R"),values=c("#4072B5","#2CAE65"),guide=F)+ 68 | theme(panel.background = element_blank(), axis.line = element_line(colour = "black"),plot.title = element_text(hjust = 0.5))+ 69 | ggpubr::stat_compare_means(comparisons = list(c("P","R")), method = "wilcox.test",na.rm = T) # Add pairwise comparisons p-value 70 | return(p1) 71 | }) 72 | 73 | ## e GSE173955 74 | Vpath2 <- "/server1_work//brj/Collaboration/scRNA/2022/HNSCC/Revise/Fig/GEO/" 75 | Markers_GSE173855 <- fread(paste0(Vpath2,"/GSE173855_OC_DEG.txt"),data.table = F) 76 | names(Markers_GSE173855)[1] <- "gene" 77 | subRank <- Markers_GSE173855 %>% dplyr::arrange(desc(logFC)) 78 | subRankList <- subRank$logFC 79 | names(subRankList) <- subRank$gene 80 | subRankList <- rev(sort(subRankList)) 81 | 82 | MetabolismPathways <- read.gmt("/server1_work/yye/Project/Data/Public/GeneList/KEGG_metabolism.gmt") 83 | NonMetabolismPathways <- read.gmt("/server1_work/yye/Project/Data/Public/GeneList/nonMetabolic_KEGG.gmt") 84 | KEGG <- rbind(MetabolismPathways,NonMetabolismPathways) 85 | 86 | 87 | fgseaRes_KEGG <- clusterProfiler::GSEA(geneList = subRankList, 88 | nPerm=100,TERM2GENE=KEGG, 89 | minGSSize=5, 90 | maxGSSize=1000, 91 | pvalueCutoff = 1) 92 | GSEAR_KEGG <- fgseaRes_KEGG@result 93 | openxlsx::write.xlsx(GSEAR_KEGG, paste0(Opath,"GSE173855_GSEAresult_KEGG.xlsx")) 94 | 95 | 96 | candidateGeneSets <- c("Glycolysis / Gluconeogenesis","Oxidative phosphorylation","Cell cycle","Antigen processing and presentation") 97 | 98 | NES_can <- signif(fgseaRes_KEGG[fgseaRes_KEGG$ID %in% candidateGeneSets,c("NES","pvalue")],digits = 2) 99 | 100 | library(enrichplot) 101 | GSEAplot <- lapply(candidateGeneSets,function(candidateGeneSet){ 102 | NES_can <- signif(fgseaRes_KEGG[fgseaRes_KEGG$ID %in% candidateGeneSet,c("NES","pvalue")],digits = 2) 103 | 104 | p <- gseaplot2(fgseaRes_KEGG,##KEGG gse的对象zw 105 | geneSetID = candidateGeneSet,color = "green",#TcellRelated$ID, 106 | pvalue_table=F,subplots = 1:2)+ 107 | annotate("text",x=0.5,y=0.8,label = paste(candidateGeneSet,"\nNES = ",NES_can[1],"; p = ",NES_can[2],sep=""),vjust=1) 108 | return(p) 109 | }) 110 | 111 | ## f 112 | #/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Code/87.GSE234933_cellphonedb_Plot.R 113 | cpdbresult 114 | openxlsx::write.xlsx(cpdbresult,file=paste(OutPath,"/",folder,"/sFig6f.xlsx",sep="")) 115 | 116 | ## g 117 | PRtumors <- readr::read_rds(paste0("/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Fig/Recurrent/GSE234933/OC/FeaturePlot/","/PRtumorsOC_Sele.rds.gz")) 118 | Idents(PRtumors) <- "Tissue" 119 | p1 <- VlnPlot(PRtumors,features = "CDKN2A",split.by = "Tissue") 120 | p2 <- VlnPlot(PRtumors,features = "EGFR",split.by = "Tissue") 121 | p3 <- VlnPlot(PRtumors,features = "VEGFA",split.by = "Tissue") 122 | p4 <- VlnPlot(PRtumors,features = 'TGFB1',split.by = "Tissue") 123 | 124 | PP <- p1$data %>% bind_cols(p2$data[,1]) %>% bind_cols(p3$data[,1])%>%bind_cols(p4$data[,1]) %>% 125 | dplyr::select(c(ident,split),everything()) 126 | names(PP)[4:6] <- c("EGFR","VEGFA",'TGFB1') 127 | openxlsx::write.xlsx(PP,file=paste(OutPath,"/",folder,"/sFig6g.xlsx",sep=""),row.names = T) 128 | -------------------------------------------------------------------------------- /11.iTalk_Tex_Tumor.R: -------------------------------------------------------------------------------- 1 | #devtools::install_github("Coolgenome/iTALK", build_vignettes = TRUE) 2 | library(nichenetr) 3 | library(Seurat) 4 | library(tidyverse) 5 | library(circlize) 6 | library(dplyr) 7 | library(clusterProfiler) 8 | library(RColorBrewer) 9 | library(iTALK,lib.loc = "/home/yhdu/R/x86_64-pc-linux-gnu-library/4.1/") 10 | 11 | # #load Seurat rds 12 | HNSCC_Whole <- readRDS('/work/smy/Project/HNSCC/2.data/DeinfeTypes/HNSCC_26sampleAll_DefineTypes.rds.gz') 13 | Idents(HNSCC_Whole) <- HNSCC_Whole$DefineTypes 14 | HNSCC_Whole$celltype_RE <- HNSCC_Whole$DefineTypes 15 | 16 | ## add tumor in epi : metadata$Malignancy == 'malignant' 17 | metadata <- readRDS('/work/smy/Project/HNSCC_26sample/2.data/26sample_tumor_cell_metadata.rds') 18 | malignant <- rownames(metadata[grep('malignant',metadata$Malignancy),]) 19 | HNSCC_Whole$celltype_RE[malignant] <- 'TumorCell' 20 | HNSCC_TumorTex_Allstage <- subset(HNSCC_Whole,subset = celltype_RE %in% c("CD8 Tex","TumorCell")) 21 | HNSCC_TumorTex <- subset(HNSCC_TumorTex_Allstage,subset = stage %in% c("LN-in","LN-out")) 22 | Idents(HNSCC_TumorTex) <- HNSCC_TumorTex$celltype_RE 23 | TumorTex_split <- SplitObject(HNSCC_TumorTex,split.by = 'stage') 24 | 25 | top_genes <- "100" 26 | # target genes = 50 , tumor to CD8 tex 只有 27 | TumorTex_iTALK <- lapply(names(TumorTex_split), function(name){ 28 | merge_seu <- TumorTex_split[[name]] 29 | # iTALK 要求的矩阵: 行为细胞,列为基因 30 | iTALK_data <- as.data.frame(t(merge_seu@assays$RNA@counts)) 31 | #iTALK_data需要加两列:cell_type和compare_group的metadata 32 | iTALK_data$cell_type <- merge_seu$celltype_RE 33 | iTALK_data$compare_group <- merge_seu$stage 34 | 35 | highly_exprs_genes <- rawParse(iTALK_data,top_genes = 100,stats = "mean") 36 | comm_list <- c("growth factor","other","cytokine","checkpoint") 37 | iTALK_res <- NULL 38 | for(i in comm_list){ 39 | res <- FindLR(highly_exprs_genes, datatype="mean count", comm_type = i) 40 | iTALK_res <- rbind(iTALK_res,res) 41 | } 42 | outpath <- "/work/brj/Collaboration/2022/scRNA/HNSCC/Result/iTALK/" 43 | saveRDS(iTALK_res,paste(outpath,"/TextoTumor_",name,"iTALK_res.rds.gz",sep = "")) 44 | return(iTALK_res) 45 | }) 46 | names(TumorTex_iTALK) <- names(TumorTex_split) 47 | 48 | outpath <- "/work/brj/Collaboration/2022/scRNA/HNSCC/Result/iTALK/" 49 | saveRDS(TumorTex_iTALK,paste(outpath,"/TexTumor_iTALK_res.rds.gz",sep = "")) 50 | 51 | TumorTex_iTALK <- readr::read_rds(paste(outpath,"/TexTumor_iTALK_res.rds.gz",sep = "")) 52 | 53 | 54 | iTALK_res_out <- TumorTex_iTALK[[1]] 55 | 56 | checkpoint1 <- iTALK_res_out %>% dplyr::filter(cell_from=="TumorCell"&cell_to=="CD8 Tex"&comm_type=="checkpoint") 57 | pdf(paste(outpath,"/TumortoTex_",top_genes,"iTALK_out_res.pdf",sep = ""),width = 5,height = 5) 58 | LRPlot(checkpoint1,datatype='mean count',transparency=0.5,link.arr.lwd=(checkpoint1$cell_from_mean_exprs),link.arr.width=checkpoint1$cell_to_mean_exprs) 59 | dev.off() 60 | 61 | 62 | pdf(paste(outpath,"/TextoTumor_",top_genes,"iTALK_out_res.pdf",sep = ""),width = 5,height = 5) 63 | LRPlot(checkpoint2,datatype='mean count',transparency=0.5,link.arr.lwd=(checkpoint2$cell_from_mean_exprs),link.arr.width=checkpoint2$cell_to_mean_exprs) 64 | dev.off() 65 | 66 | iTALK_res_in <- TumorTex_iTALK[[2]] 67 | 68 | checkpoint3 <- iTALK_res_in %>% dplyr::filter(cell_from=="TumorCell"&cell_to=="CD8 Tex"&comm_type=="checkpoint") 69 | 70 | pdf(paste(outpath,"/TumortoTex_",top_genes,"iTALK_in_res.pdf",sep = ""),width = 5,height = 5) 71 | LRPlot(checkpoint3,datatype='mean count',transparency=0.5,link.arr.lwd=(checkpoint3$cell_from_mean_exprs),link.arr.width=checkpoint3$cell_to_mean_exprs) 72 | dev.off() 73 | 74 | checkpoint4 <- iTALK_res_in %>% dplyr::filter(cell_from=="CD8 Tex"&cell_to=="TumorCell"&comm_type=="checkpoint") 75 | pdf(paste(outpath,"/TextoTumor_",top_genes,"iTALK_in_res.pdf",sep = ""),width = 5,height = 5) 76 | LRPlot(checkpoint4,datatype='mean count',transparency=0.5,link.arr.lwd=(checkpoint4$cell_from_mean_exprs),link.arr.width=checkpoint4$cell_to_mean_exprs) 77 | dev.off() 78 | 79 | pdf(paste(outpath,"/TextoTumor_iTALK_res_targetn500.pdf",sep = ""),width = 5,height = 5) 80 | cowplot::plot_grid(plotlist = list(LRPlot(checkpoint1,datatype='mean count',transparency=0.5,link.arr.lwd=(checkpoint1$cell_from_mean_exprs),link.arr.width=checkpoint1$cell_to_mean_exprs), 81 | LRPlot(checkpoint2,datatype='mean count',transparency=0.5,link.arr.lwd=(checkpoint2$cell_from_mean_exprs),link.arr.width=checkpoint2$cell_to_mean_exprs), 82 | LRPlot(checkpoint3,datatype='mean count',transparency=0.5,link.arr.lwd=(checkpoint3$cell_from_mean_exprs),link.arr.width=checkpoint3$cell_to_mean_exprs), 83 | LRPlot(checkpoint4,datatype='mean count',transparency=0.5,link.arr.lwd=(checkpoint4$cell_from_mean_exprs),link.arr.width=checkpoint4$cell_to_mean_exprs))) 84 | dev.off() 85 | 86 | 87 | 88 | ### 计算组间 DEG 89 | merge_seu1 <- HNSCC_TumorTex 90 | merge_seu1[["RNA"]]@counts <- as.matrix(merge_seu1[["RNA"]]@counts)+1 91 | 92 | # iTALK 要求的矩阵: 行为细胞,列为基因 93 | iTalk_data <- as.data.frame(t(merge_seu1@assays$RNA@counts)) 94 | #iTALK_data需要加两列:cell_type和compare_group的metadata 95 | iTalk_data$cell_type <- merge_seu1$celltype_RE 96 | iTalk_data$compare_group <- merge_seu1$stage 97 | 98 | 99 | deg_Tex <- DEG(iTalk_data %>% filter(cell_type=='CD8 Tex'),method='DESeq2',contrast=c('LN-out', 'LN-in')) 100 | deg_TumorCell <- DEG(iTalk_data %>% filter(cell_type=='TumorCell'),method='DESeq2',contrast=c('LN-out', 'LN-in')) 101 | 102 | res<-NULL 103 | for(comm_type in comm_list){ 104 | res_cat<-FindLR(deg_Tex,deg_TumorCell,datatype='DEG',comm_type=comm_type) 105 | res<-rbind(res,res_cat) 106 | } 107 | 108 | res_filter <- res[order(res$cell_from_logFC*res$cell_to_logFC,decreasing=T),] 109 | res_filters <- res_filter %>% filter(comm_type %in% c("checkpoint")) 110 | 111 | saveRDS(res,paste(outpath,"/TexTumor_DEGs_iTALK_res.rds.gz",sep = "")) 112 | 113 | 114 | pdf(paste(outpath,"/TextoTumor_Checkpoint_DEG",top_genes,"iTALK_in_res.pdf",sep = ""),width = 5,height = 5) 115 | LRPlot(res_filters,datatype='DEG',link.arr.lwd=res_filters$cell_from_logFC,link.arr.width=res_filters$cell_to_logFC) 116 | dev.off() 117 | 118 | 119 | ### check iTALK DE L-R between out and in 120 | library(Seurat) 121 | library(tidyverse) 122 | library(dplyr) 123 | setwd("/work/brj/Collaboration/2022/scRNA/HNSCC/Result/iTALK/") 124 | # load data 125 | outpath <- "/work/brj/Collaboration/2022/scRNA/HNSCC/Result/iTALK/" 126 | TexTumorDEG <- readr::read_rds(paste(outpath,"/TexTumor_DEGs_iTALK_res.rds.gz",sep = "")) 127 | res_filters <- TexTumorDEG %>% filter(comm_type %in% c("checkpoint")) 128 | TexLigand <- res_filters %>% filter(cell_from == "CD8 Tex") %>% pull(ligand) %>% unique() 129 | TexReceptor <- res_filters %>% filter(cell_to == "CD8 Tex") %>% pull(receptor) %>% unique() 130 | TumorLigand <- res_filters %>% filter(cell_from == "TumorCell") %>% pull(ligand) %>% unique() 131 | TumorReceptor <- res_filters %>% filter(cell_to == "TumorCell") %>% pull(receptor) %>% unique() 132 | 133 | 134 | DefaultAssay(HNSCC_TumorTex) <- "RNA" 135 | 136 | Tex <- subset(HNSCC_TumorTex,subset = celltype_RE == "CD8 Tex") 137 | Tumor <- subset(HNSCC_TumorTex,subset = celltype_RE == "TumorCell") 138 | 139 | p1 <- VlnPlot(Tumor, features = c("TNFRSF14","CD274","LGALS9"),pt.size = 0.05, group.by = "stage",col = c("#882E72","#B17BA6"),ncol =1 ) 140 | p2 <- VlnPlot(Tex, features = c("BTLA","PDCD1","HAVCR2"),pt.size = 0.05, group.by = "stage",col =c("#882E72","#B17BA6"),ncol =1 ) 141 | 142 | 143 | pdf(paste0(outpath,"TexTumoriTalkLigandReceptorViolinPlot.pdf"),width = 6,height = 9) 144 | cowplot::plot_grid(plotlist = list(p1,p2),ncol = 2) 145 | dev.off() 146 | 147 | 148 | 149 | 150 | 151 | 152 | DefaultAssay(Tumor) <- "RNA" 153 | vp_case1 <- function(gene_signature, file_name, test_sign){ 154 | plot_case1 <- function(signature, y_max = NULL){ 155 | VlnPlot(Tumor, features = signature, 156 | pt.size = 0.05, 157 | group.by = "stage" 158 | ) + stat_compare_means(comparisons = test_sign, label = "p.signif") 159 | } 160 | plot_list <- list() 161 | y_max_list <- list() 162 | for (gene in gene_signature) { 163 | plot_list[[gene]] <- plot_case1(gene) 164 | y_max_list[[gene]] <- max(plot_list[[gene]]$data[[gene]]) 165 | plot_list[[gene]] <- plot_case1(gene, y_max = (y_max_list[[gene]] + 1) ) 166 | } 167 | cowplot::plot_grid(plotlist = plot_list) 168 | file_name <- paste0(file_name, "rplot.png") 169 | ggsave(file_name, width = 13, height = 8) 170 | } 171 | 172 | gene_sig <- TumorReceptor 173 | comparisons <- list(c("LN-out","LN-in")) 174 | vp_case1(gene_signature = gene_sig, file_name = "TumorReceptorLN-outLN-in", test_sign = comparisons) 175 | 176 | 177 | 178 | 179 | ### check iTALK DE L-R between out and in 180 | library(Seurat) 181 | library(tidyverse) 182 | library(dplyr) 183 | setwd("/work/brj/Collaboration/2022/scRNA/HNSCC/Result/iTALK/") 184 | # load data 185 | outpath <- "/work/brj/Collaboration/2022/scRNA/HNSCC/Result/iTALK/" 186 | TexTumorDEG <- readr::read_rds(paste(outpath,"/TexTumor_DEGs_iTALK_res.rds.gz",sep = "")) 187 | res_filters <- TexTumorDEG %>% filter(comm_type %in% c("checkpoint")) 188 | TexLigand <- res_filters %>% filter(cell_from == "CD8 Tex") %>% pull(ligand) %>% unique() 189 | TexReceptor <- res_filters %>% filter(cell_to == "CD8 Tex") %>% pull(receptor) %>% unique() 190 | TumorLigand <- res_filters %>% filter(cell_from == "TumorCell") %>% pull(ligand) %>% unique() 191 | TumorReceptor <- res_filters %>% filter(cell_to == "TumorCell") %>% pull(receptor) %>% unique() 192 | 193 | 194 | Tex <- subset(HNSCC_TumorTex,subset = celltype_RE == "CD8 Tex") 195 | Tumor <- subset(HNSCC_TumorTex,subset = celltype_RE == "TumorCell") 196 | 197 | pdf(paste0(outpath,"TexLigandRecptorDotPlot.pdf"),width = 5,height = 3) 198 | DotPlot(Tex,features = c(TexLigand[-c(1,4)],TexReceptor),group.by = "stage")+ 199 | theme(axis.text.x = element_text(angle=90,vjust=0.5,hjust=1))+ 200 | scale_color_distiller(palette = "RdBu") + NoLegend() 201 | dev.off() 202 | 203 | pdf(paste0(outpath,"TumorLigandRecptorDotPlot.pdf"),width = 5,height = 3) 204 | DotPlot(Tumor,features = c(TumorLigand[-c(3)],TumorReceptor),group.by = "stage")+ 205 | theme(axis.text.x = element_text(angle=90,vjust=0.5,hjust=1))+ 206 | scale_color_distiller(palette = "RdBu") + NoLegend() 207 | dev.off() 208 | 209 | VlnPlot(Tex , features = signature, 210 | pt.size = 0.05, 211 | group.by = "stage", 212 | y.max = 1.0 213 | ) + stat_compare_means(comparisons = test_sign, label = "p.signif") 214 | 215 | 216 | 217 | DefaultAssay(Tumor) <- "RNA" 218 | vp_case1 <- function(gene_signature, file_name, test_sign){ 219 | plot_case1 <- function(signature, y_max = NULL){ 220 | VlnPlot(Tex , features = signature, 221 | pt.size = 0.05, 222 | group.by = "stage" #, 223 | #y.max = 1.0 224 | ) + stat_compare_means(comparisons = test_sign, label = "p.signif") 225 | } 226 | plot_list <- list() 227 | y_max_list <- list() 228 | for (gene in gene_signature) { 229 | plot_list[[gene]] <- plot_case1(gene) 230 | y_max_list[[gene]] <- max(plot_list[[gene]]$data[[gene]]) 231 | plot_list[[gene]] <- plot_case1(gene, y_max = (y_max_list[[gene]] + 1) ) 232 | } 233 | cowplot::plot_grid(plotlist = plot_list) 234 | file_name <- paste0(file_name, "rplot.png") 235 | ggsave(file_name, width = 13, height = 8) 236 | } 237 | 238 | gene_sig <- TexReceptor 239 | comparisons <- list(c("LN-out","LN-in")) 240 | vp_case1(gene_signature = gene_sig, file_name = "TexReceptorLN-outLN-in", test_sign = comparisons) 241 | 242 | 243 | 244 | 245 | ### L-R pairs 246 | P1 <- VlnPlot(Tex , features = c("CXCL13"),pt.size = 0.05, group.by = "stage") 247 | P2 <- VlnPlot(Tumor, features = c("ACKR4","CCR10","CXCR3","CXCR5","HTR2A","OPRD1"),pt.size = 0.05, group.by = "stage") 248 | 249 | 250 | Tex <- subset(HNSCC_TumorTex,subset = celltype_RE %in% c("CD8 Tex")) 251 | Tumor <- subset(HNSCC_TumorTex,subset = celltype_RE %in% c("TumorCell")) 252 | P1 <- FeaturePlot(Tex , features = c("CXCL13"),split.by = "stage",ncol = 1) 253 | P2 <- FeaturePlot(Tumor, features = c("CXCR5"),split.by = "stage",ncol = 1) 254 | 255 | pdf(paste0(outpath,"Fig/","FeaturePlotCXCL13.pdf"),width = 12,height = 3) 256 | cowplot::plot_grid(plotlist = list(P1,P2)) 257 | dev.off() 258 | 259 | 260 | -------------------------------------------------------------------------------- /12.multiCox.R: -------------------------------------------------------------------------------- 1 | ### 加上HPV 及margin 重新做multiCox 2 | rm(list=ls()) 3 | library(tidyverse) 4 | library(tibble) 5 | library(data.table) 6 | library(survival) 7 | library(survminer) 8 | ### 1. 5 tumor subcluster : Tumor sub 使用的分组是median 9 | TumorSub_proportion <- read.delim('/server2_work/smy/Project/HNSCC_26sample/2.data/CIBERSORTx_Results_TumorSubcluster.txt') 10 | TumorSub_proportion$Mixture <- gsub('[.]','-',TumorSub_proportion$Mixture) 11 | TumorSub_proportion <- TumorSub_proportion[str_sub(TumorSub_proportion$Mixture,14,16) %in% c("01A"),] 12 | TumorSub_proportion$Mixture <- str_sub(TumorSub_proportion$Mixture,1,12) 13 | TumorSub <- TumorSub_proportion %>% dplyr::select(c(Mixture,cluster_0,cluster_1,cluster_2,cluster_3,cluster_4)) 14 | 15 | ### PSOTN fib, RSPO 16 | GSVAscore <- readr::read_rds("/work/brj/Collaboration/scRNA/2022/HNSCC/Revise/Result/HNSC_POSTN_SPP1.rds.gz") 17 | GSVAF <- GSVAscore$POSTN_SPP1_score[[1]] %>% t() %>% as.data.frame() 18 | GSVAF$Mixture <- substr(rownames(GSVAF),0,12) 19 | HNSC_SignatureF <- GSVAF[substr(rownames(GSVAF),14,16) %in% c("01A"),] 20 | HNSC_SignatureF$Mixture <- gsub("[.]","-",HNSC_SignatureF$Mixture) 21 | 22 | HNSC_POSTN_SPP1 <- HNSC_SignatureF %>% dplyr::select(c(Mixture,POSTNTop50,SPP1Top50)) 23 | 24 | 25 | GSVAscore1 <- readr::read_rds("/work/brj/Collaboration/scRNA/2022/HNSCC/Revise/Result/HNSC_RSPO1_FOLR2.rds.gz") 26 | GSVAF1 <- GSVAscore1$RSPO1_FOLR2_score[[1]] %>% t() %>% as.data.frame() 27 | GSVAF1$Mixture <- substr(rownames(GSVAF1),0,12) 28 | HNSC_SignatureF1 <- GSVAF1[substr(rownames(GSVAF1),14,16) %in% c("01A"),] 29 | HNSC_SignatureF1$Mixture <- gsub("[.]","-",HNSC_SignatureF1$Mixture) 30 | 31 | HNSC_RSPO1_FOLR2 <- HNSC_SignatureF1 %>% dplyr::select(c(Mixture,RSPO1Top50,FOLR2Top50)) 32 | 33 | # SignatureD 34 | GSVAscore <- readr::read_rds("/work/brj/Collaboration/scRNA/2022/HNSCC/Result/HNSC_ExprFPKM_SigantureDE.rds.gz") 35 | GSVAD <- GSVAscore$CD8A_IFNg_TcellSingaling_score[[1]] %>% 36 | t() %>% as.data.frame() %>% 37 | dplyr::filter(substr(rownames(.),14,16) %in% "01A") %>% 38 | dplyr::mutate(Mixture = substr(rownames(.),0,12)) %>% 39 | dplyr::select(c(Mixture,SignatureD)) 40 | GSVAD$Mixture <- gsub('[.]','-',GSVAD$Mixture) 41 | 42 | 43 | HNSC_FPKM <- readr::read_rds("/server2_work/smy/Public/data/RNA-seq/TCGA/HNSCC_FPKM.rds") 44 | HNSC_FPKM_F <- HNSC_FPKM[c("MYBL2","TFDP1"),] 45 | HNSC_FPKM_F <- t(HNSC_FPKM_F) %>% as.data.frame() 46 | HNSC_FPKM_F[,1:2] <- apply(HNSC_FPKM_F[,1:2],2,function(x)log2(x+1)) 47 | HNSC_FPKM_F$Mixture <- substr(rownames(HNSC_FPKM_F),0,12) 48 | HNSC_FPKM_FF <- HNSC_FPKM_F[substr(rownames(HNSC_FPKM_F),14,16) %in% c("01A"),] 49 | HNSC_FPKM_FF$Mixture <- str_sub(HNSC_FPKM_FF$Mixture,1,12) 50 | 51 | HNSCC_proportion <- TumorSub %>% 52 | inner_join(GSVAD) %>% 53 | inner_join(HNSC_FPKM_FF) %>% 54 | inner_join(HNSC_POSTN_SPP1) %>% 55 | inner_join(HNSC_RSPO1_FOLR2) 56 | 57 | ##clinical info old 58 | survivaO <- read.delim("/work/brj/GEO/CRC/multiCox/TCGA_ClinicalData_20180420.txt")#,","gender" 59 | survivaOld <- survivaO %>% dplyr::select("bcr_patient_barcode",'OS','OS.time',"age_at_initial_pathologic_diagnosis","gender","ajcc_pathologic_tumor_stage") %>% 60 | dplyr::filter(bcr_patient_barcode %in% HNSCC_proportion$Mixture) %>% 61 | dplyr::mutate(OS.time = as.numeric(as.numeric(as.character(.$OS.time))/30), 62 | age_at_initial_pathologic_diagnosis = as.numeric(.$age_at_initial_pathologic_diagnosis)) %>% 63 | dplyr::filter(!is.na(OS.time)) %>% 64 | set_colnames(c("Mixture","OS","OS.time","age","gender","stage")) 65 | 66 | ##clinical info new 67 | survivaN <- openxlsx::read.xlsx("/work/brj/Collaboration/scRNA/2022/HNSCC/Data/TCGAHNSCclinical.xlsx") 68 | survival <- survivaN %>% dplyr::select("sampleID","margin_status") %>% #,"hpv_status_by_p16_testing" 69 | set_colnames(c("sampleID","margin")) %>% #,"HPV" 70 | dplyr::mutate(Mixture = substr(.$sampleID,0,12)) %>% 71 | dplyr::inner_join(survivaOld)%>% 72 | #dplyr::filter(!is.na(HPV)) %>% 73 | dplyr::filter(!is.na(margin)) %>% 74 | dplyr::select(-sampleID) %>% 75 | dplyr::select(c(Mixture,OS,OS.time),everything()) %>% 76 | dplyr::filter(!duplicated(.)) 77 | 78 | names(survival) <- c("sample","status","time","margin","age","gender","stage") # stage 有的是432 ,,"stage" ,"HPV" 79 | 80 | survival$stage[grep("Discrepancy",survival$stage)] <- NA 81 | survival$stage[grep("Not",survival$stage)] <- NA 82 | survival$stage[grep("Unknown",survival$stage)] <- NA 83 | 84 | ### 1.按照Stage I、II、III、IV 的条件区分 85 | survival$stage[grep("Stage I$|IA",survival$stage)] <- "Stage I" 86 | survival$stage[grep("Stage II$|IIA|IIB|IIC",survival$stage)] <- "Stage II" 87 | survival$stage[grep("III",survival$stage)] <- "Stage III" 88 | survival$stage[grep("IV",survival$stage)] <- "Stage IV" 89 | survival$stage <- factor(survival$stage,levels = c("Stage I","Stage II","Stage III","Stage IV")) 90 | 91 | ### 2.按照Stage I、II、III、IV 的条件区分 92 | survival$stage[grep("Stage I$|IA|II$|IIA|IIB|IIC",survival$stage)] <- "Early_Stage" 93 | survival$stage[grep("III|IV",survival$stage)] <- "Advanced_Stage" 94 | survival$stage <- factor(survival$stage,levels = c("Early_Stage","Advanced_Stage")) 95 | 96 | survival <- survival %>% 97 | dplyr::filter(!is.na(age)) %>% 98 | dplyr::filter(!is.na(gender)) %>% 99 | dplyr::filter(!is.na(stage)) 100 | 101 | 102 | mulCox <- function(types){ 103 | print(types) 104 | 105 | HNSC_proportion <- HNSCC_proportion %>% dplyr::select(c(Mixture,types)) %>% set_colnames(c("sample","Cluster")) 106 | surv_expr <- survival %>% 107 | inner_join(HNSC_proportion) %>% 108 | column_to_rownames("sample") %>% 109 | dplyr::filter(!is.na(stage)) 110 | which(is.na(surv_expr)) 111 | surv_expr$status <- as.numeric(as.character(surv_expr$status)) 112 | surv_expr$time <- as.numeric(as.character(surv_expr$time)) 113 | surv_expr$age <- as.numeric(surv_expr$age) 114 | surv_expr$gender <- as.factor(surv_expr$gender) 115 | surv_expr$Group <- ifelse(surv_expr[,"Cluster"] > median(surv_expr[,"Cluster"]),paste0(types,'_High'),paste0(types,'_Low')) 116 | surv_expr$Group <- factor(surv_expr$Group,levels = c(paste0(types,'_Low'),paste0(types,'_High'))) 117 | surv_expr$status <- as.numeric(as.character(surv_expr$status)) 118 | surv_expr$time <- as.numeric(as.character(surv_expr$time)) 119 | #surv_expr$HPV <- as.factor(surv_expr$HPV) 120 | surv_expr$margin <- as.factor(surv_expr$margin) 121 | res.cox <- coxph(Surv(time, status) ~ Group+age+gender+margin+stage, data = surv_expr,na.action = na.exclude) 122 | 123 | summary(res.cox) 124 | tty <- ggforest( 125 | model = res.cox, 126 | data = surv_expr, 127 | main = "Hazard ratio", 128 | #cpositions = c(0.00, 0.20, 0.35), 129 | fontsize = 0.6, 130 | noDigits = 2) 131 | return(tty) 132 | } 133 | 134 | types <- c(names(HNSCC_proportion)[-1]) 135 | 136 | plist <- lapply(types, mulCox) 137 | 138 | OutPath <- "/work/brj/Collaboration/scRNA/2022/HNSCC/Revise2/Fig/mCox/" 139 | pdf(paste(OutPath,"mCoxHLStage1-2_TumorSigantureNoHPV.pdf",sep=""),width = 24,height = 12) 140 | pp <- cowplot::plot_grid(plotlist = plist,ncol = 4) 141 | print(pp) 142 | dev.off() 143 | 144 | 145 | -------------------------------------------------------------------------------- /13.Ligand and Receptors heatmap.R: -------------------------------------------------------------------------------- 1 | ### ligand and receptor heatmap 2 | rm(list = ls()) 3 | ### GSE182227 4 | WorkPath <- "/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Data/GSE182227/NichenetrN/" 5 | nichenet_output <- readr::read_rds(paste0(WorkPath,"1.SenderPOSTN_ReciverSPP1_nichenet_output.rds",sep = '')) 6 | TargetLigand_GSE182227 <- rownames(nichenet_output$ligand_target_matrix) 7 | Targetgenes_GSE182227 <- colnames(nichenet_output$ligand_target_matrix) 8 | Ligands_GSE182227 <- colnames(nichenet_output$ligand_receptor_matrix) # 该矩阵 行是receptors;列是ligands 9 | Receptors_GSE182227 <- rownames(nichenet_output$ligand_receptor_matrix) 10 | 11 | ### 自己的数据 12 | WorkPath <- "/work/brj/Collaboration/2022/scRNA/HNSCC/Result/Fig4/" 13 | nichenet_output_Ref <- readr::read_rds(paste0(WorkPath,"1.SenderPOSTNFibSPP1Reciver_nichenet_output.rds",sep = '')) 14 | TargetLigand_Ref <- rownames(nichenet_output_Ref$ligand_target_matrix) 15 | Targetgenes_Ref <- colnames(nichenet_output_Ref$ligand_target_matrix) 16 | Receptors_Ref <- rownames(nichenet_output_Ref$ligand_receptor_matrix) # row is receptor 17 | Ligands_Ref <- colnames(nichenet_output_Ref$ligand_receptor_matrix) # column is ligand 18 | 19 | ### ligand and target genes heatmap : vis_ligand_target 是行为ligand,列为靶基因的矩阵 20 | ligand_target <- nichenet_output$ligand_target_matrix 21 | inter_ligand <- intersect(Ligands_Ref,TargetLigand_GSE182227)# 22 | inter_target <- intersect(Targetgenes_Ref,Targetgenes_GSE182227)#,Targetgenes_GSE182227 23 | inter_receptor <- intersect(Receptors_Ref,Receptors_GSE182227)#,Targetgenes_GSE182227 24 | 25 | order_ligand <- c("HAS2","CCL2","IL15","ICAM1","COL18A1","ADAM17","HMGB1","CSF1","GAS6","TGFB1","ANGPT1","CXCL12") 26 | 27 | 28 | ### ligand activity 29 | ligand_pearson_df <- nichenet_output$ligand_activities[nichenet_output$ligand_activities$test_ligand %in% inter_ligand,c("test_ligand","pearson")] %>% as.data.frame() 30 | vis_ligand_pearson <- ligand_pearson_df %>% dplyr::select(-test_ligand) %>% dplyr::arrange(pearson) %>% 31 | as.matrix() %>% magrittr::set_rownames(rev(ligand_pearson_df$test_ligand)) 32 | p_ligand_pearson = vis_ligand_pearson %>% nichenetr::make_heatmap_ggplot("Prioritized ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "Ligand\nActivity") + theme(legend.text = element_text(size = 9)) 33 | 34 | library(magrittr) 35 | ileft <- vis_ligand_pearson %>% as.data.frame() 36 | ilefts <- ileft[order_ligand,] %>% as.data.frame() %>% set_colnames("pearson") %>% set_rownames(order_ligand) 37 | openxlsx::write.xlsx(ilefts,file=paste(OutPath,"/",folder,"/Fig4ileft.xlsx",sep=""),row.names = T) 38 | 39 | 40 | plot_target <- inter_target 41 | order_exp <- ligand_target[rev(order_ligand),plot_target] 42 | p_ligand_target_network = order_exp %>% 43 | nichenetr::make_heatmap_ggplot("Prioritized ligands","Predicted target genes", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + theme(axis.text.x = element_text(face = "italic")) 44 | 45 | iright <- ligand_target[order_ligand,plot_target] %>% as.data.frame() 46 | openxlsx::write.xlsx(iright,file=paste(OutPath,"/",folder,"/Fig4iright.xlsx",sep=""),row.names = T) 47 | 48 | 49 | pdf(paste0(WorkPath,'/1.ligand_activity_target_heatmapPOSTNSPP1.pdf'),width = 12,height = 5) 50 | cowplot::plot_grid(plotlist = list(p_ligand_pearson,p_ligand_target_network),rel_widths = c(1.25,10.75)) 51 | dev.off() 52 | 53 | ### ligand_receptor_heatmap 54 | ligand_receptor <- nichenet_output$ligand_receptor_matrix %>% as.data.frame() %>% dplyr::select(inter_ligand) 55 | 56 | p_ligand_receptor_network = ligand_receptor[inter_receptor,] %>% as.matrix() %>% t() %>% 57 | nichenetr::make_heatmap_ggplot("Ligands","Receptors", color = "mediumvioletred", x_axis_position = "top",legend_title = "Prior interaction potential") 58 | 59 | j <- ligand_receptor[inter_receptor,] %>% as.data.frame()%>% t() %>% as.data.frame() 60 | openxlsx::write.xlsx(j,file=paste(OutPath,"/",folder,"/Fig4j.xlsx",sep=""),row.names = T) 61 | 62 | 63 | pdf(paste0(WorkPath,'/2.ligand_receptor_heatmapPOSTNSPP1.pdf'),width = 12,height = 5) 64 | p_ligand_receptor_network 65 | dev.off() 66 | 67 | 68 | # 4. GO and KEGG pwy 69 | OurGO <- openxlsx::read.xlsx("/work/brj/Collaboration/2022/scRNA/HNSCC/Result/Fig4/GOtargetPOSTNtoSPP1.xlsx") 70 | OurKEGG <- openxlsx::read.xlsx("/work/brj/Collaboration/2022/scRNA/HNSCC/Result/Fig4/KEGGtargetPOSTNtoSPP1.xlsx") 71 | GSE182227_GO <- openxlsx::read.xlsx("/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Data/GSE182227/NichenetrN/GOtargetPOSTNtoSPP1.xlsx") 72 | GSE182227_KEGG <- openxlsx::read.xlsx("/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Data/GSE182227/NichenetrN/KEGGtargetPOSTNtoSPP1.xlsx") 73 | 74 | 75 | GOresult <- GSE182227_GO %>% dplyr::filter(Description %in% OurGO$Description[1:10]) 76 | GOresults <- GOresult %>% dplyr::mutate(Ratio = round(Count/59,4)) %>% 77 | dplyr::select(c(Cluster,Description,Ratio,p.adjust)) %>% 78 | dplyr::arrange(Cluster,p.adjust) #%>% dplyr::group_by(Cluster) %>% dplyr::top_n(-topN,p.adjust) 79 | p1 <- ggplot(GOresults,aes(x=Cluster,y=Description))+ 80 | geom_point(aes(color = -log10(p.adjust+10^-27),size=Ratio))+ 81 | labs(color = "-log10(Adjust P)",size="GeneRatio")+ 82 | scale_colour_gradientn(limit=c(0,max(abs(-log10(GOresults$p.adjust+10^-27)))), 83 | colors= rev(RColorBrewer::brewer.pal(11, "Spectral")[2:11]))+ 84 | theme_bw()+ 85 | scale_y_discrete(limit=rev(OurGO$Description[1:10])) 86 | openxlsx::write.xlsx(p1$data,file=paste(OutPath,"/",folder,"/Fig4kup.xlsx",sep=""),rownames = T) 87 | 88 | 89 | ### Select pwy 90 | KEGGresult <- GSE188737_KEGG %>% dplyr::filter(Description %in% OurKEGG$Description[1:10]) 91 | 92 | KEGGresults <- KEGGresult %>% dplyr::mutate(Ratio = round(Count/49,4)) %>% 93 | dplyr::select(c(Cluster,Description,Ratio,p.adjust)) %>% 94 | dplyr::arrange(Cluster,p.adjust) #%>% dplyr::group_by(Cluster) %>% dplyr::top_n(-topN,p.adjust) 95 | p2 <- ggplot(KEGGresults,aes(x=Cluster,y=Description))+ 96 | geom_point(aes(color = -log10(p.adjust+10^-27),size=Ratio))+ 97 | labs(color = "-log10(Adjust P)",size="GeneRatio")+ 98 | scale_colour_gradientn(limit=c(0,max(abs(-log10(KEGGresults$p.adjust+10^-27)))), 99 | colors= rev(RColorBrewer::brewer.pal(11, "Spectral")[2:11]))+ 100 | theme_bw()+ 101 | scale_y_discrete(limit=rev(OurKEGG$Description[1:10])) 102 | openxlsx::write.xlsx(p2$data,file=paste(OutPath,"/",folder,"/Fig4kdn.xlsx",sep=""),rownames = T) 103 | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /14.SignatureScore.R: -------------------------------------------------------------------------------- 1 | ### CAF subtypes siganture 2 | library(dplyr) 3 | WorkPath <- "/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Fig/Signature/" 4 | ### Reference 5 | CAFs <- openxlsx::read.xlsx("/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Data/Clinical Cancer Research, 2021_CAF subtypes.xlsx",sheet = "Table S3") 6 | names(CAFs) <- CAFs[1,] 7 | CAFsub <- CAFs[-1,] 8 | 9 | Fib_color_panel <- c("Fibroblast" = '#A8373D', 10 | "RSPO1+ fibroblast" = '#F1B998', 11 | "POSTN+ fibroblast" = '#43739F', 12 | "SFRP1+ fibroblast" = '#374D74', 13 | "SEMA4A+ fibroblast" = '#947A7B', 14 | "CCL19+ fibroblast" = '#91553D', 15 | "DES+ myofibroblast" = '#D48054', 16 | "Proliferating fibroblast" = '#E09194') 17 | 18 | ### Fibroblast HNSC 19 | HNSCC_fibro <- readRDS("/work/yye/Project/Collaboration/HNSC/Stroma/HNSC_Fibro1_DefineTypes.rds.gz") 20 | Idents(HNSCC_fibro) <- HNSCC_fibro$DefineTypes 21 | 22 | cols. = c('#A8373D','#F1B998','#43739F','#374D74','#947A7B','#91553D','#D48054','#E09194') 23 | 24 | pdf(paste0(WorkPath,'Dimplot_fibroblast.pdf'),6,4) 25 | DimPlot(HNSCC_fibro,reduction = 'umap',cols = cols.) 26 | dev.off() 27 | 28 | ### 换成文章里面热图的gene list 29 | #Puram.CAF1 <- c("CTHRC1","COL1A1","COL3A1","POSTN","MFAP2") 30 | #Puram.CAF2<- c("CXCL12","NDUFA4L2") 31 | PuramCAF <- openxlsx::read.xlsx("/server1_work/brj/Collaboration/scRNA/2022/HNSCC/Data/Puram_CAF.xlsx",sheet = "Sheet1") 32 | Puram.CAF1 <- PuramCAF$CAF1 33 | Puram.CAF2 <- PuramCAF$CAF2 34 | Puram.MyoFib <- c("ACTA2","MYLK","MYL9","MCAM","IL6","PDGFA") 35 | 36 | HNSCC_fibro@meta.data$Puram.CAF1 <- apply(HNSCC_fibro@assays$RNA@data[rownames(HNSCC_fibro@assays$RNA@data) %in% Puram.CAF1,],2,mean) 37 | HNSCC_fibro@meta.data$Puram.CAF2 <- apply(HNSCC_fibro@assays$RNA@data[rownames(HNSCC_fibro@assays$RNA@data) %in% Puram.CAF2,],2,mean) 38 | HNSCC_fibro@meta.data$Puram.MyoFib <- apply(HNSCC_fibro@assays$RNA@data[rownames(HNSCC_fibro@assays$RNA@data) %in% Puram.MyoFib,],2,mean) 39 | 40 | 41 | # 1.dotplot ======================================================= 42 | PuramCAFdf <- as.data.frame(HNSCC_fibro@meta.data) %>% 43 | dplyr::select(c(DefineTypes,Puram.CAF1,Puram.CAF2,Puram.MyoFib)) 44 | 45 | PuramCAF <- PuramCAFdf %>% tidyr::gather(key = CellTypes, value = Score,-DefineTypes) 46 | 47 | 48 | p1 <- ggplot(PuramCAF,aes(x=DefineTypes,y=CellTypes))+ 49 | geom_point(aes(color=Score))+ 50 | scale_color_gradientn(colors= rev(colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"),space="rgb")(50))) + 51 | theme(panel.background=element_rect(color="black",fill=NA),legend.key = element_blank(),axis.ticks=element_blank(), 52 | panel.grid=element_blank(),axis.text.x=element_text(size=12,angle=45,vjust = 1,hjust = 1,color="black"), 53 | axis.text.y=element_text(size=10,color="black"),axis.title=element_blank(),legend.position = "bottom")+ 54 | geom_tile(data=PuramCAF,aes(x=DefineTypes,y=CellTypes),fill=NA,color="lightgray") 55 | 56 | pdf(paste0(WorkPath,'Puram.CAFsub_DotPlot.pdf'),5,4) 57 | p1 58 | dev.off() 59 | 60 | # 2.Vlnplot with p ======================================================= 61 | sobjlists = FetchData(object = HNSCC_fibro, vars = names(PuramCAFdf)) 62 | 63 | pp <- lapply(c(2:length(names(PuramCAFdf))), function(sub){ 64 | p1 <- ggplot(sobjlists,aes(x= DefineTypes, y = sobjlists[,sub])) + 65 | geom_violin(aes(fill=DefineTypes),alpha=0.3) + 66 | scale_fill_manual(values = Fib_color_panel,guide = F)+ 67 | geom_boxplot(aes(fill=DefineTypes),width=0.2, alpha=1,outlier.colour = NA) + 68 | labs(y= as.character(names(sobjlists)[sub]), x = "DefineTypes") + 69 | theme(panel.background = element_blank(), axis.line = element_line(colour = "black"),plot.title = element_text(hjust = 0.5), 70 | axis.text.x=element_text(size=12,angle=45,vjust = 1,hjust = 1,color="black"), 71 | axis.text.y=element_text(size=9,color="black"))+ 72 | ggpubr::stat_compare_means(method = "anova",data = sobjlists) # Add pairwise comparisons p-value 73 | return(p1) 74 | }) 75 | 76 | 77 | pdf(paste0(OutPath,'Puram.CAFsub_fibroblast_VlnPlot.pdf'),width = 15,height = 5) 78 | cowplot::plot_grid(plotlist = pp,ncol = 3) 79 | dev.off() 80 | 81 | ### FeaturePlot 82 | pdf(paste0(WorkPath,'Puram.CAFsub_fibroblast_FeaturePlot.pdf'),15,5) 83 | FeaturePlot(HNSCC_fibro,reduction = 'umap',feature = c('Puram.CAF1','Puram.CAF2','Puram.MyoFib'),cols = c("gray","orange","red"),ncol = 3) 84 | dev.off() 85 | 86 | 87 | ### Vln 88 | pdf(paste0(WorkPath,'Puram.CAFsub_fibroblast_VlnPlot.pdf'),15,5) 89 | VlnPlot(HNSCC_fibro,feature = c('Puram.CAF1','Puram.CAF2','Puram.MyoFib'), 90 | cols = c('#A8373D','#F1B998','#43739F','#374D74','#947A7B','#91553D','#D48054','#E09194'),pt.size = 0,ncol = 3)+ 91 | ggpubr::compare_means(method = "anova") + labs(x="") + geom_boxplot(width=0.1) 92 | dev.off() 93 | 94 | 95 | VlnPlot(HNSCC_fibro,feature = c('Puram.CAF1'),cols = c('#A8373D','#F1B998','#43739F','#374D74','#947A7B','#91553D','#D48054','#E09194'),pt.size = 0,ncol = 3)+ 96 | ggpubr::stat_compare_means(comparisons = list(c("POSTN+ fibroblast","CCL19+ fibroblast")),label = "p.format") + labs(x="")+geom_boxplot(width=0.1) 97 | 98 | 99 | 100 | ### pan_myCAF 101 | pan_myCAF = CAFsub$`pan-myCAF` 102 | pan_dCAF = CAFsub$`pan-dCAF` 103 | pan_iCAF = CAFsub$`pan-iCAF` 104 | pan_iCAF_2 = CAFsub$`pan-iCAF-2` 105 | pan_pCAF = CAFsub$`pan-pCAF` 106 | 107 | 108 | HNSCC_fibro@meta.data$pan_myCAF <- apply(HNSCC_fibro@assays$RNA@data[rownames(HNSCC_fibro@assays$RNA@data) %in% pan_myCAF,],2,mean) 109 | HNSCC_fibro@meta.data$pan_dCAF <- apply(HNSCC_fibro@assays$RNA@data[rownames(HNSCC_fibro@assays$RNA@data) %in% pan_dCAF,],2,mean) 110 | HNSCC_fibro@meta.data$pan_iCAF <- apply(HNSCC_fibro@assays$RNA@data[rownames(HNSCC_fibro@assays$RNA@data) %in% pan_iCAF,],2,mean) 111 | HNSCC_fibro@meta.data$pan_iCAF_2 <- apply(HNSCC_fibro@assays$RNA@data[rownames(HNSCC_fibro@assays$RNA@data) %in% pan_iCAF_2,],2,mean) 112 | HNSCC_fibro@meta.data$pan_pCAF <- apply(HNSCC_fibro@assays$RNA@data[rownames(HNSCC_fibro@assays$RNA@data) %in% pan_pCAF,],2,mean) 113 | 114 | # 1.dotplot ======================================================= 115 | PuramCAFdf <- as.data.frame(HNSCC_fibro@meta.data) %>% 116 | dplyr::select(c(DefineTypes,pan_myCAF,pan_dCAF,pan_iCAF,pan_iCAF_2,pan_pCAF)) 117 | xorder <- names(PuramCAFdf)[-1] 118 | #PuramCAFdf[,xorder] <- t(apply(PuramCAFdf[,xorder],1,scale)) 119 | #PuramCAFdf[,xorder] <- apply(PuramCAFdf[,xorder],2,scale) 120 | 121 | PuramCAF <- PuramCAFdf %>% tidyr::gather(key = CellTypes, value = Score,-DefineTypes) 122 | p1 <- ggplot(PuramCAF,aes(x=DefineTypes,y=CellTypes))+ 123 | geom_point(aes(color=Score))+ 124 | scale_color_gradientn(colors= rev(colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"),space="rgb")(50))) + 125 | theme(panel.background=element_rect(color="black",fill=NA),legend.key = element_blank(),axis.ticks=element_blank(), 126 | panel.grid=element_blank(),axis.text.x=element_text(size=12,angle=45,vjust = 1,hjust = 1,color="black"), 127 | axis.text.y=element_text(size=10,color="black"),axis.title=element_blank(),legend.position = "bottom")+ 128 | geom_tile(data=PuramCAF,aes(x=DefineTypes,y=CellTypes),fill=NA,color="lightgray") 129 | 130 | pdf(paste0(WorkPath,'Five.CAFsub_DotPlot.pdf'),5,5) 131 | p1 132 | dev.off() 133 | 134 | # 2.Vlnplot with p ======================================================= 135 | sobjlists = FetchData(object = HNSCC_fibro, vars = names(PuramCAFdf)) 136 | 137 | pp <- lapply(c(2:length(names(PuramCAFdf))), function(sub){ 138 | p1 <- ggplot(sobjlists,aes(x= DefineTypes, y = sobjlists[,sub])) + 139 | geom_violin(aes(fill=DefineTypes),alpha=0.3) + 140 | scale_fill_manual(values = Fib_color_panel,guide = F)+ 141 | geom_boxplot(aes(fill=DefineTypes),width=0.1,alpha=1,alpha=1,outlier.colour = NA) + # alpha=0 可以让中间的boxplot的颜色变为跟vlnplot一样的 142 | labs(y= as.character(names(sobjlists)[sub]), x = "DefineTypes") + 143 | theme(panel.background = element_blank(), axis.line = element_line(colour = "black"),plot.title = element_text(hjust = 0.5), 144 | axis.text.x=element_text(size=12,angle=45,vjust = 1,hjust = 1,color="black"), 145 | axis.text.y=element_text(size=9,color="black"))+ 146 | ggpubr::stat_compare_means(method = "anova",data = sobjlists) # Add pairwise comparisons p-value 147 | return(p1) 148 | }) 149 | 150 | 151 | pdf(paste0(OutPath,'Five.CAFsub_fibroblast_VlnPlot.pdf'),width = 25,height = 5) 152 | cowplot::plot_grid(plotlist = pp,ncol = 5) 153 | dev.off() 154 | 155 | ### FeaturePlot 156 | pdf(paste0(WorkPath,'5CAFsub_fibroblast_FeaturePlot.pdf'),25,5) 157 | FeaturePlot(HNSCC_fibro,reduction = 'umap',feature = c('pan_myCAF','pan_dCAF','pan_iCAF',"pan_iCAF_2","pan_pCAF"),cols = c("gray","orange","red"),ncol = 5) 158 | dev.off() 159 | 160 | 161 | ### Vln 162 | pdf(paste0(WorkPath,'5CAFsub_fibroblast_VlnPlot.pdf'),25,5) 163 | VlnPlot(HNSCC_fibro,feature = c('pan_myCAF','pan_dCAF','pan_iCAF',"pan_iCAF_2","pan_pCAF"),cols = c('#A8373D','#F1B998','#43739F','#374D74','#947A7B','#91553D','#D48054','#E09194'),pt.size = 0,ncol = 5) 164 | dev.off() 165 | 166 | 167 | ### iCAF, myCAF, ApCAF, M1 or M2 macrophages 168 | CAF3s <- openxlsx::read.xlsx("/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Data/mouseCAF.xlsx",sheet = "Sheet1") 169 | Homo <- read.csv("/server1_work/yye/Project/Data/Public/Reference/mouse.human.homology.genes.csv")[,c("mgi_symbol","hgnc_symbol")] %>% unique 170 | iCAFs <- CAF3s[,1] %>% as.data.frame() %>% set_colnames("gene") 171 | iCAFs <- merge(iCAFs,Homo,by.x="gene",by.y="mgi_symbol") 172 | 173 | myCAFs <- CAF3s[,3] %>% as.data.frame() %>% set_colnames("gene") 174 | myCAFs <- merge(myCAFs,Homo,by.x="gene",by.y="mgi_symbol") 175 | 176 | ApCAFs <- CAF3s[,2] %>% as.data.frame() %>% set_colnames("gene") 177 | ApCAFs <- merge(ApCAFs,Homo,by.x="gene",by.y="mgi_symbol") 178 | 179 | iCAF <- iCAFs$hgnc_symbol 180 | myCAF <- myCAFs$hgnc_symbol 181 | ApCAF <- ApCAFs$hgnc_symbol 182 | 183 | 184 | HNSCC_fibro@meta.data$iCAF <- apply(HNSCC_fibro@assays$RNA@data[rownames(HNSCC_fibro@assays$RNA@data) %in% iCAF,],2,mean) 185 | HNSCC_fibro@meta.data$myCAF <- apply(HNSCC_fibro@assays$RNA@data[rownames(HNSCC_fibro@assays$RNA@data) %in% myCAF,],2,mean) 186 | HNSCC_fibro@meta.data$ApCAF <- apply(HNSCC_fibro@assays$RNA@data[rownames(HNSCC_fibro@assays$RNA@data) %in% ApCAF,],2,mean) 187 | 188 | 189 | 190 | ### FeaturePlot 191 | pdf(paste0(WorkPath,'3CAFsub_fibroblast_FeaturePlot_FromMouse.pdf'),16.5,5) 192 | FeaturePlot(HNSCC_fibro,reduction = 'umap',feature = c('iCAF','myCAF','ApCAF'),order = T,cols = c("gray","orange","red"),ncol = 3) 193 | dev.off() 194 | 195 | ### Vln 196 | pdf(paste0(WorkPath,'3CAFsub_fibroblast_VlnPlot_FromMouse.pdf'),15,6) 197 | VlnPlot(HNSCC_fibro,feature = c('iCAF','myCAF','ApCAF'),cols = c('#A8373D','#F1B998','#43739F','#374D74','#947A7B','#91553D','#D48054','#E09194'),pt.size = 0,ncol = 3) 198 | dev.off() 199 | 200 | ## CANCER DISGOVERY marker 201 | MHCII <- grep("HLA-DR|HLA-DQ|HLA-DP",rownames(HNSCC_fibro@assays$RNA@data),value = T) 202 | 203 | iCAFh <- c("IL6","PDGFRA","CXCL12","CFD","DPT","LMNA","AGTR","HAS1","CXCL1","CXCL2","CCL2","IL8") 204 | myCAFh <- c("ACTA2","TAGLN","MMP11","MYL9","HOPX","POSTN","TPM1","TPM2") 205 | apCAFh <- c("CD74","HLA-DRA","HLA-DPA1","HLA-DQA1","SLPI") 206 | 207 | 208 | HNSCC_fibro@meta.data$iCAF_Human <- apply(HNSCC_fibro@assays$RNA@data[rownames(HNSCC_fibro@assays$RNA@data) %in% iCAFh,],2,mean) 209 | HNSCC_fibro@meta.data$myCAF_Human <- apply(HNSCC_fibro@assays$RNA@data[rownames(HNSCC_fibro@assays$RNA@data) %in% myCAFh,],2,mean) 210 | HNSCC_fibro@meta.data$ApCAF_Human <- apply(HNSCC_fibro@assays$RNA@data[rownames(HNSCC_fibro@assays$RNA@data) %in% apCAFh,],2,mean) 211 | 212 | # 1.dotplot ======================================================= 213 | PuramCAFdf <- as.data.frame(HNSCC_fibro@meta.data) %>% 214 | dplyr::select(c(DefineTypes,iCAF_Human,myCAF_Human,ApCAF_Human)) 215 | #xorder <- names(PuramCAFdf)[-1] 216 | #PuramCAFdf[,xorder] <- t(apply(PuramCAFdf[,xorder],1,scale)) 217 | PuramCAF <- PuramCAFdf %>% tidyr::gather(key = CellTypes, value = Score,-DefineTypes) 218 | p1 <- ggplot(PuramCAF,aes(x=DefineTypes,y=CellTypes))+ 219 | geom_point(aes(color=Score))+ 220 | scale_color_gradientn(colors= rev(colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"),space="rgb")(50))) + 221 | theme(panel.background=element_rect(color="black",fill=NA),legend.key = element_blank(),axis.ticks=element_blank(), 222 | panel.grid=element_blank(),axis.text.x=element_text(size=12,angle=45,vjust = 1,hjust = 1,color="black"), 223 | axis.text.y=element_text(size=10,color="black"),axis.title=element_blank(),legend.position = "bottom")+ 224 | geom_tile(data=PuramCAF,aes(x=DefineTypes,y=CellTypes),fill=NA,color="lightgray") 225 | 226 | pdf(paste0(WorkPath,'ThreeHuman.CAFsub_DotPlot.pdf'),5,4) 227 | p1 228 | dev.off() 229 | 230 | 231 | # 2.Vlnplot with p ======================================================= 232 | sobjlists = FetchData(object = HNSCC_fibro, vars = names(PuramCAFdf)) 233 | pp <- lapply(c(2:length(names(PuramCAFdf))), function(sub){ 234 | p1 <- ggplot(sobjlists,aes(x= DefineTypes, y = sobjlists[,sub])) + 235 | geom_violin(aes(fill=DefineTypes),alpha=0.3) + 236 | scale_fill_manual(values = Fib_color_panel,guide = F)+ 237 | geom_boxplot(aes(fill=DefineTypes),width=0.1,alpha=1,outlier.colour = NA) + 238 | labs(y= as.character(names(sobjlists)[sub]), x = "DefineTypes") + 239 | theme(panel.background = element_blank(), axis.line = element_line(colour = "black"),plot.title = element_text(hjust = 0.5), 240 | axis.text.x=element_text(size=12,angle=45,vjust = 1,hjust = 1,color="black"), 241 | axis.text.y=element_text(size=9,color="black"))+ 242 | ggpubr::stat_compare_means(method = "anova",data = sobjlists) # Add pairwise comparisons p-value 243 | return(p1) 244 | }) 245 | pdf(paste0(OutPath,'Three.CAFsub_fibroblast_VlnPlot.pdf'),width =15,height = 5) 246 | cowplot::plot_grid(plotlist = pp,ncol = 3) 247 | dev.off() 248 | 249 | 250 | 251 | ### FeaturePlot 252 | pdf(paste0(WorkPath,'3CAFsub_fibroblast_FeaturePlot.pdf'),15,5) 253 | FeaturePlot(HNSCC_fibro,reduction = 'umap',feature = c('iCAF_Human','myCAF_Human','ApCAF_Human'),order = T,cols = c("gray","orange","red"),ncol = 3) 254 | dev.off() 255 | 256 | ### Vln 257 | pdf(paste0(WorkPath,'3CAFsub_fibroblast_VlnPlot.pdf'),15,5) 258 | VlnPlot(HNSCC_fibro,feature = c('iCAF_Human','myCAF_Human','ApCAF_Human'),cols = c('#A8373D','#F1B998','#43739F','#374D74','#947A7B','#91553D','#D48054','#E09194'),pt.size = 0,ncol = 3) 259 | dev.off() 260 | 261 | ### Macrophages 262 | HNSCC_myeloids <- readRDS('/work/smy/Project/HNSCC/2.data/DeinfeTypes/26Sample_Myeloid_DefineTypesFinal.rds') 263 | HNSCC_myeloid <- subset(HNSCC_myeloids,subset = DefineTypes %in% c("CXCL10+ macrophages","C1QC+MRC-macrophages","SPP1+ macorphages","FOLR2+ macorphages") ) 264 | HNSCC_myeloid$DefineTypes <- factor(HNSCC_myeloid$DefineTypes,levels = c("CXCL10+ macrophages","C1QC+MRC-macrophages","SPP1+ macorphages","FOLR2+ macorphages")) 265 | Idents(HNSCC_myeloid) <- HNSCC_myeloid$DefineTypes 266 | 267 | pdf(paste0(WorkPath,'Dimplot_Myeloid.pdf'),7,4) 268 | DimPlot(HNSCC_myeloid,reduction = 'umap',cols = c('#79545C','#FAD181','#483D4D','#A44E41')) 269 | dev.off() 270 | 271 | M1 <- c("IL23","TNF","CXCL9","CXCL10","CXCL11","CD86","IL1A","IL1B","IL6","CCL5","IRF5","IRF1","CD40","IDO1","KYNU","CCR7") 272 | M2 <- c("IL4R","CCL4","CCL13","CCL20","CCL17","CCL18","CCL22","CCL24","LYVE1","VEGFA","VEGFB","VEGFC","VEGFD","EGF","CTSA","CTSB","CTSC","CTSD","TGFB1","TGFB2","TGFB3","MMP14","MMP19","MMP9","CLEC7A","WNT7B","FASL","TNFSF12","TNFSF8","CD276","VTCN1","MSR1","FN1","IRF4") 273 | 274 | HNSCC_myeloid@meta.data$M1 <- apply(HNSCC_myeloid@assays$RNA@data[rownames(HNSCC_myeloid@assays$RNA@data) %in% M1,],2,mean) 275 | HNSCC_myeloid@meta.data$M2 <- apply(HNSCC_myeloid@assays$RNA@data[rownames(HNSCC_myeloid@assays$RNA@data) %in% M2,],2,mean) 276 | HNSCC_myeloid@meta.data$M1_divide_M2 <- as.numeric(HNSCC_myeloid@meta.data$M1/HNSCC_myeloid@meta.data$M2) 277 | 278 | 279 | 280 | marcotypes <- grep("macro|macor",HNSCC_myeloid@meta.data$DefineTypes,value = T) 281 | HNSCC_marco <- subset(HNSCC_myeloid, subset = DefineTypes %in% marcotypes ) 282 | 283 | MacroDF <- HNSCC_marco@meta.data %>% as.data.frame() 284 | 285 | ### save 286 | readr::write_rds(MacroDF,"/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Data/Signature/MacScore.rds") 287 | 288 | 289 | library(harmony) 290 | HNSCC_marco <- HNSCC_marco %>% 291 | RunHarmony("orig.ident", plot_convergence = TRUE) 292 | harmony_embeddings <- Embeddings(HNSCC_marco, 'harmony') 293 | HNSCC_marco <- HNSCC_marco %>% 294 | RunUMAP(reduction = "harmony", dims = 1:30) %>% 295 | FindNeighbors(reduction = "harmony", dims = 1:30) 296 | 297 | pdf(paste0(WorkPath,'Dimplot_Macro.pdf'),7,4) 298 | DimPlot(HNSCC_marco,reduction = 'umap',cols = c('#79545C','#FAD181','#483D4D','#A44E41')) 299 | dev.off() 300 | 301 | FeaturePlot(HNSCC_marco,reduction = 'umap',feature = c( 'CXCL10','CXCL9','C1QC','MRC1','SPP1','CD68','FOLR2','CCL18'),cols = c("gray","orange","red"),ncol = 2) 302 | 303 | 304 | 305 | ### FeaturePlot 306 | pdf(paste0(WorkPath,'M1M2_Macro_FeaturePlot.pdf'),11,5) 307 | FeaturePlot(HNSCC_marco,reduction = 'umap',feature = c('M1','M2'),cols = c("gray","orange","red"),ncol = 2) 308 | dev.off() 309 | 310 | 311 | ### Vln 312 | pdf(paste0(WorkPath,'M1M2_Macro_VlnPlot.pdf'),10,6) 313 | VlnPlot(HNSCC_marco,feature = c('M1','M2'),cols =c('#79545C','#FAD181','#483D4D','#A44E41'),pt.size = 0,ncol =2) 314 | dev.off() 315 | 316 | ###M1,M2的Density 317 | data = HNSCC_marco@meta.data[,c("DefineTypes","M1","M2")] 318 | PP <- lapply(unique(marcotypes), function(marco){ 319 | dataFilter2 <- data %>% dplyr::filter(M1 != 0 & M2 != 0 & DefineTypes== as.character(marco)) 320 | M1M2out <-ggplot(dataFilter2,aes(x=M1,y=M2)) + 321 | geom_point(alpha=0)+stat_density_2d(aes(fill = ..density..), geom = "raster", contour = FALSE) +xlim(0,1)+ylim(0,3)+ 322 | scale_fill_distiller(palette="YlOrRd", direction=1) + 323 | theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 324 | panel.background = element_blank(), axis.line = element_line(colour = "black"), 325 | axis.ticks = element_blank(), axis.text.y = element_blank(), axis.text.x = element_blank(),# x,y刻度去掉 326 | axis.line.x = element_blank(),axis.line.y = element_blank()) # x, y 轴线去掉 327 | }) 328 | pdf(paste0(WorkPath,'M1M2_Density.pdf'),20,4) 329 | cowplot::plot_grid(plotlist = PP,labels = unique(marcotypes),rel_widths = c(5,5),ncol = 4) 330 | dev.off() 331 | 332 | ###M1,M2的heatmap 333 | data = HNSCC_marco@meta.data[,c("DefineTypes","M1","M2","M1_divide_M2")] 334 | data <- data %>% dplyr::filter(!is.infinite(M1_divide_M2)&!is.na(M1_divide_M2)) 335 | DefineTypes = unique(data$DefineTypes) 336 | test = t(as.data.frame(c("SPP1+ HNSCC_marcophages",colMeans(data[data$DefineTypes=="SPP1+ HNSCC_marcophages",2:4])))) 337 | 338 | for (i in DefineTypes) { 339 | test1 = t(as.data.frame(c(i,colMeans(data[data$DefineTypes==i,2:4])))) 340 | test = rbind(test,test1) 341 | } 342 | colMeans(data[,2:4]) 343 | test=test[-1,] 344 | test <- as.data.frame(test) 345 | test[,c(2:4)] <- apply(test[,c(2:4)],2,as.numeric) 346 | colnames(test)=c("DefineTypes","M1","M2","M1_divide_M2") 347 | rownames(test) <- test$DefineTypes 348 | plotdf <- test[,-1] 349 | plotdfe <- plotdf[,-3] 350 | # 按mac order and out first 351 | library(pheatmap) 352 | p1 <- pheatmap(plotdfe,scale = "column", 353 | cluster_rows = F, 354 | show_rownames = T, 355 | cluster_cols = F, 356 | color = colorRampPalette(RColorBrewer::brewer.pal(9, "OrRd"))(50)) 357 | 358 | pdf(paste0(WorkPath,"M1_M2Heatmap.pdf"),width = 4,height = 3) 359 | print(p1) 360 | dev.off() 361 | 362 | # 1.dotplot ======================================================= 363 | Macrophagedf <- as.data.frame(HNSCC_myeloid@meta.data) %>% 364 | dplyr::select(c(DefineTypes,M1,M2)) 365 | 366 | #xorder <- names(Macrophagedf)[-1] 367 | #Macrophagedf[,xorder] <- t(apply(Macrophagedf[,xorder],1,scale)) 368 | Macrophage <- Macrophagedf %>% tidyr::gather(key = CellTypes, value = Score,-DefineTypes) 369 | p1 <- ggplot(Macrophage,aes(x=DefineTypes,y=CellTypes))+ 370 | geom_point(aes(color=Score))+ 371 | scale_color_gradientn(colors= rev(colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"),space="rgb")(50)))+ 372 | theme(panel.background=element_rect(color="black",fill=NA),legend.key = element_blank(),axis.ticks=element_blank(), 373 | panel.grid=element_blank(),axis.text.x=element_text(size=12,angle=45,vjust = 1,hjust = 1,color="black"), 374 | axis.text.y=element_text(size=10,color="black"),axis.title=element_blank(),legend.position = "bottom")+ 375 | geom_tile(data=Macrophage,aes(x=DefineTypes,y=CellTypes),fill=NA,color="lightgray") 376 | 377 | pdf(paste0(WorkPath,'M1M2_DotPlot.pdf'),5,4) 378 | p1 379 | dev.off() 380 | 381 | 382 | # 2.Vlnplot with p ======================================================= 383 | sobjlists = FetchData(object = HNSCC_myeloid, vars = names(Macrophagedf)) 384 | 385 | pp <- lapply(c(2:length(names(Macrophagedf))), function(sub){ 386 | p1 <- ggplot(sobjlists,aes(x= DefineTypes, y = sobjlists[,sub])) + 387 | geom_violin(aes(fill=DefineTypes), alpha=0.3 ) + 388 | scale_fill_manual(limits = unique(sobjlists$DefineTypes), 389 | values = c('#79545C','#FAD181','#483D4D','#A44E41'),guide = F)+ 390 | geom_boxplot(aes(fill=DefineTypes),width=0.1, alpha=1,outlier.colour = NA) + # 不要离群值 391 | labs(y= as.character(names(sobjlists)[sub]), x = "DefineTypes") + 392 | theme(panel.background = element_blank(), axis.line = element_line(colour = "black"),plot.title = element_text(hjust = 0.5), 393 | axis.text.x=element_text(size=12,angle=45,vjust = 1,hjust = 1,color="black"), 394 | axis.text.y=element_text(size=9,color="black"))+ 395 | ggpubr::stat_compare_means(method = "anova",data = sobjlists) # Add pairwise comparisons p-value 396 | return(p1) 397 | }) 398 | 399 | pdf(paste0(WorkPath,'M1M2_VlnPlot.pdf'),width = 5,height = 5) 400 | cowplot::plot_grid(plotlist = pp,ncol = 2) 401 | dev.off() 402 | 403 | 404 | 405 | ### epi 406 | HNSCC_epi_tumor <- readRDS('/work/smy/Project/HNSCC_26sample/2.data/26sample_tumorcells_Harmony_immuneReference_All.rds') 407 | Idents(HNSCC_epi_tumor) <- HNSCC_epi_tumor$RNA_snn_res_final 408 | 409 | 410 | TumorSignature <- openxlsx::read.xlsx("/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Data/Puram.Tumor.xlsx") 411 | 412 | Puram.Cell.Cycle <- TumorSignature$Cell.Cycle 413 | Puram.p.EMT <- TumorSignature$`p-EMT` 414 | Puram.Epi.Dif1 <- TumorSignature$Epi.dif..1 415 | Puram.Epi.Dif2 <- TumorSignature$Epi.dif..2 416 | Puram.Stress <- TumorSignature$Stress 417 | Puram.Hypoxia <- TumorSignature$Hypoxia 418 | 419 | 420 | HNSCC_epi_tumor@meta.data$Puram.Cell.Cycle <- apply(HNSCC_epi_tumor@assays$RNA@data[rownames(HNSCC_epi_tumor@assays$RNA@data) %in% Puram.Cell.Cycle,],2,mean) 421 | HNSCC_epi_tumor@meta.data$Puram.p.EMT <- apply(HNSCC_epi_tumor@assays$RNA@data[rownames(HNSCC_epi_tumor@assays$RNA@data) %in% Puram.p.EMT,],2,mean) 422 | HNSCC_epi_tumor@meta.data$Puram.Epi.Dif1 <- apply(HNSCC_epi_tumor@assays$RNA@data[rownames(HNSCC_epi_tumor@assays$RNA@data) %in% Puram.Epi.Dif1,],2,mean) 423 | HNSCC_epi_tumor@meta.data$Puram.Epi.Dif2 <- apply(HNSCC_epi_tumor@assays$RNA@data[rownames(HNSCC_epi_tumor@assays$RNA@data) %in% Puram.Epi.Dif2,],2,mean) 424 | HNSCC_epi_tumor@meta.data$Puram.Stress <- apply(HNSCC_epi_tumor@assays$RNA@data[rownames(HNSCC_epi_tumor@assays$RNA@data) %in% Puram.Stress,],2,mean) 425 | HNSCC_epi_tumor@meta.data$Puram.Hypoxia <- apply(HNSCC_epi_tumor@assays$RNA@data[rownames(HNSCC_epi_tumor@assays$RNA@data) %in% Puram.Hypoxia,],2,mean) 426 | 427 | TumorDF <- HNSCC_epi_tumor@meta.data %>% as.data.frame() 428 | ### save 429 | readr::write_rds(TumorDF,"/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Data/Signature/TumorScore.rds") 430 | 431 | 432 | pdf(paste0(WorkPath,'Tumor_DimPlot.pdf'),5,5) 433 | DimPlot(HNSCC_epi_tumor,cols = RColorBrewer::brewer.pal(10,'Dark2')) 434 | dev.off() 435 | 436 | ### FeaturePlot 437 | pdf(paste0(WorkPath,'Puram_tumor_FeaturePlot.pdf'),30,5) 438 | FeaturePlot(HNSCC_epi_tumor,reduction = 'umap',feature = c('Puram.Cell.Cycle','Puram.p.EMT',"Puram.Epi.Dif1","Puram.Epi.Dif2","Puram.Stress","Puram.Hypoxia"),cols = c("gray","orange","red"),ncol = 6) 439 | dev.off() 440 | 441 | 442 | ### Vln 443 | pdf(paste0(WorkPath,'Puram_tumor_VlnPlot.pdf'),30,5) 444 | VlnPlot(HNSCC_epi_tumor,feature = c('Puram.Cell.Cycle','Puram.p.EMT',"Puram.Epi.Dif1","Puram.Epi.Dif2","Puram.Stress","Puram.Hypoxia"),cols = RColorBrewer::brewer.pal(10,'Dark2'),pt.size = 0,ncol = 6) 445 | dev.off() 446 | 447 | # 1.dotplot ======================================================= 448 | TumorSubdf <- as.data.frame(HNSCC_epi_tumor@meta.data) %>% 449 | dplyr::select(c(RNA_snn_res_final,Puram.Cell.Cycle,Puram.p.EMT,Puram.Epi.Dif1,Puram.Epi.Dif2,Puram.Stress,Puram.Hypoxia)) 450 | TumorSubdf$RNA_snn_res_final <- paste0("C",TumorSubdf$RNA_snn_res_final) 451 | xorder <- names(TumorSubdf)[-1] 452 | TumorSubdf[,xorder] <- t(apply(TumorSubdf[,xorder],1,scale)) 453 | #TumorSubdf[,xorder] <- apply(TumorSubdf[,xorder],2,scale) 454 | TumorSub <- TumorSubdf %>% tidyr::gather(key = CellTypes, value = Score,-RNA_snn_res_final) 455 | 456 | p1 <- ggplot(TumorSub,aes(x=RNA_snn_res_final,y=CellTypes))+ 457 | geom_point(aes(color=Score))+ 458 | scale_color_gradientn(colors= rev(colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"),space="rgb")(50)))+ 459 | theme(panel.background=element_rect(color="black",fill=NA),legend.key = element_blank(),axis.ticks=element_blank(), 460 | panel.grid=element_blank(),axis.text.x=element_text(size=12,angle=45,vjust = 1,hjust = 1,color="black"), 461 | axis.text.y=element_text(size=10,color="black"),axis.title=element_blank(),legend.position = "bottom")+ 462 | geom_tile(data=TumorSub,aes(x=RNA_snn_res_final,y=CellTypes),fill=NA,color="lightgray") 463 | 464 | pdf(paste0(WorkPath,'Puram.Tumor_DotPlot.pdf'),5,5) 465 | p1 466 | dev.off() 467 | 468 | # 2.Vlnplot with p ======================================================= 469 | sobjlists = FetchData(object = HNSCC_epi_tumor, vars = names(TumorSubdf)) 470 | sobjlists$RNA_snn_res_final <- paste0("C",sobjlists$RNA_snn_res_final) 471 | sobjlists$RNA_snn_res_final <- factor(sobjlists$RNA_snn_res_final,levels = c("C0","C1","C2","C3","C4")) 472 | 473 | pp <- lapply(c(2:length(names(TumorSubdf))), function(sub){ 474 | p1 <- ggplot(sobjlists,aes(x= RNA_snn_res_final, y = sobjlists[,sub])) + 475 | geom_violin(aes(fill=RNA_snn_res_final),alpha=0.3) + 476 | scale_fill_manual(limits = c("C0","C1","C2","C3","C4"), 477 | values = RColorBrewer::brewer.pal(10,'Dark2'),guide = F)+ 478 | geom_boxplot(aes(fill=RNA_snn_res_final),width=0.1, alpha=1,outlier.colour = NA) + 479 | labs(y= as.character(names(sobjlists)[sub]), x = "RNA_snn_res_final") + 480 | theme(panel.background = element_blank(), axis.line = element_line(colour = "black"),plot.title = element_text(hjust = 0.5), 481 | axis.text.x=element_text(size=12,angle=45,vjust = 1,hjust = 1,color="black"), 482 | axis.text.y=element_text(size=9,color="black"))+ 483 | ggpubr::stat_compare_means(method = "anova",data = sobjlists) # Add pairwise comparisons p-value 484 | return(p1) 485 | }) 486 | pdf(paste0(OutPath,'Puram.TumorVlnPlot.pdf'),width = 30,height = 5) 487 | cowplot::plot_grid(plotlist = pp,ncol = 6) 488 | dev.off() 489 | 490 | ### Heatmap 491 | TumorDF <- readr::read_rds("/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Data/Signature/TumorScore.rds") 492 | CandiCol <- c("Puram.Cell.Cycle","Puram.p.EMT","Puram.Epi.Dif1","Puram.Epi.Dif2","Puram.Stress","Puram.Hypoxia") 493 | 494 | data <- TumorDF %>% dplyr::mutate(DefineTypes = paste0("C",RNA_snn_res_final)) %>% 495 | dplyr::select(c(DefineTypes,CandiCol)) 496 | 497 | test <- t(as.data.frame(c("C0",colMeans(data[data$DefineTypes=="C0",2:ncol(data)])))) 498 | DefineTypes <- unique(data$DefineTypes) 499 | for (i in DefineTypes) { 500 | test1 <- t(as.data.frame(c(i,colMeans(data[data$DefineTypes==i,2:ncol(data)])))) 501 | test <- rbind(test,test1) 502 | } 503 | test <- test[-1,] 504 | test <- as.data.frame(test) 505 | 506 | test[,2:ncol(data)] <- apply(test[,c(2:ncol(data))],2,as.numeric) 507 | 508 | colnames(test)=c("DefineTypes",CandiCol) 509 | rownames(test) <- test$DefineTypes 510 | plotdf <- test[c("C0","C1","C2","C3","C4"),-1] 511 | plotdff <- t(plotdf) 512 | 513 | # 按mac order and out first 514 | library(pheatmap) 515 | p1 <- pheatmap(plotdff,scale = "row", 516 | cluster_rows = F, 517 | show_rownames = T, 518 | cluster_cols = F, 519 | color = colorRampPalette(RColorBrewer::brewer.pal(9, "OrRd"))(50)) 520 | 521 | pdf(paste0(OutPath,"Puram_Tumor_Heatmap.pdf"),width = 4,height = 2.5) 522 | print(p1) 523 | dev.off() 524 | 525 | 526 | -------------------------------------------------------------------------------- /15.Survival analysis.R: -------------------------------------------------------------------------------- 1 | ### survival 2 | rm(list=ls()) 3 | library(magrittr) 4 | library(maxstat) 5 | library(ggplot2) 6 | library(dplyr) 7 | library(DOSE) 8 | library(clusterProfiler) #GSEA 9 | library(fgsea) 10 | library(magrittr) 11 | library(tibble) 12 | library(RColorBrewer) 13 | options(stringsAsFactors = F) 14 | # Clinical 15 | ClinicalData <- read.delim("/data/public/TCGA_data/TCGA_preliminary/TCGA_ClinicalData_20180420.txt") 16 | ClinicalData$PFI.time <- as.numeric(as.character(ClinicalData$PFI.time)) 17 | ClinicalData$OS.time <- as.numeric(as.character(ClinicalData$OS.time)) 18 | 19 | OutPath <- "/work/brj/Collaboration/scRNA/2022/HNSCC/Revise/Fig/Survival/" 20 | 21 | #signature score 22 | GSVAscore <- readr::read_rds("/work/brj/Collaboration/scRNA/2022/HNSCC/Revise/Result/HNSC_Regulons.rds.gz") 23 | names(GSVAscore)[2] <- "Score" 24 | GSVAscore <- GSVAscore %>% dplyr::mutate( 25 | xcell = purrr::map(.x=Score,function(.x){ 26 | try( 27 | { 28 | #.x=Expr$expr[[1]] 29 | result=data.frame(geneset=rownames(.x),.x)#%>% 30 | },silent = F) 31 | })) 32 | 33 | colnames(GSVAscore)[1]='cancer_types' 34 | Candidate <- "RegulonsTFDP1" #RegulonsMYBL2 35 | #1.提取 目标基因 36 | GSVAscore %>% dplyr::mutate(Expr=purrr::map(.x=xcell,function(.x){ 37 | .x %>% as.data.frame() %>% dplyr::filter(geneset %in% Candidate) %>% tidyr::gather(key="barcode",value="Expression",-geneset)})) %>% 38 | dplyr::mutate(Expr = purrr::map(.x=Expr,function(.x){.x %>% 39 | dplyr::mutate(types = data.frame(do.call(rbind,strsplit(barcode,"-")))$X1, 40 | sample_class = data.frame(do.call(rbind,strsplit(barcode,"-")))$X2)})) -> Candidateexp # 3colnames(Gene,barcode()) 41 | 42 | #.x <- Candidateexp$Expr[[1]] %>% as.data.frame() 43 | #spls <- substr(.x$barcode,14,16) 44 | 45 | 46 | #2.目标基因分高低组 47 | Candidateexp <- Candidateexp %>% dplyr::mutate(Candidate_sur=purrr::map(.x=Expr,function(.x){ 48 | .x <- .x %>% dplyr::filter(substr(barcode,14,16) %in% c("01A")) %>% 49 | dplyr::mutate(barcode = gsub("\\.","-",substr(barcode,1,12))) %>% 50 | dplyr::inner_join(ClinicalData,by=c("barcode"="bcr_patient_barcode")) 51 | .x <- .x %>% dplyr::mutate(OS.time=as.numeric(OS.time)/30,PFI.time=as.numeric(PFI.time)/30) %>% dplyr::filter(!is.na(OS.time)) 52 | .x <- .x %>% 53 | dplyr::mutate(ExpressionGroup = ifelse(Expression > median(Expression),"High","Low"), 54 | OS = as.numeric(as.character(OS)), 55 | PFI = as.numeric(as.character(PFI))) 56 | aa_try <- try(aa <- maxstat.test(Surv(OS.time, OS) ~ Expression, data= .x,smethod="LogRank"),silent=TRUE) 57 | .x <- .x %>% dplyr::mutate(ExpressionGroupB = ifelse(Expression >aa_try$estimate,"High","Low")) 58 | return(.x) 59 | })) 60 | 61 | Candidateexp <- Candidateexp %>% dplyr::mutate(Candidate_HR=purrr::map(.x=Candidate_sur,function(.x){ 62 | model1 <- coxph(survival::Surv(OS.time, OS) ~ Expression, data=.x, na.action=na.exclude) 63 | HR = signif(as.numeric(summary(model1)$coefficients[1,c( "exp(coef)")]),digit=4) 64 | Coxp <- signif(as.numeric(summary(model1)$coefficients[1,c( "Pr(>|z|)")]),digit=4) 65 | HR_detail = summary(model1) 66 | CILow = HR_detail$conf.int[,"lower .95"] 67 | CIHigh = HR_detail$conf.int[,"upper .95"] 68 | model1 <- survival::survdiff(survival::Surv(OS.time, OS) ~ ExpressionGroup,data=.x, na.action=na.exclude) 69 | 70 | KMP <- 1-pchisq(model1$chisq, df=length(levels(factor(.x$ExpressionGroup)))-1) 71 | 72 | model2 <- survival::survdiff(survival::Surv(OS.time, OS) ~ ExpressionGroupB,data=.x, na.action=na.exclude) 73 | 74 | KMP2 <- 1-pchisq(model2$chisq, df=length(levels(factor(.x$ExpressionGroupB)))-1) 75 | 76 | print(nrow(.x)) 77 | data.frame(cancertypes=unique(.x$type),HR, Coxp,CILow,CIHigh,KMP,BestKMP=KMP2) 78 | data.frame(cancertypes=unique(.x$type),HR, Coxp,CILow,CIHigh,KMP) 79 | })) 80 | 81 | Candidateexp <- Candidateexp %>% dplyr::mutate(Candidate_plot=purrr::map(.x= Candidate_sur,function(.x){ 82 | model1 <- survival::coxph(survival::Surv(OS.time, OS) ~ Expression, data=.x, na.action=na.exclude) 83 | HR = signif(as.numeric(summary(model1)$coefficients[1,c( "exp(coef)")]),digit=4) 84 | Coxp <- signif(as.numeric(summary(model1)$coefficients[1,c( "Pr(>|z|)")]),digit=4) 85 | HR_detail = summary(model1) 86 | CI = paste(signif(HR_detail$conf.int[,"lower .95"],2),'-',signif(HR_detail$conf.int[,"upper .95"],2),sep=" ") #Result[i1, c('CI_95%_for_HR')] 87 | fit<- survival::survfit(survival::Surv(OS.time, OS) ~ ExpressionGroup,data=.x) 88 | model1 <- survival::survdiff(survival::Surv(OS.time, OS) ~ ExpressionGroup,data=.x, na.action=na.exclude) 89 | 90 | KMP <- 1-pchisq(model1$chisq, df=length(levels(factor(.x$ExpressionGroup)))-1) 91 | 92 | p <- survminer::ggsurvplot(fit,palette = c("red", "blue"), 93 | # risk.table = TRUE, risk.table.col = "strata", 94 | pval = F, title=paste(unique(.x$type),": p = ",signif(as.numeric(KMP),digits = 2), 95 | "\nHR ",HR, " [95% CI:",CI,"]",sep=""), 96 | fun = function(y) y*100,legend = c(0.2, 0.2), legend.title=F,font.legend=10,font.title=10, 97 | legend.labs = c(paste("High (n=",table(.x$ExpressionGroup)[[1]],")",sep=""), 98 | paste("Low (n=",table(.x$ExpressionGroup)[[2]],")",sep=""))) 99 | p$plot <- p$plot+ labs(x="Months") + theme(legend.title = element_blank(),legend.background = element_blank()) 100 | return(p$plot) 101 | })) 102 | 103 | Candidateexp <- Candidateexp %>% dplyr::mutate(Candidate_plotBestCurv=purrr::map(.x=Candidate_sur,function(.x){ 104 | model1 <- survival::coxph(survival::Surv(OS.time, OS) ~ Expression, data=.x, na.action=na.exclude) 105 | HR = signif(as.numeric(summary(model1)$coefficients[1,c( "exp(coef)")]),digit=4) 106 | Coxp <- signif(as.numeric(summary(model1)$coefficients[1,c( "Pr(>|z|)")]),digit=4) 107 | HR_detail = summary(model1) 108 | CI = paste(signif(HR_detail$conf.int[,"lower .95"],2),'-',signif(HR_detail$conf.int[,"upper .95"],2),sep=" ") #Result[i1, c('CI_95%_for_HR')] 109 | fit<- survival::survfit(survival::Surv(OS.time, OS) ~ ExpressionGroupB,data=.x) 110 | model1 <- survival::survdiff(survival::Surv(OS.time, OS) ~ ExpressionGroupB,data=.x, na.action=na.exclude) 111 | 112 | KMP <- 1-pchisq(model1$chisq, df=length(levels(factor(.x$ExpressionGroupB)))-1) 113 | 114 | p <- survminer::ggsurvplot(fit,palette = c("red", "blue"), 115 | # risk.table = TRUE, risk.table.col = "strata", 116 | pval = F, title=paste(unique(.x$type),": p = ",signif(as.numeric(KMP),digits = 2), 117 | "\nHR ",HR, " [95% CI:",CI,"]",sep=""), 118 | fun = function(y) y*100,legend = c(0.2, 0.2), legend.title=F,font.legend=10,font.title=10, 119 | legend.labs = c(paste("High (n=",table(.x$ExpressionGroupB)[[1]],")",sep=""), 120 | paste("Low (n=",table(.x$ExpressionGroupB)[[2]],")",sep=""))) 121 | p$plot <- p$plot + labs(x="Months") + theme(legend.title = element_blank(),legend.background = element_blank()) 122 | return(p$plot) 123 | })) 124 | 125 | 126 | pdf(paste(OutPath,Candidate,"_OS_Survival.pdf",sep=""),width = 5,height = 5) 127 | pp <- cowplot::plot_grid(plotlist = Candidateexp$Candidate_plot,labels = Candidate) 128 | print(pp) 129 | dev.off() 130 | 131 | pdf(paste(OutPath,Candidate,"BC_OS_Survival.pdf",sep=""),width = 5,height = 5) 132 | pp <- cowplot::plot_grid(plotlist = Candidateexp$Candidate_plotBestCurv,labels= Candidate) 133 | print(pp) 134 | dev.off() 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /2.Epithelial cells features.R: -------------------------------------------------------------------------------- 1 | # Epithelial features and function enrichment and regulon construction for each malignant cell type 2 | HNSCC_epi_tumor <- readRDS('/work/smy/Project/HNSCC/2.data/26sample_tumorcells_Harmony_immuneReference_All.rds') 3 | Idents(HNSCC_epi_tumor) <- HNSCC_epi_tumor$stage 4 | p1 <- DimPlot(HNSCC_epi_tumor,cols = .cluster_cols) 5 | Idents(HNSCC_epi_tumor) <- HNSCC_epi_tumor$copykat 6 | p2 <- DimPlot(HNSCC_epi_tumor,cols = RColorBrewer::brewer.pal(10,'Dark2')[c(8,7)]) 7 | Idents(HNSCC_epi_tumor) <- HNSCC_epi_tumor$RNA_snn_res_final 8 | p3 <- DimPlot(HNSCC_epi_tumor,cols = RColorBrewer::brewer.pal(10,'Dark2')) 9 | ### Fig2D 10 | TumorSub_proportion <- read.delim('/work/smy/Project/HNSCC_26sample/2.data/CIBERSORTx_Results_TumorSubcluster.txt') 11 | Tumorsub <- TumorSub_proportion %>% dplyr::select(c(Mixture,cluster_0,cluster_1,cluster_2,cluster_3,cluster_4)) 12 | HNSCC_proportion <- Tumorsub 13 | HNSCC_proportion <- HNSCC_proportion[substr(HNSCC_proportion$Mixture,14,16) == "01A",] 14 | ClinicalData <- read.delim("/server1_work/brj/GEO/CRC/multiCox/TCGA_ClinicalData_20180420.txt") 15 | ClinicalData$OS <- as.numeric(as.character(ClinicalData$OS)) 16 | ClinicalData$OS.time <- as.numeric(as.character(ClinicalData$OS.time)) 17 | HNSCC_proportion$Mixture <- substr(HNSCC_proportion$Mixture,1,12) 18 | HNSCC_proportion$Mixture <- gsub('[.]','-',HNSCC_proportion$Mixture) 19 | HNSCC_proportion[,c(7,8)] <- ClinicalData[match(HNSCC_proportion$Mixture,ClinicalData$bcr_patient_barcode),c('OS','OS.time')] 20 | HNSCC_proportion <- HNSCC_proportion %>% dplyr::mutate(OS.time = OS.time/30) %>% dplyr::filter(!is.na(OS.time)) 21 | openxlsx::write.xlsx(HNSCC_proportion,file=paste(OutPath,"/",folder,"/Fig2D.xlsx",sep="")) 22 | ### Fig2E 23 | source("/server1_work/yye/AliRstudio/Data/Public/ToolsData/ulcerative_colitis/scores.r") 24 | HNSCC_epi_tumor <- readRDS('/work/smy/Project/HNSCC_26sample/2.data/26sample_tumorcells_Harmony_immuneReference_All.rds') 25 | HMterms <- gmtPathways("/work/smy/Public/data/GeneList/h.all.v6.1.symbols.gmt") 26 | 27 | HMscores <- score_cells(seur=HNSCC_epi_tumor, names=HMterms, combine_genes='mean', groups=NULL, group_stat='mean', cells.use=NULL) 28 | HMscoresMatrix <- as.matrix(HMscores) 29 | HMscoresMatrix <- t(HMscoresMatrix ) 30 | HMscoresMatrix <- apply(HMscoresMatrix,2,function(x)signif(x,digits = 3)) 31 | #rownames(HMscoresMatrix ) <- names(nHMPathways) 32 | colnames(HMscoresMatrix) <- rownames(HNSCC_epi_tumor@meta.data) 33 | HMscoresMatrixSeurat <- CreateSeuratObject(counts = HMscoresMatrix ) 34 | HMscoresMatrixSeurat@meta.data <- HNSCC_epi_tumor@meta.data 35 | HNSCC_epi_tumor@assays$HM <- HMscoresMatrixSeurat@assays$RNA 36 | DefaultAssay(HNSCC_epi_tumor) <- "HM" 37 | Idents(HNSCC_epi_tumor) <- "RNA_snn_res_final" 38 | HNSCC_epi_tumor_HM_FeatureDiff <- FindAllMarkers(HNSCC_epi_tumor,min.pct=0.1,logfc.threshold = 0.05,pseudocount.use = 0.1,only.pos = T) 39 | topN <- 5 40 | HNSCC_epi_tumor_HM_FeatureDiff$cluster <- as.character(HNSCC_epi_tumor_HM_FeatureDiff$cluster) 41 | HNSCC_epi_tumor_HM_FeatureDiff_Order <- HNSCC_epi_tumor_HM_FeatureDiff %>% dplyr::arrange(cluster,desc(avg_log2FC)) %>% dplyr::group_by(cluster) %>% dplyr::top_n(topN,avg_log2FC) # 42 | HNSCC_epi_tumor_HM_FeatureDiff_Order$gene <- gsub("[.]"," ",HNSCC_epi_tumor_HM_FeatureDiff_Order$gene) 43 | HNSCC_epi_tumor_HM_FeatureDiff_Order$gene <- gsub(" "," ",HNSCC_epi_tumor_HM_FeatureDiff_Order$gene) 44 | TT <- data.frame(Subtypes=rep(unique(HNSCC_epi_tumor_HM_FeatureDiff_Order$cluster),times=length(unique(HNSCC_epi_tumor_HM_FeatureDiff_Order$gene))), 45 | gene=rep(unique(HNSCC_epi_tumor_HM_FeatureDiff_Order$gene),each=length(unique(HNSCC_epi_tumor_HM_FeatureDiff_Order$cluster)))) 46 | #图片处理 美观 47 | HNSCC_epi_tumor_HM_FeatureDiff_OrderF <- HNSCC_epi_tumor_HM_FeatureDiff_Order %>% dplyr::select(gene,cluster,avg_log2FC) %>% 48 | tidyr::spread(cluster,avg_log2FC,fill=0) 49 | HNSCC_epi_tumor_HM_FeatureDiff_OrderF <- HNSCC_epi_tumor_HM_FeatureDiff_OrderF[do.call(order,HNSCC_epi_tumor_HM_FeatureDiff_OrderF[,2:ncol(HNSCC_epi_tumor_HM_FeatureDiff_OrderF)]),] 50 | HNSCC_epi_tumor_HM_FeatureDiff_Order$cluster <- paste0("C",HNSCC_epi_tumor_HM_FeatureDiff_Order$cluster) 51 | openxlsx::write.xlsx(HNSCC_epi_tumor_HM_FeatureDiff_Order,file=paste(OutPath,"/",folder,"/Fig2E.xlsx",sep="")) 52 | 53 | ### Fig2F 54 | MBterms <- gmtPathways("/work/smy/Public/data/GeneList/KEGG_metabolism.gmt") 55 | MBscores <- score_cells(seur=HNSCC_epi_tumor, names=MBterms, combine_genes='mean', groups=NULL, group_stat='mean', cells.use=NULL) 56 | MBscoresMatrix <- as.matrix(MBscores) 57 | MBscoresMatrix <- t(MBscoresMatrix ) 58 | MBscoresMatrix <- apply(MBscoresMatrix,2,function(x)signif(x,digits = 3)) 59 | #rownames(MBscoresMatrix ) <- names(nMBPathways) 60 | colnames(MBscoresMatrix) <- rownames(HNSCC_epi_tumor@meta.data) 61 | MBscoresMatrixSeurat <- CreateSeuratObject(counts = MBscoresMatrix ) 62 | MBscoresMatrixSeurat@meta.data <- HNSCC_epi_tumor@meta.data 63 | HNSCC_epi_tumor@assays$MB <- MBscoresMatrixSeurat@assays$RNA 64 | DefaultAssay(HNSCC_epi_tumor) <- "MB" 65 | Idents(HNSCC_epi_tumor) <- "RNA_snn_res_final" 66 | HNSCC_epi_tumor_MB_FeatureDiff <- FindAllMarkers(HNSCC_epi_tumor,min.pct=0.1,logfc.threshold = 0.05,pseudocount.use = 0.1,only.pos = T) 67 | topN <- 6 68 | HNSCC_epi_tumor_MB_FeatureDiff$cluster <- as.character(HNSCC_epi_tumor_MB_FeatureDiff$cluster) 69 | HNSCC_epi_tumor_MB_FeatureDiff_Order <- HNSCC_epi_tumor_MB_FeatureDiff %>% dplyr::arrange(cluster,desc(avg_log2FC)) %>% dplyr::group_by(cluster) %>% dplyr::top_n(topN,avg_log2FC) # 70 | HNSCC_epi_tumor_MB_FeatureDiff_Order$gene <- gsub("[.]"," ",HNSCC_epi_tumor_MB_FeatureDiff_Order$gene) 71 | HNSCC_epi_tumor_MB_FeatureDiff_Order$gene <- gsub(" "," ",HNSCC_epi_tumor_MB_FeatureDiff_Order$gene) 72 | TT <- data.frame(Subtypes=rep(unique(HNSCC_epi_tumor_MB_FeatureDiff_Order$cluster),times=length(unique(HNSCC_epi_tumor_MB_FeatureDiff_Order$gene))), 73 | gene=rep(unique(HNSCC_epi_tumor_MB_FeatureDiff_Order$gene),each=length(unique(HNSCC_epi_tumor_MB_FeatureDiff_Order$cluster)))) 74 | HNSCC_epi_tumor_MB_FeatureDiff_OrderF <- HNSCC_epi_tumor_MB_FeatureDiff_Order %>% dplyr::select(gene,cluster,avg_log2FC) %>% 75 | tidyr::spread(cluster,avg_log2FC,fill=0) 76 | HNSCC_epi_tumor_MB_FeatureDiff_OrderF <- HNSCC_epi_tumor_MB_FeatureDiff_OrderF[do.call(order,HNSCC_epi_tumor_MB_FeatureDiff_OrderF[,2:ncol(HNSCC_epi_tumor_MB_FeatureDiff_OrderF)]),] 77 | 78 | ggplot(HNSCC_epi_tumor_MB_FeatureDiff_Order,aes(x=gene,y=cluster))+ 79 | geom_point(aes(color=avg_log2FC,size=-log10(p_val_adj+10^(-100))))+ 80 | scale_color_gradientn(colors= rev(colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"),space="rgb")(50)))+ 81 | theme(panel.background=element_rect(color="black",fill=NA),legend.key = element_blank(),axis.ticks=element_blank(), 82 | panel.grid=element_blank(),axis.text.x=element_text(size=8,angle=90,vjust = 0.5,hjust = 1,color="black"), 83 | axis.text.y=element_text(size=8,color="black"),axis.title=element_blank(),legend.position = "bottom")+ 84 | scale_x_discrete(limit=rev(HNSCC_epi_tumor_MB_FeatureDiff_OrderF$gene))+ 85 | #scale_y_discrete(limit = c('0','1','2'))+ 86 | coord_flip()+ 87 | geom_tile(data=TT,aes(y=Subtypes,x=gene),fill=NA,color="lightgray") 88 | 89 | HNSCC_epi_tumor_MB_FeatureDiff_Order$cluster <- paste0("C",HNSCC_epi_tumor_MB_FeatureDiff_Order$cluster) 90 | openxlsx::write.xlsx(HNSCC_epi_tumor_MB_FeatureDiff_Order,file=paste(OutPath,"/",folder,"/Fig2F.xlsx",sep="")) 91 | 92 | ### Fig2H left 93 | ### server1:/work/smy/Project/HNSCC/1.code/13.26Sample_tumorcell_SCENIC.R 94 | Tumor_Anno <- readRDS('/server1_work/smy/Project/HNSCC/2.data/SCENIC/TumorAnno_AddTFs.rds.gz') 95 | Tumor_Anno@meta.data$Resolution_final <- HNSCC_epi_tumor@meta.data[match(rownames(Tumor_Anno@meta.data),rownames(HNSCC_epi_tumor@meta.data)),'RNA_snn_res_final'] 96 | Sub_Data <- Tumor_Anno 97 | 98 | CandidateFeature <- openxlsx::read.xlsx("/server1_work/smy/Project/HNSCC/2.data/SCENIC/Tumor_Regulon_Will.xlsx") 99 | CandidateFeature <- CandidateFeature[-grep('extended',CandidateFeature$gene),] 100 | CandidateFeature$cluster <- factor(CandidateFeature$cluster,levels=unique(CandidateFeature$cluster)) 101 | 102 | CandidateFeature <- CandidateFeature[(order(CandidateFeature$cluster,sort(CandidateFeature$p_val))),] 103 | CandidateFeature <- CandidateFeature %>% dplyr::group_by(cluster) %>% dplyr::arrange(cluster,p_val,desc(avg_logFC)) 104 | CandidateFeature <- CandidateFeature %>% dplyr::top_n(5,rev(p_val)) #select top10 TF regulons 105 | 106 | CandidateFeatureList <- lapply(levels(CandidateFeature$cluster),function(cluster){ 107 | ls1 <- CandidateFeature[CandidateFeature$cluster == cluster,]$gene 108 | ls2 <- CandidateFeature[CandidateFeature$cluster != cluster,]$gene 109 | ls1_unique <- ls1[!ls1 %in% ls2] 110 | if (length(ls1_unique) > 10){ 111 | sub <- CandidateFeature[CandidateFeature$cluster == cluster & CandidateFeature$gene %in% ls1_unique,] 112 | ls1_unique <- sort(sub$p_val,decreasing = F)[1:10] 113 | }else(ls1_unique <- ls1_unique) 114 | return(ls1_unique) 115 | }) 116 | names(CandidateFeatureList) <- levels(CandidateFeature$cluster) 117 | 118 | PlotGenes <- CandidateFeature %>% dplyr::top_n(10,rev(p_val)) 119 | PlotGenesF <- PlotGenes$gene 120 | PlotGenes_Can <- CandidateFeature %>% dplyr::group_by(cluster) %>% dplyr::top_n(10,rev(p_val)) 121 | #PlotGenes_CanF <- PlotGenes_Can[PlotGenes_Can$cluster %in% "FAP+ Tumorroblasts",]$gene 122 | 123 | #get TF matrix 124 | SubDatam <- Sub_Data@assays$TF@data%>% as.matrix 125 | colnames(SubDatam) <- gsub("\\.","-",colnames(SubDatam)) 126 | SubDatam <- SubDatam[apply(SubDatam,1,sum) > 0,] 127 | ClusterMean <- t(apply(SubDatam,1,function(x){ 128 | lapply(Sub_ClusterID,function(ID){ 129 | mean(x[ID]) 130 | }) %>% unlist 131 | })) 132 | ClusterMean <- ClusterMean[,levels(CandidateFeature$cluster)] 133 | ClusterMean <- ClusterMean[apply(ClusterMean,1,sum)>0,] 134 | 135 | ClusterMeanM <- ClusterMean[match(CandidateFeature$gene,rownames(ClusterMean)),] 136 | ClusterMeanM[,1:ncol(ClusterMeanM)] <- t(apply(ClusterMeanM,1,scale)) 137 | 138 | PlotDF <- ClusterMeanM %>% as.data.frame() 139 | 140 | openxlsx::write.xlsx(PlotDF,file=paste(OutPath,"/","Fig2","/Fig2Hleft.xlsx",sep=""),row.names = T) 141 | 142 | ### right 143 | TF_candidate <- read.table('/work/smy/Project/HNSCC_26sample/2.data/SCENIC_Tumorcell_TF_filter_MeanExpr.txt') 144 | names(TF_candidate) <- gsub("C","X",names(TF_candidate)) 145 | openxlsx::write.xlsx(TF_candidate,file=paste(OutPath,"/",folder,"/Fig2Hright.xlsx",sep=""),row.names = T) 146 | ### Fig2H right 147 | Sub_Data <- HNSCC_epi_tumor 148 | #####plot TF relative heatmap 149 | sig_gene_names1 <- c("CEBPD","BCL3","GRHL1", 150 | "MXD1","RORC","MYBL2","POLE3", 151 | "BRCA1","TFDP1","STAT1","TP63","KLF7", 152 | "EHF","FOXA1","FOXM1","MYBL2","YBX1","TFDP1","MAZ", 153 | "TP63","IRF6","KDM5B","BHLHE40","TFAP2A","ID1","PITX1","NR4A1","CREB3L1","SPDEF","XBP1","FOSB") 154 | PlotGenes <- sig_gene_names1 155 | Sub_TF_Matrix <- Sub_Data@assays$RNA@data[PlotGenes,] %>% as.matrix() 156 | Sub_TF_Matrix <- Sub_TF_Matrix[apply(Sub_TF_Matrix,1,sum) > 0,] 157 | 158 | Sub_ClusterID <- lapply(split(Sub_Data@meta.data,list(Sub_Data$RNA_snn_res_final)), function(x)rownames(x)) 159 | 160 | Sub_TF_ClusterMean <- t(apply(Sub_TF_Matrix,1,function(x){ 161 | lapply(Sub_ClusterID,function(ID){ 162 | mean(x[ID]) 163 | }) %>% unlist 164 | })) 165 | Sub_TF_ClusterMean <- Sub_TF_ClusterMean[apply(Sub_TF_ClusterMean,1,sum)>0,] 166 | Sub_TF_ClusterMean[,1:ncol(Sub_TF_ClusterMean)] <- t(apply(Sub_TF_ClusterMean,1,scale)) 167 | PlotDF <- Sub_TF_ClusterMean %>% as.data.frame() 168 | 169 | openxlsx::write.xlsx(PlotDF,file=paste(OutPath,"/","Fig2","/Fig2Hright.xlsx",sep="")) 170 | 171 | ##2K : SECNIC TF survival======= 172 | TF_candidate <- read.table('/work/smy/Project/HNSCC_26sample/2.data/SCENIC_Tumorcell_TF_filter_MeanExpr.txt') 173 | rownames(TF_candidate) 174 | HNSC_FPKM <- readr::read_rds("/work/smy/Public/data/RNA-seq/TCGA/HNSCC_FPKM.rds") 175 | ClinicalData <- read.delim("/work/smy/Public/data/RNA-seq/TCGA/TCGA_ClinicalData_20180420.txt") 176 | ClinicalData$PFI.time <- as.numeric(as.character(ClinicalData$PFI.time)) 177 | ClinicalData$OS.time <- as.numeric(as.character(ClinicalData$OS.time)) 178 | ClinicalDataF <- ClinicalData[,c("bcr_patient_barcode",'OS','OS.time')] 179 | HNSC_FPKM_F <- HNSC_FPKM[rownames(TF_candidate),] 180 | HNSC_FPKM_F <- t(HNSC_FPKM_F) %>% as.data.frame() 181 | HNSC_FPKM_F[,1:22] <- apply(HNSC_FPKM_F[,1:22],2,function(x)log2(x+1)) 182 | 183 | HNSC_FPKM_F$bcr_patient_barcode <- substr(rownames(HNSC_FPKM_F),0,12) 184 | HNSC_FPKM_FF <- HNSC_FPKM_F[substr(rownames(HNSC_FPKM_F),14,16) %in% c("01A"),] 185 | 186 | HNSC_FPKM_clinical <- merge(HNSC_FPKM_FF,ClinicalDataF,by = "bcr_patient_barcode") %>% 187 | dplyr::filter(!is.na(OS.time)) %>% 188 | dplyr::mutate(OS.time = OS.time/30) %>% 189 | dplyr::select(c(bcr_patient_barcode,TFDP1,OS,OS.time)) 190 | openxlsx::write.xlsx(HNSC_FPKM_clinical,file=paste(OutPath,"/",folder,"/Fig2K.xlsx",sep="")) 191 | -------------------------------------------------------------------------------- /3.HNSCC_epi_tumor features.R: -------------------------------------------------------------------------------- 1 | ### malignant features 2 | HNSCC_Whole <- readRDS('/work/smy/Project/HNSCC/2.data/DeinfeTypes/HNSCC_26sampleAll_DefineTypes.rds.gz') 3 | Idents(HNSCC_Whole) <- 'Maincluster' 4 | ##Epi细胞===== 5 | HNSCC_epi <- subset(HNSCC_Whole,idents = c('Epithelial cells')) ##22751 6 | ### Copykat 结果 7 | copykat_epi <- readRDS('/work/smy/Project/HNSCC_26sample/2.data/copykatN4/26sample_copykat_epi_result.rds') 8 | copykat_epi$prediction <- as.data.frame(copykat_epi$prediction) 9 | HNSCC_epi$copykat <- copykat_epi$prediction[match(rownames(HNSCC_epi@meta.data),copykat_epi$prediction$cell.names),2] 10 | 11 | PPercentage <- table(HNSCC_epi@meta.data[,c('orig.ident',"copykat")]) %>% data.frame %>% 12 | #Por:用一群细胞silent/expression占比(total:100%) 13 | dplyr::mutate(Por= Freq/apply(table(HNSCC_epi@meta.data[,c('orig.ident',"copykat")]),1,sum)) 14 | 15 | 16 | p <- ggplot(PPercentage,aes(x=orig.ident,y=Por*100,fill=copykat))+ 17 | geom_bar(stat="identity",width = 0.7)+labs(x="",y="Percentage (%)")+ 18 | scale_y_continuous(expand = c(0,0))+ 19 | #scale_fill_manual(values=c('#85A3C2','#3A7699'),name="")+ 20 | #geom_text(aes(y=Por*100,label=signif(Por*100,digits = 2)), vjust=1.6, color="black", size=5)+labs(title='BCAT1')+ 21 | theme(axis.text.x = element_text(angle=90,vjust=0.5,hjust=1,size=10),legend.position = "top",panel.background = element_blank(), 22 | axis.line = element_line()) 23 | 24 | ### SFig2B 25 | HNSCC_epi_tumor <- readRDS('/work/smy/Project/HNSCC_26sample/2.data/26sample_tumorcells_Harmony_immuneReference_All.rds') 26 | cellcyle <- readxl::read_xlsx('/work/smy/Public/data/GeneList/cellcyle_signature.xlsx') 27 | colnames(cellcyle) <- cellcyle[1,] 28 | cellcyle <- cellcyle[-1,] 29 | cellcyle$`CELL CYCLE: G1/S` <- as.character(cellcyle$`CELL CYCLE: G1/S`) 30 | cellcyle$`CELL CYCLE: G2/M` <- as.character(cellcyle$`CELL CYCLE: G2/M`) 31 | 32 | DefaultAssay(HNSCC_epi_tumor) <- 'RNA' 33 | HNSCC_epi_tumor <- CellCycleScoring(HNSCC_epi_tumor,s.features = cellcyle$`CELL CYCLE: G1/S`,g2m.features = cellcyle$`CELL CYCLE: G2/M`) 34 | 35 | meta <- HNSCC_epi_tumor@meta.data[,c('orig.ident','Phase')] 36 | 37 | HNSCC_epi_tumor$cycle <- ifelse(HNSCC_epi_tumor$Phase == 'G1','non-cycling cell','cycling cell') 38 | PPercentage <- table(HNSCC_epi_tumor@meta.data[,c("stage","cycle")])%>% data.frame %>% 39 | #Por:用一群细胞silent/expression占比(total:100%) 40 | dplyr::mutate(Por= Freq/apply(table(HNSCC_epi_tumor@meta.data[,c("stage","cycle")]),1,sum)) 41 | PPercentage$cycle <- as.character(PPercentage$cycle) 42 | openxlsx::write.xlsx(PPercentage,file=paste(OutPath,"/",folder,"/SFig2B.xlsx",sep="")) 43 | 44 | 45 | ##Sfig2c 46 | HNSCC_Whole <- readRDS('/work/smy/Project/HNSCC/2.data/DeinfeTypes/HNSCC_26sampleAll_DefineTypes.rds.gz') 47 | Idents(HNSCC_Whole) <- 'Maincluster' 48 | ##Epi细胞===== 49 | HNSCC_epi <- subset(HNSCC_Whole,idents = c('Epithelial cells')) ##22751 50 | ### copykat result 51 | copykat_epi <- readRDS('/work/smy/Project/HNSCC_26sample/2.data/copykatN4/26sample_copykat_epi_result.rds') 52 | Prediction <- copykat_epi$prediction %>% as.data.frame() 53 | ### HNSCC_epi 54 | SplDf <- HNSCC_epi@meta.data %>% 55 | as.data.frame() %>% 56 | tibble::rownames_to_column("cellid") %>% 57 | dplyr::select(c(cellid,orig.ident,stage)) %>% 58 | inner_join(Prediction,by = c("cellid" = "cell.names")) %>% 59 | dplyr::filter(orig.ident %in% c("NT1","Pre-Ca1","E-OSCC1", 60 | "NT2","Pre-Ca2","E-OSCC2", 61 | "NT3","Pre-Ca3","E-OSCC3")) 62 | 63 | CNA.test <- copykat_epi$CNAmat 64 | expr_1 <- CNA.test[,-c(1:3)] 65 | cnv_score <- as.data.frame(colSums(expr_1 * expr_1)) 66 | colnames(cnv_score)="cnv_score" 67 | cnv_score <- tibble::rownames_to_column(cnv_score, var='cellid') 68 | cnv_score$cellid <- gsub('[.]','-',cnv_score$cellid) #不包含参考的样本在内 69 | 70 | spl <- c("NT3","Pre-Ca3","E-OSCC3") 71 | CNV_mtx <- cnv_score %>% 72 | inner_join(SplDf) %>% 73 | dplyr::filter(orig.ident %in% spl) %>% 74 | dplyr::mutate(group = paste(.$stage, .$copykat.pred,sep = "_")) 75 | CNV_mtxs <- CNV_mtx %>% dplyr::filter(copykat.pred == "aneuploid") 76 | sobjlists <- CNV_mtxs %>% dplyr::select(c(stage,cnv_score)) 77 | sobjlists$stage <- factor(sobjlists$stage,levels = c("NT","Pre","E")) 78 | sobjlists <- sobjlists %>% arrange(stage) 79 | openxlsx::write.xlsx(sobjlists,file=paste(OutPath,"/",folder,"/SFig2C.xlsx",sep="")) 80 | 81 | ##F 82 | features <- c("NR4A1","CCL4","CXCL2","CXCL3","IL7R","CXCL10","CXCL14","IL1RN", 83 | "IL1R2","IL18","TYMP","CXCL9","TNFRSF12A","INHBA","TNC","PLAU","IL36G","SDC1", 84 | "ACKR3","EDN2","CXCL8","CXCL1","HTN3","EGFR","SAA2","SAA1","DEFB1","IL16", 85 | "IL4R","CD40","IL32","ANGPTL4","IL20RB","SEMA4B") 86 | Idents(HNSCC_epi_tumor) <- HNSCC_epi_tumor$stage 87 | p <- DotPlot(HNSCC_epi_tumor,features = features,cols = 'RdBu') + 88 | theme(axis.text.x = element_text(size = 10,angle=90,hjust = 1)) + 89 | scale_y_discrete(limits = rev(c('NT','Pre','E','A','LN-in','LN-out','LN-normal','R')))+ 90 | scale_colour_gradientn(colours =rev(RColorBrewer::brewer.pal(12,'RdBu')),limits = c(-2.5,2.5),name="") 91 | PlotDf <- p$data 92 | 93 | openxlsx::write.xlsx(PlotDf,file=paste(OutPath,"/",folder,"/SFig2F.xlsx",sep=""),row.names = F) 94 | 95 | ##g 96 | PPercentage <- table(HNSCC_epi_tumor@meta.data[,c('RNA_snn_res_final',"stage")]) %>% data.frame %>% 97 | #Por:用一群细胞silent/expression占比(total:100%) 98 | dplyr::mutate(Por= Freq/apply(table(HNSCC_epi_tumor@meta.data[,c('RNA_snn_res_final',"stage")]),1,sum)) 99 | 100 | openxlsx::write.xlsx(PPercentage,file=paste(OutPath,"/",folder,"/SFig2G.xlsx",sep="")) 101 | 102 | ##j 103 | library(stringr) 104 | TumorSub_proportion <- read.delim('/work/smy/Project/HNSCC_26sample/2.data/CIBERSORTx_Results_TumorSubcluster.txt') 105 | TumorSub_proportion$Mixture <- gsub('[.]','-',TumorSub_proportion$Mixture) 106 | TumorSub_proportion <- TumorSub_proportion[str_sub(TumorSub_proportion$Mixture,14,16) %in% c("01A"),] 107 | TumorSub_proportion$Mixture <- str_sub(TumorSub_proportion$Mixture,1,12) 108 | TumorSub <- TumorSub_proportion %>% dplyr::select(c(Mixture,cluster_0,cluster_1,cluster_2,cluster_3,cluster_4)) 109 | 110 | ##clinical info 111 | surviva <- read.delim("/server1_work/brj/GEO/CRC/multiCox/TCGA_ClinicalData_20180420.txt")#,","gender" 112 | survival <- surviva %>% dplyr::select("bcr_patient_barcode",'OS','OS.time') %>% 113 | dplyr::rename("Mixture" = "bcr_patient_barcode") %>% 114 | inner_join(TumorSub) %>% 115 | dplyr::mutate(OS.time = as.numeric(as.numeric(as.character(.$OS.time))/30)) %>% 116 | dplyr::filter(!is.na(OS.time)) 117 | openxlsx::write.xlsx(survival,file=paste(OutPath,"/",folder,"/SFig2j.xlsx",sep="")) 118 | 119 | 120 | ###k 121 | ## /work/brj/Collaboration/2022/scRNA/HNSCC/Revise2/Code/6.TumorPuramSignatureScore.R 122 | p1$data 123 | openxlsx::write.xlsx(p1$data,file=paste(OutPath,"/",folder,"/SFig2k.xlsx",sep="")) 124 | 125 | ###l-m 126 | HNSCC_epi_tumorF <- subset(HNSCC_epi_tumor,idents = c('1','2')) 127 | Markers <- FindAllMarkers(HNSCC_epi_tumorF,only.pos = TRUE, min.pct = 0.1, logfc.threshold = 0.25) 128 | 129 | Markers$Class <- ifelse( Markers$avg_log2FC > 0.4 & Markers$p_val < 0.05 & Markers$cluster == '1',"Cluster_1", 130 | ifelse(Markers$avg_log2FC > 0.4 & Markers$p_val < 0.05 & Markers$cluster == '2',"Cluster_2",'None')) 131 | Markers$avg_log2FC <- ifelse(Markers$cluster == '2',-Markers$avg_log2FC,Markers$avg_log2FC) 132 | Markers$pvalue_L <- -log10(Markers$p_val+10^-200) 133 | sub <- Markers 134 | 135 | Gene_Labels <- sub %>% dplyr::filter(Class %in% c("Cluster_1","Cluster_2") ) %>% dplyr::group_by(Class) %>% 136 | dplyr::top_n(15,abs(avg_log2FC)) 137 | 138 | GO_BP_Fun <- function(CompareGroup,groups=c("Cluster_1","Cluster_2"),OrgData="org.Hs.eg.db",pcut=0.05,qcut=0.2){ 139 | library(clusterProfiler) 140 | if(OrgData=="org.Hs.eg.db"){library(org.Hs.eg.db)}else{library(org.Hs.eg.db)} 141 | CompareGroup <- CompareGroup %>% dplyr::mutate(GO_Compare=purrr::map(.x=CompareResult,function(.x){ 142 | SigSub <- .x[.x$Class %in% groups,] %>% as.data.frame() 143 | if(length(unique(SigSub$gene))>80){ 144 | formula_res <- clusterProfiler::compareCluster( 145 | keyType="SYMBOL", 146 | gene ~ Class, 147 | data=SigSub, 148 | fun="enrichGO", 149 | OrgDb=OrgData, 150 | ont = "BP", 151 | pAdjustMethod = "BH", 152 | pvalueCutoff = pcut, 153 | qvalueCutoff = qcut 154 | ) 155 | 156 | # Run GO enrichment test and merge terms that are close to each other to remove result redundancy 157 | # Simple_ego <- clusterProfiler::simplify( 158 | # formula_res, 159 | # cutoff=0.7, 160 | # by="p.adjust", 161 | # select_fun=min 162 | # ) 163 | return(formula_res ) # 164 | }else{return(NA)} # 165 | })) 166 | 167 | } 168 | 169 | library(KEGG.db,lib.loc = '/home/xyding/R/x86_64-pc-linux-gnu-library/4.1/') 170 | KeggPathway <- readRDS('/work/xyding/2022/metabolism/KEGG_hsapathway.rds.gz') 171 | 172 | KEGG_Fun <- function(CompareGroup,groups=c("Cluster_1","Cluster_2"),OrgSp="hsa",pcut=0.05,qcut=0.2){ 173 | CompareGroup <- CompareGroup %>% dplyr::mutate(KEGG_Compare= purrr::map(.x=CompareResult,function(.x){ 174 | 175 | SigSub <- .x[.x$Class %in% c("Cluster_2","Cluster_1"),] %>% as.data.frame() 176 | genelist <- clusterProfiler::bitr(SigSub$gene,fromType = "SYMBOL", 177 | toType = c("ENTREZID", "SYMBOL"), OrgDb = org.Hs.eg.db) 178 | 179 | SigSub <- merge( SigSub,genelist,by.x="gene",by.y="SYMBOL") 180 | if(length(unique(SigSub$gene))>80){ 181 | formula_res <- clusterProfiler::compareCluster( 182 | #keyType="SYMBOL", 183 | ENTREZID~Class, 184 | data=SigSub, 185 | organism=OrgSp, 186 | fun="enrichKEGG", 187 | pAdjustMethod = "BH", 188 | pvalueCutoff = pcut, 189 | qvalueCutoff = qcut, 190 | use_internal_data = TRUE #设置使用内部database 191 | 192 | ) 193 | formula_res@compareClusterResult$Description <- lapply(formula_res@compareClusterResult$ID,function(x)KeggPathway[KeggPathway$ID %in% x,]$Description) %>% unlist() 194 | 195 | return(formula_res) 196 | }else{return(NA)} 197 | })) 198 | 199 | } 200 | 201 | table(Markers$Class) 202 | CompareList = c("Cluster_1 vs Cluster_2") 203 | CompareGroup <- tibble::tibble(CompareList=c("Cluster_1 vs Cluster_2"),CompareResult = list(V1=Markers)) 204 | 205 | #GO 206 | CompareGroup <- GO_BP_Fun(CompareGroup,groups=c("Cluster_1","Cluster_2"),OrgData="org.Hs.eg.db",pcut=0.05,qcut=0.2) 207 | GOresult <- CompareGroup$GO_Compare$V1@compareClusterResult 208 | p <- dotplot(CompareGroup$GO_Compare$V1, showCategory = 10)+ 209 | scale_color_gradientn(colors= colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"),space="rgb")(50)) 210 | 211 | openxlsx::write.xlsx(p$data,file=paste(OutPath,"/",folder,"/SFig2l.xlsx",sep="")) 212 | 213 | #KEGG 214 | CompareGroup <- KEGG_Fun(CompareGroup,groups=c("Cluster_1","Cluster_2"),OrgSp="hsa",pcut=0.05,qcut=0.2) 215 | KEGGresult <- CompareGroup$KEGG_Compare$V1@compareClusterResult 216 | 217 | p <- dotplot(CompareGroup$KEGG_Compare$V1, showCategory = 10)+ 218 | scale_color_gradientn(colors= colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"),space="rgb")(50)) 219 | 220 | openxlsx::write.xlsx(p$data,file=paste(OutPath,"/",folder,"/SFig2m.xlsx",sep="")) 221 | 222 | ### n 223 | library(monocle,lib.loc = "/home/yhdu/R/x86_64-pc-linux-gnu-library/4.1/") 224 | HSMM_HNSCC_epi_tumor <- readRDS('/work/smy/Project/HNSCC_26sample/2.data/26sample_epi_tumor_monocleFigure.rds') 225 | 226 | PPercentage <- table(pData(HSMM_HNSCC_epi_tumor)[,c('State2',"stage")]) %>% data.frame %>% 227 | #Por:用一群细胞silent/expression占比(total:100%) 228 | dplyr::mutate(Por= Freq/apply(table(pData(HSMM_HNSCC_epi_tumor)[,c('State2',"stage")]),1,sum)) 229 | PPercentage$State2 <- paste0("S",PPercentage$State2) 230 | names(PPercentage)[1] <- "State" 231 | openxlsx::write.xlsx(PPercentage,file=paste(OutPath,"/",folder,"/SFig2n.xlsx",sep="")) 232 | 233 | ### o 没做 234 | library(velocyto.R,lib.loc = "/home/yhdu/R/x86_64-pc-linux-gnu-library/4.1/") 235 | Tb_emb <- readRDS('/work/smy/Project/HNSCC_26sample/2.data/26Sample_epi_velocity_Tb_emb.rds') 236 | Tb_rvel.cd <- readRDS('/work/smy/Project/HNSCC_26sample/2.data/26Sample_epi_velocity_Tb_rvel_cd.rds') 237 | Tb_cell.colors <- readRDS('/work/smy/Project/HNSCC_26sample/2.data/26Sample_epi_velocity_Tb_cell_colors.rds') 238 | 239 | show.velocity.on.embedding.cor(Tb_emb,Tb_rvel.cd,n = 2000, scale = "sqrt", cell.colors = ac(x = Tb_cell.colors, alpha = 0.5), 240 | cex = 0.8, arrow.scale = 3, show.grid.flow = TRUE, min.grid.cell.mass = 0.5, grid.n = 40, arrow.lwd = 1, 241 | do.par = FALSE, cell.border.alpha = 0.1) 242 | ### p 243 | #/work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Code/40.Monocle_P2.R 244 | p1 <- plot_cell_trajectory(HSMM_HNSCC_epi_tumor, 245 | theta = -70, 246 | show_branch_points = F, 247 | show_tree = TRUE, color_by = "Pseudotime", cell_size = 0.8) + 248 | scale_color_gradientn(colors= colorRampPalette(RColorBrewer::brewer.pal(20,"YlOrRd"),space="rgb")(50),name = 'Pseudotime')+ 249 | theme(legend.position = "bottom") 250 | openxlsx::write.xlsx(p1$data,file=paste(OutPath,"/",folder,"/SFig2p1.xlsx",sep="")) 251 | 252 | df <- pData(HSMM_HNSCC_epi_tumor) 253 | df$State2 <- ifelse(df$State %in% c("3","4","6","5","7"),"S2",ifelse(df$State=="1","S1","S3")) 254 | pData(HSMM_HNSCC_epi_tumor) <- df 255 | p2 <- plot_cell_trajectory(HSMM_HNSCC_epi_tumor, 256 | theta = -70, 257 | show_branch_points = F, 258 | show_tree = TRUE, color_by = "State2", cell_size = 0.8) + 259 | scale_color_manual(values = RColorBrewer::brewer.pal(3,'Set2'))+ 260 | theme(legend.position = "bottom") 261 | openxlsx::write.xlsx(p2$data,file=paste(OutPath,"/",folder,"/SFig2p2.xlsx",sep="")) 262 | 263 | 264 | 265 | PPercentage <- table(pData(HSMM_HNSCC_epi_tumor)[,c('State2',"RNA_snn_res_final")]) %>% data.frame %>% 266 | #Por:用一群细胞silent/expression占比(total:100%) 267 | dplyr::mutate(Por= Freq/apply(table(pData(HSMM_HNSCC_epi_tumor)[,c('State2',"RNA_snn_res_final")]),1,sum)) 268 | openxlsx::write.xlsx(PPercentage,file=paste(OutPath,"/",folder,"/SFig2p3.xlsx",sep="")) 269 | 270 | 271 | p4 <- plot_cell_trajectory(HSMM_HNSCC_epi_tumor, 272 | theta = -70, 273 | show_branch_points = F, 274 | show_tree = TRUE, color_by = "RNA_snn_res_final", cell_size = 0.8) + 275 | scale_color_manual(values = RColorBrewer::brewer.pal(10,'Dark2'))+ 276 | theme(legend.position = "bottom") + facet_wrap('~RNA_snn_res_final',nrow = 1) 277 | 278 | openxlsx::write.xlsx(p4$data,file=paste(OutPath,"/",folder,"/SFig2p4.xlsx",sep="")) 279 | 280 | ### q 281 | # /work/brj/Collaboration/2022/scRNA/HNSCC/Revise/Code/41.Monocle_P10Re.R 282 | p1 <- plot_cell_trajectory(HSMM_HNSCC_epi_tumor, 283 | theta = -70, 284 | show_branch_points = F, 285 | show_tree = TRUE, color_by = "Pseudotime", cell_size = 0.8) + 286 | scale_color_gradientn(colors= colorRampPalette(RColorBrewer::brewer.pal(20,"YlOrRd"),space="rgb")(50),name = 'Pseudotime')+ 287 | theme(legend.position = "bottom") 288 | openxlsx::write.xlsx(p1$data,file=paste(OutPath,"/",folder,"/SFig2q1.xlsx",sep="")) 289 | 290 | df <- pData(HSMM_HNSCC_epi_tumor) 291 | df$State2 <- ifelse(df$State == "1","S1",ifelse(df$State=="7","S2","S3")) 292 | pData(HSMM_HNSCC_epi_tumor) <- df 293 | p2 <- plot_cell_trajectory(HSMM_HNSCC_epi_tumor, 294 | theta = -70, 295 | show_branch_points = F, 296 | show_tree = TRUE, color_by = "State2", cell_size = 0.8) + 297 | scale_color_manual(values = RColorBrewer::brewer.pal(3,'Set2'))+ 298 | theme(legend.position = "bottom") 299 | openxlsx::write.xlsx(p2$data,file=paste(OutPath,"/",folder,"/SFig2q2.xlsx",sep="")) 300 | 301 | 302 | 303 | PPercentage <- table(pData(HSMM_HNSCC_epi_tumor)[,c('State2',"RNA_snn_res_final")]) %>% data.frame %>% 304 | #Por:用一群细胞silent/expression占比(total:100%) 305 | dplyr::mutate(Por= Freq/apply(table(pData(HSMM_HNSCC_epi_tumor)[,c('State2',"RNA_snn_res_final")]),1,sum)) 306 | openxlsx::write.xlsx(PPercentage,file=paste(OutPath,"/",folder,"/SFig2q3.xlsx",sep="")) 307 | 308 | 309 | p4 <- plot_cell_trajectory(HSMM_HNSCC_epi_tumor, 310 | theta = -70, 311 | show_branch_points = F, 312 | show_tree = TRUE, color_by = "RNA_snn_res_final", cell_size = 0.8) + 313 | scale_color_manual(values = RColorBrewer::brewer.pal(10,'Dark2'))+ 314 | theme(legend.position = "bottom") + facet_wrap('~RNA_snn_res_final',nrow = 1) 315 | 316 | openxlsx::write.xlsx(p4$data,file=paste(OutPath,"/",folder,"/SFig2q4.xlsx",sep="")) 317 | 318 | ### r 319 | p1 <- plot_cell_trajectory(HSMM_HNSCC_epi_tumor, 320 | theta = -70, 321 | show_branch_points = F, 322 | show_tree = TRUE, color_by = "Pseudotime", cell_size = 0.8) + 323 | scale_color_gradientn(colors= colorRampPalette(RColorBrewer::brewer.pal(20,"YlOrRd"),space="rgb")(50),name = 'Pseudotime')+ 324 | theme(legend.position = "bottom") 325 | openxlsx::write.xlsx(p1$data,file=paste(OutPath,"/",folder,"/SFig2r1.xlsx",sep="")) 326 | 327 | df <- pData(HSMM_HNSCC_epi_tumor) 328 | df$State2 <- ifelse(df$State == "1","S1",ifelse(df$State=="2","S2","S3")) 329 | pData(HSMM_HNSCC_epi_tumor) <- df 330 | p2 <- plot_cell_trajectory(HSMM_HNSCC_epi_tumor, 331 | theta = -70, 332 | show_branch_points = F, 333 | show_tree = TRUE, color_by = "State2", cell_size = 0.8) + 334 | scale_color_manual(values = RColorBrewer::brewer.pal(3,'Set2'))+ 335 | theme(legend.position = "bottom") 336 | openxlsx::write.xlsx(p2$data,file=paste(OutPath,"/",folder,"/SFig2r2.xlsx",sep="")) 337 | 338 | 339 | 340 | PPercentage <- table(pData(HSMM_HNSCC_epi_tumor)[,c('State2',"RNA_snn_res_final")]) %>% data.frame %>% 341 | #Por:用一群细胞silent/expression占比(total:100%) 342 | dplyr::mutate(Por= Freq/apply(table(pData(HSMM_HNSCC_epi_tumor)[,c('State2',"RNA_snn_res_final")]),1,sum)) 343 | openxlsx::write.xlsx(PPercentage,file=paste(OutPath,"/",folder,"/SFig2r3.xlsx",sep="")) 344 | 345 | 346 | p4 <- plot_cell_trajectory(HSMM_HNSCC_epi_tumor, 347 | theta = -70, 348 | show_branch_points = F, 349 | show_tree = TRUE, color_by = "RNA_snn_res_final", cell_size = 0.8) + 350 | scale_color_manual(values = RColorBrewer::brewer.pal(10,'Dark2'))+ 351 | theme(legend.position = "bottom") + facet_wrap('~RNA_snn_res_final',nrow = 1) 352 | -------------------------------------------------------------------------------- /4.Fibroblast and Myeloid subtypes features.R: -------------------------------------------------------------------------------- 1 | 2 | HNSCC_fibro <- readRDS("/work/yye/Project/Collaboration/HNSC/Stroma/HNSC_Fibro1_DefineTypes.rds.gz") 3 | Idents(HNSCC_fibro) <- HNSCC_fibro$DefineTypes 4 | 5 | ##A 6 | p <- DimPlot(HNSCC_fibro,reduction = 'umap',cols = c('#A8373D','#F1B998','#43739F','#374D74','#947A7B','#91553D','#D48054','#E09194')) 7 | openxlsx::write.xlsx(p$data,file=paste(OutPath,"/",folder,"/Fig3A.xlsx",sep="")) 8 | 9 | ##B 10 | p <-DotPlot(HNSCC_fibro,feature = c('COL1A1','COL1A2','RSPO1','CRABP1','POSTN','LAMP5','SFRP1','PLA2G2A','SEMA4A','SOD2','CCL19','CCL2','DES','MYF5','MKI67','TOP2A'),cols = 'Spectral')+ 11 | theme(axis.text.x = element_text(angle=90,vjust=0.5,hjust=1)) 12 | openxlsx::write.xlsx(p$data,file=paste(OutPath,"/",folder,"/Fig3B.xlsx",sep="")) 13 | 14 | 15 | ##C 16 | HNSCC_fibro$stage <- ifelse(HNSCC_fibro$orig.ident == 'A-OSCC1'|HNSCC_fibro$orig.ident == 'A-OSCC3'|HNSCC_fibro$orig.ident == 'A-OSCC4'| 17 | HNSCC_fibro$orig.ident == 'A-OSCC5'|HNSCC_fibro$orig.ident == 'A-OSCC6'|HNSCC_fibro$orig.ident == 'A-OSCC7'|HNSCC_fibro$orig.ident == 'E-OSCC4','A', 18 | ifelse(HNSCC_fibro$orig.ident == 'E-OSCC1'|HNSCC_fibro$orig.ident == 'E-OSCC2'|HNSCC_fibro$orig.ident == 'E-OSCC3','E', 19 | ifelse(HNSCC_fibro$orig.ident == 'LN1'|HNSCC_fibro$orig.ident == 'LN6','LN-out', 20 | ifelse(HNSCC_fibro$orig.ident == 'LN2'|HNSCC_fibro$orig.ident == 'LN3'|HNSCC_fibro$orig.ident == 'LN8','LN-in', 21 | ifelse(HNSCC_fibro$orig.ident == 'LN4'|HNSCC_fibro$orig.ident == 'LN5','LN-normal', 22 | ifelse(HNSCC_fibro$orig.ident == 'NT1'|HNSCC_fibro$orig.ident == 'NT2'|HNSCC_fibro$orig.ident == 'NT3','NT', 23 | ifelse(HNSCC_fibro$orig.ident == 'Pre-Ca1'|HNSCC_fibro$orig.ident == 'Pre-Ca2'|HNSCC_fibro$orig.ident == 'Pre-Ca3','Pre', 24 | ifelse(HNSCC_fibro$orig.ident == 'R-OSCC1'|HNSCC_fibro$orig.ident == 'R-OSCC2'| 25 | HNSCC_fibro$orig.ident == 'R-OSCC3'|HNSCC_fibro$orig.ident == 'R-OSCC4','R','NO')))))))) 26 | HNSCC_fibro$stage <- factor(HNSCC_fibro$stage,levels = c('NT','Pre','E','A','LN-in','LN-out','LN-normal','R')) 27 | HNSCC_fibroSub <- subset(HNSCC_fibro,subset = stage %in% c('NT','Pre','E','A','R')) 28 | 29 | ##比例变化 30 | subF <- HNSCC_fibroSub@meta.data 31 | SufibrosDisA <- table(subF[,c("stage","DefineTypes")]) %>% 32 | data.frame %>% set_colnames(c("Stage","CellTypes","Number")) 33 | 34 | SufibrosDisA_Tissue <- lapply(split(SufibrosDisA,SufibrosDisA$Stage),function(X){ 35 | X%>% dplyr::mutate(Per=100*Number/sum(Number)) 36 | }) %>% dplyr::bind_rows(.) 37 | 38 | SufibrosDisA_Tissue$Stage <- factor(SufibrosDisA_Tissue$Stage,levels = c('NT','Pre','E','A','R')) 39 | SufibrosDisA_Tissue$CellTypes <- factor(SufibrosDisA_Tissue$CellTypes,levels = names(table(HNSCC_fibro$DefineTypes))) 40 | 41 | SufibrosDisA_TissueP <- arrange(SufibrosDisA_Tissue,CellTypes) %>% dplyr::filter(Number != 0 ) 42 | openxlsx::write.xlsx(SufibrosDisA_TissueP,file=paste(OutPath,"/",folder,"/Fig3C.xlsx",sep="")) 43 | 44 | ###D 45 | ### PSOTN fib, RSPO 46 | GSVAscore <- readr::read_rds("/server1_work/brj/Collaboration/scRNA/2022/HNSCC/Revise/Result/HNSC_POSTN_SPP1.rds.gz") 47 | GSVAF <- GSVAscore$POSTN_SPP1_score[[1]] %>% t() %>% as.data.frame() 48 | GSVAF$Mixture <- substr(rownames(GSVAF),0,12) 49 | HNSC_SignatureF <- GSVAF[substr(rownames(GSVAF),14,16) %in% c("01A"),] 50 | HNSC_SignatureF$Mixture <- gsub("[.]","-",HNSC_SignatureF$Mixture) 51 | 52 | HNSC_POSTN_SPP1 <- HNSC_SignatureF %>% dplyr::select(c(Mixture,POSTNTop50,SPP1Top50)) 53 | 54 | 55 | GSVAscore1 <- readr::read_rds("/server1_work/brj/Collaboration/scRNA/2022/HNSCC/Revise/Result/HNSC_RSPO1_FOLR2.rds.gz") 56 | GSVAF1 <- GSVAscore1$RSPO1_FOLR2_score[[1]] %>% t() %>% as.data.frame() 57 | GSVAF1$Mixture <- substr(rownames(GSVAF1),0,12) 58 | HNSC_SignatureF1 <- GSVAF1[substr(rownames(GSVAF1),14,16) %in% c("01A"),] 59 | HNSC_SignatureF1$Mixture <- gsub("[.]","-",HNSC_SignatureF1$Mixture) 60 | 61 | HNSC_RSPO1_FOLR2 <- HNSC_SignatureF1 62 | 63 | HNSCC_proportion <- HNSC_SignatureF %>% inner_join(HNSC_SignatureF1) %>% 64 | dplyr::select(c(Mixture,RSPO1Top50,POSTNTop50,FOLR2Top50,SPP1Top50)) 65 | names(HNSCC_proportion) <- gsub("Top50","",names(HNSCC_proportion)) 66 | ##clinical info 67 | surviva <- read.delim("/server1_work/brj/GEO/CRC/multiCox/TCGA_ClinicalData_20180420.txt")#,","gender" 68 | survival <- surviva %>% dplyr::select("bcr_patient_barcode",'OS','OS.time') %>% 69 | rename("Mixture" = "bcr_patient_barcode") %>% 70 | dplyr::filter(Mixture %in% HNSCC_proportion$Mixture) %>% 71 | dplyr::mutate(OS.time = as.numeric(as.numeric(as.character(.$OS.time))/30)) %>% 72 | dplyr::filter(!is.na(OS.time)) %>% inner_join(HNSCC_proportion) %>% 73 | rename("Sample" = "Mixture") 74 | 75 | openxlsx::write.xlsx(survival,file=paste(OutPath,"/",folder,"/FigDH.xlsx",sep="")) 76 | 77 | 78 | 79 | ## I 80 | CompareGroup <- readRDS('/work/smy/Project/HNSCC_26sample/2.data/POSTN vs RSPO1_GO_KEGGenrichmentResult.rds') 81 | p <- dotplot(CompareGroup$GO_Compare$V1, showCategory = 10)+ 82 | scale_color_gradientn(colors= colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"),space="rgb")(50)) 83 | 84 | Plot <- p$data 85 | openxlsx::write.xlsx(Plot,file=paste(OutPath,"/",folder,"/Fig3I.xlsx",sep="")) 86 | 87 | 88 | ## E 89 | ##mye cell======= 90 | HNSCC_myeloid <- readRDS('/work/smy/Project/HNSCC/2.data/DeinfeTypes/26Sample_Myeloid_DefineTypesFinal.rds') 91 | HNSCC_myeloid$DefineTypes <- factor(HNSCC_myeloid$DefineTypes,levels = c("cDC1","cDC2","pDC","LAMP3+ DCs","Monocytes","Mast cells", 92 | "CXCL10+ macrophages","C1QC+MRC-macrophages","SPP1+ macorphages","FOLR2+ macorphages","Proliferating myeloid cells")) 93 | Idents(HNSCC_myeloid) <- HNSCC_myeloid$DefineTypes 94 | p <- DimPlot(HNSCC_myeloid,reduction = 'umap',cols = c('#FEE0C4','#EE948B','#F4B3B1','#5C6479','#334873','#BBA399', 95 | '#79545C','#FAD181','#483D4D','#A44E41','#F1AC73')) 96 | openxlsx::write.xlsx(p$data,file=paste(OutPath,"/",folder,"/Fig3E.xlsx",sep="")) 97 | ## G 98 | HNSCC_myeloid$stage <- ifelse(HNSCC_myeloid$orig.ident == 'A-OSCC1'|HNSCC_myeloid$orig.ident == 'A-OSCC3'|HNSCC_myeloid$orig.ident == 'A-OSCC4'| 99 | HNSCC_myeloid$orig.ident == 'A-OSCC5'|HNSCC_myeloid$orig.ident == 'A-OSCC6'|HNSCC_myeloid$orig.ident == 'A-OSCC7'|HNSCC_myeloid$orig.ident == 'E-OSCC4','A', 100 | ifelse(HNSCC_myeloid$orig.ident == 'E-OSCC1'|HNSCC_myeloid$orig.ident == 'E-OSCC2'|HNSCC_myeloid$orig.ident == 'E-OSCC3','E', 101 | ifelse(HNSCC_myeloid$orig.ident == 'LN1'|HNSCC_myeloid$orig.ident == 'LN6','LN-out', 102 | ifelse(HNSCC_myeloid$orig.ident == 'LN2'|HNSCC_myeloid$orig.ident == 'LN3'|HNSCC_myeloid$orig.ident == 'LN8','LN-in', 103 | ifelse(HNSCC_myeloid$orig.ident == 'LN4'|HNSCC_myeloid$orig.ident == 'LN5','LN-normal', 104 | ifelse(HNSCC_myeloid$orig.ident == 'NT1'|HNSCC_myeloid$orig.ident == 'NT2'|HNSCC_myeloid$orig.ident == 'NT3','NT', 105 | ifelse(HNSCC_myeloid$orig.ident == 'Pre-Ca1'|HNSCC_myeloid$orig.ident == 'Pre-Ca2'|HNSCC_myeloid$orig.ident == 'Pre-Ca3','Pre', 106 | ifelse(HNSCC_myeloid$orig.ident == 'R-OSCC1'|HNSCC_myeloid$orig.ident == 'R-OSCC2'| 107 | HNSCC_myeloid$orig.ident == 'R-OSCC3'|HNSCC_myeloid$orig.ident == 'R-OSCC4','R','NO')))))))) 108 | 109 | HNSCC_myeloidSub <- subset(HNSCC_myeloid,subset = stage %in% c('NT','Pre','E','A','R')) 110 | HNSCC_myeloidSub$stage <- factor(HNSCC_myeloidSub$stage,levels = c('NT','Pre','E','A','R')) 111 | 112 | ##比例变化 113 | subF <- HNSCC_myeloidSub@meta.data 114 | SufibrosDisA <- table(subF[,c("stage","DefineTypes")]) %>% 115 | data.frame %>% set_colnames(c("Stage","CellTypes","Number")) 116 | 117 | SufibrosDisA_Tissue <- lapply(split(SufibrosDisA,SufibrosDisA$Stage),function(X){ 118 | X%>% dplyr::mutate(Per=100*Number/sum(Number)) 119 | }) %>% dplyr::bind_rows(.) 120 | 121 | SufibrosDisA_Tissue$Stage <- factor(SufibrosDisA_Tissue$Stage,levels = c('NT','Pre','E','A','R')) 122 | SufibrosDisA_Tissue$CellTypes <- factor(SufibrosDisA_Tissue$CellTypes,levels = names(table(HNSCC_myeloid$DefineTypes))) 123 | 124 | SufibrosDisA_TissueP <- arrange(SufibrosDisA_Tissue,CellTypes) %>% dplyr::filter(Number != 0 ) 125 | openxlsx::write.xlsx(SufibrosDisA_TissueP,file=paste(OutPath,"/",folder,"/Fig3G.xlsx",sep="")) 126 | 127 | ## F 128 | p <- DotPlot(HNSCC_myeloid,feature = c('CLEC9A','BATF3','CD1C','FCER1A','LILRA4','GZMB','LAMP3','FSCN1','FCN1','VCAN','TPSB2','CMA1', 129 | 'CXCL10','CXCL9','C1QC','MRC1','SPP1','CD68','FOLR2','CCL18','MKI67','TOP2A'),cols = 'Spectral')+ 130 | theme(axis.text.x = element_text(angle=90,vjust=0.5,hjust=1)) 131 | Plot <- p$data 132 | 133 | openxlsx::write.xlsx(Plot,file=paste(OutPath,"/",folder,"/Fig3F.xlsx",sep="")) 134 | 135 | ## J 136 | CompareGroup <- readRDS('/work/smy/Project/HNSCC_26sample/2.data/SPP1 vs FOLR2_GO_KEGGenrichmentResult.rds') 137 | p <- dotplot(CompareGroup$GO_Compare$V1, showCategory = 10)+ 138 | scale_color_gradientn(colors= colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"),space="rgb")(50)) 139 | 140 | Plot <- p$data 141 | openxlsx::write.xlsx(Plot,file=paste(OutPath,"/",folder,"/Fig3J.xlsx",sep="")) 142 | -------------------------------------------------------------------------------- /5.Cell-Cell Interaction.R: -------------------------------------------------------------------------------- 1 | # Cell-cell interaction 2 | cellchatMyeFib <- readRDS('/work/brj/Collaboration/2022/scRNA/HNSCC/Data/26sample_definetype_MyeFib_cellchat_stageSplit.rds') 3 | cellchatMyeFib <- cellchatMyeFib[c(6,7,2,1,4,3,5,8)] 4 | NetWeightMyeFib <- lapply(c(1:8), function(sub){ 5 | mat <- cellchatMyeFib[[sub]]@net$weight 6 | mat <- as.data.frame(mat) 7 | MyeFib <- grep("macro|macor|DC|Mast|myeloid|Mono|fibroblast|Fib",colnames(mat),value = T) 8 | mat2 <- mat[c("POSTN+ fibroblast","RSPO1+ fibroblast","SPP1+ macorphages","FOLR2+ macorphages"),MyeFib] 9 | mat2$stage <- names(cellchatMyeFib)[sub] 10 | return(mat2) 11 | }) 12 | 13 | NetWeightMyeFib <- bind_rows(NetWeightMyeFib) 14 | NetWeightMyeFib_melt <- melt(NetWeightMyeFib) 15 | NetWeightMyeFib_melt$cluster <- rep(c("POSTN+ fibroblast","RSPO1+ fibroblast","SPP1+ macrophages","FOLR2+ macrophages"),times = 152)# 152: length(NetWeightMyeFib_melt$variable)/4 16 | NetWeightMyeFib_melt$cellchat <- paste(NetWeightMyeFib_melt$cluster,'to',NetWeightMyeFib_melt$variable,sep = ' ') 17 | 18 | NetWeightMyeFib_meltF2 <- NetWeightMyeFib_melt %>% 19 | filter(cluster %in% c('POSTN+ fibroblast')) %>% 20 | filter(variable %in% c("SPP1+ macorphages","FOLR2+ macorphages")) %>% 21 | filter(stage %in% c("NT","Pre","E","A")) %>% 22 | mutate(cellchat2 = paste(cellchat,stage)) 23 | NetWeightMyeFib_meltF2$stage <- factor(NetWeightMyeFib_meltF2$stage,levels = c("NT","Pre","E","A")) 24 | NetWeightMyeFib_meltF2$cellchat2 <- factor(NetWeightMyeFib_meltF2$cellchat2,levels = rev(unique(NetWeightMyeFib_meltF2$cellchat2))) 25 | Yorder <- c('POSTN+ fibroblast to SPP1+ macorphages NT','POSTN+ fibroblast to SPP1+ macorphages Pre', 26 | "POSTN+ fibroblast to SPP1+ macorphages E","POSTN+ fibroblast to SPP1+ macorphages A", 27 | '', 28 | "POSTN+ fibroblast to FOLR2+ macorphages NT",'POSTN+ fibroblast to FOLR2+ macorphages Pre', 29 | "POSTN+ fibroblast to FOLR2+ macorphages E","POSTN+ fibroblast to FOLR2+ macorphages A") 30 | NetWeightMyeFib_meltF1 <- NetWeightMyeFib_melt %>% 31 | filter(cluster %in% c('SPP1+ macrophages')) %>% 32 | filter(variable %in% c("POSTN+ fibroblast","RSPO1+ fibroblast")) %>% 33 | filter(stage %in% c("NT","Pre","E","A")) %>% 34 | mutate(cellchat2 = paste(cellchat,stage)) 35 | 36 | 37 | 38 | NetWeightMyeFib_meltF1$stage <- factor(NetWeightMyeFib_meltF1$stage,levels = c("NT","Pre","E","A")) 39 | NetWeightMyeFib_meltF1$cellchat2 <- factor(NetWeightMyeFib_meltF1$cellchat2,levels = rev(unique(NetWeightMyeFib_meltF1$cellchat2))) 40 | Yorder <- c('SPP1+ macrophages to POSTN+ fibroblast NT','SPP1+ macrophages to POSTN+ fibroblast Pre', 41 | "SPP1+ macrophages to POSTN+ fibroblast E","SPP1+ macrophages to POSTN+ fibroblast A", 42 | '', 43 | "SPP1+ macrophages to RSPO1+ fibroblast NT",'SPP1+ macrophages to RSPO1+ fibroblast Pre', 44 | "SPP1+ macrophages to RSPO1+ fibroblast E","SPP1+ macrophages to RSPO1+ fibroblast A") 45 | HNSCC_Whole <- readRDS('/work/smy/Project/HNSCC/2.data/DeinfeTypes/HNSCC_26sampleAll_DefineTypes.rds.gz') 46 | Idents(HNSCC_Whole) <- HNSCC_Whole$DefineTypes 47 | HNSCC_Whole$celltype_RE <- HNSCC_Whole$DefineTypes 48 | 49 | ## add tumor in epi : metadata$Malignancy == 'malignant' 50 | metadata <- readRDS('/work/smy/Project/HNSCC_26sample/2.data/26sample_tumor_cell_metadata.rds') 51 | malignant <- rownames(metadata[grep('malignant',metadata$Malignancy),]) 52 | HNSCC_Whole$celltype_RE[malignant] <- 'TumorCell' 53 | 54 | HNSCC_TumorFib_Allstage <- subset(HNSCC_Whole,subset = celltype_RE %in% c("POSTN+ fibroblast","TumorCell")) 55 | HNSCC_TumorFib <- subset(HNSCC_TumorFib_Allstage,subset = stage %in% c("NT","Pre","E","A")) 56 | HNSCC_TumorFib$celltype_Re <- paste(HNSCC_TumorFib$celltype_RE,HNSCC_TumorFib$stage,sep = "_") 57 | #HNSCC_TumorFib$celltype_aggregate <- ifelse(HNSCC_TumorFib$celltype_Re %in% c("TumorCell_E","TumorCell_NT","TumorCell_Pre"),"TumorCell_NTPreE",HNSCC_TumorFib$celltype_Re) 58 | 59 | HNSCC_TumorFib$celltype_aggregate <- HNSCC_TumorFib$celltype_Re 60 | 61 | seurat_obj <- HNSCC_TumorFib 62 | celltype_id = "celltype_aggregate" # metadata column name of the cell type of interest 63 | seurat_obj = SetIdent(seurat_obj, value = seurat_obj[[celltype_id]]) 64 | 65 | ####Read in the NicheNet ligand-receptor network and ligand-target matrix 66 | # ##the following of human origin 67 | ligand_target_matrix <- readRDS("/work/brj/Collaboration/2022/scRNA/HNSCC/NichenetrData/ligand_target_matrix.rds") 68 | lr_network <- readRDS("/work/brj/Collaboration/2022/scRNA/HNSCC/NichenetrData/lr_network.rds") 69 | lr_network = lr_network %>% mutate(bonafide = ! database %in% c("ppi_prediction","ppi_prediction_go")) 70 | lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% distinct(ligand, receptor, bonafide) 71 | 72 | organism = "human" # user adaptation required on own dataset 73 | 74 | 75 | 76 | ####nichnet 77 | #####1. Define the niches/microenvironments of interest 78 | #! Important: your receiver cell type should consist of 1 cluster! 79 | table(seurat_obj$celltype_aggregate) 80 | 81 | niches = list( 82 | "Stage_A" = list( 83 | "sender" = "POSTN+ fibroblast_A", 84 | "receiver" = "TumorCell_A"), 85 | "Stage_E" = list( 86 | "sender" = c("POSTN+ fibroblast_E"), 87 | "receiver" = "TumorCell_E"), 88 | "Stage_Pre" = list( 89 | "sender" = c("POSTN+ fibroblast_Pre"), 90 | "receiver" = "TumorCell_Pre"), 91 | "Stage_NT" = list( 92 | "sender" = c("POSTN+ fibroblast_NT"), 93 | "receiver" = "TumorCell_NT") 94 | ) # user adaptation required on own dataset 95 | 96 | 97 | 98 | #####2. Calculate differential expression between the niches # 得到差异性受体配体矩阵,受体--配体 99 | assay_oi = "RNA" # other possibilities: RNA,... 100 | DE_sender = calculate_niche_de(seurat_obj = seurat_obj %>% subset(features = lr_network$ligand %>% unique()), niches = niches, type = "sender", assay_oi = assay_oi) # only ligands important for sender cell types 101 | DE_receiver = calculate_niche_de(seurat_obj = seurat_obj %>% subset(features = lr_network$receptor %>% unique()), niches = niches, type = "receiver", assay_oi = assay_oi) # only receptors now, later on: DE analysis to find targets 102 | DE_sender = DE_sender %>% mutate(avg_log2FC = ifelse(avg_log2FC == Inf, max(avg_log2FC[is.finite(avg_log2FC)]), ifelse(avg_log2FC == -Inf, min(avg_log2FC[is.finite(avg_log2FC)]), avg_log2FC))) 103 | DE_receiver = DE_receiver %>% mutate(avg_log2FC = ifelse(avg_log2FC == Inf, max(avg_log2FC[is.finite(avg_log2FC)]), ifelse(avg_log2FC == -Inf, min(avg_log2FC[is.finite(avg_log2FC)]), avg_log2FC))) 104 | expression_pct = 0.10 ###Process DE results and filter 105 | DE_sender_processed = process_niche_de(DE_table = DE_sender, niches = niches, expression_pct = expression_pct, type = "sender") 106 | DE_receiver_processed = process_niche_de(DE_table = DE_receiver, niches = niches, expression_pct = expression_pct, type = "receiver") 107 | specificity_score_LR_pairs = "min_lfc" ###Combine sender-receiver DE based on L-R pairs: 108 | DE_sender_receiver = combine_sender_receiver_de(DE_sender_processed, DE_receiver_processed, lr_network, specificity_score = specificity_score_LR_pairs) 109 | 110 | 111 | #####3. Optional: Calculate differential expression between the different spatial regions 112 | include_spatial_info_sender = FALSE # if not spatial info to include: put this to false # user adaptation required on own dataset 113 | include_spatial_info_receiver = FALSE # if spatial info to include: put this to true # user adaptation required on own dataset 114 | #spatial_info = tibble(celltype_region_oi = "CAF_High", celltype_other_region = "myofibroblast_High", niche = "pEMT_High_niche", celltype_type = "sender") # user adaptation required on own dataset 115 | #specificity_score_spatial = "lfc" 116 | # this is how this should be defined if you don't have spatial info 117 | # mock spatial info 118 | if(include_spatial_info_sender == FALSE & include_spatial_info_receiver == FALSE){ 119 | spatial_info = tibble(celltype_region_oi = NA, celltype_other_region = NA) %>% mutate(niche = niches %>% names() %>% head(1), celltype_type = "sender") 120 | } 121 | 122 | if(include_spatial_info_sender == TRUE){ 123 | sender_spatial_DE = calculate_spatial_DE(seurat_obj = seurat_obj %>% subset(features = lr_network$ligand %>% unique()), spatial_info = spatial_info %>% filter(celltype_type == "sender")) 124 | sender_spatial_DE_processed = process_spatial_de(DE_table = sender_spatial_DE, type = "sender", lr_network = lr_network, expression_pct = expression_pct, specificity_score = specificity_score_spatial) 125 | 126 | # add a neutral spatial score for sender celltypes in which the spatial is not known / not of importance 127 | sender_spatial_DE_others = get_non_spatial_de(niches = niches, spatial_info = spatial_info, type = "sender", lr_network = lr_network) 128 | sender_spatial_DE_processed = sender_spatial_DE_processed %>% bind_rows(sender_spatial_DE_others) 129 | 130 | sender_spatial_DE_processed = sender_spatial_DE_processed %>% mutate(scaled_ligand_score_spatial = scale_quantile_adapted(ligand_score_spatial)) 131 | 132 | } else { 133 | # # add a neutral spatial score for all sender celltypes (for none of them, spatial is relevant in this case) 134 | sender_spatial_DE_processed = get_non_spatial_de(niches = niches, spatial_info = spatial_info, type = "sender", lr_network = lr_network) 135 | sender_spatial_DE_processed = sender_spatial_DE_processed %>% mutate(scaled_ligand_score_spatial = scale_quantile_adapted(ligand_score_spatial)) 136 | 137 | } 138 | ## [1] "Calculate Spatial DE between: CAF_High and myofibroblast_High" 139 | 140 | if(include_spatial_info_receiver == TRUE){ 141 | receiver_spatial_DE = calculate_spatial_DE(seurat_obj = seurat_obj %>% subset(features = lr_network$receptor %>% unique()), spatial_info = spatial_info %>% filter(celltype_type == "receiver")) 142 | receiver_spatial_DE_processed = process_spatial_de(DE_table = receiver_spatial_DE, type = "receiver", lr_network = lr_network, expression_pct = expression_pct, specificity_score = specificity_score_spatial) 143 | 144 | # add a neutral spatial score for receiver celltypes in which the spatial is not known / not of importance 145 | receiver_spatial_DE_others = get_non_spatial_de(niches = niches, spatial_info = spatial_info, type = "receiver", lr_network = lr_network) 146 | receiver_spatial_DE_processed = receiver_spatial_DE_processed %>% bind_rows(receiver_spatial_DE_others) 147 | 148 | receiver_spatial_DE_processed = receiver_spatial_DE_processed %>% mutate(scaled_receptor_score_spatial = scale_quantile_adapted(receptor_score_spatial)) 149 | 150 | } else { 151 | # # add a neutral spatial score for all receiver celltypes (for none of them, spatial is relevant in this case) 152 | receiver_spatial_DE_processed = get_non_spatial_de(niches = niches, spatial_info = spatial_info, type = "receiver", lr_network = lr_network) 153 | receiver_spatial_DE_processed = receiver_spatial_DE_processed %>% mutate(scaled_receptor_score_spatial = scale_quantile_adapted(receptor_score_spatial)) 154 | } 155 | 156 | #####4. Calculate ligand activities and infer active ligand-target links 配体-- target 157 | lfc_cutoff = 0.15 # recommended for 10x as min_lfc cutoff. 158 | specificity_score_targets = "min_lfc" 159 | DE_receiver_targets = calculate_niche_de_targets(seurat_obj = seurat_obj, niches = niches, lfc_cutoff = lfc_cutoff, expression_pct = expression_pct, assay_oi = assay_oi) 160 | DE_receiver_processed_targets = process_receiver_target_de(DE_receiver_targets = DE_receiver_targets, niches = niches, expression_pct = expression_pct, specificity_score = specificity_score_targets) 161 | 162 | background = DE_receiver_processed_targets %>% pull(target) %>% unique() 163 | geneset_niche1 = DE_receiver_processed_targets %>% filter(receiver == niches[[1]]$receiver & target_score >= lfc_cutoff & target_significant == 1 & target_present == 1) %>% pull(target) %>% unique() 164 | geneset_niche2 = DE_receiver_processed_targets %>% filter(receiver == niches[[2]]$receiver & target_score >= lfc_cutoff & target_significant == 1 & target_present == 1) %>% pull(target) %>% unique() 165 | geneset_niche3 = DE_receiver_processed_targets %>% filter(receiver == niches[[3]]$receiver & target_score >= lfc_cutoff & target_significant == 1 & target_present == 1) %>% pull(target) %>% unique() 166 | geneset_niche4 = DE_receiver_processed_targets %>% filter(receiver == niches[[4]]$receiver & target_score >= lfc_cutoff & target_significant == 1 & target_present == 1) %>% pull(target) %>% unique() 167 | 168 | 169 | # Good idea to check which genes will be left out of the ligand activity analysis (=when not present in the rownames of the ligand-target matrix). 170 | # If many genes are left out, this might point to some issue in the gene naming (eg gene aliases and old gene symbols, bad human-mouse mapping) 171 | geneset_niche1 %>% setdiff(rownames(ligand_target_matrix)) 172 | geneset_niche2 %>% setdiff(rownames(ligand_target_matrix)) 173 | geneset_niche3 %>% setdiff(rownames(ligand_target_matrix)) 174 | geneset_niche4 %>% setdiff(rownames(ligand_target_matrix)) 175 | 176 | print(length(geneset_niche1))##We recommend having between 20 and 1000 genes in the geneset of interest 177 | print(length(geneset_niche2)) 178 | print(length(geneset_niche3)) 179 | print(length(geneset_niche4)) 180 | 181 | top_n_target = 250 182 | 183 | 184 | niche_geneset_list = list( 185 | "Stage_A" = list( 186 | "receiver" = niches[[1]]$receiver, 187 | "geneset" = geneset_niche1, 188 | "background" = background), 189 | "Stage_E" = list( 190 | "receiver" = niches[[2]]$receiver, 191 | "geneset" = geneset_niche2 , 192 | "background" = background), 193 | "Stage_Pre" = list( 194 | "receiver" = niches[[3]]$receiver, 195 | "geneset" = geneset_niche3 , 196 | "background" = background), 197 | "Stage_NT" = list( 198 | "receiver" = niches[[4]]$receiver, 199 | "geneset" = geneset_niche4 , 200 | "background" = background) 201 | ) 202 | ligand_activities_targets = get_ligand_activities_targets(niche_geneset_list = niche_geneset_list, ligand_target_matrix = ligand_target_matrix, top_n_target = top_n_target) 203 | #[1] "Calculate Ligand activities for: TumorCell_A" 204 | #[1] "Calculate Ligand activities for: SNCG+ Basal_R" 205 | 206 | #####5. Calculate (scaled) expression of ligands, receptors and targets across cell types of interest (log expression values and expression fractions) 207 | features_oi = union(lr_network$ligand, lr_network$receptor) %>% union(ligand_activities_targets$target) %>% setdiff(NA) 208 | dotplot = suppressWarnings(Seurat::DotPlot(seurat_obj %>% subset(idents = niches %>% unlist() %>% unique()), features = features_oi, assay = assay_oi)) 209 | exprs_tbl = dotplot$data %>% as_tibble() 210 | exprs_tbl = exprs_tbl %>% rename(celltype = id, gene = features.plot, expression = avg.exp, expression_scaled = avg.exp.scaled, fraction = pct.exp) %>% 211 | mutate(fraction = fraction/100) %>% as_tibble() %>% select(celltype, gene, expression, expression_scaled, fraction) %>% distinct() %>% arrange(gene) %>% mutate(gene = as.character(gene)) 212 | exprs_tbl_ligand = exprs_tbl %>% filter(gene %in% lr_network$ligand) %>% rename(sender = celltype, ligand = gene, ligand_expression = expression, ligand_expression_scaled = expression_scaled, ligand_fraction = fraction) 213 | exprs_tbl_receptor = exprs_tbl %>% filter(gene %in% lr_network$receptor) %>% rename(receiver = celltype, receptor = gene, receptor_expression = expression, receptor_expression_scaled = expression_scaled, receptor_fraction = fraction) 214 | exprs_tbl_target = exprs_tbl %>% filter(gene %in% ligand_activities_targets$target) %>% rename(receiver = celltype, target = gene, target_expression = expression, target_expression_scaled = expression_scaled, target_fraction = fraction) 215 | 216 | exprs_tbl_ligand = exprs_tbl_ligand %>% mutate(scaled_ligand_expression_scaled = scale_quantile_adapted(ligand_expression_scaled)) %>% mutate(ligand_fraction_adapted = ligand_fraction) %>% mutate_cond(ligand_fraction >= expression_pct, ligand_fraction_adapted = expression_pct) %>% mutate(scaled_ligand_fraction_adapted = scale_quantile_adapted(ligand_fraction_adapted)) 217 | exprs_tbl_receptor = exprs_tbl_receptor %>% mutate(scaled_receptor_expression_scaled = scale_quantile_adapted(receptor_expression_scaled)) %>% mutate(receptor_fraction_adapted = receptor_fraction) %>% mutate_cond(receptor_fraction >= expression_pct, receptor_fraction_adapted = expression_pct) %>% mutate(scaled_receptor_fraction_adapted = scale_quantile_adapted(receptor_fraction_adapted)) 218 | #####6. Expression fraction and receptor 219 | exprs_sender_receiver = lr_network %>% 220 | inner_join(exprs_tbl_ligand, by = c("ligand")) %>% 221 | inner_join(exprs_tbl_receptor, by = c("receptor")) %>% inner_join(DE_sender_receiver %>% distinct(niche, sender, receiver)) 222 | ligand_scaled_receptor_expression_fraction_df = exprs_sender_receiver %>% group_by(ligand, receiver) %>% mutate(rank_receptor_expression = dense_rank(receptor_expression), rank_receptor_fraction = dense_rank(receptor_fraction)) %>% mutate(ligand_scaled_receptor_expression_fraction = 0.5*( (rank_receptor_fraction / max(rank_receptor_fraction)) + ((rank_receptor_expression / max(rank_receptor_expression))) ) ) %>% distinct(ligand, receptor, receiver, ligand_scaled_receptor_expression_fraction, bonafide) %>% distinct() %>% ungroup() 223 | 224 | #####7. Prioritization of ligand-receptor and ligand-target links 225 | prioritizing_weights = c("scaled_ligand_score" = 5, 226 | "scaled_ligand_expression_scaled" = 1, 227 | "ligand_fraction" = 1, 228 | "scaled_ligand_score_spatial" = 2, 229 | "scaled_receptor_score" = 0.5, 230 | "scaled_receptor_expression_scaled" = 0.5, 231 | "receptor_fraction" = 1, 232 | "ligand_scaled_receptor_expression_fraction" = 1, 233 | "scaled_receptor_score_spatial" = 0, 234 | "scaled_activity" = 0, 235 | "scaled_activity_normalized" = 1, 236 | "bona_fide" = 1) 237 | output = list(DE_sender_receiver = DE_sender_receiver, ligand_scaled_receptor_expression_fraction_df = ligand_scaled_receptor_expression_fraction_df, sender_spatial_DE_processed = sender_spatial_DE_processed, receiver_spatial_DE_processed = receiver_spatial_DE_processed, 238 | ligand_activities_targets = ligand_activities_targets, DE_receiver_processed_targets = DE_receiver_processed_targets, exprs_tbl_ligand = exprs_tbl_ligand, exprs_tbl_receptor = exprs_tbl_receptor, exprs_tbl_target = exprs_tbl_target) 239 | prioritization_tables = get_prioritization_tables(output, prioritizing_weights) 240 | prioritization_tables$prioritization_tbl_ligand_receptor %>% filter(receiver == niches[[2]]$receiver) %>% head(10) 241 | prioritization_tables$prioritization_tbl_ligand_target %>% filter(receiver == niches[[2]]$receiver) %>% head(10) 242 | 243 | ####8. Visualization of the Differential NicheNet output 244 | top_ligand_niche_df = prioritization_tables$prioritization_tbl_ligand_receptor %>% select(niche, sender, receiver, ligand, receptor, prioritization_score) %>% group_by(ligand) %>% top_n(1, prioritization_score) %>% ungroup() %>% select(ligand, receptor, niche) %>% rename(top_niche = niche) 245 | top_ligand_receptor_niche_df = prioritization_tables$prioritization_tbl_ligand_receptor %>% select(niche, sender, receiver, ligand, receptor, prioritization_score) %>% group_by(ligand, receptor) %>% top_n(1, prioritization_score) %>% ungroup() %>% select(ligand, receptor, niche) %>% rename(top_niche = niche) 246 | ligand_prioritized_tbl_oi = prioritization_tables$prioritization_tbl_ligand_receptor %>% select(niche, sender, receiver, ligand, prioritization_score) %>% group_by(ligand, niche) %>% top_n(1, prioritization_score) %>% ungroup() %>% distinct() %>% inner_join(top_ligand_niche_df) %>% filter(niche == top_niche) %>% group_by(niche) %>% top_n(20, prioritization_score) %>% ungroup() # get the top50 ligands per niche 247 | 248 | outpath <- "/work/brj/Collaboration/2022/scRNA/HNSCC/Result/nichenetr/" 249 | targetcell <- "POSTNTumorsplit" 250 | 251 | receiver_oi = "TumorCell_A" 252 | sender_oi="POSTN+ fibroblast_A" 253 | filtered_ligands = ligand_prioritized_tbl_oi %>% filter(receiver == receiver_oi) %>% filter(sender == sender_oi) %>% pull(ligand) %>% unique() 254 | prioritized_tbl_oi = prioritization_tables$prioritization_tbl_ligand_receptor %>% filter(ligand %in% filtered_ligands) %>% select(niche, sender, receiver, ligand, receptor, ligand_receptor, prioritization_score) %>% distinct() %>% inner_join(top_ligand_receptor_niche_df) %>% group_by(ligand) %>% filter(receiver == receiver_oi) %>% top_n(2, prioritization_score) %>% ungroup() 255 | lfc_plot = make_ligand_receptor_lfc_plot(receiver_oi, prioritized_tbl_oi, prioritization_tables$prioritization_tbl_ligand_receptor, plot_legend = FALSE, heights = NULL, widths = NULL) 256 | 257 | pdf(paste(outpath,targetcell,"/1.Top20ligand_plot.pdf",sep = ""),width = 10,height = 10) 258 | lfc_plot 259 | dev.off() 260 | 261 | 262 | ##Ligand expression, activity and target genes 配体表达,活性和靶基因 263 | exprs_activity_target_plot = make_ligand_activity_target_exprs_plot(receiver_oi, prioritized_tbl_oi, prioritization_tables$prioritization_tbl_ligand_receptor, prioritization_tables$prioritization_tbl_ligand_target, output$exprs_tbl_ligand, output$exprs_tbl_target, lfc_cutoff, ligand_target_matrix, plot_legend = T, heights = 1, widths = 1) 264 | 265 | pdf(paste(outpath,targetcell,"/2.LigandExpressionActivityTargetgenes_plotLegend.pdf",sep = ""),width = 40,height = 10) 266 | exprs_activity_target_plot$combined_plot 267 | dev.off() 268 | 269 | filtered_ligands = ligand_prioritized_tbl_oi %>% filter(receiver == receiver_oi) %>% filter(sender == sender_oi)%>% top_n(20, prioritization_score) %>% pull(ligand) %>% unique() 270 | prioritized_tbl_oi = prioritization_tables$prioritization_tbl_ligand_receptor %>% filter(ligand %in% filtered_ligands) %>% select(niche, sender, receiver, ligand, receptor, ligand_receptor, prioritization_score) %>% distinct() %>% inner_join(top_ligand_receptor_niche_df) %>% group_by(ligand) %>% filter(receiver == receiver_oi) %>% top_n(2, prioritization_score) %>% ungroup() 271 | exprs_activity_target_plot = make_ligand_activity_target_exprs_plot(receiver_oi, prioritized_tbl_oi, prioritization_tables$prioritization_tbl_ligand_receptor, prioritization_tables$prioritization_tbl_ligand_target, output$exprs_tbl_ligand, output$exprs_tbl_target, lfc_cutoff, ligand_target_matrix, plot_legend = FALSE, heights = NULL, widths = NULL) 272 | 273 | pdf(paste(outpath,targetcell,"/3.LigandExpressionActivityTargetgenes_Filterplot.pdf",sep = ""),width = 40,height = 10) 274 | exprs_activity_target_plot$combined_plot 275 | dev.off() 276 | 277 | 278 | ####### L-R pairs 圈图# 展示四个阶段 279 | # barplot(c(1:5),col = c("lavender","#F7F7F7","#CCCCCC", "#969696","#636363")) 280 | receiver_oi <- c("TumorCell_NT","TumorCell_Pre","TumorCell_E","TumorCell_A") 281 | sender_oi <- c("POSTN+ fibroblast_NT","POSTN+ fibroblast_Pre","POSTN+ fibroblast_E","POSTN+ fibroblast_A") 282 | filtered_ligands = prioritization_tables$prioritization_tbl_ligand_receptor %>% filter(receiver == receiver_oi) %>% filter(sender == sender_oi)%>% top_n(20, prioritization_score) %>% pull(ligand) %>% unique() 283 | prioritized_tbl_oi = prioritization_tables$prioritization_tbl_ligand_receptor %>% filter(ligand %in% filtered_ligands) %>% dplyr::select(niche, sender, receiver, ligand, receptor, ligand_receptor, prioritization_score) %>% distinct() %>% inner_join(top_ligand_receptor_niche_df) %>% group_by(ligand) %>% filter(receiver == receiver_oi) %>% top_n(2, prioritization_score) %>% ungroup() 284 | 285 | prioritized_tbl_oi$sender <- factor(prioritized_tbl_oi$sender,levels = sender_oi) 286 | prioritized_tbl_oi$receiver <- factor(prioritized_tbl_oi$receiver,levels = receiver_oi ) 287 | 288 | 289 | colors_sender = RColorBrewer::brewer.pal(n = prioritized_tbl_oi$sender %>% unique() %>% sort() %>% length(), name = 'Set2') %>% magrittr::set_names(prioritized_tbl_oi$sender %>% unique() %>% sort()) 290 | colors_receiver = c("#E0E0E0","#BABABA","#878787","#4D4D4D") %>% magrittr::set_names(prioritized_tbl_oi$receiver %>% unique() %>% sort()) 291 | circos_output = make_circos_lr(prioritized_tbl_oi, colors_sender, colors_receiver) 292 | 293 | pdf(paste(outpath,targetcell,"/4.Circosplot.pdf",sep = ""),width = 7,height =6) 294 | circos_output$p_circos 295 | dev.off() 296 | pdf(paste(outpath,targetcell,"/4.Circosplotlegend.pdf",sep = ""),width = 5,height = 5) 297 | circos_output$p_legend 298 | dev.off() 299 | -------------------------------------------------------------------------------- /6.Difference between LN-in and LN-out.R: -------------------------------------------------------------------------------- 1 | ##Figure 5 2 | 3 | ##LN-out LN-in上皮细胞差异基因====== 4 | HNSCC_epi_tumor <- readRDS('/work/smy/Project/HNSCC_26sample/2.data/26sample_tumorcells_Harmony_immuneReference_All.rds') 5 | 6 | DimPlot(HNSCC_epi_tumor) 7 | HNSCC_epi_tumor$stage <- as.character(HNSCC_epi_tumor$stage) 8 | HNSCC_epi_tumor$stage[grep('LN1|LN6|LN7',HNSCC_epi_tumor$orig.ident)] <- 'LN-out' 9 | HNSCC_epi_tumor$stage[grep('LN2|LN3|LN8',HNSCC_epi_tumor$orig.ident)] <- 'LN-in' 10 | VlnPlot(HNSCC_epi_tumor,features = c('APOE','FN1')) 11 | 12 | Idents(HNSCC_epi_tumor) <- HNSCC_epi_tumor$stage 13 | Epi <- subset(HNSCC_epi_tumor,idents = c('LN-in','LN-out')) 14 | 15 | Markers <- FindAllMarkers(Epi,only.pos = TRUE, min.pct = 0.1, logfc.threshold = 0.25) 16 | 17 | Markers$pvalue_L <- -log10(Markers$p_val+10^-200) 18 | Markers$Class <- ifelse( Markers$avg_log2FC > 0.25 & Markers$p_val < 0.05 & Markers$cluster == 'LN-out',"Up", 19 | ifelse(Markers$avg_log2FC > 0.25 & Markers$p_val < 0.05 & Markers$cluster == 'LN-in' ,"Down","None" )) 20 | 21 | Markers$avg_log2FC <- ifelse(Markers$cluster == 'LN-in',-Markers$avg_log2FC,Markers$avg_log2FC) 22 | Gene_Labels <- Markers %>% dplyr::filter(Class %in% c("Up","Down")) %>% dplyr::group_by(Class) %>% 23 | dplyr::top_n(10,abs(avg_log2FC)) 24 | 25 | outpath <- "/work/brj/Collaboration/2022/scRNA/HNSCC/Result/" 26 | openxlsx::write.xlsx(Markers,paste0(outpath,"Fig5/","LN-in vs LN-out diff genes.xlsx")) 27 | 28 | 29 | PP <- ggplot(Markers,aes(x=avg_log2FC,y= pvalue_L))+ 30 | geom_point(aes(color=Class),size=0.8) + 31 | # geom_point(color=NA)+ 32 | labs(x="log2(Foldchange)",y="-log10(p-value)")+ 33 | scale_color_manual(limits=c("Up","Down"),values=c("#e41a1c","#3182bd"),#values=c(NA,NA,NA),# 34 | labels=c(paste("LN-out (n=",table(Markers$Class)['Up'],")",sep=""), 35 | paste("LN-in (n=",table(Markers$Class)['Down'],")",sep="") 36 | ),name="")+ 37 | geom_vline(xintercept = c(-1,1),linetype="dashed")+ 38 | geom_hline(yintercept = -log10(0.05),linetype="dashed")+ 39 | theme(panel.background = element_rect(fill=NA,color="black"),axis.title= element_text(color="black",size=12),panel.grid.major = element_blank(), 40 | axis.text= element_text(color="black",size=10),legend.position = "bottom", 41 | legend.background = element_blank(),legend.key = element_blank())+ 42 | #ggrepel::geom_text_repel(data =Markers[Markers$Gene %in% TF$V1 & Markers$Class !="None",],aes(label=Gene),force=1,color="red")+ 43 | ggrepel::geom_text_repel(data =Gene_Labels,aes(label=gene),force=1,color="black")+ 44 | geom_point(data =Markers[Markers$gene %in% c(Gene_Labels$gene) & Markers$Class !="None",],shape=1) 45 | 46 | outpath <- "/work/brj/Collaboration/2022/scRNA/HNSCC/Result/" 47 | pdf(paste0(outpath,"Fig5/","Epithial_0_1_diffGeneVolcano.pdf"),width = 6,height = 6) 48 | print(PP) 49 | dev.off() 50 | 51 | ##LN-out LN-in上皮细胞通路富集====== 52 | GO_BP_Fun <- function(CompareGroup,groups=c("LN-in","LN-out"),OrgData="org.Hs.eg.db",pcut=0.05,qcut=0.2){ 53 | library(clusterProfiler) 54 | if(OrgData=="org.Hs.eg.db"){library(org.Hs.eg.db)}else{library(org.Hs.eg.db)} 55 | CompareGroup <- CompareGroup %>% dplyr::mutate(GO_Compare=purrr::map(.x=CompareResult,function(.x){ 56 | SigSub <- .x[.x$Class %in% groups,] %>% as.data.frame() 57 | if(length(unique(SigSub$gene))>10){ 58 | formula_res <- clusterProfiler::compareCluster( 59 | keyType="SYMBOL", 60 | gene ~ Class, 61 | data=SigSub, 62 | fun="enrichGO", 63 | OrgDb=OrgData, 64 | ont = "BP", 65 | pAdjustMethod = "BH", 66 | pvalueCutoff = pcut, 67 | qvalueCutoff = qcut 68 | ) 69 | 70 | # Run GO enrichment test and merge terms that are close to each other to remove result redundancy 71 | # Simple_ego <- clusterProfiler::simplify( 72 | # formula_res, 73 | # cutoff=0.7, 74 | # by="p.adjust", 75 | # select_fun=min 76 | # ) 77 | return(formula_res ) # 78 | }else{return(NA)} # 79 | })) 80 | 81 | } 82 | KEGG_Fun <- function(CompareGroup,groups=c("LN-in","LN-out"),OrgSp="hsa",pcut=0.05,qcut=0.2){ 83 | CompareGroup <- CompareGroup %>% dplyr::mutate(KEGG_Compare= purrr::map(.x=CompareResult,function(.x){ 84 | 85 | SigSub <- .x[.x$Class %in% c("LN-out","LN-in"),] %>% as.data.frame() 86 | genelist <- clusterProfiler::bitr(SigSub$gene,fromType = "SYMBOL", 87 | toType = c("ENTREZID", "SYMBOL"), OrgDb = org.Hs.eg.db) 88 | 89 | SigSub <- merge( SigSub,genelist,by.x="gene",by.y="SYMBOL") 90 | if(length(unique(SigSub$gene))>10){ 91 | formula_res <- clusterProfiler::compareCluster( 92 | #keyType="SYMBOL", 93 | ENTREZID~Class, 94 | data=SigSub, 95 | organism=OrgSp, 96 | fun="enrichKEGG", 97 | # OrgDb="org.Mm.eg.db", 98 | pAdjustMethod = "BH", 99 | pvalueCutoff = pcut, 100 | qvalueCutoff = qcut 101 | ) 102 | 103 | return(formula_res) 104 | }else{return(NA)} 105 | })) 106 | 107 | } 108 | 109 | Markers <- FindAllMarkers(Epi,only.pos = TRUE, min.pct = 0.1, logfc.threshold = 0.25) 110 | Markers$Class <- ifelse(Markers$avg_log2FC > 0.25 & Markers$cluster == 'LN-in',"LN-in", 111 | ifelse(Markers$avg_log2FC > 0.25 & Markers$cluster == 'LN-out',"LN-out","None" )) 112 | table(Markers$Class) 113 | CompareList = c("LN-in vs LN-out") 114 | CompareGroup <- tibble::tibble(CompareList=c("LN-in vs LN-out"),CompareResult = list(V1=Markers)) 115 | 116 | #GO 117 | CompareGroup <- GO_BP_Fun(CompareGroup,groups=c("LN-in","LN-out"),OrgData="org.Hs.eg.db",pcut=0.05,qcut=0.2) 118 | GOresult <- CompareGroup$GO_Compare$V1@compareClusterResult 119 | readr::write_csv(GOresult,paste0(outpath,"Fig5/","LNin vs LNout_GOenrichmentResult.csv")) 120 | 121 | pdf(paste0(outpath,"Fig5/","LNin vs LNout_GO.pdf"),width = 6,height = 8) 122 | dotplot(CompareGroup$GO_Compare$V1, showCategory = 10)+ 123 | scale_color_gradientn(colors= colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"),space="rgb")(50)) 124 | dev.off() 125 | 126 | 127 | 128 | ### 不画共有的 129 | GOresult <- CompareGroup$GO_Compare$V1@compareClusterResult 130 | INres <- GOresult %>% dplyr::filter(Cluster == "LN-in") %>% dplyr::select(Description) 131 | Outres <- GOresult %>% dplyr::filter(Cluster == "LN-out") %>% dplyr::select(Description) 132 | SpecificIN <- setdiff(INres$Description,Outres$Description) # 取第一个参数唯一的 133 | SpecificOuts <- setdiff(Outres$Description,INres$Description) # 取第一个参数唯一的 134 | Virus <- grep("virus|viral",SpecificOuts,value = T) 135 | SpecificOut <- setdiff(SpecificOuts,Virus) 136 | 137 | topN <- 10 138 | GOresults <- GOresult %>% dplyr::mutate(Ratio = round(Count/584,4)) %>% 139 | dplyr::select(c(Cluster,Description,Ratio,p.adjust)) %>% 140 | dplyr::arrange(Cluster,p.adjust) #%>% dplyr::group_by(Cluster) %>% dplyr::top_n(-topN,p.adjust) 141 | GOresultsOut <- GOresults %>% dplyr::filter(Description %in% SpecificOut) %>% dplyr::top_n(-topN,p.adjust) 142 | GOresultsIn <- GOresults %>% dplyr::filter(Description %in% SpecificIN) %>% dplyr::top_n(-topN,p.adjust) 143 | 144 | 145 | DE_DescriptionFun_Groups_BPAll <- bind_rows(GOresultsOut,GOresultsIn) 146 | 147 | TT2 <- data.frame(Cluster=rep(unique(DE_DescriptionFun_Groups_BPAll$Cluster),times=length(unique(DE_DescriptionFun_Groups_BPAll$Description))), 148 | Description=rep(unique(DE_DescriptionFun_Groups_BPAll$Description),each=length(unique(DE_DescriptionFun_Groups_BPAll$Cluster)))) 149 | 150 | #图片处理 美观 151 | DE_DescriptionFun_Groups_BPAllF <- DE_DescriptionFun_Groups_BPAll %>% dplyr::select(Description,Cluster,Ratio) %>% 152 | tidyr::spread(Cluster,Ratio,fill=0) 153 | DE_DescriptionFun_Groups_BPAllF <- DE_DescriptionFun_Groups_BPAllF[do.call(order,DE_DescriptionFun_Groups_BPAllF[,2:ncol(DE_DescriptionFun_Groups_BPAllF)]),] 154 | 155 | 156 | #DE_DescriptionFun_Groups_BPAll$Ratio <- ifelse(DE_DescriptionFun_Groups_BPAll$Ratio > 1,1,DE_DescriptionFun_Groups_BPAll$Ratio) 157 | 158 | pdf(paste0(outpath,"Fig5/","LNin vs LNout_GOspecific.pdf"),width = 7,height = 6) 159 | P <- ggplot(DE_DescriptionFun_Groups_BPAll,aes(x=Cluster,y=Description))+ 160 | geom_point(aes(color=-log10(p.adjust+10^-27),size=Ratio))+ 161 | labs(color = "-log10(Adjust P)",size="GeneRatio")+ 162 | scale_color_gradientn(limit=c(min(-log10(DE_DescriptionFun_Groups_BPAll$p.adjust)),max(-log10(DE_DescriptionFun_Groups_BPAll$p.adjust))), 163 | colors= colorRampPalette(rev(RColorBrewer::brewer.pal(9, "RdBu")),space="rgb")(50))+ 164 | theme(panel.background=element_rect(color="black",fill=NA),legend.key = element_blank(),axis.ticks=element_blank(), 165 | panel.grid=element_blank(),axis.text.x=element_text(size=12,angle=90,vjust = 0.5,hjust = 1,color="black"), 166 | axis.text.y=element_text(size=15,color="black"),axis.title=element_blank(),legend.position = "bottom")+ 167 | scale_y_discrete(limit=(DE_DescriptionFun_Groups_BPAllF$Description))+ 168 | geom_tile(data=TT2,aes(x=Cluster,y=Description),fill=NA,color="lightgray") 169 | dev.off() 170 | 171 | #Kegg 172 | CompareGroup <- KEGG_Fun(CompareGroup,groups=c("LN-in","LN-out"),OrgSp="hsa",pcut=0.05,qcut=0.2) 173 | KEGGresult <- CompareGroup$KEGG_Compare$V1@compareClusterResult 174 | readr::write_csv(KEGGresult,paste0(outpath,"Fig5/","LNin vs LNout_KEGGenrichmentResult.csv")) 175 | 176 | dotplot(CompareGroup$KEGG_Compare$V1, showCategory = 10)+ 177 | scale_color_gradientn(colors= colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"),space="rgb")(50)) 178 | 179 | ##cellchat互作强度====== 180 | #上皮细胞只保留肿瘤细胞 181 | #1.取出LN的样本 182 | #2.上皮细胞只保留肿瘤细胞 183 | #3.分stage跑cellchat 184 | Idents(HNSCC_Whole) <- HNSCC_Whole$stage 185 | HNSCC_LN <- subset(HNSCC_Whole,idents = c('LN-in','LN-out','LN-normal')) 186 | 187 | HNSCC_LN$Subcluster_RE <- HNSCC_LN$Subcluster 188 | Tumorcells <- rownames(HNSCC_epi_tumor@meta.data) 189 | HNSCC_LN$Subcluster_RE[Tumorcells] <- 'Tumor cells' 190 | unique(HNSCC_LN$Subcluster_RE) 191 | 192 | HNSCC_LN_split <- SplitObject(HNSCC_LN,split.by = 'stage') 193 | 194 | 195 | HNSCC_LN_cellchat <- lapply(names(HNSCC_LN_split), function(name){ 196 | sub <- HNSCC_LN_split[[name]] 197 | ##cellchat 198 | cellchat <- createCellChat(object = sub,group.by = 'Subcluster_RE') 199 | 200 | #数据库信息 201 | CellChatDB <- CellChatDB.human 202 | 203 | ##pre-process 204 | cellchat@DB <- CellChatDB 205 | cellchat <- subsetData(cellchat) 206 | cellchat <- identifyOverExpressedGenes(cellchat) 207 | cellchat <- identifyOverExpressedInteractions(cellchat) 208 | #cellchat@LR$LRsig 209 | cellchat <- projectData(cellchat,PPI.human) 210 | 211 | ##infer interaction 212 | ##liagnd receptor 213 | cellchat <- computeCommunProb(cellchat,raw.use = FALSE,population.size = TRUE) 214 | #filter cell<10 215 | cellchat <- filterCommunication(cellchat,min.cells = 10) 216 | df.net <- subsetCommunication(cellchat) 217 | 218 | cellchat <- computeCommunProbPathway(cellchat) 219 | df.netp <- subsetCommunication(cellchat,slot.name = 'netP') 220 | 221 | cellchat <- aggregateNet(cellchat) 222 | 223 | readr::write_rds(cellchat,paste('/work/smy/Project/HNSCC_26sample/2.data/26sample_LNcell_',name,'_cellchat_Subcluster.rds',sep = '')) 224 | }) 225 | names(HNSCC_LN_cellchat) <- names(HNSCC_LN_split) 226 | readr::write_rds(HNSCC_LN_cellchat,'26Sample_HNSCC_LN_cellchat.rds') 227 | 228 | 229 | ##figure 230 | HNSCC_LN_cellchat <- readRDS('/work/smy/Project/HNSCC_26sample/2.data/26Sample_HNSCC_LN_cellchat.rds') 231 | 232 | NetWeightAll <- lapply(c(1:3), function(sub){ 233 | mat <- HNSCC_LN_cellchat[[sub]]@net$weight 234 | mat <- as.data.frame(mat) 235 | mat$stage <- names(HNSCC_LN_cellchat)[sub] 236 | return(mat) 237 | }) 238 | 239 | NetWeightAll <- bind_rows(NetWeightAll) 240 | NetWeightAll_melt <- melt(NetWeightAll) 241 | NetWeightAll_melt$cluster <- rep(colnames(NetWeightAll)[1:10],times = 3) 242 | NetWeightAll_melt$cellchat <- paste(NetWeightAll_melt$cluster,'to',NetWeightAll_melt$variable,sep = ' ') 243 | 244 | unique(NetWeightAll_melt$cellchat) 245 | 246 | NetWeightAll_meltF <- NetWeightAll_melt %>% filter(cluster == 'Tumor cells') 247 | NetWeightAll_meltF$cellchat2 <- paste(NetWeightAll_meltF$cellchat,NetWeightAll_meltF$stage) 248 | NetWeightAll_meltF <- NetWeightAll_meltF[c(7:9,19:21,4:6,1:3,10:12,16:18,22:27),] 249 | NetWeightAll_meltF$cellchat2 <- stringr::str_remove(NetWeightAll_meltF$cellchat2,"Tumor cells to ") 250 | 251 | pdf(paste0(outpath,"Fig5/","TumortoSubclusterCellchatLolliplot.pdf"),width = 4,height = 6) 252 | P <- ggplot(NetWeightAll_meltF,aes(x=value,y=cellchat2))+ 253 | ylab('')+ 254 | xlab('weight')+ 255 | #ggtitle(sub)+ 256 | geom_segment(aes(yend=cellchat2),xend=0,colour='grey50')+ ###绘制以数据点为端点的线段 257 | geom_point(size=3,aes(colour=cellchat))+ ###此处我们将以正负相关(postive negative)映射其颜色 258 | scale_colour_brewer(palette = 'Set1')+ ###颜色加深 259 | # scale_y_discrete(limits= rev(c('NT','Pre','E','A','R')))+ 260 | theme_bw() + 261 | theme(panel.grid.major.y = element_blank(), 262 | panel.grid.major.x = element_blank(), 263 | panel.grid.minor.x = element_blank()) + 264 | scale_y_discrete(limits= rev(NetWeightAll_meltF$cellchat2))+ 265 | NoLegend()###删除网格线 266 | dev.off() 267 | 268 | names(PP) <- unique(NetWeightAll_meltF$cellchat) 269 | cowplot::plot_grid(plotlist = PP,ncol = 4) 270 | 271 | 272 | 273 | 274 | ### cell chat venn plot olp gene 275 | #### 提取stage A 特异的信号通路中的基因 276 | FibToMacGeneList <- c("EGF","FGF","GDNF","HGF","IL6","NRG","PARS","PDGF") 277 | MacToFibGeneList <- c("CD23","COMPLEMENT","DESMOSOME","EPHB","FGF","FLT3","GDNF","NGF","NRG","PDGF") 278 | FibToTumorGeneList <- c("CD99","CDH5","CSF3","DESMOSOME","EPO","GDNF","IL4","LIFR","NGF","OCLN","OSM","PDGF","PRL","PSAP","SEMA5","VEGF") 279 | MacToTumorGeneList <- c("ACTIVIN","ADGRE5","CDH5","EPHB","HGF","L1CAM","LIFR","MPZ","MSTN","NPR2","OCLN","TGFb","VEGF") 280 | 281 | 282 | ## cellchat myefib 283 | cellchatMyeFib <- readRDS('/work/brj/Collaboration/2022/scRNA/HNSCC/Data/26sample_definetype_MyeFib_cellchat_stageSplit.rds') 284 | cellchatMyeFib <- cellchatMyeFib[c(6,7,2,1,4,3,5,8)] 285 | 286 | A.Fib.net <- subsetCommunication(cellchatMyeFib$A,slot.name = 'net',sources.use = "POSTN+ fibroblast",targets.use = "SPP1+ macorphages" ) %>% 287 | dplyr::filter(pathway_name %in% FibToMacGeneList ) %>% dplyr::select(ligand) %>% unique() 288 | 289 | A.Mac.net <- subsetCommunication(cellchatMyeFib$A,slot.name = 'net',sources.use = "SPP1+ macorphages",targets.use = "POSTN+ fibroblast" ) %>% 290 | dplyr::filter(pathway_name %in% MacToFibGeneList ) %>% dplyr::select(ligand) %>% unique() 291 | 292 | ## cellchat myefibtumor 293 | cellchatMFT <- readRDS('/work/brj/Collaboration/2022/scRNA/HNSCC/Data/26sample_MyeFibTumor_cellchat_stageSplit2.rds') 294 | cellchatMFT <- cellchatMFT[c(6,7,2,1,4,3,5,8)] 295 | 296 | FibToTumor <- subsetCommunication(cellchatMFT$A,slot.name = 'net',sources.use = "POSTN+ fibroblast",targets.use = "TumorCell_malignant") %>% 297 | dplyr::filter(pathway_name %in% FibToTumorGeneList) %>% dplyr::select(ligand) %>% unique() 298 | 299 | MacToTumor <- subsetCommunication(cellchatMFT$A,slot.name = 'net',sources.use = "SPP1+ macorphages",targets.use = "TumorCell_malignant") %>% 300 | dplyr::filter(pathway_name %in% MacToTumorGeneList) %>% dplyr::select(ligand) %>% unique() 301 | 302 | TwoOlp <- Reduce(intersect, list(A.Fib.net$ligand, A.Mac.net$ligand)) 303 | TwoOlpdf <- data.frame(ligand = TwoOlp) 304 | 305 | TwoUnion <- Reduce(union, list(A.Fib.net$ligand, A.Mac.net$ligand)) 306 | TwoUniondf <- data.frame(ligand = TwoUnion) 307 | 308 | openxlsx::write.xlsx(TwoUniondf,"/work/brj/Collaboration/2022/scRNA/HNSCC/Result/Fig4/TwoUniondfGeneList.xlsx") 309 | 310 | 311 | 312 | 313 | FourUnion <- Reduce(union, list(A.Fib.net$ligand, A.Mac.net$ligand,FibToTumor$ligand,MacToTumor$ligand)) 314 | Fourdf <- data.frame(ligand = FourUnion) 315 | 316 | openxlsx::write.xlsx(Fourdf,"/work/brj/Collaboration/2022/scRNA/HNSCC/Result/Fig4/FourGeneList.xlsx") 317 | 318 | 319 | 320 | 321 | -------------------------------------------------------------------------------- /7.Interaction differences between Tex and Tumor cell at LN-in and LN-out.R.R: -------------------------------------------------------------------------------- 1 | library(nichenetr) 2 | library(Seurat) 3 | library(tidyverse) 4 | library(circlize) 5 | library(dplyr) 6 | library(clusterProfiler) 7 | library(RColorBrewer) 8 | set.seed(42) 9 | options(stringsAsFactors = F) 10 | # #load Seurat rds 11 | HNSCC_Whole <- readRDS('/work/smy/Project/HNSCC/2.data/DeinfeTypes/HNSCC_26sampleAll_DefineTypes.rds.gz') 12 | Idents(HNSCC_Whole) <- HNSCC_Whole$DefineTypes 13 | HNSCC_Whole$celltype_RE <- HNSCC_Whole$DefineTypes 14 | 15 | ## add tumor in epi : metadata$Malignancy == 'malignant' 16 | metadata <- readRDS('/work/smy/Project/HNSCC_26sample/2.data/26sample_tumor_cell_metadata.rds') 17 | malignant <- rownames(metadata[grep('malignant',metadata$Malignancy),]) 18 | HNSCC_Whole$celltype_RE[malignant] <- 'TumorCell' 19 | 20 | HNSCC_TumorTex_Allstage <- subset(HNSCC_Whole,subset = celltype_RE %in% c("CD8 Tex","TumorCell")) 21 | HNSCC_TumorTex <- subset(HNSCC_TumorTex_Allstage,subset = stage %in% c("LN-in","LN-out")) 22 | HNSCC_TumorTex$celltype_Re <- paste(HNSCC_TumorTex$celltype_RE,HNSCC_TumorTex$stage,sep = "_") 23 | 24 | HNSCC_TumorTex$celltype_aggregate <- HNSCC_TumorTex$celltype_Re 25 | 26 | seurat_obj <- HNSCC_TumorTex 27 | celltype_id = "celltype_aggregate" # metadata column name of the cell type of interest 28 | seurat_obj = SetIdent(seurat_obj, value = seurat_obj[[celltype_id]]) 29 | 30 | ####Read in the NicheNet ligand-receptor network and ligand-target matrix 31 | # ##the following of human origin 32 | ligand_target_matrix <- readRDS("/work/brj/Collaboration/2022/scRNA/HNSCC/NichenetrData/ligand_target_matrix.rds") 33 | lr_network <- readRDS("/work/brj/Collaboration/2022/scRNA/HNSCC/NichenetrData/lr_network.rds") 34 | lr_network = lr_network %>% mutate(bonafide = ! database %in% c("ppi_prediction","ppi_prediction_go")) 35 | lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% distinct(ligand, receptor, bonafide) 36 | 37 | organism = "human" # user adaptation required on own dataset 38 | ####nichnet 39 | #####1. Define the niches/microenvironments of interest 40 | #! Important: your receiver cell type should consist of 1 cluster! 41 | table(seurat_obj$celltype_aggregate) 42 | 43 | niches = list( 44 | "Stage_LN-out" = list( 45 | "sender" = "TumorCell_LN-out", 46 | "receiver" = "CD8 Tex_LN-out"), 47 | "Stage_LN-in" = list( 48 | "sender" = c("TumorCell_LN-in"), 49 | "receiver" = "CD8 Tex_LN-in") 50 | ) 51 | 52 | # user adaptation required on own dataset 53 | 54 | 55 | 56 | #####2. Calculate differential expression between the niches # 得到差异性受体配体矩阵,受体--配体 57 | assay_oi = "RNA" # other possibilities: RNA,... 58 | DE_sender = calculate_niche_de(seurat_obj = seurat_obj %>% subset(features = lr_network$ligand %>% unique()), niches = niches, type = "sender", assay_oi = assay_oi) # only ligands important for sender cell types 59 | DE_receiver = calculate_niche_de(seurat_obj = seurat_obj %>% subset(features = lr_network$receptor %>% unique()), niches = niches, type = "receiver", assay_oi = assay_oi) # only receptors now, later on: DE analysis to find targets 60 | DE_sender = DE_sender %>% mutate(avg_log2FC = ifelse(avg_log2FC == Inf, max(avg_log2FC[is.finite(avg_log2FC)]), ifelse(avg_log2FC == -Inf, min(avg_log2FC[is.finite(avg_log2FC)]), avg_log2FC))) 61 | DE_receiver = DE_receiver %>% mutate(avg_log2FC = ifelse(avg_log2FC == Inf, max(avg_log2FC[is.finite(avg_log2FC)]), ifelse(avg_log2FC == -Inf, min(avg_log2FC[is.finite(avg_log2FC)]), avg_log2FC))) 62 | expression_pct = 0.10 ###Process DE results and filter 63 | DE_sender_processed = process_niche_de(DE_table = DE_sender, niches = niches, expression_pct = expression_pct, type = "sender") 64 | DE_receiver_processed = process_niche_de(DE_table = DE_receiver, niches = niches, expression_pct = expression_pct, type = "receiver") 65 | specificity_score_LR_pairs = "min_lfc" ###Combine sender-receiver DE based on L-R pairs: 66 | DE_sender_receiver = combine_sender_receiver_de(DE_sender_processed, DE_receiver_processed, lr_network, specificity_score = specificity_score_LR_pairs) 67 | 68 | #####3. Optional: Calculate differential expression between the different spatial regions 69 | include_spatial_info_sender = FALSE # if not spatial info to include: put this to false # user adaptation required on own dataset 70 | include_spatial_info_receiver = FALSE # if spatial info to include: put this to true # user adaptation required on own dataset 71 | #spatial_info = tibble(celltype_region_oi = "CAF_High", celltype_other_region = "myofibroblast_High", niche = "pEMT_High_niche", celltype_type = "sender") # user adaptation required on own dataset 72 | #specificity_score_spatial = "lfc" 73 | # this is how this should be defined if you don't have spatial info 74 | # mock spatial info 75 | if(include_spatial_info_sender == FALSE & include_spatial_info_receiver == FALSE){ 76 | spatial_info = tibble(celltype_region_oi = NA, celltype_other_region = NA) %>% mutate(niche = niches %>% names() %>% head(1), celltype_type = "sender") 77 | } 78 | 79 | if(include_spatial_info_sender == TRUE){ 80 | sender_spatial_DE = calculate_spatial_DE(seurat_obj = seurat_obj %>% subset(features = lr_network$ligand %>% unique()), spatial_info = spatial_info %>% filter(celltype_type == "sender")) 81 | sender_spatial_DE_processed = process_spatial_de(DE_table = sender_spatial_DE, type = "sender", lr_network = lr_network, expression_pct = expression_pct, specificity_score = specificity_score_spatial) 82 | 83 | # add a neutral spatial score for sender celltypes in which the spatial is not known / not of importance 84 | sender_spatial_DE_others = get_non_spatial_de(niches = niches, spatial_info = spatial_info, type = "sender", lr_network = lr_network) 85 | sender_spatial_DE_processed = sender_spatial_DE_processed %>% bind_rows(sender_spatial_DE_others) 86 | 87 | sender_spatial_DE_processed = sender_spatial_DE_processed %>% mutate(scaled_ligand_score_spatial = scale_quantile_adapted(ligand_score_spatial)) 88 | 89 | } else { 90 | # # add a neutral spatial score for all sender celltypes (for none of them, spatial is relevant in this case) 91 | sender_spatial_DE_processed = get_non_spatial_de(niches = niches, spatial_info = spatial_info, type = "sender", lr_network = lr_network) 92 | sender_spatial_DE_processed = sender_spatial_DE_processed %>% mutate(scaled_ligand_score_spatial = scale_quantile_adapted(ligand_score_spatial)) 93 | 94 | } 95 | ## [1] "Calculate Spatial DE between: CAF_High and myofibroblast_High" 96 | 97 | if(include_spatial_info_receiver == TRUE){ 98 | receiver_spatial_DE = calculate_spatial_DE(seurat_obj = seurat_obj %>% subset(features = lr_network$receptor %>% unique()), spatial_info = spatial_info %>% filter(celltype_type == "receiver")) 99 | receiver_spatial_DE_processed = process_spatial_de(DE_table = receiver_spatial_DE, type = "receiver", lr_network = lr_network, expression_pct = expression_pct, specificity_score = specificity_score_spatial) 100 | 101 | # add a neutral spatial score for receiver celltypes in which the spatial is not known / not of importance 102 | receiver_spatial_DE_others = get_non_spatial_de(niches = niches, spatial_info = spatial_info, type = "receiver", lr_network = lr_network) 103 | receiver_spatial_DE_processed = receiver_spatial_DE_processed %>% bind_rows(receiver_spatial_DE_others) 104 | 105 | receiver_spatial_DE_processed = receiver_spatial_DE_processed %>% mutate(scaled_receptor_score_spatial = scale_quantile_adapted(receptor_score_spatial)) 106 | 107 | } else { 108 | # # add a neutral spatial score for all receiver celltypes (for none of them, spatial is relevant in this case) 109 | receiver_spatial_DE_processed = get_non_spatial_de(niches = niches, spatial_info = spatial_info, type = "receiver", lr_network = lr_network) 110 | receiver_spatial_DE_processed = receiver_spatial_DE_processed %>% mutate(scaled_receptor_score_spatial = scale_quantile_adapted(receptor_score_spatial)) 111 | } 112 | 113 | #####4. Calculate ligand activities and infer active ligand-target links 配体-- target 114 | lfc_cutoff = 0.15 # recommended for 10x as min_lfc cutoff=0.15. 115 | specificity_score_targets = "min_lfc" 116 | DE_receiver_targets = calculate_niche_de_targets(seurat_obj = seurat_obj, niches = niches, lfc_cutoff = lfc_cutoff, expression_pct = expression_pct, assay_oi = assay_oi) 117 | DE_receiver_processed_targets = process_receiver_target_de(DE_receiver_targets = DE_receiver_targets, niches = niches, expression_pct = expression_pct, specificity_score = specificity_score_targets) 118 | 119 | background = DE_receiver_processed_targets %>% pull(target) %>% unique() 120 | geneset_niche1 = DE_receiver_processed_targets %>% filter(receiver == niches[[1]]$receiver & target_score >= lfc_cutoff & target_significant == 1 & target_present == 1) %>% pull(target) %>% unique() 121 | geneset_niche2 = DE_receiver_processed_targets %>% filter(receiver == niches[[2]]$receiver & target_score >= lfc_cutoff & target_significant == 1 & target_present == 1) %>% pull(target) %>% unique() 122 | 123 | 124 | # Good idea to check which genes will be left out of the ligand activity analysis (=when not present in the rownames of the ligand-target matrix). 125 | # If many genes are left out, this might point to some issue in the gene naming (eg gene aliases and old gene symbols, bad human-mouse mapping) 126 | geneset_niche1 %>% setdiff(rownames(ligand_target_matrix)) 127 | geneset_niche2 %>% setdiff(rownames(ligand_target_matrix)) 128 | 129 | print(length(geneset_niche1))##We recommend having between 20 and 1000 genes in the geneset of interest 130 | print(length(geneset_niche2)) 131 | 132 | top_n_target = 3000 133 | niche_geneset_list = list( 134 | "Stage_LN-out" = list( 135 | "receiver" = niches[[1]]$receiver, 136 | "geneset" = geneset_niche1, 137 | "background" = background), 138 | "Stage_LN-in" = list( 139 | "receiver" = niches[[2]]$receiver, 140 | "geneset" = geneset_niche2 , 141 | "background" = background) 142 | ) 143 | ligand_activities_targets = get_ligand_activities_targets(niche_geneset_list = niche_geneset_list, ligand_target_matrix = ligand_target_matrix, top_n_target = top_n_target) 144 | 145 | #####5. Calculate (scaled) expression of ligands, receptors and targets across cell types of interest (log expression values and expression fractions) 146 | features_oi = union(lr_network$ligand, lr_network$receptor) %>% union(ligand_activities_targets$target) %>% setdiff(NA) 147 | dotplot = suppressWarnings(Seurat::DotPlot(seurat_obj %>% subset(idents = niches %>% unlist() %>% unique()), features = features_oi, assay = assay_oi)) 148 | exprs_tbl = dotplot$data %>% as_tibble() 149 | exprs_tbl = exprs_tbl %>% rename(celltype = id, gene = features.plot, expression = avg.exp, expression_scaled = avg.exp.scaled, fraction = pct.exp) %>% 150 | mutate(fraction = fraction/100) %>% as_tibble() %>% select(celltype, gene, expression, expression_scaled, fraction) %>% distinct() %>% arrange(gene) %>% mutate(gene = as.character(gene)) 151 | exprs_tbl_ligand = exprs_tbl %>% filter(gene %in% lr_network$ligand) %>% rename(sender = celltype, ligand = gene, ligand_expression = expression, ligand_expression_scaled = expression_scaled, ligand_fraction = fraction) 152 | exprs_tbl_receptor = exprs_tbl %>% filter(gene %in% lr_network$receptor) %>% rename(receiver = celltype, receptor = gene, receptor_expression = expression, receptor_expression_scaled = expression_scaled, receptor_fraction = fraction) 153 | exprs_tbl_target = exprs_tbl %>% filter(gene %in% ligand_activities_targets$target) %>% rename(receiver = celltype, target = gene, target_expression = expression, target_expression_scaled = expression_scaled, target_fraction = fraction) 154 | 155 | exprs_tbl_ligand = exprs_tbl_ligand %>% mutate(scaled_ligand_expression_scaled = scale_quantile_adapted(ligand_expression_scaled)) %>% mutate(ligand_fraction_adapted = ligand_fraction) %>% mutate_cond(ligand_fraction >= expression_pct, ligand_fraction_adapted = expression_pct) %>% mutate(scaled_ligand_fraction_adapted = scale_quantile_adapted(ligand_fraction_adapted)) 156 | exprs_tbl_receptor = exprs_tbl_receptor %>% mutate(scaled_receptor_expression_scaled = scale_quantile_adapted(receptor_expression_scaled)) %>% mutate(receptor_fraction_adapted = receptor_fraction) %>% mutate_cond(receptor_fraction >= expression_pct, receptor_fraction_adapted = expression_pct) %>% mutate(scaled_receptor_fraction_adapted = scale_quantile_adapted(receptor_fraction_adapted)) 157 | #####6. Expression fraction and receptor 158 | exprs_sender_receiver = lr_network %>% 159 | inner_join(exprs_tbl_ligand, by = c("ligand")) %>% 160 | inner_join(exprs_tbl_receptor, by = c("receptor")) %>% inner_join(DE_sender_receiver %>% distinct(niche, sender, receiver)) 161 | ligand_scaled_receptor_expression_fraction_df = exprs_sender_receiver %>% group_by(ligand, receiver) %>% mutate(rank_receptor_expression = dense_rank(receptor_expression), rank_receptor_fraction = dense_rank(receptor_fraction)) %>% mutate(ligand_scaled_receptor_expression_fraction = 0.5*( (rank_receptor_fraction / max(rank_receptor_fraction)) + ((rank_receptor_expression / max(rank_receptor_expression))) ) ) %>% distinct(ligand, receptor, receiver, ligand_scaled_receptor_expression_fraction, bonafide) %>% distinct() %>% ungroup() 162 | 163 | #####7. Prioritization of ligand-receptor and ligand-target links 164 | prioritizing_weights = c("scaled_ligand_score" = 5, 165 | "scaled_ligand_expression_scaled" = 1, 166 | "ligand_fraction" = 1, 167 | "scaled_ligand_score_spatial" = 2, 168 | "scaled_receptor_score" = 0.5, 169 | "scaled_receptor_expression_scaled" = 0.5, 170 | "receptor_fraction" = 1, 171 | "ligand_scaled_receptor_expression_fraction" = 1, 172 | "scaled_receptor_score_spatial" = 0, 173 | "scaled_activity" = 0, 174 | "scaled_activity_normalized" = 1, 175 | "bona_fide" = 1) 176 | output = list(DE_sender_receiver = DE_sender_receiver, ligand_scaled_receptor_expression_fraction_df = ligand_scaled_receptor_expression_fraction_df, sender_spatial_DE_processed = sender_spatial_DE_processed, receiver_spatial_DE_processed = receiver_spatial_DE_processed, 177 | ligand_activities_targets = ligand_activities_targets, DE_receiver_processed_targets = DE_receiver_processed_targets, exprs_tbl_ligand = exprs_tbl_ligand, exprs_tbl_receptor = exprs_tbl_receptor, exprs_tbl_target = exprs_tbl_target) 178 | prioritization_tables = get_prioritization_tables(output, prioritizing_weights) 179 | prioritization_tables$prioritization_tbl_ligand_receptor %>% filter(receiver == niches[[2]]$receiver) %>% head(10) 180 | prioritization_tables$prioritization_tbl_ligand_target %>% filter(receiver == niches[[2]]$receiver) %>% head(10) 181 | 182 | ####8. Visualization of the Differential NicheNet output 183 | top_ligand_niche_df = prioritization_tables$prioritization_tbl_ligand_receptor %>% as.data.frame() %>% dplyr::select(niche, sender, receiver, ligand, receptor, prioritization_score) %>% group_by(ligand) %>% top_n(1, prioritization_score) %>% ungroup() %>% dplyr::select(ligand, receptor, niche) %>% dplyr::rename(top_niche = niche) 184 | top_ligand_receptor_niche_df = prioritization_tables$prioritization_tbl_ligand_receptor %>% as.data.frame() %>% dplyr::select(niche, sender, receiver, ligand, receptor, prioritization_score) %>% group_by(ligand, receptor) %>% top_n(1, prioritization_score) %>% ungroup() %>% dplyr::select(ligand, receptor, niche) %>% dplyr::rename(top_niche = niche) 185 | 186 | ligand_prioritized_tbl_oi = prioritization_tables$prioritization_tbl_ligand_receptor%>% as.data.frame() %>% dplyr::select(niche, sender, receiver, ligand, prioritization_score) %>% group_by(ligand, niche) %>% top_n(1, prioritization_score) %>% ungroup() %>% distinct() %>% inner_join(top_ligand_niche_df) %>% filter(niche == top_niche) %>% group_by(niche) %>% top_n(20, prioritization_score) %>% ungroup() # get the top50 ligands per niche 187 | 188 | sender_oi="TumorCell_LN-out" 189 | receiver_oi = "CD8 Tex_LN-out" 190 | 191 | filtered_ligands = ligand_prioritized_tbl_oi %>% filter(receiver == receiver_oi) %>% filter(sender == sender_oi) %>% pull(ligand) %>% unique() 192 | prioritized_tbl_oi = prioritization_tables$prioritization_tbl_ligand_receptor %>% filter(ligand %in% filtered_ligands) %>% dplyr::select(niche, sender, receiver, ligand, receptor, ligand_receptor, prioritization_score) %>% distinct() %>% inner_join(top_ligand_receptor_niche_df) %>% group_by(ligand) %>% filter(receiver == receiver_oi) %>% top_n(2, prioritization_score) %>% ungroup() 193 | lfc_plot = make_ligand_receptor_lfc_plot(receiver_oi, prioritized_tbl_oi, prioritization_tables$prioritization_tbl_ligand_receptor, plot_legend = FALSE, heights = NULL, widths = NULL) 194 | 195 | outpath <- "/work/brj/Collaboration/2022/scRNA/HNSCC/Result/nichenetr/" 196 | targetcell <- "TumorCD8Texsplit" 197 | 198 | pdf(paste(outpath,targetcell,"/1.Top20ligand_plot.pdf",sep = ""),width = 10,height = 10) 199 | lfc_plot 200 | dev.off() 201 | 202 | #lfc_plot_spatial = make_ligand_receptor_lfc_spatial_plot(receiver_oi, prioritized_tbl_oi, prioritization_tables$prioritization_tbl_ligand_receptor, ligand_spatial = include_spatial_info_sender, receptor_spatial = include_spatial_info_receiver, plot_legend = FALSE, heights = NULL, widths = NULL) 203 | 204 | ##Ligand expression, activity and target genes 配体表达,活性和靶基因 205 | exprs_activity_target_plot = make_ligand_activity_target_exprs_plot(receiver_oi, prioritized_tbl_oi, prioritization_tables$prioritization_tbl_ligand_receptor, prioritization_tables$prioritization_tbl_ligand_target, output$exprs_tbl_ligand, output$exprs_tbl_target, lfc_cutoff, ligand_target_matrix, plot_legend = FALSE, heights = NULL, widths = NULL) 206 | 207 | pdf(paste(outpath,targetcell,"/2.LigandExpressionActivityTargetgenes_plotLegend.pdf",sep = ""),width = 40,height = 10) 208 | exprs_activity_target_plot$combined_plot 209 | dev.off() 210 | 211 | filtered_ligands = ligand_prioritized_tbl_oi %>% filter(receiver == receiver_oi) %>% filter(sender == sender_oi)%>% top_n(20, prioritization_score) %>% pull(ligand) %>% unique() 212 | prioritized_tbl_oi = prioritization_tables$prioritization_tbl_ligand_receptor %>% filter(ligand %in% filtered_ligands) %>% dplyr::select(niche, sender, receiver, ligand, receptor, ligand_receptor, prioritization_score) %>% distinct() %>% inner_join(top_ligand_receptor_niche_df) %>% group_by(ligand) %>% filter(receiver == receiver_oi) %>% top_n(2, prioritization_score) %>% ungroup() 213 | exprs_activity_target_plot = make_ligand_activity_target_exprs_plot(receiver_oi, prioritized_tbl_oi, prioritization_tables$prioritization_tbl_ligand_receptor, prioritization_tables$prioritization_tbl_ligand_target, output$exprs_tbl_ligand, output$exprs_tbl_target, lfc_cutoff, ligand_target_matrix, plot_legend = FALSE, heights = NULL, widths = NULL) 214 | 215 | pdf(paste(outpath,targetcell,"/3.LigandExpressionActivityTargetgenes_plotfiltered.pdf",sep = ""),width = 40,height = 10) 216 | exprs_activity_target_plot$combined_plot 217 | dev.off() 218 | 219 | 220 | ####### L-R pairs 圈图,展示不同阶段 221 | receiver_oi <- c("CD8 Tex_LN-in","CD8 Tex_LN-out") 222 | sender_oi <- c("TumorCell_LN-in","TumorCell_LN-out") 223 | 224 | filtered_ligands = prioritization_tables$prioritization_tbl_ligand_receptor %>% filter(receiver == receiver_oi) %>% filter(sender == sender_oi)%>% top_n(30, prioritization_score) %>% pull(ligand) %>% unique() 225 | prioritized_tbl_oi = prioritization_tables$prioritization_tbl_ligand_receptor %>% filter(ligand %in% filtered_ligands) %>% dplyr::select(niche, sender, receiver, ligand, receptor, ligand_receptor, prioritization_score) %>% distinct() %>% inner_join(top_ligand_receptor_niche_df) %>% group_by(ligand) %>% filter(receiver == receiver_oi) %>% top_n(2, prioritization_score) %>% ungroup() 226 | 227 | prioritized_tbl_oi$sender <- factor(prioritized_tbl_oi$sender,levels = sender_oi) 228 | prioritized_tbl_oi$receiver <- factor(prioritized_tbl_oi$receiver,levels = receiver_oi ) 229 | 230 | colors_sender = c("#E5C494","#B3B3B3") %>% magrittr::set_names(prioritized_tbl_oi$sender %>% unique() %>% sort()) 231 | colors_receiver = c("lavender","#636363") %>% magrittr::set_names(prioritized_tbl_oi$receiver %>% unique() %>% sort()) 232 | circos_output = make_circos_lr(prioritized_tbl_oi, colors_sender, colors_receiver) 233 | 234 | pdf(paste(outpath,targetcell,"/4.Circosplot.pdf",sep = ""),width = 7,height = 6) 235 | circos_output$p_circos 236 | dev.off() 237 | pdf(paste(outpath,targetcell,"/4.Circosplotlegend.pdf",sep = ""),width = 5,height = 5) 238 | circos_output$p_legend 239 | dev.off() 240 | 241 | openxlsx::write.xlsx(prioritized_tbl_oi,file=paste(OutPath,"/",folder,"/Fig5h.xlsx",sep="")) 242 | 243 | #######savedata 244 | saveRDS(output,paste(outpath,targetcell,"/tumor_CD8Tex_nichr.rds.gz",sep = "")) 245 | saveRDS(prioritization_tables,paste(outpath,targetcell,"/tumor_CD8Tex_nichr_prioritization_tables.rds.gz",sep = "")) 246 | 247 | 248 | output <- readRDS(paste(outpath,targetcell,"/tumor_CD8Tex_nichr.rds.gz",sep = "")) 249 | prioritization_tables <- readRDS(paste(outpath,targetcell,"/tumor_CD8Tex_nichr_prioritization_tables.rds.gz",sep = "")) 250 | -------------------------------------------------------------------------------- /8.Differences between stage A and stage R.R: -------------------------------------------------------------------------------- 1 | 2 | #.libPaths(c('~/miniconda3/envs/R4.1/lib/R/library',"/home/smy/R/x86_64-pc-linux-gnu-library/4.1","/usr/local/lib64/R/library")) 3 | library(rlang) 4 | library(infercnv) 5 | library(Seurat) 6 | library(future) 7 | setwd('/work/brj/Collaboration/2022/scRNA/HNSCC/inferCNV/') 8 | 9 | HNSCC_Whole <- readRDS('/work/smy/Project/HNSCC/2.data/DeinfeTypes/HNSCC_26sampleAll_DefineTypes.rds.gz') 10 | Idents(HNSCC_Whole) <- 'Maincluster' 11 | DimPlot(HNSCC_Whole) 12 | HNSCC_epi <- subset(HNSCC_Whole,subset = Maincluster == 'Epithelial cells' & stage %in% c("A","R","NT")) 13 | 14 | 15 | dfcount = as.data.frame(HNSCC_epi@assays$RNA@counts) 16 | 17 | 18 | groupinfo= data.frame(cellId = colnames(dfcount)) 19 | groupinfo$stage = HNSCC_epi$stage 20 | 21 | library(AnnoProbe) 22 | geneInfor=annoGene(rownames(dfcount),"SYMBOL",'human') 23 | geneInfor=geneInfor[with(geneInfor, order(chr, start)),c(1,4:6)] 24 | geneInfor=geneInfor[!duplicated(geneInfor[,1]),] 25 | 26 | 27 | dfcount =dfcount [rownames(dfcount ) %in% geneInfor[,1],] 28 | dfcount =dfcount [match( geneInfor[,1], rownames(dfcount) ),] 29 | 30 | 31 | expFile='expFile.txt' 32 | write.table(dfcount ,file = expFile,sep = '\t',quote = F) 33 | groupFiles='groupFiles.txt' 34 | head(groupinfo) 35 | write.table(groupinfo,file = groupFiles,sep = '\t',quote = F,col.names = F,row.names = F) 36 | head(geneInfor) 37 | geneFile='geneFile.txt' 38 | write.table(geneInfor,file = geneFile,sep = '\t',quote = F,col.names = F,row.names = F) 39 | 40 | 41 | infercnv_obj = CreateInfercnvObject(raw_counts_matrix='expFile.txt', 42 | annotations_file='groupFiles.txt', 43 | delim="\t", 44 | gene_order_file='geneFile.txt', 45 | ref_group_names = c("NT")) 46 | future::plan("multiprocess",workers=12) 47 | 48 | infercnv_obj = infercnv::run(infercnv_obj, 49 | cutoff=0.1, # use 1 for smart-seq, 0.1 for 10x-genomics 50 | out_dir="output_dir1", # 输出文件夹 51 | cluster_by_groups=T, # 聚类 52 | denoise=T, #去噪 53 | HMM=T,# 是否基于HMM预测CNV 54 | BayesMaxPNormal = 0, 55 | num_threads=10) 56 | 57 | readr::write_rds(infercnv_obj,'infercnv_obj',compress = 'gz') 58 | 59 | expr <- read.table("/work/brj/Collaboration/2022/scRNA/HNSCC/inferCNV/output_dir1/infercnv.observations.txt", header=T) %>% as.matrix() 60 | expr.scale <- scale(t(expr)) 61 | tmp1 <- sweep(expr.scale, 2, apply(expr.scale, 2, min),'-') 62 | tmp2 <- apply(expr.scale, 2, max) - apply(expr.scale,2,min) 63 | expr_1 <- t(2*sweep(tmp1, 2, tmp2, "/")-1) 64 | 65 | cnv_score <- as.data.frame(colSums(expr_1 * expr_1)) 66 | colnames(cnv_score)="cnv_score" 67 | cnv_score <- tibble::rownames_to_column(cnv_score, var='cell') 68 | cnv_score$cell <- gsub('[.]','-',cnv_score$cell) #不包含参考的样本在内 69 | 70 | HNSCC_Whole <- readRDS('/work/smy/Project/HNSCC/2.data/DeinfeTypes/HNSCC_26sampleAll_DefineTypes.rds.gz') 71 | Idents(HNSCC_Whole) <- 'Maincluster' 72 | #DimPlot(HNSCC_Whole) 73 | HNSCC_epi <- subset(HNSCC_Whole,subset = Maincluster == 'Epithelial cells' & stage %in% c("A","R")) # 不包括参考样本 74 | 75 | HNSCC_epi$cnv_score <- cnv_score[match(rownames(HNSCC_epi@meta.data),cnv_score$cell),2] 76 | HNSCC_epi@meta.data[grep('TRUE',is.na(HNSCC_epi$cnv_score)),] 77 | cell <- rownames(HNSCC_epi@meta.data[grep('FALSE',is.na(HNSCC_epi$cnv_score)),]) 78 | HNSCC_epiF <- subset(HNSCC_epi,cells = cell) 79 | 80 | 81 | #value <- as.numeric(HNSCC_epiF$cnv_score) 82 | #thresthold <- median(value) - 1 * sd(value) # 这里是一个标准差 83 | 84 | #meta %>% mutate(condition=if_else(.$cnv_score <= thresthold,'non-malignant','malignant')) -> TumorEpi@meta.data # 分出了恶性和非恶性 85 | 86 | #HNSCC_epiF$cnv_group <- ifelse(HNSCC_epiF$cnv_score <=thresthold,'non-malignant','malignant') 87 | #Idents(HNSCC_epiF) <- 'cnv_group' 88 | #table(HNSCC_epiF$cnv_group) 89 | 90 | #HNSCC_epiMalignant <- subset(HNSCC_epiF,idents = 'malignant') 91 | #table(HNSCC_epiMalignant$stage) 92 | 93 | # 94 | #Idents(HNSCC_epi) <- HNSCC_epi$stage 95 | #HNSCC_epi_non_Malignant <- subset(HNSCC_epi,idents = c('NT','LN-normal')) 96 | #HNSCC_epi_non_Malignant$cnv_group <- 'non-malignant' 97 | 98 | #HNSCC_epi_inferCNV <- merge(HNSCC_epiMalignant,HNSCC_epi_non_Malignant) 99 | #readr::write_rds(HNSCC_epi_inferCNV,'/work/smy/Project/HNSCC_26sample/2.data/HNSCC_epi_inferCNV.rds') 100 | 101 | #violin plot ======================================================= 102 | sobjlists = FetchData(object = HNSCC_epiF, vars = c("stage","cnv_score")) 103 | openxlsx::write.xlsx(sobjlists,file=paste(OutPath,"/",folder,"/Fig6A.xlsx",sep="")) 104 | 105 | my_comparisons <- list(c("A","R")) 106 | # A: "#E78AC3" R:"#E39A35" 107 | p1 <- ggplot(sobjlists,aes(x= stage, y = cnv_score)) + 108 | geom_violin(aes(fill=stage)) + #scale_color_manual(values = c("#E78AC3","#E39A35")) + 109 | scale_fill_manual(limits=c("A","R"), 110 | values= c("#E78AC3","#58A4C3"),name="")+ 111 | stat_summary(fun.y = median, geom = "point", shape = 23, size=4)+#可用于将中位数点添加到箱线图中 112 | labs(y= "cnv_score", x = "stage") + 113 | theme(panel.background = element_blank(), axis.line = element_line(colour = "black"),plot.title = element_text(hjust = 0.5))+ 114 | ggpubr::stat_compare_means(comparisons = my_comparisons, method = "wilcox.test",na.rm = T) 115 | 116 | pdf("/work/brj/Collaboration/2022/scRNA/HNSCC/inferCNV/CNV_stageRAviolin.pdf",width = 5,height = 5) 117 | p1 118 | dev.off() 119 | 120 | expr <- read.table("/work/brj/Collaboration/2022/scRNA/HNSCC/inferCNV//output_dir1/infercnv.observations.txt", header=T) %>% as.matrix() 121 | 122 | Candigenes <- c("CCND1","CDKN2A","EGFR","MYC","VEGFA","TGFB1") 123 | Expr <- expr %>% as.data.frame() %>% dplyr::filter(rownames(.) %in% Candigenes) %>% 124 | t() %>% as.data.frame() %>% tibble::rownames_to_column("cell") 125 | Expr$cell <- gsub('[.]','-',Expr$cell) 126 | 127 | ### 128 | HNSCC_Whole <- readRDS('/work/smy/Project/HNSCC/2.data/DeinfeTypes/HNSCC_26sampleAll_DefineTypes.rds.gz') 129 | HNSCC_epi <- subset(HNSCC_Whole,subset = Maincluster == 'Epithelial cells' & stage %in% c("A","R")) 130 | HNSCC_epi$CCND1_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"CCND1"] 131 | HNSCC_epi$CDKN2A_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"CDKN2A"] 132 | HNSCC_epi$EGFR_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"EGFR"] 133 | HNSCC_epi$MYC_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"MYC"] 134 | HNSCC_epi$VEGFA_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"VEGFA"] 135 | HNSCC_epi$TGFB1_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"TGFB1"] 136 | 137 | 138 | sobjlists = FetchData(object = HNSCC_epi, vars = c("stage","CCND1_cnv","CDKN2A_cnv","EGFR_cnv","MYC_cnv","VEGFA_cnv","TGFB1_cnv")) 139 | my_comparisons <- list(c("A","R")) 140 | # A: "#E78AC3" R:"#E39A35" 141 | pp <- lapply(c(2:7), function(sub){ 142 | p1 <- ggplot(sobjlists,aes(x= stage, y = sobjlists[,sub])) + 143 | geom_violin(aes(fill=stage)) + 144 | scale_fill_manual(limits=c("A","R"), 145 | values= c("#E78AC3","#58A4C3"),name="")+ 146 | stat_summary(fun.y = mean, geom = "point", shape = 23, size=4)+#可用于将中位数点添加到箱线图中 147 | labs(y= as.character(names(sobjlists)[sub]), x = "stage") + 148 | theme(panel.background = element_blank(), axis.line = element_line(colour = "black"),plot.title = element_text(hjust = 0.5))+ 149 | ggpubr::stat_compare_means(comparisons = my_comparisons, method = "wilcox.test",na.rm = T) # Add pairwise comparisons p-value 150 | return(p1) 151 | }) 152 | 153 | pdf("/work/brj/Collaboration/2022/scRNA/HNSCC/inferCNV/CNV_6Genes_Vln.pdf",width = 15,height = 10) 154 | cowplot::plot_grid(plotlist = pp,ncol = 3) 155 | dev.off() 156 | 157 | expr <- read.table("/work/brj/Collaboration/2022/scRNA/HNSCC/inferCNV//output_dir1/infercnv.observations.txt", header=T) %>% as.matrix() 158 | ### 所有的值-1后取绝对值 159 | Candigenes <- c("CCND1","CDKN2A","EGFR","MYC","VEGFA","TGFB1") 160 | ### 减1 161 | expr.impute <- sweep(expr, 2, 1,'-') 162 | ### abs 163 | expr.abs <- apply(expr.impute, 2, abs) 164 | 165 | Expr <- expr.abs %>% as.data.frame() %>% dplyr::filter(rownames(.) %in% Candigenes) %>% 166 | t() %>% as.data.frame() %>% tibble::rownames_to_column("cell") 167 | Expr$cell <- gsub('[.]','-',Expr$cell) 168 | 169 | ### 170 | HNSCC_Whole <- readRDS('/work/smy/Project/HNSCC/2.data/DeinfeTypes/HNSCC_26sampleAll_DefineTypes.rds.gz') 171 | HNSCC_epi <- subset(HNSCC_Whole,subset = Maincluster == 'Epithelial cells' & stage %in% c("A","R")) 172 | HNSCC_epi$CCND1_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"CCND1"] 173 | HNSCC_epi$CDKN2A_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"CDKN2A"] 174 | HNSCC_epi$EGFR_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"EGFR"] 175 | HNSCC_epi$MYC_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"MYC"] 176 | HNSCC_epi$VEGFA_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"VEGFA"] 177 | HNSCC_epi$TGFB1_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"TGFB1"] 178 | 179 | 180 | sobjlists = FetchData(object = HNSCC_epi, vars = c("stage","CCND1_cnv","CDKN2A_cnv","EGFR_cnv","MYC_cnv","VEGFA_cnv","TGFB1_cnv")) 181 | my_comparisons <- list(c("A","R")) 182 | # A: "#E78AC3" R:"#E39A35" 183 | pp <- lapply(c(2:7), function(sub){ 184 | p1 <- ggplot(sobjlists,aes(x= stage, y = sobjlists[,sub])) + 185 | geom_violin(aes(fill=stage)) + 186 | scale_fill_manual(limits=c("A","R"), 187 | values= c("#E78AC3","#58A4C3"),name="")+ 188 | stat_summary(fun.y = mean, geom = "point", shape = 23, size=4)+#可用于将中位数点添加到箱线图中 189 | labs(y= as.character(names(sobjlists)[sub]), x = "stage") + 190 | theme(panel.background = element_blank(), axis.line = element_line(colour = "black"),plot.title = element_text(hjust = 0.5))+ 191 | ggpubr::stat_compare_means(comparisons = my_comparisons, method = "wilcox.test",na.rm = T) # Add pairwise comparisons p-value 192 | return(p1) 193 | }) 194 | 195 | 196 | 197 | 198 | pdf("/work/brj/Collaboration/2022/scRNA/HNSCC/inferCNV/CNV_6Genes_scale_Vln.pdf",width = 15,height = 10) 199 | cowplot::plot_grid(plotlist = pp,ncol = 3) 200 | dev.off() 201 | 202 | 203 | 204 | expr <- read.table("/work/brj/Collaboration/2022/scRNA/HNSCC/inferCNV//output_dir1/infercnv.observations.txt", header=T) %>% as.matrix() 205 | ### 归一化到1 206 | Candigenes <- c("CCND1","CDKN2A","EGFR","MYC","VEGFA","TGFB1") 207 | expr.scale <- scale(t(expr)) 208 | tmp1 <- sweep(expr.scale, 2, apply(expr.scale, 2, min),'-') 209 | tmp2 <- apply(expr.scale, 2, max) - apply(expr.scale,2,min) 210 | expr_1 <- t(2*sweep(tmp1, 2, tmp2, "/")-1) 211 | 212 | Expr <- expr_1 %>% as.data.frame() %>% dplyr::filter(rownames(.) %in% Candigenes) %>% 213 | t() %>% as.data.frame() %>% tibble::rownames_to_column("cell") 214 | Expr$cell <- gsub('[.]','-',Expr$cell) #不包含参考的样本在内 215 | 216 | ### 217 | HNSCC_Whole <- readRDS('/work/smy/Project/HNSCC/2.data/DeinfeTypes/HNSCC_26sampleAll_DefineTypes.rds.gz') 218 | HNSCC_epi <- subset(HNSCC_Whole,subset = Maincluster == 'Epithelial cells' & stage %in% c("A","R")) 219 | HNSCC_epi$CCND1_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"CCND1"] 220 | HNSCC_epi$CDKN2A_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"CDKN2A"] 221 | HNSCC_epi$EGFR_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"EGFR"] 222 | HNSCC_epi$MYC_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"MYC"] 223 | HNSCC_epi$VEGFA_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"VEGFA"] 224 | HNSCC_epi$TGFB1_cnv <- Expr[match(rownames(HNSCC_epi@meta.data),Expr$cell),"TGFB1"] 225 | 226 | 227 | sobjlists = FetchData(object = HNSCC_epi, vars = c("stage","CCND1_cnv","CDKN2A_cnv","EGFR_cnv","MYC_cnv","VEGFA_cnv","TGFB1_cnv")) 228 | my_comparisons <- list(c("A","R")) 229 | # A: "#E78AC3" R:"#E39A35" 230 | pp <- lapply(c(2:7), function(sub){ 231 | p1 <- ggplot(sobjlists,aes(x= stage, y = sobjlists[,sub])) + 232 | geom_violin(aes(fill=stage)) + 233 | scale_fill_manual(limits=c("A","R"), 234 | values= c("#E78AC3","#58A4C3"),name="")+ 235 | stat_summary(fun.y = mean, geom = "point", shape = 23, size=4)+#可用于将中位数点添加到箱线图中 236 | labs(y= as.character(names(sobjlists)[sub]), x = "stage") + 237 | theme(panel.background = element_blank(), axis.line = element_line(colour = "black"),plot.title = element_text(hjust = 0.5))+ 238 | ggpubr::stat_compare_means(comparisons = my_comparisons, method = "wilcox.test",na.rm = T) # Add pairwise comparisons p-value 239 | return(p1) 240 | }) 241 | 242 | pdf("/work/brj/Collaboration/2022/scRNA/HNSCC/inferCNV/CNV_6Genes_scale1_Vln.pdf",width = 15,height = 10) 243 | cowplot::plot_grid(plotlist = pp,ncol = 3) 244 | dev.off() 245 | -------------------------------------------------------------------------------- /9.Monocle.R: -------------------------------------------------------------------------------- 1 | library(monocle) 2 | 3 | HSMM_HNSCC_epi_tumor <- readRDS('/work/smy/Project/HNSCC_26sample/2.data/26sample_epi_tumor_monocleFigure.rds') 4 | plot_cell_trajectory(HSMM_HNSCC_epi_tumor, color_by = "stage") 5 | pdf('/work/brj/Collaboration/2022/scRNA/HNSCC/Result/Monocle/Monocle_Stage.pdf',4,4) 6 | plot_cell_trajectory(HSMM_HNSCC_epi_tumor, 7 | theta = -10, 8 | show_branch_points = F, 9 | show_tree = TRUE, color_by = "stage", cell_size = 0.8) + 10 | scale_color_manual(values = .cluster_cols)+ NoLegend() 11 | #theme(legend.position = "bottom") 12 | dev.off() 13 | 14 | pdf('/work/brj/Collaboration/2022/scRNA/HNSCC/Result/Monocle/Monocle_Split.pdf',32,4) 15 | plot_cell_trajectory(HSMM_HNSCC_epi_tumor, 16 | theta = -10, 17 | show_branch_points = F, 18 | show_tree = TRUE, color_by = "stage", cell_size = 0.8) + 19 | scale_color_manual(values = .cluster_cols)+ 20 | NoLegend() + facet_wrap('~stage',nrow = 1) 21 | dev.off() 22 | 23 | sig_gene_names1 <- c("ELF3","GRHL1","MXD1","EHF","FOXA1","FOXM1","MYBL2","YBX1","TFDP1","MAZ", 24 | "TP63","IRF6","KDM5B","BHLHE40","TFAP2A","ID1","PITX1","NR4A1","CREB3L1","SPDEF","XBP1","FOSB") 25 | 26 | sig_gene_names2 <- c("CEBPD","BCL3","GRHL1","MXD1","RORC","MYBL2","POLE3","BRCA1","TFDP1", 27 | "STAT1","TP63","KLF7","STAT1","HIF1A","MYC","GRHL3","RARG", 28 | "SOX15","GRHL1","MAF","BATF","RUNX3","FLI1","IRF4","FOXP1") 29 | 30 | ## 使用BEAM函数进行分支表达建模分析 BEAM:branched expression analysis modeling 31 | BEAM_res <- BEAM(HSMM_HNSCC_epi_tumor, branch_point = 1, cores = 1) 32 | BEAM_res <- BEAM_res[order(BEAM_res$qval),] 33 | BEAM_res <- BEAM_res[,c("gene_short_name", "pval", "qval")] 34 | 35 | readr::write_rds(BEAM_res,"/work/brj/Collaboration/2022/scRNA/HNSCC/Result/Monocle/BEAM_res.rds.gz") 36 | 37 | BEAM_res <- readr::read_rds("/work/brj/Collaboration/2022/scRNA/HNSCC/Result/Monocle/BEAM_res.rds.gz") 38 | pdf('/work/brj/Collaboration/2022/scRNA/HNSCC/Result/Monocle/branched_heatmap.pdf',width = 4.5, height = 15) 39 | plot_genes_branched_heatmap(HSMM_HNSCC_epi_tumor[row.names(subset(BEAM_res,qval < 1e-4)),], 40 | cluster_rows = F, 41 | branch_point = 1, 42 | num_clusters = 4, 43 | cores = 1, 44 | use_gene_short_name = T, 45 | show_rownames = T) 46 | dev.off() 47 | 48 | pdf('/work/brj/Collaboration/2022/scRNA/HNSCC/Result/Monocle/branched_heatmapsig1.pdf',width = 4.5, height = 15) 49 | plot_genes_branched_heatmap(HSMM_HNSCC_epi_tumor[row.names(subset(BEAM_res,gene_short_name %in% sig_gene_names1)),], 50 | branch_point = 1, 51 | cores = 1, 52 | use_gene_short_name = T, 53 | cluster_rows = F, 54 | show_rownames = T) 55 | dev.off() 56 | 57 | pdf('/work/brj/Collaboration/2022/scRNA/HNSCC/Result/Monocle/branched_heatmapsig2.pdf',width = 4.5, height = 15) 58 | plot_genes_branched_heatmap(HSMM_HNSCC_epi_tumor[row.names(subset(BEAM_res,gene_short_name %in% unique(sig_gene_names2))),], 59 | cluster_rows = FALSE, 60 | #branch_point = 1, 61 | #branch_states = c(2,3), 62 | #num_clusters = 1, 63 | cores = 1, 64 | use_gene_short_name = T, 65 | show_rownames = T) 66 | dev.off() 67 | 68 | #https://alexthiery.github.io/otic-reprogramming/downstream/smartseq2_downstream/ 69 | pdf('/work/brj/Collaboration/2022/scRNA/HNSCC/Result/Monocle/branched_heatmapsig1clusters1.pdf',width = 4, height = 4) 70 | beam_hm = plot_genes_branched_heatmap(HSMM_HNSCC_epi_tumor[sig_gene_names1,], 71 | branch_point = 1, 72 | cluster_rows=FALSE, 73 | num_clusters = 1, 74 | cores = 1, 75 | use_gene_short_name = T, 76 | show_rownames = T, 77 | return_heatmap=T, 78 | branch_colors=c("#66C2A5","#FC8D62","#8DA0CB"), 79 | branch_labels=c("S2",'S3')) 80 | dev.off() 81 | pdf('/work/brj/Collaboration/2022/scRNA/HNSCC/Result/Monocle/branched_heatmapsig2clusters1.pdf',width = 4, height = 4) 82 | beam_hm = plot_genes_branched_heatmap(HSMM_HNSCC_epi_tumor[unique(sig_gene_names2),], 83 | branch_point = 1, 84 | cluster_rows=FALSE, 85 | num_clusters = 1, 86 | cores = 1, 87 | use_gene_short_name = T, 88 | show_rownames = T, 89 | return_heatmap=T, 90 | branch_colors=c("#66C2A5","#FC8D62","#8DA0CB"), 91 | branch_labels=c("S2",'S3')) 92 | dev.off() 93 | 94 | 95 | pdf('/work/brj/Collaboration/2022/scRNA/HNSCC/Result/Monocle/branched_heatmapsig1cluster3.pdf',width = 4, height = 4) 96 | beam_hm = plot_genes_branched_heatmap(HSMM_HNSCC_epi_tumor[sig_gene_names1,], 97 | branch_point = 1, 98 | cluster_rows=FALSE, 99 | num_clusters = 3, 100 | cores = 1, 101 | use_gene_short_name = T, 102 | show_rownames = T, 103 | return_heatmap=T, 104 | branch_colors=c("#66C2A5","#FC8D62","#8DA0CB"), 105 | branch_labels=c("S2",'S3')) 106 | dev.off() 107 | pdf('/work/brj/Collaboration/2022/scRNA/HNSCC/Result/Monocle/branched_heatmapsig2cluster3.pdf',width = 4, height = 4) 108 | beam_hm = plot_genes_branched_heatmap(HSMM_HNSCC_epi_tumor[unique(sig_gene_names2),], 109 | branch_point = 1, 110 | cluster_rows=FALSE, 111 | num_clusters = 3, 112 | cores = 1, 113 | use_gene_short_name = T, 114 | show_rownames = T, 115 | return_heatmap=T, 116 | branch_colors=c("#66C2A5","#FC8D62","#8DA0CB"), 117 | branch_labels=c("S2",'S3')) 118 | dev.off() 119 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HNSCC_scRNA 2 | Single-cell transcriptomic profiling of human HNSCC at different stages, including assessing the robustness of maintypes and subtypes cell types classification, the function enrichment of cell types, interaction between two cell types and cell-cell communications in celltypes HNSCC scRNA-seq dataset, and delineating the distinct features of malignant epithelial cells in primary and recurrent tumors. 3 | --------------------------------------------------------------------------------