├── resource
    └── AllBP.gene.rds
├── 01.TBL1XR1.bulkRNAseq.preprocessing.sh
├── 02.ATAC_chipseq_preprocessing.sh
├── 00.run_RCTD.R
├── 01.TBL1XR1.bulkRNAseq.DESeq.r
├── 00.sample_layer_cor.R
├── 03.scRNAseq_clustering_scanpy.py
├── 00.Find_zonation_pathway_phyper.test.R
├── README.md
├── 00.Ligand_receptor_interaction_zonation_analysis.R
├── 00.run_hostspot.py
├── 00.run_scenic.py
├── 00.Stereo_seq_Matrix2SeuratObject-pipeline.R
├── 02.ATAC_chipseq_Motif_scan.r
└── 00.cut_zonation_layer_and_pathway_module_score.R

/resource/AllBP.gene.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haoshijie13/LISTA/HEAD/resource/AllBP.gene.rds

--------------------------------------------------------------------------------
/01.TBL1XR1.bulkRNAseq.preprocessing.sh:
--------------------------------------------------------------------------------
# Bulk RNA-seq preprocessing template: build the RSEM reference, align with
# STAR, then quantify with RSEM. All inputs are shell variables set by the caller.
# NOTE(review): $genome is used as the reference FASTA here, as the STAR index
# directory below and as the RSEM reference prefix in the last step; presumably
# the caller re-points it between steps -- confirm.
rsem-prepare-reference \
  -p 3 \
  --gtf "$gtf" \
  "$genome" \
  "$out"

# Paired-end alignment; TranscriptomeSAM output is what RSEM consumes.
STAR --genomeDir "$genome" \
  --readFilesIn "$r1" "$r2" \
  --outFileNamePrefix "${outdir}/${name}" \
  --outSAMtype BAM SortedByCoordinate \
  --runThreadN 5 --limitOutSJcollapsed 5000000 \
  --quantMode TranscriptomeSAM GeneCounts

# Quantify expression from the alignments in $bam.
rsem-calculate-expression --paired-end --no-bam-output --alignments -p 5 \
  -q "$bam" \
  "$genome" "${outdir}/${name}"

--------------------------------------------------------------------------------
/02.ATAC_chipseq_preprocessing.sh:
--------------------------------------------------------------------------------

# Align to genome
# align_pe R1.fastq : derives the mate file by replacing "_1." with "_2." and
# writes <R1 without ".fastq">.bowtie2.sam next to the input.
align_pe() {
  local genome=/mnt/3/ywlai_genome/genome/mm10/mm10_bowtie2_index/mm10
  local r1=$1
  local r2 out
  # Dots are escaped so that "." only matches a literal dot in the file name.
  r2=$(echo "$r1" | sed 's/_1\./_2./g')
  out=$(echo "$r1" | sed 's/\.fastq//g')
  bowtie2 --very-sensitive -p 2 --no-unal -x "$genome" -1 "$r1" -2 "$r2" -S "${out}.bowtie2.sam"
}
export -f align_pe


# filter, sorting and generating CPM normalized bigwig files
# ($f: input alignment, $g: filtered + sorted BAM)
samtools view -q 30 -bS -F 0x04 "$f" | samtools sort -@ 2 > "$g"
# NOTE(review): "f", "file.bw", "t", "c", "out", "outdir", "name" and "prefix"
# below are literal placeholders, not shell variables; substitute real paths.
bamCoverage -p 20 --bam f -o file.bw --binSize 10 --normalizeUsing CPM

# peak calling for ATAC-seq
macs2 callpeak -B --nomodel --keep-dup 1 -g mm --call-summits -t t -f BAM --outdir out -n name -q 0.01

# peak calling for ChIP-seq
macs2 callpeak --nomodel -B --keep-dup 1 -g mm --call-summits -t t -c c -f BAM \
  --outdir outdir -n name -q 0.01

# Differential peaks comparing H3K27ac ChIP-seq at 40 hours post-PHx and time 0
macs2 bdgdiff --t1 "$t1" --t2 "$t2" --c1 "$c1" --c2 "$c2" --outdir outdir --o-prefix prefix

--------------------------------------------------------------------------------
/00.run_RCTD.R:
--------------------------------------------------------------------------------
# Deconvolve Stereo-seq bin spots with RCTD using an annotated scRNA-seq
# reference.
# Usage: Rscript 00.run_RCTD.R <spatial.Rdata>
#   <spatial.Rdata> must contain a Seurat object named `SeuObj`.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1) {
  stop("Usage: Rscript 00.run_RCTD.R <spatial.Rdata>", call. = FALSE)
}
library(data.table)
library(Seurat)
library(RCTD)

load(args[1]) # Load the Seurat object of your spatial data, which must be named SeuObj
if (!exists("SeuObj")) {
  stop("'", args[1], "' does not define an object named SeuObj", call. = FALSE)
}
sc <- readRDS("GSE192742.CD45.rds") # Load the annotated single cell data
#sc=subset(sc,subset=annotation_lyw!="Erythrocyte")

# Spatial side: raw counts, spot coordinates and per-spot UMI totals.
exp_spatial <- as.data.frame(as.matrix(SeuObj@assays$RNA@counts))
# The coordinates of each spot are stored in the metadata columns "coor_x" and "coor_y".
coord_spatial <- SeuObj@meta.data[, c("coor_x", "coor_y")]
nUMI_spatial <- SeuObj@meta.data[, "nCount_RNA"]
names(nUMI_spatial) <- rownames(SeuObj@meta.data)

# Reference side: raw counts, cell-type labels and per-cell UMI totals.
exp_sc <- as.data.frame(as.matrix(sc@assays$RNA@counts))
celltype_sc <- as.factor(sc$annotation) # Specify your single cell annotation
nUMI_sc <- sc@meta.data[, "nCount_RNA"]
names(nUMI_sc) <- rownames(sc@meta.data)

# Create RCTD object
reference <- Reference(exp_sc, celltype_sc, nUMI_sc)
puck <- SpatialRNA(coord_spatial, exp_spatial, nUMI_spatial)
# Clean the environment: the Seurat objects are no longer needed once the
# RCTD inputs exist.
rm(sc)
rm(SeuObj)
gc()
# Run RCTD analysis
myRCTD <- create.RCTD(puck, reference, max_cores = 1)
myRCTD <- run.RCTD(myRCTD, doublet_mode = "multi")

save(myRCTD, file = "myRCTD_20220216.Rdata")

--------------------------------------------------------------------------------
/01.TBL1XR1.bulkRNAseq.DESeq.r:
--------------------------------------------------------------------------------
# Differential expression of TBL1XR1 knock-down (KD) versus control (NC) bulk
# RNA-seq with DESeq2, followed by a volcano plot of the significant genes.
library(DESeq2)
library(ggplot2)
library(pheatmap)
library(data.table)


# Count table: genes as row names, one column per sample (4 NC then 4 KD).
count_final <- read.delim("TBLXR1_count.txt")
head(count_final)

samplenames <- colnames(count_final)
group <- c("NC", "NC", "NC", "NC", "KD", "KD", "KD", "KD")
stopifnot(length(group) == length(samplenames))
count_final <- as.matrix(count_final)
table.all <- data.frame(
  name = samplenames,
  condition = factor(group, levels = c("NC", "KD")),
  row.names = samplenames
)
# floor(): RSEM expected counts are fractional, DESeq2 needs integers.
dds.all <- DESeqDataSetFromMatrix(floor(count_final), colData = table.all, design = ~ condition)
dds.all <- dds.all[rowSums(counts(dds.all)) > 1, ]
# results() requires a fitted object, so the model has to be fitted first.
dds.all <- DESeq(dds.all)

deg <- as.data.frame(results(dds.all, contrast = c("condition", "KD", "NC")))
# Keep the gene identifier as a column so it survives row.names = FALSE below.
deg$SYMBOL <- rownames(deg)
# pvalue is NA for genes DESeq2 filtered out; drop those explicitly.
deg <- deg[!is.na(deg$pvalue) & deg$pvalue < 0.05, ]
lfc_cutoff <- log2(1.5)
deg_up <- deg[deg$log2FoldChange > lfc_cutoff, ]
deg_down <- deg[deg$log2FoldChange < -lfc_cutoff, ]
write.csv(deg_up, "KD_tbl1xr1.up_FC1.5.csv", row.names = FALSE)
write.csv(deg_down, "KD_tbl1xr1.down_FC1.5.csv", row.names = FALSE)

deg$change <- "No"
deg$change[deg$SYMBOL %in% deg_up$SYMBOL] <- "Up"
deg$change[deg$SYMBOL %in% deg_down$SYMBOL] <- "Down"
deg$log10pvalue <- -log10(deg$pvalue)

# Only these genes are labelled, and only when they are down-regulated.
gene <- c("Ccnd1", "Tbl1xr1", "Axin2", "Acadm", "Crot", "Lgr5")
deg$labeling <- ifelse(deg$SYMBOL %in% gene & deg$change == "Down", deg$SYMBOL, "")

p <- ggplot(data = deg, aes(x = log2FoldChange, y = log10pvalue, col = change, label = labeling)) +
  geom_point() +
  theme_minimal() +
  # Named colours so the mapping does not depend on which classes are present.
  scale_color_manual(values = c(Down = "blue", No = "black", Up = "red")) +
  geom_text(color = "black")

ggsave("volcano_plot.pdf", p)

--------------------------------------------------------------------------------
/00.sample_layer_cor.R:
--------------------------------------------------------------------------------
# Average expression per sample x zonation layer, then correlate the
# sample-layer profiles with each other and plot / export the matrix.
library(Seurat)
library(dplyr)
SeuObj <- readRDS("add_Time_sample_change_marker_rank_factor.rds") # load rds file including all sections

# Process the genes in 100 chunks so FetchData never has to densify the whole
# expression matrix at once.
gene_names <- rownames(SeuObj@assays$RNA@data)
grp <- cut(seq_along(gene_names), breaks = 100, labels = 1:100)
x <- split(gene_names, f = grp)
#result=lapply(x,function(x){exp=FetchData(SeuObj,vars = x);aggregate(exp,by=list("layer"=paste0(SeuObj$Time,"_",SeuObj$rank)),mean)})
#test<-bind_cols(result, .name_repair="unique")
#saveRDS(test,file="layer_exp.rds")

layer_id <- paste0(SeuObj$sample, "_", SeuObj$rank)
result <- lapply(x, function(chunk) {
  exp <- FetchData(SeuObj, vars = chunk)
  aggregate(exp, by = list("layer" = layer_id), mean)
})
test <- bind_cols(result, .name_repair = "unique")
saveRDS(test, file = "sample_layer_exp.rds")

#result=lapply(x,function(x){exp=FetchData(SeuObj,vars = x);aggregate(exp,by=list("layer"=SeuObj$sample),mean)})
#test<-bind_cols(result, .name_repair="unique")
#saveRDS(test,file="sample_exp.rds")

# Work on the table that was just written. Previously `df` was never assigned
# (the readRDS line was commented out), so `df[,1]` hit stats::df and failed.
df <- as.data.frame(test)
rownames(df) <- df[, 1]
df <- df[, -1]
df <- t(df)
# Every chunk contributed its own "layer" column; those rows become NA under
# as.numeric() and are removed by complete.cases() below.
res <- apply(df, 2, as.numeric)
res <- as.data.frame(res)
rownames(res) <- rownames(df)
res <- res[complete.cases(res), ]
cor_res <- cor(res)

# Order by the name with its first "_"-delimited field removed and the middle
# fields collapsed, then apply the fixed, dataset-specific block order.
# cor() returns identical row and column names, so one ordering serves both.
ord <- order(sub("_.*_", "_", sub("[^_]*_", "", rownames(cor_res))))
block_order <- c(91:135, 37:90, 136:306)
cor_ordered <- cor_res[ord, ][, ord][block_order, block_order]

pdf("sample_layer_cor.pdf", width = 30, height = 30)
pheatmap::pheatmap(cor_ordered, cluster_rows = FALSE, cluster_cols = FALSE)
dev.off()
write.table(cor_ordered, file = "sample_layer_cor.xls", row.names = TRUE, col.names = TRUE, sep = "\t")

-------------------------------------------------------------------------------- /03.scRNAseq_clustering_scanpy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | 5 | import scanpy as sc 6 | import os 7 | import pandas as pd 8 | import numpy as np 9 | import matplotlib 10 | import matplotlib.pyplot as plt 11 | import scanpy.external as sce 12 | 13 | 14 | 15 | adata = sc.read('./01.qc/PHx.combine.h5ad') # Input adata file after quality control. 16 | 17 | adata.raw.var.index = pd.Index(adata.var['features']) 18 | adata.var.index = pd.Index(adata.var['features']) 19 | adata.obs['Library'] = adata.obs['split'].tolist() 20 | adata.var['mt'] = adata.var_names.str.startswith('mt-') 21 | sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, use_raw=True, log1p=False, inplace=True) 22 | 23 | sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts','pct_counts_mt'], 24 | jitter=0.4, multi_panel=True) 25 | 26 | bdata = adata[(adata.obs.n_genes_by_counts < 4000) &( adata.obs.n_genes_by_counts > 500 )& (adata.obs.total_counts < 20000) & (adata.obs.total_counts > 1000) & (adata.obs.pct_counts_mt < 10), :] 27 | sc.pl.violin(bdata, ['n_genes_by_counts', 'total_counts','pct_counts_mt'], 28 | jitter=0.4, multi_panel=True) 29 | 30 | sc.pp.highly_variable_genes(bdata, min_mean=0.0125, max_mean=3, min_disp=0.5) # Find highly variable genes. 31 | bdata = bdata[:, bdata.var.highly_variable] 32 | sc.pp.regress_out(bdata, ['total_counts', 'pct_counts_mt'], n_jobs = 50) # Data normalization. 33 | sc.pp.scale(bdata, max_value=10) # Data scaling. 34 | sc.tl.pca(bdata, svd_solver='arpack', use_highly_variable = True) # Dimmesion reduction. 35 | sce.pp.harmony_integrate(bdata, key = 'Library', basis='X_pca', adjusted_basis='X_pca_harmony') # Remove batch effect. 36 | sc.pp.neighbors(bdata, use_rep = 'X_pca_harmony', n_neighbors=10, n_pcs=40) # Find cell neighbors. 
sc.tl.umap(bdata)  # Embed the cells in a 2-D space.
bdata.write_h5ad('PHx.combine_filter.h5ad')





--------------------------------------------------------------------------------
/00.Find_zonation_pathway_phyper.test.R:
--------------------------------------------------------------------------------
# Hypergeometric enrichment of zonated genes in each pathway, plus the summed
# layer expression of the zonated genes of every significant pathway.
pathway <- read.table("all.kegg.pathway.list", sep = "\t") # Input gene lists of pathways.
# strsplit() always returns a list; apply() collapsed to a matrix whenever all
# pathways happened to have the same number of genes.
pathways <- strsplit(as.character(pathway$V2), split = ",", fixed = TRUE)
names(pathways) <- as.character(pathway$V1)

zonated_gene <- read.table("zonated.gene_Halpern.list") # Input zonation gene list
zonated_gene <- as.character(zonated_gene$V1)
exp <- readRDS("layer_exp_change.rds") # Input layer averaged gene expression
universe <- rownames(exp)
zonated_gene <- unique(zonated_gene[zonated_gene %in% universe])
library(dplyr)

# Hypergeometric test of the genes of each pathway against the zonation genes.
# Urn model: the universe is every gene in `exp`; white balls are zonated
# genes, black balls the remaining genes; the draw is the pathway restricted
# to the universe. The p-value is P(X >= overlap), hence `overlap - 1` with
# the upper tail.
n_universe <- length(universe)
n_zonated <- length(zonated_gene)
pvalues <- lapply(pathways, function(x) {
  x <- unique(x[x %in% universe])
  overlap <- sum(x %in% zonated_gene)
  phyper(overlap - 1, n_zonated, n_universe - n_zonated, length(x), lower.tail = FALSE)
})
df <- matrix(unlist(pvalues), ncol = 1, dimnames = list(names(pvalues), NULL))
#write.table(df[df[,1]<0.05,],sep="\t",quote=F,file="pathway_phyper.list")
pathw_gene <- lapply(pathways, function(x) {
  paste0(x[x %in% zonated_gene], collapse = ",")
})
df1 <- matrix(unlist(pathw_gene), ncol = 1, dimnames = list(names(pathw_gene), NULL))
# merge(by = 0) joins on row names and sorts the result by pathway name.
df2 <- merge(df, df1, by = 0)
rownames(df2) <- df2$Row.names
df2 <- df2[, -1]
colnames(df2) <- c("Pvalue", "genes")
#write.table(df2[df2[,1]<0.05,],sep="\t",quote=F,file="pathway_phyper.list")
sig <- df2[df2$Pvalue < 0.05, ]
write.table(sig, sep = "\t", quote = FALSE, file = "pathway_phyper_1.list")

# Columns 19:27 of `exp` hold the layer averages that are summed per pathway.
layer_cols <- 19:27
sig_genes <- strsplit(as.character(sig$genes), ",", fixed = TRUE)
names(sig_genes) <- rownames(sig)
# drop = FALSE + colSums() treats one-gene and multi-gene pathways alike and
# vapply() guarantees a layers x pathways matrix.
res <- vapply(
  sig_genes,
  function(genes) unname(colSums(as.matrix(exp[genes, layer_cols, drop = FALSE]))),
  numeric(length(layer_cols))
)
rownames(res) <- colnames(exp)[layer_cols]
res1 <- as.data.frame(t(res))

pathway_gene <- lapply(pathways, function(x) {
  paste0(x, collapse = ",")
})
# Final table: one row per significant pathway with p/q-values, gene counts,
# the summed layer expression and the gene lists.
sig_ids <- rownames(res1)
layer_names <- colnames(res1)
tab <- cbind(
  df2[sig_ids, "Pvalue", drop = FALSE],
  res1,
  `zonated genes` = as.character(pathw_gene[sig_ids]),
  `all genes` = as.character(pathway_gene[sig_ids])
)
# Columns are addressed by their exact names; the previous `$` lookup of
# "zonated gene" only worked through partial matching.
tab$number_zg <- lengths(strsplit(as.character(tab[["zonated genes"]]), ",", fixed = TRUE))
tab$number_gene <- lengths(strsplit(as.character(tab[["all genes"]]), ",", fixed = TRUE))
tab$qvalue <- qvalue::qvalue(tab$Pvalue, lambda = seq(0, max(tab$Pvalue), 0.05))$qvalue
tab <- tab[, c("Pvalue", "qvalue", "number_zg", "number_gene", layer_names, "zonated genes", "all genes")]
write.table(tab, row.names = TRUE, file = "pathway_phyper_table_halpern.tsv", sep = "\t", quote = FALSE)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# LISTA (LIver Spatio-Temporal Atlas)
## Code used in the LISTA project.

### CITE: Xu, J., Guo, P., Hao, S. et al. A spatiotemporal atlas of mouse liver homeostasis and regeneration. Nat Genet (2024). https://doi.org/10.1038/s41588-024-01709-7

### Database: https://db.cngb.org/stomics/lista/

![image](https://github.com/haoshijie13/LISTA/assets/59014440/92db2bcd-39fd-4bbb-906c-ed2e4b0f0e5c)

**Step 1:** Run 00.Stereo_seq_Matrix2SeuratObject-pipeline.R to create Seurat objects. You can run the SAW analysis pipeline (https://github.com/STOmics/SAW) to create the matrix, or download the processed matrix from our database: https://db.cngb.org/stomics/lista/download/

**Step 2:** Run 00.cut_zonation_layer_and_pathway_module_score.R to split spots into 9 zonation layers. The pathway module scores can be added simultaneously.

**Step 3:** Run 00.Ligand_receptor_interaction_zonation_analysis.R to calculate the interaction score of ligand & receptor pairs. The ligand-receptor pairs used in our study are provided in the mouse_lr_pair.txt file. You can investigate all of them or a subset.

**Step 4:** Run 00.Find_zonation_pathway_phyper.test.R to detect pathways enriched with zonation genes.

**Step 5:** Run 00.run_RCTD.R to calculate the cell type projection score of scRNAseq on Stereo-seq data. You can find a detailed tutorial on the official RCTD website: https://github.com/dmcable/spacexr

**Step 6:** Run 00.run_scenic.py to calculate the gene regulatory network. You can find a detailed tutorial on the official SCENIC website: https://pyscenic.readthedocs.io/en/latest/

**Step 7:** Run 00.run_hostspot.py to calculate gene coexpression modules based on their expression pattern. You can find a detailed tutorial on the official Hotspot website: https://yoseflab.github.io/Hotspot/

**Step 8:** Run 01.TBL1XR1.bulkRNAseq.preprocessing.sh to get the raw bulk RNAseq matrix of the TBL1XR1 perturbation data.

**Step 9:** Run 01.TBL1XR1.bulkRNAseq.DESeq.r to get the differentially expressed genes of the TBL1XR1 perturbation data.

**Step 10:** Run 02.ATAC_chipseq_preprocessing.sh to get chromatin modification regions from ATAC-seq or ChIP-seq data.

**Step 11:** Run 02.ATAC_chipseq_Motif_scan.r to get putative binding motifs of specific chromatin modification regions.

**Step 12:** Run 03.scRNAseq_clustering_scanpy.py to cluster scRNAseq datasets.


--------------------------------------------------------------------------------
/00.Ligand_receptor_interaction_zonation_analysis.R:
--------------------------------------------------------------------------------
# Ligand-receptor interaction strength along the zonation layers.
# Usage: Rscript 00.Ligand_receptor_interaction_zonation_analysis.R <Time>
args <- commandArgs(trailingOnly = TRUE)
library(Seurat)
SeuObj <- readRDS("add_Time_sample_change_marker_rank_factor.rds") # Load the seurat object you want to deal with.
sub <- subset(SeuObj, subset = Time == args[1]) # Select the dataset part for further analysis. OPTIONAL.
# Generate a random group label for each bin spot: (1:n)/9/50 truncated to an
# integer yields about n/450 distinct labels, i.e. on average ~50 spots per
# layer x group cell when the spots are spread over 9 layers.
# NOTE(review): no seed is set, so the grouping differs between runs.
n_spots <- nrow(sub@meta.data)
sub$group <- as.integer(sample(1:n_spots / 9 / 50, n_spots, replace = TRUE))
#lr<-read.table("../28.ligand/mouse_lr_pair.txt",header=T)
# Read the ligand receptor list in this repository or input a subset you want to deal with.
# Required columns: lr_pair, ligand_gene_symbol, receptor_gene_symbol.
lr <- read.table("manual.lr.list", header = TRUE)
counts <- sub@assays$RNA@counts
genes <- unique(c(as.character(lr$ligand_gene_symbol), as.character(lr$receptor_gene_symbol)))
genes <- genes[genes %in% rownames(counts)]

#grp=cut(1:nrow(sub@assays$RNA@data),breaks = 100,labels = 1:100)
#x=split(rownames(sub@assays$RNA@data),f = grp)

# Aggregate the gene expression by layer (rank) and random group: one row per
# layer x group combination, one column per gene.
exp_mat <- aggregate(
  as.data.frame(t(as.matrix(counts[genes, , drop = FALSE]))),
  by = list("rank" = sub$rank, "group" = sub$group),
  mean
)

# Keep only the pairs for which both partners were measured.
valid <- lr$ligand_gene_symbol %in% colnames(exp_mat) &
  lr$receptor_gene_symbol %in% colnames(exp_mat)
if (!any(valid)) {
  stop("No ligand-receptor pair has both genes in the expression matrix", call. = FALSE)
}
ligands <- as.character(lr$ligand_gene_symbol[valid])
receptors <- as.character(lr$receptor_gene_symbol[valid])
pair_names <- as.character(lr[valid, "lr_pair"])

# Multiply the ligand expression by the corresponding receptor expression of
# each group as the interaction strength score. The explicit matrix() keeps the
# groups x pairs shape even when only one pair or one group is present.
n_groups <- nrow(exp_mat)
res <- vapply(
  seq_along(pair_names),
  function(i) exp_mat[[ligands[i]]] * exp_mat[[receptors[i]]],
  numeric(n_groups)
)
res <- matrix(res, nrow = n_groups, ncol = length(pair_names), dimnames = list(NULL, pair_names))
res <- as.data.frame(res)
res$rank <- exp_mat$rank

# KW test: does the interaction score differ between layers?
pvalues <- vapply(
  pair_names,
  function(pair) kruskal.test(res[[pair]], res$rank)$p.value,
  numeric(1)
)
# Mean interaction score per layer.
test <- aggregate(res[pair_names], by = list("rank" = res$rank), mean)
test <- as.data.frame(test)

sig_pairs <- names(pvalues)[!is.na(pvalues) & pvalues < 0.05]
if (length(sig_pairs) > 0) {
  pdf(paste0("manual_lxr.", args[1], ".pdf"), height = length(sig_pairs) / 5.5, width = 5)
  pheatmap::pheatmap(
    t(test[, sig_pairs, drop = FALSE]),
    scale = "row",
    cluster_cols = FALSE,
    # Row clustering needs at least two rows.
    cluster_rows = length(sig_pairs) > 1
  )
  dev.off()
} else {
  message("No ligand-receptor pair with P < 0.05; heatmap skipped")
}
#df=as.data.frame(cbind(t(test[,names(na.omit(pvalues[pvalues<0.05]))]),na.omit(pvalues[pvalues<0.05])))
df <- as.data.frame(cbind(t(test[, names(pvalues), drop = FALSE]), pvalues))
# One column per layer found in the data (9 in the published analysis).
colnames(df) <- c(paste0("layer_", seq_len(nrow(test))), "Pvalue")
write.table(df,file=paste0("manual_lxr.",args[1],".table.xls"),sep="\t",quote=F,row.names=T,col.names=T) 32 | -------------------------------------------------------------------------------- /00.run_hostspot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import hotspot 7 | import matplotlib.pyplot as plt 8 | import matplotlib.colors 9 | import seaborn as sns 10 | import pickle 11 | # Load the counts and positions 12 | counts_file = sys.argv[2] # input the cell×gene matrix 13 | pos_file = sys.argv[3] # input the cell×spatial position matrix 14 | OUTDIR = "." 15 | NAME = sys.argv[1] # output prefix 16 | 17 | HS_RESULTS = ''.join([OUTDIR,"/",NAME,"_hs_results.p"]) 18 | LCZ = ''.join([OUTDIR, "/", NAME, "_lcz.p"]) 19 | MODULES = ''.join([OUTDIR, "/", NAME, "_modules.p"]) 20 | HOTSPOT = ''.join([OUTDIR, "/", NAME, "_hotspot.p"]) 21 | pos = pd.read_csv(pos_file, index_col=0) 22 | counts = pd.read_csv(counts_file, index_col=0) # Takes a while, ~10min 23 | # Align the indices 24 | counts = counts.loc[:, pos.index] 25 | barcodes = pos.index.values 26 | # Swap position axes 27 | # We swap x'=y and y'=-x to match the slides in the paper 28 | pos = pd.DataFrame( 29 | { 30 | 'X': pos.X, 31 | 'Y': pos.Y, 32 | }, index=pos.index 33 | ) 34 | num_umi = counts.sum(axis=0) 35 | # Filter genes 36 | #gene_counts = (counts > 0).sum(axis=1) 37 | #valid_genes = gene_counts >= 50 38 | #counts = counts.loc[valid_genes] 39 | 40 | 41 | # Create the Hotspot object and the neighborhood graph 42 | hs = hotspot.Hotspot(counts, model='normal', latent=pos) 43 | 44 | hs.create_knn_graph( 45 | weighted_graph=False, n_neighbors=5, 46 | ) 47 | 48 | 49 | hs_results = hs.compute_autocorrelations(jobs=20) 50 | 51 | with open(HS_RESULTS, "wb") as f: 52 | pickle.dump(hs_results,f) 53 | 54 | 55 | #select the genes with significant spatial autocorrelation 56 | hs_genes = 
hs_results.index[hs_results.FDR < 0.05] 57 | 58 | # Compute pair-wise local correlations between these genes 59 | lcz = hs.compute_local_correlations(hs_genes, jobs=20) 60 | 61 | with open(LCZ, "wb") as f: 62 | pickle.dump(lcz,f) 63 | 64 | 65 | modules = hs.create_modules( 66 | min_gene_threshold=5, core_only=False, fdr_threshold=0.05 67 | ) 68 | 69 | with open(MODULES, "wb") as f: 70 | pickle.dump(modules, f) 71 | 72 | 73 | #with open(HOTSPOT, "wb") as f: 74 | # pickle.dump(hs,f) 75 | 76 | results = hs.results.join(hs.modules) 77 | results.to_csv("".join([sys.argv[1],"-Regulon2Gene.csv"])) 78 | 79 | module_scores = hs.calculate_module_scores() 80 | module_scores.to_csv("".join([sys.argv[1],"-module_score.csv"])) 81 | 82 | plt.rcParams['figure.figsize'] = (15.0, 12.0) 83 | hs.plot_local_correlations() 84 | plt.savefig("".join([sys.argv[1],"-regulon_module_number.pdf"]), dpi = 600) 85 | -------------------------------------------------------------------------------- /00.run_scenic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import pickle 4 | import pandas as pd 5 | import numpy as np 6 | import sys 7 | 8 | ####################################################################################################### 9 | ################## You can find a detailed tutorial from SCENIC offtial website.####################### 10 | ####################################################################################################### 11 | 12 | 13 | args = sys.argv 14 | print(args[1]) 15 | 16 | from dask.diagnostics import ProgressBar 17 | if __name__ == '__main__': 18 | ProgressBar = ProgressBar() 19 | from arboreto.utils import load_tf_names 20 | from arboreto.algo import grnboost2 21 | from pyscenic.rnkdb import FeatherRankingDatabase as RankingDatabase 22 | from pyscenic.utils import modules_from_adjacencies, load_motifs 23 | from pyscenic.prune import prune2df, df2regulons 24 | from pyscenic.aucell 
import aucell 25 | import seaborn as sns 26 | import dask 27 | dask.config.set(num_workers=80) 28 | if __name__ == '__main__': 29 | print(os.getcwd()) 30 | ex_matrix = pd.read_table(args[1], sep='\t', header=0, index_col=0).T # Read cell × gene matrix. 31 | print(ex_matrix) 32 | ################################################################## 33 | ########### We used mm10 database provided by SCENIC ############# 34 | ################################################################## 35 | DATA_FOLDER="./" 36 | RESOURCES_FOLDER="liver_zonation/SCENIC" 37 | DATABASE_FOLDER="SCENIC/motif" 38 | # SCHEDULER="123.122.8.24:8786" 39 | DATABASES_GLOB = os.path.join(DATABASE_FOLDER, "mm10_*.mc9nr.feather") 40 | MOTIF_ANNOTATIONS_FNAME = os.path.join(RESOURCES_FOLDER, "motifs-v9-nr.mgi-m0.001-o0.0.tbl") 41 | MM_TFS_FNAME = os.path.join(RESOURCES_FOLDER, 'mm_mgi_tfs.txt') 42 | REGULONS_FNAME = ''.join([ DATA_FOLDER, args[1],"_regulons.p"]) 43 | MOTIFS_FNAME = ''.join([ DATA_FOLDER, args[1], "_motifs.txt"]) 44 | ADJACENCIES_FNAME = ''.join([DATA_FOLDER, args[1], "_adjacencies.txt"]) 45 | MODULES_FNAME = ''.join([DATA_FOLDER, args[1], "_modules.p"]) 46 | AUC_FNAME = ''.join([DATA_FOLDER, args[1], "_AUC.txt"]) 47 | 48 | print(REGULONS_FNAME) 49 | print(MOTIFS_FNAME) 50 | 51 | tf_names = load_tf_names(MM_TFS_FNAME) # Load database. 52 | db_fnames = glob.glob(DATABASES_GLOB) 53 | def name(fname): 54 | return os.path.splitext(os.path.basename(fname))[0] 55 | dbs = [RankingDatabase(fname=fname, name=name(fname)) for fname in db_fnames] 56 | 57 | print(dbs) 58 | adjancencies = grnboost2(expression_data=ex_matrix, tf_names=tf_names, verbose=True) # Calculate gene coexpression relationships. 
59 | adjancencies.to_csv(ADJACENCIES_FNAME, index=False, sep='\t') 60 | modules = list(modules_from_adjacencies(adjancencies, ex_matrix)) 61 | with open(MODULES_FNAME, 'wb') as f: 62 | pickle.dump(modules, f) 63 | df = prune2df(dbs, modules, MOTIF_ANNOTATIONS_FNAME) 64 | df.head() 65 | df.to_csv(MOTIFS_FNAME) 66 | regulons = df2regulons(df) 67 | with open(REGULONS_FNAME, 'wb') as f: 68 | pickle.dump(regulons, f) 69 | auc_mtx = aucell(ex_matrix, regulons, num_workers=5) 70 | auc_mtx.to_csv(AUC_FNAME, sep = "\t") 71 | -------------------------------------------------------------------------------- /00.Stereo_seq_Matrix2SeuratObject-pipeline.R: -------------------------------------------------------------------------------- 1 | library("Seurat") 2 | library("ggplot2") 3 | library("reshape2") 4 | library("tidyr") 5 | library(data.table) 6 | library(Matrix) 7 | # library("future") 8 | # plan("multiprocess", workers = 30) 9 | options(future.globals.maxSize=100000000000) 10 | args=commandArgs(T) # input the matrix file as first parameter which contain at least four column: geneID, x, y, and MIDCount (UMI). 
base <- basename(args[1])
dir <- dirname(args[1])
# Escaped and anchored so only a trailing ".txt" extension is stripped.
prefix <- sub("\\.txt$", "", base)

# Read a Stereo-seq matrix file and aggregate it into square bins.
#
# mat: path of the matrix file (columns geneID, x, y and a count column).
# bin: bin edge length in DNB units; spots are pooled by round(x/bin), round(y/bin).
# Returns a sparse gene x bin count matrix whose column names are "<x>_<y>".
read <- function(mat, bin) {
  data <- fread(mat, header = TRUE) # fread is a paralleled processing command
  # Aggregated DNBs in each bin spot
  data$cellID <- paste0(
    as.character(round(data$x / bin, digits = 0)),
    "_",
    as.character(round(data$y / bin, digits = 0))
  )
  gene <- unique(data$geneID)
  cell <- unique(data$cellID)
  gene_idx <- seq_along(gene)
  cell_idx <- seq_along(cell)
  names(gene_idx) <- gene
  names(cell_idx) <- cell
  print(head(gene_idx[data$geneID]))
  data <- as.data.frame(data)
  # Prefer a count column identified by name and fall back to the 4th column,
  # which is where the documented input layout puts MIDCount.
  count_col <- intersect(c("MIDCount", "MIDCounts", "UMICount"), names(data))
  counts <- if (length(count_col) > 0) data[[count_col[1]]] else data[, 4]
  # Create SparseMatrix from the matrix file; sparseMatrix() sums repeated
  # (gene, bin) entries, which is what pools the DNBs of one bin.
  mat <- sparseMatrix(i = gene_idx[data$geneID], j = cell_idx[data$cellID], x = counts)
  rownames(mat) <- gene
  colnames(mat) <- cell
  mat
}

# Run the standard Seurat workflow on one binned matrix and write the results.
#
# mat: gene x bin count matrix as returned by read().
# bin: bin size, only used to name the output files.
# Side effects: writes <dir>/bin<bin>.MG (marker table) and
# <dir>/bin<bin>.Rdata (object `SeuObj`), and assigns `AllMG` and `SeuObj` in
# the global environment so they stay available after the call.
analyze <- function(mat, bin) {
  data <- mat
  data[is.na(data)] <- 0
  print("ok")
  SeuObj <- CreateSeuratObject(counts = data, names.delim = "-", project = "SeuObj")
  SeuObj[["percent.mt"]] <- PercentageFeatureSet(SeuObj, pattern = "^mt-|^MT-|^Mt-")
  #save(SeuObj,file=paste(dir,"/bin",bin,".Rdata",sep=""))
  #q()

  # What follows is the analysis pipeline recommended by Seurat

  SeuObj <- NormalizeData(SeuObj, normalization.method = "LogNormalize", scale.factor = 10000)
  SeuObj <- FindVariableFeatures(SeuObj, selection.method = "vst", nfeatures = 2000)
  all.genes <- rownames(SeuObj)
  SeuObj <- ScaleData(SeuObj, features = all.genes, vars.to.regress = "nCount_RNA")
  # SeuObj <- SCTransform(SeuObj, vars.to.regress = "percent.mt", verbose = FALSE)
  SeuObj <- RunPCA(SeuObj, verbose = FALSE)
  SeuObj <- RunUMAP(SeuObj, dims = 1:15, verbose = FALSE)
  SeuObj <- FindNeighbors(SeuObj, dims = 1:15, verbose = FALSE)
  SeuObj <- FindClusters(SeuObj, verbose = FALSE)

  # Recover the bin coordinates from the "<x>_<y>" spot names (a leading "X"
  # is stripped from the x part).
  spot_names <- rownames(SeuObj@meta.data)
  coor_x <- sub(spot_names, pattern = "_.*", replacement = "")
  coor_x <- sub(coor_x, pattern = "X", replacement = "")
  coor_y <- sub(spot_names, pattern = ".*_", replacement = "")
  SeuObj@meta.data$coor_x <- as.integer(coor_x)
  SeuObj@meta.data$coor_y <- as.integer(coor_y)

  AllMG <<- FindAllMarkers(SeuObj)
  SeuObj <<- SeuObj
  # if(bin==100){
  #   print(mean(SeuObj@meta.data$nFeature_RNA))
  #   print(mean(SeuObj@meta.data$nCount_RNA))
  #   count = as.data.frame(SeuObj@assays$RNA@counts)
  #   count_1=count>0
  #   count_1=as.data.frame(count_1)
  #   print(mean(rowSums(as.data.frame(count_1)))/ncol(count_1))
  # }
  filename <- paste0(dir, "/bin", bin, ".MG")
  write.table(AllMG, file = filename)
  save(SeuObj, file = paste0(dir, "/bin", bin, ".Rdata"))
}

bins <- c(100, 50)
for (bin in bins) {
  mat <- read(args[1], bin) # You can choose the bin size which you prefer
  analyze(mat, bin)
}

--------------------------------------------------------------------------------
/02.ATAC_chipseq_Motif_scan.r:
--------------------------------------------------------------------------------
library(motifmatchr)

# Read a BED-like file (3 to 6 columns; extra columns are dropped) into a
# GRanges object. Columns 4-6, when present, become id, score and strand.
# NOTE(review): start is used as given. Standard BED starts are 0-based while
# GRanges is 1-based, so a true BED file would need start + 1 -- confirm the
# coordinate convention of the input.
bed_to_granges <- function(file) {
  df <- read.table(file,
                   header = FALSE,
                   stringsAsFactors = FALSE)

  if (length(df) > 6) {
    df <- df[, -c(7:length(df))]
  }

  if (length(df) < 3) {
    stop("File has less than 3 columns")
  }

  header <- c('chr', 'start', 'end', 'id', 'score', 'strand')
  names(df) <- header[1:length(names(df))]

  if ('strand' %in% colnames(df)) {
    # Anything that is not "+" or "-" means "unstranded".
    df$strand <- gsub(pattern = "[^+-]+", replacement = '*', x = df$strand)
  }

  library("GenomicRanges")

  if (length(df) == 3) {
    gr <- with(df, GRanges(chr, IRanges(start, end)))
  } else if (length(df) == 4) {
    gr <- with(df, GRanges(chr, IRanges(start, end), id = id))
  } else if (length(df) == 5) {
    gr <- with(df, GRanges(chr, IRanges(start, end), id = id, score = score))
  } else if (length(df) == 6) {
    gr <- with(df, GRanges(chr, IRanges(start, end), id = id, score = score, strand = strand))
  }
  return(gr)
}

# Load one motif collection shipped with chromVARmotifs, without touching the
# global environment.
.load_chromvar_motifs <- function(dataset) {
  if (!requireNamespace("chromVARmotifs", quietly = TRUE)) {
    stop("Could not load dependency: chromVARmotifs", call. = FALSE)
  }
  env <- new.env()
  utils::data(list = dataset, package = "chromVARmotifs", envir = env)
  get(dataset, envir = env)
}

# Fetch a JASPAR matrix set as PWMs, naming each motif "<ID>_<TF name>".
.jaspar_pwms <- function(db, opts) {
  mlCur <- TFBSTools::getMatrixSet(db, opts)
  if (!isTRUE(all.equal(TFBSTools::name(mlCur), names(mlCur)))) {
    names(mlCur) <- paste(names(mlCur), TFBSTools::name(mlCur), sep = "_")
  }
  TFBSTools::toPWM(mlCur)
}

# cisBP PWMs for the given species and collection version ("v1" or "v2");
# NULL (with a warning) for species without a collection.
.cisbp_pwms <- function(spec, version) {
  dataset <- switch(spec,
    "Mus musculus" = paste0("mouse_pwms_", version),
    "Homo sapiens" = paste0("human_pwms_", version),
    NULL
  )
  if (is.null(dataset)) {
    warning(paste("Could not find cisBP annotation for species", spec), call. = FALSE)
    return(NULL)
  }
  .load_chromvar_motifs(dataset)
}

# Resolve the genome object and the motif PWMs for a motifmatchr scan.
#
# genome: a BSgenome object or an assembly name such as "mm10".
# motifs: a PWMatrixList / PFMatrixList, or a character vector naming the
#   collections to load ("jaspar", "jaspar_vert", "jaspar2016", "homer",
#   "encode", "cisbp", "cisbp_v2").
# Returns list(genome = <BSgenome>, motifs = <motif list>).
# Errors when a collection cannot be loaded or `motifs` is of another type.
# The previous version called getGenomeObject(), logger.error() and
# logger.warning(), none of which this script defines or loads.
prepareMotifmatchr <- function(genome, motifs) {
  res <- list()

  # get the species name and the genome sequence object based on the object
  genomeObj <- genome
  if (!inherits(genomeObj, "BSgenome")) {
    genomeObj <- BSgenome::getBSgenome(genome)
  }
  spec <- BiocGenerics::organism(genomeObj)

  # get the motif PWMs
  motifL <- TFBSTools::PWMatrixList()
  if (is.character(motifs)) {
    if (is.element("jaspar", motifs)) {
      # copied code from chromVAR, but updated the JASPAR version
      # gets the non-redundant set by default
      opts <- list(species = spec, collection = "CORE")
      motifL <- c(motifL, .jaspar_pwms(JASPAR2018::JASPAR2018, opts))
    }
    if (is.element("jaspar_vert", motifs)) {
      # JASPAR for all vertebrate TFBS
      # gets the non-redundant set by default
      opts <- list(tax_group = "vertebrates", collection = "CORE")
      motifL <- c(motifL, .jaspar_pwms(JASPAR2020::JASPAR2020, opts))
    }
    if (is.element("jaspar2016", motifs)) {
      motifL <- c(motifL, TFBSTools::toPWM(chromVAR::getJasparMotifs(species = spec)))
    }
    if (is.element("homer", motifs)) {
      motifL <- c(motifL, .load_chromvar_motifs("homer_pwms"))
    }
    if (is.element("encode", motifs)) {
      motifL <- c(motifL, .load_chromvar_motifs("encode_pwms"))
    }
    if (is.element("cisbp", motifs)) {
      pwms <- .cisbp_pwms(spec, "v1")
      if (!is.null(pwms)) motifL <- c(motifL, pwms)
    }
    if (is.element("cisbp_v2", motifs)) {
      pwms <- .cisbp_pwms(spec, "v2")
      if (!is.null(pwms)) motifL <- c(motifL, pwms)
    }
    if (length(motifL) < 1) {
      stop(paste(c("No motifs were loaded. Unsupported motifs (?) :", motifs), collapse = " "), call. = FALSE)
    }
  } else if (inherits(motifs, "PWMatrixList") || inherits(motifs, "PFMatrixList")) {
    motifL <- motifs
  } else {
    stop(paste(c("unsupported value for motifs:", motifs), collapse = " "), call. = FALSE)
  }
  res[["genome"]] <- genomeObj
  res[["motifs"]] <- motifL
  return(res)
}


peak <- '/mnt/4/liver_project/atac/01.align/male_liver/Tbl1xr1.anno.bed'
peaks <- bed_to_granges(peak)
cisbp_motif <- prepareMotifmatchr("mm10", "cisbp")$motifs
sel_motif <- c('Jun', 'Egr1', 'Fos', 'Cebpd')
# Select every cisBP motif whose name matches one of the factors. Subsetting
# the list once keeps its PWMatrixList class, which matchMotifs() dispatches on.
hits <- unlist(lapply(sel_motif, grep, x = names(cisbp_motif)))
mtf_set <- cisbp_motif[unique(hits)]
motif_pos <- matchMotifs(mtf_set, peaks, genome = "mm10",
                         out = "positions")

--------------------------------------------------------------------------------
/00.cut_zonation_layer_and_pathway_module_score.R:
--------------------------------------------------------------------------------
1 | pathways<-list("acylglycerol metabolic process"=c("Apoa1","Apof","Cdk8","Cps1","Insig1","Ldlr","Pcsk9","Plb1","Plce1"),"ATP metabolic process"=c("Afg1l","Ak4","Aldob","Ampd3","Apoc3","Atp5a1","Atp5b","Atp5c1","Atp5e","Atp5g1","Atp5g3","Atp5h","Atp5j","Atp5j2","Atp5k","Atp5l","Cfh","Cox4i1","Cox5a","Cox5b","Cox7a2","Cox7a2l","Cox7c","Cyc1","Cycs","Ddit4","Dnm1l","Fbp1","Gm10358","Gm3839","Hk1","Hspa8","Igf1","Khk","Ndufa8","Ndufb6","Ndufb8","Ndufb9","Ndufc2","Ndufs2","Ndufs6","Ndufv2","Park7","Pklr","Prkag2","Sdhd","Slc25a25","Tkfc","Uqcr10","Uqcrfs1","Uqcrh"),"alpha-amino acid metabolic process"=c("Aass","Acmsd","Agxt","Amdhd1","Arg1","Asl","Aspg","Ass1","Cbs","Cth","Ftcd","Gcsh","Gldc","Gls2","Gmps","Gnmt","Got1","Hal","Hnf4a","Kyat1","Kyat3","Kynu","Mat1a","Mccc2","Nox4","Otc","Park7","Ppat","Pycr2","Qdpr","Sdsl","Sephs2","Slc7a7","Tat","Uroc1"),"electron transport 
chain"=c("Afg1l","Cox4i1","Cox5a","Cox5b","Cox7c","Cyb561","Cyc1","Cycs","Ndufa5","Ndufa8","Ndufb6","Ndufb8","Ndufb9","Ndufc2","Ndufs2","Ndufs6","Ndufv2","Park7","Sdhb","Sdhd","Slc25a12","Uqcr10","Uqcrfs1","Uqcrh"),"generation of precursor metabolites and energy"=c("Aco2","Chchd4","Cox7a1","Dlst","Eno1b","Etfrf1","Fh1","G6pc","G6pdx","Gpi1","Grb10","Idh3g","Lepr","Mdh1","Mybbp1a","Ndufs1","Nfatc4","Ogdh","Oxct1","Per2","Ppargc1a","Ppif","Sdha","Slc25a22","Slc37a2","Suclg1","Tpi1","Uqcrc1"),"organic anion transport"=c("Abat","Abcb1a","Ace","Apoa4","Apoc2","Arg2","Cyp4f18","G6pc","Gipc1","Kmo","Mfsd2a","Nfkbie","P2ry2","Per2","Plin2","Plscr1","Prelid2","Scp2","Slc16a5","Slc25a22","Slc26a1","Slc2a2","Slc37a2","Slc38a2","Slc51b","Slc6a8","Slc7a4","Slc9a3r1"),"cellular ketone metabolic process"=c("Apoa4","Apoc2","Cyp21a1","Cyp4f18","Fdxr","Fh1","Kmo","Mfsd2a","Mlycd","Oxct1","Pdss1","Ppargc1a","Scp2","Slc37a2","Srd5a1","Tdo2"),"arachidonic acid metabolic process"=c("Cyp2a22","Cyp2a4","Cyp2a5","Cyp2b10","Cyp2c37","Cyp2c38","Cyp2c40","Cyp2c69","Cyp2d12","Cyp2d22","Cyp2j6","Cyp4a12a","Cyp4a12b"),"fatty acid metabolic process"=c("Asah2","Cpt2","Cyp2a22","Cyp2a4","Cyp2a5","Cyp2b10","Cyp2c37","Cyp2c38","Cyp2c40","Cyp2c69","Cyp2d12","Cyp2d22","Cyp2j6","Cyp4a12a","Cyp4a12b","Etfa","Lipg","Pdk4","Pla2g10","Pparg","Slc25a17","Slc27a1","Them4","Trib3","Tysnd1"),"anion transmembrane 
transport"=c("Aacs","Abcd3","Abcd4","Abhd3","Acaa1a","Acaa1b","Acaa2","Acat1","Acat2","Acot1","Acot12","Acot2","Acot3","Acot4","Acot6","Acot8","Acox1","Acsf2","Acsl1","Acsm1","Acsm3","Acsm5","Acss2","Adtrp","Akr1c14","Akr1c20","Akr1c6","Aldh3a2","Avpr1a","Cyb5a","Cyp1a2","Cyp2c23","Cyp2c29","Cyp2c39","Cyp2c50","Cyp2c54","Cyp2c55","Cyp2c67","Cyp2c68","Cyp2d10","Cyp2d9","Cyp2e1","Cyp2g1","Cyp4a10","Cyp4a14","Cyp4a32","Cyp4f14","Cyp4f15","Decr1","Decr2","Dgat2","Ech1","Eci1","Eci2","Elovl1","Elovl3","Ephx2","Fabp1","Fabp2","Gcdh","Gstm4","Hacd1","Hacd2","Hacd3","Hacl1","Hadh","Hsd17b4","Lias","Lonp2","Mlxipl","Pex7","Phyh","Por","Rgn","Slc27a2","Slc27a5","Tmem189"),"organic acid catabolic process"=c("Abcd3","Abcd4","Abhd3","Acaa1a","Acaa1b","Acaa2","Acat1","Acat2","Acot2","Acot4","Acot8","Acox1","Adtrp","Agxt2","Akr1a1","Aldh4a1","Bckdhb","Blmh","Csad","Cyp26a1","Cyp26c1","Cyp4f14","Cyp4f15","Decr1","Eci1","Eci2","Fabp1","Fah","Gcdh","Glud1","Gstz1","Hacl1","Hadh","Hgd","Hibadh","Hmgcl","Hpd","Hsd17b4","Hyal1","Ldhd","Lonp2","Mtrr","Pex7","Phyh","Pon1","Pon3","Prodh","Shmt1","Slc27a2"),"sulfur compound metabolic process"=c("Abcc2","Acaa2","Acat1","Acot1","Acot12","Acot2","Acot3","Acot4","Acot6","Acot8","Acsl1","Acsm1","Acsm3","Acsm5","Acss2","Blmh","Comt","Cs","Csad","Dgat2","Dlat","Enpp1","Gcdh","Gclm","Ghr","Gsta3","Gstm1","Gstm3","Gstm4","Gstm6","Gstt1","Gstz1","Hmgcl","Hsd17b4","Idh1","Lias","Mat2a","Mgst1","Mtrr","Mvk","Nat8","Nfe2l2","Papss2","Pmvk","Sod1","Stat5a","Sult1b1","Sult1d1","Sult1e1","Ugdh"),"alcohol metabolic process"=c("Acer3","Adh1","Akr1a1","Akr1c14","Akr1c20","Akr1c6","Akr7a5","Aldh1a1","Aldh3a2","Apoc1","Apoe","Clcn2","Coq3","Cyp1a2","Cyp27a1","Dgat2","Dpm1","Ebp","Ephx2","Fdx1","Fech","Gba2","H6pd","Idh1","Lcat","Lmf1","Lpcat3","Lrp5","Mvk","Npc1","Nsdhl","P2ry1","Pctp","Plcb1","Pmvk","Pon1","Por","Prkg1","Rbp4","Rdh10","Soat2","Sod1","Sord","Srd5a3","Sult1b1","Sult1e1","Tm7sf2","Ttc39b"),"lipid 
homeostasis"=c("Abcd1","Abcg8","Acox2","Apoa2","Apoc4","Commd1","Cyp7a1","Gck","Lipc","Mia2","Nr1d1","Nr5a2"),"purine-containing compound metabolic process"=c("Abcd1","Acnat1","Acot7","Acsl5","Adk","Crot","Gamt","Gck","Mif","Mpc1","Mpc2","Nudt2","Pemt","Pipox","Ppara","Prps1l3","Slc25a13","Ttr"),"ER stress"=c("Dnajb11","Dnajb9","Hspa8","Hspa9","Xbp1","Manf","Atf4","Dnajc10")) 2 | library(Seurat) 3 | SeuObj<-readRDS("D0.rds") # Read the Seurat object which you want to deal with 4 | SeuObj<-AddModuleScore(SeuObj,features=pathways) # Calculate the module score of each pathway 5 | colnames(SeuObj@meta.data)=c(colnames(SeuObj@meta.data)[1:31],names(pathways)) 6 | library(ggplot2) 7 | dir.create("pathways"); 8 | apply(as.matrix(colnames(SeuObj@meta.data)[32:46]),1,function(x){ # apply function runs faster than for loop 9 | df=SeuObj@meta.data[SeuObj@meta.data[,x]quantile(SeuObj@meta.data[,x],0.005)[[1]],]; 10 | p<-ggplot(df,aes(x=coor_x,y=coor_y,fill=SeuObj@meta.data[,x][SeuObj@meta.data[,x]quantile(SeuObj@meta.data[,x],0.005)[[1]]]))+geom_tile()+coord_fixed()+theme_void()+scale_fill_viridis_c(option="B",direction=-1)+labs(fill=""); 11 | ggsave(filename=paste0("pathways/",x,".pdf"),plot=p) 12 | }) 13 | 14 | CV=c("Alb","Aldh1b1","Aldob","Apoa4","Apoa5","Apoc2","Arg1","Asl","Ass1","C9","Cdh1","Cyp2f2","Etnppl","Fbp1","G6pc","Gls2","Gnmt","Hal","Hp","Hpx","Hsd17b6","Hsd17b13","Mfsd2a","Mup20","Pck1","Pigr","Rida","Sds","Selenbp2","Selenop","Serpina1c","Serpina1e","Tdo2","Trf","Ugt2b38","Uox") 15 | PV=c("Akr1c6","Aldh1a1","Aldh3a2","Car3","Ces1c","Csad","Cyb5a","Cyp1a2","Cyp2a5","Cyp2c29","Cyp2c37","Cyp2c40","Cyp2c50","Cyp2c54","Cyp2e1","Cyp3a11","Cyp4a10","Cyp4a14","Glul","Gsta3","Gstm1","Gulo","Lect2","Mgst1","Mup11","Mup15","Mup16","Mup17","Mup18","Oat","Pon1","Rgn","Rnase4","Slc1a2","Slc22a1","Slco1b2","Sord","Ugt1a10","Ugt2b1") 16 | SeuObj=AddModuleScore( SeuObj,features = list("CV"=sample(CV,size = x)),name = "CV") 17 | SeuObj=AddModuleScore( SeuObj,features = 
list("PV"=sample(PV,size = x)),name = "PV") 18 | SeuObj$rank=cut_number(SeuObj$PV1-SeuObj$CV1,n = 9,label=1:9) # cut_number function can divide numbers into even groups by their values. 19 | saveRDS(SeuObj,"add.Pathway.layer.rds") 20 | --------------------------------------------------------------------------------