├── resource
    └── AllBP.gene.rds
├── 01.TBL1XR1.bulkRNAseq.preprocessing.sh
├── 02.ATAC_chipseq_preprocessing.sh
├── 00.run_RCTD.R
├── 01.TBL1XR1.bulkRNAseq.DESeq.r
├── 00.sample_layer_cor.R
├── 03.scRNAseq_clustering_scanpy.py
├── 00.Find_zonation_pathway_phyper.test.R
├── README.md
├── 00.Ligand_receptor_interaction_zonation_analysis.R
├── 00.run_hostspot.py
├── 00.run_scenic.py
├── 00.Stereo_seq_Matrix2SeuratObject-pipeline.R
├── 02.ATAC_chipseq_Motif_scan.r
└── 00.cut_zonation_layer_and_pathway_module_score.R


/resource/AllBP.gene.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haoshijie13/LISTA/HEAD/resource/AllBP.gene.rds


--------------------------------------------------------------------------------
/01.TBL1XR1.bulkRNAseq.preprocessing.sh:
--------------------------------------------------------------------------------
 1 | # Bulk RNA-seq preprocessing: build the RSEM reference, align with STAR, quantify with RSEM.
 2 | # The caller must set these variables: gtf, genome, out, r1, r2, outdir, name, bam.
 3 | # NOTE(review): $genome is used as the FASTA for rsem-prepare-reference, as the STAR
 4 | # --genomeDir and as the RSEM reference name, while $out (the reference built in the
 5 | # first step) is not used again - confirm the intended paths before running.
 6 | # Abort with a clear message instead of running the tools with empty arguments.
 7 | : "${gtf:?}" "${genome:?}" "${out:?}" "${r1:?}" "${r2:?}" "${outdir:?}" "${name:?}" "${bam:?}"
 8 | 
 9 | # Step 1: prepare the RSEM reference from the annotation and the genome.
10 | rsem-prepare-reference \
11 | -p 3 \
12 | --gtf "$gtf" \
13 | "$genome" \
14 | "$out"
15 | 
16 | # Step 2: align the read pair with STAR; outputs are prefixed with ${outdir}/${name}.
17 | STAR --genomeDir "$genome" \
18 | --readFilesIn "$r1" "$r2" \
19 | --outFileNamePrefix "${outdir}/${name}" \
20 | --outSAMtype BAM SortedByCoordinate \
21 | --runThreadN 5 --limitOutSJcollapsed 5000000 \
22 | --quantMode TranscriptomeSAM GeneCounts
23 | 
24 | # Step 3: quantify expression with RSEM from the alignments in $bam.
25 | rsem-calculate-expression --paired-end --no-bam-output --alignments -p 5 \
26 | -q "$bam" \
27 | "$genome" "${outdir}/${name}"
28 | 


--------------------------------------------------------------------------------
/02.ATAC_chipseq_preprocessing.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Align to genome
 3 | align_pe() { genome=/mnt/3/ywlai_genome/genome/mm10/mm10_bowtie2_index/mm10; r1=$1; r2=`echo $r1 | sed 's/_1./_2./g'`
 4 |    out=`echo $r1 | sed 's/.fastq//g'`; bowtie2 --very-sensitive -p 2 --no-unal -x $genome -1 $r1 -2 $r2 -S $out".bowtie2.sam" 
 5 | }
 6 |   export -f align_pe
 7 | 
 8 | 
 9 | # filter, sorting and generating CPM normalized bigwig files
10 | samtools view -q 30 -bS -F 0x04 $f | samtools sort -@ 2 > $g
11 | bamCoverage -p 20 --bam f -o file.bw --binSize 10 --normalizeUsing CPM
12 | 
13 | # peak calling for ATAC-seq
14 | macs2 callpeak -B --nomodel --keep-dup 1 -g mm --call-summits -t t -f BAM --outdir out -n name -q 0.01
15 | 
16 | # peak calling for ChIP-seq
17 | macs2 callpeak --nomodel  -B --keep-dup 1 -g mm --call-summits -t t -c c -f BAM \
18 | --outdir outdir -n name -q 0.01
19 | 
20 | # Differential peaks comparing H3K27ac ChIP-seq at 40 hours post-PHx and time 0 
21 | macs2 bdgdiff --t1 $t1 --t2 $t2 --c1 $c1 --c2 $c2 --outdir outdir --o-prefix prefix
22 | 


--------------------------------------------------------------------------------
/00.run_RCTD.R:
--------------------------------------------------------------------------------
 1 | # Deconvolve Stereo-seq bin spots into cell types with RCTD, using an annotated
 2 | # single-cell dataset as the reference.
 3 | # Usage: Rscript 00.run_RCTD.R <Rdata file holding a Seurat object named SeuObj>
 4 | args <- commandArgs(TRUE)
 5 | if (length(args) < 1) {
 6 |   stop("Usage: Rscript 00.run_RCTD.R <spatial Rdata file>", call. = FALSE)
 7 | }
 8 | library(data.table)
 9 | library(Seurat)
10 | # NOTE(review): the README points to https://github.com/dmcable/spacexr for this tool -
11 | # confirm that the installed package is still called RCTD.
12 | library(RCTD)
13 | 
14 | load(args[1])   # Load the Seurat object of your spatial data, which must be named SeuObj
15 | if (!exists("SeuObj")) {
16 |   stop(args[1], " does not contain an object named SeuObj", call. = FALSE)
17 | }
18 | sc <- readRDS("GSE192742.CD45.rds") # Load the annotated single cell data
19 | #sc=subset(sc,subset=annotation_lyw!="Erythrocyte")
20 | 
21 | # Spatial data: raw counts, spot coordinates and per-spot UMI totals
22 | #exp_spatial=FetchData(SeuObj,vars=rownames(SeuObj@assays$RNA@counts),slot="counts")
23 | exp_spatial <- as.data.frame(as.matrix(SeuObj@assays$RNA@counts))
24 | # The coordinates of each spot are stored in the variables "coor_x" and "coor_y"
25 | coord_spatial <- SeuObj@meta.data[, c("coor_x", "coor_y")]
26 | nUMI_spatial <- setNames(SeuObj@meta.data[, "nCount_RNA"], rownames(SeuObj@meta.data))
27 | 
28 | # Single-cell reference: raw counts, cell type labels and per-cell UMI totals
29 | #exp_sc=FetchData(sc,vars=rownames(sc@assays$RNA@counts),slot="counts")
30 | exp_sc <- as.data.frame(as.matrix(sc@assays$RNA@counts))
31 | celltype_sc <- as.factor(sc$annotation)  # Specify your single cell annotation
32 | nUMI_sc <- setNames(sc@meta.data[, "nCount_RNA"], rownames(sc@meta.data))
33 | 
34 | # Create RCTD object
35 | reference <- Reference(exp_sc, celltype_sc, nUMI_sc)
36 | puck <- SpatialRNA(coord_spatial, exp_spatial, nUMI_spatial)
37 | # Clean the environment: the Seurat objects are no longer needed once converted
38 | rm(sc)
39 | rm(SeuObj)
40 | gc()
41 | # Run RCTD analysis
42 | myRCTD <- create.RCTD(puck, reference, max_cores = 1)
43 | myRCTD <- run.RCTD(myRCTD, doublet_mode = 'multi')
44 | 
45 | save(myRCTD, file = "myRCTD_20220216.Rdata")
46 | 


--------------------------------------------------------------------------------
/01.TBL1XR1.bulkRNAseq.DESeq.r:
--------------------------------------------------------------------------------
 1 | library(DESeq2)
 2 | library(ggplot2)
 3 | library(pheatmap)
 4 | library(data.table)
 5 | 
 6 | 
 7 | # Differential expression of TBL1XR1 knock-down (KD) versus control (NC) with DESeq2,
 8 | # followed by export of the up/down gene tables and a volcano plot.
 9 | count_final <- as.matrix(read.delim("TBLXR1_count.txt"))
10 | head(count_final)
11 | 
12 | samplenames <- colnames(count_final)
13 | # The count table is assumed to hold 4 NC columns followed by 4 KD columns - TODO confirm
14 | group <- factor(c("NC", "NC", "NC", "NC", "KD", "KD", "KD", "KD"), levels = c("NC", "KD"))
15 | stopifnot(length(group) == length(samplenames))
16 | table.all <- data.frame(name = samplenames, condition = group, row.names = samplenames)
17 | dds.all <- DESeqDataSetFromMatrix(floor(count_final), colData = table.all, design = ~ condition)
18 | dds.all <- dds.all[rowSums(counts(dds.all)) > 1, ]
19 | # results() needs a fitted model, so DESeq() has to be run first
20 | dds.all <- DESeq(dds.all)
21 | deg <- as.data.frame(results(dds.all, contrast = c("condition", "KD", "NC")))
22 | # Keep the gene identifiers (row names of the count table) as a column so they reach the CSVs
23 | deg$SYMBOL <- rownames(deg)
24 | # pvalue can be NA; drop those rows explicitly instead of creating all-NA rows
25 | deg <- deg[!is.na(deg$pvalue) & deg$pvalue < 0.05, ]
26 | lfc_cutoff <- log2(1.5)   # fold change of 1.5 on the log2 scale
27 | deg_up <- deg[deg$log2FoldChange > lfc_cutoff, ]
28 | deg_down <- deg[deg$log2FoldChange < -lfc_cutoff, ]
29 | write.csv(deg_up, 'KD_tbl1xr1.up_FC1.5.csv', row.names = FALSE)
30 | write.csv(deg_down, 'KD_tbl1xr1.down_FC1.5.csv', row.names = FALSE)
31 | 
32 | deg$change <- 'No'
33 | deg$change[deg$SYMBOL %in% deg_up$SYMBOL] <- 'Up'
34 | deg$change[deg$SYMBOL %in% deg_down$SYMBOL] <- 'Down'
35 | 
36 | # Label only the selected genes that are down-regulated; all other points get an empty label
37 | gene <- c('Ccnd1','Tbl1xr1','Axin2','Acadm','Crot','Lgr5')
38 | deg$labeling <- ifelse(deg$SYMBOL %in% gene & deg$change == 'Down', deg$SYMBOL, '')
39 | # y axis of the volcano plot
40 | deg$log10pvalue <- -log10(deg$pvalue)
41 | 
42 | # Colours are named so that they stay attached to the right class even if one class is empty
43 | p <- ggplot(data = deg, aes(x = log2FoldChange, y = log10pvalue, col = change, label = labeling)) +
44 |   geom_point() + theme_minimal() +
45 |   scale_color_manual(values = c(Down = "blue", No = "black", Up = "red")) +
46 |   geom_text(color = "black")
47 | 
48 | ggsave('volcano_plot.pdf', p)
49 | 


--------------------------------------------------------------------------------
/00.sample_layer_cor.R:
--------------------------------------------------------------------------------
 1 | library(Seurat)
 2 | library(dplyr)
 3 | # Average the expression of every gene per sample/layer combination, then correlate the combinations.
 4 | SeuObj<-readRDS("add_Time_sample_change_marker_rank_factor.rds") # load rds file including all sections
 5 | # Process the genes in 100 chunks so that FetchData()/aggregate() handle one chunk at a time
 6 | grp=cut(seq_len(nrow(SeuObj@assays$RNA@data)),breaks = 100,labels = 1:100)
 7 | x=split(rownames(SeuObj@assays$RNA@data),f = grp)
 8 | #result=lapply(x,function(x){exp=FetchData(SeuObj,vars = x);aggregate(exp,by=list("layer"=paste0(SeuObj$Time,"_",SeuObj$rank)),mean)})
 9 | #test<-bind_cols(result, .name_repair="unique")
10 | #saveRDS(test,file="layer_exp.rds")
11 | 
12 | layer_label=paste0(SeuObj$sample,"_",SeuObj$rank)
13 | result=lapply(x,function(x){exp=FetchData(SeuObj,vars = x);aggregate(exp,by=list("layer"=layer_label),mean)})
14 | test<-bind_cols(result, .name_repair="unique")
15 | saveRDS(test,file="sample_layer_exp.rds")
16 | 
17 | #result=lapply(x,function(x){exp=FetchData(SeuObj,vars = x);aggregate(exp,by=list("layer"=SeuObj$sample),mean)})
18 | #test<-bind_cols(result, .name_repair="unique")
19 | #saveRDS(test,file="sample_exp.rds")
20 | 
21 | #df<-readRDS("sample_layer_exp.rds")
22 | # `df` was never assigned (the readRDS() line above is commented out), so `df[,1]` would have
23 | # tried to subset the stats::df function; use the table that was just computed and saved.
24 | df=as.data.frame(test)
25 | rownames(df)=df[,1]
26 | df=df[,-1]
27 | df=t(df)
28 | # The repeated "layer" columns of the later chunks are text: they turn into all-NA rows here
29 | # and are removed by complete.cases() below
30 | res=apply(df,2,as.numeric)
31 | res=as.data.frame(res)
32 | rownames(res)=rownames(df)
33 | res=res[complete.cases(res),]
34 | cor_res=cor(res)
35 | 
36 | # Sort key: drop the text up to the first "_", then collapse what lies between the first and last "_"
37 | layer_key=function(x){sub("_.*_","_",sub("[^_]*_","",x))}
38 | # NOTE(review): these positions are hard-coded and presumably specific to the original dataset
39 | shown=c(91:135,37:90,136:306)
40 | cor_sorted=cor_res[order(layer_key(rownames(cor_res))),order(layer_key(colnames(cor_res)))][shown,shown]
41 | 
42 | pdf("sample_layer_cor.pdf",width=30,height=30)
43 | pheatmap::pheatmap(cor_sorted,cluster_rows=F,cluster_cols=F)
44 | dev.off()
45 | write.table(cor_sorted,file="sample_layer_cor.xls",row.names=T,col.names=T,sep="\t")
46 | 


--------------------------------------------------------------------------------
/03.scRNAseq_clustering_scanpy.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | 
 4 | 
 5 | import scanpy as sc
 6 | import os
 7 | import pandas as pd
 8 | import numpy as np
 9 | import matplotlib
10 | import matplotlib.pyplot as plt
11 | import scanpy.external as sce
12 | 
13 | 
14 | 
15 | adata = sc.read('./01.qc/PHx.combine.h5ad')     # Input adata file after quality control.
16 | 
17 | adata.raw.var.index = pd.Index(adata.var['features'])
18 | adata.var.index = pd.Index(adata.var['features'])
19 | adata.obs['Library'] = adata.obs['split'].tolist()
20 | adata.var['mt'] = adata.var_names.str.startswith('mt-') 
21 | sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, use_raw=True, log1p=False, inplace=True)
22 | 
23 | sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts','pct_counts_mt'],
24 |              jitter=0.4, multi_panel=True)
25 | 
26 | bdata = adata[(adata.obs.n_genes_by_counts < 4000) &( adata.obs.n_genes_by_counts > 500 )& (adata.obs.total_counts < 20000) & (adata.obs.total_counts > 1000) & (adata.obs.pct_counts_mt < 10), :]
27 | sc.pl.violin(bdata, ['n_genes_by_counts', 'total_counts','pct_counts_mt'],
28 |              jitter=0.4, multi_panel=True)
29 | 
30 | sc.pp.highly_variable_genes(bdata, min_mean=0.0125, max_mean=3, min_disp=0.5)    # Find highly variable genes.
31 | bdata = bdata[:, bdata.var.highly_variable]
32 | sc.pp.regress_out(bdata, ['total_counts', 'pct_counts_mt'], n_jobs = 50)        # Data normalization.
33 | sc.pp.scale(bdata, max_value=10)                                                # Data scaling.
34 | sc.tl.pca(bdata, svd_solver='arpack', use_highly_variable = True)               # Dimmesion reduction.
35 | sce.pp.harmony_integrate(bdata, key = 'Library', basis='X_pca', adjusted_basis='X_pca_harmony')              # Remove batch effect.
36 | sc.pp.neighbors(bdata, use_rep = 'X_pca_harmony', n_neighbors=10, n_pcs=40)                                  # Find cell neighbors.
37 | sc.tl.umap(bdata)                                                                                            # Embedded cells in a 2-D space.
38 | bdata.write_h5ad('PHx.combine_filter.h5ad')
39 | 
40 | 
41 | 
42 | 
43 | 


--------------------------------------------------------------------------------
/00.Find_zonation_pathway_phyper.test.R:
--------------------------------------------------------------------------------
 1 | # Test every pathway for enrichment of zonated genes (hypergeometric test) and report, for the
 2 | # significant pathways, the summed layer expression of their zonated genes.
 3 | pathway=read.table("all.kegg.pathway.list",sep="\t",stringsAsFactors=FALSE)   # Input gene lists of pathways: V1 = name, V2 = comma-separated genes.
 4 | # strsplit() always returns a list; apply() would collapse to a matrix if all pathways had the same size
 5 | pathways=strsplit(as.character(pathway$V2),split=",")
 6 | names(pathways)=pathway$V1
 7 | 
 8 | zonated_gene=read.table("zonated.gene_Halpern.list",stringsAsFactors=FALSE)$V1   # Input zonation gene list
 9 | exp=readRDS("layer_exp_change.rds")                             # Input layer averaged gene expression
10 | universe=unique(rownames(exp))                                  # Background: every gene with expression data
11 | zonated_gene=unique(zonated_gene[zonated_gene %in% universe])
12 | library(dplyr)
13 | 
14 | # Hypergeometric test of genes of each pathway against zonation genes.
15 | # phyper(q, m, n, k): m = zonated genes (white balls), n = all other background genes (black
16 | # balls), k = pathway genes present in the background. q = overlap - 1 with lower.tail = FALSE
17 | # gives P(X >= overlap), i.e. the enrichment P value.
18 | n_zonated=length(zonated_gene)
19 | n_other=length(universe)-n_zonated
20 | pvalues=vapply(pathways,function(x){
21 |         drawn=unique(x[x %in% universe])
22 |         phyper(sum(drawn %in% zonated_gene)-1,n_zonated,n_other,length(drawn),lower.tail=FALSE)
23 | },numeric(1))
24 | 
25 | # Zonated genes and all genes of each pathway as comma-separated strings
26 | pathw_gene=vapply(pathways,function(x){paste0(unique(x[x %in% zonated_gene]),collapse=",")},character(1))
27 | pathway_gene=vapply(pathways,function(x){paste0(x,collapse=",")},character(1))
28 | 
29 | df2=data.frame(Pvalue=pvalues,genes=pathw_gene,row.names=names(pathways),stringsAsFactors=FALSE)
30 | df2=df2[order(rownames(df2)),]
31 | sig=df2[df2$Pvalue<0.05,]
32 | #write.table(df2[df2[,1]<0.05,],sep="\t",quote=F,file="pathway_phyper.list")
33 | write.table(sig,sep="\t",quote=F,file="pathway_phyper_1.list")
34 | if(nrow(sig)==0) stop("No pathway reached P < 0.05; nothing to summarise", call.=FALSE)
35 | 
36 | # Sum the expression of the zonated genes of each significant pathway over columns 19:27 of exp.
37 | # drop=FALSE keeps the single-gene case two-dimensional so colSums() works for it as well.
38 | layer_cols=19:27
39 | layer_sum=vapply(strsplit(unname(sig$genes),split=","),function(genes){
40 |         colSums(exp[genes,layer_cols,drop=FALSE])
41 | },numeric(length(layer_cols)))
42 | res1=as.data.frame(t(layer_sum))
43 | rownames(res1)=rownames(sig)
44 | 
45 | tab=data.frame(Pvalue=sig$Pvalue,res1,row.names=rownames(sig),check.names=FALSE,stringsAsFactors=FALSE)
46 | tab[["zonated genes"]]=unname(sig$genes)
47 | tab[["all genes"]]=unname(pathway_gene[rownames(sig)])
48 | tab$number_zg=lengths(strsplit(tab[["zonated genes"]],","))
49 | tab$number_gene=lengths(strsplit(tab[["all genes"]],","))
50 | tab$qvalue=qvalue::qvalue(tab$Pvalue, lambda = seq(0, max(tab$Pvalue), 0.05))$qvalue
51 | tab=tab[,c("Pvalue","qvalue","number_zg","number_gene",colnames(res1),"zonated genes","all genes")]
52 | write.table(tab,row.names=T,file="pathway_phyper_table_halpern.tsv",sep="\t",quote=F)
53 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # LISTA (LIver Spatio-Temporal Atlas)
 2 | ## Codes used in LISTA project.
 3 | 
 4 | ### CITE: Xu, J., Guo, P., Hao, S. et al. A spatiotemporal atlas of mouse liver homeostasis and regeneration. Nat Genet (2024). https://doi.org/10.1038/s41588-024-01709-7
 5 | 
 6 | ### Database: https://db.cngb.org/stomics/lista/
 7 | 
 8 | ![image](https://github.com/haoshijie13/LISTA/assets/59014440/92db2bcd-39fd-4bbb-906c-ed2e4b0f0e5c)
 9 | 
10 | **Step 1:** Run 00.Stereo_seq_Matrix2SeuratObject-pipeline.R to create Seurat objects. You can run SAW analysis pipeline (https://github.com/STOmics/SAW) to create matrix or download the processed matrix from our database: https://db.cngb.org/stomics/lista/download/
11 | 
12 | **Step 2:** Run 00.cut_zonation_layer_and_pathway_module_score.R to split spots into 9 zonation layers. The pathway module score can be added simultaneously. 
13 | 
14 | **Step 3:** Run 00.Ligand_receptor_interaction_zonation_analysis.R to calculate the interaction score of ligand & receptor pairs. The ligand receptor pairs used in our study are provided in the mouse_lr_pair.txt file. You can investigate all of them or only a subset.
15 | 
16 | **Step 4:** Run 00.Find_zonation_pathway_phyper.test.R to detect pathways enriched with zonation genes.
17 | 
18 | **Step 5:** Run 00.run_RCTD.R to calculate the cell type projection score of scRNAseq on Stereo-seq data. You can find a detailed tutorial on the official RCTD website: https://github.com/dmcable/spacexr
19 | 
20 | **Step 6:** Run 00.run_scenic.py to calculate the gene regulatory network. You can find a detailed tutorial on the official SCENIC website: https://pyscenic.readthedocs.io/en/latest/
21 | 
22 | **Step 7:** Run 00.run_hostspot.py to calculate gene coexpression modules based on their expression pattern. You can find a detailed tutorial on the official Hotspot website: https://yoseflab.github.io/Hotspot/
23 | 
24 | **Step 8:** Run 01.TBL1XR1.bulkRNAseq.preprocessing.sh to get the raw bulk RNAseq matrix of the TBL1XR1 perturbation data.
25 | 
26 | **Step 9:** Run 01.TBL1XR1.bulkRNAseq.DESeq.r to get the differentially expressed genes of the TBL1XR1 perturbation data.
27 | 
28 | **Step 10:** Run 02.ATAC_chipseq_preprocessing.sh to get chromatin modification regions of ATAC or Chip-seq data.
29 | 
30 | **Step 11:** Run 02.ATAC_chipseq_Motif_scan.r to get putative binding motifs of specific chromatin modification regions.
31 | 
32 | **Step 12:** Run 03.scRNAseq_clustering_scanpy.py to cluster scRNAseq datasets.
33 | 
34 | 


--------------------------------------------------------------------------------
/00.Ligand_receptor_interaction_zonation_analysis.R:
--------------------------------------------------------------------------------
 1 | # Score ligand-receptor interactions per zonation layer for one time point and test whether
 2 | # each pair's score differs between layers (Kruskal-Wallis test).
 3 | # Usage: Rscript 00.Ligand_receptor_interaction_zonation_analysis.R <Time>
 4 | args<-commandArgs(TRUE)
 5 | if(length(args)<1) stop("Usage: Rscript 00.Ligand_receptor_interaction_zonation_analysis.R <Time>", call.=FALSE)
 6 | library(Seurat)
 7 | SeuObj<-readRDS("add_Time_sample_change_marker_rank_factor.rds")  # Load the seurat object you want to deal with.
 8 | sub=subset(SeuObj,subset=Time==args[1])  # Select the dataset part for further analysis. OPTIONAL.
 9 | # Random group label for each bin spot: values are drawn from (1:n)/9/50 and truncated, which
10 | # gives roughly n/450 distinct labels, i.e. about 50 spots per layer x group when there are 9 layers.
11 | n_spot=nrow(sub@meta.data)
12 | sub$group=as.integer(sample(1:n_spot/9/50,n_spot,replace=TRUE))
13 | #lr<-read.table("../28.ligand/mouse_lr_pair.txt",header=T)
14 | lr<-read.table("manual.lr.list",header=TRUE,stringsAsFactors=FALSE)  # Read the ligand receptor list in this repository or input a subset you want to deal with.
15 | genes=unique(c(lr[,2],lr[,3]))
16 | genes=genes[genes %in% rownames(sub@assays$RNA@counts)]
17 | 
18 | #grp=cut(1:nrow(sub@assays$RNA@data),breaks = 100,labels = 1:100)
19 | #x=split(rownames(sub@assays$RNA@data),f = grp)
20 | 
21 | # Average the gene counts within every layer (rank) x random group combination.
22 | exp_mat=aggregate(as.data.frame(t(as.matrix(sub@assays$RNA@counts[genes,]))),by=list("rank"=sub$rank,"group"=sub$group),mean)
23 | 
24 | # Keep the pairs whose ligand and receptor are both present in exp_mat.
25 | lr_use=lr[ lr$ligand_gene_symbol %in% colnames(exp_mat) & lr$receptor_gene_symbol %in% colnames(exp_mat),]
26 | if(nrow(lr_use)==0) stop("No ligand-receptor pair has both genes in the expression matrix", call.=FALSE)
27 | # Multiply the ligand expression (column 2 of the list) by the corresponding receptor expression
28 | # (column 3) of each group as the interaction strength score.
29 | res=as.data.frame(lapply(seq_len(nrow(lr_use)),function(i){exp_mat[[as.character(lr_use[i,2])]] * exp_mat[[as.character(lr_use[i,3])]]}))
30 | colnames(res)=lr_use$lr_pair
31 | pair_names=colnames(res)
32 | res$rank=exp_mat$rank
33 | pvalues=vapply(pair_names,function(x){kruskal.test(res[[x]],res$rank)$p.value},numeric(1))    # KW test
34 | # Mean score of every pair per layer
35 | test=aggregate(res[,pair_names,drop=FALSE],by=list("rank"=res$rank),mean)
36 | 
37 | # Heatmap of the pairs with P < 0.05; skipped when there is none, and rows are only clustered
38 | # when there are at least two of them (clustering a single row is not possible).
39 | sig_pairs=names(pvalues)[!is.na(pvalues) & pvalues<0.05]
40 | if(length(sig_pairs)>0){
41 |         pdf(paste0("manual_lxr.",args[1],".pdf"),height=max(length(sig_pairs)/5.5,1),width=5)
42 |         pheatmap::pheatmap(t(test[,sig_pairs,drop=FALSE]),scale="row",cluster_cols=F,cluster_rows=length(sig_pairs)>1)
43 |         dev.off()
44 | }else{
45 |         message("No ligand-receptor pair with P < 0.05; heatmap skipped")
46 | }
47 | #df=as.data.frame(cbind(t(test[,names(na.omit(pvalues[pvalues<0.05]))]),na.omit(pvalues[pvalues<0.05])))
48 | df=as.data.frame(cbind(t(test[,names(pvalues),drop=FALSE]),pvalues))
49 | # One column per layer found in the data (9 in the original analysis) plus the P value
50 | colnames(df)=c(paste0("layer_",seq_len(nrow(test))),"Pvalue")
51 | write.table(df,file=paste0("manual_lxr.",args[1],".table.xls"),sep="\t",quote=F,row.names=T,col.names=T)
52 | 

--------------------------------------------------------------------------------
/00.run_hostspot.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import glob
 3 | import sys
 4 | import numpy as np
 5 | import pandas as pd
 6 | import hotspot
 7 | import matplotlib.pyplot as plt
 8 | import matplotlib.colors
 9 | import seaborn as sns
10 | import pickle
11 | # Load the counts and positions
12 | counts_file = sys.argv[2]      # input the cell×gene matrix
13 | pos_file = sys.argv[3]         # input the cell×spatial position matrix
14 | OUTDIR = "."
15 | NAME = sys.argv[1]             # output prefix
16 | 
17 | HS_RESULTS = ''.join([OUTDIR,"/",NAME,"_hs_results.p"])
18 | LCZ = ''.join([OUTDIR, "/", NAME, "_lcz.p"])
19 | MODULES = ''.join([OUTDIR, "/", NAME, "_modules.p"])
20 | HOTSPOT = ''.join([OUTDIR, "/", NAME, "_hotspot.p"])
21 | pos = pd.read_csv(pos_file, index_col=0)
22 | counts = pd.read_csv(counts_file, index_col=0) # Takes a while, ~10min
23 | # Align the indices
24 | counts = counts.loc[:, pos.index]
25 | barcodes = pos.index.values
26 | # Swap position axes
27 | # We swap x'=y and y'=-x to match the slides in the paper
28 | pos = pd.DataFrame(
29 |     {
30 |         'X': pos.X,
31 |         'Y': pos.Y,
32 |     }, index=pos.index
33 | )
34 | num_umi = counts.sum(axis=0)
35 | # Filter genes
36 | #gene_counts = (counts > 0).sum(axis=1)
37 | #valid_genes = gene_counts >= 50
38 | #counts = counts.loc[valid_genes]
39 | 
40 | 
41 | # Create the Hotspot object and the neighborhood graph
42 | hs = hotspot.Hotspot(counts, model='normal', latent=pos)
43 | 
44 | hs.create_knn_graph(
45 |     weighted_graph=False, n_neighbors=5,
46 | )
47 | 
48 | 
49 | hs_results = hs.compute_autocorrelations(jobs=20)
50 | 
51 | with open(HS_RESULTS, "wb") as f:
52 |     pickle.dump(hs_results,f)
53 | 
54 | 
55 | #select the genes with significant spatial autocorrelation
56 | hs_genes = hs_results.index[hs_results.FDR < 0.05]
57 | 
58 | # Compute pair-wise local correlations between these genes
59 | lcz = hs.compute_local_correlations(hs_genes, jobs=20)
60 | 
61 | with open(LCZ, "wb") as f:
62 |     pickle.dump(lcz,f)
63 | 
64 | 
65 | modules = hs.create_modules(
66 |     min_gene_threshold=5, core_only=False, fdr_threshold=0.05
67 | )
68 | 
69 | with open(MODULES, "wb") as f:
70 |     pickle.dump(modules, f)
71 | 
72 | 
73 | #with open(HOTSPOT, "wb") as f:
74 | #    pickle.dump(hs,f)
75 | 
76 | results = hs.results.join(hs.modules)
77 | results.to_csv("".join([sys.argv[1],"-Regulon2Gene.csv"]))
78 | 
79 | module_scores = hs.calculate_module_scores()
80 | module_scores.to_csv("".join([sys.argv[1],"-module_score.csv"]))
81 | 
82 | plt.rcParams['figure.figsize'] = (15.0, 12.0)
83 | hs.plot_local_correlations()
84 | plt.savefig("".join([sys.argv[1],"-regulon_module_number.pdf"]), dpi = 600)
85 | 


--------------------------------------------------------------------------------
/00.run_scenic.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import glob
 3 | import pickle
 4 | import pandas as pd
 5 | import numpy as np
 6 | import sys
 7 | 
 8 | #######################################################################################################
 9 | ################## You can find a detailed tutorial from SCENIC offtial website.#######################
10 | #######################################################################################################
11 | 
12 | 
13 | args = sys.argv
14 | print(args[1])
15 | 
16 | from dask.diagnostics import ProgressBar
17 | if __name__ == '__main__':
18 |     ProgressBar = ProgressBar()
19 | from arboreto.utils import load_tf_names
20 | from arboreto.algo import grnboost2
21 | from pyscenic.rnkdb import FeatherRankingDatabase as RankingDatabase
22 | from pyscenic.utils import modules_from_adjacencies, load_motifs
23 | from pyscenic.prune import prune2df, df2regulons
24 | from pyscenic.aucell import aucell
25 | import seaborn as sns
26 | import dask
27 | dask.config.set(num_workers=80)
28 | if __name__ == '__main__':
29 |         print(os.getcwd())
30 |         ex_matrix = pd.read_table(args[1], sep='\t', header=0, index_col=0).T  # Read cell × gene matrix.
31 |         print(ex_matrix)
32 |         ##################################################################
33 |         ########### We used mm10 database provided by SCENIC #############
34 |         ##################################################################
35 |         DATA_FOLDER="./"
36 |         RESOURCES_FOLDER="liver_zonation/SCENIC"
37 |         DATABASE_FOLDER="SCENIC/motif"
38 | #       SCHEDULER="123.122.8.24:8786"
39 |         DATABASES_GLOB = os.path.join(DATABASE_FOLDER, "mm10_*.mc9nr.feather")
40 |         MOTIF_ANNOTATIONS_FNAME = os.path.join(RESOURCES_FOLDER, "motifs-v9-nr.mgi-m0.001-o0.0.tbl")
41 |         MM_TFS_FNAME = os.path.join(RESOURCES_FOLDER, 'mm_mgi_tfs.txt')
42 |         REGULONS_FNAME = ''.join([ DATA_FOLDER, args[1],"_regulons.p"])
43 |         MOTIFS_FNAME = ''.join([ DATA_FOLDER, args[1], "_motifs.txt"])
44 |         ADJACENCIES_FNAME = ''.join([DATA_FOLDER, args[1], "_adjacencies.txt"])
45 |         MODULES_FNAME = ''.join([DATA_FOLDER, args[1], "_modules.p"])
46 |         AUC_FNAME = ''.join([DATA_FOLDER, args[1], "_AUC.txt"])
47 | 
48 |         print(REGULONS_FNAME)
49 |         print(MOTIFS_FNAME)
50 | 
51 |         tf_names = load_tf_names(MM_TFS_FNAME)    # Load database.
52 |         db_fnames = glob.glob(DATABASES_GLOB)
53 |         def name(fname):
54 |             return os.path.splitext(os.path.basename(fname))[0]
55 |         dbs = [RankingDatabase(fname=fname, name=name(fname)) for fname in db_fnames]
56 | 
57 |         print(dbs)
58 |         adjancencies = grnboost2(expression_data=ex_matrix, tf_names=tf_names, verbose=True)    # Calculate gene coexpression relationships.
59 |         adjancencies.to_csv(ADJACENCIES_FNAME, index=False, sep='\t')
60 |         modules = list(modules_from_adjacencies(adjancencies, ex_matrix))
61 |         with open(MODULES_FNAME, 'wb') as f:
62 |             pickle.dump(modules, f)
63 |         df = prune2df(dbs, modules, MOTIF_ANNOTATIONS_FNAME)
64 |         df.head()
65 |         df.to_csv(MOTIFS_FNAME)
66 |         regulons = df2regulons(df)
67 |         with open(REGULONS_FNAME, 'wb') as f:
68 |             pickle.dump(regulons, f)
69 |         auc_mtx = aucell(ex_matrix, regulons, num_workers=5)
70 |         auc_mtx.to_csv(AUC_FNAME, sep = "\t")
71 | 


--------------------------------------------------------------------------------
/00.Stereo_seq_Matrix2SeuratObject-pipeline.R:
--------------------------------------------------------------------------------
 1 | library("Seurat")
 2 | library("ggplot2")
 3 | library("reshape2")
 4 | library("tidyr")
 5 | library(data.table)
 6 | library(Matrix)
 7 | # library("future")
 8 | # plan("multiprocess", workers = 30)
 9 | options(future.globals.maxSize=100000000000)
10 | args=commandArgs(TRUE)      # first parameter: the matrix file, which contains at least four columns: geneID, x, y, and MIDCount (UMI).
11 | if(length(args) < 1) stop("Usage: Rscript 00.Stereo_seq_Matrix2SeuratObject-pipeline.R <matrix.txt>", call.=FALSE)
12 | base=basename(args[1]); dir=dirname(args[1])    # dir: directory of the input file, used for the output files below
13 | prefix=sub("\\.txt$","",base)                    # file name without its .txt extension (the dot is escaped to match literally)
14 | 
15 | # Read a Stereo-seq table and bin it into a sparse gene x spot count matrix.
16 | #   mat: path of the table (columns geneID, x, y and the count in column 4); bin: bin width.
17 | #   Returns a sparse matrix with genes as rows and "<x bin>_<y bin>" spot IDs as columns.
18 | read<-function(mat,bin){
19 |         dnb<-as.data.frame(fread(mat,header=T))        # fread is a paralleled processing command
20 |         # Spot ID: rounded x/bin and y/bin joined by "_"; DNBs sharing an ID form one bin spot
21 |         spot_id<-paste(as.character(round(dnb$x/bin, digits = 0)),"_",as.character(round(dnb$y/bin, digits = 0)),sep="")
22 |         genes<-unique(dnb$geneID)
23 |         spots<-unique(spot_id)
24 |         gene_row<-match(dnb$geneID,genes)
25 |         print(setNames(head(gene_row),head(dnb$geneID)))
26 |         # Create the sparse matrix from row/column positions; the counts are taken from column 4
27 |         counts<-sparseMatrix(i=gene_row,j=match(spot_id,spots),x=dnb[,4])
28 |         rownames(counts)<-genes; colnames(counts)<-spots
29 |         counts
30 | }
31 | 
32 | # Run the Seurat workflow on one binned count matrix and write the results to disk.
33 | #   mat: gene x spot count matrix (as built by read()); bin: bin size, used in the file names.
34 | # Writes <dir>/bin<bin>.MG (marker table) and <dir>/bin<bin>.Rdata (object saved as SeuObj),
35 | # where dir is the script-level variable. AllMG and SeuObj are also assigned globally via <<-.
36 | analyze <- function(mat,bin){
37 |         counts=mat
38 |         counts[is.na(counts)]=0
39 |         print("ok")
40 |         SeuObj<-CreateSeuratObject(counts = counts, names.delim = "-", project = "SeuObj")
41 |         SeuObj[["percent.mt"]] <- PercentageFeatureSet(SeuObj, pattern = "^mt-|^MT-|^Mt-")
42 | 
43 |         # What follows is the analysis pipeline recommended by Seurat
44 |         SeuObj <- NormalizeData(SeuObj, normalization.method = "LogNormalize", scale.factor = 10000)
45 |         SeuObj <- FindVariableFeatures(SeuObj, selection.method = "vst", nfeatures = 2000)
46 |         SeuObj <- ScaleData(SeuObj, features = rownames(SeuObj), vars.to.regress = "nCount_RNA")
47 | #        SeuObj <- SCTransform(SeuObj, vars.to.regress = "percent.mt", verbose = FALSE)
48 |         SeuObj <- RunPCA(SeuObj, verbose = FALSE)
49 |         SeuObj <- RunUMAP(SeuObj, dims = 1:15, verbose = FALSE)
50 |         SeuObj <- FindNeighbors(SeuObj, dims = 1:15, verbose = FALSE)
51 |         SeuObj <- FindClusters(SeuObj, verbose = FALSE)
52 | 
53 |         # Spot names are parsed as "<x>_<y>": x is the text before the first "_" (with an "X"
54 |         # removed), y is the text after the last "_"
55 |         spot_names=rownames(SeuObj@meta.data)
56 |         x_text=sub(spot_names,pattern = "_.*",replacement = "")
57 |         x_text=sub(x_text,pattern = "X",replacement = "")
58 |         y_text=sub(spot_names,pattern = ".*_",replacement = "")
59 |         SeuObj@meta.data$coor_x=as.integer(x_text)
60 |         SeuObj@meta.data$coor_y=as.integer(y_text)
61 | 
62 |         # Markers of every cluster; stored globally first, then written to the input directory
63 |         AllMG <<- FindAllMarkers(SeuObj)
64 |         SeuObj <<- SeuObj
65 | 
66 |         marker_file=paste(dir,"/bin",bin,".MG",sep="")
67 |         object_file=paste(dir,"/bin",bin,".Rdata",sep="")
68 |         write.table(AllMG,file=marker_file)
69 |         save(SeuObj,file=object_file)
70 | }
71 | 
72 | # Bin sizes to process - edit this vector to choose the bin size(s) you prefer.
73 | bins=c(100,50)
74 | for(bin in bins){
75 |         mat<-read(args[1],bin); analyze(mat,bin)   # bin the input matrix, then analyse it
76 | }
77 | 


--------------------------------------------------------------------------------
/02.ATAC_chipseq_Motif_scan.r:
--------------------------------------------------------------------------------
  1 | library(motifmatchr)
  2 | 
  3 | # Read a BED-like file into a GRanges object.
  4 | #   file: path given to read.table(); needs at least the chr, start and end
  5 | #         columns. Only the first six columns (chr, start, end, id, score,
  6 | #         strand) are used; anything beyond them is ignored.
  7 | #   Returns a GRanges carrying id / score / strand when those columns exist.
  8 | # NOTE(review): start is handed to IRanges() unchanged. BED starts are usually
  9 | #   0-based while GRanges is 1-based - confirm whether a +1 shift is wanted.
 10 | bed_to_granges <- function(file){
 11 |    bed <- read.table(file,
 12 |                      header=F,
 13 |                      stringsAsFactors=F)
 14 |    n_col <- min(length(bed), 6)
 15 |    if(n_col < 3){
 16 |       stop("File has less than 3 columns")
 17 |    }
 18 | 
 19 |    bed <- bed[, seq_len(n_col), drop=FALSE]
 20 |    names(bed) <- c('chr','start','end','id','score','strand')[seq_len(n_col)]
 21 | 
 22 |    # Every run of characters other than "+" or "-" is replaced by "*"
 23 |    if(n_col == 6){
 24 |       bed$strand <- gsub(pattern="[^+-]+", replacement = '*', x = bed$strand)
 25 |    }
 26 | 
 27 |    library("GenomicRanges")
 28 |    ranges <- IRanges(bed$start, bed$end)
 29 |    # One constructor call per column count, adding the optional columns in order
 30 |    switch(as.character(n_col),
 31 |       "3" = GRanges(bed$chr, ranges),
 32 |       "4" = GRanges(bed$chr, ranges, id=bed$id),
 33 |       "5" = GRanges(bed$chr, ranges, id=bed$id, score=bed$score),
 34 |       "6" = GRanges(bed$chr, ranges, id=bed$id, score=bed$score, strand=bed$strand))
 35 | }
 36 | 
 37 | prepareMotifmatchr <- function(genome, motifs){
 38 | 	res <- list()
 39 | 
 40 | 	# get the species name and the genome sequence object based on the object
 41 | 	genomeObj <- genome
 42 | 	if (!is.element("BSgenome", class(genomeObj))){
 43 | 		genomeObj <- getGenomeObject(genome)
 44 | 	}
 45 | 	spec <- organism(genomeObj)
 46 | 
 47 | 	# get the motif PWMs
 48 | 	motifL <- TFBSTools::PWMatrixList()
 49 | 	if (is.character(motifs)){
 50 | 		if (is.element("jaspar", motifs)){
 51 | 			# copied code from chromVAR, but updated the JASPAR version
 52 | 			opts <- list()
 53 | 			opts["species"] <- spec
 54 | 			opts["collection"] <- "CORE"
 55 | 			# gets the non-redundant set by default
 56 | 			mlCur <- TFBSTools::getMatrixSet(JASPAR2018::JASPAR2018, opts)
 57 | 			if (!isTRUE(all.equal(TFBSTools::name(mlCur), names(mlCur)))){
 58 | 				names(mlCur) <- paste(names(mlCur), TFBSTools::name(mlCur), sep = "_")
 59 | 			} 
 60 | 			motifL <- c(motifL, TFBSTools::toPWM(mlCur))
 61 | 		}
 62 | 		if (is.element("jaspar_vert", motifs)){
 63 | 			# JASPER for all vertebrate TFBS
 64 | 			opts <- list()
 65 | 			opts["tax_group"] <- "vertebrates"
 66 | 			opts["collection"] <- "CORE"
 67 | 			# gets the non-redundant set by default
 68 | 			mlCur <- TFBSTools::getMatrixSet(JASPAR2020::JASPAR2020, opts)
 69 | 			if (!isTRUE(all.equal(TFBSTools::name(mlCur), names(mlCur)))){
 70 | 				names(mlCur) <- paste(names(mlCur), TFBSTools::name(mlCur), sep = "_")
 71 | 			} 
 72 | 			motifL <- c(motifL, TFBSTools::toPWM(mlCur))
 73 | 		}
 74 | 		if (is.element("jaspar2016", motifs)){
 75 | 			motifL <- c(motifL, TFBSTools::toPWM(chromVAR::getJasparMotifs(species=spec)))
 76 | 		}
 77 | 		if (is.element("homer", motifs)){
 78 | 			if (!requireNamespace("chromVARmotifs")) logger.error(c("Could not load dependency: chromVARmotifs"))
 79 | 			data("homer_pwms")
 80 | 			motifL <- c(motifL, chromVARmotifs::homer_pwms)
 81 | 		}
 82 | 		if (is.element("encode", motifs)){
 83 | 			if (!requireNamespace("chromVARmotifs")) logger.error(c("Could not load dependency: chromVARmotifs"))
 84 | 			data("encode_pwms")
 85 | 			motifL <- c(motifL, chromVARmotifs::encode_pwms)
 86 | 		}
 87 | 		if (is.element("cisbp", motifs)){
 88 | 			if (!requireNamespace("chromVARmotifs")) logger.error(c("Could not load dependency: chromVARmotifs"))
 89 | 			if (spec == "Mus musculus"){
 90 | 				data("mouse_pwms_v1")
 91 | 				motifL <- c(motifL, chromVARmotifs::mouse_pwms_v1)
 92 | 			} else if (spec == "Homo sapiens"){
 93 | 				data("human_pwms_v1")
 94 | 				motifL <- c(motifL, chromVARmotifs::human_pwms_v1)
 95 | 			} else {
 96 | 				logger.warning(c("Could not find cisBP annotation for species", spec))
 97 | 			}
 98 | 		}
 99 | 		if (is.element("cisbp_v2", motifs)){
100 | 			if (!requireNamespace("chromVARmotifs")) logger.error(c("Could not load dependency: chromVARmotifs"))
101 | 			if (spec == "Mus musculus"){
102 | 				data("mouse_pwms_v2")
103 | 				motifL <- c(motifL, chromVARmotifs::mouse_pwms_v2)
104 | 			} else if (spec == "Homo sapiens"){
105 | 				data("human_pwms_v2")
106 | 				motifL <- c(motifL, chromVARmotifs::human_pwms_v2)
107 | 			} else {
108 | 				logger.warning(c("Could not find cisBP annotation for species", spec))
109 | 			}
110 | 		}
111 | 		if (length(motifL) < 1) {
112 | 			logger.error(c("No motifs were loaded. Unsupported motifs (?) :", motifs))
113 | 		}	
114 | 	} else if (is.element("PWMatrixList", class(motifs)) || is.element("PFMatrixList", class(motifs))) {
115 | 		motifL <- motifs
116 | 	} else {
117 | 		logger.error(c("unsupported value for motifs:", motifs))
118 | 	}	
119 | 	res[["genome"]] <- genomeObj
120 | 	res[["motifs"]] <- motifL
121 | 	return(res)
122 | }
123 | 
124 | 
# Scan the peaks of a BED file for occurrences of selected cisBP motifs.
peak <- '/mnt/4/liver_project/atac/01.align/male_liver/Tbl1xr1.anno.bed'
peaks <- bed_to_granges(peak)
cisbp_motif <- prepareMotifmatchr("mm10", "cisbp")$motifs
sel_motif <- c('Jun','Egr1','Fos','Cebpd')

# For each pattern, the positions of all motifs whose name matches it. grep()
# matches anywhere in the name, so one pattern can select several motifs.
# NOTE(review): confirm that substring matching (rather than exact TF names) is intended.
motif_hits <- lapply(sel_motif, function(f) grep(f, names(cisbp_motif)))
no_hit <- sel_motif[lengths(motif_hits) == 0]
if (length(no_hit) > 0) {
    warning("No cisBP motif name matches: ", paste(no_hit, collapse = ", "), call. = FALSE)
}

# Subset the motif list once, in sel_motif order. This keeps the class of
# cisbp_motif; growing the set with append() from an empty c() made the result
# class depend on how c() dispatches when its first argument is NULL.
mtf_set <- cisbp_motif[unlist(motif_hits)]
motif_pos <- matchMotifs(mtf_set, peaks, genome = "mm10",
                         out = "positions")
136 | 


--------------------------------------------------------------------------------
/00.cut_zonation_layer_and_pathway_module_score.R:
--------------------------------------------------------------------------------
 1 | pathways<-list("acylglycerol metabolic process"=c("Apoa1","Apof","Cdk8","Cps1","Insig1","Ldlr","Pcsk9","Plb1","Plce1"),"ATP metabolic process"=c("Afg1l","Ak4","Aldob","Ampd3","Apoc3","Atp5a1","Atp5b","Atp5c1","Atp5e","Atp5g1","Atp5g3","Atp5h","Atp5j","Atp5j2","Atp5k","Atp5l","Cfh","Cox4i1","Cox5a","Cox5b","Cox7a2","Cox7a2l","Cox7c","Cyc1","Cycs","Ddit4","Dnm1l","Fbp1","Gm10358","Gm3839","Hk1","Hspa8","Igf1","Khk","Ndufa8","Ndufb6","Ndufb8","Ndufb9","Ndufc2","Ndufs2","Ndufs6","Ndufv2","Park7","Pklr","Prkag2","Sdhd","Slc25a25","Tkfc","Uqcr10","Uqcrfs1","Uqcrh"),"alpha-amino acid metabolic process"=c("Aass","Acmsd","Agxt","Amdhd1","Arg1","Asl","Aspg","Ass1","Cbs","Cth","Ftcd","Gcsh","Gldc","Gls2","Gmps","Gnmt","Got1","Hal","Hnf4a","Kyat1","Kyat3","Kynu","Mat1a","Mccc2","Nox4","Otc","Park7","Ppat","Pycr2","Qdpr","Sdsl","Sephs2","Slc7a7","Tat","Uroc1"),"electron transport chain"=c("Afg1l","Cox4i1","Cox5a","Cox5b","Cox7c","Cyb561","Cyc1","Cycs","Ndufa5","Ndufa8","Ndufb6","Ndufb8","Ndufb9","Ndufc2","Ndufs2","Ndufs6","Ndufv2","Park7","Sdhb","Sdhd","Slc25a12","Uqcr10","Uqcrfs1","Uqcrh"),"generation of precursor metabolites and energy"=c("Aco2","Chchd4","Cox7a1","Dlst","Eno1b","Etfrf1","Fh1","G6pc","G6pdx","Gpi1","Grb10","Idh3g","Lepr","Mdh1","Mybbp1a","Ndufs1","Nfatc4","Ogdh","Oxct1","Per2","Ppargc1a","Ppif","Sdha","Slc25a22","Slc37a2","Suclg1","Tpi1","Uqcrc1"),"organic anion transport"=c("Abat","Abcb1a","Ace","Apoa4","Apoc2","Arg2","Cyp4f18","G6pc","Gipc1","Kmo","Mfsd2a","Nfkbie","P2ry2","Per2","Plin2","Plscr1","Prelid2","Scp2","Slc16a5","Slc25a22","Slc26a1","Slc2a2","Slc37a2","Slc38a2","Slc51b","Slc6a8","Slc7a4","Slc9a3r1"),"cellular ketone metabolic process"=c("Apoa4","Apoc2","Cyp21a1","Cyp4f18","Fdxr","Fh1","Kmo","Mfsd2a","Mlycd","Oxct1","Pdss1","Ppargc1a","Scp2","Slc37a2","Srd5a1","Tdo2"),"arachidonic acid metabolic process"=c("Cyp2a22","Cyp2a4","Cyp2a5","Cyp2b10","Cyp2c37","Cyp2c38","Cyp2c40","Cyp2c69","Cyp2d12","Cyp2d22","Cyp2j6","Cyp4a12a","Cyp4a12b"),"fatty 
acid metabolic process"=c("Asah2","Cpt2","Cyp2a22","Cyp2a4","Cyp2a5","Cyp2b10","Cyp2c37","Cyp2c38","Cyp2c40","Cyp2c69","Cyp2d12","Cyp2d22","Cyp2j6","Cyp4a12a","Cyp4a12b","Etfa","Lipg","Pdk4","Pla2g10","Pparg","Slc25a17","Slc27a1","Them4","Trib3","Tysnd1"),"anion transmembrane transport"=c("Aacs","Abcd3","Abcd4","Abhd3","Acaa1a","Acaa1b","Acaa2","Acat1","Acat2","Acot1","Acot12","Acot2","Acot3","Acot4","Acot6","Acot8","Acox1","Acsf2","Acsl1","Acsm1","Acsm3","Acsm5","Acss2","Adtrp","Akr1c14","Akr1c20","Akr1c6","Aldh3a2","Avpr1a","Cyb5a","Cyp1a2","Cyp2c23","Cyp2c29","Cyp2c39","Cyp2c50","Cyp2c54","Cyp2c55","Cyp2c67","Cyp2c68","Cyp2d10","Cyp2d9","Cyp2e1","Cyp2g1","Cyp4a10","Cyp4a14","Cyp4a32","Cyp4f14","Cyp4f15","Decr1","Decr2","Dgat2","Ech1","Eci1","Eci2","Elovl1","Elovl3","Ephx2","Fabp1","Fabp2","Gcdh","Gstm4","Hacd1","Hacd2","Hacd3","Hacl1","Hadh","Hsd17b4","Lias","Lonp2","Mlxipl","Pex7","Phyh","Por","Rgn","Slc27a2","Slc27a5","Tmem189"),"organic acid catabolic process"=c("Abcd3","Abcd4","Abhd3","Acaa1a","Acaa1b","Acaa2","Acat1","Acat2","Acot2","Acot4","Acot8","Acox1","Adtrp","Agxt2","Akr1a1","Aldh4a1","Bckdhb","Blmh","Csad","Cyp26a1","Cyp26c1","Cyp4f14","Cyp4f15","Decr1","Eci1","Eci2","Fabp1","Fah","Gcdh","Glud1","Gstz1","Hacl1","Hadh","Hgd","Hibadh","Hmgcl","Hpd","Hsd17b4","Hyal1","Ldhd","Lonp2","Mtrr","Pex7","Phyh","Pon1","Pon3","Prodh","Shmt1","Slc27a2"),"sulfur compound metabolic process"=c("Abcc2","Acaa2","Acat1","Acot1","Acot12","Acot2","Acot3","Acot4","Acot6","Acot8","Acsl1","Acsm1","Acsm3","Acsm5","Acss2","Blmh","Comt","Cs","Csad","Dgat2","Dlat","Enpp1","Gcdh","Gclm","Ghr","Gsta3","Gstm1","Gstm3","Gstm4","Gstm6","Gstt1","Gstz1","Hmgcl","Hsd17b4","Idh1","Lias","Mat2a","Mgst1","Mtrr","Mvk","Nat8","Nfe2l2","Papss2","Pmvk","Sod1","Stat5a","Sult1b1","Sult1d1","Sult1e1","Ugdh"),"alcohol metabolic 
process"=c("Acer3","Adh1","Akr1a1","Akr1c14","Akr1c20","Akr1c6","Akr7a5","Aldh1a1","Aldh3a2","Apoc1","Apoe","Clcn2","Coq3","Cyp1a2","Cyp27a1","Dgat2","Dpm1","Ebp","Ephx2","Fdx1","Fech","Gba2","H6pd","Idh1","Lcat","Lmf1","Lpcat3","Lrp5","Mvk","Npc1","Nsdhl","P2ry1","Pctp","Plcb1","Pmvk","Pon1","Por","Prkg1","Rbp4","Rdh10","Soat2","Sod1","Sord","Srd5a3","Sult1b1","Sult1e1","Tm7sf2","Ttc39b"),"lipid homeostasis"=c("Abcd1","Abcg8","Acox2","Apoa2","Apoc4","Commd1","Cyp7a1","Gck","Lipc","Mia2","Nr1d1","Nr5a2"),"purine-containing compound metabolic process"=c("Abcd1","Acnat1","Acot7","Acsl5","Adk","Crot","Gamt","Gck","Mif","Mpc1","Mpc2","Nudt2","Pemt","Pipox","Ppara","Prps1l3","Slc25a13","Ttr"),"ER stress"=c("Dnajb11","Dnajb9","Hspa8","Hspa9","Xbp1","Manf","Atf4","Dnajc10"))
 2 | library(Seurat)
 3 | SeuObj<-readRDS("D0.rds")   # Read the Seurat object which you want to deal with
 4 | SeuObj<-AddModuleScore(SeuObj,features=pathways)  # Calculate the module score of each pathway 
 5 | colnames(SeuObj@meta.data)=c(colnames(SeuObj@meta.data)[1:31],names(pathways))
 6 | library(ggplot2)
 7 | dir.create("pathways"); 
 8 | apply(as.matrix(colnames(SeuObj@meta.data)[32:46]),1,function(x){  # apply function runs faster than for loop
 9 |   df=SeuObj@meta.data[SeuObj@meta.data[,x]<quantile(SeuObj@meta.data[,x],0.995)[[1]] & SeuObj@meta.data[,x]>quantile(SeuObj@meta.data[,x],0.005)[[1]],];
10 |   p<-ggplot(df,aes(x=coor_x,y=coor_y,fill=SeuObj@meta.data[,x][SeuObj@meta.data[,x]<quantile(SeuObj@meta.data[,x],0.995)[[1]] & SeuObj@meta.data[,x]>quantile(SeuObj@meta.data[,x],0.005)[[1]]]))+geom_tile()+coord_fixed()+theme_void()+scale_fill_viridis_c(option="B",direction=-1)+labs(fill="");
11 |   ggsave(filename=paste0("pathways/",x,".pdf"),plot=p)
12 | })
13 | 
# Landmark gene sets whose module scores define the zonation axis.
# NOTE(review): Glul/Cyp2e1 are listed under PV and Cyp2f2/Pck1 under CV, which
# looks reversed relative to the usual central-vein/portal-vein marker
# assignment; confirm what the two labels stand for here.
CV <- c("Alb","Aldh1b1","Aldob","Apoa4","Apoa5","Apoc2","Arg1","Asl","Ass1","C9","Cdh1","Cyp2f2","Etnppl","Fbp1","G6pc","Gls2","Gnmt","Hal","Hp","Hpx","Hsd17b6","Hsd17b13","Mfsd2a","Mup20","Pck1","Pigr","Rida","Sds","Selenbp2","Selenop","Serpina1c","Serpina1e","Tdo2","Trf","Ugt2b38","Uox")
PV <- c("Akr1c6","Aldh1a1","Aldh3a2","Car3","Ces1c","Csad","Cyb5a","Cyp1a2","Cyp2a5","Cyp2c29","Cyp2c37","Cyp2c40","Cyp2c50","Cyp2c54","Cyp2e1","Cyp3a11","Cyp4a10","Cyp4a14","Glul","Gsta3","Gstm1","Gulo","Lect2","Mgst1","Mup11","Mup15","Mup16","Mup17","Mup18","Oat","Pon1","Rgn","Rnase4","Slc1a2","Slc22a1","Slco1b2","Sord","Ugt1a10","Ugt2b1")

# Number of genes to draw at random from each landmark set; NULL uses every
# gene. The script previously passed `x` as the sample size, a variable that
# is not defined at this point, so it stopped here unless `x` happened to
# exist in the workspace.
# NOTE(review): the intended subsample size is not recorded anywhere in this
# file; set n_landmark to restore random subsampling.
n_landmark <- NULL
draw_landmarks <- function(genes, n = NULL) {
  if (is.null(n)) {
    return(genes)
  }
  sample(genes, size = n)
}

# The scores are read back below as the metadata columns CV1 and PV1.
SeuObj <- AddModuleScore(SeuObj, features = list("CV" = draw_landmarks(CV, n_landmark)), name = "CV")
SeuObj <- AddModuleScore(SeuObj, features = list("PV" = draw_landmarks(PV, n_landmark)), name = "PV")

# cut_number() splits the PV-minus-CV score difference into 9 groups with
# approximately equal numbers of observations, labelled 1 to 9.
SeuObj$rank <- ggplot2::cut_number(SeuObj$PV1 - SeuObj$CV1, n = 9, labels = 1:9)
saveRDS(SeuObj, "add.Pathway.layer.rds")
20 | 


--------------------------------------------------------------------------------