├── Graphic-abstrct.png ├── Workflow-figure.png ├── Step0_LRToolsFunction ├── PyMINEr_shell.sh ├── scConnect_shell.sh ├── NATMI_shell.sh ├── CellPhoneDB3_shell.sh ├── CellPhoneDB2_shell.sh ├── cell2cell_shell.sh ├── Domino_shell2.sh ├── Domino_shell1.sh ├── scConnect_python.py ├── cell2cell_python.py ├── ScriptForAddLRscore.R └── Step1_LRToolsFunction.R ├── Step8_LRTToolsFunction ├── HoloNet_shell.sh ├── LRTPrediction_5tools.sh ├── RunSTScript.R ├── HoloNet.py └── Step8_LRTToolsFunction.R ├── Step3_MIForLRBench ├── Step2_ScriptForDLRC.R ├── Step1_ScriptForCalMI.R └── function.R ├── Step4_SIRSIForLRBench ├── ScriptForCalSIRSI.R └── function.R ├── Step6_LRBenchSampling ├── example.sh └── RunScript.R ├── Step9_LRTBench ├── Step2_ScriptForRecord.R ├── Step3_ScriptForTimeMen.R ├── Step1_ScriptForBench.R └── function.R ├── Step7_LRBenchSamplingBench ├── Step3_TimeMenRecord.R ├── Step2_JaccardIndex.R ├── function.R └── Step1_DatasetRecord.R ├── Step1_LRPredictionResult ├── LRPredition_19tools.sh └── RunScript.R ├── Step2_PreSTForLRBench ├── ScriptForCK.R ├── ScriptForCID.R ├── function.R └── ScriptForMouseEymbryo.R ├── Step5_BenchBasedCAGEProteomic ├── function.R ├── ScriptForCAGE.R └── ScriptForProteomics.R └── README.md /Graphic-abstrct.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SunXQlab/ESICCC/HEAD/Graphic-abstrct.png -------------------------------------------------------------------------------- /Workflow-figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SunXQlab/ESICCC/HEAD/Workflow-figure.png -------------------------------------------------------------------------------- /Step0_LRToolsFunction/PyMINEr_shell.sh: -------------------------------------------------------------------------------- 1 | source /home/ljx/miniconda3/bin/activate pyminer 2 | pyminer.py -i $1 -manual_sample_groups $2 -rand_seed 123 -species $3 3 | -------------------------------------------------------------------------------- /Step0_LRToolsFunction/scConnect_shell.sh: -------------------------------------------------------------------------------- 1 | source /home/ljx/miniconda3/bin/activate cell2cell 2 | /home/ljx/miniconda3/envs/cell2cell/bin/python ../Script/Step0_LRToolsFunction/scConnect_python.py $1 $2 $3 $4 -------------------------------------------------------------------------------- /Step0_LRToolsFunction/NATMI_shell.sh: -------------------------------------------------------------------------------- 1 | cd ./Step0_SharedInfo/NATMI/ 2 | /home/ljx/miniconda3/bin/python ExtractEdges.py --species $1 --emFile $2 --annFile $3 --interDB lrc2p --coreNum 10 --out $4 --interSpecies $1 -------------------------------------------------------------------------------- /Step0_LRToolsFunction/CellPhoneDB3_shell.sh: -------------------------------------------------------------------------------- 1 | source /home/ljx/miniconda3/bin/activate cpdb 2 | cellphonedb method degs_analysis $1 $2 $3 --threshold 0.05 --output-path $4 --threads 10 --counts-data gene_name --debug-seed 123 -------------------------------------------------------------------------------- /Step0_LRToolsFunction/CellPhoneDB2_shell.sh: -------------------------------------------------------------------------------- 1 | source /home/ljx/miniconda3/bin/activate cpdb2 2 | cellphonedb method statistical_analysis $1 $2 --threshold 0.05 --output-path $3 --threads 10 --counts-data gene_name --debug-seed 123 3 | -------------------------------------------------------------------------------- /Step8_LRTToolsFunction/HoloNet_shell.sh: -------------------------------------------------------------------------------- 1 | source /home/ljx/miniconda3/bin/activate HoloNet 2 | /home/ljx/miniconda3/envs/HoloNet/bin/python ~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Script/Step8_LRTToolsFunction/HoloNet.py $1 $2 $3 $4 $5 -------------------------------------------------------------------------------- /Step0_LRToolsFunction/cell2cell_shell.sh: -------------------------------------------------------------------------------- 1 | source /home/ljx/miniconda3/bin/activate cell2cell 2 | /home/ljx/miniconda3/envs/cell2cell/bin/python ~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Script/Step0_LRToolsFunction/cell2cell_python.py $1 $2 $3 $4 -------------------------------------------------------------------------------- /Step0_LRToolsFunction/Domino_shell2.sh: -------------------------------------------------------------------------------- 1 | source /home/ljx/miniconda3/bin/activate pyscenic 2 | 3 | pyscenic grn --num_workers 30 -o $1 $2 $5 --seed 123 4 | 5 | pyscenic ctx $1 $6 $7 --annotations_fname $8 --expression_mtx_fname $2 --mode "dask_multiprocessing" --output $3 --num_workers 30 6 | 7 | pyscenic aucell $2 $3 -o $4 --num_workers 30 --seed 123 -------------------------------------------------------------------------------- /Step0_LRToolsFunction/Domino_shell1.sh: -------------------------------------------------------------------------------- 1 | source /home/ljx/miniconda3/bin/activate pyscenic 2 | 3 | pyscenic grn --num_workers 30 -o $1 $2 $5 --seed 123 4 | 5 | pyscenic ctx $1 $6 $7 $8 $9 ${10} ${11} --annotations_fname ${12} --expression_mtx_fname $2 --mode "dask_multiprocessing" --output $3 --num_workers 30 6 | 7 | pyscenic aucell $2 $3 -o $4 --num_workers 30 --seed 123 -------------------------------------------------------------------------------- /Step3_MIForLRBench/Step2_ScriptForDLRC.R: -------------------------------------------------------------------------------- 1 | rm(list = ls());gc() 2 | source('./Script/Step3_MIForLRBench/function.R') 3 | set.seed(123) 4 | 5 | result.path <- list.files('./Data/Step3_MIPCCForLRBench', full.names = TRUE) 6 | samples <- gsub('_result.rds', '', substring(result.path, 30)) 7 | 8 | tmp <- lapply(seq(result.path), function(i){ 9 | Eval1Result <- Eval1Process(result.path[i]) 10 | Eval1Result <- EvalIndex_DLRC(Eval1Result) 11 | Eval1Result 12 | }) 13 | names(tmp) <- samples 14 | tmp <- do.call(rbind, tmp) 15 | tmp <- tibble::rownames_to_column(tmp, 'datasets') 16 | tmp$datasets <- gsub('\\.[0-9]+', '', tmp$datasets) -------------------------------------------------------------------------------- /Step8_LRTToolsFunction/LRTPrediction_5tools.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | tools="NicheNet MISTy stMLnet HoloNet CytoTalk" 3 | 4 | sampleID='CID44971 CID4465 control_P7 control_P8 UKF243_T_ST UKF260_T_ST UKF266_T_ST UKF334_T_ST' 5 | 6 | for i in $sampleID; 7 | do 8 | fpath='./Step6_LRTPredictionResult/'$i"/" 9 | 10 | if [ ! -d $fpath ] 11 | then 12 | mkdir $fpath 13 | fi 14 | 15 | for j in $tools; 16 | do 17 | fpathout=$fpath"/"$j"/" 18 | if [ ! -d $fpathout ] 19 | then 20 | mkdir $fpathout 21 | fi 22 | 23 | if [[ !($j == 'CytoTalk') ]] 24 | then 25 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /usr/bin/Rscript ../Script/Step8_LRTToolsFunction/RunSTScript.R $j $i human; 26 | else 27 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /home/ljx/software/R-4.1.0/bin/Rscript ../Script/Step8_LRTToolsFunction/RunSTScript.R $j $i human; 28 | fi 29 | 30 | done 31 | done 32 | -------------------------------------------------------------------------------- /Step4_SIRSIForLRBench/ScriptForCalSIRSI.R: -------------------------------------------------------------------------------- 1 | rm(list = ls());gc() 2 | suppressMessages(library(Seurat)) 3 | source('./Script/Step3_MIForLRBench/function.R') 4 | source('./Script/Step4_SIRSIForLRBench/function.R') 5 | set.seed(123) 6 | 7 | datasets <- c('CID4465', 'CID44971', 'CK357', 'CK358', 8 | 'CK368', 'CK162', 'CK362', 'CK361', 9 | 'CK161', 'CK165', 'Slide14') 10 | ck.dataset <- readRDS('~/1-Datasets/Nature_2022_MI/dataset_id.rds') 11 | 12 | # Calculate SI 13 | for(data in datasets){ 14 | print(data) 15 | SIResult <- CalSI_2(data) 16 | saveRDS(SIResult, file = paste0('./Data/Step4_SIRSIForLRBench/', data, '_SIResult.rds')) 17 | } 18 | rm(SIResult, data);gc() 19 | 20 | #Calculate RSI 21 | for (data in datasets) { 22 | print(data) 23 | RSIResult <- CalRSI(data) 24 | saveRDS(RSIResult, file = paste0('./Data/Step4_SIRSIForLRBench/', data, '_RSIResult.rds')) 25 | } 26 | rm(RSIResult, data);gc() 27 | -------------------------------------------------------------------------------- /Step0_LRToolsFunction/scConnect_python.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[1]: 5 | 6 | 7 | import pandas as pd 8 | import scanpy as sc 9 | import scConnect as cn 10 | import matplotlib 11 | import matplotlib.pyplot as plt 12 | import sys 13 | import random 14 | random.seed(123) 15 | 16 | counts_path = sys.argv[1] 17 | meta_path = sys.argv[2] 18 | species = sys.argv[3] 19 | output_path = sys.argv[4] 20 | adata = sc.read_csv(counts_path).T 21 | meta = pd.read_csv(meta_path, index_col="Cell") 22 | adata.obs = meta 23 | 24 | adata_tissue = cn.genecall.meanExpression(adata, groupby="Annotation", normalization=False, use_raw=False, transformation="log1p") 25 | adata_tissue = cn.connect.ligands(adata_tissue, organism=species) 26 | adata_tissue = cn.connect.receptors(adata_tissue, organism=species) 27 | adata_tissue = cn.connect.specificity(adata_tissue, n=100, groupby="Annotation", organism=species) 28 | 29 | edges = cn.connect.interactions(emitter=adata_tissue, target=adata_tissue, self_reference=True, organism=species) 30 | nodes = cn.connect.nodes(adata_tissue) 31 | 32 | edge = pd.DataFrame(edges) 33 | col_new = list(edge.iloc[0,2].keys()) 34 | edge.rename(columns={0:'sender', 1:'reciever', 2:'info'}, inplace = True) 35 | for each in col_new: 36 | edge[each] = edge["info"].map(lambda x:x[each]) 37 | 38 | edge.to_csv(output_path) 39 | 40 | -------------------------------------------------------------------------------- /Step6_LRBenchSampling/example.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | tools="CellPhoneDB2 CellPhoneDB3 CellTalker Connectome NATMI ICELLNET scConnect CellChat SingleCellSignalR CellCall scSeqComm NicheNet Domino PyMINEr iTALK scMLnet" 3 | 4 | sampleID='CID44971 CID4465 CK357 CK358 CK161 CK165 CK361 CK362 CK162 CK368 Slide14 pbmc4k pbmc6k pbmc8k' 5 | ratios='90 80 70 60 50' 6 | 7 | for i in $sampleID; 8 | do 9 | for ratio in $ratios; 10 | do 11 | 12 | fpath='./Step10_LRBenchSamplingResult/'$i"_"$ratio"/" 13 | 14 | if [ ! -d $fpath ] 15 | then 16 | mkdir $fpath 17 | fi 18 | for j in $tools; 19 | do 20 | fpathout=$fpath"/"$j"/" 21 | if [ ! -d $fpathout ] 22 | then 23 | mkdir $fpathout 24 | fi 25 | 26 | if [[ "$i" =~ 'Slide' && !($j == 'CytoTalk') ]] 27 | then 28 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /usr/bin/Rscript ../Script/Step10_LRBenchSampling/RunScript.R $j $i mouse $ratio; 29 | elif [[ "$i" =~ 'Slide' && ($j == 'CytoTalk') ]] 30 | then 31 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /home/ljx/software/R-4.1.0/bin/Rscript ../Script/Step10_LRBenchSampling/RunScript.R $j $i mouse $ratio; 32 | elif [[ ($j == 'CytoTalk') ]] 33 | then 34 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /home/ljx/software/R-4.1.0/bin/Rscript ../Script/Step10_LRBenchSampling/RunScript.R $j $i human $ratio; 35 | else 36 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /usr/bin/Rscript ../Script/Step10_LRBenchSampling/RunScript.R $j $i human $ratio; 37 | fi 38 | 39 | done 40 | done 41 | done -------------------------------------------------------------------------------- /Step9_LRTBench/Step2_ScriptForRecord.R: -------------------------------------------------------------------------------- 1 | 2 | files <- list.files('./Data/Step8_LRTBenchResult/record', full.names = TRUE) 3 | record_data <- lapply(files, function(file){ 4 | result <- readRDS(file) 5 | record_methods <- lapply(result, function(method){ 6 | record_cellines <- lapply(method, function(cellines){ 7 | if(dim(cellines)[[1]] == 0){ 8 | text <- 'No rec/lig' 9 | }else{ 10 | tmp <- as.data.frame(table(cellines$label)) 11 | false <- as.numeric(tmp$Freq[which(tmp$Var1==0)]) 12 | true <- as.numeric(tmp$Freq[which(tmp$Var1==1)]) 13 | text <- paste0('F: ', false, '; T: ', true) 14 | } 15 | text 16 | }) 17 | names(record_cellines) <- names(method) 18 | record_cellines 19 | }) 20 | record_methods <- do.call(cbind, record_methods) 21 | record_methods <- as.data.frame(record_methods) 22 | record_methods 23 | }) 24 | names(record_data) <- gsub('\\.rds', '', list.files('./Data/Step8_LRTBenchResult/record')) 25 | 26 | record_data1 <- lapply(seq(5, 29, 2), function(i){ 27 | print(i) 28 | marco <- record_data[[i]] 29 | mal <- record_data[[i+1]] 30 | marco.col <- setdiff(colnames(mal), colnames(marco)) 31 | mal.col <- setdiff(colnames(marco), colnames(mal)) 32 | 33 | if (length(marco.col)>0) { 34 | marco[, marco.col] <- '——' 35 | marco <- marco[, colnames(mal)] 36 | }else if(length(mal.col)>0){ 37 | mal[, mal.col] <- '——' 38 | mal <- mal[, colnames(marco)] 39 | } 40 | tmp <- rbind(marco, mal) 41 | tmp 42 | }) 43 | 44 | names(record_data1) <- gsub('_macro', '', names(record_data)[seq(5, 29, 2)]) 45 | -------------------------------------------------------------------------------- /Step7_LRBenchSamplingBench/Step3_TimeMenRecord.R: -------------------------------------------------------------------------------- 1 | rm(list = ls());gc() 2 | suppressMessages(library(tidyverse)) 3 | source('./Script/Step11_LRBenchSamplingBench/function.R') 4 | set.seed(123) 5 | 6 | DataRecord <- readRDS(file = './Data/Step7_LRBenchSampling/DatasetsRecord.rds') 7 | DataRecord$ratio <- gsub('S', '', DataRecord$ratio) 8 | datasets <- unique(DataRecord$datasets) 9 | methods <- c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'Connectome', 10 | 'NATMI', 'ICELLNET', 'scConnect', 'CellChat', 'SingleCellSignalR', 11 | 'CellCall', 'scSeqComm', 'NicheNet', 'scMLnet', 'iTALK', 12 | 'RNAMagnet', 'PyMINEr', 'CytoTalk', 'Domino') 13 | ratios <- c(50, 60, 70, 80, 90, 100) 14 | 15 | TimeMenRecord <- lapply(datasets, function(data){ 16 | print(data) 17 | tmp <- lapply(ratios, function(ratio){ 18 | print(ratio) 19 | temp <- RunTimeMemRecord(data, ratio, methods, DataRecord) 20 | temp <- do.call(rbind, temp) 21 | temp <- tibble::rownames_to_column(temp, 'methods') 22 | temp$ratios <- ratio 23 | temp 24 | }) 25 | tmp <- do.call(rbind, tmp) %>% as.data.frame() 26 | tmp$datasets <- data 27 | tmp 28 | }) 29 | TimeMenRecord <- do.call(rbind, TimeMenRecord) %>% as.data.frame() 30 | TimeMenRecord$class <- paste(TimeMenRecord$datasets, TimeMenRecord$ratios, sep = '_') 31 | 32 | DataRecord$class <- paste(DataRecord$datasets, DataRecord$ratio, sep = '_') 33 | DataRecord <- DataRecord[, -c(1:2)] 34 | TimeMenRecord <- merge(TimeMenRecord, DataRecord, by = 'class') 35 | 36 | saveRDS(TimeMenRecord, file = paste0('./Data/Step7_LRBenchSampling/TimeMemRecord.rds')) 37 | -------------------------------------------------------------------------------- /Step8_LRTToolsFunction/RunSTScript.R: -------------------------------------------------------------------------------- 1 | suppressMessages(library(Seurat)) 2 | source('../Script/Step8_LRTToolsFunction/Step8_LRTToolsFunction.R') 3 | set.seed(123) 4 | 5 | args <- commandArgs() 6 | tools <- args[6] 7 | print(tools) 8 | sampleID <- args[7] 9 | print(sampleID) 10 | 11 | if(grepl('CID', sampleID)){ 12 | fpath.dataset <- paste0('/home/ljx/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/ST_Data/', sampleID, '_ser.rds') 13 | receiver <- 'CancerEpithelial' 14 | }else if(grepl('UKF', sampleID)){ 15 | fpath.dataset <- paste0('/home/ljx/1-Datasets/CancerCell_2022_Glioma/ScriptForLRTBenchmark/ST_Data/', sampleID, '_ser.rds') 16 | receiver <- c('macrophages', 'Malignant') 17 | }else{#MI 18 | fpath.dataset <- paste0('/home/ljx/1-Datasets/Nature_2022_MI/ScriptForCCC/ST_Data/', sampleID, '_ser.rds') 19 | receiver <- c('Fibroblast') 20 | } 21 | 22 | dir.output <- paste0('./Step8_LRTPredictionResult/', sampleID) 23 | if(!dir.exists(dir.output)){ 24 | dir.create(dir.output) 25 | } 26 | 27 | fpath.tools <- paste(dir.output, tools, sep = '/') 28 | if(!dir.exists(fpath.tools)){ 29 | dir.create(fpath.tools) 30 | } 31 | 32 | ser <- readRDS(fpath.dataset) 33 | 34 | if(tools == 'CytoTalk'){ 35 | CytoTalk_function(ser, fpath.tools, receiver = receiver) 36 | }else if(tools == 'HoloNet'){ 37 | HoloNet_function(ser, fpath.tools, sampleID, receiver = receiver) 38 | }else if(tools == 'MISTy'){ 39 | MISTy_function(ser, fpath.tools, sampleID, receiver = receiver) 40 | }else if(tools == 'NicheNet'){ 41 | NicheNet_function(ser, fpath.tools, sampleID, receiver = receiver) 42 | }else if(tools == 'stMLnet'){ 43 | stMLnet_function(ser, fpath.tools, sampleID, receiver = receiver) 44 | } -------------------------------------------------------------------------------- /Step1_LRPredictionResult/LRPredition_19tools.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | tools="CellPhoneDB2 CellPhoneDB3 CellTalker Connectome NATMI ICELLNET scConnect CellChat SingleCellSignalR CellCall scSeqComm NicheNet Domino PyMINEr iTALK cell2cell scMLnet" 3 | 4 | sampleID='CID44971 CID4465 CK357 CK358 CK161 CK165 CK361 CK362 CK162 CK368 Slide14 pbmc4k pbmc6k pbmc8k" 5 | 6 | for i in $sampleID; 7 | do 8 | if [[ "$i" =~ 'CID' ]] 9 | then 10 | fpath='./Step1_LRPredictionResult/NG_BC_'$i"/" 11 | elif [[ "$i" =~ 'CK' ]] 12 | then 13 | fpath='./Step1_LRPredictionResult/N_MI_'$i"/" 14 | elif [[ "$i" =~ 'Slide' ]] 15 | then 16 | fpath='./Step1_LRPredictionResult/MouseEmbryo_'$i"/" 17 | else 18 | fpath='./Step1_LRPredictionResult/GSE106487_'$i"/" 19 | m='ST' 20 | n='SSC' 21 | fi 22 | 23 | if [ ! -d $fpath ] 24 | then 25 | mkdir $fpath 26 | fi 27 | for j in $tools; 28 | do 29 | fpathout=$fpath"/"$j"/" 30 | if [ ! -d $fpathout ] 31 | then 32 | mkdir $fpathout 33 | fi 34 | 35 | if [[ "$i" =~ 'Slide' && !($j == 'CytoTalk') ]] 36 | then 37 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /usr/bin/Rscript ../Script/Step1_LRPredictionResult/RunScript.R $j $i mouse; 38 | elif [[ "$i" =~ 'Slide' && ($j == 'CytoTalk') ]] 39 | then 40 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /home/ljx/software/R-4.1.0/bin/Rscript ../Script/Step1_LRPredictionResult/RunScript.R $j $i mouse; 41 | elif [[ ($j == 'CytoTalk') ]] 42 | then 43 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /home/ljx/software/R-4.1.0/bin/Rscript ../Script/Step1_LRPredictionResult/RunScript.R $j $i human; 44 | else 45 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /usr/bin/Rscript ../Script/Step1_LRPredictionResult/RunScript.R $j $i human; 46 | fi 47 | 48 | done 49 | done 50 | -------------------------------------------------------------------------------- /Step2_PreSTForLRBench/ScriptForCK.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | suppressWarnings(library(Seurat)) 3 | source('./Script/Step2_PreSTForLRBench/function.R') 4 | set.seed(123) 5 | 6 | samples <- readRDS('~/1-Datasets/Nature_2022_MI/dataset_id.rds') 7 | samples <- samples$ST 8 | 9 | for(sample in samples){ 10 | output.path <- paste0('./Data/Step2_PreSTForLRBench/', sample) 11 | if(!dir.exists(output.path)){ 12 | dir.create(output.path) 13 | } 14 | 15 | ser2 <- readRDS(paste0('~/1-Datasets/Nature_2022_MI/ST_Data/Visium-', sample, '.rds')) 16 | meta <- ser2@meta.data 17 | meta <- meta[, 15, drop = FALSE]; colnames(meta) <- 'celltype' 18 | meta$celltype <- gsub('Cycling.cells', 'Cycling', meta$celltype) 19 | rm(ser2);gc() 20 | 21 | st.path <- paste0('~/1-Datasets/Nature_2022_MI/ST_Data/', sample) 22 | counts <- Read10X_h5(paste0(st.path, '/filtered_feature_bc_matrix.h5')) 23 | img <- Read10X_Image(paste0(st.path, '/spatial')) 24 | 25 | share.barcode <- intersect(rownames(meta), colnames(counts)) 26 | share.barcode <- intersect(share.barcode, rownames(img@coordinates)) 27 | 28 | meta <- meta[share.barcode, , drop = FALSE] 29 | counts <- counts[, rownames(meta)] 30 | img <- img[colnames(counts)] 31 | DefaultAssay(object = img) <- 'Spatial' 32 | 33 | identical(rownames(meta), colnames(counts)) 34 | 35 | ser <- CreateSeuratObject(counts, meta.data = meta, 36 | min.cells = 1, min.features = 1, 37 | assay = 'Spatial') 38 | ser <- SCTransform(ser, assay = "Spatial") 39 | ser[['image']] <- img 40 | saveRDS(ser, file = paste0(output.path, '/STser.rds')) 41 | 42 | close_distant <- CloDistCP(ser) 43 | saveRDS(close_distant, file = paste0(output.path, '/CloDistCP.rds')) 44 | } 45 | -------------------------------------------------------------------------------- /Step2_PreSTForLRBench/ScriptForCID.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | suppressMessages(library(Seurat)) 3 | source('./Script/Step2_PreSTForLRBench/function.R') 4 | set.seed(123) 5 | 6 | # CID datasets 7 | img.path <- list.dirs('~/1-Datasets/NatureGenetics_2021_BC/ST_Data/spatial')[-1] 8 | matrix.path <- list.dirs('~/1-Datasets/NatureGenetics_2021_BC/ST_Data/filtered_count_matrices')[-1] 9 | meta.path <- list.dirs('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/ST_deconvoulution', recursive = FALSE) 10 | samples <- substring(meta.path, 76) 11 | 12 | for (i in seq(img.path)) { 13 | output.path <- paste0('./Data/Step2_PreSTForLRBench/', samples[i]) 14 | if(!dir.exists(output.path)){ 15 | dir.create(output.path) 16 | } 17 | 18 | # Save SeuratObject of ST dataset 19 | if(T){ 20 | count.st <- Read10X(matrix.path[i], gene.column = 1) 21 | img <- Read10X_Image(image.dir = img.path[i]) 22 | meta.st <- read.csv(paste0(meta.path[i], '/c2l_means_result.csv')) 23 | meta.st <- tibble::column_to_rownames(meta.st, var = "X") 24 | colnames(meta.st) <- gsub('meanscell_abundance_w_sf_', '', colnames(meta.st)) 25 | meta.st$celltype <- apply(meta.st, 1, function(x){ 26 | colnames(meta.st)[which.max(x)] 27 | }) 28 | 29 | shared.barcode <- intersect(colnames(count.st), rownames(meta.st)) 30 | shared.barcode <- intersect(shared.barcode, rownames(img@coordinates)) 31 | 32 | meta.st <- meta.st[shared.barcode, ] 33 | count.st <- count.st[, rownames(meta.st)] 34 | img <- img[colnames(count.st)] 35 | DefaultAssay(object = img) <- 'Spatial' 36 | 37 | identical(rownames(meta.st), colnames(count.st)) 38 | 39 | ser <- CreateSeuratObject(count.st, meta.data = meta.st, 40 | min.cells = 1, min.features = 1, 41 | assay = 'Spatial') 42 | ser <- SCTransform(ser, assay = "Spatial") 43 | ser[['image']] <- img 44 | 45 | saveRDS(ser, file = paste0(output.path, '/STser.rds')) 46 | } 47 | 48 | close_distant <- CloDistCP(ser) 49 | saveRDS(close_distant, file = paste0(output.path, '/CloDistCP.rds')) 50 | } -------------------------------------------------------------------------------- /Step0_LRToolsFunction/cell2cell_python.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[ ]: 5 | 6 | 7 | import cell2cell as c2c 8 | import scanpy as sc 9 | import pandas as pd 10 | import sys 11 | import random 12 | random.seed(123) 13 | 14 | counts_path = sys.argv[1] 15 | meta_path = sys.argv[2] 16 | lr_path = sys.argv[3] 17 | output_path = sys.argv[4] 18 | 19 | adata = sc.read_csv(counts_path).T 20 | meta = pd.read_csv(meta_path, index_col = "Cell") 21 | adata.obs = meta 22 | 23 | lr_pairs = pd.read_csv(lr_path) 24 | lr_pairs = lr_pairs.astype(str) 25 | meta = adata.obs.copy() 26 | 27 | interactions = c2c.analysis.SingleCellInteractions(rnaseq_data=adata.to_df().T, 28 | ppi_data=lr_pairs, 29 | metadata=meta, 30 | interaction_columns=('ligand_symbol', 'receptor_symbol'), 31 | communication_score='expression_thresholding', 32 | expression_threshold=0.05, # values after aggregation 33 | cci_score='bray_curtis', 34 | cci_type='undirected', 35 | aggregation_method='nn_cell_fraction', 36 | barcode_col='Cell', 37 | celltype_col='Annotation', 38 | complex_sep='&', 39 | verbose=False) 40 | interactions.compute_pairwise_communication_scores() 41 | # interactions.compute_pairwise_communication_scores() return result: 42 | ccc_matrix_path = output_path + '/communication_matrix.csv' 43 | interactions.interaction_space.interaction_elements['communication_matrix'].to_csv(ccc_matrix_path) 44 | ccc_pvals_path = output_path + '/ccc_pval.csv' 45 | ccc_pvals = interactions.permute_cell_labels(evaluation='communication',verbose=True, random_state = 123) 46 | ccc_pvals.to_csv(ccc_pvals_path) 47 | 48 | -------------------------------------------------------------------------------- /Step5_BenchBasedCAGEProteomic/function.R: -------------------------------------------------------------------------------- 1 | ################### 2 | ## calculate_auc ## 3 | ################### 4 | 5 | ## get different metrices and ROC/PRC preformance object 6 | get_evaluate_metrics <- function(pred,label) 7 | { 8 | 9 | find_optimal_cutoff <- function(TPR, FPR, threshold){ 10 | 11 | y = TPR - FPR 12 | Youden_index = which.max(y) 13 | # optimal_threshold = threshold[Youden_index] 14 | return(Youden_index) 15 | 16 | } 17 | 18 | if(length(which(label==TRUE))!=0 & length(which(label==FALSE))!=0){ 19 | pred <- prediction(pred, label) 20 | 21 | perf_ROC <- performance(pred, measure = "tpr", x.measure = "fpr") 22 | ind_ROC <- find_optimal_cutoff(perf_ROC@y.values[[1]],perf_ROC@x.values[[1]],perf_ROC@alpha.values[[1]]) 23 | cutoff_ROC <- perf_ROC@alpha.values[[1]][ind_ROC] 24 | 25 | perf_PRC <- performance(pred, measure = "prec", x.measure = "rec") 26 | ind_PRC <- find_optimal_cutoff(perf_PRC@y.values[[1]],perf_PRC@x.values[[1]],perf_PRC@alpha.values[[1]]) 27 | cutoff_PRC <- perf_PRC@alpha.values[[1]][ind_PRC] 28 | 29 | ACC <- performance(pred, measure = "acc")@y.values[[1]] %>% .[ind_ROC] %>% signif(.,4) 30 | ERR <- performance(pred, measure = "err")@y.values[[1]] %>% .[ind_ROC] %>% signif(.,4) 31 | PPV <- performance(pred, measure = "ppv")@y.values[[1]] %>% .[ind_ROC] %>% signif(.,4) 32 | #MCC <- performance(pred, measure = "mat")@y.values[[1]] %>% .[ind_ROC] %>% signif(.,4) 33 | F1 <- performance(pred, measure = "f")@y.values[[1]] %>% .[ind_ROC] %>% signif(.,4) 34 | AUC <- performance(pred, measure = "auc")@y.values[[1]] %>% signif(.,4) 35 | AUCPR <- performance(pred, measure = "aucpr")@y.values[[1]] %>% signif(.,4) 36 | 37 | res = list(perf_ROC = perf_ROC, 38 | perf_PRC = perf_PRC, 39 | perf_metrics = c(AUROC=AUC,AUPRC=AUCPR, 40 | ACC=ACC,ERR=ERR,PPV=PPV, F1=F1)) 41 | 42 | }else{ 43 | 44 | res <- list(perf_ROC = NA, perf_PRC = NA, 45 | perf_metrics = rep(NA,6)) 46 | names(res$perf_metrics) <- c('AUROC','AUPRC','ACC','ERR','PPV','F1') 47 | 48 | } 49 | 50 | 51 | return(res) 52 | 53 | } -------------------------------------------------------------------------------- /Step0_LRToolsFunction/ScriptForAddLRscore.R: -------------------------------------------------------------------------------- 1 | rm(list = ls());gc() 2 | suppressMessages(library(Seurat)) 3 | suppressMessages(library(tidyverse)) 4 | set.seed(123) 5 | 6 | files <- list.dirs('./Data/Step1_LRPredictionResult', recursive = FALSE) 7 | methods <- c('scMLnet', 'Domino') 8 | 9 | for (file in files) { 10 | print(file) 11 | if(grepl('Slide', file)){ 12 | ser <- readRDS('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/DataForCCC/Slide14_hs_ser.rds') 13 | }else if(grepl('N_MI', file)){ 14 | sample <- substring(file, 38) 15 | ser <- readRDS(paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data/', sample, '_ser.rds')) 16 | }else if(grepl('NG_BC', file)){ 17 | sample <- substring(file, 39) 18 | ser <- readRDS(paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data/', sample, '_ser.rds')) 19 | }else if(grepl('pbmc', file)){ 20 | sample <- substring(file, 33) 21 | ser <- readRDS(paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/DataForCCC/', sample, '_ser.rds')) 22 | }else if(grepl('Sper', file)){ 23 | ser <- readRDS('~/1-Datasets/Spermatogenesis_GSE106487/ScriptForCCC/Sper_ser.rds') 24 | } 25 | 26 | data <- GetAssayData(ser, 'data', 'RNA') %>% 27 | as.matrix(.) %>% t(.) %>% 28 | as.data.frame() %>% 29 | tibble::rownames_to_column(., 'barcodes') 30 | meta <- ser@meta.data %>% .[, 'celltype', drop =FALSE] %>% tibble::rownames_to_column(., 'barcodes') 31 | rm(ser);gc() 32 | 33 | data <- merge(data, meta, by = 'barcodes') 34 | data$barcodes <- NULL 35 | data <- aggregate(.~celltype, data, mean) 36 | data <- pivot_longer(data, -celltype, 'genes') 37 | data$ct_genes <- paste(data$celltype, data$genes, sep = '_') 38 | data <- data[ -c(1:2)] 39 | rm(meta);gc() 40 | 41 | for (method in methods) { 42 | print(method) 43 | result <- readRDS(paste0(file, '/', method, '/result.rds')) 44 | result_tmp <- result$result 45 | result_tmp$sl <- paste(result_tmp$Sender, result_tmp$Ligand, sep = '_') 46 | result_tmp$rr <- paste(result_tmp$Receiver, result_tmp$Receptor, sep = '_') 47 | result_tmp <- merge(result_tmp, data, by.x = 'sl', by.y = 'ct_genes') 48 | colnames(result_tmp)[8] <- 'ligand_value' 49 | result_tmp <- merge(result_tmp, data, by.x = 'rr', by.y = 'ct_genes') 50 | colnames(result_tmp)[9] <- 'receptor_value' 51 | result_tmp$LRscore <- result_tmp$ligand_value*result_tmp$receptor_value 52 | result_tmp <- result_tmp[,c('Ligand', 'Receptor', 'Sender', 'Receiver', 'LRscore', 'all')] 53 | result$result <- result_tmp 54 | saveRDS(result, paste0(file, '/', method, '/result1.rds')) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /Step2_PreSTForLRBench/function.R: -------------------------------------------------------------------------------- 1 | # Calculate the euclidean distance between cell pairs 2 | CloDistCP <- function(ser, image = TRUE){ 3 | if(image){ 4 | coord <- ser@images$image@coordinates[, c('imagerow', 'imagecol')] 5 | }else{ 6 | coord <- ser@meta.data[, c('imagerow', 'imagecol')] 7 | } 8 | 9 | meta <- ser@meta.data[, 'celltype', drop = FALSE] 10 | identical(rownames(meta), rownames(coord)) 11 | 12 | meta <- cbind(meta, coord) 13 | meta <- meta[order(meta$celltype),] 14 | 15 | dist.cp <- as.matrix(dist(meta[, 2:3], method = "euclidean")) 16 | dist.cp[dist.cp==0] <- 1 17 | dist.cp[!lower.tri(dist.cp, diag = TRUE)] <- 0 18 | dist.cp <- as.data.frame(dist.cp) 19 | dist.cp <- tibble::rownames_to_column(dist.cp, var = "cell.1") 20 | dist.cp <- tidyr::pivot_longer(data = dist.cp, cols = -cell.1, 21 | names_to = "cell.2", values_to = "distance") 22 | dist.cp <- dist.cp[which(dist.cp$distance != 0), ] 23 | 24 | ct <- data.frame(barcode = rownames(meta), celltype = meta$celltype) 25 | dist.cp <- dplyr::inner_join(dist.cp, ct, by = c('cell.1' = 'barcode')) 26 | colnames(dist.cp)[4] <- "celltype.1" 27 | dist.cp <- dplyr::inner_join(dist.cp, ct, by = c('cell.2' = 'barcode')) 28 | colnames(dist.cp)[5] <- "celltype.2" 29 | dist.cp$cp <- paste(dist.cp$celltype.1, dist.cp$celltype.2, sep = "_") 30 | dist.cp <- dist.cp[which(dist.cp$celltype.1!=dist.cp$celltype.2), ] 31 | dist.cp <- dist.cp[,c("cell.1", "cell.2", "distance", "cp")] 32 | 33 | # remove cell pairs with the number less than 30 34 | remove.cp <- names(which(table(dist.cp$cp)<30)) 35 | if(length(remove.cp)!=0){ 36 | dist.cp <- dist.cp[-which(dist.cp$cp %in% remove.cp), ] 37 | } 38 | 39 | perc <- c(0.1, 0.2, 0.3, 0.4, 0.5) 40 | cellpairs <- unique(dist.cp$cp) 41 | library(doParallel) 42 | cl <- makeCluster(5) 43 | registerDoParallel(cl) 44 | close_distant <- foreach(per = perc) %dopar% { 45 | tmp.close_distant <- lapply(cellpairs, function(cp){ 46 | dist <- list() 47 | tmp.dist <- dist.cp[which(dist.cp$cp == cp), ] 48 | tmp.dist <- tmp.dist[order(tmp.dist$distance, decreasing = T), ] 49 | pct.close <- floor(dim(tmp.dist)[1]*per) 50 | pct.distant <- floor(dim(tmp.dist)[1]*per) 51 | close.dist <- tail(tmp.dist, n = pct.close) 52 | distant.dist <- head(tmp.dist, n = pct.distant) 53 | dist[["close"]] <- close.dist 54 | dist[["distant"]] <- distant.dist 55 | dist 56 | }) 57 | names(tmp.close_distant) <- unique(dist.cp$cp) 58 | return(tmp.close_distant) 59 | } 60 | stopCluster(cl) 61 | names(close_distant) <- perc 62 | return(close_distant) 63 | } 64 | -------------------------------------------------------------------------------- /Step2_PreSTForLRBench/ScriptForMouseEymbryo.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | suppressMessages(library(Seurat)) 3 | source('./Script/Step2_PreSTForLRBench/function.R') 4 | set.seed(123) 5 | 6 | counts.st <- Read10X('~/1-Datasets/MouseEmbryo_GSE166692/sciSpaceData/data/', 7 | gene.column = 1) 8 | 9 | cellmeta <- read.csv('~/1-Datasets/MouseEmbryo_GSE166692/sciSpaceData/cellmeta.tsv', 10 | sep = '\t') 11 | genes <- read.csv('~/1-Datasets/MouseEmbryo_GSE166692/sciSpaceData/genemeta.tsv', 12 | sep = '\t') 13 | 14 | for (slide in unique(cellmeta$slide_id)) { 15 | output.path <- paste0('./Data/Step2_PreSTForLRBench/', gsub(' ', '', slide)) 16 | if(!dir.exists(output.path)){ 17 | dir.create(output.path) 18 | } 19 | 20 | if(T){ 21 | cells_sel <- rownames(cellmeta)[which(cellmeta$slide_id==slide)] 22 | 23 | # counts: solve the problem of duplicated genes 24 | if(T){ 25 | counts_sub <- counts.st[, cells_sel] 26 | counts_sub <- as.matrix(counts_sub) 27 | counts_sub <- as.data.frame(counts_sub) 28 | counts_sub$gene <- genes$gene_short_name; rownames(counts_sub) <- NULL 29 | dup_genes <- counts_sub$gene[which(duplicated(counts_sub$gene))] 30 | counts_sub1 <- counts_sub[-which(counts_sub$gene %in% dup_genes),] 31 | counts_sub2 <- counts_sub[which(counts_sub$gene %in% dup_genes),] 32 | counts_sub2 <- aggregate(.~gene,mean,data=counts_sub2) 33 | counts_sub <- rbind(counts_sub1, counts_sub2); rownames(counts_sub) <- NULL 34 | rm(counts_sub1, counts_sub2, dup_genes, cells_sel); gc() 35 | counts_sub <- tibble::column_to_rownames(counts_sub, 'gene') 36 | counts_sub <- as.matrix(counts_sub) 37 | } 38 | 39 | meta_sub <- cellmeta[which(cellmeta$slide_id==slide),] 40 | colnames(meta_sub)[c(9,10,19)] <- c('imagerow', 'imagecol', 'celltype') 41 | meta_sub$celltype <- gsub(' ', '', meta_sub$celltype) 42 | meta_sub$celltype <- gsub('Cardiacmusclelineages', 'CardiacMuscleLineages', meta_sub$celltype) 43 | 44 | shared.barcode <- intersect(colnames(counts_sub), rownames(meta_sub)) 45 | 46 | meta_sub <- meta_sub[shared.barcode, ] 47 | counts_sub <- counts_sub[, rownames(meta_sub)] 48 | identical(rownames(meta_sub), colnames(counts_sub)) 49 | 50 | ser <- CreateSeuratObject(counts = counts_sub, meta.data = meta_sub, 51 | assay = 'Spatial', min.cells = 1, min.features = 1) 52 | ser <- SCTransform(ser, assay = "Spatial") 53 | 54 | saveRDS(ser, file = paste0(output.path, '/STser.rds')) 55 | rm(counts_sub, meta_sub);gc() 56 | } 57 | 58 | ser <- readRDS(paste0(output.path, '/STser.rds')) 59 | close_distant <- CloDistCP(ser, image = FALSE) 60 | saveRDS(close_distant, file = paste0(output.path, '/CloDistCP.rds')) 61 | } 62 | 63 | 64 | -------------------------------------------------------------------------------- /Step8_LRTToolsFunction/HoloNet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[ ]: 5 | 6 | 7 | import HoloNet as hn 8 | 9 | import os 10 | import pandas as pd 11 | import random 12 | import numpy as np 13 | import scanpy as sc 14 | import matplotlib.pyplot as plt 15 | import torch 16 | import sys 17 | 18 | import warnings 19 | warnings.filterwarnings('ignore') 20 | hn.set_figure_params(tex_fonts=False) 21 | sc.settings.figdir = './figures/' 22 | 23 | 24 | # In[ ]: 25 | 26 | 27 | random.seed(123) 28 | np.random.seed(123) 29 | 30 | counts_path = sys.argv[1] 31 | meta_path = sys.argv[2] 32 | img_path = sys.argv[3] 33 | goi_fpath = sys.argv[4] 34 | output_path = sys.argv[5] 35 | 36 | 37 | # In[ ]: 38 | 39 | 40 | # load ST data (gene expression matrix + celltype + spatial) 41 | adata = sc.read_csv(counts_path).T 42 | meta = pd.read_csv(meta_path, index_col="Barcodes") 43 | adata.obs = meta 44 | adata = sc.read_visium(adata, path = img_path) 45 | 46 | 47 | # In[ ]: 48 | 49 | 50 | # run normalization 51 | sc.pp.normalize_total(adata, inplace=True) 52 | sc.pp.log1p(adata) 53 | sc.pp.highly_variable_genes(adata, flavor="seurat", n_top_genes=2000) 54 | 55 | 56 | # In[ ]: 57 | 58 | 59 | # load LR database and filter genes epressed by less than 5% of cells 60 | LR_df = hn.pp.load_lr_df() 61 | expressed_LR_df = hn.pp.get_expressed_lr_df(LR_df, adata, expressed_proportion=0.05) 62 | 63 | 64 | # In[ ]: 65 | 66 | 67 | # calculate w_best 68 | w_best = hn.tl.default_w_visium(adata) 69 | w_best 70 | 71 | 72 | # In[ ]: 73 | 74 | 75 | # construct CE 76 | CE_tensor = hn.tl.compute_ce_tensor(adata, lr_df=expressed_LR_df, w_best=w_best) 77 | CE_tensor_filtered = hn.tl.filter_ce_tensor(CE_tensor, adata, 78 | lr_df=expressed_LR_df, w_best=w_best) 79 | 80 | 81 | # In[ ]: 82 | 83 | 84 | # Selecting the target gene (icgs) to be predicted 85 | target_all_gene_expr, used_gene_list = hn.pr.get_gene_expr(adata, expressed_LR_df, 86 | max_sparse = 0.05) 87 | 88 | 89 | # In[ ]: 90 | 91 | 92 | goi = pd.read_csv(goi_fpath) 93 | goi = list(set(goi["x"]).intersection(set(used_gene_list))) 94 | len(goi) 95 | 96 | 97 | # In[ ]: 98 | 99 | 100 | X, cell_type_names = hn.pr.get_one_hot_cell_type_tensor(adata, categorical_cell_type_col = 'celltype') 101 | adj = hn.pr.adj_normalize(adj=CE_tensor_filtered, cell_type_tensor=X, only_between_cell_type=True) 102 | 103 | for gene in goi: 104 | target = hn.pr.get_one_case_expr(target_all_gene_expr, cases_list=used_gene_list, 105 | used_case_name= gene) 106 | trained_MGC_model_list = hn.pr.mgc_repeat_training(X, adj, target, device='gpu') 107 | predict_result = hn.pl.plot_mgc_result(trained_MGC_model_list, adata, X, adj) 108 | for lr in list(expressed_LR_df['LR_Pair']): 109 | _ = hn.pl.fce_cell_type_network_plot(trained_MGC_model_list, expressed_LR_df, X, adj, 110 | cell_type_names, plot_lr=lr, edge_thres=0.2, 111 | ) 112 | res = pd.DataFrame(_[0]) 113 | file_path = output_path + lr + "_" + gene + ".csv" 114 | res.to_csv(file_path) 115 | 116 | -------------------------------------------------------------------------------- /Step9_LRTBench/Step3_ScriptForTimeMen.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()); gc() 2 | suppressMessages(library(ggplot2)) 3 | source('./Script/Step9_LRTBench/function.R') 4 | set.seed(123) 5 | 6 | # Record the number of cell types in each dataset 7 | if(F){ 8 | suppressMessages(library(Seurat)) 9 | set.seed(123) 10 | ser1.path <- list.files('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/ST_Data', full.names = TRUE) 11 | ser2.path <- list.files('~/1-Datasets/Nature_2022_MI/ScriptForCCC/ST_Data', full.names = TRUE) 12 | ser3.path <- list.files('~/1-Datasets/CancerCell_2022_Glioma/ScriptForLRTBenchmark/ST_Data', full.names = TRUE) 13 | ser.path <- c(ser1.path, ser2.path, ser3.path) 14 | rm(ser1.path, ser2.path, ser3.path);gc() 15 | 16 | tmp <- c() 17 | for (i in seq(ser.path)) { 18 | ser <- readRDS(ser.path[i]) 19 | remove.ct <- as.numeric(length(which(table(ser$celltype)<3))) 20 | temp <- length(unique(ser$celltype))-remove.ct 21 | tmp <- c(tmp, temp) 22 | rm(ser); gc() 23 | } 24 | ct_record <- data.frame(datasets = gsub('_ser.rds', '', 25 | unlist(lapply(stringr::str_split(ser.path, '/'), 26 | function(x){x[8]}))), 27 | num = as.numeric(tmp)) 28 | 29 | saveRDS(ct_record, file = './Data/Step9_LRTBenchResult/DatasetCellTypeInfo.rds') 30 | rm(i, ser.path, temp, tmp, remove.ct, ct_record);gc() 31 | } 32 | 33 | # Record the running time of methods in each dataset 34 | if(T){ 35 | datasets <- list.dirs('./Data/Step8_LRTPredictionResult', recursive = FALSE, full.names = FALSE) 36 | methods <- c('CytoTalk', 'NicheNet', 'HoloNet', 'MISTy', 'stMLnet') 37 | DataRecord <- readRDS('./Data/Step9_LRTBenchResult/DatasetCellTypeInfo.rds') 38 | wd <- './Data/Step8_LRTPredictionResult/' 39 | 40 | TimeMemRecord <- lapply(datasets, function(data){ 41 | print(data) 42 | temp <- RunTimeMemRecord(wd, data, methods) 43 | temp 44 | }) 45 | names(TimeMemRecord) <- datasets 46 | TimeMemRecord <- lapply(TimeMemRecord, function(data){ 47 | temp <- do.call(rbind, data) 48 | temp <- tibble::rownames_to_column(temp, 'methods') 49 | temp 50 | }) 51 | TimeMemRecord <- do.call(rbind, TimeMemRecord) 52 | TimeMemRecord <- tibble::rownames_to_column(TimeMemRecord, 'datasets') 53 | TimeMemRecord$datasets <- gsub('\\.[0-9]+', '', TimeMemRecord$datasets) 54 | 55 | # Specific handle: the running time of CytoTalk in GBM datasets 56 | CytoTalk_TimeMemRecord <- TimeMemRecord[which(TimeMemRecord$methods == 'CytoTalk' & grepl('UKF', TimeMemRecord$datasets)), ] 57 | CytoTalk_TimeMemRecord <- merge(CytoTalk_TimeMemRecord, DataRecord, by = 'datasets') 58 | CytoTalk_TimeMemRecord$total_num <- 2*(CytoTalk_TimeMemRecord$num-1) 59 | CytoTalk_TimeMemRecord$actual_num <- 2*(CytoTalk_TimeMemRecord$num-1)-1 60 | CytoTalk_TimeMemRecord$clock_time <- (CytoTalk_TimeMemRecord$clock_time/CytoTalk_TimeMemRecord$total_num)*CytoTalk_TimeMemRecord$actual_num 61 | CytoTalk_TimeMemRecord$linux_time <- (CytoTalk_TimeMemRecord$linux_time/CytoTalk_TimeMemRecord$total_num)*CytoTalk_TimeMemRecord$actual_num 62 | CytoTalk_TimeMemRecord <- CytoTalk_TimeMemRecord[, -c(7:9)] 63 | TimeMemRecord <- TimeMemRecord[-which(TimeMemRecord$methods == 'CytoTalk' & grepl('UKF', TimeMemRecord$datasets)), ] 64 | TimeMemRecord <- rbind(TimeMemRecord, CytoTalk_TimeMemRecord) 65 | rm(CytoTalk_TimeMemRecord, DataRecord, datasets, methods, wd); gc() 66 | } -------------------------------------------------------------------------------- /Step7_LRBenchSamplingBench/Step2_JaccardIndex.R: -------------------------------------------------------------------------------- 1 | rm(list = ls());gc() 2 | suppressMessages(library(tidyverse)) 3 | suppressMessages(library(ggsci)) 4 | set.seed(123) 5 | 6 | datasets <- list.files('./Data/Step6_LRBenchSamplingResult/', recursive = FALSE) %>% 7 | gsub('_[0-9]+', '',.) %>% unique() 8 | methods <- c("CellPhoneDB2", "CellPhoneDB3", "CellTalker", 9 | "Connectome", "ICELLNET", "NATMI", "iTALK", 10 | "scConnect", "SingleCellSignalR", "CellChat", 11 | "RNAMagnet", "PyMINEr", 12 | "scSeqComm", "NicheNet", 13 | "scMLnet", "CellCall","CytoTalk", "Domino") 14 | 15 | JaccardIndex <- function(a, b) { 16 | intersection = length(intersect(unique(a), unique(b))) 17 | union = length(a) + length(b) - intersection 18 | return (intersection/union) 19 | } 20 | 21 | result <- lapply(datasets, function(data){ 22 | print(data) 23 | temp <- lapply(methods, function(method){ 24 | print(method) 25 | if(grepl('CK', data)){ 26 | res100 <- readRDS(paste('./Data/Step1_LRPredictionResult', 27 | paste0('N_MI_', data), method, 28 | ifelse(method=='iTALK','result1.rds','result.rds'), 29 | sep = '/')) %>% .$result 30 | }else if(grepl('CID', data)){ 31 | res100 <- readRDS(paste('./Data/Step1_LRPredictionResult', 32 | paste0('NG_BC_', data), method, 33 | ifelse(method=='iTALK','result1.rds','result.rds'), 34 | sep = '/')) %>% .$result 35 | }else if(grepl('Slide14', data)){ 36 | res100 <- readRDS(paste('./Data/Step1_LRPredictionResult', 37 | paste0('MouseEmbryo_', data), method, 38 | ifelse(method=='iTALK','result1.rds','result.rds'), 39 | sep = '/')) %>% .$result 40 | }else{ 41 | res100 <- readRDS(paste('./Data/Step1_LRPredictionResult', 42 | data, method, 43 | ifelse(method=='iTALK','result1.rds','result.rds'), 44 | sep = '/')) %>% .$result 45 | } 46 | 47 | if(dim(res100)[[1]]==0 | is.null(res100)){ 48 | tmp <- rep(NA, 5) 49 | }else{ 50 | res90 <- readRDS(paste('./Data/Step10_LRBenchSamplingResult', 51 | paste0(data, '_90'), method, 'result.rds', sep = '/'))%>% .$result 52 | JI_90 <- JaccardIndex(res100$all, res90$all) 53 | res80 <- readRDS(paste('./Data/Step10_LRBenchSamplingResult', 54 | paste0(data, '_80'), method, 'result.rds', sep = '/'))%>% .$result 55 | JI_80 <- JaccardIndex(res100$all, res80$all) 56 | res70 <- readRDS(paste('./Data/Step10_LRBenchSamplingResult', 57 | paste0(data, '_70'), method, 'result.rds', sep = '/'))%>% .$result 58 | JI_70 <- JaccardIndex(res100$all, res70$all) 59 | res60 <- readRDS(paste('./Data/Step10_LRBenchSamplingResult', 60 | paste0(data, '_60'), method, 'result.rds', sep = '/'))%>% .$result 61 | JI_60 <- JaccardIndex(res100$all, res60$all) 62 | res50 <- readRDS(paste('./Data/Step10_LRBenchSamplingResult', 63 | paste0(data, '_50'), method, 'result.rds', sep = '/'))%>% .$result 64 | JI_50 <- JaccardIndex(res100$all, res50$all) 65 | tmp <- c(JI_50, JI_60, JI_70, JI_80, JI_90) 66 | } 67 | 68 | tmp 69 | }) 70 | names(temp) <- methods 71 | temp <- do.call(rbind, temp) 72 | temp <- as.data.frame(temp) 73 | colnames(temp) <- c('S50', 'S60', 'S70', 'S80', 'S90') 74 | temp <- tibble::rownames_to_column(temp, 'methods') 75 | temp$datasets <- data 76 | temp 77 | }) 78 | result <- do.call(rbind, result) 79 | saveRDS(result, file = './Data/Step7_LRBenchSampling/JaccardIndex.rds') 80 | -------------------------------------------------------------------------------- /Step3_MIForLRBench/Step1_ScriptForCalMI.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | suppressMessages(library(Seurat)) 3 | source('./Script/Step3_MIForLRBench/function.R') 4 | set.seed(123) 5 | 6 | datasets <- c('CID4465', 'CID44971', 'CK357', 'CK358', 7 | 'CK368', 'CK162', 'CK362', 'CK361', 8 | 'CK161', 'CK165', 'Slide14') 9 | methods <- c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'Connectome', 10 | 'NATMI', 'ICELLNET', 'scConnect', 'CellChat', 'SingleCellSignalR', 11 | 'CellCall', 'scSeqComm', 'NicheNet', 'Domino', 'scMLnet', 'iTALK', 12 | 'cell2cell', 'RNAMagnet', 'PyMINEr', 'CytoTalk') 13 | ck.dataset <- readRDS('~/1-Datasets/Nature_2022_MI/dataset_id.rds') 14 | # loop: datasets 15 | for(data in datasets){ 16 | print(data) 17 | if(grepl('CK', data)){ 18 | # get normalized data of ST 19 | STser <- readRDS(paste0('./Data/Step2_PreSTForLRBench/', ck.dataset$ST[which(ck.dataset$snRNA == data)], '/STser.rds')) 20 | # get close and distant cell pairs 21 | CloDistCP <- readRDS(paste0('./Data/Step2_PreSTForLRBench/', ck.dataset$ST[which(ck.dataset$snRNA == data)], '/CloDistCP.rds')) 22 | }else{ 23 | # get normalized data of ST 24 | STser <- readRDS(paste0('./Data/Step2_PreSTForLRBench/', data, '/STser.rds')) 25 | # get close and distant cell pairs 26 | CloDistCP <- readRDS(paste0('./Data/Step2_PreSTForLRBench/', data, '/CloDistCP.rds')) 27 | } 28 | 29 | # get normalized data of ST 30 | norm.data <- GetAssayData(STser, 'data', 'SCT') 31 | #norm.data <- as.matrix(norm.data) 32 | # get close and distant cell pairs 33 | CloDistCP[['0.5']] <- NULL 34 | # get celltype of ST data 35 | celltype.st <- unique(STser$celltype) 36 | rm(STser); gc() 37 | 38 | # get result path 39 | if(grepl('CID', data)){ 40 | result.path <- paste0('./Data/Step1_LRPredictionResult/NG_BC_', data) 41 | }else if(grepl('CK', data)){ 42 | result.path <- paste0('./Data/Step1_LRPredictionResult/N_MI_', data) 43 | }else if(grepl('Slide', data)){ 44 | result.path <- paste0('./Data/Step1_LRPredictionResult/MouseEmbryo_', data) 45 | } 46 | 47 | output.path <- paste0('./Data/Step3_MIPCCForLRBench/', data, '_result.rds') 48 | dataset_EvalIndex1 <- lapply(methods, function(method){ 49 | print(method) 50 | 51 | # remove the LR pairs whose sender/receiver celltypes are not in celltypes of ST 52 | result <- readRDS(paste0(result.path, '/', method, '/result.rds')) 53 | result <- result$result 54 | result <- result[which(result$Sender %in% celltype.st), ] 55 | result <- result[which(result$Receiver %in% celltype.st), ] 56 | result$sr <- paste(result$Sender, result$Receiver, sep = '_') 57 | CellPairs <- names(CloDistCP$`0.1`) 58 | CellPairs1 <- as.data.frame(stringr::str_split(CellPairs, '_', simplify = TRUE)) 59 | CellPairs1 <- paste(CellPairs1$V2, CellPairs1$V1, sep = '_') 60 | CellPairs <- c(CellPairs, CellPairs1) 61 | remove.sr <- setdiff(unique(result$sr), CellPairs) 62 | if(length(remove.sr) != 0){ 63 | result <- result[-which(result$sr %in% remove.sr), ] 64 | } 65 | 66 | if(dim(result)[1]!=0){ 67 | if(grepl('CID', data) & method == 'RNAMagnet'){ 68 | genes <- readRDS('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/hs2mm_genes.rds') 69 | result <- mm2hs(result, genes) 70 | }else if(grepl('CK', data) & method == 'RNAMagnet'){ 71 | genes <- readRDS('~/1-Datasets/Nature_2022_MI/ScriptForCCC/hs2mm_genes.rds') 72 | result <- mm2hs(result, genes) 73 | }else if(grepl('Slide', data) & 74 | method %in% c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'NATMI', 75 | 'ICELLNET', 'NicheNet', 'scMLnet', 'iTALK', 'cell2cell')){ 76 | genes <- readRDS('~/1-Datasets/MouseEmbryo_GSE166692/sciSpaceData/mm2hs.rds') 77 | result <- hs2mm(result, genes) 78 | } 79 | 80 | EvaIndex1_result <- lapply(CloDistCP, function(CloDist){ 81 | result.tmp <- EvaIndex1_2(CloDist, norm.data, result) 82 | result.tmp 83 | }) 84 | 85 | }else{ 86 | EvaIndex1_result <- NA 87 | } 88 | 89 | return(EvaIndex1_result) 90 | }) 91 | names(dataset_EvalIndex1) <- methods 92 | saveRDS(dataset_EvalIndex1, file = output.path) 93 | } -------------------------------------------------------------------------------- /Step7_LRBenchSamplingBench/function.R: -------------------------------------------------------------------------------- 1 | RunTimeMemRecord <- function(data, ratio, methods, DataRecord){ 2 | # get result path 3 | if(ratio==100){ 4 | if(grepl('CK', data)){ 5 | result.path <- paste0('./Data/Step1_LRPredictionResult/N_MI_', data) 6 | }else if(grepl('CID', data)){ 7 | result.path <- paste0('./Data/Step1_LRPredictionResult/NG_BC_', data) 8 | }else if(grepl('Slide14', data)){ 9 | result.path <- paste0('./Data/Step1_LRPredictionResult/MouseEmbryo_', data) 10 | }else{ 11 | result.path <- paste0('./Data/Step1_LRPredictionResult/', data) 12 | } 13 | }else{ 14 | result.path <- paste0('./Data/Step10_LRBenchSamplingResult/', data, '_', ratio) 15 | } 16 | 17 | dataset_Record <- lapply(methods, function(method){ 18 | print(method) 19 | 20 | if(method == 'CytoTalk'){ 21 | record_dir <- paste0(result.path, '/', method) 22 | record_files <- list.files(record_dir, pattern = 'TimeMemRecord', full.names = TRUE) 23 | temp_record <- lapply(record_files, function(file){ 24 | record <- tryCatch(read.table(file, header = FALSE, sep = '\t', fill = TRUE), 25 | error=function(e){NA} 26 | ) 27 | if(class(record)=='data.frame'){ 28 | tmp_record <- RunTimeMemRecord_2(record) 29 | }else{ 30 | tmp_record <- NA 31 | } 32 | tmp_record 33 | }) 34 | temp_record[which(is.na(temp_record))] <- NULL 35 | temp_record <- do.call(rbind, temp_record) %>% as.data.frame() 36 | 37 | if(ratio == 100){ 38 | number_record <- DataRecord$Celltypes[which((DataRecord$ratio == ratio) & (DataRecord$datasets==data))] 39 | actual_cp <- number_record * number_record - number_record - number_record 40 | combn_cp <- dim(combn(seq(number_record), 2))[2] 41 | temp_record$clock_time <- (temp_record$clock_time/actual_cp)*combn_cp 42 | temp_record$linux_time <- (temp_record$linux_time/actual_cp)*combn_cp 43 | } 44 | 45 | temp_record <- data.frame(clock_time = sum(temp_record$clock_time), 46 | linux_time = sum(temp_record$linux_time), 47 | max_memory = max(temp_record$max_memory), 48 | cpu_linux = paste0(round(mean(as.numeric(gsub('%', '', temp_record$cpu_linux))), 0), '%')) 49 | }else{ 50 | record <- read.table(paste0(result.path, '/', method, '/', 'TimeMemRecord.txt'), 51 | header = FALSE, sep = '\t', fill = TRUE) 52 | temp_record <- RunTimeMemRecord_2(record) 53 | } 54 | 55 | temp_record 56 | }) 57 | names(dataset_Record) <- methods 58 | return(dataset_Record) 59 | } 60 | 61 | 62 | RunTimeMemRecord_2 <- function(record){ 63 | 64 | record <- record[, 2, drop = FALSE] 65 | if(dim(record)[1]==24){ 66 | record <- record[-1, , drop = FALSE] 67 | } 68 | record <- record[c(2,3,4,5,10),, drop = FALSE] 69 | 70 | 71 | # Linux_time: user+system time 72 | user_time <- as.numeric(gsub('User time \\(seconds\\): ', '', record[1,])) %>% as.numeric() 73 | sys_time <- as.numeric(gsub('System time \\(seconds\\): ', '', record[2,])) %>% as.numeric() 74 | linux_time <- user_time+sys_time 75 | 76 | # Clock_time 77 | clock_time <- gsub('Elapsed \\(wall clock\\) time \\(h:mm:ss or m:ss\\): ', '', record[4,]) 78 | if(stringr::str_count(clock_time, ':') == 1){ 79 | min_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[1]) 80 | min_time <- min_time*60 81 | sec_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[2]) 82 | clock_time <- min_time+sec_time; rm(sec_time, min_time) 83 | }else if(stringr::str_count(clock_time, ':') == 2){ 84 | h_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[1]) 85 | h_time <- h_time*60*60 86 | min_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[2]) 87 | min_time <- min_time*60 88 | sec_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[3]) 89 | clock_time <- h_time+min_time+sec_time; 90 | rm(sec_time, min_time, h_time);gc() 91 | } 92 | 93 | mem_linux <- gsub('Maximum resident set size \\(kbytes\\): ', '', record[5,]) 94 | mem_linux <- round(as.numeric(mem_linux)/1024/1024, 2) 95 | cpu_linux <- gsub('Percent of CPU this job got: ', '', record[3,]) 96 | 97 | return(data.frame(clock_time = clock_time, linux_time = linux_time, max_memory = mem_linux, cpu_linux = cpu_linux)) 98 | } 99 | -------------------------------------------------------------------------------- /Step6_LRBenchSampling/RunScript.R: -------------------------------------------------------------------------------- 1 | suppressMessages(library(Seurat)) 2 | source('../Script/Step0_LRToolsFunction/Step1_LRToolsFunction.R') 3 | set.seed(123) 4 | 5 | args <- commandArgs() 6 | tools <- args[6] 7 | print(tools) 8 | sampleID <- args[7] 9 | print(sampleID) 10 | 11 | species <- args[8] 12 | ratio <- args[9] 13 | print(species) 14 | print(ratio) 15 | 16 | if(grepl('CID', sampleID) & tools == 'RNAMagnet'){ 17 | 18 | fpath.dataset <- paste0('/home/ljx/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data_sampling_Mm/', 19 | sampleID,'_', ratio, '_ser.rds') 20 | 21 | }else if(grepl('CID', sampleID) & !(tools == 'RNAMagnet')){ 22 | 23 | fpath.dataset <- paste0('/home/ljx/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data_sampling/', 24 | sampleID, '_', ratio, '_ser.rds') 25 | 26 | }else if(grepl('CK', sampleID) & tools == 'RNAMagnet'){ 27 | 28 | fpath.dataset <- paste0('/home/ljx/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data_sampling_Mm/', 29 | sampleID, '_', ratio , '_ser.rds') 30 | 31 | }else if(grepl('CK', sampleID) & !(tools == 'RNAMagnet')){ 32 | 33 | fpath.dataset <- paste0('/home/ljx/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data_sampling/', 34 | sampleID, '_', ratio , '_ser.rds') 35 | 36 | }else if(grepl('Slide', sampleID)){ 37 | if(tools %in% c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'NicheNet', 'iTALK', 'ICELLNET', 'scMLnet','NATMI', 'cell2cell')){ 38 | fpath.dataset <- paste0('/home/ljx/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/scRNA_Data_Sampling/', 39 | sampleID, '_', ratio , '_hs_ser.rds') 40 | }else{ 41 | fpath.dataset <- paste0('/home/ljx/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/scRNA_Data_Sampling/', 42 | sampleID, '_', ratio , '_mm_ser.rds') 43 | } 44 | }else if(grepl('pbmc', sampleID) & !(tools == 'RNAMagnet')){ 45 | fpath.dataset <- paste0('/home/ljx/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/scRNA_Data_sampling/', sampleID, '_', ratio , '_ser.rds') 46 | }else if(grepl('pbmc', sampleID) & (tools == 'RNAMagnet')){ 47 | fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/scRNA_Data_sampling/', sampleID, '_', ratio , '_mm_ser.rds') 48 | } 49 | 50 | dir.output <- paste0('/home/ljx/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step10_LRBenchSamplingResult/', sampleID, '_', ratio) 51 | 52 | ser <- readRDS(fpath.dataset) 53 | 54 | if(!dir.exists(dir.output)){ 55 | dir.create(dir.output) 56 | } 57 | 58 | fpath.tools <- paste(dir.output, tools, sep = '/') 59 | if(!dir.exists(fpath.tools)){ 60 | dir.create(fpath.tools) 61 | } 62 | 63 | if(tools == 'CellCall'){ 64 | if(grepl('CK', sampleID) | grepl('pbmc', sampleID)){ 65 | n <- 2 66 | }else if(grepl('CID', sampleID) | grepl('Sper', sampleID)){ 67 | n <- 3 68 | }else if(grepl('Slide', sampleID)){ 69 | n <- 5 70 | } 71 | result <- CellCall_function(ser, n, species = species) 72 | }else if(tools == 'CellChat'){ 73 | result <- CellChat_function(ser, species = species) 74 | }else if(tools == 'CellPhoneDB2'){ 75 | result <- CellPhoneDB2_function(ser, fpath.tools) 76 | }else if(tools == 'CellPhoneDB3'){ 77 | result <- CellPhoneDB3_function(ser, fpath.tools) 78 | }else if(tools == 'CellTalker'){ 79 | result <- CellTalker_function(ser) 80 | }else if(tools == 'Connectome'){ 81 | result <- Connectome_function(ser, species = species) 82 | }else if(tools == 'CytoTalk'){ 83 | result <- CytoTalk_function(ser, fpath.tools, species = species) 84 | }else if(tools == 'Domino'){ 85 | if(sampleID %in% c('pbmc4k', 'pbmc8k')){ 86 | species <- 'hg38' 87 | }else if(sampleID == 'pbmc6k' | grepl('Sper' ,sampleID)){ 88 | species <- 'hg19' 89 | }else if(grepl('Slide' ,sampleID)){ 90 | species <- 'mm10' 91 | }else{ 92 | species <- 'hg38' 93 | } 94 | result <- Domino_function(ser, fpath.tools, species = species) 95 | }else if(tools == 'ICELLNET'){ 96 | result <- ICELLNET_function(ser) 97 | }else if(tools == 'iTALK'){ 98 | result <- iTALK_function(ser) 99 | }else if(tools == 'NATMI'){ 100 | result <- NATMI_function(ser, fpath.tools) 101 | }else if(tools == 'NicheNet'){ 102 | result <- NicheNet_function(ser, lr = TRUE) 103 | }else if(tools == 'PyMINEr'){ 104 | result <- PyMINEr_function(ser, fpath.tools, species) 105 | }else if(tools == 'RNAMagnet'){ 106 | result <- RNAMagnet_function(ser) 107 | }else if(tools == 'scConnect'){ 108 | result <- scConnect_function(ser, fpath.tools, species) 109 | }else if(tools == 'scMLnet'){ 110 | result <- scMLnet_function(ser) 111 | }else if(tools == 'scSeqComm'){ 112 | result <- scSeqComm_function(ser, species) 113 | }else if(tools == 'SingleCellSignalR'){ 114 | result <- SCSR_function(ser, species) 115 | } 116 | saveRDS(result, file = paste0(fpath.tools, '/result.rds')) -------------------------------------------------------------------------------- /Step7_LRBenchSamplingBench/Step1_DatasetRecord.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()); gc() 2 | suppressMessages(library(tidyverse)) 3 | set.seed(123) 4 | 5 | datasets <- list.files('./Data/Step6_LRBenchSamplingResult/', recursive = FALSE) %>% 6 | gsub('_[0-9]+', '',.) %>% unique() 7 | ratios <- c(50, 60, 70, 80, 90, 100) 8 | 9 | record_cells <- lapply(datasets, function(dataset){ 10 | tmp <- lapply(ratios, function(ratio){ 11 | if(grepl('CID', dataset) & ratio == 100){ 12 | fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data/', dataset, '_ser.rds') 13 | }else if(grepl('CID', dataset) & ratio != 100){ 14 | fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data_sampling/', dataset,'_', ratio, '_ser.rds') 15 | }else if(grepl('CK', dataset) & ratio == 100){ 16 | fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data/', dataset, '_ser.rds') 17 | }else if(grepl('CK', dataset) & ratio != 100){ 18 | fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data_sampling/', dataset, '_', ratio , '_ser.rds') 19 | }else if(grepl('Slide', dataset) & ratio == 100){ 20 | fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/DataForCCC/', dataset, '_mm_ser.rds') 21 | }else if(grepl('Slide', dataset) & ratio != 100){ 22 | fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/scRNA_Data_Sampling/', dataset, '_', ratio , '_mm_ser.rds') 23 | }else if(grepl('pbmc', dataset) & ratio == 100){ 24 | fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/DataForCCC/', dataset, '_ser.rds') 25 | }else if(grepl('pbmc', dataset) & ratio != 100){ 26 | fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/scRNA_Data_sampling/', dataset, '_', ratio , '_ser.rds') 27 | } 28 | ser <- readRDS(fpath.dataset) 29 | 30 | cells <- dim(ser)[[2]] 31 | })%>% unlist() 32 | 33 | tmp 34 | }) 35 | names(record_cells) <- datasets 36 | record_cells <- do.call(rbind, record_cells) 37 | colnames(record_cells) <- paste0('S', ratios) 38 | record_cells <- as.data.frame(record_cells) 39 | record_cells <- tibble::rownames_to_column(record_cells, 'datasets') 40 | record_cells <- pivot_longer(record_cells, -datasets, 'SampleRate') 41 | colnames(record_cells)[3] <- 'Cells' 42 | record_cells$class <- paste(record_cells$datasets, record_cells$SampleRate, sep = '_') 43 | record_cells <- record_cells[,-c(1:2)] 44 | 45 | 46 | record_celltypes <- lapply(datasets, function(dataset){ 47 | tmp <- lapply(ratios, function(ratio){ 48 | if(grepl('CID', dataset) & ratio == 100){ 49 | fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data/', dataset, '_ser.rds') 50 | }else if(grepl('CID', dataset) & ratio != 100){ 51 | fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data_sampling/', dataset,'_', ratio, '_ser.rds') 52 | }else if(grepl('CK', dataset) & ratio == 100){ 53 | fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data/', dataset, '_ser.rds') 54 | }else if(grepl('CK', dataset) & ratio != 100){ 55 | fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data_sampling/', dataset, '_', ratio , '_ser.rds') 56 | }else if(grepl('Slide', dataset) & ratio == 100){ 57 | fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/DataForCCC/', dataset, '_mm_ser.rds') 58 | }else if(grepl('Slide', dataset) & ratio != 100){ 59 | fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/scRNA_Data_Sampling/', dataset, '_', ratio , '_mm_ser.rds') 60 | }else if(grepl('pbmc', dataset) & ratio == 100){ 61 | fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/DataForCCC/', dataset, '_ser.rds') 62 | }else if(grepl('pbmc', dataset) & ratio != 100){ 63 | fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/scRNA_Data_sampling/', dataset, '_', ratio , '_ser.rds') 64 | } 65 | ser <- readRDS(fpath.dataset) 66 | 67 | celltype <- length(unique(ser$celltype)) 68 | })%>% unlist() 69 | 70 | tmp 71 | }) 72 | names(record_celltypes) <- datasets 73 | record_celltypes <- do.call(rbind, record_celltypes) 74 | colnames(record_celltypes) <- paste0('S', ratios) 75 | record_celltypes <- as.data.frame(record_celltypes) 76 | record_celltypes <- tibble::rownames_to_column(record_celltypes, 'datasets') 77 | record_celltypes <- pivot_longer(record_celltypes, -datasets, 'SampleRate') 78 | colnames(record_celltypes)[3] <- 'Celltypes' 79 | record_celltypes$class <- paste(record_celltypes$datasets, record_celltypes$SampleRate, sep = '_') 80 | record_celltypes <- record_celltypes[,-c(1:2)] 81 | 82 | record <- merge(record_cells, record_celltypes, by = 'class') 83 | record <- separate(record, class, c('datasets', 'ratio'), sep = '_') 84 | saveRDS(record, file = './Data/Step6_LRBenchSampling/DatasetsRecord.rds') 85 | -------------------------------------------------------------------------------- /Step5_BenchBasedCAGEProteomic/ScriptForCAGE.R: -------------------------------------------------------------------------------- 1 | rm(list = ls());gc() 2 | suppressMessages(library(ROCR)) 3 | suppressMessages(library(tidyverse)) 4 | source('./Script/Step5_BenchBasedCAGEProteomic/function.R') 5 | source('./Script/Step3_MIForLRBench/function.R') 6 | set.seed(123) 7 | 8 | #process cage data 9 | if(F){ 10 | celltype <- c('CD14+CD16- Monocytes', 'CD14-CD16+ Monocytes', 'CD19+ B cells', 11 | 'Dendritic Monocyte Immature derived', 'Dendritic Plasmacytoid', 12 | 'CD4+CD25-CD45RA- memory conventional T cells', 13 | 'CD4+CD25-CD45RA+ naive conventional T cells', 14 | 'CD8+ T cells', 'NK cells') 15 | cage_all <- data.table::fread('~/1-Datasets/Fantom5_CAGE/ExpressionGenes.txt', header = TRUE, sep = '\t') 16 | cage_all <- as.data.frame(cage_all) 17 | cage_all <- cage_all[ , c(1, which(colnames(cage_all) %in% celltype))] 18 | cage_all <- tibble::column_to_rownames(cage_all, 'ApprovedSymbol') 19 | colnames(cage_all) <- c('CD14Mono', 'CD16Mono', 'B', 20 | 'naiveCD4T', 'memoryCD4T', 'CD8T', 21 | 'mDC', 'pDC', 'NK') 22 | cage_all <- as.matrix(cage_all) 23 | cage <- lapply(1:dim(cage_all)[[2]], function(i){ 24 | x <- cage_all[,i] 25 | genes <- rownames(cage_all)[which(x>10)] 26 | paste(genes, colnames(cage_all)[i], sep = '_') 27 | }) 28 | cage <- unlist(cage) 29 | cage_genes <- list(all = rownames(cage_all), genes_ct = cage) 30 | saveRDS(cage_genes, file = './Data/Step5_BenchBasedCAGEProteomic/cagedata.rds') 31 | } 32 | 33 | #calculate index 34 | if(F){ 35 | cage_genes <- readRDS('./Data/Step5_BenchBasedCAGEProteomic/cagedata.rds') 36 | 37 | datasets <- c('pbmc4k', 'pbmc6k', 'pbmc8k') 38 | 39 | for (data in datasets) { 40 | fpath <- paste0('./Data/Step1_LRPredictionResult/', data) 41 | result.files <- list.files(fpath, full.names = TRUE) 42 | result.files <- result.files[-which(grepl('cellinker', result.files))] 43 | result_index <- lapply(result.files, function(result.file){ 44 | print(result.file) 45 | if(grepl('Domino', result.file) | grepl('scMLnet', result.file)){ 46 | result <- readRDS(paste0(result.file, '/result1.rds')) 47 | }else{ 48 | result <- readRDS(paste0(result.file, '/result.rds')) 49 | } 50 | 51 | result <- result$result 52 | 53 | if((dim(result)[[1]] != 0) & 'LRscore' %in% colnames(result)){ 54 | if(grepl('RNAMagnet', result.file)){ 55 | genes <- readRDS(paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/', data, '_hs2mm.rds')) 56 | result <- mm2hs(result, genes) 57 | } 58 | 59 | result$receptor_ct <- lapply(1:dim(result)[[1]], function(i){ 60 | receptor <- result$Receptor[i] 61 | if(grepl('&', receptor)){ 62 | receptor <- unlist(strsplit(receptor, '&')) 63 | } 64 | receiver <- result$Receiver[i] 65 | tmp <- paste(receptor, receiver, sep = '_') 66 | label <- ifelse(all(tmp %in% cage_genes$genes_ct), TRUE, FALSE) 67 | if(!label){ 68 | label <- ifelse(all(receptor %in% cage_genes$all), FALSE, NA) 69 | } 70 | label 71 | }) %>% unlist() 72 | 73 | result$ligand_ct <- lapply(1:dim(result)[[1]], function(i){ 74 | ligand <- result$Ligand[i] 75 | if(grepl('&', ligand)){ 76 | ligand <- unlist(strsplit(ligand, '&')) 77 | } 78 | sender <- result$Sender[i] 79 | tmp <- paste(ligand, sender, sep = '_') 80 | label <- ifelse(all(tmp %in% cage_genes$genes_ct), TRUE, FALSE) 81 | if(!label){ 82 | label <- ifelse(all(ligand %in% cage_genes$all), FALSE, NA) 83 | } 84 | label 85 | }) %>% unlist() 86 | 87 | result <- result[which(!is.na(result$receptor_ct)), ] 88 | result <- result[which(!is.na(result$ligand_ct)),] 89 | 90 | result$label <- lapply(1:dim(result)[[1]], function(i){ 91 | all(result$ligand_ct[i] & result$receptor_ct[i]) 92 | }) %>% unlist() 93 | 94 | result$sr <- paste(result$Sender, result$Receiver, sep = '_') 95 | result <- result[, c('LRscore', 'label', 'sr')] 96 | result <- split(result, result$sr) 97 | 98 | res_index <- lapply(result, function(res){ 99 | index <- get_evaluate_metrics(as.numeric(res$LRscore), res$label) 100 | index <- index$perf_metrics 101 | }) 102 | res_index <- do.call(rbind, res_index) 103 | res_index <- as.data.frame(res_index) 104 | 105 | }else{ 106 | res_index <- NA 107 | } 108 | res_index 109 | }) 110 | 111 | names(result_index) <- substring(result.files, 40) 112 | result_index[which(is.na(result_index))] <- NULL 113 | result_index <- do.call(rbind, result_index) 114 | result_index <- result_index[which(rowSums(result_index)!=0), ] 115 | result_index <- tibble::rownames_to_column(result_index, 'temp') 116 | result_index <- tidyr::separate(result_index, temp, c('methods', 'sr'), '\\.') 117 | result_index$dataset <- data 118 | result_index 119 | saveRDS(result_index, file = paste0('./Data/Step5_BenchBasedCAGEProteomic/', data, '_cage.rds')) 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /Step1_LRPredictionResult/RunScript.R: -------------------------------------------------------------------------------- 1 | suppressMessages(library(Seurat)) 2 | source('../Script/Step0_LRToolsFunction/Step1_LRToolsFunction.R') 3 | set.seed(123) 4 | 5 | args <- commandArgs() 6 | tools <- args[6] 7 | print(tools) 8 | sampleID <- args[7] 9 | print(sampleID) 10 | species <- args[8] 11 | print(species) 12 | 13 | if(grepl('CID', sampleID) & tools == 'RNAMagnet'){ 14 | fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data_Mm/', 15 | sampleID, '_ser.rds') 16 | dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/NG_BC_', sampleID) 17 | }else if(grepl('CID', sampleID) & !(tools == 'RNAMagnet')){ 18 | fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data/', 19 | sampleID, '_ser.rds') 20 | dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/NG_BC_', sampleID) 21 | }else if(grepl('CK', sampleID) & tools == 'RNAMagnet'){ 22 | fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data_Mm/', 23 | sampleID, '_ser.rds') 24 | dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/N_MI_', sampleID) 25 | }else if(grepl('CK', sampleID) & !(tools == 'RNAMagnet')){ 26 | fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data/', 27 | sampleID, '_ser.rds') 28 | dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/N_MI_', sampleID) 29 | }else if(grepl('Slide', sampleID)){ 30 | if(tools %in% c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'NicheNet', 'iTALK', 'ICELLNET', 'scMLnet','NATMI', 'cell2cell')){ 31 | fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/DataForCCC/', 32 | sampleID, '_hs_ser.rds') 33 | }else{ 34 | fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/DataForCCC/', 35 | sampleID, '_mm_ser.rds') 36 | } 37 | dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/MouseEmbryo_', sampleID) 38 | }else if(grepl('pbmc', sampleID) & !(tools == 'RNAMagnet')){ 39 | fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/DataForCCC/', sampleID, '_ser.rds') 40 | dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/', sampleID) 41 | }else if(grepl('pbmc', sampleID) & (tools == 'RNAMagnet')){ 42 | fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/DataForCCC/', sampleID, '_mm_ser.rds') 43 | dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/', sampleID) 44 | } 45 | 46 | ser <- readRDS(fpath.dataset) 47 | 48 | if(!dir.exists(dir.output)){ 49 | dir.create(dir.output) 50 | } 51 | 52 | fpath.tools <- paste(dir.output, tools, sep = '/') 53 | if(!dir.exists(fpath.tools)){ 54 | dir.create(fpath.tools) 55 | } 56 | 57 | if(tools == 'cell2cell'){ 58 | result <- cell2cell_function(ser, fpath = fpath.tools, species = species) 59 | }else if(tools == 'CellCall'){ 60 | if(grepl('CK', sampleID) | grepl('pbmc', sampleID)){ 61 | n <- 2 62 | }else if(grepl('CID', sampleID)){ 63 | n <- 3 64 | }else if(grepl('Slide', sampleID)){ 65 | n <- 5 66 | } 67 | result <- CellCall_function(ser, n, species = species) 68 | }else if(tools == 'CellChat'){ 69 | result <- CellChat_function(ser, species = species) 70 | }else if(tools == 'CellPhoneDB2'){ 71 | result <- CellPhoneDB2_function(ser, fpath.tools) 72 | }else if(tools == 'CellPhoneDB3'){ 73 | result <- CellPhoneDB3_function(ser, fpath.tools) 74 | }else if(tools == 'CellTalker'){ 75 | result <- CellTalker_function(ser) 76 | }else if(tools == 'Connectome'){ 77 | result <- Connectome_function(ser, species = species) 78 | }else if(tools == 'CytoTalk'){ 79 | result <- CytoTalk_function(ser, fpath.tools, species = species) 80 | }else if(tools == 'Domino'){ 81 | if(sampleID %in% c('pbmc4k', 'pbmc8k')){ 82 | species <- 'hg38' 83 | }else if(sampleID == 'pbmc6k'){ 84 | species <- 'hg19' 85 | }else if(grepl('Slide' ,sampleID)){ 86 | species <- 'mm10' 87 | }else{ 88 | species <- 'hg38' 89 | } 90 | result <- Domino_function(ser, fpath.tools, species = species) 91 | }else if(tools == 'ICELLNET'){ 92 | result <- ICELLNET_function(ser) 93 | }else if(tools == 'iTALK'){ 94 | result <- iTALK_function(ser) 95 | }else if(tools == 'NATMI'){ 96 | result <- NATMI_function(ser, fpath.tools, species = species) 97 | }else if(tools == 'NicheNet'){ 98 | result <- NicheNet_function(ser, lr = TRUE) 99 | }else if(tools == 'PyMINEr'){ 100 | result <- PyMINEr_function(ser, fpath.tools, species) 101 | }else if(tools == 'RNAMagnet'){ 102 | result <- RNAMagnet_function(ser) 103 | }else if(tools == 'scConnect'){ 104 | result <- scConnect_function(ser, fpath.tools, species) 105 | }else if(tools == 'scMLnet'){ 106 | result <- scMLnet_function(ser) 107 | }else if(tools == 'scSeqComm'){ 108 | result <- scSeqComm_function(ser, species) 109 | }else if(tools == 'SingleCellSignalR'){ 110 | result <- SCSR_function(ser, species) 111 | } 112 | 113 | saveRDS(result, file = paste0(fpath.tools, '/result.rds')) -------------------------------------------------------------------------------- /Step9_LRTBench/Step1_ScriptForBench.R: -------------------------------------------------------------------------------- 1 | rm(list = ls());gc() 2 | setwd('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step8_LRTPredictionResult/') 3 | library(dplyr) 4 | library(ROCR) 5 | library(ggplot2) 6 | library(ggsci) 7 | library(ggpubr) 8 | library(tidyverse) 9 | source('../../Script/Step5_BenchBasedCAGEProteomic/function.R') 10 | set.seed(123) 11 | 12 | 13 | ########### 14 | ## color ## 15 | ########### 16 | 17 | scales::show_col(pal_aaas(palette = "default", alpha = 0.6)(10)) 18 | mycolors_aaas <- pal_aaas(palette = "default", alpha = 0.6)(10) 19 | 20 | mycolor_software <- mycolors_aaas[c(1:5)] 21 | names(mycolor_software) <- c('CytoTalk', "NicheNet", "MISTy", "HoloNet", "stMLnet") 22 | scales::show_col(mycolor_software) 23 | 24 | ##################### 25 | ## load all result ## 26 | ##################### 27 | 28 | if(F){ 29 | data_dirs <- list.files('.', full.names = TRUE) 30 | all_result <- lapply(data_dirs, function(data_dir){ 31 | print(data_dir) 32 | methods_dirs <- list.files(data_dir, full.names = TRUE) 33 | methods <- list.files(data_dir) 34 | used_methods <- c('CytoTalk', 'HoloNet', 'MISTy', 'NicheNet', 'stMLnet') 35 | index <- which(methods %in% used_methods) 36 | methods_dirs <- methods_dirs[index] 37 | methods <- methods[index] 38 | data_result <- lapply(seq(methods_dirs), function(i){ 39 | print(i) 40 | res_files <- list.files(methods_dirs[i], '_result.rds', full.names = TRUE) 41 | if(length(res_files)>0){ 42 | res <- lapply(res_files, function(res_file){ 43 | res_temp <- readRDS(res_file) 44 | if(dim(res_temp)[[1]] != 0){ 45 | res_temp$methods <- methods[i] 46 | }else{ 47 | res_temp <- NA 48 | } 49 | res_temp 50 | }) 51 | names(res) <- list.files(methods_dirs[i], '_result.rds') %>% gsub('_result.rds', '', .) 52 | res[which(is.na(res))] <- NULL 53 | }else{ 54 | res <- NA 55 | } 56 | res 57 | }) 58 | names(data_result) <- methods 59 | data_result[which(is.na(data_result))] <- NULL 60 | data_result 61 | }) 62 | names(all_result) <- list.files('.') 63 | saveRDS(all_result, file = 'all_result.rds') 64 | } 65 | 66 | setwd('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data') 67 | source('../Script/Step9_LRTBench/function.R') 68 | all_result <- readRDS('./Step8_LRTPredictionResult/all_result.rds') 69 | 70 | ############### 71 | ## Calculate ## 72 | ############### 73 | 74 | result_index <- lapply(names(all_result), function(data){ 75 | print(data) 76 | data_res <- all_result[[data]] 77 | 78 | if(grepl('CID', data)){ 79 | 80 | ## groundtrue 81 | degs_ls <- readRDS('./Step0_SharedInfo/LRTBench/BC_celline/degs_ls_p.rds') 82 | 83 | methods_res <- lapply(data_res, function(x){x$CancerEpithelial}) 84 | methods_res[unlist(lapply(methods_res, is.null))] <- NULL 85 | 86 | result <- getallmetrics(methods_res, degs_ls) 87 | result$celltype <- 'CancerEpithelial' 88 | 89 | result1 <- getallrecord(methods_res, degs_ls) 90 | saveRDS(result1, file = paste0('./Step8_LRTBenchResult/record/', data, '.rds')) 91 | 92 | }else if(grepl('control', data)){ 93 | 94 | ## groundtrue 95 | degs_ls <- readRDS('~/1-Datasets/CellLinesDatasets/ProjectForLRTBench/result/degs_ls_p.rds') 96 | used <- which(grepl('GSE206947', names(degs_ls)) | 97 | grepl('GSE181575', names(degs_ls)) | 98 | grepl('GSE123018', names(degs_ls))) 99 | degs_ls_used <- degs_ls[used] 100 | 101 | methods_res <- lapply(data_res, function(x){x$Fibroblast}) 102 | methods_res[unlist(lapply(methods_res, is.null))] <- NULL 103 | 104 | result <- getallmetrics(methods_res, degs_ls_used) 105 | result$celltype <- 'Fibroblast' 106 | 107 | result1 <- getallrecord(methods_res, degs_ls_used) 108 | saveRDS(result1, file = paste0('./Step8_LRTBenchResult/record/', data, '.rds')) 109 | 110 | }else if(grepl('UKF', data)){ 111 | 112 | ## groundtrue 113 | degs_ls <- readRDS('~/1-Datasets/CellLinesDatasets/ProjectForLRTBench/result/degs_ls_p.rds') 114 | 115 | methods_res_macro <- lapply(data_res, function(x){x$macrophages}) 116 | methods_res_macro[unlist(lapply(methods_res_macro, is.null))] <- NULL 117 | 118 | methods_res_mal <- lapply(data_res, function(x){x$Malignant}) 119 | methods_res_mal[unlist(lapply(methods_res_mal, is.null))] <- NULL 120 | 121 | used <- which(grepl('GSE69104', names(degs_ls)) & grepl('TAM', names(degs_ls))) 122 | degs_ls_used <- degs_ls[used] 123 | result_macro <- getallmetrics(methods_res_macro, degs_ls_used) 124 | result_macro1 <- getallrecord(methods_res_macro, degs_ls_used) 125 | saveRDS(result_macro1, file = paste0('./Step8_LRTBenchResult/record/', data, '_macro.rds')) 126 | 127 | used <- which(grepl('GSE140145', names(degs_ls))| 128 | grepl('GSE116414', names(degs_ls))) 129 | degs_ls_used <- degs_ls[used] 130 | result_mal <- getallmetrics(methods_res_mal, degs_ls_used) 131 | result_mal1 <- getallrecord(methods_res_mal, degs_ls_used) 132 | saveRDS(result_mal1, file = paste0('./Step8_LRTBenchResult/record/', data, '_mal.rds')) 133 | 134 | result_macro$celltype <- 'macrophages' 135 | result_mal$celltype <- 'malignant' 136 | result <- rbind(result_macro, result_mal) 137 | 138 | } 139 | result 140 | }) 141 | names(result_index) <- names(all_result) 142 | saveRDS(result_index,file = './Step8_LRTBenchResult/result_index.rds') 143 | -------------------------------------------------------------------------------- /Step4_SIRSIForLRBench/function.R: -------------------------------------------------------------------------------- 1 | IDTransform <- function(data, method, result){ 2 | if(dim(result)[1]!=0){ 3 | if(grepl('CID', data) & method == 'RNAMagnet'){ 4 | genes <- readRDS('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/hs2mm_genes.rds') 5 | result <- mm2hs(result, genes) 6 | }else if(grepl('CK', data) & method == 'RNAMagnet'){ 7 | genes <- readRDS('~/1-Datasets/Nature_2022_MI/ScriptForCCC/hs2mm_genes.rds') 8 | result <- mm2hs(result, genes) 9 | }else if(grepl('Slide', data) & 10 | method %in% c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'NATMI', 11 | 'ICELLNET', 'NicheNet', 'scMLnet', 'iTALK', 'cell2cell')){ 12 | genes <- readRDS('~/1-Datasets/MouseEmbryo_GSE166692/sciSpaceData/mm2hs.rds') 13 | result <- hs2mm(result, genes) 14 | } 15 | } 16 | return(result) 17 | } 18 | 19 | CalSI_1 <- function(a, b) { 20 | intersection = length(intersect(unique(a), unique(b))) 21 | #union = length(a) + length(b) - intersection 22 | union <- min(length(unique(a)), length(unique(b))) 23 | return (intersection/union) 24 | } 25 | 26 | CalSI_2 <- function(data){ 27 | methods <- c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'Connectome', 28 | 'NATMI', 'ICELLNET', 'scConnect', 'CellChat', 'SingleCellSignalR', 29 | 'CellCall', 'scSeqComm', 'NicheNet', 'Domino', 'scMLnet', 'iTALK', 30 | 'cell2cell', 'RNAMagnet', 'PyMINEr', 'CytoTalk') 31 | 32 | # get result path 33 | if(grepl('CID', data)){ 34 | result.path <- paste0('./Data/Step1_LRPredictionResult/NG_BC_', data) 35 | }else if(grepl('CK', data)){ 36 | result.path <- paste0('./Data/Step1_LRPredictionResult/N_MI_', data) 37 | }else if(grepl('Slide', data)){ 38 | result.path <- paste0('./Data/Step1_LRPredictionResult/MouseEmbryo_', data) 39 | } 40 | 41 | SI_res <- lapply(methods, function(method1){ 42 | print(method1) 43 | if(method1 == 'scMLnet' | method1 == 'Domino'){ 44 | result1 <- readRDS(paste0(result.path, '/', method1, '/result1.rds')) 45 | }else{ 46 | result1 <- readRDS(paste0(result.path, '/', method1, '/result.rds')) 47 | } 48 | 49 | result1 <- result1$result 50 | result1 <- IDTransform(data, method1, result1) 51 | temp.si <- lapply(methods, function(method2){ 52 | if(method2 == 'scMLnet' | method2 == 'Domino'){ 53 | result2 <- readRDS(paste0(result.path, '/', method2, '/result1.rds')) 54 | }else{ 55 | result2 <- readRDS(paste0(result.path, '/', method2, '/result.rds')) 56 | } 57 | 58 | result2 <- result2$result 59 | result2 <- IDTransform(data, method2, result2) 60 | if(dim(result1)[1]==0 | dim(result2)[1]==0){ 61 | tmp.si <- NA 62 | }else{ 63 | tmp.si <- CalSI_1(as.character(result1$all), as.character(result2$all)) 64 | } 65 | 66 | tmp.si 67 | }) 68 | temp.si <- data.frame(SI = unlist(temp.si), row.names = methods) 69 | temp.si 70 | }) 71 | SI_res <- do.call(cbind, SI_res) 72 | colnames(SI_res) <- methods 73 | #SI_res <- SI_res[!apply(SI_res, 1, function(x){all(is.na(x))}), ] 74 | #SI_res <- SI_res[, !apply(SI_res, 2, function(x){all(is.na(x))})] 75 | return(SI_res) 76 | } 77 | 78 | CalRSI <- function(data){ 79 | # remove cell2cell 80 | methods <- c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'Connectome', 81 | 'NATMI', 'ICELLNET', 'scConnect', 'CellChat', 'SingleCellSignalR', 82 | 'CellCall', 'scSeqComm', 'NicheNet', 'Domino', 'scMLnet', 'iTALK', 83 | 'cell2cell', 'RNAMagnet', 'PyMINEr', 'CytoTalk') 84 | 85 | # get result path 86 | if(grepl('CID', data)){ 87 | result.path <- paste0('./Data/Step1_LRPredictionResult/NG_BC_', data) 88 | }else if(grepl('CK', data)){ 89 | result.path <- paste0('./Data/Step1_LRPredictionResult/N_MI_', data) 90 | }else if(grepl('Slide', data)){ 91 | result.path <- paste0('./Data/Step1_LRPredictionResult/MouseEmbryo_', data) 92 | } 93 | 94 | RSI_res <- lapply(methods, function(method1){ 95 | print(paste0('Method1: ', method1)) 96 | if(method1 == 'scMLnet' | method1 == 'Domino'){ 97 | result1 <- readRDS(paste0(result.path, '/', method1, '/result1.rds')) # recode the LR score 98 | }else{ 99 | result1 <- readRDS(paste0(result.path, '/', method1, '/result.rds')) 100 | } 101 | 102 | result1 <- as.data.frame(result1$result) 103 | result1$LRscore <- as.numeric(result1$LRscore) 104 | result1 <- IDTransform(data, method1, result1) 105 | if(method1 == 'CytoTalk'){ 106 | result1$rank <- rank(result1$LRscore) 107 | }else{ 108 | result1$rank <- rank(-result1$LRscore) 109 | } 110 | 111 | temp.rsi <- lapply(methods, function(method2){ 112 | print(paste0('Method2: ', method2)) 113 | if(method2 == 'scMLnet' | method2 == 'Domino'){ 114 | result2 <- readRDS(paste0(result.path, '/', method2, '/result1.rds')) 115 | }else{ 116 | result2 <- readRDS(paste0(result.path, '/', method2, '/result.rds')) 117 | } 118 | 119 | result2 <- as.data.frame(result2$result) 120 | result2$LRscore <- as.numeric(result2$LRscore) 121 | result2 <- IDTransform(data, method2, result2) 122 | if(method2 == 'CytoTalk'){ 123 | result2$rank <- rank(result2$LRscore) 124 | }else{ 125 | result2$rank <- rank(-result2$LRscore) 126 | } 127 | 128 | if(dim(result1)[1]==0 | dim(result2)[1]==0){ 129 | tmp.rsi <- NA 130 | }else{ 131 | overlap.lr <- intersect(result1$all, result2$all) 132 | if(length(overlap.lr)!=0){ 133 | rank1 <- result1[which(result1$all %in% overlap.lr),]$rank/dim(result1)[1] 134 | rank2 <- result2[which(result2$all %in% overlap.lr), ]$rank/dim(result2)[1] 135 | mean.rank <- mean(abs(rank1-rank2)) 136 | tmp.rsi <- 1-mean.rank 137 | }else{tmp.rsi <- 0} 138 | } 139 | tmp.rsi 140 | }) 141 | temp.rsi <- data.frame(RSI = unlist(temp.rsi), row.names = methods) 142 | temp.rsi 143 | }) 144 | RSI_res <- do.call(cbind, RSI_res) 145 | colnames(RSI_res) <- methods 146 | #RSI_res <- RSI_res[!apply(RSI_res, 1, function(x){all(is.na(x))}), ] 147 | #RSI_res <- RSI_res[, !apply(RSI_res, 2, function(x){all(is.na(x))})] 148 | 149 | return(RSI_res) 150 | } 151 | -------------------------------------------------------------------------------- /Step5_BenchBasedCAGEProteomic/ScriptForProteomics.R: -------------------------------------------------------------------------------- 1 | rm(list = ls());gc() 2 | suppressMessages(library(data.table)) 3 | suppressMessages(library(tidyverse)) 4 | suppressMessages(library(ROCR)) 5 | source('./Script/Step3_MIForLRBench/function.R') 6 | source('./Script/Step8_BenchBasedCAGEProteomic/function.R') 7 | set.seed(123) 8 | 9 | # process proteomics data 10 | if(F){ 11 | proteinGroup <- fread('~/1-Datasets/Proteomics_28/Data/proteinGroups.txt') 12 | pep <- proteinGroup[, c(7,549:816)] 13 | rm(proteinGroup); gc() 14 | 15 | pep <- pep[which(pep$Gene.names != ''), ] 16 | # pep 17 | if(F){ 18 | colnames(pep) <- gsub('Peptides.', '', colnames(pep)) 19 | pep <- as.data.frame(pep) 20 | pep <- pep[, c(1, which(grepl('steady-state', colnames(pep))))] 21 | pep <- pep[, which(!grepl('Library_single', colnames(pep)))] 22 | colnames(pep) <- gsub(' \\(steady-state\\)', '', colnames(pep)) 23 | 24 | colnames(pep)[1] <- 'genes' 25 | pep <- tidyr::separate_rows(pep, genes, sep = ';') 26 | pep <- aggregate(.~genes, data = pep, FUN=mean) %>% 27 | tibble::column_to_rownames(., 'genes') %>% 28 | as.matrix() %>% t() %>% as.data.frame() 29 | 30 | pep$celltype <- gsub('_[0-9]+', '', rownames(pep)) 31 | pep <- aggregate(.~celltype, data = pep, FUN = mean) %>% 32 | tibble::column_to_rownames(., 'celltype') %>% 33 | as.matrix() %>% t() %>% as.data.frame() 34 | 35 | celltype <- c('MO.classical', 'MO.nonclassical', 'mDC', 'pDC', 36 | 'B.naive', 'B.memory', 37 | 'T8.naive', 'T8.CM', 'T8.EM', 'T8.EMRA', 38 | 'T4.CM', 'T4.EM', 'T4.naive', 39 | 'NK.dim', 'NK.bright', 'Thrombocyte' 40 | ) 41 | 42 | pep <- pep[ , which(colnames(pep) %in% celltype)] 43 | 44 | pep <- as.matrix(pep) %>% t() %>% as.data.frame() 45 | pep$celltype <- c('B', 'B', 'mDC', 'CD14Mono', 'CD16Mono', 46 | 'NK', 'NK', 'pDC', 'memoryCD4T', 'memoryCD4T', 47 | 'naiveCD4T', 'CD8T', 'CD8T', 'CD8T', 'CD8T', 'Platelet') 48 | pep <- aggregate(.~celltype, data = pep, FUN = mean) %>% 49 | tibble::column_to_rownames(., 'celltype') %>% 50 | as.matrix() %>% t() %>% as.data.frame() 51 | 52 | pep_genes <- lapply(1:dim(pep)[[2]], function(i){ 53 | x <- pep[,i] 54 | genes <- rownames(pep)[which(x>=2)] 55 | paste(genes, colnames(pep)[i], sep = '_') 56 | }) 57 | pep_genes <- unlist(pep_genes) 58 | pep_genes_list <- list(all = rownames(pep), genes_ct = pep_genes) 59 | saveRDS(pep_genes_list, 60 | file = '~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step5_BenchBasedCAGEProteomic/pepdata.rds') 61 | } 62 | } 63 | 64 | rm(list = ls());gc() 65 | #calculate index -- peptides 66 | if(F){ 67 | pep_genes <- readRDS('./Data/Step8_BenchBasedCAGEProteomic/pepdata.rds') 68 | 69 | datasets <- c('pbmc4k', 'pbmc6k', 'pbmc8k') 70 | 71 | for (data in datasets) { 72 | fpath <- paste0('./Data/Step1_LRPredictionResult/', data) 73 | result.files <- list.files(fpath, full.names = TRUE) 74 | result.files <- result.files[-which(grepl('cellinker', result.files))] 75 | result_index <- lapply(result.files, function(result.file){ 76 | print(result.file) 77 | if(grepl('scMLnet', result.file) | grepl('Domino', result.file)){ 78 | result <- readRDS(paste0(result.file, '/result1.rds')) 79 | }else{ 80 | result <- readRDS(paste0(result.file, '/result.rds')) 81 | } 82 | result <- result$result 83 | 84 | if((dim(result)[[1]] != 0) & 'LRscore' %in% colnames(result)){ 85 | if(grepl('RNAMagnet', result.file)){ 86 | genes <- readRDS(paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/', data, '_hs2mm.rds')) 87 | result <- mm2hs(result, genes) 88 | } 89 | 90 | result$receptor_ct <- lapply(1:dim(result)[[1]], function(i){ 91 | receptor <- result$Receptor[i] 92 | if(grepl('&', receptor)){ 93 | receptor <- unlist(strsplit(receptor, '&')) 94 | } 95 | receiver <- result$Receiver[i] 96 | tmp <- paste(receptor, receiver, sep = '_') 97 | label <- ifelse(all(tmp %in% pep_genes$genes_ct), TRUE, FALSE) 98 | if(!label){ 99 | label <- ifelse(all(receptor %in% pep_genes$all), FALSE, NA) 100 | } 101 | label 102 | }) %>% unlist() 103 | 104 | result$ligand_ct <- lapply(1:dim(result)[[1]], function(i){ 105 | ligand <- result$Ligand[i] 106 | if(grepl('&', ligand)){ 107 | ligand <- unlist(strsplit(ligand, '&')) 108 | } 109 | sender <- result$Sender[i] 110 | tmp <- paste(ligand, sender, sep = '_') 111 | label <- ifelse(all(tmp %in% pep_genes$genes_ct), TRUE, FALSE) 112 | if(!label){ 113 | label <- ifelse(all(ligand %in% pep_genes$all), FALSE, NA) 114 | } 115 | label 116 | }) %>% unlist() 117 | 118 | result <- result[which(!is.na(result$receptor_ct)), ] 119 | result <- result[which(!is.na(result$ligand_ct)),] 120 | 121 | result$label <- lapply(1:dim(result)[[1]], function(i){ 122 | all(result$ligand_ct[i] & result$receptor_ct[i]) 123 | }) %>% unlist() 124 | 125 | result$sr <- paste(result$Sender, result$Receiver, sep = '_') 126 | result <- result[, c('LRscore', 'label', 'sr')] 127 | result <- split(result, result$sr) 128 | 129 | res_index <- lapply(result, function(res){ 130 | index <- get_evaluate_metrics(as.numeric(res$LRscore), res$label) 131 | index <- index$perf_metrics 132 | }) 133 | res_index <- do.call(rbind, res_index) 134 | res_index <- as.data.frame(res_index) 135 | 136 | }else{ 137 | res_index <- NA 138 | } 139 | res_index 140 | }) 141 | 142 | names(result_index) <- substring(result.files, 40) 143 | result_index[which(is.na(result_index))] <- NULL 144 | result_index <- do.call(rbind, result_index) 145 | result_index <- result_index[which(rowSums(result_index)!=0), ] 146 | result_index <- tibble::rownames_to_column(result_index, 'temp') 147 | result_index <- tidyr::separate(result_index, temp, c('methods', 'sr'), '\\.') 148 | result_index$dataset <- data 149 | result_index 150 | saveRDS(result_index, file = paste0('./Data/Step5_BenchBasedCAGEProteomic/', data, '_pep.rds')) 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /Step9_LRTBench/function.R: -------------------------------------------------------------------------------- 1 | getallmetrics <- function(methods_res, degs_ls){ 2 | result <- lapply(methods_res, function(res){ 3 | degs.tmp <- lapply(names(degs_ls), function(degs_names){ 4 | 5 | key <- strsplit(degs_names,'_')[[1]][2] 6 | type <- ifelse(grepl('KO',degs_names),'Receptor','Ligand') 7 | 8 | res$regulon <- as.character(res$regulon) 9 | res$target <- as.character(res$target) 10 | res$value <- as.numeric(res$value) 11 | 12 | if(grepl('NicheNet', res$methods[1]) & type == 'Receptor'){ 13 | if(key == 'AXL'){ 14 | score <- res[which(res$regulon=='GAS6'),] 15 | }else if(key == 'CXCR4'){ 16 | score <- res[which(res$regulon=='CXCL12'),] 17 | }else if(key == 'NRP1'){ 18 | score <- res[which(res$regulon=='VEGFA'),] 19 | }else if(key == 'CSF1R'){ 20 | score <- res[which(res$regulon=='CSF1'),] 21 | }else if(key == 'FGFR1'){ 22 | score <- res[which(res$regulon=='FGF1'),] 23 | }else if(key == 'ALK'){ 24 | score <- res[which(res$regulon %in% c('ALKAL1', 'ALKAL2')),] 25 | if(dim(score)[[1]]!=0){ 26 | score <- aggregate(value~target, score, sum) 27 | } 28 | } 29 | }else{ 30 | score <- res %>% filter(regulon == key & type == type) 31 | } 32 | 33 | if(grepl('GSE181575',degs_names)){ 34 | neg.genes <- setdiff(score$target, names(degs_ls[[degs_names]])) 35 | isDEGs <- rep(FALSE, length(neg.genes)) 36 | names(isDEGs) <- neg.genes 37 | degs_ls_used <- append(degs_ls[[degs_names]], isDEGs) 38 | }else{ 39 | degs_ls_used <- degs_ls[[degs_names]] 40 | } 41 | 42 | genes <- intersect(score$target, names(degs_ls_used)) 43 | label <- degs_ls_used[genes] 44 | pred <- score$value[match(genes,score$target)] 45 | res.tmp <- get_evaluate_metrics(pred,label) 46 | res.tmp <- res.tmp$perf_metrics 47 | 48 | if(length(names(table(label))) == 2){ 49 | AUPRCRatios <- res.tmp[2]/(as.numeric(table(label)['TRUE'])/length(label)) 50 | names(AUPRCRatios) <- 'AUPRCRatios' 51 | }else{ 52 | AUPRCRatios <- NA 53 | names(AUPRCRatios) <- 'AUPRCRatios' 54 | } 55 | res.tmp <- append(res.tmp, AUPRCRatios) 56 | res.tmp 57 | 58 | }) 59 | names(degs.tmp) <- names(degs_ls) 60 | degs.tmp <- do.call(rbind, degs.tmp) %>% 61 | as.data.frame() %>% 62 | tibble::rownames_to_column(., 'celllines') 63 | degs.tmp 64 | }) 65 | result <- do.call(rbind, result) %>% as.data.frame() %>% 66 | tibble::rownames_to_column(., 'methods') 67 | result$methods <- gsub('\\.[0-9]+', '', result$methods) 68 | return(result) 69 | } 70 | 71 | getallrecord <- function(methods_res, degs_ls){ 72 | result1 <- lapply(methods_res, function(res){ 73 | degs.tmp <- lapply(names(degs_ls), function(degs_names){ 74 | 75 | key <- strsplit(degs_names,'_')[[1]][2] 76 | type <- ifelse(grepl('KO',degs_names),'Receptor','Ligand') 77 | 78 | res$regulon <- as.character(res$regulon) 79 | res$target <- as.character(res$target) 80 | res$value <- as.numeric(res$value) 81 | 82 | if(grepl('NicheNet', res$methods[1]) & type == 'Receptor'){ 83 | if(key == 'AXL'){ 84 | score <- res[which(res$regulon=='GAS6'),] 85 | }else if(key == 'CXCR4'){ 86 | score <- res[which(res$regulon=='CXCL12'),] 87 | }else if(key == 'NRP1'){ 88 | score <- res[which(res$regulon=='VEGFA'),] 89 | }else if(key == 'CSF1R'){ 90 | score <- res[which(res$regulon=='CSF1'),] 91 | }else if(key == 'FGFR1'){ 92 | score <- res[which(res$regulon=='FGF1'),] 93 | }else if(key == 'ALK'){ 94 | score <- res[which(res$regulon %in% c('ALKAL1', 'ALKAL2')),] 95 | if(dim(score)[[1]]!=0){ 96 | score <- aggregate(value~target, score, sum) 97 | } 98 | } 99 | }else{ 100 | score <- res %>% filter(regulon == key & type == type) 101 | } 102 | 103 | if(grepl('GSE181575',degs_names)){ 104 | neg.genes <- setdiff(score$target, names(degs_ls[[degs_names]])) 105 | isDEGs <- rep(FALSE, length(neg.genes)) 106 | names(isDEGs) <- neg.genes 107 | degs_ls_used <- append(degs_ls[[degs_names]], isDEGs) 108 | }else{ 109 | degs_ls_used <- degs_ls[[degs_names]] 110 | } 111 | 112 | genes <- intersect(score$target, names(degs_ls_used)) 113 | label <- degs_ls_used[genes] 114 | pred <- score$value[match(genes,score$target)] 115 | 116 | res.tmp <- cbind(label, pred) %>% as.data.frame() %>% tibble::rownames_to_column(., 'genes') 117 | }) 118 | names(degs.tmp) <- names(degs_ls) 119 | degs.tmp 120 | }) 121 | return(result1) 122 | } 123 | 124 | RunTimeMemRecord <- function(wd, data, methods){ 125 | # get result path 126 | result.path <- paste0(wd, data) 127 | 128 | dataset_Record <- lapply(methods, function(method){ 129 | print(method) 130 | 131 | record <- read.table(paste0(result.path, '/', method, '/', 'TimeMemRecord.txt'), 132 | header = FALSE, sep = '\t', fill = TRUE) 133 | record <- record[, 2, drop = FALSE] 134 | if(dim(record)[1]==24){ 135 | record <- record[-1, , drop = FALSE] 136 | } 137 | record <- record[c(2,3,4,5,10),, drop = FALSE] 138 | 139 | 140 | # Linux_time: user+system time 141 | user_time <- as.numeric(gsub('User time \\(seconds\\): ', '', record[1,])) 142 | sys_time <- as.numeric(gsub('System time \\(seconds\\): ', '', record[2,])) 143 | time_linux <- user_time+sys_time 144 | 145 | # Clock_time 146 | clock_time <- gsub('Elapsed \\(wall clock\\) time \\(h:mm:ss or m:ss\\): ', '', record[4,]) 147 | if(stringr::str_count(clock_time, ':') == 1){ 148 | min_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[1]) 149 | min_time <- min_time*60 150 | sec_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[2]) 151 | clock_time <- min_time+sec_time; rm(sec_time, min_time) 152 | }else if(stringr::str_count(clock_time, ':') == 2){ 153 | h_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[1]) 154 | h_time <- h_time*60*60 155 | min_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[2]) 156 | min_time <- min_time*60 157 | sec_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[3]) 158 | clock_time <- h_time+min_time+sec_time; rm(sec_time, min_time, h_time) 159 | } 160 | 161 | # Max memory usage | CPU_used 162 | mem_linux <- gsub('Maximum resident set size \\(kbytes\\): ', '', record[5,]) 163 | mem_linux <- round(as.numeric(mem_linux)/1024/1024, 2) 164 | cpu_linux <- gsub('Percent of CPU this job got: ', '', record[3,]) 165 | 166 | return(data.frame(clock_time = clock_time, linux_time = time_linux, 167 | max_memory = mem_linux, cpu_linux = cpu_linux)) 168 | }) 169 | names(dataset_Record) <- methods 170 | return(dataset_Record) 171 | } -------------------------------------------------------------------------------- /Step3_MIForLRBench/function.R: -------------------------------------------------------------------------------- 1 | EvaIndex1_1 <- function(CloDist, norm.data, result, cp, choose, logic){ 2 | result.tmp <- result[which(result$sr == cp), ] 3 | if(!logic){ 4 | cp.1 <- unlist(stringr::str_split(cp, "_"))[1] 5 | cp.2 <- unlist(stringr::str_split(cp, "_"))[2] 6 | cp <- paste(cp.2, cp.1, sep = "_") 7 | } 8 | 9 | if(choose == "close"){ 10 | distant.tmp <- CloDist[[cp]]$close 11 | }else if(choose == 'distant'){ 12 | distant.tmp <- CloDist[[cp]]$distant 13 | } 14 | 15 | tmp.result <- lapply(1:dim(result.tmp)[1], function(n){ 16 | #print(n) 17 | ligands <- result.tmp[n,"Ligand"] 18 | ligands <- unlist(stringr::str_split(ligands, "&")) 19 | 20 | receptors <- result.tmp[n, "Receptor"] 21 | receptors <- unlist(stringr::str_split(receptors, "&")) 22 | 23 | tmp.lr <- expand.grid(ligands, receptors) 24 | colnames(tmp.lr) <- c('ligand', 'receptor') 25 | 26 | temp.result <- lapply(1:dim(tmp.lr)[1], function(m){ 27 | ligand <- tmp.lr[m, "ligand"] 28 | receptor <- tmp.lr[m, "receptor"] 29 | 30 | if((ligand %in% rownames(norm.data)) & (receptor %in% rownames(norm.data))){ 31 | if(logic){ 32 | value.1 <- norm.data[ligand, distant.tmp$cell.1] 33 | value.2 <- norm.data[receptor, distant.tmp$cell.2] 34 | }else{ 35 | value.1 <- norm.data[ligand, distant.tmp$cell.2] 36 | value.2 <- norm.data[receptor, distant.tmp$cell.1] 37 | } 38 | 39 | if(sum(value.1)!=0 & sum(value.2)!=0){ 40 | tmp.sender.mat <- data.frame(value.1 = value.1, barcode.1 = names(value.1)) 41 | tmp.reciever.mat <- data.frame(value.2 = value.2, barcode.2 = names(value.2)) 42 | data.tmp = cbind(tmp.sender.mat, tmp.reciever.mat) 43 | data.tmp <- data.tmp[,c("value.1", "value.2")] 44 | rm(value.1, value.2, tmp.reciever.mat, tmp.sender.mat) 45 | 46 | ## calculate the mutual information 47 | data.disc <- infotheo::discretize(data.tmp) 48 | tmp.mi <- infotheo::mutinformation(data.disc[,1],data.disc[,2]) 49 | 50 | ## calculate the Pearson Correlation Coefficient 51 | pcc.tmp <- cor.test(data.tmp[,1], data.tmp[,2], method = "pearson") 52 | tmp.pcc <- pcc.tmp$estimate[[1]] 53 | tmp.pcc.p <- pcc.tmp$p.value 54 | tmp <- c(tmp.mi, tmp.pcc, tmp.pcc.p) 55 | 56 | tmp 57 | }else{ 58 | tmp.mi <- NA 59 | tmp.pcc <- NA 60 | tmp.pcc.p <- NA 61 | tmp <- c(tmp.mi, tmp.pcc, tmp.pcc.p) 62 | tmp <- as.data.frame(t(tmp)) 63 | tmp 64 | } 65 | }else{ 66 | tmp.mi <- NA 67 | tmp.pcc <- NA 68 | tmp.pcc.p <- NA 69 | tmp <- c(tmp.mi, tmp.pcc, tmp.pcc.p) 70 | tmp <- as.data.frame(t(tmp)) 71 | tmp 72 | } 73 | 74 | tmp 75 | 76 | }) 77 | names(temp.result) <- paste(tmp.lr$ligand, tmp.lr$receptor, sep = "_") 78 | #temp.result[which(is.na(temp.result))] <- NULL 79 | temp.result <- do.call(rbind, temp.result) 80 | temp.result <- as.data.frame(temp.result) 81 | temp.result <- tibble::rownames_to_column(temp.result, 'lr') 82 | 83 | temp.result 84 | }) 85 | 86 | bench.result <- do.call(rbind, tmp.result) 87 | 88 | bench.result 89 | } 90 | 91 | EvaIndex1_2 <- function(CloDist, norm.data, result){ 92 | result.bench <- lapply(unique(result$sr), function(cp){ 93 | print(cp) 94 | if(cp %in% names(CloDist)){ 95 | close.bench <- EvaIndex1_1(CloDist, norm.data, result, cp, "close", TRUE) 96 | distant.bench <- EvaIndex1_1(CloDist, norm.data, result, cp, "distant", TRUE) 97 | identical(distant.bench$lr, close.bench$lr) 98 | tmp.result <- cbind(close.bench, distant.bench) 99 | tmp.result <- tmp.result[,-5] 100 | colnames(tmp.result) <- c("lr", "closeMI", "closePCC", "closePCCPvalue", 101 | "distantMI", "distantPCC","distantPCCPvalue") 102 | }else{ 103 | close.bench <- EvaIndex1_1(CloDist, norm.data, result, cp, "close", FALSE) 104 | distant.bench <- EvaIndex1_1(CloDist, norm.data, result, cp, "distant", FALSE) 105 | tmp.result <- cbind(close.bench, distant.bench) 106 | tmp.result <- tmp.result[,-5] 107 | colnames(tmp.result) <- c("lr","closeMI", "closePCC", "closePCCPvalue", 108 | "distantMI", "distantPCC","distantPCCPvalue") 109 | 110 | } 111 | tmp.result <- as.data.frame(tmp.result) 112 | tmp.result 113 | }) 114 | names(result.bench) <- unique(result$sr) 115 | result.bench 116 | } 117 | 118 | 119 | mm2hs <- function(result, genes){ 120 | genes <- genes[, c('mouse', 'human')] 121 | 122 | result <- merge(result, genes, by.x = 'Ligand', by.y = 'mouse', all.x = TRUE) 123 | multi <- which(is.na(result$human) & grepl('&', result$Ligand)) 124 | if(length(multi)>0){ 125 | result$human[multi] <- unlist(lapply(stringr::str_split(result$Ligand[multi], '&'), function(x){ 126 | tmp <- unlist(lapply(seq(x), function(y){genes$human[which(genes$mouse %in% x[y])]})) 127 | paste(tmp, collapse = '&') 128 | })) 129 | } 130 | result <- result[!(is.na(result$human) & !grepl('&', result$Ligand)), ] 131 | result$Ligand <- NULL 132 | colnames(result)[which(colnames(result) == 'human')] <- 'Ligand' 133 | 134 | result <- merge(result, genes, by.x = 'Receptor', by.y = 'mouse', all.x = TRUE) 135 | multi <- which(is.na(result$human) & grepl('&', result$Receptor)) 136 | if(length(multi)>0){ 137 | result$human[multi] <- unlist(lapply(stringr::str_split(result$Receptor[multi], '&'), function(x){ 138 | tmp <- unlist(lapply(seq(x), function(y){genes$human[which(genes$mouse %in% x[y])]})) 139 | paste(tmp, collapse = '&') 140 | })) 141 | } 142 | result <- result[!(is.na(result$human) & !grepl('&', result$Receptor)), ] 143 | result$Receptor <- NULL 144 | colnames(result)[which(colnames(result) == 'human')] <- 'Receptor' 145 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 146 | 147 | return(result) 148 | } 149 | 150 | hs2mm <- function(result, genes){ 151 | genes <- genes[, c('mouse', 'human')] 152 | 153 | result <- merge(result, genes, by.x = 'Ligand', by.y = 'human', all.x = TRUE) 154 | multi <- which(is.na(result$mouse) & grepl('&', result$Ligand)) 155 | if(length(multi)>0){ 156 | result$mouse[multi] <- unlist(lapply(stringr::str_split(result$Ligand[multi], '&'), function(x){ 157 | tmp <- unlist(lapply(seq(x), function(y){genes$mouse[which(genes$human %in% x[y])]})) 158 | paste(tmp, collapse = '&') 159 | })) 160 | } 161 | result <- result[!(is.na(result$mouse) & !grepl('&', result$Ligand)), ] 162 | result$Ligand <- NULL 163 | colnames(result)[which(colnames(result) == 'mouse')] <- 'Ligand' 164 | 165 | result <- merge(result, genes, by.x = 'Receptor', by.y = 'human', all.x = TRUE) 166 | multi <- which(is.na(result$mouse) & grepl('&', result$Receptor)) 167 | if(length(multi)>0){ 168 | result$mouse[multi] <- unlist(lapply(stringr::str_split(result$Receptor[multi], '&'), function(x){ 169 | tmp <- unlist(lapply(seq(x), function(y){genes$mouse[which(genes$human %in% x[y])]})) 170 | paste(tmp, collapse = '&') 171 | })) 172 | } 173 | result <- result[!(is.na(result$mouse) & !grepl('&', result$Receptor)), ] 174 | result$Receptor <- NULL 175 | colnames(result)[which(colnames(result) == 'mouse')] <- 'Receptor' 176 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 177 | 178 | return(result) 179 | } 180 | 181 | 182 | # further handle the results of MI 183 | Eval1Process <- function(result.path){ 184 | Eval1 <- readRDS(result.path) 185 | Eval1[which(is.na(Eval1))] <- NULL 186 | Eval1 <- lapply(Eval1, function(methods_eval){ 187 | perc_temp <- lapply(methods_eval, function(perc_eval){ 188 | perc_tmp <- do.call(rbind, perc_eval) 189 | perc_tmp <- tibble::rownames_to_column(perc_tmp, 'sr') 190 | perc_tmp$sr <- gsub('\\.[0-9]+', '', perc_tmp$sr) 191 | perc_tmp 192 | }) 193 | names(perc_temp) <- c(10, 20, 30, 40) 194 | perc_temp <- do.call(rbind, perc_temp) 195 | perc_temp <- tibble::rownames_to_column(perc_temp, 'perc') 196 | perc_temp$perc <- gsub('\\.[0-9]+', '', perc_temp$perc) 197 | perc_temp 198 | }) 199 | Eval1 <- do.call(rbind, Eval1) 200 | Eval1 <- tibble::rownames_to_column(Eval1, 'methods') 201 | Eval1$methods <- gsub('\\.[0-9]+', '', Eval1$methods) 202 | allna <- apply(Eval1[, c("closeMI", "closePCC", "distantMI", "distantPCC")], 203 | 1, FUN = function(x){any(is.na(x))}) 204 | Eval1 <- Eval1[which(!allna), ] 205 | Eval1$all <- paste(Eval1$methods, Eval1$perc, Eval1$sr, Eval1$lr, sep = '_') 206 | Eval1 <- dplyr::distinct(Eval1, all, .keep_all = TRUE) 207 | 208 | return(Eval1) 209 | } 210 | 211 | # Calculate the DLRC index of each method —— p.value(whether significant?) + median + weight 212 | EvalIndex_DLRC <- function(Eval1Result){ 213 | Eval1Result$class <- paste(Eval1Result$methods, Eval1Result$perc, sep = '_') 214 | 215 | temp <- lapply(unique(Eval1Result$class), function(class){ 216 | result_sub <- Eval1Result[which(Eval1Result$class == class),] 217 | medianMI_close <- median(result_sub$closeMI) 218 | medianMI_distant <- median(result_sub$distantMI) 219 | medianPCC_close <- median(abs(result_sub$closePCC)) 220 | medianPCC_distant <- median(abs(result_sub$distantPCC)) 221 | PCC_pval <- wilcox.test (result_sub$closePCC, result_sub$distantPCC, 222 | alternative = "greater")$p.value 223 | MI_pval <- wilcox.test (result_sub$closeMI, result_sub$distantMI, 224 | alternative = "greater")$p.value 225 | return(data.frame(medianMI_close = medianMI_close, medianMI_distant = medianMI_distant, MI_pval = MI_pval, 226 | medianPCC_close = medianPCC_close, medianPCC_distant = medianPCC_distant, PCC_pval = PCC_pval)) 227 | }) 228 | names(temp) <- unique(Eval1Result$class) 229 | temp <- do.call(rbind, temp) 230 | temp <- tibble::rownames_to_column(temp, 'class') 231 | temp <- tidyr::separate(temp, class, c('methods', 'perc'), '_') 232 | #temp <- temp[-which(temp$perc == 40),] 233 | 234 | tmp <- lapply(unique(temp$methods), function(method){ 235 | result_sub <- temp[which(temp$methods == method), ] 236 | if(dim(result_sub)[1]==4){ 237 | result_sub$MIIndex <- (0.5-as.numeric(result_sub$perc)*0.01)* 238 | ifelse(result_sub$MI_pval<0.05, 1, 0)* 239 | (result_sub$medianMI_close - result_sub$medianMI_distant) 240 | 241 | result_sub$PCCIndex <- (0.5-as.numeric(result_sub$perc)*0.01)* 242 | ifelse(result_sub$PCC_pval<0.05, 1, 0)* 243 | (result_sub$medianPCC_close - result_sub$medianPCC_distant) 244 | return(data.frame(MIIndex = sum(result_sub$MIIndex), PCCIndex = sum(result_sub$PCCIndex))) 245 | } 246 | }) 247 | names(tmp) <- unique(temp$methods) 248 | tmp <- do.call(rbind, tmp) 249 | tmp <- tibble::rownames_to_column(tmp, 'methods') 250 | tmp$MIRank <- rank(-tmp$MIIndex) 251 | tmp$PCCRank <- rank(-tmp$PCCIndex) 252 | return(tmp) 253 | } 254 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![](https://github.com/SunXQlab/CCC-Benchmark/blob/main/Graphic-abstrct.png) 2 | ## Introduction 3 | 4 | We benchmark two types of CCC inference methods, one type of methods predict LR pairs based on scRNA-seq data, and another type of methods that can predict ligand/receptor-targets regulations. 5 | 6 | For the first benchmark, we evaluated the accuracy, stability and usability of 18 LR inference methods. **In term of accuracy**, paired ST datasets, CAGE expression/Proteomics data and sampled scRNA-seq datasets were used to benchmark the 18 methods. Firstly, 11 scRNA-seq datasets were used as input for methods to predict intercellular communication and the two defined similarity index (SI, modified Jaccard index) and rank-based similarity index (RSI) were used to compare the similarity of LR pairs predicted by methods.Furthermore, we benchmark the 18 methods using 11 paired ST datasets with the hypothesis that the values of mutual information (MI) of LR pairs are greater in the close group than that in the distant group. In addition, three PBMC datasets from 10X Genomics website were used as input for methods to predict LR pairs and CAGE expression/Proteomics data were used as pseudo gold standards to benchmark the 18 methods. **In term of stability**, we ramdomly sampled different ratios of cells in all the scRNA-seq, resulting 70 sampled datasets and 14 original datasets as input for methods. We calculated the Jaccard index of the LR pairs predicted based sampled datasets and original datasets and a stability value was defined to test the robustness of methods to sampling rates of scRNA-seq data. **In term of usability**, we recorded the running time and maximum memory usage of methods in all the 84 scRNA-seq datasets. 7 | 8 | For the second benchmark, 8 ST datasets were used as the input for 5 LR-Targets inference tools to predict ligand/receptor-targets regulations, and the cell line perturbation datasets were used for evaluation, involving knockout/mutant conditions for 5 receptors, and treatment conditions for 10 ligands. And the differentially expressed genes (DEGs) in each cell line perturbation dataset, were used as the ground truth of ligand/receptor-targets regulations. The score of ligand/receptor-targets predicted by different tools were compared to the differential expression status (DGEs or not DEGs) of corresponding targets to calculate AUROC and AUPRC. In addition, we also record the running time and maximum memory usage of methods in all the ST datasets. 9 | 10 | ## Workflow 11 | ![](https://github.com/SunXQlab/CCC-Benchmark/blob/main/Workflow-figure.png) 12 | - **Step0\_LRToolsFunction** contains the R/Python/Shell scripts that package the running code of 19 methods with Seurat objects as input into function. 13 | - **Step1\_LRPredictionResult** contains the R/Shell scripts to run 19 methods for inferring LR pairs from the 14 scRNA-seq datasets. 14 | - **Step2\_PreSTForLRBench** contains the R scripts to get the different ratios (e.g.top 10%, 20%, 30%, 40%) of cell type specific close and distant cell pairs in each dataset for the preparation of the benchmarking using mutual infomation. 15 | - **Step3\_MIForLRBench** contains the R scripts to calculate MI of LR interactions predicted by methods in the different ratios of cell type specific close and distant groups and calculate DLRC index of methods in each dataset. 16 | - **Step4\_SIRSIForLRBench** contains the R scripts to benchmark the similarity (SI and RSI) of the LR interactions predicted by each two methods. 17 | - **Step5\_BenchBasedCAGEProteomic** contains the R scripts to benchmark the 18 LR inference methods using the CAGE expression and proteomics data. 18 | - **Step6\_LRBenchSampling** contains the R/Shell scripts to run the 18 LR inference methods for inferring LR pairs from 70 sampled scRNA-seq datasets. 19 | - **Step7\_LRBenchSamplingBench** contains the R/Shell scripts to calculate Jaccard index between the LR pairs predicted based on the sampled datasets and the original datasets, and record the running time and maximum memory usage of methods in each dataset. 20 | - **Step8\_LRTToolsFunction** contains the R/Python/Shell scripts to run the 5 LR-Target inference methods for predicting ligand/receptor-targets using ST datasets as input. 21 | - **Step9\_LRTBench** contains the R scripts to benchmark the 5 LR-Target inference methods using cell line perturbation datasets for evaluation, and record the running time and maximum memory usage of methods in each dataset. 22 | 23 | ## Datasets 24 | - **scRNA-seq and ST datasets** 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 |
Tissue (Disease)SampleID
(scRNA-seq)
SampleID
(ST)
Literature PMIDDownload URL
(scRNA-seq)
Download URL
(ST)
Evaluation purpose
Heart Tissue (Health)CK357control_P735948637URLURLLR interactions
LR-Target regulations
CK358control_P8
Heart Tissue (ICM)CK368FZ_GT_P19LR interactions
CK162FZ_GT_P4
CK362RZ_P11
Heart Tissue (AMI)CK361IZ_P10
CK161IZ_P3
CK165IZ_BZ_P2
Tumor Tissue
(Breast cancer)
CID44971CID4497134493872URLURLLR interactions
LR-Target regulations
CID4465CID4465
Mouse embryo——Slide1434210887——URLLR interactions
PBMCPBMC4K————URL——LR interactions
PBMC6K————URL——
PBMC8K————URL——
Tumor Tissue
(Gliomas)
——UKF243_T_ST35700707——URLLR-Target interactions
——UKF260_T_ST
——UKF266_T_ST
——UKF334_T_ST
91 | 92 | 93 | - **Cell line perturbation datasets** 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 |
DatasetsLigand/ReceptorTypeConditionCell LineDisease
GSE120268AXLreceptorKnockdownMDA-MB-231Breast Cancer
GSE157680NRP1receptorKnockdownMDA-MB-231
GSE15893CXCR4receptorMutantMDA-MB-231
CXCL12ligandTreatmentMDA-MB-231
GSE160990TGFB1ligandTreatmentMDA-MB-231
GSE36051DLL4(1)ligandTreatmentMCF7
DLL4(2)ligandTreatmentMDA-MB-231
JAG1ligandTreatmentMDA-MB-231
GSE65398IGF1(1)ligandTreatmentMCF7
GSE7561IGF1(2)ligandTreatmentMCF7
GSE69104CSF1RreceptorInhibitTAMsGliomas
GSE116414FGFR1receptorInhibitGSLC
GSE206947EFNB2ligandTreatmentcardiac fibroblastsHealth
GSE181575TGFB1ligandTreatmentcardiac fibroblasts
GSE123018TGFB1ligandTreatmentcardiac fibroblasts
151 | 152 | 153 | ## Tools for inferring intercellular LR pairs 154 | 155 | - CellPhoneDB (Python, version: 3.0.0) 156 | - CellTalker (R, version: 0.0.4.9000) 157 | - Connectome (R, version: 1.0.1) 158 | - NATMI (Python) 159 | - ICELLNET (R, version: 1.0.1) 160 | - scConnect (Python, version: 1.0.3) 161 | - CellChat (R, version: 1.4.0) 162 | - SingleCellSignalR (R, version: 1.2.0) 163 | - CytoTalk (R, version: 0.99.9) 164 | - CellCall (R, version: 0.0.0.9000) 165 | - scSeqComm (R, version: 1.0.0) 166 | - NicheNet (R, version: 1.1.0) 167 | - Domino (R, version: 0.1.1) 168 | - scMLnet (R, version: 0.2.0) 169 | - PyMINEr (Python, version: 0.10.0) 170 | - iTALK (R, version: 0.1.0) 171 | - cell2cell (Python, version: 0.5.10) 172 | - RNAMagnet (R, version: 0.1.0) 173 | 174 | ## Tools for predicting ligand/receptor-targets regulations 175 | 176 | - CytoTalk (R, version: 0.99.9) 177 | - NicheNet (R, version: 1.1.0) 178 | - stMLnet (R, version: 0.1.0) 179 | - MISTy (R, version: 1.3.8) 180 | - HoloNet (Python, version: 0.0.5) 181 | 182 | ## Citation 183 | 184 | Please cite ESICCC as follows: 185 | 186 | Luo J, Deng M, Zhang X, Sun X*. ESICCC as a systematic computational framework for evaluation, selection and integration of cell-cell communication inference methods. Genome Research. 2023. doi: 10.1101/gr.278001.123 187 | 188 | ## Contact 189 | 190 | If you encounter any problems, please contact (sunxq6@mail.sysu.edu.cn). 191 | -------------------------------------------------------------------------------- /Step8_LRTToolsFunction/Step8_LRTToolsFunction.R: -------------------------------------------------------------------------------- 1 | ############## Run in parallel with 10 cores 2 | ## CytoTalk ## Run in R 4.1.0 version 3 | ############## Filter the spots with the number of cells less than 3 4 | CytoTalk_function <- function(ser, fpath, sender = NULL, receiver = NULL, species = 'human'){ 5 | suppressMessages(library(CytoTalk)) 6 | suppressMessages(library(Seurat)) 7 | suppressMessages(library(tidyverse)) 8 | suppressMessages(library(igraph)) 9 | set.seed(123) 10 | 11 | # create conda environment 12 | if(F){ 13 | library(reticulate) # To install and call Python modules from R. 14 | conda_create(envname = "r_reticulate_CytoTalk", python_version = "3.7.3") # Create a new Conda environment to facilitate the Python module installation. 15 | conda_install(envname = "r_reticulate_CytoTalk", "pybind11") # Install two necessary Python modules for correctly compiling and using the "pcst_fast" Python module. 16 | conda_install(envname = "r_reticulate_CytoTalk", "numpy") 17 | conda_install(envname = "r_reticulate_CytoTalk", "git+https://github.com/fraenkel-lab/pcst_fast.git", pip = TRUE) # To install the "pcst_fast" module. 18 | } 19 | 20 | # input 21 | if(T){ 22 | input_fpath <- paste0(fpath, '/input/') 23 | if(!dir.exists(input_fpath)){ 24 | dir.create(input_fpath) 25 | } 26 | 27 | fpath.mat <- paste0(input_fpath, "counts.csv") 28 | fpath.meta <- paste0(input_fpath,"metadata.csv") 29 | 30 | norm.matrix <- as.matrix(GetAssayData(ser, "data", "SCT")) 31 | write.csv(norm.matrix, fpath.mat, quote=F) 32 | 33 | meta.data <- data.frame(rownames(ser@meta.data), ser@meta.data$celltype) 34 | colnames(meta.data) <- NULL 35 | meta.data <- as.matrix(meta.data) 36 | write.csv(meta.data, fpath.meta, quote=F, row.names = FALSE) 37 | rm(norm.matrix, meta.data);gc() 38 | } 39 | 40 | if(species == 'human'){ 41 | pcg = CytoTalk::pcg_human 42 | lrp = CytoTalk::lrp_human 43 | }else{ 44 | pcg = CytoTalk::pcg_mouse 45 | lrp = CytoTalk::lrp_mouse 46 | } 47 | 48 | lst.sc <- read_matrix_with_meta(fpath.mat, fpath.meta) 49 | 50 | sender_ct <- unique(ser$celltype) 51 | remove_ct <- as.character(names(which(table(ser$celltype)<3))) 52 | if (length(remove_ct)>0) { 53 | sender_ct <- sender_ct[-which(sender_ct %in% remove_ct)] 54 | } 55 | if(is.null(receiver)){ 56 | receiver_ct <- unique(ser$celltype) 57 | }else if(!is.null(receiver)){ 58 | receiver_ct <- receiver 59 | } 60 | rm(sender, receiver);gc() 61 | 62 | result <- list() 63 | for (sender in sender_ct) { 64 | for (receiver in receiver_ct[which(receiver_ct!=sender)]) { 65 | tmp <- tryCatch(CytoTalk::run_cytotalk(lst.sc, sender, receiver, cores = 10, 66 | cutoff_a = 0.05, cutoff_b = 0.05, 67 | pcg = pcg, lrp = lrp), 68 | error=function(e){NA} 69 | ) 70 | cp <- paste(sender, receiver, sep = "_") 71 | result[[cp]] <- tmp 72 | } 73 | } 74 | 75 | result[which(is.na(result))] <- NULL 76 | saveRDS(result, file = paste0(fpath, '/', 'result.rds')) 77 | 78 | 79 | for (receiver in receiver_ct) { 80 | result.tmp <- lapply(names(result), function(cp){ 81 | sender <- gsub("_.*", "",cp) 82 | 83 | tmp <- list() 84 | res <- result[[cp]] 85 | if(!is.null(res$pathways)){ 86 | network <- res$pcst$final_network 87 | network <- network[!(network$node1_type == sender & network$node2_type == sender), ] 88 | network <- network[!(network$node1_type == receiver & network$node2_type == sender), ] 89 | network$node1 <- toupper(network$node1) 90 | network$node2 <- toupper(network$node2) 91 | network$node1 <- gsub("ORF", "orf", network$node1) 92 | network$node2 <- gsub("ORF", "orf", network$node2) 93 | LR <- network[which(network$node1_type == sender & network$node2_type == receiver), ] 94 | LR <- paste(LR$node1, LR$node2, sep = "_") 95 | Ligand <- network[which(network$node1_type == sender & network$node2_type == receiver), "node1"] %>% 96 | unique() 97 | Receptor <- network[which(network$node1_type == sender & network$node2_type == receiver), "node2"] %>% 98 | unique() 99 | Target <- c(network[which(network$node1_type == receiver & network$node2_type == receiver), "node1"], 100 | network[which(network$node1_type == receiver & network$node2_type == receiver), "node2"]) %>% 101 | unique() 102 | Target <- setdiff(Target, c(Ligand, Receptor)) 103 | tmp <- list(Edge = network, 104 | LR = LR, 105 | Ligand = Ligand, 106 | Receptor = Receptor, 107 | Target = Target) 108 | }else{ 109 | tmp <- NA 110 | } 111 | tmp 112 | }) 113 | 114 | names(result.tmp) <- names(result); 115 | result.tmp[which(is.na(result.tmp))] <- NULL 116 | 117 | edge_list <- lapply(result.tmp, function(res){ 118 | res <- res$Edge[,c(1,2)] 119 | colnames(res) <- c('from', 'to') 120 | res 121 | }) 122 | edge_df <- do.call(rbind, edge_list) 123 | edge_df <- distinct(edge_df, from, to) 124 | intranetwork <- graph_from_edgelist(as.matrix(edge_df), directed = FALSE) 125 | 126 | Ligand <- lapply(result.tmp, function(res){res$Ligand}) %>% unlist() %>% unique() 127 | Receptor <- lapply(result.tmp, function(res){res$Receptor}) %>% unlist() %>% unique() 128 | Target <- lapply(result.tmp, function(res){res$Target}) %>% unlist() %>% unique() 129 | 130 | distance <- distances(intranetwork, 131 | v = c(Ligand,Receptor), 132 | to = Target) 133 | distance <- reshape2::melt(distance) 134 | distance <- distance[!is.infinite(distance$value),] 135 | colnames(distance) <- c("regulon", 'target',"value") 136 | distance$type <- ifelse(distance$regulon %in% Ligand,"ligand","receptor") 137 | 138 | saveRDS(distance, file = paste0(fpath, "/", receiver, '_result.rds')) 139 | } 140 | } 141 | 142 | ############## 143 | ## NicheNet ## Filter the spots with the number of cells less than 3 144 | ############## 145 | NicheNet_function <- function(ser, fpath, sampleID, sender = NULL, receiver = NULL){ 146 | suppressMessages(library(nichenetr)) 147 | suppressMessages(library(Seurat)) 148 | suppressMessages(library(tidyverse)) 149 | set.seed(123) 150 | 151 | # load data 152 | if(T){ 153 | ligand_target_matrix = readRDS("./Step0_SharedInfo/NicheNet/ligand_target_matrix.rds") 154 | 155 | lr_network = readRDS("./Step0_SharedInfo/NicheNet/lr_network.rds") 156 | 157 | weighted_networks = readRDS("./Step0_SharedInfo/NicheNet/weighted_networks.rds") 158 | weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network %>% distinct(from,to), by = c("from","to")) 159 | } 160 | 161 | sender_ct <- unique(ser$celltype) 162 | remove_ct <- as.character(names(which(table(ser$celltype)<3))) 163 | if (length(remove_ct)>0) { 164 | sender_ct <- sender_ct[-which(sender_ct %in% remove_ct)] 165 | } 166 | if(is.null(receiver)){ 167 | receiver_ct <- unique(ser$celltype) 168 | }else if(!is.null(receiver)){ 169 | receiver_ct <- receiver 170 | } 171 | rm(sender, receiver);gc() 172 | 173 | for (receiver in receiver_ct) { 174 | # define the sender genes 175 | expressed_genes_sender <- lapply(sender_ct[which(sender_ct != receiver)], function(ct){ 176 | expressed_genes_sender_ct <- get_expressed_genes(ct, ser, pct = 0.05) 177 | expressed_genes_sender_ct 178 | }) 179 | expressed_genes_sender = expressed_genes_sender %>% unlist() %>% unique() 180 | 181 | # define the receiver genes 182 | expressed_genes_receiver = get_expressed_genes(receiver, ser, pct = 0.05) 183 | background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] 184 | 185 | # define the genes of interest 186 | if(grepl('CID', sampleID)){ 187 | icgs_fpath <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/Giotto_result/', sampleID, '_icgs.rds') 188 | }else if(grepl('UKF', sampleID)){ 189 | icgs_fpath <- paste0('~/1-Datasets/CancerCell_2022_Glioma/ScriptForLRTBenchmark/Giotto_result/', sampleID, '_icgs.rds') 190 | }else{ 191 | icgs_fpath <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/Giotto_result/', sampleID,'_icgs.rds') 192 | } 193 | geneset = readRDS(file = icgs_fpath) 194 | geneset <- geneset[[receiver]] %>% unlist() %>% unique() 195 | geneset = geneset %>% .[. %in% rownames(ligand_target_matrix)] 196 | 197 | # define the potential ligands 198 | ligands = lr_network %>% pull(from) %>% unique() 199 | receptors = lr_network %>% pull(to) %>% unique() 200 | 201 | expressed_ligands = intersect(ligands,expressed_genes_sender) 202 | expressed_receptors = intersect(receptors,expressed_genes_receiver) 203 | 204 | potential_ligands = lr_network %>% 205 | filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% 206 | pull(from) %>% 207 | unique() 208 | 209 | # Perform NicheNet ligand activity analysis 210 | ligand_activities = predict_ligand_activities(geneset = geneset, 211 | background_expressed_genes = background_expressed_genes, 212 | ligand_target_matrix = ligand_target_matrix, 213 | potential_ligands = potential_ligands) 214 | ligand_activities = ligand_activities %>% arrange(-pearson) %>% mutate(rank = rank(desc(pearson))) 215 | 216 | # Get all the ligands for downstreams analysis 217 | best_upstream_ligands = ligand_activities %>% arrange(-pearson) %>% pull(test_ligand) %>% unique() 218 | head(best_upstream_ligands) 219 | 220 | # Get the active ligand-target links 221 | active_ligand_target_links_df = best_upstream_ligands %>% 222 | lapply(get_weighted_ligand_target_links, 223 | geneset = geneset, 224 | ligand_target_matrix = ligand_target_matrix, 225 | n = nrow(ligand_target_matrix)) %>% 226 | bind_rows() %>% drop_na() 227 | colnames(active_ligand_target_links_df) <- c("regulon", 'target',"value") 228 | active_ligand_target_links_df$type <- 'ligand' 229 | 230 | saveRDS(active_ligand_target_links_df, file = paste0(fpath, '/', receiver, '_result.rds')) 231 | } 232 | } 233 | 234 | 235 | ############ 236 | ## MISTy ## Filter the spots with the number of cells less than 3 237 | ############ 238 | MISTy_function <- function(ser, fpath, sampleID, sender = NULL, receiver=NULL){ 239 | # load package 240 | if(T){ 241 | # MISTy 242 | suppressMessages(library(mistyR)) 243 | suppressMessages(library(future)) 244 | suppressMessages(library(future)) 245 | 246 | # Seurat 247 | suppressMessages(library(Seurat)) 248 | suppressMessages(library(Giotto)) 249 | 250 | # data manipulation 251 | suppressMessages(library(Matrix)) 252 | suppressMessages(library(tibble)) 253 | suppressMessages(library(dplyr)) 254 | suppressMessages(library(purrr)) 255 | 256 | # normalization 257 | suppressMessages(library(sctransform)) 258 | 259 | # resource 260 | suppressMessages(library(progeny)) 261 | 262 | # setup parallel execution 263 | options(future.globals.maxSize = 1024^3) 264 | plan(multisession) 265 | set.seed(123) 266 | } 267 | 268 | source('./Step0_SharedInfo/MistyR/code.R') 269 | 270 | if(is.null(receiver)){ 271 | receiver_ct <- unique(ser$celltype) 272 | }else if(!is.null(receiver)){ 273 | receiver_ct <- receiver 274 | } 275 | rm(receiver);gc() 276 | 277 | # run normalization 278 | sct.data <- vst(GetAssayData( 279 | object = ser, 280 | slot = "counts", 281 | assay = "Spatial" 282 | ), 283 | verbosity = 0 284 | ) 285 | 286 | ser[["SCT"]] <- CreateAssayObject(data = sct.data$y) 287 | 288 | gene.expression <- GetAssayData(ser, assay = "SCT") 289 | coverage <- rowSums(gene.expression > 0) / ncol(gene.expression) 290 | slide.markers <- names(which(coverage >= 0.05)) 291 | 292 | Databases <- readRDS('./Step0_SharedInfo/stMLnet/Databases.rds') 293 | ligands <- Databases$LigRec.DB$source %>% unique() %>% .[. %in% slide.markers] 294 | receptors <- Databases$LigRec.DB$target %>% unique() %>% .[. %in% slide.markers] 295 | 296 | # load ICGs 297 | if(grepl('CID', sampleID)){ 298 | icgs_fpath <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/Giotto_result/', sampleID, '_icgs.rds') 299 | }else if(grepl('UKF', sampleID)){ 300 | icgs_fpath <- paste0('~/1-Datasets/CancerCell_2022_Glioma/ScriptForLRTBenchmark/Giotto_result/', sampleID, '_icgs.rds') 301 | }else{ 302 | icgs_fpath <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/Giotto_result/', sampleID,'_icgs.rds') 303 | } 304 | icgs <- readRDS(icgs_fpath) 305 | 306 | celltypes <- ser$celltype %>% as.character() 307 | remove_ct <- as.character(names(which(table(ser$celltype)<3))) 308 | if (length(remove_ct)>0) { 309 | celltypes <- celltypes[-which(celltypes %in% remove_ct)] 310 | } 311 | 312 | geometry <- ser@images$image@coordinates %>% select(., row,col) 313 | 314 | output_fpath <- paste0(fpath, '/output') 315 | if(!dir.exists(output_fpath)){ 316 | dir.create(output_fpath) 317 | } 318 | 319 | ######### 320 | ## run ## 321 | ######### 322 | 323 | for (receiver in receiver_ct) { 324 | ct <- receiver 325 | message(paste0('running jobs:',ct)) 326 | targets <- icgs[[ct]] %>% unlist() %>% unique() 327 | 328 | ############### 329 | ## parameter ## 330 | ############### 331 | 332 | view.assays <- list( 333 | "main" = "SCT", 334 | "ligand" = "SCT" 335 | ) 336 | 337 | # Define features for each view 338 | view.features <- list( 339 | "main" = c(targets,receptors), 340 | "ligand" = ligands 341 | ) 342 | 343 | # Define spatial context for each view 344 | view.types <- list( 345 | "main" = "intra", 346 | "ligand" = "para" 347 | ) 348 | 349 | # Define additional parameters (l in the case of paraview) 350 | view.params <- list( 351 | "main" = NULL, 352 | "ligand" = 10 353 | ) 354 | 355 | # Define specific properties for each view 356 | view.properties <- list( 357 | "main" = ifelse(celltypes == ct,1,0), 358 | "ligand" = ifelse(celltypes != ct,1,0) 359 | ) 360 | 361 | ######### 362 | ## Run ## 363 | ######### 364 | 365 | spot.ids = NULL 366 | out.alias = paste0(output_fpath, "/results_",ct,"_paraview_10") 367 | 368 | # Extracting data 369 | view.data <- map(view.assays, 370 | extract_seurat_data, 371 | geometry = geometry, 372 | visium.slide = ser 373 | ) 374 | str(view.data,max.level = 1) 375 | 376 | # Adding all spots ids in case they are not defined 377 | if (is.null(spot.ids)) { 378 | spot.ids <- rownames(view.data[[1]]) 379 | } 380 | 381 | # First filter the features from the data 382 | view.data.filt <- map2(view.data, view.features, filter_data_features) 383 | str(view.data.filt,max.level = 1) 384 | view.data.filt[[1]][1:4,1:1] 385 | view.data.filt[[2]][1:4,1:1] 386 | 387 | # specific properties: celltype 388 | view.data.spec <- map2(view.data.filt, view.properties, add_specific_properties) 389 | str(view.data.spec,max.level = 1) 390 | view.data.spec[[1]][1:6,1:2] 391 | view.data.spec[[2]][1:6,1:2] 392 | 393 | # Create initial view 394 | views.main <- create_initial_view(view.data.spec[[1]] %>% 395 | rownames_to_column() %>% 396 | filter(rowname %in% spot.ids) %>% 397 | select(-rowname)) 398 | str(views.main,max.level = 2) 399 | 400 | # Create other views 401 | view.names <- names(view.data.spec) 402 | 403 | all.views <- pmap(list( 404 | view.data.filt[-1], 405 | view.types[-1], 406 | view.params[-1], 407 | view.names[-1] 408 | ), 409 | create_default_views, 410 | spot.ids = spot.ids, 411 | geometry = geometry 412 | ) 413 | str(all.views,max.level = 2) 414 | 415 | pline.views <- add_views( 416 | views.main, 417 | unlist(all.views, recursive = FALSE) 418 | ) 419 | 420 | # Run MISTy 421 | run_misty(pline.views, out.alias) 422 | misty.results <- collect_results(out.alias) 423 | 424 | ############ 425 | ## output ## 426 | ############ 427 | 428 | misty_score <- misty.results$importances.aggregated %>% na.omit() 429 | misty_score <- misty_score %>% 430 | select(Predictor,Target,Importance,view) %>% 431 | filter(Target %in% targets, Predictor %in% c(ligands,receptors)) %>% 432 | rename(regulon=Predictor,target=Target,value=Importance,type=view) 433 | misty_score$type <- gsub('_10','',misty_score$type) 434 | misty_score$type <- gsub('intra','receptor',misty_score$type) 435 | 436 | saveRDS(misty_score, paste0(fpath, '/', receiver, '_result.rds')) 437 | } 438 | } 439 | 440 | 441 | ############# 442 | ## stMLnet ## Filter the spots with the number of cells less than 3 443 | ############# 444 | stMLnet_function <- function(ser, fpath, sampleID, sender = NULL, receiver = NULL){ 445 | suppressMessages(library(Seurat)) 446 | suppressMessages(library(stMLnet)) 447 | suppressMessages(library(tidyverse)) 448 | suppressMessages(library(doSNOW)) 449 | set.seed(123) 450 | 451 | sender_ct <- unique(ser$celltype) 452 | if(is.null(receiver)){ 453 | receiver_ct <- unique(ser$celltype) 454 | }else if(!is.null(receiver)){ 455 | receiver_ct <- receiver 456 | } 457 | rm(receiver);gc() 458 | 459 | # load input data 460 | if(T){ 461 | norm.matrix <- as.matrix(GetAssayData(ser, "data", "SCT")) 462 | metadata <- data.frame(Barcode = colnames(ser), Cluster = ser$celltype) 463 | 464 | loc <- ser@images$image@coordinates %>% select(., row,col) 465 | 466 | ex_databases <- readRDS("./Step0_SharedInfo/stMLnet/Databases.rds") 467 | 468 | # load ICGs 469 | if(grepl('CID', sampleID)){ 470 | icgs_fpath <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/Giotto_result/', sampleID, '_icgs.rds') 471 | }else if(grepl('UKF', sampleID)){ 472 | icgs_fpath <- paste0('~/1-Datasets/CancerCell_2022_Glioma/ScriptForLRTBenchmark/Giotto_result/', sampleID, '_icgs.rds') 473 | }else{ 474 | icgs_fpath <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/Giotto_result/', sampleID,'_icgs.rds') 475 | } 476 | ICG_list <- readRDS(icgs_fpath) 477 | 478 | # define highly differentially expressed ligands 479 | if(T){ 480 | remove_ct <- as.character(names(which(table(ser$celltype)<3))) 481 | lig_clusters <- ser@active.ident %>% as.character() %>% unique() 482 | if (length(remove_ct)>0) { 483 | lig_clusters <- lig_clusters[-which(lig_clusters %in% remove_ct)] 484 | } 485 | 486 | if(F){ 487 | ## ligands 488 | ligs_in_db <- ex_databases$LigRec.DB$source %>% unique() 489 | ligs_in_db <- intersect(ligs_in_db, rownames(ser)) 490 | df_markers_ligs <- lapply(lig_clusters, function(cluster){ 491 | print(cluster) 492 | df <- FindMarkers(ser, ident.1 = cluster, features = ligs_in_db, 493 | only.pos = T, min.pct = 0.05) 494 | if(dim(df)[[1]]!=0){ 495 | df$gene <- rownames(df) 496 | df$ident.1 <- cluster 497 | }else{ 498 | df <- NA 499 | } 500 | df 501 | }) 502 | df_markers_ligs[which(is.na(df_markers_ligs))] <- NULL 503 | df_markers_ligs <- do.call(rbind, df_markers_ligs) 504 | Ligs_up_list <- split(df_markers_ligs$gene,df_markers_ligs$ident.1) 505 | str(Ligs_up_list) 506 | } 507 | } 508 | 509 | # define highly differentially expressed receptors 510 | if(T){ 511 | BarCluTable <- data.frame(Barcode = rownames(ser@meta.data), 512 | Cluster = ser@meta.data$celltype) 513 | 514 | ## parameters 515 | expr.ct <- 0.1 516 | pct.ct <- 0.05 517 | 518 | ## receptors in prior database 519 | recs_in_db <- ex_databases$LigRec.DB$target %>% unique() 520 | 521 | ## calculate mean and pct 522 | rec_clusters <- receiver_ct 523 | #clusters <- BarCluTable$Cluster %>% as.character() %>% unique() 524 | 525 | meanExpr_of_LR <- lapply(rec_clusters, function(cluster){ 526 | 527 | cluster.ids <- BarCluTable$Barcode[BarCluTable$Cluster == cluster] 528 | source_mean <- rowMeans(norm.matrix[,cluster.ids]) 529 | names(source_mean) <- rownames(norm.matrix) 530 | source_mean 531 | 532 | }) %>% do.call('cbind',.) %>% as.data.frame() 533 | colnames(meanExpr_of_LR) <- rec_clusters 534 | 535 | pct_of_LR <- lapply(rec_clusters, function(cluster){ 536 | 537 | cluster.ids <- BarCluTable$Barcode[BarCluTable$Cluster == cluster] 538 | dat <- norm.matrix[,cluster.ids] 539 | pct <- rowSums(dat>0)/ncol(dat) 540 | names(pct) <- rownames(norm.matrix) 541 | pct 542 | 543 | }) %>% do.call('cbind',.) %>% as.data.frame() 544 | colnames(pct_of_LR) <- rec_clusters 545 | 546 | Recs_expr_list <- lapply(rec_clusters, function(cluster){ 547 | 548 | recs <- rownames(norm.matrix)[meanExpr_of_LR[,cluster] >= expr.ct & pct_of_LR[,cluster] >= pct.ct] 549 | intersect(recs, recs_in_db) 550 | 551 | }) 552 | names(Recs_expr_list) <- rec_clusters 553 | str(Recs_expr_list) 554 | } 555 | 556 | } 557 | 558 | output_fpath <- paste0(fpath, '/output/') 559 | if(!dir.exists(output_fpath)){ 560 | dir.create(output_fpath) 561 | } 562 | setwd(output_fpath) 563 | 564 | # step1 create mulityayer network 565 | 566 | quan.cutoff <- 0.98 567 | Databases <- ex_databases 568 | Databases$RecTF.DB <- Databases$RecTF.DB %>% 569 | .[.$score > quantile(.$score, quan.cutoff), ] %>% 570 | dplyr::distinct(source, target) 571 | Databases$LigRec.DB <- Databases$LigRec.DB %>% 572 | dplyr::distinct(source, target) %>% 573 | dplyr::filter(target %in% Databases$RecTF.DB$source) 574 | Databases$TFTG.DB <- Databases$TFTG.DB %>% 575 | dplyr::distinct(source, target) %>% 576 | dplyr::filter(source %in% Databases$RecTF.DB$target) 577 | 578 | resMLnet <- runMLnet(ExprMat = norm.matrix, AnnoMat = metadata, Normalize = F, 579 | LigClus = lig_clusters, RecClus = rec_clusters, 580 | logfc.ct = 0.1, pct.ct = 0.05, expr.ct = 0.1, 581 | ProjectName = '2023-1', Databases = Databases, 582 | TGList=ICG_list, RecList=Recs_expr_list) #LigList=Ligs_up_list, 583 | rm(ICG_list, Ligs_up_list, Recs_expr_list, ser);gc() 584 | 585 | # step2 calculate Signal Activity 586 | if(T){ 587 | ## calculate distant 588 | DistMat <- as.matrix(dist(loc)) 589 | colnames(DistMat) <- rownames(loc) 590 | rownames(DistMat) <- rownames(loc) 591 | 592 | ## imputation 593 | exprMat.Impute <- runImputation(exprMat = norm.matrix) 594 | rm(norm.matrix);gc() 595 | 596 | # remove empty list 597 | ex_mulnetlist <- list() 598 | for (i in 1:length(resMLnet$mlnets)) { 599 | mlnets <- resMLnet$mlnets[[i]] 600 | for (j in 1:length(mlnets)) { 601 | mlnet <- mlnets[[j]] 602 | if(nrow(mlnet$LigRec)!=0) ex_mulnetlist[[names(mlnets)[j]]] = mlnet 603 | } 604 | } 605 | rm(i, j, mlnets, mlnet,loc);gc() 606 | 607 | Sender <- NULL 608 | for (receiver in receiver_ct) { 609 | resSigActList_tme <- getSiganlActivity(ExprMat = exprMat.Impute, 610 | DistMat = DistMat, 611 | AnnoMat = metadata, 612 | MulNetList = ex_mulnetlist, 613 | Receiver = receiver, Sender = Sender, 614 | Downsample = FALSE, ProjectName = '2023-1') 615 | } 616 | } 617 | 618 | # step3 getCPSiganlActivity 619 | if(T){ 620 | inputDir <- paste0(getwd(),'/runModel/work_2023-1/') 621 | files <- list.files(inputDir) 622 | 623 | time_ls <- c() 624 | for(f in files){ 625 | 626 | label <- paste(unlist(strsplit(f,'[_.]'))[3:4],collapse = '-') 627 | LRTG_allscore <- readRDS(paste0(getwd(),'/runModel/work_2023-1/',f)) 628 | message(paste0('running jobs: ',label)) 629 | 630 | t1 <- Sys.time() 631 | getSiganlImport(SiganlActivity = LRTG_allscore, Lable = label, ProjectName = '2023-1', 632 | NCores = 1, AutoPara = TRUE, NTrees = 500, NTrys = 10, 633 | TreeMethod = 'variance', NodeSize = 5, NPert = 10) 634 | t2 <- Sys.time() 635 | time_ls <- c(time_ls,paste(signif(t2-t1,4),units(signif(t2-t1,4)),sep = ' ')) 636 | 637 | } 638 | } 639 | 640 | #handle result 641 | files <- list.files('./getPIM/work_2023-1', full.names = TRUE, pattern = 'LRTG_pim_clean_') 642 | for (f in files) { 643 | result <- readRDS(f) 644 | result$pIM <- lapply(result$pIM,function(s){ifelse(s>=0,s,0)}) %>% unlist() %>% as.numeric() 645 | colnames(result) <- c("regulon", 'target',"value", 'type') 646 | saveRDS(result, file = paste0('../', gsub('.rds', '',substring(f, 41)), '_result.rds')) 647 | } 648 | } 649 | 650 | 651 | ############# 652 | ## HoloNet ## 653 | ############# 654 | HoloNet_function <- function(ser, fpath, sampleID, sender = NULL, receiver=NULL){ 655 | suppressMessages(library(Seurat)) 656 | suppressMessages(library(stringr)) 657 | suppressMessages(library(tidyverse)) 658 | suppressMessages(library(tidyr)) 659 | set.seed(123) 660 | 661 | sender_ct <- unique(ser$celltype) 662 | if(is.null(receiver)){ 663 | receiver_ct <- unique(ser$celltype) 664 | }else if(!is.null(receiver)){ 665 | receiver_ct <- receiver 666 | } 667 | rm(receiver);gc() 668 | 669 | input_fpath <- paste0(fpath, '/input/') 670 | if(!dir.exists(input_fpath)){ 671 | dir.create(input_fpath) 672 | } 673 | 674 | output_fpath <- paste0(fpath, '/output/') 675 | if (!dir.exists(output_fpath)) { 676 | dir.create(output_fpath) 677 | } 678 | 679 | ## load ICGs 680 | if(grepl('CID', sampleID)){ 681 | icgs_fpath <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/Giotto_result/', sampleID, '_icgs.rds') 682 | }else if(grepl('UKF', sampleID)){ 683 | icgs_fpath <- paste0('~/1-Datasets/CancerCell_2022_Glioma/ScriptForLRTBenchmark/Giotto_result/', sampleID, '_icgs.rds') 684 | }else{ 685 | icgs_fpath <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/Giotto_result/', sampleID,'_icgs.rds') 686 | } 687 | icgs <- readRDS(icgs_fpath) 688 | goi <- lapply(receiver_ct, function(receiver){ 689 | icgs.tmp <- icgs[[receiver]] 690 | goi.tmp <- unique(unlist(icgs.tmp)) 691 | goi.tmp 692 | }) 693 | goi <- unique(unlist(goi)) 694 | goi_fpath <- paste0(input_fpath, 'goi.csv') 695 | write.csv(goi, file = goi_fpath, quote = FALSE, row.names = FALSE) 696 | 697 | ## run HoloNet 698 | # get gene expression matrix 699 | counts_fpath <- paste0(input_fpath, "raw_count.csv") 700 | raw.count <- as.matrix(GetAssayData(ser, "count", "Spatial")) 701 | write.csv(raw.count, file = counts_fpath, quote=F) 702 | 703 | # load cell type metadata 704 | meta_fpath <- paste0(input_fpath, "meta.csv") 705 | meta <- data.frame(Barcodes = rownames(ser@meta.data), 706 | celltype = ser@meta.data$celltype, 707 | row.names = rownames(ser@meta.data)) 708 | write.csv(meta, file = meta_fpath,quote=F, row.names = FALSE) 709 | 710 | 711 | if(grepl('CID',sampleID)){ 712 | 713 | img_fpath1 <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ST_Data/spatial/', 714 | gsub('A', '', sampleID),'_spatial') 715 | img_fpath <- paste0(input_fpath, 'spatial') 716 | copy_commond <- paste0('cp -r ', img_fpath1, ' ', img_fpath) 717 | system(copy_commond) 718 | h5_file <- './Step0_SharedInfo/HoloNet/filtered_feature_bc_matrix.h5' 719 | copy_commond <- paste0('cp -r ', h5_file, ' ', input_fpath) 720 | system(copy_commond) 721 | command <- paste('sh ../Script/Step6_LRTToolsFunction/HoloNet_shell.sh', 722 | counts_fpath, meta_fpath, input_fpath, 723 | goi_fpath, output_fpath, sep = ' ') 724 | 725 | }else if(grepl('UKF', sampleID)){ 726 | 727 | visumn_fpath <- paste0('/home/ljx/1-Datasets/CancerCell_2022_Glioma/10XVisium/',sampleID, '/outs/') 728 | command <- paste('sh ../Script/Step6_LRTToolsFunction/HoloNet_shell.sh', 729 | counts_fpath, meta_fpath, visumn_fpath, 730 | goi_fpath, output_fpath, sep = ' ') 731 | 732 | }else{ ## MI dataset 733 | 734 | visumn_fpath <- paste0('/home/ljx/1-Datasets/Nature_2022_MI/ST_Data/',sampleID) 735 | command <- paste('sh ../Script/Step6_LRTToolsFunction/HoloNet_shell.sh', 736 | counts_fpath, meta_fpath, visumn_fpath, 737 | goi_fpath, output_fpath, sep = ' ') 738 | 739 | } 740 | 741 | 742 | system(command) 743 | 744 | ## handle result 745 | setwd(fpath) 746 | if(T){ 747 | files <- list.files("./output/") 748 | result <- lapply(files, function(file){ 749 | lr <- str_split(file, "_")[[1]][1] 750 | target <- str_split(file, "_")[[1]][2] %>% str_replace(".csv", "") 751 | 752 | res <- read.csv(paste0("./output/", file)) %>% 753 | pivot_longer(cols = -X, names_to = "receiver", values_to = "value") 754 | colnames(res)[1] <- "sender" 755 | 756 | res <- res[which(res$receiver %in% receiver_ct), ] 757 | res$lr <- rep(lr, n = nrow(res)) 758 | res <- separate(res, lr, c("ligand", "receptor"), ":") 759 | res$target <- rep(target, n = nrow(res)) 760 | res 761 | }) 762 | result <- do.call(rbind, result) 763 | result <- result[which(result$sender!=result$receiver), ] 764 | 765 | for (receiver in unique(result$receiver)) { 766 | icgs_tmp <- unique(unlist(icgs[[receiver]])) 767 | res <- result[which(result$receiver == receiver),] 768 | res <- res[which(res$target %in% icgs_tmp), ] 769 | res$LRT <- paste(res$ligand, res$receptor, res$target, sep = "_") 770 | res <- res[,c(3,7)] 771 | res <- aggregate(value ~ LRT, data = res, sum) 772 | res <- tidyr::separate(res, LRT, c('ligand', 'receptor', 'target'), "_") 773 | res_ligand <- res[, c(1,3,4)] 774 | res_receptor <- res[, c(2,3,4)] 775 | 776 | res_ligand <- aggregate(value ~ ligand+target, data = res_ligand, sum) 777 | res_receptor <- aggregate(value ~ receptor+target, data = res_receptor, sum) 778 | 779 | colnames(res_ligand) <- c("regulon", 'target',"value") 780 | colnames(res_receptor) <- c("regulon", 'target',"value") 781 | res_ligand$type <- 'ligand' 782 | res_receptor$type <- 'receptor' 783 | 784 | res <- rbind(res_ligand, res_receptor) 785 | saveRDS(res, file = paste0('./', receiver, '_result.rds')) 786 | } 787 | } 788 | } 789 | 790 | 791 | -------------------------------------------------------------------------------- /Step0_LRToolsFunction/Step1_LRToolsFunction.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | # All the code run in the '/Data' directory 3 | 4 | ####################### 5 | ## SingleCellSignalR ## 6 | ####################### 7 | SCSR_function <- function(ser, species = 'human'){ 8 | suppressMessages(library(SingleCellSignalR)) 9 | suppressMessages(library(Seurat)) 10 | suppressMessages(library(tidyr)) 11 | suppressMessages(library(dplyr)) 12 | suppressMessages(library(lobstr)) 13 | set.seed(123) 14 | 15 | start.time <- proc.time() 16 | 17 | if(species == 'human'){ 18 | species <- "homo sapiens" 19 | }else{ 20 | species <- "mus musculus" 21 | } 22 | 23 | matrix.sc <- GetAssayData(ser, "data", "RNA") 24 | matrix.sc <- as.matrix(matrix.sc) 25 | meta.sc <- data.frame(celltype = ser$celltype, row.names = colnames(ser)) 26 | 27 | # Digitize Labels 28 | i <- 1 29 | for(ct in unique(meta.sc$celltype)){ 30 | meta.sc[which(meta.sc$celltype == ct), "ct_num"] <- i 31 | i <- i+1 32 | } 33 | c.names <- as.character(unique(meta.sc$celltype)) 34 | 35 | 36 | signal <- cell_signaling(data = matrix.sc, genes = rownames(matrix.sc), int.type = "paracrine", 37 | species = species, cluster = meta.sc$ct_num, c.names = c.names, write = FALSE) 38 | inter.net <- inter_network(data = matrix.sc, signal = signal, genes = rownames(matrix.sc), 39 | cluster = meta.sc$ct_num, c.names = c.names, species = species,write = FALSE) 40 | 41 | used.time <- proc.time()-start.time 42 | used.memory <- mem_used() 43 | 44 | result <- inter.net$`full-network` 45 | result$interaction.type <- NULL 46 | result <- separate(result, 'ligand', c('Sender', 'Ligand'), sep = '\\.') 47 | result <- separate(result, 'receptor', c('Receiver','Receptor'), sep = '\\.') 48 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 49 | result <- distinct(result, all, .keep_all = TRUE) 50 | rownames(result) <- NULL 51 | 52 | result_record <- list(result=result, 53 | used_time = paste0(round(used.time[3]/60,3), ' min'), 54 | used_memory = round(used.memory/1024/1024/1024,3)) 55 | return(result_record) 56 | } 57 | 58 | ############## 59 | ## CellChat ## 60 | ############## 61 | CellChat_function <- function(ser, species = 'human'){ 62 | suppressMessages(library(Seurat)) 63 | suppressMessages(library(CellChat)) 64 | suppressMessages(library(tidyverse)) 65 | suppressMessages(library(lobstr)) 66 | set.seed(123) 67 | 68 | start.time <- proc.time() 69 | 70 | matrix.sc <- GetAssayData(ser, "data", "RNA") 71 | matrix.sc <- as.matrix(matrix.sc) 72 | meta.sc <- data.frame(celltype = ser$celltype, row.names = colnames(ser)) 73 | meta.sc$celltype <- as.character(meta.sc$celltype) 74 | 75 | cellchat <- createCellChat(object = matrix.sc, meta = meta.sc, group.by = "celltype") 76 | if(species == 'human'){ 77 | cellchat@DB <- CellChatDB.human 78 | }else{ 79 | cellchat@DB <- CellChatDB.mouse 80 | } 81 | 82 | cellchat <- subsetData(cellchat) # This step is necessary even if using the whole database 83 | cellchat <- identifyOverExpressedGenes(cellchat) 84 | cellchat <- identifyOverExpressedInteractions(cellchat) 85 | result <- computeCommunProb(cellchat) %>% filterCommunication(.) %>% 86 | subsetCommunication(.) 87 | 88 | used.time <- proc.time()-start.time 89 | used.memory <- mem_used() 90 | 91 | result <- result[,1:5] 92 | colnames(result) <- c('Sender','Receiver','Ligand', 'Receptor', 'LRscore') 93 | result <- result[which(result$Sender != result$Receiver),] 94 | result$Ligand <- gsub('_', '&', result$Ligand) 95 | result$Receptor <- gsub('_', '&', result$Receptor) 96 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 97 | result <- dplyr::distinct(result, all, .keep_all = TRUE) 98 | 99 | 100 | result_record <- list(result=result, 101 | used_time = paste0(round(used.time[3]/60,3), ' min'), 102 | used_memory = round(used.memory/1024/1024/1024,3)) 103 | return(result_record) 104 | } 105 | 106 | ########### 107 | ## iTALK ## top_genes was set to 50 (default) 108 | ########### Only for human (LR prior databases); the LR prior databases can be designed by users (complex) 109 | iTALK_function <- function(ser){ 110 | suppressMessages(library(iTALK)) 111 | suppressMessages(library(Seurat)) 112 | suppressMessages(library(lobstr)) 113 | set.seed(123) 114 | 115 | start.time <- proc.time() 116 | 117 | matrix.sc <- GetAssayData(ser, "data", "RNA") 118 | matrix.sc <- as.matrix(matrix.sc) 119 | matrix.sc <- as.data.frame(t(matrix.sc)) 120 | matrix.sc$cell_type <- ser$celltype 121 | 122 | # find top 50 percent highly expressed genes 123 | highly_exprs_genes <- rawParse(matrix.sc,stats='mean') 124 | # find the ligand-receptor pairs from highly expressed genes 125 | comm.list<-c('growth factor','other','cytokine','checkpoint') 126 | 127 | result <- NULL 128 | for(comm.type in comm.list){ 129 | res.tmp <- FindLR(highly_exprs_genes,datatype='mean count',comm_type=comm.type) 130 | res.tmp <- res.tmp[order(res.tmp$cell_from_mean_exprs*res.tmp$cell_to_mean_exprs,decreasing=T),] 131 | result<-rbind(result,res.tmp) 132 | } 133 | 134 | used.time <- proc.time()-start.time 135 | used.memory <- mem_used() 136 | 137 | result$LRscore <- result$cell_from_mean_exprs*result$cell_to_mean_exprs 138 | result[,c(3,5,7)] <- NULL 139 | colnames(result) <- c('Ligand', 'Receptor', 'Sender', 'Receiver', 'LRscore') 140 | result <- result[which(result$Receiver != result$Sender), ] 141 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 142 | result <- dplyr::distinct(result, all, .keep_all = TRUE) 143 | 144 | result_record <- list(result=result, 145 | used_time = paste0(round(used.time[3]/60,3), ' min'), 146 | used_memory = round(used.memory/1024/1024/1024,3)) 147 | return(result_record) 148 | } 149 | 150 | ############## Run in parallel with 10 cores 151 | ## CytoTalk ## Run in R 4.1.0 version 152 | ############## 153 | CytoTalk_function <- function(ser, fpath, sender = NULL, receiver = NULL, species = 'human'){ 154 | suppressMessages(library(CytoTalk)) 155 | suppressMessages(library(Seurat)) 156 | suppressMessages(library(lobstr)) 157 | set.seed(123) 158 | 159 | # create conda environment 160 | if(F){ 161 | library(reticulate) # To install and call Python modules from R. 162 | conda_create(envname = "r_reticulate_CytoTalk", python_version = "3.7.3") # Create a new Conda environment to facilitate the Python module installation. 163 | conda_install(envname = "r_reticulate_CytoTalk", "pybind11") # Install two necessary Python modules for correctly compiling and using the "pcst_fast" Python module. 164 | conda_install(envname = "r_reticulate_CytoTalk", "numpy") 165 | conda_install(envname = "r_reticulate_CytoTalk", "git+https://github.com/fraenkel-lab/pcst_fast.git", pip = TRUE) # To install the "pcst_fast" module. 166 | } 167 | 168 | # input 169 | if(T){ 170 | input_fpath <- paste0(fpath, '/input/') 171 | if(!dir.exists(input_fpath)){ 172 | dir.create(input_fpath) 173 | } 174 | 175 | fpath.mat <- paste0(input_fpath, "counts.csv") 176 | fpath.meta <- paste0(input_fpath,"metadata.csv") 177 | 178 | if(T){ 179 | norm.matrix <- as.matrix(GetAssayData(ser, "data", "RNA")) 180 | write.csv(norm.matrix, fpath.mat, quote=F) 181 | 182 | meta.data <- data.frame(rownames(ser@meta.data), ser@meta.data$celltype) 183 | colnames(meta.data) <- NULL 184 | meta.data <- as.matrix(meta.data) 185 | write.csv(meta.data, fpath.meta, quote=F, row.names = FALSE) 186 | rm(norm.matrix, meta.data, ser);gc() 187 | } 188 | } 189 | 190 | start.time <- proc.time() 191 | 192 | if(species == 'human'){ 193 | pcg = CytoTalk::pcg_human 194 | lrp = CytoTalk::lrp_human 195 | }else{ 196 | pcg = CytoTalk::pcg_mouse 197 | lrp = CytoTalk::lrp_mouse 198 | } 199 | 200 | lst.sc <- read_matrix_with_meta(fpath.mat, fpath.meta) 201 | celltype <- unique(lst.sc$cell_types) 202 | 203 | celltypes <- as.character(unique(ser$celltype)) 204 | comb <- combn(celltypes, 2) 205 | comb <- t(comb) 206 | comb <- as.data.frame(comb) 207 | 208 | result <- list() 209 | for (i in 1:dim(comb)[[1]]) { 210 | tmp <- tryCatch(CytoTalk::run_cytotalk(lst.sc, comb[i, 1], comb[i, 2], cores = 10, 211 | cutoff_a = 0.05, cutoff_b = 0.05, 212 | pcg = pcg, lrp = lrp), 213 | error=function(e){NA} 214 | ) 215 | cp <- paste(comb[i, 1], comb[i, 2], sep = "_") 216 | result[[cp]] <- tmp 217 | } 218 | 219 | used.time <- proc.time()-start.time 220 | used.memory <- mem_used() 221 | 222 | result[which(is.na(result))] <- NULL 223 | 224 | result <- lapply(result, function(res){res$pcst$final_network}) 225 | result <- do.call(rbind, result) 226 | result <- result[which(result$node1_type!=result$node2_type),] 227 | if (species=='human') { 228 | result$node1 <- toupper(result$node1) 229 | result$node2 <- toupper(result$node2) 230 | result$node1 <- gsub("ORF", "orf", result$node1) 231 | result$node2 <- gsub("ORF", "orf", result$node2) 232 | } 233 | result <- result[,c(1:4,10)] 234 | colnames(result) <- c('Ligand', 'Receptor', 'Sender', 'Receiver', 'LRscore') 235 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 236 | result <- dplyr::distinct(result, all, .keep_all = TRUE) 237 | rownames(result) <- NULL 238 | 239 | result_record <- list(result=result, 240 | used_time = paste0(round(used.time[3]/60,3), ' min'), 241 | used_memory = round(used.memory/1024/1024/1024,3)) 242 | return(result_record) 243 | } 244 | 245 | ############## 246 | ## CellCall ## 247 | ############## 248 | CellCall_function <- function(ser, n=3, species = 'human'){ 249 | suppressMessages(library(Seurat)) 250 | suppressMessages(library(cellcall)) 251 | suppressMessages(library(lobstr)) 252 | set.seed(123) 253 | 254 | start.time <- proc.time() 255 | 256 | if(species == 'human'){ 257 | species <- "Homo sapiens" 258 | }else{ 259 | species <- "Mus musculus" 260 | } 261 | 262 | matrix.sc <- GetAssayData(ser, "counts", "RNA") 263 | matrix.sc <- as.matrix(matrix.sc) 264 | colnames(matrix.sc) <- paste(colnames(matrix.sc), gsub("-", " ", ser$celltype), sep = "_") 265 | matrix.sc <- as.data.frame(matrix.sc) 266 | 267 | mt <- CreateNichConObject(data=matrix.sc, 268 | names.field = n, 269 | names.delim = "_", 270 | source = "UMI", 271 | scale.factor = 10^6, 272 | Org = species, 273 | project = "Microenvironment") 274 | 275 | mt <- TransCommuProfile(object = mt, 276 | pValueCor = 0.05, 277 | CorValue = 0.1, 278 | topTargetCor=1, 279 | p.adjust = 0.05, 280 | use.type="mean", 281 | method="weighted", 282 | IS_core = TRUE, 283 | Org = species) 284 | 285 | result <- mt@data$expr_l_r_log2_scale 286 | 287 | used.time <- proc.time()-start.time 288 | used.memory <- mem_used() 289 | 290 | result <- as.data.frame(result) 291 | result <- tibble::rownames_to_column(result, var = "LR") 292 | result <- tidyr::pivot_longer(result,cols = -LR, names_to = "sr", values_to = "value") 293 | result <- result[which(result$value > 0), ] 294 | result <- tidyr::separate(data = result, col = sr, into = c("Sender", "Receiver"), sep = "-") 295 | result <- tidyr::separate(data = result, col = LR, into = c("Ligand", "Receptor"), sep = "-") 296 | colnames(result)[5] <- 'LRscore' 297 | result <- result[which(result$Sender != result$Receiver), ] 298 | result$Ligand <- gsub(',', '&', result$Ligand) 299 | result$Receptor <- gsub(',', '&', result$Receptor) 300 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 301 | result <- dplyr::distinct(result, all, .keep_all = TRUE) 302 | 303 | 304 | result_record <- list(result=result, 305 | used_time = paste0(round(used.time[3]/60,3), ' min'), 306 | used_memory = round(used.memory/1024/1024/1024,3)) 307 | return(result_record) 308 | } 309 | 310 | ############### 311 | ## scSeqComm ## S_inter and S_intra were set to 0.8 312 | ############### Run in parallel with 10 cores 313 | scSeqComm_function <- function(ser, species = 'human'){ 314 | suppressMessages(library(scSeqComm)) 315 | suppressMessages(library(Seurat)) 316 | suppressMessages(library(lobstr)) 317 | set.seed(123) 318 | 319 | start.time <- proc.time() 320 | 321 | ## scRNA-seq dataset 322 | matrix.sc <- GetAssayData(ser, "data", "RNA") 323 | cell_cluster <- lapply(unique(ser$celltype), function(ct){ 324 | meta <- ser@meta.data 325 | cells <- rownames(meta)[which(meta$celltype == ct)] 326 | cells 327 | }) 328 | names(cell_cluster) <- unique(ser$celltype) 329 | 330 | if(species == 'human'){ 331 | ## Ligand - receptor pairs 332 | LR_db <- LR_pairs_Jin_2020 333 | 334 | ## Transcriptional regulatory network——combined 335 | if(T){ 336 | TF_TG_db <- c(TF_TG_HTRIdb, TF_TG_RegNetwork_High, TF_TG_RegNetwork_Med_High, 337 | TF_TG_TRRUSTv2, TF_TG_TRRUSTv2_HTRIdb_RegNetwork_High) 338 | tmp.db <- list() 339 | for (tf in unique(names(TF_TG_db))) { 340 | rep <- which(names(TF_TG_db) == tf) 341 | if(length(rep)>1){ 342 | tmp.tg <- unique(unlist(TF_TG_db[rep])) 343 | tmp.db[[tf]] <- tmp.tg 344 | }else{ 345 | tmp.db[[tf]] <- TF_TG_db[[tf]] 346 | } 347 | } 348 | TF_TG_db <- tmp.db 349 | rm(rep, tf, tmp.tg, tmp.db);gc() 350 | } 351 | 352 | ## Receptor-Transcription factor a-priori association from gene signaling networks 353 | TF_PPR_db <- TF_PPR_KEGG_human 354 | }else{ 355 | ## Ligand - receptor pairs 356 | LR_db <- LR_pairs_Jin_2020_mouse 357 | 358 | ## Transcriptional regulatory network——combined 359 | if(T){ 360 | TF_TG_db <- c(TF_TG_RegNetwork_High_mouse, TF_TG_RegNetwork_Med_High_mouse, 361 | TF_TG_TRRUSTv2_mouse, TF_TG_TRRUSTv2_RegNetwork_High_mouse) 362 | tmp.db <- list() 363 | for (tf in unique(names(TF_TG_db))) { 364 | rep <- which(names(TF_TG_db) == tf) 365 | if(length(rep)>1){ 366 | tmp.tg <- unique(unlist(TF_TG_db[rep])) 367 | tmp.db[[tf]] <- tmp.tg 368 | }else{ 369 | tmp.db[[tf]] <- TF_TG_db[[tf]] 370 | } 371 | } 372 | TF_TG_db <- tmp.db 373 | rm(rep, tf, tmp.tg, tmp.db);gc() 374 | } 375 | 376 | ## Receptor-Transcription factor a-priori association from gene signaling networks 377 | TF_PPR_db <- TF_PPR_KEGG_mouse 378 | 379 | } 380 | 381 | ## Intercellular and intracellular signaling analysis 382 | scSeqComm_res <- scSeqComm_analyze(gene_expr = matrix.sc, 383 | cell_group = cell_cluster, 384 | LR_pairs_DB = LR_db, 385 | TF_reg_DB = TF_TG_db, 386 | R_TF_association = TF_PPR_db, 387 | N_cores = 10) 388 | result <- scSeqComm_res$comm_results 389 | result <- dplyr::filter(result, S_inter>0.8, S_intra>0.8) 390 | 391 | used.time <- proc.time()-start.time 392 | used.memory <- mem_used() 393 | 394 | result <- result[which(result$cluster_L != result$cluster_R),] 395 | result <- result[,c(1:2,4:5,9)] 396 | colnames(result) <- c('Ligand', 'Receptor', 'Sender', 'Receiver', 'LRscore') 397 | result$Ligand <- gsub(',', '&', result$Ligand) 398 | result$Receptor <- gsub(',', '&', result$Receptor) 399 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 400 | result <- dplyr::distinct(result, all, .keep_all = TRUE) 401 | 402 | 403 | result_record <- list(result=result, 404 | used_time = paste0(round(used.time[3]/60,3), ' min'), 405 | used_memory = round(used.memory/1024/1024/1024,3)) 406 | return(result_record) 407 | } 408 | 409 | ################ 410 | ## Connectome ## 411 | ################ 412 | Connectome_function <- function(ser, species = 'human'){ 413 | suppressMessages(library(Seurat)) 414 | suppressMessages(library(Connectome)) 415 | suppressMessages(library(lobstr)) 416 | set.seed(123) 417 | 418 | start.time <- proc.time() 419 | 420 | if(species == 'human'){ 421 | connectome.genes <- union(Connectome::ncomms8866_human$Ligand.ApprovedSymbol, 422 | Connectome::ncomms8866_human$Receptor.ApprovedSymbol) 423 | }else{ 424 | connectome.genes <- union(Connectome::ncomms8866_mouse$Ligand.ApprovedSymbol, 425 | Connectome::ncomms8866_mouse$Receptor.ApprovedSymbol) 426 | species <- 'mouse' 427 | } 428 | 429 | genes <- connectome.genes[connectome.genes %in% rownames(ser)] 430 | ser <- ScaleData(ser,features = genes) 431 | sc.con <- CreateConnectome(ser,species = species, calculate.DOR = TRUE) 432 | result <- FilterConnectome(sc.con, max.p = 0.05, min.pct = 0.05, remove.na = T) 433 | 434 | used.time <- proc.time()-start.time 435 | used.memory <- mem_used() 436 | 437 | result <- result[which(result$source != result$target),] 438 | result <- result[,c(1:4, 17)] 439 | colnames(result) <- c('Sender', 'Receiver', 'Ligand', 'Receptor', 'LRscore') 440 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 441 | result <- dplyr::distinct(result, all, .keep_all = TRUE) 442 | 443 | 444 | result_record <- list(result=result, 445 | used_time = paste0(round(used.time[3]/60,3), ' min'), 446 | used_memory = round(used.memory/1024/1024/1024,3)) 447 | return(result_record) 448 | } 449 | 450 | ################ 451 | ## CellTalker ## min_expression was set to 0 452 | ################ Only for human (LR prior databases) 453 | CellTalker_function <- function(ser){ 454 | suppressMessages(library(Seurat)) 455 | suppressMessages(library(celltalker)) 456 | suppressMessages(library(dplyr)) 457 | suppressMessages(library(lobstr)) 458 | suppressMessages(library(tidyr)) 459 | source('./Step0_SharedInfo/CellTalker/celltalk_code.R') 460 | set.seed(123) 461 | 462 | start.time <- proc.time() 463 | 464 | result <- celltalk(input_object=ser, 465 | metadata_grouping="celltype", 466 | ligand_receptor_pairs=ramilowski_pairs, 467 | number_cells_required=1, 468 | min_expression=0, 469 | max_expression=20000, 470 | scramble_times=10) 471 | result <- result %>% 472 | mutate(fdr=p.adjust(p_val,method="fdr")) %>% 473 | filter(fdr < 0.05) %>% 474 | filter(p_val < 0.05) %>% 475 | filter(interact_ratio > 0) 476 | 477 | used.time <- proc.time()-start.time 478 | used.memory <- mem_used() 479 | 480 | result <- tidyr::separate(result, 'interaction', c('Ligand', 'Receptor'), sep = '_') 481 | result <- tidyr::separate(result, 'interaction_pairs', c('Sender', 'Receiver'), sep = '_') 482 | result <- result[,c(1:5)]; colnames(result)[5] <- 'LRscore' 483 | result <- result[which(result$Sender != result$Receiver),] 484 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 485 | result <- dplyr::distinct(result, all, .keep_all = TRUE) 486 | 487 | result_record <- list(result=result, 488 | used_time = paste0(round(used.time[3]/60,3), ' min'), 489 | used_memory = round(used.memory/1024/1024/1024,3)) 490 | return(result_record) 491 | } 492 | 493 | ############## 494 | ## ICELLNET ## filter.perc was set to 5 495 | ############## Only for human (LR prior databases) 496 | ICELLNET_function <- function(ser){ 497 | suppressMessages(library(icellnet)) 498 | suppressMessages(library(Seurat)) 499 | suppressMessages(library(BiocGenerics)) 500 | suppressMessages(library("org.Hs.eg.db")) 501 | suppressMessages(library("hgu133plus2.db")) 502 | suppressMessages(library(jetset)) 503 | suppressMessages(library(dplyr)) 504 | suppressMessages(library(gridExtra)) 505 | suppressMessages(library(lobstr)) 506 | set.seed(123) 507 | 508 | start.time <- proc.time() 509 | 510 | db=as.data.frame(read.csv('./Step0_SharedInfo/ICELLNET/ICELLNETdb.tsv', sep="\t", 511 | header = T, check.names=FALSE, stringsAsFactors = FALSE, na.strings = "")) 512 | 513 | ## Retrieve gene expression matrix 514 | # Taking into account the total nb of cells in each cluster 515 | # filter out the percent of genes less than 5% 516 | filter.perc=5 517 | average.clean= sc.data.cleaning(object = ser, db = db, filter.perc = filter.perc, save_file = F) 518 | ## Apply icellnet pipeline on cluster of interest 519 | data.icell=as.data.frame(gene.scaling(as.data.frame(average.clean), n=1, db=db)) 520 | 521 | PC.data=as.data.frame(data.icell[, colnames(data.icell)], row.names = rownames(data.icell)) 522 | PC.target=data.frame(Class = colnames(PC.data)[-dim(data.icell)[2]], 523 | ID = colnames(PC.data)[-dim(data.icell)[2]], 524 | Cell_type = colnames(PC.data)[-dim(data.icell)[2]]) 525 | rownames(PC.target) = colnames(PC.data)[-dim(data.icell)[2]] 526 | 527 | PC.ct <- colnames(PC.data)[-dim(data.icell)[2]] 528 | CC.ct <- colnames(PC.data)[-dim(data.icell)[2]] 529 | 530 | result.all <- lapply(CC.ct, function(ct){ 531 | ## Compute intercellular communication scores 532 | score.computation = icellnet.score(direction = "out", PC.data = PC.data, 533 | CC.data = as.data.frame(data.icell[,ct], row.names = rownames(data.icell)), 534 | PC.target = PC.target, PC = PC.ct[which(PC.ct!=ct)], CC.type = "RNAseq", 535 | PC.type = "RNAseq", db = db) 536 | lr <- as.matrix(score.computation[[2]][apply(score.computation[[2]], 1, function(y) any(!is.na(y))),]) 537 | lr <- as.matrix(lr[which(rowSums(lr) > 0),]) 538 | lr 539 | }) 540 | 541 | names(result.all) <- colnames(PC.data)[-dim(data.icell)[2]] 542 | 543 | used.time <- proc.time()-start.time 544 | used.memory <- mem_used() 545 | 546 | result <- lapply(names(result.all), function(ct){ 547 | lr <- result.all[[ct]] 548 | lr <- as.data.frame(lr) 549 | lr <- tibble::rownames_to_column(lr, "LR") 550 | colnames(lr) <- c("LR", paste(ct, colnames(lr)[2:dim(lr)[2]], sep = "_")) 551 | result.lr <- lr %>% tidyr::pivot_longer(cols = -LR, names_to = "sr", values_to = "LRscore") 552 | result.lr <- tidyr::separate(data = result.lr, col = sr, into = c("Sender", "Receiver"), sep = "_") 553 | result.lr <- tidyr::separate(data = result.lr, col = LR, into = c("Ligand", "Receptor"), sep = " / ") 554 | result.lr <- result.lr[which(result.lr$LRscore>0), ] 555 | result.lr <- result.lr[which(result.lr$Sender != result.lr$Receiver),] 556 | result.lr$Ligand <- gsub(' \\+ ', '&', result.lr$Ligand) 557 | result.lr$Receptor <- gsub(' \\+ ', '&', result.lr$Receptor) 558 | result.lr$all <- paste(result.lr$Sender, result.lr$Ligand, result.lr$Receiver, result.lr$Receptor, sep = '_') 559 | result.lr <- distinct(result.lr, all, .keep_all = TRUE) 560 | }) 561 | result <- do.call(rbind, result) 562 | 563 | result_record <- list(result=result, 564 | used_time = paste0(round(used.time[3]/60,3), ' min'), 565 | used_memory = round(used.memory/1024/1024/1024,3)) 566 | result_record 567 | } 568 | 569 | ############## 570 | ## NicheNet ## top 20 ligands, top 250 targets of ligand 571 | ############## Only for human (LR prior databases) 572 | NicheNet_function <- function(ser, sender = NULL, receiver = NULL, lr = TRUE){ 573 | suppressMessages(library(nichenetr)) 574 | suppressMessages(library(Seurat)) 575 | suppressMessages(library(tidyverse)) 576 | suppressMessages(library(lobstr)) 577 | set.seed(123) 578 | 579 | start.time <- proc.time() 580 | 581 | # load data 582 | if(T){ 583 | ligand_target_matrix = readRDS("./Step0_SharedInfo/NicheNet/ligand_target_matrix.rds") 584 | 585 | lr_network = readRDS("./Step0_SharedInfo/NicheNet/lr_network.rds") 586 | 587 | weighted_networks = readRDS("./Step0_SharedInfo/NicheNet/weighted_networks.rds") 588 | weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network %>% distinct(from,to), by = c("from","to")) 589 | } 590 | 591 | # find DEGs in all celltypes 592 | DE_table_receiver_all = FindAllMarkers(object = ser, min.pct = 0.05, 593 | logfc.threshold = 0.15, test.use = "t", 594 | return.thresh = 0.05) 595 | if(is.null(sender) & is.null(receiver)){ 596 | sender_ct <- unique(ser$celltype) 597 | receiver_ct <- DE_table_receiver_all[which(DE_table_receiver_all$p_val_adj<=0.05), ]$cluster %>% 598 | unique(.) %>% as.character(.) 599 | }else{ 600 | sender_ct <- sender 601 | receiver_ct <- receiver 602 | } 603 | 604 | result <- lapply(sender_ct, function(sender){ 605 | tmp.result <- list() 606 | for (receiver in receiver_ct[which(receiver_ct != sender)]) { 607 | # define the sender and receiver celltypes 608 | expressed_genes_receiver = get_expressed_genes(receiver, ser, pct = 0.05) 609 | background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] 610 | 611 | expressed_genes_sender = get_expressed_genes(sender, ser, pct = 0.05) 612 | 613 | # find DEGs in receiver cells 614 | DE_table_receiver <- DE_table_receiver_all[which(DE_table_receiver_all$cluster == receiver), ] 615 | geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05) %>% pull(gene) 616 | geneset_oi = geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] 617 | 618 | # define the potential ligands 619 | ligands = lr_network %>% pull(from) %>% unique() 620 | receptors = lr_network %>% pull(to) %>% unique() 621 | 622 | expressed_ligands = intersect(ligands,expressed_genes_sender) 623 | expressed_receptors = intersect(receptors,expressed_genes_receiver) 624 | 625 | potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() 626 | 627 | # Perform NicheNet ligand activity analysis 628 | ligand_activities = predict_ligand_activities(geneset = geneset_oi, 629 | background_expressed_genes = background_expressed_genes, 630 | ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) 631 | ligand_activities = ligand_activities %>% arrange(-pearson) %>% mutate(rank = rank(desc(pearson))) 632 | 633 | best_upstream_ligands = ligand_activities %>% top_n(20, pearson) %>% 634 | arrange(-pearson) %>% pull(test_ligand) %>% unique() 635 | 636 | # Infer receptors and top-predicted target genes of top-ranked ligands 637 | ## Active target gene inference 638 | active_ligand_target_links_df = best_upstream_ligands %>% 639 | lapply(get_weighted_ligand_target_links,geneset = geneset_oi, 640 | ligand_target_matrix = ligand_target_matrix, n = 200) %>% 641 | bind_rows() %>% drop_na() 642 | colnames(active_ligand_target_links_df) <- c("Ligand", 'Target', 'Score') 643 | active_ligand_target_links_df$Sender <- sender 644 | active_ligand_target_links_df$Receiver <- receiver 645 | 646 | ## Receptors of top-ranked ligands 647 | lr_network_top = lr_network %>% 648 | filter(from %in% best_upstream_ligands & to %in% expressed_receptors) %>% 649 | distinct(from,to) 650 | 651 | best_upstream_receptors = lr_network_top %>% pull(to) %>% unique() 652 | 653 | lr_network_top_df_large = weighted_networks_lr %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors) 654 | colnames(lr_network_top_df_large) <- c('Ligand', 'Receptor', 'LRscore') 655 | lr_network_top_df_large$Sender <- sender 656 | lr_network_top_df_large$Receiver <- receiver 657 | 658 | cp <- paste(sender, receiver, sep = "_") 659 | 660 | if(lr){ 661 | tmp.result[[cp]] <- lr_network_top_df_large 662 | }else{ 663 | tmp.result[[cp]] <- active_ligand_target_links_df 664 | } 665 | } 666 | tmp.result <- do.call(rbind, tmp.result) 667 | rownames(tmp.result) <- NULL 668 | tmp.result 669 | }) 670 | 671 | used.time <- proc.time()-start.time 672 | used.memory <- mem_used() 673 | 674 | result <- do.call(rbind, result) 675 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 676 | result <- dplyr::distinct(result, all, .keep_all = TRUE) 677 | 678 | result_record <- list(result=result, 679 | used_time = paste0(round(used.time[3]/60,3), ' min'), 680 | used_memory = round(used.memory/1024/1024/1024,3)) 681 | return(result_record) 682 | } 683 | 684 | ################## Run in parallel with 10 cores 685 | ## CellPhoneDB2 ## Only for human (LR prior databases) 686 | ################## 687 | CellPhoneDB2_function <- function(ser, fpath){ 688 | suppressMessages(library(tidyr)) 689 | suppressMessages(library(Seurat)) 690 | suppressMessages(library(lobstr)) 691 | set.seed(123) 692 | 693 | # input 694 | if(T){ 695 | input_fpath <- paste0(fpath, '/input/') 696 | if(!dir.exists(input_fpath)){ 697 | dir.create(input_fpath) 698 | } 699 | 700 | fpath.mat <- paste0(input_fpath, "counts.txt") 701 | fpath.meta <- paste0(input_fpath,"metadata.txt") 702 | 703 | norm.matrix <- as.matrix(GetAssayData(ser, "data", "RNA")) 704 | write.table(norm.matrix, fpath.mat, sep='\t', quote=F) 705 | 706 | meta.data <- data.frame(rownames(ser@meta.data), ser@meta.data$celltype) 707 | colnames(meta.data) <- NULL 708 | meta.data <- as.matrix(meta.data) 709 | write.table(meta.data, fpath.meta, sep='\t', quote=F, row.names=F) 710 | rm(norm.matrix, meta.data, ser);gc() 711 | 712 | output_fpath <- paste0(fpath, '/output/') 713 | if(!dir.exists(output_fpath)){ 714 | dir.create(output_fpath) 715 | } 716 | } 717 | 718 | start.time <- proc.time() 719 | 720 | command <- paste('sh ../Script/Step0_LRToolsFunction/CellPhoneDB2_shell.sh', 721 | fpath.meta, fpath.mat, output_fpath, sep = ' ') 722 | system(command) 723 | 724 | used.time <- proc.time()-start.time 725 | used.memory <- mem_used() 726 | 727 | file.path <- paste(output_fpath, 'significant_means.txt', sep = '/') 728 | result <- read.table(file.path, header = TRUE, sep = "\t") 729 | result <- result[, c(2, 13:dim(result)[2])] 730 | 731 | result <- result %>% pivot_longer(cols = -interacting_pair, names_to = "sr", values_to = "LRscore") 732 | result <- dplyr::filter(result, !is.na(LRscore)) 733 | result <- tidyr::separate(data = result, col = sr, into = c("Sender", "Receiver"), sep = "\\.") 734 | 735 | # handle the complex information 736 | if(T){ 737 | cpdb.complex <-read.csv("./Step0_SharedInfo/CellPhoneDB/complexes.csv") 738 | rec.complex <- cpdb.complex$complex_name[which(cpdb.complex$receptor == TRUE)] 739 | lig.complex <- cpdb.complex$complex_name[which(cpdb.complex$receptor != TRUE)] 740 | cpdb.complex <- cpdb.complex[,1:5] 741 | cpdb.gene <- read.csv("./Step0_SharedInfo/CellPhoneDB/genes.csv") 742 | cpdb.gene <- dplyr::distinct(cpdb.gene, gene_name, uniprot, hgnc_symbol, .keep_all = TRUE) 743 | cpdb.gene <- cpdb.gene[,1:2] 744 | 745 | tmp.cpdb.complex <- merge(cpdb.gene, cpdb.complex, by.y = "uniprot_1", by.x = "uniprot") 746 | tmp.cpdb.complex <- tmp.cpdb.complex[,-1] 747 | colnames(tmp.cpdb.complex)[1] <- "gene_1" 748 | tmp.cpdb.complex <- merge(cpdb.gene, tmp.cpdb.complex, by.y = "uniprot_2", by.x = "uniprot") 749 | tmp.cpdb.complex <- tmp.cpdb.complex[,-1] 750 | colnames(tmp.cpdb.complex)[1] <- "gene_2" 751 | tmp.cpdb.complex <- tmp.cpdb.complex[,-5] 752 | tmp.cpdb.complex <- merge(cpdb.gene, tmp.cpdb.complex, by.y = "uniprot_3", by.x = "uniprot", all.y = TRUE) 753 | tmp.cpdb.complex <- tmp.cpdb.complex[-117,] 754 | tmp.cpdb.complex <- tmp.cpdb.complex[,-1] 755 | colnames(tmp.cpdb.complex)[1] <- "gene_3" 756 | cpdb.complex <- tmp.cpdb.complex[,4:1] 757 | rm(cpdb.gene, tmp.cpdb.complex) 758 | 759 | cpdb.complex$gene <- paste(cpdb.complex$gene_1, cpdb.complex$gene_2, cpdb.complex$gene_3, sep = "&") 760 | cpdb.complex$gene <- gsub("&NA", "", cpdb.complex$gene) 761 | cpdb.complex <- cpdb.complex[,c("complex_name", "gene")] 762 | } 763 | 764 | # combine the complex information and result of cellphonedb 765 | if(T){ 766 | complexes <- cpdb.complex$complex_name[which(stringr::str_count(cpdb.complex$complex_name, pattern = '_')==1)] 767 | for (complex in complexes) { 768 | change.pair <- which(grepl(complex, result$interacting_pair)) 769 | if(length(change.pair)>0){ 770 | change.complex <- gsub('_', '*', complex) 771 | result$interacting_pair <- gsub(complex, change.complex, result$interacting_pair) 772 | } 773 | } 774 | 775 | result <- tidyr::separate(data = result, col = interacting_pair, into = c("Ligand", "Receptor"), sep = "_") 776 | result$Ligand <- gsub('\\*', '_', result$Ligand) 777 | result$Receptor <- gsub('\\*', '_', result$Receptor) 778 | result <- merge(result, cpdb.complex, by.x = "Ligand", by.y = "complex_name", all.x = TRUE) 779 | result$Ligand[!is.na(result$gene)] <- result$gene[!is.na(result$gene)] 780 | result <- result[,-6] 781 | result <- merge(result, cpdb.complex, by.x = "Receptor", by.y = "complex_name", all.x = TRUE) 782 | result$Receptor[!is.na(result$gene)] <- result$gene[!is.na(result$gene)] 783 | result <- result[,-6] 784 | 785 | result.rec <- which(result$Receptor %in% rec.complex) 786 | result.lig <- which(result$Ligand %in% lig.complex) 787 | 788 | if(length(result.lig)>0){ 789 | result <- result[-result.lig,] 790 | print(paste0('Delate:', length(result.lig))) 791 | }else if(length(result.rec)>0){ 792 | result <- result[-result.rec,] 793 | print(paste0('Delate:', length(result.rec))) 794 | } 795 | 796 | result <- result[which(result$Sender!=result$Receiver),] 797 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 798 | result <- dplyr::distinct(result, all, .keep_all = TRUE) 799 | } 800 | 801 | result_record <- list(result=result, 802 | used_time = paste0(round(used.time[3]/60,3), ' min'), 803 | used_memory = round(used.memory/1024/1024/1024,3)) 804 | return(result_record) 805 | } 806 | 807 | ################## 808 | ## CellPhoneDB3 ## Only for human (LR prior databases) 809 | ################## Run in parallel with 10 cores 810 | CellPhoneDB3_function <- function(ser, fpath){ 811 | suppressMessages(library(tidyr)) 812 | suppressMessages(library(Seurat)) 813 | suppressMessages(library(lobstr)) 814 | set.seed(123) 815 | 816 | # input 817 | if(T){ 818 | input_fpath <- paste0(fpath, '/input/') 819 | if(!dir.exists(input_fpath)){ 820 | dir.create(input_fpath) 821 | } 822 | 823 | fpath.mat <- paste0(input_fpath, "counts.txt") 824 | fpath.meta <- paste0(input_fpath,"metadata.txt") 825 | fpath.deg <- paste0(input_fpath, 'degs.txt') 826 | 827 | norm.matrix <- as.matrix(GetAssayData(ser, "data", "RNA")) 828 | write.table(norm.matrix, fpath.mat, sep='\t', quote=F) 829 | 830 | meta.data <- data.frame(rownames(ser@meta.data), ser@meta.data$celltype) 831 | colnames(meta.data) <- NULL 832 | meta.data <- as.matrix(meta.data) 833 | write.table(meta.data, fpath.meta, sep='\t', quote=F, row.names=F) 834 | 835 | Idents(ser) <- ser$celltype 836 | DEGs <- FindAllMarkers(ser, test.use = 't', 837 | verbose = F, only.pos = T, 838 | random.seed = 123, logfc.threshold = 0.15, 839 | min.pct = 0.05, return.thresh = 0.05) 840 | fDEGs = subset(DEGs, p_val_adj < 0.05 & avg_log2FC > 0.15) 841 | fDEGs = fDEGs[, c('cluster', 'gene', 'p_val_adj', 'p_val', 'avg_log2FC', 'pct.1', 'pct.2')] 842 | write.table(fDEGs, file = fpath.deg, sep = '\t', quote = F, row.names = F) 843 | 844 | rm(norm.matrix, meta.data, DEGs, fDEGs, ser);gc() 845 | 846 | output_fpath <- paste0(fpath, '/output/') 847 | if(!dir.exists(output_fpath)){ 848 | dir.create(output_fpath) 849 | } 850 | } 851 | 852 | start.time <- proc.time() 853 | 854 | command <- paste('sh ../Script/Step0_LRToolsFunction/CellPhoneDB3_shell.sh', fpath.meta, fpath.mat, fpath.deg, output_fpath, sep = ' ') 855 | system(command) 856 | 857 | used.time <- proc.time()-start.time 858 | used.memory <- mem_used() 859 | 860 | file.path <- paste(output_fpath, 'significant_means.txt', sep = '/') 861 | 862 | result <- read.table(file.path, header = TRUE, sep = "\t") 863 | result <- result[, c(2, 13:dim(result)[2])] 864 | 865 | result <- result %>% pivot_longer(cols = -interacting_pair, names_to = "sr", values_to = "LRscore") 866 | result <- dplyr::filter(result, !is.na(LRscore)) 867 | result <- tidyr::separate(data = result, col = sr, into = c("Sender", "Receiver"), sep = "\\.") 868 | 869 | # handle the complex information 870 | if(T){ 871 | cpdb.complex <-read.csv("./Step0_SharedInfo/CellPhoneDB/complexes.csv") 872 | rec.complex <- cpdb.complex$complex_name[which(cpdb.complex$receptor == TRUE)] 873 | lig.complex <- cpdb.complex$complex_name[which(cpdb.complex$receptor != TRUE)] 874 | cpdb.complex <- cpdb.complex[,1:5] 875 | cpdb.gene <- read.csv("./Step0_SharedInfo/CellPhoneDB/genes.csv") 876 | cpdb.gene <- dplyr::distinct(cpdb.gene, gene_name, uniprot, hgnc_symbol, .keep_all = TRUE) 877 | cpdb.gene <- cpdb.gene[,1:2] 878 | 879 | tmp.cpdb.complex <- merge(cpdb.gene, cpdb.complex, by.y = "uniprot_1", by.x = "uniprot") 880 | tmp.cpdb.complex <- tmp.cpdb.complex[,-1] 881 | colnames(tmp.cpdb.complex)[1] <- "gene_1" 882 | tmp.cpdb.complex <- merge(cpdb.gene, tmp.cpdb.complex, by.y = "uniprot_2", by.x = "uniprot") 883 | tmp.cpdb.complex <- tmp.cpdb.complex[,-1] 884 | colnames(tmp.cpdb.complex)[1] <- "gene_2" 885 | tmp.cpdb.complex <- tmp.cpdb.complex[,-5] 886 | tmp.cpdb.complex <- merge(cpdb.gene, tmp.cpdb.complex, by.y = "uniprot_3", by.x = "uniprot", all.y = TRUE) 887 | tmp.cpdb.complex <- tmp.cpdb.complex[-117,] 888 | tmp.cpdb.complex <- tmp.cpdb.complex[,-1] 889 | colnames(tmp.cpdb.complex)[1] <- "gene_3" 890 | cpdb.complex <- tmp.cpdb.complex[,4:1] 891 | rm(cpdb.gene, tmp.cpdb.complex) 892 | 893 | cpdb.complex$gene <- paste(cpdb.complex$gene_1, cpdb.complex$gene_2, cpdb.complex$gene_3, sep = "&") 894 | cpdb.complex$gene <- gsub("&NA", "", cpdb.complex$gene) 895 | cpdb.complex <- cpdb.complex[,c("complex_name", "gene")] 896 | } 897 | 898 | # combine the complex information and result of cellphonedb 899 | if(T){ 900 | complexes <- cpdb.complex$complex_name[which(stringr::str_count(cpdb.complex$complex_name, pattern = '_')==1)] 901 | for (complex in complexes) { 902 | change.pair <- which(grepl(complex, result$interacting_pair)) 903 | if(length(change.pair)>0){ 904 | change.complex <- gsub('_', '*', complex) 905 | result$interacting_pair <- gsub(complex, change.complex, result$interacting_pair) 906 | } 907 | } 908 | 909 | result <- tidyr::separate(data = result, col = interacting_pair, into = c("Ligand", "Receptor"), sep = "_") 910 | result$Ligand <- gsub('\\*', '_', result$Ligand) 911 | result$Receptor <- gsub('\\*', '_', result$Receptor) 912 | result <- merge(result, cpdb.complex, by.x = "Ligand", by.y = "complex_name", all.x = TRUE) 913 | result$Ligand[!is.na(result$gene)] <- result$gene[!is.na(result$gene)] 914 | result <- result[,-6] 915 | result <- merge(result, cpdb.complex, by.x = "Receptor", by.y = "complex_name", all.x = TRUE) 916 | result$Receptor[!is.na(result$gene)] <- result$gene[!is.na(result$gene)] 917 | result <- result[,-6] 918 | 919 | result.rec <- which(result$Receptor %in% rec.complex) 920 | result.lig <- which(result$Ligand %in% lig.complex) 921 | 922 | if(length(result.lig)>0){ 923 | result <- result[-result.lig,] 924 | print(paste0('Delate:', length(result.lig))) 925 | }else if(length(result.rec)>0){ 926 | result <- result[-result.rec,] 927 | print(paste0('Delate:', length(result.rec))) 928 | } 929 | 930 | result <- result[which(result$Sender!=result$Receiver),] 931 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 932 | result <- dplyr::distinct(result, all, .keep_all = TRUE) 933 | } 934 | 935 | result_record <- list(result=result, 936 | used_time = paste0(round(used.time[3]/60,3), ' min'), 937 | used_memory = round(used.memory/1024/1024/1024,3)) 938 | return(result_record) 939 | } 940 | 941 | ########### 942 | ## NATMI ## Run in parallel with 10 cores 943 | ########### 944 | NATMI_function <- function(ser, fpath, species = 'human'){ 945 | suppressMessages(library(Seurat)) 946 | suppressMessages(library(lobstr)) 947 | set.seed(123) 948 | 949 | input_fpath <- paste0(fpath, '/input/') 950 | if(!dir.exists(input_fpath)){ 951 | dir.create(input_fpath) 952 | } 953 | 954 | output_fpath <- paste0(fpath, '/output/') 955 | if(!dir.exists(output_fpath)){ 956 | dir.create(output_fpath) 957 | } 958 | 959 | fpath.mat <- paste0(input_fpath, "counts.csv") 960 | write.csv(100 * (exp(as.matrix(GetAssayData(object = ser, assay = "RNA", slot = "data"))) - 1), 961 | fpath.mat, row.names = T) 962 | meta <- data.frame(Cell = colnames(ser), Annotation = ser$celltype) 963 | fpath.meta <- paste0(input_fpath, "metadata.csv") 964 | write.csv(meta,fpath.meta, row.names = FALSE) 965 | rm(ser);gc() 966 | 967 | start.time <- proc.time() 968 | 969 | if(species != 'human'){ 970 | species = 'mouse' 971 | } 972 | 973 | command <- paste('sh ../Script/Step0_LRToolsFunction/NATMI_shell.sh', 974 | species, fpath.mat, fpath.meta, output_fpath, sep = ' ') 975 | system(command) 976 | 977 | used.time <- proc.time()-start.time 978 | used.memory <- mem_used() 979 | 980 | result_path <- paste0(output_fpath, 'Edges_lrc2p.csv') 981 | result <- read.csv(result_path) 982 | result <- result[,c(1:4, 6:20)] 983 | result <- dplyr::filter(result, Ligand.detection.rate > 0.05, Receptor.detection.rate > 0.05) 984 | result <- result[,c(1:4, 16)] 985 | colnames(result) <- c("Sender", "Ligand", "Receptor", "Receiver", "LRscore") 986 | result <- result[which(result$Sender!=result$Receiver),] 987 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 988 | result <- dplyr::distinct(result, all, .keep_all = TRUE) 989 | 990 | result_record <- list(result=result, 991 | used_time = paste0(round(used.time[3]/60,3), ' min'), 992 | used_memory = round(used.memory/1024/1024/1024,3)) 993 | return(result_record) 994 | } 995 | 996 | ############### 997 | ## scConnect ## 998 | ############### 999 | scConnect_function <- function(ser, fpath, species = 'human'){ 1000 | suppressMessages(library(Seurat)) 1001 | suppressMessages(library(lobstr)) 1002 | set.seed(123) 1003 | 1004 | input_fpath <- paste0(fpath, '/input/') 1005 | if(!dir.exists(input_fpath)){ 1006 | dir.create(input_fpath) 1007 | } 1008 | 1009 | fpath.mat <- paste0(input_fpath, "counts.csv") 1010 | norm.matrix <- as.matrix(GetAssayData(ser, "data", "RNA")) 1011 | write.csv(norm.matrix, fpath.mat, quote=F) 1012 | 1013 | fpath.meta <- paste0(input_fpath, "metadata.csv") 1014 | cell.meta <- data.frame(Cell = rownames(ser@meta.data), Annotation = ser$celltype) 1015 | write.csv(cell.meta, fpath.meta, quote=F, row.names = FALSE) 1016 | rm(norm.matrix, cell.meta, ser);gc() 1017 | 1018 | start.time <- proc.time() 1019 | 1020 | if(species == 'human'){ 1021 | species = 'hsapiens' 1022 | }else{ 1023 | species = 'mmusculus' 1024 | } 1025 | 1026 | output_fpath <- paste0(fpath, '/output/') 1027 | if(!dir.exists(output_fpath)){ 1028 | dir.create(output_fpath) 1029 | } 1030 | output_fpath <- paste0(output_fpath, 'result.csv') 1031 | 1032 | command <- paste('sh ../Script/Step0_LRToolsFunction/scConnect_shell.sh', 1033 | fpath.mat, fpath.meta, species, output_fpath, sep = ' ') 1034 | system(command) 1035 | 1036 | used.time <- proc.time()-start.time 1037 | used.memory <- mem_used() 1038 | 1039 | # handle result 1040 | if(T){ 1041 | result <- read.csv(output_fpath) 1042 | result <- result[,-1] 1043 | 1044 | ligands <- read.csv("./Step0_SharedInfo/scConnect/ligands.csv", header = T) 1045 | ligands <- ligands[,-c(1, 3, 9)] 1046 | result <- merge(ligands, result,by = "ligand") 1047 | colnames(result)[2] <- "ligand_gene" 1048 | result <- result[,-c(3:6)] 1049 | 1050 | 1051 | receptors <- read.csv('./Step0_SharedInfo/scConnect/receptors.csv') 1052 | receptors <- dplyr::distinct(receptors, receptor, gene, .keep_all = FALSE) 1053 | result <- merge(receptors, result, by = 'receptor') 1054 | colnames(result)[2] <- 'receptor_gene' 1055 | result <- result[which(result$ligand_pval<0.05), ] 1056 | result <- result[which(result$receptor_pval<0.05),] 1057 | 1058 | result <- result[,c("sender", "reciever", "ligand_gene", "receptor_gene", "score")] 1059 | colnames(result) <- c("Sender", "Receiver", "Ligand", "Receptor", "LRscore") 1060 | result <- result[which(result$Sender!=result$Receiver),] 1061 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 1062 | result <- dplyr::distinct(result, all, .keep_all = TRUE) 1063 | } 1064 | 1065 | 1066 | result_record <- list(result=result, 1067 | used_time = paste0(round(used.time[3]/60,3), ' min'), 1068 | used_memory = round(used.memory/1024/1024/1024,3)) 1069 | return(result_record) 1070 | } 1071 | 1072 | ############### 1073 | ## cell2cell ## Only for human (LR prior databases); the LR prior databases can be designed by users (complex) 1074 | ############### 1075 | cell2cell_function <- function(ser, fpath, species = 'human'){ 1076 | suppressMessages(library(Seurat)) 1077 | suppressMessages(library(lobstr)) 1078 | set.seed(123) 1079 | 1080 | input_fpath <- paste0(fpath, '/input/') 1081 | if(!dir.exists(input_fpath)){ 1082 | dir.create(input_fpath) 1083 | } 1084 | 1085 | fpath.mat <- paste0(input_fpath, "counts.csv") 1086 | norm.matrix <- as.matrix(GetAssayData(ser, "data", "RNA")) 1087 | write.csv(norm.matrix, fpath.mat, quote=F) 1088 | 1089 | fpath.meta <- paste0(input_fpath, "metadata.csv") 1090 | cell.meta <- data.frame(Cell = rownames(ser@meta.data), Annotation = ser$celltype) 1091 | write.csv(cell.meta, fpath.meta, quote=F, row.names = FALSE) 1092 | rm(norm.matrix, cell.meta, ser);gc() 1093 | 1094 | output_fpath <- paste0(fpath, '/output/') 1095 | if(!dir.exists(output_fpath)){ 1096 | dir.create(output_fpath) 1097 | } 1098 | 1099 | start.time <- proc.time() 1100 | 1101 | if(species == 'human'){ 1102 | fpath.lr <- './Step0_SharedInfo/cell2cell/lr_human.csv' 1103 | }else{ 1104 | fpath.lr <- './Step0_SharedInfo/cell2cell/lr_human.csv' 1105 | } 1106 | 1107 | command <- paste('sh ../Script/Step0_LRToolsFunction/cell2cell_shell.sh', fpath.mat, fpath.meta, 1108 | fpath.lr, output_fpath, sep = ' ') 1109 | system(command) 1110 | used.time <- proc.time()-start.time 1111 | used.memory <- mem_used() 1112 | 1113 | result.pval <- read.csv(paste0(output_fpath, 'ccc_pval.csv')) 1114 | result.pval <- tidyr::pivot_longer(result.pval, -X, names_to = 'sr', values_to = 'pvalue') 1115 | result.pval <- tidyr::separate(result.pval, X, c('Ligand', 'Receptor'), sep = ',') 1116 | result.pval$Ligand <- gsub("\\(\\'", '', result.pval$Ligand) 1117 | result.pval$Ligand <- gsub("\\'", '', result.pval$Ligand) 1118 | result.pval$Receptor <- gsub("\\'\\)", '', result.pval$Receptor) 1119 | result.pval$Receptor <- gsub("\\'", '', result.pval$Receptor) 1120 | result.pval <- tidyr::separate(result.pval, sr, c('Sender', 'Receiver'), sep = '\\.') 1121 | result.pval$all <- paste(result.pval$Sender, result.pval$Ligand, result.pval$Receiver, result.pval$Receptor, sep = '_') 1122 | 1123 | result.value <- read.csv(paste0(output_fpath, 'communication_matrix.csv')) 1124 | result.value <- tidyr::pivot_longer(result.value, -X, names_to = 'sr', values_to = 'value') 1125 | result.value <- tidyr::separate(result.value, X, c('Ligand', 'Receptor'), sep = ',') 1126 | result.value$Ligand <- gsub("\\(\\'", '', result.value$Ligand) 1127 | result.value$Ligand <- gsub("\\'", '', result.value$Ligand) 1128 | result.value$Receptor <- gsub("\\'\\)", '', result.value$Receptor) 1129 | result.value$Receptor <- gsub("\\'", '', result.value$Receptor) 1130 | result.value <- tidyr::separate(result.value, sr, c('Sender', 'Receiver'), sep = '\\.') 1131 | result.value$all <- paste(result.value$Sender, result.value$Ligand, result.value$Receiver, result.value$Receptor, sep = '_') 1132 | result.value <- result.value[,c('all', 'value')] 1133 | 1134 | result <- merge(result.pval, result.value, by = 'all') 1135 | result <- result[which(result$Sender != result$Receiver),] 1136 | result <- dplyr::distinct(result, all, .keep_all = TRUE) 1137 | result <- result[which(result$value>0), ] 1138 | result <- result[which(result$pvalue<0.05),] 1139 | result <- result[,1:5] 1140 | 1141 | result_record <- list(result=result, 1142 | used_time = paste0(round(used.time[3]/60,3), ' min'), 1143 | used_memory = round(used.memory/1024/1024/1024,3)) 1144 | return(result_record) 1145 | } 1146 | 1147 | 1148 | ############ 1149 | ## Domino ## Parameters setting: max_tf_per_clust = 10, max_rec_per_tf = 10 1150 | ############ Run in parallel with 30 cores (pyscenic) 1151 | Domino_function <- function(ser, fpath, species = 'hg38'){ 1152 | suppressMessages(library(Seurat)) 1153 | suppressMessages(library(domino)) 1154 | suppressMessages(library(dplyr)) 1155 | suppressMessages(library(lobstr)) 1156 | set.seed(123) 1157 | 1158 | input_fpath <- paste0(fpath, '/input/') 1159 | if(!dir.exists(input_fpath)){ 1160 | dir.create(input_fpath) 1161 | } 1162 | 1163 | fpath.mat <- paste0(input_fpath, 'counts.csv') 1164 | write.csv(t(as.matrix(ser@assays$RNA@counts)), file = fpath.mat) 1165 | 1166 | start.time <- proc.time() 1167 | 1168 | if(species == 'hg38'){ 1169 | fpath.tfs <- './Step0_SharedInfo/Domino/hg38/hs_hgnc_curated_tfs.txt' 1170 | fpath.feather.1 <- './Step0_SharedInfo/Domino/hg38/hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.genes_vs_motifs.rankings.feather' 1171 | fpath.feather.2 <- './Step0_SharedInfo/Domino/hg38/hg38__refseq-r80__500bp_up_and_100bp_down_tss.mc9nr.genes_vs_motifs.rankings.feather' 1172 | fpath.motif <- './Step0_SharedInfo/Domino/hg38/motifs-v9-nr.hgnc-m0.001-o0.0.tbl' 1173 | }else if(species == 'hg38' & grepl('pbmc', fpath)){ 1174 | fpath.tfs <- './Step0_SharedInfo/Domino/hg38/hs_hgnc_curated_tfs.txt' 1175 | fpath.feather.1 <- './Step0_SharedInfo/Domino/hg38/hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.genes_vs_motifs.rankings.feather' 1176 | fpath.feather.2 <- './Step0_SharedInfo/Domino/hg38/hg38__refseq-r80__500bp_up_and_100bp_down_tss.mc9nr.genes_vs_motifs.rankings.feather' 1177 | fpath.motif <- './Step0_SharedInfo/Domino/hg38/motifs-v9-nr.hgnc-m0.001-o0.0.tbl' 1178 | }else if(species == 'hg19'){ 1179 | fpath.tfs <- './Step0_SharedInfo/Domino/hg38/hs_hgnc_curated_tfs.txt' 1180 | fpath.feather.1 <- './Step0_SharedInfo/Domino/hg19/hg19-500bp-upstream-10species.mc9nr.genes_vs_motifs.rankings.feather' 1181 | fpath.feather.2 <- './Step0_SharedInfo/Domino/hg19/hg19-500bp-upstream-7species.mc9nr.genes_vs_motifs.rankings.feather' 1182 | fpath.feather.3 <- './Step0_SharedInfo/Domino/hg19/hg19-tss-centered-10kb-10species.mc9nr.genes_vs_motifs.rankings.feather' 1183 | fpath.feather.4 <- './Step0_SharedInfo/Domino/hg19/hg19-tss-centered-10kb-7species.mc9nr.genes_vs_motifs.rankings.feather' 1184 | fpath.feather.5 <- './Step0_SharedInfo/Domino/hg19/hg19-tss-centered-5kb-10species.mc9nr.genes_vs_motifs.rankings.feather' 1185 | fpath.feather.6 <- './Step0_SharedInfo/Domino/hg19/hg19-tss-centered-5kb-7species.mc9nr.genes_vs_motifs.rankings.feather' 1186 | fpath.motif <- './Step0_SharedInfo/Domino/hg38/motifs-v9-nr.hgnc-m0.001-o0.0.tbl' 1187 | }else if(species == 'mm9'){ 1188 | fpath.tfs <- './Step0_SharedInfo/Domino/mm9/mm_mgi_tfs.txt' 1189 | fpath.feather.1 <- './Step0_SharedInfo/Domino/mm9/mm9-500bp-upstream-10species.mc9nr.genes_vs_motifs.rankings.feather' 1190 | fpath.feather.2 <- './Step0_SharedInfo/Domino/mm9/mm9-500bp-upstream-7species.mc9nr.genes_vs_motifs.rankings.feather' 1191 | fpath.feather.3 <- './Step0_SharedInfo/Domino/mm9/mm9-tss-centered-10kb-10species.mc9nr.genes_vs_motifs.rankings.feather' 1192 | fpath.feather.4 <- './Step0_SharedInfo/Domino/mm9/mm9-tss-centered-10kb-7species.mc9nr.genes_vs_motifs.rankings.feather' 1193 | fpath.feather.5 <- './Step0_SharedInfo/Domino/mm9/mm9-tss-centered-5kb-10species.mc9nr.genes_vs_motifs.rankings.feather' 1194 | fpath.feather.6 <- './Step0_SharedInfo/Domino/mm9/mm9-tss-centered-5kb-7species.mc9nr.genes_vs_motifs.rankings.feather' 1195 | fpath.motif <- './Step0_SharedInfo/Domino/mm9/motifs-v9-nr.mgi-m0.001-o0.0.tbl' 1196 | }else if(species == 'mm10'){ 1197 | fpath.tfs <- './Step0_SharedInfo/Domino/mm9/mm_mgi_tfs.txt' 1198 | fpath.feather.1 <- './Step0_SharedInfo/Domino/mm10/mm10_refseq-r80_10kb_up_and_down_tss.mc9nr.genes_vs_motifs.rankings.feather' 1199 | fpath.feather.2 <- './Step0_SharedInfo/Domino/mm10/mm10_refseq-r80_500bp_up_and_100bp_down_tss.mc9nr.genes_vs_motifs.rankings.feather' 1200 | fpath.motif <- './Step0_SharedInfo/Domino/mm9/motifs-v9-nr.mgi-m0.001-o0.0.tbl' 1201 | } 1202 | 1203 | fpath.adj <- paste0(input_fpath, 'adj.tsv') 1204 | fpath.reg <- paste0(input_fpath, 'reg.csv') 1205 | fpath.auc <- paste0(input_fpath, 'auc_mtx.csv') 1206 | 1207 | if(species == 'hg19' | species == 'mm9'){ 1208 | command <- paste('sh ../Script/Step0_LRToolsFunction/Domino_shell1.sh', 1209 | fpath.adj, fpath.mat, fpath.reg, fpath.auc, 1210 | fpath.tfs, fpath.feather.1, fpath.feather.2, fpath.feather.3, 1211 | fpath.feather.4, fpath.feather.5, fpath.feather.6, 1212 | fpath.motif, sep = ' ') 1213 | }else if(species == 'hg38' | species == 'mm10'){ 1214 | command <- paste('sh ../Script/Step0_LRToolsFunction/Domino_shell2.sh', 1215 | fpath.adj, fpath.mat, fpath.reg, fpath.auc, 1216 | fpath.tfs, fpath.feather.1, fpath.feather.2, 1217 | fpath.motif, sep = ' ') 1218 | } 1219 | 1220 | system(command) 1221 | 1222 | # run domino 1223 | if(T){ 1224 | auc = t(read.table(fpath.auc, header = TRUE, row.names = 1, stringsAsFactors = FALSE, sep = ',')) 1225 | 1226 | ser <- ser[, colnames(auc)] 1227 | ser <- ScaleData(ser, features = rownames(ser)) 1228 | 1229 | source("./Step0_SharedInfo/Domino/create_domino.R") 1230 | # line 200 of rscript of creat_domino function: change tf_genes = df[row, 10] to tf_genes = df[row, 9] 1231 | dom = create_domino(signaling_db = './Step0_SharedInfo/CellPhoneDB', 1232 | features = auc, counts = ser@assays$RNA@counts, z_scores = ser@assays$RNA@scale.data, 1233 | clusters = ser@active.ident, df = fpath.reg) 1234 | dom = build_domino(dom, min_tf_pval = 0.05, max_tf_per_clust = 10, max_rec_per_tf = 10) 1235 | } 1236 | 1237 | used.time <- proc.time()-start.time 1238 | used.memory <- mem_used() 1239 | 1240 | # bulid ligand-receptor dataframe 1241 | if(T){ 1242 | lig_to_rec <- data.frame() 1243 | for (rec in names(dom@linkages$rec_lig)) { 1244 | lig <- dom@linkages$rec_lig[[rec]] 1245 | 1246 | if(length(lig)>0){ 1247 | lig.rec <- data.frame(ligand = lig, receptor = rep(rec, length(lig))) 1248 | }else{ 1249 | lig.rec <- data.frame(ligand = lig, receptor = rec) 1250 | } 1251 | 1252 | lig_to_rec <- rbind(lig_to_rec, lig.rec) 1253 | } 1254 | lig_to_rec <- lig_to_rec[which(lig_to_rec$ligand!=""),] 1255 | } 1256 | 1257 | # handle result 1258 | if(T){ 1259 | result <- lapply(levels(dom@clusters), function(reciever){ 1260 | print(reciever) 1261 | # get ligands of sender cells which are communicated with reciever cells 1262 | sender.ligands <- dom@cl_signaling_matrices[[reciever]] %>% 1263 | as.data.frame(.) %>% tibble::rownames_to_column(var = "ligand") %>% 1264 | tidyr::pivot_longer(cols = -ligand, names_to = "sender", values_to = "expression") %>% 1265 | .[which(.$expression>0), ] 1266 | sender.ligands$sender <- stringr::str_replace_all(sender.ligands$sender, "L_", "") 1267 | sender.ligands <- sender.ligands[, -3] 1268 | 1269 | if(dim(sender.ligands)[1] != 0){ 1270 | # get tfs of reciever cells 1271 | reciever.tfs <- dom@linkages$clust_tf[[reciever]] 1272 | rec_tf <- data.frame() 1273 | for(tf in reciever.tfs){ 1274 | rec <- dom@linkages$tf_rec[[tf]] 1275 | 1276 | if(length(rec)>0){ 1277 | rec.tf <- data.frame(receptor = rec, tf = rep(tf, length(rec))) 1278 | }else{ 1279 | rec.tf <- data.frame() 1280 | } 1281 | rec_tf <- rbind(rec_tf, rec.tf) 1282 | } 1283 | 1284 | lig_rec_tf <- merge(lig_to_rec, rec_tf, by = "receptor") 1285 | lig_rec_tf <- merge(sender.ligands, lig_rec_tf, by = "ligand") 1286 | lig_rec_tf$reciever <- reciever 1287 | lig_rec_tf$tf <- stringr::str_replace_all(lig_rec_tf$tf, "\\...", "") 1288 | }else{ 1289 | lig_rec_tf <- NA 1290 | } 1291 | lig_rec_tf 1292 | }) 1293 | 1294 | result[which(is.na(result))] <- NULL 1295 | 1296 | result <- do.call(rbind, result) 1297 | result <- result[which(result$sender != result$reciever),] 1298 | result <- result[,-4] 1299 | colnames(result) <- c('Ligand', 'Sender', 'Receptor', 'Receiver') 1300 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 1301 | result <- dplyr::distinct(result, all, .keep_all = TRUE) 1302 | } 1303 | 1304 | result_record <- list(result=result, 1305 | used_time = paste0(round(used.time[3]/60,3), ' min'), 1306 | used_memory = round(used.memory/1024/1024/1024,3)) 1307 | return(result_record) 1308 | } 1309 | 1310 | ############# 1311 | ## scMLnet ## 1312 | ############# 1313 | scMLnet_function <- function(ser, sender = NULL, receiver = NULL){ 1314 | pacman::p_unload(Seurat) 1315 | suppressMessages(library(Seurat, lib.loc = '/home/ljx/R/x86_64-redhat-linux-gnu-library/library2/')) 1316 | suppressMessages(library(scMLnet)) 1317 | suppressMessages(library(lobstr)) 1318 | set.seed(123) 1319 | 1320 | start.time <- proc.time() 1321 | 1322 | # scMLnet v0.2.0 1323 | # pacakge: https://github.com/SunXQlab/scMLnet2.0 1324 | GCMat<- GetAssayData(ser, "counts", "RNA") 1325 | BarCluTable <- data.frame(Barcode = colnames(ser), Cluster = ser$celltype) 1326 | types <- unique(BarCluTable$Cluster) 1327 | 1328 | LigRecLib <- read.table("./Step0_SharedInfo/scMLnet/LigRec.txt", header = T) 1329 | colnames(LigRecLib)[2:3] <- c("source", "target") 1330 | TFTarLib <- read.table("./Step0_SharedInfo/scMLnet/TFTargetGene.txt", header = T) 1331 | colnames(TFTarLib)[1:2] <- c("source", "target") 1332 | RecTFLib <- read.table("./Step0_SharedInfo/scMLnet/RecTF.txt", header = T) 1333 | colnames(RecTFLib)[1:2] <- c("source", "target") 1334 | 1335 | if(is.null(sender) & is.null(receiver)){ 1336 | sender_ct <- types 1337 | receiver_ct <- types 1338 | }else{ 1339 | sender_ct <- sender 1340 | receiver_ct <- receiver 1341 | } 1342 | 1343 | result <- list() 1344 | for (LigClu in sender_ct) { 1345 | for (RecClu in receiver_ct[which(receiver_ct != LigClu)]) { 1346 | netList <- tryCatch(RunMLnet(data = GCMat, BarCluTable = BarCluTable, 1347 | RecClu = RecClu, LigClu = LigClu, 1348 | LigRec.DB = LigRecLib, 1349 | TFTG.DB = TFTarLib, 1350 | RecTF.DB = RecTFLib), 1351 | error=function(e){NA} 1352 | ) 1353 | list.names <- paste(LigClu, RecClu, sep = "_") 1354 | result[[list.names]] <- netList 1355 | } 1356 | } 1357 | 1358 | used.time <- proc.time()-start.time 1359 | used.memory <- mem_used() 1360 | 1361 | result[which(is.na(result))] <- NULL 1362 | 1363 | result <- lapply(result, function(res){ 1364 | res <- res$LigRec 1365 | res 1366 | }) 1367 | result <- do.call(rbind, result) 1368 | if(!is.null(result)){ 1369 | result <- tibble::rownames_to_column(result, 'sr') 1370 | result$sr <- gsub('\\.[0-9]+', '', result$sr) 1371 | result <- tidyr::separate(result, sr, c('Sender', 'Receiver'), sep = '_') 1372 | result <- result[,-5] 1373 | colnames(result)[3:4] <- c('Ligand', "Receptor") 1374 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 1375 | result <- dplyr::distinct(result, all, .keep_all = TRUE) 1376 | result <- result[which(result$Sender!=result$Receiver),] 1377 | } 1378 | 1379 | result_record <- list(result=result, 1380 | used_time = paste0(round(used.time[3]/60,3), ' min'), 1381 | used_memory = round(used.memory/1024/1024/1024,3)) 1382 | return(result_record) 1383 | } 1384 | 1385 | ############# 1386 | ## PyMINEr ## 1387 | ############# 1388 | PyMINEr_function <- function(ser, fpath, species = 'human'){ 1389 | suppressMessages(library(Seurat)) 1390 | suppressMessages(library(lobstr)) 1391 | set.seed(123) 1392 | 1393 | # input 1394 | if(T){ 1395 | input_fpath <- paste0(fpath, '/input/') 1396 | if(!dir.exists(input_fpath)){ 1397 | dir.create(input_fpath) 1398 | } 1399 | 1400 | fpath.mat <- paste0(input_fpath, 'counts.txt') 1401 | matrix.sc <- GetAssayData(ser, "data", "RNA") 1402 | matrix.sc <- as.data.frame(matrix.sc) 1403 | matrix.sc <- tibble::rownames_to_column(matrix.sc, var = "gene") 1404 | write.table(matrix.sc, file = fpath.mat, row.names = FALSE, sep = "\t") 1405 | 1406 | fpath.meta <- paste0(input_fpath, 'metadata.txt') 1407 | meta.sc <- data.frame(barcode = colnames(ser), celltype = ser$celltype) 1408 | i <- 0 1409 | for(ct in unique(meta.sc$celltype)){ 1410 | meta.sc[which(meta.sc$celltype == ct), "ct_num"] <- i 1411 | i <- i+1 1412 | } 1413 | meta <- meta.sc[,2:3] 1414 | meta <- dplyr::distinct(meta) 1415 | rownames(meta) <- NULL 1416 | 1417 | meta.sc <- meta.sc[,-2] 1418 | write.table(meta.sc, file = fpath.meta,row.names = F, col.names = F, sep = "\t") 1419 | rm(matrix.sc, meta.sc, ct, i, ser);gc() 1420 | } 1421 | 1422 | start.time <- proc.time() 1423 | 1424 | if(species == 'human'){ 1425 | species <- 'hsapiens' 1426 | }else{ 1427 | species <- 'mmusculus' 1428 | } 1429 | 1430 | command <- paste('sh ../Script/Step0_LRToolsFunction/PyMINEr_shell.sh', 1431 | fpath.mat, fpath.meta, species, sep = ' ') 1432 | system(command) 1433 | 1434 | used.time <- proc.time()-start.time 1435 | used.memory <- mem_used() 1436 | 1437 | result_path <- paste0(input_fpath, 1438 | 'autocrine_paracrine_signaling/extracellular_plasma_membrane_cell_type_specific_interactions.txt') 1439 | result <- read.table(result_path, header = FALSE, sep = ',', fileEncoding = "utf-8") 1440 | result <- tidyr::separate(result, V1, paste0('V', 2:19), sep = '\t') 1441 | result <- result[,-c(12:17)] 1442 | result[1, 12] <- 'LRscore' 1443 | colnames(result) <- result[1,] 1444 | result <- result[-1,] 1445 | 1446 | result <- result[which(result$cell_type_1!=result$cell_type_2),] 1447 | result <- result[,c(2,4,6,8,12)] 1448 | meta$ct_num <- paste0('bool_sample_group_', meta$ct_num) 1449 | result <- merge(result, meta, by.x = 'cell_type_1', by.y = 'ct_num') 1450 | colnames(result)[6] <- 'Sender' 1451 | result <- merge(result, meta, by.x = 'cell_type_2', by.y = 'ct_num') 1452 | colnames(result)[7] <- 'Receiver' 1453 | result <- result[,-c(1:2)] 1454 | colnames(result)[1:2] <- c('Ligand', 'Receptor') 1455 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 1456 | result <- dplyr::distinct(result, all, .keep_all = TRUE) 1457 | 1458 | result_record <- list(result=result, 1459 | used_time = paste0(round(used.time[3]/60,3), ' min'), 1460 | used_memory = round(used.memory/1024/1024/1024,3)) 1461 | return(result_record) 1462 | } 1463 | 1464 | ############### 1465 | ## RNAMagnet ## 1466 | ############### Only for mouse (LR prior databases) 1467 | RNAMagnet_function <- function(ser){ 1468 | suppressMessages(library(RNAMagnet)) 1469 | suppressMessages(library(Seurat)) 1470 | suppressMessages(library(lobstr)) 1471 | set.seed(123) 1472 | 1473 | start.time <- proc.time() 1474 | 1475 | result <- RNAMagnetSignaling(ser, .version = "2.0.0", 1476 | .cellularCompartment = c("Membrane", "ECM", "Both", "Secreted")) 1477 | 1478 | temp <- c() 1479 | for (sender in unique(ser$celltype)) { 1480 | for (receiver in unique(ser$celltype)) { 1481 | tmp <- getRNAMagnetGenes(result, sender, receiver) 1482 | if(dim(tmp)[[1]]!=0){ 1483 | tmp$Sender <- sender 1484 | tmp$Receiver <- receiver 1485 | temp <- rbind(temp,tmp) 1486 | } 1487 | } 1488 | } 1489 | 1490 | used.time <- proc.time()-start.time 1491 | used.memory <- mem_used() 1492 | 1493 | result <- temp 1494 | result <- result[which(result$Sender!=result$Receiver),] 1495 | result <- tidyr::separate(result, pair, c('Ligand', 'Receptor'), sep = '-') 1496 | rownames(result) <- NULL 1497 | need <- which(grepl('\\|', result$Ligand) | grepl('\\|', result$Receptor)) 1498 | if(length(need)>0){ 1499 | result_sub1 <- result[need, ] 1500 | result_sub2 <- result[-need, ] 1501 | 1502 | result_tmp <- c() 1503 | for (i in seq(need)) { 1504 | ligands <- unlist(stringr::str_split(result_sub1[i, 'Ligand'], '\\|')) 1505 | receptors <- unlist(stringr::str_split(result_sub1[i, 'Receptor'], '\\|')) 1506 | tmp <- expand.grid(ligands, receptors) 1507 | colnames(tmp) <- c('Ligand', 'Receptor') 1508 | tmp$Sender <- rep(result_sub1[i, 'Sender'], n = dim(tmp)[1]) 1509 | tmp$Receiver <- rep(result_sub1[i, 'Receiver'], n = dim(tmp)[1]) 1510 | tmp$score <- rep(result_sub1[i, 'score'], n = dim(tmp)[1]) 1511 | tmp <- tmp[, c(5, 1:4)] 1512 | result_tmp <- rbind(result_tmp, tmp) 1513 | } 1514 | result <- rbind(result_tmp, result_sub2) 1515 | } 1516 | 1517 | result <- aggregate(score~Ligand+Receptor+Sender+Receiver, mean, data=result) 1518 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 1519 | colnames(result)[5] <- 'LRscore' 1520 | 1521 | 1522 | result_record <- list(result=result, 1523 | used_time = paste0(round(used.time[3]/60,3), ' min'), 1524 | used_memory = round(used.memory/1024/1024/1024,3)) 1525 | return(result_record) 1526 | } 1527 | --------------------------------------------------------------------------------