├── Graphic-abstrct.png ├── Workflow-figure.png ├── Step0_LRToolsFunction ├── PyMINEr_shell.sh ├── scConnect_shell.sh ├── NATMI_shell.sh ├── CellPhoneDB3_shell.sh ├── CellPhoneDB2_shell.sh ├── cell2cell_shell.sh ├── Domino_shell2.sh ├── Domino_shell1.sh ├── scConnect_python.py ├── cell2cell_python.py ├── ScriptForAddLRscore.R └── Step1_LRToolsFunction.R ├── Step8_LRTToolsFunction ├── HoloNet_shell.sh ├── LRTPrediction_5tools.sh ├── RunSTScript.R ├── HoloNet.py └── Step8_LRTToolsFunction.R ├── Step3_MIForLRBench ├── Step2_ScriptForDLRC.R ├── Step1_ScriptForCalMI.R └── function.R ├── Step4_SIRSIForLRBench ├── ScriptForCalSIRSI.R └── function.R ├── Step6_LRBenchSampling ├── example.sh └── RunScript.R ├── Step9_LRTBench ├── Step2_ScriptForRecord.R ├── Step3_ScriptForTimeMen.R ├── Step1_ScriptForBench.R └── function.R ├── Step7_LRBenchSamplingBench ├── Step3_TimeMenRecord.R ├── Step2_JaccardIndex.R ├── function.R └── Step1_DatasetRecord.R ├── Step1_LRPredictionResult ├── LRPredition_19tools.sh └── RunScript.R ├── Step2_PreSTForLRBench ├── ScriptForCK.R ├── ScriptForCID.R ├── function.R └── ScriptForMouseEymbryo.R ├── Step5_BenchBasedCAGEProteomic ├── function.R ├── ScriptForCAGE.R └── ScriptForProteomics.R └── README.md /Graphic-abstrct.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SunXQlab/ESICCC/HEAD/Graphic-abstrct.png -------------------------------------------------------------------------------- /Workflow-figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SunXQlab/ESICCC/HEAD/Workflow-figure.png -------------------------------------------------------------------------------- /Step0_LRToolsFunction/PyMINEr_shell.sh: -------------------------------------------------------------------------------- 1 | source /home/ljx/miniconda3/bin/activate pyminer 2 | pyminer.py -i $1 -manual_sample_groups $2 -rand_seed 123 -species $3 3 | -------------------------------------------------------------------------------- /Step0_LRToolsFunction/scConnect_shell.sh: -------------------------------------------------------------------------------- 1 | source /home/ljx/miniconda3/bin/activate cell2cell 2 | /home/ljx/miniconda3/envs/cell2cell/bin/python ../Script/Step0_LRToolsFunction/scConnect_python.py $1 $2 $3 $4 -------------------------------------------------------------------------------- /Step0_LRToolsFunction/NATMI_shell.sh: -------------------------------------------------------------------------------- 1 | cd ./Step0_SharedInfo/NATMI/ 2 | /home/ljx/miniconda3/bin/python ExtractEdges.py --species $1 --emFile $2 --annFile $3 --interDB lrc2p --coreNum 10 --out $4 --interSpecies $1 -------------------------------------------------------------------------------- /Step0_LRToolsFunction/CellPhoneDB3_shell.sh: -------------------------------------------------------------------------------- 1 | source /home/ljx/miniconda3/bin/activate cpdb 2 | cellphonedb method degs_analysis $1 $2 $3 --threshold 0.05 --output-path $4 --threads 10 --counts-data gene_name --debug-seed 123 -------------------------------------------------------------------------------- /Step0_LRToolsFunction/CellPhoneDB2_shell.sh: -------------------------------------------------------------------------------- 1 | source /home/ljx/miniconda3/bin/activate cpdb2 2 | cellphonedb method statistical_analysis $1 $2 --threshold 0.05 --output-path $3 --threads 10 --counts-data gene_name --debug-seed 123 3 | -------------------------------------------------------------------------------- /Step8_LRTToolsFunction/HoloNet_shell.sh: -------------------------------------------------------------------------------- 1 | source /home/ljx/miniconda3/bin/activate HoloNet 2 | /home/ljx/miniconda3/envs/HoloNet/bin/python ~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Script/Step8_LRTToolsFunction/HoloNet.py $1 $2 $3 $4 $5 -------------------------------------------------------------------------------- /Step0_LRToolsFunction/cell2cell_shell.sh: -------------------------------------------------------------------------------- 1 | source /home/ljx/miniconda3/bin/activate cell2cell 2 | /home/ljx/miniconda3/envs/cell2cell/bin/python ~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Script/Step0_LRToolsFunction/cell2cell_python.py $1 $2 $3 $4 -------------------------------------------------------------------------------- /Step0_LRToolsFunction/Domino_shell2.sh: -------------------------------------------------------------------------------- 1 | source /home/ljx/miniconda3/bin/activate pyscenic 2 | 3 | pyscenic grn --num_workers 30 -o $1 $2 $5 --seed 123 4 | 5 | pyscenic ctx $1 $6 $7 --annotations_fname $8 --expression_mtx_fname $2 --mode "dask_multiprocessing" --output $3 --num_workers 30 6 | 7 | pyscenic aucell $2 $3 -o $4 --num_workers 30 --seed 123 -------------------------------------------------------------------------------- /Step0_LRToolsFunction/Domino_shell1.sh: -------------------------------------------------------------------------------- 1 | source /home/ljx/miniconda3/bin/activate pyscenic 2 | 3 | pyscenic grn --num_workers 30 -o $1 $2 $5 --seed 123 4 | 5 | pyscenic ctx $1 $6 $7 $8 $9 ${10} ${11} --annotations_fname ${12} --expression_mtx_fname $2 --mode "dask_multiprocessing" --output $3 --num_workers 30 6 | 7 | pyscenic aucell $2 $3 -o $4 --num_workers 30 --seed 123 -------------------------------------------------------------------------------- /Step3_MIForLRBench/Step2_ScriptForDLRC.R: -------------------------------------------------------------------------------- 1 | rm(list = ls());gc() 2 | source('./Script/Step3_MIForLRBench/function.R') 3 | set.seed(123) 4 | 5 | result.path <- list.files('./Data/Step3_MIPCCForLRBench', full.names = TRUE) 6 | samples <- gsub('_result.rds', '', substring(result.path, 30)) 7 | 8 | tmp <- lapply(seq(result.path), function(i){ 9 | Eval1Result <- Eval1Process(result.path[i]) 10 | Eval1Result <- EvalIndex_DLRC(Eval1Result) 11 | Eval1Result 12 | }) 13 | names(tmp) <- samples 14 | tmp <- do.call(rbind, tmp) 15 | tmp <- tibble::rownames_to_column(tmp, 'datasets') 16 | tmp$datasets <- gsub('\\.[0-9]+', '', tmp$datasets) -------------------------------------------------------------------------------- /Step8_LRTToolsFunction/LRTPrediction_5tools.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | tools="NicheNet MISTy stMLnet HoloNet CytoTalk" 3 | 4 | sampleID='CID44971 CID4465 control_P7 control_P8 UKF243_T_ST UKF260_T_ST UKF266_T_ST UKF334_T_ST' 5 | 6 | for i in $sampleID; 7 | do 8 | fpath='./Step6_LRTPredictionResult/'$i"/" 9 | 10 | if [ ! -d $fpath ] 11 | then 12 | mkdir $fpath 13 | fi 14 | 15 | for j in $tools; 16 | do 17 | fpathout=$fpath"/"$j"/" 18 | if [ ! -d $fpathout ] 19 | then 20 | mkdir $fpathout 21 | fi 22 | 23 | if [[ !($j == 'CytoTalk') ]] 24 | then 25 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /usr/bin/Rscript ../Script/Step8_LRTToolsFunction/RunSTScript.R $j $i human; 26 | else 27 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /home/ljx/software/R-4.1.0/bin/Rscript ../Script/Step8_LRTToolsFunction/RunSTScript.R $j $i human; 28 | fi 29 | 30 | done 31 | done 32 | -------------------------------------------------------------------------------- /Step4_SIRSIForLRBench/ScriptForCalSIRSI.R: -------------------------------------------------------------------------------- 1 | rm(list = ls());gc() 2 | suppressMessages(library(Seurat)) 3 | source('./Script/Step3_MIForLRBench/function.R') 4 | source('./Script/Step4_SIRSIForLRBench/function.R') 5 | set.seed(123) 6 | 7 | datasets <- c('CID4465', 'CID44971', 'CK357', 'CK358', 8 | 'CK368', 'CK162', 'CK362', 'CK361', 9 | 'CK161', 'CK165', 'Slide14') 10 | ck.dataset <- readRDS('~/1-Datasets/Nature_2022_MI/dataset_id.rds') 11 | 12 | # Calculate SI 13 | for(data in datasets){ 14 | print(data) 15 | SIResult <- CalSI_2(data) 16 | saveRDS(SIResult, file = paste0('./Data/Step4_SIRSIForLRBench/', data, '_SIResult.rds')) 17 | } 18 | rm(SIResult, data);gc() 19 | 20 | #Calculate RSI 21 | for (data in datasets) { 22 | print(data) 23 | RSIResult <- CalRSI(data) 24 | saveRDS(RSIResult, file = paste0('./Data/Step4_SIRSIForLRBench/', data, '_RSIResult.rds')) 25 | } 26 | rm(RSIResult, data);gc() 27 | -------------------------------------------------------------------------------- /Step0_LRToolsFunction/scConnect_python.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[1]: 5 | 6 | 7 | import pandas as pd 8 | import scanpy as sc 9 | import scConnect as cn 10 | import matplotlib 11 | import matplotlib.pyplot as plt 12 | import sys 13 | import random 14 | random.seed(123) 15 | 16 | counts_path = sys.argv[1] 17 | meta_path = sys.argv[2] 18 | species = sys.argv[3] 19 | output_path = sys.argv[4] 20 | adata = sc.read_csv(counts_path).T 21 | meta = pd.read_csv(meta_path, index_col="Cell") 22 | adata.obs = meta 23 | 24 | adata_tissue = cn.genecall.meanExpression(adata, groupby="Annotation", normalization=False, use_raw=False, transformation="log1p") 25 | adata_tissue = cn.connect.ligands(adata_tissue, organism=species) 26 | adata_tissue = cn.connect.receptors(adata_tissue, organism=species) 27 | adata_tissue = cn.connect.specificity(adata_tissue, n=100, groupby="Annotation", organism=species) 28 | 29 | edges = cn.connect.interactions(emitter=adata_tissue, target=adata_tissue, self_reference=True, organism=species) 30 | nodes = cn.connect.nodes(adata_tissue) 31 | 32 | edge = pd.DataFrame(edges) 33 | col_new = list(edge.iloc[0,2].keys()) 34 | edge.rename(columns={0:'sender', 1:'reciever', 2:'info'}, inplace = True) 35 | for each in col_new: 36 | edge[each] = edge["info"].map(lambda x:x[each]) 37 | 38 | edge.to_csv(output_path) 39 | 40 | -------------------------------------------------------------------------------- /Step6_LRBenchSampling/example.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | tools="CellPhoneDB2 CellPhoneDB3 CellTalker Connectome NATMI ICELLNET scConnect CellChat SingleCellSignalR CellCall scSeqComm NicheNet Domino PyMINEr iTALK scMLnet" 3 | 4 | sampleID='CID44971 CID4465 CK357 CK358 CK161 CK165 CK361 CK362 CK162 CK368 Slide14 pbmc4k pbmc6k pbmc8k' 5 | ratios='90 80 70 60 50' 6 | 7 | for i in $sampleID; 8 | do 9 | for ratio in $ratios; 10 | do 11 | 12 | fpath='./Step10_LRBenchSamplingResult/'$i"_"$ratio"/" 13 | 14 | if [ ! -d $fpath ] 15 | then 16 | mkdir $fpath 17 | fi 18 | for j in $tools; 19 | do 20 | fpathout=$fpath"/"$j"/" 21 | if [ ! -d $fpathout ] 22 | then 23 | mkdir $fpathout 24 | fi 25 | 26 | if [[ "$i" =~ 'Slide' && !($j == 'CytoTalk') ]] 27 | then 28 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /usr/bin/Rscript ../Script/Step10_LRBenchSampling/RunScript.R $j $i mouse $ratio; 29 | elif [[ "$i" =~ 'Slide' && ($j == 'CytoTalk') ]] 30 | then 31 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /home/ljx/software/R-4.1.0/bin/Rscript ../Script/Step10_LRBenchSampling/RunScript.R $j $i mouse $ratio; 32 | elif [[ ($j == 'CytoTalk') ]] 33 | then 34 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /home/ljx/software/R-4.1.0/bin/Rscript ../Script/Step10_LRBenchSampling/RunScript.R $j $i human $ratio; 35 | else 36 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /usr/bin/Rscript ../Script/Step10_LRBenchSampling/RunScript.R $j $i human $ratio; 37 | fi 38 | 39 | done 40 | done 41 | done -------------------------------------------------------------------------------- /Step9_LRTBench/Step2_ScriptForRecord.R: -------------------------------------------------------------------------------- 1 | 2 | files <- list.files('./Data/Step8_LRTBenchResult/record', full.names = TRUE) 3 | record_data <- lapply(files, function(file){ 4 | result <- readRDS(file) 5 | record_methods <- lapply(result, function(method){ 6 | record_cellines <- lapply(method, function(cellines){ 7 | if(dim(cellines)[[1]] == 0){ 8 | text <- 'No rec/lig' 9 | }else{ 10 | tmp <- as.data.frame(table(cellines$label)) 11 | false <- as.numeric(tmp$Freq[which(tmp$Var1==0)]) 12 | true <- as.numeric(tmp$Freq[which(tmp$Var1==1)]) 13 | text <- paste0('F: ', false, '; T: ', true) 14 | } 15 | text 16 | }) 17 | names(record_cellines) <- names(method) 18 | record_cellines 19 | }) 20 | record_methods <- do.call(cbind, record_methods) 21 | record_methods <- as.data.frame(record_methods) 22 | record_methods 23 | }) 24 | names(record_data) <- gsub('\\.rds', '', list.files('./Data/Step8_LRTBenchResult/record')) 25 | 26 | record_data1 <- lapply(seq(5, 29, 2), function(i){ 27 | print(i) 28 | marco <- record_data[[i]] 29 | mal <- record_data[[i+1]] 30 | marco.col <- setdiff(colnames(mal), colnames(marco)) 31 | mal.col <- setdiff(colnames(marco), colnames(mal)) 32 | 33 | if (length(marco.col)>0) { 34 | marco[, marco.col] <- '——' 35 | marco <- marco[, colnames(mal)] 36 | }else if(length(mal.col)>0){ 37 | mal[, mal.col] <- '——' 38 | mal <- mal[, colnames(marco)] 39 | } 40 | tmp <- rbind(marco, mal) 41 | tmp 42 | }) 43 | 44 | names(record_data1) <- gsub('_macro', '', names(record_data)[seq(5, 29, 2)]) 45 | -------------------------------------------------------------------------------- /Step7_LRBenchSamplingBench/Step3_TimeMenRecord.R: -------------------------------------------------------------------------------- 1 | rm(list = ls());gc() 2 | suppressMessages(library(tidyverse)) 3 | source('./Script/Step11_LRBenchSamplingBench/function.R') 4 | set.seed(123) 5 | 6 | DataRecord <- readRDS(file = './Data/Step7_LRBenchSampling/DatasetsRecord.rds') 7 | DataRecord$ratio <- gsub('S', '', DataRecord$ratio) 8 | datasets <- unique(DataRecord$datasets) 9 | methods <- c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'Connectome', 10 | 'NATMI', 'ICELLNET', 'scConnect', 'CellChat', 'SingleCellSignalR', 11 | 'CellCall', 'scSeqComm', 'NicheNet', 'scMLnet', 'iTALK', 12 | 'RNAMagnet', 'PyMINEr', 'CytoTalk', 'Domino') 13 | ratios <- c(50, 60, 70, 80, 90, 100) 14 | 15 | TimeMenRecord <- lapply(datasets, function(data){ 16 | print(data) 17 | tmp <- lapply(ratios, function(ratio){ 18 | print(ratio) 19 | temp <- RunTimeMemRecord(data, ratio, methods, DataRecord) 20 | temp <- do.call(rbind, temp) 21 | temp <- tibble::rownames_to_column(temp, 'methods') 22 | temp$ratios <- ratio 23 | temp 24 | }) 25 | tmp <- do.call(rbind, tmp) %>% as.data.frame() 26 | tmp$datasets <- data 27 | tmp 28 | }) 29 | TimeMenRecord <- do.call(rbind, TimeMenRecord) %>% as.data.frame() 30 | TimeMenRecord$class <- paste(TimeMenRecord$datasets, TimeMenRecord$ratios, sep = '_') 31 | 32 | DataRecord$class <- paste(DataRecord$datasets, DataRecord$ratio, sep = '_') 33 | DataRecord <- DataRecord[, -c(1:2)] 34 | TimeMenRecord <- merge(TimeMenRecord, DataRecord, by = 'class') 35 | 36 | saveRDS(TimeMenRecord, file = paste0('./Data/Step7_LRBenchSampling/TimeMemRecord.rds')) 37 | -------------------------------------------------------------------------------- /Step8_LRTToolsFunction/RunSTScript.R: -------------------------------------------------------------------------------- 1 | suppressMessages(library(Seurat)) 2 | source('../Script/Step8_LRTToolsFunction/Step8_LRTToolsFunction.R') 3 | set.seed(123) 4 | 5 | args <- commandArgs() 6 | tools <- args[6] 7 | print(tools) 8 | sampleID <- args[7] 9 | print(sampleID) 10 | 11 | if(grepl('CID', sampleID)){ 12 | fpath.dataset <- paste0('/home/ljx/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/ST_Data/', sampleID, '_ser.rds') 13 | receiver <- 'CancerEpithelial' 14 | }else if(grepl('UKF', sampleID)){ 15 | fpath.dataset <- paste0('/home/ljx/1-Datasets/CancerCell_2022_Glioma/ScriptForLRTBenchmark/ST_Data/', sampleID, '_ser.rds') 16 | receiver <- c('macrophages', 'Malignant') 17 | }else{#MI 18 | fpath.dataset <- paste0('/home/ljx/1-Datasets/Nature_2022_MI/ScriptForCCC/ST_Data/', sampleID, '_ser.rds') 19 | receiver <- c('Fibroblast') 20 | } 21 | 22 | dir.output <- paste0('./Step8_LRTPredictionResult/', sampleID) 23 | if(!dir.exists(dir.output)){ 24 | dir.create(dir.output) 25 | } 26 | 27 | fpath.tools <- paste(dir.output, tools, sep = '/') 28 | if(!dir.exists(fpath.tools)){ 29 | dir.create(fpath.tools) 30 | } 31 | 32 | ser <- readRDS(fpath.dataset) 33 | 34 | if(tools == 'CytoTalk'){ 35 | CytoTalk_function(ser, fpath.tools, receiver = receiver) 36 | }else if(tools == 'HoloNet'){ 37 | HoloNet_function(ser, fpath.tools, sampleID, receiver = receiver) 38 | }else if(tools == 'MISTy'){ 39 | MISTy_function(ser, fpath.tools, sampleID, receiver = receiver) 40 | }else if(tools == 'NicheNet'){ 41 | NicheNet_function(ser, fpath.tools, sampleID, receiver = receiver) 42 | }else if(tools == 'stMLnet'){ 43 | stMLnet_function(ser, fpath.tools, sampleID, receiver = receiver) 44 | } -------------------------------------------------------------------------------- /Step1_LRPredictionResult/LRPredition_19tools.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | tools="CellPhoneDB2 CellPhoneDB3 CellTalker Connectome NATMI ICELLNET scConnect CellChat SingleCellSignalR CellCall scSeqComm NicheNet Domino PyMINEr iTALK cell2cell scMLnet" 3 | 4 | sampleID='CID44971 CID4465 CK357 CK358 CK161 CK165 CK361 CK362 CK162 CK368 Slide14 pbmc4k pbmc6k pbmc8k" 5 | 6 | for i in $sampleID; 7 | do 8 | if [[ "$i" =~ 'CID' ]] 9 | then 10 | fpath='./Step1_LRPredictionResult/NG_BC_'$i"/" 11 | elif [[ "$i" =~ 'CK' ]] 12 | then 13 | fpath='./Step1_LRPredictionResult/N_MI_'$i"/" 14 | elif [[ "$i" =~ 'Slide' ]] 15 | then 16 | fpath='./Step1_LRPredictionResult/MouseEmbryo_'$i"/" 17 | else 18 | fpath='./Step1_LRPredictionResult/GSE106487_'$i"/" 19 | m='ST' 20 | n='SSC' 21 | fi 22 | 23 | if [ ! -d $fpath ] 24 | then 25 | mkdir $fpath 26 | fi 27 | for j in $tools; 28 | do 29 | fpathout=$fpath"/"$j"/" 30 | if [ ! -d $fpathout ] 31 | then 32 | mkdir $fpathout 33 | fi 34 | 35 | if [[ "$i" =~ 'Slide' && !($j == 'CytoTalk') ]] 36 | then 37 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /usr/bin/Rscript ../Script/Step1_LRPredictionResult/RunScript.R $j $i mouse; 38 | elif [[ "$i" =~ 'Slide' && ($j == 'CytoTalk') ]] 39 | then 40 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /home/ljx/software/R-4.1.0/bin/Rscript ../Script/Step1_LRPredictionResult/RunScript.R $j $i mouse; 41 | elif [[ ($j == 'CytoTalk') ]] 42 | then 43 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /home/ljx/software/R-4.1.0/bin/Rscript ../Script/Step1_LRPredictionResult/RunScript.R $j $i human; 44 | else 45 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /usr/bin/Rscript ../Script/Step1_LRPredictionResult/RunScript.R $j $i human; 46 | fi 47 | 48 | done 49 | done 50 | -------------------------------------------------------------------------------- /Step2_PreSTForLRBench/ScriptForCK.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | suppressWarnings(library(Seurat)) 3 | source('./Script/Step2_PreSTForLRBench/function.R') 4 | set.seed(123) 5 | 6 | samples <- readRDS('~/1-Datasets/Nature_2022_MI/dataset_id.rds') 7 | samples <- samples$ST 8 | 9 | for(sample in samples){ 10 | output.path <- paste0('./Data/Step2_PreSTForLRBench/', sample) 11 | if(!dir.exists(output.path)){ 12 | dir.create(output.path) 13 | } 14 | 15 | ser2 <- readRDS(paste0('~/1-Datasets/Nature_2022_MI/ST_Data/Visium-', sample, '.rds')) 16 | meta <- ser2@meta.data 17 | meta <- meta[, 15, drop = FALSE]; colnames(meta) <- 'celltype' 18 | meta$celltype <- gsub('Cycling.cells', 'Cycling', meta$celltype) 19 | rm(ser2);gc() 20 | 21 | st.path <- paste0('~/1-Datasets/Nature_2022_MI/ST_Data/', sample) 22 | counts <- Read10X_h5(paste0(st.path, '/filtered_feature_bc_matrix.h5')) 23 | img <- Read10X_Image(paste0(st.path, '/spatial')) 24 | 25 | share.barcode <- intersect(rownames(meta), colnames(counts)) 26 | share.barcode <- intersect(share.barcode, rownames(img@coordinates)) 27 | 28 | meta <- meta[share.barcode, , drop = FALSE] 29 | counts <- counts[, rownames(meta)] 30 | img <- img[colnames(counts)] 31 | DefaultAssay(object = img) <- 'Spatial' 32 | 33 | identical(rownames(meta), colnames(counts)) 34 | 35 | ser <- CreateSeuratObject(counts, meta.data = meta, 36 | min.cells = 1, min.features = 1, 37 | assay = 'Spatial') 38 | ser <- SCTransform(ser, assay = "Spatial") 39 | ser[['image']] <- img 40 | saveRDS(ser, file = paste0(output.path, '/STser.rds')) 41 | 42 | close_distant <- CloDistCP(ser) 43 | saveRDS(close_distant, file = paste0(output.path, '/CloDistCP.rds')) 44 | } 45 | -------------------------------------------------------------------------------- /Step2_PreSTForLRBench/ScriptForCID.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | suppressMessages(library(Seurat)) 3 | source('./Script/Step2_PreSTForLRBench/function.R') 4 | set.seed(123) 5 | 6 | # CID datasets 7 | img.path <- list.dirs('~/1-Datasets/NatureGenetics_2021_BC/ST_Data/spatial')[-1] 8 | matrix.path <- list.dirs('~/1-Datasets/NatureGenetics_2021_BC/ST_Data/filtered_count_matrices')[-1] 9 | meta.path <- list.dirs('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/ST_deconvoulution', recursive = FALSE) 10 | samples <- substring(meta.path, 76) 11 | 12 | for (i in seq(img.path)) { 13 | output.path <- paste0('./Data/Step2_PreSTForLRBench/', samples[i]) 14 | if(!dir.exists(output.path)){ 15 | dir.create(output.path) 16 | } 17 | 18 | # Save SeuratObject of ST dataset 19 | if(T){ 20 | count.st <- Read10X(matrix.path[i], gene.column = 1) 21 | img <- Read10X_Image(image.dir = img.path[i]) 22 | meta.st <- read.csv(paste0(meta.path[i], '/c2l_means_result.csv')) 23 | meta.st <- tibble::column_to_rownames(meta.st, var = "X") 24 | colnames(meta.st) <- gsub('meanscell_abundance_w_sf_', '', colnames(meta.st)) 25 | meta.st$celltype <- apply(meta.st, 1, function(x){ 26 | colnames(meta.st)[which.max(x)] 27 | }) 28 | 29 | shared.barcode <- intersect(colnames(count.st), rownames(meta.st)) 30 | shared.barcode <- intersect(shared.barcode, rownames(img@coordinates)) 31 | 32 | meta.st <- meta.st[shared.barcode, ] 33 | count.st <- count.st[, rownames(meta.st)] 34 | img <- img[colnames(count.st)] 35 | DefaultAssay(object = img) <- 'Spatial' 36 | 37 | identical(rownames(meta.st), colnames(count.st)) 38 | 39 | ser <- CreateSeuratObject(count.st, meta.data = meta.st, 40 | min.cells = 1, min.features = 1, 41 | assay = 'Spatial') 42 | ser <- SCTransform(ser, assay = "Spatial") 43 | ser[['image']] <- img 44 | 45 | saveRDS(ser, file = paste0(output.path, '/STser.rds')) 46 | } 47 | 48 | close_distant <- CloDistCP(ser) 49 | saveRDS(close_distant, file = paste0(output.path, '/CloDistCP.rds')) 50 | } -------------------------------------------------------------------------------- /Step0_LRToolsFunction/cell2cell_python.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[ ]: 5 | 6 | 7 | import cell2cell as c2c 8 | import scanpy as sc 9 | import pandas as pd 10 | import sys 11 | import random 12 | random.seed(123) 13 | 14 | counts_path = sys.argv[1] 15 | meta_path = sys.argv[2] 16 | lr_path = sys.argv[3] 17 | output_path = sys.argv[4] 18 | 19 | adata = sc.read_csv(counts_path).T 20 | meta = pd.read_csv(meta_path, index_col = "Cell") 21 | adata.obs = meta 22 | 23 | lr_pairs = pd.read_csv(lr_path) 24 | lr_pairs = lr_pairs.astype(str) 25 | meta = adata.obs.copy() 26 | 27 | interactions = c2c.analysis.SingleCellInteractions(rnaseq_data=adata.to_df().T, 28 | ppi_data=lr_pairs, 29 | metadata=meta, 30 | interaction_columns=('ligand_symbol', 'receptor_symbol'), 31 | communication_score='expression_thresholding', 32 | expression_threshold=0.05, # values after aggregation 33 | cci_score='bray_curtis', 34 | cci_type='undirected', 35 | aggregation_method='nn_cell_fraction', 36 | barcode_col='Cell', 37 | celltype_col='Annotation', 38 | complex_sep='&', 39 | verbose=False) 40 | interactions.compute_pairwise_communication_scores() 41 | # interactions.compute_pairwise_communication_scores() return result: 42 | ccc_matrix_path = output_path + '/communication_matrix.csv' 43 | interactions.interaction_space.interaction_elements['communication_matrix'].to_csv(ccc_matrix_path) 44 | ccc_pvals_path = output_path + '/ccc_pval.csv' 45 | ccc_pvals = interactions.permute_cell_labels(evaluation='communication',verbose=True, random_state = 123) 46 | ccc_pvals.to_csv(ccc_pvals_path) 47 | 48 | -------------------------------------------------------------------------------- /Step5_BenchBasedCAGEProteomic/function.R: -------------------------------------------------------------------------------- 1 | ################### 2 | ## calculate_auc ## 3 | ################### 4 | 5 | ## get different metrices and ROC/PRC preformance object 6 | get_evaluate_metrics <- function(pred,label) 7 | { 8 | 9 | find_optimal_cutoff <- function(TPR, FPR, threshold){ 10 | 11 | y = TPR - FPR 12 | Youden_index = which.max(y) 13 | # optimal_threshold = threshold[Youden_index] 14 | return(Youden_index) 15 | 16 | } 17 | 18 | if(length(which(label==TRUE))!=0 & length(which(label==FALSE))!=0){ 19 | pred <- prediction(pred, label) 20 | 21 | perf_ROC <- performance(pred, measure = "tpr", x.measure = "fpr") 22 | ind_ROC <- find_optimal_cutoff(perf_ROC@y.values[[1]],perf_ROC@x.values[[1]],perf_ROC@alpha.values[[1]]) 23 | cutoff_ROC <- perf_ROC@alpha.values[[1]][ind_ROC] 24 | 25 | perf_PRC <- performance(pred, measure = "prec", x.measure = "rec") 26 | ind_PRC <- find_optimal_cutoff(perf_PRC@y.values[[1]],perf_PRC@x.values[[1]],perf_PRC@alpha.values[[1]]) 27 | cutoff_PRC <- perf_PRC@alpha.values[[1]][ind_PRC] 28 | 29 | ACC <- performance(pred, measure = "acc")@y.values[[1]] %>% .[ind_ROC] %>% signif(.,4) 30 | ERR <- performance(pred, measure = "err")@y.values[[1]] %>% .[ind_ROC] %>% signif(.,4) 31 | PPV <- performance(pred, measure = "ppv")@y.values[[1]] %>% .[ind_ROC] %>% signif(.,4) 32 | #MCC <- performance(pred, measure = "mat")@y.values[[1]] %>% .[ind_ROC] %>% signif(.,4) 33 | F1 <- performance(pred, measure = "f")@y.values[[1]] %>% .[ind_ROC] %>% signif(.,4) 34 | AUC <- performance(pred, measure = "auc")@y.values[[1]] %>% signif(.,4) 35 | AUCPR <- performance(pred, measure = "aucpr")@y.values[[1]] %>% signif(.,4) 36 | 37 | res = list(perf_ROC = perf_ROC, 38 | perf_PRC = perf_PRC, 39 | perf_metrics = c(AUROC=AUC,AUPRC=AUCPR, 40 | ACC=ACC,ERR=ERR,PPV=PPV, F1=F1)) 41 | 42 | }else{ 43 | 44 | res <- list(perf_ROC = NA, perf_PRC = NA, 45 | perf_metrics = rep(NA,6)) 46 | names(res$perf_metrics) <- c('AUROC','AUPRC','ACC','ERR','PPV','F1') 47 | 48 | } 49 | 50 | 51 | return(res) 52 | 53 | } -------------------------------------------------------------------------------- /Step0_LRToolsFunction/ScriptForAddLRscore.R: -------------------------------------------------------------------------------- 1 | rm(list = ls());gc() 2 | suppressMessages(library(Seurat)) 3 | suppressMessages(library(tidyverse)) 4 | set.seed(123) 5 | 6 | files <- list.dirs('./Data/Step1_LRPredictionResult', recursive = FALSE) 7 | methods <- c('scMLnet', 'Domino') 8 | 9 | for (file in files) { 10 | print(file) 11 | if(grepl('Slide', file)){ 12 | ser <- readRDS('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/DataForCCC/Slide14_hs_ser.rds') 13 | }else if(grepl('N_MI', file)){ 14 | sample <- substring(file, 38) 15 | ser <- readRDS(paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data/', sample, '_ser.rds')) 16 | }else if(grepl('NG_BC', file)){ 17 | sample <- substring(file, 39) 18 | ser <- readRDS(paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data/', sample, '_ser.rds')) 19 | }else if(grepl('pbmc', file)){ 20 | sample <- substring(file, 33) 21 | ser <- readRDS(paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/DataForCCC/', sample, '_ser.rds')) 22 | }else if(grepl('Sper', file)){ 23 | ser <- readRDS('~/1-Datasets/Spermatogenesis_GSE106487/ScriptForCCC/Sper_ser.rds') 24 | } 25 | 26 | data <- GetAssayData(ser, 'data', 'RNA') %>% 27 | as.matrix(.) %>% t(.) %>% 28 | as.data.frame() %>% 29 | tibble::rownames_to_column(., 'barcodes') 30 | meta <- ser@meta.data %>% .[, 'celltype', drop =FALSE] %>% tibble::rownames_to_column(., 'barcodes') 31 | rm(ser);gc() 32 | 33 | data <- merge(data, meta, by = 'barcodes') 34 | data$barcodes <- NULL 35 | data <- aggregate(.~celltype, data, mean) 36 | data <- pivot_longer(data, -celltype, 'genes') 37 | data$ct_genes <- paste(data$celltype, data$genes, sep = '_') 38 | data <- data[ -c(1:2)] 39 | rm(meta);gc() 40 | 41 | for (method in methods) { 42 | print(method) 43 | result <- readRDS(paste0(file, '/', method, '/result.rds')) 44 | result_tmp <- result$result 45 | result_tmp$sl <- paste(result_tmp$Sender, result_tmp$Ligand, sep = '_') 46 | result_tmp$rr <- paste(result_tmp$Receiver, result_tmp$Receptor, sep = '_') 47 | result_tmp <- merge(result_tmp, data, by.x = 'sl', by.y = 'ct_genes') 48 | colnames(result_tmp)[8] <- 'ligand_value' 49 | result_tmp <- merge(result_tmp, data, by.x = 'rr', by.y = 'ct_genes') 50 | colnames(result_tmp)[9] <- 'receptor_value' 51 | result_tmp$LRscore <- result_tmp$ligand_value*result_tmp$receptor_value 52 | result_tmp <- result_tmp[,c('Ligand', 'Receptor', 'Sender', 'Receiver', 'LRscore', 'all')] 53 | result$result <- result_tmp 54 | saveRDS(result, paste0(file, '/', method, '/result1.rds')) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /Step2_PreSTForLRBench/function.R: -------------------------------------------------------------------------------- 1 | # Calculate the euclidean distance between cell pairs 2 | CloDistCP <- function(ser, image = TRUE){ 3 | if(image){ 4 | coord <- ser@images$image@coordinates[, c('imagerow', 'imagecol')] 5 | }else{ 6 | coord <- ser@meta.data[, c('imagerow', 'imagecol')] 7 | } 8 | 9 | meta <- ser@meta.data[, 'celltype', drop = FALSE] 10 | identical(rownames(meta), rownames(coord)) 11 | 12 | meta <- cbind(meta, coord) 13 | meta <- meta[order(meta$celltype),] 14 | 15 | dist.cp <- as.matrix(dist(meta[, 2:3], method = "euclidean")) 16 | dist.cp[dist.cp==0] <- 1 17 | dist.cp[!lower.tri(dist.cp, diag = TRUE)] <- 0 18 | dist.cp <- as.data.frame(dist.cp) 19 | dist.cp <- tibble::rownames_to_column(dist.cp, var = "cell.1") 20 | dist.cp <- tidyr::pivot_longer(data = dist.cp, cols = -cell.1, 21 | names_to = "cell.2", values_to = "distance") 22 | dist.cp <- dist.cp[which(dist.cp$distance != 0), ] 23 | 24 | ct <- data.frame(barcode = rownames(meta), celltype = meta$celltype) 25 | dist.cp <- dplyr::inner_join(dist.cp, ct, by = c('cell.1' = 'barcode')) 26 | colnames(dist.cp)[4] <- "celltype.1" 27 | dist.cp <- dplyr::inner_join(dist.cp, ct, by = c('cell.2' = 'barcode')) 28 | colnames(dist.cp)[5] <- "celltype.2" 29 | dist.cp$cp <- paste(dist.cp$celltype.1, dist.cp$celltype.2, sep = "_") 30 | dist.cp <- dist.cp[which(dist.cp$celltype.1!=dist.cp$celltype.2), ] 31 | dist.cp <- dist.cp[,c("cell.1", "cell.2", "distance", "cp")] 32 | 33 | # remove cell pairs with the number less than 30 34 | remove.cp <- names(which(table(dist.cp$cp)<30)) 35 | if(length(remove.cp)!=0){ 36 | dist.cp <- dist.cp[-which(dist.cp$cp %in% remove.cp), ] 37 | } 38 | 39 | perc <- c(0.1, 0.2, 0.3, 0.4, 0.5) 40 | cellpairs <- unique(dist.cp$cp) 41 | library(doParallel) 42 | cl <- makeCluster(5) 43 | registerDoParallel(cl) 44 | close_distant <- foreach(per = perc) %dopar% { 45 | tmp.close_distant <- lapply(cellpairs, function(cp){ 46 | dist <- list() 47 | tmp.dist <- dist.cp[which(dist.cp$cp == cp), ] 48 | tmp.dist <- tmp.dist[order(tmp.dist$distance, decreasing = T), ] 49 | pct.close <- floor(dim(tmp.dist)[1]*per) 50 | pct.distant <- floor(dim(tmp.dist)[1]*per) 51 | close.dist <- tail(tmp.dist, n = pct.close) 52 | distant.dist <- head(tmp.dist, n = pct.distant) 53 | dist[["close"]] <- close.dist 54 | dist[["distant"]] <- distant.dist 55 | dist 56 | }) 57 | names(tmp.close_distant) <- unique(dist.cp$cp) 58 | return(tmp.close_distant) 59 | } 60 | stopCluster(cl) 61 | names(close_distant) <- perc 62 | return(close_distant) 63 | } 64 | -------------------------------------------------------------------------------- /Step2_PreSTForLRBench/ScriptForMouseEymbryo.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | suppressMessages(library(Seurat)) 3 | source('./Script/Step2_PreSTForLRBench/function.R') 4 | set.seed(123) 5 | 6 | counts.st <- Read10X('~/1-Datasets/MouseEmbryo_GSE166692/sciSpaceData/data/', 7 | gene.column = 1) 8 | 9 | cellmeta <- read.csv('~/1-Datasets/MouseEmbryo_GSE166692/sciSpaceData/cellmeta.tsv', 10 | sep = '\t') 11 | genes <- read.csv('~/1-Datasets/MouseEmbryo_GSE166692/sciSpaceData/genemeta.tsv', 12 | sep = '\t') 13 | 14 | for (slide in unique(cellmeta$slide_id)) { 15 | output.path <- paste0('./Data/Step2_PreSTForLRBench/', gsub(' ', '', slide)) 16 | if(!dir.exists(output.path)){ 17 | dir.create(output.path) 18 | } 19 | 20 | if(T){ 21 | cells_sel <- rownames(cellmeta)[which(cellmeta$slide_id==slide)] 22 | 23 | # counts: solve the problem of duplicated genes 24 | if(T){ 25 | counts_sub <- counts.st[, cells_sel] 26 | counts_sub <- as.matrix(counts_sub) 27 | counts_sub <- as.data.frame(counts_sub) 28 | counts_sub$gene <- genes$gene_short_name; rownames(counts_sub) <- NULL 29 | dup_genes <- counts_sub$gene[which(duplicated(counts_sub$gene))] 30 | counts_sub1 <- counts_sub[-which(counts_sub$gene %in% dup_genes),] 31 | counts_sub2 <- counts_sub[which(counts_sub$gene %in% dup_genes),] 32 | counts_sub2 <- aggregate(.~gene,mean,data=counts_sub2) 33 | counts_sub <- rbind(counts_sub1, counts_sub2); rownames(counts_sub) <- NULL 34 | rm(counts_sub1, counts_sub2, dup_genes, cells_sel); gc() 35 | counts_sub <- tibble::column_to_rownames(counts_sub, 'gene') 36 | counts_sub <- as.matrix(counts_sub) 37 | } 38 | 39 | meta_sub <- cellmeta[which(cellmeta$slide_id==slide),] 40 | colnames(meta_sub)[c(9,10,19)] <- c('imagerow', 'imagecol', 'celltype') 41 | meta_sub$celltype <- gsub(' ', '', meta_sub$celltype) 42 | meta_sub$celltype <- gsub('Cardiacmusclelineages', 'CardiacMuscleLineages', meta_sub$celltype) 43 | 44 | shared.barcode <- intersect(colnames(counts_sub), rownames(meta_sub)) 45 | 46 | meta_sub <- meta_sub[shared.barcode, ] 47 | counts_sub <- counts_sub[, rownames(meta_sub)] 48 | identical(rownames(meta_sub), colnames(counts_sub)) 49 | 50 | ser <- CreateSeuratObject(counts = counts_sub, meta.data = meta_sub, 51 | assay = 'Spatial', min.cells = 1, min.features = 1) 52 | ser <- SCTransform(ser, assay = "Spatial") 53 | 54 | saveRDS(ser, file = paste0(output.path, '/STser.rds')) 55 | rm(counts_sub, meta_sub);gc() 56 | } 57 | 58 | ser <- readRDS(paste0(output.path, '/STser.rds')) 59 | close_distant <- CloDistCP(ser, image = FALSE) 60 | saveRDS(close_distant, file = paste0(output.path, '/CloDistCP.rds')) 61 | } 62 | 63 | 64 | -------------------------------------------------------------------------------- /Step8_LRTToolsFunction/HoloNet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[ ]: 5 | 6 | 7 | import HoloNet as hn 8 | 9 | import os 10 | import pandas as pd 11 | import random 12 | import numpy as np 13 | import scanpy as sc 14 | import matplotlib.pyplot as plt 15 | import torch 16 | import sys 17 | 18 | import warnings 19 | warnings.filterwarnings('ignore') 20 | hn.set_figure_params(tex_fonts=False) 21 | sc.settings.figdir = './figures/' 22 | 23 | 24 | # In[ ]: 25 | 26 | 27 | random.seed(123) 28 | np.random.seed(123) 29 | 30 | counts_path = sys.argv[1] 31 | meta_path = sys.argv[2] 32 | img_path = sys.argv[3] 33 | goi_fpath = sys.argv[4] 34 | output_path = sys.argv[5] 35 | 36 | 37 | # In[ ]: 38 | 39 | 40 | # load ST data (gene expression matrix + celltype + spatial) 41 | adata = sc.read_csv(counts_path).T 42 | meta = pd.read_csv(meta_path, index_col="Barcodes") 43 | adata.obs = meta 44 | adata = sc.read_visium(adata, path = img_path) 45 | 46 | 47 | # In[ ]: 48 | 49 | 50 | # run normalization 51 | sc.pp.normalize_total(adata, inplace=True) 52 | sc.pp.log1p(adata) 53 | sc.pp.highly_variable_genes(adata, flavor="seurat", n_top_genes=2000) 54 | 55 | 56 | # In[ ]: 57 | 58 | 59 | # load LR database and filter genes epressed by less than 5% of cells 60 | LR_df = hn.pp.load_lr_df() 61 | expressed_LR_df = hn.pp.get_expressed_lr_df(LR_df, adata, expressed_proportion=0.05) 62 | 63 | 64 | # In[ ]: 65 | 66 | 67 | # calculate w_best 68 | w_best = hn.tl.default_w_visium(adata) 69 | w_best 70 | 71 | 72 | # In[ ]: 73 | 74 | 75 | # construct CE 76 | CE_tensor = hn.tl.compute_ce_tensor(adata, lr_df=expressed_LR_df, w_best=w_best) 77 | CE_tensor_filtered = hn.tl.filter_ce_tensor(CE_tensor, adata, 78 | lr_df=expressed_LR_df, w_best=w_best) 79 | 80 | 81 | # In[ ]: 82 | 83 | 84 | # Selecting the target gene (icgs) to be predicted 85 | target_all_gene_expr, used_gene_list = hn.pr.get_gene_expr(adata, expressed_LR_df, 86 | max_sparse = 0.05) 87 | 88 | 89 | # In[ ]: 90 | 91 | 92 | goi = pd.read_csv(goi_fpath) 93 | goi = list(set(goi["x"]).intersection(set(used_gene_list))) 94 | len(goi) 95 | 96 | 97 | # In[ ]: 98 | 99 | 100 | X, cell_type_names = hn.pr.get_one_hot_cell_type_tensor(adata, categorical_cell_type_col = 'celltype') 101 | adj = hn.pr.adj_normalize(adj=CE_tensor_filtered, cell_type_tensor=X, only_between_cell_type=True) 102 | 103 | for gene in goi: 104 | target = hn.pr.get_one_case_expr(target_all_gene_expr, cases_list=used_gene_list, 105 | used_case_name= gene) 106 | trained_MGC_model_list = hn.pr.mgc_repeat_training(X, adj, target, device='gpu') 107 | predict_result = hn.pl.plot_mgc_result(trained_MGC_model_list, adata, X, adj) 108 | for lr in list(expressed_LR_df['LR_Pair']): 109 | _ = hn.pl.fce_cell_type_network_plot(trained_MGC_model_list, expressed_LR_df, X, adj, 110 | cell_type_names, plot_lr=lr, edge_thres=0.2, 111 | ) 112 | res = pd.DataFrame(_[0]) 113 | file_path = output_path + lr + "_" + gene + ".csv" 114 | res.to_csv(file_path) 115 | 116 | -------------------------------------------------------------------------------- /Step9_LRTBench/Step3_ScriptForTimeMen.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()); gc() 2 | suppressMessages(library(ggplot2)) 3 | source('./Script/Step9_LRTBench/function.R') 4 | set.seed(123) 5 | 6 | # Record the number of cell types in each dataset 7 | if(F){ 8 | suppressMessages(library(Seurat)) 9 | set.seed(123) 10 | ser1.path <- list.files('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/ST_Data', full.names = TRUE) 11 | ser2.path <- list.files('~/1-Datasets/Nature_2022_MI/ScriptForCCC/ST_Data', full.names = TRUE) 12 | ser3.path <- list.files('~/1-Datasets/CancerCell_2022_Glioma/ScriptForLRTBenchmark/ST_Data', full.names = TRUE) 13 | ser.path <- c(ser1.path, ser2.path, ser3.path) 14 | rm(ser1.path, ser2.path, ser3.path);gc() 15 | 16 | tmp <- c() 17 | for (i in seq(ser.path)) { 18 | ser <- readRDS(ser.path[i]) 19 | remove.ct <- as.numeric(length(which(table(ser$celltype)<3))) 20 | temp <- length(unique(ser$celltype))-remove.ct 21 | tmp <- c(tmp, temp) 22 | rm(ser); gc() 23 | } 24 | ct_record <- data.frame(datasets = gsub('_ser.rds', '', 25 | unlist(lapply(stringr::str_split(ser.path, '/'), 26 | function(x){x[8]}))), 27 | num = as.numeric(tmp)) 28 | 29 | saveRDS(ct_record, file = './Data/Step9_LRTBenchResult/DatasetCellTypeInfo.rds') 30 | rm(i, ser.path, temp, tmp, remove.ct, ct_record);gc() 31 | } 32 | 33 | # Record the running time of methods in each dataset 34 | if(T){ 35 | datasets <- list.dirs('./Data/Step8_LRTPredictionResult', recursive = FALSE, full.names = FALSE) 36 | methods <- c('CytoTalk', 'NicheNet', 'HoloNet', 'MISTy', 'stMLnet') 37 | DataRecord <- readRDS('./Data/Step9_LRTBenchResult/DatasetCellTypeInfo.rds') 38 | wd <- './Data/Step8_LRTPredictionResult/' 39 | 40 | TimeMemRecord <- lapply(datasets, function(data){ 41 | print(data) 42 | temp <- RunTimeMemRecord(wd, data, methods) 43 | temp 44 | }) 45 | names(TimeMemRecord) <- datasets 46 | TimeMemRecord <- lapply(TimeMemRecord, function(data){ 47 | temp <- do.call(rbind, data) 48 | temp <- tibble::rownames_to_column(temp, 'methods') 49 | temp 50 | }) 51 | TimeMemRecord <- do.call(rbind, TimeMemRecord) 52 | TimeMemRecord <- tibble::rownames_to_column(TimeMemRecord, 'datasets') 53 | TimeMemRecord$datasets <- gsub('\\.[0-9]+', '', TimeMemRecord$datasets) 54 | 55 | # Specific handle: the running time of CytoTalk in GBM datasets 56 | CytoTalk_TimeMemRecord <- TimeMemRecord[which(TimeMemRecord$methods == 'CytoTalk' & grepl('UKF', TimeMemRecord$datasets)), ] 57 | CytoTalk_TimeMemRecord <- merge(CytoTalk_TimeMemRecord, DataRecord, by = 'datasets') 58 | CytoTalk_TimeMemRecord$total_num <- 2*(CytoTalk_TimeMemRecord$num-1) 59 | CytoTalk_TimeMemRecord$actual_num <- 2*(CytoTalk_TimeMemRecord$num-1)-1 60 | CytoTalk_TimeMemRecord$clock_time <- (CytoTalk_TimeMemRecord$clock_time/CytoTalk_TimeMemRecord$total_num)*CytoTalk_TimeMemRecord$actual_num 61 | CytoTalk_TimeMemRecord$linux_time <- (CytoTalk_TimeMemRecord$linux_time/CytoTalk_TimeMemRecord$total_num)*CytoTalk_TimeMemRecord$actual_num 62 | CytoTalk_TimeMemRecord <- CytoTalk_TimeMemRecord[, -c(7:9)] 63 | TimeMemRecord <- TimeMemRecord[-which(TimeMemRecord$methods == 'CytoTalk' & grepl('UKF', TimeMemRecord$datasets)), ] 64 | TimeMemRecord <- rbind(TimeMemRecord, CytoTalk_TimeMemRecord) 65 | rm(CytoTalk_TimeMemRecord, DataRecord, datasets, methods, wd); gc() 66 | } -------------------------------------------------------------------------------- /Step7_LRBenchSamplingBench/Step2_JaccardIndex.R: -------------------------------------------------------------------------------- 1 | rm(list = ls());gc() 2 | suppressMessages(library(tidyverse)) 3 | suppressMessages(library(ggsci)) 4 | set.seed(123) 5 | 6 | datasets <- list.files('./Data/Step6_LRBenchSamplingResult/', recursive = FALSE) %>% 7 | gsub('_[0-9]+', '',.) %>% unique() 8 | methods <- c("CellPhoneDB2", "CellPhoneDB3", "CellTalker", 9 | "Connectome", "ICELLNET", "NATMI", "iTALK", 10 | "scConnect", "SingleCellSignalR", "CellChat", 11 | "RNAMagnet", "PyMINEr", 12 | "scSeqComm", "NicheNet", 13 | "scMLnet", "CellCall","CytoTalk", "Domino") 14 | 15 | JaccardIndex <- function(a, b) { 16 | intersection = length(intersect(unique(a), unique(b))) 17 | union = length(a) + length(b) - intersection 18 | return (intersection/union) 19 | } 20 | 21 | result <- lapply(datasets, function(data){ 22 | print(data) 23 | temp <- lapply(methods, function(method){ 24 | print(method) 25 | if(grepl('CK', data)){ 26 | res100 <- readRDS(paste('./Data/Step1_LRPredictionResult', 27 | paste0('N_MI_', data), method, 28 | ifelse(method=='iTALK','result1.rds','result.rds'), 29 | sep = '/')) %>% .$result 30 | }else if(grepl('CID', data)){ 31 | res100 <- readRDS(paste('./Data/Step1_LRPredictionResult', 32 | paste0('NG_BC_', data), method, 33 | ifelse(method=='iTALK','result1.rds','result.rds'), 34 | sep = '/')) %>% .$result 35 | }else if(grepl('Slide14', data)){ 36 | res100 <- readRDS(paste('./Data/Step1_LRPredictionResult', 37 | paste0('MouseEmbryo_', data), method, 38 | ifelse(method=='iTALK','result1.rds','result.rds'), 39 | sep = '/')) %>% .$result 40 | }else{ 41 | res100 <- readRDS(paste('./Data/Step1_LRPredictionResult', 42 | data, method, 43 | ifelse(method=='iTALK','result1.rds','result.rds'), 44 | sep = '/')) %>% .$result 45 | } 46 | 47 | if(dim(res100)[[1]]==0 | is.null(res100)){ 48 | tmp <- rep(NA, 5) 49 | }else{ 50 | res90 <- readRDS(paste('./Data/Step10_LRBenchSamplingResult', 51 | paste0(data, '_90'), method, 'result.rds', sep = '/'))%>% .$result 52 | JI_90 <- JaccardIndex(res100$all, res90$all) 53 | res80 <- readRDS(paste('./Data/Step10_LRBenchSamplingResult', 54 | paste0(data, '_80'), method, 'result.rds', sep = '/'))%>% .$result 55 | JI_80 <- JaccardIndex(res100$all, res80$all) 56 | res70 <- readRDS(paste('./Data/Step10_LRBenchSamplingResult', 57 | paste0(data, '_70'), method, 'result.rds', sep = '/'))%>% .$result 58 | JI_70 <- JaccardIndex(res100$all, res70$all) 59 | res60 <- readRDS(paste('./Data/Step10_LRBenchSamplingResult', 60 | paste0(data, '_60'), method, 'result.rds', sep = '/'))%>% .$result 61 | JI_60 <- JaccardIndex(res100$all, res60$all) 62 | res50 <- readRDS(paste('./Data/Step10_LRBenchSamplingResult', 63 | paste0(data, '_50'), method, 'result.rds', sep = '/'))%>% .$result 64 | JI_50 <- JaccardIndex(res100$all, res50$all) 65 | tmp <- c(JI_50, JI_60, JI_70, JI_80, JI_90) 66 | } 67 | 68 | tmp 69 | }) 70 | names(temp) <- methods 71 | temp <- do.call(rbind, temp) 72 | temp <- as.data.frame(temp) 73 | colnames(temp) <- c('S50', 'S60', 'S70', 'S80', 'S90') 74 | temp <- tibble::rownames_to_column(temp, 'methods') 75 | temp$datasets <- data 76 | temp 77 | }) 78 | result <- do.call(rbind, result) 79 | saveRDS(result, file = './Data/Step7_LRBenchSampling/JaccardIndex.rds') 80 | -------------------------------------------------------------------------------- /Step3_MIForLRBench/Step1_ScriptForCalMI.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | suppressMessages(library(Seurat)) 3 | source('./Script/Step3_MIForLRBench/function.R') 4 | set.seed(123) 5 | 6 | datasets <- c('CID4465', 'CID44971', 'CK357', 'CK358', 7 | 'CK368', 'CK162', 'CK362', 'CK361', 8 | 'CK161', 'CK165', 'Slide14') 9 | methods <- c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'Connectome', 10 | 'NATMI', 'ICELLNET', 'scConnect', 'CellChat', 'SingleCellSignalR', 11 | 'CellCall', 'scSeqComm', 'NicheNet', 'Domino', 'scMLnet', 'iTALK', 12 | 'cell2cell', 'RNAMagnet', 'PyMINEr', 'CytoTalk') 13 | ck.dataset <- readRDS('~/1-Datasets/Nature_2022_MI/dataset_id.rds') 14 | # loop: datasets 15 | for(data in datasets){ 16 | print(data) 17 | if(grepl('CK', data)){ 18 | # get normalized data of ST 19 | STser <- readRDS(paste0('./Data/Step2_PreSTForLRBench/', ck.dataset$ST[which(ck.dataset$snRNA == data)], '/STser.rds')) 20 | # get close and distant cell pairs 21 | CloDistCP <- readRDS(paste0('./Data/Step2_PreSTForLRBench/', ck.dataset$ST[which(ck.dataset$snRNA == data)], '/CloDistCP.rds')) 22 | }else{ 23 | # get normalized data of ST 24 | STser <- readRDS(paste0('./Data/Step2_PreSTForLRBench/', data, '/STser.rds')) 25 | # get close and distant cell pairs 26 | CloDistCP <- readRDS(paste0('./Data/Step2_PreSTForLRBench/', data, '/CloDistCP.rds')) 27 | } 28 | 29 | # get normalized data of ST 30 | norm.data <- GetAssayData(STser, 'data', 'SCT') 31 | #norm.data <- as.matrix(norm.data) 32 | # get close and distant cell pairs 33 | CloDistCP[['0.5']] <- NULL 34 | # get celltype of ST data 35 | celltype.st <- unique(STser$celltype) 36 | rm(STser); gc() 37 | 38 | # get result path 39 | if(grepl('CID', data)){ 40 | result.path <- paste0('./Data/Step1_LRPredictionResult/NG_BC_', data) 41 | }else if(grepl('CK', data)){ 42 | result.path <- paste0('./Data/Step1_LRPredictionResult/N_MI_', data) 43 | }else if(grepl('Slide', data)){ 44 | result.path <- paste0('./Data/Step1_LRPredictionResult/MouseEmbryo_', data) 45 | } 46 | 47 | output.path <- paste0('./Data/Step3_MIPCCForLRBench/', data, '_result.rds') 48 | dataset_EvalIndex1 <- lapply(methods, function(method){ 49 | print(method) 50 | 51 | # remove the LR pairs whose sender/receiver celltypes are not in celltypes of ST 52 | result <- readRDS(paste0(result.path, '/', method, '/result.rds')) 53 | result <- result$result 54 | result <- result[which(result$Sender %in% celltype.st), ] 55 | result <- result[which(result$Receiver %in% celltype.st), ] 56 | result$sr <- paste(result$Sender, result$Receiver, sep = '_') 57 | CellPairs <- names(CloDistCP$`0.1`) 58 | CellPairs1 <- as.data.frame(stringr::str_split(CellPairs, '_', simplify = TRUE)) 59 | CellPairs1 <- paste(CellPairs1$V2, CellPairs1$V1, sep = '_') 60 | CellPairs <- c(CellPairs, CellPairs1) 61 | remove.sr <- setdiff(unique(result$sr), CellPairs) 62 | if(length(remove.sr) != 0){ 63 | result <- result[-which(result$sr %in% remove.sr), ] 64 | } 65 | 66 | if(dim(result)[1]!=0){ 67 | if(grepl('CID', data) & method == 'RNAMagnet'){ 68 | genes <- readRDS('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/hs2mm_genes.rds') 69 | result <- mm2hs(result, genes) 70 | }else if(grepl('CK', data) & method == 'RNAMagnet'){ 71 | genes <- readRDS('~/1-Datasets/Nature_2022_MI/ScriptForCCC/hs2mm_genes.rds') 72 | result <- mm2hs(result, genes) 73 | }else if(grepl('Slide', data) & 74 | method %in% c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'NATMI', 75 | 'ICELLNET', 'NicheNet', 'scMLnet', 'iTALK', 'cell2cell')){ 76 | genes <- readRDS('~/1-Datasets/MouseEmbryo_GSE166692/sciSpaceData/mm2hs.rds') 77 | result <- hs2mm(result, genes) 78 | } 79 | 80 | EvaIndex1_result <- lapply(CloDistCP, function(CloDist){ 81 | result.tmp <- EvaIndex1_2(CloDist, norm.data, result) 82 | result.tmp 83 | }) 84 | 85 | }else{ 86 | EvaIndex1_result <- NA 87 | } 88 | 89 | return(EvaIndex1_result) 90 | }) 91 | names(dataset_EvalIndex1) <- methods 92 | saveRDS(dataset_EvalIndex1, file = output.path) 93 | } -------------------------------------------------------------------------------- /Step7_LRBenchSamplingBench/function.R: -------------------------------------------------------------------------------- 1 | RunTimeMemRecord <- function(data, ratio, methods, DataRecord){ 2 | # get result path 3 | if(ratio==100){ 4 | if(grepl('CK', data)){ 5 | result.path <- paste0('./Data/Step1_LRPredictionResult/N_MI_', data) 6 | }else if(grepl('CID', data)){ 7 | result.path <- paste0('./Data/Step1_LRPredictionResult/NG_BC_', data) 8 | }else if(grepl('Slide14', data)){ 9 | result.path <- paste0('./Data/Step1_LRPredictionResult/MouseEmbryo_', data) 10 | }else{ 11 | result.path <- paste0('./Data/Step1_LRPredictionResult/', data) 12 | } 13 | }else{ 14 | result.path <- paste0('./Data/Step10_LRBenchSamplingResult/', data, '_', ratio) 15 | } 16 | 17 | dataset_Record <- lapply(methods, function(method){ 18 | print(method) 19 | 20 | if(method == 'CytoTalk'){ 21 | record_dir <- paste0(result.path, '/', method) 22 | record_files <- list.files(record_dir, pattern = 'TimeMemRecord', full.names = TRUE) 23 | temp_record <- lapply(record_files, function(file){ 24 | record <- tryCatch(read.table(file, header = FALSE, sep = '\t', fill = TRUE), 25 | error=function(e){NA} 26 | ) 27 | if(class(record)=='data.frame'){ 28 | tmp_record <- RunTimeMemRecord_2(record) 29 | }else{ 30 | tmp_record <- NA 31 | } 32 | tmp_record 33 | }) 34 | temp_record[which(is.na(temp_record))] <- NULL 35 | temp_record <- do.call(rbind, temp_record) %>% as.data.frame() 36 | 37 | if(ratio == 100){ 38 | number_record <- DataRecord$Celltypes[which((DataRecord$ratio == ratio) & (DataRecord$datasets==data))] 39 | actual_cp <- number_record * number_record - number_record - number_record 40 | combn_cp <- dim(combn(seq(number_record), 2))[2] 41 | temp_record$clock_time <- (temp_record$clock_time/actual_cp)*combn_cp 42 | temp_record$linux_time <- (temp_record$linux_time/actual_cp)*combn_cp 43 | } 44 | 45 | temp_record <- data.frame(clock_time = sum(temp_record$clock_time), 46 | linux_time = sum(temp_record$linux_time), 47 | max_memory = max(temp_record$max_memory), 48 | cpu_linux = paste0(round(mean(as.numeric(gsub('%', '', temp_record$cpu_linux))), 0), '%')) 49 | }else{ 50 | record <- read.table(paste0(result.path, '/', method, '/', 'TimeMemRecord.txt'), 51 | header = FALSE, sep = '\t', fill = TRUE) 52 | temp_record <- RunTimeMemRecord_2(record) 53 | } 54 | 55 | temp_record 56 | }) 57 | names(dataset_Record) <- methods 58 | return(dataset_Record) 59 | } 60 | 61 | 62 | RunTimeMemRecord_2 <- function(record){ 63 | 64 | record <- record[, 2, drop = FALSE] 65 | if(dim(record)[1]==24){ 66 | record <- record[-1, , drop = FALSE] 67 | } 68 | record <- record[c(2,3,4,5,10),, drop = FALSE] 69 | 70 | 71 | # Linux_time: user+system time 72 | user_time <- as.numeric(gsub('User time \\(seconds\\): ', '', record[1,])) %>% as.numeric() 73 | sys_time <- as.numeric(gsub('System time \\(seconds\\): ', '', record[2,])) %>% as.numeric() 74 | linux_time <- user_time+sys_time 75 | 76 | # Clock_time 77 | clock_time <- gsub('Elapsed \\(wall clock\\) time \\(h:mm:ss or m:ss\\): ', '', record[4,]) 78 | if(stringr::str_count(clock_time, ':') == 1){ 79 | min_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[1]) 80 | min_time <- min_time*60 81 | sec_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[2]) 82 | clock_time <- min_time+sec_time; rm(sec_time, min_time) 83 | }else if(stringr::str_count(clock_time, ':') == 2){ 84 | h_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[1]) 85 | h_time <- h_time*60*60 86 | min_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[2]) 87 | min_time <- min_time*60 88 | sec_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[3]) 89 | clock_time <- h_time+min_time+sec_time; 90 | rm(sec_time, min_time, h_time);gc() 91 | } 92 | 93 | mem_linux <- gsub('Maximum resident set size \\(kbytes\\): ', '', record[5,]) 94 | mem_linux <- round(as.numeric(mem_linux)/1024/1024, 2) 95 | cpu_linux <- gsub('Percent of CPU this job got: ', '', record[3,]) 96 | 97 | return(data.frame(clock_time = clock_time, linux_time = linux_time, max_memory = mem_linux, cpu_linux = cpu_linux)) 98 | } 99 | -------------------------------------------------------------------------------- /Step6_LRBenchSampling/RunScript.R: -------------------------------------------------------------------------------- 1 | suppressMessages(library(Seurat)) 2 | source('../Script/Step0_LRToolsFunction/Step1_LRToolsFunction.R') 3 | set.seed(123) 4 | 5 | args <- commandArgs() 6 | tools <- args[6] 7 | print(tools) 8 | sampleID <- args[7] 9 | print(sampleID) 10 | 11 | species <- args[8] 12 | ratio <- args[9] 13 | print(species) 14 | print(ratio) 15 | 16 | if(grepl('CID', sampleID) & tools == 'RNAMagnet'){ 17 | 18 | fpath.dataset <- paste0('/home/ljx/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data_sampling_Mm/', 19 | sampleID,'_', ratio, '_ser.rds') 20 | 21 | }else if(grepl('CID', sampleID) & !(tools == 'RNAMagnet')){ 22 | 23 | fpath.dataset <- paste0('/home/ljx/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data_sampling/', 24 | sampleID, '_', ratio, '_ser.rds') 25 | 26 | }else if(grepl('CK', sampleID) & tools == 'RNAMagnet'){ 27 | 28 | fpath.dataset <- paste0('/home/ljx/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data_sampling_Mm/', 29 | sampleID, '_', ratio , '_ser.rds') 30 | 31 | }else if(grepl('CK', sampleID) & !(tools == 'RNAMagnet')){ 32 | 33 | fpath.dataset <- paste0('/home/ljx/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data_sampling/', 34 | sampleID, '_', ratio , '_ser.rds') 35 | 36 | }else if(grepl('Slide', sampleID)){ 37 | if(tools %in% c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'NicheNet', 'iTALK', 'ICELLNET', 'scMLnet','NATMI', 'cell2cell')){ 38 | fpath.dataset <- paste0('/home/ljx/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/scRNA_Data_Sampling/', 39 | sampleID, '_', ratio , '_hs_ser.rds') 40 | }else{ 41 | fpath.dataset <- paste0('/home/ljx/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/scRNA_Data_Sampling/', 42 | sampleID, '_', ratio , '_mm_ser.rds') 43 | } 44 | }else if(grepl('pbmc', sampleID) & !(tools == 'RNAMagnet')){ 45 | fpath.dataset <- paste0('/home/ljx/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/scRNA_Data_sampling/', sampleID, '_', ratio , '_ser.rds') 46 | }else if(grepl('pbmc', sampleID) & (tools == 'RNAMagnet')){ 47 | fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/scRNA_Data_sampling/', sampleID, '_', ratio , '_mm_ser.rds') 48 | } 49 | 50 | dir.output <- paste0('/home/ljx/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step10_LRBenchSamplingResult/', sampleID, '_', ratio) 51 | 52 | ser <- readRDS(fpath.dataset) 53 | 54 | if(!dir.exists(dir.output)){ 55 | dir.create(dir.output) 56 | } 57 | 58 | fpath.tools <- paste(dir.output, tools, sep = '/') 59 | if(!dir.exists(fpath.tools)){ 60 | dir.create(fpath.tools) 61 | } 62 | 63 | if(tools == 'CellCall'){ 64 | if(grepl('CK', sampleID) | grepl('pbmc', sampleID)){ 65 | n <- 2 66 | }else if(grepl('CID', sampleID) | grepl('Sper', sampleID)){ 67 | n <- 3 68 | }else if(grepl('Slide', sampleID)){ 69 | n <- 5 70 | } 71 | result <- CellCall_function(ser, n, species = species) 72 | }else if(tools == 'CellChat'){ 73 | result <- CellChat_function(ser, species = species) 74 | }else if(tools == 'CellPhoneDB2'){ 75 | result <- CellPhoneDB2_function(ser, fpath.tools) 76 | }else if(tools == 'CellPhoneDB3'){ 77 | result <- CellPhoneDB3_function(ser, fpath.tools) 78 | }else if(tools == 'CellTalker'){ 79 | result <- CellTalker_function(ser) 80 | }else if(tools == 'Connectome'){ 81 | result <- Connectome_function(ser, species = species) 82 | }else if(tools == 'CytoTalk'){ 83 | result <- CytoTalk_function(ser, fpath.tools, species = species) 84 | }else if(tools == 'Domino'){ 85 | if(sampleID %in% c('pbmc4k', 'pbmc8k')){ 86 | species <- 'hg38' 87 | }else if(sampleID == 'pbmc6k' | grepl('Sper' ,sampleID)){ 88 | species <- 'hg19' 89 | }else if(grepl('Slide' ,sampleID)){ 90 | species <- 'mm10' 91 | }else{ 92 | species <- 'hg38' 93 | } 94 | result <- Domino_function(ser, fpath.tools, species = species) 95 | }else if(tools == 'ICELLNET'){ 96 | result <- ICELLNET_function(ser) 97 | }else if(tools == 'iTALK'){ 98 | result <- iTALK_function(ser) 99 | }else if(tools == 'NATMI'){ 100 | result <- NATMI_function(ser, fpath.tools) 101 | }else if(tools == 'NicheNet'){ 102 | result <- NicheNet_function(ser, lr = TRUE) 103 | }else if(tools == 'PyMINEr'){ 104 | result <- PyMINEr_function(ser, fpath.tools, species) 105 | }else if(tools == 'RNAMagnet'){ 106 | result <- RNAMagnet_function(ser) 107 | }else if(tools == 'scConnect'){ 108 | result <- scConnect_function(ser, fpath.tools, species) 109 | }else if(tools == 'scMLnet'){ 110 | result <- scMLnet_function(ser) 111 | }else if(tools == 'scSeqComm'){ 112 | result <- scSeqComm_function(ser, species) 113 | }else if(tools == 'SingleCellSignalR'){ 114 | result <- SCSR_function(ser, species) 115 | } 116 | saveRDS(result, file = paste0(fpath.tools, '/result.rds')) -------------------------------------------------------------------------------- /Step7_LRBenchSamplingBench/Step1_DatasetRecord.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()); gc() 2 | suppressMessages(library(tidyverse)) 3 | set.seed(123) 4 | 5 | datasets <- list.files('./Data/Step6_LRBenchSamplingResult/', recursive = FALSE) %>% 6 | gsub('_[0-9]+', '',.) %>% unique() 7 | ratios <- c(50, 60, 70, 80, 90, 100) 8 | 9 | record_cells <- lapply(datasets, function(dataset){ 10 | tmp <- lapply(ratios, function(ratio){ 11 | if(grepl('CID', dataset) & ratio == 100){ 12 | fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data/', dataset, '_ser.rds') 13 | }else if(grepl('CID', dataset) & ratio != 100){ 14 | fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data_sampling/', dataset,'_', ratio, '_ser.rds') 15 | }else if(grepl('CK', dataset) & ratio == 100){ 16 | fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data/', dataset, '_ser.rds') 17 | }else if(grepl('CK', dataset) & ratio != 100){ 18 | fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data_sampling/', dataset, '_', ratio , '_ser.rds') 19 | }else if(grepl('Slide', dataset) & ratio == 100){ 20 | fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/DataForCCC/', dataset, '_mm_ser.rds') 21 | }else if(grepl('Slide', dataset) & ratio != 100){ 22 | fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/scRNA_Data_Sampling/', dataset, '_', ratio , '_mm_ser.rds') 23 | }else if(grepl('pbmc', dataset) & ratio == 100){ 24 | fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/DataForCCC/', dataset, '_ser.rds') 25 | }else if(grepl('pbmc', dataset) & ratio != 100){ 26 | fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/scRNA_Data_sampling/', dataset, '_', ratio , '_ser.rds') 27 | } 28 | ser <- readRDS(fpath.dataset) 29 | 30 | cells <- dim(ser)[[2]] 31 | })%>% unlist() 32 | 33 | tmp 34 | }) 35 | names(record_cells) <- datasets 36 | record_cells <- do.call(rbind, record_cells) 37 | colnames(record_cells) <- paste0('S', ratios) 38 | record_cells <- as.data.frame(record_cells) 39 | record_cells <- tibble::rownames_to_column(record_cells, 'datasets') 40 | record_cells <- pivot_longer(record_cells, -datasets, 'SampleRate') 41 | colnames(record_cells)[3] <- 'Cells' 42 | record_cells$class <- paste(record_cells$datasets, record_cells$SampleRate, sep = '_') 43 | record_cells <- record_cells[,-c(1:2)] 44 | 45 | 46 | record_celltypes <- lapply(datasets, function(dataset){ 47 | tmp <- lapply(ratios, function(ratio){ 48 | if(grepl('CID', dataset) & ratio == 100){ 49 | fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data/', dataset, '_ser.rds') 50 | }else if(grepl('CID', dataset) & ratio != 100){ 51 | fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data_sampling/', dataset,'_', ratio, '_ser.rds') 52 | }else if(grepl('CK', dataset) & ratio == 100){ 53 | fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data/', dataset, '_ser.rds') 54 | }else if(grepl('CK', dataset) & ratio != 100){ 55 | fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data_sampling/', dataset, '_', ratio , '_ser.rds') 56 | }else if(grepl('Slide', dataset) & ratio == 100){ 57 | fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/DataForCCC/', dataset, '_mm_ser.rds') 58 | }else if(grepl('Slide', dataset) & ratio != 100){ 59 | fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/scRNA_Data_Sampling/', dataset, '_', ratio , '_mm_ser.rds') 60 | }else if(grepl('pbmc', dataset) & ratio == 100){ 61 | fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/DataForCCC/', dataset, '_ser.rds') 62 | }else if(grepl('pbmc', dataset) & ratio != 100){ 63 | fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/scRNA_Data_sampling/', dataset, '_', ratio , '_ser.rds') 64 | } 65 | ser <- readRDS(fpath.dataset) 66 | 67 | celltype <- length(unique(ser$celltype)) 68 | })%>% unlist() 69 | 70 | tmp 71 | }) 72 | names(record_celltypes) <- datasets 73 | record_celltypes <- do.call(rbind, record_celltypes) 74 | colnames(record_celltypes) <- paste0('S', ratios) 75 | record_celltypes <- as.data.frame(record_celltypes) 76 | record_celltypes <- tibble::rownames_to_column(record_celltypes, 'datasets') 77 | record_celltypes <- pivot_longer(record_celltypes, -datasets, 'SampleRate') 78 | colnames(record_celltypes)[3] <- 'Celltypes' 79 | record_celltypes$class <- paste(record_celltypes$datasets, record_celltypes$SampleRate, sep = '_') 80 | record_celltypes <- record_celltypes[,-c(1:2)] 81 | 82 | record <- merge(record_cells, record_celltypes, by = 'class') 83 | record <- separate(record, class, c('datasets', 'ratio'), sep = '_') 84 | saveRDS(record, file = './Data/Step6_LRBenchSampling/DatasetsRecord.rds') 85 | -------------------------------------------------------------------------------- /Step5_BenchBasedCAGEProteomic/ScriptForCAGE.R: -------------------------------------------------------------------------------- 1 | rm(list = ls());gc() 2 | suppressMessages(library(ROCR)) 3 | suppressMessages(library(tidyverse)) 4 | source('./Script/Step5_BenchBasedCAGEProteomic/function.R') 5 | source('./Script/Step3_MIForLRBench/function.R') 6 | set.seed(123) 7 | 8 | #process cage data 9 | if(F){ 10 | celltype <- c('CD14+CD16- Monocytes', 'CD14-CD16+ Monocytes', 'CD19+ B cells', 11 | 'Dendritic Monocyte Immature derived', 'Dendritic Plasmacytoid', 12 | 'CD4+CD25-CD45RA- memory conventional T cells', 13 | 'CD4+CD25-CD45RA+ naive conventional T cells', 14 | 'CD8+ T cells', 'NK cells') 15 | cage_all <- data.table::fread('~/1-Datasets/Fantom5_CAGE/ExpressionGenes.txt', header = TRUE, sep = '\t') 16 | cage_all <- as.data.frame(cage_all) 17 | cage_all <- cage_all[ , c(1, which(colnames(cage_all) %in% celltype))] 18 | cage_all <- tibble::column_to_rownames(cage_all, 'ApprovedSymbol') 19 | colnames(cage_all) <- c('CD14Mono', 'CD16Mono', 'B', 20 | 'naiveCD4T', 'memoryCD4T', 'CD8T', 21 | 'mDC', 'pDC', 'NK') 22 | cage_all <- as.matrix(cage_all) 23 | cage <- lapply(1:dim(cage_all)[[2]], function(i){ 24 | x <- cage_all[,i] 25 | genes <- rownames(cage_all)[which(x>10)] 26 | paste(genes, colnames(cage_all)[i], sep = '_') 27 | }) 28 | cage <- unlist(cage) 29 | cage_genes <- list(all = rownames(cage_all), genes_ct = cage) 30 | saveRDS(cage_genes, file = './Data/Step5_BenchBasedCAGEProteomic/cagedata.rds') 31 | } 32 | 33 | #calculate index 34 | if(F){ 35 | cage_genes <- readRDS('./Data/Step5_BenchBasedCAGEProteomic/cagedata.rds') 36 | 37 | datasets <- c('pbmc4k', 'pbmc6k', 'pbmc8k') 38 | 39 | for (data in datasets) { 40 | fpath <- paste0('./Data/Step1_LRPredictionResult/', data) 41 | result.files <- list.files(fpath, full.names = TRUE) 42 | result.files <- result.files[-which(grepl('cellinker', result.files))] 43 | result_index <- lapply(result.files, function(result.file){ 44 | print(result.file) 45 | if(grepl('Domino', result.file) | grepl('scMLnet', result.file)){ 46 | result <- readRDS(paste0(result.file, '/result1.rds')) 47 | }else{ 48 | result <- readRDS(paste0(result.file, '/result.rds')) 49 | } 50 | 51 | result <- result$result 52 | 53 | if((dim(result)[[1]] != 0) & 'LRscore' %in% colnames(result)){ 54 | if(grepl('RNAMagnet', result.file)){ 55 | genes <- readRDS(paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/', data, '_hs2mm.rds')) 56 | result <- mm2hs(result, genes) 57 | } 58 | 59 | result$receptor_ct <- lapply(1:dim(result)[[1]], function(i){ 60 | receptor <- result$Receptor[i] 61 | if(grepl('&', receptor)){ 62 | receptor <- unlist(strsplit(receptor, '&')) 63 | } 64 | receiver <- result$Receiver[i] 65 | tmp <- paste(receptor, receiver, sep = '_') 66 | label <- ifelse(all(tmp %in% cage_genes$genes_ct), TRUE, FALSE) 67 | if(!label){ 68 | label <- ifelse(all(receptor %in% cage_genes$all), FALSE, NA) 69 | } 70 | label 71 | }) %>% unlist() 72 | 73 | result$ligand_ct <- lapply(1:dim(result)[[1]], function(i){ 74 | ligand <- result$Ligand[i] 75 | if(grepl('&', ligand)){ 76 | ligand <- unlist(strsplit(ligand, '&')) 77 | } 78 | sender <- result$Sender[i] 79 | tmp <- paste(ligand, sender, sep = '_') 80 | label <- ifelse(all(tmp %in% cage_genes$genes_ct), TRUE, FALSE) 81 | if(!label){ 82 | label <- ifelse(all(ligand %in% cage_genes$all), FALSE, NA) 83 | } 84 | label 85 | }) %>% unlist() 86 | 87 | result <- result[which(!is.na(result$receptor_ct)), ] 88 | result <- result[which(!is.na(result$ligand_ct)),] 89 | 90 | result$label <- lapply(1:dim(result)[[1]], function(i){ 91 | all(result$ligand_ct[i] & result$receptor_ct[i]) 92 | }) %>% unlist() 93 | 94 | result$sr <- paste(result$Sender, result$Receiver, sep = '_') 95 | result <- result[, c('LRscore', 'label', 'sr')] 96 | result <- split(result, result$sr) 97 | 98 | res_index <- lapply(result, function(res){ 99 | index <- get_evaluate_metrics(as.numeric(res$LRscore), res$label) 100 | index <- index$perf_metrics 101 | }) 102 | res_index <- do.call(rbind, res_index) 103 | res_index <- as.data.frame(res_index) 104 | 105 | }else{ 106 | res_index <- NA 107 | } 108 | res_index 109 | }) 110 | 111 | names(result_index) <- substring(result.files, 40) 112 | result_index[which(is.na(result_index))] <- NULL 113 | result_index <- do.call(rbind, result_index) 114 | result_index <- result_index[which(rowSums(result_index)!=0), ] 115 | result_index <- tibble::rownames_to_column(result_index, 'temp') 116 | result_index <- tidyr::separate(result_index, temp, c('methods', 'sr'), '\\.') 117 | result_index$dataset <- data 118 | result_index 119 | saveRDS(result_index, file = paste0('./Data/Step5_BenchBasedCAGEProteomic/', data, '_cage.rds')) 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /Step1_LRPredictionResult/RunScript.R: -------------------------------------------------------------------------------- 1 | suppressMessages(library(Seurat)) 2 | source('../Script/Step0_LRToolsFunction/Step1_LRToolsFunction.R') 3 | set.seed(123) 4 | 5 | args <- commandArgs() 6 | tools <- args[6] 7 | print(tools) 8 | sampleID <- args[7] 9 | print(sampleID) 10 | species <- args[8] 11 | print(species) 12 | 13 | if(grepl('CID', sampleID) & tools == 'RNAMagnet'){ 14 | fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data_Mm/', 15 | sampleID, '_ser.rds') 16 | dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/NG_BC_', sampleID) 17 | }else if(grepl('CID', sampleID) & !(tools == 'RNAMagnet')){ 18 | fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data/', 19 | sampleID, '_ser.rds') 20 | dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/NG_BC_', sampleID) 21 | }else if(grepl('CK', sampleID) & tools == 'RNAMagnet'){ 22 | fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data_Mm/', 23 | sampleID, '_ser.rds') 24 | dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/N_MI_', sampleID) 25 | }else if(grepl('CK', sampleID) & !(tools == 'RNAMagnet')){ 26 | fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data/', 27 | sampleID, '_ser.rds') 28 | dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/N_MI_', sampleID) 29 | }else if(grepl('Slide', sampleID)){ 30 | if(tools %in% c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'NicheNet', 'iTALK', 'ICELLNET', 'scMLnet','NATMI', 'cell2cell')){ 31 | fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/DataForCCC/', 32 | sampleID, '_hs_ser.rds') 33 | }else{ 34 | fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/DataForCCC/', 35 | sampleID, '_mm_ser.rds') 36 | } 37 | dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/MouseEmbryo_', sampleID) 38 | }else if(grepl('pbmc', sampleID) & !(tools == 'RNAMagnet')){ 39 | fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/DataForCCC/', sampleID, '_ser.rds') 40 | dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/', sampleID) 41 | }else if(grepl('pbmc', sampleID) & (tools == 'RNAMagnet')){ 42 | fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/DataForCCC/', sampleID, '_mm_ser.rds') 43 | dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/', sampleID) 44 | } 45 | 46 | ser <- readRDS(fpath.dataset) 47 | 48 | if(!dir.exists(dir.output)){ 49 | dir.create(dir.output) 50 | } 51 | 52 | fpath.tools <- paste(dir.output, tools, sep = '/') 53 | if(!dir.exists(fpath.tools)){ 54 | dir.create(fpath.tools) 55 | } 56 | 57 | if(tools == 'cell2cell'){ 58 | result <- cell2cell_function(ser, fpath = fpath.tools, species = species) 59 | }else if(tools == 'CellCall'){ 60 | if(grepl('CK', sampleID) | grepl('pbmc', sampleID)){ 61 | n <- 2 62 | }else if(grepl('CID', sampleID)){ 63 | n <- 3 64 | }else if(grepl('Slide', sampleID)){ 65 | n <- 5 66 | } 67 | result <- CellCall_function(ser, n, species = species) 68 | }else if(tools == 'CellChat'){ 69 | result <- CellChat_function(ser, species = species) 70 | }else if(tools == 'CellPhoneDB2'){ 71 | result <- CellPhoneDB2_function(ser, fpath.tools) 72 | }else if(tools == 'CellPhoneDB3'){ 73 | result <- CellPhoneDB3_function(ser, fpath.tools) 74 | }else if(tools == 'CellTalker'){ 75 | result <- CellTalker_function(ser) 76 | }else if(tools == 'Connectome'){ 77 | result <- Connectome_function(ser, species = species) 78 | }else if(tools == 'CytoTalk'){ 79 | result <- CytoTalk_function(ser, fpath.tools, species = species) 80 | }else if(tools == 'Domino'){ 81 | if(sampleID %in% c('pbmc4k', 'pbmc8k')){ 82 | species <- 'hg38' 83 | }else if(sampleID == 'pbmc6k'){ 84 | species <- 'hg19' 85 | }else if(grepl('Slide' ,sampleID)){ 86 | species <- 'mm10' 87 | }else{ 88 | species <- 'hg38' 89 | } 90 | result <- Domino_function(ser, fpath.tools, species = species) 91 | }else if(tools == 'ICELLNET'){ 92 | result <- ICELLNET_function(ser) 93 | }else if(tools == 'iTALK'){ 94 | result <- iTALK_function(ser) 95 | }else if(tools == 'NATMI'){ 96 | result <- NATMI_function(ser, fpath.tools, species = species) 97 | }else if(tools == 'NicheNet'){ 98 | result <- NicheNet_function(ser, lr = TRUE) 99 | }else if(tools == 'PyMINEr'){ 100 | result <- PyMINEr_function(ser, fpath.tools, species) 101 | }else if(tools == 'RNAMagnet'){ 102 | result <- RNAMagnet_function(ser) 103 | }else if(tools == 'scConnect'){ 104 | result <- scConnect_function(ser, fpath.tools, species) 105 | }else if(tools == 'scMLnet'){ 106 | result <- scMLnet_function(ser) 107 | }else if(tools == 'scSeqComm'){ 108 | result <- scSeqComm_function(ser, species) 109 | }else if(tools == 'SingleCellSignalR'){ 110 | result <- SCSR_function(ser, species) 111 | } 112 | 113 | saveRDS(result, file = paste0(fpath.tools, '/result.rds')) -------------------------------------------------------------------------------- /Step9_LRTBench/Step1_ScriptForBench.R: -------------------------------------------------------------------------------- 1 | rm(list = ls());gc() 2 | setwd('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step8_LRTPredictionResult/') 3 | library(dplyr) 4 | library(ROCR) 5 | library(ggplot2) 6 | library(ggsci) 7 | library(ggpubr) 8 | library(tidyverse) 9 | source('../../Script/Step5_BenchBasedCAGEProteomic/function.R') 10 | set.seed(123) 11 | 12 | 13 | ########### 14 | ## color ## 15 | ########### 16 | 17 | scales::show_col(pal_aaas(palette = "default", alpha = 0.6)(10)) 18 | mycolors_aaas <- pal_aaas(palette = "default", alpha = 0.6)(10) 19 | 20 | mycolor_software <- mycolors_aaas[c(1:5)] 21 | names(mycolor_software) <- c('CytoTalk', "NicheNet", "MISTy", "HoloNet", "stMLnet") 22 | scales::show_col(mycolor_software) 23 | 24 | ##################### 25 | ## load all result ## 26 | ##################### 27 | 28 | if(F){ 29 | data_dirs <- list.files('.', full.names = TRUE) 30 | all_result <- lapply(data_dirs, function(data_dir){ 31 | print(data_dir) 32 | methods_dirs <- list.files(data_dir, full.names = TRUE) 33 | methods <- list.files(data_dir) 34 | used_methods <- c('CytoTalk', 'HoloNet', 'MISTy', 'NicheNet', 'stMLnet') 35 | index <- which(methods %in% used_methods) 36 | methods_dirs <- methods_dirs[index] 37 | methods <- methods[index] 38 | data_result <- lapply(seq(methods_dirs), function(i){ 39 | print(i) 40 | res_files <- list.files(methods_dirs[i], '_result.rds', full.names = TRUE) 41 | if(length(res_files)>0){ 42 | res <- lapply(res_files, function(res_file){ 43 | res_temp <- readRDS(res_file) 44 | if(dim(res_temp)[[1]] != 0){ 45 | res_temp$methods <- methods[i] 46 | }else{ 47 | res_temp <- NA 48 | } 49 | res_temp 50 | }) 51 | names(res) <- list.files(methods_dirs[i], '_result.rds') %>% gsub('_result.rds', '', .) 52 | res[which(is.na(res))] <- NULL 53 | }else{ 54 | res <- NA 55 | } 56 | res 57 | }) 58 | names(data_result) <- methods 59 | data_result[which(is.na(data_result))] <- NULL 60 | data_result 61 | }) 62 | names(all_result) <- list.files('.') 63 | saveRDS(all_result, file = 'all_result.rds') 64 | } 65 | 66 | setwd('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data') 67 | source('../Script/Step9_LRTBench/function.R') 68 | all_result <- readRDS('./Step8_LRTPredictionResult/all_result.rds') 69 | 70 | ############### 71 | ## Calculate ## 72 | ############### 73 | 74 | result_index <- lapply(names(all_result), function(data){ 75 | print(data) 76 | data_res <- all_result[[data]] 77 | 78 | if(grepl('CID', data)){ 79 | 80 | ## groundtrue 81 | degs_ls <- readRDS('./Step0_SharedInfo/LRTBench/BC_celline/degs_ls_p.rds') 82 | 83 | methods_res <- lapply(data_res, function(x){x$CancerEpithelial}) 84 | methods_res[unlist(lapply(methods_res, is.null))] <- NULL 85 | 86 | result <- getallmetrics(methods_res, degs_ls) 87 | result$celltype <- 'CancerEpithelial' 88 | 89 | result1 <- getallrecord(methods_res, degs_ls) 90 | saveRDS(result1, file = paste0('./Step8_LRTBenchResult/record/', data, '.rds')) 91 | 92 | }else if(grepl('control', data)){ 93 | 94 | ## groundtrue 95 | degs_ls <- readRDS('~/1-Datasets/CellLinesDatasets/ProjectForLRTBench/result/degs_ls_p.rds') 96 | used <- which(grepl('GSE206947', names(degs_ls)) | 97 | grepl('GSE181575', names(degs_ls)) | 98 | grepl('GSE123018', names(degs_ls))) 99 | degs_ls_used <- degs_ls[used] 100 | 101 | methods_res <- lapply(data_res, function(x){x$Fibroblast}) 102 | methods_res[unlist(lapply(methods_res, is.null))] <- NULL 103 | 104 | result <- getallmetrics(methods_res, degs_ls_used) 105 | result$celltype <- 'Fibroblast' 106 | 107 | result1 <- getallrecord(methods_res, degs_ls_used) 108 | saveRDS(result1, file = paste0('./Step8_LRTBenchResult/record/', data, '.rds')) 109 | 110 | }else if(grepl('UKF', data)){ 111 | 112 | ## groundtrue 113 | degs_ls <- readRDS('~/1-Datasets/CellLinesDatasets/ProjectForLRTBench/result/degs_ls_p.rds') 114 | 115 | methods_res_macro <- lapply(data_res, function(x){x$macrophages}) 116 | methods_res_macro[unlist(lapply(methods_res_macro, is.null))] <- NULL 117 | 118 | methods_res_mal <- lapply(data_res, function(x){x$Malignant}) 119 | methods_res_mal[unlist(lapply(methods_res_mal, is.null))] <- NULL 120 | 121 | used <- which(grepl('GSE69104', names(degs_ls)) & grepl('TAM', names(degs_ls))) 122 | degs_ls_used <- degs_ls[used] 123 | result_macro <- getallmetrics(methods_res_macro, degs_ls_used) 124 | result_macro1 <- getallrecord(methods_res_macro, degs_ls_used) 125 | saveRDS(result_macro1, file = paste0('./Step8_LRTBenchResult/record/', data, '_macro.rds')) 126 | 127 | used <- which(grepl('GSE140145', names(degs_ls))| 128 | grepl('GSE116414', names(degs_ls))) 129 | degs_ls_used <- degs_ls[used] 130 | result_mal <- getallmetrics(methods_res_mal, degs_ls_used) 131 | result_mal1 <- getallrecord(methods_res_mal, degs_ls_used) 132 | saveRDS(result_mal1, file = paste0('./Step8_LRTBenchResult/record/', data, '_mal.rds')) 133 | 134 | result_macro$celltype <- 'macrophages' 135 | result_mal$celltype <- 'malignant' 136 | result <- rbind(result_macro, result_mal) 137 | 138 | } 139 | result 140 | }) 141 | names(result_index) <- names(all_result) 142 | saveRDS(result_index,file = './Step8_LRTBenchResult/result_index.rds') 143 | -------------------------------------------------------------------------------- /Step4_SIRSIForLRBench/function.R: -------------------------------------------------------------------------------- 1 | IDTransform <- function(data, method, result){ 2 | if(dim(result)[1]!=0){ 3 | if(grepl('CID', data) & method == 'RNAMagnet'){ 4 | genes <- readRDS('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/hs2mm_genes.rds') 5 | result <- mm2hs(result, genes) 6 | }else if(grepl('CK', data) & method == 'RNAMagnet'){ 7 | genes <- readRDS('~/1-Datasets/Nature_2022_MI/ScriptForCCC/hs2mm_genes.rds') 8 | result <- mm2hs(result, genes) 9 | }else if(grepl('Slide', data) & 10 | method %in% c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'NATMI', 11 | 'ICELLNET', 'NicheNet', 'scMLnet', 'iTALK', 'cell2cell')){ 12 | genes <- readRDS('~/1-Datasets/MouseEmbryo_GSE166692/sciSpaceData/mm2hs.rds') 13 | result <- hs2mm(result, genes) 14 | } 15 | } 16 | return(result) 17 | } 18 | 19 | CalSI_1 <- function(a, b) { 20 | intersection = length(intersect(unique(a), unique(b))) 21 | #union = length(a) + length(b) - intersection 22 | union <- min(length(unique(a)), length(unique(b))) 23 | return (intersection/union) 24 | } 25 | 26 | CalSI_2 <- function(data){ 27 | methods <- c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'Connectome', 28 | 'NATMI', 'ICELLNET', 'scConnect', 'CellChat', 'SingleCellSignalR', 29 | 'CellCall', 'scSeqComm', 'NicheNet', 'Domino', 'scMLnet', 'iTALK', 30 | 'cell2cell', 'RNAMagnet', 'PyMINEr', 'CytoTalk') 31 | 32 | # get result path 33 | if(grepl('CID', data)){ 34 | result.path <- paste0('./Data/Step1_LRPredictionResult/NG_BC_', data) 35 | }else if(grepl('CK', data)){ 36 | result.path <- paste0('./Data/Step1_LRPredictionResult/N_MI_', data) 37 | }else if(grepl('Slide', data)){ 38 | result.path <- paste0('./Data/Step1_LRPredictionResult/MouseEmbryo_', data) 39 | } 40 | 41 | SI_res <- lapply(methods, function(method1){ 42 | print(method1) 43 | if(method1 == 'scMLnet' | method1 == 'Domino'){ 44 | result1 <- readRDS(paste0(result.path, '/', method1, '/result1.rds')) 45 | }else{ 46 | result1 <- readRDS(paste0(result.path, '/', method1, '/result.rds')) 47 | } 48 | 49 | result1 <- result1$result 50 | result1 <- IDTransform(data, method1, result1) 51 | temp.si <- lapply(methods, function(method2){ 52 | if(method2 == 'scMLnet' | method2 == 'Domino'){ 53 | result2 <- readRDS(paste0(result.path, '/', method2, '/result1.rds')) 54 | }else{ 55 | result2 <- readRDS(paste0(result.path, '/', method2, '/result.rds')) 56 | } 57 | 58 | result2 <- result2$result 59 | result2 <- IDTransform(data, method2, result2) 60 | if(dim(result1)[1]==0 | dim(result2)[1]==0){ 61 | tmp.si <- NA 62 | }else{ 63 | tmp.si <- CalSI_1(as.character(result1$all), as.character(result2$all)) 64 | } 65 | 66 | tmp.si 67 | }) 68 | temp.si <- data.frame(SI = unlist(temp.si), row.names = methods) 69 | temp.si 70 | }) 71 | SI_res <- do.call(cbind, SI_res) 72 | colnames(SI_res) <- methods 73 | #SI_res <- SI_res[!apply(SI_res, 1, function(x){all(is.na(x))}), ] 74 | #SI_res <- SI_res[, !apply(SI_res, 2, function(x){all(is.na(x))})] 75 | return(SI_res) 76 | } 77 | 78 | CalRSI <- function(data){ 79 | # remove cell2cell 80 | methods <- c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'Connectome', 81 | 'NATMI', 'ICELLNET', 'scConnect', 'CellChat', 'SingleCellSignalR', 82 | 'CellCall', 'scSeqComm', 'NicheNet', 'Domino', 'scMLnet', 'iTALK', 83 | 'cell2cell', 'RNAMagnet', 'PyMINEr', 'CytoTalk') 84 | 85 | # get result path 86 | if(grepl('CID', data)){ 87 | result.path <- paste0('./Data/Step1_LRPredictionResult/NG_BC_', data) 88 | }else if(grepl('CK', data)){ 89 | result.path <- paste0('./Data/Step1_LRPredictionResult/N_MI_', data) 90 | }else if(grepl('Slide', data)){ 91 | result.path <- paste0('./Data/Step1_LRPredictionResult/MouseEmbryo_', data) 92 | } 93 | 94 | RSI_res <- lapply(methods, function(method1){ 95 | print(paste0('Method1: ', method1)) 96 | if(method1 == 'scMLnet' | method1 == 'Domino'){ 97 | result1 <- readRDS(paste0(result.path, '/', method1, '/result1.rds')) # recode the LR score 98 | }else{ 99 | result1 <- readRDS(paste0(result.path, '/', method1, '/result.rds')) 100 | } 101 | 102 | result1 <- as.data.frame(result1$result) 103 | result1$LRscore <- as.numeric(result1$LRscore) 104 | result1 <- IDTransform(data, method1, result1) 105 | if(method1 == 'CytoTalk'){ 106 | result1$rank <- rank(result1$LRscore) 107 | }else{ 108 | result1$rank <- rank(-result1$LRscore) 109 | } 110 | 111 | temp.rsi <- lapply(methods, function(method2){ 112 | print(paste0('Method2: ', method2)) 113 | if(method2 == 'scMLnet' | method2 == 'Domino'){ 114 | result2 <- readRDS(paste0(result.path, '/', method2, '/result1.rds')) 115 | }else{ 116 | result2 <- readRDS(paste0(result.path, '/', method2, '/result.rds')) 117 | } 118 | 119 | result2 <- as.data.frame(result2$result) 120 | result2$LRscore <- as.numeric(result2$LRscore) 121 | result2 <- IDTransform(data, method2, result2) 122 | if(method2 == 'CytoTalk'){ 123 | result2$rank <- rank(result2$LRscore) 124 | }else{ 125 | result2$rank <- rank(-result2$LRscore) 126 | } 127 | 128 | if(dim(result1)[1]==0 | dim(result2)[1]==0){ 129 | tmp.rsi <- NA 130 | }else{ 131 | overlap.lr <- intersect(result1$all, result2$all) 132 | if(length(overlap.lr)!=0){ 133 | rank1 <- result1[which(result1$all %in% overlap.lr),]$rank/dim(result1)[1] 134 | rank2 <- result2[which(result2$all %in% overlap.lr), ]$rank/dim(result2)[1] 135 | mean.rank <- mean(abs(rank1-rank2)) 136 | tmp.rsi <- 1-mean.rank 137 | }else{tmp.rsi <- 0} 138 | } 139 | tmp.rsi 140 | }) 141 | temp.rsi <- data.frame(RSI = unlist(temp.rsi), row.names = methods) 142 | temp.rsi 143 | }) 144 | RSI_res <- do.call(cbind, RSI_res) 145 | colnames(RSI_res) <- methods 146 | #RSI_res <- RSI_res[!apply(RSI_res, 1, function(x){all(is.na(x))}), ] 147 | #RSI_res <- RSI_res[, !apply(RSI_res, 2, function(x){all(is.na(x))})] 148 | 149 | return(RSI_res) 150 | } 151 | -------------------------------------------------------------------------------- /Step5_BenchBasedCAGEProteomic/ScriptForProteomics.R: -------------------------------------------------------------------------------- 1 | rm(list = ls());gc() 2 | suppressMessages(library(data.table)) 3 | suppressMessages(library(tidyverse)) 4 | suppressMessages(library(ROCR)) 5 | source('./Script/Step3_MIForLRBench/function.R') 6 | source('./Script/Step8_BenchBasedCAGEProteomic/function.R') 7 | set.seed(123) 8 | 9 | # process proteomics data 10 | if(F){ 11 | proteinGroup <- fread('~/1-Datasets/Proteomics_28/Data/proteinGroups.txt') 12 | pep <- proteinGroup[, c(7,549:816)] 13 | rm(proteinGroup); gc() 14 | 15 | pep <- pep[which(pep$Gene.names != ''), ] 16 | # pep 17 | if(F){ 18 | colnames(pep) <- gsub('Peptides.', '', colnames(pep)) 19 | pep <- as.data.frame(pep) 20 | pep <- pep[, c(1, which(grepl('steady-state', colnames(pep))))] 21 | pep <- pep[, which(!grepl('Library_single', colnames(pep)))] 22 | colnames(pep) <- gsub(' \\(steady-state\\)', '', colnames(pep)) 23 | 24 | colnames(pep)[1] <- 'genes' 25 | pep <- tidyr::separate_rows(pep, genes, sep = ';') 26 | pep <- aggregate(.~genes, data = pep, FUN=mean) %>% 27 | tibble::column_to_rownames(., 'genes') %>% 28 | as.matrix() %>% t() %>% as.data.frame() 29 | 30 | pep$celltype <- gsub('_[0-9]+', '', rownames(pep)) 31 | pep <- aggregate(.~celltype, data = pep, FUN = mean) %>% 32 | tibble::column_to_rownames(., 'celltype') %>% 33 | as.matrix() %>% t() %>% as.data.frame() 34 | 35 | celltype <- c('MO.classical', 'MO.nonclassical', 'mDC', 'pDC', 36 | 'B.naive', 'B.memory', 37 | 'T8.naive', 'T8.CM', 'T8.EM', 'T8.EMRA', 38 | 'T4.CM', 'T4.EM', 'T4.naive', 39 | 'NK.dim', 'NK.bright', 'Thrombocyte' 40 | ) 41 | 42 | pep <- pep[ , which(colnames(pep) %in% celltype)] 43 | 44 | pep <- as.matrix(pep) %>% t() %>% as.data.frame() 45 | pep$celltype <- c('B', 'B', 'mDC', 'CD14Mono', 'CD16Mono', 46 | 'NK', 'NK', 'pDC', 'memoryCD4T', 'memoryCD4T', 47 | 'naiveCD4T', 'CD8T', 'CD8T', 'CD8T', 'CD8T', 'Platelet') 48 | pep <- aggregate(.~celltype, data = pep, FUN = mean) %>% 49 | tibble::column_to_rownames(., 'celltype') %>% 50 | as.matrix() %>% t() %>% as.data.frame() 51 | 52 | pep_genes <- lapply(1:dim(pep)[[2]], function(i){ 53 | x <- pep[,i] 54 | genes <- rownames(pep)[which(x>=2)] 55 | paste(genes, colnames(pep)[i], sep = '_') 56 | }) 57 | pep_genes <- unlist(pep_genes) 58 | pep_genes_list <- list(all = rownames(pep), genes_ct = pep_genes) 59 | saveRDS(pep_genes_list, 60 | file = '~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step5_BenchBasedCAGEProteomic/pepdata.rds') 61 | } 62 | } 63 | 64 | rm(list = ls());gc() 65 | #calculate index -- peptides 66 | if(F){ 67 | pep_genes <- readRDS('./Data/Step8_BenchBasedCAGEProteomic/pepdata.rds') 68 | 69 | datasets <- c('pbmc4k', 'pbmc6k', 'pbmc8k') 70 | 71 | for (data in datasets) { 72 | fpath <- paste0('./Data/Step1_LRPredictionResult/', data) 73 | result.files <- list.files(fpath, full.names = TRUE) 74 | result.files <- result.files[-which(grepl('cellinker', result.files))] 75 | result_index <- lapply(result.files, function(result.file){ 76 | print(result.file) 77 | if(grepl('scMLnet', result.file) | grepl('Domino', result.file)){ 78 | result <- readRDS(paste0(result.file, '/result1.rds')) 79 | }else{ 80 | result <- readRDS(paste0(result.file, '/result.rds')) 81 | } 82 | result <- result$result 83 | 84 | if((dim(result)[[1]] != 0) & 'LRscore' %in% colnames(result)){ 85 | if(grepl('RNAMagnet', result.file)){ 86 | genes <- readRDS(paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/', data, '_hs2mm.rds')) 87 | result <- mm2hs(result, genes) 88 | } 89 | 90 | result$receptor_ct <- lapply(1:dim(result)[[1]], function(i){ 91 | receptor <- result$Receptor[i] 92 | if(grepl('&', receptor)){ 93 | receptor <- unlist(strsplit(receptor, '&')) 94 | } 95 | receiver <- result$Receiver[i] 96 | tmp <- paste(receptor, receiver, sep = '_') 97 | label <- ifelse(all(tmp %in% pep_genes$genes_ct), TRUE, FALSE) 98 | if(!label){ 99 | label <- ifelse(all(receptor %in% pep_genes$all), FALSE, NA) 100 | } 101 | label 102 | }) %>% unlist() 103 | 104 | result$ligand_ct <- lapply(1:dim(result)[[1]], function(i){ 105 | ligand <- result$Ligand[i] 106 | if(grepl('&', ligand)){ 107 | ligand <- unlist(strsplit(ligand, '&')) 108 | } 109 | sender <- result$Sender[i] 110 | tmp <- paste(ligand, sender, sep = '_') 111 | label <- ifelse(all(tmp %in% pep_genes$genes_ct), TRUE, FALSE) 112 | if(!label){ 113 | label <- ifelse(all(ligand %in% pep_genes$all), FALSE, NA) 114 | } 115 | label 116 | }) %>% unlist() 117 | 118 | result <- result[which(!is.na(result$receptor_ct)), ] 119 | result <- result[which(!is.na(result$ligand_ct)),] 120 | 121 | result$label <- lapply(1:dim(result)[[1]], function(i){ 122 | all(result$ligand_ct[i] & result$receptor_ct[i]) 123 | }) %>% unlist() 124 | 125 | result$sr <- paste(result$Sender, result$Receiver, sep = '_') 126 | result <- result[, c('LRscore', 'label', 'sr')] 127 | result <- split(result, result$sr) 128 | 129 | res_index <- lapply(result, function(res){ 130 | index <- get_evaluate_metrics(as.numeric(res$LRscore), res$label) 131 | index <- index$perf_metrics 132 | }) 133 | res_index <- do.call(rbind, res_index) 134 | res_index <- as.data.frame(res_index) 135 | 136 | }else{ 137 | res_index <- NA 138 | } 139 | res_index 140 | }) 141 | 142 | names(result_index) <- substring(result.files, 40) 143 | result_index[which(is.na(result_index))] <- NULL 144 | result_index <- do.call(rbind, result_index) 145 | result_index <- result_index[which(rowSums(result_index)!=0), ] 146 | result_index <- tibble::rownames_to_column(result_index, 'temp') 147 | result_index <- tidyr::separate(result_index, temp, c('methods', 'sr'), '\\.') 148 | result_index$dataset <- data 149 | result_index 150 | saveRDS(result_index, file = paste0('./Data/Step5_BenchBasedCAGEProteomic/', data, '_pep.rds')) 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /Step9_LRTBench/function.R: -------------------------------------------------------------------------------- 1 | getallmetrics <- function(methods_res, degs_ls){ 2 | result <- lapply(methods_res, function(res){ 3 | degs.tmp <- lapply(names(degs_ls), function(degs_names){ 4 | 5 | key <- strsplit(degs_names,'_')[[1]][2] 6 | type <- ifelse(grepl('KO',degs_names),'Receptor','Ligand') 7 | 8 | res$regulon <- as.character(res$regulon) 9 | res$target <- as.character(res$target) 10 | res$value <- as.numeric(res$value) 11 | 12 | if(grepl('NicheNet', res$methods[1]) & type == 'Receptor'){ 13 | if(key == 'AXL'){ 14 | score <- res[which(res$regulon=='GAS6'),] 15 | }else if(key == 'CXCR4'){ 16 | score <- res[which(res$regulon=='CXCL12'),] 17 | }else if(key == 'NRP1'){ 18 | score <- res[which(res$regulon=='VEGFA'),] 19 | }else if(key == 'CSF1R'){ 20 | score <- res[which(res$regulon=='CSF1'),] 21 | }else if(key == 'FGFR1'){ 22 | score <- res[which(res$regulon=='FGF1'),] 23 | }else if(key == 'ALK'){ 24 | score <- res[which(res$regulon %in% c('ALKAL1', 'ALKAL2')),] 25 | if(dim(score)[[1]]!=0){ 26 | score <- aggregate(value~target, score, sum) 27 | } 28 | } 29 | }else{ 30 | score <- res %>% filter(regulon == key & type == type) 31 | } 32 | 33 | if(grepl('GSE181575',degs_names)){ 34 | neg.genes <- setdiff(score$target, names(degs_ls[[degs_names]])) 35 | isDEGs <- rep(FALSE, length(neg.genes)) 36 | names(isDEGs) <- neg.genes 37 | degs_ls_used <- append(degs_ls[[degs_names]], isDEGs) 38 | }else{ 39 | degs_ls_used <- degs_ls[[degs_names]] 40 | } 41 | 42 | genes <- intersect(score$target, names(degs_ls_used)) 43 | label <- degs_ls_used[genes] 44 | pred <- score$value[match(genes,score$target)] 45 | res.tmp <- get_evaluate_metrics(pred,label) 46 | res.tmp <- res.tmp$perf_metrics 47 | 48 | if(length(names(table(label))) == 2){ 49 | AUPRCRatios <- res.tmp[2]/(as.numeric(table(label)['TRUE'])/length(label)) 50 | names(AUPRCRatios) <- 'AUPRCRatios' 51 | }else{ 52 | AUPRCRatios <- NA 53 | names(AUPRCRatios) <- 'AUPRCRatios' 54 | } 55 | res.tmp <- append(res.tmp, AUPRCRatios) 56 | res.tmp 57 | 58 | }) 59 | names(degs.tmp) <- names(degs_ls) 60 | degs.tmp <- do.call(rbind, degs.tmp) %>% 61 | as.data.frame() %>% 62 | tibble::rownames_to_column(., 'celllines') 63 | degs.tmp 64 | }) 65 | result <- do.call(rbind, result) %>% as.data.frame() %>% 66 | tibble::rownames_to_column(., 'methods') 67 | result$methods <- gsub('\\.[0-9]+', '', result$methods) 68 | return(result) 69 | } 70 | 71 | getallrecord <- function(methods_res, degs_ls){ 72 | result1 <- lapply(methods_res, function(res){ 73 | degs.tmp <- lapply(names(degs_ls), function(degs_names){ 74 | 75 | key <- strsplit(degs_names,'_')[[1]][2] 76 | type <- ifelse(grepl('KO',degs_names),'Receptor','Ligand') 77 | 78 | res$regulon <- as.character(res$regulon) 79 | res$target <- as.character(res$target) 80 | res$value <- as.numeric(res$value) 81 | 82 | if(grepl('NicheNet', res$methods[1]) & type == 'Receptor'){ 83 | if(key == 'AXL'){ 84 | score <- res[which(res$regulon=='GAS6'),] 85 | }else if(key == 'CXCR4'){ 86 | score <- res[which(res$regulon=='CXCL12'),] 87 | }else if(key == 'NRP1'){ 88 | score <- res[which(res$regulon=='VEGFA'),] 89 | }else if(key == 'CSF1R'){ 90 | score <- res[which(res$regulon=='CSF1'),] 91 | }else if(key == 'FGFR1'){ 92 | score <- res[which(res$regulon=='FGF1'),] 93 | }else if(key == 'ALK'){ 94 | score <- res[which(res$regulon %in% c('ALKAL1', 'ALKAL2')),] 95 | if(dim(score)[[1]]!=0){ 96 | score <- aggregate(value~target, score, sum) 97 | } 98 | } 99 | }else{ 100 | score <- res %>% filter(regulon == key & type == type) 101 | } 102 | 103 | if(grepl('GSE181575',degs_names)){ 104 | neg.genes <- setdiff(score$target, names(degs_ls[[degs_names]])) 105 | isDEGs <- rep(FALSE, length(neg.genes)) 106 | names(isDEGs) <- neg.genes 107 | degs_ls_used <- append(degs_ls[[degs_names]], isDEGs) 108 | }else{ 109 | degs_ls_used <- degs_ls[[degs_names]] 110 | } 111 | 112 | genes <- intersect(score$target, names(degs_ls_used)) 113 | label <- degs_ls_used[genes] 114 | pred <- score$value[match(genes,score$target)] 115 | 116 | res.tmp <- cbind(label, pred) %>% as.data.frame() %>% tibble::rownames_to_column(., 'genes') 117 | }) 118 | names(degs.tmp) <- names(degs_ls) 119 | degs.tmp 120 | }) 121 | return(result1) 122 | } 123 | 124 | RunTimeMemRecord <- function(wd, data, methods){ 125 | # get result path 126 | result.path <- paste0(wd, data) 127 | 128 | dataset_Record <- lapply(methods, function(method){ 129 | print(method) 130 | 131 | record <- read.table(paste0(result.path, '/', method, '/', 'TimeMemRecord.txt'), 132 | header = FALSE, sep = '\t', fill = TRUE) 133 | record <- record[, 2, drop = FALSE] 134 | if(dim(record)[1]==24){ 135 | record <- record[-1, , drop = FALSE] 136 | } 137 | record <- record[c(2,3,4,5,10),, drop = FALSE] 138 | 139 | 140 | # Linux_time: user+system time 141 | user_time <- as.numeric(gsub('User time \\(seconds\\): ', '', record[1,])) 142 | sys_time <- as.numeric(gsub('System time \\(seconds\\): ', '', record[2,])) 143 | time_linux <- user_time+sys_time 144 | 145 | # Clock_time 146 | clock_time <- gsub('Elapsed \\(wall clock\\) time \\(h:mm:ss or m:ss\\): ', '', record[4,]) 147 | if(stringr::str_count(clock_time, ':') == 1){ 148 | min_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[1]) 149 | min_time <- min_time*60 150 | sec_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[2]) 151 | clock_time <- min_time+sec_time; rm(sec_time, min_time) 152 | }else if(stringr::str_count(clock_time, ':') == 2){ 153 | h_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[1]) 154 | h_time <- h_time*60*60 155 | min_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[2]) 156 | min_time <- min_time*60 157 | sec_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[3]) 158 | clock_time <- h_time+min_time+sec_time; rm(sec_time, min_time, h_time) 159 | } 160 | 161 | # Max memory usage | CPU_used 162 | mem_linux <- gsub('Maximum resident set size \\(kbytes\\): ', '', record[5,]) 163 | mem_linux <- round(as.numeric(mem_linux)/1024/1024, 2) 164 | cpu_linux <- gsub('Percent of CPU this job got: ', '', record[3,]) 165 | 166 | return(data.frame(clock_time = clock_time, linux_time = time_linux, 167 | max_memory = mem_linux, cpu_linux = cpu_linux)) 168 | }) 169 | names(dataset_Record) <- methods 170 | return(dataset_Record) 171 | } -------------------------------------------------------------------------------- /Step3_MIForLRBench/function.R: -------------------------------------------------------------------------------- 1 | EvaIndex1_1 <- function(CloDist, norm.data, result, cp, choose, logic){ 2 | result.tmp <- result[which(result$sr == cp), ] 3 | if(!logic){ 4 | cp.1 <- unlist(stringr::str_split(cp, "_"))[1] 5 | cp.2 <- unlist(stringr::str_split(cp, "_"))[2] 6 | cp <- paste(cp.2, cp.1, sep = "_") 7 | } 8 | 9 | if(choose == "close"){ 10 | distant.tmp <- CloDist[[cp]]$close 11 | }else if(choose == 'distant'){ 12 | distant.tmp <- CloDist[[cp]]$distant 13 | } 14 | 15 | tmp.result <- lapply(1:dim(result.tmp)[1], function(n){ 16 | #print(n) 17 | ligands <- result.tmp[n,"Ligand"] 18 | ligands <- unlist(stringr::str_split(ligands, "&")) 19 | 20 | receptors <- result.tmp[n, "Receptor"] 21 | receptors <- unlist(stringr::str_split(receptors, "&")) 22 | 23 | tmp.lr <- expand.grid(ligands, receptors) 24 | colnames(tmp.lr) <- c('ligand', 'receptor') 25 | 26 | temp.result <- lapply(1:dim(tmp.lr)[1], function(m){ 27 | ligand <- tmp.lr[m, "ligand"] 28 | receptor <- tmp.lr[m, "receptor"] 29 | 30 | if((ligand %in% rownames(norm.data)) & (receptor %in% rownames(norm.data))){ 31 | if(logic){ 32 | value.1 <- norm.data[ligand, distant.tmp$cell.1] 33 | value.2 <- norm.data[receptor, distant.tmp$cell.2] 34 | }else{ 35 | value.1 <- norm.data[ligand, distant.tmp$cell.2] 36 | value.2 <- norm.data[receptor, distant.tmp$cell.1] 37 | } 38 | 39 | if(sum(value.1)!=0 & sum(value.2)!=0){ 40 | tmp.sender.mat <- data.frame(value.1 = value.1, barcode.1 = names(value.1)) 41 | tmp.reciever.mat <- data.frame(value.2 = value.2, barcode.2 = names(value.2)) 42 | data.tmp = cbind(tmp.sender.mat, tmp.reciever.mat) 43 | data.tmp <- data.tmp[,c("value.1", "value.2")] 44 | rm(value.1, value.2, tmp.reciever.mat, tmp.sender.mat) 45 | 46 | ## calculate the mutual information 47 | data.disc <- infotheo::discretize(data.tmp) 48 | tmp.mi <- infotheo::mutinformation(data.disc[,1],data.disc[,2]) 49 | 50 | ## calculate the Pearson Correlation Coefficient 51 | pcc.tmp <- cor.test(data.tmp[,1], data.tmp[,2], method = "pearson") 52 | tmp.pcc <- pcc.tmp$estimate[[1]] 53 | tmp.pcc.p <- pcc.tmp$p.value 54 | tmp <- c(tmp.mi, tmp.pcc, tmp.pcc.p) 55 | 56 | tmp 57 | }else{ 58 | tmp.mi <- NA 59 | tmp.pcc <- NA 60 | tmp.pcc.p <- NA 61 | tmp <- c(tmp.mi, tmp.pcc, tmp.pcc.p) 62 | tmp <- as.data.frame(t(tmp)) 63 | tmp 64 | } 65 | }else{ 66 | tmp.mi <- NA 67 | tmp.pcc <- NA 68 | tmp.pcc.p <- NA 69 | tmp <- c(tmp.mi, tmp.pcc, tmp.pcc.p) 70 | tmp <- as.data.frame(t(tmp)) 71 | tmp 72 | } 73 | 74 | tmp 75 | 76 | }) 77 | names(temp.result) <- paste(tmp.lr$ligand, tmp.lr$receptor, sep = "_") 78 | #temp.result[which(is.na(temp.result))] <- NULL 79 | temp.result <- do.call(rbind, temp.result) 80 | temp.result <- as.data.frame(temp.result) 81 | temp.result <- tibble::rownames_to_column(temp.result, 'lr') 82 | 83 | temp.result 84 | }) 85 | 86 | bench.result <- do.call(rbind, tmp.result) 87 | 88 | bench.result 89 | } 90 | 91 | EvaIndex1_2 <- function(CloDist, norm.data, result){ 92 | result.bench <- lapply(unique(result$sr), function(cp){ 93 | print(cp) 94 | if(cp %in% names(CloDist)){ 95 | close.bench <- EvaIndex1_1(CloDist, norm.data, result, cp, "close", TRUE) 96 | distant.bench <- EvaIndex1_1(CloDist, norm.data, result, cp, "distant", TRUE) 97 | identical(distant.bench$lr, close.bench$lr) 98 | tmp.result <- cbind(close.bench, distant.bench) 99 | tmp.result <- tmp.result[,-5] 100 | colnames(tmp.result) <- c("lr", "closeMI", "closePCC", "closePCCPvalue", 101 | "distantMI", "distantPCC","distantPCCPvalue") 102 | }else{ 103 | close.bench <- EvaIndex1_1(CloDist, norm.data, result, cp, "close", FALSE) 104 | distant.bench <- EvaIndex1_1(CloDist, norm.data, result, cp, "distant", FALSE) 105 | tmp.result <- cbind(close.bench, distant.bench) 106 | tmp.result <- tmp.result[,-5] 107 | colnames(tmp.result) <- c("lr","closeMI", "closePCC", "closePCCPvalue", 108 | "distantMI", "distantPCC","distantPCCPvalue") 109 | 110 | } 111 | tmp.result <- as.data.frame(tmp.result) 112 | tmp.result 113 | }) 114 | names(result.bench) <- unique(result$sr) 115 | result.bench 116 | } 117 | 118 | 119 | mm2hs <- function(result, genes){ 120 | genes <- genes[, c('mouse', 'human')] 121 | 122 | result <- merge(result, genes, by.x = 'Ligand', by.y = 'mouse', all.x = TRUE) 123 | multi <- which(is.na(result$human) & grepl('&', result$Ligand)) 124 | if(length(multi)>0){ 125 | result$human[multi] <- unlist(lapply(stringr::str_split(result$Ligand[multi], '&'), function(x){ 126 | tmp <- unlist(lapply(seq(x), function(y){genes$human[which(genes$mouse %in% x[y])]})) 127 | paste(tmp, collapse = '&') 128 | })) 129 | } 130 | result <- result[!(is.na(result$human) & !grepl('&', result$Ligand)), ] 131 | result$Ligand <- NULL 132 | colnames(result)[which(colnames(result) == 'human')] <- 'Ligand' 133 | 134 | result <- merge(result, genes, by.x = 'Receptor', by.y = 'mouse', all.x = TRUE) 135 | multi <- which(is.na(result$human) & grepl('&', result$Receptor)) 136 | if(length(multi)>0){ 137 | result$human[multi] <- unlist(lapply(stringr::str_split(result$Receptor[multi], '&'), function(x){ 138 | tmp <- unlist(lapply(seq(x), function(y){genes$human[which(genes$mouse %in% x[y])]})) 139 | paste(tmp, collapse = '&') 140 | })) 141 | } 142 | result <- result[!(is.na(result$human) & !grepl('&', result$Receptor)), ] 143 | result$Receptor <- NULL 144 | colnames(result)[which(colnames(result) == 'human')] <- 'Receptor' 145 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 146 | 147 | return(result) 148 | } 149 | 150 | hs2mm <- function(result, genes){ 151 | genes <- genes[, c('mouse', 'human')] 152 | 153 | result <- merge(result, genes, by.x = 'Ligand', by.y = 'human', all.x = TRUE) 154 | multi <- which(is.na(result$mouse) & grepl('&', result$Ligand)) 155 | if(length(multi)>0){ 156 | result$mouse[multi] <- unlist(lapply(stringr::str_split(result$Ligand[multi], '&'), function(x){ 157 | tmp <- unlist(lapply(seq(x), function(y){genes$mouse[which(genes$human %in% x[y])]})) 158 | paste(tmp, collapse = '&') 159 | })) 160 | } 161 | result <- result[!(is.na(result$mouse) & !grepl('&', result$Ligand)), ] 162 | result$Ligand <- NULL 163 | colnames(result)[which(colnames(result) == 'mouse')] <- 'Ligand' 164 | 165 | result <- merge(result, genes, by.x = 'Receptor', by.y = 'human', all.x = TRUE) 166 | multi <- which(is.na(result$mouse) & grepl('&', result$Receptor)) 167 | if(length(multi)>0){ 168 | result$mouse[multi] <- unlist(lapply(stringr::str_split(result$Receptor[multi], '&'), function(x){ 169 | tmp <- unlist(lapply(seq(x), function(y){genes$mouse[which(genes$human %in% x[y])]})) 170 | paste(tmp, collapse = '&') 171 | })) 172 | } 173 | result <- result[!(is.na(result$mouse) & !grepl('&', result$Receptor)), ] 174 | result$Receptor <- NULL 175 | colnames(result)[which(colnames(result) == 'mouse')] <- 'Receptor' 176 | result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_') 177 | 178 | return(result) 179 | } 180 | 181 | 182 | # further handle the results of MI 183 | Eval1Process <- function(result.path){ 184 | Eval1 <- readRDS(result.path) 185 | Eval1[which(is.na(Eval1))] <- NULL 186 | Eval1 <- lapply(Eval1, function(methods_eval){ 187 | perc_temp <- lapply(methods_eval, function(perc_eval){ 188 | perc_tmp <- do.call(rbind, perc_eval) 189 | perc_tmp <- tibble::rownames_to_column(perc_tmp, 'sr') 190 | perc_tmp$sr <- gsub('\\.[0-9]+', '', perc_tmp$sr) 191 | perc_tmp 192 | }) 193 | names(perc_temp) <- c(10, 20, 30, 40) 194 | perc_temp <- do.call(rbind, perc_temp) 195 | perc_temp <- tibble::rownames_to_column(perc_temp, 'perc') 196 | perc_temp$perc <- gsub('\\.[0-9]+', '', perc_temp$perc) 197 | perc_temp 198 | }) 199 | Eval1 <- do.call(rbind, Eval1) 200 | Eval1 <- tibble::rownames_to_column(Eval1, 'methods') 201 | Eval1$methods <- gsub('\\.[0-9]+', '', Eval1$methods) 202 | allna <- apply(Eval1[, c("closeMI", "closePCC", "distantMI", "distantPCC")], 203 | 1, FUN = function(x){any(is.na(x))}) 204 | Eval1 <- Eval1[which(!allna), ] 205 | Eval1$all <- paste(Eval1$methods, Eval1$perc, Eval1$sr, Eval1$lr, sep = '_') 206 | Eval1 <- dplyr::distinct(Eval1, all, .keep_all = TRUE) 207 | 208 | return(Eval1) 209 | } 210 | 211 | # Calculate the DLRC index of each method —— p.value(whether significant?) + median + weight 212 | EvalIndex_DLRC <- function(Eval1Result){ 213 | Eval1Result$class <- paste(Eval1Result$methods, Eval1Result$perc, sep = '_') 214 | 215 | temp <- lapply(unique(Eval1Result$class), function(class){ 216 | result_sub <- Eval1Result[which(Eval1Result$class == class),] 217 | medianMI_close <- median(result_sub$closeMI) 218 | medianMI_distant <- median(result_sub$distantMI) 219 | medianPCC_close <- median(abs(result_sub$closePCC)) 220 | medianPCC_distant <- median(abs(result_sub$distantPCC)) 221 | PCC_pval <- wilcox.test (result_sub$closePCC, result_sub$distantPCC, 222 | alternative = "greater")$p.value 223 | MI_pval <- wilcox.test (result_sub$closeMI, result_sub$distantMI, 224 | alternative = "greater")$p.value 225 | return(data.frame(medianMI_close = medianMI_close, medianMI_distant = medianMI_distant, MI_pval = MI_pval, 226 | medianPCC_close = medianPCC_close, medianPCC_distant = medianPCC_distant, PCC_pval = PCC_pval)) 227 | }) 228 | names(temp) <- unique(Eval1Result$class) 229 | temp <- do.call(rbind, temp) 230 | temp <- tibble::rownames_to_column(temp, 'class') 231 | temp <- tidyr::separate(temp, class, c('methods', 'perc'), '_') 232 | #temp <- temp[-which(temp$perc == 40),] 233 | 234 | tmp <- lapply(unique(temp$methods), function(method){ 235 | result_sub <- temp[which(temp$methods == method), ] 236 | if(dim(result_sub)[1]==4){ 237 | result_sub$MIIndex <- (0.5-as.numeric(result_sub$perc)*0.01)* 238 | ifelse(result_sub$MI_pval<0.05, 1, 0)* 239 | (result_sub$medianMI_close - result_sub$medianMI_distant) 240 | 241 | result_sub$PCCIndex <- (0.5-as.numeric(result_sub$perc)*0.01)* 242 | ifelse(result_sub$PCC_pval<0.05, 1, 0)* 243 | (result_sub$medianPCC_close - result_sub$medianPCC_distant) 244 | return(data.frame(MIIndex = sum(result_sub$MIIndex), PCCIndex = sum(result_sub$PCCIndex))) 245 | } 246 | }) 247 | names(tmp) <- unique(temp$methods) 248 | tmp <- do.call(rbind, tmp) 249 | tmp <- tibble::rownames_to_column(tmp, 'methods') 250 | tmp$MIRank <- rank(-tmp$MIIndex) 251 | tmp$PCCRank <- rank(-tmp$PCCIndex) 252 | return(tmp) 253 | } 254 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |  2 | ## Introduction 3 | 4 | We benchmark two types of CCC inference methods, one type of methods predict LR pairs based on scRNA-seq data, and another type of methods that can predict ligand/receptor-targets regulations. 5 | 6 | For the first benchmark, we evaluated the accuracy, stability and usability of 18 LR inference methods. **In term of accuracy**, paired ST datasets, CAGE expression/Proteomics data and sampled scRNA-seq datasets were used to benchmark the 18 methods. Firstly, 11 scRNA-seq datasets were used as input for methods to predict intercellular communication and the two defined similarity index (SI, modified Jaccard index) and rank-based similarity index (RSI) were used to compare the similarity of LR pairs predicted by methods.Furthermore, we benchmark the 18 methods using 11 paired ST datasets with the hypothesis that the values of mutual information (MI) of LR pairs are greater in the close group than that in the distant group. In addition, three PBMC datasets from 10X Genomics website were used as input for methods to predict LR pairs and CAGE expression/Proteomics data were used as pseudo gold standards to benchmark the 18 methods. **In term of stability**, we ramdomly sampled different ratios of cells in all the scRNA-seq, resulting 70 sampled datasets and 14 original datasets as input for methods. We calculated the Jaccard index of the LR pairs predicted based sampled datasets and original datasets and a stability value was defined to test the robustness of methods to sampling rates of scRNA-seq data. **In term of usability**, we recorded the running time and maximum memory usage of methods in all the 84 scRNA-seq datasets. 7 | 8 | For the second benchmark, 8 ST datasets were used as the input for 5 LR-Targets inference tools to predict ligand/receptor-targets regulations, and the cell line perturbation datasets were used for evaluation, involving knockout/mutant conditions for 5 receptors, and treatment conditions for 10 ligands. And the differentially expressed genes (DEGs) in each cell line perturbation dataset, were used as the ground truth of ligand/receptor-targets regulations. The score of ligand/receptor-targets predicted by different tools were compared to the differential expression status (DGEs or not DEGs) of corresponding targets to calculate AUROC and AUPRC. In addition, we also record the running time and maximum memory usage of methods in all the ST datasets. 9 | 10 | ## Workflow 11 |  12 | - **Step0\_LRToolsFunction** contains the R/Python/Shell scripts that package the running code of 19 methods with Seurat objects as input into function. 13 | - **Step1\_LRPredictionResult** contains the R/Shell scripts to run 19 methods for inferring LR pairs from the 14 scRNA-seq datasets. 14 | - **Step2\_PreSTForLRBench** contains the R scripts to get the different ratios (e.g.top 10%, 20%, 30%, 40%) of cell type specific close and distant cell pairs in each dataset for the preparation of the benchmarking using mutual infomation. 15 | - **Step3\_MIForLRBench** contains the R scripts to calculate MI of LR interactions predicted by methods in the different ratios of cell type specific close and distant groups and calculate DLRC index of methods in each dataset. 16 | - **Step4\_SIRSIForLRBench** contains the R scripts to benchmark the similarity (SI and RSI) of the LR interactions predicted by each two methods. 17 | - **Step5\_BenchBasedCAGEProteomic** contains the R scripts to benchmark the 18 LR inference methods using the CAGE expression and proteomics data. 18 | - **Step6\_LRBenchSampling** contains the R/Shell scripts to run the 18 LR inference methods for inferring LR pairs from 70 sampled scRNA-seq datasets. 19 | - **Step7\_LRBenchSamplingBench** contains the R/Shell scripts to calculate Jaccard index between the LR pairs predicted based on the sampled datasets and the original datasets, and record the running time and maximum memory usage of methods in each dataset. 20 | - **Step8\_LRTToolsFunction** contains the R/Python/Shell scripts to run the 5 LR-Target inference methods for predicting ligand/receptor-targets using ST datasets as input. 21 | - **Step9\_LRTBench** contains the R scripts to benchmark the 5 LR-Target inference methods using cell line perturbation datasets for evaluation, and record the running time and maximum memory usage of methods in each dataset. 22 | 23 | ## Datasets 24 | - **scRNA-seq and ST datasets** 25 | 26 | 27 |
28 || Tissue (Disease) | SampleID (scRNA-seq) | SampleID (ST) | Literature PMID | Download URL (scRNA-seq) | Download URL (ST) | Evaluation purpose | 32 |
|---|---|---|---|---|---|---|
| Heart Tissue (Health) | CK357 | control_P7 | 35948637 | URL | URL | LR interactions LR-Target regulations |
37 |
| CK358 | control_P8 | 40 ||||||
| Heart Tissue (ICM) | CK368 | FZ_GT_P19 | LR interactions | 43 ||||
| CK162 | FZ_GT_P4 | 46 ||||||
| CK362 | RZ_P11 | 49 ||||||
| Heart Tissue (AMI) | CK361 | IZ_P10 | 52 |||||
| CK161 | IZ_P3 | 55 ||||||
| CK165 | IZ_BZ_P2 | 58 ||||||
| Tumor Tissue (Breast cancer) | CID44971 | CID44971 | 34493872 | URL | URL | LR interactions LR-Target regulations |
61 |
| CID4465 | CID4465 | 64 ||||||
| Mouse embryo | —— | Slide14 | 34210887 | —— | URL | LR interactions | 67 |
| PBMC | PBMC4K | —— | —— | URL | —— | LR interactions | 70 |
| PBMC6K | —— | —— | URL | —— | 73 |||
| PBMC8K | —— | —— | URL | —— | 76 |||
| Tumor Tissue (Gliomas) | —— | UKF243_T_ST | 35700707 | —— | URL | LR-Target interactions | 79 |
| —— | UKF260_T_ST | 82 ||||||
| —— | UKF266_T_ST | 85 ||||||
| —— | UKF334_T_ST | 88 |
| Datasets | Ligand/Receptor | Type | Condition | Cell Line | Disease | 101 |
|---|---|---|---|---|---|
| GSE120268 | AXL | receptor | Knockdown | MDA-MB-231 | Breast Cancer | 106 |
| GSE157680 | NRP1 | receptor | Knockdown | MDA-MB-231 | 109 ||
| GSE15893 | CXCR4 | receptor | Mutant | MDA-MB-231 | 112 ||
| CXCL12 | ligand | Treatment | MDA-MB-231 | 115 |||
| GSE160990 | TGFB1 | ligand | Treatment | MDA-MB-231 | 118 ||
| GSE36051 | DLL4(1) | ligand | Treatment | MCF7 | 121 ||
| DLL4(2) | ligand | Treatment | MDA-MB-231 | 124 |||
| JAG1 | ligand | Treatment | MDA-MB-231 | 127 |||
| GSE65398 | IGF1(1) | ligand | Treatment | MCF7 | 130 ||
| GSE7561 | IGF1(2) | ligand | Treatment | MCF7 | 133 ||
| GSE69104 | CSF1R | receptor | Inhibit | TAMs | Gliomas | 136 |
| GSE116414 | FGFR1 | receptor | Inhibit | GSLC | 139 ||
| GSE206947 | EFNB2 | ligand | Treatment | cardiac fibroblasts | Health | 142 |
| GSE181575 | TGFB1 | ligand | Treatment | cardiac fibroblasts | 145 ||
| GSE123018 | TGFB1 | ligand | Treatment | cardiac fibroblasts | 148 |