├── Graphic-abstrct.png
├── Workflow-figure.png
├── Step0_LRToolsFunction
    ├── PyMINEr_shell.sh
    ├── scConnect_shell.sh
    ├── NATMI_shell.sh
    ├── CellPhoneDB3_shell.sh
    ├── CellPhoneDB2_shell.sh
    ├── cell2cell_shell.sh
    ├── Domino_shell2.sh
    ├── Domino_shell1.sh
    ├── scConnect_python.py
    ├── cell2cell_python.py
    ├── ScriptForAddLRscore.R
    └── Step1_LRToolsFunction.R
├── Step8_LRTToolsFunction
    ├── HoloNet_shell.sh
    ├── LRTPrediction_5tools.sh
    ├── RunSTScript.R
    ├── HoloNet.py
    └── Step8_LRTToolsFunction.R
├── Step3_MIForLRBench
    ├── Step2_ScriptForDLRC.R
    ├── Step1_ScriptForCalMI.R
    └── function.R
├── Step4_SIRSIForLRBench
    ├── ScriptForCalSIRSI.R
    └── function.R
├── Step6_LRBenchSampling
    ├── example.sh
    └── RunScript.R
├── Step9_LRTBench
    ├── Step2_ScriptForRecord.R
    ├── Step3_ScriptForTimeMen.R
    ├── Step1_ScriptForBench.R
    └── function.R
├── Step7_LRBenchSamplingBench
    ├── Step3_TimeMenRecord.R
    ├── Step2_JaccardIndex.R
    ├── function.R
    └── Step1_DatasetRecord.R
├── Step1_LRPredictionResult
    ├── LRPredition_19tools.sh
    └── RunScript.R
├── Step2_PreSTForLRBench
    ├── ScriptForCK.R
    ├── ScriptForCID.R
    ├── function.R
    └── ScriptForMouseEymbryo.R
├── Step5_BenchBasedCAGEProteomic
    ├── function.R
    ├── ScriptForCAGE.R
    └── ScriptForProteomics.R
└── README.md


/Graphic-abstrct.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SunXQlab/ESICCC/HEAD/Graphic-abstrct.png


--------------------------------------------------------------------------------
/Workflow-figure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SunXQlab/ESICCC/HEAD/Workflow-figure.png


--------------------------------------------------------------------------------
/Step0_LRToolsFunction/PyMINEr_shell.sh:
--------------------------------------------------------------------------------
1 | source /home/ljx/miniconda3/bin/activate pyminer
2 | pyminer.py -i $1 -manual_sample_groups $2 -rand_seed 123 -species $3
3 | 


--------------------------------------------------------------------------------
/Step0_LRToolsFunction/scConnect_shell.sh:
--------------------------------------------------------------------------------
1 | source /home/ljx/miniconda3/bin/activate cell2cell
2 | /home/ljx/miniconda3/envs/cell2cell/bin/python ../Script/Step0_LRToolsFunction/scConnect_python.py $1 $2 $3 $4


--------------------------------------------------------------------------------
/Step0_LRToolsFunction/NATMI_shell.sh:
--------------------------------------------------------------------------------
1 | cd ./Step0_SharedInfo/NATMI/
2 | /home/ljx/miniconda3/bin/python ExtractEdges.py --species $1 --emFile $2 --annFile $3 --interDB lrc2p --coreNum 10 --out $4 --interSpecies $1


--------------------------------------------------------------------------------
/Step0_LRToolsFunction/CellPhoneDB3_shell.sh:
--------------------------------------------------------------------------------
1 | source /home/ljx/miniconda3/bin/activate cpdb
2 | cellphonedb method degs_analysis $1 $2 $3 --threshold 0.05 --output-path $4 --threads 10 --counts-data gene_name --debug-seed 123


--------------------------------------------------------------------------------
/Step0_LRToolsFunction/CellPhoneDB2_shell.sh:
--------------------------------------------------------------------------------
1 | source /home/ljx/miniconda3/bin/activate cpdb2
2 | cellphonedb method statistical_analysis $1 $2 --threshold 0.05 --output-path $3 --threads 10 --counts-data gene_name --debug-seed 123
3 | 


--------------------------------------------------------------------------------
/Step8_LRTToolsFunction/HoloNet_shell.sh:
--------------------------------------------------------------------------------
1 | source /home/ljx/miniconda3/bin/activate HoloNet
2 | /home/ljx/miniconda3/envs/HoloNet/bin/python ~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Script/Step8_LRTToolsFunction/HoloNet.py $1 $2 $3 $4 $5


--------------------------------------------------------------------------------
/Step0_LRToolsFunction/cell2cell_shell.sh:
--------------------------------------------------------------------------------
1 | source /home/ljx/miniconda3/bin/activate cell2cell
2 | /home/ljx/miniconda3/envs/cell2cell/bin/python ~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Script/Step0_LRToolsFunction/cell2cell_python.py $1 $2 $3 $4


--------------------------------------------------------------------------------
/Step0_LRToolsFunction/Domino_shell2.sh:
--------------------------------------------------------------------------------
1 | source /home/ljx/miniconda3/bin/activate pyscenic
2 | 
3 | pyscenic grn --num_workers 30 -o $1 $2 $5 --seed 123
4 | 
5 | pyscenic ctx $1 $6 $7 --annotations_fname $8 --expression_mtx_fname $2 --mode "dask_multiprocessing" --output $3 --num_workers 30
6 | 
7 | pyscenic aucell $2 $3 -o $4 --num_workers 30 --seed 123


--------------------------------------------------------------------------------
/Step0_LRToolsFunction/Domino_shell1.sh:
--------------------------------------------------------------------------------
1 | source /home/ljx/miniconda3/bin/activate pyscenic
2 | 
3 | pyscenic grn --num_workers 30 -o $1 $2 $5 --seed 123
4 | 
5 | pyscenic ctx $1 $6 $7 $8 $9 ${10} ${11} --annotations_fname ${12} --expression_mtx_fname $2 --mode "dask_multiprocessing" --output $3 --num_workers 30
6 | 
7 | pyscenic aucell $2 $3 -o $4 --num_workers 30 --seed 123


--------------------------------------------------------------------------------
/Step3_MIForLRBench/Step2_ScriptForDLRC.R:
--------------------------------------------------------------------------------
 1 | rm(list = ls());gc()
 2 | source('./Script/Step3_MIForLRBench/function.R')
 3 | set.seed(123)
 4 | 
 5 | result.path <- list.files('./Data/Step3_MIPCCForLRBench', full.names = TRUE)
 6 | samples <- gsub('_result.rds', '', substring(result.path, 30))
 7 | 
 8 | tmp <- lapply(seq(result.path), function(i){
 9 |   Eval1Result <- Eval1Process(result.path[i])
10 |   Eval1Result <- EvalIndex_DLRC(Eval1Result)
11 |   Eval1Result
12 | })
13 | names(tmp) <- samples
14 | tmp <- do.call(rbind, tmp)
15 | tmp <- tibble::rownames_to_column(tmp, 'datasets')
16 | tmp$datasets <- gsub('\\.[0-9]+', '', tmp$datasets)


--------------------------------------------------------------------------------
/Step8_LRTToolsFunction/LRTPrediction_5tools.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | tools="NicheNet MISTy stMLnet HoloNet CytoTalk"
 3 | 
 4 | sampleID='CID44971 CID4465 control_P7 control_P8 UKF243_T_ST UKF260_T_ST UKF266_T_ST UKF334_T_ST'
 5 | 
 6 | for i in $sampleID;
 7 | do
 8 | fpath='./Step6_LRTPredictionResult/'$i"/"
 9 | 
10 | if [ ! -d $fpath ]
11 | then
12 | mkdir $fpath
13 | fi
14 | 
15 | for j in $tools;
16 | do
17 | fpathout=$fpath"/"$j"/"
18 | if [ ! -d $fpathout ]
19 | then
20 | mkdir $fpathout
21 | fi
22 | 
23 | if [[ !($j == 'CytoTalk') ]]
24 | then
25 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /usr/bin/Rscript ../Script/Step8_LRTToolsFunction/RunSTScript.R $j $i human;
26 | else
27 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /home/ljx/software/R-4.1.0/bin/Rscript ../Script/Step8_LRTToolsFunction/RunSTScript.R $j $i human;
28 | fi
29 | 
30 | done
31 | done
32 | 


--------------------------------------------------------------------------------
/Step4_SIRSIForLRBench/ScriptForCalSIRSI.R:
--------------------------------------------------------------------------------
 1 | rm(list = ls());gc()
 2 | suppressMessages(library(Seurat))
 3 | source('./Script/Step3_MIForLRBench/function.R')
 4 | source('./Script/Step4_SIRSIForLRBench/function.R')
 5 | set.seed(123)
 6 | 
 7 | datasets <- c('CID4465', 'CID44971', 'CK357', 'CK358',
 8 |               'CK368', 'CK162', 'CK362', 'CK361',
 9 |               'CK161', 'CK165', 'Slide14') 
10 | ck.dataset <- readRDS('~/1-Datasets/Nature_2022_MI/dataset_id.rds')
11 | 
12 | # Calculate SI
13 | for(data in datasets){
14 |   print(data)
15 |   SIResult <- CalSI_2(data)
16 |   saveRDS(SIResult, file = paste0('./Data/Step4_SIRSIForLRBench/', data, '_SIResult.rds'))
17 | }
18 | rm(SIResult, data);gc()
19 | 
20 | #Calculate RSI
21 | for (data in datasets) {
22 |   print(data)
23 |   RSIResult <- CalRSI(data)
24 |   saveRDS(RSIResult, file = paste0('./Data/Step4_SIRSIForLRBench/', data, '_RSIResult.rds'))
25 | }
26 | rm(RSIResult, data);gc()
27 | 


--------------------------------------------------------------------------------
/Step0_LRToolsFunction/scConnect_python.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | 
 4 | # In[1]:
 5 | 
 6 | 
 7 | import pandas as pd
 8 | import scanpy as sc
 9 | import scConnect as cn
10 | import matplotlib
11 | import matplotlib.pyplot as plt
12 | import sys
13 | import random
14 | random.seed(123)
15 | 
16 | counts_path = sys.argv[1]
17 | meta_path = sys.argv[2]
18 | species = sys.argv[3]
19 | output_path = sys.argv[4]
20 | adata = sc.read_csv(counts_path).T
21 | meta = pd.read_csv(meta_path,  index_col="Cell")
22 | adata.obs = meta
23 | 
24 | adata_tissue = cn.genecall.meanExpression(adata, groupby="Annotation", normalization=False, use_raw=False, transformation="log1p")
25 | adata_tissue = cn.connect.ligands(adata_tissue, organism=species)
26 | adata_tissue = cn.connect.receptors(adata_tissue, organism=species)
27 | adata_tissue = cn.connect.specificity(adata_tissue, n=100, groupby="Annotation", organism=species)
28 | 
29 | edges = cn.connect.interactions(emitter=adata_tissue, target=adata_tissue, self_reference=True, organism=species)
30 | nodes = cn.connect.nodes(adata_tissue)
31 | 
32 | edge = pd.DataFrame(edges)
33 | col_new = list(edge.iloc[0,2].keys())
34 | edge.rename(columns={0:'sender', 1:'reciever', 2:'info'}, inplace = True)
35 | for each in col_new:
36 |     edge[each] = edge["info"].map(lambda x:x[each])
37 |     
38 | edge.to_csv(output_path)
39 | 
40 | 


--------------------------------------------------------------------------------
/Step6_LRBenchSampling/example.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | tools="CellPhoneDB2 CellPhoneDB3 CellTalker Connectome NATMI ICELLNET scConnect CellChat SingleCellSignalR CellCall scSeqComm NicheNet Domino PyMINEr iTALK scMLnet"
 3 | 
 4 | sampleID='CID44971 CID4465 CK357 CK358 CK161 CK165 CK361 CK362 CK162 CK368 Slide14 pbmc4k pbmc6k pbmc8k'
 5 | ratios='90 80 70 60 50'
 6 | 
 7 | for i in $sampleID;
 8 | do
 9 | for ratio in $ratios;
10 | do
11 | 
12 | fpath='./Step10_LRBenchSamplingResult/'$i"_"$ratio"/"
13 | 
14 | if [ ! -d $fpath ]
15 | then
16 | mkdir $fpath
17 | fi
18 | for j in $tools;
19 | do
20 | fpathout=$fpath"/"$j"/"
21 | if [ ! -d $fpathout ]
22 | then
23 | mkdir $fpathout
24 | fi
25 | 
26 | if [[ "$i" =~ 'Slide'  && !($j == 'CytoTalk') ]]
27 | then
28 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /usr/bin/Rscript ../Script/Step10_LRBenchSampling/RunScript.R $j $i mouse $ratio;
29 | elif [[ "$i" =~ 'Slide'  && ($j == 'CytoTalk') ]]
30 | then
31 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /home/ljx/software/R-4.1.0/bin/Rscript ../Script/Step10_LRBenchSampling/RunScript.R $j $i mouse $ratio;
32 | elif [[ ($j == 'CytoTalk') ]]
33 | then
34 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /home/ljx/software/R-4.1.0/bin/Rscript ../Script/Step10_LRBenchSampling/RunScript.R $j $i human $ratio;
35 | else
36 | /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /usr/bin/Rscript ../Script/Step10_LRBenchSampling/RunScript.R $j $i human $ratio;
37 | fi
38 | 
39 | done
40 | done
41 | done


--------------------------------------------------------------------------------
/Step9_LRTBench/Step2_ScriptForRecord.R:
--------------------------------------------------------------------------------
 1 | 
 2 | files <- list.files('./Data/Step8_LRTBenchResult/record', full.names = TRUE)
 3 | record_data <- lapply(files, function(file){
 4 |   result <- readRDS(file)
 5 |   record_methods <- lapply(result, function(method){
 6 |     record_cellines <- lapply(method, function(cellines){
 7 |       if(dim(cellines)[[1]] == 0){
 8 |         text <- 'No rec/lig'
 9 |       }else{
10 |         tmp <- as.data.frame(table(cellines$label))
11 |         false <- as.numeric(tmp$Freq[which(tmp$Var1==0)])
12 |         true <- as.numeric(tmp$Freq[which(tmp$Var1==1)])
13 |         text <- paste0('F: ', false, '; T: ', true)
14 |       }
15 |       text
16 |     })
17 |     names(record_cellines) <- names(method)
18 |     record_cellines
19 |   })
20 |   record_methods <- do.call(cbind, record_methods)
21 |   record_methods <- as.data.frame(record_methods)
22 |   record_methods
23 | })
24 | names(record_data) <- gsub('\\.rds', '', list.files('./Data/Step8_LRTBenchResult/record'))
25 | 
26 | record_data1 <- lapply(seq(5, 29, 2), function(i){
27 |   print(i)
28 |   marco <- record_data[[i]]
29 |   mal <- record_data[[i+1]]
30 |   marco.col <- setdiff(colnames(mal), colnames(marco))
31 |   mal.col <- setdiff(colnames(marco), colnames(mal))
32 |   
33 |   if (length(marco.col)>0) {
34 |     marco[, marco.col] <- '——'
35 |     marco <- marco[, colnames(mal)]
36 |   }else if(length(mal.col)>0){
37 |     mal[, mal.col] <- '——'
38 |     mal <- mal[, colnames(marco)]
39 |   }
40 |   tmp <- rbind(marco, mal)
41 |   tmp
42 | })
43 | 
44 | names(record_data1) <- gsub('_macro', '', names(record_data)[seq(5, 29, 2)])
45 | 


--------------------------------------------------------------------------------
/Step7_LRBenchSamplingBench/Step3_TimeMenRecord.R:
--------------------------------------------------------------------------------
 1 | rm(list = ls());gc()
 2 | suppressMessages(library(tidyverse))
 3 | source('./Script/Step11_LRBenchSamplingBench/function.R')
 4 | set.seed(123)
 5 | 
 6 | DataRecord <- readRDS(file = './Data/Step7_LRBenchSampling/DatasetsRecord.rds')
 7 | DataRecord$ratio <- gsub('S', '', DataRecord$ratio)
 8 | datasets <- unique(DataRecord$datasets)
 9 | methods <- c('CellPhoneDB2', 'CellPhoneDB3',	'CellTalker',	'Connectome',
10 |              'NATMI',	'ICELLNET',	'scConnect', 'CellChat', 'SingleCellSignalR',
11 |              'CellCall',	'scSeqComm',	'NicheNet',	'scMLnet', 'iTALK',	
12 |              'RNAMagnet', 'PyMINEr', 'CytoTalk', 'Domino')
13 | ratios <- c(50, 60, 70, 80, 90, 100)
14 | 
15 | TimeMenRecord <- lapply(datasets, function(data){
16 |   print(data)
17 |   tmp <- lapply(ratios, function(ratio){
18 |     print(ratio)
19 |     temp <- RunTimeMemRecord(data, ratio, methods, DataRecord)
20 |     temp <- do.call(rbind, temp)
21 |     temp <- tibble::rownames_to_column(temp, 'methods')
22 |     temp$ratios <- ratio
23 |     temp
24 |   })
25 |   tmp <- do.call(rbind, tmp) %>% as.data.frame()
26 |   tmp$datasets <- data
27 |   tmp
28 | })
29 | TimeMenRecord <- do.call(rbind, TimeMenRecord) %>% as.data.frame()
30 | TimeMenRecord$class <- paste(TimeMenRecord$datasets, TimeMenRecord$ratios, sep = '_')
31 | 
32 | DataRecord$class <- paste(DataRecord$datasets, DataRecord$ratio, sep = '_')
33 | DataRecord <- DataRecord[, -c(1:2)]
34 | TimeMenRecord <- merge(TimeMenRecord, DataRecord, by = 'class')
35 | 
36 | saveRDS(TimeMenRecord, file = paste0('./Data/Step7_LRBenchSampling/TimeMemRecord.rds'))
37 | 


--------------------------------------------------------------------------------
/Step8_LRTToolsFunction/RunSTScript.R:
--------------------------------------------------------------------------------
 1 | suppressMessages(library(Seurat))
 2 | source('../Script/Step8_LRTToolsFunction/Step8_LRTToolsFunction.R')
 3 | set.seed(123)
 4 | 
 5 | args <- commandArgs()
 6 | tools <- args[6]
 7 | print(tools)
 8 | sampleID <- args[7]
 9 | print(sampleID)
10 | 
11 | if(grepl('CID', sampleID)){
12 |   fpath.dataset <- paste0('/home/ljx/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/ST_Data/', sampleID, '_ser.rds')
13 |   receiver <- 'CancerEpithelial'
14 | }else if(grepl('UKF', sampleID)){
15 |   fpath.dataset <- paste0('/home/ljx/1-Datasets/CancerCell_2022_Glioma/ScriptForLRTBenchmark/ST_Data/', sampleID, '_ser.rds')
16 |   receiver <- c('macrophages', 'Malignant')
17 | }else{#MI
18 |   fpath.dataset <- paste0('/home/ljx/1-Datasets/Nature_2022_MI/ScriptForCCC/ST_Data/', sampleID, '_ser.rds')
19 |   receiver <- c('Fibroblast')
20 | }
21 | 
22 | dir.output <- paste0('./Step8_LRTPredictionResult/', sampleID)
23 | if(!dir.exists(dir.output)){
24 |   dir.create(dir.output)
25 | }
26 | 
27 | fpath.tools <- paste(dir.output, tools, sep = '/')
28 | if(!dir.exists(fpath.tools)){
29 |   dir.create(fpath.tools)
30 | }
31 | 
32 | ser <- readRDS(fpath.dataset)
33 | 
34 | if(tools == 'CytoTalk'){
35 |   CytoTalk_function(ser, fpath.tools, receiver = receiver)
36 | }else if(tools == 'HoloNet'){
37 |   HoloNet_function(ser, fpath.tools, sampleID, receiver = receiver)
38 | }else if(tools == 'MISTy'){
39 |   MISTy_function(ser, fpath.tools, sampleID, receiver = receiver)
40 | }else if(tools == 'NicheNet'){
41 |   NicheNet_function(ser, fpath.tools, sampleID, receiver = receiver)
42 | }else if(tools == 'stMLnet'){
43 |   stMLnet_function(ser, fpath.tools, sampleID, receiver = receiver)
44 | }


--------------------------------------------------------------------------------
/Step1_LRPredictionResult/LRPredition_19tools.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | tools="CellPhoneDB2 CellPhoneDB3 CellTalker Connectome NATMI ICELLNET scConnect CellChat SingleCellSignalR CellCall scSeqComm NicheNet Domino PyMINEr iTALK cell2cell scMLnet"
 3 | 
 4 | sampleID='CID44971 CID4465 CK357 CK358 CK161 CK165 CK361 CK362 CK162 CK368 Slide14 pbmc4k pbmc6k pbmc8k"
 5 |   
 6 | for i in $sampleID;
 7 | do
 8 | if [[ "$i" =~ 'CID' ]]
 9 | then
10 | fpath='./Step1_LRPredictionResult/NG_BC_'$i"/"
11 | elif [[ "$i" =~ 'CK' ]]
12 | then
13 | fpath='./Step1_LRPredictionResult/N_MI_'$i"/"
14 | elif [[ "$i" =~ 'Slide' ]]
15 | then
16 | fpath='./Step1_LRPredictionResult/MouseEmbryo_'$i"/"
17 | else
18 | fpath='./Step1_LRPredictionResult/GSE106487_'$i"/"
19 | m='ST'
20 | n='SSC'
21 | fi
22 | 
23 | if [ ! -d $fpath ]
24 | then
25 | mkdir $fpath
26 | fi
27 |   for j in $tools;
28 |   do
29 |   fpathout=$fpath"/"$j"/"
30 |   if [ ! -d $fpathout ]
31 |   then
32 |   mkdir $fpathout
33 |   fi
34 |   
35 |   if [[ "$i" =~ 'Slide'  && !($j == 'CytoTalk') ]]
36 |   then
37 |   /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /usr/bin/Rscript ../Script/Step1_LRPredictionResult/RunScript.R $j $i mouse;
38 |   elif [[ "$i" =~ 'Slide'  && ($j == 'CytoTalk') ]]
39 |   then
40 |   /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /home/ljx/software/R-4.1.0/bin/Rscript ../Script/Step1_LRPredictionResult/RunScript.R $j $i mouse;
41 |   elif [[ ($j == 'CytoTalk') ]]
42 |   then
43 |   /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /home/ljx/software/R-4.1.0/bin/Rscript ../Script/Step1_LRPredictionResult/RunScript.R $j $i human;
44 |   else
45 |   /usr/bin/time -v -o $fpathout"TimeMemRecord.txt" /usr/bin/Rscript ../Script/Step1_LRPredictionResult/RunScript.R $j $i human;
46 |   fi
47 | 
48 |   done
49 | done
50 | 


--------------------------------------------------------------------------------
/Step2_PreSTForLRBench/ScriptForCK.R:
--------------------------------------------------------------------------------
 1 | rm(list = ls())
 2 | suppressWarnings(library(Seurat))
 3 | source('./Script/Step2_PreSTForLRBench/function.R')
 4 | set.seed(123)
 5 | 
 6 | samples <- readRDS('~/1-Datasets/Nature_2022_MI/dataset_id.rds')
 7 | samples <- samples$ST
 8 | 
 9 | for(sample in samples){
10 |   output.path <- paste0('./Data/Step2_PreSTForLRBench/', sample)
11 |   if(!dir.exists(output.path)){
12 |     dir.create(output.path)
13 |   }
14 |   
15 |   ser2 <- readRDS(paste0('~/1-Datasets/Nature_2022_MI/ST_Data/Visium-', sample, '.rds'))
16 |   meta <- ser2@meta.data
17 |   meta <- meta[, 15, drop = FALSE]; colnames(meta) <- 'celltype'
18 |   meta$celltype <- gsub('Cycling.cells', 'Cycling', meta$celltype)
19 |   rm(ser2);gc()
20 |   
21 |   st.path <- paste0('~/1-Datasets/Nature_2022_MI/ST_Data/', sample)
22 |   counts <- Read10X_h5(paste0(st.path, '/filtered_feature_bc_matrix.h5'))
23 |   img <- Read10X_Image(paste0(st.path, '/spatial'))
24 |  
25 |   share.barcode <- intersect(rownames(meta), colnames(counts))
26 |   share.barcode <- intersect(share.barcode, rownames(img@coordinates))
27 |   
28 |   meta <- meta[share.barcode, , drop = FALSE]
29 |   counts <- counts[, rownames(meta)]
30 |   img <- img[colnames(counts)]
31 |   DefaultAssay(object = img) <- 'Spatial'
32 | 
33 |   identical(rownames(meta), colnames(counts))
34 |   
35 |   ser <- CreateSeuratObject(counts, meta.data = meta,
36 |                             min.cells = 1, min.features = 1, 
37 |                             assay = 'Spatial')
38 |   ser <- SCTransform(ser, assay = "Spatial")
39 |   ser[['image']] <- img
40 |   saveRDS(ser, file = paste0(output.path, '/STser.rds'))
41 |   
42 |   close_distant <- CloDistCP(ser)
43 |   saveRDS(close_distant, file = paste0(output.path, '/CloDistCP.rds'))
44 | }
45 | 


--------------------------------------------------------------------------------
/Step2_PreSTForLRBench/ScriptForCID.R:
--------------------------------------------------------------------------------
 1 | rm(list = ls())
 2 | suppressMessages(library(Seurat))
 3 | source('./Script/Step2_PreSTForLRBench/function.R')
 4 | set.seed(123)
 5 | 
 6 | # CID datasets
 7 | img.path <- list.dirs('~/1-Datasets/NatureGenetics_2021_BC/ST_Data/spatial')[-1]
 8 | matrix.path <- list.dirs('~/1-Datasets/NatureGenetics_2021_BC/ST_Data/filtered_count_matrices')[-1]
 9 | meta.path <- list.dirs('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/ST_deconvoulution', recursive = FALSE)
10 | samples <- substring(meta.path, 76)
11 | 
12 | for (i in seq(img.path)) {
13 |   output.path <- paste0('./Data/Step2_PreSTForLRBench/', samples[i])
14 |   if(!dir.exists(output.path)){
15 |     dir.create(output.path)
16 |   }
17 |   
18 |   # Save SeuratObject of ST dataset
19 |   if(T){
20 |     count.st <- Read10X(matrix.path[i], gene.column = 1)
21 |     img <- Read10X_Image(image.dir = img.path[i])
22 |     meta.st <- read.csv(paste0(meta.path[i], '/c2l_means_result.csv'))
23 |     meta.st <- tibble::column_to_rownames(meta.st, var = "X")
24 |     colnames(meta.st) <- gsub('meanscell_abundance_w_sf_', '', colnames(meta.st))
25 |     meta.st$celltype <- apply(meta.st, 1, function(x){
26 |       colnames(meta.st)[which.max(x)]
27 |     })
28 |     
29 |     shared.barcode <- intersect(colnames(count.st), rownames(meta.st))
30 |     shared.barcode <- intersect(shared.barcode, rownames(img@coordinates))
31 |     
32 |     meta.st <- meta.st[shared.barcode, ]
33 |     count.st <- count.st[, rownames(meta.st)]
34 |     img <- img[colnames(count.st)]
35 |     DefaultAssay(object = img) <- 'Spatial'
36 |     
37 |     identical(rownames(meta.st), colnames(count.st))
38 |     
39 |     ser <- CreateSeuratObject(count.st, meta.data = meta.st,
40 |                               min.cells = 1, min.features = 1, 
41 |                               assay = 'Spatial')
42 |     ser <- SCTransform(ser, assay = "Spatial")
43 |     ser[['image']] <- img
44 |     
45 |     saveRDS(ser, file = paste0(output.path, '/STser.rds'))
46 |   }
47 |   
48 |   close_distant <- CloDistCP(ser)
49 |   saveRDS(close_distant, file = paste0(output.path, '/CloDistCP.rds'))
50 | }


--------------------------------------------------------------------------------
/Step0_LRToolsFunction/cell2cell_python.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | 
 4 | # In[ ]:
 5 | 
 6 | 
 7 | import cell2cell as c2c
 8 | import scanpy as sc
 9 | import pandas as pd
10 | import sys
11 | import random
12 | random.seed(123)
13 | 
14 | counts_path = sys.argv[1]
15 | meta_path = sys.argv[2]
16 | lr_path = sys.argv[3]
17 | output_path = sys.argv[4]
18 | 
19 | adata = sc.read_csv(counts_path).T
20 | meta = pd.read_csv(meta_path, index_col = "Cell")
21 | adata.obs = meta
22 | 
23 | lr_pairs = pd.read_csv(lr_path)
24 | lr_pairs = lr_pairs.astype(str)
25 | meta = adata.obs.copy()
26 | 
27 | interactions = c2c.analysis.SingleCellInteractions(rnaseq_data=adata.to_df().T,
28 |                                                    ppi_data=lr_pairs,
29 |                                                    metadata=meta,
30 |                                                    interaction_columns=('ligand_symbol', 'receptor_symbol'),
31 |                                                    communication_score='expression_thresholding',
32 |                                                    expression_threshold=0.05, # values after aggregation
33 |                                                    cci_score='bray_curtis',
34 |                                                    cci_type='undirected',
35 |                                                    aggregation_method='nn_cell_fraction',
36 |                                                    barcode_col='Cell',
37 |                                                    celltype_col='Annotation',
38 |                                                    complex_sep='&',
39 |                                                    verbose=False)
40 | interactions.compute_pairwise_communication_scores()
41 | # interactions.compute_pairwise_communication_scores() return result: 
42 | ccc_matrix_path = output_path + '/communication_matrix.csv'
43 | interactions.interaction_space.interaction_elements['communication_matrix'].to_csv(ccc_matrix_path)
44 | ccc_pvals_path = output_path + '/ccc_pval.csv'
45 | ccc_pvals = interactions.permute_cell_labels(evaluation='communication',verbose=True, random_state = 123)
46 | ccc_pvals.to_csv(ccc_pvals_path)
47 | 
48 | 


--------------------------------------------------------------------------------
/Step5_BenchBasedCAGEProteomic/function.R:
--------------------------------------------------------------------------------
 1 | ###################
 2 | ## calculate_auc ##
 3 | ###################
 4 | 
 5 | ## get different metrices and ROC/PRC preformance object
 6 | get_evaluate_metrics <- function(pred,label)
 7 | {
 8 |   
 9 |   find_optimal_cutoff <- function(TPR, FPR, threshold){
10 |     
11 |     y = TPR - FPR
12 |     Youden_index = which.max(y)
13 |     # optimal_threshold = threshold[Youden_index]
14 |     return(Youden_index)
15 |     
16 |   }
17 |   
18 |   if(length(which(label==TRUE))!=0 & length(which(label==FALSE))!=0){
19 |     pred <- prediction(pred, label)
20 |     
21 |     perf_ROC <- performance(pred, measure = "tpr", x.measure = "fpr")
22 |     ind_ROC <- find_optimal_cutoff(perf_ROC@y.values[[1]],perf_ROC@x.values[[1]],perf_ROC@alpha.values[[1]])
23 |     cutoff_ROC <- perf_ROC@alpha.values[[1]][ind_ROC]
24 |     
25 |     perf_PRC <- performance(pred, measure = "prec", x.measure = "rec")
26 |     ind_PRC <- find_optimal_cutoff(perf_PRC@y.values[[1]],perf_PRC@x.values[[1]],perf_PRC@alpha.values[[1]])
27 |     cutoff_PRC <- perf_PRC@alpha.values[[1]][ind_PRC]
28 |     
29 |     ACC <- performance(pred, measure = "acc")@y.values[[1]] %>% .[ind_ROC] %>% signif(.,4)
30 |     ERR <- performance(pred, measure = "err")@y.values[[1]] %>% .[ind_ROC]  %>% signif(.,4)
31 |     PPV <- performance(pred, measure = "ppv")@y.values[[1]] %>% .[ind_ROC]  %>% signif(.,4)
32 |     #MCC <- performance(pred, measure = "mat")@y.values[[1]] %>% .[ind_ROC]  %>% signif(.,4)
33 |     F1 <- performance(pred, measure = "f")@y.values[[1]] %>% .[ind_ROC]  %>% signif(.,4)
34 |     AUC <- performance(pred, measure = "auc")@y.values[[1]] %>% signif(.,4)
35 |     AUCPR <- performance(pred, measure = "aucpr")@y.values[[1]] %>% signif(.,4)
36 |     
37 |     res = list(perf_ROC = perf_ROC,
38 |                perf_PRC = perf_PRC,
39 |                perf_metrics = c(AUROC=AUC,AUPRC=AUCPR,
40 |                                 ACC=ACC,ERR=ERR,PPV=PPV, F1=F1))
41 |     
42 |   }else{
43 |     
44 |     res <- list(perf_ROC = NA, perf_PRC = NA, 
45 |                 perf_metrics = rep(NA,6))
46 |     names(res$perf_metrics) <- c('AUROC','AUPRC','ACC','ERR','PPV','F1')
47 |     
48 |   }
49 |   
50 |   
51 |   return(res)
52 |   
53 | }


--------------------------------------------------------------------------------
/Step0_LRToolsFunction/ScriptForAddLRscore.R:
--------------------------------------------------------------------------------
 1 | rm(list = ls());gc()
 2 | suppressMessages(library(Seurat))
 3 | suppressMessages(library(tidyverse))
 4 | set.seed(123)
 5 | 
 6 | files <- list.dirs('./Data/Step1_LRPredictionResult', recursive = FALSE)
 7 | methods <- c('scMLnet', 'Domino')
 8 | 
 9 | for (file in files) {
10 |   print(file)
11 |   if(grepl('Slide', file)){
12 |     ser <- readRDS('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/DataForCCC/Slide14_hs_ser.rds')
13 |   }else if(grepl('N_MI', file)){
14 |     sample <- substring(file, 38)
15 |     ser <- readRDS(paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data/', sample, '_ser.rds'))
16 |   }else if(grepl('NG_BC', file)){
17 |     sample <- substring(file, 39)
18 |     ser <- readRDS(paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data/', sample, '_ser.rds'))
19 |   }else if(grepl('pbmc', file)){
20 |     sample <- substring(file, 33)
21 |     ser <- readRDS(paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/DataForCCC/', sample, '_ser.rds'))
22 |   }else if(grepl('Sper', file)){
23 |     ser <- readRDS('~/1-Datasets/Spermatogenesis_GSE106487/ScriptForCCC/Sper_ser.rds')
24 |   }
25 |   
26 |   data <- GetAssayData(ser, 'data', 'RNA') %>% 
27 |     as.matrix(.) %>% t(.) %>% 
28 |     as.data.frame() %>% 
29 |     tibble::rownames_to_column(., 'barcodes')
30 |   meta <- ser@meta.data %>% .[, 'celltype', drop =FALSE] %>% tibble::rownames_to_column(., 'barcodes')
31 |   rm(ser);gc()
32 |   
33 |   data <- merge(data, meta, by = 'barcodes')
34 |   data$barcodes <- NULL
35 |   data <- aggregate(.~celltype, data, mean)
36 |   data <- pivot_longer(data, -celltype, 'genes')
37 |   data$ct_genes <- paste(data$celltype, data$genes, sep = '_')
38 |   data <- data[ -c(1:2)]
39 |   rm(meta);gc()
40 |   
41 |   for (method in methods) {
42 |     print(method)
43 |     result <- readRDS(paste0(file, '/', method, '/result.rds'))
44 |     result_tmp <- result$result
45 |     result_tmp$sl <- paste(result_tmp$Sender, result_tmp$Ligand, sep = '_')
46 |     result_tmp$rr <- paste(result_tmp$Receiver, result_tmp$Receptor, sep = '_')
47 |     result_tmp <- merge(result_tmp, data, by.x = 'sl', by.y = 'ct_genes')
48 |     colnames(result_tmp)[8] <- 'ligand_value'
49 |     result_tmp <- merge(result_tmp, data, by.x = 'rr', by.y = 'ct_genes')
50 |     colnames(result_tmp)[9] <- 'receptor_value'
51 |     result_tmp$LRscore <- result_tmp$ligand_value*result_tmp$receptor_value
52 |     result_tmp <- result_tmp[,c('Ligand', 'Receptor', 'Sender', 'Receiver', 'LRscore', 'all')]
53 |     result$result <- result_tmp
54 |     saveRDS(result, paste0(file, '/', method, '/result1.rds'))
55 |   }
56 | }
57 | 


--------------------------------------------------------------------------------
/Step2_PreSTForLRBench/function.R:
--------------------------------------------------------------------------------
 1 | # Calculate the euclidean distance between cell pairs
 2 | CloDistCP <- function(ser, image = TRUE){
 3 |   if(image){
 4 |     coord <- ser@images$image@coordinates[, c('imagerow', 'imagecol')]
 5 |   }else{
 6 |     coord <- ser@meta.data[, c('imagerow', 'imagecol')]
 7 |   }
 8 |   
 9 |   meta <- ser@meta.data[, 'celltype', drop = FALSE]
10 |   identical(rownames(meta), rownames(coord))
11 |   
12 |   meta <- cbind(meta, coord)
13 |   meta <- meta[order(meta$celltype),]
14 |   
15 |   dist.cp <- as.matrix(dist(meta[, 2:3], method = "euclidean"))
16 |   dist.cp[dist.cp==0] <- 1
17 |   dist.cp[!lower.tri(dist.cp, diag = TRUE)] <- 0
18 |   dist.cp <- as.data.frame(dist.cp)
19 |   dist.cp <- tibble::rownames_to_column(dist.cp, var = "cell.1")
20 |   dist.cp <- tidyr::pivot_longer(data = dist.cp, cols = -cell.1, 
21 |                                  names_to = "cell.2", values_to = "distance")
22 |   dist.cp <- dist.cp[which(dist.cp$distance != 0), ]
23 |   
24 |   ct <- data.frame(barcode = rownames(meta), celltype = meta$celltype)
25 |   dist.cp <- dplyr::inner_join(dist.cp, ct, by = c('cell.1' = 'barcode'))
26 |   colnames(dist.cp)[4] <- "celltype.1"
27 |   dist.cp <- dplyr::inner_join(dist.cp, ct, by = c('cell.2' = 'barcode'))
28 |   colnames(dist.cp)[5] <- "celltype.2"
29 |   dist.cp$cp <- paste(dist.cp$celltype.1, dist.cp$celltype.2, sep = "_")
30 |   dist.cp <- dist.cp[which(dist.cp$celltype.1!=dist.cp$celltype.2), ]
31 |   dist.cp <- dist.cp[,c("cell.1", "cell.2", "distance", "cp")]
32 |   
33 |   # remove cell pairs with the number less than 30
34 |   remove.cp <- names(which(table(dist.cp$cp)<30))
35 |   if(length(remove.cp)!=0){
36 |     dist.cp <- dist.cp[-which(dist.cp$cp %in% remove.cp), ]
37 |   }
38 |   
39 |   perc <- c(0.1, 0.2, 0.3, 0.4, 0.5)
40 |   cellpairs <- unique(dist.cp$cp)
41 |   library(doParallel)
42 |   cl <- makeCluster(5)
43 |   registerDoParallel(cl)
44 |   close_distant <- foreach(per = perc) %dopar% {
45 |     tmp.close_distant <- lapply(cellpairs, function(cp){
46 |       dist <- list()
47 |       tmp.dist <- dist.cp[which(dist.cp$cp == cp), ]
48 |       tmp.dist <- tmp.dist[order(tmp.dist$distance, decreasing = T), ]
49 |       pct.close <- floor(dim(tmp.dist)[1]*per)
50 |       pct.distant <- floor(dim(tmp.dist)[1]*per)
51 |       close.dist <- tail(tmp.dist, n = pct.close)
52 |       distant.dist <- head(tmp.dist, n = pct.distant)
53 |       dist[["close"]] <- close.dist
54 |       dist[["distant"]] <- distant.dist
55 |       dist
56 |     })
57 |     names(tmp.close_distant) <- unique(dist.cp$cp)
58 |     return(tmp.close_distant)
59 |   }
60 |   stopCluster(cl)
61 |   names(close_distant) <- perc
62 |   return(close_distant)
63 | }
64 | 


--------------------------------------------------------------------------------
/Step2_PreSTForLRBench/ScriptForMouseEymbryo.R:
--------------------------------------------------------------------------------
 1 | rm(list = ls())
 2 | suppressMessages(library(Seurat))
 3 | source('./Script/Step2_PreSTForLRBench/function.R')
 4 | set.seed(123)
 5 | 
 6 | counts.st <- Read10X('~/1-Datasets/MouseEmbryo_GSE166692/sciSpaceData/data/', 
 7 |                      gene.column = 1)
 8 | 
 9 | cellmeta <- read.csv('~/1-Datasets/MouseEmbryo_GSE166692/sciSpaceData/cellmeta.tsv', 
10 |                      sep = '\t')
11 | genes <- read.csv('~/1-Datasets/MouseEmbryo_GSE166692/sciSpaceData/genemeta.tsv', 
12 |                   sep = '\t')
13 | 
14 | for (slide in unique(cellmeta$slide_id)) {
15 |   output.path <- paste0('./Data/Step2_PreSTForLRBench/', gsub(' ', '', slide))
16 |   if(!dir.exists(output.path)){
17 |     dir.create(output.path)
18 |   }
19 |   
20 |   if(T){
21 |     cells_sel <- rownames(cellmeta)[which(cellmeta$slide_id==slide)]
22 |     
23 |     # counts: solve the problem of duplicated genes
24 |     if(T){
25 |       counts_sub <- counts.st[, cells_sel]
26 |       counts_sub <- as.matrix(counts_sub)
27 |       counts_sub <- as.data.frame(counts_sub)
28 |       counts_sub$gene <- genes$gene_short_name; rownames(counts_sub) <- NULL
29 |       dup_genes <- counts_sub$gene[which(duplicated(counts_sub$gene))]
30 |       counts_sub1 <- counts_sub[-which(counts_sub$gene %in% dup_genes),]
31 |       counts_sub2 <- counts_sub[which(counts_sub$gene %in% dup_genes),]
32 |       counts_sub2 <- aggregate(.~gene,mean,data=counts_sub2)
33 |       counts_sub <- rbind(counts_sub1, counts_sub2); rownames(counts_sub) <- NULL
34 |       rm(counts_sub1, counts_sub2, dup_genes, cells_sel); gc()
35 |       counts_sub <- tibble::column_to_rownames(counts_sub, 'gene')
36 |       counts_sub <- as.matrix(counts_sub)
37 |     }
38 |     
39 |     meta_sub <- cellmeta[which(cellmeta$slide_id==slide),]
40 |     colnames(meta_sub)[c(9,10,19)] <- c('imagerow', 'imagecol', 'celltype')
41 |     meta_sub$celltype <- gsub(' ', '', meta_sub$celltype)
42 |     meta_sub$celltype <- gsub('Cardiacmusclelineages', 'CardiacMuscleLineages', meta_sub$celltype)
43 |     
44 |     shared.barcode <- intersect(colnames(counts_sub), rownames(meta_sub))
45 |     
46 |     meta_sub <- meta_sub[shared.barcode, ]
47 |     counts_sub <- counts_sub[, rownames(meta_sub)]
48 |     identical(rownames(meta_sub), colnames(counts_sub))
49 |     
50 |     ser <- CreateSeuratObject(counts = counts_sub, meta.data = meta_sub, 
51 |                               assay = 'Spatial', min.cells = 1, min.features = 1)
52 |     ser <- SCTransform(ser, assay = "Spatial")
53 |     
54 |     saveRDS(ser, file = paste0(output.path, '/STser.rds'))
55 |     rm(counts_sub, meta_sub);gc()
56 |   }
57 |   
58 |   ser <- readRDS(paste0(output.path, '/STser.rds'))
59 |   close_distant <- CloDistCP(ser, image = FALSE)
60 |   saveRDS(close_distant, file = paste0(output.path, '/CloDistCP.rds'))
61 | }
62 | 
63 | 
64 | 


--------------------------------------------------------------------------------
/Step8_LRTToolsFunction/HoloNet.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | 
  4 | # In[ ]:
  5 | 
  6 | 
  7 | import HoloNet as hn
  8 | 
  9 | import os
 10 | import pandas as pd
 11 | import random
 12 | import numpy as np
 13 | import scanpy as sc
 14 | import matplotlib.pyplot as plt
 15 | import torch
 16 | import sys
 17 | 
 18 | import warnings
 19 | warnings.filterwarnings('ignore')
 20 | hn.set_figure_params(tex_fonts=False)
 21 | sc.settings.figdir = './figures/'
 22 | 
 23 | 
 24 | # In[ ]:
 25 | 
 26 | 
 27 | random.seed(123) 
 28 | np.random.seed(123)
 29 | 
 30 | counts_path = sys.argv[1]
 31 | meta_path = sys.argv[2]
 32 | img_path = sys.argv[3]
 33 | goi_fpath = sys.argv[4]
 34 | output_path = sys.argv[5]
 35 | 
 36 | 
 37 | # In[ ]:
 38 | 
 39 | 
 40 | # load ST data (gene expression matrix + celltype + spatial)
 41 | adata = sc.read_csv(counts_path).T
 42 | meta = pd.read_csv(meta_path,  index_col="Barcodes")
 43 | adata.obs = meta
 44 | adata = sc.read_visium(adata, path = img_path)
 45 | 
 46 | 
 47 | # In[ ]:
 48 | 
 49 | 
 50 | # run normalization
 51 | sc.pp.normalize_total(adata, inplace=True)
 52 | sc.pp.log1p(adata)
 53 | sc.pp.highly_variable_genes(adata, flavor="seurat", n_top_genes=2000)
 54 | 
 55 | 
 56 | # In[ ]:
 57 | 
 58 | 
 59 | # load LR database and filter genes epressed by less than 5% of cells
 60 | LR_df = hn.pp.load_lr_df()
 61 | expressed_LR_df = hn.pp.get_expressed_lr_df(LR_df, adata, expressed_proportion=0.05)
 62 | 
 63 | 
 64 | # In[ ]:
 65 | 
 66 | 
 67 | # calculate w_best
 68 | w_best = hn.tl.default_w_visium(adata)
 69 | w_best
 70 | 
 71 | 
 72 | # In[ ]:
 73 | 
 74 | 
 75 | # construct CE
 76 | CE_tensor = hn.tl.compute_ce_tensor(adata, lr_df=expressed_LR_df, w_best=w_best)
 77 | CE_tensor_filtered = hn.tl.filter_ce_tensor(CE_tensor, adata,
 78 |                                             lr_df=expressed_LR_df, w_best=w_best)
 79 | 
 80 | 
 81 | # In[ ]:
 82 | 
 83 | 
 84 | # Selecting the target gene (icgs) to be predicted
 85 | target_all_gene_expr, used_gene_list = hn.pr.get_gene_expr(adata, expressed_LR_df,
 86 |                                                            max_sparse = 0.05)
 87 | 
 88 | 
 89 | # In[ ]:
 90 | 
 91 | 
 92 | goi = pd.read_csv(goi_fpath)
 93 | goi = list(set(goi["x"]).intersection(set(used_gene_list)))
 94 | len(goi)
 95 | 
 96 | 
 97 | # In[ ]:
 98 | 
 99 | 
100 | X, cell_type_names = hn.pr.get_one_hot_cell_type_tensor(adata, categorical_cell_type_col = 'celltype')
101 | adj = hn.pr.adj_normalize(adj=CE_tensor_filtered, cell_type_tensor=X, only_between_cell_type=True)
102 | 
103 | for gene in goi:
104 |   target = hn.pr.get_one_case_expr(target_all_gene_expr, cases_list=used_gene_list,
105 |                                    used_case_name= gene)
106 |   trained_MGC_model_list = hn.pr.mgc_repeat_training(X, adj, target, device='gpu')
107 |   predict_result = hn.pl.plot_mgc_result(trained_MGC_model_list, adata, X, adj)
108 |   for lr in list(expressed_LR_df['LR_Pair']):
109 |     _ = hn.pl.fce_cell_type_network_plot(trained_MGC_model_list, expressed_LR_df, X, adj,
110 |                                        cell_type_names, plot_lr=lr, edge_thres=0.2,
111 |                                        )
112 |     res = pd.DataFrame(_[0])
113 |     file_path = output_path + lr + "_" + gene + ".csv"
114 |     res.to_csv(file_path)
115 | 
116 | 


--------------------------------------------------------------------------------
/Step9_LRTBench/Step3_ScriptForTimeMen.R:
--------------------------------------------------------------------------------
 1 | rm(list = ls()); gc()
 2 | suppressMessages(library(ggplot2))
 3 | source('./Script/Step9_LRTBench/function.R')
 4 | set.seed(123)
 5 | 
 6 | # Record the number of cell types in each dataset
 7 | if(F){
 8 |   suppressMessages(library(Seurat))
 9 |   set.seed(123)
10 |   ser1.path <- list.files('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/ST_Data', full.names = TRUE)
11 |   ser2.path <- list.files('~/1-Datasets/Nature_2022_MI/ScriptForCCC/ST_Data', full.names = TRUE)
12 |   ser3.path <- list.files('~/1-Datasets/CancerCell_2022_Glioma/ScriptForLRTBenchmark/ST_Data', full.names = TRUE)
13 |   ser.path <- c(ser1.path, ser2.path, ser3.path)
14 |   rm(ser1.path, ser2.path, ser3.path);gc()
15 |   
16 |   tmp <- c()
17 |   for (i in seq(ser.path)) {
18 |     ser <- readRDS(ser.path[i])
19 |     remove.ct <- as.numeric(length(which(table(ser$celltype)<3)))
20 |     temp <- length(unique(ser$celltype))-remove.ct
21 |     tmp <- c(tmp, temp)
22 |     rm(ser); gc()
23 |   }
24 |   ct_record <- data.frame(datasets = gsub('_ser.rds', '', 
25 |                                           unlist(lapply(stringr::str_split(ser.path, '/'), 
26 |                                                         function(x){x[8]}))),
27 |                           num = as.numeric(tmp))
28 |   
29 |   saveRDS(ct_record, file = './Data/Step9_LRTBenchResult/DatasetCellTypeInfo.rds')
30 |   rm(i, ser.path, temp, tmp, remove.ct, ct_record);gc()
31 | }
32 | 
33 | # Record the running time of methods in each dataset
34 | if(T){
35 |   datasets <- list.dirs('./Data/Step8_LRTPredictionResult', recursive = FALSE, full.names = FALSE)
36 |   methods <- c('CytoTalk', 'NicheNet', 'HoloNet', 'MISTy', 'stMLnet')
37 |   DataRecord <- readRDS('./Data/Step9_LRTBenchResult/DatasetCellTypeInfo.rds')
38 |   wd <- './Data/Step8_LRTPredictionResult/'
39 |   
40 |   TimeMemRecord <- lapply(datasets, function(data){
41 |     print(data)
42 |     temp <- RunTimeMemRecord(wd, data, methods)
43 |     temp
44 |   })
45 |   names(TimeMemRecord) <- datasets
46 |   TimeMemRecord <- lapply(TimeMemRecord, function(data){
47 |     temp <- do.call(rbind, data)
48 |     temp <- tibble::rownames_to_column(temp, 'methods')
49 |     temp
50 |   })
51 |   TimeMemRecord <- do.call(rbind, TimeMemRecord)
52 |   TimeMemRecord <- tibble::rownames_to_column(TimeMemRecord, 'datasets')
53 |   TimeMemRecord$datasets <- gsub('\\.[0-9]+', '', TimeMemRecord$datasets)
54 |   
55 |   # Specific handle: the running time of CytoTalk in GBM datasets
56 |   CytoTalk_TimeMemRecord <- TimeMemRecord[which(TimeMemRecord$methods == 'CytoTalk' & grepl('UKF', TimeMemRecord$datasets)), ]
57 |   CytoTalk_TimeMemRecord <- merge(CytoTalk_TimeMemRecord, DataRecord, by = 'datasets')
58 |   CytoTalk_TimeMemRecord$total_num <- 2*(CytoTalk_TimeMemRecord$num-1)
59 |   CytoTalk_TimeMemRecord$actual_num <- 2*(CytoTalk_TimeMemRecord$num-1)-1
60 |   CytoTalk_TimeMemRecord$clock_time <- (CytoTalk_TimeMemRecord$clock_time/CytoTalk_TimeMemRecord$total_num)*CytoTalk_TimeMemRecord$actual_num
61 |   CytoTalk_TimeMemRecord$linux_time <- (CytoTalk_TimeMemRecord$linux_time/CytoTalk_TimeMemRecord$total_num)*CytoTalk_TimeMemRecord$actual_num
62 |   CytoTalk_TimeMemRecord <- CytoTalk_TimeMemRecord[, -c(7:9)]
63 |   TimeMemRecord <- TimeMemRecord[-which(TimeMemRecord$methods == 'CytoTalk' & grepl('UKF', TimeMemRecord$datasets)), ]
64 |   TimeMemRecord <- rbind(TimeMemRecord, CytoTalk_TimeMemRecord)
65 |   rm(CytoTalk_TimeMemRecord, DataRecord, datasets, methods, wd); gc()
66 | }


--------------------------------------------------------------------------------
/Step7_LRBenchSamplingBench/Step2_JaccardIndex.R:
--------------------------------------------------------------------------------
 1 | rm(list = ls());gc()
 2 | suppressMessages(library(tidyverse))
 3 | suppressMessages(library(ggsci))
 4 | set.seed(123)
 5 | 
 6 | datasets <- list.files('./Data/Step6_LRBenchSamplingResult/', recursive = FALSE) %>%
 7 |   gsub('_[0-9]+', '',.) %>% unique()
 8 | methods <- c("CellPhoneDB2", "CellPhoneDB3", "CellTalker", 
 9 |              "Connectome", "ICELLNET",  "NATMI", "iTALK", 
10 |              "scConnect", "SingleCellSignalR", "CellChat", 
11 |              "RNAMagnet", "PyMINEr",
12 |              "scSeqComm", "NicheNet",
13 |              "scMLnet", "CellCall","CytoTalk", "Domino")
14 | 
15 | JaccardIndex <- function(a, b) {
16 |   intersection = length(intersect(unique(a), unique(b)))
17 |   union = length(a) + length(b) - intersection
18 |   return (intersection/union)
19 | }
20 | 
21 | result <- lapply(datasets, function(data){
22 |   print(data)
23 |   temp <- lapply(methods, function(method){
24 |     print(method)
25 |     if(grepl('CK', data)){
26 |       res100 <- readRDS(paste('./Data/Step1_LRPredictionResult',
27 |                               paste0('N_MI_', data), method, 
28 |                               ifelse(method=='iTALK','result1.rds','result.rds'), 
29 |                               sep = '/')) %>% .$result
30 |     }else if(grepl('CID', data)){
31 |       res100 <- readRDS(paste('./Data/Step1_LRPredictionResult',
32 |                               paste0('NG_BC_', data), method, 
33 |                               ifelse(method=='iTALK','result1.rds','result.rds'), 
34 |                               sep = '/')) %>% .$result
35 |     }else if(grepl('Slide14', data)){
36 |       res100 <- readRDS(paste('./Data/Step1_LRPredictionResult',
37 |                               paste0('MouseEmbryo_', data), method, 
38 |                               ifelse(method=='iTALK','result1.rds','result.rds'), 
39 |                               sep = '/')) %>% .$result
40 |     }else{
41 |       res100 <- readRDS(paste('./Data/Step1_LRPredictionResult',
42 |                               data, method, 
43 |                               ifelse(method=='iTALK','result1.rds','result.rds'), 
44 |                               sep = '/')) %>% .$result
45 |     }
46 |     
47 |     if(dim(res100)[[1]]==0 | is.null(res100)){
48 |       tmp <- rep(NA, 5)
49 |     }else{
50 |       res90 <- readRDS(paste('./Data/Step10_LRBenchSamplingResult',
51 |                              paste0(data, '_90'), method, 'result.rds', sep = '/'))%>% .$result
52 |       JI_90 <- JaccardIndex(res100$all, res90$all)
53 |       res80 <- readRDS(paste('./Data/Step10_LRBenchSamplingResult',
54 |                              paste0(data, '_80'), method, 'result.rds', sep = '/'))%>% .$result
55 |       JI_80 <- JaccardIndex(res100$all, res80$all)
56 |       res70 <- readRDS(paste('./Data/Step10_LRBenchSamplingResult',
57 |                              paste0(data, '_70'), method, 'result.rds', sep = '/'))%>% .$result
58 |       JI_70 <- JaccardIndex(res100$all, res70$all)
59 |       res60 <- readRDS(paste('./Data/Step10_LRBenchSamplingResult',
60 |                              paste0(data, '_60'), method, 'result.rds', sep = '/'))%>% .$result
61 |       JI_60 <- JaccardIndex(res100$all, res60$all)
62 |       res50 <- readRDS(paste('./Data/Step10_LRBenchSamplingResult',
63 |                              paste0(data, '_50'), method, 'result.rds', sep = '/'))%>% .$result
64 |       JI_50 <- JaccardIndex(res100$all, res50$all)
65 |       tmp <- c(JI_50, JI_60, JI_70, JI_80, JI_90)
66 |     }
67 |     
68 |     tmp
69 |   })
70 |   names(temp) <- methods
71 |   temp <- do.call(rbind, temp)
72 |   temp <- as.data.frame(temp)
73 |   colnames(temp) <- c('S50', 'S60', 'S70', 'S80', 'S90')
74 |   temp <- tibble::rownames_to_column(temp, 'methods')
75 |   temp$datasets <- data
76 |   temp
77 | })
78 | result <- do.call(rbind, result)
79 | saveRDS(result, file = './Data/Step7_LRBenchSampling/JaccardIndex.rds')
80 | 


--------------------------------------------------------------------------------
/Step3_MIForLRBench/Step1_ScriptForCalMI.R:
--------------------------------------------------------------------------------
 1 | rm(list = ls())
 2 | suppressMessages(library(Seurat))
 3 | source('./Script/Step3_MIForLRBench/function.R')
 4 | set.seed(123)
 5 | 
 6 | datasets <- c('CID4465', 'CID44971', 'CK357', 'CK358',
 7 |               'CK368', 'CK162', 'CK362', 'CK361',
 8 |               'CK161', 'CK165', 'Slide14')
 9 | methods <- c('CellPhoneDB2', 'CellPhoneDB3',	'CellTalker',	'Connectome',
10 |              'NATMI',	'ICELLNET',	'scConnect', 'CellChat', 'SingleCellSignalR',
11 |              'CellCall',	'scSeqComm',	'NicheNet', 'Domino',	'scMLnet', 'iTALK',	
12 |              'cell2cell', 'RNAMagnet', 'PyMINEr', 'CytoTalk')
13 | ck.dataset <- readRDS('~/1-Datasets/Nature_2022_MI/dataset_id.rds')
14 | # loop: datasets
15 | for(data in datasets){
16 |   print(data)
17 |   if(grepl('CK', data)){
18 |     # get normalized data of ST
19 |     STser <- readRDS(paste0('./Data/Step2_PreSTForLRBench/', ck.dataset$ST[which(ck.dataset$snRNA == data)], '/STser.rds'))
20 |     # get close and distant cell pairs
21 |     CloDistCP <- readRDS(paste0('./Data/Step2_PreSTForLRBench/', ck.dataset$ST[which(ck.dataset$snRNA == data)], '/CloDistCP.rds'))
22 |   }else{
23 |     # get normalized data of ST
24 |     STser <- readRDS(paste0('./Data/Step2_PreSTForLRBench/', data, '/STser.rds'))
25 |     # get close and distant cell pairs
26 |     CloDistCP <- readRDS(paste0('./Data/Step2_PreSTForLRBench/', data, '/CloDistCP.rds'))
27 |   }
28 |   
29 |   # get normalized data of ST
30 |   norm.data <- GetAssayData(STser, 'data', 'SCT')
31 |   #norm.data <- as.matrix(norm.data)
32 |   # get close and distant cell pairs
33 |   CloDistCP[['0.5']] <- NULL
34 |   # get celltype of ST data
35 |   celltype.st <- unique(STser$celltype)
36 |   rm(STser); gc()
37 |   
38 |   # get result path
39 |   if(grepl('CID', data)){
40 |     result.path <- paste0('./Data/Step1_LRPredictionResult/NG_BC_', data)
41 |   }else if(grepl('CK', data)){
42 |     result.path <- paste0('./Data/Step1_LRPredictionResult/N_MI_', data)
43 |   }else if(grepl('Slide', data)){
44 |     result.path <- paste0('./Data/Step1_LRPredictionResult/MouseEmbryo_', data)
45 |   }
46 |   
47 |   output.path <- paste0('./Data/Step3_MIPCCForLRBench/', data, '_result.rds')
48 |   dataset_EvalIndex1 <- lapply(methods, function(method){
49 |     print(method)
50 |     
51 |     # remove the LR pairs whose sender/receiver celltypes are not in celltypes of ST
52 |     result <- readRDS(paste0(result.path, '/', method, '/result.rds'))
53 |     result <- result$result
54 |     result <- result[which(result$Sender %in% celltype.st), ]
55 |     result <- result[which(result$Receiver %in% celltype.st), ]
56 |     result$sr <- paste(result$Sender, result$Receiver, sep = '_')
57 |     CellPairs <- names(CloDistCP$`0.1`)
58 |     CellPairs1 <- as.data.frame(stringr::str_split(CellPairs, '_', simplify = TRUE))
59 |     CellPairs1 <- paste(CellPairs1$V2, CellPairs1$V1, sep = '_')
60 |     CellPairs <- c(CellPairs, CellPairs1)
61 |     remove.sr <- setdiff(unique(result$sr), CellPairs)
62 |     if(length(remove.sr) != 0){
63 |       result <- result[-which(result$sr %in% remove.sr), ]
64 |     }
65 |     
66 |     if(dim(result)[1]!=0){
67 |       if(grepl('CID', data) & method == 'RNAMagnet'){
68 |         genes <- readRDS('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/hs2mm_genes.rds')
69 |         result <- mm2hs(result, genes)
70 |       }else if(grepl('CK', data) & method == 'RNAMagnet'){
71 |         genes <- readRDS('~/1-Datasets/Nature_2022_MI/ScriptForCCC/hs2mm_genes.rds')
72 |         result <- mm2hs(result, genes)
73 |       }else if(grepl('Slide', data) & 
74 |                method %in% c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'NATMI', 
75 |                              'ICELLNET', 'NicheNet', 'scMLnet', 'iTALK', 'cell2cell')){
76 |         genes <- readRDS('~/1-Datasets/MouseEmbryo_GSE166692/sciSpaceData/mm2hs.rds')
77 |         result <- hs2mm(result, genes)
78 |       }
79 |       
80 |       EvaIndex1_result <- lapply(CloDistCP, function(CloDist){
81 |         result.tmp <- EvaIndex1_2(CloDist, norm.data, result)
82 |         result.tmp
83 |       })
84 |       
85 |     }else{
86 |       EvaIndex1_result <- NA
87 |     }
88 |     
89 |     return(EvaIndex1_result)
90 |   })
91 |   names(dataset_EvalIndex1) <- methods
92 |   saveRDS(dataset_EvalIndex1, file = output.path)
93 | }


--------------------------------------------------------------------------------
/Step7_LRBenchSamplingBench/function.R:
--------------------------------------------------------------------------------
 1 | RunTimeMemRecord <- function(data, ratio, methods, DataRecord){
 2 |   # get result path
 3 |   if(ratio==100){
 4 |     if(grepl('CK', data)){
 5 |       result.path <- paste0('./Data/Step1_LRPredictionResult/N_MI_', data)
 6 |     }else if(grepl('CID', data)){
 7 |       result.path <- paste0('./Data/Step1_LRPredictionResult/NG_BC_', data)
 8 |     }else if(grepl('Slide14', data)){
 9 |       result.path <- paste0('./Data/Step1_LRPredictionResult/MouseEmbryo_', data)
10 |     }else{
11 |       result.path <- paste0('./Data/Step1_LRPredictionResult/', data)
12 |     }
13 |   }else{
14 |     result.path <- paste0('./Data/Step10_LRBenchSamplingResult/', data, '_', ratio)
15 |   }
16 |   
17 |   dataset_Record <- lapply(methods, function(method){
18 |     print(method)
19 |       
20 |     if(method == 'CytoTalk'){
21 |       record_dir <- paste0(result.path, '/', method)
22 |       record_files <- list.files(record_dir, pattern = 'TimeMemRecord', full.names = TRUE)
23 |       temp_record <- lapply(record_files, function(file){
24 |         record <- tryCatch(read.table(file, header = FALSE, sep = '\t', fill = TRUE),
25 |                         error=function(e){NA}
26 |         )
27 |         if(class(record)=='data.frame'){
28 |           tmp_record <- RunTimeMemRecord_2(record)
29 |         }else{
30 |           tmp_record <- NA
31 |         }
32 |         tmp_record
33 |       })
34 |       temp_record[which(is.na(temp_record))] <- NULL
35 |       temp_record <- do.call(rbind, temp_record) %>% as.data.frame()
36 |       
37 |       if(ratio == 100){
38 |         number_record <- DataRecord$Celltypes[which((DataRecord$ratio == ratio) & (DataRecord$datasets==data))]
39 |         actual_cp <- number_record * number_record - number_record - number_record
40 |         combn_cp <- dim(combn(seq(number_record), 2))[2]
41 |         temp_record$clock_time <- (temp_record$clock_time/actual_cp)*combn_cp
42 |         temp_record$linux_time <- (temp_record$linux_time/actual_cp)*combn_cp
43 |       }
44 |       
45 |       temp_record <- data.frame(clock_time = sum(temp_record$clock_time), 
46 |                                 linux_time = sum(temp_record$linux_time), 
47 |                                 max_memory = max(temp_record$max_memory), 
48 |                                 cpu_linux = paste0(round(mean(as.numeric(gsub('%', '', temp_record$cpu_linux))), 0), '%'))
49 |      }else{
50 |        record <- read.table(paste0(result.path, '/', method, '/', 'TimeMemRecord.txt'),
51 |                              header = FALSE, sep = '\t', fill = TRUE)
52 |        temp_record <- RunTimeMemRecord_2(record)
53 |       }
54 |     
55 |     temp_record
56 |   })
57 |   names(dataset_Record) <- methods
58 |   return(dataset_Record)
59 | }
60 | 
61 | 
62 | RunTimeMemRecord_2 <- function(record){
63 |   
64 |   record <- record[, 2, drop = FALSE]
65 |   if(dim(record)[1]==24){
66 |     record <- record[-1, , drop = FALSE]
67 |   }
68 |   record <- record[c(2,3,4,5,10),, drop = FALSE]
69 |   
70 |   
71 |   # Linux_time: user+system time
72 |   user_time <- as.numeric(gsub('User time \\(seconds\\): ', '', record[1,])) %>% as.numeric()
73 |   sys_time <- as.numeric(gsub('System time \\(seconds\\): ', '', record[2,])) %>% as.numeric()
74 |   linux_time <- user_time+sys_time
75 |   
76 |   # Clock_time
77 |   clock_time <- gsub('Elapsed \\(wall clock\\) time \\(h:mm:ss or m:ss\\): ', '', record[4,])
78 |   if(stringr::str_count(clock_time, ':') == 1){
79 |     min_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[1])
80 |     min_time <- min_time*60
81 |     sec_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[2])
82 |     clock_time <- min_time+sec_time; rm(sec_time, min_time)
83 |   }else if(stringr::str_count(clock_time, ':') == 2){
84 |     h_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[1])
85 |     h_time <- h_time*60*60
86 |     min_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[2])
87 |     min_time <- min_time*60
88 |     sec_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[3])
89 |     clock_time <- h_time+min_time+sec_time; 
90 |     rm(sec_time, min_time, h_time);gc()
91 |   }
92 |   
93 |   mem_linux <- gsub('Maximum resident set size \\(kbytes\\): ', '', record[5,]) 
94 |   mem_linux <- round(as.numeric(mem_linux)/1024/1024, 2)
95 |   cpu_linux <- gsub('Percent of CPU this job got: ', '', record[3,])
96 |   
97 |   return(data.frame(clock_time = clock_time, linux_time = linux_time, max_memory = mem_linux, cpu_linux = cpu_linux))
98 | }
99 | 


--------------------------------------------------------------------------------
/Step6_LRBenchSampling/RunScript.R:
--------------------------------------------------------------------------------
  1 | suppressMessages(library(Seurat))
  2 | source('../Script/Step0_LRToolsFunction/Step1_LRToolsFunction.R')
  3 | set.seed(123)
  4 | 
  5 | args <- commandArgs()
  6 | tools <- args[6]
  7 | print(tools)
  8 | sampleID <- args[7]
  9 | print(sampleID)
 10 | 
 11 | species <- args[8]
 12 | ratio <- args[9]
 13 | print(species)
 14 | print(ratio)
 15 | 
 16 | if(grepl('CID', sampleID) & tools == 'RNAMagnet'){
 17 |   
 18 |   fpath.dataset <- paste0('/home/ljx/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data_sampling_Mm/',
 19 |                           sampleID,'_', ratio,  '_ser.rds')
 20 |   
 21 | }else if(grepl('CID', sampleID) & !(tools == 'RNAMagnet')){
 22 |   
 23 |   fpath.dataset <- paste0('/home/ljx/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data_sampling/',
 24 |                           sampleID, '_', ratio, '_ser.rds')
 25 |   
 26 | }else if(grepl('CK', sampleID) & tools == 'RNAMagnet'){
 27 |   
 28 |   fpath.dataset <- paste0('/home/ljx/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data_sampling_Mm/',
 29 |                           sampleID, '_', ratio , '_ser.rds')
 30 |   
 31 | }else if(grepl('CK', sampleID) & !(tools == 'RNAMagnet')){
 32 |   
 33 |   fpath.dataset <- paste0('/home/ljx/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data_sampling/',
 34 |                           sampleID, '_', ratio , '_ser.rds')
 35 |   
 36 | }else if(grepl('Slide', sampleID)){
 37 |   if(tools %in% c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'NicheNet', 'iTALK', 'ICELLNET', 'scMLnet','NATMI', 'cell2cell')){
 38 |     fpath.dataset <- paste0('/home/ljx/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/scRNA_Data_Sampling/',
 39 |                             sampleID, '_', ratio , '_hs_ser.rds')
 40 |   }else{
 41 |     fpath.dataset <- paste0('/home/ljx/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/scRNA_Data_Sampling/',
 42 |                             sampleID, '_', ratio , '_mm_ser.rds')
 43 |   }
 44 | }else if(grepl('pbmc', sampleID) & !(tools == 'RNAMagnet')){
 45 |   fpath.dataset <- paste0('/home/ljx/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/scRNA_Data_sampling/', sampleID, '_', ratio , '_ser.rds')
 46 | }else if(grepl('pbmc', sampleID) & (tools == 'RNAMagnet')){
 47 |   fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/scRNA_Data_sampling/', sampleID, '_', ratio , '_mm_ser.rds')
 48 | }
 49 | 
 50 | dir.output <- paste0('/home/ljx/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step10_LRBenchSamplingResult/', sampleID, '_', ratio)
 51 | 
 52 | ser <- readRDS(fpath.dataset)
 53 | 
 54 | if(!dir.exists(dir.output)){
 55 |   dir.create(dir.output)
 56 | }
 57 | 
 58 | fpath.tools <- paste(dir.output, tools, sep = '/')
 59 | if(!dir.exists(fpath.tools)){
 60 |   dir.create(fpath.tools)
 61 | }
 62 | 
 63 | if(tools == 'CellCall'){
 64 |   if(grepl('CK', sampleID) | grepl('pbmc', sampleID)){
 65 |     n <- 2
 66 |   }else if(grepl('CID', sampleID) | grepl('Sper', sampleID)){
 67 |     n <- 3
 68 |   }else if(grepl('Slide', sampleID)){
 69 |     n <- 5
 70 |   }
 71 |   result <- CellCall_function(ser, n, species = species)
 72 | }else if(tools == 'CellChat'){
 73 |   result <- CellChat_function(ser, species = species)
 74 | }else if(tools == 'CellPhoneDB2'){
 75 |   result <- CellPhoneDB2_function(ser, fpath.tools)
 76 | }else if(tools == 'CellPhoneDB3'){
 77 |   result <- CellPhoneDB3_function(ser, fpath.tools)
 78 | }else if(tools == 'CellTalker'){
 79 |   result <- CellTalker_function(ser)
 80 | }else if(tools == 'Connectome'){
 81 |   result <- Connectome_function(ser, species = species)
 82 | }else if(tools == 'CytoTalk'){
 83 |   result <- CytoTalk_function(ser, fpath.tools, species = species)
 84 | }else if(tools == 'Domino'){
 85 |   if(sampleID %in% c('pbmc4k', 'pbmc8k')){
 86 |     species <- 'hg38'
 87 |   }else if(sampleID == 'pbmc6k' | grepl('Sper' ,sampleID)){
 88 |     species <- 'hg19'
 89 |   }else if(grepl('Slide' ,sampleID)){
 90 |     species <- 'mm10'
 91 |   }else{
 92 |     species <- 'hg38'
 93 |   }
 94 |   result <- Domino_function(ser, fpath.tools, species = species)
 95 | }else if(tools == 'ICELLNET'){
 96 |   result <- ICELLNET_function(ser)
 97 | }else if(tools == 'iTALK'){
 98 |   result <- iTALK_function(ser)
 99 | }else if(tools == 'NATMI'){
100 |   result <- NATMI_function(ser, fpath.tools)
101 | }else if(tools == 'NicheNet'){
102 |   result <- NicheNet_function(ser, lr = TRUE)
103 | }else if(tools == 'PyMINEr'){
104 |   result <- PyMINEr_function(ser, fpath.tools, species)
105 | }else if(tools == 'RNAMagnet'){
106 |   result <- RNAMagnet_function(ser)
107 | }else if(tools == 'scConnect'){
108 |   result <- scConnect_function(ser, fpath.tools, species)
109 | }else if(tools == 'scMLnet'){
110 |   result <- scMLnet_function(ser)
111 | }else if(tools == 'scSeqComm'){
112 |   result <- scSeqComm_function(ser, species)
113 | }else if(tools == 'SingleCellSignalR'){
114 |   result <- SCSR_function(ser, species)
115 | }
116 | saveRDS(result, file = paste0(fpath.tools, '/result.rds'))


--------------------------------------------------------------------------------
/Step7_LRBenchSamplingBench/Step1_DatasetRecord.R:
--------------------------------------------------------------------------------
 1 | rm(list = ls()); gc()
 2 | suppressMessages(library(tidyverse))
 3 | set.seed(123)
 4 | 
 5 | datasets <- list.files('./Data/Step6_LRBenchSamplingResult/', recursive = FALSE) %>%
 6 |   gsub('_[0-9]+', '',.) %>% unique()
 7 | ratios <- c(50, 60, 70, 80, 90, 100)
 8 | 
 9 | record_cells <- lapply(datasets, function(dataset){
10 |   tmp <- lapply(ratios, function(ratio){
11 |     if(grepl('CID', dataset) & ratio == 100){
12 |       fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data/', dataset,  '_ser.rds')
13 |     }else if(grepl('CID', dataset) & ratio != 100){
14 |       fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data_sampling/', dataset,'_', ratio,  '_ser.rds')
15 |     }else if(grepl('CK', dataset) & ratio == 100){
16 |       fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data/', dataset, '_ser.rds')
17 |     }else if(grepl('CK', dataset) & ratio != 100){
18 |       fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data_sampling/', dataset, '_', ratio , '_ser.rds')
19 |     }else if(grepl('Slide', dataset) & ratio == 100){
20 |       fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/DataForCCC/', dataset, '_mm_ser.rds')
21 |     }else if(grepl('Slide', dataset) & ratio != 100){
22 |       fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/scRNA_Data_Sampling/', dataset, '_', ratio , '_mm_ser.rds')
23 |     }else if(grepl('pbmc', dataset) & ratio == 100){
24 |       fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/DataForCCC/', dataset, '_ser.rds')
25 |     }else if(grepl('pbmc', dataset) & ratio != 100){
26 |       fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/scRNA_Data_sampling/', dataset, '_', ratio , '_ser.rds')
27 |     }
28 |     ser <- readRDS(fpath.dataset)
29 |     
30 |     cells <- dim(ser)[[2]]
31 |   })%>% unlist()
32 |   
33 |   tmp
34 | })
35 | names(record_cells) <- datasets
36 | record_cells <- do.call(rbind, record_cells)
37 | colnames(record_cells) <- paste0('S', ratios)
38 | record_cells <- as.data.frame(record_cells)
39 | record_cells <- tibble::rownames_to_column(record_cells, 'datasets')
40 | record_cells <- pivot_longer(record_cells, -datasets, 'SampleRate')
41 | colnames(record_cells)[3] <- 'Cells'
42 | record_cells$class <- paste(record_cells$datasets, record_cells$SampleRate, sep = '_')
43 | record_cells <- record_cells[,-c(1:2)]
44 | 
45 | 
46 | record_celltypes <- lapply(datasets, function(dataset){
47 |   tmp <- lapply(ratios, function(ratio){
48 |     if(grepl('CID', dataset) & ratio == 100){
49 |       fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data/', dataset,  '_ser.rds')
50 |     }else if(grepl('CID', dataset) & ratio != 100){
51 |       fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data_sampling/', dataset,'_', ratio,  '_ser.rds')
52 |     }else if(grepl('CK', dataset) & ratio == 100){
53 |       fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data/', dataset, '_ser.rds')
54 |     }else if(grepl('CK', dataset) & ratio != 100){
55 |       fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data_sampling/', dataset, '_', ratio , '_ser.rds')
56 |     }else if(grepl('Slide', dataset) & ratio == 100){
57 |       fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/DataForCCC/', dataset, '_mm_ser.rds')
58 |     }else if(grepl('Slide', dataset) & ratio != 100){
59 |       fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/scRNA_Data_Sampling/', dataset, '_', ratio , '_mm_ser.rds')
60 |     }else if(grepl('pbmc', dataset) & ratio == 100){
61 |       fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/DataForCCC/', dataset, '_ser.rds')
62 |     }else if(grepl('pbmc', dataset) & ratio != 100){
63 |       fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/scRNA_Data_sampling/', dataset, '_', ratio , '_ser.rds')
64 |     }
65 |     ser <- readRDS(fpath.dataset)
66 |     
67 |     celltype <- length(unique(ser$celltype))
68 |   })%>% unlist()
69 |   
70 |   tmp
71 | })
72 | names(record_celltypes) <- datasets
73 | record_celltypes <- do.call(rbind, record_celltypes)
74 | colnames(record_celltypes) <- paste0('S', ratios)
75 | record_celltypes <- as.data.frame(record_celltypes)
76 | record_celltypes <- tibble::rownames_to_column(record_celltypes, 'datasets')
77 | record_celltypes <- pivot_longer(record_celltypes, -datasets, 'SampleRate')
78 | colnames(record_celltypes)[3] <- 'Celltypes'
79 | record_celltypes$class <- paste(record_celltypes$datasets, record_celltypes$SampleRate, sep = '_')
80 | record_celltypes <- record_celltypes[,-c(1:2)]
81 | 
82 | record <- merge(record_cells, record_celltypes, by = 'class')
83 | record <- separate(record, class, c('datasets', 'ratio'), sep = '_')
84 | saveRDS(record, file = './Data/Step6_LRBenchSampling/DatasetsRecord.rds')
85 | 


--------------------------------------------------------------------------------
/Step5_BenchBasedCAGEProteomic/ScriptForCAGE.R:
--------------------------------------------------------------------------------
  1 | rm(list = ls());gc()
  2 | suppressMessages(library(ROCR))
  3 | suppressMessages(library(tidyverse))
  4 | source('./Script/Step5_BenchBasedCAGEProteomic/function.R')
  5 | source('./Script/Step3_MIForLRBench/function.R')
  6 | set.seed(123)
  7 | 
  8 | #process cage data 
  9 | if(F){
 10 |   celltype <- c('CD14+CD16- Monocytes', 'CD14-CD16+ Monocytes', 'CD19+ B cells', 
 11 |                 'Dendritic Monocyte Immature derived', 'Dendritic Plasmacytoid',
 12 |                 'CD4+CD25-CD45RA- memory conventional T cells', 
 13 |                 'CD4+CD25-CD45RA+ naive conventional T cells',
 14 |                 'CD8+ T cells', 'NK cells')
 15 |   cage_all <- data.table::fread('~/1-Datasets/Fantom5_CAGE/ExpressionGenes.txt', header = TRUE, sep = '\t')
 16 |   cage_all <- as.data.frame(cage_all)
 17 |   cage_all <- cage_all[ , c(1, which(colnames(cage_all) %in% celltype))]
 18 |   cage_all <- tibble::column_to_rownames(cage_all, 'ApprovedSymbol')
 19 |   colnames(cage_all) <- c('CD14Mono', 'CD16Mono', 'B', 
 20 |                           'naiveCD4T', 'memoryCD4T', 'CD8T',
 21 |                           'mDC', 'pDC', 'NK')
 22 |   cage_all <- as.matrix(cage_all)
 23 |   cage <- lapply(1:dim(cage_all)[[2]], function(i){
 24 |     x <- cage_all[,i]
 25 |     genes <- rownames(cage_all)[which(x>10)]
 26 |     paste(genes, colnames(cage_all)[i], sep = '_')
 27 |   })
 28 |   cage <- unlist(cage)
 29 |   cage_genes <- list(all = rownames(cage_all), genes_ct = cage)
 30 |   saveRDS(cage_genes, file = './Data/Step5_BenchBasedCAGEProteomic/cagedata.rds')
 31 | }
 32 | 
 33 | #calculate index
 34 | if(F){
 35 |   cage_genes <- readRDS('./Data/Step5_BenchBasedCAGEProteomic/cagedata.rds')
 36 |   
 37 |   datasets <- c('pbmc4k', 'pbmc6k', 'pbmc8k')
 38 |   
 39 |   for (data in datasets) {
 40 |     fpath <- paste0('./Data/Step1_LRPredictionResult/', data)
 41 |     result.files <- list.files(fpath, full.names = TRUE)
 42 |     result.files <- result.files[-which(grepl('cellinker', result.files))]
 43 |     result_index <- lapply(result.files, function(result.file){
 44 |       print(result.file)
 45 |       if(grepl('Domino', result.file) | grepl('scMLnet', result.file)){
 46 |         result <- readRDS(paste0(result.file, '/result1.rds'))
 47 |       }else{
 48 |         result <- readRDS(paste0(result.file, '/result.rds'))
 49 |       }
 50 |       
 51 |       result <- result$result
 52 |       
 53 |       if((dim(result)[[1]] != 0) & 'LRscore' %in% colnames(result)){
 54 |         if(grepl('RNAMagnet', result.file)){
 55 |           genes <- readRDS(paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/', data, '_hs2mm.rds'))
 56 |           result <- mm2hs(result, genes)
 57 |         }
 58 |         
 59 |         result$receptor_ct <- lapply(1:dim(result)[[1]], function(i){
 60 |           receptor <- result$Receptor[i]
 61 |           if(grepl('&', receptor)){
 62 |             receptor <- unlist(strsplit(receptor, '&'))
 63 |           }
 64 |           receiver <- result$Receiver[i]
 65 |           tmp <- paste(receptor, receiver, sep = '_')
 66 |           label <- ifelse(all(tmp %in% cage_genes$genes_ct), TRUE, FALSE)
 67 |           if(!label){
 68 |             label <- ifelse(all(receptor %in% cage_genes$all), FALSE, NA)
 69 |           }
 70 |           label
 71 |         }) %>% unlist()
 72 |         
 73 |         result$ligand_ct <- lapply(1:dim(result)[[1]], function(i){
 74 |           ligand <- result$Ligand[i]
 75 |           if(grepl('&', ligand)){
 76 |             ligand <- unlist(strsplit(ligand, '&'))
 77 |           }
 78 |           sender <- result$Sender[i]
 79 |           tmp <- paste(ligand, sender, sep = '_')
 80 |           label <- ifelse(all(tmp %in% cage_genes$genes_ct), TRUE, FALSE)
 81 |           if(!label){
 82 |             label <- ifelse(all(ligand %in% cage_genes$all), FALSE, NA)
 83 |           }
 84 |           label
 85 |         }) %>% unlist()
 86 |         
 87 |         result <- result[which(!is.na(result$receptor_ct)), ]
 88 |         result <- result[which(!is.na(result$ligand_ct)),]
 89 |         
 90 |         result$label <- lapply(1:dim(result)[[1]], function(i){
 91 |           all(result$ligand_ct[i] & result$receptor_ct[i])
 92 |         }) %>% unlist()
 93 |         
 94 |         result$sr <- paste(result$Sender, result$Receiver, sep = '_')
 95 |         result <- result[, c('LRscore', 'label', 'sr')]
 96 |         result <- split(result, result$sr)
 97 |         
 98 |         res_index <- lapply(result, function(res){
 99 |           index <- get_evaluate_metrics(as.numeric(res$LRscore), res$label)
100 |           index <- index$perf_metrics
101 |         })
102 |         res_index <- do.call(rbind, res_index)
103 |         res_index <- as.data.frame(res_index)
104 |         
105 |       }else{
106 |         res_index <- NA
107 |       }
108 |       res_index
109 |     })
110 |     
111 |     names(result_index) <- substring(result.files, 40)
112 |     result_index[which(is.na(result_index))] <- NULL
113 |     result_index <- do.call(rbind, result_index)
114 |     result_index <- result_index[which(rowSums(result_index)!=0), ]
115 |     result_index <- tibble::rownames_to_column(result_index, 'temp')
116 |     result_index <- tidyr::separate(result_index, temp, c('methods', 'sr'), '\\.')
117 |     result_index$dataset <- data
118 |     result_index
119 |     saveRDS(result_index, file = paste0('./Data/Step5_BenchBasedCAGEProteomic/', data, '_cage.rds'))
120 |   }
121 | }
122 | 


--------------------------------------------------------------------------------
/Step1_LRPredictionResult/RunScript.R:
--------------------------------------------------------------------------------
  1 | suppressMessages(library(Seurat))
  2 | source('../Script/Step0_LRToolsFunction/Step1_LRToolsFunction.R')
  3 | set.seed(123)
  4 | 
  5 | args <- commandArgs()
  6 | tools <- args[6]
  7 | print(tools)
  8 | sampleID <- args[7]
  9 | print(sampleID)
 10 | species <- args[8]
 11 | print(species)
 12 | 
 13 | if(grepl('CID', sampleID) & tools == 'RNAMagnet'){
 14 |   fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data_Mm/',
 15 |                           sampleID, '_ser.rds')
 16 |   dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/NG_BC_', sampleID)
 17 | }else if(grepl('CID', sampleID) & !(tools == 'RNAMagnet')){
 18 |   fpath.dataset <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/scRNA_Data/',
 19 |                           sampleID, '_ser.rds')
 20 |   dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/NG_BC_', sampleID)
 21 | }else if(grepl('CK', sampleID) & tools == 'RNAMagnet'){
 22 |   fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data_Mm/',
 23 |                           sampleID, '_ser.rds')
 24 |   dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/N_MI_', sampleID)
 25 | }else if(grepl('CK', sampleID) & !(tools == 'RNAMagnet')){
 26 |   fpath.dataset <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/scRNA_Data/',
 27 |                           sampleID, '_ser.rds')
 28 |   dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/N_MI_', sampleID)
 29 | }else if(grepl('Slide', sampleID)){
 30 |   if(tools %in% c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'NicheNet', 'iTALK', 'ICELLNET', 'scMLnet','NATMI', 'cell2cell')){
 31 |     fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/DataForCCC/',
 32 |                             sampleID, '_hs_ser.rds')
 33 |   }else{
 34 |     fpath.dataset <- paste0('~/1-Datasets/MouseEmbryo_GSE166692/ScriptForCCC/DataForCCC/',
 35 |                             sampleID, '_mm_ser.rds')
 36 |   }
 37 |   dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/MouseEmbryo_', sampleID)
 38 | }else if(grepl('pbmc', sampleID) & !(tools == 'RNAMagnet')){
 39 |   fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/DataForCCC/', sampleID, '_ser.rds')
 40 |   dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/', sampleID)
 41 | }else if(grepl('pbmc', sampleID) & (tools == 'RNAMagnet')){
 42 |   fpath.dataset <- paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/DataForCCC/', sampleID, '_mm_ser.rds')
 43 |   dir.output <- paste0('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step1_LRPredictionResult/', sampleID)
 44 | }
 45 | 
 46 | ser <- readRDS(fpath.dataset)
 47 | 
 48 | if(!dir.exists(dir.output)){
 49 |   dir.create(dir.output)
 50 | }
 51 | 
 52 | fpath.tools <- paste(dir.output, tools, sep = '/')
 53 | if(!dir.exists(fpath.tools)){
 54 |   dir.create(fpath.tools)
 55 | }
 56 | 
 57 | if(tools == 'cell2cell'){
 58 |   result <- cell2cell_function(ser, fpath = fpath.tools, species = species)
 59 | }else if(tools == 'CellCall'){
 60 |   if(grepl('CK', sampleID) | grepl('pbmc', sampleID)){
 61 |     n <- 2
 62 |   }else if(grepl('CID', sampleID)){
 63 |     n <- 3
 64 |   }else if(grepl('Slide', sampleID)){
 65 |     n <- 5
 66 |   }
 67 |   result <- CellCall_function(ser, n, species = species)
 68 | }else if(tools == 'CellChat'){
 69 |   result <- CellChat_function(ser, species = species)
 70 | }else if(tools == 'CellPhoneDB2'){
 71 |   result <- CellPhoneDB2_function(ser, fpath.tools)
 72 | }else if(tools == 'CellPhoneDB3'){
 73 |   result <- CellPhoneDB3_function(ser, fpath.tools)
 74 | }else if(tools == 'CellTalker'){
 75 |   result <- CellTalker_function(ser)
 76 | }else if(tools == 'Connectome'){
 77 |   result <- Connectome_function(ser, species = species)
 78 | }else if(tools == 'CytoTalk'){
 79 |   result <- CytoTalk_function(ser, fpath.tools, species = species)
 80 | }else if(tools == 'Domino'){
 81 |   if(sampleID %in% c('pbmc4k', 'pbmc8k')){
 82 |     species <- 'hg38'
 83 |   }else if(sampleID == 'pbmc6k'){
 84 |     species <- 'hg19'
 85 |   }else if(grepl('Slide' ,sampleID)){
 86 |     species <- 'mm10'
 87 |   }else{
 88 |     species <- 'hg38'
 89 |   }
 90 |   result <- Domino_function(ser, fpath.tools, species = species)
 91 | }else if(tools == 'ICELLNET'){
 92 |   result <- ICELLNET_function(ser)
 93 | }else if(tools == 'iTALK'){
 94 |   result <- iTALK_function(ser)
 95 | }else if(tools == 'NATMI'){
 96 |   result <- NATMI_function(ser, fpath.tools, species = species)
 97 | }else if(tools == 'NicheNet'){
 98 |   result <- NicheNet_function(ser, lr = TRUE)
 99 | }else if(tools == 'PyMINEr'){
100 |   result <- PyMINEr_function(ser, fpath.tools, species)
101 | }else if(tools == 'RNAMagnet'){
102 |   result <- RNAMagnet_function(ser)
103 | }else if(tools == 'scConnect'){
104 |   result <- scConnect_function(ser, fpath.tools, species)
105 | }else if(tools == 'scMLnet'){
106 |   result <- scMLnet_function(ser)
107 | }else if(tools == 'scSeqComm'){
108 |   result <- scSeqComm_function(ser, species)
109 | }else if(tools == 'SingleCellSignalR'){
110 |   result <- SCSR_function(ser, species)
111 | }
112 | 
113 | saveRDS(result, file = paste0(fpath.tools, '/result.rds'))


--------------------------------------------------------------------------------
/Step9_LRTBench/Step1_ScriptForBench.R:
--------------------------------------------------------------------------------
  1 | rm(list = ls());gc()
  2 | setwd('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step8_LRTPredictionResult/')
  3 | library(dplyr)
  4 | library(ROCR)
  5 | library(ggplot2)
  6 | library(ggsci)
  7 | library(ggpubr)
  8 | library(tidyverse)
  9 | source('../../Script/Step5_BenchBasedCAGEProteomic/function.R')
 10 | set.seed(123)
 11 | 
 12 | 
 13 | ###########
 14 | ## color ##
 15 | ###########
 16 | 
 17 | scales::show_col(pal_aaas(palette = "default", alpha = 0.6)(10))
 18 | mycolors_aaas <- pal_aaas(palette = "default", alpha = 0.6)(10)
 19 | 
 20 | mycolor_software <- mycolors_aaas[c(1:5)]
 21 | names(mycolor_software) <- c('CytoTalk', "NicheNet", "MISTy", "HoloNet", "stMLnet")
 22 | scales::show_col(mycolor_software)
 23 | 
 24 | #####################
 25 | ## load all result ##
 26 | #####################
 27 | 
 28 | if(F){
 29 |   data_dirs <- list.files('.', full.names = TRUE)
 30 |   all_result <- lapply(data_dirs, function(data_dir){
 31 |     print(data_dir)
 32 |     methods_dirs <- list.files(data_dir, full.names = TRUE)
 33 |     methods <- list.files(data_dir)
 34 |     used_methods <- c('CytoTalk', 'HoloNet', 'MISTy', 'NicheNet', 'stMLnet')
 35 |     index <- which(methods %in% used_methods)
 36 |     methods_dirs <- methods_dirs[index]
 37 |     methods <- methods[index]
 38 |     data_result <- lapply(seq(methods_dirs), function(i){
 39 |       print(i)
 40 |       res_files <- list.files(methods_dirs[i], '_result.rds', full.names = TRUE)
 41 |       if(length(res_files)>0){
 42 |         res <- lapply(res_files, function(res_file){
 43 |           res_temp <- readRDS(res_file)
 44 |           if(dim(res_temp)[[1]] != 0){
 45 |             res_temp$methods <- methods[i]
 46 |           }else{
 47 |             res_temp <- NA
 48 |           }
 49 |           res_temp
 50 |         })
 51 |         names(res) <- list.files(methods_dirs[i], '_result.rds') %>% gsub('_result.rds', '', .)
 52 |         res[which(is.na(res))] <- NULL
 53 |       }else{
 54 |         res <- NA
 55 |       }
 56 |       res
 57 |     })
 58 |     names(data_result) <- methods
 59 |     data_result[which(is.na(data_result))] <- NULL
 60 |     data_result
 61 |   })
 62 |   names(all_result) <- list.files('.')
 63 |   saveRDS(all_result, file = 'all_result.rds')
 64 | }
 65 | 
 66 | setwd('~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data')
 67 | source('../Script/Step9_LRTBench/function.R')
 68 | all_result <- readRDS('./Step8_LRTPredictionResult/all_result.rds')
 69 | 
 70 | ###############
 71 | ## Calculate ##
 72 | ###############
 73 | 
 74 | result_index <- lapply(names(all_result), function(data){
 75 |   print(data)
 76 |   data_res <- all_result[[data]]
 77 |   
 78 |   if(grepl('CID', data)){
 79 |     
 80 |     ## groundtrue
 81 |     degs_ls <- readRDS('./Step0_SharedInfo/LRTBench/BC_celline/degs_ls_p.rds')
 82 |     
 83 |     methods_res <- lapply(data_res, function(x){x$CancerEpithelial})
 84 |     methods_res[unlist(lapply(methods_res, is.null))] <- NULL
 85 |     
 86 |     result <- getallmetrics(methods_res, degs_ls)
 87 |     result$celltype <- 'CancerEpithelial'
 88 |     
 89 |     result1 <- getallrecord(methods_res, degs_ls)
 90 |     saveRDS(result1, file = paste0('./Step8_LRTBenchResult/record/', data, '.rds'))
 91 |     
 92 |   }else if(grepl('control', data)){
 93 |     
 94 |     ## groundtrue
 95 |     degs_ls <- readRDS('~/1-Datasets/CellLinesDatasets/ProjectForLRTBench/result/degs_ls_p.rds')
 96 |     used <- which(grepl('GSE206947', names(degs_ls)) | 
 97 |                     grepl('GSE181575', names(degs_ls)) | 
 98 |                     grepl('GSE123018', names(degs_ls)))
 99 |     degs_ls_used <- degs_ls[used]
100 |     
101 |     methods_res <- lapply(data_res, function(x){x$Fibroblast})
102 |     methods_res[unlist(lapply(methods_res, is.null))] <- NULL
103 |     
104 |     result <- getallmetrics(methods_res, degs_ls_used)
105 |     result$celltype <- 'Fibroblast'
106 |     
107 |     result1 <- getallrecord(methods_res, degs_ls_used)
108 |     saveRDS(result1, file = paste0('./Step8_LRTBenchResult/record/', data, '.rds'))
109 |     
110 |   }else if(grepl('UKF', data)){
111 |     
112 |     ## groundtrue
113 |     degs_ls <- readRDS('~/1-Datasets/CellLinesDatasets/ProjectForLRTBench/result/degs_ls_p.rds')
114 |     
115 |     methods_res_macro <- lapply(data_res, function(x){x$macrophages})
116 |     methods_res_macro[unlist(lapply(methods_res_macro, is.null))] <- NULL
117 |     
118 |     methods_res_mal <- lapply(data_res, function(x){x$Malignant})
119 |     methods_res_mal[unlist(lapply(methods_res_mal, is.null))] <- NULL
120 |     
121 |     used <- which(grepl('GSE69104', names(degs_ls)) & grepl('TAM', names(degs_ls)))
122 |     degs_ls_used <- degs_ls[used]
123 |     result_macro <- getallmetrics(methods_res_macro, degs_ls_used)
124 |     result_macro1 <- getallrecord(methods_res_macro, degs_ls_used)
125 |     saveRDS(result_macro1, file = paste0('./Step8_LRTBenchResult/record/', data, '_macro.rds'))
126 |     
127 |     used <- which(grepl('GSE140145', names(degs_ls))|
128 |                     grepl('GSE116414', names(degs_ls)))
129 |     degs_ls_used <- degs_ls[used]
130 |     result_mal <- getallmetrics(methods_res_mal, degs_ls_used)
131 |     result_mal1 <- getallrecord(methods_res_mal, degs_ls_used)
132 |     saveRDS(result_mal1, file = paste0('./Step8_LRTBenchResult/record/', data, '_mal.rds'))
133 |     
134 |     result_macro$celltype <- 'macrophages'
135 |     result_mal$celltype <- 'malignant'
136 |     result <- rbind(result_macro, result_mal)
137 |     
138 |   }
139 |   result
140 | })
141 | names(result_index) <- names(all_result)
142 | saveRDS(result_index,file = './Step8_LRTBenchResult/result_index.rds')
143 | 


--------------------------------------------------------------------------------
/Step4_SIRSIForLRBench/function.R:
--------------------------------------------------------------------------------
  1 | IDTransform <- function(data, method, result){
  2 |   if(dim(result)[1]!=0){
  3 |     if(grepl('CID', data) & method == 'RNAMagnet'){
  4 |       genes <- readRDS('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/hs2mm_genes.rds')
  5 |       result <- mm2hs(result, genes)
  6 |     }else if(grepl('CK', data) & method == 'RNAMagnet'){
  7 |       genes <- readRDS('~/1-Datasets/Nature_2022_MI/ScriptForCCC/hs2mm_genes.rds')
  8 |       result <- mm2hs(result, genes)
  9 |     }else if(grepl('Slide', data) & 
 10 |              method %in% c('CellPhoneDB2', 'CellPhoneDB3', 'CellTalker', 'NATMI', 
 11 |                            'ICELLNET', 'NicheNet', 'scMLnet', 'iTALK', 'cell2cell')){
 12 |       genes <- readRDS('~/1-Datasets/MouseEmbryo_GSE166692/sciSpaceData/mm2hs.rds')
 13 |       result <- hs2mm(result, genes)
 14 |     }
 15 |   }
 16 |   return(result)
 17 | }
 18 | 
 19 | CalSI_1 <- function(a, b) {
 20 |   intersection = length(intersect(unique(a), unique(b)))
 21 |   #union = length(a) + length(b) - intersection
 22 |   union <- min(length(unique(a)), length(unique(b)))
 23 |   return (intersection/union)
 24 | }
 25 | 
 26 | CalSI_2 <- function(data){
 27 |   methods <- c('CellPhoneDB2', 'CellPhoneDB3',	'CellTalker',	'Connectome',
 28 |                'NATMI',	'ICELLNET',	'scConnect', 'CellChat', 'SingleCellSignalR',
 29 |                'CellCall',	'scSeqComm',	'NicheNet', 'Domino',	'scMLnet', 'iTALK',	
 30 |                'cell2cell', 'RNAMagnet', 'PyMINEr', 'CytoTalk')
 31 |   
 32 |   # get result path
 33 |   if(grepl('CID', data)){
 34 |     result.path <- paste0('./Data/Step1_LRPredictionResult/NG_BC_', data)
 35 |   }else if(grepl('CK', data)){
 36 |     result.path <- paste0('./Data/Step1_LRPredictionResult/N_MI_', data)
 37 |   }else if(grepl('Slide', data)){
 38 |     result.path <- paste0('./Data/Step1_LRPredictionResult/MouseEmbryo_', data)
 39 |   }
 40 |   
 41 |   SI_res <- lapply(methods, function(method1){
 42 |     print(method1)
 43 |     if(method1 == 'scMLnet' | method1 == 'Domino'){
 44 |       result1 <- readRDS(paste0(result.path, '/', method1, '/result1.rds'))
 45 |     }else{
 46 |       result1 <- readRDS(paste0(result.path, '/', method1, '/result.rds'))
 47 |     }
 48 |     
 49 |     result1 <- result1$result
 50 |     result1 <- IDTransform(data, method1, result1)
 51 |     temp.si <- lapply(methods, function(method2){
 52 |       if(method2 == 'scMLnet' | method2 == 'Domino'){
 53 |         result2 <- readRDS(paste0(result.path, '/', method2, '/result1.rds'))
 54 |       }else{
 55 |         result2 <- readRDS(paste0(result.path, '/', method2, '/result.rds'))
 56 |       }
 57 |       
 58 |       result2 <- result2$result
 59 |       result2 <- IDTransform(data, method2, result2)
 60 |       if(dim(result1)[1]==0 | dim(result2)[1]==0){
 61 |         tmp.si <- NA
 62 |       }else{
 63 |         tmp.si <- CalSI_1(as.character(result1$all), as.character(result2$all))
 64 |       }
 65 |       
 66 |       tmp.si
 67 |     })
 68 |     temp.si <- data.frame(SI = unlist(temp.si), row.names = methods)
 69 |     temp.si
 70 |   })
 71 |   SI_res <- do.call(cbind, SI_res)
 72 |   colnames(SI_res) <- methods
 73 |   #SI_res <- SI_res[!apply(SI_res, 1, function(x){all(is.na(x))}), ]
 74 |   #SI_res <- SI_res[, !apply(SI_res, 2, function(x){all(is.na(x))})]
 75 |   return(SI_res)
 76 | }
 77 | 
 78 | CalRSI <- function(data){
 79 |   # remove cell2cell
 80 |   methods <- c('CellPhoneDB2', 'CellPhoneDB3',	'CellTalker',	'Connectome',
 81 |                'NATMI',	'ICELLNET',	'scConnect', 'CellChat', 'SingleCellSignalR',
 82 |                'CellCall',	'scSeqComm',	'NicheNet', 'Domino',	'scMLnet', 'iTALK',	
 83 |                'cell2cell', 'RNAMagnet', 'PyMINEr', 'CytoTalk')
 84 |   
 85 |   # get result path
 86 |   if(grepl('CID', data)){
 87 |     result.path <- paste0('./Data/Step1_LRPredictionResult/NG_BC_', data)
 88 |   }else if(grepl('CK', data)){
 89 |     result.path <- paste0('./Data/Step1_LRPredictionResult/N_MI_', data)
 90 |   }else if(grepl('Slide', data)){
 91 |     result.path <- paste0('./Data/Step1_LRPredictionResult/MouseEmbryo_', data)
 92 |   }
 93 |   
 94 |   RSI_res <- lapply(methods, function(method1){
 95 |     print(paste0('Method1: ', method1))
 96 |     if(method1 == 'scMLnet' | method1 == 'Domino'){
 97 |       result1 <- readRDS(paste0(result.path, '/', method1, '/result1.rds')) # recode the LR score
 98 |     }else{
 99 |       result1 <- readRDS(paste0(result.path, '/', method1, '/result.rds'))
100 |     }
101 |     
102 |     result1 <- as.data.frame(result1$result)
103 |     result1$LRscore <- as.numeric(result1$LRscore)
104 |     result1 <- IDTransform(data, method1, result1)
105 |     if(method1 == 'CytoTalk'){
106 |       result1$rank <- rank(result1$LRscore)
107 |     }else{
108 |       result1$rank <- rank(-result1$LRscore)
109 |     }
110 |     
111 |     temp.rsi <- lapply(methods, function(method2){
112 |       print(paste0('Method2: ', method2))
113 |       if(method2 == 'scMLnet' | method2 == 'Domino'){
114 |         result2 <- readRDS(paste0(result.path, '/', method2, '/result1.rds'))
115 |       }else{
116 |         result2 <- readRDS(paste0(result.path, '/', method2, '/result.rds'))
117 |       }
118 |       
119 |       result2 <- as.data.frame(result2$result)
120 |       result2$LRscore <- as.numeric(result2$LRscore)
121 |       result2 <- IDTransform(data, method2, result2)
122 |       if(method2 == 'CytoTalk'){
123 |         result2$rank <- rank(result2$LRscore)
124 |       }else{
125 |         result2$rank <- rank(-result2$LRscore)
126 |       }
127 |       
128 |       if(dim(result1)[1]==0 | dim(result2)[1]==0){
129 |         tmp.rsi <- NA
130 |       }else{
131 |         overlap.lr <- intersect(result1$all, result2$all)
132 |         if(length(overlap.lr)!=0){
133 |           rank1 <- result1[which(result1$all %in% overlap.lr),]$rank/dim(result1)[1]
134 |           rank2 <- result2[which(result2$all %in% overlap.lr), ]$rank/dim(result2)[1]
135 |           mean.rank <- mean(abs(rank1-rank2))
136 |           tmp.rsi <- 1-mean.rank
137 |         }else{tmp.rsi <- 0}
138 |         }
139 |       tmp.rsi
140 |     })
141 |     temp.rsi <- data.frame(RSI = unlist(temp.rsi), row.names = methods)
142 |     temp.rsi
143 |   })
144 |   RSI_res <- do.call(cbind, RSI_res)
145 |   colnames(RSI_res) <- methods
146 |   #RSI_res <- RSI_res[!apply(RSI_res, 1, function(x){all(is.na(x))}), ]
147 |   #RSI_res <- RSI_res[, !apply(RSI_res, 2, function(x){all(is.na(x))})]
148 |   
149 |   return(RSI_res)
150 | }
151 | 


--------------------------------------------------------------------------------
/Step5_BenchBasedCAGEProteomic/ScriptForProteomics.R:
--------------------------------------------------------------------------------
  1 | rm(list = ls());gc()
  2 | suppressMessages(library(data.table))
  3 | suppressMessages(library(tidyverse))
  4 | suppressMessages(library(ROCR))
  5 | source('./Script/Step3_MIForLRBench/function.R')
  6 | source('./Script/Step8_BenchBasedCAGEProteomic/function.R')
  7 | set.seed(123)
  8 | 
  9 | # process proteomics data
 10 | if(F){
 11 |   proteinGroup <- fread('~/1-Datasets/Proteomics_28/Data/proteinGroups.txt')
 12 |   pep <- proteinGroup[, c(7,549:816)]
 13 |   rm(proteinGroup); gc()
 14 |   
 15 |   pep <- pep[which(pep$Gene.names != ''), ]
 16 |   # pep
 17 |   if(F){
 18 |     colnames(pep) <- gsub('Peptides.', '', colnames(pep))
 19 |     pep <- as.data.frame(pep)
 20 |     pep <- pep[, c(1, which(grepl('steady-state', colnames(pep))))]
 21 |     pep <- pep[, which(!grepl('Library_single', colnames(pep)))]
 22 |     colnames(pep) <- gsub(' \\(steady-state\\)', '', colnames(pep))
 23 |     
 24 |     colnames(pep)[1] <- 'genes'
 25 |     pep <- tidyr::separate_rows(pep, genes, sep = ';')
 26 |     pep <- aggregate(.~genes, data = pep, FUN=mean) %>%
 27 |       tibble::column_to_rownames(., 'genes') %>% 
 28 |       as.matrix() %>% t() %>% as.data.frame()
 29 |     
 30 |     pep$celltype <- gsub('_[0-9]+', '', rownames(pep))
 31 |     pep <- aggregate(.~celltype, data = pep, FUN = mean) %>%
 32 |       tibble::column_to_rownames(., 'celltype') %>%
 33 |       as.matrix() %>% t() %>% as.data.frame()
 34 |     
 35 |     celltype <- c('MO.classical', 'MO.nonclassical', 'mDC', 'pDC',
 36 |                   'B.naive', 'B.memory',
 37 |                   'T8.naive', 'T8.CM', 'T8.EM', 'T8.EMRA',
 38 |                   'T4.CM', 'T4.EM', 'T4.naive',
 39 |                   'NK.dim', 'NK.bright', 'Thrombocyte'
 40 |     )
 41 |     
 42 |     pep <- pep[ , which(colnames(pep) %in% celltype)]
 43 |     
 44 |     pep <- as.matrix(pep) %>% t() %>% as.data.frame()
 45 |     pep$celltype <- c('B', 'B', 'mDC', 'CD14Mono', 'CD16Mono',
 46 |                       'NK', 'NK', 'pDC', 'memoryCD4T', 'memoryCD4T',
 47 |                       'naiveCD4T', 'CD8T', 'CD8T', 'CD8T', 'CD8T', 'Platelet')
 48 |     pep <- aggregate(.~celltype, data = pep, FUN = mean) %>%
 49 |       tibble::column_to_rownames(., 'celltype') %>%
 50 |       as.matrix() %>% t() %>% as.data.frame()
 51 |     
 52 |     pep_genes <- lapply(1:dim(pep)[[2]], function(i){
 53 |       x <- pep[,i]
 54 |       genes <- rownames(pep)[which(x>=2)]
 55 |       paste(genes, colnames(pep)[i], sep = '_')
 56 |     })
 57 |     pep_genes <- unlist(pep_genes)
 58 |     pep_genes_list <- list(all = rownames(pep), genes_ct = pep_genes)
 59 |     saveRDS(pep_genes_list, 
 60 |             file = '~/2-CCC-benchmark-202204-202208/CCC-benchmark-202212/Data/Step5_BenchBasedCAGEProteomic/pepdata.rds')
 61 |   }
 62 | }
 63 | 
 64 | rm(list = ls());gc()
 65 | #calculate index -- peptides
 66 | if(F){
 67 |   pep_genes <- readRDS('./Data/Step8_BenchBasedCAGEProteomic/pepdata.rds')
 68 |   
 69 |   datasets <- c('pbmc4k', 'pbmc6k', 'pbmc8k')
 70 |   
 71 |   for (data in datasets) {
 72 |     fpath <- paste0('./Data/Step1_LRPredictionResult/', data)
 73 |     result.files <- list.files(fpath, full.names = TRUE)
 74 |     result.files <- result.files[-which(grepl('cellinker', result.files))]
 75 |     result_index <- lapply(result.files, function(result.file){
 76 |       print(result.file)
 77 |       if(grepl('scMLnet', result.file) | grepl('Domino', result.file)){
 78 |         result <- readRDS(paste0(result.file, '/result1.rds'))
 79 |       }else{
 80 |         result <- readRDS(paste0(result.file, '/result.rds'))
 81 |       }
 82 |       result <- result$result
 83 |       
 84 |       if((dim(result)[[1]] != 0) & 'LRscore' %in% colnames(result)){
 85 |         if(grepl('RNAMagnet', result.file)){
 86 |           genes <- readRDS(paste0('~/1-Datasets/10XGenomics_PBMC/ScriptForCellTypeAnn/', data, '_hs2mm.rds'))
 87 |           result <- mm2hs(result, genes)
 88 |         }
 89 |         
 90 |         result$receptor_ct <- lapply(1:dim(result)[[1]], function(i){
 91 |           receptor <- result$Receptor[i]
 92 |           if(grepl('&', receptor)){
 93 |             receptor <- unlist(strsplit(receptor, '&'))
 94 |           }
 95 |           receiver <- result$Receiver[i]
 96 |           tmp <- paste(receptor, receiver, sep = '_')
 97 |           label <- ifelse(all(tmp %in% pep_genes$genes_ct), TRUE, FALSE)
 98 |           if(!label){
 99 |             label <- ifelse(all(receptor %in% pep_genes$all), FALSE, NA)
100 |           }
101 |           label
102 |         }) %>% unlist()
103 |         
104 |         result$ligand_ct <- lapply(1:dim(result)[[1]], function(i){
105 |           ligand <- result$Ligand[i]
106 |           if(grepl('&', ligand)){
107 |             ligand <- unlist(strsplit(ligand, '&'))
108 |           }
109 |           sender <- result$Sender[i]
110 |           tmp <- paste(ligand, sender, sep = '_')
111 |           label <- ifelse(all(tmp %in% pep_genes$genes_ct), TRUE, FALSE)
112 |           if(!label){
113 |             label <- ifelse(all(ligand %in% pep_genes$all), FALSE, NA)
114 |           }
115 |           label
116 |         }) %>% unlist()
117 |         
118 |         result <- result[which(!is.na(result$receptor_ct)), ]
119 |         result <- result[which(!is.na(result$ligand_ct)),]
120 |         
121 |         result$label <- lapply(1:dim(result)[[1]], function(i){
122 |           all(result$ligand_ct[i] & result$receptor_ct[i])
123 |         }) %>% unlist()
124 |         
125 |         result$sr <- paste(result$Sender, result$Receiver, sep = '_')
126 |         result <- result[, c('LRscore', 'label', 'sr')]
127 |         result <- split(result, result$sr)
128 |         
129 |         res_index <- lapply(result, function(res){
130 |           index <- get_evaluate_metrics(as.numeric(res$LRscore), res$label)
131 |           index <- index$perf_metrics
132 |         })
133 |         res_index <- do.call(rbind, res_index)
134 |         res_index <- as.data.frame(res_index)
135 |         
136 |       }else{
137 |         res_index <- NA
138 |       }
139 |       res_index
140 |     })
141 |     
142 |     names(result_index) <- substring(result.files, 40)
143 |     result_index[which(is.na(result_index))] <- NULL
144 |     result_index <- do.call(rbind, result_index)
145 |     result_index <- result_index[which(rowSums(result_index)!=0), ]
146 |     result_index <- tibble::rownames_to_column(result_index, 'temp')
147 |     result_index <- tidyr::separate(result_index, temp, c('methods', 'sr'), '\\.')
148 |     result_index$dataset <- data
149 |     result_index
150 |     saveRDS(result_index, file = paste0('./Data/Step5_BenchBasedCAGEProteomic/', data, '_pep.rds'))
151 |   }
152 | }
153 | 


--------------------------------------------------------------------------------
/Step9_LRTBench/function.R:
--------------------------------------------------------------------------------
  1 | getallmetrics <- function(methods_res, degs_ls){
  2 |   result <- lapply(methods_res, function(res){
  3 |     degs.tmp <- lapply(names(degs_ls), function(degs_names){
  4 |       
  5 |       key <- strsplit(degs_names,'_')[[1]][2]
  6 |       type <- ifelse(grepl('KO',degs_names),'Receptor','Ligand')
  7 |       
  8 |       res$regulon <- as.character(res$regulon)
  9 |       res$target <- as.character(res$target)
 10 |       res$value <- as.numeric(res$value)
 11 |       
 12 |       if(grepl('NicheNet', res$methods[1]) & type == 'Receptor'){
 13 |         if(key == 'AXL'){
 14 |           score <- res[which(res$regulon=='GAS6'),]
 15 |         }else if(key == 'CXCR4'){
 16 |           score <- res[which(res$regulon=='CXCL12'),]
 17 |         }else if(key == 'NRP1'){
 18 |           score <- res[which(res$regulon=='VEGFA'),]
 19 |         }else if(key == 'CSF1R'){
 20 |           score <- res[which(res$regulon=='CSF1'),]
 21 |         }else if(key == 'FGFR1'){
 22 |           score <- res[which(res$regulon=='FGF1'),]
 23 |         }else if(key == 'ALK'){
 24 |           score <- res[which(res$regulon %in% c('ALKAL1', 'ALKAL2')),]
 25 |           if(dim(score)[[1]]!=0){
 26 |             score <- aggregate(value~target, score, sum)
 27 |           }
 28 |         }
 29 |       }else{
 30 |         score <- res %>% filter(regulon == key & type == type)
 31 |       }     
 32 |       
 33 |       if(grepl('GSE181575',degs_names)){
 34 |         neg.genes <- setdiff(score$target, names(degs_ls[[degs_names]]))
 35 |         isDEGs <- rep(FALSE, length(neg.genes))
 36 |         names(isDEGs) <- neg.genes
 37 |         degs_ls_used <- append(degs_ls[[degs_names]], isDEGs)
 38 |       }else{
 39 |         degs_ls_used <- degs_ls[[degs_names]]
 40 |       }
 41 |       
 42 |       genes <- intersect(score$target, names(degs_ls_used))
 43 |       label <- degs_ls_used[genes]
 44 |       pred <- score$value[match(genes,score$target)]
 45 |       res.tmp <- get_evaluate_metrics(pred,label)
 46 |       res.tmp <- res.tmp$perf_metrics
 47 |       
 48 |       if(length(names(table(label))) == 2){
 49 |         AUPRCRatios <- res.tmp[2]/(as.numeric(table(label)['TRUE'])/length(label))
 50 |         names(AUPRCRatios) <- 'AUPRCRatios'
 51 |       }else{
 52 |         AUPRCRatios <- NA
 53 |         names(AUPRCRatios) <- 'AUPRCRatios'
 54 |       }
 55 |       res.tmp <- append(res.tmp, AUPRCRatios)
 56 |       res.tmp
 57 |       
 58 |     })
 59 |     names(degs.tmp) <- names(degs_ls)
 60 |     degs.tmp <- do.call(rbind, degs.tmp) %>% 
 61 |       as.data.frame() %>% 
 62 |       tibble::rownames_to_column(., 'celllines')
 63 |     degs.tmp
 64 |   })
 65 |   result <- do.call(rbind, result) %>% as.data.frame() %>% 
 66 |     tibble::rownames_to_column(., 'methods')
 67 |   result$methods <- gsub('\\.[0-9]+', '', result$methods)
 68 |   return(result)
 69 | }
 70 | 
 71 | getallrecord <- function(methods_res, degs_ls){
 72 |   result1 <- lapply(methods_res, function(res){
 73 |     degs.tmp <- lapply(names(degs_ls), function(degs_names){
 74 |       
 75 |       key <- strsplit(degs_names,'_')[[1]][2]
 76 |       type <- ifelse(grepl('KO',degs_names),'Receptor','Ligand')
 77 |       
 78 |       res$regulon <- as.character(res$regulon)
 79 |       res$target <- as.character(res$target)
 80 |       res$value <- as.numeric(res$value)
 81 |       
 82 |       if(grepl('NicheNet', res$methods[1]) & type == 'Receptor'){
 83 |         if(key == 'AXL'){
 84 |           score <- res[which(res$regulon=='GAS6'),]
 85 |         }else if(key == 'CXCR4'){
 86 |           score <- res[which(res$regulon=='CXCL12'),]
 87 |         }else if(key == 'NRP1'){
 88 |           score <- res[which(res$regulon=='VEGFA'),]
 89 |         }else if(key == 'CSF1R'){
 90 |           score <- res[which(res$regulon=='CSF1'),]
 91 |         }else if(key == 'FGFR1'){
 92 |           score <- res[which(res$regulon=='FGF1'),]
 93 |         }else if(key == 'ALK'){
 94 |           score <- res[which(res$regulon %in% c('ALKAL1', 'ALKAL2')),]
 95 |           if(dim(score)[[1]]!=0){
 96 |             score <- aggregate(value~target, score, sum)
 97 |           }
 98 |         }
 99 |       }else{
100 |         score <- res %>% filter(regulon == key & type == type)
101 |       }     
102 |       
103 |       if(grepl('GSE181575',degs_names)){
104 |         neg.genes <- setdiff(score$target, names(degs_ls[[degs_names]]))
105 |         isDEGs <- rep(FALSE, length(neg.genes))
106 |         names(isDEGs) <- neg.genes
107 |         degs_ls_used <- append(degs_ls[[degs_names]], isDEGs)
108 |       }else{
109 |         degs_ls_used <- degs_ls[[degs_names]]
110 |       }
111 |       
112 |       genes <- intersect(score$target, names(degs_ls_used))
113 |       label <- degs_ls_used[genes]
114 |       pred <- score$value[match(genes,score$target)]
115 |       
116 |       res.tmp <- cbind(label, pred) %>% as.data.frame() %>% tibble::rownames_to_column(., 'genes')
117 |     })
118 |     names(degs.tmp) <- names(degs_ls)
119 |     degs.tmp
120 |   })
121 |   return(result1)
122 | }
123 | 
124 | RunTimeMemRecord <- function(wd, data, methods){
125 |   # get result path
126 |   result.path <- paste0(wd, data)
127 |   
128 |   dataset_Record <- lapply(methods, function(method){
129 |     print(method)
130 |     
131 |     record <- read.table(paste0(result.path, '/', method, '/', 'TimeMemRecord.txt'),
132 |                          header = FALSE, sep = '\t', fill = TRUE)
133 |     record <- record[, 2, drop = FALSE]
134 |     if(dim(record)[1]==24){
135 |       record <- record[-1, , drop = FALSE]
136 |     }
137 |     record <- record[c(2,3,4,5,10),, drop = FALSE]
138 |     
139 |     
140 |     # Linux_time: user+system time
141 |     user_time <- as.numeric(gsub('User time \\(seconds\\): ', '', record[1,]))
142 |     sys_time <- as.numeric(gsub('System time \\(seconds\\): ', '', record[2,]))
143 |     time_linux <- user_time+sys_time
144 |     
145 |     # Clock_time
146 |     clock_time <- gsub('Elapsed \\(wall clock\\) time \\(h:mm:ss or m:ss\\): ', '', record[4,])
147 |     if(stringr::str_count(clock_time, ':') == 1){
148 |       min_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[1])
149 |       min_time <- min_time*60
150 |       sec_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[2])
151 |       clock_time <- min_time+sec_time; rm(sec_time, min_time)
152 |     }else if(stringr::str_count(clock_time, ':') == 2){
153 |       h_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[1])
154 |       h_time <- h_time*60*60
155 |       min_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[2])
156 |       min_time <- min_time*60
157 |       sec_time <- as.numeric(unlist(stringr::str_split(clock_time, ':'))[3])
158 |       clock_time <- h_time+min_time+sec_time; rm(sec_time, min_time, h_time)
159 |     }
160 |     
161 |     # Max memory usage | CPU_used
162 |     mem_linux <- gsub('Maximum resident set size \\(kbytes\\): ', '', record[5,]) 
163 |     mem_linux <- round(as.numeric(mem_linux)/1024/1024, 2)
164 |     cpu_linux <- gsub('Percent of CPU this job got: ', '', record[3,])
165 |     
166 |     return(data.frame(clock_time = clock_time, linux_time = time_linux, 
167 |                       max_memory = mem_linux, cpu_linux = cpu_linux))
168 |   })
169 |   names(dataset_Record) <- methods
170 |   return(dataset_Record)
171 | }


--------------------------------------------------------------------------------
/Step3_MIForLRBench/function.R:
--------------------------------------------------------------------------------
  1 | EvaIndex1_1 <- function(CloDist, norm.data, result, cp, choose, logic){
  2 |   result.tmp <- result[which(result$sr == cp), ]
  3 |   if(!logic){
  4 |     cp.1 <- unlist(stringr::str_split(cp, "_"))[1]
  5 |     cp.2 <- unlist(stringr::str_split(cp, "_"))[2]
  6 |     cp <- paste(cp.2, cp.1, sep = "_")
  7 |   }
  8 |   
  9 |   if(choose == "close"){
 10 |     distant.tmp <- CloDist[[cp]]$close
 11 |   }else if(choose == 'distant'){
 12 |     distant.tmp <- CloDist[[cp]]$distant
 13 |   }
 14 |   
 15 |   tmp.result <- lapply(1:dim(result.tmp)[1], function(n){
 16 |     #print(n)
 17 |     ligands <- result.tmp[n,"Ligand"]
 18 |     ligands <- unlist(stringr::str_split(ligands, "&"))
 19 |     
 20 |     receptors <- result.tmp[n, "Receptor"]
 21 |     receptors <- unlist(stringr::str_split(receptors, "&"))
 22 |     
 23 |     tmp.lr <- expand.grid(ligands, receptors)
 24 |     colnames(tmp.lr) <- c('ligand', 'receptor')
 25 |     
 26 |     temp.result <- lapply(1:dim(tmp.lr)[1], function(m){
 27 |       ligand <- tmp.lr[m, "ligand"]
 28 |       receptor <- tmp.lr[m, "receptor"]
 29 |       
 30 |       if((ligand %in% rownames(norm.data)) & (receptor %in% rownames(norm.data))){
 31 |         if(logic){
 32 |           value.1 <- norm.data[ligand, distant.tmp$cell.1]
 33 |           value.2 <- norm.data[receptor, distant.tmp$cell.2]
 34 |         }else{
 35 |           value.1 <- norm.data[ligand, distant.tmp$cell.2]
 36 |           value.2 <- norm.data[receptor, distant.tmp$cell.1]
 37 |         }
 38 |         
 39 |         if(sum(value.1)!=0 & sum(value.2)!=0){
 40 |           tmp.sender.mat <- data.frame(value.1 = value.1, barcode.1 = names(value.1))
 41 |           tmp.reciever.mat <- data.frame(value.2 = value.2, barcode.2 = names(value.2))
 42 |           data.tmp = cbind(tmp.sender.mat, tmp.reciever.mat)
 43 |           data.tmp <- data.tmp[,c("value.1", "value.2")]
 44 |           rm(value.1, value.2, tmp.reciever.mat, tmp.sender.mat)
 45 |           
 46 |           ## calculate the mutual information
 47 |           data.disc <- infotheo::discretize(data.tmp)
 48 |           tmp.mi <- infotheo::mutinformation(data.disc[,1],data.disc[,2])
 49 |           
 50 |           ## calculate the Pearson Correlation Coefficient
 51 |           pcc.tmp <- cor.test(data.tmp[,1], data.tmp[,2], method = "pearson")
 52 |           tmp.pcc <- pcc.tmp$estimate[[1]]
 53 |           tmp.pcc.p <- pcc.tmp$p.value
 54 |           tmp <- c(tmp.mi, tmp.pcc, tmp.pcc.p)
 55 |           
 56 |           tmp
 57 |         }else{
 58 |           tmp.mi <- NA
 59 |           tmp.pcc <- NA
 60 |           tmp.pcc.p <- NA
 61 |           tmp <- c(tmp.mi, tmp.pcc, tmp.pcc.p)
 62 |           tmp <- as.data.frame(t(tmp))
 63 |           tmp
 64 |         }
 65 |       }else{
 66 |         tmp.mi <- NA
 67 |         tmp.pcc <- NA
 68 |         tmp.pcc.p <- NA
 69 |         tmp <- c(tmp.mi, tmp.pcc, tmp.pcc.p)
 70 |         tmp <- as.data.frame(t(tmp))
 71 |         tmp
 72 |       }
 73 | 
 74 |       tmp
 75 |       
 76 |     })
 77 |     names(temp.result) <- paste(tmp.lr$ligand, tmp.lr$receptor, sep = "_")
 78 |     #temp.result[which(is.na(temp.result))] <- NULL
 79 |     temp.result <- do.call(rbind, temp.result)
 80 |     temp.result <- as.data.frame(temp.result)
 81 |     temp.result <- tibble::rownames_to_column(temp.result, 'lr')
 82 |     
 83 |     temp.result
 84 |   })
 85 | 
 86 |   bench.result <- do.call(rbind, tmp.result)
 87 |   
 88 |   bench.result
 89 | }
 90 | 
 91 | EvaIndex1_2 <- function(CloDist, norm.data, result){
 92 |   result.bench <- lapply(unique(result$sr), function(cp){
 93 |     print(cp)
 94 |     if(cp %in% names(CloDist)){
 95 |       close.bench <- EvaIndex1_1(CloDist, norm.data, result, cp, "close", TRUE)
 96 |       distant.bench <- EvaIndex1_1(CloDist, norm.data, result, cp, "distant", TRUE)
 97 |       identical(distant.bench$lr, close.bench$lr)
 98 |       tmp.result <- cbind(close.bench, distant.bench)
 99 |       tmp.result <- tmp.result[,-5]
100 |       colnames(tmp.result) <- c("lr", "closeMI", "closePCC", "closePCCPvalue",
101 |                                 "distantMI", "distantPCC","distantPCCPvalue")
102 |     }else{
103 |       close.bench <- EvaIndex1_1(CloDist, norm.data, result, cp, "close", FALSE)
104 |       distant.bench <- EvaIndex1_1(CloDist, norm.data, result, cp,  "distant", FALSE)
105 |       tmp.result <- cbind(close.bench, distant.bench)
106 |       tmp.result <- tmp.result[,-5]
107 |       colnames(tmp.result) <- c("lr","closeMI", "closePCC", "closePCCPvalue",
108 |                                 "distantMI", "distantPCC","distantPCCPvalue")
109 |       
110 |     }
111 |     tmp.result <- as.data.frame(tmp.result)
112 |     tmp.result
113 |   })
114 |   names(result.bench) <- unique(result$sr)
115 |   result.bench
116 | }
117 | 
118 | 
119 | mm2hs <- function(result, genes){
120 |   genes <- genes[, c('mouse', 'human')]
121 |   
122 |   result <- merge(result, genes, by.x = 'Ligand', by.y = 'mouse', all.x = TRUE)
123 |   multi <- which(is.na(result$human) & grepl('&', result$Ligand))
124 |   if(length(multi)>0){
125 |     result$human[multi] <- unlist(lapply(stringr::str_split(result$Ligand[multi], '&'), function(x){
126 |       tmp <- unlist(lapply(seq(x), function(y){genes$human[which(genes$mouse %in% x[y])]}))
127 |       paste(tmp, collapse = '&')
128 |     }))
129 |   }
130 |   result <- result[!(is.na(result$human) & !grepl('&', result$Ligand)), ]
131 |   result$Ligand <- NULL
132 |   colnames(result)[which(colnames(result) == 'human')] <- 'Ligand'
133 |   
134 |   result <- merge(result, genes, by.x = 'Receptor', by.y = 'mouse', all.x = TRUE)
135 |   multi <- which(is.na(result$human) & grepl('&', result$Receptor))
136 |   if(length(multi)>0){
137 |     result$human[multi] <- unlist(lapply(stringr::str_split(result$Receptor[multi], '&'), function(x){
138 |       tmp <- unlist(lapply(seq(x), function(y){genes$human[which(genes$mouse %in% x[y])]}))
139 |       paste(tmp, collapse = '&')
140 |     }))
141 |   }
142 |   result <- result[!(is.na(result$human) & !grepl('&', result$Receptor)), ]
143 |   result$Receptor <- NULL
144 |   colnames(result)[which(colnames(result) == 'human')] <- 'Receptor'
145 |   result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
146 |   
147 |   return(result)
148 | }
149 | 
150 | hs2mm <- function(result, genes){
151 |   genes <- genes[, c('mouse', 'human')]
152 |   
153 |   result <- merge(result, genes, by.x = 'Ligand', by.y = 'human', all.x = TRUE)
154 |   multi <- which(is.na(result$mouse) & grepl('&', result$Ligand))
155 |   if(length(multi)>0){
156 |     result$mouse[multi] <- unlist(lapply(stringr::str_split(result$Ligand[multi], '&'), function(x){
157 |       tmp <- unlist(lapply(seq(x), function(y){genes$mouse[which(genes$human %in% x[y])]}))
158 |       paste(tmp, collapse = '&')
159 |     }))
160 |   }
161 |   result <- result[!(is.na(result$mouse) & !grepl('&', result$Ligand)), ]
162 |   result$Ligand <- NULL
163 |   colnames(result)[which(colnames(result) == 'mouse')] <- 'Ligand'
164 |   
165 |   result <- merge(result, genes, by.x = 'Receptor', by.y = 'human', all.x = TRUE)
166 |   multi <- which(is.na(result$mouse) & grepl('&', result$Receptor))
167 |   if(length(multi)>0){
168 |     result$mouse[multi] <- unlist(lapply(stringr::str_split(result$Receptor[multi], '&'), function(x){
169 |       tmp <- unlist(lapply(seq(x), function(y){genes$mouse[which(genes$human %in% x[y])]}))
170 |       paste(tmp, collapse = '&')
171 |     }))
172 |   }
173 |   result <- result[!(is.na(result$mouse) & !grepl('&', result$Receptor)), ]
174 |   result$Receptor <- NULL
175 |   colnames(result)[which(colnames(result) == 'mouse')] <- 'Receptor'
176 |   result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
177 |   
178 |   return(result)
179 | }
180 | 
181 | 
182 | # further handle the results of MI
183 | Eval1Process <- function(result.path){
184 |   Eval1 <- readRDS(result.path)
185 |   Eval1[which(is.na(Eval1))] <- NULL
186 |   Eval1 <- lapply(Eval1, function(methods_eval){
187 |     perc_temp <- lapply(methods_eval, function(perc_eval){
188 |       perc_tmp <- do.call(rbind, perc_eval)
189 |       perc_tmp <- tibble::rownames_to_column(perc_tmp, 'sr')
190 |       perc_tmp$sr <- gsub('\\.[0-9]+', '', perc_tmp$sr)
191 |       perc_tmp
192 |     })
193 |     names(perc_temp) <- c(10, 20, 30, 40)
194 |     perc_temp <- do.call(rbind, perc_temp)
195 |     perc_temp <- tibble::rownames_to_column(perc_temp, 'perc')
196 |     perc_temp$perc <- gsub('\\.[0-9]+', '', perc_temp$perc)
197 |     perc_temp
198 |   })
199 |   Eval1 <- do.call(rbind, Eval1)
200 |   Eval1 <- tibble::rownames_to_column(Eval1, 'methods')
201 |   Eval1$methods <- gsub('\\.[0-9]+', '', Eval1$methods)
202 |   allna <- apply(Eval1[, c("closeMI", "closePCC", "distantMI", "distantPCC")], 
203 |                  1, FUN = function(x){any(is.na(x))})
204 |   Eval1 <- Eval1[which(!allna), ]
205 |   Eval1$all <- paste(Eval1$methods, Eval1$perc, Eval1$sr, Eval1$lr, sep = '_')
206 |   Eval1 <- dplyr::distinct(Eval1, all, .keep_all = TRUE)
207 |   
208 |   return(Eval1)
209 | }
210 | 
211 | # Calculate the DLRC index of each method —— p.value(whether significant?) + median + weight
212 | EvalIndex_DLRC <- function(Eval1Result){
213 |   Eval1Result$class <- paste(Eval1Result$methods, Eval1Result$perc, sep = '_')
214 |   
215 |   temp <- lapply(unique(Eval1Result$class), function(class){
216 |     result_sub <- Eval1Result[which(Eval1Result$class == class),]
217 |     medianMI_close <- median(result_sub$closeMI)
218 |     medianMI_distant <- median(result_sub$distantMI)
219 |     medianPCC_close <- median(abs(result_sub$closePCC))
220 |     medianPCC_distant <- median(abs(result_sub$distantPCC))
221 |     PCC_pval <- wilcox.test (result_sub$closePCC, result_sub$distantPCC, 
222 |                              alternative = "greater")$p.value
223 |     MI_pval <- wilcox.test (result_sub$closeMI, result_sub$distantMI, 
224 |                             alternative = "greater")$p.value
225 |     return(data.frame(medianMI_close = medianMI_close, medianMI_distant = medianMI_distant, MI_pval = MI_pval,
226 |                       medianPCC_close = medianPCC_close, medianPCC_distant = medianPCC_distant, PCC_pval = PCC_pval))
227 |   })
228 |   names(temp) <- unique(Eval1Result$class)
229 |   temp <- do.call(rbind, temp)
230 |   temp <- tibble::rownames_to_column(temp, 'class')
231 |   temp <- tidyr::separate(temp, class, c('methods', 'perc'), '_')
232 |   #temp <- temp[-which(temp$perc == 40),]
233 |   
234 |   tmp <- lapply(unique(temp$methods), function(method){
235 |     result_sub <- temp[which(temp$methods == method), ]
236 |     if(dim(result_sub)[1]==4){
237 |       result_sub$MIIndex <- (0.5-as.numeric(result_sub$perc)*0.01)*
238 |         ifelse(result_sub$MI_pval<0.05, 1, 0)*
239 |         (result_sub$medianMI_close - result_sub$medianMI_distant)
240 |       
241 |       result_sub$PCCIndex <- (0.5-as.numeric(result_sub$perc)*0.01)*
242 |         ifelse(result_sub$PCC_pval<0.05, 1, 0)*
243 |         (result_sub$medianPCC_close - result_sub$medianPCC_distant)
244 |       return(data.frame(MIIndex = sum(result_sub$MIIndex), PCCIndex = sum(result_sub$PCCIndex)))
245 |     }
246 |   })
247 |   names(tmp) <- unique(temp$methods)
248 |   tmp <- do.call(rbind, tmp)
249 |   tmp <- tibble::rownames_to_column(tmp, 'methods')
250 |   tmp$MIRank <- rank(-tmp$MIIndex)
251 |   tmp$PCCRank <- rank(-tmp$PCCIndex)
252 |   return(tmp)
253 | }
254 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ![](https://github.com/SunXQlab/CCC-Benchmark/blob/main/Graphic-abstrct.png)  
  2 | ## Introduction
  3 | 
  4 | We benchmark two types of CCC inference methods, one type of methods predict LR pairs based on scRNA-seq data, and another type of methods that can predict ligand/receptor-targets regulations. 
  5 | 
  6 | For the first benchmark, we evaluated the accuracy, stability and usability of 18 LR inference methods. **In term of accuracy**, paired ST datasets, CAGE expression/Proteomics data and sampled scRNA-seq datasets were used to benchmark the 18 methods. Firstly, 11 scRNA-seq datasets were used as input for methods to predict intercellular communication and the two defined similarity index (SI, modified Jaccard index) and rank-based similarity index (RSI) were used to compare the similarity of LR pairs predicted by methods.Furthermore, we benchmark the 18 methods using 11 paired ST datasets with the hypothesis that the values of mutual information (MI) of LR pairs are greater in the close group than that in the distant group. In addition, three PBMC datasets from 10X Genomics website were used as input for methods to predict LR pairs and CAGE expression/Proteomics data were used as pseudo gold standards to benchmark the 18 methods. **In term of stability**, we ramdomly sampled different ratios of cells in all the scRNA-seq, resulting 70 sampled datasets and 14 original datasets as input for methods. We calculated the Jaccard index of the LR pairs predicted based sampled datasets and original datasets and a stability value was defined to test the robustness of methods to sampling rates of scRNA-seq data. **In term of usability**, we recorded the running time and maximum memory usage of methods in all the 84 scRNA-seq datasets.  
  7 | 
  8 | For the second benchmark, 8 ST datasets were used as the input for 5 LR-Targets inference tools to predict ligand/receptor-targets regulations, and the cell line perturbation datasets were used for evaluation, involving knockout/mutant conditions for 5 receptors, and treatment conditions for 10 ligands. And the differentially expressed genes (DEGs) in each cell line perturbation dataset, were used as the ground truth of ligand/receptor-targets regulations. The score of ligand/receptor-targets predicted by different tools were compared to the differential expression status (DGEs or not DEGs) of corresponding targets to calculate AUROC and AUPRC. In addition, we also record the running time and maximum memory usage of methods in all the ST datasets. 
  9 | 
 10 | ## Workflow
 11 | ![](https://github.com/SunXQlab/CCC-Benchmark/blob/main/Workflow-figure.png)  
 12 | -   **Step0\_LRToolsFunction** contains the R/Python/Shell scripts that package the running code of 19 methods with Seurat objects as input into function.  
 13 | -   **Step1\_LRPredictionResult** contains the R/Shell scripts to run 19 methods for inferring LR pairs from the 14 scRNA-seq datasets.  
 14 | -   **Step2\_PreSTForLRBench** contains the R scripts to get the different ratios (e.g.top 10%, 20%, 30%, 40%) of cell type specific close and distant cell pairs in each dataset for the preparation of the benchmarking using mutual infomation.  
 15 | -   **Step3\_MIForLRBench** contains the R scripts to calculate MI of LR interactions predicted by methods in the different ratios of cell type specific close and distant groups and calculate DLRC index of methods in each dataset.  
 16 | -   **Step4\_SIRSIForLRBench** contains the R scripts to benchmark the similarity (SI and RSI) of the LR interactions predicted by each two methods.  
 17 | -   **Step5\_BenchBasedCAGEProteomic** contains the R scripts to benchmark the 18 LR inference methods using the CAGE expression and proteomics data.  
 18 | -   **Step6\_LRBenchSampling** contains the R/Shell scripts to run the 18 LR inference methods for inferring LR pairs from 70 sampled scRNA-seq datasets.  
 19 | -   **Step7\_LRBenchSamplingBench** contains the R/Shell scripts to calculate Jaccard index between the LR pairs predicted based on the sampled datasets and the original datasets, and record the running time and maximum memory usage of methods in each dataset.  
 20 | -   **Step8\_LRTToolsFunction** contains the R/Python/Shell scripts to run the 5 LR-Target inference methods for predicting ligand/receptor-targets using ST datasets as input.  
 21 | -   **Step9\_LRTBench** contains the R scripts to benchmark the 5 LR-Target inference methods using cell line perturbation datasets for evaluation, and record the running time and maximum memory usage of methods in each dataset.  
 22 | 
 23 | ## Datasets
 24 | -   **scRNA-seq and ST datasets**   
 25 | 
 26 | 
 27 | <body>
 28 |     <table border="1" cellspacing="1" width="900">
 29 |         <thead>
 30 |             <tr>
 31 |                 <th>Tissue (Disease)</th><th>SampleID <br>(scRNA-seq)</br></th><th>SampleID <br>(ST)</br></th><th>Literature PMID</th><th>Download URL <br>(scRNA-seq)</br></th><th>Download URL <br>(ST)</br></th><th>Evaluation purpose</th>
 32 |             </tr>
 33 |         </thead>
 34 |         <tbody>
 35 |             <tr>
 36 |                 <td rowspan="2">Heart Tissue (Health)</td><td>CK357</td><td>control_P7</td><td rowspan="8">35948637</td><td rowspan="8"><a href="https://zenodo.org/record/6578617">URL</a></td><td rowspan="8"><a href="https://zenodo.org/record/6580069">URL</a></td><td rowspan="2">LR interactions<br>LR-Target regulations</br></td>
 37 |             </tr>
 38 |             <tr>
 39 |                 <td>CK358</td><td>control_P8</td>
 40 |             </tr>
 41 |             <tr>
 42 |                 <td rowspan="3">Heart Tissue (ICM)</td><td>CK368</td><td>FZ_GT_P19</td><td rowspan="6">LR interactions</td>
 43 |             </tr>
 44 |             <tr>
 45 |                 <td>CK162</td><td>FZ_GT_P4</td>
 46 |             </tr>
 47 |             <tr>
 48 |                 <td>CK362</td><td>RZ_P11</td>
 49 |             </tr>
 50 |             <tr>
 51 |                 <td rowspan="3">Heart Tissue (AMI)</td><td>CK361</td><td>IZ_P10</td>
 52 |             </tr>
 53 |             <tr>
 54 |                 <td>CK161</td><td>IZ_P3</td>
 55 |             </tr>
 56 |             <tr>
 57 |                 <td>CK165</td><td>IZ_BZ_P2</td>
 58 |             </tr>
 59 |             <tr>
 60 |                 <td rowspan="2">Tumor Tissue<br>(Breast cancer)</br></td><td>CID44971</td><td>CID44971</td><td rowspan="2">34493872</td><td rowspan="2"><a href="https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE176078">URL</a></td><td rowspan="2"><a href="https://zenodo.org/record/4739739">URL</a></td><td rowspan="2">LR interactions<br>LR-Target regulations</br></td>
 61 |             </tr>
 62 |             <tr>
 63 |                 <td>CID4465</td><td>CID4465</td>
 64 |             </tr>
 65 |             <tr>
 66 |                 <td>Mouse embryo</td><td>——</td><td>Slide14</td><td>34210887</td><td>——</td><td><a href="https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE166692">URL</a></td><td>LR interactions</td>
 67 |             </tr>
 68 |             <tr>
 69 |                 <td rowspan="3">PBMC</td><td>PBMC4K</td><td>——</td><td>——</td><td><a href="https://www.10xgenomics.com/resources/datasets/4-k-pbm-cs-from-a-healthy-donor-2-standard-2-0-1">URL</a></td><td>——</td><td rowspan="3">LR interactions</td>
 70 |             </tr>
 71 |             <tr>
 72 |                 <td>PBMC6K</td><td>——</td><td>——</td><td><a href="https://www.10xgenomics.com/resources/datasets/6-k-pbm-cs-from-a-healthy-donor-1-standard-1-1-0">URL</a></td><td>——</td>
 73 |             </tr>
 74 |             <tr>
 75 |                 <td>PBMC8K</td><td>——</td><td>——</td><td><a href="https://www.10xgenomics.com/resources/datasets/8-k-pbm-cs-from-a-healthy-donor-2-standard-2-0-1">URL</a></td><td>——</td>
 76 |             </tr>
 77 |             <tr>
 78 |                 <td rowspan="4">Tumor Tissue<br>(Gliomas)</br></td><td>——</td><td>UKF243_T_ST</td><td rowspan="4">35700707</td><td rowspan="4">——</td><td rowspan="4"><a href="https://doi.org/10.5061/dryad.h70rxwdmj">URL</a></td><td rowspan="4">LR-Target interactions</td>
 79 |             </tr>
 80 |             <tr>
 81 |                 <td>——</td><td>UKF260_T_ST</td>
 82 |             </tr>
 83 |             <tr>
 84 |                 <td>——</td><td>UKF266_T_ST</td>
 85 |             </tr>
 86 |             <tr>
 87 |                 <td>——</td><td>UKF334_T_ST</td>
 88 |             </tr>
 89 |         </tbody>
 90 |     </table>
 91 | </body>
 92 |   
 93 | -   **Cell line perturbation datasets**  
 94 | 
 95 | 
 96 | <body>
 97 |     <table border="1" cellspacing="1" width="900">
 98 |         <thead>
 99 |             <tr>
100 |                 <th>Datasets</th><th>Ligand/Receptor</th><th>Type</th><th>Condition</th><th>Cell Line</th><th>Disease</th>
101 |             </tr>
102 |         </thead>
103 |         <tbody>
104 |             <tr>
105 |                 <td>GSE120268</td><td>AXL</td><td>receptor</td><td>Knockdown</td><td>MDA-MB-231</td><td rowspan="10">Breast Cancer</td>
106 |             </tr>
107 |             <tr>
108 |                 <td>GSE157680</td><td>NRP1</td><td>receptor</td><td>Knockdown</td><td>MDA-MB-231</td>
109 |             </tr>
110 |             <tr>
111 |                 <td rowspan="2">GSE15893</td><td>CXCR4</td><td>receptor</td><td>Mutant</td><td>MDA-MB-231</td>
112 |             </tr>
113 |             <tr>
114 |                 <td>CXCL12</td><td>ligand</td><td>Treatment</td><td>MDA-MB-231</td>
115 |             </tr>
116 |             <tr>
117 |                 <td>GSE160990</td><td>TGFB1</td><td>ligand</td><td>Treatment</td><td>MDA-MB-231</td>
118 |             </tr>
119 |             <tr>
120 |                 <td rowspan="3">GSE36051</td><td>DLL4(1)</td><td>ligand</td><td>Treatment</td><td>MCF7</td>
121 |             </tr>
122 |             <tr>
123 |                 <td>DLL4(2)</td><td>ligand</td><td>Treatment</td><td>MDA-MB-231</td>
124 |             </tr>
125 |             <tr>
126 |                 <td>JAG1</td><td>ligand</td><td>Treatment</td><td>MDA-MB-231</td>
127 |             </tr>
128 |             <tr>
129 |                 <td>GSE65398</td><td>IGF1(1)</td><td>ligand</td><td>Treatment</td><td>MCF7</td>
130 |             </tr>
131 |             <tr>
132 |                 <td>GSE7561</td><td>IGF1(2)</td><td>ligand</td><td>Treatment</td><td>MCF7</td>
133 |             </tr>
134 |             <tr>
135 |                 <td>GSE69104</td><td>CSF1R</td><td>receptor</td><td>Inhibit</td><td>TAMs</td><td rowspan="2">Gliomas</td>
136 |             </tr>
137 |             <tr>
138 |                 <td>GSE116414</td><td>FGFR1</td><td>receptor</td><td>Inhibit</td><td>GSLC</td>
139 |             </tr>
140 |             <tr>
141 |                 <td>GSE206947</td><td>EFNB2</td><td>ligand</td><td>Treatment</td><td>cardiac fibroblasts</td><td rowspan="3">Health</td>
142 |             </tr>
143 |             <tr>
144 |                 <td>GSE181575</td><td>TGFB1</td><td>ligand</td><td>Treatment</td><td>cardiac fibroblasts</td>
145 |             </tr>
146 |             <tr>
147 |                 <td>GSE123018</td><td>TGFB1</td><td>ligand</td><td>Treatment</td><td>cardiac fibroblasts</td>
148 |             </tr>
149 |         </tbody>
150 |     </table>
151 | </body>
152 | 
153 | ## Tools for inferring intercellular LR pairs 
154 | 
155 | -   CellPhoneDB (Python, version: 3.0.0)
156 | -   CellTalker (R, version: 0.0.4.9000)
157 | -   Connectome (R, version: 1.0.1)
158 | -   NATMI (Python)
159 | -   ICELLNET (R, version: 1.0.1)
160 | -   scConnect (Python, version: 1.0.3)
161 | -   CellChat (R, version: 1.4.0)
162 | -   SingleCellSignalR (R, version: 1.2.0)
163 | -   CytoTalk (R, version: 0.99.9)
164 | -   CellCall (R, version: 0.0.0.9000)
165 | -   scSeqComm (R, version: 1.0.0)
166 | -   NicheNet (R, version: 1.1.0)
167 | -   Domino (R, version: 0.1.1)
168 | -   scMLnet (R, version: 0.2.0)
169 | -   PyMINEr (Python, version: 0.10.0)
170 | -   iTALK (R, version: 0.1.0)
171 | -   cell2cell (Python, version: 0.5.10)
172 | -   RNAMagnet (R, version: 0.1.0)
173 | 
174 | ## Tools for predicting ligand/receptor-targets regulations
175 | 
176 | -   CytoTalk (R, version: 0.99.9)
177 | -   NicheNet (R, version: 1.1.0)
178 | -   stMLnet (R, version: 0.1.0)
179 | -   MISTy (R, version: 1.3.8)
180 | -   HoloNet (Python, version: 0.0.5)
181 | 
182 | ## Citation
183 | 
184 | Please cite ESICCC as follows:
185 | 
186 | Luo J, Deng M, Zhang X, Sun X*. ESICCC as a systematic computational framework for evaluation, selection and integration of cell-cell communication inference methods. Genome Research. 2023. doi: 10.1101/gr.278001.123
187 | 
188 | ## Contact  
189 | 
190 | If you encounter any problems, please contact (sunxq6@mail.sysu.edu.cn).
191 | 


--------------------------------------------------------------------------------
/Step8_LRTToolsFunction/Step8_LRTToolsFunction.R:
--------------------------------------------------------------------------------
  1 | ############## Run in parallel with 10 cores
  2 | ## CytoTalk ## Run in R 4.1.0 version
  3 | ############## Filter the spots with the number of cells less than 3
  4 | CytoTalk_function <- function(ser, fpath, sender = NULL, receiver = NULL, species = 'human'){
  5 |   suppressMessages(library(CytoTalk))
  6 |   suppressMessages(library(Seurat))
  7 |   suppressMessages(library(tidyverse))
  8 |   suppressMessages(library(igraph))
  9 |   set.seed(123)
 10 |   
 11 |   # create conda environment
 12 |   if(F){
 13 |     library(reticulate)  # To install and call Python modules from R.
 14 |     conda_create(envname = "r_reticulate_CytoTalk", python_version = "3.7.3")  # Create a new Conda environment to facilitate the Python module installation.
 15 |     conda_install(envname = "r_reticulate_CytoTalk", "pybind11")  # Install two necessary Python modules for correctly compiling and using the "pcst_fast" Python module.
 16 |     conda_install(envname = "r_reticulate_CytoTalk", "numpy")
 17 |     conda_install(envname = "r_reticulate_CytoTalk", "git+https://github.com/fraenkel-lab/pcst_fast.git", pip = TRUE) # To install the "pcst_fast" module.
 18 |   }
 19 |   
 20 |   # input
 21 |   if(T){
 22 |     input_fpath <- paste0(fpath, '/input/')
 23 |     if(!dir.exists(input_fpath)){
 24 |       dir.create(input_fpath)
 25 |     }
 26 |     
 27 |     fpath.mat <- paste0(input_fpath, "counts.csv")
 28 |     fpath.meta <- paste0(input_fpath,"metadata.csv")
 29 |    
 30 |     norm.matrix <- as.matrix(GetAssayData(ser, "data", "SCT"))
 31 |     write.csv(norm.matrix, fpath.mat, quote=F)
 32 |     
 33 |     meta.data <- data.frame(rownames(ser@meta.data), ser@meta.data$celltype)
 34 |     colnames(meta.data) <- NULL
 35 |     meta.data <- as.matrix(meta.data)
 36 |     write.csv(meta.data, fpath.meta, quote=F, row.names = FALSE)
 37 |     rm(norm.matrix, meta.data);gc()
 38 |   }
 39 |   
 40 |   if(species == 'human'){
 41 |     pcg = CytoTalk::pcg_human
 42 |     lrp = CytoTalk::lrp_human
 43 |   }else{
 44 |     pcg = CytoTalk::pcg_mouse
 45 |     lrp = CytoTalk::lrp_mouse
 46 |   }
 47 |   
 48 |   lst.sc <- read_matrix_with_meta(fpath.mat, fpath.meta)
 49 |   
 50 |   sender_ct <- unique(ser$celltype)
 51 |   remove_ct <- as.character(names(which(table(ser$celltype)<3)))
 52 |   if (length(remove_ct)>0) {
 53 |     sender_ct <- sender_ct[-which(sender_ct %in% remove_ct)]
 54 |   }
 55 |   if(is.null(receiver)){
 56 |     receiver_ct <- unique(ser$celltype)
 57 |   }else if(!is.null(receiver)){
 58 |     receiver_ct <- receiver
 59 |   }
 60 |   rm(sender, receiver);gc()
 61 |   
 62 |   result <- list()
 63 |   for (sender in sender_ct) {
 64 |     for (receiver in receiver_ct[which(receiver_ct!=sender)]) {
 65 |       tmp <- tryCatch(CytoTalk::run_cytotalk(lst.sc, sender, receiver, cores = 10, 
 66 |                                              cutoff_a = 0.05, cutoff_b = 0.05,
 67 |                                              pcg = pcg, lrp = lrp),
 68 |                       error=function(e){NA}
 69 |       )
 70 |       cp <- paste(sender, receiver, sep = "_")
 71 |       result[[cp]] <- tmp
 72 |     }
 73 |   }
 74 |   
 75 |   result[which(is.na(result))] <- NULL
 76 |   saveRDS(result, file = paste0(fpath, '/', 'result.rds'))
 77 |   
 78 |   
 79 |   for (receiver in receiver_ct) {
 80 |     result.tmp <- lapply(names(result), function(cp){
 81 |       sender <-  gsub("_.*", "",cp)
 82 |       
 83 |       tmp <- list()
 84 |       res <- result[[cp]]
 85 |       if(!is.null(res$pathways)){
 86 |         network <- res$pcst$final_network
 87 |         network <- network[!(network$node1_type == sender & network$node2_type == sender), ]
 88 |         network <- network[!(network$node1_type == receiver & network$node2_type == sender), ]
 89 |         network$node1 <- toupper(network$node1)
 90 |         network$node2 <- toupper(network$node2)
 91 |         network$node1 <- gsub("ORF", "orf", network$node1)
 92 |         network$node2 <- gsub("ORF", "orf", network$node2)
 93 |         LR <- network[which(network$node1_type == sender & network$node2_type == receiver), ]
 94 |         LR <- paste(LR$node1, LR$node2, sep = "_")
 95 |         Ligand <- network[which(network$node1_type == sender & network$node2_type == receiver), "node1"] %>%
 96 |           unique()
 97 |         Receptor <- network[which(network$node1_type == sender & network$node2_type == receiver), "node2"] %>%
 98 |           unique()
 99 |         Target <- c(network[which(network$node1_type == receiver & network$node2_type == receiver), "node1"],
100 |                     network[which(network$node1_type == receiver & network$node2_type == receiver), "node2"]) %>%
101 |           unique()
102 |         Target <- setdiff(Target, c(Ligand, Receptor))
103 |         tmp <- list(Edge = network,
104 |                     LR = LR,
105 |                     Ligand = Ligand, 
106 |                     Receptor = Receptor, 
107 |                     Target = Target)
108 |       }else{
109 |         tmp <- NA
110 |       }
111 |       tmp
112 |     })
113 |     
114 |     names(result.tmp) <- names(result);
115 |     result.tmp[which(is.na(result.tmp))] <- NULL
116 |     
117 |     edge_list <- lapply(result.tmp, function(res){
118 |       res <- res$Edge[,c(1,2)]
119 |       colnames(res) <- c('from', 'to')
120 |       res
121 |     })
122 |     edge_df <- do.call(rbind, edge_list)
123 |     edge_df <- distinct(edge_df, from, to)
124 |     intranetwork <- graph_from_edgelist(as.matrix(edge_df), directed = FALSE)
125 |     
126 |     Ligand <- lapply(result.tmp, function(res){res$Ligand}) %>% unlist() %>% unique()
127 |     Receptor <- lapply(result.tmp, function(res){res$Receptor}) %>% unlist() %>% unique()
128 |     Target <- lapply(result.tmp, function(res){res$Target}) %>% unlist() %>% unique()
129 |     
130 |     distance <- distances(intranetwork, 
131 |                           v = c(Ligand,Receptor), 
132 |                           to = Target)
133 |     distance <- reshape2::melt(distance)
134 |     distance <- distance[!is.infinite(distance$value),]
135 |     colnames(distance) <- c("regulon", 'target',"value")
136 |     distance$type <- ifelse(distance$regulon %in% Ligand,"ligand","receptor")
137 |     
138 |     saveRDS(distance, file = paste0(fpath, "/", receiver, '_result.rds'))
139 |   }
140 | }
141 | 
142 | ##############
143 | ## NicheNet ## Filter the spots with the number of cells less than 3
144 | ############## 
145 | NicheNet_function <- function(ser, fpath, sampleID, sender = NULL, receiver = NULL){
146 |   suppressMessages(library(nichenetr))
147 |   suppressMessages(library(Seurat))
148 |   suppressMessages(library(tidyverse))
149 |   set.seed(123)
150 |  
151 |   # load data
152 |   if(T){
153 |     ligand_target_matrix = readRDS("./Step0_SharedInfo/NicheNet/ligand_target_matrix.rds")
154 |     
155 |     lr_network = readRDS("./Step0_SharedInfo/NicheNet/lr_network.rds")
156 |     
157 |     weighted_networks = readRDS("./Step0_SharedInfo/NicheNet/weighted_networks.rds")
158 |     weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network %>% distinct(from,to), by = c("from","to"))
159 |   }
160 |   
161 |   sender_ct <- unique(ser$celltype)
162 |   remove_ct <- as.character(names(which(table(ser$celltype)<3)))
163 |   if (length(remove_ct)>0) {
164 |     sender_ct <- sender_ct[-which(sender_ct %in% remove_ct)]
165 |   }
166 |   if(is.null(receiver)){
167 |     receiver_ct <- unique(ser$celltype)
168 |   }else if(!is.null(receiver)){
169 |     receiver_ct <- receiver
170 |   }
171 |   rm(sender, receiver);gc()
172 |   
173 |   for (receiver in receiver_ct) {
174 |     # define the sender genes
175 |     expressed_genes_sender <- lapply(sender_ct[which(sender_ct != receiver)], function(ct){
176 |       expressed_genes_sender_ct <- get_expressed_genes(ct, ser, pct = 0.05)
177 |       expressed_genes_sender_ct
178 |     })
179 |     expressed_genes_sender = expressed_genes_sender %>% unlist() %>% unique()
180 |     
181 |     # define the receiver genes
182 |     expressed_genes_receiver = get_expressed_genes(receiver, ser, pct = 0.05)
183 |     background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)]
184 |     
185 |     # define the genes of interest
186 |     if(grepl('CID', sampleID)){
187 |       icgs_fpath <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/Giotto_result/', sampleID, '_icgs.rds')
188 |     }else if(grepl('UKF', sampleID)){
189 |       icgs_fpath <- paste0('~/1-Datasets/CancerCell_2022_Glioma/ScriptForLRTBenchmark/Giotto_result/', sampleID, '_icgs.rds')
190 |     }else{
191 |       icgs_fpath <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/Giotto_result/', sampleID,'_icgs.rds')
192 |     }
193 |     geneset =  readRDS(file = icgs_fpath)
194 |     geneset <- geneset[[receiver]] %>% unlist() %>% unique()
195 |     geneset = geneset %>% .[. %in% rownames(ligand_target_matrix)]
196 |     
197 |     # define the potential ligands
198 |     ligands = lr_network %>% pull(from) %>% unique()
199 |     receptors = lr_network %>% pull(to) %>% unique()
200 |     
201 |     expressed_ligands = intersect(ligands,expressed_genes_sender)
202 |     expressed_receptors = intersect(receptors,expressed_genes_receiver)
203 |     
204 |     potential_ligands = lr_network %>% 
205 |       filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% 
206 |       pull(from) %>% 
207 |       unique()
208 |     
209 |     # Perform NicheNet ligand activity analysis
210 |     ligand_activities = predict_ligand_activities(geneset = geneset, 
211 |                                                   background_expressed_genes = background_expressed_genes, 
212 |                                                   ligand_target_matrix = ligand_target_matrix, 
213 |                                                   potential_ligands = potential_ligands)
214 |     ligand_activities = ligand_activities %>% arrange(-pearson) %>% mutate(rank = rank(desc(pearson)))
215 |     
216 |     # Get all the ligands for downstreams analysis
217 |     best_upstream_ligands = ligand_activities %>% arrange(-pearson) %>% pull(test_ligand) %>% unique()
218 |     head(best_upstream_ligands)
219 |     
220 |     # Get the active ligand-target links
221 |     active_ligand_target_links_df = best_upstream_ligands %>% 
222 |       lapply(get_weighted_ligand_target_links,
223 |              geneset = geneset, 
224 |              ligand_target_matrix = ligand_target_matrix, 
225 |              n = nrow(ligand_target_matrix)) %>% 
226 |       bind_rows() %>% drop_na()
227 |     colnames(active_ligand_target_links_df) <- c("regulon", 'target',"value")
228 |     active_ligand_target_links_df$type <- 'ligand'
229 |     
230 |     saveRDS(active_ligand_target_links_df, file = paste0(fpath, '/', receiver, '_result.rds'))
231 |   }
232 | }
233 | 
234 | 
235 | ############
236 | ## MISTy  ## Filter the spots with the number of cells less than 3
237 | ############ 
238 | MISTy_function <- function(ser, fpath, sampleID, sender = NULL, receiver=NULL){
239 |   # load package
240 |   if(T){
241 |     # MISTy
242 |     suppressMessages(library(mistyR))
243 |     suppressMessages(library(future))
244 |     suppressMessages(library(future))
245 |     
246 |     # Seurat
247 |     suppressMessages(library(Seurat))
248 |     suppressMessages(library(Giotto))
249 |     
250 |     # data manipulation
251 |     suppressMessages(library(Matrix))
252 |     suppressMessages(library(tibble))
253 |     suppressMessages(library(dplyr))
254 |     suppressMessages(library(purrr))
255 |     
256 |     # normalization
257 |     suppressMessages(library(sctransform))
258 |     
259 |     # resource
260 |     suppressMessages(library(progeny))
261 |     
262 |     # setup parallel execution
263 |     options(future.globals.maxSize = 1024^3)
264 |     plan(multisession)
265 |     set.seed(123)
266 |   }
267 |   
268 |   source('./Step0_SharedInfo/MistyR/code.R')
269 |   
270 |   if(is.null(receiver)){
271 |     receiver_ct <- unique(ser$celltype)
272 |   }else if(!is.null(receiver)){
273 |     receiver_ct <- receiver
274 |   }
275 |   rm(receiver);gc()
276 |   
277 |   # run normalization
278 |   sct.data <- vst(GetAssayData(
279 |     object = ser,
280 |     slot = "counts",
281 |     assay = "Spatial"
282 |   ),
283 |   verbosity = 0
284 |   )
285 |   
286 |   ser[["SCT"]] <- CreateAssayObject(data = sct.data$y)
287 |   
288 |   gene.expression <- GetAssayData(ser, assay = "SCT")
289 |   coverage <- rowSums(gene.expression > 0) / ncol(gene.expression)
290 |   slide.markers <- names(which(coverage >= 0.05))
291 |   
292 |   Databases <- readRDS('./Step0_SharedInfo/stMLnet/Databases.rds')
293 |   ligands <- Databases$LigRec.DB$source %>% unique() %>% .[. %in% slide.markers]
294 |   receptors <- Databases$LigRec.DB$target %>% unique() %>% .[. %in% slide.markers]
295 |   
296 |   # load ICGs
297 |   if(grepl('CID', sampleID)){
298 |     icgs_fpath <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/Giotto_result/', sampleID, '_icgs.rds')
299 |   }else if(grepl('UKF', sampleID)){
300 |     icgs_fpath <- paste0('~/1-Datasets/CancerCell_2022_Glioma/ScriptForLRTBenchmark/Giotto_result/', sampleID, '_icgs.rds')
301 |   }else{
302 |     icgs_fpath <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/Giotto_result/', sampleID,'_icgs.rds')
303 |   }
304 |   icgs <- readRDS(icgs_fpath)
305 |   
306 |   celltypes <- ser$celltype %>% as.character()
307 |   remove_ct <- as.character(names(which(table(ser$celltype)<3)))
308 |   if (length(remove_ct)>0) {
309 |     celltypes <- celltypes[-which(celltypes %in% remove_ct)]
310 |   }
311 |   
312 |   geometry <- ser@images$image@coordinates %>% select(., row,col)
313 |   
314 |   output_fpath <- paste0(fpath, '/output')
315 |   if(!dir.exists(output_fpath)){
316 |     dir.create(output_fpath)
317 |   }
318 |   
319 |   #########
320 |   ## run ##
321 |   #########
322 |   
323 |   for (receiver in receiver_ct) {
324 |     ct <- receiver
325 |     message(paste0('running jobs:',ct))
326 |     targets <- icgs[[ct]] %>% unlist() %>% unique()
327 |     
328 |     ###############
329 |     ## parameter ##
330 |     ###############
331 |     
332 |     view.assays <- list(
333 |       "main" = "SCT",
334 |       "ligand" = "SCT"
335 |     )
336 |     
337 |     # Define features for each view
338 |     view.features <- list(
339 |       "main" = c(targets,receptors),
340 |       "ligand" = ligands
341 |     )
342 |     
343 |     # Define spatial context for each view
344 |     view.types <- list(
345 |       "main" = "intra",
346 |       "ligand" = "para"
347 |     )
348 |     
349 |     # Define additional parameters (l in the case of paraview)
350 |     view.params <- list(
351 |       "main" = NULL,
352 |       "ligand" = 10
353 |     )
354 |     
355 |     # Define specific properties for each view
356 |     view.properties <- list(
357 |       "main" = ifelse(celltypes == ct,1,0),
358 |       "ligand" = ifelse(celltypes != ct,1,0)
359 |     )
360 |     
361 |     #########
362 |     ## Run ##
363 |     #########
364 |     
365 |     spot.ids = NULL
366 |     out.alias = paste0(output_fpath, "/results_",ct,"_paraview_10")
367 |     
368 |     # Extracting data
369 |     view.data <- map(view.assays,
370 |                      extract_seurat_data,
371 |                      geometry = geometry,
372 |                      visium.slide = ser
373 |     )
374 |     str(view.data,max.level = 1)
375 |     
376 |     # Adding all spots ids in case they are not defined
377 |     if (is.null(spot.ids)) {
378 |       spot.ids <- rownames(view.data[[1]])
379 |     }
380 |     
381 |     # First filter the features from the data
382 |     view.data.filt <- map2(view.data, view.features, filter_data_features)
383 |     str(view.data.filt,max.level = 1)
384 |     view.data.filt[[1]][1:4,1:1]
385 |     view.data.filt[[2]][1:4,1:1]
386 |     
387 |     # specific properties: celltype
388 |     view.data.spec <- map2(view.data.filt, view.properties, add_specific_properties)
389 |     str(view.data.spec,max.level = 1)
390 |     view.data.spec[[1]][1:6,1:2]
391 |     view.data.spec[[2]][1:6,1:2]
392 |     
393 |     # Create initial view
394 |     views.main <- create_initial_view(view.data.spec[[1]] %>%
395 |                                         rownames_to_column() %>%
396 |                                         filter(rowname %in% spot.ids) %>%
397 |                                         select(-rowname))
398 |     str(views.main,max.level = 2)
399 |     
400 |     # Create other views
401 |     view.names <- names(view.data.spec)
402 |     
403 |     all.views <- pmap(list(
404 |       view.data.filt[-1],
405 |       view.types[-1],
406 |       view.params[-1],
407 |       view.names[-1]
408 |     ),
409 |     create_default_views,
410 |     spot.ids = spot.ids,
411 |     geometry = geometry
412 |     )
413 |     str(all.views,max.level = 2)
414 |     
415 |     pline.views <- add_views(
416 |       views.main,
417 |       unlist(all.views, recursive = FALSE)
418 |     )
419 |     
420 |     # Run MISTy
421 |     run_misty(pline.views, out.alias)
422 |     misty.results <- collect_results(out.alias)
423 |     
424 |     ############
425 |     ## output ##
426 |     ############
427 |     
428 |     misty_score <- misty.results$importances.aggregated %>% na.omit()
429 |     misty_score <- misty_score %>% 
430 |       select(Predictor,Target,Importance,view) %>%
431 |       filter(Target %in% targets, Predictor %in% c(ligands,receptors)) %>%
432 |       rename(regulon=Predictor,target=Target,value=Importance,type=view)
433 |     misty_score$type <- gsub('_10','',misty_score$type)
434 |     misty_score$type <- gsub('intra','receptor',misty_score$type)
435 |     
436 |     saveRDS(misty_score, paste0(fpath, '/', receiver, '_result.rds'))
437 |   }
438 | }
439 | 
440 | 
441 | #############
442 | ## stMLnet ## Filter the spots with the number of cells less than 3
443 | ############# 
444 | stMLnet_function <- function(ser, fpath, sampleID, sender = NULL, receiver = NULL){
445 |   suppressMessages(library(Seurat))
446 |   suppressMessages(library(stMLnet))
447 |   suppressMessages(library(tidyverse))
448 |   suppressMessages(library(doSNOW))
449 |   set.seed(123)
450 |   
451 |   sender_ct <- unique(ser$celltype)
452 |   if(is.null(receiver)){
453 |     receiver_ct <- unique(ser$celltype)
454 |   }else if(!is.null(receiver)){
455 |     receiver_ct <- receiver
456 |   }
457 |   rm(receiver);gc()
458 |   
459 |   # load input data
460 |   if(T){
461 |     norm.matrix <- as.matrix(GetAssayData(ser, "data", "SCT"))
462 |     metadata <- data.frame(Barcode = colnames(ser), Cluster = ser$celltype)
463 |     
464 |     loc <- ser@images$image@coordinates %>% select(., row,col)
465 |     
466 |     ex_databases <- readRDS("./Step0_SharedInfo/stMLnet/Databases.rds") 
467 |     
468 |     # load ICGs
469 |     if(grepl('CID', sampleID)){
470 |       icgs_fpath <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/Giotto_result/', sampleID, '_icgs.rds')
471 |     }else if(grepl('UKF', sampleID)){
472 |       icgs_fpath <- paste0('~/1-Datasets/CancerCell_2022_Glioma/ScriptForLRTBenchmark/Giotto_result/', sampleID, '_icgs.rds')
473 |     }else{
474 |       icgs_fpath <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/Giotto_result/', sampleID,'_icgs.rds')
475 |     }
476 |     ICG_list <- readRDS(icgs_fpath)
477 |     
478 |     # define highly differentially expressed ligands
479 |     if(T){
480 |       remove_ct <- as.character(names(which(table(ser$celltype)<3)))
481 |       lig_clusters <- ser@active.ident %>% as.character() %>% unique()
482 |       if (length(remove_ct)>0) {
483 |         lig_clusters <- lig_clusters[-which(lig_clusters %in% remove_ct)]
484 |       }
485 |       
486 |       if(F){
487 |         ## ligands
488 |         ligs_in_db <- ex_databases$LigRec.DB$source %>% unique()
489 |         ligs_in_db <- intersect(ligs_in_db, rownames(ser))
490 |         df_markers_ligs <- lapply(lig_clusters, function(cluster){
491 |           print(cluster)
492 |           df <- FindMarkers(ser, ident.1 = cluster, features = ligs_in_db, 
493 |                             only.pos = T, min.pct = 0.05)
494 |           if(dim(df)[[1]]!=0){
495 |             df$gene <- rownames(df)
496 |             df$ident.1 <- cluster
497 |           }else{
498 |             df <- NA
499 |           }
500 |           df
501 |         })
502 |         df_markers_ligs[which(is.na(df_markers_ligs))] <- NULL
503 |         df_markers_ligs <- do.call(rbind, df_markers_ligs)
504 |         Ligs_up_list <- split(df_markers_ligs$gene,df_markers_ligs$ident.1)
505 |         str(Ligs_up_list)
506 |       }
507 |     }
508 |     
509 |     # define highly differentially expressed receptors
510 |     if(T){
511 |       BarCluTable <- data.frame(Barcode = rownames(ser@meta.data), 
512 |                                 Cluster = ser@meta.data$celltype)
513 |       
514 |       ## parameters
515 |       expr.ct <- 0.1
516 |       pct.ct <- 0.05
517 |       
518 |       ## receptors in prior database 
519 |       recs_in_db <- ex_databases$LigRec.DB$target %>% unique()
520 |       
521 |       ## calculate mean and pct
522 |       rec_clusters <- receiver_ct
523 |       #clusters <- BarCluTable$Cluster %>% as.character() %>% unique()
524 |       
525 |       meanExpr_of_LR <- lapply(rec_clusters, function(cluster){
526 |         
527 |         cluster.ids <- BarCluTable$Barcode[BarCluTable$Cluster == cluster]
528 |         source_mean <- rowMeans(norm.matrix[,cluster.ids])
529 |         names(source_mean) <- rownames(norm.matrix)
530 |         source_mean
531 |         
532 |       }) %>% do.call('cbind',.) %>% as.data.frame()
533 |       colnames(meanExpr_of_LR) <- rec_clusters
534 |       
535 |       pct_of_LR <- lapply(rec_clusters, function(cluster){
536 |         
537 |         cluster.ids <- BarCluTable$Barcode[BarCluTable$Cluster == cluster]
538 |         dat <- norm.matrix[,cluster.ids]
539 |         pct <- rowSums(dat>0)/ncol(dat)
540 |         names(pct) <- rownames(norm.matrix)
541 |         pct
542 |         
543 |       }) %>% do.call('cbind',.) %>% as.data.frame()
544 |       colnames(pct_of_LR) <- rec_clusters
545 |       
546 |       Recs_expr_list <- lapply(rec_clusters, function(cluster){
547 |         
548 |         recs <- rownames(norm.matrix)[meanExpr_of_LR[,cluster] >= expr.ct & pct_of_LR[,cluster] >= pct.ct]
549 |         intersect(recs, recs_in_db)
550 |         
551 |       })
552 |       names(Recs_expr_list) <- rec_clusters
553 |       str(Recs_expr_list)
554 |     }
555 |     
556 |   }
557 |   
558 |   output_fpath <- paste0(fpath, '/output/')
559 |   if(!dir.exists(output_fpath)){
560 |     dir.create(output_fpath)
561 |   }
562 |   setwd(output_fpath)
563 |   
564 |   # step1 create mulityayer network
565 |   
566 |   quan.cutoff <- 0.98
567 |   Databases <- ex_databases
568 |   Databases$RecTF.DB <- Databases$RecTF.DB %>% 
569 |     .[.$score > quantile(.$score, quan.cutoff), ] %>% 
570 |     dplyr::distinct(source, target)
571 |   Databases$LigRec.DB <- Databases$LigRec.DB %>% 
572 |     dplyr::distinct(source, target) %>% 
573 |     dplyr::filter(target %in% Databases$RecTF.DB$source)
574 |   Databases$TFTG.DB <- Databases$TFTG.DB %>% 
575 |     dplyr::distinct(source, target) %>% 
576 |     dplyr::filter(source %in% Databases$RecTF.DB$target)
577 |   
578 |   resMLnet <- runMLnet(ExprMat = norm.matrix, AnnoMat = metadata, Normalize = F, 
579 |                        LigClus = lig_clusters, RecClus = rec_clusters,
580 |                        logfc.ct = 0.1, pct.ct = 0.05, expr.ct = 0.1,
581 |                        ProjectName = '2023-1', Databases = Databases,
582 |                        TGList=ICG_list, RecList=Recs_expr_list) #LigList=Ligs_up_list, 
583 |   rm(ICG_list, Ligs_up_list, Recs_expr_list, ser);gc()
584 |   
585 |   # step2 calculate Signal Activity
586 |   if(T){
587 |     ## calculate distant
588 |     DistMat <- as.matrix(dist(loc))
589 |     colnames(DistMat) <- rownames(loc)
590 |     rownames(DistMat) <- rownames(loc)
591 |     
592 |     ## imputation
593 |     exprMat.Impute <- runImputation(exprMat = norm.matrix)
594 |     rm(norm.matrix);gc()
595 |     
596 |     # remove empty list
597 |     ex_mulnetlist <- list()
598 |     for (i in 1:length(resMLnet$mlnets)) {
599 |       mlnets <- resMLnet$mlnets[[i]]
600 |       for (j in 1:length(mlnets)) {
601 |         mlnet <- mlnets[[j]]
602 |         if(nrow(mlnet$LigRec)!=0) ex_mulnetlist[[names(mlnets)[j]]] = mlnet
603 |       }
604 |     }
605 |     rm(i, j, mlnets, mlnet,loc);gc()
606 |     
607 |     Sender <- NULL
608 |     for (receiver in receiver_ct) {
609 |       resSigActList_tme <- getSiganlActivity(ExprMat = exprMat.Impute,
610 |                                              DistMat = DistMat,
611 |                                              AnnoMat = metadata,
612 |                                              MulNetList = ex_mulnetlist,
613 |                                              Receiver = receiver, Sender = Sender,
614 |                                              Downsample = FALSE, ProjectName = '2023-1')
615 |     }
616 |   }
617 |   
618 |   # step3 getCPSiganlActivity
619 |   if(T){
620 |     inputDir <- paste0(getwd(),'/runModel/work_2023-1/')
621 |     files <- list.files(inputDir)
622 |     
623 |     time_ls <- c()
624 |     for(f in files){
625 |       
626 |       label <- paste(unlist(strsplit(f,'[_.]'))[3:4],collapse = '-')
627 |       LRTG_allscore <- readRDS(paste0(getwd(),'/runModel/work_2023-1/',f))
628 |       message(paste0('running jobs: ',label))
629 |       
630 |       t1 <- Sys.time()
631 |       getSiganlImport(SiganlActivity = LRTG_allscore, Lable = label, ProjectName = '2023-1',
632 |                       NCores = 1, AutoPara = TRUE, NTrees = 500, NTrys = 10,
633 |                       TreeMethod = 'variance', NodeSize = 5,  NPert = 10)
634 |       t2 <- Sys.time()
635 |       time_ls <- c(time_ls,paste(signif(t2-t1,4),units(signif(t2-t1,4)),sep = ' '))
636 |       
637 |     }
638 |   }
639 |   
640 |   #handle result
641 |   files <- list.files('./getPIM/work_2023-1', full.names = TRUE, pattern = 'LRTG_pim_clean_')
642 |   for (f in files) {
643 |     result <- readRDS(f)
644 |     result$pIM <- lapply(result$pIM,function(s){ifelse(s>=0,s,0)}) %>% unlist() %>% as.numeric()
645 |     colnames(result) <- c("regulon", 'target',"value", 'type')
646 |     saveRDS(result, file = paste0('../', gsub('.rds', '',substring(f, 41)), '_result.rds'))
647 |   }
648 | }
649 | 
650 | 
651 | ############# 
652 | ## HoloNet ##
653 | ############# 
654 | HoloNet_function <- function(ser, fpath, sampleID, sender = NULL, receiver=NULL){
655 |   suppressMessages(library(Seurat))
656 |   suppressMessages(library(stringr))
657 |   suppressMessages(library(tidyverse))
658 |   suppressMessages(library(tidyr))
659 |   set.seed(123)
660 |   
661 |   sender_ct <- unique(ser$celltype)
662 |   if(is.null(receiver)){
663 |     receiver_ct <- unique(ser$celltype)
664 |   }else if(!is.null(receiver)){
665 |     receiver_ct <- receiver
666 |   }
667 |   rm(receiver);gc()
668 |   
669 |   input_fpath <- paste0(fpath, '/input/')
670 |   if(!dir.exists(input_fpath)){
671 |     dir.create(input_fpath)
672 |   }
673 |   
674 |   output_fpath <- paste0(fpath, '/output/')
675 |   if (!dir.exists(output_fpath)) {
676 |     dir.create(output_fpath)
677 |   }
678 |   
679 |   ## load ICGs
680 |   if(grepl('CID', sampleID)){
681 |     icgs_fpath <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ScriptForCCC/Giotto_result/', sampleID, '_icgs.rds')
682 |   }else if(grepl('UKF', sampleID)){
683 |     icgs_fpath <- paste0('~/1-Datasets/CancerCell_2022_Glioma/ScriptForLRTBenchmark/Giotto_result/', sampleID, '_icgs.rds')
684 |   }else{
685 |     icgs_fpath <- paste0('~/1-Datasets/Nature_2022_MI/ScriptForCCC/Giotto_result/', sampleID,'_icgs.rds')
686 |   }
687 |   icgs <- readRDS(icgs_fpath)
688 |   goi <- lapply(receiver_ct, function(receiver){
689 |     icgs.tmp <- icgs[[receiver]]
690 |     goi.tmp <- unique(unlist(icgs.tmp))
691 |     goi.tmp
692 |   })
693 |   goi <- unique(unlist(goi))
694 |   goi_fpath <- paste0(input_fpath, 'goi.csv')
695 |   write.csv(goi, file = goi_fpath, quote = FALSE, row.names = FALSE)
696 |   
697 |   ## run HoloNet
698 |   # get gene expression matrix
699 |   counts_fpath <- paste0(input_fpath, "raw_count.csv")
700 |   raw.count <- as.matrix(GetAssayData(ser, "count", "Spatial"))
701 |   write.csv(raw.count, file = counts_fpath, quote=F)
702 |   
703 |   # load cell type metadata
704 |   meta_fpath <- paste0(input_fpath, "meta.csv")
705 |   meta <- data.frame(Barcodes = rownames(ser@meta.data), 
706 |                      celltype = ser@meta.data$celltype, 
707 |                      row.names = rownames(ser@meta.data))
708 |   write.csv(meta, file = meta_fpath,quote=F, row.names = FALSE)
709 |   
710 |   
711 |   if(grepl('CID',sampleID)){
712 |     
713 |     img_fpath1 <- paste0('~/1-Datasets/NatureGenetics_2021_BC/ST_Data/spatial/', 
714 |                         gsub('A', '', sampleID),'_spatial')
715 |     img_fpath <- paste0(input_fpath, 'spatial')
716 |     copy_commond <- paste0('cp -r ', img_fpath1, ' ', img_fpath)
717 |     system(copy_commond)
718 |     h5_file <- './Step0_SharedInfo/HoloNet/filtered_feature_bc_matrix.h5'
719 |     copy_commond <- paste0('cp -r ', h5_file, ' ', input_fpath)
720 |     system(copy_commond)
721 |     command <- paste('sh ../Script/Step6_LRTToolsFunction/HoloNet_shell.sh', 
722 |                      counts_fpath, meta_fpath, input_fpath, 
723 |                      goi_fpath, output_fpath, sep = ' ')
724 |     
725 |   }else if(grepl('UKF', sampleID)){
726 |     
727 |     visumn_fpath <- paste0('/home/ljx/1-Datasets/CancerCell_2022_Glioma/10XVisium/',sampleID, '/outs/')
728 |     command <- paste('sh ../Script/Step6_LRTToolsFunction/HoloNet_shell.sh', 
729 |                      counts_fpath, meta_fpath, visumn_fpath, 
730 |                      goi_fpath, output_fpath, sep = ' ')
731 |   
732 |   }else{ ## MI dataset
733 |     
734 |     visumn_fpath <- paste0('/home/ljx/1-Datasets/Nature_2022_MI/ST_Data/',sampleID)
735 |     command <- paste('sh ../Script/Step6_LRTToolsFunction/HoloNet_shell.sh', 
736 |                      counts_fpath, meta_fpath, visumn_fpath, 
737 |                      goi_fpath, output_fpath, sep = ' ')
738 |     
739 |   }
740 |   
741 |   
742 |   system(command)
743 |   
744 |   ## handle result
745 |   setwd(fpath)
746 |   if(T){
747 |     files <- list.files("./output/")
748 |     result <- lapply(files, function(file){
749 |       lr <- str_split(file, "_")[[1]][1]
750 |       target <- str_split(file, "_")[[1]][2] %>% str_replace(".csv", "")
751 |       
752 |       res <- read.csv(paste0("./output/", file)) %>%
753 |         pivot_longer(cols = -X, names_to = "receiver", values_to = "value")
754 |       colnames(res)[1] <- "sender"
755 |       
756 |       res <- res[which(res$receiver %in% receiver_ct), ]
757 |       res$lr <- rep(lr, n = nrow(res))
758 |       res <- separate(res, lr, c("ligand", "receptor"), ":")
759 |       res$target <- rep(target, n = nrow(res))
760 |       res
761 |     })
762 |     result <- do.call(rbind, result)
763 |     result <- result[which(result$sender!=result$receiver), ]
764 |     
765 |     for (receiver in unique(result$receiver)) {
766 |       icgs_tmp <- unique(unlist(icgs[[receiver]]))
767 |       res <- result[which(result$receiver == receiver),]
768 |       res <- res[which(res$target %in% icgs_tmp), ]
769 |       res$LRT <- paste(res$ligand, res$receptor, res$target, sep = "_")
770 |       res <- res[,c(3,7)]
771 |       res <- aggregate(value ~ LRT, data = res, sum)
772 |       res <- tidyr::separate(res, LRT, c('ligand', 'receptor', 'target'), "_")
773 |       res_ligand <- res[, c(1,3,4)]
774 |       res_receptor <- res[, c(2,3,4)]
775 |       
776 |       res_ligand <- aggregate(value ~ ligand+target, data = res_ligand, sum)
777 |       res_receptor <- aggregate(value ~ receptor+target, data = res_receptor, sum)
778 |       
779 |       colnames(res_ligand) <- c("regulon", 'target',"value")
780 |       colnames(res_receptor) <- c("regulon", 'target',"value")
781 |       res_ligand$type <- 'ligand'
782 |       res_receptor$type <- 'receptor'
783 |       
784 |       res <- rbind(res_ligand, res_receptor)
785 |       saveRDS(res, file = paste0('./', receiver, '_result.rds'))
786 |     }
787 |   }
788 | }
789 | 
790 | 
791 | 


--------------------------------------------------------------------------------
/Step0_LRToolsFunction/Step1_LRToolsFunction.R:
--------------------------------------------------------------------------------
   1 | rm(list = ls())
   2 | # All the code run in the '/Data' directory
   3 | 
   4 | #######################
   5 | ## SingleCellSignalR ##
   6 | #######################
   7 | SCSR_function <- function(ser, species = 'human'){
   8 |   suppressMessages(library(SingleCellSignalR))
   9 |   suppressMessages(library(Seurat))
  10 |   suppressMessages(library(tidyr))
  11 |   suppressMessages(library(dplyr))
  12 |   suppressMessages(library(lobstr))
  13 |   set.seed(123)
  14 |   
  15 |   start.time <- proc.time()
  16 |   
  17 |   if(species == 'human'){
  18 |     species <- "homo sapiens"
  19 |   }else{
  20 |     species <- "mus musculus"
  21 |   }
  22 |   
  23 |   matrix.sc <- GetAssayData(ser, "data", "RNA")
  24 |   matrix.sc <- as.matrix(matrix.sc)
  25 |   meta.sc <- data.frame(celltype = ser$celltype, row.names = colnames(ser))
  26 |   
  27 |   # Digitize Labels
  28 |   i <- 1
  29 |   for(ct in unique(meta.sc$celltype)){
  30 |     meta.sc[which(meta.sc$celltype == ct), "ct_num"] <- i
  31 |     i <- i+1
  32 |   }
  33 |   c.names <- as.character(unique(meta.sc$celltype))
  34 |   
  35 |   
  36 |   signal <- cell_signaling(data = matrix.sc, genes = rownames(matrix.sc), int.type = "paracrine",
  37 |                            species = species, cluster = meta.sc$ct_num, c.names = c.names, write = FALSE)
  38 |   inter.net <- inter_network(data = matrix.sc, signal = signal, genes = rownames(matrix.sc), 
  39 |                              cluster = meta.sc$ct_num, c.names = c.names, species = species,write = FALSE)
  40 |   
  41 |   used.time <- proc.time()-start.time
  42 |   used.memory <- mem_used()
  43 |   
  44 |   result <- inter.net$`full-network`
  45 |   result$interaction.type <- NULL
  46 |   result <- separate(result, 'ligand', c('Sender', 'Ligand'), sep = '\\.')
  47 |   result <- separate(result, 'receptor', c('Receiver','Receptor'), sep = '\\.')
  48 |   result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
  49 |   result <- distinct(result, all, .keep_all = TRUE)
  50 |   rownames(result) <- NULL
  51 |   
  52 |   result_record <- list(result=result, 
  53 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
  54 |                         used_memory =  round(used.memory/1024/1024/1024,3))
  55 |   return(result_record)
  56 | }
  57 | 
  58 | ##############
  59 | ## CellChat ##
  60 | ##############
  61 | CellChat_function <- function(ser, species = 'human'){
  62 |   suppressMessages(library(Seurat))
  63 |   suppressMessages(library(CellChat))
  64 |   suppressMessages(library(tidyverse))
  65 |   suppressMessages(library(lobstr))
  66 |   set.seed(123)
  67 |   
  68 |   start.time <- proc.time()
  69 |   
  70 |   matrix.sc <- GetAssayData(ser, "data", "RNA")
  71 |   matrix.sc <- as.matrix(matrix.sc)
  72 |   meta.sc <- data.frame(celltype = ser$celltype, row.names = colnames(ser))
  73 |   meta.sc$celltype <- as.character(meta.sc$celltype)
  74 |   
  75 |   cellchat <- createCellChat(object = matrix.sc, meta = meta.sc, group.by = "celltype")
  76 |   if(species == 'human'){
  77 |     cellchat@DB <- CellChatDB.human
  78 |   }else{
  79 |     cellchat@DB <- CellChatDB.mouse
  80 |   }
  81 |   
  82 |   cellchat <- subsetData(cellchat) # This step is necessary even if using the whole database
  83 |   cellchat <- identifyOverExpressedGenes(cellchat)
  84 |   cellchat <- identifyOverExpressedInteractions(cellchat)
  85 |   result <- computeCommunProb(cellchat) %>% filterCommunication(.) %>% 
  86 |     subsetCommunication(.)
  87 |   
  88 |   used.time <- proc.time()-start.time
  89 |   used.memory <- mem_used()
  90 |   
  91 |   result <- result[,1:5]
  92 |   colnames(result) <- c('Sender','Receiver','Ligand', 'Receptor', 'LRscore')
  93 |   result <- result[which(result$Sender != result$Receiver),]
  94 |   result$Ligand <- gsub('_', '&', result$Ligand)
  95 |   result$Receptor <- gsub('_', '&', result$Receptor)
  96 |   result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
  97 |   result <- dplyr::distinct(result, all, .keep_all = TRUE)
  98 |   
  99 |   
 100 |   result_record <- list(result=result, 
 101 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
 102 |                         used_memory =  round(used.memory/1024/1024/1024,3))
 103 |   return(result_record)
 104 | }
 105 | 
 106 | ########### 
 107 | ## iTALK ## top_genes was set to 50 (default)
 108 | ########### Only for human (LR prior databases); the LR prior databases can be designed by users (complex)
 109 | iTALK_function <- function(ser){
 110 |   suppressMessages(library(iTALK))
 111 |   suppressMessages(library(Seurat))
 112 |   suppressMessages(library(lobstr))
 113 |   set.seed(123)
 114 |   
 115 |   start.time <- proc.time()
 116 |   
 117 |   matrix.sc <- GetAssayData(ser, "data", "RNA")
 118 |   matrix.sc <- as.matrix(matrix.sc)
 119 |   matrix.sc <- as.data.frame(t(matrix.sc))
 120 |   matrix.sc$cell_type <- ser$celltype
 121 |   
 122 |   # find top 50 percent highly expressed genes
 123 |   highly_exprs_genes <- rawParse(matrix.sc,stats='mean')
 124 |   # find the ligand-receptor pairs from highly expressed genes
 125 |   comm.list<-c('growth factor','other','cytokine','checkpoint')
 126 |   
 127 |   result <- NULL
 128 |   for(comm.type in comm.list){
 129 |     res.tmp <- FindLR(highly_exprs_genes,datatype='mean count',comm_type=comm.type)
 130 |     res.tmp <- res.tmp[order(res.tmp$cell_from_mean_exprs*res.tmp$cell_to_mean_exprs,decreasing=T),]
 131 |     result<-rbind(result,res.tmp)
 132 |   }
 133 |   
 134 |   used.time <- proc.time()-start.time
 135 |   used.memory <- mem_used()
 136 |   
 137 |   result$LRscore <- result$cell_from_mean_exprs*result$cell_to_mean_exprs
 138 |   result[,c(3,5,7)] <- NULL
 139 |   colnames(result) <- c('Ligand', 'Receptor', 'Sender', 'Receiver', 'LRscore')
 140 |   result <- result[which(result$Receiver != result$Sender), ]
 141 |   result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
 142 |   result <- dplyr::distinct(result, all, .keep_all = TRUE)
 143 |   
 144 |   result_record <- list(result=result, 
 145 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
 146 |                         used_memory =  round(used.memory/1024/1024/1024,3))
 147 |   return(result_record)
 148 | }
 149 | 
 150 | ############## Run in parallel with 10 cores 
 151 | ## CytoTalk ## Run in R 4.1.0 version
 152 | ############## 
 153 | CytoTalk_function <- function(ser, fpath, sender = NULL, receiver = NULL, species = 'human'){
 154 |   suppressMessages(library(CytoTalk))
 155 |   suppressMessages(library(Seurat))
 156 |   suppressMessages(library(lobstr))
 157 |   set.seed(123)
 158 |   
 159 |   # create conda environment
 160 |   if(F){
 161 |     library(reticulate)  # To install and call Python modules from R.
 162 |     conda_create(envname = "r_reticulate_CytoTalk", python_version = "3.7.3")  # Create a new Conda environment to facilitate the Python module installation.
 163 |     conda_install(envname = "r_reticulate_CytoTalk", "pybind11")  # Install two necessary Python modules for correctly compiling and using the "pcst_fast" Python module.
 164 |     conda_install(envname = "r_reticulate_CytoTalk", "numpy")
 165 |     conda_install(envname = "r_reticulate_CytoTalk", "git+https://github.com/fraenkel-lab/pcst_fast.git", pip = TRUE) # To install the "pcst_fast" module.
 166 |   }
 167 |   
 168 |   # input
 169 |   if(T){
 170 |     input_fpath <- paste0(fpath, '/input/')
 171 |     if(!dir.exists(input_fpath)){
 172 |       dir.create(input_fpath)
 173 |     }
 174 |     
 175 |     fpath.mat <- paste0(input_fpath, "counts.csv")
 176 |     fpath.meta <- paste0(input_fpath,"metadata.csv")
 177 |     
 178 |     if(T){
 179 |       norm.matrix <- as.matrix(GetAssayData(ser, "data", "RNA"))
 180 |       write.csv(norm.matrix, fpath.mat, quote=F)
 181 |       
 182 |       meta.data <- data.frame(rownames(ser@meta.data), ser@meta.data$celltype)
 183 |       colnames(meta.data) <- NULL
 184 |       meta.data <- as.matrix(meta.data)
 185 |       write.csv(meta.data, fpath.meta, quote=F, row.names = FALSE)
 186 |       rm(norm.matrix, meta.data, ser);gc()
 187 |     }
 188 |   }
 189 |   
 190 |   start.time <- proc.time()
 191 | 
 192 |   if(species == 'human'){
 193 |     pcg = CytoTalk::pcg_human
 194 |     lrp = CytoTalk::lrp_human
 195 |   }else{
 196 |     pcg = CytoTalk::pcg_mouse
 197 |     lrp = CytoTalk::lrp_mouse
 198 |   }
 199 |   
 200 |   lst.sc <- read_matrix_with_meta(fpath.mat, fpath.meta)
 201 |   celltype <- unique(lst.sc$cell_types)
 202 |   
 203 |   celltypes <- as.character(unique(ser$celltype))
 204 |   comb <- combn(celltypes, 2)
 205 |   comb <- t(comb)
 206 |   comb <- as.data.frame(comb)
 207 |   
 208 |   result <- list()
 209 |   for (i in 1:dim(comb)[[1]]) {
 210 |     tmp <- tryCatch(CytoTalk::run_cytotalk(lst.sc, comb[i, 1], comb[i, 2], cores = 10, 
 211 |                                            cutoff_a = 0.05, cutoff_b = 0.05,
 212 |                                            pcg = pcg, lrp = lrp),
 213 |                     error=function(e){NA}
 214 |     )
 215 |     cp <- paste(comb[i, 1], comb[i, 2], sep = "_")
 216 |     result[[cp]] <- tmp
 217 |   }
 218 |   
 219 |   used.time <- proc.time()-start.time
 220 |   used.memory <- mem_used()
 221 |   
 222 |   result[which(is.na(result))] <- NULL
 223 |   
 224 |   result <- lapply(result, function(res){res$pcst$final_network})
 225 |   result <- do.call(rbind, result)
 226 |   result <- result[which(result$node1_type!=result$node2_type),]
 227 |   if (species=='human') {
 228 |     result$node1 <- toupper(result$node1)
 229 |     result$node2 <- toupper(result$node2)
 230 |     result$node1 <- gsub("ORF", "orf", result$node1)
 231 |     result$node2 <- gsub("ORF", "orf", result$node2)
 232 |   }
 233 |   result <- result[,c(1:4,10)]
 234 |   colnames(result) <- c('Ligand', 'Receptor', 'Sender', 'Receiver', 'LRscore')
 235 |   result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
 236 |   result <- dplyr::distinct(result, all, .keep_all = TRUE)
 237 |   rownames(result) <- NULL
 238 |   
 239 |   result_record <- list(result=result, 
 240 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
 241 |                         used_memory =  round(used.memory/1024/1024/1024,3))
 242 |   return(result_record)
 243 | }
 244 | 
 245 | ##############
 246 | ## CellCall ##
 247 | ##############
 248 | CellCall_function <- function(ser, n=3, species = 'human'){
 249 |   suppressMessages(library(Seurat))
 250 |   suppressMessages(library(cellcall))
 251 |   suppressMessages(library(lobstr))
 252 |   set.seed(123)
 253 |   
 254 |   start.time <- proc.time()
 255 |   
 256 |   if(species == 'human'){
 257 |     species <- "Homo sapiens"
 258 |   }else{
 259 |     species <- "Mus musculus"
 260 |   }
 261 |   
 262 |   matrix.sc <- GetAssayData(ser, "counts", "RNA")
 263 |   matrix.sc <- as.matrix(matrix.sc)
 264 |   colnames(matrix.sc) <- paste(colnames(matrix.sc), gsub("-", " ", ser$celltype), sep = "_")
 265 |   matrix.sc <- as.data.frame(matrix.sc)
 266 |   
 267 |   mt <- CreateNichConObject(data=matrix.sc,
 268 |                             names.field = n,
 269 |                             names.delim = "_",
 270 |                             source = "UMI",
 271 |                             scale.factor = 10^6,
 272 |                             Org = species,
 273 |                             project = "Microenvironment")
 274 |   
 275 |   mt <- TransCommuProfile(object = mt,
 276 |                           pValueCor = 0.05,
 277 |                           CorValue = 0.1,
 278 |                           topTargetCor=1,
 279 |                           p.adjust = 0.05,
 280 |                           use.type="mean",
 281 |                           method="weighted",
 282 |                           IS_core = TRUE,
 283 |                           Org = species)
 284 |   
 285 |   result <- mt@data$expr_l_r_log2_scale
 286 |   
 287 |   used.time <- proc.time()-start.time
 288 |   used.memory <- mem_used()
 289 |   
 290 |   result <- as.data.frame(result)
 291 |   result <- tibble::rownames_to_column(result, var = "LR")
 292 |   result <- tidyr::pivot_longer(result,cols = -LR, names_to = "sr", values_to = "value")
 293 |   result <- result[which(result$value > 0), ]
 294 |   result <- tidyr::separate(data = result, col = sr, into = c("Sender", "Receiver"), sep = "-")
 295 |   result <- tidyr::separate(data = result, col = LR, into = c("Ligand", "Receptor"), sep = "-")
 296 |   colnames(result)[5] <- 'LRscore'
 297 |   result <- result[which(result$Sender != result$Receiver), ]
 298 |   result$Ligand <- gsub(',', '&', result$Ligand)
 299 |   result$Receptor <- gsub(',', '&', result$Receptor)
 300 |   result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
 301 |   result <- dplyr::distinct(result, all, .keep_all = TRUE)
 302 |   
 303 |   
 304 |   result_record <- list(result=result, 
 305 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
 306 |                         used_memory =  round(used.memory/1024/1024/1024,3))
 307 |   return(result_record)
 308 | }
 309 | 
 310 | ############### 
 311 | ## scSeqComm ## S_inter and S_intra were set to 0.8
 312 | ############### Run in parallel with 10 cores
 313 | scSeqComm_function <- function(ser, species = 'human'){
 314 |   suppressMessages(library(scSeqComm))
 315 |   suppressMessages(library(Seurat))
 316 |   suppressMessages(library(lobstr))
 317 |   set.seed(123)
 318 |   
 319 |   start.time <- proc.time()
 320 |   
 321 |   ## scRNA-seq dataset
 322 |   matrix.sc <- GetAssayData(ser, "data", "RNA")
 323 |   cell_cluster <- lapply(unique(ser$celltype), function(ct){
 324 |     meta <- ser@meta.data
 325 |     cells <- rownames(meta)[which(meta$celltype == ct)]
 326 |     cells
 327 |   })
 328 |   names(cell_cluster) <- unique(ser$celltype)
 329 |   
 330 |   if(species == 'human'){
 331 |     ## Ligand - receptor pairs
 332 |     LR_db <- LR_pairs_Jin_2020
 333 |     
 334 |     ## Transcriptional regulatory network——combined
 335 |     if(T){
 336 |       TF_TG_db <- c(TF_TG_HTRIdb, TF_TG_RegNetwork_High, TF_TG_RegNetwork_Med_High, 
 337 |                     TF_TG_TRRUSTv2, TF_TG_TRRUSTv2_HTRIdb_RegNetwork_High)
 338 |       tmp.db <- list()
 339 |       for (tf in unique(names(TF_TG_db))) {
 340 |         rep <- which(names(TF_TG_db) == tf)
 341 |         if(length(rep)>1){
 342 |           tmp.tg <- unique(unlist(TF_TG_db[rep]))
 343 |           tmp.db[[tf]] <- tmp.tg
 344 |         }else{
 345 |           tmp.db[[tf]] <- TF_TG_db[[tf]]
 346 |         }
 347 |       }
 348 |       TF_TG_db <- tmp.db
 349 |       rm(rep, tf, tmp.tg, tmp.db);gc()
 350 |     }
 351 |     
 352 |     ## Receptor-Transcription factor a-priori association from gene signaling networks
 353 |     TF_PPR_db <- TF_PPR_KEGG_human
 354 |   }else{
 355 |     ## Ligand - receptor pairs
 356 |     LR_db <- LR_pairs_Jin_2020_mouse
 357 |     
 358 |     ## Transcriptional regulatory network——combined
 359 |     if(T){
 360 |       TF_TG_db <- c(TF_TG_RegNetwork_High_mouse, TF_TG_RegNetwork_Med_High_mouse, 
 361 |                     TF_TG_TRRUSTv2_mouse, TF_TG_TRRUSTv2_RegNetwork_High_mouse)
 362 |       tmp.db <- list()
 363 |       for (tf in unique(names(TF_TG_db))) {
 364 |         rep <- which(names(TF_TG_db) == tf)
 365 |         if(length(rep)>1){
 366 |           tmp.tg <- unique(unlist(TF_TG_db[rep]))
 367 |           tmp.db[[tf]] <- tmp.tg
 368 |         }else{
 369 |           tmp.db[[tf]] <- TF_TG_db[[tf]]
 370 |         }
 371 |       }
 372 |       TF_TG_db <- tmp.db
 373 |       rm(rep, tf, tmp.tg, tmp.db);gc()
 374 |     }
 375 |     
 376 |     ## Receptor-Transcription factor a-priori association from gene signaling networks
 377 |     TF_PPR_db <- TF_PPR_KEGG_mouse
 378 |     
 379 |   }
 380 |   
 381 |   ## Intercellular and intracellular signaling analysis
 382 |   scSeqComm_res <- scSeqComm_analyze(gene_expr = matrix.sc,
 383 |                                      cell_group = cell_cluster,
 384 |                                      LR_pairs_DB = LR_db,
 385 |                                      TF_reg_DB = TF_TG_db, 
 386 |                                      R_TF_association = TF_PPR_db,
 387 |                                      N_cores = 10)
 388 |   result <- scSeqComm_res$comm_results
 389 |   result <- dplyr::filter(result, S_inter>0.8, S_intra>0.8)
 390 |   
 391 |   used.time <- proc.time()-start.time
 392 |   used.memory <- mem_used()
 393 |   
 394 |   result <- result[which(result$cluster_L != result$cluster_R),]
 395 |   result <- result[,c(1:2,4:5,9)]
 396 |   colnames(result) <- c('Ligand', 'Receptor', 'Sender', 'Receiver', 'LRscore')
 397 |   result$Ligand <- gsub(',', '&', result$Ligand)
 398 |   result$Receptor <- gsub(',', '&', result$Receptor)
 399 |   result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
 400 |   result <- dplyr::distinct(result, all, .keep_all = TRUE)
 401 |  
 402 |   
 403 |   result_record <- list(result=result, 
 404 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
 405 |                         used_memory =  round(used.memory/1024/1024/1024,3))
 406 |   return(result_record)
 407 | }
 408 | 
 409 | ################
 410 | ## Connectome ##
 411 | ################
 412 | Connectome_function <- function(ser, species = 'human'){
 413 |   suppressMessages(library(Seurat))
 414 |   suppressMessages(library(Connectome))
 415 |   suppressMessages(library(lobstr))
 416 |   set.seed(123)
 417 |   
 418 |   start.time <- proc.time()
 419 |   
 420 |   if(species == 'human'){
 421 |     connectome.genes <- union(Connectome::ncomms8866_human$Ligand.ApprovedSymbol,
 422 |                               Connectome::ncomms8866_human$Receptor.ApprovedSymbol)
 423 |   }else{
 424 |     connectome.genes <- union(Connectome::ncomms8866_mouse$Ligand.ApprovedSymbol,
 425 |                               Connectome::ncomms8866_mouse$Receptor.ApprovedSymbol)
 426 |     species <- 'mouse'
 427 |   }
 428 |   
 429 |   genes <- connectome.genes[connectome.genes %in% rownames(ser)]
 430 |   ser <- ScaleData(ser,features = genes)
 431 |   sc.con <- CreateConnectome(ser,species = species, calculate.DOR = TRUE)
 432 |   result <- FilterConnectome(sc.con, max.p = 0.05, min.pct = 0.05, remove.na = T)
 433 |   
 434 |   used.time <- proc.time()-start.time
 435 |   used.memory <- mem_used()
 436 |   
 437 |   result <- result[which(result$source != result$target),]
 438 |   result <- result[,c(1:4, 17)]
 439 |   colnames(result) <- c('Sender', 'Receiver', 'Ligand', 'Receptor', 'LRscore')
 440 |   result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
 441 |   result <- dplyr::distinct(result, all, .keep_all = TRUE)
 442 |   
 443 |  
 444 |   result_record <- list(result=result, 
 445 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
 446 |                         used_memory =  round(used.memory/1024/1024/1024,3))
 447 |   return(result_record)
 448 | }
 449 | 
 450 | ################
 451 | ## CellTalker ## min_expression was set to 0
 452 | ################ Only for human (LR prior databases)
 453 | CellTalker_function <- function(ser){
 454 |   suppressMessages(library(Seurat))
 455 |   suppressMessages(library(celltalker))
 456 |   suppressMessages(library(dplyr))
 457 |   suppressMessages(library(lobstr))
 458 |   suppressMessages(library(tidyr))
 459 |   source('./Step0_SharedInfo/CellTalker/celltalk_code.R')
 460 |   set.seed(123)
 461 |   
 462 |   start.time <- proc.time()
 463 |   
 464 |   result <- celltalk(input_object=ser,
 465 |                      metadata_grouping="celltype",
 466 |                      ligand_receptor_pairs=ramilowski_pairs,
 467 |                      number_cells_required=1,
 468 |                      min_expression=0,
 469 |                      max_expression=20000,
 470 |                      scramble_times=10)
 471 |   result <- result %>%
 472 |     mutate(fdr=p.adjust(p_val,method="fdr")) %>%
 473 |     filter(fdr < 0.05) %>%
 474 |     filter(p_val < 0.05) %>%
 475 |     filter(interact_ratio > 0)
 476 |   
 477 |   used.time <- proc.time()-start.time
 478 |   used.memory <- mem_used()
 479 |   
 480 |   result <- tidyr::separate(result, 'interaction', c('Ligand', 'Receptor'), sep = '_')
 481 |   result <- tidyr::separate(result, 'interaction_pairs', c('Sender', 'Receiver'), sep = '_')
 482 |   result <- result[,c(1:5)]; colnames(result)[5] <- 'LRscore'
 483 |   result <- result[which(result$Sender != result$Receiver),]
 484 |   result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
 485 |   result <- dplyr::distinct(result, all, .keep_all = TRUE)
 486 |   
 487 |   result_record <- list(result=result, 
 488 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
 489 |                         used_memory =  round(used.memory/1024/1024/1024,3))
 490 |   return(result_record)
 491 | }
 492 | 
 493 | ############## 
 494 | ## ICELLNET ## filter.perc was set to 5
 495 | ############## Only for human (LR prior databases)
 496 | ICELLNET_function <- function(ser){
 497 |   suppressMessages(library(icellnet))
 498 |   suppressMessages(library(Seurat))
 499 |   suppressMessages(library(BiocGenerics))
 500 |   suppressMessages(library("org.Hs.eg.db"))
 501 |   suppressMessages(library("hgu133plus2.db"))
 502 |   suppressMessages(library(jetset))
 503 |   suppressMessages(library(dplyr))
 504 |   suppressMessages(library(gridExtra))
 505 |   suppressMessages(library(lobstr))
 506 |   set.seed(123)
 507 |   
 508 |   start.time <- proc.time()
 509 |   
 510 |   db=as.data.frame(read.csv('./Step0_SharedInfo/ICELLNET/ICELLNETdb.tsv',  sep="\t",
 511 |                             header = T, check.names=FALSE, stringsAsFactors = FALSE, na.strings = ""))
 512 |   
 513 |   ## Retrieve gene expression matrix
 514 |   # Taking into account the total nb of cells in each cluster
 515 |   # filter out the percent of genes less than 5%
 516 |   filter.perc=5
 517 |   average.clean= sc.data.cleaning(object = ser, db = db, filter.perc = filter.perc, save_file = F)
 518 |   ## Apply icellnet pipeline on cluster of interest
 519 |   data.icell=as.data.frame(gene.scaling(as.data.frame(average.clean), n=1, db=db))
 520 |   
 521 |   PC.data=as.data.frame(data.icell[, colnames(data.icell)], row.names = rownames(data.icell))
 522 |   PC.target=data.frame(Class = colnames(PC.data)[-dim(data.icell)[2]], 
 523 |                        ID = colnames(PC.data)[-dim(data.icell)[2]], 
 524 |                        Cell_type = colnames(PC.data)[-dim(data.icell)[2]])
 525 |   rownames(PC.target) = colnames(PC.data)[-dim(data.icell)[2]]
 526 |   
 527 |   PC.ct <- colnames(PC.data)[-dim(data.icell)[2]]
 528 |   CC.ct <- colnames(PC.data)[-dim(data.icell)[2]]
 529 |   
 530 |   result.all <- lapply(CC.ct, function(ct){
 531 |     ## Compute intercellular communication scores
 532 |     score.computation = icellnet.score(direction = "out", PC.data = PC.data, 
 533 |                                        CC.data = as.data.frame(data.icell[,ct], row.names = rownames(data.icell)),  
 534 |                                        PC.target = PC.target, PC = PC.ct[which(PC.ct!=ct)], CC.type = "RNAseq", 
 535 |                                        PC.type = "RNAseq",  db = db)
 536 |     lr <- as.matrix(score.computation[[2]][apply(score.computation[[2]], 1, function(y) any(!is.na(y))),])
 537 |     lr <- as.matrix(lr[which(rowSums(lr) > 0),])
 538 |     lr
 539 |   })
 540 |   
 541 |   names(result.all) <- colnames(PC.data)[-dim(data.icell)[2]]
 542 |   
 543 |   used.time <- proc.time()-start.time
 544 |   used.memory <- mem_used()
 545 |   
 546 |   result <- lapply(names(result.all), function(ct){
 547 |     lr <- result.all[[ct]]
 548 |     lr <- as.data.frame(lr)
 549 |     lr <- tibble::rownames_to_column(lr, "LR")
 550 |     colnames(lr) <- c("LR", paste(ct, colnames(lr)[2:dim(lr)[2]], sep = "_"))
 551 |     result.lr <- lr %>% tidyr::pivot_longer(cols = -LR, names_to = "sr", values_to = "LRscore")
 552 |     result.lr <- tidyr::separate(data = result.lr, col = sr, into = c("Sender", "Receiver"), sep = "_")
 553 |     result.lr <- tidyr::separate(data = result.lr, col = LR, into = c("Ligand", "Receptor"), sep = " / ")
 554 |     result.lr <- result.lr[which(result.lr$LRscore>0), ]
 555 |     result.lr <- result.lr[which(result.lr$Sender != result.lr$Receiver),]
 556 |     result.lr$Ligand <- gsub(' \\+ ', '&', result.lr$Ligand)
 557 |     result.lr$Receptor <- gsub(' \\+ ', '&', result.lr$Receptor)
 558 |     result.lr$all <- paste(result.lr$Sender, result.lr$Ligand, result.lr$Receiver, result.lr$Receptor, sep = '_')
 559 |     result.lr <- distinct(result.lr, all, .keep_all = TRUE)
 560 |   })
 561 |   result <- do.call(rbind, result)
 562 |   
 563 |   result_record <- list(result=result, 
 564 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
 565 |                         used_memory =  round(used.memory/1024/1024/1024,3))
 566 |   result_record
 567 | }
 568 | 
 569 | ############## 
 570 | ## NicheNet ## top 20 ligands, top 250 targets of ligand
 571 | ############## Only for human (LR prior databases)
 572 | NicheNet_function <- function(ser, sender = NULL, receiver = NULL, lr = TRUE){
 573 |   suppressMessages(library(nichenetr))
 574 |   suppressMessages(library(Seurat))
 575 |   suppressMessages(library(tidyverse))
 576 |   suppressMessages(library(lobstr))
 577 |   set.seed(123)
 578 |   
 579 |   start.time <- proc.time()
 580 |   
 581 |   # load data
 582 |   if(T){
 583 |     ligand_target_matrix = readRDS("./Step0_SharedInfo/NicheNet/ligand_target_matrix.rds")
 584 |     
 585 |     lr_network = readRDS("./Step0_SharedInfo/NicheNet/lr_network.rds")
 586 |     
 587 |     weighted_networks = readRDS("./Step0_SharedInfo/NicheNet/weighted_networks.rds")
 588 |     weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network %>% distinct(from,to), by = c("from","to"))
 589 |   }
 590 |   
 591 |   # find DEGs in all celltypes
 592 |   DE_table_receiver_all = FindAllMarkers(object = ser, min.pct = 0.05, 
 593 |                                          logfc.threshold = 0.15, test.use = "t", 
 594 |                                          return.thresh = 0.05)
 595 |   if(is.null(sender) & is.null(receiver)){
 596 |     sender_ct <- unique(ser$celltype)
 597 |     receiver_ct <- DE_table_receiver_all[which(DE_table_receiver_all$p_val_adj<=0.05), ]$cluster %>%
 598 |       unique(.) %>% as.character(.)
 599 |   }else{
 600 |     sender_ct <- sender
 601 |     receiver_ct <- receiver
 602 |   }
 603 |   
 604 |   result <- lapply(sender_ct, function(sender){
 605 |     tmp.result <- list()
 606 |     for (receiver in receiver_ct[which(receiver_ct != sender)]) {
 607 |       # define the sender and receiver celltypes
 608 |       expressed_genes_receiver = get_expressed_genes(receiver, ser, pct = 0.05)
 609 |       background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)]
 610 |       
 611 |       expressed_genes_sender = get_expressed_genes(sender, ser, pct = 0.05)
 612 |       
 613 |       # find DEGs in receiver cells
 614 |       DE_table_receiver <- DE_table_receiver_all[which(DE_table_receiver_all$cluster == receiver), ]
 615 |       geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05) %>% pull(gene)
 616 |       geneset_oi = geneset_oi %>% .[. %in% rownames(ligand_target_matrix)]
 617 |       
 618 |       # define the potential ligands
 619 |       ligands = lr_network %>% pull(from) %>% unique()
 620 |       receptors = lr_network %>% pull(to) %>% unique()
 621 |       
 622 |       expressed_ligands = intersect(ligands,expressed_genes_sender)
 623 |       expressed_receptors = intersect(receptors,expressed_genes_receiver)
 624 |       
 625 |       potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique()
 626 |       
 627 |       # Perform NicheNet ligand activity analysis
 628 |       ligand_activities = predict_ligand_activities(geneset = geneset_oi, 
 629 |                                                     background_expressed_genes = background_expressed_genes, 
 630 |                                                     ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands)
 631 |       ligand_activities = ligand_activities %>% arrange(-pearson) %>% mutate(rank = rank(desc(pearson)))
 632 |       
 633 |       best_upstream_ligands = ligand_activities %>% top_n(20, pearson) %>% 
 634 |         arrange(-pearson) %>% pull(test_ligand) %>% unique()
 635 |       
 636 |       # Infer receptors and top-predicted target genes of top-ranked ligands
 637 |       ## Active target gene inference
 638 |       active_ligand_target_links_df = best_upstream_ligands %>% 
 639 |         lapply(get_weighted_ligand_target_links,geneset = geneset_oi, 
 640 |                ligand_target_matrix = ligand_target_matrix, n = 200) %>% 
 641 |         bind_rows() %>% drop_na()
 642 |       colnames(active_ligand_target_links_df) <- c("Ligand", 'Target', 'Score')
 643 |       active_ligand_target_links_df$Sender <- sender
 644 |       active_ligand_target_links_df$Receiver <- receiver
 645 |       
 646 |       ## Receptors of top-ranked ligands
 647 |       lr_network_top = lr_network %>% 
 648 |         filter(from %in% best_upstream_ligands & to %in% expressed_receptors) %>%
 649 |         distinct(from,to)
 650 |       
 651 |       best_upstream_receptors = lr_network_top %>% pull(to) %>% unique()
 652 |       
 653 |       lr_network_top_df_large = weighted_networks_lr %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors)
 654 |       colnames(lr_network_top_df_large) <- c('Ligand', 'Receptor', 'LRscore')
 655 |       lr_network_top_df_large$Sender <- sender
 656 |       lr_network_top_df_large$Receiver <- receiver
 657 |       
 658 |       cp <- paste(sender, receiver, sep = "_")
 659 |       
 660 |       if(lr){
 661 |         tmp.result[[cp]] <- lr_network_top_df_large
 662 |       }else{
 663 |         tmp.result[[cp]] <- active_ligand_target_links_df
 664 |       }
 665 |     }
 666 |     tmp.result <- do.call(rbind, tmp.result)
 667 |     rownames(tmp.result) <- NULL
 668 |     tmp.result
 669 |   })
 670 |   
 671 |   used.time <- proc.time()-start.time
 672 |   used.memory <- mem_used()
 673 |   
 674 |   result <- do.call(rbind, result)
 675 |   result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
 676 |   result <- dplyr::distinct(result, all, .keep_all = TRUE)
 677 |   
 678 |   result_record <- list(result=result, 
 679 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
 680 |                         used_memory =  round(used.memory/1024/1024/1024,3))
 681 |   return(result_record)
 682 | }
 683 | 
 684 | ################## Run in parallel with 10 cores
 685 | ## CellPhoneDB2 ## Only for human (LR prior databases)
 686 | ################## 
 687 | CellPhoneDB2_function <- function(ser, fpath){
 688 |   suppressMessages(library(tidyr))
 689 |   suppressMessages(library(Seurat))
 690 |   suppressMessages(library(lobstr))
 691 |   set.seed(123)
 692 |   
 693 |   # input
 694 |   if(T){
 695 |     input_fpath <- paste0(fpath, '/input/')
 696 |     if(!dir.exists(input_fpath)){
 697 |       dir.create(input_fpath)
 698 |     }
 699 |     
 700 |     fpath.mat <- paste0(input_fpath, "counts.txt")
 701 |     fpath.meta <- paste0(input_fpath,"metadata.txt")
 702 |     
 703 |     norm.matrix <- as.matrix(GetAssayData(ser, "data", "RNA"))
 704 |     write.table(norm.matrix, fpath.mat, sep='\t', quote=F)
 705 |     
 706 |     meta.data <- data.frame(rownames(ser@meta.data), ser@meta.data$celltype)
 707 |     colnames(meta.data) <- NULL
 708 |     meta.data <- as.matrix(meta.data)
 709 |     write.table(meta.data, fpath.meta, sep='\t', quote=F, row.names=F)
 710 |     rm(norm.matrix, meta.data, ser);gc()
 711 |     
 712 |     output_fpath <- paste0(fpath, '/output/')
 713 |     if(!dir.exists(output_fpath)){
 714 |       dir.create(output_fpath)
 715 |     }
 716 |   }
 717 |   
 718 |   start.time <- proc.time()
 719 |   
 720 |   command <- paste('sh ../Script/Step0_LRToolsFunction/CellPhoneDB2_shell.sh', 
 721 |                    fpath.meta, fpath.mat, output_fpath, sep = ' ')
 722 |   system(command)
 723 |   
 724 |   used.time <- proc.time()-start.time
 725 |   used.memory <- mem_used()
 726 |   
 727 |   file.path <- paste(output_fpath, 'significant_means.txt', sep = '/')
 728 |   result <- read.table(file.path, header = TRUE, sep = "\t")
 729 |   result <- result[, c(2, 13:dim(result)[2])]
 730 |   
 731 |   result <- result %>% pivot_longer(cols = -interacting_pair, names_to = "sr", values_to = "LRscore")
 732 |   result <- dplyr::filter(result,  !is.na(LRscore))
 733 |   result <- tidyr::separate(data = result, col = sr, into = c("Sender", "Receiver"), sep = "\\.")
 734 |   
 735 |   # handle the complex information
 736 |   if(T){
 737 |     cpdb.complex <-read.csv("./Step0_SharedInfo/CellPhoneDB/complexes.csv")
 738 |     rec.complex <- cpdb.complex$complex_name[which(cpdb.complex$receptor == TRUE)]
 739 |     lig.complex <- cpdb.complex$complex_name[which(cpdb.complex$receptor != TRUE)]
 740 |     cpdb.complex <- cpdb.complex[,1:5]
 741 |     cpdb.gene <- read.csv("./Step0_SharedInfo/CellPhoneDB/genes.csv")
 742 |     cpdb.gene <- dplyr::distinct(cpdb.gene, gene_name, uniprot, hgnc_symbol, .keep_all = TRUE)
 743 |     cpdb.gene <- cpdb.gene[,1:2]
 744 |     
 745 |     tmp.cpdb.complex <- merge(cpdb.gene, cpdb.complex, by.y = "uniprot_1", by.x = "uniprot")
 746 |     tmp.cpdb.complex <- tmp.cpdb.complex[,-1]
 747 |     colnames(tmp.cpdb.complex)[1] <- "gene_1"
 748 |     tmp.cpdb.complex <- merge(cpdb.gene, tmp.cpdb.complex, by.y = "uniprot_2", by.x = "uniprot")
 749 |     tmp.cpdb.complex <- tmp.cpdb.complex[,-1]
 750 |     colnames(tmp.cpdb.complex)[1] <- "gene_2"
 751 |     tmp.cpdb.complex <- tmp.cpdb.complex[,-5]
 752 |     tmp.cpdb.complex <- merge(cpdb.gene, tmp.cpdb.complex, by.y = "uniprot_3", by.x = "uniprot", all.y = TRUE)
 753 |     tmp.cpdb.complex <- tmp.cpdb.complex[-117,]
 754 |     tmp.cpdb.complex <- tmp.cpdb.complex[,-1]
 755 |     colnames(tmp.cpdb.complex)[1] <- "gene_3"
 756 |     cpdb.complex <- tmp.cpdb.complex[,4:1]
 757 |     rm(cpdb.gene, tmp.cpdb.complex)
 758 |     
 759 |     cpdb.complex$gene <- paste(cpdb.complex$gene_1, cpdb.complex$gene_2, cpdb.complex$gene_3, sep = "&")
 760 |     cpdb.complex$gene <- gsub("&NA", "", cpdb.complex$gene)
 761 |     cpdb.complex <- cpdb.complex[,c("complex_name", "gene")]
 762 |   }
 763 |   
 764 |   # combine the complex information and result of cellphonedb
 765 |   if(T){
 766 |     complexes <- cpdb.complex$complex_name[which(stringr::str_count(cpdb.complex$complex_name, pattern = '_')==1)]
 767 |     for (complex in complexes) {
 768 |       change.pair <- which(grepl(complex, result$interacting_pair))
 769 |       if(length(change.pair)>0){
 770 |         change.complex <- gsub('_', '*', complex)
 771 |         result$interacting_pair <- gsub(complex, change.complex, result$interacting_pair)
 772 |       }
 773 |     }
 774 |     
 775 |     result <- tidyr::separate(data = result, col = interacting_pair, into = c("Ligand", "Receptor"), sep = "_")
 776 |     result$Ligand <- gsub('\\*', '_', result$Ligand)
 777 |     result$Receptor <- gsub('\\*', '_', result$Receptor)
 778 |     result <- merge(result, cpdb.complex, by.x = "Ligand", by.y = "complex_name", all.x = TRUE)
 779 |     result$Ligand[!is.na(result$gene)] <- result$gene[!is.na(result$gene)]
 780 |     result <- result[,-6]
 781 |     result <- merge(result, cpdb.complex, by.x = "Receptor", by.y = "complex_name", all.x = TRUE)
 782 |     result$Receptor[!is.na(result$gene)] <- result$gene[!is.na(result$gene)]
 783 |     result <- result[,-6]
 784 |     
 785 |     result.rec <- which(result$Receptor %in% rec.complex)
 786 |     result.lig <- which(result$Ligand %in% lig.complex)
 787 |     
 788 |     if(length(result.lig)>0){
 789 |       result <- result[-result.lig,]
 790 |       print(paste0('Delate：', length(result.lig)))
 791 |     }else if(length(result.rec)>0){
 792 |       result <- result[-result.rec,]
 793 |       print(paste0('Delate：', length(result.rec)))
 794 |     }
 795 |     
 796 |     result <- result[which(result$Sender!=result$Receiver),]
 797 |     result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
 798 |     result <- dplyr::distinct(result, all, .keep_all = TRUE)
 799 |   }
 800 |   
 801 |   result_record <- list(result=result, 
 802 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
 803 |                         used_memory =  round(used.memory/1024/1024/1024,3))
 804 |   return(result_record)
 805 | }
 806 | 
 807 | ################## 
 808 | ## CellPhoneDB3 ## Only for human (LR prior databases)
 809 | ################## Run in parallel with 10 cores
 810 | CellPhoneDB3_function <- function(ser, fpath){
 811 |   suppressMessages(library(tidyr))
 812 |   suppressMessages(library(Seurat))
 813 |   suppressMessages(library(lobstr))
 814 |   set.seed(123)
 815 |   
 816 |   # input
 817 |   if(T){
 818 |     input_fpath <- paste0(fpath, '/input/')
 819 |     if(!dir.exists(input_fpath)){
 820 |       dir.create(input_fpath)
 821 |     }
 822 |     
 823 |     fpath.mat <- paste0(input_fpath, "counts.txt")
 824 |     fpath.meta <- paste0(input_fpath,"metadata.txt")
 825 |     fpath.deg <- paste0(input_fpath, 'degs.txt')
 826 |     
 827 |     norm.matrix <- as.matrix(GetAssayData(ser, "data", "RNA"))
 828 |     write.table(norm.matrix, fpath.mat, sep='\t', quote=F)
 829 |     
 830 |     meta.data <- data.frame(rownames(ser@meta.data), ser@meta.data$celltype)
 831 |     colnames(meta.data) <- NULL
 832 |     meta.data <- as.matrix(meta.data)
 833 |     write.table(meta.data, fpath.meta, sep='\t', quote=F, row.names=F)
 834 |     
 835 |     Idents(ser) <- ser$celltype
 836 |     DEGs <- FindAllMarkers(ser, test.use = 't',
 837 |                            verbose = F, only.pos = T, 
 838 |                            random.seed = 123, logfc.threshold = 0.15, 
 839 |                            min.pct = 0.05, return.thresh = 0.05)
 840 |     fDEGs = subset(DEGs, p_val_adj < 0.05 & avg_log2FC > 0.15)
 841 |     fDEGs = fDEGs[, c('cluster', 'gene', 'p_val_adj', 'p_val', 'avg_log2FC', 'pct.1', 'pct.2')]
 842 |     write.table(fDEGs, file = fpath.deg, sep = '\t', quote = F, row.names = F)
 843 |     
 844 |     rm(norm.matrix, meta.data, DEGs, fDEGs, ser);gc()
 845 |     
 846 |     output_fpath <- paste0(fpath, '/output/')
 847 |     if(!dir.exists(output_fpath)){
 848 |       dir.create(output_fpath)
 849 |     }
 850 |   }
 851 |   
 852 |   start.time <- proc.time()
 853 |   
 854 |   command <- paste('sh ../Script/Step0_LRToolsFunction/CellPhoneDB3_shell.sh', fpath.meta, fpath.mat, fpath.deg, output_fpath, sep = ' ')
 855 |   system(command)
 856 |   
 857 |   used.time <- proc.time()-start.time
 858 |   used.memory <- mem_used()
 859 |   
 860 |   file.path <- paste(output_fpath, 'significant_means.txt', sep = '/')
 861 |   
 862 |   result <- read.table(file.path, header = TRUE, sep = "\t")
 863 |   result <- result[, c(2, 13:dim(result)[2])]
 864 |   
 865 |   result <- result %>% pivot_longer(cols = -interacting_pair, names_to = "sr", values_to = "LRscore")
 866 |   result <- dplyr::filter(result,  !is.na(LRscore))
 867 |   result <- tidyr::separate(data = result, col = sr, into = c("Sender", "Receiver"), sep = "\\.")
 868 |   
 869 |   # handle the complex information
 870 |   if(T){
 871 |     cpdb.complex <-read.csv("./Step0_SharedInfo/CellPhoneDB/complexes.csv")
 872 |     rec.complex <- cpdb.complex$complex_name[which(cpdb.complex$receptor == TRUE)]
 873 |     lig.complex <- cpdb.complex$complex_name[which(cpdb.complex$receptor != TRUE)]
 874 |     cpdb.complex <- cpdb.complex[,1:5]
 875 |     cpdb.gene <- read.csv("./Step0_SharedInfo/CellPhoneDB/genes.csv")
 876 |     cpdb.gene <- dplyr::distinct(cpdb.gene, gene_name, uniprot, hgnc_symbol, .keep_all = TRUE)
 877 |     cpdb.gene <- cpdb.gene[,1:2]
 878 |     
 879 |     tmp.cpdb.complex <- merge(cpdb.gene, cpdb.complex, by.y = "uniprot_1", by.x = "uniprot")
 880 |     tmp.cpdb.complex <- tmp.cpdb.complex[,-1]
 881 |     colnames(tmp.cpdb.complex)[1] <- "gene_1"
 882 |     tmp.cpdb.complex <- merge(cpdb.gene, tmp.cpdb.complex, by.y = "uniprot_2", by.x = "uniprot")
 883 |     tmp.cpdb.complex <- tmp.cpdb.complex[,-1]
 884 |     colnames(tmp.cpdb.complex)[1] <- "gene_2"
 885 |     tmp.cpdb.complex <- tmp.cpdb.complex[,-5]
 886 |     tmp.cpdb.complex <- merge(cpdb.gene, tmp.cpdb.complex, by.y = "uniprot_3", by.x = "uniprot", all.y = TRUE)
 887 |     tmp.cpdb.complex <- tmp.cpdb.complex[-117,]
 888 |     tmp.cpdb.complex <- tmp.cpdb.complex[,-1]
 889 |     colnames(tmp.cpdb.complex)[1] <- "gene_3"
 890 |     cpdb.complex <- tmp.cpdb.complex[,4:1]
 891 |     rm(cpdb.gene, tmp.cpdb.complex)
 892 |     
 893 |     cpdb.complex$gene <- paste(cpdb.complex$gene_1, cpdb.complex$gene_2, cpdb.complex$gene_3, sep = "&")
 894 |     cpdb.complex$gene <- gsub("&NA", "", cpdb.complex$gene)
 895 |     cpdb.complex <- cpdb.complex[,c("complex_name", "gene")]
 896 |   }
 897 |   
 898 |   # combine the complex information and result of cellphonedb
 899 |   if(T){
 900 |     complexes <- cpdb.complex$complex_name[which(stringr::str_count(cpdb.complex$complex_name, pattern = '_')==1)]
 901 |     for (complex in complexes) {
 902 |       change.pair <- which(grepl(complex, result$interacting_pair))
 903 |       if(length(change.pair)>0){
 904 |         change.complex <- gsub('_', '*', complex)
 905 |         result$interacting_pair <- gsub(complex, change.complex, result$interacting_pair)
 906 |       }
 907 |     }
 908 |     
 909 |     result <- tidyr::separate(data = result, col = interacting_pair, into = c("Ligand", "Receptor"), sep = "_")
 910 |     result$Ligand <- gsub('\\*', '_', result$Ligand)
 911 |     result$Receptor <- gsub('\\*', '_', result$Receptor)
 912 |     result <- merge(result, cpdb.complex, by.x = "Ligand", by.y = "complex_name", all.x = TRUE)
 913 |     result$Ligand[!is.na(result$gene)] <- result$gene[!is.na(result$gene)]
 914 |     result <- result[,-6]
 915 |     result <- merge(result, cpdb.complex, by.x = "Receptor", by.y = "complex_name", all.x = TRUE)
 916 |     result$Receptor[!is.na(result$gene)] <- result$gene[!is.na(result$gene)]
 917 |     result <- result[,-6]
 918 |     
 919 |     result.rec <- which(result$Receptor %in% rec.complex)
 920 |     result.lig <- which(result$Ligand %in% lig.complex)
 921 |     
 922 |     if(length(result.lig)>0){
 923 |       result <- result[-result.lig,]
 924 |       print(paste0('Delate：', length(result.lig)))
 925 |     }else if(length(result.rec)>0){
 926 |       result <- result[-result.rec,]
 927 |       print(paste0('Delate：', length(result.rec)))
 928 |     }
 929 |     
 930 |     result <- result[which(result$Sender!=result$Receiver),]
 931 |     result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
 932 |     result <- dplyr::distinct(result, all, .keep_all = TRUE)
 933 |   }
 934 |   
 935 |   result_record <- list(result=result, 
 936 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
 937 |                         used_memory =  round(used.memory/1024/1024/1024,3))
 938 |   return(result_record)
 939 | }
 940 | 
 941 | ########### 
 942 | ## NATMI ## Run in parallel with 10 cores
 943 | ########### 
 944 | NATMI_function <- function(ser, fpath, species = 'human'){
 945 |   suppressMessages(library(Seurat))
 946 |   suppressMessages(library(lobstr))
 947 |   set.seed(123)
 948 |   
 949 |   input_fpath <- paste0(fpath, '/input/')
 950 |   if(!dir.exists(input_fpath)){
 951 |     dir.create(input_fpath)
 952 |   }
 953 |   
 954 |   output_fpath <- paste0(fpath, '/output/')
 955 |   if(!dir.exists(output_fpath)){
 956 |     dir.create(output_fpath)
 957 |   }
 958 |   
 959 |   fpath.mat <- paste0(input_fpath, "counts.csv")
 960 |   write.csv(100 * (exp(as.matrix(GetAssayData(object = ser, assay = "RNA", slot = "data"))) - 1), 
 961 |             fpath.mat, row.names = T)
 962 |   meta <- data.frame(Cell = colnames(ser), Annotation = ser$celltype)
 963 |   fpath.meta <- paste0(input_fpath, "metadata.csv")
 964 |   write.csv(meta,fpath.meta, row.names = FALSE)
 965 |   rm(ser);gc()
 966 |   
 967 |   start.time <- proc.time()
 968 |   
 969 |   if(species != 'human'){
 970 |     species = 'mouse'
 971 |   }
 972 |   
 973 |   command <- paste('sh ../Script/Step0_LRToolsFunction/NATMI_shell.sh', 
 974 |                    species, fpath.mat, fpath.meta, output_fpath, sep = ' ')
 975 |   system(command)
 976 |   
 977 |   used.time <- proc.time()-start.time
 978 |   used.memory <- mem_used()
 979 |   
 980 |   result_path <- paste0(output_fpath, 'Edges_lrc2p.csv')
 981 |   result <- read.csv(result_path)
 982 |   result <- result[,c(1:4, 6:20)]
 983 |   result <- dplyr::filter(result, Ligand.detection.rate > 0.05, Receptor.detection.rate > 0.05)
 984 |   result <- result[,c(1:4, 16)]
 985 |   colnames(result) <- c("Sender", "Ligand", "Receptor", "Receiver", "LRscore")
 986 |   result <- result[which(result$Sender!=result$Receiver),]
 987 |   result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
 988 |   result <- dplyr::distinct(result, all, .keep_all = TRUE)
 989 |   
 990 |   result_record <- list(result=result, 
 991 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
 992 |                         used_memory =  round(used.memory/1024/1024/1024,3))
 993 |   return(result_record)
 994 | }
 995 | 
 996 | ############### 
 997 | ## scConnect ## 
 998 | ###############
 999 | scConnect_function <- function(ser, fpath, species = 'human'){
1000 |   suppressMessages(library(Seurat))
1001 |   suppressMessages(library(lobstr))
1002 |   set.seed(123)
1003 |   
1004 |   input_fpath <- paste0(fpath, '/input/')
1005 |   if(!dir.exists(input_fpath)){
1006 |     dir.create(input_fpath)
1007 |   }
1008 |   
1009 |   fpath.mat <- paste0(input_fpath, "counts.csv")
1010 |   norm.matrix <- as.matrix(GetAssayData(ser, "data", "RNA"))
1011 |   write.csv(norm.matrix, fpath.mat, quote=F)
1012 |   
1013 |   fpath.meta <- paste0(input_fpath, "metadata.csv")
1014 |   cell.meta <- data.frame(Cell = rownames(ser@meta.data), Annotation = ser$celltype)
1015 |   write.csv(cell.meta, fpath.meta, quote=F, row.names = FALSE)
1016 |   rm(norm.matrix, cell.meta, ser);gc()
1017 |   
1018 |   start.time <- proc.time()
1019 |   
1020 |   if(species == 'human'){
1021 |     species = 'hsapiens'
1022 |   }else{
1023 |     species = 'mmusculus'
1024 |   }
1025 |   
1026 |   output_fpath <- paste0(fpath, '/output/')
1027 |   if(!dir.exists(output_fpath)){
1028 |     dir.create(output_fpath)
1029 |   }
1030 |   output_fpath <- paste0(output_fpath, 'result.csv')
1031 |   
1032 |   command <- paste('sh ../Script/Step0_LRToolsFunction/scConnect_shell.sh', 
1033 |                    fpath.mat, fpath.meta, species, output_fpath, sep = ' ')
1034 |   system(command)
1035 |   
1036 |   used.time <- proc.time()-start.time
1037 |   used.memory <- mem_used()
1038 |   
1039 |   # handle result
1040 |   if(T){
1041 |     result <- read.csv(output_fpath)
1042 |     result <- result[,-1]
1043 |     
1044 |     ligands <- read.csv("./Step0_SharedInfo/scConnect/ligands.csv", header = T)
1045 |     ligands <- ligands[,-c(1, 3, 9)]
1046 |     result <- merge(ligands, result,by = "ligand")
1047 |     colnames(result)[2] <- "ligand_gene"
1048 |     result <- result[,-c(3:6)]
1049 |     
1050 |     
1051 |     receptors <- read.csv('./Step0_SharedInfo/scConnect/receptors.csv')
1052 |     receptors <- dplyr::distinct(receptors, receptor, gene, .keep_all = FALSE)
1053 |     result <- merge(receptors, result, by = 'receptor')
1054 |     colnames(result)[2] <- 'receptor_gene'
1055 |     result <- result[which(result$ligand_pval<0.05), ]
1056 |     result <- result[which(result$receptor_pval<0.05),]
1057 |     
1058 |     result <- result[,c("sender", "reciever", "ligand_gene", "receptor_gene", "score")]
1059 |     colnames(result) <- c("Sender", "Receiver", "Ligand", "Receptor", "LRscore")
1060 |     result <- result[which(result$Sender!=result$Receiver),]
1061 |     result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
1062 |     result <- dplyr::distinct(result, all, .keep_all = TRUE)
1063 |   }
1064 |   
1065 |   
1066 |   result_record <- list(result=result, 
1067 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
1068 |                         used_memory =  round(used.memory/1024/1024/1024,3))
1069 |   return(result_record)
1070 | }
1071 | 
1072 | ###############
1073 | ## cell2cell ## Only for human (LR prior databases); the LR prior databases can be designed by users (complex)
1074 | ############### 
1075 | cell2cell_function <- function(ser, fpath, species = 'human'){
1076 |   suppressMessages(library(Seurat))
1077 |   suppressMessages(library(lobstr))
1078 |   set.seed(123)
1079 |   
1080 |   input_fpath <- paste0(fpath, '/input/')
1081 |   if(!dir.exists(input_fpath)){
1082 |     dir.create(input_fpath)
1083 |   }
1084 |   
1085 |   fpath.mat <- paste0(input_fpath, "counts.csv")
1086 |   norm.matrix <- as.matrix(GetAssayData(ser, "data", "RNA"))
1087 |   write.csv(norm.matrix, fpath.mat, quote=F)
1088 |   
1089 |   fpath.meta <- paste0(input_fpath, "metadata.csv")
1090 |   cell.meta <- data.frame(Cell = rownames(ser@meta.data), Annotation = ser$celltype)
1091 |   write.csv(cell.meta, fpath.meta, quote=F, row.names = FALSE)
1092 |   rm(norm.matrix, cell.meta, ser);gc()
1093 |   
1094 |   output_fpath <- paste0(fpath, '/output/')
1095 |   if(!dir.exists(output_fpath)){
1096 |     dir.create(output_fpath)
1097 |   }
1098 |   
1099 |   start.time <- proc.time()
1100 |   
1101 |   if(species == 'human'){
1102 |     fpath.lr <- './Step0_SharedInfo/cell2cell/lr_human.csv'
1103 |   }else{
1104 |     fpath.lr <- './Step0_SharedInfo/cell2cell/lr_human.csv'
1105 |   }
1106 |   
1107 |   command <- paste('sh ../Script/Step0_LRToolsFunction/cell2cell_shell.sh', fpath.mat, fpath.meta, 
1108 |                    fpath.lr, output_fpath, sep = ' ')
1109 |   system(command)
1110 |   used.time <- proc.time()-start.time
1111 |   used.memory <- mem_used()
1112 |   
1113 |   result.pval <- read.csv(paste0(output_fpath, 'ccc_pval.csv'))
1114 |   result.pval <- tidyr::pivot_longer(result.pval, -X, names_to = 'sr', values_to = 'pvalue')
1115 |   result.pval <- tidyr::separate(result.pval, X, c('Ligand', 'Receptor'), sep = ',')
1116 |   result.pval$Ligand <- gsub("\\(\\'", '', result.pval$Ligand)
1117 |   result.pval$Ligand <- gsub("\\'", '', result.pval$Ligand)
1118 |   result.pval$Receptor <- gsub("\\'\\)", '', result.pval$Receptor)
1119 |   result.pval$Receptor <- gsub("\\'", '', result.pval$Receptor)
1120 |   result.pval <- tidyr::separate(result.pval, sr, c('Sender', 'Receiver'), sep = '\\.')
1121 |   result.pval$all <- paste(result.pval$Sender, result.pval$Ligand, result.pval$Receiver, result.pval$Receptor, sep = '_')
1122 |   
1123 |   result.value <- read.csv(paste0(output_fpath, 'communication_matrix.csv'))
1124 |   result.value <- tidyr::pivot_longer(result.value, -X, names_to = 'sr', values_to = 'value')
1125 |   result.value <- tidyr::separate(result.value, X, c('Ligand', 'Receptor'), sep = ',')
1126 |   result.value$Ligand <- gsub("\\(\\'", '', result.value$Ligand)
1127 |   result.value$Ligand <- gsub("\\'", '', result.value$Ligand)
1128 |   result.value$Receptor <- gsub("\\'\\)", '', result.value$Receptor)
1129 |   result.value$Receptor <- gsub("\\'", '', result.value$Receptor)
1130 |   result.value <- tidyr::separate(result.value, sr, c('Sender', 'Receiver'), sep = '\\.')
1131 |   result.value$all <- paste(result.value$Sender, result.value$Ligand, result.value$Receiver, result.value$Receptor, sep = '_')
1132 |   result.value <- result.value[,c('all', 'value')]
1133 |   
1134 |   result <- merge(result.pval, result.value, by = 'all')
1135 |   result <- result[which(result$Sender != result$Receiver),]
1136 |   result <- dplyr::distinct(result, all, .keep_all = TRUE)
1137 |   result <- result[which(result$value>0), ]
1138 |   result <- result[which(result$pvalue<0.05),]
1139 |   result <- result[,1:5]
1140 |   
1141 |   result_record <- list(result=result, 
1142 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
1143 |                         used_memory =  round(used.memory/1024/1024/1024,3))
1144 |   return(result_record) 
1145 | }
1146 | 
1147 | 
1148 | ############
1149 | ## Domino ## Parameters setting: max_tf_per_clust = 10, max_rec_per_tf = 10
1150 | ############ Run in parallel with 30 cores (pyscenic)
1151 | Domino_function <- function(ser, fpath, species = 'hg38'){
1152 |   suppressMessages(library(Seurat))
1153 |   suppressMessages(library(domino))
1154 |   suppressMessages(library(dplyr))
1155 |   suppressMessages(library(lobstr))
1156 |   set.seed(123)
1157 |   
1158 |   input_fpath <- paste0(fpath, '/input/')
1159 |   if(!dir.exists(input_fpath)){
1160 |     dir.create(input_fpath)
1161 |   }
1162 |   
1163 |   fpath.mat <- paste0(input_fpath, 'counts.csv')
1164 |   write.csv(t(as.matrix(ser@assays$RNA@counts)), file = fpath.mat)
1165 |   
1166 |   start.time <- proc.time()
1167 |   
1168 |   if(species == 'hg38'){
1169 |     fpath.tfs <- './Step0_SharedInfo/Domino/hg38/hs_hgnc_curated_tfs.txt'
1170 |     fpath.feather.1 <- './Step0_SharedInfo/Domino/hg38/hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.genes_vs_motifs.rankings.feather'
1171 |     fpath.feather.2 <- './Step0_SharedInfo/Domino/hg38/hg38__refseq-r80__500bp_up_and_100bp_down_tss.mc9nr.genes_vs_motifs.rankings.feather'
1172 |     fpath.motif <- './Step0_SharedInfo/Domino/hg38/motifs-v9-nr.hgnc-m0.001-o0.0.tbl'
1173 |   }else if(species == 'hg38' & grepl('pbmc', fpath)){
1174 |     fpath.tfs <- './Step0_SharedInfo/Domino/hg38/hs_hgnc_curated_tfs.txt'
1175 |     fpath.feather.1 <- './Step0_SharedInfo/Domino/hg38/hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.genes_vs_motifs.rankings.feather'
1176 |     fpath.feather.2 <- './Step0_SharedInfo/Domino/hg38/hg38__refseq-r80__500bp_up_and_100bp_down_tss.mc9nr.genes_vs_motifs.rankings.feather'
1177 |     fpath.motif <- './Step0_SharedInfo/Domino/hg38/motifs-v9-nr.hgnc-m0.001-o0.0.tbl'
1178 |   }else if(species == 'hg19'){
1179 |     fpath.tfs <- './Step0_SharedInfo/Domino/hg38/hs_hgnc_curated_tfs.txt'
1180 |     fpath.feather.1 <- './Step0_SharedInfo/Domino/hg19/hg19-500bp-upstream-10species.mc9nr.genes_vs_motifs.rankings.feather'
1181 |     fpath.feather.2 <- './Step0_SharedInfo/Domino/hg19/hg19-500bp-upstream-7species.mc9nr.genes_vs_motifs.rankings.feather'
1182 |     fpath.feather.3 <- './Step0_SharedInfo/Domino/hg19/hg19-tss-centered-10kb-10species.mc9nr.genes_vs_motifs.rankings.feather'
1183 |     fpath.feather.4 <- './Step0_SharedInfo/Domino/hg19/hg19-tss-centered-10kb-7species.mc9nr.genes_vs_motifs.rankings.feather'
1184 |     fpath.feather.5 <- './Step0_SharedInfo/Domino/hg19/hg19-tss-centered-5kb-10species.mc9nr.genes_vs_motifs.rankings.feather'
1185 |     fpath.feather.6 <- './Step0_SharedInfo/Domino/hg19/hg19-tss-centered-5kb-7species.mc9nr.genes_vs_motifs.rankings.feather'
1186 |     fpath.motif <- './Step0_SharedInfo/Domino/hg38/motifs-v9-nr.hgnc-m0.001-o0.0.tbl'
1187 |   }else if(species == 'mm9'){
1188 |     fpath.tfs <- './Step0_SharedInfo/Domino/mm9/mm_mgi_tfs.txt'
1189 |     fpath.feather.1 <- './Step0_SharedInfo/Domino/mm9/mm9-500bp-upstream-10species.mc9nr.genes_vs_motifs.rankings.feather'
1190 |     fpath.feather.2 <- './Step0_SharedInfo/Domino/mm9/mm9-500bp-upstream-7species.mc9nr.genes_vs_motifs.rankings.feather'
1191 |     fpath.feather.3 <- './Step0_SharedInfo/Domino/mm9/mm9-tss-centered-10kb-10species.mc9nr.genes_vs_motifs.rankings.feather'
1192 |     fpath.feather.4 <- './Step0_SharedInfo/Domino/mm9/mm9-tss-centered-10kb-7species.mc9nr.genes_vs_motifs.rankings.feather'
1193 |     fpath.feather.5 <- './Step0_SharedInfo/Domino/mm9/mm9-tss-centered-5kb-10species.mc9nr.genes_vs_motifs.rankings.feather'
1194 |     fpath.feather.6 <- './Step0_SharedInfo/Domino/mm9/mm9-tss-centered-5kb-7species.mc9nr.genes_vs_motifs.rankings.feather'
1195 |     fpath.motif <- './Step0_SharedInfo/Domino/mm9/motifs-v9-nr.mgi-m0.001-o0.0.tbl'
1196 |   }else if(species == 'mm10'){
1197 |     fpath.tfs <- './Step0_SharedInfo/Domino/mm9/mm_mgi_tfs.txt'
1198 |     fpath.feather.1 <- './Step0_SharedInfo/Domino/mm10/mm10_refseq-r80_10kb_up_and_down_tss.mc9nr.genes_vs_motifs.rankings.feather'
1199 |     fpath.feather.2 <- './Step0_SharedInfo/Domino/mm10/mm10_refseq-r80_500bp_up_and_100bp_down_tss.mc9nr.genes_vs_motifs.rankings.feather'
1200 |     fpath.motif <- './Step0_SharedInfo/Domino/mm9/motifs-v9-nr.mgi-m0.001-o0.0.tbl'
1201 |   }
1202 | 
1203 |   fpath.adj <- paste0(input_fpath, 'adj.tsv')
1204 |   fpath.reg <- paste0(input_fpath, 'reg.csv')
1205 |   fpath.auc <- paste0(input_fpath, 'auc_mtx.csv')
1206 |   
1207 |   if(species == 'hg19' | species == 'mm9'){
1208 |     command <- paste('sh ../Script/Step0_LRToolsFunction/Domino_shell1.sh', 
1209 |                      fpath.adj, fpath.mat, fpath.reg, fpath.auc, 
1210 |                      fpath.tfs, fpath.feather.1, fpath.feather.2, fpath.feather.3,
1211 |                      fpath.feather.4, fpath.feather.5, fpath.feather.6,
1212 |                      fpath.motif, sep = ' ')
1213 |   }else if(species == 'hg38' | species == 'mm10'){
1214 |     command <- paste('sh ../Script/Step0_LRToolsFunction/Domino_shell2.sh', 
1215 |                      fpath.adj, fpath.mat, fpath.reg, fpath.auc, 
1216 |                      fpath.tfs, fpath.feather.1, fpath.feather.2,
1217 |                      fpath.motif, sep = ' ')
1218 |   }
1219 |   
1220 |   system(command)
1221 |   
1222 |   # run domino
1223 |   if(T){
1224 |     auc = t(read.table(fpath.auc, header = TRUE, row.names = 1, stringsAsFactors = FALSE, sep = ','))
1225 |     
1226 |     ser <- ser[, colnames(auc)]
1227 |     ser <- ScaleData(ser, features = rownames(ser))
1228 |     
1229 |     source("./Step0_SharedInfo/Domino/create_domino.R") 
1230 |     # line 200 of rscript of creat_domino function: change tf_genes = df[row, 10] to tf_genes = df[row, 9]
1231 |     dom = create_domino(signaling_db = './Step0_SharedInfo/CellPhoneDB', 
1232 |                         features = auc, counts = ser@assays$RNA@counts, z_scores = ser@assays$RNA@scale.data, 
1233 |                         clusters = ser@active.ident, df = fpath.reg)
1234 |     dom = build_domino(dom, min_tf_pval = 0.05, max_tf_per_clust = 10, max_rec_per_tf = 10)
1235 |   }
1236 |   
1237 |   used.time <- proc.time()-start.time
1238 |   used.memory <- mem_used()
1239 |   
1240 |   # bulid ligand-receptor dataframe
1241 |   if(T){
1242 |     lig_to_rec <- data.frame()
1243 |     for (rec in names(dom@linkages$rec_lig)) {
1244 |       lig <- dom@linkages$rec_lig[[rec]]
1245 |       
1246 |       if(length(lig)>0){
1247 |         lig.rec <- data.frame(ligand = lig, receptor = rep(rec, length(lig)))
1248 |       }else{
1249 |         lig.rec <- data.frame(ligand = lig, receptor = rec)
1250 |       }
1251 |       
1252 |       lig_to_rec <- rbind(lig_to_rec, lig.rec)
1253 |     }
1254 |     lig_to_rec <- lig_to_rec[which(lig_to_rec$ligand!=""),]
1255 |   }
1256 |   
1257 |   # handle result
1258 |   if(T){
1259 |     result <- lapply(levels(dom@clusters), function(reciever){
1260 |       print(reciever)
1261 |       # get ligands of sender cells which are communicated with reciever cells
1262 |       sender.ligands <- dom@cl_signaling_matrices[[reciever]] %>%
1263 |         as.data.frame(.) %>% tibble::rownames_to_column(var = "ligand") %>%
1264 |         tidyr::pivot_longer(cols = -ligand, names_to = "sender", values_to = "expression") %>%
1265 |         .[which(.$expression>0), ]
1266 |       sender.ligands$sender <- stringr::str_replace_all(sender.ligands$sender, "L_", "")
1267 |       sender.ligands <- sender.ligands[, -3]
1268 |       
1269 |       if(dim(sender.ligands)[1] != 0){
1270 |         # get tfs of reciever cells
1271 |         reciever.tfs <- dom@linkages$clust_tf[[reciever]]
1272 |         rec_tf <- data.frame()
1273 |         for(tf in reciever.tfs){
1274 |           rec <- dom@linkages$tf_rec[[tf]]
1275 |           
1276 |           if(length(rec)>0){
1277 |             rec.tf <- data.frame(receptor = rec, tf = rep(tf, length(rec)))
1278 |           }else{
1279 |             rec.tf <- data.frame()
1280 |           }
1281 |           rec_tf <- rbind(rec_tf, rec.tf)
1282 |         }
1283 |         
1284 |         lig_rec_tf <- merge(lig_to_rec, rec_tf, by = "receptor")
1285 |         lig_rec_tf <- merge(sender.ligands, lig_rec_tf, by = "ligand")
1286 |         lig_rec_tf$reciever <- reciever
1287 |         lig_rec_tf$tf <- stringr::str_replace_all(lig_rec_tf$tf, "\\...", "")
1288 |       }else{
1289 |         lig_rec_tf <- NA
1290 |       }
1291 |       lig_rec_tf
1292 |     })
1293 |     
1294 |     result[which(is.na(result))] <- NULL
1295 |     
1296 |     result <- do.call(rbind, result)
1297 |     result <- result[which(result$sender != result$reciever),]
1298 |     result <- result[,-4]
1299 |     colnames(result) <- c('Ligand', 'Sender', 'Receptor', 'Receiver')
1300 |     result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
1301 |     result <- dplyr::distinct(result, all, .keep_all = TRUE)
1302 |   }
1303 |   
1304 |   result_record <- list(result=result, 
1305 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
1306 |                         used_memory =  round(used.memory/1024/1024/1024,3))
1307 |   return(result_record)
1308 | }
1309 | 
1310 | #############
1311 | ## scMLnet ## 
1312 | #############
1313 | scMLnet_function <- function(ser, sender = NULL, receiver = NULL){
1314 |   pacman::p_unload(Seurat)
1315 |   suppressMessages(library(Seurat, lib.loc = '/home/ljx/R/x86_64-redhat-linux-gnu-library/library2/'))
1316 |   suppressMessages(library(scMLnet))
1317 |   suppressMessages(library(lobstr))
1318 |   set.seed(123)
1319 |   
1320 |   start.time <- proc.time()
1321 |   
1322 |   # scMLnet v0.2.0
1323 |   # pacakge: https://github.com/SunXQlab/scMLnet2.0
1324 |   GCMat<- GetAssayData(ser, "counts", "RNA")
1325 |   BarCluTable <- data.frame(Barcode = colnames(ser), Cluster = ser$celltype)
1326 |   types <- unique(BarCluTable$Cluster)
1327 |   
1328 |   LigRecLib <- read.table("./Step0_SharedInfo/scMLnet/LigRec.txt", header = T)
1329 |   colnames(LigRecLib)[2:3] <- c("source", "target")
1330 |   TFTarLib <- read.table("./Step0_SharedInfo/scMLnet/TFTargetGene.txt", header = T)
1331 |   colnames(TFTarLib)[1:2] <- c("source", "target")
1332 |   RecTFLib <- read.table("./Step0_SharedInfo/scMLnet/RecTF.txt", header = T)
1333 |   colnames(RecTFLib)[1:2] <- c("source", "target")
1334 |   
1335 |   if(is.null(sender) & is.null(receiver)){
1336 |     sender_ct <- types
1337 |     receiver_ct <- types
1338 |   }else{
1339 |     sender_ct <- sender
1340 |     receiver_ct <- receiver
1341 |   }
1342 |   
1343 |   result <- list()
1344 |   for (LigClu in sender_ct) {
1345 |     for (RecClu in receiver_ct[which(receiver_ct != LigClu)]) {
1346 |       netList <- tryCatch(RunMLnet(data = GCMat, BarCluTable = BarCluTable, 
1347 |                                    RecClu = RecClu, LigClu = LigClu,
1348 |                                    LigRec.DB = LigRecLib, 
1349 |                                    TFTG.DB = TFTarLib,
1350 |                                    RecTF.DB = RecTFLib),
1351 |                           error=function(e){NA}
1352 |       )
1353 |       list.names <- paste(LigClu, RecClu, sep = "_")
1354 |       result[[list.names]] <- netList
1355 |     }
1356 |   }
1357 |   
1358 |   used.time <- proc.time()-start.time
1359 |   used.memory <- mem_used()
1360 |   
1361 |   result[which(is.na(result))] <- NULL
1362 |   
1363 |   result <- lapply(result, function(res){
1364 |     res <- res$LigRec
1365 |     res
1366 |   })
1367 |   result <- do.call(rbind, result)
1368 |   if(!is.null(result)){
1369 |     result <- tibble::rownames_to_column(result, 'sr')
1370 |     result$sr <- gsub('\\.[0-9]+', '', result$sr)
1371 |     result <- tidyr::separate(result, sr, c('Sender', 'Receiver'), sep = '_')
1372 |     result <- result[,-5]
1373 |     colnames(result)[3:4] <- c('Ligand', "Receptor")
1374 |     result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
1375 |     result <- dplyr::distinct(result, all, .keep_all = TRUE)
1376 |     result <- result[which(result$Sender!=result$Receiver),]
1377 |   }
1378 |   
1379 |   result_record <- list(result=result, 
1380 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
1381 |                         used_memory =  round(used.memory/1024/1024/1024,3))
1382 |   return(result_record)
1383 | }
1384 | 
1385 | #############
1386 | ## PyMINEr ## 
1387 | #############
1388 | PyMINEr_function <- function(ser, fpath, species = 'human'){
1389 |   suppressMessages(library(Seurat))
1390 |   suppressMessages(library(lobstr))
1391 |   set.seed(123)
1392 |   
1393 |   # input
1394 |   if(T){
1395 |     input_fpath <- paste0(fpath, '/input/')
1396 |     if(!dir.exists(input_fpath)){
1397 |       dir.create(input_fpath)
1398 |     }
1399 |     
1400 |     fpath.mat <- paste0(input_fpath, 'counts.txt')
1401 |     matrix.sc <- GetAssayData(ser, "data", "RNA")
1402 |     matrix.sc <- as.data.frame(matrix.sc)
1403 |     matrix.sc <- tibble::rownames_to_column(matrix.sc, var = "gene")
1404 |     write.table(matrix.sc, file = fpath.mat, row.names = FALSE, sep = "\t")
1405 |     
1406 |     fpath.meta <- paste0(input_fpath, 'metadata.txt')
1407 |     meta.sc <- data.frame(barcode = colnames(ser), celltype = ser$celltype)
1408 |     i <- 0
1409 |     for(ct in unique(meta.sc$celltype)){
1410 |       meta.sc[which(meta.sc$celltype == ct), "ct_num"] <- i
1411 |       i <- i+1
1412 |     }
1413 |     meta <- meta.sc[,2:3]
1414 |     meta <- dplyr::distinct(meta)
1415 |     rownames(meta) <- NULL
1416 |     
1417 |     meta.sc <- meta.sc[,-2]
1418 |     write.table(meta.sc, file = fpath.meta,row.names = F, col.names = F, sep = "\t")
1419 |     rm(matrix.sc, meta.sc, ct, i, ser);gc()
1420 |   }
1421 |   
1422 |   start.time <- proc.time()
1423 |   
1424 |   if(species == 'human'){
1425 |     species <- 'hsapiens'
1426 |   }else{
1427 |     species <- 'mmusculus'
1428 |   }
1429 |   
1430 |   command <- paste('sh ../Script/Step0_LRToolsFunction/PyMINEr_shell.sh', 
1431 |                    fpath.mat, fpath.meta, species, sep = ' ')
1432 |   system(command)
1433 |   
1434 |   used.time <- proc.time()-start.time
1435 |   used.memory <- mem_used()
1436 |   
1437 |   result_path <- paste0(input_fpath, 
1438 |                         'autocrine_paracrine_signaling/extracellular_plasma_membrane_cell_type_specific_interactions.txt')
1439 |   result <- read.table(result_path, header = FALSE, sep = ',', fileEncoding = "utf-8")
1440 |   result <- tidyr::separate(result, V1, paste0('V', 2:19), sep = '\t')
1441 |   result <- result[,-c(12:17)]
1442 |   result[1, 12] <- 'LRscore'
1443 |   colnames(result) <- result[1,]
1444 |   result <- result[-1,]
1445 |   
1446 |   result <- result[which(result$cell_type_1!=result$cell_type_2),]
1447 |   result <- result[,c(2,4,6,8,12)]
1448 |   meta$ct_num <- paste0('bool_sample_group_', meta$ct_num)
1449 |   result <- merge(result, meta, by.x = 'cell_type_1', by.y = 'ct_num')
1450 |   colnames(result)[6] <- 'Sender'
1451 |   result <- merge(result, meta, by.x = 'cell_type_2', by.y = 'ct_num')
1452 |   colnames(result)[7] <- 'Receiver'
1453 |   result <- result[,-c(1:2)]
1454 |   colnames(result)[1:2] <- c('Ligand', 'Receptor')
1455 |   result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
1456 |   result <- dplyr::distinct(result, all, .keep_all = TRUE)
1457 |   
1458 |   result_record <- list(result=result, 
1459 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
1460 |                         used_memory =  round(used.memory/1024/1024/1024,3))
1461 |   return(result_record)
1462 | }
1463 | 
1464 | ###############
1465 | ## RNAMagnet ## 
1466 | ############### Only for mouse (LR prior databases)
1467 | RNAMagnet_function <- function(ser){
1468 |   suppressMessages(library(RNAMagnet))
1469 |   suppressMessages(library(Seurat))
1470 |   suppressMessages(library(lobstr))
1471 |   set.seed(123)
1472 |   
1473 |   start.time <- proc.time()
1474 |   
1475 |   result <- RNAMagnetSignaling(ser, .version = "2.0.0", 
1476 |                                .cellularCompartment = c("Membrane", "ECM", "Both", "Secreted"))
1477 |   
1478 |   temp <- c()
1479 |   for (sender in unique(ser$celltype)) {
1480 |     for (receiver in unique(ser$celltype)) {
1481 |       tmp <- getRNAMagnetGenes(result, sender, receiver)
1482 |       if(dim(tmp)[[1]]!=0){
1483 |         tmp$Sender <- sender
1484 |         tmp$Receiver <- receiver
1485 |         temp <- rbind(temp,tmp)
1486 |       }
1487 |     }
1488 |   }
1489 |   
1490 |   used.time <- proc.time()-start.time
1491 |   used.memory <- mem_used()
1492 |   
1493 |   result <- temp
1494 |   result <- result[which(result$Sender!=result$Receiver),]
1495 |   result <- tidyr::separate(result, pair, c('Ligand', 'Receptor'), sep = '-')
1496 |   rownames(result) <- NULL
1497 |   need <- which(grepl('\\|', result$Ligand) | grepl('\\|', result$Receptor))
1498 |   if(length(need)>0){
1499 |     result_sub1 <- result[need, ]
1500 |     result_sub2 <- result[-need, ]
1501 |     
1502 |     result_tmp <- c()
1503 |     for (i in seq(need)) {
1504 |       ligands <- unlist(stringr::str_split(result_sub1[i, 'Ligand'], '\\|'))
1505 |       receptors <- unlist(stringr::str_split(result_sub1[i, 'Receptor'], '\\|'))
1506 |       tmp <- expand.grid(ligands, receptors)
1507 |       colnames(tmp) <- c('Ligand', 'Receptor')
1508 |       tmp$Sender <- rep(result_sub1[i, 'Sender'], n = dim(tmp)[1])
1509 |       tmp$Receiver <- rep(result_sub1[i, 'Receiver'], n = dim(tmp)[1])
1510 |       tmp$score <- rep(result_sub1[i, 'score'], n = dim(tmp)[1])
1511 |       tmp <- tmp[, c(5, 1:4)]
1512 |       result_tmp <- rbind(result_tmp, tmp)
1513 |     }
1514 |     result <- rbind(result_tmp, result_sub2)
1515 |   }
1516 |   
1517 |   result <- aggregate(score~Ligand+Receptor+Sender+Receiver, mean, data=result)
1518 |   result$all <- paste(result$Sender, result$Ligand, result$Receiver, result$Receptor, sep = '_')
1519 |   colnames(result)[5] <- 'LRscore'
1520 |   
1521 |   
1522 |   result_record <- list(result=result, 
1523 |                         used_time = paste0(round(used.time[3]/60,3), ' min'), 
1524 |                         used_memory =  round(used.memory/1024/1024/1024,3))
1525 |   return(result_record)
1526 | }
1527 | 


--------------------------------------------------------------------------------