├── .DS_Store ├── fig6 ├── GSE169246 │ ├── .DS_Store │ ├── subT1_extract_from_b2 │ │ └── 1_extract │ │ │ └── extract.R │ ├── b1_harmony │ │ ├── 1_injectBatchinfo │ │ │ ├── submit.sh │ │ │ └── inject.R │ │ └── 2_run_harmony │ │ │ ├── test.R │ │ │ └── harmony.sh │ ├── 1_merge │ │ └── merge.R │ ├── subT2_harmony │ │ ├── 1_run_harmony │ │ │ ├── harmony.R │ │ │ └── harmony.sh │ │ └── 2_run_harmony │ │ │ ├── harmony.R │ │ │ └── harmony.sh │ ├── 2_seurat_pipeline │ │ ├── submitjob.sh │ │ └── callBack.sh │ ├── b2_seurat_pipeline │ │ ├── submitjob.sh │ │ └── callBack.sh │ ├── subT3_seurat_pipeline │ │ ├── submitjob.sh │ │ └── callBack.sh │ ├── subT2_split_by_marker │ │ └── split.R │ ├── 3_extractT │ │ └── extract.R │ ├── 4_mapping │ │ └── submitJob_Mapping_.sh │ ├── 4_mapping_multimap │ │ └── submitJob_Mapping.sh │ ├── 4_mapping_filter │ │ └── submitJob_Mapping.sh │ ├── subT3_mapping_split_by_marker │ │ └── submitJob_Mapping.sh │ ├── 6_validate_mapping │ │ └── validate_batch.R │ └── subT3_mapping_filter_split_by_marker │ │ └── submitJob_Mapping.sh ├── GSE179994 │ ├── .DS_Store │ ├── 2_extractTcell │ │ ├── .DS_Store │ │ └── extract.R │ ├── 2_extractTcell_proliferative │ │ ├── .DS_Store │ │ └── extract.R │ ├── 1_merge │ │ └── merge.R │ ├── 4_mapping_multimap │ │ └── submitJob_Mapping.sh │ ├── 4_mapping_filter_CD4_Proliferative │ │ └── submitJob_Mapping.sh │ ├── 4_mapping_filter │ │ └── submitJob_Mapping.sh │ └── 4_mapping │ │ └── submitJob_Mapping.sh ├── GSE144649 │ ├── 6_mapping_multimap │ │ ├── int.R │ │ └── submitJob_Mapping.sh │ ├── 2_QC │ │ └── qc.sh │ ├── 3_pca │ │ ├── pca.sh │ │ └── pca.R │ ├── 1_merge │ │ ├── merge.sh │ │ └── merge.R │ ├── 5_extractT │ │ └── extract.R │ ├── 4_seurat_pipeline │ │ ├── submitjob.sh │ │ └── callBack.sh │ ├── 6_mapping │ │ ├── submitJob_Mapping.sh │ │ └── Mapping.R │ └── 6_mapping_filter_genes │ │ ├── submitJob_Mapping.sh │ │ └── Mapping.R ├── GSE173351 │ ├── 2_QC │ │ └── qc.sh │ ├── 1_merge │ │ ├── merge.sh │ │ └── merge.R │ ├── 3_pca │ │ 
├── pca.sh │ │ └── pca.R │ ├── 4_harmony │ │ ├── 1_injectBatchinfo │ │ │ └── inject.R │ │ └── 2_run_harmony │ │ │ └── harmony.sh │ ├── 6_extractT │ │ └── extract.R │ ├── 5_seurat_pipeline │ │ ├── submitjob.sh │ │ └── callBack.sh │ ├── 6_extractT_proliferative │ │ └── extract.R │ ├── 7_mapping │ │ └── submitJob_Mapping.sh │ ├── 7_mapping_filter │ │ └── submitJob_Mapping.sh │ ├── 7_mapping_multimap │ │ └── submitJob_Mapping.sh │ └── 7_mapping_filter_proliferative │ │ └── submitJob_Mapping.sh └── SCP1288 │ ├── 0_merge │ └── merge.R │ └── 1_mapping_filter │ └── submitJob_Mapping.sh ├── README.md ├── data_preprocess ├── NKTMAIT │ ├── extractData.R │ ├── p1_NKTMAIT_v6.sh │ └── callBack_NKTMAIT.sh ├── 0_src │ ├── RunNormalizeScale.R │ ├── visualize_batch.R │ ├── RunCCA.R │ ├── visualize.R │ ├── determinePC.R │ ├── FindCluster.R │ ├── snn-marker.R │ ├── snn-harmony-umap.R │ ├── RunPCA_RPCA.R │ ├── snn-harmony-best-umap-finder.R │ ├── qc-by-cluster.R │ └── RunUMAP.R ├── TFH │ ├── callBack_CD4TFH.sh │ └── p1_sub_TFH_CD4_V6.sh ├── Treg │ ├── callBack_Treg.sh │ └── p1_sub_Treg_CD4_V5.sh ├── CD4 │ ├── p1CD4_V7.sh │ └── callBack_CD4.sh ├── CD8 │ ├── p1CD8_V6.sh │ └── callBack_CD8.sh ├── proliferative │ ├── p1_Proliferative_V4.sh │ └── callBack_Proliferative.sh └── 0_run_seurat_pipeline │ ├── RunUMAPJobs.sh │ └── FindClusterJobs.sh └── fig5 └── fig5b ├── plotMapScale.R └── zoomIn.sh /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Coolgenome/TCM/HEAD/.DS_Store -------------------------------------------------------------------------------- /fig6/GSE169246/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Coolgenome/TCM/HEAD/fig6/GSE169246/.DS_Store -------------------------------------------------------------------------------- /fig6/GSE179994/.DS_Store: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Coolgenome/TCM/HEAD/fig6/GSE179994/.DS_Store -------------------------------------------------------------------------------- /fig6/GSE179994/2_extractTcell/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Coolgenome/TCM/HEAD/fig6/GSE179994/2_extractTcell/.DS_Store -------------------------------------------------------------------------------- /fig6/GSE179994/2_extractTcell_proliferative/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Coolgenome/TCM/HEAD/fig6/GSE179994/2_extractTcell_proliferative/.DS_Store -------------------------------------------------------------------------------- /fig6/GSE144649/6_mapping_multimap/int.R: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------- 2 | # filename : int.R 3 | # Date : 2022-04-29 4 | # contributor : Yanshuo Chu 5 | # function: int 6 | #-------------------------------------------------------------- 7 | 8 | print('<==== int.R ====>') 9 | 10 | suppressMessages({ 11 | library(optparse) 12 | library(tidyverse) 13 | library(Seurat) 14 | library(SeuratObject) 15 | library(cowplot) 16 | library(MultiMap) 17 | }) 18 | 19 | CD4PredictedT <- readRDS("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE179994/MappingResult_MultiMap/CD4/predictedT_2022-04-29.rds") 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # T Cell Map 2 | Codes used in pan-cancer T cell study 3 | 4 | - "TCellMap.R" is a tool we developed to automatically align and annotate T cells in a scRNA-seq dataset. 
It uniformly aligns T cells from the query dataset with the T cell maps that we built in our pan-cancer T cell study. 5 | 6 | - The Python script "Res_largerT.py" functions as a pipeline for the analysis and visualization of SRT data. 7 | 8 | - The directories "fig1" through "fig6" contain additional scripts used for generating the figures in the manuscript. Furthermore, the "data_preprocess" folder hosts scripts designed to preprocess raw data. These scripts handle tasks such as batch correction and dimensionality reduction. 9 | -------------------------------------------------------------------------------- /data_preprocess/NKTMAIT/extractData.R: -------------------------------------------------------------------------------- 1 | #'-------------------------------------------------------------- 2 | #' filename : extractData.R 3 | #' Date : 2021-07-18 4 | #' contributor : Yanshuo Chu 5 | #' function: extractData 6 | #'-------------------------------------------------------------- 7 | 8 | print('<==== extractData.R ====>') 9 | 10 | library(optparse) 11 | library(tidyverse) 12 | library(Seurat) 13 | 14 | NKGDT_PATH <- "/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/NKGDT_V5/nPC_10/UMAP_dist_0.1_nneighbor_35/p1NKGDT_V4_10_UMAP_dist_0.1_nneighbor_35_CLUSTER_res_0.3/cluster.rds" 15 | 16 | seuratObj <- readRDS(NKGDT_PATH) 17 | 18 | 19 | Idents(seuratObj) <- seuratObj$seurat_clusters 20 | 21 | 22 | subSeuratObj <- subset(seuratObj, idents = c(0, 2, 5), invert = T) 23 | 24 | saveRDS(subSeuratObj, file.path("/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/NKTMAIT_V6/data.rds")) 25 | -------------------------------------------------------------------------------- /fig6/GSE169246/subT1_extract_from_b2/1_extract/extract.R: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------- 2 | # filename : extract.R 3 | # Date : 2022-09-01 
4 | # contributor : Yanshuo Chu 5 | # function: extract 6 | #-------------------------------------------------------------- 7 | 8 | print('<==== extract.R ====>') 9 | 10 | library(optparse) 11 | library(tidyverse) 12 | library(Seurat) 13 | library(GEOquery) 14 | 15 | seuratObj <- readRDS("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/b1_harmony/2_run_harmony/nPC_30/UMAP_dist_0.1_nneighbor_50/GSE169246_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds") 16 | 17 | TCellClusters <- c(0, 1, 2, 3, 7, 9, 13, 16, 20) 18 | 19 | Idents(seuratObj) <- seuratObj$seurat_clusters 20 | 21 | subObj <- subset(seuratObj, idents = TCellClusters) 22 | 23 | figurePath <- file.path("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/subT1_extract_from_b2/1_extract/outs") 24 | if(!dir.exists(figurePath)){ 25 | dir.create(figurePath, recursive = T) 26 | } 27 | setwd(figurePath) 28 | saveRDS(subObj, file.path(getwd(), paste0('subObj', "_", Sys.Date(), '.rds'))) 29 | -------------------------------------------------------------------------------- /fig6/GSE169246/b1_harmony/1_injectBatchinfo/submit.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J submit 2 | #BSUB -q e80medium 3 | #BSUB -W 23:00 4 | #BSUB -n 1 5 | #BSUB -M 550 6 | #BSUB -R rusage[mem=550] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b1_harmony/1_injectBatchinfo/submit.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b1_harmony/1_injectBatchinfo/submit.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b1_harmony/1_injectBatchinfo/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b1_harmony/1_injectBatchinfo/submit.o.txt 14 | rm -rf 
/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b1_harmony/1_injectBatchinfo/submit.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/4.0.3 17 | #____----____----____ 18 | 19 | 20 | Rscript --no-save /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b1_harmony/1_injectBatchinfo/inject.R 21 | -------------------------------------------------------------------------------- /fig6/GSE144649/2_QC/qc.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J qc 2 | #BSUB -q short 3 | #BSUB -W 2:30 4 | #BSUB -n 1 5 | #BSUB -M 100 6 | #BSUB -R rusage[mem=100] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/2_QC/qc.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/2_QC/qc.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/2_QC/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/2_QC/qc.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/2_QC/qc.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____ 18 | 19 | OutDir=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE144649/2_QC 20 | if [ ! 
-d $OutDir ]; then 21 | mkdir -p $OutDir 22 | fi 23 | 24 | Rscript --no-save /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/src/public/filter-QC.R \ 25 | -d /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE144649/1_merge/merged.rds \ 26 | -o $OutDir/qc.rds 27 | -------------------------------------------------------------------------------- /fig6/GSE173351/2_QC/qc.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J qc 2 | #BSUB -q medium 3 | #BSUB -W 23:00 4 | #BSUB -n 1 5 | #BSUB -M 80 6 | #BSUB -R rusage[mem=80] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/2_QC/qc.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/2_QC/qc.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/2_QC/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/2_QC/qc.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/2_QC/qc.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____ 18 | 19 | OutDir=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/2_QC 20 | if [ ! 
-d $OutDir ]; then 21 | mkdir -p $OutDir 22 | fi 23 | 24 | Rscript --no-save /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/src/public/filter-QC.R \ 25 | -d /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/1_merge/merged.rds \ 26 | -o $OutDir/qc.rds 27 | -------------------------------------------------------------------------------- /fig6/GSE169246/1_merge/merge.R: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------- 2 | # filename : merge.R 3 | # Date : 2022-02-16 4 | # contributor : Yanshuo Chu 5 | # function: merge 6 | #-------------------------------------------------------------- 7 | 8 | print('<==== merge.R ====>') 9 | rm(list=ls()) 10 | 11 | library(data.table) 12 | library(Seurat) 13 | library(ggplot2) 14 | library(tidyverse) 15 | library(harmony) 16 | 17 | ## seuratObj <- readRDS("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/merged/merged.obj") 18 | 19 | data <- Read10X(data.dir = "/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/data/GSE169246/raw/RNA", gene.column = 1) 20 | seuratObj = CreateSeuratObject(counts = data) 21 | 22 | seuratObj <- seuratObj %>% 23 | Seurat::NormalizeData(verbose = FALSE) %>% 24 | FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% 25 | ScaleData(verbose = FALSE) %>% 26 | RunPCA(npcs = 100, verbose = FALSE) 27 | 28 | figurePath <- file.path("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result", "GSE169246", "merged") 29 | if(!dir.exists(figurePath)){ 30 | dir.create(figurePath, recursive = T) 31 | } 32 | setwd(figurePath) 33 | saveRDS(seuratObj, "merged.obj") 34 | 35 | -------------------------------------------------------------------------------- /fig6/GSE144649/3_pca/pca.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J pca 2 | #BSUB -q short 3 | #BSUB -W 2:50 4 | #BSUB -n 1 5 | 
#BSUB -M 100 6 | #BSUB -R rusage[mem=100] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/3_pca/pca.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/3_pca/pca.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/3_pca/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/3_pca/pca.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/3_pca/pca.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____ 18 | 19 | OutDir=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE144649/3_pca 20 | if [ ! -d $OutDir ]; then 21 | mkdir -p $OutDir 22 | fi 23 | 24 | Rscript --no-save /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/3_pca/pca.R \ 25 | -d /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE144649/2_QC/qc.rds \ 26 | -o $OutDir 27 | -------------------------------------------------------------------------------- /fig6/GSE144649/1_merge/merge.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J merge 2 | #BSUB -q highmem 3 | #BSUB -W 23:00 4 | #BSUB -n 1 5 | #BSUB -M 100 6 | #BSUB -R rusage[mem=100] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/1_merge/merge.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/1_merge/merge.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/1_merge/ 13 | rm -rf 
/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/1_merge/merge.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/1_merge/merge.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____ 18 | 19 | OutFolder=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE144649/1_merge 20 | if [ ! -d $OutFolder ]; then 21 | mkdir -p $OutFolder 22 | fi 23 | 24 | Rscript --no-save /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/1_merge/merge.R -d /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/data/GSE144469/raw -o $OutFolder 25 | -------------------------------------------------------------------------------- /fig6/GSE144649/5_extractT/extract.R: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------- 2 | # filename : extract.R 3 | # Date : 2022-05-06 4 | # contributor : Yanshuo Chu 5 | # function: extract 6 | #-------------------------------------------------------------- 7 | 8 | print('<==== extract.R ====>') 9 | rm(list=ls()) 10 | 11 | suppressMessages({ 12 | library(Seurat) 13 | library(tidyverse) 14 | library(ggplot2) 15 | }) 16 | 17 | figurePath <- file.path("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE144649/5_extractT") 18 | if(!dir.exists(figurePath)){ 19 | dir.create(figurePath, recursive = T) 20 | } 21 | setwd(figurePath) 22 | 23 | 24 | seuratObj <- readRDS("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE144649/3_pca/nPC_50/UMAP_dist_0.1_nneighbor_50/GSE144649_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds") 25 | 26 | Idents(seuratObj) <- seuratObj$seurat_clusters 27 | 28 | CD4_Clusters <- c( 0, 2, 4 ) 29 | CD8_Clusters <- c( 1, 3, 5, 17, 18 ) 30 | 31 | CD4_Obj <- subset(seuratObj, idents = CD4_Clusters) 32 | 33 | saveRDS(CD4_Obj, 
file.path(getwd(), paste0('CD4.rds'))) 34 | 35 | CD8_Obj <- subset(seuratObj, idents = CD8_Clusters) 36 | 37 | saveRDS(CD8_Obj, file.path(getwd(), paste0('CD8.rds'))) 38 | -------------------------------------------------------------------------------- /fig6/GSE173351/1_merge/merge.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J merge 2 | #BSUB -q highmem 3 | #BSUB -W 23:00 4 | #BSUB -n 1 5 | #BSUB -M 100 6 | #BSUB -R rusage[mem=100] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/1_merge/merge.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/1_merge/merge.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/1_merge/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/1_merge/merge.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/1_merge/merge.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____ 18 | 19 | OutFolder=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/1_merge 20 | if [ ! 
-d $OutFolder ]; then 21 | mkdir -p $OutFolder 22 | fi 23 | 24 | Rscript --no-save /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/1_merge/merge.R -d /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/data/GSE173351/T -o $OutFolder 25 | -------------------------------------------------------------------------------- /fig6/GSE173351/3_pca/pca.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J pca 2 | #BSUB -q short 3 | #BSUB -W 2:50 4 | #BSUB -n 1 5 | #BSUB -M 100 6 | #BSUB -R rusage[mem=100] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/3_pca/pca.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/3_pca/pca.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/3_pca/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/3_pca/pca.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/3_pca/pca.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____ 18 | 19 | 20 | OutDir=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/3_pca 21 | if [ ! 
-d $OutDir ]; then 22 | mkdir -p $OutDir 23 | fi 24 | 25 | Rscript --no-save /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/3_pca/pca.R \ 26 | -d /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/2_QC/qc.rds \ 27 | -o $OutDir 28 | -------------------------------------------------------------------------------- /fig6/GSE173351/4_harmony/1_injectBatchinfo/inject.R: -------------------------------------------------------------------------------- 1 | library(optparse) 2 | library(tidyverse) 3 | library(Seurat) 4 | library(GEOquery) 5 | 6 | 7 | ## gse <- getGEO(filename = "/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/data/GSE173351/GSE176021_series_matrix.txt.gz", 8 | ## GSEMatrix = TRUE, 9 | ## getGPL = FALSE) 10 | ## metaInfo <- gse@phenoData@data %>% 11 | ## select(title, `response status:ch1`, source_name_ch1) 12 | ## write_tsv(metaInfo, file.path("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/data/GSE173351", paste0('metaInfo', "_", Sys.Date(), '.tsv'))) 13 | 14 | 15 | metaInfo <- read_tsv("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/data/GSE173351/metaInfo_2022-05-13.tsv") 16 | 17 | seuratObj <- readRDS("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/3_pca/pca.rds") 18 | 19 | seuratObj$response <- metaInfo$response[match(seuratObj$orig.ident, metaInfo$orig.ident)] 20 | seuratObj$tissue <- metaInfo$tissue[match(seuratObj$orig.ident, metaInfo$orig.ident)] 21 | seuratObj$batch <- metaInfo$batch[match(seuratObj$orig.ident, metaInfo$orig.ident)] 22 | 23 | outDir <- "/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/4_harmony" 24 | if(!dir.exists(outDir)){ 25 | dir.create(outDir, recursive = T) 26 | } 27 | saveRDS(seuratObj, file.path(outDir, paste0('harmony_input.rds'))) 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /fig6/GSE144649/3_pca/pca.R: 
-------------------------------------------------------------------------------- 1 | #'-------------------------------------------------------------- 2 | #' filename : pca.R 3 | #' Date : 2022-05-04 4 | #' contributor : Yanshuo Chu 5 | #' function: pca 6 | #'-------------------------------------------------------------- 7 | 8 | print('<==== pca.R ====>') 9 | 10 | suppressMessages({ 11 | library(optparse) 12 | library(Seurat) 13 | library(tidyverse) 14 | library(ggplot2) 15 | }) 16 | 17 | option_list = list( 18 | make_option(c("-d","--data"), 19 | type = 'character', 20 | help = 'data.rds', 21 | metavar = 'character'), 22 | make_option(c("-o","--out"), 23 | type = 'character', 24 | help = 'folder', 25 | metavar = 'character'), 26 | make_option(c("-n",'--npc'), 27 | type = 'integer', 28 | default = 100, 29 | help = 'dims = 1:npc', 30 | metavar = 'integer') 31 | ); 32 | 33 | opt_parser = OptionParser(option_list = option_list); 34 | opt = parse_args(opt_parser); 35 | 36 | seuratObj <- readRDS(opt$data) %>% 37 | Seurat::NormalizeData(verbose = FALSE) %>% 38 | FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% 39 | ScaleData(verbose = FALSE) %>% 40 | RunPCA(npcs = opt$npc, verbose = FALSE) 41 | 42 | saveRDS(seuratObj, file.path(opt$out, "pca.rds")) 43 | 44 | 45 | -------------------------------------------------------------------------------- /fig6/GSE173351/3_pca/pca.R: -------------------------------------------------------------------------------- 1 | #'-------------------------------------------------------------- 2 | #' filename : pca.R 3 | #' Date : 2022-05-04 4 | #' contributor : Yanshuo Chu 5 | #' function: pca 6 | #'-------------------------------------------------------------- 7 | 8 | print('<==== pca.R ====>') 9 | 10 | suppressMessages({ 11 | library(optparse) 12 | library(Seurat) 13 | library(tidyverse) 14 | library(ggplot2) 15 | }) 16 | 17 | option_list = list( 18 | make_option(c("-d","--data"), 19 | type = 'character', 20 | help = 
'data.rds', 21 | metavar = 'character'), 22 | make_option(c("-o","--out"), 23 | type = 'character', 24 | help = 'folder', 25 | metavar = 'character'), 26 | make_option(c("-n",'--npc'), 27 | type = 'integer', 28 | default = 100, 29 | help = 'dims = 1:npc', 30 | metavar = 'integer') 31 | ); 32 | 33 | opt_parser = OptionParser(option_list = option_list); 34 | opt = parse_args(opt_parser); 35 | 36 | seuratObj <- readRDS(opt$data) %>% 37 | Seurat::NormalizeData(verbose = FALSE) %>% 38 | FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% 39 | ScaleData(verbose = FALSE) %>% 40 | RunPCA(npcs = opt$npc, verbose = FALSE) 41 | 42 | saveRDS(seuratObj, file.path(opt$out, "pca.rds")) 43 | 44 | 45 | -------------------------------------------------------------------------------- /fig6/GSE173351/4_harmony/2_run_harmony/harmony.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J harmony 2 | #BSUB -q highmem 3 | #BSUB -W 23:00 4 | #BSUB -n 1 5 | #BSUB -M 100 6 | #BSUB -R rusage[mem=100] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/4_harmony/2_run_harmony/harmony.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/4_harmony/2_run_harmony/harmony.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/4_harmony/2_run_harmony/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/4_harmony/2_run_harmony/harmony.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/4_harmony/2_run_harmony/harmony.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____ 18 | 19 | OutDir=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/4_harmony 20 | if [ 
! -d $OutDir ]; then 21 | mkdir -p $OutDir 22 | fi 23 | 24 | Rscript --no-save /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/src/public/run-harmony.R \ 25 | -d /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/4_harmony/harmony_input.rds \ 26 | -o $OutDir/harmony_output.rds 27 | -------------------------------------------------------------------------------- /fig6/GSE179994/1_merge/merge.R: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------- 2 | # filename : merge.R 3 | # Date : 2022-02-16 4 | # contributor : Yanshuo Chu 5 | # function: merge 6 | #-------------------------------------------------------------- 7 | 8 | print('<==== merge.R ====>') 9 | rm(list=ls()) 10 | 11 | library(data.table) 12 | library(Seurat) 13 | library(ggplot2) 14 | library(tidyverse) 15 | library(harmony) 16 | ## Build a Seurat object from the GSE179994 raw counts plus per-cell metadata, run PCA, and save it as merged.obj 17 | rcs <- readRDS("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/data/GSE179994/raw/GSE179994_all.Tcell.rawCounts.rds") 18 | meta.data <- read_tsv("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/data/GSE179994/raw/GSE179994_Tcell.metadata.tsv") %>% 19 | replace_na(list(celltype = 'NA', cluster = 'NA')) %>% as.data.frame() # plain data.frame first: setting row names on a tibble is deprecated 20 | rownames(meta.data) <- meta.data$cellid 21 | seuratObj <- CreateSeuratObject(counts = rcs, meta.data = as.data.frame(meta.data)) 22 | ## normalize -> 2000 variable features (vst) -> scale -> PCA with 20 components 23 | seuratObj <- seuratObj %>% 24 | Seurat::NormalizeData(verbose = FALSE) %>% 25 | FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% 26 | ScaleData(verbose = FALSE) %>% 27 | RunPCA(npcs = 20, verbose = FALSE) 28 | 29 | figurePath <- file.path("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result", "GSE179994", "merged") 30 | if(!dir.exists(figurePath)){ 31 | dir.create(figurePath, recursive = TRUE) 32 | } 33 | setwd(figurePath) 34 | ## written relative to figurePath because of the setwd() above 35 | saveRDS(seuratObj, "merged.obj") 36 | 37 | 38 | -------------------------------------------------------------------------------- 
/fig6/GSE169246/b1_harmony/2_run_harmony/test.R: -------------------------------------------------------------------------------- 1 | ## Test run of Harmony on the GSE169246 harmony_input.rds object; libraries (optparse supplies make_option/OptionParser/parse_args/print_help used below) 2 | library(Seurat) 3 | library(tidyverse) 4 | library(harmony) 5 | library(optparse) 6 | print('---snn clustering---') 7 | ##CLI parsing 8 | option_list = list( 9 | make_option(c("-d", "--data"), 10 | type = "character", 11 | default = NULL, 12 | help = "r data file input(after normalization", 13 | metavar = 'character'), 14 | make_option(c("-o",'--out'), 15 | type = 'character', 16 | default = 'harmony.pdf', 17 | help = 'output file name for the r data file [default = %default]', 18 | metavar = 'character') 19 | ); 20 | 21 | opt_parser = OptionParser(option_list = option_list); 22 | opt = parse_args(opt_parser); 23 | 24 | if(is.null(opt$data)) { 25 | print_help(opt_parser) 26 | stop("Input data must be provided", call. = FALSE) 27 | } 28 | 29 | ##Load data 30 | ## NOTE: --data is required above but not read; the input and output paths below are hard-coded 31 | ## seuratObj <- readRDS(opt$data) 32 | seuratObj <- readRDS("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/b1_harmony/1_injectBatchinfo/outs/harmony_input.rds") 33 | ## Correct on the "batch" metadata column, starting from the existing "pca" reduction 34 | ## By default, use all pc 35 | seuratObj <- RunHarmony(seuratObj, "batch", assay.use = "RNA", reduction = "pca", verbose = TRUE) 36 | saveRDS(seuratObj, "/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/b1_harmony/2_run_harmony/harmony_out.obj") 37 | 38 | print('---end---') 39 | -------------------------------------------------------------------------------- /data_preprocess/0_src/RunNormalizeScale.R: -------------------------------------------------------------------------------- 1 | #'-------------------------------------------------------------- 2 | #' filename : RunNormalizeScale.R 3 | #' Date : 2020-09-21 4 | #' contributor : Yanshuo Chu 5 | #' function: RunNormalizeScale 6 | #'-------------------------------------------------------------- 7 | 8 | print('<==== RunNormalizeScale ====>') 9 | 10 | ##libraries 11 | suppressMessages({library(optparse) 12 | 
library(readr) 13 | library(rjson) 14 | library(Seurat)}) 15 | print('---snn clustering---') 16 | ##CLI parsing: -d/--data is the input .rds (required), -o/--out is the output .rds path 17 | option_list = list( 18 | make_option(c("-d", "--data"), 19 | type = "character", 20 | default = NULL, 21 | help = "r data file input(after normalization", 22 | metavar = 'character'), 23 | make_option(c("-o",'--out'), 24 | type = 'character', 25 | default = 'snn-harmony.rds', 26 | help = 'output file name for the r data file [default = %default]', 27 | metavar = 'character') 28 | ); 29 | 30 | opt_parser = OptionParser(option_list = option_list); 31 | opt = parse_args(opt_parser); 32 | 33 | if(is.null(opt$data)) { 34 | print_help(opt_parser) 35 | stop("Input data must be provided", call. = F) 36 | } 37 | 38 | ##Load data 39 | seuratObj <- readRDS(opt$data) 40 | 41 | DefaultAssay(seuratObj) <- "RNA" 42 | ##normalize the RNA assay (no clustering is run in this script, despite the message printed above) 43 | seuratObj <- NormalizeData(seuratObj) 44 | ## scaling is currently disabled: seuratObj <- ScaleData(seuratObj) 45 | 46 | saveRDS(seuratObj, file = opt$out) 47 | print('---end---') 48 | -------------------------------------------------------------------------------- /fig6/GSE173351/6_extractT/extract.R: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------- 2 | # filename : extract.R 3 | # Date : 2022-05-06 4 | # contributor : Yanshuo Chu 5 | # function: extract 6 | #-------------------------------------------------------------- 7 | 8 | print('<==== extract.R ====>') 9 | rm(list=ls()) 10 | 11 | suppressMessages({ 12 | library(Seurat) 13 | library(tidyverse) 14 | library(ggplot2) 15 | }) 16 | 17 | figurePath <- file.path("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/6_extractT") 18 | if(!dir.exists(figurePath)){ 19 | dir.create(figurePath, recursive = T) 20 | } 21 | setwd(figurePath) 22 | 23 | seuratObj <- 
readRDS("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/4_harmony/nPC_30/UMAP_dist_0.1_nneighbor_50/GSE173351_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds") 24 | 25 | Idents(seuratObj) <- seuratObj$seurat_clusters 26 | 27 | ## mait_markers <- c("LTB", "KLRB1", "IL7R", "GZMK", "TRAV1-2", "SLC4A10") 28 | ## pdf(file.path(getwd(), "mait_bubbleplot.pdf")) 29 | ## DotPlot(seuratObj, features = mait_markers) 30 | ## dev.off() 31 | 32 | CD4_Clusters <- c( 0, 5, 13, 11) 33 | CD8_Clusters <- c( 1, 3, 4, 8) 34 | ## cluster annotations (cluster id, label); only ids listed in the two vectors above are kept by the subset() calls below 35 | ## 2 MAIT 36 | ## 6 NKT 37 | ## 10 PROLIFERATIVE 38 | ## 12 fibroblast 39 | ## 7 DC 40 | ## 9 B 41 | ## 3 CD4+CD8+ -- NOTE(review): cluster 3 is nevertheless included in CD8_Clusters above; confirm this is intended 42 | 43 | CD4_Obj <- subset(seuratObj, idents = CD4_Clusters) 44 | saveRDS(CD4_Obj, file.path(getwd(), paste0('CD4.rds'))) 45 | 46 | CD8_Obj <- subset(seuratObj, idents = CD8_Clusters) 47 | saveRDS(CD8_Obj, file.path(getwd(), paste0('CD8.rds'))) 48 | -------------------------------------------------------------------------------- /fig6/GSE169246/subT2_harmony/1_run_harmony/harmony.R: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------- 2 | # filename : harmony.R 3 | # Date : 2022-09-01 4 | # contributor : Yanshuo Chu 5 | # function: harmony 6 | #-------------------------------------------------------------- 7 | 8 | print('<==== harmony.R ====>') 9 | 10 | suppressMessages({ 11 | library(optparse) 12 | library(tidyverse) 13 | library(harmony) 14 | library(Seurat)}) 15 | 16 | option_list = list( 17 | make_option(c("-d", "--data"), 18 | type = "character", 19 | default = NULL, 20 | help = "r data file input(after normalization", 21 | metavar = 'character'), 22 | make_option(c("-o",'--out'), 23 | type = 'character', 24 | default = 'harmony.pdf', 25 | help = 'output file name for the r data file [default = %default]', 26 | metavar = 'character') 27 | ); 28 | 29 | opt_parser = OptionParser(option_list = option_list); 30 | opt 
= parse_args(opt_parser); 31 | 32 | if(is.null(opt$data)) { 33 | print_help(opt_parser) 34 | stop("Input data must be provided", call. = F) 35 | } 36 | 37 | ##Load data 38 | seuratObj <- readRDS(opt$data) %>% 39 | Seurat::NormalizeData(verbose = FALSE) %>% 40 | FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% 41 | ScaleData(verbose = FALSE) %>% 42 | RunPCA(npcs = 100, verbose = FALSE) 43 | 44 | ## By default, use all pc 45 | seuratObj <- RunHarmony(seuratObj, "batch") 46 | 47 | saveRDS(seuratObj, opt$out) 48 | 49 | print('---end---') 50 | -------------------------------------------------------------------------------- /fig6/GSE169246/subT2_harmony/2_run_harmony/harmony.R: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------- 2 | # filename : harmony.R 3 | # Date : 2022-09-01 4 | # contributor : Yanshuo Chu 5 | # function: harmony 6 | #-------------------------------------------------------------- 7 | 8 | print('<==== harmony.R ====>') 9 | 10 | suppressMessages({ 11 | library(optparse) 12 | library(tidyverse) 13 | library(harmony) 14 | library(Seurat)}) 15 | 16 | option_list = list( 17 | make_option(c("-d", "--data"), 18 | type = "character", 19 | default = NULL, 20 | help = "r data file input(after normalization", 21 | metavar = 'character'), 22 | make_option(c("-o",'--out'), 23 | type = 'character', 24 | default = 'harmony.pdf', 25 | help = 'output file name for the r data file [default = %default]', 26 | metavar = 'character') 27 | ); 28 | 29 | opt_parser = OptionParser(option_list = option_list); 30 | opt = parse_args(opt_parser); 31 | 32 | if(is.null(opt$data)) { 33 | print_help(opt_parser) 34 | stop("Input data must be provided", call. 
= F) 35 | } 36 | 37 | ##Load data 38 | seuratObj <- readRDS(opt$data) %>% 39 | Seurat::NormalizeData(verbose = FALSE) %>% 40 | FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% 41 | ScaleData(verbose = FALSE) %>% 42 | RunPCA(npcs = 100, verbose = FALSE) 43 | 44 | ## By default, use all pc 45 | seuratObj <- RunHarmony(seuratObj, "batch") 46 | 47 | saveRDS(seuratObj, opt$out) 48 | 49 | print('---end---') 50 | -------------------------------------------------------------------------------- /data_preprocess/0_src/visualize_batch.R: -------------------------------------------------------------------------------- 1 | #' filename : visualize_batch.R 2 | #' Date : 2020-07-07 3 | #' contributor : Yanshuo Chu 4 | #' function: visualize_batch 5 | 6 | ##libraries 7 | suppressMessages({library(optparse) 8 | library(readr) 9 | library(rjson) 10 | library(Seurat) 11 | library(dplyr) 12 | library(rlist) 13 | library(ggpubr) 14 | library(ggplot2) 15 | }) 16 | print('---visualize embeding---') 17 | ##CLI parsing 18 | option_list = list( 19 | make_option(c("-d", "--data"), 20 | type = "character", 21 | help = "r data file input(after runtsne/runumap)", 22 | metavar = 'character') 23 | ); 24 | 25 | opt_parser = OptionParser(option_list = option_list); 26 | opt = parse_args(opt_parser); 27 | 28 | if(is.null(opt$data)) { 29 | print_help(opt_parser) 30 | stop("Input data must be provided", call. 
= F) 31 | } 32 | 33 | ##Load data 34 | seuratObj <- readRDS(opt$data) 35 | 36 | extraInWidth <- round((length(unique(seuratObj@meta.data$batch)) - 10) / 10) 37 | isPDF <- length(Cells(seuratObj)) < 50000 38 | if(isPDF){ 39 | print(file.path(dirname(opt$data),'batch.pdf')) 40 | pdf(file.path(dirname(opt$data),'batch.pdf'), height = 9, width = (9 + extraInWidth)) 41 | print(DimPlot(object=seuratObj, reduction="umap", group.by='batch', label=TRUE)) 42 | dev.off() 43 | }else{ 44 | print(file.path(dirname(opt$data),'batch.png')) 45 | png(file.path(dirname(opt$data),'batch.png'), height = 9, width = (9 + extraInWidth), units = "in", res = 300) 46 | print(DimPlot(object=seuratObj, reduction="umap", group.by='batch', label=TRUE)) 47 | dev.off() 48 | } 49 | 50 | print('---end---') 51 | -------------------------------------------------------------------------------- /data_preprocess/TFH/callBack_CD4TFH.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Takes the data file path as $1; every analysis step below is currently commented out, so only the variables are set. 3 | projectPath=/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject 4 | analysisPath=${projectPath}/analysis 5 | mainscriptsFolder=${analysisPath}/scripts 6 | pipelinesFolder=${mainscriptsFolder}/pipelines 7 | srcD=${analysisPath}/scripts/src 8 | paramD=${analysisPath}/scripts/params 9 | databaseD='/rsrch3/home/genomic_med/ychu2/share/database' 10 | 11 | runR="Rscript --no-save " 12 | 13 | dataPath=${1} 14 | 15 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/THelper.txt 16 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/TCD4.txt 17 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Pan-T/CD4/TFH/CD4TFHMarkers.txt 18 | # ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${databaseD}/Pan-T/CD4/TFH/CD4TFHMarkers.txt 19 | 20 | # JOBNAME=job_monocle3 21 | # tempFolder=$(dirname $dataPath) 22 | # 
JOBFOLDER=${tempFolder} 23 | # if [ ! -d $tempFolder ]; then 24 | # mkdir -p $tempFolder 25 | # fi 26 | # if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 27 | # rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 28 | # fi 29 | # bsub \ 30 | # -J ${JOBNAME} \ 31 | # -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 32 | # -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 33 | # -cwd ${JOBFOLDER} \ 34 | # -q long \ 35 | # -W 120:00 \ 36 | # -n 1 \ 37 | # -M 500\ 38 | # -R rusage[mem=500] \ 39 | # -B \ 40 | # -N \ 41 | # -u ychu2@mdanderson.org \ 42 | # /bin/bash -c "module load python/3.7.3-anaconda; module load R/3.6.0; ${runR} ${srcD}/trajectory_monocle3_from_seurat.r -d ${dataPath} -c 3" 43 | -------------------------------------------------------------------------------- /fig5/fig5b/plotMapScale.R: -------------------------------------------------------------------------------- 1 | #'-------------------------------------------------------------- 2 | #' filename : plotMapScale.R 3 | #' Date : 2022-08-15 4 | #' contributor : Yanshuo Chu 5 | #' function: plotMapScale 6 | #'-------------------------------------------------------------- 7 | 8 | print('<==== plotMapScale.R ====>') 9 | 10 | suppressMessages({ 11 | library(optparse) 12 | library(imager) 13 | library(tidyverse) 14 | }) 15 | 16 | option_list = list( 17 | make_option(c("-d","--data"), 18 | type = 'character', 19 | help = 'data.rds', 20 | metavar = 'character'), 21 | make_option(c("-w","--width"), 22 | type = 'integer', 23 | default = 50, 24 | help = 'width', 25 | metavar = 'integer'), 26 | make_option(c("-h","--height"), 27 | type = 'integer', 28 | default = 50, 29 | help = 'height', 30 | metavar = 'integer'), 31 | make_option(c("-o","--out"), 32 | type = 'character', 33 | help = 'out', 34 | metavar = 'character') 35 | ); 36 | 37 | opt_parser = OptionParser(option_list = option_list, add_help_option = F); 38 | opt = parse_args(opt_parser); 39 | 40 | im <- load.image(opt$data) 41 | jpeg(opt$out, width = opt$width, 
height = opt$height, units = "px") 42 | par(mar = rep(0, 4)) 43 | plot(im, axes = F, xaxs="i", yaxs="i", xlim = c(1, opt$width), ylim =c(opt$height, 1)) 44 | segments(10, opt$height - 10, 288, opt$height - 10, col = "white", lwd = 2) 45 | segments(10, opt$height - 10, 10, opt$height - 15, col = "white", lwd = 2) 46 | segments(288, opt$height - 10, 288, opt$height - 15, col = "white", lwd = 2) 47 | dev.off() 48 | -------------------------------------------------------------------------------- /data_preprocess/0_src/RunCCA.R: -------------------------------------------------------------------------------- 1 | #' filename : RunCCA.R 2 | #' Date : 2020-07-07 3 | #' contributor : Yanshuo Chu 4 | #' function: RunCCA 5 | 6 | 7 | suppressMessages({library(optparse) 8 | library(ggplot2) 9 | library(readr) 10 | library(stringr) 11 | library(rjson) 12 | library(tidyverse) 13 | library(Seurat)}) 14 | print('----loading cellranger outputs----') 15 | ##CLI parsing 16 | option_list = list( 17 | make_option(c("-d","--data"), 18 | type = 'character', 19 | help = 'dataFolder', 20 | metavar = 'character'), 21 | make_option(c("-o",'--out'), 22 | type = 'character', 23 | default = 'cellranger.rds', 24 | help = 'result file name [default = %default]', 25 | metavar = 'character') 26 | ); 27 | 28 | opt_parser = OptionParser(option_list = option_list); 29 | opt = parse_args(opt_parser); 30 | 31 | print(paste0("Loading data from ", opt$data)) 32 | samples = list.files(path = opt$data, recursive = F) 33 | samples = basename(samples) 34 | if(length(samples) < 1) stop(paste0('Failed to find data in ', opt$data)) 35 | 36 | obj.list = list() 37 | genes.use = list() 38 | for (ids in seq_along(samples)) { 39 | ds = samples[ids] 40 | print(ds) 41 | seuratObj <- readRDS(file.path(opt$data, ds)) 42 | genes.use <- c(genes.use, head(rownames(seuratObj@hvg.info), 2000)) 43 | 44 | obj.list[[length(obj.list)+1]] = seuratObj 45 | } 46 | 47 | for (i in 1:length(obj.list)) { 48 | genes.use <- 
genes.use[genes.use %in% rownames(obj.list[[i]]@scale.data)] 49 | } 50 | 51 | seuratObj <- RunMultiCCA(obj.list, genes.use = genes.use) 52 | 53 | ##save data (the RunMultiCCA result is held in seuratObj; no object named combined.data is ever created) 54 | print("saving output") 55 | saveRDS(seuratObj, file = opt$out) 56 | print('----end----') 57 | -------------------------------------------------------------------------------- /fig6/GSE169246/2_seurat_pipeline/submitjob.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J submitjob 2 | #BSUB -q short 3 | #BSUB -W 1:00 4 | #BSUB -n 1 5 | #BSUB -M 10 6 | #BSUB -R rusage[mem=10] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/2_seurat_pipeline/submitjob.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/2_seurat_pipeline/submitjob.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/2_seurat_pipeline/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/2_seurat_pipeline/submitjob.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/2_seurat_pipeline/submitjob.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____ 18 | 19 | 20 | /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/public/UMAP_CLUSTER_JOBS_EMBEDED/run.sh \ 21 | --inData /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/merged/merged.obj \ 22 | --reduction pca \ 23 | --mainscriptsFolder /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/2_seurat_pipeline \ 24 | --parentJobName "GSE169246" \ 25 | --npcArray "30;50;60" \ 26 | --UMAPDistArray "0.1" \ 27 | --ClusterResArray "0.3" \ 28 | --NneighborsArray "50" \ 29 | --toRunUMAP "YES" \ 30 | --toRunClustering 
"YES" \ 31 | --toRunCommonAnalysis "YES" \ 32 | --toRunCallBack "YES" \ 33 | --callBackPath "/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/2_seurat_pipeline/callBack.sh" 34 | -------------------------------------------------------------------------------- /fig6/GSE144649/4_seurat_pipeline/submitjob.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J submitjob 2 | #BSUB -q short 3 | #BSUB -W 1:00 4 | #BSUB -n 1 5 | #BSUB -M 10 6 | #BSUB -R rusage[mem=10] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/4_seurat_pipeline/submitjob.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/4_seurat_pipeline/submitjob.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/4_seurat_pipeline/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/4_seurat_pipeline/submitjob.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/4_seurat_pipeline/submitjob.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____ 18 | 19 | 20 | /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/public/UMAP_CLUSTER_JOBS_EMBEDED/run.sh \ 21 | --inData /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE144649/3_pca/pca.rds \ 22 | --reduction pca \ 23 | --mainscriptsFolder /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/4_seurat_pipeline \ 24 | --parentJobName "GSE144649" \ 25 | --npcArray "30;50;60" \ 26 | --UMAPDistArray "0.1" \ 27 | --ClusterResArray "0.3" \ 28 | --NneighborsArray "50" \ 29 | --toRunUMAP "YES" \ 30 | --toRunClustering "YES" \ 31 | --toRunCommonAnalysis "YES" \ 
32 | --toRunCallBack "YES" \ 33 | --callBackPath "/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/4_seurat_pipeline/callBack.sh" 34 | -------------------------------------------------------------------------------- /fig6/GSE173351/5_seurat_pipeline/submitjob.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J submitjob 2 | #BSUB -q short 3 | #BSUB -W 2:00 4 | #BSUB -n 1 5 | #BSUB -M 10 6 | #BSUB -R rusage[mem=10] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/5_seurat_pipeline/submitjob.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/5_seurat_pipeline/submitjob.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/5_seurat_pipeline/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/5_seurat_pipeline/submitjob.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/5_seurat_pipeline/submitjob.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____ 18 | 19 | 20 | /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/public/public/UMAP_CLUSTER_JOBS_EMBEDED/run.sh \ 21 | --inData /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/4_harmony/harmony_output.rds \ 22 | --reduction harmony \ 23 | --mainscriptsFolder /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/5_seurat_pipeline \ 24 | --parentJobName "GSE173351" \ 25 | --npcArray "30" \ 26 | --UMAPDistArray "0.1" \ 27 | --ClusterResArray "0.3" \ 28 | --NneighborsArray "50" \ 29 | --toRunUMAP "NO" \ 30 | --toRunClustering "NO" \ 31 | --toRunCommonAnalysis "YES" \ 32 | --toRunCallBack 
"NO" \ 33 | --callBackPath "/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/5_seurat_pipeline/callBack.sh" 34 | -------------------------------------------------------------------------------- /fig6/GSE169246/b2_seurat_pipeline/submitjob.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J submitjob 2 | #BSUB -q e80medium 3 | #BSUB -W 23:00 4 | #BSUB -n 1 5 | #BSUB -M 550 6 | #BSUB -R rusage[mem=550] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b2_seurat_pipeline/submitjob.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b2_seurat_pipeline/submitjob.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b2_seurat_pipeline/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b2_seurat_pipeline/submitjob.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b2_seurat_pipeline/submitjob.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/4.0.3 17 | #____----____----____ 18 | 19 | 20 | /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/public/public/UMAP_CLUSTER_JOBS_EMBEDED/run.sh \ 21 | --inData /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/b1_harmony/2_run_harmony/harmony_out.obj \ 22 | --reduction harmony \ 23 | --mainscriptsFolder /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b2_seurat_pipeline/ \ 24 | --parentJobName "GSE169246" \ 25 | --npcArray "20;30;50" \ 26 | --UMAPDistArray "0.1" \ 27 | --ClusterResArray "0.3" \ 28 | --NneighborsArray "50" \ 29 | --toRunUMAP "NO" \ 30 | --toRunClustering "NO" \ 31 | --toRunCommonAnalysis "YES" \ 32 | 
--toRunCallBack "YES" \ 33 | --callBackPath "/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b2_seurat_pipeline/callBack.sh" 34 | -------------------------------------------------------------------------------- /fig6/GSE173351/6_extractT_proliferative/extract.R: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------- 2 | # filename : extract.R 3 | # Date : 2022-05-06 4 | # contributor : Yanshuo Chu 5 | # function: extract 6 | #-------------------------------------------------------------- 7 | 8 | print('<==== extract.R ====>') 9 | rm(list=ls()) 10 | 11 | suppressMessages({ 12 | library(Seurat) 13 | library(tidyverse) 14 | library(ggplot2) 15 | }) 16 | 17 | figure_path <- file.path("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/6_extractT_proliferative/") 18 | if (!dir.exists(figure_path)) { 19 | dir.create(figure_path, recursive = T) 20 | } 21 | setwd(figure_path) 22 | 23 | 24 | seuratObj <- readRDS("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/4_harmony/nPC_30/UMAP_dist_0.1_nneighbor_50/GSE173351_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds") 25 | 26 | Idents(seuratObj) <- seuratObj$seurat_clusters 27 | 28 | ## mait_markers <- c("LTB", "KLRB1", "IL7R", "GZMK", "TRAV1-2", "SLC4A10") 29 | ## pdf(file.path(getwd(), "mait_bubbleplot.pdf")) 30 | ## DotPlot(seuratObj, features = mait_markers) 31 | ## dev.off() 32 | 33 | CD4_Clusters <- c( 0, 5, 13) 34 | CD8_Clusters <- c( 1, 4, 8) 35 | CD4CD8_Clusters <- c(3, 11) 36 | P_Clusters <- 10 37 | ## cluster annotations (cluster id, label); only ids listed in the vectors above are written out below 38 | ## 2 MAIT 39 | ## 6 NKT 40 | ## 10 PROLIFERATIVE 41 | ## 12 fibroblast 42 | ## 7 DC 43 | ## 9 B 44 | ## 3 CD4+CD8+ 45 | 46 | CD4_Obj <- subset(seuratObj, idents = CD4_Clusters) 47 | saveRDS(CD4_Obj, file.path(getwd(), paste0('CD4.rds'))) 48 | CD8_Obj <- subset(seuratObj, idents = CD8_Clusters) 49 | saveRDS(CD8_Obj, 
file.path(getwd(), paste0('CD8.rds'))) 50 | CD4CD8_Obj <- subset(seuratObj, idents = CD4CD8_Clusters) 51 | saveRDS(CD4CD8_Obj, file.path(getwd(), paste0('CD4CD8.rds'))) 52 | P_Obj <- subset(seuratObj, idents = P_Clusters) 53 | saveRDS(P_Obj, file.path(getwd(), paste0('P.rds'))) 54 | 55 | -------------------------------------------------------------------------------- /fig6/GSE169246/subT3_seurat_pipeline/submitjob.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J seurat_h1 2 | #BSUB -q e80medium 3 | #BSUB -w 'done(19238686)' 4 | #BSUB -W 23:00 5 | #BSUB -n 1 6 | #BSUB -M 550 7 | #BSUB -R rusage[mem=550] 8 | #BSUB -B 9 | #BSUB -N 10 | #BSUB -u ychu2@mdanderson.org 11 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_seurat_pipeline/submitjob.o.txt 12 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_seurat_pipeline/submitjob.e.txt 13 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_seurat_pipeline/ 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_seurat_pipeline/submitjob.o.txt 15 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_seurat_pipeline/submitjob.e.txt 16 | module load python/3.7.3-anaconda 17 | module load R/4.0.3 18 | #____----____----____ 19 | # Run the UMAP_CLUSTER_JOBS_EMBEDED pipeline on the subT2 harmony output; keep a space before every trailing backslash so arguments are not glued together. 20 | /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/public/public/UMAP_CLUSTER_JOBS_EMBEDED/run.sh \ 21 | --inData /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/subT2_harmony/1_run_harmony/harmony_output.rds \ 22 | --reduction harmony \ 23 | --mainscriptsFolder /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_seurat_pipeline/ \ 24 | --parentJobName "GSE169246_subT_h1" \ 25 | --npcArray 
"15;20;30;50" \ 26 | --UMAPDistArray "0.1" \ 27 | --ClusterResArray "0.3;0.6;0.5" \ 28 | --NneighborsArray "50" \ 29 | --toRunUMAP "YES" \ 30 | --toRunClustering "YES" \ 31 | --toRunCommonAnalysis "YES" \ 32 | --toRunCallBack "YES" \ 33 | --callBackPath "/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_seurat_pipeline/callBack.sh" 34 | -------------------------------------------------------------------------------- /fig6/GSE169246/b1_harmony/2_run_harmony/harmony.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J harmony 2 | #BSUB -q e80medium 3 | #BSUB -w 'done(19226129)' 4 | #BSUB -W 23:00 5 | #BSUB -n 1 6 | #BSUB -M 550 7 | #BSUB -R rusage[mem=550] 8 | #BSUB -B 9 | #BSUB -N 10 | #BSUB -u ychu2@mdanderson.org 11 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b1_harmony/2_run_harmony/harmony.o.txt 12 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b1_harmony/2_run_harmony/harmony.e.txt 13 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b1_harmony/2_run_harmony/ 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b1_harmony/2_run_harmony/harmony.o.txt 15 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/b1_harmony/2_run_harmony/harmony.e.txt 16 | module load python/3.7.3-anaconda 17 | module load R/4.0.3 18 | #____----____----____ 19 | 20 | PROJECT_FOLDER=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7 21 | DATA_FOLDER=${PROJECT_FOLDER}/data 22 | RESULT_FOLDER=${PROJECT_FOLDER}/result 23 | CODE_FOLDER=${PROJECT_FOLDER}/code 24 | PIPELINE_FOLDER=${CODE_FOLDER}/pipeline 25 | SRC_FOLDER=${CODE_FOLDER}/src 26 | KNOWLEDGE_FOLDER=${PROJECT_FOLDER}/knowledge 27 | PIPELINE_NAME=GSE169246__b1_harmony__2_run_harmony 28 | 
PIPELINE_PATH_NAME=GSE169246/b1_harmony/2_run_harmony 29 | PROJECT_NAME=$(basename ${PROJECT_FOLDER}) 30 | 31 | OutDir=$RESULT_FOLDER/$PIPELINE_PATH_NAME 32 | if [ ! -d $OutDir ]; then 33 | mkdir -p $OutDir 34 | fi 35 | 36 | Rscript --no-save /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/src/public/run-harmony.R \ 37 | -d /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/b1_harmony/1_injectBatchinfo/outs/harmony_input.rds \ 38 | -o $OutDir/harmony_output.rds 39 | -------------------------------------------------------------------------------- /fig6/GSE169246/subT2_harmony/2_run_harmony/harmony.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J harmony 2 | #BSUB -q e80medium 3 | #BSUB -W 23:00 4 | #BSUB -n 1 5 | #BSUB -M 550 6 | #BSUB -R rusage[mem=550] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT2_harmony/2_run_harmony/harmony.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT2_harmony/2_run_harmony/harmony.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT2_harmony/2_run_harmony/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT2_harmony/2_run_harmony/harmony.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT2_harmony/2_run_harmony/harmony.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/4.0.3 17 | #____----____----____ 18 | 19 | 20 | PROJECT_FOLDER=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7 21 | DATA_FOLDER=${PROJECT_FOLDER}/data 22 | RESULT_FOLDER=${PROJECT_FOLDER}/result 23 | CODE_FOLDER=${PROJECT_FOLDER}/code 24 | PIPELINE_FOLDER=${CODE_FOLDER}/pipeline 25 | 
SRC_FOLDER=${CODE_FOLDER}/src 26 | KNOWLEDGE_FOLDER=${PROJECT_FOLDER}/knowledge 27 | PIPELINE_NAME=GSE169246__subT2_harmony__2_run_harmony 28 | PIPELINE_PATH_NAME=GSE169246/subT2_harmony/2_run_harmony 29 | PROJECT_NAME=$(basename ${PROJECT_FOLDER}) 30 | 31 | OutDir=$RESULT_FOLDER/$PIPELINE_PATH_NAME 32 | if [ ! -d $OutDir ]; then 33 | mkdir -p $OutDir 34 | fi 35 | 36 | Rscript --no-save /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/src/public/run-harmony.R \ 37 | -d /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/subT1_extract_from_b2/1_extract/outs/subObj_2022-09-01.rds \ 38 | -o $OutDir/harmony_output.rds 39 | -------------------------------------------------------------------------------- /fig6/GSE169246/subT2_harmony/1_run_harmony/harmony.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J harmony 2 | #BSUB -q e80medium 3 | #BSUB -W 23:50 4 | #BSUB -n 1 5 | #BSUB -M 550 6 | #BSUB -R rusage[mem=550] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT2_harmony/1_run_harmony/harmony.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT2_harmony/1_run_harmony/harmony.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT2_harmony/1_run_harmony/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT2_harmony/1_run_harmony/harmony.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT2_harmony/1_run_harmony/harmony.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/4.0.3 17 | #____----____----____ 18 | 19 | PROJECT_FOLDER=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7 20 | DATA_FOLDER=${PROJECT_FOLDER}/data 21 | 
RESULT_FOLDER=${PROJECT_FOLDER}/result 22 | CODE_FOLDER=${PROJECT_FOLDER}/code 23 | PIPELINE_FOLDER=${CODE_FOLDER}/pipeline 24 | SRC_FOLDER=${CODE_FOLDER}/src 25 | KNOWLEDGE_FOLDER=${PROJECT_FOLDER}/knowledge 26 | PIPELINE_NAME=GSE169246__subT2_harmony__1_run_harmony 27 | PIPELINE_PATH_NAME=GSE169246/subT2_harmony/1_run_harmony 28 | PROJECT_NAME=$(basename ${PROJECT_FOLDER}) 29 | 30 | OutDir=$RESULT_FOLDER/$PIPELINE_PATH_NAME 31 | if [ ! -d $OutDir ]; then 32 | mkdir -p $OutDir 33 | fi 34 | 35 | 36 | Rscript --no-save /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT2_harmony/1_run_harmony/harmony.R \ 37 | -d /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/subT1_extract_from_b2/1_extract/outs/subObj_2022-09-01.rds \ 38 | -o $OutDir/harmony_output.rds 39 | -------------------------------------------------------------------------------- /fig6/GSE169246/subT2_split_by_marker/split.R: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------- 2 | # filename : split.R 3 | # Date : 2022-09-01 4 | # contributor : Yanshuo Chu 5 | # function: split 6 | #-------------------------------------------------------------- 7 | 8 | print('<==== split.R ====>') 9 | 10 | rm(list=ls()) 11 | library(tidyverse) 12 | library(Seurat) 13 | 14 | figurePath <- file.path("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/subT2_split_by_marker/outs") 15 | if(!dir.exists(figurePath)){ 16 | dir.create(figurePath, recursive = T) 17 | } 18 | setwd(figurePath) 19 | 20 | seuratObj <- readRDS("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/subT1_extract_from_b2/1_extract/outs/subObj_2022-09-01.rds") 21 | 22 | seuratObj <- Seurat::FindVariableFeatures(seuratObj) 23 | 24 | hgs <- VariableFeatures(seuratObj) 25 | hgs <- union(hgs, c("CD3D", "CD3E", "CD4", "CD8A", "CD8B")) 26 | 27 | seuratObj <- 
ScaleData(seuratObj, features = hgs) 28 | 29 | targetMatrix <- seuratObj@assays$RNA@scale.data[c("CD4", "CD8A", "CD8B"),] 30 | targetMatrix <- as.matrix(targetMatrix) 31 | targetMatrix[2,] <- apply(targetMatrix[2:3,], 2, max) 32 | targetMatrix <- targetMatrix[1:2,] 33 | targetMatrix <- t(targetMatrix) 34 | 35 | 36 | targetTibble <- as_tibble(targetMatrix) %>% 37 | mutate(barcode = rownames(targetMatrix)) 38 | 39 | threhold <- 0.1527 40 | targetTibble$CellType <- "Else" 41 | targetTibble$CellType[targetTibble$CD4 - targetTibble$CD8A > threhold] <- "CD4" 42 | targetTibble$CellType[targetTibble$CD4 - targetTibble$CD8A < -threhold] <- "CD8" 43 | table(targetTibble$CellType) 44 | 45 | CD4Obj <- subset(seuratObj, cells = targetTibble$barcode[targetTibble$CellType == "CD4"]) 46 | CD8Obj <- subset(seuratObj, cells = targetTibble$barcode[targetTibble$CellType == "CD8"]) 47 | 48 | ## saveRDS(CD4Obj, file.path(getwd(), paste0('CD4Obj', "_", Sys.Date(), '.rds'))) 49 | saveRDS(CD8Obj, file.path(getwd(), paste0('CD8Obj', "_", Sys.Date(), '.rds'))) 50 | -------------------------------------------------------------------------------- /data_preprocess/0_src/visualize.R: -------------------------------------------------------------------------------- 1 | #' filename : visualize.R 2 | #' Date : 2020-07-07 3 | #' contributor : Yanshuo Chu 4 | #' function: visualize 5 | 6 | ##libraries 7 | 8 | 9 | suppressMessages({library(optparse) 10 | library(readr) 11 | library(rjson) 12 | library(Seurat) 13 | library(dplyr) 14 | library(rlist) 15 | library(ggpubr) 16 | library(ggplot2)}) 17 | print('---visualize embeding---') 18 | ##CLI parsing 19 | option_list = list( 20 | make_option(c("-d", "--data"), 21 | type = "character", 22 | help = "r data file input(after runtsne/runumap)", 23 | metavar = 'character') 24 | ); 25 | 26 | opt_parser = OptionParser(option_list = option_list); 27 | opt = parse_args(opt_parser); 28 | 29 | if(is.null(opt$data)) { 30 | print_help(opt_parser) 31 | stop("Input 
data must be provided", call. = F) 32 | } 33 | ##Load data 34 | seuratObj <- readRDS(opt$data) 35 | 36 | coord = Embeddings(object = seuratObj, reduction = "umap") 37 | coord = coord[,c(1,2)] 38 | colnames(coord) = c("dim1", "dim2") 39 | coord = data.frame(ID = rownames(coord), coord) 40 | meta = seuratObj@meta.data; 41 | meta = data.frame(ID = rownames(meta), meta,stringsAsFactors = F) 42 | meta = left_join(meta, coord, by = 'ID') 43 | write_tsv(meta, file.path(dirname(opt$data),'visualization_coordinates.tsv')) 44 | 45 | isPDF <- length(Cells(seuratObj)) < 50000 46 | extraInWidth <- round((length(unique(seuratObj@meta.data$seurat_clusters)) - 10) / 10) 47 | if(isPDF){ 48 | print(file.path(dirname(opt$data),'umap.pdf')) 49 | pdf(file.path(dirname(opt$data),'umap.pdf'), height = 9, width = (9 + extraInWidth)) 50 | print(DimPlot(object=seuratObj, reduction="umap", group.by='seurat_clusters', label=TRUE)) 51 | dev.off() 52 | }else{ 53 | print(file.path(dirname(opt$data),'umap.png')) 54 | png(file.path(dirname(opt$data),'umap.png'), height = 9, width = (9 + extraInWidth), units = "in", res=600) 55 | print(DimPlot(object=seuratObj, reduction="umap", group.by='seurat_clusters', label=TRUE)) 56 | dev.off() 57 | } 58 | 59 | print('---end---') 60 | -------------------------------------------------------------------------------- /data_preprocess/0_src/determinePC.R: -------------------------------------------------------------------------------- 1 | #' filename : determinePC.R 2 | #' Date : 2020-07-07 3 | #' contributor : Yanshuo Chu 4 | #' function: determinePC 5 | 6 | print('----determinePC----') 7 | 8 | ##libraries 9 | suppressMessages({ 10 | library(optparse) 11 | library(future) 12 | library(readr) 13 | library(rjson) 14 | library(Seurat) 15 | }) 16 | 17 | ##CLI parsing 18 | option_list = list( 19 | make_option(c("-d", "--data"), 20 | type = "character", 21 | default = NULL, 22 | help = "r data file input(after normalization", 23 | metavar = 'character'), 24 | 
make_option(c("-o",'--out'), 25 | type = 'character', 26 | default = 'seuratObj.rds', 27 | help = 'output of seuratObj to determinePC', 28 | metavar = 'character') 29 | ); 30 | 31 | opt_parser = OptionParser(option_list = option_list); 32 | opt = parse_args(opt_parser); 33 | 34 | if(is.null(opt$data)) { 35 | print_help(opt_parser) 36 | stop("Input data must be provided", call. = F) 37 | } 38 | 39 | ##Load data 40 | seuratObj <- readRDS(opt$data) 41 | 42 | options(future.globals.maxSize = 10 * 1000 * 1024^2) 43 | 44 | plan("multiprocess", workers = 12) 45 | seuratObj <- NormalizeData(object = seuratObj, 46 | normalization.method = "LogNormalize", 47 | scale.factor = 1e4) 48 | plan("sequential") 49 | 50 | seuratObj <- FindVariableFeatures(object = seuratObj, selection.method = 'vst', nfeatures = 2000) 51 | 52 | hvg = VariableFeatures(object = seuratObj) 53 | 54 | plan("multiprocess", workers = 12) 55 | seuratObj <- ScaleData(object = seuratObj, 56 | features = hvg, 57 | vars.to.regress = c("nCount_RNA", "percent.mito")) 58 | plan("sequential") 59 | 60 | seuratObj <- RunPCA(object = seuratObj, features = hvg, npcs=150, verbose = FALSE) 61 | 62 | ##generate PCA elbow plot (explicit print so the figure is also drawn when the script is source()d) 63 | pdf(paste0(opt$out, ".elbowplot.pdf")) 64 | print(ElbowPlot(object = seuratObj, ndims = 150, reduction = 'pca')) 65 | dev.off() 66 | 67 | saveRDS(seuratObj, opt$out) 68 | print('----end----') 69 | -------------------------------------------------------------------------------- /fig6/SCP1288/0_merge/merge.R: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------- 2 | # filename : merge.R 3 | # Date : 2022-11-05 4 | # contributor : Yanshuo Chu 5 | # function: merge 6 | # R version: R/4.0.3 7 | #-------------------------------------------------------------- 8 | 9 | print('<==== merge.R ====>') 10 | rm(list=ls()) 11 | suppressMessages({library(Seurat); library(readr)}) ## Read10X/CreateSeuratObject/read_tsv are used below 12 | tenx.data = 
Read10X("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/data/SCP1288/expression/60c76a18771a5b0ba10ea91b") 13 | seurat_obj = CreateSeuratObject(counts = tenx.data, 14 | min.cells = 3, 15 | min.features = 200, 16 | project = "SCP1288") 17 | 18 | metaInfo <- read_tsv("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/data/SCP1288/metadata/Final_SCP_Metadata.txt") 19 | metaInfo <- metaInfo[2:dim(metaInfo)[1],] 20 | clusterInfo <- read_tsv("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/data/SCP1288/cluster/Final_SCP_ClusterFile.txt") 21 | clusterInfo <- clusterInfo[2:dim(clusterInfo)[1],] 22 | 23 | ## annotate every cell with the cell type from the cluster file (NA when the barcode is absent) 24 | seurat_obj$cell.type <- clusterInfo$FinalCellType[match(Cells(seurat_obj), clusterInfo$NAME)] 25 | 26 | for (cln in colnames(metaInfo)[2:length(colnames(metaInfo))]) { 27 | seurat_obj@meta.data[,cln] <- "" 28 | seurat_obj@meta.data[,cln] <- metaInfo[[cln]][match(Cells(seurat_obj), metaInfo$NAME)] ## plain vector, not a one-column tibble 29 | } 30 | 31 | CD8_clusters <- c( 32 | "41BB-Hi CD8+ T cell", 33 | "41BB-Lo CD8+ T cell", 34 | "Cycling CD8+ T cell", 35 | "MitoHigh CD8+ T cell", 36 | "MX1-Hi CD8+ T cell") 37 | 38 | 39 | CD4_clusters <- c( 40 | "Effector T-Helper", 41 | "Memory T-Helper", 42 | "MitoHigh T-Helper", 43 | "T-Reg" ) 44 | 45 | figure_path <- file.path("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/SCP1288/0_merge/") 46 | if (!dir.exists(figure_path)) { 47 | dir.create(figure_path, recursive = T) 48 | } 49 | setwd(figure_path) 50 | 51 | Idents(seurat_obj) <- seurat_obj$cell.type ## the FinalCellType annotation is stored as cell.type above 52 | CD8_obj <- subset(seurat_obj, idents = CD8_clusters) 53 | saveRDS(CD8_obj, file.path(getwd(), paste0('CD8.rds'))) 54 | 55 | CD4_obj <- subset(seurat_obj, idents = CD4_clusters) 56 | saveRDS(CD4_obj, file.path(getwd(), paste0('CD4.rds'))) 57 | -------------------------------------------------------------------------------- /data_preprocess/0_src/FindCluster.R: -------------------------------------------------------------------------------- 1 | #' filename : 
FindCluster.R 2 | #' Date : 2020-04-23 3 | #' contributor : Yanshuo Chu 4 | #' function: SNN graph construction and clustering on a chosen reduction 5 | 6 | ##libraries 7 | suppressMessages({library(optparse) 8 | library(readr) 9 | library(rjson) 10 | library(SeuratData) 11 | library(harmony) 12 | library(Seurat)}) 13 | print('---snn clustering---') 14 | ##CLI parsing 15 | option_list = list( 16 | make_option(c("-d", "--data"), 17 | type = "character", 18 | default = NULL, 19 | help = "r data file input (after normalization)", 20 | metavar = 'character'), 21 | make_option(c("-o",'--out'), 22 | type = 'character', 23 | default = 'snn-harmony.rds', 24 | help = 'output file name for the r data file [default = %default]', 25 | metavar = 'character'), 26 | make_option(c("-r",'--reduction'), 27 | type = 'character', 28 | default = 'harmony', 29 | help = 'reduction used to build the neighbor graph [default = %default]', 30 | metavar = 'character'), 31 | make_option(c("-n",'--npc'), 32 | type = 'integer', 33 | default = 40, 34 | help = 'number of components used as dims 1:npc [default = %default]', 35 | metavar = 'integer'), 36 | make_option(c("-e",'--resolution'), 37 | type = 'double', 38 | default = 0.2, 39 | help = 'clustering resolution [default = %default]', 40 | metavar = 'double') 41 | ); 42 | 43 | opt_parser = OptionParser(option_list = option_list); 44 | opt = parse_args(opt_parser); 45 | 46 | if(is.null(opt$data)) { 47 | print_help(opt_parser) 48 | stop("Input data must be provided", call. 
= F) 49 | } 50 | 51 | ##Load data 52 | seuratObj <- readRDS(opt$data) 53 | 54 | 55 | seuratObj <- FindNeighbors(object = seuratObj, 56 | reduction=opt$reduction, 57 | dims = 1:opt$npc) 58 | 59 | seuratObj <- FindClusters(object = seuratObj, 60 | resolution = opt$resolution) 61 | 62 | 63 | DefaultAssay(seuratObj) <- "RNA" 64 | 65 | saveRDS(seuratObj, file = opt$out) 66 | print('---end---') 67 | -------------------------------------------------------------------------------- /data_preprocess/Treg/callBack_Treg.sh: -------------------------------------------------------------------------------- 1 | ##!/usr/bin/env bash 2 | 3 | projectPath=/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject 4 | analysisPath=${projectPath}/analysis 5 | mainscriptsFolder=${analysisPath}/scripts 6 | pipelinesFolder=${mainscriptsFolder}/pipelines 7 | srcD=${analysisPath}/scripts/src 8 | paramD=${analysisPath}/scripts/params 9 | databaseD='/rsrch3/home/genomic_med/ychu2/share/database' 10 | 11 | runR="Rscript --no-save " 12 | 13 | dataPath=${1} 14 | 15 | 16 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/THelper.txt 17 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/TCD4.txt 18 | 19 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Markers/Treg/Glycolytic.txt 20 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Markers/Treg/TregSignatures.txt 21 | 22 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Pan-T/CD4/Treg/CD4TregMarkers.txt 23 | # ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${databaseD}/Pan-T/CD4/Treg/CD4TregMarkers.txt 24 | 25 | # JOBNAME=job_monocle3 26 | # tempFolder=$(dirname $dataPath) 27 | # JOBFOLDER=${tempFolder} 28 | # if [ ! 
-d $tempFolder ]; then 29 | # mkdir -p $tempFolder 30 | # fi 31 | # if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 32 | # rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 33 | # fi 34 | # bsub \ 35 | # -J ${JOBNAME} \ 36 | # -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 37 | # -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 38 | # -cwd ${JOBFOLDER} \ 39 | # -q long \ 40 | # -W 120:00 \ 41 | # -n 1 \ 42 | # -M 500\ 43 | # -R rusage[mem=500] \ 44 | # -B \ 45 | # -N \ 46 | # -u ychu2@mdanderson.org \ 47 | # /bin/bash -c "module load python/3.7.3-anaconda; module load R/3.6.0; ${runR} ${srcD}/trajectory_monocle3_from_seurat.r -d ${dataPath} -s /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/Sub_Treg_CD4_V5/nPC_15/UMAP_dist_0.1_nneighbor_20/p1_sub_Treg_CD4_V5_UMAP_dist_0.1_nneighbor_20_CLUSTER_res_0.3/startCells.txt" 48 | -------------------------------------------------------------------------------- /data_preprocess/0_src/snn-marker.R: -------------------------------------------------------------------------------- 1 | #' cluster-markers.r 2 | #' 3 | #' 19-10-07 09:53:55 4 | #' 5 | #' contributor: guangchun 6 | #' 7 | #' cluster markers analysis 8 | #' 9 | 10 | ##libraries 11 | suppressMessages({ 12 | library(optparse) 13 | library(readr) 14 | library(rjson) 15 | library(Seurat) 16 | }) 17 | print('---calculate snn cluster makers---') 18 | ##CLI parsing 19 | option_list = list( 20 | make_option(c("-d", "--data"), 21 | type = "character", 22 | default = NULL, 23 | help = "r data file input(after snn clustering)", 24 | metavar = 'character'), 25 | make_option(c("-o",'--out'), 26 | type = 'character', 27 | default = 'markers.txt', 28 | help = 'output file name for the markers [default = %default]', 29 | metavar = 'character'), 30 | make_option(c("-c","--param"), 31 | type = 'character', 32 | help = 'json file name contain function parameters', 33 | metavar = 'character') 34 | ); 35 | 36 | opt_parser = OptionParser(option_list = option_list); 
37 | opt = parse_args(opt_parser); 38 | 39 | if(is.null(opt$data)) { 40 | print_help(opt_parser) 41 | stop("Input data must be provided", call. = F) 42 | } 43 | if(is.null(opt$param)) { 44 | print_help(opt_parser) 45 | stop("json file name (containing parameters) must be provided", call. = F) 46 | } 47 | 48 | ##load param 49 | 50 | param <- fromJSON(file = opt$param) 51 | 52 | ##Load data 53 | seuratObj <- readRDS(opt$data) 54 | Idents(seuratObj) <- seuratObj@meta.data$seurat_clusters 55 | 56 | ## hvg <- VariableFeatures(seuratObj) 57 | ## gene.pattern <- c("MALAT1", "^MT-", "^RPL", "^RPS", "^LOC(0-9)", "^TR(A|B|G|D)V", "^MTRNR") 58 | ## hvg <- hvg[!hvg %in% grep(paste0(gene.pattern, collapse = "|"), hvg, value = T)] 59 | 60 | markers <- FindAllMarkers(object = seuratObj) 61 | ## markers <- FindAllMarkers(object = seuratObj, 62 | ## only.pos = TRUE, 63 | ## min.pct = 0.25, 64 | ## logfc.threshold = 0.25) 65 | 66 | write_tsv(data.frame(markers), opt$out) 67 | print('---end---') 68 | -------------------------------------------------------------------------------- /fig6/GSE144649/1_merge/merge.R: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------- 2 | # filename : merge.R 3 | # Date : 2022-02-16 4 | # contributor : Yanshuo Chu 5 | # function: merge 6 | #-------------------------------------------------------------- 7 | 8 | print('<==== merge.R ====>') 9 | 10 | suppressMessages({library(optparse) 11 | library(ggplot2) 12 | library(readr) 13 | library(rjson) 14 | library(Seurat)}) 15 | 16 | print('----loading cellranger outputs----') 17 | 18 | option_list = list( 19 | make_option(c("-d","--data"), 20 | type = 'character', 21 | help = 'dataFolder', 22 | metavar = 'character'), 23 | 24 | make_option(c("-o",'--out'), 25 | type = 'character', 26 | default = 'cellranger.rds', 27 | help = 'result file name [default = %default]', 28 | metavar = 'character') 29 | ); 30 | 31 | 
opt_parser = OptionParser(option_list = option_list); 32 | opt = parse_args(opt_parser); 33 | if(is.null(opt$data)) { print_help(opt_parser); stop("Input data must be provided", call. = F) } 34 | print(paste0("Loading data from ",opt$data)) 35 | samples = list.dirs(path = opt$data,recursive = F) 36 | samples = basename(samples) 37 | if(length(samples) < 1) stop(paste0('Failed to find data in ',opt$data)) 38 | obj.list = list() 39 | for (ids in seq_along(samples)) { 40 | ds = samples[ids] 41 | print(ds) 42 | tenx.data = Read10X(file.path(opt$data, ds)) 43 | tenx0 = CreateSeuratObject(counts = tenx.data, min.cells = 3, 44 | min.features = 200, 45 | project = ds) 46 | obj.list[[length(obj.list)+1]] = tenx0 47 | } 48 | 49 | if(length(samples) > 1) { 50 | combined.data = merge(x = obj.list[[1]],y = obj.list[-1],add.cell.ids = samples) 51 | } else { 52 | combined.data = obj.list[[1]] 53 | } 54 | 55 | ##Human: MT; Mouse: mt (drop = FALSE keeps a matrix even when only one mito gene matches) 56 | mito.features = grep(pattern = '^MT-|^mt-', x = rownames(x = combined.data), value = T) 57 | percent.mito <- Matrix::colSums(x = GetAssayData(object = combined.data, slot = 'counts')[mito.features, , drop = FALSE]) / Matrix::colSums(x = GetAssayData(object = combined.data, slot = 'counts')) 58 | combined.data[['percent.mito']] = percent.mito 59 | dir.create(opt$out, recursive = TRUE, showWarnings = FALSE) ## --out is used as a folder; make sure it exists 60 | print("saving output") 61 | saveRDS(combined.data, file = file.path(opt$out, "merged.rds")) 62 | print('----end----') 63 | 64 | -------------------------------------------------------------------------------- /fig6/GSE173351/1_merge/merge.R: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------- 2 | # filename : merge.R 3 | # Date : 2022-02-16 4 | # contributor : Yanshuo Chu 5 | # function: merge 6 | #-------------------------------------------------------------- 7 | 8 | print('<==== merge.R ====>') 9 | 10 | suppressMessages({library(optparse) 11 | library(ggplot2) 12 | library(readr) 13 | library(rjson) 14 | library(Seurat)}) 15 | 16 | print('----loading cellranger outputs----') 17 | 18 | option_list = list( 19 | 
make_option(c("-d","--data"), 20 | type = 'character', 21 | help = 'dataFolder', 22 | metavar = 'character'), 23 | 24 | make_option(c("-o",'--out'), 25 | type = 'character', 26 | default = 'cellranger.rds', 27 | help = 'output folder; merged.rds is written inside it [default = %default]', 28 | metavar = 'character') 29 | ); 30 | 31 | opt_parser = OptionParser(option_list = option_list); 32 | opt = parse_args(opt_parser); 33 | if(is.null(opt$data)) { print_help(opt_parser); stop("Input data must be provided", call. = F) } 34 | print(paste0("Loading data from ",opt$data)) 35 | samples = list.dirs(path = opt$data,recursive = F) 36 | samples = basename(samples) 37 | if(length(samples) < 1) stop(paste0('Failed to find data in ',opt$data)) 38 | obj.list = list() 39 | for (ids in seq_along(samples)) { 40 | ds = samples[ids] 41 | print(ds) 42 | tenx.data = Read10X(file.path(opt$data, ds)) 43 | tenx0 = CreateSeuratObject(counts = tenx.data, min.cells = 3, 44 | min.features = 200, 45 | project = ds) 46 | obj.list[[length(obj.list)+1]] = tenx0 47 | } 48 | 49 | if(length(samples) > 1) { 50 | combined.data = merge(x = obj.list[[1]],y = obj.list[-1],add.cell.ids = samples) 51 | } else { 52 | combined.data = obj.list[[1]] 53 | } 54 | 55 | ##Human: MT; Mouse: mt (drop = FALSE keeps a matrix even when only one mito gene matches) 56 | mito.features = grep(pattern = '^MT-|^mt-', x = rownames(x = combined.data), value = T) 57 | percent.mito <- Matrix::colSums(x = GetAssayData(object = combined.data, slot = 'counts')[mito.features, , drop = FALSE]) / Matrix::colSums(x = GetAssayData(object = combined.data, slot = 'counts')) 58 | combined.data[['percent.mito']] = percent.mito 59 | dir.create(opt$out, recursive = TRUE, showWarnings = FALSE) ## --out is used as a folder; make sure it exists 60 | print("saving output") 61 | saveRDS(combined.data, file = file.path(opt$out, "merged.rds")) 62 | print('----end----') 63 | 64 | -------------------------------------------------------------------------------- /data_preprocess/0_src/snn-harmony-umap.R: -------------------------------------------------------------------------------- 1 | ##libraries 2 | suppressMessages({library(optparse) 3 | library(readr) 4 | library(rjson) 5 | library(harmony) 6 | library(Seurat)}) 7 | print('---snn clustering---') 8 | ##CLI 
parsing 9 | option_list = list( 10 | make_option(c("-d", "--data"), 11 | type = "character", 12 | default = NULL, 13 | help = "r data file input(after normalization", 14 | metavar = 'character'), 15 | make_option(c("-o",'--out'), 16 | type = 'character', 17 | default = 'snn.rds', 18 | help = 'output file name for the r data file [default = %default]', 19 | metavar = 'character'), 20 | make_option(c("-c","--param"), 21 | type = 'character', 22 | help = 'json file name contain function parameters', 23 | metavar = 'character') 24 | ); 25 | 26 | opt_parser = OptionParser(option_list = option_list); 27 | opt = parse_args(opt_parser); 28 | 29 | if(is.null(opt$data)) { 30 | print_help(opt_parser) 31 | stop("Input data must be provided", call. = F) 32 | } 33 | if(is.null(opt$param)) { 34 | print_help(opt_parser) 35 | stop("json file name (containing parameters) must be provided", call. = F) 36 | } 37 | 38 | ##load param 39 | param <- fromJSON(file = opt$param) 40 | 41 | ##Load data 42 | norm.data <- readRDS(opt$data) 43 | norm.data <- FindVariableFeatures(object = norm.data, selection.method = 'vst', 44 | nfeatures = 2000) 45 | length(x = VariableFeatures(object = norm.data)) 46 | hvg = VariableFeatures(object = norm.data) 47 | norm.data <- ScaleData(object = norm.data, features = hvg, 48 | vars.to.regress = c("nCount_RNA", "percent.mito")) 49 | norm.data <- RunPCA(object = norm.data, features = hvg, verbose = FALSE) 50 | norm.data <- RunHarmony(norm.data, "batch") 51 | 52 | npc = param$npc 53 | ##run snn clustering 54 | norm.data <- RunUMAP(object = norm.data, 55 | reduction = "harmony", 56 | dims = 1:npc, 57 | min.dist = param$dist, 58 | n.neighbors = param$nneigh) 59 | 60 | norm.data <- FindNeighbors(object = norm.data, reduction="harmony", 61 | dims = 1:npc, k.param = param$k) 62 | snn.obj <- FindClusters(object = norm.data, resolution = param$res) 63 | 64 | saveRDS(snn.obj,file = opt$out) 65 | print('---end---') 66 | 
-------------------------------------------------------------------------------- /data_preprocess/CD4/p1CD4_V7.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J p1CD4_V7 2 | #BSUB -q short 3 | #BSUB -W 1:00 4 | #BSUB -n 1 5 | #BSUB -M 10 6 | #BSUB -R rusage[mem=10] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/CD4/p1CD4_V7.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/CD4/p1CD4_V7.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/CD4/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/CD4/p1CD4_V7.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/CD4/p1CD4_V7.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____----____----____----____----____----____----____----____----____----____----____----____----____----____---- 18 | 19 | 20 | projectPath=/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject 21 | DataD=${projectPath}/data/T/filterd 22 | 23 | analysisPath=${projectPath}/analysis 24 | mainscriptsFolder=${analysisPath}/scripts 25 | pipelinesFolder=${mainscriptsFolder}/pipelines 26 | srcD=${analysisPath}/scripts/src 27 | paramD=${analysisPath}/scripts/params 28 | databaseD='/rsrch3/home/genomic_med/ychu2/share/database' 29 | 30 | 31 | ResD=${analysisPath}/validate/CD4_V7 32 | if [ ! 
-d $ResD ]; then 33 | mkdir -p $ResD 34 | fi 35 | 36 | runR="Rscript --no-save " 37 | 38 | 39 | ##do the job 40 | echo "load data" 41 | 42 | #' submit multile jobs to normalize and scale separately ############################################# 43 | # ${runR} ${srcD}/load-scaleTObjectList.R -d ${DataD} -o ${ResD}/PCA.rds 44 | 45 | # ${runR} ${srcD}/RunPCA_RPCA.R -d ${ResD}/data.rds -o ${ResD}/pca.rds 46 | # ${runR} ${srcD}/visualize_PCAgenes.R -d ${ResD}/pca.rds 47 | 48 | ${HOME}/share/UMAP_CLUSTER_JOBS_EMBEDED/run.sh \ 49 | --inData ${ResD}/pca.rds \ 50 | --reduction pca \ 51 | --mainscriptsFolder ${mainscriptsFolder} \ 52 | --parentJobName "p1CD4_V7" \ 53 | --npcArray "50" \ 54 | --UMAPDistArray "0.1" \ 55 | --ClusterResArray "0.3" \ 56 | --NneighborsArray "50" \ 57 | --toRunUMAP "NO" \ 58 | --toRunClustering "NO" \ 59 | --toRunCommonAnalysis "NO" \ 60 | --toRunCallBack "YES" \ 61 | --callBackPath "/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/CD4/callBack_CD4.sh" 62 | 63 | -------------------------------------------------------------------------------- /data_preprocess/CD8/p1CD8_V6.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J p1CD8_V6 2 | #BSUB -q short 3 | #BSUB -W 1:00 4 | #BSUB -n 1 5 | #BSUB -M 10 6 | #BSUB -R rusage[mem=10] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/CD8/p1CD8_V6.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/CD8/p1CD8_V6.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/CD8/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/CD8/p1CD8_V6.o.txt 14 | rm -rf 
/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/CD8/p1CD8_V6.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____----____----____----____----____----____----____----____----____----____----____----____----____----____---- 18 | 19 | 20 | 21 | 22 | 23 | projectPath=/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject 24 | DataD=${projectPath}/data/T/filterd 25 | 26 | analysisPath=${projectPath}/analysis 27 | mainscriptsFolder=${analysisPath}/scripts 28 | pipelinesFolder=${mainscriptsFolder}/pipelines 29 | srcD=${analysisPath}/scripts/src 30 | paramD=${analysisPath}/scripts/params 31 | databaseD='/rsrch3/home/genomic_med/ychu2/share/database' 32 | 33 | ResD=${analysisPath}/validate/CD8_V6 34 | if [ ! -d $ResD ]; then 35 | mkdir -p $ResD 36 | fi 37 | 38 | runR="Rscript --no-save " 39 | 40 | 41 | ##do the job 42 | echo "load data" 43 | 44 | #' submit multile jobs to normalize and scale separately ############################################# 45 | #${runR} ${srcD}/load-scaleTObjectList.R -d ${DataD} -o ${ResD}/PCA.rds 46 | 47 | # ${runR} ${srcD}/RunPCA_RPCA.R -d ${ResD}/data.rds -o ${ResD}/pca.rds 48 | # ${runR} ${srcD}/visualize_PCAgenes.R -d ${ResD}/pca.rds 49 | 50 | 51 | ${HOME}/share/UMAP_CLUSTER_JOBS_EMBEDED/run.sh \ 52 | --inData ${ResD}/pca.rds \ 53 | --reduction pca \ 54 | --mainscriptsFolder ${mainscriptsFolder} \ 55 | --parentJobName "p1CD8_V6" \ 56 | --npcArray "50" \ 57 | --UMAPDistArray "0.1" \ 58 | --ClusterResArray "0.3" \ 59 | --NneighborsArray "50" \ 60 | --toRunUMAP "NO" \ 61 | --toRunClustering "NO" \ 62 | --toRunCommonAnalysis "NO" \ 63 | --toRunCallBack "YES" \ 64 | --callBackPath "/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/CD8/callBack_CD8.sh" 65 | -------------------------------------------------------------------------------- /data_preprocess/TFH/p1_sub_TFH_CD4_V6.sh: 
-------------------------------------------------------------------------------- 1 | #BSUB -J p1_sub_TFH_CD4_V6 2 | #BSUB -q short 3 | #BSUB -W 1:00 4 | #BSUB -n 1 5 | #BSUB -M 1 6 | #BSUB -R rusage[mem=1] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/p1_sub_TFH_CD4_V6.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/p1_sub_TFH_CD4_V6.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/p1_sub_TFH_CD4_V6.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/p1_sub_TFH_CD4_V6.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____----____----____----____----____----____----____----____----____----____----____----____----____----____---- 18 | 19 | 20 | projectPath=/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject 21 | DataD=${projectPath}/data/T/filterd 22 | 23 | analysisPath=${projectPath}/analysis 24 | mainscriptsFolder=${analysisPath}/scripts 25 | pipelinesFolder=${mainscriptsFolder}/pipelines 26 | srcD=${analysisPath}/scripts/src 27 | paramD=${analysisPath}/scripts/params 28 | databaseD='/rsrch3/home/genomic_med/ychu2/share/database' 29 | 30 | ResD=${analysisPath}/validate/Sub_TFH_CD4_V6 31 | if [ ! 
-d $ResD ]; then 32 | mkdir -p $ResD 33 | fi 34 | 35 | runR="Rscript --no-save " 36 | 37 | 38 | ##do the job 39 | echo "load data" 40 | 41 | #' submit multile jobs to normalize and scale separately ############################################# 42 | #${runR} ${srcD}/load-scaleTObjectList.R -d ${DataD} -o ${ResD}/PCA.rds 43 | 44 | # ${runR} ${srcD}/RunPCA_RPCA.R -d ${ResD}/data.rds -o ${ResD}/pca.rds 45 | # ${runR} ${srcD}/visualize_PCAgenes.R -d ${ResD}/pca.rds 46 | 47 | ${HOME}/share/UMAP_CLUSTER_JOBS_EMBEDED/run.sh \ 48 | --inData ${ResD}/pca.rds \ 49 | --reduction pca \ 50 | --mainscriptsFolder ${mainscriptsFolder} \ 51 | --parentJobName "p1_sub_TFH_CD4_V6" \ 52 | --npcArray "10" \ 53 | --UMAPDistArray "0.01" \ 54 | --ClusterResArray "0.3" \ 55 | --NneighborsArray "50" \ 56 | --toRunUMAP "NO" \ 57 | --toRunClustering "NO" \ 58 | --toRunCommonAnalysis "YES" \ 59 | --toRunCallBack "YES" \ 60 | --callBackPath "${pipelinesFolder}/split/callBack_CD4TFH.sh" 61 | # --callBackPath "${pipelinesFolder}/split/callBack.sh" 62 | 63 | -------------------------------------------------------------------------------- /data_preprocess/Treg/p1_sub_Treg_CD4_V5.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J p1_sub_Treg_CD4_V5 2 | #BSUB -q short 3 | #BSUB -W 1:00 4 | #BSUB -n 1 5 | #BSUB -M 1 6 | #BSUB -R rusage[mem=1] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/p1_sub_Treg_CD4_V5.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/p1_sub_Treg_CD4_V5.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/p1_sub_Treg_CD4_V5.o.txt 14 | rm -rf 
/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/p1_sub_Treg_CD4_V5.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____----____----____----____----____----____----____----____----____----____----____----____----____----____---- 18 | 19 | 20 | projectPath=/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject 21 | DataD=${projectPath}/data/T/filterd 22 | 23 | analysisPath=${projectPath}/analysis 24 | mainscriptsFolder=${analysisPath}/scripts 25 | pipelinesFolder=${mainscriptsFolder}/pipelines 26 | srcD=${analysisPath}/scripts/src 27 | paramD=${analysisPath}/scripts/params 28 | databaseD='/rsrch3/home/genomic_med/ychu2/share/database' 29 | 30 | ResD=${analysisPath}/validate/Sub_Treg_CD4_V5 31 | if [ ! -d $ResD ]; then 32 | mkdir -p $ResD 33 | fi 34 | 35 | runR="Rscript --no-save " 36 | 37 | 38 | ##do the job 39 | echo "load data" 40 | 41 | #' submit multile jobs to normalize and scale separately ############################################# 42 | #${runR} ${srcD}/load-scaleTObjectList.R -d ${DataD} -o ${ResD}/PCA.rds 43 | 44 | # ${runR} ${srcD}/RunPCA_RPCA.R -d ${ResD}/data.rds -o ${ResD}/pca.rds 45 | # ${runR} ${srcD}/visualize_PCAgenes.R -d ${ResD}/pca.rds 46 | 47 | ${HOME}/share/UMAP_CLUSTER_JOBS_EMBEDED/run.sh \ 48 | --inData ${ResD}/pca.rds \ 49 | --reduction pca \ 50 | --mainscriptsFolder ${mainscriptsFolder} \ 51 | --parentJobName "p1_sub_Treg_CD4_V5" \ 52 | --npcArray "15" \ 53 | --UMAPDistArray "0.1" \ 54 | --ClusterResArray "0.3" \ 55 | --NneighborsArray "20" \ 56 | --toRunUMAP "NO" \ 57 | --toRunClustering "NO" \ 58 | --toRunCommonAnalysis "YES" \ 59 | --toRunCallBack "YES" \ 60 | --callBackPath "${pipelinesFolder}/split/callBack_Treg.sh" 61 | 62 | # --callBackPath "${pipelinesFolder}/split/callBack.sh" 63 | 64 | -------------------------------------------------------------------------------- /fig6/GSE169246/3_extractT/extract.R: 
-------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------- 2 | # filename : extract.R 3 | # Date : 2022-03-04 4 | # contributor : Yanshuo Chu 5 | # function: extract 6 | #-------------------------------------------------------------- 7 | 8 | print('<==== extract.R ====>') 9 | rm(list=ls()) 10 | suppressMessages({ 11 | library(Seurat) 12 | library(tidyverse) 13 | library(ggplot2) 14 | }) 15 | 16 | TCellsFolder <- "/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/TCells" 17 | if(!dir.exists(TCellsFolder)){ 18 | dir.create(TCellsFolder, recursive = TRUE) 19 | } 20 | setwd(TCellsFolder) 21 | 22 | seuratObj <- readRDS("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/merged/nPC_30/UMAP_dist_0.1_nneighbor_50/GSE169246_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds") 23 | 24 | seuratObj$sample <- stringr::str_extract(pattern = "(?<=[ATGC]{16}.).*", Cells(seuratObj)) 25 | Idents(seuratObj) <- seuratObj$sample 26 | pdf(file.path(getwd(), "orig_ident.pdf")) 27 | DimPlot(seuratObj, label = F) + 28 | theme(legend.position = "none") 29 | dev.off() 30 | 31 | 32 | Idents(seuratObj) <- seuratObj$seurat_clusters 33 | seuratObj_P <- subset(seuratObj, idents = 12) 34 | saveRDS(seuratObj_P, file.path(paste0("P", "_", Sys.Date(), ".rds"))) 35 | 36 | TCellClusters <- c(0,1,2,8,14,16,21,23) 37 | seuratObj_T <- subset(seuratObj, idents = TCellClusters) 38 | saveRDS(seuratObj_T, file.path(paste0('T', "_", Sys.Date(), '.rds'))) 39 | ## expression is taken from the T-cell subset so the CD4/CD8 mask has one entry per cell of seuratObj_T 40 | CD4EXP <- seuratObj_T@assays$RNA@data["CD4",] 41 | CD8EXP <- matrixStats::colMaxs(as.matrix(seuratObj_T@assays$RNA@data[c("CD8A", "CD8B"), ])) 42 | seuratObj_T$cell.type <- "CD4" 43 | seuratObj_T$cell.type[CD4EXP < CD8EXP] <- "CD8" 44 | 45 | Idents(seuratObj_T) <- seuratObj_T$cell.type 46 | pdf(file.path(getwd(), "T_bubbleplot.pdf")) 47 | DotPlot(seuratObj_T, features = c("CD4", "CD8A", "CD8B", "FOXP3")) 48 | dev.off() 49 | 50 | 
CD4SeuratObj <- subset(seuratObj_T, idents = "CD4") 51 | saveRDS(CD4SeuratObj, file.path(paste0("CD4T", "_", Sys.Date(), ".rds"))) 52 | 53 | CD8SeuratObj <- subset(seuratObj_T, idents = "CD8") 54 | saveRDS(CD8SeuratObj, file.path(paste0("CD8T", "_", Sys.Date(), ".rds"))) 55 | 56 | ## B plasma: 57 | ## 5 58 | ## 6 59 | ## 9 60 | ## 13 61 | ## 20 62 | 63 | ## Monocyte: 64 | ## 4 65 | ## 22 66 | ## 7 67 | ## 10 68 | ## 15 69 | 70 | ## unknown: 71 | ## 17 72 | ## 19 73 | ## 20 74 | ## 22 75 | 76 | ## mast: 77 | ## 18 78 | 79 | ## proliferative T: 80 | ## 12 81 | 82 | ## proliferative B: 83 | ## 13 84 | 85 | ## Treg: 86 | ## 8 87 | 88 | ## NK/NKT: 89 | ## 11 90 | ## 3 91 | 92 | 93 | -------------------------------------------------------------------------------- /fig6/GSE173351/5_seurat_pipeline/callBack.sh: -------------------------------------------------------------------------------- 1 | ##!/usr/bin/env bash 2 | 3 | 4 | srcD=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/src/public 5 | 6 | runR="Rscript --no-save " 7 | 8 | dataPath=${1} 9 | 10 | echo "begin" 11 | ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m /rsrch3/home/genomic_med/ychu2/share/database/Pan-T/Reference/PMID34290406.txt 12 | 13 | # rootFolder=$(dirname ${dataPath}) 14 | # tempMarkerFolder=/rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/Markers/CD4/Genelistbyfunction 15 | # for markerFile in $(ls $tempMarkerFolder); do 16 | # JOBNAME=job_${markerFile%%.*} 17 | # tempFolder=${rootFolder} 18 | # JOBFOLDER=${tempFolder} 19 | # if [ ! 
-d $tempFolder ]; then 20 | # mkdir -p $tempFolder 21 | # fi 22 | # if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 23 | # rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 24 | # fi 25 | # bsub \ 26 | # -J ${JOBNAME} \ 27 | # -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 28 | # -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 29 | # -cwd ${JOBFOLDER} \ 30 | # -q short \ 31 | # -W 3:00 \ 32 | # -n 1 \ 33 | # -M 100\ 34 | # -R rusage[mem=100] \ 35 | # -B \ 36 | # -N \ 37 | # -u ychu2@mdanderson.org \ 38 | # /bin/bash -c "module load python/3.7.3-anaconda; module load R/3.6.0; ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${tempMarkerFolder}/${markerFile}; ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${tempMarkerFolder}/${markerFile}; ${runR} ${srcD}/feature-plot.R -d $dataPath -o $(dirname ${dataPath})/featureplot/${markerFile%%.*} -c ${paramD}/feature-plot-origin.json -m ${tempMarkerFolder}/${markerFile}" 39 | # done 40 | 41 | # ${runR} ${srcD}/marker-classification.R -d $(dirname ${dataPath})/snn-single-markers.tsv -m /rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/Markers/CD4/Genelistbyfunction 42 | 43 | # ${runR} ${srcD}/tissue-composition-plot.R -d $dataPath 44 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Markers/CD4/markers/CD4_naive_clusters_comparison.txt 45 | 46 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/TCD4.txt 47 | # ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 48 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 49 | # ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 50 | # ${runR} ${srcD}/monocleForTest.R -d $dataPath -n 8 51 | -------------------------------------------------------------------------------- 
/fig6/GSE144649/4_seurat_pipeline/callBack.sh: -------------------------------------------------------------------------------- 1 | ##!/usr/bin/env bash 2 | 3 | 4 | srcD=/rsrch3/home/genomic_med/ychu2/projects/project18/code/src/public 5 | databaseD=/rsrch3/home/genomic_med/ychu2/share/database 6 | 7 | runR="Rscript --no-save " 8 | 9 | dataPath=${1} 10 | 11 | echo "begin" 12 | ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m /rsrch3/home/genomic_med/ychu2/share/database/Pan-T/Reference/PMID34290406.txt 13 | 14 | # rootFolder=$(dirname ${dataPath}) 15 | # tempMarkerFolder=/rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/Markers/CD4/Genelistbyfunction 16 | # for markerFile in $(ls $tempMarkerFolder); do 17 | # JOBNAME=job_${markerFile%%.*} 18 | # tempFolder=${rootFolder} 19 | # JOBFOLDER=${tempFolder} 20 | # if [ ! -d $tempFolder ]; then 21 | # mkdir -p $tempFolder 22 | # fi 23 | # if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 24 | # rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 25 | # fi 26 | # bsub \ 27 | # -J ${JOBNAME} \ 28 | # -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 29 | # -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 30 | # -cwd ${JOBFOLDER} \ 31 | # -q short \ 32 | # -W 3:00 \ 33 | # -n 1 \ 34 | # -M 100\ 35 | # -R rusage[mem=100] \ 36 | # -B \ 37 | # -N \ 38 | # -u ychu2@mdanderson.org \ 39 | # /bin/bash -c "module load python/3.7.3-anaconda; module load R/3.6.0; ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${tempMarkerFolder}/${markerFile}; ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${tempMarkerFolder}/${markerFile}; ${runR} ${srcD}/feature-plot.R -d $dataPath -o $(dirname ${dataPath})/featureplot/${markerFile%%.*} -c ${paramD}/feature-plot-origin.json -m ${tempMarkerFolder}/${markerFile}" 40 | # done 41 | 42 | # ${runR} ${srcD}/marker-classification.R -d $(dirname ${dataPath})/snn-single-markers.tsv -m 
/rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/Markers/CD4/Genelistbyfunction 43 | 44 | # ${runR} ${srcD}/tissue-composition-plot.R -d $dataPath 45 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Markers/CD4/markers/CD4_naive_clusters_comparison.txt 46 | 47 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/TCD4.txt 48 | # ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 49 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 50 | # ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 51 | # ${runR} ${srcD}/monocleForTest.R -d $dataPath -n 8 52 | -------------------------------------------------------------------------------- /fig6/GSE169246/2_seurat_pipeline/callBack.sh: -------------------------------------------------------------------------------- 1 | ##!/usr/bin/env bash 2 | 3 | 4 | srcD=/rsrch3/home/genomic_med/ychu2/projects/project18/code/src/public 5 | databaseD=/rsrch3/home/genomic_med/ychu2/share/database 6 | 7 | runR="Rscript --no-save " 8 | 9 | dataPath=${1} 10 | 11 | echo "begin" 12 | ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m /rsrch3/home/genomic_med/ychu2/share/database/Pan-T/Reference/PMID34290406.txt 13 | 14 | # rootFolder=$(dirname ${dataPath}) 15 | # tempMarkerFolder=/rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/Markers/CD4/Genelistbyfunction 16 | # for markerFile in $(ls $tempMarkerFolder); do 17 | # JOBNAME=job_${markerFile%%.*} 18 | # tempFolder=${rootFolder} 19 | # JOBFOLDER=${tempFolder} 20 | # if [ ! 
-d $tempFolder ]; then 21 | # mkdir -p $tempFolder 22 | # fi 23 | # if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 24 | # rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 25 | # fi 26 | # bsub \ 27 | # -J ${JOBNAME} \ 28 | # -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 29 | # -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 30 | # -cwd ${JOBFOLDER} \ 31 | # -q short \ 32 | # -W 3:00 \ 33 | # -n 1 \ 34 | # -M 100\ 35 | # -R rusage[mem=100] \ 36 | # -B \ 37 | # -N \ 38 | # -u ychu2@mdanderson.org \ 39 | # /bin/bash -c "module load python/3.7.3-anaconda; module load R/3.6.0; ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${tempMarkerFolder}/${markerFile}; ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${tempMarkerFolder}/${markerFile}; ${runR} ${srcD}/feature-plot.R -d $dataPath -o $(dirname ${dataPath})/featureplot/${markerFile%%.*} -c ${paramD}/feature-plot-origin.json -m ${tempMarkerFolder}/${markerFile}" 40 | # done 41 | 42 | # ${runR} ${srcD}/marker-classification.R -d $(dirname ${dataPath})/snn-single-markers.tsv -m /rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/Markers/CD4/Genelistbyfunction 43 | 44 | # ${runR} ${srcD}/tissue-composition-plot.R -d $dataPath 45 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Markers/CD4/markers/CD4_naive_clusters_comparison.txt 46 | 47 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/TCD4.txt 48 | # ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 49 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 50 | # ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 51 | # ${runR} ${srcD}/monocleForTest.R -d $dataPath -n 8 52 | -------------------------------------------------------------------------------- 
/fig6/GSE169246/b2_seurat_pipeline/callBack.sh: -------------------------------------------------------------------------------- 1 | ##!/usr/bin/env bash 2 | 3 | 4 | srcD=/rsrch3/home/genomic_med/ychu2/projects/project18/code/src/public 5 | databaseD=/rsrch3/home/genomic_med/ychu2/share/database 6 | 7 | runR="Rscript --no-save " 8 | 9 | dataPath=${1} 10 | 11 | echo "begin" 12 | ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m /rsrch3/home/genomic_med/ychu2/share/database/Pan-T/Reference/PMID34290406.txt 13 | 14 | # rootFolder=$(dirname ${dataPath}) 15 | # tempMarkerFolder=/rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/Markers/CD4/Genelistbyfunction 16 | # for markerFile in $(ls $tempMarkerFolder); do 17 | # JOBNAME=job_${markerFile%%.*} 18 | # tempFolder=${rootFolder} 19 | # JOBFOLDER=${tempFolder} 20 | # if [ ! -d $tempFolder ]; then 21 | # mkdir -p $tempFolder 22 | # fi 23 | # if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 24 | # rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 25 | # fi 26 | # bsub \ 27 | # -J ${JOBNAME} \ 28 | # -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 29 | # -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 30 | # -cwd ${JOBFOLDER} \ 31 | # -q short \ 32 | # -W 3:00 \ 33 | # -n 1 \ 34 | # -M 100\ 35 | # -R rusage[mem=100] \ 36 | # -B \ 37 | # -N \ 38 | # -u ychu2@mdanderson.org \ 39 | # /bin/bash -c "module load python/3.7.3-anaconda; module load R/3.6.0; ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${tempMarkerFolder}/${markerFile}; ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${tempMarkerFolder}/${markerFile}; ${runR} ${srcD}/feature-plot.R -d $dataPath -o $(dirname ${dataPath})/featureplot/${markerFile%%.*} -c ${paramD}/feature-plot-origin.json -m ${tempMarkerFolder}/${markerFile}" 40 | # done 41 | 42 | # ${runR} ${srcD}/marker-classification.R -d $(dirname ${dataPath})/snn-single-markers.tsv -m 
/rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/Markers/CD4/Genelistbyfunction 43 | 44 | # ${runR} ${srcD}/tissue-composition-plot.R -d $dataPath 45 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Markers/CD4/markers/CD4_naive_clusters_comparison.txt 46 | 47 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/TCD4.txt 48 | # ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 49 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 50 | # ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 51 | # ${runR} ${srcD}/monocleForTest.R -d $dataPath -n 8 52 | -------------------------------------------------------------------------------- /fig6/GSE169246/subT3_seurat_pipeline/callBack.sh: -------------------------------------------------------------------------------- 1 | ##!/usr/bin/env bash 2 | 3 | 4 | srcD=/rsrch3/home/genomic_med/ychu2/projects/project18/code/src/public 5 | databaseD=/rsrch3/home/genomic_med/ychu2/share/database 6 | 7 | runR="Rscript --no-save " 8 | 9 | dataPath=${1} 10 | 11 | echo "begin" 12 | ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m /rsrch3/home/genomic_med/ychu2/share/database/Pan-T/Reference/PMID34290406.txt 13 | 14 | # rootFolder=$(dirname ${dataPath}) 15 | # tempMarkerFolder=/rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/Markers/CD4/Genelistbyfunction 16 | # for markerFile in $(ls $tempMarkerFolder); do 17 | # JOBNAME=job_${markerFile%%.*} 18 | # tempFolder=${rootFolder} 19 | # JOBFOLDER=${tempFolder} 20 | # if [ ! 
-d $tempFolder ]; then 21 | # mkdir -p $tempFolder 22 | # fi 23 | # if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 24 | # rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 25 | # fi 26 | # bsub \ 27 | # -J ${JOBNAME} \ 28 | # -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 29 | # -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 30 | # -cwd ${JOBFOLDER} \ 31 | # -q short \ 32 | # -W 3:00 \ 33 | # -n 1 \ 34 | # -M 100\ 35 | # -R rusage[mem=100] \ 36 | # -B \ 37 | # -N \ 38 | # -u ychu2@mdanderson.org \ 39 | # /bin/bash -c "module load python/3.7.3-anaconda; module load R/3.6.0; ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${tempMarkerFolder}/${markerFile}; ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${tempMarkerFolder}/${markerFile}; ${runR} ${srcD}/feature-plot.R -d $dataPath -o $(dirname ${dataPath})/featureplot/${markerFile%%.*} -c ${paramD}/feature-plot-origin.json -m ${tempMarkerFolder}/${markerFile}" 40 | # done 41 | 42 | # ${runR} ${srcD}/marker-classification.R -d $(dirname ${dataPath})/snn-single-markers.tsv -m /rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/Markers/CD4/Genelistbyfunction 43 | 44 | # ${runR} ${srcD}/tissue-composition-plot.R -d $dataPath 45 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Markers/CD4/markers/CD4_naive_clusters_comparison.txt 46 | 47 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/TCD4.txt 48 | # ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 49 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 50 | # ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 51 | # ${runR} ${srcD}/monocleForTest.R -d $dataPath -n 8 52 | -------------------------------------------------------------------------------- /fig5/fig5b/zoomIn.sh: 
-------------------------------------------------------------------------------- 1 | ##!/usr/bin/env bash 2 | #' filename : zoomIn.sh 3 | #' Date : 2022-08-15 4 | #' contributor : Yanshuo Chu 5 | #' function: convert cut images and add a map scale 6 | 7 | POSITIONAL=() 8 | while [[ $# -gt 0 ]] 9 | do 10 | key="$1" 11 | 12 | case $key in 13 | --inDataPath) 14 | inDataPath="$2" 15 | shift # past argument 16 | shift # past value 17 | ;; 18 | --outDataPath) 19 | outDataPath="$2" 20 | shift # past argument 21 | shift # past value 22 | ;; 23 | --xmin) 24 | xmin="$2" 25 | shift # past argument 26 | shift # past value 27 | ;; 28 | --xmax) 29 | xmax="$2" 30 | shift # past argument 31 | shift # past value 32 | ;; 33 | --ymin) 34 | ymin="$2" 35 | shift # past argument 36 | shift # past value 37 | ;; 38 | --ymax) 39 | ymax="$2" 40 | IFS=';' read -a npcArray <<< "$npcArray" 41 | shift # past argument 42 | shift # past value 43 | ;; 44 | *) # unknown option 45 | POSITIONAL+=("$1") # save it in an array for later 46 | shift # past argument 47 | ;; 48 | esac 49 | done 50 | set -- "${POSITIONAL[@]}" # restore positional parameters 51 | 52 | echo "run parameters: 53 | inDataPath=${inDataPath} 54 | outDataPath=${outDataPath} 55 | xmin=${xmin} 56 | xmax=${xmax} 57 | ymin=${ymin} 58 | ymax=${ymax} 59 | " 60 | 61 | if [[ -n $1 ]]; then 62 | echo "Last line of file specified as non-opt/last argument:" 63 | tail -1 "$1" 64 | fi 65 | 66 | convert $inDataPath -crop $((${xmax}-${xmin}))x$((${ymax}-${ymin}))+${xmin}+${ymin} ${outDataPath}.bak 67 | 68 | module load R/3.6.0 69 | echo " Rscript --no-save /rsrch3/scratch/genomic_med/ychu2/projects/p1review/CosMx/code/pipeline/private/1_MapScale/plotMapScale.R -d ${outDataPath}.bak -o ${outDataPath} -w $((${xmax}-${xmin})) -h $((${ymax}-${ymin})) " 70 | Rscript --no-save /rsrch3/scratch/genomic_med/ychu2/projects/p1review/CosMx/code/pipeline/private/1_MapScale/plotMapScale.R -d ${outDataPath}.bak -o ${outDataPath} -w $((${xmax}-${xmin})) -h 
$((${ymax}-${ymin})) 71 | # Load R/4.1.0, log the plotGiotto.R command line, then run it 72 | module load R/4.1.0 73 | echo " Rscript --no-save /rsrch3/scratch/genomic_med/ychu2/projects/p1review/CosMx/code/pipeline/private/1_MapScale/plotGiotto.R -f $(echo $(basename $inDataPath) | egrep -o "[0-9]+") --xmin ${xmin} --xmax ${xmax} --ymin ${ymin} --ymax ${ymax} -o ${outDataPath%.jpg}_loc.png " 74 | Rscript --no-save /rsrch3/scratch/genomic_med/ychu2/projects/p1review/CosMx/code/pipeline/private/1_MapScale/plotGiotto.R \ 75 | -f $(echo $(basename $inDataPath) | egrep -o "[0-9]+") --xmin ${xmin} --xmax ${xmax} --ymin ${ymin} --ymax ${ymax} \ 76 | -o ${outDataPath%.jpg}_loc.png 77 | 78 | 79 | -------------------------------------------------------------------------------- /data_preprocess/0_src/RunPCA_RPCA.R: -------------------------------------------------------------------------------- 1 | #'----------------------------------- 2 | #' filename : RunPCA_RPCA.R 3 | #' Date : 2020-07-26 4 | #' contributor : Yanshuo Chu 5 | #' function: RunPCA_RPCA 6 | #'----------------------------------- 7 | 8 | print('<== Run PCA ==>') 9 | 10 | ##libraries 11 | suppressMessages({library(optparse) 12 | library(tidyverse) 13 | library(Seurat)}) 14 | 15 | 16 | 17 | ##CLI parsing 18 | option_list = list( 19 | make_option(c("-d", "--data"), 20 | type = "character", 21 | default = NULL, 22 | help = "r data file input(after normalization", 23 | metavar = 'character'), 24 | ## make_option(c("-t", "--table"), 25 | ## type = "character", 26 | ## default = NULL, 27 | ## help = "bad gene table", 28 | ## metavar = 'character'), 29 | make_option(c("-o",'--out'), 30 | type = 'character', 31 | default = 'snn-harmony.rds', 32 | help = 'output file name for the r data file [default = %default]', 33 | metavar = 'character') 34 | ); 35 | 36 | opt_parser = OptionParser(option_list = option_list); 37 | opt = parse_args(opt_parser); 38 | 39 | if(is.null(opt$data)) { 40 | print_help(opt_parser) 41 | stop("Input data must be provided", call. 
= F) 42 | } 43 | 44 | ##Load data 45 | seuratObj <- readRDS(opt$data) 46 | DefaultAssay(seuratObj) <- "integrated" 47 | 48 | ## Pick 3000 variable features, drop every gene matching gene.pattern, then run PCA on the remaining genes 49 | cellCycleGeneT1 <- read_tsv("/rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/general/cell-cycle-gene-list.txt") 50 | cellCycleGeneT2 <- read_tsv("/rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/general/regev_lab_cell_cycle_genes.txt") 51 | seuratObj <- FindVariableFeatures(object = seuratObj, selection.method = 'vst', nfeatures = 3000) 52 | hvg = VariableFeatures(object = seuratObj) 53 | gene.pattern <- c("MALAT1", "^MT-", "^RPL", "^RPS", "^LOC[0-9]", "^TR(A|B|G|D)V", "^MTRNR") 54 | hvg <- hvg[!hvg %in% grep(paste0(gene.pattern, collapse = "|"), hvg, value = T)] 55 | ## hvg <- setdiff(hvg, cellCycleGeneT1$marker) 56 | ## hvg <- setdiff(hvg, cellCycleGeneT2$marker) 57 | ## print("move out proliferative markers") 58 | seuratObj <- RunPCA(object = seuratObj, features= hvg, npcs=150, verbose = FALSE) 59 | VariableFeatures(seuratObj) <- hvg 60 | 61 | ## NOTE: "^LOC[0-9]" replaces the former "^LOC(0-9)", which only matched the literal text "LOC0-9" and so never removed LOC<digits> genes 62 | ## seuratObj <- FindVariableFeatures(object = seuratObj, selection.method = 'vst', nfeatures = 2200) 63 | ## hvg = VariableFeatures(object = seuratObj) 64 | ## gene.pattern <- c("MALAT1", "^MT-", "^RPL", "^RPS", "^LOC[0-9]", "^TR(A|B|G|D)V", "^MTRNR") 65 | ## hvg <- hvg[!hvg %in% grep(paste0(gene.pattern, collapse = "|"), hvg, value = T)] 66 | ## print("length(hvg)") 67 | ## print(length(hvg)) 68 | ## seuratObj <- RunPCA(object = seuratObj, features= hvg, npcs=150, verbose = FALSE) 69 | ## VariableFeatures(seuratObj) <- hvg 70 | 71 | saveRDS(seuratObj, file = opt$out) 72 | print('---end---') 73 | 74 | -------------------------------------------------------------------------------- /data_preprocess/0_src/snn-harmony-best-umap-finder.R: -------------------------------------------------------------------------------- 1 | ##libraries 2 | suppressMessages({library(optparse) 3 | library(readr) 4 | library(rjson) 5 | library(harmony) 6 | library(Seurat)}) 7 | print('---snn 
clustering---') 8 | ##CLI parsing 9 | option_list = list( 10 | make_option(c("-d", "--data"), 11 | type = "character", 12 | default = NULL, 13 | help = "r data file input(after normalization", 14 | metavar = 'character'), 15 | make_option(c("-o",'--out'), 16 | type = 'character', 17 | default = 'umaps.pdf', 18 | help = 'output file name for the r data file [default = %default]', 19 | metavar = 'character') 20 | ); 21 | 22 | opt_parser = OptionParser(option_list = option_list); 23 | opt = parse_args(opt_parser); 24 | 25 | if(is.null(opt$data)) { 26 | print_help(opt_parser) 27 | stop("Input data must be provided", call. = F) 28 | } 29 | 30 | ##Load data 31 | norm.data <- readRDS(opt$data) 32 | 33 | ##run snn clustering 34 | 35 | for(dist in c(0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.05, 0.01, 0.005, 0.001)){ 36 | norm.umap.data <- RunUMAP(object = norm.data, 37 | reduction = "harmony", 38 | dims = 1:40, 39 | min.dist = dist) 40 | 41 | norm.umap.data <- FindNeighbors(object = norm.umap.data, reduction="harmony", dims = 1:40) 42 | snn.obj <- FindClusters(object = norm.umap.data, resolution = 0.2) 43 | 44 | pdf(paste0(opt$out, 'batch: dist(', dist, ').pdf')) 45 | print(DimPlot(object=snn.obj, reduction="umap", group.by='batch', label=TRUE, plot.title=paste0('batch: dist(', dist))) 46 | print(DimPlot(object=snn.obj, reduction="umap", group.by='control', label=TRUE, plot.title=paste0('batch: dist(', dist))) 47 | print(DimPlot(object=snn.obj, reduction="umap", group.by='seurat_clusters', label=TRUE, plot.title=paste0('batch: dist(', dist))) 48 | dev.off() 49 | } 50 | 51 | norm.umap.data <- RunUMAP(object = norm.data, 52 | reduction = "harmony", 53 | dims = 1:40, 54 | min.dist = 0.1) 55 | norm.umap.data <- FindNeighbors(object = norm.umap.data, reduction="harmony", dims = 1:40) 56 | 57 | for(res in c(0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)){ 58 | snn.obj <- FindClusters(object = norm.umap.data, resolution = res) 59 | print(paste0("plot:",opt$out, 
'batch: res(', res, ').pdf')) 60 | pdf(paste0(opt$out, 'batch: res(', res, ').pdf')) 61 | print(DimPlot(object=snn.obj, reduction="umap", group.by='batch', label=TRUE, plot.title=paste0('res', res))) 62 | print(DimPlot(object=snn.obj, reduction="umap", group.by='control', label=TRUE, plot.title=paste0('res', res))) 63 | print(DimPlot(object=snn.obj, reduction="umap", group.by='seurat_clusters', label=TRUE, plot.title=paste0('res', res))) 64 | dev.off() 65 | } 66 | 67 | # saveRDS(snn.obj,file = opt$out) 68 | print('---end---') 69 | -------------------------------------------------------------------------------- /fig6/GSE169246/b1_harmony/1_injectBatchinfo/inject.R: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------- 2 | # filename : inject.R 3 | # Date : 2022-08-31 4 | # contributor : Yanshuo Chu 5 | # function: inject 6 | #-------------------------------------------------------------- 7 | 8 | print('<==== inject.R ====>') 9 | 10 | library(optparse) 11 | library(tidyverse) 12 | library(Seurat) 13 | library(GEOquery) 14 | 15 | seuratObj <- readRDS("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/merged/merged.obj") 16 | 17 | TotalAntiPDL1ChemoPatients <- c("P019", 18 | "P010", 19 | "P012", 20 | "P007", 21 | "P017", 22 | "P001", 23 | "P002", 24 | "P014", 25 | "P004", 26 | "P005", 27 | "P016") 28 | 29 | TotalChemoPatients <- c("P022", 30 | "P011", 31 | "P020", 32 | "P008", 33 | "P013", 34 | "P025", 35 | "P018", 36 | "P023", 37 | "P024", 38 | "P003", 39 | "P028") 40 | 41 | AllResponsePatients <- c("P019", 42 | "P010", 43 | "P012", 44 | "P007", 45 | "P022", 46 | "P011", 47 | "P020", 48 | "P008", 49 | "P013") 50 | 51 | seuratObj@meta.data$Sample <- stringr::str_extract(Cells(seuratObj), "(?<=^.{10,20}\\.).+") 52 | seuratObj@meta.data$Patient <- stringr::str_extract(seuratObj@meta.data$Sample, "P\\d+") 53 | seuratObj@meta.data$Tissue <- 
stringr::str_extract(seuratObj@meta.data$Sample, "\\w$") 54 | seuratObj@meta.data$TumorTreatment <- stringr::str_extract(seuratObj@meta.data$Sample, "^[a-zA-Z]+") 55 | seuratObj@meta.data$isResponse <- "NR" 56 | seuratObj@meta.data$isResponse[seuratObj@meta.data$Patient %in% AllResponsePatients] <- "R" 57 | seuratObj@meta.data$isResponse[seuratObj@meta.data$Patient == "P028"] <- "-" 58 | seuratObj@meta.data$TreatmentType <- "PDL1+Chemo" 59 | seuratObj@meta.data$TreatmentType[seuratObj@meta.data$Patient %in% TotalChemoPatients] <- "Chemo" 60 | seuratObj@meta.data$group <- paste0(seuratObj@meta.data$TreatmentType, "-", seuratObj@meta.data$TumorTreatment, "-", seuratObj@meta.data$isResponse) 61 | 62 | seuratObj$batch <- seuratObj$Patient 63 | 64 | figurePath <- file.path("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/b1_harmony/1_injectBatchinfo/outs") 65 | if(!dir.exists(figurePath)){ 66 | dir.create(figurePath, recursive = T) 67 | } 68 | setwd(figurePath) 69 | saveRDS(seuratObj, file.path(paste0('harmony_input.rds'))) 70 | 71 | -------------------------------------------------------------------------------- /fig6/GSE179994/4_mapping_multimap/submitJob_Mapping.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J submitJob_Mapping 2 | #BSUB -q short 3 | #BSUB -W 1:00 4 | #BSUB -n 1 5 | #BSUB -M 10 6 | #BSUB -R rusage[mem=10] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_multimap/submitJob_Mapping.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_multimap/submitJob_Mapping.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_multimap/ 13 | rm -rf 
/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_multimap/submitJob_Mapping.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_multimap/submitJob_Mapping.e.txt 15 | #____----____----____ 16 | 17 | runR="Rscript --no-save " 18 | 19 | QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE179994/TCells/ForMapping" 20 | 21 | CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 22 | CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 23 | 24 | PIPELINE_FOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_multimap" 25 | 26 | qName=long 27 | wTime=100:00 28 | cn=1 29 | mem=300 30 | 31 | for dataPath in ${QUERYDATAFOLDER}/*; do 32 | dataFile=$(basename $dataPath) 33 | extension="${dataFile##*.}" 34 | filename="${dataFile%.*}" 35 | referenceDataPath="" 36 | if [ "${filename}" = "CD8" ]; then 37 | referenceDataPath=${CD8_ReferenceDataPath} 38 | fi 39 | if [ "${filename}" = "CD4" ]; then 40 | referenceDataPath=${CD4_ReferenceDataPath} 41 | fi 42 | JOBFOLDER=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE179994/MappingResult_MultiMap/${filename} 43 | if [ ! 
-d $JOBFOLDER ]; then 44 | mkdir -p $JOBFOLDER 45 | fi 46 | JOBNAME=Mapping_${filename} 47 | if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 48 | rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 49 | fi 50 | bsub \ 51 | -J ${JOBNAME} \ 52 | -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 53 | -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 54 | -cwd ${JOBFOLDER} \ 55 | -q ${qName} \ 56 | -W ${wTime} \ 57 | -n ${cn} \ 58 | -M ${mem} \ 59 | -R rusage[mem=${mem}] \ 60 | -B \ 61 | -N \ 62 | -u ychu2@mdanderson.org \ 63 | /bin/bash -c "module load R/4.0.3; Rscript ${PIPELINE_FOLDER}/Mapping.R -r ${referenceDataPath} -q ${dataPath} -o ${JOBFOLDER}" 64 | done 65 | 66 | -------------------------------------------------------------------------------- /fig6/GSE173351/7_mapping/submitJob_Mapping.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J submitJob_Mapping 2 | #BSUB -q short 3 | #BSUB -W 1:00 4 | #BSUB -n 1 5 | #BSUB -M 10 6 | #BSUB -R rusage[mem=10] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping/submitJob_Mapping.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping/submitJob_Mapping.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping/submitJob_Mapping.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping/submitJob_Mapping.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____ 18 | 19 | runR="Rscript --no-save " 20 | 21 | QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/6_extractT" 22 | 23 | 
CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 24 | CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 25 | 26 | PIPELINE_FOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping" 27 | OUT_ROOT="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/7_mapping" 28 | 29 | qName=highmem 30 | wTime=24:00 31 | cn=1 32 | mem=100 33 | 34 | for dataPath in ${QUERYDATAFOLDER}/*; do 35 | dataFile=$(basename $dataPath) 36 | extension="${dataFile##*.}" 37 | filename="${dataFile%.*}" 38 | referenceDataPath="" 39 | if [ "${filename}" = "CD8" ]; then 40 | referenceDataPath=${CD8_ReferenceDataPath} 41 | fi 42 | if [ "${filename}" = "CD4" ]; then 43 | referenceDataPath=${CD4_ReferenceDataPath} 44 | fi 45 | JOBFOLDER=${OUT_ROOT}/${filename} 46 | if [ ! 
-d $JOBFOLDER ]; then 47 | mkdir -p $JOBFOLDER 48 | fi 49 | JOBNAME=Mapping_${filename} 50 | if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 51 | rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 52 | fi 53 | bsub \ 54 | -J ${JOBNAME} \ 55 | -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 56 | -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 57 | -cwd ${JOBFOLDER} \ 58 | -q ${qName} \ 59 | -W ${wTime} \ 60 | -n ${cn} \ 61 | -M ${mem} \ 62 | -R rusage[mem=${mem}] \ 63 | -B \ 64 | -N \ 65 | -u ychu2@mdanderson.org \ 66 | /bin/bash -c "module load R/4.0.3; Rscript ${PIPELINE_FOLDER}/Mapping.R -r ${referenceDataPath} -q ${dataPath} -o ${JOBFOLDER}" 67 | done 68 | 69 | -------------------------------------------------------------------------------- /data_preprocess/NKTMAIT/p1_NKTMAIT_v6.sh: --------------------------------------------------------------------------------
#BSUB -J p1_NKTMAIT_v6
#BSUB -q highmem
#BSUB -W 24:00
#BSUB -n 1
#BSUB -M 500
#BSUB -R rusage[mem=500]
#BSUB -B
#BSUB -N
#BSUB -u ychu2@mdanderson.org
#BSUB -o /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/NKTMAIT/p1_NKTMAIT_v6.o.txt
#BSUB -e /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/NKTMAIT/p1_NKTMAIT_v6.e.txt
#BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/NKTMAIT/

# LSF batch script p1_NKTMAIT_v6.
# Steps: drop this job's previous log files, load the python/R modules, make
# sure the result folder exists, then call the shared UMAP_CLUSTER_JOBS_EMBEDED
# runner on ${ResD}/pca.rds. As passed below, UMAP, clustering and common
# analysis are switched to "NO" and only the callback is switched to "YES".

# Remove the stdout/stderr logs left by an earlier submission of this job.
jobDir=/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/NKTMAIT
rm -rf ${jobDir}/p1_NKTMAIT_v6.o.txt ${jobDir}/p1_NKTMAIT_v6.e.txt

module load python/3.7.3-anaconda
module load R/3.6.0
#____----____----____----____----____----____----____----____----____----____----____----____----____----____----____----____----

# Project layout.
projectPath=/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject
analysisPath=${projectPath}/analysis
mainscriptsFolder=${analysisPath}/scripts
pipelinesFolder=${mainscriptsFolder}/pipelines
srcD=${mainscriptsFolder}/src
paramD=${mainscriptsFolder}/params
DataD=${projectPath}/data/T/filterd
databaseD='/rsrch3/home/genomic_med/ychu2/share/database'

# Only referenced by the commented-out steps below.
runR="Rscript --no-save "

# Result folder for this pipeline version.
ResD=${analysisPath}/validate/NKTMAIT_V6
[ -d $ResD ] || mkdir -p $ResD

## do the job
echo "load data"

# Earlier pipeline stages, kept for reference (currently disabled):
#   submit multiple jobs to normalize and scale separately
#     ${runR} ${srcD}/load-scaleTObjectList.R -d ${DataD} -o ${ResD}/PCA.rds
#   split subcluster
#     ${runR} ${srcD}/split-cluster.R -i /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds -o ${ResD}/data.rds -c "7;1"
#   PCA and PCA gene visualisation
#     ${runR} ${srcD}/RunPCA_RPCA.R -d ${ResD}/data.rds -o ${ResD}/pca.rds
#     ${runR} ${srcD}/visualize_PCAgenes.R -d ${ResD}/pca.rds

# Hand over to the shared UMAP/cluster job runner.
umapClusterRunner=${HOME}/share/UMAP_CLUSTER_JOBS_EMBEDED/run.sh
${umapClusterRunner} \
    --inData ${ResD}/pca.rds \
    --reduction pca \
    --mainscriptsFolder ${mainscriptsFolder} \
    --parentJobName "p1_NKTMAIT_v6" \
    --npcArray "5" \
    --UMAPDistArray "0.1" \
    --ClusterResArray "0.3" \
    --NneighborsArray "35" \
    --toRunUMAP "NO" \
    --toRunClustering "NO" \
    --toRunCommonAnalysis "NO" \
    --toRunCallBack "YES" \
    --callBackPath "${pipelinesFolder}/split/NKTMAIT/callBack_NKTMAIT.sh"

-------------------------------------------------------------------------------- /fig6/GSE144649/6_mapping/submitJob_Mapping.sh:
-------------------------------------------------------------------------------- 1 | #BSUB -J submitJob_Mapping 2 | #BSUB -q short 3 | #BSUB -W 1:00 4 | #BSUB -n 1 5 | #BSUB -M 10 6 | #BSUB -R rusage[mem=10] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping/submitJob_Mapping.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping/submitJob_Mapping.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping/submitJob_Mapping.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping/submitJob_Mapping.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____ 18 | 19 | 20 | runR="Rscript --no-save " 21 | 22 | QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE144649/5_extractT" 23 | 24 | CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 25 | CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 26 | 27 | PIPELINE_FOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping" 28 | OUT_ROOT="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE144649/6_mapping" 29 | 30 | qName=highmem 31 | wTime=24:00 32 | cn=1 33 | mem=100 34 | 35 | for dataPath in ${QUERYDATAFOLDER}/*; do 36 | dataFile=$(basename $dataPath) 37 | 
extension="${dataFile##*.}" 38 | filename="${dataFile%.*}" 39 | referenceDataPath="" 40 | if [ "${filename}" = "CD8" ]; then 41 | referenceDataPath=${CD8_ReferenceDataPath} 42 | fi 43 | if [ "${filename}" = "CD4" ]; then 44 | referenceDataPath=${CD4_ReferenceDataPath} 45 | fi 46 | JOBFOLDER=${OUT_ROOT}/${filename} 47 | if [ ! -d $JOBFOLDER ]; then 48 | mkdir -p $JOBFOLDER 49 | fi 50 | JOBNAME=Mapping_${filename} 51 | if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 52 | rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 53 | fi 54 | bsub \ 55 | -J ${JOBNAME} \ 56 | -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 57 | -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 58 | -cwd ${JOBFOLDER} \ 59 | -q ${qName} \ 60 | -W ${wTime} \ 61 | -n ${cn} \ 62 | -M ${mem} \ 63 | -R rusage[mem=${mem}] \ 64 | -B \ 65 | -N \ 66 | -u ychu2@mdanderson.org \ 67 | /bin/bash -c "module load R/4.0.3; Rscript ${PIPELINE_FOLDER}/Mapping.R -r ${referenceDataPath} -q ${dataPath} -o ${JOBFOLDER}" 68 | done 69 | 70 | -------------------------------------------------------------------------------- /fig6/GSE179994/4_mapping_filter_CD4_Proliferative/submitJob_Mapping.sh: --------------------------------------------------------------------------------
#BSUB -J submitJob_Mapping
#BSUB -q short
#BSUB -W 3:00
#BSUB -n 1
#BSUB -M 10
#BSUB -R rusage[mem=10]
#BSUB -B
#BSUB -N
#BSUB -u ychu2@mdanderson.org
#BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_filter_CD4_Proliferative/submitJob_Mapping.o.txt
#BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_filter_CD4_Proliferative/submitJob_Mapping.e.txt
#BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_filter_CD4_Proliferative/

# Submitter for a single mapping job: sends one LSF job ("Mapping_proliferative")
# that runs Mapping.R with a fixed reference (-r), a fixed query (-q) and the
# job folder as output (-o). All paths are expanded here, at submission time.

# Remove the logs left by a previous run of this submitter.
rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_filter_CD4_Proliferative/submitJob_Mapping.o.txt /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_filter_CD4_Proliferative/submitJob_Mapping.e.txt

module load python/3.7.3-anaconda
module load R/4.0.3
#____----____----____

# Project layout (several of these are not used further down in this script).
PROJECT_FOLDER=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7
PROJECT_NAME=$(basename ${PROJECT_FOLDER})
DATA_FOLDER=${PROJECT_FOLDER}/data
RESULT_FOLDER=${PROJECT_FOLDER}/result
KNOWLEDGE_FOLDER=${PROJECT_FOLDER}/knowledge
CODE_FOLDER=${PROJECT_FOLDER}/code
SRC_FOLDER=${CODE_FOLDER}/src
PIPELINE_FOLDER=${CODE_FOLDER}/pipeline
PIPELINE_NAME=GSE179994__4_mapping_filter_CD4_Proliferative
PIPELINE_PATH_NAME=GSE179994/4_mapping_filter_CD4_Proliferative

runR="Rscript --no-save "

# Result root for this pipeline step.
OutDir=$RESULT_FOLDER/$PIPELINE_PATH_NAME
[ -d $OutDir ] || mkdir -p $OutDir
OUT_ROOT=$OutDir

# LSF resources requested for the mapping job itself.
qName=medium
wTime=24:00
cn=1
mem=100

# Query object and the reference it is mapped against.
dataPath=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE179994/2_extractTcell_proliferative/CD4ProlifSeuratObj_2022-10-31.rds
dataFile=$(basename $dataPath)
extension="${dataFile##*.}"
referenceDataPath="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/figureCode/result/0_write_sample_info/Proliferative_2022-10-20.rds"

# Job folder: holds the job's logs and is passed to Mapping.R as -o.
JOBNAME=Mapping_proliferative
JOBFOLDER=${OUT_ROOT}/proliferative
[ -d $JOBFOLDER ] || mkdir -p $JOBFOLDER

# Clear logs of an earlier submission with the same job name.
if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then
    rm ${JOBFOLDER}/${JOBNAME}.*.txt -f
fi

# Command line executed inside the submitted job.
jobCommand="module load R/4.0.3; Rscript /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_filter_CD4_Proliferative/Mapping.R -r ${referenceDataPath} -q ${dataPath} -o ${JOBFOLDER}"

bsub \
    -J ${JOBNAME} \
    -o ${JOBFOLDER}/${JOBNAME}.o.txt \
    -e ${JOBFOLDER}/${JOBNAME}.e.txt \
    -cwd ${JOBFOLDER} \
    -q ${qName} \
    -W ${wTime} \
    -n ${cn} \
    -M ${mem} \
    -R rusage[mem=${mem}] \
    -B \
    -N \
    -u ychu2@mdanderson.org \
    /bin/bash -c "${jobCommand}"

-------------------------------------------------------------------------------- /data_preprocess/NKTMAIT/callBack_NKTMAIT.sh: -------------------------------------------------------------------------------- 1 | ##!/usr/bin/env bash 2 | 3 | projectPath=/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject 4 | analysisPath=${projectPath}/analysis 5 | mainscriptsFolder=${analysisPath}/scripts 6 | pipelinesFolder=${mainscriptsFolder}/pipelines 7 | srcD=${analysisPath}/scripts/src 8 | paramD=${analysisPath}/scripts/params 9 | databaseD='/rsrch3/home/genomic_med/ychu2/share/database' 10 | 11 | runR="Rscript --no-save " 12 | 13 | dataPath=${1} 14 | 15 | rootFolder=$(dirname ${dataPath}) 16 | 17 | # ${runR} ${srcD}/iml_check_bubble-plot.R -d $dataPath -c "NKT_c0_CD8 MAIT-like_c1 MAIT_c2 MAIT-like_c3_SELL NKT_c4_KIR2D" 18 | 19 | ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m /rsrch3/home/genomic_med/ychu2/share/database/Pan-T/Reference/PMID34290406.txt 20 | 21 | # ${runR} ${srcD}/AlluvialPlot.R -l /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/NKGDT_V5/nPC_10/UMAP_dist_0.1_nneighbor_35/p1NKGDT_V4_10_UMAP_dist_0.1_nneighbor_35_CLUSTER_res_0.3/cluster.rds -r
$dataPath 22 | # ${runR} ${srcD}/DEG-bubble-plot.R -d $dataPath --DEGs $(dirname ${dataPath})/snn-single-markers.tsv --number 200 23 | # ${runR} ${srcD}/tissue-composition-plot.R -d $dataPath 24 | 25 | 26 | # FracArray=($(seq 0.1 0.1 0.8)) 27 | # for tempFrac in "${FracArray[@]}"; do 28 | # JOBNAME=cd8_c1c7_monocle2_${tempFrac} 29 | # tempFolder=$(dirname $dataPath) 30 | # JOBFOLDER=${tempFolder} 31 | # if [ ! -d $tempFolder ]; then 32 | # mkdir -p $tempFolder 33 | # fi 34 | # if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 35 | # rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 36 | # fi 37 | # bsub \ 38 | # -J ${JOBNAME} \ 39 | # -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 40 | # -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 41 | # -cwd ${JOBFOLDER} \ 42 | # -q long \ 43 | # -W 120:00 \ 44 | # -n 1 \ 45 | # -M 800\ 46 | # -R rusage[mem=800] \ 47 | # -B \ 48 | # -N \ 49 | # -u ychu2@mdanderson.org \ 50 | # /bin/bash -c "module load python/3.7.3-anaconda; module load R/3.6.0; ${runR} ${srcD}/monocle2.R -d ${dataPath} -f ${tempFrac}" 51 | # done 52 | 53 | # JOBNAME=cd8_c1c7_monocle3 54 | # tempFolder=$(dirname $dataPath) 55 | # JOBFOLDER=${tempFolder} 56 | # if [ ! 
-d $tempFolder ]; then 57 | # mkdir -p $tempFolder 58 | # fi 59 | # if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 60 | # rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 61 | # fi 62 | # bsub \ 63 | # -J ${JOBNAME} \ 64 | # -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 65 | # -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 66 | # -cwd ${JOBFOLDER} \ 67 | # -q long \ 68 | # -W 120:00 \ 69 | # -n 1 \ 70 | # -M 500\ 71 | # -R rusage[mem=500] \ 72 | # -B \ 73 | # -N \ 74 | # -u ychu2@mdanderson.org \ 75 | # /bin/bash -c "module load python/3.7.3-anaconda; module load R/3.6.0; ${runR} ${srcD}/trajectory_monocle3_from_seurat.r -d ${dataPath} -s $(dirname ${dataPath})/startCells.txt" 76 | -------------------------------------------------------------------------------- /fig6/GSE169246/4_mapping/submitJob_Mapping_.sh: --------------------------------------------------------------------------------
#BSUB -J mapping
#BSUB -q short
#BSUB -W 1:00
#BSUB -n 1
#BSUB -M 10
#BSUB -R rusage[mem=10]
#BSUB -B
#BSUB -N
#BSUB -u ychu2@mdanderson.org
#BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping/mapping.o.txt
#BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping/mapping.e.txt
#BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping/
rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping/mapping.o.txt
rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping/mapping.e.txt
module load python/3.7.3-anaconda
module load R/3.6.0
#____----____----____----____----____----____----____----____----____----____----____----____----____----____----____----____----

# Submitter: for every entry in QUERYDATAFOLDER whose base name (extension
# stripped) is "CD8" or "CD4", submit one LSF job that runs Mapping.R with the
# matching reference (-r), the entry as query (-q) and a per-entry job folder
# as output (-o). Entries with any other name are skipped with a message on
# stderr, so no job is submitted with an empty -r argument.

QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/TCells/forMapping"

CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds"
CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds"

PIPELINE_FOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping"

# LSF resources requested for each mapping job.
qName=long
wTime=100:00
cn=1
mem=300

for dataPath in "${QUERYDATAFOLDER}"/*; do
    dataFile=$(basename "${dataPath}")
    filename="${dataFile%.*}"

    # Pick the reference by base name; anything else (including the literal
    # "*" left by an unmatched glob on an empty folder) is not submitted.
    case "${filename}" in
        CD8) referenceDataPath=${CD8_ReferenceDataPath} ;;
        CD4) referenceDataPath=${CD4_ReferenceDataPath} ;;
        *)
            echo "skip ${dataPath}: no reference defined for '${filename}'" >&2
            continue
            ;;
    esac

    JOBFOLDER=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/MappingResult/${filename}
    mkdir -p "${JOBFOLDER}"

    JOBNAME=Mapping_${filename}
    # Clear logs of an earlier submission with the same job name.
    if [ -f "${JOBFOLDER}/${JOBNAME}.o.txt" ] || [ -f "${JOBFOLDER}/${JOBNAME}.e.txt" ]; then
        rm -f "${JOBFOLDER}/${JOBNAME}".*.txt
    fi

    # The -c string is expanded here, at submission time. The rusage
    # expression is quoted so the shell cannot treat [..] as a glob.
    bsub \
        -J "${JOBNAME}" \
        -o "${JOBFOLDER}/${JOBNAME}.o.txt" \
        -e "${JOBFOLDER}/${JOBNAME}.e.txt" \
        -cwd "${JOBFOLDER}" \
        -q "${qName}" \
        -W "${wTime}" \
        -n "${cn}" \
        -M "${mem}" \
        -R "rusage[mem=${mem}]" \
        -B \
        -N \
        -u ychu2@mdanderson.org \
        /bin/bash -c "module load R/4.0.3; Rscript ${PIPELINE_FOLDER}/Mapping.R -r ${referenceDataPath} -q ${dataPath} -o ${JOBFOLDER}"
done

-------------------------------------------------------------------------------- /fig6/GSE169246/4_mapping_multimap/submitJob_Mapping.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J submitJob_Mapping 2 | #BSUB -q short 3 | #BSUB -W 1:00 4 | #BSUB -n 1 5 | #BSUB -M 1 6 | #BSUB -R rusage[mem=1] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping_multimap/submitJob_Mapping.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping_multimap/submitJob_Mapping.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping_multimap/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping_multimap/submitJob_Mapping.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping_multimap/submitJob_Mapping.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____ 18 | 19 | 20 | runR="Rscript --no-save " 21 |
QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/TCells/forMapping" 22 | CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 23 | CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 24 | PIPELINE_FOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping_multimap" 25 | qName=long 26 | wTime=100:00 27 | cn=1 28 | mem=300 29 | 30 | for dataPath in ${QUERYDATAFOLDER}/*; do 31 | dataFile=$(basename $dataPath) 32 | extension="${dataFile##*.}" 33 | filename="${dataFile%.*}" 34 | referenceDataPath="" 35 | if [ "${filename}" = "CD8" ]; then 36 | referenceDataPath=${CD8_ReferenceDataPath} 37 | fi 38 | if [ "${filename}" = "CD4" ]; then 39 | referenceDataPath=${CD4_ReferenceDataPath} 40 | fi 41 | 42 | JOBFOLDER=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/MappingResult_MultiMap/${filename} 43 | if [ ! 
-d $JOBFOLDER ]; then 44 | mkdir -p $JOBFOLDER 45 | fi 46 | 47 | JOBNAME=Mapping_${filename} 48 | if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 49 | rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 50 | fi 51 | bsub \ 52 | -J ${JOBNAME} \ 53 | -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 54 | -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 55 | -cwd ${JOBFOLDER} \ 56 | -q ${qName} \ 57 | -W ${wTime} \ 58 | -n ${cn} \ 59 | -M ${mem} \ 60 | -R rusage[mem=${mem}] \ 61 | -B \ 62 | -N \ 63 | -u ychu2@mdanderson.org \ 64 | /bin/bash -c "module load R/4.0.3; Rscript ${PIPELINE_FOLDER}/Mapping.R -r ${referenceDataPath} -q ${dataPath} -o ${JOBFOLDER}" 65 | done 66 | 67 | -------------------------------------------------------------------------------- /fig6/GSE173351/7_mapping_filter/submitJob_Mapping.sh: --------------------------------------------------------------------------------
#BSUB -J submitJob_Mapping
#BSUB -q short
#BSUB -W 1:00
#BSUB -n 1
#BSUB -M 10
#BSUB -R rusage[mem=10]
#BSUB -B
#BSUB -N
#BSUB -u ychu2@mdanderson.org
#BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_filter/submitJob_Mapping.o.txt
#BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_filter/submitJob_Mapping.e.txt
#BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_filter/
rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_filter/submitJob_Mapping.o.txt
rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_filter/submitJob_Mapping.e.txt
module load python/3.7.3-anaconda
module load R/3.6.0
#____----____----____

# Submitter: for every entry in QUERYDATAFOLDER whose base name (extension
# stripped) is "CD8" or "CD4", submit one LSF job that runs Mapping.R with the
# matching reference (-r), the entry as query (-q) and ${OUT_ROOT}/<name> as
# output (-o). Entries with any other name are skipped with a message on
# stderr, so no job is submitted with an empty -r argument.

QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/6_extractT"
CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds"
CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds"

PIPELINE_FOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_filter"
OUT_ROOT="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/7_mapping_filter"

# LSF resources requested for each mapping job.
qName=highmem
wTime=24:00
cn=1
mem=100

for dataPath in "${QUERYDATAFOLDER}"/*; do
    dataFile=$(basename "${dataPath}")
    filename="${dataFile%.*}"

    # Pick the reference by base name; anything else (including the literal
    # "*" left by an unmatched glob on an empty folder) is not submitted.
    case "${filename}" in
        CD8) referenceDataPath=${CD8_ReferenceDataPath} ;;
        CD4) referenceDataPath=${CD4_ReferenceDataPath} ;;
        *)
            echo "skip ${dataPath}: no reference defined for '${filename}'" >&2
            continue
            ;;
    esac

    JOBFOLDER=${OUT_ROOT}/${filename}
    mkdir -p "${JOBFOLDER}"

    JOBNAME=Mapping_${filename}
    # Clear logs of an earlier submission with the same job name.
    if [ -f "${JOBFOLDER}/${JOBNAME}.o.txt" ] || [ -f "${JOBFOLDER}/${JOBNAME}.e.txt" ]; then
        rm -f "${JOBFOLDER}/${JOBNAME}".*.txt
    fi

    # The -c string is expanded here, at submission time. The rusage
    # expression is quoted so the shell cannot treat [..] as a glob.
    bsub \
        -J "${JOBNAME}" \
        -o "${JOBFOLDER}/${JOBNAME}.o.txt" \
        -e "${JOBFOLDER}/${JOBNAME}.e.txt" \
        -cwd "${JOBFOLDER}" \
        -q "${qName}" \
        -W "${wTime}" \
        -n "${cn}" \
        -M "${mem}" \
        -R "rusage[mem=${mem}]" \
        -B \
        -N \
        -u ychu2@mdanderson.org \
        /bin/bash -c "module load R/4.0.3; Rscript ${PIPELINE_FOLDER}/Mapping.R -r ${referenceDataPath} -q ${dataPath} -o ${JOBFOLDER}"
done

-------------------------------------------------------------------------------- /fig6/GSE179994/4_mapping_filter/submitJob_Mapping.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J submitJob_Mapping 2 | #BSUB -q short 3 | #BSUB -W 1:00 4 | #BSUB -n 1 5 | #BSUB -M 10 6 | #BSUB -R rusage[mem=10] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_filter/submitJob_Mapping.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_filter/submitJob_Mapping.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_filter/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_filter/submitJob_Mapping.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_filter/submitJob_Mapping.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____ 18 | 19 | 20 | runR="Rscript --no-save " 21 |
QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE179994/2_extractTcell_proliferative/ForMapping" 22 | CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 23 | CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 24 | 25 | PIPELINE_FOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping_filter" 26 | 27 | qName=e40long 28 | wTime=100:00 29 | cn=1 30 | mem=300 31 | 32 | for dataPath in ${QUERYDATAFOLDER}/*; do 33 | dataFile=$(basename $dataPath) 34 | extension="${dataFile##*.}" 35 | filename="${dataFile%.*}" 36 | referenceDataPath="" 37 | if [ "${filename}" = "CD8" ]; then 38 | referenceDataPath=${CD8_ReferenceDataPath} 39 | fi 40 | if [ "${filename}" = "CD4" ]; then 41 | referenceDataPath=${CD4_ReferenceDataPath} 42 | fi 43 | JOBFOLDER=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE179994/MappingResult_filter/${filename} 44 | if [ ! 
-d $JOBFOLDER ]; then 45 | mkdir -p $JOBFOLDER 46 | fi 47 | JOBNAME=Mapping_${filename} 48 | if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 49 | rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 50 | fi 51 | bsub \ 52 | -J ${JOBNAME} \ 53 | -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 54 | -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 55 | -cwd ${JOBFOLDER} \ 56 | -q ${qName} \ 57 | -W ${wTime} \ 58 | -n ${cn} \ 59 | -M ${mem} \ 60 | -R rusage[mem=${mem}] \ 61 | -B \ 62 | -N \ 63 | -u ychu2@mdanderson.org \ 64 | /bin/bash -c "module load R/4.0.3; Rscript ${PIPELINE_FOLDER}/Mapping.R -r ${referenceDataPath} -q ${dataPath} -o ${JOBFOLDER}" 65 | done 66 | 67 | -------------------------------------------------------------------------------- /fig6/GSE144649/6_mapping_multimap/submitJob_Mapping.sh: --------------------------------------------------------------------------------
#BSUB -J submitJob_Mapping
#BSUB -q short
#BSUB -W 1:00
#BSUB -n 1
#BSUB -M 10
#BSUB -R rusage[mem=10]
#BSUB -B
#BSUB -N
#BSUB -u ychu2@mdanderson.org
#BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping_multimap/submitJob_Mapping.o.txt
#BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping_multimap/submitJob_Mapping.e.txt
#BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping_multimap/
rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping_multimap/submitJob_Mapping.o.txt
rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping_multimap/submitJob_Mapping.e.txt
module load python/3.7.3-anaconda
module load R/3.6.0
#____----____----____

# Submitter: for every entry in QUERYDATAFOLDER whose base name (extension
# stripped) is "CD8" or "CD4", submit one LSF job that runs Mapping.R with the
# matching reference (-r), the entry as query (-q) and ${OUTFOLDER}/<name> as
# output (-o). Entries with any other name are skipped with a message on
# stderr, so no job is submitted with an empty -r argument.
#
# The #BSUB log/cwd paths and the rm lines above point at this script's own
# folder (6_mapping_multimap), the same folder PIPELINE_FOLDER names.

QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE144649/5_extractT"

CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds"
CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds"

PIPELINE_FOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping_multimap"
OUTFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE144649/6_mapping_multimap"

# LSF resources requested for each mapping job.
qName=highmem
wTime=100:00
cn=1
mem=300

for dataPath in "${QUERYDATAFOLDER}"/*; do
    dataFile=$(basename "${dataPath}")
    filename="${dataFile%.*}"

    # Pick the reference by base name; anything else (including the literal
    # "*" left by an unmatched glob on an empty folder) is not submitted.
    case "${filename}" in
        CD8) referenceDataPath=${CD8_ReferenceDataPath} ;;
        CD4) referenceDataPath=${CD4_ReferenceDataPath} ;;
        *)
            echo "skip ${dataPath}: no reference defined for '${filename}'" >&2
            continue
            ;;
    esac

    # Job folders live under the result tree (OUTFOLDER), not under the code
    # tree (PIPELINE_FOLDER).
    # NOTE(review): earlier runs wrote to ${PIPELINE_FOLDER}/<name>; confirm
    # nothing downstream still reads results from there.
    JOBFOLDER=${OUTFOLDER}/${filename}
    mkdir -p "${JOBFOLDER}"

    JOBNAME=Mapping_${filename}
    # Clear logs of an earlier submission with the same job name.
    if [ -f "${JOBFOLDER}/${JOBNAME}.o.txt" ] || [ -f "${JOBFOLDER}/${JOBNAME}.e.txt" ]; then
        rm -f "${JOBFOLDER}/${JOBNAME}".*.txt
    fi

    # The -c string is expanded here, at submission time. The rusage
    # expression is quoted so the shell cannot treat [..] as a glob.
    bsub \
        -J "${JOBNAME}" \
        -o "${JOBFOLDER}/${JOBNAME}.o.txt" \
        -e "${JOBFOLDER}/${JOBNAME}.e.txt" \
        -cwd "${JOBFOLDER}" \
        -q "${qName}" \
        -W "${wTime}" \
        -n "${cn}" \
        -M "${mem}" \
        -R "rusage[mem=${mem}]" \
        -B \
        -N \
        -u ychu2@mdanderson.org \
        /bin/bash -c "module load R/4.0.3; Rscript ${PIPELINE_FOLDER}/Mapping.R -r ${referenceDataPath} -q ${dataPath} -o ${JOBFOLDER}"
done
-------------------------------------------------------------------------------- /fig6/GSE173351/7_mapping_multimap/submitJob_Mapping.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J submitJob_Mapping 2 | #BSUB -q short 3 | #BSUB -W 1:00 4 | #BSUB -n 1 5 | #BSUB -M 10 6 | #BSUB -R rusage[mem=10] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_multimap/submitJob_Mapping.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_multimap/submitJob_Mapping.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_multimap/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_multimap/submitJob_Mapping.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_multimap/submitJob_Mapping.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____ 18 | 19 | runR="Rscript --no-save " 20 | 21 |
QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/6_extractT" 22 | 23 | CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 24 | CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 25 | 26 | 27 | PIPELINE_FOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_multimap" 28 | OUT_ROOT="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/7_mapping_multimap" 29 | 30 | qName=highmem 31 | wTime=100:00 32 | cn=1 33 | mem=300 34 | 35 | for dataPath in ${QUERYDATAFOLDER}/*; do 36 | dataFile=$(basename $dataPath) 37 | extension="${dataFile##*.}" 38 | filename="${dataFile%.*}" 39 | referenceDataPath="" 40 | if [ "${filename}" = "CD8" ]; then 41 | referenceDataPath=${CD8_ReferenceDataPath} 42 | fi 43 | if [ "${filename}" = "CD4" ]; then 44 | referenceDataPath=${CD4_ReferenceDataPath} 45 | fi 46 | JOBFOLDER=${OUT_ROOT}/${filename} 47 | if [ ! 
-d $JOBFOLDER ]; then 48 | mkdir -p $JOBFOLDER 49 | fi 50 | JOBNAME=Mapping_${filename} 51 | if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 52 | rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 53 | fi 54 | bsub \ 55 | -J ${JOBNAME} \ 56 | -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 57 | -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 58 | -cwd ${JOBFOLDER} \ 59 | -q ${qName} \ 60 | -W ${wTime} \ 61 | -n ${cn} \ 62 | -M ${mem} \ 63 | -R rusage[mem=${mem}] \ 64 | -B \ 65 | -N \ 66 | -u ychu2@mdanderson.org \ 67 | /bin/bash -c "module load R/4.0.3; Rscript ${PIPELINE_FOLDER}/Mapping.R -r ${referenceDataPath} -q ${dataPath} -o ${JOBFOLDER}" 68 | done 69 | -------------------------------------------------------------------------------- /data_preprocess/proliferative/p1_Proliferative_V4.sh: --------------------------------------------------------------------------------
#BSUB -J p1_Proliferative_V4
#BSUB -q medium
#BSUB -W 24:00
#BSUB -n 1
#BSUB -M 500
#BSUB -R rusage[mem=500]
#BSUB -B
#BSUB -N
#BSUB -u ychu2@mdanderson.org
#BSUB -o /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/proliferative/p1_Proliferative_V4.o.txt
#BSUB -e /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/proliferative/p1_Proliferative_V4.e.txt
#BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/proliferative/

# LSF batch script p1_Proliferative_V4.
# Steps: drop this job's previous log files, load the python/R modules, make
# sure the result folder exists, then call the shared UMAP_CLUSTER_JOBS_EMBEDED
# runner on ${ResD}/pca.rds. As passed below, UMAP, clustering and common
# analysis are switched to "NO" and only the callback is switched to "YES".

# Remove the stdout/stderr logs left by an earlier submission of this job.
jobDir=/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/pipelines/split/proliferative
rm -rf ${jobDir}/p1_Proliferative_V4.o.txt ${jobDir}/p1_Proliferative_V4.e.txt

module load python/3.7.3-anaconda
module load R/3.6.0
#____----____----____----____----____----____----____----____----____----____----____----____----____----____----____----____----

# Project layout.
projectPath=/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject
analysisPath=${projectPath}/analysis
mainscriptsFolder=${analysisPath}/scripts
pipelinesFolder=${mainscriptsFolder}/pipelines
srcD=${mainscriptsFolder}/src
paramD=${mainscriptsFolder}/params
DataD=${projectPath}/data/T/filterd
databaseD='/rsrch3/home/genomic_med/ychu2/share/database'

# Only referenced by the commented-out steps below.
runR="Rscript --no-save "

# Result folder for this pipeline version.
ResD=${analysisPath}/validate/Proliferative_V4
[ -d $ResD ] || mkdir -p $ResD

## do the job
echo "load data"

# Earlier pipeline stages, kept for reference (currently disabled):
#   submit multiple jobs to normalize and scale separately
#     ${runR} ${srcD}/load-scaleTObjectList.R -d ${DataD} -o ${ResD}/PCA.rds
#   split subcluster
#     ${runR} ${srcD}/split-cluster.R -i /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds -o ${ResD}/data.rds -c "7;1"
#   PCA and PCA gene visualisation
#     ${runR} ${srcD}/RunPCA_RPCA.R -d ${ResD}/data.rds -o ${ResD}/pca.rds
#     ${runR} ${srcD}/visualize_PCAgenes.R -d ${ResD}/pca.rds

# Hand over to the shared UMAP/cluster job runner.
umapClusterRunner=${HOME}/share/UMAP_CLUSTER_JOBS_EMBEDED/run.sh
${umapClusterRunner} \
    --inData ${ResD}/pca.rds \
    --reduction pca \
    --mainscriptsFolder ${mainscriptsFolder} \
    --parentJobName "p1_Proliferative_V4" \
    --npcArray "15" \
    --UMAPDistArray "0.1" \
    --ClusterResArray "0.3" \
    --NneighborsArray "35" \
    --toRunUMAP "NO" \
    --toRunClustering "NO" \
    --toRunCommonAnalysis "NO" \
    --toRunCallBack "YES" \
    --callBackPath "${pipelinesFolder}/split/proliferative/callBack_Proliferative.sh"

--------------------------------------------------------------------------------
/fig6/GSE179994/4_mapping/submitJob_Mapping.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J submitJob_Mapping 2 | #BSUB -q short 3 | #BSUB -W 1:00 4 | #BSUB -n 1 5 | #BSUB -M 10 6 | #BSUB -R rusage[mem=10] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping/submitJob_Mapping.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping/submitJob_Mapping.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping/submitJob_Mapping.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping/submitJob_Mapping.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____----____----____----____----____----____----____----____----____----____----____----____----____----____---- 18 | 19 | runR="Rscript --no-save " 20 | 21 | QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE179994/TCells/ForMapping" 22 | 23 | CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 24 | CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 25 | 26 | PIPELINE_FOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/4_mapping" 27 | 28 | qName=long 29 | wTime=100:00 30 | cn=1 31 | mem=300 32 | 33 | for dataPath in ${QUERYDATAFOLDER}/*; do 34 | 
dataFile=$(basename $dataPath) 35 | extension="${dataFile##*.}" 36 | filename="${dataFile%.*}" 37 | referenceDataPath="" 38 | if [ "${filename}" = "CD8" ]; then 39 | referenceDataPath=${CD8_ReferenceDataPath} 40 | fi 41 | if [ "${filename}" = "CD4" ]; then 42 | referenceDataPath=${CD4_ReferenceDataPath} 43 | fi 44 | JOBFOLDER=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE179994/MappingResult/${filename} 45 | if [ ! -d $JOBFOLDER ]; then 46 | mkdir -p $JOBFOLDER 47 | fi 48 | JOBNAME=Mapping_${filename} 49 | if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 50 | rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 51 | fi 52 | bsub \ 53 | -J ${JOBNAME} \ 54 | -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 55 | -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 56 | -cwd ${JOBFOLDER} \ 57 | -q ${qName} \ 58 | -W ${wTime} \ 59 | -n ${cn} \ 60 | -M ${mem} \ 61 | -R rusage[mem=${mem}] \ 62 | -B \ 63 | -N \ 64 | -u ychu2@mdanderson.org \ 65 | /bin/bash -c "module load R/4.0.3; Rscript ${PIPELINE_FOLDER}/Mapping.R -r ${referenceDataPath} -q ${dataPath} -o ${JOBFOLDER}" 66 | done 67 | 68 | -------------------------------------------------------------------------------- /fig6/GSE144649/6_mapping_filter_genes/submitJob_Mapping.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J submitJob_Mapping 2 | #BSUB -q short 3 | #BSUB -W 1:00 4 | #BSUB -n 1 5 | #BSUB -M 10 6 | #BSUB -R rusage[mem=10] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping_filter_genes/submitJob_Mapping.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping_filter_genes/submitJob_Mapping.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping_filter_genes/ 13 | rm -rf 
/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping_filter_genes/submitJob_Mapping.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping_filter_genes/submitJob_Mapping.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/3.6.0 17 | #____----____----____ 18 | 19 | runR="Rscript --no-save " 20 | QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE144649/5_extractT" 21 | 22 | CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 23 | CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 24 | 25 | PIPELINE_FOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE144649/6_mapping_filter_genes/" 26 | OUT_ROOT="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE144649/6_mapping_filter" 27 | 28 | qName=highmem 29 | wTime=24:00 30 | cn=1 31 | mem=100 32 | 33 | for dataPath in ${QUERYDATAFOLDER}/*; do 34 | dataFile=$(basename $dataPath) 35 | extension="${dataFile##*.}" 36 | filename="${dataFile%.*}" 37 | referenceDataPath="" 38 | if [ "${filename}" = "CD8" ]; then 39 | referenceDataPath=${CD8_ReferenceDataPath} 40 | fi 41 | if [ "${filename}" = "CD4" ]; then 42 | referenceDataPath=${CD4_ReferenceDataPath} 43 | fi 44 | JOBFOLDER=${OUT_ROOT}/${filename} 45 | if [ ! 
-d $JOBFOLDER ]; then 46 | mkdir -p $JOBFOLDER 47 | fi 48 | JOBNAME=Mapping_${filename} 49 | if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 50 | rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 51 | fi 52 | bsub \ 53 | -J ${JOBNAME} \ 54 | -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 55 | -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 56 | -cwd ${JOBFOLDER} \ 57 | -q ${qName} \ 58 | -W ${wTime} \ 59 | -n ${cn} \ 60 | -M ${mem} \ 61 | -R rusage[mem=${mem}] \ 62 | -B \ 63 | -N \ 64 | -u ychu2@mdanderson.org \ 65 | /bin/bash -c "module load R/4.0.3; Rscript ${PIPELINE_FOLDER}/Mapping.R -r ${referenceDataPath} -q ${dataPath} -o ${JOBFOLDER}" 66 | done 67 | 68 | -------------------------------------------------------------------------------- /data_preprocess/CD4/callBack_CD4.sh: -------------------------------------------------------------------------------- 1 | ##!/usr/bin/env bash 2 | 3 | projectPath=/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject 4 | analysisPath=${projectPath}/analysis 5 | mainscriptsFolder=${analysisPath}/scripts 6 | pipelinesFolder=${mainscriptsFolder}/pipelines 7 | srcD=${analysisPath}/scripts/src 8 | paramD=${analysisPath}/scripts/params 9 | databaseD='/rsrch3/home/genomic_med/ychu2/share/database' 10 | 11 | runR="Rscript --no-save " 12 | 13 | dataPath=${1} 14 | 15 | # ${runR} ${srcD}/iml_check_bubble-plot.R -d $dataPath -c "CD4_c0_Tcm CD4_c1_Treg CD4_c2_Naive_RP CD4_c3_Tfh CD4_c4_Stressed CD4_c5_Cytotoxic CD4_c6_Naive_FHIT CD4_c7_Naive_TCEA3 CD4_c8_Th17 CD4_c9_Naive_TCF7_SLC40A1 CD4_c10_Naive_TCF7_ANKRD55 CD4_c11_IFN" 16 | 17 | echo "begin" 18 | ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m /rsrch3/home/genomic_med/ychu2/share/database/Pan-T/Reference/PMID34290406.txt 19 | 20 | # rootFolder=$(dirname ${dataPath}) 21 | # tempMarkerFolder=/rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/Markers/CD4/Genelistbyfunction 22 | # for markerFile in $(ls $tempMarkerFolder); do 23 | # 
JOBNAME=job_${markerFile%%.*} 24 | # tempFolder=${rootFolder} 25 | # JOBFOLDER=${tempFolder} 26 | # if [ ! -d $tempFolder ]; then 27 | # mkdir -p $tempFolder 28 | # fi 29 | # if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 30 | # rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 31 | # fi 32 | # bsub \ 33 | # -J ${JOBNAME} \ 34 | # -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 35 | # -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 36 | # -cwd ${JOBFOLDER} \ 37 | # -q short \ 38 | # -W 3:00 \ 39 | # -n 1 \ 40 | # -M 100\ 41 | # -R rusage[mem=100] \ 42 | # -B \ 43 | # -N \ 44 | # -u ychu2@mdanderson.org \ 45 | # /bin/bash -c "module load python/3.7.3-anaconda; module load R/3.6.0; ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${tempMarkerFolder}/${markerFile}; ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${tempMarkerFolder}/${markerFile}; ${runR} ${srcD}/feature-plot.R -d $dataPath -o $(dirname ${dataPath})/featureplot/${markerFile%%.*} -c ${paramD}/feature-plot-origin.json -m ${tempMarkerFolder}/${markerFile}" 46 | # done 47 | 48 | # ${runR} ${srcD}/marker-classification.R -d $(dirname ${dataPath})/snn-single-markers.tsv -m /rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/Markers/CD4/Genelistbyfunction 49 | 50 | # ${runR} ${srcD}/tissue-composition-plot.R -d $dataPath 51 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Markers/CD4/markers/CD4_naive_clusters_comparison.txt 52 | 53 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/TCD4.txt 54 | # ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 55 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 56 | # ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${databaseD}/Pan-T/CD4/CD4Markers.txt 57 | # ${runR} ${srcD}/monocleForTest.R -d $dataPath -n 8 58 | 
--------------------------------------------------------------------------------
/data_preprocess/0_run_seurat_pipeline/RunUMAPJobs.sh:
--------------------------------------------------------------------------------
1 | ##!/usr/bin/env bash
2 | #
3 | # Optionally run RunUMAP.R, then submit one LSF job per clustering resolution;
4 | # each job runs FindClusterJobs.sh on ${currentFolder}/umap.rds.
5 | #
6 | # Positional arguments:
7 | #   1  mainscriptFolder     forwarded to FindClusterJobs.sh
8 | #   2  inData               input object for RunUMAP.R (-d)
9 | #   3  currentFolder        holds umap.rds and one sub-folder per resolution
10 | #   4  dist                 RunUMAP.R -i
11 | #   5  reduction            RunUMAP.R -r; forwarded to FindClusterJobs.sh
12 | #   6  npc                  RunUMAP.R -n; forwarded to FindClusterJobs.sh
13 | #   7  parentJobName        prefix of every submitted job name
14 | #   8  ClusterArray         ';'-separated list of clustering resolutions
15 | #   9  nneighbors           RunUMAP.R -e
16 | #   10 toRunUMAP            "YES" runs RunUMAP.R first
17 | #   11 toRunClustering      "NO" switches to fixed resources; forwarded
18 | #   12 toRunCommonAnalysis  forwarded to FindClusterJobs.sh
19 | #   13 toRunCallBack        forwarded to FindClusterJobs.sh
20 | #   14 callBackPath         forwarded to FindClusterJobs.sh
21 | #
22 | # NOTE(review): line 1 starts with "##", so it is a plain comment and not a
23 | # shebang -- confirm that is intentional before changing it.
24 |
25 | module load python/3.7.3-anaconda
26 | module load R/4.0.3
27 |
28 | mainscriptFolder=${1}
29 | inData=${2}
30 | currentFolder=${3}
31 | dist=${4}
32 | reduction=${5}
33 | npc=${6}
34 | parentJobName=${7}
35 | ClusterArray=${8}
36 |
37 | # Turn the ';'-separated resolution list into an array.
38 | IFS=';' read -r -a ClusterArray <<< "$ClusterArray"
39 |
40 | nneighbors=${9}
41 | toRunUMAP=${10}
42 | toRunClustering=${11}
43 | toRunCommonAnalysis=${12}
44 | toRunCallBack=${13}
45 | callBackPath=${14}
46 |
47 | echo "RunUMAPJobs parameters:
48 | mainscriptFolder=${mainscriptFolder}
49 | inData=${inData}
50 | currentFolder=${currentFolder}
51 | dist=${dist}
52 | reduction=${reduction}
53 | npc=${npc}
54 | parentJobName=${parentJobName}
55 | ClusterArray[@]=${ClusterArray[@]}
56 | nneighbors=${nneighbors}
57 | toRunUMAP=${toRunUMAP}
58 | toRunClustering=${toRunClustering}
59 | toRunCommonAnalysis=${toRunCommonAnalysis}
60 | toRunCallBack=${toRunCallBack}
61 | callBackPath=${callBackPath}
62 | "
63 |
64 | srcD=${HOME}/configs/public/pipeline/UMAP_CLUSTER_JOBS_EMBEDED
65 | runR="Rscript --no-save "
66 |
67 | # Quoted so that a missing argument compares as "" instead of breaking the test.
68 | if [ "${toRunUMAP}" = "YES" ]; then
69 |     ${runR} ${srcD}/RunUMAP.R -d ${inData} -o ${currentFolder}/umap.rds -r ${reduction} -n ${npc} -i ${dist} -e ${nneighbors}
70 | fi
71 |
72 | # From here on the input is the UMAP output; its size drives the resource request.
73 | inData=${currentFolder}/umap.rds
74 | if [ -f "${inData}" ]; then
75 |     mmForInData=$(du --apparent-size --block-size=1000000000 "${inData}" | awk '{print $1}') # GB
76 | else
77 |     # Same numbers the arithmetic below used to produce from an empty value,
78 |     # but without the "integer expression expected" noise.
79 |     echo "WARNING: ${inData} not found; requesting resources as for an empty input" >&2
80 |     mmForInData=0
81 | fi
82 | mmForInData=${mmForInData:-0}
83 | if [ "$mmForInData" -gt "1200" ]; then
84 |     mmForInData=350
85 | fi
86 |
87 | hoursTime=$(( mmForInData + 24 ))
88 | wTime="${hoursTime}:00"
89 |
90 | # NOTE(review): hoursTime is mmForInData + 24, so for a non-negative size the
91 | # first two branches below can never be taken -- confirm the intended thresholds.
92 | qName="e80short"
93 | if [ "${hoursTime}" -lt "2" ]; then
94 |     qName="e80short"
95 | elif [ "${hoursTime}" -lt "24" ]; then
96 |     qName="e80medium"
97 | elif [ "${hoursTime}" -lt "120" ]; then
98 |     qName="highmem"
99 | else
100 |     qName="vhighmem"
101 | fi
102 |
103 | # When clustering is not re-run, the size-based values are replaced by fixed ones.
104 | if [ "$toRunClustering" = "NO" ]; then
105 |     mmForInData=320
106 |     qName="highmem"
107 |     wTime="72:00"
108 | fi
109 |
110 | # Computed once so that -M and rusage[mem=...] always carry the same value.
111 | memReq=$(( mmForInData + 200 ))
112 |
113 | rootFolder=$currentFolder
114 | for res in "${ClusterArray[@]}"; do
115 |     JOBNAME=${parentJobName}_CLUSTER_res_${res}
116 |     tempFolder=${rootFolder}/${JOBNAME}
117 |     mkdir -p "$tempFolder"
118 |     JOBFOLDER=${tempFolder}
119 |     # Clear the logs left behind by a previous run of the same job.
120 |     if [ -f "${JOBFOLDER}/${JOBNAME}.o.txt" ] || [ -f "${JOBFOLDER}/${JOBNAME}.e.txt" ]; then
121 |         rm -f "${JOBFOLDER}/${JOBNAME}".*.txt
122 |     fi
123 |
124 |     # rusage[...] is quoted so the shell never treats the brackets as a glob.
125 |     bsub \
126 |         -J "${JOBNAME}" \
127 |         -o "${JOBFOLDER}/${JOBNAME}.o.txt" \
128 |         -e "${JOBFOLDER}/${JOBNAME}.e.txt" \
129 |         -cwd "${JOBFOLDER}" \
130 |         -q "${qName}" \
131 |         -W "${wTime}" \
132 |         -n 1 \
133 |         -M "${memReq}" \
134 |         -R "rusage[mem=${memReq}]" \
135 |         -B \
136 |         -N \
137 |         -u ychu2@mdanderson.org \
138 |         /bin/bash -c "${HOME}/configs/public/pipeline/UMAP_CLUSTER_JOBS_EMBEDED/FindClusterJobs.sh ${mainscriptFolder} ${inData} ${tempFolder} ${res} ${reduction} ${npc} ${JOBNAME} ${toRunClustering} ${toRunCommonAnalysis} ${toRunCallBack} ${callBackPath}"
139 | done
140 |
--------------------------------------------------------------------------------
/fig6/GSE179994/2_extractTcell/extract.R:
--------------------------------------------------------------------------------
1 | #--------------------------------------------------------------
2 | # filename : extract.R
3 | # Date : 2022-02-16
4 | # contributor : Yanshuo Chu
5 | # function: extract
6 | #--------------------------------------------------------------
7 |
8 | print('<==== extract.R ====>')
9 | rm(list=ls())
10 |
11 | library(data.table)
12 | library(Seurat)
13 | library(ggplot2)
14 | library(tidyverse)
15 | library(harmony)
16 | library(ggstatsplot)
17 |
18 | seuratObj <- readRDS("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE179994/merged/merged.obj")
19 |
20 | Idents(seuratObj) <- seuratObj$celltype
21 |
CD4SeuratObj <- subset(seuratObj, idents = "CD4") 22 | saveRDS(CD4SeuratObj, file.path("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE179994/", paste0('CD4SeuratObj', "_", Sys.Date(), '.rds'))) 23 | CD8SeuratObj <- subset(seuratObj, idents = "CD8") 24 | saveRDS(CD8SeuratObj, file.path("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE179994/", paste0('CD8SeuratObj', "_", Sys.Date(), '.rds'))) 25 | 26 | ## md <- as_tibble(seuratObj@meta.data) 27 | ## md %>% 28 | ## group_by(celltype, cluster) %>% 29 | ## count %>% 30 | ## as.data.frame 31 | 32 | 33 | clinicT <- read_tsv("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/data/GSE179994/ClinicData.txt") %>% 34 | filter(`Treatment Hx` %in% c("On treatment")) 35 | noResponseSamples <- clinicT %>% 36 | filter(Response == "No") %>% 37 | pull(`Sample Name`) 38 | ResponseSamples <- clinicT %>% 39 | filter(Response == "Yes") %>% 40 | pull(`Sample Name`) 41 | 42 | md <- seuratObj@meta.data 43 | TotalSampleCellNum <- md %>% 44 | group_by(sample) %>% 45 | count() 46 | 47 | totalT <- c() 48 | for(tempCluster in unique(md$cluster)){ 49 | TNR <- md %>% 50 | filter(cluster == tempCluster) %>% 51 | group_by(sample) %>% 52 | count() %>% 53 | filter(sample %in% c(noResponseSamples, ResponseSamples)) 54 | 55 | TNR$Frac <- 0.0 56 | TNR$Frac <- TNR$n / TotalSampleCellNum$n[match(TNR$sample, TotalSampleCellNum$sample)] 57 | 58 | TNR$isResponse <- "NO" 59 | TNR$isResponse[TNR$sample %in% ResponseSamples] <- "YES" 60 | 61 | TNR$cluster <- tempCluster 62 | 63 | totalT <- bind_rows(totalT, TNR) 64 | } 65 | 66 | 67 | g <- totalT %>% 68 | ggstatsplot::grouped_ggbetweenstats( 69 | data = ., 70 | x = isResponse, 71 | y = Frac, 72 | grouping.var = cluster, 73 | xlab = "", 74 | ylab = "Sample fraction", 75 | ## pairwise.display = "aiwl", # display only significant pairwise comparisons 76 | p.adjust.method = "fdr", # adjust p-values for multiple tests using this method 77 | ggtheme = theme_classic(), 78 
| package = "ggsci",
79 |         palette = "default_jco",
80 |         plotgrid.args = list(ncol = 1))
81 |
82 | figurePath <- file.path("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE179994/2_extractTcell/outs")
83 | if(!dir.exists(figurePath)){
84 |     dir.create(figurePath, recursive = T)
85 | }
86 | setwd(figurePath)
87 |
88 | ggsave(file.path(paste0("response_bar.pdf")), g, width = 200, height = 1200, units = "mm")
89 |
--------------------------------------------------------------------------------
/fig6/GSE169246/4_mapping_filter/submitJob_Mapping.sh:
--------------------------------------------------------------------------------
1 | #BSUB -J submitJob_Mapping
2 | #BSUB -q short
3 | #BSUB -W 1:00
4 | #BSUB -n 1
5 | #BSUB -M 10
6 | #BSUB -R rusage[mem=10]
7 | #BSUB -B
8 | #BSUB -N
9 | #BSUB -u ychu2@mdanderson.org
10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping_filter/submitJob_Mapping.o.txt
11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping_filter/submitJob_Mapping.e.txt
12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping_filter/
13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping_filter/submitJob_Mapping.o.txt
14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping_filter/submitJob_Mapping.e.txt
15 | module load python/3.7.3-anaconda
16 | module load R/3.6.0
17 | #____----____----____
18 | # Submit one LSF job per entry in QUERYDATAFOLDER. Each job runs Mapping.R
19 | # with the reference whose name matches the entry's base name (CD8, CD4 or P)
20 | # and writes into its own result folder. Entries without a usable reference
21 | # are skipped instead of being submitted.
22 |
23 | runR="Rscript --no-save "
24 |
25 | QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/TCells/forMapping"
26 | CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds"
27 | CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds"
28 | # NOTE(review): unlike the CD8/CD4 references this path has no "tmp/" segment
29 | # (data/Tcellproject vs data/tmp/Tcellproject) -- confirm it is the intended
30 | # location. The loop below skips the P job with a warning if the file is absent.
31 | P_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/Tcellproject/analysis/validate/Proliferative_V4/nPC_15/UMAP_dist_0.1_nneighbor_35/p1_Proliferative_V4_UMAP_dist_0.1_nneighbor_35_CLUSTER_res_0.3/cluster.rds"
32 |
33 | PIPELINE_FOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/4_mapping_filter"
34 |
35 | # Queue, wall time, core count and memory requested for every mapping job.
36 | qName=e80long
37 | wTime=100:00
38 | cn=1
39 | mem=300
40 |
41 | for dataPath in "${QUERYDATAFOLDER}"/*; do
42 |     dataFile=$(basename "$dataPath")
43 |     filename="${dataFile%.*}"
44 |
45 |     # Pick the reference by base name. Anything else (a stray file, or the
46 |     # unexpanded glob when the folder is empty) has no reference; submitting
47 |     # it would only queue a job that calls Mapping.R with an empty -r value.
48 |     case "${filename}" in
49 |         CD8) referenceDataPath=${CD8_ReferenceDataPath} ;;
50 |         CD4) referenceDataPath=${CD4_ReferenceDataPath} ;;
51 |         P)   referenceDataPath=${P_ReferenceDataPath} ;;
52 |         *)
53 |             echo "Skipping ${dataPath}: no reference defined for '${filename}'" >&2
54 |             continue
55 |             ;;
56 |     esac
57 |
58 |     # Do not spend a long-queue slot on a job whose reference cannot be read;
59 |     # see the NOTE(review) next to P_ReferenceDataPath.
60 |     if [ ! -f "${referenceDataPath}" ]; then
61 |         echo "Skipping ${dataPath}: reference ${referenceDataPath} not found" >&2
62 |         continue
63 |     fi
64 |
65 |     JOBFOLDER=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/MappingResult_filter/${filename}
66 |     mkdir -p "$JOBFOLDER"
67 |
68 |     JOBNAME=Mapping_${filename}_169246
69 |     # Clear the logs left behind by a previous run of the same job.
70 |     if [ -f "${JOBFOLDER}/${JOBNAME}.o.txt" ] || [ -f "${JOBFOLDER}/${JOBNAME}.e.txt" ]; then
71 |         rm -f "${JOBFOLDER}/${JOBNAME}".*.txt
72 |     fi
73 |
74 |     # rusage[...] is quoted so the shell never treats the brackets as a glob.
75 |     bsub \
76 |         -J "${JOBNAME}" \
77 |         -o "${JOBFOLDER}/${JOBNAME}.o.txt" \
78 |         -e "${JOBFOLDER}/${JOBNAME}.e.txt" \
79 |         -cwd "${JOBFOLDER}" \
80 |         -q "${qName}" \
81 |         -W "${wTime}" \
82 |         -n "${cn}" \
83 |         -M "${mem}" \
84 |         -R "rusage[mem=${mem}]" \
85 |         -B \
86 |         -N \
87 |         -u ychu2@mdanderson.org \
88 |         /bin/bash -c "module load R/4.0.3; Rscript ${PIPELINE_FOLDER}/Mapping.R -r ${referenceDataPath} -q ${dataPath} -o ${JOBFOLDER}"
89 | done
90 |
91 |
--------------------------------------------------------------------------------
/fig6/GSE144649/6_mapping/Mapping.R:
--------------------------------------------------------------------------------
1 | #--------------------------------------------------------------
2 | # filename : Mapping.R
3 | # Date : 2022-05-02
4 | # contributor : Yanshuo Chu
5 | # function: Mapping
6 | #--------------------------------------------------------------
7 |
8 | print('<==== Mapping.R ====>')
9 |
10 | suppressMessages({
11 |     library(optparse)
12 |     library(tidyverse)
13 |     library(Seurat)
14 |     library(SeuratObject)
15 |     library(cowplot)
16 | })
17 |
18 | option_list = list(
19 |     make_option(c("-r","--referenceData"),
20 |                 type = 'character',
21 |                 help = 'data.rds',
22 |                 metavar = 'character'),
23 |     make_option(c("-q","--queryData"),
24 |                 type = 'character',
25 |                 help = 'data.rds',
26 |                 metavar = 'character'),
27 |     make_option(c("-o","--out"),
28 |                 type = 'character',
29 |                 help = 'out',
30 |                 metavar = 'character')
31 | );
32 |
33 | opt_parser = OptionParser(option_list = option_list);
34 | opt = parse_args(opt_parser);
35 |
36 |
37 | ##
CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 38 | ## CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 39 | ## QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE186144/1_split/A" 40 | 41 | refSeuratObj <- readRDS(opt$referenceData) 42 | querySeuratObj <- readRDS(opt$queryData) 43 | 44 | ## refSeuratObj <- readRDS(CD8_ReferenceDataPath) 45 | ## querySeuratObj <- readRDS("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE144649/5_extractT/CD8.rds") 46 | 47 | DefaultAssay(refSeuratObj) <- "RNA" 48 | DefaultAssay(querySeuratObj) <- "RNA" 49 | 50 | refSeuratObj <- refSeuratObj %>% 51 | NormalizeData(verbose = T) %>% 52 | FindVariableFeatures(selection.method = "vst") 53 | hvgR = VariableFeatures(object = refSeuratObj) 54 | refSeuratObj <- refSeuratObj %>% 55 | ScaleData(verbose = T) %>% 56 | RunPCA(verbose = T, features = hvgR) 57 | 58 | querySeuratObj <- querySeuratObj %>% 59 | NormalizeData(verbose = T) %>% 60 | FindVariableFeatures(selection.method = "vst") 61 | hvgR = VariableFeatures(object = querySeuratObj) 62 | querySeuratObj <- querySeuratObj %>% 63 | ScaleData(verbose = T) %>% 64 | RunPCA(verbose = T, features = hvgR) 65 | 66 | 67 | temp.anchors <- FindTransferAnchors(reference = refSeuratObj, 68 | query = querySeuratObj, 69 | reference.reduction = "pca", 70 | k.filter = NA, 71 | dims = 1:20, 72 | features = intersect(rownames(refSeuratObj), rownames(querySeuratObj))) 73 | 74 | querySeuratObj <- MapQuery(anchorset = temp.anchors, 75 | reference = refSeuratObj, 76 | query = querySeuratObj, 77 | refdata = refSeuratObj$seurat_clusters) 78 | 79 | querySeuratObj$predicted.id <- 80 | 
factor(querySeuratObj$predicted.id, 81 | levels = levels(refSeuratObj$seurat_clusters)) 82 | 83 | saveRDS(querySeuratObj, file.path(opt$out, paste0('querySeuratObj', "_", Sys.Date(), '.rds'))) 84 | -------------------------------------------------------------------------------- /fig6/SCP1288/1_mapping_filter/submitJob_Mapping.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J mapping_SCP1288 2 | #BSUB -q short 3 | #BSUB -W 1:00 4 | #BSUB -n 1 5 | #BSUB -M 10 6 | #BSUB -R rusage[mem=10] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/SCP1288/1_mapping_filter/mapping_SCP1288.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/SCP1288/1_mapping_filter/mapping_SCP1288.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/SCP1288/1_mapping_filter/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/SCP1288/1_mapping_filter/mapping_SCP1288.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/SCP1288/1_mapping_filter/mapping_SCP1288.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/4.0.3 17 | #____----____----____ 18 | 19 | PROJECT_FOLDER=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7 20 | DATA_FOLDER=${PROJECT_FOLDER}/data 21 | RESULT_FOLDER=${PROJECT_FOLDER}/result 22 | CODE_FOLDER=${PROJECT_FOLDER}/code 23 | PIPELINE_FOLDER=${CODE_FOLDER}/pipeline 24 | SRC_FOLDER=${CODE_FOLDER}/src 25 | KNOWLEDGE_FOLDER=${PROJECT_FOLDER}/knowledge 26 | PIPELINE_NAME=SCP1288__1_mapping_filter 27 | PIPELINE_PATH_NAME=SCP1288/1_mapping_filter 28 | PROJECT_NAME=$(basename ${PROJECT_FOLDER}) 29 | 30 | OutDir=$RESULT_FOLDER/$PIPELINE_PATH_NAME 31 | if [ ! 
-d $OutDir ]; then 32 | mkdir -p $OutDir 33 | fi 34 | 35 | runR="Rscript --no-save " 36 | QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/SCP1288/0_merge" 37 | CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 38 | CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 39 | 40 | PIPELINE_FOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/SCP1288/1_mapping_filter" 41 | 42 | qName=e40long 43 | wTime=100:00 44 | cn=1 45 | mem=300 46 | 47 | for dataPath in ${QUERYDATAFOLDER}/*; do 48 | dataFile=$(basename $dataPath) 49 | extension="${dataFile##*.}" 50 | filename="${dataFile%.*}" 51 | referenceDataPath="" 52 | if [ "${filename}" = "CD8" ]; then 53 | referenceDataPath=${CD8_ReferenceDataPath} 54 | fi 55 | if [ "${filename}" = "CD4" ]; then 56 | referenceDataPath=${CD4_ReferenceDataPath} 57 | fi 58 | JOBFOLDER=$OutDir/${filename} 59 | if [ ! 
-d $JOBFOLDER ]; then 60 | mkdir -p $JOBFOLDER 61 | fi 62 | JOBNAME=Mapping_${filename} 63 | if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 64 | rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 65 | fi 66 | bsub \ 67 | -J ${JOBNAME} \ 68 | -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 69 | -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 70 | -cwd ${JOBFOLDER} \ 71 | -q ${qName} \ 72 | -W ${wTime} \ 73 | -n ${cn} \ 74 | -M ${mem} \ 75 | -R rusage[mem=${mem}] \ 76 | -B \ 77 | -N \ 78 | -u ychu2@mdanderson.org \ 79 | /bin/bash -c "module load R/4.0.3; Rscript ${PIPELINE_FOLDER}/Mapping.R -r ${referenceDataPath} -q ${dataPath} -o ${JOBFOLDER}" 80 | done 81 | 82 | -------------------------------------------------------------------------------- /data_preprocess/proliferative/callBack_Proliferative.sh: -------------------------------------------------------------------------------- 1 | ##!/usr/bin/env bash 2 | 3 | projectPath=/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject 4 | analysisPath=${projectPath}/analysis 5 | mainscriptsFolder=${analysisPath}/scripts 6 | pipelinesFolder=${mainscriptsFolder}/pipelines 7 | srcD=${analysisPath}/scripts/src 8 | paramD=${analysisPath}/scripts/params 9 | databaseD='/rsrch3/home/genomic_med/ychu2/share/database' 10 | 11 | runR="Rscript --no-save " 12 | 13 | dataPath=${1} 14 | 15 | rootFolder=$(dirname ${dataPath}) 16 | 17 | 18 | ${runR} ${srcD}/iml_check_bubble-plot.R -d $dataPath -c "P_c0_CD8_CCL4L2 P_c1_DNT P_c2_CD4 P_c3_DNT_GZMK P_c4_CD8_C1QBP P_c5_Treg P_c6_CD8_CCL4L1 P_c7_CD8_GZMK" 19 | 20 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/temp2.txt 21 | # ${runR} ${srcD}/AlluvialPlot.R -l /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/Proliferating_V3_V2/nPC_15/UMAP_dist_0.01_nneighbor_30/p1Proliferating_V3_V2_UMAP_dist_0.01_nneighbor_30_CLUSTER_res_0.3/cluster.rds -r $dataPath 22 | 23 | # ${runR} 
/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/scripts/src/AlluvialPlot.R -l /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/Proliferating_V3_V2/nPC_15/UMAP_dist_0.01_nneighbor_30/p1Proliferating_V3_V2_UMAP_dist_0.01_nneighbor_30_CLUSTER_res_0.3/cluster.rds -r /rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/Proliferative_V4/nPC_15/UMAP_dist_0.1_nneighbor_35/p1_Proliferative_V4_UMAP_dist_0.1_nneighbor_35_CLUSTER_res_0.3/cluster.rds 24 | 25 | # ${runR} ${srcD}/DEG-bubble-plot.R -d $dataPath --DEGs $(dirname ${dataPath})/snn-single-markers.tsv --number 200 26 | # ${runR} ${srcD}/tissue-composition-plot.R -d $dataPath 27 | 28 | 29 | # FracArray=($(seq 0.1 0.1 0.8)) 30 | # for tempFrac in "${FracArray[@]}"; do 31 | # JOBNAME=cd8_c1c7_monocle2_${tempFrac} 32 | # tempFolder=$(dirname $dataPath) 33 | # JOBFOLDER=${tempFolder} 34 | # if [ ! -d $tempFolder ]; then 35 | # mkdir -p $tempFolder 36 | # fi 37 | # if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 38 | # rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 39 | # fi 40 | # bsub \ 41 | # -J ${JOBNAME} \ 42 | # -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 43 | # -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 44 | # -cwd ${JOBFOLDER} \ 45 | # -q long \ 46 | # -W 120:00 \ 47 | # -n 1 \ 48 | # -M 800\ 49 | # -R rusage[mem=800] \ 50 | # -B \ 51 | # -N \ 52 | # -u ychu2@mdanderson.org \ 53 | # /bin/bash -c "module load python/3.7.3-anaconda; module load R/3.6.0; ${runR} ${srcD}/monocle2.R -d ${dataPath} -f ${tempFrac}" 54 | # done 55 | 56 | # JOBNAME=cd8_c1c7_monocle3 57 | # tempFolder=$(dirname $dataPath) 58 | # JOBFOLDER=${tempFolder} 59 | # if [ ! 
-d $tempFolder ]; then 60 | # mkdir -p $tempFolder 61 | # fi 62 | # if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 63 | # rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 64 | # fi 65 | # bsub \ 66 | # -J ${JOBNAME} \ 67 | # -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 68 | # -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 69 | # -cwd ${JOBFOLDER} \ 70 | # -q long \ 71 | # -W 120:00 \ 72 | # -n 1 \ 73 | # -M 500\ 74 | # -R rusage[mem=500] \ 75 | # -B \ 76 | # -N \ 77 | # -u ychu2@mdanderson.org \ 78 | # /bin/bash -c "module load python/3.7.3-anaconda; module load R/3.6.0; ${runR} ${srcD}/trajectory_monocle3_from_seurat.r -d ${dataPath} -s $(dirname ${dataPath})/startCells.txt" 79 | -------------------------------------------------------------------------------- /fig6/GSE169246/subT3_mapping_split_by_marker/submitJob_Mapping.sh: -------------------------------------------------------------------------------- 1 | #BSUB -J mapping 2 | #BSUB -q short 3 | #BSUB -W 1:00 4 | #BSUB -n 1 5 | #BSUB -M 10 6 | #BSUB -R rusage[mem=10] 7 | #BSUB -B 8 | #BSUB -N 9 | #BSUB -u ychu2@mdanderson.org 10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_mapping_split_by_marker/mapping.o.txt 11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_mapping_split_by_marker/mapping.e.txt 12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_mapping_split_by_marker/ 13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_mapping_split_by_marker/mapping.o.txt 14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_mapping_split_by_marker/mapping.e.txt 15 | module load python/3.7.3-anaconda 16 | module load R/4.0.3 17 | #____----____----____ 18 | 19 | PROJECT_FOLDER=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7 20 | 
DATA_FOLDER=${PROJECT_FOLDER}/data 21 | RESULT_FOLDER=${PROJECT_FOLDER}/result 22 | CODE_FOLDER=${PROJECT_FOLDER}/code 23 | PIPELINE_FOLDER=${CODE_FOLDER}/pipeline 24 | SRC_FOLDER=${CODE_FOLDER}/src 25 | KNOWLEDGE_FOLDER=${PROJECT_FOLDER}/knowledge 26 | PIPELINE_NAME=GSE169246__subT3_mapping_split_by_marker 27 | PIPELINE_PATH_NAME=GSE169246/subT3_mapping_split_by_marker 28 | PROJECT_NAME=$(basename ${PROJECT_FOLDER}) 29 | 30 | OutDir=$RESULT_FOLDER/$PIPELINE_PATH_NAME 31 | if [ ! -d $OutDir ]; then 32 | mkdir -p $OutDir 33 | fi 34 | 35 | runR="Rscript --no-save " 36 | 37 | QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/subT2_split_by_marker/outs" 38 | 39 | CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 40 | CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds" 41 | 42 | PIPELINE_FOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_mapping_split_by_marker" 43 | 44 | qName=e80long 45 | wTime=100:00 46 | cn=1 47 | mem=300 48 | 49 | for dataPath in ${QUERYDATAFOLDER}/*; do 50 | dataFile=$(basename $dataPath) 51 | extension="${dataFile##*.}" 52 | filename="${dataFile%.*}" 53 | referenceDataPath="" 54 | if [ "${filename}" = "CD8" ]; then 55 | referenceDataPath=${CD8_ReferenceDataPath} 56 | fi 57 | if [ "${filename}" = "CD4" ]; then 58 | referenceDataPath=${CD4_ReferenceDataPath} 59 | fi 60 | JOBFOLDER=${OutDir}/${filename} 61 | if [ ! 
-d $JOBFOLDER ]; then
62 |         mkdir -p $JOBFOLDER
63 |     fi
64 |
65 |     JOBNAME=Mapping_${filename}
66 |     if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then
67 |         rm -f ${JOBFOLDER}/${JOBNAME}.*.txt
68 |     fi
69 |     # Submit the mapping job; the rusage string is quoted so the shell cannot treat [..] as a filename pattern.
70 |     bsub \
71 |         -J ${JOBNAME} \
72 |         -o ${JOBFOLDER}/${JOBNAME}.o.txt \
73 |         -e ${JOBFOLDER}/${JOBNAME}.e.txt \
74 |         -cwd ${JOBFOLDER} \
75 |         -q ${qName} \
76 |         -W ${wTime} \
77 |         -n ${cn} \
78 |         -M ${mem} \
79 |         -R "rusage[mem=${mem}]" \
80 |         -B \
81 |         -N \
82 |         -u ychu2@mdanderson.org \
83 |         /bin/bash -c "module load R/4.0.3; Rscript ${PIPELINE_FOLDER}/Mapping.R -r ${referenceDataPath} -q ${dataPath} -o ${JOBFOLDER}"
84 | done
85 |
86 |
--------------------------------------------------------------------------------
/data_preprocess/0_src/qc-by-cluster.R:
--------------------------------------------------------------------------------
1 | #' filename : qc-doublets-viability-by-cluster.r
2 | #' Date : 2020-04-14
3 | #' contributor : Yanshuo Chu
4 | #' function: per-cluster violin plots of percent.mito / nFeature_RNA / nCount_RNA plus a cell-count plot, written to one PDF
5 | ##libraries
6 | suppressMessages({
7 |     library(optparse)
8 |     library(Seurat)
9 |     library(ggplot2)
10 |     library(ggpubr)
11 |     library(rjson)
12 |     library(rlist)
13 |     library(tidyverse)
14 |     library(facetscales)
15 | })
16 | ## CLI: -d/--data input rds (required), -o/--out output PDF name
17 | option_list = list(
18 |     make_option(c("-d", "--data"),
19 |                 type = "character",
20 |                 default = NULL,
21 |                 help = "rds file generated by load-cellranger.r",
22 |                 metavar = 'character'),
23 |     make_option(c("-o",'--out'),
24 |                 type = 'character',
25 |                 default = 'doublets-viability-by-cluster-plot.pdf',
26 |                 help = 'output file name for the plot [default = %default]',
27 |                 metavar = 'character')
28 | );
29 |
30 | opt_parser = OptionParser(option_list = option_list);
31 | opt = parse_args(opt_parser);
32 |
33 | if(is.null(opt$data)) {
34 |     print_help(opt_parser)
35 |     stop("Input data must be provided", call.
= F)
36 | }
37 |
38 |
39 | ##load data
40 | print('loading data')
41 | seuratObj = readRDS(opt$data)
42 |
43 | print('making plots')
44 | ## Long format: one row per (cell, metadata column). The selection starts with negative terms, so every column except rname and seurat_clusters is gathered.
45 | gpdat = tidyr::gather(data.frame(rname = rownames(seuratObj@meta.data),
46 |                                  seuratObj@meta.data),
47 |                       key = type,
48 |                       value = value, -rname, -seurat_clusters,
49 |                       c('percent.mito', 'nFeature_RNA', 'nCount_RNA'))
50 | ## Keep only the three QC metrics. %in% tests membership; `==` against a length-3 vector recycles element-wise and silently drops most rows.
51 | temp.data <- filter(gpdat, type %in% c('percent.mito', 'nFeature_RNA', 'nCount_RNA'))
52 | temp.data$seurat_clusters <- as.factor(temp.data$seurat_clusters)
53 | temp.data$value <- as.numeric(temp.data$value)
54 | ## Plot 1: violins + boxplots per cluster, one facet row per metric, free y scales.
55 | plot.all = ggplot(data = temp.data, aes(x = seurat_clusters, y=value)) +
56 |     geom_violin(aes(fill = seurat_clusters, color = seurat_clusters), width=1.3) +
57 |     geom_boxplot(width=0.1, color = "white", alpha = 0.2, outlier.shape = NA) +
58 |     xlab('seurat_clusters') + ylab('') +facet_grid(type ~ ., scales="free") +
59 |     theme(axis.text.x = element_text(angle = 40,hjust = 1, vjust = 1),
60 |           legend.position = 'none')
61 | ## Fixed per-metric y limits/breaks for the zoomed-in version of the same plot.
62 | scales_y <- list(
63 |     `percent.mito` = scale_y_continuous(limits = c(0, 0.15)),
64 |     `nCount_RNA` = scale_y_continuous(limits = c(0, 5000), breaks = c(0, 200, 500, 1000, 3000, 5000)),
65 |     `nFeature_RNA` = scale_y_continuous(limits = c(0, 2000), breaks = c(0, 200, 500, 1000, 2000))
66 | )
67 | ## Plot 2: same layers, but each facet uses its own scale from scales_y (facetscales::facet_grid_sc).
68 | plot.all.mini = ggplot(data = temp.data, aes(x = seurat_clusters, y=value)) +
69 |     geom_violin(aes(fill = seurat_clusters, color = seurat_clusters), width=1.3) +
70 |     geom_boxplot(width=0.1, color = "white", alpha = 0.2, outlier.shape = NA) +
71 |     xlab('seurat_clusters') + ylab('') +facet_grid_sc(type ~ ., scales=list(y = scales_y)) +
72 |     theme(axis.text.x = element_text(angle = 40,hjust = 1, vjust = 1),
73 |           legend.position = 'none')
74 | ## Plot 3 input: number of cells per cluster (n is turned into a factor for plotting).
75 | gpdat <- as_tibble(data.frame(rname = rownames(seuratObj@meta.data), seuratObj@meta.data))
76 | gpdat <- gpdat %>% group_by(seurat_clusters) %>% count()
77 | gpdat$n <- as.factor(gpdat$n)
78 |
79 | g <- ggplot(gpdat,
aes(x=seurat_clusters, y=n, label=n, color=n))+
80 |     geom_point() + geom_text(aes(color=factor(n)))
81 |
82 | gp.list = list(plot.all, plot.all.mini, g)
83 | ## Page size: height fixed at 6, width 0.4 per cluster with a minimum of 5 (the ht < 3 guard can never trigger with ht = 6).
84 | ht <- 6
85 | wh <- round(4/10 * length(unique(seuratObj@meta.data$seurat_clusters)))
86 |
87 | if(ht < 3){ht = 3;}
88 | if(wh < 5){wh = 5;}
89 | ## ncol = 1, nrow = 1 with three plots: presumably ggarrange returns one page per plot, printed as consecutive PDF pages -- confirm against ggpubr docs.
90 | do.call(ggarrange, c(gp.list, ncol = 1, nrow = 1)) -> combined.gp
91 | pdf(opt$out,height = ht, width = wh)
92 | print(combined.gp)
93 | dev.off()
94 |
95 | print('----end----')
96 |
--------------------------------------------------------------------------------
/data_preprocess/CD8/callBack_CD8.sh:
--------------------------------------------------------------------------------
1 | ##!/usr/bin/env bash
2 | # Callback for the CD8 clustering pipeline: $1 is the path of the clustered rds; plots are written next to it.
3 | projectPath=/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject
4 | analysisPath=${projectPath}/analysis
5 | mainscriptsFolder=${analysisPath}/scripts
6 | pipelinesFolder=${mainscriptsFolder}/pipelines
7 | srcD=${analysisPath}/scripts/src
8 | paramD=${analysisPath}/scripts/params
9 | databaseD='/rsrch3/home/genomic_med/ychu2/share/database'
10 |
11 | runR="Rscript --no-save "
12 |
13 | dataPath=${1}
14 |
15 | rootFolder=$(dirname ${dataPath})
16 |
17 |
18 | # ${runR} ${srcD}/iml_check_bubble-plot.R -d $dataPath -c "CD8_c0_Tcm CD8_c1_Tex CD8_c2_Teff CD8_c3_Naive CD8_c4_Stressed CD8_c5_ISG CD8_c6_Tcm_DKK3 CD8_c7_Pre-Tex CD8_c8_Teff_CX3CR1 CD8_c9_KLRC4 CD8_c10_Teff_CD244 CD8_c11_Teff_SEMA4A CD8_c12_Trm CD8_c13_Naive_TCF7 CD8_c14_gdT"
19 | # The only active step: bubble plot of the PMID34290406 marker list into <dir of $1>/bubbleplot.
20 | ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m /rsrch3/home/genomic_med/ychu2/share/database/Pan-T/Reference/PMID34290406.txt
21 |
22 | # tempMarkerFolder=/rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/Markers/CD8/Genelistbyfunction
23 | # for markerFile in $(ls $tempMarkerFolder); do
24 | # JOBNAME=job_${markerFile%%.*}
25 | # tempFolder=${rootFolder}
26 | # JOBFOLDER=${tempFolder}
27 | # if [ !
-d $tempFolder ]; then 28 | # mkdir -p $tempFolder 29 | # fi 30 | # if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then 31 | # rm ${JOBFOLDER}/${JOBNAME}.*.txt -f 32 | # fi 33 | # bsub \ 34 | # -J ${JOBNAME} \ 35 | # -o ${JOBFOLDER}/${JOBNAME}.o.txt \ 36 | # -e ${JOBFOLDER}/${JOBNAME}.e.txt \ 37 | # -cwd ${JOBFOLDER} \ 38 | # -q short \ 39 | # -W 3:00 \ 40 | # -n 1 \ 41 | # -M 100\ 42 | # -R rusage[mem=100] \ 43 | # -B \ 44 | # -N \ 45 | # -u ychu2@mdanderson.org \ 46 | # /bin/bash -c "module load python/3.7.3-anaconda; module load R/3.6.0; ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${tempMarkerFolder}/${markerFile}; ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${tempMarkerFolder}/${markerFile}; ${runR} ${srcD}/feature-plot.R -d $dataPath -o $(dirname ${dataPath})/featureplot/${markerFile%%.*} -c ${paramD}/feature-plot-origin.json -m ${tempMarkerFolder}/${markerFile}" 47 | # done 48 | 49 | # ${runR} ${srcD}/marker-classification.R -d $(dirname ${dataPath})/snn-single-markers.tsv -m $tempMarkerFolder 50 | 51 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Pan-T/CD8/CD8Markers_byfunction_fig1A.txt 52 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Pan-T/CD8/CD8Markers_fig1A.txt 53 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Pan-T/CD8/CD8Markers.txt 54 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname ${dataPath})/bubbleplot -m ${databaseD}/Markers/CD8/ProliferativeSignatures.txt 55 | 56 | # ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${databaseD}/Pan-T/CD8/CD8Markers.txt 57 | 58 | 59 | # tempMarkerFolder=/rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/Markers/CD8/Genelistbyfunction 60 | 61 | # for markerFile in $(ls $tempMarkerFolder); do 62 | # ${runR} ${srcD}/bubble-plot.R -d $dataPath -o $(dirname 
${dataPath})/bubbleplot -m ${tempMarkerFolder}/${markerFile}
63 | # ${runR} ${srcD}/stack-ViolinPlot.R -d $dataPath -m ${tempMarkerFolder}/${markerFile}
64 | # ${runR} ${srcD}/feature-plot.R -d $dataPath -o $(dirname ${dataPath})/featureplot/${markerFile%%.*} -c ${paramD}/feature-plot-origin.json -m ${tempMarkerFolder}/${markerFile}
65 | # done
66 |
--------------------------------------------------------------------------------
/fig6/GSE169246/6_validate_mapping/validate_batch.R:
--------------------------------------------------------------------------------
1 | #--------------------------------------------------------------
2 | # filename : validate.R
3 | # Date : 2022-08-31
4 | # contributor : Yanshuo Chu
5 | # function: validate
6 | #--------------------------------------------------------------
7 | # Colours the merged GSE169246 embedding by sample, patient, tissue and treatment type; one PDF per grouping.
8 | print('<==== validate.R ====>')
9 |
10 | rm(list=ls())
11 |
12 | library(Seurat)
13 | library(tidyverse)
14 | library(ggplot2)
15 | library(cowplot)
16 | library(ggpubr)
17 | # All PDFs are written into figurePath, which becomes the working directory.
18 | figurePath <- file.path("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/6_validate_mapping/outs")
19 | if(!dir.exists(figurePath)){
20 |     dir.create(figurePath, recursive = T)
21 | }
22 | setwd(figurePath)
23 |
24 |
25 | seuratObj <- readRDS("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/merged/nPC_30/UMAP_dist_0.1_nneighbor_50/GSE169246_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds")
26 |
27 | ## Idents(seuratObj) <- seuratObj$orig.ident
28 | ## pdf(file.path(getwd(), "orig_ident.pdf"))
29 | ## DimPlot(seuratObj, label =T)
30 | ## dev.off()
31 | # Patients of the anti-PD-L1 + chemo arm. This vector is not referenced later in this script; "PDL1+Chemo" is the default TreatmentType.
32 | TotalAntiPDL1ChemoPatients <- c("P019",
33 |                                 "P010",
34 |                                 "P012",
35 |                                 "P007",
36 |                                 "P017",
37 |                                 "P001",
38 |                                 "P002",
39 |                                 "P014",
40 |                                 "P004",
41 |                                 "P005",
42 |                                 "P016")
43 | # Patients whose TreatmentType is set to "Chemo" below.
44 | TotalChemoPatients <- c("P022",
45 |                         "P011",
46 |                         "P020",
47 |                         "P008",
48 |                         "P013",
49 |                         "P025",
50 |                         "P018",
51 |                         "P023",
52 |                         "P024",
53 |                         "P003",
54 |                         "P028")
55 | # Patients labelled "R" in isResponse below; all others get "NR" (P028 gets "-").
56 | AllResponsePatients <- 
c("P019",
57 |                          "P010",
58 |                          "P012",
59 |                          "P007",
60 |                          "P022",
61 |                          "P011",
62 |                          "P020",
63 |                          "P008",
64 |                          "P013")
65 | # Per-cell metadata parsed from the cell name: Sample is the text after a '.' that follows 10-20 leading characters.
66 | seuratObj@meta.data$Sample <- stringr::str_extract(Cells(seuratObj), "(?<=^.{10,20}\\.).+")
67 | seuratObj@meta.data$Patient <- stringr::str_extract(seuratObj@meta.data$Sample, "P\\d+")  # "P" followed by digits
68 | seuratObj@meta.data$Tissue <- stringr::str_extract(seuratObj@meta.data$Sample, "\\w$")  # last character of Sample
69 | seuratObj@meta.data$TumorTreatment <- stringr::str_extract(seuratObj@meta.data$Sample, "^[a-zA-Z]+")  # leading letters of Sample
70 | seuratObj@meta.data$isResponse <- "NR"
71 | seuratObj@meta.data$isResponse[seuratObj@meta.data$Patient %in% AllResponsePatients] <- "R"
72 | seuratObj@meta.data$isResponse[seuratObj@meta.data$Patient == "P028"] <- "-"
73 | seuratObj@meta.data$TreatmentType <- "PDL1+Chemo"
74 | seuratObj@meta.data$TreatmentType[seuratObj@meta.data$Patient %in% TotalChemoPatients] <- "Chemo"
75 | seuratObj@meta.data$group <- paste0(seuratObj@meta.data$TreatmentType, "-", seuratObj@meta.data$TumorTreatment, "-", seuratObj@meta.data$isResponse)
76 | # One labelled DimPlot per grouping; each block switches Idents and writes its own PDF into the working directory.
77 | Idents(seuratObj) <- seuratObj$Sample
78 | pdf(file.path(getwd(), "sample.pdf"), width = 15)
79 | DimPlot(seuratObj, label =T)
80 | dev.off()
81 |
82 | Idents(seuratObj) <- seuratObj$Patient
83 | pdf(file.path(getwd(), "patient.pdf"))
84 | DimPlot(seuratObj, label =T)
85 | dev.off()
86 |
87 | Idents(seuratObj) <- seuratObj$Tissue
88 | pdf(file.path(getwd(), "tissue.pdf"))
89 | DimPlot(seuratObj, label =T)
90 | dev.off()
91 |
92 | Idents(seuratObj) <- seuratObj$TreatmentType
93 | pdf(file.path(getwd(), "treatment_type.pdf"))
94 | DimPlot(seuratObj, label =T)
95 | dev.off()
96 |
--------------------------------------------------------------------------------
/fig6/GSE169246/subT3_mapping_filter_split_by_marker/submitJob_Mapping.sh:
--------------------------------------------------------------------------------
1 | #BSUB -J submitJob_Mapping
2 | #BSUB -q short
3 | #BSUB -W 1:00
4 | #BSUB -n 1
5 | #BSUB -M 10
6 | #BSUB -R 
rusage[mem=10]
7 | #BSUB -B
8 | #BSUB -N
9 | #BSUB -u ychu2@mdanderson.org
10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_mapping_filter_split_by_marker/submitJob_Mapping.o.txt
11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_mapping_filter_split_by_marker/submitJob_Mapping.e.txt
12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_mapping_filter_split_by_marker/
13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_mapping_filter_split_by_marker/submitJob_Mapping.o.txt
14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_mapping_filter_split_by_marker/submitJob_Mapping.e.txt
15 | module load python/3.7.3-anaconda
16 | module load R/4.0.3
17 | #____----____----____
18 | # The #BSUB lines above describe this launcher job only; the mapping jobs it submits further down request their own queue and memory.
19 | PROJECT_FOLDER=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7
20 | DATA_FOLDER=${PROJECT_FOLDER}/data
21 | RESULT_FOLDER=${PROJECT_FOLDER}/result
22 | CODE_FOLDER=${PROJECT_FOLDER}/code
23 | PIPELINE_FOLDER=${CODE_FOLDER}/pipeline
24 | SRC_FOLDER=${CODE_FOLDER}/src
25 | KNOWLEDGE_FOLDER=${PROJECT_FOLDER}/knowledge
26 | PIPELINE_NAME=GSE169246__subT3_mapping_filter_split_by_marker
27 | PIPELINE_PATH_NAME=GSE169246/subT3_mapping_filter_split_by_marker
28 | PROJECT_NAME=$(basename ${PROJECT_FOLDER})
29 | # Output root of this step: ${RESULT_FOLDER}/${PIPELINE_PATH_NAME}, created if it does not exist yet.
30 | OutDir=$RESULT_FOLDER/$PIPELINE_PATH_NAME
31 | if [ !
-d $OutDir ]; then
32 |     mkdir -p $OutDir
33 | fi
34 |
35 | runR="Rscript --no-save "
36 | # Every entry of this folder is treated as one query dataset; its base name (CD8 / CD4) selects the reference.
37 | QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE169246/subT2_split_by_marker/outs"
38 |
39 | CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds"
40 | CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds"
41 | # NOTE: this reassignment replaces the PIPELINE_FOLDER derived from CODE_FOLDER earlier in the script; Mapping.R is looked up in this folder.
42 | PIPELINE_FOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE169246/subT3_mapping_filter_split_by_marker"
43 | # LSF queue, wall time, core count and memory requested for every mapping job submitted by the loop.
44 | qName=e80long
45 | wTime=100:00
46 | cn=1
47 | mem=300
48 | # Submit one Mapping.R job per query file.
49 | for dataPath in ${QUERYDATAFOLDER}/*; do
50 |     dataFile=$(basename $dataPath)
51 |     extension="${dataFile##*.}"
52 |     filename="${dataFile%.*}"
53 |     referenceDataPath=""
54 |     if [ "${filename}" = "CD8" ]; then
55 |         referenceDataPath=${CD8_ReferenceDataPath}
56 |     elif [ "${filename}" = "CD4" ]; then
57 |         referenceDataPath=${CD4_ReferenceDataPath}
58 |     else echo "Skipping ${dataPath}: no reference dataset configured for '${filename}'" >&2; continue  # an empty -r would only produce a job that fails
59 |     fi
60 |     JOBFOLDER=${OutDir}/${filename}
61 |     if [ !
-d $JOBFOLDER ]; then
62 |         mkdir -p $JOBFOLDER
63 |     fi
64 |
65 |     JOBNAME=Mapping_filter_${filename}
66 |     if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then
67 |         rm -f ${JOBFOLDER}/${JOBNAME}.*.txt
68 |     fi
69 |     # Submit the mapping job; the rusage string is quoted so the shell cannot treat [..] as a filename pattern.
70 |     bsub \
71 |         -J ${JOBNAME} \
72 |         -o ${JOBFOLDER}/${JOBNAME}.o.txt \
73 |         -e ${JOBFOLDER}/${JOBNAME}.e.txt \
74 |         -cwd ${JOBFOLDER} \
75 |         -q ${qName} \
76 |         -W ${wTime} \
77 |         -n ${cn} \
78 |         -M ${mem} \
79 |         -R "rusage[mem=${mem}]" \
80 |         -B \
81 |         -N \
82 |         -u ychu2@mdanderson.org \
83 |         /bin/bash -c "module load R/4.0.3; Rscript ${PIPELINE_FOLDER}/Mapping.R -r ${referenceDataPath} -q ${dataPath} -o ${JOBFOLDER}"
84 | done
85 |
86 |
--------------------------------------------------------------------------------
/data_preprocess/0_src/RunUMAP.R:
--------------------------------------------------------------------------------
1 | #' filename : umap-harmony.R
2 | #' Date : 2020-04-23
3 | #' contributor : Yanshuo Chu
4 | #' function: run umap for harmony data
5 |
6 | ##libraries
7 | suppressMessages({library(optparse)
8 |     library(readr)
9 |     library(rjson)
10 |     library(SeuratData)
11 |     library(harmony)
12 |     library(Seurat)})
13 | print('---snn clustering---')  ## NOTE(review): banner says snn clustering, but this script only calls RunUMAP
14 | ##CLI parsing
15 | option_list = list(
16 |     make_option(c("-d", "--data"),
17 |                 type = "character",
18 |                 default = NULL,
19 |                 help = "r data file input (after normalization)",
20 |                 metavar = 'character'),
21 |     make_option(c("-o",'--out'),
22 |                 type = 'character',
23 |                 default = 'snn-harmony.rds',
24 |                 help = 'output file name for the r data file [default = %default]',
25 |                 metavar = 'character'),
26 |     make_option(c("-r",'--reduction'),
27 |                 type = 'character',
28 |                 default = 'harmony',
29 |                 help = 'reduction method harmony',
30 |                 metavar = 'character'),
31 |     make_option(c("-n",'--npc'),
32 |                 type = 'integer',
33 |                 default = 40,
34 |                 help = 'number of components used as dims (1:npc) [default = %default]',
35 |                 metavar = 'integer'),
36 |     make_option(c("-i",'--dist'),
37 |                 type = 'double',
38 |                 default = 
0.4,
39 |                 help = 'dist default 0.4',
40 |                 metavar = 'double'),
41 |     make_option(c("-e",'--nneighbors'),
42 |                 type = 'integer',
43 |                 default = 30,
44 |                 help = 'n neighbors, default 30',
45 |                 metavar = 'integer')
46 | );
47 |
48 | opt_parser = OptionParser(option_list = option_list);
49 | opt = parse_args(opt_parser);
50 | ## --data is the only required option; abort with usage when it is missing.
51 | if(is.null(opt$data)) {
52 |     print_help(opt_parser)
53 |     stop("Input data must be provided", call. = F)
54 | }
55 |
56 | ##Load data
57 | seuratObj <- readRDS(opt$data)
58 | ## The block below is disabled: cell-cycle scoring/regression, HVG filtering and PCA are not run by this script.
59 | ## ## ###############################################################################
60 | ## ## #' scale regress out proliferative markers '#
61 | ## ## ###############################################################################
62 | ## cellCycleGeneT1 <- read_tsv("/rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/general/cell-cycle-gene-list.txt")
63 | ## cellCycleGeneT2 <- read_tsv("/rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/general/regev_lab_cell_cycle_genes.txt")
64 | ## seuratObj <- CellCycleScoring(seuratObj,
65 | ## s.features = s.genes,
66 | ## g2m.features = g2m.genes,
67 | ## set.ident = TRUE)
68 | ## seuratObj <- ScaleData(seuratObj,
69 | ## vars.to.regress =c("S.Score", "G2M.Score"),
70 | ## features = rownames(seuratObj))
71 | ## seuratObj <- FindVariableFeatures(object = seuratObj, selection.method = 'vst', nfeatures = 3000)
72 | ## hvg = VariableFeatures(object = seuratObj)
73 | ## gene.pattern <- c("MALAT1", "^MT-", "^RPL", "^RPS", "^LOC(0-9)", "^TR(A|B|G|D)V", "^MTRNR")
74 | ## hvg <- hvg[!hvg %in% grep(paste0(gene.pattern, collapse = "|"), hvg, value = T)]
75 | ## hvg <- setdiff(hvg, cellCycleGeneT1)
76 | ## hvg <- setdiff(hvg, cellCycleGeneT2)
77 | ## seuratObj <- RunPCA(object = seuratObj, features= hvg, npcs=150, verbose = FALSE)
78 | ## VariableFeatures(seuratObj) <- hvg
79 | ## ## ###############################################################################
80 |
81 | ##run UMAP on the selected reduction
82 | seuratObj <- RunUMAP(object = 
seuratObj,
83 |                      reduction = opt$reduction,
84 |                      dims = 1:opt$npc,
85 |                      min.dist = opt$dist,
86 |                      n.neighbors = opt$nneighbors)
87 | ## The object with the new UMAP embedding is written to --out.
88 | saveRDS(seuratObj, file = opt$out)
89 | print('---end---')
90 |
--------------------------------------------------------------------------------
/fig6/GSE179994/2_extractTcell_proliferative/extract.R:
--------------------------------------------------------------------------------
1 | #--------------------------------------------------------------
2 | # filename : extract.R
3 | # Date : 2022-02-16
4 | # contributor : Yanshuo Chu
5 | # function: extract
6 | #--------------------------------------------------------------
7 | # Splits the merged GSE179994 object into CD4 / CD8 subsets (proliferating clusters kept separate) and plots per-cluster sample fractions by response.
8 | print('<==== extract.R ====>')
9 | rm(list=ls())
10 |
11 | library(data.table)
12 | library(Seurat)
13 | library(ggplot2)
14 | library(tidyverse)
15 | library(harmony)
16 | library(ggstatsplot)
17 | # All outputs are written into figure_path, which becomes the working directory.
18 | figure_path <- file.path("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE179994/2_extractTcell_proliferative/")
19 | if (!dir.exists(figure_path)) {
20 |     dir.create(figure_path, recursive = T)
21 | }
22 | setwd(figure_path)
23 |
24 | seuratObj <- readRDS("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE179994/merged/merged.obj")
25 | # Cell counts per (celltype, cluster) pair; the result is not assigned.
26 | md <- as_tibble(seuratObj@meta.data)
27 | md %>%
28 |     group_by(celltype, cluster) %>%
29 |     count %>%
30 |     as.data.frame
31 | # Values of the "cluster" metadata column that define each subset.
32 | CD4_clusters <- c("CD4_C1-Naive", "CD4_C2-Tcm", "CD4_C3-Tem", "CD4_C4-CD69", "CD4_C5-ISG15", "CD4_C6-RPL", "CD4_C7-Th1-like", "CD4_C8-Treg", "XCL1")
33 | CD4_prolif_clusters <- c("CD4_C9-Prolif.")
34 | CD8_clusters <- c("Non-exhausted", "Tex")
35 | CD8_prolif_clusters <- c("Prolif.")
36 |
37 | Idents(seuratObj) <- seuratObj$cluster
38 | # Each subset is saved as <name>_<today's date>.rds in the working directory.
39 | CD4SeuratObj <- subset(seuratObj, idents = CD4_clusters)
40 | saveRDS(CD4SeuratObj, paste0('CD4SeuratObj', "_", Sys.Date(), '.rds'))
41 | CD4ProlifSeuratObj <- subset(seuratObj, idents = CD4_prolif_clusters)
42 | saveRDS(CD4ProlifSeuratObj, paste0('CD4ProlifSeuratObj', 
"_", Sys.Date(), '.rds'))
43 |
44 | CD8SeuratObj <- subset(seuratObj, idents = CD8_clusters)
45 | saveRDS(CD8SeuratObj, paste0('CD8SeuratObj', "_", Sys.Date(), '.rds'))
46 | CD8ProlifSeuratObj <- subset(seuratObj, idents = CD8_prolif_clusters)
47 | saveRDS(CD8ProlifSeuratObj, paste0('CD8ProlifSeuratObj', "_", Sys.Date(), '.rds'))
48 |
49 |
50 | # Clinical table restricted to "On treatment" rows, then split into responder / non-responder sample names.
51 | clinicT <- read_tsv("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/data/GSE179994/ClinicData.txt") %>%
52 |     filter(`Treatment Hx` %in% c("On treatment"))
53 | noResponseSamples <- clinicT %>%
54 |     filter(Response == "No") %>%
55 |     pull(`Sample Name`)
56 | ResponseSamples <- clinicT %>%
57 |     filter(Response == "Yes") %>%
58 |     pull(`Sample Name`)
59 | # Denominator: total number of cells per sample over all clusters.
60 | md <- seuratObj@meta.data
61 | TotalSampleCellNum <- md %>%
62 |     group_by(sample) %>%
63 |     count()
64 | # For each cluster, the fraction of every on-treatment sample's cells in that cluster; a sample with no cells in a cluster gets no row (not a 0).
65 | totalT <- c()
66 | for(tempCluster in unique(md$cluster)){
67 |     TNR <- md %>%
68 |         filter(cluster == tempCluster) %>%
69 |         group_by(sample) %>%
70 |         count() %>%
71 |         filter(sample %in% c(noResponseSamples, ResponseSamples))
72 |
73 |     TNR$Frac <- 0.0
74 |     TNR$Frac <- TNR$n / TotalSampleCellNum$n[match(TNR$sample, TotalSampleCellNum$sample)]
75 |
76 |     TNR$isResponse <- "NO"
77 |     TNR$isResponse[TNR$sample %in% ResponseSamples] <- "YES"
78 |
79 |     TNR$cluster <- tempCluster
80 |
81 |     totalT <- bind_rows(totalT, TNR)
82 | }
83 |
84 | # One responder-vs-non-responder comparison panel per cluster, stacked in a single column.
85 | g <- totalT %>%
86 |     ggstatsplot::grouped_ggbetweenstats(
87 |         data = .,
88 |         x = isResponse,
89 |         y = Frac,
90 |         grouping.var = cluster,
91 |         xlab = "",
92 |         ylab = "Sample fraction",
93 |         ## pairwise.display = "aiwl", # display only significant pairwise comparisons
94 |         p.adjust.method = "fdr", # adjust p-values for multiple tests using this method
95 |         ggtheme = theme_classic(),
96 |         package = "ggsci",
97 |         palette = "default_jco",
98 |         plotgrid.args = list(ncol = 1))
99 |
100 |
101 | ggsave(file.path(paste0("response_bar.pdf")), g, width = 200, height = 1200, units = "mm")
102 |
--------------------------------------------------------------------------------
/fig6/GSE173351/7_mapping_filter_proliferative/submitJob_Mapping.sh:
--------------------------------------------------------------------------------
1 | #BSUB -J submitJob_Mapping
2 | #BSUB -q short
3 | #BSUB -W 1:00
4 | #BSUB -n 1
5 | #BSUB -M 10
6 | #BSUB -R rusage[mem=10]
7 | #BSUB -B
8 | #BSUB -N
9 | #BSUB -u ychu2@mdanderson.org
10 | #BSUB -o /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_filter_proliferative/submitJob_Mapping.o.txt
11 | #BSUB -e /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_filter_proliferative/submitJob_Mapping.e.txt
12 | #BSUB -cwd /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_filter_proliferative/
13 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_filter_proliferative/submitJob_Mapping.o.txt
14 | rm -rf /rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_filter_proliferative/submitJob_Mapping.e.txt
15 | module load python/3.7.3-anaconda
16 | module load R/4.0.3
17 | #____----____----____
18 | # The #BSUB lines above describe this launcher job only; the mapping jobs it submits further down request their own queue and memory.
19 | PROJECT_FOLDER=/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7
20 | DATA_FOLDER=${PROJECT_FOLDER}/data
21 | RESULT_FOLDER=${PROJECT_FOLDER}/result
22 | CODE_FOLDER=${PROJECT_FOLDER}/code
23 | PIPELINE_FOLDER=${CODE_FOLDER}/pipeline
24 | SRC_FOLDER=${CODE_FOLDER}/src
25 | KNOWLEDGE_FOLDER=${PROJECT_FOLDER}/knowledge
26 | PIPELINE_NAME=GSE173351__7_mapping_filter_proliferative
27 | PIPELINE_PATH_NAME=GSE173351/7_mapping_filter_proliferative
28 | PROJECT_NAME=$(basename ${PROJECT_FOLDER})
29 | # Output root of this step: ${RESULT_FOLDER}/${PIPELINE_PATH_NAME}, created if it does not exist yet.
30 | OutDir=$RESULT_FOLDER/$PIPELINE_PATH_NAME
31 | if [ !
-d $OutDir ]; then
32 |     mkdir -p $OutDir
33 | fi
34 | # Every entry of QUERYDATAFOLDER is treated as one query dataset; its base name (CD8 / CD4 / P) selects the reference.
35 | runR="Rscript --no-save "
36 | QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE173351/6_extractT_proliferative/ForMapping"
37 | CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds"
38 | CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds"
39 | P_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/figureCode/result/0_write_sample_info/Proliferative_2022-10-20.rds"
40 | # NOTE: the next assignment replaces the PIPELINE_FOLDER derived from CODE_FOLDER earlier in the script; Mapping.R is looked up in this folder.
41 | PIPELINE_FOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/code/pipeline/private/GSE173351/7_mapping_filter_proliferative"
42 | OUT_ROOT=$OutDir
43 | # LSF queue, wall time, core count and memory requested for every mapping job submitted by the loop.
44 | qName=highmem
45 | wTime=24:00
46 | cn=1
47 | mem=100
48 | # Submit one Mapping.R job per query file.
49 | for dataPath in ${QUERYDATAFOLDER}/*; do
50 |     dataFile=$(basename $dataPath)
51 |     extension="${dataFile##*.}"
52 |     filename="${dataFile%.*}"
53 |     referenceDataPath=""
54 |     if [ "${filename}" = "CD8" ]; then
55 |         referenceDataPath=${CD8_ReferenceDataPath}
56 |     elif [ "${filename}" = "CD4" ]; then
57 |         referenceDataPath=${CD4_ReferenceDataPath}
58 |     elif [ "${filename}" = "P" ]; then
59 |         referenceDataPath=${P_ReferenceDataPath}
60 |     else  # an empty -r would only produce a job that fails, so unknown files are skipped
61 |         echo "Skipping ${dataPath}: no reference dataset configured for '${filename}'" >&2; continue
62 |     fi
63 |     JOBFOLDER=${OUT_ROOT}/${filename}
64 |     if [ !
-d $JOBFOLDER ]; then
65 |         mkdir -p $JOBFOLDER
66 |     fi
67 |     JOBNAME=Mapping_${filename}
68 |     if [ -f ${JOBFOLDER}/${JOBNAME}.o.txt ] || [ -f ${JOBFOLDER}/${JOBNAME}.e.txt ]; then
69 |         rm -f ${JOBFOLDER}/${JOBNAME}.*.txt
70 |     fi
71 |     bsub \
72 |         -J ${JOBNAME} \
73 |         -o ${JOBFOLDER}/${JOBNAME}.o.txt \
74 |         -e ${JOBFOLDER}/${JOBNAME}.e.txt \
75 |         -cwd ${JOBFOLDER} \
76 |         -q ${qName} \
77 |         -W ${wTime} \
78 |         -n ${cn} \
79 |         -M ${mem} \
80 |         -R "rusage[mem=${mem}]" \
81 |         -B \
82 |         -N \
83 |         -u ychu2@mdanderson.org \
84 |         /bin/bash -c "module load R/4.0.3; Rscript ${PIPELINE_FOLDER}/Mapping.R -r ${referenceDataPath} -q ${dataPath} -o ${JOBFOLDER}"
85 | done  # the rusage string above is quoted so the shell cannot treat [..] as a filename pattern
86 |
87 |
--------------------------------------------------------------------------------
/fig6/GSE144649/6_mapping_filter_genes/Mapping.R:
--------------------------------------------------------------------------------
1 | #--------------------------------------------------------------
2 | # filename : Mapping.R
3 | # Date : 2022-05-02
4 | # contributor : Yanshuo Chu
5 | # function: Mapping
6 | #--------------------------------------------------------------
7 | # Maps a query Seurat object onto a reference object and saves the query with transferred cluster labels.
8 | print('<==== Mapping.R ====>')
9 |
10 | suppressMessages({
11 |     library(optparse)
12 |     library(tidyverse)
13 |     library(Seurat)
14 |     library(SeuratObject)
15 |     library(cowplot)
16 | })
17 | # CLI: -r reference rds, -q query rds, -o output folder (none of them has a default).
18 | option_list = list(
19 |     make_option(c("-r","--referenceData"),
20 |                 type = 'character',
21 |                 help = 'data.rds',
22 |                 metavar = 'character'),
23 |     make_option(c("-q","--queryData"),
24 |                 type = 'character',
25 |                 help = 'data.rds',
26 |                 metavar = 'character'),
27 |     make_option(c("-o","--out"),
28 |                 type = 'character',
29 |                 help = 'out',
30 |                 metavar = 'character')
31 | );
32 |
33 | opt_parser = OptionParser(option_list = option_list);
34 | opt = parse_args(opt_parser);
35 |
36 |
37 | ## 
CD8_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD8_V6/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD8_V6_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds"
38 | ## CD4_ReferenceDataPath="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/CD4_V7/nPC_50/UMAP_dist_0.1_nneighbor_50/p1CD4_V7_UMAP_dist_0.1_nneighbor_50_CLUSTER_res_0.3/cluster.rds"
39 | ## QUERYDATAFOLDER="/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE186144/1_split/A"
40 | if (is.null(opt$referenceData) || is.null(opt$queryData) || is.null(opt$out)) { print_help(opt_parser); stop("referenceData, queryData and out must all be provided", call. = F) }  ## fail before the long-running mapping instead of at saveRDS
41 | refSeuratObj <- readRDS(opt$referenceData)
42 | querySeuratObj <- readRDS(opt$queryData)
43 | ## Two gene tables; their "marker" column is removed from the variable features below.
44 | cellCycleGeneT1 <- read_tsv("/rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/general/cell-cycle-gene-list.txt")
45 | cellCycleGeneT2 <- read_tsv("/rsrch3/home/genomic_med/ychu2/configs/scSeqs/database/general/regev_lab_cell_cycle_genes.txt")
46 | ## refSeuratObj <- readRDS(CD8_ReferenceDataPath)
47 | ## querySeuratObj <- readRDS("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/result/GSE144649/5_extractT/CD8.rds")
48 |
49 | DefaultAssay(refSeuratObj) <- "RNA"
50 | DefaultAssay(querySeuratObj) <- "RNA"
51 | ## Reference: normalize, pick variable features, drop the genes listed above, then scale and run PCA on the remaining features.
52 | refSeuratObj <- refSeuratObj %>%
53 |     NormalizeData(verbose = T) %>%
54 |     FindVariableFeatures(selection.method = "vst")
55 | hvgR = VariableFeatures(object = refSeuratObj)
56 | hvgR <- setdiff(hvgR, cellCycleGeneT1$marker)
57 | hvgR <- setdiff(hvgR, cellCycleGeneT2$marker)
58 | refSeuratObj <- refSeuratObj %>%
59 |     ScaleData(verbose = T) %>%
60 |     RunPCA(verbose = T, features = hvgR)
61 | ## Query: same steps; hvgR is reused and now holds the query's filtered variable features.
62 | querySeuratObj <- querySeuratObj %>%
63 |     NormalizeData(verbose = T) %>%
64 |     FindVariableFeatures(selection.method = "vst")
65 | hvgR = VariableFeatures(object = querySeuratObj)
66 | hvgR <- setdiff(hvgR, cellCycleGeneT1$marker)
67 | hvgR <- setdiff(hvgR, cellCycleGeneT2$marker)
68 | querySeuratObj <- querySeuratObj %>%
69 |     ScaleData(verbose = T) %>%
70 |     RunPCA(verbose = T, features = hvgR)
71 |
72 |
73 | temp.anchors <- 
FindTransferAnchors(reference = refSeuratObj,
74 |                                     query = querySeuratObj,
75 |                                     reference.reduction = "pca",
76 |                                     k.filter = NA,
77 |                                     dims = 1:20,
78 |                                     features = intersect(rownames(refSeuratObj), rownames(querySeuratObj)))
79 | ## Transfer the reference seurat_clusters labels onto the query cells.
80 | querySeuratObj <- MapQuery(anchorset = temp.anchors,
81 |                            reference = refSeuratObj,
82 |                            query = querySeuratObj,
83 |                            refdata = refSeuratObj$seurat_clusters)
84 | ## Store predicted.id as a factor with the reference cluster levels.
85 | querySeuratObj$predicted.id <-
86 |     factor(querySeuratObj$predicted.id,
87 |            levels = levels(refSeuratObj$seurat_clusters))
88 |
89 | saveRDS(querySeuratObj, file.path(opt$out, paste0('querySeuratObj', "_", Sys.Date(), '.rds')))
90 |
--------------------------------------------------------------------------------
/data_preprocess/0_run_seurat_pipeline/FindClusterJobs.sh:
--------------------------------------------------------------------------------
1 | ##!/usr/bin/env bash
2 | # Runs clustering, the common plots and an optional callback for one parameter combination; all settings are positional arguments.
3 | module load python/3.7.3-anaconda
4 | module load R/4.0.3
5 |
6 | mainscriptFolder=${1}
7 | inData=${2}
8 | currentFolder=${3}
9 | res=${4}
10 | reduction=${5}
11 | npc=${6}
12 | parentJobName=${7}
13 | toRunClustering=${8}
14 | toRunCommonAnalysis=${9}
15 | toRunCallBack=${10}
16 | callBackPath=${11}
17 |
18 | echo "FindClusterJobs parameters:
19 | mainscriptFolder=${mainscriptFolder}
20 | inData=${inData}
21 | currentFolder=${currentFolder}
22 | res=${res}
23 | reduction=${reduction}
24 | npc=${npc}
25 | parentJobName=${parentJobName}
26 | toRunClustering=${toRunClustering}
27 | toRunCommonAnalysis=${toRunCommonAnalysis}
28 | toRunCallBack=${toRunCallBack}
29 | callBackPath=${callBackPath}
30 | "
31 |
32 | runR="Rscript --no-save "
33 | # The flag is quoted so that a missing or empty argument compares as "not YES" instead of breaking the test.
34 | ###############################################################################
35 | #' Run Find Cluster '#
36 | ###############################################################################
37 | if [ "$toRunClustering" = "YES" ]; then
38 |     ${runR} ${HOME}/configs/public/pipeline/UMAP_CLUSTER_JOBS_EMBEDED/FindCluster.R -d ${inData} -o 
${currentFolder}/cluster.rds -r ${reduction} -n ${npc} -e ${res}
39 | fi
40 |
41 |
42 | # Everything below reads ${currentFolder}/cluster.rds and writes its plots into ${currentFolder}.
43 | ###############################################################################
44 | #' Run Common Analysis '#
45 | ###############################################################################
46 | srcD=${HOME}/configs/public/src
47 | ResD=${currentFolder}
48 | paramD=${HOME}/configs/public/params
49 | databaseD=${HOME}/configs/public/knowledge/database
50 | # The flag is quoted so that a missing or empty argument compares as "not YES" instead of breaking the test.
51 | if [ "$toRunCommonAnalysis" = "YES" ]; then
52 |
53 |     if [ ! -d ${ResD}/bubbleplot ]; then
54 |         mkdir -p ${ResD}/bubbleplot
55 |     fi
56 |     # Bubble plots for three marker lists.
57 |     ${runR} ${srcD}/bubble-plot.R -d ${ResD}/cluster.rds -o ${ResD}/bubbleplot -m ${databaseD}/TMarkers.txt
58 |     ${runR} ${srcD}/bubble-plot.R -d ${ResD}/cluster.rds -o ${ResD}/bubbleplot -m ${databaseD}/topLevel.txt
59 |     ${runR} ${srcD}/bubble-plot.R -d ${ResD}/cluster.rds -o ${ResD}/bubbleplot -m ${databaseD}/general/generalAll.txt
60 |
61 |     if [ ! -d ${ResD}/featureplot ]; then
62 |         mkdir -p ${ResD}/featureplot
63 |     fi
64 |
65 |     # ${runR} ${srcD}/feature-plot.R -d ${ResD}/cluster.rds -o ${ResD}/featureplot/topLevel -c ${paramD}/feature-plot-origin.json -m ${databaseD}/topLevel.txt
66 |     # ${runR} ${srcD}/feature-plot.R -d ${ResD}/cluster.rds -o ${ResD}/featureplot/tmarkers -c ${paramD}/feature-plot-origin.json -m ${databaseD}/TMarkers.txt
67 |     # Per-cluster QC PDF and the visualize.R step.
68 |     ${runR} ${srcD}/qc-by-cluster.R -d ${ResD}/cluster.rds -o ${ResD}/qc-by-cluster.pdf
69 |     ${runR} ${srcD}/visualize.R -d ${ResD}/cluster.rds
70 |     # ${runR} ${srcD}/visualize_batch.R -d ${ResD}/cluster.rds
71 |     # ${runR} ${srcD}/findmarker.r -d ${ResD}/cluster.rds -o ${ResD}/snn-single-markers.tsv
72 |
73 |     # ${runR} ${srcD}/snn-marker.R -d ${ResD}/cluster.rds -o ${ResD}/snn-markers.tsv -c ${paramD}/snn-marker.json
74 |     # ${runR} ${srcD}/snn-heatmap.R -d ${ResD}/cluster.rds -o ${ResD}/markersHeatmap.pdf -c ${paramD}/snn-heatmap.json -m ${ResD}/snn-markers.tsv -p heatmap
75 |     # python ${srcD}/statMarkers.py --markersTop 
${ResD}/snn-markers.tsv --markersDatabase ${databaseD}/immuneCellMarkerAllinBox_Yanshuo.txt --out ${ResD}/markers.ys_celltype.tsv 76 | # python ${srcD}/statMarkers.py --markersTop ${ResD}/markers.top.tsv --markersDatabase ${databaseD}/immuneCellMarkerAllinBox_Yanshuo.txt --out ${ResD}/markers.top.ys_celltype.tsv 77 | fi 78 | 79 | ############################################################################### 80 | #' Run Callback Script '# 81 | ############################################################################### 82 | if [ $toRunCallBack = "YES" ]; then 83 | if [ -f $callBackPath ]; then 84 | $callBackPath ${ResD}/cluster.rds 85 | fi 86 | fi 87 | --------------------------------------------------------------------------------