├── vignette_files ├── F6_L5IT.png ├── F2_SC_dimplot.png ├── F4_schart_vis.png ├── F5_scoloc_vis.png ├── F3_scst_coembed.png ├── F7_scoexp_heatmap.png ├── F8_modulescore_umap.png ├── F1_ST_spatialdimplot.png ├── F5_scoloc_vis_updated.png ├── F9_modulescore_schart.png └── CellTrek_logo_redesign.png ├── man ├── euc_dist.Rd ├── KL_.Rd ├── img_gs.Rd ├── rbfk.Rd ├── wgcna_gene_k.Rd ├── cor_remove.Rd ├── celltrek_repel.Rd ├── as_dummy_df.Rd ├── cc_gene_k.Rd ├── scoloc_vis.Rd ├── wgcna_wrapper.Rd ├── cc_wrapper.Rd ├── wcor.Rd ├── build_delaunayn.Rd ├── celltrek_vis.Rd ├── edge_odds.Rd ├── FindCorMarkers.Rd ├── celltrek_chart.Rd ├── KD_boot_mst.Rd ├── DT_boot_mst.Rd ├── kdist.Rd ├── scoloc.Rd ├── run_kdist.Rd ├── sp_grid_kern_bin.Rd ├── scoexp.Rd ├── celltrek_dist.Rd ├── celltrek_from_dist.Rd ├── traint.Rd ├── KL_boot_mst.Rd └── celltrek.Rd ├── CellTrek.Rproj ├── NAMESPACE ├── DESCRIPTION ├── R ├── FindCorMarkers.R ├── scoloc_vis.R ├── celltrek_vis.R ├── scoexp.R ├── scoloc.R └── celltrek.R └── README.md /vignette_files/F6_L5IT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/CellTrek/HEAD/vignette_files/F6_L5IT.png -------------------------------------------------------------------------------- /vignette_files/F2_SC_dimplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/CellTrek/HEAD/vignette_files/F2_SC_dimplot.png -------------------------------------------------------------------------------- /vignette_files/F4_schart_vis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/CellTrek/HEAD/vignette_files/F4_schart_vis.png -------------------------------------------------------------------------------- /vignette_files/F5_scoloc_vis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/CellTrek/HEAD/vignette_files/F5_scoloc_vis.png -------------------------------------------------------------------------------- /vignette_files/F3_scst_coembed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/CellTrek/HEAD/vignette_files/F3_scst_coembed.png -------------------------------------------------------------------------------- /vignette_files/F7_scoexp_heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/CellTrek/HEAD/vignette_files/F7_scoexp_heatmap.png -------------------------------------------------------------------------------- /vignette_files/F8_modulescore_umap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/CellTrek/HEAD/vignette_files/F8_modulescore_umap.png -------------------------------------------------------------------------------- /vignette_files/F1_ST_spatialdimplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/CellTrek/HEAD/vignette_files/F1_ST_spatialdimplot.png -------------------------------------------------------------------------------- /vignette_files/F5_scoloc_vis_updated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/CellTrek/HEAD/vignette_files/F5_scoloc_vis_updated.png -------------------------------------------------------------------------------- /vignette_files/F9_modulescore_schart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/CellTrek/HEAD/vignette_files/F9_modulescore_schart.png -------------------------------------------------------------------------------- /vignette_files/CellTrek_logo_redesign.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/CellTrek/HEAD/vignette_files/CellTrek_logo_redesign.png -------------------------------------------------------------------------------- /man/euc_dist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoloc.R 3 | \name{euc_dist} 4 | \alias{euc_dist} 5 | \title{Title} 6 | \usage{ 7 | euc_dist(x1, x2) 8 | } 9 | \arguments{ 10 | \item{x2}{} 11 | } 12 | \value{ 13 | 14 | } 15 | \description{ 16 | Title 17 | } 18 | -------------------------------------------------------------------------------- /man/KL_.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoloc.R 3 | \name{KL_} 4 | \alias{KL_} 5 | \title{Title} 6 | \usage{ 7 | KL_(X, eps = 1e-20) 8 | } 9 | \arguments{ 10 | \item{X}{Density matrix} 11 | 12 | \item{eps}{Small value added} 13 | } 14 | \value{ 15 | KL-divergence matrix 16 | } 17 | \description{ 18 | Title 19 | } 20 | \examples{ 21 | KL_res <- KL_(X=X, eps=1e-20) 22 | } 23 | -------------------------------------------------------------------------------- /man/img_gs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/celltrek_vis.R 3 | \name{img_gs} 4 | \alias{img_gs} 5 | \title{Convert RGB image to gray scale image} 6 | \usage{ 7 | img_gs(img) 8 | } 9 | \arguments{ 10 | \item{img}{Image data with RGB-channels} 11 | } 12 | \value{ 13 | A gray scale image 14 | } 15 | \description{ 16 | Convert RGB image to gray scale image 17 | } 18 | \examples{ 19 | img_gs <- img_gs(img_raw) 20 | } 21 | -------------------------------------------------------------------------------- /man/rbfk.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoexp.R 3 | \name{rbfk} 4 | \alias{rbfk} 5 | \title{Radial basis function kernel} 6 | \usage{ 7 | rbfk(dis_mat, sigm, zero_diag = T) 8 | } 9 | \arguments{ 10 | \item{dis_mat}{Distance matrix} 11 | 12 | \item{sigm}{Width of rbfk} 13 | 14 | \item{zero_diag}{} 15 | } 16 | \value{ 17 | 18 | } 19 | \description{ 20 | Radial basis function kernel 21 | } 22 | \examples{ 23 | rbfk(dis_mat, sigm, zero_diag=F) 24 | } 25 | -------------------------------------------------------------------------------- /CellTrek.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /man/wgcna_gene_k.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoexp.R 3 | \name{wgcna_gene_k} 4 | \alias{wgcna_gene_k} 5 | \title{Title} 6 | \usage{ 7 | wgcna_gene_k( 8 | wgcna_res, 9 | cor_mat, 10 | avg_cor_min = 0.5, 11 | min_gen = 10, 12 | max_gen = 100 13 | ) 14 | } 15 | \arguments{ 16 | \item{max_gen}{} 17 | } 18 | \value{ 19 | 20 | } 21 | \description{ 22 | Title 23 | } 24 | \examples{ 25 | wgcna_gene_k(wgcna_res, cor_mat, avg_cor_min=.5, min_gen=10, max_gen=100) 26 | } 27 | -------------------------------------------------------------------------------- /man/cor_remove.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoexp.R 3 | \name{cor_remove} 4 | \alias{cor_remove} 5 | \title{Remove low correlation genes until reaching threshold} 6 | \usage{ 7 | cor_remove(cor_mat, ave_cor_cut = 0.5, min_n = 5, max_n = 100, na_diag = F) 8 | } 9 | \arguments{ 10 | \item{na_diag}{} 11 | } 12 | \value{ 13 | 14 | } 15 | \description{ 16 | Remove low correlation genes until reaching threshold 17 | } 18 | \examples{ 19 | cor_remove(cor_mat, ave_cor_cut = 0.5, min_n=10, max_n=100, na_diag=F) 20 | } 21 | -------------------------------------------------------------------------------- /man/celltrek_repel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/celltrek.R 3 | \name{celltrek_repel} 4 | \alias{celltrek_repel} 5 | \title{Mannually repelling celltrek cells} 6 | \usage{ 7 | celltrek_repel(celltrek_inp, repel_r = 5, repel_iter = 10) 8 | } 9 | \arguments{ 10 | \item{celltrek_inp}{CellTrek(Seurat) object} 11 | 12 | \item{repel_r}{Repelling radius} 13 | 14 | \item{repel_iter}{Repelling iterations} 15 | } 16 | \value{ 17 | CellTrek(Seurat) object 18 | } 19 | \description{ 20 | Mannually repelling celltrek cells 21 | } 22 | -------------------------------------------------------------------------------- /man/as_dummy_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoloc.R 3 | \name{as_dummy_df} 4 | \alias{as_dummy_df} 5 | \title{Title} 6 | \usage{ 7 | as_dummy_df(data_inp, col_cell = "cell_names") 8 | } 9 | \arguments{ 10 | \item{data_inp}{Cell-coordinates matrix} 11 | 12 | \item{col_cell}{Column name of cell type, cell type names must be syntactically valid} 13 | } 14 | \value{ 15 | dummy data frame with cells on columns 16 | } 17 | \description{ 18 | Title 19 | } 20 | \examples{ 21 | cell_dummy_df <- as_dummy_df(celltrek_df, col_cell='Cell') 22 | } 23 | -------------------------------------------------------------------------------- /man/cc_gene_k.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoexp.R 3 | \name{cc_gene_k} 4 | \alias{cc_gene_k} 5 | \title{Consensus gene module selection} 6 | \usage{ 7 | cc_gene_k( 8 | cc_res, 9 | cor_mat, 10 | k = 8, 11 | avg_con_min = 0.5, 12 | avg_cor_min = 0.5, 13 | min_gen = 10, 14 | max_gen = 100 15 | ) 16 | } 17 | \arguments{ 18 | \item{max_gen}{} 19 | } 20 | \value{ 21 | 22 | } 23 | \description{ 24 | Consensus gene module selection 25 | } 26 | \examples{ 27 | cc_gene_k(cc_res, cor_mat, k=8, avg_con_min=.5, avg_cor_min=.5, min_gen=20, max_gen=100) 28 | } 29 | -------------------------------------------------------------------------------- /man/scoloc_vis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoloc_vis.R 3 | \name{scoloc_vis} 4 | \alias{scoloc_vis} 5 | \title{Visualization of SColoc results} 6 | \usage{ 7 | scoloc_vis(adj_mat, meta_data = NULL, directed = F) 8 | } 9 | \arguments{ 10 | \item{adj_mat}{Adjacent matrix} 11 | 12 | \item{meta_data}{Optional, must contain id column match with adj_mat col/rownames} 13 | 14 | \item{directed}{Generate a directed graph? (Default is false)} 15 | } 16 | \value{ 17 | shiny app 18 | } 19 | \description{ 20 | Visualization of SColoc results 21 | } 22 | \examples{ 23 | scoloc_vis(scoloc_test) 24 | } 25 | -------------------------------------------------------------------------------- /man/wgcna_wrapper.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoexp.R 3 | \name{wgcna_wrapper} 4 | \alias{wgcna_wrapper} 5 | \title{A wrapper function of WGCNA with wcor matrix as input} 6 | \usage{ 7 | wgcna_wrapper(sim_mat, powerVector = c(1:20), minClusterSize = 50, ...) 8 | } 9 | \arguments{ 10 | \item{sim_mat}{similarity matrix} 11 | 12 | \item{powerVector}{power vector} 13 | 14 | \item{...}{} 15 | } 16 | \value{ 17 | 18 | } 19 | \description{ 20 | A wrapper function of WGCNA with wcor matrix as input 21 | } 22 | \examples{ 23 | wgcna_wrapper(sim_mat, powerVector=c(1:20), minClusterSize=50) 24 | } 25 | -------------------------------------------------------------------------------- /man/cc_wrapper.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoexp.R 3 | \name{cc_wrapper} 4 | \alias{cc_wrapper} 5 | \title{A wrapper function of ConensusClusterPlus with some default parameters} 6 | \usage{ 7 | cc_wrapper( 8 | d, 9 | maxK = 8, 10 | reps = 20, 11 | distance = "spearman", 12 | verbose = F, 13 | plot = "png", 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{...}{} 19 | } 20 | \value{ 21 | 22 | } 23 | \description{ 24 | A wrapper function of ConensusClusterPlus with some default parameters 25 | } 26 | \examples{ 27 | cc_wrapper(d, maxK=8, reps=20, distance='spearman', verbose=T) 28 | } 29 | -------------------------------------------------------------------------------- /man/wcor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoexp.R 3 | \name{wcor} 4 | \alias{wcor} 5 | \title{Weighted cross correlation} 6 | \usage{ 7 | wcor(X, W, method = c("pearson", "spearman")[1], na_zero = T) 8 | } 9 | \arguments{ 10 | \item{X}{Expression matrix, n X p} 11 | 12 | \item{W}{Weight matrix, n X n} 13 | 14 | \item{method}{Correlation method, pearson or spearman} 15 | 16 | \item{na_zero}{Na to zero} 17 | } 18 | \value{ 19 | Weighted correlation matrix, p X p 20 | } 21 | \description{ 22 | Weighted cross correlation 23 | } 24 | \examples{ 25 | wcor(X=expr_test, W=rbfk_out, method='spearman') 26 | } 27 | -------------------------------------------------------------------------------- /man/build_delaunayn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoloc.R 3 | \name{build_delaunayn} 4 | \alias{build_delaunayn} 5 | \title{Build Delaunay triangulation network} 6 | \usage{ 7 | build_delaunayn(coord_df, return_name = T, dist_cutoff = NULL) 8 | } 9 | \arguments{ 10 | \item{coord_df}{Coordinates df} 11 | 12 | \item{return_name}{Return edge list with names?} 13 | 14 | \item{dist_cutoff}{Remove edges with distance >= dist_cutoff} 15 | } 16 | \value{ 17 | Delaunay triangulation network edge list 18 | } 19 | \description{ 20 | Build Delaunay triangulation network 21 | } 22 | \examples{ 23 | test <- build_delaunayn(coord_df, return_name=T) 24 | } 25 | -------------------------------------------------------------------------------- /man/celltrek_vis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/celltrek_vis.R 3 | \name{celltrek_vis} 4 | \alias{celltrek_vis} 5 | \title{Visualization of CellTrek results} 6 | \usage{ 7 | celltrek_vis(celltrek_df, img, scale_fac) 8 | } 9 | \arguments{ 10 | \item{celltrek_df}{CellTrek output meta data, must contain coord_x and coord_y} 11 | 12 | \item{img}{Image data} 13 | 14 | \item{scale_fac}{Scale factor} 15 | } 16 | \value{ 17 | Shiny GUI 18 | } 19 | \description{ 20 | Visualization of CellTrek results 21 | } 22 | \examples{ 23 | celltrek_vis(test_celltrek@meta.data, test_celltrek@images[[1]]@image, scale_fac=test_celltrek@images[[1]]@scale.factors$lowres) 24 | } 25 | -------------------------------------------------------------------------------- /man/edge_odds.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoloc.R 3 | \name{edge_odds} 4 | \alias{edge_odds} 5 | \title{Title} 6 | \usage{ 7 | edge_odds(el_inp, meta_data, col_cell = "cell_names") 8 | } 9 | \arguments{ 10 | \item{el_inp}{Edge list of Delaunay triangulation network} 11 | 12 | \item{meta_data}{Meta data, must contain column of cell type} 13 | 14 | \item{col_cell}{Column name of cell type, cell type names must be syntactically valid} 15 | } 16 | \value{ 17 | A list of 1.Network-based cell co-occurrence; 2.Cell co-occurrence log odds ratio 18 | } 19 | \description{ 20 | Title 21 | } 22 | \examples{ 23 | test <- edge_odds(el_inp, meta_data, col_cell='cell_names') 24 | } 25 | -------------------------------------------------------------------------------- /man/FindCorMarkers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FindCorMarkers.R 3 | \name{FindCorMarkers} 4 | \alias{FindCorMarkers} 5 | \title{Find highly correlated markers to target features} 6 | \usage{ 7 | FindCorMarkers(srt_inp, assay = "RNA", features, method = "spearman") 8 | } 9 | \arguments{ 10 | \item{srt_inp}{Seurat input} 11 | 12 | \item{features}{Features that genes correlated with} 13 | 14 | \item{method}{Correlation method} 15 | } 16 | \value{ 17 | 18 | } 19 | \description{ 20 | Find highly correlated markers to target features 21 | } 22 | \examples{ 23 | CorMarkers <- FindCorMarkers(srt_inp=test_seurat, assay='RNA', features=c('k_dist', 'sig_score'), method='spearman') 24 | } 25 | -------------------------------------------------------------------------------- /man/celltrek_chart.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/celltrek.R 3 | \name{celltrek_chart} 4 | \alias{celltrek_chart} 5 | \title{Title} 6 | \usage{ 7 | celltrek_chart( 8 | dist_mat, 9 | coord_df, 10 | dist_cut = 500, 11 | top_spot = 10, 12 | spot_n = 10, 13 | repel_r = 5, 14 | repel_iter = 10 15 | ) 16 | } 17 | \arguments{ 18 | \item{dist_mat}{Distance matrix of sc-st (sc in rows and st in columns)} 19 | 20 | \item{coord_df}{Coordinates data frame of st (must contain coord_x, coord_y columns, barcode rownames)} 21 | 22 | \item{dist_cut}{Distance cutoff} 23 | 24 | \item{top_spot}{Maximum number of spots that one cell can be charted} 25 | 26 | \item{spot_n}{Maximum number of cells that one spot can contain} 27 | 28 | \item{repel_r}{Repelling radius} 29 | 30 | \item{repel_iter}{Repelling iterations} 31 | } 32 | \value{ 33 | SC coordinates 34 | } 35 | \description{ 36 | Title 37 | } 38 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(FindCorMarkers) 4 | export(build_delaunayn) 5 | export(cc_wrapper) 6 | export(celltrek) 7 | export(celltrek_from_dist) 8 | export(celltrek_repel) 9 | export(celltrek_vis) 10 | export(img_gs) 11 | export(kdist) 12 | export(run_kdist) 13 | export(scoexp) 14 | export(scoloc) 15 | export(scoloc_vis) 16 | export(traint) 17 | export(wcor) 18 | export(wgcna_wrapper) 19 | import(MASS) 20 | import(RColorBrewer) 21 | import(Seurat) 22 | import(data.table) 23 | import(dbscan) 24 | import(dplyr) 25 | import(dynamicTreeCut) 26 | import(fastcluster) 27 | import(ggpubr) 28 | import(igraph) 29 | import(magrittr) 30 | import(philentropy) 31 | import(plotly) 32 | import(purrr) 33 | import(randomForestSRC) 34 | import(reshape2) 35 | import(scales) 36 | import(shiny) 37 | import(tibble) 38 | import(tidyr) 39 | import(visNetwork) 40 | importFrom(akima,interpp) 41 | importFrom(geometry,delaunayn) 42 | importFrom(packcircles,circleRepelLayout) 43 | -------------------------------------------------------------------------------- /man/KD_boot_mst.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoloc.R 3 | \name{KD_boot_mst} 4 | \alias{KD_boot_mst} 5 | \title{Title} 6 | \usage{ 7 | KD_boot_mst( 8 | meta_df, 9 | coord_df, 10 | col_cell = "cell_names", 11 | boot_n = 100, 12 | prop = 0.8, 13 | replace = T, 14 | k = 10 15 | ) 16 | } 17 | \arguments{ 18 | \item{meta_df}{Meta data, must contain cell type column} 19 | 20 | \item{coord_df}{Coordinates data, must be the same order of meta_df} 21 | 22 | \item{col_cell}{Column name of cell type, cell type names must be syntactically valid} 23 | 24 | \item{boot_n}{Number of bootstrapping iterations} 25 | 26 | \item{prop}{Subsample proportion} 27 | 28 | \item{replace}{should sampling be with replacement?} 29 | 30 | \item{k}{Number of NNs} 31 | } 32 | \value{ 33 | A list of 1. Bootstrap logOR distance; 2. MST consensus matrix 34 | } 35 | \description{ 36 | Title 37 | } 38 | \examples{ 39 | test <- KD_boot_mst(meta_df, coord_df, col_cell='cell_names', boot_n=100, prop=.8, eps=1e-5) 40 | } 41 | -------------------------------------------------------------------------------- /man/DT_boot_mst.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoloc.R 3 | \name{DT_boot_mst} 4 | \alias{DT_boot_mst} 5 | \title{Title} 6 | \usage{ 7 | DT_boot_mst( 8 | meta_df, 9 | coord_df, 10 | col_cell = "cell_names", 11 | boot_n = 100, 12 | prop = 0.8, 13 | replace = T, 14 | dist_cutoff = NULL 15 | ) 16 | } 17 | \arguments{ 18 | \item{meta_df}{Meta data, must contain cell type column} 19 | 20 | \item{coord_df}{Coordinates data, must be the same order of meta_df} 21 | 22 | \item{col_cell}{Column name of cell type, cell type names must be syntactically valid} 23 | 24 | \item{boot_n}{Number of bootstrapping iterations} 25 | 26 | \item{prop}{Subsample proportion} 27 | 28 | \item{replace}{should sampling be with replacement?} 29 | 30 | \item{dist_cutoff}{Remove edges with distance >= dist_cutoff} 31 | } 32 | \value{ 33 | A list of 1. Bootstrap logOR distance; 2. MST consensus matrix 34 | } 35 | \description{ 36 | Title 37 | } 38 | \examples{ 39 | test <- DT_boot_mst(meta_df, coord_df, col_cell='cell_names', boot_n=100, prop=.8) 40 | } 41 | -------------------------------------------------------------------------------- /man/kdist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoloc.R 3 | \name{kdist} 4 | \alias{kdist} 5 | \title{Calculated K-distance between query cells and reference cells based on their spatial coordinates} 6 | \usage{ 7 | kdist( 8 | inp_df, 9 | ref = NULL, 10 | ref_type = "all", 11 | que = NULL, 12 | k = 10, 13 | new_name = "kdist", 14 | keep_nn = F 15 | ) 16 | } 17 | \arguments{ 18 | \item{inp_df}{inp_df must contain cell_names, 'coord_x', 'coord_y' columns} 19 | 20 | \item{ref}{Reference groups} 21 | 22 | \item{ref_type}{'all' or 'each'} 23 | 24 | \item{que}{Query groups} 25 | 26 | \item{k}{The number of nearest neighbors} 27 | 28 | \item{new_name}{New name for kdist} 29 | 30 | \item{keep_nn}{Keep Nearest Neighboor id matrix?} 31 | } 32 | \value{ 33 | A list of 1. kdist data frame and 2. a list of knn id matrix 34 | } 35 | \description{ 36 | Calculated K-distance between query cells and reference cells based on their spatial coordinates 37 | } 38 | \examples{ 39 | kdist_out <- kdist(inp_df=test_df, ref=c('A', 'B', 'C'), ref_type='each', que=unique(test_df$cell_names), k=10, keep_nn=T) 40 | } 41 | -------------------------------------------------------------------------------- /man/scoloc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoloc.R 3 | \name{scoloc} 4 | \alias{scoloc} 5 | \title{SColoc module} 6 | \usage{ 7 | scoloc( 8 | celltrek_inp, 9 | col_cell = "cell_names", 10 | use_method = c("KL", "DT", "KD")[2], 11 | h = celltrek_inp@images[[1]]@scale.factors$spot_dis, 12 | n = 25, 13 | boot_n = 20, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{celltrek_inp}{CellTrek input} 19 | 20 | \item{col_cell}{Column name of cell type, cell type names must be syntactically valid} 21 | 22 | \item{use_method}{Use density-based Kullback Leibler divergence or Delaunay Triangulation network} 23 | 24 | \item{h}{Bandwidths for x and y directions, for KL, more details in kde2d function in MASS package} 25 | 26 | \item{n}{Number of grid points in each directions, for KL, more details in kde2d function in MASS package} 27 | 28 | \item{...}{See in boot_mst} 29 | } 30 | \value{ 31 | A list of 1.Bootstrap distance; 2.MST consensus matrix 32 | } 33 | \description{ 34 | SColoc module 35 | } 36 | \examples{ 37 | cell_scoloc <- scoloc(celltrek_inp, col_cell='cell_names', use_method='KL', h=140, n=25) 38 | } 39 | -------------------------------------------------------------------------------- /man/run_kdist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoloc.R 3 | \name{run_kdist} 4 | \alias{run_kdist} 5 | \title{Run K-distance with CellTrek(Seurat) object and add a metadata column} 6 | \usage{ 7 | run_kdist( 8 | celltrek_inp, 9 | grp_col = "cell_type", 10 | ref = NULL, 11 | ref_type = "all", 12 | que = NULL, 13 | k = 10, 14 | new_name = "kdist", 15 | keep_nn = F 16 | ) 17 | } 18 | \arguments{ 19 | \item{celltrek_inp}{SChart seurat input} 20 | 21 | \item{grp_col}{Column name in meta data for reference and query groups} 22 | 23 | \item{ref}{Reference groups} 24 | 25 | \item{ref_type}{'all' or 'each'} 26 | 27 | \item{que}{Query groups} 28 | 29 | \item{k}{The number of nearest neighbors} 30 | 31 | \item{new_name}{New name for kdist} 32 | 33 | \item{keep_nn}{Keep Nearest Neighboor id matrix?} 34 | } 35 | \value{ 36 | SChart seurat output 37 | } 38 | \description{ 39 | Run K-distance with CellTrek(Seurat) object and add a metadata column 40 | } 41 | \examples{ 42 | celltrek_test <- run_kdist(celltrek_inp=celltrek_inp, grp_col='cell_type', ref=c('A', 'B'), ref_type='each', que=unique(test_df$cell_names), k=10) 43 | } 44 | -------------------------------------------------------------------------------- /man/sp_grid_kern_bin.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoloc.R 3 | \name{sp_grid_kern_bin} 4 | \alias{sp_grid_kern_bin} 5 | \title{Title} 6 | \usage{ 7 | sp_grid_kern_bin( 8 | data, 9 | coord, 10 | min_num = 15, 11 | h, 12 | n = 25, 13 | tot_norm = TRUE, 14 | Xlim = range(coord[, 1]), 15 | Ylim = range(coord[, 2]) 16 | ) 17 | } 18 | \arguments{ 19 | \item{data}{Cell type dummy table} 20 | 21 | \item{coord}{Coordinates data with X and Y columns} 22 | 23 | \item{min_num}{For cells number < min_num, add 1 till min_num} 24 | 25 | \item{h}{Bandwidths for x and y directions, more details in kde2d function in MASS package} 26 | 27 | \item{n}{Number of grid points in each directions, more details in kde2d function in MASS package} 28 | 29 | \item{tot_norm}{Normalization by total?} 30 | 31 | \item{Xlim}{The limits of X-axis} 32 | 33 | \item{Ylim}{The limits of Y-axis} 34 | } 35 | \value{ 36 | Kenerl density on grid 37 | } 38 | \description{ 39 | Title 40 | } 41 | \examples{ 42 | kern_res <- sp_grid_kern_bin(data=cell_dummy_df, coord=coord_df, h=150, n=25, tot_norm=TRUE, Xlim=range(coord_df$X), Ylim=range(coor_df$Y)) 43 | } 44 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: CellTrek 2 | Title: Spatial Charting of Single Cell Transcriptomes in Tissues 3 | Version: 0.0.94 4 | Authors@R: c( 5 | person("Runmin", 'Wei', email='wander1021@gmail.com', role=c('aut', 'cre')), 6 | person("Siyuan", 'He', email='SHe2@mdanderson.org', role=c('aut', 'ctb')) ) 7 | Description: CellTrek is a computational toolkit that can directly map single cells back to their spatial coordinates in tissue sections based on scRNA-seq and ST data. The CellTrek toolkit also provides two downstream analysis modules, including SColoc for spatial colocalization analysis and SCoexp for spatial co-expression analysis. 8 | License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a 9 | license 10 | Encoding: UTF-8 11 | Roxygen: list(markdown = TRUE) 12 | RoxygenNote: 7.1.2 13 | Imports: 14 | akima, 15 | data.table, 16 | dbscan, 17 | dplyr, 18 | dynamicTreeCut, 19 | fastcluster, 20 | geometry, 21 | ggpubr, 22 | igraph, 23 | magrittr, 24 | MASS, 25 | packcircles, 26 | philentropy, 27 | plotly, 28 | purrr, 29 | randomForestSRC, 30 | RColorBrewer, 31 | reshape2, 32 | scales, 33 | Seurat, 34 | shiny, 35 | tibble, 36 | tidyr, 37 | visNetwork 38 | -------------------------------------------------------------------------------- /man/scoexp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoexp.R 3 | \name{scoexp} 4 | \alias{scoexp} 5 | \title{SCoexp module} 6 | \usage{ 7 | scoexp( 8 | celltrek_inp, 9 | sigm = NULL, 10 | assay = "RNA", 11 | gene_select = NULL, 12 | zero_cutoff = 5, 13 | cor_method = "spearman", 14 | approach = c("cc", "wgcna")[1], 15 | maxK = 8, 16 | k = 8, 17 | avg_con_min = 0.5, 18 | avg_cor_min = 0.5, 19 | min_gen = 20, 20 | max_gen = 100, 21 | keep_cc = T, 22 | keep_wgcna = T, 23 | keep_kern = T, 24 | keep_wcor = T, 25 | ... 26 | ) 27 | } 28 | \arguments{ 29 | \item{celltrek_inp}{CellTrek input on cell of interests} 30 | 31 | \item{approach}{Which approach to use? consensus clustering (cc) or weighted correlation network analysis (wgcna)} 32 | 33 | \item{keep_cc}{If TRUE, keep the cc model} 34 | 35 | \item{keep_wgcna}{If TRUE, keep the wgcna model} 36 | 37 | \item{...}{} 38 | } 39 | \value{ 40 | 41 | } 42 | \description{ 43 | SCoexp module 44 | } 45 | \examples{ 46 | scoexp(celltrek_inp, sigm=NULL, assay='RNA', gene_select=NULL, zero_cutoff=5, cor_method='spearman', approach=c('cc', 'wgcna')[1], maxK=8, k=8, avg_con_min=.5, avg_cor_min=.5, min_gen=20, max_gen=100, keep_cc=T, keep_wgcna=T, keep_kern=T, keep_wcor=T) 47 | } 48 | -------------------------------------------------------------------------------- /man/celltrek_dist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/celltrek.R 3 | \name{celltrek_dist} 4 | \alias{celltrek_dist} 5 | \title{Calculate the RF-distance between sc and st} 6 | \usage{ 7 | celltrek_dist( 8 | st_sc_int, 9 | int_assay = "traint", 10 | reduction = "pca", 11 | intp = T, 12 | intp_pnt = 10000, 13 | intp_lin = F, 14 | nPCs = 30, 15 | ntree = 1000, 16 | keep_model = T 17 | ) 18 | } 19 | \arguments{ 20 | \item{st_sc_int}{Seurat traint object} 21 | 22 | \item{int_assay}{Name of integration assay} 23 | 24 | \item{reduction}{Dimension reduction method used, usually pca} 25 | 26 | \item{intp}{If TRUE, do interpolation} 27 | 28 | \item{intp_pnt}{Interpolation point number} 29 | 30 | \item{intp_lin}{If TRUE, use linear interpolation} 31 | 32 | \item{nPCs}{Number of PCs used for CellTrek} 33 | 34 | \item{ntree}{Number of trees in random forest} 35 | 36 | \item{keep_model}{If TRUE, return the trained random forest model} 37 | } 38 | \value{ 39 | A list of 1. celltrek_distance matrix; 2. trained random forest model (optional) 40 | } 41 | \description{ 42 | Calculate the RF-distance between sc and st 43 | } 44 | \examples{ 45 | dist_test <- celltrek_dist(st_sc_int=st_sc_int, int_assay='traint', reduction='pca', intp = T, intp_pnt=10000, intp_lin=F, nPCs=30, ntree=1000, keep_model=T) 46 | } 47 | -------------------------------------------------------------------------------- /man/celltrek_from_dist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/celltrek.R 3 | \name{celltrek_from_dist} 4 | \alias{celltrek_from_dist} 5 | \title{CellTrek from a pre-computed SC-ST distance matrix} 6 | \usage{ 7 | celltrek_from_dist( 8 | dist_mat, 9 | coord_df, 10 | dist_cut, 11 | top_spot = 10, 12 | spot_n = 10, 13 | repel_r = 5, 14 | repel_iter = 10, 15 | sc_data, 16 | sc_assay = "RNA", 17 | st_data = NULL 18 | ) 19 | } 20 | \arguments{ 21 | \item{dist_mat}{Distance matrix of sc-st (sc in rows and st in columns)} 22 | 23 | \item{coord_df}{Coordinates data frame of st (must contain two columns as coord_x, coord_y and rownames as barcodes)} 24 | 25 | \item{dist_cut}{Distance cutoff} 26 | 27 | \item{top_spot}{Maximum number of spots that one cell can be charted} 28 | 29 | \item{spot_n}{Maximum number of cells that one spot can contain} 30 | 31 | \item{repel_r}{Repelling radius} 32 | 33 | \item{repel_iter}{Repelling iterations} 34 | 35 | \item{sc_data}{SC data} 36 | 37 | \item{sc_assay}{SC assay} 38 | 39 | \item{st_data}{ST data, optional} 40 | } 41 | \value{ 42 | A list of 1.Seurat object 43 | } 44 | \description{ 45 | CellTrek from a pre-computed SC-ST distance matrix 46 | } 47 | \examples{ 48 | celltrek_res <- celltrek_from_dist(dist_mat, coord_df, dist_cut, top_spot=10, spot_n=10, r=NULL, sc_data, sc_assay='RNA') 49 | } 50 | -------------------------------------------------------------------------------- /man/traint.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/celltrek.R 3 | \name{traint} 4 | \alias{traint} 5 | \title{Co-embedding ST and SC data using Seurat transfer anchors} 6 | \usage{ 7 | traint( 8 | st_data, 9 | sc_data, 10 | st_assay = "Spatial", 11 | sc_assay = "scint", 12 | norm = "LogNormalize", 13 | nfeatures = 2000, 14 | cell_names = "cell_names", 15 | coord_xy = c("imagerow", "imagecol"), 16 | gene_kept = NULL, 17 | ... 18 | ) 19 | } 20 | \arguments{ 21 | \item{st_data}{Seurat ST data object} 22 | 23 | \item{sc_data}{Seurat SC data object} 24 | 25 | \item{st_assay}{ST assay} 26 | 27 | \item{sc_assay}{SC assay} 28 | 29 | \item{norm}{normalization method: LogNormalize/SCTransform} 30 | 31 | \item{nfeatures}{number of features for integration} 32 | 33 | \item{cell_names}{cell cluster/type column name in SC meta data} 34 | 35 | \item{coord_xy}{coordinates column names in ST images slot} 36 | 37 | \item{gene_kept}{selected genes to be kept during integration} 38 | } 39 | \value{ 40 | Seurat object of SC-ST co-embedding 41 | } 42 | \description{ 43 | Co-embedding ST and SC data using Seurat transfer anchors 44 | } 45 | \examples{ 46 | st_sc_traint <- traint(st_data=brain_st, sc_data=brain_sc, st_assay='Spatial', sc_assay='scint', nfeatures=2000, cell_names='cell_names', coord_xy=c('imagerow', 'imagecol'), gene_kept=NULL) 47 | } 48 | -------------------------------------------------------------------------------- /R/FindCorMarkers.R: -------------------------------------------------------------------------------- 1 | #' Find highly correlated markers to target features 2 | #' 3 | #' @param srt_inp Seurat input 4 | #' @param assay 5 | #' @param features Features that genes correlated with 6 | #' @param method Correlation method 7 | #' 8 | #' @return 9 | #' @export 10 | #' 11 | #' @import magrittr 12 | #' 13 | #' @examples CorMarkers <- FindCorMarkers(srt_inp=test_seurat, assay='RNA', features=c('k_dist', 'sig_score'), method='spearman') 14 | FindCorMarkers <- function(srt_inp, assay='RNA', features, method='spearman') { 15 | exp_dat <- as.matrix(srt_inp[[assay]]@data) 16 | output_df <- data.frame(p_val=numeric(), cor=numeric(), p_val_adj=numeric(), feature=character(), gene=character()) 17 | if (all(features %in% colnames(srt_inp@meta.data))) 18 | for (i in 1:length(features)) { 19 | feature_i <- features[i] 20 | cat('Calculating feature:', feature_i, '... \n') 21 | feature_dat <- srt_inp@meta.data[, feature_i] 22 | cor_test <- apply(exp_dat, 1, function(row_x) { 23 | cor_test_temp=cor.test(row_x, feature_dat, na.action='na.omit', method=method) 24 | c(p_val=as.numeric(cor_test_temp$p.value), cor=as.numeric(cor_test_temp$estimate)) 25 | }) %>% t %>% data.frame 26 | cor_test$p_val_adj <- p.adjust(cor_test$p_val, method='bonferroni') 27 | cor_test$feature <- feature_i 28 | cor_test$gene <- gsub('(.*)\\..*', '\\1', rownames(exp_dat)) 29 | output_df <- rbind(output_df, cor_test) 30 | } 31 | output_df %<>% na.omit 32 | output_df 33 | } 34 | -------------------------------------------------------------------------------- /man/KL_boot_mst.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoloc.R 3 | \name{KL_boot_mst} 4 | \alias{KL_boot_mst} 5 | \title{Title} 6 | \usage{ 7 | KL_boot_mst( 8 | dummy_df, 9 | coord_df, 10 | min_num = 15, 11 | Xlim = range(coord_df[, 1]), 12 | Ylim = range(coord_df[, 2]), 13 | boot_n = 100, 14 | prop = 0.8, 15 | replace = T, 16 | h = 150, 17 | n = 25, 18 | tot_norm = T, 19 | eps = 1e-20 20 | ) 21 | } 22 | \arguments{ 23 | \item{dummy_df}{Cell type dummy df} 24 | 25 | \item{coord_df}{Coordinates df} 26 | 27 | \item{min_num}{For cells number < min_num, add 1 till min_num} 28 | 29 | \item{Xlim}{The limits of X-axis} 30 | 31 | \item{Ylim}{The limits of Y-axis} 32 | 33 | \item{boot_n}{Number of bootstrapping iterations} 34 | 35 | \item{prop}{Subsample proportion} 36 | 37 | \item{replace}{should sampling be with replacement?} 38 | 39 | \item{h}{Bandwidths for x and y directions, more details in kde2d function in MASS package} 40 | 41 | \item{n}{Number of grid points in each directions, more details in kde2d function in MASS package} 42 | 43 | \item{tot_norm}{Normalization by total?} 44 | 45 | \item{eps}{Small value when calculate KL-divergence} 46 | } 47 | \value{ 48 | A list of 1. Bootstrap KL-divergence; 2. MST consensus matrix 49 | } 50 | \description{ 51 | Title 52 | } 53 | \examples{ 54 | boot_mst_res <- KL_boot_mst(dummy_df=cell_type_dummy, coord_df=range(coord_df$X), Xlim=range(coord_df$Y), boot_n=100, prop=0.8, h=150, n=25, tot_norm=T, eps=1e-20) 55 | } 56 | -------------------------------------------------------------------------------- /man/celltrek.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/celltrek.R 3 | \name{celltrek} 4 | \alias{celltrek} 5 | \title{The core function of CellTrek} 6 | \usage{ 7 | celltrek( 8 | st_sc_int, 9 | int_assay = "traint", 10 | sc_data = NULL, 11 | sc_assay = "RNA", 12 | reduction = "pca", 13 | intp = T, 14 | intp_pnt = 10000, 15 | intp_lin = F, 16 | nPCs = 30, 17 | ntree = 1000, 18 | dist_thresh = 0.4, 19 | top_spot = 10, 20 | spot_n = 10, 21 | repel_r = 5, 22 | repel_iter = 10, 23 | keep_model = F, 24 | ... 25 | ) 26 | } 27 | \arguments{ 28 | \item{st_sc_int}{Seurat traint object} 29 | 30 | \item{int_assay}{Integration assay ('traint')} 31 | 32 | \item{sc_data}{SC data, optional} 33 | 34 | \item{sc_assay}{SC assay} 35 | 36 | \item{reduction}{Dimension reduction method, usually 'pca'} 37 | 38 | \item{intp}{If True, do interpolation} 39 | 40 | \item{intp_pnt}{Number of interpolation points} 41 | 42 | \item{intp_lin}{If Ture, do linear interpolation} 43 | 44 | \item{nPCs}{Number of PCs} 45 | 46 | \item{ntree}{Number of Trees} 47 | 48 | \item{dist_thresh}{Distance threshold} 49 | 50 | \item{top_spot}{Maximum number of spots that one cell can be charted} 51 | 52 | \item{spot_n}{Maximum number of cells that one spot can contain} 53 | 54 | \item{repel_r}{Repelling radius} 55 | 56 | \item{repel_iter}{Repelling iterations} 57 | 58 | \item{keep_model}{If TRUE, return the trained random forest model} 59 | 60 | \item{...}{} 61 | } 62 | \value{ 63 | Seurat object 64 | } 65 | \description{ 66 | The core function of CellTrek 67 | } 68 | \examples{ 69 | celltrek_res <- celltrek(st_sc_int, int_assay='traint', sc_data=NULL, sc_assay='RNA', reduction='pca', intp=T, intp_pnt=10000, intp_lin=F, nPCs=30, ntree=1000, dist_thresh=.4, top_spot=10, spot_n=10, r=NULL, keep_model=F, ...) 70 | } 71 | -------------------------------------------------------------------------------- /R/scoloc_vis.R: -------------------------------------------------------------------------------- 1 | #' Visualization of SColoc results 2 | #' 3 | #' @param adj_mat Adjacent matrix 4 | #' @param meta_data Optional, must contain id column match with adj_mat col/rownames 5 | #' @param directed Generate a directed graph? (Default is false) 6 | #' 7 | #' @return shiny app 8 | #' @export 9 | #' 10 | #' @import reshape2 11 | #' @import magrittr 12 | #' @import shiny 13 | #' @import dplyr 14 | #' @import ggpubr 15 | #' @import visNetwork 16 | #' 17 | #' @examples scoloc_vis(scoloc_test) 18 | scoloc_vis <- function(adj_mat, meta_data=NULL, directed=F) { 19 | mst_cons_am <- adj_mat 20 | mst_cons_node <- data.frame(id=rownames(mst_cons_am), label=rownames(mst_cons_am)) 21 | 22 | if (!directed) mst_cons_am[upper.tri(mst_cons_am, diag = T)] <- NA 23 | 24 | mst_cons_am <- data.frame(id=rownames(mst_cons_am), mst_cons_am, check.names=F) 25 | mst_cons_edge <- reshape2::melt(mst_cons_am) %>% na.omit() %>% magrittr::set_colnames(c('from', 'to', 'value')) 26 | 27 | app <- list( 28 | ui=fluidPage( 29 | sidebarLayout( 30 | sidebarPanel( 31 | sliderInput('edge_val', 'Edge Value Cutoff', 32 | min=round(range(mst_cons_edge$value)[1], 2), max=round(range(mst_cons_edge$value)[2], 2), value=round(range(mst_cons_edge$value)[1], 2), step=diff(round(range(mst_cons_edge$value), 2))/100), 33 | selectInput(inputId='node_col', label='Color', choices=c('None', colnames(meta_data)), selected='None'), 34 | selectInput(inputId='node_size', label='Size', choices=c('None', colnames(meta_data)), selected='None'), 35 | checkboxInput(inputId='smooth', label='Smooth', value=FALSE), 36 | checkboxInput(inputId='physics', label='Physics', value=FALSE), 37 | numericInput('mass', 'Mass', value=.5, step=.01), 38 | sliderInput('fontsize', 'FontSize', value=15, min=5, max=25), 39 | tags$hr(), 40 | actionButton('StopID', 'Stop') 41 | ), 42 | mainPanel(visNetworkOutput("network", height = '800px')) 43 | ) 44 | ), 45 | server=function(input, output) { 46 | 47 | observeEvent(input$StopID, { 48 | stopApp() 49 | }) 50 | 51 | output$network <- renderVisNetwork({ 52 | if (!is.null(meta_data)) { 53 | if (input$node_col=='color') { 54 | col_colmn <- as.character(input$node_col) 55 | col_df <- data.frame(id=meta_data$id, color=meta_data[, col_colmn]) 56 | mst_cons_node <- dplyr::left_join(mst_cons_node, col_df, by = "id") %>% data.frame 57 | } else if (input$node_col!='None') { 58 | col_colmn <- as.character(input$node_col) 59 | col_df <- data.frame(id=meta_data$id, col_=meta_data[, col_colmn]) 60 | node_cols <- ggpubr::get_palette('Set1', length(unique(col_df$col_))) 61 | names(node_cols) <- unique(col_df$col_) 62 | col_df$color <- node_cols[col_df$col_] 63 | mst_cons_node <- dplyr::left_join(mst_cons_node, col_df, by = "id") %>% data.frame 64 | } 65 | if (input$node_size!='None') { 66 | size_colmn <- as.character(input$node_size) 67 | size_df <- data.frame(id=meta_data$id, value=as.numeric(meta_data[, size_colmn])/sum(as.numeric(meta_data[, size_colmn]))) 68 | mst_cons_node <- dplyr::left_join(mst_cons_node, size_df, by = "id") %>% data.frame 69 | } 70 | } 71 | mst_cons_edge <- mst_cons_edge[mst_cons_edge$value > input$edge_val, ] 72 | visNetwork::visNetwork(mst_cons_node, mst_cons_edge) %>% 73 | visNetwork::visNodes(mass=input$mass, size=15, font = list(size=input$fontsize)) %>% 74 | visNetwork::visEdges(smooth=input$smooth, physics=input$physics, 75 | arrows=list(to=list(enabled=directed, scaleFactor=.2)), 76 | color=list(color='rgba(132, 132, 132, .5)')) 77 | }) 78 | } 79 | ) 80 | shiny::runApp(app) 81 | } 82 | 83 | -------------------------------------------------------------------------------- /R/celltrek_vis.R: -------------------------------------------------------------------------------- 1 | #' Convert RGB image to gray scale image 2 | #' 3 | #' @param img Image data with RGB-channels 4 | #' 5 | #' @return A gray scale image 6 | #' @export 7 | #' 8 | #' @examples img_gs <- img_gs(img_raw) 9 | img_gs <- function (img) { 10 | img_out <- img[,,1]+img[,,2]+img[,,3] 11 | img_out <- img_out/max(img_out) 12 | return(img_out) 13 | } 14 | 15 | #' Visualization of CellTrek results 16 | #' 17 | #' @param celltrek_df CellTrek output meta data, must contain coord_x and coord_y 18 | #' @param img Image data 19 | #' @param scale_fac Scale factor 20 | #' 21 | #' @return Shiny GUI 22 | #' @export 23 | #' 24 | #' @import shiny 25 | #' @import plotly 26 | #' @import RColorBrewer 27 | #' 28 | #' @examples celltrek_vis(test_celltrek@meta.data, test_celltrek@images[[1]]@image, scale_fac=test_celltrek@images[[1]]@scale.factors$lowres) 29 | celltrek_vis <- function(celltrek_df, img, scale_fac) { 30 | 31 | img_fact <- scale_fac 32 | img_temp <- img 33 | img_data <- celltrek_df 34 | img_data$coord_x_new=img_data$coord_y*img_fact 35 | img_data$coord_y_new=dim(img_temp)[1]-img_data$coord_x*img_fact 36 | if (!('id_new' %in% colnames(img_data))) img_data$id_new <- rownames(img_data) 37 | 38 | app <- list( 39 | ui=fluidPage( 40 | titlePanel('CellTrek visualization'), 41 | sidebarLayout(sidebarPanel(width=3, 42 | selectInput(inputId='color_inp', label='Color', choices=colnames(img_data), selected='None'), 43 | radioButtons(inputId='color_typ', label='Type', choices=c('Categorical', 'Continuous'), selected='Categorical'), 44 | selectInput(inputId='shape_inp', label='Shape', choices=c('None', colnames(img_data)), selected='None'), 45 | actionButton('Plot', 'Plot'), 46 | tags$hr(), 47 | textInput('colID', 'Add Type:'), 48 | actionButton('AddID', 'Add'), 49 | downloadButton('downloadData', 'Download'), 50 | tags$hr(), 51 | actionButton('StopID', 'Stop')), 52 | mainPanel(plotlyOutput('CellTrek', height='1000px', width='1200px'), 53 | dataTableOutput('Tab_temp'))) 54 | ), 55 | 56 | server=function(input, output, session) { 57 | options(warn=-1) 58 | data_react <- reactiveValues() 59 | data_react$DF <- data.frame(id_new=character(), coord_x=numeric(), coord_y=numeric(), add_col=character()) 60 | 61 | observeEvent(input$StopID, { 62 | stopApp() 63 | }) 64 | 65 | observeEvent(input$Plot, { 66 | color_var <- isolate(input$color_inp) 67 | type_var <- isolate(input$color_typ) 68 | shape_var <- isolate(input$shape_inp) 69 | 70 | if (type_var=='Categorical') { 71 | output$CellTrek <- renderPlotly({ 72 | img_data$color_var <- factor(img_data[, color_var]) 73 | 74 | if (shape_var=='None') {img_data$shape_var <- ''} 75 | else {img_data$shape_var <- factor(img_data[, shape_var])} 76 | 77 | if (length(levels(img_data$color_var))<=9) {pnt_colors <- brewer.pal(length(levels(img_data$color_var)), "Set1")} 78 | else {pnt_colors <- colorRampPalette(brewer.pal(9, "Set1"))(length(levels(img_data$color_var)))} 79 | 80 | plotly::plot_ly(d=img_data, x=~coord_x_new, y=~coord_y_new, customdata=~id_new, 81 | color=~color_var, type = 'scatter', mode = 'markers', text = ~color_var, symbol=~shape_var, 82 | colors=pnt_colors, 83 | marker = list( 84 | line = list(color = 'rgb(1, 1, 1)', width = .5), 85 | size=8, 86 | opacity=.8)) %>% 87 | plotly::layout( 88 | xaxis = list(range = c(0, dim(img_temp)[2]), showgrid = FALSE, showline = FALSE), 89 | yaxis = list(range = c(0, dim(img_temp)[1]), showgrid = FALSE, showline = FALSE), 90 | images = list(source = plotly::raster2uri(as.raster(img_temp)), x=0, y=0, 91 | sizex=dim(img_temp)[2], sizey=dim(img_temp)[1], 92 | xref = "x", yref = "y", xanchor = "left", yanchor = "bottom", layer = "below", sizing = "stretch")) 93 | })} 94 | if (type_var=='Continuous') { 95 | output$CellTrek <- renderPlotly({ 96 | img_data$color_var <- img_data[, color_var] 97 | if (shape_var=='None') {img_data$shape_var <- ''} 98 | else {img_data$shape_var <- factor(img_data[, shape_var])} 99 | plotly::plot_ly(d=img_data, x=~coord_x_new, y=~coord_y_new, customdata=~id_new, 100 | color=~color_var, type = 'scatter', mode = 'markers', text=~color_var, symbol=~shape_var, 101 | colors=c('#377EB8', 'white', '#E41A1C'), 102 | marker = list(line = list(color = 'rgb(1, 1, 1)', width = .5), size=8, opacity=.8)) %>% 103 | plotly::layout(xaxis = list(range = c(0, dim(img_temp)[2]), showgrid = FALSE, showline = FALSE), 104 | yaxis = list(range = c(0, dim(img_temp)[1]), showgrid = FALSE, showline = FALSE), 105 | images = list(source = plotly::raster2uri(as.raster(img_temp)), x=0, y=0, 106 | sizex=dim(img_temp)[2], sizey=dim(img_temp)[1], 107 | xref = "x", yref = "y", xanchor = "left", yanchor = "bottom", layer = "below", sizing = "stretch")) 108 | })} 109 | }) 110 | 111 | observeEvent(input$AddID, { 112 | tab_temp <- event_data('plotly_selected') 113 | if (!is.null(tab_temp)) { 114 | data_temp <- img_data[match(tab_temp$customdata, img_data$id_new), c('id_new', 'coord_x', 'coord_y')] 115 | data_temp$add_col <- input$colID 116 | data_react$DF <- bind_rows(data_react$DF, data_temp) 117 | output$Tab_temp <- renderDataTable(data_react$DF) 118 | } 119 | }) 120 | output$downloadData <- downloadHandler( 121 | filename = function() { 122 | paste("changemyname.csv", sep = ",") 123 | }, 124 | content = function(file) { 125 | write.csv(data_react$DF, file, row.names = F) 126 | } 127 | ) 128 | } 129 | ) 130 | shiny::runApp(app) 131 | } 132 | 133 | -------------------------------------------------------------------------------- /R/scoexp.R: -------------------------------------------------------------------------------- 1 | #' Radial basis function kernel 2 | #' 3 | #' @param dis_mat Distance matrix 4 | #' @param sigm Width of rbfk 5 | #' @param zero_diag 6 | #' 7 | #' @return 8 | #' 9 | #' @examples rbfk(dis_mat, sigm, zero_diag=F) 10 | rbfk <- function (dis_mat, sigm, zero_diag=T) { 11 | rbfk_out <- exp(-1*(dis_mat^2)/(2*sigm^2)) 12 | if (zero_diag) diag(rbfk_out) <- 0 13 | return(rbfk_out) 14 | } 15 | 16 | #' Weighted cross correlation 17 | #' 18 | #' @param X Expression matrix, n X p 19 | #' @param W Weight matrix, n X n 20 | #' @param method Correlation method, pearson or spearman 21 | #' @param na_zero Na to zero 22 | #' 23 | #' @return Weighted correlation matrix, p X p 24 | #' @export 25 | #' 26 | #' @import magrittr 27 | #' 28 | #' @examples wcor(X=expr_test, W=rbfk_out, method='spearman') 29 | wcor <- function(X, W, method=c('pearson', 'spearman')[1], na_zero=T) { 30 | if (method=='spearman') X <- apply(X, 2, rank) 31 | X <- scale(X) 32 | X[is.nan(X)] <- NA 33 | W_cov_temp <- (t(X) %*% W %*% X) 34 | W_diag_mat <- sqrt(diag(W_cov_temp) %*% t(diag(W_cov_temp))) 35 | cor_mat <- W_cov_temp/W_diag_mat 36 | if (na_zero) cor_mat[which(is.na(cor_mat), arr.ind=T)] <- 0 37 | cor_mat 38 | } 39 | 40 | #' Remove low correlation genes until reaching threshold 41 | #' 42 | #' @param cor_mat 43 | #' @param ave_cor_cut 44 | #' @param min_n 45 | #' @param max_n 46 | #' @param na_diag 47 | #' 48 | #' @return 49 | #' 50 | #' 51 | #' @examples cor_remove(cor_mat, ave_cor_cut = 0.5, min_n=10, max_n=100, na_diag=F) 52 | cor_remove <- function (cor_mat, ave_cor_cut = 0.5, min_n=5, max_n=100, na_diag=F) { 53 | if (na_diag) { 54 | diag(cor_mat) <- NA 55 | } 56 | cor_mat_temp <- cor_mat 57 | cor_ave <- rowMeans(cor_mat_temp, na.rm = T) 58 | cor_max <- max(cor_mat_temp, na.rm = T) 59 | if ((cor_max < ave_cor_cut)|(nrow(cor_mat_temp)=max_n) { 66 | cor_mat_temp <- cor_mat_temp[-cor_min_idx, -cor_min_idx] 67 | cor_ave <- rowMeans(cor_mat_temp, na.rm = T) 68 | cor_min <- min(cor_ave) 69 | cor_min_idx <- which.min(cor_ave) 70 | idx <- idx + 1 71 | } 72 | } 73 | return (cor_mat_temp) 74 | } 75 | 76 | #' A wrapper function of ConensusClusterPlus with some default parameters 77 | #' 78 | #' @param d 79 | #' @param maxK 80 | #' @param reps 81 | #' @param distance 82 | #' @param verbose 83 | #' @param ... 84 | #' 85 | #' @return 86 | #' @export 87 | #' 88 | #' 89 | #' @examples cc_wrapper(d, maxK=8, reps=20, distance='spearman', verbose=T) 90 | cc_wrapper <- function(d, maxK=8, reps=20, distance='spearman', verbose=F, plot='png', ...) { 91 | cc_output <- ConsensusClusterPlus::ConsensusClusterPlus(d, maxK=maxK, reps=reps, distance=distance, verbose=verbose, plot=plot, ...) 92 | return(cc_output) 93 | } 94 | 95 | #' Consensus gene module selection 96 | #' 97 | #' @param cc_res 98 | #' @param cor_mat 99 | #' @param k 100 | #' @param avg_con_min 101 | #' @param avg_cor_min 102 | #' @param min_gen 103 | #' @param max_gen 104 | #' 105 | #' @return 106 | #' 107 | #' @import magrittr 108 | #' 109 | #' @examples cc_gene_k(cc_res, cor_mat, k=8, avg_con_min=.5, avg_cor_min=.5, min_gen=20, max_gen=100) 110 | cc_gene_k <- function(cc_res, cor_mat, k=8, avg_con_min=.5, avg_cor_min=.5, min_gen=10, max_gen=100) { 111 | cc_res_k <- cc_res[[k]] 112 | con_k <- cc_res_k$consensusMatrix %>% magrittr::set_rownames(names(cc_res_k$consensusClass)) %>% magrittr::set_colnames(names(cc_res_k$consensusClass)) 113 | res <- list() 114 | for (k_i in 1:k) { 115 | cat(k_i, ' ') 116 | gene_k <- names(cc_res_k$consensusClass)[cc_res_k$consensusClass==k_i] 117 | cor_mat_temp <- cor_mat[gene_k, gene_k, drop=F] 118 | cor_mat_temp <- cor_remove(cor_mat_temp, max_n=1000, min_n=1, ave_cor_cut=avg_cor_min) 119 | gene_k <- rownames(cor_mat_temp) 120 | if (is.null(gene_k)) gene_k <- NA 121 | con_mat_temp <- con_k[gene_k, gene_k, drop=F] 122 | con_mat_temp <- cor_remove(con_mat_temp, max_n=max_gen, min_n=1, ave_cor_cut=avg_con_min) 123 | gene_k <- rownames(con_mat_temp) 124 | if (is.null(gene_k)) gene_k <- NA 125 | res[[k_i]] <- gene_k 126 | } 127 | names(res) <- paste0('k', 1:k) 128 | res <- res[sapply(res, function(x) length(x)>=min_gen)] 129 | res 130 | } 131 | 132 | #' A wrapper function of WGCNA with wcor matrix as input 133 | #' 134 | #' @param sim_mat similarity matrix 135 | #' @param powerVector power vector 136 | #' @param minClusterSize 137 | #' @param ... 138 | #' 139 | #' @return 140 | #' @export 141 | #' 142 | #' @import purrr 143 | #' @import fastcluster 144 | #' @import dynamicTreeCut 145 | #' 146 | #' @examples wgcna_wrapper(sim_mat, powerVector=c(1:20), minClusterSize=50) 147 | wgcna_wrapper <- function(sim_mat, powerVector=c(1:20), minClusterSize=50, ...) { 148 | res <- list() 149 | ## map to [0, 1] and symmetric ## 150 | sim_mat <- sim_mat - min(sim_mat, na.rm = T) 151 | sim_mat <- sim_mat/max(sim_mat, na.rm = T) 152 | sim_mat <- (sim_mat + t(sim_mat))/2 153 | sft_thresh <- WGCNA::pickSoftThreshold.fromSimilarity(sim_mat, powerVector=powerVector) 154 | pw <- sft_thresh$powerEstimate 155 | res$sft_thresh <- sft_thresh 156 | disTom <- 1 - (WGCNA::adjacency.fromSimilarity(sim_mat, power=pw) %>% WGCNA::TOMsimilarity(.)) 157 | geneTree <- fastcluster::hclust(as.dist(disTom), method = 'average') 158 | dynamicMods_cut <- dynamicTreeCut::cutreeDynamic(dendro = geneTree, method="tree", minClusterSize=minClusterSize) 159 | gene_vec <- dynamicMods_cut %>% purrr::set_names(rownames(sim_mat)) 160 | gene_vec <- gene_vec[gene_vec!=0] 161 | res$gv <- gene_vec 162 | return(res) 163 | } 164 | 165 | 166 | #' 167 | #' 168 | #' @param wgcna_res 169 | #' @param cor_mat 170 | #' @param avg_cor_min 171 | #' @param min_gen 172 | #' @param max_gen 173 | #' 174 | #' @return 175 | #' 176 | #' @examples wgcna_gene_k(wgcna_res, cor_mat, avg_cor_min=.5, min_gen=10, max_gen=100) 177 | wgcna_gene_k <- function(wgcna_res, cor_mat, avg_cor_min=.5, min_gen=10, max_gen=100) { 178 | res <- list() 179 | gene_vec <- wgcna_res$gv 180 | for (k_i in sort(unique(gene_vec))) { 181 | cat(k_i, ' ') 182 | gene_k <- names(gene_vec)[gene_vec==k_i] 183 | cor_mat_temp <- cor_mat[gene_k, gene_k, drop=F] 184 | cor_mat_temp <- cor_remove(cor_mat_temp, max_n=max_gen, min_n=1, ave_cor_cut=avg_cor_min) 185 | gene_k <- rownames(cor_mat_temp) 186 | if (is.null(gene_k)) gene_k <- NA 187 | res[[k_i]] <- gene_k 188 | } 189 | names(res) <- paste0('k', sort(unique(gene_vec))) 190 | res <- res[sapply(res, function(x) length(x)>=min_gen)] 191 | res 192 | } 193 | 194 | 195 | #' SCoexp module 196 | #' 197 | #' @param celltrek_inp CellTrek input on cell of interests 198 | #' @param sigm 199 | #' @param assay 200 | #' @param gene_select 201 | #' @param zero_cutoff 202 | #' @param cor_method 203 | #' @param approach Which approach to use? consensus clustering (cc) or weighted correlation network analysis (wgcna) 204 | #' @param maxK 205 | #' @param k 206 | #' @param avg_con_min 207 | #' @param avg_cor_min 208 | #' @param min_gen 209 | #' @param max_gen 210 | #' @param keep_cc If TRUE, keep the cc model 211 | #' @param keep_wgcna If TRUE, keep the wgcna model 212 | #' @param keep_kern 213 | #' @param keep_wcor 214 | #' @param ... 215 | #' 216 | #' @return 217 | #' @export 218 | #' 219 | #' @import Seurat 220 | #' @import magrittr 221 | #' 222 | #' @examples scoexp(celltrek_inp, sigm=NULL, assay='RNA', gene_select=NULL, zero_cutoff=5, cor_method='spearman', approach=c('cc', 'wgcna')[1], maxK=8, k=8, avg_con_min=.5, avg_cor_min=.5, min_gen=20, max_gen=100, keep_cc=T, keep_wgcna=T, keep_kern=T, keep_wcor=T) 223 | scoexp <- function(celltrek_inp, sigm=NULL, assay='RNA', gene_select=NULL, zero_cutoff=5, cor_method='spearman', approach=c('cc', 'wgcna')[1], maxK=8, k=8, avg_con_min=.5, avg_cor_min=.5, min_gen=20, max_gen=100, keep_cc=T, keep_wgcna=T, keep_kern=T, keep_wcor=T, ...) { 224 | if (!all(c('coord_x', 'coord_y') %in% colnames(celltrek_inp@meta.data))) stop('coord_x and coord_y not detected in the metadata') 225 | if (is.null(sigm)) sigm <- celltrek_inp@images[[1]]@scale.factors$spot_dis 226 | if (is.null(gene_select)) { 227 | cat('gene filtering...\n') 228 | feature_nz <- apply(celltrek_inp[[assay]]@data, 1, function(x) mean(x!=0)*100) 229 | features <- names(feature_nz)[feature_nz > zero_cutoff] 230 | cat(length(features), 'features after filtering...\n') 231 | } else if (length(gene_select) > 1) { 232 | features <- intersect(gene_select, rownames(celltrek_inp[[assay]]@data)) 233 | if (length(features)==0) stop('No genes in gene_select detected') 234 | } 235 | celltrek_inp <- Seurat::ScaleData(celltrek_inp, features=features) 236 | res <- list(gs=c(), cc=c(), rbfk=c(), wcor=c()) 237 | dist_mat <- dist(celltrek_inp@meta.data[, c('coord_x', 'coord_y')]) %>% as.matrix 238 | kern_mat <- rbfk(dist_mat, sigm=sigm, zero_diag=F) 239 | expr_mat <- t(as.matrix(celltrek_inp[[assay]]@scale.data)) 240 | cat('Calculating spatial-weighted cross-correlation...\n') 241 | wcor_mat <- wcor(X=expr_mat, W=kern_mat, method=cor_method) 242 | 243 | if (approach=='cc') { 244 | wcor_dis <- as.dist(1-wcor_mat) 245 | cat('Consensus clustering...\n') 246 | cc_res <- cc_wrapper(d=wcor_dis, maxK=maxK, ...) 247 | cons_mat <- cc_res[[k]]$consensusMatrix %>% data.frame 248 | colnames(cons_mat) <- rownames(cons_mat) <- rownames(wcor_mat) 249 | cat('Gene module detecting...\n') 250 | K_gl <- cc_gene_k(cc_res=cc_res, cor_mat=wcor_mat, k=k, avg_con_min=avg_con_min, avg_cor_min=avg_cor_min, min_gen=min_gen, max_gen=max_gen) 251 | res$gs <- K_gl 252 | if (keep_cc) res$cc <- cc_res 253 | if (keep_kern) res$rbfk <- kern_mat 254 | if (keep_wcor) res$wcor <- wcor_mat 255 | } else if (approach=='wgcna') { 256 | cat('WGCNA...\n') 257 | wgcna_res <- wgcna_wrapper(sim_mat=wcor_mat, minClusterSize=min_gen, ...) 258 | K_gl <- wgcna_gene_k(wgcna_res, cor_mat=wcor_mat, avg_cor_min=avg_cor_min, min_gen=min_gen, max_gen=max_gen) 259 | res$gs <- K_gl 260 | if (keep_wgcna) res$wgcna <- wgcna_res 261 | if (keep_kern) res$rbfk <- kern_mat 262 | if (keep_wcor) res$wcor <- wcor_mat 263 | } 264 | return(res) 265 | } 266 | 267 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A Quick Tour of CellTrek Toolkit 2 | 3 | ## 1. Introduction and installation 4 | CellTrek is a computational framework that can directly map single cells back to their spatial coordinates in tissue sections based on scRNA-seq and ST data. This method provides a new paradigm that is distinct from ST deconvolution, enabling a more flexible and direct investigation of single cell data with spatial topography. The CellTrek toolkit also provides two downstream analysis modules, including SColoc for spatial colocalization analysis and SCoexp for spatial co-expression analysis. 5 | 6 | In this tutorial, we will demonstrate the cell charting workflow based on the mouse brain data as part of our paper Figure 2 7 | ``` r 8 | library(devtools) 9 | install_github("navinlabcode/CellTrek") 10 | ``` 11 | ## 2. Loading the packages and datasets (scRNA-seq and ST data) 12 | We start by loading the packages needed for the analyses. Please install them if you haven't. 13 | ``` r 14 | options(stringsAsFactors = F) 15 | library("CellTrek") 16 | library("dplyr") 17 | library("Seurat") 18 | library("viridis") 19 | library("ConsensusClusterPlus") 20 | 21 | ``` 22 | We then load mouse brain scRNA-seq and ST data, respectively. For ST data, we only used the frontal cortex region for this study. For scRNA-seq data, if you are running the code on a personal laptop, you may need to subset the scRNA-seq data to hundreds of cells since it will cost several minutes for using the whole scRNA-seq data in the CellTrek step. 23 | 24 | You can download the scRNA-seq data here: https://www.dropbox.com/s/ruseq3necn176c7/brain_sc.rds?dl=0 25 | 26 | You can download the ST data here: https://www.dropbox.com/s/azjysbt7lbpmbew/brain_st_cortex.rds?dl=0 27 | ``` r 28 | brain_st_cortex <- readRDS("brain_st_cortex.rds") 29 | brain_sc <- readRDS("brain_sc.rds") 30 | 31 | ## Rename the cells/spots with syntactically valid names 32 | brain_st_cortex <- RenameCells(brain_st_cortex, new.names=make.names(Cells(brain_st_cortex))) 33 | brain_sc <- RenameCells(brain_sc, new.names=make.names(Cells(brain_sc))) 34 | 35 | ## Visualize the ST data 36 | SpatialDimPlot(brain_st_cortex) 37 | ``` 38 | ![](vignette_files/F1_ST_spatialdimplot.png) 39 | 40 | ``` r 41 | ## Visualize the scRNA-seq data 42 | DimPlot(brain_sc, label = T, label.size = 4.5) 43 | ``` 44 | ![](vignette_files/F2_SC_dimplot.png) 45 | 46 | ## 3. Cell charting using CellTrek 47 | We first co-embed ST and scRNA-seq datasets using *traint* 48 | ``` r 49 | brain_traint <- CellTrek::traint(st_data=brain_st_cortex, sc_data=brain_sc, sc_assay='RNA', cell_names='cell_type') 50 | ``` 51 | ``` r 52 | ## Finding transfer anchors... 53 | ## Using 2000 features for integration... 54 | ## Data transfering... 55 | ## Creating new Seurat object... 56 | ## Scaling -> PCA -> UMAP... 57 | ``` 58 | ``` r 59 | ## We can check the co-embedding result to see if there is overlap between these two data modalities 60 | DimPlot(brain_traint, group.by = "type") 61 | ``` 62 | ![](vignette_files/F3_scst_coembed.png) 63 | After coembedding, we can chart single cells to their spatial locations. Here, we use the non-linear interpolation (intp = T, intp_lin=F) approach to augment the ST spots. 64 | ``` r 65 | brain_celltrek <- CellTrek::celltrek(st_sc_int=brain_traint, int_assay='traint', sc_data=brain_sc, sc_assay = 'RNA', 66 | reduction='pca', intp=T, intp_pnt=5000, intp_lin=F, nPCs=30, ntree=1000, 67 | dist_thresh=0.55, top_spot=5, spot_n=5, repel_r=20, repel_iter=20, keep_model=T)$celltrek 68 | ``` 69 | ``` r 70 | ## Distance between spots is: 138 71 | ## Interpolating... 72 | ## Random Forest training... 73 | ## Random Forest prediction... 74 | ## Making distance matrix... 75 | ## Making graph... 76 | ## Pruning graph... 77 | ## Spatial Charting SC data... 78 | ## Repelling points... 79 | ## Creating Seurat Object... 80 | ## sc data... 81 | ``` 82 | After cell charting, we can interactively visualize the CellTrek result using *celltrek_vis* 83 | ``` r 84 | brain_celltrek$cell_type <- factor(brain_celltrek$cell_type, levels=sort(unique(brain_celltrek$cell_type))) 85 | 86 | CellTrek::celltrek_vis(brain_celltrek@meta.data %>% dplyr::select(coord_x, coord_y, cell_type:id_new), 87 | brain_celltrek@images$anterior1@image, brain_celltrek@images$anterior1@scale.factors$lowres) 88 | ``` 89 | We select “cell_type” from the “Color” option and set “Categorical” from “Type” option. 90 | ![](vignette_files/F4_schart_vis.png) 91 | 92 | # 4. Cell colocalization analysis 93 | Based on the CellTrek result, we can summarize the colocalization patterns between different cell types using SColoc module. Here, we are using glutamatergic neuron cell types as an example (it is recommended to remove some cell types with very few cells, e.g., n<20). 94 | We first subset the glutamatergic neuron cell types from our charting result. 95 | ``` r 96 | glut_cell <- c('L2/3 IT', 'L4', 'L5 IT', 'L5 PT', 'NP', 'L6 IT', 'L6 CT', 'L6b') 97 | names(glut_cell) <- make.names(glut_cell) 98 | brain_celltrek_glut <- subset(brain_celltrek, subset=cell_type %in% glut_cell) 99 | brain_celltrek_glut$cell_type <- factor(brain_celltrek_glut$cell_type, levels=glut_cell) 100 | ``` 101 | Then we can use scoloc module to perform colocalization analysis. 102 | ``` r 103 | brain_sgraph_KL <- CellTrek::scoloc(brain_celltrek_glut, col_cell='cell_type', use_method='KL', eps=1e-50) 104 | ``` 105 | ``` r 106 | ## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 107 | ``` 108 | ``` r 109 | ## We extract the minimum spanning tree (MST) result from the graph 110 | brain_sgraph_KL_mst_cons <- brain_sgraph_KL$mst_cons 111 | rownames(brain_sgraph_KL_mst_cons) <- colnames(brain_sgraph_KL_mst_cons) <- glut_cell[colnames(brain_sgraph_KL_mst_cons)] 112 | ## We then extract the metadata (including cell types and their frequencies) 113 | brain_cell_class <- brain_celltrek@meta.data %>% dplyr::select(id=cell_type) %>% unique 114 | brain_celltrek_count <- data.frame(freq = table(brain_celltrek$cell_type)) 115 | brain_cell_class_new <- merge(brain_cell_class, brain_celltrek_count, by.x ="id", by.y = "freq.Var1") 116 | ``` 117 | Next, we can visualize the colocalization result. Feel free to adjust the edge value cutoff. 118 | ``` r 119 | CellTrek::scoloc_vis(brain_sgraph_KL_mst_cons, meta_data=brain_cell_class) 120 | ``` 121 | ![](vignette_files/F5_scoloc_vis_updated.png) 122 | # 5. Spatial-weighted gene co-expression analysis within the cell type of interest 123 | Based on the CellTrek result, we can further investigate the co-expression patterns within the cell type of interest using SCoexp module. Here, we will take L5 IT cells as an example using consensus clustering (CC) method. 124 | L5 IT cells first are extracted from the charting result. 125 | ``` r 126 | brain_celltrek_l5 <- subset(brain_celltrek, subset=cell_type=='L5 IT') 127 | brain_celltrek_l5@assays$RNA@scale.data <- matrix(NA, 1, 1) 128 | brain_celltrek_l5$cluster <- gsub('L5 IT VISp ', '', brain_celltrek_l5$cluster) 129 | DimPlot(brain_celltrek_l5, group.by = 'cluster') 130 | ``` 131 | ![](vignette_files/F6_L5IT.png) 132 | We select top 2000 variable genes (exclude mitochondrial, ribosomal and high-zero genes) 133 | ``` r 134 | brain_celltrek_l5 <- FindVariableFeatures(brain_celltrek_l5) 135 | vst_df <- brain_celltrek_l5@assays$RNA@meta.features %>% data.frame %>% mutate(id=rownames(.)) 136 | nz_test <- apply(as.matrix(brain_celltrek_l5[['RNA']]@data), 1, function(x) mean(x!=0)*100) 137 | hz_gene <- names(nz_test)[nz_test<20] 138 | mt_gene <- grep('^Mt-', rownames(brain_celltrek_l5), value=T) 139 | rp_gene <- grep('^Rpl|^Rps', rownames(brain_celltrek_l5), value=T) 140 | vst_df <- vst_df %>% dplyr::filter(!(id %in% c(mt_gene, rp_gene, hz_gene))) %>% arrange(., -vst.variance.standardized) 141 | feature_temp <- vst_df$id[1:2000] 142 | ``` 143 | We use scoexp to do the spatial-weighted gene co-expression analysis. 144 | ``` r 145 | brain_celltrek_l5_scoexp_res_cc <- CellTrek::scoexp(celltrek_inp=brain_celltrek_l5, assay='RNA', approach='cc', gene_select = feature_temp, sigm=140, avg_cor_min=.4, zero_cutoff=3, min_gen=40, max_gen=400) 146 | ``` 147 | ``` r 148 | ## Calculating spatial-weighted cross-correlation... 149 | ## Consensus clustering... 150 | ## Gene module detecting... 151 | ## 1 2 3 4 5 6 7 8 152 | ``` 153 | We can visualize the co-expression modules using heatmap. 154 | ``` r 155 | brain_celltrek_l5_k <- rbind(data.frame(gene=c(brain_celltrek_l5_scoexp_res_cc$gs[[1]]), G='K1'), 156 | data.frame(gene=c(brain_celltrek_l5_scoexp_res_cc$gs[[2]]), G='K2')) %>% 157 | magrittr::set_rownames(.$gene) %>% dplyr::select(-1) 158 | pheatmap::pheatmap(brain_celltrek_l5_scoexp_res_cc$wcor[rownames(brain_celltrek_l5_k), rownames(brain_celltrek_l5_k)], 159 | clustering_method='ward.D2', annotation_row=brain_celltrek_l5_k, show_rownames=F, show_colnames=F, 160 | treeheight_row=10, treeheight_col=10, annotation_legend = T, fontsize=8, 161 | color=viridis(10), main='L5 IT spatial co-expression') 162 | ``` 163 | ![](vignette_files/F7_scoexp_heatmap.png) 164 | We identified two distinct modules. 165 | Based on our identified co-expression modules, we can calculated the module scores. 166 | ``` r 167 | brain_celltrek_l5 <- AddModuleScore(brain_celltrek_l5, features=brain_celltrek_l5_scoexp_res_cc$gs, name='CC_', nbin=10, ctrl=50, seed=42) 168 | ## First we look into the coexpression module based on the scRNA-seq embedding 169 | FeaturePlot(brain_celltrek_l5, grep('CC_', colnames(brain_celltrek_l5@meta.data), value=T)) 170 | ``` 171 | ![](vignette_files/F8_modulescore_umap.png) 172 | Next we investigate the module scores at the spatial level. 173 | ``` r 174 | SpatialFeaturePlot(brain_celltrek_l5, grep('CC_', colnames(brain_celltrek_l5@meta.data), value=T)) 175 | ``` 176 | ![](vignette_files/F9_modulescore_schart.png) 177 | ``` r 178 | sessionInfo() 179 | ``` 180 | ``` r 181 | ## R version 3.6.2 (2019-12-12) 182 | ## Platform: x86_64-pc-linux-gnu (64-bit) 183 | ## Running under: Red Hat Enterprise Linux 184 | ## 185 | ## Matrix products: default 186 | ## BLAS: /usr/lib64/R/lib/libRblas.so 187 | ## LAPACK: /usr/lib64/R/lib/libRlapack.so 188 | ## 189 | ## locale: 190 | ## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C 191 | ## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 192 | ## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 193 | ## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C 194 | ## [9] LC_ADDRESS=C LC_TELEPHONE=C 195 | ## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C 196 | ## 197 | ## attached base packages: 198 | ## [1] stats graphics grDevices utils datasets methods base 199 | ## 200 | ## other attached packages: 201 | ## [1] viridis_0.5.1 viridisLite_0.3.0 plotly_4.9.3 202 | ## [4] ggplot2_3.3.3 shiny_1.5.0 visNetwork_2.0.9 203 | ## [7] reshape2_1.4.4 hdf5r_1.3.0 SeuratData_0.2.1 204 | ## [10] Seurat_3.1.4.9904 spatstat_1.63-2 rpart_4.1-15 205 | ## [13] nlme_3.1-148 spatstat.data_2.1-0 pheatmap_1.0.12 206 | ## [16] dbscan_1.1-6 magrittr_2.0.1 dplyr_1.0.5 207 | ## 208 | ## loaded via a namespace (and not attached): 209 | ## [1] philentropy_0.4.0 Rtsne_0.15 210 | ## [3] colorspace_2.0-0 deldir_0.1-25 211 | ## [5] ellipsis_0.3.1 ggridges_0.5.3 212 | ## [7] farver_2.1.0 leiden_0.3.3 213 | ## [9] listenv_0.8.0 ggrepel_0.9.1 214 | ## [11] bit64_4.0.2 RSpectra_0.16-0 215 | ## [13] fansi_0.4.2 codetools_0.2-16 216 | ## [15] splines_3.6.2 knitr_1.31 217 | ## [17] polyclip_1.10-0 jsonlite_1.7.2 218 | ## [19] ica_1.0-2 cluster_2.1.0 219 | ## [21] png_0.1-7 uwot_0.1.8 220 | ## [23] data.tree_1.0.0 sctransform_0.2.1 221 | ## [25] DiagrammeR_1.0.6.1 compiler_3.6.2 222 | ## [27] httr_1.4.2 randomForestSRC_2.10.1 223 | ## [29] CellTrek_0.0.0.9000 assertthat_0.2.1 224 | ## [31] Matrix_1.2-18 fastmap_1.0.1 225 | ## [33] lazyeval_0.2.2 cli_2.0.2 226 | ## [35] later_1.1.0.1 htmltools_0.5.1.1 227 | ## [37] tools_3.6.2 rsvd_1.0.3 228 | ## [39] igraph_1.2.6 gtable_0.3.0 229 | ## [41] glue_1.4.2 RANN_2.6.1 230 | ## [43] rappdirs_0.3.3 Rcpp_1.0.6 231 | ## [45] Biobase_2.46.0 vctrs_0.3.7 232 | ## [47] debugme_1.1.0 ape_5.4 233 | ## [49] lmtest_0.9-38 xfun_0.22 234 | ## [51] stringr_1.4.0 globals_0.14.0 235 | ## [53] akima_0.6-2.1 mime_0.10 236 | ## [55] miniUI_0.1.1.1 lifecycle_1.0.0 237 | ## [57] irlba_2.3.3 goftest_1.2-2 238 | ## [59] future_1.21.0 packcircles_0.3.4 239 | ## [61] MASS_7.3-51.6 zoo_1.8-9 240 | ## [63] scales_1.1.1 promises_1.2.0.1 241 | ## [65] spatstat.utils_2.1-0 parallel_3.6.2 242 | ## [67] RColorBrewer_1.1-2 yaml_2.2.1 243 | ## [69] reticulate_1.16 pbapply_1.4-2 244 | ## [71] gridExtra_2.3 stringi_1.5.3 245 | ## [73] highr_0.8 BiocGenerics_0.32.0 246 | ## [75] rlang_0.4.10 pkgconfig_2.0.3 247 | ## [77] evaluate_0.14 lattice_0.20-41 248 | ## [79] ROCR_1.0-11 purrr_0.3.4 249 | ## [81] tensor_1.5 labeling_0.4.2 250 | ## [83] patchwork_1.0.1 htmlwidgets_1.5.3 251 | ## [85] cowplot_1.0.0 bit_4.0.4 252 | ## [87] tidyselect_1.1.0 parallelly_1.24.0 253 | ## [89] RcppAnnoy_0.0.18 plyr_1.8.6 254 | ## [91] R6_2.5.0 generics_0.1.0 255 | ## [93] DBI_1.1.0 withr_2.4.1 256 | ## [95] pillar_1.5.1 mgcv_1.8-31 257 | ## [97] fitdistrplus_1.1-1 sp_1.4-5 258 | ## [99] survival_3.2-3 abind_1.4-5 259 | ## [101] tibble_3.1.0 future.apply_1.7.0 260 | ## [103] tsne_0.1-3 crayon_1.4.1 261 | ## [105] KernSmooth_2.23-17 utf8_1.2.1 262 | ## [107] rmarkdown_2.3 grid_3.6.2 263 | ## [109] data.table_1.14.0 blob_1.2.1 264 | ## [111] ConsensusClusterPlus_1.50.0 digest_0.6.27 265 | ## [113] xtable_1.8-4 tidyr_1.1.3 266 | ## [115] httpuv_1.5.5 munsell_0.5.0 267 | ``` 268 | -------------------------------------------------------------------------------- /R/scoloc.R: -------------------------------------------------------------------------------- 1 | #' Title 2 | #' 3 | #' @param data_inp Cell-coordinates matrix 4 | #' @param col_cell Column name of cell type, cell type names must be syntactically valid 5 | #' 6 | #' @return dummy data frame with cells on columns 7 | #' 8 | #' @import dplyr 9 | #' @import tidyr 10 | #' @import magrittr 11 | #' 12 | #' @examples cell_dummy_df <- as_dummy_df(celltrek_df, col_cell='Cell') 13 | as_dummy_df <- function(data_inp, col_cell='cell_names') { 14 | cell_names <- sort(as.character(unique(data_inp[, col_cell]))) 15 | data_inp$id_add <- rownames(data_inp) 16 | data_inp$col_add <- data_inp[, col_cell] 17 | data_out <- data_inp %>% dplyr::select(id_add, col_add) %>% dplyr::mutate(value=1) %>% 18 | tidyr::pivot_wider(id_cols=id_add, names_from=col_add, values_fill=list(value=0)) %>% 19 | data.frame(check.names=F) 20 | data_out <- data_out[, c('id_add', cell_names)] 21 | ## Reorder cell names ## 22 | data_out <- dplyr::left_join(data_inp, data_out, by='id_add') %>% 23 | set_rownames(.$id_add) %>% dplyr::select(-c(id_add, col_add)) 24 | return(data_out) 25 | } 26 | 27 | #' Title 28 | #' 29 | #' @param x1 30 | #' @param x2 31 | #' 32 | #' @return 33 | #' 34 | #' @examples 35 | euc_dist <- function(x1, x2) {sqrt(sum((x1 - x2) ^ 2))} 36 | 37 | #' Title 38 | #' 39 | #' @param data Cell type dummy table 40 | #' @param coord Coordinates data with X and Y columns 41 | #' @param min_num For cells number < min_num, add 1 till min_num 42 | #' @param h Bandwidths for x and y directions, more details in kde2d function in MASS package 43 | #' @param n Number of grid points in each directions, more details in kde2d function in MASS package 44 | #' @param tot_norm Normalization by total? 45 | #' @param Xlim The limits of X-axis 46 | #' @param Ylim The limits of Y-axis 47 | #' 48 | #' @return Kenerl density on grid 49 | #' 50 | #' @import MASS 51 | #' @import magrittr 52 | #' @import reshape2 53 | #' 54 | #' 55 | #' @examples kern_res <- sp_grid_kern_bin(data=cell_dummy_df, coord=coord_df, h=150, n=25, tot_norm=TRUE, Xlim=range(coord_df$X), Ylim=range(coor_df$Y)) 56 | sp_grid_kern_bin <- function(data, coord, min_num=15, h, n=25, tot_norm=TRUE, Xlim=range(coord[, 1]), Ylim=range(coord[, 2])) { 57 | 58 | ## For cells with less than min_num, randomly add 1 ## 59 | data_ <- data 60 | if (min_num<0) min_num <- 1 61 | col_min <- which(colSums(data) < min_num) 62 | if (length(col_min) > 0) { 63 | for (i in 1:length(col_min)) { 64 | set.seed(i) 65 | data_[, col_min[i]][sample(which(data_[, col_min[i]]==0), min_num-sum(data_[, col_min[i]]))] <- 1 66 | } 67 | } 68 | 69 | # data_out_zero <- matrix(0, n^2, length(col_rm)) %>% data.frame %>% set_colnames(names(col_rm)) 70 | # if (length(col_rm)>0) { 71 | # data_ <- data[, -col_rm] 72 | # } else {data_ <- data} 73 | 74 | colnames(coord) <- paste0('X', 1:length(colnames(coord))) 75 | data_merge <- merge(coord, data_, by='row.names') 76 | 77 | data_out_list <- data_merge %>% dplyr::select(colnames(data_)) %>% 78 | apply(2, function(x) { 79 | k2d_temp <- MASS::kde2d(data_merge$X1[which(x==1)], data_merge$X2[which(x==1)], lims=c(Xlim, Ylim), h, n) %>% 80 | extract2('z') %>% reshape2::melt() %>% data.frame %>% set_colnames(c('grid_1', 'grid_2', 'Val')) 81 | if (tot_norm) {k2d_temp$Val <- k2d_temp$Val/sum(k2d_temp$Val)} 82 | k2d_temp 83 | }) 84 | 85 | ## Rename the Val column ## 86 | data_out_list <- lapply(1:length(data_out_list), function(i) { 87 | colnames(data_out_list[[i]])[3] <- names(data_out_list)[i] 88 | data_out_list[[i]] 89 | }) 90 | 91 | data_out <- Reduce(function(x, y) merge(x, y, by=c('grid_1', 'grid_2'), suffixes=c(colnames(x)[3], names(y))), data_out_list) 92 | data_out <- data_out[, c('grid_1', 'grid_2', colnames(data))] 93 | ## Keep the same cell order with input data ## 94 | data_out 95 | } 96 | 97 | 98 | #' Title 99 | #' 100 | #' @param X Density matrix 101 | #' @param eps Small value added 102 | #' 103 | #' @return KL-divergence matrix 104 | #' 105 | #' @import philentropy 106 | #' 107 | #' @examples KL_res <- KL_(X=X, eps=1e-20) 108 | KL_ <- function(X, eps=1e-20) { 109 | ## Need Suppress print-outs ## 110 | X_ <- X 111 | X_[X_% data.frame 113 | KL_D <- suppressMessages(philentropy::KL(t(X_), unit='log')) 114 | rownames(KL_D) <- colnames(KL_D) <- colnames(X_) 115 | KL_D[is.infinite(KL_D)] <- 650 116 | return(KL_D) 117 | } 118 | 119 | 120 | #' Title 121 | #' 122 | #' @param dummy_df Cell type dummy df 123 | #' @param coord_df Coordinates df 124 | #' @param min_num For cells number < min_num, add 1 till min_num 125 | #' @param Xlim The limits of X-axis 126 | #' @param Ylim The limits of Y-axis 127 | #' @param boot_n Number of bootstrapping iterations 128 | #' @param prop Subsample proportion 129 | #' @param h Bandwidths for x and y directions, more details in kde2d function in MASS package 130 | #' @param n Number of grid points in each directions, more details in kde2d function in MASS package 131 | #' @param tot_norm Normalization by total? 132 | #' @param eps Small value when calculate KL-divergence 133 | #' @param replace should sampling be with replacement? 134 | #' 135 | #' @return A list of 1. Bootstrap KL-divergence; 2. MST consensus matrix 136 | #' 137 | #' @import magrittr 138 | #' @import igraph 139 | #' 140 | #' @examples boot_mst_res <- KL_boot_mst(dummy_df=cell_type_dummy, coord_df=range(coord_df$X), Xlim=range(coord_df$Y), boot_n=100, prop=0.8, h=150, n=25, tot_norm=T, eps=1e-20) 141 | KL_boot_mst <- function(dummy_df, coord_df, min_num=15, Xlim=range(coord_df[, 1]), Ylim=range(coord_df[, 2]), boot_n=100, prop=0.8, replace=T, h=150, n=25, tot_norm=T, eps=1e-20) { 142 | n_smp <- nrow(dummy_df) 143 | n_typ <- ncol(dummy_df) 144 | 145 | dis_boot_array <- array(NA, dim=c(n_typ, n_typ, boot_n), 146 | dimnames=list(colnames(dummy_df), colnames(dummy_df), paste0('X_', c(1:boot_n)))) 147 | mst_cons <- matrix(0, n_typ, n_typ) %>% data.frame %>% magrittr::set_rownames(colnames(dummy_df)) %>% magrittr::set_colnames(colnames(dummy_df)) 148 | for (i in 1:boot_n) { 149 | cat(i, ' ') 150 | set.seed(i) 151 | ## Bootstrap ## 152 | idx <- sample(1:n_smp, round(n_smp*prop), replace=replace) 153 | data_boot <- dummy_df[idx, ] 154 | coord_boot <- coord_df[idx, ] 155 | k2d_boot <- sp_grid_kern_bin(data=data_boot, coord=coord_boot, min_num=min_num, Xlim=Xlim, Ylim=Ylim, h=h, n=n, tot_norm=tot_norm) 156 | dis_boot <- k2d_boot[, -c(1, 2)] %>% KL_(., eps=eps) 157 | dis_boot_array[, , i] <- dis_boot 158 | ## MST ## 159 | mst_boot <- igraph::graph.adjacency(dis_boot, weighted=T, mode='upper') %>% igraph::mst(., algorithm='prim') %>% 160 | igraph::as_adjacency_matrix() %>% as.data.frame(make.names=F) 161 | mst_boot <- mst_boot[colnames(dummy_df), ] 162 | mst_boot <- mst_boot[, colnames(dummy_df)] 163 | mst_cons <- mst_cons + mst_boot/boot_n 164 | } 165 | output <- list(boot_array=dis_boot_array, mst_cons=mst_cons) 166 | return(output) 167 | } 168 | 169 | #' Build Delaunay triangulation network 170 | #' 171 | #' @param coord_df Coordinates df 172 | #' @param return_name Return edge list with names? 173 | #' @param dist_cutoff Remove edges with distance >= dist_cutoff 174 | #' 175 | #' @return Delaunay triangulation network edge list 176 | #' @export 177 | #' 178 | #' @importFrom geometry delaunayn 179 | #' 180 | #' @examples test <- build_delaunayn(coord_df, return_name=T) 181 | build_delaunayn <- function(coord_df, return_name=T, dist_cutoff=NULL) { 182 | delaunayn_res <- geometry::delaunayn(coord_df) 183 | delaunayn_el <- rbind(delaunayn_res[, c(1, 2)], delaunayn_res[, c(1, 3)], delaunayn_res[, c(2, 3)], 184 | delaunayn_res[, c(2, 1)], delaunayn_res[, c(3, 1)], delaunayn_res[, c(3, 2)]) %>% 185 | unique %>% data.frame 186 | if (!is.null(dist_cutoff)) { 187 | dist_df <- as.matrix(dist(coord_df)) 188 | delaunayn_el_dist <- dist_df[as.matrix(delaunayn_el)] 189 | dist_idx <- which(delaunayn_el_dist < dist_cutoff) 190 | delaunayn_el <- delaunayn_el[dist_idx, ] 191 | } 192 | if (return_name) { 193 | delaunayn_el$X1 <- rownames(coord_df)[delaunayn_el$X1] 194 | delaunayn_el$X2 <- rownames(coord_df)[delaunayn_el$X2] 195 | } 196 | return(delaunayn_el) 197 | } 198 | 199 | #' Title 200 | #' 201 | #' @param el_inp Edge list of Delaunay triangulation network 202 | #' @param meta_data Meta data, must contain column of cell type 203 | #' @param col_cell Column name of cell type, cell type names must be syntactically valid 204 | #' 205 | #' @return A list of 1.Network-based cell co-occurrence; 2.Cell co-occurrence log odds ratio 206 | #' 207 | #' @import dplyr 208 | #' @import tidyr 209 | #' @import magrittr 210 | #' 211 | #' @examples test <- edge_odds(el_inp, meta_data, col_cell='cell_names') 212 | edge_odds <- function(el_inp, meta_data, col_cell='cell_names') { 213 | cell_el <- data.frame(X1=meta_data[el_inp[, 1], col_cell], X2=meta_data[el_inp[, 2], col_cell]) %>% 214 | dplyr::group_by(X1, X2) %>% dplyr::tally() %>% data.frame 215 | cell_adj_mat <- cell_el %>% 216 | tidyr::pivot_wider(id_cols=X1, names_from=X2, values_from=n, values_fill=list(n=0)) %>% 217 | data.frame(check.names = F) %>% magrittr::set_rownames(.$X1) %>% dplyr::select(-1) 218 | cell_adj_mat <- cell_adj_mat[, rownames(cell_adj_mat)] 219 | 220 | ## Calculate odds ratio ## 221 | cell_coo <- cell_adj_mat 222 | diag(cell_coo) <- 0 223 | ## 224 | cell_coo <- cell_coo + 1 225 | ## 226 | cell_row <- -sweep(cell_coo, 1, rowSums(cell_coo)) 227 | cell_col <- -sweep(cell_coo, 2, colSums(cell_coo)) 228 | cell_non <- -((cell_row + cell_col)-sum(cell_coo)+cell_coo) 229 | cell_odd <- (cell_coo*cell_non)/(cell_row*cell_col) 230 | cell_odd[cell_odd<0] <- 0 231 | cell_odd_log <- log(cell_odd) 232 | ## Test ## 233 | # cell_A='L2/3 IT' 234 | # cell_B='L4' 235 | # cell_test <- cell_el 236 | # cell_test <- cell_test[-which(cell_test$X1==cell_test$X2), ] 237 | # cell_test$X1[cell_test$X1!=cell_A] <- 'Non_A' 238 | # cell_test$X2[cell_test$X2!=cell_B] <- 'Non_B' 239 | # cell_test_count <- cell_test %>% group_by(X1, X2) %>% dplyr::summarise(n=sum(n)) %>% 240 | # tidyr::pivot_wider(id_cols=X1, names_from=X2, values_from = n) %>% data.frame %>% set_rownames(.$X1) %>% dplyr::select(-1) 241 | # fisher.test(cell_test_count) 242 | ### 243 | return(list(cell_edgeCnt=cell_adj_mat, cell_logOR=cell_odd_log)) 244 | } 245 | 246 | #' Title 247 | #' 248 | #' @param meta_df Meta data, must contain cell type column 249 | #' @param coord_df Coordinates data, must be the same order of meta_df 250 | #' @param col_cell Column name of cell type, cell type names must be syntactically valid 251 | #' @param boot_n Number of bootstrapping iterations 252 | #' @param prop Subsample proportion 253 | #' @param dist_cutoff Remove edges with distance >= dist_cutoff 254 | #' @param replace should sampling be with replacement? 255 | #' 256 | #' @return A list of 1. Bootstrap logOR distance; 2. MST consensus matrix 257 | #' 258 | #' @import magrittr 259 | #' @import tibble 260 | #' @import tidyr 261 | #' @import igraph 262 | #' 263 | #' @examples test <- DT_boot_mst(meta_df, coord_df, col_cell='cell_names', boot_n=100, prop=.8) 264 | DT_boot_mst <- function(meta_df, coord_df, col_cell='cell_names', boot_n=100, prop=.8, replace=T, dist_cutoff=NULL) { 265 | cell_names <- unique(meta_df[, col_cell]) %>% sort 266 | n_smp <- nrow(meta_df) 267 | n_typ <- length(cell_names) 268 | dis_boot_array <- array(NA, dim=c(n_typ, n_typ, boot_n), dimnames=list(cell_names, cell_names, paste0('X_', c(1:boot_n)))) 269 | mst_cons <- matrix(0, n_typ, n_typ) %>% data.frame %>% magrittr::set_rownames(cell_names) %>% magrittr::set_colnames(cell_names) 270 | for (i in 1:boot_n) { 271 | set.seed(i) 272 | cat(i, ' ') 273 | idx <- sort(sample(1:n_smp, round(n_smp*prop), replace=replace)) 274 | data_boot <- meta_df[idx, ] 275 | coord_boot <- coord_df[idx, ] 276 | DT_el <- build_delaunayn(coord_boot, return_name = F, dist_cutoff=dist_cutoff) 277 | DT_OR <- edge_odds(DT_el, meta_data=data_boot, col_cell=col_cell) 278 | DT_OR_melt <- DT_OR$cell_logOR %>% tibble::rownames_to_column(var='Cell_A') %>% 279 | tidyr::pivot_longer(cols=-Cell_A, names_to='Cell_B', values_to='logOR') %>% data.frame 280 | dis_boot_temp <- dis_boot_array[, , i] 281 | ## Convert logOR to Distance ## 282 | dis_boot_temp[as.matrix(DT_OR_melt[, c(1, 2)])] <- max(DT_OR_melt$logOR) - DT_OR_melt$logOR + 1 283 | dis_boot_temp[is.na(dis_boot_temp)] <- max(dis_boot_temp) 284 | dis_boot_array[, , i] <- dis_boot_temp 285 | ## MST ## 286 | mst_boot <- igraph::graph.adjacency(dis_boot_temp, weighted=T, mode='upper') %>% igraph::mst(., algorithm='prim') %>% 287 | igraph::as_adjacency_matrix() %>% as.data.frame 288 | mst_boot <- mst_boot[cell_names, ] 289 | mst_boot <- mst_boot[, cell_names] 290 | mst_cons <- mst_cons + mst_boot/boot_n 291 | } 292 | output <- list(boot_array=dis_boot_array, mst_cons=mst_cons) 293 | return(output) 294 | } 295 | 296 | #' Title 297 | #' 298 | #' @param meta_df Meta data, must contain cell type column 299 | #' @param coord_df Coordinates data, must be the same order of meta_df 300 | #' @param col_cell Column name of cell type, cell type names must be syntactically valid 301 | #' @param boot_n Number of bootstrapping iterations 302 | #' @param k Number of NNs 303 | #' @param prop Subsample proportion 304 | #' @param replace should sampling be with replacement? 305 | #' 306 | #' @return A list of 1. Bootstrap logOR distance; 2. MST consensus matrix 307 | #' 308 | #' @import magrittr 309 | #' @import dplyr 310 | #' @import igraph 311 | #' 312 | #' @examples test <- KD_boot_mst(meta_df, coord_df, col_cell='cell_names', boot_n=100, prop=.8, eps=1e-5) 313 | KD_boot_mst <- function(meta_df, coord_df, col_cell='cell_names', boot_n=100, prop=.8, replace=T, k=10) { 314 | cell_names <- unique(meta_df[, col_cell]) %>% sort 315 | n_smp <- nrow(meta_df) 316 | n_typ <- length(cell_names) 317 | dis_boot_array <- array(NA, dim=c(n_typ, n_typ, boot_n), dimnames=list(cell_names, cell_names, paste0('X_', c(1:boot_n)))) 318 | mst_cons <- matrix(0, n_typ, n_typ) %>% data.frame %>% magrittr::set_rownames(cell_names) %>% magrittr::set_colnames(cell_names) 319 | for (i in 1:boot_n) { 320 | set.seed(i) 321 | cat(i, ' ') 322 | idx <- sort(sample(1:n_smp, round(n_smp*prop), replace=replace)) 323 | data_boot <- meta_df[idx, ] 324 | coord_boot <- coord_df[idx, ] 325 | inp_df <- data.frame(data_boot, coord_boot) 326 | kdist_i <- kdist(inp_df=inp_df, ref=unique(inp_df[, col_cell]), ref_type='each', que=unique(inp_df[, col_cell]), k=k, keep_nn=F) 327 | kdist_mat <- data.frame(kdist_i$kdist_df, cell_names=inp_df[, col_cell]) %>% dplyr::group_by(cell_names) %>% dplyr::summarise_all(mean) %>% 328 | data.frame %>% magrittr::set_rownames(.$cell_names) %>% dplyr::select(-1) 329 | colnames(kdist_mat) <- gsub('_kdist', '', colnames(kdist_mat)) 330 | kdist_mat <- kdist_mat[cell_names, cell_names] 331 | dis_boot_temp <- log(as.matrix(kdist_mat)) 332 | dis_boot_array[, , i] <- dis_boot_temp 333 | 334 | ## MST ## 335 | mst_boot <- igraph::graph.adjacency(dis_boot_temp, weighted=T, mode='directed') %>% igraph::mst(., algorithm='prim') %>% 336 | igraph::as_adjacency_matrix() %>% as.data.frame 337 | mst_boot <- mst_boot[cell_names, ] 338 | mst_boot <- mst_boot[, cell_names] 339 | mst_cons <- mst_cons + mst_boot/boot_n 340 | } 341 | output <- list(boot_array=dis_boot_array, mst_cons=mst_cons) 342 | return(output) 343 | } 344 | 345 | #' Calculated K-distance between query cells and reference cells based on their spatial coordinates 346 | #' 347 | #' @param inp_df inp_df must contain cell_names, 'coord_x', 'coord_y' columns 348 | #' @param ref Reference groups 349 | #' @param ref_type 'all' or 'each' 350 | #' @param que Query groups 351 | #' @param k The number of nearest neighbors 352 | #' @param new_name New name for kdist 353 | #' @param keep_nn Keep Nearest Neighboor id matrix? 354 | #' 355 | #' @return A list of 1. kdist data frame and 2. a list of knn id matrix 356 | #' @export 357 | #' 358 | #' @import dbscan 359 | #' @import magrittr 360 | #' 361 | #' @examples kdist_out <- kdist(inp_df=test_df, ref=c('A', 'B', 'C'), ref_type='each', que=unique(test_df$cell_names), k=10, keep_nn=T) 362 | kdist <- function(inp_df, ref=NULL, ref_type='all', que=NULL, k=10, new_name='kdist', keep_nn=F) { 363 | ## Check ## 364 | if (!all(c('cell_names', 'coord_x', 'coord_y') %in% colnames(inp_df))) {stop("Input data must contain 'cell_names', 'coord_x', 'coord_y' columns")} 365 | if (any(!c(ref %in% inp_df$cell_names, que %in% inp_df$cell_names))) {stop('Reference or query group not in cell_names')} 366 | 367 | que_dat <- inp_df[inp_df$cell_names %in% que, c('coord_x', 'coord_y')] 368 | kNN_dist_df <- data.frame(matrix(NA, nrow=nrow(que_dat), ncol=0)) 369 | kNN_nn_list <- list() 370 | if (ref_type=='each' & length(ref)>0) { 371 | for (i in 1:length(ref)) { 372 | ref_i <- ref[i] 373 | ref_dat <- inp_df[inp_df$cell_names %in% ref_i, c('coord_x', 'coord_y')] 374 | ## error when k >= nrow(ref_dat), reset k ## 375 | if (nrow(ref_dat) <= k) {k <- (nrow(ref_dat)-1)} 376 | 377 | kNN_res <- dbscan::kNN(x=ref_dat, k=k, query=que_dat) 378 | kNN_dist <- apply(kNN_res$dist, 1, mean) 379 | kNN_dist_df <- base::cbind(kNN_dist_df, kNN_dist) 380 | if (keep_nn) { 381 | nn_mat <- kNN_res$id 382 | nn_mat[] <- rownames(ref_dat)[c(nn_mat)] 383 | kNN_nn_list[[i]] <- nn_mat 384 | } else { 385 | kNN_nn_list[[i]] <- matrix(NA, 0, 0) 386 | } 387 | } 388 | colnames(kNN_dist_df) <- paste0(ref, '_kdist') 389 | names(kNN_nn_list) <- paste0(ref, '_ref') 390 | } else { 391 | ref_dat <- inp_df[inp_df$cell_names %in% ref, c('coord_x', 'coord_y')] 392 | kNN_res <- dbscan::kNN(x=ref_dat, k=k, query=que_dat) 393 | kNN_dist_df <- data.frame(apply(kNN_res$dist, 1, mean)) %>% magrittr::set_colnames(new_name) 394 | if (keep_nn) { 395 | nn_mat <- kNN_res$id 396 | nn_mat[] <- rownames(ref_dat)[c(nn_mat)] 397 | kNN_nn_list$nn_ref <- nn_mat 398 | } else { 399 | kNN_nn_list[[1]] <- matrix(NA, 0, 0) 400 | } 401 | } 402 | output <- list(kdist_df=kNN_dist_df, knn_list=kNN_nn_list) 403 | return(output) 404 | } 405 | 406 | #' Run K-distance with CellTrek(Seurat) object and add a metadata column 407 | #' 408 | #' @param celltrek_inp SChart seurat input 409 | #' @param grp_col Column name in meta data for reference and query groups 410 | #' @param ref Reference groups 411 | #' @param ref_type 'all' or 'each' 412 | #' @param que Query groups 413 | #' @param k The number of nearest neighbors 414 | #' @param new_name New name for kdist 415 | #' @param keep_nn Keep Nearest Neighboor id matrix? 416 | #' 417 | #' @return SChart seurat output 418 | #' @export 419 | #' 420 | #' @import dplyr 421 | #' @import magrittr 422 | #' @import Seurat 423 | #' 424 | #' @examples celltrek_test <- run_kdist(celltrek_inp=celltrek_inp, grp_col='cell_type', ref=c('A', 'B'), ref_type='each', que=unique(test_df$cell_names), k=10) 425 | run_kdist <- function(celltrek_inp, grp_col='cell_type', ref=NULL, ref_type='all', que=NULL, k=10, new_name='kdist', keep_nn=F) { 426 | ## Check ## 427 | if (any(!c(grp_col, 'coord_x', 'coord_y') %in% colnames(celltrek_inp@meta.data))) {stop("celltrek_inp metadata must contain grp_col, 'coord_x', 'coord_y' columns")} 428 | 429 | inp_df <- celltrek_inp@meta.data %>% dplyr::select(cell_names=dplyr::one_of(grp_col), coord_x, coord_y) 430 | output <- kdist(inp_df=inp_df, ref=ref, ref_type=ref_type, que = que, k=k, new_name=new_name, keep_nn=keep_nn) 431 | celltrek_out <- Seurat::AddMetaData(celltrek_inp, metadata=output$kdist_df) 432 | return (celltrek_out) 433 | } 434 | 435 | #' SColoc module 436 | #' 437 | #' @param celltrek_inp CellTrek input 438 | #' @param col_cell Column name of cell type, cell type names must be syntactically valid 439 | #' @param h Bandwidths for x and y directions, for KL, more details in kde2d function in MASS package 440 | #' @param n Number of grid points in each directions, for KL, more details in kde2d function in MASS package 441 | #' @param ... See in boot_mst 442 | #' @param use_method Use density-based Kullback Leibler divergence or Delaunay Triangulation network 443 | #' 444 | #' @return A list of 1.Bootstrap distance; 2.MST consensus matrix 445 | #' @export 446 | #' 447 | #' 448 | #' @examples cell_scoloc <- scoloc(celltrek_inp, col_cell='cell_names', use_method='KL', h=140, n=25) 449 | scoloc <- function(celltrek_inp, col_cell='cell_names', use_method=c('KL', 'DT', 'KD')[2], h=celltrek_inp@images[[1]]@scale.factors$spot_dis, n=25, boot_n=20, ...) { 450 | celltrek_temp <- celltrek_inp 451 | celltrek_temp$cell_names <- make.names(as.character(celltrek_temp@meta.data[, col_cell])) 452 | Idents(celltrek_temp) <- celltrek_temp$cell_names 453 | if (use_method=='KL') { 454 | cell_dummy_df <- as_dummy_df(celltrek_temp@meta.data %>% dplyr::select(id_new, cell_names, coord_x, coord_y), col_cell='cell_names') 455 | cell_mst_con <- KL_boot_mst(dummy_df=cell_dummy_df[, c(5:ncol(cell_dummy_df))], coord_df=cell_dummy_df[, c(3, 4)], h=h, n=n, boot_n=boot_n, ...) 456 | } 457 | if (use_method=='DT') { 458 | cell_mst_con <- DT_boot_mst(meta_df = celltrek_temp@meta.data, coord_df = celltrek_temp@meta.data[, c('coord_x', 'coord_y')], boot_n=boot_n, ...) 459 | } 460 | if (use_method=='KD') { 461 | cell_mst_con <- KD_boot_mst(meta_df = celltrek_temp@meta.data, coord_df = celltrek_temp@meta.data[, c('coord_x', 'coord_y')], boot_n=boot_n, ...) 462 | } 463 | return(cell_mst_con) 464 | } 465 | -------------------------------------------------------------------------------- /R/celltrek.R: -------------------------------------------------------------------------------- 1 | #' Co-embedding ST and SC data using Seurat transfer anchors 2 | #' 3 | #' @param st_data Seurat ST data object 4 | #' @param sc_data Seurat SC data object 5 | #' @param st_assay ST assay 6 | #' @param sc_assay SC assay 7 | #' @param nfeatures number of features for integration 8 | #' @param cell_names cell cluster/type column name in SC meta data 9 | #' @param coord_xy coordinates column names in ST images slot 10 | #' @param gene_kept selected genes to be kept during integration 11 | #' @param norm normalization method: LogNormalize/SCTransform 12 | #' 13 | #' 14 | #' @return Seurat object of SC-ST co-embedding 15 | #' @export 16 | #' 17 | #' @import Seurat 18 | #' @import dplyr 19 | #' 20 | #' @examples 21 | #' st_sc_traint <- traint(st_data=brain_st, sc_data=brain_sc, st_assay='Spatial', sc_assay='scint', nfeatures=2000, cell_names='cell_names', coord_xy=c('imagerow', 'imagecol'), gene_kept=NULL) 22 | traint <- function (st_data, sc_data, st_assay='Spatial', sc_assay='scint', norm='LogNormalize', nfeatures=2000, 23 | cell_names='cell_names', coord_xy=c('imagerow', 'imagecol'), gene_kept=NULL, ...) { 24 | 25 | st_data$id <- names(st_data$orig.ident) 26 | sc_data$id <- names(sc_data$orig.ident) 27 | sc_data$cell_names <- make.names(sc_data@meta.data[, cell_names]) 28 | st_data$type <- 'st' 29 | sc_data$type <- 'sc' 30 | st_data$coord_x <- st_data@images[[1]]@coordinates[, coord_xy[1]] 31 | st_data$coord_y <- st_data@images[[1]]@coordinates[, coord_xy[2]] 32 | DefaultAssay(st_data) <- st_assay 33 | DefaultAssay(sc_data) <- sc_assay 34 | 35 | cat('Finding transfer anchors... \n') 36 | st_idx <- st_data$id 37 | sc_idx <- sc_data$id 38 | 39 | ## Integration features ## 40 | sc_st_list <- list(st_data=st_data, sc_data=sc_data) 41 | sc_st_features <- Seurat::SelectIntegrationFeatures(sc_st_list, nfeatures=nfeatures) 42 | if (!is.null(gene_kept)) { 43 | sc_st_features <- union(sc_st_features, gene_kept) 44 | } 45 | 46 | sc_st_features <- sc_st_features[(sc_st_features %in% rownames(st_data[[st_assay]]@data)) & 47 | (sc_st_features %in% rownames(sc_data[[sc_assay]]@data))] 48 | cat('Using', length(sc_st_features), 'features for integration... \n') 49 | ### 50 | 51 | sc_st_anchors <- Seurat::FindTransferAnchors(reference = sc_data, query = st_data, 52 | reference.assay = sc_assay, query.assay = st_assay, 53 | normalization.method = norm, features = sc_st_features, reduction = 'cca', ...) 54 | 55 | cat('Data transfering... \n') 56 | st_data_trans <- Seurat::TransferData(anchorset = sc_st_anchors, 57 | refdata = GetAssayData(sc_data, assay = sc_assay, slot='data')[sc_st_features, ], weight.reduction = 'cca') 58 | st_data@assays$transfer <- st_data_trans 59 | 60 | cat('Creating new Seurat object... \n') 61 | sc_st_meta <- dplyr::bind_rows(st_data@meta.data, sc_data@meta.data) 62 | counts_temp <- cbind(data.frame(st_data[['transfer']]@data), data.frame(sc_data[[sc_assay]]@data[sc_st_features, ] %>% data.frame)) 63 | rownames(sc_st_meta) <- make.names(sc_st_meta$id) 64 | colnames(counts_temp) <- make.names(sc_st_meta$id) 65 | sc_st_int <- CreateSeuratObject(counts = counts_temp, assay = 'traint', meta.data = sc_st_meta) 66 | sc_st_int[['traint']]@data <- sc_st_int[['traint']]@counts 67 | sc_st_int[['traint']]@counts <- matrix(NA, nrow = 0, ncol = 0) 68 | 69 | cat('Scaling -> PCA -> UMAP... \n') 70 | sc_st_int <- ScaleData(sc_st_int, features = sc_st_features) %>% 71 | RunPCA(features = sc_st_features) 72 | sc_st_int <- RunUMAP(sc_st_int, dims = 1:30) 73 | sc_st_int@images <- st_data@images 74 | sc_st_int@images[[1]]@coordinates <- data.frame(imagerow=sc_st_int@meta.data$coord_x, 75 | imagecol=sc_st_int@meta.data$coord_y) %>% 76 | set_rownames(rownames(sc_st_int@meta.data)) 77 | 78 | return (sc_st_int) 79 | } 80 | 81 | #' Mannually repelling celltrek cells 82 | #' 83 | #' @param celltrek_inp CellTrek(Seurat) object 84 | #' @param repel_r Repelling radius 85 | #' @param repel_iter Repelling iterations 86 | #' 87 | #' @return CellTrek(Seurat) object 88 | #' @export 89 | #' 90 | #' @import Seurat 91 | #' @importFrom packcircles circleRepelLayout 92 | #' 93 | #' @examples 94 | celltrek_repel <- function(celltrek_inp, repel_r=5, repel_iter=10) { 95 | celltrek_dr_raw <- Embeddings(celltrek_inp, 'celltrek_raw') 96 | celltrek_dr <- Embeddings(celltrek_inp, 'celltrek') 97 | repel_input <- data.frame(celltrek_dr_raw, repel_r=repel_r) 98 | 99 | ## Add noise ## 100 | theta <- runif(nrow(celltrek_dr), 0, 2*pi) 101 | alpha <- sqrt(runif(nrow(celltrek_dr), 0, 1)) 102 | repel_input[, 1] <- repel_input[, 1] + sin(theta)*alpha*repel_r 103 | repel_input[, 2] <- repel_input[, 2] + cos(theta)*alpha*repel_r 104 | 105 | ## Repelling ## 106 | cat('Repelling points...\n') 107 | celltrek_repel <- circleRepelLayout(repel_input, sizetype='radius', maxiter=repel_iter) 108 | celltrek_dr[, 1] <- celltrek_repel$layout$x 109 | celltrek_dr[, 2] <- celltrek_repel$layout$y 110 | celltrek_out <- celltrek_inp 111 | celltrek_out@reductions$celltrek@cell.embeddings <- celltrek_dr 112 | return(celltrek_out) 113 | } 114 | 115 | #' Calculate the RF-distance between sc and st 116 | #' 117 | #' @param st_sc_int Seurat traint object 118 | #' @param int_assay Name of integration assay 119 | #' @param reduction Dimension reduction method used, usually pca 120 | #' @param intp If TRUE, do interpolation 121 | #' @param intp_pnt Interpolation point number 122 | #' @param intp_lin If TRUE, use linear interpolation 123 | #' @param nPCs Number of PCs used for CellTrek 124 | #' @param ntree Number of trees in random forest 125 | #' @param keep_model If TRUE, return the trained random forest model 126 | #' 127 | #' @return A list of 1. celltrek_distance matrix; 2. trained random forest model (optional) 128 | #' 129 | #' @import dbscan 130 | #' @importFrom akima interpp 131 | #' @import magrittr 132 | #' @import dplyr 133 | #' @import randomForestSRC 134 | #' 135 | #' @examples dist_test <- celltrek_dist(st_sc_int=st_sc_int, int_assay='traint', reduction='pca', intp = T, intp_pnt=10000, intp_lin=F, nPCs=30, ntree=1000, keep_model=T) 136 | celltrek_dist <- function (st_sc_int, int_assay='traint', reduction='pca', intp = T, intp_pnt=10000, intp_lin=F, nPCs=30, ntree=1000, keep_model=T) { 137 | DefaultAssay(st_sc_int) <- int_assay 138 | kNN_dist <- dbscan::kNN(na.omit(st_sc_int@meta.data[, c('coord_x', 'coord_y')]), k=6)$dist 139 | spot_dis <- median(kNN_dist) %>% round 140 | cat('Distance between spots is:', spot_dis, '\n') 141 | 142 | st_sc_int$id <- names(st_sc_int$orig.ident) 143 | st_idx <- st_sc_int$id[st_sc_int$type=='st'] 144 | sc_idx <- st_sc_int$id[st_sc_int$type=='sc'] 145 | meta_df <- data.frame(st_sc_int@meta.data) 146 | 147 | st_sc_int_pca <- st_sc_int@reductions[[reduction]]@cell.embeddings[, 1:nPCs] %>% data.frame %>% 148 | mutate(id=st_sc_int$id, type=st_sc_int$type, class=st_sc_int$cell_names, 149 | coord_x=st_sc_int$coord_x, coord_y=st_sc_int$coord_y) 150 | st_pca <- st_sc_int_pca %>% dplyr::filter(type=='st') %>% dplyr::select(-c(id:class)) 151 | 152 | ## Interpolation ## 153 | ## Uniform sampling ## 154 | if (intp) { 155 | cat ('Interpolating...\n') 156 | spot_ratio <- intp_pnt/nrow(st_pca) 157 | st_intp_df <- apply(st_pca[, c('coord_x', 'coord_y')], 1, function(row_x) { 158 | runif_test <- runif(1) 159 | if (runif_test < spot_ratio%%1) { 160 | theta <- runif(ceiling(spot_ratio), 0, 2*pi) 161 | alpha <- sqrt(runif(ceiling(spot_ratio), 0, 1)) 162 | coord_x <- row_x[1] + (spot_dis/2)*sin(theta)*alpha 163 | coord_y <- row_x[2] + (spot_dis/2)*cos(theta)*alpha 164 | } else { 165 | theta <- runif(floor(spot_ratio), 0, 2*pi) 166 | alpha <- sqrt(runif(floor(spot_ratio), 0, 1)) 167 | coord_x <- row_x[1] + (spot_dis/2)*sin(theta)*alpha 168 | coord_y <- row_x[2] + (spot_dis/2)*cos(theta)*alpha 169 | } 170 | data.frame(coord_x, coord_y) 171 | }) %>% Reduce(rbind, .) 172 | 173 | st_intp_df <- apply(st_pca[, 1:nPCs], 2, function(col_x) { 174 | akima::interpp(x=st_pca$coord_x, y=st_pca$coord_y, z=col_x, 175 | linear=intp_lin, xo=st_intp_df$coord_x, yo=st_intp_df$coord_y) %>% 176 | magrittr::extract2('z') 177 | }) %>% data.frame(., id='X', type='st_intp', st_intp_df) %>% na.omit 178 | st_intp_df$id <- make.names(st_intp_df$id, unique = T) 179 | st_sc_int_pca <- bind_rows(st_sc_int_pca, st_intp_df) 180 | } 181 | 182 | cat('Random Forest training... \n') 183 | ## Training on ST ## 184 | data_train <- st_sc_int_pca %>% dplyr::filter(type=='st') %>% dplyr::select(-c(id:class)) 185 | rf_train <- randomForestSRC::rfsrc(Multivar(coord_x, coord_y) ~ ., data_train, block.size=5, ntree=ntree) 186 | 187 | cat('Random Forest prediction... \n') 188 | ## Testing on all ## 189 | data_test <- st_sc_int_pca 190 | rf_pred <- randomForestSRC::predict.rfsrc(rf_train, newdata=data_test[, c(1:nPCs)], distance='all') 191 | 192 | cat('Making distance matrix... \n') 193 | rf_pred_dist <- rf_pred$distance[data_test$type=='sc', data_test$type!='sc'] %>% 194 | set_rownames(data_test$id[data_test$type=='sc']) %>% set_colnames(data_test$id[data_test$type!='sc']) 195 | 196 | output <- list() 197 | output$spot_d <- spot_dis 198 | output$celltrek_dist <- rf_pred_dist 199 | output$coord_df <- st_sc_int_pca[, c('id', 'type', 'coord_x', 'coord_y')] %>% 200 | dplyr::filter(type!='sc') %>% magrittr::set_rownames(.$id) %>% dplyr::select(-id) 201 | if (keep_model) { 202 | output$model <- rf_train 203 | } 204 | return (output) 205 | } 206 | 207 | #' 208 | #' 209 | #' @param dist_mat Distance matrix of sc-st (sc in rows and st in columns) 210 | #' @param coord_df Coordinates data frame of st (must contain coord_x, coord_y columns, barcode rownames) 211 | #' @param dist_cut Distance cutoff 212 | #' @param top_spot Maximum number of spots that one cell can be charted 213 | #' @param spot_n Maximum number of cells that one spot can contain 214 | #' @param repel_r Repelling radius 215 | #' @param repel_iter Repelling iterations 216 | #' 217 | #' @return SC coordinates 218 | #' 219 | #' @import data.table 220 | #' @import scales 221 | #' @import dplyr 222 | #' @importFrom packcircles circleRepelLayout 223 | #' 224 | #' @examples 225 | celltrek_chart <- function (dist_mat, coord_df, dist_cut=500, top_spot=10, spot_n=10, repel_r=5, repel_iter=10) { 226 | cat('Making graph... \n') 227 | dist_mat[dist_mat>dist_cut] <- NA 228 | dist_mat_dt <- data.table::data.table(Var1=rownames(dist_mat), dist_mat) 229 | dist_edge_list <- data.table::melt(dist_mat_dt, id=1, na.rm=T) 230 | colnames(dist_edge_list) <- c('Var1', 'Var2', 'value') 231 | dist_edge_list$val_rsc <- scales::rescale(dist_edge_list$value, to=c(0, repel_r)) 232 | dist_edge_list$Var1 %<>% as.character 233 | dist_edge_list$Var2 %<>% as.character 234 | dist_edge_list$Var1_type <- 'sc' 235 | dist_edge_list$Var2_type <- 'non-sc' 236 | 237 | cat('Pruning graph...\n') 238 | dist_edge_list_sub <- dplyr::inner_join(dist_edge_list %>% group_by(Var1) %>% top_n(n=top_spot, wt=-value), 239 | dist_edge_list %>% group_by(Var2) %>% top_n(n=spot_n, wt=-value)) %>% data.frame 240 | 241 | cat('Spatial Charting SC data...\n') 242 | sc_coord <- sc_coord_raw <- data.frame(id_raw=dist_edge_list_sub$Var1, id_new=make.names(dist_edge_list_sub$Var1, unique = T)) 243 | sc_coord$coord_x <- sc_coord_raw$coord_x <- coord_df$coord_x[match(dist_edge_list_sub$Var2, rownames(coord_df))] 244 | sc_coord$coord_y <- sc_coord_raw$coord_y <- coord_df$coord_y[match(dist_edge_list_sub$Var2, rownames(coord_df))] 245 | ## Add noise ## 246 | theta <- runif(nrow(dist_edge_list_sub), 0, 2*pi) 247 | alpha <- sqrt(runif(nrow(dist_edge_list_sub), 0, 1)) 248 | sc_coord$coord_x <- sc_coord$coord_x + dist_edge_list_sub$val_rsc*sin(theta)*alpha 249 | sc_coord$coord_y <- sc_coord$coord_y + dist_edge_list_sub$val_rsc*cos(theta)*alpha 250 | ## Point repelling ## 251 | cat('Repelling points...\n') 252 | sc_repel_input <- data.frame(sc_coord[, c('coord_x', 'coord_y')], repel_r=repel_r) 253 | sc_repel <- packcircles::circleRepelLayout(sc_repel_input, sizetype='radius', maxiter=repel_iter) 254 | sc_coord$coord_x <- sc_repel$layout$x 255 | sc_coord$coord_y <- sc_repel$layout$y 256 | return(list(sc_coord_raw, sc_coord)) 257 | } 258 | 259 | #' CellTrek from a pre-computed SC-ST distance matrix 260 | #' 261 | #' @param dist_mat Distance matrix of sc-st (sc in rows and st in columns) 262 | #' @param coord_df Coordinates data frame of st (must contain two columns as coord_x, coord_y and rownames as barcodes) 263 | #' @param dist_cut Distance cutoff 264 | #' @param top_spot Maximum number of spots that one cell can be charted 265 | #' @param spot_n Maximum number of cells that one spot can contain 266 | #' @param sc_data SC data 267 | #' @param sc_assay SC assay 268 | #' @param repel_r Repelling radius 269 | #' @param repel_iter Repelling iterations 270 | #' @param st_data ST data, optional 271 | #' 272 | #' @return A list of 1.Seurat object 273 | #' @export 274 | #' 275 | #' @import dbscan 276 | #' @import Seurat 277 | #' @import dplyr 278 | #' 279 | #' @examples celltrek_res <- celltrek_from_dist(dist_mat, coord_df, dist_cut, top_spot=10, spot_n=10, r=NULL, sc_data, sc_assay='RNA') 280 | celltrek_from_dist <- function (dist_mat, coord_df, dist_cut, top_spot=10, spot_n=10, repel_r=5, repel_iter=10, sc_data, sc_assay='RNA', st_data=NULL) { 281 | colnames(coord_df) <- c('coord_x', 'coord_y') 282 | spot_dis <- median(unlist(dbscan::kNN(coord_df[, c('coord_x', 'coord_y')], k=4)$dist)) 283 | if (is.null(repel_r)) {repel_r=spot_dis/4} 284 | sc_coord_list <- celltrek_chart(dist_mat=dist_mat, coord_df=coord_df, dist_cut=dist_cut, top_spot=top_spot, spot_n=spot_n, repel_r=repel_r, repel_iter=repel_iter) 285 | sc_coord_raw <- sc_coord_list[[1]] 286 | sc_coord <- sc_coord_list[[2]] 287 | sc_out <- CreateSeuratObject(counts=sc_data[[sc_assay]]@data[, sc_coord$id_raw] %>% set_colnames(sc_coord$id_new), 288 | project='celltrek', assay=sc_assay, 289 | meta.data=sc_data@meta.data[sc_coord$id_raw, ] %>% 290 | dplyr::rename(id_raw=id) %>% 291 | mutate(id_new=sc_coord$id_new) %>% 292 | set_rownames(sc_coord$id_new)) 293 | sc_out@meta.data <- dplyr::left_join(sc_out@meta.data, sc_coord) %>% data.frame %>% set_rownames(sc_out$id_new) 294 | sc_out[[sc_assay]]@data <- sc_out[[sc_assay]]@counts 295 | sc_out[[sc_assay]]@counts <- matrix(nrow = 0, ncol = 0) 296 | sc_coord_raw_df <- CreateDimReducObject(embeddings=sc_coord_raw %>% 297 | dplyr::mutate(coord1=coord_y, coord2=max(coord_x)+min(coord_x)-coord_x) %>% 298 | dplyr::select(c(coord1, coord2)) %>% set_rownames(sc_coord_raw$id_new) %>% as.matrix, 299 | assay=sc_assay, key='celltrek_raw') 300 | sc_coord_dr <- CreateDimReducObject(embeddings=sc_coord %>% 301 | dplyr::mutate(coord1=coord_y, coord2=max(coord_x)+min(coord_x)-coord_x) %>% 302 | dplyr::select(c(coord1, coord2)) %>% set_rownames(sc_coord$id_new) %>% as.matrix, 303 | assay=sc_assay, key='celltrek') 304 | sc_pca_dr <- CreateDimReducObject(embeddings=sc_data@reductions$pca@cell.embeddings[sc_coord$id_raw, ] %>% 305 | set_rownames(sc_coord$id_new) %>% as.matrix, assay=sc_assay, key='pca') 306 | sc_umap_dr <- CreateDimReducObject(embeddings=sc_data@reductions$umap@cell.embeddings[sc_coord$id_raw, ] %>% 307 | set_rownames(sc_coord$id_new) %>% as.matrix, assay=sc_assay, key='umap') 308 | sc_out@reductions$celltrek <- sc_coord_dr 309 | sc_out@reductions$celltrek_raw <- sc_coord_raw_df 310 | sc_out@reductions$pca <- sc_pca_dr 311 | sc_out@reductions$umap <- sc_umap_dr 312 | if (!is.null(st_data)) { 313 | sc_out@images <- st_data@images 314 | sc_out@images[[1]]@assay <- DefaultAssay(sc_out) 315 | sc_out@images[[1]]@coordinates <- data.frame(imagerow=sc_coord$coord_x, imagecol=sc_coord$coord_y) %>% set_rownames(sc_coord$id_new) 316 | sc_out@images[[1]]@scale.factors$spot_dis <- spot_dis 317 | } 318 | output <- list(celltrek=sc_out) 319 | return(output) 320 | } 321 | 322 | #' The core function of CellTrek 323 | #' 324 | #' @param st_sc_int Seurat traint object 325 | #' @param int_assay Integration assay ('traint') 326 | #' @param sc_data SC data, optional 327 | #' @param sc_assay SC assay 328 | #' @param reduction Dimension reduction method, usually 'pca' 329 | #' @param intp If True, do interpolation 330 | #' @param intp_pnt Number of interpolation points 331 | #' @param intp_lin If Ture, do linear interpolation 332 | #' @param nPCs Number of PCs 333 | #' @param ntree Number of Trees 334 | #' @param dist_thresh Distance threshold 335 | #' @param top_spot Maximum number of spots that one cell can be charted 336 | #' @param spot_n Maximum number of cells that one spot can contain 337 | #' @param keep_model If TRUE, return the trained random forest model 338 | #' @param repel_r Repelling radius 339 | #' @param repel_iter Repelling iterations 340 | #' @param ... 341 | #' 342 | #' @return Seurat object 343 | #' @export 344 | #' 345 | #' @import dbscan 346 | #' @import Seurat 347 | #' @import dplyr 348 | #' @import magrittr 349 | #' 350 | #' @examples celltrek_res <- celltrek(st_sc_int, int_assay='traint', sc_data=NULL, sc_assay='RNA', reduction='pca', intp=T, intp_pnt=10000, intp_lin=F, nPCs=30, ntree=1000, dist_thresh=.4, top_spot=10, spot_n=10, r=NULL, keep_model=F, ...) 351 | celltrek <- function (st_sc_int, int_assay='traint', sc_data=NULL, sc_assay='RNA', reduction='pca', 352 | intp=T, intp_pnt=10000, intp_lin=F, nPCs=30, 353 | ntree=1000, dist_thresh=.4, top_spot=10, spot_n=10, repel_r=5, repel_iter=10, keep_model=F, ...) { 354 | dist_res <- celltrek_dist(st_sc_int=st_sc_int, int_assay=int_assay, reduction=reduction, intp=intp, intp_pnt=intp_pnt, intp_lin=intp_lin, nPCs=nPCs, ntree=ntree, keep_model=T) 355 | spot_dis_intp <- median(unlist(dbscan::kNN(dist_res$coord_df[, c('coord_x', 'coord_y')], k=4)$dist)) 356 | if (is.null(repel_r)) {repel_r=spot_dis_intp/4} 357 | sc_coord_list <- celltrek_chart(dist_mat=dist_res$celltrek_dist, coord_df=dist_res$coord_df, dist_cut=ntree*dist_thresh, top_spot=top_spot, spot_n=spot_n, repel_r=repel_r, repel_iter=repel_iter) 358 | sc_coord_raw <- sc_coord_list[[1]] 359 | sc_coord <- sc_coord_list[[2]] 360 | cat('Creating Seurat Object... \n') 361 | if (!is.null(sc_data)) { 362 | cat('sc data...') 363 | sc_data$id <- Seurat::Cells(sc_data) 364 | sc_out <- CreateSeuratObject(counts=sc_data[[sc_assay]]@data[, sc_coord$id_raw] %>% set_colnames(sc_coord$id_new), 365 | project='celltrek', assay=sc_assay, 366 | meta.data=sc_data@meta.data[sc_coord$id_raw, ] %>% 367 | dplyr::rename(id_raw=id) %>% 368 | mutate(id_new=sc_coord$id_new) %>% 369 | set_rownames(sc_coord$id_new)) 370 | sc_out@meta.data <- dplyr::left_join(sc_out@meta.data, sc_coord) %>% data.frame %>% set_rownames(sc_out$id_new) 371 | 372 | sc_out[[sc_assay]]@data <- sc_out[[sc_assay]]@counts 373 | sc_out[[sc_assay]]@counts <- matrix(nrow = 0, ncol = 0) 374 | sc_coord_raw_df <- CreateDimReducObject(embeddings=sc_coord_raw %>% 375 | dplyr::mutate(coord1=coord_y, coord2=max(coord_x)+min(coord_x)-coord_x) %>% 376 | dplyr::select(c(coord1, coord2)) %>% set_rownames(sc_coord_raw$id_new) %>% as.matrix, 377 | assay=sc_assay, key='celltrek_raw') 378 | sc_coord_dr <- CreateDimReducObject(embeddings=sc_coord %>% 379 | dplyr::mutate(coord1=coord_y, coord2=max(coord_x)+min(coord_x)-coord_x) %>% 380 | dplyr::select(c(coord1, coord2)) %>% set_rownames(sc_coord$id_new) %>% as.matrix, 381 | assay=sc_assay, key='celltrek') 382 | sc_out@reductions$celltrek <- sc_coord_dr 383 | sc_out@reductions$celltrek_raw <- sc_coord_raw_df 384 | if ('pca' %in% names(sc_data@reductions)) { 385 | sc_pca_dr <- CreateDimReducObject(embeddings=sc_data@reductions$pca@cell.embeddings[sc_coord$id_raw, ] %>% 386 | set_rownames(sc_coord$id_new) %>% as.matrix, assay=sc_assay, key='pca') 387 | sc_out@reductions$pca <- sc_pca_dr 388 | } 389 | if ('umap' %in% names(sc_data@reductions)) { 390 | sc_umap_dr <- CreateDimReducObject(embeddings=sc_data@reductions$umap@cell.embeddings[sc_coord$id_raw, ] %>% 391 | set_rownames(sc_coord$id_new) %>% as.matrix, assay=sc_assay, key='umap') 392 | sc_out@reductions$umap <- sc_umap_dr 393 | } 394 | if ('tsne' %in% names(sc_data@reductions)) { 395 | sc_tsne_dr <- CreateDimReducObject(embeddings=sc_data@reductions$tsne@cell.embeddings[sc_coord$id_raw, ] %>% 396 | set_rownames(sc_coord$id_new) %>% as.matrix, assay=sc_assay, key='tsne') 397 | sc_out@reductions$tsne <- sc_tsne_dr 398 | } 399 | } else { 400 | cat('no sc data...') 401 | sc_out <- CreateSeuratObject(counts=st_sc_int[[int_assay]]@data[, sc_coord$id_raw] %>% 402 | set_colnames(sc_coord$id_new), 403 | project='celltrek', assay=int_assay, 404 | meta.data=st_sc_int@meta.data[sc_coord$id_raw, ] %>% 405 | dplyr::rename(id_raw=id) %>% 406 | mutate(id_new=sc_coord$id_new) %>% 407 | set_rownames(sc_coord$id_new)) 408 | sc_out$coord_x <- sc_coord$coord_x[match(sc_coord$id_new, sc_out$id_new)] 409 | sc_out$coord_y <- sc_coord$coord_y[match(sc_coord$id_new, sc_out$id_new)] 410 | 411 | sc_out[[int_assay]]@counts <- matrix(nrow = 0, ncol = 0) 412 | sc_out[[int_assay]]@scale.data <- st_sc_int[[int_assay]]@scale.data[, sc_coord$id_raw] %>% set_colnames(sc_coord$id_new) 413 | sc_coord_raw_df <- CreateDimReducObject(embeddings=sc_coord_raw %>% 414 | dplyr::mutate(coord1=coord_y, coord2=max(coord_x)+min(coord_x)-coord_x) %>% 415 | dplyr::select(c(coord1, coord2)) %>% set_rownames(sc_coord_raw$id_new) %>% as.matrix, 416 | assay=sc_assay, key='celltrek_raw') 417 | sc_coord_dr <- CreateDimReducObject(embeddings = sc_coord %>% 418 | dplyr::mutate(coord1=coord_y, coord2=max(coord_x)+min(coord_x)-coord_x) %>% 419 | dplyr::select(c(coord1, coord2)) %>% 420 | set_rownames(sc_coord$id_new) %>% 421 | as.matrix, 422 | assay=int_assay, key='celltrek') 423 | sc_out@reductions$celltrek <- sc_coord_dr 424 | sc_out@reductions$celltrek_raw <- sc_coord_raw_df 425 | if ('pca' %in% names(st_sc_int@reductions)) { 426 | sc_pca_dr <- CreateDimReducObject(embeddings=st_sc_int@reductions$pca@cell.embeddings[sc_coord$id_raw, ] %>% 427 | set_rownames(sc_coord$id_new) %>% as.matrix, assay=int_assay, key='pca') 428 | sc_out@reductions$pca <- sc_pca_dr 429 | } 430 | if ('umap' %in% names(st_sc_int@reductions)) { 431 | sc_umap_dr <- CreateDimReducObject(embeddings=st_sc_int@reductions$umap@cell.embeddings[sc_coord$id_raw, ] %>% 432 | set_rownames(sc_coord$id_new) %>% as.matrix, assay=int_assay, key='umap') 433 | sc_out@reductions$umap <- sc_umap_dr 434 | } 435 | if ('tsne' %in% names(st_sc_int@reductions)) { 436 | sc_tsne_dr <- CreateDimReducObject(embeddings=st_sc_int@reductions$tsne@cell.embeddings[sc_coord$id_raw, ] %>% 437 | set_rownames(sc_coord$id_new) %>% as.matrix, assay=int_assay, key='tsne') 438 | sc_out@reductions$tsne <- sc_tsne_dr 439 | } 440 | } 441 | sc_out@images <- st_sc_int@images 442 | sc_out@images[[1]]@assay <- DefaultAssay(sc_out) 443 | sc_out@images[[1]]@coordinates <- data.frame(imagerow=sc_coord$coord_x, imagecol=sc_coord$coord_y) %>% set_rownames(sc_coord$id_new) 444 | sc_out@images[[1]]@scale.factors$spot_dis <- dist_res$spot_d 445 | sc_out@images[[1]]@scale.factors$spot_dis_intp <- spot_dis_intp 446 | 447 | output <- list(celltrek=sc_out) 448 | if (keep_model) { 449 | output[[length(output)+1]] <- dist_res$model 450 | names(output)[length(output)] <- 'model' 451 | } 452 | return(output) 453 | } 454 | --------------------------------------------------------------------------------