├── DESCRIPTION
├── NAMESPACE
├── R
    ├── SpatialPCA-package.R
    ├── SpatialPCA.R
    ├── SpatialPCA_EstimateLoading.R
    ├── SpatialPCA_SpatialPCs.R
    ├── SpatialPCA_buildKernel.R
    ├── SpatialPCA_highresolution.R
    ├── SpatialPCA_multiple_sample.R
    └── SpatialPCA_utilties.R
├── README.md
└── man
    ├── CreateSpatialPCAObject.Rd
    ├── Spatial-package.Rd
    ├── SpatialPCA-class.Rd
    ├── SpatialPCA_EstimateLoading.Rd
    ├── SpatialPCA_Multiple_Sample.Rd
    ├── SpatialPCA_SpatialPCs.Rd
    ├── SpatialPCA_buildKernel.Rd
    ├── SpatialPCA_expr_pred.Rd
    ├── SpatialPCA_highresolution.Rd
    ├── bandwidth_select.Rd
    ├── fx_CHAOS.Rd
    ├── fx_PAS.Rd
    ├── get_NMF.Rd
    ├── get_PCA.Rd
    ├── kernel_build.Rd
    ├── kernel_build_sparse.Rd
    ├── louvain_clustering.Rd
    ├── plot_RGB_UMAP.Rd
    ├── plot_RGB_tSNE.Rd
    ├── plot_cluster.Rd
    ├── plot_each_cluster.Rd
    ├── plot_factor_value.Rd
    ├── plot_trajectory.Rd
    ├── refine_cluster_10x.Rd
    └── walktrap_clustering.Rd


/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: SpatialPCA
 2 | Title: Spatially Aware Dimension Reduction for Spatial Transcriptomics
 3 | Version: 1.3.0
 4 | Authors@R: 
 5 |     c(person("Lulu", "Shang", , "shanglu@umich.edu", role = c("aut","cre")),
 6 |       person("Xiang", "Zhou", , "xzhousph@umich.edu", role = c("aut")),
 7 |       person("Michael", "Kleinsasser", , "biostat-cran-manager@umich.edu", role = c("cre")))
 8 | Description: SpatialPCA is a spatially aware dimension reduction method that explicitly accounts for the spatial correlation across tissue locations. SpatialPCA can extract a low dimensional representation of the spatial transcriptomics data with enriched biological signal and preserved spatial correlation structure, thus unlocking many existing computational tools previously developed in single-cell RNAseq studies for tailored and novel analysis of spatial transcriptomics. 
 9 | License: GPL (>= 3)
10 | Encoding: UTF-8
11 | Roxygen: list(markdown = TRUE)
12 | RoxygenNote: 7.2.0
13 | Suggests: knitr, rmarkdown
14 | Imports: methods, MASS, Matrix, RSpectra, Rtsne, SPARK, Seurat,
15 |         parallel, pdist, tidyr, umap, ggplot2, splatter, dplyr,
16 |         assertthat
17 | Remotes: xzhoulab/SPARK
18 | biocViews: bluster
19 | VignetteBuilder: knitr
20 | NeedsCompilation: no
21 | Author: Lulu Shang [aut],
22 |   Xiang Zhou [aut],
23 |   Michael Kleinsasser [cre]
24 | Maintainer: Lulu Shang <shanglu@umich.edu>
25 | Packaged: 2022-08-12 20:52:16 UTC; shanglu
26 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(CreateSpatialPCAObject)
 4 | export(SpatialPCA_EstimateLoading)
 5 | export(SpatialPCA_Multiple_Sample)
 6 | export(SpatialPCA_SpatialPCs)
 7 | export(SpatialPCA_buildKernel)
 8 | export(SpatialPCA_expr_pred)
 9 | export(SpatialPCA_highresolution)
10 | export(bandwidth_select)
11 | export(fx_CHAOS)
12 | export(fx_PAS)
13 | export(get_NMF)
14 | export(get_PCA)
15 | export(kernel_build)
16 | export(kernel_build_sparse)
17 | export(louvain_clustering)
18 | export(plot_RGB_UMAP)
19 | export(plot_RGB_tSNE)
20 | export(plot_cluster)
21 | export(plot_factor_value)
22 | export(plot_trajectory)
23 | export(refine_cluster_10x)
24 | export(walktrap_clustering)
25 | exportClasses(SpatialPCA)
26 | import(MASS)
27 | import(RSpectra)
28 | import(Rtsne)
29 | import(SPARK)
30 | import(Seurat)
31 | import(ggplot2)
32 | import(parallel)
33 | import(pdist)
34 | import(tidyr)
35 | import(umap)
36 | importFrom(grDevices,rgb)
37 | importFrom(methods,as)
38 | importFrom(methods,new)
39 | importFrom(stats,bw.SJ)
40 | importFrom(stats,bw.nrd0)
41 | importFrom(stats,dist)
42 | importFrom(stats,filter)
43 | importFrom(stats,median)
44 | importFrom(stats,na.omit)
45 | importFrom(stats,optim)
46 | 


--------------------------------------------------------------------------------
/R/SpatialPCA-package.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' The 'SpatialPCA' package.
 3 | #'
 4 | #' @description A DESCRIPTION OF THE PACKAGE
 5 | #' 
 6 | #' @name Spatial-package
 7 | #' @aliases SpatialPCA
 8 | #' 
 9 | #' 
10 | #' @importFrom grDevices rgb
11 | #' @importFrom methods as new
12 | #' @importFrom stats bw.SJ bw.nrd0 dist filter median na.omit optim
13 | #'
14 | #'
15 | #' @references
16 | #' 
17 | #'
18 | NULL
19 | 


--------------------------------------------------------------------------------
/R/SpatialPCA.R:
--------------------------------------------------------------------------------
  1 | #####################################################################
  2 | # Package: SpatialPCA
  3 | # Version: 1.1.0
  4 | # Date : 2021-10-27
  5 | # Title : Spatially Aware Dimension Reduction for Spatial Transcriptomics 
  6 | # Authors: L. Shang and X. Zhou
  7 | # Contacts: shanglu@umich.edu 
  8 | #          University of Michigan, Department of Biostatistics
  9 | ######################################################################
 10 | 
 11 | #' Each SpatialPCA object has a number of slots which store information. Key slots to access
 12 | #' are listed below.
 13 | #'
 14 | #' @slot counts The raw expression count matrix. Rows are genes, columns are spots/cells.
 15 | #' @slot normalized_expr Normalized (by default we use SCTransform normalization in Seurat R package) expression matrix.
 16 | #' @slot project Name of the project (for record keeping).
 17 | #' @slot covariate The covariates in experiments (if any covariate included).
 18 | #' @slot location Cell/spot spatial coordinates to compute the kernel matrix.
 19 | #' @slot kernelmat The kernel matrix for spatial relationship between locations.
 20 | #' @slot kerneltype The type of kernel to be used, either "gaussian" for gaussian kernel, or "cauchy" for cauchy kernel, or "quadratic" for rational quadratic kernel.
 21 | #' @slot bandwidthtype The type of bandwidth to be used in Gaussian kernel, "SJ" for Sheather & Jones (1991) method (usually used in small sample size datasets), "Silverman" for Silverman's ‘rule of thumb’ method (1986)(usually used in large sample size datasets).
 22 | #' @slot bandwidth The bandwidth in Gaussian kernel, users can also specify their preferred bandwidth.
 23 | #' @slot sparseKernel To choose if the user wants to use a sparse kernel matrix or not. It is recommended to choose sparseKernel="TRUE" when sample size is large and you want to speed up the calculation. 
 24 | #' @slot sparseKernel_tol When sparseKernel=TRUE, sparseKernel_tol is the cut-off value when building sparse kernel matrix, any element in the kernel matrix greater than sparseKernel_tol will be kept, otherwise will be set to 0 to save memory.
 25 | #' @slot sparseKernel_ncore When sparseKernel=TRUE, sparseKernel_ncore is the number of CPU cores to use when building the sparse kernel matrix.
 26 | #' @slot fast Select "TRUE" to accrelerate the algorithm by performing low-rank approximation on the kernel matrix, otherwise "FALSE" for calculation without low-rank approximation on the kernel matrix.
 27 | #' @slot eigenvecnum When fast=TRUE, the user can optionally specify the number of top eigenvectors and eigenvalues to be used in low-rank approximation when performing eigen decomposition on the kernel matrix.
 28 | #' @slot tau The variance parameter in covariance matrix for the spatial PCs, to be inferred through the algorithm.
 29 | #' @slot sigma2_0 The residual error variance, to be inferred through the algorithm.
 30 | #' @slot SpatialPCnum The number of Spatial PCs, specified by the user, default is 20.
 31 | #' @slot W The factor loading matrix.
 32 | #' @slot SpatialPCs The estimated spatial PCs.
 33 | #' @slot highPCs The estimated high resolution spatial PCs, if needed.
 34 | #' @slot highPos The scaled locations of estimated high resolution spatial PCs, if needed.
 35 | #' @slot expr_pred The predicted gene expression on new locations when highPCs and highPos are avaliable.
 36 | #' @slot params List of model parameters.
 37 | #' @export
 38 | 
 39 | setClass("SpatialPCA", slots=list(
 40 |   counts = "ANY",
 41 |   normalized_expr = "ANY",
 42 |   project = "character",
 43 |   covariate = "ANY",
 44 |   location = "matrix", 
 45 |   kernelmat = "ANY",
 46 |   kerneltype = "character",
 47 |   bandwidthtype = "character",
 48 |   bandwidth = "numeric",
 49 |   sparseKernel="logical",
 50 |   sparseKernel_tol = "numeric",
 51 |   sparseKernel_ncore = "numeric",
 52 |   fast = "logical",
 53 |   eigenvecnum = "numeric",
 54 |   SpatialPCnum = "numeric",
 55 |   tau = "numeric",
 56 |   sigma2_0 = "numeric",
 57 |   W = "ANY",
 58 |   SpatialPCs = "ANY",
 59 |   highPCs = "ANY",
 60 |   highPos = "ANY",
 61 |   expr_pred="ANY",
 62 |   params = "ANY"
 63 | ) )
 64 | 
 65 | 
 66 | #' Create the SpatialPCA object with filtering and normalization step.
 67 | #' @param counts Gene expression count matrix (matrix), the dimension is m x n, where m is the number of genes and n is the number of locations.
 68 | #' @param location Spatial location matrix (matrix), the dimension is n x d, n is the number of locations, d is dimensin of spatial coordinates, e.g. d=2 for locations on 2D space. The rownames of locations and the colnames of count matrix should be matched.
 69 | #' @param covariate The covariates in experiments (matrix, if any covariate included), n x q, n is the number of locations, q is the number of covariates. The rownames of covariates and the rownames of locations should be matched.
 70 | #' @param project Name of the project (for record keeping).
 71 | #' @param min.loctions The features (genes) detected in at least min.loctions number of loctions, default is 20.
 72 | #' @param min.features The locations where at least min.features number of features (genes) are detected, default is 20.
 73 | #' @param gene.type The type of genes to be used: "spatial" for spatially expressed genes; "hvg" for highly variable genes; "custom" for user specified genes, default is "spatial".
 74 | #' @param gene.number The number of top highly variable genes if gene.selection=="hvg" (use all HVG genes if this number is not specified); 
 75 | #' number of top spatially expressed genes if gene.selection=="spatial" (use all significant spatially expressed genes if this number is not specified).
 76 | #' @param customGenelist A list of user specified genes if  gene.type=="custom".
 77 | #' @param sparkversion In spatial gene selection, specify "spark" for small sample size data for higher detection power of spatial genes, "sparkx" for large sample size data for saving time and memory.
 78 | #' @param numCores_spark If gene.type="spatial", specify the number of CPU cores in SPARK package to use when selecting spatial genes.
 79 | #' @return Returns SpatialPCA object, with filtered and normalized gene expression matrix and corresponding location matrix.
 80 | #' 
 81 | #' @import Seurat
 82 | #' @import SPARK
 83 | #' 
 84 | #' @examples 
 85 | #' 
 86 | #' 
 87 | #' 
 88 | #' @export
 89 | CreateSpatialPCAObject <- function(counts, location, covariate=NULL,project = "SpatialPCA", gene.type="spatial", sparkversion="spark",numCores_spark=1, gene.number=3000,customGenelist=NULL,min.loctions = 20,  min.features=20){
 90 |   
 91 |   #suppressMessages(require(Seurat))
 92 | 	#suppressMessages(require(SPARK))
 93 | 
 94 | 	## check dimension
 95 | 	if(ncol(counts)!=nrow(location)){
 96 | 		stop("The number of cells in counts and location should be consistent (counts -- m genes x n locations; location -- n locations x d dimension).")
 97 | 	}# end fi
 98 |   
 99 | 	## check data order should consistent
100 | 	if(!identical(colnames(counts), rownames(location))){
101 | 		stop("The column names of counts and row names of location should be should be matched (counts -- m genes x n locations; location -- n locations x d dimension).")
102 | 	}# end fi
103 |  
104 | 
105 | 	## inheriting
106 | 	object <- new(
107 | 		Class = "SpatialPCA",
108 | 		counts = counts,
109 | 		location = location,
110 | 		project = project
111 | 	)
112 |   
113 |     if(!is.null(covariate)){
114 | 		  	## check data order should consistent
115 | 			if(!identical(rownames(covariate), rownames(location))){
116 | 				stop("The row names of covariate and row names of location should be should be matched (covariate -- n locations x q covariates; location -- n locations x d dimension).")
117 | 			}# end fi
118 | 			
119 | 			q=dim(covariate)[2]
120 | 			n_covariate=dim(covariate)[1]
121 | 		# remove the intercept if added by user, later intercept will add automatically
122 | 			if(length(unique(covariate[,1])) == 1){
123 | 				covariate = covariate[, -1]
124 | 				q=q-1
125 | 			}# end fi
126 | 		
127 | 		object@covariate = as.matrix(covariate,n_covariate,q)
128 | 
129 | 	}# end fi
130 | 
131 |  
132 | 
133 |   Seu <- CreateSeuratObject(counts = counts, project = project, min.cells = min.loctions, min.features = min.features)
134 | 	
135 | 	object@counts <- counts # store count matrix in sparse matrix
136 | 	object@location <- location
137 | 	object@project <- project
138 | 
139 | 	rm(counts) # to save memory
140 | 	rm(location)
141 | 
142 | 	if(!is.null(customGenelist)){ # if user specified customGenelist
143 | 
144 | 								cat(paste("## Use SCTransform function in Seurat to normalize data. \n"))
145 | 								Seu = SCTransform(Seu, return.only.var.genes = FALSE, variable.features.n = NULL,  variable.features.rv.th = 1.3)
146 | 						 		cat(paste("## Custom gene list contains ",length(customGenelist)," genes. \n"))
147 | 						 		customGenelist = as.character(customGenelist)
148 | 						 		ind_match = na.omit(match(customGenelist, rownames(Seu@assays$SCT@scale.data)))
149 | 						 		cat(paste("## In total ",length(ind_match)," custom genes are matched with genes in the count matrix. \n"))
150 | 						 		object@normalized_expr = Seu@assays$SCT@scale.data[ind_match,]
151 | 						 		cat(paste("## Use ",length(ind_match)," custom genes for analysis. \n"))
152 | 
153 | 	}else{		# if user didn't specify customGenelist
154 | 
155 |   	if(gene.type=="hvg"){
156 | 
157 | 								cat(paste("## Use SCTransform function in Seurat to normalize data. \n"))
158 | 
159 | 								if(is.null(gene.number)){
160 | 
161 | 								Seu = SCTransform(Seu, return.only.var.genes = TRUE, variable.features.n = NULL,  variable.features.rv.th = 1.3)
162 | 								object@normalized_expr = Seu@assays$SCT@scale.data
163 | 								gene.number = dim(Seu@assays$SCT@scale.data)[1]
164 | 								cat(paste("## Gene number is not specified, using all ",gene.number," highly variable genes. \n"))  		
165 | 
166 | 								}else{
167 | 
168 | 								Seu = SCTransform(Seu, return.only.var.genes = TRUE, variable.features.n = NULL,  variable.features.rv.th = 1.3)
169 | 								hvg_gene_num = dim(Seu@assays$SCT@scale.data)[1]
170 | 
171 | 								if( gene.number < hvg_gene_num ){
172 | 
173 | 								object@normalized_expr = Seu@assays$SCT@scale.data[1:gene.number,]
174 | 								
175 | 								cat(paste("## Using top ",gene.number," highly variable genes. \n"))
176 | 
177 | 								}else{
178 | 
179 | 								object@normalized_expr = Seu@assays$SCT@scale.data
180 | 								cat("The  number of highly variable genes is less than the specified number of genes. \n")
181 | 								cat(paste("## Using ",hvg_gene_num," highly variable genes for analysis. \n"))
182 | 
183 | 								}
184 | 
185 | 								}
186 |   	}else if(gene.type=="spatial"){
187 | 
188 | 						  	# normalize data
189 | 						  	cat(paste("## Use SCTransform function in Seurat to normalize data. \n"))
190 | 						  	Seu = SCTransform(Seu, return.only.var.genes = FALSE, variable.features.n = NULL,  variable.features.rv.th = 1.3)
191 | 						 	
192 | 				  	# select spatial genes
193 | 				 	if(sparkversion=="spark"){
194 | 												cat(paste("## Use spark.test function in SPARK package to select spatially variable genes. \n"))
195 | 				                #suppressMessages(require(SPARK))
196 | 				                count_test_spark = object@counts[na.omit(match(rownames(Seu@assays$SCT@scale.data), rownames(object@counts))), na.omit(match(colnames(Seu@assays$SCT@scale.data),colnames(object@counts)))]
197 | 				                location_test_spark = as.data.frame(object@location[match(colnames(Seu@assays$SCT@scale.data), rownames(object@location)), ])
198 | 				                spark_result <- spark(count_test_spark, location_test_spark,numCores = numCores_spark)
199 | 				                significant_gene_number = sum(spark_result@res_mtest$adjusted_pvalue <= 0.05)
200 | 				                SVGnames = rownames(spark_result@res_mtest[order(spark_result@res_mtest$adjusted_pvalue),])[1:significant_gene_number]
201 | 				                cat(paste("## Identified ", length(SVGnames)," spatial genes through spark.test function. \n"))
202 | 					}else if(sparkversion=="sparkx"){
203 | 
204 | 												cat(paste("## Use sparkx function in SPARK to select spatially variable genes. \n"))
205 | 												count_test_spark = object@counts[na.omit(match(rownames(Seu@assays$SCT@scale.data), rownames(object@counts))),na.omit(match(colnames(Seu@assays$SCT@scale.data), colnames(object@counts)))]
206 | 												location_test_spark = as.data.frame(object@location[match(colnames(Seu@assays$SCT@scale.data), rownames(object@location)),])
207 | 												location_test_spark = as.matrix(location_test_spark)
208 | 												sparkX <- sparkx(	count_test_spark, location_test_spark,numCores=numCores_spark)
209 | 												significant_gene_number = sum(sparkX$res_mtest$adjustedPval<=0.05)
210 | 												SVGnames = rownames(sparkX$res_mtest[order(sparkX$res_mtest$adjustedPval),])[1:significant_gene_number]
211 | 												cat(paste("## Identified ",length(SVGnames)," spatial genes through SPARK-X function. \n"))
212 | 					}
213 | 
214 | 					# subset normalized data with spatial genes
215 | 					if(is.null(gene.number)){
216 | 						
217 | 						object@normalized_expr = Seu@assays$SCT@scale.data[na.omit(match(SVGnames, rownames(Seu@assays$SCT@scale.data))),]
218 | 						cat(paste("## Gene number is not specified, we use all ",gene.number," spatially variable genes. \n")) 
219 | 					
220 | 					}else {
221 | 
222 | 						if(length(SVGnames) < gene.number){
223 | 							cat("The  number of significant spatial genes is less than the specified number of spatial genes. \n")
224 | 							cat(paste("## Using ",length(SVGnames)," significant spatially variable genes. \n"))
225 | 							object@normalized_expr = Seu@assays$SCT@scale.data[na.omit(match(SVGnames, rownames(Seu@assays$SCT@scale.data))),]
226 | 						}else{
227 | 							cat(paste("## Using top ",gene.number," significant spatially variable genes. \n"))
228 | 							object@normalized_expr = Seu@assays$SCT@scale.data[na.omit(match(SVGnames[1:gene.number], rownames(Seu@assays$SCT@scale.data))),]
229 | 						}
230 | 					}
231 | 
232 | }
233 | 
234 | }
235 | 
236 | 					#  location for normalized expression matrix
237 | 					object@location = object@location[match(colnames(object@normalized_expr), rownames(object@location)),]
238 | 
239 | 					#  covariates, i.e., confounding or batch effects
240 | 					if(!is.null(covariate)){
241 | 						object@covariate = object@covariate[match(colnames(object@normalized_expr), rownames(object@location)),1:q]
242 | 						object@covariate = as.matrix(object@covariate,dim(object@normalized_expr)[2],q )
243 | 					}
244 | 
245 | 				 	## store count matrix as a sparse matrix
246 | 					if(class(object@counts)[1] != "dgCMatrix" ){
247 | 						object@counts <- as(object@counts, "dgCMatrix")
248 | 					}# end fi
249 | 
250 | 					
251 | 					object@params = list()
252 | 
253 | 					rm(Seu)
254 | 
255 | 					return(object)
256 | }# end function
257 | 
258 | 
259 | #' @import SPARK
260 | spark = function(rawcount, location, numCores){
261 | 	# library(SPARK)
262 | 	location = as.data.frame(location)
263 | 	rownames(location) = colnames(rawcount)
264 | 
265 | 	spark <- CreateSPARKObject(counts=rawcount, location=location,percentage = 0.1, min_total_counts = 10)
266 | 	spark@lib_size <- apply(rawcount, 2, sum)
267 | 	spark <- spark.vc(spark, 
268 |           covariates = NULL, 
269 |                     lib_size = spark@lib_size, 
270 |                     num_core = numCores,
271 |                     verbose = F,
272 |                     fit.model="gaussian")
273 | 	spark <- spark.test(spark, 
274 |                     check_positive = T, 
275 |                     verbose = F)
276 | 	return(spark)  
277 | }
278 | 
279 | 
280 | 
281 | 
282 | 
283 | 
284 | 
285 | 
286 | 
287 | 
288 | 
289 | 


--------------------------------------------------------------------------------
/R/SpatialPCA_EstimateLoading.R:
--------------------------------------------------------------------------------
  1 | ########################################################################################################################
  2 | # Package: SpatialPCA
  3 | # Version: 1.1.0
  4 | # Date   : 2021-10-27
  5 | # Title : Spatially Aware Dimension Reduction for Spatial Transcriptomics
  6 | # Authors: L. Shang and X. Zhou
  7 | # Contacts: shanglu@umich.edu
  8 | #          University of Michigan, Department of Biostatistics
  9 | ####################################################################################################
 10 | 
 11 | 
 12 | #' Calculate loading matrix.
 13 | #'
 14 | #' @param object SpatialPCA object.
 15 | #' @param maxiter Maximum iteration number. Default is 300.
 16 | #' @param initial_tau Initial value of tau. Default is 1. Because we need tau to be positive, we calculate exp(log(tau)) during iterations.
 17 | #' @param fast Select "TRUE" if the user wants to use low-rank approximation on the kernel matrix to accelerate the algorithm, otherwise select "FALSE".
 18 | #' @param eigenvecnum When fast=TRUE, eigenvecnum is the number of top eigenvectors and eigenvalues to be used in low-rank approximation in the eigen decomposition step for kernel matrix.
 19 | #' The default is NULL, if specified, it is recommended to use eigenvecnum=20 when sample size is large (e.g. >5,000). When sample size is small, eigenvecnum is suggested to explain at least 90% variance.
 20 | #' @param SpatialPCnum Number of spatial PCs.
 21 | #' @return Returns SpatialPCA object with estimated loading matrix W.
 22 | #'
 23 | #' @import RSpectra
 24 | #'
 25 | #' @export
 26 | #'
 27 | SpatialPCA_EstimateLoading = function(object, maxiter=300,initial_tau=1,fast=FALSE,eigenvecnum=NULL,SpatialPCnum=20){
 28 | 
 29 |       suppressMessages(require(RSpectra))
 30 |       set.seed(1234)
 31 |       param_ini=log(initial_tau)
 32 |       object@SpatialPCnum = SpatialPCnum
 33 |       object@fast = fast
 34 |       object@params$X = scale(object@location)
 35 |       object@params$n = dim(object@params$X)[1]
 36 |       object@params$p=dim(object@params$X)[2]
 37 | 
 38 |   if(is.null(object@covariate)){
 39 |       object@params$H = matrix(1, dim(object@params$X)[1],1)
 40 |       HH_inv=solve(t(object@params$H)%*%object@params$H,tol = 1e-40)
 41 |       HH = object@params$H%*%HH_inv%*%t(object@params$H)
 42 |       object@params$M=diag(object@params$n)-HH
 43 |       # Y=expr
 44 |       object@params$tr_YMY=sum(diag(object@params$expr%*%object@params$M%*%t(object@params$expr)))
 45 |       object@params$YM = object@params$expr%*%object@params$M
 46 |       object@params$q=1
 47 |   }else{
 48 |       object@params$q = dim(object@covariate)[2]+1
 49 |       object@params$H = matrix(0, object@params$n,object@params$q)
 50 |       object@params$H[,1]=1
 51 |       object@params$H[,2:object@params$q] = object@covariate
 52 |       HH_inv=solve(t(object@params$H)%*%object@params$H,tol = 1e-40)
 53 |       HH=object@params$H%*%HH_inv%*%t(object@params$H)
 54 |       object@params$M=diag(object@params$n)-HH
 55 |       #Y=expr
 56 |       object@params$tr_YMY=sum(diag(object@params$expr%*%object@params$M%*%t(object@params$expr)))
 57 |       object@params$YM = object@params$expr%*%object@params$M
 58 |   }
 59 | 
 60 | 
 61 |   if(fast==FALSE){
 62 |       object@fast=fast
 63 |       print("Eigen decomposition on kernel matrix!")
 64 |       eigen_res = eigen(object@kernelmat)
 65 |       object@params$delta = eigen_res$values
 66 |       object@params$U = eigen_res$vectors
 67 |       print("Using all eigenvectors and eigenvalues in the Kernel matrix!")
 68 |   }else{
 69 |       object@fast=fast
 70 |       if(!is.null(eigenvecnum)){
 71 |         print("Eigen decomposition on kernel matrix!")
 72 |         object@eigenvecnum=eigenvecnum
 73 |         if(object@sparseKernel==TRUE){
 74 |           eigen_res = eigs_sym(object@kernelmat, k=object@eigenvecnum)
 75 |           object@params$delta = eigen_res$values
 76 |           object@params$U = eigen_res$vectors
 77 |           }else{
 78 |           eigen_res = eigs_sym(object@kernelmat, k=object@eigenvecnum, which = "LM")
 79 |           object@params$delta = eigen_res$values
 80 |           object@params$U = eigen_res$vectors
 81 |         }
 82 | 
 83 |         print("Low rank approximation!")
 84 |         print(paste0("Using user selected top ",object@eigenvecnum," eigenvectors and eigenvalues in the Kernel matrix!"))
 85 |       }else if(object@params$n>5000){
 86 |         print("Eigen decomposition on kernel matrix!")
 87 |         if(object@sparseKernel==TRUE){
 88 |           eigen_res = eigs_sym(object@kernelmat, k=20)
 89 |           object@params$delta = eigen_res$values
 90 |           object@params$U = eigen_res$vectors
 91 |           }else{
 92 |           eigen_res = eigs_sym(object@kernelmat, k=20, which = "LM")
 93 |           object@params$delta = eigen_res$values
 94 |           object@params$U = eigen_res$vectors
 95 |         }
 96 |         print("Low rank approximation!")
 97 |         print("Large sample, using top 20 eigenvectors and eigenvalues in the Kernel matrix!")
 98 |         }else{
 99 |         eigen_res = eigen(object@kernelmat)
100 |         delta_all = eigen_res$values
101 |         U_all = eigen_res$vectors
102 |         ind = which(cumsum(delta_all/length(delta_all))>0.9)[1]
103 |         print("Low rank approximation!")
104 |         print(paste0("Small sample, using top ",ind," eigenvectors and eigenvalues in the Kernel matrix!"))
105 |         object@params$delta = delta_all[1:ind]
106 |         object@params$U = U_all[,1:ind]
107 |         rm(U_all)
108 |         }
109 |     }
110 | 
111 | 
112 |     object@params$MYt = object@params$M %*% t(object@params$expr)
113 |     object@params$YMMYt = object@params$YM %*% object@params$MYt
114 |     object@params$YMU = object@params$YM %*% object@params$U
115 |     object@params$Xt = t(object@params$H)
116 |     object@params$XtU = object@params$Xt %*% object@params$U
117 |     object@params$Ut = t(object@params$U)
118 |     object@params$UtX = object@params$Ut %*% object@params$H
119 |     object@params$YMX = object@params$YM %*% object@params$H
120 |     object@params$UtU = object@params$Ut %*% object@params$U
121 |     object@params$XtX = object@params$Xt %*% object@params$H
122 |     object@params$SpatialPCnum = SpatialPCnum
123 | 
124 | 
125 |     optim_result =try(optim(param_ini, SpatialPCA_estimate_parameter,params=object@params,control = list(maxit = maxiter), lower = -10, upper = 10,method="Brent"),silent=T)
126 | 
127 |     object@tau = exp(optim_result$par)
128 |     k = dim(object@params$expr)[1]
129 |     n = dim(object@params$expr)[2]
130 |     q=object@params$q
131 |     tauD_UtU_inv = solve(object@tau*diag(object@params$delta) + object@params$UtU, tol = 1e-40)
132 |     YMU_tauD_UtU_inv_Ut = object@params$YMU %*% tauD_UtU_inv %*% object@params$Ut
133 |     YMU_tauD_UtU_inv_UtX = YMU_tauD_UtU_inv_Ut %*% object@params$H
134 |     XtU_inv_UtX = object@params$XtU %*% tauD_UtU_inv %*% object@params$UtX
135 |     left = object@params$YMX - YMU_tauD_UtU_inv_UtX
136 |     right = t(left)
137 |     middle = solve(-XtU_inv_UtX, tol = 1e-40)
138 |     G_each = object@params$YMMYt - YMU_tauD_UtU_inv_Ut %*% object@params$MYt - left %*% middle %*% right
139 |     object@W = eigs_sym(G_each, k=SpatialPCnum, which = "LM")$vectors
140 |     object@sigma2_0 = as.numeric((object@params$tr_YMY+F_funct_sameG(object@W,G_each))/(k*(n-q)))
141 | 
142 |     rm(eigen_res)
143 |     rm(tauD_UtU_inv)
144 |     rm(YMU_tauD_UtU_inv_Ut)
145 |     rm(YMU_tauD_UtU_inv_UtX)
146 |     rm(XtU_inv_UtX)
147 |     rm(left)
148 |     rm(right)
149 |     rm(middle)
150 |     rm(G_each)
151 |     gc()
152 | 
153 |   return(object)
154 | }
155 | 
156 | 
157 | #' @import RSpectra
158 | SpatialPCA_estimate_parameter = function(param_ini, params){
159 |     # suppressMessages(require(RSpectra))
160 |     set.seed(1234)
161 |     tau=exp(param_ini[1])
162 |     k = dim(params$expr)[1]
163 |     n = dim(params$expr)[2]
164 |     q=params$q
165 |     PCnum=params$SpatialPCnum
166 |     tauD_UtU_inv = solve(tau*diag(params$delta) + params$UtU, tol = 1e-40)
167 |     YMU_tauD_UtU_inv_Ut = params$YMU %*% tauD_UtU_inv %*% params$Ut
168 |     YMU_tauD_UtU_inv_UtX = YMU_tauD_UtU_inv_Ut %*% params$H
169 |     XtU_inv_UtX = params$XtU %*% tauD_UtU_inv %*% params$UtX
170 |     left = params$YMX - YMU_tauD_UtU_inv_UtX
171 |     right = t(left)
172 |     middle = solve(-XtU_inv_UtX, tol = 1e-40)
173 |     G_each = params$YMMYt - YMU_tauD_UtU_inv_Ut %*% params$MYt - left %*% middle %*% right
174 |     log_det_tauK_I = determinant(1/tau*diag(1/params$delta)+ params$UtU, logarithm=TRUE)$modulus[1] + determinant(tau*diag(params$delta), logarithm=TRUE)$modulus[1]
175 |     Xt_invmiddle_X = params$XtX - params$XtU %*% solve(params$UtU + 1/tau *diag( 1/params$delta) , tol = 1e-40) %*% params$UtX
176 |     log_det_Xt_inv_X = determinant(Xt_invmiddle_X, logarithm=TRUE)$modulus[1]
177 |     sum_det=0
178 |     sum_det=sum_det+(0.5*log_det_tauK_I+0.5*log_det_Xt_inv_X  )*PCnum
179 | 
180 |     rm(tauD_UtU_inv)
181 |     rm(YMU_tauD_UtU_inv_Ut)
182 |     rm(YMU_tauD_UtU_inv_UtX)
183 |     rm(XtU_inv_UtX)
184 |     rm(left)
185 |     rm(middle)
186 |     rm(right)
187 |     rm(Xt_invmiddle_X)
188 |     gc()
189 | 
190 |     W_est_here = eigs_sym(G_each, k=PCnum, which = "LM")$vectors
191 |     -(-sum_det -(k*(n-q))/2*log(params$tr_YMY+F_funct_sameG(W_est_here,G_each)))
192 | }
193 | 
194 | 
195 | F_funct_sameG = function(X,G){ # G is a matrix
196 |   return_val=0
197 |   for(i in 1: dim(X)[2]){
198 |     return_val=return_val+t(X[,i])%*%G%*%X[,i]
199 |   }
200 |   -return_val
201 | }
202 | 
203 | 
204 | 
205 | 
206 | 


--------------------------------------------------------------------------------
/R/SpatialPCA_SpatialPCs.R:
--------------------------------------------------------------------------------
 1 | ########################################################################################################################
 2 | # Package: SpatialPCA
 3 | # Version: 1.1.0
 4 | # Date   : 2021-10-27
 5 | # Title : Spatially Aware Dimension Reduction for Spatial Transcriptomics 
 6 | # Authors: L. Shang and X. Zhou
 7 | # Contacts: shanglu@umich.edu 
 8 | #          University of Michigan, Department of Biostatistics
 9 | ####################################################################################################
10 | 
11 | 
12 | #' Calculating Spatial PCs (latent factor matrix Z).
13 | #' @param object SpatialPCA object.
14 | #' @param fast Select fast=TRUE if the user wants to use low-rank approximation on the kernel matrix to calculate the spatial PCs, otherwise select FALSE. 
15 | #' @param eigenvecnum When fast=TRUE, eigenvecnum is the number of top eigenvectors and eigenvalues to be used in low-rank approximation in the eigen decomposition step for kernel matrix. 
16 | #' The default is NULL, if specified, it is recommended that these top eigen values explain >=90% of the variance. 
17 | #' In estimating spatial PCs, we need larger number of eigenvectors in kernel matrix for more accurate estimation.
18 | #' @return Returns SpatialPCA object with estimated Spatial PCs.
19 | #' 
20 | #' @import RSpectra
21 | #' 
22 | #' @export
23 | SpatialPCA_SpatialPCs= function(object,fast=FALSE,eigenvecnum=NULL){
24 | 
25 | 	# suppressMessages(require(RSpectra))
26 | 
27 |     n = object@params$n
28 |     PCnum = object@SpatialPCnum
29 |     Z_hat = matrix(0, PCnum, n)
30 |     tau = object@tau
31 |     W_hat = object@W
32 | 
33 | if(fast==FALSE){
34 |         U=object@params$U
35 |         delta=object@params$delta
36 | }else if(fast==TRUE){
37 |     
38 |     if(!is.null(eigenvecnum)){
39 |         print(paste0("Low rank approximation!"))
40 |         print(paste0("Using user selected top ",eigenvecnum," eigenvectors and eigenvalues in the Kernel matrix!"))
41 |         EIGEN = eigs_sym(object@kernelmat, k=eigenvecnum, which = "LM")
42 |         U=EIGEN$vectors
43 |         delta=EIGEN$values
44 | 
45 |     }else if(n>5000){
46 |         fast_eigen_num = ceiling(n*0.1)
47 |         print(paste0("Low rank approximation!"))
48 |         print("Large sample, using top 10% sample size of eigenvectors and eigenvalues in the Kernel matrix!")
49 |         EIGEN = eigs_sym(object@kernelmat, k=fast_eigen_num, which = "LM")
50 |         U=EIGEN$vectors
51 |         delta=EIGEN$values
52 |     }else{
53 |         U=object@params$U
54 |         delta=object@params$delta
55 |         ind=length(delta)
56 |         #print(paste0("Low rank approximation!"))
57 |         print(paste0("Small sample, using top ",ind," eigenvectors and eigenvalues in the Kernel matrix!"))
58 |     }
59 | }
60 |     object@params$U=U
61 |     object@params$delta=delta
62 | 
63 |     W_hat_t = t(W_hat)
64 |     WtYM = W_hat_t%*% object@params$YM
65 |     WtYMK = WtYM %*% object@kernelmat
66 |     WtYMU = WtYM %*% object@params$U
67 |     Ut=t(object@params$U)
68 |     UtM = Ut %*% object@params$M
69 |     UtMK = UtM %*% object@kernelmat
70 |     UtMU = UtM %*% object@params$U
71 |     middle_inv = solve(1/tau * diag(1/delta) + UtMU, tol = 1e-40)
72 |     
73 |     object@SpatialPCs = tau*WtYMK - tau*WtYMU %*% middle_inv %*% UtMK
74 |     object@SpatialPCs = as.matrix(object@SpatialPCs)
75 |     rm(W_hat_t)
76 |     rm(WtYM)
77 |     rm(WtYMK)
78 |     rm(WtYMU)
79 |     rm(Ut)
80 |     rm(UtM)
81 |     rm(UtMK)
82 |     rm(UtMU)
83 |     rm(middle_inv)
84 |     gc()
85 | 
86 | 
87 |     return(object)
88 | }
89 | 
90 | 
91 | 
92 | 
93 | 
94 | 
95 | 
96 | 
97 | 


--------------------------------------------------------------------------------
/R/SpatialPCA_buildKernel.R:
--------------------------------------------------------------------------------
  1 | ########################################################################################################################
  2 | # Package: SpatialPCA
  3 | # Version: 1.1.0
  4 | # Date   : 2021-10-27
  5 | # Title : Spatially Aware Dimension Reduction for Spatial Transcriptomics
  6 | # Authors: L. Shang and X. Zhou
  7 | # Contacts: shanglu@umich.edu
  8 | #          University of Michigan, Department of Biostatistics
  9 | ####################################################################################################
 10 | 
 11 | 
 12 | #' Calculating kernel matrix from spatial locations.
 13 | #'
 14 | #' @param object SpatialPCA object.
 15 | #' @param kerneltype The type of kernel to be used, either "gaussian", or "cauchy" for cauchy kernel, or "quadratic" for rational quadratic kernel, and "delaunday" for gaussian kernel built with non-linear Delaunay triangulation based distance.
 16 | #' @param bandwidthtype The type of bandwidth to be used in Gaussian kernel, "SJ" for Sheather & Jones (1991) method (usually used in small size datasets), "Silverman" for Silverman's ‘rule of thumb’ method (1986)(usually used in large size datasets).
 17 | #' @param bandwidth.set.by.user User could select their own bandwidth (a numeric value) if the recommended bandwidth doesn't work in their dataset.
 18 | #' @param sparseKernel Select "TURE" if the user wants to use a sparse kernel matrix or "FALSE" if not. It is recommended to choose sparseKernel="TRUE" when sample size is large.
 19 | #' @param sparseKernel_tol When sparseKernel=TRUE, the cut-off value when building sparse kernel matrix, any element in the kernel matrix greater than sparseKernel_tol will be kept, otherwise will be set to 0 to save memory.
 20 | #' @param sparseKernel_ncore When sparseKernel=TRUE, the number of CPU cores to build sparse kernel matrix.
 21 | #' @export
 22 | SpatialPCA_buildKernel = function(object, kerneltype="gaussian", bandwidthtype="SJ",bandwidth.set.by.user=NULL,sparseKernel=FALSE,sparseKernel_tol=1e-20,sparseKernel_ncore=1) {
 23 | 
 24 | 	## extract the data from the slot of object, createSpatialPCAobject() function goes first
 25 | 	if(length(object@counts) == 0) {
 26 |         stop("object@counts has not been set. Run CreateSpatialPCAObject() first and then retry.")
 27 |     }# end fi
 28 | 
 29 | 	object@kerneltype = kerneltype
 30 | 	object@bandwidthtype = bandwidthtype
 31 | 
 32 | 	cat(paste("## Selected kernel type is: ", object@kerneltype," \n"))
 33 | 
 34 | 	#************************************************************#
 35 | 	#    Calculate the bandwidth for kernel matrix               #
 36 | 	#************************************************************#
 37 | 
 38 | 	#cat(paste("## Scale the expression of each gene. \n"))
 39 | 	expr=object@normalized_expr
 40 |  	for(i in 1:dim(object@normalized_expr)[1]){
 41 |   		expr[i,] = scale(object@normalized_expr[i,])
 42 | 	}
 43 | 
 44 | 	if(is.null(bandwidth.set.by.user)){
 45 | 		object@bandwidth = bandwidth_select(expr, method=object@bandwidthtype)
 46 | 	}else{
 47 | 		object@bandwidth = bandwidth.set.by.user
 48 | 	}
 49 | 	object@params$expr=expr
 50 | 
 51 | 	rm(expr)
 52 | 
 53 | 	cat(paste("## The bandwidth is: ", object@bandwidth," \n"))
 54 | 
 55 |   	#************************************************************#
 56 | 	#    Calculate the kernel matrix with above bandwidth        #
 57 | 	#************************************************************#
 58 | 
 59 | 	location_normalized = scale(object@location)
 60 | 
 61 | 	if(sparseKernel==FALSE){
 62 | 		cat(paste("## Calculating kernel matrix\n"))
 63 |   		object@kernelmat = kernel_build(kerneltype=object@kerneltype,location=location_normalized, bandwidth=object@bandwidth)
 64 |   		object@sparseKernel=sparseKernel
 65 |   	}else if(sparseKernel==TRUE){
 66 |   		cat(paste("## Calculating sparse kernel matrix\n"))
 67 |   		object@sparseKernel=sparseKernel
 68 |   		object@sparseKernel_tol = sparseKernel_tol
 69 | 		object@sparseKernel_ncore = sparseKernel_ncore
 70 | 		object@kernelmat = kernel_build_sparse(kerneltype=object@kerneltype,location=location_normalized, bandwidth=object@bandwidth,tol = object@sparseKernel_tol, ncores=object@sparseKernel_ncore)
 71 | 
 72 |   	}
 73 | 
 74 |   	cat(paste("## Finished calculating kernel matrix.\n"))
 75 | 
 76 | 	# return results
 77 | 	return(object)
 78 | }# end function
 79 | 
 80 | 
 81 | 
 82 | #' @title Select bandwidth in Gaussian kernel.
 83 | #' @description This function selects bandwidth in Gaussian kernel.
 84 | #' @param expr A m gene by n location matrix of normalized gene expression matrix.
 85 | #' @param method The method used in bandwidth selection, "SJ" usually for small sample size data, "Silverman" usually for large sample size data.
 86 | #' @return A numeric value of calculated bandwidth.
 87 | #' @export
 88 | bandwidth_select=function (expr, method)
 89 | {
 90 |     N = dim(expr)[2]
 91 |     if (method == "SJ") {
 92 | 
 93 |               bw_SJ = c()
 94 |         for (i in 1:dim(expr)[1]) {
 95 |             tryCatch({
 96 |               #print(i)
 97 |             bw_SJ[i] = bw.SJ(expr[i, ], method = "dpi")
 98 |              }, error=function(e){cat("Gene",i," :",conditionMessage(e), "\n")})
 99 |         }
100 | 
101 |         beta = median(na.omit(bw_SJ))
102 |     }
103 |     else if (method == "Silverman") {
104 |         bw_Silverman = c()
105 |         for (i in 1:dim(expr)[1]) {
106 |             tryCatch({
107 |             bw_Silverman[i] = bw.nrd0(expr[i, ])
108 |             }, error=function(e){cat("Gene",i," :",conditionMessage(e), "\n")})
109 |         }
110 |         beta = median(na.omit(bw_Silverman))
111 |     }
112 | }
113 | 
114 | 
115 | 
116 | #' @title Build kernel matrix.
117 | #' @description This function calculates kernel matrix from spatial locations.
118 | #' @param kerneltype The type of kernel to be used, either "gaussian", or "cauchy" for cauchy kernel, or "quadratic" for rational quadratic kernel, and "delaunday" for gaussian kernel built with non-linear Delaunay triangulation based distance.
119 | #' @param location A n by d matrix of cell/spot location coordinates.
120 | #' @param bandwidth A numeric value of bandwidth.
121 | #' @return The kernel matrix for spatial relationship between locations.
122 | #' @export
123 | kernel_build = function (kerneltype = "gaussian", location, bandwidth)
124 | {
125 |     if (kerneltype == "gaussian") {
126 |         K = exp(-1*as.matrix(dist(location)^2)/bandwidth)
127 |     }
128 |     else if (kerneltype == "cauchy") {
129 |         K = 1/(1 + 1*as.matrix(dist(location)^2)/as.numeric(bandwidth))
130 |     }
131 |     else if (kerneltype == "quadratic") {
132 |     	ED2=1*as.matrix(dist(location)^2)
133 |         K = 1 - ED2/(ED2 + as.numeric(bandwidth))
134 |     }else if (kerneltype == "delaunday") {
135 |     	require(spatstat.geom)
136 |     	tmp <- ppp(location[,1], location[,2],window=owin(c(-3,3),c(-3,3)))
137 | 		Delaunay_dist=delaunayDistance(tmp)
138 | 		K = exp(-Delaunay_dist^2/30) 
139 |     }
140 |     return(K)
141 | }
142 | 
143 | 
144 | 
145 | 
146 | #' @title Build sparse kernel matrix.
147 | #' @description This function calculates kernel matrix.
148 | #' @param kerneltype The type of kernel to be used, either "gaussian", or "cauchy" for cauchy kernel, or "quadratic" for rational quadratic kernel.
149 | #' @param location A n by d matrix of cell/spot location coordinates.
150 | #' @param bandwidth A numeric value of bandwidth.
151 | #' @param tol A numeric value of cut-off value when building sparse kernel matrix.
152 | #' @param ncores A integer value of number of CPU cores to use when building sparse kernel matrix.
153 | #' @return The sparse kernel matrix for spatial relationship between locations.
154 | #'
155 | #' @import parallel
156 | #' @import MASS
157 | #' @import pdist
158 | #' @import tidyr
159 | #'
160 | #' @export
161 | kernel_build_sparse = function(kerneltype,location, bandwidth,tol, ncores)
162 | {
163 | 
164 | 	# suppressMessages(require(tidyr))
165 | 	# suppressMessages(require(parallel))
166 | 	# suppressMessages(require(MASS))
167 | 	# suppressMessages(require(pdist))
168 | 	# suppressMessages(require(Matrix))
169 | 
170 | 	if (kerneltype == "gaussian") {
171 | 		fx_gaussian <- function(i){
172 | 			line_i = rep(0,dim(location)[1])
173 | 			line_i[i] = 1
174 | 			line_i[-i] = exp(-(pdist(location[i,],location[-i,])@dist^2)/bandwidth)
175 | 			ind_i=which(line_i>=tol)
176 | 			return(list("ind_i"=ind_i,"ind_j"=rep(i,length(ind_i)),"val_i"=line_i[ind_i] ))
177 | 		}
178 | 
179 | 		results = mclapply(1:dim(location)[1], fx_gaussian, mc.cores = ncores)
180 |   		tib = tibble(results)  %>%  unnest_wider(results)
181 | 		K_sparse = Matrix::sparseMatrix(i =unlist(tib[[1]]), j= unlist(tib[[2]]), x= unlist(tib[[3]]),  dims = c(dim(location)[1],dim(location)[1] ))
182 |         #K = exp(-1*as.matrix(dist(location)^2)/bandwidth)
183 |     } else if (kerneltype == "cauchy") {
184 |     	fx_cauchy <- function(i){
185 | 			line_i = rep(0,dim(location)[1])
186 | 			line_i[i] = 1
187 | 			line_i[-i] = 1/(1 + (pdist(location[i,],location[-i,])@dist^2)/as.numeric(bandwidth))
188 | 			ind_i=which(line_i>=tol)
189 | 			return(list("ind_i"=ind_i,"ind_j"=rep(i,length(ind_i)),"val_i"=line_i[ind_i] ))
190 | 		}
191 | 
192 | 		results = mclapply(1:dim(location)[1], fx_cauchy, mc.cores = ncores)
193 |   		tib = tibble(results)  %>%  unnest_wider(results)
194 | 		K_sparse = Matrix::sparseMatrix(i =unlist(tib[[1]]), j= unlist(tib[[2]]), x= unlist(tib[[3]]),  dims = c(dim(location)[1],dim(location)[1] ))
195 |         # K = 1/(1 + 1*as.matrix(dist(location)^2)/as.numeric(bandwidth))
196 |     }else if (kerneltype == "quadratic"){
197 | 
198 |     	fx_quadratic <- function(i){
199 | 			line_i = rep(0,dim(location)[1])
200 | 			line_i[i] = 1
201 | 			ED2=pdist(location[i,],location[-i,])@dist^2
202 | 			line_i[-i] = 1 - ED2/(ED2 + as.numeric(bandwidth))
203 | 			ind_i=which(line_i>=tol)
204 | 			return(list("ind_i"=ind_i,"ind_j"=rep(i,length(ind_i)),"val_i"=line_i[ind_i] ))
205 | 		}
206 | 
207 | 		results = mclapply(1:dim(location)[1], fx_quadratic, mc.cores = ncores)
208 |   		tib = tibble(results)  %>%  unnest_wider(results)
209 | 		K_sparse = sparseMatrix(i =unlist(tib[[1]]), j= unlist(tib[[2]]), x= unlist(tib[[3]]),  dims = c(dim(location)[1],dim(location)[1] ))
210 |        # ED2=1*as.matrix(dist(location)^2)
211 |        # K = 1 - ED2/(ED2 + as.numeric(bandwidth))
212 | 
213 |     }else if (kerneltype == "delaunday"){
214 | 
215 |     	require(spatstat.geom)
216 |     	tmp <- ppp(location[,1], location[,2],window=owin(c(-3,3),c(-3,3))) # scaled distance often ranges from -3 to 3
217 | 		Delaunay_dist=delaunayDistance(tmp)
218 | 		K = exp(-Delaunay_dist^2/30) 
219 | 		K[K<tol]=0
220 | 		K_sparse = as(K, "sparseMatrix")
221 |     }
222 | 
223 |     return(K_sparse)
224 | 
225 | }
226 | 
227 | 
228 | 
229 | 
230 | 
231 | 
232 | 
233 | 
234 | 
235 | 
236 | 
237 | 
238 | 
239 | 
240 | 
241 | 
242 | 
243 | 
244 | 
245 | 


--------------------------------------------------------------------------------
/R/SpatialPCA_highresolution.R:
--------------------------------------------------------------------------------
  1 | ########################################################################################################################
  2 | # Package: SpatialPCA
  3 | # Version: 1.1.0
  4 | # Date   : 2021-10-27
  5 | # Title : Spatially Aware Dimension Reduction for Spatial Transcriptomics
  6 | # Authors: L. Shang and X. Zhou
  7 | # Contacts: shanglu@umich.edu
  8 | #          University of Michigan, Department of Biostatistics
  9 | ####################################################################################################
 10 | 
 11 | 
 12 | #' High-resolution spatial map construction.
 13 | #' @param object SpatialPCA object.
 14 | #' @param platform 
 15 | #' "ST": the 10X ST platform, impute 9 subspots in a square shape for each spot; 
 16 | #' "Visium": the 10X Visium platform, impute 6 subspots in a hexagonal shape for each spot; 
 17 | #  "Other": impute data from any platform, jitter each existing location to surrounding 4 locations as new locations.
 18 | #  if platform is selected as NULL: the user may input newlocation matrix with coordinates on the locations they want to impute.
 19 | #' @param newlocation A n* by d location matrix, n* is number of new locations, d is dimension of locations.
 20 | #' Users can optionally provide new locations at the original location scale.
 21 | #' @return Returns SpatialPCA object with estimated Spatial PCs on new locations.
 22 | #' @export
 23 | 
 24 | SpatialPCA_highresolution = function(object,platform="ST",newlocation=NULL){
 25 | 
 26 |     info = scale(object@location)
 27 |     #K = object@kernelmat
 28 |     ED = 1*as.matrix(dist(info))
 29 |     tau = object@tau
 30 |     est_W = object@W
 31 |     est_sigma0 = object@sigma2_0
 32 |     n=dim(info)[1]
 33 | 
 34 | if(is.null(newlocation)){
 35 |         if(platform=="ST"){
 36 | 
 37 |             newinfo = .make_subspot_coldata(object@location, platform="ST")[,5:6]
 38 |             newinfo=as.matrix(newinfo)
 39 |             colnames(info) = c("adj_x","adj_y")
 40 |             colnames(newinfo) = c("adj_x","adj_y")
 41 |             info = object@location
 42 |             info_all = scale(rbind(info,newinfo))
 43 |             num_obs_all = dim(newinfo)[1]
 44 | 
 45 |         }else if(platform=="Visium"){
 46 | 
 47 |             newinfo = .make_subspot_coldata(object@location, platform="Visium")[,5:6]
 48 |             newinfo=as.matrix(newinfo)
 49 |             info = object@location
 50 |             colnames(info) = c("adj_x","adj_y")
 51 |             colnames(newinfo) = c("adj_x","adj_y")
 52 |             info_all = scale(rbind(info,newinfo))
 53 |             num_obs_all = dim(newinfo)[1]
 54 | 
 55 |         }else if(platform=="Other"){
 56 | 
 57 |             dis = c()
 58 |             for(i in 1:dim(ED)[1]){
 59 |                 dis[i] = min(ED[i,-i]) # not count the cell it self, only use distance to its nearest cell
 60 |             }
 61 |             small_distance = median(dis)/4
 62 |             z_star=matrix(0,object@SpatialPCnum,n)
 63 | 
 64 |             info_new = matrix(0,n,2)
 65 |             info_new1 = info_new2 = info_new3 = info_new4 = info
 66 |             info_new1[,1] = info[,1] - small_distance
 67 |             info_new1[,2] = info[,2] + small_distance
 68 |             info_new2[,1] = info[,1] + small_distance
 69 |             info_new2[,2] = info[,2] + small_distance
 70 |             info_new3[,1] = info[,1] - small_distance
 71 |             info_new3[,2] = info[,2] - small_distance
 72 |             info_new4[,1] = info[,1] + small_distance
 73 |             info_new4[,2] = info[,2] - small_distance
 74 |             newinfo = rbind(info_new1,info_new2,info_new3,info_new4)
 75 |             num_obs_all = dim(newinfo)[1]
 76 |             colnames(info) = c("adj_x","adj_y")
 77 |             colnames(newinfo) = c("adj_x","adj_y")
 78 |             info_all = rbind(info,newinfo)
 79 | 
 80 |         }
 81 | }else if(!is.null(newlocation)){
 82 | 
 83 |     newinfo = as.matrix(newlocation)
 84 |     num_obs_all = dim(newinfo)[1]
 85 |     info = object@location
 86 |     colnames(info) = c("adj_x","adj_y")
 87 |     colnames(newinfo) = c("adj_x","adj_y")
 88 |     info_all = scale(rbind(info,newinfo))
 89 | 
 90 | }
 91 | 
 92 |     #-------------------------------------------------------------------------------#
 93 |     # calculate kernel matrix from new locations                                    #
 94 |     #-------------------------------------------------------------------------------#
 95 | 
 96 |     if (object@kerneltype == "gaussian") {
 97 |         K_all = exp(-1*as.matrix(dist(info_all)^2)/object@bandwidth)
 98 |     }else if (object@kerneltype == "cauchy") {
 99 |         K_all = 1/(1 + 1*as.matrix(dist(info_all)^2)/as.numeric(object@bandwidth))
100 |     }else if (object@kerneltype == "quadratic") {
101 |         ED2=1*as.matrix(dist(info_all)^2)
102 |         K_all = 1 - ED2/(ED2 + as.numeric(object@bandwidth))
103 |     }
104 | 
105 | 
106 |     Sigma_YX = K_all[(n+1):(n+num_obs_all),1:n]
107 |     K_inv = object@params$U %*% diag(1/object@params$delta) %*% t(object@params$U)
108 | 
109 |     z_star_t= Sigma_YX %*% K_inv %*% t(object@SpatialPCs)
110 |     z_star = t(z_star_t)
111 | 
112 |     rownames(newinfo) = NULL
113 |     object@highPCs = z_star
114 |     object@highPos = newinfo
115 | 
116 |     return(object)
117 | }
118 | 
119 | 
120 | 
121 | 
122 | #' High-resolution gene expression prediction.
123 | #' @param object SpatialPCA object with high resolution predicted spatial PCs.
124 | #' Users can optionally provide new locations at the original location scale.
125 | #' @return Returns SpatialPCA object with predicted gene expression on new locations.
126 | #' We can predict normalized gene expression for the genes in the object@normalized_expr matrix.
127 | #' @export
128 | SpatialPCA_expr_pred = function(object){
129 |     if(!exists(object@highPCs)){
130 |       print("Please first use SpatialPCA_highresolution function to predict high resolution spatial PCs.")
131 |       return(object)
132 |     }else{
133 |     object@expr_pred =  object@W %*% object@highPCs
134 |     return(object)
135 |     }
136 | }
137 | 
138 | 
139 | # follow BayesSpace to make subspots
140 | .make_subspot_offsets <- function(n_subspots_per) {
141 |     if (n_subspots_per == 6) {
142 |         rbind(expand.grid(c(1/3, -1/3), c(1/3,-1/3)), expand.grid(c(2/3, -2/3), 0))
143 |     # } else if (n_subspots_per == 7) {
144 |     #     rbind(expand.grid(c(1/3, -1/3), c(1/3, -1/3)), expand.grid(c(2/3, -2/3, 0), 0))
145 |     } else if (n_subspots_per == 9) {
146 |         rbind(expand.grid(c(1/3, -1/3, 0), c(1/3, -1/3, 0)))
147 |     } else {
148 |         stop("Only 6 and 9 subspots currently supported.")
149 |     }
150 | }
151 | 
152 | # follow BayesSpace to make subspots
153 | .make_subspot_coldata <- function(positions, platform="ST") {
154 | 
155 |     require(assertthat)
156 |     n_subspots_per <- ifelse(platform == "Visium", 6, 9)
157 | 
158 |     positions <- as.data.frame(positions)
159 |     colnames(positions)=c("row","col")
160 |     #colnames(cdata) <- c("imagecol", "imagerow")
161 |     
162 |     n_spots <- nrow(positions)
163 |     n_subspots <- n_spots*n_subspots_per
164 |     cdata = data.frame("row"=rep(NA,n_subspots),"col"=rep(NA,n_subspots))
165 |     assertthat::assert_that(nrow(cdata) == n_spots * n_subspots_per)
166 |     
167 |     ## Index of parent spot is (subspot % n_spots)
168 |     idxs <- seq_len(n_subspots)
169 |     spot_idxs <- ((idxs - 1) %% n_spots) + 1
170 |     subspot_idxs <- rep(seq_len(n_subspots_per), each=n_spots)
171 |     cdata$spot.idx <- spot_idxs
172 |     cdata$subspot.idx <- subspot_idxs
173 |     rownames(cdata) <- paste0("subspot_", spot_idxs, ".", subspot_idxs)
174 |     
175 |     offsets <- .make_subspot_offsets(n_subspots_per)
176 |     cdata$spot.row <- rep(positions$row, n_subspots_per)
177 |     cdata$spot.col <- rep(positions$col, n_subspots_per)
178 |     cdata$col <- cdata$spot.col + rep(offsets[, 1], each=n_spots)
179 |     cdata$row <- cdata$spot.row + rep(offsets[, 2], each=n_spots)
180 | 
181 |     cols <- c("spot.idx", "subspot.idx", "spot.row", "spot.col", "row", "col")
182 |     cdata[, cols]
183 | }
184 | 
185 | 
186 | 
187 | 
188 | 
189 | 
190 | 
191 | 
192 | 
193 | 
194 | 
195 | 
196 | 


--------------------------------------------------------------------------------
/R/SpatialPCA_multiple_sample.R:
--------------------------------------------------------------------------------
  1 | ########################################################################################################################
  2 | # Package: SpatialPCA
  3 | # Version: 1.2.0
  4 | # Date   : 2022-8-12
  5 | # Title : Spatially Aware Dimension Reduction for Spatial Transcriptomics
  6 | # Authors: L. Shang and X. Zhou
  7 | # Contacts: shanglu@umich.edu
  8 | #          University of Michigan, Department of Biostatistics
  9 | ####################################################################################################
 10 | 
 11 | 
 12 | #' Multiple sample SpatialPCA. 
 13 | #' In this extension, we construct the covariance matrix for the latent factors in the form of a block diagonal matrix: 
 14 | #' it consists of the kernel matrices constructed using the spatial location information within each dataset, with zero correlation for pairs of locations across datasets. 
 15 | #' This way, the latent factors within each dataset are correlated a priori across spatial locations, while the latent factors across datasets are not correlated a priori. 
 16 | #' Certainly, if one wants to model the a priori correlation between latent factors across datasets, due to, for example, their similarity in the features extracted from histology images, then one can also modify the kernel matrices by constructing them using features other than spatial location information.
 17 | #' @param count_list A list of g by n count matrix, g is gene number, n is location number. 
 18 | #' @param location_list A list of n by d location matrix, n is location number, d is location dimension. The rownames of each location matrix should match with the colnames of its corresponding count matrix.
 19 | #' @return Returns SpatialPCA object with estimated Spatial PCs on locations.
 20 | #' @export
 21 | 
 22 | SpatialPCA_Multiple_Sample = function(count_list,location_list,gene.type="spatial",sparkversion="spark",numCores_spark=5,gene.number=3000, customGenelist=NULL,min.loctions = 20, min.features=20,bandwidth_common=0.1){
 23 | 
 24 |     require(Seurat)
 25 |     if(length(count_list) != length(location_list)) {
 26 |             stop("The number of count matrix should match with the number of location matrix. ")
 27 |     }# end fi
 28 | 
 29 |     # create seurat object for each dataset
 30 |     seurat_list = list()
 31 |     for(count in 1:length(count_list)){
 32 |         colnames(count_list[[count]]) = paste0("Sample",count,"_",colnames(count_list[[count]]))
 33 |         rownames(location_list[[count]]) = colnames(count_list[[count]])
 34 |         seurat_list[[count]] <- CreateSeuratObject(counts = count_list[[count]], project = "multiple")
 35 |     }
 36 | 
 37 |     # normalize and identify variable features for each dataset independently
 38 |     seurat_list <- lapply(X = seurat_list, FUN = function(x) {
 39 |         print(paste0("Normalizing dataset"))
 40 |         x <- NormalizeData(x)
 41 |         x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 10000)
 42 |     })
 43 | 
 44 |     # select features that are repeatedly variable across datasets for integration
 45 |     features <- Seurat::SelectIntegrationFeatures(object.list = seurat_list,nfeatures = 10000)
 46 |     # integration, find anchors
 47 |     MultipleSample.anchors <- Seurat::FindIntegrationAnchors(object.list = seurat_list, anchor.features = features)
 48 |     # this command creates an 'integrated' data assay
 49 |     MultipleSample.combined <- Seurat::IntegrateData(anchorset = MultipleSample.anchors)
 50 |     DefaultAssay(MultipleSample.combined) <- "integrated"
 51 |     # Run the standard workflow for visualization and clustering
 52 |     MultipleSample.combined <- Seurat::ScaleData(MultipleSample.combined, verbose = FALSE)
 53 |     # obtain integrated and normalized data
 54 |     integrated_data = MultipleSample.combined@assays$integrated@scale.data
 55 | 
 56 |     # create each SpatialPCA object, this step is essentially for spatial gene selection
 57 |     spatialpca_list = list()
 58 |     for(count in 1:length(seurat_list)){
 59 |         print(paste0("Creating SpatialPCA object for dataset ",count))
 60 |         spatialpca_list[[count]] <- CreateSpatialPCAObject(counts=count_list[[count]], location=location_list[[count]], project = "SpatialPCA",gene.type=gene.type,sparkversion=sparkversion,numCores_spark=numCores_spark,gene.number=gene.number, customGenelist=customGenelist,min.loctions = min.loctions, min.features=min.features)
 61 |     }
 62 | 
 63 |     matched_data = list()
 64 |     matched_location = list()
 65 |     Kernal_mat = list()
 66 |     for(count in 1:length(count_list)){
 67 |         print(paste0("Creating SpatialPCA kernel for dataset ",count))
 68 |         match_gene = na.omit(match(rownames( spatialpca_list[[count]]@normalized_expr), rownames(integrated_data)))
 69 |         match_spot = grep(paste0("1_",count),colnames(integrated_data))
 70 |         matched_data[[count]] = integrated_data[match_gene,match_spot]
 71 |         matched_location[[count]] = spatialpca_list[[count]]@location
 72 |         spatialpca_list[[count]]@normalized_expr = t(scale(t(matched_data[[count]])))
 73 |         spatialpca_list[[count]]@location = scale(matched_location[[count]])
 74 |         spatialpca_list[[count]] = SpatialPCA_buildKernel(spatialpca_list[[count]], kerneltype="gaussian", bandwidth.set.by.user=bandwidth_common)
 75 |         Kernal_mat[[count]] = spatialpca_list[[count]]@kernelmat
 76 |     }
 77 | 
 78 |     common_genes = unique(unlist(lapply(matched_data, function(x){ row.names(x)})))
 79 |     MultipleSample_merge = spatialpca_list[[1]] # initialize using the first data, will replace with integrated data later
 80 |     MultipleSample_merge@normalized_expr = t(scale(t(integrated_data[match(common_genes, rownames(integrated_data)),])))
 81 |     MultipleSample_merge@kernelmat = Matrix::bdiag(Kernal_mat)
 82 |     MultipleSample_merge@kernelmat = as.matrix(MultipleSample_merge@kernelmat)
 83 |     # MultipleSample_merge@kernelmat = as(MultipleSample_merge@kernelmat, "sparseMatrix")
 84 |     # MultipleSample_merge@sparseKernel = TRUE
 85 |     MultipleSample_merge@params$expr = MultipleSample_merge@normalized_expr
 86 |     MultipleSample_merge@location = do.call("rbind",lapply(spatialpca_list, function(x){ x@location}))
 87 | 
 88 |     # perform SpatialPCA on integrated expression and integrated block diagonal kernel
 89 |     MultipleSample_merge = SpatialPCA_EstimateLoading(MultipleSample_merge,fast=TRUE,SpatialPCnum=20)
 90 |     MultipleSample_merge = SpatialPCA_SpatialPCs(MultipleSample_merge, fast=TRUE)
 91 |     colnames(MultipleSample_merge@SpatialPCs) = rownames(MultipleSample_merge@location)
 92 |     
 93 |     id_list = list()
 94 |     SpatialPC_list = list()
 95 |     Location_spatialpc_list = list()
 96 |     for(sample in 1:length(count_list)){
 97 | 	    id_list[[sample]] = grep(paste0("Sample",sample), colnames(MultipleSample_merge@SpatialPCs))
 98 | 	    SpatialPC_list[[sample]] = MultipleSample_merge@SpatialPCs[,id_list[[sample]]]
 99 |             Location_spatialpc_list[[sample]] = spatialpca_list[[sample]]@location
100 |     }
101 | 
102 |     names(SpatialPC_list) = paste0("Sample",1:length(SpatialPC_list))
103 |     names(Location_spatialpc_list) = paste0("Sample",1:length(Location_spatialpc_list))
104 |     return(list("MultipleSample_SpatialPCA_object"=MultipleSample_merge, 
105 | 		"SpatialPC_list" = SpatialPC_list, 
106 | 		"Location_spatialpc_list"=Location_spatialpc_list))
107 | }
108 | 
109 | 
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 
119 | 
120 | 
121 | 
122 | 
123 | 
124 | 
125 | 


--------------------------------------------------------------------------------
/R/SpatialPCA_utilties.R:
--------------------------------------------------------------------------------
  1 | ########################################################################################################################
  2 | # Package: SpatialPCA
  3 | # Version: 1.1.0
  4 | # Date   : 2021-10-27
  5 | # Title : Spatially Aware Dimension Reduction for Spatial Transcriptomics
  6 | # Authors: L. Shang and X. Zhou
  7 | # Contacts: shanglu@umich.edu
  8 | #          University of Michigan, Department of Biostatistics
  9 | ####################################################################################################
 10 | 
 11 | 
 12 | #' Obtain PCA low dimensional components for dimension reduction methods comparison.
 13 | #' @param expr Normalized gene expression g by n matrix. g is gene number, n is sample size.
 14 | #' @param PCnum Number of PCs.
 15 | #' @return A d by n matrix of low dimensional components from PCA. d is number of low dimensional components, n is sample size.
 16 | #' @export
 17 | get_PCA = function(expr,PCnum){
 18 | 
 19 |   n=dim(expr)[2]
 20 |   k = dim(expr)[1]
 21 |   output_sub_mean=matrix(0,k,n)
 22 |   for(i_k in 1:k){
 23 |     output_sub_mean[i_k,]=expr[i_k,]-mean(expr[i_k,])
 24 |   }
 25 |   svd_output_sub_mean=svd(output_sub_mean)
 26 |   A_ini=svd_output_sub_mean$u[,1:PCnum]
 27 |   Z_pca = t(A_ini) %*% output_sub_mean
 28 |   return(Z_pca)
 29 | }
 30 | 
 31 | #' Obtain NMF low dimensional components for dimension reduction methods comparison.
 32 | #' @param count Count expression g by n matrix. g is gene number, n is sample size.
 33 | #' @param PCnum Number of PCs.
 34 | #' @return A d by n matrix of low dimensional components from NMF. d is number of low dimensional components, which is same as number of Spatial PCs in SpatialPCA. n is sample size.
 35 | #' @export
 36 | get_NMF = function(count, PCnum){
 37 |   suppressMessages(require(scater))
 38 |   suppressMessages(require(NMF))
 39 |   suppressMessages(require(SingleCellExperiment))
 40 |   suppressMessages(require(RcppML))
 41 |   #expr = log(count+1) # non negative
 42 |   sce <- SingleCellExperiment(list(counts=count))
 43 |   sce <- logNormCounts(sce)
 44 |   #sce <- runNMF(sce,ncomponents = PCnum)
 45 |   res <- scater::calculateNMF(sce, ncomponents = PCnum)
 46 |   Z_NMF = t(res)
 47 |   return(Z_NMF)
 48 | }
 49 | 
 50 | 
 51 | 
 52 | 
 53 | #' @title Obtain clustering cluster labels through louvain method.
 54 | #' @description This function performs louvain clustering on input low dimensional components.
 55 | #' @param clusternum The desired number of clusters the user wants to obtain.
 56 | #' @param latent_dat A d by n matrix of low dimensional components, d is number of PCs, n is number of spots.
 57 | #' @param knearest An integers, number of nearest neighbors for KNN graph construction in louvain clustering.
 58 | #' @return The cluster labels.
 59 | #'
 60 | #' @export
 61 | louvain_clustering = function(clusternum, latent_dat,knearest=100){
 62 |   set.seed(1234)
 63 |   # suppressMessages(require(FNN))
 64 |   # suppressMessages(require(igraph))
 65 |   # suppressMessages(require(bluster))
 66 | 
 67 |   PCvalues = latent_dat
 68 |   info.spatial = as.data.frame(t(PCvalues))
 69 |   colnames(info.spatial) =  paste0("factor", 1:nrow(PCvalues))
 70 |   knn.norm = FNN::get.knn(as.matrix(t(PCvalues)), k = knearest)
 71 |   knn.norm = data.frame(from = rep(1:nrow(knn.norm$nn.index),
 72 |   k=knearest), to = as.vector(knn.norm$nn.index), weight = 1/(1 + as.vector(knn.norm$nn.dist)))
 73 |   nw.norm = igraph::graph_from_data_frame(knn.norm, directed = FALSE)
 74 |   nw.norm = igraph::simplify(nw.norm)
 75 |   lc.norm = igraph::cluster_louvain(nw.norm)
 76 |   merged <- bluster::mergeCommunities(nw.norm, lc.norm$membership, number=clusternum)
 77 |   clusterlabel = as.character(as.integer(as.factor(paste0("cluster",merged))))
 78 |   return("cluster_label"=clusterlabel)
 79 | }
 80 | 
 81 | 
 82 | #' @title Obtain clustering cluster labels through walktrap method.
 83 | #' @description This function performs walktrap clustering on input low dimensional components.
 84 | #' @param clusternum The desired number of clusters the user wants to obtain.
 85 | #' @param latent_dat A d by n matrix of low dimensional components, d is number of PCs, n is number of spots.
 86 | #' @param knearest An integers, number of nearest neighbors for SNN graph construction in walktrap clustering.
 87 | #' @return The cluster labels.
 88 | #'
 89 | #' @export
 90 | walktrap_clustering = function(clusternum, latent_dat,knearest=100){
 91 |   set.seed(1234)
 92 |   # suppressMessages(require(bluster))
 93 |   # suppressMessages(require(igraph))
 94 | 
 95 |   PCvalues = latent_dat
 96 |   g <- bluster::makeSNNGraph(as.matrix(t(PCvalues)),k = knearest)
 97 |   #clusters <- igraph::cluster_fast_greedy(g)$membership
 98 |   g_walk <- igraph::cluster_walktrap(g)
 99 |   cluster_label_new = as.character(igraph::cut_at(g_walk, no=clusternum))
100 | 
101 |   return("cluster_label"=cluster_label_new)
102 | }
103 | 
104 | 
105 | 
106 | 
107 | #' @title Refine clustering results for 10x ST or Visium data.
108 | #' @description This function refines spatial clustering of ST or Visium data.
109 | #' @param clusterlabels The cluster label obtained (e.g. from louvain method or walktrap method).
110 | #' @param location A n by 2 location matrix of spots.
111 | #' @param shape Select shape='hexagon' for Visium data, 'square' for ST data.
112 | #' @return The refined cluster labels.
113 | #' @export
114 | refine_cluster_10x = function(clusterlabels, location, shape="square"){
115 | 
116 |   dis_df = as.matrix(dist(location))
117 |   if(shape=="square"){
118 |     num_obs = 4
119 |   }else if(shape == "hexagon"){
120 |     num_obs = 6
121 |   }else{
122 |     print("Select shape='hexagon' for Visium data, 'square' for ST data.")
123 |   }
124 |   refined_pred = clusterlabels
125 |   for(i in 1:length(clusterlabels)){
126 |     nearby_spot_ind = order(dis_df[i,])[1:(num_obs+1)]
127 |     labels_nearby_spot_ind = refined_pred[nearby_spot_ind]  # use updated cluster
128 |     spot_of_interest = refined_pred[i]
129 |     labels_table = table(labels_nearby_spot_ind)
130 |     if( labels_table[spot_of_interest]<num_obs/2 & max(labels_table)>num_obs/2){
131 |       refined_pred[i] = names(labels_table)[which.max(labels_table)]
132 |     }else{
133 |       refined_pred[i] = spot_of_interest
134 |     }
135 | 
136 |   }
137 | 
138 |   return(refined_pred)
139 | }
140 | 
141 | 
142 | 
143 | 
144 | #' @title Visualize PCs on their locations.
145 | #' @description This function visualizes the low dimensional component values.
146 | #' @param location A n cell by k dimension of location matrix. n is cell number, k=2 if the spots are on 2D space.
147 | #' @param PCs A d by n matrix of low dimensional components.
148 | #' @param textmethod A text string of the name of method used to extract latent factors, e.g. "SpatialPCA" or "PCA". It will be shown as the title of the figures.
149 | #' @param pointsize The point size of each location for visualization.
150 | #' @param textsize The text size in the figure legend.
151 | #' @return A list of ggplot objects for factor value plots.
152 | #'
153 | #' @import ggplot2
154 | #'
155 | #' @export
156 | plot_factor_value = function(location, PCs,textmethod,pointsize=2,textsize=15){
157 | 
158 |   location = as.data.frame(location)
159 |   PCnum=dim(PCs)[1]
160 |   p = list()
161 |   for(k in 1:PCnum){
162 |       locc1 = location[,1]
163 |       locc2 = location[,2]
164 |       PC_value = PCs[k,]
165 |       datt = data.frame(PC_value, locc1, locc2)
166 |       p[[k]] = ggplot(datt, aes(x = locc1, y = locc2, color = PC_value)) +
167 |         geom_point(size=pointsize, alpha = 1) +
168 |         scale_color_gradientn(colours = c("#4E84C4", "#FFDB6D")) +
169 |         ggtitle(paste0(textmethod," PC ",k))+
170 |         theme_void()+
171 |         theme(plot.title = element_text(size = textsize),
172 |               text = element_text(size = textsize),
173 |               #axis.title = element_text(face="bold"),
174 |               #axis.text.x=element_text(size = 22) ,
175 |               legend.position = "bottom")
176 | 
177 | }
178 |   return(p)
179 | }
180 | 
181 | 
182 | 
183 | 
184 | #' @title Visualize cluster labels on locations.
185 | #' @description This function visualizes cluster labels on locations.
186 | #' @param location A n by k matrix of spot locations.
187 | #' @param clusterlabel A vector of cluster labels for spots.
188 | #' @param pointsize An integer, the point size of each spot.
189 | #' @param textsize An integer, the text size in the legend.
190 | #' @param title_in A character string, the title you want to display at the top of the figure.
191 | #' @param color_in A vector of colors for each cluster.
192 | #' @param legend A character string, the position of the figure legend. Select from "top", "bottom","left" or "right".
193 | #' @return A ggplot object.
194 | #' @export
195 | plot_cluster = function(location, clusterlabel, pointsize=3,text_size=15 ,title_in,color_in,legend="none"){
196 |   cluster = clusterlabel
197 |   loc_x=location[,1]
198 |   loc_y=location[,2]
199 |   datt = data.frame(cluster, loc_x, loc_y)
200 |   p = ggplot(datt, aes(x = location[,1], y = location[,2], color = cluster)) +
201 |         geom_point( alpha = 1,size=pointsize) +
202 |         scale_color_manual(values = color_in)+
203 |         ggtitle(paste0(title_in))+
204 |         theme_void()+
205 |         theme(plot.title = element_text(size = text_size,  face = "bold"),
206 |               text = element_text(size = text_size),
207 |               #axis.title = element_text(face="bold"),
208 |               #axis.text.x=element_text(size = 15) ,
209 |               legend.position =legend)
210 |   p
211 | }
212 | 
213 | 
214 | 
215 | 
216 | 
217 | #' @title Visualize RGB plot from tSNE.
218 | #' @description We summarized the inferred low dimensional components into three tSNE components and visualized the three resulting components with red/green/blue (RGB) colors in the RGB plot.
219 | #' @param location A n by k location matrix. n is spot number.
220 | #' @param latent_dat A d by n matrix of low dimensional components.
221 | #' @param pointsize The point size of each spot.
222 | #' @param textsize The text size in the legend.
223 | #' @return A list.
224 | #' \item{RGB}{A data frame with five columns: x coordinate, y coordinate, R, G, and B color index}
225 | #' \item{figure}{A ggplot object for RGB plot from tSNE}
226 | #'
227 | #' @import Rtsne
228 | #'
229 | #' @export
230 | plot_RGB_tSNE=function(location, latent_dat,pointsize=2,textsize=15){
231 | 
232 |   # suppressMessages(require(Rtsne))
233 | 
234 |   info = as.data.frame(location)
235 |   colnames(info) = c("sdimx","sdimy")
236 | 
237 |   PCvalues = latent_dat
238 | 
239 |   tsne <- Rtsne(t(PCvalues),dims=3,check_duplicates = FALSE)
240 |   r = (tsne$Y[,1]-min(tsne$Y[,1]))/(max(tsne$Y[,1])-min(tsne$Y[,1]))
241 |   g = (tsne$Y[,2]-min(tsne$Y[,2]))/(max(tsne$Y[,2])-min(tsne$Y[,2]))
242 |   b = (tsne$Y[,3]-min(tsne$Y[,3]))/(max(tsne$Y[,3])-min(tsne$Y[,3]))
243 |   x =  info$sdimx
244 |   y =  info$sdimy
245 |   dat = data.frame(x,y,r,g,b)
246 |   p1=ggplot(data=dat, aes(x=x, y=y, col=rgb(r,g,b))) +
247 |       geom_point(size=pointsize) +
248 |       scale_color_identity()+
249 |       ggtitle(paste0("RGB tSNE"))+
250 |       theme_void()+
251 |       theme(plot.title = element_text(size = textsize),
252 |               text = element_text(size = textsize),
253 |               #axis.title = element_text(face="bold"),
254 |               #axis.text.x=element_text(size = 22) ,
255 |               legend.position = "bottom")
256 | 
257 |   return(list("RGB"=dat,"figure"=p1))
258 | }
259 | 
260 | 
261 | 
262 | #' @title Visualize RGB plot from UMAP.
263 | #' @description We summarized the inferred low dimensional components into three UMAP. components and visualized the three resulting components with red/green/blue (RGB) colors in the RGB plot.
264 | #' @param location A n by k location matrix. n is spot number.
265 | #' @param latent_dat A d by n matrix of low dimensional components.
266 | #' @param pointsize The point size of each spot.
267 | #' @param textsize The text size in the legend.
268 | #' @return A list.
269 | #' \item{RGB}{A data frame with five columns: x coordinate, y coordinate, R, G, and B color index}
270 | #' \item{figure}{A ggplot object for RGB plot from UMAP.}
271 | #'
272 | #' @import umap
273 | #'
274 | #' @export
275 | plot_RGB_UMAP=function(location, latent_dat,pointsize=2,textsize=15){
276 | 
277 |   # suppressMessages(require(umap))
278 | 
279 |   info = as.data.frame(location)
280 |   colnames(info) = c("sdimx","sdimy")
281 | 
282 |   PCvalues = latent_dat
283 | 
284 |   umap <- umap(t(PCvalues),n_components = 3)
285 |   r = (umap$layout[,1]-min(umap$layout[,1]))/(max(umap$layout[,1])-min(umap$layout[,1]))
286 |   g = (umap$layout[,2]-min(umap$layout[,2]))/(max(umap$layout[,2])-min(umap$layout[,2]))
287 |   b = (umap$layout[,3]-min(umap$layout[,3]))/(max(umap$layout[,3])-min(umap$layout[,3]))
288 |   x =  info$sdimx
289 |   y = info$sdimy
290 |   dat = data.frame(x,y,r,g,b)
291 |   p1=ggplot(data=dat, aes(x=x, y=y, col=rgb(r,g,b))) +
292 |       geom_point(size=pointsize) +
293 |       scale_color_identity()+
294 |       ggtitle(paste0("RGB UMAP"))+
295 |       theme_void()+
296 |       theme(plot.title = element_text(size = textsize),
297 |               text = element_text(size = textsize),
298 |               #axis.title = element_text(face="bold"),
299 |               #axis.text.x=element_text(size = 22) ,
300 |               legend.position = "bottom")
301 | 
302 |   return(list("RGB"=dat,"figure"=p1))
303 | }
304 | 
305 | 
306 | 
307 | #' @title Visualize pseudotimes on locations.
308 | #' @description This function visualizes pseudotimes on locations.
309 | #' @param pseudotime A length n vector of pseudotime inferred from Slingshot.
310 | #' @param location A n by 2 data frame of spot locations.
311 | #' @param clusterlabels A vector of integers, the cluster labels for each spot
312 | #' @param gridnum Number of grids that evenly segment the whole tissue section.
313 | #' @param color_in A vector of character strings representing colors for each cluster.
314 | #' @param pointsize An integer, the point size of each spot
315 | #' @param arrowlength An integer, the length of arrows inside a grid between one spot with smallest pseudotime and largest pseudotime.
316 | #' @param arrowsize An integer, the size of arrows inside a grid between one spot with smallest pseudotime and largest pseudotime.
317 | #' @param textsize An integer, the size of text in the figure.
318 | #' @return A ggplot object.
319 | #' \item{Pseudotime}{A ggplot object visualizing pseudotime on locations.}
320 | #' \item{Arrowplot1}{A ggplot object for arrows pointing from smallest pseudotime and largest pseudotime in each grid.}
321 | #' \item{Arrowplot2}{A ggplot object for arrows pointing from largest pseudotime and smallest pseudotime in each grid.}
322 | #' \item{Arrowoverlay1}{A ggplot object for arrows pointing from smallest pseudotime and largest pseudotime in each grid, overlayed on clustering plot.}
323 | #' \item{Arrowoverlay2}{A ggplot object for arrows pointing from largest pseudotime and smallest pseudotime in each grid, overlayed on clustering plot.}
324 | #' @export
325 | plot_trajectory = function(pseudotime, location,clusterlabels,gridnum,color_in,pointsize=5 ,arrowlength=0.2,arrowsize=1,textsize=22){
326 | 
327 |   pseudotime_use=pseudotime
328 |   info = as.data.frame(location)
329 |   colnames(info) = c("sdimx","sdimy")
330 |   grids = gridnum
331 | 
332 |   min_x = min(info$sdimx)
333 |   min_y = min(info$sdimy)
334 |   max_x = max(info$sdimx)
335 |   max_y = max(info$sdimy)
336 | 
337 |   x_anchor = c()
338 |   for(x_i in 1:(grids+1)){
339 |       space_x = (max_x - min_x)/grids
340 |       x_anchor[x_i] = min_x+(x_i-1)*space_x
341 |   }
342 |   y_anchor = c()
343 |   for(y_i in 1:(grids+1)){
344 |       space_y = (max_y - min_y)/grids
345 |       y_anchor[y_i] = min_y+(y_i-1)*space_y
346 |   }
347 | 
348 | # label square by num_x, num_y
349 | count = 0
350 | squares = list()
351 | direction_pseudotime_point = list()
352 | start_x_dat = c()
353 | start_y_dat = c()
354 | end_x_dat = c()
355 | end_y_dat = c()
356 | for(num_x in 1:grids){
357 |   for(num_y in 1:grids){
358 | 
359 |     filter_x = which(info$sdimx >= x_anchor[num_x] & info$sdimx <= x_anchor[num_x+1])
360 |     filter_y = which(info$sdimy >= y_anchor[num_y] & info$sdimy <= y_anchor[num_y+1])
361 |     # find points in each grid
362 |     points_in_grid = intersect(filter_x, filter_y)
363 | 
364 | 
365 |     # find min pseudotime and max pseudotime in each grid
366 |     if(length(points_in_grid)>1 & sum(which.min(pseudotime_use[points_in_grid]))>0){
367 |       count = count + 1
368 |       squares[[count]]= intersect(filter_x, filter_y)
369 |       direction_pseudotime_point[[count]] = list()
370 |       direction_pseudotime_point[[count]]$min_point = info[squares[[count]][which.min(pseudotime_use[squares[[count]]])],]
371 |       direction_pseudotime_point[[count]]$max_point = info[squares[[count]][which.max(pseudotime_use[squares[[count]]])],]
372 |       start_x_dat[count] = unlist(direction_pseudotime_point[[count]]$min_point$sdimx)
373 |       start_y_dat[count] = unlist(direction_pseudotime_point[[count]]$min_point$sdimy)
374 |       end_x_dat[count] = unlist(direction_pseudotime_point[[count]]$max_point$sdimx)
375 |       end_y_dat[count] = unlist(direction_pseudotime_point[[count]]$max_point$sdimy)
376 |     }
377 |   }
378 | }
379 | 
380 | 
381 | loc1 = info$sdimx
382 | loc2 = info$sdimy
383 | 
384 | time = pseudotime_use+0.01
385 | datt = data.frame(time, loc1, loc2)
386 | datt2 = data.frame(start_x_dat, start_y_dat, end_x_dat, end_y_dat)
387 | p01=ggplot(datt, aes(x = loc1, y = loc2, color = time)) +
388 |     geom_point( alpha = 1,size=pointsize) +
389 |     scale_color_gradientn(colours = c("red", "green")) +
390 |         theme_void()+
391 |     theme(plot.title = element_text(size = textsize,  face = "bold"),
392 |               text = element_text(size = textsize),
393 |               legend.position = "bottom")
394 | p02= ggplot()+
395 |   geom_segment(aes(x = start_x_dat, y = start_y_dat, xend = end_x_dat, yend = end_y_dat,colour = "black"),
396 |     arrow = arrow(length = unit(arrowlength,"cm")),size=arrowsize,color="black",data = datt2) +
397 |         theme_void()+
398 |     theme(plot.title = element_text(size = textsize,  face = "bold"),
399 |               text = element_text(size = textsize),
400 |               legend.position = "bottom")
401 | 
402 | p03= ggplot()+
403 |   geom_segment(aes(x = end_x_dat, y = end_y_dat, xend = start_x_dat, yend = start_y_dat,colour = "black"),
404 |     arrow = arrow(length = unit(arrowlength,"cm")),size=arrowsize,color="black",data = datt2) +
405 |         theme_void()+
406 |     theme(plot.title = element_text(size = textsize,  face = "bold"),
407 |               text = element_text(size = textsize),
408 |               legend.position = "bottom")
409 | 
410 | time = pseudotime_use+0.1
411 | datt1 = data.frame(time, loc1, loc2)
412 | p1 = ggplot(datt1, aes(x = loc1, y = loc2)) +
413 |     geom_point( alpha =1,size=pointsize,aes(color=clusterlabels)) +
414 |         theme_void()+
415 |         scale_colour_manual(values=color_in)+
416 |     theme(plot.title = element_text(size = textsize,  face = "bold"),
417 |               text = element_text(size = textsize),
418 |               legend.position = "bottom")
419 | datt2 = data.frame(start_x_dat, start_y_dat, end_x_dat, end_y_dat)
420 | p2= geom_segment(aes(x = start_x_dat, y = start_y_dat, xend = end_x_dat, yend = end_y_dat,colour = "segment"),
421 |     arrow = arrow(length = unit(arrowlength,"cm")),size=arrowsize,arrow.fill="black",data = datt2)
422 | p22=p1+p2
423 | 
424 | 
425 | time = pseudotime_use+0.1
426 | datt1 = data.frame(time, loc1, loc2)
427 | p1 = ggplot(datt1, aes(x = loc1, y = loc2)) +
428 |        geom_point( alpha =1,size=pointsize,aes(color=clusterlabels)) +
429 |         theme_void()+
430 |         scale_colour_manual(values=color_in)+
431 |     theme(plot.title = element_text(size = textsize,  face = "bold"),
432 |               text = element_text(size = textsize),
433 |               legend.position = "bottom")
434 | datt2 = data.frame(start_x_dat, start_y_dat, end_x_dat, end_y_dat)
435 | p2= geom_segment(aes(x = end_x_dat, y = end_y_dat, xend = start_x_dat, yend = start_y_dat,colour = "segment"),
436 |     arrow = arrow(length = unit(arrowlength,"cm")),size=arrowsize,arrow.fill="black",data = datt2)
437 | p33=p1+p2
438 | 
439 | 
440 | return(list("Pseudotime"=p01,"Arrowplot1"=p02,"Arrowplot2"=p03,"Arrowoverlay1"=p22,"Arrowoverlay2"=p33))
441 | 
442 | }
443 | 
444 | 
445 | 
446 | 
447 | #' @title Calculate CHAOS score to measure clustering performance.
448 | #' @description CHAOS score measures the spatial continuity of the detected spatial domains.
449 | #' Lower CHAOS score indicates better spatial domian clustering performance.
450 | #' @param clusterlabel Cluster labels.
451 | #' @param location A n by k matrix of spatial locations.
452 | #' @return A numeric value for CHAOS score.
453 | #'
454 | #' @import parallel
455 | #'
456 | #' @export
457 | fx_CHAOS = function(clusterlabel, location){
458 |   # require(parallel)
459 |   matched_location=location
460 |   NAs = which(is.na(clusterlabel))
461 |   if(length(NAs>0)){
462 |     clusterlabel=clusterlabel[-NAs]
463 |     matched_location = matched_location[-NAs,]
464 |   }
465 |   matched_location = scale(matched_location)
466 |   dist_val = rep(0,length(unique(clusterlabel)))
467 |   count = 0
468 |   for(k in unique(clusterlabel)){
469 |     count = count + 1
470 |     location_cluster = matched_location[which(clusterlabel == k),]
471 |     if(length(location_cluster)==2){next}
472 |     #require(parallel)
473 |     results = mclapply(1:dim(location_cluster)[1], fx_1NN, location_in=location_cluster,mc.cores = 5)
474 |     dist_val[count] = sum(unlist(results))
475 |   }
476 |   dist_val = na.omit(dist_val)
477 |   return(sum(dist_val)/length(clusterlabel))
478 | 
479 | }
480 | 
481 | 
482 | #' @title Calculate PAS score to measure clustering performance.
483 | #' @description PAS score measures the randomness of the spots that located outside of the spatial region where it was clustered to.
484 | #' Lower PAS score indicates better spatial domian clustering performance.
485 | #' @param clusterlabel Cluster labels.
486 | #' @param location A n by k matrix of spatial locations.
487 | #' @return A numeric value for PAS score.
488 | #'
489 | #' @import parallel
490 | #'
491 | #' @export
492 | fx_PAS = function(clusterlabel, location){
493 |   # require(parallel)
494 | 
495 |   matched_location=location
496 |   NAs = which(is.na(clusterlabel))
497 |   if(length(NAs>0)){
498 |     clusterlabel=clusterlabel[-NAs]
499 |     matched_location = matched_location[-NAs,]
500 |   }
501 | 
502 |   results = mclapply(1:dim(matched_location)[1], fx_kNN, location_in=matched_location,k=10,cluster_in=clusterlabel, mc.cores = 5)
503 |   return(sum(unlist(results))/length(clusterlabel))
504 | }
505 | 
506 | 
507 | #' @import pdist
508 | fx_1NN = function(i,location_in){
509 |   # library(pdist)
510 |   line_i = rep(0,dim(location_in)[1])
511 |   line_i = pdist(location_in[i,],location_in[-i,])@dist
512 |   return(min(line_i))
513 | }
514 | 
515 | #' @import pdist
516 | fx_kNN = function(i,location_in,k,cluster_in){
517 |   #library(pdist)
518 |   line_i = rep(0,dim(location_in)[1])
519 |   line_i = pdist(location_in[i,],location_in[-i,])@dist
520 |   ind = order(line_i)[1:k]
521 |   cluster_use = cluster_in[-i]
522 |   if(sum(cluster_use[ind] != cluster_in[i])>(k/2)){
523 |     return(1)
524 |   }else{
525 |     return(0)
526 |   }
527 | 
528 | }
529 | 
530 | 
531 | 
532 | # mapDrugToColor<-function(annotations){
533 | #     colorsVector = ifelse(annotations["category"]=="Cluster1",
534 | #         "red", ifelse(annotations["category"]=="Cluster2",
535 | #         "orange",ifelse(annotations["category"]=="Cluster3",
536 | #         "yellow",ifelse(annotations["category"]=="Cluster4",
537 | #         "green",ifelse(annotations["category"]=="Cluster5",
538 | #         "blue",ifelse(annotations["category"]=="Cluster6",
539 | #         "purple",ifelse(annotations["category"]=="Cluster7",
540 | #            "skyblue",ifelse(annotations["category"]=="Cluster8",
541 | #         "black","grey"))))))))
542 | #     return(colorsVector)
543 | # }
544 | 
545 | # testHeatmap3<-function(logCPM, annotations) {
546 | #     sampleColors = mapDrugToColor(annotations)
547 | #     # Assign just column annotations
548 | #     heatmap3(logCPM, margins=c(10,10),
549 | #       ColSideColors=sampleColors,scale="none",
550 | #       col = colorRampPalette(c( "#0072B2","#F0E442", "#D16103"))(1024),
551 | #       Rowv=NA,
552 | #       Colv=NA,
553 | #         xlab = "Cell ID",
554 | #     ylab = "Marker genes",
555 | #     showColDendro = F,
556 | #   showRowDendro = F)
557 | #     #Assign column annotations and make a custom legend for them
558 | #     heatmap3(logCPM, margins=c(10,10), ColSideColors=sampleColors,
559 | #       scale="none",
560 | #       col = colorRampPalette(c( "#0072B2", "#F0E442","#D16103"))(1024),
561 | #         legendfun=function()showLegend(legend=paste0("Cluster",1:7), col=c("red", "orange", "yellow","green","blue","purple","skyblue"), cex=1),
562 | #               Rowv=NA,
563 | #       Colv=NA,
564 | #         xlab = "Cell ID",
565 | #     ylab = "Marker genes",
566 | #     showColDendro = F,
567 | #   showRowDendro = F)
568 | 
569 | #     #Assign column annotations as a mini-graph instead of colors,
570 | #     #and use the built-in labeling for them
571 | #     ColSideAnn<-data.frame(Cluster=annotations[["category"]])
572 | #     heatmap3(logCPM, ColSideAnn=ColSideAnn,
573 | #         #ColSideFun=function(x)showAnn(x),
574 | #         margins=c(10,10),
575 | #         ColSideWidth=0.8,
576 | #         Rowv=NA,
577 | #       Colv=NA,
578 | #         xlab = "Cell ID",
579 | #       ylab = "Marker genes",
580 | #       showColDendro = F,
581 | #       showRowDendro = F)
582 | # }
583 | 
584 | 
585 | 
586 | 
587 | # plot_celltype_barplot_total100 = function(clusternum, celltypes, meta_data_RCTD,method,color_in,textsize=22){
588 | 
589 | #     if(method == "SpatialPCA"){
590 | #       percentage = matrix(0,clusternum,celltypes)
591 | #       for(k in 1:clusternum){
592 | #       metadata_sub = meta_data_RCTD[which(meta_data_RCTD$SpatialPCA_Louvain==k ),]
593 | #       match_type = metadata_sub$celltype
594 | #       percentage[k,] = round(unlist(table(match_type))/dim(meta_data_RCTD)[1]*100,2)
595 | #       }
596 | #     }else if (method == "PCA"){
597 | #       percentage = matrix(0,clusternum,celltypes)
598 | #       for(k in 1:clusternum){
599 | #       metadata_sub = meta_data_RCTD[which(meta_data_RCTD$PCA_Louvain==k ),]
600 | #       match_type = metadata_sub$celltype
601 | #       percentage[k,] = round(unlist(table(match_type))/dim(meta_data_RCTD)[1]*100,2)
602 | #       }
603 | #     }else if (method == "NMF"){
604 | #       percentage = matrix(0,clusternum,celltypes)
605 | #       for(k in 1:clusternum){
606 | #       metadata_sub = meta_data_RCTD[which(meta_data_RCTD$NMF_Louvain==k ),]
607 | #       match_type = metadata_sub$celltype
608 | #       percentage[k,] = round(unlist(table(match_type))/dim(meta_data_RCTD)[1]*100,2)
609 | #       }
610 | #     }else if (method == "HMRF"){
611 | #       percentage = matrix(0,clusternum,celltypes)
612 | #       for(k in 1:clusternum){
613 | #       metadata_sub = meta_data_RCTD[which(meta_data_RCTD$HMRF==k ),]
614 | #       match_type = metadata_sub$celltype
615 | #       percentage[k,] = round(unlist(table(match_type))/dim(meta_data_RCTD)[1]*100,2)
616 | #       }
617 | #     }
618 | 
619 | 
620 | #     celltype = names(table(match_type))
621 | #     rownames(percentage) = paste0("Cluster",1:clusternum)
622 | #     colnames(percentage) = names(table(match_type))
623 | 
624 | #     percentage_vec = c(percentage)
625 | #     cluster = c(rep(c(paste0("Cluster",1:clusternum)),celltypes))
626 | #     celltype = c(rep(celltype,each=clusternum))
627 | #     dat = data.frame(cluster, percentage_vec,celltype)
628 | #     dat$cluster = factor(cluster, level=paste0("Cluster",1:clusternum))
629 | 
630 | #   p = ggplot(dat, aes(y = percentage_vec,
631 | #              x = factor(cluster), fill = celltype)) +        ## global aes
632 | #   scale_fill_manual(values=color_in)+
633 | #   geom_bar(position="stack", stat="identity",width=0.8,color="grey2") +
634 | #   theme_classic()+xlab("")+ylab("")+
635 | #    theme(plot.title = element_text(size = textsize),
636 | #               text = element_text(size = textsize),
637 | #               #axis.title = element_text(face="bold"),
638 | #               #axis.text.x=element_text(size = 12,angle = 60,hjust = 1) ,
639 | #               #axis.text.x=element_blank(),
640 | #               legend.position = "right")# +
641 | 
642 | 
643 | # p
644 | 
645 | # }
646 | 
647 | 
648 | # plot_celltype_barplot_each100 = function(clusternum, celltypes, meta_data_RCTD,method,color_in,textsize=22){
649 | 
650 | #     if(method == "SpatialPCA"){
651 | #       percentage = matrix(0,clusternum,celltypes)
652 | #       for(k in 1:clusternum){
653 | #       metadata_sub = meta_data_RCTD[which(meta_data_RCTD$SpatialPCA_Louvain==k ),]
654 | #       match_type = metadata_sub$celltype
655 | #       percentage[k,] = round(unlist(table(match_type))/dim(metadata_sub)[1]*100,2)
656 | #       }
657 | #     }else if (method == "PCA"){
658 | #       percentage = matrix(0,clusternum,celltypes)
659 | #       for(k in 1:clusternum){
660 | #       metadata_sub = meta_data_RCTD[which(meta_data_RCTD$PCA_Louvain==k ),]
661 | #       match_type = metadata_sub$celltype
662 | #       percentage[k,] = round(unlist(table(match_type))/dim(metadata_sub)[1]*100,2)
663 | #       }
664 | #     }else if (method == "NMF"){
665 | #       percentage = matrix(0,clusternum,celltypes)
666 | #       for(k in 1:clusternum){
667 | #       metadata_sub = meta_data_RCTD[which(meta_data_RCTD$NMF_Louvain==k ),]
668 | #       match_type = metadata_sub$celltype
669 | #       percentage[k,] = round(unlist(table(match_type))/dim(metadata_sub)[1]*100,2)
670 | #       }
671 | #     }else if (method == "HMRF"){
672 | #       percentage = matrix(0,clusternum,celltypes)
673 | #       for(k in 1:clusternum){
674 | #       metadata_sub = meta_data_RCTD[which(meta_data_RCTD$HMRF==k ),]
675 | #       match_type = metadata_sub$celltype
676 | #       percentage[k,] = round(unlist(table(match_type))/dim(metadata_sub)[1]*100,2)
677 | #       }
678 | #     }
679 | 
680 | 
681 | #     celltype = names(table(match_type))
682 | #     rownames(percentage) = paste0("Cluster",1:clusternum)
683 | #     percentage_vec = c(percentage)
684 | #     cluster = c(rep(c(paste0("Cluster",1:clusternum)),celltypes))
685 | #     celltype = c(rep(celltype,each=clusternum))
686 | #     dat = data.frame(cluster, percentage_vec,celltype)
687 | #     dat$cluster = factor(cluster, level=paste0("Cluster",1:clusternum))
688 | 
689 | 
690 | 
691 | #   p = ggplot(dat, aes(y = percentage_vec,
692 | #              x = factor(cluster), fill = celltype)) +        ## global aes
693 | #   scale_fill_manual(values=color_in)+
694 | #   geom_bar(position="stack", stat="identity",width=0.8,color="grey2") +
695 | #   theme_classic()+xlab("")+ylab("")+
696 | #    theme(plot.title = element_text(size = textsize),
697 | #               text = element_text(size = textsize),
698 | #               #axis.title = element_text(face="bold"),
699 | #               #axis.text.x=element_text(size = 12,angle = 60,hjust = 1) ,
700 | #               #axis.text.x=element_blank(),
701 | #               legend.position = "right")# +
702 | 
703 | 
704 | # p
705 | 
706 | # }
707 | 
708 | # #' @title Obtain each cluster label visualized on locations.
709 | # #' @description This function visualizes each cluster label on locations.
710 | # #' @param loc_x: A vector of x coordiantes.
711 | # #' @param loc_y: A vector of y coordinates.
712 | # #' @param clusterlabel_in: A vector of integers, the cluster labels for each cell.
713 | # #' @param pointsize: An integer, the point size of each cell.
714 | # #' @param textsize: An integer, the text size in the legend.
715 | # #' @param title_in: A character string, the title you want to display at the top of the figure.
716 | # #' @param color_in: A vector of colors for each cluster.
717 | # #' @return A list of ggplot objects.
718 | # #' @export
719 | # plot_each_cluster = function(loc_x,loc_y, clusterlabel_in, pointsize=3,text_size=15 ,title_in,color_in){
720 | #   x = loc_x
721 | #   y = loc_y
722 | #   cluster = clusterlabel_in
723 | #   count = 0
724 | #   p=list()
725 | #     for(subcluster in 1:length(unique(cluster))){
726 | #       count = count + 1
727 | #       print(count)
728 | #       clusters = rep("Other_clusters",length(cluster))
729 | #       clusters[which(as.character(cluster)==as.character(subcluster))]=paste0("cluster_",subcluster)
730 | #       datt = data.frame(clusters, x, y)
731 | #       p[[count]] = ggplot(datt, aes(x = x, y = y, color = clusters)) +
732 | #         geom_point( alpha = 0.8,size=pointsize) +
733 | #         scale_colour_manual(values = c("#FFDB6D", "#4E84C4")) +
734 | #           ggtitle(paste0(title_in))+
735 | #         theme_void()+
736 | #         theme(plot.title = element_text(size = text_size),
737 | #               text = element_text(size = text_size),
738 | #               #axis.title = element_text(face="bold"),
739 | #               #axis.text.x=element_text(size = 22) ,
740 | #               legend.position = "bottom")
741 | 
742 | 
743 | #   }
744 | # p
745 | # }
746 | 
747 | # #' @title Obtain louvain clustering cluster labels.
748 | # #' @description This function performs louvain clustering on input low dimensional components.
749 | # #' @param knearest: A vector of integers, number of nearest neighbors for KNN graph construction in louvain clustering.
750 | # #' @param latent_dat: A d by n matrix of low dimensional components.
751 | # #' @return A list of clustering results.
752 | # #' \item{cluster_label}{a list of cluster labels, each corresponds to the vector of nearest neighbors.}
753 | # #' \item{cluster_num}{a list of total number of clusters in each cluster label.}
754 | # #' @export
755 | # louvain_clustering = function(knearest, latent_dat){
756 | #   set.seed(1234)
757 | 
758 | #   suppressMessages(require(FNN))
759 | #   suppressMessages(require(igraph))
760 | 
761 | #     PCvalues = latent_dat
762 | #   info.spatial = as.data.frame(t(PCvalues))
763 | #   colnames(info.spatial) =  paste0("factor", 1:nrow(PCvalues))
764 | #   N_k_nearest=knearest
765 | #   clusterlabel = list()
766 | #   p=list()
767 | #   count = 0
768 | #   max_num = c()
769 | #   for(k_nearest in N_k_nearest){
770 | #       count = count + 1
771 | #       print(count)
772 | #       knn.norm = get.knn(as.matrix(t(PCvalues)), k = k_nearest)
773 | #       knn.norm = data.frame(from = rep(1:nrow(knn.norm$nn.index),
774 | #       k=k_nearest), to = as.vector(knn.norm$nn.index), weight = 1/(1 + as.vector(knn.norm$nn.dist)))
775 | #       nw.norm = graph_from_data_frame(knn.norm, directed = FALSE)
776 | #       nw.norm = simplify(nw.norm)
777 | #       lc.norm = cluster_louvain(nw.norm)
778 | #       info.spatial$louvain = as.factor(membership(lc.norm))
779 | #       max_num[count] = max(membership(lc.norm))
780 | #       clusterlabel[[count]] = as.character(info.spatial$louvain)
781 | #   }
782 | #   return(list("cluster_label"=clusterlabel, "cluster_num"= max_num))
783 | # }
784 | 
785 | 
786 | 
787 | # #' @title Obtain walktrap clustering cluster labels.
788 | # #' @description This function performs walktrap clustering on input low dimensional components.
789 | # #' @param knearest: A vector of integers, number of nearest neighbors for SNN graph construction in walktrap clustering.
790 | # #' @param latent_dat: A d by n matrix of low dimensional components.
791 | # #' @return A list of clustering results.
792 | # #' \item{cluster_label}{a list of cluster labels, each corresponding to the vector of nearest neighbors}
793 | # #' \item{cluster_num}{a list of total number of clusters in each cluster label.}
794 | # #' @export
795 | # walktrap_clustering = function(knearest, latent_dat){
796 | #   set.seed(1234)
797 | 
798 | #   suppressMessages(require(bluster))
799 | #   suppressMessages(require(igraph))
800 | 
801 | #     PCvalues = latent_dat
802 | #   info.spatial = as.data.frame(t(PCvalues))
803 | #   colnames(info.spatial) =  paste0("factor", 1:nrow(PCvalues))
804 | #   N_k_nearest=knearest
805 | #   clusterlabel = list()
806 | #   p=list()
807 | #   count = 0
808 | #   max_num = c()
809 | #   for(k_nearest in N_k_nearest){
810 | #       count = count + 1
811 | #       print(count)
812 | #       g <- makeSNNGraph(as.matrix(t(PCvalues)),k = k_nearest)
813 | #     clusters <- igraph::cluster_fast_greedy(g)$membership
814 | #     g_walk <- igraph::cluster_walktrap(g)
815 | #     info.spatial$walk = as.factor(membership(g_walk))
816 | #     max_num[count] = max(membership(g_walk))
817 | #       clusterlabel[[count]] = as.character(info.spatial$walk)
818 | #   }
819 | #   return(list("cluster_label"=clusterlabel, "cluster_num"= max_num))
820 | # }
821 | 
822 | 
823 | 
824 | 
825 | 
826 | 
827 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # **SpatialPCA** 
 2 | 
 3 | SpatialPCA is a spatially aware dimension reduction method that aims to infer a low dimensional representation of the gene expression data in spatial transcriptomics. SpatialPCA builds upon the probabilistic version of PCA, incorporates localization information as additional input, and uses a kernel matrix to explicitly model the spatial correlation structure across tissue locations. SpatialPCA is implemented as an open-source R package, freely available at www.xzlab.org/software.html.
 4 | 
 5 | 
 6 | 
 7 | <img align="top" src="https://raw.githubusercontent.com/shangll123/workflowr_Test/main/docs/assets/main_figure.jpeg" alt="drawing" width="600"/>
 8 | 
 9 | 
10 | ## Install the Package
11 | You can install the current version of SpatialPCA from GitHub with:
12 | ```r
13 | library(devtools)
14 | install_github("shangll123/SpatialPCA")
15 | ```
16 | 
17 | ## Package Tutorial
18 | Please see the [SpatialPCA tutorial website.](http://lulushang.org/SpatialPCA_Tutorial/)
19 | 
20 | The tutorial includes main example codes for multiple spatial transcriptomics datasets (e.g. DLPFC, Slide-Seq cerebellum, Slide-Seq V2 hippocampus, Human breast tumor, and Vizgen MERFISH.)
21 | 
22 | Other analysis codes for this project can be found [here](https://github.com/shangll123/SpatialPCA_analysis_codes).
23 | Example data can be found here: https://drive.google.com/drive/folders/1Ibz5uNsFKHJ4roPpaec5nPL_EBF3-wxY?usp=share_link.
24 | ## Operating systems (version 1.3.0 SpatialPCA) tested on:
25 | macOS Catalina 10.15.7
26 | 
27 | Ubuntu 18.04.5 LTS (Bionic Beaver)
28 | 
29 | CentOS Linux 7 (Core)
30 | 
31 | ## License
32 | 
33 | SpatialPCA is licensed under the GNU General Public License v3.0.
34 | 
35 | ## Citation
36 | Lulu Shang, and Xiang Zhou (2022). Spatially aware dimension reduction for spatial transcriptomics. [Nature Communications](https://www.nature.com/articles/s41467-022-34879-1).
37 | 
38 | doi: https://www.nature.com/articles/s41467-022-34879-1
39 | 


--------------------------------------------------------------------------------
/man/CreateSpatialPCAObject.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA.R
 3 | \name{CreateSpatialPCAObject}
 4 | \alias{CreateSpatialPCAObject}
 5 | \title{Create the SpatialPCA object with filtering and normalization step.}
 6 | \usage{
 7 | CreateSpatialPCAObject(
 8 |   counts,
 9 |   location,
10 |   covariate = NULL,
11 |   project = "SpatialPCA",
12 |   gene.type = "spatial",
13 |   sparkversion = "spark",
14 |   numCores_spark = 1,
15 |   gene.number = 3000,
16 |   customGenelist = NULL,
17 |   min.loctions = 20,
18 |   min.features = 20
19 | )
20 | }
21 | \arguments{
22 | \item{counts}{Gene expression count matrix (matrix), the dimension is m x n, where m is the number of genes and n is the number of locations.}
23 | 
24 | \item{location}{Spatial location matrix (matrix), the dimension is n x d, n is the number of locations, d is dimensin of spatial coordinates, e.g. d=2 for locations on 2D space. The rownames of locations and the colnames of count matrix should be matched.}
25 | 
26 | \item{covariate}{The covariates in experiments (matrix, if any covariate included), n x q, n is the number of locations, q is the number of covariates. The rownames of covariates and the rownames of locations should be matched.}
27 | 
28 | \item{project}{Name of the project (for record keeping).}
29 | 
30 | \item{gene.type}{The type of genes to be used: "spatial" for spatially expressed genes; "hvg" for highly variable genes; "custom" for user specified genes, default is "spatial".}
31 | 
32 | \item{sparkversion}{In spatial gene selection, specify "spark" for small sample size data for higher detection power of spatial genes, "sparkx" for large sample size data for saving time and memory.}
33 | 
34 | \item{numCores_spark}{If gene.type="spatial", specify the number of CPU cores in SPARK package to use when selecting spatial genes.}
35 | 
36 | \item{gene.number}{The number of top highly variable genes if gene.selection=="hvg" (use all HVG genes if this number is not specified);
37 | number of top spatially expressed genes if gene.selection=="spatial" (use all significant spatially expressed genes if this number is not specified).}
38 | 
39 | \item{customGenelist}{A list of user specified genes if  gene.type=="custom".}
40 | 
41 | \item{min.loctions}{The features (genes) detected in at least min.loctions number of loctions, default is 20.}
42 | 
43 | \item{min.features}{The locations where at least min.features number of features (genes) are detected, default is 20.}
44 | }
45 | \value{
46 | Returns SpatialPCA object, with filtered and normalized gene expression matrix and corresponding location matrix.
47 | }
48 | \description{
49 | Create the SpatialPCA object with filtering and normalization step.
50 | }
51 | 


--------------------------------------------------------------------------------
/man/Spatial-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA-package.R
 3 | \name{Spatial-package}
 4 | \alias{Spatial-package}
 5 | \alias{SpatialPCA}
 6 | \title{The 'SpatialPCA' package.}
 7 | \description{
 8 | A DESCRIPTION OF THE PACKAGE
 9 | }
10 | 


--------------------------------------------------------------------------------
/man/SpatialPCA-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA.R
 3 | \docType{class}
 4 | \name{SpatialPCA-class}
 5 | \alias{SpatialPCA-class}
 6 | \title{Each SpatialPCA object has a number of slots which store information. Key slots to access
 7 | are listed below.}
 8 | \description{
 9 | Each SpatialPCA object has a number of slots which store information. Key slots to access
10 | are listed below.
11 | }
12 | \section{Slots}{
13 | 
14 | \describe{
15 | \item{\code{counts}}{The raw expression count matrix. Rows are genes, columns are spots/cells.}
16 | 
17 | \item{\code{normalized_expr}}{Normalized (by default we use SCTransform normalization in Seurat R package) expression matrix.}
18 | 
19 | \item{\code{project}}{Name of the project (for record keeping).}
20 | 
21 | \item{\code{covariate}}{The covariates in experiments (if any covariate included).}
22 | 
23 | \item{\code{location}}{Cell/spot spatial coordinates to compute the kernel matrix.}
24 | 
25 | \item{\code{kernelmat}}{The kernel matrix for spatial relationship between locations.}
26 | 
27 | \item{\code{kerneltype}}{The type of kernel to be used, either "gaussian" for gaussian kernel, or "cauchy" for cauchy kernel, or "quadratic" for rational quadratic kernel.}
28 | 
29 | \item{\code{bandwidthtype}}{The type of bandwidth to be used in Gaussian kernel, "SJ" for Sheather & Jones (1991) method (usually used in small sample size datasets), "Silverman" for Silverman's ‘rule of thumb’ method (1986)(usually used in large sample size datasets).}
30 | 
31 | \item{\code{bandwidth}}{The bandwidth in Gaussian kernel, users can also specify their preferred bandwidth.}
32 | 
33 | \item{\code{sparseKernel}}{To choose if the user wants to use a sparse kernel matrix or not. It is recommended to choose sparseKernel="TRUE" when sample size is large and you want to speed up the calculation.}
34 | 
35 | \item{\code{sparseKernel_tol}}{When sparseKernel=TRUE, sparseKernel_tol is the cut-off value when building sparse kernel matrix, any element in the kernel matrix greater than sparseKernel_tol will be kept, otherwise will be set to 0 to save memory.}
36 | 
37 | \item{\code{sparseKernel_ncore}}{When sparseKernel=TRUE, sparseKernel_ncore is the number of CPU cores to use when building the sparse kernel matrix.}
38 | 
39 | \item{\code{fast}}{Select "TRUE" to accrelerate the algorithm by performing low-rank approximation on the kernel matrix, otherwise "FALSE" for calculation without low-rank approximation on the kernel matrix.}
40 | 
41 | \item{\code{eigenvecnum}}{When fast=TRUE, the user can optionally specify the number of top eigenvectors and eigenvalues to be used in low-rank approximation when performing eigen decomposition on the kernel matrix.}
42 | 
43 | \item{\code{tau}}{The variance parameter in covariance matrix for the spatial PCs, to be inferred through the algorithm.}
44 | 
45 | \item{\code{sigma2_0}}{The residual error variance, to be inferred through the algorithm.}
46 | 
47 | \item{\code{SpatialPCnum}}{The number of Spatial PCs, specified by the user, default is 20.}
48 | 
49 | \item{\code{W}}{The factor loading matrix.}
50 | 
51 | \item{\code{SpatialPCs}}{The estimated spatial PCs.}
52 | 
53 | \item{\code{highPCs}}{The estimated high resolution spatial PCs, if needed.}
54 | 
55 | \item{\code{highPos}}{The scaled locations of estimated high resolution spatial PCs, if needed.}
56 | 
57 | \item{\code{expr_pred}}{The predicted gene expression on new locations when highPCs and highPos are avaliable.}
58 | 
59 | \item{\code{params}}{List of model parameters.}
60 | }}
61 | 
62 | 


--------------------------------------------------------------------------------
/man/SpatialPCA_EstimateLoading.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_EstimateLoading.R
 3 | \name{SpatialPCA_EstimateLoading}
 4 | \alias{SpatialPCA_EstimateLoading}
 5 | \title{Calculate loading matrix.}
 6 | \usage{
 7 | SpatialPCA_EstimateLoading(
 8 |   object,
 9 |   maxiter = 300,
10 |   initial_tau = 1,
11 |   fast = FALSE,
12 |   eigenvecnum = NULL,
13 |   SpatialPCnum = 20
14 | )
15 | }
16 | \arguments{
17 | \item{object}{SpatialPCA object.}
18 | 
19 | \item{maxiter}{Maximum iteration number. Default is 300.}
20 | 
21 | \item{initial_tau}{Initial value of tau. Default is 1. Because we need tau to be positive, we calculate exp(log(tau)) during iterations.}
22 | 
23 | \item{fast}{Select "TRUE" if the user wants to use low-rank approximation on the kernel matrix to accelerate the algorithm, otherwise select "FALSE".}
24 | 
25 | \item{eigenvecnum}{When fast=TRUE, eigenvecnum is the number of top eigenvectors and eigenvalues to be used in low-rank approximation in the eigen decomposition step for kernel matrix.
26 | The default is NULL, if specified, it is recommended to use eigenvecnum=20 when sample size is large (e.g. >5,000). When sample size is small, eigenvecnum is suggested to explain at least 90\% variance.}
27 | 
28 | \item{SpatialPCnum}{Number of spatial PCs.}
29 | }
30 | \value{
31 | Returns SpatialPCA object with estimated loading matrix W.
32 | }
33 | \description{
34 | Calculate loading matrix.
35 | }
36 | 


--------------------------------------------------------------------------------
/man/SpatialPCA_Multiple_Sample.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_multiple_sample.R
 3 | \name{SpatialPCA_Multiple_Sample}
 4 | \alias{SpatialPCA_Multiple_Sample}
 5 | \title{Multiple sample SpatialPCA.
 6 | In this extension, we construct the covariance matrix for the latent factors in the form of a block diagonal matrix:
 7 | it consists of the kernel matrices constructed using the spatial location information within each dataset, with zero correlation for pairs of locations across datasets.
 8 | This way, the latent factors within each dataset are correlated a priori across spatial locations, while the latent factors across datasets are not correlated a priori.
 9 | Certainly, if one wants to model the a priori correlation between latent factors across datasets, due to, for example, their similarity in the features extracted from histology images, then one can also modify the kernel matrices by constructing them using features other than spatial location information.}
10 | \usage{
11 | SpatialPCA_Multiple_Sample(
12 |   count_list,
13 |   location_list,
14 |   gene.type = "spatial",
15 |   sparkversion = "spark",
16 |   numCores_spark = 5,
17 |   gene.number = 3000,
18 |   customGenelist = NULL,
19 |   min.loctions = 20,
20 |   min.features = 20,
21 |   bandwidth_common = 0.1
22 | )
23 | }
24 | \arguments{
25 | \item{count_list}{A list of g by n count matrix, g is gene number, n is location number.}
26 | 
27 | \item{location_list}{A list of n by d location matrix, n is location number, d is location dimension. The rownames of each location matrix should match with the colnames of its corresponding count matrix.}
28 | }
29 | \value{
30 | Returns SpatialPCA object with estimated Spatial PCs on locations.
31 | }
32 | \description{
33 | Multiple sample SpatialPCA.
34 | In this extension, we construct the covariance matrix for the latent factors in the form of a block diagonal matrix:
35 | it consists of the kernel matrices constructed using the spatial location information within each dataset, with zero correlation for pairs of locations across datasets.
36 | This way, the latent factors within each dataset are correlated a priori across spatial locations, while the latent factors across datasets are not correlated a priori.
37 | Certainly, if one wants to model the a priori correlation between latent factors across datasets, due to, for example, their similarity in the features extracted from histology images, then one can also modify the kernel matrices by constructing them using features other than spatial location information.
38 | }
39 | 


--------------------------------------------------------------------------------
/man/SpatialPCA_SpatialPCs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_SpatialPCs.R
 3 | \name{SpatialPCA_SpatialPCs}
 4 | \alias{SpatialPCA_SpatialPCs}
 5 | \title{Calculating Spatial PCs (latent factor matrix Z).}
 6 | \usage{
 7 | SpatialPCA_SpatialPCs(object, fast = FALSE, eigenvecnum = NULL)
 8 | }
 9 | \arguments{
10 | \item{object}{SpatialPCA object.}
11 | 
12 | \item{fast}{Select fast=TRUE if the user wants to use low-rank approximation on the kernel matrix to calculate the spatial PCs, otherwise select FALSE.}
13 | 
14 | \item{eigenvecnum}{When fast=TRUE, eigenvecnum is the number of top eigenvectors and eigenvalues to be used in low-rank approximation in the eigen decomposition step for kernel matrix.
15 | The default is NULL, if specified, it is recommended that these top eigen values explain >=90\% of the variance.
16 | In estimating spatial PCs, we need larger number of eigenvectors in kernel matrix for more accurate estimation.}
17 | }
18 | \value{
19 | Returns SpatialPCA object with estimated Spatial PCs.
20 | }
21 | \description{
22 | Calculating Spatial PCs (latent factor matrix Z).
23 | }
24 | 


--------------------------------------------------------------------------------
/man/SpatialPCA_buildKernel.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_buildKernel.R
 3 | \name{SpatialPCA_buildKernel}
 4 | \alias{SpatialPCA_buildKernel}
 5 | \title{Calculating kernel matrix from spatial locations.}
 6 | \usage{
 7 | SpatialPCA_buildKernel(
 8 |   object,
 9 |   kerneltype = "gaussian",
10 |   bandwidthtype = "SJ",
11 |   bandwidth.set.by.user = NULL,
12 |   sparseKernel = FALSE,
13 |   sparseKernel_tol = 1e-20,
14 |   sparseKernel_ncore = 1
15 | )
16 | }
17 | \arguments{
18 | \item{object}{SpatialPCA object.}
19 | 
20 | \item{kerneltype}{The type of kernel to be used, either "gaussian", or "cauchy" for cauchy kernel, or "quadratic" for rational quadratic kernel, and "delaunday" for gaussian kernel built with non-linear Delaunay triangulation based distance.}
21 | 
22 | \item{bandwidthtype}{The type of bandwidth to be used in Gaussian kernel, "SJ" for Sheather & Jones (1991) method (usually used in small size datasets), "Silverman" for Silverman's ‘rule of thumb’ method (1986)(usually used in large size datasets).}
23 | 
24 | \item{bandwidth.set.by.user}{User could select their own bandwidth (a numeric value) if the recommended bandwidth doesn't work in their dataset.}
25 | 
26 | \item{sparseKernel}{Select "TURE" if the user wants to use a sparse kernel matrix or "FALSE" if not. It is recommended to choose sparseKernel="TRUE" when sample size is large.}
27 | 
28 | \item{sparseKernel_tol}{When sparseKernel=TRUE, the cut-off value when building sparse kernel matrix, any element in the kernel matrix greater than sparseKernel_tol will be kept, otherwise will be set to 0 to save memory.}
29 | 
30 | \item{sparseKernel_ncore}{When sparseKernel=TRUE, the number of CPU cores to build sparse kernel matrix.}
31 | }
32 | \description{
33 | Calculating kernel matrix from spatial locations.
34 | }
35 | 


--------------------------------------------------------------------------------
/man/SpatialPCA_expr_pred.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_highresolution.R
 3 | \name{SpatialPCA_expr_pred}
 4 | \alias{SpatialPCA_expr_pred}
 5 | \title{High-resolution gene expression prediction.}
 6 | \usage{
 7 | SpatialPCA_expr_pred(object)
 8 | }
 9 | \arguments{
10 | \item{object}{SpatialPCA object with high resolution predicted spatial PCs.
11 | Users can optionally provide new locations at the original location scale.}
12 | }
13 | \value{
14 | Returns SpatialPCA object with predicted gene expression on new locations.
15 | We can predict normalized gene expression for the genes in the object@normalized_expr matrix.
16 | }
17 | \description{
18 | High-resolution gene expression prediction.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/SpatialPCA_highresolution.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_highresolution.R
 3 | \name{SpatialPCA_highresolution}
 4 | \alias{SpatialPCA_highresolution}
 5 | \title{High-resolution spatial map construction.}
 6 | \usage{
 7 | SpatialPCA_highresolution(object, platform = "ST", newlocation = NULL)
 8 | }
 9 | \arguments{
10 | \item{object}{SpatialPCA object.}
11 | 
12 | \item{platform}{"ST": the 10X ST platform, impute 9 subspots in a square shape for each spot;
13 | "Visium": the 10X Visium platform, impute 6 subspots in a hexagonal shape for each spot;}
14 | 
15 | \item{newlocation}{A n* by d location matrix, n* is number of new locations, d is dimension of locations.
16 | Users can optionally provide new locations at the original location scale.}
17 | }
18 | \value{
19 | Returns SpatialPCA object with estimated Spatial PCs on new locations.
20 | }
21 | \description{
22 | High-resolution spatial map construction.
23 | }
24 | 


--------------------------------------------------------------------------------
/man/bandwidth_select.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_buildKernel.R
 3 | \name{bandwidth_select}
 4 | \alias{bandwidth_select}
 5 | \title{Select bandwidth in Gaussian kernel.}
 6 | \usage{
 7 | bandwidth_select(expr, method)
 8 | }
 9 | \arguments{
10 | \item{expr}{A m gene by n location matrix of normalized gene expression matrix.}
11 | 
12 | \item{method}{The method used in bandwidth selection, "SJ" usually for small sample size data, "Silverman" usually for large sample size data.}
13 | }
14 | \value{
15 | A numeric value of calculated bandwidth.
16 | }
17 | \description{
18 | This function selects bandwidth in Gaussian kernel.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/fx_CHAOS.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_utilties.R
 3 | \name{fx_CHAOS}
 4 | \alias{fx_CHAOS}
 5 | \title{Calculate CHAOS score to measure clustering performance.}
 6 | \usage{
 7 | fx_CHAOS(clusterlabel, location)
 8 | }
 9 | \arguments{
10 | \item{clusterlabel}{Cluster labels.}
11 | 
12 | \item{location}{A n by k matrix of spatial locations.}
13 | }
14 | \value{
15 | A numeric value for CHAOS score.
16 | }
17 | \description{
18 | CHAOS score measures the spatial continuity of the detected spatial domains.
19 | Lower CHAOS score indicates better spatial domian clustering performance.
20 | }
21 | 


--------------------------------------------------------------------------------
/man/fx_PAS.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_utilties.R
 3 | \name{fx_PAS}
 4 | \alias{fx_PAS}
 5 | \title{Calculate PAS score to measure clustering performance.}
 6 | \usage{
 7 | fx_PAS(clusterlabel, location)
 8 | }
 9 | \arguments{
10 | \item{clusterlabel}{Cluster labels.}
11 | 
12 | \item{location}{A n by k matrix of spatial locations.}
13 | }
14 | \value{
15 | A numeric value for PAS score.
16 | }
17 | \description{
18 | PAS score measures the randomness of the spots that located outside of the spatial region where it was clustered to.
19 | Lower PAS score indicates better spatial domian clustering performance.
20 | }
21 | 


--------------------------------------------------------------------------------
/man/get_NMF.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_utilties.R
 3 | \name{get_NMF}
 4 | \alias{get_NMF}
 5 | \title{Obtain NMF low dimensional components for dimension reduction methods comparison.}
 6 | \usage{
 7 | get_NMF(count, PCnum)
 8 | }
 9 | \arguments{
10 | \item{count}{Count expression g by n matrix. g is gene number, n is sample size.}
11 | 
12 | \item{PCnum}{Number of PCs.}
13 | }
14 | \value{
15 | A d by n matrix of low dimensional components from NMF. d is number of low dimensional components, which is same as number of Spatial PCs in SpatialPCA. n is sample size.
16 | }
17 | \description{
18 | Obtain NMF low dimensional components for dimension reduction methods comparison.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/get_PCA.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_utilties.R
 3 | \name{get_PCA}
 4 | \alias{get_PCA}
 5 | \title{Obtain PCA low dimensional components for dimension reduction methods comparison.}
 6 | \usage{
 7 | get_PCA(expr, PCnum)
 8 | }
 9 | \arguments{
10 | \item{expr}{Normalized gene expression g by n matrix. g is gene number, n is sample size.}
11 | 
12 | \item{PCnum}{Number of PCs.}
13 | }
14 | \value{
15 | A d by n matrix of low dimensional components from PCA. d is number of low dimensional components, n is sample size.
16 | }
17 | \description{
18 | Obtain PCA low dimensional components for dimension reduction methods comparison.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/kernel_build.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_buildKernel.R
 3 | \name{kernel_build}
 4 | \alias{kernel_build}
 5 | \title{Build kernel matrix.}
 6 | \usage{
 7 | kernel_build(kerneltype = "gaussian", location, bandwidth)
 8 | }
 9 | \arguments{
10 | \item{kerneltype}{The type of kernel to be used, either "gaussian", or "cauchy" for cauchy kernel, or "quadratic" for rational quadratic kernel, and "delaunday" for gaussian kernel built with non-linear Delaunay triangulation based distance.}
11 | 
12 | \item{location}{A n by d matrix of cell/spot location coordinates.}
13 | 
14 | \item{bandwidth}{A numeric value of bandwidth.}
15 | }
16 | \value{
17 | The kernel matrix for spatial relationship between locations.
18 | }
19 | \description{
20 | This function calculates kernel matrix from spatial locations.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/kernel_build_sparse.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_buildKernel.R
 3 | \name{kernel_build_sparse}
 4 | \alias{kernel_build_sparse}
 5 | \title{Build sparse kernel matrix.}
 6 | \usage{
 7 | kernel_build_sparse(kerneltype, location, bandwidth, tol, ncores)
 8 | }
 9 | \arguments{
10 | \item{kerneltype}{The type of kernel to be used, either "gaussian", or "cauchy" for cauchy kernel, or "quadratic" for rational quadratic kernel.}
11 | 
12 | \item{location}{A n by d matrix of cell/spot location coordinates.}
13 | 
14 | \item{bandwidth}{A numeric value of bandwidth.}
15 | 
16 | \item{tol}{A numeric value of cut-off value when building sparse kernel matrix.}
17 | 
18 | \item{ncores}{A integer value of number of CPU cores to use when building sparse kernel matrix.}
19 | }
20 | \value{
21 | The sparse kernel matrix for spatial relationship between locations.
22 | }
23 | \description{
24 | This function calculates kernel matrix.
25 | }
26 | 


--------------------------------------------------------------------------------
/man/louvain_clustering.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_utilties.R
 3 | \name{louvain_clustering}
 4 | \alias{louvain_clustering}
 5 | \title{Obtain clustering cluster labels through louvain method.}
 6 | \usage{
 7 | louvain_clustering(clusternum, latent_dat, knearest = 100)
 8 | }
 9 | \arguments{
10 | \item{clusternum}{The desired number of clusters the user wants to obtain.}
11 | 
12 | \item{latent_dat}{A d by n matrix of low dimensional components, d is number of PCs, n is number of spots.}
13 | 
14 | \item{knearest}{An integers, number of nearest neighbors for KNN graph construction in louvain clustering.}
15 | }
16 | \value{
17 | The cluster labels.
18 | }
19 | \description{
20 | This function performs louvain clustering on input low dimensional components.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/plot_RGB_UMAP.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_utilties.R
 3 | \name{plot_RGB_UMAP}
 4 | \alias{plot_RGB_UMAP}
 5 | \title{Visualize RGB plot from UMAP.}
 6 | \usage{
 7 | plot_RGB_UMAP(location, latent_dat, pointsize = 2, textsize = 15)
 8 | }
 9 | \arguments{
10 | \item{location}{A n by k location matrix. n is spot number.}
11 | 
12 | \item{latent_dat}{A d by n matrix of low dimensional components.}
13 | 
14 | \item{pointsize}{The point size of each spot.}
15 | 
16 | \item{textsize}{The text size in the legend.}
17 | }
18 | \value{
19 | A list.
20 | \item{RGB}{A data frame with five columns: x coordinate, y coordinate, R, G, and B color index}
21 | \item{figure}{A ggplot object for RGB plot from UMAP.}
22 | }
23 | \description{
24 | We summarized the inferred low dimensional components into three UMAP. components and visualized the three resulting components with red/green/blue (RGB) colors in the RGB plot.
25 | }
26 | 


--------------------------------------------------------------------------------
/man/plot_RGB_tSNE.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_utilties.R
 3 | \name{plot_RGB_tSNE}
 4 | \alias{plot_RGB_tSNE}
 5 | \title{Visualize RGB plot from tSNE.}
 6 | \usage{
 7 | plot_RGB_tSNE(location, latent_dat, pointsize = 2, textsize = 15)
 8 | }
 9 | \arguments{
10 | \item{location}{A n by k location matrix. n is spot number.}
11 | 
12 | \item{latent_dat}{A d by n matrix of low dimensional components.}
13 | 
14 | \item{pointsize}{The point size of each spot.}
15 | 
16 | \item{textsize}{The text size in the legend.}
17 | }
18 | \value{
19 | A list.
20 | \item{RGB}{A data frame with five columns: x coordinate, y coordinate, R, G, and B color index}
21 | \item{figure}{A ggplot object for RGB plot from tSNE}
22 | }
23 | \description{
24 | We summarized the inferred low dimensional components into three tSNE components and visualized the three resulting components with red/green/blue (RGB) colors in the RGB plot.
25 | }
26 | 


--------------------------------------------------------------------------------
/man/plot_cluster.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_utilties.R
 3 | \name{plot_cluster}
 4 | \alias{plot_cluster}
 5 | \title{Visualize cluster labels on locations.}
 6 | \usage{
 7 | plot_cluster(
 8 |   location,
 9 |   clusterlabel,
10 |   pointsize = 3,
11 |   text_size = 15,
12 |   title_in,
13 |   color_in,
14 |   legend = "none"
15 | )
16 | }
17 | \arguments{
18 | \item{location}{A n by k matrix of spot locations.}
19 | 
20 | \item{clusterlabel}{A vector of cluster labels for spots.}
21 | 
22 | \item{pointsize}{An integer, the point size of each spot.}
23 | 
24 | \item{title_in}{A character string, the title you want to display at the top of the figure.}
25 | 
26 | \item{color_in}{A vector of colors for each cluster.}
27 | 
28 | \item{legend}{A character string, the position of the figure legend. Select from "top", "bottom","left" or "right".}
29 | 
30 | \item{textsize}{An integer, the text size in the legend.}
31 | }
32 | \value{
33 | A ggplot object.
34 | }
35 | \description{
36 | This function visualizes cluster labels on locations.
37 | }
38 | 


--------------------------------------------------------------------------------
/man/plot_each_cluster.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_utilties.R
 3 | \name{plot_each_cluster}
 4 | \alias{plot_each_cluster}
 5 | \title{Obtain each cluster label visualized on locations.}
 6 | \usage{
 7 | plot_each_cluster(
 8 |   loc_x,
 9 |   loc_y,
10 |   clusterlabel_in,
11 |   pointsize = 3,
12 |   text_size = 15,
13 |   title_in,
14 |   color_in
15 | )
16 | }
17 | \arguments{
18 | \item{loc_x:}{A vector of x coordiantes.}
19 | 
20 | \item{loc_y:}{A vector of y coordinates.}
21 | 
22 | \item{clusterlabel_in:}{A vector of integers, the cluster labels for each cell.}
23 | 
24 | \item{pointsize:}{An integer, the point size of each cell.}
25 | 
26 | \item{textsize:}{An integer, the text size in the legend.}
27 | 
28 | \item{title_in:}{A character string, the title you want to display at the top of the figure.}
29 | 
30 | \item{color_in:}{A vector of colors for each cluster.}
31 | }
32 | \value{
33 | A list of ggplot objects.
34 | }
35 | \description{
36 | This function visualizes each cluster label on locations.
37 | }
38 | 


--------------------------------------------------------------------------------
/man/plot_factor_value.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_utilties.R
 3 | \name{plot_factor_value}
 4 | \alias{plot_factor_value}
 5 | \title{Visualize PCs on their locations.}
 6 | \usage{
 7 | plot_factor_value(location, PCs, textmethod, pointsize = 2, textsize = 15)
 8 | }
 9 | \arguments{
10 | \item{location}{A n cell by k dimension of location matrix. n is cell number, k=2 if the spots are on 2D space.}
11 | 
12 | \item{PCs}{A d by n matrix of low dimensional components.}
13 | 
14 | \item{textmethod}{A text string of the name of method used to extract latent factors, e.g. "SpatialPCA" or "PCA". It will be shown as the title of the figures.}
15 | 
16 | \item{pointsize}{The point size of each location for visualization.}
17 | 
18 | \item{textsize}{The text size in the figure legend.}
19 | }
20 | \value{
21 | A list of ggplot objects for factor value plots.
22 | }
23 | \description{
24 | This function visualizes the low dimensional component values.
25 | }
26 | 


--------------------------------------------------------------------------------
/man/plot_trajectory.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_utilties.R
 3 | \name{plot_trajectory}
 4 | \alias{plot_trajectory}
 5 | \title{Visualize pseudotimes on locations.}
 6 | \usage{
 7 | plot_trajectory(
 8 |   pseudotime,
 9 |   location,
10 |   clusterlabels,
11 |   gridnum,
12 |   color_in,
13 |   pointsize = 5,
14 |   arrowlength = 0.2,
15 |   arrowsize = 1,
16 |   textsize = 22
17 | )
18 | }
19 | \arguments{
20 | \item{pseudotime}{A length n vector of pseudotime inferred from Slingshot.}
21 | 
22 | \item{location}{A n by 2 data frame of spot locations.}
23 | 
24 | \item{clusterlabels}{A vector of integers, the cluster labels for each spot}
25 | 
26 | \item{gridnum}{Number of grids that evenly segment the whole tissue section.}
27 | 
28 | \item{color_in}{A vector of character strings representing colors for each cluster.}
29 | 
30 | \item{pointsize}{An integer, the point size of each spot}
31 | 
32 | \item{arrowlength}{An integer, the length of arrows inside a grid between one spot with smallest pseudotime and largest pseudotime.}
33 | 
34 | \item{arrowsize}{An integer, the size of arrows inside a grid between one spot with smallest pseudotime and largest pseudotime.}
35 | 
36 | \item{textsize}{An integer, the size of text in the figure.}
37 | }
38 | \value{
39 | A ggplot object.
40 | \item{Pseudotime}{A ggplot object visualizing pseudotime on locations.}
41 | \item{Arrowplot1}{A ggplot object for arrows pointing from smallest pseudotime and largest pseudotime in each grid.}
42 | \item{Arrowplot2}{A ggplot object for arrows pointing from largest pseudotime and smallest pseudotime in each grid.}
43 | \item{Arrowoverlay1}{A ggplot object for arrows pointing from smallest pseudotime and largest pseudotime in each grid, overlayed on clustering plot.}
44 | \item{Arrowoverlay2}{A ggplot object for arrows pointing from largest pseudotime and smallest pseudotime in each grid, overlayed on clustering plot.}
45 | }
46 | \description{
47 | This function visualizes pseudotimes on locations.
48 | }
49 | 


--------------------------------------------------------------------------------
/man/refine_cluster_10x.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_utilties.R
 3 | \name{refine_cluster_10x}
 4 | \alias{refine_cluster_10x}
 5 | \title{Refine clustering results for 10x ST or Visium data.}
 6 | \usage{
 7 | refine_cluster_10x(clusterlabels, location, shape = "square")
 8 | }
 9 | \arguments{
10 | \item{clusterlabels}{The cluster label obtained (e.g. from louvain method or walktrap method).}
11 | 
12 | \item{location}{A n by 2 location matrix of spots.}
13 | 
14 | \item{shape}{Select shape='hexagon' for Visium data, 'square' for ST data.}
15 | }
16 | \value{
17 | The refined cluster labels.
18 | }
19 | \description{
20 | This function refines spatial clustering of ST or Visium data.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/walktrap_clustering.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpatialPCA_utilties.R
 3 | \name{walktrap_clustering}
 4 | \alias{walktrap_clustering}
 5 | \title{Obtain clustering cluster labels through walktrap method.}
 6 | \usage{
 7 | walktrap_clustering(clusternum, latent_dat, knearest = 100)
 8 | }
 9 | \arguments{
10 | \item{clusternum}{The desired number of clusters the user wants to obtain.}
11 | 
12 | \item{latent_dat}{A d by n matrix of low dimensional components, d is number of PCs, n is number of spots.}
13 | 
14 | \item{knearest}{An integers, number of nearest neighbors for SNN graph construction in walktrap clustering.}
15 | }
16 | \value{
17 | The cluster labels.
18 | }
19 | \description{
20 | This function performs walktrap clustering on input low dimensional components.
21 | }
22 | 


--------------------------------------------------------------------------------