├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── R
    └── SpaGene.R
├── README.md
├── Rawdata
    ├── BreastCancer
    │   └── bc_raw.rds
    ├── HDST
    │   └── CN24_D1_unmodgtf_filtered_red_ut_HDST_final_clean.rds
    ├── LRpair_human.rds
    ├── MerFish
    │   └── merfish_raw.rds
    ├── Slideseq
    │   ├── SlideseqV2_ROI.rds
    │   └── slideseqv2_mob_raw.rds
    ├── brain10X
    │   └── brain10x_raw.rds
    └── mob
    │   └── mob_raw.rds
├── SpaGene.Rproj
├── Tutorial
    ├── Breastcancer.html
    ├── HDST.html
    ├── MERFISH.html
    ├── mb_anterior_posterior.html
    ├── mbrain_10X.html
    ├── mc_slideseqv2.html
    ├── mob.html
    └── mob_slideseqv2.html
└── man
    ├── FindPattern.Rd
    ├── FindPattern_Multi.Rd
    ├── LRactivity.Rd
    ├── PlotPattern.Rd
    ├── PlotPattern_Multi.Rd
    ├── SpaGene.Rd
    ├── SpaGene_CT.Rd
    ├── SpaGene_LR.Rd
    ├── SpaGene_sparse.Rd
    └── plotLR.Rd


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: SpaGene
 2 | Type: Package
 3 | Title: Identify spatially localized genes and colocalized gene pairs 
 4 | Version: 0.1.0
 5 | Author: Qi Liu
 6 | Maintainer: Qi Liu <qi.liu@vumc.org>
 7 | Description: Characterize spatial patterns from spatial omics data
 8 | License: LGPL
 9 | Encoding: UTF-8
10 | LazyData: true
11 | Depends: R (>= 3.6),Matrix (>= 1.2),ggplot2 (>= 3.3.5),patchwork (>= 1.1)
12 | Imports: 
13 |     RANN (>= 2.6),
14 |     RcppML (>= 0.3.7) 
15 | Suggests:
16 |     RColorBrewer (>= 1.1),
17 | RoxygenNote: 7.1.2
18 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(FindPattern)
 4 | export(FindPattern_Multi)
 5 | export(LRactivity)
 6 | export(PlotPattern)
 7 | export(PlotPattern_Multi)
 8 | export(SpaGene)
 9 | export(SpaGene_CT)
10 | export(SpaGene_LR)
11 | export(SpaGene_sparse)
12 | export(plotLR)
13 | 


--------------------------------------------------------------------------------
/R/SpaGene.R:
--------------------------------------------------------------------------------
  1 | #' Identify spatially variable genes
  2 | #'
  3 | #' @description Identify spatial variable genes based on spatial connectness of spots with high expression compared to random permutation
  4 | 
  5 | #' @param expr gene expression matrix, the row is the gene and the column is the spot/cell
  6 | #' @param location location matrix, the row number of location should match the column number of expr
  7 | #' @param normalize whether to normalize the data (default: TRUE)
  8 | #' @param topn the ratio of spots/cells considered high expression (default: 20 percent of the total spots/cells)
  9 | #' @param knn the number of nearest neighbours to search (default: 8)
 10 | #' @param perm the number of random permutations (default: 500)
 11 | #' @param minN the minimum number of spots/cells with gene expression. Genes expressed equal to or less than minN spots/cells are excluded (default:0)
 12 | #' @param sizefactor the size factor for normalization (default:10000)
 13 | #' @param weight  weights assigned to degree. If NULL, equal weight, wi=1, i is the degree, i=0,1,...2*knn; if "linear", wi=0.5+0.5*i/(2*knn); or weight is a numeric vector of length 2*knn+1 (default:NULL)
 14 | #' @return a list containing results of each gene (spagene_res) and normalized gene expression matrix (normexp)
 15 | 
 16 | #' @export
 17 | 
 18 | SpaGene <- function(expr,location,normalize=T,topn=floor(0.2*dim(location)[1]),knn=8,perm=500,minN=0,sizefactor=10000,weight=NULL) {
 19 |   set.seed(1)
 20 |   expr<-expr[Matrix::rowSums(expr>0)>minN,]
 21 | 
 22 |   ncell<-dim(location)[1]
 23 |   ngene<-dim(expr)[1]
 24 | 
 25 |   if (dim(expr)[2]!=ncell) {stop("the ncol of expr should match the nrow of location")}
 26 | 
 27 |   if (is.null(rownames(expr))){rownames(expr)<- paste0("gene",1:ngene)}
 28 | 
 29 |    nnmatrix<-RANN::nn2(location,k=knn)$nn.idx
 30 | 
 31 |    rand_result<-unlist(lapply(1:perm,function(x){ind<-sample(1:ncell,topn);return(Caldegree(ind,nnmatrix,knn,weight=weight))}))
 32 | 
 33 |    mean_rand<-mean(rand_result)
 34 |    sd_rand<-sd(rand_result)
 35 | 
 36 |    spagene_res<-data.frame(score=rep(NA,ngene),row.names=rownames(expr),stringsAsFactors = FALSE)
 37 | 
 38 | 
 39 |   if (is(expr,"sparseMatrix")){
 40 |     exprt<-Matrix::t(expr)
 41 |     colind<-exprt@i+1
 42 |     dp<-exprt@p
 43 |     expval<-exprt@x
 44 | 
 45 |     if (normalize==TRUE) {
 46 |        lib_size<-Matrix::rowSums(exprt)
 47 |        expval<-log(expval/lib_size[colind]*sizefactor+1)
 48 |        exprt@x<-expval
 49 |        expr<-Matrix::t(exprt)
 50 |      }
 51 | 
 52 | 
 53 | 
 54 |     for (geneind in 1:ngene) {
 55 | 
 56 |       geneexp<-rep(0,ncell)
 57 |       subind<-(dp[geneind]+1):dp[geneind+1]
 58 |       geneexp[colind[subind]]<-expval[subind]
 59 |       highind<-order(geneexp,sample(ncell,ncell),decreasing=T)[1:topn]
 60 |       spagene_res$score[geneind]<-Caldegree(highind,nnmatrix,knn,weight=weight)
 61 | 
 62 | 
 63 |     }
 64 |   } else{
 65 | 
 66 |          if (normalize==TRUE) {
 67 |              expr<-log(t(t(expr)/(colSums(expr))*sizefactor)+1)
 68 |           }
 69 | 
 70 |         for (geneind in 1:ngene) {
 71 |               geneexp<-expr[geneind,]
 72 | 
 73 |               highind<-order(geneexp,sample(ncell,ncell),decreasing=T)[1:topn]
 74 |               spagene_res$score[geneind]<-Caldegree(highind,nnmatrix,knn,weight=weight)
 75 |          }
 76 |     }
 77 | 
 78 | 
 79 | 
 80 | 
 81 |   spagene_res$zval<-(spagene_res$score-mean_rand)/sd_rand
 82 | 
 83 | 
 84 |   spagene_res$pval<-pnorm(spagene_res$score,mean=mean_rand,sd=sd_rand)
 85 |   spagene_res$adjp<-p.adjust(spagene_res$pval,method="BH")
 86 |   return(list(normexp=expr,spagene_res=spagene_res))
 87 | }
 88 | 
 89 | #' Find spatial patterns
 90 | #' @description Find spatial patterns
 91 | #' @param spagene_res result from SpaGene, a list containing normexp and spagene_res
 92 | #' @param cutoff the adjp cutoff to select spatially variable genes (default: 0.01)
 93 | #' @param nPattern the number of patterns (default:8)
 94 | #' @return a list containing the pattern (pattern), gene similarity with the pattern (genepattern), and the pattern weight (patternw)
 95 | #' @export
 96 | 
 97 | FindPattern<-function(spagene_res,cutoff=0.01,nPattern=8){
 98 | 
 99 |   genes<-rownames(spagene_res$spagene_res)[spagene_res$spagene_res$adjp<cutoff]
100 |   data<-spagene_res$normexp
101 |   data_g<-as.matrix(data[rownames(data)%in%genes,])
102 |   set.seed(16)
103 |   model<-RcppML::nmf(data_g,nPattern,verbose = FALSE)
104 |   cellload<-model$h
105 |   patternw<-model$d
106 |   genew<-model$w
107 |   rownames(genew)<-rownames(data_g)
108 |   rownames(cellload)<-paste0("Pattern",1:nPattern)
109 |   genepattern<-cor(t(data_g),t(cellload),method="spearman")
110 |   return(list(pattern=cellload,genepattern=genepattern,patternw=patternw))
111 | }
112 | 
113 | 
#' Identify spatially colocalized ligand-receptor pairs
#'
#' @description Identify spatially colocalized ligand-receptor pairs
#'
#' @param expr gene expression matrix, the row is the gene and the column is the spot/cell
#' @param location location matrix, the row number of location should match the column number of expr
#' @param normalize whether to normalize the data (default: TRUE)
#' @param topn the number of spots/cells considered high expression (default: 20 percent of the total spots/cells)
#' @param knn the number of nearest neighbours to search (default: 8)
#' @param perm the number of random permutations (default: 500)
#' @param minN the minimum number of spots/cells with gene expression. Genes expressed equal to or less than minN spots/cells are excluded (default:0)
#' @param sizefactor the size factor for normalization (default:10000)
#' @param LRpair ligand-receptor pairs: a matrix or data frame whose first column is the ligand and second column is the receptor (required)
#' @return a data frame containing the result (score, comm, zval, pval, adjp) of each ligand-receptor pair whose two genes are both present in expr
#' @export
SpaGene_LR <- function(expr, location, normalize = T, topn = floor(0.2 * dim(location)[1]), knn = 8, perm = 500, minN = 0, sizefactor = 10000, LRpair = LRpair) {
  # The default `LRpair = LRpair` refers to itself, so evaluating it can only
  # fail with a cryptic promise error; report the missing argument clearly.
  if (missing(LRpair)) {
    stop("LRpair must be provided: a two-column matrix or data frame of ligand and receptor names")
  }

  # Fixed seed so the permutation null and the random tie-breaking are
  # reproducible between calls (this resets the caller's RNG state).
  set.seed(1)

  ncell <- dim(location)[1]
  if (dim(expr)[2] != ncell) {
    stop("the ncol of expr should match the nrow of location")
  }

  # drop = FALSE keeps a matrix even when exactly one gene passes the filter.
  expr <- expr[Matrix::rowSums(expr > 0) > minN, , drop = FALSE]
  ngene <- dim(expr)[1]

  if (is.null(rownames(expr)) && ngene > 0) {
    rownames(expr) <- paste0("gene", seq_len(ngene))
  }

  # First column of nn.idx is the query point itself; Caldegree_pair drops it.
  nnmatrix <- RANN::nn2(location, k = knn)$nn.idx

  # Null distribution: score between two independent random sets of `topn` spots.
  rand_result <- vapply(seq_len(perm), function(x) {
    first <- sample(seq_len(ncell), topn)
    second <- sample(seq_len(ncell), topn)
    Caldegree_pair(first, second, nnmatrix, knn)
  }, numeric(1))
  mean_rand <- mean(rand_result)
  sd_rand <- sd(rand_result)

  if (normalize == TRUE) {
    expr <- Matrix::t(Matrix::t(expr) / (Matrix::colSums(expr)) * sizefactor)
  }

  npair <- dim(LRpair)[1]
  # as.character guards against factor columns being used as integer codes.
  ligands <- as.character(LRpair[, 1])
  receptors <- as.character(LRpair[, 2])
  genenames <- rownames(expr)

  lr_result <- data.frame(score = rep(NA_real_, npair), comm = rep(NA_real_, npair), row.names = rownames(LRpair), stringsAsFactors = FALSE)

  for (pairid in seq_len(npair)) {
    ligand <- ligands[pairid]
    receptor <- receptors[pairid]
    # Only score pairs for which both genes survived the expression filter.
    if (sum(genenames %in% c(ligand, receptor)) == 2) {
      # The random second key breaks expression ties at random.
      ligandind <- order(expr[genenames == ligand, ], sample(ncell, ncell), decreasing = TRUE)[seq_len(topn)]
      receptorind <- order(expr[genenames == receptor, ], sample(ncell, ncell), decreasing = TRUE)[seq_len(topn)]
      lr_result$score[pairid] <- Caldegree_pair(ligandind, receptorind, nnmatrix, knn)
      # comm: number of spots that are high for both ligand and receptor.
      lr_result$comm[pairid] <- length(intersect(ligandind, receptorind))
    }
  }

  lr_result <- lr_result[!is.na(lr_result$score), ]

  lr_result$zval <- (lr_result$score - mean_rand) / sd_rand
  # Lower-tail p-value: a score below the permutation mean is the significant direction.
  lr_result$pval <- pnorm(lr_result$score, mean = mean_rand, sd = sd_rand)
  lr_result$adjp <- p.adjust(lr_result$pval, method = "BH")

  lr_result
}
183 | 
184 | 
#' Identify spatially variable genes for extremely sparse data
#'
#' @description Identify spatially variable genes based on the spatial connectedness of spots/cells with high expression. For genes with different sparsity level, the function adjusts the neighborhood search region automatically.
#'
#' @param expr gene expression matrix, the row is the gene and the column is the spot/cell
#' @param location location matrix, the row number of location should match the column number of expr
#' @param normalize whether to normalize the data (default: TRUE)
#' @param maxN the maximum number of spots/cells considered high expression (default: 10 percent of the total spots/cells)
#' @param minN the minimum number of spots/cells considered high expression (default: 50. genes expressed in minN or fewer cells/spots are excluded)
#' @param perm the number of random permutations (default: 500)
#' @param sizefactor the size factor for normalization (default:10000)
#' @param weight  weights assigned to degree. If NULL, equal weight, wi=1, i is the degree, i=0,1,...2*knn; if "linear", wi=0.5+0.5*i/(2*knn); or weight can be a numeric vector of length 2*knn+1 (default:NULL)
#' @return a list containing results of each gene (spagene_res) and normalized gene expression matrix (normexp)
#' @export
SpaGene_sparse <- function(expr, location, normalize = TRUE, maxN = floor(0.1 * dim(location)[1]), minN = 50, perm = 500, sizefactor = 10000, weight = NULL) {
  ncell <- dim(location)[1]
  if (dim(expr)[2] != ncell) {
    stop("the ncol of expr should match the nrow of location")
  }

  # drop = FALSE keeps a matrix even when exactly one gene passes the filter.
  expr <- expr[Matrix::rowSums(expr > 0) > minN, , drop = FALSE]
  ngene <- dim(expr)[1]

  if (is.null(rownames(expr)) && ngene > 0) {
    rownames(expr) <- paste0("gene", seq_len(ngene))
  }

  # Ladder of sparsity levels: level j uses topn = minN * 2^(j - 1) high spots.
  # knn is reversed so the smallest topn gets the largest neighbourhood.
  # max(1, .) keeps at least one level when maxN is well below minN.
  num <- max(1, round(log2(maxN / minN)) + 1)
  topn <- minN * 2^(seq_len(num) - 1)
  knn <- rev(cumsum(seq(8, 8 * num, 8)))

  mean_rand <- sd_rand <- rep(0, num)
  nnmatrix <- vector("list", num)

  for (i in seq_len(num)) {
    nnmatrix[[i]] <- RANN::nn2(location, k = knn[i])$nn.idx

    # Permutation null for this level.
    rand_result <- vapply(seq_len(perm), function(x) {
      ind <- sample(seq_len(ncell), topn[i])
      Caldegree(ind, nnmatrix[[i]], knn[i], weight = weight)
    }, numeric(1))

    mean_rand[i] <- mean(rand_result)
    sd_rand[i] <- sd(rand_result)
  }

  # Score one gene: pick the largest level whose topn is below the number of
  # expressing spots, then compare its score with that level's null.
  score_gene <- function(geneexp, nexpressed) {
    ind <- max(which(topn < nexpressed))
    # The random second key breaks expression ties at random.
    highind <- order(geneexp, sample(ncell, ncell), decreasing = TRUE)[seq_len(topn[ind])]
    high <- Caldegree(highind, nnmatrix[[ind]], knn[ind], weight = weight)
    c(high,
      (high - mean_rand[ind]) / sd_rand[ind],
      pnorm(high, mean = mean_rand[ind], sd = sd_rand[ind]))
  }

  stats <- matrix(NA_real_, nrow = ngene, ncol = 3)

  if (methods::is(expr, "sparseMatrix")) {
    # Work on the transposed, column-compressed matrix so that the non-zero
    # entries of gene g are the contiguous slice (p[g] + 1):p[g + 1].
    exprt <- methods::as(Matrix::t(expr), "CsparseMatrix")
    colind <- exprt@i + 1
    dp <- exprt@p
    expval <- exprt@x

    if (normalize == TRUE) {
      lib_size <- Matrix::rowSums(exprt)
      expval <- log(expval / lib_size[colind] * sizefactor + 1)
      exprt@x <- expval
      expr <- Matrix::t(exprt)
    }

    for (geneind in seq_len(ngene)) {
      nstored <- dp[geneind + 1] - dp[geneind]
      geneexp <- rep(0, ncell)
      valind <- seq.int(dp[geneind] + 1, length.out = nstored)
      geneexp[colind[valind]] <- expval[valind]
      stats[geneind, ] <- score_gene(geneexp, nstored)
    }
  } else {
    if (normalize == TRUE) {
      expr <- log(t(t(expr) / (colSums(expr)) * sizefactor) + 1)
    }

    for (geneind in seq_len(ngene)) {
      geneexp <- expr[geneind, ]
      stats[geneind, ] <- score_gene(geneexp, sum(geneexp > 0))
    }
  }

  spagene_res <- data.frame(score = stats[, 1], zval = stats[, 2], pval = stats[, 3], row.names = rownames(expr), stringsAsFactors = FALSE)
  spagene_res$adjp <- p.adjust(spagene_res$pval, method = "BH")

  list(normexp = expr, spagene_res = spagene_res)
}
283 | 
284 | 
#' Identify spatially variable genes within the same cell type
#'
#' @description Identify spatially variable genes within the same cell type
#'
#' @param expr gene expression matrix, the row is the gene and the column is the spot/cell
#' @param location location matrix, the row number of location should match the column number of expr
#' @param CellType the cell type, the length should match the row number of location
#' @param normalize whether to normalize the data (default: TRUE)
#' @param top the maximum ratio of spots/cells in the same cell type considered high expression (default: 20 percent of the spots/cells within a cell type, 10 is used if top is less than 10)
#' @param knn the number of nearest neighbours to search (default: 8)
#' @param minN the minimum number of spots/cells  (default: 0. genes with less than or equal to minN cells/spots expressed are excluded)
#' @param perm the number of random permutations (default: 500)
#' @param weight  weights assigned to degree. If NULL, equal weight, wi=1, i is the degree, i=0,1,...2*knn; if "linear", wi=0.5+0.5*i/(2*knn); or weight is a numeric vector of length 2*knn+1 (default:NULL)
#' @return a data frame containing results of each gene in each cell type (cell types with 10 or fewer spots/cells are skipped); NULL if no cell type is large enough
#' @export
SpaGene_CT <- function(expr, location, CellType, normalize = TRUE, top = 0.2, knn = 8, minN = 0, perm = 500, weight = NULL) {
  ncell <- dim(location)[1]
  if (dim(expr)[2] != ncell) {
    stop("the ncol of expr should match the nrow of location ")
  }
  if (length(CellType) != ncell) {
    stop("the cell type length should match the nrow of location")
  }

  # drop = FALSE keeps a matrix even when exactly one gene passes the filter.
  expr <- expr[Matrix::rowSums(expr > 0) > minN, , drop = FALSE]
  ngene <- dim(expr)[1]

  if (is.null(rownames(expr)) && ngene > 0) {
    rownames(expr) <- paste0("gene", seq_len(ngene))
  }

  CT <- unique(CellType)

  # Only the per-gene ranking of spots is used below, so library-size scaling
  # without a log transform is sufficient.
  if (normalize == TRUE) {
    expr <- Matrix::t(Matrix::t(expr) / (Matrix::colSums(expr)))
  }

  nnmatrix <- RANN::nn2(location, k = knn)$nn.idx

  is_sparse <- methods::is(expr, "sparseMatrix")
  if (is_sparse) {
    # Transposed, column-compressed layout: gene g's non-zero entries are the
    # contiguous slice (p[g] + 1):p[g + 1].
    exprt <- methods::as(Matrix::t(expr), "CsparseMatrix")
    colind <- exprt@i + 1
    dp <- exprt@p
    expval <- exprt@x
  }

  per_type <- vector("list", length(CT))

  for (nCT in seq_along(CT)) {
    celltypeid <- which(CellType == CT[nCT])
    if (length(celltypeid) <= 10) {
      next
    }

    topn <- max(floor(length(celltypeid) * top), 10)

    # Null distribution: random `topn` spots drawn from within this cell type.
    rand_result <- vapply(seq_len(perm), function(x) {
      ind <- sample(celltypeid, topn)
      Caldegree(ind, nnmatrix, knn, weight = weight)
    }, numeric(1))
    mean_rand <- mean(rand_result)
    sd_rand <- sd(rand_result)

    result <- data.frame(score = rep(NA_real_, ngene), names = rownames(expr), CT = CT[nCT], stringsAsFactors = FALSE)

    for (geneind in seq_len(ngene)) {
      if (is_sparse) {
        geneexp <- rep(0, ncell)
        ind <- seq.int(dp[geneind] + 1, length.out = dp[geneind + 1] - dp[geneind])
        geneexp[colind[ind]] <- expval[ind]
      } else {
        geneexp <- expr[geneind, ]
      }

      # Rank all spots (ties broken at random), then keep the top spots that
      # belong to the current cell type.
      highind <- order(geneexp, sample(ncell, ncell), decreasing = TRUE)
      highind <- highind[highind %in% celltypeid][seq_len(topn)]
      result$score[geneind] <- Caldegree(highind, nnmatrix, knn, weight = weight)
    }

    result$zval <- (result$score - mean_rand) / sd_rand
    result$pval <- pnorm(result$score, mean = mean_rand, sd = sd_rand)
    result$adjp <- p.adjust(result$pval, method = "BH")
    per_type[[nCT]] <- result
  }

  per_type <- per_type[!vapply(per_type, is.null, logical(1))]
  if (length(per_type) == 0) {
    return(NULL)
  }
  do.call(rbind, per_type)
}
374 | 
375 | 
376 | 
377 | 
#' Plot patterns
#'
#' @description plot patterns from spatially variable genes
#'
#' @param pattern pattern result from FindPattern
#' @param location location matrix with two columns (x and y)
#' @param max.cutoff the quantile at which pattern values are capped (default:0.9)
#' @param pt.size the point size (default:2)
#' @param alpha.min the alpha value of the minimum value (default:0.1)
#' @return the per-pattern ggplots combined into one layout by patchwork::wrap_plots
#' @export
PlotPattern <- function(pattern, location, max.cutoff = 0.9, pt.size = 2, alpha.min = 0.1) {
  if (!requireNamespace("RColorBrewer", quietly = TRUE)) {
    install.packages("RColorBrewer")
  }

  colnames(location) <- c("x", "y")
  npattern <- dim(pattern$pattern)[1]

  # ggplot2 calls are namespace-qualified so the function also works when the
  # package is used via `::` without ggplot2 being attached.
  palette <- rev(RColorBrewer::brewer.pal(n = 10, name = "RdYlBu"))
  blank_axes <- ggplot2::theme(
    axis.line = ggplot2::element_blank(),
    axis.text.x = ggplot2::element_blank(),
    axis.text.y = ggplot2::element_blank(),
    axis.ticks.x = ggplot2::element_blank(),
    axis.ticks.y = ggplot2::element_blank()
  )

  plist <- lapply(seq_len(npattern), function(i) {
    feature <- pattern$pattern[i, ]
    # Cap values at the max.cutoff quantile.
    max.use <- quantile(feature, max.cutoff)
    feature[feature > max.use] <- max.use

    # Rescale to [alpha.min, 1]; a constant pattern would give 0/0 = NaN and
    # drop every point, so it gets alpha.min throughout.
    value_range <- max(feature) - min(feature)
    if (value_range > 0) {
      alpha <- (feature - min(feature)) / value_range * (1 - alpha.min) + alpha.min
    } else {
      alpha <- rep(alpha.min, length(feature))
    }

    tmp <- as.data.frame(cbind(location, exp = feature, alpha = alpha))

    ggplot2::ggplot(tmp, ggplot2::aes(x = x, y = y, col = exp, alpha = alpha)) +
      ggplot2::geom_point(size = pt.size) +
      ggplot2::scale_y_reverse() +
      ggplot2::scale_color_gradientn(colours = palette) +
      ggplot2::xlab("") +
      ggplot2::ylab("") +
      blank_axes +
      ggplot2::guides(color = "none", alpha = "none") +
      ggplot2::ggtitle(paste0("Pattern", i))
  })

  patchwork::wrap_plots(plist)
}
413 | 
#' plot one specific ligand-receptor pair
#'
#' @description plot one specific ligand-receptor pair to find the colocalized region
#'
#' @param expr gene expression matrix, the row is the gene and the column is the spot/cell
#' @param location location matrix, the row number of location should match the column number of expr
#' @param normalize whether to normalize the data (default: TRUE)
#' @param topn the number of spots/cells considered high expression (default: 20 percent of the total spots/cells)
#' @param knn the number of nearest neighbours to search (default: 8)
#' @param LRpair the ligand-receptor pair for plot, a character vector c(ligand, receptor)
#' @param pt.size the point size (default:2)
#' @param alpha.min the alpha for the minimum value (default:0.1)
#' @param max.cut the quantile at which the LR activity is capped (default:0.95)
#' @return a patchwork of two ggplots: the high-expression category of each spot/cell, and the LR activity
#' @export
plotLR <- function(expr, location, normalize = TRUE, topn = floor(0.2 * dim(location)[1]), knn = 8, LRpair = c("Ptn", "Ptprz1"), pt.size = 2, alpha.min = 0.1, max.cut = 0.95) {
  if (sum(rownames(expr) %in% LRpair) != 2) {
    stop("ligand or receptor are not expressed")
  }

  ncell <- dim(expr)[2]
  if (dim(location)[1] != ncell) {
    stop("the ncol of expr should match the nrow of location")
  }

  nnmatrix <- RANN::nn2(location, k = knn)$nn.idx

  ligand <- expr[LRpair[1], ]
  receptor <- expr[LRpair[2], ]
  if (normalize == TRUE) {
    # Normalise only the two rows that are plotted; transforming the whole
    # matrix with "+ 1" would turn a sparse matrix dense.
    countsum <- Matrix::colSums(expr)
    ligand <- log(ligand / countsum * median(countsum) + 1)
    receptor <- log(receptor / countsum * median(countsum) + 1)
  }

  # Maximum expression among each spot's neighbours (first nn column is the
  # spot itself). drop = FALSE keeps the matrix shape when knn is 2.
  neighbours <- nnmatrix[, -1, drop = FALSE]
  neighbour_max <- function(values) {
    apply(matrix(values[as.vector(neighbours)], nrow = nrow(neighbours)), 1, max)
  }
  ligand_nn <- neighbour_max(ligand)
  receptor_nn <- neighbour_max(receptor)

  # LR activity: own ligand times neighbouring receptor, or the reverse,
  # whichever is larger; capped at the max.cut quantile.
  LRadd <- pmax(ligand * receptor_nn, receptor * ligand_nn)
  LRadd_max <- quantile(LRadd, probs = max.cut)
  LRadd[LRadd > LRadd_max] <- LRadd_max

  if (sum(ligand > 0) > topn) {
    n1 <- order(ligand, sample(ncell, ncell), decreasing = TRUE)[seq_len(topn)]
  } else {
    n1 <- which(ligand > 0)
  }
  if (sum(receptor > 0) > topn) {
    n2 <- order(receptor, sample(ncell, ncell), decreasing = TRUE)[seq_len(topn)]
  } else {
    n2 <- which(receptor > 0)
  }

  # 0: both low, 1: ligand high, 2: receptor high, 3: both high.
  expcol <- rep(0, ncell)
  expcol[n1] <- 1
  expcol[n2] <- 2
  expcol[intersect(n1, n2)] <- 3

  # Explicit levels (with drop = FALSE on the scale) keep colours and labels
  # aligned even when a category is absent.
  tmp <- data.frame(x = location[, 1], y = location[, 2], Exp = factor(expcol, levels = 0:3))
  tmpLRadd <- data.frame(x = location[, 1], y = location[, 2], LR = LRadd)

  # Rescale to [alpha.min, 1]; constant activity would give 0/0 = NaN.
  value_range <- max(LRadd) - min(LRadd)
  if (value_range > 0) {
    alpha <- (LRadd - min(LRadd)) / value_range * (1 - alpha.min) + alpha.min
  } else {
    alpha <- rep(alpha.min, length(LRadd))
  }

  blank_axes <- ggplot2::theme(
    axis.line = ggplot2::element_blank(),
    axis.text.x = ggplot2::element_blank(),
    axis.text.y = ggplot2::element_blank(),
    axis.ticks.x = ggplot2::element_blank(),
    axis.ticks.y = ggplot2::element_blank()
  )

  p1 <- ggplot2::ggplot(tmp, ggplot2::aes(x = x, y = y, col = Exp)) +
    ggplot2::geom_point(size = pt.size) +
    ggplot2::scale_color_manual(
      values = c("gray", "red", "green", "blue"),
      labels = c("Both low", "Ligand high", "Receptor High", "Both High"),
      drop = FALSE
    ) +
    ggplot2::ggtitle(paste0(LRpair, collapse = "_")) +
    ggplot2::xlab("") +
    ggplot2::ylab("") +
    blank_axes

  p2 <- ggplot2::ggplot(tmpLRadd, ggplot2::aes(x = x, y = y, col = LR)) +
    ggplot2::geom_point(size = pt.size, alpha = alpha) +
    ggplot2::scale_color_gradient2(midpoint = quantile(LRadd, probs = 0.5), low = "gray", high = "red", mid = "gray") +
    ggplot2::xlab("") +
    ggplot2::ylab("") +
    blank_axes +
    ggplot2::labs(color = "LR")

  # wrap_plots loads patchwork, which supplies the `&` operator used to apply
  # the reversed y axis to both panels.
  patchwork::wrap_plots(p1, p2) & ggplot2::scale_y_reverse()
}
468 | 
469 | 
# Connectivity score of a set of spots on the k-nearest-neighbour graph.
#
# nodelist: row indices (into nnmatrix) of the selected spots.
# nnmatrix: neighbour index matrix; its first column is dropped (for RANN::nn2
#           output that column is the query point itself).
# knnnum:   the k that nnmatrix was built with (nnmatrix has knnnum columns).
# weight:   NULL, "linear", or a numeric vector of length 2 * knnnum + 1 giving
#           one weight per degree 0 .. 2 * knnnum.
#
# Returns a single number. With weight = NULL it is
# 2 * knnnum - (edges between selected spots) / (number of selected spots).
# Otherwise it is the sum of the cumulative, weighted degree distribution,
# where a spot's degree is its out-degree plus its in-degree within the set.
Caldegree <- function(nodelist, nnmatrix, knnnum, weight = NULL) {
  nodenum <- length(nodelist)
  nnmatrix_sub <- nnmatrix[nodelist, -1]

  if (!is.null(weight)) {
    if (is.character(weight)) {
      if (length(weight) == 1L && weight == "linear") {
        weight <- 0.5 + 0:(knnnum * 2) / (knnnum * 2) * 0.5
      } else {
        stop("the weight should be NULL, \"linear\", or a numeric vector of length 2*k+1")
      }
    } else if (!is.numeric(weight) || length(weight) != 2 * knnnum + 1) {
      # Reject anything that is not numeric OR has the wrong length.
      stop("the weight should be a numeric vector and its length should be equal to 2*k+1")
    }
  }

  if (is.null(weight)) {
    # Number of neighbour links that land inside the selected set.
    num_edge <- sum(!is.na(match(nnmatrix_sub, nodelist)))
    return(2 * knnnum - num_edge / nodenum)
  }

  # matchres[r, c]: position in nodelist of the c-th neighbour of the r-th
  # selected spot, or NA when that neighbour is not selected.
  matchres <- matrix(match(nnmatrix_sub, nodelist), ncol = knnnum - 1, byrow = FALSE)
  hit <- !is.na(matchres)

  # Out-degree (row sums) plus in-degree (how often each spot is a target).
  deg <- rowSums(hit) + tabulate(matchres[hit], nbins = nodenum)

  sum(cumsum(tabulate(deg + 1, nbins = 2 * knnnum + 1) / nodenum * weight))
}
506 | 
507 | 
# Connectivity score between two sets of spots on the k-nearest-neighbour graph.
#
# nodelist1: row indices (into nnmatrix) of the first set; its neighbours are searched.
# nodelist2: row indices of the second set; neighbours are matched against it.
# nnmatrix:  neighbour index matrix; its first column is dropped.
# knnnum:    the k that nnmatrix was built with (nnmatrix has knnnum columns).
#
# Returns the sum of the cumulative degree distribution over both sets,
# normalised by the size of the first set.
Caldegree_pair <- function(nodelist1, nodelist2, nnmatrix, knnnum) {
  n_first <- length(nodelist1)
  neighbours <- nnmatrix[nodelist1, -1]

  # target[r, c]: position in nodelist2 of the c-th neighbour of the r-th spot
  # of nodelist1, or NA when that neighbour is not in nodelist2.
  target <- matrix(match(neighbours, nodelist2), ncol = knnnum - 1, byrow = FALSE)
  linked <- !is.na(target)

  # Links leaving each spot of set 1, and links arriving at each spot of set 2.
  out_degree <- rowSums(linked)
  in_degree <- tabulate(target[linked], nbins = length(nodelist2))

  degree_share <- tabulate(c(out_degree, in_degree) + 1, nbins = 2 * knnnum + 1) / n_first
  sum(cumsum(degree_share))
}
526 | 
527 | 
528 | 
#' calculate the activity for each LR pair
#'
#' @description calculate the LR activity
#' @param expr gene expression matrix, the row is the gene and the column is the spot/cell
#' @param location location matrix, the row number of location should match the column number of expr
#' @param normalize whether to normalize the data (default: TRUE)
#' @param knn the number of nearest neighbours to search (default: 8)
#' @param LRpair ligand-receptor pairs: a matrix or data frame whose first column is the ligand and second column is the receptor (required)
#' @return a data matrix with LR activity in each location, row is LR pair (named ligand_receptor), column is location. Pairs whose genes are not both present in expr are omitted; NULL if no pair is present.
#' @export
LRactivity <- function(expr, location, normalize = T, knn = 8, LRpair = LRpair) {
  # The default `LRpair = LRpair` refers to itself, so evaluating it can only
  # fail with a cryptic promise error; report the missing argument clearly.
  if (missing(LRpair)) {
    stop("LRpair must be provided: a two-column matrix or data frame of ligand and receptor names")
  }

  nnmatrix <- RANN::nn2(location, k = knn[1])$nn.idx
  countsum <- Matrix::colSums(expr)

  # as.character guards against factor columns being used as integer codes.
  ligands <- as.character(LRpair[, 1])
  receptors <- as.character(LRpair[, 2])

  # Subset to the ligand/receptor genes before normalising: the "+ 1" inside
  # the log would otherwise turn the whole (possibly sparse) matrix dense.
  lr_exp <- as.matrix(expr[rownames(expr) %in% unique(c(ligands, receptors)), , drop = FALSE])
  if (normalize == TRUE) {
    lr_exp <- t(log(t(lr_exp) / countsum * median(countsum) + 1))
  }

  # A pair is scored only when both of its (distinct) genes are present.
  present <- ligands %in% rownames(lr_exp) & receptors %in% rownames(lr_exp) & ligands != receptors
  if (!any(present)) {
    return(NULL)
  }

  # Per gene, the maximum expression among each location's neighbours (the
  # first nn column is the location itself). drop = FALSE keeps knn = 2 working.
  neighbours <- nnmatrix[, -1, drop = FALSE]
  lr_expneigh <- t(apply(lr_exp, 1, function(values) {
    apply(matrix(values[as.vector(neighbours)], nrow = nrow(neighbours)), 1, max)
  }))
  rownames(lr_expneigh) <- rownames(lr_exp)

  activity <- lapply(which(present), function(lrind) {
    lig <- ligands[lrind]
    rec <- receptors[lrind]
    # Own ligand times neighbouring receptor, or the reverse, whichever is larger.
    pmax(lr_exp[lig, ] * lr_expneigh[rec, ], lr_exp[rec, ] * lr_expneigh[lig, ])
  })

  LRactivity <- do.call(rbind, activity)
  # Name rows by pair so they can be identified when some pairs are skipped.
  rownames(LRactivity) <- paste(ligands[present], receptors[present], sep = "_")
  LRactivity
}
578 | 
579 | 
#' Find spatial patterns across multiple samples
#' @description Find spatial patterns across multiple samples
#' @param spagene_list a list of results from SpaGene, each result containing normexp and spagene_res
#' @param cutoff the adjp cutoff to select spatially variable genes (default: 0.01)
#' @param nPattern the number of patterns (default:12)
#' @return a list containing the pattern (pattern), gene similarity with the pattern (genepattern), and the pattern weight (patternw)
#' @export
FindPattern_Multi <- function(spagene_list, cutoff = 0.01, nPattern = 12) {
  spageneres <- spagene_list[[1]]$spagene_res
  # which() drops genes whose adjp is NA instead of yielding NA names.
  genes <- rownames(spageneres)[which(spageneres$adjp < cutoff)]
  # as.matrix: merge() works on data frames, so a sparse Matrix is densified first.
  data <- as.matrix(spagene_list[[1]]$normexp)

  # seq_along(...)[-1] is empty for a one-sample list (2:1 would count down).
  for (i in seq_along(spagene_list)[-1]) {
    spageneres <- spagene_list[[i]]$spagene_res
    # Union of the spatially variable genes of all samples.
    genes <- unique(c(genes, rownames(spageneres)[which(spageneres$adjp < cutoff)]))
    tmpdata <- as.matrix(spagene_list[[i]]$normexp)
    # Merge on row names: only genes present in every sample are kept, and the
    # spots of each sample are appended as further columns.
    data <- merge(data, tmpdata, by = 0)
    rownames(data) <- data[, 1]
    data <- data[, -1]
  }

  data_g <- as.matrix(data[rownames(data) %in% genes, , drop = FALSE])

  # Fixed seed so the factorization is reproducible (resets the caller's RNG state).
  set.seed(16)
  model <- RcppML::nmf(data_g, nPattern, verbose = FALSE)

  cellload <- model$h
  patternw <- model$d
  rownames(cellload) <- paste0("Pattern", seq_len(nPattern))

  # Spearman correlation between each gene's expression and each pattern's loading.
  genepattern <- cor(t(data_g), t(cellload), method = "spearman")

  list(pattern = cellload, genepattern = genepattern, patternw = patternw)
}
614 | 
615 | 
#' Plot patterns across multiple samples
#'
#' @description plot patterns from spatially variable genes from multiple samples
#'
#' @param pattern pattern result from FindPattern_Multi
#' @param locationlist a list of location matrices (two columns each), in the same order as the samples whose spots/cells make up the columns of the pattern
#' @param patternid the index of the pattern to plot (default:1)
#' @param max.cutoff the quantile at which pattern values are capped (default:0.9)
#' @param pt.size the point size (default:2)
#' @param alpha.min the alpha value of the minimum value (default:0.1)
#' @return the per-sample ggplots combined into one layout by patchwork::wrap_plots
#' @export
PlotPattern_Multi <- function(pattern, locationlist, patternid = 1, max.cutoff = 0.9, pt.size = 2, alpha.min = 0.1) {
  if (!requireNamespace("RColorBrewer", quietly = TRUE)) {
    install.packages("RColorBrewer")
  }

  nspots <- vapply(locationlist, function(loc) dim(loc)[1], integer(1))
  if (sum(nspots) > dim(pattern$pattern)[2]) {
    stop("locationlist contains more spots/cells than the pattern has columns")
  }

  # ggplot2 calls are namespace-qualified so the function also works when the
  # package is used via `::` without ggplot2 being attached.
  palette <- rev(RColorBrewer::brewer.pal(n = 10, name = "RdYlBu"))
  blank_axes <- ggplot2::theme(
    axis.line = ggplot2::element_blank(),
    axis.text.x = ggplot2::element_blank(),
    axis.text.y = ggplot2::element_blank(),
    axis.ticks.x = ggplot2::element_blank(),
    axis.ticks.y = ggplot2::element_blank()
  )

  plist <- list()
  # locind: first pattern column belonging to the current sample.
  locind <- 1
  # Cap shared by all samples so the colour scale is comparable across panels.
  maxoverall <- quantile(pattern$pattern[patternid, ], max.cutoff)

  for (i in seq_along(locationlist)) {
    location <- locationlist[[i]]
    colnames(location) <- c("x", "y")

    feature <- pattern$pattern[patternid, locind:(locind + nrow(location) - 1)]
    max.use <- min(quantile(feature, max.cutoff), maxoverall)
    feature[feature > max.use] <- max.use

    # Rescale to [alpha.min, 1]; a constant feature would give 0/0 = NaN and
    # drop every point, so it gets alpha.min throughout.
    value_range <- max(feature) - min(feature)
    if (value_range > 0) {
      alpha <- (feature - min(feature)) / value_range * (1 - alpha.min) + alpha.min
    } else {
      alpha <- rep(alpha.min, length(feature))
    }

    tmp <- as.data.frame(cbind(location, exp = feature, alpha = alpha))

    plist[[i]] <- ggplot2::ggplot(tmp, ggplot2::aes(x = x, y = y, col = exp, alpha = alpha)) +
      ggplot2::geom_point(size = pt.size) +
      ggplot2::scale_y_reverse() +
      ggplot2::scale_color_gradientn(limits = c(0, maxoverall), colours = palette) +
      ggplot2::xlab("") +
      ggplot2::ylab("") +
      blank_axes +
      ggplot2::guides(color = "none", alpha = "none") +
      ggplot2::ggtitle(paste0("Sample ", i, ":Pattern", patternid))

    locind <- locind + nrow(location)
  }

  patchwork::wrap_plots(plist)
}
656 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # SpaGene
 2 | ==========
 3 | * [Introduction](#introduction)
 4 | * [Installation](#installation)
 5 | * [Citation](#citation)
 6 | * [Tutorial](#tutorial)
 7 | 
 8 | <a name="introduction"/>
 9 | 
10 | # Introduction
11 | 
12 | SpaGene is an R package to find spatially variable genes and colocalized gene pairs from spatial omics data.
13 | 
14 | <a name="installation"/>
15 | 
16 | # Installation
17 | 
18 | ```R
19 | 
20 | library(devtools)
21 | install_github("liuqivandy/SpaGene")
22 | ```
23 | 
24 | 
25 | <a name="citation"/>
26 | 
27 | # Citation
28 | 
29 |  Liu Q, Hsu Chih-Yuan, Shyr Yu. Scalable and model-free detection of spatial patterns and colocalization, Genome Research,2022(doi: 10.1101/gr.276851.122) 
30 |  
31 | 
32 | <a name="tutorial"/>
33 | 
34 | # Tutorial
35 | - [Analyze spatial transcriptomics MOB data](https://htmlpreview.github.io/?https://github.com/liuqivandy/SpaGene/blob/master/Tutorial/mob.html)
36 | - [Analyze MERFISH data](https://htmlpreview.github.io/?https://github.com/liuqivandy/SpaGene/blob/master/Tutorial/MERFISH.html)
37 | - [Analyze Slideseq V2 mouse cerebellum data](https://htmlpreview.github.io/?https://github.com/liuqivandy/SpaGene/blob/master/Tutorial/mc_slideseqv2.html)
38 | - [Analyze 10X Visium mouse brain data](https://htmlpreview.github.io/?https://github.com/liuqivandy/SpaGene/blob/master/Tutorial/mbrain_10X.html)
39 | - [Analyze HDST MOB data](https://htmlpreview.github.io/?https://github.com/liuqivandy/SpaGene/blob/master/Tutorial/HDST.html)
40 | - [Analyze spatial transcriptomics Breast Cancer data](https://htmlpreview.github.io/?https://github.com/liuqivandy/SpaGene/blob/master/Tutorial/Breastcancer.html)
41 | - [Analyze Slideseq V2 MOB data](https://htmlpreview.github.io/?https://github.com/liuqivandy/SpaGene/blob/master/Tutorial/mob_slideseqv2.html)
42 | - [Analyze two 10X Visium mouse brain datasets: anterior and posterior regions](https://htmlpreview.github.io/?https://github.com/liuqivandy/SpaGene/blob/master/Tutorial/mb_anterior_posterior.html)
43 | 


--------------------------------------------------------------------------------
/Rawdata/BreastCancer/bc_raw.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuqivandy/SpaGene/7606d98af6f350d920b2e448fef0e47171513b6e/Rawdata/BreastCancer/bc_raw.rds


--------------------------------------------------------------------------------
/Rawdata/HDST/CN24_D1_unmodgtf_filtered_red_ut_HDST_final_clean.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuqivandy/SpaGene/7606d98af6f350d920b2e448fef0e47171513b6e/Rawdata/HDST/CN24_D1_unmodgtf_filtered_red_ut_HDST_final_clean.rds


--------------------------------------------------------------------------------
/Rawdata/LRpair_human.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuqivandy/SpaGene/7606d98af6f350d920b2e448fef0e47171513b6e/Rawdata/LRpair_human.rds


--------------------------------------------------------------------------------
/Rawdata/MerFish/merfish_raw.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuqivandy/SpaGene/7606d98af6f350d920b2e448fef0e47171513b6e/Rawdata/MerFish/merfish_raw.rds


--------------------------------------------------------------------------------
/Rawdata/Slideseq/SlideseqV2_ROI.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuqivandy/SpaGene/7606d98af6f350d920b2e448fef0e47171513b6e/Rawdata/Slideseq/SlideseqV2_ROI.rds


--------------------------------------------------------------------------------
/Rawdata/Slideseq/slideseqv2_mob_raw.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuqivandy/SpaGene/7606d98af6f350d920b2e448fef0e47171513b6e/Rawdata/Slideseq/slideseqv2_mob_raw.rds


--------------------------------------------------------------------------------
/Rawdata/brain10X/brain10x_raw.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuqivandy/SpaGene/7606d98af6f350d920b2e448fef0e47171513b6e/Rawdata/brain10X/brain10x_raw.rds


--------------------------------------------------------------------------------
/Rawdata/mob/mob_raw.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuqivandy/SpaGene/7606d98af6f350d920b2e448fef0e47171513b6e/Rawdata/mob/mob_raw.rds


--------------------------------------------------------------------------------
/SpaGene.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 | 


--------------------------------------------------------------------------------
/man/FindPattern.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpaGene.R
 3 | \name{FindPattern}
 4 | \alias{FindPattern}
 5 | \title{Find spatial patterns}
 6 | \usage{
 7 | FindPattern(spagene_res, cutoff = 0.01, nPattern = 8)
 8 | }
 9 | \arguments{
10 | \item{spagene_res}{result from SpaGene, a list containing normexp and spagene_res}
11 | 
12 | \item{cutoff}{the adjp cutoff to select spatially variable genes (default: 0.01)}
13 | 
14 | \item{nPattern}{the number of patterns (default:8)}
15 | }
16 | \value{
17 | a list containing the pattern (pattern), gene similarity with the pattern (genepattern), and the pattern weight (patternw)
18 | }
19 | \description{
20 | Find spatial patterns
21 | }
22 | 


--------------------------------------------------------------------------------
/man/FindPattern_Multi.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpaGene.R
 3 | \name{FindPattern_Multi}
 4 | \alias{FindPattern_Multi}
 5 | \title{Find spatial patterns across multiple samples}
 6 | \usage{
 7 | FindPattern_Multi(spagene_list, cutoff = 0.01, nPattern = 12)
 8 | }
 9 | \arguments{
10 | \item{spagene_list}{a list of results from SpaGene, each result containing normexp and spagene_res}
11 | 
12 | \item{cutoff}{the adjp cutoff to select spatially variable genes (default: 0.01)}
13 | 
14 | \item{nPattern}{the number of patterns (default:12)}
15 | }
16 | \value{
17 | a list containing the pattern (pattern), gene similarity with the pattern (genepattern), and the pattern weight (patternw)
18 | }
19 | \description{
20 | Find spatial patterns across multiple samples
21 | }
22 | 


--------------------------------------------------------------------------------
/man/LRactivity.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpaGene.R
 3 | \name{LRactivity}
 4 | \alias{LRactivity}
 5 | \title{calculate the activity for each LR pair}
 6 | \usage{
 7 | LRactivity(expr, location, normalize = T, knn = 8, LRpair = LRpair)
 8 | }
 9 | \arguments{
10 | \item{expr}{gene expression matrix, the row is the gene and the column is the spot/cell}
11 | 
12 | \item{location}{location matrix, the row number of location should match the column number of expr}
13 | 
14 | \item{normalize}{whether to normalize the data (default: TRUE)}
15 | 
16 | \item{knn}{the number of nearest neighbours to search (default: 8)}
17 | 
18 | \item{LRpair}{the ligand-receptor pair for plot}
19 | }
20 | \value{
21 | a data matrix with LR activity in each location, row is LR pair, column is location.
22 | }
23 | \description{
24 | calculate the LR activity
25 | }
26 | 


--------------------------------------------------------------------------------
/man/PlotPattern.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpaGene.R
 3 | \name{PlotPattern}
 4 | \alias{PlotPattern}
 5 | \title{Plot patterns}
 6 | \usage{
 7 | PlotPattern(pattern, location, max.cutoff = 0.9, pt.size = 2, alpha.min = 0.1)
 8 | }
 9 | \arguments{
10 | \item{pattern}{pattern result from FindPattern}
11 | 
12 | \item{location}{location matrix}
13 | 
14 | \item{max.cutoff}{the maximum value cutoff (default:0.9)}
15 | 
16 | \item{pt.size}{the point size (default:2)}
17 | 
18 | \item{alpha.min}{the alpha value of the minimum value (default:0.1)}
19 | }
20 | \value{
21 | a list of ggplot
22 | }
23 | \description{
24 | plot patterns from spatially variable genes
25 | }
26 | 


--------------------------------------------------------------------------------
/man/PlotPattern_Multi.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpaGene.R
 3 | \name{PlotPattern_Multi}
 4 | \alias{PlotPattern_Multi}
 5 | \title{Plot patterns across multiple samples}
 6 | \usage{
 7 | PlotPattern_Multi(
 8 |   pattern,
 9 |   locationlist,
10 |   patternid = 1,
11 |   max.cutoff = 0.9,
12 |   pt.size = 2,
13 |   alpha.min = 0.1
14 | )
15 | }
16 | \arguments{
17 | \item{pattern}{pattern result from FindPattern_Multi}
18 | 
19 | \item{max.cutoff}{the maximum value cutoff (default:0.9)}
20 | 
21 | \item{pt.size}{the point size (default:2)}
22 | 
23 | \item{alpha.min}{the alpha value of the minimum value (default:0.1)}
24 | 
25 | \item{locationlist}{a list of location matrices, one per sample}
26 | }
27 | \value{
28 | a patchwork object combining one ggplot per sample
29 | }
30 | \description{
31 | plot patterns from spatially variable genes from multiple samples
32 | }
33 | 


--------------------------------------------------------------------------------
/man/SpaGene.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpaGene.R
 3 | \name{SpaGene}
 4 | \alias{SpaGene}
 5 | \title{Identify spatially variable genes}
 6 | \usage{
 7 | SpaGene(
 8 |   expr,
 9 |   location,
10 |   normalize = T,
11 |   topn = floor(0.2 * dim(location)[1]),
12 |   knn = 8,
13 |   perm = 500,
14 |   minN = 0,
15 |   sizefactor = 10000,
16 |   weight = NULL
17 | )
18 | }
19 | \arguments{
20 | \item{expr}{gene expression matrix, the row is the gene and the column is the spot/cell}
21 | 
22 | \item{location}{location matrix, the row number of location should match the column number of expr}
23 | 
24 | \item{normalize}{whether to normalize the data (default: TRUE)}
25 | 
26 | \item{topn}{the number of spots/cells considered high expression (default: 20 percent of the total spots/cells)}
27 | 
28 | \item{knn}{the number of nearest neighbours to search (default: 8)}
29 | 
30 | \item{perm}{the number of random permutations (default: 500)}
31 | 
32 | \item{minN}{the minimum number of spots/cells with gene expression. Genes expressed equal to or less than minN spots/cells are excluded (default:0)}
33 | 
34 | \item{sizefactor}{the size factor for normalization (default:10000)}
35 | 
36 | \item{weight}{weights assigned to degree. If NULL, equal weight, wi=1, i is the degree, i=0,1,...2*knn; if "linear", wi=0.5+0.5*i/(2*knn); or weight is a numeric vector of length 2*knn+1 (default:NULL)}
37 | }
38 | \value{
39 | a list containing results of each gene (spagene_res) and normalized gene expression matrix (normexp)
40 | }
41 | \description{
42 | Identify spatially variable genes based on the spatial connectedness of spots with high expression compared to random permutation
43 | }
44 | 


--------------------------------------------------------------------------------
/man/SpaGene_CT.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpaGene.R
 3 | \name{SpaGene_CT}
 4 | \alias{SpaGene_CT}
 5 | \title{Identify spatially variable genes within the same cell type}
 6 | \usage{
 7 | SpaGene_CT(
 8 |   expr,
 9 |   location,
10 |   CellType,
11 |   normalize = T,
12 |   top = 0.2,
13 |   knn = 8,
14 |   minN = 0,
15 |   perm = 500,
16 |   weight = NULL
17 | )
18 | }
19 | \arguments{
20 | \item{expr}{gene expression matrix, the row is the gene and the column is the spot/cell}
21 | 
22 | \item{location}{location matrix, the row number of location should match the column number of expr}
23 | 
24 | \item{CellType}{the cell type, the length should match the column number of locations}
25 | 
26 | \item{normalize}{whether to normalize the data (default: TRUE)}
27 | 
28 | \item{top}{the maximum ratio of spots/cells in the same cell type considered high expression (default: 20 percent of the spots/cells within a cell type, 10 is used if top is less than 10)}
29 | 
30 | \item{knn}{the number of nearest neighbours to search (default: 8)}
31 | 
32 | \item{minN}{the minimum number of spots/cells  (default: 0. genes with less than or equal to minN cells/spots expressed are excluded)}
33 | 
34 | \item{perm}{the number of random permutations (default: 500)}
35 | 
36 | \item{weight}{weights assigned to degree. If NULL, equal weight, wi=1, i is the degree, i=0,1,...2*knn; if "linear", wi=0.5+0.5*i/(2*knn); or weight is a numeric vector of length 2*knn+1 (default:NULL)}
37 | }
38 | \value{
39 | a data frame containing results of each gene in each cell type
40 | }
41 | \description{
42 | Identify spatially variable genes within the same cell type
43 | }
44 | 


--------------------------------------------------------------------------------
/man/SpaGene_LR.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpaGene.R
 3 | \name{SpaGene_LR}
 4 | \alias{SpaGene_LR}
 5 | \title{Identify spatially colocalized ligand-receptor pairs}
 6 | \usage{
 7 | SpaGene_LR(
 8 |   expr,
 9 |   location,
10 |   normalize = T,
11 |   topn = floor(0.2 * dim(location)[1]),
12 |   knn = 8,
13 |   perm = 500,
14 |   minN = 0,
15 |   sizefactor = 10000,
16 |   LRpair = LRpair
17 | )
18 | }
19 | \arguments{
20 | \item{expr}{gene expression matrix, the row is the gene and the column is the spot/cell}
21 | 
22 | \item{location}{location matrix, the row number of location should match the column number of expr}
23 | 
24 | \item{normalize}{whether to normalize the data (default: TRUE)}
25 | 
26 | \item{topn}{the number of spots/cells considered high expression (default: 20 percent of the total spots/cells)}
27 | 
28 | \item{knn}{the number of nearest neighbours to search (default: 8)}
29 | 
30 | \item{perm}{the number of random permutations (default: 500)}
31 | 
32 | \item{minN}{the minimum number of spots/cells with gene expression. Genes expressed equal to or less than minN spots/cells are excluded (default:0)}
33 | 
34 | \item{sizefactor}{the size factor for normalization (default:10000)}
35 | 
36 | \item{LRpair}{ligand-receptor pair}
37 | }
38 | \value{
39 | a data frame containing the result of each ligand-receptor pair
40 | }
41 | \description{
42 | Identify spatially colocalized ligand-receptor pairs
43 | }
44 | 


--------------------------------------------------------------------------------
/man/SpaGene_sparse.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpaGene.R
 3 | \name{SpaGene_sparse}
 4 | \alias{SpaGene_sparse}
 5 | \title{Identify spatially variable genes for extremely sparse data}
 6 | \usage{
 7 | SpaGene_sparse(
 8 |   expr,
 9 |   location,
10 |   normalize = TRUE,
11 |   maxN = floor(0.1 * dim(location)[1]),
12 |   minN = 50,
13 |   perm = 500,
14 |   sizefactor = 10000,
15 |   weight = NULL
16 | )
17 | }
18 | \arguments{
19 | \item{expr}{gene expression matrix, the row is the gene and the column is the spot/cell}
20 | 
21 | \item{location}{location matrix, the row number of location should match the column number of expr}
22 | 
23 | \item{normalize}{whether to normalize the data (default: TRUE)}
24 | 
25 | \item{maxN}{the maximum number of spots/cells considered high expression (default: 10 percent of the total spots/cells)}
26 | 
27 | \item{minN}{the minimum number of spots/cells considered high expression (default: 50. genes with less than 50 cells/spots expressed are excluded)}
28 | 
29 | \item{perm}{the number of random permutations (default: 500)}
30 | 
31 | \item{sizefactor}{the size factor for normalization (default:10000)}
32 | 
33 | \item{weight}{weights assigned to degree. If NULL, equal weight, wi=1, i is the degree, i=0,1,...2*knn; if "linear", wi=0.5+0.5*i/(2*knn); or weight can be a numeric vector of length 2*knn+1 (default:NULL)}
34 | }
35 | \value{
36 | a list containing results of each gene (spagene_res) and normalized gene expression matrix (normexp)
37 | }
38 | \description{
39 | Identify spatially variable genes based on the spatial connectedness of spots/cells with high expression. For genes with different sparsity levels, the function adjusts the neighborhood search region automatically.
40 | }
41 | 


--------------------------------------------------------------------------------
/man/plotLR.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpaGene.R
 3 | \name{plotLR}
 4 | \alias{plotLR}
 5 | \title{plot one specific ligand-receptor pair}
 6 | \usage{
 7 | plotLR(
 8 |   expr,
 9 |   location,
10 |   normalize = T,
11 |   topn = floor(0.2 * dim(location)[1]),
12 |   knn = 8,
13 |   LRpair = c("Ptn", "Ptprz1"),
14 |   pt.size = 2,
15 |   alpha.min = 0.1,
16 |   max.cut = 0.95
17 | )
18 | }
19 | \arguments{
20 | \item{expr}{gene expression matrix, the row is the gene and the column is the spot/cell}
21 | 
22 | \item{location}{location matrix, the row number of location should match the column number of expr}
23 | 
24 | \item{normalize}{whether to normalize the data (default: TRUE)}
25 | 
26 | \item{topn}{the number of spots/cells considered high expression (default: 20 percent of the total spots/cells)}
27 | 
28 | \item{knn}{the number of nearest neighbours to search (default: 8)}
29 | 
30 | \item{LRpair}{the ligand-receptor pair for plot}
31 | 
32 | \item{pt.size}{the point size (default:2)}
33 | 
34 | \item{alpha.min}{the alpha for the minimum value (default:0.1)}
35 | 
36 | \item{max.cut}{the maximum cutoff for the LR activity}
37 | }
38 | \value{
39 | a data frame containing the result of each ligand-receptor pair
40 | }
41 | \description{
42 | plot one specific ligand-receptor pair to find the colocalized region
43 | }
44 | 


--------------------------------------------------------------------------------