├── .DS_Store
├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── Hub genes
    ├── BC_Hub_genes_TCGA.csv
    ├── BC_Hub_genes_edges.xlsx
    └── Merfish_Hub_genes_edges.xlsx
├── NAMESPACE
├── R
    ├── SpaceX.R
    └── pqlseq_modified.R
├── README.Rmd
├── README.md
├── SpaceX.Rproj
├── SpaceX_Overview.jpg
├── data
    ├── BC_count.rda
    └── BC_loc.rda
├── man
    ├── SpaceX.Rd
    ├── figures
    │   └── README-pressure-1.png
    └── pqlseq_modified.Rd
└── src
    ├── Makevars
    ├── Makevars.win
    ├── Makevars.win~
    ├── SpaceX.dll
    ├── SpaceX.f95
    ├── SpaceX.o
    └── SpaceX.so


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/.DS_Store


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^README\.Rmd$
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: SpaceX
 2 | Type: Package
 3 | Title: Gene Co-expression Network Estimation for Spatial Transcriptomics
 4 | Version: 0.1.0
 5 | Author: Satwik Acharyya <satwik91@gmail.com>
 6 | Maintainer: Satwik Acharyya <satwik91@gmail.com>
 7 | Description: Provides shared and cluster specific gene co-expression networks 
 8 |              for spatial transcriptomics data.
 9 | License: GPL-3
10 | Encoding: UTF-8
11 | LazyData: true
12 | RoxygenNote: 7.2.3
13 | Imports: doParallel, MSFA, foreach
14 | Depends: PQLseq
15 | 


--------------------------------------------------------------------------------
/Hub genes/BC_Hub_genes_TCGA.csv:
--------------------------------------------------------------------------------
 1 | Single_cell_hub_genes,Spatial_shared_hub_genes,Intersecting Hub Genes
 2 | SPDEF,COL3A1,FN1
 3 | MMP14,FN1,LUM
 4 | SERF2,LUM,XBP1
 5 | GNAI2,COL1A2,S100A10
 6 | GAPDH,POSTN,HNRNPA2B1
 7 | MMP11,FSTL1,COL5A1
 8 | GLIS2,COL1A1,RPLP0
 9 | GADD45GIP1,COL6A3,RPS27
10 | CD63,XBP1,COL6A2
11 | TCEB2,SPARC,IGFBP7
12 | IGFBP7,S100A10,H3F3A
13 | SMG7,COL12A1,MFAP2
14 | RPL29,GNAS,DPYSL3
15 | VIM,MMP11,VIM
16 | MRC2,AEBP1,COL16A1
17 | S100A14,HNRNPA2B1,RPS14
18 | CD81,COL5A1,HTRA1
19 | CIRBP,SELT,CD63
20 | TUBA1B,ASPN,RPL3
21 | MYL9,RPLP0,
22 | ATP5B,STAT1,
23 | HNRNPA2B1,SSR4,
24 | TPI1,RPS11,
25 | XBP1,TMSB4X,
26 | SELM,RPS27,
27 | APOE,COL6A2,
28 | LMNA,VWF,
29 | S100A10,SAA1,
30 | PODXL2,DBI,
31 | CYC1,IGFBP7,
32 | GPAA1,RPL36,
33 | LGALS3,RPLP1,
34 | RPS15,TFF3,
35 | SPNS1,APOD,
36 | MAPK3,B2M,
37 | HTRA1,H3F3A,
38 | CXCL12,TIMP3,
39 | RPS27,MFAP2,
40 | ADAM15,KLF6,
41 | FN1,DCN,
42 | DPYSL3,AZGP1,
43 | FXYD3,FOS,
44 | C1R,DPYSL3,
45 | COL5A1,VIM,
46 | RPLP0,PEG10,
47 | ROMO1,COL16A1,
48 | EEF1A1,ADIRF,
49 | RPL10,RPS18,
50 | RPS14,TGFB1,
51 | COL6A2,MGP,
52 | LRP1,CHCHD2,
53 | TECR,RPS14,
54 | SPINT2,HTRA1,
55 | RPL3,RPS4X,
56 | LUM,CD63,
57 | BSG,RPL30,
58 | MYO1C,SPPL2B,
59 | CD55,RPL3,
60 | RPS9,LRRC15,
61 | COX6B1,,
62 | NDUFA11,,
63 | GGCT,,
64 | COL10A1,,
65 | LDHA,,
66 | COL16A1,,
67 | MAFB,,
68 | NDUFB10,,
69 | MFAP2,,
70 | LGMN,,
71 | LDHB,,
72 | H3F3A,,
73 | MGST3,,
74 | MIF,,
75 | YIF1B,,
76 | BCAP31,,
77 | EIF4G2,,
78 | KRT17,,


--------------------------------------------------------------------------------
/Hub genes/BC_Hub_genes_edges.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/Hub genes/BC_Hub_genes_edges.xlsx


--------------------------------------------------------------------------------
/Hub genes/Merfish_Hub_genes_edges.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/Hub genes/Merfish_Hub_genes_edges.xlsx


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | importFrom("stats", "binomial", "dist", "glm", "model.frame",
 2 |                "na.omit", "na.pass", "pchisq", "poisson")
 3 | importFrom("stats", "model.matrix", "var")
 4 | export(SpaceX)
 5 | export(pqlseq_modified)
 6 | importFrom("doParallel", "registerDoParallel")
 7 | importFrom("parallel", "detectCores")
 8 | importFrom("foreach", "%dopar%", "foreach")
 9 | importFrom("MSFA", "sp_msfa")
10 | useDynLib(SpaceX)
11 | 


--------------------------------------------------------------------------------
/R/SpaceX.R:
--------------------------------------------------------------------------------
  1 | #' @title Estimation of shared and cluster specific gene co-expression networks for spatial transcriptomics data.
  2 | #'
  3 | #'
  4 | #' @description SpaceX function estimates shared and cluster specific gene co-expression networks for spatial transcriptomics data. Please make sure to provide both inputs as dataframe. More details about the SpaceX algorithm can be found in the reference paper.
  5 | #'
  6 | #' @param Gene_expression_mat Gene expression dataframe (N X G).
  7 | #' @param Spatial_locations Spatial locations with coordinates. This should be provided as dataframe.
  8 | #' @param Cluster_annotations Cluster annotations for each of the spatial location.
  9 | #' @param sPMM If \code{TRUE}, the code will return the estimates of sigma1_sq and sigma2_sq from the spatial Poisson mixed model.
 10 | #' @param Post_process If \code{FALSE} (the default), the code will return the posterior samples of \code{Phi} and \code{Psi^c} (based on definition in equation 1 of the SpaceX paper) only.
 11 | #' If \code{TRUE}, the code will return all the posterior samples together with the shared and cluster specific co-expressions.
 12 | #' @param numCore The number of cores for parallel computing (default = 1).
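 13 | #' @param nrun Number of MCMC iterations assumed by the post-processing step (default = 10000). Only \code{nrun - burn} is used, as the number of posterior draws to process.
 14 | #' @param burn Number of burn-in iterations subtracted from \code{nrun} (default = 5000). Must be smaller than \code{nrun}.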
 15 | #'
 16 | #' @return
 17 | #' \item{Posterior_samples}{Posterior samples}
 18 | #' \item{Shared_network}{Shared co-expression matrix}
 19 | #' \item{Cluster_network}{Cluster specific co-expression matrices}
 20 | #'
 21 | #' @references Acharyya S., Zhou X., Baladandayuthapani V. (2021). SpaceX: Gene Co-expression Network Estimation for Spatial Transcriptomics.
 22 | #'
 23 | #' @seealso Implementation details and examples can be found at \url{https://bookdown.org/satwik91/SpaceX_supplementary/}.
 24 | #'
 25 | #'
 26 | SpaceX <- function(Gene_expression_mat, Spatial_locations, Cluster_annotations,
 27 |                    sPMM=FALSE,Post_process=FALSE,numCore = 1,
 28 |                    nrun=10000,burn=5000){
 29 | # Pipeline: (1) spatial Poisson mixed model per cluster, (2) latent expression
 30 | # per gene, (3) multi-study factor model, (4) optional Fortran post-processing.
 31 | # Columns 1-2 of Spatial_loc hold the coordinates, column 3 the cluster label.
 32 | Spatial_loc = as.data.frame(cbind(Spatial_locations,Cluster_annotations))
 33 | if(Post_process != FALSE && nrun <= burn){
 34 |   stop("nrun must be larger than burn when Post_process is TRUE")
 35 | }
 36 | 
 37 | #### Global Parameters ######
 38 | G <- dim(Gene_expression_mat)[2]
 39 | Clusters <- unique(Spatial_loc[,3])
 40 | L <- length(Clusters)
 41 | N_l <- numeric(L)
 42 | sigma1_sq_est <- matrix(0,G,L)
 43 | sigma2_sq_est <- matrix(0,G,L)
 44 | u <- vector("list", L)
 45 | Z_est <- vector("list", L) ##latent gene expression matrix (one N_l x G block per cluster)
 46 | 
 47 | ### Poisson mixed model with PQLSEQ algorithm
 48 | for (l in seq_len(L)) {
 49 | 
 50 |   pos <- which(Spatial_loc[,3] == Clusters[l])
 51 |   N_l[l] <- length(pos)
 52 |   Z_est[[l]] <- matrix(0,nrow = N_l[l],ncol = G)
 53 |   location <- Spatial_loc[pos,-3]
 54 | 
 55 |   ### Rho estimation ###
 56 |   # rho_l is the 5th of 10 log10-spaced values between half the smallest and
 57 |   # twice the largest pairwise distance inside the cluster.
 58 |   a <- dist(location)
 59 |   a_max <- log10(2*max(a))
 60 |   a_min <- log10(min(a[a > 0])/2)  # ignore zero distances (duplicated spots): log10(0) is -Inf
 61 |   a_seq <- seq(a_min, a_max, length.out = 10)
 62 |   rho_l <- 10^(a_seq[5])
 63 | 
 64 |   Y_mat <- as.matrix(Gene_expression_mat[pos,], rownames.force = FALSE)
 65 |   colnames(Y_mat) <- NULL
 66 | 
 67 |   # Squared-exponential kernel on the coordinates of THIS cluster. The rows must
 68 |   # come from `location`: indexing Spatial_loc by 1..N_l would read the first
 69 |   # N_l rows of the whole data set instead of the cluster's own spots.
 70 |   x_coord <- location[,1]
 71 |   y_coord <- location[,2]
 72 |   dist_sq <- outer(x_coord, x_coord, "-")^2 + outer(y_coord, y_coord, "-")^2
 73 |   cov_kernel_l <- exp(-dist_sq/(2*rho_l^2))
 74 | 
 75 |   print("Spatial Poisson Mixed Model")
 76 |   fit <- pqlseq_modified(RawCountDataSet=t(Y_mat),Phenotypes= rep(1,N_l[l]), RelatednessMatrix = cov_kernel_l,
 77 |                    fit.model="PMM", numCore = numCore)
 78 | 
 79 |   # fit holds N_l rows per gene (per-gene scalars repeated); take the first row of each gene.
 80 |   first_row <- 1 + (0:(G-1))*N_l[l]
 81 |   sigma1_sq_est[,l] <- fit$tau1[first_row]
 82 |   sigma2_sq_est[,l] <- fit$tau2[first_row]
 83 |   u[[l]] <- matrix(fit$residual, nrow = G, byrow = TRUE)
 84 | 
 85 |   ## Estimation of latent gene expression
 86 |   I_n <- diag(N_l[l])
 87 |   for (g in seq_len(G)) {
 88 |     s1 <- sigma1_sq_est[g,l]
 89 |     s2 <- sigma2_sq_est[g,l]
 90 |     # Small variance components are nudged by 0.001 before V is built and solved.
 91 |     if(s1 < 0.01 || s2 < 0.01){
 92 |       V <- (s1+0.001)*cov_kernel_l + (s2+0.001)*I_n
 93 |     }
 94 |     else{
 95 |       V <- s1*cov_kernel_l + s2*I_n
 96 |     }
 97 |     s2_scale <- if(s2 == 0) s2+0.001 else s2
 98 |     # s2 * V^{-1} u, obtained by solving the linear system rather than forming the inverse
 99 |     Z_est[[l]][,g] <- s2_scale*solve(V, u[[l]][g,])
100 |   }
101 |   print(l)
102 | }
103 | 
104 | ## Applying multi-study factor model on latent gene expression matrix
105 | print("Multi-Study Factor Model")
106 | fit_MSFA = sp_msfa(Z_est,  k = 10,  j_s = rep(10,L), trace = FALSE)
107 | 
108 | if(Post_process==FALSE){
109 | AA <- list(Posterior_samples=fit_MSFA)
110 | }
111 | else{
112 | ## Post processing of the posterior samples
113 | # NOTE(review): sp_msfa above is called without nrun/burn; confirm nrun - burn equals the number of draws it returns.
114 | n_keep <- nrun - burn
115 | n_factor <- dim(fit_MSFA$Phi)[2]
116 | 
117 | SpaceProc <- .Fortran("bigtdsub",n=as.integer(G),
118 |    m=as.integer(n_factor),o=as.integer(n_keep),
119 |    x=as.single(fit_MSFA$Phi),
120 |    z=as.single(unlist(fit_MSFA$Lambda)),
121 |    b=as.single(rep(0,G*G)),s=as.single(rep(0,G*G*L)),L=as.integer(L))
122 | 
123 | Sh1 <- matrix(SpaceProc$b,nrow=G,ncol=G)
124 | Clus1 <- array(SpaceProc$s,c(G,G,L))
125 | 
126 | AA <- list(Posterior_samples=fit_MSFA,Shared_network=Sh1,Cluster_network=Clus1)
127 | }
128 | 
129 | if(sPMM==FALSE){
130 |   return(AA)
131 | }
132 | else{
133 |   # Wrap the matrices in list(): c(list, matrix) would splice every matrix entry in as its own element.
134 |   return(c(AA,list(sigma1_sq_est=sigma1_sq_est,sigma2_sq_est=sigma2_sq_est)))
135 | }
136 | }
137 | 


--------------------------------------------------------------------------------
/R/pqlseq_modified.R:
--------------------------------------------------------------------------------
  1 | ##################################################################################
  2 | ##
  3 | ## The pqlseq algorithm is developed in the following paper:
  4 | ## Title  : Heritability Estimation and Differential Analysis with Generalized Linear Mixed Models in Large-Scale Genomic Sequencing Studies
  5 | ## Authors: Shiquan Sun, Jiaqiang Zhu, and Xiang Zhou: Package: PQLseq.
  6 | ## This is a modified version of the pqlseq algorithm particularly for the SpaceX package.
  7 | ##
  8 | ##################################################################################
  9 | 
 10 | #' Fit Generalized Linear Mixed Model with Known Kinship Matrices Through Penalized-quasi Likelihood
 11 | #'
 12 | #' Fit a generalized linear mixed model with a random intercept. The covariance matrix of the random intercept is proportional to a known kinship matrix. This is a modified version of the pqlseq algorithm particularly for the SpaceX package. For more details check the pqlseq function from PQLseq package.
 13 | #'
 14 | #' @param RawCountDataSet a data frame containing the read count.
 15 | #' @param Phenotypes a vector containing the predictor of interest.
 16 | #' @param Covariates a data frame containing the covariates subject to adjustment (Default = NULL).
 17 | #' @param RelatednessMatrix a known relationship matrix (e.g. kinship matrix in genetic studies). When supplied with a matrix, this matrix should be a positive semi-definite matrix with dimensions equal to the sample size in count data, and the order of subjects in this matrix should also match the order of subjects in count data. Currently there is no ID checking feature implemented, and it is the user's responsibility to match the orders.
 18 | #' @param LibSize a data frame containing the total read count. For the Poisson mixed model, it will be calculated automatically if users do not provide it. For the binomial mixed model, it is required.
 19 | #' @param fit.model a description of the error distribution and link function to be used in the model. Either "PMM" for the Poisson model, or "BMM" for the binomial model (default = "PMM").
 20 | #' @param fit.method method of fitting the generalized linear mixed model (default = "AI.REML"). The argument is currently not used by the function body, which always calls the average information fit.
 21 | #' @param fit.maxiter a positive integer specifying the maximum number of iterations when fitting the generalized linear mixed model (default = 500).
 22 | #' @param fit.tol a positive number specifying tolerance, the difference threshold for parameter estimates below which iterations should be stopped (default = 1e-5).
 23 | #' @param numCore a positive integer specifying the number of cores for parallel computing (default = 1).
 24 | #' @param filtering a logical switch for RNAseq data. By default, for each gene, at least two individuals should have read counts greater than 5. Otherwise, the gene is filtered (default = TRUE).
 25 | #' @param verbose a logical switch for printing detailed information (parameter estimates in each iteration) for testing and debugging purpose (default = FALSE).
 26 | #' @param ... additional arguments that could be passed to glm.
 27 | #'
 28 | #' @return
 29 | #' \item{numIDV}{number of individuals with data being analyzed}
 30 | #' \item{beta}{the fixed effect parameter estimate for the predictor of interest.}
 31 | #' \item{se_beta}{the standard deviation of fixed effect.}
 32 | #' \item{pvalue}{P value for the fixed effect, based on the wald test.}
 33 | #' \item{h2}{heritability of the transformed rate.}
 34 | #' \item{sigma2}{total variance component.}
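 35 | #' \item{tau1, tau2}{the two variance component estimates whose sum is \code{sigma2}. The "PMM" fit additionally returns \code{alpha}, \code{fitted_values} and \code{residual}.}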
 36 | #' \item{converged}{a logical indicator for convergence.}
 37 | #'
 38 | #' @references Sun, S., Hood, M., Scott, L., Peng, Q., Mukherjee, S., Tung, J., and Zhou, X. (2017). Differential expression analysis for RNAseq using Poisson mixed models. Nucleic Acids Research, 45(11), e106–e106.
 39 | #'
 40 | 
 41 | pqlseq_modified <- function(RawCountDataSet, Phenotypes, Covariates=NULL, RelatednessMatrix=NULL, LibSize=NULL,
 42 |                       fit.model="PMM", fit.method = "AI.REML", fit.maxiter=500, fit.tol=1e-5, numCore=1,
 43 |                       filtering=TRUE, verbose=FALSE, ...) {
 44 |   # specify the number of cores we want to use (capped at detected cores - 1, but never below 1)
 45 |   if(numCore > 1){
 46 |     if(numCore>detectCores()){warning("PQLseq:: the number of cores you're setting is larger than detected cores!");numCore = max(1, detectCores()-1)}
 47 |   }
 48 | 
 49 |   registerDoParallel(numCore)
 50 | 
 51 |   # cl <- makeCluster(numCore)
 52 |   # registerDoParallel(cl,cores=numCore)
 53 |   # on.exit(stopCluster(cl))
 54 | 
 55 |   # filtering genes/sites (disabled in this modified version: `filtering` is accepted but has no effect)
 56 |   #if (filtering & fit.model == "PMM"){
 57 |   #  unfilterIdx <- apply(RawCountDataSet, 1, function(x){length(x[x>5])>=2} )
 58 |   #  CountData   <- RawCountDataSet[unfilterIdx,]
 59 |   #}else{
 60 |   #  CountData   <- RawCountDataSet
 61 |   #}
 62 |   CountData   <- RawCountDataSet
 63 |   rm(RawCountDataSet)
 64 |   # rows of CountData are genes/sites, columns are individuals
 65 |   numVar <- dim(CountData)[1]
 66 |   numIDV <- dim(CountData)[2]
 67 | 
 68 |   # remove the intercept (a constant first covariate column); drop = FALSE keeps the two-dimensional shape
 69 |   if(!is.null(Covariates) && length(unique(Covariates[,1])) == 1){
 70 |     Covariates<- Covariates[,-1, drop=FALSE]
 71 |   }
 72 | 
 73 |   if(is.null(Covariates)){
 74 |     numCov <- 0
 75 |   }else{
 76 |     numCov     <- dim(Covariates)[2]
 77 |     Covariates <- as.matrix(Covariates)
 78 |   }
 79 | 
 80 |   cat(paste("## number of total individuals: ", numIDV,"\n"))
 81 |   cat(paste("## number of total genes/sites: ", numVar,"\n"))
 82 |   cat(paste("## number of adjusted covariates: ", numCov,"\n"))
 83 | 
 84 | 
 85 |   CountData  <- as.matrix(CountData)
 86 |   Phenotypes <- as.matrix(Phenotypes)
 87 | 
 88 | 
 89 |   # if(is.null(RelatednessMatrix)){
 90 |   #   stop("PQLseq::please input relatedness matrix!")
 91 |   # }else{
 92 |   #   RelatednessMatrix <- as.matrix(RelatednessMatrix)
 93 |   #   scalerM           <- diag(numIDV)-(rep(1,numIDV)%*%t(rep(1,numIDV)))/numIDV
 94 |   #   eig               <- eigen(RelatednessMatrix)
 95 |   #   eigval            <- eig$value
 96 |   #   eigvector         <- eig$vectors
 97 |   #   if(any(eigval<1e-10)){
 98 |   #     warning("PQLseq::the relatedness matrix is singular, it has been modified!")
 99 |   #     RelatednessMatrix <- as.matrix(nearPD(RelatednessMatrix,corr=T)$mat)
100 |   #   }
101 |   #   rm(scalerM)
102 |   #   rm(eig)
103 |   #   rm(eigval)
104 |   #   rm(eigvector)
105 |   # }
106 |   # two covariance structures are fitted: the supplied matrix and the identity matrix
107 |   RelatednessMatrix <- list(RelatednessMatrix, diag(numIDV))
108 | 
109 |   #***********************************#
110 |   #       Poisson Mixed Model         #
111 |   #***********************************#
112 |   if(fit.model == "PMM"){
113 |     cat("# fitting Poisson mixed model ... \n")
114 |     if(is.null(LibSize)){
115 |       LibSize <- apply(CountData, 2, sum)
116 |       LibSize <- as.matrix(LibSize)
117 |     }else{
118 |       LibSize <- as.matrix(t(LibSize))
119 |     }
120 | 
121 |     # every result field gets an NA default so a failed fit still yields rows instead of an "object not found" error
122 |     # do parallel using foreach function
123 |     iVar   <- NULL
124 |     resPMM <-foreach(iVar=1:numVar,.combine=rbind)%dopar%{
125 |       numAnalysis <- beta <- alpha <- tau1 <- tau2 <- se_beta <- pvalue <- converged <- h2 <- sigma2 <- overdisp <- NA; residual <- fitted_values <- rep(NA_real_, numIDV)
126 |       if(numCov==0){
127 |         model0 <- try(glm(formula = CountData[iVar,]~1 + offset(log(LibSize)), family = poisson(link="log")))
128 |         idx   <- match(rownames(model.frame(formula = CountData[iVar,]~1 + offset(log(LibSize)), na.action = na.omit)),
129 |                        rownames(model.frame(formula = CountData[iVar,]~1 + offset(log(LibSize)), na.action = na.pass)))
130 |       }else{
131 |         model0 <- try(glm(formula = CountData[iVar,]~Covariates + Phenotypes + offset(log(LibSize)), family = poisson(link="log")))
132 |         idx   <- match(rownames(model.frame(formula = CountData[iVar,]~Covariates + Phenotypes + offset(log(LibSize)), na.action = na.omit)),
133 |                        rownames(model.frame(formula = CountData[iVar,]~Covariates + Phenotypes + offset(log(LibSize)), na.action = na.pass)))
134 |       }
135 | 
136 |       if(verbose) {cat(paste("NO. Gene = ",iVar,"\n"))}
137 |       # restrict every covariance matrix to the individuals kept after NA removal
138 |       tmpRelatednessMatrix <- RelatednessMatrix
139 |       if(is.matrix(tmpRelatednessMatrix)) {
140 |         tmpRelatednessMatrix <- tmpRelatednessMatrix[idx, idx]
141 |       }else {
142 |         for(ik in seq_len(length(tmpRelatednessMatrix)) ) {tmpRelatednessMatrix[[ik]] <- tmpRelatednessMatrix[[ik]][idx, idx]}
143 |       }
144 | 
145 |       names(tmpRelatednessMatrix) <- paste("kins", 1:length(tmpRelatednessMatrix), sep="")
146 | 
147 |       if(!inherits(model0, "try-error")){
148 |         # fit.maxiter / fit.tol are forwarded so the user-facing arguments take effect
149 |         model1 <- try(PQLseq.fit(model0, tmpRelatednessMatrix, maxiter = fit.maxiter, tol = fit.tol, verbose = verbose))
150 |       }else{
151 |         model1 <- NULL
152 |       }
153 | 
154 |       if(!is.null(model1) && !inherits(model1, "try-error")){
155 |         if(verbose){cat(paste("PQLseq::PMM::tau = ", model1$theta,"\n"))}
156 |         numAnalysis <- length(idx)
157 |         beta        <- model1$coefficients[length(model1$coefficients)]
158 |         alpha       <- model1$coefficients[1]
159 |         se_beta     <- sqrt(diag(model1$cov)[length(model1$coefficients)] )
160 |         pvalue      <- pchisq( (beta/se_beta)^2, 1, lower.tail = F)
161 |         sigma2      <- model1$theta[2]+model1$theta[3]
162 |         h2          <- model1$theta[2]/(sigma2)
163 |         tau1        <- model1$theta[2]
164 |         tau2        <- model1$theta[3]
165 |         residual    <- model1$residuals
166 |         fitted_values <- model1$fitted.values
167 |         converged   <- model1$converged
168 |       }else{converged <- FALSE}
169 |       # residual / fitted_values are vectors, so each gene contributes one row per individual (scalars recycled)
170 |       res <- data.frame(numIDV = numAnalysis, beta = beta, alpha=alpha, se_beta = se_beta,
171 |                         pvalue = pvalue, h2 = h2, sigma2 = sigma2,tau1=tau1,tau2=tau2,
172 |                         fitted_values = fitted_values,
173 |                         residual = residual, converged = converged)
174 |     }# end for iVar, parallel
175 |     rm(iVar)
176 |     closeAllConnections() # NOTE(review): closes every open connection in the session, not only those of the workers
177 |     # if(nrow(showConnections())!=0){closeAllConnections()}
178 |     # gene names are attached only when they line up with the rows; otherwise row names are reset
179 |     rownames(resPMM) <- if(length(rownames(CountData)) == nrow(resPMM)) rownames(CountData) else NULL
180 |     return(resPMM)
181 |   }# end PMM
182 |   #***********************************#
183 |   #       Binomial Mixed Model        #
184 |   #***********************************#
185 |   if(fit.model == "BMM"){
186 |     cat("# fitting binomial mixed model ... \n")
187 |     if(is.null(LibSize)){
188 |       stop("PQLseq::BMM::ERROR: please input the LibSize (total counts) file!!")
189 |     }else{
190 |       LibSize <- as.matrix(LibSize)
191 |     }
192 |     # drop every gene/site whose count exceeds its total count for at least one individual
193 |     ratio               <- CountData/LibSize
194 |     ratio[is.na(ratio)] <- 0
195 |     flag                <- ratio>1.0
196 |     sumflag             <- apply(flag,1, sum)
197 |     idx                 <- which(sumflag>0)
198 | 
199 |     if (length(idx)>0){
200 |       CountData <- CountData[-idx,,drop=FALSE]
201 |       LibSize   <- LibSize[-idx,,drop=FALSE]
202 |     }else{
203 |       CountData <- CountData
204 |       LibSize   <- LibSize
205 |     }
206 | 
207 |     numVar <- dim(CountData)[1]
208 |     numIDV <- dim(CountData)[2]
209 |     iVar   <- NULL
210 | 
211 |     # do parallel
212 |     resBMM <- foreach(iVar=1:numVar,.combine=rbind)%dopar%{
213 |       numAnalysis <- beta <- tau1 <- tau2 <- se_beta <- pvalue <- converged <- h2 <- sigma2 <- overdisp <- NA
214 |       if(verbose){cat(paste("NO. Gene/Site = ",iVar,"\n"))}
215 |       if(sum(dim(LibSize)==dim(CountData)) != 2){
216 |         stop("PQLseq::BMM::ERROR: the dimensions of read counts and total read counts do not match!")
217 |       }
218 | 
219 |       LibSize <- as.matrix(LibSize)
220 | 
221 |       if(numCov == 0){
222 |         model0 <- glm(formula = CountData[iVar,]/LibSize[iVar,]~Phenotypes, family = binomial(link = "logit"), weights = LibSize[iVar,])
223 |         idx    <- match(rownames(model.frame(formula = CountData[iVar,]/LibSize[iVar,]~Phenotypes, na.action = na.omit)),
224 |                         rownames(model.frame(formula = CountData[iVar,]/LibSize[iVar,]~Phenotypes, na.action = na.pass)))
225 |       }else{
226 |         model0 <- glm(formula = CountData[iVar,]/LibSize[iVar,]~Covariates + Phenotypes, family = binomial(link = "logit"), weights = LibSize[iVar,] )
227 |         idx    <- match(rownames(model.frame(formula = CountData[iVar,]/LibSize[iVar,]~Covariates + Phenotypes, na.action = na.omit)),
228 |                         rownames(model.frame(formula = CountData[iVar,]/LibSize[iVar,]~Covariates + Phenotypes, na.action = na.pass)))
229 |       }
230 | 
231 |       model0$numTotal <- LibSize[iVar,idx]
232 |       model0$numSucc  <- CountData[iVar,idx]
233 |       # a constant design column skips the mixed-model fit for this gene/site (no row is returned for it)
234 |       redflag <- FALSE
235 |       for( ierr in c(2:dim(model.matrix(model0))[2])){
236 |         if(length(unique(model.matrix(model0)[,ierr])) == 1){
237 |           warning(paste("PQLseq::BMM::the ",ierr-1,"-th column of covariates are the same for gene/site ",rownames(CountData)[iVar],"!",sep = "") )
238 |           redflag <- TRUE
239 |         }
240 |       }
241 |       if(!redflag){
242 | 
243 |         tmpRelatednessMatrix <- RelatednessMatrix
244 |         if(is.matrix(tmpRelatednessMatrix)) {
245 |           tmpRelatednessMatrix <- tmpRelatednessMatrix[idx, idx]
246 |         }else {
247 |           for(ik in seq_len(length(tmpRelatednessMatrix)) ) {
248 |             tmpRelatednessMatrix[[ik]] <- tmpRelatednessMatrix[[ik]][idx, idx]
249 |           }
250 |         }
251 |         names(tmpRelatednessMatrix) <- paste("kins", 1:length(tmpRelatednessMatrix), sep="")
252 | 
253 |         # fit.maxiter / fit.tol are forwarded so the user-facing arguments take effect
254 |         model1 <- try(PQLseq.fit(model0, tmpRelatednessMatrix, maxiter = fit.maxiter, tol = fit.tol, verbose = verbose))
255 | 
256 |         if(!is.null(model1) && !inherits(model1, "try-error")){
257 |           if(verbose){cat(paste("PQLseq::BMM::tau = ", model1$theta,"\n"))}
258 |           numAnalysis <- length(idx)
259 |           beta        <- model1$coefficients[ length(model1$coefficients) ]# the last one
260 |           se_beta     <- sqrt( diag(model1$cov)[ length(model1$coefficients) ] )
261 |           pvalue      <- pchisq( (beta/se_beta)^2, 1, lower.tail = F)
262 |           sigma2      <- model1$theta[2]+model1$theta[3]
263 |           h2          <- model1$theta[2]/(sigma2)
264 |           tau1        <- model1$theta[2]
265 |           tau2        <- model1$theta[3]
266 |           converged   <- model1$converged
267 |         }else{converged <- FALSE}
268 | 
269 |         res <- data.frame(numIDV = numAnalysis, beta = beta, se_beta = se_beta,
270 |                           pvalue = pvalue, h2 = h2, sigma2 = sigma2,
271 |                           converged = converged)
272 |       }# end if(!redflag)
273 | 
274 |     }# end for iVar, parallel
275 |     rm(iVar)
276 | 
277 |     # if(nrow(showConnections())!=0){closeAllConnections()}
278 |     closeAllConnections()
279 |     rownames(resBMM) <- if(length(rownames(CountData)) == nrow(resBMM)) rownames(CountData) else NULL
280 |     return(resBMM)
281 |   }# end BMM
282 | 
283 | }# end function pqlseq_modified
284 | 
285 | 
286 | ##########################################################
287 | #           	   PQLseq FIT FUNCTION					 #
288 | ##########################################################
289 | 
290 | PQLseq.fit <- function(model0, RelatednessMatrix, method = "REML", method.optim = "AI", maxiter = 500, tol = 1e-5, verbose = FALSE) {
291 |   # Fits the model with PQLseq.AI, then refits with the components whose estimate fell below 1.01 * tol
292 |   # flagged as fixed, until that flag pattern stops changing. Returns NULL unless method.optim is "AI".
293 |   names(RelatednessMatrix) <- paste0("kins", 1:length(RelatednessMatrix))
294 |   if(method.optim != "AI") {
295 |     return(NULL)
296 |   }
297 | 
298 |   tau_floor <- 1.01 * tol
299 |   fixed_prev <- rep(0, length(RelatednessMatrix)+1)
300 |   fit <- PQLseq.AI(model0, RelatednessMatrix, maxiter = maxiter, tol = tol, verbose = verbose)
301 |   fixed_now <- 1*(fit$theta < tau_floor)
302 |   while(any(fixed_now != fixed_prev)) {
303 |     # the flag pattern changed: refit with the current pattern passed as fixtau
304 |     fixed_prev <- fixed_now
305 |     fit <- PQLseq.AI(model0, RelatednessMatrix, fixtau = fixed_prev, maxiter = maxiter, tol = tol, verbose = verbose)
306 |     fixed_now <- 1*(fit$theta < tau_floor)
307 |   }
308 |   return(fit)
309 | }
310 | 
311 | ##########################################################
312 | #       PQLseq FIT AVERAGE INFORMATION FUNCTION			 #
313 | ##########################################################
314 | 
315 | PQLseq.AI <- function(model0, RelatednessMatrix, tau = rep(0, length(RelatednessMatrix)+1), fixtau = rep(0, length(RelatednessMatrix)+1), maxiter = 500, tol = 1e-5, verbose = FALSE) {
316 |   # Iterative fit: each pass calls AI() to update tau and alpha, then rebuilds the working response Y and weights D. Returns a list with theta, coefficients, linear.predictors, fitted.values, Y, P, residuals, cov and converged.
317 |   if(model0$family$family %in% c("binomial")){
318 |     y <- model0$numSucc
319 |   }else{
320 |     y <- model0$y
321 |   }
322 |   numIDV <- length(y)
323 |   offset <- model0$offset
324 |   if(is.null(offset)) {offset <- rep(0, numIDV)}
325 |   # starting values are taken from the ordinary glm fit model0
326 |   family <- model0$family
327 |   eta <- model0$linear.predictors
328 |   mu <- model0$fitted.values
329 |   mu.eta <- family$mu.eta(eta)
330 |   D <- mu.eta/sqrt(model0$family$variance(mu))
331 |   # binomial: rescale by the number of trials stored in model0$numTotal
332 |   if(family$family %in% c("binomial")){
333 |     mu.eta <- model0$numTotal*mu.eta
334 |     D <- mu.eta/sqrt(model0$numTotal*model0$family$variance(mu))
335 |     mu <- model0$numTotal*mu
336 |   }
337 |   # working response: linear predictor without the offset plus the scaled raw residual
338 |   Y <- eta - offset + (y - mu)/mu.eta
339 |   X <- model.matrix(model0)
340 |   alpha <- model0$coefficients
341 |   # the first component of tau is set to 1 and marked as fixed for poisson / binomial families
342 |   if(family$family %in% c("poisson", "binomial")) {
343 |     tau[1] <- 1
344 |     fixtau[1] <- 1
345 |   }
346 |   numK <- length(RelatednessMatrix)
347 |   idxtau <- which(fixtau == 0)
348 |   numK2 <- sum(fixtau == 0)
349 | 
350 |   ### this part needs to be changed for intercept only model same as spark (Satwik)
351 |   if(numK2 > 0) {
352 |     tau[fixtau == 0] <- rep(min(0.9,var(Y)/(numK+1)), numK2)
353 |     # H = tau[1]*diag(1/D^2) + sum_k tau[k+1]*RelatednessMatrix[[k]]
354 |     H <- tau[1]*diag(1/D^2)
355 |     for(ik in 1:numK) {H <- H + tau[ik+1]*RelatednessMatrix[[ik]]}
356 | 
357 |     Hinv   <- chol2inv(chol(H))
358 |     HinvX  <- crossprod(Hinv, X)
359 |     XHinvX <- crossprod(X, HinvX)
360 | 
361 |     P <- try(Hinv - tcrossprod(tcrossprod(HinvX, chol2inv(chol( XHinvX ))), HinvX))
362 |     # inherits() is required: P is normally a matrix, whose class vector has length 2, so class(P) == "..." is not a valid if() condition
363 |     if(inherits(P, "try-error")){
364 |       stop("Error in P matrix calculation!")
365 |     }
366 |     # one update of every free component of tau before the main loop
367 |     PY <- crossprod(P, Y)
368 |     tau0 <- tau
369 |     for(ik in 1:numK2) {
370 |       if(ik == 1 && fixtau[1] == 0) tau[1] <- max(0, tau0[1] + tau0[1]^2 * (sum((PY/D)^2) - sum(diag(P)/D^2))/numIDV)
371 |       else {
372 |         PAPY <- crossprod(P, crossprod(RelatednessMatrix[[idxtau[ik]-1]], PY))
373 |         tau[idxtau[ik]] <- max(0, tau0[idxtau[ik]] + tau0[idxtau[ik]]^2 * (crossprod(Y, PAPY) - sum(P*RelatednessMatrix[[idxtau[ik]-1]]))/numIDV)
374 |       }
375 |     }
376 |   }
377 |   # main loop: AI() update, then refresh eta, mu, D and Y from the new estimates
378 |   for (iter in seq_len(maxiter)) {
379 |     alpha0 <- alpha
380 |     tau0   <- tau
381 |     model1 <- AI(Y, X, length(RelatednessMatrix), RelatednessMatrix, D^2, tau, fixtau, tol)
382 | 
383 |     tau <- as.numeric(model1$tau)
384 |     cov <- as.matrix(model1$cov)
385 |     alpha <- as.numeric(model1$alpha)
386 |     eta <- as.numeric(model1$eta) + offset
387 | 
388 | 
389 |     mu <- family$linkinv(eta)
390 |     mu.eta <- family$mu.eta(eta)
391 |     D <- mu.eta/sqrt(family$variance(mu))
392 | 
393 |     if(family$family %in% c("binomial")){
394 |       mu.eta <- model0$numTotal*mu.eta
395 |       D <- mu.eta/sqrt(model0$numTotal*family$variance(mu))
396 |       mu <- model0$numTotal*mu
397 |     }
398 | 
399 |     Y <- eta - offset + (y - mu)/mu.eta
400 |     # stop when the relative change of both alpha and tau is below tol
401 |     if(2*max(abs(alpha - alpha0)/(abs(alpha) + abs(alpha0) + tol), abs(tau - tau0)/(abs(tau) + abs(tau0) + tol)) < tol) {break}
402 |     if(max(tau) > tol^(-2)|any(is.infinite(D))|any(is.infinite(mu))|any(is.infinite(eta)) ) {
403 |       # divergence: force iter to maxiter so the fit is reported as not converged
404 |       iter <- maxiter
405 |       break
406 |     }
407 |   }
408 | 
409 |   converged <- ifelse(iter < maxiter, TRUE, FALSE)
410 |   res <- y - mu
411 |   P <- model1$P
412 |   return(list(theta = tau, coefficients = alpha, linear.predictors = eta, fitted.values = mu, Y = Y, P = P, residuals = res, cov = cov, converged = converged))
413 | }# end function
414 | 
415 | 
416 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
417 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
418 | # AI(): hands its eight arguments, unchanged and in order, to the native routine '_PQLseq_AI' looked up in package 'PQLseq'. The fitting loop earlier in this file reads $tau, $cov, $alpha, $eta and $P from the result.
419 | AI <- function(Yin, Xin, numKin, Phiin, Din, tauin, fixtauin, tolin) {
420 |   .Call('_PQLseq_AI', PACKAGE = 'PQLseq', Yin, Xin, numKin, Phiin, Din, tauin, fixtauin, tolin)
421 | }
422 | # rcpparma_hello_world(): calls the native routine '_PQLseq_rcpparma_hello_world' in package 'PQLseq' with no arguments. NOTE(review): this and the three wrappers below are not called in the visible part of this file; they look like Rcpp skeleton boilerplate - confirm before relying on them.
423 | rcpparma_hello_world <- function() {
424 |   .Call('_PQLseq_rcpparma_hello_world', PACKAGE = 'PQLseq')
425 | }
426 | # rcpparma_outerproduct(): passes x to the native routine '_PQLseq_rcpparma_outerproduct' in package 'PQLseq' and returns its result.
427 | rcpparma_outerproduct <- function(x) {
428 |   .Call('_PQLseq_rcpparma_outerproduct', PACKAGE = 'PQLseq', x)
429 | }
430 | # rcpparma_innerproduct(): passes x to the native routine '_PQLseq_rcpparma_innerproduct' in package 'PQLseq' and returns its result.
431 | rcpparma_innerproduct <- function(x) {
432 |   .Call('_PQLseq_rcpparma_innerproduct', PACKAGE = 'PQLseq', x)
433 | }
434 | # rcpparma_bothproducts(): passes x to the native routine '_PQLseq_rcpparma_bothproducts' in package 'PQLseq' and returns its result.
435 | rcpparma_bothproducts <- function(x) {
436 |   .Call('_PQLseq_rcpparma_bothproducts', PACKAGE = 'PQLseq', x)
437 | }
438 | 
439 | 
440 | #########################################
441 | #             CODE END                  #
442 | #########################################
443 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  6 | 
  7 | ```{r, include = FALSE}
  8 | knitr::opts_chunk$set(
  9 |   collapse = TRUE,
 10 |   comment = "#>",
 11 |   fig.path = "man/figures/README-",
 12 |   out.width = "100%"
 13 | )
 14 | ```
 15 | 
 16 | # SpaceX overview
 17 | 
 18 | <!-- badges: start -->
 19 | <!-- badges: end -->
 20 | 
 21 | ![SpaceXpipeline](SpaceX_Overview.jpg) The SpaceX ([spa]{.underline}tially dependent gene [c]{.underline}o-[ex]{.underline}pression network) is a Bayesian methodology to identify both shared and cluster-specific co-expression network across genes. These clusters can be cell type specific or based on spatial regions. SpaceX uses an over-dispersed spatial Poisson model coupled with a high-dimensional factor model which is based on a dimension reduction technique for computational efficiency.
 22 | 
 23 | The Figure above shows the overall conceptual flow of our pipeline. **Panel A** is an image of a tissue section from the region of interest. **Panel B** shows spatial gene expression and biomarkers which are recorded from that tissue section with the help of sequencing techniques. **Panel C** is the resulting data matrix of gene expression along with spatial locations and cluster annotations on the tissue. All these serve as input for the SpaceX model to obtain the shared (**Panel D**) and cluster-specific co-expression networks (**Panel E**). Finally, we use these networks for downstream analysis to detect gene modules and hub genes across spatial regions (**Panel F** & **Panel G** respectively) for biological interpretation.
 24 | 
 25 | <!-- The goal of SpaceX is to provide shared and cluster specific gene co-expression networks for spatial transcriptomics data. -->
 26 | 
 27 | ## Installation
 28 | This package requires a Fortran compiler in order to work. Here are the instructions:
 29 | 
 30 | + Windows: install the Rtools package that is appropriate for your version of R
 31 | 
 32 | + Mac: Go to this website and follow the instructions: (https://mac.R-project.org/tools/)
 33 | 
 34 | + Linux (Debian/Ubuntu): From a terminal, run `sudo apt install gcc gfortran`. That will install the C and Fortran compilers.
 35 | 
 36 | The package requires a dependency that is not available on CRAN. Install it with:
 37 | 
 38 | ``` r
 39 | remotes::install_github("rdevito/MSFA")
 40 | ```
 41 | 
 42 | You can install the released version of SpaceX from (https://github.com/SatwikAch/SpaceX) with:
 43 | 
 44 | ``` r
 45 | devtools::install_github("SatwikAch/SpaceX")
 46 | ```
 47 | 
 48 | ```{r}
 49 | library(SpaceX)
 50 | ```
 51 | 
 52 | ## SpaceX function 
 53 | ### Inputs
 54 | 
 55 | The first input is **Gene_expression_mat** which is an $N \times G$ dataframe. Here $N$ denotes the number of spatial locations and $G$ denotes the number of genes.
 56 | 
 57 | The second input is **Spatial_locations**, a dataframe which contains spatial coordinates.
 58 | 
 59 | The third input is **Cluster_annotations**.
 60 | 
 61 | The fourth input is **sPMM**. If TRUE, the code will return the estimates of sigma1_sq and sigma2_sq from the spatial Poisson mixed model.
 62 | 
 63 | The fifth input is **Post_process**. If FALSE (the default), the code will return the posterior samples of $\Phi$ and $\Psi^c$ (based on definition in equation 1 of the SpaceX paper) only. If TRUE, the code will return all the posterior samples together with the shared and cluster specific co-expression networks.
 64 | 
 65 | The sixth input is **numCore**, the number of cores requested for parallel computing (default 1). **nrun** (default 10000) and **burn** (default 5000) are also accepted.
 66 | 
 67 | ### Output
 68 | You will obtain a list of objects as output.
 69 | 
 70 | **Posterior_samples**	contains all the posterior samples.
 71 | 
 72 | **Shared_network** provides the shared co-expression matrix (transformed correlation matrix of $G_{s} = \Phi \Phi^{T}$).
 73 | 
 74 | **Cluster_network** provides the cluster specific co-expression matrices (transformed correlation matrices of $G_{c} = \Phi \Phi^{T} + \Psi^{c} {\Psi^{c^{T}}}$).
 75 | 
 76 | 
 77 | 
 78 | ## Example 
 79 | An example code with the breast cancer data to demonstrate how to run the SpaceX function and obtain shared and cluster specific networks.
 80 | ```{r , eval=FALSE} 
 81 | ## Reading the Breast cancer data
 82 | 
 83 | ## Spatial locations
 84 | head(BC_loc)
 85 | 
 86 | ## Gene expression for data
 87 | head(BC_count) 
 88 | 
 89 | ## Data processing
 90 | G <-dim(BC_count)[2] ## number of genes
 91 | N <-dim(BC_count)[1] ## number of locations
 92 | 
 93 | ## Application to SpaceX algorithm (Please make sure to request for large enough memory to work with the posterior samples)
 94 | BC_fit <- SpaceX(BC_count,BC_loc[,1:2],BC_loc[,3],sPMM=FALSE,Post_process = TRUE,numCore = 2)
 95 | 
 96 | ## Shared_network :: Shared co-expression matrix
 97 | ## Cluster_network :: Cluster specific co-expression matrices
 98 | 
 99 | ```
100 | 
101 | 
102 | ## Tutorial website
103 | The tutorial website can be found [here](https://satwikach.github.io/SpaceX.github.io/).
104 | 
105 | ## Paper 
106 | Satwik Acharyya, Xiang Zhou and Veerabhadran Baladandayuthapani (2022). [SpaceX: Gene Co-expression Network Estimation for Spatial Transcriptomics](https://doi.org/10.1093/bioinformatics/btac645). Bioinformatics,  38(22): 5033–5041.
107 | 
108 | ## Supplementary file 
109 | [Supplementary](https://bookdown.org/satwik91/SpaceX_supplementary/)
110 | 
111 | ## Points to note
112 | + Please run the SpaceX package in R 4.1.2.  
113 | 
114 | + Please email at satwika@umich.edu for any issues. 
115 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  3 | 
  4 | # SpaceX overview
  5 | 
  6 | <!-- badges: start -->
  7 | <!-- badges: end -->
  8 | 
  9 | ![SpaceXpipeline](SpaceX_Overview.jpg) The SpaceX (<u>spa</u>tially
 10 | dependent gene <u>c</u>o-<u>ex</u>pression network) is a Bayesian
 11 | methodology to identify both shared and cluster-specific co-expression
 12 | network across genes. These clusters can be cell type specific or based
 13 | on spatial regions. SpaceX uses an over-dispersed spatial Poisson model
 14 | coupled with a high-dimensional factor model which is based on a
 15 | dimension reduction technique for computational efficiency.
 16 | 
 17 | The Figure above shows the overall conceptual flow of our pipeline.
 18 | **Panel A** is an image of a tissue section from the region of interest.
 19 | **Panel B** shows spatial gene expression and biomarkers which are
 20 | recorded from that tissue section with the help of sequencing
 21 | techniques. **Panel C** is the resulting data matrix of gene expression
 22 | along with spatial locations and cluster annotations on the tissue. All
 23 | these serve as input for the SpaceX model to obtain the shared (**Panel
 24 | D**) and cluster-specific co-expression networks (**Panel E**). Finally,
 25 | we use these networks for downstream analysis to detect gene modules and
 26 | hub genes across spatial regions (**Panel F** & **Panel G**
 27 | respectively) for biological interpretation.
 28 | 
 29 | <!-- The goal of SpaceX is to provide shared and cluster specific gene co-expression networks for spatial transcriptomics data. -->
 30 | 
 31 | ## Installation
 32 | 
 33 | This package requires a Fortran compiler in order to work. Here are the
 34 | instructions:
 35 | 
 36 | -   Windows: install the Rtools package that is appropriate for your
 37 |     version of R
 38 | 
 39 | -   Mac: Go to this website and follow the instructions:
 40 |     (<https://mac.R-project.org/tools/>)
 41 | 
 42 | -   Linux (Debian/Ubuntu): run `sudo apt install gcc gfortran` from a
 43 |     terminal. That will install the C and Fortran compilers.
 44 | 
 45 | The package requires a dependency that is not available on CRAN. Install
 46 | it with:
 47 | 
 48 | ``` r
 49 | remotes::install_github("rdevito/MSFA")
 50 | ```
 51 | 
 52 | You can install the released version of SpaceX from
 53 | (<https://github.com/SatwikAch/SpaceX>) with:
 54 | 
 55 | ``` r
 56 | devtools::install_github("SatwikAch/SpaceX")
 57 | ```
 58 | 
 59 | ``` r
 60 | library(SpaceX)
 61 | #> Loading required package: PQLseq
 62 | ```
 63 | 
 64 | ## SpaceX function
 65 | 
 66 | ### Inputs
 67 | 
 68 | The first input is **Gene\_expression\_mat** which is an $N \times G$
 69 | dataframe. Here $N$ denotes the number of spatial locations and $G$
 70 | denotes the number of genes.
 71 | 
 72 | The second input is **Spatial\_locations**, a dataframe which contains
 73 | spatial coordinates.
 74 | 
 75 | The third input is **Cluster\_annotations**.
 76 | 
 77 | The fourth input is **sPMM**. If TRUE, the code will return the
 78 | estimates of sigma1\_sq and sigma2\_sq from the spatial Poisson mixed
 79 | model.
 80 | 
 81 | The fifth input is **Post\_process**. If FALSE (the default), the code
 82 | will return the posterior samples of $\Phi$ and $\Psi^c$ (based on
 83 | definition in equation 1 of the SpaceX paper) only. If TRUE, the code
 84 | will return all the posterior samples together with the shared and
 85 | cluster specific co-expression networks.
 86 | 
 87 | The sixth input is **numCore**, the number of cores requested for
 88 | parallel computing (default 1). **nrun** (default 10000) and **burn** (default 5000) are also accepted.
 89 | 
 90 | ### Output
 91 | 
 92 | You will obtain a list of objects as output.
 93 | 
 94 | **Posterior\_samples** contains all the posterior samples.
 95 | 
 96 | **Shared\_network** provides the shared co-expression matrix
 97 | (transformed correlation matrix of $G_{s} = \Phi \Phi^{T}$).
 98 | 
 99 | **Cluster\_network** provides the cluster specific co-expression
100 | matrices (transformed correlation matrices of
101 | $G_{c} = \Phi \Phi^{T} + \Psi^{c} {\Psi^{c^{T}}}$).
102 | 
103 | ## Example
104 | 
105 | An example code with the breast cancer data to demonstrate how to run
106 | the SpaceX function and obtain shared and cluster specific networks.
107 | 
108 | ``` r
109 | ## Reading the Breast cancer data
110 | 
111 | ## Spatial locations
112 | head(BC_loc)
113 | 
114 | ## Gene expression for data
115 | head(BC_count) 
116 | 
117 | ## Data processing
118 | G <-dim(BC_count)[2] ## number of genes
119 | N <-dim(BC_count)[1] ## number of locations
120 | 
121 | ## Application to SpaceX algorithm (Please make sure to request for large enough memory to work with the posterior samples)
122 | BC_fit <- SpaceX(BC_count,BC_loc[,1:2],BC_loc[,3],sPMM=FALSE,Post_process = TRUE,numCore = 2)
123 | 
124 | ## Shared_network :: Shared co-expression matrix
125 | ## Cluster_network :: Cluster specific co-expression matrices
126 | ```
127 | 
128 | ## Tutorial website
129 | 
130 | The tutorial website can be found
131 | [here](https://satwikach.github.io/SpaceX.github.io/).
132 | 
133 | ## Paper
134 | 
135 | Satwik Acharyya, Xiang Zhou and Veerabhadran Baladandayuthapani (2022).
136 | [SpaceX: Gene Co-expression Network Estimation for Spatial
137 | Transcriptomics](https://doi.org/10.1093/bioinformatics/btac645).
138 | Bioinformatics, 38(22): 5033–5041.
139 | 
140 | ## Supplementary file
141 | 
142 | [Supplementary](https://bookdown.org/satwik91/SpaceX_supplementary/)
143 | 
144 | ## Points to note
145 | 
146 | -   Please run the SpaceX package in R 4.1.2.
147 | 
148 | -   Please email at <satwika@umich.edu> for any issues.
149 | 


--------------------------------------------------------------------------------
/SpaceX.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace,vignette
22 | 


--------------------------------------------------------------------------------
/SpaceX_Overview.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/SpaceX_Overview.jpg


--------------------------------------------------------------------------------
/data/BC_count.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/data/BC_count.rda


--------------------------------------------------------------------------------
/data/BC_loc.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/data/BC_loc.rda


--------------------------------------------------------------------------------
/man/SpaceX.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SpaceX.R
 3 | \name{SpaceX}
 4 | \alias{SpaceX}
 5 | \title{Estimation of shared and cluster specific gene co-expression networks for spatial transcriptomics data.}
 6 | \usage{
 7 | SpaceX(
 8 |   Gene_expression_mat,
 9 |   Spatial_locations,
10 |   Cluster_annotations,
11 |   sPMM = FALSE,
12 |   Post_process = FALSE,
13 |   numCore = 1,
14 |   nrun = 10000,
15 |   burn = 5000
16 | )
17 | }
18 | \arguments{
19 | \item{Gene_expression_mat}{Gene expression dataframe (N X G).}
20 | 
21 | \item{Spatial_locations}{Spatial locations with coordinates. This should be provided as dataframe.}
22 | 
23 | \item{Cluster_annotations}{Cluster annotations for each of the spatial location.}
24 | 
25 | \item{sPMM}{If \code{TRUE}, the code will return the estimates of sigma1_sq and sigma2_sq from the spatial Poisson mixed model.}
26 | 
27 | \item{Post_process}{If \code{FALSE} (the default), the code will return the posterior samples of \code{Phi} and \code{Psi^c} (based on definition in equation 1 of the SpaceX paper) only.
28 | If \code{TRUE}, the code will return all the posterior samples, shared and cluster specific co-expressions.}
29 | 
30 | \item{numCore}{The number of cores for parallel computing (default = 1).}
31 | 
32 | \item{nrun}{default = 10000}
33 | 
34 | \item{burn}{default = 5000}
35 | }
36 | \value{
37 | \item{Posterior_samples}{Posterior samples}
38 | \item{Shared_network}{Shared co-expression matrix}
39 | \item{Cluster_network}{Cluster specific co-expression matrices}
40 | }
41 | \description{
42 | SpaceX function estimates shared and cluster specific gene co-expression networks for spatial transcriptomics data. Please make sure to provide the gene expression and spatial location inputs as dataframes. More details about the SpaceX algorithm can be found in the reference paper.
43 | }
44 | \examples{
45 | # Implementation details and examples can be found at this link: https://bookdown.org/satwik91/SpaceX_supplementary/
46 | 
47 | 
48 | }
49 | \references{
50 | Acharyya S., Zhou X., Baladandayuthapani V. (2022). SpaceX: Gene Co-expression Network Estimation for Spatial Transcriptomics. Bioinformatics, 38(22), 5033-5041.
51 | }
52 | 


--------------------------------------------------------------------------------
/man/figures/README-pressure-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/man/figures/README-pressure-1.png


--------------------------------------------------------------------------------
/man/pqlseq_modified.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pqlseq_modified.R
 3 | \name{pqlseq_modified}
 4 | \alias{pqlseq_modified}
 5 | \title{Fit Generalized Linear Mixed Model with Known Kinship Matrices Through Penalized-quasi Likelihood}
 6 | \usage{
 7 | pqlseq_modified(
 8 |   RawCountDataSet,
 9 |   Phenotypes,
10 |   Covariates = NULL,
11 |   RelatednessMatrix = NULL,
12 |   LibSize = NULL,
13 |   fit.model = "PMM",
14 |   fit.method = "AI.REML",
15 |   fit.maxiter = 500,
16 |   fit.tol = 1e-05,
17 |   numCore = 1,
18 |   filtering = TRUE,
19 |   verbose = FALSE,
20 |   ...
21 | )
22 | }
23 | \arguments{
24 | \item{RawCountDataSet}{a data frame containing the read count.}
25 | 
26 | \item{Phenotypes}{a vector containing the predictor of interest.}
27 | 
28 | \item{Covariates}{a data frame containing the covariates subject to adjustment (Default = NULL).}
29 | 
30 | \item{RelatednessMatrix}{a known relationship matrix (e.g. kinship matrix in genetic studies). When supplied with a matrix, this matrix should be a positive semi-definite matrix with dimensions equal to the sample size in count data, and the order of subjects in this matrix should also match the order of subjects in count data. Currently there is no ID checking feature implemented, and it is the user's responsibility to match the orders.}
31 | 
32 | \item{LibSize}{a data frame containing the total read count. For the Poisson mixed model, it will be calculated automatically if users do not provide it. For the binomial mixed model, it is required.}
33 | 
34 | \item{fit.model}{a description of the error distribution and link function to be used in the model. Either "PMM" for the Poisson model, or "BMM" for the binomial model (default = "PMM").}
35 | 
36 | \item{fit.method}{method of fitting the generalized linear mixed model; currently only the "REML" version is available (default = "AI.REML").}
37 | 
38 | \item{fit.maxiter}{a positive integer specifying the maximum number of iterations when fitting the generalized linear mixed model (default = 500).}
39 | 
40 | \item{fit.tol}{a positive number specifying tolerance, the difference threshold for parameter estimates below which iterations should be stopped (default = 1e-5).}
41 | 
42 | \item{numCore}{a positive integer specifying the number of cores for parallel computing (default = 1).}
43 | 
44 | \item{filtering}{a logical switch for RNAseq data. By default, for each gene, at least two individuals should have read counts greater than 5. Otherwise, the gene is filtered (default = TRUE).}
45 | 
46 | \item{verbose}{a logical switch for printing detailed information (parameter estimates in each iteration) for testing and debugging purpose (default = FALSE).}
47 | 
48 | \item{...}{additional arguments that could be passed to glm.}
49 | }
50 | \value{
51 | \item{numIDV}{number of individuals with data being analyzed}
52 | \item{beta}{the fixed effect parameter estimate for the predictor of interest.}
53 | \item{se_beta}{the standard deviation of fixed effect.}
54 | \item{pvalue}{P value for the fixed effect, based on the Wald test.}
55 | \item{h2}{heritability of the transformed rate.}
56 | \item{sigma2}{total variance component.}
57 | \item{overdisp}{dispersion parameter estimate.}
58 | \item{converged}{a logical indicator for convergence.}
59 | }
60 | \description{
61 | Fit a generalized linear mixed model with a random intercept. The covariance matrix of the random intercept is proportional to a known kinship matrix. This is a modified version of the pqlseq algorithm particularly for the SpaceX package. For more details check the pqlseq function from PQLseq package.
62 | }
63 | \references{
64 | Sun, S., Hood, M., Scott, L., Peng, Q., Mukherjee, S., Tung, J., and Zhou, X. (2017). Differential expression analysis for RNAseq using Poisson mixed models. Nucleic Acids Research, 45(11), e106.
65 | }
66 | 


--------------------------------------------------------------------------------
/src/Makevars:
--------------------------------------------------------------------------------
1 | FC= gfortran
2 | F77= gfortran
3 | CC = gcc
4 | 
5 | SpaceX.o: SpaceX.f95
6 | 	$(FC) -c SpaceX.f95 -o SpaceX.o  -fPIC
7 | 	$(FC) -o SpaceX.so SpaceX.o  -shared  -fopenmp
8 | 
9 | 


--------------------------------------------------------------------------------
/src/Makevars.win:
--------------------------------------------------------------------------------
1 | FC= gfortran
2 | F77= gfortran
3 | CC = gcc
4 | 
5 | SpaceX.o: SpaceX.f95
6 | 	$(FC) -c SpaceX.f95 -o SpaceX.o  -fPIC
7 | 	$(FC) -o SpaceX.dll SpaceX.o  -shared  -fopenmp
8 | 
9 | 


--------------------------------------------------------------------------------
/src/Makevars.win~:
--------------------------------------------------------------------------------
1 | FC= gfortran
2 | F77= gfortran
3 | CC = gcc
4 | 
5 | newSpace2.o: newSpace2.f95
6 | 	$(FC) -c newSpace2.f95 -o newSpace2.o  -fPIC
7 | 	$(FC) -o newSpace2.dll newSpace2.o  -shared  -fopenmp
8 | 
9 | 


--------------------------------------------------------------------------------
/src/SpaceX.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/src/SpaceX.dll


--------------------------------------------------------------------------------
/src/SpaceX.f95:
--------------------------------------------------------------------------------
  1 | subroutine bigtdsub(n,m,o,x,z,b,s,L)
  2 | !
  3 | !  Slice-averaged, unit-diagonal cross-product matrices.
  4 | !
  5 | !  For each slice ii = 1..o, with Y = y(:,:,ii):
  6 | !    * Y*Y^T is rescaled to unit diagonal (see cov2acor);
  7 | !    * for each j = 1..L, with D = d(:,:,ii,j), Y*Y^T + D*D^T is rescaled
  8 | !      the same way.
  9 | !  Both sets of rescaled matrices are then averaged over the o slices.
 10 | !
 11 | !  Arguments
 12 | !    n, m, o : extents of one input stack (rows, columns, slices)
 13 | !    L       : number of stacks held in z
 14 | !    x       : n*m*o values, read in column-major order as y(n,m,o)
 15 | !    z       : n*m*o*L values, read in column-major order as d(n,m,o,L)
 16 | !    b       : output, n*n values: slice average of the rescaled Y*Y^T
 17 | !    s       : output, n*n*L values: for each j, the slice average of the
 18 | !              rescaled Y*Y^T + D*D^T
 19 | !
 20 | !  NOTE(review): x and z are presumably the posterior draws of Phi and
 21 | !  Psi^c passed in from R, with L the number of clusters - confirm against
 22 | !  the caller in R/SpaceX.R.
 23 | !  Neither a zero diagonal entry in a cross-product nor o = 0 is guarded
 24 | !  against; both lead to non-finite results.
 25 | !
 26 |   implicit none
 27 | 
 28 |   integer, intent(in) :: n, m, o, L
 29 |   real(kind = 4), intent(in) :: x(n*m*o)
 30 |   real(kind = 4), intent(in) :: z(n*m*o*L)
 31 |   real, intent(out) :: b(n*n)
 32 |   real, intent(out) :: s(n*n*L)
 33 | 
 34 |   integer :: ii, j
 35 |   real, allocatable :: y(:,:,:)      ! x viewed as (n,m,o)
 36 |   real, allocatable :: d(:,:,:,:)    ! z viewed as (n,m,o,L)
 37 |   real, allocatable :: c(:,:,:)      ! Y*Y^T for every slice
 38 |   real, allocatable :: v(:,:,:)      ! rescaled c for every slice
 39 |   real, allocatable :: u(:,:,:,:)    ! rescaled Y*Y^T + D*D^T for every slice and j
 40 |   real, allocatable :: e(:,:)        ! scratch: D*D^T
 41 |   real, allocatable :: r(:,:)        ! scratch: Y*Y^T + D*D^T
 42 |   real, allocatable :: w(:,:)        ! average of v over the slices
 43 |   real, allocatable :: q(:,:,:)      ! average of u over the slices
 44 | 
 45 |   allocate(y(1:n,1:m,1:o))
 46 |   allocate(d(1:n,1:m,1:o,1:L))
 47 |   allocate(c(1:n,1:n,1:o))
 48 |   allocate(v(1:n,1:n,1:o))
 49 |   allocate(u(1:n,1:n,1:o,1:L))
 50 |   allocate(e(1:n,1:n))
 51 |   allocate(r(1:n,1:n))
 52 |   allocate(w(1:n,1:n))
 53 |   allocate(q(1:n,1:n,1:L))
 54 | 
 55 |   y = reshape(x,(/n,m,o/))
 56 |   d = reshape(z,(/n,m,o,L/))
 57 | 
 58 |   ! Each slice writes only its own part of c, v and u, so the slices can be
 59 |   ! shared between threads when the file is compiled with OpenMP enabled;
 60 |   ! without it the directives below are ordinary comments.  e and r are
 61 |   ! per-thread scratch space.
 62 |   !$omp parallel do default(none) shared(n,m,o,L,y,d,c,v,u) private(ii,j,e,r)
 63 |   do ii = 1, o
 64 |      call gram(y(1:n,1:m,ii), n, m, c(1:n,1:n,ii))
 65 |      call cov2acor(c(1:n,1:n,ii), n, v(1:n,1:n,ii))
 66 |      ! Must run to L, not a fixed count: every u(:,:,:,j), j = 1..L, is
 67 |      ! read by mean2sub below.
 68 |      do j = 1, L
 69 |         call gram(d(1:n,1:m,ii,j), n, m, e)
 70 |         ! c(:,:,ii) already holds Y*Y^T, so it is reused, not recomputed.
 71 |         r = c(1:n,1:n,ii) + e
 72 |         call cov2acor(r, n, u(1:n,1:n,ii,j))
 73 |      end do
 74 |   end do
 75 |   !$omp end parallel do
 76 | 
 77 |   call mean1sub(n, n, o, v, w)
 78 |   call mean2sub(n, n, o, L, u, q)
 79 | 
 80 |   ! Flatten column-major into the caller's output vectors.
 81 |   b = reshape(w, (/n*n/))
 82 |   s = reshape(q, (/n*n*L/))
 83 | 
 84 |   deallocate(y, d, c, v, u, e, r, w, q)
 85 | 
 86 | contains
 87 | 
 88 |   ! c = a * a^T for the n-by-m matrix a; each entry is accumulated over
 89 |   ! the m columns in ascending order.
 90 |   subroutine gram(a, n, m, c)
 91 |     integer, intent(in) :: n, m
 92 |     real, dimension(:,:), intent(in)  :: a
 93 |     real, dimension(:,:), intent(out) :: c
 94 |     integer :: i, j, k
 95 | 
 96 |     do j = 1, n
 97 |        do i = 1, n
 98 |           c(i,j) = 0.0
 99 |           do k = 1, m
100 |              c(i,j) = c(i,j) + a(i,k) * a(j,k)
101 |           end do
102 |        end do
103 |     end do
104 |   end subroutine gram
105 | 
106 |   ! b(i,j) = sqrt(1/a(i,i)) * a(i,j) * sqrt(1/a(j,j)): rescales the n-by-n
107 |   ! matrix a to unit diagonal.
108 |   subroutine cov2acor(a, n, b)
109 |     integer, intent(in) :: n
110 |     real, dimension(:,:), intent(in)  :: a
111 |     real, dimension(:,:), intent(out) :: b
112 |     real :: sq(n)
113 |     integer :: i, j
114 | 
115 |     do i = 1, n
116 |        sq(i) = sqrt((1/a(i,i)))
117 |     end do
118 |     do j = 1, n
119 |        do i = 1, n
120 |           b(i,j) = sq(i) * a(i,j) * sq(j)
121 |        end do
122 |     end do
123 |   end subroutine cov2acor
124 | 
125 |   ! b(i,j) = average of a(i,j,k) over k = 1..o.
126 |   subroutine mean1sub(n, m, o, a, b)
127 |     integer, intent(in) :: n, m, o
128 |     real, intent(in)  :: a(n,m,o)
129 |     real, intent(out) :: b(n,m)
130 |     integer :: i, j, k
131 |     real :: sum1
132 | 
133 |     do j = 1, m
134 |        do i = 1, n
135 |           sum1 = 0.0
136 |           do k = 1, o
137 |              sum1 = sum1 + a(i,j,k)
138 |           end do
139 |           b(i,j) = sum1/o
140 |        end do
141 |     end do
142 |   end subroutine mean1sub
143 | 
144 |   ! b(i,j,k) = average of a(i,j,is,k) over is = 1..o, for k = 1..p.
145 |   ! The output extent is (n,m,p): one averaged matrix per k.
146 |   subroutine mean2sub(n, m, o, p, a, b)
147 |     integer, intent(in) :: n, m, o, p
148 |     real, intent(in)  :: a(n,m,o,p)
149 |     real, intent(out) :: b(n,m,p)
150 |     integer :: i, j, k, is
151 |     real :: sum1
152 | 
153 |     do k = 1, p
154 |        do j = 1, m
155 |           do i = 1, n
156 |              sum1 = 0.0
157 |              do is = 1, o
158 |                 sum1 = sum1 + a(i,j,is,k)
159 |              end do
160 |              b(i,j,k) = sum1/o
161 |           end do
162 |        end do
163 |     end do
164 |   end subroutine mean2sub
165 | 
166 | end subroutine bigtdsub
217 | 
218 | 
219 | 
220 | 


--------------------------------------------------------------------------------
/src/SpaceX.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/src/SpaceX.o


--------------------------------------------------------------------------------
/src/SpaceX.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/src/SpaceX.so


--------------------------------------------------------------------------------