├── .DS_Store ├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── Hub genes ├── BC_Hub_genes_TCGA.csv ├── BC_Hub_genes_edges.xlsx └── Merfish_Hub_genes_edges.xlsx ├── NAMESPACE ├── R ├── SpaceX.R └── pqlseq_modified.R ├── README.Rmd ├── README.md ├── SpaceX.Rproj ├── SpaceX_Overview.jpg ├── data ├── BC_count.rda └── BC_loc.rda ├── man ├── SpaceX.Rd ├── figures │ └── README-pressure-1.png └── pqlseq_modified.Rd └── src ├── Makevars ├── Makevars.win ├── Makevars.win~ ├── SpaceX.dll ├── SpaceX.f95 ├── SpaceX.o └── SpaceX.so /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/.DS_Store -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^README\.Rmd$ 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: SpaceX 2 | Type: Package 3 | Title: Gene Co-expression Network Estimation for Spatial Transcriptomics 4 | Version: 0.1.0 5 | Author: Satwik Acharyya 6 | Maintainer: Satwik Acharyya 7 | Description: Provides shared and cluster specific gene co-expression networks 8 | for spatial transcriptomics data.
9 | License: GPL-3 10 | Encoding: UTF-8 11 | LazyData: true 12 | RoxygenNote: 7.2.3 13 | Imports: doParallel, MSFA, foreach 14 | Depends: PQLseq 15 | -------------------------------------------------------------------------------- /Hub genes/BC_Hub_genes_TCGA.csv: -------------------------------------------------------------------------------- 1 | Single_cell_hub_genes,Spatial_shared_hub_genes,Intersecting Hub Genes 2 | SPDEF,COL3A1,FN1 3 | MMP14,FN1,LUM 4 | SERF2,LUM,XBP1 5 | GNAI2,COL1A2,S100A10 6 | GAPDH,POSTN,HNRNPA2B1 7 | MMP11,FSTL1,COL5A1 8 | GLIS2,COL1A1,RPLP0 9 | GADD45GIP1,COL6A3,RPS27 10 | CD63,XBP1,COL6A2 11 | TCEB2,SPARC,IGFBP7 12 | IGFBP7,S100A10,H3F3A 13 | SMG7,COL12A1,MFAP2 14 | RPL29,GNAS,DPYSL3 15 | VIM,MMP11,VIM 16 | MRC2,AEBP1,COL16A1 17 | S100A14,HNRNPA2B1,RPS14 18 | CD81,COL5A1,HTRA1 19 | CIRBP,SELT,CD63 20 | TUBA1B,ASPN,RPL3 21 | MYL9,RPLP0, 22 | ATP5B,STAT1, 23 | HNRNPA2B1,SSR4, 24 | TPI1,RPS11, 25 | XBP1,TMSB4X, 26 | SELM,RPS27, 27 | APOE,COL6A2, 28 | LMNA,VWF, 29 | S100A10,SAA1, 30 | PODXL2,DBI, 31 | CYC1,IGFBP7, 32 | GPAA1,RPL36, 33 | LGALS3,RPLP1, 34 | RPS15,TFF3, 35 | SPNS1,APOD, 36 | MAPK3,B2M, 37 | HTRA1,H3F3A, 38 | CXCL12,TIMP3, 39 | RPS27,MFAP2, 40 | ADAM15,KLF6, 41 | FN1,DCN, 42 | DPYSL3,AZGP1, 43 | FXYD3,FOS, 44 | C1R,DPYSL3, 45 | COL5A1,VIM, 46 | RPLP0,PEG10, 47 | ROMO1,COL16A1, 48 | EEF1A1,ADIRF, 49 | RPL10,RPS18, 50 | RPS14,TGFB1, 51 | COL6A2,MGP, 52 | LRP1,CHCHD2, 53 | TECR,RPS14, 54 | SPINT2,HTRA1, 55 | RPL3,RPS4X, 56 | LUM,CD63, 57 | BSG,RPL30, 58 | MYO1C,SPPL2B, 59 | CD55,RPL3, 60 | RPS9,LRRC15, 61 | COX6B1,, 62 | NDUFA11,, 63 | GGCT,, 64 | COL10A1,, 65 | LDHA,, 66 | COL16A1,, 67 | MAFB,, 68 | NDUFB10,, 69 | MFAP2,, 70 | LGMN,, 71 | LDHB,, 72 | H3F3A,, 73 | MGST3,, 74 | MIF,, 75 | YIF1B,, 76 | BCAP31,, 77 | EIF4G2,, 78 | KRT17,, -------------------------------------------------------------------------------- /Hub genes/BC_Hub_genes_edges.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/Hub genes/BC_Hub_genes_edges.xlsx -------------------------------------------------------------------------------- /Hub genes/Merfish_Hub_genes_edges.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/Hub genes/Merfish_Hub_genes_edges.xlsx -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | importFrom("stats", "binomial", "dist", "glm", "model.frame", 2 | "na.omit", "na.pass", "pchisq", "poisson") 3 | importFrom("stats", "model.matrix", "var") 4 | export(SpaceX) 5 | export(pqlseq_modified) 6 | importFrom("doParallel", "registerDoParallel") 7 | importFrom("parallel", "detectCores") 8 | importFrom("foreach", "%dopar%", "foreach") 9 | importFrom("MSFA", "sp_msfa") 10 | useDynLib(SpaceX) 11 | -------------------------------------------------------------------------------- /R/SpaceX.R: -------------------------------------------------------------------------------- 1 | #' @title Estimation of shared and cluster specific gene co-expression networks for spatial transcriptomics data. 2 | #' @title Estimation of shared and cluster specific gene co-expression networks for spatial transcriptomics data. 3 | #' 4 | #' @description SpaceX function estimates shared and cluster specific gene co-expression networks for spatial transcriptomics data. Please make sure to provide both inputs as dataframe. More details about the SpaceX algorithm can be found in the reference paper. 5 | #' 6 | #' @param Gene_expression_mat Gene expression dataframe (N X G). 7 | #' @param Spatial_locations Spatial locations with coordinates. This should be provided as dataframe. 8 | #' @param Cluster_annotations Cluster annotations for each of the spatial location. 
9 | #' @param sPMM If \code{TRUE}, the code will return the estimates of sigma1_sq and sigma2_sq from the spatial Poisson mixed model. 10 | #' @param Post_process If \code{FALSE} (the default), the code will return the posterior samples of \code{Phi} and \code{Psi^c} (based on definition in equation 1 of the SpaceX paper) only. 11 | #' If \code{TRUE}, the code will return all the posterior samples together with the shared and cluster specific co-expressions. 12 | #' @param numCore The number of cores for parallel computing (default = 1). 13 | #' @param nrun Used together with \code{burn}: \code{nrun - burn} posterior samples are passed to the post-processing step (default = 10000). 14 | #' @param burn Number of samples subtracted from \code{nrun} before post-processing (default = 5000). 15 | #' 16 | #' @return 17 | #' \item{Posterior_samples}{Posterior samples} 18 | #' \item{Shared_network}{Shared co-expression matrix} 19 | #' \item{Cluster_network}{Cluster specific co-expression matrices} 20 | #' 21 | #' @references Acharyya S., Zhou X., Baladandayuthapani V. (2021). SpaceX: Gene Co-expression Network Estimation for Spatial Transcriptomics. 22 | #' 23 | #' @examples Implementation details and examples can be found at this link https://bookdown.org/satwik91/SpaceX_supplementary/.
24 | #' 25 | #' 26 | SpaceX <- function(Gene_expression_mat, Spatial_locations, Cluster_annotations, 27 | sPMM=FALSE,Post_process=FALSE,numCore = 1, 28 | nrun=10000,burn=5000){ 29 | 30 | Spatial_loc = as.data.frame(cbind(Spatial_locations,Cluster_annotations)) 31 | 32 | #### Global Parameters ###### 33 | G <-dim(Gene_expression_mat)[2] 34 | L <- length(unique(Spatial_loc[,3])) 35 | Clusters <- unique(Spatial_loc[,3]) 36 | N_l <- numeric() 37 | sigma1_sq_est <- matrix(0,G,L) 38 | sigma2_sq_est <- matrix(0,G,L) 39 | u <-list() 40 | Z_est <- list() ##latent gene expression matrix 41 | 42 | ### Cluster sizes #### 43 | for (l in 1:L) { 44 | pos <- which(Spatial_loc[,3] == Clusters[l]) 45 | N_l[l] <- length(pos) 46 | Z_est[[l]] <- matrix(0,nrow = N_l[l],ncol = G) 47 | } 48 | 49 | 50 | ### Poisson mixed model with PQLSEQ algorithm 51 | for (l in 1:L) { 52 | 53 | pos <- which(Spatial_loc[,3] == Clusters[l]) 54 | 55 | ### Rho estimation ### 56 | a <- dist(Spatial_loc[pos,-3]) 57 | a_max <- log10(2*max(a)) 58 | a_min <- log10(min(a)/2) 59 | a_seq <- seq(a_min, a_max, length.out = 10) 60 | rho_l <- 10^(a_seq[5]) 61 | 62 | Y_mat <- as.matrix(Gene_expression_mat[pos,], rownames.force = F) 63 | colnames(Y_mat) <- NULL 64 | location <- Spatial_loc[pos,-3] 65 | 66 | cov_kernel_l <- matrix(0,N_l[l],N_l[l]) 67 | for (i in 1:N_l[l]) { 68 | for (j in 1:i) { 69 | dist_loc <- (Spatial_loc[i,1] - Spatial_loc[j,1])^2 + (Spatial_loc[i,2] - Spatial_loc[j,2])^2 70 | cov_kernel_l[i,j] <- exp(-dist_loc/(2*rho_l^2)) 71 | cov_kernel_l[j,i] <- cov_kernel_l[i,j] 72 | }} 73 | 74 | print("Spatial Poisson Mixed Model") 75 | fit <- pqlseq_modified(RawCountDataSet=t(Y_mat),Phenotypes= rep(1,N_l[l]), RelatednessMatrix = cov_kernel_l, 76 | fit.model="PMM", numCore = numCore) 77 | 78 | j = 1 + (0:(G-1))*N_l[l] 79 | sigma1_sq_est[,l] <- fit$tau1[j] 80 | sigma2_sq_est[,l] <- fit$tau2[j] 81 | u[[l]] <- matrix(fit$residual, nrow = G, byrow = TRUE) 82 | 83 | ## Estimation of latent gene expression 84 | for (g 
in 1:G) { 85 | 86 | if(sigma1_sq_est[g,l]< 0.01 || sigma2_sq_est[g,l]< 0.01){ 87 | V <- (sigma1_sq_est[g,l]+0.001)*cov_kernel_l + (sigma2_sq_est[g,l]+0.001)*diag(N_l[l]) 88 | } 89 | else{ 90 | V <- (sigma1_sq_est[g,l])*cov_kernel_l + (sigma2_sq_est[g,l])*diag(N_l[l]) 91 | } 92 | 93 | if(sigma2_sq_est[g,l]==0){ 94 | Z_est[[l]][,g] <- ((sigma2_sq_est[g,l]+0.001)*solve(V))%*%u[[l]][g,] 95 | } 96 | else{ 97 | Z_est[[l]][,g] <- ((sigma2_sq_est[g,l])*solve(V))%*%u[[l]][g,] 98 | } 99 | 100 | } 101 | print(l) 102 | } 103 | 104 | ## Applying multi-study factor model on latent gene expression matrix 105 | print("Multi-Study Factor Model") 106 | fit_MSFA = sp_msfa(Z_est, k = 10, j_s = rep(10,L), trace = FALSE) 107 | 108 | if(Post_process==FALSE){ 109 | AA <- list(Posterior_samples=fit_MSFA) 110 | } 111 | else{ 112 | ## Post processing of the posterior samples 113 | nrun <- nrun - burn 114 | F <- dim(fit_MSFA$Phi)[2] 115 | 116 | SpaceProc <- .Fortran("bigtdsub",n=as.integer(G), 117 | m=as.integer(F),o=as.integer(nrun), 118 | x=as.single(fit_MSFA$Phi), 119 | z=as.single(unlist(fit_MSFA$Lambda)), 120 | b=as.single(rep(0,G*G)),s=as.single(rep(0,G*G*L)),L=as.integer(L)) 121 | 122 | Sh1 <- matrix(SpaceProc$b,nrow=G,ncol=G) 123 | Clus1 <- array(SpaceProc$s,c(G,G,L)) 124 | 125 | AA <- list(Posterior_samples=fit_MSFA,Shared_network=Sh1,Cluster_network=Clus1) 126 | } 127 | 128 | 129 | if(sPMM==FALSE){ 130 | return(AA) 131 | } 132 | else{ 133 | return(c(AA,sigma1_sq_est=sigma1_sq_est,sigma2_sq_est=sigma2_sq_est)) 134 | } 135 | 136 | } 137 | -------------------------------------------------------------------------------- /R/pqlseq_modified.R: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | ## 3 | ## The pqlseq algorithm is developed in the following paper: 4 | ## Title : Heritability Estimation and Differential Analysis with Generalized Linear Mixed Models in 
Large-Scale Genomic Sequencing Studies 5 | ## Authors: Shiquan Sun, Jiaqiang Zhu, and Xiang Zhou: Package: PQLseq. 6 | ## This is a modified version of the pqlseq algorithm particularly for the SpaceX package. 7 | ## 8 | ################################################################################## 9 | 10 | #' Fit Generalized Linear Mixed Model with Known Kinship Matrices Through Penalized-quasi Likelihood 11 | #' 12 | #' Fit a generalized linear mixed model with a random intercept. The covariance matrix of the random intercept is proportional to a known kinship matrix. This is a modified version of the pqlseq algorithm particularly for the SpaceX package. For more details check the pqlseq function from PQLseq package. 13 | #' 14 | #' @param RawCountDataSet a data frame containing the read count. 15 | #' @param Phenotypes a vector containing the predictor of interest. 16 | #' @param Covariates a data frame containing the covariates subject to adjustment (Default = NULL). 17 | #' @param RelatednessMatrix a known relationship matrix (e.g. kinship matrix in genetic studies). When supplied with a matrix, this matrix should be a positive semi-definite matrix with dimensions equal to the sample size in count data, and the order of subjects in this matrix should also match the order of subjects in count data. Currently there is no ID checking feature implemented, and it is the user's responsibility to match the orders. 18 | #' @param LibSize a data frame containing the total read count. For the Poisson mixed model, it will be calculated automatically if users do not provide it. For the binomial mixed model, it is required. 19 | #' @param fit.model a description of the error distribution and link function to be used in the model. Either "PMM" for the Poisson model, or "BMM" for the binomial model (default = "PMM"). 20 | #' @param fit.method method of fitting the generalized linear mixed model, currently only "REML" version is available.
21 | #' @param fit.maxiter a positive integer specifying the maximum number of iterations when fitting the generalized linear mixed model (default = 500). 22 | #' @param fit.tol a positive number specifying tolerance, the difference threshold for parameter estimates below which iterations should be stopped (default = 1e-5). 23 | #' @param numCore a positive integer specifying the number of cores for parallel computing (default = 1). 24 | #' @param filtering a logical switch for RNAseq data. By default, for each gene, at least two individuals should have read counts greater than 5. Otherwise, the gene is filtered (default = TRUE). 25 | #' @param verbose a logical switch for printing detailed information (parameter estimates in each iteration) for testing and debugging purpose (default = FALSE). 26 | #' @param ... additional arguments that could be passed to glm. 27 | #' 28 | #' @return 29 | #' \item{numIDV}{number of individuals with data being analyzed} 30 | #' \item{beta}{the fixed effect parameter estimate for the predictor of interest.} 31 | #' \item{se_beta}{the standard deviation of fixed effect.} 32 | #' \item{pvalue}{P value for the fixed effect, based on the wald test.} 33 | #' \item{h2}{heritability of the transformed rate.} 34 | #' \item{sigma2}{total variance component.} 35 | #' \item{overdisp}{dispersion parameter estimate.} 36 | #' \item{converged}{a logical indicator for convergence.} 37 | #' 38 | #' @references Sun, S., Hood, M., Scott, L., Peng, Q., Mukherjee, S., Tung, J., and Zhou, X. (2017). Differential expression analysis for rnaseq using poisson mixed models. Nucleicacids research, 45(11), e106–e106. 39 | #' 40 | 41 | pqlseq_modified <- function(RawCountDataSet, Phenotypes, Covariates=NULL, RelatednessMatrix=NULL, LibSize=NULL, 42 | fit.model="PMM", fit.method = "AI.REML", fit.maxiter=500, fit.tol=1e-5, numCore=1, 43 | filtering=TRUE, verbose=FALSE, ...) 
{ 44 | # specify the number of cores we want to use 45 | if(numCore > 1){ 46 | if(numCore>detectCores()){warning("PQLseq:: the number of cores you're setting is larger than detected cores!");numCore = detectCores()-1} 47 | } 48 | 49 | registerDoParallel(numCore) 50 | 51 | # cl <- makeCluster(numCore) 52 | # registerDoParallel(cl,cores=numCore) 53 | # on.exit(stopCluster(cl)) 54 | 55 | # filtering genes/sites 56 | #if (filtering & fit.model == "PMM"){ 57 | # unfilterIdx <- apply(RawCountDataSet, 1, function(x){length(x[x>5])>=2} ) 58 | # CountData <- RawCountDataSet[unfilterIdx,] 59 | #}else{ 60 | # CountData <- RawCountDataSet 61 | #} 62 | CountData <- RawCountDataSet 63 | rm(RawCountDataSet) 64 | 65 | numVar <- dim(CountData)[1] 66 | numIDV <- dim(CountData)[2] 67 | 68 | # remove the intercept 69 | if(length(unique(Covariates[,1])) == 1){ 70 | Covariates<- Covariates[,-1] 71 | } 72 | 73 | if(is.null(Covariates)){ 74 | numCov <- 0 75 | }else{ 76 | numCov <- dim(Covariates)[2] 77 | Covariates <- as.matrix(Covariates) 78 | } 79 | 80 | cat(paste("## number of total individuals: ", numIDV,"\n")) 81 | cat(paste("## number of total genes/sites: ", numVar,"\n")) 82 | cat(paste("## number of adjusted covariates: ", numCov,"\n")) 83 | 84 | 85 | CountData <- as.matrix(CountData) 86 | Phenotypes <- as.matrix(Phenotypes) 87 | 88 | 89 | # if(is.null(RelatednessMatrix)){ 90 | # stop("PQLseq::please input relatedness matrix!") 91 | # }else{ 92 | # RelatednessMatrix <- as.matrix(RelatednessMatrix) 93 | # scalerM <- diag(numIDV)-(rep(1,numIDV)%*%t(rep(1,numIDV)))/numIDV 94 | # eig <- eigen(RelatednessMatrix) 95 | # eigval <- eig$value 96 | # eigvector <- eig$vectors 97 | # if(any(eigval<1e-10)){ 98 | # warning("PQLseq::the relatedness matrix is singular, it has been modified!") 99 | # RelatednessMatrix <- as.matrix(nearPD(RelatednessMatrix,corr=T)$mat) 100 | # } 101 | # rm(scalerM) 102 | # rm(eig) 103 | # rm(eigval) 104 | # rm(eigvector) 105 | # } 106 | 107 | RelatednessMatrix <- 
list(RelatednessMatrix, diag(numIDV)) 108 | 109 | #***********************************# 110 | # Poisson Mixed Model # 111 | #***********************************# 112 | if(fit.model == "PMM"){ 113 | cat("# fitting Poisson mixed model ... \n") 114 | if(is.null(LibSize)){ 115 | LibSize <- apply(CountData, 2, sum) 116 | LibSize <- as.matrix(LibSize) 117 | }else{ 118 | LibSize <- as.matrix(t(LibSize)) 119 | } 120 | 121 | 122 | # do parallel using foreach function 123 | iVar <- NULL 124 | resPMM <-foreach(iVar=1:numVar,.combine=rbind)%dopar%{ 125 | numAnalysis <- beta <- tau1 <- tau2 <- se_beta <- pvalue <- converged <- h2 <- sigma2 <- overdisp <- NA 126 | if(numCov==0){ 127 | model0 <- try(glm(formula = CountData[iVar,]~1 + offset(log(LibSize)), family = poisson(link="log"))) 128 | idx <- match(rownames(model.frame(formula = CountData[iVar,]~1 + offset(log(LibSize)), na.action = na.omit)), 129 | rownames(model.frame(formula = CountData[iVar,]~1 + offset(log(LibSize)), na.action = na.pass))) 130 | }else{ 131 | model0 <- try(glm(formula = CountData[iVar,]~Covariates + Phenotypes + offset(log(LibSize)), family = poisson(link="log"))) 132 | idx <- match(rownames(model.frame(formula = CountData[iVar,]~Covariates + Phenotypes + offset(log(LibSize)), na.action = na.omit)), 133 | rownames(model.frame(formula = CountData[iVar,]~Covariates + Phenotypes + offset(log(LibSize)), na.action = na.pass))) 134 | } 135 | 136 | if(verbose) {cat(paste("NO. 
Gene = ",iVar,"\n"))} 137 | 138 | tmpRelatednessMatrix <- RelatednessMatrix 139 | if(class(tmpRelatednessMatrix) == "matrix") { 140 | tmpRelatednessMatrix <- tmpRelatednessMatrix[idx, idx] 141 | }else { 142 | for(ik in seq_len(length(tmpRelatednessMatrix)) ) {tmpRelatednessMatrix[[ik]] <- tmpRelatednessMatrix[[ik]][idx, idx]} 143 | } 144 | 145 | names(tmpRelatednessMatrix) <- paste("kins", 1:length(tmpRelatednessMatrix), sep="") 146 | 147 | if(class(model0)[1]!="try-error"){ 148 | # t1 <- system.time(model1 <- try(PQLseq.fit(model0, tmpRelatednessMatrix))) 149 | model1 <- try(PQLseq.fit(model0, tmpRelatednessMatrix)) 150 | }else{ 151 | model1 <- NULL 152 | } 153 | 154 | if(!is.null(model1)&(class(model1)!="try-error")){ 155 | if(verbose){cat(paste("PQLseq::PMM::tau = ", model1$theta,"\n"))} 156 | numAnalysis <- length(idx) 157 | beta <- model1$coefficients[length(model1$coefficients)] 158 | alpha <- model1$coefficients[1] 159 | se_beta <- sqrt(diag(model1$cov)[length(model1$coefficients)] ) 160 | pvalue <- pchisq( (beta/se_beta)^2, 1, lower.tail = F) 161 | sigma2 <- model1$theta[2]+model1$theta[3] 162 | h2 <- model1$theta[2]/(sigma2) 163 | tau1 <- model1$theta[2] 164 | tau2 <- model1$theta[3] 165 | residual <- model1$residuals 166 | fitted_values <- model1$fitted.values 167 | converged <- model1$converged 168 | }else{converged <- FALSE} 169 | 170 | res <- data.frame(numIDV = numAnalysis, beta = beta, alpha=alpha, se_beta = se_beta, 171 | pvalue = pvalue, h2 = h2, sigma2 = sigma2,tau1=tau1,tau2=tau2, 172 | fitted_values = fitted_values, 173 | residual = residual, converged = converged) 174 | }# end for iVar, parallel 175 | rm(iVar) 176 | closeAllConnections() 177 | # if(nrow(showConnections())!=0){closeAllConnections()} 178 | 179 | rownames(resPMM) <- rownames(CountData) 180 | return(resPMM) 181 | }# end PMM 182 | #***********************************# 183 | # Binomial Mixed Model # 184 | #***********************************# 185 | if(fit.model == "BMM"){ 186 | 
cat("# fitting binomial mixed model ... \n") 187 | if(is.null(LibSize)){ 188 | stop("PQLseq::BMM::ERROR: please input the LibSize (total counts) file!!") 189 | }else{ 190 | LibSize <- as.matrix(LibSize) 191 | } 192 | 193 | ratio <- CountData/LibSize 194 | ratio[is.na(ratio)] <- 0 195 | flag <- ratio>1.0 196 | sumflag <- apply(flag,1, sum) 197 | idx <- which(sumflag>0) 198 | 199 | if (length(idx)>0){ 200 | CountData <- CountData[-idx,] 201 | LibSize <- LibSize[-idx,] 202 | }else{ 203 | CountData <- CountData 204 | LibSize <- LibSize 205 | } 206 | 207 | numVar <- dim(CountData)[1] 208 | numIDV <- dim(CountData)[2] 209 | iVar <- NULL 210 | 211 | # do parallel 212 | resBMM <- foreach(iVar=1:numVar,.combine=rbind)%dopar%{ 213 | numAnalysis <- beta <- tau1 <- tau2 <- se_beta <- pvalue <- converged <- h2 <- sigma2 <- overdisp <- NA 214 | if(verbose){cat(paste("NO. Gene/Site = ",iVar,"\n"))} 215 | if(sum(dim(LibSize)==dim(CountData)) != 2){ 216 | stop("PQLseq::BMM::ERROR: the dimensions of read counts and total read counts do not match!") 217 | } 218 | 219 | LibSize <- as.matrix(LibSize) 220 | 221 | if(numCov == 0){ 222 | model0 <- glm(formula = CountData[iVar,]/LibSize[iVar,]~Phenotypes, family = binomial(link = "logit"), weights = LibSize[iVar,]) 223 | idx <- match(rownames(model.frame(formula = CountData[iVar,]/LibSize[iVar,]~Phenotypes, na.action = na.omit)), 224 | rownames(model.frame(formula = CountData[iVar,]/LibSize[iVar,]~Phenotypes, na.action = na.pass))) 225 | }else{ 226 | model0 <- glm(formula = CountData[iVar,]/LibSize[iVar,]~Covariates + Phenotypes, family = binomial(link = "logit"), weights = LibSize[iVar,] ) 227 | idx <- match(rownames(model.frame(formula = CountData[iVar,]/LibSize[iVar,]~Covariates + Phenotypes, na.action = na.omit)), 228 | rownames(model.frame(formula = CountData[iVar,]/LibSize[iVar,]~Covariates + Phenotypes, na.action = na.pass))) 229 | } 230 | 231 | model0$numTotal <- LibSize[iVar,idx] 232 | model0$numSucc <- CountData[iVar,idx] 233 | 
234 | redflag <- FALSE 235 | for( ierr in c(2:dim(model.matrix(model0))[2])){ 236 | if(length(unique(model.matrix(model0)[,ierr])) == 1){ 237 | warning(paste("PQLseq::BMM::the ",ierr-1,"-th column of covariates are the same for gene/site ",rownames(CountData)[iVar],"!",sep = "") ) 238 | redflag <- TRUE 239 | } 240 | } 241 | if(!redflag){ 242 | 243 | tmpRelatednessMatrix <- RelatednessMatrix 244 | if(class(tmpRelatednessMatrix) == "matrix") { 245 | tmpRelatednessMatrix <- tmpRelatednessMatrix[idx, idx] 246 | }else { 247 | for(ik in seq_len(length(tmpRelatednessMatrix)) ) { 248 | tmpRelatednessMatrix[[ik]] <- tmpRelatednessMatrix[[ik]][idx, idx] 249 | } 250 | } 251 | names(tmpRelatednessMatrix) <- paste("kins", 1:length(tmpRelatednessMatrix), sep="") 252 | 253 | # t1 <- system.time(model1 <- try( PQLseq.fit(model0, tmpRelatednessMatrix) )) 254 | model1 <- try(PQLseq.fit(model0, tmpRelatednessMatrix)) 255 | 256 | if(class(model1) != "try-error"&!is.null(model1)){ 257 | if(verbose){cat(paste("PQLseq::BMM::tau = ", model1$theta,"\n"))} 258 | numAnalysis <- length(idx) 259 | beta <- model1$coefficients[ length(model1$coefficients) ]# the last one 260 | se_beta <- sqrt( diag(model1$cov)[ length(model1$coefficients) ] ) 261 | pvalue <- pchisq( (beta/se_beta)^2, 1, lower.tail = F) 262 | sigma2 <- model1$theta[2]+model1$theta[3] 263 | h2 <- model1$theta[2]/(sigma2) 264 | tau1 <- model1$theta[2] 265 | tau2 <- model1$theta[3] 266 | converged <- model1$converged 267 | }else{converged <- FALSE} 268 | 269 | res <- data.frame(numIDV = numAnalysis, beta = beta, se_beta = se_beta, 270 | pvalue = pvalue, h2 = h2, sigma2 = sigma2, 271 | converged = converged) 272 | }# end for iVar, parallel 273 | 274 | } 275 | rm(iVar) 276 | 277 | # if(nrow(showConnections())!=0){closeAllConnections()} 278 | closeAllConnections() 279 | rownames(resBMM) <- rownames(CountData) 280 | return(resBMM) 281 | }# end BMM 282 | 283 | }# end function PQLseq 284 | 285 | 286 | 
########################################################## 287 | # PQLseq FIT FUNCTION # 288 | ########################################################## 289 | 290 | PQLseq.fit <- function(model0, RelatednessMatrix, method = "REML", method.optim = "AI", maxiter = 500, tol = 1e-5, verbose = FALSE) { 291 | 292 | names(RelatednessMatrix) <- paste("kins", 1:length(RelatednessMatrix), sep="") 293 | # if((method.optim == "AI")&(!sum(model0$fitted.values<1e-5))) { 294 | if(method.optim == "AI") { 295 | fixtau.old <- rep(0, length(RelatednessMatrix)+1) 296 | # to use average information method to fit alternative model 297 | model1 <- PQLseq.AI(model0, RelatednessMatrix, maxiter = maxiter, tol = tol, verbose = verbose) 298 | fixtau.new <- 1*(model1$theta < 1.01 * tol) 299 | 300 | while(any(fixtau.new != fixtau.old)) { 301 | fixtau.old <- fixtau.new 302 | model1 <- PQLseq.AI(model0, RelatednessMatrix, fixtau = fixtau.old, maxiter = maxiter, tol = tol, verbose = verbose) 303 | fixtau.new <- 1*(model1$theta < 1.01 * tol) 304 | } 305 | }else{ 306 | model1 <- NULL 307 | } 308 | return(model1) 309 | } 310 | 311 | ########################################################## 312 | # PQLseq FIT AVERAGE INFORMATION FUNCTION # 313 | ########################################################## 314 | 315 | PQLseq.AI <- function(model0, RelatednessMatrix, tau = rep(0, length(RelatednessMatrix)+1), fixtau = rep(0, length(RelatednessMatrix)+1), maxiter = 500, tol = 1e-5, verbose = FALSE) { 316 | 317 | if(model0$family$family %in% c("binomial")){ 318 | y <- model0$numSucc 319 | }else{ 320 | y <- model0$y 321 | } 322 | numIDV <- length(y) 323 | offset <- model0$offset 324 | if(is.null(offset)) {offset <- rep(0, numIDV)} 325 | 326 | family <- model0$family 327 | eta <- model0$linear.predictors 328 | mu <- model0$fitted.values 329 | mu.eta <- family$mu.eta(eta) 330 | D <- mu.eta/sqrt(model0$family$variance(mu)) 331 | 332 | if(family$family %in% c("binomial")){ 333 | mu.eta <- 
model0$numTotal*mu.eta 334 | D <- mu.eta/sqrt(model0$numTotal*model0$family$variance(mu)) 335 | mu <- model0$numTotal*mu 336 | } 337 | 338 | Y <- eta - offset + (y - mu)/mu.eta 339 | X <- model.matrix(model0) 340 | alpha <- model0$coef 341 | 342 | if(family$family %in% c("poisson", "binomial")) { 343 | tau[1] <- 1 344 | fixtau[1] <- 1 345 | } 346 | numK <- length(RelatednessMatrix) 347 | idxtau <- which(fixtau == 0) 348 | numK2 <- sum(fixtau == 0) 349 | 350 | ### this part needs to be changed for intercept only model same as spark (Satwik) 351 | if(numK2 > 0) { 352 | tau[fixtau == 0] <- rep(min(0.9,var(Y)/(numK+1)), numK2) 353 | 354 | H <- tau[1]*diag(1/D^2) 355 | for(ik in 1:numK) {H <- H + tau[ik+1]*RelatednessMatrix[[ik]]} 356 | 357 | Hinv <- chol2inv(chol(H)) 358 | HinvX <- crossprod(Hinv, X) 359 | XHinvX <- crossprod(X, HinvX) 360 | 361 | P <- try(Hinv - tcrossprod(tcrossprod(HinvX, chol2inv(chol( XHinvX ))), HinvX)) 362 | 363 | if(class(P) == "try-error"){ 364 | stop("Error in P matrix calculation!") 365 | } 366 | 367 | PY <- crossprod(P, Y) 368 | tau0 <- tau 369 | for(ik in 1:numK2) { 370 | if(ik == 1 && fixtau[1] == 0) tau[1] <- max(0, tau0[1] + tau0[1]^2 * (sum((PY/D)^2) - sum(diag(P)/D^2))/numIDV) 371 | else { 372 | PAPY <- crossprod(P, crossprod(RelatednessMatrix[[idxtau[ik]-1]], PY)) 373 | tau[idxtau[ik]] <- max(0, tau0[idxtau[ik]] + tau0[idxtau[ik]]^2 * (crossprod(Y, PAPY) - sum(P*RelatednessMatrix[[idxtau[ik]-1]]))/numIDV) 374 | } 375 | } 376 | } 377 | 378 | for (iter in seq_len(maxiter)) { 379 | alpha0 <- alpha 380 | tau0 <- tau 381 | model1 <- AI(Y, X, length(RelatednessMatrix), RelatednessMatrix, D^2, tau, fixtau, tol) 382 | 383 | tau <- as.numeric(model1$tau) 384 | cov <- as.matrix(model1$cov) 385 | alpha <- as.numeric(model1$alpha) 386 | eta <- as.numeric(model1$eta) + offset 387 | 388 | 389 | mu <- family$linkinv(eta) 390 | mu.eta <- family$mu.eta(eta) 391 | D <- mu.eta/sqrt(family$variance(mu)) 392 | 393 | if(family$family %in% c("binomial")){ 
394 | mu.eta <- model0$numTotal*mu.eta 395 | D <- mu.eta/sqrt(model0$numTotal*family$variance(mu)) 396 | mu <- model0$numTotal*mu 397 | } 398 | 399 | Y <- eta - offset + (y - mu)/mu.eta 400 | 401 | if(2*max(abs(alpha - alpha0)/(abs(alpha) + abs(alpha0) + tol), abs(tau - tau0)/(abs(tau) + abs(tau0) + tol)) < tol) {break} 402 | if(max(tau) > tol^(-2)|any(is.infinite(D))|any(is.infinite(mu))|any(is.infinite(eta)) ) { 403 | 404 | iter <- maxiter 405 | break 406 | } 407 | } 408 | 409 | converged <- ifelse(iter < maxiter, TRUE, FALSE) 410 | res <- y - mu 411 | P <- model1$P 412 | return(list(theta = tau, coefficients = alpha, linear.predictors = eta, fitted.values = mu, Y = Y, P = P, residuals = res, cov = cov, converged = converged)) 413 | }# end function 414 | 415 | 416 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 417 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 418 | 419 | AI <- function(Yin, Xin, numKin, Phiin, Din, tauin, fixtauin, tolin) { 420 | .Call('_PQLseq_AI', PACKAGE = 'PQLseq', Yin, Xin, numKin, Phiin, Din, tauin, fixtauin, tolin) 421 | } 422 | 423 | rcpparma_hello_world <- function() { 424 | .Call('_PQLseq_rcpparma_hello_world', PACKAGE = 'PQLseq') 425 | } 426 | 427 | rcpparma_outerproduct <- function(x) { 428 | .Call('_PQLseq_rcpparma_outerproduct', PACKAGE = 'PQLseq', x) 429 | } 430 | 431 | rcpparma_innerproduct <- function(x) { 432 | .Call('_PQLseq_rcpparma_innerproduct', PACKAGE = 'PQLseq', x) 433 | } 434 | 435 | rcpparma_bothproducts <- function(x) { 436 | .Call('_PQLseq_rcpparma_bothproducts', PACKAGE = 'PQLseq', x) 437 | } 438 | 439 | 440 | ######################################### 441 | # CODE END # 442 | ######################################### 443 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | 
knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | # SpaceX overview 17 | 18 | 19 | 20 | 21 | ![SpaceXpipeline](SpaceX_Overview.jpg) The SpaceX ([spa]{.underline}tially dependent gene [c]{.underline}o-[ex]{.underline}pression network) is a Bayesian methodology to identify both shared and cluster-specific co-expression network across genes. These clusters can be cell type specific or based on spatial regions. SpaceX uses an over-dispersed spatial Poisson model coupled with a high-dimensional factor model which is based on a dimension reduction technique for computational efficiency. 22 | 23 | The Figure above shows the overall conceptual flow of our pipeline. **Panel A** is an image of a tissue section from the region of interest. **Panel B** shows spatial gene expression and biomarkers which are recorded from that tissue section with the help of sequencing techniques. **Panel C** is the resulting data matrix of gene expression along with spatial locations and cluster annotations on the tissue. All these serve as input for the SpaceX model to obtain the shared (**Panel D**) and cluster-specific co-expression networks (**Panel E**). Finally, we use these networks for downstream analysis to detect gene modules and hub genes across spatial regions (**Panel F** & **Panel G** respectively) for biological interpretation. 24 | 25 | 26 | 27 | ## Installation 28 | This package requires a Fortran compiler in order to work. Here are the instructions: 29 | 30 | + Windows: install the Rtools package that is appropriate for your version of R 31 | 32 | + Mac: Go to this website and follow the instructions: (https://mac.R-project.org/tools/) 33 | 34 | + Linux: From a terminal, do the following: `sudo apt install gcc`. That will bring in multiple compilers. 35 | 36 | The package requires a dependency that is not available on CRAN. 
Install it with: 37 | 38 | ``` r 39 | remotes::install_github("rdevito/MSFA") 40 | ``` 41 | 42 | You can install the released version of SpaceX from (https://github.com/SatwikAch/SpaceX) with: 43 | 44 | ``` r 45 | devtools::install_github("SatwikAch/SpaceX") 46 | ``` 47 | 48 | ```{r} 49 | library(SpaceX) 50 | ``` 51 | 52 | ## SpaceX function 53 | ### Inputs 54 | 55 | The first input is **Gene_expression_mat**, which is an $N \times G$ dataframe. Here $N$ denotes the number of spatial locations and $G$ denotes the number of genes. 56 | 57 | The second input is **Spatial_locations**, a dataframe which contains the spatial coordinates. 58 | 59 | The third input is **Cluster_annotations**, the cluster annotation for each spatial location. 60 | 61 | The fourth input is **sPMM**. If TRUE, the code will return the estimates of sigma1_sq and sigma2_sq from the spatial Poisson mixed model. 62 | 63 | The fifth input is **Post_process**. If FALSE (the default), the code will return the posterior samples of $\Phi$ and $\Psi^c$ (based on definition in equation 1 of the SpaceX paper) only. If TRUE, the code will return all the posterior samples together with the shared and cluster specific co-expressions. 64 | 65 | The final input is **numCore**, the number of requested cores for parallel computing (the default is 1). 66 | 67 | ### Output 68 | You will obtain a list of objects as output. 69 | 70 | **Posterior_samples** contains all the posterior samples. 71 | 72 | **Shared_network** provides the shared co-expression matrix (transformed correlation matrix of $G_{s} = \Phi \Phi^{T}$). 73 | 74 | **Cluster_network** provides the cluster specific co-expression matrices (transformed correlation matrices of $G_{c} = \Phi \Phi^{T} + \Psi^{c} {\Psi^{c}}^{T}$). 75 | 76 | 77 | 78 | ## Example 79 | An example code with the breast cancer data to demonstrate how to run the SpaceX function and obtain shared and cluster specific networks.
80 | ```{r , eval=FALSE} 81 | ## Reading the Breast cancer data 82 | 83 | ## Spatial locations 84 | head(BC_loc) 85 | 86 | ## Gene expression for data 87 | head(BC_count) 88 | 89 | ## Data processing 90 | G <-dim(BC_count)[2] ## number of genes 91 | N <-dim(BC_count)[1] ## number of locations 92 | 93 | ## Application to SpaceX algorithm (Please make sure to request for large enough memory to work with the posterior samples) 94 | BC_fit <- SpaceX(BC_count,BC_loc[,1:2],BC_loc[,3],sPMM=FALSE,Post_process = TRUE,numCore = 2) 95 | 96 | ## Shared_network :: Shared co-expression matrix 97 | ## Cluster_network :: Cluster specific co-expression matrices 98 | 99 | ``` 100 | 101 | 102 | ## Tutorial website 103 | The tutorial website can be found [here](https://satwikach.github.io/SpaceX.github.io/). 104 | 105 | ## Paper 106 | Satwik Acharyya, Xiang Zhou and Veerabhadran Baladandayuthapani (2022). [SpaceX: Gene Co-expression Network Estimation for Spatial Transcriptomics](https://doi.org/10.1093/bioinformatics/btac645). Bioinformatics, 38(22): 5033–5041. 107 | 108 | ## Supplementary file 109 | [Supplementary](https://bookdown.org/satwik91/SpaceX_supplementary/) 110 | 111 | ## Points to note 112 | + Please run the SpaceX package in R 4.1.2. 113 | 114 | + Please email at satwika@umich.edu for any issues. 115 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # SpaceX overview 5 | 6 | 7 | 8 | 9 | ![SpaceXpipeline](SpaceX_Overview.jpg) The SpaceX (spatially 10 | dependent gene co-expression network) is a Bayesian 11 | methodology to identify both shared and cluster-specific co-expression 12 | network across genes. These clusters can be cell type specific or based 13 | on spatial regions. 
SpaceX uses an over-dispersed spatial Poisson model 14 | coupled with a high-dimensional factor model which is based on a 15 | dimension reduction technique for computational efficiency. 16 | 17 | The Figure above shows the overall conceptual flow of our pipeline. 18 | **Panel A** is an image of a tissue section from the region of interest. 19 | **Panel B** shows spatial gene expression and biomarkers which are 20 | recorded from that tissue section with the help of sequencing 21 | techniques. **Panel C** is the resulting data matrix of gene expression 22 | along with spatial locations and cluster annotations on the tissue. All 23 | these serve as input for the SpaceX model to obtain the shared (**Panel 24 | D**) and cluster-specific co-expression networks (**Panel E**). Finally, 25 | we use these networks for downstream analysis to detect gene modules and 26 | hub genes across spatial regions (**Panel F** & **Panel G** 27 | respectively) for biological interpretation. 28 | 29 | 30 | 31 | ## Installation 32 | 33 | This package requires a Fortran compiler in order to work. Here are the 34 | instructions: 35 | 36 | - Windows: install the Rtools package that is appropriate for your 37 | version of R 38 | 39 | - Mac: Go to this website and follow the instructions: 40 | () 41 | 42 | - Linux: From a terminal, do the following: `sudo apt install gcc`. 43 | That will bring in multiple compilers. 44 | 45 | The package requires a dependency that is not available on CRAN. Install 46 | it with: 47 | 48 | ``` r 49 | remotes::install_github("rdevito/MSFA") 50 | ``` 51 | 52 | You can install the released version of SpaceX from 53 | () with: 54 | 55 | ``` r 56 | devtools::install_github("SatwikAch/SpaceX") 57 | ``` 58 | 59 | ``` r 60 | library(SpaceX) 61 | #> Loading required package: PQLseq 62 | ``` 63 | 64 | ## SpaceX function 65 | 66 | ### Inputs 67 | 68 | The first input is **Gene\_expression\_mat** which is $N \times G$ 69 | dataframe. 
Here $N$ denotes the number of spatial locations and $G$ 70 | denotes the number of genes. 71 | 72 | The second input is **Spatial\_locations**, a dataframe which contains 73 | the spatial coordinates. 74 | 75 | The third input is **Cluster\_annotations**. 76 | 77 | The fourth input is **sPMM**. If TRUE, the code will return the 78 | estimates of sigma1\_sq and sigma2\_sq from the spatial Poisson mixed 79 | model. 80 | 81 | The fifth input is **Post\_process**. If FALSE, the code will return the 82 | posterior samples of $\Phi$ and $\Psi^c$ (based on definition in 83 | equation 1 of the SpaceX paper) only. Default is TRUE and the code will 84 | return all the posterior samples, shared and cluster specific 85 | co-expressions. 86 | 87 | The final input is **numCore**, the number of cores requested for 88 | parallel computing (the default is 1). 89 | 90 | ### Output 91 | 92 | You will obtain a list of objects as output. 93 | 94 | **Posterior\_samples** contains all the posterior samples. 95 | 96 | **Shared\_network** provides the shared co-expression matrix 97 | (transformed correlation matrix of $G_{s} = \Phi \Phi^{T}$). 98 | 99 | **Cluster\_network** provides the cluster specific co-expression 100 | matrices (transformed correlation matrices of 101 | $G_{c} = \Phi \Phi^{T} + \Psi^{c} {\Psi^{c^{T}}}$). 102 | 103 | ## Example 104 | 105 | Example code using the breast cancer data to demonstrate how to run 106 | the SpaceX function and obtain shared and cluster specific networks. 
107 | 108 | ``` r 109 | ## Reading the Breast cancer data 110 | 111 | ## Spatial locations 112 | head(BC_loc) 113 | 114 | ## Gene expression for data 115 | head(BC_count) 116 | 117 | ## Data processing 118 | G <-dim(BC_count)[2] ## number of genes 119 | N <-dim(BC_count)[1] ## number of locations 120 | 121 | ## Application to SpaceX algorithm (Please make sure to request for large enough memory to work with the posterior samples) 122 | BC_fit <- SpaceX(BC_count,BC_loc[,1:2],BC_loc[,3],sPMM=FALSE,Post_process = TRUE,numCore = 2) 123 | 124 | ## Shared_network :: Shared co-expression matrix 125 | ## Cluster_network :: Cluster specific co-expression matrices 126 | ``` 127 | 128 | ## Tutorial website 129 | 130 | The tutorial website can be found 131 | [here](https://satwikach.github.io/SpaceX.github.io/). 132 | 133 | ## Paper 134 | 135 | Satwik Acharyya, Xiang Zhou and Veerabhadran Baladandayuthapani (2022). 136 | [SpaceX: Gene Co-expression Network Estimation for Spatial 137 | Transcriptomics](https://doi.org/10.1093/bioinformatics/btac645). 138 | Bioinformatics, 38(22): 5033–5041. 139 | 140 | ## Supplementary file 141 | 142 | [Supplementary](https://bookdown.org/satwik91/SpaceX_supplementary/) 143 | 144 | ## Points to note 145 | 146 | - Please run the SpaceX package in R 4.1.2. 147 | 148 | - Please email at for any issues. 
149 | -------------------------------------------------------------------------------- /SpaceX.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace,vignette 22 | -------------------------------------------------------------------------------- /SpaceX_Overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/SpaceX_Overview.jpg -------------------------------------------------------------------------------- /data/BC_count.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/data/BC_count.rda -------------------------------------------------------------------------------- /data/BC_loc.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/data/BC_loc.rda -------------------------------------------------------------------------------- /man/SpaceX.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SpaceX.R 3 | \name{SpaceX} 4 | \alias{SpaceX} 5 | \title{Estimation of shared and cluster specific gene co-expression networks for spatial transcriptomics data.} 6 | \usage{ 7 | 
SpaceX( 8 | Gene_expression_mat, 9 | Spatial_locations, 10 | Cluster_annotations, 11 | sPMM = FALSE, 12 | Post_process = FALSE, 13 | numCore = 1, 14 | nrun = 10000, 15 | burn = 5000 16 | ) 17 | } 18 | \arguments{ 19 | \item{Gene_expression_mat}{Gene expression dataframe (N X G).} 20 | 21 | \item{Spatial_locations}{Spatial locations with coordinates. This should be provided as dataframe.} 22 | 23 | \item{Cluster_annotations}{Cluster annotations for each of the spatial location.} 24 | 25 | \item{sPMM}{If \code{TRUE}, the code will return the estimates of sigma1_sq and sigma2_sq from the spatial Poisson mixed model.} 26 | 27 | \item{Post_process}{If \code{FALSE}, the code will return the posterior samples of \code{Phi} and \code{Psi^c} (based on definition in equation 1 of the SpaceX paper) only. 28 | Default is \code{TRUE} and the code will return all the posterior samples, shared and cluster specific co-expressions.} 29 | 30 | \item{numCore}{The number of cores for parallel computing (default = 1).} 31 | 32 | \item{nrun}{default = 10000} 33 | 34 | \item{burn}{default = 5000} 35 | } 36 | \value{ 37 | \item{Posterior_samples}{Posterior samples} 38 | \item{Shared_network}{Shared co-expression matrix} 39 | \item{Cluster_network}{Cluster specific co-expression matrices} 40 | } 41 | \description{ 42 | SpaceX function estimates shared and cluster specific gene co-expression networks for spatial transcriptomics data. Please make sure to provide both inputs as dataframe. More details about the SpaceX algorithm can be found in the reference paper. 43 | } 44 | \examples{ 45 | Implementation details and examples can be found at this link https://bookdown.org/satwik91/SpaceX_supplementary/. 46 | 47 | 48 | } 49 | \references{ 50 | Acharyya S., Zhou X., Baladandayuthapani V. (2021). SpaceX: Gene Co-expression Network Estimation for Spatial Transcriptomics. 
51 | } 52 | -------------------------------------------------------------------------------- /man/figures/README-pressure-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/man/figures/README-pressure-1.png -------------------------------------------------------------------------------- /man/pqlseq_modified.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pqlseq_modified.R 3 | \name{pqlseq_modified} 4 | \alias{pqlseq_modified} 5 | \title{Fit Generalized Linear Mixed Model with Known Kinship Matrices Through Penalized-quasi Likelihood} 6 | \usage{ 7 | pqlseq_modified( 8 | RawCountDataSet, 9 | Phenotypes, 10 | Covariates = NULL, 11 | RelatednessMatrix = NULL, 12 | LibSize = NULL, 13 | fit.model = "PMM", 14 | fit.method = "AI.REML", 15 | fit.maxiter = 500, 16 | fit.tol = 1e-05, 17 | numCore = 1, 18 | filtering = TRUE, 19 | verbose = FALSE, 20 | ... 21 | ) 22 | } 23 | \arguments{ 24 | \item{RawCountDataSet}{a data frame containing the read count.} 25 | 26 | \item{Phenotypes}{a vector containing the predictor of interest.} 27 | 28 | \item{Covariates}{a data frame containing the covariates subject to adjustment (Default = NULL).} 29 | 30 | \item{RelatednessMatrix}{a known relationship matrix (e.g. kinship matrix in genetic studies). When supplied with a matrix, this matrix should be a positive semi-definite matrix with dimensions equal to the sample size in count data, and the order of subjects in this matrix should also match the order of subjects in count data. Currently there is no ID checking feature implemented, and it is the user's responsibility to match the orders.} 31 | 32 | \item{LibSize}{a data frame containing the total read count. 
For the Poisson mixed model, it will be calculated automatically if users do not provide it. For the binomial mixed model, it is required.} 33 | 34 | \item{fit.model}{a description of the error distribution and link function to be used in the model. Either "PMM" for the Poisson model, or "BMM" for the binomial model (default = "PMM").} 35 | 36 | \item{fit.method}{method of fitting the generalized linear mixed model, currently only "REML" version is available.} 37 | 38 | \item{fit.maxiter}{a positive integer specifying the maximum number of iterations when fitting the generalized linear mixed model (default = 500).} 39 | 40 | \item{fit.tol}{a positive number specifying tolerance, the difference threshold for parameter estimates below which iterations should be stopped (default = 1e-5).} 41 | 42 | \item{numCore}{a positive integer specifying the number of cores for parallel computing (default = 1).} 43 | 44 | \item{filtering}{a logical switch for RNAseq data. By default, for each gene, at least two individuals should have read counts greater than 5. Otherwise, the gene is filtered (default = TRUE).} 45 | 46 | \item{verbose}{a logical switch for printing detailed information (parameter estimates in each iteration) for testing and debugging purposes (default = FALSE).} 47 | 48 | \item{...}{additional arguments that could be passed to glm.} 49 | } 50 | \value{ 51 | \item{numIDV}{number of individuals with data being analyzed} 52 | \item{beta}{the fixed effect parameter estimate for the predictor of interest.} 53 | \item{se_beta}{the standard deviation of the fixed effect.} 54 | \item{pvalue}{P value for the fixed effect, based on the Wald test.} 55 | \item{h2}{heritability of the transformed rate.} 56 | \item{sigma2}{total variance component.} 57 | \item{overdisp}{dispersion parameter estimate.} 58 | \item{converged}{a logical indicator for convergence.} 59 | } 60 | \description{ 61 | Fit a generalized linear mixed model with a random intercept. 
The covariance matrix of the random intercept is proportional to a known kinship matrix. This is a modified version of the pqlseq algorithm tailored to the SpaceX package. For more details, see the pqlseq function from the PQLseq package. 62 | } 63 | \references{ 64 | Sun, S., Hood, M., Scott, L., Peng, Q., Mukherjee, S., Tung, J., and Zhou, X. (2017). Differential expression analysis for RNAseq using Poisson mixed models. Nucleic Acids Research, 45(11), e106–e106. 65 | } 66 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | FC= gfortran 2 | F77= gfortran 3 | CC = gcc 4 | 5 | SpaceX.o: SpaceX.f95 6 | $(FC) -c SpaceX.f95 -o SpaceX.o -fPIC 7 | $(FC) -o SpaceX.so SpaceX.o -shared -fopenmp 8 | 9 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | FC= gfortran 2 | F77= gfortran 3 | CC = gcc 4 | 5 | SpaceX.o: SpaceX.f95 6 | $(FC) -c SpaceX.f95 -o SpaceX.o -fPIC 7 | $(FC) -o SpaceX.dll SpaceX.o -shared -fopenmp 8 | 9 | -------------------------------------------------------------------------------- /src/Makevars.win~: -------------------------------------------------------------------------------- 1 | FC= gfortran 2 | F77= gfortran 3 | CC = gcc 4 | 5 | newSpace2.o: newSpace2.f95 6 | $(FC) -c newSpace2.f95 -o newSpace2.o -fPIC 7 | $(FC) -o newSpace2.dll newSpace2.o -shared -fopenmp 8 | 9 | -------------------------------------------------------------------------------- /src/SpaceX.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayesrx/SpaceX/262a7a55a60ae42b1e966c3ea684d80a6ac4ad04/src/SpaceX.dll -------------------------------------------------------------------------------- /src/SpaceX.f95: 
!> Average correlation matrices built from stacked single-precision draws.
!!
!! The flat input x is read, in column-major order, as an n-by-m-by-o array Y
!! and z as an n-by-m-by-o-by-L array D.  For every slice ii = 1..o:
!!
!!   C(ii)    = Y(:,:,ii) * Y(:,:,ii)^T                      (n-by-n)
!!   R(ii,j)  = C(ii) + D(:,:,ii,j) * D(:,:,ii,j)^T,  j=1..L (n-by-n)
!!
!! Each of these matrices is rescaled to unit diagonal (covariance ->
!! correlation) and the rescaled matrices are averaged over the o slices.
!!
!! NOTE(review): presumably n = genes, m = latent factors, o = posterior
!! draws and L = clusters, with x holding draws of Phi and z draws of Psi^c
!! as described in the package README -- confirm against the R caller.
!!
!! Arguments
!!   n, m, o, L  (in)  extents described above
!!   x(n*m*o)    (in)  flattened Y
!!   z(n*m*o*L)  (in)  flattened D
!!   b(n*n)      (out) flattened mean over ii of cor(C(ii))
!!   s(n*n*L)    (out) flattened mean over ii of cor(R(ii,j)); the j-th
!!                     n-by-n matrix occupies s((j-1)*n*n+1 : j*n*n)
!!
!! A zero on the diagonal of C or R yields Inf/NaN in the corresponding
!! output entries; no attempt is made to guard against that here.
subroutine bigtdsub(n, m, o, x, z, b, s, L)
  implicit none

  ! Single precision, i.e. the kind the original interface used throughout.
  integer, parameter :: sp = selected_real_kind(6, 37)

  integer,  intent(in)  :: n, m, o, L
  real(sp), intent(in)  :: x(n*m*o)
  real(sp), intent(in)  :: z(n*m*o*L)
  real(sp), intent(out) :: b(n*n)
  real(sp), intent(out) :: s(n*n*L)

  real(sp), allocatable :: y(:, :, :)       ! x viewed as (n, m, o)
  real(sp), allocatable :: d(:, :, :, :)    ! z viewed as (n, m, o, L)
  real(sp), allocatable :: gram(:, :, :)    ! Y Y^T for every slice
  real(sp), allocatable :: v(:, :, :)       ! cor(Y Y^T) for every slice
  real(sp), allocatable :: u(:, :, :, :)    ! cor(Y Y^T + D D^T) per slice and j
  integer :: ii, j

  allocate(y(n, m, o), d(n, m, o, L))
  allocate(gram(n, n, o), v(n, n, o), u(n, n, o, L))

  y = reshape(x, (/ n, m, o /))
  d = reshape(z, (/ n, m, o, L /))

  ! Slices are independent of one another: each iteration writes only its own
  ! gram(:,:,ii), v(:,:,ii) and u(:,:,ii,:).  The directive is honoured only
  ! when the file is compiled with OpenMP enabled; otherwise it is a plain
  ! comment and the loop runs serially with identical results.
  !$omp parallel do default(none) shared(o, L, y, d, gram, v, u) private(ii, j)
  do ii = 1, o
    ! Y Y^T is shared by every j, so it is formed once per slice.
    gram(:, :, ii) = matmul(y(:, :, ii), transpose(y(:, :, ii)))
    call cov2acor(gram(:, :, ii), v(:, :, ii))

    ! Loop over all L blocks of z (this bound used to be the literal 3).
    do j = 1, L
      call cov2acor(gram(:, :, ii) &
                    + matmul(d(:, :, ii, j), transpose(d(:, :, ii, j))), &
                    u(:, :, ii, j))
    end do
  end do
  !$omp end parallel do

  ! Average over the slice dimension and flatten in column-major order.
  b = reshape(sum(v, dim=3) / real(o, sp), (/ n*n /))
  s = reshape(sum(u, dim=3) / real(o, sp), (/ n*n*L /))

  deallocate(y, d, gram, v, u)

contains

  !> Rescale a square matrix to unit diagonal:
  !! cor(i,j) = cov(i,j) / sqrt(cov(i,i) * cov(j,j)).
  pure subroutine cov2acor(cov, cor)
    real(sp), intent(in)  :: cov(:, :)
    real(sp), intent(out) :: cor(:, :)
    real(sp) :: inv_sd(size(cov, 1))
    integer :: row, col

    do row = 1, size(cov, 1)
      inv_sd(row) = sqrt(1.0_sp / cov(row, row))
    end do

    ! Column index outermost so memory is traversed contiguously.
    do col = 1, size(cov, 2)
      do row = 1, size(cov, 1)
        cor(row, col) = inv_sd(row) * cov(row, col) * inv_sd(col)
      end do
    end do
  end subroutine cov2acor

end subroutine bigtdsub