├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── Parallelized Bash Script ├── .DS_Store ├── SCENT_parallelization.R └── parallelizedSCENT.sh ├── R ├── .gitignore ├── SCENTfunctions.R └── import_packages.R ├── README.md ├── SCENT.Rproj ├── data └── GeneBody_500kb_margin.bed ├── fig ├── .DS_Store ├── cover_image.png └── cover_image2.png ├── man ├── CreatePeakToGeneList.Rd ├── SCENT-class.Rd ├── SCENT_algorithm.Rd ├── assoc_negbin.Rd ├── assoc_poisson.Rd ├── basic_p.Rd ├── check_dimensions.Rd └── interp_pval.Rd └── vignettes ├── .gitignore ├── SCENT_interactive.Rmd └── SCENT_parallelize.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | inst/doc 6 | improvements 7 | READMEupdate.md 8 | RData/ 9 | .DS_Store 10 | .DS_Store 11 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: SCENT 2 | Type: Package 3 | Title: Single-Cell ENhancer Target (SCENT) gene mapping for single cell multimodal data 4 | Version: 1.0.1 5 | Author: Saori Sakaue and Shakson Isaac 6 | Maintainer: Shakson Isaac 7 | Description: R package that contains functions for the SCENT algorithm. SCENT uses 8 | single-cell multimodal data (e.g., 10X Multiome RNA/ATAC) and links 9 | ATAC-seq peaks (putative enhancers) to their target genes by modeling association 10 | between chromatin accessibility and gene expression across individual single cells. 
11 | Depends: R (>= 3.5.0) 12 | Imports: 13 | methods, 14 | Hmisc, 15 | R.utils, 16 | data.table, 17 | lme4, 18 | stringr, 19 | boot, 20 | MASS, 21 | Matrix, 22 | parallel 23 | Suggests: 24 | knitr, 25 | rmarkdown 26 | SystemRequirements: 27 | bedtools (https://github.com/arq5x/bedtools2) 28 | License: MIT + file LICENSE 29 | Encoding: UTF-8 30 | LazyData: true 31 | RoxygenNote: 7.2.3 32 | VignetteBuilder: knitr 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2022 2 | COPYRIGHT HOLDER: Raychaudhuri Lab 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(CreatePeakToGeneList) 4 | export(CreateSCENTObj) 5 | export(SCENT_algorithm) 6 | export(assoc_negbin) 7 | export(assoc_poisson) 8 | export(basic_p) 9 | export(check_dimensions) 10 | export(interp_pval) 11 | exportClasses(SCENT) 12 | import(Hmisc) 13 | import(MASS) 14 | import(Matrix) 15 | import(R.utils) 16 | import(boot) 17 | import(data.table) 18 | import(lme4) 19 | import(methods) 20 | import(parallel) 21 | import(stringr) 22 | importFrom(stats,as.formula) 23 | importFrom(stats,coef) 24 | importFrom(stats,glm) 25 | importFrom(stats,vcov) 26 | importFrom(utils,write.table) 27 | -------------------------------------------------------------------------------- /Parallelized Bash Script/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/immunogenomics/SCENT/e80b5ba6b445f972c7fe28fb41e24ef4f5b2e373/Parallelized Bash Script/.DS_Store -------------------------------------------------------------------------------- /Parallelized Bash Script/SCENT_parallelization.R: 
--------------------------------------------------------------------------------
# Driver for one task of a parallelized SCENT run: loads a saved SCENT object,
# selects the gene-peak batch that matches this job-array index, and runs the
# SCENT algorithm on that batch. Invoked by parallelizedSCENT.sh as:
#   Rscript SCENT_parallelization.R <node> <cores> <SCENTobj_rds> <celltype> \
#     <regr> <bin> <output_dir>

# Libraries to load:
library(SCENT)

####### INPUTS
# Obtain arguments (from the cluster). The command line is parsed once, and
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 7L) {
  stop(
    "Expected 7 arguments (node, cores, SCENTobj_rds, celltype, regr, bin, ",
    "output_dir) but received ", length(args), ".",
    call. = FALSE
  )
}

node <- as.integer(args[1])         # integer. JOB ARRAY number: node usage
cores <- as.integer(args[2])        # integer. Number of cores
SCENTobj_rds <- args[3]             # character. RDS object file
celltype <- args[4]                 # character. Cell type
regr <- args[5]                     # character. Regression type
bin <- as.logical(args[6])          # logical. Binarize ATAC counts
output_dir <- args[7]               # character. Folder for the output text files

# as.integer()/as.logical() return NA for text they cannot parse; stop here
# with a clear message instead of failing later inside the algorithm.
if (is.na(node) || is.na(cores) || is.na(bin)) {
  stop(
    "`node` and `cores` must be integers and `bin` must be TRUE or FALSE.",
    call. = FALSE
  )
}

### Example of inputs from the bash script: parallelizedSCENT.sh
# node <- 1
# cores <- 6
# celltype <- "Tnk"
# SCENTobj_rds <- "./Testing/Output/SCENT_obj.rds"
# output_dir <- "./Testing/Output/"


#### Load:
SCENT_obj <- readRDS(SCENTobj_rds)

#### Get the corresponding dataframe from the list:
# The job array may be larger than the number of batches that were created.
n_batches <- length(SCENT_obj@peak.info.list)
if (node < 1L || node > n_batches) {
  stop(
    "Job index ", node, " is outside the ", n_batches,
    " gene-peak batches stored in peak.info.list.",
    call. = FALSE
  )
}
SCENT_obj@peak.info <- SCENT_obj@peak.info.list[[node]]

#### Run the SCENT algorithm for the requested cell type, using `cores` cores
#### for the bootstrap:
SCENT_obj <- SCENT_algorithm(SCENT_obj, celltype = celltype, ncores = cores,
                             regr = regr, bin = bin)

#### Output SCENT results for each gene-peak pair block.
32 | filename <- paste0(output_dir,"/SCENTresult_",node,".txt") 33 | 34 | write.table(SCENT_obj@SCENT.result, file = filename, row.names = F, col.names = T, quote = F) 35 | -------------------------------------------------------------------------------- /Parallelized Bash Script/parallelizedSCENT.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #BSUB -J SCENT[1-100] #Number of job arrays dependent on number of peak-gene pair batches 4 | #BSUB -q big #node for multi-parallelized threading and nodes 5 | #BSUB -M 18000 #18 GB 6 | #BSUB -n 6 #6 cores 7 | #BSUB -o Output_%J_%I.out #output file %J is job %I is job array index 8 | #BSUB -e Error_%J_%I.err #error file %J is job %I is job array index 9 | 10 | 11 | module load R 12 | Rscript SCENT_parallelization.R $LSB_JOBINDEX ${num_cores} ${file_SCENT_obj} ${celltype} ${regr} ${bin} ${output_dir} 13 | 14 | -------------------------------------------------------------------------------- /R/.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | inst/doc 6 | improvements 7 | READMEupdate.md 8 | RData/ 9 | .DS_Store 10 | -------------------------------------------------------------------------------- /R/SCENTfunctions.R: -------------------------------------------------------------------------------- 1 | ## define functions 2 | #' Interpolate a p-value from quantiles that should be "null scaled" 3 | #' 4 | #' @param q bootstrap quantiles, centered so that under the null, theta = 0 5 | #' @return two-sided p-value 6 | #' @export 7 | interp_pval = function(q) { 8 | R = length(q) 9 | tstar = sort(q) 10 | zero = findInterval(0, tstar) 11 | if(zero == 0 || zero == R) return(2/R) # at/beyond extreme values 12 | pval = 2*min(zero/R, (R-zero)/R) 13 | pval 14 | } 15 | 16 | 17 | #' Derive a p-value from a vector of bootstrap samples using the "basic" calculation 18 | #' 19 | #' 
#' @param obs observed value of parameter (using actual data)
#' @param boot vector of bootstraps
#' @param null value subtracted from the reflected bootstrap values before the
#'   p-value is interpolated (default 0)
#'
#' @return p-value
#' @export
basic_p <- function(obs, boot, null = 0) {
  # Reflect the bootstrap draws around the observed estimate and shift by the
  # null value; the p-value is then read off where 0 falls among them.
  reflected <- 2 * obs - boot - null
  interp_pval(reflected)
}


#' Fit the Poisson model exprs ~ peak + covariates on (resampled) rows
#'
#' @param data data frame holding the expression values, the peak accessibility
#'   and the covariates for one gene.
#' @param idx rows of `data` to fit on; supplied by `boot` during bootstrapping
#'   and defaulting to every row.
#' @param formula model formula, as built from the covariates of the SCENT object
#'
#' @return vector: (coefficient of the peak effect on gene, variance of peak effect on gene)
#' @export
assoc_poisson <- function(data, idx = seq_len(nrow(data)), formula) {
  resampled <- data[idx, , drop = FALSE]
  fit <- glm(formula, family = 'poisson', data = resampled)
  peak_effect <- coef(fit)['atac']
  peak_variance <- diag(vcov(fit))['atac']
  c(peak_effect, peak_variance)
}


#' Perform negative binomial regression: exprs ~ peak + covariates
#'
#' @param data contains expr values and associated peak and covariates for a gene.
#' @param idx rows of the data to use: argument for boot function (bootstrapping)
#' @param formula user defined formula based on initialization in CreateSCENTObj Constructor
#'
#' @return vector: (coefficient of the peak effect on gene, variance of peak effect on gene)
#' @export
assoc_negbin <- function(data, idx = seq_len(nrow(data)), formula) {
  gg <- glm.nb(formula, data = data[idx, , drop = FALSE])
  c(coef(gg)["atac"], diag(vcov(gg))["atac"])
}



#' Validity and Type Checking for CreateSCENTObject Constructor
#'
#' Checks that the RNA and ATAC matrices describe the same number of cells and,
#' when `peak.info` is filled in, that its genes and peaks are row names of the
#' RNA and ATAC matrices. Emits a warning (not an error) when there are fewer
#' peaks than genes.
#'
#' @param object SCENT object constructed from class CreateSCENTObject
#'
#' @return `TRUE` when every check passes, otherwise a character vector with one
#'   message per failed check.
#' RNA: matrix of (genes x cells)
#' ATAC: matrix of (peaks x cells)
#' @export
check_dimensions <- function(object) {
  errors <- character()

  # Count rows/columns from the matrices themselves. Counting the lengths of
  # the dimnames reports 0 for a matrix without row/column names, which lets
  # matrices with different numbers of cells pass the check below.
  num_cells_rna <- ncol(object@rna)
  num_cells_atac <- ncol(object@atac)

  num_genes <- nrow(object@rna)
  num_peaks <- nrow(object@atac)

  # Check if the number of cells match between rna and atac matrix.
  if (num_cells_rna != num_cells_atac) {
    msg <- paste("Error: The num of cells in scRNA matrix is: ", num_cells_rna,
                 " and the num of cells in scATAC matrix is: ", num_cells_atac,
                 ". These should EQUAL EACH OTHER, please check to make sure",
                 " both matrices for scRNA and scATAC are read in as",
                 " (genes x cells) and (peaks x cells), respectively. ")
    errors <- c(errors, msg)
  }


  # Most likely the number of peaks is greater than the number of genes; if not, WARN.
  if (num_peaks < num_genes) {
    warning(paste("Warning: in general there are more peaks found through ATAC",
                  " than genes. Currently you have number of peaks =", num_peaks,
                  " and number of genes =", num_genes))
  }

  # If peak.info is present (length of a data frame = number of columns):
  if (length(object@peak.info) > 0) {
    if (length(object@peak.info) < 2) {
      # Report a validity message instead of failing on `[[2]]` below.
      msg <- paste("The peak.info dataframe must have genes in the 1st column",
                   "and peaks in the 2nd column")
      errors <- c(errors, msg)
    } else {
      # Check if genes correspond between rna matrix and peak.info dataframe:
      if (!all(object@peak.info[[1]] %in% rownames(object@rna))) {
        msg <- paste("The gene names in the peak.info dataframe is NOT a subset of the gene names in",
                     " the scRNA matrix")
        errors <- c(errors, msg)
      }


      # Check if peaks correspond between atac matrix and peak.info dataframe:
      if (!all(object@peak.info[[2]] %in% rownames(object@atac))) {
        msg <- paste("The peak ids in the peak.info dataframe is NOT a subset of the peak names in",
                     " the scATAC matrix")
        errors <- c(errors, msg)
      }
    }
  }


  ### Additional things to check (not implemented yet):
  # Check if meta.data table with covariates has the correct cell column names
  # Check if covariates are in the columns of meta.data
  if (length(errors) == 0) TRUE else errors
}



#' SCENT Class Constructor
#'
#' @slot rna dgCMatrix. scRNAseq matrix read as a sparse matrix
#' @slot atac dgCMatrix. scATACseq matrix read as a sparse matrix
#' @slot meta.data data.frame. Metadata table with covariates and a cell ID column ("cell")
#' @slot peak.info data.frame. Dataframe that contains gene-peak pairs for SCENT to search through
#' @slot peak.info.list list. List of dataframes that contain gene-peak pairs to parallelize through
#' @slot covariates character. Assign covariates that are needed for the analysis. Must be names that are in the columns of meta.data
#' @slot celltypes character. Assign celltype column from meta.data
#' @slot SCENT.result data.frame. Initialized as empty.
#' Becomes a table of the tested gene-peak pairs and their statistics
#'
#' @return SCENT object to use for further analysis
#' @export
CreateSCENTObj <- setClass(
  Class = "SCENT",
  slots = c(
    rna = "dgCMatrix",
    atac = "dgCMatrix",
    meta.data = "data.frame",
    peak.info = "data.frame", # Must be gene (1st column) then peak (2nd column)
    peak.info.list = "list",
    covariates = "character",
    celltypes = "character",
    SCENT.result = "data.frame"
  ),
  validity = check_dimensions
)

# Adaptive bootstrap p-value for one gene-peak pair (internal helper).
# Starts with 100 bootstrap replicates and re-runs with more replicates each
# time the current p-value falls below the next threshold, so that only small
# p-values pay for the large replicate counts:
#   p < 0.1 -> 500, p < 0.05 -> 2500, p < 0.01 -> 25000, p < 0.001 -> 50000.
# Every threshold is tested in turn against the most recent p-value.
.scent_bootstrap_p <- function(data, assoc, formula, ncores) {
  run_boot <- function(n_rep) {
    bs <- boot::boot(data, assoc, R = n_rep, formula = formula, stype = "i",
                     parallel = "multicore", ncpus = ncores)
    basic_p(bs$t0[1], bs$t[, 1])
  }

  p0 <- run_boot(100)
  escalation <- list(c(0.1, 500), c(0.05, 2500), c(0.01, 25000), c(0.001, 50000))
  for (step in escalation) {
    if (p0 < step[1]) {
      p0 <- run_boot(step[2])
    }
  }
  p0
}

#' SCENT Algorithm: Poisson Regression with Empirical P-values through Bootstrapping.
#'
#' For every gene-peak pair in `peak.info`, the expression of the gene is
#' regressed on the accessibility of the peak plus the covariates, using only
#' cells of the requested cell type. Pairs are tested only when both the gene
#' and the peak are non-zero in more than 5% of those cells. The reported
#' bootstrap p-value comes from an adaptive number of bootstrap replicates.
#'
#' @param object SCENT object
#' @param celltype character. User specified cell type defined in celltypes column of meta.data
#' @param ncores numeric. Number of cores to use for Parallelization
#' @param regr character. Regression type: "poisson" or "negbin" for Poisson regression and Negative Binomial regression, respectively
#' @param bin logical. TRUE to binarize ATAC counts. FALSE to NOT binarize ATAC counts
#'
#' @return SCENT object with updated field SCENT.result: one row per tested
#'   pair with columns gene, peak, beta, se, z, p and boot_basic_p. The field is
#'   an empty data frame when no pair passes the 5% filter.
#' @export
SCENT_algorithm <- function(object, celltype, ncores, regr = "poisson", bin = TRUE) {
  # Fail fast on an unsupported regression type; otherwise the fitting function
  # would be undefined at the first pair that passes the filter.
  if (!is.character(regr) || length(regr) != 1L ||
      !(regr %in% c("poisson", "negbin"))) {
    stop("`regr` must be either \"poisson\" or \"negbin\".", call. = FALSE)
  }
  assoc <- if (regr == "poisson") assoc_poisson else assoc_negbin

  # The model is the same for every pair: exprs ~ atac + covariates.
  res_var <- "exprs"
  pred_var <- c("atac", object@covariates)
  formula <- as.formula(paste(res_var, paste(pred_var, collapse = "+"), sep = "~"))

  peak_info <- object@peak.info
  n_pairs <- nrow(peak_info)
  # Collect per-pair rows in a preallocated list and bind once at the end.
  results <- vector("list", n_pairs)

  # seq_len() yields an empty loop when peak.info has no rows (1:0 would not).
  for (n in seq_len(n_pairs)) {
    # as.character() guards against factor columns, which would otherwise index
    # the matrices by integer level code instead of by name.
    gene <- as.character(peak_info[[1]][n])      # GENE is FIRST COLUMN OF PEAK.INFO
    this_peak <- as.character(peak_info[[2]][n]) # PEAK is SECOND COLUMN OF PEAK.INFO
    atac_target <- data.frame(cell = colnames(object@atac),
                              atac = object@atac[this_peak, ],
                              stringsAsFactors = FALSE)

    # Binarize peaks:
    if (bin) {
      atac_target$atac[atac_target$atac > 0] <- 1
    }

    mrna_target <- object@rna[gene, ]
    df <- data.frame(cell = names(mrna_target), exprs = as.numeric(mrna_target),
                     stringsAsFactors = FALSE)
    df <- merge(df, atac_target, by = "cell")
    df <- merge(df, object@meta.data, by = "cell")

    # %in% never returns NA, so cells with a missing cell-type label are
    # dropped instead of turning into all-NA rows.
    df2 <- df[df[[object@celltypes]] %in% celltype, ]

    n_cells <- nrow(df2)
    if (n_cells == 0) {
      stop("No cells with ", object@celltypes, " == \"", celltype,
           "\" remain after merging rna, atac and meta.data by cell.",
           call. = FALSE)
    }

    nonzero_m <- sum(df2$exprs > 0) / n_cells
    nonzero_a <- sum(df2$atac > 0) / n_cells
    if (nonzero_m > 0.05 && nonzero_a > 0.05) {
      # Estimated coefficients obtained without bootstrapping:
      if (regr == "poisson") {
        base <- glm(formula, family = "poisson", data = df2)
      } else {
        base <- glm.nb(formula, data = df2)
      }
      coefs <- summary(base)$coefficients["atac", ]

      # Bootstrapped two-sided p-value for the peak coefficient:
      p0 <- .scent_bootstrap_p(df2, assoc, formula, ncores)

      results[[n]] <- data.frame(gene = gene, peak = this_peak,
                                 beta = coefs[1], se = coefs[2],
                                 z = coefs[3], p = coefs[4],
                                 boot_basic_p = p0,
                                 stringsAsFactors = FALSE)
    }
  }

  # Drop the pairs that were filtered out, then bind the remaining rows.
  results <- results[!vapply(results, is.null, logical(1))]
  res <- if (length(results) > 0) do.call(rbind, results) else data.frame()

  # Update the SCENT.result field of the object:
  object@SCENT.result <- res
  object
}



#' Creating Cis Gene-Peak Pair Lists to Parallelize Through
#'
#' @param object SCENT object
#' @param genebed character. File directory for bed file that contains 500 kb windows for each gene
#' @param nbatch numeric. Number of batches to produce: Length of the list
#' @param tmpfile character. Location of temporary file.
#' @param intersectedfile character. Location of intersected file.
#'
#' @details Writes the peaks of the ATAC matrix to `tmpfile` as a BED file,
#' intersects it with `genebed` through the `bedtools` command-line tool, and
#' splits the resulting gene-peak pairs (restricted to genes present in the RNA
#' matrix) into `nbatch` batches. `tmpfile` is removed when the function exits;
#' `intersectedfile` is left on disk.
#'
#' @return SCENT object with updated field of peak.info.list
#' @export
CreatePeakToGeneList <- function(object,
                                 genebed = "/path/to/GeneBody_500kb_margin.bed",
                                 nbatch,
                                 tmpfile = "./temporary_atac_peak.bed",
                                 intersectedfile = "./temporary_atac_peak_intersected.bed.gz") {
  if (!nzchar(Sys.which("bedtools"))) {
    stop("bedtools was not found on the PATH; it is required to intersect peaks with genes.",
         call. = FALSE)
  }
  if (!file.exists(genebed)) {
    stop("genebed file not found: ", genebed, call. = FALSE)
  }

  peaknames <- rownames(object@atac) # peak by cell matrix
  # Turn ":" and "_" separators into "-" so every name splits as chr-start-end.
  peaknames_r <- gsub("[:_]", "-", peaknames)
  coords <- str_split_fixed(peaknames_r, "-", 3)
  peak_bed <- data.frame(chr = coords[, 1], start = coords[, 2],
                         end = coords[, 3], peak = peaknames)

  # Remove the temporary BED file however the function exits (also on error).
  on.exit(unlink(tmpfile), add = TRUE)
  write.table(peak_bed, tmpfile, quote = FALSE, row.names = FALSE,
              col.names = FALSE, sep = "\t")

  # Quote every path so that spaces or shell metacharacters cannot break or
  # alter the command. path.expand() keeps a leading "~" working, because the
  # shell no longer expands it once the path is quoted.
  cmd <- paste(
    "bedtools intersect -a", shQuote(path.expand(genebed)),
    "-b", shQuote(path.expand(tmpfile)),
    "-wa -wb -loj | gzip -c >", shQuote(path.expand(intersectedfile))
  )
  status <- system(cmd)
  if (status != 0) {
    stop("bedtools/gzip pipeline exited with status ", status, call. = FALSE)
  }

  d <- data.frame(fread(intersectedfile, sep = "\t"))
  # With -loj, genes without an overlapping peak get "." in the peak columns.
  # NOTE(review): V4 = gene and V5-V8 = peak columns assumes genebed has
  # exactly 4 columns (chr, start, end, gene) - confirm for custom bed files.
  d <- d[d$V5 != ".", ]

  # Obtain gene to peak pairs.
  cis.g2p <- d[c("V4", "V8")]
  colnames(cis.g2p) <- c("gene", "peak")
  genes_in_rna <- rownames(object@rna) # gene by cell matrix
  # Make sure g2p genes are all included in the rna matrix.
  cis.g2p <- cis.g2p[cis.g2p$gene %in% genes_in_rna, ]

  if (nrow(cis.g2p) == 0) {
    stop("No cis gene-peak pairs remain after intersecting the peaks with ",
         "genebed and keeping genes present in the rna matrix.", call. = FALSE)
  }

  # Split consecutive pairs into `nbatch` roughly equal batches.
  cis.g2p$index <- seq_len(nrow(cis.g2p))
  cis.g2p$batch_index <- cut2(cis.g2p$index, g = nbatch, levels.mean = TRUE)
  cis.g2p_list <- split(cis.g2p, f = cis.g2p$batch_index)
  # Keep only the gene and peak columns in every batch.
  cis.g2p_list <- lapply(cis.g2p_list, function(x) x[names(x) %in% c("peak", "gene")])
  names(cis.g2p_list) <- seq_along(cis.g2p_list)

  # Update the peak.info.list field of the object:
  object@peak.info.list <- cis.g2p_list
  object
}
-------------------------------------------------------------------------------- /R/import_packages.R: -------------------------------------------------------------------------------- 1 | #' @import methods Hmisc R.utils data.table stringr 2 | #' @import lme4 boot MASS parallel 3 | #' @import Matrix 4 | #' @importFrom stats as.formula coef glm vcov 5 | #' @importFrom utils write.table 6 | NULL 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # SCENT 4 | 5 | Single-Cell ENhancer Target gene mapping using multimodal data with ATAC + RNA 6 | 7 | The manuscript is now publised in *Nature Genetics*! (Sakaue et al. ["**Tissue-specific enhancer-gene maps from multimodal single-cell data identify causal disease alleles**"](https://www.nature.com/articles/s41588-024-01682-1)) 8 | 9 | 10 | 11 | ### Overview 12 | 13 | SCENT uses single-cell multimodal data (e.g., 10X Multiome RNA/ATAC) and links ATAC-seq peaks (putative enhancers) to their target genes by modeling association between chromatin accessibility and gene expression across individual single cells. 14 | 15 |
16 | 17 |
18 | 19 | 20 | 21 | We use Poisson regression to associate gene expression (raw) count and (binarized) peak accessibility, and estimate errors in coefficients by bootstrapping framework to control for type I error. 22 | 23 | 24 | ### Release notes 25 | 26 | - **v1.0.1**: Aug 2024, bug fix in parallelization scripts in `Parallelized Bash Script` folder 27 | - **v1.0.0**: Jan 2024, first official release 28 | 29 | ### Installation of SCENT Package 30 | 31 | You can install the development version of SCENT from [GitHub](https://github.com/) with: 32 | 33 | ``` r 34 | # install.packages("devtools") 35 | devtools::install_github("immunogenomics/SCENT") 36 | ``` 37 | 38 | 39 | ### Requirements 40 | 41 | The SCENT package will automatically install CRAN R packages. The packages below will go into your `R`. 42 | 43 | - `methods` 44 | - `data.table` 45 | - `lme4` 46 | - `stringr` 47 | - `boot` 48 | - `MASS` 49 | - `Matrix` 50 | - `parallel` 51 | 52 | The SCENT package also requires command-line tool, bedtools, for developing a list of: gene-peak pair dataframes to parallelize through. 53 | - `https://github.com/arq5x/bedtools2` 54 | 55 | 56 | ### Example usage 57 | 58 | Vignettes are posted in this github repo to show 2 potential uses of the SCENT package. 59 | 60 | ### 1.) Using SCENT interactively for testing small sets of gene-peak associations 61 | 62 | `SCENT_interactive.Rmd` vignette contains an example of using the SCENT package to generate results on small sets of gene-peak associations. 
63 | 64 | In summary, the main functionality is the SCENT object construction: 65 | 66 | ```r 67 | library(SCENT) 68 | 69 | SCENT_obj <- CreateSCENTObj(rna = mrna, atac = atac, meta.data = meta, 70 | peak.info = gene_peak, 71 | covariates = c("log(nUMI)","percent.mito","sample", "batch"), 72 | celltypes = "celltype") 73 | ``` 74 | 75 | Followed by SCENT algorithm: 76 | 77 | ```r 78 | SCENT_obj <- SCENT_algorithm(object = SCENT_obj, celltype = "Tcell", ncores = 6, regr = 'poisson', bin = TRUE) 79 | ``` 80 | The user specifies a `celltype` (in this case “Tcell”) for association analysis (in `meta.data` slot in SCENT object), `ncores` for the number of cores for parallelized bootstrapping, `regr` for the regression type (Poisson ‘poisson’ or Negative Binomial ‘negbin’ regression), and `bin` for whether to binarize ATAC counts (TRUE for binarization or FALSE for not). 81 | 82 | The output of the SCENT algorithm will be contained in the field: 83 | ```r 84 | SCENT_obj@SCENT.result 85 | ``` 86 | which can be saved as a textfile for further downstream analysis. 87 | 88 | 89 | Further information on Inputs and Outputs of SCENT are detailed below: 90 | 91 | #### Arguments To `CreateSCENTObj`: 92 | 93 | | # | Argument name (format) | Descriptions | 94 | | ---- | ---------------------------- | ------------------------------------------------------------ | 95 | | 1 | rna (sparse matrix) | A gene-by-cell count matrix from multimodal RNA-seq data. This is a raw count matrix without any normalization. The row names should be the gene names used in the `peak.info` file. The column names are the cell names which should be the same names used in the `cell`column of the dataframe specified for `meta.data`. Sparse matrix format is required. | 96 | | 2 | atac (sparse matrix) | A peak-by-cell count matrix from multimodal ATAC-seq data. This is a raw count matrix without any normalization. The row names should be the peak names used in the `peak.info` file. 
The column names are the cell names which should be the same names used in `rna` and the `cell` column of the dataframe specified for `meta.data`. The input matrix does not need to be binarized; it is binarized within the function when `bin = TRUE`. Sparse matrix format is required. | 97 | | 3 | meta.data (dataframe) | A meta data frame for cells (rows are cells, and **cell names should be in the column named as "cell"**; see below example). Additionally, this table should include the covariates to use in the model. Examples include: % mitochondrial reads, log(nUMI), sample, and batch as covariates. Dataframe format is required. | 98 | | 4 | peak.info (dataframe) | A table with two columns indicating which gene-peak pairs you want to test in this chunk (see below example) **genes should be in the 1st column and peaks in the 2nd column**. We highly recommend splitting gene-peak pairs into many chunks to increase computational efficiency (See Parallelized Jobs Info in Section 2). Dataframe format is required; for parallelized runs, a list of such data frames is stored in `peak.info.list` instead. \* | 99 | | 5 | covariates (a vector of character) | A vector of character fields that denote the covariates listed in the meta.data. For example, a set of covariates can be: %mitochondrial reads, log_nUMI, sample, and batch. Additionally the user can specify transformations to the covariates such as log transformation on nUMI counts for direct usage in the SCENT algorithm invoking poisson glm. **We recommend users to at least use log(number_of_total_RNA_UMI_count_per_cell) as the base model is Poisson regression and we do not include the offset term into the default model.** | 100 | | 6 | celltypes (character) | User specified naming of the celltype column in the meta.data file. This column should contain the names of the celltypes you want to test in this association analysis. 
| 101 | 102 | \* Extra Argument: The peak.info.list field can be left blank initially and a created List(Dataframe) can be constructed using the CreatePeakToGeneList function in the SCENT package. This function requires the user to specify a bed file that specifies ~500 kb windows of multiple gene loci to identify cis gene-peak pairs to test. The vignette, SCENT_parallelize.Rmd, will show steps to produce a SCENT object with a peak.info.list field that is used for parallelization in the SCENT_parallelization.R script. 103 | 104 | 105 | 106 | #### Example Formats: 107 | The example format of `peak.info` argument: 108 | 109 | ```bash 110 | > gene_peak <- read.table("/path/to/your_gene_peak_text_file.txt") 111 | > head(gene_peak) 112 | 113 | V1 V2 114 | 1 A1BG chr19-57849279-57850722 115 | 2 A1BG chr19-57888160-57889279 116 | 3 A1BG chr19-57915851-57917093 117 | 4 A1BG chr19-57934422-57935603 118 | 5 A1BG chr19-57946848-57948062 119 | ``` 120 | 121 | We usually only select peaks of which the center falls within 500 kb from the target gene (*cis* analysis). Also, while we have a function to QC peaks and genes so that they are present in at least 5% of all cells within `SCENT.R`, **it is more efficient to only include these QCed peaks and genes in `peak.info` to reduce the number of tests**. 
122 | 123 | 124 | The example format of `meta.data` argument: 125 | 126 | ```r 127 | meta <- readRDS(metafile) 128 | meta$`log(nUMI)` <- log(meta$nUMI) 129 | head(meta) 130 | 131 | cell nUMI percent.mito sample batch 132 | AAACAGCCAAGGAATC-1 AAACAGCCAAGGAATC-1 8380 0.01503428 sample_1 batch_a 133 | AAACAGCCAATCCCTT-1 AAACAGCCAATCCCTT-1 3771 0.02207505 sample_1 batch_a 134 | AAACAGCCAATGCGCT-1 AAACAGCCAATGCGCT-1 6876 0.01435579 sample_1 batch_a 135 | AAACAGCCACACTAAT-1 AAACAGCCACACTAAT-1 1733 0.03881841 sample_1 batch_a 136 | AAACAGCCACCAACCG-1 AAACAGCCACCAACCG-1 5415 0.01600768 sample_1 batch_a 137 | AAACAGCCAGGATAAC-1 AAACAGCCAGGATAAC-1 2759 0.02485340 sample_1 batch_a 138 | celltype log(nUMI) 139 | AAACAGCCAAGGAATC-1 Tcell 9.033603 140 | AAACAGCCAATCCCTT-1 Tcell 8.235095 141 | AAACAGCCAATGCGCT-1 Tcell 8.835792 142 | AAACAGCCACACTAAT-1 Tcell 7.457609 143 | AAACAGCCACCAACCG-1 Tcell 8.596928 144 | AAACAGCCAGGATAAC-1 Tcell 7.922624 145 | ``` 146 | 147 | 148 | #### Output of SCENT (`SCENT.result` slot) 149 | 150 | ```bash 151 | > head(SCENT_obj@SCENT.result) 152 | gene peak beta se z p boot_basic_p 153 | A1BG chr19-57849279-57850722 0.587060911718621 0.227961010352348 2.57526894977009 0.0100162168431262 0.0192 154 | A1BG chr19-57888160-57889279 -0.0842330294127105 0.232845263030106 -0.361755392042566 0.717534829528597 0.688 155 | A1BG chr19-57915851-57917093 -0.00971211792633636 0.225020479431863 -0.043161040056699 0.965573161660521 1 156 | A1BG chr19-57934422-57935603 0.0136752444069743 0.249810124611214 0.0547425546833116 0.956343566437322 0.968 157 | ``` 158 | 159 | Each column indicates the following: 
160 | 161 | | Column | Descriptions | 162 | | ------------ | ------------------------------------------------------------ | 163 | | gene | The gene(-peak) pair in each test statistics | 164 | | peak | The (gene-)peak pair in each test statistics | 165 | | beta | The regression coefficient from primary Poisson regression | 166 | | se | The standard error from primary Poisson regression | 167 | | z | The Z score from primary Poisson regression | 168 | | p | The raw p value from primary Poisson regression | 169 | | boot_basic_p | The bootstrap p value calculated from bootstrapping analyses | 170 | 171 | 172 | 173 | ### 2.) Using SCENT with parallelized jobs. 174 | 175 | `SCENT_parallelization.R` is the example code necessary for running parallelized SCENT jobs. 176 | This code needs a `SCENT_Object.rds` file that contains a list of gene-peak pairs. 177 | To generate this object please follow the SCENT_parallelize.Rmd vignette file. 178 | 179 | The corresponding bash script `parallelizedSCENT.sh` contains a parallelization scheme that is 180 | dependent on the amount of gene-peak pair batches that is user defined (for context please refer to the 181 | SCENT_parallelize.Rmd vignette). The main part of the bash script contains the line: 182 | 183 | ```bash 184 | Rscript SCENT_parallelization.R $LSB_JOBINDEX ${num_cores} ${file_SCENT_obj} ${celltype} ${regr} ${bin} ${output_dir} 185 | ``` 186 | 187 | Arguments in the bash file are user specified as follows: 188 | 189 | |# | Argument Name | Descriptions | 190 | | ---- | ------------- | ------------ | 191 | |1 | LSB_JOBINDEX | jobarray index specified by BSUB -J SCENT[1-100] | 192 | |2 | num_cores | number of cores (ex. 6) to parallelize to the SCENT algorithm | 193 | |3 | file_SCENT_obj | SCENT object that contains atac_matrix, rna_matrix, metafile, peak_gene_list, etc. To run the SCENT algorithm | 194 | |4 | celltype | User specified celltype (ex. 
"Tcells") to run the SCENT algorithm | 195 | |5 | regr | User specified regression type (ex. "poisson") to run SCENT algorithm | 196 | |6 | bin | User specified choice to binarize ATAC counts (ex. TRUE) | 197 | |7 | output_dir | User specified directory to output the SCENT results to aggregate once completed | 198 | 199 | ### Enhancer-gene links from the paper 200 | 201 | SCENT enhancer-gene linkages (FDR<10%) from the 8 datasets that we described in the paper can be downloaded from the following dropbox link. 202 | 203 | https://www.dropbox.com/scl/fo/g20tfnwkcuhib4a6z1wp4/ABYaK5s8bwTLnzrJ0KoZn48?rlkey=j1s5365gso53r2v2dsdynnsr2&st=5np1fq0a&dl=0 204 | 205 | ### Contact 206 | 207 | Saori Sakaue ssakaue@broadinstitute.org 208 | -------------------------------------------------------------------------------- /SCENT.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /fig/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/immunogenomics/SCENT/e80b5ba6b445f972c7fe28fb41e24ef4f5b2e373/fig/.DS_Store -------------------------------------------------------------------------------- /fig/cover_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/immunogenomics/SCENT/e80b5ba6b445f972c7fe28fb41e24ef4f5b2e373/fig/cover_image.png 
-------------------------------------------------------------------------------- /fig/cover_image2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/immunogenomics/SCENT/e80b5ba6b445f972c7fe28fb41e24ef4f5b2e373/fig/cover_image2.png -------------------------------------------------------------------------------- /man/CreatePeakToGeneList.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SCENTfunctions.R 3 | \name{CreatePeakToGeneList} 4 | \alias{CreatePeakToGeneList} 5 | \title{Creating Cis Gene-Peak Pair Lists to Parallelize Through} 6 | \usage{ 7 | CreatePeakToGeneList( 8 | object, 9 | genebed = "/path/to/GeneBody_500kb_margin.bed", 10 | nbatch, 11 | tmpfile = "./temporary_atac_peak.bed", 12 | intersectedfile = "./temporary_atac_peak_intersected.bed.gz" 13 | ) 14 | } 15 | \arguments{ 16 | \item{object}{SCENT object} 17 | 18 | \item{genebed}{character. File directory for bed file that contains 500 kb windows for each gene} 19 | 20 | \item{nbatch}{numeric. Number of batches to produce: Length of the list} 21 | 22 | \item{tmpfile}{character. Location of temporary file.} 23 | 24 | \item{intersectedfile}{character. 
Location of intersected file.} 25 | } 26 | \value{ 27 | SCENT object with updated field of peak.info.list 28 | } 29 | \description{ 30 | Creating Cis Gene-Peak Pair Lists to Parallelize Through 31 | } 32 | -------------------------------------------------------------------------------- /man/SCENT-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SCENTfunctions.R 3 | \docType{class} 4 | \name{SCENT-class} 5 | \alias{SCENT-class} 6 | \alias{CreateSCENTObj} 7 | \title{SCENT Class Constructor} 8 | \value{ 9 | SCENT object to use for further analysis 10 | } 11 | \description{ 12 | SCENT Class Constructor 13 | } 14 | \section{Slots}{ 15 | 16 | \describe{ 17 | \item{\code{rna}}{dgCMatrix. scRNAseq matrix read as a sparse matrix} 18 | 19 | \item{\code{atac}}{dgCMatrix. scATACseq matrix read as a sparse matrix} 20 | 21 | \item{\code{meta.data}}{data.frame. Metadata table with covariates and a cell ID column ("cell")} 22 | 23 | \item{\code{peak.info}}{data.frame. Dataframe that contains gene-peak pairs for SCENT to search through} 24 | 25 | \item{\code{peak.info.list}}{list. List of dataframes that contain gene-peak pairs to parallelize through} 26 | 27 | \item{\code{covariates}}{character. Assign covariates that are needed for the analysis. Must be names that are in the columns of meta.data} 28 | 29 | \item{\code{celltypes}}{character. Assign celltype column from meta.data} 30 | 31 | \item{\code{SCENT.result}}{data.frame. Initialized as empty. 
Becomes a table of resultant significant gene peak pairs} 32 | }} 33 | 34 | -------------------------------------------------------------------------------- /man/SCENT_algorithm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SCENTfunctions.R 3 | \name{SCENT_algorithm} 4 | \alias{SCENT_algorithm} 5 | \title{SCENT Algorithm: Poisson Regression with Empirical P-values through Bootstrapping.} 6 | \usage{ 7 | SCENT_algorithm(object, celltype, ncores, regr = "poisson", bin = TRUE) 8 | } 9 | \arguments{ 10 | \item{object}{SCENT object} 11 | 12 | \item{celltype}{character. User specified cell type defined in celltypes column of meta.data} 13 | 14 | \item{ncores}{numeric. Number of cores to use for Parallelization} 15 | 16 | \item{regr}{character. Regression type: "poisson" or "negbin" for Poisson regression and Negative Binomial regression, respectively} 17 | 18 | \item{bin}{logical. TRUE to binarize ATAC counts. FALSE to NOT binarize ATAC counts} 19 | } 20 | \value{ 21 | SCENT object with updated field SCENT.result 22 | } 23 | \description{ 24 | SCENT Algorithm: Poisson Regression with Empirical P-values through Bootstrapping. 
25 | } 26 | -------------------------------------------------------------------------------- /man/assoc_negbin.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SCENTfunctions.R 3 | \name{assoc_negbin} 4 | \alias{assoc_negbin} 5 | \title{Perform negative binomial regression: exprs ~ peak + covariates} 6 | \usage{ 7 | assoc_negbin(data, idx = seq_len(nrow(data)), formula) 8 | } 9 | \arguments{ 10 | \item{data}{contains expr values and associated peak and covariates for a gene.} 11 | 12 | \item{idx}{rows of the data to use: argument for boot function (bootstrapping)} 13 | 14 | \item{formula}{user defined formula based on initialization in CreateSCENTObj Constructor} 15 | } 16 | \value{ 17 | vector: (coefficient of the peak effect on gene, variance of peak effect on gene) 18 | } 19 | \description{ 20 | Perform negative binomial regression: exprs ~ peak + covariates 21 | } 22 | -------------------------------------------------------------------------------- /man/assoc_poisson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SCENTfunctions.R 3 | \name{assoc_poisson} 4 | \alias{assoc_poisson} 5 | \title{Perform poisson regression: exprs ~ peak + covariates} 6 | \usage{ 7 | assoc_poisson(data, idx = seq_len(nrow(data)), formula) 8 | } 9 | \arguments{ 10 | \item{data}{contains expr values and associated peak and covariates for a gene.} 11 | 12 | \item{idx}{rows of the data to use: argument for boot function (bootstrapping)} 13 | 14 | \item{formula}{user defined formula based on initialization in CreateSCENTObj Constructor} 15 | } 16 | \value{ 17 | vector: (coefficient of the peak effect on gene, variance of peak effect on gene) 18 | } 19 | \description{ 20 | Perform poisson regression: exprs ~ peak + covariates 21 | } 22 | 
-------------------------------------------------------------------------------- /man/basic_p.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SCENTfunctions.R 3 | \name{basic_p} 4 | \alias{basic_p} 5 | \title{Derive a p-value from a vector of bootstrap samples using the "basic" calculation} 6 | \usage{ 7 | basic_p(obs, boot, null = 0) 8 | } 9 | \arguments{ 10 | \item{obs}{observed value of parameter (using actual data)} 11 | 12 | \item{boot}{vector of bootstraps} 13 | } 14 | \value{ 15 | p-value 16 | } 17 | \description{ 18 | Derive a p-value from a vector of bootstrap samples using the "basic" calculation 19 | } 20 | -------------------------------------------------------------------------------- /man/check_dimensions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SCENTfunctions.R 3 | \name{check_dimensions} 4 | \alias{check_dimensions} 5 | \title{Validity and Type Checking for CreateSCENTObj Constructor} 6 | \usage{ 7 | check_dimensions(object) 8 | } 9 | \arguments{ 10 | \item{object}{SCENT object constructed with CreateSCENTObj} 11 | } 12 | \value{ 13 | None OR Errors dependent on if the object follows the guidelines for SCENT 14 | RNA: matrix of (genes x cells) 15 | ATAC: matrix of (peaks x cells) 16 | } 17 | \description{ 18 | Validity and Type Checking for CreateSCENTObj Constructor 19 | } 20 | -------------------------------------------------------------------------------- /man/interp_pval.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SCENTfunctions.R 3 | \name{interp_pval} 4 | \alias{interp_pval} 5 | \title{Interpolate a p-value from quantiles that should be "null scaled"} 6 | 
\usage{ 7 | interp_pval(q) 8 | } 9 | \arguments{ 10 | \item{q}{bootstrap quantiles, centered so that under the null, theta = 0} 11 | } 12 | \value{ 13 | two-sided p-value 14 | } 15 | \description{ 16 | Interpolate a p-value from quantiles that should be "null scaled" 17 | } 18 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | Output/ 4 | RData/ 5 | temporary_atac_peak_intersected.bed.gz 6 | -------------------------------------------------------------------------------- /vignettes/SCENT_interactive.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "SCENT_interactive" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{SCENT_interactive} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | 17 | ## Load Package 18 | 19 | ```{r setup} 20 | library(SCENT) 21 | ``` 22 | 23 | ## Load Inputs 24 | 25 | ```{r} 26 | #Initialize directories: (Example) 27 | input_atac <- "./RData/Data/pbmc_multimodal.atac.rds" 28 | input_mrna <- "./RData/Data/pbmc_multimodal.rna.rds" 29 | input_meta <- "./RData/Data/pbmc_multimodal.meta.rds" 30 | input_gene_peak <- "./RData/Data/qced_Tnk.G2P.txt" 31 | output <- "./Output/test_output.txt" 32 | 33 | options(stringsAsFactors = F) 34 | 35 | #Read-in Necessary Files: 36 | atac <- readRDS(input_atac) 37 | mrna <- readRDS(input_mrna) 38 | meta <- readRDS(input_meta) 39 | gene_peak <- read.table(input_gene_peak) 40 | colnames(gene_peak) <- c("gene","peak") 41 | 42 | ``` 43 | 44 | ## SCENT Object 45 | 46 | ```{r SCENT} 47 | ##Using the SCENT Object: 48 | SCENT_obj <- CreateSCENTObj(rna = mrna, atac = atac, meta.data = meta, 49 | peak.info = gene_peak, 50 | covariates = 
c("log(nCount_RNA)","percent.mito"), 51 | celltypes = "newCT") 52 | 53 | ##Example Outputs of the SCENT Object 54 | head(SCENT_obj@rna[1:10,1:2]) 55 | head(SCENT_obj@atac[1:10,1:2]) 56 | head(SCENT_obj@meta.data) 57 | head(SCENT_obj@peak.info) 58 | str(SCENT_obj) 59 | ``` 60 | 61 | ## SCENT Algorithm: Obtain small list of gene-peak pairs. 62 | 63 | ```{r gene_peak} 64 | #Of the set of peak gene pairs: pick a set of pairs to test: 65 | #Example: (first 10 gene-peak pairs) 66 | SCENT_obj@peak.info <- SCENT_obj@peak.info[1:10,] 67 | head(SCENT_obj@peak.info) 68 | ``` 69 | 70 | ## SCENT Algorithm: Options for Regression w/ Bootstrapping. 71 | 72 | ```{r run_SCENT} 73 | #Run SCENT algorithm of Tnk cell type and use 6 cores for parallelization: 74 | 75 | #Default: Poisson regression and Binarized ATAC counts 76 | SCENT_obj_ver1 <- SCENT_algorithm(SCENT_obj, "Tnk", 6) 77 | # By default settings the above will perform parallelizations using Poisson regression and Binarized counts. 78 | 79 | #Option 1: Poisson regression and Non-Binarized ATAC counts 80 | SCENT_obj_ver2 <- SCENT_algorithm(SCENT_obj, "Tnk", 6, regr = "poisson", bin = FALSE) 81 | 82 | #Option 2: Negative Binomial regression and Binarized ATAC counts 83 | SCENT_obj_ver3 <- SCENT_algorithm(SCENT_obj, "Tnk", 6, regr = "negbin", bin = TRUE) 84 | 85 | #Option 3: Negative Binomial regression and Non-Binarized ATAC counts 86 | SCENT_obj_ver4 <- SCENT_algorithm(SCENT_obj, "Tnk", 6, regr = "negbin", bin = FALSE) 87 | 88 | ``` 89 | 90 | ## Output of SCENT Algorithm 91 | 92 | ```{r SCENT_algo} 93 | head(SCENT_obj_ver1@SCENT.result) 94 | head(SCENT_obj_ver2@SCENT.result) 95 | head(SCENT_obj_ver3@SCENT.result) 96 | head(SCENT_obj_ver4@SCENT.result) 97 | ``` 98 | 99 | -------------------------------------------------------------------------------- /vignettes/SCENT_parallelize.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Parallelization of SCENT" 3 
| output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Parallelization of SCENT} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | 17 | ## Load Package 18 | 19 | ```{r setup} 20 | library(SCENT) 21 | ``` 22 | 23 | ## Data Inputs 24 | 25 | ```{r inputs} 26 | #Initialize directories: (Example) 27 | input_atac <- "./RData/Data/pbmc_multimodal.atac.rds" 28 | input_mrna <- "./RData/Data/pbmc_multimodal.rna.rds" 29 | input_meta <- "./RData/Data/pbmc_multimodal.meta.rds" 30 | input_gene_peak <- "./RData/Data/qced_Tnk.G2P.txt" 31 | output_rds <- "./Output/SCENT_obj.rds" 32 | ####Obtaining Gene-Peak Pairs using defined bed file 33 | genebed_loc <- "./RData/Data/GeneBody_500kb_margin_chr.bed" 34 | 35 | 36 | options(stringsAsFactors = F) 37 | 38 | #Read-in Necessary Files: 39 | atac <- readRDS(input_atac) 40 | mrna <- readRDS(input_mrna) 41 | meta <- readRDS(input_meta) 42 | gene_peak <- read.table(input_gene_peak) 43 | colnames(gene_peak) <- c("gene","peak") 44 | ``` 45 | 46 | 47 | 48 | ## SCENT Object 49 | 50 | ```{r SCENT} 51 | ####Using the SCENT Object: 52 | SCENT_obj <- CreateSCENTObj(rna = mrna, atac = atac, meta.data = meta, 53 | covariates = c("log(nCount_RNA)","percent.mito"), 54 | celltypes = "newCT") 55 | str(SCENT_obj) 56 | ``` 57 | 58 | ## Make Gene-Peak Pair Lists 59 | 60 | ```{r gene_peak_list} 61 | SCENT_obj <- CreatePeakToGeneList(SCENT_obj, genebed = genebed_loc, 62 | nbatch = 1000,tmpfile="./temporary_atac_peak.bed", 63 | intersectedfile="./temporary_atac_peak_intersected.bed.gz") 64 | str(SCENT_obj, max.level = 2) 65 | ``` 66 | 67 | ## Save the SCENT obj for parallelized jobs on the cluster to get SCENT results. 68 | 69 | ```{r SCENT_obj_save} 70 | saveRDS(SCENT_obj, file = output_rds) #Takes a couple minutes. 
71 | ``` 72 | 73 | ## Use the saved SCENT_obj to run the parallelized bash script located in the "Parallelized Bash Script" folder 74 | 75 | 76 | --------------------------------------------------------------------------------