├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── Parallelized Bash Script
    ├── .DS_Store
    ├── SCENT_parallelization.R
    └── parallelizedSCENT.sh
├── R
    ├── .gitignore
    ├── SCENTfunctions.R
    └── import_packages.R
├── README.md
├── SCENT.Rproj
├── data
    └── GeneBody_500kb_margin.bed
├── fig
    ├── .DS_Store
    ├── cover_image.png
    └── cover_image2.png
├── man
    ├── CreatePeakToGeneList.Rd
    ├── SCENT-class.Rd
    ├── SCENT_algorithm.Rd
    ├── assoc_negbin.Rd
    ├── assoc_poisson.Rd
    ├── basic_p.Rd
    ├── check_dimensions.Rd
    └── interp_pval.Rd
└── vignettes
    ├── .gitignore
    ├── SCENT_interactive.Rmd
    └── SCENT_parallelize.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | .Ruserdata
 5 | inst/doc
 6 | improvements
 7 | READMEupdate.md
 8 | RData/
 9 | .DS_Store
10 | .DS_Store
11 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: SCENT
 2 | Type: Package
 3 | Title: Single-Cell ENhancer Target (SCENT) gene mapping for single cell multimodal data 
 4 | Version: 1.0.1
 5 | Author: Saori Sakaue and Shakson Isaac
 6 | Maintainer: Shakson Isaac <shakson_isaac@g.harvard.edu>
 7 | Description: R package that contains functions for the SCENT algorithm. SCENT uses 
 8 |     single-cell multimodal data (e.g., 10X Multiome RNA/ATAC) and links 
 9 |     ATAC-seq peaks (putative enhancers) to their target genes by modeling association
10 |     between chromatin accessibility and gene expression across individual single cells.
11 | Depends: R (>= 3.5.0)
12 | Imports:
13 |     methods, 
14 |     Hmisc, 
15 |     R.utils, 
16 |     data.table, 
17 |     lme4, 
18 |     stringr,
19 |     boot, 
20 |     MASS, 
21 |     Matrix, 
22 |     parallel
23 | Suggests:
24 |     knitr, 
25 |     rmarkdown
26 | SystemRequirements:
27 |     bedtools (https://github.com/arq5x/bedtools2)
28 | License: MIT + file LICENSE
29 | Encoding: UTF-8
30 | LazyData: true
31 | RoxygenNote: 7.2.3
32 | VignetteBuilder: knitr
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2022
2 | COPYRIGHT HOLDER: Raychaudhuri Lab
3 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(CreatePeakToGeneList)
 4 | export(CreateSCENTObj)
 5 | export(SCENT_algorithm)
 6 | export(assoc_negbin)
 7 | export(assoc_poisson)
 8 | export(basic_p)
 9 | export(check_dimensions)
10 | export(interp_pval)
11 | exportClasses(SCENT)
12 | import(Hmisc)
13 | import(MASS)
14 | import(Matrix)
15 | import(R.utils)
16 | import(boot)
17 | import(data.table)
18 | import(lme4)
19 | import(methods)
20 | import(parallel)
21 | import(stringr)
22 | importFrom(stats,as.formula)
23 | importFrom(stats,coef)
24 | importFrom(stats,glm)
25 | importFrom(stats,vcov)
26 | importFrom(utils,write.table)
27 | 


--------------------------------------------------------------------------------
/Parallelized Bash Script/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/immunogenomics/SCENT/e80b5ba6b445f972c7fe28fb41e24ef4f5b2e373/Parallelized Bash Script/.DS_Store


--------------------------------------------------------------------------------
/Parallelized Bash Script/SCENT_parallelization.R:
--------------------------------------------------------------------------------
 1 | # Worker for one job-array task: runs SCENT on one batch of gene-peak pairs.
 2 | library(SCENT)
 3 | 
 4 | ####### INPUTS
 5 | # Positional arguments passed by parallelizedSCENT.sh; read once, then converted.
 6 | args <- commandArgs(trailingOnly = TRUE)
 7 | if (length(args) < 7) stop("Expected 7 arguments: node cores SCENTobj_rds celltype regr bin output_dir", call. = FALSE)
 8 | node <- as.integer(args[1]) # integer. JOB ARRAY number: index into peak.info.list
 9 | cores <- as.integer(args[2]) # integer. Number of Cores
10 | SCENTobj_rds <- args[3] # character. RDS object file type
11 | celltype <- args[4] # character. CellType
12 | regr <- args[5] # character. Regression Type
13 | bin <- as.logical(args[6]) # logical. Binarize ATAC counts
14 | output_dir <- args[7] # character. Output of each text file to a specific folder
15 | # as.integer()/as.logical() return NA for unparsable input; stop here with a clear message.
16 | if (is.na(node) || is.na(cores) || is.na(bin)) stop("node and cores must be integers and bin must be TRUE or FALSE", call. = FALSE)
17 | 
18 | ### Example inputs: node 1, cores 6, celltype "Tnk",
19 | ### SCENTobj_rds "./Testing/Output/SCENT_obj.rds", output_dir "./Testing/Output/"
20 | 
21 | #### Load:
22 | SCENT_obj <- readRDS(SCENTobj_rds)
23 | 
24 | #### Get the corresponding dataframe from the list:
25 | if (node < 1 || node > length(SCENT_obj@peak.info.list)) stop("node ", node, " is outside the ", length(SCENT_obj@peak.info.list), " batches in peak.info.list", call. = FALSE)
26 | SCENT_obj@peak.info <- SCENT_obj@peak.info.list[[node]]
27 | 
28 | #### Run the SCENT algorithm for the requested cell type, cores, regression type and binarization:
29 | SCENT_obj <- SCENT_algorithm(object = SCENT_obj, celltype = celltype, ncores = cores, regr = regr, bin = bin)
30 | 
31 | #### Output SCENT results for each gene-peak pair block.
32 | filename <- paste0(output_dir,"/SCENTresult_",node,".txt")
33 | 
34 | write.table(SCENT_obj@SCENT.result, file = filename, row.names = FALSE, col.names = TRUE, quote = FALSE)
35 | 


--------------------------------------------------------------------------------
/Parallelized Bash Script/parallelizedSCENT.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #BSUB -J SCENT[1-100] #Number of job arrays dependent on number of peak-gene pair batches
 4 | #BSUB -q big     #node for multi-parallelized threading and nodes
 5 | #BSUB -M 18000   #18 GB
 6 | #BSUB -n 6       #6 cores
 7 | #BSUB -o Output_%J_%I.out  #output file %J is job %I is job array index
 8 | #BSUB -e Error_%J_%I.err   #error file %J is job %I is job array index
 9 | # num_cores, file_SCENT_obj, celltype, regr, bin and output_dir are not set in this script; define them before submitting.
10 | # Every expansion is quoted so an empty value or a path containing spaces cannot shift the positional arguments.
11 | module load R
12 | Rscript SCENT_parallelization.R "$LSB_JOBINDEX" "${num_cores}" "${file_SCENT_obj}" "${celltype}" "${regr}" "${bin}" "${output_dir}"
13 | 
14 | 


--------------------------------------------------------------------------------
/R/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | .Ruserdata
 5 | inst/doc
 6 | improvements
 7 | READMEupdate.md
 8 | RData/
 9 | .DS_Store
10 | 


--------------------------------------------------------------------------------
/R/SCENTfunctions.R:
--------------------------------------------------------------------------------
  1 | ## define functions
  2 | #' Interpolate a two-sided p-value from quantiles that should be "null scaled"
  3 | #'
  4 | #' @param q numeric vector of bootstrap quantiles, centered so that under the null, theta = 0. NA values are dropped.
  5 | #' @return two-sided p-value; 2/R (R = number of non-NA values in q) when 0 lies at or beyond the extreme values
  6 | #' @export
  7 | interp_pval <- function(q) {
  8 |   tstar <- sort(q) # sort() drops NA, so R has to be counted after sorting
  9 |   R <- length(tstar)
 10 |   zero <- findInterval(0, tstar) # number of sorted values that are <= 0
 11 |   if (zero == 0 || zero == R) return(2/R) # at/beyond extreme values
 12 |   pval <- 2*min(zero/R, (R-zero)/R)
 13 |   pval
 14 | }
 15 | 
 16 | 
 17 | #' Derive a p-value from a vector of bootstrap samples using the "basic" calculation
 18 | #'
 19 | #' @param obs observed value of parameter (using actual data)
 20 | #' @param boot vector of bootstraps
 21 | #' @param null value of the parameter under the null hypothesis (default 0)
 22 | #' @return two-sided p-value returned by interp_pval for the reflected replicates 2*obs - boot, shifted by null
 23 | #' @export
 24 | basic_p <- function(obs, boot, null = 0) {
 25 |   interp_pval(2 * obs - boot - null)
 26 | }
 27 | 
 28 | 
 29 | #' Fit a Poisson GLM on the selected rows and report the `atac` effect
 30 | #'
 31 | #' @param data data frame holding the response, the `atac` predictor and every covariate named in `formula`
 32 | #' @param idx row indices of `data` to fit on (supplied by boot::boot during bootstrapping); defaults to all rows
 33 | #' @param formula model formula, as built in SCENT_algorithm from the covariates of the SCENT object
 34 | #' @return vector of length 2: (coefficient of `atac`, estimated variance of that coefficient)
 35 | #' @export
 36 | assoc_poisson <- function(data, idx = seq_len(nrow(data)), formula) {
 37 |   resampled <- data[idx, , drop = FALSE]
 38 |   fit <- glm(formula, family = 'poisson', data = resampled)
 39 |   c(coef(fit)['atac'], diag(vcov(fit))['atac'])
 40 | }
 41 | 
 42 | 
 43 | #' Fit a negative binomial GLM on the selected rows and report the `atac` effect
 44 | #'
 45 | #' @param data data frame holding the response, the `atac` predictor and every covariate named in `formula`
 46 | #' @param idx row indices of `data` to fit on (supplied by boot::boot during bootstrapping); defaults to all rows
 47 | #' @param formula model formula, as built in SCENT_algorithm from the covariates of the SCENT object
 48 | #' @return vector of length 2: (coefficient of `atac`, estimated variance of that coefficient)
 49 | #' @export
 50 | assoc_negbin <- function(data, idx = seq_len(nrow(data)), formula) {
 51 |   resampled <- data[idx, , drop = FALSE]
 52 |   fit <- glm.nb(formula, data = resampled)
 53 |   c(coef(fit)['atac'], diag(vcov(fit))['atac'])
 54 | }
 55 | 
 56 | 
 57 | 
 58 | #' Validity and Type Checking for CreateSCENTObject Constructor
 59 | #'
 60 | #' @param object SCENT object constructed from class CreateSCENTObject
 61 | #'
 62 | #' @return TRUE when every check passes, otherwise a character vector with one message per failed check.
 63 | #' RNA: matrix of (genes x cells)
 64 | #' ATAC: matrix of (peaks x cells)
 65 | #' @export
 66 | check_dimensions <- function(object){
 67 |   errors <- character()
 68 | 
 69 |   #Read sizes from the Dim slot: Dimnames entries may be NULL, which would make every count 0.
 70 |   num_cells_rna <- object@rna@Dim[2]
 71 |   num_cells_atac <- object@atac@Dim[2]
 72 | 
 73 |   num_genes <- object@rna@Dim[1]
 74 |   num_peaks <- object@atac@Dim[1]
 75 | 
 76 |   #Check if the number of cells match between rna and atac matrix.
 77 |   if(num_cells_rna != num_cells_atac){
 78 |     msg <- paste("Error: The num of cells in scRNA matrix is: ", num_cells_rna,
 79 |                  " and the num of cells in scATAC matrix is: ", num_cells_atac,
 80 |                  ". These should EQUAL EACH OTHER, please check to make sure",
 81 |                  " both matrices for scRNA and scATAC are read in as",
 82 |                  " (genes x cells) and (peaks x cells), respectively. ")
 83 |     errors <- c(errors, msg)
 84 |   }
 85 | 
 86 | 
 87 |   #Most likely the number of peaks is greater than the number of genes if not WARN.
 88 |   if(num_peaks < num_genes){
 89 |     warning(paste("Warning: in general there are more peaks found through ATAC",
 90 |                    " than genes. Currently you have number of peaks =", num_peaks,
 91 |                    " and number of genes =",num_genes))
 92 |   }
 93 | 
 94 |   #If peak.info is present check the following (length of a data.frame is its number of columns):
 95 |   if(!(length(object@peak.info) == 0)){
 96 |     #Check if genes correspond between rna matrix and peak.info dataframe:
 97 |     if(!all(object@peak.info[[1]] %in% object@rna@Dimnames[[1]])){
 98 |       msg <- paste("The gene names in the peak.info dataframe is NOT a subset of the gene names in",
 99 |                    " the scRNA matrix")
100 |       errors <- c(errors, msg)
101 |     }
102 | 
103 | 
104 |     #Check if peaks correspond between atac matrix and peak.info dataframe:
105 |     if(!all(object@peak.info[[2]] %in% object@atac@Dimnames[[1]])){
106 |       msg <- paste("The peak ids in the peak.info dataframe is NOT a subset of the peak names in",
107 |                    " the scATAC matrix")
108 |       errors <- c(errors, msg)
109 |     }
110 |   }
111 | 
112 | 
113 |   ###Additional things to check:
114 |   #Check if meta.data table with covariates has the correct cell column names
115 |   #Check if covariates are in the columns of meta.data
116 |   if (length(errors) == 0) TRUE else errors
117 | }
118 | 
119 | 
120 | 
121 | #' SCENT class and its generator CreateSCENTObj (validity checked by check_dimensions)
122 | #'
123 | #' @slot rna dgCMatrix. scRNAseq matrix read as a sparse matrix
124 | #' @slot atac dgCMatrix. scATACseq matrix read as a sparse matrix
125 | #' @slot meta.data data.frame. Metadata table with covariates and a cell ID column ("cell")
126 | #' @slot peak.info data.frame. Dataframe that contains gene-peak pairs for SCENT to search through
127 | #' @slot peak.info.list list. List of dataframes that contain gene-peak pairs to parallelize through
128 | #' @slot covariates character. Assign covariates that are needed for the analysis. Must be names that are in the columns of meta.data
129 | #' @slot celltypes character. Name of the celltype column in meta.data
130 | #' @slot SCENT.result data.frame. Initialized as empty. Filled by SCENT_algorithm with one row per tested gene-peak pair
131 | #'
132 | #' @return SCENT object to use for further analysis
133 | #' @export
134 | CreateSCENTObj <- setClass(
135 |   Class = "SCENT",
136 |   slots = c(
137 |     rna = 'dgCMatrix',
138 |     atac = 'dgCMatrix',
139 |     meta.data = 'data.frame',
140 |     peak.info = 'data.frame',  ###Must be gene (1st column) then peak (2nd column)
141 |     peak.info.list = 'list',
142 |     covariates = 'character',
143 |     celltypes = 'character',
144 |     SCENT.result = 'data.frame'
145 |   ),
146 |   validity = check_dimensions
147 | )
148 | 
149 | #' SCENT Algorithm: Poisson Regression with Empirical P-values through Bootstrapping.
150 | #'
151 | #' For every gene-peak pair in `object@peak.info`, restricts to cells of `celltype`, skips pairs whose
152 | #' gene or peak is nonzero in at most 5% of those cells, fits the regression once, then bootstraps the
153 | #' `atac` coefficient with a growing number of replicates while the bootstrap p-value stays small.
154 | #'
155 | #' @param object SCENT object
156 | #' @param celltype character. User specified cell type defined in celltypes column of meta.data
157 | #' @param ncores numeric. Number of cores to use for Parallelization
158 | #' @param regr character. Regression type: "poisson" or "negbin" for Poisson regression and Negative Binomial regression, respectively
159 | #' @param bin logical. TRUE to binarize ATAC counts. FALSE to NOT binarize ATAC counts
160 | #'
161 | #' @return SCENT object with updated field SCENT.result (one row per tested gene-peak pair)
162 | #' @export
163 | SCENT_algorithm <- function(object, celltype, ncores, regr = "poisson", bin = TRUE){
164 |   #Fail early on an unknown regression type; otherwise `assoc` and `coefs` would never be defined below.
165 |   regr <- match.arg(regr, c("poisson", "negbin"))
166 |   #Bootstrap schedule: stage s runs boot_reps[s] replicates only if the current p-value is below p_cutoff[s].
167 |   boot_reps <- c(100, 500, 2500, 25000, 50000)
168 |   p_cutoff <- c(Inf, 0.1, 0.05, 0.01, 0.001)
169 |   res <- data.frame()
170 |   for (n in seq_len(nrow(object@peak.info))){ #seq_len: 1:0 would iterate over c(1, 0) on an empty table
171 |     gene <- object@peak.info[n,1] #GENE is FIRST COLUMN OF PEAK.INFO
172 |     this_peak <- object@peak.info[n,2] #PEAK is SECOND COLUMN OF PEAK.INFO
173 |     atac_target <- data.frame(cell = colnames(object@atac), atac = object@atac[this_peak,])
174 | 
175 |     #binarize peaks:
176 |     if(bin){
177 |       atac_target$atac[atac_target$atac>0] <- 1
178 |     }
179 | 
180 |     mrna_target <- object@rna[gene,]
181 |     df <- data.frame(cell=names(mrna_target),exprs=as.numeric(mrna_target))
182 |     df<-merge(df,atac_target,by="cell")
183 |     df<-merge(df,object@meta.data,by="cell")
184 | 
185 |     #%in% never returns NA, so cells with a missing cell-type label are dropped instead of becoming NA rows.
186 |     df2 <- df[df[[object@celltypes]] %in% celltype,]
187 |     if(nrow(df2) == 0){
188 |       stop("No cells with ", object@celltypes, " == '", celltype, "' are shared by rna, atac and meta.data", call. = FALSE)
189 |     }
190 | 
191 |     nonzero_m  <- length( df2$exprs[ df2$exprs > 0] ) / length( df2$exprs )
192 |     nonzero_a  <- length( df2$atac[ df2$atac > 0] ) / length( df2$atac )
193 |     if(nonzero_m > 0.05 && nonzero_a > 0.05){
194 |       #Run Regression Once Before Bootstrapping:
195 |       res_var <- "exprs"
196 |       pred_var <- c("atac", object@covariates)
197 |       formula <- as.formula(paste(res_var, paste(pred_var, collapse = "+"), sep = "~"))
198 | 
199 |       #Estimated Coefficients Obtained without Bootstrapping:
200 |       if(regr == "poisson"){
201 |         base <- glm(formula, family = 'poisson', data = df2)
202 |         assoc <- assoc_poisson
203 |       } else {
204 |         base <- glm.nb(formula, data = df2)
205 |         assoc <- assoc_negbin
206 |       }
207 |       coefs <- summary(base)$coefficients["atac",]
208 | 
209 |       ###Iterative Bootstrapping Procedure: Estimate the Beta coefficients and associate a 2-sided p-value.
210 |       #Stages are visited in order and each one is tested against the latest p-value, as the original if-chain did.
211 |       p0 <- 0 #below every cutoff, so the first stage always runs
212 |       for (s in seq_along(boot_reps)){
213 |         if(p0 < p_cutoff[s]){
214 |           bs <- boot::boot(df2, assoc, R = boot_reps[s], formula = formula, stype = 'i', parallel = "multicore", ncpus = ncores)
215 |           p0 <- basic_p(bs$t0[1], bs$t[,1])
216 |         }
217 |       }
218 | 
219 |       #beta/se/z/p come from the `atac` row of the non-bootstrapped fit; boot_basic_p is the bootstrap p-value.
220 |       out <- data.frame(gene=gene,peak=this_peak,beta=coefs[1],se=coefs[2],z=coefs[3],p=coefs[4],boot_basic_p=p0)
221 |       res<-rbind(res,out)
222 |     }
223 |   }
224 | 
225 |   #Update the SCENT.result field of the constructor in R:
226 |   object@SCENT.result <- res
227 |   return(object)
228 | }
229 | 
230 | 
231 | 
232 | #' Creating Cis Gene-Peak Pair Lists to Parallelize Through
233 | #'
234 | #' @param object SCENT object
235 | #' @param genebed character. File directory for bed file that contains 500 kb windows for each gene
236 | #' @param nbatch numeric. Number of batches to produce: Length of the list
237 | #' @param tmpfile character. Location of temporary file (the peak bed is written here and removed when the function exits).
238 | #' @param intersectedfile character. Location of intersected file (gzipped bedtools output; it is not removed).
239 | #'
240 | #' @return SCENT object with updated field of peak.info.list
241 | #' @export
242 | CreatePeakToGeneList <- function(object,genebed="/path/to/GeneBody_500kb_margin.bed",nbatch,tmpfile="./temporary_atac_peak.bed",intersectedfile="./temporary_atac_peak_intersected.bed.gz"){
243 |   if(!nzchar(Sys.which("bedtools"))) stop("bedtools was not found on the PATH; it is required to intersect peaks with gene windows", call. = FALSE)
244 |   peaknames <- rownames(object@atac) # peak by cell matrix
245 |   peaknames_r <- gsub("[:_]","-",peaknames) # in case separator included ":" or "_"
246 |   peak_parts <- str_split_fixed(peaknames_r,"-",3) # split once: columns are chr, start, end
247 |   peak_bed <- data.frame(chr = peak_parts[,1], start = peak_parts[,2], end = peak_parts[,3], peak=peaknames)
248 |   on.exit(unlink(tmpfile), add = TRUE) # remove the temporary bed even if a later step fails
249 |   write.table(peak_bed,tmpfile,quote=FALSE,row.names=FALSE,col.names=FALSE,sep="\t")
250 |   # Quote every path so spaces or shell metacharacters cannot split or alter the command; path.expand keeps "~" working.
251 |   system(paste("bedtools intersect -a",shQuote(path.expand(genebed)),"-b",shQuote(path.expand(tmpfile)),"-wa -wb -loj | gzip -c >",shQuote(path.expand(intersectedfile))))
252 |   d <- fread(intersectedfile,sep="\t")
253 |   d<-data.frame(d)
254 |   d <- d[d$V5 != ".",] # drop rows whose 5th column is ".", i.e. rows without a peak record
255 | 
256 |   #Obtain gene to peak pairs.
257 |   cis.g2p <- d[c("V4","V8")]
258 |   colnames(cis.g2p) <- c("gene","peak")
259 |   genes_in_rna <- rownames(object@rna) # gene by cell matrix
260 |   cis.g2p <- cis.g2p[cis.g2p$gene %in% genes_in_rna,] # make sure g2p genes are all included in rna matrix
261 |   if(nrow(cis.g2p) == 0) stop("No gene-peak pairs remain after intersecting peaks with genebed and matching genes to the rna matrix", call. = FALSE)
262 | 
263 |   cis.g2p$index <- seq_len(nrow(cis.g2p))
264 |   cis.g2p$batch_index <- cut2(cis.g2p$index, g = nbatch, levels.mean = TRUE)
265 |   cis.g2p_list <- split(cis.g2p, f = cis.g2p$batch_index)
266 |   cis.g2p_list <- lapply(cis.g2p_list, function(x) x[(names(x) %in% c("peak", "gene"))])
267 |   names(cis.g2p_list) <- seq_along(cis.g2p_list)
268 |   # Update the SCENT.peak.info field of the constructor in R:
269 |   object@peak.info.list <- cis.g2p_list
270 |   return(object)
271 | }
272 | 
273 | 


--------------------------------------------------------------------------------
/R/import_packages.R:
--------------------------------------------------------------------------------
1 | #' @import methods Hmisc R.utils data.table stringr
2 | #' @import lme4 boot MASS parallel
3 | #' @import Matrix
4 | #' @importFrom stats as.formula coef glm vcov
5 | #' @importFrom utils write.table
6 | NULL
7 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | # SCENT
  4 | 
  5 | Single-Cell ENhancer Target gene mapping using multimodal data with ATAC + RNA
  6 | 
  7 | The manuscript is now published in *Nature Genetics*! (Sakaue et al. ["**Tissue-specific enhancer-gene maps from multimodal single-cell data identify causal disease alleles**"](https://www.nature.com/articles/s41588-024-01682-1))
  8 | 
  9 | 
 10 | 
 11 | ### Overview
 12 | 
 13 | SCENT uses single-cell multimodal data (e.g., 10X Multiome RNA/ATAC) and links ATAC-seq peaks (putative enhancers) to their target genes by modeling association between chromatin accessibility and gene expression across individual single cells.
 14 | 
 15 | <div align="center">
 16 | <img src="https://raw.githubusercontent.com/immunogenomics/SCENT/main/fig/cover_image2.png" width=90%>
 17 | </div>
 18 | 
 19 | 
 20 | 
 21 | We use Poisson regression to associate raw gene expression counts with (binarized) peak accessibility, and estimate the error of each coefficient with a bootstrapping framework to control type I error.
 22 | 
 23 | 
 24 | ### Release notes
 25 | 
 26 | - **v1.0.1**: Aug 2024, bug fix in parallelization scripts in `Parallelized Bash Script` folder
 27 | - **v1.0.0**: Jan 2024, first official release
 28 | 
 29 | ### Installation of SCENT Package
 30 | 
 31 | You can install the development version of SCENT from [GitHub](https://github.com/) with:
 32 | 
 33 | ``` r
 34 | # install.packages("devtools")
 35 | devtools::install_github("immunogenomics/SCENT")
 36 | ```
 37 | 
 38 | 
 39 | ### Requirements
 40 | 
 41 | The SCENT package will automatically install the CRAN R packages it imports (the full list is in the `DESCRIPTION` file). These include:
 42 | 
 43 | - `methods`
 44 | - `data.table`
 45 | - `lme4`
 46 | - `stringr`
 47 | - `boot`
 48 | - `MASS`
 49 | - `Matrix`
 50 | - `parallel`
 51 | 
 52 | The SCENT package also requires the command-line tool bedtools, which is used to build the list of gene-peak pair dataframes to parallelize through.
 53 | - `https://github.com/arq5x/bedtools2`
 54 | 
 55 | 
 56 | ### Example usage
 57 | 
 58 | Vignettes are posted in this github repo to show 2 potential uses of the SCENT package.
 59 | 
 60 | ### 1.) Using SCENT interactively for testing small sets of gene-peak associations
 61 | 
 62 | `SCENT_interactive.Rmd` vignette contains an example of using the SCENT package to generate results on small sets of gene-peak associations. 
 63 | 
 64 | In summary, the main functionality is the SCENT object construction:
 65 | 
 66 | ```r
 67 | library(SCENT)
 68 | 
 69 | SCENT_obj <- CreateSCENTObj(rna = mrna, atac = atac, meta.data = meta,
 70 |                             peak.info = gene_peak,
 71 |                             covariates = c("log(nUMI)","percent.mito","sample", "batch"), 
 72 |                             celltypes = "celltype")
 73 | ```
 74 | 
 75 | Followed by SCENT algorithm:
 76 | 
 77 | ```r
 78 | SCENT_obj <- SCENT_algorithm(object = SCENT_obj, celltype = "Tcell", ncores = 6, regr = 'poisson', bin = TRUE)
 79 | ```
 80 | The user specifies a `celltype` (in this case “Tcell”) for association analysis (in `meta.data` slot in SCENT object), `ncores` for the number of cores for parallelized bootstrapping, `regr` for the regression type (Poisson ‘poisson’ or Negative Binomial ‘negbin’ regression), and `bin` for whether to binarize ATAC counts (TRUE for binarization or FALSE for not).
 81 | 
 82 | The output of the SCENT algorithm will be contained in the field:
 83 | ```r
 84 | SCENT_obj@SCENT.result
 85 | ```
 86 | which can be saved as a textfile for further downstream analysis.
 87 | 
 88 | 
 89 | Further information on Inputs and Outputs of SCENT are detailed below:
 90 | 
 91 | #### Arguments To `CreateSCENTObj`:
 92 | 
 93 | | #    | Argument name (format)       | Descriptions                                                 |
 94 | | ---- | ---------------------------- | ------------------------------------------------------------ |
 95 | | 1    | rna (sparse matrix) | A gene-by-cell count matrix from multimodal RNA-seq data. This is a raw count matrix without any normalization. The row names should be the gene names used in the `peak.info` file. The column names are the cell names which should be the same names used in the `cell`column of the dataframe specified for `meta.data`. Sparse matrix format is required. |
 96 | | 2    | atac (sparse matrix) | A peak-by-cell count matrix from multimodal ATAC-seq data. This is a raw count matrix without any normalization. The row names should be the peak names used in the `peak.info` file. The column names are the cell names which should be the same names used in `rna` and the `cell` column of the dataframe specified for `meta.data`. The matrix does not need to be binarized beforehand; `SCENT_algorithm` binarizes it when `bin = TRUE`. Sparse matrix format is required. |
 97 | | 3    | meta.data (dataframe)     | A meta data frame for cells (rows are cells, and **cell names should be in the column named as "cell"**; see below example). Additionally, this data frame should include the covariates to use in the model. Examples include: % mitochondrial reads, log(nUMI), sample, and batch as covariates. Dataframe format is required. |
 98 | | 4    | peak.info (dataframe) | A table with two columns indicating which gene-peak pairs you want to test in this chunk (see below example) **genes should be in the 1st column and peaks in the 2nd column**. We highly recommend splitting gene-peak pairs into many chunks to increase computational efficiency (See Parallelized Jobs Info in Section 2). Dataframe format is required; for parallelization, a list of such data frames is stored in the `peak.info.list` field instead. \* |
 99 | | 5    | covariates (a vector of character) | A vector of character fields that denote the covariates listed in the meta.data. For example, a set of covariates can be: %mitochondrial reads, log_nUMI, sample, and batch. Additionally the user can specify transformations to the covariates such as log transformation on nUMI counts for direct usage in the SCENT algorithm invoking poisson glm. **We recommend users to at least use log(number_of_total_RNA_UMI_count_per_cell) as the base model is Poisson regression and we do not include the offset term into the default model.** |
100 | | 6    | celltypes (character)        | User specified naming of the celltype column in the meta.data file. This column should contain the names of the celltypes you want to test in this association analysis. |
101 | 
102 | \* Extra Argument: The peak.info.list field can be left blank initially and a created List(Dataframe) can be constructed using the CreatePeakToGeneList function in the SCENT package. This function requires the user to specify a bed file that specifies ~500 kb windows of multiple gene loci to identify cis gene-peak pairs to test. The vignette, SCENT_parallelize.Rmd, will show steps to produce a SCENT object with a peak.info.list field that is used for parallelization in the SCENT_parallelization.R script.
103 | 
104 | 
105 | 
106 | #### Example Formats: 
107 | The example format of  `peak.info` argument:
108 | 
109 | ```bash
110 | > gene_peak <- read.table("/path/to/your_gene_peak_text_file.txt")
111 | > head(gene_peak)
112 | 
113 |     V1                      V2
114 | 1 A1BG chr19-57849279-57850722
115 | 2 A1BG chr19-57888160-57889279
116 | 3 A1BG chr19-57915851-57917093
117 | 4 A1BG chr19-57934422-57935603
118 | 5 A1BG chr19-57946848-57948062
119 | ```
120 | 
121 | We usually only select peaks whose center falls within 500 kb of the target gene (*cis* analysis). Also, while `SCENT_algorithm` itself only tests pairs whose gene and peak are each detected in more than 5% of the cells of the selected cell type, **it is more efficient to only include these QCed peaks and genes in `peak.info` to reduce the number of tests**.
122 | 
123 | 
124 | The example format of  `meta.data` argument:
125 | 
126 | ```r
127 | meta <- readRDS(metafile)
128 | meta$`log(nUMI)` <- log(meta$nUMI)
129 | head(meta)
130 | 
131 |                                  cell nUMI percent.mito   sample   batch
132 | AAACAGCCAAGGAATC-1 AAACAGCCAAGGAATC-1 8380   0.01503428 sample_1 batch_a
133 | AAACAGCCAATCCCTT-1 AAACAGCCAATCCCTT-1 3771   0.02207505 sample_1 batch_a
134 | AAACAGCCAATGCGCT-1 AAACAGCCAATGCGCT-1 6876   0.01435579 sample_1 batch_a
135 | AAACAGCCACACTAAT-1 AAACAGCCACACTAAT-1 1733   0.03881841 sample_1 batch_a
136 | AAACAGCCACCAACCG-1 AAACAGCCACCAACCG-1 5415   0.01600768 sample_1 batch_a
137 | AAACAGCCAGGATAAC-1 AAACAGCCAGGATAAC-1 2759   0.02485340 sample_1 batch_a
138 |                    celltype  log(nUMI)
139 | AAACAGCCAAGGAATC-1    Tcell   9.033603
140 | AAACAGCCAATCCCTT-1    Tcell   8.235095
141 | AAACAGCCAATGCGCT-1    Tcell   8.835792
142 | AAACAGCCACACTAAT-1    Tcell   7.457609
143 | AAACAGCCACCAACCG-1    Tcell   8.596928
144 | AAACAGCCAGGATAAC-1    Tcell   7.922624
145 | ```
146 | 
147 | 
148 | #### Output of SCENT (`SCENT.result` slot)
149 | 
150 | ```bash
151 | > head(SCENT_obj@SCENT.result)
152 | gene	peak	beta	se	z	p	boot_basic_p
153 | A1BG	chr19-57849279-57850722	0.587060911718621	0.227961010352348	2.57526894977009	0.0100162168431262	0.0192
154 | A1BG	chr19-57888160-57889279	-0.0842330294127105	0.232845263030106	-0.361755392042566	0.717534829528597	0.688
155 | A1BG	chr19-57915851-57917093	-0.00971211792633636	0.225020479431863	-0.043161040056699	0.965573161660521	1
156 | A1BG	chr19-57934422-57935603	0.0136752444069743	0.249810124611214	0.0547425546833116	0.956343566437322	0.968
157 | ```
158 | 
159 | Each column indicates ...
160 | 
161 | | Column       | Descriptions                                                 |
162 | | ------------ | ------------------------------------------------------------ |
163 | | gene         | The gene(-peak) pair in each test statistics                 |
164 | | peak         | The (gene-)peak pair in each test statistics                 |
165 | | beta         | The regression coefficient from primary Poisson regression   |
166 | | se           | The standard error  from primary Poisson regression          |
167 | | z            | The Z score from primary Poisson regression                  |
168 | | p            | The raw p value from primary Poisson regression              |
169 | | boot_basic_p | The bootstrap p value calculated from bootstrapping analyses |
170 | 
171 | 
172 | 
173 | ### 2.) Using SCENT with parallelized jobs.
174 | 
175 | `SCENT_parallelization.R` is the example code necessary for running parallelized SCENT jobs.
176 | This code needs a `SCENT_Object.rds` file that contains a list of gene-peak pairs. 
177 | To generate this object please follow the SCENT_parallelize.Rmd vignette file.
178 | 
179 | The corresponding bash script `parallelizedSCENT.sh` contains a parallelization scheme that
180 | depends on the user-defined number of gene-peak pair batches (for context please refer to the
181 | SCENT_parallelize.Rmd vignette). The main part of the bash script contains the line:
182 | 
183 | ```bash
184 | Rscript SCENT_parallelization.R $LSB_JOBINDEX ${num_cores} ${file_SCENT_obj} ${celltype} ${regr} ${bin} ${output_dir}
185 | ```
186 | 
187 | Arguments in the bash file are user specified as follows:
188 | 
189 | |#      | Argument Name | Descriptions |
190 | | ----  | ------------- | ------------ |
191 | |1    | LSB_JOBINDEX   | jobarray index specified by BSUB -J SCENT[1-100] |
192 | |2    | num_cores      | number of cores (ex. 6) to parallelize to the SCENT algorithm |
193 | |3    | file_SCENT_obj | Path to the RDS file of the SCENT object (containing atac_matrix, rna_matrix, metafile, peak_gene_list, etc.) that is needed to run the SCENT algorithm |
194 | |4    | celltype       | User specified celltype (ex. "Tcells") to run the SCENT algorithm |
195 | |5    | regr           | User specified regression type (ex. "poisson") to run SCENT algorithm |
196 | |6    | bin            | User specified choice to binarize ATAC counts (ex. TRUE) |
197 | |7    | output_dir     | User-specified directory where the SCENT results are written, to be aggregated once all jobs have completed |
198 | 
199 | ### Enhancer-gene links from the paper
200 | 
201 | SCENT enhancer-gene linkages (FDR<10%) from the 8 datasets that we described in the paper can be downloaded from the following Dropbox link.
202 | 
203 | https://www.dropbox.com/scl/fo/g20tfnwkcuhib4a6z1wp4/ABYaK5s8bwTLnzrJ0KoZn48?rlkey=j1s5365gso53r2v2dsdynnsr2&st=5np1fq0a&dl=0
204 | 
205 | ### Contact
206 | 
207 | Saori Sakaue ssakaue@broadinstitute.org
208 | 


--------------------------------------------------------------------------------
/SCENT.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 | 


--------------------------------------------------------------------------------
/fig/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/immunogenomics/SCENT/e80b5ba6b445f972c7fe28fb41e24ef4f5b2e373/fig/.DS_Store


--------------------------------------------------------------------------------
/fig/cover_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/immunogenomics/SCENT/e80b5ba6b445f972c7fe28fb41e24ef4f5b2e373/fig/cover_image.png


--------------------------------------------------------------------------------
/fig/cover_image2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/immunogenomics/SCENT/e80b5ba6b445f972c7fe28fb41e24ef4f5b2e373/fig/cover_image2.png


--------------------------------------------------------------------------------
/man/CreatePeakToGeneList.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SCENTfunctions.R
 3 | \name{CreatePeakToGeneList}
 4 | \alias{CreatePeakToGeneList}
 5 | \title{Creating Cis Gene-Peak Pair Lists to Parallelize Through}
 6 | \usage{
 7 | CreatePeakToGeneList(
 8 |   object,
 9 |   genebed = "/path/to/GeneBody_500kb_margin.bed",
10 |   nbatch,
11 |   tmpfile = "./temporary_atac_peak.bed",
12 |   intersectedfile = "./temporary_atac_peak_intersected.bed.gz"
13 | )
14 | }
15 | \arguments{
16 | \item{object}{SCENT object}
17 | 
18 | \item{genebed}{character. File directory for bed file that contains 500 kb windows for each gene}
19 | 
20 | \item{nbatch}{numeric. Number of batches to produce: Length of the list}
21 | 
22 | \item{tmpfile}{character. Location of temporary file.}
23 | 
24 | \item{intersectedfile}{character. Location of intersected file.}
25 | }
26 | \value{
27 | SCENT object with updated field of peak.info.list
28 | }
29 | \description{
30 | Creating Cis Gene-Peak Pair Lists to Parallelize Through
31 | }
32 | 


--------------------------------------------------------------------------------
/man/SCENT-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SCENTfunctions.R
 3 | \docType{class}
 4 | \name{SCENT-class}
 5 | \alias{SCENT-class}
 6 | \alias{CreateSCENTObj}
 7 | \title{SCENT Class Constructor}
 8 | \value{
 9 | SCENT object to use for further analysis
10 | }
11 | \description{
12 | SCENT Class Constructor
13 | }
14 | \section{Slots}{
15 | 
16 | \describe{
17 | \item{\code{rna}}{dgCMatrix. scRNAseq matrix read as a sparse matrix}
18 | 
19 | \item{\code{atac}}{dgCMatrix. scATACseq matrix read as a sparse matrix}
20 | 
21 | \item{\code{meta.data}}{data.frame. Metadata table with covariates and a cell ID column ("cell")}
22 | 
23 | \item{\code{peak.info}}{data.frame. Dataframe that contains gene-peak pairs for SCENT to search through}
24 | 
25 | \item{\code{peak.info.list}}{list. List of dataframes that contain gene-peak pairs to parallelize through}
26 | 
27 | \item{\code{covariates}}{character. Assign covariates that are needed for the analysis. Must be names that are in the columns of meta.data}
28 | 
29 | \item{\code{celltypes}}{character. Assign celltype column from meta.data}
30 | 
31 | \item{\code{SCENT.result}}{data.frame. Initialized as empty. Becomes a table of resultant significant gene peak pairs}
32 | }}
33 | 
34 | 


--------------------------------------------------------------------------------
/man/SCENT_algorithm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SCENTfunctions.R
 3 | \name{SCENT_algorithm}
 4 | \alias{SCENT_algorithm}
 5 | \title{SCENT Algorithm: Poisson Regression with Empirical P-values through Bootstrapping.}
 6 | \usage{
 7 | SCENT_algorithm(object, celltype, ncores, regr = "poisson", bin = TRUE)
 8 | }
 9 | \arguments{
10 | \item{object}{SCENT object}
11 | 
12 | \item{celltype}{character. User specified cell type defined in celltypes column of meta.data}
13 | 
14 | \item{ncores}{numeric. Number of cores to use for Parallelization}
15 | 
16 | \item{regr}{character. Regression type: "poisson" or "negbin" for Poisson regression and Negative Binomial regression, respectively}
17 | 
18 | \item{bin}{logical. TRUE to binarize ATAC counts. FALSE to NOT binarize ATAC counts}
19 | }
20 | \value{
21 | SCENT object with updated field SCENT.results
22 | }
23 | \description{
24 | SCENT Algorithm: Poisson Regression with Empirical P-values through Bootstrapping.
25 | }
26 | 


--------------------------------------------------------------------------------
/man/assoc_negbin.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SCENTfunctions.R
 3 | \name{assoc_negbin}
 4 | \alias{assoc_negbin}
 5 | \title{Perform negative binomial regression: exprs ~ peak + covariates}
 6 | \usage{
 7 | assoc_negbin(data, idx = seq_len(nrow(data)), formula)
 8 | }
 9 | \arguments{
10 | \item{data}{contains expr values and associated peak and covariates for a gene.}
11 | 
12 | \item{idx}{rows of the data to use: argument for boot function (bootstrapping)}
13 | 
14 | \item{formula}{user defined formula based on initialization in CreateSCENTObj Constructor}
15 | }
16 | \value{
17 | vector: (coefficient of the peak effect on gene, variance of peak effect on gene)
18 | }
19 | \description{
20 | Perform negative binomial regression: exprs ~ peak + covariates
21 | }
22 | 


--------------------------------------------------------------------------------
/man/assoc_poisson.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SCENTfunctions.R
 3 | \name{assoc_poisson}
 4 | \alias{assoc_poisson}
 5 | \title{Perform poisson regression: exprs ~ peak + covariates}
 6 | \usage{
 7 | assoc_poisson(data, idx = seq_len(nrow(data)), formula)
 8 | }
 9 | \arguments{
10 | \item{data}{contains expr values and associated peak and covariates for a gene.}
11 | 
12 | \item{idx}{rows of the data to use: argument for boot function (bootstrapping)}
13 | 
14 | \item{formula}{user defined formula based on initialization in CreateSCENTObj Constructor}
15 | }
16 | \value{
17 | vector: (coefficient of the peak effect on gene, variance of peak effect on gene)
18 | }
19 | \description{
20 | Perform poisson regression: exprs ~ peak + covariates
21 | }
22 | 


--------------------------------------------------------------------------------
/man/basic_p.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SCENTfunctions.R
 3 | \name{basic_p}
 4 | \alias{basic_p}
 5 | \title{Derive a p-value from a vector of bootstrap samples using the "basic" calculation}
 6 | \usage{
 7 | basic_p(obs, boot, null = 0)
 8 | }
 9 | \arguments{
10 | \item{obs}{observed value of parameter (using actual data)}
11 | 
12 | \item{boot}{vector of bootstraps}
13 | }
14 | \value{
15 | p-value
16 | }
17 | \description{
18 | Derive a p-value from a vector of bootstrap samples using the "basic" calculation
19 | }
20 | 


--------------------------------------------------------------------------------
/man/check_dimensions.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SCENTfunctions.R
 3 | \name{check_dimensions}
 4 | \alias{check_dimensions}
 5 | \title{Validity and Type Checking for CreateSCENTObject Constructor}
 6 | \usage{
 7 | check_dimensions(object)
 8 | }
 9 | \arguments{
10 | \item{object}{SCENT object constructed from class CreateSCENTObject}
11 | }
12 | \value{
13 | None OR Errors dependent on if the object follows the guidelines for SCENT
14 | RNA: matrix of (genes x cells)
15 | ATAC: matrix of (peaks x cells)
16 | }
17 | \description{
18 | Validity and Type Checking for CreateSCENTObject Constructor
19 | }
20 | 


--------------------------------------------------------------------------------
/man/interp_pval.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SCENTfunctions.R
 3 | \name{interp_pval}
 4 | \alias{interp_pval}
 5 | \title{Interpolate a p-value from quantiles that should be "null scaled"}
 6 | \usage{
 7 | interp_pval(q)
 8 | }
 9 | \arguments{
10 | \item{q}{bootstrap quantiles, centered so that under the null, theta = 0}
11 | }
12 | \value{
13 | two-sided p-value
14 | }
15 | \description{
16 | Interpolate a p-value from quantiles that should be "null scaled"
17 | }
18 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | Output/
4 | RData/
5 | temporary_atac_peak_intersected.bed.gz
6 | 


--------------------------------------------------------------------------------
/vignettes/SCENT_interactive.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "SCENT_interactive"
  3 | output: rmarkdown::html_vignette
  4 | vignette: >
  5 |   %\VignetteIndexEntry{SCENT_interactive}
  6 |   %\VignetteEngine{knitr::rmarkdown}
  7 |   %\VignetteEncoding{UTF-8}
  8 | ---
  9 | 
 10 | ```{r, include = FALSE}
 11 | knitr::opts_chunk$set(
 12 |   collapse = TRUE,
 13 |   comment = "#>"
 14 | )
 15 | ```
 16 | 
 17 | ## Load Package
 18 | 
 19 | ```{r setup}
 20 | library(SCENT)
 21 | ```
 22 | 
 23 | ## Load Inputs
 24 | 
 25 | ```{r}
 26 | #Initialize directories: (Example)
 27 | input_atac <- "./RData/Data/pbmc_multimodal.atac.rds"
 28 | input_mrna <- "./RData/Data/pbmc_multimodal.rna.rds"
 29 | input_meta <- "./RData/Data/pbmc_multimodal.meta.rds"
 30 | input_gene_peak <- "./RData/Data/qced_Tnk.G2P.txt"
 31 | output <- "./Output/test_output.txt"
 32 | 
 33 | options(stringsAsFactors = F)
 34 | 
 35 | #Read-in Necessary Files:
 36 | atac <- readRDS(input_atac)
 37 | mrna <- readRDS(input_mrna)
 38 | meta <- readRDS(input_meta)
 39 | gene_peak <- read.table(input_gene_peak)
 40 | colnames(gene_peak) <- c("gene","peak")
 41 | 
 42 | ```
 43 | 
 44 | ## SCENT Object
 45 | 
 46 | ```{r SCENT}
 47 | ##Using the SCENT Object:
 48 | SCENT_obj <- CreateSCENTObj(rna = mrna, atac = atac, meta.data = meta,
 49 |                             peak.info = gene_peak,
 50 |                             covariates = c("log(nCount_RNA)","percent.mito"), 
 51 |                             celltypes = "newCT")
 52 | 
 53 | ##Example Outputs of the SCENT Object
 54 | head(SCENT_obj@rna[1:10,1:2])
 55 | head(SCENT_obj@atac[1:10,1:2])
 56 | head(SCENT_obj@meta.data)
 57 | head(SCENT_obj@peak.info)
 58 | str(SCENT_obj)
 59 | ```
 60 | 
 61 | ## SCENT Algorithm: Obtain small list of gene-peak pairs.
 62 | 
 63 | ```{r gene_peak}
 64 | #Of the set of peak gene pairs: pick a set of pairs to test: 
 65 | #Example: (first 10 gene-peak pairs)
 66 | SCENT_obj@peak.info <- SCENT_obj@peak.info[1:10,]
 67 | head(SCENT_obj@peak.info)
 68 | ```
 69 | ## SCENT Algorithm: Options for Regression w/ Bootstrapping.
 70 | 
 71 | ```{r SCENT_regression}
 72 | #Run SCENT algorithm of Tnk cell type and use 6 cores for parallelization:
 73 | 
 74 | 
 75 | #Default: Poisson regression and Binarized ATAC counts
 76 | SCENT_obj_ver1 <- SCENT_algorithm(SCENT_obj, "Tnk", 6) 
 77 | # By default settings the above will perform parallelizations using Poisson regression and Binarized counts.
 78 | 
 79 | #Option 1: Poisson regression and Non-Binarized ATAC counts
 80 | SCENT_obj_ver2 <- SCENT_algorithm(SCENT_obj, "Tnk", 6, regr = "poisson", bin = FALSE)
 81 | 
 82 | #Option 2: Negative Binomial regression and Binarized ATAC counts
 83 | SCENT_obj_ver3 <- SCENT_algorithm(SCENT_obj, "Tnk", 6, regr = "negbin", bin = TRUE)
 84 | 
 85 | #Option 3: Negative Binomial regression and Non-Binarized ATAC counts
 86 | SCENT_obj_ver4 <- SCENT_algorithm(SCENT_obj, "Tnk", 6, regr = "negbin", bin = FALSE)
 87 | 
 88 | ```
 89 | 
 90 | ## Output of SCENT Algorithm
 91 | 
 92 | ```{r SCENT_algo}
 93 | head(SCENT_obj_ver1@SCENT.result)
 94 | head(SCENT_obj_ver2@SCENT.result)
 95 | head(SCENT_obj_ver3@SCENT.result)
 96 | head(SCENT_obj_ver4@SCENT.result)
 97 | ```
 98 | 
 99 | 
100 | 
101 | 


--------------------------------------------------------------------------------
/vignettes/SCENT_parallelize.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Parallelization of SCENT"
 3 | output: rmarkdown::html_vignette
 4 | vignette: >
 5 |   %\VignetteIndexEntry{Parallelization of SCENT}
 6 |   %\VignetteEngine{knitr::rmarkdown}
 7 |   %\VignetteEncoding{UTF-8}
 8 | ---
 9 | 
10 | ```{r, include = FALSE}
11 | knitr::opts_chunk$set(
12 |   collapse = TRUE,
13 |   comment = "#>"
14 | )
15 | ```
16 | 
17 | ## Load Package
18 | 
19 | ```{r setup}
20 | library(SCENT)
21 | ```
22 | 
23 | ## Data Inputs
24 | 
25 | ```{r inputs}
26 | #Initialize directories: (Example)
27 | input_atac <- "./RData/Data/pbmc_multimodal.atac.rds"
28 | input_mrna <- "./RData/Data/pbmc_multimodal.rna.rds"
29 | input_meta <- "./RData/Data/pbmc_multimodal.meta.rds"
30 | input_gene_peak <- "./RData/Data/qced_Tnk.G2P.txt"
31 | output_rds <- "./Output/SCENT_obj.rds"
32 | ####Obtaining Gene-Peak Pairs using defined bed file
33 | genebed_loc <- "./RData/Data/GeneBody_500kb_margin_chr.bed" 
34 | 
35 | 
36 | options(stringsAsFactors = F)
37 | 
38 | #Read-in Necessary Files:
39 | atac <- readRDS(input_atac)
40 | mrna <- readRDS(input_mrna)
41 | meta <- readRDS(input_meta)
42 | gene_peak <- read.table(input_gene_peak)
43 | colnames(gene_peak) <- c("gene","peak")
44 | ```
45 | 
46 | 
47 | 
48 | ## SCENT Object
49 | 
50 | ```{r SCENT}
51 | ####Using the SCENT Object:
52 | SCENT_obj <- CreateSCENTObj(rna = mrna, atac = atac, meta.data = meta,
53 |                             covariates = c("log(nCount_RNA)","percent.mito"), 
54 |                             celltypes = "newCT")
55 | str(SCENT_obj)
56 | ```
57 | 
58 | ## Make Gene-Peak Pair Lists
59 | 
60 | ```{r gene_peak_list}
61 | SCENT_obj <- CreatePeakToGeneList(SCENT_obj, genebed = genebed_loc,
62 |                                   nbatch = 1000,tmpfile="./temporary_atac_peak.bed",
63 |                                   intersectedfile="./temporary_atac_peak_intersected.bed.gz")
64 | str(SCENT_obj, max.level = 2)
65 | ```
66 | 
 67 | ## Save the SCENT object for parallelized jobs on the cluster to get SCENT results.
68 | 
69 | ```{r SCENT_obj_save}
70 | saveRDS(SCENT_obj, file = output_rds) #Takes a couple minutes.
71 | ```
72 | 
73 | ## Use the saved SCENT_obj to run a parallelized bash script: Located in "Parallelized Bash Script" Folder
74 | 
 75 | 
76 | 


--------------------------------------------------------------------------------