├── .gitignore ├── tests ├── testthat │ ├── test_numOverlaps.R │ ├── test_data_files │ │ ├── empty_file.txt │ │ ├── regs.bed │ │ ├── regs.txt │ │ ├── alt_sep1.txt │ │ ├── alt_sep2.txt │ │ ├── noheader.txt │ │ ├── 4columns.bed │ │ ├── 4columns.txt │ │ ├── bed_with_header.bed │ │ ├── two_valid_seps.txt │ │ ├── alt_sep2_with_other_seps.txt │ │ ├── only_comments.txt │ │ ├── only_comments_and_header.txt │ │ └── comments_and_header.txt │ ├── test_genomesAndMasks.R │ ├── test_permTest.R │ ├── test_evaluationFunctions.R │ ├── test_randomizationFunctions.R │ └── test_toGRanges.R └── testthat.R ├── vignettes └── figures │ ├── Example1_fig1.pdf │ ├── Example1_fig1.png │ ├── Example2_fig1.pdf │ ├── Example2_fig1.png │ ├── Example2_fig2.pdf │ ├── Example2_fig2.png │ ├── Example2_fig3.pdf │ ├── Example2_fig3.png │ ├── Example2_fig4.pdf │ ├── Example2_fig4.png │ ├── regular_genes_5000perm.pdf │ ├── regular_genes_5000perm.png │ ├── special_genes_5000perm.pdf │ ├── special_genes_5000perm.png │ ├── mask_test_regioneR_small.pdf │ ├── mask_test_regioneR_small.png │ ├── speed_test_regioneR_small.pdf │ └── speed_test_regioneR_small.png ├── inst ├── extdata │ ├── my.altered.regions.txt │ ├── my.regular.genes.txt │ └── my.special.genes.txt └── CITATION ├── man ├── summary.permTestResults.Rd ├── emptyCacheRegioneR.Rd ├── summary.permTestResultsList.Rd ├── listChrTypes.Rd ├── getChromosomesByOrganism.Rd ├── maskFromBSGenome.Rd ├── characterToBSGenome.Rd ├── print.permTestResults.Rd ├── recomputePermTest.Rd ├── meanDistance.Rd ├── toDataframe.Rd ├── subtractRegions.Rd ├── getMask.Rd ├── resampleRegions.Rd ├── plotRegions.Rd ├── plot.localZScoreResults.Rd ├── uniqueRegions.Rd ├── numOverlaps.Rd ├── overlapGraphicalSummary.Rd ├── commonRegions.Rd ├── splitRegions.Rd ├── meanInRegions.Rd ├── localZScore.Rd ├── plot.localZScoreResultsList.Rd ├── getGenomeAndMask.Rd ├── extendRegions.Rd ├── mergeRegions.Rd ├── resampleGenome.Rd ├── joinRegions.Rd ├── filterChromosomes.Rd ├── 
createFunctionsList.Rd ├── getGenome.Rd ├── plot.permTestResults.Rd ├── plot.permTestResultsList.Rd ├── circularRandomizeRegions.Rd ├── createRandomRegions.Rd ├── randomizeRegions.Rd ├── overlapPermTest.Rd ├── overlapRegions.Rd ├── permTest.Rd └── toGRanges.Rd ├── R ├── emptyCacheRegioneR.R ├── listChrTypes.R ├── summary.permTestResultsList.R ├── print.permTestResults.R ├── summary.permTestResults.R ├── recomputePermTest.R ├── meanDistance.R ├── utils.R ├── subtractRegions.R ├── characterToBSGenome.R ├── commonRegions.R ├── toDataframe.R ├── getMask.R ├── numOverlaps.R ├── uniqueRegions.R ├── plot.localZScoreResultsList.R ├── mergeRegions.R ├── joinRegions.R ├── resampleRegions.R ├── extendRegions.R ├── plot.permTestResultsList.R ├── plot.localZScoreResults.R ├── getGenomeAndMask.R ├── getChromosomesByOrganism.R ├── meanInRegions.R ├── resampleGenome.R ├── filterChromosomes.R ├── maskFromBSGenome.R ├── getGenome.R ├── createRandomRegions.R ├── createFunctionsList.R ├── localZScore.R ├── overlapGraphicalSummary.R ├── splitRegions.R ├── plotRegions.R ├── overlapPermTest.R ├── circularRandomizeRegions.R └── plot.permTestResults.R ├── DESCRIPTION ├── NEWS └── NAMESPACE /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | \.Rhistory 3 | -------------------------------------------------------------------------------- /tests/testthat/test_numOverlaps.R: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/testthat/test_data_files/empty_file.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/testthat/test_data_files/regs.bed: -------------------------------------------------------------------------------- 1 | 1 1 10000 2 | 1 20000 30000 3 | 2 1 2 4 | 
-------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(regioneR) 3 | 4 | test_check("regioneR") 5 | -------------------------------------------------------------------------------- /tests/testthat/test_data_files/regs.txt: -------------------------------------------------------------------------------- 1 | C S E1 2 | 1 1 10000 3 | 1 20000 30000 4 | 2 1 2 5 | -------------------------------------------------------------------------------- /tests/testthat/test_data_files/alt_sep1.txt: -------------------------------------------------------------------------------- 1 | 1;1;10000;col 2 | 1;20000;30000;nan 3 | 2;1;2;col 4 | -------------------------------------------------------------------------------- /tests/testthat/test_data_files/alt_sep2.txt: -------------------------------------------------------------------------------- 1 | 1,1,10000,col 2 | 1,20000,30000,nan 3 | 2,1,2,col 4 | -------------------------------------------------------------------------------- /tests/testthat/test_data_files/noheader.txt: -------------------------------------------------------------------------------- 1 | 1 1 10000 col 2 | 1 20000 30000 nan 3 | 2 1 2 col 4 | -------------------------------------------------------------------------------- /tests/testthat/test_genomesAndMasks.R: -------------------------------------------------------------------------------- 1 | library(regioneR) 2 | context("Genomes and Masks") 3 | 4 | #TODO 5 | -------------------------------------------------------------------------------- /tests/testthat/test_data_files/4columns.bed: -------------------------------------------------------------------------------- 1 | C S E1 data 2 | 1 1 10000 col 3 | 1 20000 30000 nan 4 | 2 1 2 col 5 | -------------------------------------------------------------------------------- 
/tests/testthat/test_data_files/4columns.txt: -------------------------------------------------------------------------------- 1 | C S E1 data 2 | 1 1 10000 col 3 | 1 20000 30000 nan 4 | 2 1 2 col 5 | -------------------------------------------------------------------------------- /tests/testthat/test_data_files/bed_with_header.bed: -------------------------------------------------------------------------------- 1 | chr start end 2 | 1 1 10000 3 | 1 20000 30000 4 | 2 1 2 5 | -------------------------------------------------------------------------------- /tests/testthat/test_data_files/two_valid_seps.txt: -------------------------------------------------------------------------------- 1 | C S E1 2 | 1. 1. 10000. 3 | 1. 20000. 30000. 4 | 2. 1. 2. 5 | -------------------------------------------------------------------------------- /tests/testthat/test_data_files/alt_sep2_with_other_seps.txt: -------------------------------------------------------------------------------- 1 | 1,1,10000,col 2 | 1,20000,30000,n an 3 | 2,1,2,c;ol 4 | -------------------------------------------------------------------------------- /tests/testthat/test_data_files/only_comments.txt: -------------------------------------------------------------------------------- 1 | #These 2 | #are 3 | #multiple 4 | #comment 5 | #lines 6 | 7 | 8 | -------------------------------------------------------------------------------- /vignettes/figures/Example1_fig1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/Example1_fig1.pdf -------------------------------------------------------------------------------- /vignettes/figures/Example1_fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/Example1_fig1.png -------------------------------------------------------------------------------- 
/vignettes/figures/Example2_fig1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/Example2_fig1.pdf -------------------------------------------------------------------------------- /vignettes/figures/Example2_fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/Example2_fig1.png -------------------------------------------------------------------------------- /vignettes/figures/Example2_fig2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/Example2_fig2.pdf -------------------------------------------------------------------------------- /vignettes/figures/Example2_fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/Example2_fig2.png -------------------------------------------------------------------------------- /vignettes/figures/Example2_fig3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/Example2_fig3.pdf -------------------------------------------------------------------------------- /vignettes/figures/Example2_fig3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/Example2_fig3.png -------------------------------------------------------------------------------- /vignettes/figures/Example2_fig4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/Example2_fig4.pdf 
-------------------------------------------------------------------------------- /vignettes/figures/Example2_fig4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/Example2_fig4.png -------------------------------------------------------------------------------- /tests/testthat/test_data_files/only_comments_and_header.txt: -------------------------------------------------------------------------------- 1 | #These 2 | #are 3 | #multiple 4 | #comment 5 | #lines 6 | chr start end 7 | 8 | -------------------------------------------------------------------------------- /vignettes/figures/regular_genes_5000perm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/regular_genes_5000perm.pdf -------------------------------------------------------------------------------- /vignettes/figures/regular_genes_5000perm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/regular_genes_5000perm.png -------------------------------------------------------------------------------- /vignettes/figures/special_genes_5000perm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/special_genes_5000perm.pdf -------------------------------------------------------------------------------- /vignettes/figures/special_genes_5000perm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/special_genes_5000perm.png -------------------------------------------------------------------------------- /vignettes/figures/mask_test_regioneR_small.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/mask_test_regioneR_small.pdf -------------------------------------------------------------------------------- /vignettes/figures/mask_test_regioneR_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/mask_test_regioneR_small.png -------------------------------------------------------------------------------- /vignettes/figures/speed_test_regioneR_small.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/speed_test_regioneR_small.pdf -------------------------------------------------------------------------------- /vignettes/figures/speed_test_regioneR_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bernatgel/regioneR/HEAD/vignettes/figures/speed_test_regioneR_small.png -------------------------------------------------------------------------------- /tests/testthat/test_data_files/comments_and_header.txt: -------------------------------------------------------------------------------- 1 | #These 2 | #are 3 | #multiple 4 | #comment 5 | #lines 6 | chr start end 7 | 1 1 10000 8 | 1 20000 30000 9 | 2 1 2 10 | -------------------------------------------------------------------------------- /inst/extdata/my.altered.regions.txt: -------------------------------------------------------------------------------- 1 | chr start end 2 | chr1 10000000 60000000 3 | chr1 110000000 230000000 4 | chr2 5000000 70000000 5 | chr3 1 198022430 6 | chr7 1 80000000 7 | chr9 20000000 80000000 8 | chr17 3000000 70000000 9 | chr22 1 51304566 10 | -------------------------------------------------------------------------------- 
/man/summary.permTestResults.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary.permTestResults.R 3 | \name{summary.permTestResults} 4 | \alias{summary.permTestResults} 5 | \title{Summary of permTestResults objects} 6 | \usage{ 7 | \method{summary}{permTestResults}(object, ...) 8 | } 9 | \value{ 10 | the summary is printed 11 | } 12 | \description{ 13 | Summary of permTestResults objects 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/emptyCacheRegioneR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/emptyCacheRegioneR.R 3 | \name{emptyCacheRegioneR} 4 | \alias{emptyCacheRegioneR} 5 | \title{Empty Cache regioneR} 6 | \usage{ 7 | emptyCacheRegioneR() 8 | } 9 | \value{ 10 | The cache is emptied 11 | } 12 | \description{ 13 | Empties the caches used by the memoised functions in the regioneR package. 14 | } 15 | \examples{ 16 | emptyCacheRegioneR() 17 | 18 | } 19 | -------------------------------------------------------------------------------- /man/summary.permTestResultsList.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary.permTestResultsList.R 3 | \name{summary.permTestResultsList} 4 | \alias{summary.permTestResultsList} 5 | \title{Summary of permTestResultsList objects} 6 | \usage{ 7 | \method{summary}{permTestResultsList}(object, ...) 
8 | } 9 | \value{ 10 | the summary is printed 11 | } 12 | \description{ 13 | Summary of permTestResultsList objects 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /R/emptyCacheRegioneR.R: -------------------------------------------------------------------------------- 1 | #' Empty Cache regioneR 2 | #' 3 | #' @description 4 | #' Empties the caches used by the memoised functions in the regioneR package. 5 | #' 6 | #' @usage 7 | #' emptyCacheRegioneR() 8 | #' 9 | #' @return The cache is emptied 10 | #' 11 | #' @examples 12 | #' emptyCacheRegioneR() 13 | #' 14 | #' @export emptyCacheRegioneR 15 | #' 16 | #' @importFrom memoise forget 17 | 18 | # Calls memoise::forget on each of the five functions listed below. 19 | emptyCacheRegioneR <- function() { 20 | memoise::forget(getGenome) 21 | memoise::forget(getMask) 22 | memoise::forget(getGenomeAndMask) 23 | memoise::forget(maskFromBSGenome) 24 | memoise::forget(characterToBSGenome) 25 | # The value of the last forget() call is the function's return value. 26 | } 27 | -------------------------------------------------------------------------------- /tests/testthat/test_permTest.R: -------------------------------------------------------------------------------- 1 | library(regioneR) 2 | context("Built-in Evaluation Functions") 3 | 4 | #Define some GRanges to use in the tests 5 | emptyGR <- toGRanges(data.frame(chr=character(), start=numeric(), end=numeric())) 6 | smallA <- toGRanges(data.frame(chr=rep(c("chr1", "chr2"), 10), start=100*(1:20), end=120*(1:20))) 7 | smallB <- toGRanges(data.frame(chr=rep(c("chr2", "chr1"), 10), start=100*(1:20), end=105*(1:20))) 8 | 9 | bigA <- toGRanges(system.file("extdata", "my.special.genes.txt", package="regioneR")) 10 | universeA <- toGRanges(system.file("extdata", "all.genes.txt", package="regioneR")) 11 | bigB <- toGRanges(system.file("extdata", "my.altered.regions.txt", package="regioneR")) 12 | # NOTE(review): all.genes.txt is not among the inst/extdata files listed in this dump - confirm it exists. 13 | 14 | #TODO 15 | -------------------------------------------------------------------------------- /man/listChrTypes.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/listChrTypes.R 3 | \name{listChrTypes} 4 | \alias{listChrTypes} 5 | \title{filterChromosomes 6 | listChrTypes} 7 | \usage{ 8 | listChrTypes() 9 | } 10 | \value{ 11 | the list of available chrs and organisms is printed 12 | } 13 | \description{ 14 | Prints a list of the available organisms and chromosomes sets in the predefined chromosomes sets information. 15 | } 16 | \examples{ 17 | 18 | g <- getGenomeAndMask("hg19")$genome 19 | 20 | listChrTypes() 21 | 22 | g <- filterChromosomes(g, chr.type="autosomal", organism="hg19") 23 | 24 | } 25 | \seealso{ 26 | \code{\link{filterChromosomes}}, \code{\link{getChromosomesByOrganism}} 27 | } 28 | -------------------------------------------------------------------------------- /man/getChromosomesByOrganism.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getChromosomesByOrganism.R 3 | \name{getChromosomesByOrganism} 4 | \alias{getChromosomesByOrganism} 5 | \title{getChromosomesByOrganism} 6 | \usage{ 7 | getChromosomesByOrganism() 8 | } 9 | \value{ 10 | a list with the organism as keys and the list of available chromosome sets as values 11 | } 12 | \description{ 13 | Function to obtain a list of organisms with their canonical and (when applicable) the autosomal chromosome names. 14 | This function is not usually used by the end user directly but through the filterChromosomes function. 
15 | } 16 | \examples{ 17 | 18 | chrsByOrg <- getChromosomesByOrganism() 19 | chrsByOrg[["hg"]] 20 | chrsByOrg[["hg"]][["autosomal"]] 21 | 22 | } 23 | \seealso{ 24 | \code{\link{getGenome}}, \code{\link{filterChromosomes}} 25 | } 26 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry(bibtype = "Article", 2 | key = "regioneR", 3 | title = "regioneR: an R/Bioconductor package for the association analysis of genomic regions based on permutation tests", 4 | author = c(person(given = "Bernat", family = "Gel"), 5 | person(given = "Anna", family = "Diez-Villanueva"), 6 | person(given = "Eduard", family = "Serra"), 7 | person(given = c("Marcus"), family = "Buschbeck"), 8 | person(given = c("Miguel", "A."), family = "Peinado"), 9 | person(given = c("Roberto"), family = "Malinverni")), 10 | journal = "Bioinformatics", 11 | year = "2016", 12 | volume = "32", 13 | number = "2", 14 | pages = "289-291", 15 | doi = "10.1093/bioinformatics/btv562", 16 | pubmed = "26424858") 17 | -------------------------------------------------------------------------------- /man/maskFromBSGenome.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/maskFromBSGenome.R 3 | \name{maskFromBSGenome} 4 | \alias{maskFromBSGenome} 5 | \title{maskFromBSGenome} 6 | \usage{ 7 | maskFromBSGenome(bsgenome) 8 | } 9 | \arguments{ 10 | \item{bsgenome}{a \code{\link{BSgenome}} object} 11 | } 12 | \value{ 13 | A \code{\link{GRanges}} object with the active mask in the \code{\link{BSgenome}} 14 | } 15 | \description{ 16 | Extracts the merge of all the active masks from a \code{\link{BSgenome}} 17 | } 18 | \note{ 19 | This function is memoised (cached) using the \code{\link{memoise}} package. 
To empty the cache, use \code{\link{forget}(maskFromBSGenome)} 20 | } 21 | \examples{ 22 | g <- characterToBSGenome("hg19") 23 | 24 | maskFromBSGenome(g) 25 | 26 | } 27 | \seealso{ 28 | \code{\link{getGenomeAndMask}}, \code{\link{characterToBSGenome}}, \code{\link{emptyCacheRegioneR}} 29 | } 30 | -------------------------------------------------------------------------------- /man/characterToBSGenome.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/characterToBSGenome.R 3 | \name{characterToBSGenome} 4 | \alias{characterToBSGenome} 5 | \title{characterToBSGenome} 6 | \usage{ 7 | characterToBSGenome(genome.name) 8 | } 9 | \arguments{ 10 | \item{genome.name}{a character string uniquely identifying a \code{\link{BSgenome}} (e.g. "hg19", "mm10" are ok, but "hg" is not)} 11 | } 12 | \value{ 13 | A \code{\link{BSgenome}} object 14 | } 15 | \description{ 16 | Given a character string with the "name" of a genome, it returns a \code{\link{BSgenome}} object if available. 17 | } 18 | \note{ 19 | This function is memoised (cached) using the \code{memoise} package. To empty the cache, use \code{forget(characterToBSGenome)} 20 | } 21 | \examples{ 22 | g <- characterToBSGenome("hg19") 23 | } 24 | \seealso{ 25 | \code{\link{getGenomeAndMask}}, \code{\link{maskFromBSGenome}} 26 | } 27 | -------------------------------------------------------------------------------- /man/print.permTestResults.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/print.permTestResults.R 3 | \name{print.permTestResults} 4 | \alias{print.permTestResults} 5 | \title{Print permTestResults objects} 6 | \usage{ 7 | \method{print}{permTestResults}(x, ...) 
8 | } 9 | \value{ 10 | the object is printed 11 | } 12 | \description{ 13 | Print permTestResults objects 14 | } 15 | \examples{ 16 | genome <- filterChromosomes(getGenome("hg19"), keep.chr="chr1") 17 | A <- createRandomRegions(nregions=20, length.mean=10000000, length.sd=20000, genome=genome, non.overlapping=FALSE) 18 | B <- c(A, createRandomRegions(nregions=10, length.mean=10000, length.sd=20000, genome=genome, non.overlapping=FALSE)) 19 | 20 | pt <- permTest(A=A, B=B, ntimes=10, alternative="auto", verbose=TRUE, genome=genome, evaluate.function=meanDistance, randomize.function=randomizeRegions, non.overlapping=FALSE) 21 | print(pt) 22 | 23 | } 24 | \keyword{internal} 25 | -------------------------------------------------------------------------------- /man/recomputePermTest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/recomputePermTest.R 3 | \name{recomputePermTest} 4 | \alias{recomputePermTest} 5 | \title{Recompute Permutation Test} 6 | \usage{ 7 | recomputePermTest(ptr) 8 | } 9 | \arguments{ 10 | \item{ptr}{an object of class \code{permTestResults}} 11 | } 12 | \value{ 13 | A list of class \code{permTestResults} containing the same components as \code{\link{permTest}} results. 
14 | } 15 | \description{ 16 | Recomputes the permutation test changing the alternative hypotesis 17 | } 18 | \examples{ 19 | A <- createRandomRegions(nregions=10, length.mean=1000000) 20 | 21 | B <- createRandomRegions(nregions=10, length.mean=1000000) 22 | 23 | resPerm <- permTest(A=A, B=B, ntimes=5, alternative="less", genome="hg19", evaluate.function=meanDistance, randomize.function=randomizeRegions) 24 | 25 | plot(resPerm) 26 | 27 | 28 | 29 | 30 | } 31 | \seealso{ 32 | \code{\link{permTest}} 33 | } 34 | -------------------------------------------------------------------------------- /R/listChrTypes.R: -------------------------------------------------------------------------------- 1 | #' listChrTypes 2 | #' 3 | #' 4 | #' @description 5 | #' Prints one line per organism with its name, its identifier and the names of the chromosome sets available for it in the predefined chromosomes sets information. 6 | #' 7 | #' 8 | #' @usage listChrTypes() 9 | #' 10 | #' @return the list of available chrs and organisms is printed 11 | #' 12 | #' @seealso \code{\link{filterChromosomes}}, \code{\link{getChromosomesByOrganism}} 13 | #' 14 | #' @examples 15 | #' 16 | #' g <- getGenomeAndMask("hg19")$genome 17 | #' 18 | #' listChrTypes() 19 | #' 20 | #' g <- filterChromosomes(g, chr.type="autosomal", organism="hg19") 21 | #' 22 | #' @export listChrTypes 23 | 24 | 25 | listChrTypes <- function() { 26 | # Fetch the predefined organism -> chromosome sets list 27 | organisms <- getChromosomesByOrganism() 28 | for(org.id in names(organisms)) { 29 | org.info <- organisms[[org.id]] 30 | set.names <- names(org.info) 31 | # "org.name" holds the organism label, it is not a chromosome set 32 | set.names <- set.names[set.names != "org.name"] 33 | cat(paste0(org.info[["org.name"]], " (", org.id, "): ", paste(set.names, collapse=", ")), "\n") 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /man/meanDistance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/meanDistance.R 3 | \name{meanDistance} 4 | \alias{meanDistance} 5 | \title{Mean 
Distance} 6 | \usage{ 7 | meanDistance(A, B, ...) 8 | } 9 | \arguments{ 10 | \item{A}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 11 | 12 | \item{B}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 13 | 14 | \item{...}{any additional parameter needed} 15 | } 16 | \value{ 17 | The mean of the distances of each region in A to the nearest region in B. 18 | } 19 | \description{ 20 | Computes the mean distance of regions in A to the nearest element in B 21 | } 22 | \note{ 23 | If a region in A is in a chromosome where no B region is, it will be ignored and removed from the mean computation. 24 | } 25 | \examples{ 26 | A <- data.frame("chr1", c(1, 10, 20, 30), c(12, 13, 28, 40)) 27 | 28 | B <- data.frame("chr1", 25, 35) 29 | 30 | meanDistance(A, B) 31 | 32 | } 33 | -------------------------------------------------------------------------------- /R/summary.permTestResultsList.R: -------------------------------------------------------------------------------- 1 | #' Summary of permTestResultsList objects 2 | #' 3 | #' @param object a permTestResultsList object; each element is expected to provide pval, zscore and alternative 4 | #' @param ... not used by this method 5 | #' @method summary permTestResultsList 6 | #' @return the summary is printed 7 | #' @keywords internal 8 | #' @export 9 | # summary.permTestResultsList 10 | 11 | 12 | summary.permTestResultsList <- function(object, ...) 
{ 13 | # inherits() is used because class() may return several class names 14 | # and if() needs a single TRUE/FALSE condition 15 | if(!inherits(object, "permTestResultsList")) stop("object must be a permTestResultsList object") 16 | 17 | if(length(object) > 0) { 18 | # One row per test: p-value, z-score and alternative 19 | res <- do.call(rbind, lapply(object, function(x) { 20 | return(data.frame(pvalue=x$pval, zscore=x$zscore, test=x$alternative)) 21 | })) 22 | 23 | # Iterations and randomization function are read from the first test only 24 | cat(paste0("Permutation tests: ", length(object), "\n")) 25 | cat(paste0("Significant permutation tests: ", length(which(res$pvalue<=0.05)), "\n")) 26 | cat(paste0("Iterations: ", object[[1]]$ntimes, "\n")) 27 | cat(paste0("Randomization Function: ", object[[1]]$randomize.function.name, "\n")) 28 | cat("Tests Results:\n") 29 | print(res) 30 | } else { 31 | cat(paste0("permTestResultList object of length 0\n")) 32 | } 33 | 34 | 35 | } 36 | 37 | -------------------------------------------------------------------------------- /man/toDataframe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/toDataframe.R 3 | \name{toDataframe} 4 | \alias{toDataframe} 5 | \title{toDataframe} 6 | \usage{ 7 | toDataframe(A, stranded=FALSE) 8 | } 9 | \arguments{ 10 | \item{A}{a \code{\link{GRanges}} object.} 11 | 12 | \item{stranded}{(only used when A is a \code{\link{GRanges}} object) a logical indicating whether a column with the strand information have to be added to the result (Defaults to FALSE)} 13 | } 14 | \value{ 15 | A \code{data.frame} with the regions in A. If A was a \code{\link{GRanges}} object, the output will include any metadata present in A. 16 | } 17 | \description{ 18 | Transforms a \code{\link{GRanges}} object or a \code{\link{data.frame}}containing a region set into a \code{\link{data.frame}}. 19 | } 20 | \details{ 21 | If the oject is of class \code{\link{data.frame}}, it will be returned untouched. 
22 | } 23 | \examples{ 24 | A <- data.frame(chr=1, start=c(1, 15, 24), end=c(10, 20, 30), x=c(1,2,3), y=c("a", "b", "c")) 25 | 26 | A2 <- toGRanges(A) 27 | 28 | toDataframe(A2) 29 | 30 | } 31 | \seealso{ 32 | \code{\link{toGRanges}} 33 | } 34 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: regioneR 2 | Type: Package 3 | Title: Association analysis of genomic regions based on permutation tests 4 | Version: 1.41.2 5 | Date: 2018-08-14 6 | Author: Anna Diez-Villanueva , Roberto Malinverni 7 | and Bernat Gel 8 | Maintainer: Bernat Gel 9 | Description: regioneR offers a statistical framework based on 10 | customizable permutation tests to assess the association 11 | between genomic region sets and other genomic features. 12 | License: Artistic-2.0 13 | Depends: 14 | GenomicRanges 15 | Imports: 16 | memoise, 17 | GenomicRanges, 18 | IRanges, 19 | BSgenome, 20 | Biostrings, 21 | rtracklayer, 22 | parallel, 23 | graphics, 24 | stats, 25 | utils, 26 | methods, 27 | GenomeInfoDb, 28 | S4Vectors, 29 | tools 30 | Suggests: 31 | BiocStyle, 32 | knitr, 33 | rmarkdown, 34 | BSgenome.Hsapiens.UCSC.hg19.masked, 35 | testthat 36 | VignetteBuilder: knitr 37 | Encoding: UTF-8 38 | biocViews: Genetics, ChIPSeq, DNASeq, MethylSeq, CopyNumberVariation 39 | NeedsCompilation: no 40 | RoxygenNote: 7.2.1 41 | -------------------------------------------------------------------------------- /R/print.permTestResults.R: -------------------------------------------------------------------------------- 1 | #' Print permTestResults objects 2 | #' @return the object is printed 3 | #' 4 | #' @examples 5 | #' genome <- filterChromosomes(getGenome("hg19"), keep.chr="chr1") 6 | #' A <- createRandomRegions(nregions=20, length.mean=10000000, length.sd=20000, genome=genome, non.overlapping=FALSE) 7 | #' B <- c(A, createRandomRegions(nregions=10, length.mean=10000, 
length.sd=20000, genome=genome, non.overlapping=FALSE)) 8 | #' 9 | #' pt <- permTest(A=A, B=B, ntimes=10, alternative="auto", verbose=TRUE, genome=genome, evaluate.function=meanDistance, randomize.function=randomizeRegions, non.overlapping=FALSE) 10 | #' print(pt) 11 | #' 12 | #' @keywords internal 13 | #' @export 14 | 15 | 16 | print.permTestResults <- function(x, ...) { 17 | # Writes a single "label + value" line, followed by a newline 18 | show.field <- function(label, value) cat(paste0(label, value, "\n")) 19 | show.field("P-value: ", x$pval) 20 | show.field("Z-score: ", x$zscore) 21 | show.field("Number of iterations: ", x$ntimes) 22 | if(x$ntimes<20) cat("Note: less than 20 iterations might produce unreliable results\n") 23 | show.field("Alternative: ", x$alternative) 24 | show.field("Evaluation of the original region set: ", x$observed) 25 | show.field("Evaluation function: ", x$evaluate.function.name) 26 | show.field("Randomization function: ", x$randomize.function.name) 27 | } 28 | 29 | -------------------------------------------------------------------------------- /man/subtractRegions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/subtractRegions.R 3 | \name{subtractRegions} 4 | \alias{subtractRegions} 5 | \title{Subtract Regions} 6 | \usage{ 7 | subtractRegions(A, B) 8 | } 9 | \arguments{ 10 | \item{A}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 11 | 12 | \item{B}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 13 | } 14 | \value{ 15 | A GenomicRanges object 16 | } 17 | \description{ 18 | Function for subtracting a region set from another region set. 19 | } 20 | \details{ 21 | This function returns the regions in A minus the parts of them overlapping the regions in B. Overlapping regions in the result will be fused. 
22 | 23 | The implementation relies completely on the \code{setdiff} function from \code{IRanges} package. 24 | } 25 | \examples{ 26 | A <- data.frame(chr=1, start=c(1, 15, 24, 31), end=c(10, 20, 30, 35)) 27 | 28 | B <- data.frame(chr=1, start=c(2, 12, 24, 35), end=c(5, 25, 29, 40)) 29 | 30 | subtract <- subtractRegions(A, B) 31 | 32 | plotRegions(list(A, B, subtract), chromosome=1, regions.labels=c("A", "B", "subtract"), regions.colors=3:1) 33 | 34 | } 35 | -------------------------------------------------------------------------------- /man/getMask.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getMask.R 3 | \name{getMask} 4 | \alias{getMask} 5 | \title{getMask} 6 | \usage{ 7 | getMask(genome) 8 | } 9 | \arguments{ 10 | \item{genome}{the genome from where the mask will be extracted. It can be either a \code{\link{BSgenome}} object or a character string uniquely identifying a \code{\link{BSgenome}} object installed (e.g. "hg19", "mm10", ...)} 11 | } 12 | \value{ 13 | A \code{\link{GRanges}} object with the genomic regions to be masked out 14 | } 15 | \description{ 16 | Function to obtain a mask given a genome available as a \code{\link{BSgenome}}. The mask returned is the merge of all the active masks in the \code{\link{BSgenome}}. 17 | 18 | Since it uses \code{\link{characterToBSGenome}}, the genome can be either a \code{\link{BSgenome}} object or a character string uniquely identifying a \code{\link{BSgenome}} object installed. 19 | } 20 | \note{ 21 | This function is memoised (cached) using the \code{\link{memoise}} package. 
To empty the cache, use \code{\link{forget}(getMask)} 22 | } 23 | \examples{ 24 | hg19.mask <- getMask("hg19") 25 | 26 | hg19.mask 27 | 28 | } 29 | \seealso{ 30 | \code{\link{getGenome}}, \code{\link{getGenomeAndMask}}, \code{\link{characterToBSGenome}}, \code{\link{maskFromBSGenome}}, \code{\link{emptyCacheRegioneR}} 31 | } 32 | -------------------------------------------------------------------------------- /man/resampleRegions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/resampleRegions.R 3 | \name{resampleRegions} 4 | \alias{resampleRegions} 5 | \title{Resample Regions} 6 | \usage{ 7 | resampleRegions(A, universe, per.chromosome=FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{A}{a region set in any of the formats accepted by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 11 | 12 | \item{universe}{a region set in any of the formats accepted by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 13 | 14 | \item{per.chromosome}{boolean indicating if sample must be by chromosome.} 15 | 16 | \item{...}{further arguments to be passed to or from methods.} 17 | } 18 | \value{ 19 | a \code{\link{GenomicRanges}} object. A sample from the \code{universe} with the same length as A. 20 | } 21 | \description{ 22 | Function for sampling a region set from a universe of region sets. 
23 | } 24 | \examples{ 25 | universe <- data.frame(chr=1, start=c(1,15,24,40,50), end=c(10,20,30,45,55)) 26 | 27 | A <- data.frame(chr=1, start=c(2,12,28,35), end=c(5,25,33,43)) 28 | 29 | resampleRegions(A, universe, per.chromosome=TRUE) 30 | 31 | } 32 | \seealso{ 33 | \code{\link{toDataframe}}, \code{\link{toGRanges}}, \code{\link{randomizeRegions}}, \code{\link{createRandomRegions}} 34 | } 35 | -------------------------------------------------------------------------------- /man/plotRegions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotRegions.R 3 | \name{plotRegions} 4 | \alias{plotRegions} 5 | \title{Plot Regions} 6 | \usage{ 7 | plotRegions(x, chromosome, start=NULL, end=NULL, regions.labels=NULL, regions.colors=NULL, ...) 8 | } 9 | \arguments{ 10 | \item{x}{list of objects to be plotted.} 11 | 12 | \item{chromosome}{character or numeric value indicating which chromosome you want to plot.} 13 | 14 | \item{start}{numeric value indicating from which position you want to plot.} 15 | 16 | \item{end}{numeric value indicating to which position you want to plot.} 17 | 18 | \item{regions.labels}{vector indicating the labels for the y axes. It must have the same length as x.} 19 | 20 | \item{regions.colors}{character vector indicating the colors for the plotted regions. It must have the same length as x.} 21 | 22 | \item{...}{Arguments to be passed to methods, such as graphical parameters (see \code{\link{par}}).} 23 | } 24 | \value{ 25 | A plot is created on the current graphics device. 
26 | } 27 | \description{ 28 | Plots sets of regions 29 | } 30 | \examples{ 31 | A <- data.frame(chr=1, start=c(1,15,24,40,50), end=c(10,20,30,45,55)) 32 | 33 | B <- data.frame(chr=1, start=c(2,12,28,35), end=c(5,25,33,43)) 34 | 35 | plotRegions(list(A,B), chromosome=1, regions.labels=c("A","B"), regions.colors=3:2) 36 | 37 | 38 | } 39 | -------------------------------------------------------------------------------- /R/summary.permTestResults.R: -------------------------------------------------------------------------------- 1 | 2 | #' Summary of permTestResults objects 3 | #' 4 | #' Prints the number of permutations, the alternative hypothesis, the evaluation of the original region set, a summary of the evaluations of the permuted region sets, the z-score and the p-value with its significance code. 5 | #' 6 | #' @param object an object of class \code{permTestResults} 7 | #' @param ... further arguments, currently ignored 8 | #' 9 | #' @method summary permTestResults 10 | #' @return the summary is printed 11 | #' @keywords internal 12 | #' @export 13 | # summary.permTestResults 14 | 15 | 16 | 17 | summary.permTestResults <- function(object, ...) { 18 | 19 | #inherits() also accepts objects carrying additional classes, where comparing class(object) would produce a condition of length > 1 20 | if(!inherits(object, "permTestResults")) stop("object must be a permTestResults object") 21 | 22 | cat(paste0("Number of permutations: ", object$ntimes, "\n")) 23 | if(object$ntimes<20) cat("Note: less than 20 permutations might produce unreliable results\n") 24 | cat("\n") 25 | cat(paste0("Alternative: ", object$alternative, "\n\n")) 26 | cat(paste0("Evaluation of the original region set: ", object$observed, "\n\n")) 27 | cat(paste0("Summary of the evaluation of the permuted region set: \n")) 28 | print(summary(object$permuted)) 29 | cat("\n\n") 30 | cat(paste0("Z-score: ", object$zscore, "\n\n")) 31 | 32 | #Significance code of the p-value. It stays empty when the p-value is missing or outside [0, 1], so the summary is still printed 33 | pval <- object$pval 34 | code <- "" 35 | if(length(pval)==1 && !is.na(pval) && pval >= 0 && pval <= 1) { 36 | #Intervals are closed on the left: [0, 0.001) "***", [0.001, 0.01) "**", [0.01, 0.05) "*", [0.05, 0.1) ".", [0.1, 1] " " 37 | code <- c("***", "**", "*", ".", " ")[findInterval(pval, c(0.001, 0.01, 0.05, 0.1)) + 1] 38 | } 39 | cat(paste0("P-value: ", pval, " ", code, "\n")) 40 | cat(paste0("--- \n Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1")) 41 | 42 | } 
43 | -------------------------------------------------------------------------------- /man/plot.localZScoreResults.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.localZScoreResults.R 3 | \name{plot.localZScoreResults} 4 | \alias{plot.localZScoreResults} 5 | \title{Plot localZscore results} 6 | \usage{ 7 | \method{plot}{localZScoreResults}(x, main = "", num.x.labels = 5, ...) 8 | } 9 | \arguments{ 10 | \item{x}{an object of class \code{localZScoreResults}.} 11 | 12 | \item{main}{a character specifying the main title of the plot. Defaults to no title.} 13 | 14 | \item{num.x.labels}{a numeric specifying the number of ticks to label the x axis. The total number will be 2*num.x.labels + 1. Defaults to 5.} 15 | 16 | \item{...}{further arguments to be passed to or from methods.} 17 | } 18 | \value{ 19 | A plot is created on the current graphics device. 20 | } 21 | \description{ 22 | Function for plotting the a \code{localZScoreResults} object. 
23 | } 24 | \examples{ 25 | 26 | genome <- filterChromosomes(getGenome("hg19"), keep.chr="chr1") 27 | A <- createRandomRegions(nregions=20, length.mean=10000000, length.sd=20000, genome=genome, non.overlapping=FALSE) 28 | B <- c(A, createRandomRegions(nregions=10, length.mean=100000, length.sd=20000, genome=genome, non.overlapping=FALSE)) 29 | 30 | pt <- overlapPermTest(A=A, B=B, ntimes=10, genome=genome, non.overlapping=FALSE) 31 | 32 | lz <- localZScore(A=A, B=B, pt=pt) 33 | plot(lz) 34 | 35 | } 36 | \seealso{ 37 | \code{\link{localZScore}} 38 | } 39 | -------------------------------------------------------------------------------- /man/uniqueRegions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/uniqueRegions.R 3 | \name{uniqueRegions} 4 | \alias{uniqueRegions} 5 | \title{Unique Regions} 6 | \usage{ 7 | uniqueRegions(A, B) 8 | } 9 | \arguments{ 10 | \item{A}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 11 | 12 | \item{B}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 13 | } 14 | \value{ 15 | It returns a \code{\link{GenomicRanges}} object with the regions unique to one of the region sets. 16 | } 17 | \description{ 18 | Returns the regions unique to only one of the two region sets, that is, all parts of the genome covered by only one of the two region sets. 19 | } 20 | \note{ 21 | All metadata (additional columns in the region set in addition to chromosome, start and end) will be ignored and not present in the returned region set. 
22 | } 23 | \examples{ 24 | A <- data.frame("chr1", c(1, 10, 20, 30), c(12, 13, 28, 40)) 25 | 26 | B <- data.frame("chr1", 25, 35) 27 | 28 | uniques <- uniqueRegions(A, B) 29 | 30 | plotRegions(list(A, B, uniques), chromosome="chr1", regions.labels=c("A", "B", "uniques"), regions.colors=3:1) 31 | 32 | } 33 | \seealso{ 34 | \code{\link{toGRanges}}, \code{\link{subtractRegions}}, \code{\link{commonRegions}}, \code{\link{mergeRegions}} 35 | } 36 | -------------------------------------------------------------------------------- /man/numOverlaps.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/numOverlaps.R 3 | \name{numOverlaps} 4 | \alias{numOverlaps} 5 | \title{Number Of Overlaps} 6 | \usage{ 7 | numOverlaps(A, B, count.once=FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{A}{a region set in any of the formats accepted by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 11 | 12 | \item{B}{a region set in any of the formats accepted by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 13 | 14 | \item{count.once}{boolean indicating whether the overlap of multiple B regions with a single A region should be counted once or multiple times} 15 | 16 | \item{...}{any additional parameters needed} 17 | } 18 | \value{ 19 | It returns a numeric value that is the number of regions in A overlapping at least one region in B. 
20 | } 21 | \description{ 22 | Returns the number of regions in A overlapping any region in B 23 | } 24 | \examples{ 25 | 26 | genome <- filterChromosomes(getGenome("hg19"), keep.chr="chr1") 27 | A <- createRandomRegions(nregions=20, length.mean=10000000, length.sd=20000, genome=genome, non.overlapping=FALSE) 28 | B <- c(A, createRandomRegions(nregions=10, length.mean=10000, length.sd=20000, genome=genome, non.overlapping=FALSE)) 29 | 30 | numOverlaps(A, B) 31 | numOverlaps(A, B, count.once=TRUE) 32 | 33 | } 34 | \seealso{ 35 | \code{\link{overlapPermTest}}, \code{\link{permTest}} 36 | } 37 | -------------------------------------------------------------------------------- /man/overlapGraphicalSummary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/overlapGraphicalSummary.R 3 | \name{overlapGraphicalSummary} 4 | \alias{overlapGraphicalSummary} 5 | \title{Overlap Graphical Summary} 6 | \usage{ 7 | overlapGraphicalSummary(A, B, regions.labels=c("A","B"), regions.colors=c("black","forestgreen","darkred"), ...) 8 | } 9 | \arguments{ 10 | \item{A}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 11 | 12 | \item{B}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 13 | 14 | \item{regions.labels}{vector indicating the labels for the y axes.} 15 | 16 | \item{regions.colors}{character vector indicating the colors for the regions.} 17 | 18 | \item{...}{Arguments to be passed to methods, such as graphical parameters (see \code{\link{par}}). 19 | 20 | @return A plot is created on the current graphics device.} 21 | } 22 | \description{ 23 | Graphical summary of the overlap between two set of regions. 
24 | } 25 | \examples{ 26 | A <- data.frame(chr=1, start=c(1,15,24,40,50), end=c(10,20,30,45,55)) 27 | 28 | B <- data.frame(chr=1, start=c(2,12,28,35), end=c(5,25,33,43)) 29 | 30 | overlapGraphicalSummary(A, B, regions.labels=c("A","B"), regions.colors=c(4,5,6)) 31 | 32 | } 33 | \seealso{ 34 | \code{\link{overlapPermTest}}, \code{\link{overlapRegions}} 35 | } 36 | -------------------------------------------------------------------------------- /R/recomputePermTest.R: -------------------------------------------------------------------------------- 1 | #' Recompute Permutation Test 2 | #' 3 | #' @description 4 | #' Recomputes the permutation test changing the alternative hypothesis 5 | #' 6 | #' @details 7 | #' The p-value is recomputed from the observed and permuted evaluations stored in \code{ptr} and the alternative is switched from "less" to "greater" or from "greater" to "less". Any other component, including the z-score, is returned unchanged. 8 | #' 9 | #' @usage recomputePermTest(ptr) 10 | #' 11 | #' @param ptr an object of class \code{permTestResults} 12 | #' 13 | #' @return 14 | #' A list of class \code{permTestResults} containing the same components as \code{\link{permTest}} results. 15 | #' 16 | #' @seealso \code{\link{permTest}} 17 | #' 18 | #' @examples 19 | #' A <- createRandomRegions(nregions=10, length.mean=1000000) 20 | #' 21 | #' B <- createRandomRegions(nregions=10, length.mean=1000000) 22 | #' 23 | #' resPerm <- permTest(A=A, B=B, ntimes=5, alternative="less", genome="hg19", evaluate.function=meanDistance, randomize.function=randomizeRegions) 24 | #' 25 | #' plot(resPerm) 26 | #' 27 | # resPermRecomputed <- recomputePermTest(resPerm) 28 | #' 29 | # summary(resPermRecomputed) 30 | #' 31 | # plot(resPermRecomputed) 32 | #' 33 | #' @export recomputePermTest 34 | 35 | 36 | 37 | recomputePermTest<-function(ptr){ 38 | 39 | #inherits() also accepts objects carrying additional classes, where comparing class(ptr) would produce a condition of length > 1 40 | if(!inherits(ptr, "permTestResults")) stop("ptr must be a permTestResults object") 41 | 42 | ptr2<-ptr 43 | if(ptr$alternative == "less"){ 44 | #New alternative is "greater": count the permuted evaluations that are at least as large as the observed one 45 | ptr2$pval <- (sum(ptr$observed <= ptr$permuted) + 1) / (ptr$ntimes + 1) 46 | ptr2$alternative<-"greater" 47 | } else if(ptr$alternative == "greater"){ 48 | #New alternative is "less": count the permuted evaluations that are at most as large as the observed one 49 | ptr2$pval <- (sum(ptr$observed >= ptr$permuted) + 1) / (ptr$ntimes + 1) 50 | ptr2$alternative<-"less" 51 | } 52 | return(ptr2) 53 | } 
54 | -------------------------------------------------------------------------------- /man/commonRegions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/commonRegions.R 3 | \name{commonRegions} 4 | \alias{commonRegions} 5 | \title{Common Regions} 6 | \usage{ 7 | commonRegions(A, B) 8 | } 9 | \arguments{ 10 | \item{A}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 11 | 12 | \item{B}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 13 | } 14 | \value{ 15 | It returns a \code{\link{GenomicRanges}} object with the regions present in both region sets. 16 | } 17 | \description{ 18 | Returns the regions that are common in two region sets, its intersection. 19 | } 20 | \note{ 21 | All metadata (additional columns in the region set in addition to chromosome, start and end) will be ignored and not present in the returned region set. 
22 | } 23 | \examples{ 24 | A <- data.frame("chr1", c(1, 10, 20, 30), c(12, 13, 28, 40)) 25 | 26 | B <- data.frame("chr1", 25, 35) 27 | 28 | commons <- commonRegions(A, B) 29 | 30 | plotRegions(list(A, B, commons), chromosome="chr1", regions.labels=c("A", "B", "common"), regions.colors=3:1) 31 | 32 | } 33 | \seealso{ 34 | \code{\link{plotRegions}}, \code{\link{toDataframe}}, \code{\link{toGRanges}}, \code{\link{subtractRegions}}, \code{\link{splitRegions}}, \code{\link{extendRegions}}, \code{\link{joinRegions}}, \code{\link{mergeRegions}}, \code{\link{overlapRegions}} 35 | } 36 | -------------------------------------------------------------------------------- /R/meanDistance.R: -------------------------------------------------------------------------------- 1 | #' Mean Distance 2 | #' 3 | #' @description 4 | #' Computes the mean distance of regions in A to the nearest element in B 5 | #' 6 | #' @usage 7 | #' meanDistance(A, B, ...) 8 | #' 9 | #' @note 10 | #' If a region in A is in a chromosome where no B region is, it will be ignored and removed from the mean computation. 11 | #' 12 | #' @param A a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...) 13 | #' @param B a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...) 14 | #' @param ... any additional parameter needed 15 | #' 16 | #' @return 17 | #' The mean of the distances of each region in A to the nearest region in B. 18 | #' 19 | #' @examples 20 | #' A <- data.frame("chr1", c(1, 10, 20, 30), c(12, 13, 28, 40)) 21 | #' 22 | #' B <- data.frame("chr1", 25, 35) 23 | #' 24 | #' meanDistance(A, B) 25 | #' 26 | #' @export meanDistance 27 | #' 28 | #' @importFrom GenomicRanges distanceToNearest 29 | 30 | 31 | meanDistance <- function(A, B, ...) { 32 | 33 | if(!hasArg(A)) stop("A is missing") 34 | if(!hasArg(B)) stop("B is missing") 35 | 36 | A <- toGRanges(A) 37 | B <- toGRanges(B) 38 | 39 | #Distance from the regions in A to their nearest region in B, stored in the "distance" metadata column of the result 40 | d <- GenomicRanges::distanceToNearest(A, B) 41 | 42 | #Read the distances through the mcols() accessor instead of the internal slots of the returned object 43 | return(mean(S4Vectors::mcols(d)$distance, na.rm=TRUE)) 44 | 45 | } 46 | 47 | 48 | 
49 | -------------------------------------------------------------------------------- /man/splitRegions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/splitRegions.R 3 | \name{splitRegions} 4 | \alias{splitRegions} 5 | \title{Split Regions} 6 | \usage{ 7 | splitRegions(A, B, min.size=1, track.original=TRUE) 8 | } 9 | \arguments{ 10 | \item{A}{a region set in any of the formats accepted by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 11 | 12 | \item{B}{a region set in any of the formats accepted by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 13 | 14 | \item{min.size}{numeric value, minimal size of the new regions} 15 | 16 | \item{track.original}{logical indicating if you want to keep the original regions and additional information in the output} 17 | } 18 | \value{ 19 | A GRanges with the splitted regions. 20 | } 21 | \description{ 22 | Splits a region set A by both ends of the regions in a second region set B. 
23 | } 24 | \examples{ 25 | A <- data.frame(chr=1, start=c(1, 15, 24, 40, 50), end=c(10, 20, 30, 45, 55)) 26 | 27 | B <- data.frame(chr=1, start=c(2, 12, 28, 35), end=c(5, 25, 33, 43)) 28 | 29 | splits <- splitRegions(A, B) 30 | 31 | plotRegions(list(A, B, splits), chromosome=1, regions.labels=c("A", "B", "splits"), regions.colors=3:1) 32 | 33 | } 34 | \seealso{ 35 | \code{\link{toDataframe}}, \code{\link{toGRanges}}, \code{\link{subtractRegions}}, \code{\link{commonRegions}}, \code{\link{extendRegions}}, \code{\link{joinRegions}}, \code{\link{mergeRegions}}, \code{\link{overlapRegions}} 36 | } 37 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | #Guesses the field separator used in the lines in l. Returns the element of seps that splits every line into the same number of fields (more than one), preferring the one producing more fields, or NULL if no separator qualifies 5 | getSeparator <- function(l, seps=c("\t", ",", ";")) { 6 | #Nothing to inspect or nothing to try: no separator can be detected 7 | if(length(l)==0 || length(seps)==0) return(NULL) 8 | #Check all seps and find the one where all rows are split into the same 9 | #number of elements and said number is bigger than 1 and it's the largest 10 | num.fields <- unlist(lapply(seps, function(sep) { 11 | split.length <- lengths(strsplit(x = l, split = sep)) 12 | if(split.length[1]>1 && all(split.length==split.length[1])) return(split.length[1]) 13 | else return(-1) 14 | })) 15 | max.sep <- which.max(num.fields)[1] #if more than one sep is equally good, choose the first one 16 | if(num.fields[max.sep]>1) return(seps[max.sep]) 17 | return(NULL) 18 | } 19 | 20 | 21 | #Returns TRUE if none of the fields in the first line of l can be converted to a number 22 | hasHeader <- function(l, sep) { 23 | #Check if the first line does not contain numerics (since this is expected 24 | #to be used by toGRanges, we expect there will be numeric columns) 25 | nums <- suppressWarnings(as.numeric(strsplit(x = l[1], split = sep)[[1]])) 26 | return(all(is.na(nums))) 27 | } 28 | 29 | #Returns the index of the first line in the file not starting with comment.char, or NULL if every line does (or the file is empty) 30 | #Supports only single line comments 31 | firstNonCommentLine <- function(file.name, comment.char="#") { 32 | pattern <- paste0("^", comment.char) #comment.char is used as part of a regular expression 33 | step <- 100 34 | total <- step 35 | previous.num.lines <- 0 36 | ll <- readLines(file.name, n = total) 37 | #While only comments have been seen and the last read returned more lines than the previous one, read again from the start asking for step more lines 38 | while(all(grepl(pattern, ll)) && length(ll)>previous.num.lines) { 39 | previous.num.lines <- length(ll) 40 | total <- total + step 41 | ll <- readLines(file.name, n = total) 42 | } 43 | if(all(grepl(pattern, ll))) return(NULL) 44 | return(which(!grepl(pattern, ll))[1]) 45 | } 46 | 
47 | -------------------------------------------------------------------------------- /R/subtractRegions.R: -------------------------------------------------------------------------------- 1 | #' Subtract Regions 2 | #' 3 | #' @description 4 | #' Function for subtracting a region set from another region set. 5 | #' 6 | #' @details 7 | #' This function returns the regions in A minus the parts of them overlapping the regions in B. Overlapping regions in the result will be fused. 8 | #' 9 | #' The implementation relies completely in the \code{setdiff} function from \code{IRanges} package. 10 | #' 11 | #' @usage subtractRegions(A, B) 12 | #' 13 | #' @param A a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...) 14 | #' @param B a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...) 
15 | #' 16 | #' @return A GenomicRanges object 17 | #' 18 | #' @examples 19 | #' A <- data.frame(chr=1, start=c(1, 15, 24, 31), end=c(10, 20, 30, 35)) 20 | #' 21 | #' B <- data.frame(chr=1, start=c(2, 12, 24, 35), end=c(5, 25, 29, 40)) 22 | #' 23 | #' subtract <- subtractRegions(A, B) 24 | #' 25 | #' plotRegions(list(A, B, subtract), chromosome=1, regions.labels=c("A", "B", "subtract"), regions.colors=3:1) 26 | #' 27 | #' @export subtractRegions 28 | #' 29 | 30 | 31 | subtractRegions <- function(A, B) { 32 | 33 | if(!hasArg(A)) stop("A is missing") 34 | if(!hasArg(B)) stop("B is missing") 35 | 36 | A <- toGRanges(A) 37 | B <- toGRanges(B) 38 | 39 | #With an empty A or an empty B there is nothing to subtract and A is returned as is 40 | nothing.to.subtract <- length(A)==0 || length(B)==0 41 | if(nothing.to.subtract) return(A) 42 | 43 | #The subtraction itself is delegated to the setdiff available for GRanges 44 | return(GenomicRanges::setdiff(A, B)) 45 | 46 | } 47 | 48 | -------------------------------------------------------------------------------- /R/characterToBSGenome.R: -------------------------------------------------------------------------------- 1 | #' characterToBSGenome 2 | #' 3 | #' @description 4 | #' Given a character string with the "name" of a genome, it returns a \code{\link{BSgenome}} object if available. 5 | #' 6 | #' @note 7 | #' This function is memoised (cached) using the \code{memoise} package. To empty the cache, use \code{forget(characterToBSGenome)} 8 | #' 9 | #' @usage characterToBSGenome(genome.name) 10 | #' 11 | #' @param genome.name a character string uniquely identifying a \code{\link{BSgenome}} (e.g. 
"hg19", "mm10" are ok, but "hg" is not) 12 | #' 13 | #' @return 14 | #' A \code{\link{BSgenome}} object 15 | #' 16 | #' @examples 17 | #' g <- characterToBSGenome("hg19") 18 | #' @seealso \code{\link{getGenomeAndMask}}, \code{\link{maskFromBSGenome}} 19 | #' 20 | #' @export characterToBSGenome 21 | 22 | 23 | characterToBSGenome <- memoise(function(genome.name) { 24 | 25 | if(!hasArg(genome.name)) stop("parameter genome.name is required") 26 | if(!is.character(genome.name)) stop("genome.name must be a character") 27 | 28 | #First choice is the masked BSgenome. Any error while getting it is taken as "not available" 29 | masked.bsg <- tryCatch(getBSgenome(genome.name, masked=TRUE), error = function(err) NULL) 30 | if(!is.null(masked.bsg)) return(masked.bsg) 31 | 32 | #Fall back to the unmasked BSgenome and tell the user. An error here is not caught 33 | unmasked.bsg <- getBSgenome(genome.name, masked=FALSE) 34 | message(paste0("The masked version of '", genome.name, "' is not installed. Using the unmasked version. This means that no automatic masking will be available.")) 35 | return(unmasked.bsg) 36 | }) 37 | 38 | -------------------------------------------------------------------------------- /man/meanInRegions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/meanInRegions.R 3 | \name{meanInRegions} 4 | \alias{meanInRegions} 5 | \title{Mean In Regions} 6 | \usage{ 7 | meanInRegions(A, x, col.name=NULL, ...) 8 | } 9 | \arguments{ 10 | \item{A}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 11 | 12 | \item{x}{a region set in any of the accepted formats with an additional column with a value associated to every region. Regions in \code{x} can be points (single base regions).} 13 | 14 | \item{col.name}{character indicating the name of the column. 
If NULL and if a column with the name "value" exist, it will be used. The 4th column will be used otherwise (or the 5th if 4th is the strand).} 15 | 16 | \item{...}{any additional parameter needed} 17 | } 18 | \value{ 19 | It returns a numeric value that is the weighted mean of "value" defined in \code{x} over the regions in \code{A}. That is, the mean of the value of all 20 | regions in \code{x} overlapping each region in \code{A} weighted according to the number of bases overlapping. 21 | } 22 | \description{ 23 | Returns the mean of a value defined by a region set over another set of regions. 24 | } 25 | \examples{ 26 | 27 | A <- data.frame("chr1", c(1, 10, 20, 30), c(12, 13, 28, 40)) 28 | 29 | positions <- sample(1:40,30) 30 | 31 | x <- data.frame("chr1", positions, positions, rnorm(30,4,1)) 32 | 33 | meanInRegions(A, x) 34 | 35 | x <- GRanges(seqnames=x[,1],ranges=IRanges(x[,2],end=x[,2]),mcols=x[,3]) 36 | 37 | meanInRegions(A, x) 38 | 39 | } 40 | \seealso{ 41 | \code{\link{permTest}} 42 | } 43 | -------------------------------------------------------------------------------- /R/commonRegions.R: -------------------------------------------------------------------------------- 1 | #' Common Regions 2 | #' 3 | #' @description 4 | #' Returns the regions that are common in two region sets, its intersection. 5 | #' 6 | #' @note 7 | #' All metadata (additional columns in the region set in addition to chromosome, start and end) will be ignored and not present in the returned region set. 8 | #' 9 | #' 10 | #' @usage 11 | #' commonRegions(A, B) 12 | #' 13 | #' @param A a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...) 14 | #' @param B a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...) 
15 | #' 16 | #' @return 17 | #' It returns a \code{\link{GenomicRanges}} object with the regions present in both region sets. 18 | #' 19 | #' @seealso \code{\link{plotRegions}}, \code{\link{toDataframe}}, \code{\link{toGRanges}}, \code{\link{subtractRegions}}, \code{\link{splitRegions}}, \code{\link{extendRegions}}, \code{\link{joinRegions}}, \code{\link{mergeRegions}}, \code{\link{overlapRegions}} 20 | #' 21 | #' @examples 22 | #' A <- data.frame("chr1", c(1, 10, 20, 30), c(12, 13, 28, 40)) 23 | #' 24 | #' B <- data.frame("chr1", 25, 35) 25 | #' 26 | #' commons <- commonRegions(A, B) 27 | #' 28 | #' plotRegions(list(A, B, commons), chromosome="chr1", regions.labels=c("A", "B", "common"), regions.colors=3:1) 29 | #' 30 | #' @export commonRegions 31 | #' 32 | #' 33 | #' 34 | 35 | 36 | commonRegions <- function(A, B) { 37 | 38 | if(!hasArg(A)) stop("A is missing") 39 | if(!hasArg(B)) stop("B is missing") 40 | 41 | A <- toGRanges(A) 42 | B <- toGRanges(B) 43 | 44 | #Call the GRanges intersect explicitly, as subtractRegions does with GenomicRanges::setdiff, so the result does not depend on which intersect() is found first 45 | return(GenomicRanges::intersect(A, B)) 46 | 47 | } 48 | -------------------------------------------------------------------------------- /man/localZScore.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/localZScore.R 3 | \name{localZScore} 4 | \alias{localZScore} 5 | \title{Local z-score} 6 | \usage{ 7 | localZScore(A, pt, window, step, ...) 
8 | } 9 | \arguments{ 10 | \item{A}{a region set in any of the formats accepted by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 11 | 12 | \item{pt}{a permTestResult object} 13 | 14 | \item{window}{a window in which the local Z-score will be calculated (bp)} 15 | 16 | \item{step}{the number of bp that divide each Z-score evaluation} 17 | 18 | \item{...}{further arguments to be passed to other methods.} 19 | } 20 | \value{ 21 | It returns a local z-score object 22 | } 23 | \description{ 24 | Evaluates the variation of the z-score in the vicinity of the original region set 25 | } 26 | \examples{ 27 | 28 | genome <- filterChromosomes(getGenome("hg19"), keep.chr="chr1") 29 | A <- createRandomRegions(nregions=20, length.mean=10000, length.sd=20000, genome=genome, non.overlapping=FALSE) 30 | B <- c(A, createRandomRegions(nregions=10, length.mean=10000, length.sd=20000, genome=genome, non.overlapping=FALSE)) 31 | 32 | pt <- overlapPermTest(A=A, B=B, ntimes=10, genome=genome, non.overlapping=FALSE) 33 | plot(pt) 34 | 35 | lz <- localZScore(A=A, B=B, pt=pt) 36 | plot(lz) 37 | 38 | 39 | pt2 <- permTest(A=A, B=B, ntimes=10, randomize.function=randomizeRegions, evaluate.function=list(overlap=numOverlaps, distance=meanDistance), genome=genome, non.overlapping=FALSE) 40 | plot(pt2) 41 | 42 | lz2 <- localZScore(A=A, B=B, pt2) 43 | plot(lz2) 44 | 45 | 46 | } 47 | \seealso{ 48 | \code{\link{overlapPermTest}}, \code{\link{permTest}} 49 | } 50 | -------------------------------------------------------------------------------- /R/toDataframe.R: -------------------------------------------------------------------------------- 1 | #' toDataframe 2 | #' 3 | #' @description 4 | #' Transforms a \code{\link{GRanges}} object or a \code{\link{data.frame}} containing a region set into a \code{\link{data.frame}}. 5 | #' 6 | #' @details 7 | #' If the object is of class \code{\link{data.frame}}, it will be returned untouched. 
8 | #' 9 | #' @usage toDataframe(A, stranded=FALSE) 10 | #' 11 | #' @param A a \code{\link{GRanges}} object or a \code{\link{data.frame}}. 12 | #' @param stranded (only used when A is a \code{\link{GRanges}} object) a logical indicating whether a column with the strand information has to be added to the result (Defaults to FALSE) 13 | #' 14 | #' @return 15 | #' A \code{data.frame} with the regions in A. If A was a \code{\link{GRanges}} object, the output will include any metadata present in A. Objects of any other class are returned without modifications and a warning is raised. 16 | #' 17 | #' @seealso \code{\link{toGRanges}} 18 | #' 19 | #' @examples 20 | #' A <- data.frame(chr=1, start=c(1, 15, 24), end=c(10, 20, 30), x=c(1,2,3), y=c("a", "b", "c")) 21 | #' 22 | #' A2 <- toGRanges(A) 23 | #' 24 | #' toDataframe(A2) 25 | #' 26 | #' @export toDataframe 27 | 28 | 29 | #TODO: CHANGE STRANDED TO TRUE OR TO "AUTO", returning a strand column only if there's strand info? 30 | 31 | toDataframe <- function(A, stranded=FALSE) { 32 | 33 | if(!hasArg(A)) stop("A is missing") 34 | 35 | #A data.frame is already in the target format 36 | if(is(A, "data.frame")) return(A) 37 | 38 | #Anything that is not a GRanges either is returned as received, with a warning 39 | if(!is(A, "GRanges")) { 40 | warning("Unidentified class in toDataFrame. Returning without modifications") 41 | return(A) 42 | } 43 | 44 | #Drop the 4th column (width) and, unless stranded, also the 5th one (strand) of the GRanges converted to data.frame 45 | unwanted.cols <- if(stranded) 4 else c(4, 5) 46 | regs <- suppressWarnings(as.data.frame(A)[,-unwanted.cols]) 47 | names(regs)[1] <- "chr" 48 | return(regs) 49 | 50 | } 51 | -------------------------------------------------------------------------------- /man/plot.localZScoreResultsList.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.localZScoreResultsList.R 3 | \name{plot.localZScoreResultsList} 4 | \alias{plot.localZScoreResultsList} 5 | \title{Plot a list of localZscore results} 6 | \usage{ 7 | \method{plot}{localZScoreResultsList}(x, ncol = NA, main = "", num.x.labels = 5, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{an object of class \code{localZScoreResultsList}.} 11 | 12 | \item{main}{a character specifying the main title of the plot. Defaults to no title.} 13 | 14 | \item{num.x.labels}{a numeric specifying the number of ticks to label the x axis. The total number will be 2*num.x.labels + 1. Defaults to 5.} 15 | 16 | \item{...}{further arguments to be passed to or from methods.} 17 | } 18 | \value{ 19 | A plot is created on the current graphics device. 20 | } 21 | \description{ 22 | Function for plotting a \code{localZScoreResultsList} object. 23 | } 24 | \examples{ 25 | 26 | genome <- filterChromosomes(getGenome("hg19"), keep.chr="chr1") 27 | A <- createRandomRegions(nregions=20, length.mean=10000000, length.sd=20000, genome=genome, non.overlapping=FALSE) 28 | B <- c(A, createRandomRegions(nregions=10, length.mean=100000, length.sd=20000, genome=genome, non.overlapping=FALSE)) 29 | 30 | pt <- overlapPermTest(A=A, B=B, ntimes=10, genome=genome, non.overlapping=FALSE) 31 | 32 | lz <- localZScore(A=A, B=B, pt=pt) 33 | plot(lz) 34 | 35 | pt2 <- permTest(A=A, B=B, ntimes=10, randomize.function=randomizeRegions, evaluate.function=list(overlap=numOverlaps, distance=meanDistance), genome=genome, non.overlapping=FALSE) 36 | plot(pt2) 37 | 38 | 39 | } 40 | \seealso{ 41 | \code{\link{localZScore}} 42 | } 43 | \keyword{internal} 44 | -------------------------------------------------------------------------------- /man/getGenomeAndMask.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getGenomeAndMask.R 3 | \name{getGenomeAndMask} 4 | \alias{getGenomeAndMask} 5 | \title{getGenomeAndMask} 6 | \usage{ 7 | getGenomeAndMask(genome, mask=NULL) 8 | } 9 | \arguments{ 10 | \item{genome}{the genome object or genome identifier.} 11 | 12 | \item{mask}{the mask of the genome in a valid RS format (data.frame, GRanges, BED-like file...). 
If mask is \code{\link{NULL}}, it will try to get a mask from the genome. If mask is \code{\link{NA}} it will return an empty mask. (Default=NULL)} 13 | } 14 | \value{ 15 | A list with two elements: genome and mask. Genome and mask are GRanges objects. 16 | } 17 | \description{ 18 | Function to obtain a valid genome and mask pair given a valid genome identifier and optionally a mask. 19 | 20 | If the genome is not a \code{\link{BSgenome}} object or a character string uniquely identifying a \code{\link{BSgenome}} package installed, it will return the genome "as is". If a mask is provided, it will simply return it. Otherwise it will return the mask returned by \code{\link{getMask}(genome)} or an empty mask if genome is not a valid \code{\link{BSgenome}} or \code{\link{BSgenome}} identifier. 21 | } 22 | \note{ 23 | This function is memoised (cached) using the \code{\link{memoise}} package. To empty the cache, use \code{\link{forget}(getGenomeAndMask)} 24 | } 25 | \examples{ 26 | getGenomeAndMask("hg19", mask=NA) 27 | 28 | getGenomeAndMask(genome=data.frame(c("chrA", "chrB"), c(15000000, 10000000)), mask=NA) 29 | 30 | } 31 | \seealso{ 32 | \code{\link{getMask}}, \code{\link{getGenome}}, \code{\link{characterToBSGenome}}, \code{\link{maskFromBSGenome}}, \code{\link{emptyCacheRegioneR}} 33 | } 34 | -------------------------------------------------------------------------------- /man/extendRegions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/extendRegions.R 3 | \name{extendRegions} 4 | \alias{extendRegions} 5 | \title{Extend Regions} 6 | \usage{ 7 | extendRegions(A, extend.start=0, extend.end=0) 8 | } 9 | \arguments{ 10 | \item{A}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 11 | 12 | \item{extend.start}{an integer. 
The number of bases to be subtracted from the start of the region.} 13 | 14 | \item{extend.end}{an integer. The number of bases to be added at the end of the region.} 15 | } 16 | \value{ 17 | a \code{\link{GenomicRanges}} object with the extended regions. 18 | } 19 | \description{ 20 | Extends the regions a number of bases at each end. Negative numbers will reduce the region instead of enlarging it. 21 | } 22 | \note{ 23 | If negative values are provided and the new extremes are "flipped", the function will fail. It does not check if the extended regions fit into the genome. 24 | } 25 | \examples{ 26 | A <- data.frame("chr1", c(10, 20, 30), c(13, 28, 40)) 27 | 28 | extend1 <- extendRegions(A, extend.start=5, extend.end=2) 29 | 30 | extend2 <- extendRegions(A, extend.start=15) 31 | 32 | extend3 <- extendRegions(A, extend.start=-1) 33 | 34 | plotRegions(list(A, extend1, extend2, extend3), chromosome="chr1", regions.labels=c("A", "extend1", "extend2", "extend3"), regions.colors=4:1) 35 | 36 | 37 | } 38 | \seealso{ 39 | \code{\link{plotRegions}}, \code{\link{toDataframe}}, \code{\link{toGRanges}}, \code{\link{subtractRegions}}, \code{\link{splitRegions}}, \code{\link{overlapRegions}}, \code{\link{commonRegions}}, \code{\link{mergeRegions}}, \code{\link{joinRegions}} 40 | } 41 | -------------------------------------------------------------------------------- /R/getMask.R: -------------------------------------------------------------------------------- 1 | #' getMask 2 | #' 3 | #' @description 4 | #' Function to obtain a mask given a genome available as a \code{\link{BSgenome}}. The mask returned is the merge of all the active masks in the \code{\link{BSgenome}}. 5 | #' 6 | #' Since it uses \code{\link{characterToBSGenome}}, the genome can be either a \code{\link{BSgenome}} object or a character string uniquely identifying a \code{\link{BSgenome}} object installed. 7 | #' 8 | #' @note 9 | #' This function is memoised (cached) using the \code{\link{memoise}} package. 
# getMask: obtain the mask associated with a genome.
#
#   genome  a BSgenome object, or a character identifier that
#           characterToBSGenome() can resolve.
#
# Returns whatever maskFromBSGenome() produces for the resolved genome, or NULL
# when `genome` is not (and could not be turned into) a BSgenome object.
# The function is wrapped in memoise::memoise(), so repeated calls with the same
# argument are answered from the cache.
getMask <- memoise::memoise(function(genome) {

  # A character identifier is first resolved through the BSgenome lookup helper
  resolved <- if (is.character(genome)) characterToBSGenome(genome) else genome

  # It may be a BSgenome because it was passed as one or because the character
  # identifier has just been resolved; anything else has no mask
  if (!is(resolved, "BSgenome")) {
    return(NULL)
  }

  return(maskFromBSGenome(resolved))
})
# numOverlaps: evaluation function counting overlaps between two region sets.
#
#   A, B        region sets, passed untouched to overlapRegions().
#   count.once  when TRUE-like, an A region overlapping several B regions is
#               counted a single time; otherwise overlapRegions() is asked for
#               the total count directly (default FALSE).
#   ...         forwarded to overlapRegions().
#
# Returns the resulting count.
numOverlaps <- function(A, B, count.once=FALSE, ...) {

  if (missing(A)) stop("A is missing")
  if (missing(B)) stop("B is missing")

  if (count.once) {
    # only.boolean=TRUE yields a logical per region; count the TRUE entries
    overlaps.any <- overlapRegions(A, B, only.boolean=TRUE, ...)
    n.overlapping <- length(which(overlaps.any))
    return(n.overlapping)
  }

  overlapRegions(A, B, only.count=TRUE, ...)
}
# uniqueRegions: parts of the genome covered by exactly one of two region sets.
#
#   A, B  region sets in any format accepted by toGRanges().
#
# Computed as (A merged with B) minus (regions common to A and B); returns the
# value of subtractRegions() for those two intermediate sets.
uniqueRegions <- function(A, B) {

  if (missing(A)) stop("A is missing")
  if (missing(B)) stop("B is missing")

  gr.a <- toGRanges(A)
  gr.b <- toGRanges(B)

  # diff = union - intersection
  covered.by.any <- mergeRegions(gr.a, gr.b)
  covered.by.both <- commonRegions(gr.a, gr.b)

  subtractRegions(covered.by.any, covered.by.both)
}
accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 13 | } 14 | \value{ 15 | It returns a \code{\link{GenomicRanges}} object with the regions resulting from the merging process. Any two overlapping regions from any of the two sets will be fused into one. 16 | } 17 | \description{ 18 | Merges the overlapping regions from two region sets. The two region sets are first merged into one and then overlapping regions are fused. 19 | } 20 | \note{ 21 | All metadata (additional columns in the region set in addition to chromosome, start and end) will be ignored and not present in the returned region set. 22 | 23 | The implementation relies completely in the \code{\link{reduce}} function from \code{IRanges} package. 24 | } 25 | \examples{ 26 | A <- data.frame("chr1", c(1, 5, 20, 30), c(8, 13, 28, 40), x=c(1,2,3,4), y=c("a", "b", "c", "d")) 27 | 28 | B <- data.frame("chr1", 25, 35) 29 | 30 | merges <- mergeRegions(A, B) 31 | 32 | plotRegions(list(A, B, merges), chromosome="chr1", regions.labels=c("A", "B", "merges"), regions.colors=3:1) 33 | 34 | } 35 | \seealso{ 36 | \code{\link{plotRegions}}, \code{\link{toDataframe}}, \code{\link{toGRanges}}, \code{\link{subtractRegions}}, \code{\link{splitRegions}}, \code{\link{extendRegions}}, \code{\link{joinRegions}}, \code{\link{commonRegions}}, \code{\link{overlapRegions}} 37 | } 38 | -------------------------------------------------------------------------------- /man/resampleGenome.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/resampleGenome.R 3 | \name{resampleGenome} 4 | \alias{resampleGenome} 5 | \title{resampleGenome} 6 | \usage{ 7 | resampleGenome(A, simple = FALSE, per.chromosome = FALSE, genome="hg19", min.tile.width=1000, ...) 
8 | } 9 | \arguments{ 10 | \item{A}{an object of class GenomicRanges} 11 | 12 | \item{simple}{logical, if TRUE the randomization process will not take into account the specific width of each region in A. (default = FALSE)} 13 | 14 | \item{per.chromosome}{logical, if TRUE the randomization will be performed by chromosome. (default = FALSE)} 15 | 16 | \item{genome}{character or GenomicRanges, genome used for the randomization} 17 | 18 | \item{min.tile.width}{integer, the minimum size of the genome tiles. If they are too small, the function gets very slow and may even fail to work. (default = 1000, 1kb tiles)} 19 | 20 | \item{...}{further arguments to be passed to other methods.} 21 | } 22 | \value{ 23 | a \code{\link{GenomicRanges}} object. A sample of the genome tiles with the same length as A. 24 | } 25 | \description{ 26 | Fast alternative to randomizeRegions. It creates a tiling (binning) of the whole genome 27 | with tiles the mean size of the regions in A and then places the regions by sampling a 28 | length(A) number of tiles and placing the resampled regions there. 
29 | } 30 | \examples{ 31 | 32 | A <- data.frame(chr=1, start=c(2,12,28,35), end=c(5,25,33,43)) 33 | 34 | B <- resampleGenome(A) 35 | B 36 | width(B) 37 | 38 | B2 <- resampleGenome(A, simple=TRUE) 39 | B2 40 | width(B2) 41 | 42 | resampleGenome(A, per.chromosome=TRUE) 43 | 44 | 45 | } 46 | \seealso{ 47 | \code{\link{toDataframe}}, \code{\link{toGRanges}}, \code{\link{randomizeRegions}}, \code{\link{createRandomRegions}} 48 | } 49 | -------------------------------------------------------------------------------- /man/joinRegions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/joinRegions.R 3 | \name{joinRegions} 4 | \alias{joinRegions} 5 | \title{Join Regions} 6 | \usage{ 7 | joinRegions(A, min.dist=1) 8 | } 9 | \arguments{ 10 | \item{A}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 11 | 12 | \item{min.dist}{an integer indicating the minimum distance required between two regions in order to not fuse them. Any pair of regions closer than \code{min.dist} bases will be fused in a larger region. Defaults to 1, so it will only join overlapping regions.} 13 | } 14 | \value{ 15 | It returns a \code{\link{GenomicRanges}} object with the regions resulting from the joining process. 16 | } 17 | \description{ 18 | Joins the regions from a region set A that are less than \code{min.dist} bases apart. 19 | } 20 | \note{ 21 | All metadata (additional columns in the region set in addition to chromosome, start and end) will be ignored and not present in the returned region set. 22 | 23 | The implementation relies completely in the \code{\link{reduce}} function from \code{IRanges} package. 
# Unit tests for the built-in evaluation functions (numOverlaps, meanDistance).
library(regioneR)
context("Built-in Evaluation Functions")

#Define some GRanges to use in the tests
emptyGR <- toGRanges(data.frame(chr=character(), start=numeric(), end=numeric()))
smallA <- toGRanges(data.frame(chr=rep(c("chr1", "chr2"), 10), start=100*(1:20), end=120*(1:20)))
smallB <- toGRanges(data.frame(chr=rep(c("chr2", "chr1"), 10), start=100*(1:20), end=105*(1:20)))

bigA <- toGRanges(system.file("extdata", "my.special.genes.txt", package="regioneR"))
# NOTE(review): the repository listing for inst/extdata shows only
# my.altered.regions.txt, my.regular.genes.txt and my.special.genes.txt;
# confirm that all.genes.txt is actually shipped with the package.
universeA <- toGRanges(system.file("extdata", "all.genes.txt", package="regioneR"))
bigB <- toGRanges(system.file("extdata", "my.altered.regions.txt", package="regioneR"))

#Test numOverlaps
test_that("the numOverlaps function returns a correct result", {
  # Small hand-made sets, including the empty region set
  expect_equal(numOverlaps(smallA, smallA, count.once=FALSE), 38)
  expect_equal(numOverlaps(smallA, smallA, count.once=TRUE), length(smallA))
  expect_equal(numOverlaps(smallA, smallB), 18)
  expect_equal(numOverlaps(smallA, smallB, count.once=TRUE), 15)
  expect_equal(numOverlaps(emptyGR, emptyGR), 0)
  expect_equal(numOverlaps(smallA, emptyGR, count.once=FALSE), 0)
  expect_equal(numOverlaps(smallA, emptyGR, count.once=TRUE), 0)

  # Region sets read from the package's example data files
  expect_equal(numOverlaps(bigA, universeA, count.once=TRUE), length(bigA))
  # Omitting count.once must be the same as count.once=FALSE. (The previous
  # version compared a call against itself, which could never fail.)
  expect_equal(numOverlaps(universeA, bigA, count.once=FALSE), numOverlaps(universeA, bigA))
  # The total number of overlaps does not depend on the argument order
  expect_equal(numOverlaps(universeA, bigA, count.once=FALSE), numOverlaps(bigA, universeA))

  # Unknown extra arguments passed through ... must not break the call
  expect_equal(numOverlaps(smallA, emptyGR, foo=TRUE, bar=3, baz="C"), 0)
})

#meanDistance
test_that("the meanDistance function returns a correct result", {
  expect_equal(meanDistance(smallA, smallB), 10)
  expect_equal(meanDistance(smallA, smallA), 0)
  expect_equal(meanDistance(A=smallA, B=smallA), 0)
})
Any chromosome not in the vector will be filtered out. If keep.chr is supplied, organism and chr.type are ignored.} 17 | } 18 | \value{ 19 | A \code{\link{GRanges}} object containing only the regions in the original region set belonging to the selected chromosomes. All regions in non selected chromosomes are removed. 20 | } 21 | \description{ 22 | Filters the chromosomes in a region set. It can either filter using a predefined chromosome set (e.g. "autosomal 23 | chromosomes in Homo sapiens") or using a custom chromosome set (e.g. only chromosomes "chr22" and "chrX") 24 | } 25 | \examples{ 26 | 27 | g <- getGenomeAndMask("hg19")$genome 28 | listChrTypes() 29 | g <- filterChromosomes(g, chr.type="autosomal", organism="hg19") 30 | g <- filterChromosomes(g, keep.chr=c("chr1", "chr2", "chr3")) 31 | 32 | 33 | } 34 | \seealso{ 35 | \code{\link{getGenomeAndMask}}, \code{\link{listChrTypes}}, \code{\link{getChromosomesByOrganism}} 36 | } 37 | -------------------------------------------------------------------------------- /R/plot.localZScoreResultsList.R: -------------------------------------------------------------------------------- 1 | #' Plot a list of localZscore results 2 | #' 3 | #' @description 4 | #' Function for plotting a \code{localZScoreResultsList} object. 5 | #' 6 | #' @method plot localZScoreResultsList 7 | #' 8 | #' @param x an object of class \code{localZScoreResultsList}. 9 | #' @param main a character specifying the main title of the plot. Defaults to no title. 10 | #' @param num.x.labels a numeric specifying the number of ticks to label the x axis. The total number will be 2*num.x.labels + 1. Defaults to 5. 11 | #' @param ... further arguments to be passed to or from methods. 12 | #' 13 | #' @return A plot is created on the current graphics device. 
# plot.localZScoreResultsList: draw every element of a localZScoreResultsList
# in a grid of panels on the current graphics device.
#
#   x             an object of class localZScoreResultsList.
#   ncol          number of columns of the panel grid. When NA (the default) it
#                 is set to floor(sqrt(length(x))).
#   main          accepted for interface compatibility.
#   num.x.labels  accepted for interface compatibility.
#   ...           accepted for interface compatibility.
#
# NOTE(review): main, num.x.labels and ... are not forwarded to the individual
# plot() calls (the original did not forward them either). Whether they should
# be depends on the signature of the per-element plot method - confirm.
#
# Returns NULL invisibly. The graphical parameters changed by the function are
# restored when it exits, also if one of the plots fails.
plot.localZScoreResultsList <- function(x, ncol=NA, main="", num.x.labels=5, ...) {

  if(!is(x, "localZScoreResultsList")) stop("x must be a localZScoreResultsList object")

  # Nothing to draw: avoid computing a 0-column layout (0/0 rows)
  if(length(x) == 0) return(invisible(NULL))

  if(is.na(ncol)) ncol <- floor(sqrt(length(x)))

  if(!is.numeric(ncol) || length(ncol) != 1 || ncol < 1) stop("ncol must be a positive number")

  nrow <- ceiling(length(x)/ncol)

  # par() returns the previous settings as a list: it has to be handed back to
  # par() as is. The original called par(mfrow=old.par), which passes that list
  # as the value of mfrow and is not a valid setting.
  old.par <- par(mfrow=c(nrow, ncol))
  on.exit(par(old.par), add=TRUE)

  lapply(x, plot)

  invisible(NULL)
}
# mergeRegions: fuse the overlapping regions of two region sets.
#
#   A, B  region sets in any format accepted by toGRanges().
#
# Both sets are converted with toGRanges(), concatenated without their metadata
# columns (ignore.mcols=TRUE) and the pooled set is passed to
# GenomicRanges::reduce(), whose result is returned.
mergeRegions <- function(A, B) {

  if (missing(A)) stop("A is missing")
  if (missing(B)) stop("B is missing")

  # Pool both sets into a single one, dropping any metadata
  pooled <- c(toGRanges(A), toGRanges(B), ignore.mcols=TRUE)

  GenomicRanges::reduce(pooled)
}
8 | } 9 | \arguments{ 10 | \item{FUN}{Function. the function to be partially applied} 11 | 12 | \item{param.name}{Character. The name of the parameter to pre-set.} 13 | 14 | \item{values}{A list or vector of values to preassign. A function will be created for each of the values in values. If present, the names of the list will be the names of the functions.} 15 | 16 | \item{func.names}{Character. The names of the functions created. Useful to identify the functions created. Defaults to the names of the values list or to Function1, Function2... if the values list has no names.} 17 | } 18 | \value{ 19 | It returns a list of functions with parameter param.value pre-set to values. 20 | } 21 | \description{ 22 | Partially applies (the standard Curry function in functional programming) a list of arguments 23 | to a function and returns a list of preapplied functions. The result of this function is a 24 | list of functions suitable for the multiple evaluation functions in permTest. 25 | } 26 | \note{ 27 | It uses the code posted by "hadley" at http://stackoverflow.com/questions/6547219/how-to-bind-function-arguments 28 | } 29 | \examples{ 30 | f <- function(a, b) { 31 | return(a+b) 32 | } 33 | 34 | funcs <- createFunctionsList(FUN=f, param.name="b", values=c(1,2,3), func.names=c("plusone", "plustwo", "plusthree")) 35 | 36 | funcs$plusone(2) 37 | funcs$plusone(10) 38 | funcs$plusthree(2) 39 | 40 | A <- createRandomRegions(nregions=20, length.mean=10000000, length.sd=0, mask=NA) 41 | B <- createRandomRegions(nregions=20, length.mean=10000000, length.sd=0, mask=NA) 42 | 43 | overlapsWith <- createFunctionsList(FUN=numOverlaps, param.name="B", values=list(a=A, b=B)) 44 | overlapsWith$a(A=A) 45 | overlapsWith$b(A=A) 46 | 47 | } 48 | \seealso{ 49 | \code{\link{permTest}}, \code{\link{overlapPermTest}} 50 | } 51 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 
1 | CHANGES IN VERSION 1.18.0 2 | ----------------------- 3 | 4 | NEW FEATURES 5 | 6 | o Expanded toGRanges support. It is now possible to transform coverage 7 | objects (i.e. toGRanges(coverage(A))) into GRanges. It also supports 8 | ".assoc" files produced by PLINK. 9 | o overlapPermTest now supports multiple region sets in B and will perform 10 | a multi- permutation test against each one much faster than testing them 11 | independently. 12 | 13 | BUG FIXES 14 | 15 | o Multiple bug fixes 16 | 17 | 18 | CHANGES IN VERSION 1.13.2 19 | ----------------------- 20 | 21 | BUG FIXES 22 | 23 | o createRandomRegions ignored the non.overlapping argument. It does work now. 24 | 25 | 26 | CHANGES IN VERSION 1.13.1 27 | ----------------------- 28 | 29 | NEW FEATURES 30 | 31 | o Revamped toGRanges now accepts genome region descriptions as used by 32 | UCSC and IGV ("chr9:23000-25000"). It also may take a genome parameter 33 | and set the genome information of the GRanges accordingly. 34 | 35 | 36 | CHANGES IN VERSION 1.9.2 37 | ----------------------- 38 | 39 | NEW FEATURES 40 | 41 | o Simplified the interface of toGRanges for simpler use when manually 42 | creating GRanges. Now toGRanges("chr1", 10, 20) is valid. 43 | 44 | BUG FIXES 45 | 46 | o Multiple minor bug fixes 47 | 48 | 49 | 50 | CHANGES IN VERSION 1.1.8 51 | ----------------------- 52 | 53 | NEW FEATURES 54 | 55 | o Added new functionality to permTest to use multiple evaluation functions 56 | with a single randomization procedure. 
This gives a significant speedup 57 | when comparing a single region set with multiple other features 58 | 59 | o Created a new function createFunctionsList() that given a function 60 | and a list of values, creates a list of curried functions 61 | (e.g with one parameter preassigned to each of the given values) 62 | 63 | PERFORMANCE IMPROVEMENTS 64 | 65 | o Complete rewrite of randomizeRegions() resulting in a 10 to 100 fold speedup 66 | 67 | BUG FIXES 68 | 69 | o Multiple minor bug fixes -------------------------------------------------------------------------------- /R/joinRegions.R: -------------------------------------------------------------------------------- 1 | #' Join Regions 2 | #' 3 | #' @description 4 | #' Joins the regions from a region set A that are less than \code{min.dist} bases apart. 5 | #' 6 | #' @note 7 | #' All metadata (additional columns in the region set in addition to chromosome, start and end) will be ignored and not present in the returned region set. 8 | #' 9 | #' The implementation relies completely in the \code{\link{reduce}} function from \code{IRanges} package. 10 | #' 11 | #' @usage 12 | #' joinRegions(A, min.dist=1) 13 | #' 14 | #' @param A a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...) 15 | #' @param min.dist an integer indicating the minimum distance required between two regions in order to not fuse them. Any pair of regions closer than \code{min.dist} bases will be fused in a larger region. Defaults to 1, so it will only join overlapping regions. 16 | #' 17 | #' @return 18 | #' It returns a \code{\link{GenomicRanges}} object with the regions resulting from the joining process. 
# joinRegions: join the regions of a region set that lie close to each other.
#
#   A         a region set in any format accepted by toGRanges().
#   min.dist  numeric, handed to GenomicRanges::reduce() as min.gapwidth
#             (default 1).
#
# Returns the result of GenomicRanges::reduce() on the converted region set.
# Stops when A is not supplied or min.dist is not numeric.
joinRegions <- function(A, min.dist=1) {

  if (missing(A)) stop("A is missing")
  if (!is.numeric(min.dist)) stop("min.dist must be numeric")

  regions <- toGRanges(A)

  # The joining itself is delegated entirely to reduce()
  GenomicRanges::reduce(regions, min.gapwidth=min.dist)
}
# resampleRegions: draw a random subset of `universe` with as many regions
# as `A`.
#
# Arguments:
#   A               region set in any format accepted by toGRanges().
#   universe        region set (same formats) the sample is drawn from.
#   per.chromosome  logical. If TRUE, for every seqlevel of A the function
#                   draws as many regions from that chromosome of the universe
#                   as A has on it. If FALSE, length(A) regions are drawn from
#                   the whole universe.
#   ...             accepted for interface compatibility; not used here.
#
# Returns a subset of `universe` (sampling is without replacement).
#
# Stops with an explicit message when more regions are requested than the
# universe (or the relevant chromosome of it) contains.
resampleRegions <- function(A, universe, per.chromosome=FALSE, ...) {

  if(!hasArg(A)) stop("A is missing")
  if(!hasArg(universe)) stop("universe is missing")
  if(!is.logical(per.chromosome)) stop("per.chromosome must be logical")

  A <- toGRanges(A)
  universe <- toGRanges(universe)

  # Draw `n` regions without replacement from `pool`.
  # sample.int() is used instead of sample(1:length(pool), n) because
  # 1:0 evaluates to c(1, 0), which made an empty pool yield wrong indices.
  drawRegions <- function(pool, n, where) {
    available <- length(pool)
    if(n > available) {
      stop("cannot resample ", n, " regions from ", where,
           ": it only contains ", available, " regions")
    }
    return(pool[sample.int(available, n)])
  }

  if(per.chromosome) {
    chrResample <- function(chr) {
      Achr <- A[seqnames(A) == chr]
      universe.chr <- universe[seqnames(universe) == chr]
      return(drawRegions(universe.chr, length(Achr),
                         where=paste0("chromosome ", chr, " of the universe")))
    }

    chr.resampled <- lapply(as.list(seqlevels(A)), chrResample)

    if(length(chr.resampled) == 0) {
      # do.call(c, list()) would return NULL; return an empty region set instead
      resampled <- universe[0]
    } else {
      resampled <- do.call(c, chr.resampled)
    }

  } else {
    resampled <- drawRegions(universe, length(A), where="the universe")
  }

  return(resampled)

}
toGRanges(data.frame(chr=rep(c("chr2", "chr1"), 10), start=100*(1:20), end=105*(1:20))) 8 | 9 | bigRegionsA <- toGRanges(data.frame(chr=rep(c("chr1", "chr2"), 10), start=100*(1:20), end=1200000*(1:20))) 10 | 11 | gam <- getGenomeAndMask("hg19") 12 | 13 | #Randomize Regions 14 | test_that("the class of randomized regions is correct (randomizeRegions)", { 15 | expect_is(randomizeRegions(smallA), "GRanges") 16 | expect_is(randomizeRegions(emptyGR), "GRanges") 17 | }) 18 | 19 | test_that("the number of randomized regions is correct (randomizeRegions)", { 20 | expect_equal(length(randomizeRegions(smallA)), length(smallA)) 21 | expect_equal(length(randomizeRegions(smallA, per.chromosome=TRUE)), length(smallA)) 22 | expect_equal(length(randomizeRegions(smallA, non.overlapping=TRUE)), length(smallA)) 23 | 24 | }) 25 | 26 | test_that("the randomized regions do not overlap the mask (randomizeRegions)", { 27 | expect_equal(numOverlaps(randomizeRegions(bigRegionsA, genome=gam$genome, mask=gam$mask), gam$mask), 0) 28 | expect_equal(numOverlaps(randomizeRegions(bigRegionsA, genome=gam$genome, mask=gam$mask, per.chromosome=TRUE), gam$mask), 0) 29 | expect_equal(numOverlaps(randomizeRegions(bigRegionsA, genome=gam$genome, mask=gam$mask, non.overlapping=TRUE), gam$mask), 0) 30 | }) 31 | 32 | test_that("the randomized regions do not overlap betwen them when allow.overlaps=FALSE (randomizeRegions)", { 33 | overlapsItself <- function(A) { 34 | ff <- findOverlaps(A, A) 35 | overlapping.regs <- unique(queryHits(ff)[((queryHits(ff) - subjectHits(ff)) != 0)]) #which region overlaps any region that is not itself? 
36 | return(length(overlapping.regs)>0) 37 | } 38 | expect_false(overlapsItself(randomizeRegions(bigRegionsA, allow.overlaps=FALSE))) 39 | expect_false(overlapsItself(randomizeRegions(bigRegionsA[1:10], per.chromosome=TRUE, allow.overlaps=FALSE))) 40 | }) 41 | 42 | -------------------------------------------------------------------------------- /R/extendRegions.R: -------------------------------------------------------------------------------- 1 | #' Extend Regions 2 | #' 3 | #' @description 4 | #' Extends the regions a number of bases at each end. Negative numbers will reduce the region instead of enlarging it. 5 | #' 6 | #' @note 7 | #' If negative values are provided and the new extremes are "flipped", the function will fail. It does not check if the extended regions fit into the genome. 8 | #' 9 | #' @usage 10 | #' extendRegions(A, extend.start=0, extend.end=0) 11 | #' 12 | #' @param A a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...) 13 | #' @param extend.start an integer. The number of bases to be subtracted from the start of the region. 14 | #' @param extend.end an integer. The number of bases to be added at the end of the region. 15 | #' 16 | #' @return 17 | #' a \code{\link{GenomicRanges}} object with the extended regions. 
# extendRegions: move the start and end of every region in A.
#
# Arguments:
#   A             region set in any format accepted by toGRanges().
#   extend.start  numeric; subtracted from the start of every region
#                 (negative values move the start to the right).
#   extend.end    numeric; added to the end of every region
#                 (negative values move the end to the left).
#
# Returns the GRanges produced by toGRanges(A) with the new starts and ends.
#
# It does not check for inclusion in the genome: start may end up being less
# than 1 and end greater than the chromosome length. If the final start and
# end are "flipped", the assignment fails.
extendRegions <- function(A, extend.start=0, extend.end=0) {

  if(!hasArg(A)) stop("A is missing")
  if(!is.numeric(extend.start)) stop("extend.start must be numeric")
  if(!is.numeric(extend.end)) stop("extend.end must be numeric")

  A <- toGRanges(A)

  new.start <- GenomicRanges::start(A) - extend.start
  new.end <- GenomicRanges::end(A) + extend.end

  # Widen the end first. Assigning the start directly could create a transient
  # region whose start lies beyond its (still old) end, failing even when the
  # final region is valid (e.g. extend.start=-5 with extend.end=5 on a short region).
  GenomicRanges::end(A) <- pmax(GenomicRanges::end(A), new.end)
  GenomicRanges::start(A) <- new.start
  GenomicRanges::end(A) <- new.end

  return(A)

}
\code{\link{BSgenome}}, a genome name, a \code{\link{data.frame}} or a GRanges. 14 | 15 | A \code{\link{GRanges}} representing the genome with one region per chromosome. 16 | } 17 | \description{ 18 | Function to obtain a genome 19 | } 20 | \details{ 21 | If genome is a \code{\link{BSgenome}} (from the package \code{BioStrings}), it will transform it into a \code{\link{GRanges}} with chromosomes and chromosome lengths. 22 | 23 | If genome is a \code{\link{data.frame}} with 3 columns, it will transform it into a GRanges. 24 | 25 | If genome is a \code{\link{data.frame}} with 2 columns, it will assume the first is the chromosome, the second is the length of the chromosomes and will add 1 as start. 26 | 27 | If genome is a \code{character} string uniquely identifying a \code{\link{BSgenome}} installed in the system (e.g. "hg19", "mm10",... but not "hg"), it will create a genome based on the \code{\link{BSgenome}} object identified by the character string. 28 | 29 | If genome is a \code{ \link{GRanges}} object, it will return it as is. 30 | 31 | If genome is non of the above, it will give a warning and try to transform it into a GRanges using \link{toGRanges}. This can be helpful if \code{genome} is a connection to a file. 32 | } 33 | \note{ 34 | This function is memoised (cached) using the \code{\link{memoise}} package. To empty the cache, use \code{\link{forget}(getGenome)} 35 | 36 | Please note that passing this function the path to a file will not work, since it will assume the character is the identifier of a genome. 
To read the genome 37 | from a file, please use \code{getGenome(toGRanges("path/to/file"))} 38 | } 39 | \examples{ 40 | getGenome("hg19") 41 | 42 | getGenome(data.frame(c("chrA", "chrB"), c(15000000, 10000000))) 43 | 44 | } 45 | \seealso{ 46 | \code{\link{getMask}}, \code{\link{getGenomeAndMask}}, \code{\link{characterToBSGenome}}, \code{\link{maskFromBSGenome}}, \code{\link{emptyCacheRegioneR}} 47 | } 48 | -------------------------------------------------------------------------------- /man/plot.permTestResults.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.permTestResults.R 3 | \name{plot.permTestResults} 4 | \alias{plot.permTestResults} 5 | \title{Function for plotting the results from a \code{permTestResults} object.} 6 | \usage{ 7 | \method{plot}{permTestResults}( 8 | x, 9 | pvalthres = 0.05, 10 | plotType = "Tailed", 11 | main = "", 12 | xlab = NULL, 13 | ylab = "", 14 | ylim = NULL, 15 | xlim = NULL, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{an object of class \code{permTestResults}.} 21 | 22 | \item{pvalthres}{p-value threshold for significance. Default is 0.05.} 23 | 24 | \item{plotType}{the type of plot to display. This must be one of \code{"Area"} or \code{"Tailed"}. Default is \code{"Area"}.} 25 | 26 | \item{main}{a character specifying the title of the plot. Defaults to "".} 27 | 28 | \item{xlab}{a character specifying the label of the x axis. Defaults to NULL, which produces a plot with the evaluation function name as the x axis label.} 29 | 30 | \item{ylab}{a character specifying the label of the y axis. Defaults to "".} 31 | 32 | \item{ylim}{defines the y limits of the plot. Passed to the underlying \code{plot} call.} 33 | 34 | \item{xlim}{defines the x limits of the plot. 
Passed to the underlying \code{plot} call.} 35 | 36 | \item{...}{further arguments to be passed to or from methods.} 37 | } 38 | \value{ 39 | A plot is created on the current graphics device. 40 | } 41 | \description{ 42 | Function for plotting the results from a \code{permTestResults} object. 43 | } 44 | \examples{ 45 | 46 | genome <- filterChromosomes(getGenome("hg19"), keep.chr="chr1") 47 | A <- createRandomRegions(nregions=20, length.mean=10000000, length.sd=20000, genome=genome, non.overlapping=FALSE) 48 | B <- c(A, createRandomRegions(nregions=10, length.mean=10000, length.sd=20000, genome=genome, non.overlapping=FALSE)) 49 | 50 | pt <- overlapPermTest(A=A, B=B, ntimes=10, genome=genome, non.overlapping=FALSE) 51 | summary(pt) 52 | plot(pt) 53 | plot(pt, plotType="Tailed") 54 | 55 | pt2 <- permTest(A=A, B=B, ntimes=10, alternative="auto", genome=genome, evaluate.function=meanDistance, randomize.function=randomizeRegions, non.overlapping=FALSE) 56 | summary(pt2) 57 | plot(pt2) 58 | plot(pt2, plotType="Tailed") 59 | 60 | } 61 | \seealso{ 62 | \code{\link{permTest}} 63 | } 64 | -------------------------------------------------------------------------------- /man/plot.permTestResultsList.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.permTestResultsList.R 3 | \name{plot.permTestResultsList} 4 | \alias{plot.permTestResultsList} 5 | \title{Function for plotting the results from a \code{permTestResultsList} object when more than one evaluation function was used.} 6 | \usage{ 7 | \method{plot}{permTestResultsList}( 8 | x, 9 | ncol = NA, 10 | pvalthres = 0.05, 11 | plotType = "Tailed", 12 | main = "", 13 | xlab = NULL, 14 | ylab = "", 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{x}{an object of class \code{permTestResultsList}.} 20 | 21 | \item{ncol}{number of plots per row. 
ncol=NA means ncol=floor(sqrt(length(x)))so the plot is more or less square (default=NA)} 22 | 23 | \item{pvalthres}{p-value threshold for significance. Default is 0.05.} 24 | 25 | \item{plotType}{the type of plot to display. This must be one of \code{"Area"} or \code{"Tailed"}. Default is \code{"Area"}.} 26 | 27 | \item{main}{a character specifying the title of the plot. Defaults to "".} 28 | 29 | \item{xlab}{a character specifying the label of the x axis. Defaults to NULL, which produces a plot with the evaluation function name as the x axis label.} 30 | 31 | \item{ylab}{a character specifying the label of the y axis. Defaults to "".} 32 | 33 | \item{...}{further arguments to be passed to or from methods.} 34 | } 35 | \value{ 36 | A plot is created on the current graphics device. 37 | } 38 | \description{ 39 | Function for plotting the results from a \code{permTestResultsList} object when more than one evaluation function was used. 40 | } 41 | \examples{ 42 | 43 | genome <- filterChromosomes(getGenome("hg19"), keep.chr="chr1") 44 | A <- createRandomRegions(nregions=20, length.mean=10000000, length.sd=20000, genome=genome, non.overlapping=FALSE) 45 | B <- c(A, createRandomRegions(nregions=10, length.mean=10000, length.sd=20000, genome=genome, non.overlapping=FALSE)) 46 | 47 | pt <- overlapPermTest(A=A, B=B, ntimes=10, genome=genome, non.overlapping=FALSE) 48 | summary(pt) 49 | plot(pt) 50 | plot(pt, plotType="Tailed") 51 | 52 | pt2 <- permTest(A=A, B=B, ntimes=10, alternative="auto", genome=genome, evaluate.function=list(distance=meanDistance, numberOfOverlaps=numOverlaps), randomize.function=randomizeRegions, non.overlapping=FALSE) 53 | summary(pt2) 54 | plot(pt2) 55 | plot(pt2, plotType="Tailed") 56 | 57 | } 58 | \seealso{ 59 | \code{\link{permTest}} 60 | } 61 | -------------------------------------------------------------------------------- /R/plot.permTestResultsList.R: -------------------------------------------------------------------------------- 1 | # Plot 
Permutation Test Results List 2 | # 3 | # @description 4 | #' Function for plotting the results from a \code{permTestResultsList} object when more than one evaluation function was used. 5 | #' 6 | #' @method plot permTestResultsList 7 | #' 8 | #' @param x an object of class \code{permTestResultsList}. 9 | #' @param ncol number of plots per row. ncol=NA means ncol=floor(sqrt(length(x))) so the plot is more or less square (default=NA) 10 | #' @param pvalthres p-value threshold for significance. Default is 0.05. 11 | #' @param plotType the type of plot to display. This must be one of \code{"Area"} or \code{"Tailed"}. Default is \code{"Tailed"}. 12 | #' @param main a character specifying the title of the plot. Defaults to "". 13 | #' @param xlab a character specifying the label of the x axis. Defaults to NULL, which produces a plot with the evaluation function name as the x axis label. 14 | #' @param ylab a character specifying the label of the y axis. Defaults to "". 15 | #' @param ... further arguments to be passed to or from methods. 16 | #' 17 | #' @return A plot is created on the current graphics device. 
# plot.permTestResultsList: plot every element of a permTestResultsList in a
# grid of panels on the current graphics device.
#
# Arguments:
#   x          an object of class permTestResultsList.
#   ncol       number of plots per row. NA (default) means
#              floor(sqrt(length(x))).
#   pvalthres, plotType, main, xlab, ylab, ...
#              forwarded to the plot() call made for every element of x.
#
# Returns NULL invisibly; called for its side effect of plotting.
#
# The previous `mfrow` setting is restored on exit, even if one of the
# individual plots fails.
plot.permTestResultsList <- function(x, ncol=NA, pvalthres=0.05, plotType="Tailed", main="", xlab=NULL, ylab="", ...) {

  if(!is(x, "permTestResultsList")) stop("x must be a permTestResultsList object")
  if(length(x) == 0) stop("x does not contain any results to plot")

  if(is.na(ncol)) ncol <- floor(sqrt(length(x)))

  nrow <- ceiling(length(x)/ncol)

  old.par <- par(mfrow=c(nrow, ncol))
  on.exit(par(old.par), add=TRUE)

  # The named formals must be forwarded explicitly: they are not part of `...`
  # and were previously dropped, so user choices such as plotType were ignored.
  lapply(x, function(res) {
    plot(res, pvalthres=pvalthres, plotType=plotType, main=main, xlab=xlab, ylab=ylab, ...)
  })

  invisible(NULL)

}
# plot.localZScoreResults: draw the shifted z-scores stored in a
# localZScoreResults object as a line plot.
#
# Arguments:
#   x             object providing `window`, `shifts` and `shifted.z.scores`.
#   main          plot title; an empty string is replaced by "Local z-score".
#   num.x.labels  number of x axis ticks labelled on each side of 0
#                 (2*num.x.labels + 1 ticks in total). 0 suppresses the axis.
#   ...           passed to both the plot() and the axis() calls.
#
# Called for its side effect: a plot on the current graphics device.
plot.localZScoreResults <- function(x, main="", num.x.labels=5, ...) {

  # Turn a base-pair offset into a short label (e.g. 950 -> "950",
  # 12500 -> "12.5Kb", 1200000 -> "1.2Mb")
  toLabel <- function(n) {
    magnitude <- abs(n)
    if(magnitude < 1000) {
      label <- as.character(n)
    } else if(magnitude < 1000000) {
      label <- paste0(as.character(round(n/10)/100), "Kb")
    } else {
      label <- paste0(as.character(round(n/10000)/100), "Mb")
    }
    return(label)
  }

  if(nchar(main)==0) {
    main <- "Local z-score"
  }

  # Avoid scientific notation in the labels; the previous setting is put back on exit
  previous.options <- options(scipen=999)
  on.exit(options(previous.options), add=TRUE)

  # Tick positions: symmetric around 0, evenly spaced inside the window
  if(num.x.labels < 1) {
    tick.pos <- 0
  } else {
    tick.step <- floor(x$window/num.x.labels)
    right.ticks <- (1:num.x.labels)*tick.step
    tick.pos <- c(rev(-1*right.ticks), 0, right.ticks)
  }
  tick.labels <- sapply(tick.pos, toLabel)

  # The y range always includes at least [-2, 2]
  z.limits <- c(min(x$shifted.z.scores, -2), max(x$shifted.z.scores, 2))

  plot(x=x$shifts, y=x$shifted.z.scores, type="l", ylim=z.limits, ylab="Shifted z-scores", xlab="Shifts", main=main, xaxt="n", las=1, ...)
  if(num.x.labels != 0) {
    axis(1, at=tick.pos, labels=tick.labels, las=2, cex.axis=0.7, tck=-.01, ...)
  }
  box(lwd=1.2)

}
# getGenomeAndMask: build a genome/mask pair (memoised with memoise::memoise).
#
# Arguments:
#   genome  genome object, or a character that characterToBSGenome() resolves.
#   mask    mask in any format toGRanges() accepts. With NULL (default) and a
#           MaskedBSgenome genome, the mask comes from getMask(genome).
#           Anything toGRanges() cannot turn into a GenomicRanges (e.g. NA or
#           NULL for other genome types) results in an empty mask.
#
# Returns list(mask=..., genome=...), where genome is the result of
# getGenome() and mask carries the seqlevels of that genome.
getGenomeAndMask <- memoise::memoise(function(genome, mask=NULL) {

  # A character genome is resolved through the BSgenome packages
  if(is.character(genome)) {
    genome <- characterToBSGenome(genome)
  }

  derive.mask.from.genome <- is(genome, "MaskedBSgenome") && is.null(mask)

  if(derive.mask.from.genome) {
    mask <- getMask(genome)
  } else {
    # Accept the mask if it can be converted to a GRanges; fall back to an
    # empty mask otherwise
    mask <- try(expr=toGRanges(mask), silent=TRUE)
    if(!is(mask, "GenomicRanges")) {
      mask <- toGRanges(data.frame(chr=character(), start=numeric(), end=numeric()))
    }
  }

  genome <- getGenome(genome)

  # Finally, make the mask use the same seqlevels as the genome
  genome.levels <- seqlevels(genome)
  seqlevels(mask) <- genome.levels
  levels(seqnames(mask)) <- genome.levels

  return(list(mask=mask, genome=genome))
})
| export(extendRegions) 17 | export(filterChromosomes) 18 | export(getChromosomesByOrganism) 19 | export(getGenome) 20 | export(getGenomeAndMask) 21 | export(getMask) 22 | export(joinRegions) 23 | export(listChrTypes) 24 | export(localZScore) 25 | export(maskFromBSGenome) 26 | export(meanDistance) 27 | export(meanInRegions) 28 | export(mergeRegions) 29 | export(numOverlaps) 30 | export(overlapGraphicalSummary) 31 | export(overlapPermTest) 32 | export(overlapRegions) 33 | export(permTest) 34 | export(plotRegions) 35 | export(randomizeRegions) 36 | export(recomputePermTest) 37 | export(resampleGenome) 38 | export(resampleRegions) 39 | export(splitRegions) 40 | export(subtractRegions) 41 | export(toDataframe) 42 | export(toGRanges) 43 | export(uniqueRegions) 44 | import(BSgenome) 45 | import(GenomeInfoDb) 46 | import(graphics) 47 | import(parallel) 48 | importFrom(Biostrings,collapse) 49 | importFrom(Biostrings,masks) 50 | importFrom(GenomeInfoDb,seqlevels) 51 | importFrom(GenomeInfoDb,seqnames) 52 | importFrom(GenomicRanges,GRanges) 53 | importFrom(GenomicRanges,GRangesList) 54 | importFrom(GenomicRanges,countOverlaps) 55 | importFrom(GenomicRanges,distanceToNearest) 56 | importFrom(GenomicRanges,elementMetadata) 57 | importFrom(GenomicRanges,end) 58 | importFrom(GenomicRanges,findOverlaps) 59 | importFrom(GenomicRanges,mcols) 60 | importFrom(GenomicRanges,reduce) 61 | importFrom(GenomicRanges,resize) 62 | importFrom(GenomicRanges,shift) 63 | importFrom(GenomicRanges,start) 64 | importFrom(GenomicRanges,tile) 65 | importFrom(GenomicRanges,width) 66 | importFrom(IRanges,IRanges) 67 | importFrom(IRanges,tolower) 68 | importFrom(S4Vectors,Rle) 69 | importFrom(S4Vectors,queryHits) 70 | importFrom(S4Vectors,subjectHits) 71 | importFrom(memoise,forget) 72 | importFrom(memoise,memoise) 73 | importFrom(methods,hasArg) 74 | importFrom(methods,is) 75 | importFrom(rtracklayer,import) 76 | importFrom(stats,dnorm) 77 | importFrom(stats,qnorm) 78 | importFrom(stats,rnorm) 79 | 
# getChromosomesByOrganism: chromosome name sets for a number of organisms.
#
# Returns a named list keyed by organism prefix ("hg", "mm", "bosTau", "ce",
# "danRer", "rheMac", "rn", "sacCer", "dm", "panTro"). Every entry is a list
# with:
#   autosomal  character vector of autosomal chromosome names (absent for danRer)
#   canonical  character vector of canonical chromosome names
#   org.name   scientific name of the organism
#
# All names carry the "chr" prefix.
getChromosomesByOrganism <- function() {

  # Prefix a set of chromosome identifiers with "chr"
  chrNames <- function(ids) {
    paste0("chr", ids)
  }

  yeast.chrs <- chrNames(c(as.character(utils::as.roman(1:16)), "M"))

  # C. elegans chromosomes are named with roman numerals (chrI..chrV, chrX)
  # in the UCSC assemblies, and the species has no Y chromosome
  worm.autosomes <- as.character(utils::as.roman(1:5))

  chromosomesByOrganism <- list(
    hg = list(autosomal=chrNames(1:22),
              canonical=chrNames(c(1:22, "X", "Y")),
              org.name="Homo sapiens"),

    mm = list(autosomal=chrNames(1:19),
              canonical=chrNames(c(1:19, "X", "Y")),
              org.name="Mus musculus"),

    bosTau = list(autosomal=chrNames(1:29),
                  canonical=chrNames(c(1:29, "X", "Y")),
                  org.name="Bos taurus"),

    ce = list(autosomal=chrNames(worm.autosomes),
              canonical=chrNames(c(worm.autosomes, "X")),
              org.name="Caenorhabditis elegans"),

    danRer = list(canonical=chrNames(1:25),
                  org.name="Danio rerio"),

    rheMac = list(autosomal=chrNames(1:20),
                  canonical=chrNames(c(1:20, "X", "Y")),
                  org.name="Macaca mulatta"),

    rn = list(autosomal=chrNames(1:20),
              canonical=chrNames(c(1:20, "X", "Y")),
              org.name="Rattus norvegicus"),

    sacCer = list(autosomal=yeast.chrs,
                  canonical=yeast.chrs,
                  org.name="Saccharomyces cerevisiae"),

    dm = list(autosomal=chrNames(c("2L", "2R", "3L", "3R", "4")),
              canonical=chrNames(c("2L", "2R", "3L", "3R", "4", "X")),
              org.name="Drosophila melanogaster"),

    panTro = list(autosomal=chrNames(c(1, "2A", "2B", 3:22)),
                  canonical=chrNames(c(1, "2A", "2B", 3:22, "X", "Y")),
                  org.name="Pan troglodytes")
  )

  return(chromosomesByOrganism)
}
Internally it uses \code{\link{getGenomeAndMask}}.} 13 | 14 | \item{mask}{The set of regions specifying where a random region can not be (centromeres, repetitive regions, unmappable regions...). A region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}},\code{\link{data.frame}}, ...). If \code{\link{NULL}} it will try to derive a mask from the genome (currently only works is the genome is a character string) and if \code{\link{NA}} it will explicitly give an empty mask.} 15 | 16 | \item{max.mask.overlap}{numeric value} 17 | 18 | \item{max.retries}{numeric value} 19 | 20 | \item{verbose}{a boolean.} 21 | 22 | \item{...}{further arguments to be passed to or from methods.} 23 | } 24 | \value{ 25 | It returns a \code{\link{GenomicRanges}} object with the regions resulting from the randomization process. 26 | } 27 | \description{ 28 | Given a set of regions A and a genome, this function returns a new set of regions created by applying a random 29 | spin to each chromosome. 30 | } 31 | \details{ 32 | This randomization strategy is useful when the spatial relation between the regions in the RS is important and has to be conserved. 
33 | } 34 | \examples{ 35 | A <- data.frame("chr1", c(1, 10, 20, 30), c(12, 13, 28, 40)) 36 | 37 | mask <- data.frame("chr1", c(20000000, 100000000), c(22000000, 130000000)) 38 | 39 | genome <- data.frame(c("chr1", "chr2"), c(1, 1), c(180000000, 20000000)) 40 | 41 | circularRandomizeRegions(A) 42 | 43 | circularRandomizeRegions(A, genome=genome, mask=mask, per.chromosome=TRUE, non.overlapping=TRUE) 44 | 45 | } 46 | \seealso{ 47 | \code{\link{randomizeRegions}}, \code{\link{toDataframe}}, \code{\link{toGRanges}}, \code{\link{getGenome}}, \code{\link{getMask}}, \code{\link{getGenomeAndMask}}, \code{\link{characterToBSGenome}}, \code{\link{maskFromBSGenome}}, \code{\link{resampleRegions}}, \code{\link{createRandomRegions}} 48 | } 49 | -------------------------------------------------------------------------------- /R/meanInRegions.R: -------------------------------------------------------------------------------- 1 | #' Mean In Regions 2 | #' 3 | #' @description 4 | #' Returns the mean of a value defined by a region set over another set of regions. 5 | #' 6 | #' @usage 7 | #' meanInRegions(A, x, col.name=NULL, ...) 8 | #' 9 | #' @param A a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...) 10 | #' @param x a region set in any of the accepted formats with an additional column with a value associated to every region. Regions in \code{x} can be points (single base regions). 11 | #' @param col.name character indicating the name of the column. If NULL and if a column with the name "value" exist, it will be used. The 4th column will be used otherwise (or the 5th if 4th is the strand). 12 | #' @param ... any additional parameter needed 13 | #' 14 | #' @return 15 | #' It returns a numeric value that is the weighted mean of "value" defined in \code{x} over the regions in \code{A}. 
# meanInRegions: weighted mean of a value attached to the regions in `x`
# over the regions in `A`.
#
# Arguments:
#   A         region set in any format accepted by toGRanges().
#   x         region set (same formats) with at least one numeric metadata column.
#   col.name  name of the metadata column of x holding the values. If NULL, a
#             column called "value" is used when present and the first
#             metadata column otherwise. If given, an exact name match is
#             preferred, then the first regular-expression match; if nothing
#             matches, the first metadata column is used and a warning is issued.
#   ...       accepted for interface compatibility; not used here.
#
# Returns sum(overlapping bases * value) / sum(overlapping bases), computed on
# the overlaps reported by overlapRegions(), or NA (with a warning) when there
# is no overlap between x and A.
meanInRegions <- function(A, x, col.name=NULL, ...) {

  if(!hasArg(A)) stop("A is missing")
  if(!hasArg(x)) stop("x is missing")

  A <- toGRanges(A)
  x <- toGRanges(x)

  if(length(GenomicRanges::mcols(x))<1) {
    stop("x does not have a values column")
  }

  # Select the values column
  x.cols <- names(GenomicRanges::mcols(x))
  value.col <- 1
  if(is.null(col.name)) {
    if("value" %in% x.cols) value.col <- match("value", x.cols)
  } else {
    exact <- match(col.name, x.cols)[1]
    if(!is.na(exact)) {
      value.col <- exact
    } else {
      # Keep accepting regular-expression/partial matches as a fallback
      partial <- grep(col.name, x.cols)
      if(length(partial) > 0) {
        value.col <- partial[1]
      } else {
        warning("column '", col.name, "' not found in x. Using the first metadata column instead.")
      }
    }
  }

  if(!is.numeric(GenomicRanges::mcols(x)[,value.col])) {
    stop("the values column in x is not numeric")
  }

  value.col.name <- x.cols[value.col]

  over <- overlapRegions(A=A, B=x, colB=value.col, get.bases=TRUE)

  # NOTE(review): overlapRegions() is defined elsewhere; this assumes it returns
  # a data.frame-like table with one row per overlap - confirm. Both "no
  # columns" and "no rows" are treated as "no overlap".
  if(length(over)==0 || NROW(over)==0) {
    warning("NA returned. There is no overlap between x and A.")
    return(NA)
  }

  # Convert to double BEFORE multiplying: converting the product is too late,
  # since an integer overflow would already have produced NA
  ov.bases <- as.numeric(over$ov.bases)
  ov.values <- as.numeric(over[,value.col.name])

  total.value <- sum(ov.bases * ov.values)
  total.overlap <- sum(ov.bases)

  return(total.value/total.overlap)

}
#'
#' @seealso \code{\link{toDataframe}}, \code{\link{toGRanges}}, \code{\link{randomizeRegions}}, \code{\link{createRandomRegions}}
#'
#'
#' @examples
#'
#' A <- data.frame(chr=1, start=c(2,12,28,35), end=c(5,25,33,43))
#'
#' B <- resampleGenome(A)
#' B
#' width(B)
#'
#' B2 <- resampleGenome(A, simple=TRUE)
#' B2
#' width(B2)
#'
#' resampleGenome(A, per.chromosome=TRUE)
#'
#'
#' @importFrom GenomeInfoDb seqlevels
#' @importFrom GenomeInfoDb seqnames
#' @importFrom GenomicRanges width
#' @importFrom GenomicRanges tile
#' @importFrom GenomicRanges resize
#'
#' @export resampleGenome
#'


# Tiles the genome, samples length(A) tiles with resampleRegions() and resizes
# the sampled tiles (around their centre) to the widths of the regions in A,
# or to the mean width of A when `simple` is TRUE.
#
# A:               region set, converted with toGRanges(A, genome=genome).
# simple:          single logical; TRUE gives every region the mean width of A.
# per.chromosome:  logical, forwarded to resampleRegions().
# genome:          genome identifier or object, resolved with getGenome().
# min.tile.width:  lower bound for the tile width used to build the universe.
# ...:             forwarded to resampleRegions().
# Returns the resized sample produced by resampleRegions(); an empty `A` is
# returned as converted by toGRanges(), since there is nothing to resample.
resampleGenome <- function(A, simple = FALSE, per.chromosome = FALSE, genome = "hg19", min.tile.width=1000, ...) {

  if (!methods::hasArg(A)) {
    stop("A is missing")
  }

  if (!(is.logical(simple) && length(simple) == 1 && !is.na(simple))) {
    stop("simple must be logical")
  }

  if (!is.logical(per.chromosome)) {
    stop("per.chromosome must be logical")
  }

  if (!(is.numeric(min.tile.width) && length(min.tile.width) == 1 && !is.na(min.tile.width) && min.tile.width >= 1)) {
    stop("min.tile.width must be a single positive number")
  }

  A <- toGRanges(A, genome=genome)

  # With no regions the mean width is NaN and the tiling below cannot be built
  if (length(A) == 0) {
    return(A)
  }

  #Build the universe by genome tiling
  region.widths <- GenomicRanges::width(A)
  mwidth <- round(mean(region.widths))
  tile.width <- max(mwidth, min.tile.width)
  universe <- unlist(GenomicRanges::tile(getGenome(genome), width = tile.width))

  #Call resample regions
  resampled <- resampleRegions(A=A, universe = universe, per.chromosome = per.chromosome, ...)

  #And resize the selected regions as needed
  new.widths <- if (simple) mwidth else region.widths
  resampled <- GenomicRanges::resize(resampled, width = new.widths, fix = "center", use.names = FALSE)

  return(resampled)
}

-------------------------------------------------------------------------------- /man/createRandomRegions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/createRandomRegions.R 3 | \name{createRandomRegions} 4 | \alias{createRandomRegions} 5 | \title{Create Random Regions} 6 | \usage{ 7 | createRandomRegions(nregions=100, length.mean=250, length.sd=20, genome="hg19", mask=NULL, non.overlapping=TRUE) 8 | } 9 | \arguments{ 10 | \item{nregions}{The number of regions to be created.} 11 | 12 | \item{length.mean}{The mean size of the regions created. This is not guaranteed to be the mean of the final region set. See note.} 13 | 14 | \item{length.sd}{The standard deviation of the region size. This is not guaranteed to be the standard deviation of the final region set. See note.} 15 | 16 | \item{genome}{The reference genome to use. A valid genome object. Either a \code{\link{GenomicRanges}} or \code{\link{data.frame}} containing one region per whole chromosome or a character uniquely identifying a genome in \code{\link{BSgenome}} (e.g. "hg19", "mm10" but not "hg"). Internally it uses \code{\link{getGenomeAndMask}}.} 17 | 18 | \item{mask}{The set of regions specifying where a random region can not be (centromeres, repetitive regions, unmappable regions...). A region set in any of the accepted formats (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, ...).
\code{\link{NULL}} will try to derive a mask from the genome (currently only works if the genome is a character string) and \code{\link{NA}} explicitly gives an empty mask.} 19 | 20 | \item{non.overlapping}{A boolean stating whether the random regions can overlap (FALSE) or not (TRUE).} 21 | } 22 | \value{ 23 | It returns a \code{\link{GenomicRanges}} object with the regions resulting from the randomization process. 24 | } 25 | \description{ 26 | Creates a set of random regions with a given mean size and standard deviation. 27 | } 28 | \details{ 29 | A set of nregions will be created and randomly placed over the genome. The lengths of the region set will follow a normal distribution with a mean size \code{length.mean} and a standard deviation \code{length.sd}. The new regions can be made explicitly non overlapping by setting \code{non.overlapping} to TRUE. 30 | A mask can be provided so no regions fall in a forbidden part of the genome. 31 | } 32 | \note{ 33 | If the standard deviation of the length is large with respect to the mean, negative lengths might be created. These region lengths will be 34 | transformed into 1 and so, for large standard deviations, the mean and sd of the lengths are not guaranteed to be the ones in the parameters.
35 | } 36 | \examples{ 37 | genome <- data.frame(c("chr1", "chr2"), c(1, 1), c(180000000, 20000000)) 38 | mask <- data.frame("chr1", c(20000000, 100000000), c(22000000, 130000000)) 39 | 40 | createRandomRegions(nregions=10, length.mean=1000, length.sd=500) 41 | 42 | createRandomRegions(nregions=10, genome=genome, mask=mask, non.overlapping=TRUE) 43 | 44 | } 45 | \seealso{ 46 | \code{\link{getGenome}}, \code{\link{getMask}}, \code{\link{getGenomeAndMask}}, \code{\link{characterToBSGenome}}, \code{\link{maskFromBSGenome}}, \code{\link{randomizeRegions}}, \code{\link{resampleRegions}} 47 | } 48 | -------------------------------------------------------------------------------- /R/filterChromosomes.R: -------------------------------------------------------------------------------- 1 | #' filterChromosomes 2 | #' 3 | #' @description 4 | #' Filters the chromosomes in a region set. It can either filter using a predefined chromosome set (e.g. "autosomal 5 | #' chromosomes in Homo sapiens") or using a custom chromosome set (e.g. only chromosomes "chr22" and "chrX") 6 | #' 7 | #' @usage filterChromosomes(A, organism="hg", chr.type="canonical", keep.chr=NULL) 8 | #' 9 | #' @param A a region set in any of the formats accepted by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...) 10 | #' @param organism a character indicating the organism from which to get the predefined chromosome sets. It can be the organism code as used in \code{\link{BSgenome}} (e.g. hg for human, mm for mouse...) or the full genome assembly identifier, since any digit will be removed to get the organism code. 11 | #' @param chr.type a character indicating the specific chromosome set to be used. Usually "autosomal" or "canonical", althought other values could be available for certain organisms. 12 | #' @param keep.chr is a character vector stating the names of the chromosomes to keep. Any chromosome not in the vector will be filtered out. 
#' If keep.chr is supplied, organism and chr.type are ignored.
#'
#' @return
#' A \code{\link{GRanges}} object containing only the regions in the original region set belonging to the selected chromosomes. All regions in non selected chromosomes are removed.
#'
#' @seealso \code{\link{getGenomeAndMask}}, \code{\link{listChrTypes}}, \code{\link{getChromosomesByOrganism}}
#'
#' @examples
#'
#' g <- getGenomeAndMask("hg19")$genome
#' listChrTypes()
#' g <- filterChromosomes(g, chr.type="autosomal", organism="hg19")
#' g <- filterChromosomes(g, keep.chr=c("chr1", "chr2", "chr3"))
#'
#'
#' @export filterChromosomes


# Keeps only the regions of `A` lying on a selected set of chromosomes.
#
# A:         region set in any format accepted by toGRanges().
# organism:  organism code or assembly identifier; digits are stripped before
#            it is looked up in getChromosomesByOrganism().
# chr.type:  name of the predefined chromosome set to use for that organism.
# keep.chr:  explicit chromosome names; when supplied, organism and chr.type
#            are ignored.
# Stops when the organism or chromosome type is not recognized, or when
# chr.type is "custom" and no keep.chr is given.
filterChromosomes <- function(A, organism="hg", chr.type="canonical", keep.chr=NULL) {

  A <- toGRanges(A)

  if(!is.null(keep.chr)) {
    valid.chr <- as.character(keep.chr)
  } else if(identical(chr.type, "custom")) {
    # Previously this left the chromosome set as NULL with no explanation
    stop("In filterChromosomes: chr.type is \"custom\" but keep.chr was not supplied")
  } else {
    org <- getChromosomesByOrganism()
    org.code <- gsub("\\d","", organism) #The name of the organism is assumed to be the assembly identifier minus the digits e.g. hg19 -> hg

    if(!(org.code %in% names(org))) {
      stop(paste("In filterChromosomes: Organism ", org.code, " (", organism, ") not recognized. Available organisms are: ", paste(names(org), collapse=", ")))
    }

    # "org.name" is stored next to the chromosome sets but is not one of them
    valid.types <- names(org[[org.code]])
    valid.types <- valid.types[valid.types != "org.name"]

    if(!(chr.type %in% valid.types)) {
      stop(paste("Chromosome type ", chr.type, " for organism", org.code, " (", organism, ") not recognized. Available valid values for ", org.code, " are: ", paste(valid.types, collapse=", ")))
    }

    valid.chr <- as.character(org[[org.code]][chr.type][[1]])
  }

  # Only ask for chromosomes that are actually present in A
  valid.chr <- valid.chr[valid.chr %in% GenomeInfoDb::seqlevels(A)]
  A <- GenomeInfoDb::keepSeqlevels(A, valid.chr, pruning.mode="coarse")
  return(A)

}
--------------------------------------------------------------------------------
/R/maskFromBSGenome.R:
--------------------------------------------------------------------------------
#' maskFromBSGenome
#'
#' @description
#' Extracts the merge of all the active masks from a \code{\link{BSgenome}}
#'
#' @note
#' This function is memoised (cached) using the \code{\link{memoise}} package. To empty the cache, use \code{\link{forget}(maskFromBSGenome)}
#'
#' @usage maskFromBSGenome(bsgenome)
# @usage maskFromBSGenome(...)
#'
#' @param bsgenome a \code{\link{BSgenome}} object
#'
#' @return
#' A \code{\link{GRanges}} object with the active mask in the \code{\link{BSgenome}}
#'
#' @seealso \code{\link{getGenomeAndMask}}, \code{\link{characterToBSGenome}}, \code{\link{emptyCacheRegioneR}}
#'
#' @examples
#' g <- characterToBSGenome("hg19")
#'
#' maskFromBSGenome(g)
#'
#' @export maskFromBSGenome
#'
#' @importFrom Biostrings masks collapse
#' @importFrom S4Vectors Rle

# Returns an empty GRanges (with a warning) when `bsgenome` is not a
# MaskedBSgenome or when no chromosome has a mask.
maskFromBSGenome <- memoise::memoise(function(bsgenome) {


  if(!methods::hasArg(bsgenome)) stop("parameter bsgenome is required")
  if(!methods::is(bsgenome, "BSgenome")) stop("bsgenome must be a BSGenome object")

  if(!methods::is(bsgenome, "MaskedBSgenome")) {
    warning("bsgenome is not a MaskedBSgenome. Returning an empty mask.")
    return(GenomicRanges::GRanges())
  }


  #WARNING: This is ugly. Since I have not found a way to extract the positions
  #of the masks from a BSGenome object in a simple way,
  # we are doing it by iterating over the chromosomes

  #get the chromosome names using the getGenomes function, so we get exactly the same chromosomes
  chrs <- as.character(GenomicRanges::seqnames(getGenome(bsgenome)))

  # lapply (not sapply) so the result is always a plain list, whatever the
  # lengths of the per-chromosome masks happen to be
  chr.masks <- lapply(chrs, function(chr) {
    mm <- Biostrings::masks(bsgenome[[chr]])
    if(is.null(mm)) {
      return(NULL)
    }
    return(Biostrings::collapse(mm)[[1]])
  })
  names(chr.masks) <- chrs

  has.mask <- !vapply(chr.masks, is.null, logical(1))

  if(!any(has.mask)) { #If the mask is null for all chromosomes, raise a warning and return an empty GRanges
    warning("No mask is active for this BSgenome. Returning an empty mask.")
    return(GenomicRanges::GRanges())
  }

  #Combine the mask for each chromosome into a single mask
  genome.seqinfo <- GenomeInfoDb::seqinfo(bsgenome)
  mask <- GenomicRanges::GRanges(seqinfo = genome.seqinfo)
  for(chr in chrs[has.mask]) {
    chr.ranges <- chr.masks[[chr]]
    chr.mask <- GenomicRanges::GRanges(seqnames=S4Vectors::Rle(rep(chr, length(chr.ranges))), ranges=chr.ranges, seqinfo = genome.seqinfo)
    suppressWarnings(mask <- c(mask, chr.mask))
  }

  return(mask)
})

-------------------------------------------------------------------------------- /man/randomizeRegions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/randomizeRegions.R 3 | \name{randomizeRegions} 4 | \alias{randomizeRegions} 5 | \title{Randomize Regions} 6 | \usage{ 7 | randomizeRegions(A, genome="hg19", mask=NULL, allow.overlaps=TRUE, per.chromosome=FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{A}{The set of regions to randomize.
A region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 11 | 12 | \item{genome}{The reference genome to use. A valid genome object. Either a \code{\link{GenomicRanges}} or \code{\link{data.frame}} containing one region per whole chromosome or a character uniquely identifying a genome in \code{\link{BSgenome}} (e.g. "hg19", "mm10",... but not "hg"). Internally it uses \code{\link{getGenomeAndMask}}.} 13 | 14 | \item{mask}{The set of regions specifying where a random region can not be (centromeres, repetitive regions, unmappable regions...). A region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}},\code{\link{data.frame}}, ...). If \code{\link{NULL}} it will try to derive a mask from the genome (currently only works if the genome is a character string). If \code{\link{NA}} it gives, explicitly, an empty mask.} 15 | 16 | \item{allow.overlaps}{A boolean stating whether the random regions can overlap (TRUE) or not (FALSE).} 17 | 18 | \item{per.chromosome}{Boolean. If TRUE, the regions will be created in a per chromosome manner -every region in A will be moved into a random position at the same chromosome where it was originally-.} 19 | 20 | \item{...}{further arguments to be passed to or from methods.} 21 | } 22 | \value{ 23 | It returns a \code{\link{GenomicRanges}} object with the regions resulting from the randomization process. 24 | } 25 | \description{ 26 | Given a set of regions A and a genome, this function returns a new set of regions randomly distributed in the genome. 27 | } 28 | \details{ 29 | The new set of regions will be created with the same sizes of the original ones, and optionally placed in the same chromosomes. 30 | 31 | In addition, they can be made explicitly non overlapping and a mask can be provided so no regions fall in an undesirable part of the genome.
32 | } 33 | \note{ 34 | randomizeRegions assumes that chromosomes start at base 1. If a 35 | chromosome starts at another base number, for example at base 1000, random 36 | regions might appear in the [1:1000] interval. This should not affect most 37 | uses of randomizeRegions, but might be important in some advanced analysis 38 | involving artificially contructed genomes. 39 | } 40 | \examples{ 41 | A <- data.frame("chr1", c(1, 10, 20, 30), c(12, 13, 28, 40)) 42 | 43 | mask <- data.frame("chr1", c(20000000, 100000000), c(22000000, 130000000)) 44 | 45 | genome <- data.frame(c("chr1", "chr2"), c(1, 1), c(180000000, 20000000)) 46 | 47 | randomizeRegions(A) 48 | 49 | randomizeRegions(A, genome=genome, mask=mask, per.chromosome=TRUE, allow.overlaps=FALSE) 50 | 51 | } 52 | \seealso{ 53 | \code{\link{toDataframe}}, \code{\link{toGRanges}}, \code{\link{getGenome}}, \code{\link{getMask}}, \code{\link{getGenomeAndMask}}, \code{\link{characterToBSGenome}}, \code{\link{maskFromBSGenome}}, \code{\link{resampleRegions}}, \code{\link{createRandomRegions}}, \code{\link{circularRandomizeRegions}} 54 | } 55 | -------------------------------------------------------------------------------- /man/overlapPermTest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/overlapPermTest.R 3 | \name{overlapPermTest} 4 | \alias{overlapPermTest} 5 | \title{Permutation Test for Overlap} 6 | \usage{ 7 | overlapPermTest (A, B, alternative="auto", ...) 
8 | } 9 | \arguments{ 10 | \item{A}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 11 | 12 | \item{B}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 13 | 14 | \item{alternative}{the alternative hypothesis must be one of \code{"greater"}, \code{"less"} or \code{"auto"}. If \code{"auto"}, the alternative will be decided depending on the data.} 15 | 16 | \item{...}{further arguments to be passed to or from methods.} 17 | } 18 | \value{ 19 | A list of class \code{permTestResults} containing the following components: 20 | \itemize{ 21 | \item \bold{\code{pval}} the p-value of the test. 22 | \item \bold{\code{ntimes}} the number of permutations. 23 | \item \bold{\code{alternative}} a character string describing the alternative hypothesis. 24 | \item \bold{\code{observed}} the value of the statistic for the original data set. 25 | \item \bold{\code{permuted}} the values of the statistic for each permuted data set. 26 | \item \bold{\code{zscore}} the value of the standard score. \code{(observed-\link{mean}(permuted))/\link{sd}(permuted)} 27 | } 28 | } 29 | \description{ 30 | Performs a permutation test to see if the overlap between two sets of regions 31 | A and B is higher (or lower) than expected by chance. It will internally 32 | call \code{\link{permTest}} with the appropriate parameters to perform the 33 | permutation test. If B is a list or a GRangesList, it will perform one 34 | permutation test per element of the list, testing the overlap between 35 | A and each element of B independently. 36 | } 37 | \note{ 38 | \bold{IMPORTANT:} Since it uses \code{\link{permTest}} internally, it 39 | is possible to use most of the parameters of that function in 40 | \code{overlapPermTest}, including: \code{ntimes}, \code{force.parallel}, 41 | \code{min.parallel} and \code{verbose}.
In addition, this function 42 | accepts most parameters of the \code{\link{randomizeRegions}} function 43 | including \code{genome}, \code{mask}, \code{allow.overlaps} and 44 | \code{per.chromosome} and the parameters of \code{\link{numOverlaps}} such 45 | as \code{count.once}. 46 | } 47 | \examples{ 48 | genome <- filterChromosomes(getGenome("hg19"), keep.chr="chr1") 49 | A <- createRandomRegions(nregions=20, length.mean=10000000, length.sd=20000, genome=genome, non.overlapping=FALSE) 50 | B <- c(A, createRandomRegions(nregions=10, length.mean=10000, length.sd=20000, genome=genome, non.overlapping=FALSE)) 51 | 52 | pt <- overlapPermTest(A=A, B=B, ntimes=10, genome=genome, non.overlapping=FALSE, verbose=TRUE) 53 | summary(pt) 54 | plot(pt) 55 | plot(pt, plotType="Tailed") 56 | 57 | 58 | C <- c(B, createRandomRegions(nregions=10, length.mean=10000, length.sd=20000, genome=genome, non.overlapping=FALSE)) 59 | pt <- overlapPermTest(A=A, B=list(B=B, C=C), ntimes=10, genome=genome, non.overlapping=FALSE, verbose=TRUE) 60 | summary(pt) 61 | plot(pt) 62 | 63 | } 64 | \seealso{ 65 | \code{\link{overlapGraphicalSummary}}, \code{\link{overlapRegions}}, \code{\link{toDataframe}}, \code{\link{toGRanges}}, \code{\link{permTest}} 66 | } 67 | -------------------------------------------------------------------------------- /R/getGenome.R: -------------------------------------------------------------------------------- 1 | #' getGenome 2 | #' 3 | #' @description 4 | #' 5 | #' Function to obtain a genome 6 | #' 7 | #' @return 8 | #' A GRanges object with the "genome" data c(Chromosome, Start (by default, 1), Chromosome Length) given a \code{\link{BSgenome}}, a genome name, a \code{\link{data.frame}} or a GRanges. 9 | #' 10 | #' @details 11 | #' 12 | #' If genome is a \code{\link{BSgenome}} (from the package \code{BioStrings}), it will transform it into a \code{\link{GRanges}} with chromosomes and chromosome lengths. 
13 | #' 14 | #' If genome is a \code{\link{data.frame}} with 3 columns, it will transform it into a GRanges. 15 | #' 16 | #' If genome is a \code{\link{data.frame}} with 2 columns, it will assume the first is the chromosome, the second is the length of the chromosomes and will add 1 as start. 17 | #' 18 | #' If genome is a \code{character} string uniquely identifying a \code{\link{BSgenome}} installed in the system (e.g. "hg19", "mm10",... but not "hg"), it will create a genome based on the \code{\link{BSgenome}} object identified by the character string. 19 | #' 20 | #' If genome is a \code{ \link{GRanges}} object, it will return it as is. 21 | #' 22 | #' If genome is non of the above, it will give a warning and try to transform it into a GRanges using \link{toGRanges}. This can be helpful if \code{genome} is a connection to a file. 23 | #' 24 | # @usage getGenome(genome) <- Real Documentation. Problems with codoc 25 | #' 26 | #' @usage getGenome(genome) 27 | #' 28 | #' @param genome The genome object or genome identifier. 29 | #' 30 | #' @return 31 | #' A \code{\link{GRanges}} representing the genome with one region per chromosome. 32 | #' 33 | #' @note 34 | #' 35 | #' This function is memoised (cached) using the \code{\link{memoise}} package. To empty the cache, use \code{\link{forget}(getGenome)} 36 | #' 37 | #' Please note that passing this function the path to a file will not work, since it will assume the character is the identifier of a genome. 
#' To read the genome
#' from a file, please use \code{getGenome(toGRanges("path/to/file"))}
#'
#' @seealso \code{\link{getMask}}, \code{\link{getGenomeAndMask}}, \code{\link{characterToBSGenome}}, \code{\link{maskFromBSGenome}}, \code{\link{emptyCacheRegioneR}}
#'
#' @examples
#' getGenome("hg19")
#'
#' getGenome(data.frame(c("chrA", "chrB"), c(15000000, 10000000)))
#'
#' @export getGenome
#'
#' @importFrom memoise memoise


# Resolves `genome` into a region set, trying each accepted input form in turn:
#   GRanges               -> returned untouched
#   character             -> converted with characterToBSGenome(), then
#                            handled as a BSgenome
#   BSgenome              -> one region per sequence in its seqinfo, from 1 to
#                            the sequence length
#   2-column data.frame   -> columns read as (chromosome, length); start is 1
#   3-column data.frame   -> handed to toGRanges() as is
#   anything else         -> warning, then handed to toGRanges()
# The function is wrapped in memoise::memoise().
getGenome <- memoise::memoise(function(genome) {

  if(!hasArg(genome)) {
    stop("No genome was specified. genome is a required parameter")
  }

  # Nothing to do when a GRanges is supplied
  if(is(genome, "GRanges")) return(genome)

  # A character is taken to be a genome identifier, never a file path
  if(is.character(genome)) genome <- characterToBSGenome(genome)

  # Either given as a BSgenome or just converted from a character
  if(is(genome, "BSgenome")) {
    seq.info <- GenomeInfoDb::seqinfo(genome)
    # NOTE(review): relies on toGRanges() accepting (chr, start, end) as
    # separate positional arguments -- confirm against toGRanges' signature
    return(toGRanges(as.character(seq.info@seqnames), 1, as.numeric(seq.info@seqlengths), genome = genome))
  }

  if(is(genome, "data.frame")) {
    num.cols <- ncol(genome)
    if(num.cols == 2) {
      # Only lengths were given: every chromosome starts at 1
      return(toGRanges(genome[,1], 1, as.numeric(genome[,2])))
    }
    if(num.cols == 3) {
      return(toGRanges(genome))
    }
  }

  warning("Genome format not identified. Trying to to transform with toGRanges.")
  return(toGRanges(genome))
})

-------------------------------------------------------------------------------- /man/overlapRegions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/overlapRegions.R 3 | \name{overlapRegions} 4 | \alias{overlapRegions} 5 | \title{Overlap Regions} 6 | \usage{ 7 | overlapRegions(A, B, colA=NULL, colB=NULL, type="any", min.bases=1, min.pctA=NULL, min.pctB=NULL, get.pctA=FALSE, get.pctB=FALSE, get.bases=FALSE, only.boolean=FALSE, only.count=FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{A}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 11 | 12 | \item{B}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 13 | 14 | \item{colA}{numeric vector indicating which columns of A the results will contain (default NULL)} 15 | 16 | \item{colB}{numeric vector indicating which columns of B the results will contain (default NULL)} 17 | 18 | \item{type}{\itemize{ 19 | \item{\code{AinB}: the region in A is contained in a region in B} 20 | \item{\code{BinA}: the region in B is contained in A} 21 | \item{\code{within}: the region in A or B is contained in a region in the other region set} 22 | \item{\code{equal}: the region in A has the same chromosome, start and end as a region in B} 23 | \item{\code{AleftB}: the end of the region from A overlaps the beginning of a region in B} 24 | \item{\code{ArightB}: the start of a region from A overlaps the end of a region in B} 25 | \item{\code{any}: any kind of overlap is returned} 26 | }} 27 | 28 | \item{min.bases}{numeric minimun number of bp accepted to define a overlap (default 1)} 29 | 30 | \item{min.pctA}{numeric minimun percentage of bases of A accepted to define a
overlap (default NULL)} 31 | 32 | \item{min.pctB}{numeric minimun percentage of bases of B accepted to define a overlap (default NULL)} 33 | 34 | \item{get.pctA}{boolean if TRUE add a column in the results indicating the number percentage of A are involved in the overlap (default FALSE)} 35 | 36 | \item{get.pctB}{boolean if TRUE add a column in the results indicating the number percentage of B are involved in the overlap (default FALSE)} 37 | 38 | \item{get.bases}{boolean if TRUE add in the results the number of overlapped bases (default FALSE)} 39 | 40 | \item{only.boolean}{boolean if TRUE devolve as result a boolean vector containing the overlap state of each regions of A (default FALSE)} 41 | 42 | \item{only.count}{boolean if TRUE devolve as result the number of regions of A overlapping with B} 43 | 44 | \item{...}{any additional parameter (are there any left?)} 45 | } 46 | \value{ 47 | the default results is a \code{\link{data.frame}} with at least 5 columns "chr" indicating the chromosome of the appartenence of each overlap, "startA", "endA", "startB", "endB", indicating the coordinates of the region A and B for each overlap 48 | "type" that describe the nature of the overlap (see arguments "type") eventually other columns can be added (see see arguments "colA", "colB", "get.pctA", "get.pctB", "get.bases") 49 | } 50 | \description{ 51 | return overlap between 2 regios set A and B 52 | } 53 | \note{ 54 | The implementation uses when possible the \code{\link{countOverlaps}} function from \code{IRanges} package. 
55 | } 56 | \examples{ 57 | A <- data.frame("chr1", c(1, 5, 20, 30), c(8, 13, 28, 40), x=c(1,2,3,4), y=c("a", "b", "c", "d")) 58 | 59 | B <- data.frame("chr1", 25, 35) 60 | 61 | overlapRegions(A, B) 62 | 63 | } 64 | \seealso{ 65 | \code{\link{plotRegions}}, \code{\link{toDataframe}}, \code{\link{toGRanges}}, \code{\link{subtractRegions}}, \code{\link{splitRegions}}, \code{\link{extendRegions}}, \code{\link{commonRegions}}, \code{\link{mergeRegions}}, \code{\link{joinRegions}} 66 | } 67 | -------------------------------------------------------------------------------- /R/createRandomRegions.R: -------------------------------------------------------------------------------- 1 | #' Create Random Regions 2 | #' 3 | #' @description 4 | #' Creates a set of random regions with a given mean size and standard deviation. 5 | #' 6 | #' @details 7 | #' A set of nregions will be created and randomly placed over the genome. The lengths of the region set will follow a normal distribution with a mean size \code{length.mean} and a standard deviation \code{length.sd}. The new regions can be made explicitly non overlapping by setting \code{non.overlapping} to TRUE. 8 | #' A mask can be provided so no regions fall in a forbidden part of the genome. 9 | #' 10 | #' @note 11 | #' If the standard deviation of the length is large with respect to the mean, negative lengths might be created. These region lengths will be 12 | #' transformed into 1 and so, for large standard deviations, the mean and sd of the lengths are not guaranteed to be the ones in the parameters. 13 | #' 14 | #' @usage createRandomRegions(nregions=100, length.mean=250, length.sd=20, genome="hg19", mask=NULL, non.overlapping=TRUE) 15 | #' 16 | #' @param nregions The number of regions to be created. 17 | #' @param length.mean The mean size of the regions created. This is not guaranteed to be the mean of the final region set. See note. 18 | #' @param length.sd The standard deviation of the region size.
This is not guaranteed to be the standard deviation of the final region set. See note. 19 | #' @param genome The reference genome to use. A valid genome object. Either a \code{\link{GenomicRanges}} or \code{\link{data.frame}} containing one region per whole chromosome or a character uniquely identifying a genome in \code{\link{BSgenome}} (e.g. "hg19", "mm10" but not "hg"). Internally it uses \code{\link{getGenomeAndMask}}. 20 | #' @param mask The set of regions specifying where a random region can not be (centromeres, repetitive regions, unmappable regions...). A region set in any of the accepted formats (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, ...). \code{\link{NULL}} will try to derive a mask from the genome (currently only works if the genome is a character string) and \code{\link{NA}} explicitly gives an empty mask. 21 | #' @param non.overlapping A boolean stating whether the random regions can overlap (FALSE) or not (TRUE). 22 | #' 23 | #' @return 24 | #' It returns a \code{\link{GenomicRanges}} object with the regions resulting from the randomization process.
#'
#' @seealso \code{\link{getGenome}}, \code{\link{getMask}}, \code{\link{getGenomeAndMask}}, \code{\link{characterToBSGenome}}, \code{\link{maskFromBSGenome}}, \code{\link{randomizeRegions}}, \code{\link{resampleRegions}}
#'
#' @examples
#' genome <- data.frame(c("chr1", "chr2"), c(1, 1), c(180000000, 20000000))
#' mask <- data.frame("chr1", c(20000000, 100000000), c(22000000, 130000000))
#'
#' createRandomRegions(nregions=10, length.mean=1000, length.sd=500)
#'
#' createRandomRegions(nregions=10, genome=genome, mask=mask, non.overlapping=TRUE)
#'
#' @export createRandomRegions


# Draws `nregions` lengths from a normal distribution, builds that many
# placeholder regions on the first chromosome of the genome and lets
# randomizeRegions() place them.
#
# nregions, length.mean, length.sd:  numeric parameters of the length draw.
# genome, mask:                      passed through to randomizeRegions().
# non.overlapping:                   TRUE forbids overlaps between the regions
#                                    (passed on as allow.overlaps = FALSE).
# Lengths are rounded to whole bases and never go below 1. Asking for zero
# regions returns an empty GRanges.
createRandomRegions <- function(nregions=100, length.mean=250, length.sd=20, genome="hg19", mask=NULL, non.overlapping=TRUE) {

  if(!is.numeric(nregions)) stop("nregions must be numeric")
  if(!is.numeric(length.mean)) stop("length.mean must be numeric")
  if(!is.numeric(length.sd)) stop("length.sd must be numeric")

  # data.frame() below cannot combine a length-1 chr with zero lengths
  if(length(nregions) == 1 && !is.na(nregions) && nregions == 0) {
    return(GenomicRanges::GRanges())
  }

  gg <- getGenome(genome)

  #create a set of regions with the specified length distribution
  # rnorm() returns fractional values; genomic coordinates must be whole bases
  region.lengths <- round(rnorm(n=nregions, mean=length.mean, sd=length.sd))
  region.lengths[region.lengths < 1] <- 1
  regs <- data.frame(chr=GenomeInfoDb::seqlevels(gg)[1], start=1, end=region.lengths, stringsAsFactors=FALSE)

  # The original `genome` (not `gg`) is forwarded so that randomizeRegions can
  # still derive a mask from a character genome when mask is NULL
  return(randomizeRegions(A=regs, genome=genome, mask=mask, allow.overlaps = !non.overlapping))

}

-------------------------------------------------------------------------------- /R/createFunctionsList.R: -------------------------------------------------------------------------------- 1 | #' Create Functions List 2 | #' 3 | #' @description 4 | #' Partially applies (the standard Curry function in functional programming) a list of arguments 5 | #' to a function and returns a list of preapplied functions. The result of this function is a 6 | #' list of functions suitable for the multiple evaluation functions in permTest.
#'
#'
#' @note
#' It uses the code posted by "hadley" at http://stackoverflow.com/questions/6547219/how-to-bind-function-arguments
#'
#' @usage
#' createFunctionsList(FUN, param.name, values, func.names)
#'
#' @param FUN Function. the function to be partially applied
#' @param param.name Character. The name of the parameter to pre-set.
#' @param values A list or vector of values to preassign. A function will be created for each of the values in values. If present, the names of the list will be the names of the functions.
#' @param func.names Character. The names of the functions created. Useful to identify the functions created. Defaults to the names of the values list or to Function1, Function2... if the values list has no names. When given, it must contain exactly one unique name per element of values.
#'
#' @return
#' It returns a list of functions with parameter param.name pre-set to values.
#'
#' @seealso \code{\link{permTest}}, \code{\link{overlapPermTest}}
#'
#' @examples
#' f <- function(a, b) {
#'  return(a+b)
#' }
#'
#' funcs <- createFunctionsList(FUN=f, param.name="b", values=c(1,2,3), func.names=c("plusone", "plustwo", "plusthree"))
#'
#' funcs$plusone(2)
#' funcs$plusone(10)
#' funcs$plusthree(2)
#'
#' A <- createRandomRegions(nregions=20, length.mean=10000000, length.sd=0, mask=NA)
#' B <- createRandomRegions(nregions=20, length.mean=10000000, length.sd=0, mask=NA)
#'
#' overlapsWith <- createFunctionsList(FUN=numOverlaps, param.name="B", values=list(a=A, b=B))
#' overlapsWith$a(A=A)
#' overlapsWith$b(A=A)
#'
#' @export createFunctionsList


createFunctionsList <- function(FUN, param.name, values, func.names=NULL) {
  if(!hasArg(FUN)) stop("FUN is missing")
  if(!hasArg(param.name)) stop("param.name is missing")
  if(!is.character(param.name)) stop("param.name must be a character")
  if(length(param.name)>1) stop("param.name must be a single character string")
  if(!hasArg(values)) stop("values is missing")
  if(!(is.list(values) || is.vector(values))) stop("values must be a list or a vector")

  if(is.null(func.names)) {
    # No explicit names: take them from a named list, else generate them
    if(is.list(values) && !is.null(names(values))) {
      if(any(duplicated(names(values)))) stop("the names of the values list must be unique")
      func.names <- names(values)
    } else {
      func.names <- paste0("Function", seq_len(length(values)))
    }
  } else {
    # Explicit names are used as list keys below: a missing name would
    # index with NA and a repeated one would overwrite an earlier function
    if(length(func.names) != length(values)) stop("func.names must have the same length as values")
    if(any(duplicated(func.names))) stop("func.names must be unique")
  }

  curried.funcs <- list()
  for(i in seq_len(length(values))) {
    # Build the call Curry(FUN=FUN, <param.name>=values[[i]])
    curry.args <- list(FUN=FUN)
    curry.args[[param.name]] <- values[[i]]
    curried.funcs[[func.names[i]]] <- do.call("Curry", curry.args)
  }
  return(curried.funcs)
}



#Curry function. From http://stackoverflow.com/questions/6547219/how-to-bind-function-arguments
# Returns a function of `...` that calls FUN with the arguments given here
# already bound, followed by whatever is passed at call time.
Curry <- function(FUN, ...) {
  # Capture the bound arguments and append `...` so that call-time
  # arguments are forwarded after them
  args <- match.call(expand.dots = FALSE)$...
  args$... <- as.name("...")

  env <- new.env(parent = parent.frame())

  if (is.name(FUN)) {
    fname <- FUN
  } else if (is.character(FUN)) {
    fname <- as.name(FUN)
  } else if (is.function(FUN)){
    # A function object has no name to call it by: store it in the new
    # function's environment under the name FUN
    fname <- as.name("FUN")
    env$FUN <- FUN
  } else {
    stop("FUN not function or name of function")
  }
  curry_call <- as.call(c(list(fname), args))

  # Build function(...) <curry_call> and make it evaluate in env
  f <- eval(call("function", as.pairlist(alist(... = )), curry_call))
  environment(f) <- env
  f
}


-------------------------------------------------------------------------------- /R/localZScore.R: -------------------------------------------------------------------------------- 1 | #' Local z-score 2 | #' 3 | #' @description 4 | #' Evaluates the variation of the z-score in the vicinity of the original region set 5 | #' 6 | #' @usage 7 | #' localZScore(A, pt, window, step, ...)
#'
#' @param A a region set in any of the formats accepted by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)
#' @param pt a permTestResults or permTestResultsList object
#' @param window a window in which the local Z-score will be calculated (bp). Defaults to 5 times the mean width of the regions in A. Must be a single positive number.
#' @param step the number of bp that divide each Z-score evaluation. Defaults to a tenth of the window (at least 1). Must be a single positive number.
#' @param ... further arguments to be passed to other methods.
#'
#' @return
#' It returns a local z-score object
#'
#' @seealso \code{\link{overlapPermTest}}, \code{\link{permTest}}
#'
#' @examples
#'
#' genome <- filterChromosomes(getGenome("hg19"), keep.chr="chr1")
#' A <- createRandomRegions(nregions=20, length.mean=10000, length.sd=20000, genome=genome, non.overlapping=FALSE)
#' B <- c(A, createRandomRegions(nregions=10, length.mean=10000, length.sd=20000, genome=genome, non.overlapping=FALSE))
#'
#' pt <- overlapPermTest(A=A, B=B, ntimes=10, genome=genome, non.overlapping=FALSE)
#' plot(pt)
#'
#' lz <- localZScore(A=A, B=B, pt=pt)
#' plot(lz)
#'
#'
#' pt2 <- permTest(A=A, B=B, ntimes=10, randomize.function=randomizeRegions, evaluate.function=list(overlap=numOverlaps, distance=meanDistance), genome=genome, non.overlapping=FALSE)
#' plot(pt2)
#'
#' lz2 <- localZScore(A=A, B=B, pt2)
#' plot(lz2)
#'
#'
#' @export localZScore
#'
#' @importFrom GenomicRanges shift


localZScore <- function(A, pt, window, step, ...) {

  if(!hasArg(A)) stop("A is missing")
  if(!hasArg(pt)) stop("pt is missing")

  A <- toGRanges(A)

  if(!hasArg(window)) {
    window <- 5*mean(width(A))
  }
  if(!is.numeric(window) || length(window) != 1 || is.na(window) || window <= 0) {
    stop("window must be a single positive number")
  }
  if(!hasArg(step)) {
    # A window under 10 bp would give a step of 0 and an infinite number
    # of steps, so the default never goes below 1
    step <- max(1, floor(window/10))
  }
  if(!is.numeric(step) || length(step) != 1 || is.na(step) || step <= 0) {
    stop("step must be a single positive number")
  }

  #if pt is a permTestResults object, compute the localZScore and return it
  #(inherits() is safe for objects with more than one class)
  if(inherits(pt, "permTestResults")) {
    mean.permuted <- mean(pt$permuted)
    sd.permuted <- stats::sd(pt$permuted)

    num.steps <- floor(window/step)

    # Symmetric shifts around 0; seq_len() gives no shifts (instead of
    # c(1, 0)) when the step is larger than the window
    shifts <- seq_len(num.steps)*step
    shifts <- c(rev(-1*shifts), 0, shifts)

    # z-score of the evaluation function on A displaced by `shift` bp,
    # relative to the permuted distribution stored in pt
    shifted.z.score <- function(shift) {
      shifted.A <- GenomicRanges::shift(A, shift)
      shifted.evaluation <- tryCatch(pt$evaluate.function(shifted.A, ...),
        error=function(e) {stop(paste0("There was an error when computing evaluation function of the shifted region set: \n", as.character(e),
                                       "Evaluation Function: ", pt$evaluate.function.name,
                                       "\n Shift: ", shift))}
      )
      return((shifted.evaluation - mean.permuted)/sd.permuted)
    }

    shifted <- lapply(as.list(shifts), shifted.z.score)

    shifted.z.scores <- do.call(c, shifted)

    rZ <- list(shifted.z.scores=shifted.z.scores, shifts=shifts, window=window, step=step, original.z.score=pt$zscore)
    class(rZ) <- "localZScoreResults"
    return(rZ)

  } else if(inherits(pt, "permTestResultsList")) {
    #if it's a list, run localZScore for each element in the list
    lz <- mclapply(pt, function(ptt) {return(localZScore(A=A, pt=ptt, window=window, step=step, ...))})
    class(lz) <- "localZScoreResultsList"
    return(lz)
  } else {
    stop(paste0("pt must be of class permTestResults or permTestResultsList and is a: ", paste(class(pt), collapse=", ")))
  }
}


-------------------------------------------------------------------------------- /R/overlapGraphicalSummary.R: 
--------------------------------------------------------------------------------
#' Overlap Graphical Summary
#'
#' @description
#' Graphical summary of the overlap between two sets of regions.
#'
#' @usage overlapGraphicalSummary(A, B, regions.labels=c("A","B"), regions.colors=c("black","forestgreen","darkred"), ...)
#'
#' @param A a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)
#' @param B a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)
#' @param regions.labels vector with the labels of the two region sets, used in the titles, the legend and the bar names.
#' @param regions.colors character vector indicating the colors for the regions.
#' @param ... Arguments to be passed to methods, such as graphical parameters (see \code{\link{par}}).
#'
#' @return A plot is created on the current graphics device.
#'
#' @seealso \code{\link{overlapPermTest}}, \code{\link{overlapRegions}}
#'
#' @examples
#' A <- data.frame(chr=1, start=c(1,15,24,40,50), end=c(10,20,30,45,55))
#'
#' B <- data.frame(chr=1, start=c(2,12,28,35), end=c(5,25,33,43))
#'
#' overlapGraphicalSummary(A, B, regions.labels=c("A","B"), regions.colors=c(4,5,6))
#'
#' @export overlapGraphicalSummary


overlapGraphicalSummary <- function(A, B, regions.labels=c("A","B"), regions.colors=c("black","forestgreen","darkred"), ...) {

  if(!hasArg(A)) stop("A is required")
  if(!hasArg(B)) stop("B is required")

  # Plain (non-scientific) numbers on the axes; the previous setting is
  # restored on exit, including when one of the calls below fails
  old.opts <- options(scipen=999)
  on.exit(options(old.opts), add=TRUE)

  A <- toGRanges(A)
  B <- toGRanges(B)

  test <- overlapRegions(A, B, get.pctA=TRUE, get.pctB=TRUE, get.bases=TRUE)
  test.gr <- toGRanges(test)
  n.unique.ov <- length(unique(test.gr))

  lin <- (test$pct.basesA + test$pct.basesB) / 2
  linA <- (test$pct.basesA)
  linB <- (test$pct.basesB)

  # 2 x 3 grid of panels with a narrow middle column for the legends
  layout(matrix(c(1,2,3,4,5,6), nrow=2, byrow = TRUE), widths=c(3,1.5,3), heights=c(3,3))

  # Panel 1: sorted percentage of overlapped bases (mean, A and B)
  plot(lin[order(lin)], type="l", lwd=5, ylim=c(0,110), main="", col=regions.colors[1], las=1, ylab="", xlab="", ...)
  abline(h=50, lty=2)
  abline(h=100, lty=2)
  lines(linA[order(linA)], col=regions.colors[2], lwd=3)
  lines(linB[order(linB)], col=regions.colors[3], lwd=3)
  title(paste("overlap ",regions.labels[1]," on ",regions.labels[2]," = ",round((n.unique.ov/length(A))*100, digits=2),"%"))
  box(lwd=1.2)

  # Panel 2: legend for panel 1
  par(xpd=TRUE)
  plot(1, type="n", axes=FALSE, xlab="", ylab="", ...)
  legend("right", c(paste0(regions.labels[1],"+",regions.labels[2]),paste0(regions.labels)), col=regions.colors, lwd=3)
  par(xpd=FALSE)

  # Panel 3: number of regions per overlap type
  counts <- c(length(A), length(B), n.unique.ov, sum(test$pct.basesA==100), sum(test$pct.basesB==100), sum((test$pct.basesA==100)&(test$pct.basesB==100)))
  # the last bar counts overlaps where both regions are fully covered, so
  # its label names both sets
  namearg <- c(paste0("reg.",regions.labels[1]),
               paste0("reg.",regions.labels[2]),
               paste0("uni.ov.",regions.labels[1]),
               paste0(regions.labels[1],".in.",regions.labels[2]),
               paste0(regions.labels[2],".in.",regions.labels[1]),
               paste0(regions.labels[1],"=",regions.labels[2]))
  barplot(counts, main="overlap types", names.arg=namearg, las=2, ylim=c(0, max(counts)+0.1), ...)
  box(lwd=1.2)

  # Panel 4: non-overlapped vs overlapped regions, relative to A
  pieA <- c(length(A)-n.unique.ov, n.unique.ov)
  pie(pieA, labels = paste0(pieA), main = paste("Overlap of", regions.labels[1]), col=c("white","gray"))
  box(lwd=1.2)

  # Panel 5: legend for the two pies
  par(xpd=TRUE)
  plot(1, type="n", axes=FALSE, xlab="", ylab="", ...)
  legend("center", legend=c("non overlaped","overlaped"), fill=c("white","gray"))
  par(xpd=FALSE)

  # Panel 6: same split relative to B
  pieB <- c(length(B)-n.unique.ov, n.unique.ov)
  pie(pieB, labels = paste0(pieB), main = paste("Overlap of", regions.labels[2]), col=c("white","gray"))
  box(lwd=1.2)

  par(mfrow=c(1,1))
}


-------------------------------------------------------------------------------- /man/permTest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/permTest.R 3 | \name{permTest} 4 | \alias{permTest} 5 | \title{Permutation Test} 6 | \usage{ 7 | permTest(A, ntimes=100, randomize.function, evaluate.function, alternative="auto", min.parallel=1000, force.parallel=NULL, randomize.function.name=NULL, evaluate.function.name=NULL, verbose=FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{A}{a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)} 11 | 12 | \item{ntimes}{number of permutations} 13 | 14 | \item{randomize.function}{function to create random regions. It must return a set of regions.} 15 | 16 | \item{evaluate.function}{function to search for association. It must return a numeric value.} 17 | 18 | \item{alternative}{the alternative hypothesis must be one of \code{"greater"}, \code{"less"} or \code{"auto"}.
If \code{"auto"}, the alternative will be decided depending on the data.} 19 | 20 | \item{min.parallel}{if force.parallel is not specified, this will be used to determine the threshold for parallel computation. If \code{length(A) * ntimes > min.parallel}, it will activate the parallel computation. Single threaded otherwise.} 21 | 22 | \item{force.parallel}{logical indicating if the computation must be parallelized.} 23 | 24 | \item{randomize.function.name}{character. If specified, the permTestResults object will have this name instead of the name of the randomization function used. Especially useful when using unnamed anonymous functions.} 25 | 26 | \item{evaluate.function.name}{character. If specified, the permTestResults object will have this name instead of the name of the evaluation function used. Especially useful when using unnamed anonymous functions.} 27 | 28 | \item{verbose}{a boolean. If verbose=TRUE it creates a progress bar to show the computation progress. When combined with parallel computation, it might have an impact on the total computation time.} 29 | 30 | \item{...}{further arguments to be passed to other methods.} 31 | } 32 | \value{ 33 | A list of class \code{permTestResults} containing the following components: 34 | \itemize{ 35 | \item \bold{\code{pval}} the p-value of the test. 36 | \item \bold{\code{ntimes}} the number of permutations. 37 | \item \bold{\code{alternative}} a character string describing the alternative hypothesis. 38 | \item \bold{\code{observed}} the value of the statistic for the original data set. 39 | \item \bold{\code{permuted}} the values of the statistic for each permuted data set. 40 | \item \bold{\code{zscore}} the value of the standard score. \code{(observed-\link{mean}(permuted))/\link{sd}(permuted)} 41 | \item \bold{\code{randomize.function}} the randomization function used. 42 | \item \bold{\code{randomize.function.name}} the name of the randomization function used.
43 | \item \bold{\code{evaluate.function}} the evaluation function used. 44 | \item \bold{\code{evaluate.function.name}} the name of the evaluation function used. 45 | } 46 | } 47 | \description{ 48 | Performs a permutation test to see if there is an association between a region set and some other feature using 49 | an evaluation function. 50 | } 51 | \details{ 52 | permTest performs a permutation test of the regions in A to test the association with the feature evaluated with the evaluation function. 53 | The regions are randomized using the randomize.function and the evaluate.function is used to evaluate them. More information can be found in 54 | the vignette. 55 | } 56 | \examples{ 57 | genome <- filterChromosomes(getGenome("hg19"), keep.chr="chr1") 58 | A <- createRandomRegions(nregions=20, length.mean=10000000, length.sd=20000, genome=genome, non.overlapping=FALSE) 59 | B <- c(A, createRandomRegions(nregions=10, length.mean=10000, length.sd=20000, genome=genome, non.overlapping=FALSE)) 60 | 61 | 62 | pt2 <- permTest(A=A, B=B, ntimes=10, alternative="auto", verbose=TRUE, genome=genome, evaluate.function=meanDistance, randomize.function=randomizeRegions, non.overlapping=FALSE) 63 | summary(pt2) 64 | plot(pt2) 65 | plot(pt2, plotType="Tailed") 66 | 67 | } 68 | \references{ 69 | Davison, A. C. and Hinkley, D. V. (1997) Bootstrap methods and their application, Cambridge University Press, United Kingdom, 156-160 70 | } 71 | \seealso{ 72 | \code{\link{overlapPermTest}} 73 | } 74 | -------------------------------------------------------------------------------- /R/splitRegions.R: -------------------------------------------------------------------------------- 1 | #' Split Regions 2 | #' 3 | #' @description 4 | #' Splits a region set A by both ends of the regions in a second region set B.
#'
#' @usage splitRegions(A, B, min.size=1, track.original=TRUE)
#'
#' @param A a region set in any of the formats accepted by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)
#' @param B a region set in any of the formats accepted by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)
#' @param min.size numeric value, minimal size (in bases) of the new regions. Pieces shorter than this are discarded.
#' @param track.original logical indicating if you want to keep the original regions and additional information in the output
#'
#' @return
#' A GRanges with the split regions.
#'
#' @seealso \code{\link{toDataframe}}, \code{\link{toGRanges}}, \code{\link{subtractRegions}}, \code{\link{commonRegions}}, \code{\link{extendRegions}}, \code{\link{joinRegions}}, \code{\link{mergeRegions}}, \code{\link{overlapRegions}}
#'
#' @examples
#' A <- data.frame(chr=1, start=c(1, 15, 24, 40, 50), end=c(10, 20, 30, 45, 55))
#'
#' B <- data.frame(chr=1, start=c(2, 12, 28, 35), end=c(5, 25, 33, 43))
#'
#' splits <- splitRegions(A, B)
#'
#' plotRegions(list(A, B, splits), chromosome=1, regions.labels=c("A", "B", "splits"), regions.colors=3:1)
#'
#' @export splitRegions


splitRegions <- function(A, B, min.size=1, track.original=TRUE) {

  if(!hasArg(A)) stop("A is missing")
  if(!hasArg(B)) stop("B is missing")
  if(!is.numeric(min.size)) stop("min.size must be numeric")
  if(!is.logical(track.original)) stop("track.original must be logical")

  A <- toDataframe(A)
  B <- toDataframe(B)

  # Output rows are collected in a list and bound once at the end
  pieces <- list()

  #for each chromosome
  chrs <- unique(c(as.character(as.vector(A[,1])), as.character(as.vector(B[,1]))))
  for(chr in chrs) {
    Achr <- A[A[,1]==chr,]
    Bchr <- B[B[,1]==chr,]

    # A piece is only reported when it lies inside a region of A, so a
    # chromosome without A regions cannot contribute anything
    if(nrow(Achr) == 0) next

    #all the starts and ends of A and B, flagged as start (s) or end (e)
    splits <- data.frame(x=c(Achr[,2], Bchr[,2], Achr[,3], Bchr[,3]),
                         y=rep(c("s", "e"), each=nrow(Achr)+nrow(Bchr)),
                         stringsAsFactors=FALSE)
    splits <- splits[order(splits[,1]),]

    #every pair of consecutive split points delimits one candidate piece:
    # - a piece begins at a start point, or one base after an end point
    # - a piece finishes at an end point, or one base before a start point
    from <- seq_len(nrow(splits) - 1)
    to <- from + 1
    starts <- ifelse(splits$y[from]=="s", splits$x[from], splits$x[from]+1)
    ends <- ifelse(splits$y[to]=="s", splits$x[to]-1, splits$x[to])

    #removing regions with no length and those shorter than min.size
    long <- ends - starts + 1
    keep <- long > 0 & long >= min.size
    starts <- starts[keep]
    ends <- ends[keep]

    #constructing the output and adding the original info if track.original==TRUE
    for(j in seq_along(starts)) {
      auxA <- which(Achr[,2]<=starts[j] & Achr[,3]>=ends[j])
      if(length(auxA) == 0) next
      if(track.original) {
        # one row per original A region containing the piece, followed by
        # the original coordinates and any extra columns of that region
        piece <- data.frame(chr=Achr[auxA,1],
                            start=rep(starts[j], length(auxA)),
                            end=rep(ends[j], length(auxA)),
                            Achr[auxA, 2:ncol(Achr), drop=FALSE])
      } else {
        piece <- data.frame(chr=chr, start=starts[j], end=ends[j], stringsAsFactors=FALSE)
      }
      pieces[[length(pieces)+1]] <- piece
    }
  }

  if(length(pieces) == 0) {
    # NOTE(review): assumes toGRanges accepts a zero-row data.frame - confirm
    outA <- data.frame(chr=character(0), start=numeric(0), end=numeric(0), stringsAsFactors=FALSE)
  } else {
    outA <- do.call(rbind, pieces)
  }
  rownames(outA) <- NULL

  res <- toGRanges(outA)
  return(res)
}
--------------------------------------------------------------------------------
/R/plotRegions.R:
--------------------------------------------------------------------------------
#' Plot Regions
#'
#' @description
#' Plots sets of regions
#'
#' @usage plotRegions(x, chromosome, start=NULL, end=NULL, regions.labels=NULL, regions.colors=NULL, ...)
#'
#' @param x list of objects to be plotted.
#' @param chromosome character or numeric value indicating which chromosome you want to plot.
#' @param start numeric value indicating from which position you want to plot.
#' @param end numeric value indicating to which position you want to plot.
#' @param regions.labels vector indicating the labels for the y axes. It must have the same length as x.
#' @param regions.colors character vector indicating the colors for the plotted regions. It must have the same length as x.
#' @param ... Arguments to be passed to methods, such as graphical parameters (see \code{\link{par}}). \code{xlab}, \code{ylab} and \code{main} given here replace the default axis labels and title.
#'
#' @return A plot is created on the current graphics device.
#'
#' @examples
#' A <- data.frame(chr=1, start=c(1,15,24,40,50), end=c(10,20,30,45,55))
#'
#' B <- data.frame(chr=1, start=c(2,12,28,35), end=c(5,25,33,43))
#'
#' plotRegions(list(A,B), chromosome=1, regions.labels=c("A","B"), regions.colors=3:2)
#'
#'
#' @export plotRegions
#'


plotRegions <- function(x, chromosome, start=NULL, end=NULL, regions.labels=NULL, regions.colors=NULL, ...) {

  # Plain (non-scientific) numbers on the axis; the previous setting is
  # restored on exit, also when an argument check below stops the function
  old.opts <- options(scipen=999)
  on.exit(options(old.opts), add=TRUE)

  if(!hasArg(chromosome)) stop("chromosome is missing")
  if(!is.null(start) && !is.numeric(start)) stop("start must be numeric")
  if(!is.null(end) && !is.numeric(end)) stop("end must be numeric")
  if(!is.null(regions.colors) && length(regions.colors)!=length(x)) stop("regions.colors must have the same length as x")
  if(!is.null(regions.labels) && length(regions.labels)!=length(x)) stop("regions.labels must have the same length as x")
  if(is.null(regions.colors)) regions.colors <- rep("black", length(x))

  # Labels and title: values passed through ... win over the defaults
  dots <- list(...)
  xlab <- if(is.null(dots[["xlab"]])) "position" else dots[["xlab"]]
  ylab <- if(is.null(dots[["ylab"]])) "" else dots[["ylab"]]
  main <- if(is.null(dots[["main"]])) "regions" else dots[["main"]]

  #transforming to GRanges and calculating the axes limits for the plot
  maxx <- minx <- rep(NA_real_, length(x))
  for(i in seq_len(length(x))) {
    x[[i]] <- toGRanges(x[[i]])
    x[[i]] <- x[[i]][seqnames(x[[i]])==chromosome,]
    if(!is.null(start)) x[[i]] <- x[[i]][start(x[[i]])>=start,]
    if(!is.null(end)) x[[i]] <- x[[i]][end(x[[i]])<=end,]
    # a set with nothing left in the selected range does not affect the limits
    if(length(x[[i]]) > 0) {
      maxx[i] <- max(start(x[[i]]), end(x[[i]]), na.rm=TRUE)
      minx[i] <- min(start(x[[i]]), end(x[[i]]), na.rm=TRUE)
    }
  }
  if(all(is.na(maxx))) stop("there are no regions to plot in the selected chromosome and range")
  maxend <- max(maxx, na.rm=TRUE)
  minstart <- min(minx, na.rm=TRUE)

  #plot definition
  plot.new()
  plot.window(xlim=c(minstart, maxend+((maxend-minstart)/10)), ylim=c(0.5, length(x)+0.5))
  # plot.window() does not draw labels, so they are added explicitly
  title(main=main, xlab=xlab, ylab=ylab)
  axis(1, at=round(seq(minstart,maxend,(maxend-minstart)/10),0), labels=round(seq(minstart,maxend,(maxend-minstart)/10),0))
  axis(2, at=c(length(x):1), las=1, labels=regions.labels)

  for(reg in seq_len(length(x))) {

    reads <- toDataframe(x[[reg]])
    # nothing to draw for a set that is empty in the selected range
    if(nrow(reads) == 0) next

    # sort the reads by their start positions
    reads <- reads[order(reads$start),]

    # yread[k] holds the last occupied position of y level k
    yread <- c(minstart - 1)
    ypos <- c() #holds the y level of the ith segment

    # iterate over segments
    for(r in seq_len(nrow(reads))) {
      read <- reads[r,]
      read.start <- read$start
      placed <- FALSE

      # walk the levels until one is free at this start position
      y <- 1
      while(!placed) {

        if(yread[y] < read.start) {
          ypos[r] <- y
          yread[y] <- read$end + 0.999999
          placed <- TRUE
        }

        # current level is used by another segment (or was just taken), move on
        y <- y + 1
        # initialize another level if we're at the end of the list
        if(y > length(yread)) {
          yread[y] <- minstart-1
        }
      }
    }

    # scale the levels into the unit-high band reserved for this set;
    # the first set in x is drawn in the top band
    lengthy <- length(yread)
    ypos <- abs(ypos-(length(unique(ypos))+1))
    reads$ypos <- (ypos + 1)/lengthy
    aux <- length(x)-reg
    reads$ypos <- reads$ypos+aux
    segments(reads$start, reads$ypos, reads$end + 0.999999, reads$ypos, col=regions.colors[[reg]], lwd=3)
  }
  box(lwd=1.2)
}


-------------------------------------------------------------------------------- /R/overlapPermTest.R: -------------------------------------------------------------------------------- 1 | #' Permutation Test for Overlap 2 | #' 3 | #' @description 4 | #' Performs a permutation test to see if the overlap between two sets of regions 5 | #' A and B is higher (or lower) than expected by chance. It will internally 6 | #' call \code{\link{permTest}} with the appropriate parameters to perform the 7 | #' permutation test. If B is a list or a GRangesList, it will perform one 8 | #' permutation test per element of the list, testing the overlap between 9 | #' A and each element of B independently. 10 | #' 11 | #' @note \bold{IMPORTANT:} Since it uses \code{\link{permTest}} internally, it 12 | #' is possible to use most of the parameters of that function in 13 | #' \code{overlapPermTest}, including: \code{ntimes}, \code{force.parallel}, 14 | #' \code{min.parallel} and \code{verbose}.
#' In addition, this function
#' accepts most parameters of the \code{\link{randomizeRegions}} function
#' including \code{genome}, \code{mask}, \code{allow.overlaps} and
#' \code{per.chromosome} and the parameters of \code{\link{numOverlaps}} such
#' as \code{count.once}.
#'
#'
#' @usage overlapPermTest (A, B, alternative="auto", ...)
#'
#' @param A a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)
#' @param B a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...), or a list or GRangesList of region sets to be tested one by one. A data.frame is always taken as a single region set.
#' @param alternative the alternative hypothesis must be one of \code{"greater"}, \code{"less"} or \code{"auto"}. If \code{"auto"}, the alternative will be decided depending on the data.
#' @param ... further arguments to be passed to or from methods.
#'
#' @return
#' A list of class \code{permTestResults} containing the following components:
#' \itemize{
#' \item \bold{\code{pval}} the p-value of the test.
#' \item \bold{\code{ntimes}} the number of permutations.
#' \item \bold{\code{alternative}} a character string describing the alternative hypothesis.
#' \item \bold{\code{observed}} the value of the statistic for the original data set.
#' \item \bold{\code{permuted}} the values of the statistic for each permuted data set.
#' \item \bold{\code{zscore}} the value of the standard score. \code{(observed-\link{mean}(permuted))/\link{sd}(permuted)}
#' }
#'
#' @seealso \code{\link{overlapGraphicalSummary}}, \code{\link{overlapRegions}}, \code{\link{toDataframe}}, \code{\link{toGRanges}}, \code{\link{permTest}}
#'
#' @examples
#' genome <- filterChromosomes(getGenome("hg19"), keep.chr="chr1")
#' A <- createRandomRegions(nregions=20, length.mean=10000000, length.sd=20000, genome=genome, non.overlapping=FALSE)
#' B <- c(A, createRandomRegions(nregions=10, length.mean=10000, length.sd=20000, genome=genome, non.overlapping=FALSE))
#'
#' pt <- overlapPermTest(A=A, B=B, ntimes=10, genome=genome, non.overlapping=FALSE, verbose=TRUE)
#' summary(pt)
#' plot(pt)
#' plot(pt, plotType="Tailed")
#'
#'
#' C <- c(B, createRandomRegions(nregions=10, length.mean=10000, length.sd=20000, genome=genome, non.overlapping=FALSE))
#' pt <- overlapPermTest(A=A, B=list(B=B, C=C), ntimes=10, genome=genome, non.overlapping=FALSE, verbose=TRUE)
#' summary(pt)
#' plot(pt)
#'
#' @export overlapPermTest

#Convenience function to perform a permutation test to assess the overlap between two different sets of regions A and B

overlapPermTest <- function(A, B, alternative="auto", ...) {

  if(!hasArg(A)) stop("A is missing")
  if(!hasArg(B)) stop("B is missing")
  alternative <- match.arg(alternative, c("less", "greater", "auto"))

  A <- toGRanges(A)

  if(methods::is(B, "GRangesList")) {
    B <- as.list(B)
  }

  # A data.frame is a list for is.list(), but it describes ONE region set,
  # so it must not be expanded into one evaluation function per column
  multiple.B <- (is.list(B) && !is.data.frame(B)) || is.vector(B)

  #If there are multiple B's, create a list of curried functions
  #(with parameter B pre-applied) and use that as the evaluation functions
  if(multiple.B) {
    func.names <- NULL
    if(is.null(names(B))) {
      #if it's a vector of characters use them
      if(is.vector(B) && all(is.character(B))) {
        func.names <- B
      }
      #if it's a list of characters, use them
      if(is.list(B) && all(unlist(lapply(B, is.character)))) {
        func.names <- unlist(B)
      }
    }
    functs <- createFunctionsList(numOverlaps, param.name = "B", values = B, func.names = func.names)

    return(permTest(A=A, randomize.function=randomizeRegions, evaluate.function=functs, alternative=alternative, ...))
  } else {
    B <- toGRanges(B)
    return(permTest(A=A, B=B, randomize.function=randomizeRegions, evaluate.function=numOverlaps, alternative=alternative, ...))
  }
}


-------------------------------------------------------------------------------- /R/circularRandomizeRegions.R: -------------------------------------------------------------------------------- 1 | #' Circular Randomize Regions 2 | #' 3 | #' @description 4 | #' Given a set of regions A and a genome, this function returns a new set of regions created by applying a random 5 | #' spin to each chromosome. 6 | #' 7 | #' @details 8 | #' This randomization strategy is useful when the spatial relation between the regions in the RS is important and has to be conserved. 9 | #' 10 | #' @usage 11 | #' circularRandomizeRegions(A, genome="hg19", mask=NULL, max.mask.overlap=NULL, max.retries=10, verbose=TRUE, ...) 12 | #' 13 | #' @param A The set of regions to randomize.
A region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...) 14 | #' @param genome The reference genome to use. A valid genome object. Either a \code{\link{GenomicRanges}} or \code{\link{data.frame}} containing one region per whole chromosome or a character uniquely identifying a genome in \code{\link{BSgenome}} (e.g. "hg19", "mm10" but not "hg"). Internally it uses \code{\link{getGenomeAndMask}}. 15 | #' @param mask The set of regions specifying where a random region can not be (centromeres, repetitive regions, unmappable regions...). A region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}},\code{\link{data.frame}}, ...). If \code{\link{NULL}} it will try to derive a mask from the genome (currently only works is the genome is a character string) and if \code{\link{NA}} it will explicitly give an empty mask. 16 | #' @param max.mask.overlap numeric value 17 | #' @param max.retries numeric value 18 | #' @param verbose a boolean. 19 | #' @param ... further arguments to be passed to or from methods. 20 | #' 21 | #' 22 | #' @return 23 | #' It returns a \code{\link{GenomicRanges}} object with the regions resulting from the randomization process. 
24 | #'
25 | #' @seealso \code{\link{randomizeRegions}}, \code{\link{toDataframe}}, \code{\link{toGRanges}}, \code{\link{getGenome}}, \code{\link{getMask}}, \code{\link{getGenomeAndMask}}, \code{\link{characterToBSGenome}}, \code{\link{maskFromBSGenome}}, \code{\link{resampleRegions}}, \code{\link{createRandomRegions}}
26 | #'
27 | #' @examples
28 | #' A <- data.frame("chr1", c(1, 10, 20, 30), c(12, 13, 28, 40))
29 | #'
30 | #' mask <- data.frame("chr1", c(20000000, 100000000), c(22000000, 130000000))
31 | #'
32 | #' genome <- data.frame(c("chr1", "chr2"), c(1, 1), c(180000000, 20000000))
33 | #'
34 | #' circularRandomizeRegions(A)
35 | #'
36 | #' circularRandomizeRegions(A, genome=genome, mask=mask, per.chromosome=TRUE, non.overlapping=TRUE)
37 | #'
38 | #' @export circularRandomizeRegions
39 | #'
40 |
41 |
42 |
43 |
44 | circularRandomizeRegions <- function(A, genome="hg19", mask=NULL, max.mask.overlap=NULL, max.retries=10, verbose=TRUE, ...) {
45 |   A <- toDataframe(toGRanges(A))
46 |   gam <- getGenomeAndMask(genome, mask)
47 |
48 |   #Spin all the regions of one chromosome by a single random offset. If max.mask.overlap is set,
49 |   #the offset is redrawn (at most max.retries times) while too many spun regions overlap the mask.
50 |   getRandomChr <- function(chr, gam) {
51 |     #the chromosome length is the end of the genome region defined for this chromosome
52 |     chr.A <- A[A$chr==chr,]
53 |     chr.len <- end(gam$genome[seqnames(gam$genome)==as.character(chr)])
54 |     if(length(chr.len)!=1) stop(paste0("The genome must contain exactly one region for chromosome ", chr, " but it contains ", length(chr.len), "."))
55 |     spin <- floor(runif(1)*chr.len)
56 |     spin.A <- spinChromosome(chr.A, spin, chr.len)
57 |     if(!is.null(max.mask.overlap)) {
58 |       #check mask
59 |       num.ov <- numOverlaps(A=spin.A, B=gam$mask, count.once=TRUE)
60 |
61 |       num.retries <- 0
62 |       while(num.ov/nrow(spin.A) > max.mask.overlap && num.retries < max.retries) {
63 |         if(verbose==TRUE) {
64 |           message(paste0("Chromosome ", chr, ". Too much overlap with the mask: Num. Regions: ", nrow(chr.A), " Num. Overlaps: ", num.ov, " Pct. Overlap: ", (num.ov/nrow(spin.A))*100, ". Retrying..."))
65 |         }
66 |         spin <- floor(runif(1)*chr.len)
67 |         spin.A <- spinChromosome(chr.A, spin, chr.len)
68 |         num.ov <- numOverlaps(A=spin.A, B=gam$mask, count.once=TRUE)
69 |         num.retries <- num.retries + 1
70 |       }
71 |       if(num.ov/nrow(spin.A) > max.mask.overlap) { #fail only if the last spin tried is still over the limit (a valid spin found on the last retry is accepted)
72 |         stop(paste0("ERROR: After ", max.retries, " retries, it was not possible to find a spin for chromosome ", chr," with an overlap with the mask lower than ", max.mask.overlap, ". The mask is too dense or there are too many features."))
73 |       }
74 |     }
75 |     return(spin.A)
76 |   }
77 |   #spin every chromosome present in A independently and bind the results back together
78 |   rand.A <- lapply(unique(A$chr), getRandomChr, gam=gam)
79 |   rand.A <- do.call(rbind, rand.A)
80 |   return(toGRanges(rand.A))
81 |
82 | }
83 |
84 | #Shift the start and end of every region in A (a data.frame with "start" and "end" columns) by
85 | #spin bases on a circular chromosome of length chr.len, using 1-based positions (1..chr.len).
86 | #Regions that fall over the chromosome end are split in two: [start, chr.len] and [1, end].
87 | spinChromosome <- function(A, spin, chr.len) {
88 |   #the -1/+1 keeps the positions in 1..chr.len (a plain %% would turn position chr.len into 0)
89 |   A$start <- ((A$start + spin - 1) %% chr.len) + 1
90 |   A$end <- ((A$end + spin - 1) %% chr.len) + 1
91 |
92 |   part.out <- A$end < A$start
93 |   if(any(part.out)) {
94 |     C <- A[part.out,]
95 |     A[part.out, "end"] <- chr.len
96 |     C$start <- rep(1, nrow(C))
97 |
98 |     A <- rbind(A, C)
99 |   }
100 |
101 |   return(A)
102 | }
103 |
104 |
105 |
106 |
-------------------------------------------------------------------------------- /tests/testthat/test_toGRanges.R: -------------------------------------------------------------------------------- 1 | library(regioneR) 2 | context("toGRanges") 3 | 4 | #Define some GRanges to use in the tests 5 | starts <- c(1, 15, 24) 6 | ends <- c(10, 20, 30) 7 | chrs <- c("1", "1", "2") 8 | A <- data.frame(chr=chrs, start=starts, end=ends, x=c(1,2,3), y=c("a", "b", "c")) 9 | gr1 <- toGRanges(A) 10 | 11 | 12 | 13 | #from data.frame 14 | test_that(" toGRanges works for a data.frame", { 15 | 16 | expect_is(gr1, "GRanges") 17 | expect_length(gr1, 3) 18 | expect_equal(names(mcols(gr1)), c("x", "y")) 19 | expect_is(gr1$x, "numeric") 20 | expect_is(gr1$y, "character") 21 | expect_equal(GenomicRanges::start(gr1), starts) 22 | expect_equal(GenomicRanges::end(gr1), ends) 23 | expect_equal(as.character(seqnames(gr1)), chrs) 24 
| 25 | gr1.2 <- toGRanges(data.frame(1, starts, ends)) 26 | expect_equal(as.character(seqnames(gr1.2)), c("1", "1", "1")) 27 | 28 | expect_equal(toGRanges(1, starts, ends), gr1.2) 29 | 30 | expect_equal(toGRanges(toDataframe(gr1)), gr1) 31 | 32 | 33 | #check the genome 34 | expect_length(genome(gr1), 2) 35 | expect_length(genome(gr1.2), 1) 36 | 37 | gr1.3 <- toGRanges(A, genome = "hg19") 38 | expect_equal(seqlengths(gr1.3), seqlengths(getGenome("hg19"))) 39 | expect_equal(as.character(seqnames(gr1.3)), c("chr1", "chr1", "chr2")) 40 | 41 | }) 42 | 43 | 44 | #from files 45 | test_that(" toGRanges works for a file", { 46 | 47 | #Bed files 48 | #using the rtracklayer example just check it does not error and the returned 49 | #object class is valid 50 | gr1 <- expect_silent(toGRanges(system.file("tests", "test.bed", package = "rtracklayer"))) 51 | expect_is(gr1, "GRanges") 52 | 53 | #using our own test file, check the specific content 54 | gr1 <- expect_silent(toGRanges("test_data_files/regs.bed")) 55 | expect_is(gr1, "GRanges") 56 | expect_length(gr1, 3) 57 | expect_equal(width(gr1), c(9999, 10000, 1)) 58 | expect_equal(ncol(mcols(gr1)), 0) 59 | expect_equal(as.character(seqnames(gr1)), c("1","1","2")) 60 | expect_equal(GenomicRanges::start(gr1), c(2, 20001, 2)) 61 | expect_equal(GenomicRanges::end(gr1), c(10000, 30000, 2)) 62 | 63 | #using an out-of-spec bed file with header 64 | expect_error(toGRanges("test_data_files/bed_with_header.bed")) 65 | 66 | 67 | #Generic files 68 | gr1 <- expect_silent(toGRanges("test_data_files/regs.txt")) 69 | expect_is(gr1, "GRanges") 70 | expect_length(gr1, 3) 71 | expect_equal(width(gr1), c(10000, 10001, 2)) 72 | expect_equal(ncol(mcols(gr1)), 0) 73 | expect_equal(as.character(seqnames(gr1)), c("1","1","2")) 74 | expect_equal(GenomicRanges::start(gr1), c(1, 20000, 1)) 75 | expect_equal(GenomicRanges::end(gr1), c(10000, 30000, 2)) 76 | 77 | expect_equal(toGRanges("test_data_files/comments_and_header.txt"), gr1) 78 | 79 | 
expect_equal(toGRanges("test_data_files/two_valid_seps.txt"), gr1) 80 | 81 | 82 | #with an additional column 83 | gr1 <- expect_silent(toGRanges("test_data_files/4columns.txt")) 84 | expect_is(gr1, "GRanges") 85 | expect_length(gr1, 3) 86 | expect_equal(width(gr1), c(10000, 10001, 2)) 87 | expect_equal(ncol(mcols(gr1)), 1) 88 | expect_equal(names(mcols(gr1)), "data") 89 | expect_equal(as.character(seqnames(gr1)), c("1","1","2")) 90 | expect_equal(GenomicRanges::start(gr1), c(1, 20000, 1)) 91 | expect_equal(GenomicRanges::end(gr1), c(10000, 30000, 2)) 92 | expect_equal(as.character(gr1$data), c("col", "nan", "col")) 93 | 94 | gr2 <- expect_silent(toGRanges("test_data_files/alt_sep1.txt")) 95 | names(mcols(gr2)) <- "data" 96 | expect_equal(gr2, gr1) 97 | 98 | gr2 <- expect_silent(toGRanges("test_data_files/alt_sep2.txt")) 99 | names(mcols(gr2)) <- "data" 100 | expect_equal(gr2, gr1) 101 | 102 | gr2 <- expect_silent(toGRanges("test_data_files/alt_sep2_with_other_seps.txt")) 103 | expect_is(gr2, "GRanges") 104 | expect_length(gr2, 3) 105 | expect_equal(width(gr2), c(10000, 10001, 2)) 106 | expect_equal(ncol(mcols(gr2)), 1) 107 | expect_equal(names(mcols(gr2)), "V4") 108 | expect_equal(as.character(seqnames(gr2)), c("1","1","2")) 109 | expect_equal(GenomicRanges::start(gr2), c(1, 20000, 1)) 110 | expect_equal(GenomicRanges::end(gr2), c(10000, 30000, 2)) 111 | expect_equal(as.character(gr2$V4), c("col", "n\tan", "c;ol")) 112 | 113 | 114 | 115 | gr1 <- expect_silent(toGRanges("test_data_files/only_comments_and_header.txt")) 116 | expect_is(gr1, "GRanges") 117 | expect_length(gr1, 0) 118 | 119 | expect_equal(toGRanges("test_data_files/only_comments.txt"), gr1) 120 | 121 | expect_equal(toGRanges("test_data_files/empty_file.txt"), gr1) 122 | 123 | }) 124 | 125 | #' A <- data.frame(chr=1, start=c(1, 15, 24), end=c(10, 20, 30), x=c(1,2,3), y=c("a", "b", "c")) 126 | #' gr1 <- toGRanges(A) 127 | #' 128 | #' #No need to give the data.frame columns any specific name 129 | #' A <- 
data.frame(1, c(1, 15, 24), c(10, 20, 30), x=c(1,2,3), y=c("a", "b", "c")) 130 | #' gr2 <- toGRanges(A) 131 | #' 132 | #' #We can pass the data without building the data.frame 133 | #' gr3 <- toGRanges("chr9", 34229289, 34982376, x="X") 134 | #' 135 | #' #And each argument can be a vector (they will be recycled as needed) 136 | #' gr4 <- toGRanges("chr9", c(34229289, 40000000), c(34982376, 50000000), x="X", y=c("a", "b")) 137 | #' 138 | #' #toGRanges will automatically convert the second and third argument into numerics 139 | #' gr5 <- toGRanges("chr9", "34229289", "34982376") 140 | #' 141 | #' #It can be a file from disk 142 | #' bed.file <- system.file("extdata", "my.special.genes.txt", package="regioneR") 143 | #' gr6 <- toGRanges(bed.file) 144 | #' 145 | #' #Or a URL to a valid file 146 | #' gr7 <- toGRanges("http://molb7621.github.io/workshop/_downloads/lamina.bed") 147 | #' 148 | #' #It can also parse genomic location strings 149 | #' gr8 <- toGRanges("chr9:34229289-34982376") 150 | #' 151 | #' #more than one 152 | #' gr9 <- toGRanges(c("chr9:34229289-34982376", "chr10:1000-2000")) 153 | #' 154 | #' #even with strange and mixed syntaxes 155 | #' gr10 <- toGRanges(c("chr4:3873-92928", "chr4:3873,92928", "chr5:33,444-45,555")) 156 | #' 157 | #' #if the genome is given it is used to annotate the resulting GRanges 158 | #' gr11 <- toGRanges(c("chr9:34229289-34982376", "chr10:1000-2000"), genome="hg19") 159 | #' 160 | #' 161 | #' #and the genome is added to the GRanges even if A is a GRanges 162 | #' gr12 <- toGRanges(gr6, genome="hg19") 163 | #' 164 | #' #And it will change the chromosome naming of the GRanges to match that of the 165 | #' #genome if it is possible (using GenomeInfoDb::seqlevelsStyle) 166 | #' gr2 167 | #' gr13 <- toGRanges(gr2, genome="hg19") 168 | #' 169 | #' #in addition, it can convert other objects into GRanges such as the 170 | #' #result of GenomicRanges::coverage 171 | #' 172 | #' gr14 <- toGRanges(c("1:1-20", "1:5-25", "1:18-40")) 
173 | #' cover <- GenomicRanges::coverage(gr14) 174 | #' gr15 <- toGRanges(cover) 175 | 176 | -------------------------------------------------------------------------------- /R/plot.permTestResults.R: -------------------------------------------------------------------------------- 1 | # Plot Permutation Test Results 2 | # 3 | # @description 4 | #' Function for plotting the results from a \code{permTestResults} object. 5 | #' 6 | #' @method plot permTestResults 7 | #' 8 | #' @param x an object of class \code{permTestResults}. 9 | #' @param pvalthres p-value threshold for significance. Default is 0.05. 10 | #' @param plotType the type of plot to display. This must be one of \code{"Area"} or \code{"Tailed"}. Default is \code{"Tailed"}. 11 | #' @param main a character specifying the title of the plot. Defaults to "". 12 | #' @param xlab a character specifying the label of the x axis. Defaults to NULL, which produces a plot with the evaluation function name as the x axis label. 13 | #' @param ylab a character specifying the label of the y axis. Defaults to "". 14 | #' @param ylim defines the y limits of the plot. Passed to the underlying \code{hist} call. Defaults to NULL, which computes the limits from the data. 15 | #' @param xlim defines the x limits of the plot. Passed to the underlying \code{hist} call. Defaults to NULL, which computes the limits from the data. 16 | #' @param ... further arguments to be passed to or from methods. 17 | #' 18 | #' @return A plot is created on the current graphics device. 
19 | #'
20 | #' @seealso \code{\link{permTest}}
21 | #'
22 | #' @examples
23 | #'
24 | #' genome <- filterChromosomes(getGenome("hg19"), keep.chr="chr1")
25 | #' A <- createRandomRegions(nregions=20, length.mean=10000000, length.sd=20000, genome=genome, non.overlapping=FALSE)
26 | #' B <- c(A, createRandomRegions(nregions=10, length.mean=10000, length.sd=20000, genome=genome, non.overlapping=FALSE))
27 | #'
28 | #' pt <- overlapPermTest(A=A, B=B, ntimes=10, genome=genome, non.overlapping=FALSE)
29 | #' summary(pt)
30 | #' plot(pt)
31 | #' plot(pt, plotType="Tailed")
32 | #'
33 | #' pt2 <- permTest(A=A, B=B, ntimes=10, alternative="auto", genome=genome, evaluate.function=meanDistance, randomize.function=randomizeRegions, non.overlapping=FALSE)
34 | #' summary(pt2)
35 | #' plot(pt2)
36 | #' plot(pt2, plotType="Tailed")
37 | #'
38 | #' @import graphics
39 | #' @importFrom stats dnorm qnorm rnorm runif
40 | #'
41 | #' @export
42 |
43 |
44 | plot.permTestResults<-function(x, pvalthres=0.05, plotType="Tailed", main="", xlab=NULL, ylab="", ylim=NULL, xlim=NULL, ...){
45 |   #Scientific notation is disabled while plotting; on.exit restores the previous value even if an error is raised below
46 |   old.scipen <- options()$scipen
47 |   on.exit(options(scipen=old.scipen), add=TRUE)
48 |   options(scipen=999)
49 |   #inherits() also accepts objects with additional classes, where class(x)!="..." would give a vector condition
50 |   if(!inherits(x, "permTestResults")) stop("x must be a permTestResults object")
51 |   if(!is.numeric(pvalthres)) stop("pvalthres must be numeric")
52 |   plotType<-match.arg(plotType,c("Area","Tailed"))
53 |
54 |   #default x label is the name of the evaluation function; a non-empty title gets its own line above the statistics
55 |   if(is.null(xlab)) xlab <- paste0(x$evaluate.function.name)
56 |   if(nchar(main)>0) main <- paste0(main, "\n")
57 |
58 |   alternative<-x$alternative
59 |   xcoords<-x$permuted
60 |   xcoords<-xcoords[order(xcoords)]
61 |   pval<-round(x$pval,4)
62 |   nperm<-x$ntimes
63 |   mperm<-mean(xcoords,na.rm=TRUE)
64 |   mobs<-x$observed
65 |   zscore<-round(x$zscore,3)
66 |   #finite Z-score: histogram of the permuted values plus the fitted normal density and the significance limit
67 |   if(is.finite(zscore)){
68 |     y<-dnorm(xcoords,mean=mean(xcoords,na.rm=TRUE),sd=stats::sd(xcoords,na.rm=TRUE))
69 |     xhist<-hist(xcoords,breaks=30,plot=FALSE)$density
70 |     ymax<-max(max(y,na.rm=TRUE),max(xhist,na.rm=TRUE))
71 |     #aux is the value of the evaluation function at the significance limit given by pvalthres
72 |     if (alternative=="greater") aux<-qnorm((1-pvalthres),mean=mean(xcoords,na.rm=TRUE),sd=stats::sd(xcoords,na.rm=TRUE))
73 |     if (alternative=="less") aux<-qnorm(pvalthres,mean=mean(xcoords,na.rm=TRUE),sd=stats::sd(xcoords,na.rm=TRUE))
74 |     #NOTE(review): aux is only defined for "greater"/"less"; confirm x$alternative can never hold another value (e.g. "auto")
75 |     xmin<-min(mobs, min(xcoords,na.rm=TRUE), min(aux,na.rm=TRUE), na.rm=TRUE)
76 |     xmax<-max(mobs, max(xcoords,na.rm=TRUE), max(aux,na.rm=TRUE), na.rm=TRUE)
77 |
78 |     if(is.null(ylim)) ylim <- c(0,ymax)
79 |     if(is.null(xlim)) xlim <- c(xmin,xmax)
80 |
81 |     hist(xcoords, prob = TRUE, ylim = ylim, breaks = 30, xlim = xlim,
82 |          las = 1, col = "lightgray", border = "lightgray", xlab=xlab, ylab=ylab, main=paste(main, "p-value: ",
83 |          pval, "\n Z-score: ", zscore, "\n n perm: ", nperm, "\n randomization: ", paste0(x$randomize.function.name)),
84 |          cex.main=0.8, ...)
85 |
86 |     if(plotType=="Area"){
87 |       if(alternative=="greater"){
88 |         polygon(c(aux,aux,xmax,xmax),c(max(y,na.rm=TRUE),0,0,max(y,na.rm=TRUE)),col="red",density=10,border="white")
89 |         lines(c(aux,aux),c(0,ymax*0.8),col="red",lwd=3)
90 |         text(aux,ymax*0.9,bquote(alpha==.(pvalthres)),cex=0.8,pos=4)
91 |       }
92 |       if(alternative=="less"){
93 |         polygon(c(aux,aux,xmin,xmin),c(max(y,na.rm=TRUE),0,0,max(y,na.rm=TRUE)),col="red",density=10,border="white")
94 |         lines(c(aux,aux),c(0,ymax*0.8),col="red",lwd=3)
95 |         text(aux,ymax*0.9,bquote(alpha==.(pvalthres)),cex=0.8)
96 |       }
97 |     }
98 |
99 |     if(plotType=="Tailed"){
100 |       if(alternative=="greater"){
101 |         aux3<-seq(aux,xmax,length=50)
102 |         y3<-dnorm(aux3,mean(xcoords,na.rm=TRUE),stats::sd(xcoords,na.rm=TRUE))
103 |         polygon(c(aux3[1],aux3,aux3[length(aux3)]),c(0,y3,0),col="red",density=10,border="white")
104 |         lines(c(aux,aux),c(0,ymax*0.8),col="red",lwd=3)
105 |         text(aux,ymax*0.9,bquote(alpha==.(pvalthres)),cex=0.8)
106 |       }
107 |       if(alternative=="less"){
108 |         aux3<-seq(aux,xmin,length=50)
109 |         y3<-dnorm(aux3,mean(xcoords,na.rm=TRUE),stats::sd(xcoords,na.rm=TRUE))
110 |         polygon(c(aux3[1],aux3,aux3[length(aux3)]),c(0,y3,0),col="red",density=10,border="white")
111 |         lines(c(aux,aux),c(0,ymax*0.8),col="red",lwd=3)
112 |         text(aux,ymax*0.9,bquote(alpha==.(pvalthres)),cex=0.8)
113 |       }
114 |     }
115 |
116 |     #fitted normal curve, mean of the permutations (black) and observed value (green)
117 |     lines(xcoords,y,lwd=2)
118 |     lines(c(mperm,mperm),c(0,ymax*0.8),col="black",lwd=3)
119 |     text(mperm,ymax*0.9,expression(Ev[perm]),cex=0.8)
120 |
121 |     lines(c(mobs,mobs),c(0,ymax*0.8),col="forestgreen",lwd=3)
122 |     text(mobs,ymax*0.9,expression(Ev[obs]),cex=0.8)
123 |     arrows(mperm,ymax*0.75,mobs,ymax*0.75,length=0.1,code=3)
124 |     box(lwd=1.2)
125 |   }
126 |   #non-finite Z-score: only the histogram and the two markers are drawn, no normal curve
127 |   if(!is.finite(zscore)){
128 |     xhist<-hist(xcoords,breaks=30,plot=FALSE)$density
129 |     ymax<-max(xhist,na.rm=TRUE)
130 |
131 |     if(is.null(ylim)) ylim <- c(0,ymax)
132 |     if(is.null(xlim)) xlim <- c(min(mobs,min(xcoords,na.rm=TRUE),na.rm=TRUE), max(mobs,max(xcoords,na.rm=TRUE),na.rm=TRUE))
133 |
134 |
135 |     hist(xcoords, prob = TRUE, ylim = ylim, breaks = 30, xlim = xlim, las = 1, col = "lightgray", border = "lightgray", xlab=xlab, ylab=ylab, main=paste(main, "p-value: ", pval, "\n Z-score: ", zscore, "\n n perm: ", nperm, "\n randomization: ", paste0(x$randomize.function.name)), cex.main=0.8, ...)
136 |
137 |     lines(c(mperm,mperm),c(0,ymax*0.8),col="black",lwd=3)
138 |     text(mperm,ymax*0.9,expression(Ev[perm]),cex=0.8)
139 |
140 |     lines(c(mobs,mobs),c(0,ymax*0.8),col="forestgreen",lwd=3)
141 |     text(mobs,ymax*0.9,expression(Ev[obs]),cex=0.8)
142 |     arrows(mperm,ymax*0.75,mobs,ymax*0.75,length=0.1,code=3)
143 |     box(lwd=1.2)
144 |
145 |
146 |     warning(paste0("all permuted values are equal to ",xcoords[1],". It is not posible to adjust a normal distribution nor to compute a Z-score."))
147 |
148 |   }
149 |   #kept as the last expression so the (invisible) return value of the function does not change
150 |   options(scipen=old.scipen)
151 |
152 | }
153 |
-------------------------------------------------------------------------------- /man/toGRanges.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/toGRanges.R 3 | \name{toGRanges} 4 | \alias{toGRanges} 5 | \title{toGRanges} 6 | \usage{ 7 | toGRanges(A, ..., genome=NULL, sep=NULL, comment.char="#") 8 | } 9 | \arguments{ 10 | \item{A}{a \code{\link{data.frame}} containing a region set, a \code{\link{GRanges}} object, a BED file, any type of file supported by \code{rtracklayer::import} or a \code{"SimpleRleList"} returned by \code{GenomicRanges::coverage}. If there are more than 1 argument, it will build a dataframe out ouf them and process it as usual. If there's only a single argument and it's a character, if it's not an existing file name it will be treated as the definition of a genomic region in the UCSC/IGV format (i.e. "chr9:34229289-34982376") and parsed.} 11 | 12 | \item{...}{further arguments to be passed to other methods.} 13 | 14 | \item{genome}{(character or BSgenome) The genome info to be attached to the created GRanges. If NULL no genome info will be attached. (defaults to NULL)} 15 | 16 | \item{sep}{(character) The field separator in the text file. If NULL it will be automatically guessed. Only used when reading some file formats. (Defaults to NULL)} 17 | 18 | \item{comment.char}{(character) The character marking comment lines. Only used when reading some file formats. (Defaults to "#")} 19 | } 20 | \value{ 21 | A \code{\link{GRanges}} object with the regions in A 22 | } 23 | \description{ 24 | Transforms a file or an object containing a region set into a 25 | \code{\link{GRanges}} object. 
26 | } 27 | \details{ 28 | If A is already a \code{\link{GRanges}} object, it will be returned untouched (unless \code{genome} is given, see below). 29 | 30 | If A is a data frame, the function will assume the first three columns are 31 | chromosome, start and end and create a \code{\link{GRanges}} object. Any 32 | additional column will be considered metadata and stored as such in the 33 | \code{\link{GRanges}} object. There are 2 special cases: 1) if A is a 34 | data.frame with only 2 columns, it will assume the first one is the 35 | chromosome and the second one the position and it will create a GRanges with 36 | single base regions and 2) if the data.frame has the first 3 columns named 37 | "SNP", "CHR" and "BP" it will shuffle the columns and repeat "BP" to build 38 | a GRanges of single base regions (this is the standard output format of plink). 39 | 40 | If A is not a data.frame and there are more parameters, it will try to build 41 | a data.frame with all parameters and use that data.frame to build the 42 | GRanges. This allows the user to call it like 43 | \code{toGRanges("chr1", 10, 20)}. 44 | 45 | If A is a character or a character vector and it's not a file or a URL, it 46 | assumes it's a genomic position description in the form used by UCSC or 47 | IGV, "chr2:1000-2000". It will try to parse the character strings into 48 | chromosome, start and end and create a GRanges. The parser can deal with 49 | commas separating thousands (e.g. "chr2:1,000-2,000") and with the comma 50 | used as a start/end separator (e.g. "chr2:1000,2000"). These different 51 | variants can be mixed in the same character vector. 52 | 53 | If A is a "SimpleRleList" it will be interpreted as the result from 54 | GenomicRanges::coverage and the function will return a GRanges with a 55 | single metadata column named "coverage". 
56 | 57 | If A is a file name (local or remote) or a connection to a file, it will try 58 | to load it in different ways: 59 | * BED files (identified by a "bed" extension): will be loaded using 60 | \code{rtracklayer::import} function. Coordinates are 0 based as 61 | described in the BED specification (https://genome.ucsc.edu/FAQ/FAQformat.html#format1). 62 | * PLINK assoc files (identified by ".assoc", ".assoc.fisher", 63 | ".assoc.dosage", ".assoc.linear", ".assoc.logistic"): will be loaded 64 | as single-base ranges with all original columns present and the SNPs ids 65 | as the ranges names 66 | * Any other file: It assumes the file is a "generic" tabular file. To load 67 | it it will ignore any header line starting with \code{comment.char}, 68 | autodetect the field separator (if not provided by the user), 69 | autodetect if it has a header and read it accordingly. 70 | 71 | The \code{genome} parameter can be used to set the genome information of 72 | the created GRanges. It can be either a \code{\link{BSgenome}} object or a 73 | character string defining a genome (e.g. "hg19", "mm10"...) as accepted 74 | by the \code{BSgenome::getBSgenome} function. If a valid genome is 75 | given and the corresponding BSgenome package is installed, the genome 76 | information will be attached to the GRanges. If the chromosome naming style 77 | from the GRanges and the genome object are different, it will try to change 78 | the GRanges styles to match those of the genome using 79 | \code{GenomeInfoDb::seqlevelsStyle}. 80 | } 81 | \note{ 82 | **IMPORTANT:** Regarding the coordinates, BED files are 0 based 83 | while \code{data.frames} and generic files are treated as 1 based. Therefore 84 | reading a line "chr9 100 200" from a BED file will create a 100 bases wide 85 | interval starting at base 101 and ending at 200 but reading it from a txt 86 | file or from a \code{data.frame} will create a 101 bases wide interval 87 | starting at 100 and ending at 200. 
This is specially relevant in 1bp 88 | intervals. For example, the 10th base of chromosome 1 would be 89 | "chr1 9 10" in a BED file and "chr1 10 10" in a txt file. 90 | } 91 | \examples{ 92 | A <- data.frame(chr=1, start=c(1, 15, 24), end=c(10, 20, 30), x=c(1,2,3), y=c("a", "b", "c")) 93 | gr1 <- toGRanges(A) 94 | 95 | #No need to give the data.frame columns any specific name 96 | A <- data.frame(1, c(1, 15, 24), c(10, 20, 30), x=c(1,2,3), y=c("a", "b", "c")) 97 | gr2 <- toGRanges(A) 98 | 99 | #We can pass the data without building the data.frame 100 | gr3 <- toGRanges("chr9", 34229289, 34982376, x="X") 101 | 102 | #And each argument can be a vector (they will be recycled as needed) 103 | gr4 <- toGRanges("chr9", c(34229289, 40000000), c(34982376, 50000000), x="X", y=c("a", "b")) 104 | 105 | #toGRanges will automatically convert the second and third argument into numerics 106 | gr5 <- toGRanges("chr9", "34229289", "34982376") 107 | 108 | #It can be a file from disk 109 | bed.file <- system.file("extdata", "my.special.genes.txt", package="regioneR") 110 | gr6 <- toGRanges(bed.file) 111 | 112 | #Or a URL to a valid file 113 | #gr7 <- toGRanges("http://path.to/myfile.bed") 114 | 115 | #It can also parse genomic location strings 116 | gr8 <- toGRanges("chr9:34229289-34982376") 117 | 118 | #more than one 119 | gr9 <- toGRanges(c("chr9:34229289-34982376", "chr10:1000-2000")) 120 | 121 | #even with strange and mixed syntaxes 122 | gr10 <- toGRanges(c("chr4:3873-92928", "chr4:3873,92928", "chr5:33,444-45,555")) 123 | 124 | #if the genome is given it is used to annotate the resulting GRanges 125 | gr11 <- toGRanges(c("chr9:34229289-34982376", "chr10:1000-2000"), genome="hg19") 126 | 127 | 128 | #and the genome is added to the GRanges even if A is a GRanges 129 | gr12 <- toGRanges(gr6, genome="hg19") 130 | 131 | #And it will change the chromosome naming of the GRanges to match that of the 132 | #genome if it is possible (using GenomeInfoDb::seqlevelsStyle) 133 | gr2 134 | 
gr13 <- toGRanges(gr2, genome="hg19") 135 | 136 | #in addition, it can convert other objects into GRanges such as the 137 | #result of GenomicRanges::coverage 138 | 139 | gr14 <- toGRanges(c("1:1-20", "1:5-25", "1:18-40")) 140 | cover <- GenomicRanges::coverage(gr14) 141 | gr15 <- toGRanges(cover) 142 | 143 | 144 | 145 | } 146 | \seealso{ 147 | \code{\link{toDataframe}} 148 | } 149 | -------------------------------------------------------------------------------- /inst/extdata/my.regular.genes.txt: -------------------------------------------------------------------------------- 1 | chr start end V6 V4 2 | chr2 152214105 152236562 + NM_007115 3 | chr1 31421964 31422052 - NR_033295 4 | chr4 185814153 185820615 - NR_039975 5 | chr11 19734880 20143147 + NM_001244963 6 | chr10 93170041 93274585 + NM_001284274 7 | chr9 131452253 131458675 + NM_001248001 8 | chr12 50523088 50561316 - NM_001281731 9 | chr19 55434876 55458873 - NM_206828 10 | chr14 96722546 96731100 + NM_000710 11 | chr1 113499036 113506690 + NR_103744 12 | chr7 92817898 92855832 - NM_001288804 13 | chr7 6414125 6443598 + NM_018890 14 | chrX 151918386 151922408 - NM_005361 15 | chr6 31367560 31383092 + NM_001289153 16 | chr17 38710021 38717265 - NM_001301717 17 | chr15 83211950 83316356 - NM_001288819 18 | chr7 23140846 23145322 - NR_046220 19 | chr8 41119475 41166990 - NM_003012 20 | chr2 9346893 9545812 + NM_001135191 21 | chr10 27484142 27531068 - NM_001042473 22 | chr7 100813773 100823557 - NM_198571 23 | chr1 184020784 184043344 + NM_052965 24 | chr15 28671636 28671730 + NR_039732 25 | chr7 142478756 142482399 + NR_001296 26 | chr17 4692253 4693884 + NM_001014985 27 | chr6 161768589 163148834 - NM_013987 28 | chr21 18885223 18942429 + NM_001207064 29 | chr1 158901336 158946849 + NM_152501 30 | chr6 109765265 109777190 - NM_001159291 31 | chr7 66018454 66057394 - NR_111979 32 | chr3 184089722 184097476 - NM_001289998 33 | chr20 50767816 50808524 - NM_022088 34 | chr19 54822434 54824409 - NM_181879 35 | 
chr14 42076763 42373752 + NM_152447 36 | chr5 179105558 179107975 - NM_001164444 37 | chr15 31357234 31357344 - NR_029624 38 | chr3 19988571 20026667 + NM_001292048 39 | chr1 213165523 213189217 - NM_001300757 40 | chr2 202047620 202086383 + NM_001230 41 | chr21 15646119 15673692 + NR_003087 42 | chr6 101846860 102517958 + NM_175768 43 | chr1 213003484 213020991 - NM_001024601 44 | chr1 21132784 21503381 - NM_001198801 45 | chr11 14479048 14521404 - NM_016451 46 | chr12 75728462 75764169 + NM_152779 47 | chr19 18304039 18307550 + NM_032683 48 | chr19 56989242 57006805 + NR_036521 49 | chr12 119772516 119978852 + NM_178499 50 | chr1 51435641 51440309 + NM_078626 51 | chr6 31694816 31698039 - NM_013974 52 | chr14 51800110 51832275 + NR_038358 53 | chr1 62660473 62678001 + NM_019079 54 | chr1 12907260 12908578 - NM_001013631 55 | chr3 153202283 153220486 + NM_001101337 56 | chr19 14693895 14721956 + NR_109784 57 | chr15 52121824 52204331 + NM_014547 58 | chr9 12775011 12823059 + NM_203403 59 | chr7 150709296 150721586 - NR_073169 60 | chr7 129470572 129592800 - NM_003344 61 | chr12 111086490 111126962 - NM_032369 62 | chrX 133303700 133303796 - NR_029491 63 | chr3 27757439 27764206 - NM_005442 64 | chr8 120569316 120651106 - NM_001130863 65 | chrX 125683365 125686842 - NM_178470 66 | chr1 152730505 152734529 + NM_001025231 67 | chr5 52405671 52410956 + NR_034107 68 | chr22 39410264 39414825 + NM_014508 69 | chr2 55461299 55461985 + NR_039624 70 | chrX 51075082 51080377 + NM_153183 71 | chr1 20439142 20446059 - NM_012400 72 | chr9 123850573 123939886 + NM_007018 73 | chr17 56048909 56065615 - NM_007146 74 | chr19 35168543 35177302 + NM_001012320 75 | chr8 120428551 120436678 + NM_002514 76 | chr4 150999425 151178608 + NR_036614 77 | chr5 159848813 159855751 + NM_004219 78 | chr14 96176303 96180533 - NM_001098725 79 | chr6 34214156 34216885 - NM_178508 80 | chr6 32260474 32339689 - NM_006781 81 | chr8 94041978 94042067 + NR_107051 82 | chr15 52043757 52108558 + 
NM_001142885 83 | chr11 126132813 126138877 - NM_003139 84 | chr20 3776385 3786768 + NM_021873 85 | chrX 149934808 150067289 - NM_001242614 86 | chr12 9747869 9760497 - NM_002258 87 | chr3 48473579 48481529 - NM_001256968 88 | chr1 206940947 206945839 - NM_000572 89 | chr1 3652547 3663937 - NR_033710 90 | chr1 6581406 6614658 - NM_024654 91 | chr9 45003233 45005290 - NR_121606 92 | chr2 127805598 127864903 - NM_004305 93 | chr11 9596233 9611313 + NM_001143976 94 | chr12 7342440 7364079 + NM_001300789 95 | chr19 11663857 11670051 - NM_032377 96 | chr9 93589701 93660842 + NM_001174167 97 | chr13 100634025 100639019 + NM_007129 98 | chr19 8576215 8579048 - NM_032370 99 | chr6 4186553 4190263 - NR_125874 100 | chr2 218899656 218955304 + NR_034176 101 | chr1 762970 794826 + NR_047523 102 | chrX 128872945 128903525 + NM_003399 103 | chr21 45959067 45960078 - NM_198691 104 | chr7 143880547 143883173 + NM_198495 105 | chrX 47917566 47930508 - NM_001037735 106 | chr6 43968336 43973694 + NM_001171992 107 | chr14 78174413 78183941 + NM_001267863 108 | chr16 67207755 67209640 + NM_001276311 109 | chrX 41192560 41209540 + NM_001356 110 | chr2 36923832 37041937 + NM_053276 111 | chr19 50341902 50354933 - NR_040037 112 | chr2 239419330 239464140 - NR_037809 113 | chr20 36888576 36917348 + NR_104170 114 | chr1 92495532 92529093 + NM_173567 115 | chr5 133634114 133706738 - NM_001300853 116 | chr18 77160273 77289323 + NM_172389 117 | chr1 47681961 47697387 - NM_001287347 118 | chr21 38378862 38391958 + NM_018962 119 | chr17 79163392 79196751 - NM_014984 120 | chr20 1161214 1165117 - NM_018354 121 | chr6 80714321 80752244 + NM_003318 122 | chr7 135071821 135194875 - NM_001008225 123 | chr8 7170367 7177473 + NR_003668 124 | chrX 52079698 52079784 - NR_107055 125 | chr3 111311746 111314182 - NM_024508 126 | chr22 19467348 19508135 + NM_001178011 127 | chr8 54628102 54755871 - NM_015941 128 | chr6 3259161 3264097 + NM_001135750 129 | chr19 45251977 45263301 + NM_005178 130 | chr11 
78244221 78269611 + NR_120566 131 | chr8 1993157 2093380 + NM_003970 132 | chr13 51796469 51858377 + NM_001242312 133 | chr15 33595857 33602446 - NR_120326 134 | chr18 63417487 63548175 + NM_033646 135 | chr17 66255322 66417000 + NM_014960 136 | chr6 86323692 86351169 - NM_001159675 137 | chr12 12224401 12252627 + NM_030766 138 | chr14 32546494 32628934 + NM_001173 139 | chr4 15969848 16085623 - NM_001145848 140 | chr5 133936838 133968533 - NM_001033503 141 | chr7 80231522 80303734 + NM_001289911 142 | chr12 9217772 9220651 + NR_026971 143 | chrX 134866213 134874249 + NM_001291535 144 | chr2 69546900 69614386 - NM_001244710 145 | chr12 13127798 13153243 - NM_015987 146 | chr3 127317199 127341278 + NM_004526 147 | chr16 19535178 19564728 + NM_001199022 148 | chr8 7340025 7343909 - NM_001040704 149 | chr13 88270919 88270995 - NR_039722 150 | chr7 128502856 128505903 + NM_001198909 151 | chr5 55807220 55902059 - NM_001287053 152 | chr1 38462441 38471187 - NM_004468 153 | chr14 20896969 20903801 - NM_001109997 154 | chr19 15464331 15490612 - NM_005858 155 | chr15 83140203 83182973 - NR_102747 156 | chr4 9364854 9366447 + NM_001256867 157 | chr3 40432441 40494799 - NR_040100 158 | chr9 137533650 137736688 + NM_001278074 159 | chr3 184089722 184097476 - NM_001290027 160 | chr6 26634610 26659980 - NM_001242797 161 | chr1 55532031 55681039 - NM_015306 162 | chr3 25469753 25639422 + NM_001290276 163 | chr1 149754244 149783928 - NM_001161334 164 | chr18 66340924 66382353 - NM_019022 165 | chr6 112408673 112423993 + NM_001033564 166 | chr8 37263981 37351431 - NR_121620 167 | chr12 131438451 131626008 + NM_198827 168 | chr6 112574983 112575917 - NM_001105208 169 | chr7 129804552 129845338 - NM_001301163 170 | chr21 35818985 35831902 - NM_001127669 171 | chr2 71295407 71305998 + NM_017567 172 | chr12 120779132 120806983 - NM_002442 173 | chr1 248860362 248862411 + NR_125950 174 | chr1 54604667 54618679 - NM_201546 175 | chr10 74766979 74856732 - NM_001142595 176 | chr3 33038099 
33138694 - NM_000404 177 | chr19 5690271 5691678 + NM_033643 178 | chr14 39735501 39820397 + NM_203356 179 | chr11 62760295 62783317 - NM_001184736 180 | chr16 20775311 20808479 + NM_005622 181 | chr10 38238794 38265453 - NM_145011 182 | chr11 5528529 5531153 - NM_017481 183 | chrX 16862774 16888534 - NM_002893 184 | chr2 68592321 68624585 + NM_002664 185 | chr12 58218391 58218475 - NR_029847 186 | chr6 97010423 97064512 + NM_001170807 187 | chr22 46731297 46753237 + NM_001282782 188 | chr2 242053049 242088919 - NM_001252124 189 | chr5 1317999 1345002 - NM_030782 190 | chr3 10327433 10334631 - NR_024132 191 | chrX 75392763 75398145 + NM_001300888 192 | chr5 137225124 137276156 + NM_001258449 193 | chr21 31933416 31933608 - NM_181614 194 | chr3 183165395 183173800 + NR_038302 195 | chr2 88056717 88125286 - NM_001078170 196 | chr2 114648174 114719129 + NM_001277140 197 | chrX 120101740 120105058 - NM_173571 198 | chr7 99775365 99812010 + NM_001282718 199 | chr13 99906966 99910682 - NM_005292 200 | chr5 70196495 70203942 + NM_001178087 201 | chr2 232373136 232379050 - NR_024079 202 | -------------------------------------------------------------------------------- /inst/extdata/my.special.genes.txt: -------------------------------------------------------------------------------- 1 | chr start end V6 V4 2 | chr6 30539169 30559309 + NM_001025091 3 | chr6 28129538 28137373 + NR_103448 4 | chr5 137673223 137685418 + NM_001135647 5 | chr5 136310986 136835018 - NM_004598 6 | chr10 118430702 118502085 - NM_025015 7 | chr6 30457182 30461982 + NM_005516 8 | chr17 66863427 66951533 - NM_001288986 9 | chr19 10812111 10824043 + NM_031209 10 | chr12 32943679 33049780 - NM_001005242 11 | chr20 62496580 62522898 + NM_003288 12 | chr6 32121775 32136062 + NR_037861 13 | chr3 50643884 50649262 - NM_145071 14 | chr20 10733737 10734675 + NR_110611 15 | chr9 134378288 134399193 + NM_007171 16 | chr7 20686965 20796637 + NM_178559 17 | chr19 21987750 22034870 - NM_001256648 18 | chr3 
43120720 43147575 - NM_032806 19 | chr2 188328957 188419219 - NM_006287 20 | chr11 89057521 89224146 - NM_001291927 21 | chr12 54104902 54121307 - NM_001143682 22 | chr7 29238655 29248586 - NR_120522 23 | chr2 26915580 26954066 + NM_002246 24 | chr10 17428934 17450285 + NR_034129 25 | chr16 57728712 57765367 + NM_032269 26 | chr6 55299170 55444012 - NR_109869 27 | chr18 22208145 22242162 + NR_040033 28 | chr20 23355155 23402156 - NM_001283026 29 | chr12 76738265 76742222 - NM_024685 30 | chrX 153029650 153044801 + NM_005393 31 | chr1 10707262 10856733 - NM_017766 32 | chr5 68485374 68506184 + NM_022909 33 | chr9 124030379 124095120 + NM_001127662 34 | chr10 135207620 135234174 + NM_138384 35 | chr4 108968700 109090112 - NM_001130713 36 | chrY 27768263 27770485 + NM_170723 37 | chr11 2017988 2018061 - NR_030533 38 | chr8 49984899 49988642 + NM_001256598 39 | chr3 119147806 119182529 - NR_073506 40 | chr19 58570606 58581110 + NM_001164529 41 | chr4 113970784 114304896 + NM_020977 42 | chr1 160454819 160493052 - NM_001184716 43 | chr15 89055713 89089912 - NR_026645 44 | chr12 31824070 31882108 - NR_103522 45 | chr12 56555635 56583351 - NM_139067 46 | chr6 35310334 35395968 + NM_001171820 47 | chr7 154002296 154686000 + NM_001936 48 | chr5 132332676 132362275 - NR_125337 49 | chr9 95820969 95847418 + NM_001287007 50 | chr7 107204401 107218968 + NR_073003 51 | chr16 89894906 89937727 + NM_032451 52 | chr5 37291734 37371228 - NM_004298 53 | chr12 50721094 50790405 - NM_001145475 54 | chr10 18240767 18332221 + NM_001282734 55 | chr1 109656584 109749403 + NM_020775 56 | chr6 10762955 10838788 - NM_001242957 57 | chr2 113973573 114036498 - NM_013953 58 | chr10 74870132 74891586 + NM_001283017 59 | chr7 130626518 130794675 - NR_109855 60 | chr11 33563876 33695646 + NM_012194 61 | chr10 104404251 104418076 + NM_030912 62 | chr15 80135888 80189339 - NR_037654 63 | chr1 247273461 247275719 - NM_207401 64 | chr12 15260715 15374411 - NM_032918 65 | chr8 104383742 104395232 + 
NM_138455 66 | chr22 51007289 51021428 - NR_027928 67 | chr12 94853778 94856344 + NR_027035 68 | chr19 10596795 10614054 - NM_203500 69 | chr15 77223961 77242601 + NM_002902 70 | chr4 119771842 119982402 + NM_001286754 71 | chr9 101978706 101984246 - NR_024532 72 | chr5 159343739 159400017 + NM_000679 73 | chr15 102358389 102359328 + NM_001001674 74 | chr10 43867091 43871783 + NM_173160 75 | chr1 16767166 16786584 + NM_001145277 76 | chr19 40885177 40896094 - NM_144685 77 | chr14 101292444 101327360 + NR_046466 78 | chrX 21392535 21672813 + NM_014927 79 | chr9 139981378 140003639 + NR_045720 80 | chr8 54138275 54164194 - NM_000912 81 | chr16 61554 64090 + NR_045117 82 | chr2 155555092 155714864 + NM_002239 83 | chr17 76227390 76237068 + NM_001204211 84 | chr11 2421717 2425108 + NM_001297660 85 | chrX 118108576 118152318 + NR_110311 86 | chr1 11796141 11810828 + NM_001040197 87 | chr15 66839805 66857835 - NM_207338 88 | chr19 16472388 16582823 - NR_047666 89 | chr7 73624086 73644164 + NM_032463 90 | chr18 54318615 54697036 + NM_015285 91 | chr19 50168398 50177173 + NM_138639 92 | chr19 53115617 53141644 - NM_001105552 93 | chr5 134303595 134347397 + NM_178019 94 | chr9 116077930 116102620 - NM_001012361 95 | chr6 26087508 26095469 + NM_139010 96 | chr16 29985187 29999726 + NM_016151 97 | chrX 22180848 22191100 - NR_046639 98 | chr12 6456008 6486523 - NM_001159575 99 | chr7 44178462 44180916 - NM_021223 100 | chr1 852197 855072 - NR_122045 101 | chr4 9345873 9347466 + NM_001242327 102 | chr17 26205339 26220409 - NM_001076680 103 | chr1 115397423 115537990 + NM_001282542 104 | chr1 20686293 20755287 - NR_033887 105 | chr17 56422535 56429599 - NM_003168 106 | chr22 48670175 48670227 + NR_036172 107 | chr22 19023794 19109967 - NM_001173533 108 | chr2 37311593 37326387 + NM_174931 109 | chr22 48934711 48943199 - NR_038917 110 | chr3 168801286 169381563 - NM_004991 111 | chr17 6926368 6932961 + NM_181844 112 | chr17 5082830 5095178 - NM_032530 113 | chr1 209941832 
209953002 + NM_001287754 114 | chr1 212781969 212794119 + NM_001040619 115 | chr1 154193324 154235981 + NM_001287815 116 | chr3 51741080 51752625 + NM_000839 117 | chr1 31769828 31837800 + NM_001282567 118 | chr22 39410264 39414825 + NM_014508 119 | chr17 38904272 38911584 - NM_181534 120 | chr1 152658598 152659876 + NM_014357 121 | chr2 27587218 27593324 - NM_001034116 122 | chr3 52017299 52023218 + NM_001198895 123 | chr3 39509063 39570988 + NR_003090 124 | chr17 65821779 65980494 + NM_182641 125 | chr17 40724327 40729849 - NM_001256016 126 | chr1 182350838 182361341 - NM_001033044 127 | chr3 120315127 120321258 + NM_004547 128 | chr3 131100624 131107674 + NR_033268 129 | chr2 27848505 27851745 - NM_001142683 130 | chr2 10184371 10194963 + NM_001177718 131 | chr1 206516199 206581301 + NM_001271872 132 | chr3 188280024 188286454 - NR_046623 133 | chr17 34842470 34855154 + NR_104011 134 | chr3 49758908 49761407 - NM_013334 135 | chr1 110753335 110776674 + NR_036437 136 | chr1 24865763 24882515 - NR_109781 137 | chr22 31518908 31530683 + NM_001284286 138 | chr3 194815316 194816786 + NR_104188 139 | chr3 27757439 27764206 - NM_001278182 140 | chr1 26644410 26647014 + NM_001803 141 | chr1 152551859 152552980 - NM_032563 142 | chr1 27938800 27952751 - NM_001042747 143 | chr9 36214438 36277053 - NM_001190388 144 | chr17 28521336 28562986 - NM_001045 145 | chr17 34415602 34417506 - NM_002983 146 | chr3 14989090 15090786 + NM_003298 147 | chr3 129800673 129817233 + NM_001136152 148 | chr1 152004981 152009511 - NM_005620 149 | chr7 16127151 16460947 - NM_001101426 150 | chr22 28315363 28320951 + NR_026962 151 | chr1 202860229 202896371 - NM_021633 152 | chr17 39868577 39872221 + NM_000805 153 | chr22 39708886 39715670 - NM_000967 154 | chr22 24823529 24838328 + NM_001278499 155 | chr7 33169151 33645680 + NM_198428 156 | chr2 20883773 21022890 - NM_001282720 157 | chr1 145470507 145475647 + NM_001039888 158 | chr22 21396680 21398538 - NR_002829 159 | chr1 222763261 
222765975 + NR_110613 160 | chr3 166958076 167098071 - NM_001199202 161 | chr22 25714223 25716193 + NR_029395 162 | chr1 28879528 28905057 + NM_017846 163 | chr3 171318194 171528284 - NM_002662 164 | chr17 34231223 34238027 - NR_126038 165 | chr1 197473878 197744623 - NR_125340 166 | chr1 168148082 168161633 + NM_001031800 167 | chr3 62247493 62304622 - NR_038283 168 | chr3 52558384 52567793 - NM_001134231 169 | chr22 38226861 38240353 - NM_138797 170 | chr1 175291934 175712752 - NM_003285 171 | chr17 37913967 38020441 - NM_001284515 172 | chr1 28585962 28609002 + NM_031459 173 | chr1 147954634 147955419 - NM_001143883 174 | chr3 154797952 154901518 + NM_007288 175 | chr1 211831598 211848972 - NM_001204182 176 | chr22 51061181 51066601 - NM_001085425 177 | chr9 21994789 22121093 + NR_003529 178 | chr17 43973148 43976164 + NR_024560 179 | chr1 178062863 178448648 + NM_170692 180 | chr1 111982511 111991830 - NM_024102 181 | chr7 73082173 73086440 + NM_001077621 182 | chr22 51205919 51222087 - NM_001130919 183 | chr7 73442118 73484236 + NM_001278915 184 | chr3 150690464 150797617 + NR_024066 185 | chr2 26403584 26412532 + NM_001191033 186 | chr1 150336586 150449041 + NM_001297673 187 | chr1 13409127 13414502 + NM_001039361 188 | chr17 46668618 46671103 - NM_002147 189 | chr2 47129008 47143007 - NM_139279 190 | chr3 48663155 48672926 - NM_001281732 191 | chr3 189507448 189615068 + NM_001114980 192 | chr22 37600276 37606729 - NM_001278687 193 | chr7 72476618 72520245 + NR_027776 194 | chr1 175913961 176176380 - NM_001286644 195 | chr7 44084238 44101315 + NM_001014436 196 | chr1 208195587 208417665 - NM_025179 197 | chr2 68361224 68384692 - NM_001256476 198 | chr3 52847005 52864717 - NM_001166449 199 | chr1 213165523 213189217 - NR_125333 200 | chr1 173768687 173793777 - NM_001127181 201 | chr3 167727653 167813417 - NM_014498 202 | --------------------------------------------------------------------------------