├── .svn ├── format ├── wc.db-journal ├── entries ├── wc.db └── pristine │ ├── 18 │ └── 18d19e0f6727f50e5ab77d34454fab81e82d049a.svn-base │ ├── 21 │ └── 2189e6a627b4c711e766c184f50bb9cdc230e821.svn-base │ ├── 33 │ └── 331922c1408ddd46de74575d157bb2d47ccfa3e9.svn-base │ ├── 40 │ └── 40904d832591d5491cf5d562b1708a3f20d517f6.svn-base │ ├── 50 │ └── 50170fd723a599624e474c619511a6ac10f2d072.svn-base │ ├── 65 │ ├── 65e9e8d47fee95ed321413de21d8f8d13f873b17.svn-base │ └── 6537ab78a07d200e9fe894802b6b5f17bdd38fd9.svn-base │ ├── 73 │ └── 73eb11efc6635fdbac1cdc8ec4032d0dd1a2cbe6.svn-base │ ├── 81 │ └── 81fa7ab77bea17ab59d67e253f098ea223b2e1d3.svn-base │ ├── 91 │ └── 9145858a39a39a3d59b0aa20ef1971ab302d2f47.svn-base │ ├── ff │ └── ff35c68c1ccb291931f7f7bc302993d96557fe78.svn-base │ ├── fc │ └── fc49a2303f1b357ae6a059042d8d0c15fabb68fe.svn-base │ ├── e0 │ └── e07c222eeebfc149377f972722b7aa62b4fc86cc.svn-base │ ├── f1 │ └── f1748b5922e3eb9abfa76932622f609be9ff4d0c.svn-base │ ├── 06 │ └── 065e68d6b11e4bf90da04ffd904757e8ce3c422b.svn-base │ ├── a1 │ └── a129614aff000a6de02c214a739f8867a6f01752.svn-base │ └── 4e │ └── 4e9ec76b932b7ba44f5280dec6263ea963e53920.svn-base ├── .gitignore ├── tests ├── testthat.R └── testthat │ ├── test_AnnotationFilterList.R │ ├── test_translate-utils.R │ └── test_AnnotationFilter.R ├── NOTES.md ├── README.md ├── NEWS ├── R ├── AllGenerics.R ├── translate-utils.R ├── AnnotationFilterList.R └── AnnotationFilter.R ├── man ├── GenenameFilter.Rd ├── AnnotationFilterList.Rd └── AnnotationFilter.Rd ├── DESCRIPTION ├── NAMESPACE └── vignettes └── AnnotationFilter.Rmd /.svn/format: -------------------------------------------------------------------------------- 1 | 12 2 | -------------------------------------------------------------------------------- /.svn/wc.db-journal: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.svn/entries: 
-------------------------------------------------------------------------------- 1 | 12 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rhistory 2 | .RData 3 | .svn* 4 | -------------------------------------------------------------------------------- /.svn/wc.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/AnnotationFilter/devel/.svn/wc.db -------------------------------------------------------------------------------- /.svn/pristine/ff/ff35c68c1ccb291931f7f7bc302993d96557fe78.svn-base: -------------------------------------------------------------------------------- 1 | .Rhistory 2 | .RData 3 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(AnnotationFilter) 3 | 4 | test_check("AnnotationFilter") 5 | -------------------------------------------------------------------------------- /.svn/pristine/73/73eb11efc6635fdbac1cdc8ec4032d0dd1a2cbe6.svn-base: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(AnnotationFilter) 3 | 4 | test_check("AnnotationFilter") 5 | -------------------------------------------------------------------------------- /NOTES.md: -------------------------------------------------------------------------------- 1 | # Development guidelines 2 | 3 | - roxygen2 documentation 4 | - testthat unit tests 5 | - file name correspondence between code `R/foo.R`, tests 6 | `tests/testthat/test_foo.R`, and documentation `man/foo.Rd`. 
7 | - version bump on master commit 8 | - commits to master pass R CMD build && R CMD check 9 | -------------------------------------------------------------------------------- /.svn/pristine/50/50170fd723a599624e474c619511a6ac10f2d072.svn-base: -------------------------------------------------------------------------------- 1 | # Development guidelines 2 | 3 | - roxygen2 documentation 4 | - testthat unit tests 5 | - file name correspondence between code `R/foo.R`, tests 6 | `tests/testthat/test_foo.R`, and documentation `man/foo.Rd`. 7 | - version bump on master commit 8 | - commits to master pass R CMD build && R CMD check 9 | -------------------------------------------------------------------------------- /.svn/pristine/fc/fc49a2303f1b357ae6a059042d8d0c15fabb68fe.svn-base: -------------------------------------------------------------------------------- 1 | CHANGES IN VERSION 1.1.2 2 | ------------------------ 3 | 4 | NEW FEATURES 5 | 6 | o supportFilters returns a data.frame with filter class name and field. 7 | 8 | 9 | CHANGES IN VERSION 0.99.5 10 | -------------------------- 11 | 12 | NEW FEATURES 13 | 14 | o Add convertFilterExpressionQuoted function. 15 | o Add field method. 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [](https://bioconductor.org/) 2 | 3 | **AnnotationFilter** is an R/Bioconductor package that provides facilities for filtering Bioconductor annotation resources. 4 | 5 | See https://bioconductor.org/packages/AnnotationFilter for more information including how to install the release version of the package (please refrain from installing directly from GitHub). 
6 | 7 | -------------------------------------------------------------------------------- /.svn/pristine/91/9145858a39a39a3d59b0aa20ef1971ab302d2f47.svn-base: -------------------------------------------------------------------------------- 1 | ## Generic methods. 2 | setGeneric("condition", function(object, ...) standardGeneric("condition")) 3 | 4 | setGeneric("field", function(object, ...) standardGeneric("field")) 5 | 6 | setGeneric("value", function(object, ...) standardGeneric("value")) 7 | 8 | setGeneric("logicOp", function(object, ...) standardGeneric("logicOp")) 9 | 10 | setGeneric("supportedFilters", function(object, ...) 11 | standardGeneric("supportedFilters")) 12 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | CHANGES IN VERSION 1.5.2 2 | ------------------------ 3 | 4 | USER VISIBLE CHANGES 5 | 6 | o Rename GenenameFilter into GeneNameFilter and deprecate GenenameFilter 7 | (issue #22). 8 | 9 | 10 | CHANGES IN VERSION 1.3.1 11 | ------------------------ 12 | 13 | NEW FEATURES 14 | 15 | o Add DoubleFilter 16 | 17 | 18 | CHANGES IN VERSION 1.1.2 19 | ------------------------ 20 | 21 | NEW FEATURES 22 | 23 | o supportedFilters returns a data.frame with filter class name and field. 24 | 25 | 26 | CHANGES IN VERSION 0.99.5 27 | -------------------------- 28 | 29 | NEW FEATURES 30 | 31 | o Add convertFilterExpressionQuoted function. 32 | o Add field method. 33 | -------------------------------------------------------------------------------- /R/AllGenerics.R: -------------------------------------------------------------------------------- 1 | ## Generic methods. 2 | setGeneric("condition", function(object, ...) standardGeneric("condition")) 3 | 4 | setGeneric("field", function(object, ...) standardGeneric("field")) 5 | 6 | setGeneric("value", function(object, ...) 
standardGeneric("value")) 7 | 8 | setGeneric("logicOp", function(object, ...) standardGeneric("logicOp")) 9 | 10 | setGeneric("not", function(object, ...) standardGeneric("not")) 11 | 12 | setGeneric("simplify", function(object, ...) standardGeneric("simplify")) 13 | 14 | setGeneric("convertFilter", function(object, db, ...) 15 | standardGeneric("convertFilter")) 16 | 17 | setGeneric("distributeNegation", function(object, ...) 18 | standardGeneric("distributeNegation")) 19 | 20 | setGeneric("supportedFilters", function(object, ...) 21 | standardGeneric("supportedFilters")) 22 | -------------------------------------------------------------------------------- /man/GenenameFilter.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AnnotationFilter.R 3 | \name{GenenameFilter} 4 | \alias{GenenameFilter} 5 | \alias{GenenameFilter-class} 6 | \title{DEPRECATED Gene name filter} 7 | \usage{ 8 | GenenameFilter(value, condition = "==", not = FALSE) 9 | } 10 | \arguments{ 11 | \item{value}{\code{character()} value for the filter} 12 | 13 | \item{condition}{\code{character(1)} defining the condition to be 14 | used in the filter. One of \code{"=="}, \code{"!="}, \code{"startsWith"}, \code{"endsWith"} 15 | or \code{"contains"}. Default condition is \code{"=="}.} 16 | 17 | \item{not}{\code{logical(1)} whether the \code{AnnotationFilter} is negated. 18 | \code{TRUE} indicates is negated (!). \code{FALSE} indicates not 19 | negated. Default not is \code{FALSE}.} 20 | } 21 | \value{ 22 | The constructor function return a \code{GenenameFilter}. 23 | } 24 | \description{ 25 | The \code{GenenameFilter} class and functions are deprecated. Please use the 26 | \code{\link[=GeneNameFilter]{GeneNameFilter()}} instead. 
27 | } 28 | -------------------------------------------------------------------------------- /.svn/pristine/40/40904d832591d5491cf5d562b1708a3f20d517f6.svn-base: -------------------------------------------------------------------------------- 1 | Package: AnnotationFilter 2 | Title: Facilities for Filtering Bioconductor Annotation Resources 3 | Version: 0.99.8 4 | Authors@R: c( person("Martin", "Morgan", email = 5 | "martin.morgan@roswellpark.org", role = "aut"), 6 | person("Johannes", "Rainer", email = 7 | "johannes.rainer@eurac.edu", role = "aut"), 8 | person("Bioconductor", "Maintainer", 9 | email="maintainer@bioconductor.org", role = "cre")) 10 | URL: https://github.com/Bioconductor/AnnotationFilter 11 | BugReports: https://github.com/Bioconductor/AnnotationFilter/issues 12 | Description: This package provides class and other infrastructure to 13 | implement filters for manipulating Bioconductor annotation 14 | resources. The filters will be used by ensembldb, 15 | Organism.dplyr, and other packages. 
16 | Depends: R (>= 3.4.0) 17 | Imports: utils, methods, GenomicRanges, lazyeval 18 | Suggests: BiocStyle, knitr, testthat, RSQLite, org.Hs.eg.db 19 | VignetteBuilder: knitr 20 | License: Artistic-2.0 21 | biocViews: Annotation, Infrastructure, Software 22 | Encoding: UTF-8 23 | LazyData: true 24 | RoxygenNote: 6.0.1 25 | Collate: 'AllGenerics.R' 'AnnotationFilter.R' 'AnnotationFilterList.R' 26 | 'translate-utils.R' 27 | -------------------------------------------------------------------------------- /.svn/pristine/e0/e07c222eeebfc149377f972722b7aa62b4fc86cc.svn-base: -------------------------------------------------------------------------------- 1 | Package: AnnotationFilter 2 | Title: Facilities for Filtering Bioconductor Annotation Resources 3 | Version: 1.1.3 4 | Authors@R: c( 5 | person("Martin", "Morgan", email = "martin.morgan@roswellpark.org", 6 | role = "aut"), 7 | person("Johannes", "Rainer", email = "johannes.rainer@eurac.edu", 8 | role = "aut"), 9 | person("Joachim", "Bargsten", email = "jw@bargsten.org", role = "ctb"), 10 | person("Bioconductor", "Maintainer", email="maintainer@bioconductor.org", 11 | role = "cre")) 12 | URL: https://github.com/Bioconductor/AnnotationFilter 13 | BugReports: https://github.com/Bioconductor/AnnotationFilter/issues 14 | Description: This package provides class and other infrastructure to 15 | implement filters for manipulating Bioconductor annotation 16 | resources. The filters will be used by ensembldb, Organism.dplyr, 17 | and other packages. 
18 | Depends: 19 | R (>= 3.4.0) 20 | Imports: 21 | utils, 22 | methods, 23 | GenomicRanges, 24 | lazyeval 25 | Suggests: 26 | BiocStyle, 27 | knitr, 28 | testthat, 29 | RSQLite, 30 | org.Hs.eg.db 31 | VignetteBuilder: knitr 32 | License: Artistic-2.0 33 | biocViews: Annotation, Infrastructure, Software 34 | Encoding: UTF-8 35 | LazyData: true 36 | RoxygenNote: 6.0.1 37 | Collate: 38 | 'AllGenerics.R' 39 | 'AnnotationFilter.R' 40 | 'AnnotationFilterList.R' 41 | 'translate-utils.R' 42 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: AnnotationFilter 2 | Title: Facilities for Filtering Bioconductor Annotation Resources 3 | Version: 1.35.0 4 | Authors@R: c( 5 | person("Martin", "Morgan", email = "martin.morgan@roswellpark.org", 6 | role = "aut"), 7 | person("Johannes", "Rainer", email = "johannes.rainer@eurac.edu", 8 | role = "aut"), 9 | person("Joachim", "Bargsten", email = "jw@bargsten.org", role = "ctb"), 10 | person("Daniel", "Van Twisk", email = "daniel.vantwisk@roswellpark.org", 11 | role = "ctb"), 12 | person("Bioconductor Package", "Maintainer", 13 | email="maintainer@bioconductor.org", 14 | role = "cre")) 15 | URL: https://github.com/Bioconductor/AnnotationFilter 16 | BugReports: https://github.com/Bioconductor/AnnotationFilter/issues 17 | Description: This package provides class and other infrastructure to 18 | implement filters for manipulating Bioconductor annotation 19 | resources. The filters will be used by ensembldb, Organism.dplyr, 20 | and other packages. 
21 | Depends: 22 | R (>= 3.4.0) 23 | Imports: 24 | utils, 25 | methods, 26 | GenomicRanges, 27 | lazyeval 28 | Suggests: 29 | BiocStyle, 30 | knitr, 31 | testthat, 32 | RSQLite, 33 | org.Hs.eg.db, 34 | rmarkdown 35 | VignetteBuilder: knitr 36 | License: Artistic-2.0 37 | biocViews: Annotation, Infrastructure, Software 38 | Encoding: UTF-8 39 | LazyData: true 40 | RoxygenNote: 6.0.1 41 | Collate: 42 | 'AllGenerics.R' 43 | 'AnnotationFilter.R' 44 | 'AnnotationFilterList.R' 45 | 'translate-utils.R' 46 | -------------------------------------------------------------------------------- /.svn/pristine/18/18d19e0f6727f50e5ab77d34454fab81e82d049a.svn-base: -------------------------------------------------------------------------------- 1 | context("AnnotationFilterList") 2 | 3 | test_that("AnnotationFilterList() works", { 4 | f1 <- GeneIdFilter("somegene") 5 | f2 <- SeqNameFilter("chr3") 6 | f3 <- GeneBiotypeFilter("protein_coding", "!=") 7 | 8 | fL <- AnnotationFilter:::AnnotationFilterList(f1, f2) 9 | expect_true(length(fL) == 2) 10 | expect_equal(fL[[1]], f1) 11 | expect_equal(fL[[2]], f2) 12 | expect_true(all(logicOp(fL) == "&")) 13 | 14 | fL <- AnnotationFilter:::AnnotationFilterList(f1, f2, f3, 15 | logicOp = c("&", "|")) 16 | expect_true(length(fL) == 3) 17 | expect_equal(fL[[1]], f1) 18 | expect_equal(fL[[2]], f2) 19 | expect_equal(fL[[3]], f3) 20 | expect_equal(logicOp(fL), c("&", "|")) 21 | 22 | ## A AnnotationFilterList with and AnnotationFilterList 23 | fL <- AnnotationFilter:::AnnotationFilterList(f1, f2, logicOp = "|") 24 | fL2 <- AnnotationFilter:::AnnotationFilterList(f3, fL, logicOp = "&") 25 | expect_true(length(fL) == 2) 26 | expect_true(length(fL2) == 2) 27 | expect_true(is(value(fL2)[[1]], "GeneBiotypeFilter")) 28 | expect_true(is(value(fL2)[[2]], "AnnotationFilterList")) 29 | expect_equal(value(fL2)[[2]], fL) 30 | expect_equal(fL2[[2]], fL) 31 | expect_equal(logicOp(fL2), "&") 32 | expect_equal(logicOp(fL2[[2]]), "|") 33 | }) 34 | 35 | test_that("empty 
elements in AnnotationFilterList", { 36 | ## empty elements should be removed from the AnnotationFilterList. 37 | empty_afl <- AnnotationFilterList() 38 | afl <- AnnotationFilterList(empty_afl) 39 | expect_true(length(afl) == 0) 40 | afl <- AnnotationFilterList(GeneIdFilter(4), empty_afl) 41 | expect_true(length(afl) == 1) 42 | afl <- AnnotationFilterList(GeneIdFilter(4), 43 | AnnotationFilter(~ gene_id == 3 | seq_name == 4), 44 | empty_afl) 45 | expect_true(length(afl) == 2) 46 | ## Check validate. 47 | afl@.Data <- c(afl@.Data, list(empty_afl)) 48 | ## Fix also the logOp. 49 | afl@logOp <- c(afl@logOp, "|") 50 | expect_error(validObject(afl)) 51 | }) 52 | -------------------------------------------------------------------------------- /.svn/pristine/f1/f1748b5922e3eb9abfa76932622f609be9ff4d0c.svn-base: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(AnnotationFilter) 4 | export(AnnotationFilterList) 5 | export(CdsEndFilter) 6 | export(CdsStartFilter) 7 | export(EntrezFilter) 8 | export(ExonEndFilter) 9 | export(ExonIdFilter) 10 | export(ExonNameFilter) 11 | export(ExonRankFilter) 12 | export(ExonStartFilter) 13 | export(GRangesFilter) 14 | export(GeneBiotypeFilter) 15 | export(GeneEndFilter) 16 | export(GeneIdFilter) 17 | export(GeneStartFilter) 18 | export(GenenameFilter) 19 | export(ProteinIdFilter) 20 | export(SeqNameFilter) 21 | export(SeqStrandFilter) 22 | export(SymbolFilter) 23 | export(TxBiotypeFilter) 24 | export(TxEndFilter) 25 | export(TxIdFilter) 26 | export(TxNameFilter) 27 | export(TxStartFilter) 28 | export(UniprotFilter) 29 | export(feature) 30 | export(logicOp) 31 | exportClasses(AnnotationFilter) 32 | exportClasses(AnnotationFilterList) 33 | exportClasses(CdsEndFilter) 34 | exportClasses(CdsStartFilter) 35 | exportClasses(CharacterFilter) 36 | exportClasses(EntrezFilter) 37 | exportClasses(ExonEndFilter) 38 | exportClasses(ExonIdFilter) 39 | 
exportClasses(ExonNameFilter) 40 | exportClasses(ExonRankFilter) 41 | exportClasses(ExonStartFilter) 42 | exportClasses(GRangesFilter) 43 | exportClasses(GeneBiotypeFilter) 44 | exportClasses(GeneEndFilter) 45 | exportClasses(GeneIdFilter) 46 | exportClasses(GeneStartFilter) 47 | exportClasses(GenenameFilter) 48 | exportClasses(IntegerFilter) 49 | exportClasses(ProteinIdFilter) 50 | exportClasses(SeqNameFilter) 51 | exportClasses(SeqStrandFilter) 52 | exportClasses(SymbolFilter) 53 | exportClasses(TxBiotypeFilter) 54 | exportClasses(TxEndFilter) 55 | exportClasses(TxIdFilter) 56 | exportClasses(TxNameFilter) 57 | exportClasses(TxStartFilter) 58 | exportClasses(UniprotFilter) 59 | exportMethods(condition) 60 | exportMethods(field) 61 | exportMethods(show) 62 | exportMethods(supportedFilters) 63 | exportMethods(value) 64 | importClassesFrom(GenomicRanges,GRanges) 65 | importFrom(GenomicRanges,GRanges) 66 | importFrom(GenomicRanges,show) 67 | importFrom(lazyeval,f_eval) 68 | importFrom(methods,callNextMethod) 69 | importFrom(methods,initialize) 70 | importFrom(methods,is) 71 | importFrom(methods,new) 72 | importFrom(methods,show) 73 | importFrom(methods,validObject) 74 | importFrom(utils,tail) 75 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(AnnotationFilter) 4 | export(AnnotationFilterList) 5 | export(CdsEndFilter) 6 | export(CdsStartFilter) 7 | export(EntrezFilter) 8 | export(ExonEndFilter) 9 | export(ExonIdFilter) 10 | export(ExonNameFilter) 11 | export(ExonRankFilter) 12 | export(ExonStartFilter) 13 | export(GRangesFilter) 14 | export(GeneBiotypeFilter) 15 | export(GeneEndFilter) 16 | export(GeneIdFilter) 17 | export(GeneNameFilter) 18 | export(GeneStartFilter) 19 | export(GenenameFilter) 20 | export(ProteinIdFilter) 21 | export(SeqNameFilter) 22 | export(SeqStrandFilter) 
23 | export(SymbolFilter) 24 | export(TxBiotypeFilter) 25 | export(TxEndFilter) 26 | export(TxIdFilter) 27 | export(TxNameFilter) 28 | export(TxStartFilter) 29 | export(UniprotFilter) 30 | export(feature) 31 | export(logicOp) 32 | export(not) 33 | exportClasses(AnnotationFilter) 34 | exportClasses(AnnotationFilterList) 35 | exportClasses(CdsEndFilter) 36 | exportClasses(CdsStartFilter) 37 | exportClasses(CharacterFilter) 38 | exportClasses(DoubleFilter) 39 | exportClasses(EntrezFilter) 40 | exportClasses(ExonEndFilter) 41 | exportClasses(ExonIdFilter) 42 | exportClasses(ExonNameFilter) 43 | exportClasses(ExonRankFilter) 44 | exportClasses(ExonStartFilter) 45 | exportClasses(GRangesFilter) 46 | exportClasses(GeneBiotypeFilter) 47 | exportClasses(GeneEndFilter) 48 | exportClasses(GeneIdFilter) 49 | exportClasses(GeneNameFilter) 50 | exportClasses(GeneStartFilter) 51 | exportClasses(GenenameFilter) 52 | exportClasses(IntegerFilter) 53 | exportClasses(ProteinIdFilter) 54 | exportClasses(SeqNameFilter) 55 | exportClasses(SeqStrandFilter) 56 | exportClasses(SymbolFilter) 57 | exportClasses(TxBiotypeFilter) 58 | exportClasses(TxEndFilter) 59 | exportClasses(TxIdFilter) 60 | exportClasses(TxNameFilter) 61 | exportClasses(TxStartFilter) 62 | exportClasses(UniprotFilter) 63 | exportMethods(condition) 64 | exportMethods(convertFilter) 65 | exportMethods(distributeNegation) 66 | exportMethods(field) 67 | exportMethods(not) 68 | exportMethods(show) 69 | exportMethods(supportedFilters) 70 | exportMethods(value) 71 | importClassesFrom(GenomicRanges,GRanges) 72 | importFrom(GenomicRanges,GRanges) 73 | importFrom(GenomicRanges,show) 74 | importFrom(lazyeval,f_eval) 75 | importFrom(methods,callNextMethod) 76 | importFrom(methods,initialize) 77 | importFrom(methods,is) 78 | importFrom(methods,new) 79 | importFrom(methods,show) 80 | importFrom(methods,validObject) 81 | importFrom(utils,head) 82 | importFrom(utils,tail) 83 | 
-------------------------------------------------------------------------------- /.svn/pristine/65/65e9e8d47fee95ed321413de21d8f8d13f873b17.svn-base: -------------------------------------------------------------------------------- 1 | context("AnnotationFilter") 2 | 3 | test_that("supportedFilters() works", { 4 | expect_true(inherits(supportedFilters(), "data.frame")) 5 | expect_identical( 6 | nrow(supportedFilters()), 7 | length(unlist(AnnotationFilter:::.FIELD, use.names=FALSE)) + 8 | length(AnnotationFilter:::.FILTERS_WO_FIELD) 9 | ) 10 | }) 11 | 12 | test_that("SymbolFilter as representative for character filters", { 13 | expect_true(validObject(new("SymbolFilter"))) 14 | expect_error(SymbolFilter()) 15 | expect_error(SymbolFilter(1, ">")) 16 | expect_error(SymbolFilter(1, "foo")) 17 | expect_error(SymbolFilter(c("foo","bar"), "startsWith")) 18 | ## Getter / setter 19 | fl <- SymbolFilter("BCL2") 20 | expect_equal(value(fl), "BCL2") 21 | fl <- SymbolFilter(c(4, 5)) 22 | expect_equal(value(fl), c("4", "5")) 23 | fl <- SymbolFilter(3) 24 | expect_equal(value(fl), "3") 25 | expect_error(SymbolFilter(NA)) 26 | ## condition. 
27 | expect_equal(condition(fl), "==") 28 | fl <- SymbolFilter("a", condition = "!=") 29 | expect_equal(condition(fl), "!=") 30 | expect_error(SymbolFilter("a", condition = "<")) 31 | expect_error(SymbolFilter("a", condition = "")) 32 | expect_error(SymbolFilter("a", condition = c("==", ">"))) 33 | expect_error(SymbolFilter("a", condition = NULL)) 34 | expect_error(SymbolFilter("a", condition = NA)) 35 | expect_error(SymbolFilter("a", condition = 4)) 36 | }) 37 | 38 | test_that("GeneStartFilter as representative for integer filters", { 39 | gsf <- GeneStartFilter(10000, condition = ">") 40 | expect_equal(condition(gsf), ">") 41 | expect_error(GeneStartFilter("3")) 42 | expect_error(GeneStartFilter("B")) 43 | expect_error(GeneStartFilter(NA)) 44 | expect_error(GeneStartFilter(NULL)) 45 | expect_error(GeneStartFilter()) 46 | ## Condition 47 | expect_error(GeneStartFilter(10000, condition = "startsWith")) 48 | expect_error(GeneStartFilter(10000, condition = "endsWith")) 49 | expect_error(GeneStartFilter(10000, condition = c("==", "<"))) 50 | }) 51 | 52 | test_that("GRangesFilter works", { 53 | GRanges <- GenomicRanges::GRanges 54 | grf <- GRangesFilter(GRanges("chr10:87869000-87876000")) 55 | expect_equal(condition(grf), "any") 56 | expect_error(GRangesFilter(value = 3)) 57 | expect_error(GRangesFilter( 58 | GRanges("chr10:87869000-87876000"), 59 | type = "==" 60 | )) 61 | grf <- GRangesFilter( 62 | GRanges("chr10:87869000-87876000"), 63 | type = "within", 64 | feature = "tx" 65 | ) 66 | expect_equal(condition(grf), "within") 67 | expect_equal(feature(grf), "tx") 68 | }) 69 | 70 | test_that("fieldToClass works", { 71 | expect_identical(AnnotationFilter:::.fieldToClass("gene_id"), 72 | "GeneIdFilter") 73 | ## Support replacement for multiple _ : issue #13 74 | expect_identical(AnnotationFilter:::.fieldToClass("gene_seq_start"), 75 | "GeneSeqStartFilter") 76 | }) 77 | -------------------------------------------------------------------------------- 
/tests/testthat/test_AnnotationFilterList.R: -------------------------------------------------------------------------------- 1 | context("AnnotationFilterList") 2 | 3 | test_that("AnnotationFilterList() works", { 4 | f1 <- GeneIdFilter("somegene") 5 | f2 <- SeqNameFilter("chr3") 6 | f3 <- GeneBiotypeFilter("protein_coding", "!=") 7 | 8 | fL <- AnnotationFilter:::AnnotationFilterList(f1, f2) 9 | expect_true(length(fL) == 2) 10 | expect_equal(fL[[1]], f1) 11 | expect_equal(fL[[2]], f2) 12 | expect_true(all(logicOp(fL) == "&")) 13 | 14 | fL <- AnnotationFilter:::AnnotationFilterList(f1, f2, f3, 15 | logicOp = c("&", "|")) 16 | expect_true(length(fL) == 3) 17 | expect_equal(fL[[1]], f1) 18 | expect_equal(fL[[2]], f2) 19 | expect_equal(fL[[3]], f3) 20 | expect_equal(logicOp(fL), c("&", "|")) 21 | 22 | ## A AnnotationFilterList with and AnnotationFilterList 23 | fL <- AnnotationFilter:::AnnotationFilterList(f1, f2, logicOp = "|") 24 | fL2 <- AnnotationFilter:::AnnotationFilterList(f3, fL, logicOp = "&") 25 | expect_true(length(fL) == 2) 26 | expect_true(length(fL2) == 2) 27 | expect_true(is(value(fL2)[[1]], "GeneBiotypeFilter")) 28 | expect_true(is(value(fL2)[[2]], "AnnotationFilterList")) 29 | expect_equal(value(fL2)[[2]], fL) 30 | expect_equal(fL2[[2]], fL) 31 | expect_equal(logicOp(fL2), "&") 32 | expect_equal(logicOp(fL2[[2]]), "|") 33 | }) 34 | 35 | test_that("empty elements in AnnotationFilterList", { 36 | ## empty elements should be removed from the AnnotationFilterList. 37 | empty_afl <- AnnotationFilterList() 38 | afl <- AnnotationFilterList(empty_afl) 39 | expect_true(length(afl) == 0) 40 | afl <- AnnotationFilterList(GeneIdFilter(4), empty_afl) 41 | expect_true(length(afl) == 1) 42 | afl <- AnnotationFilterList(GeneIdFilter(4), 43 | AnnotationFilter(~ gene_id == 3 | seq_name == 4),empty_afl) 44 | expect_true(length(afl) == 2) 45 | ## Check validate. 46 | afl@.Data <- c(afl@.Data, list(empty_afl)) 47 | ## Fix also the logOp. 
48 | afl@logOp <- c(afl@logOp, "|") 49 | expect_error(validObject(afl)) 50 | }) 51 | 52 | test_that("convertFilter works", { 53 | smbl <- SymbolFilter("ADA") 54 | txid <- TxIdFilter(1000) 55 | gr <- GRangesFilter(GenomicRanges::GRanges("chr15:25062333-25065121")) 56 | 57 | expect_identical(convertFilter(AnnotationFilter(~smbl | txid)), 58 | "symbol == 'ADA' | tx_id == '1000'") 59 | expect_identical(convertFilter(AnnotationFilter(~smbl & (smbl | txid))), 60 | "symbol == 'ADA' & (symbol == 'ADA' | tx_id == '1000')") 61 | expect_identical(convertFilter(AnnotationFilter(~smbl & !(smbl | txid))), 62 | "symbol == 'ADA' & !(symbol == 'ADA' | tx_id == '1000')") 63 | expect_error(convertFilter(AnnotationFilter(smbl | (txid & gr)))) 64 | 65 | }) 66 | 67 | test_that("distributeNegation works", { 68 | afl <- AnnotationFilter(~!(symbol == 'ADA' | symbol %startsWith% 'SNORD')) 69 | afl2 <- AnnotationFilter(~!symbol == 'ADA' & !symbol %startsWith% 'SNORD') 70 | expect_identical(distributeNegation(afl), afl2) 71 | }) 72 | -------------------------------------------------------------------------------- /.svn/pristine/33/331922c1408ddd46de74575d157bb2d47ccfa3e9.svn-base: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AnnotationFilterList.R 3 | \docType{methods} 4 | \name{AnnotationFilterList} 5 | \alias{AnnotationFilterList} 6 | \alias{AnnotationFilterList-class} 7 | \alias{AnnotationFilterList} 8 | \alias{value,AnnotationFilterList-method} 9 | \alias{logicOp,AnnotationFilterList-method} 10 | \alias{logicOp} 11 | \alias{show,AnnotationFilterList-method} 12 | \title{Combining annotation filters} 13 | \usage{ 14 | AnnotationFilterList(..., logicOp = character(), logOp = character()) 15 | 16 | \S4method{value}{AnnotationFilterList}(object) 17 | 18 | \S4method{logicOp}{AnnotationFilterList}(object) 19 | 20 | \S4method{show}{AnnotationFilterList}(object) 21 | } 22 | 
\arguments{ 23 | \item{...}{individual \code{\link{AnnotationFilter}} objects or a 24 | mixture of \code{AnnotationFilter} and 25 | \code{AnnotationFilterList} objects.} 26 | 27 | \item{logicOp}{\code{character} of length equal to the number 28 | of submitted \code{AnnotationFilter} objects - 1. Each value 29 | representing the logical operation to combine consecutive 30 | filters, i.e. the first element being the logical operation to 31 | combine the first and second \code{AnnotationFilter}, the 32 | second element being the logical operation to combine the 33 | second and third \code{AnnotationFilter} and so on. Allowed 34 | values are \code{"&"} and \code{"|"}. The function assumes a 35 | logical \emph{and} between all elements by default.} 36 | 37 | \item{logOp}{Deprecated; use \code{logicOp=}.} 38 | 39 | \item{object}{An object of class \code{AnnotationFilterList}.} 40 | } 41 | \value{ 42 | \code{AnnotationFilterList} returns an \code{AnnotationFilterList}. 43 | 44 | \code{value()} returns a \code{list} with \code{AnnotationFilter} 45 | objects. 46 | 47 | \code{logicOp()} returns a \code{character()} vector of 48 | \dQuote{&} or \dQuote{|} symbols. 49 | } 50 | \description{ 51 | The \code{AnnotationFilterList} allows to combine 52 | filter objects extending the \code{\link{AnnotationFilter}} 53 | class to construct more complex queries. Consecutive filter 54 | objects in the \code{AnnotationFilterList} can be combined by a 55 | logical \emph{and} (\code{&}) or \emph{or} (\code{|}). The 56 | \code{AnnotationFilterList} extends \code{list}, individual 57 | elements can thus be accessed with \code{[[}. 58 | 59 | \code{value()} get a \code{list} with the 60 | \code{AnnotationFilter} objects. Use \code{[[} to access 61 | individual filters. 62 | 63 | \code{logicOp()} gets the logical operators separating 64 | successive \code{AnnotationFilter}. 
65 | } 66 | \note{ 67 | The \code{AnnotationFilterList} does not support containing empty 68 | elements, hence all elements of \code{length == 0} are removed in 69 | the constructor function. 70 | } 71 | \examples{ 72 | ## Create some AnnotationFilters 73 | gf <- GenenameFilter(c("BCL2", "BCL2L11")) 74 | tbtf <- TxBiotypeFilter("protein_coding", condition = "!=") 75 | 76 | ## Combine both to an AnnotationFilterList. By default elements are combined 77 | ## using a logical "and" operator. The filter list represents thus a query 78 | ## like: get all features where the gene name is either ("BCL2" or "BCL2L11") 79 | ## and the transcript biotype is not "protein_coding". 80 | afl <- AnnotationFilterList(gf, tbtf) 81 | afl 82 | 83 | ## Access individual filters. 84 | afl[[1]] 85 | 86 | ## Create a filter in the form of: get all features where the gene name is 87 | ## either ("BCL2" or "BCL2L11") and the transcript biotype is not 88 | ## "protein_coding" or the seq_name is "Y". Hence, this will get all feature 89 | ## also found by the previous AnnotationFilterList and returns also all 90 | ## features on chromosome Y. 91 | afl <- AnnotationFilterList(gf, tbtf, SeqNameFilter("Y"), 92 | logicOp = c("&", "|")) 93 | afl 94 | 95 | } 96 | \seealso{ 97 | \code{\link{supportedFilters}} for available 98 | \code{\link{AnnotationFilter}} objects 99 | } 100 | -------------------------------------------------------------------------------- /.svn/pristine/06/065e68d6b11e4bf90da04ffd904757e8ce3c422b.svn-base: -------------------------------------------------------------------------------- 1 | #' @include AnnotationFilter.R 2 | 3 | ## Functionality to translate a query condition to an AnnotationFilter. 4 | 5 | #' Adapted from GenomicDataCommons. 6 | #' 7 | #' @importFrom methods is validObject initialize 8 | #' 9 | #' @noRd 10 | .binary_op <- function(sep) { 11 | force(sep) 12 | function(e1, e2) { 13 | ## First create the class. Throws an error if not possible i.e. 
no 14 | ## class for the field available. 15 | field <- as.character(substitute(e1)) 16 | class <- .fieldToClass(field) 17 | filter <- tryCatch({ 18 | new(class, condition = sep, field = field) 19 | }, error = function(e) { 20 | stop("No AnnotationFilter class '", class, "' for field '", 21 | field, "' defined") 22 | }) 23 | ## Fill with values. 24 | force(e2) 25 | if (is(filter, "CharacterFilter")) { 26 | e2 <- as.character(e2) 27 | } else if (is(filter, "IntegerFilter")) { 28 | e2 <- as.integer(e2) 29 | } 30 | initialize(filter, value = e2) 31 | } 32 | } 33 | 34 | #' Combine filters into a AnnotationFilterList combbined with \code{sep} 35 | #' 36 | #' @noRd 37 | .combine_op <- function(sep) { 38 | force(sep) 39 | function(e1, e2) { 40 | ## Avoid implicit nesting of AnnotationFilterList - should be done 41 | ## eventually 42 | if (is(e1, "AnnotationFilterList")) { 43 | sep <- c(logicOp(e1), sep) 44 | e1 <- .aflvalue(e1) 45 | } else 46 | e1 <- list(e1) 47 | if (is(e2, "AnnotationFilterList")) { 48 | sep <- c(logicOp(e2), sep) 49 | e2 <- .aflvalue(e2) 50 | } else 51 | e2 <- list(e2) 52 | ## Don't use the constructor here. 53 | new("AnnotationFilterList", c(e1, e2), logOp = sep) 54 | } 55 | } 56 | 57 | #' The \code{.LOG_OP_REG} is a \code{list} providing functions for 58 | #' common logical operations to translate expressions into AnnotationFilter 59 | #' objects. 60 | #' 61 | #' @noRd 62 | .LOG_OP_REG <- list() 63 | ## Assign conditions. 
64 | .LOG_OP_REG$`==` <- .binary_op("==") 65 | .LOG_OP_REG$`%in%` <- .binary_op("==") 66 | .LOG_OP_REG$`!=` <- .binary_op("!=") 67 | .LOG_OP_REG$`>` <- .binary_op(">") 68 | .LOG_OP_REG$`<` <- .binary_op("<") 69 | .LOG_OP_REG$`>=` <- .binary_op(">=") 70 | .LOG_OP_REG$`<=` <- .binary_op("<=") 71 | ## combine filters 72 | .LOG_OP_REG$`&` <- .combine_op("&") 73 | .LOG_OP_REG$`|` <- .combine_op("|") 74 | 75 | #' @rdname AnnotationFilter 76 | #' 77 | #' @description \code{AnnotationFilter} \emph{translates} a filter 78 | #' expression such as \code{~ gene_id == "BCL2"} into a filter object 79 | #' extending the \code{\link{AnnotationFilter}} class (in the example a 80 | #' \code{\link{GeneIdFilter}} object) or an 81 | #' \code{\link{AnnotationFilterList}} if the expression contains multiple 82 | #' conditions (see examples below). Filter expressions have to be written 83 | #' in the form \code{~ }, with \code{} 84 | #' being the default field of the filter class (use the 85 | #' \code{supportedFilter} function to list all fields and filter classes), 86 | #' \code{} the logical expression and \code{} the value 87 | #' for the filter. 88 | #' 89 | #' @details Filter expressions for the \code{AnnotationFilter} class have to be 90 | #' written as formulas, i.e. starting with a \code{~}. 91 | #' 92 | #' @note Translation of nested filter expressions using the 93 | #' \code{AnnotationFilter} function is not yet supported. 94 | #' 95 | #' @param expr A filter expression, written as a \code{formula}, to be 96 | #' converted to an \code{AnnotationFilter} or \code{AnnotationFilterList} 97 | #' class. See below for examples. 98 | #' 99 | #' @return \code{AnnotationFilter} returns an 100 | #' \code{\link{AnnotationFilter}} or an \code{\link{AnnotationFilterList}}. 
101 | #' 102 | #' @importFrom lazyeval f_eval 103 | #' 104 | #' @examples 105 | #' 106 | #' ## Convert a filter expression based on a gene ID to a GeneIdFilter 107 | #' gnf <- AnnotationFilter(~ gene_id == "BCL2") 108 | #' gnf 109 | #' 110 | #' ## Same conversion but for two gene IDs. 111 | #' gnf <- AnnotationFilter(~ gene_id %in% c("BCL2", "BCL2L11")) 112 | #' gnf 113 | #' 114 | #' ## Converting an expression that combines multiple filters. As a result we 115 | #' ## get an AnnotationFilterList containing the corresponding filters. 116 | #' ## Be aware that nesting of expressions/filters does not work. 117 | #' flt <- AnnotationFilter(~ gene_id %in% c("BCL2", "BCL2L11") & 118 | #' tx_biotype == "nonsense_mediated_decay" | 119 | #' seq_name == "Y") 120 | #' flt 121 | #' 122 | #' @export 123 | AnnotationFilter <- function(expr) { 124 | f_eval(expr, data = .LOG_OP_REG) 125 | } 126 | -------------------------------------------------------------------------------- /tests/testthat/test_translate-utils.R: -------------------------------------------------------------------------------- 1 | context("expression translation") 2 | 3 | test_that("translation of expression works for single filter/condition", { 4 | ## Check for some character filter. 
5 | ## exon_id 6 | flt <- ExonIdFilter("EX1", condition = "==") 7 | flt2 <- AnnotationFilter(~ exon_id == "EX1") 8 | expect_equal(flt, flt2) 9 | flt <- ExonIdFilter(c("EX1", "EX2"), condition = "!=") 10 | flt2 <- AnnotationFilter(~ exon_id != c("EX1", "EX2")) 11 | expect_equal(flt, flt2) 12 | ## seq_name 13 | flt <- SeqNameFilter(c("chr3", "chrX"), condition = "==") 14 | flt2 <- AnnotationFilter(~ seq_name == c("chr3", "chrX")) 15 | expect_equal(flt, flt2) 16 | flt <- SeqNameFilter(1:3, condition = "==") 17 | flt2 <- AnnotationFilter(~ seq_name %in% 1:3) 18 | expect_equal(flt, flt2) 19 | ## Check IntegerFilter 20 | flt <- GeneStartFilter(123, condition = ">") 21 | flt2 <- AnnotationFilter(~ gene_start > 123) 22 | expect_equal(flt, flt2) 23 | flt <- TxStartFilter(123, condition = "<") 24 | flt2 <- AnnotationFilter(~ tx_start < 123) 25 | expect_equal(flt, flt2) 26 | flt <- GeneEndFilter(123, condition = ">=") 27 | flt2 <- AnnotationFilter(~ gene_end >= 123) 28 | expect_equal(flt, flt2) 29 | flt <- ExonEndFilter(123, condition = "<=") 30 | flt2 <- AnnotationFilter(~ exon_end <= 123) 31 | expect_equal(flt, flt2) 32 | ## Test exceptions/errors. 33 | expect_error(AnnotationFilter(~ not_existing == 1:3)) 34 | ## Throws an error, but is not self-explanatory. 35 | expect_error(AnnotationFilter(~ gene_id * 3)) 36 | }) 37 | 38 | test_that("translation of combined expressions works", { 39 | res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2") 40 | cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2")) 41 | expect_equal(res, cmp) 42 | res <- AnnotationFilter(~ exon_id == "EX1" | genename != "BCL2") 43 | cmp <- AnnotationFilterList(ExonIdFilter("EX1"), 44 | GenenameFilter("BCL2", "!="), logicOp = "|") 45 | expect_equal(res, cmp) 46 | ## 3 filters. 47 | res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2" | 48 | seq_name != 3) 49 | ## Expect an AnnotationFilterList of length 3. 
50 | expect_equal(length(res), 3) 51 | cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2"), 52 | SeqNameFilter(3, "!="), logicOp = c("&", "|")) 53 | expect_equal(res, cmp) 54 | ## 4 filters. 55 | res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2" | 56 | seq_name != 3 | seq_name == "Y") 57 | expect_equal(length(res), 4) 58 | cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2"), 59 | SeqNameFilter(3, "!="), SeqNameFilter("Y"), 60 | logicOp = c("&", "|", "|")) 61 | expect_equal(res, cmp) 62 | }) 63 | 64 | test_that("translation works from within other functions", { 65 | simpleFun <- function(x) 66 | AnnotationFilter(x) 67 | expect_equal(simpleFun(~ gene_id == 4), AnnotationFilter(~ gene_id == 4)) 68 | filter_expr <- ~ gene_id == 4 69 | expect_equal(simpleFun(filter_expr), 70 | AnnotationFilter(~ gene_id == 4)) 71 | }) 72 | 73 | ## This might be a test if we get the nesting working. 74 | ## test_that("translation of nested expressions works" { 75 | ## res <- convertFilterExpression((exon_id == "EX1" & gene_id == "BCL2") | 76 | ## (exon_id == "EX3" & gene_id == "BCL2L11")) 77 | ## expect_equal(logicOp(res), "|") 78 | ## expect_true(is(res[[1]], "AnnotationFilterList")) 79 | ## expect_equal(res[[1]][[1]], ExonIdFilter("EX1")) 80 | ## expect_equal(res[[1]][[2]], GeneIdFilter("BCL2")) 81 | ## expect_equal(logicOp(res[[1]]), "&") 82 | ## expect_true(is(res[[2]], "AnnotationFilterList")) 83 | ## expect_equal(res[[2]][[1]], ExonIdFilter("EX3")) 84 | ## expect_equal(res[[2]][[2]], GeneIdFilter("BCL2L11")) 85 | ## expect_equal(logicOp(res[[2]]), "&") 86 | ## ## 87 | ## res <- convertFilterExpression(seq_name == "Y" | 88 | ## (exon_id == "EX1" & gene_id == "BCL2") & 89 | ## (exon_id == "EX3" & gene_id == "BCL2L11")) 90 | ## ## Expect: length 3, first being a SeqNameFilter, second an 91 | ## ## AnnotationFilterList, third a AnnotationFilterList. 
92 | ## expect_equal(res[[1]], SeqNameFilter("Y")) 93 | ## expect_equal(logicOp(res), "|") 94 | ## expect_true(is(res[[2]], "AnnotationFilterList")) 95 | ## expect_equal(res[[1]][[1]], ExonIdFilter("EX1")) 96 | ## expect_equal(res[[1]][[2]], GeneIdFilter("BCL2")) 97 | ## expect_equal(logicOp(res[[1]]), "&") 98 | ## expect_true(is(res[[2]], "AnnotationFilterList")) 99 | ## expect_equal(res[[2]][[1]], ExonIdFilter("EX3")) 100 | ## expect_equal(res[[2]][[2]], GeneIdFilter("BCL2L11")) 101 | ## expect_equal(logicOp(res[[2]]), "&") 102 | 103 | ## expect_true(is(res[[1]], "AnnotationFilterList")) 104 | ## expect_true(is(res[[2]], "AnnotationFilterList")) 105 | 106 | ## convertFilterExpression((gene_id == 3) () 107 | ## }) 108 | 109 | -------------------------------------------------------------------------------- /.svn/pristine/a1/a129614aff000a6de02c214a739f8867a6f01752.svn-base: -------------------------------------------------------------------------------- 1 | context("expression translation") 2 | 3 | test_that("translation of expression works for single filter/condition", { 4 | ## Check for some character filter. 
5 | ## exon_id 6 | flt <- ExonIdFilter("EX1", condition = "==") 7 | flt2 <- AnnotationFilter(~ exon_id == "EX1") 8 | expect_equal(flt, flt2) 9 | flt <- ExonIdFilter(c("EX1", "EX2"), condition = "!=") 10 | flt2 <- AnnotationFilter(~ exon_id != c("EX1", "EX2")) 11 | expect_equal(flt, flt2) 12 | ## seq_name 13 | flt <- SeqNameFilter(c("chr3", "chrX"), condition = "==") 14 | flt2 <- AnnotationFilter(~ seq_name == c("chr3", "chrX")) 15 | expect_equal(flt, flt2) 16 | flt <- SeqNameFilter(1:3, condition = "==") 17 | flt2 <- AnnotationFilter(~ seq_name %in% 1:3) 18 | expect_equal(flt, flt2) 19 | ## Check IntegerFilter 20 | flt <- GeneStartFilter(123, condition = ">") 21 | flt2 <- AnnotationFilter(~ gene_start > 123) 22 | expect_equal(flt, flt2) 23 | flt <- TxStartFilter(123, condition = "<") 24 | flt2 <- AnnotationFilter(~ tx_start < 123) 25 | expect_equal(flt, flt2) 26 | flt <- GeneEndFilter(123, condition = ">=") 27 | flt2 <- AnnotationFilter(~ gene_end >= 123) 28 | expect_equal(flt, flt2) 29 | flt <- ExonEndFilter(123, condition = "<=") 30 | flt2 <- AnnotationFilter(~ exon_end <= 123) 31 | expect_equal(flt, flt2) 32 | ## Test exceptions/errors. 33 | expect_error(AnnotationFilter(~ not_existing == 1:3)) 34 | ## Throws an error, but is not self-explanatory. 35 | expect_error(AnnotationFilter(~ gene_id * 3)) 36 | }) 37 | 38 | test_that("translation of combined expressions works", { 39 | res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2") 40 | cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2")) 41 | expect_equal(res, cmp) 42 | res <- AnnotationFilter(~ exon_id == "EX1" | genename != "BCL2") 43 | cmp <- AnnotationFilterList(ExonIdFilter("EX1"), 44 | GenenameFilter("BCL2", "!="), logicOp = "|") 45 | expect_equal(res, cmp) 46 | ## 3 filters. 47 | res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2" | 48 | seq_name != 3) 49 | ## Expect an AnnotationFilterList of length 3. 
50 | expect_equal(length(res), 3) 51 | cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2"), 52 | SeqNameFilter(3, "!="), logicOp = c("&", "|")) 53 | expect_equal(res, cmp) 54 | ## 4 filters. 55 | res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2" | 56 | seq_name != 3 | seq_name == "Y") 57 | expect_equal(length(res), 4) 58 | cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2"), 59 | SeqNameFilter(3, "!="), SeqNameFilter("Y"), 60 | logicOp = c("&", "|", "|")) 61 | expect_equal(res, cmp) 62 | }) 63 | 64 | test_that("translation works from within other functions", { 65 | simpleFun <- function(x) 66 | AnnotationFilter(x) 67 | expect_equal(simpleFun(~ gene_id == 4), AnnotationFilter(~ gene_id == 4)) 68 | filter_expr <- ~ gene_id == 4 69 | expect_equal(simpleFun(filter_expr), 70 | AnnotationFilter(~ gene_id == 4)) 71 | }) 72 | 73 | ## This might be a test if we get the nesting working. 74 | ## test_that("translation of nested expressions works" { 75 | ## res <- convertFilterExpression((exon_id == "EX1" & gene_id == "BCL2") | 76 | ## (exon_id == "EX3" & gene_id == "BCL2L11")) 77 | ## expect_equal(logicOp(res), "|") 78 | ## expect_true(is(res[[1]], "AnnotationFilterList")) 79 | ## expect_equal(res[[1]][[1]], ExonIdFilter("EX1")) 80 | ## expect_equal(res[[1]][[2]], GeneIdFilter("BCL2")) 81 | ## expect_equal(logicOp(res[[1]]), "&") 82 | ## expect_true(is(res[[2]], "AnnotationFilterList")) 83 | ## expect_equal(res[[2]][[1]], ExonIdFilter("EX3")) 84 | ## expect_equal(res[[2]][[2]], GeneIdFilter("BCL2L11")) 85 | ## expect_equal(logicOp(res[[2]]), "&") 86 | ## ## 87 | ## res <- convertFilterExpression(seq_name == "Y" | 88 | ## (exon_id == "EX1" & gene_id == "BCL2") & 89 | ## (exon_id == "EX3" & gene_id == "BCL2L11")) 90 | ## ## Expect: length 3, first being a SeqNameFilter, second an 91 | ## ## AnnotationFilterList, third a AnnotationFilterList. 
92 | ## expect_equal(res[[1]], SeqNameFilter("Y")) 93 | ## expect_equal(logicOp(res), "|") 94 | ## expect_true(is(res[[2]], "AnnotationFilterList")) 95 | ## expect_equal(res[[1]][[1]], ExonIdFilter("EX1")) 96 | ## expect_equal(res[[1]][[2]], GeneIdFilter("BCL2")) 97 | ## expect_equal(logicOp(res[[1]]), "&") 98 | ## expect_true(is(res[[2]], "AnnotationFilterList")) 99 | ## expect_equal(res[[2]][[1]], ExonIdFilter("EX3")) 100 | ## expect_equal(res[[2]][[2]], GeneIdFilter("BCL2L11")) 101 | ## expect_equal(logicOp(res[[2]]), "&") 102 | 103 | ## expect_true(is(res[[1]], "AnnotationFilterList")) 104 | ## expect_true(is(res[[2]], "AnnotationFilterList")) 105 | 106 | ## convertFilterExpression((gene_id == 3) () 107 | ## }) 108 | 109 | -------------------------------------------------------------------------------- /tests/testthat/test_AnnotationFilter.R: -------------------------------------------------------------------------------- 1 | context("AnnotationFilter") 2 | 3 | test_that("supportedFilters() works", { 4 | expect_true(inherits(supportedFilters(), "data.frame")) 5 | expect_identical( 6 | nrow(supportedFilters()), 7 | length(unlist(AnnotationFilter:::.FIELD, use.names=FALSE)) + 8 | length(AnnotationFilter:::.FILTERS_WO_FIELD) 9 | ) 10 | }) 11 | 12 | test_that("SymbolFilter as representative for character filters", { 13 | expect_true(validObject(new("SymbolFilter"))) 14 | expect_error(SymbolFilter()) 15 | expect_error(SymbolFilter(1, ">")) 16 | expect_error(SymbolFilter(1, "foo")) 17 | expect_error(SymbolFilter(c("foo","bar"), "startsWith")) 18 | ## Getter / setter 19 | fl <- SymbolFilter("BCL2") 20 | expect_equal(value(fl), "BCL2") 21 | fl <- SymbolFilter(c(4, 5)) 22 | expect_equal(value(fl), c("4", "5")) 23 | fl <- SymbolFilter(3) 24 | expect_equal(value(fl), "3") 25 | expect_error(SymbolFilter(NA)) 26 | ## condition. 
27 | expect_equal(condition(fl), "==") 28 | fl <- SymbolFilter("a", condition = "!=") 29 | expect_equal(condition(fl), "!=") 30 | expect_error(SymbolFilter("a", condition = "<")) 31 | expect_error(SymbolFilter("a", condition = "")) 32 | expect_error(SymbolFilter("a", condition = c("==", ">"))) 33 | expect_error(SymbolFilter("a", condition = NULL)) 34 | expect_error(SymbolFilter("a", condition = NA)) 35 | expect_error(SymbolFilter("a", condition = 4)) 36 | }) 37 | 38 | test_that("GeneStartFilter as representative for integer filters", { 39 | gsf <- GeneStartFilter(10000, condition = ">") 40 | expect_equal(condition(gsf), ">") 41 | expect_error(GeneStartFilter("3")) 42 | expect_error(GeneStartFilter("B")) 43 | expect_error(GeneStartFilter(NA)) 44 | expect_error(GeneStartFilter(NULL)) 45 | expect_error(GeneStartFilter()) 46 | ## Condition 47 | expect_error(GeneStartFilter(10000, condition = "startsWith")) 48 | expect_error(GeneStartFilter(10000, condition = "endsWith")) 49 | expect_error(GeneStartFilter(10000, condition = c("==", "<"))) 50 | }) 51 | 52 | test_that("GRangesFilter works", { 53 | GRanges <- GenomicRanges::GRanges 54 | grf <- GRangesFilter(GRanges("chr10:87869000-87876000")) 55 | expect_equal(condition(grf), "any") 56 | expect_error(GRangesFilter(value = 3)) 57 | expect_error(GRangesFilter( 58 | GRanges("chr10:87869000-87876000"), 59 | type = "==" 60 | )) 61 | grf <- GRangesFilter( 62 | GRanges("chr10:87869000-87876000"), 63 | type = "within", 64 | feature = "tx" 65 | ) 66 | expect_equal(condition(grf), "within") 67 | expect_equal(feature(grf), "tx") 68 | }) 69 | 70 | test_that("fieldToClass works", { 71 | expect_identical(AnnotationFilter:::.fieldToClass("gene_id"), 72 | "GeneIdFilter") 73 | ## Support replacement for multiple _ : issue #13 74 | expect_identical(AnnotationFilter:::.fieldToClass("gene_seq_start"), 75 | "GeneSeqStartFilter") 76 | }) 77 | 78 | test_that("convertFilter Works", { 79 | expect_identical(convertFilter(SymbolFilter("ADA")), 
"symbol == 'ADA'") 80 | expect_identical(convertFilter(SymbolFilter("ADA", "!=")), 81 | "symbol != 'ADA'") 82 | expect_identical(convertFilter(SymbolFilter("ADA", "startsWith")), 83 | "symbol %like% 'ADA%'") 84 | expect_identical(convertFilter(SymbolFilter("ADA", "endsWith")), 85 | "symbol %like% '%ADA'") 86 | expect_identical(convertFilter(SymbolFilter("ADA", "contains")), 87 | "symbol %like% 'ADA'") 88 | 89 | expect_identical(convertFilter(TxStartFilter(1000)), "tx_start == '1000'") 90 | expect_identical(convertFilter(TxStartFilter(1000, "!=")), 91 | "tx_start != '1000'") 92 | expect_identical(convertFilter(TxStartFilter(1000, ">")), "tx_start > 1000") 93 | expect_identical(convertFilter(TxStartFilter(1000, "<")), "tx_start < 1000") 94 | expect_identical(convertFilter(TxStartFilter(1000, ">=")), 95 | "tx_start >= 1000") 96 | expect_identical(convertFilter(TxStartFilter(1000, "<=")), 97 | "tx_start <= 1000") 98 | 99 | ## check NOT works 100 | 101 | expect_identical(convertFilter(SymbolFilter("ADA", not=TRUE)), 102 | "!symbol == 'ADA'") 103 | expect_identical(convertFilter(SymbolFilter("ADA", "!=", not=TRUE)), 104 | "!symbol != 'ADA'") 105 | expect_identical(convertFilter(SymbolFilter("ADA", "startsWith", not=TRUE)), 106 | "!symbol %like% 'ADA%'") 107 | expect_identical(convertFilter(SymbolFilter("ADA", "endsWith", not=TRUE)), 108 | "!symbol %like% '%ADA'") 109 | expect_identical(convertFilter(SymbolFilter("ADA", "contains", not=TRUE)), 110 | "!symbol %like% 'ADA'") 111 | 112 | expect_identical(convertFilter(TxStartFilter(1000, not=TRUE)), 113 | "!tx_start == '1000'") 114 | expect_identical(convertFilter(TxStartFilter(1000, "!=", not=TRUE)), 115 | "!tx_start != '1000'") 116 | expect_identical(convertFilter(TxStartFilter(1000, ">", not=TRUE)), 117 | "!tx_start > 1000") 118 | expect_identical(convertFilter(TxStartFilter(1000, "<", not=TRUE)), 119 | "!tx_start < 1000") 120 | expect_identical(convertFilter(TxStartFilter(1000, ">=", not=TRUE)), 121 | "!tx_start >= 
1000") 122 | expect_identical(convertFilter(TxStartFilter(1000, "<=", not=TRUE)), 123 | "!tx_start <= 1000") 124 | }) 125 | -------------------------------------------------------------------------------- /man/AnnotationFilterList.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AnnotationFilterList.R 3 | \docType{methods} 4 | \name{AnnotationFilterList} 5 | \alias{AnnotationFilterList} 6 | \alias{AnnotationFilterList-class} 7 | \alias{AnnotationFilterList} 8 | \alias{value,AnnotationFilterList-method} 9 | \alias{logicOp,AnnotationFilterList-method} 10 | \alias{logicOp} 11 | \alias{not,AnnotationFilterList-method} 12 | \alias{not} 13 | \alias{distributeNegation,AnnotationFilterList-method} 14 | \alias{distributeNegation} 15 | \alias{convertFilter,AnnotationFilterList,missing-method} 16 | \alias{convertFilter} 17 | \alias{show,AnnotationFilterList-method} 18 | \title{Combining annotation filters} 19 | \usage{ 20 | AnnotationFilterList(..., logicOp = character(), logOp = character(), 21 | not = FALSE, .groupingFlag = FALSE) 22 | 23 | \S4method{value}{AnnotationFilterList}(object) 24 | 25 | \S4method{logicOp}{AnnotationFilterList}(object) 26 | 27 | \S4method{not}{AnnotationFilterList}(object) 28 | 29 | \S4method{distributeNegation}{AnnotationFilterList}(object, 30 | .prior_negation = FALSE) 31 | 32 | \S4method{convertFilter}{AnnotationFilterList,missing}(object) 33 | 34 | \S4method{show}{AnnotationFilterList}(object) 35 | } 36 | \arguments{ 37 | \item{...}{individual \code{\link{AnnotationFilter}} objects or a 38 | mixture of \code{AnnotationFilter} and 39 | \code{AnnotationFilterList} objects.} 40 | 41 | \item{logicOp}{\code{character} of length equal to the number 42 | of submitted \code{AnnotationFilter} objects - 1. Each value 43 | representing the logical operation to combine consecutive 44 | filters, i.e. 
the first element being the logical operation to 45 | combine the first and second \code{AnnotationFilter}, the 46 | second element being the logical operation to combine the 47 | second and third \code{AnnotationFilter} and so on. Allowed 48 | values are \code{"&"} and \code{"|"}. The function assumes a 49 | logical \emph{and} between all elements by default.} 50 | 51 | \item{logOp}{Deprecated; use \code{logicOp=}.} 52 | 53 | \item{not}{\code{logical} of length one. Indicates whether the grouping 54 | of \code{AnnotationFilters} is to be negated.} 55 | 56 | \item{.groupingFlag}{Flag designated for internal use only.} 57 | 58 | \item{object}{An object of class \code{AnnotationFilterList}.} 59 | 60 | \item{.prior_negation}{\code{logical(1)} unused argument.} 61 | } 62 | \value{ 63 | \code{AnnotationFilterList} returns an \code{AnnotationFilterList}. 64 | 65 | \code{value()} returns a \code{list} with \code{AnnotationFilter} 66 | objects. 67 | 68 | \code{logicOp()} returns a \code{character()} vector of 69 | \dQuote{&} or \dQuote{|} symbols. 70 | 71 | \code{not()} returns a \code{logical(1)} indicating whether the 72 | \code{AnnotationFilterList} is negated. 73 | 74 | \code{distributeNegation()} returns an \code{AnnotationFilterList} object with De Morgan's law applied to 75 | it such that it is equal to the original \code{AnnotationFilterList} 76 | object but all \code{!}'s are distributed out of the 77 | \code{AnnotationFilterList} object and to the nested 78 | \code{AnnotationFilter} objects. 79 | 80 | \code{convertFilter()} returns a \code{character(1)} that can be used as input to a \code{dplyr} 81 | filter. 82 | } 83 | \description{ 84 | The \code{AnnotationFilterList} allows to combine 85 | filter objects extending the \code{\link{AnnotationFilter}} 86 | class to construct more complex queries. Consecutive filter 87 | objects in the \code{AnnotationFilterList} can be combined by a 88 | logical \emph{and} (\code{&}) or \emph{or} (\code{|}).
The 89 | \code{AnnotationFilterList} extends \code{list}, individual 90 | elements can thus be accessed with \code{[[}. 91 | 92 | \code{value()} gets a \code{list} with the 93 | \code{AnnotationFilter} objects. Use \code{[[} to access 94 | individual filters. 95 | 96 | \code{logicOp()} gets the logical operators separating 97 | successive \code{AnnotationFilter}. 98 | 99 | \code{not()} gets the \code{logical(1)} flag indicating whether the 100 | \code{AnnotationFilterList} is negated. 101 | 102 | 103 | 104 | \code{convertFilter()} converts an \code{AnnotationFilterList} object to a 105 | \code{character(1)} giving an equation that can be used as input to 106 | a \code{dplyr} filter. 107 | } 108 | \note{ 109 | The \code{AnnotationFilterList} does not support containing empty 110 | elements, hence all elements of \code{length == 0} are removed in 111 | the constructor function. 112 | } 113 | \examples{ 114 | ## Create some AnnotationFilters 115 | gf <- GeneNameFilter(c("BCL2", "BCL2L11")) 116 | tbtf <- TxBiotypeFilter("protein_coding", condition = "!=") 117 | 118 | ## Combine both to an AnnotationFilterList. By default elements are combined 119 | ## using a logical "and" operator. The filter list thus represents a query 120 | ## like: get all features where the gene name is either ("BCL2" or "BCL2L11") 121 | ## and the transcript biotype is not "protein_coding". 122 | afl <- AnnotationFilterList(gf, tbtf) 123 | afl 124 | 125 | ## Access individual filters. 126 | afl[[1]] 127 | 128 | ## Create a filter in the form of: get all features where the gene name is 129 | ## either ("BCL2" or "BCL2L11") and the transcript biotype is not 130 | ## "protein_coding" or the seq_name is "Y". Hence, this will get all features 131 | ## also found by the previous AnnotationFilterList and additionally return all 132 | ## features on chromosome Y.
133 | afl <- AnnotationFilterList(gf, tbtf, SeqNameFilter("Y"), 134 | logicOp = c("&", "|")) 135 | afl 136 | 137 | afl <- AnnotationFilter(~!(symbol == 'ADA' | symbol \%startsWith\% 'SNORD')) 138 | afl <- distributeNegation(afl) 139 | afl 140 | afl <- AnnotationFilter(~symbol=="ADA" & tx_start > "400000") 141 | result <- convertFilter(afl) 142 | result 143 | } 144 | \seealso{ 145 | \code{\link{supportedFilters}} for available 146 | \code{\link{AnnotationFilter}} objects 147 | } 148 | -------------------------------------------------------------------------------- /R/translate-utils.R: -------------------------------------------------------------------------------- 1 | #' @include AnnotationFilter.R 2 | 3 | ## Functionality to translate a query condition to an AnnotationFilter. 4 | 5 | #' Adapted from GenomicDataCommons. 6 | #' 7 | #' @importFrom methods is validObject initialize 8 | #' 9 | #' @noRd 10 | .binary_op <- function(sep) { 11 | force(sep) 12 | function(e1, e2) { 13 | ## First create the class. Throws an error if not possible i.e. no 14 | ## class for the field available. 15 | field <- as.character(substitute(e1)) 16 | class <- .fieldToClass(field) 17 | filter <- tryCatch({ 18 | new(class, condition = sep, field = field) 19 | }, error = function(e) { 20 | stop("No AnnotationFilter class '", class, "' for field '", 21 | field, "' defined") 22 | }) 23 | ## Fill with values. 24 | force(e2) 25 | if (is(filter, "CharacterFilter")) { 26 | e2 <- as.character(e2) 27 | } else if (is(filter, "IntegerFilter")) { 28 | e2 <- as.integer(e2) 29 | } 30 | initialize(filter, value = e2) 31 | } 32 | } 33 | 34 | #' Functionality to translate a unary operation into an AnnotationFilter. 
#'
#' @param sep \code{character(1)} with the operator symbol. It is forced but
#'     not otherwise used by the returned function.
#'
#' @return A \code{function} with a single argument \code{x} that flips the
#'     \code{not} slot of an \code{AnnotationFilter} or
#'     \code{AnnotationFilterList} and throws an error for any other input.
#'
#' @noRd
.not_op <- function(sep) {
    force(sep)
    function(x) {
        isList <- is(x, "AnnotationFilterList")
        if (!isList && !is(x, "AnnotationFilter"))
            stop('Arguments to "!" must be an AnnotationFilter or ',
                 'AnnotationFilterList.')
        ## A single AnnotationFilter is negated in place; it is not wrapped
        ## into a negated AnnotationFilterList.
        x@not <- !x@not
        ## Clear the grouping flag so that a later `&` or `|` keeps the
        ## negated list as one element instead of splicing its content.
        if (isList)
            x@.groupingFlag <- FALSE
        x
    }
}

#' Functionality to translate a parenthesised sub-expression.
#'
#' @param sep \code{character(1)} with the operator symbol. It is forced but
#'     not otherwise used by the returned function.
#'
#' @return A \code{function} with a single argument \code{x}. An
#'     \code{AnnotationFilterList} is returned with its \code{.groupingFlag}
#'     set to \code{FALSE}; any other input is passed to the
#'     \code{AnnotationFilterList} constructor with
#'     \code{.groupingFlag = FALSE}.
#'
#' @noRd
.parenthesis_op <- function(sep) {
    force(sep)
    function(x) {
        if (is(x, "AnnotationFilterList")) {
            x@.groupingFlag <- FALSE
            x
        } else {
            AnnotationFilterList(x, .groupingFlag = FALSE)
        }
    }
}


#' Combine filters into an AnnotationFilterList combined with \code{sep}
#'
#' @param sep \code{character(1)} with the logical operator (\code{"&"} or
#'     \code{"|"}) that is placed between the two operands.
#'
#' @return A \code{function} with arguments \code{e1} and \code{e2} returning
#'     an \code{AnnotationFilterList} created with \code{.groupingFlag = TRUE}.
#'
#' @noRd
.combine_op <- function(sep) {
    force(sep)
    function(e1, e2) {
        op1 <- character()
        op2 <- character()
        ## An operand that is an AnnotationFilterList with the grouping flag
        ## set is unpacked: its filters and its logical operators are spliced
        ## into the result. Any other operand is kept as a single element.
        if (is(e1, "AnnotationFilterList") && e1@.groupingFlag) {
            op1 <- logicOp(e1)
            e1 <- .aflvalue(e1)
        } else {
            e1 <- list(e1)
        }
        if (is(e2, "AnnotationFilterList") && e2@.groupingFlag) {
            op2 <- logicOp(e2)
            e2 <- .aflvalue(e2)
        } else {
            e2 <- list(e2)
        }
        ## Operators are ordered left to right: those of e1, the current
        ## operator, those of e2.
        input <- c(e1, e2)
        input[['logicOp']] <- c(op1, sep, op2)
        input[['.groupingFlag']] <- TRUE
        do.call("AnnotationFilterList", input)
    }
}

#' The \code{.LOG_OP_REG} is a \code{list} providing functions for
#' common logical operations to translate expressions into AnnotationFilter
#' objects.
#'
#' @noRd
.LOG_OP_REG <- list()
## Assign conditions.
## Comparison operators. `%in%` is registered with the "==" condition.
.LOG_OP_REG[["=="]] <- .binary_op("==")
.LOG_OP_REG[["%in%"]] <- .binary_op("==")
.LOG_OP_REG[["!="]] <- .binary_op("!=")
.LOG_OP_REG[[">"]] <- .binary_op(">")
.LOG_OP_REG[["<"]] <- .binary_op("<")
.LOG_OP_REG[[">="]] <- .binary_op(">=")
.LOG_OP_REG[["<="]] <- .binary_op("<=")
## Infix operators specific to filter expressions.
.LOG_OP_REG[["%startsWith%"]] <- .binary_op("startsWith")
.LOG_OP_REG[["%endsWith%"]] <- .binary_op("endsWith")
.LOG_OP_REG[["%contains%"]] <- .binary_op("contains")
## Negation.
.LOG_OP_REG[["!"]] <- .not_op("!")
## Parenthesised sub-expressions.
.LOG_OP_REG[["("]] <- .parenthesis_op("(")
## Combination of filters.
.LOG_OP_REG[["&"]] <- .combine_op("&")
.LOG_OP_REG[["|"]] <- .combine_op("|")

## Top-level definitions of the custom infix operators; their bodies are
## empty.
`%startsWith%` <- function(e1, e2) {}
`%endsWith%` <- function(e1, e2) {}
`%contains%` <- function(e1, e2) {}

#' @rdname AnnotationFilter
#'
#' @description \code{AnnotationFilter} \emph{translates} a filter
#'     expression such as \code{~ gene_id == "BCL2"} into a filter object
#'     extending the \code{\link{AnnotationFilter}} class (in the example a
#'     \code{\link{GeneIdFilter}} object) or an
#'     \code{\link{AnnotationFilterList}} if the expression contains multiple
#'     conditions (see examples below). Filter expressions have to be written
#'     in the form \code{~ <field> <condition> <value>}, with \code{<field>}
#'     being the default field of the filter class (use the
#'     \code{supportedFilters} function to list all fields and filter classes),
#'     \code{<condition>} the logical expression and \code{<value>} the value
#'     for the filter.
#'
#' @details Filter expressions for the \code{AnnotationFilter} class have to be
#'     written as formulas, i.e. starting with a \code{~}.
#'
#' @note Translation of nested filter expressions using the
#'     \code{AnnotationFilter} function is not yet supported.
145 | #' 146 | #' @param expr A filter expression, written as a \code{formula}, to be 147 | #' converted to an \code{AnnotationFilter} or \code{AnnotationFilterList} 148 | #' class. See below for examples. 149 | #' 150 | #' @return \code{AnnotationFilter} returns an 151 | #' \code{\link{AnnotationFilter}} or an \code{\link{AnnotationFilterList}}. 152 | #' 153 | #' @importFrom lazyeval f_eval 154 | #' 155 | #' @examples 156 | #' 157 | #' ## Convert a filter expression based on a gene ID to a GeneIdFilter 158 | #' gnf <- AnnotationFilter(~ gene_id == "BCL2") 159 | #' gnf 160 | #' 161 | #' ## Same conversion but for two gene IDs. 162 | #' gnf <- AnnotationFilter(~ gene_id %in% c("BCL2", "BCL2L11")) 163 | #' gnf 164 | #' 165 | #' ## Converting an expression that combines multiple filters. As a result we 166 | #' ## get an AnnotationFilterList containing the corresponding filters. 167 | #' ## Be aware that nesting of expressions/filters does not work. 168 | #' flt <- AnnotationFilter(~ gene_id %in% c("BCL2", "BCL2L11") & 169 | #' tx_biotype == "nonsense_mediated_decay" | 170 | #' seq_name == "Y") 171 | #' flt 172 | #' 173 | #' @export 174 | AnnotationFilter <- function(expr) { 175 | res <- f_eval(expr, data = .LOG_OP_REG) 176 | if(is(res, "AnnotationFilterList")) res@.groupingFlag <- FALSE 177 | res 178 | } 179 | -------------------------------------------------------------------------------- /.svn/pristine/65/6537ab78a07d200e9fe894802b6b5f17bdd38fd9.svn-base: -------------------------------------------------------------------------------- 1 | #' @include AnnotationFilter.R 2 | 3 | #' @rdname AnnotationFilterList 4 | #' 5 | #' @name AnnotationFilterList 6 | #' 7 | #' @title Combining annotation filters 8 | #' 9 | #' @aliases AnnotationFilterList-class 10 | #' 11 | #' @description The \code{AnnotationFilterList} allows to combine 12 | #' filter objects extending the \code{\link{AnnotationFilter}} 13 | #' class to construct more complex queries. 
Consecutive filter 14 | #' objects in the \code{AnnotationFilterList} can be combined by a 15 | #' logical \emph{and} (\code{&}) or \emph{or} (\code{|}). The 16 | #' \code{AnnotationFilterList} extends \code{list}, individual 17 | #' elements can thus be accessed with \code{[[}. 18 | #' 19 | #' @note The \code{AnnotationFilterList} does not support containing empty 20 | #' elements, hence all elements of \code{length == 0} are removed in 21 | #' the constructor function. 22 | #' 23 | #' @exportClass AnnotationFilterList 24 | NULL 25 | 26 | .AnnotationFilterList <- setClass( 27 | "AnnotationFilterList", 28 | contains = "list", 29 | slots = c(logOp = "character") 30 | ) 31 | 32 | .LOG_OPS <- c("&", "|") 33 | 34 | setValidity("AnnotationFilterList", 35 | function(object) 36 | { 37 | txt <- character() 38 | filters <- .aflvalue(object) 39 | logOp <- .logOp(object) 40 | if (length(filters) == 0 && length(logOp)) { 41 | txt <- c( 42 | txt, "'logicOp' can not have length > 0 if the object is empty" 43 | ) 44 | } else if (length(filters) != 0) { 45 | ## Note: we allow length of filters being 1, but then logOp has 46 | ## to be empty. Check content: 47 | fun <- function(z) 48 | is(z, "AnnotationFilter") || is(z, "AnnotationFilterList") 49 | test <- vapply(filters, fun, logical(1)) 50 | if (!all(test)){ 51 | txt <- c( 52 | txt, "only 'AnnotationFilter' or 'AnnotationFilterList' allowed" 53 | ) 54 | } 55 | ## Check that all elements are non-empty (issue #17). Doing this 56 | ## separately from the check above to ensure we get a different error 57 | ## message. 58 | if (!all(lengths(filters) > 0)) 59 | txt <- c(txt, "Lengths of all elements have to be > 0") 60 | ## Check that logOp has length object -1 61 | if (length(logOp) != length(filters) - 1) 62 | txt <- c(txt, "length of 'logicOp' has to be length of the object -1") 63 | ## Check content of logOp. 
64 | if (!all(logOp %in% .LOG_OPS)) 65 | txt <- c(txt, "'logicOp' can only contain '&' and '|'") 66 | } 67 | 68 | if (length(txt)) txt else TRUE 69 | }) 70 | 71 | ## AnnotationFilterList constructor function. 72 | #' @rdname AnnotationFilterList 73 | #' 74 | #' @name AnnotationFilterList 75 | #' 76 | #' @param ... individual \code{\link{AnnotationFilter}} objects or a 77 | #' mixture of \code{AnnotationFilter} and 78 | #' \code{AnnotationFilterList} objects. 79 | #' 80 | #' @param logicOp \code{character} of length equal to the number 81 | #' of submitted \code{AnnotationFilter} objects - 1. Each value 82 | #' representing the logical operation to combine consecutive 83 | #' filters, i.e. the first element being the logical operation to 84 | #' combine the first and second \code{AnnotationFilter}, the 85 | #' second element being the logical operation to combine the 86 | #' second and third \code{AnnotationFilter} and so on. Allowed 87 | #' values are \code{"&"} and \code{"|"}. The function assumes a 88 | #' logical \emph{and} between all elements by default. 89 | #' 90 | #' @param logOp Deprecated; use \code{logicOp=}. 91 | #' 92 | #' @seealso \code{\link{supportedFilters}} for available 93 | #' \code{\link{AnnotationFilter}} objects 94 | #' 95 | #' @return \code{AnnotationFilterList} returns an \code{AnnotationFilterList}. 96 | #' 97 | #' @examples 98 | #' ## Create some AnnotationFilters 99 | #' gf <- GenenameFilter(c("BCL2", "BCL2L11")) 100 | #' tbtf <- TxBiotypeFilter("protein_coding", condition = "!=") 101 | #' 102 | #' ## Combine both to an AnnotationFilterList. By default elements are combined 103 | #' ## using a logical "and" operator. The filter list represents thus a query 104 | #' ## like: get all features where the gene name is either ("BCL2" or "BCL2L11") 105 | #' ## and the transcript biotype is not "protein_coding". 106 | #' afl <- AnnotationFilterList(gf, tbtf) 107 | #' afl 108 | #' 109 | #' ## Access individual filters. 
110 | #' afl[[1]] 111 | #' 112 | #' ## Create a filter in the form of: get all features where the gene name is 113 | #' ## either ("BCL2" or "BCL2L11") and the transcript biotype is not 114 | #' ## "protein_coding" or the seq_name is "Y". Hence, this will get all feature 115 | #' ## also found by the previous AnnotationFilterList and returns also all 116 | #' ## features on chromosome Y. 117 | #' afl <- AnnotationFilterList(gf, tbtf, SeqNameFilter("Y"), 118 | #' logicOp = c("&", "|")) 119 | #' afl 120 | #' 121 | #' @export 122 | AnnotationFilterList <- 123 | function(..., logicOp = character(), logOp = character()) 124 | { 125 | if (!missing(logOp) && missing(logicOp)) { 126 | logicOp <- logOp 127 | .Deprecated(msg = "'logOp' deprecated, use 'logicOp'") 128 | } 129 | filters <- list(...) 130 | ## Remove empty elements (issue #17) 131 | filters <- filters[lengths(filters) > 0] 132 | ## By default we're assuming & between elements. 133 | if (length(filters) > 1 & length(logicOp) == 0) 134 | logicOp <- rep("&", (length(filters) - 1)) 135 | .AnnotationFilterList(filters, logOp = logicOp) 136 | } 137 | 138 | .logOp <- function(object) object@logOp 139 | 140 | .aflvalue <- function(object) object@.Data 141 | 142 | #' @rdname AnnotationFilterList 143 | #' 144 | #' @description \code{value()} get a \code{list} with the 145 | #' \code{AnnotationFilter} objects. Use \code{[[} to access 146 | #' individual filters. 147 | #' 148 | #' @return \code{value()} returns a \code{list} with \code{AnnotationFilter} 149 | #' objects. 150 | #' 151 | #' @export 152 | setMethod("value", "AnnotationFilterList", .aflvalue) 153 | 154 | #' @rdname AnnotationFilterList 155 | #' 156 | #' @aliases logicOp 157 | #' 158 | #' @description \code{logicOp()} gets the logical operators separating 159 | #' successive \code{AnnotationFilter}. 160 | #' 161 | #' @return \code{logicOp()} returns a \code{character()} vector of 162 | #' \dQuote{&} or \dQuote{|} symbols. 
163 | #' 164 | #' @export logicOp 165 | setMethod("logicOp", "AnnotationFilterList", .logOp) 166 | 167 | #' @rdname AnnotationFilterList 168 | #' 169 | #' @param object An object of class \code{AnnotationFilterList}. 170 | #' 171 | #' @importFrom utils tail 172 | #' @export 173 | setMethod("show", "AnnotationFilterList", 174 | function(object) 175 | { 176 | cat( 177 | "class: ", class(object), "\n", 178 | "length: ", length(object), "\n", 179 | sep = "" 180 | ) 181 | if (length(object)) { 182 | cat("filters:\n\n") 183 | show(object[[1]]) 184 | for (i in tail(seq_along(object), -1L)) { 185 | cat("\n", logicOp(object)[i - 1L], "\n\n") 186 | show(object[[i]]) 187 | } 188 | } 189 | }) 190 | 191 | -------------------------------------------------------------------------------- /.svn/pristine/81/81fa7ab77bea17ab59d67e253f098ea223b2e1d3.svn-base: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AnnotationFilter.R, R/translate-utils.R 3 | \docType{methods} 4 | \name{AnnotationFilter} 5 | \alias{AnnotationFilter} 6 | \alias{CdsStartFilter} 7 | \alias{CdsEndFilter} 8 | \alias{ExonIdFilter} 9 | \alias{ExonNameFilter} 10 | \alias{ExonStartFilter} 11 | \alias{ExonEndFilter} 12 | \alias{ExonRankFilter} 13 | \alias{GeneIdFilter} 14 | \alias{GenenameFilter} 15 | \alias{GeneBiotypeFilter} 16 | \alias{GeneStartFilter} 17 | \alias{GeneEndFilter} 18 | \alias{EntrezFilter} 19 | \alias{SymbolFilter} 20 | \alias{TxIdFilter} 21 | \alias{TxNameFilter} 22 | \alias{TxBiotypeFilter} 23 | \alias{TxStartFilter} 24 | \alias{TxEndFilter} 25 | \alias{ProteinIdFilter} 26 | \alias{UniprotFilter} 27 | \alias{SeqNameFilter} 28 | \alias{SeqStrandFilter} 29 | \alias{AnnotationFilter-class} 30 | \alias{CharacterFilter-class} 31 | \alias{IntegerFilter-class} 32 | \alias{CdsStartFilter-class} 33 | \alias{CdsEndFilter-class} 34 | \alias{ExonIdFilter-class} 35 | \alias{ExonNameFilter-class} 
36 | \alias{ExonStartFilter-class} 37 | \alias{ExonEndFilter-class} 38 | \alias{ExonRankFilter-class} 39 | \alias{GeneIdFilter-class} 40 | \alias{GenenameFilter-class} 41 | \alias{GeneBiotypeFilter-class} 42 | \alias{GeneStartFilter-class} 43 | \alias{GeneEndFilter-class} 44 | \alias{EntrezFilter-class} 45 | \alias{SymbolFilter-class} 46 | \alias{TxIdFilter-class} 47 | \alias{TxNameFilter-class} 48 | \alias{TxBiotypeFilter-class} 49 | \alias{TxStartFilter-class} 50 | \alias{TxEndFilter-class} 51 | \alias{ProteinIdFilter-class} 52 | \alias{UniprotFilter-class} 53 | \alias{SeqNameFilter-class} 54 | \alias{SeqStrandFilter-class} 55 | \alias{supportedFilters} 56 | \alias{show,AnnotationFilter-method} 57 | \alias{show,CharacterFilter-method} 58 | \alias{show,IntegerFilter-method} 59 | \alias{show,GRangesFilter-method} 60 | \alias{condition,AnnotationFilter-method} 61 | \alias{condition} 62 | \alias{value,AnnotationFilter-method} 63 | \alias{value} 64 | \alias{field,AnnotationFilter-method} 65 | \alias{field} 66 | \alias{GRangesFilter-class} 67 | \alias{.GRangesFilter} 68 | \alias{GRangesFilter} 69 | \alias{feature} 70 | \alias{AnnotationFilter} 71 | \alias{supportedFilters,missing-method} 72 | \alias{AnnotationFilter} 73 | \title{Filters for annotation objects} 74 | \usage{ 75 | CdsStartFilter(value, condition = "==") 76 | CdsEndFilter(value, condition = "==") 77 | ExonIdFilter(value, condition = "==") 78 | ExonNameFilter(value, condition = "==") 79 | ExonRankFilter(value, condition = "==") 80 | ExonStartFilter(value, condition = "==") 81 | ExonEndFilter(value, condition = "==") 82 | GeneIdFilter(value, condition = "==") 83 | GenenameFilter(value, condition = "==") 84 | GeneBiotypeFilter(value, condition = "==") 85 | GeneStartFilter(value, condition = "==") 86 | GeneEndFilter(value, condition = "==") 87 | EntrezFilter(value, condition = "==") 88 | SymbolFilter(value, condition = "==") 89 | TxIdFilter(value, condition = "==") 90 | TxNameFilter(value, condition = "==") 91 
| TxBiotypeFilter(value, condition = "==") 92 | TxStartFilter(value, condition = "==") 93 | TxEndFilter(value, condition = "==") 94 | ProteinIdFilter(value, condition = "==") 95 | UniprotFilter(value, condition = "==") 96 | SeqNameFilter(value, condition = "==") 97 | SeqStrandFilter(value, condition = "==") 98 | 99 | \S4method{condition}{AnnotationFilter}(object) 100 | 101 | \S4method{value}{AnnotationFilter}(object) 102 | 103 | \S4method{field}{AnnotationFilter}(object) 104 | 105 | GRangesFilter(value, feature = "gene", type = c("any", "start", "end", 106 | "within", "equal")) 107 | 108 | feature(object) 109 | 110 | \S4method{supportedFilters}{missing}(object) 111 | 112 | AnnotationFilter(expr) 113 | } 114 | \arguments{ 115 | \item{object}{An \code{AnnotationFilter} object.} 116 | 117 | \item{value}{\code{character()}, \code{integer()}, or 118 | \code{GRanges()} value for the filter} 119 | 120 | \item{feature}{\code{character(1)} defining on what feature the 121 | \code{GRangesFilter} should be applied. Choices could be 122 | \code{"gene"}, \code{"tx"} or \code{"exon"}.} 123 | 124 | \item{type}{\code{character(1)} indicating how overlaps are to be 125 | filtered. See \code{findOverlaps} in the IRanges package for a 126 | description of this argument.} 127 | 128 | \item{expr}{A filter expression, written as a \code{formula}, to be 129 | converted to an \code{AnnotationFilter} or \code{AnnotationFilterList} 130 | class. See below for examples.} 131 | 132 | \item{condition}{\code{character(1)} defining the condition to be 133 | used in the filter. For \code{IntegerFilter}, one of 134 | \code{"=="}, \code{"!="}, \code{">"}, \code{"<"}, \code{">="} 135 | or \code{"<="}. For \code{CharacterFilter}, one of \code{"=="}, 136 | \code{"!="}, \code{"startsWith"}, \code{"endsWith"} or \code{"contains"}. 137 | Default condition is \code{"=="}.} 138 | } 139 | \value{ 140 | The constructor function return an object extending 141 | \code{AnnotationFilter}. 
For the return value of the other methods see 142 | the methods' descriptions. 143 | 144 | \code{AnnotationFilter} returns an 145 | \code{\link{AnnotationFilter}} or an \code{\link{AnnotationFilterList}}. 146 | } 147 | \description{ 148 | The filters extending the base \code{AnnotationFilter} class 149 | represent a simple filtering concept for annotation resources. 150 | Each filter object is thought to filter on a single (database) 151 | table column using the provided values and the defined condition. 152 | 153 | Filter instances created using the constructor functions (e.g. 154 | \code{GeneIdFilter}). 155 | 156 | \code{supportedFilters()} lists all defined filters. It returns a two column 157 | \code{data.frame} with the filter class name and its default field. 158 | Packages using \code{AnnotationFilter} should implement the 159 | \code{supportedFilters} for their annotation resource object (e.g. for 160 | \code{object = "EnsDb"} in the \code{ensembldb} package) to list all 161 | supported filters for the specific resource. 162 | 163 | \code{condition()} get the \code{condition} value for 164 | the filter \code{object}. 165 | 166 | \code{value()} get the \code{value} for the filter 167 | \code{object}. 168 | 169 | \code{field()} get the \code{field} for the filter 170 | \code{object}. 171 | 172 | \code{feature()} get the \code{feature} for the 173 | \code{GRangesFilter} \code{object}. 174 | 175 | \code{AnnotationFilter} \emph{translates} a filter 176 | expression such as \code{~ gene_id == "BCL2"} into a filter object 177 | extending the \code{\link{AnnotationFilter}} class (in the example a 178 | \code{\link{GeneIdFilter}} object) or an 179 | \code{\link{AnnotationFilterList}} if the expression contains multiple 180 | conditions (see examples below). 
Filter expressions have to be written 181 | in the form \code{~ }, with \code{} 182 | being the default field of the filter class (use the 183 | \code{supportedFilter} function to list all fields and filter classes), 184 | \code{} the logical expression and \code{} the value 185 | for the filter. 186 | } 187 | \details{ 188 | By default filters are only available for tables containing the 189 | field on which the filter acts (i.e. that contain a column with the 190 | name matching the value of the \code{field} slot of the 191 | object). See the vignette for a description to use filters for 192 | databases in which the database table column name differs from the 193 | default \code{field} of the filter. 194 | 195 | Filter expressions for the \code{AnnotationFilter} class have to be 196 | written as formulas, i.e. starting with a \code{~}. 197 | } 198 | \note{ 199 | Translation of nested filter expressions using the 200 | \code{AnnotationFilter} function is not yet supported. 201 | } 202 | \examples{ 203 | ## filter by GRanges 204 | GRangesFilter(GenomicRanges::GRanges("chr10:87869000-87876000")) 205 | ## Create a SymbolFilter to filter on a gene's symbol. 206 | sf <- SymbolFilter("BCL2") 207 | sf 208 | 209 | ## Create a GeneStartFilter to filter based on the genes' chromosomal start 210 | ## coordinates 211 | gsf <- GeneStartFilter(10000, condition = ">") 212 | gsf 213 | 214 | supportedFilters() 215 | 216 | ## Convert a filter expression based on a gene ID to a GeneIdFilter 217 | gnf <- AnnotationFilter(~ gene_id == "BCL2") 218 | gnf 219 | 220 | ## Same conversion but for two gene IDs. 221 | gnf <- AnnotationFilter(~ gene_id \%in\% c("BCL2", "BCL2L11")) 222 | gnf 223 | 224 | ## Converting an expression that combines multiple filters. As a result we 225 | ## get an AnnotationFilterList containing the corresponding filters. 226 | ## Be aware that nesting of expressions/filters does not work. 
227 | flt <- AnnotationFilter(~ gene_id \%in\% c("BCL2", "BCL2L11") & 228 | tx_biotype == "nonsense_mediated_decay" | 229 | seq_name == "Y") 230 | flt 231 | 232 | } 233 | \seealso{ 234 | \code{\link{AnnotationFilterList}} for combining 235 | \code{AnnotationFilter} objects. 236 | } 237 | -------------------------------------------------------------------------------- /man/AnnotationFilter.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AnnotationFilter.R, R/translate-utils.R 3 | \docType{methods} 4 | \name{AnnotationFilter} 5 | \alias{AnnotationFilter} 6 | \alias{CdsStartFilter} 7 | \alias{CdsEndFilter} 8 | \alias{ExonIdFilter} 9 | \alias{ExonNameFilter} 10 | \alias{ExonStartFilter} 11 | \alias{ExonEndFilter} 12 | \alias{ExonRankFilter} 13 | \alias{GeneIdFilter} 14 | \alias{GeneNameFilter} 15 | \alias{GeneBiotypeFilter} 16 | \alias{GeneStartFilter} 17 | \alias{GeneEndFilter} 18 | \alias{EntrezFilter} 19 | \alias{SymbolFilter} 20 | \alias{TxIdFilter} 21 | \alias{TxNameFilter} 22 | \alias{TxBiotypeFilter} 23 | \alias{TxStartFilter} 24 | \alias{TxEndFilter} 25 | \alias{ProteinIdFilter} 26 | \alias{UniprotFilter} 27 | \alias{SeqNameFilter} 28 | \alias{SeqStrandFilter} 29 | \alias{AnnotationFilter-class} 30 | \alias{CharacterFilter-class} 31 | \alias{DoubleFilter-class} 32 | \alias{IntegerFilter-class} 33 | \alias{CdsStartFilter-class} 34 | \alias{CdsEndFilter-class} 35 | \alias{ExonIdFilter-class} 36 | \alias{ExonNameFilter-class} 37 | \alias{ExonStartFilter-class} 38 | \alias{ExonEndFilter-class} 39 | \alias{ExonRankFilter-class} 40 | \alias{GeneIdFilter-class} 41 | \alias{GeneNameFilter-class} 42 | \alias{GeneBiotypeFilter-class} 43 | \alias{GeneStartFilter-class} 44 | \alias{GeneEndFilter-class} 45 | \alias{EntrezFilter-class} 46 | \alias{SymbolFilter-class} 47 | \alias{TxIdFilter-class} 48 | \alias{TxNameFilter-class} 49 | 
\alias{TxBiotypeFilter-class} 50 | \alias{TxStartFilter-class} 51 | \alias{TxEndFilter-class} 52 | \alias{ProteinIdFilter-class} 53 | \alias{UniprotFilter-class} 54 | \alias{SeqNameFilter-class} 55 | \alias{SeqStrandFilter-class} 56 | \alias{supportedFilters} 57 | \alias{show,AnnotationFilter-method} 58 | \alias{show,CharacterFilter-method} 59 | \alias{show,IntegerFilter-method} 60 | \alias{show,GRangesFilter-method} 61 | \alias{show,DoubleFilter-method} 62 | \alias{condition,AnnotationFilter-method} 63 | \alias{condition} 64 | \alias{value,AnnotationFilter-method} 65 | \alias{value} 66 | \alias{field,AnnotationFilter-method} 67 | \alias{field} 68 | \alias{not,AnnotationFilter-method} 69 | \alias{GRangesFilter-class} 70 | \alias{.GRangesFilter} 71 | \alias{GRangesFilter} 72 | \alias{feature} 73 | \alias{AnnotationFilter} 74 | \alias{convertFilter,AnnotationFilter,missing-method} 75 | \alias{supportedFilters,missing-method} 76 | \alias{AnnotationFilter} 77 | \title{Filters for annotation objects} 78 | \usage{ 79 | CdsStartFilter(value, condition = "==", not = FALSE) 80 | CdsEndFilter(value, condition = "==", not = FALSE) 81 | ExonIdFilter(value, condition = "==", not = FALSE) 82 | ExonNameFilter(value, condition = "==", not = FALSE) 83 | ExonRankFilter(value, condition = "==", not = FALSE) 84 | ExonStartFilter(value, condition = "==", not = FALSE) 85 | ExonEndFilter(value, condition = "==", not = FALSE) 86 | GeneIdFilter(value, condition = "==", not = FALSE) 87 | GeneNameFilter(value, condition = "==", not = FALSE) 88 | GeneBiotypeFilter(value, condition = "==", not = FALSE) 89 | GeneStartFilter(value, condition = "==", not = FALSE) 90 | GeneEndFilter(value, condition = "==", not = FALSE) 91 | EntrezFilter(value, condition = "==", not = FALSE) 92 | SymbolFilter(value, condition = "==", not = FALSE) 93 | TxIdFilter(value, condition = "==", not = FALSE) 94 | TxNameFilter(value, condition = "==", not = FALSE) 95 | TxBiotypeFilter(value, condition = "==", not = FALSE) 
96 | TxStartFilter(value, condition = "==", not = FALSE) 97 | TxEndFilter(value, condition = "==", not = FALSE) 98 | ProteinIdFilter(value, condition = "==", not = FALSE) 99 | UniprotFilter(value, condition = "==", not = FALSE) 100 | SeqNameFilter(value, condition = "==", not = FALSE) 101 | SeqStrandFilter(value, condition = "==", not = FALSE) 102 | 103 | \S4method{condition}{AnnotationFilter}(object) 104 | 105 | \S4method{value}{AnnotationFilter}(object) 106 | 107 | \S4method{field}{AnnotationFilter}(object) 108 | 109 | \S4method{not}{AnnotationFilter}(object) 110 | 111 | GRangesFilter(value, feature = "gene", type = c("any", "start", "end", 112 | "within", "equal")) 113 | 114 | feature(object) 115 | 116 | \S4method{convertFilter}{AnnotationFilter,missing}(object) 117 | 118 | \S4method{supportedFilters}{missing}(object) 119 | 120 | AnnotationFilter(expr) 121 | } 122 | \arguments{ 123 | \item{object}{An \code{AnnotationFilter} object.} 124 | 125 | \item{value}{\code{character()}, \code{integer()}, or 126 | \code{GRanges()} value for the filter} 127 | 128 | \item{feature}{\code{character(1)} defining on what feature the 129 | \code{GRangesFilter} should be applied. Choices could be 130 | \code{"gene"}, \code{"tx"} or \code{"exon"}.} 131 | 132 | \item{type}{\code{character(1)} indicating how overlaps are to be 133 | filtered. See \code{findOverlaps} in the IRanges package for a 134 | description of this argument.} 135 | 136 | \item{expr}{A filter expression, written as a \code{formula}, to be 137 | converted to an \code{AnnotationFilter} or \code{AnnotationFilterList} 138 | class. See below for examples.} 139 | 140 | \item{condition}{\code{character(1)} defining the condition to be 141 | used in the filter. For \code{IntegerFilter} or \code{DoubleFilter}, 142 | one of \code{"=="}, \code{"!="}, \code{">"}, \code{"<"}, \code{">="} 143 | or \code{"<="}. 
For \code{CharacterFilter}, one of \code{"=="}, 144 | \code{"!="}, \code{"startsWith"}, \code{"endsWith"} or \code{"contains"}. 145 | Default condition is \code{"=="}.} 146 | 147 | \item{not}{\code{logical(1)} whether the \code{AnnotationFilter} is negated. 148 | \code{TRUE} indicates is negated (!). \code{FALSE} indicates not 149 | negated. Default not is \code{FALSE}.} 150 | } 151 | \value{ 152 | The constructor function return an object extending 153 | \code{AnnotationFilter}. For the return value of the other methods see 154 | the methods' descriptions. 155 | 156 | \code{character(1)} that can be used as input to a \code{dplyr} 157 | filter. 158 | 159 | \code{AnnotationFilter} returns an 160 | \code{\link{AnnotationFilter}} or an \code{\link{AnnotationFilterList}}. 161 | } 162 | \description{ 163 | The filters extending the base \code{AnnotationFilter} class 164 | represent a simple filtering concept for annotation resources. 165 | Each filter object is thought to filter on a single (database) 166 | table column using the provided values and the defined condition. 167 | 168 | Filter instances created using the constructor functions (e.g. 169 | \code{GeneIdFilter}). 170 | 171 | \code{supportedFilters()} lists all defined filters. It returns a two column 172 | \code{data.frame} with the filter class name and its default field. 173 | Packages using \code{AnnotationFilter} should implement the 174 | \code{supportedFilters} for their annotation resource object (e.g. for 175 | \code{object = "EnsDb"} in the \code{ensembldb} package) to list all 176 | supported filters for the specific resource. 177 | 178 | \code{condition()} get the \code{condition} value for 179 | the filter \code{object}. 180 | 181 | \code{value()} get the \code{value} for the filter 182 | \code{object}. 183 | 184 | \code{field()} get the \code{field} for the filter 185 | \code{object}. 186 | 187 | \code{not()} get the \code{not} for the filter \code{object}. 
188 | 189 | \code{feature()} get the \code{feature} for the 190 | \code{GRangesFilter} \code{object}. 191 | 192 | Converts an \code{AnnotationFilter} object to a 193 | \code{character(1)} giving an equation that can be used as input to 194 | a \code{dplyr} filter. 195 | 196 | \code{AnnotationFilter} \emph{translates} a filter 197 | expression such as \code{~ gene_id == "BCL2"} into a filter object 198 | extending the \code{\link{AnnotationFilter}} class (in the example a 199 | \code{\link{GeneIdFilter}} object) or an 200 | \code{\link{AnnotationFilterList}} if the expression contains multiple 201 | conditions (see examples below). Filter expressions have to be written 202 | in the form \code{~ <field> <condition> <value>}, with \code{<field>} 203 | being the default field of the filter class (use the 204 | \code{supportedFilters} function to list all fields and filter classes), 205 | \code{<condition>} the logical expression and \code{<value>} the value 206 | for the filter. 207 | } 208 | \details{ 209 | By default filters are only available for tables containing the 210 | field on which the filter acts (i.e. that contain a column with the 211 | name matching the value of the \code{field} slot of the 212 | object). See the vignette for a description to use filters for 213 | databases in which the database table column name differs from the 214 | default \code{field} of the filter. 215 | 216 | Filter expressions for the \code{AnnotationFilter} class have to be 217 | written as formulas, i.e. starting with a \code{~}. 218 | } 219 | \note{ 220 | Translation of nested filter expressions using the 221 | \code{AnnotationFilter} function is not yet supported. 222 | } 223 | \examples{ 224 | ## filter by GRanges 225 | GRangesFilter(GenomicRanges::GRanges("chr10:87869000-87876000")) 226 | ## Create a SymbolFilter to filter on a gene's symbol.
227 | sf <- SymbolFilter("BCL2") 228 | sf 229 | 230 | ## Create a GeneStartFilter to filter based on the genes' chromosomal start 231 | ## coordinates 232 | gsf <- GeneStartFilter(10000, condition = ">") 233 | gsf 234 | 235 | filter <- SymbolFilter("ADA", "==") 236 | result <- convertFilter(filter) 237 | result 238 | supportedFilters() 239 | 240 | ## Convert a filter expression based on a gene ID to a GeneIdFilter 241 | gnf <- AnnotationFilter(~ gene_id == "BCL2") 242 | gnf 243 | 244 | ## Same conversion but for two gene IDs. 245 | gnf <- AnnotationFilter(~ gene_id \%in\% c("BCL2", "BCL2L11")) 246 | gnf 247 | 248 | ## Converting an expression that combines multiple filters. As a result we 249 | ## get an AnnotationFilterList containing the corresponding filters. 250 | ## Be aware that nesting of expressions/filters does not work. 251 | flt <- AnnotationFilter(~ gene_id \%in\% c("BCL2", "BCL2L11") & 252 | tx_biotype == "nonsense_mediated_decay" | 253 | seq_name == "Y") 254 | flt 255 | 256 | } 257 | \seealso{ 258 | \code{\link{AnnotationFilterList}} for combining 259 | \code{AnnotationFilter} objects. 260 | } 261 | -------------------------------------------------------------------------------- /R/AnnotationFilterList.R: -------------------------------------------------------------------------------- 1 | #' @include AnnotationFilter.R 2 | 3 | #' @rdname AnnotationFilterList 4 | #' 5 | #' @name AnnotationFilterList 6 | #' 7 | #' @title Combining annotation filters 8 | #' 9 | #' @aliases AnnotationFilterList-class 10 | #' 11 | #' @description The \code{AnnotationFilterList} allows to combine 12 | #' filter objects extending the \code{\link{AnnotationFilter}} 13 | #' class to construct more complex queries. Consecutive filter 14 | #' objects in the \code{AnnotationFilterList} can be combined by a 15 | #' logical \emph{and} (\code{&}) or \emph{or} (\code{|}). 
The 16 | #' \code{AnnotationFilterList} extends \code{list}, individual 17 | #' elements can thus be accessed with \code{[[}. 18 | #' 19 | #' @note The \code{AnnotationFilterList} does not support containing empty 20 | #' elements, hence all elements of \code{length == 0} are removed in 21 | #' the constructor function. 22 | #' 23 | #' @exportClass AnnotationFilterList 24 | NULL 25 | 26 | .AnnotationFilterList <- setClass( 27 | "AnnotationFilterList", 28 | contains = "list", 29 | slots = c(logOp = "character", 30 | not = "logical", 31 | .groupingFlag = "logical") 32 | ) 33 | 34 | .LOG_OPS <- c("&", "|") 35 | 36 | setValidity("AnnotationFilterList", 37 | function(object) 38 | { 39 | txt <- character() 40 | filters <- .aflvalue(object) 41 | logOp <- .logOp(object) 42 | not <- .not(object) ## NOTE(review): 'not' is read here but not validated below 43 | if (length(filters) == 0 && length(logOp)) { 44 | txt <- c( 45 | txt, "'logicOp' can not have length > 0 if the object is empty" 46 | ) 47 | } else if (length(filters) != 0) { 48 | ## Note: we allow length of filters being 1, but then logOp has 49 | ## to be empty. Check content: 50 | fun <- function(z) 51 | is(z, "AnnotationFilter") || is(z, "AnnotationFilterList") 52 | test <- vapply(filters, fun, logical(1)) 53 | if (!all(test)){ 54 | txt <- c( 55 | txt, "only 'AnnotationFilter' or 'AnnotationFilterList' allowed" 56 | ) 57 | } 58 | ## Check that all elements are non-empty (issue #17). Doing this 59 | ## separately from the check above to ensure we get a different error 60 | ## message. 61 | if (!all(lengths(filters) > 0)) 62 | txt <- c(txt, "Lengths of all elements have to be > 0") 63 | ## Check that logOp has length object -1 64 | if (length(logOp) != length(filters) - 1) 65 | txt <- c(txt, "length of 'logicOp' has to be length of the object -1") 66 | ## Check content of logOp.
67 | if (!all(logOp %in% .LOG_OPS)) 68 | txt <- c(txt, "'logicOp' can only contain '&' and '|'") 69 | } 70 | 71 | if (length(txt)) txt else TRUE 72 | }) 73 | 74 | ## AnnotationFilterList constructor function. 75 | #' @rdname AnnotationFilterList 76 | #' 77 | #' @name AnnotationFilterList 78 | #' 79 | #' @param ... individual \code{\link{AnnotationFilter}} objects or a 80 | #' mixture of \code{AnnotationFilter} and 81 | #' \code{AnnotationFilterList} objects. 82 | #' 83 | #' @param logicOp \code{character} of length equal to the number 84 | #' of submitted \code{AnnotationFilter} objects - 1. Each value 85 | #' representing the logical operation to combine consecutive 86 | #' filters, i.e. the first element being the logical operation to 87 | #' combine the first and second \code{AnnotationFilter}, the 88 | #' second element being the logical operation to combine the 89 | #' second and third \code{AnnotationFilter} and so on. Allowed 90 | #' values are \code{"&"} and \code{"|"}. The function assumes a 91 | #' logical \emph{and} between all elements by default. 92 | #' 93 | #' @param logOp Deprecated; use \code{logicOp=}. 94 | #' 95 | #' @param .groupingFlag Flag designated for internal use only. 96 | #' 97 | #' @param not \code{logical} of length one. Indicates whether the grouping 98 | #' of \code{AnnotationFilters} is to be negated. 99 | #' 100 | #' @seealso \code{\link{supportedFilters}} for available 101 | #' \code{\link{AnnotationFilter}} objects 102 | #' 103 | #' @return \code{AnnotationFilterList} returns an \code{AnnotationFilterList}. 104 | #' 105 | #' @examples 106 | #' ## Create some AnnotationFilters 107 | #' gf <- GeneNameFilter(c("BCL2", "BCL2L11")) 108 | #' tbtf <- TxBiotypeFilter("protein_coding", condition = "!=") 109 | #' 110 | #' ## Combine both to an AnnotationFilterList. By default elements are combined 111 | #' ## using a logical "and" operator.
The filter list represents thus a query 112 | #' ## like: get all features where the gene name is either ("BCL2" or "BCL2L11") 113 | #' ## and the transcript biotype is not "protein_coding". 114 | #' afl <- AnnotationFilterList(gf, tbtf) 115 | #' afl 116 | #' 117 | #' ## Access individual filters. 118 | #' afl[[1]] 119 | #' 120 | #' ## Create a filter in the form of: get all features where the gene name is 121 | #' ## either ("BCL2" or "BCL2L11") and the transcript biotype is not 122 | #' ## "protein_coding" or the seq_name is "Y". Hence, this will get all features 123 | #' ## also found by the previous AnnotationFilterList and returns also all 124 | #' ## features on chromosome Y. 125 | #' afl <- AnnotationFilterList(gf, tbtf, SeqNameFilter("Y"), 126 | #' logicOp = c("&", "|")) 127 | #' afl 128 | #' 129 | #' @export 130 | AnnotationFilterList <- 131 | function(..., logicOp = character(), logOp = character(), not = FALSE, 132 | .groupingFlag=FALSE) 133 | { 134 | if (!missing(logOp) && missing(logicOp)) { 135 | logicOp <- logOp 136 | .Deprecated(msg = "'logOp' deprecated, use 'logicOp'") 137 | } 138 | filters <- list(...) 139 | 140 | ## Drop empty elements; the validity check rejects elements of length 0. 141 | keep <- lengths(filters) != 0 142 | filters <- filters[keep] 143 | 144 | if (length(filters) > 1 && length(logicOp) == 0) 145 | ## By default we're assuming & between elements. 146 | logicOp <- rep("&", (length(filters) - 1)) 147 | .AnnotationFilterList(filters, logOp = logicOp, not = not, 148 | .groupingFlag=.groupingFlag) 149 | } 150 | 151 | .logOp <- function(object) object@logOp 152 | 153 | .aflvalue <- function(object) object@.Data 154 | 155 | .not <- function(object) object@not 156 | 157 | #' @rdname AnnotationFilterList 158 | #' 159 | #' @description \code{value()} get a \code{list} with the 160 | #' \code{AnnotationFilter} objects. Use \code{[[} to access 161 | #' individual filters.
162 | #' 163 | #' @return \code{value()} returns a \code{list} with \code{AnnotationFilter} 164 | #' objects. 165 | #' 166 | #' @export 167 | setMethod("value", "AnnotationFilterList", .aflvalue) 168 | 169 | #' @rdname AnnotationFilterList 170 | #' 171 | #' @aliases logicOp 172 | #' 173 | #' @description \code{logicOp()} gets the logical operators separating 174 | #' successive \code{AnnotationFilter}. 175 | #' 176 | #' @return \code{logicOp()} returns a \code{character()} vector of 177 | #' \dQuote{&} or \dQuote{|} symbols. 178 | #' 179 | #' @export logicOp 180 | setMethod("logicOp", "AnnotationFilterList", .logOp) 181 | 182 | #' @rdname AnnotationFilterList 183 | #' 184 | #' @aliases not 185 | #' 186 | #' @description \code{not()} gets the negation flag of the 187 | #' \code{AnnotationFilterList}. 188 | #' 189 | #' @return \code{not()} returns the content of the \code{not} slot, i.e. a 190 | #' \code{logical} indicating whether the list is negated. 191 | #' 192 | #' @export not 193 | setMethod("not", "AnnotationFilterList", .not) 194 | 195 | .distributeNegation <- function(object, .prior_negation=FALSE) 196 | { 197 | if(.not(object)) 198 | .prior_negation <- ifelse(.prior_negation, FALSE, TRUE) 199 | filters <- lapply(object, function(x){ 200 | if(is(x, "AnnotationFilterList")) 201 | distributeNegation(x, .prior_negation) 202 | else{ 203 | if(.prior_negation) x@not <- ifelse(x@not, FALSE, TRUE) 204 | x 205 | } 206 | }) 207 | ops <- vapply(logicOp(object), function(x) { 208 | if(.prior_negation){ 209 | if(x == '&') 210 | '|' 211 | else 212 | '&' 213 | } 214 | else 215 | x 216 | } 217 | ,character(1) 218 | ) 219 | ops <- unname(ops) 220 | filters[['logicOp']] <- ops 221 | do.call("AnnotationFilterList", filters) 222 | } 223 | 224 | #' @rdname AnnotationFilterList 225 | #' 226 | #' @aliases distributeNegation 227 | #' 228 | #' @description \code{distributeNegation()} pushes the negation of a list down 229 | #' to its elements, swapping \dQuote{&} and \dQuote{|} while doing so. 230 | #' @param .prior_negation \code{logical(1)} negation state passed down from an enclosing list during recursion; leave at the default when calling directly. 
231 | #' 232 | #' @return \code{AnnotationFilterList} object with DeMorgan's law applied to 233 | #' it such that it is equal to the original \code{AnnotationFilterList} 234 | #' object but all \code{!}'s are distributed out of the 235 | #' \code{AnnotationFilterList} object and to the nested 236 | #' \code{AnnotationFilter} objects. 237 | #' 238 | #' @examples 239 | #' afl <- AnnotationFilter(~!(symbol == 'ADA' | symbol %startsWith% 'SNORD')) 240 | #' afl <- distributeNegation(afl) 241 | #' afl 242 | #' @export 243 | setMethod("distributeNegation", "AnnotationFilterList", .distributeNegation) 244 | ## Internal: render the list as one expression string. Returns list(string, granges); granges holds the GRangesFilter objects that were replaced by 'GRangesFilter_<n>' placeholders (only when show=TRUE). 245 | #' @importFrom utils head 246 | #' 247 | #' @noRd 248 | .convertFilterList <- function(object, show, granges=list(), nested=FALSE) 249 | { 250 | filters <- value(object) 251 | result <- character(length(filters)) 252 | for (i in seq_along(filters)) { 253 | if (is(filters[[i]], "AnnotationFilterList")) { 254 | res <- .convertFilterList(filters[[i]], show=show, granges=granges, 255 | nested=TRUE) 256 | granges <- res[[2]] ## res[[2]] already starts with the granges passed in; appending would duplicate them 257 | result[i] <- res[[1]] 258 | } 259 | else if (field(filters[[i]]) == "granges") { 260 | if(!show) 261 | result[i] <- .convertFilter(filters[[i]]) 262 | else { 263 | nam <- paste0("GRangesFilter_", length(granges) + 1) 264 | granges <- c(granges, list(filters[[i]])) 265 | result[i] <- nam 266 | } 267 | } 268 | else 269 | result[i] <- .convertFilter(filters[[i]]) 270 | } 271 | 272 | result_last <- tail(result, 1) 273 | result <- head(result, -1) 274 | result <- c(rbind(result, logicOp(object))) 275 | result <- c(result, result_last) 276 | result <- paste(result, collapse=" ") 277 | if(nested || object@not) 278 | result <- paste0("(", result, ")") 279 | if(object@not) 280 | result <- paste0("!", result) 281 | 282 | list(result, granges) 283 | } 284 | 285 | #' @rdname AnnotationFilterList 286 | #' 287 | #' @aliases convertFilter 288 | #' 289 | #' @description Converts an \code{AnnotationFilterList} object to a 290 | #' 
\code{character(1)} giving an equation that can be used as input to 291 | #' a \code{dplyr} filter. 292 | #' 293 | #' @return \code{character(1)} that can be used as input to a \code{dplyr} 294 | #' filter. 295 | #' 296 | #' @examples 297 | #' afl <- AnnotationFilter(~symbol=="ADA" & tx_start > "400000") 298 | #' result <- convertFilter(afl) 299 | #' result 300 | #' @export 301 | setMethod("convertFilter", signature(object = "AnnotationFilterList", 302 | db = "missing") , function(object) 303 | { 304 | result <- .convertFilterList(object, show=FALSE) 305 | result[[1]] 306 | }) 307 | 308 | #' @rdname AnnotationFilterList 309 | #' 310 | #' @param object An object of class \code{AnnotationFilterList}. 311 | #' 312 | #' @importFrom utils tail 313 | #' @export 314 | setMethod("show", "AnnotationFilterList", function(object) 315 | { 316 | result <- .convertFilterList(object, show=TRUE) 317 | granges <- result[[2]] 318 | result <- result[[1]] 319 | cat("AnnotationFilterList of length", length(object), "\n") 320 | cat(result) 321 | cat("\n") 322 | for(i in seq_along(granges)) { 323 | cat("\n") 324 | cat("Symbol: GRangesFilter_", i, "\n", sep="") 325 | show(granges[[i]]) ## print the i-th filter so it matches the label above 326 | cat("\n") 327 | } 328 | }) 329 | -------------------------------------------------------------------------------- /.svn/pristine/21/2189e6a627b4c711e766c184f50bb9cdc230e821.svn-base: -------------------------------------------------------------------------------- 1 | #' @name AnnotationFilter 2 | #' 3 | #' @title Filters for annotation objects 4 | #' 5 | #' @aliases CdsStartFilter CdsEndFilter ExonIdFilter ExonNameFilter 6 | #' ExonStartFilter ExonEndFilter ExonRankFilter GeneIdFilter 7 | #' GenenameFilter GeneBiotypeFilter GeneStartFilter GeneEndFilter 8 | #' EntrezFilter SymbolFilter TxIdFilter TxNameFilter 9 | #' TxBiotypeFilter TxStartFilter TxEndFilter ProteinIdFilter 10 | #' UniprotFilter SeqNameFilter SeqStrandFilter 11 | #' AnnotationFilter-class CharacterFilter-class 12 | #' 
IntegerFilter-class CdsStartFilter-class CdsEndFilter-class 13 | #' ExonIdFilter-class ExonNameFilter-class ExonStartFilter-class 14 | #' ExonEndFilter-class ExonRankFilter-class GeneIdFilter-class 15 | #' GenenameFilter-class GeneBiotypeFilter-class 16 | #' GeneStartFilter-class GeneEndFilter-class EntrezFilter-class 17 | #' SymbolFilter-class TxIdFilter-class TxNameFilter-class 18 | #' TxBiotypeFilter-class TxStartFilter-class TxEndFilter-class 19 | #' ProteinIdFilter-class UniprotFilter-class SeqNameFilter-class 20 | #' SeqStrandFilter-class supportedFilters 21 | #' show,AnnotationFilter-method show,CharacterFilter-method 22 | #' show,IntegerFilter-method show,GRangesFilter-method 23 | #' 24 | #' @description 25 | #' 26 | #' The filters extending the base \code{AnnotationFilter} class 27 | #' represent a simple filtering concept for annotation resources. 28 | #' Each filter object is thought to filter on a single (database) 29 | #' table column using the provided values and the defined condition. 30 | #' 31 | #' Filter instances created using the constructor functions (e.g. 32 | #' \code{GeneIdFilter}). 33 | #' 34 | #' \code{supportedFilters()} lists all defined filters. It returns a two column 35 | #' \code{data.frame} with the filter class name and its default field. 36 | #' Packages using \code{AnnotationFilter} should implement the 37 | #' \code{supportedFilters} for their annotation resource object (e.g. for 38 | #' \code{object = "EnsDb"} in the \code{ensembldb} package) to list all 39 | #' supported filters for the specific resource. 40 | #' 41 | #' @details 42 | #' 43 | #' By default filters are only available for tables containing the 44 | #' field on which the filter acts (i.e. that contain a column with the 45 | #' name matching the value of the \code{field} slot of the 46 | #' object). 
See the vignette for a description to use filters for 47 | #' databases in which the database table column name differs from the 48 | #' default \code{field} of the filter. 49 | #' 50 | #' @usage 51 | #' 52 | #' CdsStartFilter(value, condition = "==") 53 | #' CdsEndFilter(value, condition = "==") 54 | #' ExonIdFilter(value, condition = "==") 55 | #' ExonNameFilter(value, condition = "==") 56 | #' ExonRankFilter(value, condition = "==") 57 | #' ExonStartFilter(value, condition = "==") 58 | #' ExonEndFilter(value, condition = "==") 59 | #' GeneIdFilter(value, condition = "==") 60 | #' GenenameFilter(value, condition = "==") 61 | #' GeneBiotypeFilter(value, condition = "==") 62 | #' GeneStartFilter(value, condition = "==") 63 | #' GeneEndFilter(value, condition = "==") 64 | #' EntrezFilter(value, condition = "==") 65 | #' SymbolFilter(value, condition = "==") 66 | #' TxIdFilter(value, condition = "==") 67 | #' TxNameFilter(value, condition = "==") 68 | #' TxBiotypeFilter(value, condition = "==") 69 | #' TxStartFilter(value, condition = "==") 70 | #' TxEndFilter(value, condition = "==") 71 | #' ProteinIdFilter(value, condition = "==") 72 | #' UniprotFilter(value, condition = "==") 73 | #' SeqNameFilter(value, condition = "==") 74 | #' SeqStrandFilter(value, condition = "==") 75 | #' 76 | #' @param value \code{character()}, \code{integer()}, or 77 | #' \code{GRanges()} value for the filter 78 | #' 79 | #' @param condition \code{character(1)} defining the condition to be 80 | #' used in the filter. For \code{IntegerFilter}, one of 81 | #' \code{"=="}, \code{"!="}, \code{">"}, \code{"<"}, \code{">="} 82 | #' or \code{"<="}. For \code{CharacterFilter}, one of \code{"=="}, 83 | #' \code{"!="}, \code{"startsWith"}, \code{"endsWith"} or \code{"contains"}. 84 | #' Default condition is \code{"=="}. 85 | #' 86 | #' @return The constructor function return an object extending 87 | #' \code{AnnotationFilter}. 
For the return value of the other methods see 88 | #' the methods' descriptions. 89 | #' 90 | #' @seealso \code{\link{AnnotationFilterList}} for combining 91 | #' \code{AnnotationFilter} objects. 92 | NULL 93 | 94 | .CONDITION <- list( 95 | IntegerFilter = c("==", "!=", ">", "<", ">=", "<="), 96 | CharacterFilter = c("==", "!=", "startsWith", "endsWith", "contains"), 97 | GRangesFilter = c("any", "start", "end", "within", "equal") 98 | ) 99 | 100 | .FIELD <- list( 101 | CharacterFilter = c( 102 | "exon_id", "exon_name", "gene_id", "genename", "gene_biotype", 103 | "entrez", "symbol", "tx_id", "tx_name", "tx_biotype", 104 | "protein_id", "uniprot", "seq_name", "seq_strand"), 105 | IntegerFilter = c( 106 | "cds_start", "cds_end", "exon_start", "exon_rank", "exon_end", 107 | "gene_start", "gene_end", "tx_start", "tx_end") 108 | ) 109 | 110 | .valid_condition <- function(condition, class) { 111 | txt <- character() 112 | 113 | test0 <- length(condition) == 1L 114 | if (!test0) 115 | txt <- c(txt, "'condition' must be length 1") 116 | 117 | test1 <- test0 && (condition %in% .CONDITION[[class]]) 118 | if (!test1) { 119 | value <- paste(sQuote(.CONDITION[[class]]), collapse=" ") 120 | txt <- c(txt, paste0("'", condition, "' must be in ", value)) 121 | } 122 | 123 | if (length(txt)) txt else TRUE 124 | } 125 | 126 | ############################################################ 127 | ## AnnotationFilter 128 | ## 129 | 130 | #' @exportClass AnnotationFilter 131 | .AnnotationFilter <- setClass( 132 | "AnnotationFilter", 133 | contains = "VIRTUAL", 134 | slots = c( 135 | field="character", 136 | condition="character", 137 | value="ANY" 138 | ), 139 | prototype=list( 140 | condition= "==" 141 | ) 142 | ) 143 | 144 | setValidity("AnnotationFilter", function(object) { 145 | txt <- character() 146 | 147 | value <- .value(object) 148 | condition <- .condition(object) 149 | test_len <- length(condition) == 1L 150 | test_NA <- !any(is.na(condition)) 151 | 152 | if (test_len && 
!test_NA) 153 | txt <- c(txt, "'condition' can not be NA") 154 | test0 <- test_len && test_NA 155 | 156 | test1 <- condition %in% c("startsWith", "endsWith", "contains", ">", 157 | "<", ">=", "<=") 158 | if (test0 && test1 && length(value) > 1L) 159 | txt <- c(txt, paste0("'", condition, "' requires length 1 'value'")) 160 | 161 | if (any(is.na(value))) 162 | txt <- c(txt, "'value' can not be NA") 163 | 164 | if (length(txt)) txt else TRUE 165 | }) 166 | 167 | .field <- function(object) object@field 168 | 169 | .condition <- function(object) object@condition 170 | 171 | .value <- function(object) object@value 172 | 173 | #' @rdname AnnotationFilter 174 | #' 175 | #' @aliases condition 176 | #' 177 | #' @description \code{condition()} get the \code{condition} value for 178 | #' the filter \code{object}. 179 | #' 180 | #' @param object An \code{AnnotationFilter} object. 181 | #' 182 | #' @export 183 | setMethod("condition", "AnnotationFilter", .condition) 184 | 185 | #' @rdname AnnotationFilter 186 | #' 187 | #' @aliases value 188 | #' 189 | #' @description \code{value()} get the \code{value} for the filter 190 | #' \code{object}. 191 | #' 192 | #' @export 193 | setMethod("value", "AnnotationFilter", .value) 194 | 195 | #' @rdname AnnotationFilter 196 | #' 197 | #' @aliases field 198 | #' 199 | #' @description \code{field()} get the \code{field} for the filter 200 | #' \code{object}. 
201 | #' 202 | #' @export 203 | setMethod("field", "AnnotationFilter", .field) 204 | 205 | #' @importFrom methods show 206 | #' 207 | #' @export 208 | setMethod("show", "AnnotationFilter", function(object){ 209 | cat("class:", class(object), 210 | "\ncondition:", .condition(object), "\n") 211 | }) 212 | 213 | ############################################################ 214 | ## CharacterFilter, IntegerFilter 215 | ## 216 | 217 | #' @exportClass CharacterFilter 218 | .CharacterFilter <- setClass( 219 | "CharacterFilter", 220 | contains = c("VIRTUAL", "AnnotationFilter"), 221 | slots = c(value = "character"), 222 | prototype = list( 223 | value = character() 224 | ) 225 | ) 226 | 227 | setValidity("CharacterFilter", function(object) { 228 | .valid_condition(.condition(object), "CharacterFilter") 229 | }) 230 | 231 | #' @importFrom methods show callNextMethod 232 | #' 233 | #' @export 234 | setMethod("show", "CharacterFilter", function(object) { 235 | callNextMethod() 236 | cat("value:", .value(object), "\n") 237 | }) 238 | 239 | #' @exportClass IntegerFilter 240 | .IntegerFilter <- setClass( 241 | "IntegerFilter", 242 | contains = c("VIRTUAL", "AnnotationFilter"), 243 | slots = c(value = "integer"), 244 | prototype = list( 245 | value = integer() 246 | ) 247 | ) 248 | 249 | setValidity("IntegerFilter", function(object) { 250 | .valid_condition(.condition(object), "IntegerFilter") 251 | }) 252 | 253 | #' @export 254 | setMethod("show", "IntegerFilter", function(object) { 255 | callNextMethod() 256 | cat("value:", .value(object), "\n") 257 | }) 258 | 259 | #' @rdname AnnotationFilter 260 | #' 261 | #' @importFrom GenomicRanges GRanges 262 | #' 263 | #' @importClassesFrom GenomicRanges GRanges 264 | #' 265 | #' @exportClass GRangesFilter 266 | .GRangesFilter <- setClass( 267 | "GRangesFilter", 268 | contains = "AnnotationFilter", 269 | slots = c( 270 | value = "GRanges", 271 | feature = "character" 272 | ), 273 | prototype = list( 274 | value = GRanges(), 275 | 
condition = "any", 276 | field = "granges", 277 | feature = "gene" 278 | ) 279 | ) 280 | 281 | setValidity("GRangesFilter", function(object) { 282 | .valid_condition(.condition(object), "GRangesFilter") 283 | }) 284 | 285 | .feature <- function(object) object@feature 286 | 287 | #' @rdname AnnotationFilter 288 | #' 289 | #' @param type \code{character(1)} indicating how overlaps are to be 290 | #' filtered. See \code{findOverlaps} in the IRanges package for a 291 | #' description of this argument. 292 | #' 293 | #' @examples 294 | #' ## filter by GRanges 295 | #' GRangesFilter(GenomicRanges::GRanges("chr10:87869000-87876000")) 296 | #' @export 297 | GRangesFilter <- 298 | function(value, feature = "gene", 299 | type = c("any", "start", "end", "within", "equal")) 300 | { 301 | condition <- match.arg(type) 302 | .GRangesFilter( 303 | field = "granges", 304 | value = value, 305 | condition = condition, 306 | feature = feature) 307 | } 308 | 309 | .feature <- function(object) object@feature 310 | 311 | #' @aliases feature 312 | #' 313 | #' @description \code{feature()} get the \code{feature} for the 314 | #' \code{GRangesFilter} \code{object}. 315 | #' 316 | #' @rdname AnnotationFilter 317 | #' 318 | #' @export 319 | feature <- .feature 320 | 321 | #' @importFrom GenomicRanges show 322 | #' 323 | #' @export 324 | setMethod("show", "GRangesFilter", function(object) { 325 | callNextMethod() 326 | cat("feature:", .feature(object), 327 | "\nvalue:\n") 328 | show(value(object)) 329 | }) 330 | 331 | 332 | ############################################################ 333 | ## Create install-time classes 334 | ## 335 | 336 | #' @rdname AnnotationFilter 337 | #' 338 | #' @name AnnotationFilter 339 | #' 340 | #' @param feature \code{character(1)} defining on what feature the 341 | #' \code{GRangesFilter} should be applied. Choices could be 342 | #' \code{"gene"}, \code{"tx"} or \code{"exon"}. 
343 | #' 344 | #' @examples 345 | #' ## Create a SymbolFilter to filter on a gene's symbol. 346 | #' sf <- SymbolFilter("BCL2") 347 | #' sf 348 | #' 349 | #' ## Create a GeneStartFilter to filter based on the genes' chromosomal start 350 | #' ## coordinates 351 | #' gsf <- GeneStartFilter(10000, condition = ">") 352 | #' gsf 353 | #' 354 | #' @export CdsStartFilter CdsEndFilter ExonIdFilter ExonNameFilter 355 | #' @export ExonStartFilter ExonEndFilter ExonRankFilter GeneIdFilter 356 | #' @export GenenameFilter GeneBiotypeFilter GeneStartFilter 357 | #' @export GeneEndFilter EntrezFilter SymbolFilter TxIdFilter 358 | #' @export TxNameFilter TxBiotypeFilter TxStartFilter TxEndFilter 359 | #' @export ProteinIdFilter UniprotFilter SeqNameFilter SeqStrandFilter 360 | #' 361 | #' @importFrom methods new 362 | #' 363 | #' @exportClass CdsStartFilter CdsEndFilter ExonIdFilter 364 | #' ExonNameFilter ExonStartFilter ExonEndFilter ExonRankFilter 365 | #' GeneIdFilter GenenameFilter GeneBiotypeFilter GeneStartFilter 366 | #' GeneEndFilter EntrezFilter SymbolFilter TxIdFilter TxNameFilter 367 | #' TxBiotypeFilter TxStartFilter TxEndFilter ProteinIdFilter 368 | #' UniprotFilter SeqNameFilter SeqStrandFilter 369 | NULL 370 | 371 | .fieldToClass <- function(field) { 372 | class <- gsub("_([[:alpha:]])", "\\U\\1", field, perl=TRUE) 373 | class <- sub("^([[:alpha:]])", "\\U\\1", class, perl=TRUE) 374 | paste0(class, if (length(class)) "Filter" else character(0)) 375 | } 376 | 377 | .filterFactory <- function(field, class) { 378 | force(field); force(class) # watch for lazy evaluation 379 | as.value <- 380 | if (field %in% .FIELD[["CharacterFilter"]]) { 381 | as.character 382 | } else { 383 | function(x) { 384 | stopifnot(is.numeric(x)) 385 | as.integer(x) 386 | } 387 | } 388 | 389 | function(value, condition = "==") { 390 | value <- as.value(value) 391 | condition <- as.character(condition) 392 | new(class, field=field, condition = condition, value=value) 393 | } 394 | } 395 | 396 
| local({ 397 | makeClass <- function(contains) { 398 | fields <- .FIELD[[contains]] 399 | classes <- .fieldToClass(fields) 400 | for (i in seq_along(fields)) { 401 | setClass(classes[[i]], contains=contains, where=topenv()) 402 | assign( 403 | classes[[i]], 404 | .filterFactory(fields[[i]], classes[[i]]), 405 | envir=topenv() 406 | ) 407 | } 408 | } 409 | for (contains in names(.FIELD)) 410 | makeClass(contains) 411 | }) 412 | 413 | ############################################################ 414 | ## Utilities - supportedFilters 415 | ## 416 | 417 | .FILTERS_WO_FIELD <- c("GRangesFilter") 418 | 419 | .supportedFilters <- function() { 420 | fields <- unlist(.FIELD, use.names=FALSE) 421 | filters <- .fieldToClass(fields) 422 | d <- data.frame( 423 | filter=c(filters, .FILTERS_WO_FIELD), 424 | field=c(fields, rep(NA, length(.FILTERS_WO_FIELD))) 425 | ) 426 | d[order(d$filter),] 427 | } 428 | 429 | #' @rdname AnnotationFilter 430 | #' 431 | #' @examples 432 | #' supportedFilters() 433 | #' @export 434 | setMethod("supportedFilters", "missing", function(object) { 435 | .supportedFilters() 436 | }) 437 | -------------------------------------------------------------------------------- /.svn/pristine/4e/4e9ec76b932b7ba44f5280dec6263ea963e53920.svn-base: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Facilities for Filtering Bioconductor Annotation Resources" 3 | output: 4 | BiocStyle::html_document2: 5 | toc_float: true 6 | vignette: > 7 | %\VignetteIndexEntry{Facilities for Filtering Bioconductor Annotation resources} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | %\VignettePackage{AnnotationFilter} 11 | %\VignetteDepends{org.Hs.eg.db,BiocStyle,RSQLite} 12 | --- 13 | 14 | ```{r style, echo = FALSE, results = 'asis', message=FALSE} 15 | BiocStyle::markdown() 16 | ``` 17 | 18 | **Package**: `r Biocpkg("AnnotationFilter")`
19 | **Authors**: `r packageDescription("AnnotationFilter")[["Author"]] `
20 | **Last modified:** `r file.info("AnnotationFilter.Rmd")$mtime`
21 | **Compiled**: `r date()` 22 | 23 | 24 | # Introduction 25 | 26 | A large variety of annotation resources are available in Bioconductor. Accessing 27 | the full content of these databases or even of single tables is computationally 28 | expensive and in many instances not required, as users may want to extract only 29 | sub-sets of the data e.g. genomic coordinates of a single gene. In that respect, 30 | filtering annotation resources before data extraction has a major impact on 31 | performance and increases the usability of such genome-scale databases. 32 | 33 | The `r Biocpkg("AnnotationFilter")` package was thus developed to provide basic 34 | filter classes to enable a common filtering framework for Bioconductor 35 | annotation resources. `r Biocpkg("AnnotationFilter")` defines filter classes for 36 | some of the most commonly used features in annotation databases, such as 37 | *symbol* or *genename*. Each filter class is supposed to work on a single 38 | database table column and to facilitate filtering on the provided values. Such 39 | filter classes enable the user to build complex queries to retrieve specific 40 | annotations without needing to know column or table names or the layout of the 41 | underlying databases. While initially being developed to be used in the 42 | `r Biocpkg("Organism.dplyr")` and `r Biocpkg("ensembldb")` packages, the filter 43 | classes and the related filtering concept can be easily added to other 44 | annotation packages too. 45 | 46 | 47 | # Filter classes 48 | 49 | All filter classes extend the basic `AnnotationFilter` class and take one or 50 | more *values* and a *condition* to allow filtering on a single database table 51 | column. Based on the type of the input value, filter classes are divided into: 52 | 53 | - `CharacterFilter`: takes a `character` value of length >= 1 and supports 54 | conditions `==`, `!=`, `startsWith`, `endsWith` and `contains`. An example would be a 55 | `GeneIdFilter` that allows to filter on gene IDs. 
56 | 57 | - `IntegerFilter`: takes a single `integer` as input and supports the conditions 58 | `==`, `!=`, `>`, `<`, `>=` and `<=`. An example would be a `GeneStartFilter` 59 | that filters results on the (chromosomal) start coordinates of genes. 60 | 61 | - `GRangesFilter`: is a special filter, as it takes a `GRanges` as `value` and 62 | performs the filtering on a combination of columns (i.e. start and end 63 | coordinate as well as sequence name and strand). To be consistent with the 64 | `findOverlaps` method from the `r Biocpkg("IRanges")` package, the constructor 65 | of the `GRangesFilter` filter takes a `type` argument to define its 66 | condition. Supported values are `"any"` (the default) that retrieves all 67 | entries overlapping the `GRanges`, `"start"` and `"end"` matching all features 68 | with the same start and end coordinate respectively, `"within"` that matches 69 | all features that are *within* the range defined by the `GRanges` and 70 | `"equal"` that returns features that are equal to the `GRanges`. 71 | 72 | The names of the filter classes are intuitive, the first part corresponding to 73 | the database column name with each character following a `_` being capitalized, 74 | followed by the key word `Filter`. A filter for a database table 75 | column `gene_id` is thus called `GeneIdFilter`. The default database column for 76 | a filter is stored in its `field` slot (accessible *via* the `field` method). 77 | 78 | The `supportedFilters` method can be used to get an overview of all available 79 | filter objects defined in `AnnotationFilter`. 80 | 81 | ```{r supportedFilters} 82 | library(AnnotationFilter) 83 | supportedFilters() 84 | ``` 85 | 86 | Note that the `AnnotationFilter` package provides only the filter classes 87 | but not the functionality to apply the filtering. 
Such functionality is 88 | annotation resource and database layout dependent and thus needs to be 89 | implemented in the packages providing access to annotation resources. 90 | 91 | 92 | # Usage 93 | 94 | Filters are created *via* their dedicated constructor functions, such as the 95 | `GeneIdFilter` function for the `GeneIdFilter` class. Because of this simple and 96 | cheap creation, filter classes are thought to be *read-only* and thus don't 97 | provide *setter* methods to change their slot values. In addition to the 98 | constructor functions, `AnnotationFilter` provides the functionality to 99 | *translate* query expressions into filter classes (see further below for an 100 | example). 101 | 102 | Below we create a `SymbolFilter` that could be used to filter an annotation 103 | resource to retrieve all entries associated with the specified symbol value(s). 104 | 105 | ```{r symbol-filter} 106 | library(AnnotationFilter) 107 | 108 | smbl <- SymbolFilter("BCL2") 109 | smbl 110 | ``` 111 | 112 | Such a filter is supposed to be used to retrieve all entries associated to 113 | features with a value in a database table column called *symbol* matching the 114 | filter's value `"BCL2"`. 115 | 116 | Using the `"startsWith"` condition we could define a filter to retrieve all 117 | entries for genes with a gene name/symbol starting with the specified value 118 | (e.g. `"BCL2"` and `"BCL2L11"` for the example below). 119 | 120 | ```{r symbol-startsWith} 121 | smbl <- SymbolFilter("BCL2", condition = "startsWith") 122 | smbl 123 | ``` 124 | 125 | In addition to the constructor functions, `AnnotationFilter` provides a 126 | functionality to create filter instances in a more natural and intuitive way by 127 | *translating* filter expressions (written as a *formula*, i.e. starting with a 128 | `~`). 
129 | 130 | ```{r convert-expression} 131 | smbl <- AnnotationFilter(~ symbol == "BCL2") 132 | smbl 133 | ``` 134 | 135 | Individual `AnnotationFilter` objects can be combined in an 136 | `AnnotationFilterList`. This class extends `list` and provides an additional 137 | `logicOp()` that defines how its individual filters are supposed to be 138 | combined. The length of `logicOp()` has to be 1 less than the number of filter 139 | objects. Each element in `logicOp()` defines how two consecutive filters should 140 | be combined. Below we create an `AnnotationFilterList` containing two filter 141 | objects to be combined with a logical *AND*. 142 | 143 | ```{r convert-multi-expression} 144 | flt <- AnnotationFilter(~ symbol == "BCL2" & 145 | tx_biotype == "protein_coding") 146 | flt 147 | ``` 148 | 149 | Note that the `AnnotationFilter` function does not (yet) support translation of 150 | nested expressions, such as `(symbol == "BCL2L11" & tx_biotype == 151 | "nonsense_mediated_decay") | (symbol == "BCL2" & tx_biotype == 152 | "protein_coding")`. Such queries can however be built by nesting 153 | `AnnotationFilterList` classes. 154 | 155 | ```{r nested-query} 156 | ## Define the filter query for the first pair of filters. 157 | afl1 <- AnnotationFilterList(SymbolFilter("BCL2L11"), 158 | TxBiotypeFilter("nonsense_mediated_decay")) 159 | ## Define the second pair of filters, i.e. the ones within the second ( ) brackets. 160 | afl2 <- AnnotationFilterList(SymbolFilter("BCL2"), 161 | TxBiotypeFilter("protein_coding")) 162 | ## Now combine both with a logical OR 163 | afl <- AnnotationFilterList(afl1, afl2, logicOp = "|") 164 | 165 | afl 166 | ``` 167 | 168 | This `AnnotationFilterList` would now select all entries for all transcripts of 169 | the gene *BCL2L11* with the biotype *nonsense_mediated_decay* or for all protein 170 | coding transcripts of the gene *BCL2*. 
171 | 172 | 173 | # Using `AnnotationFilter` in other packages 174 | 175 | The `AnnotationFilter` package only provides filter classes, but no 176 | filtering functionality. This has to be implemented in the package using the 177 | filters. In this section we first show in a very simple example how 178 | `AnnotationFilter` classes could be used to filter a `data.frame` and 179 | subsequently explore how a simple filter framework could be implemented for 180 | SQL-based annotation resources. 181 | 182 | Let's first define a simple `data.frame` containing the data we want to 183 | filter. Note that subsetting this `data.frame` using `AnnotationFilter` is 184 | obviously not the best solution, but it should help to understand the basic 185 | concept. 186 | 187 | ```{r define-data.frame} 188 | ## Define a simple gene table 189 | gene <- data.frame(gene_id = 1:10, 190 | symbol = c(letters[1:9], "b"), 191 | seq_name = paste0("chr", c(1, 4, 4, 8, 1, 2, 5, 3, "X", 4)), 192 | stringsAsFactors = FALSE) 193 | gene 194 | ``` 195 | 196 | Next we generate a `SymbolFilter` and inspect what information we can extract 197 | from it. 198 | 199 | ```{r simple-symbol} 200 | smbl <- SymbolFilter("b") 201 | ``` 202 | 203 | We can access the filter *condition* using the `condition` method 204 | 205 | ```{r simple-symbol-condition} 206 | condition(smbl) 207 | ``` 208 | 209 | The value of the filter using the `value` method 210 | 211 | ```{r simple-symbol-value} 212 | value(smbl) 213 | ``` 214 | 215 | And finally the *field* (i.e. column in the data table) using the `field` 216 | method. 217 | 218 | ```{r simple-symbol-field} 219 | field(smbl) 220 | ``` 221 | 222 | With this information we can define a simple function that takes the data table 223 | and the filter as input and returns a `logical` with length equal to the number 224 | of rows of the table, `TRUE` for rows matching the filter. 
225 | 226 | ```{r doMatch} 227 | 228 | doMatch <- function(x, filter) { 229 | do.call(condition(filter), list(x[, field(filter)], value(filter))) 230 | } 231 | 232 | ## Apply this function 233 | doMatch(gene, smbl) 234 | 235 | ``` 236 | 237 | Note that this simple function does not support multiple filters and also not 238 | conditions `"startsWith"` or `"endsWith"`. Next we define a second function that 239 | extracts the relevant data from the data resource. 240 | 241 | ```{r doExtract} 242 | 243 | doExtract <- function(x, filter) { 244 | x[doMatch(x, filter), ] 245 | } 246 | 247 | ## Apply it on the data 248 | doExtract(gene, smbl) 249 | ``` 250 | 251 | We could even modify the `doMatch` function to enable filter expressions. 252 | 253 | ```{r doMatch-formula} 254 | 255 | doMatch <- function(x, filter) { 256 | if (is(filter, "formula")) 257 | filter <- AnnotationFilter(filter) 258 | do.call(condition(filter), list(x[, field(filter)], value(filter))) 259 | } 260 | 261 | doExtract(gene, ~ gene_id == '2') 262 | 263 | ``` 264 | 265 | For such simple examples `AnnotationFilter` might be an overkill as the same 266 | could be achieved (much simpler) using standard R operations. A real case 267 | scenario in which `AnnotationFilter` becomes useful are SQL-based annotation 268 | resources. We will thus explore next how SQL resources could be filtered using 269 | `AnnotationFilter`. 270 | 271 | We use the SQLite database from the `r Biocpkg("org.Hs.eg.db")` package that 272 | provides a variety of annotations for all human genes. Using the packages' 273 | connection to the database we inspect first what database tables are available 274 | and then select one for our simple filtering example. 275 | 276 | We use an `EnsDb` SQLite database used by the `r Biocpkg("ensembldb")` package 277 | and implement simple filter functions to extract specific data from one of its 278 | database tables. 
We thus load below the `EnsDb.Hsapiens.v75` package that 279 | provides access to human gene, transcript, exon and protein annotations. Using 280 | its connection to the database we inspect first what database tables are 281 | available and then what *fields* (i.e. columns) the *gene* table has. 282 | 283 | ```{r orgDb, message = FALSE} 284 | ## Load the required packages 285 | library(org.Hs.eg.db) 286 | library(RSQLite) 287 | ## Get the database connection 288 | dbcon <- org.Hs.eg_dbconn() 289 | 290 | ## What tables do we have? 291 | dbListTables(dbcon) 292 | ``` 293 | 294 | `org.Hs.eg.db` provides many different tables, one for each identifier or 295 | annotation resource. We will use the *gene_info* table and determine which 296 | *fields* (i.e. columns) the table provides. 297 | 298 | ```{r gene_info} 299 | ## What fields are there in the gene_info table? 300 | dbListFields(dbcon, "gene_info") 301 | ``` 302 | 303 | The *gene_info* table provides the official gene symbol and the gene name. The 304 | column *symbol* matches the default `field` value of the `SymbolFilter`. For the 305 | `GenenameFilter` we would have to re-map its default field `"genename"` to the 306 | database column *gene_name*. There are many possibilities to do this, one would 307 | be to implement an own function to extract the field from the `AnnotationFilter` 308 | classes specific to the database. This function eventually renames the extracted 309 | field value to match the corresponding name of the database column name. 310 | 311 | We next implement a simple `doExtractGene` function that retrieves data from the 312 | *gene_info* table and re-uses the `doFilter` function to extract specific 313 | data. The parameter `x` is now the database connection object. 
314 | 315 | ```{r doExtractSQL} 316 | 317 | doExtractGene <- function(x, filter) { 318 | gene <- dbGetQuery(x, "select * from gene_info") 319 | doExtract(gene, filter) 320 | } 321 | 322 | ## Extract all entries for BCL2 323 | bcl2 <- doExtractGene(dbcon, SymbolFilter("BCL2")) 324 | 325 | bcl2 326 | ``` 327 | 328 | This works, but is not really efficient, since the function first fetches the 329 | full database table and subsets it only afterwards. A much more efficient 330 | solution is to *translate* the `AnnotationFilter` class(es) to an SQL *where* 331 | condition and hence perform the filtering on the database level. Here we have to 332 | do some small modifications, since not all condition values can be used 1:1 in 333 | SQL calls. The condition `"=="` has for example to be converted into `"="` and 334 | the `"startsWith"` into a SQL `"like"` by adding also a `"%"` wildcard to the 335 | value of the filter. We would also have to deal with filters that have a `value` 336 | of length > 1. A `SymbolFilter` with a `value` being `c("BCL2", "BCL2L11")` 337 | would for example have to be converted to a SQL call `"symbol in 338 | ('BCL2','BCL2L11')"`. Here we skip these special cases and define a simple 339 | function that translates an `AnnotationFilter` to a *where* condition to be 340 | included into the SQL call. Depending on whether the filter extends 341 | `CharacterFilter` or `IntegerFilter` the value has also to be quoted. 342 | 343 | ```{r simpleSQL} 344 | 345 | ## Define a simple function that covers some condition conversion 346 | conditionForSQL <- function(x) { 347 | switch(x, 348 | "==" = "=", 349 | x) 350 | } 351 | 352 | ## Define a function to translate a filter into an SQL where condition. 353 | ## Character values have to be quoted. 
354 | where <- function(x) { 355 | if (is(x, "CharacterFilter")) 356 | value <- paste0("'", value(x), "'") 357 | else value <- value(x) 358 | paste0(field(x), conditionForSQL(condition(x)), value) 359 | } 360 | 361 | ## Now "translate" a filter using this function 362 | where(SeqNameFilter("Y")) 363 | 364 | ``` 365 | 366 | Next we implement a new function which integrates the filter into the SQL call 367 | to let the database server take care of the filtering. 368 | 369 | ```{r doExtractGene2} 370 | 371 | ## Define a function that 372 | doExtractGene2 <- function(x, filter) { 373 | if (is(filter, "formula")) 374 | filter <- AnnotationFilter(filter) 375 | query <- paste0("select * from gene_info where ", where(filter)) 376 | dbGetQuery(x, query) 377 | } 378 | 379 | bcl2 <- doExtractGene2(dbcon, ~ symbol == "BCL2") 380 | bcl2 381 | 382 | ``` 383 | 384 | Below we compare the performance of both approaches. 385 | 386 | ```{r performance} 387 | system.time(doExtractGene(dbcon, ~ symbol == "BCL2")) 388 | 389 | system.time(doExtractGene2(dbcon, ~ symbol == "BCL2")) 390 | 391 | ``` 392 | 393 | Not surprisingly, the second approach is much faster. 394 | 395 | Be aware that the examples shown here are only for illustration purposes. In a 396 | real world situation additional factors, like combinations of filters, which 397 | database tables to join, which columns to be returned etc would have to be 398 | considered too. 
399 | 400 | # Session information 401 | 402 | ```{r si} 403 | sessionInfo() 404 | ``` 405 | -------------------------------------------------------------------------------- /vignettes/AnnotationFilter.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Facilities for Filtering Bioconductor Annotation Resources" 3 | output: 4 | BiocStyle::html_document: 5 | toc_float: true 6 | vignette: > 7 | %\VignetteIndexEntry{Facilities for Filtering Bioconductor Annotation resources} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | %\VignettePackage{AnnotationFilter} 11 | %\VignetteDepends{org.Hs.eg.db,BiocStyle,RSQLite} 12 | --- 13 | 14 | ```{r style, echo = FALSE, results = 'asis', message=FALSE} 15 | BiocStyle::markdown() 16 | ``` 17 | 18 | **Package**: `r Biocpkg("AnnotationFilter")`
19 | **Authors**: `r packageDescription("AnnotationFilter")[["Author"]] `
20 | **Last modified:** `r file.info("AnnotationFilter.Rmd")$mtime`
21 | **Compiled**: `r date()` 22 | 23 | 24 | # Introduction 25 | 26 | A large variety of annotation resources are available in Bioconductor. Accessing 27 | the full content of these databases or even of single tables is computationally 28 | expensive and in many instances not required, as users may want to extract only 29 | sub-sets of the data e.g. genomic coordinates of a single gene. In that respect, 30 | filtering annotation resources before data extraction has a major impact on 31 | performance and increases the usability of such genome-scale databases. 32 | 33 | The `r Biocpkg("AnnotationFilter")` package was thus developed to provide basic 34 | filter classes to enable a common filtering framework for Bioconductor 35 | annotation resources. `r Biocpkg("AnnotationFilter")` defines filter classes for 36 | some of the most commonly used features in annotation databases, such as 37 | *symbol* or *genename*. Each filter class is supposed to work on a single 38 | database table column and to facilitate filtering on the provided values. Such 39 | filter classes enable the user to build complex queries to retrieve specific 40 | annotations without needing to know column or table names or the layout of the 41 | underlying databases. While initially being developed to be used in the 42 | `r Biocpkg("Organism.dplyr")` and `r Biocpkg("ensembldb")` packages, the filter 43 | classes and the related filtering concept can be easily added to other 44 | annotation packages too. 45 | 46 | 47 | # Filter classes 48 | 49 | All filter classes extend the basic `AnnotationFilter` class and take one or 50 | more *values* and a *condition* to allow filtering on a single database table 51 | column. Based on the type of the input value, filter classes are divided into: 52 | 53 | - `CharacterFilter`: takes a `character` value of length >= 1 and supports 54 | conditions `==`, `!=`, `startsWith`, `endsWith` and `contains`. An example would be a 55 | `GeneIdFilter` that allows filtering on gene IDs.
56 | 57 | - `IntegerFilter`: takes a single `integer` as input and supports the conditions 58 | `==`, `!=`, `>`, `<`, `>=` and `<=`. An example would be a `GeneStartFilter` 59 | that filters results on the (chromosomal) start coordinates of genes. 60 | 61 | - `DoubleFilter`: takes a single `numeric` as input and supports the conditions 62 | `==`, `!=`, `>`, `<`, `>=` and `<=`. 63 | 64 | - `GRangesFilter`: is a special filter, as it takes a `GRanges` as `value` and 65 | performs the filtering on a combination of columns (i.e. start and end 66 | coordinate as well as sequence name and strand). To be consistent with the 67 | `findOverlaps` method from the `r Biocpkg("IRanges")` package, the constructor 68 | of the `GRangesFilter` filter takes a `type` argument to define its 69 | condition. Supported values are `"any"` (the default) that retrieves all 70 | entries overlapping the `GRanges`, `"start"` and `"end"` matching all features 71 | with the same start and end coordinate respectively, `"within"` that matches 72 | all features that are *within* the range defined by the `GRanges` and 73 | `"equal"` that returns features that are equal to the `GRanges`. 74 | 75 | The names of the filter classes are intuitive, the first part corresponding to 76 | the database column name with each character following a `_` being capitalized, 77 | followed by the key word `Filter`. The name of a filter for a database table 78 | column `gene_id` is thus called `GeneIdFilter`. The default database column for 79 | a filter is stored in its `field` slot (accessible *via* the `field` method). 80 | 81 | The `supportedFilters` method can be used to get an overview of all available 82 | filter objects defined in `AnnotationFilter`. 83 | 84 | ```{r supportedFilters} 85 | library(AnnotationFilter) 86 | supportedFilters() 87 | ``` 88 | 89 | Note that the `AnnotationFilter` package provides only the filter classes 90 | but not the functionality to apply the filtering.
Such functionality is 91 | annotation resource and database layout dependent and needs thus to be 92 | implemented in the packages providing access to annotation resources. 93 | 94 | 95 | # Usage 96 | 97 | Filters are created *via* their dedicated constructor functions, such as the 98 | `GeneIdFilter` function for the `GeneIdFilter` class. Because of this simple and 99 | cheap creation, filter classes are thought to be *read-only* and thus don't 100 | provide *setter* methods to change their slot values. In addition to the 101 | constructor functions, `AnnotationFilter` provides the functionality to 102 | *translate* query expressions into filter classes (see further below for an 103 | example). 104 | 105 | Below we create a `SymbolFilter` that could be used to filter an annotation 106 | resource to retrieve all entries associated with the specified symbol value(s). 107 | 108 | ```{r symbol-filter} 109 | library(AnnotationFilter) 110 | 111 | smbl <- SymbolFilter("BCL2") 112 | smbl 113 | ``` 114 | 115 | Such a filter is supposed to be used to retrieve all entries associated to 116 | features with a value in a database table column called *symbol* matching the 117 | filter's value `"BCL2"`. 118 | 119 | Using the `"startsWith"` condition we could define a filter to retrieve all 120 | entries for genes with a gene name/symbol starting with the specified value 121 | (e.g. `"BCL2"` and `"BCL2L11"` for the example below). 122 | 123 | ```{r symbol-startsWith} 124 | smbl <- SymbolFilter("BCL2", condition = "startsWith") 125 | smbl 126 | ``` 127 | 128 | In addition to the constructor functions, `AnnotationFilter` provides a 129 | functionality to create filter instances in a more natural and intuitive way by 130 | *translating* filter expressions (written as a *formula*, i.e. starting with a 131 | `~`).
132 | 133 | ```{r convert-expression} 134 | smbl <- AnnotationFilter(~ symbol == "BCL2") 135 | smbl 136 | ``` 137 | 138 | Individual `AnnotationFilter` objects can be combined in an 139 | `AnnotationFilterList`. This class extends `list` and provides an additional 140 | `logicOp()` that defines how its individual filters are supposed to be 141 | combined. The length of `logicOp()` has to be 1 less than the number of filter 142 | objects. Each element in `logicOp()` defines how two consecutive filters should 143 | be combined. Below we create an `AnnotationFilterList` containing two filter 144 | objects to be combined with a logical *AND*. 145 | 146 | ```{r convert-multi-expression} 147 | flt <- AnnotationFilter(~ symbol == "BCL2" & 148 | tx_biotype == "protein_coding") 149 | flt 150 | ``` 151 | 152 | Note that the `AnnotationFilter` function does not (yet) support translation of 153 | nested expressions, such as `(symbol == "BCL2L11" & tx_biotype == 154 | "nonsense_mediated_decay") | (symbol == "BCL2" & tx_biotype == 155 | "protein_coding")`. Such queries can however be built by nesting 156 | `AnnotationFilterList` classes. 157 | 158 | ```{r nested-query} 159 | ## Define the filter query for the first pair of filters. 160 | afl1 <- AnnotationFilterList(SymbolFilter("BCL2L11"), 161 | TxBiotypeFilter("nonsense_mediated_decay")) 162 | ## Define the filter query for the second pair of filters. 163 | afl2 <- AnnotationFilterList(SymbolFilter("BCL2"), 164 | TxBiotypeFilter("protein_coding")) 165 | ## Now combine both with a logical OR 166 | afl <- AnnotationFilterList(afl1, afl2, logicOp = "|") 167 | 168 | afl 169 | ``` 170 | 171 | This `AnnotationFilterList` would now select all entries for all transcripts of 172 | the gene *BCL2L11* with the biotype *nonsense_mediated_decay* or for all protein 173 | coding transcripts of the gene *BCL2*.
174 | 175 | 176 | # Using `AnnotationFilter` in other packages 177 | 178 | The `AnnotationFilter` package does only provide filter classes, but no 179 | filtering functionality. This has to be implemented in the package using the 180 | filters. In this section we first show in a very simple example how 181 | `AnnotationFilter` classes could be used to filter a `data.frame` and 182 | subsequently explore how a simple filter framework could be implemented for 183 | SQL-based annotation resources. 184 | 185 | Let's first define a simple `data.frame` containing the data we want to 186 | filter. Note that subsetting this `data.frame` using `AnnotationFilter` is 187 | obviously not the best solution, but it should help to understand the basic 188 | concept. 189 | 190 | ```{r define-data.frame} 191 | ## Define a simple gene table 192 | gene <- data.frame(gene_id = 1:10, 193 | symbol = c(letters[1:9], "b"), 194 | seq_name = paste0("chr", c(1, 4, 4, 8, 1, 2, 5, 3, "X", 4)), 195 | stringsAsFactors = FALSE) 196 | gene 197 | ``` 198 | 199 | Next we generate a `SymbolFilter` and inspect what information we can extract 200 | from it. 201 | 202 | ```{r simple-symbol} 203 | smbl <- SymbolFilter("b") 204 | ``` 205 | 206 | We can access the filter *condition* using the `condition` method 207 | 208 | ```{r simple-symbol-condition} 209 | condition(smbl) 210 | ``` 211 | 212 | The value of the filter using the `value` method 213 | 214 | ```{r simple-symbol-value} 215 | value(smbl) 216 | ``` 217 | 218 | And finally the *field* (i.e. column in the data table) using the `field` 219 | method. 220 | 221 | ```{r simple-symbol-field} 222 | field(smbl) 223 | ``` 224 | 225 | With this information we can define a simple function that takes the data table 226 | and the filter as input and returns a `logical` with length equal to the number 227 | of rows of the table, `TRUE` for rows matching the filter.
228 | 229 | ```{r doMatch} 230 | 231 | doMatch <- function(x, filter) { 232 | do.call(condition(filter), list(x[, field(filter)], value(filter))) 233 | } 234 | 235 | ## Apply this function 236 | doMatch(gene, smbl) 237 | 238 | ``` 239 | 240 | Note that this simple function does not support multiple filters and also not 241 | conditions `"startsWith"` or `"endsWith"`. Next we define a second function that 242 | extracts the relevant data from the data resource. 243 | 244 | ```{r doExtract} 245 | 246 | doExtract <- function(x, filter) { 247 | x[doMatch(x, filter), ] 248 | } 249 | 250 | ## Apply it on the data 251 | doExtract(gene, smbl) 252 | ``` 253 | 254 | We could even modify the `doMatch` function to enable filter expressions. 255 | 256 | ```{r doMatch-formula} 257 | 258 | doMatch <- function(x, filter) { 259 | if (is(filter, "formula")) 260 | filter <- AnnotationFilter(filter) 261 | do.call(condition(filter), list(x[, field(filter)], value(filter))) 262 | } 263 | 264 | doExtract(gene, ~ gene_id == '2') 265 | 266 | ``` 267 | 268 | For such simple examples `AnnotationFilter` might be an overkill as the same 269 | could be achieved (much simpler) using standard R operations. A real case 270 | scenario in which `AnnotationFilter` becomes useful are SQL-based annotation 271 | resources. We will thus explore next how SQL resources could be filtered using 272 | `AnnotationFilter`. 273 | 274 | We use the SQLite database from the `r Biocpkg("org.Hs.eg.db")` package that 275 | provides a variety of annotations for all human genes. Using the packages' 276 | connection to the database we inspect first what database tables are available 277 | and then select one for our simple filtering example. 278 | 279 | We use an `EnsDb` SQLite database used by the `r Biocpkg("ensembldb")` package 280 | and implement simple filter functions to extract specific data from one of its 281 | database tables. 
We thus load below the `EnsDb.Hsapiens.v75` package that 282 | provides access to human gene, transcript, exon and protein annotations. Using 283 | its connection to the database we inspect first what database tables are 284 | available and then what *fields* (i.e. columns) the *gene* table has. 285 | 286 | ```{r orgDb, message = FALSE} 287 | ## Load the required packages 288 | library(org.Hs.eg.db) 289 | library(RSQLite) 290 | ## Get the database connection 291 | dbcon <- org.Hs.eg_dbconn() 292 | 293 | ## What tables do we have? 294 | dbListTables(dbcon) 295 | ``` 296 | 297 | `org.Hs.eg.db` provides many different tables, one for each identifier or 298 | annotation resource. We will use the *gene_info* table and determine which 299 | *fields* (i.e. columns) the table provides. 300 | 301 | ```{r gene_info} 302 | ## What fields are there in the gene_info table? 303 | dbListFields(dbcon, "gene_info") 304 | ``` 305 | 306 | The *gene_info* table provides the official gene symbol and the gene name. The 307 | column *symbol* matches the default `field` value of the `SymbolFilter` as does 308 | the column *gene_name* for the `GeneNameFilter`. If the column in the database 309 | would not match the field of an `AnnotationFilter`, we would have to implement a 310 | function that maps the default field of the filter object to the database 311 | column. See the end of the section for an example. 312 | 313 | We next implement a simple `doExtractGene` function that retrieves data from the 314 | *gene_info* table and re-uses the `doExtract` function to extract specific 315 | data. The parameter `x` is now the database connection object.
316 | 317 | ```{r doExtractSQL} 318 | 319 | doExtractGene <- function(x, filter) { 320 | gene <- dbGetQuery(x, "select * from gene_info") 321 | doExtract(gene, filter) 322 | } 323 | 324 | ## Extract all entries for BCL2 325 | bcl2 <- doExtractGene(dbcon, SymbolFilter("BCL2")) 326 | 327 | bcl2 328 | ``` 329 | 330 | This works, but is not really efficient, since the function first fetches the 331 | full database table and subsets it only afterwards. A much more efficient 332 | solution is to *translate* the `AnnotationFilter` class(es) to an SQL *where* 333 | condition and hence perform the filtering on the database level. Here we have to 334 | do some small modifications, since not all condition values can be used 1:1 in 335 | SQL calls. The condition `"=="` has for example to be converted into `"="` and 336 | the `"startsWith"` into a SQL `"like"` by adding also a `"%"` wildcard to the 337 | value of the filter. We would also have to deal with filters that have a `value` 338 | of length > 1. A `SymbolFilter` with a `value` being `c("BCL2", "BCL2L11")` 339 | would for example have to be converted to a SQL call `"symbol in 340 | ('BCL2','BCL2L11')"`. Here we skip these special cases and define a simple 341 | function that translates an `AnnotationFilter` to a *where* condition to be 342 | included into the SQL call. Depending on whether the filter extends 343 | `CharacterFilter` or `IntegerFilter` the value has also to be quoted. 344 | 345 | ```{r simpleSQL} 346 | 347 | ## Define a simple function that covers some condition conversion 348 | conditionForSQL <- function(x) { 349 | switch(x, 350 | "==" = "=", 351 | x) 352 | } 353 | 354 | ## Define a function to translate a filter into an SQL where condition. 355 | ## Character values have to be quoted. 
356 | where <- function(x) { 357 | if (is(x, "CharacterFilter")) 358 | value <- paste0("'", value(x), "'") 359 | else value <- value(x) 360 | paste0(field(x), conditionForSQL(condition(x)), value) 361 | } 362 | 363 | ## Now "translate" a filter using this function 364 | where(SeqNameFilter("Y")) 365 | 366 | ``` 367 | 368 | Next we implement a new function which integrates the filter into the SQL call 369 | to let the database server take care of the filtering. 370 | 371 | ```{r doExtractGene2} 372 | 373 | ## Define a function that builds the SQL query from the filter and runs it 374 | doExtractGene2 <- function(x, filter) { 375 | if (is(filter, "formula")) 376 | filter <- AnnotationFilter(filter) 377 | query <- paste0("select * from gene_info where ", where(filter)) 378 | dbGetQuery(x, query) 379 | } 380 | 381 | bcl2 <- doExtractGene2(dbcon, ~ symbol == "BCL2") 382 | bcl2 383 | 384 | ``` 385 | 386 | Below we compare the performance of both approaches. 387 | 388 | ```{r performance} 389 | system.time(doExtractGene(dbcon, ~ symbol == "BCL2")) 390 | 391 | system.time(doExtractGene2(dbcon, ~ symbol == "BCL2")) 392 | 393 | ``` 394 | 395 | Not surprisingly, the second approach is much faster. 396 | 397 | Be aware that the examples shown here are only for illustration purposes. In a 398 | real world situation additional factors, like combinations of filters, which 399 | database tables to join, which columns to be returned etc would have to be 400 | considered too. 401 | 402 | What if the database column on which we want to filter does not match the 403 | `field` of an `AnnotationFilter`? If for example the database column is named 404 | *hgnc_symbol* instead of *symbol* we could package-internally 405 | overwrite the default `field` method for `SymbolFilter` to return the correct 406 | field for the database column. 407 | 408 | ```{r symbol-overwrite} 409 | ## Default method from AnnotationFilter: 410 | field(SymbolFilter("a")) 411 | 412 | ## Overwrite the default method.
413 | setMethod("field", "SymbolFilter", function(object, ...) "hgnc_symbol") 414 | 415 | ## Call to field returns now the "correct" database column 416 | field(SymbolFilter("a")) 417 | 418 | ``` 419 | 420 | 421 | # Session information 422 | 423 | ```{r si} 424 | sessionInfo() 425 | ``` 426 | -------------------------------------------------------------------------------- /R/AnnotationFilter.R: -------------------------------------------------------------------------------- 1 | #' @name AnnotationFilter 2 | #' 3 | #' @title Filters for annotation objects 4 | #' 5 | #' @aliases CdsStartFilter CdsEndFilter ExonIdFilter ExonNameFilter 6 | #' ExonStartFilter ExonEndFilter ExonRankFilter GeneIdFilter 7 | #' GeneNameFilter GeneBiotypeFilter GeneStartFilter GeneEndFilter 8 | #' EntrezFilter SymbolFilter TxIdFilter TxNameFilter 9 | #' TxBiotypeFilter TxStartFilter TxEndFilter ProteinIdFilter 10 | #' UniprotFilter SeqNameFilter SeqStrandFilter 11 | #' AnnotationFilter-class CharacterFilter-class DoubleFilter-class 12 | #' IntegerFilter-class CdsStartFilter-class CdsEndFilter-class 13 | #' ExonIdFilter-class ExonNameFilter-class ExonStartFilter-class 14 | #' ExonEndFilter-class ExonRankFilter-class GeneIdFilter-class 15 | #' GeneNameFilter-class GeneBiotypeFilter-class 16 | #' GeneStartFilter-class GeneEndFilter-class EntrezFilter-class 17 | #' SymbolFilter-class TxIdFilter-class TxNameFilter-class 18 | #' TxBiotypeFilter-class TxStartFilter-class TxEndFilter-class 19 | #' ProteinIdFilter-class UniprotFilter-class SeqNameFilter-class 20 | #' SeqStrandFilter-class supportedFilters 21 | #' show,AnnotationFilter-method show,CharacterFilter-method 22 | #' show,IntegerFilter-method show,GRangesFilter-method 23 | #' show,DoubleFilter-method 24 | #' 25 | #' @description 26 | #' 27 | #' The filters extending the base \code{AnnotationFilter} class 28 | #' represent a simple filtering concept for annotation resources. 
29 | #' Each filter object is thought to filter on a single (database) 30 | #' table column using the provided values and the defined condition. 31 | #' 32 | #' Filter instances are created using the constructor functions (e.g. 33 | #' \code{GeneIdFilter}). 34 | #' 35 | #' \code{supportedFilters()} lists all defined filters. It returns a two column 36 | #' \code{data.frame} with the filter class name and its default field. 37 | #' Packages using \code{AnnotationFilter} should implement the 38 | #' \code{supportedFilters} for their annotation resource object (e.g. for 39 | #' \code{object = "EnsDb"} in the \code{ensembldb} package) to list all 40 | #' supported filters for the specific resource. 41 | #' 42 | #' @details 43 | #' 44 | #' By default filters are only available for tables containing the 45 | #' field on which the filter acts (i.e. that contain a column with the 46 | #' name matching the value of the \code{field} slot of the 47 | #' object). See the vignette for a description to use filters for 48 | #' databases in which the database table column name differs from the 49 | #' default \code{field} of the filter.
50 | #' 51 | #' @usage 52 | #' 53 | #' CdsStartFilter(value, condition = "==", not = FALSE) 54 | #' CdsEndFilter(value, condition = "==", not = FALSE) 55 | #' ExonIdFilter(value, condition = "==", not = FALSE) 56 | #' ExonNameFilter(value, condition = "==", not = FALSE) 57 | #' ExonRankFilter(value, condition = "==", not = FALSE) 58 | #' ExonStartFilter(value, condition = "==", not = FALSE) 59 | #' ExonEndFilter(value, condition = "==", not = FALSE) 60 | #' GeneIdFilter(value, condition = "==", not = FALSE) 61 | #' GeneNameFilter(value, condition = "==", not = FALSE) 62 | #' GeneBiotypeFilter(value, condition = "==", not = FALSE) 63 | #' GeneStartFilter(value, condition = "==", not = FALSE) 64 | #' GeneEndFilter(value, condition = "==", not = FALSE) 65 | #' EntrezFilter(value, condition = "==", not = FALSE) 66 | #' SymbolFilter(value, condition = "==", not = FALSE) 67 | #' TxIdFilter(value, condition = "==", not = FALSE) 68 | #' TxNameFilter(value, condition = "==", not = FALSE) 69 | #' TxBiotypeFilter(value, condition = "==", not = FALSE) 70 | #' TxStartFilter(value, condition = "==", not = FALSE) 71 | #' TxEndFilter(value, condition = "==", not = FALSE) 72 | #' ProteinIdFilter(value, condition = "==", not = FALSE) 73 | #' UniprotFilter(value, condition = "==", not = FALSE) 74 | #' SeqNameFilter(value, condition = "==", not = FALSE) 75 | #' SeqStrandFilter(value, condition = "==", not = FALSE) 76 | #' 77 | #' @param value \code{character()}, \code{integer()}, or 78 | #' \code{GRanges()} value for the filter 79 | #' 80 | #' @param condition \code{character(1)} defining the condition to be 81 | #' used in the filter. For \code{IntegerFilter} or \code{DoubleFilter}, 82 | #' one of \code{"=="}, \code{"!="}, \code{">"}, \code{"<"}, \code{">="} 83 | #' or \code{"<="}. For \code{CharacterFilter}, one of \code{"=="}, 84 | #' \code{"!="}, \code{"startsWith"}, \code{"endsWith"} or \code{"contains"}. 85 | #' Default condition is \code{"=="}. 
#'
#' @param not \code{logical(1)} whether the \code{AnnotationFilter} is negated.
#'     \code{TRUE} indicates that the filter is negated (!), \code{FALSE}
#'     that it is not negated. Default is \code{FALSE}.
#'
#' @return The constructor functions return an object extending
#'     \code{AnnotationFilter}. For the return value of the other methods see
#'     the methods' descriptions.
#'
#' @seealso \code{\link{AnnotationFilterList}} for combining
#'     \code{AnnotationFilter} objects.
NULL

## Conditions accepted by each filter type; looked up by class name in
## .valid_condition().
.CONDITION <- list(
    IntegerFilter = c("==", "!=", ">", "<", ">=", "<="),
    DoubleFilter = c("==", "!=", ">", "<", ">=", "<="),
    CharacterFilter = c("==", "!=", "startsWith", "endsWith", "contains"),
    GRangesFilter = c("any", "start", "end", "within", "equal")
)

## Default field names, grouped by the virtual class of the filter acting on
## them.
## NOTE(review): presumably consumed further down the file to generate the
## concrete filter classes -- confirm against the install-time section.
.FIELD <- list(
    CharacterFilter = c(
        "exon_id", "exon_name", "gene_id", "gene_name", "gene_biotype",
        "entrez", "symbol", "tx_id", "tx_name", "tx_biotype",
        "protein_id", "uniprot", "seq_name", "seq_strand"),
    IntegerFilter = c(
        "cds_start", "cds_end", "exon_start", "exon_rank", "exon_end",
        "gene_start", "gene_end", "tx_start", "tx_end")
)

## Validate a filter's 'condition' against the conditions allowed for 'class'.
##
## condition: the condition to check; expected to be a character(1).
## class: character(1) name of an element of .CONDITION.
##
## Returns TRUE if 'condition' is valid, otherwise a character(1) describing
## the problem (the convention expected from S4 validity functions).
.valid_condition <- function(condition, class) {
    ## A condition that is not length 1 can not be matched against the
    ## allowed set in a meaningful way. Report only the length problem
    ## instead of pasting every element (or an empty string) into a
    ## "must be in" message.
    if (length(condition) != 1L)
        return("'condition' must be length 1")

    allowed <- .CONDITION[[class]]
    if (!(condition %in% allowed)) {
        value <- paste(sQuote(allowed), collapse=" ")
        return(paste0("'", condition, "' must be in ", value))
    }

    TRUE
}

############################################################
## AnnotationFilter
##

#' @exportClass AnnotationFilter
.AnnotationFilter <- setClass(
    "AnnotationFilter",
    contains = "VIRTUAL",
    slots = c(
        field="character",
        condition="character",
        value="ANY",
        not="logical"
    ),
    prototype=list(
        condition= "==",
        not= FALSE
    )
)

## Checks shared by all filters: 'condition' must not be NA, conditions that
## compare against a single value require a length 1 'value', 'not' must be
## a single non-NA logical and 'value' must not contain NA.
setValidity("AnnotationFilter", function(object) {
    txt <- character()

    value <- .value(object)
    condition <- .condition(object)
    not <- .not(object)
    test_len <- length(condition) == 1L
    test_NA <- !any(is.na(condition))

    if (test_len && !test_NA)
        txt <- c(txt, "'condition' can not be NA")
    test0 <- test_len && test_NA

    ## The length of 'condition' itself is checked by the validity method of
    ## the sub-classes (through .valid_condition); 'test0' guards the use of
    ## 'condition' in the scalar test below.
    test1 <- condition %in% c("startsWith", "endsWith", "contains", ">",
                              "<", ">=", "<=")
    if (test0 && test1 && length(value) > 1L)
        txt <- c(txt, paste0("'", condition, "' requires length 1 'value'"))

    if(length(not) != 1)
        txt <- c(txt, '"not" value must be of length 1.')
    else if (is.na(not))
        ## show() branches on 'not' and would fail with a missing value.
        txt <- c(txt, "'not' can not be NA")

    if (any(is.na(value)))
        txt <- c(txt, "'value' can not be NA")

    if (length(txt)) txt else TRUE
})

## Internal slot accessors; they back the exported methods below.
.field <- function(object) object@field

.condition <- function(object) object@condition

.value <- function(object) object@value

.not <- function(object) object@not

#' @rdname AnnotationFilter
#'
#' @aliases condition
#'
#' @description \code{condition()} get the \code{condition} value for
#'     the filter \code{object}.
#'
#' @param object An \code{AnnotationFilter} object.
#'
#' @export
setMethod("condition", "AnnotationFilter", .condition)

#' @rdname AnnotationFilter
#'
#' @aliases value
#'
#' @description \code{value()} get the \code{value} for the filter
#'     \code{object}.
#'
#' @export
setMethod("value", "AnnotationFilter", .value)

#' @rdname AnnotationFilter
#'
#' @aliases field
#'
#' @description \code{field()} get the \code{field} for the filter
#'     \code{object}.
#'
#' @export
setMethod("field", "AnnotationFilter", .field)

#' @rdname AnnotationFilter
#'
#' @description \code{not()} get the \code{not} for the filter \code{object}.
#'
#' @export
setMethod("not", "AnnotationFilter", .not)

#' @importFrom methods show
#'
#' @export
setMethod("show", "AnnotationFilter", function(object){
    ## Negated filters are flagged on a separate first line.
    if(.not(object)) cat("NOT\n")
    cat("class:", class(object),
        "\ncondition:", .condition(object), "\n")
})

############################################################
## CharacterFilter, IntegerFilter, DoubleFilter
##

#' @exportClass CharacterFilter
.CharacterFilter <- setClass(
    "CharacterFilter",
    contains = c("VIRTUAL", "AnnotationFilter"),
    slots = c(value = "character"),
    prototype = list(
        value = character()
    )
)

## Restrict 'condition' to the conditions listed for CharacterFilter.
setValidity("CharacterFilter", function(object) {
    .valid_condition(.condition(object), "CharacterFilter")
})

#' @importFrom methods show callNextMethod
#'
#' @export
setMethod("show", "CharacterFilter", function(object) {
    ## Class and condition are printed by the AnnotationFilter method.
    callNextMethod()
    cat("value:", .value(object), "\n")
})

#' @exportClass IntegerFilter
.IntegerFilter <- setClass(
    "IntegerFilter",
    contains = c("VIRTUAL", "AnnotationFilter"),
    slots = c(value = "integer"),
    prototype = list(
        value = integer()
    )
)

## Restrict 'condition' to the conditions listed for IntegerFilter.
setValidity("IntegerFilter", function(object) {
    .valid_condition(.condition(object), "IntegerFilter")
})

#' @export
setMethod("show", "IntegerFilter", function(object) {
    callNextMethod()
    cat("value:", .value(object), "\n")
})

#' @exportClass DoubleFilter
.DoubleFilter <- setClass(
    "DoubleFilter",
    contains = c("VIRTUAL", "AnnotationFilter"),
    slots = c(value = "numeric"),
    prototype = list(
        value = double()
    )
)

## Restrict 'condition' to the conditions listed for DoubleFilter.
setValidity("DoubleFilter", function(object) {
    .valid_condition(.condition(object), "DoubleFilter")
})

#' @export
setMethod("show", "DoubleFilter", function(object) {
    callNextMethod()
    cat("value:", .value(object), "\n")
})

#' @rdname AnnotationFilter
#'
#' @importFrom GenomicRanges GRanges
#'
#' @importClassesFrom GenomicRanges GRanges
#'
#' @exportClass GRangesFilter
.GRangesFilter <- setClass(
    "GRangesFilter",
    contains = "AnnotationFilter",
    slots = c(
        value = "GRanges",
        feature = "character"
    ),
    prototype = list(
        value = GRanges(),
        condition = "any",
        field = "granges",
        feature = "gene"
    )
)

## For a GRangesFilter the 'condition' slot holds the overlap type.
setValidity("GRangesFilter", function(object) {
    .valid_condition(.condition(object), "GRangesFilter")
})

## Internal accessor for the 'feature' slot. Defined once here; it is used by
## the exported feature() function and by the show method for GRangesFilter.
.feature <- function(object) object@feature

#' @rdname AnnotationFilter
#'
#' @param type \code{character(1)} indicating how overlaps are to be
#'     filtered. See \code{findOverlaps} in the IRanges package for a
#'     description of this argument.
#'
#' @examples
#' ## filter by GRanges
#' GRangesFilter(GenomicRanges::GRanges("chr10:87869000-87876000"))
#' @export
GRangesFilter <-
    function(value, feature = "gene",
             type = c("any", "start", "end", "within", "equal"))
{
    ## match.arg() selects the first choice ("any") when 'type' is not
    ## supplied and rejects values outside the listed choices.
    condition <- match.arg(type)
    .GRangesFilter(
        field = "granges",
        value = value,
        condition = condition,
        feature = feature)
}

#' @aliases feature
#'
#' @description \code{feature()} get the \code{feature} for the
#'     \code{GRangesFilter} \code{object}.
#'
#' @rdname AnnotationFilter
#'
#' @export
feature <- .feature

#' @importFrom GenomicRanges show
#'
#' @export
setMethod("show", "GRangesFilter", function(object) {
    ## Inherited output first, then the feature, then the value, which
    ## is printed with its own show() method.
    callNextMethod()
    feature_label <- .feature(object)
    cat("feature:", feature_label, "\nvalue:\n")
    filter_value <- value(object)
    show(filter_value)
})


############################################################
## Create install-time classes
##

#' @rdname AnnotationFilter
#'
#' @name AnnotationFilter
#'
#' @param feature \code{character(1)} defining on what feature the
#'     \code{GRangesFilter} should be applied. Choices could be
#'     \code{"gene"}, \code{"tx"} or \code{"exon"}.
#'
#' @examples
#' ## Create a SymbolFilter to filter on a gene's symbol.
#' sf <- SymbolFilter("BCL2")
#' sf
#'
#' ## Create a GeneStartFilter to filter based on the genes' chromosomal start
#' ## coordinates
#' gsf <- GeneStartFilter(10000, condition = ">")
#' gsf
#'
#' @export CdsStartFilter CdsEndFilter ExonIdFilter ExonNameFilter
#' @export ExonStartFilter ExonEndFilter ExonRankFilter GeneIdFilter
#' @export GeneNameFilter GeneBiotypeFilter GeneStartFilter
#' @export GeneEndFilter EntrezFilter SymbolFilter TxIdFilter
#' @export TxNameFilter TxBiotypeFilter TxStartFilter TxEndFilter
#' @export ProteinIdFilter UniprotFilter SeqNameFilter SeqStrandFilter
#'
#' @importFrom methods new
#'
#' @exportClass CdsStartFilter CdsEndFilter ExonIdFilter
#'     ExonNameFilter ExonStartFilter ExonEndFilter ExonRankFilter
#'     GeneIdFilter GeneNameFilter GeneBiotypeFilter GeneStartFilter
#'     GeneEndFilter EntrezFilter SymbolFilter TxIdFilter TxNameFilter
#'     TxBiotypeFilter TxStartFilter TxEndFilter ProteinIdFilter
#'     UniprotFilter SeqNameFilter SeqStrandFilter
NULL

## Map field names to filter class names: the letter following each
## underscore is upper-cased (and the underscore dropped), the first
## letter is upper-cased, and "Filter" is appended, e.g.
## "gene_start" -> "GeneStartFilter". Zero-length input yields
## character(0).
.fieldToClass <- function(field) {
    class <- gsub("_([[:alpha:]])", "\\U\\1", field, perl=TRUE)
    class <- sub("^([[:alpha:]])", "\\U\\1", class, perl=TRUE)
    paste0(class, if (length(class)) "Filter" else character(0))
}

## Build the constructor function for the filter class `class` acting
## on `field`.
##
## Fields listed in .FIELD[["CharacterFilter"]] coerce their value with
## as.character() (no type check is performed); all other fields require
## numeric input and coerce it with as.integer().
##
## Returns a function(value, condition = "==", not = FALSE) that
## creates a new instance of `class`.
.filterFactory <- function(field, class) {
    force(field); force(class)          # watch for lazy evaluation
    as.value <-
        if (field %in% .FIELD[["CharacterFilter"]]) {
            function(x) {
                as.character(x)
            }
        } else {
            function(x) {
                if(!is.numeric(x))
                    stop("Input to a ", field,
                         " filter must be a numeric vector.")
                as.integer(x)
            }
        }

    function(value, condition = "==", not = FALSE) {
        value <- as.value(value)
        condition <- as.character(condition)
        not <- as.logical(not)
        new(class, field=field, condition = condition, value=value, not=not)
    }
}

## For every entry of .FIELD, define one class per field (extending the
## class the entry is named after) and assign the matching constructor
## function under the same name in the top-level environment.
local({
    makeClass <- function(contains) {
        fields <- .FIELD[[contains]]
        classes <- .fieldToClass(fields)
        for (i in seq_along(fields)) {
            setClass(classes[[i]], contains=contains, where=topenv())
            assign(
                classes[[i]],
                .filterFactory(fields[[i]], classes[[i]]),
                envir=topenv()
            )
        }
    }
    for (contains in names(.FIELD))
        makeClass(contains)
})

############################################################
## Utilities
##

## Translate a filter into a character expression of the form
## "<field> <operator> <value>".
##
## - "=="/"!=" quote the value(s); with more than one value the
##   expression uses %in% c(...), and "!=" is expressed by negating it.
## - "startsWith"/"endsWith"/"contains" use %like% with "%" wildcards
##   after / before / on both sides of the value.
## - ">", "<", ">=", "<=" compare against the value coerced to integer.
## - A leading "!" is added when the filter is negated.
##
## Stops for filters whose field is "granges". Any other condition
## falls through and returns NULL.
.convertFilter <- function(object) {
    field <- field(object)
    if (field == "granges")
        stop("GRangesFilter cannot be converted using convertFilter().")
    value <- value(object)
    condition <- condition(object)
    negate <- isTRUE(not(object))

    ## switch() returns NULL for the comparison operators; they are
    ## handled by the last branch below and never consult `op`.
    op <- switch(
        condition,
        "==" = if (length(value) == 1) "==" else "%in%",
        "!=" = if (length(value) == 1) "!=" else "%in%",
        "startsWith" = "%like%",
        "endsWith" = "%like%",
        "contains" = "%like%"
    )

    not_val <- if (negate) "!" else ""

    if (condition %in% c("==", "!=")) {
        quoted <- paste0("'", value, "'", collapse=", ")
        if (op %in% c("==", "!=")) {
            sprintf("%s%s %s %s", not_val, field, op, quoted)
        } else if (condition == "==") {
            sprintf("%s%s %s c(%s)", not_val, field, op, quoted)
        } else if (negate) {
            ## NOT (field != any of values)  ==  field %in% values
            sprintf("%s %s c(%s)", field, op, quoted)
        } else {
            sprintf("!%s %s c(%s)", field, op, quoted)
        }
    } else if (condition == "startsWith") {
        sprintf("%s%s %s '%s%%'", not_val, field, op, value)
    } else if (condition == "endsWith") {
        sprintf("%s%s %s '%%%s'", not_val, field, op, value)
    } else if (condition == "contains") {
        ## Wildcards on both sides; without them the pattern would only
        ## match values equal to `value`, unlike the two branches above.
        sprintf("%s%s %s '%%%s%%'", not_val, field, op, value)
    } else if (condition %in% c(">", "<", ">=", "<=")) {
        sprintf("%s%s %s %s", not_val, field, condition, as.integer(value))
    }
}

#' @rdname AnnotationFilter
#'
#' @description Converts an \code{AnnotationFilter} object to a
#'     \code{character(1)} giving an equation that can be used as input to
#'     a \code{dplyr} filter.
#'
#' @return \code{character(1)} that can be used as input to a \code{dplyr}
#'     filter.
#'
#' @examples
#' filter <- SymbolFilter("ADA", "==")
#' result <- convertFilter(filter)
#' result
#' @export
setMethod(
    "convertFilter",
    signature(object = "AnnotationFilter", db = "missing"),
    .convertFilter
)

## Filter classes that are not generated from an entry in .FIELD.
.FILTERS_WO_FIELD <- "GRangesFilter"

## Build a data.frame with one row per filter class and the columns
## `filter` (class name) and `field`, with rows ordered by `filter`.
.supportedFilters <- function() {
    field_names <- unlist(.FIELD, use.names=FALSE)
    filter_names <- c(.fieldToClass(field_names), .FILTERS_WO_FIELD)
    tbl <- data.frame(
        filter=filter_names,
        field=c(field_names, "granges")
    )
    row_order <- order(tbl$filter)
    tbl[row_order,]
}

#' @rdname AnnotationFilter
#'
#' @examples
#' supportedFilters()
#' @export
setMethod("supportedFilters", "missing", function(object) .supportedFilters())

#' @rdname GenenameFilter
#'
#' @title DEPRECATED Gene name filter
#'
#' @aliases GenenameFilter-class
#'
#' @description
#'
#' The `GenenameFilter` class and functions are deprecated. Please use
#' [GeneNameFilter()] instead.
#'
#' @param value `character()` value for the filter
#'
#' @param condition `character(1)` defining the condition to be
#'     used in the filter. One of `"=="`, `"!="`, `"startsWith"`, `"endsWith"`
#'     or `"contains"`. Default condition is `"=="`.
#'
#' @param not `logical(1)` whether the `AnnotationFilter` is negated.
#'     `TRUE` indicates the filter is negated (!), `FALSE` that it is
#'     not negated. Default is `FALSE`.
#'
#' @return The constructor function returns a `GenenameFilter`.
#'
#' @md
#'
#' @export
#'
#' @exportClass GenenameFilter
GenenameFilter <- function(value, condition = "==", not = FALSE) {
    .Deprecated("GeneNameFilter")
    ## Coerce the arguments the same way the generated filter
    ## constructors in this file do, so that input accepted by
    ## GeneNameFilter() is accepted here as well.
    new("GenenameFilter",
        value = as.character(value),
        condition = as.character(condition),
        not = as.logical(not))
}

## Deprecated concrete CharacterFilter whose field is fixed to
## "genename" through the class prototype.
.GenenameFilter <- setClass(
    "GenenameFilter",
    contains = "CharacterFilter",
    prototype = list(
        field = "genename"
    )
)
--------------------------------------------------------------------------------