├── .svn
    ├── format
    ├── wc.db-journal
    ├── entries
    ├── wc.db
    └── pristine
    │   ├── 18
    │       └── 18d19e0f6727f50e5ab77d34454fab81e82d049a.svn-base
    │   ├── 21
    │       └── 2189e6a627b4c711e766c184f50bb9cdc230e821.svn-base
    │   ├── 33
    │       └── 331922c1408ddd46de74575d157bb2d47ccfa3e9.svn-base
    │   ├── 40
    │       └── 40904d832591d5491cf5d562b1708a3f20d517f6.svn-base
    │   ├── 50
    │       └── 50170fd723a599624e474c619511a6ac10f2d072.svn-base
    │   ├── 65
    │       ├── 65e9e8d47fee95ed321413de21d8f8d13f873b17.svn-base
    │       └── 6537ab78a07d200e9fe894802b6b5f17bdd38fd9.svn-base
    │   ├── 73
    │       └── 73eb11efc6635fdbac1cdc8ec4032d0dd1a2cbe6.svn-base
    │   ├── 81
    │       └── 81fa7ab77bea17ab59d67e253f098ea223b2e1d3.svn-base
    │   ├── 91
    │       └── 9145858a39a39a3d59b0aa20ef1971ab302d2f47.svn-base
    │   ├── ff
    │       └── ff35c68c1ccb291931f7f7bc302993d96557fe78.svn-base
    │   ├── fc
    │       └── fc49a2303f1b357ae6a059042d8d0c15fabb68fe.svn-base
    │   ├── e0
    │       └── e07c222eeebfc149377f972722b7aa62b4fc86cc.svn-base
    │   ├── f1
    │       └── f1748b5922e3eb9abfa76932622f609be9ff4d0c.svn-base
    │   ├── 06
    │       └── 065e68d6b11e4bf90da04ffd904757e8ce3c422b.svn-base
    │   ├── a1
    │       └── a129614aff000a6de02c214a739f8867a6f01752.svn-base
    │   └── 4e
    │       └── 4e9ec76b932b7ba44f5280dec6263ea963e53920.svn-base
├── .gitignore
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test_AnnotationFilterList.R
    │   ├── test_translate-utils.R
    │   └── test_AnnotationFilter.R
├── NOTES.md
├── README.md
├── NEWS
├── R
    ├── AllGenerics.R
    ├── translate-utils.R
    ├── AnnotationFilterList.R
    └── AnnotationFilter.R
├── man
    ├── GenenameFilter.Rd
    ├── AnnotationFilterList.Rd
    └── AnnotationFilter.Rd
├── DESCRIPTION
├── NAMESPACE
└── vignettes
    └── AnnotationFilter.Rmd


/.svn/format:
--------------------------------------------------------------------------------
1 | 12
2 | 


--------------------------------------------------------------------------------
/.svn/wc.db-journal:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.svn/entries:
--------------------------------------------------------------------------------
1 | 12
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rhistory
2 | .RData
3 | .svn*
4 | 


--------------------------------------------------------------------------------
/.svn/wc.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/AnnotationFilter/devel/.svn/wc.db


--------------------------------------------------------------------------------
/.svn/pristine/ff/ff35c68c1ccb291931f7f7bc302993d96557fe78.svn-base:
--------------------------------------------------------------------------------
1 | .Rhistory
2 | .RData
3 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(AnnotationFilter)
3 | ## Entry point for R CMD check: runs the test files in tests/testthat/.
4 | test_check("AnnotationFilter")
5 | 


--------------------------------------------------------------------------------
/.svn/pristine/73/73eb11efc6635fdbac1cdc8ec4032d0dd1a2cbe6.svn-base:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(AnnotationFilter)
3 | 
4 | test_check("AnnotationFilter")
5 | 


--------------------------------------------------------------------------------
/NOTES.md:
--------------------------------------------------------------------------------
1 | # Development guidelines
2 | 
3 | - roxygen2 documentation
4 | - testthat unit tests
5 | - file name correspondence between code `R/foo.R`, tests
6 |   `tests/testthat/test_foo.R`, and documentation `man/foo.Rd`.
7 | - version bump on master commit
8 | - commits to master pass R CMD build && R CMD check
9 | 


--------------------------------------------------------------------------------
/.svn/pristine/50/50170fd723a599624e474c619511a6ac10f2d072.svn-base:
--------------------------------------------------------------------------------
1 | # Development guidelines
2 | 
3 | - roxygen2 documentation
4 | - testthat unit tests
5 | - file name correspondence between code `R/foo.R`, tests
6 |   `tests/testthat/test_foo.R`, and documentation `man/foo.Rd`.
7 | - version bump on master commit
8 | - commits to master pass R CMD build && R CMD check
9 | 


--------------------------------------------------------------------------------
/.svn/pristine/fc/fc49a2303f1b357ae6a059042d8d0c15fabb68fe.svn-base:
--------------------------------------------------------------------------------
 1 | CHANGES IN VERSION 1.1.2
 2 | ------------------------
 3 | 
 4 | NEW FEATURES
 5 | 
 6 |     o supportFilters returns a data.frame with filter class name and field.
 7 | 
 8 | 
 9 | CHANGES IN VERSION 0.99.5
10 | --------------------------
11 | 
12 | NEW FEATURES
13 | 
14 |     o Add convertFilterExpressionQuoted function.
15 |     o Add field method.
16 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [<img src="https://www.bioconductor.org/images/logo/jpg/bioconductor_logo_rgb.jpg" width="200" align="right"/>](https://bioconductor.org/)
2 | 
3 | **AnnotationFilter** is an R/Bioconductor package that provides facilities for filtering Bioconductor annotation resources.
4 | 
5 | See https://bioconductor.org/packages/AnnotationFilter for more information including how to install the release version of the package (please refrain from installing directly from GitHub).
6 | 
7 | 


--------------------------------------------------------------------------------
/.svn/pristine/91/9145858a39a39a3d59b0aa20ef1971ab302d2f47.svn-base:
--------------------------------------------------------------------------------
 1 | ## Generic methods.
 2 | setGeneric("condition", function(object, ...) standardGeneric("condition"))
 3 | 
 4 | setGeneric("field", function(object, ...) standardGeneric("field"))
 5 | 
 6 | setGeneric("value", function(object, ...) standardGeneric("value"))
 7 | 
 8 | setGeneric("logicOp", function(object, ...) standardGeneric("logicOp"))
 9 | 
10 | setGeneric("supportedFilters", function(object, ...)
11 |     standardGeneric("supportedFilters"))
12 | 


--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
 1 | CHANGES IN VERSION 1.5.2
 2 | ------------------------
 3 | 
 4 | USER VISIBLE CHANGES
 5 | 
 6 |     o Rename GenenameFilter into GeneNameFilter and deprecate GenenameFilter
 7 |       (issue #22).
 8 | 
 9 | 
10 | CHANGES IN VERSION 1.3.1
11 | ------------------------
12 | 
13 | NEW FEATURES
14 | 
15 |     o Add DoubleFilter
16 | 
17 | 
18 | CHANGES IN VERSION 1.1.2
19 | ------------------------
20 | 
21 | NEW FEATURES
22 | 
23 |     o supportedFilters returns a data.frame with filter class name and field.
24 | 
25 | 
26 | CHANGES IN VERSION 0.99.5
27 | --------------------------
28 | 
29 | NEW FEATURES
30 | 
31 |     o Add convertFilterExpressionQuoted function.
32 |     o Add field method.
33 | 


--------------------------------------------------------------------------------
/R/AllGenerics.R:
--------------------------------------------------------------------------------
 1 | ## S4 generics; dispatch is on `object` (plus `db` for convertFilter).
 2 | setGeneric("condition", function(object, ...) standardGeneric("condition"))
 3 | 
 4 | setGeneric("field", function(object, ...) standardGeneric("field"))
 5 | 
 6 | setGeneric("value", function(object, ...) standardGeneric("value"))
 7 | 
 8 | setGeneric("logicOp", function(object, ...) standardGeneric("logicOp"))
 9 | 
10 | setGeneric("not", function(object, ...) standardGeneric("not"))
11 | 
12 | setGeneric("simplify", function(object, ...) standardGeneric("simplify"))
13 | 
14 | setGeneric("convertFilter", function(object, db, ...)
15 |     standardGeneric("convertFilter"))
16 | 
17 | setGeneric("distributeNegation", function(object, ...)
18 |     standardGeneric("distributeNegation"))
19 | 
20 | setGeneric("supportedFilters", function(object, ...)
21 |     standardGeneric("supportedFilters"))
22 | 


--------------------------------------------------------------------------------
/man/GenenameFilter.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AnnotationFilter.R
 3 | \name{GenenameFilter}
 4 | \alias{GenenameFilter}
 5 | \alias{GenenameFilter-class}
 6 | \title{DEPRECATED Gene name filter}
 7 | \usage{
 8 | GenenameFilter(value, condition = "==", not = FALSE)
 9 | }
10 | \arguments{
11 | \item{value}{\code{character()} value for the filter}
12 | 
13 | \item{condition}{\code{character(1)} defining the condition to be
14 | used in the filter. One of \code{"=="}, \code{"!="}, \code{"startsWith"}, \code{"endsWith"}
15 | or \code{"contains"}. Default condition is \code{"=="}.}
16 | 
17 | \item{not}{\code{logical(1)} whether the \code{AnnotationFilter} is negated.
18 | \code{TRUE} indicates the filter is negated (!); \code{FALSE} indicates it is
19 | not negated. The default is \code{FALSE}.}
20 | }
21 | \value{
22 | The constructor function returns a \code{GenenameFilter}.
23 | }
24 | \description{
25 | The \code{GenenameFilter} class and functions are deprecated. Please use the
26 | \code{\link[=GeneNameFilter]{GeneNameFilter()}} instead.
27 | }
28 | 


--------------------------------------------------------------------------------
/.svn/pristine/40/40904d832591d5491cf5d562b1708a3f20d517f6.svn-base:
--------------------------------------------------------------------------------
 1 | Package: AnnotationFilter
 2 | Title: Facilities for Filtering Bioconductor Annotation Resources
 3 | Version: 0.99.8
 4 | Authors@R: c( person("Martin", "Morgan", email =
 5 |         "martin.morgan@roswellpark.org", role = "aut"),
 6 |         person("Johannes", "Rainer", email =
 7 |         "johannes.rainer@eurac.edu", role = "aut"),
 8 |         person("Bioconductor", "Maintainer",
 9 |         email="maintainer@bioconductor.org", role = "cre"))
10 | URL: https://github.com/Bioconductor/AnnotationFilter
11 | BugReports: https://github.com/Bioconductor/AnnotationFilter/issues
12 | Description: This package provides class and other infrastructure to
13 |         implement filters for manipulating Bioconductor annotation
14 |         resources. The filters will be used by ensembldb,
15 |         Organism.dplyr, and other packages.
16 | Depends: R (>= 3.4.0)
17 | Imports: utils, methods, GenomicRanges, lazyeval
18 | Suggests: BiocStyle, knitr, testthat, RSQLite, org.Hs.eg.db
19 | VignetteBuilder: knitr
20 | License: Artistic-2.0
21 | biocViews: Annotation, Infrastructure, Software
22 | Encoding: UTF-8
23 | LazyData: true
24 | RoxygenNote: 6.0.1
25 | Collate: 'AllGenerics.R' 'AnnotationFilter.R' 'AnnotationFilterList.R'
26 |         'translate-utils.R'
27 | 


--------------------------------------------------------------------------------
/.svn/pristine/e0/e07c222eeebfc149377f972722b7aa62b4fc86cc.svn-base:
--------------------------------------------------------------------------------
 1 | Package: AnnotationFilter
 2 | Title: Facilities for Filtering Bioconductor Annotation Resources
 3 | Version: 1.1.3
 4 | Authors@R: c(
 5 |     person("Martin", "Morgan", email = "martin.morgan@roswellpark.org",
 6 |         role = "aut"),
 7 |     person("Johannes", "Rainer", email = "johannes.rainer@eurac.edu",
 8 |         role = "aut"),
 9 |     person("Joachim", "Bargsten", email = "jw@bargsten.org", role = "ctb"),
10 |     person("Bioconductor", "Maintainer", email="maintainer@bioconductor.org",
11 |         role = "cre"))
12 | URL: https://github.com/Bioconductor/AnnotationFilter
13 | BugReports: https://github.com/Bioconductor/AnnotationFilter/issues
14 | Description: This package provides class and other infrastructure to
15 |     implement filters for manipulating Bioconductor annotation
16 |     resources. The filters will be used by ensembldb, Organism.dplyr,
17 |     and other packages.
18 | Depends:
19 |     R (>= 3.4.0)
20 | Imports:
21 |     utils,
22 |     methods,
23 |     GenomicRanges,
24 |     lazyeval
25 | Suggests:
26 |     BiocStyle,
27 |     knitr,
28 |     testthat,
29 |     RSQLite,
30 |     org.Hs.eg.db
31 | VignetteBuilder: knitr
32 | License: Artistic-2.0
33 | biocViews: Annotation, Infrastructure, Software
34 | Encoding: UTF-8
35 | LazyData: true
36 | RoxygenNote: 6.0.1
37 | Collate:
38 |     'AllGenerics.R'
39 |     'AnnotationFilter.R'
40 |     'AnnotationFilterList.R'
41 |     'translate-utils.R'
42 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: AnnotationFilter
 2 | Title: Facilities for Filtering Bioconductor Annotation Resources
 3 | Version: 1.35.0
 4 | Authors@R: c(
 5 |     person("Martin", "Morgan", email = "martin.morgan@roswellpark.org",
 6 |         role = "aut"),
 7 |     person("Johannes", "Rainer", email = "johannes.rainer@eurac.edu",
 8 |         role = "aut"),
 9 |     person("Joachim", "Bargsten", email = "jw@bargsten.org", role = "ctb"),
10 |     person("Daniel", "Van Twisk", email = "daniel.vantwisk@roswellpark.org",
11 |         role = "ctb"),
12 |     person("Bioconductor Package", "Maintainer",
13 |         email="maintainer@bioconductor.org",
14 |         role = "cre"))
15 | URL: https://github.com/Bioconductor/AnnotationFilter
16 | BugReports: https://github.com/Bioconductor/AnnotationFilter/issues
17 | Description: This package provides class and other infrastructure to
18 |     implement filters for manipulating Bioconductor annotation
19 |     resources. The filters will be used by ensembldb, Organism.dplyr,
20 |     and other packages.
21 | Depends:
22 |     R (>= 3.4.0)
23 | Imports:
24 |     utils,
25 |     methods,
26 |     GenomicRanges,
27 |     lazyeval
28 | Suggests:
29 |     BiocStyle,
30 |     knitr,
31 |     testthat,
32 |     RSQLite,
33 |     org.Hs.eg.db,
34 |     rmarkdown
35 | VignetteBuilder: knitr
36 | License: Artistic-2.0
37 | biocViews: Annotation, Infrastructure, Software
38 | Encoding: UTF-8
39 | LazyData: true
40 | RoxygenNote: 6.0.1
41 | Collate:
42 |     'AllGenerics.R'
43 |     'AnnotationFilter.R'
44 |     'AnnotationFilterList.R'
45 |     'translate-utils.R'
46 | 


--------------------------------------------------------------------------------
/.svn/pristine/18/18d19e0f6727f50e5ab77d34454fab81e82d049a.svn-base:
--------------------------------------------------------------------------------
 1 | context("AnnotationFilterList")
 2 | 
 3 | test_that("AnnotationFilterList() works", {
 4 |     f1 <- GeneIdFilter("somegene")
 5 |     f2 <- SeqNameFilter("chr3")
 6 |     f3 <- GeneBiotypeFilter("protein_coding", "!=")
 7 | 
 8 |     fL <- AnnotationFilter:::AnnotationFilterList(f1, f2)
 9 |     expect_true(length(fL) == 2)
10 |     expect_equal(fL[[1]], f1)
11 |     expect_equal(fL[[2]], f2)
12 |     expect_true(all(logicOp(fL) == "&"))
13 |     
14 |     fL <- AnnotationFilter:::AnnotationFilterList(f1, f2, f3,
15 |                                                   logicOp = c("&", "|"))
16 |     expect_true(length(fL) == 3)
17 |     expect_equal(fL[[1]], f1)
18 |     expect_equal(fL[[2]], f2)
19 |     expect_equal(fL[[3]], f3)
20 |     expect_equal(logicOp(fL), c("&", "|"))
21 | 
22 |     ## A AnnotationFilterList with and AnnotationFilterList
23 |     fL <- AnnotationFilter:::AnnotationFilterList(f1, f2, logicOp = "|")
24 |     fL2 <- AnnotationFilter:::AnnotationFilterList(f3, fL, logicOp = "&")
25 |     expect_true(length(fL) == 2)
26 |     expect_true(length(fL2) == 2)
27 |     expect_true(is(value(fL2)[[1]], "GeneBiotypeFilter"))
28 |     expect_true(is(value(fL2)[[2]], "AnnotationFilterList"))
29 |     expect_equal(value(fL2)[[2]], fL)
30 |     expect_equal(fL2[[2]], fL)
31 |     expect_equal(logicOp(fL2), "&")
32 |     expect_equal(logicOp(fL2[[2]]), "|")
33 | })
34 | 
35 | test_that("empty elements in AnnotationFilterList", {
36 |     ## empty elements should be removed from the AnnotationFilterList.
37 |     empty_afl <- AnnotationFilterList()
38 |     afl <- AnnotationFilterList(empty_afl)
39 |     expect_true(length(afl) == 0)
40 |     afl <- AnnotationFilterList(GeneIdFilter(4), empty_afl)
41 |     expect_true(length(afl) == 1)
42 |     afl <- AnnotationFilterList(GeneIdFilter(4),
43 |                                 AnnotationFilter(~ gene_id == 3 | seq_name == 4),
44 |                                 empty_afl)
45 |     expect_true(length(afl) == 2)
46 |     ## Check validate.
47 |     afl@.Data <- c(afl@.Data, list(empty_afl))
48 |     ## Fix also the logOp.
49 |     afl@logOp <- c(afl@logOp, "|")
50 |     expect_error(validObject(afl))
51 | })
52 | 


--------------------------------------------------------------------------------
/.svn/pristine/f1/f1748b5922e3eb9abfa76932622f609be9ff4d0c.svn-base:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(AnnotationFilter)
 4 | export(AnnotationFilterList)
 5 | export(CdsEndFilter)
 6 | export(CdsStartFilter)
 7 | export(EntrezFilter)
 8 | export(ExonEndFilter)
 9 | export(ExonIdFilter)
10 | export(ExonNameFilter)
11 | export(ExonRankFilter)
12 | export(ExonStartFilter)
13 | export(GRangesFilter)
14 | export(GeneBiotypeFilter)
15 | export(GeneEndFilter)
16 | export(GeneIdFilter)
17 | export(GeneStartFilter)
18 | export(GenenameFilter)
19 | export(ProteinIdFilter)
20 | export(SeqNameFilter)
21 | export(SeqStrandFilter)
22 | export(SymbolFilter)
23 | export(TxBiotypeFilter)
24 | export(TxEndFilter)
25 | export(TxIdFilter)
26 | export(TxNameFilter)
27 | export(TxStartFilter)
28 | export(UniprotFilter)
29 | export(feature)
30 | export(logicOp)
31 | exportClasses(AnnotationFilter)
32 | exportClasses(AnnotationFilterList)
33 | exportClasses(CdsEndFilter)
34 | exportClasses(CdsStartFilter)
35 | exportClasses(CharacterFilter)
36 | exportClasses(EntrezFilter)
37 | exportClasses(ExonEndFilter)
38 | exportClasses(ExonIdFilter)
39 | exportClasses(ExonNameFilter)
40 | exportClasses(ExonRankFilter)
41 | exportClasses(ExonStartFilter)
42 | exportClasses(GRangesFilter)
43 | exportClasses(GeneBiotypeFilter)
44 | exportClasses(GeneEndFilter)
45 | exportClasses(GeneIdFilter)
46 | exportClasses(GeneStartFilter)
47 | exportClasses(GenenameFilter)
48 | exportClasses(IntegerFilter)
49 | exportClasses(ProteinIdFilter)
50 | exportClasses(SeqNameFilter)
51 | exportClasses(SeqStrandFilter)
52 | exportClasses(SymbolFilter)
53 | exportClasses(TxBiotypeFilter)
54 | exportClasses(TxEndFilter)
55 | exportClasses(TxIdFilter)
56 | exportClasses(TxNameFilter)
57 | exportClasses(TxStartFilter)
58 | exportClasses(UniprotFilter)
59 | exportMethods(condition)
60 | exportMethods(field)
61 | exportMethods(show)
62 | exportMethods(supportedFilters)
63 | exportMethods(value)
64 | importClassesFrom(GenomicRanges,GRanges)
65 | importFrom(GenomicRanges,GRanges)
66 | importFrom(GenomicRanges,show)
67 | importFrom(lazyeval,f_eval)
68 | importFrom(methods,callNextMethod)
69 | importFrom(methods,initialize)
70 | importFrom(methods,is)
71 | importFrom(methods,new)
72 | importFrom(methods,show)
73 | importFrom(methods,validObject)
74 | importFrom(utils,tail)
75 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(AnnotationFilter)
 4 | export(AnnotationFilterList)
 5 | export(CdsEndFilter)
 6 | export(CdsStartFilter)
 7 | export(EntrezFilter)
 8 | export(ExonEndFilter)
 9 | export(ExonIdFilter)
10 | export(ExonNameFilter)
11 | export(ExonRankFilter)
12 | export(ExonStartFilter)
13 | export(GRangesFilter)
14 | export(GeneBiotypeFilter)
15 | export(GeneEndFilter)
16 | export(GeneIdFilter)
17 | export(GeneNameFilter)
18 | export(GeneStartFilter)
19 | export(GenenameFilter)
20 | export(ProteinIdFilter)
21 | export(SeqNameFilter)
22 | export(SeqStrandFilter)
23 | export(SymbolFilter)
24 | export(TxBiotypeFilter)
25 | export(TxEndFilter)
26 | export(TxIdFilter)
27 | export(TxNameFilter)
28 | export(TxStartFilter)
29 | export(UniprotFilter)
30 | export(feature)
31 | export(logicOp)
32 | export(not)
33 | exportClasses(AnnotationFilter)
34 | exportClasses(AnnotationFilterList)
35 | exportClasses(CdsEndFilter)
36 | exportClasses(CdsStartFilter)
37 | exportClasses(CharacterFilter)
38 | exportClasses(DoubleFilter)
39 | exportClasses(EntrezFilter)
40 | exportClasses(ExonEndFilter)
41 | exportClasses(ExonIdFilter)
42 | exportClasses(ExonNameFilter)
43 | exportClasses(ExonRankFilter)
44 | exportClasses(ExonStartFilter)
45 | exportClasses(GRangesFilter)
46 | exportClasses(GeneBiotypeFilter)
47 | exportClasses(GeneEndFilter)
48 | exportClasses(GeneIdFilter)
49 | exportClasses(GeneNameFilter)
50 | exportClasses(GeneStartFilter)
51 | exportClasses(GenenameFilter)
52 | exportClasses(IntegerFilter)
53 | exportClasses(ProteinIdFilter)
54 | exportClasses(SeqNameFilter)
55 | exportClasses(SeqStrandFilter)
56 | exportClasses(SymbolFilter)
57 | exportClasses(TxBiotypeFilter)
58 | exportClasses(TxEndFilter)
59 | exportClasses(TxIdFilter)
60 | exportClasses(TxNameFilter)
61 | exportClasses(TxStartFilter)
62 | exportClasses(UniprotFilter)
63 | exportMethods(condition)
64 | exportMethods(convertFilter)
65 | exportMethods(distributeNegation)
66 | exportMethods(field)
67 | exportMethods(not)
68 | exportMethods(show)
69 | exportMethods(supportedFilters)
70 | exportMethods(value)
71 | importClassesFrom(GenomicRanges,GRanges)
72 | importFrom(GenomicRanges,GRanges)
73 | importFrom(GenomicRanges,show)
74 | importFrom(lazyeval,f_eval)
75 | importFrom(methods,callNextMethod)
76 | importFrom(methods,initialize)
77 | importFrom(methods,is)
78 | importFrom(methods,new)
79 | importFrom(methods,show)
80 | importFrom(methods,validObject)
81 | importFrom(utils,head)
82 | importFrom(utils,tail)
83 | 


--------------------------------------------------------------------------------
/.svn/pristine/65/65e9e8d47fee95ed321413de21d8f8d13f873b17.svn-base:
--------------------------------------------------------------------------------
 1 | context("AnnotationFilter")
 2 | 
 3 | test_that("supportedFilters() works", {
 4 |     expect_true(inherits(supportedFilters(), "data.frame"))
 5 |     expect_identical(
 6 |         nrow(supportedFilters()),
 7 |         length(unlist(AnnotationFilter:::.FIELD, use.names=FALSE)) +
 8 |             length(AnnotationFilter:::.FILTERS_WO_FIELD)
 9 |     )
10 | })
11 | 
12 | test_that("SymbolFilter as representative for character filters", {
13 |     expect_true(validObject(new("SymbolFilter")))
14 |     expect_error(SymbolFilter())
15 |     expect_error(SymbolFilter(1, ">"))
16 |     expect_error(SymbolFilter(1, "foo"))
17 |     expect_error(SymbolFilter(c("foo","bar"), "startsWith"))
18 |     ## Getter / setter
19 |     fl <- SymbolFilter("BCL2")
20 |     expect_equal(value(fl), "BCL2")
21 |     fl <- SymbolFilter(c(4, 5))
22 |     expect_equal(value(fl), c("4", "5"))
23 |     fl <- SymbolFilter(3)
24 |     expect_equal(value(fl), "3")
25 |     expect_error(SymbolFilter(NA))
26 |     ## condition.
27 |     expect_equal(condition(fl), "==")
28 |     fl <- SymbolFilter("a", condition = "!=")
29 |     expect_equal(condition(fl), "!=")
30 |     expect_error(SymbolFilter("a", condition = "<"))
31 |     expect_error(SymbolFilter("a", condition = ""))
32 |     expect_error(SymbolFilter("a", condition = c("==", ">")))
33 |     expect_error(SymbolFilter("a", condition = NULL))
34 |     expect_error(SymbolFilter("a", condition = NA))
35 |     expect_error(SymbolFilter("a", condition = 4))
36 | })
37 | 
38 | test_that("GeneStartFilter as representative for integer filters", {
39 |     gsf <- GeneStartFilter(10000, condition = ">")
40 |     expect_equal(condition(gsf), ">")
41 |     expect_error(GeneStartFilter("3"))
42 |     expect_error(GeneStartFilter("B"))
43 |     expect_error(GeneStartFilter(NA))
44 |     expect_error(GeneStartFilter(NULL))
45 |     expect_error(GeneStartFilter())
46 |     ## Condition
47 |     expect_error(GeneStartFilter(10000, condition = "startsWith"))
48 |     expect_error(GeneStartFilter(10000, condition = "endsWith"))
49 |     expect_error(GeneStartFilter(10000, condition = c("==", "<")))
50 | })
51 | 
52 | test_that("GRangesFilter works", {
53 |     GRanges <- GenomicRanges::GRanges
54 |     grf <- GRangesFilter(GRanges("chr10:87869000-87876000"))
55 |     expect_equal(condition(grf), "any")
56 |     expect_error(GRangesFilter(value = 3))
57 |     expect_error(GRangesFilter(
58 |         GRanges("chr10:87869000-87876000"),
59 |         type = "=="
60 |     ))
61 |     grf <- GRangesFilter(
62 |         GRanges("chr10:87869000-87876000"),
63 |         type = "within",
64 |         feature = "tx"
65 |     )
66 |     expect_equal(condition(grf), "within")
67 |     expect_equal(feature(grf), "tx")
68 | })
69 | 
70 | test_that("fieldToClass works", {
71 |     expect_identical(AnnotationFilter:::.fieldToClass("gene_id"),
72 |                      "GeneIdFilter")
73 |     ## Support replacement for multiple _ : issue #13
74 |     expect_identical(AnnotationFilter:::.fieldToClass("gene_seq_start"),
75 |                      "GeneSeqStartFilter")
76 | })
77 | 


--------------------------------------------------------------------------------
/tests/testthat/test_AnnotationFilterList.R:
--------------------------------------------------------------------------------
 1 | context("AnnotationFilterList")
 2 | 
 3 | test_that("AnnotationFilterList() works", {
 4 |     f1 <- GeneIdFilter("somegene")
 5 |     f2 <- SeqNameFilter("chr3")
 6 |     f3 <- GeneBiotypeFilter("protein_coding", "!=")
 7 | 
 8 |     fL <- AnnotationFilter:::AnnotationFilterList(f1, f2)
 9 |     expect_true(length(fL) == 2)
10 |     expect_equal(fL[[1]], f1)
11 |     expect_equal(fL[[2]], f2)
12 |     expect_true(all(logicOp(fL) == "&"))
13 |     
14 |     fL <- AnnotationFilter:::AnnotationFilterList(f1, f2, f3,
15 |                                                   logicOp = c("&", "|"))
16 |     expect_true(length(fL) == 3)
17 |     expect_equal(fL[[1]], f1)
18 |     expect_equal(fL[[2]], f2)
19 |     expect_equal(fL[[3]], f3)
20 |     expect_equal(logicOp(fL), c("&", "|"))
21 | 
22 |     ## An AnnotationFilterList nested inside another AnnotationFilterList
23 |     fL <- AnnotationFilter:::AnnotationFilterList(f1, f2, logicOp = "|")
24 |     fL2 <- AnnotationFilter:::AnnotationFilterList(f3, fL, logicOp = "&")
25 |     expect_true(length(fL) == 2)
26 |     expect_true(length(fL2) == 2)
27 |     expect_true(is(value(fL2)[[1]], "GeneBiotypeFilter"))
28 |     expect_true(is(value(fL2)[[2]], "AnnotationFilterList"))
29 |     expect_equal(value(fL2)[[2]], fL)
30 |     expect_equal(fL2[[2]], fL)
31 |     expect_equal(logicOp(fL2), "&")
32 |     expect_equal(logicOp(fL2[[2]]), "|")
33 | })
34 | 
35 | test_that("empty elements in AnnotationFilterList", {
36 |     ## Empty elements should be removed by the AnnotationFilterList constructor.
37 |     empty_afl <- AnnotationFilterList()
38 |     afl <- AnnotationFilterList(empty_afl)
39 |     expect_true(length(afl) == 0)
40 |     afl <- AnnotationFilterList(GeneIdFilter(4), empty_afl)
41 |     expect_true(length(afl) == 1)
42 |     afl <- AnnotationFilterList(GeneIdFilter(4),
43 |         AnnotationFilter(~ gene_id == 3 | seq_name == 4),empty_afl)
44 |     expect_true(length(afl) == 2)
45 |     ## Validity check: append an empty element directly to the .Data slot.
46 |     afl@.Data <- c(afl@.Data, list(empty_afl))
47 |     ## Also extend the logOp slot to match the added element.
48 |     afl@logOp <- c(afl@logOp, "|")
49 |     expect_error(validObject(afl))
50 | })
51 | 
52 | test_that("convertFilter works", {
53 |     smbl <- SymbolFilter("ADA")
54 |     txid <- TxIdFilter(1000)
55 |     gr <- GRangesFilter(GenomicRanges::GRanges("chr15:25062333-25065121"))
56 |     ## Filter objects are referenced by variable name inside the formula.
57 |     expect_identical(convertFilter(AnnotationFilter(~smbl | txid)),
58 |         "symbol == 'ADA' | tx_id == '1000'")
59 |     expect_identical(convertFilter(AnnotationFilter(~smbl & (smbl | txid))),
60 |         "symbol == 'ADA' & (symbol == 'ADA' | tx_id == '1000')")
61 |     expect_identical(convertFilter(AnnotationFilter(~smbl & !(smbl | txid))),
62 |         "symbol == 'ADA' & !(symbol == 'ADA' | tx_id == '1000')")
63 |     ## Formula (~) needed: the error must come from convertFilter(), not '|'.
64 |     expect_error(convertFilter(AnnotationFilter(~smbl | (txid & gr))))
65 | })
66 | 
67 | test_that("distributeNegation works", { ## De Morgan: !(a | b) -> !a & !b
68 |     afl <- AnnotationFilter(~!(symbol == 'ADA' | symbol %startsWith% 'SNORD'))
69 |     afl2 <- AnnotationFilter(~!symbol == 'ADA' & !symbol %startsWith% 'SNORD')
70 |     expect_identical(distributeNegation(afl), afl2)
71 | })
72 | 


--------------------------------------------------------------------------------
/.svn/pristine/33/331922c1408ddd46de74575d157bb2d47ccfa3e9.svn-base:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/AnnotationFilterList.R
  3 | \docType{methods}
  4 | \name{AnnotationFilterList}
  5 | \alias{AnnotationFilterList}
  6 | \alias{AnnotationFilterList-class}
  7 | \alias{AnnotationFilterList}
  8 | \alias{value,AnnotationFilterList-method}
  9 | \alias{logicOp,AnnotationFilterList-method}
 10 | \alias{logicOp}
 11 | \alias{show,AnnotationFilterList-method}
 12 | \title{Combining annotation filters}
 13 | \usage{
 14 | AnnotationFilterList(..., logicOp = character(), logOp = character())
 15 | 
 16 | \S4method{value}{AnnotationFilterList}(object)
 17 | 
 18 | \S4method{logicOp}{AnnotationFilterList}(object)
 19 | 
 20 | \S4method{show}{AnnotationFilterList}(object)
 21 | }
 22 | \arguments{
 23 | \item{...}{individual \code{\link{AnnotationFilter}} objects or a
 24 | mixture of \code{AnnotationFilter} and
 25 | \code{AnnotationFilterList} objects.}
 26 | 
 27 | \item{logicOp}{\code{character} of length equal to the number
 28 | of submitted \code{AnnotationFilter} objects - 1. Each value
 29 | representing the logical operation to combine consecutive
 30 | filters, i.e. the first element being the logical operation to
 31 | combine the first and second \code{AnnotationFilter}, the
 32 | second element being the logical operation to combine the
 33 | second and third \code{AnnotationFilter} and so on. Allowed
 34 | values are \code{"&"} and \code{"|"}. The function assumes a
 35 | logical \emph{and} between all elements by default.}
 36 | 
 37 | \item{logOp}{Deprecated; use \code{logicOp=}.}
 38 | 
 39 | \item{object}{An object of class \code{AnnotationFilterList}.}
 40 | }
 41 | \value{
 42 | \code{AnnotationFilterList} returns an \code{AnnotationFilterList}.
 43 | 
 44 | \code{value()} returns a \code{list} with \code{AnnotationFilter}
 45 |     objects.
 46 | 
 47 | \code{logicOp()} returns a \code{character()} vector of
 48 |     \dQuote{&} or \dQuote{|} symbols.
 49 | }
 50 | \description{
 51 | The \code{AnnotationFilterList} allows to combine
 52 |     filter objects extending the \code{\link{AnnotationFilter}}
 53 |     class to construct more complex queries. Consecutive filter
 54 |     objects in the \code{AnnotationFilterList} can be combined by a
 55 |     logical \emph{and} (\code{&}) or \emph{or} (\code{|}). The
 56 |     \code{AnnotationFilterList} extends \code{list}, individual
 57 |     elements can thus be accessed with \code{[[}.
 58 | 
 59 | \code{value()} gets a \code{list} with the
 60 |     \code{AnnotationFilter} objects. Use \code{[[} to access
 61 |     individual filters.
 62 | 
 63 | \code{logicOp()} gets the logical operators separating
 64 |     successive \code{AnnotationFilter}.
 65 | }
 66 | \note{
 67 | The \code{AnnotationFilterList} does not support containing empty
 68 |     elements, hence all elements of \code{length == 0} are removed in
 69 |     the constructor function.
 70 | }
 71 | \examples{
 72 | ## Create some AnnotationFilters
 73 | gf <- GenenameFilter(c("BCL2", "BCL2L11"))
 74 | tbtf <- TxBiotypeFilter("protein_coding", condition = "!=")
 75 | 
 76 | ## Combine both to an AnnotationFilterList. By default elements are combined
 77 | ## using a logical "and" operator. The filter list represents thus a query
 78 | ## like: get all features where the gene name is either ("BCL2" or "BCL2L11")
 79 | ## and the transcript biotype is not "protein_coding".
 80 | afl <- AnnotationFilterList(gf, tbtf)
 81 | afl
 82 | 
 83 | ## Access individual filters.
 84 | afl[[1]]
 85 | 
 86 | ## Create a filter in the form of: get all features where the gene name is
 87 | ## either ("BCL2" or "BCL2L11") and the transcript biotype is not
 88 | ## "protein_coding" or the seq_name is "Y". Hence, this will get all features
 89 | ## also found by the previous AnnotationFilterList and returns also all
 90 | ## features on chromosome Y.
 91 | afl <- AnnotationFilterList(gf, tbtf, SeqNameFilter("Y"),
 92 |                             logicOp = c("&", "|"))
 93 | afl
 94 | 
 95 | }
 96 | \seealso{
 97 | \code{\link{supportedFilters}} for available
 98 |     \code{\link{AnnotationFilter}} objects
 99 | }
100 | 


--------------------------------------------------------------------------------
/.svn/pristine/06/065e68d6b11e4bf90da04ffd904757e8ce3c422b.svn-base:
--------------------------------------------------------------------------------
  1 | #' @include AnnotationFilter.R
  2 | 
  3 | ## Functionality to translate a query condition to an AnnotationFilter.
  4 | 
  5 | #' Adapted from GenomicDataCommons.
  6 | #'
  7 | #' @importFrom methods is validObject initialize
  8 | #'
  9 | #' @noRd
 10 | .binary_op <- function(sep) {  ## factory: returns a handler for condition `sep`
 11 |     force(sep)  ## evaluate `sep` now so the returned closure captures its value
 12 |     function(e1, e2) {
 13 |         ## `e1` is never evaluated: its unevaluated expression is the field
 14 |         ## name, which .fieldToClass() maps to the name of a filter class.
 15 |         field <- as.character(substitute(e1))
 16 |         class <- .fieldToClass(field)
 17 |         filter <- tryCatch({
 18 |             new(class, condition = sep, field = field)
 19 |         }, error = function(e) {  ## any failure of new() is reported this way
 20 |             stop("No AnnotationFilter class '", class, "' for field '",
 21 |                 field, "' defined")
 22 |         })
 23 |         ## Coerce the value to the type matching the filter class, then set it.
 24 |         force(e2)
 25 |         if (is(filter, "CharacterFilter")) {
 26 |             e2 <- as.character(e2)
 27 |         } else if (is(filter, "IntegerFilter")) {
 28 |             e2 <- as.integer(e2)
 29 |         }
 30 |         initialize(filter, value = e2)  ## returns the filter with `value` filled in
 31 |     }
 32 | }
 33 | 
 34 | #' Combine filters into an AnnotationFilterList combined with \code{sep}
 35 | #'
 36 | #' @noRd
 37 | .combine_op <- function(sep) {
 38 |     force(sep)
 39 |     function(e1, e2) {
 40 |         ## Flatten AnnotationFilterList operands (no nesting yet), keeping the
 41 |         ## operators in filter order: ops of e1, then sep, then ops of e2.
 42 |         if (is(e1, "AnnotationFilterList")) {
 43 |             sep <- c(logicOp(e1), sep)
 44 |             e1 <- .aflvalue(e1)
 45 |         } else
 46 |             e1 <- list(e1)
 47 |         if (is(e2, "AnnotationFilterList")) {
 48 |             sep <- c(sep, logicOp(e2))  ## append: e2's filters come last
 49 |             e2 <- .aflvalue(e2)
 50 |         } else
 51 |             e2 <- list(e2)
 52 |         ## Don't use the constructor here.
 53 |         new("AnnotationFilterList", c(e1, e2), logOp = sep)
 54 |     }
 55 | }
 56 | 
 57 | #' The \code{.LOG_OP_REG} is a \code{list} providing functions for
 58 | #' common logical operations to translate expressions into AnnotationFilter
 59 | #' objects.
 60 | #'
 61 | #' @noRd
 62 | .LOG_OP_REG <- list()
 63 | ## Comparison operators: each handler creates one filter with that condition.
 64 | .LOG_OP_REG$`==` <- .binary_op("==")
 65 | .LOG_OP_REG$`%in%` <- .binary_op("==")  ## %in% is translated to condition "=="
 66 | .LOG_OP_REG$`!=` <- .binary_op("!=")
 67 | .LOG_OP_REG$`>` <- .binary_op(">")
 68 | .LOG_OP_REG$`<` <- .binary_op("<")
 69 | .LOG_OP_REG$`>=` <- .binary_op(">=")
 70 | .LOG_OP_REG$`<=` <- .binary_op("<=")
 71 | ## Logical operators: each handler combines its operands into a filter list.
 72 | .LOG_OP_REG$`&` <- .combine_op("&")
 73 | .LOG_OP_REG$`|` <- .combine_op("|")
 74 | 
 75 | #' @rdname AnnotationFilter
 76 | #'
 77 | #' @description \code{AnnotationFilter} \emph{translates} a filter
 78 | #'     expression such as \code{~ gene_id == "BCL2"} into a filter object
 79 | #'     extending the \code{\link{AnnotationFilter}} class (in the example a
 80 | #'     \code{\link{GeneIdFilter}} object) or an
 81 | #'     \code{\link{AnnotationFilterList}} if the expression contains multiple
 82 | #'     conditions (see examples below). Filter expressions have to be written
 83 | #'     in the form \code{~ <field> <condition> <value>}, with \code{<field>}
 84 | #'     being the default field of the filter class (use the
 85 | #'     \code{supportedFilters} function to list all fields and filter classes),
 86 | #'     \code{<condition>} the logical expression and \code{<value>} the value
 87 | #'     for the filter.
 88 | #'
 89 | #' @details Filter expressions for the \code{AnnotationFilter} class have to be
 90 | #'     written as formulas, i.e. starting with a \code{~}.
 91 | #'
 92 | #' @note Translation of nested filter expressions using the
 93 | #'     \code{AnnotationFilter} function is not yet supported.
 94 | #' 
 95 | #' @param expr A filter expression, written as a \code{formula}, to be
 96 | #'     converted to an \code{AnnotationFilter} or \code{AnnotationFilterList}
 97 | #'     class. See below for examples.
 98 | #'
 99 | #' @return \code{AnnotationFilter} returns an
100 | #'     \code{\link{AnnotationFilter}} or an \code{\link{AnnotationFilterList}}.
101 | #' 
102 | #' @importFrom lazyeval f_eval
103 | #'
104 | #' @examples
105 | #' 
106 | #' ## Convert a filter expression based on a gene ID to a GeneIdFilter
107 | #' gnf <- AnnotationFilter(~ gene_id == "BCL2")
108 | #' gnf
109 | #'
110 | #' ## Same conversion but for two gene IDs.
111 | #' gnf <- AnnotationFilter(~ gene_id %in% c("BCL2", "BCL2L11"))
112 | #' gnf
113 | #'
114 | #' ## Converting an expression that combines multiple filters. As a result we
115 | #' ## get an AnnotationFilterList containing the corresponding filters.
116 | #' ## Be aware that nesting of expressions/filters does not work.
117 | #' flt <- AnnotationFilter(~ gene_id %in% c("BCL2", "BCL2L11") &
118 | #'                         tx_biotype == "nonsense_mediated_decay" |
119 | #'                         seq_name == "Y")
120 | #' flt
121 | #' 
122 | #' @export
123 | AnnotationFilter <- function(expr) {
124 |     f_eval(expr, data = .LOG_OP_REG)  ## evaluate the formula with .LOG_OP_REG as data
125 | }
126 | 


--------------------------------------------------------------------------------
/tests/testthat/test_translate-utils.R:
--------------------------------------------------------------------------------
  1 | context("expression translation")
  2 | 
  3 | test_that("translation of expression works for single filter/condition", {
  4 |     ## Character filters: a translated formula must equal the constructor call.
  5 |     ## exon_id
  6 |     flt <- ExonIdFilter("EX1", condition = "==")
  7 |     flt2 <- AnnotationFilter(~ exon_id == "EX1")
  8 |     expect_equal(flt, flt2)
  9 |     flt <- ExonIdFilter(c("EX1", "EX2"), condition = "!=")
 10 |     flt2 <- AnnotationFilter(~ exon_id != c("EX1", "EX2"))
 11 |     expect_equal(flt, flt2)
 12 |     ## seq_name
 13 |     flt <- SeqNameFilter(c("chr3", "chrX"), condition = "==")
 14 |     flt2 <- AnnotationFilter(~ seq_name == c("chr3", "chrX"))
 15 |     expect_equal(flt, flt2)
 16 |     flt <- SeqNameFilter(1:3, condition = "==")
 17 |     flt2 <- AnnotationFilter(~ seq_name %in% 1:3)  ## %in% yields condition "=="
 18 |     expect_equal(flt, flt2)
 19 |     ## Integer filters with the ordering conditions >, <, >= and <=.
 20 |     flt <- GeneStartFilter(123, condition = ">")
 21 |     flt2 <- AnnotationFilter(~ gene_start > 123)
 22 |     expect_equal(flt, flt2)
 23 |     flt <- TxStartFilter(123, condition = "<")
 24 |     flt2 <- AnnotationFilter(~ tx_start < 123)
 25 |     expect_equal(flt, flt2)
 26 |     flt <- GeneEndFilter(123, condition = ">=")
 27 |     flt2 <- AnnotationFilter(~ gene_end >= 123)
 28 |     expect_equal(flt, flt2)
 29 |     flt <- ExonEndFilter(123, condition = "<=")
 30 |     flt2 <- AnnotationFilter(~ exon_end <= 123)
 31 |     expect_equal(flt, flt2)
 32 |     ## Test exceptions/errors.
 33 |     expect_error(AnnotationFilter(~ not_existing == 1:3))  ## unknown field
 34 |     ## `*` is not a translated operator; the error is not self-explanatory.
 35 |     expect_error(AnnotationFilter(~ gene_id * 3))
 36 | })
 37 | 
 38 | test_that("translation of combined expressions works", {
 39 |     res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2")
 40 |     cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2"))
 41 |     expect_equal(res, cmp)  ## the constructor combines with "&" by default
 42 |     res <- AnnotationFilter(~ exon_id == "EX1" | genename != "BCL2")
 43 |     cmp <- AnnotationFilterList(ExonIdFilter("EX1"),
 44 |                                 GenenameFilter("BCL2", "!="), logicOp = "|")
 45 |     expect_equal(res, cmp)
 46 |     ## 3 filters: the result is flat and keeps the operators in order.
 47 |     res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2" |
 48 |                                 seq_name != 3)
 49 |     ## Expect an AnnotationFilterList of length 3.
 50 |     expect_equal(length(res), 3)
 51 |     cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2"),
 52 |                                 SeqNameFilter(3, "!="), logicOp = c("&", "|"))
 53 |     expect_equal(res, cmp)
 54 |     ## 4 filters, again expected as a single flat list.
 55 |     res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2" |
 56 |                                 seq_name != 3 | seq_name == "Y")
 57 |     expect_equal(length(res), 4)
 58 |     cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2"),
 59 |                                 SeqNameFilter(3, "!="), SeqNameFilter("Y"),
 60 |                                 logicOp = c("&", "|",  "|"))
 61 |     expect_equal(res, cmp)
 62 | })
 63 | 
 64 | test_that("translation works from within other functions", {
 65 |     simpleFun <- function(x)  ## wrapper that only forwards its argument
 66 |         AnnotationFilter(x)
 67 |     expect_equal(simpleFun(~ gene_id == 4), AnnotationFilter(~ gene_id == 4))
 68 |     filter_expr <- ~ gene_id == 4  ## formula stored in a variable first
 69 |     expect_equal(simpleFun(filter_expr),
 70 |                  AnnotationFilter(~ gene_id == 4))
 71 | })
 72 | 
 73 | ## This might be a test if we get the nesting working.
 74 | ## test_that("translation of nested expressions works" {
 75 | ##     res <- convertFilterExpression((exon_id == "EX1" & gene_id == "BCL2") |
 76 | ##                                    (exon_id == "EX3" & gene_id == "BCL2L11"))
 77 | ##     expect_equal(logicOp(res), "|")
 78 | ##     expect_true(is(res[[1]], "AnnotationFilterList"))
 79 | ##     expect_equal(res[[1]][[1]], ExonIdFilter("EX1"))
 80 | ##     expect_equal(res[[1]][[2]], GeneIdFilter("BCL2"))
 81 | ##     expect_equal(logicOp(res[[1]]), "&")
 82 | ##     expect_true(is(res[[2]], "AnnotationFilterList"))
 83 | ##     expect_equal(res[[2]][[1]], ExonIdFilter("EX3"))
 84 | ##     expect_equal(res[[2]][[2]], GeneIdFilter("BCL2L11"))
 85 | ##     expect_equal(logicOp(res[[2]]), "&")
 86 | ##     ##
 87 | ##     res <- convertFilterExpression(seq_name == "Y" |
 88 | ##                                    (exon_id == "EX1" & gene_id == "BCL2") &
 89 | ##                                    (exon_id == "EX3" & gene_id == "BCL2L11"))
 90 | ##     ## Expect: length 3, first being a SeqNameFilter, second an
 91 | ##     ## AnnotationFilterList, third a AnnotationFilterList.
 92 | ##     expect_equal(res[[1]], SeqNameFilter("Y"))
 93 | ##     expect_equal(logicOp(res), "|")
 94 | ##     expect_true(is(res[[2]], "AnnotationFilterList"))
 95 | ##     expect_equal(res[[1]][[1]], ExonIdFilter("EX1"))
 96 | ##     expect_equal(res[[1]][[2]], GeneIdFilter("BCL2"))
 97 | ##     expect_equal(logicOp(res[[1]]), "&")
 98 | ##     expect_true(is(res[[2]], "AnnotationFilterList"))
 99 | ##     expect_equal(res[[2]][[1]], ExonIdFilter("EX3"))
100 | ##     expect_equal(res[[2]][[2]], GeneIdFilter("BCL2L11"))
101 | ##     expect_equal(logicOp(res[[2]]), "&")
102 | 
103 | ##     expect_true(is(res[[1]], "AnnotationFilterList"))
104 | ##     expect_true(is(res[[2]], "AnnotationFilterList"))
105 | 
106 | ##     convertFilterExpression((gene_id == 3) ()
107 | ## })
108 | 
109 | 


--------------------------------------------------------------------------------
/.svn/pristine/a1/a129614aff000a6de02c214a739f8867a6f01752.svn-base:
--------------------------------------------------------------------------------
  1 | context("expression translation")
  2 | 
  3 | test_that("translation of expression works for single filter/condition", {
  4 |     ## Character filters: a translated formula must equal the constructor call.
  5 |     ## exon_id
  6 |     flt <- ExonIdFilter("EX1", condition = "==")
  7 |     flt2 <- AnnotationFilter(~ exon_id == "EX1")
  8 |     expect_equal(flt, flt2)
  9 |     flt <- ExonIdFilter(c("EX1", "EX2"), condition = "!=")
 10 |     flt2 <- AnnotationFilter(~ exon_id != c("EX1", "EX2"))
 11 |     expect_equal(flt, flt2)
 12 |     ## seq_name
 13 |     flt <- SeqNameFilter(c("chr3", "chrX"), condition = "==")
 14 |     flt2 <- AnnotationFilter(~ seq_name == c("chr3", "chrX"))
 15 |     expect_equal(flt, flt2)
 16 |     flt <- SeqNameFilter(1:3, condition = "==")
 17 |     flt2 <- AnnotationFilter(~ seq_name %in% 1:3)  ## %in% yields condition "=="
 18 |     expect_equal(flt, flt2)
 19 |     ## Integer filters with the ordering conditions >, <, >= and <=.
 20 |     flt <- GeneStartFilter(123, condition = ">")
 21 |     flt2 <- AnnotationFilter(~ gene_start > 123)
 22 |     expect_equal(flt, flt2)
 23 |     flt <- TxStartFilter(123, condition = "<")
 24 |     flt2 <- AnnotationFilter(~ tx_start < 123)
 25 |     expect_equal(flt, flt2)
 26 |     flt <- GeneEndFilter(123, condition = ">=")
 27 |     flt2 <- AnnotationFilter(~ gene_end >= 123)
 28 |     expect_equal(flt, flt2)
 29 |     flt <- ExonEndFilter(123, condition = "<=")
 30 |     flt2 <- AnnotationFilter(~ exon_end <= 123)
 31 |     expect_equal(flt, flt2)
 32 |     ## Test exceptions/errors.
 33 |     expect_error(AnnotationFilter(~ not_existing == 1:3))  ## unknown field
 34 |     ## `*` is not a translated operator; the error is not self-explanatory.
 35 |     expect_error(AnnotationFilter(~ gene_id * 3))
 36 | })
 37 | 
 38 | test_that("translation of combined expressions works", {
 39 |     res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2")
 40 |     cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2"))
 41 |     expect_equal(res, cmp)  ## the constructor combines with "&" by default
 42 |     res <- AnnotationFilter(~ exon_id == "EX1" | genename != "BCL2")
 43 |     cmp <- AnnotationFilterList(ExonIdFilter("EX1"),
 44 |                                 GenenameFilter("BCL2", "!="), logicOp = "|")
 45 |     expect_equal(res, cmp)
 46 |     ## 3 filters: the result is flat and keeps the operators in order.
 47 |     res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2" |
 48 |                                 seq_name != 3)
 49 |     ## Expect an AnnotationFilterList of length 3.
 50 |     expect_equal(length(res), 3)
 51 |     cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2"),
 52 |                                 SeqNameFilter(3, "!="), logicOp = c("&", "|"))
 53 |     expect_equal(res, cmp)
 54 |     ## 4 filters, again expected as a single flat list.
 55 |     res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2" |
 56 |                                 seq_name != 3 | seq_name == "Y")
 57 |     expect_equal(length(res), 4)
 58 |     cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2"),
 59 |                                 SeqNameFilter(3, "!="), SeqNameFilter("Y"),
 60 |                                 logicOp = c("&", "|",  "|"))
 61 |     expect_equal(res, cmp)
 62 | })
 63 | 
 64 | test_that("translation works from within other functions", {
 65 |     simpleFun <- function(x)  ## wrapper that only forwards its argument
 66 |         AnnotationFilter(x)
 67 |     expect_equal(simpleFun(~ gene_id == 4), AnnotationFilter(~ gene_id == 4))
 68 |     filter_expr <- ~ gene_id == 4  ## formula stored in a variable first
 69 |     expect_equal(simpleFun(filter_expr),
 70 |                  AnnotationFilter(~ gene_id == 4))
 71 | })
 72 | 
 73 | ## This might be a test if we get the nesting working.
 74 | ## test_that("translation of nested expressions works" {
 75 | ##     res <- convertFilterExpression((exon_id == "EX1" & gene_id == "BCL2") |
 76 | ##                                    (exon_id == "EX3" & gene_id == "BCL2L11"))
 77 | ##     expect_equal(logicOp(res), "|")
 78 | ##     expect_true(is(res[[1]], "AnnotationFilterList"))
 79 | ##     expect_equal(res[[1]][[1]], ExonIdFilter("EX1"))
 80 | ##     expect_equal(res[[1]][[2]], GeneIdFilter("BCL2"))
 81 | ##     expect_equal(logicOp(res[[1]]), "&")
 82 | ##     expect_true(is(res[[2]], "AnnotationFilterList"))
 83 | ##     expect_equal(res[[2]][[1]], ExonIdFilter("EX3"))
 84 | ##     expect_equal(res[[2]][[2]], GeneIdFilter("BCL2L11"))
 85 | ##     expect_equal(logicOp(res[[2]]), "&")
 86 | ##     ##
 87 | ##     res <- convertFilterExpression(seq_name == "Y" |
 88 | ##                                    (exon_id == "EX1" & gene_id == "BCL2") &
 89 | ##                                    (exon_id == "EX3" & gene_id == "BCL2L11"))
 90 | ##     ## Expect: length 3, first being a SeqNameFilter, second an
 91 | ##     ## AnnotationFilterList, third a AnnotationFilterList.
 92 | ##     expect_equal(res[[1]], SeqNameFilter("Y"))
 93 | ##     expect_equal(logicOp(res), "|")
 94 | ##     expect_true(is(res[[2]], "AnnotationFilterList"))
 95 | ##     expect_equal(res[[1]][[1]], ExonIdFilter("EX1"))
 96 | ##     expect_equal(res[[1]][[2]], GeneIdFilter("BCL2"))
 97 | ##     expect_equal(logicOp(res[[1]]), "&")
 98 | ##     expect_true(is(res[[2]], "AnnotationFilterList"))
 99 | ##     expect_equal(res[[2]][[1]], ExonIdFilter("EX3"))
100 | ##     expect_equal(res[[2]][[2]], GeneIdFilter("BCL2L11"))
101 | ##     expect_equal(logicOp(res[[2]]), "&")
102 | 
103 | ##     expect_true(is(res[[1]], "AnnotationFilterList"))
104 | ##     expect_true(is(res[[2]], "AnnotationFilterList"))
105 | 
106 | ##     convertFilterExpression((gene_id == 3) ()
107 | ## })
108 | 
109 | 


--------------------------------------------------------------------------------
/tests/testthat/test_AnnotationFilter.R:
--------------------------------------------------------------------------------
  1 | context("AnnotationFilter")
  2 | 
  3 | test_that("supportedFilters() works", {
  4 |     expect_true(inherits(supportedFilters(), "data.frame"))
  5 |     expect_identical(  ## one row per .FIELD entry plus one per field-less filter
  6 |         nrow(supportedFilters()),
  7 |         length(unlist(AnnotationFilter:::.FIELD, use.names=FALSE)) +
  8 |             length(AnnotationFilter:::.FILTERS_WO_FIELD)
  9 |     )
 10 | })
 11 | 
 12 | test_that("SymbolFilter as representative for character filters", {
 13 |     expect_true(validObject(new("SymbolFilter")))  ## the bare prototype is valid
 14 |     expect_error(SymbolFilter())  ## the constructor needs a value
 15 |     expect_error(SymbolFilter(1, ">"))
 16 |     expect_error(SymbolFilter(1, "foo"))
 17 |     expect_error(SymbolFilter(c("foo","bar"), "startsWith"))
 18 |     ## value(): numeric input is returned as character.
 19 |     fl <- SymbolFilter("BCL2")
 20 |     expect_equal(value(fl), "BCL2")
 21 |     fl <- SymbolFilter(c(4, 5))
 22 |     expect_equal(value(fl), c("4", "5"))
 23 |     fl <- SymbolFilter(3)
 24 |     expect_equal(value(fl), "3")
 25 |     expect_error(SymbolFilter(NA))
 26 |     ## condition(): "==" when none is given; invalid conditions are rejected.
 27 |     expect_equal(condition(fl), "==")
 28 |     fl <- SymbolFilter("a", condition = "!=")
 29 |     expect_equal(condition(fl), "!=")
 30 |     expect_error(SymbolFilter("a", condition = "<"))
 31 |     expect_error(SymbolFilter("a", condition = ""))
 32 |     expect_error(SymbolFilter("a", condition = c("==", ">")))
 33 |     expect_error(SymbolFilter("a", condition = NULL))
 34 |     expect_error(SymbolFilter("a", condition = NA))
 35 |     expect_error(SymbolFilter("a", condition = 4))
 36 | })
 37 | 
 38 | test_that("GeneStartFilter as representative for integer filters", {
 39 |     gsf <- GeneStartFilter(10000, condition = ">")
 40 |     expect_equal(condition(gsf), ">")
 41 |     expect_error(GeneStartFilter("3"))  ## character input errors, even "3"
 42 |     expect_error(GeneStartFilter("B"))
 43 |     expect_error(GeneStartFilter(NA))
 44 |     expect_error(GeneStartFilter(NULL))
 45 |     expect_error(GeneStartFilter())
 46 |     ## Conditions startsWith/endsWith and length-2 conditions are rejected.
 47 |     expect_error(GeneStartFilter(10000, condition = "startsWith"))
 48 |     expect_error(GeneStartFilter(10000, condition = "endsWith"))
 49 |     expect_error(GeneStartFilter(10000, condition = c("==", "<")))
 50 | })
 51 | 
 52 | test_that("GRangesFilter works", {
 53 |     GRanges <- GenomicRanges::GRanges  ## local alias for the constructor
 54 |     grf <- GRangesFilter(GRanges("chr10:87869000-87876000"))
 55 |     expect_equal(condition(grf), "any")  ## condition when no `type` is given
 56 |     expect_error(GRangesFilter(value = 3))  ## value is not a GRanges
 57 |     expect_error(GRangesFilter(
 58 |         GRanges("chr10:87869000-87876000"),
 59 |         type = "=="
 60 |     ))
 61 |     grf <- GRangesFilter(
 62 |         GRanges("chr10:87869000-87876000"),
 63 |         type = "within",
 64 |         feature = "tx"
 65 |     )
 66 |     expect_equal(condition(grf), "within")  ## `type` is reported by condition()
 67 |     expect_equal(feature(grf), "tx")
 68 | })
 69 | 
 70 | test_that("fieldToClass works", {
 71 |     expect_identical(AnnotationFilter:::.fieldToClass("gene_id"),
 72 |                      "GeneIdFilter")
 73 |     ## Every "_" in the field must be handled, not only the first (issue #13).
 74 |     expect_identical(AnnotationFilter:::.fieldToClass("gene_seq_start"),
 75 |                      "GeneSeqStartFilter")
 76 | })
 77 | 
 78 | test_that("convertFilter Works", {
 79 |     expect_identical(convertFilter(SymbolFilter("ADA")), "symbol == 'ADA'")
 80 |     expect_identical(convertFilter(SymbolFilter("ADA", "!=")),
 81 |         "symbol != 'ADA'")
 82 |     expect_identical(convertFilter(SymbolFilter("ADA", "startsWith")),
 83 |         "symbol %like% 'ADA%'")
 84 |     expect_identical(convertFilter(SymbolFilter("ADA", "endsWith")),
 85 |         "symbol %like% '%ADA'")
 86 |     expect_identical(convertFilter(SymbolFilter("ADA", "contains")),
 87 |         "symbol %like% 'ADA'")
 88 |     
 89 |     expect_identical(convertFilter(TxStartFilter(1000)), "tx_start == '1000'")
 90 |     expect_identical(convertFilter(TxStartFilter(1000, "!=")),
 91 |         "tx_start != '1000'")  ## "=="/"!=" quote the value; ordering ops do not
 92 |     expect_identical(convertFilter(TxStartFilter(1000, ">")), "tx_start > 1000")
 93 |     expect_identical(convertFilter(TxStartFilter(1000, "<")), "tx_start < 1000")
 94 |     expect_identical(convertFilter(TxStartFilter(1000, ">=")),
 95 |         "tx_start >= 1000")
 96 |     expect_identical(convertFilter(TxStartFilter(1000, "<=")),
 97 |         "tx_start <= 1000")
 98 | 
 99 |     ## With not = TRUE the converted expression is prefixed with "!".
100 | 
101 |     expect_identical(convertFilter(SymbolFilter("ADA", not=TRUE)),
102 |         "!symbol == 'ADA'")
103 |     expect_identical(convertFilter(SymbolFilter("ADA", "!=", not=TRUE)),
104 |         "!symbol != 'ADA'")
105 |     expect_identical(convertFilter(SymbolFilter("ADA", "startsWith", not=TRUE)),
106 |         "!symbol %like% 'ADA%'")
107 |     expect_identical(convertFilter(SymbolFilter("ADA", "endsWith", not=TRUE)),
108 |         "!symbol %like% '%ADA'")
109 |     expect_identical(convertFilter(SymbolFilter("ADA", "contains", not=TRUE)),
110 |         "!symbol %like% 'ADA'")
111 |     
112 |     expect_identical(convertFilter(TxStartFilter(1000, not=TRUE)),
113 |         "!tx_start == '1000'")
114 |     expect_identical(convertFilter(TxStartFilter(1000, "!=", not=TRUE)),
115 |         "!tx_start != '1000'")
116 |     expect_identical(convertFilter(TxStartFilter(1000, ">", not=TRUE)),
117 |         "!tx_start > 1000")
118 |     expect_identical(convertFilter(TxStartFilter(1000, "<", not=TRUE)),
119 |         "!tx_start < 1000")
120 |     expect_identical(convertFilter(TxStartFilter(1000, ">=", not=TRUE)),
121 |         "!tx_start >= 1000")
122 |     expect_identical(convertFilter(TxStartFilter(1000, "<=", not=TRUE)),
123 |         "!tx_start <= 1000")
124 | })
125 | 


--------------------------------------------------------------------------------
/man/AnnotationFilterList.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/AnnotationFilterList.R
  3 | \docType{methods}
  4 | \name{AnnotationFilterList}
  5 | \alias{AnnotationFilterList}
  6 | \alias{AnnotationFilterList-class}
  7 | \alias{AnnotationFilterList}
  8 | \alias{value,AnnotationFilterList-method}
  9 | \alias{logicOp,AnnotationFilterList-method}
 10 | \alias{logicOp}
 11 | \alias{not,AnnotationFilterList-method}
 12 | \alias{not}
 13 | \alias{distributeNegation,AnnotationFilterList-method}
 14 | \alias{distributeNegation}
 15 | \alias{convertFilter,AnnotationFilterList,missing-method}
 16 | \alias{convertFilter}
 17 | \alias{show,AnnotationFilterList-method}
 18 | \title{Combining annotation filters}
 19 | \usage{
 20 | AnnotationFilterList(..., logicOp = character(), logOp = character(),
 21 |   not = FALSE, .groupingFlag = FALSE)
 22 | 
 23 | \S4method{value}{AnnotationFilterList}(object)
 24 | 
 25 | \S4method{logicOp}{AnnotationFilterList}(object)
 26 | 
 27 | \S4method{not}{AnnotationFilterList}(object)
 28 | 
 29 | \S4method{distributeNegation}{AnnotationFilterList}(object,
 30 |   .prior_negation = FALSE)
 31 | 
 32 | \S4method{convertFilter}{AnnotationFilterList,missing}(object)
 33 | 
 34 | \S4method{show}{AnnotationFilterList}(object)
 35 | }
 36 | \arguments{
 37 | \item{...}{individual \code{\link{AnnotationFilter}} objects or a
 38 | mixture of \code{AnnotationFilter} and
 39 | \code{AnnotationFilterList} objects.}
 40 | 
 41 | \item{logicOp}{\code{character} of length equal to the number
 42 | of submitted \code{AnnotationFilter} objects - 1. Each value
 43 | representing the logical operation to combine consecutive
 44 | filters, i.e. the first element being the logical operation to
 45 | combine the first and second \code{AnnotationFilter}, the
 46 | second element being the logical operation to combine the
 47 | second and third \code{AnnotationFilter} and so on. Allowed
 48 | values are \code{"&"} and \code{"|"}. The function assumes a
 49 | logical \emph{and} between all elements by default.}
 50 | 
 51 | \item{logOp}{Deprecated; use \code{logicOp=}.}
 52 | 
 53 | \item{not}{\code{logical} of length one. Indicates whether the grouping
 54 | of \code{AnnotationFilters} is to be negated.}
 55 | 
 56 | \item{.groupingFlag}{Flag designated for internal use only.}
 57 | 
 58 | \item{object}{An object of class \code{AnnotationFilterList}.}
 59 | 
 60 | \item{.prior_negation}{\code{logical(1)} unused argument.}
 61 | }
 62 | \value{
 63 | \code{AnnotationFilterList} returns an \code{AnnotationFilterList}.
 64 | 
 65 | \code{value()} returns a \code{list} with \code{AnnotationFilter}
 66 |     objects.
 67 | 
 68 | \code{logicOp()} returns a \code{character()} vector of
 69 |     \dQuote{&} or \dQuote{|} symbols.
 70 | 
 71 | \code{not()} returns a \code{logical(1)} indicating whether the
 72 |     \code{AnnotationFilterList} is negated.
 73 | 
 74 | \code{distributeNegation()} returns an \code{AnnotationFilterList} with
 75 |      De Morgan's law applied: it is logically equal to the original
 76 |      \code{AnnotationFilterList}, but every \code{!} is moved off the
 77 |      \code{AnnotationFilterList} objects and onto the nested
 78 |      \code{AnnotationFilter} objects.
 79 | 
 80 | \code{convertFilter()} returns a \code{character(1)} that can be used as
 81 |      input to a \code{dplyr} filter.
 82 | }
 83 | \description{
 84 | The \code{AnnotationFilterList} allows combining
 85 |     filter objects extending the \code{\link{AnnotationFilter}}
 86 |     class to construct more complex queries. Consecutive filter
 87 |     objects in the \code{AnnotationFilterList} can be combined by a
 88 |     logical \emph{and} (\code{&}) or \emph{or} (\code{|}). The
 89 |     \code{AnnotationFilterList} extends \code{list}, individual
 90 |     elements can thus be accessed with \code{[[}.
 91 | 
 92 | \code{value()} gets a \code{list} with the
 93 |     \code{AnnotationFilter} objects. Use \code{[[} to access
 94 |     individual filters.
 95 | 
 96 | \code{logicOp()} gets the logical operators separating
 97 |     successive \code{AnnotationFilter}.
 98 | 
 99 | \code{not()} gets the negation flag of the
100 |     \code{AnnotationFilterList}.
101 | 
102 | 
103 | 
104 | Converts an \code{AnnotationFilterList} object to a
105 |      \code{character(1)} giving an equation that can be used as input to
106 |      a \code{dplyr} filter.
107 | }
108 | \note{
109 | The \code{AnnotationFilterList} does not support containing empty
110 |     elements, hence all elements of \code{length == 0} are removed in
111 |     the constructor function.
112 | }
113 | \examples{
114 | ## Create some AnnotationFilters
115 | gf <- GeneNameFilter(c("BCL2", "BCL2L11"))
116 | tbtf <- TxBiotypeFilter("protein_coding", condition = "!=")
117 | 
118 | ## Combine both to an AnnotationFilterList. By default elements are combined
119 | ## using a logical "and" operator. The filter list represents thus a query
120 | ## like: get all features where the gene name is either ("BCL2" or "BCL2L11")
121 | ## and the transcript biotype is not "protein_coding".
122 | afl <- AnnotationFilterList(gf, tbtf)
123 | afl
124 | 
125 | ## Access individual filters.
126 | afl[[1]]
127 | 
128 | ## Create a filter in the form of: get all features where the gene name is
129 | ## either ("BCL2" or "BCL2L11") and the transcript biotype is not
130 | ## "protein_coding" or the seq_name is "Y". Hence, this will get all features
131 | ## also found by the previous AnnotationFilterList and returns also all
132 | ## features on chromosome Y.
133 | afl <- AnnotationFilterList(gf, tbtf, SeqNameFilter("Y"),
134 |                             logicOp = c("&", "|"))
135 | afl
136 | 
137 | afl <- AnnotationFilter(~!(symbol == 'ADA' | symbol \%startsWith\% 'SNORD'))
138 | afl <- distributeNegation(afl)
139 | afl
140 | afl <- AnnotationFilter(~symbol=="ADA" & tx_start > "400000")
141 | result <- convertFilter(afl)
142 | result
143 | }
144 | \seealso{
145 | \code{\link{supportedFilters}} for available
146 |     \code{\link{AnnotationFilter}} objects
147 | }
148 | 


--------------------------------------------------------------------------------
/R/translate-utils.R:
--------------------------------------------------------------------------------
  1 | #' @include AnnotationFilter.R
  2 | 
  3 | ## Functionality to translate a query condition to an AnnotationFilter.
  4 | 
  5 | #' Adapted from GenomicDataCommons.
  6 | #'
  7 | #' @importFrom methods is validObject initialize
  8 | #'
  9 | #' @noRd
 10 | .binary_op <- function(sep) {  ## factory: returns a handler for condition `sep`
 11 |     force(sep)  ## evaluate `sep` now so the returned closure captures its value
 12 |     function(e1, e2) {
 13 |         ## `e1` is never evaluated: its unevaluated expression is the field
 14 |         ## name, which .fieldToClass() maps to the name of a filter class.
 15 |         field <- as.character(substitute(e1))
 16 |         class <- .fieldToClass(field)
 17 |         filter <- tryCatch({
 18 |             new(class, condition = sep, field = field)
 19 |         }, error = function(e) {  ## any failure of new() is reported this way
 20 |             stop("No AnnotationFilter class '", class, "' for field '",
 21 |                 field, "' defined")
 22 |         })
 23 |         ## Coerce the value to the type matching the filter class, then set it.
 24 |         force(e2)
 25 |         if (is(filter, "CharacterFilter")) {
 26 |             e2 <- as.character(e2)
 27 |         } else if (is(filter, "IntegerFilter")) {
 28 |             e2 <- as.integer(e2)
 29 |         }
 30 |         initialize(filter, value = e2)  ## returns the filter with `value` filled in
 31 |     }
 32 | }
 33 | 
 34 | #' Functionality to translate a unary operation into an AnnotationFilter.
 35 | #'
 36 | #' @noRd
 37 | .not_op <- function(sep) {
 38 |     force(sep)
 39 |     ## Returns the handler for "!": it toggles the negation of a filter.
 40 |     function(x) {
 41 |         ## Only filter objects carry a 'not' slot that can be toggled.
 42 |         if (!is(x, "AnnotationFilterList") && !is(x, "AnnotationFilter"))
 43 |             stop('Arguments to "!" must be an AnnotationFilter or ',
 44 |                  'AnnotationFilterList.')
 45 |         ## Flip the flag, so negating twice restores the original state.
 46 |         x@not <- !x@not
 47 |         ## Clear the grouping flag of a negated list so that .combine_op
 48 |         ## keeps it as one nested element instead of splicing its
 49 |         ## filters into the surrounding list.
 50 |         if (is(x, "AnnotationFilterList"))
 51 |             x@.groupingFlag <- FALSE
 52 |         x
 53 |     }
 54 | }
 55 | 
 56 | .parenthesis_op <- function(sep) {
 57 |     force(sep)
 58 |     function(x) {
 59 |         ## Wrap a single filter into a list so it can carry the flag.
 60 |         if (!is(x, "AnnotationFilterList"))
 61 |             return(AnnotationFilterList(x, .groupingFlag=FALSE))
 62 |         ## Parenthesised lists must not be spliced by .combine_op.
 63 |         x@.groupingFlag <- FALSE
 64 |         x
 65 |     }
 66 | }
 67 | 
 68 | 
 69 | #' Combine filters into an AnnotationFilterList combined with \code{sep}
 70 | #'
 71 | #' @noRd
 72 | .combine_op <- function(sep) {
 73 |     force(sep)
 74 |     ## Split an operand into its filters and the operators joining them.
 75 |     ## A list with the grouping flag set is spliced into its elements,
 76 |     ## anything else is kept as one single element.
 77 |     unpack <- function(z) {
 78 |         if (is(z, "AnnotationFilterList") && z@.groupingFlag)
 79 |             list(ops = logicOp(z), filters = .aflvalue(z))
 80 |         else
 81 |             list(ops = character(), filters = list(z))
 82 |     }
 83 |     function(e1, e2) {
 84 |         lhs <- unpack(e1)
 85 |         rhs <- unpack(e2)
 86 |         ## Constructor arguments: all filters, then the operator chain
 87 |         ## (left operators, 'sep', right operators) and the grouping
 88 |         ## flag, which lets a later combination splice this result.
 89 |         args <- c(lhs$filters, rhs$filters)
 90 |         args[['logicOp']] <- c(lhs$ops, sep, rhs$ops)
 91 |         args[['.groupingFlag']] <- TRUE
 92 |         do.call("AnnotationFilterList", args)
 93 |     }
 94 | }
 95 | 
 96 | #' The \code{.LOG_OP_REG} is a \code{list} providing functions for
 97 | #' common logical operations to translate expressions into AnnotationFilter
 98 | #' objects.
 99 | #'
100 | #' @noRd
101 | .LOG_OP_REG <- list(
102 |     ## Assign conditions. Note that `%in%` is mapped to condition "==".
103 |     `==` = .binary_op("=="),
104 |     `%in%` = .binary_op("=="),
105 |     `!=` = .binary_op("!="),
106 |     `>` = .binary_op(">"),
107 |     `<` = .binary_op("<"),
108 |     `>=` = .binary_op(">="),
109 |     `<=` = .binary_op("<="),
110 |     ## Custom binary operators
111 |     `%startsWith%` = .binary_op("startsWith"),
112 |     `%endsWith%` = .binary_op("endsWith"),
113 |     `%contains%` = .binary_op("contains"),
114 |     ## not conditional, parenthesis and the operators combining filters
115 |     `!` = .not_op("!"),
116 |     `(` = .parenthesis_op("("),
117 |     `&` = .combine_op("&"),
118 |     `|` = .combine_op("|")
119 | )
120 | 
121 | ## Placeholders with an empty body (returning NULL) for the custom operators.
122 | `%startsWith%` <- function(e1, e2){}
123 | `%endsWith%` <- function(e1, e2){}
124 | `%contains%` <- function(e1, e2){}
125 | 
126 | #' @rdname AnnotationFilter
127 | #'
128 | #' @description \code{AnnotationFilter} \emph{translates} a filter
129 | #'     expression such as \code{~ gene_id == "BCL2"} into a filter object
130 | #'     extending the \code{\link{AnnotationFilter}} class (in the example a
131 | #'     \code{\link{GeneIdFilter}} object) or an
132 | #'     \code{\link{AnnotationFilterList}} if the expression contains multiple
133 | #'     conditions (see examples below). Filter expressions have to be written
134 | #'     in the form \code{~ <field> <condition> <value>}, with \code{<field>}
135 | #'     being the default field of the filter class (use the
136 | #'     \code{supportedFilters} function to list all fields and filter classes),
137 | #'     \code{<condition>} the logical expression and \code{<value>} the value
138 | #'     for the filter.
139 | #'
140 | #' @details Filter expressions for the \code{AnnotationFilter} class have to be
141 | #'     written as formulas, i.e. starting with a \code{~}.
142 | #'
143 | #' @note Translation of nested filter expressions using the
144 | #'     \code{AnnotationFilter} function is not yet supported.
145 | #' 
146 | #' @param expr A filter expression, written as a \code{formula}, to be
147 | #'     converted to an \code{AnnotationFilter} or \code{AnnotationFilterList}
148 | #'     class. See below for examples.
149 | #'
150 | #' @return \code{AnnotationFilter} returns an
151 | #'     \code{\link{AnnotationFilter}} or an \code{\link{AnnotationFilterList}}.
152 | #' 
153 | #' @importFrom lazyeval f_eval
154 | #'
155 | #' @examples
156 | #' 
157 | #' ## Convert a filter expression based on a gene ID to a GeneIdFilter
158 | #' gnf <- AnnotationFilter(~ gene_id == "BCL2")
159 | #' gnf
160 | #'
161 | #' ## Same conversion but for two gene IDs.
162 | #' gnf <- AnnotationFilter(~ gene_id %in% c("BCL2", "BCL2L11"))
163 | #' gnf
164 | #'
165 | #' ## Converting an expression that combines multiple filters. As a result we
166 | #' ## get an AnnotationFilterList containing the corresponding filters.
167 | #' ## Be aware that nesting of expressions/filters does not work.
168 | #' flt <- AnnotationFilter(~ gene_id %in% c("BCL2", "BCL2L11") &
169 | #'                         tx_biotype == "nonsense_mediated_decay" |
170 | #'                         seq_name == "Y")
171 | #' flt
172 | #' 
173 | #' @export
174 | AnnotationFilter <- function(expr) {
175 |     flt <- f_eval(expr, data = .LOG_OP_REG)
176 |     ## Clear the grouping flag, which .combine_op sets on its result.
177 |     if (is(flt, "AnnotationFilterList")) flt@.groupingFlag <- FALSE
178 |     flt
179 | }


--------------------------------------------------------------------------------
/.svn/pristine/65/6537ab78a07d200e9fe894802b6b5f17bdd38fd9.svn-base:
--------------------------------------------------------------------------------
  1 | #' @include AnnotationFilter.R
  2 | 
  3 | #' @rdname AnnotationFilterList
  4 | #'
  5 | #' @name AnnotationFilterList
  6 | #'
  7 | #' @title Combining annotation filters
  8 | #'
  9 | #' @aliases AnnotationFilterList-class
 10 | #'
 11 | #' @description The \code{AnnotationFilterList} allows to combine
 12 | #'     filter objects extending the \code{\link{AnnotationFilter}}
 13 | #'     class to construct more complex queries. Consecutive filter
 14 | #'     objects in the \code{AnnotationFilterList} can be combined by a
 15 | #'     logical \emph{and} (\code{&}) or \emph{or} (\code{|}). The
 16 | #'     \code{AnnotationFilterList} extends \code{list}, individual
 17 | #'     elements can thus be accessed with \code{[[}.
 18 | #'
 19 | #' @note The \code{AnnotationFilterList} does not support containing empty
 20 | #'     elements, hence all elements of \code{length == 0} are removed in
 21 | #'     the constructor function.
 22 | #'
 23 | #' @exportClass AnnotationFilterList
 24 | NULL
 25 | ## S4 class extending 'list'; slot 'logOp' holds the logical operators.
 26 | .AnnotationFilterList <- setClass(
 27 |     "AnnotationFilterList",
 28 |     contains = "list",
 29 |     slots = c(logOp = "character")
 30 | )
 31 | ## Operators accepted in 'logOp' (checked by the validity method).
 32 | .LOG_OPS <- c("&", "|")
 33 | 
 34 | setValidity("AnnotationFilterList",
 35 |     function(object)
 36 | {
 37 |     elts <- .aflvalue(object)
 38 |     ops <- .logOp(object)
 39 |     ## An empty list is valid only without any logical operators.
 40 |     if (length(elts) == 0) {
 41 |         if (length(ops))
 42 |             return("'logicOp' can not have length > 0 if the object is empty")
 43 |         return(TRUE)
 44 |     }
 45 |     msgs <- character()
 46 |     ## Content: each element has to be a filter or a (nested) filter list.
 47 |     ## Note that a single element is allowed, 'logOp' is then empty.
 48 |     isFilter <- vapply(
 49 |         elts,
 50 |         function(z) is(z, "AnnotationFilter") || is(z, "AnnotationFilterList"),
 51 |         logical(1)
 52 |     )
 53 |     if (!all(isFilter))
 54 |         msgs <- c(
 55 |             msgs, "only 'AnnotationFilter' or 'AnnotationFilterList' allowed"
 56 |         )
 57 |     ## Empty elements (issue #17) are checked separately from the content
 58 |     ## above so that they are reported with their own message.
 59 |     if (!all(lengths(elts) > 0))
 60 |         msgs <- c(msgs, "Lengths of all elements have to be > 0")
 61 |     ## One operator is needed between each pair of consecutive elements.
 62 |     if (length(ops) != length(elts) - 1)
 63 |         msgs <- c(msgs, "length of 'logicOp' has to be length of the object -1")
 64 |     ## Only the operators listed in .LOG_OPS are accepted.
 65 |     if (!all(ops %in% .LOG_OPS))
 66 |         msgs <- c(msgs, "'logicOp' can only contain '&' and '|'")
 67 | 
 68 |     if (length(msgs)) msgs else TRUE
 69 | })
 70 | 
 71 | ## AnnotationFilterList constructor function.
 72 | #' @rdname AnnotationFilterList
 73 | #'
 74 | #' @name AnnotationFilterList
 75 | #'
 76 | #' @param ... individual \code{\link{AnnotationFilter}} objects or a
 77 | #'     mixture of \code{AnnotationFilter} and
 78 | #'     \code{AnnotationFilterList} objects.
 79 | #'
 80 | #' @param logicOp \code{character} of length equal to the number
 81 | #'     of submitted \code{AnnotationFilter} objects - 1. Each value
 82 | #'     representing the logical operation to combine consecutive
 83 | #'     filters, i.e. the first element being the logical operation to
 84 | #'     combine the first and second \code{AnnotationFilter}, the
 85 | #'     second element being the logical operation to combine the
 86 | #'     second and third \code{AnnotationFilter} and so on. Allowed
 87 | #'     values are \code{"&"} and \code{"|"}. The function assumes a
 88 | #'     logical \emph{and} between all elements by default.
 89 | #'
 90 | #' @param logOp Deprecated; use \code{logicOp=}.
 91 | #'
 92 | #' @seealso \code{\link{supportedFilters}} for available
 93 | #'     \code{\link{AnnotationFilter}} objects
 94 | #'
 95 | #' @return \code{AnnotationFilterList} returns an \code{AnnotationFilterList}.
 96 | #' 
 97 | #' @examples
 98 | #' ## Create some AnnotationFilters
 99 | #' gf <- GenenameFilter(c("BCL2", "BCL2L11"))
100 | #' tbtf <- TxBiotypeFilter("protein_coding", condition = "!=")
101 | #'
102 | #' ## Combine both to an AnnotationFilterList. By default elements are combined
103 | #' ## using a logical "and" operator. The filter list represents thus a query
104 | #' ## like: get all features where the gene name is either ("BCL2" or "BCL2L11")
105 | #' ## and the transcript biotype is not "protein_coding".
106 | #' afl <- AnnotationFilterList(gf, tbtf)
107 | #' afl
108 | #'
109 | #' ## Access individual filters.
110 | #' afl[[1]]
111 | #'
112 | #' ## Create a filter in the form of: get all features where the gene name is
113 | #' ## either ("BCL2" or "BCL2L11") and the transcript biotype is not
114 | #' ## "protein_coding" or the seq_name is "Y". Hence, this will get all features
115 | #' ## also found by the previous AnnotationFilterList and returns also all
116 | #' ## features on chromosome Y.
117 | #' afl <- AnnotationFilterList(gf, tbtf, SeqNameFilter("Y"),
118 | #'                             logicOp = c("&", "|"))
119 | #' afl
120 | #'
121 | #' @export
122 | AnnotationFilterList <-
123 |     function(..., logicOp = character(), logOp = character())
124 | {
125 |     if (!missing(logOp) && missing(logicOp)) {
126 |         logicOp <- logOp
127 |         .Deprecated(msg = "'logOp' deprecated, use 'logicOp'")
128 |     }
129 |     ## Drop empty elements (issue #17).
130 |     filters <- list(...)
131 |     filters <- filters[lengths(filters) > 0]
132 |     ## Default to "&" between elements ('&&': both tests are scalar).
133 |     if (length(filters) > 1 && length(logicOp) == 0)
134 |         logicOp <- rep("&", (length(filters) - 1))
135 |     .AnnotationFilterList(filters, logOp = logicOp)
136 | }
137 | ## Accessor for the 'logOp' slot (the logical operators).
138 | .logOp <- function(object) object@logOp
139 | ## Accessor for the '.Data' part, i.e. the plain list of elements.
140 | .aflvalue <- function(object) object@.Data
141 | 
142 | #' @rdname AnnotationFilterList
143 | #'
144 | #' @description \code{value()} get a \code{list} with the
145 | #'     \code{AnnotationFilter} objects. Use \code{[[} to access
146 | #'     individual filters.
147 | #'
148 | #' @return \code{value()} returns a \code{list} with \code{AnnotationFilter}
149 | #'     objects.
150 | #' 
151 | #' @export
152 | setMethod("value", "AnnotationFilterList", .aflvalue)
153 | 
154 | #' @rdname AnnotationFilterList
155 | #'
156 | #' @aliases logicOp
157 | #'
158 | #' @description \code{logicOp()} gets the logical operators separating
159 | #'     successive \code{AnnotationFilter}.
160 | #'
161 | #' @return \code{logicOp()} returns a \code{character()} vector of
162 | #'     \dQuote{&} or \dQuote{|} symbols.
163 | #'
164 | #' @export logicOp
165 | setMethod("logicOp", "AnnotationFilterList", .logOp)
166 | 
167 | #' @rdname AnnotationFilterList
168 | #'
169 | #' @param object An object of class \code{AnnotationFilterList}.
170 | #'
171 | #' @importFrom utils tail
172 | #' @export
173 | setMethod("show", "AnnotationFilterList",
174 |     function(object)
175 | {
176 |     n <- length(object)
177 |     cat("class: ", class(object), "\n", "length: ", n, "\n", sep = "")
178 |     ## Only the header is printed for an empty list.
179 |     if (!n)
180 |         return(invisible(NULL))
181 |     cat("filters:\n\n")
182 |     for (i in seq_len(n)) {
183 |         ## From the second filter on, first print the operator that
184 |         ## joins it with the preceding filter.
185 |         if (i > 1L)
186 |             cat("\n", logicOp(object)[i - 1L], "\n\n")
187 |         show(object[[i]])
188 |     }
189 | })
190 | 
191 | 


--------------------------------------------------------------------------------
/.svn/pristine/81/81fa7ab77bea17ab59d67e253f098ea223b2e1d3.svn-base:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/AnnotationFilter.R, R/translate-utils.R
  3 | \docType{methods}
  4 | \name{AnnotationFilter}
  5 | \alias{AnnotationFilter}
  6 | \alias{CdsStartFilter}
  7 | \alias{CdsEndFilter}
  8 | \alias{ExonIdFilter}
  9 | \alias{ExonNameFilter}
 10 | \alias{ExonStartFilter}
 11 | \alias{ExonEndFilter}
 12 | \alias{ExonRankFilter}
 13 | \alias{GeneIdFilter}
 14 | \alias{GenenameFilter}
 15 | \alias{GeneBiotypeFilter}
 16 | \alias{GeneStartFilter}
 17 | \alias{GeneEndFilter}
 18 | \alias{EntrezFilter}
 19 | \alias{SymbolFilter}
 20 | \alias{TxIdFilter}
 21 | \alias{TxNameFilter}
 22 | \alias{TxBiotypeFilter}
 23 | \alias{TxStartFilter}
 24 | \alias{TxEndFilter}
 25 | \alias{ProteinIdFilter}
 26 | \alias{UniprotFilter}
 27 | \alias{SeqNameFilter}
 28 | \alias{SeqStrandFilter}
 29 | \alias{AnnotationFilter-class}
 30 | \alias{CharacterFilter-class}
 31 | \alias{IntegerFilter-class}
 32 | \alias{CdsStartFilter-class}
 33 | \alias{CdsEndFilter-class}
 34 | \alias{ExonIdFilter-class}
 35 | \alias{ExonNameFilter-class}
 36 | \alias{ExonStartFilter-class}
 37 | \alias{ExonEndFilter-class}
 38 | \alias{ExonRankFilter-class}
 39 | \alias{GeneIdFilter-class}
 40 | \alias{GenenameFilter-class}
 41 | \alias{GeneBiotypeFilter-class}
 42 | \alias{GeneStartFilter-class}
 43 | \alias{GeneEndFilter-class}
 44 | \alias{EntrezFilter-class}
 45 | \alias{SymbolFilter-class}
 46 | \alias{TxIdFilter-class}
 47 | \alias{TxNameFilter-class}
 48 | \alias{TxBiotypeFilter-class}
 49 | \alias{TxStartFilter-class}
 50 | \alias{TxEndFilter-class}
 51 | \alias{ProteinIdFilter-class}
 52 | \alias{UniprotFilter-class}
 53 | \alias{SeqNameFilter-class}
 54 | \alias{SeqStrandFilter-class}
 55 | \alias{supportedFilters}
 56 | \alias{show,AnnotationFilter-method}
 57 | \alias{show,CharacterFilter-method}
 58 | \alias{show,IntegerFilter-method}
 59 | \alias{show,GRangesFilter-method}
 60 | \alias{condition,AnnotationFilter-method}
 61 | \alias{condition}
 62 | \alias{value,AnnotationFilter-method}
 63 | \alias{value}
 64 | \alias{field,AnnotationFilter-method}
 65 | \alias{field}
 66 | \alias{GRangesFilter-class}
 67 | \alias{.GRangesFilter}
 68 | \alias{GRangesFilter}
 69 | \alias{feature}
 70 | \alias{AnnotationFilter}
 71 | \alias{supportedFilters,missing-method}
 72 | \alias{AnnotationFilter}
 73 | \title{Filters for annotation objects}
 74 | \usage{
 75 | CdsStartFilter(value, condition = "==")
 76 | CdsEndFilter(value, condition = "==")
 77 | ExonIdFilter(value, condition = "==")
 78 | ExonNameFilter(value, condition = "==")
 79 | ExonRankFilter(value, condition = "==")
 80 | ExonStartFilter(value, condition = "==")
 81 | ExonEndFilter(value, condition = "==")
 82 | GeneIdFilter(value, condition = "==")
 83 | GenenameFilter(value, condition = "==")
 84 | GeneBiotypeFilter(value, condition = "==")
 85 | GeneStartFilter(value, condition = "==")
 86 | GeneEndFilter(value, condition = "==")
 87 | EntrezFilter(value, condition = "==")
 88 | SymbolFilter(value, condition = "==")
 89 | TxIdFilter(value, condition = "==")
 90 | TxNameFilter(value, condition = "==")
 91 | TxBiotypeFilter(value, condition = "==")
 92 | TxStartFilter(value, condition = "==")
 93 | TxEndFilter(value, condition = "==")
 94 | ProteinIdFilter(value, condition = "==")
 95 | UniprotFilter(value, condition = "==")
 96 | SeqNameFilter(value, condition = "==")
 97 | SeqStrandFilter(value, condition = "==")
 98 | 
 99 | \S4method{condition}{AnnotationFilter}(object)
100 | 
101 | \S4method{value}{AnnotationFilter}(object)
102 | 
103 | \S4method{field}{AnnotationFilter}(object)
104 | 
105 | GRangesFilter(value, feature = "gene", type = c("any", "start", "end",
106 |   "within", "equal"))
107 | 
108 | feature(object)
109 | 
110 | \S4method{supportedFilters}{missing}(object)
111 | 
112 | AnnotationFilter(expr)
113 | }
114 | \arguments{
115 | \item{object}{An \code{AnnotationFilter} object.}
116 | 
117 | \item{value}{\code{character()}, \code{integer()}, or
118 | \code{GRanges()} value for the filter}
119 | 
120 | \item{feature}{\code{character(1)} defining on what feature the
121 | \code{GRangesFilter} should be applied. Choices could be
122 | \code{"gene"}, \code{"tx"} or \code{"exon"}.}
123 | 
124 | \item{type}{\code{character(1)} indicating how overlaps are to be
125 | filtered. See \code{findOverlaps} in the IRanges package for a
126 | description of this argument.}
127 | 
128 | \item{expr}{A filter expression, written as a \code{formula}, to be
129 | converted to an \code{AnnotationFilter} or \code{AnnotationFilterList}
130 | class. See below for examples.}
131 | 
132 | \item{condition}{\code{character(1)} defining the condition to be
133 | used in the filter. For \code{IntegerFilter}, one of
134 | \code{"=="}, \code{"!="}, \code{">"}, \code{"<"}, \code{">="}
135 | or \code{"<="}. For \code{CharacterFilter}, one of \code{"=="},
136 | \code{"!="}, \code{"startsWith"}, \code{"endsWith"} or \code{"contains"}.
137 | Default condition is \code{"=="}.}
138 | }
139 | \value{
140 | The constructor functions return an object extending
141 |     \code{AnnotationFilter}. For the return value of the other methods see
142 |     the methods' descriptions.
143 | 
144 | \code{AnnotationFilter} returns an
145 |     \code{\link{AnnotationFilter}} or an \code{\link{AnnotationFilterList}}.
146 | }
147 | \description{
148 | The filters extending the base \code{AnnotationFilter} class
149 | represent a simple filtering concept for annotation resources.
150 | Each filter object is thought to filter on a single (database)
151 | table column using the provided values and the defined condition.
152 | 
153 | Filter instances created using the constructor functions (e.g.
154 | \code{GeneIdFilter}).
155 | 
156 | \code{supportedFilters()} lists all defined filters. It returns a two column
157 | \code{data.frame} with the filter class name and its default field.
158 | Packages using \code{AnnotationFilter} should implement the
159 | \code{supportedFilters} for their annotation resource object (e.g. for
160 | \code{object = "EnsDb"} in the \code{ensembldb} package) to list all
161 | supported filters for the specific resource.
162 | 
163 | \code{condition()} get the \code{condition} value for
164 |     the filter \code{object}.
165 | 
166 | \code{value()} get the \code{value} for the filter
167 |     \code{object}.
168 | 
169 | \code{field()} get the \code{field} for the filter
170 |     \code{object}.
171 | 
172 | \code{feature()} get the \code{feature} for the
173 |     \code{GRangesFilter} \code{object}.
174 | 
175 | \code{AnnotationFilter} \emph{translates} a filter
176 |     expression such as \code{~ gene_id == "BCL2"} into a filter object
177 |     extending the \code{\link{AnnotationFilter}} class (in the example a
178 |     \code{\link{GeneIdFilter}} object) or an
179 |     \code{\link{AnnotationFilterList}} if the expression contains multiple
180 |     conditions (see examples below). Filter expressions have to be written
181 |     in the form \code{~ <field> <condition> <value>}, with \code{<field>}
182 |     being the default field of the filter class (use the
183 |     \code{supportedFilters} function to list all fields and filter classes),
184 |     \code{<condition>} the logical expression and \code{<value>} the value
185 |     for the filter.
186 | }
187 | \details{
188 | By default filters are only available for tables containing the
189 | field on which the filter acts (i.e. that contain a column with the
190 | name matching the value of the \code{field} slot of the
191 | object). See the vignette for a description to use filters for
192 | databases in which the database table column name differs from the
193 | default \code{field} of the filter.
194 | 
195 | Filter expressions for the \code{AnnotationFilter} class have to be
196 |     written as formulas, i.e. starting with a \code{~}.
197 | }
198 | \note{
199 | Translation of nested filter expressions using the
200 |     \code{AnnotationFilter} function is not yet supported.
201 | }
202 | \examples{
203 | ## filter by GRanges
204 | GRangesFilter(GenomicRanges::GRanges("chr10:87869000-87876000"))
205 | ## Create a SymbolFilter to filter on a gene's symbol.
206 | sf <- SymbolFilter("BCL2")
207 | sf
208 | 
209 | ## Create a GeneStartFilter to filter based on the genes' chromosomal start
210 | ## coordinates
211 | gsf <- GeneStartFilter(10000, condition = ">")
212 | gsf
213 | 
214 | supportedFilters()
215 | 
216 | ## Convert a filter expression based on a gene ID to a GeneIdFilter
217 | gnf <- AnnotationFilter(~ gene_id == "BCL2")
218 | gnf
219 | 
220 | ## Same conversion but for two gene IDs.
221 | gnf <- AnnotationFilter(~ gene_id \%in\% c("BCL2", "BCL2L11"))
222 | gnf
223 | 
224 | ## Converting an expression that combines multiple filters. As a result we
225 | ## get an AnnotationFilterList containing the corresponding filters.
226 | ## Be aware that nesting of expressions/filters does not work.
227 | flt <- AnnotationFilter(~ gene_id \%in\% c("BCL2", "BCL2L11") &
228 |                         tx_biotype == "nonsense_mediated_decay" |
229 |                         seq_name == "Y")
230 | flt
231 | 
232 | }
233 | \seealso{
234 | \code{\link{AnnotationFilterList}} for combining
235 |     \code{AnnotationFilter} objects.
236 | }
237 | 


--------------------------------------------------------------------------------
/man/AnnotationFilter.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/AnnotationFilter.R, R/translate-utils.R
  3 | \docType{methods}
  4 | \name{AnnotationFilter}
  5 | \alias{AnnotationFilter}
  6 | \alias{CdsStartFilter}
  7 | \alias{CdsEndFilter}
  8 | \alias{ExonIdFilter}
  9 | \alias{ExonNameFilter}
 10 | \alias{ExonStartFilter}
 11 | \alias{ExonEndFilter}
 12 | \alias{ExonRankFilter}
 13 | \alias{GeneIdFilter}
 14 | \alias{GeneNameFilter}
 15 | \alias{GeneBiotypeFilter}
 16 | \alias{GeneStartFilter}
 17 | \alias{GeneEndFilter}
 18 | \alias{EntrezFilter}
 19 | \alias{SymbolFilter}
 20 | \alias{TxIdFilter}
 21 | \alias{TxNameFilter}
 22 | \alias{TxBiotypeFilter}
 23 | \alias{TxStartFilter}
 24 | \alias{TxEndFilter}
 25 | \alias{ProteinIdFilter}
 26 | \alias{UniprotFilter}
 27 | \alias{SeqNameFilter}
 28 | \alias{SeqStrandFilter}
 29 | \alias{AnnotationFilter-class}
 30 | \alias{CharacterFilter-class}
 31 | \alias{DoubleFilter-class}
 32 | \alias{IntegerFilter-class}
 33 | \alias{CdsStartFilter-class}
 34 | \alias{CdsEndFilter-class}
 35 | \alias{ExonIdFilter-class}
 36 | \alias{ExonNameFilter-class}
 37 | \alias{ExonStartFilter-class}
 38 | \alias{ExonEndFilter-class}
 39 | \alias{ExonRankFilter-class}
 40 | \alias{GeneIdFilter-class}
 41 | \alias{GeneNameFilter-class}
 42 | \alias{GeneBiotypeFilter-class}
 43 | \alias{GeneStartFilter-class}
 44 | \alias{GeneEndFilter-class}
 45 | \alias{EntrezFilter-class}
 46 | \alias{SymbolFilter-class}
 47 | \alias{TxIdFilter-class}
 48 | \alias{TxNameFilter-class}
 49 | \alias{TxBiotypeFilter-class}
 50 | \alias{TxStartFilter-class}
 51 | \alias{TxEndFilter-class}
 52 | \alias{ProteinIdFilter-class}
 53 | \alias{UniprotFilter-class}
 54 | \alias{SeqNameFilter-class}
 55 | \alias{SeqStrandFilter-class}
 56 | \alias{supportedFilters}
 57 | \alias{show,AnnotationFilter-method}
 58 | \alias{show,CharacterFilter-method}
 59 | \alias{show,IntegerFilter-method}
 60 | \alias{show,GRangesFilter-method}
 61 | \alias{show,DoubleFilter-method}
 62 | \alias{condition,AnnotationFilter-method}
 63 | \alias{condition}
 64 | \alias{value,AnnotationFilter-method}
 65 | \alias{value}
 66 | \alias{field,AnnotationFilter-method}
 67 | \alias{field}
 68 | \alias{not,AnnotationFilter-method}
 69 | \alias{GRangesFilter-class}
 70 | \alias{.GRangesFilter}
 71 | \alias{GRangesFilter}
 72 | \alias{feature}
 73 | \alias{AnnotationFilter}
 74 | \alias{convertFilter,AnnotationFilter,missing-method}
 75 | \alias{supportedFilters,missing-method}
 76 | \alias{AnnotationFilter}
 77 | \title{Filters for annotation objects}
 78 | \usage{
 79 | CdsStartFilter(value, condition = "==", not = FALSE)
 80 | CdsEndFilter(value, condition = "==", not = FALSE)
 81 | ExonIdFilter(value, condition = "==", not = FALSE)
 82 | ExonNameFilter(value, condition = "==", not = FALSE)
 83 | ExonRankFilter(value, condition = "==", not = FALSE)
 84 | ExonStartFilter(value, condition = "==", not = FALSE)
 85 | ExonEndFilter(value, condition = "==", not = FALSE)
 86 | GeneIdFilter(value, condition = "==", not = FALSE)
 87 | GeneNameFilter(value, condition = "==", not = FALSE)
 88 | GeneBiotypeFilter(value, condition = "==", not = FALSE)
 89 | GeneStartFilter(value, condition = "==", not = FALSE)
 90 | GeneEndFilter(value, condition = "==", not = FALSE)
 91 | EntrezFilter(value, condition = "==", not = FALSE)
 92 | SymbolFilter(value, condition = "==", not = FALSE)
 93 | TxIdFilter(value, condition = "==", not = FALSE)
 94 | TxNameFilter(value, condition = "==", not = FALSE)
 95 | TxBiotypeFilter(value, condition = "==", not = FALSE)
 96 | TxStartFilter(value, condition = "==", not = FALSE)
 97 | TxEndFilter(value, condition = "==", not = FALSE)
 98 | ProteinIdFilter(value, condition = "==", not = FALSE)
 99 | UniprotFilter(value, condition = "==", not = FALSE)
100 | SeqNameFilter(value, condition = "==", not = FALSE)
101 | SeqStrandFilter(value, condition = "==", not = FALSE)
102 | 
103 | \S4method{condition}{AnnotationFilter}(object)
104 | 
105 | \S4method{value}{AnnotationFilter}(object)
106 | 
107 | \S4method{field}{AnnotationFilter}(object)
108 | 
109 | \S4method{not}{AnnotationFilter}(object)
110 | 
111 | GRangesFilter(value, feature = "gene", type = c("any", "start", "end",
112 |   "within", "equal"))
113 | 
114 | feature(object)
115 | 
116 | \S4method{convertFilter}{AnnotationFilter,missing}(object)
117 | 
118 | \S4method{supportedFilters}{missing}(object)
119 | 
120 | AnnotationFilter(expr)
121 | }
122 | \arguments{
123 | \item{object}{An \code{AnnotationFilter} object.}
124 | 
125 | \item{value}{\code{character()}, \code{integer()}, or
126 | \code{GRanges()} value for the filter}
127 | 
128 | \item{feature}{\code{character(1)} defining on what feature the
129 | \code{GRangesFilter} should be applied. Choices could be
130 | \code{"gene"}, \code{"tx"} or \code{"exon"}.}
131 | 
132 | \item{type}{\code{character(1)} indicating how overlaps are to be
133 | filtered. See \code{findOverlaps} in the IRanges package for a
134 | description of this argument.}
135 | 
136 | \item{expr}{A filter expression, written as a \code{formula}, to be
137 | converted to an \code{AnnotationFilter} or \code{AnnotationFilterList}
138 | class. See below for examples.}
139 | 
140 | \item{condition}{\code{character(1)} defining the condition to be
141 | used in the filter. For \code{IntegerFilter} or \code{DoubleFilter},
142 | one of \code{"=="}, \code{"!="}, \code{">"}, \code{"<"}, \code{">="}
143 | or \code{"<="}. For \code{CharacterFilter}, one of \code{"=="},
144 | \code{"!="}, \code{"startsWith"}, \code{"endsWith"} or \code{"contains"}.
145 | Default condition is \code{"=="}.}
146 | 
147 | \item{not}{\code{logical(1)} whether the \code{AnnotationFilter} is negated.
148 | \code{TRUE} indicates that the filter is negated (!), \code{FALSE} that it
149 | is not negated. The default is \code{FALSE}.}
150 | }
151 | \value{
152 | The constructor functions return an object extending
153 |     \code{AnnotationFilter}. For the return value of the other methods see
154 |     the methods' descriptions.
155 | 
156 | \code{character(1)} that can be used as input to a \code{dplyr} 
157 |      filter.
158 | 
159 | \code{AnnotationFilter} returns an
160 |     \code{\link{AnnotationFilter}} or an \code{\link{AnnotationFilterList}}.
161 | }
162 | \description{
163 | The filters extending the base \code{AnnotationFilter} class
164 | represent a simple filtering concept for annotation resources.
165 | Each filter object is thought to filter on a single (database)
166 | table column using the provided values and the defined condition.
167 | 
168 | Filter instances created using the constructor functions (e.g.
169 | \code{GeneIdFilter}).
170 | 
171 | \code{supportedFilters()} lists all defined filters. It returns a two column
172 | \code{data.frame} with the filter class name and its default field.
173 | Packages using \code{AnnotationFilter} should implement the
174 | \code{supportedFilters} for their annotation resource object (e.g. for
175 | \code{object = "EnsDb"} in the \code{ensembldb} package) to list all
176 | supported filters for the specific resource.
177 | 
178 | \code{condition()} get the \code{condition} value for
179 |     the filter \code{object}.
180 | 
181 | \code{value()} get the \code{value} for the filter
182 |     \code{object}.
183 | 
184 | \code{field()} get the \code{field} for the filter
185 |     \code{object}.
186 | 
187 | \code{not()} get the \code{not} for the filter \code{object}.
188 | 
189 | \code{feature()} get the \code{feature} for the
190 |     \code{GRangesFilter} \code{object}.
191 | 
192 | Converts an \code{AnnotationFilter} object to a 
193 |      \code{character(1)} giving an equation that can be used as input to
194 |      a \code{dplyr} filter.
195 | 
196 | \code{AnnotationFilter} \emph{translates} a filter
197 |     expression such as \code{~ gene_id == "BCL2"} into a filter object
198 |     extending the \code{\link{AnnotationFilter}} class (in the example a
199 |     \code{\link{GeneIdFilter}} object) or an
200 |     \code{\link{AnnotationFilterList}} if the expression contains multiple
201 |     conditions (see examples below). Filter expressions have to be written
202 |     in the form \code{~ <field> <condition> <value>}, with \code{<field>}
203 |     being the default field of the filter class (use the
204 |     \code{supportedFilters} function to list all fields and filter classes),
205 |     \code{<condition>} the logical expression and \code{<value>} the value
206 |     for the filter.
207 | }
208 | \details{
209 | By default filters are only available for tables containing the
210 | field on which the filter acts (i.e. that contain a column with the
211 | name matching the value of the \code{field} slot of the
212 | object). See the vignette for a description to use filters for
213 | databases in which the database table column name differs from the
214 | default \code{field} of the filter.
215 | 
216 | Filter expressions for the \code{AnnotationFilter} class have to be
217 |     written as formulas, i.e. starting with a \code{~}.
218 | }
219 | \note{
220 | Translation of nested filter expressions using the
221 |     \code{AnnotationFilter} function is not yet supported.
222 | }
223 | \examples{
224 | ## filter by GRanges
225 | GRangesFilter(GenomicRanges::GRanges("chr10:87869000-87876000"))
226 | ## Create a SymbolFilter to filter on a gene's symbol.
227 | sf <- SymbolFilter("BCL2")
228 | sf
229 | 
230 | ## Create a GeneStartFilter to filter based on the genes' chromosomal start
231 | ## coordinates
232 | gsf <- GeneStartFilter(10000, condition = ">")
233 | gsf
234 | 
235 | filter <- SymbolFilter("ADA", "==")
236 | result <- convertFilter(filter)
237 | result
238 | supportedFilters()
239 | 
240 | ## Convert a filter expression based on a gene ID to a GeneIdFilter
241 | gnf <- AnnotationFilter(~ gene_id == "BCL2")
242 | gnf
243 | 
244 | ## Same conversion but for two gene IDs.
245 | gnf <- AnnotationFilter(~ gene_id \%in\% c("BCL2", "BCL2L11"))
246 | gnf
247 | 
248 | ## Converting an expression that combines multiple filters. As a result we
249 | ## get an AnnotationFilterList containing the corresponding filters.
250 | ## Be aware that nesting of expressions/filters does not work.
251 | flt <- AnnotationFilter(~ gene_id \%in\% c("BCL2", "BCL2L11") &
252 |                         tx_biotype == "nonsense_mediated_decay" |
253 |                         seq_name == "Y")
254 | flt
255 | 
256 | }
257 | \seealso{
258 | \code{\link{AnnotationFilterList}} for combining
259 |     \code{AnnotationFilter} objects.
260 | }
261 | 


--------------------------------------------------------------------------------
/R/AnnotationFilterList.R:
--------------------------------------------------------------------------------
  1 | #' @include AnnotationFilter.R
  2 | 
  3 | #' @rdname AnnotationFilterList
  4 | #'
  5 | #' @name AnnotationFilterList
  6 | #'
  7 | #' @title Combining annotation filters
  8 | #'
  9 | #' @aliases AnnotationFilterList-class
 10 | #'
 11 | #' @description The \code{AnnotationFilterList} allows to combine
 12 | #'     filter objects extending the \code{\link{AnnotationFilter}}
 13 | #'     class to construct more complex queries. Consecutive filter
 14 | #'     objects in the \code{AnnotationFilterList} can be combined by a
 15 | #'     logical \emph{and} (\code{&}) or \emph{or} (\code{|}). The
 16 | #'     \code{AnnotationFilterList} extends \code{list}, individual
 17 | #'     elements can thus be accessed with \code{[[}.
 18 | #'
 19 | #' @note The \code{AnnotationFilterList} does not support containing empty
 20 | #'     elements, hence all elements of \code{length == 0} are removed in
 21 | #'     the constructor function.
 22 | #'
 23 | #' @exportClass AnnotationFilterList
 24 | NULL
 25 | 
 26 | .AnnotationFilterList <- setClass(
 27 |     "AnnotationFilterList",
 28 |     contains = "list",
 29 |     slots = c(logOp = "character",
 30 |               not = "logical",
 31 |               .groupingFlag = "logical")
 32 | )
 33 | 
 34 | .LOG_OPS <- c("&", "|")
 35 | 
 36 | setValidity("AnnotationFilterList",
 37 |     function(object)
 38 | {
 39 |     txt <- character()
 40 |     filters <- .aflvalue(object)
 41 |     logOp <- .logOp(object)
 42 |     not <- .not(object)
 43 |     if (length(filters) == 0 && length(logOp)) {
 44 |         txt <- c(
 45 |             txt, "'logicOp' can not have length > 0 if the object is empty"
 46 |         )
 47 |     } else if (length(filters) != 0) {
 48 |         ## Note: we allow length of filters being 1, but then logOp has
 49 |         ## to be empty.  Check content:
 50 |         fun <- function(z)
 51 |             is(z, "AnnotationFilter") || is(z, "AnnotationFilterList")
 52 |         test <- vapply(filters, fun, logical(1))
 53 |         if (!all(test)){
 54 |             txt <- c(
 55 |                 txt, "only 'AnnotationFilter' or 'AnnotationFilterList' allowed"
 56 |             )
 57 |         }
 58 |         # Check that all elements are non-empty (issue #17). Doing this
 59 |         ## separately from the check above to ensure we get a different error
 60 |         ## message.
 61 |         if (!all(lengths(filters) > 0))
 62 |             txt <- c(txt, "Lengths of all elements have to be > 0")
 63 |         ## Check that logOp has length object -1
 64 |         if (length(logOp) != length(filters) - 1)
 65 |             txt <- c(txt, "length of 'logicOp' has to be length of the object -1")
 66 |         ## Check content of logOp.
 67 |         if (!all(logOp %in% .LOG_OPS))
 68 |             txt <- c(txt, "'logicOp' can only contain '&' and '|'")
 69 |     }
 70 | 
 71 |     if (length(txt)) txt else TRUE
 72 | })
 73 | 
 74 | ## AnnotationFilterList constructor function.
 75 | #' @rdname AnnotationFilterList
 76 | #'
 77 | #' @name AnnotationFilterList
 78 | #'
 79 | #' @param ... individual \code{\link{AnnotationFilter}} objects or a
 80 | #'     mixture of \code{AnnotationFilter} and
 81 | #'     \code{AnnotationFilterList} objects.
 82 | #'
 83 | #' @param logicOp \code{character} of length equal to the number
 84 | #'     of submitted \code{AnnotationFilter} objects - 1. Each value
 85 | #'     representing the logical operation to combine consecutive
 86 | #'     filters, i.e. the first element being the logical operation to
 87 | #'     combine the first and second \code{AnnotationFilter}, the
 88 | #'     second element being the logical operation to combine the
 89 | #'     second and third \code{AnnotationFilter} and so on. Allowed
 90 | #'     values are \code{"&"} and \code{"|"}. The function assumes a
 91 | #'     logical \emph{and} between all elements by default.
 92 | #'
 93 | #' @param logOp Deprecated; use \code{logicOp=}.
 94 | #'
 95 | #' @param .groupingFlag Flag designated for internal use only.
 96 | #'
 97 | #' @param not \code{logical} of length one. Indicates whether the grouping
 98 | #'      of \code{AnnotationFilters} are to be negated.
 99 | #'
100 | #' @seealso \code{\link{supportedFilters}} for available
101 | #'     \code{\link{AnnotationFilter}} objects
102 | #'
103 | #' @return \code{AnnotationFilterList} returns an \code{AnnotationFilterList}.
104 | #' 
105 | #' @examples
106 | #' ## Create some AnnotationFilters
107 | #' gf <- GeneNameFilter(c("BCL2", "BCL2L11"))
108 | #' tbtf <- TxBiotypeFilter("protein_coding", condition = "!=")
109 | #'
110 | #' ## Combine both to an AnnotationFilterList. By default elements are combined
111 | #' ## using a logical "and" operator. The filter list represents thus a query
112 | #' ## like: get all features where the gene name is either ("BCL2" or "BCL2L11")
113 | #' ## and the transcript biotype is not "protein_coding".
114 | #' afl <- AnnotationFilterList(gf, tbtf)
115 | #' afl
116 | #'
117 | #' ## Access individual filters.
118 | #' afl[[1]]
119 | #'
120 | #' ## Create a filter in the form of: get all features where the gene name is
121 | #' ## either ("BCL2" or "BCL2L11") and the transcript biotype is not
122 | #' ## "protein_coding" or the seq_name is "Y". Hence, this will get all feature
123 | #' ## also found by the previous AnnotationFilterList and returns also all
124 | #' ## features on chromosome Y.
125 | #' afl <- AnnotationFilterList(gf, tbtf, SeqNameFilter("Y"),
126 | #'                             logicOp = c("&", "|"))
127 | #' afl
128 | #'
129 | #' @export
AnnotationFilterList <-
    function(..., logicOp = character(), logOp = character(), not = FALSE,
        .groupingFlag=FALSE)
{
    ## 'logOp' is the deprecated spelling of 'logicOp'. It is only honoured
    ## (with a deprecation warning) when 'logicOp' itself was not supplied.
    if (!missing(logOp) && missing(logicOp)) {
        logicOp <- logOp
        .Deprecated(msg = "'logOp' deprecated, use 'logicOp'")
    }
    filters <- list(...)

    ## Drop zero-length elements (e.g. empty nested lists): the class
    ## validity requires every element to have length > 0.
    filters <- filters[lengths(filters) != 0]

    ## Both operands are scalars, so use the short-circuit '&&' instead of
    ## the element-wise '&'. Without explicit operators we assume a logical
    ## "and" between all remaining elements.
    if (length(filters) > 1 && length(logicOp) == 0)
        logicOp <- rep("&", length(filters) - 1)

    ## Object construction (and thereby validation) is done by the class
    ## generator.
    .AnnotationFilterList(filters, logOp = logicOp, not = not,
        .groupingFlag = .groupingFlag)
}
150 | 
## Internal slot accessors; they back the exported logicOp(), value() and
## not() methods registered with setMethod().

## Content of the 'logOp' slot.
.logOp <- function(object) {
    object@logOp
}

## The list data part of the object, i.e. its elements as a plain list.
.aflvalue <- function(object) {
    object@.Data
}

## Content of the 'not' slot.
.not <- function(object) {
    object@not
}
156 | 
#' @rdname AnnotationFilterList
#'
#' @description \code{value()} gets a \code{list} with the elements of the
#'     \code{AnnotationFilterList}, i.e. \code{AnnotationFilter} objects
#'     and, for nested lists, \code{AnnotationFilterList} objects. Use
#'     \code{[[} to access individual filters.
#'
#' @return \code{value()} returns a \code{list} with the elements of the
#'     \code{AnnotationFilterList}.
#' 
#' @export
setMethod("value", "AnnotationFilterList", .aflvalue)

#' @rdname AnnotationFilterList
#'
#' @aliases logicOp
#'
#' @description \code{logicOp()} gets the logical operators separating
#'     successive elements of the \code{AnnotationFilterList}.
#'
#' @return \code{logicOp()} returns a \code{character()} vector of
#'     \dQuote{&} or \dQuote{|} symbols.
#'
#' @export logicOp
setMethod("logicOp", "AnnotationFilterList", .logOp)
181 | 
#' @rdname AnnotationFilterList
#'
#' @aliases not
#'
#' @description \code{not()} gets the negation flag of the
#'     \code{AnnotationFilterList}, i.e. the value of its \code{not} slot.
#'
#' @return \code{not()} returns the \code{logical} stored in the \code{not}
#'     slot of the object.
#'
#' @export not
setMethod("not", "AnnotationFilterList", .not)
194 | 
## Push the negation of an AnnotationFilterList down to its elements
## (De Morgan's laws).
##
## object: an AnnotationFilterList.
## .prior_negation: logical; negation state inherited from enclosing lists
##     while recursing into nested AnnotationFilterLists.
##
## Returns a new AnnotationFilterList, built with the AnnotationFilterList()
## constructor and its default 'not', in which the 'not' slot of every
## contained filter has been toggled and every '&'/'|' operator swapped
## wherever the effective negation is TRUE.
.distributeNegation <- function(object, .prior_negation=FALSE)
{
    ## A negated list flips whatever negation state was handed in.
    if (.not(object))
        .prior_negation <- !.prior_negation

    filters <- lapply(object, function(x) {
        ## Nested lists are processed recursively through the generic so
        ## that they see the current negation state.
        if (is(x, "AnnotationFilterList"))
            return(distributeNegation(x, .prior_negation))
        if (.prior_negation)
            x@not <- !x@not
        x
    })

    ## Under negation '&' becomes '|' and every other operator becomes '&'.
    ## The mask is computed once so that the two assignments cannot
    ## interfere; an empty operator vector stays character(0).
    ops <- unname(logicOp(object))
    if (.prior_negation) {
        is_and <- ops == "&"
        ops[is_and] <- "|"
        ops[!is_and] <- "&"
    }

    ## The operators are handed to the constructor as its 'logicOp'
    ## argument, next to the filters.
    filters[["logicOp"]] <- ops
    do.call("AnnotationFilterList", filters)
}
223 | 
#' @rdname AnnotationFilterList
#'
#' @aliases distributeNegation
#'
#' @description \code{distributeNegation()} moves the negation of an
#'     \code{AnnotationFilterList} (and of nested
#'     \code{AnnotationFilterList} objects) onto the contained
#'     \code{AnnotationFilter} objects by applying DeMorgan's law.
#'
#' @param .prior_negation \code{logical(1)} used internally while recursing
#'     into nested \code{AnnotationFilterList} objects to pass on the
#'     negation state of the enclosing lists. Defaults to \code{FALSE}.
#'
#' @return \code{AnnotationFilterList} object with DeMorgan's law applied to
#'      it such that it is equal to the original \code{AnnotationFilterList}
#'      object but all \code{!}'s are distributed out of the
#'      \code{AnnotationFilterList} object and to the nested
#'      \code{AnnotationFilter} objects.
#'
#' @examples
#' afl <- AnnotationFilter(~!(symbol == 'ADA' | symbol %startsWith% 'SNORD'))
#' afl <- distributeNegation(afl)
#' afl
#' @export
setMethod("distributeNegation", "AnnotationFilterList", .distributeNegation)
244 | 
#' Convert an AnnotationFilterList into a single filter expression string.
#'
#' @param object An \code{AnnotationFilterList}.
#'
#' @param show \code{logical(1)}. If \code{TRUE}, filters whose field is
#'     \code{"granges"} are replaced by a placeholder name
#'     (\code{GRangesFilter_<n>}) and collected in the second element of
#'     the result; otherwise they are converted with \code{.convertFilter}
#'     like any other filter.
#'
#' @param granges \code{list} of the filters replaced by placeholders so
#'     far; used to number placeholders consistently across nested lists.
#'
#' @param nested \code{logical(1)}; if \code{TRUE} the expression is wrapped
#'     in parentheses.
#'
#' @return A \code{list} of length two: the expression as a
#'     \code{character(1)} and the \code{list} of filters that were replaced
#'     by placeholders.
#'
#' @importFrom utils head tail
#'
#' @noRd
.convertFilterList <- function(object, show, granges=list(), nested=FALSE)
{
    filters <- value(object)
    result <- character(length(filters))
    for (i in seq_along(filters)) {
        flt <- filters[[i]]
        if (is(flt, "AnnotationFilterList")) {
            res <- .convertFilterList(flt, show=show, granges=granges,
                nested=TRUE)
            ## The nested call was seeded with 'granges' and returns the
            ## complete accumulated list, so take it as is. Appending it
            ## to 'granges' would duplicate the earlier entries and shift
            ## the numbering of later placeholders.
            granges <- res[[2]]
            result[i] <- res[[1]]
        }
        else if (field(flt) == "granges" && show) {
            ## Replace the filter by a numbered placeholder and remember it.
            result[i] <- paste0("GRangesFilter_", length(granges) + 1)
            granges <- c(granges, list(flt))
        }
        else
            result[i] <- .convertFilter(flt)
    }

    ## Interleave the expressions with the operators: rbind() pairs each
    ## expression but the last with the operator following it, c() flattens
    ## the pairs column-wise, and the last expression is appended again.
    result_last <- tail(result, 1)
    result <- head(result, -1)
    result <- c(rbind(result, logicOp(object)))
    result <- c(result, result_last)
    result <- paste(result, collapse=" ")
    ## Nested or negated lists are parenthesised; negation is prefixed.
    if (nested || object@not)
        result <- paste0("(", result, ")")
    if (object@not)
        result <- paste0("!", result)

    list(result, granges)
}
284 | 
285 | #' @rdname AnnotationFilterList
286 | #'
287 | #' @aliases convertFilter
288 | #'
289 | #' @description Converts an \code{AnnotationFilterList} object to a
290 | #'      \code{character(1)} giving an equation that can be used as input to
291 | #'      a \code{dplyr} filter.
292 | #'
293 | #' @return \code{character(1)} that can be used as input to a \code{dplyr}
294 | #'      filter.
295 | #'
296 | #' @examples
297 | #' afl <- AnnotationFilter(~symbol=="ADA" & tx_start > "400000")
298 | #' result <- convertFilter(afl)
299 | #' result
300 | #' @export
setMethod(
    "convertFilter",
    signature(object = "AnnotationFilterList", db = "missing"),
    function(object)
{
    ## With show=FALSE every filter is converted via .convertFilter();
    ## only the expression string (first element) is of interest here.
    .convertFilterList(object, show=FALSE)[[1]]
})
307 | 
#' @rdname AnnotationFilterList
#'
#' @param object An object of class \code{AnnotationFilterList}.
#'
#' @importFrom utils tail
#' @export
setMethod("show", "AnnotationFilterList", function(object)
{
    ## With show=TRUE filters on the "granges" field appear in the
    ## expression as placeholders (GRangesFilter_<n>); the filters
    ## themselves are returned separately and printed below.
    converted <- .convertFilterList(object, show=TRUE)
    expression <- converted[[1]]
    granges <- converted[[2]]
    cat("AnnotationFilterList of length", length(object), "\n")
    cat(expression)
    cat("\n")
    for (i in seq_along(granges)) {
        cat("\n")
        cat("Symbol: GRangesFilter_", i, "\n", sep="")
        ## Print the filter belonging to placeholder 'i' (the first filter
        ## was previously printed for every placeholder).
        show(granges[[i]])
        cat("\n")
    }
})
329 | 


--------------------------------------------------------------------------------
/.svn/pristine/21/2189e6a627b4c711e766c184f50bb9cdc230e821.svn-base:
--------------------------------------------------------------------------------
  1 | #' @name AnnotationFilter
  2 | #'
  3 | #' @title Filters for annotation objects
  4 | #'
  5 | #' @aliases CdsStartFilter CdsEndFilter ExonIdFilter ExonNameFilter
  6 | #'     ExonStartFilter ExonEndFilter ExonRankFilter GeneIdFilter
  7 | #'     GenenameFilter GeneBiotypeFilter GeneStartFilter GeneEndFilter
  8 | #'     EntrezFilter SymbolFilter TxIdFilter TxNameFilter
  9 | #'     TxBiotypeFilter TxStartFilter TxEndFilter ProteinIdFilter
 10 | #'     UniprotFilter SeqNameFilter SeqStrandFilter
 11 | #'     AnnotationFilter-class CharacterFilter-class
 12 | #'     IntegerFilter-class CdsStartFilter-class CdsEndFilter-class
 13 | #'     ExonIdFilter-class ExonNameFilter-class ExonStartFilter-class
 14 | #'     ExonEndFilter-class ExonRankFilter-class GeneIdFilter-class
 15 | #'     GenenameFilter-class GeneBiotypeFilter-class
 16 | #'     GeneStartFilter-class GeneEndFilter-class EntrezFilter-class
 17 | #'     SymbolFilter-class TxIdFilter-class TxNameFilter-class
 18 | #'     TxBiotypeFilter-class TxStartFilter-class TxEndFilter-class
 19 | #'     ProteinIdFilter-class UniprotFilter-class SeqNameFilter-class
 20 | #'     SeqStrandFilter-class supportedFilters
 21 | #'     show,AnnotationFilter-method show,CharacterFilter-method
 22 | #'     show,IntegerFilter-method show,GRangesFilter-method
 23 | #'
 24 | #' @description
 25 | #'
 26 | #' The filters extending the base \code{AnnotationFilter} class
 27 | #' represent a simple filtering concept for annotation resources.
 28 | #' Each filter object is thought to filter on a single (database)
 29 | #' table column using the provided values and the defined condition.
 30 | #'
 31 | #' Filter instances are created using the constructor functions (e.g.
 32 | #' \code{GeneIdFilter}).
 33 | #'
 34 | #' \code{supportedFilters()} lists all defined filters. It returns a two column
 35 | #' \code{data.frame} with the filter class name and its default field.
 36 | #' Packages using \code{AnnotationFilter} should implement the
 37 | #' \code{supportedFilters} for their annotation resource object (e.g. for
 38 | #' \code{object = "EnsDb"} in the \code{ensembldb} package) to list all
 39 | #' supported filters for the specific resource.
 40 | #'
 41 | #' @details
 42 | #'
 43 | #' By default filters are only available for tables containing the
 44 | #' field on which the filter acts (i.e. that contain a column with the
 45 | #' name matching the value of the \code{field} slot of the
 46 | #' object). See the vignette for a description to use filters for
 47 | #' databases in which the database table column name differs from the
 48 | #' default \code{field} of the filter.
 49 | #'
 50 | #' @usage
 51 | #'
 52 | #' CdsStartFilter(value, condition = "==")
 53 | #' CdsEndFilter(value, condition = "==")
 54 | #' ExonIdFilter(value, condition = "==")
 55 | #' ExonNameFilter(value, condition = "==")
 56 | #' ExonRankFilter(value, condition = "==")
 57 | #' ExonStartFilter(value, condition = "==")
 58 | #' ExonEndFilter(value, condition = "==")
 59 | #' GeneIdFilter(value, condition = "==")
 60 | #' GenenameFilter(value, condition = "==")
 61 | #' GeneBiotypeFilter(value, condition = "==")
 62 | #' GeneStartFilter(value, condition = "==")
 63 | #' GeneEndFilter(value, condition = "==")
 64 | #' EntrezFilter(value, condition = "==")
 65 | #' SymbolFilter(value, condition = "==")
 66 | #' TxIdFilter(value, condition = "==")
 67 | #' TxNameFilter(value, condition = "==")
 68 | #' TxBiotypeFilter(value, condition = "==")
 69 | #' TxStartFilter(value, condition = "==")
 70 | #' TxEndFilter(value, condition = "==")
 71 | #' ProteinIdFilter(value, condition = "==")
 72 | #' UniprotFilter(value, condition = "==")
 73 | #' SeqNameFilter(value, condition = "==")
 74 | #' SeqStrandFilter(value, condition = "==")
 75 | #'
 76 | #' @param value \code{character()}, \code{integer()}, or
 77 | #'     \code{GRanges()} value for the filter
 78 | #'
 79 | #' @param condition \code{character(1)} defining the condition to be
 80 | #'     used in the filter. For \code{IntegerFilter}, one of
 81 | #'     \code{"=="}, \code{"!="}, \code{">"}, \code{"<"}, \code{">="}
 82 | #'     or \code{"<="}. For \code{CharacterFilter}, one of \code{"=="},
 83 | #'     \code{"!="}, \code{"startsWith"}, \code{"endsWith"} or \code{"contains"}.
 84 | #'     Default condition is \code{"=="}.
 85 | #'
 86 | #' @return The constructor functions return an object extending
 87 | #'     \code{AnnotationFilter}. For the return value of the other methods see
 88 | #'     the methods' descriptions.
 89 | #' 
 90 | #' @seealso \code{\link{AnnotationFilterList}} for combining
 91 | #'     \code{AnnotationFilter} objects.
 92 | NULL
 93 | 
## Conditions accepted by each filter type, keyed by class name. Looked up
## by .valid_condition() when validating the 'condition' of a filter.
.CONDITION <- list(
    IntegerFilter = c("==", "!=", ">", "<", ">=", "<="),
    CharacterFilter =  c("==", "!=", "startsWith", "endsWith", "contains"),
    GRangesFilter = c("any", "start", "end", "within", "equal")
)

## Default fields, grouped by the filter type they belong to. A filter
## class and a constructor function are generated for every field listed
## here, with the class name derived from the field name by
## .fieldToClass().
.FIELD <- list(
    CharacterFilter = c(
        "exon_id", "exon_name", "gene_id", "genename", "gene_biotype",
        "entrez", "symbol", "tx_id", "tx_name", "tx_biotype",
        "protein_id", "uniprot", "seq_name", "seq_strand"),
    IntegerFilter = c(
        "cds_start", "cds_end", "exon_start", "exon_rank", "exon_end",
        "gene_start", "gene_end", "tx_start", "tx_end")
)
109 | 
## Check a filter condition against the conditions allowed for 'class'.
##
## condition: the condition to check; expected to be of length 1.
## class: name of the entry in .CONDITION listing the allowed conditions.
##
## Returns TRUE if 'condition' is valid, otherwise a character vector with
## the problems found (the format expected from S4 validity functions).
.valid_condition <- function(condition, class) {
    allowed <- .CONDITION[[class]]
    is_scalar <- length(condition) == 1L

    problems <-
        if (is_scalar) character() else "'condition' must be length 1"

    ## The membership message is added for anything that is not a single
    ## allowed condition, i.e. also when the length check above failed.
    if (!(is_scalar && (condition %in% allowed))) {
        choices <- paste(sQuote(allowed), collapse=" ")
        problems <- c(
            problems, paste0("'", condition, "' must be in ", choices)
        )
    }

    if (length(problems)) problems else TRUE
}
125 | 
126 | ############################################################
127 | ## AnnotationFilter
128 | ##
129 | 
#' @exportClass AnnotationFilter
.AnnotationFilter <- setClass(
    "AnnotationFilter",
    ## Virtual base class: it can not be instantiated itself, filters are
    ## objects of the classes extending it.
    contains = "VIRTUAL",
    slots = c(
        field="character",       # field the filter acts on
        condition="character",   # condition used for the comparison
        value="ANY"              # narrowed to a specific type by subclasses
    ),
    prototype=list(
        ## Default condition unless a subclass or constructor overrides it.
        condition= "=="
    )
)
143 | 
setValidity("AnnotationFilter", function(object) {
    ## Returns TRUE for a valid object, otherwise a character vector with
    ## one message per problem found.
    problems <- character()

    val <- .value(object)
    cond <- .condition(object)
    single_cond <- length(cond) == 1L
    cond_has_na <- any(is.na(cond))

    if (single_cond && cond_has_na)
        problems <- c(problems, "'condition' can not be NA")

    ## Conditions listed here compare against exactly one value. The check
    ## is only made for a single, non-NA condition.
    scalar_only <- cond %in% c("startsWith", "endsWith", "contains", ">",
                               "<", ">=", "<=")
    if (single_cond && !cond_has_na && scalar_only && length(val) > 1L)
        problems <- c(
            problems, paste0("'", cond, "' requires length 1 'value'")
        )

    if (any(is.na(val)))
        problems <- c(problems, "'value' can not be NA")

    if (length(problems)) problems else TRUE
})
166 | 
## Internal slot accessors; they back the exported field(), condition() and
## value() methods registered with setMethod().

## Content of the 'field' slot.
.field <- function(object) {
    object@field
}

## Content of the 'condition' slot.
.condition <- function(object) {
    object@condition
}

## Content of the 'value' slot.
.value <- function(object) {
    object@value
}
172 | 
#' @rdname AnnotationFilter
#'
#' @aliases condition
#'
#' @description \code{condition()} gets the \code{condition} value for
#'     the filter \code{object}, i.e. the content of its \code{condition}
#'     slot.
#'
#' @param object An \code{AnnotationFilter} object.
#' 
#' @export
setMethod("condition", "AnnotationFilter", .condition)

#' @rdname AnnotationFilter
#'
#' @aliases value
#'
#' @description \code{value()} gets the \code{value} for the filter
#'     \code{object}, i.e. the content of its \code{value} slot.
#'
#' @export
setMethod("value", "AnnotationFilter", .value)

#' @rdname AnnotationFilter
#'
#' @aliases field
#'
#' @description \code{field()} gets the \code{field} for the filter
#'     \code{object}, i.e. the content of its \code{field} slot.
#'
#' @export
setMethod("field", "AnnotationFilter", .field)
204 | 
205 | #' @importFrom methods show
206 | #'
207 | #' @export
setMethod("show", "AnnotationFilter", function(object){
    ## Two-line header with the class and the condition of the filter. The
    ## show methods of subclasses call this method via callNextMethod()
    ## before printing their own fields.
    cat("class:", class(object), "\n")
    cat("condition:", .condition(object), "\n")
})
212 | 
213 | ############################################################
214 | ## CharacterFilter, IntegerFilter
215 | ##
216 | 
#' @exportClass CharacterFilter
.CharacterFilter <- setClass(
    "CharacterFilter",
    ## Virtual as well: concrete character filters extend this class.
    contains = c("VIRTUAL", "AnnotationFilter"),
    ## Narrows the 'value' slot of AnnotationFilter to character.
    slots = c(value = "character"),
    prototype = list(
        value = character()
    )
)

## The condition has to be one of those listed for "CharacterFilter" in
## .CONDITION.
setValidity("CharacterFilter", function(object) {
    .valid_condition(.condition(object), "CharacterFilter")
})

#' @importFrom methods show callNextMethod
#'
#' @export
setMethod("show", "CharacterFilter", function(object) {
    ## Let the next (inherited) show method print its part first, then add
    ## the value(s).
    callNextMethod()
    cat("value:", .value(object), "\n")
})
238 | 
#' @exportClass IntegerFilter
.IntegerFilter <- setClass(
    "IntegerFilter",
    ## Virtual as well: concrete integer filters extend this class.
    contains = c("VIRTUAL", "AnnotationFilter"),
    ## Narrows the 'value' slot of AnnotationFilter to integer.
    slots = c(value = "integer"),
    prototype = list(
        value = integer()
    )
)

## The condition has to be one of those listed for "IntegerFilter" in
## .CONDITION.
setValidity("IntegerFilter", function(object) {
    .valid_condition(.condition(object), "IntegerFilter")
})

#' @export
setMethod("show", "IntegerFilter", function(object) {
    ## Let the next (inherited) show method print its part first, then add
    ## the value(s).
    callNextMethod()
    cat("value:", .value(object), "\n")
})
258 | 
#' @rdname AnnotationFilter
#'
#' @importFrom GenomicRanges GRanges
#'
#' @importClassesFrom GenomicRanges GRanges
#'
#' @exportClass GRangesFilter
.GRangesFilter <- setClass(
    "GRangesFilter",
    ## Concrete (non-virtual) filter extending AnnotationFilter directly.
    contains = "AnnotationFilter",
    slots = c(
        value = "GRanges",       # narrows the 'value' slot to GRanges
        feature = "character"    # additional slot specific to this class
    ),
    prototype = list(
        value  = GRanges(),
        condition = "any",
        field = "granges",
        feature = "gene"
    )
)

## The condition has to be one of those listed for "GRangesFilter" in
## .CONDITION.
setValidity("GRangesFilter", function(object) {
    .valid_condition(.condition(object), "GRangesFilter")
})

## Internal accessor for the 'feature' slot.
.feature <- function(object) object@feature
286 | 
287 | #' @rdname AnnotationFilter
288 | #'
289 | #' @param type \code{character(1)} indicating how overlaps are to be
290 | #'     filtered. See \code{findOverlaps} in the IRanges package for a
291 | #'     description of this argument.
292 | #'
293 | #' @examples
294 | #' ## filter by GRanges
295 | #' GRangesFilter(GenomicRanges::GRanges("chr10:87869000-87876000"))
296 | #' @export
GRangesFilter <-
    function(value, feature = "gene",
             type = c("any", "start", "end", "within", "equal"))
{
    ## 'type' is matched against the choices in the signature (the first
    ## one being the default) and stored as the condition of the filter.
    overlap_type <- match.arg(type)
    ## The field of a GRangesFilter is always "granges".
    .GRangesFilter(
        field = "granges",
        value = value,
        condition = overlap_type,
        feature = feature
    )
}
308 | 
## Internal accessor for the 'feature' slot. NOTE(review): an identical
## definition of .feature precedes the GRangesFilter constructor in this
## file; one of the two is redundant.
.feature <- function(object) object@feature

#' @aliases feature
#'
#' @description \code{feature()} gets the \code{feature} for the
#'     \code{GRangesFilter} \code{object}, i.e. the content of its
#'     \code{feature} slot.
#'
#' @rdname AnnotationFilter
#'
#' @export
feature <- .feature
320 | 
#' @importFrom GenomicRanges show
#'
#' @export
setMethod("show", "GRangesFilter", function(object) {
    ## Let the next (inherited) show method print its part first, then add
    ## the feature and the value of the filter.
    callNextMethod()
    cat("feature:", .feature(object),
        "\nvalue:\n")
    ## The value is printed with its own show method.
    show(value(object))
})
330 | 
331 | 
332 | ############################################################
333 | ## Create install-time classes
334 | ##
335 | 
336 | #' @rdname AnnotationFilter
337 | #'
338 | #' @name AnnotationFilter
339 | #'
340 | #' @param feature \code{character(1)} defining on what feature the
341 | #'     \code{GRangesFilter} should be applied. Choices could be
342 | #'     \code{"gene"}, \code{"tx"} or \code{"exon"}.
343 | #'
344 | #' @examples
345 | #' ## Create a SymbolFilter to filter on a gene's symbol.
346 | #' sf <- SymbolFilter("BCL2")
347 | #' sf
348 | #'
349 | #' ## Create a GeneStartFilter to filter based on the genes' chromosomal start
350 | #' ## coordinates
351 | #' gsf <- GeneStartFilter(10000, condition = ">")
352 | #' gsf
353 | #'
354 | #' @export CdsStartFilter CdsEndFilter ExonIdFilter ExonNameFilter
355 | #' @export ExonStartFilter ExonEndFilter ExonRankFilter GeneIdFilter
356 | #' @export GenenameFilter GeneBiotypeFilter GeneStartFilter
357 | #' @export GeneEndFilter EntrezFilter SymbolFilter TxIdFilter
358 | #' @export TxNameFilter TxBiotypeFilter TxStartFilter TxEndFilter
359 | #' @export ProteinIdFilter UniprotFilter SeqNameFilter SeqStrandFilter
360 | #'
361 | #' @importFrom methods new
362 | #'
363 | #' @exportClass CdsStartFilter CdsEndFilter ExonIdFilter
364 | #'     ExonNameFilter ExonStartFilter ExonEndFilter ExonRankFilter
365 | #'     GeneIdFilter GenenameFilter GeneBiotypeFilter GeneStartFilter
366 | #'     GeneEndFilter EntrezFilter SymbolFilter TxIdFilter TxNameFilter
367 | #'     TxBiotypeFilter TxStartFilter TxEndFilter ProteinIdFilter
368 | #'     UniprotFilter SeqNameFilter SeqStrandFilter
369 | NULL
370 | 
## Derive filter class names from field names: snake_case is converted to
## CamelCase and "Filter" is appended, e.g. "gene_id" -> "GeneIdFilter".
## Vectorized over 'field'; zero-length input yields character(0).
.fieldToClass <- function(field) {
    ## Upper-case every letter following an underscore (dropping the
    ## underscore), then upper-case a leading letter.
    camel <- gsub("_([[:alpha:]])", "\\U\\1", field, perl=TRUE)
    camel <- sub("^([[:alpha:]])", "\\U\\1", camel, perl=TRUE)
    if (length(camel) == 0L)
        return(character(0))
    paste0(camel, "Filter")
}
376 | 
## Build the constructor function for one filter class.
##
## field: the field of the filter; also decides how values are coerced.
## class: name of the class the constructor instantiates.
##
## Returns a function(value, condition = "==") creating an object of
## 'class' with the given field, the condition coerced to character and the
## coerced value.
.filterFactory <- function(field, class) {
    ## Force the promises now: the returned closure must not depend on
    ## variables of the caller that change later (e.g. loop variables).
    force(field)
    force(class)

    ## Fields listed for "CharacterFilter" take character values; values
    ## for all other fields have to be numeric and are stored as integer.
    coerce_value <-
        if (field %in% .FIELD[["CharacterFilter"]]) {
            as.character
        } else {
            function(x) {
                stopifnot(is.numeric(x))
                as.integer(x)
            }
        }

    function(value, condition = "==") {
        value <- coerce_value(value)
        condition <- as.character(condition)
        new(class, field=field, condition=condition, value=value)
    }
}
395 | 
## Create one filter class and its constructor function for every field
## listed in .FIELD. Wrapped in local() so that the helper and the loop
## variables do not end up in the enclosing environment.
local({
    ## For each field registered under 'contains' ("CharacterFilter" or
    ## "IntegerFilter") define a class extending 'contains' and assign a
    ## constructor function with the same name as the class.
    makeClass <- function(contains) {
        fields <- .FIELD[[contains]]
        classes <- .fieldToClass(fields)
        for (i in seq_along(fields)) {
            ## Both the class definition and the constructor are placed in
            ## topenv() rather than in this local environment.
            setClass(classes[[i]], contains=contains, where=topenv())
            assign(
                classes[[i]],
                .filterFactory(fields[[i]], classes[[i]]),
                envir=topenv()
            )
        }
    }
    for (contains in names(.FIELD))
        makeClass(contains)
})
412 | 
413 | ############################################################
414 | ## Utilities - supportedFilters
415 | ##
416 | 
## Filter classes that are not generated from a field in .FIELD.
.FILTERS_WO_FIELD <- c("GRangesFilter")

## List all filter classes together with their default field.
##
## Returns a data.frame with columns 'filter' (class name) and 'field'
## (NA for the classes in .FILTERS_WO_FIELD), ordered by 'filter'.
.supportedFilters <- function() {
    field_names <- unlist(.FIELD, use.names=FALSE)
    class_names <- .fieldToClass(field_names)
    tbl <- data.frame(
      filter=c(class_names, .FILTERS_WO_FIELD),
      field=c(field_names, rep(NA, length(.FILTERS_WO_FIELD)))
    )
    tbl[order(tbl$filter),]
}
428 | 
429 | #' @rdname AnnotationFilter
430 | #'
431 | #' @examples
432 | #' supportedFilters()
433 | #' @export
setMethod(
    "supportedFilters", "missing",
    ## Called without an object: list the filters defined in this file.
    function(object) .supportedFilters()
)
437 | 


--------------------------------------------------------------------------------
/.svn/pristine/4e/4e9ec76b932b7ba44f5280dec6263ea963e53920.svn-base:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Facilities for Filtering Bioconductor Annotation Resources"
  3 | output:
  4 |     BiocStyle::html_document2:
  5 |         toc_float: true
  6 | vignette: >
  7 |     %\VignetteIndexEntry{Facilities for Filtering Bioconductor Annotation resources}
  8 |     %\VignetteEngine{knitr::rmarkdown}
  9 |     %\VignetteEncoding{UTF-8}
 10 |     %\VignettePackage{AnnotationFilter}
 11 |     %\VignetteDepends{org.Hs.eg.db,BiocStyle,RSQLite}
 12 | ---
 13 | 
 14 | ```{r style, echo = FALSE, results = 'asis', message=FALSE}
 15 | BiocStyle::markdown()
 16 | ```
 17 | 
 18 | **Package**: `r Biocpkg("AnnotationFilter")`<br />
 19 | **Authors**: `r packageDescription("AnnotationFilter")[["Author"]] `<br />
 20 | **Last modified:** `r file.info("AnnotationFilter.Rmd")$mtime`<br />
 21 | **Compiled**: `r date()`
 22 | 
 23 | 
 24 | # Introduction
 25 | 
 26 | A large variety of annotation resources are available in Bioconductor. Accessing
 27 | the full content of these databases or even of single tables is computationally
 28 | expensive and in many instances not required, as users may want to extract only
 29 | sub-sets of the data e.g. genomic coordinates of a single gene. In that respect,
 30 | filtering annotation resources before data extraction has a major impact on
 31 | performance and increases the usability of such genome-scale databases.
 32 | 
 33 | The `r Biocpkg("AnnotationFilter")` package was thus developed to provide basic
 34 | filter classes to enable a common filtering framework for Bioconductor
 35 | annotation resources. `r Biocpkg("AnnotationFilter")` defines filter classes for
 36 | some of the most commonly used features in annotation databases, such as
 37 | *symbol* or *genename*. Each filter class is supposed to work on a single
 38 | database table column and to facilitate filtering on the provided values. Such
 39 | filter classes enable the user to build complex queries to retrieve specific
 40 | annotations without needing to know column or table names or the layout of the
 41 | underlying databases. While initially being developed to be used in the 
 42 | `r Biocpkg("Organism.dplyr")` and `r Biocpkg("ensembldb")` packages, the filter
 43 | classes and the related filtering concept can be easily added to other
 44 | annotation packages too.
 45 | 
 46 | 
 47 | # Filter classes
 48 | 
 49 | All filter classes extend the basic `AnnotationFilter` class and take one or
 50 | more *values* and a *condition* to allow filtering on a single database table
 51 | column. Based on the type of the input value, filter classes are divided into:
 52 | 
 53 | - `CharacterFilter`: takes a `character` value of length >= 1 and supports
 54 |   conditions `==`, `!=`, `startsWith` and `endsWith`. An example would be a
 55 |   `GeneIdFilter` that allows to filter on gene IDs.
 56 | 
 57 | - `IntegerFilter`: takes a single `integer` as input and supports the conditions
 58 |   `==`, `!=`, `>`, `<`, `>=` and `<=`. An example would be a `GeneStartFilter`
 59 |   that filters results on the (chromosomal) start coordinates of genes.
 60 | 
 61 | - `GRangesFilter`: is a special filter, as it takes a `GRanges` as `value` and
 62 |   performs the filtering on a combination of columns (i.e. start and end
 63 |   coordinate as well as sequence name and strand). To be consistent with the
 64 |   `findOverlaps` method from the `r Biocpkg("IRanges")` package, the constructor
 65 |   of the `GRangesFilter` filter takes a `type` argument to define its
 66 |   condition. Supported values are `"any"` (the default) that retrieves all
 67 |   entries overlapping the `GRanges`, `"start"` and `"end"` matching all features
 68 |   with the same start and end coordinate respectively, `"within"` that matches
 69 |   all features that are *within* the range defined by the `GRanges` and
 70 |   `"equal"` that returns features that are equal to the `GRanges`.
 71 | 
 72 | The names of the filter classes are intuitive, the first part corresponding to
 73 | the database column name with each character following a `_` being capitalized,
 74 | followed by the key word `Filter`. The name of a filter for a database table
 75 | column `gene_id` is thus called `GeneIdFilter`. The default database column for
 76 | a filter is stored in its `field` slot (accessible *via* the `field` method).
 77 | 
 78 | The `supportedFilters` method can be used to get an overview of all available
 79 | filter objects defined in `AnnotationFilter`.
 80 | 
 81 | ```{r supportedFilters}
 82 | library(AnnotationFilter)
 83 | supportedFilters()
 84 | ```
 85 | 
 86 | Note that the `AnnotationFilter` package provides only the filter classes
 87 | but not the functionality to apply the filtering. Such functionality is
 88 | annotation resource and database layout dependent and needs thus to be
 89 | implemented in the packages providing access to annotation resources.
 90 | 
 91 | 
 92 | # Usage
 93 | 
 94 | Filters are created *via* their dedicated constructor functions, such as the
 95 | `GeneIdFilter` function for the `GeneIdFilter` class. Because of this simple and
 96 | cheap creation, filter classes are thought to be *read-only* and thus don't
 97 | provide *setter* methods to change their slot values. In addition to the
 98 | constructor functions, `AnnotationFilter` provides the functionality to
 99 | *translate* query expressions into filter classes (see further below for an
100 | example).
101 | 
102 | Below we create a `SymbolFilter` that could be used to filter an annotation
103 | resource to retrieve all entries associated with the specified symbol value(s).
104 | 
105 | ```{r symbol-filter}
106 | library(AnnotationFilter)
107 | 
108 | smbl <- SymbolFilter("BCL2")
109 | smbl
110 | ```
111 | 
112 | Such a filter is supposed to be used to retrieve all entries associated to
113 | features with a value in a database table column called *symbol* matching the
114 | filter's value `"BCL2"`.
115 | 
116 | Using the `"startsWith"` condition we could define a filter to retrieve all
117 | entries for genes with a gene name/symbol starting with the specified value
118 | (e.g. `"BCL2"` and `"BCL2L11"` for the example below).
119 | 
120 | ```{r symbol-startsWith}
121 | smbl <- SymbolFilter("BCL2", condition = "startsWith")
122 | smbl
123 | ```
124 | 
125 | In addition to the constructor functions, `AnnotationFilter` provides a
126 | functionality to create filter instances in a more natural and intuitive way by
127 | *translating* filter expressions (written as a *formula*, i.e. starting with a
128 | `~`).
129 | 
130 | ```{r convert-expression}
131 | smbl <- AnnotationFilter(~ symbol == "BCL2")
132 | smbl
133 | ```
134 | 
135 | Individual `AnnotationFilter` objects can be combined in an
136 | `AnnotationFilterList`. This class extends `list` and provides an additional
137 | `logicOp()` that defines how its individual filters are supposed to be
138 | combined. The length of `logicOp()` has to be 1 less than the number of filter
139 | objects. Each element in `logicOp()` defines how two consecutive filters should
140 | be combined. Below we create a `AnnotationFilterList` containing two filter
141 | objects to be combined with a logical *AND*.
142 | 
143 | ```{r convert-multi-expression}
144 | flt <- AnnotationFilter(~ symbol == "BCL2" &
145 |                             tx_biotype == "protein_coding")
146 | flt
147 | ```
148 | 
149 | Note that the `AnnotationFilter` function does not (yet) support translation of
150 | nested expressions, such as `(symbol == "BCL2L11" & tx_biotype ==
151 | "nonsense_mediated_decay") | (symbol == "BCL2" & tx_biotype ==
152 | "protein_coding")`. Such queries can however be built by nesting
153 | `AnnotationFilterList` classes.
154 | 
155 | ```{r nested-query}
156 | ## Define the filter query for the first pair of filters.
157 | afl1 <- AnnotationFilterList(SymbolFilter("BCL2L11"),
158 |                              TxBiotypeFilter("nonsense_mediated_decay"))
159 | ## Define the filter query for the second pair of filters (second bracket).
160 | afl2 <- AnnotationFilterList(SymbolFilter("BCL2"),
161 |                              TxBiotypeFilter("protein_coding"))
162 | ## Now combine both with a logical OR
163 | afl <- AnnotationFilterList(afl1, afl2, logicOp = "|")
164 | 
165 | afl
166 | ```
167 | 
168 | This `AnnotationFilterList` would now select all entries for all transcripts of
169 | the gene *BCL2L11* with the biotype *nonsense_mediated_decay* or for all protein
170 | coding transcripts of the gene *BCL2*.
171 | 
172 | 
173 | # Using `AnnotationFilter` in other packages
174 | 
175 | The `AnnotationFilter` package does only provide filter classes, but no
176 | filtering functionality. This has to be implemented in the package using the
177 | filters. In this section we first show in a very simple example how
178 | `AnnotationFilter` classes could be used to filter a `data.frame` and
179 | subsequently explore how a simple filter framework could be implemented for
180 | SQL-based annotation resources.
181 | 
182 | Let's first define a simple `data.frame` containing the data we want to
183 | filter. Note that subsetting this `data.frame` using `AnnotationFilter` is
184 | obviously not the best solution, but it should help to understand the basic
185 | concept.
186 | 
187 | ```{r define-data.frame}
188 | ## Define a simple gene table
189 | gene <- data.frame(gene_id = 1:10,
190 |                    symbol = c(letters[1:9], "b"),
191 |                    seq_name = paste0("chr", c(1, 4, 4, 8, 1, 2, 5, 3, "X", 4)),
192 |                    stringsAsFactors = FALSE)
193 | gene
194 | ```
195 | 
196 | Next we generate a `SymbolFilter` and inspect what information we can extract
197 | from it.
198 | 
199 | ```{r simple-symbol}
200 | smbl <- SymbolFilter("b")
201 | ```
202 | 
203 | We can access the filter *condition* using the `condition` method
204 | 
205 | ```{r simple-symbol-condition}
206 | condition(smbl)
207 | ```
208 | 
209 | The value of the filter using the `value` method
210 | 
211 | ```{r simple-symbol-value}
212 | value(smbl)
213 | ```
214 | 
215 | And finally the *field* (i.e. column in the data table) using the `field`
216 | method.
217 | 
218 | ```{r simple-symbol-field}
219 | field(smbl)
220 | ```
221 | 
222 | With this information we can define a simple function that takes the data table
223 | and the filter as input and returns a `logical` with length equal to the number
224 | of rows of the table, `TRUE` for rows matching the filter.
225 | 
226 | ```{r doMatch}
227 | 
## Evaluate a single filter on a data table.
## x: the data table (data.frame); filter: the filter object.
## The filter's condition (e.g. "==") is called with the table column named
## by the filter's field and the filter's value as its two arguments.
doMatch <- function(x, filter) {
    compare <- condition(filter)
    column <- x[, field(filter)]
    do.call(compare, list(column, value(filter)))
}
231 | 
232 | ## Apply this function
233 | doMatch(gene, smbl)
234 | 
235 | ```
236 | 
237 | Note that this simple function does not support multiple filters and also not
238 | conditions `"startsWith"` or `"endsWith"`. Next we define a second function that
239 | extracts the relevant data from the data resource.
240 | 
241 | ```{r doExtract}
242 | 
## Return the rows of the data table `x` selected by `filter`.
doExtract <- function(x, filter) {
    keep <- doMatch(x, filter)
    x[keep, ]
}
246 | 
247 | ## Apply it on the data
248 | doExtract(gene, smbl)
249 | ```
250 | 
251 | We could even modify the `doMatch` function to enable filter expressions.
252 | 
253 | ```{r doMatch-formula}
254 | 
## Same matching as before, but `filter` may also be a filter expression
## (a formula), which is first translated with AnnotationFilter().
doMatch <- function(x, filter) {
    flt <- if (is(filter, "formula")) AnnotationFilter(filter) else filter
    compare <- condition(flt)
    column <- x[, field(flt)]
    do.call(compare, list(column, value(flt)))
}
260 | 
261 | doExtract(gene, ~ gene_id == '2')
262 | 
263 | ```
264 | 
265 | For such simple examples `AnnotationFilter` might be an overkill as the same
266 | could be achieved (much simpler) using standard R operations. A real case
267 | scenario in which `AnnotationFilter` becomes useful are SQL-based annotation
268 | resources. We will thus explore next how SQL resources could be filtered using
269 | `AnnotationFilter`.
270 | 
271 | We use the SQLite database from the `r Biocpkg("org.Hs.eg.db")` package that
272 | provides a variety of annotations for all human genes. Using the package's
273 | connection to the database we inspect first what database tables are available
274 | and then select one for our simple filtering example. 
275 | 
276 | We use an `EnsDb` SQLite database used by the `r Biocpkg("ensembldb")` package
277 | and implement simple filter functions to extract specific data from one of its
278 | database tables. We thus load below the `EnsDb.Hsapiens.v75` package that
279 | provides access to human gene, transcript, exon and protein annotations. Using
280 | its connection to the database we inspect first what database tables are
281 | available and then what *fields* (i.e. columns) the *gene* table has.
282 | 
283 | ```{r orgDb, message = FALSE}
284 | ## Load the required packages
285 | library(org.Hs.eg.db)
286 | library(RSQLite)
287 | ## Get the database connection
288 | dbcon <- org.Hs.eg_dbconn()
289 | 
290 | ## What tables do we have?
291 | dbListTables(dbcon)
292 | ```
293 | 
294 | `org.Hs.eg.db` provides many different tables, one for each identifier or
295 | annotation resource. We will use the *gene_info* table and determine which
296 | *fields* (i.e. columns) the table provides.
297 | 
298 | ```{r gene_info}
299 | ## What fields are there in the gene_info table?
300 | dbListFields(dbcon, "gene_info")
301 | ```
302 | 
303 | The *gene_info* table provides the official gene symbol and the gene name. The
304 | column *symbol* matches the default `field` value of the `SymbolFilter`. For the
305 | `GenenameFilter` we would have to re-map its default field `"genename"` to the
306 | database column *gene_name*. There are many possibilities to do this, one would
307 | be to implement an own function to extract the field from the `AnnotationFilter`
308 | classes specific to the database. This function eventually renames the extracted
309 | field value to match the corresponding name of the database column name.
310 | 
311 | We next implement a simple `doExtractGene` function that retrieves data from the
312 | *gene_info* table and re-uses the `doExtract` function to extract specific
313 | data. The parameter `x` is now the database connection object.
314 | 
315 | ```{r doExtractSQL}
316 | 
## Fetch the complete gene_info table through the connection `x` and subset
## it in R with doExtract().
doExtractGene <- function(x, filter) {
    sql <- "select * from gene_info"
    all_genes <- dbGetQuery(x, sql)
    doExtract(all_genes, filter)
}
321 | 
322 | ## Extract all entries for BCL2
323 | bcl2 <- doExtractGene(dbcon, SymbolFilter("BCL2"))
324 | 
325 | bcl2
326 | ```
327 | 
328 | This works, but is not really efficient, since the function first fetches the
329 | full database table and subsets it only afterwards. A much more efficient
330 | solution is to *translate* the `AnnotationFilter` class(es) to an SQL *where*
331 | condition and hence perform the filtering on the database level. Here we have to
332 | do some small modifications, since not all condition values can be used 1:1 in
333 | SQL calls. The condition `"=="` has for example to be converted into `"="` and
334 | the `"startsWith"` into a SQL `"like"` by adding also a `"%"` wildcard to the
335 | value of the filter. We would also have to deal with filters that have a `value`
336 | of length > 1. A `SymbolFilter` with a `value` being `c("BCL2", "BCL2L11")`
337 | would for example have to be converted to a SQL call `"symbol in
338 | ('BCL2','BCL2L11')"`. Here we skip these special cases and define a simple
339 | function that translates an `AnnotationFilter` to a *where* condition to be
340 | included into the SQL call. Depending on whether the filter extends
341 | `CharacterFilter` or `IntegerFilter` the value has also to be quoted.
342 | 
343 | ```{r simpleSQL}
344 | 
## Map a filter condition to its SQL counterpart: "==" becomes "=", every
## other condition is passed through unchanged.
conditionForSQL <- function(x) {
    sql_op <- switch(x, "==" = "=", x)
    sql_op
}
351 | 
## Define a function to translate a filter into an SQL where condition.
## Character values have to be quoted; single quotes embedded in a value are
## doubled so that a value such as "5'-nucleotidase" does not end the SQL
## string literal prematurely.
where <- function(x) {
    sql_value <- value(x)
    if (is(x, "CharacterFilter")) {
        sql_value <- gsub("'", "''", sql_value, fixed = TRUE)
        sql_value <- paste0("'", sql_value, "'")
    }
    paste0(field(x), conditionForSQL(condition(x)), sql_value)
}
360 | 
361 | ## Now "translate" a filter using this function
362 | where(SeqNameFilter("Y"))
363 | 
364 | ```
365 | 
366 | Next we implement a new function which integrates the filter into the SQL call
367 | to let the database server take care of the filtering.
368 | 
369 | ```{r doExtractGene2}
370 | 
## Define a function that builds the where condition from the filter and lets
## the database perform the filtering. `filter` can be a filter object or a
## filter expression (formula).
doExtractGene2 <- function(x, filter) {
    flt <- if (is(filter, "formula")) AnnotationFilter(filter) else filter
    sql <- paste0("select * from gene_info where ", where(flt))
    dbGetQuery(x, sql)
}
378 | 
379 | bcl2 <- doExtractGene2(dbcon, ~ symbol == "BCL2")
380 | bcl2
381 | 
382 | ```
383 | 
384 | Below we compare the performance of both approaches.
385 | 
386 | ```{r performance}
387 | system.time(doExtractGene(dbcon, ~ symbol == "BCL2"))
388 | 
389 | system.time(doExtractGene2(dbcon, ~ symbol == "BCL2"))
390 | 
391 | ```
392 | 
393 | Not surprisingly, the second approach is much faster.
394 | 
395 | Be aware that the examples shown here are only for illustration purposes. In a
396 | real world situation additional factors, like combinations of filters, which
397 | database tables to join, which columns to be returned etc would have to be
398 | considered too.
399 | 
400 | # Session information
401 | 
402 | ```{r si}
403 | sessionInfo()
404 | ```
405 | 


--------------------------------------------------------------------------------
/vignettes/AnnotationFilter.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Facilities for Filtering Bioconductor Annotation Resources"
  3 | output:
  4 |     BiocStyle::html_document:
  5 |         toc_float: true
  6 | vignette: >
  7 |     %\VignetteIndexEntry{Facilities for Filtering Bioconductor Annotation resources}
  8 |     %\VignetteEngine{knitr::rmarkdown}
  9 |     %\VignetteEncoding{UTF-8}
 10 |     %\VignettePackage{AnnotationFilter}
 11 |     %\VignetteDepends{org.Hs.eg.db,BiocStyle,RSQLite}
 12 | ---
 13 | 
 14 | ```{r style, echo = FALSE, results = 'asis', message=FALSE}
 15 | BiocStyle::markdown()
 16 | ```
 17 | 
 18 | **Package**: `r Biocpkg("AnnotationFilter")`<br />
 19 | **Authors**: `r packageDescription("AnnotationFilter")[["Author"]] `<br />
 20 | **Last modified:** `r file.info("AnnotationFilter.Rmd")$mtime`<br />
 21 | **Compiled**: `r date()`
 22 | 
 23 | 
 24 | # Introduction
 25 | 
 26 | A large variety of annotation resources are available in Bioconductor. Accessing
 27 | the full content of these databases or even of single tables is computationally
 28 | expensive and in many instances not required, as users may want to extract only
 29 | sub-sets of the data e.g. genomic coordinates of a single gene. In that respect,
 30 | filtering annotation resources before data extraction has a major impact on
 31 | performance and increases the usability of such genome-scale databases.
 32 | 
 33 | The `r Biocpkg("AnnotationFilter")` package was thus developed to provide basic
 34 | filter classes to enable a common filtering framework for Bioconductor
 35 | annotation resources. `r Biocpkg("AnnotationFilter")` defines filter classes for
 36 | some of the most commonly used features in annotation databases, such as
 37 | *symbol* or *genename*. Each filter class is supposed to work on a single
 38 | database table column and to facilitate filtering on the provided values. Such
 39 | filter classes enable the user to build complex queries to retrieve specific
 40 | annotations without needing to know column or table names or the layout of the
 41 | underlying databases. While initially being developed to be used in the 
 42 | `r Biocpkg("Organism.dplyr")` and `r Biocpkg("ensembldb")` packages, the filter
 43 | classes and the related filtering concept can be easily added to other
 44 | annotation packages too.
 45 | 
 46 | 
 47 | # Filter classes
 48 | 
 49 | All filter classes extend the basic `AnnotationFilter` class and take one or
 50 | more *values* and a *condition* to allow filtering on a single database table
 51 | column. Based on the type of the input value, filter classes are divided into:
 52 | 
 53 | - `CharacterFilter`: takes a `character` value of length >= 1 and supports
 54 |   conditions `==`, `!=`, `startsWith` and `endsWith`. An example would be a
 55 |   `GeneIdFilter` that allows to filter on gene IDs.
 56 | 
 57 | - `IntegerFilter`: takes a single `integer` as input and supports the conditions
 58 |   `==`, `!=`, `>`, `<`, `>=` and `<=`. An example would be a `GeneStartFilter`
 59 |   that filters results on the (chromosomal) start coordinates of genes.
 60 | 
 61 | - `DoubleFilter`: takes a single `numeric` as input and supports the conditions
 62 |   `==`, `!=`, `>`, `<`, `>=` and `<=`.
 63 | 
 64 | - `GRangesFilter`: is a special filter, as it takes a `GRanges` as `value` and
 65 |   performs the filtering on a combination of columns (i.e. start and end
 66 |   coordinate as well as sequence name and strand). To be consistent with the
 67 |   `findOverlaps` method from the `r Biocpkg("IRanges")` package, the constructor
 68 |   of the `GRangesFilter` filter takes a `type` argument to define its
 69 |   condition. Supported values are `"any"` (the default) that retrieves all
 70 |   entries overlapping the `GRanges`, `"start"` and `"end"` matching all features
 71 |   with the same start and end coordinate respectively, `"within"` that matches
 72 |   all features that are *within* the range defined by the `GRanges` and
 73 |   `"equal"` that returns features that are equal to the `GRanges`.
 74 | 
 75 | The names of the filter classes are intuitive, the first part corresponding to
 76 | the database column name with each character following a `_` being capitalized,
 77 | followed by the key word `Filter`. The name of a filter for a database table
 78 | column `gene_id` is thus called `GeneIdFilter`. The default database column for
 79 | a filter is stored in its `field` slot (accessible *via* the `field` method).
 80 | 
 81 | The `supportedFilters` method can be used to get an overview of all available
 82 | filter objects defined in `AnnotationFilter`.
 83 | 
 84 | ```{r supportedFilters}
 85 | library(AnnotationFilter)
 86 | supportedFilters()
 87 | ```
 88 | 
 89 | Note that the `AnnotationFilter` package provides only the filter classes
 90 | but not the functionality to apply the filtering. Such functionality is
 91 | annotation resource and database layout dependent and needs thus to be
 92 | implemented in the packages providing access to annotation resources.
 93 | 
 94 | 
 95 | # Usage
 96 | 
 97 | Filters are created *via* their dedicated constructor functions, such as the
 98 | `GeneIdFilter` function for the `GeneIdFilter` class. Because of this simple and
 99 | cheap creation, filter classes are thought to be *read-only* and thus don't
100 | provide *setter* methods to change their slot values. In addition to the
101 | constructor functions, `AnnotationFilter` provides the functionality to
102 | *translate* query expressions into filter classes (see further below for an
103 | example).
104 | 
105 | Below we create a `SymbolFilter` that could be used to filter an annotation
106 | resource to retrieve all entries associated with the specified symbol value(s).
107 | 
108 | ```{r symbol-filter}
109 | library(AnnotationFilter)
110 | 
111 | smbl <- SymbolFilter("BCL2")
112 | smbl
113 | ```
114 | 
115 | Such a filter is supposed to be used to retrieve all entries associated to
116 | features with a value in a database table column called *symbol* matching the
117 | filter's value `"BCL2"`.
118 | 
119 | Using the `"startsWith"` condition we could define a filter to retrieve all
120 | entries for genes with a gene name/symbol starting with the specified value
121 | (e.g. `"BCL2"` and `"BCL2L11"` for the example below).
122 | 
123 | ```{r symbol-startsWith}
124 | smbl <- SymbolFilter("BCL2", condition = "startsWith")
125 | smbl
126 | ```
127 | 
128 | In addition to the constructor functions, `AnnotationFilter` provides a
129 | functionality to create filter instances in a more natural and intuitive way by
130 | *translating* filter expressions (written as a *formula*, i.e. starting with a
131 | `~`).
132 | 
133 | ```{r convert-expression}
134 | smbl <- AnnotationFilter(~ symbol == "BCL2")
135 | smbl
136 | ```
137 | 
138 | Individual `AnnotationFilter` objects can be combined in an
139 | `AnnotationFilterList`. This class extends `list` and provides an additional
140 | `logicOp()` that defines how its individual filters are supposed to be
141 | combined. The length of `logicOp()` has to be 1 less than the number of filter
142 | objects. Each element in `logicOp()` defines how two consecutive filters should
143 | be combined. Below we create a `AnnotationFilterList` containing two filter
144 | objects to be combined with a logical *AND*.
145 | 
146 | ```{r convert-multi-expression}
147 | flt <- AnnotationFilter(~ symbol == "BCL2" &
148 |                             tx_biotype == "protein_coding")
149 | flt
150 | ```
151 | 
152 | Note that the `AnnotationFilter` function does not (yet) support translation of
153 | nested expressions, such as `(symbol == "BCL2L11" & tx_biotype ==
154 | "nonsense_mediated_decay") | (symbol == "BCL2" & tx_biotype ==
155 | "protein_coding")`. Such queries can however be built by nesting
156 | `AnnotationFilterList` classes.
157 | 
158 | ```{r nested-query}
159 | ## Define the filter query for the first pair of filters.
160 | afl1 <- AnnotationFilterList(SymbolFilter("BCL2L11"),
161 |                              TxBiotypeFilter("nonsense_mediated_decay"))
162 | ## Define the filter query for the second pair of filters (second bracket).
163 | afl2 <- AnnotationFilterList(SymbolFilter("BCL2"),
164 |                              TxBiotypeFilter("protein_coding"))
165 | ## Now combine both with a logical OR
166 | afl <- AnnotationFilterList(afl1, afl2, logicOp = "|")
167 | 
168 | afl
169 | ```
170 | 
171 | This `AnnotationFilterList` would now select all entries for all transcripts of
172 | the gene *BCL2L11* with the biotype *nonsense_mediated_decay* or for all protein
173 | coding transcripts of the gene *BCL2*.
174 | 
175 | 
176 | # Using `AnnotationFilter` in other packages
177 | 
178 | The `AnnotationFilter` package does only provide filter classes, but no
179 | filtering functionality. This has to be implemented in the package using the
180 | filters. In this section we first show in a very simple example how
181 | `AnnotationFilter` classes could be used to filter a `data.frame` and
182 | subsequently explore how a simple filter framework could be implemented for
183 | SQL-based annotation resources.
184 | 
185 | Let's first define a simple `data.frame` containing the data we want to
186 | filter. Note that subsetting this `data.frame` using `AnnotationFilter` is
187 | obviously not the best solution, but it should help to understand the basic
188 | concept.
189 | 
190 | ```{r define-data.frame}
191 | ## Define a simple gene table
192 | gene <- data.frame(gene_id = 1:10,
193 |                    symbol = c(letters[1:9], "b"),
194 |                    seq_name = paste0("chr", c(1, 4, 4, 8, 1, 2, 5, 3, "X", 4)),
195 |                    stringsAsFactors = FALSE)
196 | gene
197 | ```
198 | 
199 | Next we generate a `SymbolFilter` and inspect what information we can extract
200 | from it.
201 | 
202 | ```{r simple-symbol}
203 | smbl <- SymbolFilter("b")
204 | ```
205 | 
206 | We can access the filter *condition* using the `condition` method
207 | 
208 | ```{r simple-symbol-condition}
209 | condition(smbl)
210 | ```
211 | 
212 | The value of the filter using the `value` method
213 | 
214 | ```{r simple-symbol-value}
215 | value(smbl)
216 | ```
217 | 
218 | And finally the *field* (i.e. column in the data table) using the `field`
219 | method.
220 | 
221 | ```{r simple-symbol-field}
222 | field(smbl)
223 | ```
224 | 
225 | With this information we can define a simple function that takes the data table
226 | and the filter as input and returns a `logical` with length equal to the number
227 | of rows of the table, `TRUE` for rows matching the filter.
228 | 
229 | ```{r doMatch}
230 | 
## Evaluate a single filter on a data table.
## x: the data table (data.frame); filter: the filter object.
## The filter's condition (e.g. "==") is called with the table column named
## by the filter's field and the filter's value as its two arguments.
doMatch <- function(x, filter) {
    compare <- condition(filter)
    column <- x[, field(filter)]
    do.call(compare, list(column, value(filter)))
}
234 | 
235 | ## Apply this function
236 | doMatch(gene, smbl)
237 | 
238 | ```
239 | 
240 | Note that this simple function does not support multiple filters and also not
241 | conditions `"startsWith"` or `"endsWith"`. Next we define a second function that
242 | extracts the relevant data from the data resource.
243 | 
244 | ```{r doExtract}
245 | 
## Return the rows of the data table `x` selected by `filter`.
doExtract <- function(x, filter) {
    keep <- doMatch(x, filter)
    x[keep, ]
}
249 | 
250 | ## Apply it on the data
251 | doExtract(gene, smbl)
252 | ```
253 | 
254 | We could even modify the `doMatch` function to enable filter expressions.
255 | 
256 | ```{r doMatch-formula}
257 | 
## Same matching as before, but `filter` may also be a filter expression
## (a formula), which is first translated with AnnotationFilter().
doMatch <- function(x, filter) {
    flt <- if (is(filter, "formula")) AnnotationFilter(filter) else filter
    compare <- condition(flt)
    column <- x[, field(flt)]
    do.call(compare, list(column, value(flt)))
}
263 | 
264 | doExtract(gene, ~ gene_id == '2')
265 | 
266 | ```
267 | 
268 | For such simple examples `AnnotationFilter` might be an overkill as the same
269 | could be achieved (much simpler) using standard R operations. A real case
270 | scenario in which `AnnotationFilter` becomes useful are SQL-based annotation
271 | resources. We will thus explore next how SQL resources could be filtered using
272 | `AnnotationFilter`.
273 | 
274 | We use the SQLite database from the `r Biocpkg("org.Hs.eg.db")` package that
275 | provides a variety of annotations for all human genes. Using the package's
276 | connection to the database we inspect first what database tables are available
277 | and then select one for our simple filtering example. 
278 | 
279 | We use an `EnsDb` SQLite database used by the `r Biocpkg("ensembldb")` package
280 | and implement simple filter functions to extract specific data from one of its
281 | database tables. We thus load below the `EnsDb.Hsapiens.v75` package that
282 | provides access to human gene, transcript, exon and protein annotations. Using
283 | its connection to the database we inspect first what database tables are
284 | available and then what *fields* (i.e. columns) the *gene* table has.
285 | 
286 | ```{r orgDb, message = FALSE}
287 | ## Load the required packages
288 | library(org.Hs.eg.db)
289 | library(RSQLite)
290 | ## Get the database connection
291 | dbcon <- org.Hs.eg_dbconn()
292 | 
293 | ## What tables do we have?
294 | dbListTables(dbcon)
295 | ```
296 | 
297 | `org.Hs.eg.db` provides many different tables, one for each identifier or
298 | annotation resource. We will use the *gene_info* table and determine which
299 | *fields* (i.e. columns) the table provides.
300 | 
301 | ```{r gene_info}
302 | ## What fields are there in the gene_info table?
303 | dbListFields(dbcon, "gene_info")
304 | ```
305 | 
306 | The *gene_info* table provides the official gene symbol and the gene name. The
307 | column *symbol* matches the default `field` value of the `SymbolFilter` as does
308 | the column *gene_name* for the *GeneNameFilter*. If the column in the database
309 | would not match the field of an `AnnotationFilter`, we would have to implement a
310 | function that maps the default field of the filter object to the database
311 | column. See the end of the section for an example.
312 | 
313 | We next implement a simple `doExtractGene` function that retrieves data from the
314 | *gene_info* table and re-uses the `doExtract` function to extract specific
315 | data. The parameter `x` is now the database connection object.
316 | 
317 | ```{r doExtractSQL}
318 | 
## Fetch the complete gene_info table through the connection `x` and subset
## it in R with doExtract().
doExtractGene <- function(x, filter) {
    sql <- "select * from gene_info"
    all_genes <- dbGetQuery(x, sql)
    doExtract(all_genes, filter)
}
323 | 
324 | ## Extract all entries for BCL2
325 | bcl2 <- doExtractGene(dbcon, SymbolFilter("BCL2"))
326 | 
327 | bcl2
328 | ```
329 | 
330 | This works, but is not really efficient, since the function first fetches the
331 | full database table and subsets it only afterwards. A much more efficient
332 | solution is to *translate* the `AnnotationFilter` class(es) to an SQL *where*
333 | condition and hence perform the filtering on the database level. Here we have to
334 | do some small modifications, since not all condition values can be used 1:1 in
335 | SQL calls. The condition `"=="` has for example to be converted into `"="` and
336 | the `"startsWith"` into a SQL `"like"` by adding also a `"%"` wildcard to the
337 | value of the filter. We would also have to deal with filters that have a `value`
338 | of length > 1. A `SymbolFilter` with a `value` being `c("BCL2", "BCL2L11")`
339 | would for example have to be converted to a SQL call `"symbol in
340 | ('BCL2','BCL2L11')"`. Here we skip these special cases and define a simple
341 | function that translates an `AnnotationFilter` to a *where* condition to be
342 | included into the SQL call. Depending on whether the filter extends
343 | `CharacterFilter` or `IntegerFilter` the value has also to be quoted.
344 | 
345 | ```{r simpleSQL}
346 | 
347 | ## Define a simple function that covers some condition conversion
## Map a filter condition to its SQL operator: "==" becomes "=", every other
## condition is returned unchanged.
conditionForSQL <- function(x) {
    sql_operator <- switch(x, "==" = "=", x)
    sql_operator
}
353 | 
354 | ## Define a function to translate a filter into an SQL where condition.
355 | ## Character values have to be quoted.
where <- function(x) {
    ## Builds "<field><operator><value>" for the filter `x`. Values of a
    ## CharacterFilter are wrapped in single quotes; all other values are
    ## used as they are.
    if (is(x, "CharacterFilter")) {
        ## Double any embedded single quote so that the value stays a single
        ## SQL string literal (e.g. a name containing "5'").
        escaped <- gsub("'", "''", value(x), fixed = TRUE)
        value <- paste0("'", escaped, "'")
    } else {
        value <- value(x)
    }
    paste0(field(x), conditionForSQL(condition(x)), value)
}
362 | 
363 | ## Now "translate" a filter using this function
364 | where(SeqNameFilter("Y"))
365 | 
366 | ```
367 | 
368 | Next we implement a new function which integrates the filter into the SQL call
369 | to let the database server take care of the filtering.
370 | 
371 | ```{r doExtractGene2}
372 | 
## Define a function that lets the database do the filtering: the filter is
## translated with where() and appended to the query as a where condition.
## A filter passed as a formula is first converted with AnnotationFilter().
doExtractGene2 <- function(x, filter) {
    flt <- if (is(filter, "formula")) AnnotationFilter(filter) else filter
    sql <- paste0("select * from gene_info where ", where(flt))
    dbGetQuery(x, sql)
}
380 | 
381 | bcl2 <- doExtractGene2(dbcon, ~ symbol == "BCL2")
382 | bcl2
383 | 
384 | ```
385 | 
386 | Below we compare the performance of both approaches.
387 | 
388 | ```{r performance}
389 | system.time(doExtractGene(dbcon, ~ symbol == "BCL2"))
390 | 
391 | system.time(doExtractGene2(dbcon, ~ symbol == "BCL2"))
392 | 
393 | ```
394 | 
395 | Not surprisingly, the second approach is much faster.
396 | 
397 | Be aware that the examples shown here are only for illustration purposes. In a
398 | real world situation additional factors, like combinations of filters, which
399 | database tables to join, which columns to be returned etc. would have to be
400 | considered too.
401 | 
402 | What if the database column on which we want to filter does not match the
403 | `field` of an `AnnotationFilter`? If the database column is named
404 | *hgnc_symbol* instead of *symbol*, we could for example package-internally
405 | overwrite the default `field` method for `SymbolFilter` to return the correct
406 | field for the database column.
407 | 
408 | ```{r symbol-overwrite}
409 | ## Default method from AnnotationFilter:
410 | field(SymbolFilter("a"))
411 | 
412 | ## Overwrite the default method.
413 | setMethod("field", "SymbolFilter", function(object, ...) "hgnc_symbol")
414 | 
415 | ## Call to field returns now the "correct" database column
416 | field(SymbolFilter("a"))
417 | 
418 | ```
419 | 
420 | 
421 | # Session information
422 | 
423 | ```{r si}
424 | sessionInfo()
425 | ```
426 | 


--------------------------------------------------------------------------------
/R/AnnotationFilter.R:
--------------------------------------------------------------------------------
  1 | #' @name AnnotationFilter
  2 | #'
  3 | #' @title Filters for annotation objects
  4 | #'
  5 | #' @aliases CdsStartFilter CdsEndFilter ExonIdFilter ExonNameFilter
  6 | #'     ExonStartFilter ExonEndFilter ExonRankFilter GeneIdFilter
  7 | #'     GeneNameFilter GeneBiotypeFilter GeneStartFilter GeneEndFilter
  8 | #'     EntrezFilter SymbolFilter TxIdFilter TxNameFilter
  9 | #'     TxBiotypeFilter TxStartFilter TxEndFilter ProteinIdFilter
 10 | #'     UniprotFilter SeqNameFilter SeqStrandFilter
 11 | #'     AnnotationFilter-class CharacterFilter-class DoubleFilter-class
 12 | #'     IntegerFilter-class CdsStartFilter-class CdsEndFilter-class
 13 | #'     ExonIdFilter-class ExonNameFilter-class ExonStartFilter-class
 14 | #'     ExonEndFilter-class ExonRankFilter-class GeneIdFilter-class
 15 | #'     GeneNameFilter-class GeneBiotypeFilter-class
 16 | #'     GeneStartFilter-class GeneEndFilter-class EntrezFilter-class
 17 | #'     SymbolFilter-class TxIdFilter-class TxNameFilter-class
 18 | #'     TxBiotypeFilter-class TxStartFilter-class TxEndFilter-class
 19 | #'     ProteinIdFilter-class UniprotFilter-class SeqNameFilter-class
 20 | #'     SeqStrandFilter-class supportedFilters 
 21 | #'     show,AnnotationFilter-method show,CharacterFilter-method
 22 | #'     show,IntegerFilter-method show,GRangesFilter-method
 23 | #'     show,DoubleFilter-method
 24 | #'
 25 | #' @description
 26 | #'
 27 | #' The filters extending the base \code{AnnotationFilter} class
 28 | #' represent a simple filtering concept for annotation resources.
 29 | #' Each filter object is thought to filter on a single (database)
 30 | #' table column using the provided values and the defined condition.
 31 | #'
 32 | #' Filter instances are created using the constructor functions (e.g.
 33 | #' \code{GeneIdFilter}).
 34 | #'
 35 | #' \code{supportedFilters()} lists all defined filters. It returns a two column
 36 | #' \code{data.frame} with the filter class name and its default field.
 37 | #' Packages using \code{AnnotationFilter} should implement the
 38 | #' \code{supportedFilters} for their annotation resource object (e.g. for
 39 | #' \code{object = "EnsDb"} in the \code{ensembldb} package) to list all
 40 | #' supported filters for the specific resource.
 41 | #'
 42 | #' @details
 43 | #'
 44 | #' By default filters are only available for tables containing the
 45 | #' field on which the filter acts (i.e. that contain a column with the
 46 | #' name matching the value of the \code{field} slot of the
 47 | #' object). See the vignette for a description to use filters for
 48 | #' databases in which the database table column name differs from the
 49 | #' default \code{field} of the filter.
 50 | #'
 51 | #' @usage
 52 | #'
 53 | #' CdsStartFilter(value, condition = "==", not = FALSE)
 54 | #' CdsEndFilter(value, condition = "==", not = FALSE)
 55 | #' ExonIdFilter(value, condition = "==", not = FALSE)
 56 | #' ExonNameFilter(value, condition = "==", not = FALSE)
 57 | #' ExonRankFilter(value, condition = "==", not = FALSE)
 58 | #' ExonStartFilter(value, condition = "==", not = FALSE)
 59 | #' ExonEndFilter(value, condition = "==", not = FALSE)
 60 | #' GeneIdFilter(value, condition = "==", not = FALSE)
 61 | #' GeneNameFilter(value, condition = "==", not = FALSE)
 62 | #' GeneBiotypeFilter(value, condition = "==", not = FALSE)
 63 | #' GeneStartFilter(value, condition = "==", not = FALSE)
 64 | #' GeneEndFilter(value, condition = "==", not = FALSE)
 65 | #' EntrezFilter(value, condition = "==", not = FALSE)
 66 | #' SymbolFilter(value, condition = "==", not = FALSE)
 67 | #' TxIdFilter(value, condition = "==", not = FALSE)
 68 | #' TxNameFilter(value, condition = "==", not = FALSE)
 69 | #' TxBiotypeFilter(value, condition = "==", not = FALSE)
 70 | #' TxStartFilter(value, condition = "==", not = FALSE)
 71 | #' TxEndFilter(value, condition = "==", not = FALSE)
 72 | #' ProteinIdFilter(value, condition = "==", not = FALSE)
 73 | #' UniprotFilter(value, condition = "==", not = FALSE)
 74 | #' SeqNameFilter(value, condition = "==", not = FALSE)
 75 | #' SeqStrandFilter(value, condition = "==", not = FALSE)
 76 | #'
 77 | #' @param value \code{character()}, \code{integer()}, or
 78 | #'     \code{GRanges()} value for the filter
 79 | #'
 80 | #' @param condition \code{character(1)} defining the condition to be
 81 | #'     used in the filter. For \code{IntegerFilter} or \code{DoubleFilter},
 82 | #'     one of \code{"=="}, \code{"!="}, \code{">"}, \code{"<"}, \code{">="}
 83 | #'     or \code{"<="}. For \code{CharacterFilter}, one of \code{"=="},
 84 | #'     \code{"!="}, \code{"startsWith"}, \code{"endsWith"} or \code{"contains"}.
 85 | #'     Default condition is \code{"=="}.
 86 | #'
 87 | #' @param not \code{logical(1)} whether the \code{AnnotationFilter} is negated.
 88 | #'     \code{TRUE} means the filter is negated (!), \code{FALSE} that it is
 89 | #'     not negated. The default is \code{FALSE}.
 90 | #'
 91 | #' @return The constructor functions return an object extending
 92 | #'     \code{AnnotationFilter}. For the return value of the other methods see
 93 | #'     the methods' descriptions.
 94 | #' 
 95 | #' @seealso \code{\link{AnnotationFilterList}} for combining
 96 | #'     \code{AnnotationFilter} objects.
 97 | NULL
 98 | 
 99 | .CONDITION <- list(
100 |     IntegerFilter = c("==", "!=", ">", "<", ">=", "<="),
101 |     DoubleFilter = c("==", "!=", ">", "<", ">=", "<="),
102 |     CharacterFilter =  c("==", "!=", "startsWith", "endsWith", "contains"),
103 |     GRangesFilter = c("any", "start", "end", "within", "equal")
104 | )
105 | 
## Default field names, grouped by the base class that the filter class
## generated for each field extends.
.FIELD <- list(
    CharacterFilter = c(
        "exon_id", "exon_name",
        "gene_id", "gene_name", "gene_biotype",
        "entrez", "symbol",
        "tx_id", "tx_name", "tx_biotype",
        "protein_id", "uniprot",
        "seq_name", "seq_strand"
    ),
    IntegerFilter = c(
        "cds_start", "cds_end",
        "exon_start", "exon_rank", "exon_end",
        "gene_start", "gene_end",
        "tx_start", "tx_end"
    )
)
115 | 
## Check that `condition` is a single value listed in .CONDITION for `class`.
##
## Returns TRUE when the condition is acceptable, otherwise a character(1)
## describing the problem (the convention used by S4 validity functions).
## The membership test is only performed for a length 1 condition, so a
## condition of any other length yields exactly one message about its length.
.valid_condition <- function(condition, class) {
    allowed <- .CONDITION[[class]]

    if (length(condition) != 1L)
        return("'condition' must be length 1")

    if (!condition %in% allowed) {
        choices <- paste(sQuote(allowed), collapse=" ")
        return(paste0("'", condition, "' must be in ", choices))
    }

    TRUE
}
131 | 
132 | ############################################################
133 | ## AnnotationFilter
134 | ##
135 | 
## Virtual base class of all filters. It holds the field name, the condition,
## the value (of any type; subclasses restrict it) and the negation flag.
## New objects default to condition "==" and not = FALSE.
#' @exportClass AnnotationFilter
.AnnotationFilter <- setClass(
    Class = "AnnotationFilter",
    contains = "VIRTUAL",
    slots = c(
        field = "character",
        condition = "character",
        value = "ANY",
        not = "logical"
    ),
    prototype = list(condition = "==", not = FALSE)
)
151 | 
## Validity shared by all filters. Returns TRUE or a character vector with one
## message per problem found:
## - a length 1 condition must not be NA,
## - pattern and ordering conditions accept at most one value,
## - `not` must be a single, non-NA logical,
## - `value` must not contain NA.
## Whether the condition is allowed for the concrete class is checked by the
## validity functions of the subclasses.
setValidity("AnnotationFilter", function(object) {
    msgs <- character()

    value <- .value(object)
    condition <- .condition(object)
    not <- .not(object)

    single_value_conditions <- c("startsWith", "endsWith", "contains", ">",
                                 "<", ">=", "<=")
    if (length(condition) == 1L) {
        if (is.na(condition))
            msgs <- c(msgs, "'condition' can not be NA")
        else if (condition %in% single_value_conditions && length(value) > 1L)
            msgs <- c(msgs,
                      paste0("'", condition, "' requires length 1 'value'"))
    }

    if (length(not) != 1)
        msgs <- c(msgs, '"not" value must be of length 1.')
    else if (is.na(not))
        ## an NA flag cannot be used in `if (not)` by show() or convertFilter()
        msgs <- c(msgs, '"not" value can not be NA.')

    if (any(is.na(value)))
        msgs <- c(msgs, "'value' can not be NA")

    if (length(msgs)) msgs else TRUE
})
178 | 
## Internal slot accessors; each returns the content of the slot of the same
## name of `object`.
.field <- function(object) slot(object, "field")

.condition <- function(object) slot(object, "condition")

.value <- function(object) slot(object, "value")

.not <- function(object) slot(object, "not")
186 | 
#' @rdname AnnotationFilter
#'
#' @aliases condition
#'
#' @description \code{condition()} returns the \code{condition} of the
#'     filter \code{object}.
#'
#' @param object An \code{AnnotationFilter} object.
#'
#' @export
setMethod(
    "condition",
    signature = "AnnotationFilter",
    definition = .condition
)

#' @rdname AnnotationFilter
#'
#' @aliases value
#'
#' @description \code{value()} returns the \code{value} of the filter
#'     \code{object}.
#'
#' @export
setMethod(
    "value",
    signature = "AnnotationFilter",
    definition = .value
)

#' @rdname AnnotationFilter
#'
#' @aliases field
#'
#' @description \code{field()} returns the \code{field} of the filter
#'     \code{object}.
#'
#' @export
setMethod(
    "field",
    signature = "AnnotationFilter",
    definition = .field
)

#' @rdname AnnotationFilter
#'
#' @description \code{not()} returns the \code{not} flag of the filter
#'     \code{object}.
#'
#' @export
setMethod(
    "not",
    signature = "AnnotationFilter",
    definition = .not
)
225 | 
## Print "NOT" for a negated filter, followed by the class and the condition.
#' @importFrom methods show
#'
#' @export
setMethod("show", "AnnotationFilter", function(object) {
    if (.not(object))
        cat("NOT\n")
    cat("class:", class(object), "\n")
    cat("condition:", .condition(object), "\n")
})
234 | 
235 | ############################################################
236 | ## CharacterFilter, IntegerFilter
237 | ##
238 | 
## Virtual base class for filters with a character value.
#' @exportClass CharacterFilter
.CharacterFilter <- setClass(
    Class = "CharacterFilter",
    contains = c("VIRTUAL", "AnnotationFilter"),
    slots = c(value = "character"),
    prototype = list(value = character())
)

## The condition has to be one of those listed for "CharacterFilter".
setValidity("CharacterFilter", function(object) {
    cond <- .condition(object)
    .valid_condition(cond, "CharacterFilter")
})

## Extend the inherited output by the value(s) of the filter.
#' @importFrom methods show callNextMethod
#'
#' @export
setMethod("show", "CharacterFilter", function(object) {
    callNextMethod()
    vals <- .value(object)
    cat("value:", vals, "\n")
})
260 | 
## Virtual base class for filters with an integer value.
#' @exportClass IntegerFilter
.IntegerFilter <- setClass(
    Class = "IntegerFilter",
    contains = c("VIRTUAL", "AnnotationFilter"),
    slots = c(value = "integer"),
    prototype = list(value = integer())
)

## The condition has to be one of those listed for "IntegerFilter".
setValidity("IntegerFilter", function(object) {
    cond <- .condition(object)
    .valid_condition(cond, "IntegerFilter")
})

## Extend the inherited output by the value(s) of the filter.
#' @export
setMethod("show", "IntegerFilter", function(object) {
    callNextMethod()
    vals <- .value(object)
    cat("value:", vals, "\n")
})
280 | 
## Virtual base class for filters with a numeric (double) value.
#' @exportClass DoubleFilter
.DoubleFilter <- setClass(
    Class = "DoubleFilter",
    contains = c("VIRTUAL", "AnnotationFilter"),
    slots = c(value = "numeric"),
    prototype = list(value = double())
)

## The condition has to be one of those listed for "DoubleFilter".
setValidity("DoubleFilter", function(object) {
    cond <- .condition(object)
    .valid_condition(cond, "DoubleFilter")
})

## Extend the inherited output by the value(s) of the filter.
#' @export
setMethod("show", "DoubleFilter", function(object) {
    callNextMethod()
    vals <- .value(object)
    cat("value:", vals, "\n")
})
300 | 
## Filter holding a GRanges value and the name of the feature it applies to.
## New objects default to an empty GRanges, condition "any", field "granges"
## and feature "gene".
#' @rdname AnnotationFilter
#'
#' @importFrom GenomicRanges GRanges
#'
#' @importClassesFrom GenomicRanges GRanges
#'
#' @exportClass GRangesFilter
.GRangesFilter <- setClass(
    Class = "GRangesFilter",
    contains = "AnnotationFilter",
    slots = c(value = "GRanges", feature = "character"),
    prototype = list(
        value = GRanges(),
        condition = "any",
        field = "granges",
        feature = "gene"
    )
)

## The condition has to be one of those listed for "GRangesFilter".
setValidity("GRangesFilter", function(object) {
    cond <- .condition(object)
    .valid_condition(cond, "GRangesFilter")
})

## Internal accessor for the feature slot.
.feature <- function(object) slot(object, "feature")
328 | 
#' @rdname AnnotationFilter
#'
#' @param type \code{character(1)} indicating how overlaps are to be
#'     filtered. See \code{findOverlaps} in the IRanges package for a
#'     description of this argument.
#'
#' @examples
#' ## filter by GRanges
#' GRangesFilter(GenomicRanges::GRanges("chr10:87869000-87876000"))
#' @export
GRangesFilter <-
    function(value, feature = "gene",
             type = c("any", "start", "end", "within", "equal"))
{
    ## the selected overlap type is stored as the condition of the filter
    overlap_type <- match.arg(type)
    .GRangesFilter(
        field = "granges",
        value = value,
        condition = overlap_type,
        feature = feature
    )
}
350 | 
## Internal accessor for the feature slot.
.feature <- function(object) slot(object, "feature")

#' @aliases feature
#'
#' @description \code{feature()} returns the \code{feature} of the
#'     \code{GRangesFilter} \code{object}.
#'
#' @rdname AnnotationFilter
#'
#' @export
feature <- .feature
362 | 
## Extend the inherited output by the feature and the GRanges value.
#' @importFrom GenomicRanges show
#'
#' @export
setMethod("show", "GRangesFilter", function(object) {
    callNextMethod()
    cat("feature:", .feature(object), "\n")
    cat("value:\n")
    show(value(object))
})
372 | 
373 | 
374 | ############################################################
375 | ## Create install-time classes
376 | ##
377 | 
378 | #' @rdname AnnotationFilter
379 | #'
380 | #' @name AnnotationFilter
381 | #'
382 | #' @param feature \code{character(1)} defining on what feature the
383 | #'     \code{GRangesFilter} should be applied. Choices could be
384 | #'     \code{"gene"}, \code{"tx"} or \code{"exon"}.
385 | #'
386 | #' @examples
387 | #' ## Create a SymbolFilter to filter on a gene's symbol.
388 | #' sf <- SymbolFilter("BCL2")
389 | #' sf
390 | #'
391 | #' ## Create a GeneStartFilter to filter based on the genes' chromosomal start
392 | #' ## coordinates
393 | #' gsf <- GeneStartFilter(10000, condition = ">")
394 | #' gsf
395 | #'
396 | #' @export CdsStartFilter CdsEndFilter ExonIdFilter ExonNameFilter
397 | #' @export ExonStartFilter ExonEndFilter ExonRankFilter GeneIdFilter
398 | #' @export GeneNameFilter GeneBiotypeFilter GeneStartFilter
399 | #' @export GeneEndFilter EntrezFilter SymbolFilter TxIdFilter
400 | #' @export TxNameFilter TxBiotypeFilter TxStartFilter TxEndFilter
401 | #' @export ProteinIdFilter UniprotFilter SeqNameFilter SeqStrandFilter
402 | #' 
403 | #' @importFrom methods new
404 | #'
405 | #' @exportClass CdsStartFilter CdsEndFilter ExonIdFilter
406 | #'     ExonNameFilter ExonStartFilter ExonEndFilter ExonRankFilter
407 | #'     GeneIdFilter GeneNameFilter GeneBiotypeFilter GeneStartFilter
408 | #'     GeneEndFilter EntrezFilter SymbolFilter TxIdFilter TxNameFilter
409 | #'     TxBiotypeFilter TxStartFilter TxEndFilter ProteinIdFilter
410 | #'     UniprotFilter SeqNameFilter SeqStrandFilter
411 | NULL
412 | 
## Derive the filter class name from a field name: a letter at the start or
## directly after an underscore is upper-cased (dropping that underscore) and
## "Filter" is appended, e.g. "gene_id" -> "GeneIdFilter". Vectorised over
## `field`; zero-length input gives character(0).
.fieldToClass <- function(field) {
    if (!length(field))
        return(character(0))
    camel <- gsub("(^|_)([[:alpha:]])", "\\U\\2", field, perl=TRUE)
    paste0(camel, "Filter")
}
418 | 
## Create the constructor function for the filter class `class` acting on
## `field`.
##
## The returned function has the signature (value, condition = "==",
## not = FALSE). It coerces `value` with as.character() when `field` is listed
## in .FIELD[["CharacterFilter"]]; otherwise `value` has to be numeric and is
## coerced with as.integer(). `condition` and `not` are coerced to character
## and logical, and the object is created with new().
.filterFactory <- function(field, class) {
    force(field); force(class)          # watch for lazy evaluation
    as.value <-
        if (field %in% .FIELD[["CharacterFilter"]]) {
            ## any input is accepted and converted to character
            function(x) as.character(x)
        } else {
            function(x) {
                if (!is.numeric(x))
                    stop("Input to a ", field,
                         " filter must be a numeric vector.")
                as.integer(x)
            }
        }

    function(value, condition = "==", not = FALSE) {
        value <- as.value(value)
        condition <- as.character(condition)
        not <- as.logical(not)
        new(class, field=field, condition = condition, value=value, not=not)
    }
}
445 | 
## For every field listed in .FIELD, define the filter class (extending the
## base class the field is listed under) and assign a constructor function of
## the same name, both in the top-level environment.
local({
    register_filters <- function(base_class) {
        target <- topenv()
        field_names <- .FIELD[[base_class]]
        class_names <- .fieldToClass(field_names)
        for (idx in seq_along(field_names)) {
            cls <- class_names[[idx]]
            setClass(cls, contains=base_class, where=target)
            constructor <- .filterFactory(field_names[[idx]], cls)
            assign(cls, constructor, envir=target)
        }
    }
    for (base_class in names(.FIELD))
        register_filters(base_class)
})
462 | 
463 | ############################################################
464 | ## Utilities 
465 | ##
466 | 
## Translate a single filter into the text of an expression of the form
## "<field> <operator> <value>", prefixed with "!" when the filter is negated.
##
## - "==" / "!=" quote the value(s); more than one value is expressed with
##   %in%, and "!=" then becomes a negated %in%.
## - "startsWith" / "endsWith" / "contains" become %like% patterns that use
##   "%" as wildcard before and/or after the value.
## - ">", "<", ">=", "<=" are passed through with the unquoted number;
##   non-integer values keep their fractional part.
##
## Stops for filters whose field is "granges". Returns NULL (invisibly) for
## any other condition.
.convertFilter <- function(object) {
    field <- field(object)
    if (field == "granges")
        stop("GRangesFilter cannot be converted using convertFilter().")
    value <- value(object)
    condition <- condition(object)
    negated <- not(object)
    prefix <- if (negated) "!" else ""

    if (condition %in% c("==", "!=")) {
        quoted <- paste0("'", value, "'", collapse=", ")
        if (length(value) == 1)
            return(sprintf("%s%s %s %s", prefix, field, condition, quoted))
        ## "!=" on several values is a negated %in%; negating the filter once
        ## more cancels that "!" instead of stacking a second one.
        if (condition == "!=")
            prefix <- if (negated) "" else "!"
        return(sprintf("%s%s %%in%% c(%s)", prefix, field, quoted))
    }

    switch(
        condition,
        "startsWith" = sprintf("%s%s %%like%% '%s%%'", prefix, field, value),
        "endsWith" = sprintf("%s%s %%like%% '%%%s'", prefix, field, value),
        "contains" = sprintf("%s%s %%like%% '%%%s%%'", prefix, field, value),
        ">" = , "<" = , ">=" = , "<=" = {
            number <- if (is.integer(value)) {
                value
            } else {
                ## plain decimal notation, without truncating to integer
                format(value, scientific = FALSE, trim = TRUE, digits = 15)
            }
            sprintf("%s%s %s %s", prefix, field, condition, number)
        }
    )
}
506 | 
#' @rdname AnnotationFilter
#'
#' @description \code{convertFilter()} converts an \code{AnnotationFilter}
#'      object to a \code{character(1)} giving an equation that can be used as
#'      input to a \code{dplyr} filter.
#'
#' @return \code{character(1)} that can be used as input to a \code{dplyr}
#'      filter.
#'
#' @examples
#' filter <- SymbolFilter("ADA", "==")
#' result <- convertFilter(filter)
#' result
#' @export
setMethod(
    "convertFilter",
    signature = signature(object = "AnnotationFilter", db = "missing"),
    definition = .convertFilter
)
523 | 
## Filter classes that are not generated from an entry of .FIELD.
.FILTERS_WO_FIELD <- c("GRangesFilter")

## Build the table of filter class names and their default fields from .FIELD,
## add the GRangesFilter with field "granges" and order the rows by filter
## name.
.supportedFilters <- function() {
    default_fields <- unlist(.FIELD, use.names=FALSE)
    generated <- .fieldToClass(default_fields)
    tbl <- data.frame(
        filter = c(generated, .FILTERS_WO_FIELD),
        field = c(default_fields, "granges")
    )
    row_order <- order(tbl$filter)
    tbl[row_order, ]
}
535 | 
#' @rdname AnnotationFilter
#'
#' @examples
#' supportedFilters()
#' @export
setMethod(
    "supportedFilters",
    signature = "missing",
    definition = function(object) .supportedFilters()
)
544 | 
545 | #' @rdname GenenameFilter
546 | #'
547 | #' @title DEPRECATED Gene name filter
548 | #'
549 | #' @aliases GenenameFilter-class
550 | #' 
551 | #' @description
552 | #'
553 | #' The `GenenameFilter` class and functions are deprecated. Please use the
554 | #' [GeneNameFilter()] instead.
555 | #'
556 | #' @param value `character()` value for the filter
557 | #'
558 | #' @param condition `character(1)` defining the condition to be
559 | #'     used in the filter. One of `"=="`, `"!="`, `"startsWith"`, `"endsWith"`
560 | #'     or `"contains"`. Default condition is `"=="`.
561 | #'
562 | #' @param not `logical(1)` whether the `AnnotationFilter` is negated.
563 | #'     `TRUE` indicates is negated (!). `FALSE` indicates not
564 | #'     negated. Default not is `FALSE`.
565 | #'
566 | #' @return The constructor function return a `GenenameFilter`.
567 | #'
568 | #' @md
569 | #' 
570 | #' @export
571 | #' 
572 | #' @exportClass GenenameFilter
GenenameFilter <- function(value, condition = "==", not = FALSE) {
    ## signal the deprecation, then still build the requested filter
    .Deprecated(new = "GeneNameFilter")
    new(
        Class = "GenenameFilter",
        value = value,
        condition = condition,
        not = not
    )
}
577 | 
## Class behind the deprecated GenenameFilter(); a CharacterFilter whose
## default field is "genename".
.GenenameFilter <- setClass(
    Class = "GenenameFilter",
    contains = "CharacterFilter",
    prototype = list(field = "genename")
)
585 | 


--------------------------------------------------------------------------------