├── .svn
├── format
├── wc.db-journal
├── entries
├── wc.db
└── pristine
│ ├── 18
│ └── 18d19e0f6727f50e5ab77d34454fab81e82d049a.svn-base
│ ├── 21
│ └── 2189e6a627b4c711e766c184f50bb9cdc230e821.svn-base
│ ├── 33
│ └── 331922c1408ddd46de74575d157bb2d47ccfa3e9.svn-base
│ ├── 40
│ └── 40904d832591d5491cf5d562b1708a3f20d517f6.svn-base
│ ├── 50
│ └── 50170fd723a599624e474c619511a6ac10f2d072.svn-base
│ ├── 65
│ ├── 65e9e8d47fee95ed321413de21d8f8d13f873b17.svn-base
│ └── 6537ab78a07d200e9fe894802b6b5f17bdd38fd9.svn-base
│ ├── 73
│ └── 73eb11efc6635fdbac1cdc8ec4032d0dd1a2cbe6.svn-base
│ ├── 81
│ └── 81fa7ab77bea17ab59d67e253f098ea223b2e1d3.svn-base
│ ├── 91
│ └── 9145858a39a39a3d59b0aa20ef1971ab302d2f47.svn-base
│ ├── ff
│ └── ff35c68c1ccb291931f7f7bc302993d96557fe78.svn-base
│ ├── fc
│ └── fc49a2303f1b357ae6a059042d8d0c15fabb68fe.svn-base
│ ├── e0
│ └── e07c222eeebfc149377f972722b7aa62b4fc86cc.svn-base
│ ├── f1
│ └── f1748b5922e3eb9abfa76932622f609be9ff4d0c.svn-base
│ ├── 06
│ └── 065e68d6b11e4bf90da04ffd904757e8ce3c422b.svn-base
│ ├── a1
│ └── a129614aff000a6de02c214a739f8867a6f01752.svn-base
│ └── 4e
│ └── 4e9ec76b932b7ba44f5280dec6263ea963e53920.svn-base
├── .gitignore
├── tests
├── testthat.R
└── testthat
│ ├── test_AnnotationFilterList.R
│ ├── test_translate-utils.R
│ └── test_AnnotationFilter.R
├── NOTES.md
├── README.md
├── NEWS
├── R
├── AllGenerics.R
├── translate-utils.R
├── AnnotationFilterList.R
└── AnnotationFilter.R
├── man
├── GenenameFilter.Rd
├── AnnotationFilterList.Rd
└── AnnotationFilter.Rd
├── DESCRIPTION
├── NAMESPACE
└── vignettes
└── AnnotationFilter.Rmd
/.svn/format:
--------------------------------------------------------------------------------
1 | 12
2 |
--------------------------------------------------------------------------------
/.svn/wc.db-journal:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.svn/entries:
--------------------------------------------------------------------------------
1 | 12
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rhistory
2 | .RData
3 | .svn*
4 |
--------------------------------------------------------------------------------
/.svn/wc.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/AnnotationFilter/devel/.svn/wc.db
--------------------------------------------------------------------------------
/.svn/pristine/ff/ff35c68c1ccb291931f7f7bc302993d96557fe78.svn-base:
--------------------------------------------------------------------------------
1 | .Rhistory
2 | .RData
3 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(AnnotationFilter)
3 |
4 | test_check("AnnotationFilter")
5 |
--------------------------------------------------------------------------------
/.svn/pristine/73/73eb11efc6635fdbac1cdc8ec4032d0dd1a2cbe6.svn-base:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(AnnotationFilter)
3 |
4 | test_check("AnnotationFilter")
5 |
--------------------------------------------------------------------------------
/NOTES.md:
--------------------------------------------------------------------------------
1 | # Development guidelines
2 |
3 | - roxygen2 documentation
4 | - testthat unit tests
5 | - file name correspondence between code `R/foo.R`, tests
6 | `tests/testthat/test_foo.R`, and documentation `man/foo.Rd`.
7 | - version bump on master commit
8 | - commits to master pass R CMD build && R CMD check
9 |
--------------------------------------------------------------------------------
/.svn/pristine/50/50170fd723a599624e474c619511a6ac10f2d072.svn-base:
--------------------------------------------------------------------------------
1 | # Development guidelines
2 |
3 | - roxygen2 documentation
4 | - testthat unit tests
5 | - file name correspondence between code `R/foo.R`, tests
6 | `tests/testthat/test_foo.R`, and documentation `man/foo.Rd`.
7 | - version bump on master commit
8 | - commits to master pass R CMD build && R CMD check
9 |
--------------------------------------------------------------------------------
/.svn/pristine/fc/fc49a2303f1b357ae6a059042d8d0c15fabb68fe.svn-base:
--------------------------------------------------------------------------------
1 | CHANGES IN VERSION 1.1.2
2 | ------------------------
3 |
4 | NEW FEATURES
5 |
6 | o supportFilters returns a data.frame with filter class name and field.
7 |
8 |
9 | CHANGES IN VERSION 0.99.5
10 | --------------------------
11 |
12 | NEW FEATURES
13 |
14 | o Add convertFilterExpressionQuoted function.
15 | o Add field method.
16 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [Bioconductor](https://bioconductor.org/)
2 |
3 | **AnnotationFilter** is an R/Bioconductor package that provides facilities for filtering Bioconductor annotation resources.
4 |
5 | See https://bioconductor.org/packages/AnnotationFilter for more information including how to install the release version of the package (please refrain from installing directly from GitHub).
6 |
7 |
--------------------------------------------------------------------------------
/.svn/pristine/91/9145858a39a39a3d59b0aa20ef1971ab302d2f47.svn-base:
--------------------------------------------------------------------------------
1 | ## Generic methods.
2 | setGeneric("condition", function(object, ...) standardGeneric("condition"))
3 |
4 | setGeneric("field", function(object, ...) standardGeneric("field"))
5 |
6 | setGeneric("value", function(object, ...) standardGeneric("value"))
7 |
8 | setGeneric("logicOp", function(object, ...) standardGeneric("logicOp"))
9 |
10 | setGeneric("supportedFilters", function(object, ...)
11 | standardGeneric("supportedFilters"))
12 |
--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
1 | CHANGES IN VERSION 1.5.2
2 | ------------------------
3 |
4 | USER VISIBLE CHANGES
5 |
6 | o Rename GenenameFilter into GeneNameFilter and deprecate GenenameFilter
7 | (issue #22).
8 |
9 |
10 | CHANGES IN VERSION 1.3.1
11 | ------------------------
12 |
13 | NEW FEATURES
14 |
15 | o Add DoubleFilter
16 |
17 |
18 | CHANGES IN VERSION 1.1.2
19 | ------------------------
20 |
21 | NEW FEATURES
22 |
23 | o supportedFilters returns a data.frame with filter class name and field.
24 |
25 |
26 | CHANGES IN VERSION 0.99.5
27 | --------------------------
28 |
29 | NEW FEATURES
30 |
31 | o Add convertFilterExpressionQuoted function.
32 | o Add field method.
33 |
--------------------------------------------------------------------------------
/R/AllGenerics.R:
--------------------------------------------------------------------------------
1 | ## S4 generic declarations; this file only declares them (no methods here).
2 | setGeneric("condition", function(object, ...) standardGeneric("condition"))
3 | ## field(): presumably the accessor for a filter's field name -- TODO confirm.
4 | setGeneric("field", function(object, ...) standardGeneric("field"))
5 | ## value(): documented for AnnotationFilterList as returning a list of filters.
6 | setGeneric("value", function(object, ...) standardGeneric("value"))
7 | ## logicOp(): documented as returning the "&"/"|" operators between filters.
8 | setGeneric("logicOp", function(object, ...) standardGeneric("logicOp"))
9 | ## not(): presumably reports whether a filter is negated -- TODO confirm.
10 | setGeneric("not", function(object, ...) standardGeneric("not"))
11 | ## simplify(): semantics are not visible in this file; see its methods.
12 | setGeneric("simplify", function(object, ...) standardGeneric("simplify"))
13 | ## convertFilter(): the only generic here with a second formal, `db`.
14 | setGeneric("convertFilter", function(object, db, ...)
15 | standardGeneric("convertFilter"))
16 | ## distributeNegation(): unit tests expect !(a | b) to become !a & !b.
17 | setGeneric("distributeNegation", function(object, ...)
18 | standardGeneric("distributeNegation"))
19 | ## supportedFilters(): per NEWS, returns a data.frame of filter classes/fields.
20 | setGeneric("supportedFilters", function(object, ...)
21 | standardGeneric("supportedFilters"))
22 |
--------------------------------------------------------------------------------
/man/GenenameFilter.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AnnotationFilter.R
3 | \name{GenenameFilter}
4 | \alias{GenenameFilter}
5 | \alias{GenenameFilter-class}
6 | \title{DEPRECATED Gene name filter}
7 | \usage{
8 | GenenameFilter(value, condition = "==", not = FALSE)
9 | }
10 | \arguments{
11 | \item{value}{\code{character()} value for the filter}
12 |
13 | \item{condition}{\code{character(1)} defining the condition to be
14 | used in the filter. One of \code{"=="}, \code{"!="}, \code{"startsWith"}, \code{"endsWith"}
15 | or \code{"contains"}. Default condition is \code{"=="}.}
16 |
17 | \item{not}{\code{logical(1)} whether the \code{AnnotationFilter} is negated.
18 | \code{TRUE} indicates that the filter is negated (!), \code{FALSE} that it
19 | is not negated. The default is \code{FALSE}.}
20 | }
21 | \value{
22 | The constructor function returns a \code{GenenameFilter}.
23 | }
24 | \description{
25 | The \code{GenenameFilter} class and functions are deprecated. Please use the
26 | \code{\link[=GeneNameFilter]{GeneNameFilter()}} instead.
27 | }
28 |
--------------------------------------------------------------------------------
/.svn/pristine/40/40904d832591d5491cf5d562b1708a3f20d517f6.svn-base:
--------------------------------------------------------------------------------
1 | Package: AnnotationFilter
2 | Title: Facilities for Filtering Bioconductor Annotation Resources
3 | Version: 0.99.8
4 | Authors@R: c( person("Martin", "Morgan", email =
5 | "martin.morgan@roswellpark.org", role = "aut"),
6 | person("Johannes", "Rainer", email =
7 | "johannes.rainer@eurac.edu", role = "aut"),
8 | person("Bioconductor", "Maintainer",
9 | email="maintainer@bioconductor.org", role = "cre"))
10 | URL: https://github.com/Bioconductor/AnnotationFilter
11 | BugReports: https://github.com/Bioconductor/AnnotationFilter/issues
12 | Description: This package provides class and other infrastructure to
13 | implement filters for manipulating Bioconductor annotation
14 | resources. The filters will be used by ensembldb,
15 | Organism.dplyr, and other packages.
16 | Depends: R (>= 3.4.0)
17 | Imports: utils, methods, GenomicRanges, lazyeval
18 | Suggests: BiocStyle, knitr, testthat, RSQLite, org.Hs.eg.db
19 | VignetteBuilder: knitr
20 | License: Artistic-2.0
21 | biocViews: Annotation, Infrastructure, Software
22 | Encoding: UTF-8
23 | LazyData: true
24 | RoxygenNote: 6.0.1
25 | Collate: 'AllGenerics.R' 'AnnotationFilter.R' 'AnnotationFilterList.R'
26 | 'translate-utils.R'
27 |
--------------------------------------------------------------------------------
/.svn/pristine/e0/e07c222eeebfc149377f972722b7aa62b4fc86cc.svn-base:
--------------------------------------------------------------------------------
1 | Package: AnnotationFilter
2 | Title: Facilities for Filtering Bioconductor Annotation Resources
3 | Version: 1.1.3
4 | Authors@R: c(
5 | person("Martin", "Morgan", email = "martin.morgan@roswellpark.org",
6 | role = "aut"),
7 | person("Johannes", "Rainer", email = "johannes.rainer@eurac.edu",
8 | role = "aut"),
9 | person("Joachim", "Bargsten", email = "jw@bargsten.org", role = "ctb"),
10 | person("Bioconductor", "Maintainer", email="maintainer@bioconductor.org",
11 | role = "cre"))
12 | URL: https://github.com/Bioconductor/AnnotationFilter
13 | BugReports: https://github.com/Bioconductor/AnnotationFilter/issues
14 | Description: This package provides class and other infrastructure to
15 | implement filters for manipulating Bioconductor annotation
16 | resources. The filters will be used by ensembldb, Organism.dplyr,
17 | and other packages.
18 | Depends:
19 | R (>= 3.4.0)
20 | Imports:
21 | utils,
22 | methods,
23 | GenomicRanges,
24 | lazyeval
25 | Suggests:
26 | BiocStyle,
27 | knitr,
28 | testthat,
29 | RSQLite,
30 | org.Hs.eg.db
31 | VignetteBuilder: knitr
32 | License: Artistic-2.0
33 | biocViews: Annotation, Infrastructure, Software
34 | Encoding: UTF-8
35 | LazyData: true
36 | RoxygenNote: 6.0.1
37 | Collate:
38 | 'AllGenerics.R'
39 | 'AnnotationFilter.R'
40 | 'AnnotationFilterList.R'
41 | 'translate-utils.R'
42 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: AnnotationFilter
2 | Title: Facilities for Filtering Bioconductor Annotation Resources
3 | Version: 1.35.0
4 | Authors@R: c(
5 | person("Martin", "Morgan", email = "martin.morgan@roswellpark.org",
6 | role = "aut"),
7 | person("Johannes", "Rainer", email = "johannes.rainer@eurac.edu",
8 | role = "aut"),
9 | person("Joachim", "Bargsten", email = "jw@bargsten.org", role = "ctb"),
10 | person("Daniel", "Van Twisk", email = "daniel.vantwisk@roswellpark.org",
11 | role = "ctb"),
12 | person("Bioconductor Package", "Maintainer",
13 | email="maintainer@bioconductor.org",
14 | role = "cre"))
15 | URL: https://github.com/Bioconductor/AnnotationFilter
16 | BugReports: https://github.com/Bioconductor/AnnotationFilter/issues
17 | Description: This package provides class and other infrastructure to
18 | implement filters for manipulating Bioconductor annotation
19 | resources. The filters will be used by ensembldb, Organism.dplyr,
20 | and other packages.
21 | Depends:
22 | R (>= 3.4.0)
23 | Imports:
24 | utils,
25 | methods,
26 | GenomicRanges,
27 | lazyeval
28 | Suggests:
29 | BiocStyle,
30 | knitr,
31 | testthat,
32 | RSQLite,
33 | org.Hs.eg.db,
34 | rmarkdown
35 | VignetteBuilder: knitr
36 | License: Artistic-2.0
37 | biocViews: Annotation, Infrastructure, Software
38 | Encoding: UTF-8
39 | LazyData: true
40 | RoxygenNote: 6.0.1
41 | Collate:
42 | 'AllGenerics.R'
43 | 'AnnotationFilter.R'
44 | 'AnnotationFilterList.R'
45 | 'translate-utils.R'
46 |
--------------------------------------------------------------------------------
/.svn/pristine/18/18d19e0f6727f50e5ab77d34454fab81e82d049a.svn-base:
--------------------------------------------------------------------------------
1 | context("AnnotationFilterList")
2 |
3 | test_that("AnnotationFilterList() works", {
4 | f1 <- GeneIdFilter("somegene")
5 | f2 <- SeqNameFilter("chr3")
6 | f3 <- GeneBiotypeFilter("protein_coding", "!=")
7 |
8 | fL <- AnnotationFilter:::AnnotationFilterList(f1, f2)
9 | expect_true(length(fL) == 2)
10 | expect_equal(fL[[1]], f1)
11 | expect_equal(fL[[2]], f2)
12 | expect_true(all(logicOp(fL) == "&"))
13 |
14 | fL <- AnnotationFilter:::AnnotationFilterList(f1, f2, f3,
15 | logicOp = c("&", "|"))
16 | expect_true(length(fL) == 3)
17 | expect_equal(fL[[1]], f1)
18 | expect_equal(fL[[2]], f2)
19 | expect_equal(fL[[3]], f3)
20 | expect_equal(logicOp(fL), c("&", "|"))
21 |
22 | ## A AnnotationFilterList with and AnnotationFilterList
23 | fL <- AnnotationFilter:::AnnotationFilterList(f1, f2, logicOp = "|")
24 | fL2 <- AnnotationFilter:::AnnotationFilterList(f3, fL, logicOp = "&")
25 | expect_true(length(fL) == 2)
26 | expect_true(length(fL2) == 2)
27 | expect_true(is(value(fL2)[[1]], "GeneBiotypeFilter"))
28 | expect_true(is(value(fL2)[[2]], "AnnotationFilterList"))
29 | expect_equal(value(fL2)[[2]], fL)
30 | expect_equal(fL2[[2]], fL)
31 | expect_equal(logicOp(fL2), "&")
32 | expect_equal(logicOp(fL2[[2]]), "|")
33 | })
34 |
35 | test_that("empty elements in AnnotationFilterList", {
36 | ## empty elements should be removed from the AnnotationFilterList.
37 | empty_afl <- AnnotationFilterList()
38 | afl <- AnnotationFilterList(empty_afl)
39 | expect_true(length(afl) == 0)
40 | afl <- AnnotationFilterList(GeneIdFilter(4), empty_afl)
41 | expect_true(length(afl) == 1)
42 | afl <- AnnotationFilterList(GeneIdFilter(4),
43 | AnnotationFilter(~ gene_id == 3 | seq_name == 4),
44 | empty_afl)
45 | expect_true(length(afl) == 2)
46 | ## Check validate.
47 | afl@.Data <- c(afl@.Data, list(empty_afl))
48 | ## Fix also the logOp.
49 | afl@logOp <- c(afl@logOp, "|")
50 | expect_error(validObject(afl))
51 | })
52 |
--------------------------------------------------------------------------------
/.svn/pristine/f1/f1748b5922e3eb9abfa76932622f609be9ff4d0c.svn-base:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(AnnotationFilter)
4 | export(AnnotationFilterList)
5 | export(CdsEndFilter)
6 | export(CdsStartFilter)
7 | export(EntrezFilter)
8 | export(ExonEndFilter)
9 | export(ExonIdFilter)
10 | export(ExonNameFilter)
11 | export(ExonRankFilter)
12 | export(ExonStartFilter)
13 | export(GRangesFilter)
14 | export(GeneBiotypeFilter)
15 | export(GeneEndFilter)
16 | export(GeneIdFilter)
17 | export(GeneStartFilter)
18 | export(GenenameFilter)
19 | export(ProteinIdFilter)
20 | export(SeqNameFilter)
21 | export(SeqStrandFilter)
22 | export(SymbolFilter)
23 | export(TxBiotypeFilter)
24 | export(TxEndFilter)
25 | export(TxIdFilter)
26 | export(TxNameFilter)
27 | export(TxStartFilter)
28 | export(UniprotFilter)
29 | export(feature)
30 | export(logicOp)
31 | exportClasses(AnnotationFilter)
32 | exportClasses(AnnotationFilterList)
33 | exportClasses(CdsEndFilter)
34 | exportClasses(CdsStartFilter)
35 | exportClasses(CharacterFilter)
36 | exportClasses(EntrezFilter)
37 | exportClasses(ExonEndFilter)
38 | exportClasses(ExonIdFilter)
39 | exportClasses(ExonNameFilter)
40 | exportClasses(ExonRankFilter)
41 | exportClasses(ExonStartFilter)
42 | exportClasses(GRangesFilter)
43 | exportClasses(GeneBiotypeFilter)
44 | exportClasses(GeneEndFilter)
45 | exportClasses(GeneIdFilter)
46 | exportClasses(GeneStartFilter)
47 | exportClasses(GenenameFilter)
48 | exportClasses(IntegerFilter)
49 | exportClasses(ProteinIdFilter)
50 | exportClasses(SeqNameFilter)
51 | exportClasses(SeqStrandFilter)
52 | exportClasses(SymbolFilter)
53 | exportClasses(TxBiotypeFilter)
54 | exportClasses(TxEndFilter)
55 | exportClasses(TxIdFilter)
56 | exportClasses(TxNameFilter)
57 | exportClasses(TxStartFilter)
58 | exportClasses(UniprotFilter)
59 | exportMethods(condition)
60 | exportMethods(field)
61 | exportMethods(show)
62 | exportMethods(supportedFilters)
63 | exportMethods(value)
64 | importClassesFrom(GenomicRanges,GRanges)
65 | importFrom(GenomicRanges,GRanges)
66 | importFrom(GenomicRanges,show)
67 | importFrom(lazyeval,f_eval)
68 | importFrom(methods,callNextMethod)
69 | importFrom(methods,initialize)
70 | importFrom(methods,is)
71 | importFrom(methods,new)
72 | importFrom(methods,show)
73 | importFrom(methods,validObject)
74 | importFrom(utils,tail)
75 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(AnnotationFilter)
4 | export(AnnotationFilterList)
5 | export(CdsEndFilter)
6 | export(CdsStartFilter)
7 | export(EntrezFilter)
8 | export(ExonEndFilter)
9 | export(ExonIdFilter)
10 | export(ExonNameFilter)
11 | export(ExonRankFilter)
12 | export(ExonStartFilter)
13 | export(GRangesFilter)
14 | export(GeneBiotypeFilter)
15 | export(GeneEndFilter)
16 | export(GeneIdFilter)
17 | export(GeneNameFilter)
18 | export(GeneStartFilter)
19 | export(GenenameFilter)
20 | export(ProteinIdFilter)
21 | export(SeqNameFilter)
22 | export(SeqStrandFilter)
23 | export(SymbolFilter)
24 | export(TxBiotypeFilter)
25 | export(TxEndFilter)
26 | export(TxIdFilter)
27 | export(TxNameFilter)
28 | export(TxStartFilter)
29 | export(UniprotFilter)
30 | export(feature)
31 | export(logicOp)
32 | export(not)
33 | exportClasses(AnnotationFilter)
34 | exportClasses(AnnotationFilterList)
35 | exportClasses(CdsEndFilter)
36 | exportClasses(CdsStartFilter)
37 | exportClasses(CharacterFilter)
38 | exportClasses(DoubleFilter)
39 | exportClasses(EntrezFilter)
40 | exportClasses(ExonEndFilter)
41 | exportClasses(ExonIdFilter)
42 | exportClasses(ExonNameFilter)
43 | exportClasses(ExonRankFilter)
44 | exportClasses(ExonStartFilter)
45 | exportClasses(GRangesFilter)
46 | exportClasses(GeneBiotypeFilter)
47 | exportClasses(GeneEndFilter)
48 | exportClasses(GeneIdFilter)
49 | exportClasses(GeneNameFilter)
50 | exportClasses(GeneStartFilter)
51 | exportClasses(GenenameFilter)
52 | exportClasses(IntegerFilter)
53 | exportClasses(ProteinIdFilter)
54 | exportClasses(SeqNameFilter)
55 | exportClasses(SeqStrandFilter)
56 | exportClasses(SymbolFilter)
57 | exportClasses(TxBiotypeFilter)
58 | exportClasses(TxEndFilter)
59 | exportClasses(TxIdFilter)
60 | exportClasses(TxNameFilter)
61 | exportClasses(TxStartFilter)
62 | exportClasses(UniprotFilter)
63 | exportMethods(condition)
64 | exportMethods(convertFilter)
65 | exportMethods(distributeNegation)
66 | exportMethods(field)
67 | exportMethods(not)
68 | exportMethods(show)
69 | exportMethods(supportedFilters)
70 | exportMethods(value)
71 | importClassesFrom(GenomicRanges,GRanges)
72 | importFrom(GenomicRanges,GRanges)
73 | importFrom(GenomicRanges,show)
74 | importFrom(lazyeval,f_eval)
75 | importFrom(methods,callNextMethod)
76 | importFrom(methods,initialize)
77 | importFrom(methods,is)
78 | importFrom(methods,new)
79 | importFrom(methods,show)
80 | importFrom(methods,validObject)
81 | importFrom(utils,head)
82 | importFrom(utils,tail)
83 |
--------------------------------------------------------------------------------
/.svn/pristine/65/65e9e8d47fee95ed321413de21d8f8d13f873b17.svn-base:
--------------------------------------------------------------------------------
1 | context("AnnotationFilter")
2 |
3 | test_that("supportedFilters() works", {
4 | expect_true(inherits(supportedFilters(), "data.frame"))
5 | expect_identical(
6 | nrow(supportedFilters()),
7 | length(unlist(AnnotationFilter:::.FIELD, use.names=FALSE)) +
8 | length(AnnotationFilter:::.FILTERS_WO_FIELD)
9 | )
10 | })
11 |
12 | test_that("SymbolFilter as representative for character filters", {
13 | expect_true(validObject(new("SymbolFilter")))
14 | expect_error(SymbolFilter())
15 | expect_error(SymbolFilter(1, ">"))
16 | expect_error(SymbolFilter(1, "foo"))
17 | expect_error(SymbolFilter(c("foo","bar"), "startsWith"))
18 | ## Getter / setter
19 | fl <- SymbolFilter("BCL2")
20 | expect_equal(value(fl), "BCL2")
21 | fl <- SymbolFilter(c(4, 5))
22 | expect_equal(value(fl), c("4", "5"))
23 | fl <- SymbolFilter(3)
24 | expect_equal(value(fl), "3")
25 | expect_error(SymbolFilter(NA))
26 | ## condition.
27 | expect_equal(condition(fl), "==")
28 | fl <- SymbolFilter("a", condition = "!=")
29 | expect_equal(condition(fl), "!=")
30 | expect_error(SymbolFilter("a", condition = "<"))
31 | expect_error(SymbolFilter("a", condition = ""))
32 | expect_error(SymbolFilter("a", condition = c("==", ">")))
33 | expect_error(SymbolFilter("a", condition = NULL))
34 | expect_error(SymbolFilter("a", condition = NA))
35 | expect_error(SymbolFilter("a", condition = 4))
36 | })
37 |
38 | test_that("GeneStartFilter as representative for integer filters", {
39 | gsf <- GeneStartFilter(10000, condition = ">")
40 | expect_equal(condition(gsf), ">")
41 | expect_error(GeneStartFilter("3"))
42 | expect_error(GeneStartFilter("B"))
43 | expect_error(GeneStartFilter(NA))
44 | expect_error(GeneStartFilter(NULL))
45 | expect_error(GeneStartFilter())
46 | ## Condition
47 | expect_error(GeneStartFilter(10000, condition = "startsWith"))
48 | expect_error(GeneStartFilter(10000, condition = "endsWith"))
49 | expect_error(GeneStartFilter(10000, condition = c("==", "<")))
50 | })
51 |
52 | test_that("GRangesFilter works", {
53 | GRanges <- GenomicRanges::GRanges
54 | grf <- GRangesFilter(GRanges("chr10:87869000-87876000"))
55 | expect_equal(condition(grf), "any")
56 | expect_error(GRangesFilter(value = 3))
57 | expect_error(GRangesFilter(
58 | GRanges("chr10:87869000-87876000"),
59 | type = "=="
60 | ))
61 | grf <- GRangesFilter(
62 | GRanges("chr10:87869000-87876000"),
63 | type = "within",
64 | feature = "tx"
65 | )
66 | expect_equal(condition(grf), "within")
67 | expect_equal(feature(grf), "tx")
68 | })
69 |
70 | test_that("fieldToClass works", {
71 | expect_identical(AnnotationFilter:::.fieldToClass("gene_id"),
72 | "GeneIdFilter")
73 | ## Support replacement for multiple _ : issue #13
74 | expect_identical(AnnotationFilter:::.fieldToClass("gene_seq_start"),
75 | "GeneSeqStartFilter")
76 | })
77 |
--------------------------------------------------------------------------------
/tests/testthat/test_AnnotationFilterList.R:
--------------------------------------------------------------------------------
1 | context("AnnotationFilterList")
2 |
3 | test_that("AnnotationFilterList() works", {
4 |     f1 <- GeneIdFilter("somegene")
5 |     f2 <- SeqNameFilter("chr3")
6 |     f3 <- GeneBiotypeFilter("protein_coding", "!=")
7 |     ## Two filters, no operator given: they are joined by a logical "and".
8 |     fL <- AnnotationFilterList(f1, f2)
9 |     expect_length(fL, 2)
10 |     expect_equal(fL[[1]], f1)
11 |     expect_equal(fL[[2]], f2)
12 |     expect_true(all(logicOp(fL) == "&"))
13 |     ## Three filters with one explicit operator per consecutive pair.
14 |     fL <- AnnotationFilterList(f1, f2, f3,
15 |                                logicOp = c("&", "|"))
16 |     expect_length(fL, 3)
17 |     expect_equal(fL[[1]], f1)
18 |     expect_equal(fL[[2]], f2)
19 |     expect_equal(fL[[3]], f3)
20 |     expect_equal(logicOp(fL), c("&", "|"))
21 |
22 |     ## An AnnotationFilterList nested inside another AnnotationFilterList.
23 |     fL <- AnnotationFilterList(f1, f2, logicOp = "|")
24 |     fL2 <- AnnotationFilterList(f3, fL, logicOp = "&")
25 |     expect_length(fL, 2)
26 |     expect_length(fL2, 2)
27 |     expect_s4_class(value(fL2)[[1]], "GeneBiotypeFilter")
28 |     expect_s4_class(value(fL2)[[2]], "AnnotationFilterList")
29 |     expect_equal(value(fL2)[[2]], fL)
30 |     expect_equal(fL2[[2]], fL)
31 |     expect_equal(logicOp(fL2), "&")
32 |     expect_equal(logicOp(fL2[[2]]), "|")
33 | })
34 |
35 | test_that("empty elements in AnnotationFilterList", {
36 |     ## The constructor drops empty elements from the AnnotationFilterList.
37 |     empty_afl <- AnnotationFilterList()
38 |     afl <- AnnotationFilterList(empty_afl)
39 |     expect_length(afl, 0)
40 |     afl <- AnnotationFilterList(GeneIdFilter(4), empty_afl)
41 |     expect_length(afl, 1)
42 |     afl <- AnnotationFilterList(GeneIdFilter(4),
43 |         AnnotationFilter(~ gene_id == 3 | seq_name == 4), empty_afl)
44 |     expect_length(afl, 2)
45 |     ## Validity check: bypass the constructor to inject an empty element.
46 |     afl@.Data <- c(afl@.Data, list(empty_afl))
47 |     ## Extend logOp too, so only the empty element can trigger the error.
48 |     afl@logOp <- c(afl@logOp, "|")
49 |     expect_error(validObject(afl))
50 | })
51 |
52 | test_that("convertFilter works", {
53 |     smbl <- SymbolFilter("ADA")
54 |     txid <- TxIdFilter(1000)
55 |     gr <- GRangesFilter(GenomicRanges::GRanges("chr15:25062333-25065121"))
56 |     ## Filter objects are referenced by name inside the formula.
57 |     expect_identical(convertFilter(AnnotationFilter(~smbl | txid)),
58 |                      "symbol == 'ADA' | tx_id == '1000'")
59 |     expect_identical(convertFilter(AnnotationFilter(~smbl & (smbl | txid))),
60 |                      "symbol == 'ADA' & (symbol == 'ADA' | tx_id == '1000')")
61 |     expect_identical(convertFilter(AnnotationFilter(~smbl & !(smbl | txid))),
62 |                      "symbol == 'ADA' & !(symbol == 'ADA' | tx_id == '1000')")
63 |     ## `~` matters: without it `|` itself fails before convertFilter() runs.
64 |     expect_error(convertFilter(AnnotationFilter(~smbl | (txid & gr))))
65 | })
66 |
67 | test_that("distributeNegation works", {
68 |     ## Negating a parenthesised "or" must equal negating each operand
69 |     ## and joining them with "and".
70 |     grouped <- AnnotationFilter(
71 |         ~!(symbol == 'ADA' | symbol %startsWith% 'SNORD'))
72 |     expected <- AnnotationFilter(
73 |         ~!symbol == 'ADA' & !symbol %startsWith% 'SNORD')
74 |     expect_identical(distributeNegation(grouped), expected)
75 | })
76 |
--------------------------------------------------------------------------------
/.svn/pristine/33/331922c1408ddd46de74575d157bb2d47ccfa3e9.svn-base:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AnnotationFilterList.R
3 | \docType{methods}
4 | \name{AnnotationFilterList}
5 | \alias{AnnotationFilterList}
6 | \alias{AnnotationFilterList-class}
7 | \alias{AnnotationFilterList}
8 | \alias{value,AnnotationFilterList-method}
9 | \alias{logicOp,AnnotationFilterList-method}
10 | \alias{logicOp}
11 | \alias{show,AnnotationFilterList-method}
12 | \title{Combining annotation filters}
13 | \usage{
14 | AnnotationFilterList(..., logicOp = character(), logOp = character())
15 |
16 | \S4method{value}{AnnotationFilterList}(object)
17 |
18 | \S4method{logicOp}{AnnotationFilterList}(object)
19 |
20 | \S4method{show}{AnnotationFilterList}(object)
21 | }
22 | \arguments{
23 | \item{...}{individual \code{\link{AnnotationFilter}} objects or a
24 | mixture of \code{AnnotationFilter} and
25 | \code{AnnotationFilterList} objects.}
26 |
27 | \item{logicOp}{\code{character} of length equal to the number
28 | of submitted \code{AnnotationFilter} objects - 1. Each value
29 | representing the logical operation to combine consecutive
30 | filters, i.e. the first element being the logical operation to
31 | combine the first and second \code{AnnotationFilter}, the
32 | second element being the logical operation to combine the
33 | second and third \code{AnnotationFilter} and so on. Allowed
34 | values are \code{"&"} and \code{"|"}. The function assumes a
35 | logical \emph{and} between all elements by default.}
36 |
37 | \item{logOp}{Deprecated; use \code{logicOp=}.}
38 |
39 | \item{object}{An object of class \code{AnnotationFilterList}.}
40 | }
41 | \value{
42 | \code{AnnotationFilterList} returns an \code{AnnotationFilterList}.
43 |
44 | \code{value()} returns a \code{list} with \code{AnnotationFilter}
45 | objects.
46 |
47 | \code{logicOp()} returns a \code{character()} vector of
48 | \dQuote{&} or \dQuote{|} symbols.
49 | }
50 | \description{
51 | The \code{AnnotationFilterList} allows to combine
52 | filter objects extending the \code{\link{AnnotationFilter}}
53 | class to construct more complex queries. Consecutive filter
54 | objects in the \code{AnnotationFilterList} can be combined by a
55 | logical \emph{and} (\code{&}) or \emph{or} (\code{|}). The
56 | \code{AnnotationFilterList} extends \code{list}, individual
57 | elements can thus be accessed with \code{[[}.
58 |
59 | \code{value()} get a \code{list} with the
60 | \code{AnnotationFilter} objects. Use \code{[[} to access
61 | individual filters.
62 |
63 | \code{logicOp()} gets the logical operators separating
64 | successive \code{AnnotationFilter}.
65 | }
66 | \note{
67 | The \code{AnnotationFilterList} does not support containing empty
68 | elements, hence all elements of \code{length == 0} are removed in
69 | the constructor function.
70 | }
71 | \examples{
72 | ## Create some AnnotationFilters
73 | gf <- GenenameFilter(c("BCL2", "BCL2L11"))
74 | tbtf <- TxBiotypeFilter("protein_coding", condition = "!=")
75 |
76 | ## Combine both to an AnnotationFilterList. By default elements are combined
77 | ## using a logical "and" operator. The filter list represents thus a query
78 | ## like: get all features where the gene name is either ("BCL2" or "BCL2L11")
79 | ## and the transcript biotype is not "protein_coding".
80 | afl <- AnnotationFilterList(gf, tbtf)
81 | afl
82 |
83 | ## Access individual filters.
84 | afl[[1]]
85 |
86 | ## Create a filter in the form of: get all features where the gene name is
87 | ## either ("BCL2" or "BCL2L11") and the transcript biotype is not
88 | ## "protein_coding" or the seq_name is "Y". Hence, this will get all feature
89 | ## also found by the previous AnnotationFilterList and returns also all
90 | ## features on chromosome Y.
91 | afl <- AnnotationFilterList(gf, tbtf, SeqNameFilter("Y"),
92 | logicOp = c("&", "|"))
93 | afl
94 |
95 | }
96 | \seealso{
97 | \code{\link{supportedFilters}} for available
98 | \code{\link{AnnotationFilter}} objects
99 | }
100 |
--------------------------------------------------------------------------------
/.svn/pristine/06/065e68d6b11e4bf90da04ffd904757e8ce3c422b.svn-base:
--------------------------------------------------------------------------------
1 | #' @include AnnotationFilter.R
2 |
3 | ## Functionality to translate a query condition to an AnnotationFilter.
4 |
5 | #' Build the translator for one binary condition operator.
6 | #'
7 | #' Adapted from GenomicDataCommons. \code{sep} is stored as the condition of
8 | #' the filter created by the returned function.
9 | #'
10 | #' @importFrom methods is validObject initialize
11 | #' @noRd
12 | .binary_op <- function(sep) {
13 |     force(sep)
14 |     function(e1, e2) {
15 |         ## The unevaluated left-hand side names the field. The filter class
16 |         ## derived from it is instantiated before the value is evaluated.
17 |         fld <- as.character(substitute(e1))
18 |         cls <- .fieldToClass(fld)
19 |         on_failure <- function(e)
20 |             stop("No AnnotationFilter class '", cls, "' for field '",
21 |                  fld, "' defined")
22 |         flt <- tryCatch(new(cls, condition = sep, field = fld),
23 |                         error = on_failure)
24 |         ## Evaluate the right-hand side and coerce it to the filter's type.
25 |         val <- e2
26 |         if (is(flt, "CharacterFilter"))
27 |             val <- as.character(val)
28 |         else if (is(flt, "IntegerFilter"))
29 |             val <- as.integer(val)
30 |         initialize(flt, value = val)
31 |     }
32 | }
33 |
34 | #' Combine filters into an AnnotationFilterList joined by \code{sep}.
35 | #'
36 | #' Returns a function of two operands. An operand that is an
37 | #' AnnotationFilterList is flattened into the result, any other operand is
38 | #' added as a single element.
39 | #'
40 | #' @noRd
41 | .combine_op <- function(sep) {
42 |     force(sep)
43 |     function(e1, e2) {
44 |         ## Avoid implicit nesting of AnnotationFilterList. The operators of a
45 |         ## flattened operand have to stay next to its filters, hence the
46 |         ## order: operators of e1, then sep, then operators of e2.
47 |         left <- is(e1, "AnnotationFilterList")
48 |         right <- is(e2, "AnnotationFilterList")
49 |         ops <- c(if (left) logicOp(e1), sep, if (right) logicOp(e2))
50 |         e1 <- if (left) .aflvalue(e1) else list(e1)
51 |         e2 <- if (right) .aflvalue(e2) else list(e2)
52 |         ## Don't use the constructor here.
53 |         new("AnnotationFilterList", c(e1, e2), logOp = ops)
54 |     }
55 | }
56 |
57 | #' Registry of translator functions, one per operator supported in filter
58 | #' expressions. Expressions are evaluated with this \code{list} as data.
59 | #'
60 | #' @noRd
61 | .LOG_OP_REG <- list(
62 |     ## Conditions; %in% is translated into an "==" condition.
63 |     `==` = .binary_op("=="),
64 |     `%in%` = .binary_op("=="),
65 |     `!=` = .binary_op("!="),
66 |     `>` = .binary_op(">"),
67 |     `<` = .binary_op("<"),
68 |     `>=` = .binary_op(">="),
69 |     `<=` = .binary_op("<="),
70 |     ## Combine filters.
71 |     `&` = .combine_op("&"),
72 |     `|` = .combine_op("|")
73 | )
74 |
75 | #' @rdname AnnotationFilter
76 | #'
77 | #' @description \code{AnnotationFilter} \emph{translates} a filter
78 | #'     expression such as \code{~ gene_id == "BCL2"} into a filter object
79 | #'     extending the \code{\link{AnnotationFilter}} class (in the example a
80 | #'     \code{\link{GeneIdFilter}} object) or into an
81 | #'     \code{\link{AnnotationFilterList}} if the expression contains multiple
82 | #'     conditions (see examples below). Filter expressions have to be written
83 | #'     in the form \code{~ <field> <condition> <value>}, with \code{<field>}
84 | #'     being the default field of the filter class (use the
85 | #'     \code{supportedFilters} function to list all fields and filter
86 | #'     classes), \code{<condition>} the logical expression and
87 | #'     \code{<value>} the value for the filter.
88 | #'
89 | #' @details Filter expressions for the \code{AnnotationFilter} class have to be
90 | #'     written as formulas, i.e. starting with a \code{~}.
91 | #'
92 | #' @note Translation of nested filter expressions using the
93 | #'     \code{AnnotationFilter} function is not yet supported.
94 | #'
95 | #' @param expr A filter expression, written as a \code{formula}, to be
96 | #'     converted to an \code{AnnotationFilter} or \code{AnnotationFilterList}
97 | #'     class. See below for examples.
98 | #'
99 | #' @return \code{AnnotationFilter} returns an
100 | #'     \code{\link{AnnotationFilter}} or an \code{\link{AnnotationFilterList}}.
101 | #'
102 | #' @importFrom lazyeval f_eval
103 | #'
104 | #' @examples
105 | #' ## Convert a filter expression based on a gene ID to a GeneIdFilter
106 | #' gnf <- AnnotationFilter(~ gene_id == "BCL2")
107 | #' gnf
108 | #'
109 | #' ## Same conversion but for two gene IDs.
110 | #' gnf <- AnnotationFilter(~ gene_id %in% c("BCL2", "BCL2L11"))
111 | #' gnf
112 | #'
113 | #' ## Converting an expression that combines multiple filters. As a result we
114 | #' ## get an AnnotationFilterList containing the corresponding filters.
115 | #' ## Be aware that nesting of expressions/filters does not work.
116 | #' flt <- AnnotationFilter(~ gene_id %in% c("BCL2", "BCL2L11") &
117 | #'                             tx_biotype == "nonsense_mediated_decay" |
118 | #'                             seq_name == "Y")
119 | #' flt
120 | #'
121 | #' @export
122 | AnnotationFilter <- function(expr) {
123 |     translated <- f_eval(expr, data = .LOG_OP_REG)
124 |     translated
125 | }
126 |
--------------------------------------------------------------------------------
/tests/testthat/test_translate-utils.R:
--------------------------------------------------------------------------------
1 | context("expression translation")
2 |
3 | test_that("translation of expression works for single filter/condition", {
4 | ## Check for some character filter.
5 | ## exon_id
6 | flt <- ExonIdFilter("EX1", condition = "==")
7 | flt2 <- AnnotationFilter(~ exon_id == "EX1")
8 | expect_equal(flt, flt2)
9 | flt <- ExonIdFilter(c("EX1", "EX2"), condition = "!=")
10 | flt2 <- AnnotationFilter(~ exon_id != c("EX1", "EX2"))
11 | expect_equal(flt, flt2)
12 | ## seq_name
13 | flt <- SeqNameFilter(c("chr3", "chrX"), condition = "==")
14 | flt2 <- AnnotationFilter(~ seq_name == c("chr3", "chrX"))
15 | expect_equal(flt, flt2)
16 | flt <- SeqNameFilter(1:3, condition = "==")
17 | flt2 <- AnnotationFilter(~ seq_name %in% 1:3)
18 | expect_equal(flt, flt2)
19 | ## Check IntegerFilter
20 | flt <- GeneStartFilter(123, condition = ">")
21 | flt2 <- AnnotationFilter(~ gene_start > 123)
22 | expect_equal(flt, flt2)
23 | flt <- TxStartFilter(123, condition = "<")
24 | flt2 <- AnnotationFilter(~ tx_start < 123)
25 | expect_equal(flt, flt2)
26 | flt <- GeneEndFilter(123, condition = ">=")
27 | flt2 <- AnnotationFilter(~ gene_end >= 123)
28 | expect_equal(flt, flt2)
29 | flt <- ExonEndFilter(123, condition = "<=")
30 | flt2 <- AnnotationFilter(~ exon_end <= 123)
31 | expect_equal(flt, flt2)
32 | ## Test exceptions/errors.
33 | expect_error(AnnotationFilter(~ not_existing == 1:3))
34 | ## Throws an error, but is not self-explanatory.
35 | expect_error(AnnotationFilter(~ gene_id * 3))
36 | })
37 |
38 | test_that("translation of combined expressions works", {
39 | res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2")
40 | cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2"))
41 | expect_equal(res, cmp)
42 | res <- AnnotationFilter(~ exon_id == "EX1" | genename != "BCL2")
43 | cmp <- AnnotationFilterList(ExonIdFilter("EX1"),
44 | GenenameFilter("BCL2", "!="), logicOp = "|")
45 | expect_equal(res, cmp)
46 | ## 3 filters.
47 | res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2" |
48 | seq_name != 3)
49 | ## Expect an AnnotationFilterList of length 3.
50 | expect_equal(length(res), 3)
51 | cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2"),
52 | SeqNameFilter(3, "!="), logicOp = c("&", "|"))
53 | expect_equal(res, cmp)
54 | ## 4 filters.
55 | res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2" |
56 | seq_name != 3 | seq_name == "Y")
57 | expect_equal(length(res), 4)
58 | cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2"),
59 | SeqNameFilter(3, "!="), SeqNameFilter("Y"),
60 | logicOp = c("&", "|", "|"))
61 | expect_equal(res, cmp)
62 | })
63 |
64 | test_that("translation works from within other functions", {
65 | simpleFun <- function(x)
66 | AnnotationFilter(x)
67 | expect_equal(simpleFun(~ gene_id == 4), AnnotationFilter(~ gene_id == 4))
68 | filter_expr <- ~ gene_id == 4
69 | expect_equal(simpleFun(filter_expr),
70 | AnnotationFilter(~ gene_id == 4))
71 | })
72 |
73 | ## This might be a test if we get the nesting working.
74 | ## test_that("translation of nested expressions works" {
75 | ## res <- convertFilterExpression((exon_id == "EX1" & gene_id == "BCL2") |
76 | ## (exon_id == "EX3" & gene_id == "BCL2L11"))
77 | ## expect_equal(logicOp(res), "|")
78 | ## expect_true(is(res[[1]], "AnnotationFilterList"))
79 | ## expect_equal(res[[1]][[1]], ExonIdFilter("EX1"))
80 | ## expect_equal(res[[1]][[2]], GeneIdFilter("BCL2"))
81 | ## expect_equal(logicOp(res[[1]]), "&")
82 | ## expect_true(is(res[[2]], "AnnotationFilterList"))
83 | ## expect_equal(res[[2]][[1]], ExonIdFilter("EX3"))
84 | ## expect_equal(res[[2]][[2]], GeneIdFilter("BCL2L11"))
85 | ## expect_equal(logicOp(res[[2]]), "&")
86 | ## ##
87 | ## res <- convertFilterExpression(seq_name == "Y" |
88 | ## (exon_id == "EX1" & gene_id == "BCL2") &
89 | ## (exon_id == "EX3" & gene_id == "BCL2L11"))
90 | ## ## Expect: length 3, first being a SeqNameFilter, second an
91 | ## ## AnnotationFilterList, third a AnnotationFilterList.
92 | ## expect_equal(res[[1]], SeqNameFilter("Y"))
93 | ## expect_equal(logicOp(res), "|")
94 | ## expect_true(is(res[[2]], "AnnotationFilterList"))
95 | ## expect_equal(res[[1]][[1]], ExonIdFilter("EX1"))
96 | ## expect_equal(res[[1]][[2]], GeneIdFilter("BCL2"))
97 | ## expect_equal(logicOp(res[[1]]), "&")
98 | ## expect_true(is(res[[2]], "AnnotationFilterList"))
99 | ## expect_equal(res[[2]][[1]], ExonIdFilter("EX3"))
100 | ## expect_equal(res[[2]][[2]], GeneIdFilter("BCL2L11"))
101 | ## expect_equal(logicOp(res[[2]]), "&")
102 |
103 | ## expect_true(is(res[[1]], "AnnotationFilterList"))
104 | ## expect_true(is(res[[2]], "AnnotationFilterList"))
105 |
106 | ## convertFilterExpression((gene_id == 3) ()
107 | ## })
108 |
109 |
--------------------------------------------------------------------------------
/.svn/pristine/a1/a129614aff000a6de02c214a739f8867a6f01752.svn-base:
--------------------------------------------------------------------------------
1 | context("expression translation")
2 |
3 | test_that("translation of expression works for single filter/condition", {
4 | ## Check for some character filter.
5 | ## exon_id
6 | flt <- ExonIdFilter("EX1", condition = "==")
7 | flt2 <- AnnotationFilter(~ exon_id == "EX1")
8 | expect_equal(flt, flt2)
9 | flt <- ExonIdFilter(c("EX1", "EX2"), condition = "!=")
10 | flt2 <- AnnotationFilter(~ exon_id != c("EX1", "EX2"))
11 | expect_equal(flt, flt2)
12 | ## seq_name
13 | flt <- SeqNameFilter(c("chr3", "chrX"), condition = "==")
14 | flt2 <- AnnotationFilter(~ seq_name == c("chr3", "chrX"))
15 | expect_equal(flt, flt2)
16 | flt <- SeqNameFilter(1:3, condition = "==")
17 | flt2 <- AnnotationFilter(~ seq_name %in% 1:3)
18 | expect_equal(flt, flt2)
19 | ## Check IntegerFilter
20 | flt <- GeneStartFilter(123, condition = ">")
21 | flt2 <- AnnotationFilter(~ gene_start > 123)
22 | expect_equal(flt, flt2)
23 | flt <- TxStartFilter(123, condition = "<")
24 | flt2 <- AnnotationFilter(~ tx_start < 123)
25 | expect_equal(flt, flt2)
26 | flt <- GeneEndFilter(123, condition = ">=")
27 | flt2 <- AnnotationFilter(~ gene_end >= 123)
28 | expect_equal(flt, flt2)
29 | flt <- ExonEndFilter(123, condition = "<=")
30 | flt2 <- AnnotationFilter(~ exon_end <= 123)
31 | expect_equal(flt, flt2)
32 | ## Test exceptions/errors.
33 | expect_error(AnnotationFilter(~ not_existing == 1:3))
34 | ## Throws an error, but is not self-explanatory.
35 | expect_error(AnnotationFilter(~ gene_id * 3))
36 | })
37 |
38 | test_that("translation of combined expressions works", {
39 | res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2")
40 | cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2"))
41 | expect_equal(res, cmp)
42 | res <- AnnotationFilter(~ exon_id == "EX1" | genename != "BCL2")
43 | cmp <- AnnotationFilterList(ExonIdFilter("EX1"),
44 | GenenameFilter("BCL2", "!="), logicOp = "|")
45 | expect_equal(res, cmp)
46 | ## 3 filters.
47 | res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2" |
48 | seq_name != 3)
49 | ## Expect an AnnotationFilterList of length 3.
50 | expect_equal(length(res), 3)
51 | cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2"),
52 | SeqNameFilter(3, "!="), logicOp = c("&", "|"))
53 | expect_equal(res, cmp)
54 | ## 4 filters.
55 | res <- AnnotationFilter(~ exon_id == "EX1" & genename == "BCL2" |
56 | seq_name != 3 | seq_name == "Y")
57 | expect_equal(length(res), 4)
58 | cmp <- AnnotationFilterList(ExonIdFilter("EX1"), GenenameFilter("BCL2"),
59 | SeqNameFilter(3, "!="), SeqNameFilter("Y"),
60 | logicOp = c("&", "|", "|"))
61 | expect_equal(res, cmp)
62 | })
63 |
64 | test_that("translation works from within other functions", {
65 | simpleFun <- function(x)
66 | AnnotationFilter(x)
67 | expect_equal(simpleFun(~ gene_id == 4), AnnotationFilter(~ gene_id == 4))
68 | filter_expr <- ~ gene_id == 4
69 | expect_equal(simpleFun(filter_expr),
70 | AnnotationFilter(~ gene_id == 4))
71 | })
72 |
73 | ## This might be a test if we get the nesting working.
74 | ## test_that("translation of nested expressions works" {
75 | ## res <- convertFilterExpression((exon_id == "EX1" & gene_id == "BCL2") |
76 | ## (exon_id == "EX3" & gene_id == "BCL2L11"))
77 | ## expect_equal(logicOp(res), "|")
78 | ## expect_true(is(res[[1]], "AnnotationFilterList"))
79 | ## expect_equal(res[[1]][[1]], ExonIdFilter("EX1"))
80 | ## expect_equal(res[[1]][[2]], GeneIdFilter("BCL2"))
81 | ## expect_equal(logicOp(res[[1]]), "&")
82 | ## expect_true(is(res[[2]], "AnnotationFilterList"))
83 | ## expect_equal(res[[2]][[1]], ExonIdFilter("EX3"))
84 | ## expect_equal(res[[2]][[2]], GeneIdFilter("BCL2L11"))
85 | ## expect_equal(logicOp(res[[2]]), "&")
86 | ## ##
87 | ## res <- convertFilterExpression(seq_name == "Y" |
88 | ## (exon_id == "EX1" & gene_id == "BCL2") &
89 | ## (exon_id == "EX3" & gene_id == "BCL2L11"))
90 | ## ## Expect: length 3, first being a SeqNameFilter, second an
91 | ## ## AnnotationFilterList, third a AnnotationFilterList.
92 | ## expect_equal(res[[1]], SeqNameFilter("Y"))
93 | ## expect_equal(logicOp(res), "|")
94 | ## expect_true(is(res[[2]], "AnnotationFilterList"))
95 | ## expect_equal(res[[1]][[1]], ExonIdFilter("EX1"))
96 | ## expect_equal(res[[1]][[2]], GeneIdFilter("BCL2"))
97 | ## expect_equal(logicOp(res[[1]]), "&")
98 | ## expect_true(is(res[[2]], "AnnotationFilterList"))
99 | ## expect_equal(res[[2]][[1]], ExonIdFilter("EX3"))
100 | ## expect_equal(res[[2]][[2]], GeneIdFilter("BCL2L11"))
101 | ## expect_equal(logicOp(res[[2]]), "&")
102 |
103 | ## expect_true(is(res[[1]], "AnnotationFilterList"))
104 | ## expect_true(is(res[[2]], "AnnotationFilterList"))
105 |
106 | ## convertFilterExpression((gene_id == 3) ()
107 | ## })
108 |
109 |
--------------------------------------------------------------------------------
/tests/testthat/test_AnnotationFilter.R:
--------------------------------------------------------------------------------
1 | context("AnnotationFilter")
2 |
3 | test_that("supportedFilters() works", {
4 | expect_true(inherits(supportedFilters(), "data.frame"))
5 | expect_identical(
6 | nrow(supportedFilters()),
7 | length(unlist(AnnotationFilter:::.FIELD, use.names=FALSE)) +
8 | length(AnnotationFilter:::.FILTERS_WO_FIELD)
9 | )
10 | })
11 |
12 | test_that("SymbolFilter as representative for character filters", {
13 | expect_true(validObject(new("SymbolFilter")))
14 | expect_error(SymbolFilter())
15 | expect_error(SymbolFilter(1, ">"))
16 | expect_error(SymbolFilter(1, "foo"))
17 | expect_error(SymbolFilter(c("foo","bar"), "startsWith"))
18 | ## Getter / setter
19 | fl <- SymbolFilter("BCL2")
20 | expect_equal(value(fl), "BCL2")
21 | fl <- SymbolFilter(c(4, 5))
22 | expect_equal(value(fl), c("4", "5"))
23 | fl <- SymbolFilter(3)
24 | expect_equal(value(fl), "3")
25 | expect_error(SymbolFilter(NA))
26 | ## condition.
27 | expect_equal(condition(fl), "==")
28 | fl <- SymbolFilter("a", condition = "!=")
29 | expect_equal(condition(fl), "!=")
30 | expect_error(SymbolFilter("a", condition = "<"))
31 | expect_error(SymbolFilter("a", condition = ""))
32 | expect_error(SymbolFilter("a", condition = c("==", ">")))
33 | expect_error(SymbolFilter("a", condition = NULL))
34 | expect_error(SymbolFilter("a", condition = NA))
35 | expect_error(SymbolFilter("a", condition = 4))
36 | })
37 |
38 | test_that("GeneStartFilter as representative for integer filters", {
39 | gsf <- GeneStartFilter(10000, condition = ">")
40 | expect_equal(condition(gsf), ">")
41 | expect_error(GeneStartFilter("3"))
42 | expect_error(GeneStartFilter("B"))
43 | expect_error(GeneStartFilter(NA))
44 | expect_error(GeneStartFilter(NULL))
45 | expect_error(GeneStartFilter())
46 | ## Condition
47 | expect_error(GeneStartFilter(10000, condition = "startsWith"))
48 | expect_error(GeneStartFilter(10000, condition = "endsWith"))
49 | expect_error(GeneStartFilter(10000, condition = c("==", "<")))
50 | })
51 |
52 | test_that("GRangesFilter works", {
53 | GRanges <- GenomicRanges::GRanges
54 | grf <- GRangesFilter(GRanges("chr10:87869000-87876000"))
55 | expect_equal(condition(grf), "any")
56 | expect_error(GRangesFilter(value = 3))
57 | expect_error(GRangesFilter(
58 | GRanges("chr10:87869000-87876000"),
59 | type = "=="
60 | ))
61 | grf <- GRangesFilter(
62 | GRanges("chr10:87869000-87876000"),
63 | type = "within",
64 | feature = "tx"
65 | )
66 | expect_equal(condition(grf), "within")
67 | expect_equal(feature(grf), "tx")
68 | })
69 |
70 | test_that("fieldToClass works", {
71 | expect_identical(AnnotationFilter:::.fieldToClass("gene_id"),
72 | "GeneIdFilter")
73 | ## Support replacement for multiple _ : issue #13
74 | expect_identical(AnnotationFilter:::.fieldToClass("gene_seq_start"),
75 | "GeneSeqStartFilter")
76 | })
77 |
78 | test_that("convertFilter Works", {
79 | expect_identical(convertFilter(SymbolFilter("ADA")), "symbol == 'ADA'")
80 | expect_identical(convertFilter(SymbolFilter("ADA", "!=")),
81 | "symbol != 'ADA'")
82 | expect_identical(convertFilter(SymbolFilter("ADA", "startsWith")),
83 | "symbol %like% 'ADA%'")
84 | expect_identical(convertFilter(SymbolFilter("ADA", "endsWith")),
85 | "symbol %like% '%ADA'")
86 | expect_identical(convertFilter(SymbolFilter("ADA", "contains")),
87 | "symbol %like% 'ADA'")
88 |
89 | expect_identical(convertFilter(TxStartFilter(1000)), "tx_start == '1000'")
90 | expect_identical(convertFilter(TxStartFilter(1000, "!=")),
91 | "tx_start != '1000'")
92 | expect_identical(convertFilter(TxStartFilter(1000, ">")), "tx_start > 1000")
93 | expect_identical(convertFilter(TxStartFilter(1000, "<")), "tx_start < 1000")
94 | expect_identical(convertFilter(TxStartFilter(1000, ">=")),
95 | "tx_start >= 1000")
96 | expect_identical(convertFilter(TxStartFilter(1000, "<=")),
97 | "tx_start <= 1000")
98 |
99 | ## check NOT works
100 |
101 | expect_identical(convertFilter(SymbolFilter("ADA", not=TRUE)),
102 | "!symbol == 'ADA'")
103 | expect_identical(convertFilter(SymbolFilter("ADA", "!=", not=TRUE)),
104 | "!symbol != 'ADA'")
105 | expect_identical(convertFilter(SymbolFilter("ADA", "startsWith", not=TRUE)),
106 | "!symbol %like% 'ADA%'")
107 | expect_identical(convertFilter(SymbolFilter("ADA", "endsWith", not=TRUE)),
108 | "!symbol %like% '%ADA'")
109 | expect_identical(convertFilter(SymbolFilter("ADA", "contains", not=TRUE)),
110 | "!symbol %like% 'ADA'")
111 |
112 | expect_identical(convertFilter(TxStartFilter(1000, not=TRUE)),
113 | "!tx_start == '1000'")
114 | expect_identical(convertFilter(TxStartFilter(1000, "!=", not=TRUE)),
115 | "!tx_start != '1000'")
116 | expect_identical(convertFilter(TxStartFilter(1000, ">", not=TRUE)),
117 | "!tx_start > 1000")
118 | expect_identical(convertFilter(TxStartFilter(1000, "<", not=TRUE)),
119 | "!tx_start < 1000")
120 | expect_identical(convertFilter(TxStartFilter(1000, ">=", not=TRUE)),
121 | "!tx_start >= 1000")
122 | expect_identical(convertFilter(TxStartFilter(1000, "<=", not=TRUE)),
123 | "!tx_start <= 1000")
124 | })
125 |
--------------------------------------------------------------------------------
/man/AnnotationFilterList.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AnnotationFilterList.R
3 | \docType{methods}
4 | \name{AnnotationFilterList}
5 | \alias{AnnotationFilterList}
6 | \alias{AnnotationFilterList-class}
7 | \alias{AnnotationFilterList}
8 | \alias{value,AnnotationFilterList-method}
9 | \alias{logicOp,AnnotationFilterList-method}
10 | \alias{logicOp}
11 | \alias{not,AnnotationFilterList-method}
12 | \alias{not}
13 | \alias{distributeNegation,AnnotationFilterList-method}
14 | \alias{distributeNegation}
15 | \alias{convertFilter,AnnotationFilterList,missing-method}
16 | \alias{convertFilter}
17 | \alias{show,AnnotationFilterList-method}
18 | \title{Combining annotation filters}
19 | \usage{
20 | AnnotationFilterList(..., logicOp = character(), logOp = character(),
21 | not = FALSE, .groupingFlag = FALSE)
22 |
23 | \S4method{value}{AnnotationFilterList}(object)
24 |
25 | \S4method{logicOp}{AnnotationFilterList}(object)
26 |
27 | \S4method{not}{AnnotationFilterList}(object)
28 |
29 | \S4method{distributeNegation}{AnnotationFilterList}(object,
30 | .prior_negation = FALSE)
31 |
32 | \S4method{convertFilter}{AnnotationFilterList,missing}(object)
33 |
34 | \S4method{show}{AnnotationFilterList}(object)
35 | }
36 | \arguments{
37 | \item{...}{individual \code{\link{AnnotationFilter}} objects or a
38 | mixture of \code{AnnotationFilter} and
39 | \code{AnnotationFilterList} objects.}
40 |
41 | \item{logicOp}{\code{character} of length equal to the number
42 | of submitted \code{AnnotationFilter} objects - 1. Each value
43 | representing the logical operation to combine consecutive
44 | filters, i.e. the first element being the logical operation to
45 | combine the first and second \code{AnnotationFilter}, the
46 | second element being the logical operation to combine the
47 | second and third \code{AnnotationFilter} and so on. Allowed
48 | values are \code{"&"} and \code{"|"}. The function assumes a
49 | logical \emph{and} between all elements by default.}
50 |
51 | \item{logOp}{Deprecated; use \code{logicOp=}.}
52 |
53 | \item{not}{\code{logical} of length one. Indicates whether the grouping
54 | of \code{AnnotationFilters} are to be negated.}
55 |
56 | \item{.groupingFlag}{Flag designated for internal use only.}
57 |
58 | \item{object}{An object of class \code{AnnotationFilterList}.}
59 |
60 | \item{.prior_negation}{\code{logical(1)} unused argument.}
61 | }
62 | \value{
63 | \code{AnnotationFilterList} returns an \code{AnnotationFilterList}.
64 |
65 | \code{value()} returns a \code{list} with \code{AnnotationFilter}
66 | objects.
67 |
68 | \code{logicOp()} returns a \code{character()} vector of
69 | \dQuote{&} or \dQuote{|} symbols.
70 |
71 | \code{not()} returns a \code{logical(1)} indicating whether the
72 | \code{AnnotationFilterList} is negated.
73 |
74 | \code{distributeNegation()} returns an \code{AnnotationFilterList} object
75 | with De Morgan's law applied to it, such that it is equal to the original
76 | \code{AnnotationFilterList} object but all \code{!}'s are distributed out
77 | of the \code{AnnotationFilterList} object and to the nested
78 | \code{AnnotationFilter} objects.
79 |
80 | \code{convertFilter()} returns a \code{character(1)} that can be used as
81 | input to a \code{dplyr} filter.
82 | }
83 | \description{
84 | The \code{AnnotationFilterList} allows to combine
85 | filter objects extending the \code{\link{AnnotationFilter}}
86 | class to construct more complex queries. Consecutive filter
87 | objects in the \code{AnnotationFilterList} can be combined by a
88 | logical \emph{and} (\code{&}) or \emph{or} (\code{|}). The
89 | \code{AnnotationFilterList} extends \code{list}, individual
90 | elements can thus be accessed with \code{[[}.
91 |
92 | \code{value()} gets a \code{list} with the
93 | \code{AnnotationFilter} objects. Use \code{[[} to access
94 | individual filters.
95 |
96 | \code{logicOp()} gets the logical operators separating
97 | successive \code{AnnotationFilter}.
98 |
99 | \code{not()} gets the negation flag of the
100 | \code{AnnotationFilterList}.
101 |
102 |
103 |
104 | Converts an \code{AnnotationFilterList} object to a
105 | \code{character(1)} giving an equation that can be used as input to
106 | a \code{dplyr} filter.
107 | }
108 | \note{
109 | The \code{AnnotationFilterList} does not support containing empty
110 | elements, hence all elements of \code{length == 0} are removed in
111 | the constructor function.
112 | }
113 | \examples{
114 | ## Create some AnnotationFilters
115 | gf <- GeneNameFilter(c("BCL2", "BCL2L11"))
116 | tbtf <- TxBiotypeFilter("protein_coding", condition = "!=")
117 |
118 | ## Combine both to an AnnotationFilterList. By default elements are combined
119 | ## using a logical "and" operator. The filter list represents thus a query
120 | ## like: get all features where the gene name is either ("BCL2" or "BCL2L11")
121 | ## and the transcript biotype is not "protein_coding".
122 | afl <- AnnotationFilterList(gf, tbtf)
123 | afl
124 |
125 | ## Access individual filters.
126 | afl[[1]]
127 |
128 | ## Create a filter in the form of: get all features where the gene name is
129 | ## either ("BCL2" or "BCL2L11") and the transcript biotype is not
130 | ## "protein_coding" or the seq_name is "Y". Hence, this will get all feature
131 | ## also found by the previous AnnotationFilterList and returns also all
132 | ## features on chromosome Y.
133 | afl <- AnnotationFilterList(gf, tbtf, SeqNameFilter("Y"),
134 | logicOp = c("&", "|"))
135 | afl
136 |
137 | afl <- AnnotationFilter(~!(symbol == 'ADA' | symbol \%startsWith\% 'SNORD'))
138 | afl <- distributeNegation(afl)
139 | afl
140 | afl <- AnnotationFilter(~symbol=="ADA" & tx_start > "400000")
141 | result <- convertFilter(afl)
142 | result
143 | }
144 | \seealso{
145 | \code{\link{supportedFilters}} for available
146 | \code{\link{AnnotationFilter}} objects
147 | }
148 |
--------------------------------------------------------------------------------
/R/translate-utils.R:
--------------------------------------------------------------------------------
1 | #' @include AnnotationFilter.R
2 |
3 | ## Functionality to translate a query condition to an AnnotationFilter.
4 |
5 | #' Build the translator for one binary condition operator.
6 | #'
7 | #' Adapted from GenomicDataCommons. \code{sep} is stored as the condition of
8 | #' the filter created by the returned function.
9 | #'
10 | #' @importFrom methods is validObject initialize
11 | #' @noRd
12 | .binary_op <- function(sep) {
13 |     force(sep)
14 |     function(e1, e2) {
15 |         ## The unevaluated left-hand side names the field. The filter class
16 |         ## derived from it is instantiated before the value is evaluated.
17 |         fld <- as.character(substitute(e1))
18 |         cls <- .fieldToClass(fld)
19 |         on_failure <- function(e)
20 |             stop("No AnnotationFilter class '", cls, "' for field '",
21 |                  fld, "' defined")
22 |         flt <- tryCatch(new(cls, condition = sep, field = fld),
23 |                         error = on_failure)
24 |         ## Evaluate the right-hand side and coerce it to the filter's type.
25 |         val <- e2
26 |         if (is(flt, "CharacterFilter"))
27 |             val <- as.character(val)
28 |         else if (is(flt, "IntegerFilter"))
29 |             val <- as.integer(val)
30 |         initialize(flt, value = val)
31 |     }
32 | }
33 |
34 | #' Translate the unary \code{!} operator: negate a filter or filter list.
35 | #'
36 | #' Returns a function that flips the \code{not} slot of an AnnotationFilter
37 | #' or AnnotationFilterList. For a list the grouping flag is cleared as well,
38 | #' so that \code{.combine_op} keeps the negated group as one nested element.
39 | #'
40 | #' @noRd
41 | .not_op <- function(sep) {
42 |     force(sep)
43 |     function(x) {
44 |         is_list <- is(x, "AnnotationFilterList")
45 |         if (!is_list && !is(x, "AnnotationFilter"))
46 |             stop('Arguments to "!" must be an AnnotationFilter or ',
47 |                  'AnnotationFilterList.')
48 |         ## Toggle instead of set, so a double negation cancels out.
49 |         x@not <- !x@not
50 |         if (is_list)
51 |             x@.groupingFlag <- FALSE
52 |         x
53 |     }
54 | }
55 |
56 | ## Translate a parenthesised sub-expression. The grouping flag is cleared
57 | ## so that .combine_op keeps the group as one nested element.
58 | .parenthesis_op <- function(sep) {
59 |     force(sep)
60 |     function(x) {
61 |         if (!is(x, "AnnotationFilterList"))
62 |             return(AnnotationFilterList(x, .groupingFlag=FALSE))
63 |         x@.groupingFlag <- FALSE
64 |         x
65 |     }
66 | }
67 |
68 |
69 | #' Combine filters into an AnnotationFilterList joined by \code{sep}.
70 | #'
71 | #' Returns a function of two operands. An operand that is an
72 | #' AnnotationFilterList with its grouping flag set is flattened, i.e. its
73 | #' elements and logical operators are merged into the result; any other
74 | #' operand is added as a single element. The result has its grouping flag
75 | #' set.
76 | #'
77 | #' @noRd
78 | .combine_op <- function(sep) {
79 |     force(sep)
80 |     function(e1, e2) {
81 |         flat1 <- is(e1, "AnnotationFilterList") && e1@.groupingFlag
82 |         flat2 <- is(e2, "AnnotationFilterList") && e2@.groupingFlag
83 |         ## Operators keep the order of their filters: left, sep, right.
84 |         ops <- c(if (flat1) logicOp(e1), sep, if (flat2) logicOp(e2))
85 |         elements <- c(
86 |             if (flat1) .aflvalue(e1) else list(e1),
87 |             if (flat2) .aflvalue(e2) else list(e2)
88 |         )
89 |         ## Go through the constructor, passing the options as named
90 |         ## arguments after the filters.
91 |         call_args <- c(elements, list(logicOp = ops, .groupingFlag = TRUE))
92 |         do.call("AnnotationFilterList", call_args)
93 |     }
94 | }
95 |
96 | #' Registry of translator functions, one per operator supported in filter
97 | #' expressions. Expressions are evaluated with this \code{list} as data.
98 | #'
99 | #' @noRd
100 | .LOG_OP_REG <- list(
101 |     ## Conditions; %in% is translated into an "==" condition.
102 |     `==` = .binary_op("=="),
103 |     `%in%` = .binary_op("=="),
104 |     `!=` = .binary_op("!="),
105 |     `>` = .binary_op(">"),
106 |     `<` = .binary_op("<"),
107 |     `>=` = .binary_op(">="),
108 |     `<=` = .binary_op("<="),
109 |     ## Custom binary operators.
110 |     `%startsWith%` = .binary_op("startsWith"),
111 |     `%endsWith%` = .binary_op("endsWith"),
112 |     `%contains%` = .binary_op("contains"),
113 |     ## Negation.
114 |     `!` = .not_op("!"),
115 |     ## Parentheses.
116 |     `(` = .parenthesis_op("("),
117 |     ## Combine filters.
118 |     `&` = .combine_op("&"),
119 |     `|` = .combine_op("|")
120 | )
121 |
122 | `%startsWith%` <- function(e1, e2){}
123 | `%endsWith%` <- function(e1, e2){}
124 | `%contains%` <- function(e1, e2){}
125 |
126 | #' @rdname AnnotationFilter
127 | #'
128 | #' @description \code{AnnotationFilter} \emph{translates} a filter
129 | #'     expression such as \code{~ gene_id == "BCL2"} into a filter object
130 | #'     extending the \code{\link{AnnotationFilter}} class (in the example a
131 | #'     \code{\link{GeneIdFilter}} object) or into an
132 | #'     \code{\link{AnnotationFilterList}} if the expression contains multiple
133 | #'     conditions (see examples below). Filter expressions have to be written
134 | #'     in the form \code{~ <field> <condition> <value>}, with \code{<field>}
135 | #'     being the default field of the filter class (use the
136 | #'     \code{supportedFilters} function to list all fields and filter
137 | #'     classes), \code{<condition>} the logical expression and
138 | #'     \code{<value>} the value for the filter.
139 | #'
140 | #' @details Filter expressions for the \code{AnnotationFilter} class have to be
141 | #'     written as formulas, i.e. starting with a \code{~}.
142 | #'
143 | #' @note Parentheses group conditions into a nested
144 | #'     \code{AnnotationFilterList} and \code{!} negates a filter or a group,
145 | #'     e.g. \code{~ !(symbol == "ADA" | seq_name == "Y")}.
146 | #'
147 | #' @param expr A filter expression, written as a \code{formula}, to be
148 | #'     converted to an \code{AnnotationFilter} or \code{AnnotationFilterList}
149 | #'     class. See below for examples.
150 | #'
151 | #' @return \code{AnnotationFilter} returns an
152 | #'     \code{\link{AnnotationFilter}} or an \code{\link{AnnotationFilterList}}.
153 | #'
154 | #' @importFrom lazyeval f_eval
155 | #'
156 | #' @examples
157 | #' ## Convert a filter expression based on a gene ID to a GeneIdFilter
158 | #' gnf <- AnnotationFilter(~ gene_id == "BCL2")
159 | #' gnf
160 | #'
161 | #' ## Same conversion but for two gene IDs.
162 | #' gnf <- AnnotationFilter(~ gene_id %in% c("BCL2", "BCL2L11"))
163 | #' gnf
164 | #'
165 | #' ## Converting an expression that combines multiple filters. As a result we
166 | #' ## get an AnnotationFilterList containing the corresponding filters.
167 | #' flt <- AnnotationFilter(~ gene_id %in% c("BCL2", "BCL2L11") &
168 | #'                             tx_biotype == "nonsense_mediated_decay" |
169 | #'                             seq_name == "Y")
170 | #' flt
171 | #'
172 | #' @export
173 | AnnotationFilter <- function(expr) {
174 |     translated <- f_eval(expr, data = .LOG_OP_REG)
175 |     if (is(translated, "AnnotationFilterList"))
176 |         translated@.groupingFlag <- FALSE
177 |     translated
178 | }
179 |
--------------------------------------------------------------------------------
/.svn/pristine/65/6537ab78a07d200e9fe894802b6b5f17bdd38fd9.svn-base:
--------------------------------------------------------------------------------
1 | #' @include AnnotationFilter.R
2 |
3 | #' @rdname AnnotationFilterList
4 | #'
5 | #' @name AnnotationFilterList
6 | #'
7 | #' @title Combining annotation filters
8 | #'
9 | #' @aliases AnnotationFilterList-class
10 | #'
11 | #' @description The \code{AnnotationFilterList} allows to combine
12 | #' filter objects extending the \code{\link{AnnotationFilter}}
13 | #' class to construct more complex queries. Consecutive filter
14 | #' objects in the \code{AnnotationFilterList} can be combined by a
15 | #' logical \emph{and} (\code{&}) or \emph{or} (\code{|}). The
16 | #' \code{AnnotationFilterList} extends \code{list}, individual
17 | #' elements can thus be accessed with \code{[[}.
18 | #'
19 | #' @note The \code{AnnotationFilterList} does not support containing empty
20 | #' elements, hence all elements of \code{length == 0} are removed in
21 | #' the constructor function.
22 | #'
23 | #' @exportClass AnnotationFilterList
24 | NULL
25 |
26 | ## S4 class: a list of filters plus the operators joining neighbours.
27 | .AnnotationFilterList <- setClass(
28 |     Class = "AnnotationFilterList",
29 |     contains = "list",
30 |     slots = c(logOp = "character")
31 | )
32 | .LOG_OPS <- c("&", "|")
33 |
34 | setValidity("AnnotationFilterList",
35 |     function(object)
36 | {
37 |     elements <- .aflvalue(object)
38 |     ops <- .logOp(object)
39 |     problems <- character()
40 |     if (length(elements) == 0) {
41 |         ## An empty list must not carry any logical operators.
42 |         if (length(ops))
43 |             problems <- c(
44 |                 problems,
45 |                 "'logicOp' can not have length > 0 if the object is empty"
46 |             )
47 |     } else {
48 |         ## Elements (one is enough) must be filters or nested filter lists.
49 |         isFilter <- vapply(elements, function(z) {
50 |             is(z, "AnnotationFilter") || is(z, "AnnotationFilterList")
51 |         }, logical(1))
52 |         if (any(!isFilter))
53 |             problems <- c(
54 |                 problems,
55 |                 "only 'AnnotationFilter' or 'AnnotationFilterList' allowed"
56 |             )
57 |         ## Empty elements are reported with their own message (issue #17).
58 |         if (any(lengths(elements) == 0))
59 |             problems <- c(problems, "Lengths of all elements have to be > 0")
60 |         ## One operator is needed between each pair of neighbours.
61 |         if (length(ops) != length(elements) - 1)
62 |             problems <- c(problems,
63 |                 "length of 'logicOp' has to be length of the object -1")
64 |         ## Only the operators listed in .LOG_OPS are accepted.
65 |         if (any(!(ops %in% .LOG_OPS)))
66 |             problems <- c(problems, "'logicOp' can only contain '&' and '|'")
67 |     }
68 |     if (length(problems)) problems else TRUE
69 | })
70 |
71 | ## AnnotationFilterList constructor function.
72 | #' @rdname AnnotationFilterList
73 | #'
74 | #' @name AnnotationFilterList
75 | #'
76 | #' @param ... individual \code{\link{AnnotationFilter}} objects or a
77 | #' mixture of \code{AnnotationFilter} and
78 | #' \code{AnnotationFilterList} objects.
79 | #'
80 | #' @param logicOp \code{character} of length equal to the number
81 | #' of submitted \code{AnnotationFilter} objects - 1. Each value
82 | #' representing the logical operation to combine consecutive
83 | #' filters, i.e. the first element being the logical operation to
84 | #' combine the first and second \code{AnnotationFilter}, the
85 | #' second element being the logical operation to combine the
86 | #' second and third \code{AnnotationFilter} and so on. Allowed
87 | #' values are \code{"&"} and \code{"|"}. The function assumes a
88 | #' logical \emph{and} between all elements by default.
89 | #'
90 | #' @param logOp Deprecated; use \code{logicOp=}.
91 | #'
92 | #' @seealso \code{\link{supportedFilters}} for available
93 | #' \code{\link{AnnotationFilter}} objects
94 | #'
95 | #' @return \code{AnnotationFilterList} returns an \code{AnnotationFilterList}.
96 | #'
97 | #' @examples
98 | #' ## Create some AnnotationFilters
99 | #' gf <- GenenameFilter(c("BCL2", "BCL2L11"))
100 | #' tbtf <- TxBiotypeFilter("protein_coding", condition = "!=")
101 | #'
102 | #' ## Combine both to an AnnotationFilterList. By default elements are combined
103 | #' ## using a logical "and" operator. The filter list represents thus a query
104 | #' ## like: get all features where the gene name is either ("BCL2" or "BCL2L11")
105 | #' ## and the transcript biotype is not "protein_coding".
106 | #' afl <- AnnotationFilterList(gf, tbtf)
107 | #' afl
108 | #'
109 | #' ## Access individual filters.
110 | #' afl[[1]]
111 | #'
112 | #' ## Create a filter in the form of: get all features where the gene name is
113 | #' ## either ("BCL2" or "BCL2L11") and the transcript biotype is not
114 | #' ## "protein_coding" or the seq_name is "Y". Hence, this will get all feature
115 | #' ## also found by the previous AnnotationFilterList and returns also all
116 | #' ## features on chromosome Y.
117 | #' afl <- AnnotationFilterList(gf, tbtf, SeqNameFilter("Y"),
118 | #' logicOp = c("&", "|"))
119 | #' afl
120 | #'
121 | #' @export
122 | AnnotationFilterList <-
123 |     function(..., logicOp = character(), logOp = character())
124 | {
125 |     if (!missing(logOp) && missing(logicOp)) {
126 |         logicOp <- logOp
127 |         .Deprecated(msg = "'logOp' deprecated, use 'logicOp'")
128 |     }
129 |     filters <- list(...)
130 |     ## Remove empty elements (issue #17)
131 |     filters <- filters[lengths(filters) > 0]
132 |     ## By default we're assuming & between elements; scalar test, so '&&'.
133 |     if (length(filters) > 1 && length(logicOp) == 0)
134 |         logicOp <- rep("&", (length(filters) - 1))
135 |     .AnnotationFilterList(filters, logOp = logicOp)
136 | }
137 |
138 | .logOp <- function(object) slot(object, "logOp")   # operators between elements
139 |
140 | .aflvalue <- function(object) slot(object, ".Data")   # the list data part
141 |
142 | #' @rdname AnnotationFilterList
143 | #'
144 | #' @description \code{value()} gets the \code{list} of filter objects
145 | #' stored in the \code{AnnotationFilterList}. Use \code{[[} to access
146 | #' individual filters.
147 | #'
148 | #' @return \code{value()} returns a \code{list} with the \code{AnnotationFilter}
149 | #' (or nested \code{AnnotationFilterList}) objects.
150 | #'
151 | #' @export
152 | setMethod("value", "AnnotationFilterList", .aflvalue)
153 |
154 | #' @rdname AnnotationFilterList
155 | #'
156 | #' @aliases logicOp
157 | #'
158 | #' @description \code{logicOp()} gets the logical operators separating
159 | #' successive elements of the \code{AnnotationFilterList}.
160 | #'
161 | #' @return \code{logicOp()} returns a \code{character()} vector of
162 | #' \dQuote{&} or \dQuote{|} symbols.
163 | #'
164 | #' @export logicOp
165 | setMethod("logicOp", "AnnotationFilterList", .logOp)
166 |
167 | #' @rdname AnnotationFilterList
168 | #'
169 | #' @param object An object of class \code{AnnotationFilterList}.
170 | #'
171 | #' @importFrom utils tail
172 | #' @export
173 | setMethod("show", "AnnotationFilterList",
174 |     function(object)
175 | {
176 |     n <- length(object)
177 |     cat("class: ", class(object), "\n", "length: ", n, "\n", sep = "")
178 |     if (n == 0L)
179 |         return(invisible(NULL))
180 |     ## Print the first filter, then each remaining filter preceded by
181 |     ## the logical operator that joins it to its predecessor.
182 |     ops <- logicOp(object)
183 |     cat("filters:\n\n")
184 |     show(object[[1]])
185 |     for (i in seq_len(n - 1L)) {
186 |         cat("\n", ops[i], "\n\n")
187 |         show(object[[i + 1L]])
188 |     }
189 | })
190 |
191 |
--------------------------------------------------------------------------------
/.svn/pristine/81/81fa7ab77bea17ab59d67e253f098ea223b2e1d3.svn-base:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AnnotationFilter.R, R/translate-utils.R
3 | \docType{methods}
4 | \name{AnnotationFilter}
5 | \alias{AnnotationFilter}
6 | \alias{CdsStartFilter}
7 | \alias{CdsEndFilter}
8 | \alias{ExonIdFilter}
9 | \alias{ExonNameFilter}
10 | \alias{ExonStartFilter}
11 | \alias{ExonEndFilter}
12 | \alias{ExonRankFilter}
13 | \alias{GeneIdFilter}
14 | \alias{GenenameFilter}
15 | \alias{GeneBiotypeFilter}
16 | \alias{GeneStartFilter}
17 | \alias{GeneEndFilter}
18 | \alias{EntrezFilter}
19 | \alias{SymbolFilter}
20 | \alias{TxIdFilter}
21 | \alias{TxNameFilter}
22 | \alias{TxBiotypeFilter}
23 | \alias{TxStartFilter}
24 | \alias{TxEndFilter}
25 | \alias{ProteinIdFilter}
26 | \alias{UniprotFilter}
27 | \alias{SeqNameFilter}
28 | \alias{SeqStrandFilter}
29 | \alias{AnnotationFilter-class}
30 | \alias{CharacterFilter-class}
31 | \alias{IntegerFilter-class}
32 | \alias{CdsStartFilter-class}
33 | \alias{CdsEndFilter-class}
34 | \alias{ExonIdFilter-class}
35 | \alias{ExonNameFilter-class}
36 | \alias{ExonStartFilter-class}
37 | \alias{ExonEndFilter-class}
38 | \alias{ExonRankFilter-class}
39 | \alias{GeneIdFilter-class}
40 | \alias{GenenameFilter-class}
41 | \alias{GeneBiotypeFilter-class}
42 | \alias{GeneStartFilter-class}
43 | \alias{GeneEndFilter-class}
44 | \alias{EntrezFilter-class}
45 | \alias{SymbolFilter-class}
46 | \alias{TxIdFilter-class}
47 | \alias{TxNameFilter-class}
48 | \alias{TxBiotypeFilter-class}
49 | \alias{TxStartFilter-class}
50 | \alias{TxEndFilter-class}
51 | \alias{ProteinIdFilter-class}
52 | \alias{UniprotFilter-class}
53 | \alias{SeqNameFilter-class}
54 | \alias{SeqStrandFilter-class}
55 | \alias{supportedFilters}
56 | \alias{show,AnnotationFilter-method}
57 | \alias{show,CharacterFilter-method}
58 | \alias{show,IntegerFilter-method}
59 | \alias{show,GRangesFilter-method}
60 | \alias{condition,AnnotationFilter-method}
61 | \alias{condition}
62 | \alias{value,AnnotationFilter-method}
63 | \alias{value}
64 | \alias{field,AnnotationFilter-method}
65 | \alias{field}
66 | \alias{GRangesFilter-class}
67 | \alias{.GRangesFilter}
68 | \alias{GRangesFilter}
69 | \alias{feature}
70 | \alias{AnnotationFilter}
71 | \alias{supportedFilters,missing-method}
72 | \alias{AnnotationFilter}
73 | \title{Filters for annotation objects}
74 | \usage{
75 | CdsStartFilter(value, condition = "==")
76 | CdsEndFilter(value, condition = "==")
77 | ExonIdFilter(value, condition = "==")
78 | ExonNameFilter(value, condition = "==")
79 | ExonRankFilter(value, condition = "==")
80 | ExonStartFilter(value, condition = "==")
81 | ExonEndFilter(value, condition = "==")
82 | GeneIdFilter(value, condition = "==")
83 | GenenameFilter(value, condition = "==")
84 | GeneBiotypeFilter(value, condition = "==")
85 | GeneStartFilter(value, condition = "==")
86 | GeneEndFilter(value, condition = "==")
87 | EntrezFilter(value, condition = "==")
88 | SymbolFilter(value, condition = "==")
89 | TxIdFilter(value, condition = "==")
90 | TxNameFilter(value, condition = "==")
91 | TxBiotypeFilter(value, condition = "==")
92 | TxStartFilter(value, condition = "==")
93 | TxEndFilter(value, condition = "==")
94 | ProteinIdFilter(value, condition = "==")
95 | UniprotFilter(value, condition = "==")
96 | SeqNameFilter(value, condition = "==")
97 | SeqStrandFilter(value, condition = "==")
98 |
99 | \S4method{condition}{AnnotationFilter}(object)
100 |
101 | \S4method{value}{AnnotationFilter}(object)
102 |
103 | \S4method{field}{AnnotationFilter}(object)
104 |
105 | GRangesFilter(value, feature = "gene", type = c("any", "start", "end",
106 | "within", "equal"))
107 |
108 | feature(object)
109 |
110 | \S4method{supportedFilters}{missing}(object)
111 |
112 | AnnotationFilter(expr)
113 | }
114 | \arguments{
115 | \item{object}{An \code{AnnotationFilter} object.}
116 |
117 | \item{value}{\code{character()}, \code{integer()}, or
118 | \code{GRanges()} value for the filter}
119 |
120 | \item{feature}{\code{character(1)} defining on what feature the
121 | \code{GRangesFilter} should be applied. Choices could be
122 | \code{"gene"}, \code{"tx"} or \code{"exon"}.}
123 |
124 | \item{type}{\code{character(1)} indicating how overlaps are to be
125 | filtered. See \code{findOverlaps} in the IRanges package for a
126 | description of this argument.}
127 |
128 | \item{expr}{A filter expression, written as a \code{formula}, to be
129 | converted to an \code{AnnotationFilter} or \code{AnnotationFilterList}
130 | class. See below for examples.}
131 |
132 | \item{condition}{\code{character(1)} defining the condition to be
133 | used in the filter. For \code{IntegerFilter}, one of
134 | \code{"=="}, \code{"!="}, \code{">"}, \code{"<"}, \code{">="}
135 | or \code{"<="}. For \code{CharacterFilter}, one of \code{"=="},
136 | \code{"!="}, \code{"startsWith"}, \code{"endsWith"} or \code{"contains"}.
137 | Default condition is \code{"=="}.}
138 | }
139 | \value{
140 | The constructor function return an object extending
141 | \code{AnnotationFilter}. For the return value of the other methods see
142 | the methods' descriptions.
143 |
144 | \code{AnnotationFilter} returns an
145 | \code{\link{AnnotationFilter}} or an \code{\link{AnnotationFilterList}}.
146 | }
147 | \description{
148 | The filters extending the base \code{AnnotationFilter} class
149 | represent a simple filtering concept for annotation resources.
150 | Each filter object is thought to filter on a single (database)
151 | table column using the provided values and the defined condition.
152 |
153 | Filter instances created using the constructor functions (e.g.
154 | \code{GeneIdFilter}).
155 |
156 | \code{supportedFilters()} lists all defined filters. It returns a two column
157 | \code{data.frame} with the filter class name and its default field.
158 | Packages using \code{AnnotationFilter} should implement the
159 | \code{supportedFilters} for their annotation resource object (e.g. for
160 | \code{object = "EnsDb"} in the \code{ensembldb} package) to list all
161 | supported filters for the specific resource.
162 |
163 | \code{condition()} get the \code{condition} value for
164 | the filter \code{object}.
165 |
166 | \code{value()} get the \code{value} for the filter
167 | \code{object}.
168 |
169 | \code{field()} get the \code{field} for the filter
170 | \code{object}.
171 |
172 | \code{feature()} get the \code{feature} for the
173 | \code{GRangesFilter} \code{object}.
174 |
175 | \code{AnnotationFilter} \emph{translates} a filter
176 | expression such as \code{~ gene_id == "BCL2"} into a filter object
177 | extending the \code{\link{AnnotationFilter}} class (in the example a
178 | \code{\link{GeneIdFilter}} object) or an
179 | \code{\link{AnnotationFilterList}} if the expression contains multiple
180 | conditions (see examples below). Filter expressions have to be written
181 | in the form \code{~ <field> <condition> <value>}, with \code{<field>}
182 | being the default field of the filter class (use the
183 | \code{supportedFilters} function to list all fields and filter classes),
184 | \code{<condition>} the logical expression and \code{<value>} the value
185 | for the filter.
186 | }
187 | \details{
188 | By default filters are only available for tables containing the
189 | field on which the filter acts (i.e. that contain a column with the
190 | name matching the value of the \code{field} slot of the
191 | object). See the vignette for a description to use filters for
192 | databases in which the database table column name differs from the
193 | default \code{field} of the filter.
194 |
195 | Filter expressions for the \code{AnnotationFilter} class have to be
196 | written as formulas, i.e. starting with a \code{~}.
197 | }
198 | \note{
199 | Translation of nested filter expressions using the
200 | \code{AnnotationFilter} function is not yet supported.
201 | }
202 | \examples{
203 | ## filter by GRanges
204 | GRangesFilter(GenomicRanges::GRanges("chr10:87869000-87876000"))
205 | ## Create a SymbolFilter to filter on a gene's symbol.
206 | sf <- SymbolFilter("BCL2")
207 | sf
208 |
209 | ## Create a GeneStartFilter to filter based on the genes' chromosomal start
210 | ## coordinates
211 | gsf <- GeneStartFilter(10000, condition = ">")
212 | gsf
213 |
214 | supportedFilters()
215 |
216 | ## Convert a filter expression based on a gene ID to a GeneIdFilter
217 | gnf <- AnnotationFilter(~ gene_id == "BCL2")
218 | gnf
219 |
220 | ## Same conversion but for two gene IDs.
221 | gnf <- AnnotationFilter(~ gene_id \%in\% c("BCL2", "BCL2L11"))
222 | gnf
223 |
224 | ## Converting an expression that combines multiple filters. As a result we
225 | ## get an AnnotationFilterList containing the corresponding filters.
226 | ## Be aware that nesting of expressions/filters does not work.
227 | flt <- AnnotationFilter(~ gene_id \%in\% c("BCL2", "BCL2L11") &
228 | tx_biotype == "nonsense_mediated_decay" |
229 | seq_name == "Y")
230 | flt
231 |
232 | }
233 | \seealso{
234 | \code{\link{AnnotationFilterList}} for combining
235 | \code{AnnotationFilter} objects.
236 | }
237 |
--------------------------------------------------------------------------------
/man/AnnotationFilter.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AnnotationFilter.R, R/translate-utils.R
3 | \docType{methods}
4 | \name{AnnotationFilter}
5 | \alias{AnnotationFilter}
6 | \alias{CdsStartFilter}
7 | \alias{CdsEndFilter}
8 | \alias{ExonIdFilter}
9 | \alias{ExonNameFilter}
10 | \alias{ExonStartFilter}
11 | \alias{ExonEndFilter}
12 | \alias{ExonRankFilter}
13 | \alias{GeneIdFilter}
14 | \alias{GeneNameFilter}
15 | \alias{GeneBiotypeFilter}
16 | \alias{GeneStartFilter}
17 | \alias{GeneEndFilter}
18 | \alias{EntrezFilter}
19 | \alias{SymbolFilter}
20 | \alias{TxIdFilter}
21 | \alias{TxNameFilter}
22 | \alias{TxBiotypeFilter}
23 | \alias{TxStartFilter}
24 | \alias{TxEndFilter}
25 | \alias{ProteinIdFilter}
26 | \alias{UniprotFilter}
27 | \alias{SeqNameFilter}
28 | \alias{SeqStrandFilter}
29 | \alias{AnnotationFilter-class}
30 | \alias{CharacterFilter-class}
31 | \alias{DoubleFilter-class}
32 | \alias{IntegerFilter-class}
33 | \alias{CdsStartFilter-class}
34 | \alias{CdsEndFilter-class}
35 | \alias{ExonIdFilter-class}
36 | \alias{ExonNameFilter-class}
37 | \alias{ExonStartFilter-class}
38 | \alias{ExonEndFilter-class}
39 | \alias{ExonRankFilter-class}
40 | \alias{GeneIdFilter-class}
41 | \alias{GeneNameFilter-class}
42 | \alias{GeneBiotypeFilter-class}
43 | \alias{GeneStartFilter-class}
44 | \alias{GeneEndFilter-class}
45 | \alias{EntrezFilter-class}
46 | \alias{SymbolFilter-class}
47 | \alias{TxIdFilter-class}
48 | \alias{TxNameFilter-class}
49 | \alias{TxBiotypeFilter-class}
50 | \alias{TxStartFilter-class}
51 | \alias{TxEndFilter-class}
52 | \alias{ProteinIdFilter-class}
53 | \alias{UniprotFilter-class}
54 | \alias{SeqNameFilter-class}
55 | \alias{SeqStrandFilter-class}
56 | \alias{supportedFilters}
57 | \alias{show,AnnotationFilter-method}
58 | \alias{show,CharacterFilter-method}
59 | \alias{show,IntegerFilter-method}
60 | \alias{show,GRangesFilter-method}
61 | \alias{show,DoubleFilter-method}
62 | \alias{condition,AnnotationFilter-method}
63 | \alias{condition}
64 | \alias{value,AnnotationFilter-method}
65 | \alias{value}
66 | \alias{field,AnnotationFilter-method}
67 | \alias{field}
68 | \alias{not,AnnotationFilter-method}
69 | \alias{GRangesFilter-class}
70 | \alias{.GRangesFilter}
71 | \alias{GRangesFilter}
72 | \alias{feature}
73 | \alias{AnnotationFilter}
74 | \alias{convertFilter,AnnotationFilter,missing-method}
75 | \alias{supportedFilters,missing-method}
76 | \alias{AnnotationFilter}
77 | \title{Filters for annotation objects}
78 | \usage{
79 | CdsStartFilter(value, condition = "==", not = FALSE)
80 | CdsEndFilter(value, condition = "==", not = FALSE)
81 | ExonIdFilter(value, condition = "==", not = FALSE)
82 | ExonNameFilter(value, condition = "==", not = FALSE)
83 | ExonRankFilter(value, condition = "==", not = FALSE)
84 | ExonStartFilter(value, condition = "==", not = FALSE)
85 | ExonEndFilter(value, condition = "==", not = FALSE)
86 | GeneIdFilter(value, condition = "==", not = FALSE)
87 | GeneNameFilter(value, condition = "==", not = FALSE)
88 | GeneBiotypeFilter(value, condition = "==", not = FALSE)
89 | GeneStartFilter(value, condition = "==", not = FALSE)
90 | GeneEndFilter(value, condition = "==", not = FALSE)
91 | EntrezFilter(value, condition = "==", not = FALSE)
92 | SymbolFilter(value, condition = "==", not = FALSE)
93 | TxIdFilter(value, condition = "==", not = FALSE)
94 | TxNameFilter(value, condition = "==", not = FALSE)
95 | TxBiotypeFilter(value, condition = "==", not = FALSE)
96 | TxStartFilter(value, condition = "==", not = FALSE)
97 | TxEndFilter(value, condition = "==", not = FALSE)
98 | ProteinIdFilter(value, condition = "==", not = FALSE)
99 | UniprotFilter(value, condition = "==", not = FALSE)
100 | SeqNameFilter(value, condition = "==", not = FALSE)
101 | SeqStrandFilter(value, condition = "==", not = FALSE)
102 |
103 | \S4method{condition}{AnnotationFilter}(object)
104 |
105 | \S4method{value}{AnnotationFilter}(object)
106 |
107 | \S4method{field}{AnnotationFilter}(object)
108 |
109 | \S4method{not}{AnnotationFilter}(object)
110 |
111 | GRangesFilter(value, feature = "gene", type = c("any", "start", "end",
112 | "within", "equal"))
113 |
114 | feature(object)
115 |
116 | \S4method{convertFilter}{AnnotationFilter,missing}(object)
117 |
118 | \S4method{supportedFilters}{missing}(object)
119 |
120 | AnnotationFilter(expr)
121 | }
122 | \arguments{
123 | \item{object}{An \code{AnnotationFilter} object.}
124 |
125 | \item{value}{\code{character()}, \code{integer()}, or
126 | \code{GRanges()} value for the filter}
127 |
128 | \item{feature}{\code{character(1)} defining on what feature the
129 | \code{GRangesFilter} should be applied. Choices could be
130 | \code{"gene"}, \code{"tx"} or \code{"exon"}.}
131 |
132 | \item{type}{\code{character(1)} indicating how overlaps are to be
133 | filtered. See \code{findOverlaps} in the IRanges package for a
134 | description of this argument.}
135 |
136 | \item{expr}{A filter expression, written as a \code{formula}, to be
137 | converted to an \code{AnnotationFilter} or \code{AnnotationFilterList}
138 | class. See below for examples.}
139 |
140 | \item{condition}{\code{character(1)} defining the condition to be
141 | used in the filter. For \code{IntegerFilter} or \code{DoubleFilter},
142 | one of \code{"=="}, \code{"!="}, \code{">"}, \code{"<"}, \code{">="}
143 | or \code{"<="}. For \code{CharacterFilter}, one of \code{"=="},
144 | \code{"!="}, \code{"startsWith"}, \code{"endsWith"} or \code{"contains"}.
145 | Default condition is \code{"=="}.}
146 |
147 | \item{not}{\code{logical(1)} whether the \code{AnnotationFilter} is negated.
148 | \code{TRUE} indicates that the filter is negated (!), \code{FALSE} that it
149 | is not negated. The default is \code{FALSE}.}
150 | }
151 | \value{
152 | The constructor function return an object extending
153 | \code{AnnotationFilter}. For the return value of the other methods see
154 | the methods' descriptions.
155 |
156 | \code{character(1)} that can be used as input to a \code{dplyr}
157 | filter.
158 |
159 | \code{AnnotationFilter} returns an
160 | \code{\link{AnnotationFilter}} or an \code{\link{AnnotationFilterList}}.
161 | }
162 | \description{
163 | The filters extending the base \code{AnnotationFilter} class
164 | represent a simple filtering concept for annotation resources.
165 | Each filter object is thought to filter on a single (database)
166 | table column using the provided values and the defined condition.
167 |
168 | Filter instances created using the constructor functions (e.g.
169 | \code{GeneIdFilter}).
170 |
171 | \code{supportedFilters()} lists all defined filters. It returns a two column
172 | \code{data.frame} with the filter class name and its default field.
173 | Packages using \code{AnnotationFilter} should implement the
174 | \code{supportedFilters} for their annotation resource object (e.g. for
175 | \code{object = "EnsDb"} in the \code{ensembldb} package) to list all
176 | supported filters for the specific resource.
177 |
178 | \code{condition()} get the \code{condition} value for
179 | the filter \code{object}.
180 |
181 | \code{value()} get the \code{value} for the filter
182 | \code{object}.
183 |
184 | \code{field()} get the \code{field} for the filter
185 | \code{object}.
186 |
187 | \code{not()} get the \code{not} for the filter \code{object}.
188 |
189 | \code{feature()} get the \code{feature} for the
190 | \code{GRangesFilter} \code{object}.
191 |
192 | Converts an \code{AnnotationFilter} object to a
193 | \code{character(1)} giving an equation that can be used as input to
194 | a \code{dplyr} filter.
195 |
196 | \code{AnnotationFilter} \emph{translates} a filter
197 | expression such as \code{~ gene_id == "BCL2"} into a filter object
198 | extending the \code{\link{AnnotationFilter}} class (in the example a
199 | \code{\link{GeneIdFilter}} object) or an
200 | \code{\link{AnnotationFilterList}} if the expression contains multiple
201 | conditions (see examples below). Filter expressions have to be written
202 | in the form \code{~ <field> <condition> <value>}, with \code{<field>}
203 | being the default field of the filter class (use the
204 | \code{supportedFilters} function to list all fields and filter classes),
205 | \code{<condition>} the logical expression and \code{<value>} the value
206 | for the filter.
207 | }
208 | \details{
209 | By default filters are only available for tables containing the
210 | field on which the filter acts (i.e. that contain a column with the
211 | name matching the value of the \code{field} slot of the
212 | object). See the vignette for a description to use filters for
213 | databases in which the database table column name differs from the
214 | default \code{field} of the filter.
215 |
216 | Filter expressions for the \code{AnnotationFilter} class have to be
217 | written as formulas, i.e. starting with a \code{~}.
218 | }
219 | \note{
220 | Translation of nested filter expressions using the
221 | \code{AnnotationFilter} function is not yet supported.
222 | }
223 | \examples{
224 | ## filter by GRanges
225 | GRangesFilter(GenomicRanges::GRanges("chr10:87869000-87876000"))
226 | ## Create a SymbolFilter to filter on a gene's symbol.
227 | sf <- SymbolFilter("BCL2")
228 | sf
229 |
230 | ## Create a GeneStartFilter to filter based on the genes' chromosomal start
231 | ## coordinates
232 | gsf <- GeneStartFilter(10000, condition = ">")
233 | gsf
234 |
235 | filter <- SymbolFilter("ADA", "==")
236 | result <- convertFilter(filter)
237 | result
238 | supportedFilters()
239 |
240 | ## Convert a filter expression based on a gene ID to a GeneIdFilter
241 | gnf <- AnnotationFilter(~ gene_id == "BCL2")
242 | gnf
243 |
244 | ## Same conversion but for two gene IDs.
245 | gnf <- AnnotationFilter(~ gene_id \%in\% c("BCL2", "BCL2L11"))
246 | gnf
247 |
248 | ## Converting an expression that combines multiple filters. As a result we
249 | ## get an AnnotationFilterList containing the corresponding filters.
250 | ## Be aware that nesting of expressions/filters does not work.
251 | flt <- AnnotationFilter(~ gene_id \%in\% c("BCL2", "BCL2L11") &
252 | tx_biotype == "nonsense_mediated_decay" |
253 | seq_name == "Y")
254 | flt
255 |
256 | }
257 | \seealso{
258 | \code{\link{AnnotationFilterList}} for combining
259 | \code{AnnotationFilter} objects.
260 | }
261 |
--------------------------------------------------------------------------------
/R/AnnotationFilterList.R:
--------------------------------------------------------------------------------
1 | #' @include AnnotationFilter.R
2 |
3 | #' @rdname AnnotationFilterList
4 | #'
5 | #' @name AnnotationFilterList
6 | #'
7 | #' @title Combining annotation filters
8 | #'
9 | #' @aliases AnnotationFilterList-class
10 | #'
11 | #' @description The \code{AnnotationFilterList} allows combining
12 | #' filter objects extending the \code{\link{AnnotationFilter}}
13 | #' class to construct more complex queries. Consecutive filter
14 | #' objects in the \code{AnnotationFilterList} can be combined by a
15 | #' logical \emph{and} (\code{&}) or \emph{or} (\code{|}). The
16 | #' \code{AnnotationFilterList} extends \code{list}, individual
17 | #' elements can thus be accessed with \code{[[}.
18 | #'
19 | #' @note The \code{AnnotationFilterList} does not support containing empty
20 | #' elements, hence all elements of \code{length == 0} are removed in
21 | #' the constructor function.
22 | #'
23 | #' @exportClass AnnotationFilterList
24 | NULL
25 |
26 | ## S4 class: a list of filters plus the operators joining neighbours.
27 | .AnnotationFilterList <- setClass(
28 |     Class = "AnnotationFilterList", contains = "list",
29 |     slots = c(logOp = "character",         # "&"/"|" between elements
30 |               not = "logical",             # negation flag of the list
31 |               .groupingFlag = "logical")   # internal use
32 | )
33 | ## Logical operators accepted in the 'logOp' slot.
34 | .LOG_OPS <- c("&", "|")
35 |
36 | setValidity("AnnotationFilterList",
37 |     function(object)
38 | {
39 |     ## Collect all problems so that they are reported together.
40 |     txt <- character()
41 |     filters <- .aflvalue(object)
42 |     logOp <- .logOp(object)
43 |     if (length(filters) == 0 && length(logOp)) {
44 |         txt <- c(
45 |             txt, "'logicOp' can not have length > 0 if the object is empty"
46 |         )
47 |     } else if (length(filters) != 0) {
48 |         ## Note: we allow length of filters being 1, but then logOp has
49 |         ## to be empty. Check content:
50 |         fun <- function(z)
51 |             is(z, "AnnotationFilter") || is(z, "AnnotationFilterList")
52 |         test <- vapply(filters, fun, logical(1))
53 |         if (!all(test)){
54 |             txt <- c(
55 |                 txt, "only 'AnnotationFilter' or 'AnnotationFilterList' allowed"
56 |             )
57 |         }
58 |         ## Check that all elements are non-empty (issue #17). Doing this
59 |         ## separately from the check above to ensure we get a different error
60 |         ## message.
61 |         if (!all(lengths(filters) > 0))
62 |             txt <- c(txt, "Lengths of all elements have to be > 0")
63 |         ## Check that logOp has length object -1
64 |         if (length(logOp) != length(filters) - 1)
65 |             txt <- c(txt, "length of 'logicOp' has to be length of the object -1")
66 |         ## Check content of logOp.
67 |         if (!all(logOp %in% .LOG_OPS))
68 |             txt <- c(txt, "'logicOp' can only contain '&' and '|'")
69 |     }
70 |
71 |     if (length(txt)) txt else TRUE
72 | })
73 |
74 | ## AnnotationFilterList constructor function.
75 | #' @rdname AnnotationFilterList
76 | #'
77 | #' @name AnnotationFilterList
78 | #'
79 | #' @param ... individual \code{\link{AnnotationFilter}} objects or a
80 | #' mixture of \code{AnnotationFilter} and
81 | #' \code{AnnotationFilterList} objects.
82 | #'
83 | #' @param logicOp \code{character} of length equal to the number
84 | #' of submitted \code{AnnotationFilter} objects - 1. Each value
85 | #' representing the logical operation to combine consecutive
86 | #' filters, i.e. the first element being the logical operation to
87 | #' combine the first and second \code{AnnotationFilter}, the
88 | #' second element being the logical operation to combine the
89 | #' second and third \code{AnnotationFilter} and so on. Allowed
90 | #' values are \code{"&"} and \code{"|"}. The function assumes a
91 | #' logical \emph{and} between all elements by default.
92 | #'
93 | #' @param logOp Deprecated; use \code{logicOp=}.
94 | #'
95 | #' @param .groupingFlag Flag designated for internal use only.
96 | #'
97 | #' @param not \code{logical} of length one. Indicates whether the grouping
98 | #' of \code{AnnotationFilters} is to be negated.
99 | #'
100 | #' @seealso \code{\link{supportedFilters}} for available
101 | #' \code{\link{AnnotationFilter}} objects
102 | #'
103 | #' @return \code{AnnotationFilterList} returns an \code{AnnotationFilterList}.
104 | #'
105 | #' @examples
106 | #' ## Create some AnnotationFilters
107 | #' gf <- GeneNameFilter(c("BCL2", "BCL2L11"))
108 | #' tbtf <- TxBiotypeFilter("protein_coding", condition = "!=")
109 | #'
110 | #' ## Combine both to an AnnotationFilterList. By default elements are combined
111 | #' ## using a logical "and" operator. The filter list represents thus a query
112 | #' ## like: get all features where the gene name is either ("BCL2" or "BCL2L11")
113 | #' ## and the transcript biotype is not "protein_coding".
114 | #' afl <- AnnotationFilterList(gf, tbtf)
115 | #' afl
116 | #'
117 | #' ## Access individual filters.
118 | #' afl[[1]]
119 | #'
120 | #' ## Create a filter in the form of: get all features where the gene name is
121 | #' ## either ("BCL2" or "BCL2L11") and the transcript biotype is not
122 | #' ## "protein_coding" or the seq_name is "Y". Hence, this will get all feature
123 | #' ## also found by the previous AnnotationFilterList and returns also all
124 | #' ## features on chromosome Y.
125 | #' afl <- AnnotationFilterList(gf, tbtf, SeqNameFilter("Y"),
126 | #' logicOp = c("&", "|"))
127 | #' afl
128 | #'
129 | #' @export
130 | AnnotationFilterList <-
131 |     function(..., logicOp = character(), logOp = character(), not = FALSE,
132 |              .groupingFlag=FALSE)
133 | {
134 |     ## Honour the deprecated 'logOp' only if 'logicOp' was not supplied.
135 |     if (!missing(logOp) && missing(logicOp)) {
136 |         logicOp <- logOp
137 |         .Deprecated(msg = "'logOp' deprecated, use 'logicOp'")
138 |     }
139 |     filters <- list(...)
140 |     ## Drop all elements of length 0 (empty lists, empty filter lists).
141 |     keep <- lengths(filters) != 0
142 |     filters <- filters[keep]
143 |     ## By default we're assuming & between elements. Both conditions are
144 |     ## scalar, so use the short-circuiting '&&' rather than elementwise '&'.
145 |     if (length(filters) > 1 && length(logicOp) == 0)
146 |         logicOp <- rep("&", (length(filters) - 1))
147 |     .AnnotationFilterList(filters, logOp = logicOp, not = not,
148 |                           .groupingFlag=.groupingFlag)
149 | }
150 | ## Internal slot accessors backing the exported logicOp(), value(), not().
151 | .logOp <- function(object) slot(object, "logOp")
152 | ## '.Data' is what value() returns: the list of filter objects.
153 | .aflvalue <- function(object) slot(object, ".Data")
154 |
155 | .not <- function(object) slot(object, "not")
156 |
157 | #' @rdname AnnotationFilterList
158 | #'
159 | #' @description \code{value()} gets the \code{list} of
160 | #' \code{AnnotationFilter} objects stored in the filter list. Use
161 | #' \code{[[} to access individual filters.
162 | #'
163 | #' @return \code{value()} returns a \code{list} with \code{AnnotationFilter}
164 | #' objects.
165 | #'
166 | #' @export
167 | setMethod("value", signature("AnnotationFilterList"), .aflvalue)
168 |
169 | #' @rdname AnnotationFilterList
170 | #'
171 | #' @aliases logicOp
172 | #'
173 | #' @description \code{logicOp()} gets the logical operators combining
174 | #' successive filters of the \code{AnnotationFilterList}.
175 | #'
176 | #' @return \code{logicOp()} returns a \code{character()} vector of
177 | #' \dQuote{&} or \dQuote{|} symbols.
178 | #'
179 | #' @export logicOp
180 | setMethod("logicOp", signature("AnnotationFilterList"), .logOp)
181 |
182 | #' @rdname AnnotationFilterList
183 | #'
184 | #' @aliases not
185 | #'
186 | #' @description \code{not()} gets the flag indicating whether the
187 | #' \code{AnnotationFilterList} is negated.
188 | #'
189 | #' @return \code{not()} returns the \code{logical} negation flag stored in
190 | #' the object (see the \code{not} argument of the constructor).
191 | #'
192 | #' @export not
193 | setMethod("not", signature("AnnotationFilterList"), .not)
194 |
195 | ## Rebuild 'object' with all negations pushed down to the individual filters
196 | ## (De Morgan's law): wherever a negation is in effect the filters' 'not'
197 | ## flags are toggled and the operators '&' and '|' are swapped.
198 | .distributeNegation <- function(object, .prior_negation=FALSE)
199 | {
200 |     ## A negated list toggles the negation inherited from enclosing lists.
201 |     negate <- .prior_negation
202 |     if (.not(object))
203 |         negate <- !negate
204 |     pushDown <- function(flt) {
205 |         ## Nested lists are processed recursively through the generic.
206 |         if (is(flt, "AnnotationFilterList"))
207 |             return(distributeNegation(flt, negate))
208 |         if (negate)
209 |             flt@not <- !flt@not
210 |         flt
211 |     }
212 |     swapOp <- function(op) {
213 |         if (!negate)
214 |             return(op)
215 |         if (op == '&') '|' else '&'
216 |     }
217 |     args <- lapply(object, pushDown)
218 |     ## vapply() names its result after the operators; drop these names.
219 |     ops <- vapply(logicOp(object), swapOp, character(1))
220 |     args[['logicOp']] <- unname(ops)
221 |     do.call("AnnotationFilterList", args)
222 | }
223 |
224 | #' @rdname AnnotationFilterList
225 | #'
226 | #' @aliases distributeNegation
227 | #'
228 | #' @description \code{distributeNegation()} pushes negations down to filters.
229 | #'
230 | #' @param .prior_negation \code{logical(1)} for internal use (negation
231 | #' inherited from an enclosing list); users should keep the default.
232 | #' @return \code{AnnotationFilterList} object with DeMorgan's law applied to
233 | #' it such that it is equal to the original \code{AnnotationFilterList}
234 | #' object but all \code{!}'s are distributed out of the
235 | #' \code{AnnotationFilterList} object and to the nested
236 | #' \code{AnnotationFilter} objects.
237 | #'
238 | #' @examples
239 | #' afl <- AnnotationFilter(~!(symbol == 'ADA' | symbol %startsWith% 'SNORD'))
240 | #' afl <- distributeNegation(afl)
241 | #' afl
242 | #' @export
243 | setMethod("distributeNegation", "AnnotationFilterList", .distributeNegation)
244 |
245 | #' Convert an AnnotationFilterList into a filter expression string.
246 | #' Returns list(expression, GRangesFilters that were replaced by symbols).
247 | #' @importFrom utils head
248 | #' @noRd
249 | .convertFilterList <- function(object, show, granges=list(), nested=FALSE)
250 | {
251 |     filters <- value(object)
252 |     result <- character(length(filters))
253 |     for (i in seq_along(filters)) {
254 |         flt <- filters[[i]]
255 |         if (is(flt, "AnnotationFilterList")) {
256 |             res <- .convertFilterList(flt, show=show, granges=granges,
257 |                                       nested=TRUE)
258 |             ## The nested call was seeded with 'granges' and returns the
259 |             ## full accumulated list, so take it as is; appending it would
260 |             ## duplicate entries and break the GRangesFilter_<n> numbering.
261 |             granges <- res[[2]]
262 |             result[i] <- res[[1]]
263 |         } else if (field(flt) == "granges" && show) {
264 |             ## For display, replace the GRangesFilter by a numbered symbol
265 |             ## and collect the filter so that show() can print it below.
266 |             granges <- c(granges, list(flt))
267 |             result[i] <- paste0("GRangesFilter_", length(granges))
268 |         } else {
269 |             result[i] <- .convertFilter(flt)
270 |         }
271 |     }
272 |
273 |     ## Interleave the filter expressions with the logical operators.
274 |     result_last <- tail(result, 1)
275 |     result <- c(rbind(head(result, -1), logicOp(object)), result_last)
276 |     result <- paste(result, collapse=" ")
277 |     ## Nested and negated lists are wrapped in parentheses.
278 |     if (nested || object@not)
279 |         result <- paste0("(", result, ")")
280 |     if (object@not)
281 |         result <- paste0("!", result)
282 |     list(result, granges)
283 | }
284 |
285 | #' @rdname AnnotationFilterList
286 | #'
287 | #' @aliases convertFilter
288 | #'
289 | #' @description \code{convertFilter()} converts an \code{AnnotationFilterList}
290 | #' object to a \code{character(1)} giving an expression that can be used as
291 | #' input to a \code{dplyr} filter.
292 | #'
293 | #' @return \code{character(1)} that can be used as input to a \code{dplyr}
294 | #' filter.
295 | #'
296 | #' @examples
297 | #' afl <- AnnotationFilter(~symbol=="ADA" & tx_start > "400000")
298 | #' result <- convertFilter(afl)
299 | #' result
300 | #' @export
301 | setMethod("convertFilter",
302 |     signature(object = "AnnotationFilterList", db = "missing"),
303 |     function(object) {
304 |         ## Element 1 is the expression string; element 2 is not needed here.
305 |         .convertFilterList(object, show=FALSE)[[1]]
306 |     })
307 |
308 | #' @rdname AnnotationFilterList
309 | #'
310 | #' @param object An object of class \code{AnnotationFilterList}.
311 | #'
312 | #' @importFrom utils tail
313 | #' @export
314 | setMethod("show", "AnnotationFilterList", function(object)
315 | {
316 |     converted <- .convertFilterList(object, show=TRUE)
317 |     granges <- converted[[2]]
318 |     cat("AnnotationFilterList of length", length(object), "\n")
319 |     cat(converted[[1]])
320 |     cat("\n")
321 |     ## Print each GRangesFilter below the symbol standing in for it above.
322 |     for (i in seq_along(granges)) {
323 |         cat("\n")
324 |         cat("Symbol: GRangesFilter_", i, "\n", sep="")
325 |         show(granges[[i]])
326 |         cat("\n")
327 |     }
328 | })
329 |
--------------------------------------------------------------------------------
/.svn/pristine/21/2189e6a627b4c711e766c184f50bb9cdc230e821.svn-base:
--------------------------------------------------------------------------------
1 | #' @name AnnotationFilter
2 | #'
3 | #' @title Filters for annotation objects
4 | #'
5 | #' @aliases CdsStartFilter CdsEndFilter ExonIdFilter ExonNameFilter
6 | #' ExonStartFilter ExonEndFilter ExonRankFilter GeneIdFilter
7 | #' GenenameFilter GeneBiotypeFilter GeneStartFilter GeneEndFilter
8 | #' EntrezFilter SymbolFilter TxIdFilter TxNameFilter
9 | #' TxBiotypeFilter TxStartFilter TxEndFilter ProteinIdFilter
10 | #' UniprotFilter SeqNameFilter SeqStrandFilter
11 | #' AnnotationFilter-class CharacterFilter-class
12 | #' IntegerFilter-class CdsStartFilter-class CdsEndFilter-class
13 | #' ExonIdFilter-class ExonNameFilter-class ExonStartFilter-class
14 | #' ExonEndFilter-class ExonRankFilter-class GeneIdFilter-class
15 | #' GenenameFilter-class GeneBiotypeFilter-class
16 | #' GeneStartFilter-class GeneEndFilter-class EntrezFilter-class
17 | #' SymbolFilter-class TxIdFilter-class TxNameFilter-class
18 | #' TxBiotypeFilter-class TxStartFilter-class TxEndFilter-class
19 | #' ProteinIdFilter-class UniprotFilter-class SeqNameFilter-class
20 | #' SeqStrandFilter-class supportedFilters
21 | #' show,AnnotationFilter-method show,CharacterFilter-method
22 | #' show,IntegerFilter-method show,GRangesFilter-method
23 | #'
24 | #' @description
25 | #'
26 | #' The filters extending the base \code{AnnotationFilter} class
27 | #' represent a simple filtering concept for annotation resources.
28 | #' Each filter object is thought to filter on a single (database)
29 | #' table column using the provided values and the defined condition.
30 | #'
31 | #' Filter instances are created using the constructor functions (e.g.
32 | #' \code{GeneIdFilter}).
33 | #'
34 | #' \code{supportedFilters()} lists all defined filters. It returns a two column
35 | #' \code{data.frame} with the filter class name and its default field.
36 | #' Packages using \code{AnnotationFilter} should implement the
37 | #' \code{supportedFilters} for their annotation resource object (e.g. for
38 | #' \code{object = "EnsDb"} in the \code{ensembldb} package) to list all
39 | #' supported filters for the specific resource.
40 | #'
41 | #' @details
42 | #'
43 | #' By default filters are only available for tables containing the
44 | #' field on which the filter acts (i.e. that contain a column with the
45 | #' name matching the value of the \code{field} slot of the
46 | #' object). See the vignette for a description to use filters for
47 | #' databases in which the database table column name differs from the
48 | #' default \code{field} of the filter.
49 | #'
50 | #' @usage
51 | #'
52 | #' CdsStartFilter(value, condition = "==")
53 | #' CdsEndFilter(value, condition = "==")
54 | #' ExonIdFilter(value, condition = "==")
55 | #' ExonNameFilter(value, condition = "==")
56 | #' ExonRankFilter(value, condition = "==")
57 | #' ExonStartFilter(value, condition = "==")
58 | #' ExonEndFilter(value, condition = "==")
59 | #' GeneIdFilter(value, condition = "==")
60 | #' GenenameFilter(value, condition = "==")
61 | #' GeneBiotypeFilter(value, condition = "==")
62 | #' GeneStartFilter(value, condition = "==")
63 | #' GeneEndFilter(value, condition = "==")
64 | #' EntrezFilter(value, condition = "==")
65 | #' SymbolFilter(value, condition = "==")
66 | #' TxIdFilter(value, condition = "==")
67 | #' TxNameFilter(value, condition = "==")
68 | #' TxBiotypeFilter(value, condition = "==")
69 | #' TxStartFilter(value, condition = "==")
70 | #' TxEndFilter(value, condition = "==")
71 | #' ProteinIdFilter(value, condition = "==")
72 | #' UniprotFilter(value, condition = "==")
73 | #' SeqNameFilter(value, condition = "==")
74 | #' SeqStrandFilter(value, condition = "==")
75 | #'
76 | #' @param value \code{character()}, \code{integer()}, or
77 | #' \code{GRanges()} value for the filter
78 | #'
79 | #' @param condition \code{character(1)} defining the condition to be
80 | #' used in the filter. For \code{IntegerFilter}, one of
81 | #' \code{"=="}, \code{"!="}, \code{">"}, \code{"<"}, \code{">="}
82 | #' or \code{"<="}. For \code{CharacterFilter}, one of \code{"=="},
83 | #' \code{"!="}, \code{"startsWith"}, \code{"endsWith"} or \code{"contains"}.
84 | #' Default condition is \code{"=="}.
85 | #'
86 | #' @return The constructor functions return an object extending
87 | #' \code{AnnotationFilter}. For the return value of the other methods see
88 | #' the methods' descriptions.
89 | #'
90 | #' @seealso \code{\link{AnnotationFilterList}} for combining
91 | #' \code{AnnotationFilter} objects.
92 | NULL
93 | ## Conditions accepted by each filter type; looked up by .valid_condition().
94 | .CONDITION <- list(
95 | IntegerFilter = c("==", "!=", ">", "<", ">=", "<="),
96 | CharacterFilter = c("==", "!=", "startsWith", "endsWith", "contains"),
97 | GRangesFilter = c("any", "start", "end", "within", "equal")
98 | )
99 | ## Field names per filter type; one filter class is generated for each field.
100 | .FIELD <- list(
101 | CharacterFilter = c(
102 | "exon_id", "exon_name", "gene_id", "genename", "gene_biotype",
103 | "entrez", "symbol", "tx_id", "tx_name", "tx_biotype",
104 | "protein_id", "uniprot", "seq_name", "seq_strand"),
105 | IntegerFilter = c(
106 | "cds_start", "cds_end", "exon_start", "exon_rank", "exon_end",
107 | "gene_start", "gene_end", "tx_start", "tx_end")
108 | )
109 |
110 | .valid_condition <- function(condition, class) {
111 |     allowed <- .CONDITION[[class]]
112 |     problems <- character()
113 |     ## 'condition' has to be a single value ...
114 |     single <- length(condition) == 1L
115 |     if (!single)
116 |         problems <- c(problems, "'condition' must be length 1")
117 |
118 |     ## ... taken from the conditions allowed for 'class'.
119 |     if (!(single && (condition %in% allowed)))
120 |         problems <- c(problems, paste0("'", condition, "' must be in ",
121 |                                        paste(sQuote(allowed), collapse=" ")))
122 |
123 |     if (length(problems)) problems else TRUE
124 | }
125 |
126 | ############################################################
127 | ## AnnotationFilter
128 | ## Virtual base class of all filters: a field, a condition and a value.
129 |
130 | #' @exportClass AnnotationFilter
131 | .AnnotationFilter <- setClass(
132 | "AnnotationFilter",
133 | contains = "VIRTUAL",
134 | slots = c(
135 | field="character",
136 | condition="character",
137 | value="ANY" # subclasses declare a more specific type for 'value'
138 | ),
139 | prototype=list(
140 | condition= "==" # default condition
141 | )
142 | )
143 |
144 | setValidity("AnnotationFilter", function(object) {
145 |     value <- .value(object)
146 |     condition <- .condition(object)
147 |     msgs <- character()
148 |
149 |     ## A single condition must not be NA.
150 |     isSingle <- length(condition) == 1L
151 |     hasNA <- any(is.na(condition))
152 |     if (isSingle && hasNA)
153 |         msgs <- c(msgs, "'condition' can not be NA")
154 |
155 |     ## These conditions compare against exactly one value.
156 |     singleValued <- c("startsWith", "endsWith", "contains", ">",
157 |                       "<", ">=", "<=")
158 |     if (isSingle && !hasNA && condition %in% singleValued &&
159 |         length(value) > 1L)
160 |         msgs <- c(msgs, paste0("'", condition, "' requires length 1 'value'"))
161 |
162 |     if (any(is.na(value)))
163 |         msgs <- c(msgs, "'value' can not be NA")
164 |     if (length(msgs)) msgs else TRUE
165 | })
166 | ## Internal slot accessors backing the exported field()/condition()/value().
167 | .field <- function(object) slot(object, "field")
168 |
169 | .condition <- function(object) slot(object, "condition")
170 |
171 | .value <- function(object) slot(object, "value")
172 |
173 | #' @rdname AnnotationFilter
174 | #'
175 | #' @aliases condition
176 | #'
177 | #' @description \code{condition()} gets the \code{condition} of the filter
178 | #' \code{object}.
179 | #'
180 | #' @param object An \code{AnnotationFilter} object.
181 | #'
182 | #' @export
183 | setMethod("condition", signature("AnnotationFilter"), .condition)
184 |
185 | #' @rdname AnnotationFilter
186 | #'
187 | #' @aliases value
188 | #'
189 | #' @description \code{value()} gets the \code{value} of the filter
190 | #' \code{object}.
191 | #'
192 | #' @export
193 | setMethod("value", signature("AnnotationFilter"), .value)
194 |
195 | #' @rdname AnnotationFilter
196 | #'
197 | #' @aliases field
198 | #'
199 | #' @description \code{field()} gets the \code{field} of the filter
200 | #' \code{object}.
201 | #'
202 | #' @export
203 | setMethod("field", signature("AnnotationFilter"), .field)
204 |
205 | #' @importFrom methods show
206 | #'
207 | #' @export
208 | setMethod("show", signature("AnnotationFilter"), function(object) {
209 |     ## Print the class name and the condition, one per line.
210 |     cat("class:", class(object), "\ncondition:", .condition(object), "\n")
211 | })
212 |
213 | ############################################################
214 | ## CharacterFilter, IntegerFilter
215 | ##
216 |
217 | #' @exportClass CharacterFilter
218 | .CharacterFilter <- setClass(
219 |     "CharacterFilter",
220 |     contains = c("VIRTUAL", "AnnotationFilter"),
221 |     ## Narrows the generic 'value' slot to character.
222 |     slots = c(value = "character"),
223 |     prototype = list(value = character())
224 | )
225 |
226 | ## The condition has to be one of .CONDITION[["CharacterFilter"]].
227 | setValidity("CharacterFilter", function(object)
228 |     .valid_condition(.condition(object), "CharacterFilter"))
229 |
230 | ## Extends the inherited show() output by the filter's value(s).
231 | #' @importFrom methods show callNextMethod
232 | #'
233 | #' @export
234 | setMethod("show", signature("CharacterFilter"), function(object) {
235 |     callNextMethod()
236 |     cat("value:", .value(object), "\n")
237 | })
238 |
239 | #' @exportClass IntegerFilter
240 | .IntegerFilter <- setClass(
241 |     "IntegerFilter",
242 |     contains = c("VIRTUAL", "AnnotationFilter"),
243 |     ## Narrows the generic 'value' slot to integer.
244 |     slots = c(value = "integer"),
245 |     prototype = list(value = integer())
246 | )
247 |
248 | ## The condition has to be one of .CONDITION[["IntegerFilter"]].
249 | setValidity("IntegerFilter", function(object)
250 |     .valid_condition(.condition(object), "IntegerFilter"))
251 |
252 | ## Extends the inherited show() output by the filter's value(s).
253 | #' @export
254 | setMethod("show", signature("IntegerFilter"), function(object) {
255 |     callNextMethod()
256 |     cat("value:", .value(object), "\n")
257 | })
258 |
259 | #' @rdname AnnotationFilter
260 | #'
261 | #' @importFrom GenomicRanges GRanges
262 | #'
263 | #' @importClassesFrom GenomicRanges GRanges
264 | #'
265 | #' @exportClass GRangesFilter
266 | .GRangesFilter <- setClass(
267 |     "GRangesFilter",
268 |     contains = "AnnotationFilter",
269 |     ## 'value' holds the ranges, 'feature' names the feature to filter on.
270 |     slots = c(value = "GRanges", feature = "character"),
271 |     prototype = list(
272 |         value = GRanges(),
273 |         condition = "any",
274 |         field = "granges",
275 |         feature = "gene"
276 |     )
277 | )
278 |
279 | ## The condition has to be one of .CONDITION[["GRangesFilter"]].
280 | setValidity("GRangesFilter", function(object)
281 |     .valid_condition(.condition(object), "GRangesFilter"))
282 |
283 | ## Internal accessor for the 'feature' slot; the exported feature() is an
284 | ## alias of this function.
285 | .feature <- function(object) slot(object, "feature")
286 |
287 | #' @rdname AnnotationFilter
288 | #'
289 | #' @param type \code{character(1)} indicating how overlaps are to be
290 | #' filtered; stored as the filter's condition. See \code{findOverlaps} in
291 | #' the IRanges package for a description of this argument.
292 | #'
293 | #' @examples
294 | #' ## filter by GRanges
295 | #' GRangesFilter(GenomicRanges::GRanges("chr10:87869000-87876000"))
296 | #' @export
297 | GRangesFilter <-
298 |     function(value, feature = "gene",
299 |              type = c("any", "start", "end", "within", "equal"))
300 | {
301 |     ## 'type' becomes the filter's condition. match.arg() selects the first
302 |     ## choice ("any") when 'type' is not given and rejects unknown values.
303 |     type <- match.arg(type)
304 |     .GRangesFilter(field = "granges",
305 |                    value = value, condition = type,
306 |                    feature = feature)
307 | }
308 |
309 | ## '.feature' (accessor for the 'feature' slot) is defined once, right after
310 | ## the GRangesFilter class and its validity method above.
311 | #' @aliases feature
312 | #'
313 | #' @description \code{feature()} gets the \code{feature} of the
314 | #' \code{GRangesFilter} \code{object}.
315 | #'
316 | #' @rdname AnnotationFilter
317 | #'
318 | #' @export
319 | feature <- .feature
320 |
321 | #' @importFrom GenomicRanges show
322 | #'
323 | #' @export
324 | setMethod("show", signature("GRangesFilter"), function(object) {
325 |     callNextMethod()
326 |     ## The value is printed through its own show() method.
327 |     cat("feature:", .feature(object), "\nvalue:\n")
328 |     show(value(object))
329 | })
330 |
331 |
332 | ############################################################
333 | ## Create install-time classes
334 | ##
335 |
336 | #' @rdname AnnotationFilter
337 | #'
338 | #' @name AnnotationFilter
339 | #'
340 | #' @param feature \code{character(1)} defining on what feature the
341 | #' \code{GRangesFilter} should be applied. Choices could be
342 | #' \code{"gene"}, \code{"tx"} or \code{"exon"}.
343 | #'
344 | #' @examples
345 | #' ## Create a SymbolFilter to filter on a gene's symbol.
346 | #' sf <- SymbolFilter("BCL2")
347 | #' sf
348 | #'
349 | #' ## Create a GeneStartFilter to filter based on the genes' chromosomal start
350 | #' ## coordinates
351 | #' gsf <- GeneStartFilter(10000, condition = ">")
352 | #' gsf
353 | #'
354 | #' @export CdsStartFilter CdsEndFilter ExonIdFilter ExonNameFilter
355 | #' @export ExonStartFilter ExonEndFilter ExonRankFilter GeneIdFilter
356 | #' @export GenenameFilter GeneBiotypeFilter GeneStartFilter
357 | #' @export GeneEndFilter EntrezFilter SymbolFilter TxIdFilter
358 | #' @export TxNameFilter TxBiotypeFilter TxStartFilter TxEndFilter
359 | #' @export ProteinIdFilter UniprotFilter SeqNameFilter SeqStrandFilter
360 | #'
361 | #' @importFrom methods new
362 | #'
363 | #' @exportClass CdsStartFilter CdsEndFilter ExonIdFilter
364 | #' ExonNameFilter ExonStartFilter ExonEndFilter ExonRankFilter
365 | #' GeneIdFilter GenenameFilter GeneBiotypeFilter GeneStartFilter
366 | #' GeneEndFilter EntrezFilter SymbolFilter TxIdFilter TxNameFilter
367 | #' TxBiotypeFilter TxStartFilter TxEndFilter ProteinIdFilter
368 | #' UniprotFilter SeqNameFilter SeqStrandFilter
369 | NULL
370 |
371 | .fieldToClass <- function(field) {
372 |     camel <- gsub("_([[:alpha:]])", "\\U\\1", field, perl=TRUE)
373 |     camel <- sub("^([[:alpha:]])", "\\U\\1", camel, perl=TRUE)
374 |     if (length(camel)) paste0(camel, "Filter") else character(0)
375 | }
376 |
377 | .filterFactory <- function(field, class) {
378 |     ## The returned constructor outlives this call; evaluate both now.
379 |     force(field)
380 |     force(class)
381 |     toInteger <- function(x) {
382 |         stopifnot(is.numeric(x))
383 |         as.integer(x)
384 |     }
385 |     ## Character fields coerce their value with as.character(), all other
386 |     ## fields require numeric input that is coerced to integer.
387 |     as.value <- if (field %in% .FIELD[["CharacterFilter"]])
388 |         as.character else toInteger
389 |
390 |     function(value, condition = "==") {
391 |         value <- as.value(value)
392 |         new(class, field=field, condition=as.character(condition), value=value)
393 |     }
394 | }
395 | ## Generate one class plus constructor function per field listed in .FIELD.
396 | local({
397 | makeClass <- function(contains) {
398 | fields <- .FIELD[[contains]]
399 | classes <- .fieldToClass(fields)
400 | for (i in seq_along(fields)) {
401 | setClass(classes[[i]], contains=contains, where=topenv()) # class named after the field
402 | assign(
403 | classes[[i]], # the constructor gets the same name as the class
404 | .filterFactory(fields[[i]], classes[[i]]),
405 | envir=topenv()
406 | )
407 | }
408 | }
409 | for (contains in names(.FIELD))
410 | makeClass(contains)
411 | })
412 |
413 | ############################################################
414 | ## Utilities - supportedFilters
415 | ##
416 |
417 | .FILTERS_WO_FIELD <- c("GRangesFilter")
418 | ## Table of all filter classes together with their default field.
419 | .supportedFilters <- function() {
420 |     fields <- unlist(.FIELD, use.names=FALSE)
421 |     ## Filters without a field get NA in the 'field' column.
422 |     noField <- rep(NA, length(.FILTERS_WO_FIELD))
423 |     tbl <- data.frame(filter=c(.fieldToClass(fields), .FILTERS_WO_FIELD),
424 |                       field=c(fields, noField))
425 |     ## Order the rows by filter class name.
426 |     tbl[order(tbl$filter),]
427 | }
428 |
429 | #' @rdname AnnotationFilter
430 | #'
431 | #' @examples
432 | #' supportedFilters()
433 | #' @export
434 | setMethod("supportedFilters", signature("missing"),
435 |     ## Called without an object: list all filters defined here.
436 |     function(object) .supportedFilters())
437 |
--------------------------------------------------------------------------------
/.svn/pristine/4e/4e9ec76b932b7ba44f5280dec6263ea963e53920.svn-base:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Facilities for Filtering Bioconductor Annotation Resources"
3 | output:
4 | BiocStyle::html_document2:
5 | toc_float: true
6 | vignette: >
7 | %\VignetteIndexEntry{Facilities for Filtering Bioconductor Annotation resources}
8 | %\VignetteEngine{knitr::rmarkdown}
9 | %\VignetteEncoding{UTF-8}
10 | %\VignettePackage{AnnotationFilter}
11 | %\VignetteDepends{org.Hs.eg.db,BiocStyle,RSQLite}
12 | ---
13 |
14 | ```{r style, echo = FALSE, results = 'asis', message=FALSE}
15 | BiocStyle::markdown()
16 | ```
17 |
18 | **Package**: `r Biocpkg("AnnotationFilter")`
19 | **Authors**: `r packageDescription("AnnotationFilter")[["Author"]] `
20 | **Last modified:** `r file.info("AnnotationFilter.Rmd")$mtime`
21 | **Compiled**: `r date()`
22 |
23 |
24 | # Introduction
25 |
26 | A large variety of annotation resources are available in Bioconductor. Accessing
27 | the full content of these databases or even of single tables is computationally
28 | expensive and in many instances not required, as users may want to extract only
29 | sub-sets of the data e.g. genomic coordinates of a single gene. In that respect,
30 | filtering annotation resources before data extraction has a major impact on
31 | performance and increases the usability of such genome-scale databases.
32 |
33 | The `r Biocpkg("AnnotationFilter")` package was thus developed to provide basic
34 | filter classes to enable a common filtering framework for Bioconductor
35 | annotation resources. `r Biocpkg("AnnotationFilter")` defines filter classes for
36 | some of the most commonly used features in annotation databases, such as
37 | *symbol* or *genename*. Each filter class is supposed to work on a single
38 | database table column and to facilitate filtering on the provided values. Such
39 | filter classes enable the user to build complex queries to retrieve specific
40 | annotations without needing to know column or table names or the layout of the
41 | underlying databases. While initially being developed to be used in the
42 | `r Biocpkg("Organism.dplyr")` and `r Biocpkg("ensembldb")` packages, the filter
43 | classes and the related filtering concept can be easily added to other
44 | annotation packages too.
45 |
46 |
47 | # Filter classes
48 |
49 | All filter classes extend the basic `AnnotationFilter` class and take one or
50 | more *values* and a *condition* to allow filtering on a single database table
51 | column. Based on the type of the input value, filter classes are divided into:
52 |
53 | - `CharacterFilter`: takes a `character` value of length >= 1 and supports
54 | conditions `==`, `!=`, `startsWith`, `endsWith` and `contains`. An example
55 | would be a `GeneIdFilter` that allows to filter on gene IDs.
56 |
57 | - `IntegerFilter`: takes a single `integer` as input and supports the conditions
58 | `==`, `!=`, `>`, `<`, `>=` and `<=`. An example would be a `GeneStartFilter`
59 | that filters results on the (chromosomal) start coordinates of genes.
60 |
61 | - `GRangesFilter`: is a special filter, as it takes a `GRanges` as `value` and
62 | performs the filtering on a combination of columns (i.e. start and end
63 | coordinate as well as sequence name and strand). To be consistent with the
64 | `findOverlaps` method from the `r Biocpkg("IRanges")` package, the constructor
65 | of the `GRangesFilter` filter takes a `type` argument to define its
66 | condition. Supported values are `"any"` (the default) that retrieves all
67 | entries overlapping the `GRanges`, `"start"` and `"end"` matching all features
68 | with the same start and end coordinate respectively, `"within"` that matches
69 | all features that are *within* the range defined by the `GRanges` and
70 | `"equal"` that returns features that are equal to the `GRanges`.
71 |
72 | The names of the filter classes are intuitive, the first part corresponding to
73 | the database column name with each character following a `_` being capitalized,
74 | followed by the key word `Filter`. The name of a filter for a database table
75 | column `gene_id` is thus called `GeneIdFilter`. The default database column for
76 | a filter is stored in its `field` slot (accessible *via* the `field` method).
77 |
78 | The `supportedFilters` method can be used to get an overview of all available
79 | filter objects defined in `AnnotationFilter`.
80 |
81 | ```{r supportedFilters}
82 | library(AnnotationFilter)
83 | supportedFilters()
84 | ```
85 |
86 | Note that the `AnnotationFilter` package provides only the filter classes
87 | but not the functionality to apply the filtering. Such functionality is
88 | annotation resource and database layout dependent and needs thus to be
89 | implemented in the packages providing access to annotation resources.
90 |
91 |
92 | # Usage
93 |
94 | Filters are created *via* their dedicated constructor functions, such as the
95 | `GeneIdFilter` function for the `GeneIdFilter` class. Because of this simple and
96 | cheap creation, filter classes are thought to be *read-only* and thus don't
97 | provide *setter* methods to change their slot values. In addition to the
98 | constructor functions, `AnnotationFilter` provides the functionality to
99 | *translate* query expressions into filter classes (see further below for an
100 | example).
101 |
102 | Below we create a `SymbolFilter` that could be used to filter an annotation
103 | resource to retrieve all entries associated with the specified symbol value(s).
104 |
105 | ```{r symbol-filter}
106 | library(AnnotationFilter)
107 |
108 | smbl <- SymbolFilter("BCL2")
109 | smbl
110 | ```
111 |
112 | Such a filter is supposed to be used to retrieve all entries associated to
113 | features with a value in a database table column called *symbol* matching the
114 | filter's value `"BCL2"`.
115 |
116 | Using the `"startsWith"` condition we could define a filter to retrieve all
117 | entries for genes with a gene name/symbol starting with the specified value
118 | (e.g. `"BCL2"` and `"BCL2L11"` for the example below).
119 |
120 | ```{r symbol-startsWith}
121 | smbl <- SymbolFilter("BCL2", condition = "startsWith")
122 | smbl
123 | ```
124 |
125 | In addition to the constructor functions, `AnnotationFilter` provides a
126 | functionality to create filter instances in a more natural and intuitive way by
127 | *translating* filter expressions (written as a *formula*, i.e. starting with a
128 | `~`).
129 |
130 | ```{r convert-expression}
131 | smbl <- AnnotationFilter(~ symbol == "BCL2")
132 | smbl
133 | ```
134 |
135 | Individual `AnnotationFilter` objects can be combined in an
136 | `AnnotationFilterList`. This class extends `list` and provides an additional
137 | `logicOp()` that defines how its individual filters are supposed to be
138 | combined. The length of `logicOp()` has to be 1 less than the number of filter
139 | objects. Each element in `logicOp()` defines how two consecutive filters should
140 | be combined. Below we create an `AnnotationFilterList` containing two filter
141 | objects to be combined with a logical *AND*.
142 |
143 | ```{r convert-multi-expression}
144 | flt <- AnnotationFilter(~ symbol == "BCL2" &
145 | tx_biotype == "protein_coding")
146 | flt
147 | ```
148 |
149 | Note that the `AnnotationFilter` function does not (yet) support translation of
150 | nested expressions, such as `(symbol == "BCL2L11" & tx_biotype ==
151 | "nonsense_mediated_decay") | (symbol == "BCL2" & tx_biotype ==
152 | "protein_coding")`. Such queries can however be built by nesting
153 | `AnnotationFilterList` classes.
154 |
155 | ```{r nested-query}
156 | ## Define the filter query for the first pair of filters.
157 | afl1 <- AnnotationFilterList(SymbolFilter("BCL2L11"),
158 | TxBiotypeFilter("nonsense_mediated_decay"))
159 | ## Define the second pair of filters (those in the second pair of brackets).
160 | afl2 <- AnnotationFilterList(SymbolFilter("BCL2"),
161 | TxBiotypeFilter("protein_coding"))
162 | ## Now combine both with a logical OR
163 | afl <- AnnotationFilterList(afl1, afl2, logicOp = "|")
164 |
165 | afl
166 | ```
167 |
168 | This `AnnotationFilterList` would now select all entries for all transcripts of
169 | the gene *BCL2L11* with the biotype *nonsense_mediated_decay* or for all protein
170 | coding transcripts of the gene *BCL2*.
171 |
172 |
173 | # Using `AnnotationFilter` in other packages
174 |
175 | The `AnnotationFilter` package does only provide filter classes, but no
176 | filtering functionality. This has to be implemented in the package using the
177 | filters. In this section we first show in a very simple example how
178 | `AnnotationFilter` classes could be used to filter a `data.frame` and
179 | subsequently explore how a simple filter framework could be implemented for
180 | SQL-based annotation resources.
181 |
182 | Let's first define a simple `data.frame` containing the data we want to
183 | filter. Note that subsetting this `data.frame` using `AnnotationFilter` is
184 | obviously not the best solution, but it should help to understand the basic
185 | concept.
186 |
187 | ```{r define-data.frame}
188 | ## Define a simple gene table
189 | gene <- data.frame(gene_id = 1:10,
190 | symbol = c(letters[1:9], "b"),
191 | seq_name = paste0("chr", c(1, 4, 4, 8, 1, 2, 5, 3, "X", 4)),
192 | stringsAsFactors = FALSE)
193 | gene
194 | ```
195 |
196 | Next we generate a `SymbolFilter` and inspect what information we can extract
197 | from it.
198 |
199 | ```{r simple-symbol}
200 | smbl <- SymbolFilter("b")
201 | ```
202 |
203 | We can access the filter *condition* using the `condition` method
204 |
205 | ```{r simple-symbol-condition}
206 | condition(smbl)
207 | ```
208 |
209 | The value of the filter using the `value` method
210 |
211 | ```{r simple-symbol-value}
212 | value(smbl)
213 | ```
214 |
215 | And finally the *field* (i.e. column in the data table) using the `field`
216 | method.
217 |
218 | ```{r simple-symbol-field}
219 | field(smbl)
220 | ```
221 |
222 | With this information we can define a simple function that takes the data table
223 | and the filter as input and returns a `logical` with length equal to the number
224 | of rows of the table, `TRUE` for rows matching the filter.
225 |
226 | ```{r doMatch}
227 |
228 | doMatch <- function(x, filter) {
229 | do.call(condition(filter), list(x[, field(filter)], value(filter)))
230 | }
231 |
232 | ## Apply this function
233 | doMatch(gene, smbl)
234 |
235 | ```
236 |
237 | Note that this simple function does not support multiple filters and also not
238 | conditions `"startsWith"` or `"endsWith"`. Next we define a second function that
239 | extracts the relevant data from the data resource.
240 |
241 | ```{r doExtract}
242 |
243 | doExtract <- function(x, filter) {
244 | x[doMatch(x, filter), ]
245 | }
246 |
247 | ## Apply it on the data
248 | doExtract(gene, smbl)
249 | ```
250 |
251 | We could even modify the `doMatch` function to enable filter expressions.
252 |
253 | ```{r doMatch-formula}
254 |
255 | doMatch <- function(x, filter) {
256 | if (is(filter, "formula"))
257 | filter <- AnnotationFilter(filter)
258 | do.call(condition(filter), list(x[, field(filter)], value(filter)))
259 | }
260 |
261 | doExtract(gene, ~ gene_id == '2')
262 |
263 | ```
264 |
265 | For such simple examples `AnnotationFilter` might be an overkill as the same
266 | could be achieved (much simpler) using standard R operations. A real case
267 | scenario in which `AnnotationFilter` becomes useful are SQL-based annotation
268 | resources. We will thus explore next how SQL resources could be filtered using
269 | `AnnotationFilter`.
270 |
271 | We use the SQLite database from the `r Biocpkg("org.Hs.eg.db")` package that
272 | provides a variety of annotations for all human genes. Using the packages'
273 | connection to the database we inspect first what database tables are available
274 | and then select one for our simple filtering example.
275 |
276 | We use an `EnsDb` SQLite database used by the `r Biocpkg("ensembldb")` package
277 | and implement simple filter functions to extract specific data from one of its
278 | database tables. We thus load below the `EnsDb.Hsapiens.v75` package that
279 | provides access to human gene, transcript, exon and protein annotations. Using
280 | its connection to the database we inspect first what database tables are
281 | available and then what *fields* (i.e. columns) the *gene* table has.
282 |
283 | ```{r orgDb, message = FALSE}
284 | ## Load the required packages
285 | library(org.Hs.eg.db)
286 | library(RSQLite)
287 | ## Get the database connection
288 | dbcon <- org.Hs.eg_dbconn()
289 |
290 | ## What tables do we have?
291 | dbListTables(dbcon)
292 | ```
293 |
294 | `org.Hs.eg.db` provides many different tables, one for each identifier or
295 | annotation resource. We will use the *gene_info* table and determine which
296 | *fields* (i.e. columns) the table provides.
297 |
298 | ```{r gene_info}
299 | ## What fields are there in the gene_info table?
300 | dbListFields(dbcon, "gene_info")
301 | ```
302 |
303 | The *gene_info* table provides the official gene symbol and the gene name. The
304 | column *symbol* matches the default `field` value of the `SymbolFilter`. For the
305 | `GenenameFilter` we would have to re-map its default field `"genename"` to the
306 | database column *gene_name*. There are many possibilities to do this, one would
307 | be to implement an own function to extract the field from the `AnnotationFilter`
308 | classes specific to the database. This function eventually renames the extracted
309 | field value to match the corresponding name of the database column name.
310 |
311 | We next implement a simple `doExtractGene` function that retrieves data from the
312 | *gene_info* table and re-uses the `doFilter` function to extract specific
313 | data. The parameter `x` is now the database connection object.
314 |
315 | ```{r doExtractSQL}
316 |
317 | doExtractGene <- function(x, filter) {
318 | gene <- dbGetQuery(x, "select * from gene_info")
319 | doExtract(gene, filter)
320 | }
321 |
322 | ## Extract all entries for BCL2
323 | bcl2 <- doExtractGene(dbcon, SymbolFilter("BCL2"))
324 |
325 | bcl2
326 | ```
327 |
328 | This works, but is not really efficient, since the function first fetches the
329 | full database table and subsets it only afterwards. A much more efficient
330 | solution is to *translate* the `AnnotationFilter` class(es) to an SQL *where*
331 | condition and hence perform the filtering on the database level. Here we have to
332 | do some small modifications, since not all condition values can be used 1:1 in
333 | SQL calls. The condition `"=="` has for example to be converted into `"="` and
334 | the `"startsWith"` into a SQL `"like"` by adding also a `"%"` wildcard to the
335 | value of the filter. We would also have to deal with filters that have a `value`
336 | of length > 1. A `SymbolFilter` with a `value` being `c("BCL2", "BCL2L11")`
337 | would for example have to be converted to a SQL call `"symbol in
338 | ('BCL2','BCL2L11')"`. Here we skip these special cases and define a simple
339 | function that translates an `AnnotationFilter` to a *where* condition to be
340 | included into the SQL call. Depending on whether the filter extends
341 | `CharacterFilter` or `IntegerFilter` the value has also to be quoted.
342 |
343 | ```{r simpleSQL}
344 |
345 | ## Define a simple function that covers some condition conversion
346 | conditionForSQL <- function(x) {
347 | switch(x,
348 | "==" = "=",
349 | x)
350 | }
351 |
352 | ## Define a function to translate a filter into an SQL where condition.
353 | ## Character values have to be quoted.
354 | where <- function(x) {
355 | if (is(x, "CharacterFilter"))
356 | value <- paste0("'", value(x), "'")
357 | else value <- value(x)
358 | paste0(field(x), conditionForSQL(condition(x)), value)
359 | }
360 |
361 | ## Now "translate" a filter using this function
362 | where(SeqNameFilter("Y"))
363 |
364 | ```
365 |
366 | Next we implement a new function which integrates the filter into the SQL call
367 | to let the database server take care of the filtering.
368 |
369 | ```{r doExtractGene2}
370 |
371 | ## Define a function that
372 | doExtractGene2 <- function(x, filter) {
373 | if (is(filter, "formula"))
374 | filter <- AnnotationFilter(filter)
375 | query <- paste0("select * from gene_info where ", where(filter))
376 | dbGetQuery(x, query)
377 | }
378 |
379 | bcl2 <- doExtractGene2(dbcon, ~ symbol == "BCL2")
380 | bcl2
381 |
382 | ```
383 |
384 | Below we compare the performance of both approaches.
385 |
386 | ```{r performance}
387 | system.time(doExtractGene(dbcon, ~ symbol == "BCL2"))
388 |
389 | system.time(doExtractGene2(dbcon, ~ symbol == "BCL2"))
390 |
391 | ```
392 |
393 | Not surprisingly, the second approach is much faster.
394 |
395 | Be aware that the examples shown here are only for illustration purposes. In a
396 | real world situation additional factors, like combinations of filters, which
397 | database tables to join, which columns to be returned etc would have to be
398 | considered too.
399 |
400 | # Session information
401 |
402 | ```{r si}
403 | sessionInfo()
404 | ```
405 |
--------------------------------------------------------------------------------
/vignettes/AnnotationFilter.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Facilities for Filtering Bioconductor Annotation Resources"
3 | output:
4 | BiocStyle::html_document:
5 | toc_float: true
6 | vignette: >
7 | %\VignetteIndexEntry{Facilities for Filtering Bioconductor Annotation resources}
8 | %\VignetteEngine{knitr::rmarkdown}
9 | %\VignetteEncoding{UTF-8}
10 | %\VignettePackage{AnnotationFilter}
11 | %\VignetteDepends{org.Hs.eg.db,BiocStyle,RSQLite}
12 | ---
13 |
14 | ```{r style, echo = FALSE, results = 'asis', message=FALSE}
15 | BiocStyle::markdown()
16 | ```
17 |
18 | **Package**: `r Biocpkg("AnnotationFilter")`
19 | **Authors**: `r packageDescription("AnnotationFilter")[["Author"]] `
20 | **Last modified:** `r file.info("AnnotationFilter.Rmd")$mtime`
21 | **Compiled**: `r date()`
22 |
23 |
24 | # Introduction
25 |
26 | A large variety of annotation resources are available in Bioconductor. Accessing
27 | the full content of these databases or even of single tables is computationally
28 | expensive and in many instances not required, as users may want to extract only
29 | sub-sets of the data e.g. genomic coordinates of a single gene. In that respect,
30 | filtering annotation resources before data extraction has a major impact on
31 | performance and increases the usability of such genome-scale databases.
32 |
33 | The `r Biocpkg("AnnotationFilter")` package was thus developed to provide basic
34 | filter classes to enable a common filtering framework for Bioconductor
35 | annotation resources. `r Biocpkg("AnnotationFilter")` defines filter classes for
36 | some of the most commonly used features in annotation databases, such as
37 | *symbol* or *genename*. Each filter class is supposed to work on a single
38 | database table column and to facilitate filtering on the provided values. Such
39 | filter classes enable the user to build complex queries to retrieve specific
40 | annotations without needing to know column or table names or the layout of the
41 | underlying databases. While initially being developed to be used in the
42 | `r Biocpkg("Organism.dplyr")` and `r Biocpkg("ensembldb")` packages, the filter
43 | classes and the related filtering concept can be easily added to other
44 | annotation packages too.
45 |
46 |
47 | # Filter classes
48 |
49 | All filter classes extend the basic `AnnotationFilter` class and take one or
50 | more *values* and a *condition* to allow filtering on a single database table
51 | column. Based on the type of the input value, filter classes are divided into:
52 |
53 | - `CharacterFilter`: takes a `character` value of length >= 1 and supports
54 | conditions `==`, `!=`, `startsWith`, `endsWith` and `contains`. An example
55 | would be a `GeneIdFilter` that allows filtering on gene IDs.
56 |
57 | - `IntegerFilter`: takes a single `integer` as input and supports the conditions
58 | `==`, `!=`, `>`, `<`, `>=` and `<=`. An example would be a `GeneStartFilter`
59 | that filters results on the (chromosomal) start coordinates of genes.
60 |
61 | - `DoubleFilter`: takes a single `numeric` as input and supports the conditions
62 | `==`, `!=`, `>`, `<`, `>=` and `<=`.
63 |
64 | - `GRangesFilter`: is a special filter, as it takes a `GRanges` as `value` and
65 | performs the filtering on a combination of columns (i.e. start and end
66 | coordinate as well as sequence name and strand). To be consistent with the
67 | `findOverlaps` method from the `r Biocpkg("IRanges")` package, the constructor
68 | of the `GRangesFilter` filter takes a `type` argument to define its
69 | condition. Supported values are `"any"` (the default) that retrieves all
70 | entries overlapping the `GRanges`, `"start"` and `"end"` matching all features
71 | with the same start and end coordinate respectively, `"within"` that matches
72 | all features that are *within* the range defined by the `GRanges` and
73 | `"equal"` that returns features that are equal to the `GRanges`.
74 |
75 | The names of the filter classes are intuitive, the first part corresponding to
76 | the database column name with each character following a `_` being capitalized,
77 | followed by the key word `Filter`. The name of a filter for a database table
78 | column `gene_id` is thus called `GeneIdFilter`. The default database column for
79 | a filter is stored in its `field` slot (accessible *via* the `field` method).
80 |
81 | The `supportedFilters` method can be used to get an overview of all available
82 | filter objects defined in `AnnotationFilter`.
83 |
84 | ```{r supportedFilters}
85 | library(AnnotationFilter)
86 | supportedFilters()
87 | ```
88 |
89 | Note that the `AnnotationFilter` package provides only the filter classes
90 | but not the functionality to apply the filtering. Such functionality is
91 | annotation resource and database layout dependent and needs thus to be
92 | implemented in the packages providing access to annotation resources.
93 |
94 |
95 | # Usage
96 |
97 | Filters are created *via* their dedicated constructor functions, such as the
98 | `GeneIdFilter` function for the `GeneIdFilter` class. Because of this simple and
99 | cheap creation, filter classes are thought to be *read-only* and thus don't
100 | provide *setter* methods to change their slot values. In addition to the
101 | constructor functions, `AnnotationFilter` provides the functionality to
102 | *translate* query expressions into filter classes (see further below for an
103 | example).
104 |
105 | Below we create a `SymbolFilter` that could be used to filter an annotation
106 | resource to retrieve all entries associated with the specified symbol value(s).
107 |
108 | ```{r symbol-filter}
109 | library(AnnotationFilter)
110 |
111 | smbl <- SymbolFilter("BCL2")
112 | smbl
113 | ```
114 |
115 | Such a filter is supposed to be used to retrieve all entries associated to
116 | features with a value in a database table column called *symbol* matching the
117 | filter's value `"BCL2"`.
118 |
119 | Using the `"startsWith"` condition we could define a filter to retrieve all
120 | entries for genes with a gene name/symbol starting with the specified value
121 | (e.g. `"BCL2"` and `"BCL2L11"` for the example below).
122 |
123 | ```{r symbol-startsWith}
124 | smbl <- SymbolFilter("BCL2", condition = "startsWith")
125 | smbl
126 | ```
127 |
128 | In addition to the constructor functions, `AnnotationFilter` provides a
129 | functionality to create filter instances in a more natural and intuitive way by
130 | *translating* filter expressions (written as a *formula*, i.e. starting with a
131 | `~`).
132 |
133 | ```{r convert-expression}
134 | smbl <- AnnotationFilter(~ symbol == "BCL2")
135 | smbl
136 | ```
137 |
138 | Individual `AnnotationFilter` objects can be combined in an
139 | `AnnotationFilterList`. This class extends `list` and provides an additional
140 | `logicOp()` that defines how its individual filters are supposed to be
141 | combined. The length of `logicOp()` has to be 1 less than the number of filter
142 | objects. Each element in `logicOp()` defines how two consecutive filters should
143 | be combined. Below we create an `AnnotationFilterList` containing two filter
144 | objects to be combined with a logical *AND*.
145 |
146 | ```{r convert-multi-expression}
147 | flt <- AnnotationFilter(~ symbol == "BCL2" &
148 | tx_biotype == "protein_coding")
149 | flt
150 | ```
151 |
152 | Note that the `AnnotationFilter` function does not (yet) support translation of
153 | nested expressions, such as `(symbol == "BCL2L11" & tx_biotype ==
154 | "nonsense_mediated_decay") | (symbol == "BCL2" & tx_biotype ==
155 | "protein_coding")`. Such queries can however be built by nesting
156 | `AnnotationFilterList` classes.
157 |
158 | ```{r nested-query}
159 | ## Define the filter query for the first pair of filters.
160 | afl1 <- AnnotationFilterList(SymbolFilter("BCL2L11"),
161 | TxBiotypeFilter("nonsense_mediated_decay"))
162 | ## Define the filter query for the second pair of filters.
163 | afl2 <- AnnotationFilterList(SymbolFilter("BCL2"),
164 | TxBiotypeFilter("protein_coding"))
165 | ## Now combine both with a logical OR
166 | afl <- AnnotationFilterList(afl1, afl2, logicOp = "|")
167 |
168 | afl
169 | ```
170 |
171 | This `AnnotationFilterList` would now select all entries for all transcripts of
172 | the gene *BCL2L11* with the biotype *nonsense_mediated_decay* or for all protein
173 | coding transcripts of the gene *BCL2*.
174 |
175 |
176 | # Using `AnnotationFilter` in other packages
177 |
178 | The `AnnotationFilter` package does only provide filter classes, but no
179 | filtering functionality. This has to be implemented in the package using the
180 | filters. In this section we first show in a very simple example how
181 | `AnnotationFilter` classes could be used to filter a `data.frame` and
182 | subsequently explore how a simple filter framework could be implemented for
183 | SQL-based annotation resources.
184 |
185 | Let's first define a simple `data.frame` containing the data we want to
186 | filter. Note that subsetting this `data.frame` using `AnnotationFilter` is
187 | obviously not the best solution, but it should help to understand the basic
188 | concept.
189 |
190 | ```{r define-data.frame}
191 | ## Define a simple gene table
192 | gene <- data.frame(gene_id = 1:10,
193 | symbol = c(letters[1:9], "b"),
194 | seq_name = paste0("chr", c(1, 4, 4, 8, 1, 2, 5, 3, "X", 4)),
195 | stringsAsFactors = FALSE)
196 | gene
197 | ```
198 |
199 | Next we generate a `SymbolFilter` and inspect what information we can extract
200 | from it.
201 |
202 | ```{r simple-symbol}
203 | smbl <- SymbolFilter("b")
204 | ```
205 |
206 | We can access the filter *condition* using the `condition` method
207 |
208 | ```{r simple-symbol-condition}
209 | condition(smbl)
210 | ```
211 |
212 | The value of the filter using the `value` method
213 |
214 | ```{r simple-symbol-value}
215 | value(smbl)
216 | ```
217 |
218 | And finally the *field* (i.e. column in the data table) using the `field`
219 | method.
220 |
221 | ```{r simple-symbol-field}
222 | field(smbl)
223 | ```
224 |
225 | With this information we can define a simple function that takes the data table
226 | and the filter as input and returns a `logical` with length equal to the number
227 | of rows of the table, `TRUE` for rows matching the filter.
228 |
229 | ```{r doMatch}
230 | ## Test every row of `x` against `filter`; returns a logical vector.
231 | doMatch <- function(x, filter) {
232 |     args <- list(x[, field(filter)], value(filter))
233 |     do.call(condition(filter), args)
234 | }
235 | ## Apply this function
236 | doMatch(gene, smbl)
237 |
238 | ```
239 |
240 | Note that this simple function does not support multiple filters and also not
241 | conditions `"startsWith"` or `"endsWith"`. Next we define a second function that
242 | extracts the relevant data from the data resource.
243 |
244 | ```{r doExtract}
245 | ## Subset `x` to the rows for which `doMatch` reports a match.
246 | doExtract <- function(x, filter) {
247 |     keep <- doMatch(x, filter)
248 |     x[keep, ]
249 | }
250 | ## Apply it on the data
251 | doExtract(gene, smbl)
252 | ```
253 |
254 | We could even modify the `doMatch` function to enable filter expressions.
255 |
256 | ```{r doMatch-formula}
257 | ## As before, but a filter expression (formula) is translated first.
258 | doMatch <- function(x, filter) {
259 |     if (is(filter, "formula")) filter <- AnnotationFilter(filter)
260 |     args <- list(x[, field(filter)], value(filter))
261 |     do.call(condition(filter), args)
262 | }
263 |
264 | doExtract(gene, ~ gene_id == '2')
265 |
266 | ```
267 |
268 | For such simple examples `AnnotationFilter` might be an overkill as the same
269 | could be achieved (much simpler) using standard R operations. A real case
270 | scenario in which `AnnotationFilter` becomes useful are SQL-based annotation
271 | resources. We will thus explore next how SQL resources could be filtered using
272 | `AnnotationFilter`.
273 |
274 | We use the SQLite database from the `r Biocpkg("org.Hs.eg.db")` package that
275 | provides a variety of annotations for all human genes. Using the package's
276 | connection to the database we inspect first what database tables are available
277 | and then select one for our simple filtering example.
278 |
279 | We use an `EnsDb` SQLite database used by the `r Biocpkg("ensembldb")` package
280 | and implement simple filter functions to extract specific data from one of its
281 | database tables. We thus load below the `EnsDb.Hsapiens.v75` package that
282 | provides access to human gene, transcript, exon and protein annotations. Using
283 | its connection to the database we inspect first what database tables are
284 | available and then what *fields* (i.e. columns) the *gene* table has.
285 |
286 | ```{r orgDb, message = FALSE}
287 | ## Load the required packages
288 | library(org.Hs.eg.db)
289 | library(RSQLite)
290 | ## Get the database connection
291 | dbcon <- org.Hs.eg_dbconn()
292 |
293 | ## What tables do we have?
294 | dbListTables(dbcon)
295 | ```
296 |
297 | `org.Hs.eg.db` provides many different tables, one for each identifier or
298 | annotation resource. We will use the *gene_info* table and determine which
299 | *fields* (i.e. columns) the table provides.
300 |
301 | ```{r gene_info}
302 | ## What fields are there in the gene_info table?
303 | dbListFields(dbcon, "gene_info")
304 | ```
305 |
306 | The *gene_info* table provides the official gene symbol and the gene name. The
307 | column *symbol* matches the default `field` value of the `SymbolFilter` as does
308 | the column *gene_name* for the `GeneNameFilter`. If the column in the database
309 | did not match the field of an `AnnotationFilter`, we would have to implement a
310 | function that maps the default field of the filter object to the database
311 | column. See the end of the section for an example.
312 |
313 | We next implement a simple `doExtractGene` function that retrieves data from the
314 | *gene_info* table and re-uses the `doExtract` function to extract specific
315 | data. The parameter `x` is now the database connection object.
316 |
317 | ```{r doExtractSQL}
318 | ## Fetch the complete gene_info table from the database connection `x`
319 | ## and subset it in R to the rows matching `filter`.
320 | doExtractGene <- function(x, filter) {
321 |     doExtract(dbGetQuery(x, "select * from gene_info"), filter)
322 | }
323 |
324 | ## Extract all entries for BCL2
325 | bcl2 <- doExtractGene(dbcon, SymbolFilter("BCL2"))
326 |
327 | bcl2
328 | ```
329 |
330 | This works, but is not really efficient, since the function first fetches the
331 | full database table and subsets it only afterwards. A much more efficient
332 | solution is to *translate* the `AnnotationFilter` class(es) to an SQL *where*
333 | condition and hence perform the filtering on the database level. Here we have to
334 | do some small modifications, since not all condition values can be used 1:1 in
335 | SQL calls. The condition `"=="` has for example to be converted into `"="` and
336 | the `"startsWith"` into a SQL `"like"` by adding also a `"%"` wildcard to the
337 | value of the filter. We would also have to deal with filters that have a `value`
338 | of length > 1. A `SymbolFilter` with a `value` being `c("BCL2", "BCL2L11")`
339 | would for example have to be converted to a SQL call `"symbol in
340 | ('BCL2','BCL2L11')"`. Here we skip these special cases and define a simple
341 | function that translates an `AnnotationFilter` to a *where* condition to be
342 | included into the SQL call. Depending on whether the filter extends
343 | `CharacterFilter` or `IntegerFilter` the value has also to be quoted.
344 |
345 | ```{r simpleSQL}
346 | ## Translate a filter condition into the operator to use in SQL. Only
347 | ## `"=="` has to be converted (to `"="`); all other conditions are
348 | ## returned unchanged.
349 | conditionForSQL <- function(x) {
350 |     sqlCondition <- switch(x, "==" = "=", x)
351 |     sqlCondition
352 | }
353 |
354 | ## Translate a filter into an SQL where condition. Character values are
355 | ## quoted; single quotes within a value are doubled (SQL escaping).
356 | where <- function(x) {
357 |     val <- value(x)
358 |     if (is(x, "CharacterFilter"))
359 |         val <- paste0("'", gsub("'", "''", val, fixed = TRUE), "'")
360 |     paste0(field(x), conditionForSQL(condition(x)), val)
361 | }
362 |
363 | ## Now "translate" a filter using this function
364 | where(SeqNameFilter("Y"))
365 |
366 | ```
367 |
368 | Next we implement a new function which integrates the filter into the SQL call
369 | to let the database server take care of the filtering.
370 |
371 | ```{r doExtractGene2}
372 | ## Extract the rows of gene_info matching `filter`. The filter is translated
373 | ## into an SQL where condition so that the database does the filtering.
374 | doExtractGene2 <- function(x, filter) {
375 |     if (is(filter, "formula")) {
376 |         filter <- AnnotationFilter(filter)
377 |     }
378 |     dbGetQuery(x, paste0("select * from gene_info where ", where(filter)))
379 | }
380 |
381 | bcl2 <- doExtractGene2(dbcon, ~ symbol == "BCL2")
382 | bcl2
383 |
384 | ```
385 |
386 | Below we compare the performance of both approaches.
387 |
388 | ```{r performance}
389 | system.time(doExtractGene(dbcon, ~ symbol == "BCL2"))
390 |
391 | system.time(doExtractGene2(dbcon, ~ symbol == "BCL2"))
392 |
393 | ```
394 |
395 | Not surprisingly, the second approach is much faster.
396 |
397 | Be aware that the examples shown here are only for illustration purposes. In a
398 | real world situation additional factors, like combinations of filters, which
399 | database tables to join, which columns to be returned etc would have to be
400 | considered too.
401 |
402 | What if the database column on which we want to filter does not match the
403 | `field` of an `AnnotationFilter`? If for example the database column is named
404 | *hgnc_symbol* instead of *symbol* we could for example package-internally
405 | overwrite the default `field` method for `SymbolFilter` to return the correct
406 | field for the database column.
407 |
408 | ```{r symbol-overwrite}
409 | ## Default method from AnnotationFilter:
410 | field(SymbolFilter("a"))
411 |
412 | ## Overwrite the default method.
413 | setMethod("field", "SymbolFilter", function(object, ...) "hgnc_symbol")
414 |
415 | ## Call to field returns now the "correct" database column
416 | field(SymbolFilter("a"))
417 |
418 | ```
419 |
420 |
421 | # Session information
422 |
423 | ```{r si}
424 | sessionInfo()
425 | ```
426 |
--------------------------------------------------------------------------------
/R/AnnotationFilter.R:
--------------------------------------------------------------------------------
1 | #' @name AnnotationFilter
2 | #'
3 | #' @title Filters for annotation objects
4 | #'
5 | #' @aliases CdsStartFilter CdsEndFilter ExonIdFilter ExonNameFilter
6 | #' ExonStartFilter ExonEndFilter ExonRankFilter GeneIdFilter
7 | #' GeneNameFilter GeneBiotypeFilter GeneStartFilter GeneEndFilter
8 | #' EntrezFilter SymbolFilter TxIdFilter TxNameFilter
9 | #' TxBiotypeFilter TxStartFilter TxEndFilter ProteinIdFilter
10 | #' UniprotFilter SeqNameFilter SeqStrandFilter
11 | #' AnnotationFilter-class CharacterFilter-class DoubleFilter-class
12 | #' IntegerFilter-class CdsStartFilter-class CdsEndFilter-class
13 | #' ExonIdFilter-class ExonNameFilter-class ExonStartFilter-class
14 | #' ExonEndFilter-class ExonRankFilter-class GeneIdFilter-class
15 | #' GeneNameFilter-class GeneBiotypeFilter-class
16 | #' GeneStartFilter-class GeneEndFilter-class EntrezFilter-class
17 | #' SymbolFilter-class TxIdFilter-class TxNameFilter-class
18 | #' TxBiotypeFilter-class TxStartFilter-class TxEndFilter-class
19 | #' ProteinIdFilter-class UniprotFilter-class SeqNameFilter-class
20 | #' SeqStrandFilter-class supportedFilters
21 | #' show,AnnotationFilter-method show,CharacterFilter-method
22 | #' show,IntegerFilter-method show,GRangesFilter-method
23 | #' show,DoubleFilter-method
24 | #'
25 | #' @description
26 | #'
27 | #' The filters extending the base \code{AnnotationFilter} class
28 | #' represent a simple filtering concept for annotation resources.
29 | #' Each filter object is thought to filter on a single (database)
30 | #' table column using the provided values and the defined condition.
31 | #'
32 | #' Filter instances are created using the constructor functions (e.g.
33 | #' \code{GeneIdFilter}).
34 | #'
35 | #' \code{supportedFilters()} lists all defined filters. It returns a two column
36 | #' \code{data.frame} with the filter class name and its default field.
37 | #' Packages using \code{AnnotationFilter} should implement the
38 | #' \code{supportedFilters} method for their annotation resource object (e.g. for
39 | #' \code{object = "EnsDb"} in the \code{ensembldb} package) to list all
40 | #' supported filters for the specific resource.
41 | #'
42 | #' @details
43 | #'
44 | #' By default filters are only available for tables containing the
45 | #' field on which the filter acts (i.e. that contain a column with the
46 | #' name matching the value of the \code{field} slot of the
47 | #' object). See the vignette for a description to use filters for
48 | #' databases in which the database table column name differs from the
49 | #' default \code{field} of the filter.
50 | #'
51 | #' @usage
52 | #'
53 | #' CdsStartFilter(value, condition = "==", not = FALSE)
54 | #' CdsEndFilter(value, condition = "==", not = FALSE)
55 | #' ExonIdFilter(value, condition = "==", not = FALSE)
56 | #' ExonNameFilter(value, condition = "==", not = FALSE)
57 | #' ExonRankFilter(value, condition = "==", not = FALSE)
58 | #' ExonStartFilter(value, condition = "==", not = FALSE)
59 | #' ExonEndFilter(value, condition = "==", not = FALSE)
60 | #' GeneIdFilter(value, condition = "==", not = FALSE)
61 | #' GeneNameFilter(value, condition = "==", not = FALSE)
62 | #' GeneBiotypeFilter(value, condition = "==", not = FALSE)
63 | #' GeneStartFilter(value, condition = "==", not = FALSE)
64 | #' GeneEndFilter(value, condition = "==", not = FALSE)
65 | #' EntrezFilter(value, condition = "==", not = FALSE)
66 | #' SymbolFilter(value, condition = "==", not = FALSE)
67 | #' TxIdFilter(value, condition = "==", not = FALSE)
68 | #' TxNameFilter(value, condition = "==", not = FALSE)
69 | #' TxBiotypeFilter(value, condition = "==", not = FALSE)
70 | #' TxStartFilter(value, condition = "==", not = FALSE)
71 | #' TxEndFilter(value, condition = "==", not = FALSE)
72 | #' ProteinIdFilter(value, condition = "==", not = FALSE)
73 | #' UniprotFilter(value, condition = "==", not = FALSE)
74 | #' SeqNameFilter(value, condition = "==", not = FALSE)
75 | #' SeqStrandFilter(value, condition = "==", not = FALSE)
76 | #'
77 | #' @param value \code{character()}, \code{integer()}, or
78 | #' \code{GRanges()} value for the filter
79 | #'
80 | #' @param condition \code{character(1)} defining the condition to be
81 | #' used in the filter. For \code{IntegerFilter} or \code{DoubleFilter},
82 | #' one of \code{"=="}, \code{"!="}, \code{">"}, \code{"<"}, \code{">="}
83 | #' or \code{"<="}. For \code{CharacterFilter}, one of \code{"=="},
84 | #' \code{"!="}, \code{"startsWith"}, \code{"endsWith"} or \code{"contains"}.
85 | #' Default condition is \code{"=="}.
86 | #'
87 | #' @param not \code{logical(1)} whether the \code{AnnotationFilter} is negated.
88 | #' \code{TRUE} indicates that the filter is negated (!), \code{FALSE} that
89 | #' it is not negated. The default is \code{FALSE}.
90 | #'
91 | #' @return The constructor functions return an object extending
92 | #' \code{AnnotationFilter}. For the return value of the other methods see
93 | #' the methods' descriptions.
94 | #'
95 | #' @seealso \code{\link{AnnotationFilterList}} for combining
96 | #' \code{AnnotationFilter} objects.
97 | NULL
98 | ## Conditions supported by each filter type (checked in .valid_condition).
99 | .CONDITION <- list(
100 | IntegerFilter = c("==", "!=", ">", "<", ">=", "<="),
101 | DoubleFilter = c("==", "!=", ">", "<", ">=", "<="),
102 | CharacterFilter = c("==", "!=", "startsWith", "endsWith", "contains"),
103 | GRangesFilter = c("any", "start", "end", "within", "equal")
104 | )
105 | ## Fields by filter type; presumably each filter's default `field` (TODO confirm).
106 | .FIELD <- list(
107 | CharacterFilter = c(
108 | "exon_id", "exon_name", "gene_id", "gene_name", "gene_biotype",
109 | "entrez", "symbol", "tx_id", "tx_name", "tx_biotype",
110 | "protein_id", "uniprot", "seq_name", "seq_strand"),
111 | IntegerFilter = c(
112 | "cds_start", "cds_end", "exon_start", "exon_rank", "exon_end",
113 | "gene_start", "gene_end", "tx_start", "tx_end")
114 | )
115 |
116 | ## Check that `condition` is a single condition supported by the filter
117 | ## type `class` (a name of `.CONDITION`). Returns `TRUE` if valid, otherwise
118 | ## a character vector describing the problem found.
119 | .valid_condition <- function(condition, class) {
120 |     ## Membership is only tested for a length 1 'condition'; otherwise
121 |     ## supported conditions would be reported as being invalid.
122 |     if (length(condition) != 1L)
123 |         return("'condition' must be length 1")
124 |     supported <- .CONDITION[[class]]
125 |     if (!(condition %in% supported)) {
126 |         value <- paste(sQuote(supported), collapse=" ")
127 |         return(paste0("'", condition, "' must be in ", value))
128 |     }
129 |     TRUE
130 | }
131 |
132 | ############################################################
133 | ## AnnotationFilter
134 | ##
135 |
136 | #' @exportClass AnnotationFilter
137 | .AnnotationFilter <- setClass(
138 |     ## Virtual base class of all filters: a filter acts on the column
139 |     ## `field` using its `condition` and `value`; `not` flags negation.
140 |     Class = "AnnotationFilter",
141 |     contains = "VIRTUAL",
142 |     slots = c(
143 |         field = "character",
144 |         condition = "character",
145 |         value = "ANY",
146 |         not = "logical"
147 |     ),
148 |     ## By default a filter tests for equality and is not negated.
149 |     prototype = list(condition = "==", not = FALSE)
150 | )
151 |
152 | setValidity("AnnotationFilter", function(object) {
153 |     ## Collect all problems; an empty result means the object is valid.
154 |     msgs <- character()
155 |     val <- .value(object)
156 |     cond <- .condition(object)
157 |     neg <- .not(object)
158 |     ## 'condition': a length 1 condition must not be NA. A condition of
159 |     ## any other length is not reported by this function.
160 |     single_cond <- length(cond) == 1L
161 |     if (single_cond && is.na(cond))
162 |         msgs <- c(msgs, "'condition' can not be NA")
163 |     usable_cond <- single_cond && !anyNA(cond)
164 |
165 |     ## These conditions can only be evaluated against a single value.
166 |     scalar_only <- c("startsWith", "endsWith", "contains",
167 |                      ">", "<", ">=", "<=")
168 |     if (usable_cond && cond %in% scalar_only && length(val) > 1L)
169 |         msgs <- c(msgs, paste0("'", cond, "' requires length 1 'value'"))
170 |
171 |     if (length(neg) != 1)
172 |         msgs <- c(msgs, '"not" value must be of length 1.')
173 |     if (any(is.na(val)))
174 |         msgs <- c(msgs, "'value' can not be NA")
175 |
176 |     if (length(msgs)) msgs else TRUE
177 | })
178 |
179 | ## Internal slot accessors. The exported S4 methods condition(), value(),
180 | ## field() and not() are registered directly with these functions, so each
181 | ## takes the filter as its only argument.
182 | .field <- function(object) slot(object, "field")
183 | .condition <- function(object) slot(object, "condition")
184 | .value <- function(object) slot(object, "value")
185 | .not <- function(object) slot(object, "not")
186 |
187 | #' @rdname AnnotationFilter
188 | #'
189 | #' @aliases condition
190 | #'
191 | #' @description \code{condition()} gets the \code{condition} value of
192 | #' the filter \code{object}.
193 | #'
194 | #' @param object An \code{AnnotationFilter} object.
195 | #'
196 | #' @export
197 | setMethod("condition", "AnnotationFilter", .condition)
198 |
199 | #' @rdname AnnotationFilter
200 | #'
201 | #' @aliases value
202 | #'
203 | #' @description \code{value()} gets the \code{value} of the filter
204 | #' \code{object}.
205 | #'
206 | #' @export
207 | setMethod("value", "AnnotationFilter", .value)
208 |
209 | #' @rdname AnnotationFilter
210 | #'
211 | #' @aliases field
212 | #'
213 | #' @description \code{field()} gets the \code{field} of the filter
214 | #' \code{object}.
215 | #'
216 | #' @export
217 | setMethod("field", "AnnotationFilter", .field)
218 |
219 | #' @rdname AnnotationFilter
220 | #'
221 | #' @description \code{not()} gets the \code{not} flag of the filter \code{object}.
222 | #'
223 | #' @export
224 | setMethod("not", "AnnotationFilter", .not)
225 |
226 | #' @importFrom methods show
227 | #'
228 | #' @export
229 | setMethod("show", "AnnotationFilter", function(object) {
230 |     ## A negated filter is announced by a leading "NOT" line.
231 |     if (.not(object)) cat("NOT\n")
232 |     cat("class:", class(object), "\ncondition:", .condition(object), "\n")
233 | })
234 |
235 | ############################################################
236 | ## CharacterFilter, IntegerFilter
237 | ##
238 |
239 | ## Virtual base class for filters whose 'value' is a character vector.
240 | #' @exportClass CharacterFilter
241 | .CharacterFilter <- setClass(
242 |     "CharacterFilter",
243 |     contains = c("VIRTUAL", "AnnotationFilter"),
244 |     slots = c(value = "character"),
245 |     prototype = list(value = character())
246 | )
247 |
248 | ## Accept only the conditions listed for "CharacterFilter".
249 | setValidity("CharacterFilter", function(object) {
250 |     .valid_condition(.condition(object), class = "CharacterFilter")
251 | })
252 |
253 | #' @importFrom methods show callNextMethod
254 | #'
255 | #' @export
256 | setMethod("show", "CharacterFilter", function(object) {
257 |     callNextMethod()    # inherited summary first, then the value(s)
258 |     cat("value:", .value(object), "\n")
259 | })
260 |
261 | ## Virtual base class for filters whose 'value' is an integer vector.
262 | #' @exportClass IntegerFilter
263 | .IntegerFilter <- setClass(
264 |     "IntegerFilter",
265 |     contains = c("VIRTUAL", "AnnotationFilter"),
266 |     slots = c(value = "integer"),
267 |     prototype = list(value = integer())
268 | )
269 |
270 | ## Accept only the conditions listed for "IntegerFilter".
271 | setValidity("IntegerFilter", function(object) {
272 |     .valid_condition(.condition(object), class = "IntegerFilter")
273 | })
274 |
275 | #' @export
276 | setMethod("show", "IntegerFilter", function(object) {
277 |     callNextMethod()    # inherited summary first, then the value(s)
278 |     cat("value:", .value(object), "\n")
279 | })
280 |
281 | ## Virtual base class for filters whose 'value' is a numeric vector.
282 | #' @exportClass DoubleFilter
283 | .DoubleFilter <- setClass(
284 |     "DoubleFilter",
285 |     contains = c("VIRTUAL", "AnnotationFilter"),
286 |     slots = c(value = "numeric"),
287 |     prototype = list(value = double())
288 | )
289 |
290 | ## Accept only the conditions listed for "DoubleFilter".
291 | setValidity("DoubleFilter", function(object) {
292 |     .valid_condition(.condition(object), class = "DoubleFilter")
293 | })
294 |
295 | #' @export
296 | setMethod("show", "DoubleFilter", function(object) {
297 |     callNextMethod()    # inherited summary first, then the value(s)
298 |     cat("value:", .value(object), "\n")
299 | })
300 |
301 | #' @rdname AnnotationFilter
302 | #'
303 | #' @importFrom GenomicRanges GRanges
304 | #'
305 | #' @importClassesFrom GenomicRanges GRanges
306 | #'
307 | #' @exportClass GRangesFilter
308 | .GRangesFilter <- setClass(
309 |     "GRangesFilter",
310 |     contains = "AnnotationFilter",
311 |     slots = c(value = "GRanges", feature = "character"),
312 |     ## Defaults: empty ranges, condition "any", applied to feature "gene".
313 |     prototype = list(
314 |         value = GRanges(),
315 |         condition = "any",
316 |         field = "granges",
317 |         feature = "gene"
318 |     )
319 | )
320 |
321 | ## Accept only the conditions listed for "GRangesFilter".
322 | setValidity("GRangesFilter", function(object) {
323 |     .valid_condition(.condition(object), class = "GRangesFilter")
324 | })
325 |
326 | ## Internal accessor for the 'feature' slot of a GRangesFilter.
327 | .feature <- function(object) slot(object, "feature")
328 |
329 | #' @rdname AnnotationFilter
330 | #'
331 | #' @param type \code{character(1)} indicating how overlaps are to be
332 | #'     filtered. See \code{findOverlaps} in the IRanges package for a
333 | #'     description of this argument.
334 | #'
335 | #' @examples
336 | #' ## filter by GRanges
337 | #' GRangesFilter(GenomicRanges::GRanges("chr10:87869000-87876000"))
338 | #' @export
339 | GRangesFilter <- function(value, feature = "gene",
340 |                           type = c("any", "start", "end", "within", "equal")) {
341 |     ## The overlap type doubles as the filter's condition; match.arg()
342 |     ## picks the first choice by default and rejects anything unlisted.
343 |     overlap <- match.arg(type)
344 |     .GRangesFilter(
345 |         field = "granges", value = value,
346 |         condition = overlap,
347 |         feature = feature
348 |     )
349 | }
350 |
351 | ## feature() is exported as a plain function; it reuses the internal
352 | ## .feature() accessor defined alongside the GRangesFilter class.
353 | #' @aliases feature
354 | #'
355 | #' @description \code{feature()} gets the \code{feature} of the
356 | #'     \code{GRangesFilter} \code{object}.
357 | #'
358 | #' @rdname AnnotationFilter
359 | #'
360 | #' @export
361 | feature <- .feature
362 |
363 | #' @importFrom GenomicRanges show
364 | #'
365 | #' @export
366 | setMethod("show", "GRangesFilter", function(object) {
367 |     callNextMethod()
368 |     ## The ranges are printed by their own show() method under the header.
369 |     cat("feature:", .feature(object), "\nvalue:\n")
370 |     show(value(object))
371 | })
372 |
373 |
374 | ############################################################
375 | ## Create install-time classes
376 | ##
377 |
378 | #' @rdname AnnotationFilter
379 | #'
380 | #' @name AnnotationFilter
381 | #'
382 | #' @param feature \code{character(1)} defining on what feature the
383 | #' \code{GRangesFilter} should be applied. Choices could be
384 | #' \code{"gene"}, \code{"tx"} or \code{"exon"}.
385 | #'
386 | #' @examples
387 | #' ## Create a SymbolFilter to filter on a gene's symbol.
388 | #' sf <- SymbolFilter("BCL2")
389 | #' sf
390 | #'
391 | #' ## Create a GeneStartFilter to filter based on the genes' chromosomal start
392 | #' ## coordinates
393 | #' gsf <- GeneStartFilter(10000, condition = ">")
394 | #' gsf
395 | #'
396 | #' @export CdsStartFilter CdsEndFilter ExonIdFilter ExonNameFilter
397 | #' @export ExonStartFilter ExonEndFilter ExonRankFilter GeneIdFilter
398 | #' @export GeneNameFilter GeneBiotypeFilter GeneStartFilter
399 | #' @export GeneEndFilter EntrezFilter SymbolFilter TxIdFilter
400 | #' @export TxNameFilter TxBiotypeFilter TxStartFilter TxEndFilter
401 | #' @export ProteinIdFilter UniprotFilter SeqNameFilter SeqStrandFilter
402 | #'
403 | #' @importFrom methods new
404 | #'
405 | #' @exportClass CdsStartFilter CdsEndFilter ExonIdFilter
406 | #' ExonNameFilter ExonStartFilter ExonEndFilter ExonRankFilter
407 | #' GeneIdFilter GeneNameFilter GeneBiotypeFilter GeneStartFilter
408 | #' GeneEndFilter EntrezFilter SymbolFilter TxIdFilter TxNameFilter
409 | #' TxBiotypeFilter TxStartFilter TxEndFilter ProteinIdFilter
410 | #' UniprotFilter SeqNameFilter SeqStrandFilter
411 | NULL
412 | ## Map field names to filter class names, e.g. "gene_id" -> "GeneIdFilter".
413 | .fieldToClass <- function(field) {
414 |     camel <- gsub("(^|_)([[:alpha:]])", "\\U\\2", field, perl=TRUE)
415 |     if (length(camel) == 0L) return(character(0))  # keep empty input empty
416 |     paste0(camel, "Filter")
417 | }
418 |
419 | ## Build the constructor for filter class `class` acting on `field`.
420 | ## The returned function coerces its arguments to the slot types (character
421 | ## values for fields listed under "CharacterFilter", integer values
422 | ## otherwise) and instantiates `class`.
423 | .filterFactory <- function(field, class) {
424 |     force(field); force(class)          # watch for lazy evaluation
425 |     as.value <-
426 |         if (field %in% .FIELD[["CharacterFilter"]]) {
427 |             as.character
428 |         } else {
429 |             function(x) {
430 |                 if (!is.numeric(x))
431 |                     stop("Input to a ", field,
432 |                          " filter must be a numeric vector.")
433 |                 as.integer(x)
434 |             }
435 |         }
436 |
437 |     function(value, condition = "==", not = FALSE) {
438 |         value <- as.value(value)
439 |         condition <- as.character(condition)
440 |         not <- as.logical(not)
441 |         new(class, field=field, condition = condition, value=value,
442 |             not=not)
443 |     }
444 | }
445 |
446 | ## Create, for every field in .FIELD, a concrete filter class and a
447 | ## constructor function of the same name in the top-level environment.
448 | local({
449 |     defineFilters <- function(base) {
450 |         flds <- .FIELD[[base]]
451 |         clsNames <- .fieldToClass(flds)
452 |         for (k in seq_along(flds)) {
453 |             setClass(clsNames[[k]], contains=base, where=topenv())
454 |             ctor <- .filterFactory(flds[[k]], clsNames[[k]])
455 |             assign(clsNames[[k]], ctor, envir=topenv())
456 |         }
457 |     }
458 |     ## The names of .FIELD are the virtual classes to derive from.
459 |     for (base in names(.FIELD))
460 |         defineFilters(base)
461 | })
462 |
463 | ############################################################
464 | ## Utilities
465 | ##
466 | ## Translate a filter into a character expression "<field> <op> <value>".
467 | .convertFilter <- function(object) {
468 | field <- field(object)
469 | if (field == "granges")  # the field used by GRangesFilter objects
470 | stop("GRangesFilter cannot be converted using convertFilter().")
471 | value <- value(object)
472 | condition <- condition(object)
473 | not <- not(object)
474 | ## No case matches >, <, >= and <=, so switch() leaves 'op' NULL for them.
475 | op <- switch(
476 | condition,
477 | "==" = if (length(value) == 1) "==" else "%in%",  # several values: set membership
478 | "!=" = if (length(value) == 1) "!=" else "%in%",  # negation is added further down
479 | "startsWith" = "%like%",
480 | "endsWith" = "%like%",
481 | "contains" = "%like%"
482 | )
483 | ## Prefix that negates the whole expression when 'not' is TRUE.
484 | not_val <- ifelse(not, '!', '')
485 | ## Equality tests quote every value and join the values with commas.
486 | if (condition %in% c("==", "!="))
487 | value <- paste0("'", value, "'", collapse=", ")
488 | ## Assemble the expression; the first matching branch is the return value.
489 | if (!is.null(op) && op %in% c("==", "!="))
490 | sprintf("%s%s %s %s", not_val, field, op, value)
491 | else if ((condition == "==") && op == "%in%")
492 | sprintf("%s%s %s c(%s)", not_val, field, op, value)
493 | else if ((condition == "!=") && op == "%in%")  # "!=" on several values: negated %in%
494 | if(not) sprintf("%s %s c(%s)", field, op, value)  # 'not' cancels the negation
495 | else sprintf("!%s%s %s c(%s)", not_val, field, op, value)
496 | else if (condition == "startsWith")
497 | sprintf("%s%s %s '%s%%'", not_val, field, op, value)  # "%%" emits a literal "%"
498 | else if (condition == "endsWith")
499 | sprintf("%s%s %s '%%%s'", not_val, field, op, value)  # "%%" emits a literal "%"
500 | else if (condition == "contains")
501 | sprintf("%s%s %s '%s'", not_val, field, op, value)
502 | else if (condition %in% c(">", "<", ">=", "<=")) {
503 | sprintf("%s%s %s %s", not_val, field, condition, as.integer(value))  # value is not quoted here
504 | }
505 | }
506 |
507 | #' @rdname AnnotationFilter
508 | #'
509 | #' @description \code{convertFilter()} converts an \code{AnnotationFilter}
510 | #' object to a \code{character(1)} giving an expression that can be used as
511 | #' input to a \code{dplyr} filter.
512 | #'
513 | #' @return \code{character(1)} that can be used as input to a \code{dplyr}
514 | #' filter.
515 | #'
516 | #' @examples
517 | #' filter <- SymbolFilter("ADA", "==")
518 | #' result <- convertFilter(filter)
519 | #' result
520 | #' @export
521 | setMethod("convertFilter", signature(object = "AnnotationFilter",
522 | db = "missing"), .convertFilter)  # method for calls that supply no 'db'
523 |
524 | ## Filters that are not generated from .FIELD.
525 | .FILTERS_WO_FIELD <- c("GRangesFilter")
526 |
527 | ## Table of all filter classes and their fields, ordered by filter name.
528 | .supportedFilters <- function() {
529 |     fields <- unlist(.FIELD, use.names=FALSE)
530 |     tbl <- data.frame(
531 |         filter = c(.fieldToClass(fields), .FILTERS_WO_FIELD),
532 |         field = c(fields, "granges"))
533 |     tbl[order(tbl$filter), ]
534 | }
535 |
536 | #' @rdname AnnotationFilter
537 | #'
538 | #' @examples
539 | #' supportedFilters()
540 | #' @export
541 | setMethod("supportedFilters", "missing", function(object) {  # selected when no object is supplied
542 | .supportedFilters()  # returns the data.frame of filter classes and fields
543 | })
544 |
545 | #' @rdname GenenameFilter
546 | #'
547 | #' @title DEPRECATED Gene name filter
548 | #'
549 | #' @aliases GenenameFilter-class
550 | #'
551 | #' @description
552 | #'
553 | #' The `GenenameFilter` class and its constructor are deprecated. Please use
554 | #' [GeneNameFilter()] instead.
555 | #'
556 | #' @param value `character()` value for the filter.
557 | #'
558 | #' @param condition `character(1)` defining the condition to be
559 | #' used in the filter. One of `"=="`, `"!="`, `"startsWith"`, `"endsWith"`
560 | #' or `"contains"`. Default condition is `"=="`.
561 | #'
562 | #' @param not `logical(1)` whether the `AnnotationFilter` is negated.
563 | #' `TRUE` means the filter is negated (!), `FALSE` means it is not
564 | #' negated. Default is `FALSE`.
565 | #'
566 | #' @return The constructor function returns a `GenenameFilter`.
567 | #'
568 | #' @md
569 | #'
570 | #' @export
571 | #'
572 | #' @exportClass GenenameFilter
573 | GenenameFilter <- function(value, condition = "==", not = FALSE) {
574 | .Deprecated("GeneNameFilter")  # point callers at the replacement constructor
575 | new("GenenameFilter", value = value, condition = condition, not = not)
576 | }
577 | ## Concrete subclass of CharacterFilter with the field fixed to "genename".
578 | .GenenameFilter <- setClass(
579 | "GenenameFilter",
580 | contains = "CharacterFilter",
581 | prototype = list(
582 | field = "genename"
583 | )
584 | )
585 |
--------------------------------------------------------------------------------