├── tests
└── runTests.R
├── vignettes
├── Rplots.pdf
├── GeneOverlap.synctex.gz
├── GeneOverlap.bib
└── GeneOverlap.Rnw
├── data
└── GeneOverlap.RData
├── .gitignore
├── README.md
├── man
├── gs.RNASeq.Rd
├── hESC.RNASeq.list.Rd
├── hESC.ChIPSeq.list.Rd
├── GeneOverlap-package.Rd
├── testGeneOverlap.Rd
├── newGeneOverlap.Rd
├── getGenomeSize.Rd
├── getList.Rd
├── drawHeatmap.Rd
├── newGOM.Rd
├── GeneOverlapMatrix.Rd
├── getReadonlyMatrix.Rd
├── getReadonly.Rd
└── GeneOverlap.Rd
├── DESCRIPTION
├── NAMESPACE
├── R
├── GeneOverlapMatrix-accessors.R
├── AllGenerics.R
├── GeneOverlap-accessors.R
├── GeneOverlap-methods.R
├── AllClasses.R
└── GeneOverlapMatrix-methods.R
└── inst
└── unitTests
└── test_AllGeneOverlapMethods.R
/tests/runTests.R:
--------------------------------------------------------------------------------
1 | BiocGenerics:::testPackage("GeneOverlap")
--------------------------------------------------------------------------------
/vignettes/Rplots.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shenlab-sinai/GeneOverlap/HEAD/vignettes/Rplots.pdf
--------------------------------------------------------------------------------
/data/GeneOverlap.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shenlab-sinai/GeneOverlap/HEAD/data/GeneOverlap.RData
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | GeneOverlap.Rproj
5 | .DS_Store
6 | GeneOverlap.log
7 | GeneOverlap.toc
--------------------------------------------------------------------------------
/vignettes/GeneOverlap.synctex.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shenlab-sinai/GeneOverlap/HEAD/vignettes/GeneOverlap.synctex.gz
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | geneoverlap
2 | ===========
3 | by Li Shen, Ph.D.
4 |
5 | Icahn School of Medicine at Mount Sinai
6 |
7 | New York, New York
8 |
9 | R package for testing and visualizing gene overlaps
10 |
11 | GeneOverlap is also available on [Bioconductor](https://bioconductor.org/packages/release/bioc/html/GeneOverlap.html).
12 |
--------------------------------------------------------------------------------
/man/gs.RNASeq.Rd:
--------------------------------------------------------------------------------
1 | \name{gs.RNASeq}
2 | \alias{gs.RNASeq}
3 | \docType{data}
4 | \title{
5 | Genome size based on RNA-seq data
6 | %% ~~ data name/kind ... ~~
7 | }
8 | \description{
9 | See vignette for data source and processing.
10 | %% ~~ A concise (1-5 lines) description of the dataset. ~~
11 | }
12 | \usage{data(GeneOverlap)}
13 | \format{
14 | An integer representing the genomic background.
15 | }
16 | \examples{
17 | data(GeneOverlap)
18 | gs.RNASeq
19 | }
20 | \keyword{datasets}
21 |
--------------------------------------------------------------------------------
/man/hESC.RNASeq.list.Rd:
--------------------------------------------------------------------------------
1 | \name{hESC.RNASeq.list}
2 | \alias{hESC.RNASeq.list}
3 | \docType{data}
4 | \title{
5 | RNA-seq gene lists
6 | %% ~~ data name/kind ... ~~
7 | }
8 | \description{
9 | See vignette for data source and processing.
10 | %% ~~ A concise (1-5 lines) description of the dataset. ~~
11 | }
12 | \usage{data(GeneOverlap)}
13 | \format{
14 | A named list of four character vectors.
15 | }
16 | \examples{
17 | data(GeneOverlap)
18 | str(hESC.RNASeq.list)
19 | }
20 | \keyword{datasets}
21 |
--------------------------------------------------------------------------------
/man/hESC.ChIPSeq.list.Rd:
--------------------------------------------------------------------------------
1 | \name{hESC.ChIPSeq.list}
2 | \alias{hESC.ChIPSeq.list}
3 | \docType{data}
4 | \title{
5 | ChIP-seq gene lists
6 | %% ~~ data name/kind ... ~~
7 | }
8 | \description{
9 | See vignette for data source and processing.
10 | %% ~~ A concise (1-5 lines) description of the dataset. ~~
11 | }
12 | \usage{data(GeneOverlap)}
13 | \format{
14 | A named list of four character vectors.
15 | }
16 | \examples{
17 | data(GeneOverlap)
18 | str(hESC.ChIPSeq.list)
19 | }
20 | \keyword{datasets}
21 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: GeneOverlap
2 | Type: Package
3 | Title: Test and visualize gene overlaps
4 | Version: 1.9.1
5 | Date: 2016-08-22
6 | Author: Li Shen
7 | Maintainer: Li Shen
8 | Description: Test two sets of gene lists and visualize the results.
9 | License: GPL-3
10 | Suggests: RUnit, BiocGenerics, BiocStyle
11 | Imports: stats, RColorBrewer, gplots, methods
12 | URL: http://dx.doi.org/10.5281/zenodo.60644
13 | biocViews: Enrichment, MultipleComparisons, Visualization
14 | Collate: AllClasses.R AllGenerics.R GeneOverlap-accessors.R
15 | GeneOverlap-methods.R GeneOverlapMatrix-accessors.R
16 | GeneOverlapMatrix-methods.R
17 |
18 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | import( methods )
2 | importFrom( stats, fisher.test )
3 | importFrom( RColorBrewer, brewer.pal )
4 | importFrom( gplots, heatmap.2 )
5 |
6 | exportClasses( "GeneOverlap", "GeneOverlapMatrix" )
7 |
8 | exportMethods(
9 | show, print,
10 | getListA, "setListA<-",
11 | getListB, "setListB<-",
12 | getIntersection,
13 | getUnion,
14 | getGenomeSize, "setGenomeSize<-",
15 | getTested,
16 | getContbl,
17 | getPval,
18 | getOddsRatio,
19 | getJaccard,
20 | testGeneOverlap,
21 | getGsetA,
22 | getGsetB,
23 | getSelfCompare,
24 | getMatrix,
25 | getNestedList,
26 | "[",
27 | drawHeatmap
28 | )
29 |
30 | export(
31 | newGeneOverlap,
32 | newGOM
33 | )
34 |
35 | # exportPattern("^[[:alpha:]]+")
36 |
--------------------------------------------------------------------------------
/man/GeneOverlap-package.Rd:
--------------------------------------------------------------------------------
1 | \name{GeneOverlap-package}
2 | \alias{GeneOverlap-package}
3 | \docType{package}
4 | \title{
5 | Test and visualize overlaps between gene lists
6 | }
7 | \description{
8 | Given two sets of gene lists, this package calculates the overlaps between
9 | all pairs of lists from the two sets. Fisher's exact test is used to
10 | determine the p-value and odds ratio in comparison to a genomic background.
11 | Plotting functions are provided to visualize the results.
12 | }
13 | \details{
14 | \tabular{ll}{
15 | Package: \tab GeneOverlap\cr
16 | Type: \tab Package\cr
17 | Version: \tab 1.9.1\cr
18 | Date: \tab 2016-08-22\cr
19 | License: \tab GPL-3\cr
20 | }
21 | To use the package, construct one or two named lists each representing a
22 | gene set. Each list should contain one or more vectors of gene names. Then
23 | use GeneOverlapMatrix to perform pairwise comparisons. It will return an
24 | object that can be used for visualization. The GeneOverlapMatrix calls
25 | GeneOverlap internally to perform comparison between two gene lists.
26 | }
27 | \author{
28 | Li Shen <\email{li.shen@mssm.edu}>
29 |
30 | Mount Sinai profile:\url{http://www.mountsinai.org/profiles/li-shen}
31 |
32 | Personal:\url{http://www.linkedin.com/in/lshen/}
33 | }
34 | \keyword{ htest }
35 | \keyword{ hplot }
36 | \keyword{ graphs }
37 |
--------------------------------------------------------------------------------
/man/testGeneOverlap.Rd:
--------------------------------------------------------------------------------
1 | \name{testGeneOverlap}
2 | \alias{testGeneOverlap}
3 | \alias{testGeneOverlap,GeneOverlap-method}
4 | %- Also NEED an '\alias' for EACH other topic documented here.
5 | \title{
6 | Test function for the GeneOverlap class
7 | %% ~~function to do ... ~~
8 | }
9 | \description{
10 | Perform Fisher's exact test based on the information supplied in the
11 | GeneOverlap object, i.e. gene list A, B and genome size. This function
12 | also calculates the Jaccard index. Will set the tested Boolean label after
13 | done.
14 | %% ~~ A concise (1-5 lines) description of what the function does. ~~
15 | }
16 | \usage{
17 | \S4method{testGeneOverlap}{GeneOverlap}(object)
18 | }
19 | %- maybe also 'usage' for other objects documented here.
20 | \arguments{
21 | \item{object}{A GeneOverlap object.}
22 | }
23 | \value{
24 | A GeneOverlap object with valid p-value, odds ratio, Jaccard index and
25 | contingency table. The tested Boolean label is set to true. Use show or
26 | print to display a summary of the object. Use accessors to get information
27 | of each slot.
28 | %% ~Describe the value returned
29 | %% If it is a LIST, use
30 | %% \item{comp1 }{Description of 'comp1'}
31 | %% \item{comp2 }{Description of 'comp2'}
32 | %% ...
33 | }
34 |
35 | %% ~Make other sections like Warning with \section{Warning }{....} ~
36 |
37 | \seealso{
38 | \code{\link{GeneOverlap-class}}
39 | %% ~~objects to See Also as \code{\link{help}}, ~~~
40 | }
41 | \examples{
42 | data(GeneOverlap)
43 | go.obj <- newGeneOverlap(hESC.ChIPSeq.list$H3K4me3,
44 | hESC.ChIPSeq.list$H3K27me3,
45 | genome.size=gs.RNASeq)
46 | go.obj <- testGeneOverlap(go.obj)
47 | getPval(go.obj)
48 | getOddsRatio(go.obj)
49 | getJaccard(go.obj)
50 | getContbl(go.obj)
51 | print(go.obj)
52 | }
53 | % Add one or more standard keywords, see file 'KEYWORDS' in the
54 | % R documentation directory.
55 | \keyword{ ~kwd1 }
56 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
57 |
--------------------------------------------------------------------------------
/man/newGeneOverlap.Rd:
--------------------------------------------------------------------------------
1 | \name{newGeneOverlap}
2 | \alias{newGeneOverlap}
3 | %- Also NEED an '\alias' for EACH other topic documented here.
4 | \title{
5 | Constructor for the GeneOverlap class
6 | %% ~~function to do ... ~~
7 | }
8 | \description{
9 | Use this function to create objects of the GeneOverlap class.
10 | %% ~~ A concise (1-5 lines) description of what the function does. ~~
11 | }
12 | \usage{
13 | newGeneOverlap(listA, listB, genome.size = NULL,
14 | spec = c("mm9.gene", "hg19.gene", "rn4.gene"))
15 | }
16 | %- maybe also 'usage' for other objects documented here.
17 | \arguments{
18 | \item{listA}{
19 | Gene list A. This should be a character vector or a factor.
20 | }
21 | \item{listB}{
22 | Gene list B. This should be a character vector or a factor.
23 | }
24 | \item{genome.size}{
25 | An integer representing the number of genes in the genome. If not specified, the preset number for "spec" will be used.
26 | }
27 | \item{spec}{
28 | A character string of the genome name. Currently choose one of: mm9.gene, hg19.gene, rn4.gene. The gene numbers are based on protein coding genes.
29 | }
30 | }
31 | \value{
32 | A GeneOverlap object.
33 | %% ~Describe the value returned
34 | %% If it is a LIST, use
35 | %% \item{comp1 }{Description of 'comp1'}
36 | %% \item{comp2 }{Description of 'comp2'}
37 | %% ...
38 | }
39 | \note{
40 | Both listA and listB will be converted to unique character vectors before testing; that is, duplicated gene names are removed and therefore not counted.
41 | %% ~~further notes~~
42 | }
43 |
44 | %% ~Make other sections like Warning with \section{Warning }{....} ~
45 |
46 | \examples{
47 | data(GeneOverlap)
48 | go.obj <- newGeneOverlap(hESC.ChIPSeq.list$H3K4me3,
49 | hESC.ChIPSeq.list$H3K9me3,
50 | gs.RNASeq)
51 | print(go.obj) # not tested yet.
52 | go.obj <- testGeneOverlap(go.obj)
53 | print(go.obj)
54 | }
55 | % Add one or more standard keywords, see file 'KEYWORDS' in the
56 | % R documentation directory.
57 | \keyword{ ~kwd1 }
58 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
59 |
--------------------------------------------------------------------------------
/man/getGenomeSize.Rd:
--------------------------------------------------------------------------------
1 | \name{getGenomeSize}
2 | \alias{getGenomeSize}
3 | \alias{getGenomeSize,GeneOverlap-method}
4 | \alias{setGenomeSize<-}
5 | \alias{setGenomeSize<-,GeneOverlap-method}
6 | %- Also NEED an '\alias' for EACH other topic documented here.
7 | \title{
8 | Accessors for the "genome.size" slot of the GeneOverlap class
9 | %% ~~function to do ... ~~
10 | }
11 | \description{
12 | The genome.size slot contains the number of genes in the genome as an
13 | integer.
14 | %% ~~ A concise (1-5 lines) description of what the function does. ~~
15 | }
16 | \usage{
17 | \S4method{getGenomeSize}{GeneOverlap}(object)
18 | \S4method{setGenomeSize}{GeneOverlap}(object) <- value
19 | }
20 | %- maybe also 'usage' for other objects documented here.
21 | \arguments{
22 | \item{object}{A GeneOverlap object.}
23 | \item{value}{An integer representing genomic background.}
24 | }
25 | \details{
26 | After the setGenomeSize function is called, the tested Boolean label will be
27 | reset to false.
28 | %% ~~ If necessary, more details than the description above ~~
29 | }
30 | \value{
31 | An integer representing the genome size.
32 | %% ~Describe the value returned
33 | %% If it is a LIST, use
34 | %% \item{comp1 }{Description of 'comp1'}
35 | %% \item{comp2 }{Description of 'comp2'}
36 | %% ...
37 | }
38 |
39 | %% ~Make other sections like Warning with \section{Warning }{....} ~
40 |
41 | \seealso{
42 | \code{\link{GeneOverlap-class}}
43 | %% ~~objects to See Also as \code{\link{help}}, ~~~
44 | }
45 | \examples{
46 | data(GeneOverlap)
47 | go.obj <- newGeneOverlap(hESC.ChIPSeq.list$H3K27me3,
48 | hESC.RNASeq.list$"Exp Medium",
49 | genome.size=gs.RNASeq)
50 | getGenomeSize(go.obj)
51 | v.gs <- c(12e3, 14e3, 16e3, 18e3, 20e3)
52 | setNames(sapply(v.gs, function(g) {
53 | setGenomeSize(go.obj) <- g
54 | go.obj <- testGeneOverlap(go.obj)
55 | getPval(go.obj)
56 | }), v.gs)
57 | }
58 | % Add one or more standard keywords, see file 'KEYWORDS' in the
59 | % R documentation directory.
60 | \keyword{ ~kwd1 }
61 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
62 |
--------------------------------------------------------------------------------
/man/getList.Rd:
--------------------------------------------------------------------------------
1 | \name{getList}
2 | \alias{getListA}
3 | \alias{getListA,GeneOverlap-method}
4 | \alias{setListA<-}
5 | \alias{setListA<-,GeneOverlap-method}
6 | \alias{getListB}
7 | \alias{getListB,GeneOverlap-method}
8 | \alias{setListB<-}
9 | \alias{setListB<-,GeneOverlap-method}
10 | %- Also NEED an '\alias' for EACH other topic documented here.
11 | \title{
12 | Accessors for the "listA" and "listB" slots of GeneOverlap class
13 | %% ~~function to do ... ~~
14 | }
15 | \description{
16 | The listA and listB slots hold the gene lists A and B as character vectors.
17 | %% ~~ A concise (1-5 lines) description of what the function does. ~~
18 | }
19 | \usage{
20 | \S4method{getListA}{GeneOverlap}(object)
21 | \S4method{setListA}{GeneOverlap}(object) <- value
22 | \S4method{getListB}{GeneOverlap}(object)
23 | \S4method{setListB}{GeneOverlap}(object) <- value
24 | }
25 | %- maybe also 'usage' for other objects documented here.
26 | \arguments{
27 | \item{object}{A GeneOverlap object.}
28 | \item{value}{A character vector representing gene names.}
29 | }
30 | \details{
31 | After setListX function is called, the tested Boolean label will be reset
32 | to false.
33 | %% ~~ If necessary, more details than the description above ~~
34 | }
35 | \value{
36 | A character vector representing gene list A/B.
37 | %% ~Describe the value returned
38 | %% If it is a LIST, use
39 | %% \item{comp1 }{Description of 'comp1'}
40 | %% \item{comp2 }{Description of 'comp2'}
41 | %% ...
42 | }
43 |
44 | %% ~Make other sections like Warning with \section{Warning }{....} ~
45 |
46 | \seealso{
47 | \code{\link{GeneOverlap-class}}, \code{\link{newGeneOverlap}}
48 | %% ~~objects to See Also as \code{\link{help}}, ~~~
49 | }
50 | \examples{
51 | data(GeneOverlap)
52 | go.obj <- newGeneOverlap(hESC.ChIPSeq.list$H3K4me3,
53 | hESC.ChIPSeq.list$H3K27me3,
54 | genome.size=gs.RNASeq)
55 | go.obj <- testGeneOverlap(go.obj)
56 | head(getListB(go.obj))
57 | getTested(go.obj) # true.
58 | setListB(go.obj) <- hESC.ChIPSeq.list$H3K9me3
59 | getTested(go.obj) # false.
60 | }
61 | % Add one or more standard keywords, see file 'KEYWORDS' in the
62 | % R documentation directory.
63 | \keyword{ ~kwd1 }
64 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
65 |
--------------------------------------------------------------------------------
/man/drawHeatmap.Rd:
--------------------------------------------------------------------------------
1 | \name{drawHeatmap}
2 | \alias{drawHeatmap}
3 | \alias{drawHeatmap,GeneOverlapMatrix-method}
4 | %- Also NEED an '\alias' for EACH other topic documented here.
5 | \title{
6 | Visualize GeneOverlapMatrix objects as heatmaps
7 | %% ~~function to do ... ~~
8 | }
9 | \description{
10 | Visualization function for GeneOverlapMatrix objects. Use color gradients
11 | to represent the odds ratios or Jaccard indices and the superimposed texts
12 | on the grids to represent the p-values of overlaps.
13 | %% ~~ A concise (1-5 lines) description of what the function does. ~~
14 | }
15 | \usage{
16 | \S4method{drawHeatmap}{GeneOverlapMatrix}(object,
17 | what=c("odds.ratio", "Jaccard"), log.scale=F, adj.p=F, cutoff=.05,
18 | ncolused=9, grid.col=c("Greens", "Blues", "Greys",
19 | "Oranges", "Purples", "Reds"), note.col="red")
20 | }
21 | %- maybe also 'usage' for other objects documented here.
22 | \arguments{
23 | \item{object}{A GeneOverlapMatrix object.}
24 | \item{what}{What to plot? Odds ratio or Jaccard index.}
25 | \item{log.scale}{Whether log2 scale shall be used for odds ratios.}
26 | \item{adj.p}{Boolean label for whether p-values should be adjusted (using the
27 | Benjamini-Hochberg method) before showing.}
28 | \item{cutoff}{P-value cutoff to mask the insignificant comparisons.}
29 | \item{ncolused}{Number of colors used to represent the scale of odds ratios.}
30 | \item{grid.col}{Color for odds ratios.}
31 | \item{note.col}{Color for p-value texts.}
32 | }
33 | \details{
34 | The grids whose p-values do not pass the cutoff (i.e. the insignificant
35 | comparisons) will be masked and shown as the lightest color.
36 | %% ~~ If necessary, more details than the description above ~~
37 | }
38 |
39 | %% ~Make other sections like Warning with \section{Warning }{....} ~
40 |
41 | \examples{
42 | data(GeneOverlap)
43 | gom.obj <- newGOM(hESC.ChIPSeq.list, genome.size=gs.RNASeq)
44 | drawHeatmap(gom.obj, adj.p=TRUE, cutoff=1, # show all.
45 | ncolused=5, grid.col="Blues", note.col="black")
46 | drawHeatmap(gom.obj, log.scale=TRUE, ncolused=5)
47 | drawHeatmap(gom.obj, what="Jaccard", ncolused=5)
48 | }
49 | % Add one or more standard keywords, see file 'KEYWORDS' in the
50 | % R documentation directory.
51 | \keyword{ ~kwd1 }
52 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
53 |
--------------------------------------------------------------------------------
/vignettes/GeneOverlap.bib:
--------------------------------------------------------------------------------
1 | @article{diffReps,
2 | author = {Shen, Li AND Shao, Ning-Yi AND Liu, Xiaochuan AND Maze, Ian AND Feng, Jian AND Nestler, Eric J.},
3 | journal = {PLoS ONE},
4 | publisher = {Public Library of Science},
5 | title = {diffReps: Detecting Differential Chromatin Modification Sites from ChIP-seq Data with Biological Replicates},
6 | year = {2013},
7 | month = {06},
8 | volume = {8},
9 | url = {http://dx.doi.org/10.1371%2Fjournal.pone.0065598},
10 | pages = {e65598},
11 | number = {6},
12 | doi = {10.1371/journal.pone.0065598}
13 | }
14 |
15 | @Article{Bowtie,
16 | AUTHOR = {Langmead, Ben and Trapnell, Cole and Pop, Mihai and Salzberg, Steven},
17 | TITLE = {Ultrafast and memory-efficient alignment of short DNA sequences to the human genome},
18 | JOURNAL = {Genome Biology},
19 | VOLUME = {10},
20 | YEAR = {2009},
21 | NUMBER = {3},
22 | PAGES = {R25},
23 | URL = {http://genomebiology.com/2009/10/3/R25},
24 | DOI = {10.1186/gb-2009-10-3-r25},
25 | PubMedID = {19261174},
26 | ISSN = {1465-6906},
27 | }
28 |
29 | @article{Tophat,
30 | author = {Trapnell, Cole and Pachter, Lior and Salzberg, Steven L.},
31 | title = {TopHat: discovering splice junctions with RNA-Seq},
32 | volume = {25},
33 | number = {9},
34 | pages = {1105-1111},
35 | year = {2009},
36 | doi = {10.1093/bioinformatics/btp120},
37 | URL = {http://bioinformatics.oxfordjournals.org/content/25/9/1105.abstract},
38 | eprint = {http://bioinformatics.oxfordjournals.org/content/25/9/1105.full.pdf+html},
39 | journal = {Bioinformatics}
40 | }
41 |
42 | @article{Cufflinks,
43 | author = {Trapnell, Cole and Williams, Brian A. and Pertea, Geo and Mortazavi, Ali and Kwan, Gordon and van Baren, Marijke J. and Salzberg, Steven L. and Wold, Barbara J. and Pachter, Lior},
44 | title = {Transcript assembly and quantification by RNA-Seq reveals unannotated transcripts and isoform switching during cell differentiation},
45 | journal = {Nat Biotech},
46 | volume = {28},
47 | number = {5},
48 | pages = {511-515},
49 | note = {10.1038/nbt.1621},
50 | year = {2010}
51 | }
52 |
53 | @ONLINE{ENCODE,
54 | author = {{ENCODE Consortium}},
55 | title = {The Encyclopedia of DNA Elements (http://encodeproject.org/ENCODE/)},
56 | url = {http://encodeproject.org/ENCODE/}
57 | }
58 |
59 |
--------------------------------------------------------------------------------
/R/GeneOverlapMatrix-accessors.R:
--------------------------------------------------------------------------------
1 | # Plain slot readers for GeneOverlapMatrix: gsetA, gsetB and self.compare.
2 | setMethod("getGsetA", "GeneOverlapMatrix", function(object) object@gsetA)
3 | setMethod("getGsetB", "GeneOverlapMatrix", function(object) object@gsetB)
4 | setMethod(
5 |     "getSelfCompare", "GeneOverlapMatrix",
6 |     function(object) object@self.compare
7 | )
8 | setMethod(
9 |     "getMatrix", "GeneOverlapMatrix",
10 |     function(object, name=c("pval", "odds.ratio", "intersection", "union",
11 |                             "Jaccard")) {
12 |         # Pick the per-cell statistic once, then sweep it over the nested list.
13 |         name <- match.arg(name)
14 |         cell.stat <- switch(name,
15 |                             pval=getPval,
16 |                             odds.ratio=getOddsRatio,
17 |                             intersection=function(go) length(getIntersection(go)),
18 |                             union=function(go) length(getUnion(go)),
19 |                             Jaccard=getJaccard)
20 |         # Outer sapply walks the columns; inner sapply walks one column's cells.
21 |         sapply(object@go.nested.list, function(one.col) {
22 |             sapply(one.col, cell.stat)
23 |         })
24 |     }
25 | )
26 | setMethod(
27 |     "getNestedList", "GeneOverlapMatrix",
28 |     function(object, name=c("intersection", "union", "cont.tbl")) {
29 |         # Resolve the accessor once; keep the two-level nesting of the slot.
30 |         name <- match.arg(name)
31 |         pick <- switch(name,
32 |                        intersection=getIntersection,
33 |                        union=getUnion,
34 |                        cont.tbl=getContbl)
35 |         lapply(object@go.nested.list, function(one.col) {
36 |             lapply(one.col, pick)
37 |         })
38 |     }
39 | )
40 | setMethod(
41 |     "[", "GeneOverlapMatrix",
42 |     function(x, i, j) {  # j picks the column list, i the cell inside it.
43 |         stopifnot(is.numeric(i) || is.character(i), length(i) == 1L)
44 |         stopifnot(is.numeric(j) || is.character(j), length(j) == 1L)
45 |         if(is.numeric(j)) {
46 |             j <- as.integer(j)  # `[[` needs one non-zero position.
47 |             stopifnot(j != 0L, abs(j) <= length(x@go.nested.list))
48 |         } else {
49 |             stopifnot(j %in% names(x@go.nested.list))
50 |         }
51 |         gom.col <- x@go.nested.list[[j]]
52 |         if(is.numeric(i)) {
53 |             i <- as.integer(i)  # Same scalar, non-zero rule as for j.
54 |             stopifnot(i != 0L, abs(i) <= length(gom.col))
55 |         } else {
56 |             stopifnot(i %in% names(gom.col))
57 |         }
58 |         gom.col[[i]]
59 |     }
60 | )
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
--------------------------------------------------------------------------------
/R/AllGenerics.R:
--------------------------------------------------------------------------------
1 | #### GeneOverlap: generics for slot getters, replacement setters, the test ####
2 | setGeneric("getListA", function(object) { standardGeneric("getListA")})
3 | setGeneric("getListB", function(object) { standardGeneric("getListB")})
4 | setGeneric("getIntersection", function(object) {
5 | standardGeneric("getIntersection")})
6 | setGeneric("getUnion", function(object) {
7 | standardGeneric("getUnion")})
8 | setGeneric("getGenomeSize", function(object) {
9 | standardGeneric("getGenomeSize")})
10 | setGeneric("getTested", function(object) { standardGeneric("getTested")})
11 | setGeneric("getContbl", function(object) { standardGeneric("getContbl")})
12 | setGeneric("getPval", function(object) { standardGeneric("getPval")})
13 | setGeneric("getOddsRatio", function(object) { standardGeneric("getOddsRatio")})
14 | setGeneric("getJaccard", function(object) { standardGeneric("getJaccard")})
15 | setGeneric("setListA<-", function(object, value) {
16 | standardGeneric("setListA<-") })  # used as: setListA(obj) <- value
17 | setGeneric("setListB<-", function(object, value) {
18 | standardGeneric("setListB<-") })  # used as: setListB(obj) <- value
19 | setGeneric("setGenomeSize<-", function(object, value) {
20 | standardGeneric("setGenomeSize<-") })  # used as: setGenomeSize(obj) <- value
21 | setGeneric("testGeneOverlap", function(object) {
22 | standardGeneric("testGeneOverlap") })  # single-argument generic, like the getters
23 |
24 |
25 | #### GeneOverlapMatrix: generics for slot getters and result extractors ####
26 | setGeneric("getGsetA", function(object) { standardGeneric("getGsetA") } )
27 | setGeneric("getGsetB", function(object) { standardGeneric("getGsetB") } )
28 | setGeneric("getSelfCompare", function(object) {
29 | standardGeneric("getSelfCompare")})
30 | setGeneric("getMatrix",  # `name` lists the allowed statistics; first is default
31 | function(object, name=c("pval", "odds.ratio", "intersection",
32 | "union", "Jaccard")) {
33 | standardGeneric("getMatrix")
34 | }
35 | )
36 | setGeneric("getNestedList",  # `name` lists the allowed components
37 | function(object, name=c("intersection", "union",
38 | "cont.tbl")) {
39 | standardGeneric("getNestedList")
40 | }
41 | )
42 | setGeneric("drawHeatmap",  # Heatmap generic; takes `object` plus plot options.
43 |            function(object, what=c("odds.ratio", "Jaccard"), log.scale=FALSE,
44 |                     adj.p=FALSE, cutoff=.05, ncolused=9,  # FALSE: F is rebindable
45 |                     grid.col=c("Greens", "Blues", "Greys",
46 |                                "Oranges", "Purples", "Reds"),
47 |                     note.col="red") {
48 |                standardGeneric("drawHeatmap")
49 |            }
50 | )
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
--------------------------------------------------------------------------------
/man/newGOM.Rd:
--------------------------------------------------------------------------------
1 | \name{newGOM}
2 | \alias{newGOM}
3 | %- Also NEED an '\alias' for EACH other topic documented here.
4 | \title{
5 | Constructor for the GeneOverlapMatrix class
6 | %% ~~function to do ... ~~
7 | }
8 | \description{
9 | Use this function to create objects of the GeneOverlapMatrix class.
10 | %% ~~ A concise (1-5 lines) description of what the function does. ~~
11 | }
12 | \usage{
13 | newGOM(gsetA, gsetB=list(), genome.size=NULL,
14 | spec=c('mm9.gene', 'hg19.gene', 'rn4.gene'))
15 | }
16 | %- maybe also 'usage' for other objects documented here.
17 | \arguments{
18 | \item{gsetA}{
19 | Gene set A as a named list with each element being a vector/factor of gene names.
20 | }
21 | \item{gsetB}{
22 | Gene set B as a named list with each element being a vector/factor of gene names.
23 | }
24 | \item{genome.size}{
25 | The number of genes in the genome as an integer.
26 | }
27 | \item{spec}{
28 | A string description of the genome to use. There are a few preset genome sizes to choose from for a user's convenience.
29 | }
30 | %% ~~Describe \code{x} here~~
31 | }
32 | \details{
33 | This will create a matrix so that each grid represents the comparison between the two gene lists from the two gene sets. Given two gene sets A and B, the matrix will represent all comparisons between gene lists in A vs. gene lists in B. The set A will be shown as rows and the set B will be shown as columns. If only gene set A is given, the matrix will represent all comparisons between gene lists within the gene set and only the upper triangular matrix will be used.
34 | %% ~~ If necessary, more details than the description above ~~
35 | }
36 | \value{
37 | A GeneOverlapMatrix object. Use accessors to access its internal structure and members. Use show or print to obtain summarized information. Use \code{drawHeatmap} to visualize it.
38 | %% ~Describe the value returned
39 | %% If it is a LIST, use
40 | %% \item{comp1 }{Description of 'comp1'}
41 | %% \item{comp2 }{Description of 'comp2'}
42 | %% ...
43 | }
44 |
45 | %% ~Make other sections like Warning with \section{Warning }{....} ~
46 |
47 | \seealso{
48 | \code{\link{GeneOverlapMatrix-class}}, \code{\link{GeneOverlap-class}}
49 | %% ~~objects to See Also as \code{\link{help}}, ~~~
50 | }
51 | \examples{
52 | data(GeneOverlap)
53 | gom.obj <- newGOM(hESC.ChIPSeq.list, hESC.RNASeq.list, gs.RNASeq)
54 | gom.obj
55 | drawHeatmap(gom.obj)
56 | }
57 | % Add one or more standard keywords, see file 'KEYWORDS' in the
58 | % R documentation directory.
59 | \keyword{ ~kwd1 }
60 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
61 |
--------------------------------------------------------------------------------
/R/GeneOverlap-accessors.R:
--------------------------------------------------------------------------------
1 | #### Read-only ####
2 | # Unconditional getters: each one hands back its slot exactly as stored.
3 | setMethod("getListA", "GeneOverlap", function(object) object@listA)
4 | setMethod("getListB", "GeneOverlap", function(object) object@listB)
5 | setMethod("getIntersection", "GeneOverlap", function(object) object@intersection)
6 | setMethod("getUnion", "GeneOverlap", function(object) object@union)
7 | setMethod("getGenomeSize", "GeneOverlap", function(object) object@genome.size)
8 | # Reads the is.tested flag slot.
9 | setMethod("getTested", "GeneOverlap", function(object) object@is.tested)
10 | setMethod(
11 |     "getContbl", "GeneOverlap",
12 |     function(object) {
13 |         # Untested objects yield a warning and an empty 0 x 0 matrix.
14 |         if(!object@is.tested) {
15 |             warning("Test has not been performed yet.\n")
16 |             return(matrix(nrow=0, ncol=0))
17 |         }
18 |         object@cont.tbl
19 |     }
20 | )
21 | setMethod(
22 |     "getPval", "GeneOverlap",
23 |     function(object) {
24 |         # Untested objects yield a warning and NA instead of the pval slot.
25 |         if(!object@is.tested) {
26 |             warning("Test has not been performed yet.\n")
27 |             return(NA)
28 |         }
29 |         object@pval
30 |     }
31 | )
32 | setMethod(
33 |     "getOddsRatio", "GeneOverlap",
34 |     function(object) {
35 |         # Untested objects yield a warning and NA instead of the odds.ratio slot.
36 |         if(!object@is.tested) {
37 |             warning("Test has not been performed yet.\n")
38 |             return(NA)
39 |         }
40 |         object@odds.ratio
41 |     }
42 | )
43 | setMethod(
44 |     "getJaccard", "GeneOverlap",
45 |     function(object) {
46 |         # Untested objects yield a warning and NA instead of the Jaccard slot.
47 |         if(!object@is.tested) {
48 |             warning("Test has not been performed yet.\n")
49 |             return(NA)
50 |         }
51 |         object@Jaccard
52 |     }
53 | )
54 |
55 | #### Writable methods ####
56 | setReplaceMethod(
57 |     "setListA", "GeneOverlap",
58 |     function(object, value) {
59 |         # Store list A, refresh the derived set slots, reset the tested flag.
60 |         object@listA <- as.character(value)  # NOTE(review): duplicates kept; confirm.
61 |         object@intersection <- intersect(object@listA, object@listB)
62 |         object@union <- union(object@listA, object@listB)
63 |         object@is.tested <- FALSE  # Literal FALSE; the alias F can be rebound.
64 |         validObject(object)
65 |         object
66 |     }
67 | )
68 | setReplaceMethod(
69 |     "setListB", "GeneOverlap",
70 |     function(object, value) {
71 |         # Store list B, refresh the derived set slots, reset the tested flag.
72 |         object@listB <- as.character(value)  # NOTE(review): duplicates kept; confirm.
73 |         object@intersection <- intersect(object@listA, object@listB)
74 |         object@union <- union(object@listA, object@listB)
75 |         object@is.tested <- FALSE  # Literal FALSE; the alias F can be rebound.
76 |         validObject(object)
77 |         object
78 |     }
79 | )
80 | setReplaceMethod(
81 |     "setGenomeSize", "GeneOverlap",
82 |     function(object, value) {
83 |         # Store the new genome size and reset the tested flag.
84 |         object@genome.size <- value
85 |         object@is.tested <- FALSE  # Literal FALSE; the alias F can be rebound.
86 |         validObject(object)  # Errors if the updated object is invalid.
87 |         object
88 |     }
89 | )
90 |
91 |
92 |
93 |
--------------------------------------------------------------------------------
/man/GeneOverlapMatrix.Rd:
--------------------------------------------------------------------------------
1 | \name{GeneOverlapMatrix}
2 | \alias{GeneOverlapMatrix-class}
3 | \alias{show,GeneOverlapMatrix-method}
4 | \alias{print,GeneOverlapMatrix-method}
5 | %- Also NEED an '\alias' for EACH other topic documented here.
6 | \title{
7 | Matrix representation of the pairwise overlaps between two gene sets
8 | }
9 | \description{
10 | Given one or two gene sets, each containing one or more gene lists, create a
11 | matrix to represent all pairwise comparisons between the gene lists. This
12 | class also provides functions to visualize the matrix.
13 | }
14 | \usage{
15 | \S4method{show}{GeneOverlapMatrix}(object)
16 | \S4method{print}{GeneOverlapMatrix}(x, ...)
17 | }
18 | \arguments{
19 | \item{object}{A GeneOverlapMatrix object.}
20 | \item{x}{A GeneOverlapMatrix object.}
21 | \item{...}{They are not used.}
22 | }
23 | \details{
24 | The problem is stated as the representation of all pairwise comparisons
25 | between two gene sets, each containing one or more gene lists. This is
26 | represented as a matrix where the rows correspond to one gene set and the
27 | columns correspond to the other. Each grid represents the overlap between
28 | two gene lists of the corresponding row and column. This class calls the
29 | GeneOverlap constructor to create objects that represent the overlapping
30 | information. When there is only one gene set, the matrix represents the
31 | self-comparison within the gene set and only the upper triangular matrix is
32 | used.
33 |
34 | The significance of gene overlap is characterized by two pieces of
35 | information: odds ratio and p-value. This class provides functions to
36 | visualize this information as a heatmap. The color gradient of each
37 | grid represents the odds ratio while the text superimposed on the grid
38 | states the p-value. It is also possible to let the color gradient
39 | represent the Jaccard index - a measurement of similarity between two sets.
40 | %% ~~ If necessary, more details than the description above ~~
41 | }
42 | \author{
43 | Li Shen <\email{li.shen@mssm.edu}>
44 |
45 | Mount Sinai profile:\url{http://www.mountsinai.org/profiles/li-shen}
46 |
47 | Personal:\url{http://www.linkedin.com/in/lshen/}
48 | %% ~~who you are~~
49 | }
50 |
51 | %% ~Make other sections like Warning with \section{Warning }{....} ~
52 |
53 | \seealso{
54 | \code{\link{GeneOverlap-class}}
55 | %% ~~objects to See Also as \code{\link{help}}, ~~~
56 | }
57 | \examples{
58 | data(GeneOverlap)
59 | gom.obj <- newGOM(hESC.ChIPSeq.list, hESC.RNASeq.list, gs.RNASeq)
60 | gom.obj
61 | print(gom.obj)
62 | drawHeatmap(gom.obj)
63 | }
64 | % Add one or more standard keywords, see file 'KEYWORDS' in the
65 | % R documentation directory.
66 | \keyword{ htest }
67 | \keyword{ hplot }
68 | \keyword{ graphs }% __ONLY ONE__ keyword per line
69 |
--------------------------------------------------------------------------------
/man/getReadonlyMatrix.Rd:
--------------------------------------------------------------------------------
1 | \name{getReadonlyMatrix}
2 | \alias{getGsetA}
3 | \alias{getGsetA,GeneOverlapMatrix-method}
4 | \alias{getGsetB}
5 | \alias{getGsetB,GeneOverlapMatrix-method}
6 | \alias{getSelfCompare}
7 | \alias{getSelfCompare,GeneOverlapMatrix-method}
8 | \alias{getMatrix}
9 | \alias{getMatrix,GeneOverlapMatrix-method}
10 | \alias{getNestedList}
11 | \alias{getNestedList,GeneOverlapMatrix-method}
12 | \alias{[}
13 | \alias{[,GeneOverlapMatrix-method}
14 |
15 | %- Also NEED an '\alias' for EACH other topic documented here.
16 | \title{
17 | Read-only accessors for the various slots of the GeneOverlapMatrix class
18 | %% ~~function to do ... ~~
19 | }
20 | \description{
21 | The gsetA and gsetB slots contain the gene set A and B as named lists. The
22 | self.compare slot contains the Boolean label for whether self-comparison is
23 | performed. Use getMatrix to retrieve intersection, union, Jaccard index,
24 | p-value and odds ratio as a matrix. Use getNestedList to retrieve the
25 | intersection and union gene lists and contingency tables as a nested list
26 | (outer list represents columns and inner list represents rows). Use
27 | brackets [] to retrieve a particular GeneOverlap object within the
28 | GeneOverlapMatrix object.
29 | %% ~~ A concise (1-5 lines) description of what the function does. ~~
30 | }
31 | \usage{
32 | \S4method{getGsetA}{GeneOverlapMatrix}(object)
33 | \S4method{getGsetB}{GeneOverlapMatrix}(object)
34 | \S4method{getSelfCompare}{GeneOverlapMatrix}(object)
35 | \S4method{getMatrix}{GeneOverlapMatrix}(object, name=c("pval",
36 | "odds.ratio", "intersection", "union", "Jaccard"))
37 | \S4method{getNestedList}{GeneOverlapMatrix}(object, name=c(
38 | "intersection", "union", "cont.tbl"))
39 | \S4method{[}{GeneOverlapMatrix}(x, i, j)
40 | }
41 | %- maybe also 'usage' for other objects documented here.
42 | \arguments{
43 | \item{object}{A GeneOverlapMatrix object.}
44 | \item{x}{A GeneOverlapMatrix object.}
45 | \item{name}{A string description of the information to retrieve. Use pval and
46 | odds.ratio to retrieve p-values and odds ratios. Use Jaccard to retrieve
47 | Jaccard indices. In the context of matrix retrieval, intersection and union
48 | will return the numbers of genes. While in the case of nested list retrieval,
49 | intersection and union will return the actual gene lists. Use cont.tbl to
50 | retrieve the contingency tables.}
51 | \item{i, j}{Integer or character indices to retrieve GeneOverlap objects from
52 | the matrix.}
53 | }
54 | \details{
55 | When character indices are used, they should match the names of gsetA or
56 | gsetB.
57 | %% ~~ If necessary, more details than the description above ~~
58 | }
59 |
60 | %% ~Make other sections like Warning with \section{Warning }{....} ~
61 |
62 | \seealso{
63 | \code{\link{GeneOverlapMatrix-class}}
64 | %% ~~objects to See Also as \code{\link{help}}, ~~~
65 | }
66 | \examples{
67 | data(GeneOverlap)
68 | gom.obj <- newGOM(hESC.ChIPSeq.list, hESC.RNASeq.list, gs.RNASeq)
69 | getMatrix(gom.obj, "odds.ratio")
70 | inter.nl <- getNestedList(gom.obj, "intersection")
71 | str(inter.nl)
72 | go.k4.high <- gom.obj[1, 1]
73 | go.k4.high
74 | }
75 | % Add one or more standard keywords, see file 'KEYWORDS' in the
76 | % R documentation directory.
77 | \keyword{ ~kwd1 }
78 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
79 |
--------------------------------------------------------------------------------
/man/getReadonly.Rd:
--------------------------------------------------------------------------------
1 | \name{getReadonly}
2 | \alias{getIntersection}
3 | \alias{getIntersection,GeneOverlap-method}
4 | \alias{getUnion}
5 | \alias{getUnion,GeneOverlap-method}
6 | \alias{getTested}
7 | \alias{getTested,GeneOverlap-method}
8 | \alias{getContbl}
9 | \alias{getContbl,GeneOverlap-method}
10 | \alias{getPval}
11 | \alias{getPval,GeneOverlap-method}
12 | \alias{getOddsRatio}
13 | \alias{getOddsRatio,GeneOverlap-method}
14 | \alias{getJaccard}
15 | \alias{getJaccard,GeneOverlap-method}
16 | %- Also NEED an '\alias' for EACH other topic documented here.
17 | \title{
18 | Read-only accessors for the "intersection", "union", "is.tested",
19 | "cont.tbl", "pval", "odds.ratio", "Jaccard" slots of the GeneOverlap class
20 | %% ~~function to do ... ~~
21 | }
22 | \description{
23 | The intersection and union slots contain the gene names as character
24 | vectors. The is.tested slot contains a Boolean label indicating whether the
25 | object has been tested or not. The cont.tbl slot contains the contingency
26 | table as a matrix. The pval and odds.ratio slots contain the p-value and
27 | odds ratio as numerics, respectively. The Jaccard slot contains the Jaccard
28 | index as a numeric.
29 | %% ~~ A concise (1-5 lines) description of what the function does. ~~
30 | }
31 | \usage{
32 | \S4method{getIntersection}{GeneOverlap}(object)
33 | \S4method{getUnion}{GeneOverlap}(object)
34 | \S4method{getTested}{GeneOverlap}(object)
35 | \S4method{getContbl}{GeneOverlap}(object)
36 | \S4method{getPval}{GeneOverlap}(object)
37 | \S4method{getOddsRatio}{GeneOverlap}(object)
38 | \S4method{getJaccard}{GeneOverlap}(object)
39 | }
40 | %- maybe also 'usage' for other objects documented here.
41 | \arguments{
42 | \item{object}{
43 | A GeneOverlap object.
44 | %% ~~Describe \code{x} here~~
45 | }
46 | }
47 | \details{
48 | If the GeneOverlap object has not been tested yet, the returned Jaccard
49 | index, p-value and odds ratio will be NA, and the contingency table will be
50 | an empty matrix.
51 | %% ~~ If necessary, more details than the description above ~~
52 | }
53 | \value{
54 | \item{intersection}{A character vector of the overlapping genes.}
55 | \item{union}{A character vector of the genes in the union of A and
56 | B.}
57 | \item{is.tested}{A Boolean indicating whether the overlap test has been
58 | performed or not.}
59 | \item{cont.tbl}{A matrix representing the contingency table.}
60 | \item{pval}{A numeric giving the significance of the overlap.}
61 | \item{odds.ratio}{A numeric giving the odds ratio in comparison to the
62 | genomic background.}
63 | \item{Jaccard}{A numeric giving the Jaccard index between the two sets.}
64 | %% ~Describe the value returned
65 | %% If it is a LIST, use
66 | %% \item{comp1 }{Description of 'comp1'}
67 | %% \item{comp2 }{Description of 'comp2'}
68 | %% ...
69 | }
70 |
71 | %% ~Make other sections like Warning with \section{Warning }{....} ~
72 |
73 | \examples{
74 | data(GeneOverlap)
75 | go.obj <- newGeneOverlap(hESC.ChIPSeq.list$H3K4me3,
76 | hESC.ChIPSeq.list$H3K27me3,
77 | genome.size=gs.RNASeq)
78 | go.obj <- testGeneOverlap(go.obj)
79 | head(getIntersection(go.obj))
80 | head(getUnion(go.obj))
81 | getTested(go.obj)
82 | getContbl(go.obj)
83 | getPval(go.obj)
84 | getOddsRatio(go.obj)
85 | getJaccard(go.obj)
86 | }
87 | % Add one or more standard keywords, see file 'KEYWORDS' in the
88 | % R documentation directory.
89 | \keyword{ ~kwd1 }
90 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
91 |
--------------------------------------------------------------------------------
/R/GeneOverlap-methods.R:
--------------------------------------------------------------------------------
1 | #### Display methods ####
# Compact display of a GeneOverlap object: the sizes of both lists and of
# their intersection, followed by the p-value and Jaccard index if the object
# has been tested.
setMethod(
    "show", "GeneOverlap",
    function(object) {
        cat("GeneOverlap object:\n")
        cat(sprintf("listA size=%d\n", length(object@listA)))
        cat(sprintf("listB size=%d\n", length(object@listB)))
        cat(sprintf("Intersection size=%d\n", length(object@intersection)))

        if(!object@is.tested) {
            cat("Overlap testing has not been performed yet.\n")
            return(invisible(NULL))
        }

        # P-values below 0.01 are shown in scientific notation. A plain
        # if/else replaces the scalar ifelse(); isTRUE() keeps an NA p-value
        # printing as "NA" instead of raising an error.
        pval <- object@pval
        pval.txt <- if(isTRUE(pval < .01)) {
            format(pval, scientific=TRUE, digits=2)
        } else {
            format(pval, digits=2)
        }
        cat(sprintf("Overlapping p-value=%s\n", pval.txt))
        cat(sprintf("Jaccard Index=%.1f\n", object@Jaccard))
        invisible(NULL)
    }
)
21 |
# Detailed display of a GeneOverlap object: sizes and the first three entries
# of listA, listB, their intersection and union, the genome size and, if the
# object has been tested, the contingency table, p-value, odds ratio and
# Jaccard index.
#
# Args:
#   x:   a GeneOverlap object.
#   ...: not used.
# Returns `x` invisibly, following the convention for print methods.
setMethod(
    "print", "GeneOverlap",
    function(x, ...) {
        # First three entries of a gene vector, separated by spaces.
        preview <- function(genes) paste(head(genes, n=3), collapse=" ")

        cat("Detailed information about this GeneOverlap object:\n")
        cat(sprintf("listA size=%d, e.g. %s\n",
                    length(x@listA), preview(x@listA)))
        cat(sprintf("listB size=%d, e.g. %s\n",
                    length(x@listB), preview(x@listB)))
        cat(sprintf("Intersection size=%d, e.g. %s\n",
                    length(x@intersection), preview(x@intersection)))
        cat(sprintf("Union size=%d, e.g. %s\n",
                    length(x@union), preview(x@union)))
        cat(sprintf("Genome size=%d\n", x@genome.size))

        if(x@is.tested) {
            cat("# Contingency Table:\n")
            print(x@cont.tbl)
            # P-values below 0.01 are shown in scientific notation. A plain
            # if/else replaces the scalar ifelse(); isTRUE() keeps an NA
            # p-value printing as "NA" instead of raising an error.
            pval.txt <- if(isTRUE(x@pval < .01)) {
                format(x@pval, scientific=TRUE, digits=2)
            } else {
                format(x@pval, digits=2)
            }
            cat(sprintf("Overlapping p-value=%s\n", pval.txt))
            cat(sprintf("Odds ratio=%.1f\n", x@odds.ratio))
            cat("Overlap tested using Fisher's exact test (alternative=greater)\n")
            cat(sprintf("Jaccard Index=%.1f\n", x@Jaccard))
        } else {
            cat("Overlap has not been tested yet. Use testGeneOverlap method.\n")
        }

        invisible(x)
    }
)
55 |
56 | #### Test method ####
# Run the overlap test on a GeneOverlap object.
#
# Builds the 2x2 contingency table (rows notB/inB, columns notA/inA) from the
# list sizes and the genome size, runs a one-sided Fisher's exact test
# (alternative='greater') and computes the Jaccard index.
#
# Returns the object with `cont.tbl`, `odds.ratio`, `pval` and `Jaccard`
# filled in and `is.tested` set to TRUE.
setMethod(
    "testGeneOverlap", "GeneOverlap",
    function(object) {
        n.inter <- length(object@intersection)
        n.union <- length(object@union)

        # Contingency table, filled column by column:
        #   (notB, notA)  (notB, inA)
        #   (inB,  notA)  (inB,  inA)
        object@cont.tbl <- matrix(
            c(object@genome.size - n.union,
              length(object@listB) - n.inter,
              length(object@listA) - n.inter,
              n.inter),
            ncol=2,
            dimnames=list(c('notB', 'inB'), c('notA', 'inA')))

        # Perform Fisher's exact test. A failure is deliberately not
        # propagated: the overlap is then reported as not significant
        # (odds ratio 0, p-value 1).
        res.fisher <- try(fisher.test(object@cont.tbl, alternative='greater'),
                          silent=TRUE)
        if(inherits(res.fisher, "try-error")) {
            object@odds.ratio <- .0
            object@pval <- 1.
        } else {
            # Drop the "odds ratio" name so the slot holds a bare number.
            object@odds.ratio <- unname(res.fisher$estimate)
            object@pval <- res.fisher$p.value
        }

        # Jaccard index; defined as 0 when both lists are empty.
        object@Jaccard <- if(n.union == 0) 0 else n.inter / n.union

        object@is.tested <- TRUE
        object
    }
)
92 |
--------------------------------------------------------------------------------
/inst/unitTests/test_AllGeneOverlapMethods.R:
--------------------------------------------------------------------------------
# Unit test covering construction, accessors and testing of GeneOverlap
# objects, including NA entries, disjoint lists, identical lists, empty lists
# and invalid constructor arguments.
test_GeneOverlap <- function() {
    # Assert the p-value and odds ratio reported by a GeneOverlap object.
    expect_stats <- function(go, pval, odds.ratio) {
        checkEqualsNumeric(getPval(go), pval)
        checkEqualsNumeric(getOddsRatio(go), odds.ratio)
    }
    # Construct, test and assert in one step.
    expect_tested <- function(a, b, gs, pval, odds.ratio) {
        expect_stats(testGeneOverlap(newGeneOverlap(a, b, genome.size=gs)),
                     pval, odds.ratio)
    }

    # Reference result: {A, B} vs {B, C} in a genome of 10 genes.
    ref <- fisher.test(matrix(c(7, 1, 1, 1), nrow=2), alternative="greater")
    dupA <- c("A", "B", "A")
    dupB <- c("B", "C", "B")

    # Vanilla test: duplicates are removed by the constructor.
    go <- newGeneOverlap(dupA, dupB, genome.size=10)
    checkEquals(getListA(go), c("A", "B"))
    checkEquals(getListB(go), c("B", "C"))
    checkEqualsNumeric(getGenomeSize(go), 10)
    checkEqualsNumeric(length(getIntersection(go)), 1)
    checkEqualsNumeric(length(getUnion(go)), 3)
    # Statistics are NA until the test has been run.
    expect_stats(go, NA, NA)
    go <- testGeneOverlap(go)
    expect_stats(go, ref$p.value, ref$estimate)

    # Gene lists contain invalid entries.
    go <- newGeneOverlap(c("A", NA, "B"), dupB, genome.size=10)
    checkEquals(getListA(go), c("A", "B"))
    go <- testGeneOverlap(go)
    expect_stats(go, ref$p.value, ref$estimate)

    # Test NO overlap.
    expect_tested(c("A", "B"), c("C", "D"), 10, 1, 0)

    # Test absolute overlap (p-value=0).
    expect_tested(LETTERS, LETTERS, 100, 0, Inf)

    # Test empty gene list.
    expect_tested("A", NULL, 10, 1, 0)
    expect_tested(NULL, "B", 10, 1, 0)
    expect_tested(NULL, NULL, 10, 1, 0)

    # Unknown species.
    checkException(newGeneOverlap("A", "B", spec="speciesdoesnotexist"))

    # Genome smaller than gene lists combined.
    checkException(newGeneOverlap("A", "B", genome.size=1))
}
58 |
# Unit test covering construction, accessors and indexing of
# GeneOverlapMatrix objects, plus rejection of invalid inputs and
# out-of-range indices.
test_GeneOverlapMatrix <- function() {
    # Vanilla test: a 1 x 1 matrix comparing {A, B} with {B, C}.
    genesA <- c("A", "B")
    genesB <- c("B", "C")
    ref <- fisher.test(matrix(c(7, 1, 1, 1), nrow=2), alternative="greater")
    gsetA <- list(A=genesA)
    gsetB <- list(B=genesB)
    gom <- newGOM(gsetA, gsetB, genome.size=10)

    checkEquals(getGsetA(gom), gsetA)
    checkEquals(getGsetB(gom), gsetB)
    checkEquals(getSelfCompare(gom), FALSE)

    # Numeric summaries, checked in the order pval, odds.ratio,
    # intersection, union.
    expected <- list(pval=ref$p.value, odds.ratio=ref$estimate,
                     intersection=1, union=3)
    for(what in names(expected)) {
        checkEqualsNumeric(getMatrix(gom, what), expected[[what]])
    }

    # Nested lists hold the actual gene names.
    checkEquals(getNestedList(gom, "intersection")[[1]][[1]], "B")
    checkEquals(getNestedList(gom, "union")[[1]][[1]], c("A", "B", "C"))

    # Indexing returns the same object as building the pair directly.
    go <- testGeneOverlap(newGeneOverlap(genesA, genesB, genome.size=10))
    checkEquals(gom[1, 1], go)
    checkEquals(gom["A", "B"], go)

    # Wrong inputs.
    checkException(newGOM(gsetA, matrix(c(1, 2, 3, 4), nrow=2)))

    # gsetA self-comparison not enough size.
    checkException(newGOM(gsetA))
    checkException(newGOM(list()))

    # gsetA cannot be empty.
    checkException(newGOM(list(), gsetB))

    # Accessing index out of boundary.
    checkException(gom[2, 1])
    checkException(gom[1, -2])
    checkException(gom["C", 1])
    checkException(gom[1, "C"])
}
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
--------------------------------------------------------------------------------
/R/AllClasses.R:
--------------------------------------------------------------------------------
1 | #### GeneOverlap ####
# S4 class holding two gene lists, their intersection and union, the genome
# size used as background and, once tested, the contingency table, odds
# ratio, p-value and Jaccard index.
#
# Validity: neither gene list may contain NA, `genome.size` must be a single
# non-NA number, and the union may not be larger than the genome size.
setClass(
    "GeneOverlap",
    representation(listA="character",
                   listB="character",
                   intersection="character",
                   union="character",
                   genome.size="numeric",
                   cont.tbl="matrix",
                   odds.ratio="numeric",
                   pval="numeric",
                   Jaccard="numeric",
                   is.tested="logical"),
    validity=function(object) {
        # any() collapses the element-wise is.na() result to one value.
        # Feeding the raw vector to `&&` is an error in R >= 4.3 for lists
        # with more than one gene, and older R versions only looked at the
        # first element.
        if(any(is.na(object@listA))) {
            stop("listA cannot be NA. Check your input.")
        }
        if(any(is.na(object@listB))) {
            stop("listB cannot be NA. Check your input.")
        }

        # The size comparison below needs exactly one non-missing value.
        if(length(object@genome.size) != 1L || is.na(object@genome.size)) {
            stop("genome.size must be a single non-NA number.")
        }
        if(length(object@union) > object@genome.size) {
            stop("Union must NOT be larger than genome size.")
        }

        TRUE
    }
)
27 |
28 | # Constructor
# Construct an untested GeneOverlap object.
#
# Args:
#   listA, listB: gene lists; coerced to character, de-duplicated and
#                 stripped of NA entries.
#   genome.size:  background size; when NULL it is taken from `spec`.
#   spec:         one of 'mm9.gene', 'hg19.gene', 'rn4.gene'; only consulted
#                 when `genome.size` is NULL.
# Returns a GeneOverlap object with `intersection` and `union` precomputed
# and `is.tested` set to FALSE.
newGeneOverlap <- function(listA, listB, genome.size=NULL,
                           spec=c('mm9.gene', 'hg19.gene', 'rn4.gene')) {
    # Coerce to character, drop duplicates, then drop NA entries.
    tidy <- function(genes) {
        genes <- unique(as.character(genes))
        genes[!is.na(genes)]
    }
    listA <- tidy(listA)
    listB <- tidy(listB)

    # Fall back to the built-in gene count of the chosen species.
    if(is.null(genome.size)) {
        spec <- match.arg(spec)
        known.sizes <- c(mm9.gene=23000, hg19.gene=25000, rn4.gene=17000)
        genome.size <- known.sizes[[spec]]
    }
    genome.size <- as.integer(genome.size)

    new("GeneOverlap",
        listA=listA,
        listB=listB,
        intersection=intersect(listA, listB),
        union=union(listA, listB),
        genome.size=genome.size,
        is.tested=FALSE)
}
51 |
52 |
53 | #### GeneOverlapMatrix ####
# S4 class holding one or two gene sets (lists of gene lists), a flag telling
# whether the matrix is a self-comparison of gsetA, and the nested list of
# GeneOverlap objects for all pairwise comparisons.
#
# Validity: with a non-empty gsetB, gsetA must be non-empty and self.compare
# must be FALSE; with an empty gsetB, gsetA needs at least two gene lists and
# self.compare must be TRUE.
setClass(
    "GeneOverlapMatrix",
    representation(gsetA="list",
                   gsetB="list",
                   self.compare="logical",
                   go.nested.list="list"),
    validity=function(object) {
        if(length(object@gsetB) > 0) {
            # Two gene sets: A vs. B comparison.
            stopifnot(length(object@gsetA) > 0 && !object@self.compare)
        } else {
            # One gene set: self-comparison needs at least one pair.
            stopifnot(length(object@gsetA) > 1 && object@self.compare)
        }
    }
)
69 |
70 | # Constructor.
# Construct a GeneOverlapMatrix by testing all pairwise overlaps.
#
# Args:
#   gsetA:       list of gene lists (matrix rows).
#   gsetB:       list of gene lists (matrix columns); when empty, gsetA is
#                compared against itself.
#   genome.size: forwarded to newGeneOverlap().
#   spec:        forwarded to newGeneOverlap().
# Returns a GeneOverlapMatrix whose `go.nested.list` is a list of columns,
# each column being a list of tested GeneOverlap objects (one per row).
newGOM <- function(gsetA, gsetB=list(), genome.size=NULL,
                   spec=c('mm9.gene', 'hg19.gene', 'rn4.gene')) {
    stopifnot(is.list(gsetA) && is.list(gsetB))

    # Build and test the GeneOverlap object for one pair of gene lists.
    tested_pair <- function(a, b) {
        testGeneOverlap(newGeneOverlap(a, b, genome.size, spec))
    }

    self.compare <- length(gsetB) == 0
    if(self.compare) {
        stopifnot(length(gsetA) >= 2)
        # Rows cover lists 1..n-1, columns cover lists 2..n.
        n.sets <- length(gsetA)
        row.iter <- seq_len(n.sets - 1)
        col.iter <- seq_len(n.sets)[-1]
        go.nested.list <- lapply(col.iter, function(ci) {
            this.col <- lapply(row.iter, function(ri) {
                if(ri < ci) {
                    tested_pair(gsetA[[ri]], gsetA[[ci]])
                } else {
                    # Cells on or below the diagonal are placeholders: a
                    # tested overlap of two empty lists.
                    testGeneOverlap(newGeneOverlap(NULL, NULL))
                }
            })
            names(this.col) <- names(gsetA)[row.iter]
            this.col
        })
        names(go.nested.list) <- names(gsetA)[col.iter]
    } else {
        stopifnot(length(gsetA) >= 1 && length(gsetB) >= 1)
        go.nested.list <- lapply(gsetB, function(b) {
            this.col <- lapply(gsetA, function(a) tested_pair(a, b))
            names(this.col) <- names(gsetA)
            this.col
        })
        names(go.nested.list) <- names(gsetB)
    }

    new("GeneOverlapMatrix", gsetA=gsetA, gsetB=gsetB,
        self.compare=self.compare, go.nested.list=go.nested.list)
}
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
--------------------------------------------------------------------------------
/man/GeneOverlap.Rd:
--------------------------------------------------------------------------------
1 | \name{GeneOverlap}
2 | \alias{GeneOverlap-class}
3 | \alias{show,GeneOverlap-method}
4 | \alias{print,GeneOverlap-method}
5 | %- Also NEED an '\alias' for EACH other topic documented here.
6 | \title{
7 | Test overlap between two gene lists using Fisher's exact test.
8 | }
9 | \description{
10 | Given two gene lists, tests the significance of their overlap in comparison
11 | with a genomic background. The null hypothesis is that the odds ratio is no
12 | larger than 1. The alternative is that the odds ratio is larger than 1.0.
13 | It returns the p-value, estimated odds ratio and intersection.
14 | }
15 | \usage{
16 | \S4method{show}{GeneOverlap}(object)
17 | \S4method{print}{GeneOverlap}(x, ...)
18 | }
19 | \arguments{
20 | \item{object}{A GeneOverlap object.}
21 | \item{x}{A GeneOverlap object.}
22 | \item{...}{They are not used.}
23 | }
24 | \details{
25 | The problem of gene overlap testing can be described by a hypergeometric
26 | distribution where one gene list A defines the number of white balls in the
27 | urn and the other gene list B defines the number of white balls in the
28 | draw. Assume the total number of genes is \var{n}, the number of genes in A
29 | is \var{a} and the number of genes in B is \var{b}. If the intersection
30 | between A and B is \var{t}, the probability density of seeing \var{t} can
31 | be calculated as:
32 |
33 | \code{dhyper(t, a, n - a, b)}
34 |
35 | without loss of generality, we can assume \var{b} <= \var{a}. So the
36 | largest possible value for \var{t} is \var{b}. Therefore, the p-value of
37 | seeing intersection \var{t} is:
38 |
39 | \code{sum(dhyper(t:b, a, n - a, b))}
40 |
41 | Fisher's exact test frames this problem slightly differently, but its
42 | calculation is also based on the hypergeometric distribution. It starts by
43 | constructing a contingency table:
44 |
45 | \code{matrix(c(n - union(A,B), setdiff(A,B),
46 | setdiff(B,A), intersect(A,B)),
47 | nrow=2)}
48 |
49 | It therefore tests the independence between A and B and is conceptually
50 | more straightforward. The GeneOverlap class is implemented using Fisher's
51 | exact test.
52 |
53 | It is better to illustrate a concept using some example. Let's assume we
54 | have a genome of size 200 and two gene lists with 70 and 30 genes each. If
55 | the intersection between the two is 10, the hypergeometric way to calculate
56 | the p-value is:
57 |
58 | \code{sum(dhyper(10:30, 70, 130, 30))}
59 |
60 | which gives us p-value 0.6561562. If we use Fisher's exact test, we should
61 | do:
62 |
63 | \code{fisher.test(matrix(c(110, 20, 60, 10), nrow=2),
64 | alternative="greater")}
65 |
66 | which gives exactly the same p-value. In addition, the Fisher's test
67 | function also provides an estimated odds ratio, confidence interval, etc.
68 |
69 | The Jaccard index is a measurement of similarity between two sets. It is
70 | defined as the size of the intersection divided by the size of the union.
71 | %% ~~ If necessary, more details than the description above ~~
72 | }
73 | \references{
74 | \url{http://en.wikipedia.org/wiki/Fisher's_exact_test}
75 |
76 | \url{http://en.wikipedia.org/wiki/Jaccard_index}
77 | %% ~put references to the literature/web site here ~
78 | }
79 | \author{
80 | Li Shen <\email{li.shen@mssm.edu}>
81 |
82 | Mount Sinai profile:\url{http://www.mountsinai.org/profiles/li-shen}
83 |
84 | Personal:\url{http://www.linkedin.com/in/lshen/}
85 | %% ~~who you are~~
86 | }
87 | \note{
88 | Although Fisher's exact test is chosen for implementation, it should be
89 | noted that the R implementation of Fisher's exact test is slower than using
90 | \code{dhyper} directly. As an example, run:
91 |
92 | \code{system.time(sum(dhyper(10e3:30e3, 70e3, 130e3, 30e3)))}
93 |
94 | takes around 0.016s to finish, whereas running:
95 |
96 | \code{system.time(fisher.test(matrix(c(110e3, 20e3, 60e3, 10e3), nrow=2),
97 | alternative="greater"))}
98 |
99 | takes around 0.072s. In practice, this time difference can often be ignored.
100 | %% ~~further notes~~
101 | }
102 |
103 | %% ~Make other sections like Warning with \section{Warning }{....} ~
104 |
105 | \seealso{
106 | \code{\link{GeneOverlapMatrix-class}}
107 | %% ~~objects to See Also as \code{\link{help}}, ~~~
108 | }
109 | \examples{
110 | data(GeneOverlap)
111 | go.obj <- newGeneOverlap(hESC.ChIPSeq.list$H3K4me3,
112 | hESC.ChIPSeq.list$H3K9me3,
113 | gs.RNASeq)
114 | go.obj <- testGeneOverlap(go.obj)
115 | go.obj # show.
116 | print(go.obj) # more details.
117 | getContbl(go.obj) # contingency table.
118 | }
119 | % Add one or more standard keywords, see file 'KEYWORDS' in the
120 | % R documentation directory.
121 | \keyword{ htest }
122 | \keyword{ hplot }
123 | \keyword{ graphs }% __ONLY ONE__ keyword per line
124 |
--------------------------------------------------------------------------------
/R/GeneOverlapMatrix-methods.R:
--------------------------------------------------------------------------------
1 | #### show and print functions ####
# Compact display of a GeneOverlapMatrix: its dimensions and the size of each
# gene list in geneset A and, unless the matrix is a self-comparison, in
# geneset B.
setMethod("show", "GeneOverlapMatrix",
          function(object) {
              dims <- dim(getMatrix(object, "pval"))
              cat(sprintf("A <%d x %d> GeneOverlapMatrix object\n",
                          dims[1], dims[2]))

              gsetA <- getGsetA(object)
              gsetB <- getGsetB(object)

              cat("Geneset A sizes:\n")
              print(vapply(gsetA, length, integer(1)))

              # A self-comparison has no geneset B to report.
              if(getSelfCompare(object)) {
                  return(cat("Matrix is based on self-comparison of geneset A.\n"))
              }
              cat("Geneset B sizes:\n")
              print(vapply(gsetB, length, integer(1)))
          }
)
19 |
# Detailed display of a GeneOverlapMatrix: prints the intersection-size,
# p-value, odds-ratio and Jaccard-index matrices, each under its own banner.
setMethod("print", "GeneOverlapMatrix",
          function(x, ...) {
              # Fetch one matrix, then print its banner and its contents.
              section <- function(what, banner) {
                  mat <- getMatrix(x, what)
                  cat(banner)
                  print(mat)
              }

              cat("A GeneOverlapMatrix object:\n")
              section("intersection", "###### Intersection ######\n")
              section("pval", "###### P-value ######\n")
              section("odds.ratio", "###### Odds Ratio ######\n")
              section("Jaccard", "###### Jaccard Index ######\n")
          }
)
41 |
42 | #### Draw function ####
# Draw a GeneOverlapMatrix as a heatmap.
#
# Cell colors encode the odds ratio (optionally log2-scaled) or the Jaccard
# index; the text in each cell is the p-value. Cells whose p-value is at or
# above `cutoff` are set to a neutral value and labelled "N.S."; in a
# self-comparison the cells below the diagonal are labelled "--".
#
# Args:
#   object:    a GeneOverlapMatrix object.
#   what:      "odds.ratio" or "Jaccard" - the statistic shown as color.
#   log.scale: plot log2 of the odds ratio (ignored for Jaccard).
#   adj.p:     adjust p-values with Benjamini-Hochberg before the cutoff.
#   cutoff:    p-value threshold, in (0, 1].
#   ncolused:  number of colors requested from brewer.pal().
#   grid.col:  name of the palette passed to brewer.pal().
#   note.col:  color of the cell text.
# Returns whatever heatmap.2() returns.
#
# The `log.scale=F, adj.p=F` defaults are left exactly as written so the
# signature keeps matching whatever the manual page declares for it.
setMethod(
    "drawHeatmap", "GeneOverlapMatrix",
    function(object, what=c("odds.ratio", "Jaccard"), log.scale=F, adj.p=F,
             cutoff=.05, ncolused=9, grid.col=c("Greens", "Blues", "Greys",
                                                "Oranges", "Purples", "Reds"),
             note.col="red") {

        # Arguments setup.
        stopifnot(cutoff > 0 && cutoff <= 1)
        what <- match.arg(what)
        grid.col <- match.arg(grid.col)
        use.log <- what == "odds.ratio" && log.scale

        # Matrix values.
        pv.mat <- getMatrix(object, "pval")
        plot.mat <- getMatrix(object, what)
        if(use.log) {
            plot.mat <- log2(plot.mat)
        }

        # In a self-comparison only the cells on or above the diagonal hold
        # real comparisons.
        pv.mask <- NULL
        if(object@self.compare) {
            pv.mask <- row(pv.mat) <= col(pv.mat)
        }

        # Adjust p-values if needed.
        if(adj.p) {
            if(object@self.compare) {
                pv.mat[pv.mask] <- p.adjust(pv.mat[pv.mask], method='BH')
            } else {
                # Assigning through [] keeps the dimensions and dimnames.
                pv.mat[] <- p.adjust(pv.mat, method='BH')
            }
        }

        # Marker value of insignificant events: 0 on the log2 and Jaccard
        # scales, 1 for raw odds ratios.
        insig.val <- if(use.log || what == "Jaccard") 0 else 1

        # Use p-value cutoff to mask insignificant cells.
        insig <- pv.mat >= cutoff
        plot.mat[insig] <- insig.val

        # Cell notes: p-values, in scientific notation when below 0.01.
        note.mat <- format(pv.mat, digits=1)
        tiny <- pv.mat < .01
        note.mat[tiny] <- format(pv.mat, digits=1, scientific=TRUE)[tiny]
        # Label from the p-value mask rather than by comparing plotted
        # values with the marker, so a significant cell whose statistic
        # equals the marker value is not mislabelled.
        note.mat[insig] <- "N.S."
        if(object@self.compare) {
            note.mat[!pv.mask] <- "--"
        }

        # Configure heatmap graphic properties.
        row_sep <- seq_len(nrow(plot.mat) - 1)
        col_sep <- seq_len(ncol(plot.mat) - 1)
        longedge <- max(nrow(plot.mat), ncol(plot.mat))
        row_cexrc <- 0.4 + 1/log10(longedge + 2)
        col_cexrc <- row_cexrc
        key_size <- 0.2 + 1 / log10(longedge + 4)
        # Margins grow with the longest column/row label.
        margins_use <- c(max(nchar(colnames(plot.mat))) * 0.8 + 5,
                         max(nchar(rownames(plot.mat))) * 0.8 + 5)
        main.txt <- switch(what,
                           odds.ratio=if(log.scale) {
                               "log2(Odds Ratio)"
                           } else {
                               "Odds Ratio"
                           },
                           Jaccard="Jaccard Index")
        footnote <- "N.S.: Not Significant; --: Ignored"

        # Draw the heatmap!
        heatmap.2(plot.mat, cellnote=note.mat,
                  main=main.txt, xlab=footnote,
                  col=brewer.pal(ncolused, grid.col), notecol=note.col,
                  margins=margins_use, colsep=col_sep, rowsep=row_sep,
                  key=TRUE, keysize=key_size,
                  cexRow=row_cexrc, cexCol=col_cexrc,
                  scale='none', Colv=NA, Rowv=NA, trace='none',
                  dendrogram='none', density.info='none',
                  sepcolor='white', sepwidth=c(0.002,0.002),
                  notecex=1.6)
    }
)
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
--------------------------------------------------------------------------------
/vignettes/GeneOverlap.Rnw:
--------------------------------------------------------------------------------
1 | %\VignetteIndexEntry{Testing and visualizing gene overlaps with the "GeneOverlap" package}
2 | %\VignettePackage{GeneOverlap}
3 |
4 | \documentclass{article}
5 |
6 | <