├── .Rbuildignore
├── tests
    └── annotate_unit_tests.R
├── R
    ├── test_annotate_package.R
    ├── AllGenerics.R
    ├── compatipleVersions.R
    ├── ACCNUMStats.R
    ├── pmid2MIAME.R
    ├── zzz.R
    ├── LL2homology.R
    ├── getData.R
    ├── getSeq4ACC.R
    ├── serializeEnv.R
    ├── readGEOAnn.R
    ├── AnnMaps.R
    ├── getPMInfo.R
    ├── html.R
    ├── blastSequences.R
    ├── homoData.R
    ├── Amat.R
    ├── isValidKey.R
    ├── GOhelpers.R
    ├── findNeighbors.R
    └── pubMedAbst.R
├── data
    ├── hgu95All.rda
    ├── hgByChroms.rda
    ├── hgCLengths.rda
    ├── hgu95Achrom.rda
    ├── hgu95Asym.rda
    ├── hgu95Achroloc.rda
    └── hgu95AProbLocs.rda
├── vignettes
    ├── OLDmake
    ├── frames.png
    ├── DPChart.eps
    ├── DPChart.png
    ├── noframes.png
    ├── annotate.Rfc
    ├── annotate.bib
    ├── chromLOC.Rmd
    ├── useProbeInfo.Rnw
    └── prettyOutput.Rnw
├── inst
    ├── misc
    │   └── pmLinkGen.pdf
    ├── unitTests
    │   └── test_getAnnMap.R
    └── NEWS.Rd
├── man
    ├── hgCLengths.Rd
    ├── makeAnchor.Rd
    ├── pmid2MIAME.Rd
    ├── pm.titles.Rd
    ├── mapOrgs.Rd
    ├── hgByChroms.Rd
    ├── pmidQuery.Rd
    ├── getEvidence.Rd
    ├── buildPubMedAbst.Rd
    ├── hgu95Asym.Rd
    ├── hgu95AProbLocs.Rd
    ├── hgu95All.Rd
    ├── hasGOannote.Rd
    ├── hgu95Achroloc.Rd
    ├── organism.Rd
    ├── hgu95Achrom.Rd
    ├── buildChromLocation.Rd
    ├── aqListGOIDs.Rd
    ├── compatibleVersions.Rd
    ├── usedChromGenes.Rd
    ├── getPMInfo.Rd
    ├── PWAmat.Rd
    ├── entrezGeneByID.Rd
    ├── entrezGeneQuery.Rd
    ├── filterGOByOntology.Rd
    ├── annPkgName.Rd
    ├── pm.abstGrep.Rd
    ├── PMIDAmat.Rd
    ├── UniGeneQuery.Rd
    ├── getOntology.Rd
    ├── getSeq4Acc.Rd
    ├── dropECode.Rd
    ├── accessionToUID.Rd
    ├── GO2heatmap.Rd
    ├── ACCNUMStats.Rd
    ├── GOmnplot.Rd
    ├── updateSymbolsToValidKeys.Rd
    ├── pm.getabst.Rd
    ├── serializeEnv.Rd
    ├── pubmed.Rd
    ├── isValidkey.Rd
    ├── getGOTerm.Rd
    ├── getAnnMap.Rd
    ├── genbank.Rd
    ├── HTMLPage-class.Rd
    ├── readGEOAnn.Rd
    ├── homoData-class.Rd
    ├── setRepository.Rd
    ├── pmAbst2HTML.Rd
    ├── pubMedAbst-class.Rd
    ├── blastSequences.Rd
    ├── getSYMBOL.Rd
    ├── LL2homology.Rd
    ├── chromLocation-class.Rd
    ├── getTDRows.Rd
    ├── findNeighbors.Rd
    ├── htmlpage.Rd
    └── chrCats.Rd
├── DESCRIPTION
├── TODO
└── NAMESPACE


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | debian
2 | 


--------------------------------------------------------------------------------
/tests/annotate_unit_tests.R:
--------------------------------------------------------------------------------
1 | BiocGenerics:::testPackage("annotate")  # run annotate's unit tests via the non-exported BiocGenerics helper
2 | 


--------------------------------------------------------------------------------
/R/test_annotate_package.R:
--------------------------------------------------------------------------------
1 | ## Developer entry point: runs the annotate unit tests through the
2 | ## non-exported BiocGenerics test helper.
3 | .test <- function() {
4 |     BiocGenerics:::testPackage("annotate")
5 | }
6 | 


--------------------------------------------------------------------------------
/data/hgu95All.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/annotate/devel/data/hgu95All.rda


--------------------------------------------------------------------------------
/data/hgByChroms.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/annotate/devel/data/hgByChroms.rda


--------------------------------------------------------------------------------
/data/hgCLengths.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/annotate/devel/data/hgCLengths.rda


--------------------------------------------------------------------------------
/data/hgu95Achrom.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/annotate/devel/data/hgu95Achrom.rda


--------------------------------------------------------------------------------
/data/hgu95Asym.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/annotate/devel/data/hgu95Asym.rda


--------------------------------------------------------------------------------
/vignettes/OLDmake:
--------------------------------------------------------------------------------
1 | chromLocs.dvi: chromLocs.tex
2 | 	latex chromLocs
3 | 	latex chromLocs
4 | 
5 | 


--------------------------------------------------------------------------------
/vignettes/frames.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/annotate/devel/vignettes/frames.png


--------------------------------------------------------------------------------
/data/hgu95Achroloc.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/annotate/devel/data/hgu95Achroloc.rda


--------------------------------------------------------------------------------
/vignettes/DPChart.eps:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/annotate/devel/vignettes/DPChart.eps


--------------------------------------------------------------------------------
/vignettes/DPChart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/annotate/devel/vignettes/DPChart.png


--------------------------------------------------------------------------------
/vignettes/noframes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/annotate/devel/vignettes/noframes.png


--------------------------------------------------------------------------------
/data/hgu95AProbLocs.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/annotate/devel/data/hgu95AProbLocs.rda


--------------------------------------------------------------------------------
/inst/misc/pmLinkGen.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/annotate/devel/inst/misc/pmLinkGen.pdf


--------------------------------------------------------------------------------
/R/AllGenerics.R:
--------------------------------------------------------------------------------
1 | ## S4 methods so we can use non-package based annotation databases (e.g., from AnnotationHub)
2 | ## as if they were installed packages
3 | 
4 | setGeneric("isValidKey", function(ids, pkg) standardGeneric("isValidKey"))
5 | 
6 | setGeneric("allValidKeys", function(pkg) standardGeneric("allValidKeys"))
7 | 


--------------------------------------------------------------------------------
/inst/unitTests/test_getAnnMap.R:
--------------------------------------------------------------------------------
 1 | ## Set up: library() (unlike require()) stops with an error when a
 2 | ## package is missing, so a broken setup is reported at load time.
 3 | library(org.Hs.eg.db)
 4 | library(annotate)
 5 | library(RUnit)
 6 | 
 7 | ## RUnit test for getAnnMap(): checks the class of the object returned
 8 | ## for a map that exists in org.Hs.eg.db and for one that does not.
 9 | test_getAnnMap <- function(){
10 |   ## test for a map that exists
11 |   map <- getAnnMap("CHRLOC","org.Hs.eg.db")
12 |   ## is() tests S4 class membership and always yields a single
13 |   ## TRUE/FALSE, which is what checkTrue() needs
14 |   checkTrue( methods::is(map, "AnnDbMap") )
15 |   ## and test for a map that does not (but which is available via select)
16 |   map2 <- getAnnMap("ONTOLOGY","org.Hs.eg.db")
17 |   checkTrue( methods::is(map2, "FlatBimap") )
18 | }
19 | 


--------------------------------------------------------------------------------
/man/hgCLengths.Rd:
--------------------------------------------------------------------------------
 1 | \name{hgCLengths}
 2 | \alias{hgCLengths}
 3 | \non_function{}
 4 | \title{ A dataset which contains the lengths (in base pairs) of the
 5 |   human chromosomes. }
 6 | \usage{data(hgCLengths)}
 7 | \format{A vector containing 24 values, each corresponding to the total
 8 | chromosome length. }
 9 | \description{
10 |   The data is described above.
11 | }
12 | \source{
13 |   UCSC Human Genome Project
14 | }
15 | \examples{
16 |   data(hgCLengths)
17 | }
18 | \keyword{datasets}
19 | 


--------------------------------------------------------------------------------
/man/makeAnchor.Rd:
--------------------------------------------------------------------------------
 1 | \name{makeAnchor}
 2 | \alias{makeAnchor}
 3 | \title{A Function To Generate HTML Anchors}
 4 | \description{
 5 |   This function will take a set of links and titles and will generate
 6 |   HTML anchor tags out of these values
 7 | }
 8 | \usage{
 9 | makeAnchor(link, title, toMain = FALSE)
10 | }
11 | \arguments{
12 |   \item{link}{A vector of URLs}
13 |   \item{title}{A vector of website names}
14 |   \item{toMain}{Used for frame pages}
15 | }
16 | \value{
17 |   A vector of HTML anchor tags
18 | }
19 | \author{Jeff Gentry}
20 | \examples{
21 | makeAnchor("http://www.bioconductor.org","Bioconductor")
22 | }
23 | \keyword{utilities}
24 | 


--------------------------------------------------------------------------------
/man/pmid2MIAME.Rd:
--------------------------------------------------------------------------------
 1 | \name{pmid2MIAME}
 2 | \alias{pmid2MIAME}
 3 | \title{use web to populate MIAME instance with pubmed details  }
 4 | \description{use web to populate MIAME instance with pubmed details
 5 | }
 6 | \usage{
 7 | pmid2MIAME(pmid)
 8 | }
 9 | \arguments{
10 |   \item{pmid}{ string encoding PMID }
11 | }
12 | \details{
13 | uses XML library to decode parts of the query response and
14 | load a MIAME object
15 | }
16 | \value{
17 | An instance of class \code{\link[Biobase:MIAME-class]{MIAME}}
18 | }
19 | \author{Vince Carey <stvjc@channing.harvard.edu>}
20 | 
21 | \examples{
22 | if (interactive()) pmid2MIAME("9843569")
23 | }
24 | \keyword{ models }
25 | 


--------------------------------------------------------------------------------
/R/compatipleVersions.R:
--------------------------------------------------------------------------------
 1 | # Checks the DESCRIPTION files of the packages whose names are passed
 2 | # to see whether they all have the same version number.
 3 | #
 4 | # Arguments:
 5 | #   ...  character strings naming installed packages.
 6 | # Value:
 7 | #   TRUE when exactly one distinct version string is found, FALSE
 8 | #   otherwise (including when no names are given).  Stops with an error
 9 | #   when the version of a package cannot be read.
10 | 
11 | compatibleVersions <- function(...){
12 |     pkgs <- list(...)
13 |     # One slot per package; avoids growing a vector inside the loop.
14 |     versions <- character(length(pkgs))
15 |     for(i in seq_along(pkgs)){
16 |         # Lookup failures are handled locally: packageDescription() warns
17 |         # and returns NA for an unknown package.  The session options
18 |         # 'warn' and 'show.error.messages' are deliberately left alone
19 |         # (they used to be overwritten with fixed values here).
20 |         version <- tryCatch(packageDescription(pkgs[[i]], fields = "Version"),
21 |                             warning = function(w) NA_character_,
22 |                             error = function(e) NA_character_)
23 |         if(length(version) != 1 || is.na(version)){
24 |             stop(paste("Package", pkgs[[i]], "is not in the library"))
25 |         }
26 |         versions[i] <- version
27 |     }
28 | 
29 |     length(unique(versions)) == 1
30 | }
31 | 


--------------------------------------------------------------------------------
/man/pm.titles.Rd:
--------------------------------------------------------------------------------
 1 | \name{pm.titles}
 2 | \alias{pm.titles}
 3 | \title{Obtain the titles of the PubMed abstracts.}
 4 | \description{
 5 |  This function returns the titles from a list of PubMed abstracts.
 6 | }
 7 | \usage{
 8 | pm.titles(absts)
 9 | }
10 | \arguments{
11 |   \item{absts}{The list of PubMed abstracts.}
12 | }
13 | \details{
14 |   It simply uses \code{sapply}.
15 | }
16 | \value{
17 | A character vector of length equal to the number of abstracts. Each
18 | element is the title of the corresponding abstract.
19 | }
20 | \author{Robert Gentleman}
21 | \seealso{\code{\link{pm.abstGrep}}}
22 | \examples{
23 |   library("hgu95av2.db")
24 |   hoxa9 <- "37806_at"
25 |   absts <- pm.getabst(hoxa9, "hgu95av2")
26 |   pm.titles(absts)[[1]][[1]]
27 | }
28 | \keyword{manip}
29 | 


--------------------------------------------------------------------------------
/man/mapOrgs.Rd:
--------------------------------------------------------------------------------
 1 | \name{mapOrgs}
 2 | \alias{mapOrgs}
 3 | \alias{getOrgNameNCode}
 4 | \title{Functions to map to organism IDs used by NCBI homology.}
 5 | \description{
 6 |  These functions help map to organism identifiers used at the NCBI.
 7 | }
 8 | \usage{
 9 | mapOrgs(toMap, what = c("code","name"))
10 | getOrgNameNCode()
11 | }
12 | \arguments{
13 |   \item{toMap}{\code{toMap} a vector of character strings}
14 |   \item{what}{\code{what} a character string that can either be "code"
15 |     or "name".}
16 | }
17 | \details{
18 |   mapOrgs converts organism codes to scientific names.
19 | }
20 | \value{
21 |   mapOrgs returns a vector of character strings.
22 | }
23 | \references{\url{ftp://ftp.ncbi.nih.gov/pub/HomoloGene/README}}
24 | \author{Jianhua Zhang}
25 | 
26 | \keyword{manip}
27 | 


--------------------------------------------------------------------------------
/man/hgByChroms.Rd:
--------------------------------------------------------------------------------
 1 | \name{hgByChroms}
 2 | \alias{hgByChroms}
 3 | \non_function{}
 4 | \title{ A dataset to show the human genome base pair locations per
 5 |   chromosome. }
 6 | \usage{data(hgByChroms)}
 7 | \format{
 8 |   A list, with the names consisting of the names of the chromosomes in
 9 |   the human genome (thus 24 elements).  Each element consists of a named
10 |   vector of +/- values - where each value represents the location of a
11 |   base pair (the numeric value is the location, while the +/- denotes
12 |   the strand value), with the name providing the name of the base pair.
13 | }
14 | \description{
15 |   The data is described above.
16 | }
17 | \source{
18 |   Cheng Li of the Dana-Farber Cancer Institute.
19 | }
20 | \examples{
21 |  data(hgByChroms)
22 | }
23 | \keyword{datasets}
24 | 


--------------------------------------------------------------------------------
/R/ACCNUMStats.R:
--------------------------------------------------------------------------------
 1 | ACCNUMStats <- function(pkgName){
 2 |     ## Classify each entry of pkgName's ACCNUM map, then tabulate the types.
 3 |     accTypes <- sapply(as.list(getAnnMap("ACCNUM", pkgName)), whatACC)
 4 |     table(unlist(accTypes))
 5 | }
 6 | 
 7 | ## Classify an accession number by its format.
 8 | ##
 9 | ## accs: character vector of accession numbers; only the first element is
10 | ##       examined and, if it holds several ";"-separated values, only the
11 | ##       first of those.
12 | ## Returns one of the strings "NA", "UniGene", "RefSeq", "GBAcc", "Image"
13 | ## or "Unknown".
14 | whatACC <- function(accs){
15 |     if(length(accs) == 0 || is.na(accs[1])){
16 |         return("NA")
17 |     }
18 |     ## strsplit() returns a list, so pull out the first token as a plain
19 |     ## string; matching against the list itself would test a deparsed
20 |     ## 'c("a", "b")' whenever the entry held more than one value.
21 |     acc <- strsplit(accs[1], ";", fixed = TRUE)[[1]][1]
22 |     if(is.na(acc)){
23 |         ## empty string: nothing to classify
24 |         return("Unknown")
25 |     }
26 |     if(grepl("^[a-zA-Z]{2}\\.[0-9]+$", acc)){
27 |         return("UniGene")
28 |     }
29 |     ## The prefixes are grouped so that "^" and the numeric part apply to
30 |     ## every prefix, not just to the first and the last alternative.
31 |     if(grepl("^(NP|NG|NM|NC|XR|XM|XP)_[0-9]+([._][0-9]+)?$", acc)){
32 |         return("RefSeq")
33 |     }
34 |     if(grepl("^[A-Z]+[0-9]+[._]?[0-9]$", acc)){
35 |         return("GBAcc")
36 |     }
37 |     if(grepl("^[0-9]+$", acc)){
38 |         return("Image")
39 |     }
40 | 
41 |     "Unknown"
42 | }
43 | 
44 | 


--------------------------------------------------------------------------------
/man/pmidQuery.Rd:
--------------------------------------------------------------------------------
 1 | \name{pmidQuery}
 2 | \alias{pmidQuery}
 3 | \title{A function to query PubMed}
 4 | \description{
 5 |   Given a PMID, will create a URL which can be used to open a
 6 |   browser and retrieve the specified information from PubMed.
 7 | }
 8 | \usage{
 9 | pmidQuery(query)
10 | }
11 | \arguments{
12 |   \item{query}{ The PubMed ID (or IDs)}
13 | }
14 | \details{
15 |   Using published details from NCBI we construct an appropriate string
16 |   for directing a web browser to the information available at the NCBI.
17 | }
18 | \value{
19 |   A character string containing the appropriate URL
20 | }
21 | \references{NCBI, \url{https://www.ncbi.nih.gov/} }
22 | \author{Jeff Gentry}
23 | 
24 | \seealso{\code{\link{UniGeneQuery}}}
25 | 
26 | \examples{
27 |   a <- "9695952"
28 |   pmidQuery(a)
29 | }
30 | \keyword{interface}
31 | 
32 | 


--------------------------------------------------------------------------------
/man/getEvidence.Rd:
--------------------------------------------------------------------------------
 1 | \name{getEvidence}
 2 | \alias{getEvidence}
 3 | \title{Get the Evidence codes for a set of GO terms.}
 4 | \description{
 5 |   For each mapping of a gene to a GO term there is a set of evidence
 6 |   codes that are used. Genes can be mapped using one, or more evidence
 7 |   codes and this function obtains the evidence codes for all genes
 8 |   provided in the input list.
 9 | }
10 | \usage{
11 | getEvidence(inlist)
12 | }
13 | \arguments{
14 |   \item{inlist}{A list of GO identifiers.}
15 | }
16 | \value{
17 |   A \code{list} of the same length as the input list, each element is a
18 |   vector of evidence codes.
19 | }
20 | \author{R. Gentleman}
21 | 
22 | \seealso{\code{\link{getOntology}}, \code{\link{dropECode}}}
23 | 
24 | \examples{
25 |  library("hgu95av2.db")
26 |  bb <- hgu95av2GO[["39613_at"]]
27 |  getEvidence(bb)
28 | }
29 | \keyword{manip}
30 | 


--------------------------------------------------------------------------------
/man/buildPubMedAbst.Rd:
--------------------------------------------------------------------------------
 1 | \name{buildPubMedAbst}
 2 | \alias{buildPubMedAbst}
 3 | \title{A function to generate an instantiation of a pubMedAbst class }
 4 | \description{
 5 |   This function will take in an XML tree object and will create an
 6 |   instance of a pubMedAbst class.  This instance is returned to the caller.
 7 | }
 8 | \usage{
 9 |   buildPubMedAbst(xml)
10 | }
11 | \arguments{
12 |   \item{xml}{An XMLTree object that corresponds to a Pubmed abstract.}
13 | }
14 | \value{
15 |   This function returns an instantiation of a pubMedAbst object to the
16 |   caller.
17 | }
18 | 
19 | \author{ Jeff Gentry }
20 | \seealso{\code{\link{pubmed}},\code{\link{genbank}}}
21 | \examples{
22 |    x <- pubmed("9695952","8325638","8422497")
23 |    a <- xmlRoot(x)
24 |    numAbst <- length(xmlChildren(a))
25 |    absts <- list()
26 |    for (i in 1:numAbst) {
27 |       absts[[i]] <- buildPubMedAbst(a[[i]])
28 |    }
29 | 
30 | }
31 | \keyword{ utilities }
32 | 


--------------------------------------------------------------------------------
/man/hgu95Asym.Rd:
--------------------------------------------------------------------------------
 1 | \name{hgu95Asym}
 2 | \non_function{}
 3 | \docType{data}
 4 | \alias{hgu95Asym}
 5 | 
 6 | \title{Annotation data for the Affymetrix HGU95A GeneChip}
 7 | 
 8 | \usage{data(hgu95Asym)}
 9 | 
10 | \format{
11 |   This data set provides an environment with mappings from the Affymetrix
12 |   identifiers to gene symbol.
13 |   The environment functions like a hashtable and can be accessed using
14 |   \code{mget}.
15 |   If the returned value is \code{NA} then the current 
16 |   mapping was unable to identify this. Mappings and data sources are
17 |   constantly evolving so updating often is recommended.
18 | }
19 | \description{Data, in the form of environments for the Affymetrix U95A
20 |  chip.}
21 | 
22 | \source{The \code{AnnBuilder} package.}
23 | 
24 | \examples{
25 |  data(hgu95Asym)
26 |  data(sample.ExpressionSet)
27 |  mget(featureNames(sample.ExpressionSet)[330:340], env=hgu95Asym, ifnotfound=NA)
28 | }
29 | \keyword{datasets}
30 | 


--------------------------------------------------------------------------------
/man/hgu95AProbLocs.Rd:
--------------------------------------------------------------------------------
 1 | \name{hgu95AProbLocs}
 2 | \docType{methods}
 3 | \alias{hgu95AProbLocs}
 4 | \title{chromLocation instance hgu95AProbLocs, an example of a chromLocation 
 5 | object}
 6 | \description{  gives chromosome locations for Affy U95 probes }
 7 | \section{Slots}{
 8 |   \describe{
 9 |     \item{\code{species}:}{Object of class character, value: 'Human'}
10 |     \item{\code{datSource}:}{Object of class character, value}
11 |     \item{\code{nChrom}:}{Object of class numeric, value: 24}
12 |     \item{\code{chromNames}:}{Object of class character, value: 1:22, X,Y}
13 |     \item{\code{chromLocs}:}{Object of class list, value: long: sense and antisense locations
14 |       associated with affy identifiers}
15 |     \item{\code{chromLengths}:}{Object of class numeric, }
16 |     \item{\code{geneToChrom}:}{Object of class environment}
17 |     \item{\code{class}:}{Object of class character, value: 'chromLocation'}
18 |   }
19 | }
20 | \keyword{methods}
21 | 


--------------------------------------------------------------------------------
/man/hgu95All.Rd:
--------------------------------------------------------------------------------
 1 | \name{hgu95All}
 2 | \non_function{}
 3 | \docType{data}
 4 | \alias{hgu95All}
 5 | 
 6 | \title{Annotation data for the Affymetrix HGU95A GeneChip}
 7 | 
 8 | \usage{data(hgu95All)}
 9 | \description{Data, in the form of environments for the Affymetrix U95A
10 |  chip.}
11 | 
12 | 
13 | \format{
14 |   These data sets provide environments with mappings from the Affymetrix
15 |   identifiers to Entrez Gene identifiers.
16 |   The environment functions like a hashtable and can be accessed using
17 |   \code{mget}.
18 |   If the returned value is \code{NA} then the current 
19 |   mapping was unable to identify this. Mappings and data sources are
20 |   constantly evolving so updating often is recommended.
21 | }
22 | 
23 | \source{The \code{AnnBuilder} package.}
24 | 
25 | \examples{
26 |  data(hgu95All)
27 |  data(sample.ExpressionSet)
28 |  mget(featureNames(sample.ExpressionSet)[330:340], env=hgu95All, ifnotfound=NA)
29 | 
30 | }
31 | \keyword{datasets}
32 | 


--------------------------------------------------------------------------------
/man/hasGOannote.Rd:
--------------------------------------------------------------------------------
 1 | \name{hasGOannote}
 2 | \alias{hasGOannote}
 3 | \title{Check for GO annotation}
 4 | \description{
 5 |   Given a GO term, or a vector of GO terms and an ontology this function
 6 |   determines which of the terms have GO annotation in the specified
 7 |   ontology.
 8 | }
 9 | \usage{
10 | hasGOannote(x, which="MF")
11 | }
12 | \arguments{
13 |   \item{x}{A character vector, an instance of the \code{GOTerms} class
14 |   or a list of \code{GOTerms}.}
15 |   \item{which}{One of "MF", "BP" or "CC"}
16 | }
17 | \details{
18 |   The available GO annotation is searched and a determination of
19 |   whether a specific GO identifier has a value in the specified
20 |   ontology is made.
21 | }
22 | \value{
23 |   A logical vector of the same length as \code{x}.
24 | }
25 | \author{R. Gentleman}
26 | 
27 | \seealso{\code{\link{get}}}
28 | 
29 | \examples{
30 |  library("GO.db")
31 |  t1 <- "GO:0003680"
32 |  hasGOannote(t1)
33 |  hasGOannote(t1, "BP")
34 | }
35 | \keyword{manip}
36 | 


--------------------------------------------------------------------------------
/man/hgu95Achroloc.Rd:
--------------------------------------------------------------------------------
 1 | \name{hgu95Achroloc}
 2 | \alias{hgu95Achroloc}
 3 | \non_function{}
 4 | 
 5 | \title{Annotation data for the Affymetrix HGU95A GeneChip}
 6 | \usage{data(hgu95Achroloc)}
 7 | 
 8 | \format{
 9 |   These data sets provide environments with mappings from the Affymetrix
10 |   identifiers to chromosomal location, in bases.
11 |   The environments function like hashtables and can be accessed using
12 |   \code{mget}.
13 |   If the returned value is \code{NA} then the current 
14 |   mapping was unable to identify this. Mappings and data sources are
15 |   constantly evolving so updating often is recommended.
16 | 
17 | }
18 | \description{Data, in the form of environments for the Affymetrix U95A
19 |  chip.}
20 | 
21 | \source{The \code{AnnBuilder} package.}
22 | 
23 | \examples{
24 |  data(hgu95Achroloc)
25 |  data(sample.ExpressionSet)
26 |  mget(featureNames(sample.ExpressionSet)[330:340], env=hgu95Achroloc, 
27 |        ifnotfound=NA)
28 | 
29 | }
30 | \keyword{datasets}
31 | 


--------------------------------------------------------------------------------
/man/organism.Rd:
--------------------------------------------------------------------------------
 1 | \name{organism}
 2 | \alias{organism}
 3 | \alias{organism,character-method}
 4 | \title{Convenience function for getting the organism from an object or package}
 5 | 
 6 | \description{
 7 |   The most basic organism method just takes a character string (which
 8 |   represents a particular annotation package) and returns the organism
 9 |   that said package is based upon.
10 | }
11 | 
12 | \usage{
13 |   organism(object)
14 | }
15 | 
16 | \arguments{
17 |   \item{object}{a character string that names a package}
18 | }
19 | 
20 | \value{
21 |   The name of the organism used for this package or object
22 | }
23 |   
24 | 
25 | \author{Marc Carlson}
26 | 
27 | \examples{
28 |   require(hgu95av2.db)
29 |   ## get the organism for this annotation package
30 |   organism("hgu95av2")
31 | 
32 |   ## get the organism this object refers to
33 |   ## (for a ChromLocation object)
34 |   z <- buildChromLocation("hgu95av2")
35 |   organism(z)
36 | 
37 | }
38 | 
39 | \keyword{manip}
40 | 
41 | 


--------------------------------------------------------------------------------
/man/hgu95Achrom.Rd:
--------------------------------------------------------------------------------
 1 | \name{hgu95Achrom}
 2 | \non_function{}
 3 | \docType{data}
 4 | \alias{hgu95Achrom}
 5 | 
 6 | \title{Annotation data for the Affymetrix HGU95A GeneChip}
 7 | 
 8 | \usage{data(hgu95Achrom)
 9 | }
10 | 
11 | \format{
12 |   This data set provides an environment (treat as a hashtable)
13 |   with mappings from the Affymetrix
14 |   identifiers to chromosome number/name.
15 |   The environment functions like a hashtable and can be accessed using
16 |   \code{mget}.
17 |   If the returned value is \code{NA} then the current 
18 |   mapping was unable to identify this. Mappings and data sources are
19 |   constantly evolving so updating often is recommended.
20 | }
21 | \description{Data, in the form of environments for the Affymetrix U95A
22 |  chip.}
23 | 
24 | \source{The \code{AnnBuilder} package.}
25 | 
26 | \examples{
27 |  data(hgu95Achrom)
28 |  data(sample.ExpressionSet)
29 |  mget(featureNames(sample.ExpressionSet)[330:340], env=hgu95Achrom, ifnotfound=NA)
30 | }
31 | \keyword{datasets}
32 | 


--------------------------------------------------------------------------------
/man/buildChromLocation.Rd:
--------------------------------------------------------------------------------
 1 | \name{buildChromLocation}
 2 | \alias{buildChromLocation}
 3 | \title{A function to generate an instantiation of a chromLocation class}
 4 | \description{
 5 |   This function will take the name of a data package and build a
 6 |   chromLocation object representing that data set.
 7 | }
 8 | \usage{
 9 |   buildChromLocation(dataPkg)
10 | }
11 | \arguments{
12 |   \item{dataPkg}{The name of the data package to be used}
13 | }
14 | \details{
15 |   The requested data set must be available in the user's
16 |   \code{.libPaths()}, and the function will throw an error if this is
17 |   not the case.
18 | 
19 |   If the data package is present, the necessary information will be
20 |   extracted from the data package and a \code{chromLocation} object will
21 |   be created.
22 | }
23 | \value{
24 |   A \code{chromLocation} object representing the specified data set.
25 | }
26 | \author{Jeff Gentry}
27 | \examples{
28 |   library("hgu95av2.db")
29 |   z <- buildChromLocation("hgu95av2")
30 | }
31 | \keyword{utilities}
32 | 


--------------------------------------------------------------------------------
/man/aqListGOIDs.Rd:
--------------------------------------------------------------------------------
 1 | \name{aqListGOIDs}
 2 | \alias{aqListGOIDs}
 3 | 
 4 | \title{List GO Identifiers by GO Ontology}
 5 | \description{
 6 |   This function returns a character vector of all GO identifiers in the
 7 |   specified ontologies: Biological Process (BP), Cellular Component
 8 |   (CC), Molecular Function (MF).
 9 | }
10 | \usage{
11 | aqListGOIDs(ont)
12 | }
13 | 
14 | \arguments{
15 |   \item{ont}{A character vector specifying the two-letter codes of the
16 |     ontologies from which all GO IDs will be retrieved.  Entries must be
17 |   one of \code{"BP"}, \code{"CC"}, or \code{"MF"}.}
18 | }
19 | 
20 | \value{
21 |   A character vector of GO IDs.  The vector will contain all GO IDs in
22 |   the GO ontologies specified by the \code{ont} argument.
23 | }
24 | 
25 | \author{Seth Falcon}
26 | \examples{
27 | ## all GO IDs in BP
28 | bp_ids = aqListGOIDs("BP")
29 | length(bp_ids)
30 | 
31 | ## all GO IDs in BP or CC
32 | bp_or_cc_ids = aqListGOIDs(c("BP", "CC"))
33 | length(bp_or_cc_ids)
34 | }
35 | 
36 | \keyword{manip}
37 | 
38 | 


--------------------------------------------------------------------------------
/man/compatibleVersions.Rd:
--------------------------------------------------------------------------------
 1 | \name{compatibleVersions}
 2 | \alias{compatibleVersions}
 3 | 
 4 | \title{function to check to see if the packages represented by the names
 5 | passed have the same version number}
 6 | \description{
 7 |   This function takes the names of installed R packages and then checks
 8 |   to see if they all have the same version number.
 9 | }
10 | \usage{
11 | compatibleVersions(...)
12 | }
13 | \arguments{
14 |   \item{\dots}{\code{\dots} character strings for the names of R
15 |     packages that have been installed}
16 | }
17 | \details{
18 |   If all the packages have the same version number, the function returns
19 |   TRUE. Otherwise, the function returns FALSE
20 | }
21 | \value{
22 |   This function returns TRUE or FALSE depending on whether the packages
23 |   have the same version number
24 | }
25 | \author{Jianhua Zhang}
26 | \seealso{\code{\link{packageDescription}}}
27 | \examples{
28 |   library("hgu95av2.db")
29 |   library("GO.db")
30 |   compatibleVersions("hgu95av2.db", "GO.db")
31 | }
32 | \keyword{misc}
33 | 


--------------------------------------------------------------------------------
/man/usedChromGenes.Rd:
--------------------------------------------------------------------------------
 1 | \name{usedChromGenes}
 2 | \alias{usedChromGenes}
 3 | \title{A function to select used genes on a chromosome from an ExpressionSet.}
 4 | \description{
 5 |   Given an instance of an \code{ExpressionSet}, a \code{chromLocation} object
 6 |   and the name of a chromosome this function returns all genes represented
 7 |   in the \code{ExpressionSet} on the specified chromosome.
 8 | }
 9 | \usage{
10 | usedChromGenes(eSet, chrom, specChrom)
11 | }
12 | \arguments{
13 |   \item{eSet}{An instance of an \code{ExpressionSet} object.}
14 |   \item{chrom}{The name of the chromosome of interest.}
15 |   \item{specChrom}{An instance of a \code{chromLocation} object.}
16 | }
17 | \value{
18 |   Returns a vector of gene names that represent the genes from the
19 |   \code{ExpressionSet} that are on the specified chromosome.
20 | }
21 | \author{ Jeff Gentry}
22 | \examples{
23 |     data(sample.ExpressionSet)
24 |     data(hgu95AProbLocs)
25 |     usedChromGenes(sample.ExpressionSet, "1", hgu95AProbLocs)
26 | }
27 | \keyword{interface}
28 | 


--------------------------------------------------------------------------------
/man/getPMInfo.Rd:
--------------------------------------------------------------------------------
 1 | \name{getPMInfo}
 2 | \alias{getPMInfo}
 3 | \title{extract publication details and abstract from annotate::pubmed function output  }
 4 | \description{
 5 | extract publication details and abstract from annotate::pubmed function output  
 6 | }
 7 | \usage{
 8 | getPMInfo(x)
 9 | }
10 | \arguments{
11 |   \item{x}{ an object of class xmlDocument; assumed to be result of a pubmed() call}
12 | }
13 | \details{
14 | uses xmlDOMApply to extract and structure key features of the XML tree returned
15 | by annotate::pubmed()
16 | }
17 | \value{
18 | a list with one element per pubmed id processed by pubmed.  Each element of
19 | the list is in turn a list with elements for author list, title, journal
20 | info, and abstract text.
21 | }
22 | \author{Vince Carey <stvjc@channing.harvard.edu>}
23 | \note{this should be turned into a method returning an instance of
24 | a formal class representing articles.    }
25 | 
26 | 
27 | \examples{
28 | demo <- pubmed("11780146", 
29 |     "11886385", "11884611")
30 | getPMInfo(demo)
31 | }
32 | \keyword{ models }
33 | 


--------------------------------------------------------------------------------
/R/pmid2MIAME.R:
--------------------------------------------------------------------------------
 1 | ## Build a "MIAME" object from the PubMed record for an article.
 2 | ##
 3 | ## pmid:  PubMed identifier; handed to pubmed() as is and stored, coerced
 4 | ##        to character, in the pubMedIds slot.
 5 | ## Value: new("MIAME", ...) whose name is the first listed author's last
 6 | ##        name and initials, lab the <Affiliation> text, plus title and
 7 | ##        abstract.  Pieces absent from the record are set to "".
 8 | ## Stops with an error when the reply has no <PubmedArticle>/<Article>.
 9 | ## Only the first <PubmedArticle> of the reply is read.
10 | ##
11 | ## we find that some abstracts are text values of <Abstract>
12 | ## and some of <Abstract><AbstractText> ...
13 | ##
14 | ## need to think about validity checking here .. DTD?
15 | pmid2MIAME <- function(pmid)
16 | {
17 |     ## Text of a node, "" when the node is absent or carries no value.
18 |     ## NOTE(review): xmlValue() seems to give character(0) rather than
19 |     ## NULL for a missing node, so is.null() alone would miss it -- confirm.
20 |     textOf <- function(node) {
21 |         if (is.null(node)) return("")
22 |         val <- xmlValue(node)
23 |         if (length(val) == 0L || is.na(val[1L])) "" else val
24 |     }
25 | 
26 |     pmart <- xmlChildren(xmlRoot(pubmed(pmid)))[["PubmedArticle"]]
27 |     if (is.null(pmart))
28 |         stop("no PubmedArticle found in the PubMed reply")
29 |     art <- xmlChildren(pmart)[["MedlineCitation"]][["Article"]]
30 |     if (is.null(art))
31 |         stop("no Article found in the PubMed reply")
32 |     cart <- xmlChildren(art)
33 | 
34 |     ## prefer the text of <Abstract>; fall back to its <AbstractText> child
35 |     absNode <- cart[["Abstract"]]
36 |     abst <- textOf(absNode)
37 |     if (!nzchar(abst) && !is.null(absNode))
38 |         abst <- textOf(xmlChildren(absNode)[["AbstractText"]])
39 | 
40 |     ## first author only; a record without authors gives an empty name
41 |     authors <- cart[["AuthorList"]]
42 |     name <- ""
43 |     if (!is.null(authors) && length(xmlChildren(authors)) > 0L) {
44 |         first <- xmlChildren(xmlChildren(authors)[[1]])
45 |         parts <- c(textOf(first[["LastName"]]), textOf(first[["Initials"]]))
46 |         name <- paste(parts[nzchar(parts)], collapse = " ")
47 |     }
48 | 
49 |     new("MIAME", name = name, lab = textOf(cart[["Affiliation"]]),
50 |         title = textOf(cart[["ArticleTitle"]]), abstract = abst,
51 |         pubMedIds = as.character(pmid))
52 | }
53 | 


--------------------------------------------------------------------------------
/man/PWAmat.Rd:
--------------------------------------------------------------------------------
 1 | \name{PWAmat}
 2 | \alias{PWAmat}
 3 | \title{A function to compute the probe to KEGG pathway incidence matrix.}
 4 | \description{
 5 |   For a given chip we compute the mapping from probes to KEGG pathways.
 6 | }
 7 | \usage{
 8 | PWAmat(data)
 9 | }
10 | \arguments{
11 |   \item{data}{The name of the chip for which the incidence matrix should
12 |   be computed.}
13 | }
14 | \details{
15 |   Not much to say, just find which probes are in which pathways and
16 |   return the incidence matrix, with pathways as rows and probes as
17 |   columns. 
18 | 
19 |   It would be nice to be able to specify a set of probes to use, so that 
20 |  one does not perform the calculations using all probes if they are
21 |  not of interest.
22 | }
23 | \value{
24 |  A matrix containing zero or one, depending on whether the probe
25 |   (row) is in a pathway (column).
26 | }
27 | \author{R. Gentleman}
28 | \seealso{\code{\link{KEGG2heatmap}}, \code{\link{GOmnplot}}}
29 | \examples{
30 |   library("hgu95av2.db")
31 |   Am1 <- PWAmat("hgu95av2")
32 | }
33 | \keyword{manip}
34 | 


--------------------------------------------------------------------------------
/man/entrezGeneByID.Rd:
--------------------------------------------------------------------------------
 1 | \name{entrezGeneByID}
 2 | \alias{entrezGeneByID}
 3 | \title{Create a Query String for an Entrez Gene Identifier}
 4 | \description{
 5 | Given a set of Entrez Gene identifiers this function creates a set of URLs
 6 | that can be used to either open a browser to the requested location or
 7 | that can be used as anchors in the construction of HTML output.
 8 | }
 9 | \usage{
10 | entrezGeneByID(query)
11 | }
12 | \arguments{
13 |   \item{query}{Entrez Gene identifiers.}
14 | }
15 | \details{
16 |   Using NCBI we construct appropriate strings for directing a web
17 |   browser to the Entrez Genes specified by their IDs.
18 | }
19 | \value{
20 | A character vector containing the query string.
21 | }
22 | \references{NCBI, \url{https://www.ncbi.nih.gov/} }
23 | \author{Marc Carlson}
24 | \note{Be very careful about automatically querying this resource. It is
25 |   considered antisocial behavior by the owners. }
26 | 
27 | \examples{
28 |   q1<-entrezGeneByID(c("100", "1002"))
29 |   q1
30 |   if( interactive())
31 |     browseURL(q1[1])
32 | 
33 | }
34 | \keyword{interface}
35 | 


--------------------------------------------------------------------------------
/man/entrezGeneQuery.Rd:
--------------------------------------------------------------------------------
 1 | \name{entrezGeneQuery}
 2 | \alias{entrezGeneQuery}
 3 | \title{Create a Query String for Entrez Genes}
 4 | \description{
 5 | Given a set of search terms this function creates a set of URLs
 6 | that can be used to either open a browser to the requested location or
 7 | that can be used as anchors in the construction of HTML output.
 8 | }
 9 | \usage{
10 | entrezGeneQuery(query)
11 | }
12 | \arguments{
13 |   \item{query}{A character vector of search terms. }
14 | }
15 | \details{
16 |   Using NCBI we construct an appropriate string for directing a web
17 |   browser to information about genes of that type at NCBI.
18 | }
19 | \value{
20 | A character vector containing the query string.
21 | }
22 | \references{NCBI, \url{https://www.ncbi.nih.gov/} }
23 | \author{Marc Carlson}
24 | \note{Be very careful about automatically querying this resource. It is
25 |   considered antisocial behavior by the owners. }
26 | 
27 | \examples{
28 |   q1<-entrezGeneQuery(c("leukemia", "Homo sapiens"))
29 |   q1
30 |   if( interactive())
31 |     browseURL(q1[1])
32 | 
33 | }
34 | \keyword{interface}
35 | 


--------------------------------------------------------------------------------
/man/filterGOByOntology.Rd:
--------------------------------------------------------------------------------
 1 | \name{filterGOByOntology}
 2 | \alias{filterGOByOntology}
 3 | 
 4 | \title{Filter GO terms by a specified GO ontology}
 5 | \description{
 6 |   Given a character vector containing GO identifiers, return a logical
 7 |   vector indicating which GO IDs are in the specified ontology (BP,
 8 |   CC, or MF).
 9 | }
10 | \usage{
11 | filterGOByOntology(goids, ontology = c("BP", "CC", "MF"))
12 | }
13 | 
14 | \arguments{
15 |   \item{goids}{a character vector of GO IDs}
16 |   \item{ontology}{One of "BP", "CC", or "MF"}
17 | }
18 | \value{
19 | A logical vector with length equal to \code{goids}.  A \code{TRUE}
20 | indicates that the corresponding GO ID in \code{goids} is a member
21 | of the ontology specified by \code{ontology}.
22 | }
23 | \author{Seth Falcon}
24 | \examples{
25 | haveGO <- suppressWarnings(require("GO.db"))
26 | if (haveGO) {
27 |     ids <- c("GO:0001838", "GO:0001839")
28 |     stopifnot(all(filterGOByOntology(ids, "BP")))
29 |     stopifnot(!any(filterGOByOntology(ids, "MF")))
30 | } else cat("Sorry, this example requires the GO package\n")
31 | 
32 | }
33 | 
34 | \keyword{manip}
35 | 


--------------------------------------------------------------------------------
/man/annPkgName.Rd:
--------------------------------------------------------------------------------
 1 | \name{annPkgName}
 2 | \alias{annPkgName}
 3 | 
 4 | \title{Get annotation package name from chip name}
 5 | \description{
 6 |   This function returns the name of the Bioconductor annotation data
 7 |   package that corresponds to the specified chip or genome.  The
 8 |   \code{type} argument is used to request an annotation package with a
 9 |   particular backing store.
10 | }
11 | \usage{
12 | annPkgName(name, type = c("db", "env"))
13 | }
14 | 
15 | \arguments{
16 |   \item{name}{string specifying the name of the chip or genome.  For
17 |       example, \code{"hgu133plus2"}}
18 |   \item{type}{Either \code{"db"} or \code{"env"}.  This will determine
19 |       whether the package name returned corresponds to the SQLite-based
20 |       annotation package or environment-based package, respectively.}
21 | }
22 | 
23 | \value{
24 |   a string giving the name of the annotation data package
25 | }
26 | 
27 | \author{Seth Falcon}
28 | \seealso{
29 |   \code{\link{getAnnMap}}
30 | }
31 | \examples{
32 | annPkgName("hgu133plus2", type="db")
33 | annPkgName("hgu133plus2", type="env")
34 | }
35 | 
36 | \keyword{manip}
37 | 
38 | 


--------------------------------------------------------------------------------
/inst/NEWS.Rd:
--------------------------------------------------------------------------------
 1 | \name{annotate-NEWS}
 2 | \title{annotate News}
 3 | 
 4 | \section{CHANGES IN VERSION 1.47}{
 5 |   \subsection{DEFUNCT}{
 6 |     \itemize{
 7 | 
 8 |       \item \code{probesByLL} is now defunct; use
 9 |       \code{AnnotationDbi::select()} instead.
10 | 
11 |       \item \code{blastSequences} supports multiple sequence queries;
12 |       use \code{as="data.frame"} for output.
13 | 
14 |       \item Improve \code{blastSequences} strategy for result retrieval,
15 |       querying the appropriate API for status every 10 seconds after
16 |       initial estimated processing time.
17 | 
18 |     }
19 |   }
20 | }
21 | 
22 | \section{CHANGES IN VERSION 1.43}{
23 | 
24 |   \subsection{NEW FEATURES}{
25 | 
26 |     \itemize{
27 | 
28 |       \item \code{blastSequences} accepts an argument \code{timeout}
29 |       limiting waiting time for a response; in an interactive session
30 |       and after the timeout is reached, the user may opt to retry the
31 |       query.
32 | 
33 |       \item \code{blastSequences} accepts an argument
34 |       \code{as} controlling the representation of the return value,
35 |       either a DNAMultipleAlignment, a data.frame, or the XML.
36 | 
37 |     }
38 |   }
39 | }
40 | 


--------------------------------------------------------------------------------
/man/pm.abstGrep.Rd:
--------------------------------------------------------------------------------
 1 | \name{pm.abstGrep}
 2 | \alias{pm.abstGrep}
 3 | \title{An interface to grep for PubMed abstracts.}
 4 | \description{
 5 |   A user friendly interface to the functionality provided by
 6 |  \code{pubmed}. 
 7 | }
 8 | \usage{
 9 | pm.abstGrep(pattern, absts, ...)
10 | }
11 | \arguments{
12 |   \item{pattern}{A pattern for the call to \code{grep}.}
13 |   \item{absts}{A list containing abstracts downloaded using \code{pubmed} or
14 |   equivalent.}
15 |   \item{\dots}{Extra arguments passed to \code{grep}.}
16 | }
17 | \details{
18 |   The \code{absts} are a list of PubMed XML objects that have been downloaded 
19 |   and parsed. This function lets the user quickly search the abstracts
20 |   for any regular expression. The returned value is a logical vector
21 |   indicating which of the abstracts contain the regular expression.
22 | }
23 | \value{
24 |  The returned value is a logical vector
25 |   indicating which of the abstracts contain the regular expression.
26 | }
27 | \author{Robert Gentleman}
28 | \seealso{\code{\link{pm.getabst}}, \code{\link{pm.titles}}}
29 | \examples{
30 |   library("hgu95av2.db")
31 |   hoxa9 <- "37806_at"
32 |   absts <- pm.getabst(hoxa9, "hgu95av2")
33 |   pm.abstGrep("SH3", absts[[1]])
34 |   pm.abstGrep("autism", absts[[1]])
35 | }
36 | \keyword{manip}
37 | 


--------------------------------------------------------------------------------
/man/PMIDAmat.Rd:
--------------------------------------------------------------------------------
 1 | \name{PMIDAmat}
 2 | \alias{PMIDAmat}
 3 | \title{A function to compute the probe to PubMed id incidence matrix.}
 4 | \description{
 5 |   For a given chip or a given set of genes, it computes the mapping from
 6 |   probes to PubMed id.
 7 | }
 8 | \usage{
 9 | PMIDAmat(pkg, gene=NULL)
10 | }
11 | \arguments{
12 |   \item{pkg}{The package name of the chip for which the incidence matrix should
13 |     be computed.}
14 |   \item{gene}{A character vector of probe set ids of interest, or NULL (default).}
15 | }
16 | \details{
17 |   Not much to say, just find which probes are associated with which
18 |   PubMed ids and return the incidence matrix, with PubMed ids as rows and probes as
19 |   columns. 
20 | 
21 |   To restrict the computation to a set of probes, set the argument
22 |   \code{gene} to a vector of probe ids.  This way, genes and PubMed ids
23 |   that are not of interest are left out of the calculations, so the
24 |   whole process finishes sooner.
25 |   }
26 | \value{
27 |  A matrix containing zero or one, depending on whether the probe
28 |   (column) is associated with a PubMed id (row).
29 | }
30 | \author{R. Gentleman}
31 | \examples{
32 |   library("hgu95av2.db")
33 |   probe <- names(as.list(hgu95av2ACCNUM))
34 |   Amat <- PMIDAmat("hgu95av2", gene=sample(probe, 10))
35 | }
36 | \keyword{manip}
37 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
 1 | ## Store annotate's URL settings as the "annotate" entry of option "BioC".
 2 | .buildAnnotateOpts <- function() {
 3 |     ## start from the current "BioC" option, creating it when unset
 4 |     biocOpts <- getOption("BioC")
 5 |     if (is.null(biocOpts)) {
 6 |         biocOpts <- list()
 7 |         class(biocOpts) <- "BioCOptions"
 8 |     }
 9 |     ## package entry holding the two base URLs (names: ncbi, data)
10 |     pkgEntry <- list()
11 |     class(pkgEntry) <- "BioCPkg"
12 |     pkgEntry$urls <- list(ncbi = "https://www.ncbi.nlm.nih.gov/",
13 |                           data = "http://www.bioconductor.org/datafiles/annotate/")
14 |     biocOpts$annotate <- pkgEntry
15 |     options(BioC = biocOpts)
16 | }
17 | 
18 | ## Load hook: set default repositories and options; on Windows Rgui add vignettes menu call.
19 | .onLoad <- function(libname, pkgname) {
20 |     .setDefaultRepositories()
21 |     .buildAnnotateOpts()
22 |     inWinGui <- .Platform$OS.type == "windows" && interactive() &&
23 |         .Platform$GUI == "Rgui"
24 |     if (inWinGui) Biobase::addVigs2WinMenu("annotate")
25 | }
26 | ## Environment with an empty parent; presumably the registry filled by setRepository() -- TODO confirm.
27 | .repositories <- new.env(parent = emptyenv())
28 | ## Register each default repository name with its getQuery4* function.
29 | .setDefaultRepositories <- function() {
30 |     builders <- list(ug = getQuery4UG, affy = getQuery4Affy,
31 |                      gb = getQuery4GB, sp = getQuery4SP,
32 |                      omim = getQuery4OMIM, fb = getQuery4FB,
33 |                      en = getQuery4EN, tr = getQuery4TR,
34 |                      go = getQuery4GO, ens = getQuery4ENSEMBL)
35 |     ## registration order follows the table; the last call's value is returned
36 |     last <- NULL
37 |     for (repo in names(builders))
38 |         last <- setRepository(repo, builders[[repo]])
39 |     last
40 | }
41 | 


--------------------------------------------------------------------------------
/man/UniGeneQuery.Rd:
--------------------------------------------------------------------------------
 1 | \name{UniGeneQuery}
 2 | \alias{UniGeneQuery}
 3 | \title{Create a Query String for a UniGene Identifier  }
 4 | \description{
 5 | Given a set of UniGene identifiers this function creates a set of URLs
 6 | that can be used to either open a browser to the requested location or
 7 | that can be used as anchors in the construction of HTML output.
 8 | }
 9 | \usage{
10 | UniGeneQuery(query, UGaddress="UniGene/", type="CID")
11 | }
12 | \arguments{
13 |   \item{query}{The UniGene identifiers. }
14 |   \item{UGaddress}{ The address of UniGene, within the NCBI repository.}
15 |   \item{type}{What type of object is being asked for; either CID or UGID }
16 | }
17 | \details{
18 |   Using published details from NCBI we construct an appropriate string
19 |   for directing a web browser to the information available at the NCBI
20 |   for that genomic product (usually an EST).
21 | }
22 | \value{
23 | A character vector containing the query string.
24 | }
25 | \references{NCBI, \url{https://www.ncbi.nih.gov/} }
26 | \author{Robert Gentleman}
27 | \note{Be very careful about automatically querying this resource. It is
28 |   considered antisocial behavior by the owners. }
29 | 
30 | \examples{
31 |   q1<-UniGeneQuery(c("Hs.293970", "Hs.155650"))
32 |   q1
33 |   if( interactive())
34 |     browseURL(q1[1])
35 | 
36 | }
37 | \keyword{interface}
38 | 


--------------------------------------------------------------------------------
/man/getOntology.Rd:
--------------------------------------------------------------------------------
 1 | \name{getOntology}
 2 | \alias{getOntology}
 3 | \title{Get GO terms for a specified ontology}
 4 | \description{
 5 | Find the subset of GO terms for the specified ontology, for each element 
 6 | of the supplied list of associations. The input list is typically from
 7 | one of the chip-specific meta-data files.
 8 | }
 9 | \usage{
10 | getOntology(inlist, ontology=c("MF", "BP", "CC"))
11 | }
12 | \arguments{
13 |   \item{inlist}{A list of GO associations}
14 |   \item{ontology}{The name of the ontology you want returned.}
15 | }
16 | \details{
17 |   The input list should be a list of lists, each element of \code{inlist}
18 |   is itself a list containing the information that maps from a specified 
19 |   ID (usually LocusLink) to GO information. Each element of the inner list 
20 |   is a list with elements \code{GOID}, \code{Ontology} and \code{Evidence}. 
21 | }
22 | \value{
23 |    A list of the same length as the input list. Each element of this
24 |    list will contain a vector of \code{GOID}s for those terms that match
25 |    the requested ontology.
26 | }
27 | \author{R. Gentleman}
28 | 
29 | \seealso{\code{\link{getEvidence}}, \code{\link{dropECode}}}
30 | 
31 | \examples{
32 |  library("hgu95av2.db")
33 |  bb <- hgu95av2GO[["39613_at"]]
34 |  getOntology(bb)
35 |  sapply(bb, function(x) x$Ontology)
36 | }
37 | \keyword{manip}
38 | 


--------------------------------------------------------------------------------
/man/getSeq4Acc.Rd:
--------------------------------------------------------------------------------
 1 | \name{getSEQ}
 2 | \alias{getGI}
 3 | \alias{getSEQ}
 4 | 
 5 | \title{Queries the NCBI database to obtain the sequence for a given
 6 |   GenBank Accession number}
 7 | \description{
 8 |   Given a GenBank Accession number, getSEQ queries the NCBI database for
 9 |   the nucleotide sequence.  
10 | }
11 | \usage{
12 | getGI(accNum)
13 | getSEQ(gi)
14 | }
15 | 
16 | \arguments{
17 |   \item{accNum}{\code{accNum} a character string for a GenBank Accession
18 |     number (i.e. M22490)}
19 |   \item{gi}{\code{gi} a character string or numeric numbers for a
20 |     GenBank accession number or gi number. A gi number is a series of
21 |     digits that are assigned consecutively to each sequence record
22 |     processed by NCBI}
23 | }
24 | \details{
25 |   The NCBI database is queried for the given GenBank Accession number to
26 |   obtain the nucleotide sequence in FASTA format. The leading
27 |   identification line of the sequence data is then dropped to return only
28 |   the nucleotide sequence.  
29 | 
30 |   getGI returns the gi number corresponding to a given GenBank accession
31 |   number.
32 | }
33 | \value{
34 |   getSEQ returns a character string of nucleotide sequence
35 | }
36 | \references{\url{https://www.ncbi.nlm.nih.gov/entrez/query.fcgi}}
37 | \author{Jianhua Zhang}
38 | 
39 | \examples{
40 | getSEQ("M22490")
41 | }
42 | \keyword{misc}
43 | 
44 | 


--------------------------------------------------------------------------------
/man/dropECode.Rd:
--------------------------------------------------------------------------------
 1 | \name{dropECode}
 2 | \alias{dropECode}
 3 | \title{Drop GO labels for specified Evidence Codes}
 4 | \description{
 5 |   Genes are mapped to GO terms on the basis of evidence codes. In some
 6 |   analyses it will be appropriate to drop certain sets of annotations
 7 |   based on specific evidence codes.
 8 | }
 9 | \usage{
10 | dropECode(inlist, code="IEA")
11 | }
12 | \arguments{
13 |   \item{inlist}{A list of GO data }
14 |   \item{code}{The set of codes that should be dropped. }
15 | }
16 | \details{
17 |   A simple use of \code{\link{lapply}} and \code{\link{sapply}} to find
18 |   and eliminate those terms that have the specified evidence codes.
19 | 
20 |   This might be used when one is using GO to validate a sequence
21 |   matching experiment (for example), then all terms whose mapping was
22 |   based on sequence similarity (say ISS and IEA) should be removed.
23 |   
24 | }
25 | \value{
26 |   A list of the same length as the input list retaining only those
27 |   annotations whose evidence codes were not the ones in the exclusion
28 |   set \code{code}.
29 | }
30 | \author{R. Gentleman}
31 | 
32 | \seealso{\code{\link{getEvidence}}, \code{\link{getOntology}}}
33 | 
34 | \examples{
35 |  library("hgu95av2.db")
36 |  bb <- hgu95av2GO[["39613_at"]]
37 |  getEvidence(bb[1:3])
38 |  cc <- dropECode(bb[1:3])
39 |  if (length(cc))
40 |    getEvidence(cc)
41 | }
42 | \keyword{manip}
43 | 


--------------------------------------------------------------------------------
/man/accessionToUID.Rd:
--------------------------------------------------------------------------------
 1 | \name{accessionToUID}
 2 | \alias{accessionToUID}
 3 | \title{A function to convert accession values to NCBI UIDs.}
 4 | \description{
 5 |   Given one or more accession values, this function will attempt to
 6 |   convert them into NCBI UID values.
 7 | }
 8 | \usage{
 9 | accessionToUID(...,db=c("genbank","pubmed"))
10 | }
11 | \arguments{
12 |   \item{...}{Accession numbers to be transformed.}
13 |   \item{db}{Which database this accession number refers to, defaults to Genbank}
14 | }
15 | \details{
16 |   Utilizes the PubMed tool esearch.fcgi to convert an accession number
17 |   into a valid NCBI UID number.
18 | 
19 |   WARNING:  The powers that be at NCBI have been known to ban the IP
20 |   addresses of users who abuse their servers (currently defined as less
21 |   than 2 seconds between queries).  Do NOT put this function in a tight
22 |   loop or you may find your access revoked.
23 | }
24 | \value{
25 |   Returns either a valid NCBI UID value or NULL (if there was nothing
26 |   available).
27 | }
28 | \author{Jeff Gentry}
29 | \seealso{\code{\link{pubmed}}, \code{\link[XML]{xmlTreeParse}}}
30 | \examples{
31 | 
32 |      ## The two returns from genbank should be the same
33 |      xdoc <- genbank("U03397",type="accession",disp="data")
34 |      x <- accessionToUID("U03397",db="genbank")
35 |      xdoc <- genbank(x, type="uid",disp="data")
36 | 
37 |      ## Can handle multiple inputs
38 |      y <- accessionToUID("M16653","U892893",db="genbank")
39 | }
40 | \keyword{interface}
41 | 
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/man/GO2heatmap.Rd:
--------------------------------------------------------------------------------
 1 | \name{GO2heatmap}
 2 | \alias{GO2heatmap}
 3 | \alias{KEGG2heatmap}
 4 | \alias{KEGG2heatmap,character,eSet,character-method}
 5 | \alias{KEGG2heatmap,character,matrix,character-method}
 6 | 
 7 | \title{Compute a heatmap for the specified data, for either a GO
 8 |   category or a KEGG pathway.}
 9 | \description{
10 |   For a given GO category or KEGG pathway, all probes in the supplied
11 |   data are mapped to the pathway and a heatmap is produced.
12 | }
13 | \usage{
14 | GO2heatmap(x, eset, data, ...)
15 | KEGG2heatmap(x, eset, data, ...)
16 | }
17 | \arguments{
18 |   \item{x}{The name of the category or pathway.}
19 |   \item{eset}{An \code{ExpressionSet} providing the data.}
20 |   \item{data}{The name of the chip.}
21 |   \item{\dots}{Additional parameters to pass to \code{heatmap}.}
22 | }
23 | \details{
24 |  For the given pathway or GO category all matching probes are
25 |   determined, these are used to subset the data and \code{heatmap} is
26 |   invoked on that set of data. Extra parameters can be passed through to
27 |   \code{heatmap} using the \code{\dots} parameter.
28 |  The \code{annotation} slot of the \code{eset} argument is used to
29 |  determine the appropriate annotation data to use.
30 | }
31 | \value{
32 |   The value returned by \code{heatmap} is passed back to the user.
33 | }
34 | \author{R. Gentleman }
35 | \seealso{\code{\link{heatmap}}}
36 | \examples{
37 |   library("hgu95av2.db")
38 |   data(sample.ExpressionSet)
39 |   KEGG2heatmap("04810", sample.ExpressionSet, "hgu95av2")
40 | }
41 | \keyword{manip}
42 | 


--------------------------------------------------------------------------------
/man/ACCNUMStats.Rd:
--------------------------------------------------------------------------------
 1 | \name{ACCNUMStats}
 2 | \alias{ACCNUMStats}
 3 | \alias{whatACC}
 4 | 
 5 | \title{Provides statistics on the types of ids used for the ACCNUM
 6 |   environment of a given data package}
 7 | \description{
 8 |   Given a data package name, ACCNUMStats counts how many of the probe
 9 |   ids are mapped to GenBank Accession numbers, UniGene ids, RefSeq ids,
10 |   or Image clone ids.
11 | }
12 | \usage{
13 | ACCNUMStats(pkgName)
14 | whatACC(accs)
15 | }
16 | \arguments{
17 |   \item{pkgName}{\code{pkgName} a character string for the name of a
18 |     BioC data package}
19 |   \item{accs}{\code{accs} a vector of character string for the ids
20 |     whose type will be determined}
21 | }
22 | \details{
23 |   The ACCNUM environment of each BioC data package contains mappings
24 |   between probe ids and a set of public ids based on which mappings of
25 |   probe ids to other annotation data can be obtained using public data
26 |   sources. The set of ids were provided by a manufacturer or user at the
27 |   time when the data package was built. The manufacturer/user provided
28 |   ids can be of different types of public ids, such as GenBank Accession
29 |   number, UniGene ids, etc.
30 | 
31 |   ACCNUMStats counts the number of probes that are mapped to different
32 |   types of public ids and have the results presented in a table.
33 | }
34 | \references{The ACCNUM environment of a platform dependent BioC data package}
35 | \author{Jianhua Zhang}
36 | \examples{
37 |   library("hgu95av2.db")
38 |   ACCNUMStats("hgu95av2")
39 | }
40 | \keyword{misc}
41 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: annotate
 2 | Title: Annotation for microarrays
 3 | Version: 1.89.0
 4 | Author: R. Gentleman
 5 | Authors@R: c(
 6 |         person("Jeff", "Gentry", role = "aut"),
 7 |         person("Kritika", "Verma", role = "ctb",
 8 |                 comment = "Converted chromLOC vignette from Sweave to RMarkdown / HTML."),
 9 |         person("Manvi", "Yaduvanshi", role = "ctb",
10 |                 comment = "Converted useDataPkgs vignette from Sweave to RMarkdown / HTML."),
11 |         person("Bioconductor Package Maintainer",
12 |                 email = "maintainer@bioconductor.org", role = "cre"))
13 | Description: Using R environments for annotation.
14 | Depends: R (>= 2.10), AnnotationDbi (>= 1.27.5), XML
15 | Imports: Biobase, DBI, xtable, graphics, utils, stats, methods,
16 |         BiocGenerics (>= 0.13.8), httr
17 | Suggests: hgu95av2.db, genefilter, Biostrings (>= 2.25.10), IRanges, rae230a.db,
18 |         rae230aprobe, tkWidgets, GO.db, org.Hs.eg.db, org.Mm.eg.db,
19 |         humanCHRLOC, Rgraphviz, RUnit, BiocStyle, knitr
20 | VignetteBuilder: knitr
21 | License: Artistic-2.0
22 | LazyLoad: yes
23 | Collate: AllGenerics.R ACCNUMStats.R Amat.R AnnMaps.R chromLocation.R
24 |         compatipleVersions.R findNeighbors.R getData.R getPMInfo.R
25 |         getSeq4ACC.R GOhelpers.R homoData.R html.R isValidKey.R
26 |         LL2homology.R pmid2MIAME.R pubMedAbst.R query.R
27 |         readGEOAnn.R serializeEnv.R blastSequences.R zzz.R
28 |         test_annotate_package.R
29 | biocViews: Annotation, Pathways, GO
30 | Packaged: 2014-07-15 23:10:57 UTC; mcarlson
31 | 


--------------------------------------------------------------------------------
/vignettes/annotate.Rfc:
--------------------------------------------------------------------------------
 1 | An RFC for dealing with annotation data.
 2 | 
 3 | Please comment on the following proposal.
 4 | 
 5 | In order to deal with the ever changing needs and continual evolution
 6 | of genomic data the annotate package must be able to update and
 7 | install annotation data.
 8 | 
 9 | It seems that this process is not unlike the R package system and
10 | perhaps much can be gained by using features from both the package
11 | system and the data system.
12 | 
13 | Functionality:
14 | 
15 | annotation: A function, like data, if given no argument it lists the
16 | annotation that is available. Annotation is loaded using "data",
17 | however.
18 | There is an argument, local, if TRUE look on the local machine, if
19 | FALSE, list available annotation datasets from Bioconductor.
20 | 
21 | We will use a different index file, 00Annotate.dcf in the annotate
22 | data directory to hold this listing. It isn't really data and we
23 | wouldn't really want people to get confused.
24 | 
25 | install.annotation: Takes a base name (like "hgu95A") and obtains and
26 | installs a package from the bioconductor web site (using the same
27 | system of URL access as install.packages).
28 | This will download data, put it in the appropriate directory and
29 | update the index file.
30 | 
31 | update.annotation: Takes a base name (like "hgu95A") and checks to see
32 | if it needs to be updated.
33 | 
34 | I still need to figure out how to automagically build this stuff but
35 | would like to get comments quickly so it can be squeezed in to the
36 | release. 
37 | 
38 | thanks 
39 |   robert
40 | 


--------------------------------------------------------------------------------
/man/GOmnplot.Rd:
--------------------------------------------------------------------------------
 1 | \name{GOmnplot}
 2 | \alias{GOmnplot}
 3 | \alias{KEGGmnplot}
 4 | \alias{KEGGmnplot,character,eSet,character-method}
 5 | \alias{KEGGmnplot,character,matrix,character-method}
 6 | 
 7 | \title{A function to plot per-group means against each other.}
 8 | \description{
 9 |   For a two sample comparison, as determined by \code{group}, and a
10 |   specified KEGG pathway or GO category, per group means are computed
11 |   and plotted against each other.
12 | }
13 | \usage{
14 | GOmnplot(x, eset, data = "hgu133plus2", group, ...)
15 | KEGGmnplot(x, eset, data = "hgu133plus2", group, ...)
16 | }
17 | \arguments{
18 |   \item{x}{The name of the KEGG pathway or GO category.}
19 |   \item{eset}{An \code{ExpressionSet} containing the data.}
20 |   \item{data}{The name of the chip that was used to provide the data.}
21 |   \item{group}{The variable indicating group membership, should have two
22 |   different values.}
23 |   \item{\dots}{Extra parameters to pass to the call to \code{plot}.}
24 | }
25 | \details{
26 |   All probes in \code{eset} that map to the given category are
27 |   determined. Then per group, per probe means are computed and plotted
28 |   against each other. Extra parameters can be passed to the plot
29 |   function via the \code{\dots} argument.
30 | }
31 | \value{
32 | The matrix of per group means, for each probe.
33 | }
34 | \author{R. Gentleman}
35 | \seealso{ \code{\link{KEGG2heatmap}}}
36 | \examples{
37 |   library("hgu95av2.db")
38 |   data(sample.ExpressionSet)
39 |   KEGGmnplot("04810", sample.ExpressionSet, sample.ExpressionSet$sex, 
40 |              data = "hgu95av2")
41 | }
42 | \keyword{manip}
43 | 


--------------------------------------------------------------------------------
/man/updateSymbolsToValidKeys.Rd:
--------------------------------------------------------------------------------
 1 | \name{updateSymbolsToValidKeys}
 2 | \alias{updateSymbolsToValidKeys}
 3 | \title{Take a list of symbols and translate them into the best possible
 4 |   ID for a package.}
 5 | \description{
 6 |   Given a list of gene symbols and a package, find a valid ID for that
 7 |   package.  If there isn't a valid ID, then return the original symbol.
 8 | }
 9 | \usage{
10 | updateSymbolsToValidKeys(symbols, pkg)
11 | }
12 | \arguments{
13 |   \item{symbols}{A character vector containing gene symbols that you
14 |     wish to try and translate into valid IDs.}
15 |   \item{pkg}{The package name of the chip for which we wish to validate IDs.}
16 | }
17 | \details{
18 |   This is a convenience function for getting from a possibly varied list
19 |   of gene symbols mapped onto something that is a nice concrete ID such as an
20 |   entrez gene ID.  When such an ID cannot be found, the original symbol
21 |   will come back to prevent the loss of any information.
22 | }
23 | \value{
24 |   This function returns a vector of IDs corresponding to the symbols
25 |   that were input.  If the symbols don't have a valid ID, then they come
26 |   back instead.
27 | }
28 | \author{Marc Carlson}
29 | \seealso{\code{\link{isValidKey}}}
30 | \examples{
31 | \dontrun{
32 |   ## one "bad" ID, one that can be mapped onto a valid ID, and a 3rd
33 |   ## which already is a valid ID
34 |   syms <- c("15S_rRNA_2","21S_rRNA_4","15S_rRNA")
35 |   updateSymbolsToValidKeys(syms, "org.Sc.sgd")
36 | 
37 |   ## 3 symbols and a 4th that will NOT be valid
38 |   syms <- c("MAPK11","P38B","FLJ45465", "altSymbol")
39 |   updateSymbolsToValidKeys(syms, "org.Hs.eg")
40 | }
41 | }
42 | \keyword{manip}
43 | 


--------------------------------------------------------------------------------
/man/pm.getabst.Rd:
--------------------------------------------------------------------------------
 1 | \name{pm.getabst}
 2 | \alias{pm.getabst}
 3 | \title{Obtain the PubMed abstracts for a set of gene identifiers.}
 4 | \description{
 5 | The data provided by PubMed is reduced to a small set. This set is 
 6 | then suitable for further rendering.
 7 | }
 8 | \usage{
 9 | pm.getabst(geneids, basename)
10 | }
11 | \arguments{
12 |   \item{geneids}{The identifiers used to find Abstracts }
13 |   \item{basename}{The base name of the annotation package to use. }
14 | }
15 | \details{
16 | We rely on the annotation in the package associated with the 
17 | \code{basename} to provide PubMed identifiers for the genes described by
18 | the gene identifiers.
19 | With these in hand we then use the \code{pmfetch} utility to download the
20 | PubMed abstracts in XML form. These are then translated (transformed) to a 
21 | shorter version containing a small subset of the data provided by PubMed.
22 | 
23 | This function has the side effect of creating an environment in 
24 | \code{.GlobalEnv} that contains the mapping for the requested data.
25 | This is done for efficiency -- so we don't continually read in the data
26 | when there are many different queries to be performed.
27 | }
28 | \value{
29 |  A list of lists containing objects of class \code{pubMedAbst}.
30 |  There will be one element of the list for each identifier.
31 |  Each of these elements is a list containing one abstract (of
32 |  class \code{pubMedAbst}) for each PubMed identifier associated with
33 |  the gene identifier.
34 | }
35 | \author{Robert Gentleman}
36 | \seealso{\code{\link{pm.abstGrep}}, \code{\link{pm.titles}}}
37 | \examples{
38 |   library("hgu95av2.db")
39 |   hoxa9 <- "37806_at"
40 |   absts <- pm.getabst(hoxa9, "hgu95av2")
41 | }
42 | \keyword{manip}
43 | 


--------------------------------------------------------------------------------
/R/LL2homology.R:
--------------------------------------------------------------------------------
 1 | ## Defunct.  Formerly looked up the ids in 'llids' in the environment named
 2 | ## paste(homoPkg, "LL2HGID", sep = "") and passed each result on to
 3 | ## HGID2homology().  The function and its signature are kept only so that
 4 | ## old calls fail with an explicit error.
 5 | ##   homoPkg, llids: accepted for backward compatibility; never used.
 6 | ## Always signals an error; never returns a value.
 7 | LL2homology <- function(homoPkg, llids){
 8 |     ## The lookup code that used to follow this call was unreachable and
 9 |     ## has been removed.
10 |     stop("This function has been DEFUNCT.")
11 | }
15 | 
16 | ## Defunct.  Formerly looked up the accession numbers 'accs' in the
17 | ## environment named paste(homoPkg, "ACC2HGID", sep = "") of the attached
18 | ## package and passed each result on to HGID2homology().  Kept only so that
19 | ## old calls fail with an explicit error.
20 | ##   accs, homoPkg: accepted for backward compatibility; never used.
21 | ## Always signals an error; never returns a value.
22 | ACC2homology <- function(accs, homoPkg){
23 |     ## The lookup code that used to follow this call was unreachable and
24 |     ## has been removed.
25 |     stop("This function has been DEFUNCT.")
26 | }
26 | 
27 | ## Defunct.  Formerly collected, for every id in 'hgid', the object of that
28 | ## name from the environment paste(homoPkg, "DATA", sep = "") of the
29 | ## attached package, silently skipping ids that could not be fetched.  Kept
30 | ## only so that old calls fail with an explicit error.
31 | ##   hgid, homoPkg: accepted for backward compatibility; never used.
32 | ## Always signals an error; never returns a value.
33 | HGID2homology <- function(hgid, homoPkg){
34 |     ## The retrieval loop that used to follow this call (including its
35 |     ## toggling of the 'show.error.messages' option) was unreachable and has
36 |     ## been removed.
37 |     stop("This function has been DEFUNCT.")
38 | }
46 | 


--------------------------------------------------------------------------------
/R/getData.R:
--------------------------------------------------------------------------------
 1 | ##copyright 2002 R. Gentleman, all rights reserved
 2 | ##helper functions for dealing with data environments (soon to be hash
 3 | ##tables)
 4 | 
 5 | ## JZ added lookUp and modified the other functions so that they all
 6 | ## use lookUp. Nov. 6, 2003.
 7 | ## Fetch the values stored under the keys 'x' in one annotation map.
 8 | ##   x:    keys to look up; at least one is required.
 9 | ##   data: handed to getAnnMap() as its 'chip' argument.
10 | ##   what: name of the map, handed to getAnnMap() as its first argument.
11 | ##   load: forwarded to getAnnMap().
12 | ## Returns the result of mget() on that map; keys that are not found
13 | ## yield NA (ifnotfound=NA).
14 | lookUp <- function(x, data, what, load=FALSE) {
15 |     if (length(x) < 1)
16 |         stop("No keys provided")
17 |     annMap <- getAnnMap(what, chip=data, load=load)
18 |     mget(x, envir=annMap, ifnotfound=NA)
19 | }
14 | 
15 | ## GO annotation for the keys 'x' on chip 'data': the "GO" map queried
16 | ## through lookUp(); the lookUp() result is returned unchanged.
17 | getGO <- function(x, data)
18 |     lookUp(x, data, what = "GO")
18 | 
19 |  ## Look up the GO terms for the identifiers 'x' (map "TERM" of "GO", via
20 |  ## lookUp() with load=TRUE) and optionally keep only one ontology.
21 |  ##   x:     GO identifiers to look up.
22 |  ##   which: "BP" (default), "CC" or "MF" to keep only the terms whose
23 |  ##          Ontology() equals that value, or "ANY" to keep all of them.
24 |  ## Returns the (possibly filtered) lookup result; NULL with a warning when
25 |  ## the lookup fails, and NULL when nothing is left after filtering.
26 |  getGOdesc <- function(x, which = c("BP", "CC", "MF", "ANY")) {
27 |      which <- match.arg(which)
28 |      ## silent = TRUE hides the error message without switching the global
29 |      ## 'show.error.messages' option off and back on, which used to leave
30 |      ## the option TRUE regardless of what the user had set.
31 |      ans <- try(lookUp(x, "GO", "TERM", load=TRUE), silent = TRUE)
32 |      ## Handle a failed lookup before inspecting the result: 'ans' is then
33 |      ## a "try-error" object rather than the looked-up terms.
34 |      if(inherits(ans, "try-error")){
35 |          warning(paste("Invalid GO term", x))
36 |          return(NULL)
37 |      }
38 |      onts <- sapply(ans, Ontology)
39 |      if(which == "ANY"){
40 |          return(ans)
41 |      }
42 |      ans <- ans[onts %in% which]
43 |      if(length(ans) == 0){
44 |          return(NULL)
45 |      }
46 |      ans
47 |  }
41 | 
42 |   ## Symbols for the keys 'x' on chip 'data': the "SYMBOL" map is queried
43 |   ## through lookUp() and the resulting list is flattened with unlist().
44 |   getSYMBOL <- function(x, data) {
45 |       symbols <- lookUp(x, data, "SYMBOL")
46 |       unlist(symbols)
47 |   }
45 | 
46 |   ## "PMID" map entries for the keys 'x' on chip 'data'; the lookUp()
47 |   ## result is returned as is (not flattened).
48 |   getPMID <- function(x, data) {
49 |       pmids <- lookUp(x, data, "PMID")
50 |       pmids
51 |   }
49 | 
50 |   ## "ENTREZID" map entries for the keys 'x' on chip 'data', flattened
51 |   ## with unlist().
52 |   getEG <- function(x, data) {
53 |       entrezIds <- lookUp(x, data, "ENTREZID")
54 |       unlist(entrezIds)
55 |   }
53 | 
54 | # This function needs to be updated when new annotation items are
55 | # added to the data packages
56 | ## Returns the fixed character vector of annotation item names below.
57 | ## Takes no arguments.
58 | getUniqAnnItem <- function(){
59 |     items <- c("ACCNUM", "ENTREZID", "GENENAME", "SYMBOL", "MAP",
60 |                "GRIF", "SUMFUNC", "NM", "NP")
61 |     items
62 | }
60 | 


--------------------------------------------------------------------------------
/man/serializeEnv.Rd:
--------------------------------------------------------------------------------
 1 | \name{serializeEnv}
 2 | \alias{serializeEnv}
 3 | \alias{serializeDataPkgEnvs}
 4 | \title{A Function To Serialize Environment}
 5 | \description{
 6 |   This function will serialize an environment in R to an XML format
 7 |   stored in a compressed file.
 8 | }
 9 | \usage{
10 | serializeEnv(env, fname)
11 | serializeDataPkgEnvs(pkgDir)
12 | }
13 | \arguments{
14 |   \item{env}{The name of the environment to serialize.}
15 |   \item{fname}{The name of the output file.}
16 |   \item{pkgDir}{The directory where a data package is located.}
17 | }
18 | \details{
19 |    The environment is converted into an XML format and then outputted to
20 |    a gzipped file (using \code{\link{gzfile}}).  The values in the
21 |    environment are serialized (using \code{\link{serialize}}) in ASCII
22 |    format although the keys are stored in plain text.
23 | 
24 |    The format of the XML is very simple, with the primary block being
25 |    \code{values}, which contain blocks of \code{entries}, and each entry
26 |    having a \code{key} and a \code{value}.  For instance, if we had an
27 |    environment with one value in it, the character \code{c} with a key
28 |    of \code{a} (e.g. \code{assign("a", "c", env=foo)}), this is what the
29 |    output would look like.
30 |    
31 |    \preformatted{
32 |      <?xml version="1.0"?>
33 |      <values xmlns:bt="http://www.bioconductor.org/RGDBM">
34 |         <entry>
35 |            <key>
36 |              a
37 |            </key>
38 |            <value>
39 |               A\n2\n131072\n66560\n1040\n1\n1033\n1\nc\n
40 |            </value>
41 | 	</entry>    
42 |      </values>
43 |    }
44 | }
45 | \author{Jeff Gentry}
46 | \seealso{\code{\link{gzfile}},
47 |   \code{\link{serialize}}} 
48 | \examples{
49 |    z <- new.env()
50 |    assign("a", 1, env=z)
51 |    assign("b", 2, env=z)
52 |    assign("c", 3, env=z)
53 |    serializeEnv(z, tempfile())
54 | }
55 | \keyword{utilities}
56 | 


--------------------------------------------------------------------------------
/R/getSeq4ACC.R:
--------------------------------------------------------------------------------
 1 | ## Query the NCBI Entrez search page (db=Nucleotide) for 'accNum' and pull
 2 | ## the gi number(s) out of the lines of the returned page.
 3 | ##   accNum: accession number; pasted into the URL and into the regular
 4 | ##           expressions as is.
 5 | ## Returns the captured gi number(s) as character; stops with an error when
 6 | ## no line of the page matches.
 7 | getGI <- function(accNum){
 8 |     queryUrl <- paste("https://www.ncbi.nlm.nih.gov/entrez/",
 9 |                       "query.fcgi?db=Nucleotide&cmd=search&term=",
10 |                       accNum, sep = "")
11 |     page <- readLines(queryUrl)
12 |     # Lines that mention a gi entry followed by this accession number
13 |     hitPattern <- paste("gi\\|.*\\|[a-zA-Z0-9]+\\|", accNum, ".*",
14 |                         sep = "")
15 |     hits <- page[grep(hitPattern, page)]
16 |     # Reduce each hit to the digits between "gi|" and the next "|"
17 |     giPattern <- paste(".*gi\\|([0-9]+)\\|[a-zA-Z0-9]+\\|", accNum,
18 |                        ".*", sep = "")
19 |     gi <- gsub(giPattern, "\\1", hits)
20 |     if(length(gi) == 0){
21 |         stop(paste("Can't obtain a gi number for", accNum))
22 |     }
23 |     gi
24 | }
16 | 
17 | 
18 | ## Download the FASTA record for the identifier 'gi' from the NCBI
19 | ## E-utilities efetch service (db=nucleotide, rettype=fasta) and return the
20 | ## sequence as one string.
21 | ##   gi: identifier appended to the efetch URL as the 'id' parameter.
22 | ## Returns all lines after the first one pasted together without a
23 | ## separator.  Stops with an error when the response holds no such lines.
24 | getSEQ <- function(gi){
25 | 
26 |     ## Old stuff left just in case NCBI changes things back on us (10/15/10)
27 |     ## seq <- readLines(paste("http://www.ncbi.nlm.nih.gov/entrez/batchseq.cgi?",
28 |     ##              "cmd=&txt=on&save=&cfm=&list_uids=", gi, "&",
29 |     ##              "db=nucleotide&extrafeat=16&term=&view=fasta&",
30 |     ##              "dispmax=20&SendTo=t&__from=&__to=&__strand=", sep = ""))
31 | 
32 |     ## Named 'fasta' rather than 'seq' so that base::seq is not shadowed
33 |     fasta <- readLines(paste("https://www.ncbi.nlm.nih.gov/entrez/eutils/",
34 |                    "efetch.fcgi?db=nucleotide&rettype=fasta&id=",gi,
35 |                     sep = ""))
36 | 
37 |     ## The first line is skipped (the header of the record), so at least two
38 |     ## lines are needed.  With a single line the former
39 |     ## seq[2:length(seq)] became seq[2:1] and returned "NA" glued to the
40 |     ## header instead of failing.
41 |     if(length(fasta) < 2){
42 |         stop("Failed to extract the sequence")
43 |     }
44 |     paste(fasta[-1], collapse = "")
45 | }
36 | 
37 | ## better use reverseComplement from Biostrings
38 | 
39 | #revBase <- function(x){
40 | #  tot <- which(x == "A")
41 | #  tog <- which(x == "C")
42 | #  toa <- which(x == "T")
43 | #  toc <- which(x == "G")
44 | #  x[tot] <- "T"
45 | #  x[toa] <- "A"
46 | #  x[toc] <- "C"
47 | #  x[tog] <- "G"
48 | #  x
49 | # }
50 | 
51 | #revString <- function(x)
52 | #   sapply(lapply(lapply(strsplit(x, NULL), rev), revBase), paste, collapse="")
53 | 
54 | 


--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
 1 | Here is a start of a list of ideas:
 2 | Please feel free to add to it or to start implementing stuff:
 3 | 
 4 |  1) The file, data/mgu74v2id
 5 |    contains an environment that has the affy number linked to (the 
 6 |   genbank accession number)
 7 | 
 8 |    It would be nice to be able to select a set of genes and
 9 |    get a webpage set up with the Affy #, and links to various
10 |    databases (genbank in particular).
11 | 
12 |  2) We need to decide which databases we will store in data:
13 |     -functional groups
14 |     -basic mappings
15 |     -proteins
16 |     -chromosome mapping
17 | 
18 |  3) A set of tools that let us select data from a data frame (such as the 
19 |   output of dChip) according to the data in these files.
20 | 
21 |  4) Do we want to explore a Tcl/Tk interface to selection?
22 | 
23 |  5) Need to store the data files compressed (.gz) and then use the appropriate
24 |     readers. What would be even better would be to have stored binary forms
25 |     of environments (or memory-mapped environments).
26 | 
27 |  6) Now have the ability to load up locus link webpages, but we
28 |     would like to be able to do keyword searches and to interpret the
29 |     results directly in R.
30 | 
31 |  7) We need some form of class structure that will allow us to
32 |    manipulate the objects in a more uniform fashion.
33 | 
34 |  8) We need to design the interface so it can be handled either by
35 |    environments/hash tables or by a database -- either Access or
36 |    Postgres
37 | 
38 | ---
39 | some comments after 26 oct meetings
40 | 
41 | options-like structure (or object-like, e.g., lm.object) for htmlTable
42 | and htmlPage so that htmlTable( listOfContent, listOfMarkup, tableOptions )
43 | and htmlPage( listOfContent, listOfMarkup, pageOptions ) have good defaults.
44 | constraints on content elements of listsOfContent will be needed --
45 | same number of elements per list element, e.g.  role of the container
46 | class should be considered
47 | 


--------------------------------------------------------------------------------
/vignettes/annotate.bib:
--------------------------------------------------------------------------------
 1 | @Article{PubMedRnews,
 2 |   author       = {R. Gentleman and J. Gentry},
 3 |   title        = {Querying PubMed},
 4 |   journal      = {R News},
 5 |   year         = 2002,
 6 |   volume       = 2,
 7 |   number       = 2,
 8 |   pages        = {28--31},
 9 |   month        = {June},
10 |   url          = {http://CRAN.R-project.org/doc/Rnews/},
11 | }
12 | 
13 | 
14 | @Article{PubMedVignette,
15 |  author = {J. Gentry},
16 |  title = {HowTo: Automated Querying of PubMed Data},
17 |  journal = {Bioconductor Vignettes},
18 |  year = {2004},
19 |  url = {http://www.bioconductor.org}
20 | }
21 | 
22 | 
23 | @Article{ChromLocVignette,
24 |  author = {J. Gentry},
25 |  title = {HowTo: Build and use chromosomal information},
26 |  journal = {Bioconductor Vignettes},
27 |  year = {2004},
28 |  url = {http://www.bioconductor.org}
29 | }
30 | 
31 | @Article{HTMLVignette,
32 |  author = {R. Gentleman},
33 |  title = {HowTo: get pretty html output for my gene list},
34 |  journal = {Bioconductor Vignettes},
35 |  year = {2003},
36 |   url = {http://www.bioconductor.org}
37 | }
38 | 
39 | @Article{XML,
40 |   author = {Duncan {Temple Lang}},
41 |   title = {Tools for parsing and generating XML within R and S-Plus.},
42 |   journal = {CRAN},
43 |   year = {2000},
44 |   url = {http://www.omegahat.org/RSXML},
45 | }
46 | 
47 | @Article{GOA,
48 |    author = {E. Camon and M. Magrane and D. Barrell and V. Lee and
49 |                   E. Dimmer and J. Maslen and
50 |           D. Binns and N. Harte and R. Lopez and R. Apweiler},
51 |    year = {2004},
52 |    title = {The {G}ene {O}ntology Annotation ({GOA}) Database: sharing
53 |          knowledge in {U}niprot with {G}ene {O}ntology},  
54 |    journal = {Nucleic Acids Research},
55 |    volume = {32},
56 |    issue = {1},
57 |    pages = {D262--D266}
58 |  }
59 | 
60 | @Article{GO,
61 |   author = 	 {{The Gene Ontology Consortium}},
62 |   title = 	 {{G}ene {O}ntology: tool for the unification of biology},
63 |   journal = 	 {Nature Genetics},
64 |   year = 	 {2000},
65 |   volume = 	 {25},
66 |   pages = 	 {25--29}
67 | }
68 | 


--------------------------------------------------------------------------------
/man/pubmed.Rd:
--------------------------------------------------------------------------------
 1 | \name{pubmed}
 2 | \alias{pubmed}
 3 | 
 4 | \title{A function to open the browser to Pubmed with the selected gene. }
 5 | \description{
 6 |   Given a vector of Pubmed identifiers or accession numbers, the user
 7 |   can either have a browser display a URL showing a Pubmed query for
 8 |   those identifiers, or an XMLDoc object with the same data.
 9 | }
10 | \usage{
11 | pubmed(...,disp=c("data","browser"), type=c("uid","accession"),
12 |        pmaddress=.efetch("PubMed", disp, type))
13 | }
14 | \arguments{
15 |   \item{...}{Vectorized set of Pubmed ID's}
16 |   \item{disp}{Either \code{"data"} or \code{"browser"} (default is
17 |     \code{"data"}).  \code{"data"} returns an XMLDoc, while
18 |     \code{"browser"} will display information in the user's browser.}
19 |   \item{type}{Denotes whether the arguments are accession numbers or
20 |     UIDS.  Defaults to uids.}
21 |   \item{pmaddress}{Specific path to the pubmed efetch engine from the
22 |     NCBI website.}
23 | }
24 | \details{
25 |   A simple function to retrieve Pubmed data given a specific ID, either
26 |   through XML or through a web browser.  This function will accept
27 |   either pubmed accession numbers or NCBI UIDs (defined as a Pubmed
28 |   ID or a Medline ID) - although the types must not be mixed in a single
29 |   call. 
30 | 
31 |   WARNING:  The powers that be at NCBI have been known to ban the IP
32 |   addresses of users who abuse their servers (currently defined as
33 |   less than 2 seconds between queries).  Do NOT put this function in
34 |   a tight loop or you may find your access revoked.
35 | }
36 | \value{
37 |   If the option "data" is used, an object of type XMLDoc is returned,
38 |   unless there was an error with the query in which case an object of
39 |   type try-error is returned.
40 | 
41 |   If the option "browser" is used, nothing is returned.
42 | }
43 | 
44 | \author{R. Gentleman }
45 | 
46 | \seealso{\code{\link{genbank}}, \code{\link[XML]{xmlTreeParse}}}
47 | 
48 | \examples{
49 |    if( interactive() )
50 |     opts <- c("data","browser") else
51 |     opts <- "data"
52 |    for (dp in opts)
53 |      pubmed("11780146","11886385","11884611",disp=dp)
54 | }
55 | \keyword{ interface }
56 | 


--------------------------------------------------------------------------------
/R/serializeEnv.R:
--------------------------------------------------------------------------------
 1 | ## Write the contents of an environment to a gzip-compressed XML file.
 2 | ##   env:   an environment, or a character string of R code (typically the
 3 | ##          name of an environment) whose value as.list() can convert.
 4 | ##   fname: name of the output file; must be character.
 5 | ## Each element becomes an <entry> holding the element name in <key> and
 6 | ## the output of serialize(..., ascii=TRUE) in <value>, both wrapped in
 7 | ## CDATA sections.
 8 | ## Called for its side effect; returns NULL invisibly.
 9 | ## NOTE(review): serialize() with connection = NULL returns a raw vector in
10 | ## current R, so the text written by cat() may differ from the example in
11 | ## the Rd page -- confirm against whatever reads these files.
12 | serializeEnv <- function(env, fname) {
13 |  if (!is.character(fname))
14 |      stop("fname should be a character name of file for storage")
15 | 
16 |  if (is.character(env)) {
17 |      ## NOTE(review): 'env' is parsed and evaluated as R code in this
18 |      ## function's frame, exactly as before; only pass trusted strings.
19 |      envObj <- eval(parse(text=env))
20 |      envList <- as.list(envObj)
21 |  }
22 |  else if (is.environment(env))
23 |      envList <- as.list(env)
24 |  else
25 |      stop("invalid 'env' argument")
26 | 
27 |  keys <- names(envList)
28 | 
29 |  outFile <- gzfile(fname, open="wb")
30 |  ## Close the connection even when writing fails part-way through
31 |  on.exit(close(outFile), add=TRUE)
32 | 
33 |  ## Default sep (" ") is kept here so the header is written as before
34 |  cat("<?xml version=\"1.0\"?>\n",
35 |      "<values xmlns:bt=\"http://www.bioconductor.org/RGDBM\">",
36 |      file=outFile)
37 |  for (i in seq_along(envList)) {
38 |      cat("\n<entry>\n\t<key>\n\t\t<![CDATA[",
39 |           keys[i], "]]>",
40 |          "\n\t\t</key>\n\t\t<value>\n\t\t<![CDATA[",
41 |          serialize(envList[[i]], NULL, ascii=TRUE),
42 |          "]]>\n\t\t</value>\n\t</entry>", sep="", file=outFile)
43 |  }
44 |  cat("\n</values>", file=outFile)
45 | 
46 |  invisible(NULL)
47 | }
33 | 
34 | ## Serialize every environment found in an attached data package to
35 | ## inst/gdbm/<name>.xml.gz below 'pkgDir', using serializeEnv().
36 | ##   pkgDir: directory of the data package; its basename is used as the
37 | ##           package name, which must be loadable with require().
38 | ## Returns 0 when the package exposes no objects, otherwise NULL.
39 | ## The working directory is changed while running and restored on exit.
40 | ## NOTE(review): object names are evaluated with eval(parse()) in this
41 | ## function's frame, so a data set named like one of the locals (pkg, cDir,
42 | ## dataSets, i, cmd) would presumably resolve to the local -- confirm before
43 | ## renaming anything here.
44 | serializeDataPkgEnvs <- function(pkgDir) {
45 |     pkg <- basename(pkgDir)
46 |     ## require() returns FALSE rather than failing, hence the explicit stop
47 |     require(pkg, character.only=TRUE) || stop("data package ",
48 |                  pkg, " not installed")
49 | 
50 |     ## Remember where we started; on.exit puts the working directory back
51 |     cDir <- getwd()
52 |     on.exit(setwd(cDir), add=TRUE)
53 |     setwd(pkgDir)
54 | 
55 |     ## Make sure inst/gdbm exists below the package directory
56 |     if (! file.exists("inst"))
57 |         if (!dir.create("inst"))
58 |             stop("Failed to create inst for ", pkgDir)
59 |     if (! file.exists(file.path("inst", "gdbm")))
60 |         if (!dir.create(file.path("inst", "gdbm")))
61 |             stop("Failed to create inst/gdbm for ", pkgDir)
62 |     ## Output files are written relative to this directory
63 |     setwd("inst/gdbm")
64 | 
65 |     ## Names of the objects in the package's entry on the search path
66 |     dataSets <- ls(paste("package", pkg, sep=":"))
67 |     if (length(dataSets) == 0)
68 |         return(0)
69 | 
70 |     for (i in seq(along=dataSets)) {
71 |         ## Evaluates is.environment(<name>) with the name taken as R code
72 |         cmd <- paste("is.environment(", dataSets[i], ")")
73 |         if (eval(parse(text=cmd))) {
74 |             print(paste("Converting", dataSets[i]))
75 |             ## The name (not the object) is passed; serializeEnv()
76 |             ## evaluates it itself
77 |             serializeEnv(dataSets[i], paste(dataSets[i], ".xml.gz", sep=""))
78 |         }
79 |         else
80 |             print(paste(dataSets[i], "is not an environment, skipping."))
81 |     }
82 |     NULL
83 | }
66 | 


--------------------------------------------------------------------------------
/man/isValidkey.Rd:
--------------------------------------------------------------------------------
 1 | \docType{methods}
 2 | \name{isValidKey}
 3 | \alias{isValidKey}
 4 | \alias{allValidKeys}
 5 | \alias{isValidKey,character,character-method}
 6 | \alias{isValidKey,character,OrgDb-method}
 7 | \alias{allValidKeys,character-method}
 8 | \alias{allValidKeys,OrgDb-method}
 9 | \title{Get or verify valid IDs for a package or OrgDb object.}
10 | \description{
11 |   These functions either verify that a list of IDs are primary and valid IDs
12 |   for a package, or else return all the valid primary IDs from a package.
13 | }
14 | \usage{
15 | isValidKey(ids, pkg)
16 | allValidKeys(pkg)
17 | 
18 | \S4method{isValidKey}{character,character}(ids, pkg)
19 | 
20 | \S4method{isValidKey}{character,OrgDb}(ids, pkg)
21 | 
22 | \S4method{allValidKeys}{character}(pkg)
23 | 
24 | \S4method{allValidKeys}{OrgDb}(pkg)
25 | 
26 | }
27 | \arguments{
28 |   \item{ids}{A character vector containing IDs that you wish to validate.}
29 |   \item{pkg}{Either the name of an installed annotation package (e.g.,
30 |     "org.Hs.eg.db"), or an uninstalled annotation package, e.g., from AnnotationHub.}
31 | }
32 | \details{
33 |   Every package has some kind of ID that is central to that package.
34 |   For chip-based packages this will be some kind of probe, and for the
35 |   organism based packages it will be something else (usually an entrez
36 |   gene ID).  isValidKey takes a list of IDs and tests to see whether or
37 |   not they are present (valid) in a particular package.  allValidKeys
38 |   simply returns all the valid primary IDs for a package.
39 | }
40 | \value{
41 |   \code{isValidKey} returns a vector of TRUE or FALSE values corresponding to whether or not the
42 |   ID is valid.
43 | 
44 |   \code{allValidKeys} returns a vector of all the valid primary IDs.
45 | }
46 | \author{Marc Carlson}
47 | \seealso{\code{\link{updateSymbolsToValidKeys}}}
48 | \examples{
49 | \dontrun{
50 |   ## 2 bad IDs and a 3rd that will be valid
51 |   ids <- c("15S_rRNA_2","21S_rRNA_4","15S_rRNA")
52 |   isValidKey(ids, "org.Sc.sgd")
53 | 
54 |   ## 2 good IDs and a 3rd that will not be valid
55 |   ids <- c("5600","7531", "altSymbol")
56 |   isValidKey(ids, "org.Hs.eg")
57 | 
58 |   ## Get all the valid primary id from org.Hs.eg.db
59 |   allValidKeys("org.Hs.eg")
60 | }
61 | }
62 | \keyword{manip}
63 | 


--------------------------------------------------------------------------------
/man/getGOTerm.Rd:
--------------------------------------------------------------------------------
 1 | \name{getGOTerm}
 2 | \alias{getGOTerm}
 3 | \alias{getGOParents}
 4 | \alias{getGOChildren}
 5 | \alias{getGOOntology}
 6 | 
 7 | \title{Functions to Access GO data.}
 8 | \description{
 9 |   These functions provide access to data in the GO package. The data are
10 |   assembled from publicly available data from the Gene Ontology
11 |   Consortium (GO), \url{http://www.geneontology.org}. Given a list of GO
12 |   identifiers they access the children (more specific terms), the parents
13 |   (less specific terms) and the terms themselves.
14 | }
15 | \usage{
16 | getGOTerm(x)
17 | getGOParents(x)
18 | getGOChildren(x)
19 | getGOOntology(x)
20 | }
21 | \arguments{
22 |   \item{x}{A character vector of valid GO identifiers. }
23 | }
24 | \details{
25 |   GO consists of three (soon to be more) specific hierarchies: Molecular
26 |   Function (MF), Biological Process (BP) and Cellular Component
27 |   (CC). For more details consult the GO website. For each GO identifier
28 |   each of these three hierarchies is searched and depending on the
29 |   function called the appropriate values are obtained and returned.
30 | 
31 |   It is possible for a GO identifier to have no children or for it to
32 |   have no parents. However, it must have a term associated with it.
33 | }
34 | \value{
35 |   A list of the same length as \code{x}.
36 |   The list contains one entry for each element of \code{x}. That entry
37 |   is itself a list, with one component named \code{Ontology} which
38 |   has as its value one of MF, BP or CC. The second component has the
39 |   same name as the suffix of the call, i.e. Children, Parents, or Term.
40 |   If there was no match in any of the ontologies then a length zero list
41 |   is returned for that element of \code{x}.
42 | 
43 |   \code{getGOOntology} returns a vector of categories (the names of which
44 |   are the original GO term names). Elements of this vector that are
45 |   \code{NA} indicate term names for which there is no category (and
46 |   hence they are not really term names).
47 | }
48 | \references{The Gene Ontology Consortium}
49 | 
50 | \author{R. Gentleman}
51 | 
52 | \examples{
53 |  library("GO.db")
54 | 
55 |  sG <- sample(keys(GO.db, "GOID"), 8)
56 | 
57 |  gT <- getGOTerm(sG)
58 |  gP <- getGOParents(sG)
59 |  gC <- getGOChildren(sG)
60 |  gcat <- getGOOntology(sG)
61 | 
62 | }
63 | \keyword{manip}
64 | 


--------------------------------------------------------------------------------
/man/getAnnMap.Rd:
--------------------------------------------------------------------------------
 1 | \name{getAnnMap}
 2 | \alias{getAnnMap}
 3 | 
 4 | \title{Get annotation map}
 5 | \description{
 6 |   This function retrieves a map object from an annotation data
 7 |   package.  It is intended to serve as a common interface for
 8 |   obtaining map objects from both SQLite-based and environment-based
 9 |   annotation data packages.
10 | }
11 | \usage{
12 | getAnnMap(map, chip, load = TRUE, type = c("db", "env"))
13 | }
14 | 
15 | \arguments{
16 |   \item{map}{a string specifying the name of the map to retrieve.  For
17 |       example, \code{"ENTREZID"} or \code{"GO"}}
18 |   \item{chip}{a string describing the chip or genome}
19 |   \item{load}{a logical value.  When \code{TRUE}, \code{getAnnMap}
20 |       will try to load the annotation data package if it is not
21 |       already attached.}
22 |   \item{type}{a character vector of one or more annotation data
23 |       package types.  The currently supported types are \code{"db"} and
24 |       \code{"env"}.  If \code{load} is \code{TRUE}, you can specify both
25 |       \code{"db"} and \code{"env"} and the order will determine which
26 |       type is tried first.  This provides a fall-back mechanism when the
27 |       preferred annotation data package type is not available.  If
28 |       \code{type} is missing, then the first matching annotation package
29 |       found in the search path will be used, and then the default value
30 |       of \code{type} takes over.}
31 | }
32 | \details{
33 |   \code{getAnnMap} uses the search path (see \code{search}) to find an
34 |   appropriate annotation data package; when called with
35 |   \code{chip="hgu95av2"}, the function will use the first hgu95av2
36 |   package on the search path whether it be db or environment-based.  If
37 |   \code{load=TRUE} and no suitable package is found on the search path,
38 |   then the function will attempt to load an appropriate package.  The
39 |   \code{type} argument is used to determine which type of package (db or
40 |   env) is loaded first.
41 | }
42 | \value{
43 |   If \code{type} is \code{"db"}, an S4 object representing the
44 |   requested map.  If \code{type} is \code{"env"}, an R
45 |   \code{environment} object representing the requested map.
46 | }
47 | 
48 | \author{Seth Falcon}
49 | 
50 | \examples{
51 | map <- getAnnMap("ENTREZID", "hgu95av2", load=TRUE, type=c("env", "db"))
52 | class(map)
53 | }
54 | 
55 | \keyword{manip}
56 | 


--------------------------------------------------------------------------------
/man/genbank.Rd:
--------------------------------------------------------------------------------
 1 | \name{genbank}
 2 | \alias{genbank}
 3 | \title{A function to open the browser to Genbank with the selected gene. }
 4 | \description{
 5 |   Given a vector of Genbank accession numbers or NCBI UIDs, the user can
 6 |   either have a browser display a URL showing a Genbank query for those
 7 |   identifiers, or an XMLDoc object with the same data.
 8 | }
 9 | \usage{
10 | genbank(...,disp=c("data","browser"), type=c("accession","uid"),
11 |         pmaddress=.efetch("gene", disp, type))
12 | }
13 | \arguments{
14 |   \item{...}{Vectorized set of Genbank accession numbers or NCBI UIDs}
15 |   \item{disp}{Either \code{"data"} or \code{"browser"} (default is
16 |     \code{"data"}).  \code{"data"} returns an XMLDoc, while
17 |     \code{"browser"} will display information in the user's browser.}
18 |   \item{type}{Denotes whether the arguments are accession numbers or
19 |     UIDS.  Defaults to accession values.}
20 |   \item{pmaddress}{Specific path to the pubmed efetch engine from the
21 |     NCBI website.}
22 | 
23 | }
24 | \details{
25 |   A simple function to retrieve Genbank data given a specific ID, either
26 |   through XML or through a web browser.  This function will accept
27 |   either Genbank accession numbers or NCBI UIDs (defined as a Pubmed
28 |   ID or a Medline ID) - although the types must not be mixed in a single
29 |   call. 
30 | 
31 |   WARNING:  The powers that be at NCBI have been known to ban the IP
32 |   addresses of users who abuse their servers (currently defined as less
33 |   than 2 seconds between queries).  Do NOT put this function in a tight
34 |   loop or you may find your access revoked.
35 | }
36 | \value{
37 |   If the option "data" is used, an object of type XMLDoc is returned,
38 |   unless there was an error with the query in which case an object of
39 |   type try-error is returned.
40 | 
41 |   If the option "browser" is used, nothing is returned.
42 | }
43 | 
44 | \author{R. Gentleman }
45 | 
46 | \seealso{\code{\link{pubmed}}, \code{\link[XML]{xmlTreeParse}}}
47 | 
48 | \examples{
49 |    ## Use UIDs to get data in both browser & data forms
50 | 
51 |    if ( interactive() ) {
52 |       disp <- c("data","browser")
53 |    } else {
54 |       disp <- "data"
55 |    }
56 | 
57 |    for (dp in disp)
58 |      genbank("12345","9997",disp=dp,type="uid")
59 | 
60 |    ## Use accession numbers to retrieve browser info
61 |    if ( interactive() )
62 |        genbank("U03397","AF030427",disp="browser")
63 | }
64 | \keyword{interface }
65 | 


--------------------------------------------------------------------------------
/man/HTMLPage-class.Rd:
--------------------------------------------------------------------------------
 1 | \name{HTMLPage-class}
 2 | \docType{class}
 3 | \alias{HTMLPage-class}
 4 | \alias{HTMLPage}
 5 | \alias{FramedHTMLPage}
 6 | \alias{FramedHTMLPage-class}
 7 | \alias{fileName}
 8 | \alias{mainPage}
 9 | \alias{sidePage}
10 | \alias{pageText}
11 | \alias{toFile}
12 | \alias{topPage}
13 | \alias{pageTitle}
14 | \alias{HTMLPage,HTMLPage-method}
15 | \alias{FramedHTMLPage,HTMLPage-method}
16 | \alias{fileName,HTMLPage-method}
17 | \alias{mainPage,HTMLPage-method}
18 | \alias{sidePage,HTMLPage-method}
19 | \alias{pageText,HTMLPage-method}
20 | \alias{toFile,HTMLPage-method}
21 | \alias{topPage,HTMLPage-method}
22 | \alias{pageTitle,HTMLPage-method}
23 | \alias{show,HTMLPage-method}
24 | \alias{initialize,FramedHTMLPage-method}
25 | \alias{mainPage,FramedHTMLPage-method}
26 | \alias{show,FramedHTMLPage-method}
27 | \alias{sidePage,FramedHTMLPage-method}
28 | \alias{toFile,FramedHTMLPage-method}
29 | \alias{topPage,FramedHTMLPage-method}
30 | \title{Classes to represent HTML pages}
31 | \description{Class \code{HTMLPage} and \code{FramedHTMLPage} are a pair
32 |   of experimental classes used to explore concepts of representing HTML
33 |   pages using S4 objects.}
34 | \section{Slots}{
35 |   \describe{
36 |     \item{\code{fileName}:}{Object of class \code{"character"} The
37 |       filename of the HTML page}
38 |     \item{\code{pageText}:}{Object of class \code{"character"} The text
39 |       of the HTML page}
40 |     \item{\code{pageTitle}:}{Object of class \code{"character"} The
41 |       title of the HTML page}
42 |     \item{\code{topPage}:}{Object of class \code{"HTMLPage"} The header
43 |       page for a FramedHTMLPage}
44 |     \item{\code{sidePage}:}{Object of class \code{"HTMLPage"} The side
45 |       index page for a FramedHTMLPage}
46 |     \item{\code{mainPage}:}{Object of class \code{"HTMLPage"} The
47 |       primary page for a FramedHTMLPage}
48 |   }
49 | }
50 | 
51 | \section{Methods}{
52 |   \describe{
53 |     \item{show}{\code{signature(object = "HTMLPage")}: Describes
54 |       information about the page}
55 |     \item{fileName}{\code{signature(object = "HTMLPage")}: Gets the
56 |       fileName slot}
57 |     \item{pageText}{\code{signature(object = "HTMLPage")}: Gets the
58 |       pageText slot}
59 |     \item{pageTitle}{\code{signature(object = "HTMLPage")}: Gets the
60 |       pageTitle slot}
61 |     \item{toFile}{\code{signature(object = "HTMLPage")}: Writes the page
62 |     out to the file designated by the fileName slot}
63 |   }
64 | }
65 | \author{Jeff Gentry}
66 | \note{
67 |   These classes are currently experimental.
68 | 
69 |   FramedHTMLPage is modeled after the framing layout of the Bioconductor
70 |   website (www.bioconductor.org).
71 | }
72 | 
73 | \examples{
74 | ##---- Should be DIRECTLY executable !! ----
75 | }
76 | \keyword{classes}
77 | 


--------------------------------------------------------------------------------
/man/readGEOAnn.Rd:
--------------------------------------------------------------------------------
 1 | \name{readGEOAnn}
 2 | \alias{readGEOAnn}
 3 | \alias{readIDNAcc}
 4 | \alias{getGPLNames}
 5 | \alias{getSAGEFileInfo}
 6 | \alias{getSAGEGPL}
 7 | \alias{readUrl}
 8 | \title{Function to extract data from the GEO web site}
 9 | \description{
10 |   Data files that are available at GEO web site are identified by GEO
11 |   accession numbers. Given the url for the CGI script at GEO and
12 |   a GEO accession number, the functions extract data from the web site
13 |   and return a matrix containing the data.
14 | }
15 | \usage{
16 | readGEOAnn(GEOAccNum, url = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?")
17 | readIDNAcc(GEOAccNum, url = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?")
18 | getGPLNames(url ="https://www.ncbi.nlm.nih.gov/geo/query/browse.cgi?") 
19 | getSAGEFileInfo(url =
20 |                        "https://www.ncbi.nlm.nih.gov/geo/query/browse.cgi?view=platforms&prtype=SAGE&dtype=SAGE")
21 | getSAGEGPL(organism = "Homo sapiens", enzyme = c("NlaIII", "Sau3A"))
22 | readUrl(url)
23 | }
24 | \arguments{
25 |   \item{url}{\code{url} the url for the CGI script at GEO}
26 |   \item{GEOAccNum}{\code{GEOAccNum} a character string for the GEO
27 |     accession number of a desired file (e. g. GPL97)}
28 |   \item{organism}{\code{organism} a character string for the name of the
29 |     organism of interest}
30 |   \item{enzyme}{\code{enzyme} a character string that can be either
31 |     NlaIII or Sau3A for the enzyme used to create SAGE tags}
32 | }
33 | \details{
34 |   \code{url} is the CGI script that processes user's
35 |   request. \code{\link{readGEOAnn}} invokes the CGI by passing a GEO 
36 |   accession number and then processes the data file obtained.
37 | 
38 |   \code{\link{readIDNAcc}} calls \code{\link{readGEOAnn}} to read the
39 |   data and then extracts the columns for probe ids and accession numbers.
40 |   The \code{GEOAccNum} has to be the id for an Affymetrix chip.
41 | 
42 |   \code{\link{getGPLNames}} parses the html file that lists GEO
43 |   accession numbers and descriptions of the array represented by the
44 |   corresponding GEO accession numbers.  
45 | }
46 | \value{
47 |   Both \code{\link{readGEOAnn}} and \code{\link{readIDNAcc}} return a
48 |   matrix.
49 | 
50 |   \code{\link{getGPLNames}} returns a named vector of the names of
51 |   commercial arrays. The names of the vector are the corresponding GEO
52 |   accession numbers.
53 | }
54 | \references{\url{https://www.ncbi.nlm.nih.gov/geo}}
55 | \author{Jianhua Zhang}
56 | 
57 | \examples{
58 | # Get array names and GEO accession numbers
59 | #geoAccNums <- getGPLNames()
60 | # Read the annotation data file for HG-U133A which is GPL96 based on
61 | # examining geoAccNums 
62 | #temp <- readGEOAnn(GEOAccNum = "GPL96")
63 | #temp2 <- readIDNAcc(GEOAccNum = "GPL96")
64 | }
65 | \keyword{manip}
66 | 
67 | 


--------------------------------------------------------------------------------
/man/homoData-class.Rd:
--------------------------------------------------------------------------------
 1 | \name{homoData-class}
 2 | \docType{class}
 3 | \alias{homoData-class}
 4 | \alias{homoData}
 5 | \alias{homoLL}
 6 | \alias{homoOrg}
 7 | \alias{homoType}
 8 | \alias{homoURL}
 9 | \alias{homoACC}
10 | \alias{homoHGID}
11 | %\alias{show}
12 | \alias{homoPS}
13 | \alias{orgNameNCode}
14 | \alias{homoPS,homoData-method}
15 | \alias{homoLL,homoData-method}
16 | \alias{homoOrg,homoData-method}
17 | \alias{homoType,homoData-method}
18 | \alias{homoURL,homoData-method}
19 | \alias{homoACC,homoData-method}
20 | \alias{homoHGID,homoData-method}
21 | \alias{show,homoData-method}
22 | \title{Class "homoData"}
23 | \description{A class to present data for HomoloGene data of a matching
24 |   sequence} 
25 | \section{Objects from the Class}{
26 | Objects can be created by calls of the form \code{new("homoData", ...)}. 
27 | }
28 | \section{Slots}{
29 |   \describe{
30 |     \item{\code{homoOrg}:}{Object of class \code{"character"} the
31 |       scientific name of the organism of interest}
32 |     \item{\code{homoLL}:}{Object of class \code{"numeric"} the LocusLink
33 |       id of the gene of interest}
34 |     \item{\code{homoType}:}{Object of class \code{"character"} the type of
35 |       similarity. Valid values include B - a reciprocal best match
36 |       between 3 or more organisms, b - a reciprocal best match, and c -
37 |       a curated homology relationship} 
38 |     \item{\code{homoPS}:}{Object of class \code{"numeric"} percent
39 |       similarity value}
40 |     \item{\code{homoURL}:}{Object of class \code{"character"} the URL for
41 |       curated homology relationship}
42 |     \item{\code{homoACC}:}{Object of class \code{"character"} the
43 |       accession number}
44 |     \item{\code{homoHGID}:}{Object of class \code{"numeric"} the
45 |       internal HomoloGeneID}
46 |   }
47 | }
48 | \section{Methods}{
49 |   \describe{
50 |     \item{homoPS}{\code{signature(object = "homoData")}: the get function for
51 |       slot \code{homoPS}}
52 |     \item{homoLL}{\code{signature(object = "homoData")}: the get function
53 |       for slot \code{homoLL}}
54 |     \item{homoOrg}{\code{signature(object = "homoData")}: the get function
55 |       for slot \code{homoOrg}}
56 |     \item{homoType}{\code{signature(object = "homoData")}: the get function
57 |       for slot \code{homoType}}
58 |     \item{homoURL}{\code{signature(object = "homoData")}: the get function
59 |       for slot \code{homoURL}}
60 |     \item{homoACC}{\code{signature(object = "homoData")}: the get function
61 |       for slot \code{homoACC}}
62 |     \item{homoHGID}{\code{signature(object = "homoData")}: the get
63 |       function for slot \code{homoHGID}}
64 |   }
65 | }
66 | \references{\url{ftp://ftp.ncbi.nih.gov/pub/HomoloGene/README}}
67 | \author{Jianhua Zhang}
68 | 
69 | \examples{
70 |     new("homoData", homoPS = 82.3, homoLL = 2324853, homoOrg = "Homo sapins",
71 | homoType = "B", homoURL = "", homoHGID = 12345)
72 | }
73 | \keyword{classes} 
74 | 


--------------------------------------------------------------------------------
/man/setRepository.Rd:
--------------------------------------------------------------------------------
 1 | \name{setRepository}
 2 | \alias{setRepository}
 3 | \alias{getRepositories}
 4 | \alias{clearRepository}
 5 | \title{Functions to add arbitrary repositories}
 6 | 
 7 | \description{These functions allow end users to add arbitrary
 8 |   repositories for use with the \code{htmlpage} function.
 9 | }
10 | \usage{
11 | setRepository(repository, FUN, ..., verbose=TRUE)
12 | getRepositories()
13 | clearRepository(repository, verbose=TRUE)
14 | }
15 | \arguments{
16 |   \item{repository}{A character name for the repository.}
17 |   \item{FUN}{A function to build hyperlinks for the repository. See
18 |     details for more information.}
19 |   \item{...}{Allows one to pass arbitrary code to underlying functions.}
20 |   \item{verbose}{Output warning messages?}
21 | }
22 | \details{These functions allow end users to add, view, and remove repositories
23 |   for use with the \code{htmlpage} function. \code{getRepositories} will
24 |   output a vector of names for available
25 |   repositories. \code{clearRepository} can be used to remove a
26 |   repository if so desired. \code{setRepository} can be used to add a
27 |   repository. See the examples section for the format of the FUN
28 |   argument.
29 | 
30 |   Once a new repository has been set, the \code{htmlpage} function can
31 |   be called using the name of the new repository as a value in the
32 |   repository argument (e.g., htmlpage(<other args>, repository =
33 |   list("newrepositoryname"))).
34 | }
35 | \author{Martin Morgan <mtmorgan@fhcrc.org>}
36 | \examples{
37 | 
38 | ## A simple fake URI
39 | repofun <- function(ids, ...)
40 | paste("http://www.afakeuri.com/", ids, sep = "")
41 | 
42 | setRepository("simple", repofun)
43 | 
44 | ## More complicated, we want to make sure that
45 | ## NAs get converted to empty cells
46 | 
47 | repofun <- function(ids, ...){
48 | bIDs <- which(is.na(ids))
49 | out <- paste("http://www.afakeuri.com/", ids, sep = "")
50 | out[bIDs] <- "&nbsp;"
51 | out
52 | }
53 | 
54 | setRepository("complex", repofun)
55 | 
56 | ## More complicated URI where we need to pass more information
57 | ## An example is Ensembl, which requires a species as part of the URI
58 | ## Since htmlpage() has an '...' argument, we can pass arbitrary
59 | ## arguments to this function that will be passed down to our
60 | ## repofun. Here we assume the argument species="Homo_sapiens" has been
61 | ## included in the call to htmlpage().
62 | 
63 | 
64 | repofun <- function(ids, ...){
65 | if(!is.null(list(...)$species))
66 |       species <- list(...)$species
67 |   else
68 |       stop("To make links for Ensembl, you need to pass a 'species' argument.",
69 |            call. = FALSE)
70 | out <- paste("http://www.ensembl.org/", species, "/Search/Summary?species=",
71 |               species, ";idx=;q=", ids, sep = "")
72 | out
73 | }
74 | 
75 | setRepository("species_arg", repofun)
76 | 
77 | }
78 | \keyword{manip}
79 | 


--------------------------------------------------------------------------------
/man/pmAbst2HTML.Rd:
--------------------------------------------------------------------------------
 1 | \name{pmAbst2HTML}
 2 | \alias{pmAbst2HTML}
 3 | \title{HTML Generation for PubMed Abstracts}
 4 | \description{
 5 |   This function will take a \code{pubMedAbst} object, or a list of these
 6 |   objects and generate a web page that will list the titles of
 7 |   the abstracts and link to their full page on PubMed
 8 | }
 9 | \usage{
10 | pmAbst2HTML(absts, filename, title, frames = FALSE, table.center = TRUE)
11 | }
12 | \arguments{
13 |   \item{absts}{A list of \code{pubMedAbst} (or a single object)}
14 |   \item{filename}{The output filename.  If \code{frames} is
15 |     \code{FALSE}, this is the name of the single output file and
16 |     defaults to \code{absts.html}.  Otherwise, this is taken to be the
17 |     base of a set of filenames, and the default base is the empty
18 |     string.  See \code{value} for more information on output files.}
19 |   \item{title}{ Extra title information for your listing}
20 |   \item{frames}{If \code{frames} is \code{TRUE}, the resulting page will
21 |   use HTML frames, resulting in a more complex set of output pages.}
22 |   \item{table.center}{If TRUE, will center the listing of abstracts}
23 | }
24 | \details{
25 |   This function uses the \code{Entrez} functionality provided by NCBI to
26 |   retrieve the abstract URL at the PubMed site.  It will then create a
27 |   tabular webpage which will list the titles of the abstracts provided
28 |   and have them link to the appropriate PubMed page.  If \code{frames}
29 |   is \code{TRUE}, the table of links will be on the left hand side of
30 |   the page and the right hand will link directly to the appropriate
31 |   PubMed page.
32 | }
33 | \value{
34 |   If \code{frames} is \code{FALSE}, a simple HTML file is created with
35 |   the name specified by \code{filename}.
36 | 
37 |   If \code{frames} is \code{TRUE}, then there are four HTML files
38 |   created, of the form \code{XXXtop.html}, \code{XXXside.html},
39 |   \code{XXXmain.html} and \code{XXXindex.html}, where \code{XXX} is the
40 |   string provided by \code{filename}.
41 | }
42 | \author{Jeff Gentry}
43 | 
44 | \seealso{\code{pubMedAbst}}
45 | \examples{
46 |         x <- pubmed("9695952","8325638","8422497")
47 |         a <- xmlRoot(x)
48 |         numAbst <- length(xmlChildren(a))
49 |         absts <- list()
50 |         for (i in 1:numAbst) {
51 |            absts[[i]] <- buildPubMedAbst(a[[i]])
52 |         }
53 |         ## First try it w/o frames - using a temporary
54 |         ## file for the output
55 |         fname <- tempfile()
56 |         pmAbst2HTML(absts,filename=fname)
57 | 
58 |         if (interactive())
59 |           browseURL(paste("file://",fname,sep=""))
60 | 
61 |         ## Now try it w/ frames, using temporary files again.
62 |         fnameBase <- tempfile()
63 |         pmAbst2HTML(absts,filename=fnameBase, frames=TRUE)
64 | 
65 |         if (interactive())
66 |           browseURL(paste("file://",fnameBase,"index.html",sep=""))
67 | 
68 | }
69 | \keyword{utilities}
70 | 


--------------------------------------------------------------------------------
/R/readGEOAnn.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # Fetch a GEO annotation table via readGEOAnn() and keep only its "ID"
 3 | # and "GB_ACC" columns. GEOAccNum is the GEO accession number of the
 4 | # file to read; url is the GEO CGI script to query.
 5 | readIDNAcc <- function(GEOAccNum, url =
 6 |                        "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?"){
 7 |     annTable <- readGEOAnn(GEOAccNum, url)
 8 |     annTable[, c("ID", "GB_ACC")]
 9 | }
10 | 
11 | # GPL ids (column 1 of getSAGEFileInfo()) of the SAGE platforms whose
12 | # organism (column 2) and enzyme (column 3) equal the requested values.
13 | getSAGEGPL <- function(organism = "Homo sapiens",
14 |                        enzyme = c("NlaIII", "Sau3A")){
15 |     enzyme <- match.arg(enzyme)
16 |     sageInfo <- getSAGEFileInfo()
17 |     keep <- sageInfo[, 2] == organism & sageInfo[, 3] == enzyme
18 |     sageInfo[keep, 1]
19 | }
20 | 
21 | # Scrape the GEO SAGE listing at url into a three-column character
22 | # matrix: GPL number, organism, and enzyme type.
23 | getSAGEFileInfo <- function(url =
24 |                        "https://www.ncbi.nlm.nih.gov/geo/query/browse.cgi?view=platforms&prtype=SAGE&dtype=SAGE"){
25 |     tdLines <- grep("<TD", readUrl(url), value = TRUE)
26 |     # drop = FALSE keeps a one-row listing a matrix for the [, j] below
27 |     info <- matrix(tdLines, ncol = 8, byrow = TRUE)[, c(1, 5, 6), drop = FALSE]
28 |     info[, 1] <- gsub(".*>(GPL.*)</a>", "\\1", info[, 1])
29 |     info[, 2] <- gsub(".*>(.*)</a>", "\\1", info[, 2])
30 |     info[, 3] <- gsub(".*>(.*):.*</TD>", "\\1", info[, 3])
31 |     info
32 | }
33 | 
34 | 
35 | # Query the GEO database. url is the common CGI script at GEO and GEOAccNum
36 | # is the GEO accession number of a file in the database. Returns a
37 | # character matrix whose column names come from the table's header row.
38 | readGEOAnn <- function(GEOAccNum, url =
39 |                        "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?"){
40 |     temp <- readUrl(paste(url, "acc=", GEOAccNum,
41 |                    "&view=data&form=text&targ=self", sep = ""))
42 |     # Remove the header lines that come with the file
43 |     temp <- temp[grep("\t", temp)]
44 |     # Add NAs to lines with no value for the last column
45 |     temp <- strsplit(gsub("\t$", "\tNA", temp), "\t")
46 |     # Convert to a matrix
47 |     temp <- t(sapply(temp, unlist))
48 |     # The first row holds the column names; use it, then remove it.
49 |     colnames(temp) <- temp[1,]
50 |     # drop = FALSE keeps a matrix even when only one data row remains
51 |     temp[-1, , drop = FALSE]
52 | }
53 | 
54 | # Scrape GEO's listing of commercial nucleotide platforms and return the
55 | # array names as a character vector named by GEO accession number.
56 | getGPLNames <- function(url =
57 |                         "https://www.ncbi.nlm.nih.gov/geo/query/browse.cgi?"){
58 |     query <- "view=platforms&prtype=nucleotide&dtype=commercial"
59 |     page <- readUrl(paste(url, query, sep = ""))
60 |     # Keep the lines holding <TD cells and lay them out eight per row
61 |     cells <- matrix(page[grep("<TD", page)], ncol = 8, byrow = TRUE)
62 |     # Column 1: the accession is the text just before the closing </a>
63 |     accessions <- gsub(".*>(.*)</a>$", "\\1", cells[,1])
64 |     # Column 6: the array name is the text just before the closing </TD>
65 |     arrayNames <- gsub(".*>(.*)</TD>$", "\\1", cells[,6])
66 |     names(arrayNames) <- accessions
67 |     arrayNames
68 | }
69 | 
70 | # Read all lines from url; stops if the connection cannot be opened.
71 | readUrl <- function(url){
72 |     # silent = TRUE hides the low-level error message without altering
73 |     # the caller's show.error.messages option
74 |     con <- try(url(url, open = "r"), silent = TRUE)
75 |     if(inherits(con, "try-error")){
76 |         stop(paste("Can't connect to url", url))
77 |     }
78 |     on.exit(close(con))  # close the connection even if readLines fails
79 |     readLines(con)
80 | }
81 | 


--------------------------------------------------------------------------------
/man/pubMedAbst-class.Rd:
--------------------------------------------------------------------------------
 1 | \name{pubMedAbst-class}
 2 | \docType{class}
 3 | \alias{pubMedAbst-class}
 4 | \alias{pubMedAbst}
 5 | \alias{authors}
 6 | \alias{abstText}
 7 | \alias{articleTitle}
 8 | \alias{journal}
 9 | \alias{pubDate}
10 | \alias{pmid}
11 | \alias{pubMedAbst,pubMedAbst-method}
12 | \alias{authors,pubMedAbst-method}
13 | \alias{abstText,pubMedAbst-method}
14 | \alias{articleTitle,pubMedAbst-method}
15 | \alias{journal,pubMedAbst-method}
16 | \alias{pubDate,pubMedAbst-method}
17 | \alias{pmid,pubMedAbst-method}
18 | \alias{show,pubMedAbst-method}  
19 | \title{Class pubMedAbst, a class to handle PubMed abstracts, and methods
20 | for processing them.}
21 | \description{  This is a class representation for PubMed abstracts. }
22 | \section{Creating Objects}{
23 |   \code{ new('pubMedAbst',}\cr
24 |   \code{  authors      = ...., # Object of class vector}\cr
25 |   \code{  pmid         = ...., # Object of class character}\cr
26 |   \code{  abstText     = ...., # Object of class character}\cr
27 |   \code{  articleTitle = ...., # object of class character}\cr
28 |   \code{  journal      = ...., # Object of class character}\cr
29 |   \code{  pubDate      = ...., # Object of class character}\cr
30 |   \code{  )}}
31 | \section{Slots}{
32 |   \describe{
33 |     \item{\code{pmid}:}{Object of class \code{"character"} The PubMed ID
34 |       for this paper. }
35 |     \item{\code{authors}:}{Object of class \code{"vector"} The authors
36 |       of the paper. }
37 |     \item{\code{abstText}:}{Object of class \code{"character"} The
38 |       contained text of the abstract.  }
39 |     \item{\code{articleTitle}:}{Object of class \code{"character"} The
40 |       title of the article the abstract pertains to. }
41 |     \item{\code{journal}:}{Object of class \code{"character"} The journal
42 |       the article was published in. }
43 |     \item{\code{pubDate}:}{Object of class \code{"character"} The date the
44 |     journal was published. }
45 |   }
46 | }
47 | 
48 | \section{Methods}{
49 |   \describe{
50 |     \item{pmid}{\code{signature(object = "pubMedAbst")}: An accessor function
51 |       for \code{pmid}}
52 |     \item{abstText}{\code{signature(object = "pubMedAbst")}: An accessor
53 |     function for \code{abstText}}
54 |     \item{articleTitle}{\code{signature(object = "pubMedAbst")}: An accessor
55 |       function for \code{articleTitle} }
56 |     \item{authors}{\code{signature(object = "pubMedAbst")}: An accessor
57 |       function for \code{authors} }
58 |     \item{journal}{\code{signature(object = "pubMedAbst")}: An accessor
59 |       function for \code{journal} }
60 |     \item{pubDate}{\code{signature(object = "pubMedAbst")}: An accessor
61 |       function for \code{pubDate}}
62 |   }
63 | }
64 | \author{ Jeff Gentry }
65 | 
66 | \seealso{\code{\link{pubmed}}, \code{\link{genbank}}}
67 | \examples{
68 |    x <- pubmed("9695952","8325638","8422497")
69 |    a <- xmlRoot(x)
70 |    numAbst <- length(xmlChildren(a))
71 |    absts <- list()
72 |    for (i in 1:numAbst) {
73 |       absts[[i]] <- buildPubMedAbst(a[[i]])
74 |    }
75 | }
76 | \keyword{classes}
77 | 


--------------------------------------------------------------------------------
/man/blastSequences.Rd:
--------------------------------------------------------------------------------
 1 | \name{blastSequences}
 2 | \alias{blastSequences}
 3 | 
 4 | \title{
 5 |   Run a blast query to NCBI for either a string or an entrez gene ID and
 6 |   then return a series of MultipleAlignment objects.
 7 | }
 8 | 
 9 | \description{
10 |   This function sends a query to NCBI as a string of sequence or an
11 |   entrez gene ID and then returns a series of MultipleAlignment objects.
12 | }
13 | 
14 | \usage{
15 |   blastSequences(x, database, hitListSize, filter, expect, program,
16 |       timeout=40, as=c("DNAMultipleAlignment", "data.frame", "XML"))
17 | }
18 | 
19 | \arguments{
20 |   \item{x}{
21 |     A sequence as a character vector or an integer corresponding to an
22 |     entrez gene ID. Submit multiple sequences as a length-1 character
23 |     vector, \code{x = ">ID-1\nACATGCTA\n>ID-2\nAAACCACTT"}.
24 |   }
25 |   \item{database}{
26 |     Which NCBI database to use. If not \dQuote{blastn}, then set
27 |     \code{as="XML"}
28 |   }
29 |   \item{hitListSize}{
30 |     Number of hits to keep.
31 |   }
32 |   \item{filter}{
33 |     Sequence filter; \dQuote{L} for Low Complexity, \dQuote{R} for Human Repeats,
34 |     \dQuote{m} for Mask lookup
35 |   }
36 |   \item{expect}{
37 |     The BLAST \sQuote{expect} value above which matches will be
38 |     returned.
39 |   }
40 |   \item{program}{
41 |     Which program do you want to use for blast.
42 |   }
43 |   \item{timeout}{
44 |     Approximate maximum length of time, in seconds, to wait for a result.
45 |   }
46 |   \item{as}{
47 |     character(1) indicating whether the result from the NCBI server
48 |     should be parsed to a list of \code{DNAMultipleAlignment} instances,
49 |     represented as a \code{data.frame}, or returned as XML.
50 |   }
51 | 
52 | }
53 | 
54 | \details{
55 |   Right now the function only works for "blastn".
56 | 
57 |   The NCBI URL api used by this function is documented at
58 |   \url{https://www.ncbi.nlm.nih.gov/blast/Doc/urlapi.html}
59 | }
60 | 
61 | \value{
62 | 
63 |   By default, a series of \code{DNAMultipleAlignment} (see
64 |   \code{\link[Biostrings]{MultipleAlignment-class}})
65 |   objects. Alternatively, a \code{data.frame} or XML document returned
66 |   from the NCBI server. The \code{data.frame} is a \sQuote{long form}
67 |   representation of the \sQuote{Iteration}, \sQuote{Hit} and
68 |   \sQuote{Hsp} results returned from the server. The XML document is the
69 |   result of the \code{xmlParse} function of the XML library, and follows
70 |   the format described by
71 |   \url{https://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd} and
72 |   \url{https://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.mod.dtd}. }
73 | 
74 | \author{
75 |   M. Carlson
76 | }
77 | 
78 | \examples{
79 | 
80 | ## x can be an entrez gene ID
81 | blastSequences(17702, timeout=40, as="data.frame")
82 | 
83 | if (interactive()) {
84 | 
85 |     ## or x can be a sequence
86 |     blastSequences(x = "GGCCTTCATTTACCCAAAATG")
87 | 
88 |     ## hitListSize does not promise that you will get the number of
89 |     ## matches you want..  It will just try to get that many.
90 |     blastSequences(x = "GGCCTTCATTTACCCAAAATG", hitListSize="20")
91 | 
92 | }
93 | }
94 | 


--------------------------------------------------------------------------------
/man/getSYMBOL.Rd:
--------------------------------------------------------------------------------
 1 | \name{getSYMBOL}
 2 | \alias{getSYMBOL}
 3 | \alias{getGO}
 4 | \alias{getGOdesc}
 5 | \alias{getPMID}
 6 | \alias{getEG}
 7 | \alias{lookUp}
 8 | \alias{getUniqAnnItem}
 9 | \title{Functions to deal with Data Packages}
10 | \description{
11 |   The functions documented here are intended to make it easier to map
12 |   from a set of manufacturers identifiers (such as you will get from the
13 |   chips etc) to other identifiers.
14 | }
15 | \usage{
16 | getSYMBOL(x, data)
17 | getEG(x, data)
18 | getGO(x, data)
19 | getPMID(x, data)
20 | getGOdesc(x, which)
21 | lookUp(x, data, what, load = FALSE)
22 | getUniqAnnItem()
23 | }
24 | \arguments{
25 |   \item{x}{The identifiers to be mapped (usually manufacturer)}
26 |   \item{data}{The basename of the meta-data package to be used.}
27 |   \item{what}{\code{what} a character string for the name of an
28 |     annotation element of an annotation data package}
29 |   \item{which}{\code{which} a character string in the form of MF, BP,
30 |     CC, or ANY to indicate the GO categories of interest}
31 |   \item{load}{A logical value indicating whether to attempt to load the
32 |   required annotation data package if it isn't already loaded.}
33 | }
34 | \details{
35 |   Users must supply the basename of the meta-data package that they
36 |   want to use to provide the mappings. The name of the meta-data
37 |   package is the same as the basename.
38 |   
39 |   Appropriate translations are done. In some cases such as \code{getEG}
40 |   and \code{getSYMBOL} there will only be one match and a vector is
41 |   returned. In other cases such as \code{getPMID} and \code{getGO} there
42 |   may be multiple matches and a list is returned.
43 | 
44 |   For \code{getGOdesc} \code{x} contains GO identifiers (not
45 |   manufacturer identifiers) and the output is a list of GOTerms objects,
46 |   if \code{which} specifies some subset of the ontologies (MF, BP or CC) 
47 |   then only terms for that ontology are retained.
48 | 
49 |   \code{lookUp} is a general function that can be used to look
50 |   up matches. All other translation functions use \code{lookUp}
51 | 
52 |   A BioC annotation data package contains annotation data environments
53 |   whose names are package name (e. g. hgu95av2) + element name
54 |   (e. g. PMID). \code{what} must be one of the element names for the
55 |   given data package. 
56 | 
57 |   \code{getUniqAnnItem} keeps track of the annotation elements that have
58 |   one to one mappings.   
59 | }
60 | \value{
61 |   Either a vector or a list depending on whether multiple values per
62 |   input are possible.
63 | }
64 | \author{R. Gentleman}
65 | \seealso{\code{\link{mget}}}
66 | \examples{
67 |   library("hgu95av2.db")
68 |   library("GO.db")
69 | 
70 |   data(sample.ExpressionSet)
71 |   gN <- featureNames(sample.ExpressionSet)[100:105]
72 |   lookUp(gN, "hgu95av2", "SYMBOL")
73 | 
74 |   # Same as lookUp for SYMBOL except the return is a vector
75 |   getSYMBOL(gN,"hgu95av2" )
76 |   gg <- getGO(gN, "hgu95av2")
77 |   lookUp(gg[[2]][[1]][["GOID"]], "GO", "TERM")
78 | 
79 |   # Same as lookUp for TERM
80 |   getGOdesc(gg[[2]][[1]][["GOID"]], "ANY")
81 | 
82 |   # For BP only
83 |   getGOdesc(gg[[2]][[1]][["GOID"]], "BP")
84 |   getEG(gN, "hgu95av2")
85 |   getPMID(gN, "hgu95av2")
86 | }
87 | \keyword{manip}
88 | 


--------------------------------------------------------------------------------
/man/LL2homology.Rd:
--------------------------------------------------------------------------------
 1 | \name{LL2homology}
 2 | \alias{LL2homology}
 3 | \alias{HGID2homology}
 4 | \alias{ACC2homology}
 5 | \title{DEFUNCT Functions that find the homology data for a given set of
 6 |   LocusLink ids or HomoloGeneIDs}
 7 | \description{
 8 |   These functions are DEFUNCT. All this functionality has been
 9 |   replaced by inPARANOID packages.
10 |   Given a set of LocusLink ids or NCBI HomoloGeneIDs, the functions obtain the
11 |   homology data and represent them as a list of sub-lists using the
12 |   homology data package for the organism of interest. A sub-list can be of
13 |   length 1 or greater depending on whether a LocusLink id can be mapped
14 |   to one or more HomoloGeneIDs.
15 | }
16 | \usage{
17 | LL2homology(homoPkg, llids)
18 | HGID2homology(hgid, homoPkg)
19 | ACC2homology(accs, homoPkg)
20 | }
21 | \arguments{
22 |   \item{llids}{\code{llids} a vector of character strings or numeric
23 |     numbers for a set of LocusLink ids whose homologous genes in other
24 |     organisms are to be found}
25 |   \item{hgid}{\code{hgid} a named vector of character strings or numeric
26 |     numbers for a set of HomoloGeneIDs whose homologous genes in other
27 |     organisms are to be found. Names of the vector give the code used by
28 |     NCBI for organisms}
29 |   \item{accs}{\code{accs} a vector of character strings for a set of
30 |     GenBank Accession numbers}
31 |   \item{homoPkg}{\code{homoPkg} a character string for the name of the
32 |     homology data package for a given organism, which is a short version
33 |     of the scientific name of the organism plus homology (e. g. hsahomology)}
34 | }
35 | \details{
36 |   The homology data package has to be installed before executing any of
37 |   these functions.
38 | 
39 |   Each sub-list  has the following elements:
40 | 
41 |   homoOrg - a named vector of a single character string whose value
42 |   is the scientific name of the organism and name the numeric code
43 |   used by NCBI for the organism.
44 |   
45 |   homoLL - an integer for LocusLink id.
46 |   
47 |   homoHGID - an integer for internal HomoloGeneID.
48 |   
49 |   homoACC - a character string for GenBank accession number of the
50 |   best matching sequence of the organism.
51 |   
52 |   homoType - a single letter for the type of similarity measurement
53 |   between the homologous genes. homoType can be either B (reciprocal
54 |   best match between three or more organisms), b (reciprocal best
55 |   match between two organisms), or c (curated homology relationship
56 |   between two organisms).
57 |   
58 |   homoPS - a percentage value measured as the percent of identity of
59 |   base pair alignment between the homologous sequences. 
60 |   
61 |   homoURL - a url to the source if the homology relationship is a
62 |   curated orthology.
63 |   
64 |   Sub-lists with homoType = B or b will not have any value
65 |   for homoURL and objects with homoType = c will not have any value
66 |   for homoPS.
67 | }
68 | \value{
69 |   All three functions return a list of sub-lists containing data for
70 |   homologous genes in other organisms.
71 | }
72 | \references{\url{https://www.ncbi.nlm.nih.gov/entrez/query.fcgi?=homologene}}
73 | \author{Jianhua Zhang}
74 | 
75 | \examples{
76 | \dontrun{
77 |     ## hsahomology is a defunct package!
78 |     if(require("hsahomology")){
79 |         llids <- ls(env = hsahomologyLL2HGID)[2:5]
80 |         LL2homology("hsahomology", llids)
81 |     }
82 | 
83 | }
84 | }
85 | \keyword{misc}
86 | 
87 | 


--------------------------------------------------------------------------------
/R/AnnMaps.R:
--------------------------------------------------------------------------------
 1 | annObjPrefix <- function(name) {
 2 |     ## Drop a trailing ".db" from 'name'; anything else is returned as is.
 3 |     endsInDb <- length(grep("\\.db$", name)) > 0L
 4 |     if (!endsInDb) return(name)
 5 |     substr(name, 1, nchar(name) - 3L)
 6 | }
 7 | 
 8 | annPkgName <- function(name, type=c("db", "env")) {
 9 |     ## Build the annotation package name for 'name': type "db" carries a
10 |     ## ".db" suffix, type "env" does not.
11 |     wantDb <- match.arg(type) == "db"
12 |     hasDbSuffix <- length(grep("\\.db$", name)) > 0L
13 |     if (hasDbSuffix == wantDb)
14 |       name                                # already in the requested form
15 |     else if (wantDb)
16 |       paste(name, ".db", sep="")
17 |     else
18 |       substr(name, 1, nchar(name) - 3L)
19 | }
20 | 
21 | ## For cases where there is not a Bimap, but where there is an AnnotationDb
22 | ## object with a columns() value that matches the map argument, we want
23 | ## getAnnMap to spawn up a FlatBimap object and return that.
24 | 
25 | getAnnMap <- function(map, chip, load=TRUE, type=c("db", "env")) {
26 |     ## Look up the object <chip prefix><map> in the chip's annotation package.
27 |     ##   map:  suffix of the object name; chip: chip/package name (with or
28 |     ##         without ".db"); load: attach the package if it is not attached;
29 |     ##   type: package flavours ("db", "env") to try, in order of preference.
30 |     ## Returns the object, or a FlatBimap built from the AnnotationDb object
31 |     ## when 'map' is one of its columns(), or invisible NULL otherwise.
32 |     typeMissed <- missing(type)
33 |     searchName <- NULL
34 |     if (typeMissed) {
35 |         ## No preference given: use whichever flavour is attached first.
36 |         searchPth <- search()
37 |         whLoaded <- match(paste("package:", chip, c("", ".db"), sep=""),
38 |                           searchPth)
39 |         whLoaded <- whLoaded[!is.na(whLoaded)]
40 |         if (length(whLoaded))
41 |           searchName <- searchPth[min(whLoaded)]
42 |     } else {
43 |         badTypes <- type[!(type %in% c("db", "env"))]
44 |         if (length(badTypes))
45 |           stop("unknown types in 'type' argument: ",
46 |                paste(badTypes, collapse=", "))
47 |     }
48 |     pkg <- annPkgName(name=chip, type=type[1])
49 |     if (is.null(searchName))
50 |       searchName <- paste("package", pkg, sep=":")
51 |     pkgEnv <- tryCatch(as.environment(searchName), error=function(e) {
52 |         if (!load)
53 |           stop("getAnnMap: ", pkg, " package not attached and load is FALSE",
54 |                call.=FALSE)
55 |         ok <- suppressWarnings(require(pkg, character.only=TRUE, quietly=TRUE))
56 |         if (!ok && length(type) > 1) {
57 |             origPkg <- pkg
58 |             for (altType in type[-1]) {   # remaining flavours, in order
59 |                 pkg <- annPkgName(name=chip, type=altType)
60 |                 searchName <- paste("package", pkg, sep=":")
61 |                 ok <- suppressWarnings(require(pkg, character.only=TRUE,
62 |                                                quietly=TRUE))
63 |                 if (ok) {
64 |                     if (!typeMissed)
65 |                       warning("getAnnMap: ", "package ", origPkg,
66 |                               " not available, ", "using ", pkg, " instead",
67 |                               call.=FALSE)
68 |                     break
69 |                 }
70 |             }
71 |         }
72 |         if (!ok)
73 |           stop("getAnnMap: ", "package ", pkg, " not available",
74 |                call.=FALSE)
75 |         as.environment(searchName)
76 |     })
77 |     mapName <- paste(annObjPrefix(chip), map, sep="")
78 |     if (exists(mapName, envir=pkgEnv, inherits=FALSE))
79 |       return(get(mapName, envir=pkgEnv, inherits=FALSE))
80 |     ## No such object: try the AnnotationDb object instead.  NOTE(review):
81 |     ## assumes it is named like the ".db" package; 'pkg' is not used because
82 |     ## it may hold the "env" name.  get() avoids evaluating arbitrary text.
83 |     db <- get(annPkgName(name=chip, type="db"))
84 |     if (map %in% columns(db))
85 |       return(AnnotationDbi:::makeFlatBimapUsingSelect(db, col=map))
86 |     invisible(NULL)
87 | }
88 | 
89 | 
90 | 
91 | 


--------------------------------------------------------------------------------
/man/chromLocation-class.Rd:
--------------------------------------------------------------------------------
 1 | \name{chromLocation-class}
 2 | \docType{class}
 3 | \alias{chromLocation-class}
 4 | \alias{chromLocation}
 5 | \alias{dataSource}
 6 | \alias{nChrom}
 7 | \alias{chromNames}
 8 | \alias{chromLocs}
 9 | \alias{chromLengths}
10 | \alias{probesToChrom}
11 | \alias{geneSymbols}
12 | \alias{chromInfo}
13 | \alias{organism,chromLocation-method}
14 | \alias{dataSource,chromLocation-method}
15 | \alias{nChrom,chromLocation-method}
16 | \alias{chromNames,chromLocation-method}
17 | \alias{chromLocs,chromLocation-method}
18 | \alias{chromLengths,chromLocation-method}
19 | \alias{probesToChrom,chromLocation-method}
20 | \alias{geneSymbols,chromLocation-method}
21 | \alias{chromInfo,chromLocation-method}
22 | \alias{show,chromLocation-method}
23 | \title{Class chromLocation, a class for describing genes and their
24 |   chromosome mappings.}
25 | \description{
26 |   This class provides chromosomal information provided by a
27 |   Bioconductor metadata package.  By creating the object once for a
28 |   particular package, it can be used in a variety of locations without
29 |   the need to recompute values repeatedly.
30 | }
31 | \section{Creating Objects}{
32 | \code{  new('chromLocation',}\cr
33 | \code{    organism     = ...., # Object of class character}\cr
34 | \code{    dataSource    = ...., # Object of class character}\cr
35 | \code{    chromLocs    = ...., # Object of class list}\cr
36 | \code{    probesToChrom  = ...., # Object of class ANY}\cr
37 | \code{    chromInfo    = ...., # Object of class numeric}\cr
38 | \code{    geneSymbols  = ...., # Object of class ANY}\cr
39 | \code{  )}}
40 | \section{Slots}{
41 |   \describe{
42 |     \item{\code{organism}:}{Object of class "character".  The organism
43 |       that these genes correspond to.}
44 |     \item{\code{dataSource}:}{Object of class "character".  The source of
45 |     the gene data.}
46 |     \item{\code{chromLocs}:}{Object of class "list".  A list which
47 |       provides specific location information for every gene.}
48 |     \item{\code{probesToChrom}:}{An object with an environment-like API
49 |       which will translate a probe identifier to the chromosome it
50 |       belongs to.}
51 |     \item{\code{chromInfo}:}{A numerical vector representing each
52 |       chromosome, where the names are the names of the chromosomes and
53 |       the values are their lengths}
54 |     \item{\code{geneSymbols}:}{An environment or an object with
55 |       environment-like API that maps a probe ID to
56 |       the appropriate gene symbol}
57 |   }
58 | }
59 | \section{Methods}{
60 |   \describe{
61 |     \item{chromLengths}{(chromLocation): Gets the lengths of the
62 |       chromosomes for this organism}
63 |     \item{chromLocs}{(chromLocation): Gets the 'chromLocs' attribute.}
64 |     \item{chromNames}{(chromLocation): Gets the names of the chromosomes
65 |       for this organism}
66 |     \item{dataSource}{(chromLocation): Gets the 'dataSource' attribute.}
67 |     \item{probesToChrom}{(chromLocation): Gets the 'probesToChrom' attribute.}
68 |     \item{nChrom}{(chromLocation): gets the number of chromosomes this
69 |       organism has}
70 |     \item{organism}{(chromLocation): gets the 'organism' attribute.}
71 |     \item{chromInfo}{Gets the 'chromInfo' attribute.}
72 |     \item{geneSymbols}{Gets the 'geneSymbols' attribute.}
73 |   }
74 | }
75 | \seealso{\code{\link{buildChromLocation}}}
76 | \examples{
77 |   library("hgu95av2.db")
78 | 
79 |   z <- buildChromLocation("hgu95av2")
80 |   
81 |   ## find the number of chromosomes
82 |   nChrom(z)
83 | 
84 |   ## Find the names of the chromosomes
85 |   chromNames(z)
86 | 
87 |   ## get the organism this object refers to
88 |   organism(z)
89 | 
90 |   ## get the lengths of the chromosomes in this object
91 |   chromLengths(z)
92 | }
93 | \keyword{classes}
94 | 


--------------------------------------------------------------------------------
/man/getTDRows.Rd:
--------------------------------------------------------------------------------
  1 | \name{getQueryLink}
  2 | \alias{getQueryLink}
  3 | \alias{getQuery4UG}
  4 | \alias{getQuery4SP}
  5 | \alias{getQuery4OMIM}
  6 | \alias{getQuery4GB}
  7 | \alias{getQuery4Affy}
  8 | \alias{getQuery4FB}
  9 | \alias{getQuery4EN}
 10 | \alias{getCells}
 11 | \alias{getTDRows}
 12 | \alias{getQuery4TR}
 13 | \alias{getQuery4ENSEMBL}
 14 | 
 15 | \title{Functions to create hypertext links that can be placed in a table
 16 |   cell of a HTML file }
 17 | \description{
 18 |   Given a vector of ids, the functions will create a vector of
 19 |   hypertext links to defined public repositories such as
 20 |   LocusLink, UniGene .... The linkages can be placed in a html file
 21 |   constructed by \code{\link{htmlpage}}.
 22 | }
 23 | \usage{
 24 | getQueryLink(ids, repository = "ug", ...)
 25 | getTDRows(ids, repository = "ug", ...)
 26 | getCells(ids, repository = "ug", ...)
 27 | getQuery4UG(ids, ...)
 28 | getQuery4SP(ids, ...)
 29 | getQuery4GB(ids, ...)
 30 | getQuery4OMIM(ids, ...)
 31 | getQuery4Affy(ids, ...)
 32 | getQuery4FB(ids, ...)
 33 | getQuery4EN(ids, ...)
 34 | getQuery4TR(ids, ...)
 35 | getQuery4ENSEMBL(ids, ...)
 36 | }
 37 | %- maybe also 'usage' for other objects documented here.
 38 | \arguments{
 39 |   \item{ids}{ A character vector of ids, or alternatively, a list
 40 |     containing character vectors of ids. These will be used to construct
 41 |     hypertext links. A list should be used in cases where there are
 42 |     multiple ids per gene.}
 43 |   \item{repository}{ A character string for the name of a public
 44 |     repository. Valid values include "ll", "ug", "gb", "sp", "omim",
 45 |     "affy", "en", and "fb". See the details section for more
 46 |     information. }
 47 |   \item{...}{Allows end user to pass additional arguments. See details
 48 |   for \code{\link{getQuery4ENSEMBL}} for more information.}
 49 | }
 50 | \details{
 51 |  \code{\link{getQuery4GB}} constructs hypertext links to GenBank using the
 52 |  provided ids.
 53 |  
 54 |  \code{\link{getQuery4UG}} constructs hypertext links to UniGene using the
 55 |  provided ids.
 56 |  
 57 |  \code{\link{getQuery4Affy}} constructs hypertext links to Affymetrix using the
 58 |  provided ids.
 59 |  
 60 |  \code{\link{getQuery4SP}} constructs hypertext links to SwissProt using the
 61 |  provided ids.
 62 |  
 63 |  \code{\link{getQuery4OMIM}} constructs hypertext links to OMIM using the
 64 |  provided ids.
 65 | 
 66 |  \code{\link{getQuery4FB}} constructs hypertext links to FlyBase using
 67 |  the provided ids.
 68 | 
 69 |  \code{\link{getQuery4EN}} constructs hypertext links to EntrezGene
 70 |  using the provided ids. 
 71 |  
 72 |  \code{\link{getQuery4TR}} constructs hypertext links to TAIR using the
 73 |  provided ids.
 74 | 
 75 |  \code{\link{getQuery4ENSEMBL}} constructs hypertext links to Ensembl
 76 |  using the provided ids. An additional 'species' argument must be passed
 77 |  to this function via the \code{...} argument to \code{htmlpage}. The
 78 |  form of the argument must be e.g., species="Homo_sapiens" for
 79 |  human. Note the capitalized genus and underscore (_) separator.
 80 |  
 81 |  \code{\link{getQueryLink}} directs calls to construct hypertext links using
 82 |  the provided ids.
 83 |  
 84 |  \code{\link{getTDRows}} constructs each row of the resulting table.
 85 |  
 86 |  \code{\link{getCells}} constructs each cell of the resulting table.
 87 | 
 88 |  Note that some of these functions (\code{getQuery4OMIM},
 89 |  \code{getQuery4UG}, \code{getQuery4FB}) attempt to
 90 |  return empty cells for ids that don't make sense, rather than broken
 91 |  links. For the other getQuery4XX functions, the end user must replace
 92 |  all nonsense ids with "&nbsp;" in order to have an empty cell.
 93 | 
 94 |  Also note that creating additional links is quite simple. First, define
 95 |  a new 'getQuery4XX()' function modeled on the existing functions, then
 96 |  add this function to the \code{getQueryLink} function.
 97 |  
 98 | }
 99 | \value{
100 |  Returns a vector of character strings representing the hypertext links.
101 | }
102 | 
103 | \author{ Jianhua Zhang <jzhang@jimmy.harvard.edu> with further
104 |   modifications by James W. MacDonald <jmacdon@med.umich.edu> }
105 | 
106 | 
107 | \keyword{ manip }% __ONLY ONE__ keyword per line


--------------------------------------------------------------------------------
/R/getPMInfo.R:
--------------------------------------------------------------------------------
  1 | getPMInfo <- function(x) {
  2 | #
  3 | # getPMInfo: get medline-related info from a pubmed xml DOM tree
  4 | # works with result of Bioconductor annotate::pubmed function
  5 | # x must inherit from "XMLDocument"; returns a list with one entry per
  6 | # PubmedArticle, named by its PMID (see 'arts' below for the fields)
  7 | #
  8 | # tagVals: utility function for grabbing vector of tag values from any DOM tree
  9 | tagVals <- function(x,tag) { 
 10 |  tagNames <- function() {
 11 |   store <- character(0)
 12 |   add <- function(x) {
 13 |    if(inherits(x, "XMLNode") && xmlName(x) == tag) {
 14 |      store <<- c(store, xmlValue(x))
 15 |     }
 16 |    x
 17 |    }
 18 |   return(list(add=add, tagVals = function() {return(store)}))
 19 |   }
 20 |  h <- tagNames()
 21 |  xmlDOMApply(x, h$add) 
 22 |  h$tagVals()
 23 | }
 24 | #
 25 | # here's the main body of getPMInfo.  the function 'arts' creates
 26 | # a closure for collecting data on articles in the document returned
 27 | # by the pubmed function.  the 'add' element of the closure
 28 | # adds information to various local vectors and lists as xmlDOMApply
 29 | # walks through the tree.
 30 | # NOTE(review): presumably relies on 'add' seeing child nodes before their parent
 31 |  if (!inherits(x, "XMLDocument")) stop("only applies to XMLDocument")
 32 |  arts <- function() {
 33 |   pmarts <- list()
 34 |   pmart <- list()
 35 |   jinfo <- character(0)
 36 |   alist <- character(0)
 37 |   chemlist <- character(0)
 38 |   cura <- character(0)
 39 |   # pmart: article being built; jinfo/alist/chemlist/cura: per-element buffers
 40 |   add <- function(x) {
 41 |    if(inherits(x, "XMLNode") && xmlName(x) == "ArticleTitle") {
 42 |      pmart[["title"]] <<- xmlValue(x)
 43 |     }
 44 |    if(inherits(x, "XMLNode") && xmlName(x) == "MedlineTA") {
 45 |      pmart[["MedlineTA"]] <<- xmlValue(x)
 46 |     }
 47 |    if(inherits(x, "XMLNode") && xmlName(x) == "AbstractText") {
 48 |      pmart[["abstract"]] <<- xmlValue(x)
 49 |     }
 50 |    if(inherits(x, "XMLNode") && xmlName(x) == "PubmedArticle") {
 51 |      id <- xmlValue(getNodeSet(x, "/PubmedArticle/*/PMID")[[1L]])
 52 |      pmarts[[id]] <<- pmart
 53 |      pmart <<- list()
 54 |      # pmart is now empty, ready for the next article
 55 |     }
 56 | #
 57 | # deal with journal info
 58 | # this is an ugly part because tags like Year or Volume can occur in
 59 | # different contexts.  Need to know something about the parent.
 60 | # but we don't want to assume too much about sequence of nodes
 61 | #
 62 |    if (inherits(x, "XMLNode") && xmlName(x) == "ISSN") {
 63 |      jinfo <<- c(jinfo,ISSN=xmlValue(x))
 64 |    }
 65 |    if (inherits(x, "XMLNode") && xmlName(x) == "JournalIssue") {
 66 |     jikids <- xmlChildren(x)
 67 |     for (i in seq_along(jikids))
 68 |      {
 69 |      if (xmlName(jikids[[i]]) == "Volume")
 70 |        jinfo <<- c(jinfo,vol=xmlValue(jikids[[i]]))
 71 |      else if (xmlName(jikids[[i]]) == "Issue")
 72 |        jinfo <<- c(jinfo,iss=xmlValue(jikids[[i]]))
 73 |      else if (xmlName(jikids[[i]]) == "PubDate")
 74 |        {
 75 |        Year <- tagVals(jikids[[i]],"Year")
 76 |        Month <- tagVals(jikids[[i]],"Month")
 77 |        Day <- tagVals(jikids[[i]],"Day")
 78 |        jinfo <<- c(jinfo,year=Year,month=Month,day=Day)
 79 |        }
 80 |      }
 81 |      pmart[["JrnlInfo"]] <<- jinfo
 82 |      jinfo <<- character(0)
 83 |    }
 84 | #
 85 | # deal with author info
 86 | #
 87 |    if (inherits(x, "XMLNode") && xmlName(x) =="AuthorList") {
 88 |      pmart[["authors"]] <<- alist
 89 |      alist <<- character(0)
 90 |    }
 91 |    if (inherits(x, "XMLNode") && xmlName(x) =="Author") {
 92 |      alist <<- c(alist,cura)
 93 |      cura <<- character(0)
 94 |    }
 95 |    if (inherits(x, "XMLNode") && xmlName(x) =="LastName") {
 96 |      cura <<- paste(cura,last=xmlValue(x),sep="") 
 97 |    }
 98 | #   if (inherits(x, "XMLNode") & xmlName(x) =="ForeName") {
 99 | #     cura <<- paste(cura,fore=xmlValue(x)) 
100 | #   }
101 |    if (inherits(x, "XMLNode") && xmlName(x) =="Initials") {
102 |      cura <<- paste(cura,inits=xmlValue(x)) 
103 |    }
104 | #
105 | # deal with substance info
106 | #
107 |    if (inherits(x, "XMLNode") && xmlName(x) =="ChemicalList") {
108 |      pmart[["chemlist"]] <<- chemlist
109 |      chemlist <<- character(0)
110 |    }
111 |    if (inherits(x, "XMLNode") && xmlName(x) =="NameOfSubstance") {
112 |      chemlist <<- c(chemlist,xmlValue(x))
113 |    }
114 |    x
115 |    }
116 |   return(list(add=add, arts = function() {return(pmarts)}))
117 |   }
118 |  h <- arts()
119 |  xmlDOMApply(xmlRoot(x), h$add) 
120 |  h$arts()
121 | }
122 | 


--------------------------------------------------------------------------------
/vignettes/chromLOC.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "HowTo: Build and use chromosomal information"
  3 | author:
  4 |   - name: "Jeff Gentry"
  5 |   - name: "Kritika Verma"
  6 |     affiliation: "Vignette translation from Sweave to R Markdown / HTML"
  7 | date: "`r format(Sys.time(), '%B %d, %Y')`"
  8 | output:
  9 |   BiocStyle::html_document
 10 | vignette: >
 11 |   %\VignetteIndexEntry{HowTo: Build and use chromosomal information}
 12 |   %\VignetteEngine{knitr::rmarkdown}
 13 |   %\VignetteEncoding{UTF-8}
 14 | ---
 15 | 
 16 | # Overview
 17 | 
 18 | The annotate package provides a class that can be used to model
 19 | chromosomal information about a species, using one of the metadata
 20 | packages provided by Bioconductor. This class contains information about
 21 | the organism and its chromosomes and provides a standardized interface
 22 | to the information in the metadata packages for other software to
 23 | quickly extract necessary chromosomal information. An example of using
 24 | *chromLocation* objects in other software can be found with the
 25 | `alongChrom` function of the `r Biocpkg("geneplotter")` package in Bioconductor.
 26 | 
 27 | # The chromLocation class
 28 | 
 29 | The *chromLocation* class is used to provide a structure for chromosomal data of
 30 | a particular organism. In this section, we will discuss the various slots of the
 31 | class and the methods for interacting with them. Before this though, we will
 32 | create an object of class *chromLocation* for demonstration purposes later. The
 33 | helper function `buildChromLocation` is used, and it takes as an argument the
 34 | name of a Bioconductor metadata package, which is itself used to extract the
 35 | data. For this vignette, we will be using the `r Biocpkg("hgu95av2.db")`
 36 | package.
 37 | 
 38 | ```{r buildCL, message=FALSE}
 39 | library("annotate")
 40 | z <- buildChromLocation("hgu95av2")
 41 | z
 42 | ```
 43 | 
 44 | Once we have an object of the *chromLocation* class, we can now access
 45 | its various slots to get the information contained within it. There are
 46 | six slots in this class:
 47 | 
 48 |     organism:       This lists the organism that this object is describing.
 49 |     dataSource:     Where this data was acquired from.
 50 |     chromLocs:      A list with an element for every unique chromosome 
 51 |                     name, where each element contains a named vector where
 52 |                     the names are probe IDs and the values describe the
 53 |                     location of that probe on the chromosome.  Negative
 54 |                     values indicate that the location is on the antisense
 55 |                     strand. 
 56 |     probesToChrom:  A hash table which will translate a probe ID to the 
 57 |                     chromosome it belongs to.
 58 |     chromInfo:      A numerical vector representing each chromosome, where
 59 |                     the names are the names of the chromosomes and the
 60 |                     values are the lengths of those chromosomes.
 61 |     geneSymbols:    An environment that maps a probe ID to the appropriate
 62 |                     gene symbol.
 63 | 
 64 | There is a basic 'get' type method for each of these slots, all with the same
 65 | name as the respective slot. In the following example, we will demonstrate these
 66 | basic methods. For the `probesToChrom` and `geneSymbols` methods, the return
 67 | value is an environment which maps a probe ID to other values; we will be using
 68 | the probe ID '32972_at', which was selected at random for these examples. We are
 69 | showing only part of the `chromLocs` method's output as it is quite long in its
 70 | entirety.
 71 | 
 72 | 
 73 | ```{r showBasicMethods}
 74 | organism(z)
 75 | 
 76 | dataSource(z)
 77 | 
 78 | ## The chromLocs list is extremely large. Let's only
 79 | ## look at one of the elements.
 80 | names(chromLocs(z))
 81 | chromLocs(z)[["Y"]]
 82 | 
 83 | get("32972_at", probesToChrom(z))
 84 | 
 85 | chromInfo(z)
 86 | 
 87 | get("32972_at", geneSymbols(z))
 88 | ```
 89 | 
 90 | Another method which can be used to access information about the particular
 91 | *chromLocation* object is the `nChrom` method, which will list how many
 92 | chromosomes this organism has:
 93 | 
 94 | ```{r nChrom}
 95 | nChrom(z)
 96 | ```
 97 | 
 98 | # Summary
 99 | 
100 | The *chromLocation* class has a simple design, but can be powerful if one wants
101 | to store the chromosomal data contained in a Bioconductor package into a single
102 | object. These objects can be created once and then passed around to multiple
103 | functions, which can cut down on computation time to access the desired
104 | information from the package. These objects allow access to basic but also
105 | important information, and provide a standard interface for writers of other
106 | software to access this information.
107 | 


--------------------------------------------------------------------------------
/man/findNeighbors.Rd:
--------------------------------------------------------------------------------
  1 | \name{findNeighbors}
  2 | \alias{findNeighbors}
  3 | \alias{checkArgs}
  4 | \alias{findChr4LL}
  5 | \alias{getValidChr}
  6 | \alias{getBoundary}
  7 | \alias{weightByConfi}
  8 | \title{A function to locate neighboring genes within a defined range
  9 |   around a target gene represented by an Entrez Gene ID }
 10 | \description{
 11 |   Given a data package with mappings between Entrez Gene IDs and their
 12 |   locations on chromosomes, this function locates genes that are
 13 |   within a defined range on a given chromosome. If an Entrez Gene ID is
 14 |   passed as one of the arguments, genes located will be neighbors to the
 15 |   gene represented by the Entrez Gene ID within a defined range on the
 16 |   chromosome on which the target gene resides.
 17 | }
 18 | \usage{
 19 | findNeighbors(chrLoc, llID, chromosome, upBase, downBase, mergeOrNot = TRUE)
 20 | checkArgs(llID, chromosome, upBase, downBase)
 21 | findChr4LL(llID, chrEnv, organism)
 22 | getValidChr(organism)
 23 | getBoundary(loc, base, lower = TRUE)
 24 | weightByConfi(foundLLs)
 25 | } 
 26 | \arguments{
 27 |   \item{chrLoc}{\code{chrLoc} a character string for the name of the
 28 |     data package that contains mappings between Entrez Gene IDs and their
 29 |     locations on chromosomes. For each chromosome, there assumed to be
 30 |     mappings for the start and end locations of genes represented by
 31 |     Entrez Gene IDs. The data package needs to be built using
 32 |     chrLocPkgBuilder of AnnBuilder}
 33 |   \item{llID}{\code{llID} a character string for the Entrez Gene ID
 34 |     representing a gene whose neighbors are sought. llID can be missing}
 35 |   \item{chromosome}{\code{chromosome} a character string for the number
 36 |     of the chromosome of interest. chromosome is only required for
 37 |     locating genes within a range on the chromosome}
 38 |   \item{upBase}{\code{upBase} a numeric or character string for the
 39 |     number of base pairs that defines the upper limit of the range to
 40 |     locate genes. If neighbors of a given gene are sought, the value
 41 |     will be the distance in number of base pairs from the target gene
 42 |     upstream, to which search for genes will be conducted. Otherwise, the
 43 |     value will be the upper limit in number of base pairs from the p arm,
 44 |     to which search for genes will be conducted}
 45 |   \item{downBase}{\code{downBase} a numeric or character string for the
 46 |     number of base pairs that defines the lower limit of the range to
 47 |     locate genes. If neighbors of a given gene are sought, the value
 48 |     will be the distance in number of base pairs from the target gene
 49 |     downstream, to which search for genes will be conducted. Otherwise, the
 50 |     value will be the lower limit in number of base pairs from the p arm,
 51 |     to which search for genes will be conducted}
 52 |   \item{organism}{\code{organism} a character string for the name of the
 53 |     organism of interest}
 54 |   \item{chrEnv}{\code{chrEnv} an environment object with keys for
 55 |     Entrez Gene IDs and values for the chromosomes where genes reside}
 56 |   \item{loc}{\code{loc} a numeric or character string for the
 57 |     chromosomal location of gene of interest}
 58 |   \item{base}{\code{base} either a \code{downBase} or \code{upBase}}
 59 |   \item{lower}{\code{lower} a boolean indicating whether the lower or
 60 |     upper boundary of search limit is sought}
 61 |   \item{mergeOrNot}{\code{mergeOrNot} a boolean to indicate whether genes
 62 |     found upstream and downstream will be merged (TRUE)}
 63 |   \item{foundLLs}{\code{foundLLs} a vector of character strings for
 64 |     Entrez Gene IDs}
 65 | }
 66 | \details{
 67 |   A chrLoc data package can be created using function
 68 |   chrLocPkgBuilder of AnnBuilder, in which Entrez Gene IDs
 69 |   are mapped to location data on individual chromosomes.
 70 | 
 71 |   Genes are considered to be neighbors to a given target gene or within
 72 |   a given range when the transcription of genes start and end within the
 73 |   given range.
 74 | 
 75 |   checkArgs, findChr4LL, getValidChr, and getBoundary are accessory
 76 |   functions called by findNeighbors and may not be of real value
 77 |   outside of it.
 78 | }
 79 | \value{
 80 |   The function returns a list of named vectors. The length of the list
 81 |   is one when genes in a given region are sought but varies depending on
 82 |   whether a given gene can be mapped to one or more chromosomes when
 83 |   neighboring genes of a target gene are sought. Names of vector can be
 84 |   "Confident" when a gene can be confidently placed on a chromosome or
 85 |   "Unconfident" when a gene can be placed on a chromosome but its exact
 86 |   location can not be determined with great confidence.
 87 | }
 88 | \references{\url{http://www.genome.ucsc.edu/goldenPath/}}
 89 | \author{Jianhua Zhang}
 90 | 
 91 | \examples{
 92 | if(require("humanCHRLOC")){
 93 |    findNeighbors("humanCHRLOC", "51806", 10, upBase = 600000, downBase = 600000)
 94 | }else{
 95 |    print("Can not find neighbors without the required data package")
 96 | }
 97 | }
 98 | \keyword{manip}
 99 | 
100 | 


--------------------------------------------------------------------------------
/man/htmlpage.Rd:
--------------------------------------------------------------------------------
  1 | \name{htmlpage}
  2 | \alias{htmlpage}
  3 | \title{Functions to build HTML pages}
  4 | 
  5 | \description{ This function is designed to create an HTML table
  6 |   containing both static information as well as links to various online
  7 |   annotation sources.
  8 | }
  9 | \usage{
 10 | htmlpage(genelist, filename, title, othernames, table.head,
 11 |          table.center = TRUE, repository = list("en"), ...)
 12 | }
 13 | \arguments{
 14 |   \item{genelist}{A list or \code{data.frame} of character vectors
 15 |     containing ids to be made into hypertext links. See details for more
 16 |     information.}
 17 |   \item{filename}{A filename for the resultant HTML table.}
 18 |   \item{title}{A title for the table.}
 19 |   \item{othernames}{A list or \code{data.frame} of other things to add
 20 |     to the table. These will not be hyperlinks. The list of othernames
 21 |     can contain vectors, matrices, \code{data.frames} or lists.}
 22 |   \item{table.head}{A character vector of column headers for the table.}
 23 |   \item{table.center}{Center the table? Defaults to \code{TRUE}.}
 24 |   \item{repository}{A list of repositories to use
 25 |     for creating the hypertext links. Currently available repositories
 26 |     include 'gb' (GenBank), 'en' (EntrezGene), 'omim' (Online Mendelian
 27 |     Inheritance in Man), 'sp' (SwissProt), 'affy' (Affymetrix), 'ug'
 28 |     (UniGene), 'fb' (FlyBase), 'go' (Gene Ontology), 'ens' (Ensembl).
 29 |     Additional repositories can easily be added. See
 30 |   \code{setRepository} for more information.}
 31 |   \item{...}{Further arguments to be passed. See details for more
 32 |   information.}
 33 | }
 34 | \details{ This function will accept a list or \code{data.frame} of
 35 |   character vectors, each containing different ids that are to be turned
 36 |   into hyperlinks (e.g., a list containing affy ids, genbank accession
 37 |   numbers, and Entrez Gene ids). For instances where there is more than
 38 |   one id per gene, use a sub-list of character vectors. See the vignette
 39 |   'HowTo: Get HTML Output' for more information. Othernames should be a
 40 |   list or \code{data.frame}. Again, if there are multiple entries for a
 41 |   given gene, use a sub-list. This is more easily explained using an
 42 |   example - please see the examples section below and the above
 43 |   mentioned vignette.
 44 | 
 45 |   In even the simplest case the genelist, othernames and repository have
 46 |   to be lists. A simple character vector will not suffice. 
 47 | 
 48 |   Note that this function now uses \code{xtable} to create the HTML
 49 |   table, and there is the ability to pass some arguments on to either
 50 |   \code{xtable} or \code{print.xtable}. One such argument would be
 51 |   'append=TRUE', which would allow one to put lots of tables in one
 52 |   page, as long as the filename argument remained the same.
 53 | 
 54 |   Additionally, the Ensembl repository needs a species argument in order
 55 |   to form a usable URI. This argument can be passed in the form of e.g.,
 56 |   \code{species = "Homo_sapiens"}. Note the capitalization of the genus, and
 57 |   the separation by an underscore (\code{_}).
 58 | }
 59 | \value{
 60 |   This function is used only for the side effect of creating an HTML table.
 61 | }
 62 | \author{Robert Gentleman <rgentlem@fhcrc.org>, further
 63 |   modifications by James W. MacDonald <jmacdon@med.umich.edu>}
 64 | \examples{
 65 |   ## A very simple example. Two columns, one with links, the other without.
 66 | 
 67 |   gos <- paste("GO:000000", 1:9, sep="")
 68 |   notlinks <- LETTERS[1:9]
 69 | 
 70 |   htmlpage(list(gos), "simple.html", "Two column data", list(notlinks),
 71 |            c("GO IDs", "Letters"), repository = list("go"))
 72 | 
 73 |   if(!interactive())
 74 |     file.remove("simple.html")
 75 | 
 76 |   ## A more complex example with multiple links per cell
 77 |   ## first we create data to annotate
 78 |   unigene <- list("Hs.600536",c("Hs.596913","HS.655491"),"Hs.76704")
 79 |   refseq <- list(c("NM_001030050", "NM_001030047", "NM_001648",
 80 |   "NM_001030049"), "NM_000860", c("NM_001011645", "NM_000044"))
 81 |   entrez <- c("354", "3248", "367")
 82 |   genelist <- list(unigene, refseq, entrez)
 83 | 
 84 |   ## now some other data
 85 | 
 86 |   symb <- c("KLK3","HPGD","AR")
 87 |   desc <- c("Prostate-specific antigen precursor",
 88 |             "15-hydroxyprostaglandin dehydrogenase",
 89 |             "Androgen receptor")
 90 |   t.stat <- c(40.21, -22.14, 21.56)
 91 |   p.value <- rep(0,3)
 92 |   fold.change <- c(3.54, -2.35, 3.18)
 93 |   expression <- matrix(c(11.78, 11.69, 11.62, 8.17, 5.78, 5.58, 5.68,
 94 |                          8.26, 9.08, 9.28, 9.19, 6.05), ncol=4, byrow=TRUE)
 95 | 
 96 |   otherdata <- list(symb, desc, t.stat, p.value, fold.change, expression)
 97 |   table.head <- c("UniGene", "RefSeq", "EntrezGene", "Symbol",
 98 |                   "Description", "t-stat", "p-value", "fold change",
 99 |                   paste("Sample", 1:4))
100 | 
101 |   htmlpage(genelist, "test.html", "Some gene expression data", otherdata,
102 |            table.head, repository = list("ug","gb","en"))
103 | 
104 |   if(!interactive())
105 |     file.remove("test.html")
106 | }
107 | \keyword{manip}
108 | 


--------------------------------------------------------------------------------
/vignettes/useProbeInfo.Rnw:
--------------------------------------------------------------------------------
  1 | % \VignetteIndexEntry{Using Affymetrix Probe Level Data}
  2 | % \VignetteDepends{hgu95av2.db, rae230a.db, rae230aprobe, Biostrings}
  3 | % \VignetteKeywords{Annotation}
  4 | %\VignettePackage{annotate}
  5 | 
  6 | \documentclass{article}
  7 | 
  8 | \newcommand{\Rfunction}[1]{{\texttt{#1}}}
  9 | \newcommand{\Rmethod}[1]{{\texttt{#1}}}
 10 | 
 11 | \newcommand{\Robject}[1]{{\texttt{#1}}}
 12 | \newcommand{\Rpackage}[1]{{\textit{#1}}}
 13 | \newcommand{\Rclass}[1]{{\textit{#1}}}
 14 | 
 15 | \usepackage{hyperref}
 16 | 
 17 | \usepackage[authoryear,round]{natbib}
 18 | \usepackage{times}
 19 | 
 20 | \begin{document}
 21 | \title{Using Probe Information}
 22 | 
 23 | \author{Robert Gentleman}
 24 | \date{}
 25 | \maketitle
 26 | 
 27 | \section*{Overview}
 28 | 
 29 | The Bioconductor project maintains a rich body of annotation data
 30 | assembled into R packages. For many different Affymetrix chips
 31 | information is provided on both the sequence of the mRNA that was
 32 | intended to be matched and the actual 25mers that were used for the
 33 | bindings. In this vignette we show how to make use of the probe
 34 | information.
 35 | 
 36 | \section*{A Simple Example}
 37 | 
 38 | To demonstrate the use of probe level data we will use the
 39 | \texttt{rae230a} chip (for rats). So we first need to load the
 40 | corresponding packages.
 41 | 
 42 | <<loadlibs, results=hide>>=
 43 | library("annotate")
 44 | library("rae230a.db")
 45 | library("rae230aprobe")
 46 | @
 47 | 
 48 | Now, we do not have any data so all we are going to do is to examine
 49 | the probe data and show how to use some of the different Bioconductor
 50 | tools to access that information, and potentially check on the mapping
 51 | information that has been given.
 52 | 
 53 | We will select a probe set,
 54 | <<selprobe>>=
 55 | 
 56 | ps = names(as.list(rae230aACCNUM))
 57 | 
 58 | myp = ps[1001]
 59 | 
 60 | myA = get(myp, rae230aACCNUM)
 61 | 
 62 | wp = rae230aprobe$Probe.Set.Name == myp
 63 | myPr = rae230aprobe[wp,]
 64 | 
 65 | @
 66 | 
 67 | The probe data is stored as a \Rclass{data.frame} with 6 columns. They
 68 | are
 69 | \begin{description}
 70 | \item[sequence] The sequence of the 25mer
 71 | \item[x] The x position of the probe on the array.
 72 | \item[y] The y position of the probe on the array.
 73 | \item[Probe.Set.Name] The Affymetrix ID for the probe set.
 74 | \item[Probe.Interrogation.Position] The location (in bases) of the
 75 | 13th base in the 25mer, in the target sequence.
 76 | \item[Target.Strandedness] Whether the 25mer is a Sense or an
 77 | Antisense match to the target sequence.
 78 | \end{description}
 79 | 
 80 | We note that the reported sequence is not always found in the
 81 | reference sequence and, when it is found, it is not always at the
 82 | reported location. One can check this using other tools available in the
 83 | \Rpackage{annotate} package and in the \Rpackage{Biostrings} package.
 84 | 
 85 | %%FIXME: need to check for connectivity
 86 | <<getACC>>=
 87 | 
 88 | myseq = getSEQ(myA)
 89 | nchar(myseq)
 90 | 
 91 | library("Biostrings")
 92 | mybs = DNAString(myseq)
 93 | 
 94 | match1 = matchPattern(as.character(myPr[1,1]), mybs)
 95 | match1
 96 | as.matrix(ranges(match1))
 97 | myPr[1,5]
 98 | @
 99 | And we can see that in this case the 13th nucleotide is indeed in
100 | exactly the place that has been predicted.
101 | 
102 | 
103 | One additional thing to note is that Affymetrix does not accurately report the strandedness of the
104 | probes, so it is necessary to check the reverse complement of the sequence prior to
105 | assuming that the probe does not interrogate the correct gene.
106 | 
107 | <<getRev>>=
108 | 
109 | myp = ps[100]
110 | 
111 | myA = get(myp, rae230aACCNUM)
112 | 
113 | wp = rae230aprobe$Probe.Set.Name == myp
114 | 
115 | myPr = rae230aprobe[wp,]
116 | 
117 | myseq = getSEQ(myA)
118 | 
119 | mybs = DNAString(myseq)
120 | 
121 | Prstr = as.character(myPr[1,1])
122 | 
123 | match2 = matchPattern(Prstr, mybs)
124 | 
125 | ## expecting 0 (no match)
126 | length(match2)
127 | 
128 | match2 = matchPattern(reverseComplement(DNAString(Prstr)), mybs)
129 | 
130 | nchar(match2)
131 | 
132 | nchar(myseq) - as.matrix(ranges(match2))
133 | myPr[1,5]
134 | @
135 | 
136 | Again, we see that the 13th nucleotide is exactly where predicted. It is relatively
137 | straightforward to check the other 25mers, and to develop different
138 | visualization tools that can be used to investigate the available data.
139 | 
140 | \section*{Other Sources of Information}
141 | 
142 | There are other tools available that may also be of some interest. For instance, the
143 | Mental Health Research Institute at the University of Michigan has various custom
144 | cdf files for Affymetrix data analysis that have been updated using more current annotation
145 | information from GenBank and Ensembl.
146 | 
147 | \url {http://brainarray.mhri.med.umich.edu/Brainarray/Database/CustomCDF/genomic_curated_CDF.asp}
148 | 
149 | The Weizmann Institute of Science has a database that can be queried to get the sensitivity and specificity
150 | for the probes on the Affymetrix HG-U95av2 chip. Although the information here is limited to a particular chip,
151 | this general idea is something that an enterprising end-user might want to replicate for other chips.
152 | 
153 | \url {http://genecards.weizmann.ac.il/geneannot/}
154 | 
155 | \section{Session Information}
156 | 
157 | The version number of R and packages loaded for generating the vignette were:
158 | 
159 | <<echo=FALSE>>=
160 | sessionInfo()
161 | @
162 | 
163 | 
164 | \end{document}
165 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
  1 | import(methods)
  2 | 
  3 | import(BiocGenerics)
  4 | 
  5 | importClassesFrom(Biobase,
  6 |                   eSet)
  7 | 
  8 | importClassesFrom(AnnotationDbi,
  9 |                   FlatBimap)
 10 | 
 11 | importMethodsFrom(DBI, dbGetQuery, dbListTables, dbListFields)
 12 | 
 13 | importMethodsFrom(AnnotationDbi,
 14 |                   Definition,
 15 |                   GOID,
 16 |                   Secondary,
 17 |                   Synonym,
 18 |                   colnames,
 19 |                   columns,
 20 |                   dbmeta,
 21 |                   eapply,
 22 |                   exists,
 23 |                   get,
 24 |                   ls,
 25 |                   mappedRkeys,
 26 |                   mget,
 27 |                   ncol,
 28 |                   nrow,
 29 |                   Ontology,
 30 |                   revmap,
 31 |                   Term)
 32 | 
 33 | importMethodsFrom(Biobase,
 34 |                   annotation,
 35 |                   contents,
 36 |                   exprs,
 37 |                   featureNames)
 38 | 
 39 | importFrom(Biobase,
 40 |            addVigs2WinMenu)
 41 | 
 42 | importFrom(graphics,
 43 |            abline,
 44 |            identify,
 45 |            plot)
 46 | 
 47 | importFrom(stats,
 48 |            setNames,
 49 |            heatmap)
 50 | 
 51 | importFrom(utils,
 52 |            browseURL,
 53 |            compareVersion,
 54 |            packageDescription,
 55 |            URLencode)
 56 | 
 57 | importFrom(xtable,
 58 |            xtable)
 59 | 
 60 | importFrom(httr,
 61 |            GET)
 62 | 
 63 | importFrom(XML,
 64 |            getNodeSet,
 65 |            htmlParse,
 66 |            xmlChildren,
 67 |            xmlDOMApply,
 68 |            xmlErrorCumulator,
 69 |            xmlName,
 70 |            xmlParse,
 71 |            xmlRoot,
 72 |            xmlToDataFrame,
 73 |            xmlTreeParse,
 74 |            xmlValue,
 75 |            xpathApply,
 76 |            xpathSApply)
 77 | 
 78 | exportClasses(
 79 |               chromLocation,
 80 |               FramedHTMLPage,
 81 |               homoData,
 82 |               HTMLPage,
 83 |               pubMedAbst
 84 |               )
 85 | 
 86 | exportMethods(
 87 |               abstText,
 88 |               articleTitle,
 89 |               authors,
 90 |               chromInfo,
 91 |               chromLengths,
 92 |               chromLocs,
 93 |               chromNames,
 94 |               dataSource,
 95 |               Definition,
 96 |               fileName,
 97 |               geneSymbols,
 98 |               GOID,
 99 |               homoACC,
100 |               homoHGID,
101 |               homoLL,
102 |               homoOrg,
103 |               homoPS,
104 |               homoType,
105 |               homoURL,
106 |               initialize,
107 |               journal,
108 |               mainPage,
109 |               nChrom,
110 |               Ontology,
111 |               organism,
112 |               pageText,
113 |               pageTitle,
114 |               pmid,
115 |               probesToChrom,
116 |               pubDate,
117 |               pubMedAbst,
118 |               Secondary,
119 |               show,
120 |               sidePage,
121 |               Synonym,
122 |               Term,
123 |               toFile,
124 |               topPage
125 |               )
126 | 
127 | export(
128 |        .buildAnnotateOpts,
129 |        .getIdTag,
130 |        .getNcbiURL,
131 |        .handleXML,
132 |        .efetch,
133 |        .transformAccession,
134 |        ACC2homology,
135 |        accessionToUID,
136 |        ACCNUMStats,
137 |        annPkgName,
138 |        aqListGOIDs,
139 |        buildChromLocation,
140 |        buildPubMedAbst,
141 |        checkArgs,
142 |        chrCats,
143 |        compatibleVersions,
144 |        createLLChrCats,
145 |        createMAPIncMat,
146 |        dropECode,
147 |        filterGOByOntology,
148 |        findChr4LL,
149 |        findNeighbors,
150 |        genbank,
151 |        getAnnMap,
152 |        getBoundary,
153 |        getEG,
154 |        getEvidence,
155 |        getGI,
156 |        getGO,
157 |        getGOChildren,
158 |        getGOdesc,
159 |        getGOOntology,
160 |        getGOParents,
161 |        getGOTerm,
162 |        getGPLNames,
163 |        getOntology,
164 |        getOrgNameNCode,
165 |        getPMID,
166 |        getPMInfo,
167 |        getQueryLink,
168 |        getSAGEFileInfo,
169 |        getSAGEGPL,
170 |        getSEQ,
171 |        getSYMBOL,
172 |        getUniqAnnItem,
173 |        getValidChr,
174 |        GO2heatmap,
175 |        GOmnplot,
176 |        hasGOannote,
177 |        HGID2homology,
178 |        homoData,
179 |        htmlpage,
180 |        KEGG2heatmap,
181 |        KEGGmnplot,
182 |        LL2homology,
183 |        lookUp,
184 |        makeAnchor,
185 |        mapOrgs,
186 |        pm.abstGrep,
187 |        pm.getabst,
188 |        pm.titles,
189 |        pmAbst2HTML,
190 |        pmid2MIAME,
191 |        PMIDAmat,
192 |        pmidQuery,
193 |        pubmed,
194 |        PWAmat,
195 |        readGEOAnn,
196 |        readIDNAcc,
197 |        readUrl,
198 |        serializeDataPkgEnvs,
199 |        serializeEnv,
200 |        UniGeneQuery,
201 |        entrezGeneByID,
202 |        entrezGeneQuery,
203 |        usedChromGenes,
204 |        usedChromGenes,
205 |        weightByConfi,
206 |        whatACC,
207 |        setRepository,
208 |        getRepositories,
209 |        clearRepository,
210 |        isValidKey,
211 |        allValidKeys,
212 |        updateSymbolsToValidKeys,
213 |        blastSequences
214 |        )
215 | 


--------------------------------------------------------------------------------
/R/html.R:
--------------------------------------------------------------------------------
  1 | makeAnchor <- function(link, title, toMain=FALSE) {
  2 |     ## Build one HTML anchor per (link, title) pair.  The arguments are
  3 |     ## pasted together element-wise, so a character vector of anchors
  4 |     ## is returned.  With toMain=TRUE every anchor also carries
  5 |     ## target="main".
  6 | 
  7 |     ## !! Should allow links to be URL objects as well as strings
  8 |     targetAttr <- if (toMain) " target=\"main\"" else ""
  9 |     anchors <- paste0("<A HREF=", link, targetAttr, ">", title, "</A>")
 10 |     anchors
 11 | }
 12 | 
 13 | 
 14 |     ## S4 class representing a basic "HTML Page", naively stored as a
 15 |     ## block of text: the output file name, the page text and its title.
 16 |     setClass("HTMLPage", representation(fileName="character",
 17 |                                         pageText="character",
 18 |                                         pageTitle="character"))
 19 |     setMethod("fileName", "HTMLPage", function(object, ...) object@fileName)
 20 |     ## Each generic below is only created when it does not exist yet.
 21 |     if (is.null(getGeneric("pageText")))
 22 |         setGeneric("pageText", function(object, ...)
 23 |                    standardGeneric("pageText"))
 24 |     ## Accessor for the 'pageText' slot.
 25 |     setMethod("pageText", "HTMLPage", function(object, ...)
 26 |               object@pageText)
 27 |     ## Generic and accessor for the 'pageTitle' slot.
 28 |     if (is.null(getGeneric("pageTitle")))
 29 |         setGeneric("pageTitle", function(object, ...)
 30 |                    standardGeneric("pageTitle"))
 31 |     setMethod("pageTitle", "HTMLPage", function(object, ...)
 32 |               object@pageTitle)
 33 |     ## show() prints the page text.
 34 |     setMethod("show","HTMLPage", function(object) print(pageText(object)))
 35 |     ## toFile() writes the page text to the file named by fileName().
 36 |     if (is.null(getGeneric("toFile")))
 37 |         setGeneric("toFile", function(object, ...)
 38 |                    standardGeneric("toFile"))
 39 |     setMethod("toFile", "HTMLPage", function(object, ...) {
 40 |         cat(pageText(object), file=fileName(object))
 41 |     })
 42 | 
 43 |     ## Defines a basic framed page.  We're using 3 frames, a top
 44 |     ## banner, a side navigation bar and a main page, much like the
 45 |     ## bioconductor website.  The object also has its own HTML page
 46 |     ## associated with it via HTMLPage inheritance.
 47 |     setClass("FramedHTMLPage", representation(topPage="HTMLPage",
 48 |                                               sidePage="HTMLPage",
 49 |                                               mainPage="HTMLPage"),
 50 |              contains="HTMLPage")
 51 |     ## Accessors for the three sub-pages (generics created on demand).
 52 |     if (is.null(getGeneric("topPage")))
 53 |         setGeneric("topPage", function(object, ...)
 54 |                    standardGeneric("topPage"))
 55 |     setMethod("topPage", "FramedHTMLPage", function(object, ...)
 56 |               object@topPage)
 57 | 
 58 |     if (is.null(getGeneric("sidePage")))
 59 |         setGeneric("sidePage", function(object, ...)
 60 |                    standardGeneric("sidePage"))
 61 |     setMethod("sidePage", "FramedHTMLPage", function(object, ...)
 62 |               object@sidePage)
 63 | 
 64 |     if (is.null(getGeneric("mainPage")))
 65 |         setGeneric("mainPage", function(object, ...)
 66 |                    standardGeneric("mainPage"))
 67 |     setMethod("mainPage", "FramedHTMLPage", function(object, ...)
 68 |               object@mainPage)
 69 |     ## Writes the three sub-pages first, then the frameset page itself.
 70 |     setMethod("toFile", "FramedHTMLPage", function(object, ...) {
 71 |         toFile(topPage(object))
 72 |         toFile(sidePage(object))
 73 |         toFile(mainPage(object))
 74 |         ## Written out by hand.  NOTE(review): callNextMethod() would
 75 |         ## presumably dispatch to HTMLPage's 'toFile' instead -- confirm.
 76 |         cat(pageText(object), file=fileName(object))
 77 |     })
 78 |     ## Constructor: stores the arguments, then generates the frameset HTML.
 79 |     setMethod("initialize", "FramedHTMLPage",
 80 |               function(.Object, topPage=new("HTMLPage"),
 81 |                        sidePage=new("HTMLPage"),
 82 |                        mainPage=new("HTMLPage"),
 83 |                        fileName=new("character"),
 84 |                        pageTitle=new("character")) {
 85 |                   .Object@pageTitle <- pageTitle
 86 |                   .Object@fileName <- fileName
 87 |                   .Object@topPage <- topPage
 88 |                   .Object@sidePage <- sidePage
 89 |                   .Object@mainPage <- mainPage
 90 |                   topName <- fileName(topPage(.Object))
 91 |                   sideName <- fileName(sidePage(.Object))
 92 |                   mainName <- fileName(mainPage(.Object))
 93 |                   ## sep="\n" below also puts each file name on its own line
 94 |                   out <- paste("<HTML>","<HEAD>",sep="\n")
 95 |                   t <- paste("<TITLE>",pageTitle(.Object),"</TITLE>")
 96 |                   out <- paste(out,t,"</HEAD>",
 97 |                                "<frameset rows=\"70,*\" border =\" 0\" frameborder=\" no\" framespacing =\" 0\">",
 98 |                                "  <frame name=\"banner\" scrolling=\"no\" noresize target=\"contents\" src=\"",topName,"\" marginwidth=\"0\" marginheight=\"0\">",
 99 |                                "  <frameset cols=\"250,*\">",
100 |                                "    <frame name=\"contents\" target=\"main\" src=\"",sideName,"\" marginwidth=\"10\" marginheight=\"10\" scrolling=\"auto\" noresize>",
101 |                                "    <frame name=\"main\" scrolling=\"auto\" noresize src=\"",mainName,"\" marginwidth =\" 10\" marginheight =\" 10\" target=\"_self\">",
102 |                                "  </frameset>","  <noframes>","  <body>","",
103 |                                "  <p>This page uses frames, but your browser doesn't support them.</p>",
104 |                                "", "  </body>","  </noframes>",
105 |                                "</frameset>","</html>",
106 |                                sep="\n")
107 |                   .Object@pageText <- out
108 |                   .Object
109 |               })
110 | 


--------------------------------------------------------------------------------
/R/blastSequences.R:
--------------------------------------------------------------------------------
  1 | .blastSequencesToDNAMultipleAlignment <- function(xml) {
  2 |    ## Turn every HSP of a parsed BLAST XML result into a two-row
  3 |    ## DNAMultipleAlignment (hit sequence first, query sequence second)
  4 |    ## with empty row and column masks; returns a list, one per HSP.
  5 |    loadNamespace("Biostrings")
  6 |    loadNamespace("IRanges")
  7 |    querySeqs <- xpathSApply(xml, "//Hsp_qseq", xmlValue)
  8 |    hitSeqs <- xpathSApply(xml, "//Hsp_hseq", xmlValue)
  9 |    lapply(seq_along(querySeqs), function(k) {
 10 |        noMask <- as(IRanges::IRanges(), "NormalIRanges")
 11 |        Biostrings::DNAMultipleAlignment(c(hitSeqs[[k]], querySeqs[[k]]),
 12 |                                         rowmask=noMask, colmask=noMask)
 13 |    })
 14 | }
 15 | 
 16 | .blastSequencesToDataFrame <- function(xml) {
 17 |     ## Flatten a parsed BLAST XML result into one row per HSP, with the
 18 |     ## enclosing Iteration and Hit fields repeated alongside each HSP.
 19 |     if (xpathSApply(xml, "count(//Hit)") == 0L) {
 20 |         message("'blastSequences' returned 0 matches")
 21 |         return(data.frame())
 22 |     }
 23 |     ## number of HSPs below each node / repeat row i of 'd' times[i] times
 24 |     hspCount <- function(nodes) sapply(nodes, xpathSApply, "count(.//Hsp)")
 25 |     expand <- function(d, times)
 26 |         d[rep(seq_len(nrow(d)), times),, drop=FALSE]
 27 | 
 28 |     iterNodes <- xml["//Iteration"]
 29 |     iterTable <- xmlToDataFrame(iterNodes, stringsAsFactors=FALSE)
 30 |     hitNodes <- xml["//Hit"]
 31 |     hitTable <- xmlToDataFrame(hitNodes, stringsAsFactors=FALSE)
 32 |     hitTable <- hitTable[, names(hitTable) != "Hit_hsps", drop=FALSE]
 33 |     hspTable <- xmlToDataFrame(xml["//Hsp"] , stringsAsFactors=FALSE)
 34 | 
 35 |     out <- cbind(expand(iterTable, hspCount(iterNodes)),
 36 |                  expand(hitTable, hspCount(hitNodes)), hspTable)
 37 |     rownames(out) <- NULL
 38 |     out
 39 | }
 40 | 
 41 | .tryParseResult <- function(baseUrl, rid, rtoe, timeout) {
 42 |     ## Poll the BLAST server for request 'rid'; return the parsed XML
 43 |     ## once its status is READY.  'rtoe' (estimated response time) and
 44 |     ## 'timeout' are in seconds.  Stops on a FAILED, UNKNOWN or unknown
 45 |     ## status and when an interactive user declines to keep waiting.
 46 |     ## NOTE(review): non-interactive sessions never time out -- intended?
 47 |     message("estimated response time ", rtoe, " seconds")
 48 |     start <- Sys.time()
 49 |     end <- Sys.time() + timeout
 50 |     url <- sprintf("%s?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=%s",
 51 |                    baseUrl, rid)
 52 |     Sys.sleep(min(rtoe, timeout))
 53 |     repeat {
 54 |         elapsed <- as.double(Sys.time() - start, units="secs")
 55 |         ## getURL2() (R/query.R) replaces RCurl::getURL(), which has issues
 56 |         result <- as(htmlParse(getURL2(url),
 57 |                                error = xmlErrorCumulator(immediate=FALSE)),
 58 |                      "character")
 59 |         if (grepl("Status=FAILED", result))
 60 |             stop("BLAST search failed")
 61 |         else if  (grepl("Status=UNKNOWN", result))
 62 |             stop("BLAST search expired")
 63 |         else if (grepl("Status=READY", result)) {
 64 |             url <- sprintf("%s?RID=%s&FORMAT_TYPE=XML&CMD=Get", baseUrl, rid)
 65 |             return(xmlParse(getURL2(url),
 66 |                             error = xmlErrorCumulator(immediate=FALSE)))
 67 |         } else if (grepl("Status=WAITING", result)) {
 68 |             message(sprintf("elapsed time %.0f seconds", elapsed))
 69 |             if (Sys.time() > end && interactive()) {
 70 |                 ## %.0f, not %d: sprintf("%d") fails for e.g. timeout=40.5
 71 |                 msg <- sprintf("wait another %.0f seconds? [y/n] ", timeout)
 72 |                 repeat {
 73 |                     ans <- substr(trimws(tolower(readline(msg))), 1, 1)
 74 |                     if (ans %in% c("y", "n"))
 75 |                         break
 76 |                 }
 77 |                 if (ans == "n")
 78 |                     break
 79 |                 end <- Sys.time() + timeout
 80 |             }
 81 |             Sys.sleep(10)
 82 |         } else
 83 |             stop("BLAST search unknown response")
 84 |     }
 85 |     stop(sprintf("'blastSequences' timeout after %.0f seconds", elapsed),
 86 |          call.=FALSE)
 87 | }
 88 | 
 89 | ## Using the REST-ish API described at
 90 | ## http://www.ncbi.nlm.nih.gov/blast/Doc/node2.html
 91 | ## Submit 'x' to NCBI BLAST, wait for the search to finish and return it
 92 | ## in the form selected by 'as'.  'database', 'hitListSize', 'filter',
 93 | ## 'expect' and 'program' are pasted verbatim into the request URL;
 94 | ## 'timeout' is handed to .tryParseResult().
 95 | blastSequences <- function(x, database="nr",
 96 |                            hitListSize="10",
 97 |                            filter="L",
 98 |                            expect="10",
 99 |                            program="blastn",
100 |                            timeout=40,
101 |                            as=c("DNAMultipleAlignment", "data.frame", "XML"))
102 | {
103 |     PARSE <- switch(match.arg(as),
104 |                     DNAMultipleAlignment=.blastSequencesToDNAMultipleAlignment,
105 |                     data.frame=.blastSequencesToDataFrame,
106 |                     XML=identity)
107 |     ## TODO: argument checking; restrict the parameters to the values the
108 |     ## service accepts (and document them); let 'program' decide the class
109 |     ## returned (blastn -> DNAMultipleAlignment, blastp -> AAMultipleAlignment).
110 | 
111 |     ## assemble the query
112 |     baseUrl <- "https://www.ncbi.nlm.nih.gov/blast/Blast.cgi"
113 |     query <- paste("QUERY=", URLencode(as.character(x)), "&DATABASE=",database,
114 |                    "&HITLIST_SIZE=",hitListSize,"&FILTER=",filter,
115 |                    "&EXPECT=",expect,"&PROGRAM=",program, sep="")
116 |     url0 <- sprintf("%s?%s&CMD=Put", baseUrl, query)
117 |     ## getURL2() (R/query.R) replaces RCurl::getURL(), which has issues.
118 |     post <- htmlParse(getURL2(url0))
119 |     info <- post[['string(//comment()[contains(., "QBlastInfoBegin")])']]
120 |     ## Fail early: without a request id and time estimate the sub() calls
121 |     ## below yield garbage / NA and the error only shows up in Sys.sleep().
122 |     if (!isTRUE(grepl("RID = [[:alnum:]]+", info)) ||
123 |         !isTRUE(grepl("RTOE = [[:digit:]]+", info)))
124 |         stop("'blastSequences' could not read RID/RTOE from the BLAST ",
125 |              "server response", call.=FALSE)
126 |     rid <- sub(".*RID = ([[:alnum:]]+).*", "\\1", info)
127 |     rtoe <- as.integer(sub(".*RTOE = ([[:digit:]]+).*", "\\1", info))
128 |     PARSE(.tryParseResult(baseUrl, rid, rtoe, timeout))
129 | }
130 | 
131 | ## took 11.5 minutes to do a blast...  (ugh)
132 | 


--------------------------------------------------------------------------------
/R/homoData.R:
--------------------------------------------------------------------------------
  1 | ### homoData objects are used by homoPkgBuilder to represent homology data
  2 | ### Each slot is declared as a plain character or numeric vector.
  3 | setClass("homoData", representation(homoOrg = "character",
  4 |                                     homoLL = "numeric",
  5 |                                     homoType = "character",
  6 |                                     homoPS = "numeric",
  7 |                                     homoURL = "character",
  8 |                                     homoACC = "character",
  9 |                                     homoHGID = "numeric"))
 10 | 
 11 | # Get methods: one generic per slot plus a homoData method returning it
 12 | setGeneric("homoOrg",
 13 |                function(object) standardGeneric("homoOrg"))
 14 | 
 15 | setMethod("homoOrg", "homoData",
 16 |           function(object) object@homoOrg)
 17 | 
 18 | setGeneric("homoLL",
 19 |                function(object) standardGeneric("homoLL"))
 20 | 
 21 | setMethod("homoLL", "homoData",
 22 |           function(object) object@homoLL)
 23 | 
 24 | setGeneric("homoType",
 25 |                function(object) standardGeneric("homoType"))
 26 | 
 27 | setMethod("homoType", "homoData",
 28 |           function(object) object@homoType)
 29 | 
 30 | setGeneric("homoPS",
 31 |                function(object) standardGeneric("homoPS"))
 32 | 
 33 | setMethod("homoPS", "homoData",
 34 |           function(object) object@homoPS)
 35 | 
 36 | setGeneric("homoURL",
 37 |                function(object) standardGeneric("homoURL"))
 38 | 
 39 | setMethod("homoURL", "homoData",
 40 |           function(object) object@homoURL)
 41 | 
 42 | setGeneric("homoACC",
 43 |                function(object) standardGeneric("homoACC"))
 44 | 
 45 | setMethod("homoACC", "homoData",
 46 |           function(object) object@homoACC)
 47 | 
 48 | setGeneric("homoHGID",
 49 |                function(object) standardGeneric("homoHGID"))
 50 | 
 51 | setMethod("homoHGID", "homoData",
 52 |           function(object) object@homoHGID)
 53 | 
 54 | ## show() method: prints every slot that is set as "<slot>: <value>",
 55 | ## in a fixed order, and finishes with an empty line.  A slot counts as
 56 | ## set when it is non-empty and its first element is not NA.  The
 57 | ## output for single-valued slots is the same as before; multi-valued
 58 | ## slots no longer make the method fail.
 59 | setMethod("show", "homoData",
 60 |           function(object) {
 61 |               ## accessors in display order
 62 |               fields <- list(homoOrg = homoOrg, homoLL = homoLL,
 63 |                              homoHGID = homoHGID, homoACC = homoACC,
 64 |                              homoType = homoType, homoPS = homoPS,
 65 |                              homoURL = homoURL)
 66 |               for (slotName in names(fields)) {
 67 |                   value <- fields[[slotName]](object)
 68 |                   ## Only value[1] is tested: '&&' applied to a whole
 69 |                   ## multi-element slot is an error since R 4.3.
 70 |                   if (length(value) > 0 && !is.na(value[1])) {
 71 |                       ## every label but the first starts on a new line
 72 |                       prefix <- if (slotName == "homoOrg") "" else "\n"
 73 |                       cat(paste(paste0(prefix, slotName, ":"), value),
 74 |                           fill = TRUE)
 75 |                   }
 76 |               }
 77 |               cat("\n")
 78 | })
 79 | 
 80 | ## Translate organism codes to names (what = "code", i.e. 'toMap' holds
 81 | ## codes) or names to codes (what = "name"), using getOrgNameNCode().
 82 | ## Returns NA for NULL, empty or single-NA input; a single value is
 83 | ## mapped directly, longer input goes through sapply().
 84 | mapOrgs <- function(toMap, what = c("code", "name")){
 85 |     what <- match.arg(what)
 86 |     orgs <- getOrgNameNCode()
 87 |     ## is.na() is only consulted for scalars: '||' on a longer vector is
 88 |     ## an error in R >= 4.3, which made the sapply() branch unreachable
 89 |     if(length(toMap) == 0 || (length(toMap) == 1 && is.na(toMap))){
 90 |          return(NA)
 91 |     }
 92 |     fun <- function(x){
 93 |         if(what == "code") orgs[[x]] else names(orgs[orgs == x])
 94 |     }
 95 |     if(length(toMap) == 1){
 96 |         return(fun(toMap))
 97 |     }
 98 |     sapply(toMap, fun)
 99 | }
 99 | 
100 | ## Named list of organism names keyed by organism code; used by mapOrgs().
101 | getOrgNameNCode <- function(){
102 |     list("3055" = "Chlamydomonas reinhardtii",
103 |          "3702" = "Arabidopsis thaliana",
104 |          "3847" = "Glycine max",
105 |          "3880" = "Medicago truncatula",
106 |          "4081" = "Lycopersicon esculentum",
107 |          "4513" = "Hordeum vulgare",
108 |          "4530" = "Oryza sativa",
109 |          "4565" = "Triticum aestivum",
110 |          "4577" = "Zea mays",
111 |          "4896" = "Schizosaccharomyces pombe",
112 |          "4932" = "Saccharomyces cerevisiae",
113 |          "5141" = "Neurospora crassa",
114 |          "5833" = "Plasmodium falciparum",
115 |          "6239" = "Caenorhabditis elegans",
116 |          "7165" = "Anopheles gambiae",
117 |          "7227" = "Drosophila melanogaster",
118 |          "7719" = "Ciona intestinalis",
119 |          "7955" = "Danio rerio",
120 |          "8022" = "Oncorhynchus mykiss",
121 |          "8090" = "Oryzias latipes",
122 |          "8355" = "Xenopus laevis",
123 |          "8364" = "Xenopus tropicalis",
124 |          "9031" = "Gallus gallus",
125 |          "9606" = "Homo sapiens",
126 |          "9615" = "Canis familiaris",
127 |          "9598" = "Pan troglodytes",
128 |          "9823" = "Sus scrofa",
129 |          "9913" = "Bos taurus",
130 |          "10090" = "Mus musculus",
131 |          "10116" = "Rattus norvegicus",
132 |          "28985" = "Kluyveromyces lactis",  # was "Kluyveromyces, lactis"
133 |          "29760" = "Vitis vinifera",
134 |          "33169" = "Eremothecium gossypii",
135 |          "44689" = "Dictyostelium discoideum",
136 |          "148305" = "Magnaporthe grisea")
137 | }
138 | 
139 | ## Constructor for homoData objects.  'organism' is translated with
140 | ## mapOrgs(); all other arguments are stored in their slots unchanged.
141 | homoData <- function(organism, LL, type, PS, ACC, HGID, URL){
142 |     orgName <- mapOrgs(organism)
143 |     new("homoData", homoOrg = orgName, homoLL = LL, homoType = type,
144 |         homoPS = PS, homoURL = URL, homoACC = ACC, homoHGID = HGID)
145 | }
146 | 


--------------------------------------------------------------------------------
/R/Amat.R:
--------------------------------------------------------------------------------
  1 | ##copyright 2004 R. Gentleman, all rights reserved
  2 | 
  3 | ## Given the name of a chip (annotation package), compute the pathway
  4 | ## adjacency matrix for LLids: one row per gene id, one column per
  5 | ## pathway, with 1 where the pathway contains the gene and 0 elsewhere.
  6 | PWAmat <- function(data) {
  7 |     if(!is.character(data) || length(data) != 1 )
  8 |         stop("wrong argument")
  9 |     ## choose the pathway map matching the kind of annotation package
 10 |     isOrg <- grepl("^org\\..+", data)
 11 |     mapName <- if (grepl("^org\\..+\\.sgd$", data)) "PATH2ORF"
 12 |                else if (grepl("^org\\..+\\.eg$", data)) "PATH2EG"
 13 |                else "PATH2PROBE"
 14 |     dataE <- getAnnMap(mapName, data, load=TRUE, type="db")
 15 | 
 16 |     if( data == "YEAST" || isOrg )
 17 |         pathLL <- as.list(dataE)
 18 |     else {
 19 |         ## probe-level map: translate the probes of each pathway into
 20 |         ## unique, non-NA gene ids (NULL when no probe is left)
 21 |         pathLL <- eapply(dataE, function(x) {
 22 |             x <- x[!is.na(x)]
 23 |             if(length(x)>0){
 24 |                 LLs <- getEG(x, data)
 25 |                 unique(LLs[!is.na(LLs)]) }
 26 |         })
 27 |     }
 28 |     uniqLL <- unique(unlist(pathLL,use.names=FALSE))
 29 |     ## vapply() + matrix() keep the result a matrix even when there is a
 30 |     ## single gene id; sapply() collapsed that case to a plain vector,
 31 |     ## on which the dimnames assignment failed
 32 |     Amat <- vapply(pathLL, function(x) as.numeric(uniqLL %in% x),
 33 |                    numeric(length(uniqLL)))
 34 |     Amat <- matrix(Amat, nrow=length(uniqLL), ncol=length(pathLL))
 35 |     dimnames(Amat) <- list(uniqLL, names(pathLL))
 36 |     Amat
 37 | }
 38 | 
 39 | 
 40 | ##given the name of chip compute the PubMed adjacency matrix for probe set ids
 41 | PMIDAmat = function(pkg, gene=NULL) {
 42 |     if(!is.character(pkg) || length(pkg) != 1 )
 43 |         stop("wrong argument")
 44 | 
 45 |     probe2pmid <- get(paste(pkg, "PMID", sep=""))
 46 |     if(is.null(gene)){
 47 |         gene2pmid <- as.list(probe2pmid)
 48 |     }else{
 49 |         if(any(duplicated(gene))) warning("Gene is not unique.")
 50 |         gene2pmid <- mget(unique(gene), probe2pmid, ifnotfound=NA)
 51 |     }
 52 |     pmid <- unique(unlist(gene2pmid))
 53 | 
 54 |     Amat <- sapply(gene2pmid,
 55 |                    function(x){
 56 |                        mtch <- match(x, pmid)
 57 |                        zeros <- rep(0, length(pmid))
 58 |                        zeros[mtch] <- 1
 59 |                        return(zeros)
 60 |                    }
 61 |                    )
 62 |     dimnames(Amat) = list(pmid, names(gene2pmid))
 63 |     return(Amat)
 64 | }
 65 | 
 66 | 
 67 | ##given a GO term, and an exprset, produce a heatmap of all probes
 68 | ##mapped to that GOterm;
 69 | GO2heatmap = function(x, eset, data, ...) {
 70 |     if( missing(data) )
 71 |         data = annotation(eset)
 72 |     mapE = get(paste(data, "GO2ALLPROBES", sep=""))
 73 | 
 74 |     whG = mapE[[x]]
 75 |     ##need this because there could be multiple criteria
 76 |     whG = unique(whG)
 77 |     whGs = whG[whG %in% featureNames(eset)]
 78 | 
 79 |     dataM = exprs(eset)[whGs,]
 80 |     heatmap(dataM, ...)
 81 | }
 82 | 
 83 | GOmnplot = function (x, eset, data = "hgu133plus2", group, ...)
 84 | {
 85 |     mapE = get(paste(data, "GO2ALLPROBES", sep = ""))
 86 |     whG = mapE[[x]]
 87 |     whG = unique(whG)
 88 |     whGs = whG[whG %in% featureNames(eset)]
 89 |     dataM = exprs(eset)[whGs, ]
 90 |     tts = apply(dataM, 1, function(x) sapply(split(x, group), mean))
 91 |     rn = row.names(tts)
 92 |     if( length(levels(factor(group))) != 2 )
 93 |         stop("only works for factors with two levels")
 94 |     plot(tts[1,], tts[2,], xlab=rn[1], ylab=rn[2], ...)
 95 |     abline(a=0, b=1)
 96 |     return(tts)
 97 | }
 98 | 
## S4 generic KEGG2heatmap(x, eset, data, ...).
## The methods registered in this file dispatch on the classes of x, eset and
## data and draw a heatmap of the rows mapped to 'x' in <data>PATH2PROBE.
setGeneric("KEGG2heatmap", function(x, eset, data, ...) 
                             standardGeneric("KEGG2heatmap"))
101 | 
## KEGG2heatmap method for eSet input: heatmap of the expression rows whose
## feature names are listed under key 'x' in <data>PATH2PROBE.
## Extra arguments are forwarded to heatmap().
setMethod("KEGG2heatmap", c("character", "eSet", "character"),
   function(x, eset, data, ...) {
    ## NOTE(review): 'data' is part of the dispatch signature ("character"),
    ## so this fallback looks unreachable through the generic -- confirm.
    if (missing(data)) {
        data <- annotation(eset)
    }
    path2probe <- get(paste(data, "PATH2PROBE", sep = ""))
    pathProbes <- unique(path2probe[[x]])
    present <- pathProbes %in% featureNames(eset)
    exprMat <- exprs(eset)[pathProbes[present], ]
    heatmap(exprMat, ...)
})
113 | 
## KEGG2heatmap method for a plain matrix: rows are selected by row name
## from the entries listed under key 'x' in <data>PATH2PROBE.
## Extra arguments are forwarded to heatmap().
setMethod("KEGG2heatmap", c("character", "matrix", "character"),
   function(x, eset, data, ...) {
    path2probe <- get(paste(data, "PATH2PROBE", sep = ""))
    pathProbes <- unique(path2probe[[x]])
    present <- pathProbes %in% row.names(eset)
    exprMat <- eset[pathProbes[present], ]
    heatmap(exprMat, ...)
})
123 | 
124 | 
## S4 generic KEGGmnplot(x, eset, data, group, ...).
## The methods registered in this file plot per-row means of one group
## against the other for the rows mapped to 'x' in <data>PATH2PROBE.
## NOTE(review): the methods in this file all require 'data' to be
## "character", so the default given here is presumably never used by
## dispatch -- confirm.
setGeneric("KEGGmnplot", function(x, eset, data= "hgu133plus2", group,
                               ...) standardGeneric("KEGGmnplot"))
127 | 
## KEGGmnplot method for eSet input.
## For the features listed under key 'x' in <data>PATH2PROBE, plot the mean
## expression in one group against the mean in the other group.
## 'group' must define exactly two levels; '...' is forwarded to plot().
## Returns the 2 x (number of features) matrix of group means.
setMethod("KEGGmnplot",  c("character", "eSet", "character"),
    function (x, eset, data = "hgu133plus2", group, ...)  {
      ## validate first so nothing is computed for an unusable grouping
      if( length(levels(factor(group))) != 2 )
          stop("only works for factors with two levels")

      mapE <- get(paste(data, "PATH2PROBE", sep = ""))
      whG <- unique(mapE[[x]])
      whGs <- whG[whG %in% featureNames(eset)]
      if( length(whGs) == 0 )
          stop("no probes mapped to '", x, "' were found in 'eset'")

      ## drop = FALSE keeps a one-row matrix when only one probe matches;
      ## without it apply() fails on the resulting plain vector
      dataM <- exprs(eset)[whGs, , drop = FALSE]
      tts <- apply(dataM, 1, function(v) sapply(split(v, group), mean))
      rn <- row.names(tts)
      plot(tts[1,], tts[2,], xlab=rn[1], ylab=rn[2], ...)
      abline(a=0, b=1)
      return(tts)
  })
143 | 
## KEGGmnplot method for a plain matrix (rows selected by row name).
## For the rows listed under key 'x' in <data>PATH2PROBE, plot the mean of
## one group against the mean of the other group.
## 'group' must define exactly two levels; '...' is forwarded to plot().
## Returns the 2 x (number of rows) matrix of group means.
setMethod("KEGGmnplot",  c("character", "matrix", "character"),
    function (x, eset, data = "hgu133plus2", group, ...)  {
      ## validate first so nothing is computed for an unusable grouping
      if( length(levels(factor(group))) != 2 )
          stop("only works for factors with two levels")

      mapE <- get(paste(data, "PATH2PROBE", sep = ""))
      whG <- unique(mapE[[x]])
      whGs <- whG[whG %in% row.names(eset)]
      if( length(whGs) == 0 )
          stop("no probes mapped to '", x, "' were found in 'eset'")

      ## drop = FALSE keeps a one-row matrix when only one row matches;
      ## without it apply() fails on the resulting plain vector
      dataM <- eset[whGs, , drop = FALSE]
      tts <- apply(dataM, 1, function(v) sapply(split(v, group), mean))
      rn <- row.names(tts)
      plot(tts[1,], tts[2,], xlab=rn[1], ylab=rn[2], ...)
      abline(a=0, b=1)
      return(tts)
  })
159 | 
160 | 
161 | 


--------------------------------------------------------------------------------
/R/isValidKey.R:
--------------------------------------------------------------------------------
  1 | ##Helper function for schema checking:
  2 | .defineBaseSelectSQL <- function(schema, conn){  
  3 |   ##schema <- dbmeta(conn, "DBSCHEMA")
  4 |   ##centralID <- dbmeta(conn, "CENTRALID")
  5 |     if(schema == "YEAST_DB"){
  6 |         sql <- "select distinct systematic_name from sgd where systematic_name != 'NA';"
  7 |     }else if(length(grep("CHIP_DB$", schema))==1 ){  #All chip packages have a probes table with probe_ids
  8 |         sql <- "select distinct probe_id from probes;"
  9 |     }else if(length(grep("NOSCHEMA", schema))==1 ){ ## NOSCHEMA can have weird columns
 10 |         toget <- dbListFields(conn, "genes")[2]
 11 |         sql <- paste("select distinct", toget, "from genes;")
 12 |     }else if(length(grep("_DB$", schema))==1 && length(grep("CHIP_DB$", schema))==0){
 13 |         sql <- "select distinct gene_id from genes;"
 14 |     }else{
 15 |         stop("Unidentified database schema.  Cannot find central table.  May need to add schema options to isValidKey().")
 16 |     }    
 17 |     return(sql)
 18 | }
 19 | 
 20 | ##Given a list of IDs and a package, are these IDs valid primary IDs for this package?
 21 | setMethod("isValidKey", c("character", "character"),
 22 |           function(ids, pkg){
 23 |     ##argument checking
 24 |     if(!is.character(ids)) stop("'ids' must be a character vector of IDs that you wish to validate")    
 25 |     ##access the DB, get the primary IDs, and then test if they are in your list of ids
 26 |     require(paste(pkg, ".db",sep=""),character.only = TRUE)
 27 |     conn <- do.call(paste(pkg, "_dbconn", sep=""), list())    
 28 |     schema <- dbmeta(conn, "DBSCHEMA")
 29 |     sql <- .defineBaseSelectSQL(schema, conn)
 30 |     res <- dbGetQuery(conn, sql)
 31 |     res <- as.vector(res[,1])#slice to grab result which will always be a single column (based on the sql queries)
 32 |     return(ids %in% res)
 33 | })
 34 | 
 35 | setMethod("isValidKey", c("character","OrgDb"),
 36 |           function(ids, pkg){
 37 |     conn <- dbconn(pkg)
 38 |     schema <- dbmeta(conn, "DBSCHEMA")
 39 |     sql <- .defineBaseSelectSQL(schema, conn)
 40 |     res <- dbGetQuery(conn, sql)
 41 |     res <- as.vector(res[,1])
 42 |     return(ids %in% res)
 43 | })
 44 | 
 45 | 
 46 | ##Given a package, what are all the unique valid primary IDs for this package?
 47 | setMethod("allValidKeys", "character",
 48 |           function(pkg){
 49 |     ##access the DB and get all the primary IDs, (unique constraint already on the field being sought)
 50 |     require(paste(pkg, ".db",sep=""),character.only = TRUE)
 51 |     conn <- do.call(paste(pkg, "_dbconn", sep=""), list())
 52 |     schema <- dbmeta(conn, "DBSCHEMA")
 53 |     sql <- .defineBaseSelectSQL(schema, conn)    
 54 |     res <- dbGetQuery(conn, sql)
 55 |     res <- as.vector(res[,1])#slice to grab result which will always be a single column (based on the sql queries)
 56 |     return(res)
 57 | })
 58 | 
 59 | setMethod("allValidKeys", "OrgDb",
 60 |           function(pkg){
 61 |     conn <- dbconn(pkg)
 62 |     schema <- dbmeta(conn, "DBSCHEMA")
 63 |     sql <- .defineBaseSelectSQL(schema, conn)    
 64 |     res <- dbGetQuery(conn, sql)
 65 |     res <- as.vector(res[,1])#slice to grab result which will always be a single column (based on the sql queries)
 66 |     return(res)
 67 | })
 68 | 
 69 | 
 70 | ##Given a list of gene symbols, return the primary ID (or probe if its a chip package) that should be used.
 71 | ##If there was a symbol or ID in the original list that we don't have a better ID for, keep the original symbol...
 72 | ##Because of the many to one nature of probes to genes, it will NOT be possible to support CHIP packages with this function.
 73 | updateSymbolsToValidKeys = function(symbols, pkg) {
 74 |     #argument checking
 75 |     if(!is.character(symbols)) stop("'symbols' must be a character vector of gene symbols that you wish to translate to the primary ID of the package")
 76 |     require(paste(pkg, ".db",sep=""),character.only = TRUE)
 77 | 
 78 |     ##Check the schema
 79 |     conn <- do.call(paste(pkg, "_dbconn", sep=""), list())
 80 |     schema <- dbmeta(conn, "DBSCHEMA")
 81 | 
 82 |     ##'pkg' cannot be a chip package.
 83 |     if(length(grep("CHIP_DB$", schema))>=1){
 84 |         stop("Because of the many to many relationship that can exist between probes and IDs, this function can only work with the organism level packages which can ensure that there is only one most valid ID per gene symbol.")
 85 |     }    
 86 |     
 87 |     ##Do the right thing depending on what type of package this is.
 88 |     if(length(grep("^YEAST", schema))>=1){
 89 |         ##if its yeast...
 90 |         rr1 = mappedRkeys(eval(parse(text=paste(pkg, "ALIAS", sep=""))))
 91 |         r2 = revmap(eval(parse(text=paste(pkg, "ALIAS", sep=""))))
 92 |     }else if(length(grep("^ARABIDOPSIS", schema))>=1){  
 93 |         stop("Sorry, but the Arabidopsis packages do not have alias information at this time.")
 94 |     }else if(length(grep("^MALARIA", schema))>=1){#MALARIA packages are not entrez gene based
 95 |         r2 = eval(parse(text=paste(pkg, "ALIAS2ORF", sep="")))
 96 |         rr1 = mappedRkeys(revmap(eval(parse(text=paste(pkg, "ALIAS2ORF", sep="")))))        
 97 |     }else{  #so far everything other than yeast and malaria should have reversed alias map and eg base
 98 |         ##so if its something other than yeast we need to do this...
 99 |         r2 = eval(parse(text=paste(pkg, "ALIAS2EG", sep="")))
100 |         rr1 = mappedRkeys(revmap(eval(parse(text=paste(pkg, "ALIAS2EG", sep="")))))        
101 |     }
102 | 
103 |     mA = match(symbols, rr1)
104 |     wh = rr1[mA[!is.na(mA)]]
105 |     
106 |     mB = unlist(mget(wh, r2))  
107 |     symbols[match(names(mB), symbols)] = mB
108 |     return(symbols)
109 | }
110 | 
111 | 
112 | 
113 | ## ##TEST examples:
114 | ## fu <- c("15S_rRNA_2","21S_rRNA_4","15S_rRNA")
115 | ## isValidKey(fu, "org.Sc.sgd")
116 | ## updateSymbolsToValidKeys(fu, "org.Sc.sgd")
117 | 
118 | ## sna <- c("1769325_at","altSymbol")
119 | ## isValidKey(sna, "yeast2")
120 | 
121 | ## bar <- c("MAPK11","P38B","FLJ45465", "altSymbol")
122 | ## isValidKey(bar, "org.Hs.eg")
123 | ## updateSymbolsToValidKeys(bar, "org.Hs.eg")
124 | 
125 | ## foo <- c("1396.pre-tRNA-Met-1", "1396.t00553", "altSymbol")
126 | ## updateSymbolsToValidKeys(foo, "org.Pf.plasmo")
127 | ## isValidKey(foo, "org.Pf.plasmo")
128 | 


--------------------------------------------------------------------------------
/man/chrCats.Rd:
--------------------------------------------------------------------------------
  1 | \name{chrCats}
  2 | \alias{chrCats}
  3 | 
  4 | \alias{createLLChrCats}
  5 | \alias{createMAPIncMat}
  6 | 
  7 | \title{Returns a list of chromosome locations from a MAP environment}
  8 | \description{
  9 |   The \code{chrCats} function takes a data package that contains a \code{MAP} environment
 10 |   and returns a list that contains the locations for each gene (from the
 11 |   chromosome number to more specific locations if they're available).  For
 12 |   example, the \code{hgu95av2MAP} environment gives the location, 14q22-q23, for
 13 |   Affymetrix identifier: \code{1114_at}. This function will return a list with
 14 |   one named element for \code{1114_at} and the values it will contain are 14,
 15 |   14q, 14q2, 14q22, and 14q23 since the Affy id is located at each of those
 16 |   chromosome locations.
 17 | }
 18 | \usage{
 19 | chrCats(data)
 20 | createMAPIncMat(data)
 21 | createLLChrCats(data)
 22 | }
 23 | \arguments{
 24 |   \item{data}{the data package (a character string)}
 25 | }
 26 | \details{
 27 |   This function does a lot of string manipulation and there are a few known
 28 |   errors so I want to discuss them here in case someone else would like to
 29 |   improve on this function.
 30 | 
 31 |   The first thing \code{chrCats} does is to allow only one location for each
 32 |   Affymetrix identifier.  If the \code{MAP} environment has more than one
 33 |   location for an Affy id, then the first location is taken.  Currently, the
 34 |   \code{hgu95av2MAP} environment has only 9 Affy ids (out of 12625) that have more
 35 |   than one location and the \code{hgu133aMAP} environment has only 16 Affy ids (out
 36 |   of 22283) that have more than one location so this does not affect many
 37 |   identifiers. 
 38 | 
 39 |   Next any spaces are removed from each location as several locations have
 40 |   leading spaces.
 41 | 
 42 |   Then a \code{for} loop (which is not efficient!) is used to look at each location
 43 |   individually and make a list that will be returned.  A few particular
 44 |   strings are looked for in each location and these include \code{'|'} and \code{'-'}.
 45 | 
 46 |   Locations that include \code{'|'} in the string are split based on the \code{'|'} as
 47 |   though it represents OR.  For example, for Affy id, \code{32273_at}, in \code{hgu95av2MAP}
 48 |   the location is given as 5q33|5q31.1 and this function assumes this means
 49 |   5q33 or 5q31.1 so it will return the values 5, 5q, 5q3, 5q33, 5q31, and
 50 |   5q31.1 for this Affy id.
 51 | 
 52 |   The \code{'-'} character is assumed to mean BETWEEN.  For example, for Affy id,
 53 |   \code{1138_at}, in \code{hgu95av2MAP} the location is given as 2q11-q14 and this function
 54 |   assumes this means the location is somewhere between 2q11 and 2q14 so it
 55 |   will return the values 2, 2q, 2q1, 2q11, 2q12, 2q13, and 2q14 for this Affy
 56 |   id.
 57 | 
 58 |   Now here is the first problem with this function.  I do not know how to
 59 |   handle the \code{'-'} when the two strings are not of equal length.  For example,
 60 |   for Affy id, \code{36779_at}, in \code{hgu95av2MAP} the location is given as 5q33.3-q34,
 61 |   but I do not know how to treat this BETWEEN because I do not know how many
 62 |   sub-bands there are between 5q33.3 and 5q34.  Is there a 5q33.4 or 5q33.5,
 63 |   etc.?  I'm not sure.  So I treat this \code{'-'} as an \code{'|'}.  This function will
 64 |   return the values 5, 5q, 5q3, 5q33, 5q33.3, and 5q34 for this Affy id and
 65 |   most likely, that is incorrect.  
 66 | 
 67 |   Another problem I have with the \code{'-'} occurs when all of the characters up
 68 |   until the last character do not match.  For example, for Affy id,
 69 |   \code{38927_i_at}, in \code{hgu95av2MAP} the location is given as 11q14-q21, but again
 70 |   I'm not sure how to treat this BETWEEN because I don't know the number of
 71 |   sub-bands between 11q14 and 11q21.  Does 11q15 exist, etc.?  So I again
 72 |   treat this \code{'-'} as an \code{'|'}.  This function will return the values 11, 11q,
 73 |   11q1, 11q14, 11q2, and 11q21 for this Affy id and this is probably
 74 |   incorrect. 
 75 |  
 76 |   The problem with \code{'-'} also occurs when the location is something like
 77 |   19cen-q13.1 for Affy id, \code{34670_at}, in \code{hgu95av2MAP}.  Again I don't know the
 78 |   number of sub-bands between 19cen and 19q13.1 so I treat this BETWEEN as an
 79 |   OR.
 80 | 
 81 |   Another problem I have with \code{'cen'} in the location is that sometimes the
 82 |   location looks like: 19p13.2-cen and very rarely it looks like:
 83 |   5p13.1-5cen.  In the second case, the chromosome number is included after
 84 |   the \code{'-'} and before the \code{'cen'}.  This only occurs with the location
 85 |   5p13.1-5cen in both \code{hgu95av2MAP} and \code{hgu133aMAP} and all other locations do
 86 |   not include the chromosome number after the \code{'-'}.  Currently this function
 87 |   returns the wrong information for that one location.  It will return the
 88 |   values 5, 5p, 5p1, 5p13, 5p13.1, 5p5, and 5p5cen, but it should return 5, 5p,
 89 |   5p1, 5p13, 5p13.1, and 5cen so this one location is an error.  All other
 90 |   locations that include \code{'cen'} are correct.  For example, this function
 91 |   returns the values 19, 19p, 19p1, 19p13, 19p13.2, and 19cen for the location
 92 |   19p13.2-cen. 
 93 | 
 94 |   This function is very slow because it contains \code{for} loops and thus, it would
 95 |   be useful to make it more efficient.  Also, it would be nice at some point
 96 |   for someone with more knowledge on chromosome location figure out how to
 97 |   improve some of my string manipulation errors.
 98 | 
 99 |   \code{createLLChrCats} is a wrapper that converts probe IDs to Entrez
100 |   Gene IDs.
101 | 
102 |   \code{createMAPIncMat} is a wrapper that calls \code{createLLChrCats}
103 |   and then returns an incidence matrix with rows being the categories
104 |   and cols the Entrez Gene IDs.
105 | }
106 | \value{
107 |   A named list with an element for each Affy id.  The name will be the Affy id
108 |   and the values will be the locations for that Affy id.  If the Affy id had a
109 |   location of \code{NA} in the \code{MAP} environment, then a list element is not returned
110 |   for that Affy id. 
111 | }
112 | \author{Elizabeth Whalen}
113 | \examples{
114 |   library("hgu95av2.db")
115 |   mapValues <- chrCats("hgu95av2")
116 | }
117 | \keyword{data}
118 | 
119 | 


--------------------------------------------------------------------------------
/R/GOhelpers.R:
--------------------------------------------------------------------------------
  1 | ##Copyright R. Gentleman, 2004
  2 | ##simple functions to get Evidence codes
  3 | 
  4 | .isMissingGOEntry <- function(x) (length(x) == 1L && is.na(x))
  5 | 
  6 | ##get then GO term names for a particular (sub)ontology
  7 | getOntology = function(inlist, ontology=c("MF", "BP", "CC")) {
  8 |    which = match.arg(ontology)
  9 |    onts = sapply(inlist, function(z) {
 10 |        if (!.isMissingGOEntry(z))
 11 |          z$Ontology
 12 |        else
 13 |          z
 14 |        })
 15 |    onts = onts[!is.na(onts)]
 16 |    unique(names(inlist[onts %in% which]))
 17 | }
 18 | 
 19 | 
 20 | ##get GO evidence codes
 21 | getEvidence = function(inlist) {
 22 |     ans <- sapply(inlist, function(z) {
 23 |          if (!.isMissingGOEntry(z))
 24 |            z$Evidence
 25 |          else
 26 |            z
 27 |      })
 28 |     ans[!is.na(ans)]
 29 | }
 30 | 
 31 | 
 32 | ##drop a specified set of evidence codes
 33 | dropECode = function(inlist, code = "IEA") {
 34 |     hasCode = sapply(inlist, function(z) {
 35 |         if (!.isMissingGOEntry(z))
 36 |           z$Evidence
 37 |         else
 38 |           z
 39 |         })
 40 |     hasCode <- hasCode[!is.na(hasCode)]
 41 |     badVals = hasCode %in% code
 42 |     inlist[!badVals]
 43 | }
 44 | 
 45 | 
 46 | ## helper function, determines if there is a GO annotation for the
 47 | ## desired mode
 48 | hasGOannote <- function(x, which="MF") {
 49 |     if (is(x, "GOTerms")) {
 50 |         cat <- Ontology(x)
 51 |         if (!is.na(cat) && cat == which)
 52 |           return(TRUE) else return(FALSE)
 53 |     }
 54 |     if (is.list(x)) {
 55 |         gT <- sapply(x, function(y) is(y, "GOTerms"))
 56 |         if (any(gT)) {
 57 |             if (all(gT)) {
 58 |                 cats <- sapply(x, Ontology)
 59 |                 return(cats == which)
 60 |             }
 61 |             else
 62 |               stop("mixed arguments not allowed")
 63 |         }
 64 |     }
 65 |     if (!is.character(x))
 66 |       stop("wrong argument")
 67 |     tm <- getGOOntology(x)
 68 |     return(tm == which)
 69 | }
 70 | 
 71 | 
 72 | ##three functions to get all the GO information for a set of GO terms
 73 | ##FIXME: these need to be renovated - probably removed even..
 ## Look up the GO IDs in 'x' in GO.db::GOTERM and apply Ontology() to each
 ## result.  An empty input gives character(0) without loading GO.db.
 getGOOntology <- function(x) {
     if (!is.character(x)) {
         stop("need a character argument")
     }
     if (!length(x)) {
         return(character(0))
     }
     loadNamespace("GO.db")
     goTerms <- mget(x, envir=GO.db::GOTERM, ifnotfound=NA)
     sapply(goTerms, Ontology)
 }
 83 | 
 ## For every GO ID in 'x', return list(Ontology=, Parents=) taken from the
 ## first of the MF, BP and CC parent maps that has an entry for it.
 ## Stops when an ID is found in none of the three maps.
 getGOParents <- function(x) {
     if (!is.character(x)) {
         stop("need a character argument")
     }
     if (!length(x)) {
         return(list())
     }
     loadNamespace("GO.db")
     parentMaps <- list(
         MF = mget(x, envir=GO.db::GOMFPARENTS, ifnotfound=NA),
         BP = mget(x, envir=GO.db::GOBPPARENTS, ifnotfound=NA),
         CC = mget(x, envir=GO.db::GOCCPARENTS, ifnotfound=NA))
     lapply(setNames(seq_along(x), x),
         function(i) {
             ## maps are tried in MF, BP, CC order; the first hit wins
             for (ont in names(parentMaps)) {
                 hit <- parentMaps[[ont]][[i]]
                 if (!identical(hit, NA))
                     return(list(Ontology=ont, Parents=hit))
             }
             stop(paste(x[[i]], "is not a member of any ontology"))
         }
     )
 }
108 | 
109 |  getGOChildren <- function(x) {
110 |      if( !is.character(x) )
111 |          stop("need a character argument")
112 |      if(length(x) == 0 )
113 |          return(list())
114 |      loadNamespace("GO.db")
115 |      MF_children <- mget(x, envir=GO.db::GOMFCHILDREN, ifnotfound=NA)
116 |      BP_children <- mget(x, envir=GO.db::GOBPCHILDREN, ifnotfound=NA)
117 |      CC_children <- mget(x, envir=GO.db::GOCCCHILDREN, ifnotfound=NA)
118 |      lapply(setNames(seq_along(x), x),
119 |          function(i) {
120 |              xi_children <- MF_children[[i]]
121 |              if (!identical(xi_children, NA))
122 |                  return(list(Ontology="MF", Children=xi_children))
123 |              xi_children <- BP_children[[i]]
124 |              if (!identical(xi_children, NA))
125 |                  return(list(Ontology="BP", Children=xi_children))
126 |              xi_children <- CC_children[[i]]
127 |              if (!identical(xi_children, NA))
128 |                  return(list(Ontology="CC", Children=xi_children))
129 |              list()  # not an error (unlike for getGOParents() above)
130 |          }
131 |      )
132 |  }
133 | 
134 |  getGOTerm <- function(x) {
135 |      if( !is.character(x) )
136 |          stop("need a character argument")
137 |      if(length(x) == 0 )
138 |          return(list())
139 |      loadNamespace("GO.db")
140 |      terms <- mget(x, envir=GO.db::GOTERM, ifnotfound=NA)
141 |      isNA = sapply(terms,function(x) !(isS4(x) && is(x, "GOTerms")))
142 |      if( any(isNA) )
143 |          terms = terms[!isNA]
144 | 
145 |      ontology <- sapply(terms, Ontology)
146 |      terms = sapply(terms, Term)
147 |      return(split(terms, ontology))
148 |  }
149 | 
150 | 
## Test which of 'goids' are keys of the parents map of one ontology.
##   goids:    vector of GO IDs
##   ontology: one of "BP", "CC", "MF"
## Returns a logical vector as long as 'goids'.
filterGOByOntology <- function(goids, ontology=c("BP", "CC", "MF")) {
    ontology <- match.arg(ontology)
    ## match.arg() guarantees one of the three codes, so the map name can
    ## be assembled directly: GOBPPARENTS, GOCCPARENTS or GOMFPARENTS
    parentsMap <- get(paste0("GO", ontology, "PARENTS"))
    knownIDs <- ls(parentsMap)
    goids %in% knownIDs
}
161 | 
## Return all GO IDs in the specified ontologies.
##   ont: character vector of ontology codes ("BP", "CC", "MF");
##        duplicates are ignored
## Returns a character vector with the IDs of each requested ontology, in
## the order the ontologies were given.  Stops on an unknown code.
aqListGOIDs <- function(ont) {
    ont <- unique(ont)
    knownOnts <- c("BP", "CC", "MF")
    badOnt <- ont[!(ont %in% knownOnts)]
    if (length(badOnt) > 0)
      stop("Unknown ontology codes: ", paste(badOnt, collapse=", "),
           "\nvalid codes are: ", paste(knownOnts, collapse=", "))

    ## Collect the IDs per ontology and concatenate them.  The previous
    ## preallocate-and-fill loop indexed with lens[i]+1:lens[i+1], i.e. it
    ## offset by the previous ontology's size only, not the running total,
    ## so a third ontology overwrote earlier IDs and left "" entries.
    idsByOnt <- lapply(ont, function(o) {
        ls(getAnnMap(paste0(o, "PARENTS"), chip="GO"))
    })
    as.character(unlist(idsByOnt, use.names=FALSE))
}
184 | 


--------------------------------------------------------------------------------
/R/findNeighbors.R:
--------------------------------------------------------------------------------
  1 | findNeighbors <- function(chrLoc, llID, chromosome, upBase, downBase,
  2 |                           mergeOrNot = TRUE){
  3 | 
  4 |     require(chrLoc, character.only = TRUE) ||
  5 |                            stop(paste("Chromomosome location chrLoc",
  6 |                                       "is not available on the system",
  7 |                                       "Either build one or get one from",
  8 |                                       "BioConductor"))
  9 | 
 10 |     if(checkArgs(llID, chromosome, upBase, downBase) == "swap"){
 11 |         temp <- upBase
 12 |         upBase <- downBase
 13 |         downBase <- temp
 14 |     }
 15 |     upBase <- as.numeric(ifelse(missing(upBase), 0, upBase))
 16 |     downBase <- as.numeric(ifelse(missing(downBase), 0, downBase))
 17 |     if(missing(chromosome)){
 18 |         chromosome <- findChr4LL(llID, get(paste(chrLoc,
 19 |                                                  "LOCUSID2CHR", sep = "")),
 20 |                                 gsub("CHRLOC", "", chrLoc))
 21 |     }
 22 |     if(!missing(llID)){
 23 |         # Find the location for the target gene
 24 |         location <- as.numeric(get(llID, get(paste(chrLoc,
 25 |                                       chromosome, "START", sep = ""))))
 26 |     }else{
 27 |         location <- (downBase - upBase)/2
 28 |     }
 29 |     upperB <- getBoundary(location, upBase, TRUE)
 30 |     downB <- getBoundary(location, downBase, FALSE)
 31 |     neighbors <- list()
 32 |     # There may be chances that a llID be mapped to genes on different CHR
 33 |     for(i in chromosome){
 34 |         start <- unlist(contents(get(paste(chrLoc, chromosome,
 35 |                                            "START", sep=""))),
 36 |                         use.names=TRUE)
 37 | 
 38 |         end <- unlist(contents(get(paste(chrLoc, chromosome,
 39 |                                               "END", sep=""))),
 40 |                       use.names=TRUE)
 41 | 
 42 |         if(!missing(llID)){
 43 |             # greb the ones in the range
 44 |             foundUp <- weightByConfi(start[start > upperB &
 45 |                                            start < min(location)])
 46 |             foundDown <- weightByConfi(end[end < downB &
 47 |                                            end > max(location)])
 48 |             if(length(foundUp) != 0 || length(foundDown) != 0){
 49 |                 if(mergeOrNot){
 50 |                     neighbors[[as.character(i)]] <- unique(c(foundUp,
 51 |                                                              foundDown))
 52 |                 }else{
 53 |                     neighbors[[as.character(i)]] <-
 54 |                         list(upstream = foundUp, downstream = foundDown)
 55 |                 }
 56 |             }
 57 |         }else{
 58 |             found <- weightByConfi(c(start[start >= upperB &
 59 |                                            start <= location],
 60 |                                      end[end <= downB & end >= location]))
 61 |             if(length(found) != 0){
 62 |                  neighbors[[as.character(i)]] <- unique(found)
 63 |             }
 64 |         }
 65 |     }
 66 | 
 67 |     if(length(neighbors) == 0){
 68 |         warning("No Genes in the defined region satisfy the condition")
 69 |     }else{
 70 |         return(neighbors)
 71 |     }
 72 | }
 73 | 
 74 | checkArgs <- function(llID, chromosome, upBase, downBase){
 75 |     # llID is not required if search for genes within a range
 76 |     if(missing(llID)){
 77 |         # Both upBase, downBase, and chromosome must be there if
 78 |         # llID is missing
 79 |         if(any(missing(upBase), missing(downBase), missing(chromosome))){
 80 |             stop(paste("Search can not be conducted with llID and",
 81 |                        "at least one of upBase, downBase and ",
 82 |                        "chromosome missing"))
 83 |         }else{
 84 |             if(as.numeric(upBase) < as.numeric(downBase)){
 85 |                 warning(paste("upBase value is smaller then downBase",
 86 |                               "value. Values have been swapped"))
 87 |                 return("swap")
 88 |             }
 89 |             if(as.numeric(upBase) == as.numeric(downBase)){
 90 |                 stop("upBase and downBase can not be the same")
 91 |             }
 92 |         }
 93 |     }else{
 94 |         if(missing(upBase)){
 95 |             warning(paste("upBase is missing. Search will be",
 96 |                           "conducted for genes downstream only"))
 97 |         }
 98 |         if(missing(downBase)){
 99 |             warning(paste("downBase is missing. Search will be",
100 |                           "conducted for genes upstream only"))
101 |         }
102 |     }
103 |     return("OK")
104 | }
105 | 
## Look up the chromosome(s) recorded for a gene ID.
##   llID:     gene ID used as key
##   chrEnv:   map from gene ID to chromosome, indexed with [[
##   organism: organism name understood by getValidChr()
## Returns the chromosome when there is exactly one (warning if it is not a
## valid chromosome for the organism), otherwise the unique valid ones.
## Stops when the ID cannot be found.
findChr4LL <- function(llID, chrEnv, organism){
    ## The lookup error is caught here.  The old code tried to silence it
    ## via options(show.error.message = ...), a misspelling of
    ## 'show.error.messages', which suppressed nothing and left a stray
    ## global option behind.
    chr <- tryCatch(chrEnv[[llID]], error = function(e) NULL)

    ## a NULL result (e.g. an absent key in a plain environment) also means
    ## the ID is unknown
    if(is.null(chr)){
        stop(paste("Entrez Gene ID", llID, "could not be found in any",
                   "of the chromosomes in the data package"))
    }

    validChr <- getValidChr(organism)
    if(length(chr) == 1){
        if(!is.element(chr, validChr)){
            warning(paste("Entrez Gene ID", llID, "is currently",
                       "not known to be associated with any",
                       "chromosome"))
        }
        return(chr)
    }
    unique(chr[is.element(chr, validChr)])
}
127 | 
128 | 
## Valid chromosome names (as character strings) for a supported organism:
## the numbered autosomes followed by "X" and "Y".  The organism name is
## matched case-insensitively; anything but human, mouse or rat stops.
getValidChr <- function(organism){
    autosomes <- switch(toupper(organism),
                        HUMAN = 1:22,
                        MOUSE = 1:19,
                        RAT = 1:20,
                        stop(paste("Unknow organism", organism)))
    c(autosomes, "X", "Y")
}
136 | 
## Compute one end of a search window around a location.
##   loc:   location(s); only the first element is used
##   base:  distance from the location
##   lower: TRUE for the lower end (never below 0), FALSE for the upper end
## A location of 0 returns 'base' unchanged.
getBoundary <- function(loc, base, lower = TRUE){
    anchor <- as.numeric(loc[1])
    if(anchor == 0)
        return(base)
    if(!lower)
        return(anchor + base)
    lowerEnd <- anchor - base
    if(lowerEnd < 0) 0 else lowerEnd
}
153 | 
# Collapse hits named "<id>.<label>" to one entry per id, preferring
# the entry labelled "Confident".
#
# Only names(foundLLs) are used.  Each distinct name is split at its last
# dot: the part before it becomes the value, the part after it becomes the
# name.  When the same id occurs more than once, entries named
# "Confident" are moved to the front and later duplicates of an id are
# dropped.
#
# Value:
#   A named character vector, or "" when foundLLs is empty.
weightByConfi <- function(foundLLs){
    if(length(foundLLs) == 0){
        return("")
    }

    tags <- unique(names(foundLLs))
    ids <- gsub("(^.*)\\..*", "\\1", tags)
    names(ids) <- gsub("^.*\\.(.*)", "\\1", tags)

    if(anyDuplicated(ids) > 0){
        # Put the "Confident" entries first so that duplicated() keeps
        # them and discards the other entries for the same id.
        confident <- names(ids) == "Confident"
        ids <- c(ids[confident], ids[!confident])
        ids <- ids[!duplicated(ids)]
    }

    ids
}
171 | 


--------------------------------------------------------------------------------
/R/pubMedAbst.R:
--------------------------------------------------------------------------------
    ## Define the class structure of the pubMedAbst object
    ## A generic named "pubMedAbst" taking a single `object` argument is
    ## registered first; the visible part of this file defines no method
    ## for it.
    setGeneric("pubMedAbst", function(object)
               standardGeneric("pubMedAbst"))

    ## S4 class with one slot per field of an article: pmid, authors,
    ## abstText, articleTitle, journal and pubDate.  Every slot is
    ## "character" except `authors`, which is declared as "vector".
    setClass("pubMedAbst",
             representation(pmid="character", authors="vector", abstText="character",
             articleTitle="character", journal="character",
             pubDate="character"))
  9 | 
 10 |     setMethod("show", "pubMedAbst", function(object) {
 11 |       s <- c("An object of class 'pubMedAbst':",
 12 |           paste("Title:  ", articleTitle(object)),
 13 |           paste("PMID:   ", pmid(object)),
 14 |           paste("Authors:", paste(authors(object), collapse=", ")),
 15 |           paste("Journal:", journal(object)), 
 16 |           paste("Date:   ", pubDate(object)))
 17 |       cat(strwrap(s, exdent=5), sep="\n")
 18 |     })
 19 | 
    ## Define generics
    ## One accessor generic per slot of the pubMedAbst class.  Each is
    ## created only when getGeneric() returns NULL for that name, so a
    ## generic of the same name that already exists is left untouched.
    ## Every generic takes a single argument, `object`.
    if (is.null(getGeneric("authors")))
        setGeneric("authors", function(object)
                   standardGeneric("authors"))

    if (is.null(getGeneric("abstText")))
        setGeneric("abstText", function(object)
                   standardGeneric("abstText"))

    if (is.null(getGeneric("articleTitle")))
        setGeneric("articleTitle", function(object)
                   standardGeneric("articleTitle"))

    if (is.null(getGeneric("journal")))
        setGeneric("journal", function(object)
                   standardGeneric("journal"))

    if (is.null(getGeneric("pubDate")))
        setGeneric("pubDate", function(object)
                   standardGeneric("pubDate"))

    if (is.null(getGeneric("pmid")))
        setGeneric("pmid", function(object)
                   standardGeneric("pmid"))
 44 | 
 45 | ## Methods
 46 |     setMethod("authors", "pubMedAbst", function(object)
 47 |               object@authors)
 48 |     setMethod("abstText", "pubMedAbst", function(object)
 49 |               object@abstText)
 50 |     setMethod("articleTitle", "pubMedAbst", function(object)
 51 |               object@articleTitle)
 52 |     setMethod("journal", "pubMedAbst", function(object)
 53 |               object@journal)
 54 |     setMethod("pubDate", "pubMedAbst", function(object)
 55 |               object@pubDate)
 56 |     setMethod("pmid", "pubMedAbst", function(object)
 57 |               object@pmid)
 58 | 
 59 | buildPubMedAbst <- function(xml) {
 60 |     ## Passed in a XML tree detailing a single article
 61 |     ## will parse the XML and create a new class
 62 | 
 63 |     xmlMedline <- xml["MedlineCitation"][[1]]
 64 |     xmlArticle <- xmlMedline["Article"]
 65 | 
 66 |     ## Disable error messages, and wrap potential error causers
 67 |     ## w/ trys
 68 |     options(show.error.messages = FALSE)
 69 |     on.exit(options(show.error.messages=TRUE))
 70 | 
 71 |     ## Get the PMID
 72 |     pmid <- xmlMedline["PMID"][[1]]
 73 |     pmid <- try(as.character(xmlChildren(pmid)$text)[6])
 74 |     if (inherits(pmid,"try-error") == TRUE) {
 75 |         pmid <- "No PMID Provided"
 76 |     }
 77 | 
 78 |     ## Retrieve Article Title
 79 |     articleTitle <- xmlArticle[[1]][["ArticleTitle"]]
 80 |     articleTitle <-
 81 |     try(as.character(xmlChildren(articleTitle)$text)[6])
 82 |     if (inherits(articleTitle,"try-error") == TRUE) {
 83 |         articleTitle <- "No Title Provided"
 84 |     }
 85 | 
 86 |     ## Retrieve the abstract
 87 |     abstText <- xmlArticle[[1]]["Abstract"][[1]]["AbstractText"]
 88 |     abstText <- try(as.character(xmlChildren(abstText[[1]])$text)[6])
 89 |    if (inherits(abstText,"try-error") == TRUE) {
 90 |        abstText <- "No Abstract Provided"
 91 |    }
 92 | 
 93 |     ## Retrieve the date - get the year/month separately and then
 94 |     ## join them at the end.  If no month or year provided, subst
 95 |     ## "MontH" and "Year" respectively
 96 |     pubDateBase <-
 97 |         xmlArticle[[1]]["Journal"][[1]]["JournalIssue"][[1]]["PubDate"]
 98 |     pubDateMonth <- pubDateBase[[1]]["Month"]
 99 |     pubDateMonth <-
100 |         try(as.character(xmlChildren(pubDateMonth[[1]])$text)[6])
101 |     if (inherits(pubDateMonth,"try-error") == TRUE) {
102 |         pubDateMonth <- "Month"
103 |     }
104 |     pubDateYear <- pubDateBase[[1]]["Year"]
105 |     pubDateYear <-
106 |         try(as.character(xmlChildren(pubDateYear[[1]])$text)[6])
107 |     if (inherits(pubDateYear, "try-error") == TRUE) {
108 |         pubDateYear <- "Year"
109 |     }
110 |     ## Join up the date information
111 |     pubDate <- paste(pubDateMonth,pubDateYear)
112 | 
113 |     ## Get the journal this was published in
114 |     journal <-
115 |         xml["MedlineCitation"][[1]]["MedlineJournalInfo"][[1]]["MedlineTA"]
116 |     journal <- try(as.character(xmlChildren(journal[[1]])$text)[6])
117 |     if (inherits(journal,"try-error") == TRUE) {
118 |         journal <- "No Journal Provided"
119 |     }
120 | 
121 |     ## Build up a vector of author names, created by assembling the
122 |     ## pieces of each author's name.
123 |     authorList <- xmlArticle[[1]]["AuthorList"]
124 |     authors <- vector()
125 |     numAuthors <- try(length(xmlChildren(authorList[[1]])))
126 |     if (inherits(numAuthors,"try-error") == TRUE) {
127 |         authors[1] <- "No Author Information Provided"
128 |     }
129 |     else {
130 |         for (i in 1:numAuthors) {
131 |             curAuthor <- authorList[[1]][i]
132 |             last <-
133 |                 try(as.character(xmlChildren(curAuthor[[1]]["LastName"][[1]])$text)[6])
134 |             if (inherits(last,"try-error") == TRUE) {
135 |                 last <- "LastName"
136 |             }
137 | 
138 |             initial <-
139 |                 try(as.character(xmlChildren(curAuthor[[1]]["Initials"][[1]])$text)[6])
140 |             if (inherits(initial,"try-error") == TRUE) {
141 |                 initial <- "M"
142 |             }
143 | 
144 |             authors[i] <- paste(initial,last)
145 |         }
146 |     }
147 | 
148 |     ## Restore error messages
149 |     options(show.error.messages=TRUE)
150 | 
151 |     newPMA <- new("pubMedAbst", articleTitle=articleTitle,
152 |                   abstText=abstText, pubDate=pubDate,authors=authors,
153 |                   journal=journal, pmid=pmid)
154 | 
155 |     return(newPMA)
156 | }
157 | 
pm.getabst <- function(geneids, basename) {
    ## Fetch the PubMed abstracts associated with each gene identifier.
    ##
    ## geneids  - identifiers passed to getPMID(); also used as the names
    ##            of the result.
    ## basename - passed unchanged to getPMID() as its second argument.
    ##
    ## Returns a list with one element per entry of geneids: NA when the
    ## PubMed ids for that gene are a single NA, otherwise a list of the
    ## objects built by buildPubMedAbst() from each child of the XML root
    ## returned by pubmed().
    pmids <- getPMID(geneids, basename)
    numids <- length(geneids)
    rval <- vector("list", length=numids)
    names(rval) <- geneids
    ## seq_len() rather than 1:n, so that empty input (no gene ids, or a
    ## reply without children) runs zero iterations instead of indexing
    ## elements 1 and 0.
    for(i in seq_len(numids)) {
        pm <- pmids[[i]]
        if( length(pm)==1 && is.na(pm) )
            rval[[i]] <- NA
        else {
            a <- xmlRoot(pubmed(pm))
            numAbst <- length(xmlChildren(a))
            absts <- vector("list", length=numAbst)
            for (j in seq_len(numAbst))
                absts[[j]] <- buildPubMedAbst(a[[j]])
            rval[[i]] <- absts
        }
    }
    rval
}
179 | 
pm.abstGrep <- function(pattern, absts, ...)
{
    ## Flag the abstracts whose text matches a pattern.
    ##
    ## pattern - pattern handed to grep().
    ## absts   - list of objects for which abstText() is defined.
    ## ...     - further arguments passed on to grep().
    ##
    ## Returns a logical vector with one element per entry of absts,
    ## TRUE where grep() reports a match on the first element of the
    ## abstract text.
    nabsts <- length(absts)
    rval <- rep(FALSE, nabsts)
    ## seq_len() so that an empty list yields logical(0) instead of
    ## failing on absts[[1]].
    for(i in seq_len(nabsts)) {
        hits <- grep(pattern, abstText(absts[[i]]), ...)
        ## Only the first hit is inspected: `&&` needs a length-one
        ## condition, and grep() may return several indices.
        if( length(hits) > 0 && hits[1] == 1 )
            rval[i] <- TRUE
    }
    rval
}
192 | 
pm.titles <- function (absts) {
     ## Collect the article titles of a list of abstract lists.
     ##
     ## absts - a list whose elements are themselves collections of
     ##         objects for which articleTitle() is defined.
     ##
     ## Returns an unnamed list of the same length as absts; element j
     ## holds the result of applying articleTitle() to each member of
     ## absts[[j]].
     numa <- length(absts)
     rval <- vector("list", length=numa)
     ## seq_len() so that an empty input returns list() instead of
     ## failing on absts[[1]].
     for(j in seq_len(numa))
         rval[[j]] <- sapply(absts[[j]], function(x) articleTitle(x))
     rval
}
200 | 
201 | 


--------------------------------------------------------------------------------
/vignettes/prettyOutput.Rnw:
--------------------------------------------------------------------------------
  1 | %
  2 | % NOTE -- ONLY EDIT THE .Rnw FILE!!!  The .tex file is
  3 | % likely to be overwritten.
  4 | %
  5 | % \VignetteIndexEntry{HowTo: Get HTML Output}
  6 | % \VignetteDepends{annotate, hgu95av2.db}
  7 | % \VignetteKeywords{Expression Analysis, Annotation}
  8 | % \VignettePackage{annotate}
  9 | \documentclass[11pt]{article}
 10 | 
 11 | 
 12 | \newcommand{\Rfunction}[1]{{\texttt{#1}}}
 13 | \newcommand{\Rmethod}[1]{{\texttt{#1}}}
 14 | 
 15 | \newcommand{\Robject}[1]{{\texttt{#1}}}
 16 | \newcommand{\Rpackage}[1]{{\textit{#1}}}
 17 | \newcommand{\Rclass}[1]{{\textit{#1}}}
 18 | 
 19 | \usepackage[authoryear,round]{natbib}
 20 | 
 21 | 
 22 | \bibliographystyle{plainnat}
 23 | 
 24 | \usepackage{hyperref}
 25 | 
 26 | \begin{document}
 27 | \title{HowTo: get pretty HTML output for my gene list}
 28 | \author{James W. MacDonald}
 29 | \maketitle{}
 30 | 
 31 | \section{Overview}
 32 | The intent of this vignette is to show how to make reasonably nice
 33 | looking HTML tables for presenting the results of a microarray
 34 | analysis. These tables are a very nice format because you can insert
 35 | clickable links to various public annotation databases, which
 36 | facilitates the downstream analysis.  In addition, the format is quite
 37 | compact, can be posted on the web, and can be viewed using any number
 38 | of free web browsers. One caveat: an HTML table is probably not the
 39 | best format for presenting the results for \emph{all} of the genes on
 40 | a chip. For even a small (5000 gene) chip, the file could be 10 Mb or
 41 | more, which would take an inordinate amount of time to open and
 42 | view. Also note that the Bioconductor project supplies annotation
 43 | packages for many of the more popular Affymetrix chips, as well as for
 44 | many commercial spotted cDNA chips. For chips that have annotation
 45 | packages, the \Rpackage{annaffy} package is the preferred method for
 46 | making HTML tables.
 47 | 
 48 | To make an annotated HTML table, the only requirement is that we have
 49 | some sort of annotation data for the microarray that we are
 50 | using. Most manufacturers supply data in various formats that can be
 51 | read into \Rpackage{R}. For instance, Affymetrix supplies CSV files
 52 | that can be read into \Rpackage{R} using the \Rmethod{read.csv()}
 53 | function
 54 | \url{http://www.affymetrix.com/support/technical/byproduct.affx?cat=arrays}.
 55 | 
 56 | \section{Alternate methods}
 57 | Please note that one can also make these HTML tables by parsing data from
 58 | e.g., an online (or local) Biomart database, using functions in the biomaRt
 59 | package. This may be easier, and may result in more current annotation data.
 60 | Please see the prettyOutput vignette in the biomaRt package for more information. 
 61 | 
 62 | \section{Data Analysis}
 63 | I will assume that the reader is familiar with the analysis of
 64 | microarray data, and has a set of genes that she would like to use. In
 65 | addition, I will assume that the reader is familiar enough with
 66 | \Rpackage{R} that she can subset the data based on a list of genes,
 67 | and reorder based on a particular statistic. For any questions about
 68 | subsetting or ordering data, please see ``An Introduction to R''. For
 69 | questions regarding microarray analysis, please consult the vignettes
 70 | for, say, \Rpackage{limma}, \Rpackage{multtest}, or \Rpackage{marray}.
 71 | 
 72 | \section{Getting Started}
 73 | We first load the \Rpackage{annotate} package, as well as some
 74 | data. These data will be from the Affymetrix HG-U95Av2 chip (for which
 75 | we would normally use \Rpackage{annaffy}). To keep the HTML table
 76 | small, we will take a subset of fifteen genes as an example.
 77 | 
 78 | <<echo=FALSE, eval=TRUE>>=
 79 | options(width=70)
 80 | @
 81 | 
 82 | <<>>=
 83 | library("annotate")
 84 | data(sample.ExpressionSet)
 85 | igenes <- featureNames(sample.ExpressionSet)[246:260]
 86 | @
 87 | 
 88 | \section{Annotation Data}
 89 | <<echo=FALSE>>=
 90 | ug <- c("Hs.169284 // ---", "Hs.268515 // full length", "Hs.103419 // full length", "Hs.380429 // ---" ,"--- // ---",
 91 |         "Hs.169331 // full length", "Hs.381231 // full length", "Hs.283781 // full length", "--- // ---", "--- // ---",
 92 |         "Hs.3195 // full length", "--- // ---", "Hs.176660 // full length", "Hs.272484 // full length", "Hs.372679 // full length")
 93 | ll <- c("221823", "4330", "9637", "---", "---", "6331", "841", "27335", "---", "---", "6375", "---", "2543", "2578", "2215")
 94 | gb <- c("M57423", "Z70218", "L17328", "S81916", "U63332", "M77235", "X98175", "AB019392", "J03071", "D25272", "D63789",
 95 |         "D63789", "U19142", "U19147", "X16863")
 96 | sp <- c("P21108", "Q10571", "Q9UHY8", "Q16444", "---", "Q14524 /// Q8IZC9 /// Q8WTQ6 /// Q8WWN5 /// Q96J69", "Q14790", "Q9UBQ5",
 97 |         "---", "---", "P47992", "---", "Q13065 /// Q8IYC5", "Q13070", "O75015")
 98 | 
 99 | @ 
100 | For this vignette I have supplied the annotation data. In a normal
101 | situation, these data would be subset from the manufacturer's
102 | annotation data, using the manufacturer's gene identifiers (which is
103 | how I got these IDs).
104 | 
105 | First, we will look at the GenBank and LocusLink IDs. We will be able
106 | to use these IDs without further modification. Note that the LocusLink
107 | IDs contain some missing data (``---''). This will not pose a problem
108 | because LocusLink IDs are all numeric, so we have incorporated code in
109 | \Rmethod{htmlpage()} to automatically convert any non-numeric ID to an
110 | HTML empty cell character (``\&nbsp;''). GenBank IDs (which often
111 | correspond to either RefSeq or GenBank IDs) are not as consistent, so
112 | any missing data would have to be manually converted to the HTML empty
113 | cell character. Missing data for LocusLink, UniGene and OMIM IDs are
114 | automatically converted, whereas Affymetrix, SwissProt and GenBank IDs
115 | have to be done manually. I will give examples of how to do this
116 | below.
117 | <<>>=
118 | gb
119 | ll
120 | @
121 | 
122 | The UniGene and SwissProt IDs present different challenges, so we will
123 | modify them separately. For the UniGene IDs we need to strip off the
124 | extra information appended to each ID. If we didn't do this, our
125 | hyperlink would not work correctly.
126 | 
127 | <<>>=
128 | ug
129 | ug <- sub(" //.*$", "", ug)
130 | ug
131 | @
132 | 
133 | The SwissProt IDs present a different challenge. Here there isn't any
134 | extra information. Instead, we have multiple IDs for some of the
135 | genes, and missing IDs for some of the others. Because the code for
136 | SwissProt IDs will not automatically handle missing data, we have to
137 | convert the missing data to an HTML empty cell identifier
138 | (``\&nbsp;''). For \Rmethod{htmlpage()} to correctly handle multiple
139 | IDs, we have to convert the character vector into a \emph{list} of
140 | character vectors.
141 | <<>>=
142 | sp
143 | sp <- strsplit(sub("---","&nbsp;",as.character(sp)), "///")
144 | sp
145 | @
146 | 
147 | We have converted the data to a list of character vectors, and also
148 | converted the ``---'' missing data identifier to the HTML character
149 | for an empty cell.
150 | 
151 | \section{Build the Table}
152 | 
153 | Usually we would like to include the expression values for our genes
154 | along with some statistics, say a $t$-statistic, fold change, and
155 | $p$-value. As an example, we will make a comparison using the first
156 | ten samples.
157 | 
158 | <<expDat>>=
159 | dat <- exprs(sample.ExpressionSet)[igenes,1:10]
160 | FC <- rowMeans(dat[igenes,1:5]) - rowMeans(dat[igenes,6:10])
161 | pval <- esApply(sample.ExpressionSet[igenes,1:10], 1, function(x) t.test(x[1:5], x[6:10])$p.value)
162 | tstat <- esApply(sample.ExpressionSet[igenes,1:10], 1, function(x) t.test(x[1:5], x[6:10])$statistic)
163 | @
164 | 
165 | It is also usually a good idea to include gene names in the
166 | table. Normally the names would be subsetted from the annotation data,
167 | but here we have to supply them. Again, we have to manually convert
168 | any missing names to the HTML empty cell character.
169 | 
170 | <<echo=False>>=
171 | name <- c("hypothetical protein LOC221823",
172 |           "meningioma (disrupted in balanced translocation) 1",
173 |           "fasciculation and elongation protein zeta 2 (zygin II)",
174 |           "Phosphoglycerate kinase {alternatively spliced}",
175 |           "---","sodium channel, voltage-gated, type V, alpha polypeptide",
176 |           "caspase 8, apoptosis-related cysteine protease","muscle specific gene","---","---","chemokine (C motif) ligand 1",
177 |           "---","G antigen 1","G antigen 6","Fc fragment of IgG, low affinity IIIb, receptor for (CD16)")
178 | @
179 | <<>>=
180 | name
181 | name <- gsub("---", "&nbsp;", name)
182 | name
183 | @
184 | 
185 | 
186 | We can now build our HTML table. To make the process more transparent,
187 | this will be done in steps. In practice however, this can be done in
188 | one line. Note here that the genelist consists of annotation data that
189 | will be hyperlinked to online databases, whereas othernames consists
190 | of other data that will not be hyperlinked.
191 | 
192 | <<buildTable>>=
193 | genelist <- list(igenes, ug, ll, gb, sp)
194 | filename <- "Interesting_genes.html"
195 | title <- "An Artificial Set of Interesting Genes"
196 | othernames <- list(name, round(tstat, 2), round(pval, 3), round(FC, 1), round(dat, 2))
197 | head <- c("Probe ID", "UniGene", "LocusLink", "GenBank", "SwissProt", "Gene Name", "t-statistic", "p-value",
198 |           "Fold Change", "Sample 1", "Sample 2", "Sample 3", "Sample 4", "Sample 5", "Sample 6",
199 |           "Sample 7", "Sample 8", "Sample 9", "Sample 10")
200 | repository <- list("affy", "ug", "en", "gb", "sp")
201 | htmlpage(genelist, filename, title, othernames, head, repository = repository)
202 | @
203 | 
204 | \section{Session Information}
205 | 
206 | The version number of R and packages loaded for generating the vignette were:
207 | 
208 | <<echo=FALSE>>=
209 | sessionInfo()
210 | @
211 | 
212 | \end{document}
213 | 


--------------------------------------------------------------------------------