├── .Rbuildignore ├── tests └── annotate_unit_tests.R ├── R ├── test_annotate_package.R ├── AllGenerics.R ├── compatipleVersions.R ├── ACCNUMStats.R ├── pmid2MIAME.R ├── zzz.R ├── LL2homology.R ├── getData.R ├── getSeq4ACC.R ├── serializeEnv.R ├── readGEOAnn.R ├── AnnMaps.R ├── getPMInfo.R ├── html.R ├── blastSequences.R ├── homoData.R ├── Amat.R ├── isValidKey.R ├── GOhelpers.R ├── findNeighbors.R └── pubMedAbst.R ├── data ├── hgu95All.rda ├── hgByChroms.rda ├── hgCLengths.rda ├── hgu95Achrom.rda ├── hgu95Asym.rda ├── hgu95Achroloc.rda └── hgu95AProbLocs.rda ├── vignettes ├── OLDmake ├── frames.png ├── DPChart.eps ├── DPChart.png ├── noframes.png ├── annotate.Rfc ├── annotate.bib ├── chromLOC.Rmd ├── useProbeInfo.Rnw └── prettyOutput.Rnw ├── inst ├── misc │ └── pmLinkGen.pdf ├── unitTests │ └── test_getAnnMap.R └── NEWS.Rd ├── man ├── hgCLengths.Rd ├── makeAnchor.Rd ├── pmid2MIAME.Rd ├── pm.titles.Rd ├── mapOrgs.Rd ├── hgByChroms.Rd ├── pmidQuery.Rd ├── getEvidence.Rd ├── buildPubMedAbst.Rd ├── hgu95Asym.Rd ├── hgu95AProbLocs.Rd ├── hgu95All.Rd ├── hasGOannote.Rd ├── hgu95Achroloc.Rd ├── organism.Rd ├── hgu95Achrom.Rd ├── buildChromLocation.Rd ├── aqListGOIDs.Rd ├── compatibleVersions.Rd ├── usedChromGenes.Rd ├── getPMInfo.Rd ├── PWAmat.Rd ├── entrezGeneByID.Rd ├── entrezGeneQuery.Rd ├── filterGOByOntology.Rd ├── annPkgName.Rd ├── pm.abstGrep.Rd ├── PMIDAmat.Rd ├── UniGeneQuery.Rd ├── getOntology.Rd ├── getSeq4Acc.Rd ├── dropECode.Rd ├── accessionToUID.Rd ├── GO2heatmap.Rd ├── ACCNUMStats.Rd ├── GOmnplot.Rd ├── updateSymbolsToValidKeys.Rd ├── pm.getabst.Rd ├── serializeEnv.Rd ├── pubmed.Rd ├── isValidkey.Rd ├── getGOTerm.Rd ├── getAnnMap.Rd ├── genbank.Rd ├── HTMLPage-class.Rd ├── readGEOAnn.Rd ├── homoData-class.Rd ├── setRepository.Rd ├── pmAbst2HTML.Rd ├── pubMedAbst-class.Rd ├── blastSequences.Rd ├── getSYMBOL.Rd ├── LL2homology.Rd ├── chromLocation-class.Rd ├── getTDRows.Rd ├── findNeighbors.Rd ├── htmlpage.Rd └── chrCats.Rd ├── DESCRIPTION ├── TODO └── 
NAMESPACE /.Rbuildignore: -------------------------------------------------------------------------------- 1 | debian 2 | -------------------------------------------------------------------------------- /tests/annotate_unit_tests.R: -------------------------------------------------------------------------------- 1 | BiocGenerics:::testPackage("annotate") 2 | -------------------------------------------------------------------------------- /R/test_annotate_package.R: -------------------------------------------------------------------------------- 1 | .test <- function() BiocGenerics:::testPackage("annotate") 2 | -------------------------------------------------------------------------------- /data/hgu95All.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/annotate/devel/data/hgu95All.rda -------------------------------------------------------------------------------- /data/hgByChroms.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/annotate/devel/data/hgByChroms.rda -------------------------------------------------------------------------------- /data/hgCLengths.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/annotate/devel/data/hgCLengths.rda -------------------------------------------------------------------------------- /data/hgu95Achrom.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/annotate/devel/data/hgu95Achrom.rda -------------------------------------------------------------------------------- /data/hgu95Asym.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/annotate/devel/data/hgu95Asym.rda 
-------------------------------------------------------------------------------- /vignettes/OLDmake: -------------------------------------------------------------------------------- 1 | chromLocs.dvi: chromLocs.tex 2 | latex chromLocs 3 | latex chromLocs 4 | 5 | -------------------------------------------------------------------------------- /vignettes/frames.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/annotate/devel/vignettes/frames.png -------------------------------------------------------------------------------- /data/hgu95Achroloc.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/annotate/devel/data/hgu95Achroloc.rda -------------------------------------------------------------------------------- /vignettes/DPChart.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/annotate/devel/vignettes/DPChart.eps -------------------------------------------------------------------------------- /vignettes/DPChart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/annotate/devel/vignettes/DPChart.png -------------------------------------------------------------------------------- /vignettes/noframes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/annotate/devel/vignettes/noframes.png -------------------------------------------------------------------------------- /data/hgu95AProbLocs.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/annotate/devel/data/hgu95AProbLocs.rda -------------------------------------------------------------------------------- /inst/misc/pmLinkGen.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/annotate/devel/inst/misc/pmLinkGen.pdf -------------------------------------------------------------------------------- /R/AllGenerics.R: -------------------------------------------------------------------------------- 1 | ## S4 methods so we can use non-package based annotation databases (e.g., from AnnotationHub) 2 | ## as if they were installed packages 3 | 4 | setGeneric("isValidKey", function(ids, pkg) standardGeneric("isValidKey")) 5 | 6 | setGeneric("allValidKeys", function(pkg) standardGeneric("allValidKeys")) 7 | -------------------------------------------------------------------------------- /inst/unitTests/test_getAnnMap.R: -------------------------------------------------------------------------------- 1 | ## Set up 2 | require(org.Hs.eg.db) 3 | require(annotate) 4 | require(RUnit) 5 | 6 | ## For testing 7 | test_getAnnMap <- function(){ 8 | ## test for a map that exist 9 | map <- getAnnMap("CHRLOC","org.Hs.eg.db") 10 | checkTrue( class(map) == "AnnDbMap" ) 11 | ## and test for a map that does not (but which is available via select) 12 | map2 <- getAnnMap("ONTOLOGY","org.Hs.eg.db") 13 | checkTrue( class(map2) == "FlatBimap" ) 14 | } 15 | -------------------------------------------------------------------------------- /man/hgCLengths.Rd: -------------------------------------------------------------------------------- 1 | \name{hgCLengths} 2 | \alias{hgCLengths} 3 | \non_function{} 4 | \title{ A dataset which contains the lengths (in base pairs) of the 5 | human chromosomes. } 6 | \usage{data(hgCLengths)} 7 | \format{A vector containing 24 values, each corresponding to the total 8 | chromosome length. } 9 | \description{ 10 | The data is described above. 
11 | } 12 | \source{ 13 | UCSC Human Genome Project 14 | } 15 | \examples{ 16 | data(hgCLengths) 17 | } 18 | \keyword{datasets} 19 | -------------------------------------------------------------------------------- /man/makeAnchor.Rd: -------------------------------------------------------------------------------- 1 | \name{makeAnchor} 2 | \alias{makeAnchor} 3 | \title{A Function To Generate HTML Anchors} 4 | \description{ 5 | This function will take a set of links and titles and will generate 6 | HTML anchor tags out of these values 7 | } 8 | \usage{ 9 | makeAnchor(link, title, toMain = FALSE) 10 | } 11 | \arguments{ 12 | \item{link}{A vector of URLs} 13 | \item{title}{A vector of website names} 14 | \item{toMain}{Used for frame pages} 15 | } 16 | \value{ 17 | A vector of HTML anchor tags 18 | } 19 | \author{Jeff Gentry} 20 | \examples{ 21 | makeAnchor("http://www.bioconductor.org","Bioconductor") 22 | } 23 | \keyword{utilities} 24 | -------------------------------------------------------------------------------- /man/pmid2MIAME.Rd: -------------------------------------------------------------------------------- 1 | \name{pmid2MIAME} 2 | \alias{pmid2MIAME} 3 | \title{use web to populate MIAME instance with pubmed details } 4 | \description{use web to populate MIAME instance with pubmed details 5 | } 6 | \usage{ 7 | pmid2MIAME(pmid) 8 | } 9 | \arguments{ 10 | \item{pmid}{ string encoding PMID } 11 | } 12 | \details{ 13 | uses XML library to decode parts of the query response and 14 | load a MIAME object 15 | } 16 | \value{ 17 | An instance of class \code{\link[Biobase:MIAME-class]{MIAME}} 18 | } 19 | \author{Vince Carey } 20 | 21 | \examples{ 22 | if (interactive()) pmid2MIAME("9843569") 23 | } 24 | \keyword{ models } 25 | -------------------------------------------------------------------------------- /R/compatipleVersions.R: -------------------------------------------------------------------------------- 1 | # Checks the DESCRIPTION file to see if the packages 
whose names are 2 | # passed have the same version number 3 | 4 | compatibleVersions <- function(...){ 5 | pkgs <- list(...) 6 | versions <- NULL 7 | for(i in pkgs){ 8 | options(show.error.messages = FALSE, warn = -1) 9 | versions <- try(c(versions, packageDescription(i)[["Version"]])) 10 | options(show.error.messages = TRUE, warn = 0) 11 | 12 | if(inherits(versions, "try-error")){ 13 | stop(paste("Package", i, "is not in the library")) 14 | } 15 | } 16 | 17 | if(length(unique(versions)) == 1){ 18 | return(TRUE) 19 | }else{ 20 | return(FALSE) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /man/pm.titles.Rd: -------------------------------------------------------------------------------- 1 | \name{pm.titles} 2 | \alias{pm.titles} 3 | \title{Obtain the titles of the PubMed abstracts.} 4 | \description{ 5 | This function returns the titles from a list of PubMed abstracts. 6 | } 7 | \usage{ 8 | pm.titles(absts) 9 | } 10 | \arguments{ 11 | \item{absts}{The list of PubMed abstracts.} 12 | } 13 | \details{ 14 | It simply uses \code{sapply}. 15 | } 16 | \value{ 17 | A character vector of length equal to the number of abstracts. Each 18 | element is the title of the corresponding abstract. 19 | } 20 | \author{Robert Gentleman} 21 | \seealso{\code{\link{pm.abstGrep}}} 22 | \examples{ 23 | library("hgu95av2.db") 24 | hoxa9 <- "37806_at" 25 | absts <- pm.getabst(hoxa9, "hgu95av2") 26 | pm.titles(absts)[[1]][[1]] 27 | } 28 | \keyword{manip} 29 | -------------------------------------------------------------------------------- /man/mapOrgs.Rd: -------------------------------------------------------------------------------- 1 | \name{mapOrgs} 2 | \alias{mapOrgs} 3 | \alias{getOrgNameNCode} 4 | \title{Functions to map to organism IDs used by NCBI homology.} 5 | \description{ 6 | These functions help map to organism identifiers used at the NCBI. 
7 | } 8 | \usage{ 9 | mapOrgs(toMap, what = c("code","name")) 10 | getOrgNameNCode() 11 | } 12 | \arguments{ 13 | \item{toMap}{\code{vect} a vector of character strings} 14 | \item{what}{\code{what} a character string that can either be "code" 15 | or "name".} 16 | } 17 | \details{ 18 | mapOrgs converts organism codes to scientific names. 19 | } 20 | \value{ 21 | mapOrgs returns a vector of character strings. 22 | } 23 | \references{\url{ftp://ftp.ncbi.nih.gov/pub/HomoloGene/README}} 24 | \author{Jianhua Zhang} 25 | 26 | \keyword{manip} 27 | -------------------------------------------------------------------------------- /man/hgByChroms.Rd: -------------------------------------------------------------------------------- 1 | \name{hgByChroms} 2 | \alias{hgByChroms} 3 | \non_function{} 4 | \title{ A dataset to show the human genome base pair locations per 5 | chromosome. } 6 | \usage{data(hgByChroms)} 7 | \format{ 8 | A list, with the names consisting of the names of the chromosomes in 9 | the human genome (thus 24 elements). Each element consists of a named 10 | vector of +/- values - where each value represents the location of a 11 | base pair (the numeric value is the location, while the +/- denotes 12 | the strand value), with the name providing the name of the base pair. 13 | } 14 | \description{ 15 | The data is described above. 16 | } 17 | \source{ 18 | Cheng Li of the Dana-Farber Cancer Institute. 
19 | } 20 | \examples{ 21 | data(hgByChroms) 22 | } 23 | \keyword{datasets} 24 | -------------------------------------------------------------------------------- /R/ACCNUMStats.R: -------------------------------------------------------------------------------- 1 | ACCNUMStats <- function(pkgName){ 2 | accMap <- getAnnMap("ACCNUM", pkgName) 3 | accs <- as.list(accMap) 4 | return(table(unlist(sapply(accs, whatACC)))) 5 | } 6 | 7 | whatACC <- function(accs){ 8 | if(is.na(accs[1])){ 9 | return("NA") 10 | } 11 | accs <- strsplit(accs, ";") 12 | if(regexpr("^[a-zA-Z]{2}\\.[0-9]+$", accs[1]) > 0){ 13 | return("UniGene") 14 | } 15 | if(regexpr("^(NP_)|(NG_)|(NM_)|(NC_)|(XR_)|(XM_)|(XP_)[0-9]+[._]?[0-9]?$", 16 | accs[1]) > 0){ 17 | return("RefSeq") 18 | } 19 | if(regexpr("^[A-Z]+[0-9]+[._]?[0-9]$", accs[1]) > 0){ 20 | return("GBAcc") 21 | } 22 | if(regexpr("^[0-9]+$", accs[1]) > 0){ 23 | return("Image") 24 | } 25 | 26 | return("Unknown") 27 | } 28 | 29 | -------------------------------------------------------------------------------- /man/pmidQuery.Rd: -------------------------------------------------------------------------------- 1 | \name{pmidQuery} 2 | \alias{pmidQuery} 3 | \title{A function to query PubMed} 4 | \description{ 5 | Given a PMID, will create a URL which can be used to open a 6 | browser and retrieve the specified information from PubMed. 7 | } 8 | \usage{ 9 | pmidQuery(query) 10 | } 11 | \arguments{ 12 | \item{query}{ The PubMed ID (or IDs)} 13 | } 14 | \details{ 15 | Using published details from NCBI we construct an appropriate string 16 | for directing a web browser to the information available at the NCBI.
17 | } 18 | \value{ 19 | A character string containing the appropriate URL 20 | } 21 | \references{NCBI, \url{https://www.ncbi.nih.gov/} } 22 | \author{Jeff Gentry} 23 | 24 | \seealso{\code{\link{UniGeneQuery}}} 25 | 26 | \examples{ 27 | a <- "9695952" 28 | pmidQuery(a) 29 | } 30 | \keyword{interface} 31 | 32 | -------------------------------------------------------------------------------- /man/getEvidence.Rd: -------------------------------------------------------------------------------- 1 | \name{getEvidence} 2 | \alias{getEvidence} 3 | \title{Get the Evidence codes for a set of GO terms.} 4 | \description{ 5 | For each mapping of a gene to a GO term there are a set of evidence 6 | codes that are used. Genes can be mapped using one or more evidence 7 | codes and this function obtains the evidence codes for all genes 8 | provided in the input list. 9 | } 10 | \usage{ 11 | getEvidence(inlist) 12 | } 13 | \arguments{ 14 | \item{inlist}{A list of GO identifiers.} 15 | } 16 | \value{ 17 | A \code{list} of the same length as the input list, each element is a 18 | vector of evidence codes. 19 | } 20 | \author{R. Gentleman} 21 | 22 | \seealso{\code{\link{getOntology}}, \code{\link{dropECode}}} 23 | 24 | \examples{ 25 | library("hgu95av2.db") 26 | bb <- hgu95av2GO[["39613_at"]] 27 | getEvidence(bb) 28 | } 29 | \keyword{manip} 30 | -------------------------------------------------------------------------------- /man/buildPubMedAbst.Rd: -------------------------------------------------------------------------------- 1 | \name{buildPubMedAbst} 2 | \alias{buildPubMedAbst} 3 | \title{A function to generate an instantiation of a pubMedAbst class } 4 | \description{ 5 | This function will take in an XML tree object and will create an 6 | instance of a pubMedAbst class. This instance is returned to the caller.
7 | } 8 | \usage{ 9 | buildPubMedAbst(xml) 10 | } 11 | \arguments{ 12 | \item{xml}{An XMLTree object that corresponds to a Pubmed abstract.} 13 | } 14 | \value{ 15 | This function returns an instantiation of a pubMedAbst object to the 16 | caller. 17 | } 18 | 19 | \author{ Jeff Gentry } 20 | \seealso{\code{\link{pubmed}},\code{\link{genbank}}} 21 | \examples{ 22 | x <- pubmed("9695952","8325638","8422497") 23 | a <- xmlRoot(x) 24 | numAbst <- length(xmlChildren(a)) 25 | absts <- list() 26 | for (i in 1:numAbst) { 27 | absts[[i]] <- buildPubMedAbst(a[[i]]) 28 | } 29 | 30 | } 31 | \keyword{ utilities } 32 | -------------------------------------------------------------------------------- /man/hgu95Asym.Rd: -------------------------------------------------------------------------------- 1 | \name{hgu95Asym} 2 | \non_function{} 3 | \docType{data} 4 | \alias{hgu95Asym} 5 | 6 | \title{Annotation data for the Affymetrix HGU95A GeneChip} 7 | 8 | \usage{data(hgu95Asym)} 9 | 10 | \format{ 11 | This data set provides an environment with mappings from the Affymetrix 12 | identifiers to gene symbol. 13 | The environment functions like a hashtable and can be accessed using 14 | \code{mget}. 15 | If the returned value is \code{NA} then the current 16 | mapping was unable to identify this. Mappings and data sources are 17 | constantly evolving so updating often is recommended.
18 | } 19 | \description{Data, in the form of environments for the Affymetrix U95A 20 | chip.} 21 | 22 | \source{The \code{AnnBuilder} package.} 23 | 24 | \examples{ 25 | data(hgu95Asym) 26 | data(sample.ExpressionSet) 27 | mget(featureNames(sample.ExpressionSet)[330:340], env=hgu95Asym, ifnotfound=NA) 28 | } 29 | \keyword{datasets} 30 | -------------------------------------------------------------------------------- /man/hgu95AProbLocs.Rd: -------------------------------------------------------------------------------- 1 | \name{hgu95AProbLocs} 2 | \docType{methods} 3 | \alias{hgu95AProbLocs} 4 | \title{chromLocation instance hgu95AProbLocs, an example of a chromLocation 5 | object} 6 | \description{ gives chromosome locations for Affy U95 probes } 7 | \section{Slots}{ 8 | \describe{ 9 | \item{\code{species}:}{Object of class character, value: 'Human'} 10 | \item{\code{datSource}:}{Object of class character, value} 11 | \item{\code{nChrom}:}{Object of class numeric, value: 24} 12 | \item{\code{chromNames}:}{Object of class character, value: 1:22, X,Y} 13 | \item{\code{chromLocs}:}{Object of class list, value: long: sense and antisense locations 14 | associated with affy identifiers} 15 | \item{\code{chromLengths}:}{Object of class numeric, } 16 | \item{\code{geneToChrom}:}{Object of class environment} 17 | \item{\code{class}:}{Object of class character, value: 'chromLocation'} 18 | } 19 | } 20 | \keyword{methods} 21 | -------------------------------------------------------------------------------- /man/hgu95All.Rd: -------------------------------------------------------------------------------- 1 | \name{hgu95All} 2 | \non_function{} 3 | \docType{data} 4 | \alias{hgu95All} 5 | 6 | \title{Annotation data for the Affymetrix HGU95A GeneChip} 7 | 8 | \usage{data(hgu95All)} 9 | \description{Data, in the form of environments for the Affymetrix U95A 10 | chip.} 11 | 12 | 13 | \format{ 14 | These data sets provide environments with mappings from the Affymetrix 15 | 
identifiers to Entrez Gene identifiers. 16 | The environment functions like a hashtable and can be accessed using 17 | \code{mget}. 18 | If the returned value is \code{NA} then the current 19 | mapping was unable to identify this. Mappings and data sources are 20 | constantly evolving so updating often is recommended. 21 | } 22 | 23 | \source{The \code{AnnBuilder} package.} 24 | 25 | \examples{ 26 | data(hgu95All) 27 | data(sample.ExpressionSet) 28 | mget(featureNames(sample.ExpressionSet)[330:340], env=hgu95All, ifnotfound=NA) 29 | 30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /man/hasGOannote.Rd: -------------------------------------------------------------------------------- 1 | \name{hasGOannote} 2 | \alias{hasGOannote} 3 | \title{Check for GO annotation} 4 | \description{ 5 | Given a GO term, or a vector of GO terms and an ontology this function 6 | determines which of the terms have GO annotation in the specified 7 | ontology. 8 | } 9 | \usage{ 10 | hasGOannote(x, which="MF") 11 | } 12 | \arguments{ 13 | \item{x}{A character vector, an instance of the \code{GOTerms} class 14 | or a list of \code{GOTerms}.} 15 | \item{which}{One of "MF", "BP" or "CC"} 16 | } 17 | \details{ 18 | The available GO annotation is searched and a determination of 19 | whether a specific GO identifier has a value in the specified 20 | ontology is made. 21 | } 22 | \value{ 23 | A logical vector of the same length as \code{x}. 24 | } 25 | \author{R. 
Gentleman} 26 | 27 | \seealso{\code{\link{get}}} 28 | 29 | \examples{ 30 | library("GO.db") 31 | t1 <- "GO:0003680" 32 | hasGOannote(t1) 33 | hasGOannote(t1, "BP") 34 | } 35 | \keyword{manip} 36 | -------------------------------------------------------------------------------- /man/hgu95Achroloc.Rd: -------------------------------------------------------------------------------- 1 | \name{hgu95Achroloc} 2 | \alias{hgu95Achroloc} 3 | \non_function{} 4 | 5 | \title{Annotation data for the Affymetrix HGU95A GeneChip} 6 | \usage{data(hgu95Achroloc)} 7 | 8 | \format{ 9 | These data sets provide environments with mappings from the Affymetrix 10 | identifiers to chromosomal location, in bases. 11 | The environments function like hashtables and can be accessed using 12 | \code{mget}. 13 | If the returned value is \code{NA} then the current 14 | mapping was unable to identify this. Mappings and data sources are 15 | constantly evolving so updating often is recommended. 16 | 17 | } 18 | \description{Data, in the form of environments for the Affymetrix U95A 19 | chip.} 20 | 21 | \source{The \code{AnnBuilder} package.} 22 | 23 | \examples{ 24 | data(hgu95Achroloc) 25 | data(sample.ExpressionSet) 26 | mget(featureNames(sample.ExpressionSet)[330:340], env=hgu95Achroloc, 27 | ifnotfound=NA) 28 | 29 | } 30 | \keyword{datasets} 31 | -------------------------------------------------------------------------------- /man/organism.Rd: -------------------------------------------------------------------------------- 1 | \name{organism} 2 | \alias{organism} 3 | \alias{organism,character-method} 4 | \title{Convenience function for getting the organism from an object or package} 5 | 6 | \description{ 7 | The most basic organism method just takes a character string (which 8 | represents a particular annotation package) and returns the organism 9 | that said package is based upon. 
10 | } 11 | 12 | \usage{ 13 | organism(object) 14 | } 15 | 16 | \arguments{ 17 | \item{object}{a character string that names a package} 18 | } 19 | 20 | \value{ 21 | The name of the organism used for this package or object 22 | } 23 | 24 | 25 | \author{Marc Carlson} 26 | 27 | \examples{ 28 | require(hgu95av2.db) 29 | ## get the organism for this annotation package 30 | organism("hgu95av2") 31 | 32 | ## get the organism this object refers to 33 | ## (for a ChromLocation object) 34 | z <- buildChromLocation("hgu95av2") 35 | organism(z) 36 | 37 | } 38 | 39 | \keyword{manip} 40 | 41 | -------------------------------------------------------------------------------- /man/hgu95Achrom.Rd: -------------------------------------------------------------------------------- 1 | \name{hgu95Achrom} 2 | \non_function{} 3 | \docType{data} 4 | \alias{hgu95Achrom} 5 | 6 | \title{Annotation data for the Affymetrix HGU95A GeneChip} 7 | 8 | \usage{data(hgu95Achrom) 9 | } 10 | 11 | \format{ 12 | This data set provides an environment (treat as a hashtable) 13 | with mappings from the Affymetrix 14 | identifiers to chromosome number/name. 15 | The environment functions like a hashtable and can be accessed using 16 | \code{mget}. 17 | If the returned value is \code{NA} then the current 18 | mapping was unable to identify this. Mappings and data sources are 19 | constantly evolving so updating often is recommended. 
20 | } 21 | \description{Data, in the form of environments for the Affymetrix U95A 22 | chip.} 23 | 24 | \source{The \code{AnnBuilder} package.} 25 | 26 | \examples{ 27 | data(hgu95Achrom) 28 | data(sample.ExpressionSet) 29 | mget(featureNames(sample.ExpressionSet)[330:340], env=hgu95Achrom, ifnotfound=NA) 30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /man/buildChromLocation.Rd: -------------------------------------------------------------------------------- 1 | \name{buildChromLocation} 2 | \alias{buildChromLocation} 3 | \title{A function to generate an instantiation of a chromLocation class} 4 | \description{ 5 | This function will take the name of a data package and build a 6 | chromLocation object representing that data set. 7 | } 8 | \usage{ 9 | buildChromLocation(dataPkg) 10 | } 11 | \arguments{ 12 | \item{dataPkg}{The name of the data package to be used} 13 | } 14 | \details{ 15 | The requested data set must be available in the user's 16 | \code{.libPaths()}, and the function will throw an error if this is 17 | not the case. 18 | 19 | If the data package is present, the necessary information will be 20 | extracted from the data package and a \code{chromLocation} object will 21 | be created. 22 | } 23 | \value{ 24 | A \code{chromLocation} object representing the specified data set. 
25 | } 26 | \author{Jeff Gentry} 27 | \examples{ 28 | library("hgu95av2.db") 29 | z <- buildChromLocation("hgu95av2") 30 | } 31 | \keyword{utilities} 32 | -------------------------------------------------------------------------------- /man/aqListGOIDs.Rd: -------------------------------------------------------------------------------- 1 | \name{aqListGOIDs} 2 | \alias{aqListGOIDs} 3 | 4 | \title{List GO Identifiers by GO Ontology} 5 | \description{ 6 | This function returns a character vector of all GO identifiers in the 7 | specified ontologies: Biological Process (BP), Cellular Component 8 | (CC), Molecular Function (MF). 9 | } 10 | \usage{ 11 | aqListGOIDs(ont) 12 | } 13 | 14 | \arguments{ 15 | \item{ont}{A character vector specifying the two-letter codes of the 16 | ontologies from which all GO IDs will be retrieved. Entries must be 17 | one of \code{"BP"}, \code{"CC"}, or \code{"MF"}.} 18 | } 19 | 20 | \value{ 21 | A character vector of GO IDs. The vector will contain all GO IDs in 22 | the GO ontologies specified by the \code{ont} argument. 23 | } 24 | 25 | \author{Seth Falcon} 26 | \examples{ 27 | ## all GO IDs in BP 28 | bp_ids = aqListGOIDs("BP") 29 | length(bp_ids) 30 | 31 | ## all GO IDs in BP or CC 32 | bp_or_cc_ids = aqListGOIDs(c("BP", "CC")) 33 | length(bp_or_cc_ids) 34 | } 35 | 36 | \keyword{manip} 37 | 38 | -------------------------------------------------------------------------------- /man/compatibleVersions.Rd: -------------------------------------------------------------------------------- 1 | \name{compatibleVersions} 2 | \alias{compatibleVersions} 3 | 4 | \title{function to check to see if the packages represented by the names 5 | passed have the same version number} 6 | \description{ 7 | This function takes the names of installed R packages and then checks 8 | to see if they all have the same version number. 9 | } 10 | \usage{ 11 | compatibleVersions(...) 
12 | } 13 | \arguments{ 14 | \item{\dots}{\code{\dots} character strings for the names of R 15 | packages that have been installed} 16 | } 17 | \details{ 18 | If all the packages have the same version number, the function returns 19 | TRUE. Otherwise, the function returns FALSE. 20 | } 21 | \value{ 22 | This function returns TRUE or FALSE depending on whether the packages 23 | have the same version number. 24 | } 25 | \author{Jianhua Zhang} 26 | \seealso{\code{\link{packageDescription}}} 27 | \examples{ 28 | library("hgu95av2.db") 29 | library("GO.db") 30 | compatibleVersions("hgu95av2.db", "GO.db") 31 | } 32 | \keyword{misc} 33 | -------------------------------------------------------------------------------- /man/usedChromGenes.Rd: -------------------------------------------------------------------------------- 1 | \name{usedChromGenes} 2 | \alias{usedChromGenes} 3 | \title{A function to select used genes on a chromosome from an ExpressionSet.} 4 | \description{ 5 | Given an instance of an \code{ExpressionSet}, a \code{chromLocation} object 6 | and the name of a chromosome this function returns all genes represented 7 | in the \code{ExpressionSet} on the specified chromosome. 8 | } 9 | \usage{ 10 | usedChromGenes(eSet, chrom, specChrom) 11 | } 12 | \arguments{ 13 | \item{eSet}{An instance of an \code{ExpressionSet} object.} 14 | \item{chrom}{The name of the chromosome of interest.} 15 | \item{specChrom}{An instance of a \code{chromLocation} object.} 16 | } 17 | \value{ 18 | Returns a vector of gene names that represent the genes from the 19 | \code{ExpressionSet} that are on the specified chromosome.
20 | } 21 | \author{ Jeff Gentry} 22 | \examples{ 23 | data(sample.ExpressionSet) 24 | data(hgu95AProbLocs) 25 | usedChromGenes(sample.ExpressionSet, "1", hgu95AProbLocs) 26 | } 27 | \keyword{interface} 28 | -------------------------------------------------------------------------------- /man/getPMInfo.Rd: -------------------------------------------------------------------------------- 1 | \name{getPMInfo} 2 | \alias{getPMInfo} 3 | \title{extract publication details and abstract from annotate::pubmed function output } 4 | \description{ 5 | extract publication details and abstract from annotate::pubmed function output 6 | } 7 | \usage{ 8 | getPMInfo(x) 9 | } 10 | \arguments{ 11 | \item{x}{ an object of class xmlDocument; assumed to be result of a pubmed() call} 12 | } 13 | \details{ 14 | uses xmlDOMApply to extract and structure key features of the XML tree returned 15 | by annotate::pubmed() 16 | } 17 | \value{ 18 | a list with one element per pubmed id processed by pubmed. Each element of 19 | the list is in turn a list with elements for author list, title, journal 20 | info, and abstract text. 21 | } 22 | \author{Vince Carey } 23 | \note{this should be turned into a method returning an instance of 24 | a formal class representing articles. } 25 | 26 | 27 | \examples{ 28 | demo <- pubmed("11780146", 29 | "11886385", "11884611") 30 | getPMInfo(demo) 31 | } 32 | \keyword{ models } 33 | -------------------------------------------------------------------------------- /R/pmid2MIAME.R: -------------------------------------------------------------------------------- 1 | pmid2MIAME = function (pmid) 2 | { 3 | # 4 | # we find that some abstracts are text values of 5 | # and some of ... 6 | # 7 | # need to think about validity checking here .. DTD? 
8 | # 9 | x = pubmed(pmid) 10 | rr = xmlRoot(x) 11 | top = xmlChildren(rr) 12 | pmart = top[["PubmedArticle"]] 13 | cit = xmlChildren(pmart)[["MedlineCitation"]] 14 | art = cit[["Article"]] 15 | cart = xmlChildren(art) 16 | title = xmlValue(cart[["ArticleTitle"]]) 17 | abst = xmlValue(cart[["Abstract"]]) 18 | if (is.null(abst)) { 19 | caa = xmlChildren(cart[["Abstract"]]) 20 | abst = xmlValue(caa[["AbstractText"]]) 21 | } 22 | if (is.null(abst)) abst = "" 23 | aff = xmlValue(cart[["Affiliation"]]) 24 | an = cart[["AuthorList"]] 25 | last = xmlValue(xmlChildren(an[[1]])[["LastName"]]) 26 | ini = xmlValue(xmlChildren(an[[1]])[["Initials"]]) 27 | new("MIAME", name=paste(last,ini,collapse=", "), lab = aff, title = title, abstract = abst, pubMedIds = pmid) 28 | } 29 | -------------------------------------------------------------------------------- /man/PWAmat.Rd: -------------------------------------------------------------------------------- 1 | \name{PWAmat} 2 | \alias{PWAmat} 3 | \title{A function to compute the probe to KEGG pathway incidence matrix.} 4 | \description{ 5 | For a given chip we compute the mapping from probes to KEGG pathways. 6 | } 7 | \usage{ 8 | PWAmat(data) 9 | } 10 | \arguments{ 11 | \item{data}{The name of the chip for which the incidence matrix should 12 | be computed.} 13 | } 14 | \details{ 15 | Not much to say, just find which probes are in which pathways and 16 | return the incidence matrix, with pathways as rows and probes as 17 | columns. 18 | 19 | It would be nice to be able to specify a set of probes to use, so that 20 | one does not perform the calculations using all probes if they are 21 | not of interest. 22 | } 23 | \value{ 24 | A matrix containing zero or one, depending on whether the probe 25 | (row) is in a pathway (column). 26 | } 27 | \author{R.
Gentleman} 28 | \seealso{\code{\link{KEGG2heatmap}}, \code{\link{GOmnplot}}} 29 | \examples{ 30 | library("hgu95av2.db") 31 | Am1 <- PWAmat("hgu95av2") 32 | } 33 | \keyword{manip} 34 | -------------------------------------------------------------------------------- /man/entrezGeneByID.Rd: -------------------------------------------------------------------------------- 1 | \name{entrezGeneByID} 2 | \alias{entrezGeneByID} 3 | \title{Create a Query String for an Entrez Gene Identifier} 4 | \description{ 5 | Given a set of Entrez Gene identifiers this function creates a set of URLs 6 | that can be used to either open a browser to the requested location or 7 | that can be used as anchors in the construction of HTML output. 8 | } 9 | \usage{ 10 | entrezGeneByID(query) 11 | } 12 | \arguments{ 13 | \item{query}{Entrez Gene identifiers.} 14 | } 15 | \details{ 16 | Using NCBI we construct appropriate strings for directing a web 17 | browser to the Entrez Genes specified by their IDs. 18 | } 19 | \value{ 20 | A character vector containing the query string. 21 | } 22 | \references{NCBI, \url{https://www.ncbi.nih.gov/} } 23 | \author{Marc Carlson} 24 | \note{Be very careful about automatically querying this resource. It is 25 | considered antisocial behavior by the owners. } 26 | 27 | \examples{ 28 | q1<-entrezGeneByID(c("100", "1002")) 29 | q1 30 | if( interactive()) 31 | browseURL(q1[1]) 32 | 33 | } 34 | \keyword{interface} 35 | -------------------------------------------------------------------------------- /man/entrezGeneQuery.Rd: -------------------------------------------------------------------------------- 1 | \name{entrezGeneQuery} 2 | \alias{entrezGeneQuery} 3 | \title{Create a Query String for Entrez Genes} 4 | \description{ 5 | Given a set of search terms this function creates a set of URLs 6 | that can be used to either open a browser to the requested location or 7 | that can be used as anchors in the construction of HTML output. 
8 | } 9 | \usage{ 10 | entrezGeneQuery(query) 11 | } 12 | \arguments{ 13 | \item{query}{The search terms. } 14 | } 15 | \details{ 16 | Using NCBI we construct an appropriate string for directing a web 17 | browser to information about genes of that type at NCBI. 18 | } 19 | \value{ 20 | A character vector containing the query string. 21 | } 22 | \references{NCBI, \url{https://www.ncbi.nih.gov/} } 23 | \author{Marc Carlson} 24 | \note{Be very careful about automatically querying this resource. It is 25 | considered antisocial behavior by the owners. } 26 | 27 | \examples{ 28 | q1<-entrezGeneQuery(c("leukemia", "Homo sapiens")) 29 | q1 30 | if( interactive()) 31 | browseURL(q1[1]) 32 | 33 | } 34 | \keyword{interface} 35 | -------------------------------------------------------------------------------- /man/filterGOByOntology.Rd: -------------------------------------------------------------------------------- 1 | \name{filterGOByOntology} 2 | \alias{filterGOByOntology} 3 | 4 | \title{Filter GO terms by a specified GO ontology} 5 | \description{ 6 | Given a character vector containing GO identifiers, return a logical 7 | vector indicating which GO IDs are in the specified ontology (BP, 8 | CC, or MF). 9 | } 10 | \usage{ 11 | filterGOByOntology(goids, ontology = c("BP", "CC", "MF")) 12 | } 13 | 14 | \arguments{ 15 | \item{goids}{a character vector of GO IDs} 16 | \item{ontology}{One of "BP", "CC", or "MF"} 17 | } 18 | \value{ 19 | A logical vector with length equal to \code{goids}. A \code{TRUE} 20 | indicates that the corresponding GO ID in \code{goids} is a member 21 | of the ontology specified by \code{ontology}. 
22 | } 23 | \author{Seth Falcon} 24 | \examples{ 25 | haveGO <- suppressWarnings(require("GO.db")) 26 | if (haveGO) { 27 | ids <- c("GO:0001838", "GO:0001839") 28 | stopifnot(all(filterGOByOntology(ids, "BP"))) 29 | stopifnot(!any(filterGOByOntology(ids, "MF"))) 30 | } else cat("Sorry, this example requires the GO package\n") 31 | 32 | } 33 | 34 | \keyword{manip} 35 | -------------------------------------------------------------------------------- /man/annPkgName.Rd: -------------------------------------------------------------------------------- 1 | \name{annPkgName} 2 | \alias{annPkgName} 3 | 4 | \title{Get annotation package name from chip name} 5 | \description{ 6 | This function returns the name of the Bioconductor annotation data 7 | package that corresponds to the specified chip or genome. The 8 | \code{type} argument is used to request an annotation package with a 9 | particular backing store. 10 | } 11 | \usage{ 12 | annPkgName(name, type = c("db", "env")) 13 | } 14 | 15 | \arguments{ 16 | \item{name}{string specifying the name of the chip or genome. For 17 | example, \code{"hgu133plus2"}} 18 | \item{type}{Either \code{"db"} or \code{"env"}. 
This will determine 19 | whether the package name returned corresponds to the SQLite-based 20 | annotation package or environment-based package, respectively.} 21 | } 22 | 23 | \value{ 24 | a string giving the name of the annotation data package 25 | } 26 | 27 | \author{Seth Falcon} 28 | \seealso{ 29 | \code{\link{getAnnMap}} 30 | } 31 | \examples{ 32 | annPkgName("hgu133plus2", type="db") 33 | annPkgName("hgu133plus2", type="env") 34 | } 35 | 36 | \keyword{manip} 37 | 38 | -------------------------------------------------------------------------------- /inst/NEWS.Rd: -------------------------------------------------------------------------------- 1 | \name{annotate-NEWS} 2 | \title{annotate News} 3 | 4 | \section{CHANGES IN VERSION 1.47}{ 5 | \subsection{DEFUNCT}{ 6 | \itemize{ 7 | 8 | \item \code{probesByLL} is now defunct; use 9 | \code{AnnotationDbi::select()} instead. 10 | 11 | \item \code{blastSequences} supports multiple sequence queries; 12 | use \code{as="data.frame"} for output. 13 | 14 | \item Improve \code{blastSequences} strategy for result retrieval, 15 | querying the appropriate API for status every 10 seconds after 16 | initial estimated processing time. 17 | 18 | } 19 | } 20 | } 21 | 22 | \section{CHANGES IN VERSION 1.43}{ 23 | 24 | \subsection{NEW FEATURES}{ 25 | 26 | \itemize{ 27 | 28 | \item \code{blastSequences} accepts an argument \code{timeout} 29 | limiting waiting time for a response; in an interactive session 30 | and after the timeout is reached, the user may opt to retry the 31 | query. 32 | 33 | \item \code{blastSequences} accepts an argument 34 | \code{as} controlling the representation of the return value, 35 | either a DNAMultipleAlignment, a data.frame, or the XML. 
36 | 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /man/pm.abstGrep.Rd: -------------------------------------------------------------------------------- 1 | \name{pm.abstGrep} 2 | \alias{pm.abstGrep} 3 | \title{An interface to grep for PubMed abstracts.} 4 | \description{ 5 | A user friendly interface to the functionality provided by 6 | \code{pubmed}. 7 | } 8 | \usage{ 9 | pm.abstGrep(pattern, absts, ...) 10 | } 11 | \arguments{ 12 | \item{pattern}{A pattern for the call to \code{grep}.} 13 | \item{absts}{A list containing abstracts downloaded using \code{pubmed} or 14 | equivalent.} 15 | \item{\dots}{Extra arguments passed to \code{grep}.} 16 | } 17 | \details{ 18 | The \code{absts} are a list of PubMed XML objects that have been downloaded 19 | and parsed. This function lets the user quickly search the abstracts 20 | for any regular expression. The returned value is a logical vector 21 | indicating which of the abstracts contain the regular expression. 22 | } 23 | \value{ 24 | The returned value is a logical vector 25 | indicating which of the abstracts contain the regular expression. 26 | } 27 | \author{Robert Gentleman} 28 | \seealso{\code{\link{pm.getabst}}, \code{\link{pm.titles}}} 29 | \examples{ 30 | library("hgu95av2.db") 31 | hoxa9 <- "37806_at" 32 | absts <- pm.getabst(hoxa9, "hgu95av2") 33 | pm.abstGrep("SH3", absts[[1]]) 34 | pm.abstGrep("autism", absts[[1]]) 35 | } 36 | \keyword{manip} 37 | -------------------------------------------------------------------------------- /man/PMIDAmat.Rd: -------------------------------------------------------------------------------- 1 | \name{PMIDAmat} 2 | \alias{PMIDAmat} 3 | \title{A function to compute the probe to PubMed id incidence matrix.} 4 | \description{ 5 | For a given chip or a given set of genes, it computes the mapping from 6 | probes to PubMed id. 
7 | } 8 | \usage{ 9 | PMIDAmat(pkg, gene=NULL) 10 | } 11 | \arguments{ 12 | \item{pkg}{The package name of the chip for which the incidence matrix should 13 | be computed.} 14 | \item{gene}{A character vector of probe set ids of interest or NULL (default).} 15 | } 16 | \details{ 17 | Not much to say, just find which probes are associated with which 18 | PubMed ids and return the incidence matrix, with PubMed ids as rows and probes as 19 | columns. 20 | 21 | To specify a set of probes to use, set the argument \code{gene} to 22 | a vector of probe ids. This way, the calculations do not 23 | involve genes/PubMed ids that are not of interest, so that the whole 24 | process can finish sooner. 25 | } 26 | \value{ 27 | A matrix containing zero or one, depending on whether the probe 28 | (column) is associated with a PubMed id (row). 29 | } 30 | \author{R. Gentleman} 31 | \examples{ 32 | library("hgu95av2.db") 33 | probe <- names(as.list(hgu95av2ACCNUM)) 34 | Amat <- PMIDAmat("hgu95av2", gene=sample(probe, 10)) 35 | } 36 | \keyword{manip} 37 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .buildAnnotateOpts <- function() { 2 | if (is.null(getOption("BioC"))) { 3 | BioC <- list() 4 | class(BioC) <- "BioCOptions" 5 | options("BioC"=BioC) 6 | } 7 | 8 | Annotate <- list() 9 | class(Annotate) <- "BioCPkg" 10 | Annotate$urls <- list( ncbi = "https://www.ncbi.nlm.nih.gov/", 11 | data="http://www.bioconductor.org/datafiles/annotate/") 12 | 13 | BioC <- getOption("BioC") 14 | BioC$annotate <- Annotate 15 | options("BioC"=BioC) 16 | } 17 | 18 | .onLoad <- function(libname, pkgname) { 19 | .setDefaultRepositories() 20 | .buildAnnotateOpts() 21 | if(.Platform$OS.type == "windows" && interactive() 22 | && .Platform$GUI == "Rgui"){ 23 | Biobase::addVigs2WinMenu("annotate") 24 | } 25 | } 26 | 27 | .repositories <- new.env(parent = emptyenv()) 28 | 29 | 
.setDefaultRepositories<- function() { 30 | setRepository("ug", getQuery4UG) 31 | setRepository("affy", getQuery4Affy) 32 | setRepository("gb", getQuery4GB) 33 | setRepository("sp", getQuery4SP) 34 | setRepository("omim", getQuery4OMIM) 35 | setRepository("fb", getQuery4FB) 36 | setRepository("en", getQuery4EN) 37 | setRepository("tr", getQuery4TR) 38 | setRepository("go", getQuery4GO) 39 | setRepository("ens", getQuery4ENSEMBL) 40 | } 41 | -------------------------------------------------------------------------------- /man/UniGeneQuery.Rd: -------------------------------------------------------------------------------- 1 | \name{UniGeneQuery} 2 | \alias{UniGeneQuery} 3 | \title{Create a Query String for a UniGene Identifier } 4 | \description{ 5 | Given a set of UniGene identifiers this function creates a set of URLs 6 | that can be used to either open a browser to the requested location or 7 | that can be used as anchors in the construction of HTML output. 8 | } 9 | \usage{ 10 | UniGeneQuery(query, UGaddress="UniGene/", type="CID") 11 | } 12 | \arguments{ 13 | \item{query}{The UniGene identifiers. } 14 | \item{UGaddress}{ The address of UniGene, within the NCBI repository.} 15 | \item{type}{What type of object is being asked for; either CID or UGID } 16 | } 17 | \details{ 18 | Using published details from NCBI we construct an appropriate string 19 | for directing a web browser to the information available at the NCBI 20 | for that genomic product (usually an EST). 21 | } 22 | \value{ 23 | A character vector containing the query string. 24 | } 25 | \references{NCBI, \url{https://www.ncbi.nih.gov/} } 26 | \author{Robert Gentleman} 27 | \note{Be very careful about automatically querying this resource. It is 28 | considered antisocial behavior by the owners. 
} 29 | 30 | \examples{ 31 | q1<-UniGeneQuery(c("Hs.293970", "Hs.155650")) 32 | q1 33 | if( interactive()) 34 | browseURL(q1[1]) 35 | 36 | } 37 | \keyword{interface} 38 | -------------------------------------------------------------------------------- /man/getOntology.Rd: -------------------------------------------------------------------------------- 1 | \name{getOntology} 2 | \alias{getOntology} 3 | \title{Get GO terms for a specified ontology} 4 | \description{ 5 | Find the subset of GO terms for the specified ontology, for each element 6 | of the supplied list of associations. The input list is typically from 7 | one of the chip-specific meta-data files. 8 | } 9 | \usage{ 10 | getOntology(inlist, ontology=c("MF", "BP", "CC")) 11 | } 12 | \arguments{ 13 | \item{inlist}{A list of GO associations} 14 | \item{ontology}{The name of the ontology you want returned.} 15 | } 16 | \details{ 17 | The input list should be a list of lists, each element of \code{inlist} 18 | is itself a list containing the information that maps from a specified 19 | ID (usually LocusLink) to GO information. Each element of the inner list 20 | is a list with elements \code{GOID}, \code{Ontology} and \code{Evidence}. 21 | } 22 | \value{ 23 | A list of the same length as the input list. Each element of this 24 | list will contain a vector of \code{GOID}s for those terms that match 25 | the requested ontology. 26 | } 27 | \author{R. 
Gentleman} 28 | 29 | \seealso{\code{\link{getEvidence}}, \code{\link{dropECode}}} 30 | 31 | \examples{ 32 | library("hgu95av2.db") 33 | bb <- hgu95av2GO[["39613_at"]] 34 | getOntology(bb) 35 | sapply(bb, function(x) x$Ontology) 36 | } 37 | \keyword{manip} 38 | -------------------------------------------------------------------------------- /man/getSeq4Acc.Rd: -------------------------------------------------------------------------------- 1 | \name{getSEQ} 2 | \alias{getGI} 3 | \alias{getSEQ} 4 | 5 | \title{Queries the NCBI database to obtain the sequence for a given 6 | GenBank Accession number} 7 | \description{ 8 | Given a GenBank Accession number, getSEQ queries the NCBI database for 9 | the nucleotide sequence. 10 | } 11 | \usage{ 12 | getGI(accNum) 13 | getSEQ(gi) 14 | } 15 | 16 | \arguments{ 17 | \item{accNum}{\code{accNum} a character string for a GenBank Accession 18 | number (i.e. M22490)} 19 | \item{gi}{\code{gi} a character string or numeric numbers for a 20 | GenBank accession number or gi number. A gi number is a series of 21 | digits that are assigned consecutively to each sequence record 22 | processed by NCBI} 23 | } 24 | \details{ 25 | The NCBI database is queried for the given GenBank Accession number to 26 | obtain the nucleotide sequence in FASTA format. The leading 27 | identification line of the sequence data is then dropped to return only 28 | the nucleotide sequence. 29 | 30 | getGI returns the gi number corresponding to a given GenBank accession 31 | number. 
32 | } 33 | \value{ 34 | getSEQ returns a character string of nucleotide sequence 35 | } 36 | \references{\url{https://www.ncbi.nlm.nih.gov/entrez/query.fcgi}} 37 | \author{Jianhua Zhang} 38 | 39 | \examples{ 40 | getSEQ("M22490") 41 | } 42 | \keyword{misc} 43 | 44 | -------------------------------------------------------------------------------- /man/dropECode.Rd: -------------------------------------------------------------------------------- 1 | \name{dropECode} 2 | \alias{dropECode} 3 | \title{Drop GO labels for specified Evidence Codes} 4 | \description{ 5 | Genes are mapped to GO terms on the basis of evidence codes. In some 6 | analyses it will be appropriate to drop certain sets of annotations 7 | based on specific evidence codes. 8 | } 9 | \usage{ 10 | dropECode(inlist, code="IEA") 11 | } 12 | \arguments{ 13 | \item{inlist}{A list of GO data } 14 | \item{code}{The set of codes that should be dropped. } 15 | } 16 | \details{ 17 | A simple use of \code{\link{lapply}} and \code{\link{sapply}} to find 18 | and eliminate those terms that have the specified evidence codes. 19 | 20 | This might be used when one is using GO to validate a sequence 21 | matching experiment (for example), then all terms whose mapping was 22 | based on sequence similarity (say ISS and IEA) should be removed. 23 | 24 | } 25 | \value{ 26 | A list of the same length as the input list retaining only those 27 | annotations whose evidence codes were not the ones in the exclusion 28 | set \code{code}. 29 | } 30 | \author{R. 
Gentleman} 31 | 32 | \seealso{\code{\link{getEvidence}}, \code{\link{getOntology}}} 33 | 34 | \examples{ 35 | library("hgu95av2.db") 36 | bb <- hgu95av2GO[["39613_at"]] 37 | getEvidence(bb[1:3]) 38 | cc <- dropECode(bb[1:3]) 39 | if (length(cc)) 40 | getEvidence(cc) 41 | } 42 | \keyword{manip} 43 | -------------------------------------------------------------------------------- /man/accessionToUID.Rd: -------------------------------------------------------------------------------- 1 | \name{accessionToUID} 2 | \alias{accessionToUID} 3 | \title{A function to convert accession values to NCBI UIDs.} 4 | \description{ 5 | Given one or more accession values, this function will attempt to 6 | convert them into NCBI UID values. 7 | } 8 | \usage{ 9 | accessionToUID(...,db=c("genbank","pubmed")) 10 | } 11 | \arguments{ 12 | \item{...}{Accession numbers to be transformed.} 13 | \item{db}{Which database this accession number refers to, defaults to Genbank} 14 | } 15 | \details{ 16 | Utilizes the PubMed tool esearch.fcgi to convert an accession number 17 | into a valid NCBI UID number. 18 | 19 | WARNING: The powers that be at NCBI have been known to ban the IP 20 | addresses of users who abuse their servers (currently defined as less 21 | than 2 seconds between queries). Do NOT put this function in a tight 22 | loop or you may find your access revoked. 23 | } 24 | \value{ 25 | Returns either a valid NCBI UID value or NULL (if there was nothing 26 | available). 
27 | } 28 | \author{Jeff Gentry} 29 | \seealso{\code{\link{pubmed}}, \code{\link[XML]{xmlTreeParse}}} 30 | \examples{ 31 | 32 | ## The two returns from genbank should be the same 33 | xdoc <- genbank("U03397",type="accession",disp="data") 34 | x <- accessionToUID("U03397",db="genbank") 35 | xdoc <- genbank(x, type="uid",disp="data") 36 | 37 | ## Can handle multiple inputs 38 | y <- accessionToUID("M16653","U892893",db="genbank") 39 | } 40 | \keyword{interface} 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /man/GO2heatmap.Rd: -------------------------------------------------------------------------------- 1 | \name{GO2heatmap} 2 | \alias{GO2heatmap} 3 | \alias{KEGG2heatmap} 4 | \alias{KEGG2heatmap,character,eSet,character-method} 5 | \alias{KEGG2heatmap,character,matrix,character-method} 6 | 7 | \title{Compute a heatmap for the specified data, for either a GO 8 | category or a KEGG pathway.} 9 | \description{ 10 | For a given GO category or KEGG pathway, all probes in the supplied 11 | data are mapped to the pathway and a heatmap is produced. 12 | } 13 | \usage{ 14 | GO2heatmap(x, eset, data, ...) 15 | KEGG2heatmap(x, eset, data, ...) 16 | } 17 | \arguments{ 18 | \item{x}{The name of the category or pathway.} 19 | \item{eset}{An \code{ExpressionSet} providing the data.} 20 | \item{data}{The name of the chip.} 21 | \item{\dots}{Additional parameters to pass to \code{heatmap}.} 22 | } 23 | \details{ 24 | For the given pathway or GO category all matching probes are 25 | determined, these are used to subset the data and \code{heatmap} is 26 | invoked on that set of data. Extra parameters can be passed through to 27 | \code{heatmap} using the \code{\dots} parameter. 28 | The \code{annotation} slot of the \code{eset} argument is used to 29 | determine the appropriate annotation data to use. 30 | } 31 | \value{ 32 | The value returned by \code{heatmap} is passed back to the user. 33 | } 34 | \author{R. 
Gentleman } 35 | \seealso{\code{\link{heatmap}}} 36 | \examples{ 37 | library("hgu95av2.db") 38 | data(sample.ExpressionSet) 39 | KEGG2heatmap("04810", sample.ExpressionSet, "hgu95av2") 40 | } 41 | \keyword{manip} 42 | -------------------------------------------------------------------------------- /man/ACCNUMStats.Rd: -------------------------------------------------------------------------------- 1 | \name{ACCNUMStats} 2 | \alias{ACCNUMStats} 3 | \alias{whatACC} 4 | 5 | \title{Provides statistics on the types of ids used for the ACCNUM 6 | environment of a given data package} 7 | \description{ 8 | Given a data package name, ACCNUMStats counts how many of the probe 9 | ids are mapped to GenBank Accession numbers, UniGene ids, RefSeq ids, 10 | or Image clone ids. 11 | } 12 | \usage{ 13 | ACCNUMStats(pkgName) 14 | whatACC(accs) 15 | } 16 | \arguments{ 17 | \item{pkgName}{\code{pkgName} a character string for the name of a 18 | BioC data package} 19 | \item{accs}{\code{accs} a vector of character string for the ids 20 | whose type will be determined} 21 | } 22 | \details{ 23 | The ACCNUM environment of each BioC data package contains mappings 24 | between probe ids and a set of public ids based on which mappings of 25 | probe ids to other annotation data can be obtained using public data 26 | sources. The set of ids were provided by a manufacturer or user at the 27 | time when the data package was built. The manufacturer/user provided 28 | ids can be of different types of public ids, such as GenBank Accession 29 | number, UniGene ids, etc.. 30 | 31 | ACCNUMStats counts the number of probes that are mapped to different 32 | types of public ids and have the results presented in a table. 
33 | } 34 | \references{The ACCNUM environment of a platform dependent BioC data package} 35 | \author{Jianhua Zhang} 36 | \examples{ 37 | library("hgu95av2.db") 38 | ACCNUMStats("hgu95av2") 39 | } 40 | \keyword{misc} 41 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: annotate 2 | Title: Annotation for microarrays 3 | Version: 1.89.0 4 | Author: R. Gentleman 5 | Authors@R: c( 6 | person("Jeff", "Gentry", role = "aut"), 7 | person("Kritika", "Verma", role = "ctb", 8 | comment = "Converted chromLOC vignette from Sweave to RMarkdown / HTML."), 9 | person("Manvi", "Yaduvanshi", role = "ctb", 10 | comment = "Converted useDataPkgs vignette from Sweave to RMarkdown / HTML."), 11 | person("Bioconductor Package Maintainer", 12 | email = "maintainer@bioconductor.org", role = "cre")) 13 | Description: Using R environments for annotation. 14 | Depends: R (>= 2.10), AnnotationDbi (>= 1.27.5), XML 15 | Imports: Biobase, DBI, xtable, graphics, utils, stats, methods, 16 | BiocGenerics (>= 0.13.8), httr 17 | Suggests: hgu95av2.db, genefilter, Biostrings (>= 2.25.10), IRanges, rae230a.db, 18 | rae230aprobe, tkWidgets, GO.db, org.Hs.eg.db, org.Mm.eg.db, 19 | humanCHRLOC, Rgraphviz, RUnit, BiocStyle, knitr 20 | VignetteBuilder: knitr 21 | License: Artistic-2.0 22 | LazyLoad: yes 23 | Collate: AllGenerics.R ACCNUMStats.R Amat.R AnnMaps.R chromLocation.R 24 | compatipleVersions.R findNeighbors.R getData.R getPMInfo.R 25 | getSeq4ACC.R GOhelpers.R homoData.R html.R isValidKey.R 26 | LL2homology.R pmid2MIAME.R pubMedAbst.R query.R 27 | readGEOAnn.R serializeEnv.R blastSequences.R zzz.R 28 | test_annotate_package.R 29 | biocViews: Annotation, Pathways, GO 30 | Packaged: 2014-07-15 23:10:57 UTC; mcarlson 31 | -------------------------------------------------------------------------------- /vignettes/annotate.Rfc: 
-------------------------------------------------------------------------------- 1 | A RFC for dealing with annotation data. 2 | 3 | Please comment on the following proposal. 4 | 5 | In order to deal with the ever changing needs and continual evolution 6 | of genomic data the annotate package must be able to update and 7 | install annotation data. 8 | 9 | It seems that this process is not unlike the R package system and 10 | perhaps much can be gained by using features from both the package 11 | system and the data system. 12 | 13 | Functionality: 14 | 15 | annotation: A function, like data, if given no argument it lists the 16 | annotation that is available. Annotation is loaded using "data", 17 | however. 18 | There is an argument, local, if TRUE look on the local machine, if 19 | FALSE, list available annotation datasets from Bioconductor. 20 | 21 | We will use a different index file, 00Annotate.dcf in the annotate 22 | data directory to hold this listing. It isn't really data and we 23 | wouldn't really want people to get confused. 24 | 25 | install.annotation: Takes a base name (like "hgu95A") and obtains and 26 | installs a package from the bioconductor web site (using the same 27 | system of URL access as install.packages). 28 | This will download data, put it in the appropriate directory and 29 | update the index file. 30 | 31 | update.annotation: Takes a base name (like "hgu95A") and checks to see 32 | if it needs to be updated. 33 | 34 | I still need to figure out how to automagically build this stuff but 35 | would like to get comments quickly so it can be squeezed in to the 36 | release. 
37 | 38 | thanks 39 | robert 40 | -------------------------------------------------------------------------------- /man/GOmnplot.Rd: -------------------------------------------------------------------------------- 1 | \name{GOmnplot} 2 | \alias{GOmnplot} 3 | \alias{KEGGmnplot} 4 | \alias{KEGGmnplot,character,eSet,character-method} 5 | \alias{KEGGmnplot,character,matrix,character-method} 6 | 7 | \title{A function to plot by group means against each other.} 8 | \description{ 9 | For a two sample comparison, as determined by \code{group}, and a 10 | specified KEGG pathway or GO category, per group means are computed 11 | and plotted against each other. 12 | } 13 | \usage{ 14 | GOmnplot(x, eset, data = "hgu133plus2", group, ...) 15 | KEGGmnplot(x, eset, data = "hgu133plus2", group, ...) 16 | } 17 | \arguments{ 18 | \item{x}{The name of the KEGG pathway or GO category.} 19 | \item{eset}{An \code{ExpressionSet} containing the data.} 20 | \item{data}{The name of the chip that was used to provide the data.} 21 | \item{group}{The variable indicating group membership, should have two 22 | different values.} 23 | \item{\dots}{Extra parameters to pass to the call to \code{plot}.} 24 | } 25 | \details{ 26 | All probes in \code{eset} that map to the given category are 27 | determined. Then per group, per probe means are computed and plotted 28 | against each other. Extra parameters can be passed to the plot 29 | function via the \code{dots} argument. 30 | } 31 | \value{ 32 | The matrix of per group means, for each probe. 33 | } 34 | \author{R. 
Gentleman} 35 | \seealso{ \code{\link{KEGG2heatmap}}} 36 | \examples{ 37 | library("hgu95av2.db") 38 | data(sample.ExpressionSet) 39 | KEGGmnplot("04810", sample.ExpressionSet, sample.ExpressionSet$sex, 40 | data = "hgu95av2") 41 | } 42 | \keyword{manip} 43 | -------------------------------------------------------------------------------- /man/updateSymbolsToValidKeys.Rd: -------------------------------------------------------------------------------- 1 | \name{updateSymbolsToValidKeys} 2 | \alias{updateSymbolsToValidKeys} 3 | \title{Take a list of symbols and translate them into the best possible 4 | ID for a package.} 5 | \description{ 6 | Given a list of gene symbols and a package, find a valid ID for that 7 | package. If there isn't a valid ID, then return the original symbol. 8 | } 9 | \usage{ 10 | updateSymbolsToValidKeys(symbols, pkg) 11 | } 12 | \arguments{ 13 | \item{symbols}{A character vector containing gene symbols that you 14 | wish to try and translate into valid IDs.} 15 | \item{pkg}{The package name of the chip for which we wish to validate IDs.} 16 | } 17 | \details{ 18 | This is a convenience function for getting from a possibly varied list 19 | of gene symbols mapped onto something that is a nice concrete ID such as an 20 | entrez gene ID. When such an ID cannot be found, the original symbol 21 | will come back to prevent the loss of any information. 22 | } 23 | \value{ 24 | This function returns a vector of IDs corresponding to the symbols 25 | that were input. If the symbols don't have a valid ID, then they come 26 | back instead. 
27 | } 28 | \author{Marc Carlson} 29 | \seealso{\code{\link{isValidKey}}} 30 | \examples{ 31 | \dontrun{ 32 | ## one "bad" ID, one that can be mapped onto a valid ID, and a 3rd 33 | ## which already is a valid ID 34 | syms <- c("15S_rRNA_2","21S_rRNA_4","15S_rRNA") 35 | updateSymbolsToValidKeys(syms, "org.Sc.sgd") 36 | 37 | ## 3 symbols and a 4th that will NOT be valid 38 | syms <- c("MAPK11","P38B","FLJ45465", "altSymbol") 39 | updateSymbolsToValidKeys(syms, "org.Hs.eg") 40 | } 41 | } 42 | \keyword{manip} 43 | -------------------------------------------------------------------------------- /man/pm.getabst.Rd: -------------------------------------------------------------------------------- 1 | \name{pm.getabst} 2 | \alias{pm.getabst} 3 | \title{Obtain the abstracts for a set PubMed list.} 4 | \description{ 5 | The data provided by PubMed is reduced to a small set. This set is 6 | then suitable for further rendering. 7 | } 8 | \usage{ 9 | pm.getabst(geneids, basename) 10 | } 11 | \arguments{ 12 | \item{geneids}{The identifiers used to find Abstracts } 13 | \item{basename}{The base name of the annotation package to use. } 14 | } 15 | \details{ 16 | We rely on the annotation in the package associated with the 17 | \code{basename} to provide PubMed identifiers for the genes described by 18 | the gene identifiers. 19 | With these in hand we then use the \code{pmfetch} utility to download the 20 | PubMed abstracts in XML form. These are then translated (transformed) to a 21 | shorter version containing a small subset of the data provided by PubMed. 22 | 23 | This function has the side effect of creating an environment in 24 | \code{.GlobalEnv} that contains the mapping for the requested data. 25 | This is done for efficiency -- so we don't continually read in the data 26 | when there are many different queries to be performed. 27 | } 28 | \value{ 29 | A list of lists containing objects of class \code{pubMedAbst}. 
30 | There will be one element of the list for each identifier. 31 | Each of these elements is a list containing one abstract (of 32 | class \code{pubMedAbst} for each PubMed identifier associated with 33 | the gene identifier. 34 | } 35 | \author{Robert Gentleman} 36 | \seealso{\code{\link{pm.abstGrep}}, \code{\link{pm.titles}}} 37 | \examples{ 38 | library("hgu95av2.db") 39 | hoxa9 <- "37806_at" 40 | absts <- pm.getabst(hoxa9, "hgu95av2") 41 | } 42 | \keyword{manip} 43 | -------------------------------------------------------------------------------- /R/LL2homology.R: -------------------------------------------------------------------------------- 1 | LL2homology <- function(homoPkg, llids){ 2 | stop("This function has been DEFUNCT.") 3 | if(!require(homoPkg, character.only = TRUE)) 4 | stop("Package homology not available!") 5 | 6 | hgids <- mget(as.character(llids), envir = get(paste(homoPkg, 7 | "LL2HGID", sep = "")), ifnotfound = NA) 8 | 9 | #if(length(hgids) == 1){ 10 | # return(HGID2homology(hgids[[1]])) 11 | #} 12 | 13 | return(sapply(hgids, HGID2homology, homoPkg = homoPkg)) 14 | } 15 | 16 | ACC2homology <- function(accs, homoPkg){ 17 | stop("This function has been DEFUNCT.") 18 | if(!require(homoPkg, character.only = TRUE)) 19 | stop(paste("Package", homoPkg, "not available!")) 20 | 21 | hgids <- mget(as.character(accs), envir = get(paste(homoPkg, "ACC2HGID", 22 | sep = ""), pos = match(paste("package:", homoPkg, sep = ""), 23 | search())), ifnotfound = NA) 24 | return(sapply(hgids, HGID2homology, homoPkg)) 25 | } 26 | 27 | HGID2homology <- function(hgid, homoPkg){ 28 | stop("This function has been DEFUNCT.") 29 | homoGenes <- list() 30 | 31 | # hgid may be of length greater than 1 as a LL id may be mapped to 32 | # more than 2 HGIDs 33 | for(i in hgid){ 34 | options(show.error.messages = FALSE) 35 | tryMe <- try(get(as.character(i), 36 | envir = get(paste(homoPkg, "DATA", sep = ""), 37 | pos = match(paste("package:", homoPkg, sep = ""), 38 | 
search())))) 39 | options(show.error.messages = TRUE) 40 | if(!inherits(tryMe, "try-error")){ 41 | homoGenes[[length(homoGenes) + 1]] <- tryMe 42 | } 43 | } 44 | return(homoGenes) 45 | } 46 | -------------------------------------------------------------------------------- /R/getData.R: -------------------------------------------------------------------------------- 1 | ##copyright 2002 R. Gentleman, all rights reserved 2 | ##helper functions for dealing with data environments (soon to be hash 3 | ##tables) 4 | 5 | ## JZ added lookUp and modified the other functions so that they all 6 | ## use lookUp. Nov. 6, 2003. 7 | lookUp <- function(x, data, what, load=FALSE) { 8 | if(length(x) < 1){ 9 | stop("No keys provided") 10 | } 11 | mget(x, envir=getAnnMap(what, chip=data, load=load), 12 | ifnotfound=NA) 13 | } 14 | 15 | getGO <- function(x, data) { 16 | lookUp(x, data, "GO") 17 | } 18 | 19 | getGOdesc <- function(x, which = c("BP", "CC", "MF", "ANY")) { 20 | which <- match.arg(which) 21 | options(show.error.messages = FALSE) 22 | ans <- try(lookUp(x, "GO", "TERM", load=TRUE)) 23 | options(show.error.messages = TRUE) 24 | onts <- sapply(ans, Ontology) 25 | if(inherits(ans, "try-error")){ 26 | warning(paste("Invalid GO term", x)) 27 | return(NULL) 28 | }else{ 29 | if(which == "ANY"){ 30 | return(ans) 31 | }else{ 32 | ans <- ans[onts %in% which] 33 | if(length(ans) == 0){ 34 | return(NULL) 35 | }else{ 36 | return(ans) 37 | } 38 | } 39 | } 40 | } 41 | 42 | getSYMBOL <- function(x, data) { 43 | unlist(lookUp(x, data, "SYMBOL")) 44 | } 45 | 46 | getPMID <- function(x, data) { 47 | lookUp(x, data, "PMID") 48 | } 49 | 50 | getEG <- function(x, data) { 51 | unlist(lookUp(x, data, "ENTREZID")) 52 | } 53 | 54 | # This function needs to be updated when new annotation items are 55 | # added to the data packages 56 | getUniqAnnItem <- function(){ 57 | return(c("ACCNUM", "ENTREZID", "GENENAME", "SYMBOL", "MAP", 58 | "GRIF", "SUMFUNC", "NM", "NP")) 59 | } 60 | 
-------------------------------------------------------------------------------- /man/serializeEnv.Rd: -------------------------------------------------------------------------------- 1 | \name{serializeEnv} 2 | \alias{serializeEnv} 3 | \alias{serializeDataPkgEnvs} 4 | \title{A Function To Serialize Environment} 5 | \description{ 6 | This function will serialize an environment in R to an XML format 7 | stored in a compressed file. 8 | } 9 | \usage{ 10 | serializeEnv(env, fname) 11 | serializeDataPkgEnvs(pkgDir) 12 | } 13 | \arguments{ 14 | \item{env}{The name of the environment to serialize.} 15 | \item{fname}{The name of the output file.} 16 | \item{pkgDir}{The directory where a data package is} 17 | } 18 | \details{ 19 | The environment is converted into an XML format and then outputted to 20 | a gzipped file (using \code{\link{gzfile}}). The values in the 21 | environment are serialized (using \code{\link{serialize}}) in ASCII 22 | format although the keys are stored in plain text. 23 | 24 | The format of the XML is very simple, with the primary block being 25 | \code{values}, which contain blocks of \code{entries}, and each entry 26 | having a \code{key} and a \code{value}. For instance, if we had an 27 | environment with one value in it, the character \code{c} with a key 28 | of \code{a} (e.g. \code{assign("a", "c", env=foo)}), this is what the 29 | output would look like. 
30 | 31 | \preformatted{ 32 | 33 | 34 | 35 | 36 | a 37 | 38 | 39 | A\n2\n131072\n66560\n1040\n1\n1033\n1\nc\n 40 | 41 | 42 | 43 | } 44 | } 45 | \author{Jeff Gentry} 46 | \seealso{\code{\link{gzfile}}, 47 | \code{\link{serialize}}} 48 | \examples{ 49 | z <- new.env() 50 | assign("a", 1, env=z) 51 | assign("b", 2, env=z) 52 | assign("c", 3, env=z) 53 | serializeEnv(z, tempfile()) 54 | } 55 | \keyword{utilities} 56 | -------------------------------------------------------------------------------- /R/getSeq4ACC.R: -------------------------------------------------------------------------------- 1 | getGI <- function(accNum){ 2 | # Get the gi based on the Accession number 3 | gi <- readLines(paste("https://www.ncbi.nlm.nih.gov/entrez/", 4 | "query.fcgi?db=Nucleotide&cmd=search&term=", 5 | accNum, sep = "")) 6 | gi <- gsub(paste(".*gi\\|([0-9]+)\\|[a-zA-Z0-9]+\\|", accNum, 7 | ".*", sep = ""), "\\1", 8 | gi[grep(paste("gi\\|.*\\|[a-zA-Z0-9]+\\|", accNum, ".*", 9 | sep = ""), gi)]) 10 | if(length(gi) == 0){ 11 | stop(paste("Can't obtain a gi number for", accNum)) 12 | }else{ 13 | return(gi) 14 | } 15 | } 16 | 17 | 18 | getSEQ <- function(gi){ 19 | 20 | ## Old stuff left just in case NCBI changes things back on us (10/15/10) 21 | ## seq <- readLines(paste("http://www.ncbi.nlm.nih.gov/entrez/batchseq.cgi?", 22 | ## "cmd=&txt=on&save=&cfm=&list_uids=", gi, "&", 23 | ## "db=nucleotide&extrafeat=16&term=&view=fasta&", 24 | ## "dispmax=20&SendTo=t&__from=&__to=&__strand=", sep = "")) 25 | 26 | seq <- readLines(paste("https://www.ncbi.nlm.nih.gov/entrez/eutils/", 27 | "efetch.fcgi?db=nucleotide&rettype=fasta&id=",gi, 28 | sep = "")) 29 | 30 | if(length(seq) == 0){ 31 | stop("Failed to extract the sequence") 32 | }else{ 33 | return(paste(seq[2:length(seq)], sep = "", collapse = "")) 34 | } 35 | } 36 | 37 | ## better use reverseComplement from Biostrings 38 | 39 | #revBase <- function(x){ 40 | # tot <- which(x == "A") 41 | # tog <- which(x == "C") 42 | # toa <- which(x == "T") 43 | 
# toc <- which(x == "G") 44 | # x[tot] <- "T" 45 | # x[toa] <- "A" 46 | # x[toc] <- "C" 47 | # x[tog] <- "G" 48 | # x 49 | # } 50 | 51 | #revString <- function(x) 52 | # sapply(lapply(lapply(strsplit(x, NULL), rev), revBase), paste, collapse="") 53 | 54 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | Here is a start of a list of ideas: 2 | Please feel free to add to it or to start implementing stuff: 3 | 4 | 1) The file, data/mgu74v2id 5 | contains an environment that has the affy number linked to (the 6 | genbank accession number) 7 | 8 | It would be nice to be able to select a set of genes and 9 | get a webpage set up with the Affy #, and links to various 10 | databases (genbank in particular). 11 | 12 | 2) We need to decide which databases we will store in data: 13 | -functional groups 14 | -basic mappings 15 | -proteins 16 | -chromosome mapping 17 | 18 | 3) A set of tools that let us select data from a data frame (such as the 19 | output of dChip) according to the data in these files. 20 | 21 | 4) Do we want to explore a Tcl/Tk interface to selection? 22 | 23 | 5) Need to store the data files compressed (.gz) and then use the appropriate 24 | readers. What would be even better would be to have stored binary forms 25 | of environments (or memory-mapped environments). 26 | 27 | 6) Now have the ability to load up locus link webpages, but we 28 | would like to be able to do keyword searchs and to interpret the 29 | results directly in R. 30 | 31 | 7) We need some form of class structure that will allow us to 32 | manipulate the objects in a more uniform fashion. 
33 | 34 | 8) We need to design the interface so it can be handled either by 35 | environments/hash tables or by a database -- either Access or 36 | Postgres 37 | 38 | --- 39 | some comments after 26 oct meetings 40 | 41 | options-like structure (or object-like, e.g., lm.object) for htmlTable 42 | and htmlPage so that htmlTable( listOfContent, listOfMarkup, tableOptions ) 43 | and htmlPage( listOfContent, listOfMarkup, pageOptions ) have good defaults. 44 | constraints on content elements of listsOfContent will be needed -- 45 | same number of elements per list element, e.g. role of the container 46 | class should be considered 47 | -------------------------------------------------------------------------------- /vignettes/annotate.bib: -------------------------------------------------------------------------------- 1 | @Article{PubMedRnews, 2 | author = {R. Gentleman and J. Gentry}, 3 | title = {Querying PubMed}, 4 | journal = {R News}, 5 | year = 2002, 6 | volume = 2, 7 | number = 2, 8 | pages = {28--31}, 9 | month = {June}, 10 | url = {http://CRAN.R-project.org/doc/Rnews/}, 11 | } 12 | 13 | 14 | @Article{PubMedVignette, 15 | author = {J. Gentry}, 16 | title = {HowTo: Automated Querying of PubMed Data}, 17 | journal = {Bioconductor Vignettes}, 18 | year = {2004}, 19 | url = {http://www.bioconductor.org} 20 | } 21 | 22 | 23 | @Article{ChromLocVignette, 24 | author = {J. Gentry}, 25 | title = {HowTo: Build and use chromosomal information}, 26 | journal = {Bioconductor Vignettes}, 27 | year = {2004}, 28 | url = {http://www.bioconductor.org} 29 | } 30 | 31 | @Article{HTMLVignette, 32 | author = {R. 
Gentleman}, 33 | title = {HowTo: get pretty html output for my gene list}, 34 | journal = {Bioconductor Vignettes}, 35 | year = {2003}, 36 | url = {http://www.bioconductor.org} 37 | } 38 | 39 | @Article{XML, 40 | author = {Duncan {Temple Lang}}, 41 | title = {Tools for parsing and generating XML within R and S-Plus.}, 42 | journal = {CRAN}, 43 | year = {2000}, 44 | url = {http://www.omegahat.org/RSXML}, 45 | } 46 | 47 | @Article{GOA, 48 | author = {E. Camon and M. Magrane and D. Barrell and V. Lee and 49 | E. Dimmer and J. Maslen and 50 | D. Binns and N. Harte and R. Lopez and R. Apweiler}, 51 | year = {2004}, 52 | title = {The {G}ene {O}ntology Annotation ({GOA}) Database: sharing 53 | knowledge in {U}niprot with {G}ene {O}ntology}, 54 | journal = {Nucleic Acids Research}, 55 | volume = {32}, 56 | issue = {1}, 57 | pages = {D262--D266} 58 | } 59 | 60 | @Article{GO, 61 | author = {{The Gene Ontology Consortium}}, 62 | title = {{G}ene {O}ntology: tool for the unification of biology}, 63 | journal = {Nature Genetics}, 64 | year = {2000}, 65 | volume = {25}, 66 | pages = {25--29} 67 | } 68 | -------------------------------------------------------------------------------- /man/pubmed.Rd: -------------------------------------------------------------------------------- 1 | \name{pubmed} 2 | \alias{pubmed} 3 | 4 | \title{A function to open the browser to Pubmed with the selected gene. } 5 | \description{ 6 | Given a vector of Pubmed identifiers or accession numbers, the user 7 | can either have a browser display a URL showing a Pubmed query for 8 | those identifiers, or an XMLdoc object with the same data. 9 | } 10 | \usage{ 11 | pubmed(...,disp=c("data","browser"), type=c("uid","accession"), 12 | pmaddress=.efetch("PubMed", disp, type)) 13 | } 14 | \arguments{ 15 | \item{...}{Vectorized set of Pubmed ID's} 16 | \item{disp}{Either "Data" or "Browser" (default is data).
Data 17 | returns an XMLDoc, while Browser will display information in the 18 | user's browser.} 19 | \item{type}{Denotes whether the arguments are accession numbers or 20 | UIDS. Defaults to uids.} 21 | \item{pmaddress}{Specific path to the pubmed efetch engine from the 22 | NCBI website.} 23 | } 24 | \details{ 25 | A simple function to retrieve Pubmed data given a specific ID, either 26 | through XML or through a web browser. This function will accept 27 | either pubmed accession numbers or NCBI UIDs (defined as a Pubmed 28 | ID or a Medline ID) - although the types must not be mixed in a single 29 | call. 30 | 31 | WARNING: The powers that be at NCBI have been known to ban the IP 32 | addresses of users who abuse their servers (currently defined as 33 | less than 2 seconds between queries). Do NOT put this function in 34 | a tight loop or you may find your access revoked. 35 | } 36 | \value{ 37 | If the option "data" is used, an object of type XMLDoc is returned, 38 | unless there was an error with the query in which case an object of 39 | type try-error is returned. 40 | 41 | If the option "browser" is used, nothing is returned. 42 | } 43 | 44 | \author{R.
Gentleman } 45 | 46 | \seealso{\code{\link{genbank}}, \code{\link[XML]{xmlTreeParse}}} 47 | 48 | \examples{ 49 | if( interactive() ) 50 | opts <- c("data","browser") else 51 | opts <- "data" 52 | for (dp in opts) 53 | pubmed("11780146","11886385","11884611",disp=dp) 54 | } 55 | \keyword{ interface } 56 | -------------------------------------------------------------------------------- /R/serializeEnv.R: -------------------------------------------------------------------------------- 1 | serializeEnv <- function(env, fname) { 2 | if (!is.character(fname)) 3 | stop("conn should be a character name of file for storage") 4 | 5 | if (is.character(env)) { 6 | cmd <- paste("envList <- as.list(", env, ")") 7 | eval(parse(text=cmd)) 8 | } 9 | else if (is.environment(env)) 10 | envList <- as.list(env) 11 | else 12 | stop("invalid 'env' argument") 13 | 14 | keys <- names(envList) 15 | 16 | outFile <- gzfile(fname) 17 | open(outFile, open="wb") 18 | 19 | cat("\n", 20 | "", 21 | file=outFile) 22 | for (i in seq(along=envList)) { 23 | cat("\n\n\t\n\t\t", 25 | "\n\t\t\n\t\t\n\t\t\n\t\t\n\t", sep="", file=outFile, append=TRUE) 28 | } 29 | cat("\n", file=outFile, append=TRUE) 30 | 31 | close(outFile) 32 | } 33 | 34 | serializeDataPkgEnvs <- function(pkgDir) { 35 | pkg <- basename(pkgDir) 36 | require(pkg, character.only=TRUE) || stop("data package ", 37 | pkg, " not installed") 38 | 39 | cDir <- getwd() 40 | on.exit(setwd(cDir), add=TRUE) 41 | setwd(pkgDir) 42 | 43 | if (! file.exists("inst")) 44 | if (!dir.create("inst")) 45 | stop("Failed to create inst for ", pkgDir) 46 | if (! 
file.exists(file.path("inst", "gdbm"))) 47 | if (!dir.create(file.path("inst", "gdbm"))) 48 | stop("Failed to create inst/gdbm for ", pkgDir) 49 | setwd("inst/gdbm") 50 | 51 | dataSets <- ls(paste("package", pkg, sep=":")) 52 | if (length(dataSets) == 0) 53 | return(0) 54 | 55 | for (i in seq(along=dataSets)) { 56 | cmd <- paste("is.environment(", dataSets[i], ")") 57 | if (eval(parse(text=cmd))) { 58 | print(paste("Converting", dataSets[i])) 59 | serializeEnv(dataSets[i], paste(dataSets[i], ".xml.gz", sep="")) 60 | } 61 | else 62 | print(paste(dataSets[i], "is not an environment, skipping.")) 63 | } 64 | NULL 65 | } 66 | -------------------------------------------------------------------------------- /man/isValidkey.Rd: -------------------------------------------------------------------------------- 1 | \docType{methods} 2 | \name{isValidKey} 3 | \alias{isValidKey} 4 | \alias{allValidKeys} 5 | \alias{isValidKey,character,character-method} 6 | \alias{isValidKey,character,OrgDb-method} 7 | \alias{allValidKeys,character-method} 8 | \alias{allValidKeys,OrgDb-method} 9 | \title{Get or verify valid IDs for a package or OrgDb object.} 10 | \description{ 11 | These functions either verify that a list of IDs are primary and valid IDs 12 | for a package, or else return all the valid primary IDs from a package 13 | } 14 | \usage{ 15 | isValidKey(ids, pkg) 16 | allValidKeys(pkg) 17 | 18 | \S4method{isValidKey}{character,character}(ids, pkg) 19 | 20 | \S4method{isValidKey}{character,OrgDb}(ids, pkg) 21 | 22 | \S4method{allValidKeys}{character}(pkg) 23 | 24 | \S4method{allValidKeys}{OrgDb}(pkg) 25 | 26 | } 27 | \arguments{ 28 | \item{ids}{A character vector containing IDs that you wish to validate.} 29 | \item{pkg}{Either the name of an installed annotation package (e.g., 30 | "org.Hs.eg.db"), or an uninstalled annotation package, e.g., from AnnotationHub.} 31 | } 32 | \details{ 33 | Every package has some kind of ID that is central to that package. 
34 | For chip-based packages this will be some kind of probe, and for the 35 | organism based packages it will be something else (usually an entrez 36 | gene ID). isValidKey takes a list of IDs and tests to see whether or 37 | not they are present (valid) in a particular package. allValidKeys 38 | simply returns all the valid primary IDs for a package. 39 | } 40 | \value{ 41 | \code{isValidKey} returns a vector of TRUE or FALSE values corresponding to whether or not the 42 | ID is valid. 43 | 44 | \code{allValidKeys} returns a vector of all the valid primary IDs. 45 | } 46 | \author{Marc Carlson} 47 | \seealso{\code{\link{updateSymbolsToValidKeys}}} 48 | \examples{ 49 | \dontrun{ 50 | ## 2 bad IDs and a 3rd that will be valid 51 | ids <- c("15S_rRNA_2","21S_rRNA_4","15S_rRNA") 52 | isValidKey(ids, "org.Sc.sgd") 53 | 54 | ## 2 good IDs and a 3rd that will not be valid 55 | ids <- c("5600","7531", "altSymbol") 56 | isValidKey(ids, "org.Hs.eg") 57 | 58 | ## Get all the valid primary id from org.Hs.eg.db 59 | allValidKeys("org.Hs.eg") 60 | } 61 | } 62 | \keyword{manip} 63 | -------------------------------------------------------------------------------- /man/getGOTerm.Rd: -------------------------------------------------------------------------------- 1 | \name{getGOTerm} 2 | \alias{getGOTerm} 3 | \alias{getGOParents} 4 | \alias{getGOChildren} 5 | \alias{getGOOntology} 6 | 7 | \title{Functions to Access GO data.} 8 | \description{ 9 | These functions provide access to data in the GO package. The data are 10 | assembled from publically available data from the Gene Ontology 11 | Consortium (GO), \url{www.go.org}. Given a list of GO identifiers they 12 | access the children (more specific terms), the parents (less specific 13 | terms) and the terms themselves. 14 | } 15 | \usage{ 16 | getGOTerm(x) 17 | getGOParents(x) 18 | getGOChildren(x) 19 | getGOOntology(x) 20 | } 21 | \arguments{ 22 | \item{x}{A character vector of valid GO identifiers. 
} 23 | } 24 | \details{ 25 | GO consists of three (soon to be more) specific hierarchies: Molecular 26 | Function (MF), Biological Process (BP) and Cellular Component 27 | (CC). For more details consult the GO website. For each GO identifier 28 | each of these three hierarchies is searched and depending on the 29 | function called the appropriate values are obtained and returned. 30 | 31 | It is possible for a GO identifier to have no children or for it to 32 | have no parents. However, it must have a term associated with it. 33 | } 34 | \value{ 35 | A list of the same length as \code{x}. 36 | The list contains one entry for each element of \code{x}. That entry 37 | is itself a list. With one component named \code{Ontology} which 38 | has as its value one of MF, BP or CC. The second component has the 39 | same name as the suffix of the call, i.e. Children, Parents, or Term. 40 | If there was no match in any of the ontologies then a length zero list 41 | is returned for that element of \code{x}. 42 | 43 | For \code{getGOOntology} a vector of categories (the names of which 44 | are the original GO term names). Elements of this list that are 45 | \code{NA} indicate term names for which there is no category (and 46 | hence they are not really term names). 47 | } 48 | \references{The Gene Ontology Consortium} 49 | 50 | \author{R. Gentleman} 51 | 52 | \examples{ 53 | library("GO.db") 54 | 55 | sG <- sample(keys(GO.db, "GOID"), 8) 56 | 57 | gT <- getGOTerm(sG) 58 | gP <- getGOParents(sG) 59 | gC <- getGOChildren(sG) 60 | gcat <- getGOOntology(sG) 61 | 62 | } 63 | \keyword{manip} 64 | -------------------------------------------------------------------------------- /man/getAnnMap.Rd: -------------------------------------------------------------------------------- 1 | \name{getAnnMap} 2 | \alias{getAnnMap} 3 | 4 | \title{Get annotation map} 5 | \description{ 6 | This function retrieves a map object from an annotation data 7 | package. 
It is intended to serve as a common interface for 8 | obtaining map objects from both SQLite-based and environment-based 9 | annotation data packages. 10 | } 11 | \usage{ 12 | getAnnMap(map, chip, load = TRUE, type = c("db", "env")) 13 | } 14 | 15 | \arguments{ 16 | \item{map}{a string specifying the name of the map to retrieve. For 17 | example, \code{"ENTREZID"} or \code{"GO"}} 18 | \item{chip}{a string describing the chip or genome} 19 | \item{load}{a logical value. When \code{TRUE}, \code{getAnnMap} 20 | will try to load the annotation data package if it is not 21 | already attached.} 22 | \item{type}{a character vector of one or more annotation data 23 | package types. The currently supported types are \code{"db"} and 24 | \code{"env"}. If \code{load} is \code{TRUE}, you can specify both 25 | \code{"db"} and \code{"env"} and the order will determine which 26 | type is tried first. This provides a fall-back mechanism when the 27 | preferred annotation data package type is not available. If 28 | \code{type} is missing, then the first matching annotation package 29 | found in the search path will be used, and then the default value 30 | of \code{type} takes over.} 31 | } 32 | \details{ 33 | \code{getAnnMap} uses the search path (see \code{search}) to find an 34 | appropriate annotation data package; when called with 35 | \code{chip="hgu95av2"}, the function will use the first hgu95av2 36 | package on the search path whether it be db or environment-based. If 37 | \code{load=TRUE} and no suitable package is found on the search path, 38 | then the function will attempt to load an appropriate package. The 39 | \code{type} argument is used to determine which type of package (db or 40 | env) is loaded first. 41 | } 42 | \value{ 43 | If \code{type} is \code{"db"}, an S4 object representing the 44 | requested map. If \code{type} is \code{"env"}, an R 45 | \code{environment} object representing the requested map. 
46 | } 47 | 48 | \author{Seth Falcon} 49 | 50 | \examples{ 51 | map <- getAnnMap("ENTREZID", "hgu95av2", load=TRUE, type=c("env", "db")) 52 | class(map) 53 | } 54 | 55 | \keyword{manip} 56 | -------------------------------------------------------------------------------- /man/genbank.Rd: -------------------------------------------------------------------------------- 1 | \name{genbank} 2 | \alias{genbank} 3 | \title{A function to open the browser to Genbank with the selected gene. } 4 | \description{ 5 | Given a vector of Genbank accession numbers or NCBI UIDs, the user can 6 | either have a browser display a URL showing a Genbank query for those 7 | identifiers, or an XMLdoc object with the same data. 8 | } 9 | \usage{ 10 | genbank(...,disp=c("data","browser"), type=c("accession","uid"), 11 | pmaddress=.efetch("gene", disp, type)) 12 | } 13 | \arguments{ 14 | \item{...}{Vectorized set of Genbank accession numbers or NCBI UIDs} 15 | \item{disp}{Either "Data" or "Browser" (default is data). Data 16 | returns an XMLDoc, while Browser will display information in the 17 | user's browser.} 18 | \item{type}{Denotes whether the arguments are accession numbers or 19 | UIDS. Defaults to accession values.} 20 | \item{pmaddress}{Specific path to the pubmed efetch engine from the 21 | NCBI website.} 22 | 23 | } 24 | \details{ 25 | A simple function to retrieve Genbank data given a specific ID, either 26 | through XML or through a web browser. This function will accept 27 | either Genbank accession numbers or NCBI UIDs (defined as a Pubmed 28 | ID or a Medline ID) - although the types must not be mixed in a single 29 | call. 30 | 31 | WARNING: The powers that be at NCBI have been known to ban the IP 32 | addresses of users who abuse their servers (currently defined as less 33 | than 2 seconds between queries). Do NOT put this function in a tight 34 | loop or you may find your access revoked.
35 | } 36 | \value{ 37 | If the option "data" is used, an object of type XMLDoc is returned, 38 | unless there was an error with the query in which case an object of 39 | type try-error is returned. 40 | 41 | If the option "browser" is used, nothing is returned. 42 | } 43 | 44 | \author{R. Gentleman } 45 | 46 | \seealso{\code{\link{pubmed}}, \code{\link[XML]{xmlTreeParse}}} 47 | 48 | \examples{ 49 | ## Use UIDs to get data in both browser & data forms 50 | 51 | if ( interactive() ) { 52 | disp <- c("data","browser") 53 | } else { 54 | disp <- "data" 55 | } 56 | 57 | for (dp in disp) 58 | genbank("12345","9997",disp=dp,type="uid") 59 | 60 | ## Use accession numbers to retrieve browser info 61 | if ( interactive() ) 62 | genbank("U03397","AF030427",disp="browser") 63 | } 64 | \keyword{interface } 65 | -------------------------------------------------------------------------------- /man/HTMLPage-class.Rd: -------------------------------------------------------------------------------- 1 | \name{HTMLPage-class} 2 | \docType{class} 3 | \alias{HTMLPage-class} 4 | \alias{HTMLPage} 5 | \alias{FramedHTMLPage} 6 | \alias{FramedHTMLPage-class} 7 | \alias{fileName} 8 | \alias{mainPage} 9 | \alias{sidePage} 10 | \alias{pageText} 11 | \alias{toFile} 12 | \alias{topPage} 13 | \alias{pageTitle} 14 | \alias{HTMLPage,HTMLPage-method} 15 | \alias{FramedHTMLPage,HTMLPage-method} 16 | \alias{fileName,HTMLPage-method} 17 | \alias{mainPage,HTMLPage-method} 18 | \alias{sidePage,HTMLPage-method} 19 | \alias{pageText,HTMLPage-method} 20 | \alias{toFile,HTMLPage-method} 21 | \alias{topPage,HTMLPage-method} 22 | \alias{pageTitle,HTMLPage-method} 23 | \alias{show,HTMLPage-method} 24 | \alias{initialize,FramedHTMLPage-method} 25 | \alias{mainPage,FramedHTMLPage-method} 26 | \alias{show,FramedHTMLPage-method} 27 | \alias{sidePage,FramedHTMLPage-method} 28 | \alias{toFile,FramedHTMLPage-method} 29 | \alias{topPage,FramedHTMLPage-method} 30 | \title{Classes to represent HTML pages} 31 | 
\description{Class \code{HTMLPage} and \code{FramedHTMLPage} are a pair 32 | of experimental classes used to explore concepts of representing HTML 33 | pages using S4 objects.} 34 | \section{Slots}{ 35 | \describe{ 36 | \item{\code{fileName}:}{Object of class \code{"character"} The 37 | filename of the HTML page} 38 | \item{\code{pageText}:}{Object of class \code{"character"} The text 39 | of the HTML page} 40 | \item{\code{pageTitle}:}{Object of class \code{"character"} The 41 | title of the HTML page} 42 | \item{\code{topPage}:}{Object of class \code{"HTMLPage"} The header 43 | page for a FramedHTMLPage} 44 | \item{\code{sidePage}:}{Object of class \code{"HTMLPage"} The side 45 | index page for a FramedHTMLPage} 46 | \item{\code{mainPage}:}{Object of class \code{"HTMLPage"} The 47 | primary page for a FramedHTMLPage} 48 | } 49 | } 50 | 51 | \section{Methods}{ 52 | \describe{ 53 | \item{show}{\code{signature(object = "HTMLPage")}: Describes 54 | information about the page} 55 | \item{fileName}{\code{signature(object = "HTMLPage")}: Gets the 56 | fileName slot} 57 | \item{pageText}{\code{signature(object = "HTMLPage")}: Gets the 58 | pageText slot} 59 | \item{pageTitle}{\code{signature(object = "HTMLPage")}: Gets the 60 | pageTitle slot} 61 | \item{toFile}{\code{signature(object = "HTMLPage")}: Writes the page 62 | out to the file designated by the fileName slot} 63 | } 64 | } 65 | \author{Jeff Gentry} 66 | \note{ 67 | These classes are currently experimental. 68 | 69 | FramedHTMLPage is modeled after the framing layout of the Bioconductor 70 | website (www.bioconductor.org). 71 | } 72 | 73 | \examples{ 74 | ##---- Should be DIRECTLY executable !! 
---- 75 | } 76 | \keyword{classes} 77 | -------------------------------------------------------------------------------- /man/readGEOAnn.Rd: -------------------------------------------------------------------------------- 1 | \name{readGEOAnn} 2 | \alias{readGEOAnn} 3 | \alias{readIDNAcc} 4 | \alias{getGPLNames} 5 | \alias{getSAGEFileInfo} 6 | \alias{getSAGEGPL} 7 | \alias{readUrl} 8 | \title{Function to extract data from the GEO web site} 9 | \description{ 10 | Data files that are available at GEO web site are identified by GEO 11 | accession numbers. Given the url for the CGI script at GEO and 12 | a GEO accession number, the functions extract data from the web site 13 | and return a matrix containing the data. 14 | } 15 | \usage{ 16 | readGEOAnn(GEOAccNum, url = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?") 17 | readIDNAcc(GEOAccNum, url = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?") 18 | getGPLNames(url ="https://www.ncbi.nlm.nih.gov/geo/query/browse.cgi?") 19 | getSAGEFileInfo(url = 20 | "https://www.ncbi.nlm.nih.gov/geo/query/browse.cgi?view=platforms&prtype=SAGE&dtype=SAGE") 21 | getSAGEGPL(organism = "Homo sapiens", enzyme = c("NlaIII", "Sau3A")) 22 | readUrl(url) 23 | } 24 | \arguments{ 25 | \item{url}{\code{url} the url for the CGI script at GEO} 26 | \item{GEOAccNum}{\code{GEOAccNum} a character string for the GEO 27 | accession number of a desired file (e. g. GPL97)} 28 | \item{organism}{\code{organism} a character string for the name of the 29 | organism of interest} 30 | \item{enzyme}{\code{enzyme} a character string that can be either 31 | NlaIII or Sau3A for the enzyme used to create SAGE tags} 32 | } 33 | \details{ 34 | \code{url} is the CGI script that processes user's 35 | request. \code{\link{readGEOAnn}} invokes the CGI by passing a GEO 36 | accession number and then processes the data file obtained.
37 | 38 | \code{\link{readIDNAcc}} calls \code{\link{readGEOAnn}} to read the 39 | data and then extracts the columns for probe ids and accession numbers. 40 | The \code{GEOAccNum} has to be the id for an Affymetrix chip. 41 | 42 | \code{\link{getGPLNames}} parses the html file that lists GEO 43 | accession numbers and descriptions of the array represented by the 44 | corresponding GEO accession numbers. 45 | } 46 | \value{ 47 | Both \code{\link{readGEOAnn}} and \code{\link{readIDNAcc}} return a 48 | matrix. 49 | 50 | \code{\link{getGPLNames}} returns a named vector of the names of 51 | commercial arrays. The names of the vector are the corresponding GEO 52 | accession numbers. 53 | } 54 | \references{\url{www.ncbi.nlm.nih.gov/geo}} 55 | \author{Jianhua Zhang} 56 | 57 | \examples{ 58 | # Get array names and GEO accession numbers 59 | #geoAccNums <- getGPLNames() 60 | # Read the annotation data file for HG-U133A which is GPL96 based on 61 | # examining geoAccNums 62 | #temp <- readGEOAnn(GEOAccNum = "GPL96") 63 | #temp2 <- readIDNAcc(GEOAccNum = "GPL96") 64 | } 65 | \keyword{manip} 66 | 67 | -------------------------------------------------------------------------------- /man/homoData-class.Rd: -------------------------------------------------------------------------------- 1 | \name{homoData-class} 2 | \docType{class} 3 | \alias{homoData-class} 4 | \alias{homoData} 5 | \alias{homoLL} 6 | \alias{homoOrg} 7 | \alias{homoType} 8 | \alias{homoURL} 9 | \alias{homoACC} 10 | \alias{homoHGID} 11 | %\alias{show} 12 | \alias{homoPS} 13 | \alias{orgNameNCode} 14 | \alias{homoPS,homoData-method} 15 | \alias{homoLL,homoData-method} 16 | \alias{homoOrg,homoData-method} 17 | \alias{homoType,homoData-method} 18 | \alias{homoURL,homoData-method} 19 | \alias{homoACC,homoData-method} 20 | \alias{homoHGID,homoData-method} 21 | \alias{show,homoData-method} 22 | \title{Class "homoData"} 23 | \description{A class to present data for HomoloGene data of a matching 24 | sequence} 25 |
\section{Objects from the Class}{ 26 | Objects can be created by calls of the form \code{new("homoData", ...)}. 27 | } 28 | \section{Slots}{ 29 | \describe{ 30 | \item{\code{homoOrg}:}{Object of class \code{"character"} the 31 | scientific name of the organism of interest} 32 | \item{\code{homoLL}:}{Object of class \code{"numeric"} the LocusLink 33 | id of the gene of interest} 34 | \item{\code{homoType}:}{Object of class \code{"character"} the type of 35 | similarity. Valid values include B - a reciprocal best match 36 | between 3 or more organisms, b - a reciprocal best match, and c - 37 | a curated homology relationship} 38 | \item{\code{homoPS}:}{Object of class \code{"numeric"} percent 39 | similarity value} 40 | \item{\code{homoURL}:}{Object of class \code{"character"} the URL for 41 | curated homology relationship} 42 | \item{\code{homoACC}:}{Object of class \code{"character"} the 43 | accession number} 44 | \item{\code{homoHGID}:}{Object of class \code{"numeric"} the 45 | internal HomologGeneID} 46 | } 47 | } 48 | \section{Methods}{ 49 | \describe{ 50 | \item{homoPS}{\code{signature(object = "homoData")}: the get function for 51 | slot \code{homoPS}} 52 | \item{homoLL}{\code{signature(object = "homoData")}: the get function 53 | for slot \code{homoLL}} 54 | \item{homoOrg}{\code{signature(object = "homoData")}: the get function 55 | for slot \code{homoOrg}} 56 | \item{homoType}{\code{signature(object = "homoData")}: the get function 57 | for slot \code{homoType}} 58 | \item{homoURL}{\code{signature(object = "homoData")}: the get function 59 | for slot \code{homoURL}} 60 | \item{homoACC}{\code{signature(object = "homoData")}: the get function 61 | for slot \code{homoACC}} 62 | \item{homoHGID}{\code{signature(object = "homoData")}: the get 63 | function for slot \code{homoHGID}} 64 | } 65 | } 66 | \references{\url{ftp://ftp.ncbi.nih.gov/pub/HomoloGene/README}} 67 | \author{Jianhua Zhang} 68 | 69 | \examples{ 70 | new("homoData", homoPS = 82.3, homoLL =
2324853, homoOrg = "Homo sapins", 71 | homoType = "B", homoURL = "", homoHGID = 12345) 72 | } 73 | \keyword{classes} 74 | -------------------------------------------------------------------------------- /man/setRepository.Rd: -------------------------------------------------------------------------------- 1 | \name{setRepository} 2 | \alias{setRepository} 3 | \alias{getRepositories} 4 | \alias{clearRepository} 5 | \title{Functions to add arbitrary repositories} 6 | 7 | \description{These functions allow end users to add arbitrary 8 | repositories for use with the \code{htmlpage} function. 9 | } 10 | \usage{ 11 | setRepository(repository, FUN, ..., verbose=TRUE) 12 | getRepositories() 13 | clearRepository(repository, verbose=TRUE) 14 | } 15 | \arguments{ 16 | \item{repository}{A character name for the repository.} 17 | \item{FUN}{A function to build hyperlinks for the repository. See 18 | details for more information.} 19 | \item{...}{Allows one to pass arbitrary code to underlying functions.} 20 | \item{verbose}{Output warning messages?} 21 | } 22 | \details{These functions allow end users to add, view, and remove repositories 23 | for use with the \code{htmlpage} function. \code{getRepositories} will 24 | output a vector of names for available 25 | repositories. \code{clearRepository} can be used to remove a 26 | repository if so desired. \code{setRepository} can be used to add a 27 | repository. See the examples section for the format of the FUN 28 | argument. 29 | 30 | Once a new repository has been set, the \code{htmlpage} function can 31 | be called using the name of the new repository as a value in the 32 | repository argument (e.g., htmlpage(, repository = 33 | list("newrepositoryname")) 34 | } 35 | \author{Martin Morgan } 36 | \examples{ 37 | 38 | ## A simple fake URI 39 | repofun <- function(ids, ...) 
40 | paste("http://www.afakeuri.com/", ids, sep = "") 41 | 42 | setRepository("simple", repofun) 43 | 44 | ## More complicated, we want to make sure that 45 | ## NAs get converted to empty cells 46 | 47 | repofun <- function(ids, ...){ 48 | bIDs <- which(is.na(ids)) 49 | out <- paste("http://www.afakeuri.com/", ids, sep = "") 50 | out[bIDs] <- " " 51 | out 52 | } 53 | 54 | setRepository("complex", repofun) 55 | 56 | ## More complicated URI where we need to pass more information 57 | ## An example is Ensembl, which requires a species as part of the URI 58 | ## Since htmlpage() has an '...' argument, we can pass arbitrary 59 | ## arguments to this function that will be passed down to our 60 | ## repfun. Here we assume the argument species="Homo_sapiens" has been 61 | ## included in the call to htmlpage(). 62 | 63 | 64 | repofun <- function(ids, ...){ 65 | if(!is.null(list(...)$species)) 66 | species <- list(...)$species 67 | else 68 | stop("To make links for Ensembl, you need to pass a 'species' argument.", 69 | call. = FALSE) 70 | out <- paste("http://www.ensembl.org/", species, "/Search/Summary?species=", 71 | species, ";idx=;q=", ids, sep = "") 72 | out 73 | } 74 | 75 | setRepository("species_arg", repofun) 76 | 77 | } 78 | \keyword{manip} 79 | -------------------------------------------------------------------------------- /man/pmAbst2HTML.Rd: -------------------------------------------------------------------------------- 1 | \name{pmAbst2HTML} 2 | \alias{pmAbst2HTML} 3 | \title{HTML Generation for PubMed Abstracts} 4 | \description{ 5 | This function will take a \code{pubMedAbst} object, or a list of these 6 | objects and generate a web page that will list the titles of 7 | the abstracts and link to their full page on PubMed 8 | } 9 | \usage{ 10 | pmAbst2HTML(absts, filename, title, frames = FALSE, table.center = TRUE) 11 | } 12 | \arguments{ 13 | \item{absts}{A list of \code{pubMedAbst} (or a single object)} 14 | \item{filename}{The output filename. 
If \code{frames} is 15 | \code{FALSE}, this is the name of the single output file and 16 | defaults to \code{absts.html}. Otherwise, this is taken to be the 17 | base of a set of filenames, and the default base is the empty 18 | string. See \code{value} for more information on output files.} 19 | \item{title}{ Extra title information for your listing} 20 | \item{frames}{If \code{frames} is \code{TRUE}, the resulting page will 21 | use HTML frames, resulting in a more complex set of output pages.} 22 | \item{table.center}{If TRUE, will center the listing of abstracts} 23 | } 24 | \details{ 25 | This function uses the \code{Entrez} functionality provided by NCBI to 26 | retrieve the abstract URL at the PubMed site. It will then create a 27 | tabular webpage which will list the titles of the abstracts provided 28 | and have them link to the appropriate PubMed page. If \code{frames} 29 | is \code{TRUE}, the table of links will be on the left hand side of 30 | the page and the right hand will link directly to the appropriate 31 | PubMed page. 32 | } 33 | \value{ 34 | If \code{frames} is \code{FALSE}, a simple HTML file is created with 35 | the name specified by \code{filename}. 36 | 37 | If \code{frames} is \code{TRUE}, then there are four HTML files 38 | created, of the form \code{XXXtop.html}, \code{XXXside.html}, 39 | \code{XXXmain.html} and \code{XXXindex.html}, where \code{XXX} is the 40 | string provided by \code{filename}. 
41 | } 42 | \author{Jeff Gentry} 43 | 44 | \seealso{\code{pubMedAbst}} 45 | \examples{ 46 | x <- pubmed("9695952","8325638","8422497") 47 | a <- xmlRoot(x) 48 | numAbst <- length(xmlChildren(a)) 49 | absts <- list() 50 | for (i in 1:numAbst) { 51 | absts[[i]] <- buildPubMedAbst(a[[i]]) 52 | } 53 | ## First try it w/o frames - using a temporary 54 | ## file for the output 55 | fname <- tempfile() 56 | pmAbst2HTML(absts,filename=fname) 57 | 58 | if (interactive()) 59 | browseURL(paste("file://",fname,sep="")) 60 | 61 | ## Now try it w/ frames, using temporary files again. 62 | fnameBase <- tempfile() 63 | pmAbst2HTML(absts,filename=fnameBase, frames=TRUE) 64 | 65 | if (interactive()) 66 | browseURL(paste("file://",fnameBase,"index.html",sep="")) 67 | 68 | } 69 | \keyword{utilities} 70 | -------------------------------------------------------------------------------- /R/readGEOAnn.R: -------------------------------------------------------------------------------- 1 | 2 | # Query the GEO database. 
url is the common CGI script at GEO 3 | # and GEOAccNum is the GEO accession number representing a file in the 4 | # database. Only the "ID" and "GB_ACC" columns of readGEOAnn()'s result are returned. 5 | readIDNAcc <- function(GEOAccNum, url = 6 | "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?"){ 7 | temp <- readGEOAnn(GEOAccNum, url) 8 | return(temp[,c("ID", "GB_ACC")]) 9 | } 10 | # Return column 1 of getSAGEFileInfo() for the rows whose column 2 equals 'organism' and whose column 3 equals 'enzyme'. 11 | getSAGEGPL <- function(organism = "Homo sapiens", 12 | enzyme = c("NlaIII", "Sau3A")){ 13 | 14 | enzyme <- match.arg(enzyme) 15 | 16 | SAGEFiles <- getSAGEFileInfo() 17 | return(SAGEFiles[SAGEFiles[,2] == organism & SAGEFiles[,3] == 18 | enzyme, 1]) 19 | } 20 | # Download the GEO SAGE platform listing with readUrl() and return a three-column matrix (GPL number, organism, enzyme type). 21 | getSAGEFileInfo <- function(url = 22 | "https://www.ncbi.nlm.nih.gov/geo/query/browse.cgi?view=platforms&prtype=SAGE&dtype=SAGE"){ 23 | temp <- readUrl(url) 24 | # Get the GPL number, organism, and enzyme type 25 | temp <- matrix(temp[grep("(GPL.*)", "\\1", temp[,1]) 28 | temp[,2] <- gsub(".*>(.*)", "\\1", temp[,2]) 29 | temp[,3] <- gsub(".*>(.*):.*", "\\1", temp[,3]) 30 | 31 | return(temp) 32 | } 33 | 34 | 35 | # Query the GEO database. url is the common CGI script at GEO 36 | # and GEOAccNum is the GEO accession number representing a file in the 37 | # database. The result is a matrix whose column names are taken from the first tab-delimited line. 38 | readGEOAnn <- function(GEOAccNum, url = 39 | "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?"){ 40 | 41 | temp <- readUrl(paste(url, "acc=", GEOAccNum, 42 | "&view=data&form=text&targ=self", sep = "")) 43 | # Keep only the tab-delimited lines; this drops the header lines that come with the file 44 | temp <- temp[grep("\t", temp)] 45 | # Add NAs to lines with no value for the last column 46 | temp <- strsplit(gsub("\t$", "\tNA", temp), "\t") 47 | # Convert to a matrix, one row per line (presumably every line splits into the same number of fields -- TODO confirm) 48 | temp <- t(sapply(temp, unlist)) 49 | # The first row holds the column names: use it as colnames, then drop it. 50 | colnames(temp) <- temp[1,] 51 | return(temp[-1,]) 52 | } 53 | 54 | # Read from GEO and map GEO accession numbers to array names. 
55 | getGPLNames <- function(url = 56 | "https://www.ncbi.nlm.nih.gov/geo/query/browse.cgi?"){ 57 | temp <- readUrl(paste(url, 58 | "view=platforms&prtype=nucleotide&dtype=commercial", 59 | sep = "")) 60 | 61 | temp <- temp[grep("(.*)$", "\\1", temp[,6]) 65 | names(chipNames) <- gsub(".*>(.*)$", "\\1", temp[,1]) 66 | 67 | return(chipNames) 68 | } 69 | # Open 'url' for reading and return all of its lines; stops with "Can't connect to url <url>" if the connection cannot be opened. NOTE(review): show.error.messages is forced back to TRUE rather than restored to its previous value, and the connection is not closed if readLines() fails -- confirm whether on.exit() handling is wanted. 70 | readUrl <- function(url){ 71 | options(show.error.messages = FALSE) 72 | con <- try(url(url, open = "r")) 73 | options(show.error.messages = TRUE) 74 | if(inherits(con, "try-error")){ 75 | stop(paste("Can't connect to url", url)) 76 | } 77 | temp <- readLines(con) 78 | close(con) 79 | return(temp) 80 | } 81 | -------------------------------------------------------------------------------- /man/pubMedAbst-class.Rd: -------------------------------------------------------------------------------- 1 | \name{pubMedAbst-class} 2 | \docType{class} 3 | \alias{pubMedAbst-class} 4 | \alias{pubMedAbst} 5 | \alias{authors} 6 | \alias{abstText} 7 | \alias{articleTitle} 8 | \alias{journal} 9 | \alias{pubDate} 10 | \alias{pmid} 11 | \alias{pubMedAbst,pubMedAbst-method} 12 | \alias{authors,pubMedAbst-method} 13 | \alias{abstText,pubMedAbst-method} 14 | \alias{articleTitle,pubMedAbst-method} 15 | \alias{journal,pubMedAbst-method} 16 | \alias{pubDate,pubMedAbst-method} 17 | \alias{pmid,pubMedAbst-method} 18 | \alias{show,pubMedAbst-method} 19 | \title{Class pubMedAbst, a class to handle PubMed abstracts, and methods 20 | for processing them.} 21 | \description{ This is a class representation for PubMed abstracts. 
} 22 | \section{Creating Objects}{ 23 | \code{ new('pubMedAbst',}\cr 24 | \code{ authors = ...., # Object of class vector}\cr 25 | \code{ pmid = ...., # Object of class character}\cr 26 | \code{ abstText = ...., # Object of class character}\cr 27 | \code{ articleTitle = ...., # object of class character}\cr 28 | \code{ journal = ...., # Object of class character}\cr 29 | \code{ pubDate = ...., # Object of class character}\cr 30 | \code{ )}} 31 | \section{Slots}{ 32 | \describe{ 33 | \item{\code{pmid}:}{Object of class \code{"character"} The PubMed ID 34 | for this paper. } 35 | \item{\code{authors}:}{Object of class \code{"vector"} The authors 36 | of the paper. } 37 | \item{\code{abstText}:}{Object of class \code{"character"} The 38 | contained text of the abstract. } 39 | \item{\code{articleTitle}:}{Object of class \code{"character"} The 40 | title of the article the abstract pertains to. } 41 | \item{\code{journal}:}{Object of class \code{"character"} The journal 42 | the article was published in. } 43 | \item{\code{pubDate}:}{Object of class \code{"character"} The date the 44 | journal was published. 
} 45 | } 46 | } 47 | 48 | \section{Methods}{ 49 | \describe{ 50 | \item{pmid}{\code{signature(object = "pmid")}: An accessor function 51 | for \code{pmid}} 52 | \item{abstText}{\code{signature(object = "pubMedAbst")}: An accessor 53 | function for \code{abstText}} 54 | \item{articleTitle}{\code{signature(object = "pubMedAbst")}: An accessor 55 | function for \code{articleTitle} } 56 | \item{authors}{\code{signature(object = "pubMedAbst")}: An accessor 57 | function for \code{authors} } 58 | \item{journal}{\code{signature(object = "pubMedAbst")}: An accessor 59 | function for \code{journal} } 60 | \item{pubDate}{\code{signature(object = "pubMedAbst")}: An accessor 61 | function for \code{pubDate}} 62 | } 63 | } 64 | \author{ Jeff Gentry } 65 | 66 | \seealso{\code{\link{pubmed}}, \code{\link{genbank}}} 67 | \examples{ 68 | x <- pubmed("9695952","8325638","8422497") 69 | a <- xmlRoot(x) 70 | numAbst <- length(xmlChildren(a)) 71 | absts <- list() 72 | for (i in 1:numAbst) { 73 | absts[[i]] <- buildPubMedAbst(a[[i]]) 74 | } 75 | } 76 | \keyword{classes} 77 | -------------------------------------------------------------------------------- /man/blastSequences.Rd: -------------------------------------------------------------------------------- 1 | \name{blastSequences} 2 | \alias{blastSequences} 3 | 4 | \title{ 5 | Run a blast query to NCBI for either a string or an entrez gene ID and 6 | then return a series of MultipleAlignment objects. 7 | } 8 | 9 | \description{ 10 | This function sends a query to NCBI as a string of sequence or an 11 | entrez gene ID and then returns a series of MultipleAlignment objects. 12 | } 13 | 14 | \usage{ 15 | blastSequences(x, database, hitListSize, filter, expect, program, 16 | timeout=40, as=c("DNAMultipleAlignment", "data.frame", "XML")) 17 | } 18 | 19 | \arguments{ 20 | \item{x}{ 21 | A sequence as a character vector or an integer corresponding to an 22 | entrez gene ID. 
Submit multiple sequences as a length-1 character 23 | vector, \code{x = ">ID-1\nACATGCTA\n>ID-2\nAAACCACTT"}. 24 | } 25 | \item{database}{ 26 | Which NCBI database to use. If not \dQuote{blastn}, then set 27 | \code{as="XML"} 28 | } 29 | \item{hitListSize}{ 30 | Number of hits to keep. 31 | } 32 | \item{filter}{ 33 | Sequence filter; \dQuote{L} for Low Complexity, \dQuote{R} for Human Repeats, 34 | \dQuote{m} for Mask lookup 35 | } 36 | \item{expect}{ 37 | The BLAST \sQuote{expect} value above which matches will be 38 | returned. 39 | } 40 | \item{program}{ 41 | Which program do you want to use for blast. 42 | } 43 | \item{timeout}{ 44 | Approximate maximum length of time, in seconds, to wait for a result. 45 | } 46 | \item{as}{ 47 | character(1) indicating whether the result from the NCBI server 48 | should be parsed to a list of \code{DNAMultipleAlignment} instances, 49 | represented as a \code{data.frame}, or returned as XML. 50 | } 51 | 52 | } 53 | 54 | \details{ 55 | Right now the function only works for "blastn". 56 | 57 | The NCBI URL api used by this function is documented at 58 | \url{https://www.ncbi.nlm.nih.gov/blast/Doc/urlapi.html} 59 | } 60 | 61 | \value{ 62 | 63 | By default, a series of \code{DNAMultipleAlignment} (see 64 | \code{\link[Biostrings]{MultipleAlignment-class}} 65 | objects. Alternatively, a \code{data.frame} or XML document returned 66 | from the NCBI server. The \code{data.frame} is a \sQuote{long form} 67 | representation of the \sQuote{Iteration}, \sQuote{Hit} and 68 | \sQuote{Hsp} results returned from the server. The XML document is the 69 | result of the \code{xmlParse} function of the XML library, and follows 70 | the format described by 71 | \url{https://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd} and 72 | \url{https://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.mod.dtd}. } 73 | 74 | \author{ 75 | M. 
Carlson 76 | } 77 | 78 | \examples{ 79 | 80 | ## x can be an entrez gene ID 81 | blastSequences(17702, timeout=40, as="data.frame") 82 | 83 | if (interactive()) { 84 | 85 | ## or x can be a sequence 86 | blastSequences(x = "GGCCTTCATTTACCCAAAATG") 87 | 88 | ## hitListSize does not promise that you will get the number of 89 | ## matches you want.. It will just try to get that many. 90 | blastSequences(x = "GGCCTTCATTTACCCAAAATG", hitListSize="20") 91 | 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /man/getSYMBOL.Rd: -------------------------------------------------------------------------------- 1 | \name{getSYMBOL} 2 | \alias{getSYMBOL} 3 | \alias{getGO} 4 | \alias{getGOdesc} 5 | \alias{getPMID} 6 | \alias{getEG} 7 | \alias{lookUp} 8 | \alias{getUniqAnnItem} 9 | \title{Functions to deal with Data Packages} 10 | \description{ 11 | The functions documented here are intended to make it easier to map 12 | from a set of manufacturers identifiers (such as you will get from the 13 | chips etc) to other identifiers. 14 | } 15 | \usage{ 16 | getSYMBOL(x, data) 17 | getEG(x, data) 18 | getGO(x, data) 19 | getPMID(x, data) 20 | getGOdesc(x, which) 21 | lookUp(x, data, what, load = FALSE) 22 | getUniqAnnItem() 23 | } 24 | \arguments{ 25 | \item{x}{The identifiers to be mapped (usually manufacturer)} 26 | \item{data}{The basename of the meta-data package to be used.} 27 | \item{what}{\code{what} a character string for the name of an 28 | annotation element of an annotation data package} 29 | \item{which}{\code{which} a character string in the form of MF, BP, 30 | CC, or ANY to indicated the GO categories of interest} 31 | \item{load}{A logical value indicating whether to attempt to load the 32 | required annotation data package if it isn't already loaded.} 33 | } 34 | \details{ 35 | Users must supply the basename of the meta-data package that they 36 | want to use to provide the mappings. 
The name of the meta-data 37 | package is the same as the basename. 38 | 39 | Appropriate translations are done. In some cases such as \code{getEG} 40 | and \code{getSYMBOL} there will only be one match and a vector is 41 | returned. In other cases such as \code{getPMID} and \code{getGO} there 42 | may be multiple matches and a list is returned. 43 | 44 | For \code{getGOdesc} \code{x} contains GO identifiers (not 45 | manufacturer identifiers) and the output is a list of GOTerms objects, 46 | if \code{which} specifies some subset of the ontologies (MF, BP or CC) 47 | then only terms for that ontology are retained. 48 | 49 | \code{lookUp} is a general function that can be used to look 50 | up matches. All other translation functions use \code{lookUp} 51 | 52 | A BioC annotation data package contains annotation data environments 53 | whose names are package name (e. g. hgu95av2) + element name 54 | (e. g. PMID). \code{what} must be one of the element names for the 55 | given data package. 56 | 57 | \code{getUniqAnnItem} keeps track of the annotation elements that have 58 | one to one mappings. 59 | } 60 | \value{ 61 | Either a vector or a list depending on whether multiple values per 62 | input are possible. 63 | } 64 | \author{R. 
Gentleman} 65 | \seealso{\code{\link{mget}}} 66 | \examples{ 67 | library("hgu95av2.db") 68 | library("GO.db") 69 | 70 | data(sample.ExpressionSet) 71 | gN <- featureNames(sample.ExpressionSet)[100:105] 72 | lookUp(gN, "hgu95av2", "SYMBOL") 73 | 74 | # Same as lookUp for SYMBOL except the return is a vector 75 | getSYMBOL(gN,"hgu95av2" ) 76 | gg <- getGO(gN, "hgu95av2") 77 | lookUp(gg[[2]][[1]][["GOID"]], "GO", "TERM") 78 | 79 | # Same as lookUp for TERM 80 | getGOdesc(gg[[2]][[1]][["GOID"]], "ANY") 81 | 82 | # For BP only 83 | getGOdesc(gg[[2]][[1]][["GOID"]], "BP") 84 | getEG(gN, "hgu95av2") 85 | getPMID(gN, "hgu95av2") 86 | } 87 | \keyword{manip} 88 | -------------------------------------------------------------------------------- /man/LL2homology.Rd: -------------------------------------------------------------------------------- 1 | \name{LL2homology} 2 | \alias{LL2homology} 3 | \alias{HGID2homology} 4 | \alias{ACC2homology} 5 | \title{DEFUNCT Functions that find the homology data for a given set of 6 | LocusLink ids or HomoloGeneIDs} 7 | \description{ 8 | These functions are DEFUNCT. All this functionality has been 9 | replaced by inPARANOID packages. 10 | Given a set of LocusLink ids or NCBI HomoloGeneIDs, the functions obtain the 11 | homology data and represent them as a list of sub-lists using the 12 | homology data package for the organism of interest. A sub-list can be of 13 | length 1 or greater depending on whether a LocusLink id can be mapped 14 | to one or more HomoloGeneIDs. 
15 | } 16 | \usage{ 17 | LL2homology(homoPkg, llids) 18 | HGID2homology(hgid, homoPkg) 19 | ACC2homology(accs, homoPkg) 20 | } 21 | \arguments{ 22 | \item{llids}{\code{llids} a vector of character strings or numberic 23 | numbers for a set of LocusLink ids whose homologous genes in other 24 | organisms are to be found} 25 | \item{hgid}{\code{hgid} a named vector of character strings or numberic 26 | numbers for a set of HomoloGeneIDs whose homologous genes in other 27 | organisms are to be found. Names of the vector give the code used by 28 | NCBI for organisms} 29 | \item{accs}{\code{accs} a vector of character strings for a set of 30 | GenBank Accession numbers} 31 | \item{homoPkg}{\code{homoPkg} a character string for the name of the 32 | homology data package for a given organism, which is a short version 33 | of the scientific name of the organism plus homology (e. g. hsahomology)} 34 | } 35 | \details{ 36 | The homology data package has to be installed before executing any of 37 | the two functions. 38 | 39 | Each sub-list has the following elements: 40 | 41 | homoOrg - a named vector of a single character string whose value 42 | is the scientific name of the organism and name the numeric code 43 | used by NCBI for the organism. 44 | 45 | homoLL - an integer for LocusLink id. 46 | 47 | homoHGID - an integer for internal HomoloGeneID. 48 | 49 | homoACC - a character string for GenBank accession number of the 50 | best matching sequence of the organism. 51 | 52 | homoType - a single letter for the type of similarity measurement 53 | between the homologous genes. homoType can be either B (reciprocal 54 | best best between three or more organisms), b (reciprocal best 55 | match between two organisms), or c (curated homology relationship 56 | between two organisms). 57 | 58 | homoPS - a percentage value measured as the percent of identity of 59 | base pair alignment between the homologous sequences. 
60 | 61 | homoURL - a url to the source if the homology relationship is a 62 | curated orthology. 63 | 64 | Sub-lists with homoType = B or b will not have any value 65 | for homoURL and objects with homoType = c will not have any value 66 | for homoPS. 67 | } 68 | \value{ 69 | Both functions returns a list of sub-lists containing data for 70 | homologous genes in other organisms. 71 | } 72 | \references{\url{https://www.ncbi.nlm.nih.gov/entrez/query.fcgi?=homologene}} 73 | \author{Jianhua Zhang} 74 | 75 | \examples{ 76 | \dontrun{ 77 | ## hsahomology is a defunct package! 78 | if(require("hsahomology")){ 79 | llids <- ls(env = hsahomologyLL2HGID)[2:5] 80 | LL2homology("hsahomology", llids) 81 | } 82 | 83 | } 84 | } 85 | \keyword{misc} 86 | 87 | -------------------------------------------------------------------------------- /R/AnnMaps.R: -------------------------------------------------------------------------------- 1 | annObjPrefix <- function(name) { ## drop a trailing ".db" from 'name', if present 2 | if (length(grep("\\.db$", name))) 3 | substr(name, 1, nchar(name) - 3L) 4 | else 5 | name 6 | } 7 | ## Map 'name' to a package name of the requested type: "db" ensures a ".db" suffix, "env" removes it. 8 | annPkgName <- function(name, type=c("db", "env")) { 9 | type <- match.arg(type) 10 | if (length(grep("\\.db$", name))) 11 | if (type == "db") 12 | name 13 | else 14 | substr(name, 1, nchar(name) - 3L) 15 | else if (type == "db") 16 | paste(name, ".db", sep="") 17 | else 18 | name 19 | } 20 | 21 | ## For cases where there is not a Bimap, but where there is an AnnotationDb 22 | ## object with a columns() value that matches the map argument, we want getAnnMap 23 | ## to spawn up a FlatBimap object and return that. 
24 | ## Return the annotation object named <annObjPrefix(chip)><map> from the attached annotation package for 'chip' (attaching it first when load=TRUE); if no such object exists, fall back to a FlatBimap built from the package's db object when 'map' is one of its columns(). NOTE(review): when neither lookup succeeds the function falls off the end and returns NULL invisibly; also, 'pkg' reassigned inside the tryCatch handler is local to that handler, so the fallback below still uses the first-choice package name -- confirm both are intended. 25 | getAnnMap <- function(map, chip, load=TRUE, type=c("db", "env")) { 26 | typeMissed <- FALSE 27 | searchName <- NULL 28 | if (missing(type)) { 29 | typeMissed <- TRUE 30 | searchNames <- paste("package:", chip, c("", ".db"), sep="") 31 | searchPth <- search() 32 | whLoaded <- match(searchNames, searchPth) 33 | whLoaded <- whLoaded[!is.na(whLoaded)] 34 | if (length(whLoaded)) 35 | searchName <- searchPth[sort(whLoaded)][1] 36 | } else { 37 | badTypes <- type[!(type %in% c("db", "env"))] 38 | if (length(badTypes)) 39 | stop("unknown types in 'type' argument: ", 40 | paste(badTypes, collapse=", ")) 41 | } 42 | pkg <- annPkgName(name=chip, type=type[1]) 43 | if (is.null(searchName)) 44 | searchName <- paste("package", pkg, sep=":") 45 | pkgEnv <- tryCatch(as.environment(searchName), error=function(e) { 46 | if (load) { 47 | ok <- 48 | suppressWarnings(require(pkg, character.only=TRUE, 49 | quietly=TRUE)) 50 | if (!ok && length(type) > 1) { 51 | origPkg <- pkg 52 | for (t in type[2:length(type)]) { 53 | pkg <- annPkgName(name=chip, type=t) 54 | searchName <- paste("package", pkg, sep=":") 55 | if (suppressWarnings(require(pkg, character.only=TRUE, 56 | quietly=TRUE))) { 57 | if (!typeMissed) 58 | warning("getAnnMap: ", "package ", origPkg, 59 | " not available, ", "using ", pkg, " instead", 60 | call.=FALSE) 61 | ok <- TRUE 62 | break 63 | } 64 | } 65 | } 66 | if (!ok) 67 | stop("getAnnMap: ", "package ", pkg, " not available", 68 | call.=FALSE) 69 | as.environment(searchName) 70 | } else { 71 | stop("getAnnMap: ", pkg, " package not attached and load is FALSE", 72 | call.=FALSE) 73 | } 74 | }) 75 | mapName <- paste(annObjPrefix(chip), map, sep="") 76 | if(exists(mapName, envir=pkgEnv, inherits=FALSE)){ 77 | return( get(mapName, envir=pkgEnv, inherits=FALSE) ) 78 | }else{ 79 | ## pkg is a character string, but we need the object of that name, so evaluate it and 
80 | ## spawn up a new FlatBimap 81 | db <- eval(parse(text=pkg)) 82 | if(map %in% columns(db)){ ## if columns() says it's present 83 | return(AnnotationDbi:::makeFlatBimapUsingSelect(db, 84 | col=map)) 85 | } 86 | } 87 | } 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /man/chromLocation-class.Rd: -------------------------------------------------------------------------------- 1 | \name{chromLocation-class} 2 | \docType{class} 3 | \alias{chromLocation-class} 4 | \alias{chromLocation} 5 | \alias{dataSource} 6 | \alias{nChrom} 7 | \alias{chromNames} 8 | \alias{chromLocs} 9 | \alias{chromLengths} 10 | \alias{probesToChrom} 11 | \alias{geneSymbols} 12 | \alias{chromInfo} 13 | \alias{organism,chromLocation-method} 14 | \alias{dataSource,chromLocation-method} 15 | \alias{nChrom,chromLocation-method} 16 | \alias{chromNames,chromLocation-method} 17 | \alias{chromLocs,chromLocation-method} 18 | \alias{chromLengths,chromLocation-method} 19 | \alias{probesToChrom,chromLocation-method} 20 | \alias{geneSymbols,chromLocation-method} 21 | \alias{chromInfo,chromLocation-method} 22 | \alias{show,chromLocation-method} 23 | \title{Class chromLocation, a class for describing genes and their 24 | chromosome mappings.} 25 | \description{ 26 | This class provides chromosomal information provided by a 27 | Bioconductor metadata package. By creating the object once for a 28 | particular package, it can be used in a variety of locations without 29 | the need to recomputed values repeatedly. 
30 | } 31 | \section{Creating Objects}{ 32 | \code{ new('chromLocation',} 33 | \code{ organism = ...., # Object of class character}\cr 34 | \code{ dataSource = ...., # Object of class character}\cr 35 | \code{ chromLocs = ...., # Object of class list}\cr 36 | \code{ probesToChrom = ...., # Object of class ANY}\cr 37 | \code{ chromInfo = ...., # Object of class numeric}\cr 38 | \code{ geneSymbols = ...., # Object of class ANY}\cr 39 | \code{ )}} 40 | \section{Slots}{ 41 | \describe{ 42 | \item{\code{organism}:}{Object of class "character". The organism 43 | that these genes correspond to.} 44 | \item{\code{dataSource}:}{Object of class "character". The source of 45 | the gene data.} 46 | \item{\code{chromLocs}:}{Object of class "list". A list which 47 | provides specific location information for every gene.} 48 | \item{\code{probesToChrom}:}{An object with an environment-like API 49 | which will translate a probe identifier to chromosome it belongs 50 | to.} 51 | \item{\code{chromInfo}:}{A numerical vector representing each 52 | chromosome, where the names are the names of the chromosomes and 53 | the values are their lengths} 54 | \item{\code{geneSymbols}:}{An environment or an object with 55 | environment-like API that maps a probe ID to 56 | the appropriate gene symbol} 57 | } 58 | } 59 | \section{Methods}{ 60 | \describe{ 61 | \item{chromLengths}{(chromLocation): Gets the lengths of the 62 | chromosome for this organism} 63 | \item{chromLocs}{(chromLocation): Gets the 'chromLocs' attribute.} 64 | \item{chromNames}{(chromLocation): Gets the name of the chromosomes 65 | for this organism} 66 | \item{dataSource}{(chromLocation): Gets the 'dataSource' attribute.} 67 | \item{probesToChrom}{(chromLocation): Gets the 'probesToChrom' attribute.} 68 | \item{nChrom}{(chromLocation): gets the number of chromosomes this 69 | organism has} 70 | \item{organism}{(chromLocation): gets the 'organism' attribute.} 71 | \item{chromInfo}{Gets the 'chromInfo' attribute.} 72 | 
\item{geneSymbols}{Gets the 'geneSymbols' attribute.} 73 | } 74 | } 75 | \seealso{\code{\link{buildChromLocation}}} 76 | \examples{ 77 | library("hgu95av2.db") 78 | 79 | z <- buildChromLocation("hgu95av2") 80 | 81 | ## find the number of chromosomes 82 | nChrom(z) 83 | 84 | ## Find the names of the chromosomes 85 | chromNames(z) 86 | 87 | ## get the organism this object refers to 88 | organism(z) 89 | 90 | ## get the lengths of the chromosomes in this object 91 | chromLengths(z) 92 | } 93 | \keyword{classes} 94 | -------------------------------------------------------------------------------- /man/getTDRows.Rd: -------------------------------------------------------------------------------- 1 | \name{getQueryLink} 2 | \alias{getQueryLink} 3 | \alias{getQuery4UG} 4 | \alias{getQuery4SP} 5 | \alias{getQuery4OMIM} 6 | \alias{getQuery4GB} 7 | \alias{getQuery4Affy} 8 | \alias{getQuery4FB} 9 | \alias{getQuery4EN} 10 | \alias{getCells} 11 | \alias{getTDRows} 12 | \alias{getQuery4TR} 13 | \alias{getQuery4ENSEMBL} 14 | 15 | \title{Functions to create hypertext links that can be placed in a table 16 | cell of a HTML file } 17 | \description{ 18 | Given a vector of ids, the functions will create a vector of 19 | hypertext links to a defined public repositories such as 20 | LocusLink, UniGene .... The linkages can be placed in a html file 21 | constructed by \code{\link{htmlpage}.} 22 | } 23 | \usage{ 24 | getQueryLink(ids, repository = "ug", ...) 25 | getTDRows(ids, repository = "ug", ...) 26 | getCells(ids, repository = "ug", ...) 27 | getQuery4UG(ids, ...) 28 | getQuery4SP(ids, ...) 29 | getQuery4GB(ids, ...) 30 | getQuery4OMIM(ids, ...) 31 | getQuery4Affy(ids, ...) 32 | getQuery4FB(ids, ...) 33 | getQuery4EN(ids, ...) 34 | getQuery4TR(ids, ...) 35 | getQuery4ENSEMBL(ids, ...) 36 | } 37 | %- maybe also 'usage' for other objects documented here. 38 | \arguments{ 39 | \item{ids}{ A character vector of ids, or alternatively, a list 40 | containing character vectors of ids. 
These will be used to construct 41 | hypertext links. A list should be used in cases where there are 42 | multiple ids per gene.} 43 | \item{repository}{ A character string for the name of a public 44 | repository. Valid values include "ll", "ug", "gb", "sp", "omim", 45 | "affy", "en", and "fb". See the details section for more 46 | information. } 47 | \item{...}{Allows end user to pass additional arguments. See details 48 | for \code{\link{getQuery4ENSEMBL}} for more information.} 49 | } 50 | \details{ 51 | \code{\link{getQuery4GB}} constructs hypertext links to GenBank using the 52 | provided ids. 53 | 54 | \code{\link{getQuery4UG}} constructs hypertext links to UniGene using the 55 | provided ids. 56 | 57 | \code{\link{getQuery4Affy}} constructs hypertext links to Affymetrix using the 58 | provided ids. 59 | 60 | \code{\link{getQuery4SP}} constructs hypertext links to SwissProt using the 61 | provided ids. 62 | 63 | \code{\link{getQuery4OMIM}} constructs hypertext links to OMIM using the 64 | provided ids. 65 | 66 | \code{\link{getQuery4FB}} constructs hypertext links to FlyBase using 67 | the provided ids. 68 | 69 | \code{\link{getQuery4EN}} constructs hypertext links to EntrezGene 70 | using the provided ids. 71 | 72 | \code{\link{getQuery4TR}} constructs hypertext links to TAIR using the 73 | provided ids. 74 | 75 | \code{\link{getQuery4ENSEMBL}} constructs hypertext links to Ensembl 76 | using the provided ids. An additional 'species' argument must be passed 77 | to this function via the \code{...} argument to \code{htmlpage}. The 78 | form of the argument must be e.g., species="Homo_sapiens" for 79 | human. Note the capitalized genus and underscore (_) separator. 80 | 81 | \code{\link{getQueryLink}} directs calls to construct hypertext links using 82 | the provided ids. 83 | 84 | \code{\link{getTDRows}} constructs each row of the resulting table. 85 | 86 | \code{\link{getCells}} constructs each cell of the resulting table. 
87 | 88 | Note that some of these functions (\code{getQuery4OMIM}, 89 | \code{getQuery4UG}, \code{getQuery4FB}) attempt to 90 | return empty cells for ids that don't make sense, rather than broken 91 | links. For the other getQuery4XX functions, the end user must replace 92 | all nonsense ids with " " in order to have an empty cell. 93 | 94 | Also note that creating additional links is quite simple. First, define 95 | a new 'getQuery4XX()' function modeled on the existing functions, then 96 | add this function to the \code{getQueryLink} function. 97 | 98 | } 99 | \value{ 100 | Returns a vector of character strings representing the hypertext links. 101 | } 102 | 103 | \author{ Jianhua Zhang with further 104 | modifications by James W. MacDonald } 105 | 106 | 107 | \keyword{ manip }% __ONLY ONE__ keyword per line -------------------------------------------------------------------------------- /R/getPMInfo.R: --------------------------------------------------------------------------------
getPMInfo <- function(x) {
    #
    # getPMInfo: get medline-related info from a pubmed xml DOM tree.
    # Works with the result of the Bioconductor annotate::pubmed function.
    #
    # x must inherit from "XMLDocument"; any other input is an error.
    #
    # The result is a list with one entry per PubmedArticle node, named by
    # the value of the first node matching "/PubmedArticle/*/PMID".  Each
    # entry is itself a list holding whichever of "title", "MedlineTA",
    # "abstract", "JrnlInfo", "authors" and "chemlist" were recorded
    # before that PubmedArticle node was handled.
    #

    #
    # isTag: TRUE only for an XMLNode whose tag name equals 'tag'.
    # '&&' short-circuits, so xmlName() is never evaluated on an object
    # that is not an XMLNode (the elementwise '&' evaluated both sides).
    #
    isTag <- function(node, tag) {
        inherits(node, "XMLNode") && xmlName(node) == tag
    }

    #
    # tagVals: utility function for grabbing vector of
    # tag values from any DOM tree
    #
    tagVals <- function(x, tag) {
        store <- character(0)
        add <- function(node) {
            if (isTag(node, tag)) {
                store <<- c(store, xmlValue(node))
            }
            node
        }
        xmlDOMApply(x, add)
        store
    }

    if (!inherits(x, "XMLDocument")) stop("only applies to XMLDocument")

    #
    # State shared with the 'add' handler below.  'add' is called by
    # xmlDOMApply as it walks the tree and updates these through '<<-'.
    # NOTE(review): the logic assumes child nodes reach the handler before
    # their parent (e.g. Author before AuthorList) -- confirm against the
    # xmlDOMApply documentation.
    #
    pmarts <- list()          # finished articles, keyed by PMID
    pmart <- list()           # article currently being assembled
    jinfo <- character(0)     # journal info for the current article
    alist <- character(0)     # authors for the current article
    chemlist <- character(0)  # substances for the current article
    cura <- character(0)      # author currently being assembled

    add <- function(node) {
        if (!inherits(node, "XMLNode")) {
            return(node)
        }
        tag <- xmlName(node)

        if (tag == "ArticleTitle") {
            pmart[["title"]] <<- xmlValue(node)
        } else if (tag == "MedlineTA") {
            pmart[["MedlineTA"]] <<- xmlValue(node)
        } else if (tag == "AbstractText") {
            pmart[["abstract"]] <<- xmlValue(node)
        } else if (tag == "PubmedArticle") {
            # close the current article and start a fresh one
            id <- xmlValue(getNodeSet(node, "/PubmedArticle/*/PMID")[[1L]])
            pmarts[[id]] <<- pmart
            pmart <<- list()
        } else if (tag == "ISSN") {
            #
            # deal with journal info
            # this is an ugly part because tags like Year or Volume can
            # occur in different contexts.  Need to know something about
            # the parent, but we don't want to assume too much about the
            # sequence of nodes
            #
            jinfo <<- c(jinfo, ISSN = xmlValue(node))
        } else if (tag == "JournalIssue") {
            jikids <- xmlChildren(node)
            for (i in seq_along(jikids)) {
                kid <- jikids[[i]]
                kidName <- xmlName(kid)
                if (kidName == "Volume") {
                    jinfo <<- c(jinfo, vol = xmlValue(kid))
                } else if (kidName == "Issue") {
                    jinfo <<- c(jinfo, iss = xmlValue(kid))
                } else if (kidName == "PubDate") {
                    Year <- tagVals(kid, "Year")
                    Month <- tagVals(kid, "Month")
                    Day <- tagVals(kid, "Day")
                    jinfo <<- c(jinfo, year = Year, month = Month, day = Day)
                }
            }
            pmart[["JrnlInfo"]] <<- jinfo
            jinfo <<- character(0)
        } else if (tag == "AuthorList") {
            #
            # deal with author info
            #
            pmart[["authors"]] <<- alist
            alist <<- character(0)
        } else if (tag == "Author") {
            alist <<- c(alist, cura)
            cura <<- character(0)
        } else if (tag == "LastName") {
            # appended with no separator
            cura <<- paste(cura, xmlValue(node), sep = "")
        } else if (tag == "Initials") {
            # appended with paste's default single-space separator
            cura <<- paste(cura, xmlValue(node))
        } else if (tag == "ChemicalList") {
            #
            # deal with substance info
            #
            pmart[["chemlist"]] <<- chemlist
            chemlist <<- character(0)
        } else if (tag == "NameOfSubstance") {
            chemlist <<- c(chemlist, xmlValue(node))
        }
        node
    }

    xmlDOMApply(xmlRoot(x), add)
    pmarts
}

-------------------------------------------------------------------------------- /vignettes/chromLOC.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "HowTo: Build and use chromosomal information" 3 | author: 4 | - name: "Jeff Gentry" 5 | - name: "Kritika Verma" 6 | affiliation: "Vignette translation from Sweave to R Markdown / HTML" 7 | date: "`r format(Sys.time(), '%B %d, %Y')`" 8 | output: 9 | BiocStyle::html_document 10 | vignette: > 11 | %\VignetteIndexEntry{HowTo: Build and use chromosomal information} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | # Overview 17 | 18 | The annotate package provides a class that can be used to model 19 | chromosomal information about a species, using one of the metadata 20 | packages provided by Bioconductor. This class contains information about 21 | the organism and its chromosomes and provides a standardized interface 22 | to the information in the metadata packages for other software to 23 | quickly extract necessary chromosomal information. An example of using 24 | *chromLocation* objects in other software can be found with the 25 | `alongChrom` function of the `r Biocpkg("geneplotter")` package in Bioconductor. 26 | 27 | # The chromLocation class 28 | 29 | The *chromLocation* class is used to provide a structure for chromosomal data of 30 | a particular organism. In this section, we will discuss the various slots of the 31 | class and the methods for interacting with them. Before this though, we will 32 | create an object of class *chromLocation* for demonstration purposes later. The 33 | helper function `buildChromLocation` is used, and it takes as an argument the 34 | name of a Bioconductor metadata package, which is itself used to extract the 35 | data. For this vignette, we will be using the `r Biocpkg("hgu95av2.db")` 36 | package. 
37 | 38 | ```{r buildCL, message=FALSE} 39 | library("annotate") 40 | z <- buildChromLocation("hgu95av2") 41 | z 42 | ``` 43 | 44 | Once we have an object of the *chromLocation* class, we can now access 45 | its various slots to get the information contained within it. There are 46 | six slots in this class: 47 | 48 | organism: This lists the organism that this object is describing. 49 | dataSource: Where this data was acquired from. 50 | chromLocs: A list with an element for every unique chromosome 51 | name, where each element contains a named vector where 52 | the names are probe IDs and the values describe the 53 | location of that probe on the chromosome. Negative 54 | values indicate that the location is on the antisense 55 | strand. 56 | probesToChrom: A hash table which will translate a probe ID to the 57 | chromosome it belongs to. 58 | chromInfo: A numerical vector representing each chromosome, where 59 | the names are the names of the chromosomes and the 60 | values are the lengths of those chromosomes. 61 | geneSymbols: An environment that maps a probe ID to the appropriate 62 | gene symbol. 63 | 64 | There is a basic 'get' type method for each of these slots, all with the same 65 | name as the respective slot. In the following example, we will demonstrate these 66 | basic methods. For the `probesToChrom` and `geneSymbols` methods, the return 67 | value is an environment which maps a probe ID to other values, we will be using 68 | the probe ID '32972_at', which was selected at random for these examples. We are 69 | showing only part of the `chromLocs` method's output as it is quite long in its 70 | entirety. 71 | 72 | 73 | ```{r showBasicMethods} 74 | organism(z) 75 | 76 | dataSource(z) 77 | 78 | ## The chromLocs list is extremely large. Let's only 79 | ## look at one of the elements. 
80 | names(chromLocs(z)) 81 | chromLocs(z)[["Y"]] 82 | 83 | get("32972_at", probesToChrom(z)) 84 | 85 | chromInfo(z) 86 | 87 | get("32972_at", geneSymbols(z)) 88 | ``` 89 | 90 | Another method which can be used to access information about the particular 91 | *chromLocation* object is the `nChrom` method, which will list how many 92 | chromosomes this organism has: 93 | 94 | ```{r nChrom} 95 | nChrom(z) 96 | ``` 97 | 98 | # Summary 99 | 100 | The *chromLocation* class has a simple design, but can be powerful if one wants 101 | to store the chromosomal data contained in a Bioconductor package into a single 102 | object. These objects can be created once and then passed around to multiple 103 | functions, which can cut down on computation time to access the desired 104 | information from the package. These objects allow access to basic but also 105 | important information, and provide a standard interface for writers of other 106 | software to access this information. 107 | -------------------------------------------------------------------------------- /man/findNeighbors.Rd: -------------------------------------------------------------------------------- 1 | \name{findNeighbors} 2 | \alias{findNeighbors} 3 | \alias{checkArgs} 4 | \alias{findChr4LL} 5 | \alias{getValidChr} 6 | \alias{getBoundary} 7 | \alias{weightByConfi} 8 | \title{A function to locate neighboring genes within a defined range 9 | around a target gene represented by an Entrez Gene ID } 10 | \description{ 11 | Given a data package with mappings between Entrez Gene IDs and their 12 | locations on chromosomes, this function locates genes that are 13 | within a defined range on a given chromosome. 
If an Entrez Gene ID is 14 | passed as one of the arguments, genes located will be neighbors to the 15 | gene represented by the Entrez Gene ID within a defined range on the 16 | chromosome on which the target gene resides 17 | } 18 | \usage{ 19 | findNeighbors(chrLoc, llID, chromosome, upBase, downBase, mergeOrNot = TRUE) 20 | checkArgs(llID, chromosome, upBase, downBase) 21 | findChr4LL(llID, chrEnv, organism) 22 | getValidChr(organism) 23 | getBoundary(loc, base, lower = TRUE) 24 | weightByConfi(foundLLs) 25 | } 26 | \arguments{ 27 | \item{chrLoc}{\code{chrLoc} a character string for the name of the 28 | data package that contains mappings between Entrez Gene IDs and their 29 | locations on chromosomes. For each chromosome, there are assumed to be 30 | mappings for the start and end locations of genes represented by 31 | Entrez Gene IDs. The data package needs to be built using 32 | chrLocPkgBuilder of AnnBuilder} 33 | \item{llID}{\code{llID} a character string for the Entrez Gene ID 34 | representing a gene whose neighbors are sought. llID can be missing} 35 | \item{chromosome}{\code{chromosome} a character string for the number 36 | of the chromosome of interest. chromosome is only required for 37 | locating genes within a range on the chromosome} 38 | \item{upBase}{\code{upBase} a numeric or character string for the 39 | number of base pairs that defines the upper limit of the range to 40 | locate genes. If neighbors of a given gene are sought, the value 41 | will be the distance in number of base pairs from the target gene 42 | upstream, to which search for genes will be conducted. Otherwise, the 43 | value will be the upper limit in number of base pairs from the p arm, 44 | to which search for genes will be conducted} 45 | \item{downBase}{\code{downBase} a numeric or character string for the 46 | number of base pairs that defines the lower limit of the range to 47 | locate genes. 
If neighbors of a given gene are sought, the value 48 | will be the distance in number of base pairs from the target gene 49 | downstream, to which search for genes will be conducted. Otherwise, the 50 | value will be the lower limit in number of base pairs from the p arm, 51 | to which search for genes will be conducted} 52 | \item{organism}{\code{organism} a character string for the name of the 53 | organism of interest} 54 | \item{chrEnv}{\code{chrEnv} an environment object with keys for 55 | Entrez Gene IDs and values for the chromosomes where genes reside} 56 | \item{loc}{\code{loc} a numeric or character string for the 57 | chromosomal location of the gene of interest} 58 | \item{base}{\code{base} either a \code{downBase} or \code{upBase}} 59 | \item{lower}{\code{lower} a boolean indicating whether the lower or 60 | upper boundary of search limit is sought} 61 | \item{mergeOrNot}{\code{mergeOrNot} a boolean to indicate whether genes 62 | found upstream and downstream will be merged (TRUE)} 63 | \item{foundLLs}{\code{foundLLs} a vector of character strings for 64 | Entrez Gene IDs} 65 | } 66 | \details{ 67 | A chrLoc data package can be created using function 68 | chrLocPkgBuilder of AnnBuilder, in which Entrez Gene IDs 69 | are mapped to location data on individual chromosomes. 70 | 71 | Genes are considered to be neighbors to a given target gene or within 72 | a given range when the transcription of genes starts and ends within the 73 | given range. 74 | 75 | checkArgs, findChr4LL, getValidChr, and getBoundary are 76 | accessory functions called by findNeighbors and may not have real 77 | value on their own. 78 | } 79 | \value{ 80 | The function returns a list of named vectors. The length of the list 81 | is one when genes in a given region are sought but varies depending on 82 | whether a given gene can be mapped to one or more chromosomes when 83 | neighboring genes of a target gene are sought. 
Names of vector can be 84 | "Confident" when a gene can be confidently placed on a chromosome or 85 | "Unconfident" when a gene can be placed on a chromosome but its exact 86 | location can not be determined with great confidence. 87 | } 88 | \references{\url{http://www.genome.ucsc.edu/goldenPath/}} 89 | \author{Jianhua Zhang} 90 | 91 | \examples{ 92 | if(require("humanCHRLOC")){ 93 | findNeighbors("humanCHRLOC", "51806", 10, upBase = 600000, downBase = 600000) 94 | }else{ 95 | print("Can not find neighbors without the required data package") 96 | } 97 | } 98 | \keyword{manip} 99 | 100 | -------------------------------------------------------------------------------- /man/htmlpage.Rd: -------------------------------------------------------------------------------- 1 | \name{htmlpage} 2 | \alias{htmlpage} 3 | \title{Functions to build HTML pages} 4 | 5 | \description{ This function is designed to create an HTML table 6 | containing both static information as well as links to various online 7 | annotation sources. 8 | } 9 | \usage{ 10 | htmlpage(genelist, filename, title, othernames, table.head, 11 | table.center = TRUE, repository = list("en"), ...) 12 | } 13 | \arguments{ 14 | \item{genelist}{A list or \code{data.frame} of character vectors 15 | containing ids to be made into hypertext links. See details for more 16 | information.} 17 | \item{filename}{A filename for the resultant HTML table.} 18 | \item{title}{A title for the table.} 19 | \item{othernames}{A list or \code{data.frame} of other things to add 20 | to the table. These will not be hyperlinks. The list of othernames 21 | can contain vectors, matrices, \code{data.frames} or lists.} 22 | \item{table.head}{A character vector of column headers for the table.} 23 | \item{table.center}{Center the table? Defaults to \code{TRUE}.} 24 | \item{repository}{A list of repositories to use 25 | for creating the hypertext links. 
Currently available repositories 26 | include 'gb' (GenBank), 'en' (EntrezGene), 'omim' (Online Mendelian 27 | Inheritance in Man), 'sp' (SwissProt), 'affy' (Affymetrix), 'ug' 28 | (UniGene), 'fb' (FlyBase), 'go' (Gene Ontology), 'ens' (Ensembl). 29 | Additional repositories can easily be added. See 30 | \code{setRepository} for more information.} 31 | \item{...}{Further arguments to be passed. See details for more 32 | information.} 33 | } 34 | \details{ This function will accept a list or \code{data.frame} of 35 | character vectors, each containing different ids that are to be turned 36 | into hyperlinks (e.g., a list containing affy ids, genbank accession 37 | numbers, and Entrez Gene ids). For instances where there are more than 38 | one id per gene, use a sub-list of character vectors. See the vignette 39 | 'HowTo: Get HTML Output' for more information. Othernames should be a 40 | list or \code{data.frame}. Again, if there are multiple entries for a 41 | given gene, use a sub-list. This is more easily explained using an 42 | example - please see the examples section below and the above 43 | mentioned vignette. 44 | 45 | In even the simplest case the genelist, othernames and repository have 46 | to be lists. A simple character vector will not suffice. 47 | 48 | Note that this function now uses \code{xtable} to create the HTML 49 | table, and there is the ability to pass some arguments on to either 50 | \code{xtable} or \code{print.xtable}. One such argument would be 51 | 'append=TRUE', which would allow one to put lots of tables in one 52 | page, as long as the filename argument remained the same. 53 | 54 | Additionally, the Ensembl repository needs a species argument in order 55 | to form a usable URI. This argument can be passed in the form of e.g., 56 | \code{species = "Homo_sapiens"}. Note the capitalization of the genus, and 57 | the separation by an underscore (\code{_}). 
58 | } 59 | \value{ 60 | This function is used only for the side effect of creating an HTML table. 61 | } 62 | \author{Robert Gentleman , further 63 | modifications by James W. MacDonald } 64 | \examples{ 65 | ## A very simple example. Two columns, one with links, the other without. 66 | 67 | gos <- paste("GO:000000", 1:9, sep="") 68 | notlinks <- LETTERS[1:9] 69 | 70 | htmlpage(list(gos), "simple.html", "Two column data", list(notlinks), 71 | c("GO IDs", "Letters"), repository = list("go")) 72 | 73 | if(!interactive()) 74 | file.remove("simple.html") 75 | 76 | ## A more complex example with multiple links per cell 77 | ## first we create data to annotate 78 | unigene <- list("Hs.600536",c("Hs.596913","HS.655491"),"Hs.76704") 79 | refseq <- list(c("NM_001030050", "NM_001030047", "NM_001648", 80 | "NM_001030049"), "NM_000860", c("NM_001011645", "NM_000044")) 81 | entrez <- c("354", "3248", "367") 82 | genelist <- list(unigene, refseq, entrez) 83 | 84 | ## now some other data 85 | 86 | symb <- c("KLK3","HPGD","AR") 87 | desc <- c("Prostate-specific antigen precursor", 88 | "15-hydroxyprostaglandin dehydrogenase", 89 | "Androgen receptor") 90 | t.stat <- c(40.21, -22.14, 21.56) 91 | p.value <- rep(0,3) 92 | fold.change <- c(3.54, -2.35, 3.18) 93 | expression <- matrix(c(11.78, 11.69, 11.62, 8.17, 5.78, 5.58, 5.68, 94 | 8.26, 9.08, 9.28, 9.19, 6.05), ncol=4, byrow=TRUE) 95 | 96 | otherdata <- list(symb, desc, t.stat, p.value, fold.change, expression) 97 | table.head <- c("UniGene", "RefSeq", "EntrezGene", "Symbol", 98 | "Description", "t-stat", "p-value", "fold change", 99 | paste("Sample", 1:4)) 100 | 101 | htmlpage(genelist, "test.html", "Some gene expression data", otherdata, 102 | table.head, repository = list("ug","gb","en")) 103 | 104 | if(!interactive()) 105 | file.remove("test.html") 106 | } 107 | \keyword{manip} 108 | -------------------------------------------------------------------------------- /vignettes/useProbeInfo.Rnw: 
-------------------------------------------------------------------------------- 1 | % \VignetteIndexEntry{Using Affymetrix Probe Level Data} 2 | % \VignetteDepends{hgu95av2.db, rae230a.db, rae230aprobe, Biostrings} 3 | % \VignetteKeywords{Annotation} 4 | %\VignettePackage{annotate} 5 | 6 | \documentclass{article} 7 | 8 | \newcommand{\Rfunction}[1]{{\texttt{#1}}} 9 | \newcommand{\Rmethod}[1]{{\texttt{#1}}} 10 | 11 | \newcommand{\Robject}[1]{{\texttt{#1}}} 12 | \newcommand{\Rpackage}[1]{{\textit{#1}}} 13 | \newcommand{\Rclass}[1]{{\textit{#1}}} 14 | 15 | \usepackage{hyperref} 16 | 17 | \usepackage[authoryear,round]{natbib} 18 | \usepackage{times} 19 | 20 | \begin{document} 21 | \title{Using Probe Information} 22 | 23 | \author{Robert Gentleman} 24 | \date{} 25 | \maketitle 26 | 27 | \section*{Overview} 28 | 29 | The Bioconductor project maintains a rich body of annotation data 30 | assembled into R libraries. For many different Affymetrix chips 31 | information is provided on both the sequence of the mRNA that was 32 | intended to be matched and the actual 25mers that were used for the 33 | bindings. In this vignette we show how to make use of the probe 34 | information. 35 | 36 | \section*{A Simple Example} 37 | 38 | To demonstrate the use of probe level data we will use the 39 | \texttt{rae230a} chip (for rats). So we first need to load these 40 | libraries. 41 | 42 | <>= 43 | library("annotate") 44 | library("rae230a.db") 45 | library("rae230aprobe") 46 | @ 47 | 48 | Now, we do not have any data so all we are going to do is to examine 49 | the probe data and show how to use some of the different Bioconductor 50 | tools to access that information, and potentially check on the mapping 51 | information that has been given. 
52 | 53 | We will select a probe set, 54 | <>= 55 | 56 | ps = names(as.list(rae230aACCNUM)) 57 | 58 | myp = ps[1001] 59 | 60 | myA = get(myp, rae230aACCNUM) 61 | 62 | wp = rae230aprobe$Probe.Set.Name == myp 63 | myPr = rae230aprobe[wp,] 64 | 65 | @ 66 | 67 | The probe data is stored as a \Rclass{data.frame} with 6 columns. They 68 | are 69 | \begin{description} 70 | \item[sequence] The sequence of the 25mer 71 | \item[x] The x position of the probe on the array. 72 | \item[y] The y position of the probe on the array. 73 | \item[Probe.Set.Name] The Affymetrix ID for the probe set. 74 | \item[Probe.Interrogation.Position] The location (in bases) of the 75 | 13th base in the 25mer, in the target sequence. 76 | \item[Target.Strandedness] Whether the 25mer is a Sense or an 77 | Antisense match to the target sequence. 78 | \end{description} 79 | 80 | We note that it is not always the case that the sequence reported is 81 | found in the reference or if it is, it is not always at the location 82 | reported. One can check that using other tools available in the 83 | \Rpackage{annotate} package and in the \Rpackage{Biostrings} package. 84 | 85 | %%FIXME: need to check for connectivity 86 | <>= 87 | 88 | myseq = getSEQ(myA) 89 | nchar(myseq) 90 | 91 | library("Biostrings") 92 | mybs = DNAString(myseq) 93 | 94 | match1 = matchPattern(as.character(myPr[1,1]), mybs) 95 | match1 96 | as.matrix(ranges(match1)) 97 | myPr[1,5] 98 | @ 99 | And we can see that in this case the 13th nucleotide is indeed in 100 | exactly the place that has been predicted. 101 | 102 | 103 | One additional thing to note is that Affymetrix does not accurately report the strandedness of the 104 | probes, so it is necessary to check the reverse complement of the sequence prior to 105 | assuming that the probe does not interrogate the correct gene. 
106 | 107 | <>= 108 | 109 | myp = ps[100] 110 | 111 | myA = get(myp, rae230aACCNUM) 112 | 113 | wp = rae230aprobe$Probe.Set.Name == myp 114 | 115 | myPr = rae230aprobe[wp,] 116 | 117 | myseq = getSEQ(myA) 118 | 119 | mybs = DNAString(myseq) 120 | 121 | Prstr = as.character(myPr[1,1]) 122 | 123 | match2 = matchPattern(Prstr, mybs) 124 | 125 | ## expecting 0 (no match) 126 | length(match2) 127 | 128 | match2 = matchPattern(reverseComplement(DNAString(Prstr)), mybs) 129 | 130 | nchar(match2) 131 | 132 | nchar(myseq) - as.matrix(ranges(match2)) 133 | myPr[1,5] 134 | @ 135 | 136 | Again, we see that the 13th nucleotide is exactly where predicted. It is relatively 137 | straightforward to check the other 25mers, and to develop different 138 | visualization tools that can be used to investigate the available data. 139 | 140 | \section*{Other Sources of Information} 141 | 142 | There are other tools available that may also be of some interest. For instance, the 143 | Mental Health Research Institute at the University of Michigan have various custom 144 | cdf files for Affymetrix data analysis that have been updated using more current annotation 145 | information from GenBank and Ensembl. 146 | 147 | \url {http://brainarray.mhri.med.umich.edu/Brainarray/Database/CustomCDF/genomic_curated_CDF.asp} 148 | 149 | The Weizmann Institute of Science have a database that can be queried to get the sensitivity and specificity 150 | for the probes on the Affymetrix HG-U95av2 chip. Although the information here is limited to a particular chip, 151 | this general idea is something that an enterprising end-user might want to replicate for other chips. 
152 | 153 | \url {http://genecards.weizmann.ac.il/geneannot/} 154 | 155 | \section{Session Information} 156 | 157 | The version number of R and packages loaded for generating the vignette were: 158 | 159 | <>= 160 | sessionInfo() 161 | @ 162 | 163 | 164 | \end{document} 165 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | import(methods) 2 | 3 | import(BiocGenerics) 4 | 5 | importClassesFrom(Biobase, 6 | eSet) 7 | 8 | importClassesFrom(AnnotationDbi, 9 | FlatBimap) 10 | 11 | importMethodsFrom(DBI, dbGetQuery, dbListTables, dbListFields) 12 | 13 | importMethodsFrom(AnnotationDbi, 14 | Definition, 15 | GOID, 16 | Secondary, 17 | Synonym, 18 | colnames, 19 | columns, 20 | dbmeta, 21 | eapply, 22 | exists, 23 | get, 24 | ls, 25 | mappedRkeys, 26 | mget, 27 | ncol, 28 | nrow, 29 | Ontology, 30 | revmap, 31 | Term) 32 | 33 | importMethodsFrom(Biobase, 34 | annotation, 35 | contents, 36 | exprs, 37 | featureNames) 38 | 39 | importFrom(Biobase, 40 | addVigs2WinMenu) 41 | 42 | importFrom(graphics, 43 | abline, 44 | identify, 45 | plot) 46 | 47 | importFrom(stats, 48 | setNames, 49 | heatmap) 50 | 51 | importFrom(utils, 52 | browseURL, 53 | compareVersion, 54 | packageDescription, 55 | URLencode) 56 | 57 | importFrom(xtable, 58 | xtable) 59 | 60 | importFrom(httr, 61 | GET) 62 | 63 | importFrom(XML, 64 | getNodeSet, 65 | htmlParse, 66 | xmlChildren, 67 | xmlDOMApply, 68 | xmlErrorCumulator, 69 | xmlName, 70 | xmlParse, 71 | xmlRoot, 72 | xmlToDataFrame, 73 | xmlTreeParse, 74 | xmlValue, 75 | xpathApply, 76 | xpathSApply) 77 | 78 | exportClasses( 79 | chromLocation, 80 | FramedHTMLPage, 81 | homoData, 82 | HTMLPage, 83 | pubMedAbst 84 | ) 85 | 86 | exportMethods( 87 | abstText, 88 | articleTitle, 89 | authors, 90 | chromInfo, 91 | chromLengths, 92 | chromLocs, 93 | chromNames, 94 | dataSource, 95 | Definition, 96 | fileName, 97 | geneSymbols, 98 | 
GOID, 99 | homoACC, 100 | homoHGID, 101 | homoLL, 102 | homoOrg, 103 | homoPS, 104 | homoType, 105 | homoURL, 106 | initialize, 107 | journal, 108 | mainPage, 109 | nChrom, 110 | Ontology, 111 | organism, 112 | pageText, 113 | pageTitle, 114 | pmid, 115 | probesToChrom, 116 | pubDate, 117 | pubMedAbst, 118 | Secondary, 119 | show, 120 | sidePage, 121 | Synonym, 122 | Term, 123 | toFile, 124 | topPage 125 | ) 126 | 127 | export( 128 | .buildAnnotateOpts, 129 | .getIdTag, 130 | .getNcbiURL, 131 | .handleXML, 132 | .efetch, 133 | .transformAccession, 134 | ACC2homology, 135 | accessionToUID, 136 | ACCNUMStats, 137 | annPkgName, 138 | aqListGOIDs, 139 | buildChromLocation, 140 | buildPubMedAbst, 141 | checkArgs, 142 | chrCats, 143 | compatibleVersions, 144 | createLLChrCats, 145 | createMAPIncMat, 146 | dropECode, 147 | filterGOByOntology, 148 | findChr4LL, 149 | findNeighbors, 150 | genbank, 151 | getAnnMap, 152 | getBoundary, 153 | getEG, 154 | getEvidence, 155 | getGI, 156 | getGO, 157 | getGOChildren, 158 | getGOdesc, 159 | getGOOntology, 160 | getGOParents, 161 | getGOTerm, 162 | getGPLNames, 163 | getOntology, 164 | getOrgNameNCode, 165 | getPMID, 166 | getPMInfo, 167 | getQueryLink, 168 | getSAGEFileInfo, 169 | getSAGEGPL, 170 | getSEQ, 171 | getSYMBOL, 172 | getUniqAnnItem, 173 | getValidChr, 174 | GO2heatmap, 175 | GOmnplot, 176 | hasGOannote, 177 | HGID2homology, 178 | homoData, 179 | htmlpage, 180 | KEGG2heatmap, 181 | KEGGmnplot, 182 | LL2homology, 183 | lookUp, 184 | makeAnchor, 185 | mapOrgs, 186 | pm.abstGrep, 187 | pm.getabst, 188 | pm.titles, 189 | pmAbst2HTML, 190 | pmid2MIAME, 191 | PMIDAmat, 192 | pmidQuery, 193 | pubmed, 194 | PWAmat, 195 | readGEOAnn, 196 | readIDNAcc, 197 | readUrl, 198 | serializeDataPkgEnvs, 199 | serializeEnv, 200 | UniGeneQuery, 201 | entrezGeneByID, 202 | entrezGeneQuery, 203 | usedChromGenes, 204 | usedChromGenes, 205 | weightByConfi, 206 | whatACC, 207 | setRepository, 208 | getRepositories, 209 | clearRepository, 210 | 
isValidKey, 211 | allValidKeys, 212 | updateSymbolsToValidKeys, 213 | blastSequences 214 | ) 215 | -------------------------------------------------------------------------------- /R/html.R: -------------------------------------------------------------------------------- 1 | makeAnchor <- function(link, title, toMain=FALSE) { 2 | ## Takes a vector of links and a vector of titles - 3 | ## returns a vector of anchors. 4 | 5 | ## !! Should allow links to be URL objects as well as strings 6 | out <- paste("",title,"",sep="") 10 | out 11 | } 12 | 13 | 14 | ## A simple class to represent a basic "HTML Page", currently 15 | ## being naively stored as a block of text. 16 | setClass("HTMLPage", representation(fileName="character", 17 | pageText="character", 18 | pageTitle="character")) 19 | setMethod("fileName", "HTMLPage", function(object, ...) object@fileName) 20 | 21 | if (is.null(getGeneric("pageText"))) 22 | setGeneric("pageText", function(object, ...) 23 | standardGeneric("pageText")) 24 | 25 | setMethod("pageText", "HTMLPage", function(object, ...) 26 | object@pageText) 27 | 28 | if (is.null(getGeneric("pageTitle"))) 29 | setGeneric("pageTitle", function(object, ...) 30 | standardGeneric("pageTitle")) 31 | setMethod("pageTitle", "HTMLPage", function(object, ...) 32 | object@pageTitle) 33 | 34 | setMethod("show","HTMLPage", function(object) print(pageText(object))) 35 | 36 | if (is.null(getGeneric("toFile"))) 37 | setGeneric("toFile", function(object, ...) 38 | standardGeneric("toFile")) 39 | setMethod("toFile", "HTMLPage", function(object, ...) { 40 | cat(pageText(object), file=fileName(object)) 41 | }) 42 | 43 | ## Defines a basic framed page. We're using 3 frames, a top 44 | ## banner, a side navigation bar and a main page, much like the 45 | ## bioconductor website. The object also has it's own HTML page 46 | ## associated with it via HTMLPage inheritance. 
## Three child pages (top banner, side navigation, main content); the
## frameset page itself lives in the slots inherited from HTMLPage.
setClass("FramedHTMLPage", representation(topPage="HTMLPage",
                                          sidePage="HTMLPage",
                                          mainPage="HTMLPage"),
         contains="HTMLPage")

## Slot accessors.  Each generic is created only when no generic of that
## name is already visible.
if (is.null(getGeneric("topPage")))
    setGeneric("topPage", function(object, ...)
               standardGeneric("topPage"))
setMethod("topPage", "FramedHTMLPage", function(object, ...)
          object@topPage)

if (is.null(getGeneric("sidePage")))
    setGeneric("sidePage", function(object, ...)
               standardGeneric("sidePage"))
setMethod("sidePage", "FramedHTMLPage", function(object, ...)
          object@sidePage)

if (is.null(getGeneric("mainPage")))
    setGeneric("mainPage", function(object, ...)
               standardGeneric("mainPage"))
setMethod("mainPage", "FramedHTMLPage", function(object, ...)
          object@mainPage)

## Write the three child pages to their own files, then the frameset page.
setMethod("toFile", "FramedHTMLPage", function(object, ...) {
    toFile(topPage(object))
    toFile(sidePage(object))
    toFile(mainPage(object))

    ## The frameset page is written by the inherited HTMLPage method
    ## (cat() of pageText() into fileName()).
    callNextMethod()
})

## Builds the frameset page text from the supplied child pages and stores
## it in the pageText slot.
## NOTE(review): the string literals below look as if their HTML markup was
## stripped in this copy of the file (topName/sideName/mainName are computed
## but never used) -- confirm against the upstream source before relying on
## the generated page.  They are reproduced here exactly as found.
setMethod("initialize", "FramedHTMLPage",
          function(.Object, topPage=new("HTMLPage"),
                   sidePage=new("HTMLPage"),
                   mainPage=new("HTMLPage"),
                   fileName=new("character"),
                   pageTitle=new("character")) {
              .Object@pageTitle <- pageTitle
              .Object@fileName <- fileName
              .Object@topPage <- topPage
              .Object@sidePage <- sidePage
              .Object@mainPage <- mainPage
              topName <- fileName(topPage(.Object))
              sideName <- fileName(sidePage(.Object))
              mainName <- fileName(mainPage(.Object))

              out <- paste("","",sep="\n")
              t <- paste("",pageTitle(.Object),"")
              out <- paste(out,t,"",
                           "",
                           " ",
                           " ",
                           " ",
                           " ",
                           " "," "," <body>","",
                           " <p>This page uses frames, but your browser doesn't support them.</p>",
                           "", " </body>"," ",
                           "","",
                           sep="\n")
              .Object@pageText <- out
              .Object
          })
--------------------------------------------------------------------------------
/R/blastSequences.R:
--------------------------------------------------------------------------------
## Convert a parsed BLAST XML result into a list with one
## DNAMultipleAlignment (hit sequence, query sequence) per Hsp element.
.blastSequencesToDNAMultipleAlignment <- function(xml) {
    loadNamespace("Biostrings")
    loadNamespace("IRanges")
    qseq <- xpathSApply(xml, "//Hsp_qseq", xmlValue)
    hseq <- xpathSApply(xml, "//Hsp_hseq", xmlValue)
    lapply(seq_along(qseq), function(i) {
        Biostrings::DNAMultipleAlignment(
            c(hseq[[i]], qseq[[i]]),
            rowmask=as(IRanges::IRanges(), "NormalIRanges"),
            colmask=as(IRanges::IRanges(), "NormalIRanges"))
    })
}

## Convert a parsed BLAST XML result into a data.frame with one row per
## Hsp; the Iteration and Hit columns are repeated for each Hsp they contain.
## Returns an empty data.frame (with a message) when there are no hits.
.blastSequencesToDataFrame <- function(xml) {
    if (xpathSApply(xml, "count(//Hit)") == 0L) {
        message("'blastSequences' returned 0 matches")
        return(data.frame())
    }

    iter <- xml["//Iteration"]
    iterlen <- sapply(iter, xpathSApply, "count(.//Hsp)")
    iterdf <- xmlToDataFrame(iter, stringsAsFactors=FALSE)

    hit <- xml["//Hit"]
    hitlen <- sapply(hit, xpathSApply, "count(.//Hsp)")
    hitdf <- xmlToDataFrame(hit, stringsAsFactors=FALSE)
    hitdf <- hitdf[, names(hitdf) != "Hit_hsps", drop=FALSE]

    hsp <- xmlToDataFrame(xml["//Hsp"] , stringsAsFactors=FALSE)

    df <- cbind(
        iterdf[rep(seq_len(nrow(iterdf)), iterlen),, drop=FALSE],
        hitdf[rep(seq_len(nrow(hitdf)), hitlen),, drop=FALSE],
        hsp)
    rownames(df) <- NULL
    df
}

## Poll NCBI for the result of request 'rid' and return the parsed XML once
## the search reports Status=READY.
##
## baseUrl  URL of the BLAST CGI endpoint
## rid      request id returned by the 'Put' call
## rtoe     estimated time to completion (seconds) reported by NCBI
## timeout  seconds to wait before asking the user whether to keep waiting
##
## Stops on Status=FAILED, Status=UNKNOWN or an unrecognised reply.  The
## "wait another ... seconds?" prompt is only shown in interactive sessions;
## otherwise polling continues every 10 seconds until NCBI reports a final
## status.
.tryParseResult <- function(baseUrl, rid, rtoe, timeout) {
    message("estimated response time ", rtoe, " seconds")
    start <- Sys.time()
    end <- Sys.time() + timeout
    url <- sprintf("%s?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=%s",
                   baseUrl, rid)
    Sys.sleep(min(rtoe, timeout))
    repeat {
        elapsed <- as.double(Sys.time() - start, units="secs")
        ## RCurl::getURL(url, followlocation=TRUE) has issues.
        ## See getURL2() in R/query.R
        result <- as(htmlParse(getURL2(url),
                               error = xmlErrorCumulator(immediate=FALSE)),
                     "character")

        if (grepl("Status=FAILED", result))
            stop("BLAST search failed")
        else if (grepl("Status=UNKNOWN", result))
            stop("BLAST search expired")
        else if (grepl("Status=READY", result)) {
            url <- sprintf("%s?RID=%s&FORMAT_TYPE=XML&CMD=Get", baseUrl, rid)
            ## RCurl::getURL(url, followlocation=TRUE) has issues.
            ## See getURL2() in R/query.R
            result <- xmlParse(getURL2(url),
                               error = xmlErrorCumulator(immediate=FALSE))
            return(result)
        } else if (grepl("Status=WAITING", result)) {
            message(sprintf("elapsed time %.0f seconds", elapsed))
            if (Sys.time() > end && interactive()) {
                ## '%.0f' rather than '%d': 'timeout' is an ordinary
                ## numeric and '%d' fails for non-integer values.
                msg <- sprintf("wait another %.0f seconds? [y/n] ", timeout)
                repeat {
                    ans <- substr(trimws(tolower(readline(msg))), 1, 1)
                    if (ans %in% c("y", "n"))
                        break
                }
                if (ans == "n")
                    break
                end <- Sys.time() + timeout
            }
            Sys.sleep(10)
        } else
            stop("BLAST search unknown response")
    }
    msg <- sprintf("'blastSequences' timeout after %.0f seconds",
                   elapsed)
    stop(msg, call.=FALSE)
}

## Using the REST-ish API described at
## http://www.ncbi.nlm.nih.gov/blast/Doc/node2.html
##
## Submit sequence 'x' to NCBI BLAST, wait for the result and return it in
## the representation selected by 'as' (a list of DNAMultipleAlignment
## objects, a data.frame, or the parsed XML document).
blastSequences <- function(x, database="nr",
                           hitListSize="10",
                           filter="L",
                           expect="10",
                           program="blastn",
                           timeout=40,
                           as=c("DNAMultipleAlignment", "data.frame", "XML"))
{
    PARSE <- switch(match.arg(as),
                    DNAMultipleAlignment=.blastSequencesToDNAMultipleAlignment,
                    data.frame=.blastSequencesToDataFrame,
                    XML=identity)
    ## TODO: lots of argument checking and testing.  Also,
    ## depending on which program string is used we need to make the correct
    ## kind of object at the end (so blastn means DNAMultipleAlignment, and
    ## blastp means AAMultipleAlignment etc.

    ## So:
    ## 1) get online values these parameters can be
    ## 2) document those
    ## 3) restrict their vals in the code here.
    ## 4) for program, use this to determine what object is returned.

    ## assemble the query
    baseUrl <- "https://www.ncbi.nlm.nih.gov/blast/Blast.cgi"
    query <- paste0("QUERY=", URLencode(as.character(x)),
                    "&DATABASE=", database,
                    "&HITLIST_SIZE=", hitListSize, "&FILTER=", filter,
                    "&EXPECT=", expect, "&PROGRAM=", program)
    url0 <- sprintf("%s?%s&CMD=Put", baseUrl, query)
    ## RCurl::getURL(url, followlocation=TRUE) has issues.
    ## See getURL2() in R/query.R
    post <- htmlParse(getURL2(url0))

    info <- post[['string(//comment()[contains(., "QBlastInfoBegin")])']]
    ## Without this check a rejected submission would only surface later,
    ## as an "invalid 'time' value" error from Sys.sleep(NA).
    if (!grepl("RID = [[:alnum:]]+", info) ||
        !grepl("RTOE = [[:digit:]]+", info))
        stop("'blastSequences' found no RID/RTOE in the NCBI response; ",
             "the query may have been rejected", call.=FALSE)
    rid <- sub(".*RID = ([[:alnum:]]+).*", "\\1", info)
    rtoe <- as.integer(sub(".*RTOE = ([[:digit:]]+).*", "\\1", info))
    result <- .tryParseResult(baseUrl, rid, rtoe, timeout)
    PARSE(result)
}

## took 11.5 minutes to do a blast... (ugh)
--------------------------------------------------------------------------------
/R/homoData.R:
--------------------------------------------------------------------------------
### homoData objects are used by homoPkgBuilder to represent homology data

setClass("homoData", representation(homoOrg = "character",
                                    homoLL = "numeric",
                                    homoType = "character",
                                    homoPS = "numeric",
                                    homoURL = "character",
                                    homoACC = "character",
                                    homoHGID = "numeric"))

# Set the get methods: one generic plus one slot accessor per slot
setGeneric("homoOrg",
           function(object) standardGeneric("homoOrg"))

setMethod("homoOrg", "homoData",
          function(object) object@homoOrg)

setGeneric("homoLL",
           function(object) standardGeneric("homoLL"))

setMethod("homoLL", "homoData",
          function(object) object@homoLL)

setGeneric("homoType",
           function(object) standardGeneric("homoType"))

setMethod("homoType", "homoData",
          function(object) object@homoType)

setGeneric("homoPS",
           function(object) standardGeneric("homoPS"))

setMethod("homoPS", "homoData",
          function(object) object@homoPS)

setGeneric("homoURL",
           function(object) standardGeneric("homoURL"))

setMethod("homoURL", "homoData",
          function(object) object@homoURL)

setGeneric("homoACC",
           function(object) standardGeneric("homoACC"))

setMethod("homoACC", "homoData",
          function(object) object@homoACC)

setGeneric("homoHGID",
           function(object) standardGeneric("homoHGID"))

setMethod("homoHGID", "homoData",
          function(object) object@homoHGID)

## Print every slot that holds a value, one "name: value" entry per slot.
setMethod("show", "homoData",
          function(object) {
              ## Only the first element is tested: that is what '&&' did
              ## historically, and R >= 4.3 raises an error when '&&' is
              ## given a condition longer than one.
              hasValue <- function(value)
                  length(value) > 0 && !is.na(value[1L])
              showSlot <- function(label, value) {
                  if (hasValue(value))
                      cat(paste(label, value), fill = TRUE)
              }
              showSlot("homoOrg:", homoOrg(object))
              showSlot("\nhomoLL:", homoLL(object))
              showSlot("\nhomoHGID:", homoHGID(object))
              showSlot("\nhomoACC:", homoACC(object))
              showSlot("\nhomoType:", homoType(object))
              showSlot("\nhomoPS:", homoPS(object))
              showSlot("\nhomoURL:", homoURL(object))
              cat("\n")
          })

## Translate between organism codes and organism names using the table
## from getOrgNameNCode().
##
## toMap  codes (what = "code") or names (what = "name") to translate;
##        non-character input is converted with as.character() so that a
##        numeric code is looked up by name and not by list position
## what   what 'toMap' contains: "code" returns names, "name" returns codes
##
## Returns NA for NULL, empty or single-NA input, the single mapping for a
## length-one input, and the result of sapply() over the elements otherwise.
mapOrgs <- function(toMap, what = c("code", "name")){
    what <- match.arg(what)
    ## The is.na() test is restricted to length-one input: '||' with a
    ## longer condition is an error in R >= 4.3, and vectors are handled
    ## element by element below.
    if(is.null(toMap) || length(toMap) == 0L ||
       (length(toMap) == 1L && is.na(toMap))){
        return(NA)
    }
    orgs <- getOrgNameNCode()
    lookup <- function(x){
        if(what == "code"){
            orgs[[x]]
        }else{
            names(orgs[orgs == x])
        }
    }
    if(!is.character(toMap)){
        toMap <- as.character(toMap)
    }
    if(length(toMap) == 1L){
        lookup(toMap)
    }else{
        sapply(toMap, lookup)
    }
}

## Lookup table: list names are the organism codes used by mapOrgs(),
## values are the organism names.
getOrgNameNCode <- function(){
    list("3055" = "Chlamydomonas reinhardtii",
         "3702" = "Arabidopsis thaliana",
         "3847" = "Glycine max",
         "3880" = "Medicago truncatula",
         "4081" = "Lycopersicon esculentum",
         "4513" = "Hordeum vulgare",
         "4530" = "Oryza sativa",
         "4565" = "Triticum aestivum",
         "4577" = "Zea mays",
         "4896" = "Schizosaccharomyces pombe",
         "4932" = "Saccharomyces cerevisiae",
         "5141" = "Neurospora crassa",
         "5833" = "Plasmodium falciparum",
         "6239" = "Caenorhabditis elegans",
         "7165" = "Anopheles gambiae",
         "7227" = "Drosophila melanogaster",
         "7719" = "Ciona intestinalis",
         "7955" = "Danio rerio",
         "8022" = "Oncorhynchus mykiss",
         "8090" = "Oryzias latipes",
         "8355" = "Xenopus laevis",
         "8364" = "Xenopus tropicalis",
         "9031" = "Gallus gallus",
         "9606" = "Homo sapiens",
         "9615" = "Canis familiaris",
         "9598" = "Pan troglodytes",
         "9823" = "Sus scrofa",
         "9913" = "Bos taurus",
         "10090" = "Mus musculus",
         "10116" = "Rattus norvegicus",
         ## was "Kluyveromyces, lactis" (stray comma in the species name)
         "28985" = "Kluyveromyces lactis",
         "29760" = "Vitis vinifera",
         "33169" = "Eremothecium gossypii",
         "44689" = "Dictyostelium discoideum",
         "148305" = "Magnaporthe grisea"
         )
}


## Constructor for homoData objects; 'organism' is translated with
## mapOrgs() (default what = "code") before being stored.
homoData <- function(organism, LL, type, PS, ACC, HGID, URL){
    new("homoData", homoOrg = mapOrgs(organism),
        homoLL = LL, homoType = type,
        homoPS = PS, homoURL = URL,
        homoACC = ACC, homoHGID = HGID)
}
--------------------------------------------------------------------------------
/R/Amat.R:
--------------------------------------------------------------------------------
##copyright 2004 R. Gentleman, all rights reserved

## Build a 0/1 membership matrix from a named list of sets: one row per
## distinct member (in order of first appearance), one column per set.
## The matrix is allocated explicitly so that one-row and zero-column
## results are still matrices; sapply() collapses those to a vector or a
## list, which then fails in 'dimnames<-'.
.membershipAmat <- function(sets) {
    members <- unique(unlist(sets, use.names=FALSE))
    Amat <- matrix(0, nrow=length(members), ncol=length(sets),
                   dimnames=list(members, names(sets)))
    for (j in seq_along(sets))
        Amat[match(sets[[j]], members), j] <- 1
    Amat
}

##given the name of chip compute the pathway adjacency matrix for LLids
## 'data' must be a single character string naming the annotation package.
## Rows of the result are Entrez Gene (or ORF) ids, columns are pathways.
PWAmat <- function(data) {
    if(!is.character(data) || length(data) != 1 )
        stop("wrong argument")

    if( grepl("^org\\..+\\.sgd$", data) ){
        dataE <- getAnnMap("PATH2ORF", data, load=TRUE, type="db")
    }
    else if ( grepl("^org\\..+\\.eg$", data) ){
        dataE <- getAnnMap("PATH2EG", data, load=TRUE, type="db")
    }
    else {
        dataE <- getAnnMap("PATH2PROBE", data, load=TRUE, type="db")
    }

    if( data == "YEAST" || grepl("^org\\..+", data) )
        pathLL <- as.list(dataE)
    else {
        ## chip packages map pathways to probes: translate the probes to
        ## gene ids, dropping NAs and duplicates
        pathLL <- eapply(dataE, function(x) {
            x <- x[!is.na(x)]
            if(length(x) > 0){
                LLs <- getEG(x, data)
                LLs <- LLs[!is.na(LLs)]
                unique(LLs)
            }
        })
    }
    .membershipAmat(pathLL)
}


##given the name of chip compute the PubMed adjacency matrix for probe set ids
## 'pkg' must be a single character string; 'gene' optionally restricts the
## columns to the given ids (duplicates are dropped with a warning).
## Rows of the result are PubMed ids, columns are the ids in the PMID map.
PMIDAmat <- function(pkg, gene=NULL) {
    if(!is.character(pkg) || length(pkg) != 1 )
        stop("wrong argument")

    probe2pmid <- get(paste0(pkg, "PMID"))
    if(is.null(gene)){
        gene2pmid <- as.list(probe2pmid)
    }else{
        if(any(duplicated(gene))) warning("Gene is not unique.")
        gene2pmid <- mget(unique(gene), probe2pmid, ifnotfound=NA)
    }
    .membershipAmat(gene2pmid)
}


##given a GO term, and an exprset, produce a heatmap of all probes
##mapped to that GOterm;
## Features of an expression container that are mapped to key 'x' in the
## annotation map named paste0(data, mapName); duplicates are removed.
.annotatedFeatures <- function(data, mapName, x, features) {
    mapE <- get(paste0(data, mapName))
    whG <- unique(mapE[[x]])
    whG[whG %in% features]
}

## Plot the per-feature means of the two groups in 'group' against each
## other, add the identity line and return the 2 x n matrix of means.
## 'dataM' is a features x samples matrix.
.plotGroupMeans <- function(dataM, group, ...) {
    if( length(levels(factor(group))) != 2 )
        stop("only works for factors with two levels")
    tts <- apply(dataM, 1,
                 function(x) vapply(split(x, group), mean, numeric(1)))
    rn <- row.names(tts)
    plot(tts[1,], tts[2,], xlab=rn[1], ylab=rn[2], ...)
    abline(a=0, b=1)
    tts
}

## Heatmap of all probes in 'eset' mapped to GO term 'x'.  'data' names the
## annotation package and defaults to annotation(eset); '...' is passed to
## heatmap().  drop=FALSE keeps a matrix even when only one probe matches.
GO2heatmap <- function(x, eset, data, ...) {
    if( missing(data) )
        data <- annotation(eset)
    ##unique() is needed because there could be multiple criteria
    whGs <- .annotatedFeatures(data, "GO2ALLPROBES", x, featureNames(eset))
    dataM <- exprs(eset)[whGs, , drop=FALSE]
    heatmap(dataM, ...)
}

## Mean-vs-mean plot (two groups) for all probes mapped to GO term 'x'.
## Returns the matrix of group means, one column per probe.
GOmnplot <- function (x, eset, data = "hgu133plus2", group, ...)
{
    whGs <- .annotatedFeatures(data, "GO2ALLPROBES", x, featureNames(eset))
    dataM <- exprs(eset)[whGs, , drop=FALSE]
    .plotGroupMeans(dataM, group, ...)
}

setGeneric("KEGG2heatmap", function(x, eset, data, ...)
           standardGeneric("KEGG2heatmap"))

## Heatmap of all probes mapped to KEGG pathway 'x'.
setMethod("KEGG2heatmap", c("character", "eSet", "character"),
          function(x, eset, data, ...) {
              whGs <- .annotatedFeatures(data, "PATH2PROBE", x,
                                         featureNames(eset))
              dataM <- exprs(eset)[whGs, , drop=FALSE]
              heatmap(dataM, ...)
          })

## 'data' omitted: use the annotation of 'eset'.  A separate method is
## needed because an omitted argument dispatches as class "missing" and
## never reaches the "character" method above.
setMethod("KEGG2heatmap", c("character", "eSet", "missing"),
          function(x, eset, data, ...)
              KEGG2heatmap(x, eset, annotation(eset), ...))

setMethod("KEGG2heatmap", c("character", "matrix", "character"),
          function(x, eset, data, ...) {
              whGs <- .annotatedFeatures(data, "PATH2PROBE", x,
                                         row.names(eset))
              dataM <- eset[whGs, , drop=FALSE]
              heatmap(dataM, ...)
          })


setGeneric("KEGGmnplot", function(x, eset, data= "hgu133plus2", group,
                                  ...) standardGeneric("KEGGmnplot"))

## Mean-vs-mean plot (two groups) for all probes mapped to KEGG pathway 'x'.
setMethod("KEGGmnplot", c("character", "eSet", "character"),
          function (x, eset, data = "hgu133plus2", group, ...) {
              whGs <- .annotatedFeatures(data, "PATH2PROBE", x,
                                         featureNames(eset))
              dataM <- exprs(eset)[whGs, , drop=FALSE]
              .plotGroupMeans(dataM, group, ...)
          })

setMethod("KEGGmnplot", c("character", "matrix", "character"),
          function (x, eset, data = "hgu133plus2", group, ...) {
              whGs <- .annotatedFeatures(data, "PATH2PROBE", x,
                                         row.names(eset))
              dataM <- eset[whGs, , drop=FALSE]
              .plotGroupMeans(dataM, group, ...)
          })

## 'data' omitted: apply the documented default.  As for KEGG2heatmap, an
## omitted 'data' dispatches as "missing", so the default in the generic
## alone never selected one of the methods above.
setMethod("KEGGmnplot", c("character", "eSet", "missing"),
          function (x, eset, data = "hgu133plus2", group, ...)
              KEGGmnplot(x, eset, "hgu133plus2", group, ...))

setMethod("KEGGmnplot", c("character", "matrix", "missing"),
          function (x, eset, data = "hgu133plus2", group, ...)
              KEGGmnplot(x, eset, "hgu133plus2", group, ...))


--------------------------------------------------------------------------------
/R/isValidKey.R:
--------------------------------------------------------------------------------
##Helper function for schema checking:
## Returns the SQL statement that lists the primary ids of an annotation
## database, chosen from its schema name.
##   schema  value of the DBSCHEMA metadata entry
##   conn    database connection; only used for NOSCHEMA databases, where
##           the second field of the 'genes' table is selected
## Stops when the schema name matches none of the known patterns.
.defineBaseSelectSQL <- function(schema, conn){
    ##schema <- dbmeta(conn, "DBSCHEMA")
    ##centralID <- dbmeta(conn, "CENTRALID")
    if(schema == "YEAST_DB")
        return("select distinct systematic_name from sgd where systematic_name != 'NA';")
    ##All chip packages have a probes table with probe_ids
    if(grepl("CHIP_DB$", schema))
        return("select distinct probe_id from probes;")
    ## NOSCHEMA can have weird columns
    if(grepl("NOSCHEMA", schema)){
        toget <- dbListFields(conn, "genes")[2]
        return(paste("select distinct", toget, "from genes;"))
    }
    ## chip schemas were handled above, so this is a non-chip *_DB schema
    if(grepl("_DB$", schema))
        return("select distinct gene_id from genes;")
    stop("Unidentified database schema. Cannot find central table. May need to add schema options to isValidKey().")
}

##Given a list of IDs and a package, are these IDs valid primary IDs for this package?
## isValidKey(ids, pkg) with 'pkg' given as the package name without its
## ".db" suffix (e.g. "org.Hs.eg").  Returns a logical vector, one element
## per id, TRUE where the id is a primary id of that package.
setMethod("isValidKey", c("character", "character"),
          function(ids, pkg){
              ##argument checking
              if(!is.character(ids)) stop("'ids' must be a character vector of IDs that you wish to validate")
              if(length(pkg) != 1L || is.na(pkg))
                  stop("'pkg' must be a single package name")
              ##access the DB, get the primary IDs, and then test if they are in your list of ids
              ## library() fails immediately when the package cannot be
              ## loaded; require() only returned FALSE, so the failure
              ## showed up later as a missing '<pkg>_dbconn' function.
              library(paste0(pkg, ".db"), character.only = TRUE)
              conn <- do.call(paste0(pkg, "_dbconn"), list())
              schema <- dbmeta(conn, "DBSCHEMA")
              sql <- .defineBaseSelectSQL(schema, conn)
              res <- dbGetQuery(conn, sql)
              ##the queries always return a single column
              validIds <- as.vector(res[,1])
              ids %in% validIds
          })

## Same test with 'pkg' given as an OrgDb object; its own connection is used.
setMethod("isValidKey", c("character","OrgDb"),
          function(ids, pkg){
              conn <- dbconn(pkg)
              schema <- dbmeta(conn, "DBSCHEMA")
              sql <- .defineBaseSelectSQL(schema, conn)
              res <- dbGetQuery(conn, sql)
              validIds <- as.vector(res[,1])
              ids %in% validIds
          })


##Given a package, what are all the unique valid primary IDs for this package?
## allValidKeys(pkg) with 'pkg' given as the package name without its ".db"
## suffix.  Returns every primary id of the package as a vector.
setMethod("allValidKeys", "character",
          function(pkg){
              ##access the DB and get all the primary IDs, (unique constraint already on the field being sought)
              ## library() fails immediately when the package cannot be
              ## loaded; the result of require() was never checked.
              library(paste0(pkg, ".db"), character.only = TRUE)
              conn <- do.call(paste0(pkg, "_dbconn"), list())
              schema <- dbmeta(conn, "DBSCHEMA")
              sql <- .defineBaseSelectSQL(schema, conn)
              res <- dbGetQuery(conn, sql)
              ##the queries always return a single column
              as.vector(res[,1])
          })

## Same listing with 'pkg' given as an OrgDb object.
setMethod("allValidKeys", "OrgDb",
          function(pkg){
              conn <- dbconn(pkg)
              schema <- dbmeta(conn, "DBSCHEMA")
              sql <- .defineBaseSelectSQL(schema, conn)
              res <- dbGetQuery(conn, sql)
              ##the queries always return a single column
              as.vector(res[,1])
          })


##Given a list of gene symbols, return the primary ID (or probe if its a chip package) that should be used.
##If there was a symbol or ID in the original list that we don't have a better ID for, keep the original symbol...
##Because of the many to one nature of probes to genes, it will NOT be possible to support CHIP packages with this function.
##
## symbols  character vector of gene symbols / aliases
## pkg      organism package name without the ".db" suffix
##
## Returns 'symbols' with every element that has an alias mapping replaced
## by its primary id.  When an alias maps to several ids the first one is
## used and a warning is issued.  (Previously such input either failed with
## an NA-subscript error or could update the wrong element, because the
## ids were matched back through the suffixed names made by unlist(); a
## symbol given twice was also only updated once.)
updateSymbolsToValidKeys <- function(symbols, pkg) {
    #argument checking
    if(!is.character(symbols)) stop("'symbols' must be a character vector of gene symbols that you wish to translate to the primary ID of the package")
    library(paste0(pkg, ".db"), character.only = TRUE)

    ##Check the schema
    conn <- do.call(paste0(pkg, "_dbconn"), list())
    schema <- dbmeta(conn, "DBSCHEMA")

    ##'pkg' cannot be a chip package.
    if(any(grepl("CHIP_DB$", schema))){
        stop("Because of the many to many relationship that can exist between probes and IDs, this function can only work with the organism level packages which can ensure that there is only one most valid ID per gene symbol.")
    }

    ##Do the right thing depending on what type of package this is.
    ## r2 maps alias -> primary id; rr1 holds the aliases that have a mapping.
    ## The maps are fetched by name with get() instead of eval(parse()).
    if(any(grepl("^YEAST", schema))){
        ##if its yeast the ALIAS map runs id -> alias and must be reversed
        aliasMap <- get(paste0(pkg, "ALIAS"))
        rr1 <- mappedRkeys(aliasMap)
        r2 <- revmap(aliasMap)
    }else if(any(grepl("^ARABIDOPSIS", schema))){
        stop("Sorry, but the Arabidopsis packages do not have alias information at this time.")
    }else if(any(grepl("^MALARIA", schema))){
        ##MALARIA packages are not entrez gene based
        r2 <- get(paste0(pkg, "ALIAS2ORF"))
        rr1 <- mappedRkeys(revmap(r2))
    }else{
        ##so far everything other than yeast and malaria should have
        ##reversed alias map and eg base
        r2 <- get(paste0(pkg, "ALIAS2EG"))
        rr1 <- mappedRkeys(revmap(r2))
    }

    known <- symbols %in% rr1
    if(any(known)){
        ids <- mget(unique(symbols[known]), r2)
        nAmbiguous <- sum(lengths(ids) > 1L)
        if(nAmbiguous > 0L)
            warning(nAmbiguous, " symbol(s) map to more than one ID; ",
                    "using the first ID for each")
        firstId <- vapply(ids, function(id) as.character(id)[1L],
                          character(1))
        symbols[known] <- unname(firstId[symbols[known]])
    }
    symbols
}



## ##TEST examples:
## fu <- c("15S_rRNA_2","21S_rRNA_4","15S_rRNA")
## isValidKey(fu, "org.Sc.sgd")
## updateSymbolsToValidKeys(fu, "org.Sc.sgd")

## sna <- c("1769325_at","altSymbol")
## isValidKey(sna, "yeast2")

## bar <- c("MAPK11","P38B","FLJ45465", "altSymbol")
## isValidKey(bar, "org.Hs.eg")
## updateSymbolsToValidKeys(bar, "org.Hs.eg")

## foo <- c("1396.pre-tRNA-Met-1", "1396.t00553", "altSymbol")
## updateSymbolsToValidKeys(foo, "org.Pf.plasmo")
## isValidKey(foo, "org.Pf.plasmo")
--------------------------------------------------------------------------------
/man/chrCats.Rd:
--------------------------------------------------------------------------------
1 | \name{chrCats} 2 | \alias{chrCats} 3 | 4 | \alias{createLLChrCats} 5 | \alias{createMAPIncMat} 6 | 7 | \title{Returns a list of chromosome locations from a MAP environment} 8 | \description{ 9 | The \code{chrCats} function takes a data package that contains a \code{MAP} environment 10 | and returns a list that contains the locations for each gene (from the 11 | chromosome number to more specific locations if they're available). For 12 | example, the \code{hgu95av2MAP} environment gives the location, 14q22-q23, for 13 | Affymetrix identifier: \code{1114_at}. This function will return a list with 14 | one named element for \code{1114_at} and the values it will contain are 14, 15 | 14q, 14q2, 14q22, and 14q23 since the Affy id is located at each of those 16 | chromosome locations. 
17 | } 18 | \usage{ 19 | chrCats(data) 20 | createMAPIncMat(data) 21 | createLLChrCats(data) 22 | } 23 | \arguments{ 24 | \item{data}{the data package (a character string)} 25 | } 26 | \details{ 27 | This function does a lot of string manipulation and there are a few known 28 | errors so I want to discuss them here in case someone else would like to 29 | improve on this function. 30 | 31 | The first thing \code{chrCats} does is allow only one location for each 32 | Affymetrix identifier. If the \code{MAP} environment has more than one 33 | location for an Affy id, then the first location is taken. Currently, the 34 | \code{hgu95av2MAP} environment has only 9 Affy ids (out of 12625) that have more 35 | than one location and the \code{hgu133aMAP} environment has only 16 Affy ids (out 36 | of 22283) that have more than one location so this does not affect many 37 | identifiers. 38 | 39 | Next, any spaces are removed from each location as several locations have 40 | leading spaces. 41 | 42 | Then a \code{for} loop (which is not efficient!) is used to look at each location 43 | individually and make a list that will be returned. A few particular 44 | strings are looked for in each location and these include \code{'|'} and \code{'-'}. 45 | 46 | Locations that include \code{'|'} in the string are split based on the \code{'|'} as 47 | though it represents OR. For example, for Affy id, \code{32273_at}, in \code{hgu95av2MAP} 48 | the location is given as 5q33|5q31.1 and this function assumes this means 49 | 5q33 or 5q31.1 so it will return the values 5, 5q, 5q3, 5q33, 5q31, and 50 | 5q31.1 for this Affy id. 51 | 52 | The \code{'-'} character is assumed to mean BETWEEN. For example, for Affy id, 53 | \code{1138_at}, in \code{hgu95av2MAP} the location is given as 2q11-q14 and this function 54 | assumes this means the location is somewhere between 2q11 and 2q14 so it 55 | will return the values 2, 2q, 2q1, 2q11, 2q12, 2q13, and 2q14 for this Affy 56 | id. 
57 | 58 | Now here is the first problem with this function. I do not know how to 59 | handle the \code{'-'} when the two strings are not of equal length. For example, 60 | for Affy id, \code{36779_at}, in \code{hgu95av2MAP} the location is given as 5q33.3-q34, 61 | but I do not know how to treat this BETWEEN because I do not know how many 62 | sub-bands there are between 5q33.3 and 5q34. Is there a 5q33.4 or 5q33.5, 63 | etc.? I'm not sure. So I treat this \code{'-'} as an \code{'|'}. This function will 64 | return the values 5, 5q, 5q3, 5q33, 5q33.3, and 5q34 for this Affy id and 65 | most likely, that is incorrect. 66 | 67 | Another problem I have with the \code{'-'} occurs when all of the characters up 68 | until the last character do not match. For example, for Affy id, 69 | \code{38927_i_at}, in \code{hgu95av2MAP} the location is given as 11q14-q21, but again 70 | I'm not sure how to treat this BETWEEN because I don't know the number of 71 | sub-bands between 11q14 and 11q21. Does 11q15 exist, etc.? So I again 72 | treat this \code{'-'} as an \code{'|'}. This function will return the values 11, 11q, 73 | 11q1, 11q14, 11q2, and 11q21 for this Affy id and this is probably 74 | incorrect. 75 | 76 | The problem with \code{'-'} also occurs when the location is something like 77 | 19cen-q13.1 for Affy id, \code{34670_at}, in \code{hgu95av2MAP}. Again I don't know the 78 | number of sub-bands between 19cen and 19q13.1 so I treat this BETWEEN as an 79 | OR. 80 | 81 | Another problem I have with \code{'cen'} in the location is that sometimes the 82 | location looks like: 19p13.2-cen and very rarely it looks like: 83 | 5p13.1-5cen. In the second case, the chromosome number is included after 84 | the \code{'-'} and before the \code{'cen'}. This only occurs with the location 85 | 5p13.1-5cen in both \code{hgu95av2MAP} and \code{hgu133aMAP} and all other locations do 86 | not include the chromosome number after the \code{'-'}. 
Currently this function 87 | returns the wrong information for that one location. It will return the 88 | values 5, 5p, 5p1, 5p13, 5p13.1, 5p5, and 5p5cen, but it should return 5, 5p, 89 | 5p1, 5p13, 5p13.1, and 5cen so this one location is an error. All other 90 | locations that include \code{'cen'} are correct. For example, this function 91 | returns the values 19, 19p, 19p1, 19p13, 19p13.2, and 19cen for the location 92 | 19p13.2-cen. 93 | 94 | This function is very slow because it contains \code{for} loops and thus, it would 95 | be useful to make it more efficient. Also, it would be nice at some point 96 | for someone with more knowledge on chromosome location to figure out how to 97 | improve some of my string manipulation errors. 98 | 99 | \code{createLLChrCats} is a wrapper that converts probe IDs to Entrez 100 | Gene IDs. 101 | 102 | \code{createMAPIncMat} is a wrapper that calls \code{createLLChrCats} 103 | and then returns an incidence matrix with rows being the categories 104 | and cols the Entrez Gene IDs. 105 | } 106 | \value{ 107 | A named list with an element for each Affy id. The name will be the Affy id 108 | and the values will be the locations for that Affy id. If the Affy id had a 109 | location of \code{NA} in the \code{MAP} environment, then a list element is not returned 110 | for that Affy id. 111 | } 112 | \author{Elizabeth Whalen} 113 | \examples{ 114 | library("hgu95av2.db") 115 | mapValues <- chrCats("hgu95av2") 116 | } 117 | \keyword{data} 118 | 119 | 
--------------------------------------------------------------------------------
/R/GOhelpers.R:
--------------------------------------------------------------------------------
##Copyright R. Gentleman, 2004
##simple functions to get Evidence codes

## TRUE when a GO entry is the single NA used to mark "no annotation".
.isMissingGOEntry <- function(x) (length(x) == 1L && is.na(x))

## Extract one named field ("Ontology" or "Evidence") from every entry of a
## GO annotation list.  Always returns a character vector of the same length
## (and with the same names) as 'inlist'; missing entries give NA, so the
## result can safely be used to index 'inlist'.
.goEntryField <- function(inlist, field) {
    vapply(inlist, function(z) {
        if (.isMissingGOEntry(z))
            NA_character_
        else
            as.character(z[[field]])[1L]
    }, character(1))
}

##get the GO term names for a particular (sub)ontology
## Returns the unique names of the entries of 'inlist' whose Ontology is
## 'ontology'.  The selection is made on the full-length vector: dropping
## the NAs first, as was done before, shifted the index against 'inlist'
## whenever an entry was missing and returned the wrong names.
getOntology <- function(inlist, ontology=c("MF", "BP", "CC")) {
    which <- match.arg(ontology)
    onts <- .goEntryField(inlist, "Ontology")
    unique(names(inlist)[onts %in% which])
}


##get GO evidence codes
## Returns the evidence code of every non-missing entry, named like 'inlist'.
getEvidence <- function(inlist) {
    ans <- .goEntryField(inlist, "Evidence")
    ans[!is.na(ans)]
}


##drop a specified set of evidence codes
## Returns 'inlist' without missing entries and without the entries whose
## evidence code is in 'code'.  As in getOntology(), the filter is computed
## on a vector aligned with 'inlist'.
dropECode <- function(inlist, code = "IEA") {
    evidence <- .goEntryField(inlist, "Evidence")
    inlist[!is.na(evidence) & !(evidence %in% code)]
}


## helper function, determines if there is a GO annotation for the
## desired mode
## 'x' may be a GOTerms object, a list of GOTerms objects or a character
## vector of GO ids; 'which' is the ontology to test for.
hasGOannote <- function(x, which="MF") {
    if (is(x, "GOTerms")) {
        ont <- Ontology(x)
        if (!is.na(ont) && ont == which)
            return(TRUE)
        return(FALSE)
    }
    if (is.list(x)) {
        isTerm <- vapply(x, function(y) is(y, "GOTerms"), logical(1))
        if (any(isTerm)) {
            if (!all(isTerm))
                stop("mixed arguments not allowed")
            return(sapply(x, Ontology) == which)
        }
    }
    if (!is.character(x))
        stop("wrong argument")
    getGOOntology(x) == which
}


##three functions to get all the GO information for a set of GO terms
##FIXME: these need to be renovated - probably removed even..
## Ontology of each GO id in 'x' (a character vector); ids are looked up in
## GO.db::GOTERM.
getGOOntology <- function(x) {
    if( !is.character(x) )
        stop("need a character argument")
    if(length(x) == 0 )
        return( character(0))
    loadNamespace("GO.db")
    wh <- mget(x, envir=GO.db::GOTERM, ifnotfound=NA)
    sapply(wh, Ontology)
}

## For each GO id in 'x' return list(Ontology=, Parents=), taken from the
## first of the MF, BP and CC parent maps that knows the id.  Stops when an
## id is in none of them.
getGOParents <- function(x) {
    if( !is.character(x) )
        stop("need a character argument")
    if(length(x) == 0 )
        return(list())
    loadNamespace("GO.db")
    ## searched in this order: MF, BP, CC
    parentsByOnt <- list(
        MF=mget(x, envir=GO.db::GOMFPARENTS, ifnotfound=NA),
        BP=mget(x, envir=GO.db::GOBPPARENTS, ifnotfound=NA),
        CC=mget(x, envir=GO.db::GOCCPARENTS, ifnotfound=NA))
    lapply(setNames(seq_along(x), x),
           function(i) {
               for (ont in names(parentsByOnt)) {
                   found <- parentsByOnt[[ont]][[i]]
                   if (!identical(found, NA))
                       return(list(Ontology=ont, Parents=found))
               }
               stop(paste(x[[i]], "is not a member of any ontology"))
           }
    )
}

## For each GO id in 'x' return list(Ontology=, Children=), taken from the
## first of the MF, BP and CC children maps that knows the id, or an empty
## list when none does.
getGOChildren <- function(x) {
    if( !is.character(x) )
        stop("need a character argument")
    if(length(x) == 0 )
        return(list())
    loadNamespace("GO.db")
    ## searched in this order: MF, BP, CC
    childrenByOnt <- list(
        MF=mget(x, envir=GO.db::GOMFCHILDREN, ifnotfound=NA),
        BP=mget(x, envir=GO.db::GOBPCHILDREN, ifnotfound=NA),
        CC=mget(x, envir=GO.db::GOCCCHILDREN, ifnotfound=NA))
    lapply(setNames(seq_along(x), x),
           function(i) {
               for (ont in names(childrenByOnt)) {
                   found <- childrenByOnt[[ont]][[i]]
                   if (!identical(found, NA))
                       return(list(Ontology=ont, Children=found))
               }
               list() # not an error (unlike for getGOParents() above)
           }
    )
}

## Term names of the GO ids in 'x', split by ontology.  Ids without a
## GOTerms entry in GO.db::GOTERM are dropped.
getGOTerm <- function(x) {
    if( !is.character(x) )
        stop("need a character argument")
    if(length(x) == 0 )
        return(list())
    loadNamespace("GO.db")
    terms <- mget(x, envir=GO.db::GOTERM, ifnotfound=NA)
    isTerm <- vapply(terms,
                     function(term) isS4(term) && is(term, "GOTerms"),
                     logical(1))
    terms <- terms[isTerm]

    ontology <- vapply(terms, Ontology, character(1))
    termNames <- vapply(terms, Term, character(1))
    split(termNames, ontology)
}


## Logical vector: which of 'goids' belong to 'ontology' ("BP", "CC" or
## "MF"), i.e. are keys of the matching GO.db *PARENTS map.
filterGOByOntology <- function(goids, ontology=c("BP", "CC", "MF")) {
    ontology <- match.arg(ontology)
    eName <- switch(ontology,
                    BP="GOBPPARENTS",
                    CC="GOCCPARENTS",
                    MF="GOMFPARENTS",
                    stop("invalid ontology ", ontology))
    ## Fetch the map from the GO.db namespace, as the functions above do;
    ## a bare get() only worked when GO.db happened to be attached.
    e <- getExportedValue("GO.db", eName)
    goids %in% ls(e)
}

## Return all GO IDs in the specified ontologies
## 'ont' is a vector of ontology codes ("BP", "CC", "MF"); duplicates are
## ignored and unknown codes are an error.  The ids are returned ontology
## by ontology, in the order of unique(ont).
aqListGOIDs <- function(ont) {
    ont <- unique(ont)
    knownOnts <- c("BP", "CC", "MF")
    badOnt <- ont[!(ont %in% knownOnts)]
    if (length(badOnt) > 0L)
        stop("Unknown ontology codes: ", paste(badOnt, collapse=", "),
             "\nvalid codes are: ", paste(knownOnts, collapse=", "))
    ## The ids are collected per ontology and concatenated.  The previous
    ## index arithmetic offset each chunk by the length of the preceding
    ## ontology only, not by the running total, so with three ontologies the
    ## third chunk overwrote part of the result and left empty strings at
    ## the end.
    ids <- lapply(ont, function(o)
        ls(getAnnMap(paste0(o, "PARENTS"), chip="GO")))
    as.character(unlist(ids, use.names=FALSE))
}
--------------------------------------------------------------------------------
/R/findNeighbors.R:
--------------------------------------------------------------------------------
1 | findNeighbors <- function(chrLoc, llID, chromosome, upBase, downBase, 2 | 
mergeOrNot = TRUE){ 3 | 4 | require(chrLoc, character.only = TRUE) || 5 | stop(paste("Chromomosome location chrLoc", 6 | "is not available on the system", 7 | "Either build one or get one from", 8 | "BioConductor")) 9 | 10 | if(checkArgs(llID, chromosome, upBase, downBase) == "swap"){ 11 | temp <- upBase 12 | upBase <- downBase 13 | downBase <- temp 14 | } 15 | upBase <- as.numeric(ifelse(missing(upBase), 0, upBase)) 16 | downBase <- as.numeric(ifelse(missing(downBase), 0, downBase)) 17 | if(missing(chromosome)){ 18 | chromosome <- findChr4LL(llID, get(paste(chrLoc, 19 | "LOCUSID2CHR", sep = "")), 20 | gsub("CHRLOC", "", chrLoc)) 21 | } 22 | if(!missing(llID)){ 23 | # Find the location for the target gene 24 | location <- as.numeric(get(llID, get(paste(chrLoc, 25 | chromosome, "START", sep = "")))) 26 | }else{ 27 | location <- (downBase - upBase)/2 28 | } 29 | upperB <- getBoundary(location, upBase, TRUE) 30 | downB <- getBoundary(location, downBase, FALSE) 31 | neighbors <- list() 32 | # There may be chances that a llID be mapped to genes on different CHR 33 | for(i in chromosome){ 34 | start <- unlist(contents(get(paste(chrLoc, chromosome, 35 | "START", sep=""))), 36 | use.names=TRUE) 37 | 38 | end <- unlist(contents(get(paste(chrLoc, chromosome, 39 | "END", sep=""))), 40 | use.names=TRUE) 41 | 42 | if(!missing(llID)){ 43 | # greb the ones in the range 44 | foundUp <- weightByConfi(start[start > upperB & 45 | start < min(location)]) 46 | foundDown <- weightByConfi(end[end < downB & 47 | end > max(location)]) 48 | if(length(foundUp) != 0 || length(foundDown) != 0){ 49 | if(mergeOrNot){ 50 | neighbors[[as.character(i)]] <- unique(c(foundUp, 51 | foundDown)) 52 | }else{ 53 | neighbors[[as.character(i)]] <- 54 | list(upstream = foundUp, downstream = foundDown) 55 | } 56 | } 57 | }else{ 58 | found <- weightByConfi(c(start[start >= upperB & 59 | start <= location], 60 | end[end <= downB & end >= location])) 61 | if(length(found) != 0){ 62 | neighbors[[as.character(i)]] 
<- unique(found) 63 | } 64 | } 65 | } 66 | 67 | if(length(neighbors) == 0){ 68 | warning("No Genes in the defined region satisfy the condition") 69 | }else{ 70 | return(neighbors) 71 | } 72 | } 73 | 74 | checkArgs <- function(llID, chromosome, upBase, downBase){ 75 | # llID is not required if search for genes within a range 76 | if(missing(llID)){ 77 | # Both upBase, downBase, and chromosome must be there if 78 | # llID is missing 79 | if(any(missing(upBase), missing(downBase), missing(chromosome))){ 80 | stop(paste("Search can not be conducted with llID and", 81 | "at least one of upBase, downBase and ", 82 | "chromosome missing")) 83 | }else{ 84 | if(as.numeric(upBase) < as.numeric(downBase)){ 85 | warning(paste("upBase value is smaller then downBase", 86 | "value. Values have been swapped")) 87 | return("swap") 88 | } 89 | if(as.numeric(upBase) == as.numeric(downBase)){ 90 | stop("upBase and downBase can not be the same") 91 | } 92 | } 93 | }else{ 94 | if(missing(upBase)){ 95 | warning(paste("upBase is missing. Search will be", 96 | "conducted for genes downstream only")) 97 | } 98 | if(missing(downBase)){ 99 | warning(paste("downBase is missing. 
Search will be", 100 | "conducted for genes upstream only")) 101 | } 102 | } 103 | return("OK") 104 | } 105 | 106 | findChr4LL <- function(llID, chrEnv, organism){ 107 | options(show.error.message = FALSE) 108 | chr <- try(chrEnv[[llID]]) 109 | options(show.error.message = TRUE) 110 | if(inherits(chr, "try-error")){ 111 | stop(paste("Entrez Gene ID", llID, "could not be found in any", 112 | "of the chromosomes in the data package")) 113 | }else{ 114 | if(length(chr) == 1){ 115 | if(!is.element(chr, getValidChr(organism))){ 116 | warning(paste("Entrez Gene ID", llID, "is currently", 117 | "not known to be associated with any", 118 | "chromosome")) 119 | } 120 | return(chr) 121 | }else{ 122 | chr <- chr[is.element(chr, getValidChr(organism))] 123 | return(unique(chr)) 124 | } 125 | } 126 | } 127 | 128 | 129 | getValidChr <- function(organism){ 130 | switch(toupper(organism), 131 | HUMAN = return(c(1:22, "X", "Y")), 132 | MOUSE = return(c(1:19, "X", "Y")), 133 | RAT = return(c(1:20, "X", "Y")), 134 | stop(paste("Unknow organism", organism))) 135 | } 136 | 137 | getBoundary <- function(loc, base, lower = TRUE){ 138 | if(as.numeric(loc[1]) == 0){ 139 | return(base) 140 | }else{ 141 | if(lower){ 142 | boundary <- as.numeric(loc[1]) - base 143 | if(boundary < 0){ 144 | return(0) 145 | }else{ 146 | return(boundary) 147 | } 148 | }else{ 149 | return(as.numeric(loc[1]) + base) 150 | } 151 | } 152 | } 153 | 154 | weightByConfi <- function(foundLLs){ 155 | if(length(foundLLs) != 0){ 156 | temp <- unique(names(foundLLs)) 157 | foundLLs <- gsub("(^.*)\\..*", "\\1", temp) 158 | names(foundLLs) <- gsub("^.*\\.(.*)", "\\1", temp) 159 | # Remove LLs named Unconfident if one named Confident exists 160 | if(any(duplicated(foundLLs))){ 161 | foundLLs <- c(foundLLs[names(foundLLs) == "Confident"], 162 | foundLLs[names(foundLLs) != "Confident"]) 163 | foundLLs <- foundLLs[!duplicated(foundLLs)] 164 | } 165 | 166 | return(foundLLs) 167 | }else{ 168 | return("") 169 | } 170 | } 171 | 
-------------------------------------------------------------------------------- /R/pubMedAbst.R: --------------------------------------------------------------------------------
## Define the class structure of the pubMedAbst object
setGeneric("pubMedAbst", function(object)
           standardGeneric("pubMedAbst"))

## One PubMed abstract: identifier, author names, abstract text, title,
## journal and publication date.
setClass("pubMedAbst",
         representation(pmid="character", authors="vector",
                        abstText="character", articleTitle="character",
                        journal="character", pubDate="character"))

## Print a short, line-wrapped summary (the abstract text is not shown)
setMethod("show", "pubMedAbst", function(object) {
    s <- c("An object of class 'pubMedAbst':",
           paste("Title: ", articleTitle(object)),
           paste("PMID: ", pmid(object)),
           paste("Authors:", paste(authors(object), collapse=", ")),
           paste("Journal:", journal(object)),
           paste("Date: ", pubDate(object)))
    cat(strwrap(s, exdent=5), sep="\n")
})

## Define generics
## Each accessor generic is only created if no generic of that name is
## already defined.
if (is.null(getGeneric("authors")))
    setGeneric("authors", function(object)
               standardGeneric("authors"))

if (is.null(getGeneric("abstText")))
    setGeneric("abstText", function(object)
               standardGeneric("abstText"))

if (is.null(getGeneric("articleTitle")))
    setGeneric("articleTitle", function(object)
               standardGeneric("articleTitle"))

if (is.null(getGeneric("journal")))
    setGeneric("journal", function(object)
               standardGeneric("journal"))

if (is.null(getGeneric("pubDate")))
    setGeneric("pubDate", function(object)
               standardGeneric("pubDate"))

if (is.null(getGeneric("pmid")))
    setGeneric("pmid", function(object)
               standardGeneric("pmid"))

## Methods
## Plain slot accessors
setMethod("authors", "pubMedAbst", function(object)
          object@authors)
setMethod("abstText", "pubMedAbst", function(object)
          object@abstText)
setMethod("articleTitle", "pubMedAbst", function(object)
          object@articleTitle)
setMethod("journal", "pubMedAbst", function(object)
          object@journal)
setMethod("pubDate", "pubMedAbst", function(object)
          object@pubDate)
setMethod("pmid", "pubMedAbst", function(object)
          object@pmid)

## Text of an XML node, or `default` when it cannot be extracted.
## `node` is evaluated lazily inside try(), so a failure while
## navigating to the node is handled like a failure while reading it.
## silent = TRUE keeps the error message off the console without
## changing any global option.
.pmNodeText <- function(node, default) {
    txt <- try(as.character(xmlChildren(node)$text)[6], silent = TRUE)
    if (inherits(txt, "try-error")) default else txt
}

buildPubMedAbst <- function(xml) {
    ## Passed in a XML tree detailing a single article
    ## will parse the XML and create a new class
    ## Every field that cannot be read is replaced by a placeholder
    ## string; the global option show.error.messages is left untouched.

    xmlMedline <- xml["MedlineCitation"][[1]]
    xmlArticle <- xmlMedline["Article"]

    ## Get the PMID
    pmid <- .pmNodeText(xmlMedline["PMID"][[1]], "No PMID Provided")

    ## Retrieve Article Title
    articleTitle <- .pmNodeText(xmlArticle[[1]][["ArticleTitle"]],
                                "No Title Provided")

    ## Retrieve the abstract
    abstText <-
        .pmNodeText(xmlArticle[[1]]["Abstract"][[1]]["AbstractText"][[1]],
                    "No Abstract Provided")

    ## Retrieve the date - get the year/month separately and then
    ## join them at the end.  If no month or year provided, subst
    ## "Month" and "Year" respectively
    pubDateBase <-
        xmlArticle[[1]]["Journal"][[1]]["JournalIssue"][[1]]["PubDate"]
    pubDateMonth <- .pmNodeText(pubDateBase[[1]]["Month"][[1]], "Month")
    pubDateYear <- .pmNodeText(pubDateBase[[1]]["Year"][[1]], "Year")
    ## Join up the date information
    pubDate <- paste(pubDateMonth,pubDateYear)

    ## Get the journal this was published in
    journal <- .pmNodeText(
        xml["MedlineCitation"][[1]]["MedlineJournalInfo"][[1]]["MedlineTA"][[1]],
        "No Journal Provided")

    ## Build up a vector of author names, created by assembling the
    ## pieces of each author's name.
    authorList <- xmlArticle[[1]]["AuthorList"]
    numAuthors <- try(length(xmlChildren(authorList[[1]])), silent = TRUE)
    if (inherits(numAuthors,"try-error")) {
        authors <- "No Author Information Provided"
    }
    else {
        ## seq_len() gives no iterations for an empty author list
        authors <- vapply(seq_len(numAuthors), function(i) {
            curAuthor <- authorList[[1]][i]
            last <- .pmNodeText(curAuthor[[1]]["LastName"][[1]],
                                "LastName")
            initial <- .pmNodeText(curAuthor[[1]]["Initials"][[1]], "M")
            paste(initial,last)
        }, character(1))
    }

    new("pubMedAbst", articleTitle=articleTitle,
        abstText=abstText, pubDate=pubDate,authors=authors,
        journal=journal, pmid=pmid)
}

## Fetch the PubMed abstracts for a set of gene identifiers.
## geneids:  identifiers passed to getPMID()
## basename: second argument of getPMID()
## Returns a list named by `geneids`; each element is NA when the gene
## has no PubMed IDs, otherwise a list of pubMedAbst objects.
pm.getabst <- function(geneids, basename) {
    pmids <- getPMID(geneids, basename)
    numids <- length(geneids)
    rval <- vector("list", length=numids)
    names(rval) <- geneids
    for(i in seq_len(numids)) {
        pm <- pmids[[i]]
        if( length(pm)==1 && is.na(pm) )
            rval[[i]] <- NA
        else {
            a <- xmlRoot(pubmed(pm))
            absts <- lapply(seq_along(xmlChildren(a)),
                            function(j) buildPubMedAbst(a[[j]]))
            rval[[i]] <- absts
        }
    }
    rval
}

## Does the text of each abstract match `pattern`?
## pattern: regular expression handed to grep()
## absts:   list of pubMedAbst objects
## ...:     further arguments for grep()
## Returns a logical vector with one element per abstract.
pm.abstGrep <- function(pattern, absts, ...)
{
    rval <- rep(FALSE, length(absts))
    for(i in seq_along(absts)) {
        atxt <- abstText(absts[[i]])
        ans <- grep(pattern, atxt, ...)
        ## only the first result is tested so that `&&` always gets a
        ## length-one condition
        if( length(ans) > 0 && ans[1]==1 )
            rval[i] <- TRUE
    }
    rval
}

## Article titles of a list of lists of pubMedAbst objects (the shape
## returned by pm.getabst()).  Returns an unnamed list with one element
## per element of `absts`.
pm.titles <- function (absts) {
    lapply(unname(absts),
           function(a) sapply(a, function(x) articleTitle(x)))
}

-------------------------------------------------------------------------------- /vignettes/prettyOutput.Rnw: -------------------------------------------------------------------------------- 1 | % 2 | % NOTE -- ONLY EDIT THE .Rnw FILE!!! The .tex file is 3 | % likely to be overwritten. 4 | % 5 | % \VignetteIndexEntry{HowTo: Get HTML Output} 6 | % \VignetteDepends{annotate, hgu95av2.db} 7 | % \VignetteKeywords{Expression Analysis, Annotation} 8 | % \VignettePackage{annotate} 9 | \documentclass[11pt]{article} 10 | 11 | 12 | \newcommand{\Rfunction}[1]{{\texttt{#1}}} 13 | \newcommand{\Rmethod}[1]{{\texttt{#1}}} 14 | 15 | \newcommand{\Robject}[1]{{\texttt{#1}}} 16 | \newcommand{\Rpackage}[1]{{\textit{#1}}} 17 | \newcommand{\Rclass}[1]{{\textit{#1}}} 18 | 19 | \usepackage[authoryear,round]{natbib} 20 | 21 | 22 | \bibliographystyle{plainnat} 23 | 24 | \usepackage{hyperref} 25 | 26 | \begin{document} 27 | \title{HowTo: get pretty HTML output for my gene list} 28 | \author{James W. MacDonald} 29 | \maketitle{} 30 | 31 | \section{Overview} 32 | The intent of this vignette is to show how to make reasonably nice 33 | looking HTML tables for presenting the results of a microarray 34 | analysis. These tables are a very nice format because you can insert 35 | clickable links to various public annotation databases, which 36 | facilitates the downstream analysis. In addition, the format is quite 37 | compact, can be posted on the web, and can be viewed using any number 38 | of free web browsers. One caveat; an HTML table is probably not the 39 | best format for presenting the results for \emph{all} of the genes on 40 | a chip.
For even a small (5000 gene) chip, the file could be 10 Mb or 41 | more, which would take an inordinate amount of time to open and 42 | view. Also note that the Bioconductor project supplies annotation 43 | packages for many of the more popular Affymetrix chips, as well as for 44 | many commercial spotted cDNA chips. For chips that have annotation 45 | packages, the \Rpackage{annaffy} package is the preferred method for 46 | making HTML tables. 47 | 48 | To make an annotated HTML table, the only requirement is that we have 49 | some sort of annotation data for the microarray that we are 50 | using. Most manufacturers supply data in various formats that can be 51 | read into \Rpackage{R}. For instance, Affymetrix supplies CSV files 52 | that can be read into \Rpackage{R} using the \Rmethod{read.csv()} 53 | function 54 | \url{http://www.affymetrix.com/support/technical/byproduct.affx?cat=arrays}. 55 | 56 | \section{Alternate methods} 57 | Please note that one can also make these HTML tables by parsing data from 58 | e.g., an online (or local) Biomart database, using functions in the biomaRt 59 | package. This may be easier, and may result in more current annotation data. 60 | Please see the prettyOutput vignette in the biomaRt package for more information. 61 | 62 | \section{Data Analysis} 63 | I will assume that the reader is familiar with the analysis of 64 | microarray data, and has a set of genes that she would like to use. In 65 | addition, I will assume that the reader is familiar enough with 66 | \Rpackage{R} that she can subset the data based on a list of genes, 67 | and reorder based on a particular statistic. For any questions about 68 | subsetting or ordering data, please see ``An Introduction to R''. For 69 | questions regarding microarray analysis, please consult the vignettes 70 | for, say \Rpackage{limma}, \Rpackage{multtest}, or \Rpackage{marray}. 
71 | 72 | \section{Getting Started} 73 | We first load the \Rpackage{annotate} package, as well as some 74 | data. These data will be from the Affymetrix HG-U95Av2 chip (for which 75 | we would normally use \Rpackage{annaffy}). To keep the HTML table 76 | small, we will take a subset of fifteen genes as an example. 77 | 78 | <<>>= 79 | options(width=70) 80 | @ 81 | 82 | <<>>= 83 | library("annotate") 84 | data(sample.ExpressionSet) 85 | igenes <- featureNames(sample.ExpressionSet)[246:260] 86 | @ 87 | 88 | \section{Annotation Data} 89 | <<>>= 90 | ug <- c("Hs.169284 // ---", "Hs.268515 // full length", "Hs.103419 // full length", "Hs.380429 // ---" ,"--- // ---", 91 | "Hs.169331 // full length", "Hs.381231 // full length", "Hs.283781 // full length", "--- // ---", "--- // ---", 92 | "Hs.3195 // full length", "--- // ---", "Hs.176660 // full length", "Hs.272484 // full length", "Hs.372679 // full length") 93 | ll <- c("221823", "4330", "9637", "---", "---", "6331", "841", "27335", "---", "---", "6375", "---", "2543", "2578", "2215") 94 | gb <- c("M57423", "Z70218", "L17328", "S81916", "U63332", "M77235", "X98175", "AB019392", "J03071", "D25272", "D63789", 95 | "D63789", "U19142", "U19147", "X16863") 96 | sp <- c("P21108", "Q10571", "Q9UHY8", "Q16444", "---", "Q14524 /// Q8IZC9 /// Q8WTQ6 /// Q8WWN5 /// Q96J69", "Q14790", "Q9UBQ5", 97 | "---", "---", "P47992", "---", "Q13065 /// Q8IYC5", "Q13070", "O75015") 98 | 99 | @ 100 | For this vignette I have supplied the annotation data. In a normal 101 | situation, these data would be subset from the manufacturer's 102 | annotation data, using the manufacturer's gene identifiers (which is 103 | how I got these IDs). 104 | 105 | First, we will look at the GenBank and LocusLink IDs. We will be able 106 | to use these IDs without further modification. Note that the LocusLink 107 | IDs contain some missing data (``---'').
This will not pose a problem 108 | because LocusLink IDs are all numeric, so we have incorporated code in 109 | \Rmethod{htmlpage()} to automatically convert any non-numeric ID to an 110 | HTML empty cell character (``\&nbsp;''). GenBank IDs (which often 111 | correspond to either RefSeq or GenBank IDs) are not as consistent, so 112 | any missing data would have to be manually converted to the HTML empty 113 | cell character. Missing data for LocusLink, UniGene and OMIM IDs are 114 | automatically converted, whereas Affymetrix, SwissProt and GenBank IDs 115 | have to be done manually. I will give examples of how to do this 116 | below. 117 | <<>>= 118 | gb 119 | ll 120 | @ 121 | 122 | The UniGene and SwissProt IDs present different challenges, so we will 123 | modify them separately. For the UniGene IDs we need to strip off the 124 | extra information appended to each ID. If we didn't do this, our 125 | hyperlink would not work correctly. 126 | 127 | <<>>= 128 | ug 129 | ug <- sub(" //.*$", "", ug) 130 | ug 131 | @ 132 | 133 | The SwissProt IDs present a different challenge. Here there isn't any 134 | extra information. Instead, we have multiple IDs for some of the 135 | genes, and missing IDs for some of the others. Because the code for 136 | SwissProt IDs will not automatically handle missing data, we have to 137 | convert the missing data to an HTML empty cell identifier 138 | (``\&nbsp;''). For \Rmethod{htmlpage()} to correctly handle multiple 139 | IDs, we have to convert the character vector into a \emph{list} of 140 | character vectors. 141 | <<>>= 142 | sp 143 | sp <- strsplit(sub("---","&nbsp;",as.character(sp)), "///") 144 | sp 145 | @ 146 | 147 | We have converted the data to a list of character vectors, and also 148 | converted the ``---'' missing data identifier to the HTML character 149 | for an empty cell.
150 | 151 | \section{Build the Table} 152 | 153 | Usually we would like to include the expression values for our genes 154 | along with some statistics, say a $t$-statistic, fold change, and 155 | $p$-value. As an example, we will make a comparison using the first 156 | ten samples. 157 | 158 | <<>>= 159 | dat <- exprs(sample.ExpressionSet)[igenes,1:10] 160 | FC <- rowMeans(dat[igenes,1:5]) - rowMeans(dat[igenes,6:10]) 161 | pval <- esApply(sample.ExpressionSet[igenes,1:10], 1, function(x) t.test(x[1:5], x[6:10])$p.value) 162 | tstat <- esApply(sample.ExpressionSet[igenes,1:10], 1, function(x) t.test(x[1:5], x[6:10])$statistic) 163 | @ 164 | 165 | It is also usually a good idea to include gene names in the 166 | table. Normally the names would be subsetted from the annotation data, 167 | but here we have to supply them. Again, we have to manually convert 168 | any missing names to the HTML empty cell character. 169 | 170 | <<>>= 171 | name <- c("hypothetical protein LOC221823", 172 | "meningioma (disrupted in balanced translocation) 1", 173 | "fasciculation and elongation protein zeta 2 (zygin II)", 174 | "Phosphoglycerate kinase {alternatively spliced}", 175 | "---","sodium channel, voltage-gated, type V, alpha polypeptide", 176 | "caspase 8, apoptosis-related cysteine protease","muscle specific gene","---","---","chemokine (C motif) ligand 1", 177 | "---","G antigen 1","G antigen 6","Fc fragment of IgG, low affinity IIIb, receptor for (CD16)") 178 | @ 179 | <<>>= 180 | name 181 | name <- gsub("---", "&nbsp;", name) 182 | name 183 | @ 184 | 185 | 186 | We can now build our HTML table. To make the process more transparent, 187 | this will be done in steps. In practice however, this can be done in 188 | one line. Note here that the genelist consists of annotation data that 189 | will be hyperlinked to online databases, whereas othernames consists 190 | of other data that will not be hyperlinked.
191 | 192 | <<>>= 193 | genelist <- list(igenes, ug, ll, gb, sp) 194 | filename <- "Interesting_genes.html" 195 | title <- "An Artificial Set of Interesting Genes" 196 | othernames <- list(name, round(tstat, 2), round(pval, 3), round(FC, 1), round(dat, 2)) 197 | head <- c("Probe ID", "UniGene", "LocusLink", "GenBank", "SwissProt", "Gene Name", "t-statistic", "p-value", 198 | "Fold Change", "Sample 1", "Sample 2", "Sample 3", "Sample 4", "Sample 5", "Sample 6", 199 | "Sample 7", "Sample 8", "Sample 9", "Sample 10") 200 | repository <- list("affy", "ug", "en", "gb", "sp") 201 | htmlpage(genelist, filename, title, othernames, head, repository = repository) 202 | @ 203 | 204 | \section{Session Information} 205 | 206 | The versions of R and of the packages loaded for generating this vignette were: 207 | 208 | <<>>= 209 | sessionInfo() 210 | @ 211 | 212 | \end{document} 213 | --------------------------------------------------------------------------------