├── .BBSoptions
├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── NEWS
├── R
    ├── FeatureDb-class.R
    ├── TxDb-SELECT-helpers.R
    ├── TxDb-class.R
    ├── TxDb-schema.R
    ├── coordinate-mapping-methods.R
    ├── coverageByTranscript.R
    ├── exonicParts.R
    ├── extendExonsIntoIntrons.R
    ├── extractTranscriptSeqs.R
    ├── extractUpstreamSeqs.R
    ├── features.R
    ├── getPromoterSeq-methods.R
    ├── id2name.R
    ├── makeFeatureDbFromUCSC.R
    ├── makeTxDb.R
    ├── makeTxDbFromBiomart.R
    ├── makeTxDbFromEnsembl.R
    ├── makeTxDbFromGFF.R
    ├── makeTxDbFromGRanges.R
    ├── makeTxDbFromUCSC.R
    ├── makeTxDbPackage.R
    ├── mapIdsToRanges.R
    ├── nearest-methods.R
    ├── proteinToGenome.R
    ├── select-methods.R
    ├── tRNAs.R
    ├── transcriptLengths.R
    ├── transcriptLocs2refLocs.R
    ├── transcripts.R
    ├── transcriptsBy.R
    ├── transcriptsByOverlaps.R
    ├── utils.R
    └── zzz.R
├── README.md
├── TODO
├── inst
    ├── CITATION
    ├── extdata
    │   ├── Biomart_Ensembl_sample.sqlite
    │   ├── FeatureDb.sqlite
    │   ├── ITAG4.1_gene_models.subset.gff
    │   ├── cD.exByEdge-SG-Vig.Rda
    │   ├── cD.exsByGenes-SG-Vig.Rda
    │   ├── events.Rda
    │   ├── hg19_knownGene_sample.sqlite
    │   └── sample_ranges.rds
    ├── script
    │   └── README
    └── unitTests
    │   ├── test_TxDb_seqinfo.R
    │   ├── test_coordinate-mapping-methods.R
    │   ├── test_exonicParts.R
    │   ├── test_getPromoterSeq-methods.R
    │   ├── test_makeIdsForUniqueDataFrameRows.R
    │   ├── test_mapIdsToRanges.R
    │   ├── test_nearest-methods.R
    │   ├── test_select-methods.R
    │   ├── test_transcriptLengths.R
    │   ├── test_transcripts.R
    │   ├── test_transcriptsBy.R
    │   └── test_transcriptsByOverlaps.R
├── man
    ├── FeatureDb-class.Rd
    ├── TxDb-class.Rd
    ├── as-format-methods.Rd
    ├── coordinate-mapping-methods.Rd
    ├── coverageByTranscript.Rd
    ├── exonicParts.Rd
    ├── extendExonsIntoIntrons.Rd
    ├── extractTranscriptSeqs.Rd
    ├── extractUpstreamSeqs.Rd
    ├── features.Rd
    ├── getPromoterSeq-methods.Rd
    ├── id2name.Rd
    ├── makeFeatureDbFromUCSC.Rd
    ├── makeTxDb.Rd
    ├── makeTxDbFromBiomart.Rd
    ├── makeTxDbFromEnsembl.Rd
    ├── makeTxDbFromGFF.Rd
    ├── makeTxDbFromGRanges.Rd
    ├── makeTxDbFromUCSC.Rd
    ├── makeTxDbPackage.Rd
    ├── mapIdsToRanges.Rd
    ├── mapRangesToIds.Rd
    ├── nearest-methods.Rd
    ├── proteinToGenome.Rd
    ├── select-methods.Rd
    ├── tRNAs.Rd
    ├── transcriptLengths.Rd
    ├── transcriptLocs2refLocs.Rd
    ├── transcripts.Rd
    ├── transcriptsBy.Rd
    └── transcriptsByOverlaps.Rd
├── tests
    └── run_unitTests.R
└── vignettes
    └── GenomicFeatures.Rmd


/.BBSoptions:
--------------------------------------------------------------------------------
1 | RunLongTests: TRUE
2 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | .BBSoptions
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.Rhistory
2 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: GenomicFeatures
 2 | Title: Query the gene models of a given organism/assembly
 3 | Description: Extract the genomic locations of genes, transcripts, exons,
 4 | 	introns, and CDS, for the gene models stored in a TxDb object.
 5 | 	A TxDb object is a small database that contains the gene models of
 6 | 	a given organism/assembly. Bioconductor provides a small collection
 7 | 	of TxDb objects in the form of ready-to-install TxDb packages for
 8 | 	the most commonly studied organisms. Additionally, the user can
 9 | 	easily make a TxDb object (or package) for the organism/assembly
10 | 	of their choice by using the tools from the txdbmaker package.
11 | biocViews: Genetics, Infrastructure, Annotation, Sequencing,
12 | 	GenomeAnnotation
13 | URL: https://bioconductor.org/packages/GenomicFeatures
14 | BugReports: https://github.com/Bioconductor/GenomicFeatures/issues
15 | Version: 1.61.3
16 | License: Artistic-2.0
17 | Encoding: UTF-8
18 | Authors@R: c(
19 | 	person("M.", "Carlson", role="aut"),
20 | 	person("H.", "Pagès", role=c("aut", "cre"),
21 | 		email="hpages.on.github@gmail.com"),
22 | 	person("P.", "Aboyoun", role="aut"),
23 | 	person("S.", "Falcon", role="aut"),
24 | 	person("M.", "Morgan", role="aut"),
25 | 	person("D.", "Sarkar", role="aut"),
26 | 	person("M.", "Lawrence", role="aut"),
27 | 	person("V.", "Obenchain", role="aut"),
28 | 	person("S.", "Arora", role="ctb"),
29 | 	person("J.", "MacDonald", role="ctb"),
30 | 	person("M.", "Ramos", role="ctb"),
31 | 	person("S.", "Saini", role="ctb"),
32 | 	person("P.", "Shannon", role="ctb"),
33 | 	person("L.", "Shepherd", role="ctb"),
34 | 	person("D.", "Tenenbaum", role="ctb"),
35 | 	person("D.", "Van Twisk", role="ctb"))
36 | Depends: BiocGenerics (>= 0.51.2), S4Vectors (>= 0.17.29),
37 | 	IRanges (>= 2.37.1), GenomeInfoDb (>= 1.35.8),
38 | 	GenomicRanges (>= 1.55.2), AnnotationDbi (>= 1.41.4)
39 | Imports: methods, utils, stats, DBI, XVector, Biostrings, rtracklayer
40 | Suggests: txdbmaker, org.Mm.eg.db, org.Hs.eg.db,
41 | 	BSgenome, BSgenome.Hsapiens.UCSC.hg19 (>= 1.3.17),
42 | 	BSgenome.Celegans.UCSC.ce11,
43 | 	BSgenome.Dmelanogaster.UCSC.dm3 (>= 1.3.17),
44 | 	FDb.UCSC.tRNAs,
45 | 	TxDb.Hsapiens.UCSC.hg19.knownGene,
46 | 	TxDb.Celegans.UCSC.ce11.ensGene,
47 | 	TxDb.Dmelanogaster.UCSC.dm3.ensGene (>= 2.7.1),
48 | 	TxDb.Mmusculus.UCSC.mm10.knownGene (>= 3.4.7),
49 | 	TxDb.Hsapiens.UCSC.hg19.lincRNAsTranscripts,
50 | 	TxDb.Hsapiens.UCSC.hg38.knownGene (>= 3.4.6),
51 | 	SNPlocs.Hsapiens.dbSNP144.GRCh38,
52 | 	Rsamtools, pasillaBamSubset (>= 0.0.5), GenomicAlignments (>= 1.15.7),
53 | 	ensembldb, AnnotationFilter,
54 | 	RUnit, BiocStyle, knitr, markdown
55 | VignetteBuilder: knitr
56 | Collate: utils.R
57 | 	TxDb-schema.R
58 | 	TxDb-SELECT-helpers.R
59 | 	TxDb-class.R FeatureDb-class.R
60 | 	mapIdsToRanges.R
61 | 	id2name.R
62 | 	transcripts.R
63 | 	transcriptsBy.R
64 | 	transcriptsByOverlaps.R
65 | 	transcriptLengths.R
66 | 	exonicParts.R
67 | 	extendExonsIntoIntrons.R
68 | 	features.R
69 | 	tRNAs.R
70 | 	extractTranscriptSeqs.R
71 | 	extractUpstreamSeqs.R
72 | 	getPromoterSeq-methods.R
73 | 	select-methods.R
74 | 	nearest-methods.R
75 | 	transcriptLocs2refLocs.R
76 | 	coordinate-mapping-methods.R
77 | 	proteinToGenome.R
78 | 	coverageByTranscript.R
79 | 	makeTxDb.R
80 | 	makeTxDbFromUCSC.R
81 | 	makeTxDbFromBiomart.R
82 | 	makeTxDbFromEnsembl.R
83 | 	makeTxDbFromGRanges.R
84 | 	makeTxDbFromGFF.R
85 | 	makeFeatureDbFromUCSC.R
86 | 	makeTxDbPackage.R
87 | 	zzz.R
88 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
  1 | import(methods)
  2 | importFrom(stats, setNames)
  3 | importFrom(utils, as.person)
  4 | 
  5 | importMethodsFrom(DBI, dbGetQuery, dbListTables, dbListFields)
  6 | 
  7 | import(AnnotationDbi)
  8 | import(BiocGenerics)
  9 | import(S4Vectors)
 10 | import(IRanges)
 11 | import(GenomeInfoDb)
 12 | import(XVector)
 13 | import(GenomicRanges)
 14 | 
 15 | importClassesFrom(Biostrings, DNAString, DNAStringSet, MaskedDNAString)
 16 | importFrom(Biostrings, DNAStringSet, reverseComplement, getSeq)
 17 | 
 18 | importFrom(rtracklayer, asBED, asGFF)
 19 | 
 20 | 
 21 | exportClasses(TxDb, FeatureDb)
 22 | 
 23 | export(
 24 |   ## id2name.R:
 25 |   id2name,
 26 | 
 27 |   ## transcripts.R:
 28 |   transcripts, exons, cds, genes,
 29 | 
 30 |   ## transcriptsBy.R:
 31 |   transcriptsBy,
 32 |   exonsBy,
 33 |   cdsBy,
 34 |   intronsByTranscript,
 35 |   fiveUTRsByTranscript,
 36 |   threeUTRsByTranscript,
 37 | 
 38 |   ## transcriptsByOverlaps.R:
 39 |   transcriptsByOverlaps,
 40 |   exonsByOverlaps,
 41 |   cdsByOverlaps,
 42 | 
 43 |   ## transcriptLengths.R:
 44 |   transcriptLengths,
 45 | 
 46 |   ## exonicParts.R:
 47 |   tidyTranscripts, tidyExons, tidyIntrons,
 48 |   exonicParts, intronicParts,
 49 | 
 50 |   ## extendExonsIntoIntrons.R:
 51 |   extendExonsIntoIntrons,
 52 | 
 53 |   ## features.R:
 54 |   features,
 55 | 
 56 |   ## tRNAs.R:
 57 |   microRNAs,
 58 |   tRNAs,
 59 | 
 60 |   ## extractTranscriptSeqs.R:
 61 |   extractTranscriptSeqs,
 62 | 
 63 |   ## extractUpstreamSeqs.R:
 64 |   extractUpstreamSeqs,
 65 | 
 66 |   ## getPromoterSeq-methods.R:
 67 |   getPromoterSeq, getTerminatorSeq,
 68 | 
 69 |   ## transcriptLocs2refLocs.R:
 70 |   transcriptLocs2refLocs,
 71 |   transcriptWidths,
 72 | 
 73 |   ## coordinate-mapping-methods.R:
 74 |   mapToTranscripts, pmapToTranscripts,
 75 |   mapFromTranscripts, pmapFromTranscripts,
 76 | 
 77 |   ## proteinToGenome.R:
 78 |   proteinToGenome,
 79 | 
 80 |   ## coverageByTranscript.R:
 81 |   coverageByTranscript,
 82 |   pcoverageByTranscript
 83 | )
 84 | 
 85 | exportMethods(
 86 |   organism,
 87 |   show,
 88 |   as.list,
 89 |   seqlevels0, "seqlevels<-", seqinfo,
 90 |   transcripts, exons, cds, genes,
 91 |   promoters, terminators,
 92 |   transcriptsByOverlaps,
 93 |   exonsByOverlaps,
 94 |   cdsByOverlaps,
 95 |   transcriptsBy,
 96 |   exonsBy,
 97 |   cdsBy,
 98 |   intronsByTranscript,
 99 |   fiveUTRsByTranscript,
100 |   threeUTRsByTranscript,
101 |   tRNAs,
102 |   extractTranscriptSeqs,
103 |   extractUpstreamSeqs,
104 |   getPromoterSeq, getTerminatorSeq,
105 |   isActiveSeq,
106 |   "isActiveSeq<-",
107 |   asBED, asGFF,
108 |   distance,
109 |   mapToTranscripts, pmapToTranscripts,
110 |   mapFromTranscripts, pmapFromTranscripts,
111 |   mapIdsToRanges, mapRangesToIds,
112 |   proteinToGenome
113 | )
114 | 
115 | 
116 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
117 | ### Stuff that has moved to txdbmaker
118 | ###
119 | 
120 | export(
121 |   ## makeTxDb.R:
122 |   makeTxDb,
123 | 
124 |   ## makeTxDbFromUCSC.R:
125 |   supportedUCSCtables,
126 |   browseUCSCtrack,
127 |   makeTxDbFromUCSC,
128 | 
129 |   ## makeTxDbFromBiomart.R:
130 |   getChromInfoFromBiomart,
131 |   makeTxDbFromBiomart,
132 | 
133 |   ## makeTxDbFromEnsembl.R:
134 |   makeTxDbFromEnsembl,
135 | 
136 |   ## makeTxDbFromGRanges.R:
137 |   makeTxDbFromGRanges,
138 | 
139 |   ## makeTxDbFromGFF.R:
140 |   makeTxDbFromGFF,
141 | 
142 |   ## makeFeatureDbFromUCSC.R:
143 |   supportedUCSCFeatureDbTracks,
144 |   supportedUCSCFeatureDbTables,
145 |   UCSCFeatureDbTableSchema,
146 |   makeFeatureDbFromUCSC,
147 | 
148 |   ## makeTxDbPackage.R:
149 |   supportedMiRBaseBuildValues,
150 |   makePackageName,
151 |   makeTxDbPackage,
152 |   makeTxDbPackageFromUCSC,
153 |   makeFDbPackageFromUCSC,
154 |   makeTxDbPackageFromBiomart
155 | )
156 | 
157 | 


--------------------------------------------------------------------------------
/R/FeatureDb-class.R:
--------------------------------------------------------------------------------
 1 | ### =========================================================================
 2 | ### FeatureDb objects
 3 | ### -------------------------------------------------------------------------
 4 | 
 5 | 
 6 | ## This is to try and tidy up before setRefClass()
 7 | gc()
 8 | 
 9 | .FeatureDb <-
10 |     setRefClass("FeatureDb", contains="AnnotationDb")
11 | 
12 | 
13 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
14 | ### A low-level accessor (not exported).
15 | ###
16 | 
17 | ## featuredbConn <- function(featuredb) featuredb$conn
18 | 
19 | 
20 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
21 | ### Validity of a FeatureDb object.
22 | ###
23 | 
24 | .validate.colnames <- function(conn, colnames)
25 | {
26 |     ## even though we don't know the name of the table, take advantage of the
27 |     ## fact that there are only two tables and one of them is always called
28 |     ## "metadata"
29 |     tablenames <- dbListTables(conn)
30 |     tablename <- tablenames[!tablenames %in% "metadata"]
31 |     AnnotationDbi:::.valid.colnames(conn, tablename, colnames)
32 | }
33 | 
34 | .valid.feature.table <- function(conn)
35 | {
36 |   ## Restrict column name checking to just columns that we are demanding
37 |     colnames <- c("chrom", "strand","chromStart","chromEnd")
38 |     msg <- .validate.colnames(conn, colnames)
39 |     if (!is.null(msg))
40 |         return(msg)
41 |     NULL
42 | }
43 | 
44 | 
45 | .valid.FeatureDb <- function(x)
46 | {
47 |     conn <- dbconn(x)
48 |     c(AnnotationDbi:::.valid.metadata.table(conn, "Db type",
49 |                                             "FeatureDb"),
50 |       .valid.feature.table(conn))
51 | }
52 | 
53 | 
54 | setValidity2("FeatureDb", .valid.FeatureDb)
55 | 
56 | 
57 | 
58 | 
59 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
60 | ### Low-level constructor (not exported).
61 | ###
62 | 
63 | FeatureDb <- function(conn)
64 | {
65 |     .FeatureDb$new(conn=conn)
66 | }
67 | 
68 | 


--------------------------------------------------------------------------------
/R/TxDb-SELECT-helpers.R:
--------------------------------------------------------------------------------
  1 | ### =========================================================================
  2 | ### Helpers for SELECT'ing stuff from a TxDb object
  3 | ### -------------------------------------------------------------------------
  4 | ###
  5 | ### Nothing in this file is exported.
  6 | ###
  7 | 
  8 | 
  9 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 10 | ### Low-level helpers (schema agnostic) for building SQL queries
 11 | ###
 12 | 
 13 | .as_qualified <- function(tables, columns) paste(tables, columns, sep=".")
 14 | 
 15 | .tables_in_joins <- function(joins)
 16 | {
 17 |     joins_len <- length(joins)
 18 |     stopifnot(joins_len %% 2L == 1L)
 19 |     joins[seq(1L, joins_len, by=2L)]
 20 | }
 21 | 
 22 | ### 'join_type' will be recycled to the nb of joins (= length(joins) %/% 2).
 23 | .build_SQL_FROM <- function(joins, join_type="INNER")
 24 | {
 25 |     joins_len <- length(joins)
 26 |     stopifnot(joins_len %% 2L == 1L)
 27 |     SQL <- joins[[1L]]
 28 |     if (joins_len == 1L)
 29 |         return(SQL)
 30 |     njoin <- joins_len %/% 2L
 31 |     stopifnot(length(join_type) == 1L || length(join_type) == njoin)
 32 |     ON_idx <- 2L * seq_len(njoin)
 33 |     ON <- joins[ON_idx]
 34 |     Rtables <- joins[ON_idx + 1L]
 35 |     c(SQL, paste0(join_type, " JOIN ", Rtables, " ON (", ON, ")"))
 36 | }
 37 | 
 38 | .build_SQL_FROM_splicing <- function(joins, cds_join_type="LEFT")
 39 | {
 40 |     joins_len <- length(joins)
 41 |     stopifnot(joins_len %% 2L == 1L)
 42 |     SQL <- joins[[1L]]
 43 |     if (joins_len == 1L)
 44 |         return(SQL)
 45 |     njoin <- joins_len %/% 2L
 46 |     join_type <- rep.int("INNER", njoin)
 47 |     if (joins[[length(joins)]] == "cds")
 48 |         join_type[[length(join_type)]] <- cds_join_type
 49 |     paste0(.build_SQL_FROM(joins, join_type), collapse=" ")
 50 | }
 51 | 
 52 | .build_SQL_WHERE <- function(filter)
 53 | {
 54 |     if (length(filter) == 0L)
 55 |         return("")
 56 |     sql <- lapply(seq_len(length(filter)),
 57 |              function(i) {
 58 |                fi <- filter[[i]]
 59 |                if (!is.numeric(fi))
 60 |                  fi <- paste0("'", fi, "'")
 61 |                fi <- paste0("(", paste0(fi, collapse=","), ")")
 62 |                fi <- paste0(names(filter)[i], " IN ", fi)
 63 |                paste0("(", fi, ")")
 64 |            })
 65 |     paste0(unlist(sql), collapse=" AND ")
 66 | }
 67 | 
 68 | .build_SQL_SELECT <- function(columns, joins, distinct=FALSE,
 69 |                               filter=list(), orderby=character(0))
 70 | {
 71 |     SQL <- "SELECT"
 72 |     if (distinct)
 73 |         SQL <- c(SQL, "DISTINCT")
 74 |     SQL <- c(SQL, paste0(columns, collapse=", "),
 75 |              "FROM", .build_SQL_FROM(joins))
 76 |     if (length(filter) != 0L)
 77 |         SQL <- c(SQL, "WHERE", .build_SQL_WHERE(filter))
 78 |     if (length(orderby) != 0L)
 79 |         SQL <- c(SQL, "ORDER BY", paste0(orderby, collapse=", "))
 80 |     SQL
 81 | }
 82 | 
 83 | 
 84 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 85 | ### .TXDB_join_tables() and .TXDB_join_splicing_Rtables()
 86 | ###
 87 | 
 88 | .TXDB_join_tables <- function(tables)
 89 | {
 90 |     tables <- unique(tables)
 91 |     if (length(tables) == 1L)
 92 |         return(tables)
 93 |     if (any(tables %in% c("exon", "cds")))
 94 |         tables <- c(tables, "splicing")
 95 |     ## Order tables & remove duplicates.
 96 |     join_order <- c("transcript", "splicing", "exon", "cds", "gene")
 97 |     tables <- intersect(join_order, tables)
 98 |     joins <- character(2L * length(tables) - 1L)
 99 |     ON_idx <- 2L * seq_len(length(tables) - 1L)
100 |     ON <- sapply(2:length(tables), function(i) {
101 |         Rtable <- tables[[i]]
102 |         if (Rtable == "exon") {
103 |             USING <- "_exon_id"
104 |             Ltable <- "splicing"
105 |         } else if (Rtable == "cds") {
106 |             USING <- "_cds_id"
107 |             Ltable <- "splicing"
108 |         } else {
109 |             USING <- "_tx_id"
110 |             Ltable <- tables[[1L]]
111 |         }
112 |         Lcolumn <- .as_qualified(Ltable, USING)
113 |         Rcolumn <- .as_qualified(Rtable, USING)
114 |         paste(Lcolumn, Rcolumn, sep="=")
115 |     })
116 |     joins[ON_idx] <- ON
117 |     joins[c(1L, ON_idx + 1L)] <- tables
118 |     joins
119 | }
120 | 
121 | .TXDB_join_splicing_Rtables <- function(tables=character(0))
122 | {
123 |     if (!all(tables %in% TXDB_SPLICING_BUNDLE))
124 |         stop("all tables must be in TXDB_SPLICING_BUNDLE")
125 |     tables <- c("splicing", tables)
126 |     ## Order tables & remove duplicates.
127 |     tables <- intersect(TXDB_SPLICING_BUNDLE, tables)
128 |     if (length(tables) == 1L)
129 |         return(tables)
130 |     joins <- character(2L * length(tables) - 1L)
131 |     ON_idx <- 2L * seq_len(length(tables) - 1L)
132 |     Rtables <- tables[-1L]
133 |     USING <- TXDB_SPLICING_JOIN_USING[Rtables]
134 |     Lcolumns <- .as_qualified("splicing", USING)
135 |     Rcolumns <- .as_qualified(Rtables, USING)
136 |     ON <- paste(Lcolumns, Rcolumns, sep="=")
137 |     joins[ON_idx] <- ON
138 |     joins[c(1L, ON_idx + 1L)] <- tables
139 |     joins
140 | }
141 | 
142 | 
143 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
144 | ### The 2 flexible helpers for SELECT'ing stuff from a TxDb object:
145 | ###   - TxDb_SELECT_from_INNER_JOIN()
146 | ###   - TxDb_SELECT_from_splicing_bundle()
147 | ### They should satisfy the needs of most extractors defined in the package.
148 | ###
149 | 
150 | ### The columns in 'columns' + those involved thru 'filter' and 'orderby' are
151 | ### collected and their corresponding tables are INNER JOIN'ed.
152 | TxDb_SELECT_from_INNER_JOIN <- function(txdb, table, columns, filter=list(),
153 |                                         orderby=character(0))
154 | {
155 |     schema_version <- TxDb_schema_version(txdb)
156 |     tables <- TXDB_column2table(columns, from_table=table,
157 |                                 schema_version=schema_version)
158 |     where_columns <- names(filter)
159 |     where_tables <- TXDB_column2table(where_columns, from_table=table,
160 |                                       schema_version=schema_version)
161 |     joins <- .TXDB_join_tables(c(table, tables, where_tables))
162 |     orderby_tables <- TXDB_column2table(orderby, from_table=table,
163 |                                         schema_version=schema_version)
164 |     stopifnot(all(orderby_tables %in% .tables_in_joins(joins)))
165 |     use_joins <- length(joins) > 1L
166 |     if (use_joins) {
167 |         columns <- .as_qualified(tables, columns)
168 |         names(filter) <- .as_qualified(where_tables, where_columns)
169 |         orderby <- .as_qualified(orderby_tables, orderby)
170 |     }
171 |     ## .build_SQL_SELECT() uses INNER joins.
172 |     SQL <- .build_SQL_SELECT(columns, joins, distinct=use_joins,
173 |                              filter=filter, orderby=orderby)
174 |     queryAnnotationDb(txdb, SQL)
175 | }
176 | 
177 | ### Can only involve columns (thru 'columns', 'filter', and 'orderby') that
178 | ### belong to the tables in TXDB_SPLICING_BUNDLE at the moment.
179 | TxDb_SELECT_from_splicing_bundle <- function(txdb, columns,
180 |                                              filter=list(),
181 |                                              orderby=character(0),
182 |                                              cds_join_type="LEFT")
183 | {
184 |     schema_version <- TxDb_schema_version(txdb)
185 |     tables <- TXDB_column2table(columns, from_table="splicing",
186 |                                 schema_version=schema_version)
187 |     where_columns <- names(filter)
188 |     where_tables <- TXDB_column2table(where_columns, from_table="splicing",
189 |                                       schema_version=schema_version)
190 |     orderby_tables <- TXDB_column2table(orderby, from_table="splicing",
191 |                                         schema_version=schema_version)
192 |     joins <- .TXDB_join_splicing_Rtables(c(tables, where_tables,
193 |                                            orderby_tables))
194 |     use_joins <- length(joins) > 1L
195 |     if (use_joins) {
196 |         columns <- .as_qualified(tables, columns)
197 |         names(filter) <- .as_qualified(where_tables, where_columns)
198 |         orderby <- .as_qualified(orderby_tables, orderby)
199 |     }
200 |     from <- .build_SQL_FROM_splicing(joins, cds_join_type=cds_join_type)
201 |     SQL <- .build_SQL_SELECT(columns, from, distinct=FALSE,
202 |                              filter=filter, orderby=orderby)
203 |     queryAnnotationDb(txdb, SQL)
204 | }
205 | 
206 | 
207 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
208 | ### Convenience wrappers to the above flexible helpers for SELECT'ing stuff
209 | ### from a given TxDb table
210 | ###
211 | 
212 | TxDb_SELECT_from_chrominfo <- function(txdb, filter=list(),
213 |                                        orderby="_chrom_id")
214 | {
215 |     schema_version <- TxDb_schema_version(txdb)
216 |     columns <- TXDB_table_columns("chrominfo", schema_version=schema_version)
217 |     TxDb_SELECT_from_INNER_JOIN(txdb, "chrominfo", columns,
218 |                                 filter=filter, orderby=orderby)
219 | }
220 | 
221 | TxDb_SELECT_from_transcript <- function(txdb, filter=list(),
222 |                                         orderby="_tx_id")
223 | {
224 |     schema_version <- TxDb_schema_version(txdb)
225 |     columns <- TXDB_table_columns("transcript", schema_version=schema_version)
226 |     TxDb_SELECT_from_INNER_JOIN(txdb, "transcript", columns,
227 |                                 filter=filter, orderby=orderby)
228 | }
229 | 
230 | ### Select rows from the virtual table obtained by joining the "splicing",
231 | ### "exon", and "cds" tables together.
232 | TxDb_SELECT_from_splicings <- function(txdb, filter=list(),
233 |                                        orderby=c("_tx_id", "exon_rank"),
234 |                                        cds_join_type="LEFT")
235 | {
236 |     schema_version <- TxDb_schema_version(txdb)
237 |     splicing_columns <- TXDB_table_columns("splicing",
238 |                                            schema_version=schema_version)
239 |     exon_columns <- TXDB_table_columns("exon", schema_version=schema_version)
240 |     cds_columns <- TXDB_table_columns("cds", schema_version=schema_version)
241 |     cds_columns <- cds_columns[c("id", "name", "start", "end")]
242 |     columns <- unique(c(splicing_columns, exon_columns, cds_columns))
243 |     TxDb_SELECT_from_splicing_bundle(txdb, columns,
244 |                                      filter=filter, orderby=orderby,
245 |                                      cds_join_type=cds_join_type)
246 | }
247 | 
248 | TxDb_SELECT_from_gene <- function(txdb, filter=list(),
249 |                                   orderby=c("_tx_id", "gene_id"))
250 | {
251 |     schema_version <- TxDb_schema_version(txdb)
252 |     columns <- TXDB_table_columns("gene", schema_version=schema_version)
253 |     TxDb_SELECT_from_INNER_JOIN(txdb, "gene", columns,
254 |                                 filter=filter, orderby=orderby)
255 | }
256 | 
257 | 


--------------------------------------------------------------------------------
/R/TxDb-schema.R:
--------------------------------------------------------------------------------
  1 | ### =========================================================================
  2 | ### TxDb schema
  3 | ### -------------------------------------------------------------------------
  4 | ###
  5 | ### Nothing in this file is exported.
  6 | ###
  7 | ### 7 tables:
  8 | ###   - chrominfo
  9 | ###   - transcript
 10 | ###   - exon
 11 | ###   - cds
 12 | ###   - splicing
 13 | ###   - gene
 14 | ###   - metadata (not described here)
 15 | 
 16 | 
 17 | ### Not exported.
 18 | DB_TYPE_NAME <- "Db type"
 19 | DB_TYPE_VALUE <- "TxDb"  # same as the name of the class below
 20 | DB_SCHEMA_VERSION <- "1.2"  # DON'T FORGET TO BUMP THIS WHEN YOU CHANGE THE
 21 |                             # SCHEMA
 22 | 
 23 | ### Return the *effective* schema version.
 24 | TxDb_schema_version <- function(txdb)
 25 | {
 26 |     conn <- if (is(txdb, "TxDb")) dbconn(txdb) else txdb
 27 |     version <- AnnotationDbi:::.getMetaValue(conn, "DBSCHEMAVERSION")
 28 |     numeric_version(version)
 29 | }
 30 | 
 31 | 
 32 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 33 | ### Table columns
 34 | ###
 35 | 
 36 | ### 'chrominfo' table
 37 | 
 38 | TXDB_CHROMINFO_COLDEFS <- c(
 39 |     `_chrom_id`="INTEGER PRIMARY KEY",
 40 |     chrom="TEXT UNIQUE NOT NULL",
 41 |     length="INTEGER NULL",
 42 |     is_circular="INTEGER NULL"
 43 | )
 44 | 
 45 | TXDB_CHROMINFO_COLUMNS <- names(TXDB_CHROMINFO_COLDEFS)
 46 | 
 47 | ### 'transcript', 'exon', and 'cds' tables (a.k.a. "feature tables")
 48 | 
 49 | TXDB_FEATURE_COLDEFS <- c(
 50 |     id="INTEGER PRIMARY KEY",
 51 |     name="TEXT NULL",
 52 |     type="TEXT NULL",
 53 |     chrom="TEXT NOT NULL",
 54 |     strand="TEXT NOT NULL",
 55 |     start="INTEGER NOT NULL",
 56 |     end="INTEGER NOT NULL"
 57 | )
 58 | 
 59 | ### Tables "transcript", "exon", and "cds" must at least have columns with
 60 | ### the core column tags.
 61 | TXDB_CORE_COLTAGS <- c("id", "chrom", "strand", "start", "end")
 62 | TXDB_ALL_COLTAGS <- names(TXDB_FEATURE_COLDEFS)
 63 | TXDB_EXON_OR_CDS_COLTAGS <- TXDB_ALL_COLTAGS[TXDB_ALL_COLTAGS != "type"]
 64 | 
 65 | .make_feature_columns <- function(prefix, tags)
 66 | {
 67 |     fmt <- paste0("%s_", tags)
 68 |     id_pos <- match("id", tags)
 69 |     stopifnot(identical(id_pos, 1L))
 70 |     fmt[[id_pos]] <- paste0("_", fmt[[id_pos]])
 71 |     setNames(sprintf(fmt, prefix), tags)
 72 | }
 73 | 
 74 | TXDB_TRANSCRIPT_COLUMNS <- .make_feature_columns("tx", TXDB_ALL_COLTAGS)
 75 | TXDB_EXON_COLUMNS <- .make_feature_columns("exon", TXDB_EXON_OR_CDS_COLTAGS)
 76 | TXDB_CDS_COLUMNS <- .make_feature_columns("cds", TXDB_EXON_OR_CDS_COLTAGS)
 77 | 
 78 | ### 'splicing' table
 79 | 
 80 | TXDB_SPLICING_COLDEFS <- c(
 81 |     `_tx_id`="INTEGER NOT NULL",
 82 |     exon_rank="INTEGER NOT NULL",
 83 |     `_exon_id`="INTEGER NOT NULL",
 84 |     `_cds_id`="INTEGER NULL",
 85 |     cds_phase="INTEGER NULL"
 86 | )
 87 | 
 88 | TXDB_SPLICING_COLUMNS <- names(TXDB_SPLICING_COLDEFS)
 89 | 
 90 | ### 'gene' table
 91 | 
 92 | TXDB_GENE_COLDEFS <- c(
 93 |     gene_id="TEXT NOT NULL",
 94 |     `_tx_id`="INTEGER NOT NULL"
 95 | )
 96 | 
 97 | TXDB_GENE_COLUMNS <- names(TXDB_GENE_COLDEFS)
 98 | 
 99 | 
100 | ### Order of tables matters! "transcript" must be before "splicing" and "gene",
101 | ### and "exon" and "cds" must be before "splicing". See TXDB_column2table()
102 | ### below why.
103 | TXDB_COLUMNS <- list(
104 |     chrominfo=TXDB_CHROMINFO_COLUMNS,
105 |     transcript=TXDB_TRANSCRIPT_COLUMNS,
106 |     exon=TXDB_EXON_COLUMNS,
107 |     cds=TXDB_CDS_COLUMNS,
108 |     splicing=TXDB_SPLICING_COLUMNS,
109 |     gene=TXDB_GENE_COLUMNS
110 | )
111 | 
112 | 
113 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
114 | ### Build CREATE TABLE statements
115 | ###
116 | 
117 | .build_SQL_CREATE_TABLE <- function(table, coldefs, constraints=NULL)
118 | {
119 |     SQL <- "CREATE TABLE %s (%s\n)"
120 |     coldefs <- c(paste(names(coldefs), coldefs), constraints)
121 |     coldefs <- paste("\n  ", coldefs, collapse=",")
122 |     sprintf(SQL, table, coldefs)
123 | }
124 | 
125 | build_SQL_CREATE_chrominfo_table <- function()
126 | {
127 |     .build_SQL_CREATE_TABLE("chrominfo", TXDB_CHROMINFO_COLDEFS)
128 | }
129 | 
130 | build_SQL_CREATE_feature_table <- function(table)
131 | {
132 |     columns <- TXDB_COLUMNS[[table]]
133 |     coldefs <- setNames(TXDB_FEATURE_COLDEFS[names(columns)], columns)
134 |     foreign_key <- sprintf("FOREIGN KEY (%s) REFERENCES chrominfo (chrom)",
135 |                            columns[["chrom"]])
136 |     .build_SQL_CREATE_TABLE(table, coldefs, foreign_key)
137 | }
138 | 
139 | build_SQL_CREATE_splicing_table <- function()
140 | {
141 |     unique_key <- "UNIQUE (_tx_id, exon_rank)"
142 |     foreign_keys <- sprintf("FOREIGN KEY (_%s_id) REFERENCES %s",
143 |                             c("tx", "exon", "cds"),
144 |                             c("transcript", "exon", "cds"))
145 |     constraints <- c(unique_key, foreign_keys)
146 |     .build_SQL_CREATE_TABLE("splicing", TXDB_SPLICING_COLDEFS, constraints)
147 | }
148 | 
149 | build_SQL_CREATE_gene_table <- function()
150 | {
151 |     unique_key <- "UNIQUE (gene_id, _tx_id)"
152 |     foreign_key <- "FOREIGN KEY (_tx_id) REFERENCES transcript"
153 |     constraints <- c(unique_key, foreign_key)
154 |     .build_SQL_CREATE_TABLE("gene", TXDB_GENE_COLDEFS, constraints)
155 | }
156 | 
157 | 
158 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
159 | ### Relationship between the 'splicing' table and the "feature tables"
160 | ###
161 | ### The 'splicing' table is the glue between the "feature tables".
162 | ###
163 | 
164 | ### The "splicing right tables" can be bundled to the "splicing" table with
165 | ### a LEFT JOIN using the TXDB_SPLICING_JOIN_USING columns.
166 | TXDB_SPLICING_RTABLES <- c("transcript", "exon", "cds")
167 | TXDB_SPLICING_JOIN_USING <- setNames(c("_tx_id", "_exon_id", "_cds_id"),
168 |                                      TXDB_SPLICING_RTABLES)
169 | TXDB_SPLICING_BUNDLE <- c("splicing", TXDB_SPLICING_RTABLES)
170 | 
171 | 
172 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
173 | ### Helper functions
174 | ###
175 | 
176 | TXDB_tables <- function() names(TXDB_COLUMNS)
177 | 
178 | TXDB_table_columns <- function(table, schema_version=NA)
179 | {
180 |     columns <- TXDB_COLUMNS[[table]]
181 |     if (is.na(schema_version))
182 |         return(columns)
183 |     if (table == "transcript" && schema_version < numeric_version("1.1"))
184 |         columns <- columns[columns != "tx_type"]
185 |     if (table == "splicing" && schema_version < numeric_version("1.2"))
186 |         columns <- columns[columns != "cds_phase"]
187 |     columns
188 | }
189 | 
190 | ### When the same column belongs to more than one table (e.g. "_tx_id",
191 | ### "_exon_id", or "_cds_id"), then the table for which the column is a
192 | ### primary key is chosen by default. This behavior can be changed by passing
193 | ### the name of a table to 'from_table' in which case the priority is given to
194 | ### that table.
195 | TXDB_column2table <- function(columns, from_table=NA, schema_version=NA)
196 | {
197 |     if (length(columns) == 0L)
198 |         return(character(0))
199 |     tables <- sapply(columns,
200 |         function(column) {
201 |             for (table in TXDB_tables()) {
202 |                 table_columns <- TXDB_table_columns(table,
203 |                                      schema_version=schema_version)
204 |                 if (column %in% table_columns)
205 |                     return(table)
206 |             }
207 |             if (is.na(schema_version)) {
208 |                 in_schema <- ""
209 |             } else {
210 |                 in_schema <- c(" in db schema ", as.character(schema_version))
211 |             }
212 |             stop(column, ": no such column", in_schema)
213 |         }
214 |     )
215 |     if (!is.na(from_table)) {
216 |         table_columns <- TXDB_table_columns(from_table,
217 |                                             schema_version=schema_version)
218 |         tables[columns %in% table_columns] <- from_table
219 |     }
220 |     tables
221 | }
222 | 
223 | 


--------------------------------------------------------------------------------
/R/exonicParts.R:
--------------------------------------------------------------------------------
  1 | ### =========================================================================
  2 | ### Extraction of exonic and intronic parts
  3 | ### -------------------------------------------------------------------------
  4 | ###
  5 | ### For all functions in this file, 'txdb' must be a TxDb object or any
  6 | ### object that supports transcripts() and exonsBy() (e.g. EnsDb object).
  7 | ###
  8 | 
  9 | 
 10 | ### Works on whatever 'x' can be used as a splitting factor in splitAsList().
 11 | ### TODO: Rename and move to a more appropriate place (IRanges?)
 12 | .rank_in_group <- function(x)
 13 | {
 14 |     groups <- splitAsList(seq_along(x), x)
 15 |     i <- unlist(groups, use.names=FALSE)
 16 |     ans <- sequence(lengths(groups))
 17 |     ans[i] <- ans
 18 |     ans
 19 | }
 20 | 
 21 | 
 22 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 23 | ### 3 helper functions used internally by exonicParts() and intronicParts()
 24 | ###
 25 | 
 26 | ### Return a GRanges object with 1 range per transcript and metadata columns
 27 | ### tx_id, tx_name, and gene_id.
 28 | ### If 'drop.geneless' is FALSE (the default) then the transcripts are
 29 | ### returned in the same order as with transcripts(), which is expected
 30 | ### to be by transcript id (tx_id). Otherwise they are ordered first by
 31 | ### gene id (gene_id), then by transcript id.
 32 | tidyTranscripts <- function(txdb, drop.geneless=FALSE)
 33 | {
 34 |     tx <- transcripts(txdb, columns=c("tx_id", "tx_name", "gene_id"))
 35 |     mcols(tx)$gene_id <- as.character(mcols(tx)$gene_id)
 36 |     if (drop.geneless) {
 37 |         gene_id <- mcols(tx)$gene_id
 38 |         tx_id <- mcols(tx)$tx_id
 39 |         tx <- tx[order(gene_id, tx_id, na.last=NA)]
 40 |     }
 41 |     tx
 42 | }
 43 | 
 44 | ### Return a GRangesList object parallel to 'tx_ids'. The supplied 'tx_ids'
 45 | ### must be a subset of 'mcols(transcripts(txdb))$tx_id'.
 46 | .exons_by_txids <- function(txdb, tx_ids)
 47 | {
 48 |     if (anyDuplicated(tx_ids))
 49 |         stop(wmsg("\"transcripts\" method for ", class(txdb), " objects ",
 50 |                   "seems broken, sorry"))
 51 |     ans <- exonsBy(txdb, by="tx")
 52 |     tx_ids <- as.character(tx_ids)
 53 |     ans_names <- names(ans)
 54 |     if (!identical(tx_ids, ans_names)) {
 55 |         m <- match(tx_ids, ans_names)
 56 |         if (anyNA(m))
 57 |             stop(wmsg("\"exonsBy\" method for ", class(txdb), " objects ",
 58 |                       "seems broken, sorry"))
 59 |         ans <- ans[m]
 60 |     }
 61 |     ans
 62 | }
 63 | 
 64 | ### Return a GRanges object with 1 range per exon and metadata columns
 65 | ### tx_id, tx_name, gene_id, exon_id, exon_name, and exon_rank.
 66 | ### If 'drop.geneless' is FALSE (the default) then the exons are ordered first
 67 | ### by transcript id (tx_id), then by exon rank (exon_rank). Otherwise they
 68 | ### are ordered first by gene id (gene_id), then by transcript id, and then
 69 | ### by exon rank.
 70 | tidyExons <- function(txdb, drop.geneless=FALSE)
 71 | {
 72 |     tx <- tidyTranscripts(txdb, drop.geneless=drop.geneless)
 73 |     ex_by_tx <- .exons_by_txids(txdb, mcols(tx)$tx_id)
 74 | 
 75 |     ans <- unlist(ex_by_tx, use.names=FALSE)
 76 |     idx <- rep(seq_along(tx), lengths(ex_by_tx))
 77 |     mcols(ans) <- cbind(mcols(tx)[idx, , drop=FALSE], mcols(ans))
 78 |     ans
 79 | }
 80 | 
 81 | ### Return a GRanges object with 1 range per intron and metadata columns
 82 | ### tx_id, tx_name, and gene_id.
 83 | ### If 'drop.geneless' is FALSE (the default) then the introns are ordered
 84 | ### by transcript id (tx_id). Otherwise they are ordered first by gene id
 85 | ### (gene_id), then by transcript id.
 86 | tidyIntrons <- function(txdb, drop.geneless=FALSE)
 87 | {
 88 |     tx <- tidyTranscripts(txdb, drop.geneless=drop.geneless)
 89 |     ex_by_tx <- .exons_by_txids(txdb, mcols(tx)$tx_id)
 90 | 
 91 |     introns_by_tx <- psetdiff(tx, ex_by_tx)
 92 | 
 93 |     ans <- unlist(introns_by_tx, use.names=FALSE)
 94 |     idx <- rep(seq_along(tx), lengths(introns_by_tx))
 95 |     mcols(ans) <- mcols(tx)[idx, , drop=FALSE]
 96 |     ans
 97 | }
 98 | 
 99 | .break_in_parts <- function(x, linked.to.single.gene.only=FALSE,
100 |                                extra_mcol="exonic_part")
101 | {
102 |     ans <- disjoin(x, with.revmap=TRUE)
103 |     revmap <- mcols(ans)$revmap
104 |     ans_mcols <- lapply(mcols(x),
105 |                         function(col) {
106 |                             col <- unique(extractList(col, revmap))
107 |                             col[!is.na(col)]
108 |                         }
109 |                  )
110 |     mcols(ans) <- DataFrame(ans_mcols)
111 |     if (linked.to.single.gene.only) {
112 |         keep_idx <- which(elementNROWS(mcols(ans)$gene_id) == 1L)
113 |         ans <- ans[keep_idx]
114 |         gene_id <- as.character(mcols(ans)$gene_id)
115 |         mcols(ans)$gene_id <- gene_id
116 |         ## Add "exonic_part" or "intronic_part" metadata column for
117 |         ## compatibility with old disjointExons().
118 |         mcols(ans)[[extra_mcol]] <- .rank_in_group(gene_id)
119 |     }
120 |     ans
121 | }
122 | 
123 | 
124 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
125 | ### exonicParts() and intronicParts()
126 | ###
127 | ### exonicParts() is a replacement for the old disjointExons() function, with
128 | ### the following differences/improvements:
129 | ###
130 | ### 1. Argument 'linked.to.single.gene.only' in exonicParts() replaces
131 | ###    argument 'aggregateGenes' in disjointExons(), but has the opposite
132 | ###    meaning, that is:
133 | ###       exonicParts(txdb, linked.to.single.gene.only=TRUE)
134 | ###    returns the same exonic parts as:
135 | ###       disjointExons(txdb, aggregateGenes=FALSE)
136 | ###
137 | ### 2. Unlike disjointExons(txdb, aggregateGenes=TRUE),
138 | ###    exonicParts(txdb, linked.to.single.gene.only=FALSE)
139 | ###    does NOT discard exon parts that are not linked to a gene.
140 | ###
141 | ### 3. exonicParts() is almost 2x more efficient than disjointExons().
142 | ###
143 | ### 4. exonicParts() works out-of-the-box on any TxDb-like object that
144 | ###    supports the transcripts() and exonsBy() extractors, e.g. on an
145 | ###    EnsDb object.
146 | ###
147 | ### Note that disjointExons() was deprecated in BioC 3.13, then defunct in
148 | ### BioC 3.15, and finally removed from BioC 3.17.
149 | 
150 | ### Return a disjoint and strictly sorted GRanges object with 1 range per
151 | ### exonic part and with metadata columns tx_id, tx_name, gene_id, exon_id,
152 | ### exon_name, and exon_rank.
153 | exonicParts <- function(txdb, linked.to.single.gene.only=FALSE)
154 | {
155 |     if (!isTRUEorFALSE(linked.to.single.gene.only))
156 |         stop("'linked.to.single.gene.only' must be TRUE or FALSE")
157 |     ex <- tidyExons(txdb, drop.geneless=linked.to.single.gene.only)
158 |     .break_in_parts(ex, linked.to.single.gene.only,
159 |                         extra_mcol="exonic_part")
160 | }
161 | 
162 | ### Return a disjoint and strictly sorted GRanges object with 1 range per
163 | ### intronic part and with metadata columns tx_id, tx_name, and gene_id.
164 | intronicParts <- function(txdb, linked.to.single.gene.only=FALSE)
165 | {
166 |     if (!isTRUEorFALSE(linked.to.single.gene.only))
167 |         stop("'linked.to.single.gene.only' must be TRUE or FALSE")
168 |     introns <- tidyIntrons(txdb, drop.geneless=linked.to.single.gene.only)
169 |     .break_in_parts(introns, linked.to.single.gene.only,
170 |                              extra_mcol="intronic_part")
171 | }
172 | 
173 | 


--------------------------------------------------------------------------------
/R/extendExonsIntoIntrons.R:
--------------------------------------------------------------------------------
 1 | ### =========================================================================
 2 | ### Extend exons by a given number of bases into their adjacent introns
 3 | ### -------------------------------------------------------------------------
 4 | ###
 5 | 
 6 | extendExonsIntoIntrons <- function(ex_by_tx, extent=2)
 7 | {
 8 |     if (!is(ex_by_tx, "GRangesList"))
 9 |         stop(wmsg("'ex_by_tx' must be a GRangesList object"))
10 |     if (!isSingleNumber(extent))
11 |         stop(wmsg("'extent' must be a single number"))
12 |     if (!is.integer(extent))
13 |         extent <- as.integer(extent)
14 | 
15 |     resize_idx <- which(lengths(ex_by_tx) >= 2L)
16 |     ex_to_resize <- ex_by_tx[resize_idx]
17 | 
18 |     ## Resize first exons.
19 |     first_ex <- heads(ex_to_resize, n=1L)
20 |     unlisted <- unlist(first_ex, use.names=FALSE)
21 |     unlisted <- resize(unlisted, width(unlisted) + extent,
22 |                        fix="start", use.names=FALSE)
23 |     first_ex <- relist(unlisted, first_ex)
24 | 
25 |     ## Resize last exons.
26 |     last_ex <- tails(ex_to_resize, n=1L)
27 |     unlisted <- unlist(last_ex, use.names=FALSE)
28 |     unlisted <- resize(unlisted, width(unlisted) + extent,
29 |                        fix="end", use.names=FALSE)
30 |     last_ex <- relist(unlisted, last_ex)
31 | 
32 |     ## Resize intermediate exons.
33 |     mid_ex <- tails(heads(ex_to_resize, n=-1L), n=-1L)
34 |     unlisted <- unlist(mid_ex, use.names=FALSE)
35 |     unlisted <- resize(unlisted, width=width(unlisted) + 2L*extent,
36 |                        fix="center", use.names=FALSE)
37 |     mid_ex <- relist(unlisted, mid_ex)
38 | 
39 |     ## Put exons back together.
40 |     ex_by_tx[resize_idx] <- pc(first_ex, mid_ex, last_ex)
41 |     ex_by_tx
42 | }
43 | 
44 | 


--------------------------------------------------------------------------------
/R/extractTranscriptSeqs.R:
--------------------------------------------------------------------------------
  1 | ### =========================================================================
  2 | ### extractTranscriptSeqs() and related tools
  3 | ### -------------------------------------------------------------------------
  4 | 
  5 | 
  6 | .unlist_strand <- function(strand, transcripts)
  7 | {
  8 |     if (is.list(strand) || is(strand, "List")) {
  9 |         ## 'strand' is a list-like object.
 10 |         if (!identical(unname(elementNROWS(strand)),
 11 |                        unname(elementNROWS(transcripts))))
 12 |             stop(wmsg("when 'strand' is a list-like object, it must have ",
 13 |                       "the same \"shape\" as 'transcripts' (i.e. same length ",
 14 |                       "plus 'strand[[i]]' must have the same length as ",
 15 |                       "'transcripts[[i]]' for all 'i')"))
 16 |         return(strand(unlist(strand, use.names=FALSE)))
 17 |     }
 18 |     if (!(is.vector(strand) || is.factor(strand) || is(strand, "Rle")))
 19 |         stop(wmsg("'strand' must be an atomic vector, a factor, ",
 20 |                   "an Rle object, or a list-like object"))
 21 |     strand <- strand(strand)
 22 |     strand <- S4Vectors:::V_recycle(strand, transcripts,
 23 |                                     "strand", "transcripts")
 24 |     rep.int(strand, elementNROWS(transcripts))
 25 | }
 26 | 
 27 | setGeneric("extractTranscriptSeqs", signature="x",
 28 |     function(x, transcripts, ...) standardGeneric("extractTranscriptSeqs")
 29 | )
 30 | 
 31 | setMethod("extractTranscriptSeqs", "DNAString",
 32 |     function(x, transcripts, strand="+")
 33 |     {
 34 |         if (!is(transcripts, "IntegerRangesList"))
 35 |             stop(wmsg("when 'x' is a DNAString object, ",
 36 |                       "'transcripts' must be an IntegerRangesList object"))
 37 |         unlisted_strand <- .unlist_strand(strand, transcripts)
 38 |         if (!all(unlisted_strand %in% c("+", "-")))
 39 |             stop(wmsg("'strand' can only contain \"+\" and/or \"-\" values. ",
 40 |                       "\"*\" is not allowed."))
 41 |         idx <- which(unlisted_strand == "-")
 42 |         exons <- extractList(x, unlist(transcripts, use.names=FALSE))
 43 |         exons[idx] <- reverseComplement(exons[idx])
 44 |         unstrsplit(relist(exons, transcripts))
 45 |     }
 46 | )
 47 | 
 48 | ### Check for transcripts that have exons located on more than one
 49 | ### chromosome.
 50 | .check_exon_chrom <- function(tx1)
 51 | {
 52 |     run_lens <- runLength(seqnames(tx1))
 53 |     idx <- which(elementNROWS(run_lens) != 1L)
 54 |     if (length(idx) == 0L)
 55 |         return()
 56 |     tx1_names <- names(tx1)
 57 |     if (is.null(tx1_names)) {
 58 |         some_in1string <- ""
 59 |     } else {
 60 |         some_idx <- head(idx, n=2L)
 61 |         some_names <- tx1_names[some_idx]
 62 |         some_in1string <- paste0(some_names, collapse=", ")
 63 |         if (length(idx) > length(some_idx))
 64 |             some_in1string <- paste0("e.g. ", some_in1string, ", etc...")
 65 |         some_in1string <- paste0(" (", some_in1string, ")")
 66 |     }
 67 |     stop(wmsg("Some transcripts", some_in1string, " have exons located on ",
 68 |               "more than one chromosome. This is not supported yet."))
 69 | }
 70 | 
 71 | ### Check the "exon_rank" inner metadata column if present. When 'transcripts'
 72 | ### contains CDS parts (instead of exons) grouped by transcript, some of the
 73 | ### lowest or/and highest exon ranks can be missing.
 74 | .check_exon_rank <- function(tx1)
 75 | {
 76 |     exon_rank <- mcols(tx1@unlistData)$exon_rank
 77 |     if (is.null(exon_rank))
 78 |         return()
 79 |     if (!is.numeric(exon_rank))
 80 |         stop(wmsg("\"exon_rank\" inner metadata column in GRangesList ",
 81 |                   "object 'transcripts' is not numeric"))
 82 |     if (!is.integer(exon_rank)) {
 83 |         warning(wmsg("\"exon_rank\" inner metadata column in GRangesList ",
 84 |                      "object 'transcripts' is not integer"))
 85 |         exon_rank <- as.integer(exon_rank)
 86 |     }
 87 |     if (any(is.na(exon_rank)))
 88 |         stop(wmsg("\"exon_rank\" inner metadata column in GRangesList ",
 89 |                   "object 'transcripts' contains NAs"))
 90 | 
 91 |     partitioning <- PartitioningByEnd(tx1)
 92 |     ## The 2 lines below are equivalent to:
 93 |     ##   tmp <- relist(exon_rank, partitioning)
 94 |     ##   min_rank <- min(tmp)
 95 |     ## but much faster!
 96 |     v <- Views(exon_rank, partitioning)
 97 |     min_rank <- viewMins(v)
 98 |     if (any(min_rank < 1L))
 99 |         stop(wmsg("\"exon_rank\" inner metadata column in GRangesList ",
100 |                   "object 'transcripts' contains ranks < 1"))
101 |     tx1_eltNROWS <- elementNROWS(partitioning)
102 |     target <- sequence(tx1_eltNROWS, from=min_rank)
103 |     if (!identical(target, unname(exon_rank)))
104 |         stop(wmsg("\"exon_rank\" inner metadata column in GRangesList ",
105 |                   "object 'transcripts' does not contain increasing ",
106 |                   "consecutive ranks for some transcripts"))
107 | }
108 | 
109 | ### TODO: Incorporate this fast path to "unlist" method for XStringSet objects.
110 | .fast_XStringSet_unlist <- function(x)
111 | {
112 |     # Disabling the fast path for now. Until I understand why using it
113 |     # causes extractTranscriptSeqs(Hsapiens, TxDb.Hsapiens.UCSC.hg18.knownGene)
114 |     # to use more memory (319.7 Mb) than when NOT using it (288.9 Mb).
115 | if (FALSE) {
116 |     x_len <- length(x)
117 |     if (x_len != 0L && length(x@pool) == 1L) {
118 |         x_ranges <- x@ranges
119 |         x_start <- start(x_ranges)
120 |         x_end <- end(x_ranges)
121 |         if (identical(x_end[-x_len] + 1L, x_start[-1L])) {
122 |             ## The ranges are adjacent. We can unlist() without copying
123 |             ## the sequence data!
124 |             cat("using fast path (", x_len, ") ...\n")
125 |             ans_class <- elementType(x)
126 |             ans_shared <- x@pool[[1L]]
127 |             ans_offset <- x_start[1L] - 1L
128 |             ans_length <- x_end[x_len] - ans_offset
129 |             ans <- new2(ans_class, shared=ans_shared,
130 |                                    offset=ans_offset, 
131 |                                    length=ans_length,
132 |                                    check = FALSE)
133 |             return(ans)
134 |         }
135 |     }
136 | }
137 |     unlist(x, use.names=FALSE)
138 | }
139 | 
140 | .extract_and_combine <- function(x, seqname, ranges)
141 | {
142 |     seqs <- getSeq(x, GRanges(seqname, ranges))
143 |     ## For "getSeq" methods (like the method for GmapGenome objects) that
144 |     ## return a character vector.
145 |     if (is.character(seqs))
146 |         seqs <- DNAStringSet(seqs)
147 |     .fast_XStringSet_unlist(seqs)
148 | }
149 | 
150 | .extractTranscriptSeqsFromOneSeq <- function(seqlevel, x, transcripts)
151 | {
152 |     seqlevels(transcripts, pruning.mode="coarse") <- seqlevel
153 |     strand <- strand(transcripts)
154 |     transcripts <- ranges(transcripts)
155 |     if (seqlevel %in% seqlevels(x)) {
156 |         ## We try to load the less stuff possible i.e. only the nucleotides
157 |         ## that participate in at least one exon.
158 |         exons <- unlist(transcripts, use.names=FALSE)
159 |         ranges_to_load <- reduce(exons, with.inframe.attrib=TRUE)
160 |         x <- .extract_and_combine(x, seqlevel, ranges_to_load)
161 |         exons <- attr(ranges_to_load, "inframe")
162 |         transcripts <- relist(exons, transcripts)
163 |     } else {
164 |         ## Why do we need this?
165 |         regex <- paste0("^", seqlevel, "$")
166 |         x <- getSeq(x, regex)
167 |     }
168 |     extractTranscriptSeqs(x, transcripts, strand=strand)
169 | }
170 | 
171 | .extractTranscriptSeqs_default <- function(x, transcripts, ...)
172 | {
173 |     if (is(transcripts, "GRangesList")) {
174 |         if (length(list(...)) != 0L)
175 |             stop(wmsg("additional arguments are allowed only when ",
176 |                       "'transcripts' is not a GRangesList object"))
177 |     } else {
178 |         transcripts <- try(exonsBy(transcripts, by="tx", ...),
179 |                            silent=TRUE)
180 |         if (is(transcripts, "try-error"))
181 |             stop(wmsg("failed to extract the exon ranges from 'transcripts' ",
182 |                       "with exonsBy(transcripts, by=\"tx\", ...)"))
183 |     }
184 |     idx1 <- which(elementNROWS(transcripts) != 0L)
185 |     tx1 <- transcripts[idx1]
186 |     .check_exon_chrom(tx1)
187 |     .check_exon_rank(tx1)
188 | 
189 |     tx1_seqlevels_in_use <- seqlevelsInUse(tx1)
190 |     x_seqlevels <- seqlevels(x)
191 |     ok <- tx1_seqlevels_in_use %in% x_seqlevels
192 |     if (!all(ok)) {
193 |         if (all(!ok))
194 |             stop(wmsg("the transcripts in 'transcripts' are on chromosomes ",
195 |                       "that are not in 'x'"))
196 |         seqlevel_not_in_x <- tx1_seqlevels_in_use[!ok][[1L]]
197 |         stop(wmsg("some transcripts in 'transcripts' are on chromosomes ",
198 |                   "that are not in 'x' (e.g. some transcripts are on ",
199 |                   "chromosome \"", seqlevel_not_in_x, "\" but this ",
200 |                   "chromosome is not in 'x')"))
201 |     }
202 |     seqlevels(tx1) <- tx1_seqlevels_in_use
203 |     ## 'seqnames1' is just an ordinary factor (not Rle) parallel to 'tx1'.
204 |     seqnames1 <- unlist(runValue(seqnames(tx1)), use.names=FALSE)
205 |     dnaset_list <- lapply(levels(seqnames1),
206 |                           .extractTranscriptSeqsFromOneSeq, x, tx1)
207 |     ans <- rep.int(DNAStringSet(""), length(transcripts))
208 |     names(ans) <- names(transcripts)
209 |     ans[idx1] <- unsplit_list_of_XVectorList("DNAStringSet",
210 |                                              dnaset_list,
211 |                                              seqnames1)
212 |     ans 
213 | }
214 | setMethod("extractTranscriptSeqs", "ANY", .extractTranscriptSeqs_default)
215 | 
216 | 


--------------------------------------------------------------------------------
/R/extractUpstreamSeqs.R:
--------------------------------------------------------------------------------
 1 | ### =========================================================================
 2 | ### extractUpstreamSeqs()
 3 | ### -------------------------------------------------------------------------
 4 | 
 5 | 
 6 | ### Dispatch is on the 2nd argument!
 7 | setGeneric("extractUpstreamSeqs", signature="genes",
 8 |     function(x, genes, width=1000, ...) standardGeneric("extractUpstreamSeqs")
 9 | )
10 | 
11 | ### Will work on any object 'x' for which seqinfo() and getSeq() are defined
12 | ### e.g. BSgenome, FaFile, TwoBitFile, etc...
13 | setMethod("extractUpstreamSeqs", "GenomicRanges",
14 |     function(x, genes, width=1000)
15 |     {
16 |         seqinfo(genes) <- merge(seqinfo(genes), seqinfo(x))
17 |         upstream <- trim(suppressWarnings(flank(genes, width=width)))
18 |         ans <- getSeq(x, upstream)
19 | 
20 |         ## Add metada columns to 'ans'.
21 |         gene_seqnames <- seqnames(genes)
22 |         gene_strand <- strand(genes)
23 |         idx1 <- which(gene_strand != "-")
24 |         idx2 <- which(gene_strand == "-")
25 |         gene_TSS <- integer(length(genes))
26 |         gene_TSS[idx1] <- start(genes)[idx1]
27 |         gene_TSS[idx2] <- end(genes)[idx2]
28 |         ans_mcols <- DataFrame(gene_seqnames=gene_seqnames,
29 |                                gene_strand=gene_strand,
30 |                                gene_TSS=gene_TSS)
31 |         mcols(ans) <- ans_mcols
32 |         ans
33 |     }
34 | )
35 | 
36 | setMethod("extractUpstreamSeqs", "TxDb",
37 |     function(x, genes, width=1000, exclude.seqlevels=NULL)
38 |     {
39 |         genes <- sort(genes(genes))
40 |         ## 'genes' is now a GRanges object.
41 |         if (!is.null(exclude.seqlevels)) {
42 |             if (!is.character(exclude.seqlevels))
43 |                 stop("'exclude.seqlevels' must be NULL or a character vector")
44 |             idx <- match(exclude.seqlevels, seqlevels(genes))
45 |             if (any(is.na(idx)))
46 |                 stop("'exclude.seqlevels' contains invalid seqlevels")
47 |             seqlevels(genes, pruning.mode="coarse") <- seqlevels(genes)[-idx]
48 |         }
49 |         callGeneric(x, genes, width=width)
50 |     }
51 | )
52 | 
53 | ### 'genes' is assumed to contain transcripts grouped by gene e.g. as returned
54 | ### by transcriptsBy(..., by="gene").
55 | setMethod("extractUpstreamSeqs", "GRangesList",
56 |     function(x, genes, width=1000)
57 |     {
58 |         stop("NOT READY YET, SORRY!")
59 |     }
60 | )
61 | 
62 | 


--------------------------------------------------------------------------------
/R/features.R:
--------------------------------------------------------------------------------
 1 | ## Very simple extractor to just return what is in our Feature.Db objects
 2 | 
 3 | ## business end of things
 4 | 
 5 | .extractDataCols <- function(conn, tableName){
 6 |   SQL <- paste0("SELECT * FROM ", tableName, ";")
 7 |   dbEasyQuery(conn, SQL)
 8 | }
 9 | 
10 | 
11 | .extractFeaturesAsGRanges <- function(db)
12 | {
13 |     ## 1st figure out what table is not the metadata table.
14 |     conn <- dbconn(db) ## featuredbconn(db)
15 |     tableNames <- dbListTables(conn)
16 |     tableName <- tableNames[!tableNames %in% "metadata"]
17 | 
18 |     ## Then learn what the columns are in that table and assign to otherCols
19 |     colNames <- dbListFields(conn, tableName)
20 |     reserved <- c("name", "chrom", "strand", "chromStart", "chromEnd")
21 |     colNames <- colNames[!colNames %in% reserved]
22 | 
23 |     ## Extract the data
24 |     df <- .extractDataCols(conn, tableName)
25 |     
26 |     ## Make & return the Object
27 |     md <- metadata(db)
28 |     genome <- md[md$name == "Genome", 'value']
29 |     if (is.null(genome))
30 |         genome <- NA_character_
31 |     ans <-
32 |         GRanges(seqnames = df$chrom,
33 |                 ranges = IRanges(df$chromStart, df$chromEnd, names=df$name),
34 |                 strand = sub('\\.', '*', df$strand),
35 |                 df[colNames],
36 |                 seqinfo = Seqinfo(unique(df$chrom), genome=genome))
37 |     metadata(ans)[[1]] <- DataFrame(metadata(db))
38 |     ans
39 | }
40 | 
41 | setGeneric("features", signature="x",
42 |            function(x) standardGeneric("features"))
43 | 
44 | setMethod("features", "FeatureDb",
45 |           function(x) .extractFeaturesAsGRanges(x))
46 | 
47 | ## test code:
48 | ## library(GenomicFeatures)
49 | ## fdb <- loadDb("FeatureDb.sqlite")
50 | ## features(fdb)
51 | 


--------------------------------------------------------------------------------
/R/getPromoterSeq-methods.R:
--------------------------------------------------------------------------------
 1 | ### =========================================================================
 2 | ### getPromoterSeq() and getTerminatorSeq()
 3 | ### -------------------------------------------------------------------------
 4 | 
 5 | ### Original author: Paul Shannon
 6 | 
 7 | ### NOTE (H. Pagès, Jan 22, 2024): Interface is inconsistent with
 8 | ### extractTranscriptSeqs() or extractUpstreamSeqs().
 9 | ### TODO: Implement extractPromoterSeqs() and extractTerminatorSeqs() and
10 | ### model them after extractTranscriptSeqs() or extractUpstreamSeqs().
11 | ### Then deprecate getPromoterSeq() and getTerminatorSeq() in favor of
12 | ### extractPromoterSeqs() and extractTerminatorSeqs().
13 | 
14 | setGeneric("getPromoterSeq", signature="query",
15 |     function(query, subject, upstream=2000, downstream=200)
16 |         standardGeneric("getPromoterSeq"))
17 | 
18 | setGeneric("getTerminatorSeq", signature="query",
19 |     function(query, subject, upstream=2000, downstream=200)
20 |         standardGeneric("getTerminatorSeq"))
21 | 
22 | .GRanges_getPromoterSeq <- function(query, subject, FUN, upstream, downstream)
23 | {
24 |     stopifnot(is(query, "GRanges"))
25 |     promoter.granges <- FUN(query, upstream, downstream)
26 |     result <- getSeq(subject, promoter.granges)
27 |     md <- mcols(query)
28 |     geneIDs <- names(query)   # often NULL
29 |     if (is.null(geneIDs))
30 |       geneIDs <- rep(NA_character_, length(query))
31 |     md$geneID <- geneIDs
32 |     mcols(result) <- md
33 |     result
34 | }
35 | 
36 | setMethod("getPromoterSeq", "GRanges",
37 |     function(query, subject, upstream=2000, downstream=200)
38 |         .GRanges_getPromoterSeq(query, subject, promoters,
39 |                                 upstream, downstream)
40 | )
41 | 
42 | setMethod("getTerminatorSeq", "GRanges",
43 |     function(query, subject, upstream=2000, downstream=200)
44 |         .GRanges_getPromoterSeq(query, subject, terminators,
45 |                                 upstream, downstream)
46 | )
47 | 
48 | .GRangesList_getPromoterSeq <-
49 |     function(query, subject, FUN, upstream, downstream)
50 | {
51 |     stopifnot(is(query, "GRangesList"))
52 |     unlisted_query <- unlist(query, use.names=FALSE)  # GRanges object
53 |     promoter.granges <- FUN(unlisted_query, upstream, downstream)
54 |     result <- getSeq(subject, promoter.granges)
55 |     md <- mcols(unlisted_query)
56 |     geneIDs <- names(query)
57 |     geneID.counts <- elementNROWS(query)
58 |     geneIDs <- rep(geneIDs, geneID.counts)  # H. Pagès: what if geneIDs is NULL?
59 |     md$geneID <- geneIDs
60 |     mcols(result) <- md
61 |     relist(result, query)
62 | }
63 | 
64 | setMethod("getPromoterSeq", "GRangesList",
65 |     function(query, subject, upstream=2000, downstream=200)
66 |         .GRangesList_getPromoterSeq(query, subject, promoters,
67 |                                     upstream, downstream)
68 | )
69 | 
70 | setMethod("getTerminatorSeq", "GRangesList",
71 |     function(query, subject, upstream=2000, downstream=200)
72 |         .GRangesList_getPromoterSeq(query, subject, terminators,
73 |                                     upstream, downstream)
74 | )
75 | 
76 | 


--------------------------------------------------------------------------------
/R/id2name.R:
--------------------------------------------------------------------------------
 1 | ### =========================================================================
 2 | ### Map internal ids to external names for a given feature type.
 3 | ### -------------------------------------------------------------------------
 4 | 
 5 | 
 6 | id2name <- function(txdb, feature.type=c("tx", "exon", "cds"))
 7 | {
 8 |     if (!is(txdb, "TxDb"))
 9 |         stop("'txdb' must be a TxDb object")
10 |     feature.type <- match.arg(feature.type)
11 |     table <- switch(feature.type, tx="transcript", exon="exon", cds="cds")
12 |     columns <- TXDB_table_columns(table)[c("id", "name")]
13 |     df <- TxDb_SELECT_from_INNER_JOIN(txdb, table, columns)
14 |     ans <- df[[columns[2L]]]
15 |     names(ans) <- as.character(df[[columns[1L]]])
16 |     ans
17 | }
18 | 
19 | 


--------------------------------------------------------------------------------
/R/makeFeatureDbFromUCSC.R:
--------------------------------------------------------------------------------
 1 | ### =========================================================================
 2 | ### makeFeatureDbFromUCSC()
 3 | ### -------------------------------------------------------------------------
 4 | 
 5 | ### Everything in this file has moved to txdbmaker!
 6 | 
 7 | supportedUCSCFeatureDbTracks <- function(genome)
 8 | {
 9 |     call_fun_in_txdbmaker("supportedUCSCFeatureDbTracks", genome=genome)
10 | }
11 | 
12 | supportedUCSCFeatureDbTables <- function(...)
13 | {
14 |     call_fun_in_txdbmaker("supportedUCSCFeatureDbTables", ...)
15 | }
16 | 
17 | UCSCFeatureDbTableSchema <- function(...)
18 | {
19 |     call_fun_in_txdbmaker("UCSCFeatureDbTableSchema", ...)
20 | }
21 | 
22 | makeFeatureDbFromUCSC <- function(...)
23 | {
24 |     call_fun_in_txdbmaker("makeFeatureDbFromUCSC", ...)
25 | }
26 | 
27 | 


--------------------------------------------------------------------------------
/R/makeTxDb.R:
--------------------------------------------------------------------------------
 1 | ### =========================================================================
 2 | ### Making TxDb objects
 3 | ### -------------------------------------------------------------------------
 4 | 
 5 | ### Everything in this file has moved to txdbmaker!
 6 | 
 7 | makeTxDb <- function(...)
 8 | {
 9 |     call_fun_in_txdbmaker("makeTxDb", ...)
10 | }
11 | 
12 | 


--------------------------------------------------------------------------------
/R/makeTxDbFromBiomart.R:
--------------------------------------------------------------------------------
 1 | ### =========================================================================
 2 | ### makeTxDbFromBiomart()
 3 | ### -------------------------------------------------------------------------
 4 | 
 5 | ### Everything in this file has moved to txdbmaker!
 6 | 
 7 | getChromInfoFromBiomart <- function(...)
 8 | {
 9 |     call_fun_in_txdbmaker("getChromInfoFromBiomart", ...)
10 | }
11 | 
12 | 
13 | makeTxDbFromBiomart <- function(...)
14 | {
15 |     call_fun_in_txdbmaker("makeTxDbFromBiomart", ...)
16 | }
17 | 
18 | 


--------------------------------------------------------------------------------
/R/makeTxDbFromEnsembl.R:
--------------------------------------------------------------------------------
 1 | ### =========================================================================
 2 | ### makeTxDbFromEnsembl()
 3 | ### -------------------------------------------------------------------------
 4 | 
 5 | ### Everything in this file has moved to txdbmaker!
 6 | 
 7 | makeTxDbFromEnsembl <- function(...)
 8 | {
 9 |     call_fun_in_txdbmaker("makeTxDbFromEnsembl", ...)
10 | }
11 | 
12 | 


--------------------------------------------------------------------------------
/R/makeTxDbFromGFF.R:
--------------------------------------------------------------------------------
 1 | ### =========================================================================
 2 | ### makeTxDbFromGFF()
 3 | ### -------------------------------------------------------------------------
 4 | 
 5 | ### Everything in this file has moved to txdbmaker!
 6 | 
 7 | makeTxDbFromGFF <- function(...)
 8 | {
 9 |     call_fun_in_txdbmaker("makeTxDbFromGFF", ...)
10 | }
11 | 
12 | 


--------------------------------------------------------------------------------
/R/makeTxDbFromGRanges.R:
--------------------------------------------------------------------------------
 1 | ### =========================================================================
 2 | ### makeTxDbFromGRanges()
 3 | ### -------------------------------------------------------------------------
 4 | 
 5 | ### Everything in this file has moved to txdbmaker!
 6 | 
 7 | makeTxDbFromGRanges <- function(...)
 8 | {
 9 |     call_fun_in_txdbmaker("makeTxDbFromGRanges", ...)
10 | }
11 | 
12 | 


--------------------------------------------------------------------------------
/R/makeTxDbFromUCSC.R:
--------------------------------------------------------------------------------
 1 | ### =========================================================================
 2 | ### makeTxDbFromUCSC()
 3 | ### -------------------------------------------------------------------------
 4 | 
 5 | ### Everything in this file has moved to txdbmaker!
 6 | 
 7 | supportedUCSCtables <- function(...)
 8 | {
 9 |     call_fun_in_txdbmaker("supportedUCSCtables", ...)
10 | }
11 | 
12 | browseUCSCtrack <- function(...)
13 | {
14 |     call_fun_in_txdbmaker("browseUCSCtrack", ...)
15 | }
16 | 
17 | makeTxDbFromUCSC <- function(...)
18 | {
19 |     call_fun_in_txdbmaker("makeTxDbFromUCSC", ...)
20 | }
21 | 
22 | 


--------------------------------------------------------------------------------
/R/makeTxDbPackage.R:
--------------------------------------------------------------------------------
 1 | ### Everything in this file has moved to txdbmaker!
 2 | 
 3 | supportedMiRBaseBuildValues <- function()
 4 | {
 5 |     call_fun_in_txdbmaker("supportedMiRBaseBuildValues")
 6 | }
 7 | 
 8 | makePackageName <- function(...)
 9 | {
10 |     call_fun_in_txdbmaker("makePackageName", ...)
11 | }
12 | 
13 | makeTxDbPackage <- function(...)
14 | {
15 |     call_fun_in_txdbmaker("makeTxDbPackage", ...)
16 | }
17 | 
18 | makeTxDbPackageFromUCSC <- function(...)
19 | {
20 |     call_fun_in_txdbmaker("makeTxDbPackageFromUCSC", ...)
21 | }
22 | 
23 | makeFDbPackageFromUCSC <- function(...)
24 | {
25 |     call_fun_in_txdbmaker("makeFDbPackageFromUCSC", ...)
26 | }
27 | 
28 | makeTxDbPackageFromBiomart <- function(...)
29 | {
30 |     call_fun_in_txdbmaker("makeTxDbPackageFromBiomart", ...)
31 | }
32 | 
33 | 


--------------------------------------------------------------------------------
/R/mapIdsToRanges.R:
--------------------------------------------------------------------------------
 1 | setGeneric("mapIdsToRanges", signature="x",
 2 |     function(x, ...) standardGeneric("mapIdsToRanges")
 3 | )
 4 | 
 5 | setMethod("mapIdsToRanges", "TxDb",
 6 |           function(x,
 7 |                    keys,
 8 |                    type = c("cds", "exon", "tx", "gene"),
 9 |                    columns = NULL)
10 | {
11 |     .assert(is.list(keys) && .is.named(keys),
12 |         "'keys' must be a named list")
13 | 
14 |     .assert(is.null(columns) || is.character(columns),
15 |            "'columns' must be 'NULL' or a character vector")
16 | 
17 |     type <- match.arg(type)
18 | 
19 |     fun <- switch(type,
20 |                   cds = cds,
21 |                   exon = exons,
22 |                   tx = transcripts,
23 |                   gene = genes)
24 | 
25 |     res <- fun(x, keys, columns = unique(c(names(keys), columns)))
26 |     matches <- match(mcols(res)[[names(keys)]], keys[[1]])
27 |     ranges <- rep(res, lengths(matches))
28 | 
29 |     f <- factor(keys[[1]][unlist(matches, use.names = FALSE)],
30 |                 levels = unique(keys[[1]]))
31 |     splitAsList(ranges, f, drop = FALSE)[keys[[1]]]
32 | })
33 | 
34 | setGeneric("mapRangesToIds", signature="x",
35 |     function(x, ...) standardGeneric("mapRangesToIds")
36 | )
37 | 
38 | setMethod("mapRangesToIds", "TxDb",
39 |           function(x,
40 |                    ranges,
41 |                    type = c("cds", "exon", "tx", "gene"),
42 |                    columns = NULL,
43 |                    ...)
44 | {
45 |     type <- match.arg(type)
46 |     .assert(is(ranges, "Vector"),
47 |         "'ranges' must be a 'Vector'")
48 |     .assert(is.null(columns) || is.character(columns),
49 |            "'columns' must be 'NULL' or a character vector")
50 | 
51 |     fun <- switch(type,
52 |                   cds = cds,
53 |                   exon = exons,
54 |                   tx = transcripts,
55 |                   gene = genes)
56 | 
57 |     all <-
58 |         if (is.null(columns)) {
59 |             fun(x)
60 |         } else {
61 |             fun(x, columns = columns)
62 |         }
63 | 
64 |     hits <- findOverlaps(ranges, all, ...)
65 |     lapply(split(all[subjectHits(hits)], names(ranges)[queryHits(hits)]), mcols)
66 | })
67 | 
68 | .assert <- function(x, message) {
69 |     if(!x) {
70 |         stop(message, call. = FALSE)
71 |     }
72 | }
73 | 
74 | .is.named <- function(x) {
75 |     nm <- names(x)
76 |     !is.null(nm) && all(!is.na(nm) & nzchar(nm))
77 | }
78 | 


--------------------------------------------------------------------------------
/R/nearest-methods.R:
--------------------------------------------------------------------------------
 1 | ### =========================================================================
 2 | ### nearest (and related) methods
 3 | ### -------------------------------------------------------------------------
 4 | 
 5 | 
 6 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 7 | ### distance 
 8 | ###
 9 | 
10 | setMethod("distance", c("GenomicRanges", "TxDb"),
11 |     function(x, y, ignore.strand=FALSE, ..., id, 
12 |              type=c("gene", "tx", "exon", "cds"))
13 |     {
14 |         if (!identical(length(x), length(id)))
15 |             stop("length(id) must equal length(x)")
16 |         if (!is.character(id))
17 |             stop("'id' must be a character")
18 | 
19 |         if (type == "gene") {
20 |             .extractByGeneID(x, y, ignore.strand, id)
21 |         } else {
22 |             rng <- switch(type,
23 |                           tx=transcripts(y, "tx_id", filter=list(tx_id=id)),
24 |                           exon=exons(y, "exon_id", filter=list(exon_id=id)),
25 |                           cds=cds(y, "cds_id", filter=list(cds_id=id)))
26 |             f <- factor(mcols(rng)[,])
27 |             missing <- !id %in% levels(f) 
28 |             if (any(missing))
29 |                   warning(paste0("id(s): '", paste(unique(id[missing]), 
30 |                                  sep=","), "' were not found in 'y'"))
31 |             ## rep out ranges according to 'id'
32 |             rng <- rng[match(id[!missing], levels(f))]
33 |             ans <- rep(NA_integer_, length(x))
34 |             ans[!missing] <- distance(x[!missing], rng, 
35 |                                       ignore.strand=ignore.strand)
36 |             stopifnot(length(ans) == length(id))
37 |             ans
38 |         }
39 |     }
40 | )
41 | 
42 | .extractByGeneID <- function(x, y, ignore.strand, id)
43 | {
44 |     tx <- transcriptsBy(y, "gene")
45 |     missing <- !id %in% names(tx) 
46 |     if (any(missing))
47 |           warning(paste0("id(s): '", paste(unique(id[missing]), sep=","), 
48 |                   "' were not found in 'y'"))
49 | 
50 |     group <- range(tx[names(tx) %in% id], ignore.strand=ignore.strand)
51 |     multiRange <- lengths(group) > 1L
52 |     if (any(multiRange)) {
53 |         warning(paste0("id(s): '", paste(unique(names(multiRange)[multiRange]), 
54 |                        sep=','), 
55 |                        "' could not be collapsed to a single gene region"))
56 |         group <- group[!multiRange]
57 |     }
58 | 
59 |     valid <- (!id %in% names(multiRange)[multiRange]) & !missing 
60 |     ## rep out ranges according to 'id'
61 |     rng <- unlist(group, use.names=FALSE)
62 |     rng <- rng[match(id[valid], names(group))]
63 |     ans <- rep(NA_integer_, length(x))
64 |     ans[valid] <- distance(x[valid], rng, ignore.strand=ignore.strand)
65 |     stopifnot(length(ans) == length(id))
66 |     ans
67 | }
68 | 


--------------------------------------------------------------------------------
/R/proteinToGenome.R:
--------------------------------------------------------------------------------
  1 | ### =========================================================================
  2 | ### proteinToGenome()
  3 | ### -------------------------------------------------------------------------
  4 | 
  5 | 
  6 | ### Dispatch is on 2nd argument!
  7 | setGeneric("proteinToGenome", signature="db",
  8 |     function(x, db, ...) standardGeneric("proteinToGenome")
  9 | )
 10 | 
 11 | 
 12 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 13 | ### Make fancy error or warning messages
 14 | ###
 15 | 
 16 | .make_bad_names_msg <- function(x_names, bad_idx, what="invalid name",
 17 |                                 max.show=5L)
 18 | {
 19 |     nbad <- length(bad_idx)
 20 |     if (max.show == 0L) {
 21 |         msg <- c("the names on 'x' contain ", nbad, " ", what)
 22 |         if (nbad != 1L)
 23 |             msg <- c(msg, "s")
 24 |         return(paste(msg, collapse=""))
 25 |     }
 26 |     if (nbad == 1L) {
 27 |         msg <- c("The names on 'x' contain ", what)
 28 |     } else {
 29 |         msg <- c("The names on 'x' contain ", nbad, " ", what, "s")
 30 |         if (nbad > max.show) {
 31 |             if (max.show == 1L) {
 32 |                 msg <- c(msg, " (showing the first one only)")
 33 |             } else {
 34 |                 msg <- c(msg, " (showing the first ", max.show, " only)")
 35 |             }
 36 |             bad_idx <- head(bad_idx, n=max.show)
 37 |         }
 38 |     }
 39 |     bad_names <- x_names[bad_idx]
 40 |     paste0(paste(msg, collapse=""), ": ", paste(bad_names, collapse=", "))
 41 | }
 42 | 
 43 | 
 44 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 45 | ### proteinToGenome() method for GRangesList objects
 46 | ###
 47 | 
 48 | .make_protein_ranges_from_cumwidths <- function(cumwidths)
 49 | {
 50 |     len <- length(cumwidths)
 51 |     protein_start <- c(1L, cumwidths[-len] %/% 3L + 1L)
 52 |     protein_end <- (2L + cumwidths) %/% 3L
 53 |     IRanges(protein_start, protein_end)
 54 | }
 55 | 
 56 | ### Trims first range in 'gr' on its 5' side by 'trim1' nucleotides,
 57 | ### and last range in 'gr' on its 3' side by 'trim2' nucleotides.
 58 | ### Other than that, everything else is preserved (length, names, metadata
 59 | ### columns).
 60 | .trim_first_and_last_ranges <- function(gr, trim1=0L, trim2=0L)
 61 | {
 62 |     stopifnot(is(gr, "GRanges"),
 63 |               isSingleInteger(trim1),
 64 |               isSingleInteger(trim2))
 65 | 
 66 |     gr_ranges <- ranges(gr)
 67 |     gr_len <- length(gr_ranges)
 68 |     stopifnot(gr_len >= 1L)
 69 | 
 70 |     gr_start <- start(gr_ranges)
 71 |     gr_end <- end(gr_ranges)
 72 |     gr_strand <- S4Vectors:::decodeRle(strand(gr))
 73 | 
 74 |     ## Trim first range.
 75 |     strand1 <- gr_strand[[1L]]
 76 |     if (strand1 == "+") {
 77 |         ## Trim on the left.
 78 |         gr_start[[1L]] <- gr_start[[1L]] + trim1
 79 |     } else {
 80 |         ## Trim on the right.
 81 |         gr_end[[1L]] <- gr_end[[1L]] - trim1
 82 |     }
 83 | 
 84 |     ## Trim last range.
 85 |     strand2 <- gr_strand[[gr_len]]
 86 |     if (strand2 == "+") {
 87 |         ## Trim on the right.
 88 |         gr_end[[gr_len]] <- gr_end[[gr_len]] - trim2
 89 |     } else {
 90 |         ## Trim on the left.
 91 |         gr_start[[gr_len]] <- gr_start[[gr_len]] + trim2
 92 |     }
 93 | 
 94 |     if (any(gr_start > gr_end + 1L))
 95 |         stop(wmsg("invalid trimming"))
 96 | 
 97 |     ranges(gr) <- update_ranges(gr_ranges, start=gr_start, end=gr_end)
 98 |     gr
 99 | }
100 | 
101 | ### 'cds' must be a GRanges object representing the CDS parts of a given
102 | ### transcript/protein.
103 | ### 'protein_start' and 'protein_end' must be protein-relative coordinates
104 | ### i.e. coordinates (counted in Amino Acids) relative to the protein
105 | ### associated with 'cds'.
106 | ### Returns a GRanges object.
107 | .map_protein_to_cds <- function(protein_start, protein_end, cds)
108 | {
109 |     stopifnot(isSingleNumber(protein_start),
110 |               isSingleNumber(protein_end),
111 |               protein_start <= protein_end,
112 |               is(cds, "GRanges"))
113 |     nparts <- length(cds)
114 |     cds_widths <- width(cds)
115 |     stopifnot(nparts >= 1L, all(cds_widths >= 1L))
116 |     protein_start <- as.integer(protein_start)
117 |     protein_end <- as.integer(protein_end)
118 |     cds_cumwidths <- cumsum(cds_widths)
119 | 
120 |     ## Add metadata columns 'protein_start' and 'protein_end' to 'cds'.
121 |     protein_ranges <- .make_protein_ranges_from_cumwidths(cds_cumwidths)
122 |     protein_ranges <- DataFrame(protein_start=start(protein_ranges),
123 |                                 protein_end=end(protein_ranges))
124 |     mcols(cds) <- cbind(mcols(cds), protein_ranges)
125 | 
126 |     ## Translate protein-relative coordinates into 0-based CDS-relative
127 |     ## coordinates.
128 |     protein_start0 <- 3L * (protein_start - 1L)
129 |     protein_end0 <- 3L * protein_end - 1L
130 | 
131 |     ## Find CDS parts touched by 'protein_start' and 'protein_end'.
132 |     idx <- 1L + findInterval(c(protein_start0, protein_end0), cds_cumwidths)
133 |     idx1 <- idx[[1L]]
134 |     idx2 <- idx[[2L]]
135 |     if (idx2 > nparts)
136 |         idx2 <- nparts
137 | 
138 |     ## Extract all CDS parts touched by the [protein_start,protein_end] range.
139 |     ans <- cds[idx1:idx2]
140 | 
141 |     ## Trim first and last ranges in 'ans' (trimming should **always**
142 |     ## be valid).
143 |     trim1 <- protein_start0 - sum(head(cds_widths, idx1 - 1L))
144 |     trim2 <- cds_cumwidths[[idx2]] - protein_end0 - 1L
145 |     ans <- .trim_first_and_last_ranges(ans, trim1, trim2)
146 | 
147 |     ## Adjust metadata columns 'protein_start' and 'protein_end' to account
148 |     ## for trimming.
149 |     ans_mcols <- mcols(ans)
150 |     ans_mcols[1L, "protein_start"] <- protein_start
151 |     ans_mcols[nrow(ans_mcols), "protein_end"] <- protein_end
152 |     mcols(ans) <- ans_mcols
153 | 
154 |     ans
155 | }
156 | 
157 | ### Returns a named GRangesList object parallel to 'x' (names on 'x' are
158 | ### propagated).
159 | setMethod("proteinToGenome", "GRangesList",
160 |     function(x, db)
161 |     {
162 |         if (!is(x, "IRanges"))
163 |             stop(wmsg("'x' must be an IRanges object or derivative"))
164 |         x_names <- names(x)
165 |         if (is.null(x_names))
166 |             stop(wmsg("'x' must have names"))
167 |         coding_tx_names <- names(db)
168 |         if (is.null(coding_tx_names))
169 |             stop(wmsg("'db' must have names when it's a GRangesList object"))
170 |         non_coding_idx <- which(!(x_names %in% coding_tx_names))
171 |         if (length(non_coding_idx) != 0L) {
172 |             msg <- .make_bad_names_msg(x_names, non_coding_idx,
173 |                                        what="non-coding transcript name")
174 |             warning(wmsg(msg), immediate.=TRUE)
175 |         }
176 |         x_start <- start(x)
177 |         x_end <- end(x)
178 |         ## TODO: Replace this inefficient lapply-based implementation with
179 |         ## something better.
180 |         ans <- lapply(setNames(seq_along(x), x_names),
181 |             function(i) {
182 |                 if (i %in% non_coding_idx)
183 |                     return(GRanges())
184 |                 tx_name <- x_names[[i]]
185 |                 cds <- db[[tx_name]]
186 |                 .map_protein_to_cds(x_start[[i]], x_end[[i]], cds)
187 |             }
188 |         )
189 |         GRangesList(ans)
190 |     }
191 | )
192 | 
193 | 
194 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
195 | ### Default proteinToGenome() method
196 | ###
197 | 
198 | ### 'db' must be a TxDb object or any object that supports transcripts()
199 | ### (e.g. EnsDb object).
200 | .check_supplied_tx_names <- function(supplied_tx_names, db)
201 | {
202 |     if (is.null(supplied_tx_names))
203 |         stop(wmsg("'x' must have names and they must be transcript names"))
204 |     stopifnot(is.character(supplied_tx_names))
205 |     if (any(supplied_tx_names %in% c(NA_character_, "")))
206 |         stop(wmsg("the names on 'x' cannot contain NAs or empty strings"))
207 |     tx <- transcripts(db, columns="tx_name")
208 |     tx_names <- mcols(tx)$tx_name
209 |     bad <- !(supplied_tx_names %in% tx_names)
210 |     if (all(bad))
211 |         stop(wmsg("The names on 'x' must be transcript names present in ",
212 |                   "the supplied ", class(db), " object. Note that the ",
213 |                   "transcript names in this object can be obtained/seen ",
214 |                   "with:"),
215 |              "\n    tx <- transcripts(db, columns=\"tx_name\")",
216 |              "\n    mcols(tx)$tx_name")
217 |     bad_idx <- which(bad)
218 |     if (length(bad_idx) != 0L) {
219 |         msg <- .make_bad_names_msg(supplied_tx_names, bad_idx,
220 |                                    what="invalid transcript name")
221 |         stop(wmsg(msg))
222 |     }
223 | }
224 | 
225 | ### 'db' must be a TxDb object or any object that supports cdsBy()
226 | ### (e.g. EnsDb object).
227 | .extract_cds_by_tx <- function(db, tx_names)
228 | {
229 |     stopifnot(is.character(tx_names))
230 |     if (!is(db, "EnsDb"))
231 |         return(cdsBy(db, by="tx", use.names=TRUE))
232 |     ## Should never happen in practice because if 'db' is an EnsDb object
233 |     ## then the ensembldb package should be loaded already, and ensembldb
234 |     ## depends on AnnotationFilter.
235 |     if (!requireNamespace("AnnotationFilter", quietly=TRUE))
236 |         stop(wmsg("Couldn't load the AnnotationFilter package. ",
237 |                   "The AnnotationFilter package is needed when ",
238 |                   "calling proteinToGenome() on an EnsDb object. ",
239 |                   "Please install it."))
240 |     filter <- AnnotationFilter::TxIdFilter(tx_names)
241 |     cdsBy(db, by="tx", filter=filter)
242 | }
243 | 
244 | ### 'db' must be a TxDb object or any object that supports transcripts()
245 | ### and cdsBy() (e.g. EnsDb object).
246 | ### Returns a named GRangesList object parallel to 'x' (names on 'x' are
247 | ### propagated).
248 | .default_proteinToGenome <- function(x, db)
249 | {
250 |     if (!is(x, "IRanges"))
251 |         stop(wmsg("'x' must be an IRanges object or derivative"))
252 |     x_names <- names(x)
253 |     .check_supplied_tx_names(x_names, db)
254 |     cds_by_tx <- .extract_cds_by_tx(db, x_names)
255 |     proteinToGenome(x, cds_by_tx)
256 | }
257 | 
258 | setMethod("proteinToGenome", "ANY", .default_proteinToGenome)
259 | 
260 | 


--------------------------------------------------------------------------------
/R/tRNAs.R:
--------------------------------------------------------------------------------
 1 | ### =========================================================================
 2 | ### Extractors for features in other databases
 3 | ### -------------------------------------------------------------------------
 4 | ###
 5 | ### This is for extractors that do NOT point to the TxDb proper.
 6 | ### Such extractors can point to other databases OR they can
 7 | ### point to other FeatureDbs within the same package.
 8 | 
 9 | microRNAs <- function(x) .Defunct()
10 | 
11 | .syncSeqlevel <- function(txdb, ans){
12 |   isActSeq <- .isActiveSeq(txdb)
13 |   n2oNames <- levels(seqnames(ans))
14 |   n2o <- match(seqnames(seqinfo(txdb)), n2oNames)
15 |   seqinfo(ans, new2old=n2o) <- seqinfo(txdb)
16 |   seqlevels(ans, pruning.mode="coarse") <- names(isActSeq)[isActSeq]
17 |   ans
18 | }
19 | 
20 | ## main function
21 | .tRNAs <- function(x) {
22 |     if (!requireNamespace("txdbmaker", quietly=TRUE))
23 |         stop("Could not load package txdbmaker. Is it installed?\n\n  ",
24 |              wmsg("Note that the tRNAs() method for TxDb objects requires ",
25 |                   "the txdbmaker package. Please install it with:"),
26 |              "\n\n    BiocManager::install(\"txdbmaker\")")
27 | 
28 |   fdbpkg <- "FDb.UCSC.tRNAs"
29 |   fdbenv <- loadNamespace(fdbpkg)
30 |   ## get the current package name
31 |   pkgName <- txdbmaker::makePackageName(x)
32 |   ## from here we know what the FDB should MUST look like
33 |   fdbName <- sub("TxDb","FDb",pkgName)
34 |   fdbName <- unlist(strsplit(fdbName,"\\."))
35 |   fdbName[5] <- "tRNAs"
36 |   fdbString <- paste(fdbName,collapse=".")
37 |   if (!exists(fdbString, envir=fdbenv)) {
38 |       stop("there is no tRNA data available for this organism/source")
39 |   } else {
40 |       fdb <- get(fdbString, fdbenv)
41 |       ans <- features(fdb)
42 |   }
43 |   ## Now check active seqs and set the seqlevels
44 |   .syncSeqlevel(x, ans)
45 | }
46 | 
47 | setGeneric("tRNAs", function(x) standardGeneric("tRNAs"))
48 | 
49 | setMethod("tRNAs", "TxDb", .tRNAs)
50 | 
51 | 
52 | ## Test code for new TXTYPE support (BC vs new code)
53 | ## library(TxDb.Hsapiens.BioMart.ensembl.GRCh38);txdb2= TxDb.Hsapiens.BioMart.ensembl.GRCh38;transcripts(txdb2, columns='TXTYPE')
54 | ## exons(txdb2, columns='TXTYPE')
55 | ## And this one works now
56 | ## library(TxDb.Hsapiens.UCSC.hg19.knownGene);txdb  = TxDb.Hsapiens.UCSC.hg19.knownGene;transcripts(txdb, columns='TXTYPE')
57 | ## But this still fails (argh):
58 | ## exons(txdb, columns='TXTYPE')
59 | 
60 | 


--------------------------------------------------------------------------------
/R/transcriptLengths.R:
--------------------------------------------------------------------------------
 1 | ### =========================================================================
 2 | ### transcriptLengths()
 3 | ### -------------------------------------------------------------------------
 4 | 
 5 | 
 6 | .match_and_check <- function(rglist_names, tx_id)
 7 | {
 8 |     if (is.null(rglist_names))
 9 |         stop(wmsg("internal error in transcriptLengths(): ",
10 |                   "no names on 'rglist'"))
11 |     m <- match(rglist_names, tx_id)
12 |     if (any(is.na(m)))
13 |         stop(wmsg("internal error in transcriptLengths(): ",
14 |                   "some 'rglist' names cannot be mapped to 'tx_id'"))
15 |     m
16 | }
17 | 
18 | ### 'rglist' must be a named IntegerRangesList or GRangesList.
19 | ### 'tx_id' must be a character vector.
20 | .eltNROWS <- function(rglist, tx_id)
21 | {
22 |     ans <- integer(length(tx_id))
23 |     m <- .match_and_check(names(rglist), tx_id)
24 |     ans[m] <- elementNROWS(rglist)
25 |     ans
26 | }
27 | 
28 | .sum_width <- function(rglist, tx_id)
29 | {
30 |     ans <- integer(length(tx_id))
31 |     m <- .match_and_check(names(rglist), tx_id)
32 |     ans[m] <- sum(width(rglist))
33 |     ans
34 | }
35 | 
36 | ### The returned data frame has 1 row per transcript returned by
37 | ### 'transcripts(txdb)' and in the same order.
38 | ### NOTES:
39 | ### - The functions only accepts a TxDb object for now. We'll make it
40 | ###   a generic function when we need to support other types of input.
41 | ### - The function could probably be made much faster by querying the
42 | ###   TxDb object directly in SQL instead of calling exonsBy(), cdsBy(),
43 | ###   fiveUTRsByTranscript(), and threeUTRsByTranscript() successively.
44 | transcriptLengths <- function(txdb, with.cds_len=FALSE,
45 |                                     with.utr5_len=FALSE, with.utr3_len=FALSE,
46 | 				    ...)
47 | {
48 |     if (!isTRUEorFALSE(with.cds_len))
49 |         stop("'with.cds_len' must be TRUE or FALSE")
50 |     if (!isTRUEorFALSE(with.utr5_len))
51 |         stop("'with.utr5_len' must be TRUE or FALSE")
52 |     if (!isTRUEorFALSE(with.cds_len))
53 |         stop("'with.utr3_len' must be TRUE or FALSE")
54 |     tx <- transcripts(txdb, columns=c("tx_id", "tx_name", "gene_id"),...)
55 |     ans <- mcols(tx)
56 |     ans$gene_id <- as.character(ans$gene_id)
57 |     tx_id <- as.character(ans$tx_id)  # because match() will want a character
58 | 
59 |     rg_by_tx <- exonsBy(txdb, by="tx", ...)
60 |     ans$nexon <- .eltNROWS(rg_by_tx, tx_id)
61 |     ans$tx_len <- .sum_width(rg_by_tx, tx_id)
62 |     if (with.cds_len) {
63 |         rg_by_tx <- cdsBy(txdb, by="tx", ...)
64 |         ans$cds_len <- .sum_width(rg_by_tx, tx_id)
65 |     }
66 |     if (with.utr5_len) {
67 |         rg_by_tx <- fiveUTRsByTranscript(txdb, ...)
68 |         ans$utr5_len <- .sum_width(rg_by_tx, tx_id)
69 |     }
70 |     if (with.utr3_len) {
71 |         rg_by_tx <- threeUTRsByTranscript(txdb, ...)
72 |         ans$utr3_len <- .sum_width(rg_by_tx, tx_id)
73 |     }
74 |     as.data.frame(ans)
75 | }
76 | 
77 | 


--------------------------------------------------------------------------------
/R/transcriptLocs2refLocs.R:
--------------------------------------------------------------------------------
 1 | ### =========================================================================
 2 | ### transcriptLocs2refLocs()
 3 | ### -------------------------------------------------------------------------
 4 | 
 5 | 
 6 | .normargExonStartsOrEnds <- function(exonStarts, argname)
 7 | {
 8 |     if (is.list(exonStarts))
 9 |         return(exonStarts)
10 |     if (is(exonStarts, "IntegerList"))
11 |         return(as.list(exonStarts))
12 |     if (is.character(exonStarts))
13 |         return(toListOfIntegerVectors(exonStarts))
14 |     stop("'", argname, "' must be a list of integer vectors, ",
15 |          "an IntegerList object,\n  or a character vector where ",
16 |          "each element is a comma-separated list of\n  integers")
17 | }
18 | 
19 | 
20 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
21 | ### transcriptLocs2refLocs()
22 | ###
23 | 
24 | transcriptLocs2refLocs <- function(tlocs, exonStarts=list(), exonEnds=list(),
25 |                                    strand=character(0),
26 |                                    decreasing.rank.on.minus.strand=FALSE,
27 |                                    error.if.out.of.bounds=TRUE)
28 | {
29 |     if (!is.list(tlocs)) {
30 |         if (!is(tlocs, "IntegerList"))
31 |             stop("'tlocs' must be a list of integer vectors ",
32 |                  "or an IntegerList object")
33 |         tlocs <- as.list(tlocs)
34 |     }
35 |     if (is(exonStarts, "IntegerRangesList")) {
36 |         if (!identical(exonEnds, list()))
37 |             stop("'exonEnds' cannot be specified ",
38 |                  "when 'exonStarts' is a IntegerRangesList object")
39 |         exonEnds <- end(exonStarts)
40 |         exonStarts <- start(exonStarts)
41 |     }
42 |     exonStarts <- .normargExonStartsOrEnds(exonStarts, "exonStarts")
43 |     exonEnds <- .normargExonStartsOrEnds(exonEnds, "exonEnds")
44 |     if (is.factor(strand))
45 |         strand <- as.vector(strand)
46 |     if (!is.character(strand))
47 |         stop("'strand' must be a character vector")
48 |     if (length(tlocs) != length(strand)
49 |      || length(exonStarts) != length(strand)
50 |      || length(exonEnds) != length(strand))
51 |         stop("'tlocs', 'exonStarts', 'exonEnds' and 'strand' ",
52 |              "must have the same length")
53 |     if (!isTRUEorFALSE(decreasing.rank.on.minus.strand))
54 |         stop("'decreasing.rank.on.minus.strand' must be TRUE or FALSE")
55 |     GenomicRanges:::unsafe.transcriptLocs2refLocs(tlocs,
56 |                             exonStarts, exonEnds, strand,
57 |                             decreasing.rank.on.minus.strand,
58 |                             error.if.out.of.bounds)
59 | }
60 | 
61 | 
62 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
63 | ### transcriptWidths()
64 | ###
65 | 
66 | transcriptWidths <- function(exonStarts=list(), exonEnds=list())
67 | {
68 |     if (is(exonStarts, "IntegerRangesList")) {
69 |         if (!identical(exonEnds, list()))
70 |             stop("'exonEnds' cannot be specified ",
71 |                  "when 'exonStarts' is a IntegerRangesList object")
72 |         exonEnds <- end(exonStarts)
73 |         exonStarts <- start(exonStarts)
74 |     }
75 |     exonStarts <- .normargExonStartsOrEnds(exonStarts, "exonStarts")
76 |     exonEnds <- .normargExonStartsOrEnds(exonEnds, "exonEnds")
77 |     if (length(exonStarts) != length(exonEnds))
78 |         stop("'exonStarts', 'exonEnds' must have the same length")
79 |     GenomicRanges:::unsafe.transcriptWidths(exonStarts, exonEnds)
80 | }
81 | 
82 | 


--------------------------------------------------------------------------------
/R/transcriptsByOverlaps.R:
--------------------------------------------------------------------------------
 1 | ###
 2 | 
 3 | setGeneric("transcriptsByOverlaps", signature="x",
 4 |     function(x, ranges, maxgap = -1L, minoverlap = 0L,
 5 |              type = c("any", "start", "end"), ...)
 6 |         standardGeneric("transcriptsByOverlaps")
 7 | )
 8 | 
 9 | setMethod("transcriptsByOverlaps", "TxDb",
10 |     function(x, ranges, maxgap = -1L, minoverlap = 0L,
11 |              type = c("any", "start", "end"),
12 |              columns = c("tx_id", "tx_name"))
13 |         subsetByOverlaps(transcripts(x, columns = columns), ranges,
14 |                          maxgap = maxgap, minoverlap = minoverlap,
15 |                          type = match.arg(type))
16 | )
17 | 
18 | setGeneric("exonsByOverlaps", signature="x",
19 |     function(x, ranges, maxgap = -1L, minoverlap = 0L,
20 |              type = c("any", "start", "end"), ...)
21 |         standardGeneric("exonsByOverlaps")
22 | )
23 | 
24 | setMethod("exonsByOverlaps", "TxDb",
25 |     function(x, ranges, maxgap = -1L, minoverlap = 0L,
26 |              type = c("any", "start", "end"),
27 |              columns = "exon_id")
28 |         subsetByOverlaps(exons(x, columns = columns), ranges,
29 |                          maxgap = maxgap, minoverlap = minoverlap,
30 |                          type = match.arg(type))
31 | )
32 | 
33 | setGeneric("cdsByOverlaps", signature="x",
34 |     function(x, ranges, maxgap = -1L, minoverlap = 0L,
35 |              type = c("any", "start", "end"), ...)
36 |         standardGeneric("cdsByOverlaps")
37 | )
38 | 
39 | setMethod("cdsByOverlaps", "TxDb",
40 |     function(x, ranges, maxgap = -1L, minoverlap = 0L,
41 |              type = c("any", "start", "end"),
42 |              columns = "cds_id")
43 |         subsetByOverlaps(cds(x, columns = columns), ranges,
44 |                          maxgap = maxgap, minoverlap = minoverlap,
45 |                          type = match.arg(type))
46 | )
47 | 
48 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
  1 | ### =========================================================================
  2 | ### Miscellaneous low-level utils
  3 | ### -------------------------------------------------------------------------
  4 | ###
  5 | ### Unless stated otherwise, nothing in this file is exported.
  6 | ###
  7 | 
  8 | 
  9 | call_fun_in_txdbmaker <- function(fun, ...)
 10 | {
 11 |     msg <- c(fun, "() has moved from GenomicFeatures to the txdbmaker ",
 12 |              "package, and is formally defunct in GenomicFeatures ",
 13 |              ">= 1.61.1. Please call txdbmaker::", fun, "() to get rid ",
 14 |              "of this error.")
 15 |     .Defunct(msg=wmsg(msg))
 16 | }
 17 | 
 18 | 
 19 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 20 | ### DB related.
 21 | ###
 22 | ### Most of this stuff was copy/pasted from AnnotationDbi (trying to avoid
 23 | ### depending on AnnotationDbi for now).
 24 | ###
 25 | 
 26 | ### Environment for storing run-time objects
 27 | RTobjs <- new.env(hash=TRUE, parent=emptyenv())
 28 | 
 29 | assign("debugSQL", FALSE, envir=RTobjs)
 30 | 
 31 | debugSQL <- function()
 32 | {
 33 |     debugSQL <- !get("debugSQL", envir=RTobjs)
 34 |     assign("debugSQL", debugSQL, envir=RTobjs)
 35 |     debugSQL
 36 | }
 37 | 
 38 | 
 39 | ### Use dbQuery(conn, SQL, 1) instead of dbQuery(conn, SQL)[[1]],
 40 | ### it's much safer!
 41 | dbEasyQuery <- function(conn, SQL, j0=NA)
 42 | {
 43 |     if (get("debugSQL", envir=RTobjs)) {
 44 |         if (!is.character(SQL) || length(SQL) != 1L || is.na(SQL))
 45 |             stop("[debugSQL] 'SQL' must be a single string")
 46 |         cat("[debugSQL] SQL query: ", SQL, "\n", sep="")
 47 |         st <- system.time(data0 <- dbGetQuery(conn, SQL))
 48 |         cat("[debugSQL]      time: ", st["user.self"], " seconds\n", sep="")
 49 |     } else {
 50 |         data0 <- dbGetQuery(conn, SQL)
 51 |     }
 52 |     if (is.na(j0))
 53 |         return(data0)
 54 |     ## Needed to deal properly with data frame with 0 column ("NULL data
 55 |     ## frames with 0 rows") returned by RSQLite when the result of a SELECT
 56 |     ## query has 0 row
 57 |     if (nrow(data0) == 0L)
 58 |         character(0)
 59 |     else
 60 |         data0[[j0]]
 61 | }
 62 | 
 63 | ### TODO: Put this in AnnotationDbi.
 64 | queryAnnotationDb <- function(annotationdb, sql)
 65 | {
 66 |     AnnotationDbi:::dbEasyQuery(dbconn(annotationdb),
 67 |                                 paste(sql, collapse="\n"))
 68 | }
 69 | 
 70 | 
 71 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 72 | ### Data frame related.
 73 | ###
 74 | ### TODO: Find a better home for these low-level data.frame utils.
 75 | ###
 76 | 
 77 | ### Not data.frame specific. Would also work on any matrix-like object.
 78 | has_col <- function(x, colnames) {colnames %in% colnames(x)}
 79 | 
 80 | makeZeroRowDataFrame <- function(col2class)
 81 | {
 82 |     if (!is.character(col2class) || is.null(names(col2class)))
 83 |         stop("'col2class' must be a named character vector")
 84 |     as.data.frame(lapply(col2class, function(class) get(class)()),
 85 |                   stringsAsFactors=FALSE)
 86 | }
 87 | 
 88 | ### Sets the class of (all or some of) the columns of a data.frame.
 89 | ### Typical use:
 90 | ###   x <- setDataFrameColClass(x, c(colA="integer", colB="factor"))
 91 | ### Note that if 'x' has more than one "colA" col, then *all* of them are
 92 | ### coerced to integer.
 93 | setDataFrameColClass <- function(x, col2class, drop.extra.cols=FALSE)
 94 | {
 95 |     if (!is.data.frame(x))
 96 |         stop("'x' must be a data.frame")
 97 |     if (!is.character(col2class) || is.null(names(col2class)))
 98 |         stop("'col2class' must be a named character vector")
 99 |     if (!all(names(col2class) %in% colnames(x)))
100 |         stop("'col2class' has invalid names")
101 |     if (!isTRUEorFALSE(drop.extra.cols))
102 |         stop("'drop.extra.cols' must be TRUE or FALSE")
103 |     if (drop.extra.cols) {
104 |         col_idx <- which(colnames(x) %in% names(col2class))
105 |     } else {
106 |         col_idx <- seq_len(ncol(x))
107 |     }
108 |     tmp <- lapply(col_idx,
109 |                   function(j)
110 |                   {
111 |                       col <- x[[j]]
112 |                       colname <- colnames(x)[j]
113 |                       if (!(colname %in% names(col2class)))
114 |                           return(col)
115 |                       class <- col2class[[colname]]
116 |                       FUNname <- paste("as", class, sep=".")
117 |                       if (exists(FUNname) && is.function(FUN <- get(FUNname)))
118 |                           return(FUN(col))
119 |                       as(col, class)
120 |                   })
121 |     names(tmp) <- colnames(x)[col_idx]
122 |     return(data.frame(tmp, check.names=FALSE, stringsAsFactors=FALSE))
123 | }
124 | 
125 | 
126 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
127 | ### ID assignment and/or reassignment.
128 | ###
129 | 
130 | ### Returns the vector of ids such that 'unique(x)[ids, ]' is identical
131 | ### to 'x' (in the same way that 'levels(f)[f]' is identical to
132 | ### 'as.vector(f)' when 'f' is a character factor).
133 | ### This unambiguously defines 'ids'. In particular, it's not Locale
134 | ### specific, despite the fact that the current implementation uses a
135 | ### sorting approach.
136 | ### TODO: Remove! (not used anymore)
137 | makeIdsForUniqueDataFrameRows <- function(x)
138 | {
139 |     if (!is.data.frame(x))
140 |         stop("'x' must be a data.frame")
141 |     x_order <- do.call(order, x)
142 |     x_dups <- duplicated(x)
143 |     ## First we make "provisory" ids. Those ids *are* Locale specific.
144 |     prov_ids <- integer(nrow(x))
145 |     prov_ids[x_order] <- cumsum(!x_dups[x_order])
146 |     ## Convert the "provisory" ids into the final ids. The final ids are
147 |     ## *not* Locale specific anymore.
148 |     as.integer(factor(prov_ids, levels=unique(prov_ids)))
149 | }
150 | 
151 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
1 | .test <- function() BiocGenerics:::testPackage("GenomicFeatures")
2 | 
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [<img src="https://www.bioconductor.org/images/logo/jpg/bioconductor_logo_rgb.jpg" width="200" align="right"/>](https://bioconductor.org/)
2 | 
3 | **GenomicFeatures** is an R/Bioconductor package for querying the gene models of a given organism/assembly.
4 | 
5 | See https://bioconductor.org/packages/GenomicFeatures for more information including how to install the release version of the package (please refrain from installing directly from GitHub).
6 | 
7 | 


--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
 1 | o Fix handling of the 'filter' arg (transcripts(), etc...) when the txdb has
 2 |   user seqlevels on.
 3 | 
 4 | o Add 'filter' arg to transcriptsBy(), exonsBy(), cdsBy(),
 5 |   fiveUTRByTranscript(), and threeUTRByTranscript(). exonsBy(), cdsBy(), and
 6 |   *UTRByTranscript() should at least support filtering by gene or transcript
 7 |   id.
 8 | 
 9 | o Too many helper functions are defined and used internally to query the
10 |   db:
11 |   - dbEasyQuery() is defined in AnnotationDbi and GenomicFeatures with
12 |     different definitions.
13 |   - AnnotationDbi:::dbQuery() is the same as GenomicFeatures:::dbEasyQuery()
14 |   - queryAnnotationDb()
15 |   Clean this mess!
16 | 
17 | o DB schema change: Replace tx_chrom, exon_chrom, and cds_chrom columns with
18 |   _tx_chrom_id, _exon_chrom_id, and _cds_chrom_id.
19 | 
20 | o Add the following indices to the db schema:
21 |     CREATE INDEX splicing_tx_id ON splicing (_tx_id);
22 |     CREATE INDEX splicing_exon_id ON splicing (_exon_id);
23 |     CREATE INDEX splicing_cds_id ON splicing (_cds_id);
24 |     CREATE INDEX gene_tx_id ON gene (_tx_id);
25 | 
26 | 


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | citEntry(entry="article",
 2 |          title = "Software for Computing and Annotating Genomic Ranges",
 3 |          author = personList( as.person("Michael Lawrence" ),
 4 |            as.person("Wolfgang Huber" ),
 5 |            as.person("Herv\\'e Pag\\`es" ),
 6 |            as.person("Patrick Aboyoun" ),
 7 |            as.person("Marc Carlson" ),
 8 |            as.person("Robert Gentleman" ),
 9 |            as.person("Martin Morgan" ),
10 |            as.person("Vincent Carey" )),
11 |          year = 2013,
12 |          journal = "{PLoS} Computational Biology",
13 |          volume = "9",
14 |          issue = "8",
15 |          doi = "10.1371/journal.pcbi.1003118",
16 |          url = "http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1003118",
17 |          textVersion = 
18 |          "Lawrence M, Huber W, Pag\\`es H, Aboyoun P, Carlson M, et al. (2013) Software for Computing and Annotating Genomic Ranges. PLoS Comput Biol 9(8): e1003118. doi:10.1371/journal.pcbi.1003118" )
19 | 


--------------------------------------------------------------------------------
/inst/extdata/Biomart_Ensembl_sample.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/GenomicFeatures/4b7c91ac86e98b9b2414af4b2d00211487ae0693/inst/extdata/Biomart_Ensembl_sample.sqlite


--------------------------------------------------------------------------------
/inst/extdata/FeatureDb.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/GenomicFeatures/4b7c91ac86e98b9b2414af4b2d00211487ae0693/inst/extdata/FeatureDb.sqlite


--------------------------------------------------------------------------------
/inst/extdata/ITAG4.1_gene_models.subset.gff:
--------------------------------------------------------------------------------
 1 | ##gff-version 3
 2 | ##sequence-regionSL4.0ch00      1       9643250
 3 | ##sequence-regionSL4.0ch01      1       90863682
 4 | ##sequence-regionSL4.0ch02      1       53473368
 5 | ##sequence-regionSL4.0ch03      1       65298490
 6 | ##sequence-regionSL4.0ch04      1       64459972
 7 | ##sequence-regionSL4.0ch05      1       65269487
 8 | ##sequence-regionSL4.0ch06      1       47258699
 9 | ##sequence-regionSL4.0ch07      1       67883646
10 | ##sequence-regionSL4.0ch08      1       63995357
11 | ##sequence-regionSL4.0ch09      1       68513564
12 | ##sequence-regionSL4.0ch10      1       64792705
13 | ##sequence-regionSL4.0ch11      1       54379777
14 | ##sequence-regionSL4.0ch12      1       66688036
15 | SL4.0ch00	maker_ITAG	gene	2030916	2032369	.	+	.	ID=gene:Solyc00g025400.2;Alias=Solyc00g025400;Name=Solyc00g025400.2;length=1453
16 | SL4.0ch00	maker_ITAG	mRNA	2030916	2032369	.	+	.	ID=mRNA:Solyc00g025400.2.1;Parent=gene:Solyc00g025400.2;Name=Solyc00g025400.2.1;Note=Unknown protein;_AED=1.00;_QI=250|0|0|0|0|0|2|0|156;_eAED=1.00
17 | SL4.0ch00	maker_ITAG	five_prime_UTR	2030916	2031165	.	+	.	ID=five_prime_UTR:Solyc00g025400.2.1.0;Parent=mRNA:Solyc00g025400.2.1
18 | SL4.0ch00	maker_ITAG	exon	2030916	2031456	.	+	.	ID=exon:Solyc00g025400.2.1.1;Parent=mRNA:Solyc00g025400.2.1
19 | SL4.0ch00	maker_ITAG	CDS	2031166	2031456	.	+	0	ID=CDS:Solyc00g025400.2.1.1;Parent=mRNA:Solyc00g025400.2.1
20 | SL4.0ch00	maker_ITAG	exon	2032190	2032369	.	+	.	ID=exon:Solyc00g025400.2.1.2;Parent=mRNA:Solyc00g025400.2.1
21 | SL4.0ch00	maker_ITAG	CDS	2032190	2032369	.	+	0	ID=CDS:Solyc00g025400.2.1.2;Parent=mRNA:Solyc00g025400.2.1
22 | ###
23 | SL4.0ch00	maker	gene	2062209	2063021	.	-	.	ID=gene:Solyc00g500104.2;Name=Solyc00g500104.2
24 | SL4.0ch00	maker	mRNA	2062209	2063021	.	-	.	ID=mRNA:Solyc00g500104.2.1;Parent=gene:Solyc00g500104.2;Name=Solyc00g500104.2.1;_aed=0.27;_eaed=0.27;_qi=0|-1|0|1|-1|1|1|150|220;Note=RNA-dependent RNA polymerase (AHRD V3.11 *-* tr|A0A2Z6JIR4|A0A2Z6JIR4_9VIRU);Dbxref=InterPro:IPR008686,Pfam:PF05919
25 | SL4.0ch00	maker	three_prime_UTR	2062209	2062358	.	-	.	ID=three_prime_UTR:Solyc00g500104.2.1.0;Parent=mRNA:Solyc00g500104.2.1
26 | SL4.0ch00	maker	exon	2062209	2063021	.	-	.	ID=exon:Solyc00g500104.2.1.1;Parent=mRNA:Solyc00g500104.2.1
27 | SL4.0ch00	maker	CDS	2062359	2063021	.	-	0	ID=CDS:Solyc00g500104.2.1.1;Parent=mRNA:Solyc00g500104.2.1
28 | ###
29 | SL4.0ch00	maker_ITAG	gene	2081475	2081793	.	-	.	ID=gene:Solyc00g142160.1;Alias=Solyc00g142160;Name=Solyc00g142160.1;length=318
30 | SL4.0ch00	maker_ITAG	mRNA	2081475	2081793	.	-	.	ID=mRNA:Solyc00g142160.1.1;Parent=gene:Solyc00g142160.1;Name=Solyc00g142160.1.1;Note=RNA-dependent RNA polymerase (AHRD V3.3 *-* A0A2Z6JIR4_9VIRU);_AED=0.64;_QI=0|-1|0|1|-1|0|1|43|91;_eAED=0.64
31 | SL4.0ch00	maker_ITAG	three_prime_UTR	2081475	2081517	.	-	.	ID=three_prime_UTR:Solyc00g142160.1.1.0;Parent=mRNA:Solyc00g142160.1.1
32 | SL4.0ch00	maker_ITAG	exon	2081475	2081793	.	-	.	ID=exon:Solyc00g142160.1.1.1;Parent=mRNA:Solyc00g142160.1.1
33 | SL4.0ch00	maker_ITAG	CDS	2081518	2081793	.	-	0	ID=CDS:Solyc00g142160.1.1.1;Parent=mRNA:Solyc00g142160.1.1
34 | ###
35 | SL4.0ch00	maker_ITAG	gene	2081817	2083225	.	-	.	ID=gene:Solyc00g142170.2;Alias=Solyc00g142170;Name=Solyc00g142170.2;length=1408
36 | SL4.0ch00	maker_ITAG	mRNA	2081817	2083225	.	-	.	ID=mRNA:Solyc00g142170.2.1;Parent=gene:Solyc00g142170.2;Name=Solyc00g142170.2.1;Note=RNA-dependent RNA polymerase (AHRD V3.3 *-* A0A2Z6JIR4_9VIRU);Dbxref=InterPro:IPR008686,Pfam:PF05919;_AED=0.38;_QI=89|0|0|0.33|0|0|3|39|305;_eAED=0.43
37 | SL4.0ch00	maker_ITAG	three_prime_UTR	2081817	2081855	.	-	.	ID=three_prime_UTR:Solyc00g142170.2.1.0;Parent=mRNA:Solyc00g142170.2.1
38 | SL4.0ch00	maker_ITAG	exon	2081817	2082335	.	-	.	ID=exon:Solyc00g142170.2.1.1;Parent=mRNA:Solyc00g142170.2.1
39 | SL4.0ch00	maker_ITAG	CDS	2081856	2082335	.	-	0	ID=CDS:Solyc00g142170.2.1.1;Parent=mRNA:Solyc00g142170.2.1
40 | SL4.0ch00	maker_ITAG	exon	2082546	2082748	.	-	.	ID=exon:Solyc00g142170.2.1.2;Parent=mRNA:Solyc00g142170.2.1
41 | SL4.0ch00	maker_ITAG	CDS	2082546	2082748	.	-	2	ID=CDS:Solyc00g142170.2.1.2;Parent=mRNA:Solyc00g142170.2.1
42 | SL4.0ch00	maker_ITAG	CDS	2082902	2083136	.	-	0	ID=CDS:Solyc00g142170.2.1.3;Parent=mRNA:Solyc00g142170.2.1
43 | SL4.0ch00	maker_ITAG	exon	2082902	2083225	.	-	.	ID=exon:Solyc00g142170.2.1.3;Parent=mRNA:Solyc00g142170.2.1
44 | SL4.0ch00	maker_ITAG	five_prime_UTR	2083137	2083225	.	-	.	ID=five_prime_UTR:Solyc00g142170.2.1.0;Parent=mRNA:Solyc00g142170.2.1
45 | ###
46 | SL4.0ch00	maker_ITAG	gene	2136189	2136452	.	-	.	ID=gene:Solyc00g500120.1;Alias=Solyc00g500120;Name=Solyc00g500120.1;length=263
47 | SL4.0ch00	maker_ITAG	mRNA	2136189	2136452	.	-	.	ID=mRNA:Solyc00g500120.1.1;Parent=gene:Solyc00g500120.1;Name=Solyc00g500120.1.1;Note=Unknown protein;_AED=0.46;_QI=0|-1|0|1|-1|0|1|0|87;_eAED=1.00
48 | SL4.0ch00	maker_ITAG	exon	2136189	2136452	.	-	.	ID=exon:Solyc00g500120.1.1.1;Parent=mRNA:Solyc00g500120.1.1
49 | SL4.0ch00	maker_ITAG	CDS	2136189	2136452	.	-	0	ID=CDS:Solyc00g500120.1.1.1;Parent=mRNA:Solyc00g500120.1.1
50 | ###
51 | SL4.0ch00	maker_ITAG	gene	2139483	2140481	.	-	.	ID=gene:Solyc00g500121.1;Alias=Solyc00g500121;Name=Solyc00g500121.1;length=998
52 | SL4.0ch00	maker_ITAG	mRNA	2139483	2140481	.	-	.	ID=mRNA:Solyc00g500121.1.1;Parent=gene:Solyc00g500121.1;Name=Solyc00g500121.1.1;Note=Unknown protein;_AED=0.38;_QI=558|1|0.5|1|0|0|2|0|86;_eAED=0.38
53 | SL4.0ch00	maker_ITAG	exon	2139483	2139659	.	-	.	ID=exon:Solyc00g500121.1.1.1;Parent=mRNA:Solyc00g500121.1.1
54 | SL4.0ch00	maker_ITAG	CDS	2139483	2139659	.	-	0	ID=CDS:Solyc00g500121.1.1.1;Parent=mRNA:Solyc00g500121.1.1
55 | SL4.0ch00	maker_ITAG	CDS	2139840	2139923	.	-	0	ID=CDS:Solyc00g500121.1.1.2;Parent=mRNA:Solyc00g500121.1.1
56 | SL4.0ch00	maker_ITAG	exon	2139840	2140481	.	-	.	ID=exon:Solyc00g500121.1.1.2;Parent=mRNA:Solyc00g500121.1.1
57 | SL4.0ch00	maker_ITAG	five_prime_UTR	2139924	2140481	.	-	.	ID=five_prime_UTR:Solyc00g500121.1.1.0;Parent=mRNA:Solyc00g500121.1.1
58 | ###
59 | SL4.0ch00	maker_ITAG	gene	2187524	2191157	.	-	.	ID=gene:Solyc00g500122.1;Alias=Solyc00g500122;Name=Solyc00g500122.1;length=3633
60 | SL4.0ch00	maker_ITAG	mRNA	2187524	2191157	.	-	.	ID=mRNA:Solyc00g500122.1.1;Parent=gene:Solyc00g500122.1;Name=Solyc00g500122.1.1;Note=Unknown protein;_AED=0.40;_QI=0|0|0.33|1|0|0|3|433|122;_eAED=0.51
61 | SL4.0ch00	maker_ITAG	three_prime_UTR	2187524	2187956	.	-	.	ID=three_prime_UTR:Solyc00g500122.1.1.0;Parent=mRNA:Solyc00g500122.1.1
62 | SL4.0ch00	maker_ITAG	exon	2187524	2188176	.	-	.	ID=exon:Solyc00g500122.1.1.1;Parent=mRNA:Solyc00g500122.1.1
63 | SL4.0ch00	maker_ITAG	CDS	2187957	2188176	.	-	1	ID=CDS:Solyc00g500122.1.1.1;Parent=mRNA:Solyc00g500122.1.1
64 | SL4.0ch00	maker_ITAG	exon	2190928	2190992	.	-	.	ID=exon:Solyc00g500122.1.1.2;Parent=mRNA:Solyc00g500122.1.1
65 | SL4.0ch00	maker_ITAG	CDS	2190928	2190992	.	-	0	ID=CDS:Solyc00g500122.1.1.2;Parent=mRNA:Solyc00g500122.1.1
66 | SL4.0ch00	maker_ITAG	exon	2191074	2191157	.	-	.	ID=exon:Solyc00g500122.1.1.3;Parent=mRNA:Solyc00g500122.1.1
67 | SL4.0ch00	maker_ITAG	CDS	2191074	2191157	.	-	0	ID=CDS:Solyc00g500122.1.1.3;Parent=mRNA:Solyc00g500122.1.1
68 | ###
69 | SL4.0ch00	maker	gene	2329909	2332560	.	-	.	ID=gene:Solyc00g160010.1;Name=Solyc00g160010.1
70 | SL4.0ch00	maker	mRNA	2329909	2332560	.	-	.	ID=mRNA:Solyc00g160010.1.1;Parent=gene:Solyc00g160010.1;Name=Solyc00g160010.1.1;_aed=0.77;_eaed=0.77;_qi=0|0|0|0.25|1|1|4|0|150;_merge_warning=1;Note=zinc finger CCCH domain-containing protein 30-like (AHRD V3.11 *-* XP_025884875.1)
71 | SL4.0ch00	maker	exon	2329909	2329963	.	-	.	ID=exon:Solyc00g160010.1.1.1;Parent=mRNA:Solyc00g160010.1.1
72 | SL4.0ch00	maker	CDS	2329909	2329963	.	-	1	ID=CDS:Solyc00g160010.1.1.1;Parent=mRNA:Solyc00g160010.1.1
73 | SL4.0ch00	maker	exon	2330621	2330687	.	-	.	ID=exon:Solyc00g160010.1.1.2;Parent=mRNA:Solyc00g160010.1.1
74 | SL4.0ch00	maker	CDS	2330621	2330687	.	-	2	ID=CDS:Solyc00g160010.1.1.2;Parent=mRNA:Solyc00g160010.1.1
75 | SL4.0ch00	maker	exon	2330792	2330966	.	-	.	ID=exon:Solyc00g160010.1.1.3;Parent=mRNA:Solyc00g160010.1.1
76 | SL4.0ch00	maker	CDS	2330792	2330966	.	-	0	ID=CDS:Solyc00g160010.1.1.3;Parent=mRNA:Solyc00g160010.1.1
77 | SL4.0ch00	maker	exon	2332405	2332560	.	-	.	ID=exon:Solyc00g160010.1.1.4;Parent=mRNA:Solyc00g160010.1.1
78 | SL4.0ch00	maker	CDS	2332405	2332560	.	-	0	ID=CDS:Solyc00g160010.1.1.4;Parent=mRNA:Solyc00g160010.1.1
79 | ###
80 | SL4.0ch00	maker	gene	2377393	2378166	.	-	.	ID=gene:Solyc00g160330.1;Name=Solyc00g160330.1
81 | SL4.0ch00	maker	mRNA	2377393	2378166	.	-	.	ID=mRNA:Solyc00g160330.1.1;Parent=gene:Solyc00g160330.1;Name=Solyc00g160330.1.1;_aed=0.58;_eaed=0.59;_qi=0|0|0|0.5|0|0.5|2|0|82;Note=zinc finger CCCH domain-containing protein 30-like (AHRD V3.11 *-* XP_025884875.1)
82 | SL4.0ch00	maker	exon	2377393	2377434	.	-	.	ID=exon:Solyc00g160330.1.1.1;Parent=mRNA:Solyc00g160330.1.1
83 | SL4.0ch00	maker	CDS	2377393	2377434	.	-	0	ID=CDS:Solyc00g160330.1.1.1;Parent=mRNA:Solyc00g160330.1.1
84 | SL4.0ch00	maker	exon	2377960	2378166	.	-	.	ID=exon:Solyc00g160330.1.1.2;Parent=mRNA:Solyc00g160330.1.1
85 | SL4.0ch00	maker	CDS	2377960	2378166	.	-	0	ID=CDS:Solyc00g160330.1.1.2;Parent=mRNA:Solyc00g160330.1.1
86 | ###
87 | SL4.0ch00	maker_ITAG	gene	2379604	2380807	.	-	.	ID=gene:Solyc00g007330.1;Alias=Solyc00g007330;Name=Solyc00g007330.1;length=1203
88 | SL4.0ch00	maker_ITAG	mRNA	2379604	2380807	.	-	.	ID=mRNA:Solyc00g007330.1.1;Parent=gene:Solyc00g007330.1;Name=Solyc00g007330.1.1;Note=Zinc finger transcription factor 1;_AED=1.00;_QI=373|0|0|0|0|0|2|0|171;_eAED=1.00
89 | SL4.0ch00	maker_ITAG	CDS	2379604	2380119	.	-	0	ID=CDS:Solyc00g007330.1.1.1;Parent=mRNA:Solyc00g007330.1.1
90 | SL4.0ch00	maker_ITAG	exon	2379604	2380324	.	-	.	ID=exon:Solyc00g007330.1.1.1;Parent=mRNA:Solyc00g007330.1.1
91 | SL4.0ch00	maker_ITAG	five_prime_UTR	2380120	2380324	.	-	.	ID=five_prime_UTR:Solyc00g007330.1.1.0;Parent=mRNA:Solyc00g007330.1.1
92 | SL4.0ch00	maker_ITAG	exon	2380640	2380807	.	-	.	ID=exon:Solyc00g007330.1.1.2;Parent=mRNA:Solyc00g007330.1.1
93 | SL4.0ch00	maker_ITAG	five_prime_UTR	2380640	2380807	.	-	.	ID=five_prime_UTR:Solyc00g007330.1.1.1;Parent=mRNA:Solyc00g007330.1.1
94 | ###
95 | 


--------------------------------------------------------------------------------
/inst/extdata/cD.exByEdge-SG-Vig.Rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/GenomicFeatures/4b7c91ac86e98b9b2414af4b2d00211487ae0693/inst/extdata/cD.exByEdge-SG-Vig.Rda


--------------------------------------------------------------------------------
/inst/extdata/cD.exsByGenes-SG-Vig.Rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/GenomicFeatures/4b7c91ac86e98b9b2414af4b2d00211487ae0693/inst/extdata/cD.exsByGenes-SG-Vig.Rda


--------------------------------------------------------------------------------
/inst/extdata/events.Rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/GenomicFeatures/4b7c91ac86e98b9b2414af4b2d00211487ae0693/inst/extdata/events.Rda


--------------------------------------------------------------------------------
/inst/extdata/hg19_knownGene_sample.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/GenomicFeatures/4b7c91ac86e98b9b2414af4b2d00211487ae0693/inst/extdata/hg19_knownGene_sample.sqlite


--------------------------------------------------------------------------------
/inst/extdata/sample_ranges.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/GenomicFeatures/4b7c91ac86e98b9b2414af4b2d00211487ae0693/inst/extdata/sample_ranges.rds


--------------------------------------------------------------------------------
/inst/script/README:
--------------------------------------------------------------------------------
1 | Scripts make_tRNAFDb.R and make_TxDbs.R are now in the txdbmaker package!
2 | 
3 | 


--------------------------------------------------------------------------------
/inst/unitTests/test_TxDb_seqinfo.R:
--------------------------------------------------------------------------------
  1 | library(TxDb.Hsapiens.UCSC.hg19.knownGene)
  2 | txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
  3 | 
  4 | test_rename_seqlevels <- function()
  5 | {
  6 |     txdb <- restoreSeqlevels(txdb)
  7 |     new_seqlevels <- as.character(seq_along(seqlevels(txdb)))
  8 |     seqlevels(txdb) <- new_seqlevels
  9 |     checkIdentical(new_seqlevels, seqlevels(txdb))
 10 | }
 11 | 
 12 | test_restrict_seqlevels <- function()
 13 | {
 14 |     ## This should work
 15 |     txdb <- restoreSeqlevels(txdb)
 16 |     seqlevels(txdb, pruning.mode="coarse") <- c(chr5="5")
 17 |     checkEquals(length(seqinfo(txdb)), 1)
 18 | 
 19 |     ## This should work
 20 |     txdb <- restoreSeqlevels(txdb)
 21 |     seqlevels(txdb, pruning.mode="coarse") <- c(chr5="5", chr6="6", chr4="4")
 22 |     checkTrue(length(seqinfo(txdb)) == 3)
 23 |     checkIdentical(c("5", "6", "4"), seqlevels(txdb))
 24 |     checkTrue(seqlengths(txdb)[2] == min(seqlengths(txdb)))
 25 |     checkTrue(seqlengths(txdb)[3] == max(seqlengths(txdb)))
 26 | 
 27 |     ## And this should NOT work
 28 |     txdb <- restoreSeqlevels(txdb)
 29 |     checkException(seqlevels(txdb, pruning.mode="coarse") <- c(foo="2"))
 30 | }
 31 | 
 32 | test_seqinfo_setter <- function()
 33 | {
 34 |     txdb <- restoreSeqlevels(txdb)
 35 |     new_seqinfo <- seqinfo(txdb)
 36 |     seqnames(new_seqinfo) <- paste0("NEW_", seqnames(new_seqinfo))
 37 |     seqinfo(txdb, new2old=seq_along(new_seqinfo)) <- new_seqinfo
 38 |     checkIdentical(new_seqinfo, seqinfo(txdb))
 39 | 
 40 |     txdb <- restoreSeqlevels(txdb)
 41 |     new_seqinfo <- seqinfo(txdb)
 42 |     seqlengths(new_seqinfo) <- 5 * seqlengths(new_seqinfo)
 43 |     checkException(seqinfo(txdb) <- new_seqinfo)
 44 | 
 45 |     txdb <- restoreSeqlevels(txdb)
 46 |     new_seqinfo <- seqinfo(txdb)
 47 |     isCircular(new_seqinfo) <- rep(TRUE, length(new_seqinfo))
 48 |     checkException(seqinfo(txdb) <- new_seqinfo)
 49 | 
 50 |     txdb <- restoreSeqlevels(txdb)
 51 |     new_seqinfo <- seqinfo(txdb)
 52 |     genome(new_seqinfo) <- "foo"
 53 |     seqinfo(txdb) <- new_seqinfo
 54 |     checkIdentical(new_seqinfo, seqinfo(txdb))
 55 | }
 56 | 
 57 | test_transcripts_accessor <- function()
 58 | {
 59 |     txdb <- restoreSeqlevels(txdb)
 60 |     txs1 <- transcripts(txdb)
 61 |     seqlevels(txs1, pruning.mode="coarse") <- c(chr5="5")
 62 |     ## Then change seqlevels for txdb
 63 |     seqlevels(txdb, pruning.mode="coarse") <- c(chr5="5")
 64 |     txs2 <- transcripts(txdb)
 65 |     checkIdentical(txs1, txs2)
 66 | }
 67 | 
 68 | test_exons_accessor <- function()
 69 | {
 70 |     txdb <- restoreSeqlevels(txdb)
 71 |     exs1 <- exons(txdb)
 72 |     seqlevels(exs1, pruning.mode="coarse") <- c(chr5="5")
 73 |     ## Then change seqlevels for txdb
 74 |     seqlevels(txdb, pruning.mode="coarse") <- c(chr5="5")
 75 |     exs2 <- exons(txdb)
 76 |     checkIdentical(exs1, exs2)
 77 | }
 78 | 
 79 | test_cds_accessor <- function()
 80 | {
 81 |     txdb <- restoreSeqlevels(txdb)
 82 |     cds1 <- cds(txdb)
 83 |     seqlevels(cds1, pruning.mode="coarse") <- c(chr5="5")
 84 |     ## Then change seqlevels for txdb
 85 |     seqlevels(txdb, pruning.mode="coarse") <- c(chr5="5")
 86 |     cds2 <- cds(txdb)
 87 |     checkIdentical(cds1, cds2)
 88 | }
 89 | 
 90 | test_promoters_accessor <- function()
 91 | {
 92 |     txdb <- restoreSeqlevels(txdb)
 93 |     prm1 <- promoters(txdb)
 94 |     seqlevels(prm1, pruning.mode="coarse") <- c(chr5="5")
 95 |     ## Then change seqlevels for txdb
 96 |     seqlevels(txdb, pruning.mode="coarse") <- c(chr5="5")
 97 |     prm2 <- promoters(txdb)
 98 |     checkIdentical(prm1, prm2)
 99 | 
100 |     txdb <- restoreSeqlevels(txdb)
101 |     trmn1 <- terminators(txdb)
102 |     seqlevels(trmn1, pruning.mode="coarse") <- c(chr5="5")
103 |     ## Then change seqlevels for txdb
104 |     seqlevels(txdb, pruning.mode="coarse") <- c(chr5="5")
105 |     trmn2 <- terminators(txdb)
106 |     checkIdentical(trmn1, trmn2)
107 | }
108 | 
109 | test_transcriptsBy_accessors <- function()
110 | {
111 |     ## This one is a "fun" one.
112 |     ## There are issues because some genes are annotated as being on
113 |     ## TWO different chromosomes.  Such genes are filtered for txs3,
114 |     ## but NOT for txs4...   Hmmmm.
115 |     txdb <- restoreSeqlevels(txdb)
116 |     txs3 <- transcriptsBy(txdb, by="gene")
117 |     seqlevels(txs3, pruning.mode="coarse") <- c(chr5="5")
118 |     ## Then change seqlevels for txdb
119 |     seqlevels(txdb, pruning.mode="coarse") <- c(chr5="5")
120 |     txs4 <- transcriptsBy(txdb, by="gene")
121 | ##    checkIdentical(txs3, txs4)  ## TROUBLE!!
122 | 
123 | }
124 | 
125 | 
126 | ## What to do about this?  The reason for the difference is because of order of operations.  txs3 gets all the ranges and then removes any that are not kosher (this is correct), txs4 OTOH gets only ranges from chr5 (efficient!), but then fails to filter out things that have hybrid seqnames (as they were pre-filtered).  I think I have to make the query less efficient to fix this, but I want to discuss it with Herve 1st to get a 2nd opinion.
127 | 


--------------------------------------------------------------------------------
/inst/unitTests/test_exonicParts.R:
--------------------------------------------------------------------------------
 1 | ###
 2 | 
 3 | test_exonicParts <- function()
 4 | {
 5 |     txdb <- loadDb(system.file("extdata", "hg19_knownGene_sample.sqlite", 
 6 |                    package="GenomicFeatures"))
 7 | 
 8 |     exonic_parts <- exonicParts(txdb, linked.to.single.gene.only=TRUE)
 9 |     checkIdentical(length(exonic_parts), 653L)
10 |     expected_mcolnames <- c("tx_id", "tx_name", "gene_id", "exon_id",
11 |                             "exon_name", "exon_rank", "exonic_part")
12 |     checkIdentical(expected_mcolnames, names(mcols(exonic_parts)))
13 |     checkTrue(is.character(mcols(exonic_parts)$gene_id))
14 |     checkTrue(is(mcols(exonic_parts)$tx_name, "CharacterList"))
15 |     checkTrue(is.integer(mcols(exonic_parts)$exonic_part))
16 | 
17 |     exonic_parts <- exonicParts(txdb, linked.to.single.gene.only=FALSE)
18 |     checkIdentical(length(exonic_parts), 660L)
19 |     expected_mcolnames <- head(expected_mcolnames, n=-1L)
20 |     checkIdentical(expected_mcolnames, names(mcols(exonic_parts)))
21 | }
22 | 
23 | 


--------------------------------------------------------------------------------
/inst/unitTests/test_getPromoterSeq-methods.R:
--------------------------------------------------------------------------------
  1 | library(BSgenome.Hsapiens.UCSC.hg19)
  2 | library(TxDb.Hsapiens.UCSC.hg19.knownGene)
  3 | library(BSgenome.Dmelanogaster.UCSC.dm3)
  4 | library(TxDb.Dmelanogaster.UCSC.dm3.ensGene)
  5 | library(Rsamtools)
  6 | library(pasillaBamSubset)
  7 | 
  8 | e2f3 <- "1871"   # human gene on the plus strand, chr6
  9 | grb2 <- "2885"   # human gene on the minus strand, chr17
 10 | 
 11 | # a note on method: when the promoter sequence is 20 bases or more in length,
 12 | # uscs blat will find these sequences, and a quick visual inspection of the
 13 | # accompanying genome browser view at the right level of zoom, will
 14 | # confirm that the per-transcript sequences is indeed correct.
 15 | # there are a few tests of shorter sequences below as well, which
 16 | # I checked in the genome browser, but this required a little more effort
 17 | # than the length 20, blat approach.
 18 | 
 19 | test_GRangesListBSgenomeHumanGetPromoterSeq <- function() {
 20 |     txdb <- restoreSeqlevels(TxDb.Hsapiens.UCSC.hg19.knownGene)  ## safety net
 21 |     genes <- c(e2f3, grb2)
 22 |     tx_by_gene <- transcriptsBy(txdb, by="gene")[genes]
 23 |     checkIdentical(names(tx_by_gene), genes)
 24 |     transcript_count <- length(unlist(tx_by_gene, use.names=FALSE))
 25 | 
 26 |     promoter_seqs <- getPromoterSeq(tx_by_gene, Hsapiens,
 27 |                                     upstream=10, downstream=0)
 28 |     checkTrue(validObject(promoter_seqs))
 29 |     checkTrue(is(promoter_seqs, "DNAStringSetList"))
 30 |     checkEquals(length(promoter_seqs), 2)
 31 |     checkIdentical(names(promoter_seqs), genes)
 32 |     checkIdentical(width(unlist(promoter_seqs, use.names=FALSE)),
 33 |                    rep.int(10L, transcript_count))
 34 | 
 35 |     terminator_seqs <- getTerminatorSeq(tx_by_gene, Hsapiens,
 36 |                                         upstream=10, downstream=0)
 37 |     checkTrue(validObject(terminator_seqs))
 38 |     checkTrue(is(terminator_seqs, "DNAStringSetList"))
 39 |     checkEquals(length(terminator_seqs), 2)
 40 |     checkIdentical(names(terminator_seqs), genes)
 41 |     checkIdentical(width(unlist(terminator_seqs, use.names=FALSE)),
 42 |                    rep.int(10L, transcript_count))
 43 | }
 44 | 
 45 | test_GRangesListBSgenomeFlyGetPromoterSeq <- function() {
 46 |      # two neighboring genes near beginning of chr3R, on opposite strands
 47 |      #  gene_id  flybase_id  symbol
 48 |      #    40524 FBgn0037215 CG12582
 49 |      #    40526 FBgn0037217 CG14636
 50 |      # in 2012, UCSC reported 4 total transcripts for these two genes
 51 |      # in 2013, 6.  there should be as many promoter_seqs as there
 52 |      # are transcripts, and they should each be of width
 53 |      # upstream + downstream.  it is risky to check for specific
 54 |      # sequence in the promoter_seqs since the annotation and sequence
 55 |      # may change
 56 | 
 57 |     txdb <- restoreSeqlevels(TxDb.Dmelanogaster.UCSC.dm3.ensGene)  ## safety net
 58 |     genes <- c("FBgn0037215", "FBgn0037217")
 59 |     tx_by_gene <- transcriptsBy(txdb, by="gene")[genes]
 60 |     checkIdentical(names(tx_by_gene), genes)
 61 |     transcript_count <- length(unlist(tx_by_gene, use.names=FALSE))
 62 | 
 63 |     promoter_seqs <- getPromoterSeq(tx_by_gene, Dmelanogaster,
 64 |                                     upstream=10, downstream=10)
 65 |     checkTrue(validObject(promoter_seqs))
 66 |     checkTrue(is(promoter_seqs, "DNAStringSetList"))
 67 |     checkEquals(length(promoter_seqs), 2)
 68 |     checkIdentical(names(promoter_seqs), genes)
 69 |     checkIdentical(width(unlist(promoter_seqs, use.names=FALSE)),
 70 |                    rep.int(20L, transcript_count))
 71 | 
 72 |     terminator_seqs <- getPromoterSeq(tx_by_gene, Dmelanogaster,
 73 |                                       upstream=10, downstream=10)
 74 |     checkTrue(validObject(terminator_seqs))
 75 |     checkTrue(is(terminator_seqs, "DNAStringSetList"))
 76 |     checkEquals(length(terminator_seqs), 2)
 77 |     checkIdentical(names(terminator_seqs), genes)
 78 |     checkIdentical(width(unlist(terminator_seqs, use.names=FALSE)),
 79 |                    rep.int(20L, transcript_count))
 80 | }
 81 | 
 82 | test_GRangesListFastaFlyGetPromoterSeq <- function() {
 83 |       # two neighboring genes near beginning of chr3R, on opposite strands
 84 |       #  gene_id  flybase_id  symbol   chr
 85 |       #    43766 FBgn0025740  plexB    4
 86 |       #    43769 FBgn0085432  pan      4
 87 | 
 88 |     txdb <- restoreSeqlevels(TxDb.Dmelanogaster.UCSC.dm3.ensGene)  ## safety net
 89 |     genes <- c("FBgn0025740", "FBgn0085432")
 90 |     tx_by_gene <- transcriptsBy(txdb, by="gene")[genes]
 91 |     checkIdentical(names(tx_by_gene), genes)
 92 |     transcript_count <- length(unlist(tx_by_gene, use.names=FALSE))
 93 |     fa_file <- FaFile(dm3_chr4())
 94 | 
 95 |     promoter_seqs <- getPromoterSeq(tx_by_gene, fa_file,
 96 |                                     upstream=10, downstream=10)
 97 |     checkTrue(validObject(promoter_seqs))
 98 |     checkTrue(is(promoter_seqs, "DNAStringSetList"))
 99 |     checkEquals(length(promoter_seqs), 2)
100 |     checkIdentical(names(promoter_seqs), genes)
101 |     checkIdentical(width(unlist(promoter_seqs, use.names=FALSE)),
102 |                    rep.int(20L, transcript_count))
103 |        # we are unable to check for specific DNA sequence, since
104 |        # the UCSC annotation of these genes changes over time.
105 | 
106 |     terminator_seqs <- getPromoterSeq(tx_by_gene, fa_file,
107 |                                       upstream=10, downstream=10)
108 |     checkTrue(validObject(terminator_seqs))
109 |     checkTrue(is(terminator_seqs, "DNAStringSetList"))
110 |     checkEquals(length(terminator_seqs), 2)
111 |     checkIdentical(names(terminator_seqs), genes)
112 |     checkIdentical(width(unlist(terminator_seqs, use.names=FALSE)),
113 |                    rep.int(20L, transcript_count))
114 | }
115 | 
116 | test_GRangesBSgenomeHumanGetPromoterSeq <- function() {
117 |     txdb <- restoreSeqlevels(TxDb.Hsapiens.UCSC.hg19.knownGene)  ## safety net
118 |     e2f3_tx <- transcriptsBy(txdb, by="gene")[[e2f3]]
119 |     #names(e2f3_tx) <- mcols(e2f3_tx)$tx_name
120 |     transcript_count <- length(e2f3_tx)
121 |     checkEquals(dim(mcols(e2f3_tx)), c(transcript_count, 2))
122 |     checkIdentical(colnames(mcols(e2f3_tx)), c("tx_id", "tx_name"))
123 | 
124 |     promoter_seqs <- getPromoterSeq(e2f3_tx, Hsapiens,
125 |                                     upstream=10, downstream=0)
126 |     checkTrue(validObject(promoter_seqs))
127 |     checkTrue(is(promoter_seqs, "DNAStringSet"))
128 |     checkEquals(length(promoter_seqs), transcript_count)
129 |     checkTrue(is.null(names(promoter_seqs)))
130 |     checkIdentical(width(promoter_seqs), rep.int(10L, transcript_count))
131 |       # should be one more column in the metadata than in the metadata
132 |     checkEquals(dim(mcols(promoter_seqs)), c(transcript_count, 3))
133 |     checkEquals(colnames(mcols(promoter_seqs)), c("tx_id", "tx_name", "geneID"))
134 |        # the input, a GRanges, had no names -- which are the source
135 |        # of geneID when the GRangesList version of this methods is called.
136 |        # so ensure that this lack of information was passed along into the
137 |        # metadata of the returned promoter_seqs
138 |     checkTrue(all(is.na(mcols(promoter_seqs)$geneID)))
139 | }
140 | 
141 | 


--------------------------------------------------------------------------------
/inst/unitTests/test_makeIdsForUniqueDataFrameRows.R:
--------------------------------------------------------------------------------
 1 | ###
 2 | 
 3 | test_makeIdsForUniqueDataFrameRows <- function()
 4 | {
 5 |     x <- data.frame(
 6 |              chrom=c("chr2", "chr2", "chr2", "chr2", "chr1",
 7 |                      "chr2", "chr2", "chr1", "chr3", "chr1"),
 8 |              strand=c("+", "-", "-", "+", "+", "+", "+", "-", "-", "+"),
 9 |              start=c(5, 2, 2, 5, 4, 5, 5, 4, 2, 1),
10 |              end=c(15, 12, 12, 15, 14, 13, 15, 14, 12, 11)
11 |          )
12 |     y <- unique(x)[GenomicFeatures:::makeIdsForUniqueDataFrameRows(x), ]
13 |     row.names(y) <- NULL
14 |     checkEquals(y, x)
15 | }
16 | 
17 | 


--------------------------------------------------------------------------------
/inst/unitTests/test_mapIdsToRanges.R:
--------------------------------------------------------------------------------
 1 | txdb <- local({
 2 |     library(txdbmaker)  # for makeTxDbFromGRanges()
 3 |     fl <- system.file(package = "GenomicFeatures", "extdata",
 4 |                       "sample_ranges.rds")
 5 |     makeTxDbFromGRanges(readRDS(fl))
 6 | })
 7 | 
 8 | test_mapIdsToRanges_improper_inputs <- function()
 9 | {
10 |     checkException(mapIdsToRanges(txdb, keys = ""),
11 |         "must be a named list")
12 | 
13 |     checkException(mapIdsToRanges(txdb, keys = "ENST000000271582"),
14 |         "must be a named list")
15 | 
16 |     checkException(mapIdsToRanges(txdb, keys = list("ENST000000271582")),
17 |         "must be a named list")
18 | 
19 |     checkException(mapIdsToRanges(txdb,
20 |                                   keys = list(tx_name = "ENST000000271582"),
21 |                                   column = 1),
22 |         "'columns' must be 'NULL' or a character vector")
23 | }
24 | 
25 | test_mapIdsToRanges_same_order <- function()
26 | {
27 |     keys <- list(tx_name = c("ENST00000371582", "ENST00000371588",
28 |         "ENST00000494752", "ENST00000614008", "ENST00000496771"))
29 |     res <- mapIdsToRanges(txdb, keys = keys, type = "tx")
30 |     checkEquals(names(res), keys[[1]])
31 | 
32 |     # shuffle the order and make sure it remains equivalent
33 |     for (i in seq_len(10)) {
34 |         keys$tx_name <- sample(keys$tx_name)
35 |         res <- mapIdsToRanges(txdb, keys = keys, type = "tx")
36 |         checkEquals(names(res), keys[[1]])
37 |     }
38 | }
39 | 
40 | test_mapIdsToRanges_missing_results <- function()
41 | {
42 |     keys <- list(tx_name = c("ENST00000371582", "NOT_FOUND", "ENST00000494752"))
43 |     res <- mapIdsToRanges(txdb, keys = keys, type = "tx")
44 |     checkEquals(names(res), keys$tx_name)
45 | 
46 |     # shuffle the order and make sure it remains equivalent
47 |     for (i in seq_len(10)) {
48 |         keys$tx_name <- sample(keys$tx_name)
49 |         res <- mapIdsToRanges(txdb, keys = keys, type = "tx")
50 |         checkEquals(names(res), keys$tx_name)
51 |     }
52 | }
53 | 
54 | test_mapIdsToRanges_duplicate_ranges <- function()
55 | {
56 |     # both of these transcripts are from the same gene
57 |     keys <- list(tx_name = c("ENST00000371582", "ENST00000494752"))
58 |     res <- mapIdsToRanges(txdb, keys = keys, type = "gene")
59 | 
60 |     #names match input
61 |     checkEquals(names(res), keys[[1]])
62 |     # but values are the same
63 |     checkTrue(all.equal(res[[1]], res[[2]], check.attributes = FALSE))
64 | }
65 | 
66 | test_mapIdsToRanges_duplicate_ids <- function() {
67 |     keys <- list(tx_name = c("ENST00000371582", "ENST00000494752",
68 |                              "ENST00000371582"))
69 |     res <- mapIdsToRanges(txdb, keys = keys, type = "gene")
70 |     checkEquals(names(res), keys[[1]])
71 |     checkEquals(res[[1]], res[[3]])
72 | }
73 | 
74 | test_mapRangesToIds_empty <- function()
75 | {
76 |     checkException(mapRangesToIds(txdb, NULL), list())
77 | 
78 |     checkException(mapRangesToIds(txdb, list()), list())
79 | }
80 | 
81 | test_mapRangesToIds_matches <- function()
82 | {
83 |     keys <- list(tx_name = c("ENST00000371582", "ENST00000371588",
84 |         "ENST00000494752", "ENST00000614008", "ENST00000496771"))
85 |     res <- mapIdsToRanges(txdb, keys = keys, type = "gene")
86 | 
87 |     res2 <- mapRangesToIds(txdb, res, "tx")
88 |     checkTrue(keys$tx_name[1] %in% res2[[1]]$tx_name)
89 | 
90 |     checkTrue(keys$tx_name[2] %in% res2[[2]]$tx_name)
91 | }
92 | 


--------------------------------------------------------------------------------
/inst/unitTests/test_nearest-methods.R:
--------------------------------------------------------------------------------
 1 | quiet <- suppressWarnings
 2 | 
 3 | test_GenomicRanges_distance <- function()
 4 | {
 5 |     library(txdbmaker)  # for makeTxDb()
 6 |     genes <- data.frame(
 7 |         tx_id=1:3,
 8 |         gene_id=c("gene1", "gene1", "gene2"))
 9 |     transcripts <- data.frame(
10 |         tx_id=1:3,
11 |         tx_chrom="chr1",
12 |         tx_strand=c("+", "+", "-"),
13 |         tx_start=c(1, 2001, 3001),
14 |         tx_end=c(999, 2199, 3199))
15 |     splicings <-  data.frame(
16 |         tx_id=c(1L, 2L, 2L, 2L, 3L, 3L),
17 |         cds_id=c(10L, 11L, 12L, 13L, 14L, 15L),
18 |         exon_rank=c(1, 1, 2, 3, 1, 2),
19 |         exon_start=c(1, 2001, 2101, 2131, 3001, 3131),
20 |         exon_end=c(999, 2085, 2144, 2199, 3085, 3199),
21 |         cds_start=c(1, 2022, 2101, 2131, 3001, 3131),
22 |         cds_end=c(999, 2085, 2144, 2193, 3085, 3199))
23 |     txdb <- quiet(makeTxDb(transcripts, splicings, genes))
24 | 
25 |     gr <- GRanges("chr1", IRanges(1050, width=1))
26 |     strand(gr) <- "-"
27 |     d <- quiet(distance(gr, txdb, id="gene1", type="gene"))
28 |     checkTrue(is.na(d))
29 |     strand(gr) <- "+"
30 |     d_pos <- quiet(distance(gr, txdb, id="gene1", type="gene"))
31 |     strand(gr) <- "*"
32 |     d_star <- quiet(distance(gr, txdb, id="gene1", type="gene"))
33 |     checkIdentical(d_pos, d_star)
34 | 
35 |     d_tx <- quiet(distance(gr, txdb, id="3", type="tx"))
36 |     d_cds <- quiet(distance(gr, txdb, id="14", type="cds"))
37 |     checkIdentical(d_tx, d_cds)
38 | }
39 | 


--------------------------------------------------------------------------------
/inst/unitTests/test_transcriptLengths.R:
--------------------------------------------------------------------------------
 1 | test_transcriptLengths <- function()
 2 | {
 3 |     library(txdbmaker)  # for makeTxDbFromGFF()
 4 |     gff <- system.file("extdata", "ITAG4.1_gene_models.subset.gff",
 5 |                        package="GenomicFeatures")
 6 |     txdb <- makeTxDbFromGFF(gff)
 7 |     txlens <- transcriptLengths(txdb, with.cds_len=TRUE,
 8 |                                       with.utr5_len=TRUE,
 9 |                                       with.utr3_len=TRUE)
10 | 
11 |     checkIdentical(class(txlens), "data.frame")
12 |     checkIdentical(dim(txlens), c(10L, 8L))
13 | 
14 |     expected_colnames <- c("tx_id", "tx_name", "gene_id", "nexon", "tx_len",
15 |                            "cds_len", "utr5_len", "utr3_len")
16 |     checkIdentical(colnames(txlens), expected_colnames)
17 | 
18 |     checkIdentical(txlens$tx_len,
19 |                    txlens$cds_len + txlens$utr5_len + txlens$utr3_len)
20 | 
21 |     expected_nexon <- c(2L, 1L, 1L, 3L, 1L, 2L, 3L, 4L, 2L, 2L)
22 |     checkIdentical(txlens$nexon, expected_nexon)
23 | 
24 |     expected_tx_len <- c(721L, 813L, 319L, 1046L, 264L,
25 |                          819L, 802L, 453L, 249L, 889L)
26 |     checkIdentical(txlens$tx_len, expected_tx_len)
27 | 
28 |     expected_cds_len <- c(471L, 663L, 276L, 918L, 264L,
29 |                           261L, 369L, 453L, 249L, 516L)
30 |     checkIdentical(txlens$cds_len, expected_cds_len)
31 | 
32 |     expected_utr5_len <- c(250L, 0L, 0L, 89L, 0L, 558L, 0L, 0L, 0L, 373L)
33 |     checkIdentical(txlens$utr5_len, expected_utr5_len)
34 | 
35 |     expected_utr3_len <- c(0L, 150L, 43L, 39L, 0L, 0L, 433L, 0L, 0L, 0L)
36 |     checkIdentical(txlens$utr3_len, expected_utr3_len)
37 | }
38 | 
39 | 


--------------------------------------------------------------------------------
/inst/unitTests/test_transcripts.R:
--------------------------------------------------------------------------------
  1 | test_transcripts <- function()
  2 | {
  3 |     txdb <- loadDb(system.file("extdata", "hg19_knownGene_sample.sqlite", 
  4 |                                       package="GenomicFeatures"))
  5 | 
  6 |     ## Test misuse.
  7 |     checkException(transcripts(data.frame()), silent=TRUE)
  8 |     checkException(transcripts(txdb, filter=list(bad=1:10)), silent=TRUE)
  9 |     checkException(transcripts(txdb, columns="bad"), silent=TRUE)
 10 | 
 11 |     ## Test 1.
 12 |     current <- transcripts(txdb, filter=list(gene_id="139231"))
 13 |     metadata(current) <- list()
 14 | 
 15 |     target <- GRanges("chrX",
 16 |                       ranges=IRanges(start=c(103411156, 103430747),
 17 |                                      end  =c(103440582, 103440582)),
 18 |                       strand=strand("+"),
 19 |                       tx_id=142:143,
 20 |                       tx_name=c("uc004elw.3", "uc004elx.3"),
 21 |                       seqinfo=seqinfo(txdb))
 22 |     checkIdentical(target, current)
 23 | 
 24 |     ## Test 2.
 25 |     filter <- list(tx_chrom=c("chr12", "chr14"), tx_strand="-")
 26 |     current <- transcripts(txdb, columns=c("tx_id", "tx_name",
 27 |                                            "exon_id", "exon_rank"),
 28 |                                  filter=filter)
 29 |     metadata(current) <- list()
 30 | 
 31 |     target <- GRanges("chr12",
 32 |                       ranges=IRanges(start=52753790, end=52761309),
 33 |                       strand=strand("-"),
 34 |                       tx_id=87L,
 35 |                       tx_name="uc001sag.3",
 36 |                       exon_id=IntegerList(334:326),
 37 |                       exon_rank=IntegerList(1:9),
 38 |                       seqinfo=seqinfo(txdb))
 39 |     checkIdentical(target, current)
 40 | 
 41 |     ## Test 3.
 42 |     filter <- list(gene_id=c("220004", "1183", "10186"))
 43 |     current <- transcripts(txdb, columns=c("tx_id", "tx_name", "gene_id"),
 44 |                                  filter=filter)
 45 |     metadata(current) <- list()
 46 | 
 47 |     target_tx_id <- c(91L, 136:137)
 48 |     target_gene_id <- CharacterList("10186", "1183", "1183")
 49 |     target <- GRanges(c("chr13", "chrX", "chrX"),
 50 |                       ranges=IRanges(start=c(39917029, 10124985, 10124985),
 51 |                                      end  =c(40177356, 10205699, 10205699)),
 52 |                       strand=strand(c("-", "+", "+")),
 53 |                       tx_id=target_tx_id,
 54 |                       tx_name=c("uc001uxf.3", "uc004csy.4", "uc011mid.3"),
 55 |                       gene_id=target_gene_id,
 56 |                       seqinfo=seqinfo(txdb))
 57 |     checkIdentical(target, current)
 58 | }
 59 | 
 60 | test_transcripts_after_seqlevelsStyle_switch <- function()
 61 | {
 62 |     txdb <- loadDb(system.file("extdata", "hg19_knownGene_sample.sqlite", 
 63 |                                package="GenomicFeatures"))
 64 |     checkIdentical(seqlevelsStyle(txdb), "UCSC")
 65 |     seqlevelsStyle(txdb) <- "NCBI"
 66 |     checkIdentical(seqlevelsStyle(txdb), c("NCBI", "UCSC"))
 67 | 
 68 |     filter <- list(gene_id=c("220004", "1183", "10186"))
 69 |     current <- transcripts(txdb, columns=c("tx_id", "tx_name", "gene_id"),
 70 |                                  filter=filter)
 71 |     metadata(current) <- list()
 72 | 
 73 |     checkIdentical(c("13", "X", "X"), as.character(seqnames(current)))
 74 | 
 75 |     target_tx_id <- c(91L, 136:137)
 76 |     target_gene_id <- CharacterList("10186", "1183", "1183")
 77 |     target <- GRanges(c("13", "X", "X"),
 78 |                       ranges=IRanges(start=c(39917029, 10124985, 10124985),
 79 |                                      end  =c(40177356, 10205699, 10205699)),
 80 |                       strand=strand(c("-", "+", "+")),
 81 |                       tx_id=target_tx_id,
 82 |                       tx_name=c("uc001uxf.3", "uc004csy.4", "uc011mid.3"),
 83 |                       gene_id=target_gene_id,
 84 |                       seqinfo=seqinfo(txdb))
 85 |     checkIdentical(target, current)    
 86 | }
 87 | 
 88 | test_exons <- function()
 89 | {
 90 |     txdb <- loadDb(system.file("extdata", "hg19_knownGene_sample.sqlite", 
 91 |                                       package="GenomicFeatures"))
 92 | 
 93 |     ## Test misuse.
 94 |     checkException(exons(data.frame()), silent=TRUE)
 95 |     checkException(exons(txdb, filter=list(bad=1:10)), silent=TRUE)
 96 | 
 97 |     ## Test 1.
 98 |     current <- exons(txdb, filter=list(tx_name="uc001gde.2"))
 99 |     metadata(current) <- list()
100 | 
101 |     target <- GRanges("chr1",
102 |                       ranges=IRanges(start=c(165513478, 165532742),
103 |                                      end  =c(165514155, 165533185)),
104 |                       strand=strand("+"),
105 |                       exon_id=29:30,
106 |                       seqinfo=seqinfo(txdb))
107 |     checkIdentical(target, current)
108 | 
109 |     ## Test 2.
110 |     filter <- list(exon_chrom=c("chr5", "chr14"), exon_strand="-")
111 |     current <- exons(txdb, columns=c("exon_id", "tx_name", "gene_id"),
112 |                            filter=filter)
113 |     metadata(current) <- list()
114 | 
115 |     target_ranges <-
116 |         IRanges(start=c(134363424, 134366966, 134369403, 170732985),
117 |                 end  =c(134365011, 134367198, 134369964, 170735759))
118 |     target <- GRanges("chr5",
119 |                       ranges=target_ranges,
120 |                       strand=strand("-"),
121 |                       exon_id=182:185,
122 |                       tx_name=CharacterList("uc010jea.3", "uc010jea.3",
123 |                                             "uc010jea.3", "uc003mbe.2"),
124 |                       gene_id=CharacterList("5307", "5307", "5307", NULL),
125 |                       seqinfo=seqinfo(txdb))
126 |     checkIdentical(target, current)
127 | }
128 | 
129 | test_cds <- function()
130 | {
131 |     txdb <- loadDb(system.file("extdata", "hg19_knownGene_sample.sqlite", 
132 |                          package="GenomicFeatures"))
133 | 
134 |     ## Test misuse.
135 |     checkException(cds(data.frame()), silent=TRUE)
136 |     checkException(cds(txdb, filter=list(bad=1:10)), silent=TRUE)
137 | 
138 |     ## Test 1.
139 |     current <- cds(txdb, filter=list(tx_name="uc001gde.2"))
140 |     metadata(current) <- list()
141 | 
142 |     target <- GRanges("chr1",
143 |                       ranges=IRanges(start=c(165513534, 165532742),
144 |                                      end  =c(165514155, 165533061)),
145 |                       strand=strand("+"),
146 |                       cds_id=23:24,
147 |                       seqinfo=seqinfo(txdb))
148 |     checkIdentical(target, current)
149 | 
150 |     ## Test 2.
151 |     filter <- list(cds_chrom=c("chr5", "chr14"), cds_strand="-")
152 |     current <- cds(txdb, columns=c("exon_id", "tx_name", "gene_id"),
153 |                          filter=filter)
154 |     metadata(current) <- list()
155 | 
156 |     target_ranges <-
157 |         IRanges(start=c(134364469, 134366966, 134369403, 170735359),
158 |                 end  =c(134365011, 134367198, 134369571, 170735634))
159 |     target <- GRanges("chr5",
160 |                       ranges=target_ranges,
161 |                       strand=strand("-"),
162 |                       exon_id=as(182:185, "IntegerList"),
163 |                       tx_name=CharacterList("uc010jea.3", "uc010jea.3",
164 |                                             "uc010jea.3", "uc003mbe.2"),
165 |                       gene_id=CharacterList("5307", "5307", "5307", NULL),
166 |                       seqinfo=seqinfo(txdb))
167 |     checkIdentical(target, current)
168 | }
169 | 
170 | test_promoters <- function()
171 | {
172 |     txdb <- loadDb(system.file("extdata", "hg19_knownGene_sample.sqlite", 
173 |                                package="GenomicFeatures"))
174 |     tx <- transcripts(txdb, use.names=TRUE)
175 |     current <- promoters(txdb)
176 |     checkTrue(validObject(current))
177 |     checkEquals(colnames(mcols(current)), c("tx_id", "tx_name"))
178 |     checkIdentical(current, promoters(tx))
179 |     current <- terminators(txdb)
180 |     checkTrue(validObject(current))
181 |     checkEquals(colnames(mcols(current)), c("tx_id", "tx_name"))
182 |     checkIdentical(current, terminators(tx))
183 | }
184 | 
185 | 
186 | test_translateCols <- function(){
187 |     txdb <- loadDb(system.file("extdata", "hg19_knownGene_sample.sqlite", 
188 |                    package="GenomicFeatures"))
189 |     tx1 <- transcripts(txdb, columns=c("tx_id", "tx_name", "cds_id"))
190 |     checkEquals(colnames(mcols(tx1)), c("tx_id", "tx_name", "cds_id"))
191 |     tx2 <- transcripts(txdb, columns=c("TXID", "TXNAME", "CDSID"))
192 |     checkEquals(colnames(mcols(tx2)), c("TXID", "TXNAME", "CDSID"))
193 |     tx3 <- transcripts(txdb, columns=c(bob="CDSID"))
194 |     checkEquals(colnames(mcols(tx3)), c("bob"))
195 |     tx4 <- transcripts(txdb, columns=c(bob="cds_id"))
196 |     checkEquals(colnames(mcols(tx4)), c("bob"))
197 |     ## And these two cases should both explode. ;)
198 |     checkException(transcripts(txdb, columns=c("")))
199 |     checkException(transcripts(txdb, columns=c("bob")))
200 | }
201 | 
202 | 


--------------------------------------------------------------------------------
/inst/unitTests/test_transcriptsByOverlaps.R:
--------------------------------------------------------------------------------
  1 | ###
  2 | 
  3 | test_transcriptsByOverlaps <- function()
  4 | {
  5 |     txdb <- loadDb(system.file("extdata", "hg19_knownGene_sample.sqlite", 
  6 |                                      package="GenomicFeatures"))
  7 | 
  8 |     checkException(transcriptsByOverlaps(txdb), silent = TRUE)
  9 |     checkException(transcriptsByOverlaps(txdb, IRanges()), silent = TRUE)
 10 |     checkException(transcriptsByOverlaps(txdb, GRanges(), columns = "bad"),
 11 |                    silent = TRUE)
 12 | 
 13 |     seqinfo <- seqinfo(txdb)
 14 |     seqlevels <- seqlevels(seqinfo)
 15 | 
 16 |     gr <- GRanges(seqnames = "chrX",
 17 |                   ranges = IRanges(start=54071000, width=1),
 18 |                   strand = strand("-"))
 19 |     want <-
 20 |       GRanges(seqnames = factor("chrX", levels = seqlevels),
 21 |               ranges = IRanges(start=53963113, end=54071569),
 22 |               strand = strand("-"),
 23 |               tx_id = 147L,
 24 |               tx_name = "uc004dsu.3")
 25 |     seqinfo(want) <- seqinfo
 26 |     want <- GenomicFeatures:::.assignMetadataList(want, txdb)
 27 |     checkIdentical(transcriptsByOverlaps(txdb, gr), want)
 28 | 
 29 |     ranges <- IRanges(start = c(113000000, 54071000, 54071000),
 30 |                       width = c(  5000000,        1,        1))
 31 |     chrom <- c("chr3", "chrX", "chrX")
 32 |     strand <- strand(c("+", "+", "-"))
 33 |     gr <- GRanges(seqnames = chrom, ranges = ranges, strand = strand)
 34 |     want <- GRanges(seqnames =
 35 |                     factor(c("chr3", "chr3", "chr3", "chrX"),
 36 |                            levels = seqlevels),
 37 |                     ranges = IRanges(start = c(113666748, 113666822, 113676421,
 38 |                                                 53963113),
 39 |                                      end   = c(113681827, 113681827, 113682211,
 40 |                                                 54071569)),
 41 |                     strand = strand(c("+", "+", "+", "-")),
 42 |                     tx_id = c(29:31, 147L))
 43 |     seqinfo(want) <- seqinfo
 44 |     want <- GenomicFeatures:::.assignMetadataList(want, txdb)
 45 |     checkIdentical(transcriptsByOverlaps(txdb, gr, columns="tx_id"), want)
 46 | }
 47 | 
 48 | test_exonsByOverlaps <- function()
 49 | {
 50 |     txdb <- loadDb(system.file("extdata", "hg19_knownGene_sample.sqlite", 
 51 |                                      package="GenomicFeatures"))
 52 | 
 53 |     checkException(exonsByOverlaps(txdb), silent = TRUE)
 54 |     checkException(exonsByOverlaps(txdb, IRanges()), silent = TRUE)
 55 |     checkException(exonsByOverlaps(txdb, GRanges(), columns = "bad"),
 56 |                    silent = TRUE)
 57 | 
 58 |     seqinfo <- seqinfo(txdb)
 59 |     seqlevels <- seqlevels(seqinfo)
 60 | 
 61 |     gr <- GRanges(seqnames = "chr3",
 62 |                   ranges = IRanges(start=113677210, width=1),
 63 |                   strand = strand("+"))
 64 |     want <-
 65 |       GRanges(seqnames = factor("chr3", levels = seqlevels),
 66 |               ranges = IRanges(start=c(113677210, 113677210),
 67 |                                end  =c(113677385, 113682211)),
 68 |               strand = strand("+"),
 69 |               exon_id = 139:140)
 70 |     seqinfo(want) <- seqinfo
 71 |     want <- GenomicFeatures:::.assignMetadataList(want, txdb)
 72 |     checkIdentical(exonsByOverlaps(txdb, gr), want)
 73 | }
 74 | 
 75 | test_cdsByOverlaps <- function()
 76 | {
 77 |     txdb <- loadDb(system.file("extdata", "hg19_knownGene_sample.sqlite", 
 78 |                                      package="GenomicFeatures"))
 79 | 
 80 |     checkException(cdsByOverlaps(txdb), silent = TRUE)
 81 |     checkException(cdsByOverlaps(txdb, IRanges()), silent = TRUE)
 82 |     checkException(cdsByOverlaps(txdb, GRanges(), columns = "bad"),
 83 |                    silent = TRUE)
 84 | 
 85 |     seqinfo <- seqinfo(txdb)
 86 |     seqlevels <- seqlevels(seqinfo)
 87 | 
 88 |     gr <- GRanges(seqnames = "chr3",
 89 |                   ranges = IRanges(start=113677210, width=1),
 90 |                   strand = strand("+"))
 91 |     want <-
 92 |       GRanges(seqnames = factor("chr3", levels = seqlevels),
 93 |               ranges = IRanges(start=c(113677210, 113677210),
 94 |                                end  =c(113677385, 113677477)),
 95 |               strand = strand("+"),
 96 |               cds_id = 116:117)
 97 |     seqinfo(want) <- seqinfo
 98 |     want <- GenomicFeatures:::.assignMetadataList(want, txdb)
 99 |     checkIdentical(cdsByOverlaps(txdb, gr), want)
100 | }
101 | 


--------------------------------------------------------------------------------
/man/FeatureDb-class.Rd:
--------------------------------------------------------------------------------
 1 | \name{FeatureDb-class}
 2 | 
 3 | \alias{FeatureDb-class}
 4 | \alias{class:FeatureDb}
 5 | \alias{FeatureDb}
 6 | 
 7 | \title{FeatureDb objects}
 8 | 
 9 | \description{
10 |   WARNING: The FeatureDb/makeFeatureDbFromUCSC/features code base is
11 |   no longer actively maintained and FeatureDb-related functionalities
12 |   might get deprecated in the near future. Please use
13 |   \code{\link{makeFeatureDbFromUCSC}} for a convenient way to
14 |   import transcript annotations from UCSC online resources into
15 |   Bioconductor.
16 | 
17 |   The FeatureDb class is a generic container for storing
18 |   genomic locations of an arbitrary type of genomic features.
19 |   
20 |   See \code{?\link{TxDb}} for a container for storing transcript
21 |   annotations.
22 | 
23 |   See \code{?\link{makeFeatureDbFromUCSC}} for a convenient way to
24 |   make FeatureDb objects from BioMart online resources.
25 | }
26 | 
27 | \section{Methods}{
28 |   In the code snippets below, \code{x} is a FeatureDb object.
29 | 
30 |   \describe{
31 |     \item{\code{metadata(x)}:}{
32 |       Return \code{x}'s metadata in a data frame.
33 |     }
34 |   }
35 | }
36 | 
37 | \author{Marc Carlson}
38 | 
39 | \seealso{
40 |   \itemize{
41 |     \item The \link{TxDb} class for storing transcript annotations.
42 |     \item \code{\link{makeFeatureDbFromUCSC}} for a convenient way to
43 |           make a FeatureDb object from UCSC online resources.
44 |     \item \code{\link{saveDb}} and \code{\link{loadDb}} for
45 |           saving and loading the database content of a FeatureDb object.
46 |     \item \code{\link{features}} for how to extract genomic features
47 |           from a FeatureDb object.
48 |   }
49 | }
50 | 
51 | \examples{
52 | fdb_file <- system.file("extdata", "FeatureDb.sqlite",
53 |                         package="GenomicFeatures")
54 | fdb <- loadDb(fdb_file)
55 | fdb
56 | }
57 | 
58 | \keyword{methods}
59 | \keyword{classes}
60 | 


--------------------------------------------------------------------------------
/man/TxDb-class.Rd:
--------------------------------------------------------------------------------
  1 | \name{TxDb-class}
  2 | 
  3 | \alias{TxDb-class}
  4 | \alias{class:TxDb}
  5 | \alias{TxDb}
  6 | 
  7 | \alias{saveRDS,TxDb-method}
  8 | 
  9 | \alias{organism,TxDb-method}
 10 | \alias{seqlevels0,TxDb-method}
 11 | \alias{seqlevels<-,TxDb-method}
 12 | \alias{seqinfo,TxDb-method}
 13 | \alias{isActiveSeq}
 14 | \alias{isActiveSeq<-}
 15 | \alias{isActiveSeq,TxDb-method}
 16 | \alias{isActiveSeq<-,TxDb-method}
 17 | \alias{show,TxDb-method}
 18 | 
 19 | % coercion
 20 | \alias{as.list,TxDb-method}
 21 | 
 22 | \title{TxDb objects}
 23 | 
 24 | \description{
 25 |   The TxDb class is a container for storing transcript annotations.
 26 | }
 27 | 
 28 | \section{Methods}{
 29 |   In the code snippets below, \code{x} is a TxDb object.
 30 | 
 31 |   \describe{
 32 |     \item{\code{metadata(x)}:}{
 33 |       Return \code{x}'s metadata in a data frame.
 34 |     }
 35 |     \item{\code{seqlevels0(x)}:}{
 36 |       Get the \emph{sequence levels} originally in \code{x}. This ignores any
 37 |       change the user might have made to the \emph{sequence levels} with the
 38 |       \code{seqlevels} setter.
 39 |     }
 40 |     \item{\code{seqlevels(x)}, \code{seqlevels(x) <- value}:}{
 41 |       Get or set the \emph{sequence levels} in \code{x}.
 42 |     }
 43 |     \item{\code{seqinfo(x)}, \code{seqinfo(x) <- value}:}{
 44 |       Get or set the information about the underlying sequences.
 45 |       Note that, for now, the setter only supports replacement of the
 46 |       sequence names, i.e., except for their sequence names (accessed with
 47 |       \code{seqnames(value)} and \code{seqnames(seqinfo(x))}, respectively),
 48 |       \link[GenomeInfoDb]{Seqinfo} objects \code{value} (supplied) and
 49 |       \code{seqinfo(x)} (current) must be identical.
 50 |     }
 51 |     \item{\code{isActiveSeq(x)}:}{
 52 |       Return the currently active sequences for this txdb object as a
 53 |       named logical vector.  Only active sequences will be tapped when
 54 |       using the supplied accessor methods.  Inactive sequences will be
 55 |       ignored.  By default, all available sequences will be active.
 56 |     } 
 57 |     \item{\code{isActiveSeq(x) <- value}:}{
 58 |       Allows the user to change which sequences will be actively
 59 |       accessed by the accessor methods by altering the contents of this
 60 |       named logical vector.
 61 |     }
 62 |     \item{\code{seqlevelsStyle(x)}, \code{seqlevelsStyle(x) <- value}:}{
 63 |       Get or set the seqname style for \code{x}.
 64 |       See the \link[GenomeInfoDb]{seqlevelsStyle} generic getter and setter
 65 |       in the \pkg{GenomeInfoDb} package for more information.
 66 |     }
 67 |     \item{\code{as.list(x)}:}{
 68 |       Dump the entire db into a list of data frames, say \code{txdb_dump},
 69 |       that can then be used to recreate the original db with
 70 |       \code{do.call(txdbmaker::makeTxDb, txdb_dump)} with no loss of
 71 |       information (except possibly for some of the metadata).
 72 |       Note that the transcripts are dumped in the same order in all the
 73 |       data frames.
 74 |     }
 75 |   }
 76 | }
 77 | 
 78 | \author{Hervé Pagès, Marc Carlson}
 79 | 
 80 | \seealso{
 81 |   \itemize{
 82 |     \item \code{\link[txdbmaker]{makeTxDbFromUCSC}},
 83 |           \code{\link[txdbmaker]{makeTxDbFromBiomart}},
 84 |           and \code{\link[txdbmaker]{makeTxDbFromEnsembl}} in
 85 |           the \pkg{txdbmaker} package for making a \link{TxDb}
 86 |           object from online resources.
 87 | 
 88 |     \item \code{\link[txdbmaker]{makeTxDbFromGRanges}} and
 89 |           \code{\link[txdbmaker]{makeTxDbFromGFF}} in the \pkg{txdbmaker}
 90 |           package for making a \link{TxDb} object from a
 91 |           \link[GenomicRanges]{GRanges} object, or from a GFF or GTF file.
 92 | 
 93 |     \item \code{\link[AnnotationDbi]{saveDb}} and
 94 |           \code{\link[AnnotationDbi]{loadDb}} in the \pkg{AnnotationDbi}
 95 |           package for saving and loading a TxDb object as an SQLite file.
 96 | 
 97 |     \item \code{\link{transcripts}}, \code{\link{transcriptsBy}},
 98 |           and \code{\link{transcriptsByOverlaps}}, for extracting
 99 |           genomic feature locations from a \link{TxDb}-like object.
100 | 
101 |     \item \code{\link{transcriptLengths}} for extracting the transcript
102 |           lengths (and other metrics) from a \link{TxDb} object.
103 | 
104 |     \item \link[GenomicFeatures]{select-methods} for how to use the
105 |           simple "select" interface to extract information from a
106 |           TxDb object.
107 | 
108 |     \item The \link[GenomeInfoDb]{Seqinfo} class in the \pkg{GenomeInfoDb}
109 |           package.
110 |   }
111 | }
112 | 
113 | \examples{
114 | txdb_file <- system.file("extdata", "Biomart_Ensembl_sample.sqlite",
115 |                          package="GenomicFeatures")
116 | txdb <- loadDb(txdb_file)
117 | txdb
118 | 
119 | ## Use of seqinfo():
120 | seqlevelsStyle(txdb)
121 | seqinfo(txdb)
122 | seqlevels(txdb)
123 | seqlengths(txdb)  # shortcut for 'seqlengths(seqinfo(txdb))'
124 | isCircular(txdb)  # shortcut for 'isCircular(seqinfo(txdb))'
125 | names(which(isCircular(txdb)))
126 | 
127 | ## You can set user-supplied seqlevels on 'txdb' to restrict any further
128 | ## operations to a subset of chromosomes:
129 | seqlevels(txdb) <- c("Y", "6")
130 | ## Then you can restore the seqlevels stored in the db:
131 | seqlevels(txdb) <- seqlevels0(txdb)
132 | 
133 | ## Use of as.list():
134 | txdb_dump <- as.list(txdb)
135 | txdb_dump
136 | 
137 | library(txdbmaker)  # for makeTxDb()
138 | txdb1 <- do.call(makeTxDb, txdb_dump)
139 | stopifnot(identical(as.list(txdb1), txdb_dump))
140 | }
141 | 
142 | \keyword{methods}
143 | \keyword{classes}
144 | 


--------------------------------------------------------------------------------
/man/as-format-methods.Rd:
--------------------------------------------------------------------------------
 1 | \name{as-format-methods}
 2 | \alias{asBED,TxDb-method}
 3 | \alias{asGFF,TxDb-method}
 4 | 
 5 | \title{Coerce to file format structures}
 6 | \description{
 7 |   These functions coerce a \code{\linkS4class{TxDb}} object to a
 8 |   \code{\link[GenomicRanges:GRanges-class]{GRanges}} object with
 9 |   metadata columns encoding transcript structures according to the
10 |   model of a standard file format. Currently, BED and GFF models are
11 |   supported. If a \code{TxDb} is passed to
12 |   \code{\link[BiocIO]{export}}, when targeting a BED or GFF file,
13 |   this coercion occurs automatically.
14 | }
15 | \usage{
16 | \S4method{asBED}{TxDb}(x)
17 | \S4method{asGFF}{TxDb}(x)
18 | }
19 | 
20 | \arguments{
21 |   \item{x}{
22 |     A \code{TxDb} object to coerce to a \code{GRanges},
23 |     structured as BED or GFF.
24 |   }
25 | }
26 | 
27 | \value{
28 |   For \code{asBED}, a \code{GRanges}, with the columns \code{name},
29 |   \code{thickStart}, \code{thickEnd}, \code{blockStarts},
30 |   \code{blockSizes} added. The thick regions correspond to the CDS
31 |   regions, and the blocks represent the exons. The transcript IDs are
32 |   stored in the \code{name} column. The ranges are the transcript bounds.
33 | 
34 |   For \code{asGFF}, a \code{GRanges}, with columns \code{type},
35 |   \code{Name}, \code{ID},, and \code{Parent}. The gene structures are
36 |   expressed according to the conventions defined by the GFF3 spec. There
37 |   are elements of each \code{type} of feature: \dQuote{gene},
38 |   \dQuote{mRNA} \dQuote{exon} and \dQuote{cds}. The \code{Name} column
39 |   contains the \code{gene_id} for genes, \code{tx_name} for transcripts,
40 |   and exons and cds regions are \code{NA}. The \code{ID} column uses
41 |   \code{gene_id} and \code{tx_id}, with the prefixes \dQuote{GeneID} and
42 |   \dQuote{TxID} to ensure uniqueness across types. The exons and cds
43 |   regions have \code{NA} for \code{ID}. The \code{Parent} column
44 |   contains the \code{ID}s of the parent features. A feature may have
45 |   multiple parents (the column is a \code{CharacterList}). Each exon
46 |   belongs to one or more mRNAs, and mRNAs belong to a gene.
47 | }
48 | 
49 | \author{
50 |   Michael Lawrence
51 | }
52 | 
53 | \examples{
54 |   txdb_file <- system.file("extdata", "hg19_knownGene_sample.sqlite",
55 |                            package="GenomicFeatures")
56 |   txdb <- loadDb(txdb_file)
57 | 
58 |   asBED(txdb)
59 |   asGFF(txdb)
60 | }
61 | 


--------------------------------------------------------------------------------
/man/exonicParts.Rd:
--------------------------------------------------------------------------------
  1 | \name{exonicParts}
  2 | 
  3 | \alias{tidyTranscripts}
  4 | \alias{tidyExons}
  5 | \alias{tidyIntrons}
  6 | \alias{exonicParts}
  7 | \alias{intronicParts}
  8 | 
  9 | \title{
 10 |   Extract non-overlapping exonic or intronic parts from a TxDb-like object
 11 | }
 12 | 
 13 | \description{
 14 |   \code{exonicParts} and \code{intronicParts} extract the non-overlapping
 15 |   (a.k.a. disjoint) exonic or intronic parts from a \link{TxDb}-like object.
 16 | }
 17 | 
 18 | \usage{
 19 | exonicParts(txdb, linked.to.single.gene.only=FALSE)
 20 | intronicParts(txdb, linked.to.single.gene.only=FALSE)
 21 | 
 22 | ## 3 helper functions used internally by exonicParts() and intronicParts():
 23 | tidyTranscripts(txdb, drop.geneless=FALSE)
 24 | tidyExons(txdb, drop.geneless=FALSE)
 25 | tidyIntrons(txdb, drop.geneless=FALSE)
 26 | }
 27 | 
 28 | \arguments{
 29 |   \item{txdb}{
 30 |     A \link{TxDb} object, or any \link{TxDb}-like object that supports the
 31 |     \code{\link{transcripts}()} and \code{\link{exonsBy}()} extractors
 32 |     (e.g. an \link[ensembldb]{EnsDb} object).
 33 |   }
 34 |   \item{linked.to.single.gene.only}{
 35 |     \code{TRUE} or \code{FALSE}.
 36 | 
 37 |     If \code{FALSE} (the default), then the disjoint parts are obtained
 38 |     by calling \code{\link[IRanges]{disjoin}()} on all the exons (or introns)
 39 |     in \code{txdb}, including on exons (or introns) not linked to a gene or
 40 |     linked to more than one gene.
 41 | 
 42 |     If \code{TRUE}, then the disjoint parts are obtained in 2 steps:
 43 |     \enumerate{
 44 |       \item call \code{\link[IRanges]{disjoin}()} on the exons (or introns)
 45 |             linked to \emph{at least one gene},
 46 | 
 47 |       \item then drop the parts linked to more than one gene from
 48 |             the set of exonic (or intronic) parts obtained previously.
 49 |     }
 50 |   }
 51 |   \item{drop.geneless}{
 52 |     If \code{FALSE} (the default), then all the transcripts (or exons, or
 53 |     introns) get extracted from the \link{TxDb} object.
 54 | 
 55 |     If \code{TRUE}, then only the transcripts (or exons, or introns) that
 56 |     are linked to a gene get extracted from the \link{TxDb} object.
 57 | 
 58 |     Note that \code{drop.geneless} also impacts the order in which the
 59 |     features are returned:
 60 |     \itemize{
 61 |       \item Transcripts: If \code{drop.geneless} is \code{FALSE} then
 62 |             transcripts are returned in the same order as with
 63 |             \code{\link{transcripts}}, which is expected to be by
 64 |             internal transcript id (\code{tx_id}).
 65 |             Otherwise they are ordered first by gene id (\code{gene_id}),
 66 |             then by internal transcript id.
 67 |       \item Exons: If \code{drop.geneless} is \code{FALSE} then exons are
 68 |             ordered first by internal transcript id (\code{tx_id}),
 69 |             then by exon rank (\code{exon_rank}).
 70 |             Otherwise they are ordered first by gene id (\code{gene_id}),
 71 |             then by internal transcript id, and then by exon rank.
 72 |       \item Introns: If \code{drop.geneless} is \code{FALSE} then introns
 73 |             are ordered by internal transcript id (\code{tx_id}).
 74 |             Otherwise they are ordered first by gene id (\code{gene_id}),
 75 |             then by internal transcript id.
 76 |     }
 77 |   }
 78 | }
 79 | 
 80 | \value{
 81 |   \code{exonicParts} returns a disjoint and strictly sorted
 82 |   \link[GenomicRanges]{GRanges} object with 1 range per exonic part
 83 |   and with metadata columns \code{tx_id}, \code{tx_name}, \code{gene_id},
 84 |   \code{exon_id}, \code{exon_name}, and \code{exon_rank}.
 85 |   If \code{linked.to.single.gene.only} was set to \code{TRUE},
 86 |   an additional \code{exonic_part} metadata column is added that
 87 |   indicates the rank of each exonic part within all the exonic parts
 88 |   linked to the same gene.
 89 | 
 90 |   \code{intronicParts} returns a disjoint and strictly sorted
 91 |   \link[GenomicRanges]{GRanges} object with 1 range per intronic part
 92 |   and with metadata columns \code{tx_id}, \code{tx_name}, and \code{gene_id}.
 93 |   If \code{linked.to.single.gene.only} was set to \code{TRUE},
 94 |   an additional \code{intronic_part} metadata column is added that
 95 |   indicates the rank of each intronic part within all the intronic parts
 96 |   linked to the same gene.
 97 | 
 98 |   \code{tidyTranscripts} returns a \link[GenomicRanges]{GRanges} object
 99 |   with 1 range per transcript and with metadata columns \code{tx_id},
100 |   \code{tx_name}, and \code{gene_id}.
101 | 
102 |   \code{tidyExons} returns a \link[GenomicRanges]{GRanges} object
103 |   with 1 range per exon and with metadata columns \code{tx_id},
104 |   \code{tx_name}, \code{gene_id}, \code{exon_id}, \code{exon_name},
105 |   and \code{exon_rank}.
106 | 
107 |   \code{tidyIntrons} returns a \link[GenomicRanges]{GRanges} object
108 |   with 1 range per intron and with metadata columns \code{tx_id},
109 |   \code{tx_name}, and \code{gene_id}.
110 | }
111 | 
112 | \author{Hervé Pagès}
113 | 
114 | \seealso{
115 |   \itemize{
116 |     \item \code{\link[IRanges]{disjoin}} in the \pkg{IRanges} package.
117 | 
118 |     \item \code{\link{transcripts}}, \code{\link{transcriptsBy}},
119 |           and \code{\link{transcriptsByOverlaps}}, for extracting
120 |           genomic feature locations from a \link{TxDb}-like object.
121 | 
122 |     \item \code{\link{transcriptLengths}} for extracting the transcript
123 |           lengths (and other metrics) from a \link{TxDb} object.
124 | 
125 |     \item \code{\link{extendExonsIntoIntrons}} for extending exons into
126 |           their adjacent introns.
127 | 
128 |     \item \code{\link{extractTranscriptSeqs}} for extracting transcript
129 |           (or CDS) sequences from chromosome sequences.
130 | 
131 |     \item \code{\link{coverageByTranscript}} for computing coverage by
132 |           transcript (or CDS) of a set of ranges.
133 | 
134 |     \item The \link{TxDb} class.
135 |   }
136 | }
137 | 
138 | \examples{
139 | library(TxDb.Hsapiens.UCSC.hg19.knownGene)
140 | txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
141 | 
142 | ## ---------------------------------------------------------------------
143 | ## exonicParts()
144 | ## ---------------------------------------------------------------------
145 | 
146 | exonic_parts1 <- exonicParts(txdb)
147 | exonic_parts1
148 | 
149 | ## Mapping from exonic parts to genes is many-to-many:
150 | gene_id1 <- mcols(exonic_parts1)$gene_id
151 | gene_id1  # CharacterList object
152 | table(lengths(gene_id1))
153 | ## The number of known genes a Human exonic part can be linked to
154 | ## varies from 0 to 22!
155 | 
156 | exonic_parts2 <- exonicParts(txdb, linked.to.single.gene.only=TRUE)
157 | exonic_parts2
158 | 
159 | ## Mapping from exonic parts to genes now is many-to-one:
160 | gene_id2 <- mcols(exonic_parts2)$gene_id
161 | gene_id2[1:20]  # character vector
162 | 
163 | ## Select exonic parts for a given gene:
164 | exonic_parts2[gene_id2 \%in\% "643837"]
165 | 
166 | ## Sanity checks:
167 | stopifnot(isDisjoint(exonic_parts1), isStrictlySorted(exonic_parts1))
168 | stopifnot(isDisjoint(exonic_parts2), isStrictlySorted(exonic_parts2))
169 | stopifnot(all(exonic_parts2 \%within\% reduce(exonic_parts1)))
170 | stopifnot(identical(
171 |     lengths(gene_id1) == 1L,
172 |     exonic_parts1 \%within\% exonic_parts2
173 | ))
174 | 
175 | ## ---------------------------------------------------------------------
176 | ## intronicParts()
177 | ## ---------------------------------------------------------------------
178 | 
179 | intronic_parts1 <- intronicParts(txdb)
180 | intronic_parts1
181 | 
182 | ## Mapping from intronic parts to genes is many-to-many:
183 | mcols(intronic_parts1)$gene_id
184 | table(lengths(mcols(intronic_parts1)$gene_id))
185 | ## A Human intronic part can be linked to 0 to 22 known genes!
186 | 
187 | intronic_parts2 <- intronicParts(txdb, linked.to.single.gene.only=TRUE)
188 | intronic_parts2
189 | 
190 | ## Mapping from intronic parts to genes now is many-to-one:
191 | class(mcols(intronic_parts2)$gene_id)  # character vector
192 | 
193 | ## Sanity checks:
194 | stopifnot(isDisjoint(intronic_parts1), isStrictlySorted(intronic_parts1))
195 | stopifnot(isDisjoint(intronic_parts2), isStrictlySorted(intronic_parts2))
196 | stopifnot(all(intronic_parts2 \%within\% reduce(intronic_parts1)))
197 | stopifnot(identical(
198 |     lengths(mcols(intronic_parts1)$gene_id) == 1L,
199 |     intronic_parts1 \%within\% intronic_parts2
200 | ))
201 | 
202 | ## ---------------------------------------------------------------------
203 | ## Helper functions
204 | ## ---------------------------------------------------------------------
205 | 
206 | tidyTranscripts(txdb)                      # Ordered by 'tx_id'.
207 | tidyTranscripts(txdb, drop.geneless=TRUE)  # Ordered first by 'gene_id',
208 |                                            # then by 'tx_id'.
209 | 
210 | tidyExons(txdb)                            # Ordered first by 'tx_id',
211 |                                            # then by 'exon_rank'.
212 | tidyExons(txdb, drop.geneless=TRUE)        # Ordered first by 'gene_id',
213 |                                            # then by 'tx_id',
214 |                                            # then by 'exon_rank'.
215 | 
216 | tidyIntrons(txdb)                          # Ordered by 'tx_id'.
217 | tidyIntrons(txdb, drop.geneless=TRUE)      # Ordered first by 'gene_id',
218 |                                            # then by 'tx_id'.
219 | }
220 | 
221 | \keyword{manip}
222 | 


--------------------------------------------------------------------------------
/man/extendExonsIntoIntrons.Rd:
--------------------------------------------------------------------------------
  1 | \name{extendExonsIntoIntrons}
  2 | 
  3 | \alias{extendExonsIntoIntrons}
  4 | 
  5 | \title{
  6 |   Extend exons by a given number of bases into their adjacent introns
  7 | }
  8 | 
  9 | \description{
 10 |   \code{extendExonsIntoIntrons} extends the supplied exons by a given
 11 |   number of bases into their adjacent introns.
 12 | }
 13 | 
 14 | \usage{
 15 | extendExonsIntoIntrons(ex_by_tx, extent=2)
 16 | }
 17 | 
 18 | \arguments{
 19 |   \item{ex_by_tx}{
 20 |     A \link[GenomicRanges]{GRangesList} object containing exons grouped
 21 |     by transcript. This must be an object as returned by
 22 |     \code{\link{exonsBy}(txdb, by="tx")}, that is:
 23 |     \itemize{
 24 |       \item each list element in \code{ex_by_tx} must be a
 25 |             \link[GenomicRanges]{GRanges} object representing the
 26 |             exons of a given transcript;
 27 |       \item the exons in each list element must be ordered by ascending
 28 |             rank with respect to their transcript.
 29 |     }
 30 |   }
 31 |   \item{extent}{
 32 |     Size of the extent in number of bases. 2 by default.
 33 | 
 34 |     The first exon in a transcript will be extended by that amount on
 35 |     its 3' side only. The last exon in a transcript will be extended by
 36 |     that amount on its 5' side only. All other exons (i.e. intermediate
 37 |     exons) will be extended by that amount on \emph{each} side.
 38 | 
 39 |     Note that exons that belong to a single-exon transcript don't get
 40 |     extended.
 41 | 
 42 |     The default value of 2 corresponds to inclusion of the donor/acceptor
 43 |     intronic regions (typically GT/AG).
 44 |   }
 45 | }
 46 | 
 47 | \value{
 48 |   A copy of \link[GenomicRanges]{GRangesList} object \code{ex_by_tx}
 49 |   where the original exon ranges have been extended.
 50 | 
 51 |   Names and metadata columns on \code{ex_by_tx} are propagated to the
 52 |   result.
 53 | }
 54 | 
 55 | \author{Hervé Pagès}
 56 | 
 57 | \seealso{
 58 |   \itemize{
 59 |     \item \code{\link{transcripts}}, \code{\link{transcriptsBy}},
 60 |           and \code{\link{transcriptsByOverlaps}}, for extracting
 61 |           genomic feature locations from a \link{TxDb}-like object.
 62 | 
 63 |     \item \code{\link{exonicParts}} and \code{\link{intronicParts}} for
 64 |           extracting non-overlapping exonic or intronic parts from a
 65 |           TxDb-like object.
 66 | 
 67 |     \item \code{\link{extractTranscriptSeqs}} for extracting transcript
 68 |           (or CDS) sequences from chromosome sequences.
 69 | 
 70 |     \item The \link{TxDb} class.
 71 |   }
 72 | }
 73 | 
 74 | \examples{
 75 | ## With toy transcripts:
 76 | ex_by_tx <- GRangesList(
 77 |     TX1="chr1:10-20:+",
 78 |     TX2=c("chr1:10-20:+", "chr1:50-75:+"),
 79 |     TX3=c("chr1:10-20:+", "chr1:50-75:+", "chr1:100-120:+"),
 80 |     TX4="chr1:10-20:-",
 81 |     TX5=c("chr1:10-20:-", "chr1:50-75:-"),
 82 |     TX6=c("chr1:10-20:-", "chr1:50-75:-", "chr1:100-120:-")
 83 | )
 84 | 
 85 | extended <- extendExonsIntoIntrons(ex_by_tx, extent=2)
 86 | extended[1:3]
 87 | extended[4:6]
 88 | 
 89 | ## With real-world transcripts:
 90 | library(TxDb.Celegans.UCSC.ce11.ensGene)
 91 | txdb <- TxDb.Celegans.UCSC.ce11.ensGene
 92 | ex_by_tx <- exonsBy(txdb, by="tx")
 93 | ex_by_tx
 94 | 
 95 | extendExonsIntoIntrons(ex_by_tx, extent=2)
 96 | 
 97 | ## Sanity check:
 98 | stopifnot(identical(extendExonsIntoIntrons(ex_by_tx, extent=0), ex_by_tx))
 99 | }
100 | 
101 | \keyword{manip}
102 | 


--------------------------------------------------------------------------------
/man/extractUpstreamSeqs.Rd:
--------------------------------------------------------------------------------
  1 | \name{extractUpstreamSeqs}
  2 | 
  3 | \alias{extractUpstreamSeqs}
  4 | \alias{extractUpstreamSeqs,GenomicRanges-method}
  5 | \alias{extractUpstreamSeqs,TxDb-method}
  6 | \alias{extractUpstreamSeqs,GRangesList-method}
  7 | 
  8 | 
  9 | \title{Extract sequences upstream of a set of genes or transcripts}
 10 | 
 11 | \description{
 12 |   \code{extractUpstreamSeqs} is a generic function for extracting
 13 |   sequences upstream of a supplied set of genes or transcripts.
 14 | }
 15 | 
 16 | \usage{
 17 | extractUpstreamSeqs(x, genes, width=1000, ...)
 18 | 
 19 | ## Dispatch is on the 2nd argument!
 20 | 
 21 | \S4method{extractUpstreamSeqs}{GenomicRanges}(x, genes, width=1000)
 22 | 
 23 | \S4method{extractUpstreamSeqs}{TxDb}(x, genes, width=1000, exclude.seqlevels=NULL)
 24 | }
 25 | 
 26 | \arguments{
 27 |   \item{x}{
 28 |     An object containing the chromosome sequences from which to extract the
 29 |     upstream sequences. It can be a \link[BSgenome]{BSgenome},
 30 |     \link[rtracklayer]{TwoBitFile}, or \link[Rsamtools]{FaFile} object,
 31 |     or any \emph{genome sequence container}.
 32 |     More formally, \code{x} must be an object for which
 33 |     \code{\link[GenomeInfoDb]{seqinfo}} and \code{\link[Biostrings]{getSeq}}
 34 |     are defined.
 35 |   }
 36 |   \item{genes}{
 37 |     An object containing the locations (i.e. chromosome name, start, end, and
 38 |     strand) of the genes or transcripts with respect to the reference genome.
 39 |     Only \link[GenomicRanges]{GenomicRanges} and \link{TxDb} objects
 40 |     are supported at the moment. If the latter, the gene locations are obtained
 41 |     by calling the \code{\link{genes}} function on the \link{TxDb}
 42 |     object internally.
 43 |   }
 44 |   \item{width}{
 45 |     How many bases to extract upstream of each TSS (transcription start site).
 46 |   }
 47 |   \item{...}{
 48 |     Additional arguments, for use in specific methods.
 49 |   }
 50 |   \item{exclude.seqlevels}{
 51 |     A character vector containing the chromosome names (a.k.a. sequence levels)
 52 |     to exclude when the genes are obtained from a \link{TxDb} object.
 53 |   }
 54 | }
 55 | 
 56 | \value{
 57 |   A \link[Biostrings]{DNAStringSet} object containing one upstream sequence
 58 |   per gene (or per transcript if \code{genes} is a
 59 |   \link[GenomicRanges]{GenomicRanges} object containing transcript ranges).
 60 | 
 61 |   More precisely, if \code{genes} is a \link[GenomicRanges]{GenomicRanges}
 62 |   object, the returned object is \emph{parallel} to it, that is, the i-th
 63 |   element in the returned object is the upstream sequence corresponding to
 64 |   the i-th gene (or transcript) in \code{genes}. Also the names on the
 65 |   \link[GenomicRanges]{GenomicRanges} object are propagated to the returned
 66 |   object.
 67 | 
 68 |   If \code{genes} is a \link{TxDb} object, the names on the returned
 69 |   object are the gene IDs found in the \link{TxDb} object. To see the
 70 |   type of gene IDs (i.e. Entrez gene ID or Ensembl gene ID or ...), you can
 71 |   display \code{genes} with \code{show(genes)}.
 72 | 
 73 |   In addition, the returned object has the following metadata columns
 74 |   (accessible with \code{\link{mcols}}) that provide some information about
 75 |   the gene (or transcript) corresponding to each upstream sequence:
 76 |   \itemize{
 77 |     \item \code{gene_seqnames}: the chromosome name of the gene (or
 78 |           transcript);
 79 |     \item \code{gene_strand}: the strand of the gene (or transcript);
 80 |     \item \code{gene_TSS}: the transcription start site of the gene (or
 81 |           transcript).
 82 |   }
 83 | }
 84 | 
 85 | \note{
 86 |   IMPORTANT: Always make sure to use a TxDb package (or \link{TxDb}
 87 |   object) that contains a gene model compatible with the \emph{genome sequence
 88 |   container} \code{x}, that is, a gene model based on the exact same reference
 89 |   genome as \code{x}.
 90 | 
 91 |   See
 92 |   \url{http://bioconductor.org/packages/release/BiocViews.html#___TxDb}
 93 |   for the list of TxDb packages available in the current release of
 94 |   Bioconductor.
 95 |   Note that you can make your own custom \link{TxDb} object from
 96 |   various annotation resources by using one of the \code{makeTxDbFrom*()}
 97 |   functions defined in the \pkg{txdbmaker} package and listed in
 98 |   the "See also" section below.
 99 | }
100 | 
101 | \author{Hervé Pagès}
102 | 
103 | \seealso{
104 |   \itemize{
105 |     \item \code{\link[txdbmaker]{makeTxDbFromUCSC}},
106 |           \code{\link[txdbmaker]{makeTxDbFromBiomart}},
107 |           and \code{\link[txdbmaker]{makeTxDbFromEnsembl}} in
108 |           the \pkg{txdbmaker} package for making a \link{TxDb}
109 |           object from online resources.
110 | 
111 |     \item \code{\link[txdbmaker]{makeTxDbFromGRanges}} and
112 |           \code{\link[txdbmaker]{makeTxDbFromGFF}} in the \pkg{txdbmaker}
113 |           package for making a \link{TxDb} object from a
114 |           \link[GenomicRanges]{GRanges} object, or from a GFF or GTF file.
115 | 
116 |     \item The \code{\link[BSgenome]{available.genomes}} function in the
117 |           \pkg{BSgenome} package for checking avaibility of BSgenome
118 |           data packages (and installing the desired one).
119 | 
120 |     \item The \link[BSgenome]{BSgenome}, \link[rtracklayer]{TwoBitFile}, and
121 |           \link[Rsamtools]{FaFile} classes, defined and documented
122 |           in the \pkg{BSgenome}, \pkg{rtracklayer}, and \pkg{Rsamtools}
123 |           packages, respectively.
124 | 
125 |     \item The \link{TxDb} class.
126 | 
127 |     \item The \code{\link{genes}} function for extracting gene ranges from
128 |           a \link{TxDb} object.
129 | 
130 |     \item The \link[GenomicRanges]{GenomicRanges} class defined and documented
131 |           in the \pkg{GenomicRanges} package.
132 | 
133 |     \item The \link[Biostrings]{DNAStringSet} class defined and documented
134 |           in the \pkg{Biostrings} package.
135 | 
136 |     \item The \code{\link[GenomeInfoDb]{seqinfo}} getter defined and documented
137 |           in the \pkg{GenomeInfoDb} package.
138 | 
139 |     \item The \code{\link[Biostrings]{getSeq}} function for extracting
140 |           subsequences from a sequence container.
141 |   }
142 | }
143 | 
144 | \examples{
145 | ## Load a genome:
146 | library(BSgenome.Dmelanogaster.UCSC.dm3)
147 | genome <- BSgenome.Dmelanogaster.UCSC.dm3
148 | genome
149 | 
150 | ## Use a TxDb object:
151 | library(TxDb.Dmelanogaster.UCSC.dm3.ensGene)
152 | txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene
153 | txdb  # contains Ensembl gene IDs
154 | 
155 | ## Because the chrU and chrUextra sequences are made of concatenated
156 | ## scaffolds (see https://genome.ucsc.edu/cgi-bin/hgGateway?db=dm3),
157 | ## extracting the upstream sequences for genes located on these
158 | ## scaffolds is not reliable. So we exclude them:
159 | exclude <- c("chrU", "chrUextra")
160 | up1000seqs <- extractUpstreamSeqs(genome, txdb, width=1000,
161 |                                   exclude.seqlevels=exclude)
162 | up1000seqs  # the names are Ensembl gene IDs
163 | mcols(up1000seqs)
164 | 
165 | ## Upstream sequences for genes close to the chromosome bounds can be
166 | ## shorter than 1000 (note that this does not happen for circular
167 | ## chromosomes like chrM):
168 | table(width(up1000seqs))
169 | mcols(up1000seqs)[width(up1000seqs) != 1000, ]
170 | }
171 | 
172 | \keyword{manip}
173 | 


--------------------------------------------------------------------------------
/man/features.Rd:
--------------------------------------------------------------------------------
 1 | \name{features}
 2 | 
 3 | \alias{features}
 4 | \alias{features,FeatureDb-method}
 5 | 
 6 | \title{
 7 |   Extract simple features from a FeatureDb object
 8 | }
 9 | 
10 | \description{
11 |   WARNING: The FeatureDb/makeFeatureDbFromUCSC/features code base is
12 |   no longer actively maintained and FeatureDb-related functionalities
13 |   might get deprecated in the near future. Please use
14 |   \code{\link{makeFeatureDbFromUCSC}} for a convenient way to
15 |   import transcript annotations from UCSC online resources into
16 |   Bioconductor.
17 | 
18 |   Generic function to extract genomic features from a FeatureDb object.
19 | }
20 | 
21 | \usage{
22 | features(x)
23 | \S4method{features}{FeatureDb}(x)
24 | }
25 | 
26 | \arguments{  
27 |   \item{x}{
28 |     A \link{FeatureDb} object.
29 |   }
30 | }
31 | 
32 | 
33 | \value{ a GRanges object }
34 | 
35 | \author{
36 |   M. Carlson
37 | }
38 | 
39 | \seealso{
40 |   \link{FeatureDb}
41 | }
42 | 
43 | \examples{
44 |   fdb <- loadDb(system.file("extdata", "FeatureDb.sqlite",
45 |                                    package="GenomicFeatures"))
46 |   features(fdb)
47 | }
48 | 


--------------------------------------------------------------------------------
/man/getPromoterSeq-methods.Rd:
--------------------------------------------------------------------------------
  1 | \name{getPromoterSeq}
  2 | 
  3 | \alias{getPromoterSeq}
  4 | \alias{getTerminatorSeq}
  5 | \alias{getPromoterSeq,GRanges-method}
  6 | \alias{getTerminatorSeq,GRanges-method}
  7 | \alias{getPromoterSeq,GRangesList-method}
  8 | \alias{getTerminatorSeq,GRangesList-method}
  9 | 
 10 | \title{Get gene promoter or terminator sequences}
 11 | 
 12 | \description{
 13 |   Extract promoter or terminator sequences for the genes or transcripts
 14 |   specified in the query (a\link{GRanges} or \link{GRangesList} object)
 15 |   from a \link[BSgenome]{BSgenome} or \link[Rsamtools]{FaFile} object.
 16 | }
 17 | 
 18 | \usage{
 19 | \S4method{getPromoterSeq}{GRanges}(query, subject, upstream=2000, downstream=200)
 20 | \S4method{getTerminatorSeq}{GRanges}(query, subject, upstream=2000, downstream=200)
 21 | 
 22 | \S4method{getPromoterSeq}{GRangesList}(query, subject, upstream=2000, downstream=200)
 23 | \S4method{getTerminatorSeq}{GRangesList}(query, subject, upstream=2000, downstream=200)
 24 | }
 25 | 
 26 | \arguments{
 27 |   \item{query}{A \link[GenomicRanges]{GRanges} or
 28 |     \link[GenomicRanges]{GRangesList} object containing genes grouped by
 29 |     transcript.
 30 |   }
 31 |   \item{subject}{A \link[BSgenome]{BSgenome} or \link[Rsamtools]{FaFile} object from which
 32 |     the sequences will be taken.}
 33 |   \item{upstream}{The number of DNA bases to include upstream of the TSS (transcription start site)}
 34 |   \item{downstream}{The number of DNA bases to include downstream of the TSS (transcription start site)}
 35 | }
 36 | 
 37 | \details{
 38 |   \code{getPromoterSeq} and \code{getTerminatorSeq} are generic functions
 39 |   dispatching on query, which is either a GRanges or a GRangesList.
 40 |   They are convenience wrappers for the \code{promoters}, \code{terminators},
 41 |   and \code{getSeq} functions.
 42 |   The purpose is to allow sequence extraction from either a
 43 |   \link[BSgenome]{BSgenome} or \link[Rsamtools]{FaFile} object.
 44 | 
 45 |   Default values for \code{upstream} and \code{downstream} were chosen based
 46 |   on our current understanding of gene regulation. On average, promoter
 47 |   regions in the mammalian genome are 5000 bp upstream and downstream of the
 48 |   transcription start site.
 49 | }
 50 | 
 51 | \value{
 52 |   A \link[Biostrings]{DNAStringSet} or
 53 |   \link[Biostrings]{DNAStringSetList} instance corresponding to the
 54 |   GRanges or GRangesList supplied in the query.
 55 | }
 56 | 
 57 | \author{Paul Shannon}
 58 | 
 59 | \seealso{
 60 |   \itemize{
 61 |     \item The \code{\link[GenomicRanges]{promoters}} man page in the
 62 |           \pkg{GenomicRanges} package for the \code{promoters()} and
 63 |           \code{terminators()} methods for \link[GenomicRanges]{GenomicRanges}
 64 |           objects.
 65 | 
 66 |     \item \code{\link[Biostrings]{getSeq}} in the \pkg{Biostrings}
 67 |           package for extracting a set of sequences from a sequence
 68 |           container like a \link[BSgenome]{BSgenome} or
 69 |           \link[Rsamtools]{FaFile} object.
 70 |   }
 71 | }
 72 | 
 73 | \examples{
 74 | library(TxDb.Hsapiens.UCSC.hg19.knownGene)
 75 | library(BSgenome.Hsapiens.UCSC.hg19)
 76 | 
 77 | 
 78 | ## A GRangesList object describing all the known Human transcripts grouped
 79 | ## by gene:
 80 | txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
 81 | tx_by_gene <- transcriptsBy(txdb, by="gene")
 82 | 
 83 | e2f3 <- "1871"  # entrez geneID for a cell cycle control transcription
 84 |                 # factor, chr6 on the plus strand
 85 | 
 86 | ## A GRanges object describing the three transcripts for gene 1871:
 87 | e2f3_tx <- tx_by_gene[[e2f3]]
 88 | 
 89 | ## Promoter sequences for gene 1871:
 90 | e2f3_promoter_seqs <- getPromoterSeq(e2f3_tx, Hsapiens,
 91 |                                      upstream=40, downstream=15)
 92 | e2f3_promoter_seqs
 93 | 
 94 | mcols(e2f3_promoter_seqs)
 95 | 
 96 | ## Terminator sequences for gene 1871:
 97 | e2f3_terminator_seqs <- getTerminatorSeq(e2f3_tx, Hsapiens,
 98 |                                          upstream=25, downstream=10)
 99 | 
100 | e2f3_terminator_seqs
101 | 
102 | mcols(e2f3_terminator_seqs)  # same as 'mcols(e2f3_promoter_seqs)'
103 | 
104 | ## All Human promoter sequences grouped by gene:
105 | getPromoterSeq(tx_by_gene, Hsapiens, upstream=6, downstream=4)
106 | }
107 | 
108 | \keyword{methods}
109 | \keyword{manip}
110 | 


--------------------------------------------------------------------------------
/man/id2name.Rd:
--------------------------------------------------------------------------------
 1 | \name{id2name}
 2 | 
 3 | \alias{id2name}
 4 | 
 5 | \title{
 6 |   Map internal ids to external names for a given feature type
 7 | }
 8 | \description{
 9 |   Utility function for retrieving the mapping from the internal ids
10 |   to the external names of a given feature type.
11 | }
12 | \usage{
13 | id2name(txdb, feature.type=c("tx", "exon", "cds"))
14 | }
15 | \arguments{
16 |   \item{txdb}{A \link{TxDb} object.}
17 |   \item{feature.type}{The feature type for which the mapping must be
18 |     retrieved.}
19 | }
20 | \details{
21 |   Transcripts, exons and CDS parts in a \link{TxDb} object are
22 |   stored in seperate tables where the primary key is an integer
23 |   called \emph{feature internal id}. This id is stored in the
24 |   \code{"tx_id"} column for transcripts, in the \code{"exon_id"}
25 |   column for exons, and in the \code{"cds_id"} column for CDS parts.
26 |   Unlike other commonly used ids like Entrez Gene IDs or Ensembl IDs,
27 |   this internal id was generated at the time the \link{TxDb}
28 |   object was created and has no meaning outside the scope of this object.
29 | 
30 |   The \code{id2name} function can be used to translate this internal
31 |   id into a more informative id or name called \emph{feature external
32 |   name}. This name is stored in the \code{"tx_name"} column for
33 |   transcripts, in the \code{"exon_name"} column for exons, and in
34 |   the \code{"cds_name"} column for CDS parts.
35 |   
36 |   Note that, unlike the feature internal id, the feature external
37 |   name is not guaranteed to be unique or even defined (the column
38 |   can contain \code{NA}s).
39 | }
40 | 
41 | \value{
42 |   A named character vector where the names are the internal ids and the
43 |   values the external names.
44 | }
45 | 
46 | \author{Hervé Pagès}
47 | 
48 | \seealso{
49 |   \itemize{
50 |     \item \code{\link{transcripts}}, \code{\link{transcriptsBy}},
51 |           and \code{\link{transcriptsByOverlaps}}, for how to extract
52 |           genomic features from a \link{TxDb} object.
53 |     \item The \link{TxDb} class.
54 |   }
55 | }
56 | \examples{
57 | txdb1_file <- system.file("extdata", "hg19_knownGene_sample.sqlite",
58 |                           package="GenomicFeatures")
59 | txdb1 <- loadDb(txdb1_file)
60 | id2name(txdb1, feature.type="tx")[1:4]
61 | id2name(txdb1, feature.type="exon")[1:4]
62 | id2name(txdb1, feature.type="cds")[1:4]
63 | 
64 | txdb2_file <- system.file("extdata", "Biomart_Ensembl_sample.sqlite",
65 |                           package="GenomicFeatures")
66 | txdb2 <- loadDb(txdb2_file)
67 | id2name(txdb2, feature.type="tx")[1:4]
68 | id2name(txdb2, feature.type="exon")[1:4]
69 | id2name(txdb2, feature.type="cds")[1:4]
70 | }
71 | 


--------------------------------------------------------------------------------
/man/makeFeatureDbFromUCSC.Rd:
--------------------------------------------------------------------------------
 1 | \name{makeFeatureDbFromUCSC}
 2 | 
 3 | \alias{supportedUCSCFeatureDbTracks}
 4 | \alias{supportedUCSCFeatureDbTables} 
 5 | \alias{UCSCFeatureDbTableSchema}
 6 | \alias{makeFeatureDbFromUCSC}
 7 | 
 8 | \title{
 9 |   [Moved to txdbmaker] Make a FeatureDb object from annotations available
10 |   at the UCSC Genome Browser
11 | }
12 | 
13 | \description{
14 |   IMPORTANT NOTE: Starting with BioC 3.19, functions
15 |   \code{supportedUCSCFeatureDbTracks()}, \code{supportedUCSCFeatureDbTables()},
16 |   \code{UCSCFeatureDbTableSchema()}, and \code{makeFeatureDbFromUCSC()} are
17 |   defined in the \pkg{txdbmaker} package.
18 | }
19 | 
20 | \seealso{
21 |   \code{txdbmaker::\link[txdbmaker]{supportedUCSCFeatureDbTracks}},
22 |   \code{txdbmaker::\link[txdbmaker]{supportedUCSCFeatureDbTables}},
23 |   \code{txdbmaker::\link[txdbmaker]{UCSCFeatureDbTableSchema}},
24 |   and \code{txdbmaker::\link[txdbmaker]{makeFeatureDbFromUCSC}}
25 |   in the \pkg{txdbmaker} package.
26 | }
27 | 
28 | 


--------------------------------------------------------------------------------
/man/makeTxDb.Rd:
--------------------------------------------------------------------------------
 1 | \name{makeTxDb}
 2 | 
 3 | \alias{makeTxDb}
 4 | 
 5 | \title{
 6 |   [Moved to txdbmaker] Make a TxDb object from user supplied annotations
 7 | }
 8 | 
 9 | \description{
10 |   IMPORTANT NOTE: Starting with BioC 3.19, the \code{makeTxDb} function
11 |   is defined in the \pkg{txdbmaker} package.
12 | }
13 | 
14 | \seealso{
15 |   \code{txdbmaker::\link[txdbmaker]{makeTxDb}} in the \pkg{txdbmaker}
16 |   package.
17 | }
18 | 
19 | 


--------------------------------------------------------------------------------
/man/makeTxDbFromBiomart.Rd:
--------------------------------------------------------------------------------
 1 | \name{makeTxDbFromBiomart}
 2 | 
 3 | \alias{makeTxDbFromBiomart}
 4 | \alias{getChromInfoFromBiomart}
 5 | 
 6 | \title{
 7 |   [Moved to txdbmaker] Make a TxDb object from annotations available
 8 |   on a BioMart database
 9 | }
10 | 
11 | \description{
12 |   IMPORTANT NOTE: Starting with BioC 3.19, functions
13 |   \code{makeTxDbFromBiomart()} and \code{getChromInfoFromBiomart()}
14 |   are defined in the \pkg{txdbmaker} package.
15 | }
16 | 
17 | \seealso{
18 |   \code{txdbmaker::\link[txdbmaker]{makeTxDbFromBiomart}} and
19 |   \code{txdbmaker::\link[txdbmaker]{getChromInfoFromBiomart}} in
20 |   the \pkg{txdbmaker} package.
21 | }
22 | 
23 | 


--------------------------------------------------------------------------------
/man/makeTxDbFromEnsembl.Rd:
--------------------------------------------------------------------------------
 1 | \name{makeTxDbFromEnsembl}
 2 | 
 3 | \alias{makeTxDbFromEnsembl}
 4 | 
 5 | \title{
 6 |   [Moved to txdbmaker] Make a TxDb object from an Ensembl database
 7 | }
 8 | 
 9 | \description{
10 |   IMPORTANT NOTE: Starting with BioC 3.19, the \code{makeTxDbFromEnsembl}
11 |   function is defined in the \pkg{txdbmaker} package.
12 | }
13 | 
14 | \seealso{
15 |   \code{txdbmaker::\link[txdbmaker]{makeTxDbFromEnsembl}} in the
16 |   \pkg{txdbmaker} package.
17 | }
18 | 
19 | 


--------------------------------------------------------------------------------
/man/makeTxDbFromGFF.Rd:
--------------------------------------------------------------------------------
 1 | \name{makeTxDbFromGFF}
 2 | 
 3 | \alias{makeTxDbFromGFF}
 4 | 
 5 | \title{
 6 |   [Moved to txdbmaker] Make a TxDb object from annotations available
 7 |   as a GFF3 or GTF file
 8 | }
 9 | 
10 | \description{
11 |   IMPORTANT NOTE: Starting with BioC 3.19, the \code{makeTxDbFromGFF}
12 |   function is defined in the \pkg{txdbmaker} package.
13 | }
14 | 
15 | \seealso{
16 |   \code{txdbmaker::\link[txdbmaker]{makeTxDbFromGFF}} in the \pkg{txdbmaker}
17 |   package.
18 | }
19 | 
20 | 


--------------------------------------------------------------------------------
/man/makeTxDbFromGRanges.Rd:
--------------------------------------------------------------------------------
 1 | \name{makeTxDbFromGRanges}
 2 | 
 3 | \alias{makeTxDbFromGRanges}
 4 | 
 5 | \title{[Moved to txdbmaker] Make a TxDb object from a GRanges object}
 6 | 
 7 | \description{
 8 |   IMPORTANT NOTE: Starting with BioC 3.19, the \code{makeTxDbFromGRanges}
 9 |   function is defined in the \pkg{txdbmaker} package.
10 | }
11 | 
12 | \seealso{
13 |   \code{txdbmaker::\link[txdbmaker]{makeTxDbFromGRanges}} in the
14 |   \pkg{txdbmaker} package.
15 | }
16 | 
17 | 


--------------------------------------------------------------------------------
/man/makeTxDbFromUCSC.Rd:
--------------------------------------------------------------------------------
 1 | \name{makeTxDbFromUCSC}
 2 | 
 3 | \alias{supportedUCSCtables}
 4 | \alias{browseUCSCtrack}
 5 | \alias{makeTxDbFromUCSC}
 6 | 
 7 | \title{
 8 |   [Moved to txdbmaker] Make a TxDb object from annotations available
 9 |   at the UCSC Genome Browser
10 | }
11 | 
12 | \description{
13 |   IMPORTANT NOTE: Starting with BioC 3.19, functions
14 |   \code{makeTxDbFromUCSC()}, \code{supportedUCSCtables()},
15 |   and \code{browseUCSCtrack()} are defined in the \pkg{txdbmaker}
16 |   package.
17 | }
18 | 
19 | \seealso{
20 |   \code{txdbmaker::\link[txdbmaker]{makeTxDbFromUCSC}},
21 |   \code{txdbmaker::\link[txdbmaker]{supportedUCSCtables}},
22 |   and \code{txdbmaker::\link[txdbmaker]{browseUCSCtrack}}
23 |   in the \pkg{txdbmaker} package.
24 | }
25 | 
26 | 


--------------------------------------------------------------------------------
/man/makeTxDbPackage.Rd:
--------------------------------------------------------------------------------
 1 | \name{makeTxDbPackage}
 2 | 
 3 | \alias{supportedMiRBaseBuildValues}
 4 | \alias{makePackageName}
 5 | \alias{makeTxDbPackage}
 6 | \alias{makeTxDbPackageFromUCSC}
 7 | \alias{makeFDbPackageFromUCSC}
 8 | \alias{makeTxDbPackageFromBiomart}
 9 | 
10 | \title{
11 |   [Moved to txdbmaker] Making a TxDb package from annotations available
12 |   at the UCSC Genome Browser, biomaRt or from another source.
13 | }
14 | 
15 | \description{
16 |   IMPORTANT NOTE: Starting with BioC 3.19, functions
17 |   \code{makeTxDbPackageFromUCSC()}, \code{makeFDbPackageFromUCSC()},
18 |   \code{makeTxDbPackageFromBiomart()}, \code{makeTxDbPackage()}
19 |   \code{supportedMiRBaseBuildValues()} and \code{makePackageName()}
20 |   are defined in the \pkg{txdbmaker} package.
21 | }
22 | 
23 | \seealso{
24 |   \code{txdbmaker::\link[txdbmaker]{makeTxDbPackageFromUCSC}},
25 |   \code{txdbmaker::\link[txdbmaker]{makeFDbPackageFromUCSC}},
26 |   \code{txdbmaker::\link[txdbmaker]{makeTxDbPackageFromBiomart}},
27 |   \code{txdbmaker::\link[txdbmaker]{makeTxDbPackage}},
28 |   \code{txdbmaker::\link[txdbmaker]{supportedMiRBaseBuildValues}},
29 |   and \code{txdbmaker::\link[txdbmaker]{makePackageName}}
30 |   in the \pkg{txdbmaker} package.
31 | }
32 | 
33 | 


--------------------------------------------------------------------------------
/man/mapIdsToRanges.Rd:
--------------------------------------------------------------------------------
 1 | \docType{methods}
 2 | \name{mapIdsToRanges}
 3 | \alias{mapIdsToRanges}
 4 | \alias{mapIdsToRanges,TxDb-method}
 5 | \title{Map IDs to Genomic Ranges}
 6 | \usage{
 7 | mapIdsToRanges(x, ...)
 8 | 
 9 | \S4method{mapIdsToRanges}{TxDb}(x, keys, type = c("cds", "exon", "tx",
10 |   "gene"), columns = NULL)
11 | }
12 | \arguments{
13 | \item{x}{Database to use for mapping}
14 | 
15 | \item{keys}{Values to lookup, passed to \code{\link{transcripts}} et. al.}
16 | 
17 | \item{type}{Types of feature to return}
18 | 
19 | \item{columns}{Additional metadata columns to include in the output}
20 | 
21 | \item{...}{Additional arguments passed to methods}
22 | }
23 | \value{
24 | \code{\link[GenomicRanges]{GRangesList}} corresponding to the keys
25 | }
26 | \description{
27 | Map IDs to Genomic Ranges
28 | }
29 | \section{Methods (by class)}{
30 | \itemize{
31 | \item \code{TxDb}: TxDb method
32 | }}
33 | \examples{
34 | library(txdbmaker)  # for makeTxDbFromGRanges()
35 | fl <- system.file(package = "GenomicFeatures", "extdata", "sample_ranges.rds")
36 | txdb <- makeTxDbFromGRanges(readRDS(fl))
37 | 
38 | keys <- list(tx_name = c("ENST00000371582", "ENST00000371588",
39 |     "ENST00000494752", "ENST00000614008", "ENST00000496771"))
40 | mapIdsToRanges(txdb, keys = keys, type = "tx")
41 | }
42 | 


--------------------------------------------------------------------------------
/man/mapRangesToIds.Rd:
--------------------------------------------------------------------------------
 1 | \docType{methods}
 2 | \name{mapRangesToIds}
 3 | \alias{mapRangesToIds}
 4 | \alias{mapRangesToIds,TxDb-method}
 5 | \title{Map Genomic Ranges to IDs}
 6 | \usage{
 7 | mapRangesToIds(x, ...)
 8 | 
 9 | \S4method{mapRangesToIds}{TxDb}(x, ranges, type = c("cds", "exon", "tx",
10 |   "gene"), columns = NULL, ...)
11 | }
12 | \arguments{
13 | \item{x}{Database to use for mapping}
14 | 
15 | \item{ranges}{range object used to subset}
16 | 
17 | \item{type}{of feature to return}
18 | 
19 | \item{columns}{additional metadata columns to include in the output.}
20 | 
21 | \item{...}{Additional arguments passed to
22 |   \code{\link[GenomicRanges]{findOverlaps}}}
23 | }
24 | \value{
25 | \code{\link[S4Vectors]{DataFrame}} of mcols from the database.
26 | }
27 | \description{
28 | Map Genomic Ranges to IDs
29 | }
30 | \section{Methods (by class)}{
31 | \itemize{
32 | \item \code{TxDb}: TxDb method
33 | }}
34 | \examples{
35 | library(txdbmaker)  # for makeTxDbFromGRanges()
36 | fl <- system.file(package = "GenomicFeatures", "extdata", "sample_ranges.rds")
37 | txdb <- makeTxDbFromGRanges(readRDS(fl))
38 | 
39 | keys <- list(tx_name = c("ENST00000371582", "ENST00000371588",
40 |     "ENST00000494752", "ENST00000614008", "ENST00000496771"))
41 | res <- mapIdsToRanges(txdb, keys = keys, type = "tx")
42 | mapRangesToIds(txdb, res, "tx")
43 | }
44 | 


--------------------------------------------------------------------------------
/man/nearest-methods.Rd:
--------------------------------------------------------------------------------
  1 | \name{nearest-methods}
  2 | 
  3 | \alias{nearest-methods}
  4 | 
  5 | \alias{distance,GenomicRanges,TxDb-method}
  6 | 
  7 | \title{Finding the nearest genomic range neighbor in a TxDb}
  8 | 
  9 | \description{
 10 |   A \code{distance()} method for TxDb objects.
 11 | }
 12 | 
 13 | \usage{
 14 | \S4method{distance}{GenomicRanges,TxDb}(x, y, ignore.strand=FALSE,
 15 |     ..., id, type=c("gene", "tx", "exon", "cds"))
 16 | }
 17 | 
 18 | \arguments{
 19 |   \item{x}{
 20 |     The query \link{GenomicRanges} instance.
 21 |   }
 22 |   \item{y}{
 23 |     A \link{TxDb} object. The \code{id} is used to extract
 24 |     ranges from the \link{TxDb} which are then used to compute the
 25 |     distance from \code{x}.
 26 |   }
 27 |   \item{id}{
 28 |     A \code{character} vector the same length as \code{x}.
 29 |     The \code{id} must be identifiers in the \link{TxDb} object.
 30 |     \code{type} indicates what type of identifier \code{id} is.
 31 |   }
 32 |   \item{type}{
 33 |     A \code{character(1)} describing the \code{id}.
 34 |     Must be one of \sQuote{gene}, \sQuote{tx}, \sQuote{exon} or
 35 |     \sQuote{cds}.
 36 |   }
 37 |   \item{ignore.strand}{
 38 |     A \code{logical} indicating if the strand of the ranges
 39 |     should be ignored. When \code{TRUE}, strand is set to \code{'+'}.
 40 |   }
 41 |   \item{...}{
 42 |     Additional arguments for methods.
 43 |   }
 44 | }
 45 | 
 46 | \details{
 47 |   This \code{distance()} method returns the distance for each range in \code{x}
 48 |   to the range extracted from the \link{TxDb} object \code{y}. Values in
 49 |   \code{id} are matched to one of \sQuote{gene_id}, \sQuote{tx_id},
 50 |   \sQuote{exon_id} or \sQuote{cds_id} identifiers in the \link{TxDb}
 51 |   and the corresponding ranges are extracted. The \code{type} argument
 52 |   specifies which identifier is represented in \code{id}. The extracted
 53 |   ranges are used in the distance calculation with the ranges in \code{x}.
 54 | 
 55 |   The method returns \code{NA} values when the genomic region defined
 56 |   by \code{id} cannot be collapsed into a single range (e.g.,
 57 |   when a gene spans multiple chromosomes) or if the \code{id}
 58 |   is not found in \code{y}.
 59 | 
 60 |   The behavior of \code{distance()} with respect to zero-width ranges
 61 |   has changed in Bioconductor 2.12. See the man page \code{?distance}
 62 |   in the \pkg{IRanges} for details.
 63 | }
 64 | 
 65 | \value{
 66 |   An integer vector of distances between the ranges in \code{x} and \code{y}.
 67 | }
 68 | 
 69 | \author{Valerie Obenchain <vobencha@fhcrc.org>}
 70 | 
 71 | \seealso{
 72 |   \itemize{
 73 |     \item \link[IRanges]{nearest-methods} in the \pkg{IRanges} package.
 74 |     \item \link[GenomicRanges]{nearest-methods} in the \pkg{GenomicRanges}
 75 |           package.
 76 |   }
 77 | }
 78 | 
 79 | \examples{
 80 | library(TxDb.Dmelanogaster.UCSC.dm3.ensGene)
 81 | txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene
 82 | gr <- GRanges(c("chr2L", "chr2R"),
 83 |               IRanges(c(100000, 200000),  width=100))
 84 | distance(gr, txdb, id=c("FBgn0259717", "FBgn0261501"), type="gene")
 85 | distance(gr, txdb, id=c("10000", "23000"), type="cds")
 86 | 
 87 | ## The id's must be in the appropriate order with respect to 'x'.
 88 | distance(gr, txdb, id=c("4", "4097"), type="tx")
 89 | 
 90 | ## 'id' "4" is on chr2L and "4097" is on chr2R.
 91 | transcripts(txdb, filter=list(tx_id=c("4", "4097")))
 92 | 
 93 | ## If we reverse the 'id' the chromosomes are incompatable with gr.
 94 | distance(gr, txdb, id=c("4097", "4"), type="tx")
 95 | 
 96 | ## distance() compares each 'x' to the corresponding 'y'.
 97 | ## If an 'id' is not found in the TxDb 'y' will not
 98 | ## be the same lenth as 'x' and an error is thrown.
 99 | \dontrun{
100 | distance(gr, txdb, id=c("FBgn0000008", "INVALID"), type="gene") ## will fail
101 | }
102 | }
103 | 
104 | \keyword{utilities}
105 | 


--------------------------------------------------------------------------------
/man/proteinToGenome.Rd:
--------------------------------------------------------------------------------
  1 | \name{proteinToGenome}
  2 | 
  3 | \alias{proteinToGenome}
  4 | \alias{proteinToGenome,GRangesList-method}
  5 | \alias{proteinToGenome,ANY-method}
  6 | 
  7 | \title{Map protein-relative coordinates to genomic coordinates}
  8 | 
  9 | \description{
 10 |   \code{proteinToGenome} is a generic function for mapping
 11 |   ranges of protein-relative positions to the genome.
 12 | 
 13 |   NOTE: This man page is for the \code{proteinToGenome} S4 generic
 14 |   function and methods defined in the \pkg{GenomicFeatures} package,
 15 |   which are (loosely) modeled on the \code{\link[ensembldb]{proteinToGenome}}
 16 |   function from the \pkg{ensembldb} package.
 17 |   See \code{?ensembldb::\link[ensembldb]{proteinToGenome}} for the latter.
 18 | }
 19 | 
 20 | \usage{
 21 | ## S4 generic function:
 22 | proteinToGenome(x, db, ...)  # dispatch is on 2nd argument 'db'
 23 | 
 24 | \S4method{proteinToGenome}{ANY}(x, db)
 25 | 
 26 | \S4method{proteinToGenome}{GRangesList}(x, db)
 27 | }
 28 | 
 29 | \arguments{
 30 |   \item{x}{
 31 |     A named \link[IRanges]{IRanges} object (or derivative) containing ranges
 32 |     of \emph{protein-relative positions} (protein-relative positions are
 33 |     positions relative to a protein sequence).
 34 | 
 35 |     The names on \code{x} must be transcript names present in \code{db}.
 36 |     More precisely, for the default \code{proteinToGenome()} method,
 37 |     \code{names(x)} must be a subset of:
 38 |     \preformatted{  mcols(transcripts(db, columns="tx_name"))$tx_name
 39 |     }
 40 |     And for the method for \link[GenomicRanges]{GRangesList} objects,
 41 |     \code{names(x)} must be a subset of:
 42 |     \preformatted{  names(db)
 43 |     }
 44 |   }
 45 |   \item{db}{
 46 |     For the default \code{proteinToGenome()} method: A \link{TxDb}
 47 |     object or any object that supports \code{\link{transcripts}()}
 48 |     and \code{\link{cdsBy}()} (e.g. an \link[ensembldb]{EnsDb} object
 49 |     from the \pkg{ensembldb} package).
 50 | 
 51 |     For the method for \link[GenomicRanges]{GRangesList} objects:
 52 |     A named \link[GenomicRanges]{GRangesList} object (or derivative)
 53 |     where each list element is a \link[GenomicRanges]{GRanges} object
 54 |     representing a CDS (the ranges in the \link[GenomicRanges]{GRanges}
 55 |     object must represent the CDS parts ordered by ascending exon rank).
 56 |   }
 57 |   \item{...}{
 58 |     Further arguments to be passed to specific methods.
 59 |   }
 60 | }
 61 | 
 62 | \details{
 63 |   The \code{proteinToGenome()} method for \link[GenomicRanges]{GRangesList}
 64 |   objects is the workhorse behind the default method. Note that the latter
 65 |   is a thin wrapper around the former, which simply does the following:
 66 |   \enumerate{
 67 |     \item Use \code{\link{cdsBy}()} to extract the CDS parts from \code{db}.
 68 |           The CDS parts are returned in a \link[GenomicRanges]{GRangesList}
 69 |           object that has the names of the transcript on it (one transcript
 70 |           name per list element).
 71 |     \item Call \code{proteinToGenome()} on \code{x} and the
 72 |           \link[GenomicRanges]{GRangesList} object returned by
 73 |           \code{\link{cdsBy}()}.
 74 |   }
 75 | }
 76 | 
 77 | \value{
 78 |   A named \link[GenomicRanges]{GRangesList} object \emph{parallel} to
 79 |   \code{x} (the transcript names on \code{x} are propagated).
 80 |   The i-th list element in the returned object is the result of mapping
 81 |   the range of protein-relative positions \code{x[i]} to the genome.
 82 | 
 83 |   Note that a given range in \code{x} can only be mapped to the genome
 84 |   if the name on it is the name of a \emph{coding} transcript. If it's
 85 |   not (i.e. if it's the name of a \emph{non-coding} transcript), then
 86 |   an empty \link[GenomicRanges]{GRanges} object is placed in the returned
 87 |   object to indicate the impossible mapping, and a warning is issued.
 88 | 
 89 |   Otherwise, if a given range in \code{x} can be mapped to the
 90 |   genome, then the result of the mapping is represented by a
 91 |   non-empty \link[GenomicRanges]{GRanges} object.
 92 |   Note that this object represents the original CDS associated to
 93 |   \code{x}, trimmed on its 5' end or 3' end, or on both.
 94 |   Furthermore, this object will have the same metadata columns as the
 95 |   \link[GenomicRanges]{GRanges} object representing the original CDS,
 96 |   plus the 2 following ones:
 97 |   \itemize{
 98 |     \item \code{protein_start}: The protein-relative start of the mapping.
 99 |     \item \code{protein_end}: The protein-relative end of the mapping.
100 |   }
101 | }
102 | 
103 | \note{
104 |   Unlike \code{ensembldb::\link[ensembldb]{proteinToGenome}()} which
105 |   can work either with Ensembl protein IDs or Ensembl transcript IDs
106 |   on \code{x}, the default \code{proteinToGenome()} method described
107 |   above only accepts \emph{transcript names} on \code{x}.
108 | 
109 |   This means that, if the user is in possession of protein IDs, they
110 |   must first replace them with the corresponding transcript IDs (referred
111 |   to as \emph{transcript names} in the context of \link{TxDb} objects).
112 |   How to do this exactly depends on the origin of those IDs (UCSC,
113 |   Ensembl, GTF/GFF3 file, FlyBase, etc...)
114 | }
115 | 
116 | \author{H. Pagès, using \code{ensembldb::proteinToGenome()} for
117 |         inspiration and design.}
118 | 
119 | \seealso{
120 |   \itemize{
121 |     \item The \code{\link[ensembldb]{proteinToGenome}} function in the
122 |           \pkg{ensembldb} package, which the \code{proteinToGenome()}
123 |           generic and methods documented in this man page are (loosely)
124 |           modeled on.
125 | 
126 |     \item \link{TxDb} objects.
127 | 
128 |     \item \link[ensembldb]{EnsDb} objects (\link{TxDb}-like objects) in
129 |           the \pkg{ensembldb} package.
130 | 
131 |     \item \code{\link{transcripts}} for extracting transcripts from a
132 |           \link{TxDb}-like object.
133 | 
134 |     \item \code{\link{cdsBy}} for extracting CDS parts from a
135 |           \link{TxDb}-like object.
136 | 
137 |     \item \link[IRanges]{IRanges} objects in the \pkg{IRanges} package.
138 | 
139 |     \item \link[GenomicRanges]{GRanges} and \link[GenomicRanges]{GRangesList}
140 |           objects in the \pkg{GenomicRanges} package.
141 |   }
142 | }
143 | 
144 | \examples{
145 | ## ---------------------------------------------------------------------
146 | ## USING TOY CDS
147 | ## ---------------------------------------------------------------------
148 | 
149 | ## CDS1 has 2 CDS parts:
150 | CDS1 <- GRanges(c("chrX:11-60:+", "chrX:101-125:+"))
151 | 
152 | ## CDS2 has 3 CDS parts:
153 | CDS2 <- GRanges(c("chrY:201-230:-", "chrY:101-125:-", "chrY:11-60:-"))
154 | 
155 | ## Put them in a GRangesList object:
156 | cds_by_tx <- GRangesList(TX1=CDS1, TX2=CDS2)
157 | cds_by_tx
158 | 
159 | x1 <- IRanges(start=8, end=20, names="TX1")
160 | proteinToGenome(x1, cds_by_tx)
161 | 
162 | x2 <- IRanges(start=c(1, 18), end=c(25, 20), names=c("TX1", "TX1"))
163 | x2
164 | proteinToGenome(x2, cds_by_tx)
165 | 
166 | x3 <- IRanges(start=8, end=15, names="TX2")
167 | proteinToGenome(x3, cds_by_tx)
168 | 
169 | x4 <- c(x3, x2)
170 | x4
171 | proteinToGenome(x4, cds_by_tx)
172 | 
173 | ## ---------------------------------------------------------------------
174 | ## USING A TxDb OBJECT
175 | ## ---------------------------------------------------------------------
176 | library(TxDb.Dmelanogaster.UCSC.dm3.ensGene)
177 | txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene
178 | 
179 | ## The first transcript (FBtr0309810) is non-coding:
180 | x <- IRanges(c(FBtr0309810="11-55", FBtr0306539="90-300"))
181 | res <- proteinToGenome(x, txdb)
182 | res
183 | }
184 | 
185 | \keyword{methods}
186 | \keyword{utilities}
187 | 


--------------------------------------------------------------------------------
/man/select-methods.Rd:
--------------------------------------------------------------------------------
  1 | \name{select-methods}
  2 | 
  3 | \alias{select-methods}
  4 | 
  5 | \alias{columns,TxDb-method}
  6 | \alias{keytypes,TxDb-method}
  7 | \alias{keys,TxDb-method}
  8 | \alias{select,TxDb-method}
  9 | 
 10 | \title{Using the "select" interface on TxDb objects}
 11 | 
 12 | \description{
 13 |   \code{select}, \code{columns} and \code{keys} can be used together to
 14 |   extract data from a \link{TxDb} object.
 15 | }
 16 | 
 17 | \details{
 18 |   In the code snippets below, \code{x} is a \link{TxDb} object.
 19 | 
 20 |   \describe{
 21 |     \item{\code{keytypes(x)}:}{
 22 |       allows the user to discover which keytypes can be passed in to
 23 |       \code{select} or \code{keys} and the \code{keytype} argument.
 24 |     }
 25 |     \item{\code{keys(x, keytype, pattern, column, fuzzy)}:}{ Return keys for
 26 |       the database contained in the \link{TxDb} object .
 27 | 
 28 |       The \code{keytype} argument specifies the kind of keys that will
 29 |       be returned. By default \code{keys} will return the "GENEID" keys
 30 |       for the database.
 31 | 
 32 |       If \code{keys} is used with \code{pattern}, it will pattern match
 33 |       on the \code{keytype}.
 34 | 
 35 |       But if the \code{column} argument is also provided along with the
 36 |       \code{pattern} argument, then \code{pattern} will be matched
 37 |       against the values in \code{column} instead.
 38 | 
 39 |       And if \code{keys} is called with \code{column} and no
 40 |       \code{pattern} argument, then it will return all keys that have
 41 |       corresponding values in the \code{column} argument.
 42 | 
 43 |       Thus, the behavior of \code{keys} all depends on how many arguments are
 44 |       specified.
 45 | 
 46 |       Use of the \code{fuzzy} argument will toggle fuzzy matching to
 47 |       TRUE or FALSE.  If \code{pattern} is not used, fuzzy is ignored.
 48 |     }
 49 |     \item{\code{columns(x)}:}{
 50 |       Show which kinds of data can be returned for the
 51 |       \link{TxDb} object.
 52 |     }
 53 |     \item{\code{select(x, keys, columns, keytype)}:}{
 54 |       When all the appropriate arguments are specified \code{select}
 55 |       will retrieve the matching data as a data.frame based on
 56 |       parameters for selected \code{keys} and \code{columns} and
 57 |       \code{keytype} arguments.
 58 |     }
 59 |   }
 60 | 
 61 | }
 62 | 
 63 | 
 64 | 
 65 | \author{Marc Carlson}
 66 | 
 67 | \seealso{
 68 |   \itemize{
 69 |     \item \link[AnnotationDbi]{AnnotationDb-class} for more descriptsion
 70 |     of methods \code{select},\code{keytypes},\code{keys} and \code{columns}.
 71 |     \item \code{\link{transcripts}}, \code{\link{transcriptsBy}},
 72 |           and \code{\link{transcriptsByOverlaps}}, for other ways to
 73 |           extract genomic features from a \link{TxDb} object.
 74 |     \item The \link{TxDb} class.
 75 |   }
 76 | }
 77 | 
 78 | \examples{
 79 | txdb_file <- system.file("extdata", "Biomart_Ensembl_sample.sqlite",
 80 |                          package="GenomicFeatures")
 81 | txdb <- loadDb(txdb_file)
 82 | txdb
 83 | 
 84 | ## find key types
 85 | keytypes(txdb)
 86 | 
 87 | ## list IDs that can be used to filter
 88 | head(keys(txdb, "GENEID"))
 89 | head(keys(txdb, "TXID"))
 90 | head(keys(txdb, "TXNAME"))
 91 | 
 92 | ## list columns that can be returned by select
 93 | columns(txdb)
 94 | 
 95 | ## call select
 96 | res <- select(txdb, head(keys(txdb, "GENEID")),
 97 |               columns=c("GENEID","TXNAME"),
 98 |               keytype="GENEID")
 99 | head(res)
100 | }
101 | 
102 | \keyword{methods}
103 | 


--------------------------------------------------------------------------------
/man/tRNAs.Rd:
--------------------------------------------------------------------------------
 1 | \name{tRNAs}
 2 | 
 3 | \alias{microRNAs}
 4 | \alias{tRNAs}
 5 | \alias{tRNAs,TxDb-method}
 6 | 
 7 | \title{
 8 |   Extract tRNA genomic ranges from an object
 9 | }
10 | 
11 | \description{
12 |   WARNING: The code base for \code{tRNAs()} is no longer actively
13 |   maintained and the function might get deprecated in the near future.
14 | 
15 |   The \code{tRNAs()} function extracts tRNA genomic ranges from a
16 |   \link{TxDb} object.
17 | }
18 | 
19 | \usage{
20 | tRNAs(x)
21 | }
22 | 
23 | \arguments{ 
24 |   \item{x}{
25 |     A \link{TxDb} object.
26 |   }
27 | }
28 | 
29 | \value{
30 |   A \link[GenomicRanges]{GRanges} object.
31 | }
32 | 
33 | \author{
34 |   M. Carlson
35 | }
36 | 
37 | \seealso{
38 |   \itemize{
39 |     \item \code{\link{transcripts}}, \code{\link{transcriptsBy}}, and
40 |           \code{\link{transcriptsByOverlaps}} for the core genomic features
41 |           extractors.
42 |     \item The \link{TxDb} class.
43 |   }
44 | }
45 | 
46 | \keyword{methods}
47 | 


--------------------------------------------------------------------------------
/man/transcriptLengths.Rd:
--------------------------------------------------------------------------------
  1 | \name{transcriptLengths}
  2 | 
  3 | \alias{transcriptLengths}
  4 | 
  5 | 
  6 | \title{Extract the transcript lengths (and other metrics) from a TxDb object}
  7 | 
  8 | \description{
  9 |   The \code{transcriptLengths} function extracts the transcript lengths from
 10 |   a \link{TxDb} object. It also returns the CDS and UTR lengths for each
 11 |   transcript if the user requested them.
 12 | }
 13 | 
 14 | \usage{
 15 | transcriptLengths(txdb, with.cds_len=FALSE,
 16 |                   with.utr5_len=FALSE, with.utr3_len=FALSE, ...)
 17 | }
 18 | 
 19 | \arguments{
 20 |   \item{txdb}{
 21 |     A \link{TxDb} object.
 22 |   }
 23 |   \item{with.cds_len, with.utr5_len, with.utr3_len}{
 24 |     \code{TRUE} or \code{FALSE}. Whether or not to also extract and return
 25 |      the CDS, 5' UTR, and 3' UTR lengths for each transcript.
 26 |   }
 27 |   \item{\dots}{
 28 |     Additional arguments used by \code{transcripts} and other accessor 
 29 |     functions.
 30 |   }
 31 | }
 32 | 
 33 | \details{
 34 |   All the lengths are counted in number of nucleotides.
 35 | 
 36 |   The length of a processed transcript is just the sum of the lengths of its
 37 |   exons. This should not be confounded with the length of the stretch of DNA
 38 |   transcribed into RNA (a.k.a. transcription unit), which can be obtained
 39 |   with \code{width(transcripts(txdb))}.
 40 | }
 41 | 
 42 | \value{
 43 |   A data frame with 1 row per transcript. The rows are guaranteed to be in
 44 |   the same order as the elements of the \link[GenomicRanges]{GRanges} object
 45 |   returned by \code{\link{transcripts}(txdb)}.
 46 |   The data frame has between 5 and 8 columns, depending on what the user
 47 |   requested via the \code{with.cds_len}, \code{with.utr5_len}, and
 48 |   \code{with.utr3_len} arguments.
 49 | 
 50 |   The first 3 columns are the same as the metadata columns of the object
 51 |   returned by
 52 | \preformatted{  transcripts(txdb, columns=c("tx_id", "tx_name", "gene_id"))
 53 | }
 54 |   that is:
 55 |   \itemize{
 56 |     \item \code{tx_id}: The internal transcript ID. This ID is unique within
 57 |           the scope of the \link{TxDb} object. It is not an official or public
 58 |           ID (like an Ensembl or FlyBase ID) or an Accession number, so it
 59 |           cannot be used to lookup the transcript in public data bases or in
 60 |           other \link{TxDb} objects. Furthermore, this ID could change when
 61 |           re-running the code that was used to make the \link{TxDb} object.
 62 |     \item \code{tx_name}: An official/public transcript name or ID that can
 63 |           be used to lookup the transcript in public data bases or in other
 64 |           \link{TxDb} objects. This column is not guaranteed to contain unique
 65 |           values and it can contain NAs.
 66 |     \item \code{gene_id}: The official/public ID of the gene that the
 67 |           transcript belongs to. Can be NA if the gene is unknown or if the
 68 |           transcript is not considered to belong to a gene.
 69 |   }
 70 | 
 71 |   The other columns are quantitative:
 72 |   \itemize{
 73 |     \item \code{nexon}: The number of exons in the transcript.
 74 |     \item \code{tx_len}: The length of the processed transcript.
 75 |     \item \code{cds_len}: [optional] The length of the CDS region of the
 76 |           processed transcript.
 77 |     \item \code{utr5_len}: [optional] The length of the 5' UTR region of
 78 |           the processed transcript.
 79 |     \item \code{utr3_len}: [optional] The length of the 3' UTR region of
 80 |           the processed transcript.
 81 |   }
 82 | }
 83 | 
 84 | \author{Hervé Pagès}
 85 | 
 86 | \seealso{
 87 |   \itemize{
 88 |     \item \code{\link{transcripts}}, \code{\link{transcriptsBy}},
 89 |           and \code{\link{transcriptsByOverlaps}}, for extracting
 90 |           genomic feature locations from a \link{TxDb}-like object.
 91 | 
 92 |     \item \code{\link{exonicParts}} and \code{\link{intronicParts}} for
 93 |           extracting non-overlapping exonic or intronic parts from a
 94 |           TxDb-like object.
 95 | 
 96 |     \item \code{\link{extractTranscriptSeqs}} for extracting transcript
 97 |           (or CDS) sequences from chromosome sequences.
 98 | 
 99 |     \item \code{\link{coverageByTranscript}} for computing coverage by
100 |           transcript (or CDS) of a set of ranges.
101 | 
102 |     \item \code{\link[txdbmaker]{makeTxDbFromUCSC}},
103 |           \code{\link[txdbmaker]{makeTxDbFromBiomart}},
104 |           and \code{\link[txdbmaker]{makeTxDbFromEnsembl}} in
105 |           the \pkg{txdbmaker} package for making a \link{TxDb}
106 |           object from online resources.
107 | 
108 |     \item \code{\link[txdbmaker]{makeTxDbFromGRanges}} and
109 |           \code{\link[txdbmaker]{makeTxDbFromGFF}} in the \pkg{txdbmaker}
110 |           package for making a \link{TxDb} object from a
111 |           \link[GenomicRanges]{GRanges} object, or from a GFF or GTF file.
112 | 
113 |     \item The \link{TxDb} class.
114 |   }
115 | }
116 | 
117 | \examples{
118 | library(TxDb.Dmelanogaster.UCSC.dm3.ensGene)
119 | txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene
120 | dm3_txlens <- transcriptLengths(txdb)
121 | head(dm3_txlens)
122 | 
123 | dm3_txlens <- transcriptLengths(txdb, with.cds_len=TRUE,
124 |                                       with.utr5_len=TRUE,
125 |                                       with.utr3_len=TRUE)
126 | head(dm3_txlens)
127 | 
128 | ## When cds_len is 0 (non-coding transcript), utr5_len and utr3_len
129 | ## must also be 0:
130 | non_coding <- dm3_txlens[dm3_txlens$cds_len == 0, ]
131 | stopifnot(all(non_coding[6:8] == 0))
132 | 
133 | ## When cds_len is not 0 (coding transcript), cds_len + utr5_len +
134 | ## utr3_len must be equal to tx_len:
135 | coding <- dm3_txlens[dm3_txlens$cds_len != 0, ]
136 | stopifnot(all(rowSums(coding[6:8]) == coding[[5]]))
137 | 
138 | ## A sanity check:
139 | stopifnot(identical(dm3_txlens$tx_id, mcols(transcripts(txdb))$tx_id))
140 | }
141 | 
142 | \keyword{manip}
143 | 


--------------------------------------------------------------------------------
/man/transcriptLocs2refLocs.Rd:
--------------------------------------------------------------------------------
  1 | \name{transcriptLocs2refLocs}
  2 | 
  3 | \alias{transcriptWidths}
  4 | \alias{transcriptLocs2refLocs}
  5 | 
  6 | \title{Converting transcript-based locations into reference-based locations}
  7 | 
  8 | \description{
  9 |   \code{transcriptLocs2refLocs} converts transcript-based
 10 |   locations into reference-based (aka chromosome-based or genomic)
 11 |   locations.
 12 | 
 13 |   \code{transcriptWidths} computes the lengths of the transcripts
 14 |   (called the "widths" in this context) based on the boundaries
 15 |   of their exons.
 16 | }
 17 | 
 18 | \usage{
 19 | transcriptLocs2refLocs(tlocs,
 20 |         exonStarts=list(), exonEnds=list(), strand=character(0),
 21 |         decreasing.rank.on.minus.strand=FALSE, error.if.out.of.bounds=TRUE)
 22 | 
 23 | transcriptWidths(exonStarts=list(), exonEnds=list())
 24 | }
 25 | 
 26 | \arguments{
 27 |   \item{tlocs}{
 28 |     A list of integer vectors of the same length as \code{exonStarts}
 29 |     and \code{exonEnds}. Each element in \code{tlocs} must contain
 30 |     transcript-based locations.
 31 |   }
 32 |   \item{exonStarts, exonEnds}{
 33 |     The starts and ends of the exons, respectively.
 34 | 
 35 |     Each argument can be a list of integer vectors,
 36 |     an \link[IRanges]{IntegerList} object,
 37 |     or a character vector where each element is a
 38 |     comma-separated list of integers.
 39 |     In addition, the lists represented by \code{exonStarts}
 40 |     and \code{exonEnds} must have the same shape i.e.
 41 |     have the same lengths and have elements of the same lengths.
 42 |     The length of \code{exonStarts} and \code{exonEnds}
 43 |     is the number of transcripts.
 44 |   }
 45 |   \item{strand}{
 46 |     A character vector of the same length as \code{exonStarts} and
 47 |     \code{exonEnds} specifying the strand (\code{"+"} or \code{"-"})
 48 |     from which the transcript is coming.
 49 |   }
 50 |   \item{decreasing.rank.on.minus.strand}{
 51 |     \code{TRUE} or \code{FALSE}.
 52 |     Describes the order of exons in transcripts located on the minus strand:
 53 |     are they ordered by increasing (default) or decreasing rank?
 54 |   }
 55 |   \item{error.if.out.of.bounds}{
 56 |     \code{TRUE} or \code{FALSE}.
 57 |     Controls how out of bound \code{tlocs} are handled: an error is thrown
 58 |     (default) or \code{NA} is returned.
 59 |   }
 60 | }
 61 | 
 62 | \value{
 63 |   For \code{transcriptLocs2refLocs}: A list of integer vectors of the same
 64 |   shape as \code{tlocs}.
 65 | 
 66 |   For \code{transcriptWidths}: An integer vector with one element per
 67 |   transcript.
 68 | }
 69 | 
 70 | \author{Hervé Pagès}
 71 | 
 72 | \seealso{
 73 |   \itemize{
 74 |     \item \code{\link{extractTranscriptSeqs}} for extracting transcript
 75 |           (or CDS) sequences from chromosomes.
 76 | 
 77 |     \item \code{\link{coverageByTranscript}} for computing coverage by
 78 |           transcript (or CDS) of a set of ranges.
 79 |   }
 80 | }
 81 | 
 82 | \examples{
 83 | ## ---------------------------------------------------------------------
 84 | ## WITH A SMALL SET OF HUMAN TRANSCRIPTS
 85 | ## ---------------------------------------------------------------------
 86 | txdb_file <- system.file("extdata", "hg19_knownGene_sample.sqlite",
 87 |                          package="GenomicFeatures")
 88 | txdb <- loadDb(txdb_file)
 89 | ex_by_tx <- exonsBy(txdb, by="tx", use.names=TRUE)
 90 | genome <- BSgenome::getBSgenome("hg19")  # load the hg19 genome
 91 | tx_seqs <- extractTranscriptSeqs(genome, ex_by_tx)
 92 | 
 93 | ## Get the reference-based locations of the first 4 (5' end)
 94 | ## and last 4 (3' end) nucleotides in each transcript:
 95 | tlocs <- lapply(width(tx_seqs), function(w) c(1:4, (w-3):w))
 96 | tx_strand <- sapply(strand(ex_by_tx), runValue)
 97 | 
 98 | ## Note that, because of how we made them, 'tlocs', 'start(ex_by_tx)',
 99 | ## 'end(ex_by_tx)' and 'tx_strand' are "parallel" objects i.e. they
100 | ## have the same length, and, for any valid positional index, elements
101 | ## at this position are corresponding to each other. This is how
102 | ## transcriptLocs2refLocs() expects them to be!
103 | rlocs <- transcriptLocs2refLocs(tlocs,
104 |              start(ex_by_tx), end(ex_by_tx),
105 |              tx_strand, decreasing.rank.on.minus.strand=TRUE)
106 | 
107 | ## ---------------------------------------------------------------------
108 | ## WITH TWO WORM TRANSCRIPTS: ZC101.3.1 AND F37B1.1.1
109 | ## ---------------------------------------------------------------------
110 | library(TxDb.Celegans.UCSC.ce11.ensGene)
111 | txdb <- TxDb.Celegans.UCSC.ce11.ensGene
112 | my_tx_names <- c("ZC101.3.1", "F37B1.1.1")
113 | ## Both transcripts are on chromosome II, the first one on its positive
114 | ## strand and the second one on its negative strand:
115 | my_tx <- transcripts(txdb, filter=list(tx_name=my_tx_names))
116 | my_tx
117 | 
118 | ## Using transcripts stored in a GRangesList object:
119 | ex_by_tx <- exonsBy(txdb, use.names=TRUE)[my_tx_names]
120 | genome <- getBSgenome("ce11")  # load the ce11 genome
121 | tx_seqs <- extractTranscriptSeqs(genome, ex_by_tx)
122 | tx_seqs
123 | 
124 | ## Since the 2 transcripts are on the same chromosome, an alternative
125 | ## is to store them in an IRangesList object and use that object with
126 | ## extractTranscriptSeqs():
127 | ex_by_tx2 <- ranges(ex_by_tx)
128 | tx_seqs2 <- extractTranscriptSeqs(genome$chrII, ex_by_tx2,
129 |                                   strand=strand(my_tx))
130 | stopifnot(identical(as.character(tx_seqs), as.character(tx_seqs2)))
131 | 
132 | ## Store exon starts and ends in two IntegerList objects for use with
133 | ## transcriptWidths() and transcriptLocs2refLocs():
134 | exon_starts <- start(ex_by_tx)
135 | exon_ends <- end(ex_by_tx)
136 | 
137 | ## Same as 'width(tx_seqs)':
138 | transcriptWidths(exonStarts=exon_starts, exonEnds=exon_ends)
139 | 
140 | transcriptLocs2refLocs(list(c(1:2, 202:205, 1687:1688),
141 |                             c(1:2, 193:196, 721:722)),
142 |                        exonStarts=exon_starts,
143 |                        exonEnds=exon_ends,
144 |                        strand=c("+","-"))
145 | 
146 | ## A sanity check:
147 | ref_locs <- transcriptLocs2refLocs(list(1:1688, 1:722),
148 |                                    exonStarts=exon_starts,
149 |                                    exonEnds=exon_ends,
150 |                                    strand=c("+","-"))
151 | stopifnot(genome$chrII[ref_locs[[1]]] == tx_seqs[[1]])
152 | stopifnot(complement(genome$chrII)[ref_locs[[2]]] == tx_seqs[[2]])
153 | }
154 | 
155 | \keyword{manip}
156 | 


--------------------------------------------------------------------------------
/man/transcriptsBy.Rd:
--------------------------------------------------------------------------------
  1 | \name{transcriptsBy}
  2 | 
  3 | \alias{transcriptsBy}
  4 | \alias{transcriptsBy,TxDb-method}
  5 | \alias{exonsBy}
  6 | \alias{exonsBy,TxDb-method}
  7 | \alias{cdsBy}
  8 | \alias{cdsBy,TxDb-method}
  9 | \alias{intronsByTranscript}
 10 | \alias{intronsByTranscript,TxDb-method}
 11 | \alias{fiveUTRsByTranscript}
 12 | \alias{fiveUTRsByTranscript,TxDb-method}
 13 | \alias{threeUTRsByTranscript}
 14 | \alias{threeUTRsByTranscript,TxDb-method}
 15 | 
 16 | \title{
 17 |   Extract and group genomic features of a given type from a TxDb-like object
 18 | }
 19 | \description{
 20 |   Generic functions to extract genomic features of a given type
 21 |   grouped based on another type of genomic feature.
 22 |   This page documents the methods for \link{TxDb} objects only.
 23 | }
 24 | \usage{
 25 | transcriptsBy(x, by=c("gene", "exon", "cds"), ...)
 26 | \S4method{transcriptsBy}{TxDb}(x, by=c("gene", "exon", "cds"), use.names=FALSE)
 27 | 
 28 | exonsBy(x, by=c("tx", "gene"), ...)
 29 | \S4method{exonsBy}{TxDb}(x, by=c("tx", "gene"), use.names=FALSE)
 30 | 
 31 | cdsBy(x, by=c("tx", "gene"), ...)
 32 | \S4method{cdsBy}{TxDb}(x, by=c("tx", "gene"), use.names=FALSE)
 33 | 
 34 | intronsByTranscript(x, ...)
 35 | \S4method{intronsByTranscript}{TxDb}(x, use.names=FALSE)
 36 | 
 37 | fiveUTRsByTranscript(x, ...)
 38 | \S4method{fiveUTRsByTranscript}{TxDb}(x, use.names=FALSE)
 39 | 
 40 | threeUTRsByTranscript(x, ...)
 41 | \S4method{threeUTRsByTranscript}{TxDb}(x, use.names=FALSE)
 42 | }
 43 | \arguments{
 44 |   \item{x}{A \link{TxDb} object.}
 45 |   \item{...}{Arguments to be passed to or from methods.}
 46 |   \item{by}{One of \code{"gene"}, \code{"exon"}, \code{"cds"} or \code{"tx"}.
 47 |     Determines the grouping.}
 48 |   \item{use.names}{Controls how to set the names of the returned
 49 |     \link[GenomicRanges]{GRangesList} object.
 50 |     These functions return all the features of a given type (e.g.
 51 |     all the exons) grouped by another feature type (e.g. grouped by
 52 |     transcript) in a \link[GenomicRanges]{GRangesList} object.
 53 |     By default (i.e. if \code{use.names} is \code{FALSE}), the
 54 |     names of this \link[GenomicRanges]{GRangesList} object
 55 |     (aka the group names) are the internal ids of the features
 56 |     used for grouping (aka the grouping features), which are
 57 |     guaranteed to be unique.
 58 |     If \code{use.names} is \code{TRUE}, then the names of the
 59 |     grouping features are used instead of their internal ids.
 60 |     For example, when grouping by transcript (\code{by="tx"}),
 61 |     the default group names are the transcript internal ids
 62 |     (\code{"tx_id"}). But, if \code{use.names=TRUE}, the group
 63 |     names are the transcript names (\code{"tx_name"}).
 64 |     Note that, unlike the feature ids, the feature names are not
 65 |     guaranteed to be unique or even defined (they could be all
 66 |     \code{NA}s). A warning is issued when this happens.
 67 |     See \code{?\link{id2name}} for more information about
 68 |     feature internal ids and feature external names and how
 69 |     to map the formers to the latters.
 70 | 
 71 |     Finally, \code{use.names=TRUE} cannot be used when grouping
 72 |     by gene \code{by="gene"}. This is because, unlike for the
 73 |     other features, the gene ids are external ids (e.g. Entrez
 74 |     Gene or Ensembl ids) so the db doesn't have a \code{"gene_name"}
 75 |     column for storing alternate gene names.
 76 |   }
 77 | }
 78 | \details{
 79 |   These functions return a \link[GenomicRanges]{GRangesList} object
 80 |   where the ranges within each of the elements are ordered according
 81 |   to the following rule:
 82 | 
 83 |   When using \code{exonsBy} or \code{cdsBy} with \code{by="tx"},
 84 |   the returned exons or CDS parts are ordered by ascending rank for
 85 |   each transcript, that is, by their position in the transcript.
 86 |   In all other cases, the ranges will be ordered by chromosome, strand,
 87 |   start, and end values.
 88 | }
 89 | \value{A \link[GenomicRanges]{GRangesList} object.}
 90 | \author{
 91 |   M. Carlson, P. Aboyoun and H. Pagès
 92 | }
 93 | \seealso{
 94 |   \itemize{
 95 |     \item \code{\link{transcripts}} and \code{\link{transcriptsByOverlaps}}
 96 |           for more ways to extract genomic features
 97 |           from a \link{TxDb}-like object.
 98 | 
 99 |     \item \code{\link{transcriptLengths}} for extracting the transcript
100 |           lengths (and other metrics) from a \link{TxDb} object.
101 | 
102 |     \item \code{\link{exonicParts}} and \code{\link{intronicParts}} for
103 |           extracting non-overlapping exonic or intronic parts from a
104 |           TxDb-like object.
105 | 
106 |     \item \code{\link{extendExonsIntoIntrons}} for extending exons
107 |           into their adjacent introns.
108 | 
109 |     \item \code{\link{extractTranscriptSeqs}} for extracting transcript
110 |           (or CDS) sequences from chromosome sequences.
111 | 
112 |     \item \code{\link{coverageByTranscript}} for computing coverage by
113 |           transcript (or CDS) of a set of ranges.
114 | 
115 |     \item \link[GenomicFeatures]{select-methods} for how to use the
116 |           simple "select" interface to extract information from a
117 |           \link{TxDb} object.
118 | 
119 |     \item \code{\link{id2name}} for mapping \link{TxDb} internal ids
120 |           to external names for a given feature type.
121 | 
122 |     \item The \link{TxDb} class.
123 |   }
124 | }
125 | \examples{
126 | txdb_file <- system.file("extdata", "hg19_knownGene_sample.sqlite",
127 |                          package="GenomicFeatures")
128 | txdb <- loadDb(txdb_file)
129 | 
130 | ## Extract the transcripts grouped by gene:
131 | transcriptsBy(txdb, "gene")
132 | 
133 | ## Extract the exons grouped by gene:
134 | exonsBy(txdb, "gene")
135 | 
136 | ## Extract the CDS parts grouped by transcript:
137 | cds_by_tx0 <- cdsBy(txdb, "tx")
138 | ## With more informative group names:
139 | cds_by_tx1 <- cdsBy(txdb, "tx", use.names=TRUE)
140 | ## Note that 'cds_by_tx1' can also be obtained with:
141 | names(cds_by_tx0) <- id2name(txdb, feature.type="tx")[names(cds_by_tx0)]
142 | stopifnot(identical(cds_by_tx0, cds_by_tx1))
143 | 
144 | ## Extract the introns grouped by transcript:
145 | intronsByTranscript(txdb)
146 | 
147 | ## Extract the 5' UTRs grouped by transcript:
148 | fiveUTRsByTranscript(txdb)
149 | fiveUTRsByTranscript(txdb, use.names=TRUE)  # more informative group names
150 | }
151 | 
152 | \keyword{methods}
153 | 


--------------------------------------------------------------------------------
/man/transcriptsByOverlaps.Rd:
--------------------------------------------------------------------------------
  1 | \name{transcriptsByOverlaps}
  2 | 
  3 | \alias{transcriptsByOverlaps}
  4 | \alias{transcriptsByOverlaps,TxDb-method}
  5 | \alias{exonsByOverlaps}
  6 | \alias{exonsByOverlaps,TxDb-method}
  7 | \alias{cdsByOverlaps}
  8 | \alias{cdsByOverlaps,TxDb-method}
  9 | 
 10 | \title{
 11 |   Extract genomic features from a TxDb-like object based on their
 12 |   genomic location
 13 | }
 14 | \description{
 15 |   Generic functions to extract genomic features for specified genomic
 16 |   locations.
 17 |   This page documents the methods for \link{TxDb} objects only.
 18 | }
 19 | \usage{
 20 | transcriptsByOverlaps(x, ranges,
 21 |                       maxgap = -1L, minoverlap = 0L,
 22 |                       type = c("any", "start", "end"), ...)
 23 | \S4method{transcriptsByOverlaps}{TxDb}(x, ranges,
 24 |                       maxgap = -1L, minoverlap = 0L,
 25 |                       type = c("any", "start", "end"),
 26 |                       columns = c("tx_id", "tx_name"))
 27 | 
 28 | exonsByOverlaps(x, ranges,
 29 |                 maxgap = -1L, minoverlap = 0L,
 30 |                 type = c("any", "start", "end"), ...)
 31 | \S4method{exonsByOverlaps}{TxDb}(x, ranges,
 32 |                 maxgap = -1L, minoverlap = 0L,
 33 |                 type = c("any", "start", "end"),
 34 |                 columns = "exon_id")
 35 | 
 36 | cdsByOverlaps(x, ranges,
 37 |               maxgap = -1L, minoverlap = 0L,
 38 |               type = c("any", "start", "end"), ...)
 39 | \S4method{cdsByOverlaps}{TxDb}(x, ranges,
 40 |               maxgap = -1L, minoverlap = 0L,
 41 |               type = c("any", "start", "end"),
 42 |               columns = "cds_id")
 43 | }
 44 | \arguments{  
 45 |   \item{x}{A \link{TxDb} object.}
 46 |   \item{ranges}{A \link[GenomicRanges]{GRanges} object to restrict the output.}
 47 |   \item{maxgap,minoverlap,type}{
 48 |     Used in the internal call to \code{findOverlaps()} to detect overlaps.
 49 |     See \code{?\link[IRanges]{findOverlaps}} in the \pkg{IRanges} package
 50 |     for a description of these arguments.
 51 |   }
 52 |   \item{...}{Arguments to be passed to or from methods.}
 53 |   \item{columns}{Columns to include in the output.
 54 |     See \code{?\link{transcripts}} for the possible values.}
 55 | }
 56 | \details{
 57 |   These functions subset the results of \code{\link{transcripts}},
 58 |   \code{\link{exons}}, and \code{\link{cds}} function calls with
 59 |   using the results of \code{\link[IRanges]{findOverlaps}}
 60 |   calls based on the specified \code{ranges}.
 61 | }
 62 | \value{ a GRanges object }
 63 | \author{
 64 |   P. Aboyoun
 65 | }
 66 | \seealso{
 67 |   \itemize{
 68 |     \item \code{\link{transcripts}} and \code{\link{transcriptsBy}}
 69 |           for more ways to extract genomic features
 70 |           from a \link{TxDb}-like object.
 71 | 
 72 |     \item \code{\link{transcriptLengths}} for extracting the transcript
 73 |           lengths (and other metrics) from a \link{TxDb} object.
 74 | 
 75 |     \item \code{\link{exonicParts}} and \code{\link{intronicParts}} for
 76 |           extracting non-overlapping exonic or intronic parts from a
 77 |           TxDb-like object.
 78 | 
 79 |     \item \code{\link{extractTranscriptSeqs}} for extracting transcript
 80 |           (or CDS) sequences from chromosome sequences.
 81 | 
 82 |     \item \code{\link{coverageByTranscript}} for computing coverage by
 83 |           transcript (or CDS) of a set of ranges.
 84 | 
 85 |     \item \link[GenomicFeatures]{select-methods} for how to use the
 86 |           simple "select" interface to extract information from a
 87 |           \link{TxDb} object.
 88 | 
 89 |     \item \code{\link{id2name}} for mapping \link{TxDb} internal ids
 90 |           to external names for a given feature type.
 91 | 
 92 |     \item The \link{TxDb} class.
 93 |   }
 94 | }
 95 | \examples{
 96 | txdb <- loadDb(system.file("extdata", "hg19_knownGene_sample.sqlite",
 97 |                            package="GenomicFeatures"))
 98 | gr <- GRanges(Rle("chr1", 2),
 99 |               IRanges(c(500,10500), c(10000,30000)),
100 |               strand = Rle("-", 2))
101 | transcriptsByOverlaps(txdb, gr)
102 | }
103 | 
104 | \keyword{methods}
105 | 


--------------------------------------------------------------------------------
/tests/run_unitTests.R:
--------------------------------------------------------------------------------
1 | require("GenomicFeatures") || stop("unable to load GenomicFeatures package")
2 | GenomicFeatures:::.test()
3 | 


--------------------------------------------------------------------------------