├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS ├── R ├── AllClasses.R ├── AllGenerics.R ├── asBam.R ├── countBam.R ├── filterBam.R ├── idxstatsBam.R ├── indexBam.R ├── io_bam.R ├── mergeBam.R ├── methods-ApplyPileupsParam.R ├── methods-BamFile.R ├── methods-BamFileList.R ├── methods-BamSampler.R ├── methods-BamViews.R ├── methods-BcfFile.R ├── methods-FaFile.R ├── methods-PileupFiles.R ├── methods-RsamtoolsFile.R ├── methods-RsamtoolsFileList.R ├── methods-ScanBVcfParam.R ├── methods-ScanBamParam.R ├── methods-TabixFile.R ├── phred2ASCIIOffset.R ├── pileup.R ├── pkgconfig.R ├── quickBamFlagSummary.R ├── readPileup.R ├── scanBam.R ├── scanBamHeader.R ├── scanBcf.R ├── scanBcfHeader.R ├── sortBam.R ├── testPairedEndBam.R ├── utilities.R ├── zip_compression.R └── zzz.R ├── README.md ├── inst ├── extdata │ ├── ce2dict1.fa │ ├── ce2dict1.fa.fai │ ├── ex1.bam │ ├── ex1.bam.bai │ ├── ex1.bcf.gz │ ├── ex1.bcf.gz.csi │ ├── ex1.sam │ ├── ex1.vcf.gz │ ├── ex1.vcf.gz.csi │ ├── example.gtf.gz │ ├── example.gtf.gz.tbi │ ├── example_from_SAM_Spec.bam │ ├── example_from_SAM_Spec.bam.bai │ ├── example_from_SAM_Spec.sam │ ├── no_which_buffered_pileup.bam │ ├── no_which_buffered_pileup.bam.bai │ ├── no_which_buffered_pileup.sam │ ├── no_which_whole_file.bam │ ├── no_which_whole_file.bam.bai │ ├── no_which_whole_file.sam │ ├── olaps.Rda │ ├── pileup.txt │ ├── querybins.bam │ ├── querybins.bam.bai │ ├── querybins.sam │ ├── revbins.bam │ ├── revbins.bam.bai │ ├── revbins.sam │ ├── samtools-github.txt │ ├── slxMaq09_urls.txt │ ├── tagfilter.bam │ ├── tagfilter.bam.bai │ ├── tagfilter.sam │ ├── tiny.bam │ ├── tiny.bam.bai │ ├── tiny.sam │ └── tophat │ │ ├── README │ │ ├── accepted_hits.sam │ │ ├── coverage.wig │ │ └── junctions.bed ├── scripts │ ├── BamViews-1000g.R │ ├── features.R │ ├── remote_test.R │ └── update-samtools.sh └── unitTests │ ├── cases │ ├── RNEXT.bam │ ├── ex1.sam.gz │ ├── ex1_noindex.bam │ 
├── ex1_shuf1000.bam │ ├── ex1_shuf1000.bam.bai │ ├── ex1_unsort.bam │ ├── ex1_zero_index.bam.bai │ ├── no_SAMPLE_header.vcf.gz │ ├── no_header_line.vcf.gz │ ├── pileup-no-stars.txt │ ├── plp_refskip.bam │ └── plp_refskip.bam.bai │ ├── test_BamFile.R │ ├── test_BamViews.R │ ├── test_BcfFile.R │ ├── test_FaFile.R │ ├── test_RsamtoolsFile.R │ ├── test_TabixFile.R │ ├── test_applyPileups.R │ ├── test_asBam.R │ ├── test_bam.R │ ├── test_bam_count.R │ ├── test_bam_header.R │ ├── test_compression.R │ ├── test_mapqfilter.R │ ├── test_phred2ASCIIOffset.R │ ├── test_pileup_nowhich.R │ ├── test_pileup_querybins.R │ ├── test_pileup_revbins.R │ ├── test_pileup_single_range.R │ ├── test_readPileup.R │ ├── test_scanBamFlag.R │ ├── test_sortBam_test.R │ ├── test_tagfilter.R │ ├── test_testPairedEndBam.R │ └── test_utilities.R ├── man ├── ApplyPileupsParam-class.Rd ├── BamFile-class.Rd ├── BamViews-class.Rd ├── BcfFile-class.Rd ├── FaFile-class.Rd ├── PileupFiles-class.Rd ├── Rsamtools-package.Rd ├── RsamtoolsFile-class.Rd ├── RsamtoolsFileList-class.Rd ├── ScanBamParam-class.Rd ├── ScanBcfParam-class.Rd ├── TabixFile-class.Rd ├── applyPileups.Rd ├── defunct.Rd ├── deprecated.Rd ├── headerTabix.Rd ├── indexTabix.Rd ├── pileup.Rd ├── quickBamFlagSummary.Rd ├── readPileup.Rd ├── scanBam.Rd ├── scanBcf.Rd ├── scanFa.Rd ├── scanTabix.Rd ├── seqnamesTabix.Rd ├── testPairedEndBam.Rd └── zip.Rd ├── migration_notes.md ├── src ├── BamFileIterator.h ├── BamIterator.h ├── BamRangeIterator.h ├── Biostrings_stubs.c ├── COMPAT_bcf_hdr_read.c ├── COMPAT_bcf_hdr_read.h ├── GenomicPosition.h ├── IRanges_stubs.c ├── Makevars ├── PileupBuffer.cpp ├── PileupBuffer.h ├── PileupBufferShim.h ├── PosCache.h ├── PosCacheColl.cpp ├── PosCacheColl.h ├── R_init_Rsamtools.c ├── ResultManager.cpp ├── ResultManager.h ├── S4Vectors_stubs.c ├── Template.h ├── XVector_stubs.c ├── as_bam.c ├── as_bam.h ├── bam.c ├── bam_data.c ├── bam_data.h ├── bam_mate_iter.cpp ├── bam_mate_iter.h ├── bam_plbuf.c ├── bam_sort.c 
├── bambuffer.c ├── bambuffer.h ├── bamfile.c ├── bamfile.h ├── bcffile.c ├── bcffile.h ├── bedidx.c ├── encode.c ├── encode.h ├── fafile.c ├── fafile.h ├── idxstats.c ├── idxstats.h ├── io_sam.c ├── io_sam.h ├── pbuffer_wrapper.cpp ├── pbuffer_wrapper.h ├── pileup.cpp ├── pileup.h ├── pileupbam.c ├── pileupbam.h ├── sam_opts.c ├── sam_utils.c ├── samtools_patch.c ├── samtools_patch.h ├── scan_bam_data.c ├── scan_bam_data.h ├── tabixfile.c ├── tabixfile.h ├── tagfilter.c ├── tagfilter.h ├── utilities.c ├── utilities.h ├── zip_compression.c └── zip_compression.h ├── tests └── Rsamtools_unit_tests.R └── vignettes └── Rsamtools-Overview.Rmd /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help improve this package 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Please ask questions about how to use this package on the Bioconductor 11 | support site, https://support.bioconductor.org 12 | 13 | **Describe the bug** 14 | Please provide a clear and concise description of what the bug is. 15 | 16 | **To Reproduce** 17 | Please provide minimal R code to reproduce the example. 18 | 19 | **Expected behavior** 20 | A clear and concise description of what you expected to happen. 21 | 22 | **sessionInfo** 23 | Please report the output of `sessionInfo()` here. 
24 | 25 | ```{r} 26 | ## output of `sessionInfo()` here 27 | ``` 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this package 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Please ask questions about how to use this package on the Bioconductor 11 | support site, https://support.bioconductor.org 12 | 13 | **Is your feature request related to a problem? Please describe.** 14 | Please provide a clear and concise description of what the problem 15 | is. Ex. I'm always frustrated when [...] 16 | 17 | **Describe the solution you'd like** 18 | A clear and concise description of what you want to happen. 19 | 20 | **Describe alternatives you've considered** 21 | A clear and concise description of any alternative solutions or 22 | features you've considered. 23 | 24 | **Additional context** 25 | Add any other context about the feature request here. 26 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: Rsamtools 2 | Type: Package 3 | Title: Binary alignment (BAM), FASTA, variant call (BCF), and tabix 4 | file import 5 | Description: This package provides an interface to the 'samtools', 6 | 'bcftools', and 'tabix' utilities for manipulating SAM (Sequence 7 | Alignment / Map), FASTA, binary variant call (BCF) and compressed 8 | indexed tab-delimited (tabix) files. 
9 | biocViews: DataImport, Sequencing, Coverage, Alignment, QualityControl 10 | URL: https://bioconductor.org/packages/Rsamtools 11 | Video: https://www.youtube.com/watch?v=Rfon-DQYbWA&list=UUqaMSQd_h-2EDGsU6WDiX0Q 12 | BugReports: https://github.com/Bioconductor/Rsamtools/issues 13 | Version: 2.25.0 14 | License: Artistic-2.0 | file LICENSE 15 | Encoding: UTF-8 16 | Authors@R: c( 17 | person("Martin", "Morgan", role = "aut"), 18 | person("Hervé", "Pagès", role = "aut"), 19 | person("Valerie", "Obenchain", role = "aut"), 20 | person("Nathaniel", "Hayden", role = "aut"), 21 | person("Busayo", "Samuel", role = "ctb", 22 | comment = "Converted Rsamtools vignette from Sweave to RMarkdown / HTML."), 23 | person("Bioconductor Package Maintainer", 24 | email = "maintainer@bioconductor.org", role = "cre")) 25 | Depends: methods, GenomeInfoDb (>= 1.1.3), GenomicRanges (>= 1.31.8), 26 | Biostrings (>= 2.47.6), R (>= 3.5.0) 27 | Imports: utils, BiocGenerics (>= 0.25.1), S4Vectors (>= 0.17.25), 28 | IRanges (>= 2.13.12), XVector (>= 0.19.7), bitops, BiocParallel, stats 29 | Suggests: GenomicAlignments, ShortRead (>= 1.19.10), GenomicFeatures, 30 | TxDb.Dmelanogaster.UCSC.dm3.ensGene, 31 | TxDb.Hsapiens.UCSC.hg18.knownGene, RNAseqData.HNRNPC.bam.chr14, 32 | BSgenome.Hsapiens.UCSC.hg19, RUnit, BiocStyle, knitr 33 | LinkingTo: Rhtslib (>= 3.3.1), S4Vectors, IRanges, XVector, Biostrings 34 | LazyLoad: yes 35 | SystemRequirements: GNU make 36 | VignetteBuilder: knitr 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | File inst/extdata/ex1.sam is licensed as follows: 2 | 3 | The MIT License 4 | 5 | Copyright (c) 2008-2009 Genome Research Ltd. 
6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 
24 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib(Rsamtools, .registration=TRUE) 2 | 3 | import(methods) 4 | 5 | importFrom(bitops, bitAnd) 6 | 7 | importFrom(utils, read.table) # normalizePath --> base in R-2.13 8 | 9 | importFrom(stats, rbinom, setNames) 10 | 11 | importClassesFrom(BiocGenerics, url, gzfile, unz, pipe) 12 | 13 | importFrom(BiocGenerics, cbind, duplicated, eval, Filter, lapply, Map, 14 | order, path, paste, rbind, Reduce, rep.int, sapply, setdiff, 15 | tapply, union, unique) 16 | 17 | importFrom(BiocParallel, bplapply) 18 | 19 | import(S4Vectors) 20 | 21 | import(IRanges) 22 | 23 | importClassesFrom(GenomeInfoDb, Seqinfo) 24 | 25 | importFrom(GenomeInfoDb, Seqinfo, seqinfo, seqlevels, sortSeqlevels) 26 | 27 | importClassesFrom(GenomicRanges, GRanges, GRangesList) 28 | 29 | importFrom(GenomicRanges, GRanges, GRangesList) 30 | 31 | importMethodsFrom(GenomicRanges, seqnames, strand) 32 | 33 | import(XVector) 34 | 35 | importClassesFrom(Biostrings, DNAStringSet, BStringSet, PhredQuality) 36 | 37 | importFrom(Biostrings, DNA_ALPHABET, readDNAStringSet, 38 | readRNAStringSet, readAAStringSet, 39 | DNAStringSet, BStringSet, PhredQuality, 40 | getSeq, reverseComplement) 41 | 42 | exportPattern("^[^\\.]") 43 | 44 | exportMethods(length, names, "[", "[[", show, getSeq, seqinfo) 45 | 46 | S3method(close, BamFile) 47 | S3method(close, BcfFile) 48 | S3method(close, FaFile) 49 | S3method(close, TabixFile) 50 | S3method(close, RsamtoolsFileList) 51 | 52 | S3method(open, BamFile) 53 | S3method(open, BcfFile) 54 | S3method(open, FaFile) 55 | S3method(open, TabixFile) 56 | S3method(open, RsamtoolsFileList) 57 | -------------------------------------------------------------------------------- /R/AllClasses.R: -------------------------------------------------------------------------------- 1 | setOldClass(c("bzfile", 
"connection")) 2 | setOldClass(c("fifo", "connection")) 3 | 4 | setGeneric(".validity", function(object) standardGeneric(".validity")) 5 | 6 | setClass("ScanBamParam", 7 | representation=representation( 8 | flag="integer", 9 | simpleCigar="logical", 10 | reverseComplement="logical", 11 | tag="character", 12 | tagFilter="list", 13 | what="character", 14 | which="IntegerRangesList", 15 | mapqFilter="integer")) 16 | 17 | setClass("BamViews", 18 | representation=representation( 19 | bamPaths="character", 20 | bamIndicies="character", 21 | bamSamples="DataFrame", 22 | bamRanges="GRanges", 23 | bamExperiment="list"), 24 | validity=.validity) 25 | 26 | setClass("ScanBVcfParam", 27 | representation=representation( 28 | "VIRTUAL", 29 | which="IntegerRangesList", 30 | fixed="character", 31 | info="character", 32 | geno="character", 33 | samples="character", 34 | trimEmpty="logical"), 35 | prototype=prototype( 36 | trimEmpty=TRUE)) 37 | 38 | setClass("ScanBcfParam", contains="ScanBVcfParam") 39 | 40 | setClass("ApplyPileupsParam", 41 | representation=representation( 42 | flag="integer", 43 | minBaseQuality="integer", 44 | minMapQuality="integer", 45 | minDepth="integer", 46 | maxDepth="integer", 47 | yieldSize="integer", 48 | yieldBy="character", 49 | yieldAll="logical", 50 | which="GRanges", 51 | what="character"), 52 | validity=.validity) 53 | 54 | ## RsamtoolsFile(s) 55 | .RsamtoolsFile_generator <- setRefClass("RsamtoolsFile", 56 | fields=list(.extptr="externalptr", path="character", 57 | index="character", yieldSize="integer")) 58 | 59 | .BamFile <- setRefClass("BamFile", contains="RsamtoolsFile", 60 | fields=list(obeyQname="logical", asMates="logical", 61 | qnamePrefixEnd="character", qnameSuffixStart="character")) 62 | 63 | .BcfFile <- setRefClass("BcfFile", contains="RsamtoolsFile", 64 | fields=list(mode="character")) 65 | 66 | .TabixFile <- setRefClass("TabixFile", contains="RsamtoolsFile") 67 | 68 | .FaFile <- setRefClass("FaFile", contains="RsamtoolsFile", 69 | 
fields=list(gzindex="character")) 70 | 71 | setClass("RsamtoolsFileList", contains=c("SimpleList", "VIRTUAL")) 72 | 73 | setClass("BamFileList", contains="RsamtoolsFileList", 74 | prototype=prototype(elementType="BamFile")) 75 | 76 | setClass("BcfFileList", contains="RsamtoolsFileList", 77 | prototype=prototype(elementType="BcfFile")) 78 | 79 | setClass("TabixFileList", contains="RsamtoolsFileList", 80 | prototype=prototype(elementType="TabixFile")) 81 | 82 | setClass("FaFileList", contains="RsamtoolsFileList", 83 | prototype=prototype(elementType="FaFile")) 84 | 85 | setClass("PileupFiles", contains="BamFileList", 86 | representation=representation(param="ApplyPileupsParam")) 87 | -------------------------------------------------------------------------------- /R/asBam.R: -------------------------------------------------------------------------------- 1 | setMethod(asSam, "character", ## calls .as_bam on 'file' with output path '<destination>.sam'; value is that path 2 | function(file, destination=sub("\\.bam$", "", file), ..., overwrite=FALSE) ## default strips only a trailing '.bam', never one inside the path 3 | { 4 | file <- .normalizePath(file) 5 | destination <- .normalizePath(destination) 6 | d0 <- paste(destination, "sam", sep=".") 7 | 8 | if (!overwrite && file.exists(d0)) { ## refuse to clobber an existing output unless overwrite=TRUE 9 | msg <- sprintf("'%s' exists, '%s' is FALSE\n %s: %s", 10 | "destination", "overwrite", "destination", 11 | d0) 12 | stop(msg) 13 | } 14 | 15 | tryCatch({ 16 | result <- .Call(.as_bam, file, d0, FALSE) 17 | if (!file.exists(d0)) 18 | stop("failed to create 'SAM' file") 19 | }, error=function(err) { ## re-signal with the input path appended to the message 20 | msg <- sprintf("'asSam' %s\n SAM file: '%s'\n", 21 | conditionMessage(err), file) 22 | stop(msg) 23 | }) 24 | d0 25 | }) 26 | 27 | setMethod(asBam, "character", ## result is first written to a temporary file, then sorted+indexed or renamed to '<destination>.bam' 28 | function(file, destination=sub("\\.sam(\\.gz)?$", "", file), ..., ## default strips only a trailing '.sam' or '.sam.gz' 29 | overwrite=FALSE, indexDestination=TRUE) 30 | { 31 | file <- .normalizePath(file) 32 | destination <- .normalizePath(destination) 33 | d0 <- paste(destination, "bam", sep=".") 34 | 35 | ofl <- tempfile() 36 | on.exit(unlink(ofl)) 37 | if (!overwrite && file.exists(d0)) { 38 | msg <- sprintf("'%s' 
exists, '%s' is FALSE\n %s: %s", 39 | "destination", "overwrite", "destination", 40 | d0) 41 | stop(msg) 42 | } 43 | tryCatch({ 44 | result <- .Call(.as_bam, file, ofl, TRUE) 45 | if (!file.exists(ofl)) 46 | stop("failed to create 'BAM' file") 47 | if (indexDestination) { 48 | destination <- sortBam(ofl, destination) 49 | indexBam(destination) 50 | } else { 51 | destination <- d0 52 | .file.rename(ofl, destination) 53 | } 54 | }, error=function(err) { 55 | msg <- sprintf("'asBam' %s\n SAM file: '%s'\n", 56 | conditionMessage(err), file) 57 | stop(msg) 58 | }) 59 | destination 60 | }) 61 | -------------------------------------------------------------------------------- /R/countBam.R: -------------------------------------------------------------------------------- 1 | .countBam_postprocess <- function(x, file, param) 2 | { 3 | which <- bamWhich(param) 4 | bfile <- basename(path(file)) 5 | if (0L != length(space(which))) { 6 | data.frame(space=space(which), start=.uunlist(start(which)), 7 | end=.uunlist(end(which)), 8 | width=.uunlist(width(which)), file=bfile, 9 | records=x[["records"]], 10 | nucleotides=x[["nucleotides"]]) 11 | } else { 12 | data.frame(space=NA, start=NA, end=NA, width=NA, 13 | file=bfile, records=x[["records"]], 14 | nucleotides=x[["nucleotides"]]) 15 | } 16 | } 17 | 18 | setMethod(countBam, "character", 19 | function(file, index=file, ..., param=ScanBamParam()) 20 | { 21 | index <- 22 | if (missing(index) && 0L == length(bamWhich(param))) 23 | character(0) 24 | else .normalizePath(index) 25 | bam <- open(BamFile(file, index), "rb") 26 | on.exit(close(bam)) 27 | countBam(bam, ..., param=param) 28 | }) 29 | -------------------------------------------------------------------------------- /R/filterBam.R: -------------------------------------------------------------------------------- 1 | .normalizeRangesList <- 2 | function(rangesList) 3 | { 4 | nms <- names(rangesList) 5 | reducedList <- if (0 != length(rangesList) && is.null(nms)) { 6 | ## special 
case, all names missing 7 | rng <- Reduce(append, as(rangesList, "list")) 8 | IRangesList(reduce(rng, drop.empty.ranges=TRUE)) 9 | } else if (any(duplicated(nms))) { 10 | unms <- unique(nms) 11 | lst <- lapply(unms, function(nm, rnglist) { 12 | idx <- names(rnglist) == nm 13 | rng <- Reduce(append, as(rnglist[idx], "list")) 14 | reduce(rng, drop.empty.ranges=TRUE) 15 | }, rnglist=rangesList) 16 | names(lst) <- unms 17 | do.call(IRangesList, lst) 18 | } else { 19 | reduce(rangesList, drop.empty.ranges=TRUE) 20 | } 21 | reducedList[lengths(reducedList) != 0] 22 | } 23 | 24 | .filterBam_preprocess <- 25 | function(file, param) 26 | { 27 | which <- .normalizeRangesList(bamWhich(param)) 28 | hnames <- seqlevels(file) 29 | o <- order(match(names(which), hnames)) 30 | what <- bamWhat(param) 31 | if (asMates(file)) 32 | what <- union(what, c("mate_status", "groupid")) 33 | initialize(param, which=which[o], what=what) 34 | } 35 | 36 | setMethod(filterBam, "character", 37 | function(file, destination, index=file, ..., 38 | filter=FilterRules(), 39 | indexDestination=TRUE, 40 | param=ScanBamParam(what=scanBamWhat())) 41 | { 42 | if (missing(index) && 0L == length(bamWhich(param))) 43 | index <- character(0) 44 | bam <- open(BamFile(file, index), "rb") 45 | on.exit(close(bam)) 46 | filterBam(bam, destination, ..., filter=filter, 47 | indexDestination=indexDestination, param=param) 48 | }) 49 | -------------------------------------------------------------------------------- /R/idxstatsBam.R: -------------------------------------------------------------------------------- 1 | setMethod(idxstatsBam, "character", 2 | function(file, index=file, ...) 3 | { 4 | index <- .normalizePath(index) 5 | bam <- open(BamFile(file, index), "rb") 6 | on.exit(close(bam)) 7 | idxstatsBam(bam, ...) 
8 | }) 9 | -------------------------------------------------------------------------------- /R/indexBam.R: -------------------------------------------------------------------------------- 1 | setMethod(indexBam, "character", 2 | function(files, ...) 3 | { 4 | files <- .normalizePath(files) 5 | sapply(files, function(file) .Call(.index_bam, file)) 6 | }) 7 | -------------------------------------------------------------------------------- /R/io_bam.R: -------------------------------------------------------------------------------- 1 | .io_bam <- 2 | function(func, file, ..., param) 3 | { 4 | flag <- bamFlag(param, asInteger=TRUE) 5 | simpleCigar <- bamSimpleCigar(param) 6 | tagFilter <- bamTagFilter(param) 7 | mapqFilter <- bamMapqFilter(param) 8 | which <- bamWhich(param) 9 | if (!all(names(which) %in% seqlevels(file))) { 10 | bad <- setdiff(names(which), seqlevels(file)) 11 | stop("seqlevels(param) not in BAM header:", 12 | "\n seqlevels: ", paste(sQuote(bad), collapse=", "), 13 | "\n file: ", path(file), 14 | "\n index: ", index(file)) 15 | } 16 | regions <- 17 | if (0L != length(space(which))) 18 | list(as.character(space(which)), .uunlist(start(which)), 19 | .uunlist(end(which))) 20 | else NULL 21 | on.exit(.Call(.scan_bam_cleanup)) 22 | 23 | .io_check_exists(path(file)) 24 | tryCatch({ 25 | .Call(func, .extptr(file), regions, flag, simpleCigar, tagFilter, 26 | mapqFilter, ...) 
27 | }, error=function(err) { 28 | stop(conditionMessage(err), "\n file: ", path(file), 29 | "\n index: ", index(file)) 30 | }) 31 | } 32 | -------------------------------------------------------------------------------- /R/mergeBam.R: -------------------------------------------------------------------------------- 1 | setMethod(mergeBam, "character", 2 | function(files, destination, ..., region = GRanges(), 3 | overwrite = FALSE, header = character(), byQname = FALSE, 4 | addRG = FALSE, compressLevel1 = FALSE, 5 | indexDestination = FALSE) 6 | { 7 | tryCatch({ 8 | 9 | files <- sapply(files, .normalizePath) 10 | destination <- .normalizePath(destination) 11 | region <- local({ 12 | x <- as(region, "GRanges") 13 | if (1L < length(x)) 14 | stop("'region' must specify one range") 15 | sprintf("%s:%d-%d", as.character(seqnames(x)), start(x), end(x)) 16 | }) 17 | 18 | if (!overwrite && file.exists(destination)) { 19 | msg <- sprintf("'%s' exists, '%s' is FALSE\n %s: %s", 20 | "destination", "overwrite", "destination", 21 | destination) 22 | stop(msg) 23 | } 24 | 25 | header <- .normalizePath(header) 26 | 27 | destination <- 28 | .Call(.merge_bam, files, destination, overwrite, header, 29 | region, byQname, addRG, compressLevel1) 30 | if (indexDestination) 31 | indexBam(destination) 32 | 33 | destination 34 | 35 | }, error=function(err) { 36 | msg <- sprintf("'mergeBam' %s", conditionMessage(err)) 37 | stop(msg) 38 | }) 39 | }) 40 | -------------------------------------------------------------------------------- /R/methods-BamFileList.R: -------------------------------------------------------------------------------- 1 | BamFileList <- 2 | function(..., yieldSize=NA_integer_, obeyQname=FALSE, asMates=FALSE, 3 | qnamePrefixEnd=NA, qnameSuffixStart=NA) 4 | { 5 | fls <- .RsamtoolsFileList(..., yieldSize=yieldSize, class="BamFile") 6 | if (!missing(obeyQname)) 7 | obeyQname(fls) <- obeyQname 8 | if (!missing(asMates)) 9 | asMates(fls) <- asMates 10 | if 
(!missing(qnamePrefixEnd)) 11 | qnamePrefixEnd(fls) <- qnamePrefixEnd 12 | if (!missing(qnameSuffixStart)) 13 | qnameSuffixStart(fls) <- qnameSuffixStart 14 | fls 15 | } 16 | 17 | setMethod(obeyQname, "BamFileList", 18 | function(object, ...) 19 | { 20 | sapply(object, obeyQname) 21 | }) 22 | 23 | setReplaceMethod("obeyQname", "BamFileList", 24 | function(object, ..., value) 25 | { 26 | endoapply(object, `obeyQname<-`, value=value) 27 | }) 28 | 29 | setMethod(asMates, "BamFileList", 30 | function(object, ...) 31 | { 32 | sapply(object, asMates) 33 | }) 34 | 35 | setReplaceMethod("asMates", "BamFileList", 36 | function(object, ..., value) 37 | { 38 | endoapply(object, `asMates<-`, value=value) 39 | }) 40 | 41 | setMethod(qnamePrefixEnd, "BamFileList", 42 | function(object, ...) 43 | { 44 | sapply(object, qnamePrefixEnd) 45 | }) 46 | 47 | setReplaceMethod("qnamePrefixEnd", "BamFileList", 48 | function(object, ..., value) 49 | { 50 | endoapply(object, `qnamePrefixEnd<-`, value=value) 51 | }) 52 | 53 | setMethod(qnameSuffixStart, "BamFileList", 54 | function(object, ...) 55 | { 56 | sapply(object, qnameSuffixStart) 57 | }) 58 | 59 | setReplaceMethod("qnameSuffixStart", "BamFileList", 60 | function(object, ..., value) 61 | { 62 | endoapply(object, `qnameSuffixStart<-`, value=value) 63 | }) 64 | 65 | setMethod(seqinfo, "BamFileList", 66 | function(x) 67 | { 68 | Reduce(merge, lapply(x, seqinfo)) 69 | }) 70 | -------------------------------------------------------------------------------- /R/methods-BamSampler.R: -------------------------------------------------------------------------------- 1 | .BamSampler <- setRefClass("BamSampler", contains="BamFile") 2 | 3 | BamSampler <- 4 | function (file, index = file, ..., yieldSize, obeyQname = FALSE, 5 | asMates = FALSE, qnamePrefixEnd = NA, 6 | qnameSuffixStart = NA) 7 | { 8 | .Deprecated(msg=paste0("'BamSampler' is deprecated. 
Use 'REDUCEsampler' ", 9 | "in the GenomicFiles package")) 10 | qnamePrefixEnd <- .check_qname_arg(qnamePrefixEnd, "qnamePrefixEnd") 11 | qnameSuffixStart <- .check_qname_arg(qnameSuffixStart, "qnameSuffixStart") 12 | .RsamtoolsFile(.BamSampler, .normalizePath(file), .normalizePath(index), 13 | yieldSize = yieldSize, obeyQname = obeyQname, asMates = asMates, 14 | qnamePrefixEnd = qnamePrefixEnd, qnameSuffixStart = qnameSuffixStart, 15 | ...) 16 | } 17 | 18 | setMethod("scanBam", "BamSampler", ## value: list(sample) carrying a "BamSamplerStatistics" attribute 19 | function(file, index=file, ..., 20 | param=ScanBamParam(what=scanBamWhat())) 21 | { 22 | if (0L == length(bamWhat(param)) && 0L == length(bamTag(param))) { 23 | txt <- "no BAM fields selected for input (neither 'bamWhat(param)' 24 | nor 'bamTag(param)' defined)" 25 | stop(paste(strwrap(txt), collapse="\n ")) 26 | } 27 | 28 | sampleSize <- yieldSize(file) ## the file's yieldSize doubles as the requested sample size 29 | if (is.na(sampleSize)) 30 | stop("'yieldSize' must not be NA") 31 | 32 | bfile <- as(file, "BamFile") 33 | open(bfile, "rb") 34 | on.exit(close(bfile)) 35 | 36 | smpl <- S4Vectors:::quick_unlist(unname(scanBam(bfile, param=param))) ## first yield seeds the sample 37 | tot <- length(smpl[[1]]) 38 | if (tot > sampleSize) { # e.g., ranges 39 | idx <- sample(tot, sampleSize) 40 | smpl <- lapply(smpl, `[`, idx) 41 | } 42 | repeat { ## each later yield overwrites randomly chosen elements of the sample 43 | yld <- S4Vectors:::quick_unlist(scanBam(bfile, param=param)) 44 | yld_n <- length(yld[[1]]) 45 | if (yld_n == 0L) 46 | break 47 | tot <- tot + yld_n 48 | keep <- rbinom(1L, yld_n, yld_n / tot) ## how many sample slots this yield replaces 49 | if (keep == 0L) 50 | next 51 | 52 | i <- sample(sampleSize, keep) 53 | j <- sample(yld_n, keep) 54 | smpl <- Map(function(x, y, i, j) { 55 | x[i] <- y[j] 56 | x 57 | }, smpl, yld, MoreArgs=list(i=i, j=j)) 58 | } 59 | lst <- list(smpl) 60 | attr(lst, "BamSamplerStatistics") <- 61 | c(yieldSize=sampleSize, totalRead=tot, yield=length(smpl[[1]])) 62 | lst 63 | }) 64 | 65 | setMethod(show, "BamSampler", function(object) { 66 | callNextMethod() 67 | }) 68 | 
-------------------------------------------------------------------------------- /R/methods-PileupFiles.R: -------------------------------------------------------------------------------- 1 | PileupFiles <- 2 | function(files, ..., param=ApplyPileupsParam()) 3 | { 4 | bfl <- BamFileList(files, ...) 5 | new("PileupFiles", bfl, param=param) 6 | } 7 | 8 | plpFiles <- function(object) as(object, "BamFileList") 9 | 10 | plpParam <- function(object) object@param 11 | 12 | setMethod(applyPileups, c("PileupFiles", "ApplyPileupsParam"), 13 | function(files, FUN, ..., param) 14 | { 15 | FUN <- match.fun(FUN) 16 | ok <- isOpen(files) 17 | if (!all(ok)) ## files must be either all open or all closed 18 | if (any(ok)) 19 | stop("'all(isOpen())' is not 'TRUE'") 20 | else { ## none open: open them here and close them again on exit 21 | open(files) 22 | on.exit(close(files)) 23 | } 24 | lvls <- lapply(files, seqlevels) 25 | for (i in seq_along(files)[-1]) ## every file must have the same seqlevels as the first 26 | if (!identical(lvls[[i]], lvls[[1]])) { 27 | msg <- sprintf("applyPileups 'seqlevels' must be identical(); 28 | failed when comparing %s with %s", 29 | sQuote(basename(path(files)[1])), 30 | sQuote(basename(path(files)[i]))) 31 | stop(paste(strwrap(msg, exdent=4), collapse="\n")) 32 | } 33 | tryCatch({ 34 | param <- as(param, "list") 35 | extptr <- lapply(files, .extptr) 36 | regions <- 37 | if (0L != length(param[["which"]])) .asRegions(param[["which"]]) 38 | else NULL 39 | param[["what"]] <- c("seq", "qual") %in% param[["what"]] ## logical pair: was "seq" / "qual" requested 40 | .Call(.apply_pileups, extptr, names(files), regions, param, FUN) 41 | }, error=function(err) { ## re-signal with an 'applyPileups: ' prefix and no call 42 | stop("applyPileups: ", conditionMessage(err), call.=FALSE) 43 | }) 44 | }) 45 | 46 | setMethod(applyPileups, c("PileupFiles", "missing"), ## param omitted: use the one stored on 'files' 47 | function(files, FUN, ..., param) 48 | { 49 | applyPileups(files, FUN, ..., param=plpParam(files)) 50 | }) 51 | 52 | setMethod(show, "PileupFiles", function(object) { 53 | cat("class:", class(object), "\n") 54 | nms <- names(object) 55 | txt <- paste(S4Vectors:::selectSome(nms, 3L), collapse=", ") 56 | cat(sprintf("names: %s (%d total)\n", txt, length(nms))) 
57 | fls <- sapply(object, function(x) basename(path(x))) 58 | txt <- paste(S4Vectors:::selectSome(fls, 3L), collapse=", ") 59 | cat(sprintf("plpFiles: %s (%d total)\n", txt, length(fls))) 60 | cat("plpParam: class", class(plpParam(object)), "\n") 61 | }) 62 | -------------------------------------------------------------------------------- /R/methods-RsamtoolsFile.R: -------------------------------------------------------------------------------- 1 | .extptr <- function(object) object$.extptr 2 | 3 | setMethod(index, "RsamtoolsFile", 4 | function(object, ..., asNA=TRUE) 5 | { 6 | index <- object$index 7 | if (asNA && ((length(index) == 0L) || !nzchar(index))) 8 | NA_character_ 9 | else 10 | index 11 | }) 12 | 13 | setReplaceMethod("index", "RsamtoolsFile", 14 | function(object, ..., value) 15 | { 16 | stopifnot(length(value) == 1L) 17 | object$index <- as.character(value) 18 | object 19 | }) 20 | 21 | setMethod(yieldSize, "RsamtoolsFile", 22 | function(object, ...) 23 | { 24 | object$yieldSize 25 | }) 26 | 27 | setReplaceMethod("yieldSize", "RsamtoolsFile", 28 | function(object, ..., value) 29 | { 30 | if (1L != length(value)) 31 | stop("'value' must be length 1") 32 | object$yieldSize <- as.integer(value) 33 | object 34 | }) 35 | 36 | .RsamtoolsFile <- 37 | function(g, path, index, ..., yieldSize=NA_integer_) 38 | { 39 | if (1L != length(path)) 40 | stop("'file' must be length 1") # argh! public api is 'file' 41 | if (1L < length(index)) 42 | stop("'index' must be length 0 or 1") 43 | if (1L != length(yieldSize)) 44 | stop("'yieldSize' must be length 1") 45 | yieldSize <- as.integer(yieldSize) 46 | if (!(yieldSize > 0L || is.na(yieldSize))) 47 | stop("'yieldSize' must be >0 or NA") 48 | if (length(index) && is.na(index)) 49 | index <- character(0) 50 | g$new(path=.normalizePath(path), index=.normalizePath(index), ..., 51 | yieldSize=yieldSize) 52 | } 53 | 54 | setMethod(path, "RsamtoolsFile", function(object, ...) 
object$path) 55 | 56 | setMethod(isOpen, "RsamtoolsFile", function(con, rw="") FALSE) 57 | 58 | setMethod(show, "RsamtoolsFile", function(object) { 59 | cat("class:", class(object), "\n") 60 | cat(.ppath("path", path(object))) 61 | cat(.ppath("index", index(object))) 62 | cat("isOpen:", isOpen(object), "\n") 63 | cat("yieldSize:", yieldSize(object), "\n") 64 | }) 65 | -------------------------------------------------------------------------------- /R/methods-RsamtoolsFileList.R: -------------------------------------------------------------------------------- 1 | setGeneric(".RsamtoolsFileList", 2 | function(file, ..., yieldSize=NA_integer_, class) 3 | standardGeneric(".RsamtoolsFileList"), 4 | signature="file") 5 | 6 | setMethod(.RsamtoolsFileList, "missing", 7 | function(file, ..., classDef = class, yieldSize=NA_integer_, class) 8 | { 9 | new(paste0(class, "List")) 10 | }) 11 | 12 | setMethod(.RsamtoolsFileList, "character", 13 | function(file, index, ..., classDef=class, yieldSize=NA_integer_, class) 14 | { 15 | fun <- function(elt, ..., yieldSize, classDef) 16 | do.call(classDef, list(elt, ..., yieldSize=yieldSize)) 17 | if (is.null(names(file))) 18 | names(file) <- basename(file) 19 | listData <- if (!missing(index) && length(index)) 20 | Map(fun, file, as.character(index), ..., 21 | MoreArgs=list(yieldSize=yieldSize, classDef=classDef)) 22 | else if (missing(index)) 23 | Map(fun, file, ..., 24 | MoreArgs=list(yieldSize=yieldSize, classDef=classDef) 25 | ) 26 | else 27 | ## support old index=character() variant 28 | Map(fun, file, ..., MoreArgs=list(index=index, 29 | yieldSize=yieldSize, classDef=classDef)) 30 | new(paste0(class, "List"), listData=listData) 31 | }) 32 | 33 | setMethod(.RsamtoolsFileList, "ANY", 34 | function(file, ..., classDef = class, yieldSize=NA_integer_, class) 35 | { 36 | list <- list(file, ...) 
37 | if (length(list) == 1 && (is.list(list[[1L]]) || is(list[[1L]], "List"))) 38 | list <- as.list(list[[1L]]) 39 | new(paste0(class, "List"), listData=list) 40 | }) 41 | 42 | setMethod(.RsamtoolsFileList, "RsamtoolsFile", 43 | function(file, ..., classDef = class, yieldSize=NA_integer_, class) 44 | { 45 | new(paste0(class, "List"), listData=list(file, ...)) 46 | }) 47 | 48 | setMethod(path, "RsamtoolsFileList", 49 | function(object, ...) 50 | { 51 | vapply(object, path, character(1)) 52 | }) 53 | 54 | setMethod(index, "RsamtoolsFileList", 55 | function(object, ...) 56 | { 57 | sapply(object, index, ...) 58 | }) 59 | 60 | setReplaceMethod("index", "RsamtoolsFileList", 61 | function(object, ..., value) 62 | { 63 | stopifnot(length(value) == length(path(object))) 64 | for (i in seq_along(object)) 65 | index(object[[i]]) <- value[i] 66 | object 67 | }) 68 | 69 | setMethod(yieldSize, "RsamtoolsFileList", 70 | function(object, ...) 71 | { 72 | vapply(object, yieldSize, numeric(1)) 73 | }) 74 | 75 | setReplaceMethod("yieldSize", "RsamtoolsFileList", 76 | function(object, ..., value) 77 | { 78 | for (i in seq_along(object)) 79 | yieldSize(object[[i]]) <- value 80 | object 81 | }) 82 | 83 | setMethod(isOpen, "RsamtoolsFileList", 84 | function(con, rw="") 85 | { 86 | sapply(as.list(con), isOpen, rw="read") 87 | }) 88 | 89 | open.RsamtoolsFileList <- 90 | function(con, ...) 91 | { 92 | for (f in as.list(con)) 93 | open(f, ...) 94 | con 95 | } 96 | 97 | close.RsamtoolsFileList <- 98 | function(con, ...) 99 | { 100 | for (f in as.list(con)) 101 | close(f, ...) 102 | con 103 | } 104 | 105 | setMethod(names, "RsamtoolsFileList", 106 | function(x) 107 | { 108 | nms <- callNextMethod() 109 | if (is.null(nms)) 110 | nms <- sapply(x, function(elt) basename(path(elt))) 111 | nms 112 | }) 113 | 114 | ## implementations 115 | 116 | BcfFileList <- function(...) .RsamtoolsFileList(..., class="BcfFile") 117 | 118 | TabixFileList <- function(...) 
119 | .RsamtoolsFileList(..., class="TabixFile") 120 | 121 | FaFileList <- function(...) .RsamtoolsFileList(..., class="FaFile") 122 | 123 | ## BamFileList 124 | 125 | setMethod(countBam, "BamFileList", 126 | function(file, index=file, ..., param=ScanBamParam()) 127 | { 128 | counts <- lapply(file, countBam, ..., param=param) 129 | do.call(rbind, counts) 130 | }) 131 | 132 | setMethod(mergeBam, "BamFileList", 133 | function(files, destination, ...) 134 | { 135 | files <- sapply(files, path) 136 | mergeBam(files, destination, ...) 137 | }) 138 | -------------------------------------------------------------------------------- /R/methods-ScanBVcfParam.R: -------------------------------------------------------------------------------- 1 | .ScanBcfParam <- 2 | function(fixed=character(), info=character(), geno=character(), 3 | samples=character(), trimEmpty=TRUE, which, 4 | class="ScanBcfParam") 5 | { 6 | if (1L == length(fixed) && is.na(fixed)) 7 | fixed <- as.character(fixed) 8 | if (1L == length(info) && is.na(info)) 9 | info <- as.character(info) 10 | if (1L == length(geno) && is.na(geno)) 11 | geno <- as.character(geno) 12 | if (1L == length(samples) && is.na(samples)) 13 | samples <- as.character(samples) 14 | new(class, which=which, fixed=fixed, info=info, geno=geno, 15 | samples=samples, trimEmpty=trimEmpty) 16 | } 17 | 18 | ## ScanBcfParam 19 | 20 | setMethod(ScanBcfParam, c(which="missing"), 21 | function(fixed=character(), info=character(), geno=character(), 22 | samples=character(), trimEmpty=TRUE, which, ...) 23 | { 24 | which <- IRangesList() 25 | names(which) <- character() 26 | .ScanBcfParam(fixed, info, geno, samples, trimEmpty, which, ...) 27 | }) 28 | 29 | setMethod(ScanBcfParam, c(which="GRangesList"), 30 | function(fixed=character(), info=character(), geno=character(), 31 | samples=character(), trimEmpty=TRUE, which, ...) 32 | { 33 | .ScanBcfParam(fixed, info, geno, samples, trimEmpty, 34 | which=ranges(which), ...) 
35 | }) 36 | 37 | setMethod(ScanBcfParam, c(which="IntegerRangesList"), 38 | function(fixed=character(), info=character(), geno=character(), 39 | samples=character(), trimEmpty=TRUE, which, ...) 40 | { 41 | .ScanBcfParam(fixed, info, geno, samples, trimEmpty, which, ...) 42 | }) 43 | 44 | setMethod(ScanBcfParam, c(which="GRanges"), 45 | function(fixed=character(), info=character(), geno=character(), 46 | samples=character(), trimEmpty=TRUE, which, ...) 47 | { 48 | which <- split(ranges(which), seqnames(which)) 49 | .ScanBcfParam(fixed, info, geno, samples, trimEmpty, which, ...) 50 | }) 51 | 52 | ## accessors 53 | 54 | bcfFixed <- function(object) slot(object, "fixed") 55 | bcfInfo <- function(object) slot(object, "info") 56 | bcfGeno <- function(object) slot(object, "geno") 57 | bcfSamples <- function(object) slot(object, "samples") 58 | bcfTrimEmpty <- function(object) slot(object, "trimEmpty") 59 | bcfWhich <- function(object) slot(object, "which") 60 | 61 | .some <- S4Vectors:::selectSome 62 | setMethod(show, "ScanBVcfParam", function(object) 63 | { 64 | .ptags <- function(tags) { 65 | if (length(tags)) 66 | paste(tags, collapse=", ") 67 | else "character() [All]" 68 | } 69 | .clslbl <- function(lbl) { 70 | cl <- 71 | if ("ScanBcfParam" == class(object)) "bcf" else "vcf" 72 | paste0(cl, lbl) 73 | } 74 | cat("class:", class(object), "\n") 75 | cat(sprintf("%s: %d elements\n", .clslbl("Which"), 76 | length(bcfWhich(object)))) 77 | cat(.clslbl("Fixed:"), .ptags(bcfFixed(object)), "\n") 78 | cat(.clslbl("Info:"), .some(bcfInfo(object)), "\n") 79 | cat(.clslbl("Geno:"), .some(bcfGeno(object)), "\n") 80 | cat(.clslbl("Samples:"), .some(bcfSamples(object)), "\n") 81 | }) 82 | 83 | -------------------------------------------------------------------------------- /R/phred2ASCIIOffset.R: -------------------------------------------------------------------------------- 1 | .ascii_offset <- function() 2 | setNames(33:126 - 33L, strsplit(rawToChar(as.raw(33:126)), "")[[1]]) 3 
## Map integer quality scores to ASCII offsets for one encoding scheme.
##
## 'x' is an integer vector of quality scores and 'scheme' a single string
## naming the encoding. The lookup table returned by .ascii_offset() is a
## named integer vector whose names are the printable characters '!'..'~'
## (ASCII 33..126) and whose values are 0..93. Element 1 is therefore '!'
## (score 0 under a "+33" scheme) and element 32 is '@' (ASCII 64, score 0
## under a "+64" scheme), which is why the branches index with 'x + 1L' or
## 'x + 32L'. Scores outside the range accepted by the scheme fail the
## stopifnot() check; an unrecognised scheme is an error.
.phred2ascii_int <-
    function(x, scheme)
{
    ## See https://en.wikipedia.org/wiki/FASTQ_format#Encoding
    ascii <- .ascii_offset()
    switch(scheme, "Illumina 1.8+" = {
        ## L - Illumina 1.8+ Phred+33, raw reads typically (0, 41)
        stopifnot(all(x >= 0), all(x <= 41))
        ascii[x + 1L]
    }, "Sanger" = {
        ## S - Sanger Phred+33, raw reads typically (0, 40)
        stopifnot(all(x >= 0), all(x <= 40))
        ascii[x + 1L]
    }, "Solexa" = {
        ## X - Solexa Solexa+64, raw reads typically (-5, 40)
        stopifnot(all(x >= -5), all(x <= 40))
        ascii[x + 32L]
    }, "Illumina 1.3+" = {
        ## I - Illumina 1.3+ Phred+64, raw reads typically (0, 40)
        stopifnot(all(x >= 0), all(x <= 40))
        ascii[x + 32L]
    }, "Illumina 1.5+" = {
        ## J - Illumina 1.5+ Phred+64, raw reads typically (3, 40)
        ## with 0=unused, 1=unused, 2=Read Segment Quality Control Indicator (bold)
        ## (Note: See discussion above).
        stopifnot(all(x >= 3), all(x <= 40))
        ascii[x + 32L]
    },
    ## The fallback alternative of switch() must be unnamed; a 'default='
    ## label would only be selected for the literal scheme "default" and
    ## every other unknown scheme would silently yield NULL.
    stop("unknown scheme '", scheme, "'"))
}

## Look up ASCII offsets for single-character quality symbols.
##
## 'x' is a character vector; every element must be one of the names of the
## .ascii_offset() table, otherwise stopifnot() signals an error.
.phred2ascii_char <-
    function(x)
{
    ascii <- .ascii_offset()
    stopifnot(all(x %in% names(ascii)))
    ascii[x]
}

## Translate quality scores to ASCII offsets.
##
## phred:  numeric scores (coerced to integer; NA not allowed) or a
##         character vector of quality symbols. A single multi-character
##         string is split into its individual characters.
## scheme: encoding used to interpret numeric 'phred'; ignored (with a
##         message) when 'phred' is character.
##
## Returns a named integer vector taken from the .ascii_offset() table.
## Signals an error when 'phred' is neither numeric nor character.
phred2ASCIIOffset <-
    function(phred=integer(),scheme= c("Illumina 1.8+", "Sanger", "Solexa",
        "Illumina 1.3+", "Illumina 1.5+"))
{
    if (is.numeric(phred)) {
        stopifnot(missing(scheme) || (length(scheme) == 1L), !anyNA(phred))
        scheme <- match.arg(scheme)
        phred <- as.integer(phred)
        .phred2ascii_int(phred, scheme)
    } else if (is.character(phred)) {
        if (!missing(scheme))
            message("'scheme' ignored; does not influence ASCII offset")
        if (length(phred) == 1L && nchar(phred) > 1L)
            phred <- strsplit(phred, "")[[1]]
        stopifnot(all(nchar(phred) == 1L))
        .phred2ascii_char(phred)
    } else
        stop("'phred' must be numeric (coerced to integer) or character")
}

--------------------------------------------------------------------------------
/R/pkgconfig.R:
--------------------------------------------------------------------------------
## Make 'path' usable in build commands. On Windows the path is normalized,
## replaced by its short (8.3) form when it contains a space, and
## backslashes are converted to forward slashes. On other platforms it is
## returned unchanged.
.build_path <-
    function(path)
{
    if (.Platform$OS.type == "windows") {
        path <- normalizePath(path)
        if (grepl(' ', path, fixed=TRUE))
            path <- utils::shortPathName(path)
        path <- gsub("\\\\", "/", path)
    }
    path
}

## Path to the installed 'Rsamtools.mk' (usretc/<r_arch>); Windows only.
.pkgMk <-
    function()
{
    if (.Platform$OS.type != "windows")
        stop(".pkgMk() expects windows operating system")
    path <- system.file(package="Rsamtools", "usretc", .Platform$r_arch,
                        "Rsamtools.mk", mustWork=TRUE)
    .build_path(path)
}

## Path to the installed 'usrlib/<r_arch>' directory of the package.
.pkgLd <-
    function()
{
    path <- system.file(package="Rsamtools", "usrlib", .Platform$r_arch,
                        mustWork=TRUE)
    .build_path(path)
}

--------------------------------------------------------------------------------
/R/readPileup.R:
--------------------------------------------------------------------------------
## Read a tab-delimited, header-less pileup table. Column names are taken
## from names(colClasses); quoting and comment characters are disabled.
.readPileup_table <-
    function(conn, colClasses, ...)
{
    read.table(conn, colClasses=colClasses,
               col.names=names(colClasses), sep="\t", header=FALSE,
               quote="", comment.char="", fill=TRUE, ...)
}

## Row indices whose third column is "*" (the rows treated as indel records
## by the readers below).
.readPileup_indel_idx <- function(df) which(df[[3]] == "*")

## Read the first eight pileup columns (the remaining seven are dropped via
## "NULL" column classes) and return them as a GRanges. Rows flagged by
## .readPileup_indel_idx() are removed; when variant == "SNP" the row
## preceding each flagged row is removed as well.
.readPileup_SNP <-
    function(file, ..., variant)
{
    colClasses <-
        c(space="factor", position="integer",
          referenceBase="character", consensusBase="character",
          consensusQuality="integer", snpQuality="integer",
          maxMappingQuality="integer", coverage="integer", "NULL",
          "NULL", "NULL", "NULL", "NULL", "NULL", "NULL")
    dat <- .readPileup_table(file, colClasses, ...)
    idx <- .readPileup_indel_idx(dat)
    if (length(idx) > 0L) {
        if (variant == "SNP")
            idx <- c(idx, idx-1)
        dat <- dat[-idx,]
    }
    GRanges(seqnames=dat[,1],
            ranges=IRanges(start=dat[,2],end=dat[,2]),
            referenceBase=factor(dat[,3], levels=DNA_ALPHABET),
            consensusBase=factor(dat[,4], levels=DNA_ALPHABET),
            consensusQuality=dat[,5],
            snpQuality=dat[,6],
            maxMappingQuality=dat[,7],
            coverage=dat[,8])
}

## Read indel records as a GRanges. 'dat' holds the "*" rows and 'dat0' the
## row immediately preceding each of them: position, reference/consensus
## base and quality columns come from 'dat0', allele columns from 'dat'.
## With no "*" rows both are empty.
.readPileup_indel <-
    function(file, ...)
{
    colClasses <- c(space="factor", position="integer",
                    reference="character", consensus="character",
                    consensusQuality="integer", snpQuality="integer",
                    maxMappingQuality="integer", coverage="integer",
                    alleleOne="character", alleleTwo="character",
                    alleleOneSupport="integer", alleleTwoSupport="integer",
                    additionalIndels="integer", "NULL", "NULL")
    dat <- .readPileup_table(file, colClasses, ...)
    idx <- .readPileup_indel_idx(dat)
    if (length(idx) != 0L) {
        dat0 <- dat[idx-1,]
        dat <- dat[idx,]
    } else {
        dat <- dat0 <- dat[FALSE,]
    }

    GRanges(seqnames=dat[,1],
            ranges=IRanges(start=dat0[,2],end=dat0[,2]),
            referenceBase=factor(dat0[,3], levels=DNA_ALPHABET),
            consensusBase=factor(dat0[,4]),
            consensusQuality=dat0[,5],
            snpQuality=dat0[,6],
            maxMappingQuality=dat0[,7],
            coverage=dat0[,8],
            alleleOne=dat[,9],
            alleleOneSupport=dat[,11],
            alleleTwo=dat[,10],
            alleleTwoSupport=dat[,12],
            additionalIndels=dat[,13])
}

## Dispatch on 'variant': "SNP" and "all" use the SNP reader (which receives
## 'variant'), "indel" uses the indel reader.
setMethod(readPileup, "connection",
          function(file, ..., variant=c("SNP", "indel", "all"))
{
    variant <- match.arg(variant)
    switch(variant, SNP=, all=.readPileup_SNP(file=file, ...,
                             variant=variant), indel=.readPileup_indel(file=file, ...))
})

## Open 'file' for reading, delegate to the connection method, and close
## the connection on exit.
setMethod(readPileup, "character", function(file, ...)
{
    conn <- file(file, "r")
    on.exit(close(conn))
    readPileup(conn, ...)
})

--------------------------------------------------------------------------------
/R/scanBam.R:
--------------------------------------------------------------------------------
## Thin wrapper around the native template constructor.
.scanBamTemplate <-
    function(seqlevels=factor(), tag=character(0))
{
    .Call(.scan_bam_template, seqlevels, tag)
}

## Build the result template for 'file' and 'param'.
.scanBam_template <-
    function(file, param)
{
    tmpl <- .scanBamTemplate(factor(levels=seqlevels(file)), bamTag(param))
    ## set those elements of the template that are not 'tag' (treat
    ## specially because nested list) nor specified by 'what'
    ## parameter to NULL
    tmpl[!names(tmpl) %in% c(bamWhat(param), "tag")] <- list(NULL)
    if (0L == length(tmpl[["tag"]]))
        tmpl["tag"] <- list(NULL)
    tmpl
}

## return rname:start-end values for outer list elements
## (NULL when 'param' has no 'which' ranges)
.scanBam_extract_which_labels <- function(param) {
    which <- bamWhich(param)
    if( 0L != length(space(which)))
        paste0(space(which), ":", .uunlist(start(which)), "-",
               .uunlist(end(which)))
    else
        NULL
}

## Name the outer list elements of 'x' after the 'which' ranges of 'param'
## (when there are any) and drop NULL entries from each inner list.
.scanBam_postprocess <-
    function(x, param)
{
    ## reuse the label helper rather than rebuilding the same strings here
    labels <- .scanBam_extract_which_labels(param)
    if (!is.null(labels))
        names(x) <- labels
    lapply(x, Filter, f=Negate(is.null))
}

## Scan a BAM file given by path. When 'index' is not supplied and 'param'
## has no 'which' ranges, the file is opened without an index. The BamFile
## is closed on exit.
setMethod(scanBam, "character",
          function(file, index=file, ...,
                   param=ScanBamParam(what=scanBamWhat()))
{
    if (missing(index) && 0L == length(bamWhich(param)))
        index <- character(0)
    bam <- open(BamFile(file, index), "rb")
    on.exit(close(bam))
    scanBam(bam, ..., param=param)
})

--------------------------------------------------------------------------------
/R/scanBamHeader.R:
--------------------------------------------------------------------------------
## Read the header of each file in 'files'. Each file is opened without an
## index and closed again; the result is a list named by the (tilde-
## expanded) file paths.
setMethod(scanBamHeader, "character",
          function(files, ...)
{
    files <- .normalizePath(files)
    lst <- lapply(files, function(file) {
        bam <- open(BamFile(file, character(0)))
        on.exit(close(bam))
        scanBamHeader(bam, ...)
    })
    names(lst) <- files
    lst
})

--------------------------------------------------------------------------------
/R/scanBcf.R:
--------------------------------------------------------------------------------
## Scan a BCF file given by path; the BcfFile is closed on exit.
setMethod(scanBcf, "character",
          function(file, index=file, ..., param=ScanBcfParam())
{
    bcf <- open(BcfFile(file, index))
    on.exit(close(bcf))
    scanBcf(bcf, ..., param=param)
})

--------------------------------------------------------------------------------
/R/scanBcfHeader.R:
--------------------------------------------------------------------------------
## Read the header of each file in 'file'; every file is opened without an
## index and closed again.
## NOTE(review): '...' is passed both to Map() (where its first element
## binds to 'mode') and to BcfFile() -- confirm this double use is intended.
setMethod(scanBcfHeader, "character",
          function(file, ...)
{
    Map(function(file, mode) {
        bf <- open(BcfFile(file, character(0), ...))
        on.exit(close(bf))
        scanBcfHeader(bf)
    }, file, ...)
})

--------------------------------------------------------------------------------
/R/sortBam.R:
--------------------------------------------------------------------------------
## Sort 'file' via the native routine. The returned path is the value
## reported by the native call with ".bam" appended; an error is raised if
## that file does not exist afterwards.
setMethod(sortBam, "character",
          function(file, destination, ...,
                   byQname=FALSE, maxMemory=512, byTag=NULL, nThreads=1L)
{
    file <- .normalizePath(file)
    destination <- .normalizePath(destination)
    result <- .Call(.sort_bam, file, destination, byQname,
                    as.integer(maxMemory), byTag, as.integer(nThreads))
    destination <- paste(result, "bam", sep=".")
    if (!file.exists(destination)) {
        msg <- sprintf("'sortBam' failed to create destination '%s'",
                       destination)
        stop(msg)
    }
    destination
})

--------------------------------------------------------------------------------
/R/testPairedEndBam.R:
--------------------------------------------------------------------------------
## Convenience method: wrap the path in a BamFile and delegate.
setMethod("testPairedEndBam", "character",
          function(file, index=file, ...)
{
    bf <- BamFile(file, index)
    testPairedEndBam(bf, ...)
})

## Return TRUE as soon as a chunk of records contains a read flagged
## 'isPaired', FALSE when a scan returns no records first. Records are
## scanned in chunks of the file's yieldSize (1000000 when it is NA; the
## original value is restored on exit). A running record count is emitted
## with message(). The file is closed if open and then (re)opened; it is
## not closed again by this method.
setMethod("testPairedEndBam", "BamFile",
          function(file, index=file, ...)
{
    yieldSize <- yieldSize(file)
    if (is.na(yieldSize))
        yieldSize(file) <- 1000000L
    on.exit(yieldSize(file) <- yieldSize)
    if (isOpen(file))
        close(file)
    open(file)
    isPaired <- FALSE
    tot <- 0
    repeat {
        flag <- scanBam(file, param=ScanBamParam(what="flag"))[[1]]$flag
        isPaired <- any(bamFlagTest(flag, "isPaired"))
        if (isPaired || length(flag) == 0L)
            break
        tot <- tot + length(flag)
        message(tot, " ", appendLF=FALSE)
    }
    isPaired
})

--------------------------------------------------------------------------------
/R/utilities.R:
--------------------------------------------------------------------------------
## Format "<tag>: <filepath>\n" for show() methods. Paths that would not
## fit the console width are abbreviated to a truncated directory name,
## "...", and the base name.
.ppath <- function(tag, filepath)
{
    wd <- options('width')[[1]] - nchar(tag) - 6
    ## the zero-length case must be handled before is.na(), which would
    ## otherwise hand a zero-length condition to 'if'
    if (0L == length(filepath))
        return(sprintf("%s: %s\n", tag, filepath))
    if(is.na(filepath))
        return(sprintf("%s: %s\n", tag, NA_character_))
    if (nchar(filepath) < wd)
        return(sprintf("%s: %s\n", tag, filepath))
    bname <- basename(filepath)
    wd1 <- wd - nchar(bname)
    dname <- substr(dirname(filepath), 1, wd1)
    sprintf("%s: %s...%s%s\n",
            tag, dname, .Platform$file.sep, bname)
}

## Stop unless every local entry of 'files' exists. Entries that are NA or
## start with a gs/aws/ftp/http/https scheme are not checked.
.io_check_exists <-
    function(files)
{
    if (!length(files))
        stop("'files' is length(0)")
    idx <- !grepl("^(gs|aws|ftp|http|https)://", files) & !is.na(files)
    test <- file.exists(files[idx])
    if (!all(test)) {
        msg <- paste0(sQuote(files[idx][!test]), collapse = "\n ")
        stop("file(s) do not exist:\n ", msg)
    }
}

.show_classname <-
    function(x) cat("class: ", class(x), "\n", sep="")

## Coerce 'path' to character (taking path() of an RsamtoolsFile) and
## tilde-expand local entries. NA entries and ftp/http/https URLs are left
## untouched.
.normalizePath <-
    function(path)
{
    if (is(path, "RsamtoolsFile")) {
        path <- path(path)
    } else {
        path <- as.character(path)
    }
    ## group the alternatives: "^(ftp)|(http)://" parsed as
    ## "starts with ftp" OR "contains http://"
    idx <- !grepl("^(ftp|https?)://", path) & !is.na(path)
    ## expand ~/, but don't chase links (i.e., don't normalizePath())
    path[idx] <- path.expand(path[idx])
    path
}

## Move 'from' to 'to', falling back to copy-then-remove when
## file.rename() fails. Warnings raised along the way are muffled and
## collected, errors are caught and collected; if the move did not succeed
## an error is signalled that reports 'from', 'to' and every collected
## message. Returns TRUE on success.
.file.rename <-
    function(from, to)
{
    warn <- err <- NULL
    ok <- withCallingHandlers(tryCatch({
        file.rename(from, to) ||
            (file.copy(from, to) && file.remove(from))
    }, error=function(e) {
        err <<- append(err, conditionMessage(e))
        ## must be FALSE, not NULL: 'if (!NULL)' below would itself fail
        ## with "argument is of length zero" and hide the collected messages
        FALSE
    }), warning=function(w) {
        warn <<- append(warn, conditionMessage(w))
        invokeRestart("muffleWarning")
    })
    if (!ok) {
        msg <- "file.rename or file.copy/file.remove failed:\n from: %s\n to: %s\n message(s): %s"
        stop(sprintf(msg, from, to, paste(c(warn, err), collapse="\n ")))
    }
    ok
}

## unlist() without names
.uunlist <-
    function(x) unlist(x, use.names=FALSE)

## Represent ranges as list(<sequence names>, <starts>, <ends>).
setMethod(.asRegions, "IntegerRangesList", function(x) {
    list(as.character(space(x)), .uunlist(start(x)), .uunlist(end(x)))
})

setMethod(.asRegions, "GRanges", function(x) {
    list(as.character(seqnames(x)), start(x), end(x))
})

### All arguments must be parallel vectors (of length N).
### The arguments prefixed with 'x_' describe a vector 'x' of N alignments.
### The arguments prefixed with 'y_' describe a vector 'y' of N alignments.
### Performs "parallel pairing" of the N alignments in 'x' with the N
### alignments in 'y'.
.isValidHit <- function(x_flag, x_seqnames, x_start, x_mrnm, x_mpos,
                        y_flag, y_seqnames, y_start, y_mrnm, y_mpos)
{
    .Call(.p_pairing, NULL, x_flag, x_seqnames, x_start, x_mrnm, x_mpos,
                      NULL, y_flag, y_seqnames, y_start, y_mrnm, y_mpos)
}

### 'x_flag', 'x_seqnames', 'x_start', 'x_mrnm', 'x_mpos': parallel vectors
### (of length N) describing N alignments. The alignments are assumed to
### be already grouped by QNAME.
### 'group.sizes': vector of non-negative integers which sum to N.
### If 'x_qname' was a vector of length N parallel to the 'x_*' arguments
### and containing the QNAME field, then 'group.sizes' would be
### 'runLength(Rle(x_qname))'.
### Returns an integer vector of length N parallel to the 'x_*' arguments.
### Alignments with more than 1 possible mate are assigned a zero.
### Those with exactly 1 mate that has itself more than 1 mate are assigned
### a negative value (the opposite of the index of the mate).
.findMateWithinGroups <- function(group.sizes,
                                  x_flag, x_seqnames,
                                  x_start, x_mrnm, x_mpos)
{
    .Call(.find_mate_within_groups,
          group.sizes, x_flag, x_seqnames, x_start, x_mrnm, x_mpos)
}

--------------------------------------------------------------------------------
/R/zip_compression.R:
--------------------------------------------------------------------------------
## Defunct entry point; directs callers to bgzip().
bgzipTabix <-
    function(fromFname, toFname = paste(fromFname, "gz", sep="."),
             overwrite=FALSE)
{
    .Defunct("bgzip", package="Rsamtools")
}

## Shared driver for the compression front ends: tilde-expand both paths,
## require a single destination that does not already exist (unless
## 'overwrite'), then invoke the native routine 'func'. Errors from the
## native call are re-signalled with the routine name and both paths.
.zip <-
    function(func, file, dest, overwrite)
{
    src <- .normalizePath(file)
    out <- .normalizePath(dest)
    if (!(is.character(out) && length(out) == 1L))
        stop("'dest' must be character(1)")
    if (!overwrite && file.exists(out))
        stop("'dest' exists:\n dest: ", out)
    rethrow <- function(err) {
        routine <- sub(".", "", func, fixed=TRUE)
        msg <- sprintf("'%s' error: %s\n file: %s\n dest: %s",
                       routine, conditionMessage(err), src, out)
        stop(msg, call.=FALSE)
    }
    tryCatch(.Call(func, src, out), error=rethrow)
}

## Compress 'file' with the native bgzip routine; by default the
## destination is 'file' with a trailing ".gz" removed and ".bgz" appended.
bgzip <-
    function(file, dest = sprintf("%s.bgz", sub("\\.gz$", "", file)),
             overwrite=FALSE)
{
    .zip(.bgzip, file, dest, overwrite)
}


## Defunct entry point; directs callers to bgzip().
razip <-
    function(file, dest = sprintf("%s.rz", sub("\\.gz$", "", file)),
             overwrite=FALSE)
{
    .Defunct("bgzip")
}

--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
.STRAND_LEVELS <- c("+", "-", "*")

| .PILEUP_NUCLEOTIDE_LEVELS <- c("A", "C", "G", "T", "N", "=", "-", "+") 4 | 5 | .onLoad <- 6 | function(libname, pkgname) 7 | { 8 | if (!identical(levels(strand()), .STRAND_LEVELS)) 9 | stop("internal: 'levels(strand())' not consistent with Rsamtools") 10 | .Call(.bamfile_init) 11 | .Call(.bcffile_init) 12 | .Call(.fafile_init) 13 | .Call(.tabixfile_init) 14 | .Call(.bambuffer_init) 15 | } 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [Bioconductor](https://bioconductor.org/) 2 | 3 | **Rsamtools** is an R/Bioconductor package that provides an interface to the `samtools`, `bcftools`, and `tabix` utilities for manipulating SAM (Sequence Alignment / Map), FASTA, binary variant call (BCF) and compressed indexed tab-delimited (tabix) files. 4 | 5 | See https://bioconductor.org/packages/Rsamtools for more information including how to install the release version of the package (please refrain from installing directly from GitHub).
6 | 7 | -------------------------------------------------------------------------------- /inst/extdata/ce2dict1.fa: -------------------------------------------------------------------------------- 1 | >pattern01 2 | GCGAAACTAGGAGAGGCT 3 | >pattern02 4 | CTGTTAGCTAATTTTAAAAATAAAT 5 | >pattern03 6 | ACTACCACCCAAATTTAGATATTC 7 | >pattern04 8 | AAATTTTTTTTGTTGCAAATTTGA 9 | >pattern05 10 | TCTTCTTGGCTTTGGTGGTACTTTT 11 | -------------------------------------------------------------------------------- /inst/extdata/ce2dict1.fa.fai: -------------------------------------------------------------------------------- 1 | pattern01 18 11 18 19 2 | pattern02 25 41 25 26 3 | pattern03 24 78 24 25 4 | pattern04 24 114 24 25 5 | pattern05 25 150 25 26 6 | -------------------------------------------------------------------------------- /inst/extdata/ex1.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/ex1.bam -------------------------------------------------------------------------------- /inst/extdata/ex1.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/ex1.bam.bai -------------------------------------------------------------------------------- /inst/extdata/ex1.bcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/ex1.bcf.gz -------------------------------------------------------------------------------- /inst/extdata/ex1.bcf.gz.csi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/ex1.bcf.gz.csi 
-------------------------------------------------------------------------------- /inst/extdata/ex1.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/ex1.vcf.gz -------------------------------------------------------------------------------- /inst/extdata/ex1.vcf.gz.csi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/ex1.vcf.gz.csi -------------------------------------------------------------------------------- /inst/extdata/example.gtf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/example.gtf.gz -------------------------------------------------------------------------------- /inst/extdata/example.gtf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/example.gtf.gz.tbi -------------------------------------------------------------------------------- /inst/extdata/example_from_SAM_Spec.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/example_from_SAM_Spec.bam -------------------------------------------------------------------------------- /inst/extdata/example_from_SAM_Spec.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/example_from_SAM_Spec.bam.bai 
-------------------------------------------------------------------------------- /inst/extdata/example_from_SAM_Spec.sam: -------------------------------------------------------------------------------- 1 | @HD VN:1.3 SO:coordinate 2 | @SQ SN:ref LN:45 3 | r001 163 ref 7 30 8M2I4M1D3M = 37 39 TTAGATAAAGGATACTG * 4 | r002 0 ref 9 30 3S6M1P1I4M * 0 0 AAAAGATAAGGATA * 5 | r003 0 ref 9 30 5H6M * 0 0 AGCTAA * NM:i:1 6 | r004 0 ref 16 30 6M14N5M * 0 0 ATAGCTTCAGC * 7 | r004a 0 ref 16 30 6=14N5= * 0 0 ATAGCTTCAGC * 8 | r003 16 ref 29 30 6H5M * 0 0 TAGGC * NM:i:0 9 | r001 83 ref 37 30 9M = 7 -39 CAGCGCCAT * 10 | -------------------------------------------------------------------------------- /inst/extdata/no_which_buffered_pileup.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/no_which_buffered_pileup.bam -------------------------------------------------------------------------------- /inst/extdata/no_which_buffered_pileup.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/no_which_buffered_pileup.bam.bai -------------------------------------------------------------------------------- /inst/extdata/no_which_buffered_pileup.sam: -------------------------------------------------------------------------------- 1 | @HD VN:1.3 SO:coordinate 2 | @SQ SN:chr1 LN:15 3 | @SQ SN:chr2 LN:15 4 | B7 0 chr1 1 99 5M * 0 0 AAAAA ===== 5 | B7 0 chr1 3 99 5M * 0 0 AAAAA ===== 6 | B7 0 chr1 3 99 5M * 0 0 CCCCC ===== 7 | B7 0 chr1 5 99 5M * 0 0 AAAAA ===== 8 | B7 0 chr2 5 99 5M * 0 0 GGGGG ===== -------------------------------------------------------------------------------- /inst/extdata/no_which_whole_file.bam: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/no_which_whole_file.bam -------------------------------------------------------------------------------- /inst/extdata/no_which_whole_file.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/no_which_whole_file.bam.bai -------------------------------------------------------------------------------- /inst/extdata/no_which_whole_file.sam: -------------------------------------------------------------------------------- 1 | @HD VN:1.3 SO:coordinate 2 | @SQ SN:chr1 LN:15 3 | @SQ SN:chr2 LN:15 4 | B7 0 chr1 1 99 5M * 0 0 AAAAA ===== 5 | B7 0 chr1 3 99 5M * 0 0 ACCCC ===== 6 | B7 0 chr2 1 99 5M * 0 0 AAACC ===== -------------------------------------------------------------------------------- /inst/extdata/olaps.Rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/olaps.Rda -------------------------------------------------------------------------------- /inst/extdata/querybins.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/querybins.bam -------------------------------------------------------------------------------- /inst/extdata/querybins.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/querybins.bam.bai -------------------------------------------------------------------------------- /inst/extdata/querybins.sam: 
-------------------------------------------------------------------------------- 1 | @HD VN:1.3 SO:coordinate 2 | @SQ SN:sim LN:15 3 | B7 0 sim 1 99 6M * 0 0 ACGT=N ====== 4 | B7 16 sim 7 99 6M * 0 0 ACGT=N ====== 5 | -------------------------------------------------------------------------------- /inst/extdata/revbins.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/revbins.bam -------------------------------------------------------------------------------- /inst/extdata/revbins.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/revbins.bam.bai -------------------------------------------------------------------------------- /inst/extdata/revbins.sam: -------------------------------------------------------------------------------- 1 | @HD VN:1.3 SO:coordinate 2 | @SQ SN:simple LN:15 3 | @SQ SN:bistag LN:15 4 | @SQ SN:tristag LN:15 5 | @SQ SN:inf LN:15 6 | @SQ SN:diffstr LN:15 7 | B7 0 simple 1 99 1M * 0 0 = = 8 | B7 0 simple 1 99 2M * 0 0 A= == 9 | B7 0 simple 1 99 3M * 0 0 CA= === 10 | B7 0 simple 1 99 4M * 0 0 GCA= ==== 11 | B7 0 simple 1 99 5M * 0 0 TGCA= ===== 12 | B7 0 bistag 1 99 2M * 0 0 A= == 13 | B7 0 bistag 2 99 2M * 0 0 C= == 14 | B7 0 bistag 3 99 2M * 0 0 G= == 15 | B7 0 bistag 4 99 2M * 0 0 T= == 16 | B7 0 bistag 5 99 2M * 0 0 N= == 17 | B7 0 tristag 1 99 2M * 0 0 A= == 18 | B7 0 tristag 2 99 3M * 0 0 C=C === 19 | B7 0 tristag 3 99 3M * 0 0 G=G === 20 | B7 0 tristag 4 99 3M * 0 0 T=T === 21 | B7 0 tristag 5 99 3M * 0 0 N=N === 22 | B7 0 inf 1 99 6M * 0 0 AAAAA= ====== 23 | B7 0 inf 3 99 5M * 0 0 CCCC= ===== 24 | B7 0 diffstr 1 99 3M * 0 0 ACG === 25 | B7 16 diffstr 2 99 3M * 0 0 T=N === 26 | 
-------------------------------------------------------------------------------- /inst/extdata/samtools-github.txt: -------------------------------------------------------------------------------- 1 | 44428cd bcftools: version bump 2 | -------------------------------------------------------------------------------- /inst/extdata/slxMaq09_urls.txt: -------------------------------------------------------------------------------- 1 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA06986/alignment/NA06986.SLX.maq.SRP000031.2009_08.bam 2 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA06994/alignment/NA06994.SLX.maq.SRP000031.2009_08.bam 3 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA07051/alignment/NA07051.SLX.maq.SRP000031.2009_08.bam 4 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA07346/alignment/NA07346.SLX.maq.SRP000031.2009_08.bam 5 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA07347/alignment/NA07347.SLX.maq.SRP000031.2009_08.bam 6 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA10847/alignment/NA10847.SLX.maq.SRP000031.2009_08.bam 7 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA11831/alignment/NA11831.SLX.maq.SRP000031.2009_08.bam 8 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA11918/alignment/NA11918.SLX.maq.SRP000031.2009_08.bam 9 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA11920/alignment/NA11920.SLX.maq.SRP000031.2009_08.bam 10 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA11993/alignment/NA11993.SLX.maq.SRP000031.2009_08.bam 11 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA11995/alignment/NA11995.SLX.maq.SRP000031.2009_08.bam 12 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12045/alignment/NA12045.SLX.maq.SRP000031.2009_08.bam 13 | 
ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12144/alignment/NA12144.SLX.maq.SRP000031.2009_08.bam 14 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12154/alignment/NA12154.SLX.maq.SRP000031.2009_08.bam 15 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12249/alignment/NA12249.SLX.maq.SRP000031.2009_08.bam 16 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12414/alignment/NA12414.SLX.maq.SRP000031.2009_08.bam 17 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12716/alignment/NA12716.SLX.maq.SRP000031.2009_08.bam 18 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12717/alignment/NA12717.SLX.maq.SRP000031.2009_08.bam 19 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12749/alignment/NA12749.SLX.maq.SRP000031.2009_08.bam 20 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12750/alignment/NA12750.SLX.maq.SRP000031.2009_08.bam 21 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12751/alignment/NA12751.SLX.maq.SRP000031.2009_08.bam 22 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12776/alignment/NA12776.SLX.maq.SRP000031.2009_08.bam 23 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12828/alignment/NA12828.SLX.maq.SRP000031.2009_08.bam 24 | ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA12878/alignment/NA12878.SLX.maq.SRP000031.2009_08.bam 25 | -------------------------------------------------------------------------------- /inst/extdata/tagfilter.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/tagfilter.bam -------------------------------------------------------------------------------- /inst/extdata/tagfilter.bam.bai: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/tagfilter.bam.bai -------------------------------------------------------------------------------- /inst/extdata/tagfilter.sam: -------------------------------------------------------------------------------- 1 | @HD VN:1.3 SO:coordinate 2 | @SQ SN:notags LN:15 3 | @SQ SN:twotags LN:15 4 | @SQ SN:itag LN:15 5 | @SQ SN:Atag LN:15 6 | @SQ SN:Ztag LN:15 7 | @SQ SN:Ftag LN:15 8 | @SQ SN:Htag LN:15 9 | @SQ SN:Btag LN:15 10 | B7 0 notags 1 99 5M * 0 0 ACNTA ===== 11 | B7 0 twotags 1 99 5M * 0 0 ACNTA ===== II:i:45 AA:A:d 12 | B7 0 itag 1 99 5M * 0 0 ACNTA ===== II:i:42 13 | B7 0 itag 1 99 5M * 0 0 ACNTA ===== II:i:43 14 | B7 0 itag 1 99 5M * 0 0 ACNTA ===== II:i:44 15 | B7 0 Atag 1 99 5M * 0 0 ACNTA ===== AA:A:a 16 | B7 0 Atag 1 99 5M * 0 0 ACNTA ===== AA:A:b 17 | B7 0 Atag 1 99 5M * 0 0 ACNTA ===== AA:A:c 18 | B7 0 Ztag 1 99 5M * 0 0 ACNTA ===== ZZ:Z:woo 19 | B7 0 Ztag 1 99 5M * 0 0 ACNTA ===== ZZ:Z:won 20 | B7 0 Ztag 1 99 5M * 0 0 ACNTA ===== ZZ:Z:wow 21 | B7 0 Ftag 1 99 5M * 0 0 ACNTA ===== FF:f:13.6 22 | B7 0 Htag 1 99 5M * 0 0 ACNTA ===== HH:H:1AE301 23 | B7 0 Btag 1 99 5M * 0 0 ACNTA ===== BB:B:i42,43 24 | -------------------------------------------------------------------------------- /inst/extdata/tiny.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/tiny.bam -------------------------------------------------------------------------------- /inst/extdata/tiny.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/extdata/tiny.bam.bai -------------------------------------------------------------------------------- /inst/extdata/tiny.sam: 
-------------------------------------------------------------------------------- 1 | @HD VN:1.3 SO:coordinate 2 | @SQ SN:seq1 LN:15 3 | @SQ SN:seq2 LN:15 4 | @SQ SN:diff_strands LN:15 5 | @SQ SN:filter_strands LN:15 6 | @SQ SN:min_mapq LN:1 7 | @SQ SN:min_base_qual LN:3 8 | @SQ SN:max_depth LN:1 9 | @SQ SN:coll_nucs_basic LN:15 10 | @SQ SN:coll_nucs_adv LN:15 11 | @SQ SN:dist_all LN:1 12 | @SQ SN:full_seq_range LN:27 13 | @SQ SN:Biostrings_DNA_ALPHABET LN:18 14 | @SQ SN:ext_cigar_hard LN:15 15 | @SQ SN:ext_cigar_soft LN:15 16 | @SQ SN:ref_skip LN:15 17 | @SQ SN:include_deletions LN:15 18 | @SQ SN:min_nuc_depth_mono LN:15 19 | @SQ SN:min_nuc_depth_poly LN:15 20 | @SQ SN:min_minor_allele_depth LN:15 21 | @SQ SN:multi_range_single_rname LN:15 22 | @SQ SN:seqnames_from_tinysam_c LN:15 23 | @SQ SN:all_nuc_levels LN:15 24 | @SQ SN:bins_5 LN:5 25 | @SQ SN:ins1 LN:15 26 | @SQ SN:ins3 LN:15 27 | @SQ SN:ins_multiread LN:15 28 | @SQ SN:ins_base_disqualifiers LN:15 29 | @SQ SN:ins_deletion LN:15 30 | @SQ SN:ins_refskip LN:15 31 | B7 0 ins_refskip 1 99 1M1N1I1M * 0 0 AGC === 32 | B7 0 ins_deletion 1 99 1M1D1I1M * 0 0 AGC === 33 | B7 0 ins_base_disqualifiers 1 99 1M1I * 0 0 NT ++ 34 | B7 0 ins_multiread 1 99 1M1I1M1I1M * 0 0 TGGGT ===== 35 | B7 0 ins_multiread 1 99 1M1I1M1I1M * 0 0 TTTTT ===== 36 | B7 0 ins3 1 99 1M3I1M * 0 0 AGGGC ===== 37 | B7 0 ins1 1 99 1M1I1M * 0 0 AGC === 38 | B7 0 bins_5 1 99 5M * 0 0 ACGT= ===== 39 | B7 0 all_nuc_levels 1 99 2M1D4M * 0 0 ACGTN= ====== 40 | B7 0 seqnames_from_tinysam_c 1 99 1M * 0 0 = = 41 | B7 0 multi_range_single_rname 1 99 4M * 0 0 ACGT ==== 42 | B7 0 min_minor_allele_depth 1 99 1M * 0 0 C = 43 | B7 0 min_minor_allele_depth 1 99 1M * 0 0 A = 44 | B7 0 min_minor_allele_depth 1 99 1M * 0 0 A = 45 | B7 0 min_minor_allele_depth 1 99 1M * 0 0 A = 46 | B7 0 min_nuc_depth_poly 1 99 2M * 0 0 AC == 47 | B7 0 min_nuc_depth_poly 1 99 2M * 0 0 AG == 48 | B7 0 min_nuc_depth_mono 1 99 1M * 0 0 A = 49 | B7 0 include_deletions 1 99 1M1D1M * 0 0 AA == 
50 | B7 0 ref_skip 1 99 1M1N1M * 0 0 AA == 51 | B7 0 ext_cigar_soft 1 99 1S2M2S3M * 0 0 tACttGTN ======== 52 | B7 0 ext_cigar_hard 1 99 1H2M2H3M * 0 0 ACGTN ===== 53 | B7 0 seq1 1 99 5M * 0 0 ACGTA ===== 54 | B7 16 seq2 1 99 5M * 0 0 ACNTA ===== 55 | B7 0 filter_strands 1 99 1M * 0 0 C = 56 | B7 16 filter_strands 1 99 1M * 0 0 C = 57 | B7 0 diff_strands 1 99 5M * 0 0 CCCCC ===== 58 | B7 16 diff_strands 1 99 5M * 0 0 CCCCC ===== 59 | B7 0 min_mapq 1 10 1M * 0 0 A = 60 | B7 0 min_mapq 1 20 1M * 0 0 A = 61 | B7 0 min_mapq 1 30 1M * 0 0 A = 62 | B7 0 min_base_qual 1 99 3M * 0 0 CCC !+5 63 | B7 0 max_depth 1 99 1M * 0 0 A = 64 | B7 0 max_depth 1 99 1M * 0 0 A = 65 | B7 0 max_depth 1 99 1M * 0 0 A = 66 | B7 0 coll_nucs_basic 1 99 5M * 0 0 CCCCC ===== 67 | B7 0 coll_nucs_basic 1 99 5M * 0 0 CCCCC ===== 68 | B7 0 coll_nucs_adv 1 99 1M * 0 0 G = 69 | B7 0 coll_nucs_adv 1 99 1M * 0 0 A = 70 | B7 0 coll_nucs_adv 1 99 1M * 0 0 T = 71 | B7 0 coll_nucs_adv 1 99 1M * 0 0 G = 72 | B7 0 dist_all 1 99 1M * 0 0 A = 73 | B7 16 dist_all 1 99 1M * 0 0 C = 74 | B7 0 dist_all 1 99 1M * 0 0 G = 75 | B7 16 dist_all 1 99 1M * 0 0 T = 76 | B7 0 dist_all 1 99 1M * 0 0 C = 77 | B7 16 dist_all 1 99 1M * 0 0 G = 78 | B7 0 dist_all 1 99 1M * 0 0 T = 79 | B7 16 dist_all 1 99 1M * 0 0 A = 80 | B7 0 dist_all 1 99 1M * 0 0 G = 81 | B7 16 dist_all 1 99 1M * 0 0 T = 82 | B7 0 dist_all 1 99 1M * 0 0 A = 83 | B7 16 dist_all 1 99 1M * 0 0 C = 84 | B7 0 dist_all 1 99 1M * 0 0 T = 85 | B7 16 dist_all 1 99 1M * 0 0 A = 86 | B7 0 dist_all 1 99 1M * 0 0 C = 87 | B7 16 dist_all 1 99 1M * 0 0 G = 88 | B7 0 dist_all 1 99 1M * 0 0 A = 89 | B7 16 dist_all 1 99 1M * 0 0 C = 90 | B7 0 dist_all 1 99 1M * 0 0 G = 91 | B7 16 dist_all 1 99 1M * 0 0 T = 92 | B7 0 dist_all 1 99 1M * 0 0 A = 93 | B7 0 dist_all 2 99 1M * 0 0 A = 94 | B7 0 dist_all 1 99 2M * 0 0 AA == 95 | B7 0 dist_all 2 99 2M * 0 0 AA == 96 | B7 0 dist_all 1 99 3M * 0 0 AAA === 97 | B7 0 dist_all 2 99 3M * 0 0 AAA === 98 | B7 0 dist_all 1 99 4M * 0 0 AAAA 
==== 99 | B7 0 dist_all 2 99 4M * 0 0 AAAA ==== 100 | B7 0 dist_all 1 99 5M * 0 0 AAAAA ===== 101 | B7 0 dist_all 2 99 5M * 0 0 AAAAA ===== 102 | B7 16 dist_all 1 99 1M * 0 0 A = 103 | B7 16 dist_all 2 99 1M * 0 0 A = 104 | B7 16 dist_all 1 99 2M * 0 0 AA == 105 | B7 16 dist_all 2 99 2M * 0 0 AA == 106 | B7 16 dist_all 1 99 3M * 0 0 AAA === 107 | B7 16 dist_all 2 99 3M * 0 0 AAA === 108 | B7 16 dist_all 1 99 4M * 0 0 AAAA ==== 109 | B7 16 dist_all 2 99 4M * 0 0 AAAA ==== 110 | B7 16 dist_all 1 99 5M * 0 0 AAAAA ===== 111 | B7 16 dist_all 2 99 5M * 0 0 AAAAA ===== 112 | B7 0 full_seq_range 1 99 28M * 0 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ=. ============================ 113 | B7 0 Biostrings_DNA_ALPHABET 1 99 18M * 0 0 ACGTMRWSYKVHDBN-+. ================== 114 | -------------------------------------------------------------------------------- /inst/extdata/tophat/README: -------------------------------------------------------------------------------- 1 | output from tophat-1.0.10 test data 2 | -------------------------------------------------------------------------------- /inst/extdata/tophat/coverage.wig: -------------------------------------------------------------------------------- 1 | track type=bedGraph name="TopHat - read coverage" 2 | test_chromosome 0 52 0 3 | test_chromosome 52 53 1 4 | test_chromosome 53 54 2 5 | test_chromosome 54 55 3 6 | test_chromosome 55 57 4 7 | test_chromosome 57 58 6 8 | test_chromosome 58 60 7 9 | test_chromosome 60 65 8 10 | test_chromosome 65 70 9 11 | test_chromosome 70 71 10 12 | test_chromosome 71 72 11 13 | test_chromosome 72 75 12 14 | test_chromosome 75 77 13 15 | test_chromosome 77 79 15 16 | test_chromosome 79 82 16 17 | test_chromosome 82 85 17 18 | test_chromosome 85 87 19 19 | test_chromosome 87 88 20 20 | test_chromosome 88 90 21 21 | test_chromosome 90 91 22 22 | test_chromosome 91 93 23 23 | test_chromosome 93 95 26 24 | test_chromosome 95 97 28 25 | test_chromosome 97 99 30 26 | test_chromosome 99 100 31 27 | 
test_chromosome 100 101 35 28 | test_chromosome 101 102 36 29 | test_chromosome 102 105 38 30 | test_chromosome 105 106 39 31 | test_chromosome 106 107 40 32 | test_chromosome 107 112 43 33 | test_chromosome 112 114 44 34 | test_chromosome 114 118 45 35 | test_chromosome 118 121 46 36 | test_chromosome 121 122 48 37 | test_chromosome 122 124 50 38 | test_chromosome 124 126 53 39 | test_chromosome 126 129 54 40 | test_chromosome 129 130 53 41 | test_chromosome 130 131 55 42 | test_chromosome 131 132 57 43 | test_chromosome 132 133 55 44 | test_chromosome 133 134 54 45 | test_chromosome 134 135 55 46 | test_chromosome 135 136 54 47 | test_chromosome 136 137 56 48 | test_chromosome 137 138 57 49 | test_chromosome 138 141 60 50 | test_chromosome 141 147 62 51 | test_chromosome 147 148 61 52 | test_chromosome 148 150 62 53 | test_chromosome 150 152 61 54 | test_chromosome 152 153 60 55 | test_chromosome 153 154 61 56 | test_chromosome 154 155 63 57 | test_chromosome 155 156 64 58 | test_chromosome 156 158 65 59 | test_chromosome 158 159 66 60 | test_chromosome 159 160 68 61 | test_chromosome 160 161 67 62 | test_chromosome 161 163 68 63 | test_chromosome 163 164 69 64 | test_chromosome 164 165 71 65 | test_chromosome 165 167 73 66 | test_chromosome 167 168 74 67 | test_chromosome 168 169 72 68 | test_chromosome 169 170 75 69 | test_chromosome 170 171 73 70 | test_chromosome 171 172 74 71 | test_chromosome 172 174 75 72 | test_chromosome 174 175 77 73 | test_chromosome 175 176 74 74 | test_chromosome 176 177 73 75 | test_chromosome 177 181 71 76 | test_chromosome 181 182 70 77 | test_chromosome 182 183 67 78 | test_chromosome 183 184 68 79 | test_chromosome 184 187 69 80 | test_chromosome 187 189 68 81 | test_chromosome 189 193 67 82 | test_chromosome 193 194 68 83 | test_chromosome 194 196 69 84 | test_chromosome 196 197 67 85 | test_chromosome 197 199 65 86 | test_chromosome 199 200 63 87 | test_chromosome 200 201 64 88 | test_chromosome 201 202 63 89 | test_chromosome 
202 205 62 90 | test_chromosome 205 207 60 91 | test_chromosome 207 209 61 92 | test_chromosome 209 211 60 93 | test_chromosome 211 213 59 94 | test_chromosome 213 216 57 95 | test_chromosome 216 220 55 96 | test_chromosome 220 221 54 97 | test_chromosome 221 224 53 98 | test_chromosome 224 225 54 99 | test_chromosome 225 228 56 100 | test_chromosome 228 229 55 101 | test_chromosome 229 230 52 102 | test_chromosome 230 231 51 103 | test_chromosome 231 232 50 104 | test_chromosome 232 233 49 105 | test_chromosome 233 234 48 106 | test_chromosome 234 235 47 107 | test_chromosome 235 236 46 108 | test_chromosome 236 237 45 109 | test_chromosome 237 238 44 110 | test_chromosome 238 239 42 111 | test_chromosome 239 240 40 112 | test_chromosome 240 241 37 113 | test_chromosome 241 244 36 114 | test_chromosome 244 246 33 115 | test_chromosome 246 247 34 116 | test_chromosome 247 249 31 117 | test_chromosome 249 250 28 118 | test_chromosome 250 349 0 119 | test_chromosome 349 350 1 120 | test_chromosome 350 351 30 121 | test_chromosome 351 352 32 122 | test_chromosome 352 353 34 123 | test_chromosome 353 354 35 124 | test_chromosome 354 357 36 125 | test_chromosome 357 358 37 126 | test_chromosome 358 361 36 127 | test_chromosome 361 365 37 128 | test_chromosome 365 368 38 129 | test_chromosome 368 370 37 130 | test_chromosome 370 372 38 131 | test_chromosome 372 378 41 132 | test_chromosome 378 380 40 133 | test_chromosome 380 381 39 134 | test_chromosome 381 382 37 135 | test_chromosome 382 386 36 136 | test_chromosome 386 387 35 137 | test_chromosome 387 388 34 138 | test_chromosome 388 390 33 139 | test_chromosome 390 398 32 140 | test_chromosome 398 399 31 141 | test_chromosome 399 400 30 142 | test_chromosome 400 402 1 143 | test_chromosome 402 500 0 144 | test_chromosome 500 509 27 145 | test_chromosome 509 517 26 146 | test_chromosome 517 518 25 147 | test_chromosome 518 521 24 148 | test_chromosome 521 524 22 149 | test_chromosome 524 525 21 150 | test_chromosome 
525 526 19 151 | test_chromosome 526 527 17 152 | test_chromosome 527 528 15 153 | test_chromosome 528 529 14 154 | test_chromosome 529 530 13 155 | test_chromosome 530 532 12 156 | test_chromosome 532 534 11 157 | test_chromosome 534 536 10 158 | test_chromosome 536 540 9 159 | test_chromosome 540 543 8 160 | test_chromosome 543 544 7 161 | test_chromosome 544 545 6 162 | test_chromosome 545 547 5 163 | test_chromosome 547 549 2 164 | test_chromosome 549 549 1 165 | -------------------------------------------------------------------------------- /inst/extdata/tophat/junctions.bed: -------------------------------------------------------------------------------- 1 | track name=junctions description="TopHat junctions" 2 | test_chromosome 180 402 JUNC00000001 27 + 180 402 255,0,0 2 70,52 0,170 3 | test_chromosome 349 550 JUNC00000002 27 + 349 550 255,0,0 2 51,50 0,151 4 |
--------------------------------------------------------------------------------
/inst/scripts/BamViews-1000g.R:
--------------------------------------------------------------------------------
## Scratch script: build a BamViews over remote 1000 genomes pilot BAM
## files (SLX / maq / 2009_08), restricted to genes of the KEGG
## "Caffeine metabolism" pathway. Index (.bai) files are cached in the
## working directory; the BAM files themselves stay remote.

setwd("/home/mtmorgan/proj/a/1000g/slx_maq_09_index_files")
ftpBase = "ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/"

## Download the index files only when none are present locally.
if (length(list.files(pattern=".*bai")) == 0)
{
    library(RCurl)
    indivs <- strsplit(getURL(ftpBase, ftplistonly=TRUE), "\n")[[1]]
    aln <- paste(ftpBase, indivs, "/alignment/", sep="")

    ## one directory listing per individual
    urls <- strsplit(getURL(aln, dirlistonly=TRUE), "\n")

    urls0 <- urls[sapply(urls, length) != 0]

    fls0 <- unlist(unname(urls0))
    fls1 <- fls0[grepl("bai$", fls0)]
    ## Keep names made of exactly 7 dot-separated fields. The original
    ## indexed with 'fls' itself, which does not exist yet at this
    ## point; the filter has to be computed from 'fls1'.
    fls <- fls1[sapply(strsplit(fls1, "\\."), length)==7]
    m <- t(as.data.frame(strsplit(fls, "\\.")))[,1:5]
    dimnames(m) <- list(NULL,
                        c("Individual", "Platform", "Alignment", "SRA", "Date"))
    df <- cbind(as.data.frame(m), File=fls)
    xtabs(~Platform+Alignment+Date, df)
    ## > xtabs(~Platform+Alignment, df)
    ## Alignment
    ## Platform corona maq MOSAIK ssaha
    ## 454 0 0 9 14
    ## SLX 0 25 11 0
    ## SOLID 7 0 0 0

    ## per-individual SLX/maq/2009_08 index files, as full URLs
    urls1 <- Filter(function(x) length(x) != 0,
                    lapply(urls, function(x) x[grepl("SLX.maq.*2009_08.*bai$", x)]))
    slxMaq09 <- mapply(paste, names(urls1), urls1, sep="", USE.NAMES=FALSE)

    mapply(download.file, slxMaq09, basename(slxMaq09), MoreArgs=list(method="curl"))
}

## Remote BAM URL for every locally cached index file: the individual is
## the part of the file name before the first dot.
indexFiles <- list.files(pattern="bai$")
slxMaq09 <- paste(ftpBase, sub("^([^\\.]+).*$", "\\1", indexFiles),
                  "/alignment/", sub("\\.bai$", "", indexFiles), sep="")
## headers <- scanBamHeader(fls) # nothing useful here :(

## Some regions of interest: genes involved in caffeine metabolism
library(KEGG.db)
library(org.Hs.eg.db)
library(BSgenome.Hsapiens.UCSC.hg18)
library(biomaRt)
library(GenomicFeatures)
kid <- revmap(KEGGPATHID2NAME)[["Caffeine metabolism"]]
egid <- KEGGPATHID2EXTID[[sprintf("hsa%s", kid)]]

mart <- useMart("ensembl", "hsapiens_gene_ensembl")
ensid <- getBM(c("ensembl_transcript_id"), filters="entrezgene",
               values=egid, mart=mart)[[1]]
txdb <- makeTxDbFromBiomart(transcript_ids=ensid)
saveFeatures(txdb, "caffeine-txdb.sqlite")

egid <- egid[egid != "9"] # multiple locations
tbl <- merge(toTable(org.Hs.egCHRLOC[egid]), toTable(org.Hs.egCHRLOCEND[egid]))
## Negative locations encode the minus strand; coordinates are stored
## as absolute values with the sign moved into 'strand'.
rng <- with(tbl, {
    rng0 <- GRanges(Chromosome,
                    IRanges(start=abs(start_location), end=abs(end_location)),
                    strand=ifelse(start_location>=0, "+", "-"),
                    EntrezId=gene_id,
                    Genename=mappedRkeys(org.Hs.egGENENAME[gene_id]))
    metadata(rng0) <- list(Genome="hg18?")
    rng0
})

library(GenomicAlignments)
library(multicore)
## NOTE(review): the original passed 'fls' as the BAM paths, but 'fls' is
## only defined inside the download branch above (and holds index file
## names). 'slxMaq09' has one remote BAM URL per entry of 'indexFiles',
## which looks like the intended pairing -- confirm.
bv <- BamViews(slxMaq09, sub("\\.bai$", "", indexFiles),
               bamRanges=GRanges(seqnames=seqnames(rng),
                                 ranges=ranges(rng),
                                 EntrezId=mcols(rng)[["EntrezId"]]))
gapped <- readGAlignments(bv[1:2,1:3])
--------------------------------------------------------------------------------
/inst/scripts/features.R:
--------------------------------------------------------------------------------
## Scratch script: count and scan reads overlapping chr6 transcripts,
## first against a remote BAM file, then (when present) a local copy.

fl0 = "ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA19240/alignment/NA19240.chrom6.SLX.maq.SRP000032.2009_07.bam"

dir <- getwd()
fl <- file.path(dir, "NA19240.chrom6.SLX.maq.SRP000032.2009_07.bam")
stopifnot(file.exists(paste(fl, "bai", sep="."))) # require local index

library(GenomicFeatures)
library(IRanges)
data(geneHuman)
transcripts <- transcripts(geneHuman, proximal=300)

library(Rsamtools)
## remote access
chr6a0 <- ranges(transcripts)[["chr6"]][1:2]
p10 <- ScanBamParam(which=IRangesList(`6`=chr6a0))
(cnt0 <- countBam(fl0, fl0, param=p10))
sum(cnt0$records)
system.time(res0 <- scanBam(fl0, fl0, param=p10))


## local
if (file.exists(fl)) {
    cnt <- countBam(fl, param=p10); sum(cnt$records)
    system.time(res0 <- scanBam(fl, param=p10))

    ## larger, local
    chr6a <- ranges(transcripts)[["chr6"]][1:50]
    p1 <- ScanBamParam(which=IRangesList(`6`=chr6a))
    sum(countBam(fl, param=p1)$records)
    system.time(res <- scanBam(fl, param=p1)) # about 30s
}
-------------------------------------------------------------------------------- /inst/scripts/remote_test.R: -------------------------------------------------------------------------------- 1 | suppressMessages(library(Rsamtools)) 2 | fl = "ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA19240/alignment/NA19240.chrom6.SLX.maq.SRP000032.2009_07.bam" 3 | p1 <- ScanBamParam(which=IRangesList("6"=IRanges(10000, 11000))) 4 | res <- scanBam(fl, param=p1)[[1]] 5 | res[["seq"]] 6 | 7 | fl = "ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA19240/alignment/NA19240.chrom6.454.ssaha.SRP000032.2009_07.bam" 8 | p1 <-
ScanBamParam(which=IRangesList("6"=IRanges(100000, 110000))) 9 | res <- scanBam(fl, param=p1)[[1]] 10 | res[["seq"]] 11 | 12 | header <- scanBamHeader(fl) 13 | txt <- header[[1]][[2]] 14 | table(names(txt)) 15 | txt[names(txt) == "@HD"] # 'header' 16 | txt[names(txt) == "@RG"][1:5] # 'read group' 17 | txt[names(txt) == "@SQ"][1:5] # 'sequence group' 18 | 19 |
--------------------------------------------------------------------------------
/inst/scripts/update-samtools.sh:
--------------------------------------------------------------------------------
#! /bin/sh
##
## Refresh the vendored samtools, bcftools and tabix sources under ../src
## from their upstream git repositories. Only files that already exist in
## a destination directory are overwritten; no new files are added.
## All paths are relative to the current working directory.

## GitHub no longer serves the unauthenticated git:// protocol, so the
## repositories are cloned over https.
repos0="https://github.com/samtools/samtools.git"
repos1="https://github.com/samtools/tabix.git"

dest="../inst/extdata"
dest0="../src/samtools/"
dest1="../src/samtools/bcftools/"
dest2="../src/tabix/"

## Check every destination before any network access. The original
## tested $dest2 before assigning it, so that check could never fail.
for d in "$dest" "$dest0" "$dest1" "$dest2"; do
    if test ! -d "$d"; then
        echo "directory does not exist: '$d'"
        exit 1
    fi
done

## mktemp -d creates the directories atomically (git clones happily into
## an existing empty directory); both clones are removed on exit.
unpack=`mktemp -d` || exit 1
unpack1=`mktemp -d` || exit 1
trap 'rm -rf "$unpack" "$unpack1"' EXIT

## update_files SRC DEST
## Overwrite each regular file in DEST with the same-named file from SRC.
## Sub-directories of DEST are skipped.
update_files () {
    for path in "$2"/*; do
        test -f "$path" || continue
        f=`basename "$path"`
        echo "updating file $f"
        cp "$1/$f" "$2/$f"
    done
}

git clone "$repos0" "$unpack" || exit 1
update_files "$unpack" "$dest0"
update_files "$unpack/bcftools" "$dest1"

git clone "$repos1" "$unpack1" || exit 1
update_files "$unpack1" "$dest2"
-------------------------------------------------------------------------------- /inst/unitTests/cases/RNEXT.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/unitTests/cases/RNEXT.bam -------------------------------------------------------------------------------- /inst/unitTests/cases/ex1.sam.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/unitTests/cases/ex1.sam.gz -------------------------------------------------------------------------------- /inst/unitTests/cases/ex1_noindex.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/unitTests/cases/ex1_noindex.bam -------------------------------------------------------------------------------- /inst/unitTests/cases/ex1_shuf1000.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/unitTests/cases/ex1_shuf1000.bam
-------------------------------------------------------------------------------- /inst/unitTests/cases/ex1_shuf1000.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/unitTests/cases/ex1_shuf1000.bam.bai -------------------------------------------------------------------------------- /inst/unitTests/cases/ex1_unsort.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/unitTests/cases/ex1_unsort.bam -------------------------------------------------------------------------------- /inst/unitTests/cases/ex1_zero_index.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/unitTests/cases/ex1_zero_index.bam.bai -------------------------------------------------------------------------------- /inst/unitTests/cases/no_SAMPLE_header.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/unitTests/cases/no_SAMPLE_header.vcf.gz -------------------------------------------------------------------------------- /inst/unitTests/cases/no_header_line.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/unitTests/cases/no_header_line.vcf.gz -------------------------------------------------------------------------------- /inst/unitTests/cases/pileup-no-stars.txt: -------------------------------------------------------------------------------- 1 | 6 12961 C G 4 4 0 1 g ) 2 | 6 14074 G T 4 4 0 1 T$ % 3 | 6 
16172 C A 2 2 10 3 ,$.$a F5, 4 | 6 28466 A C 6 6 8 2 C, 30 5 | 6 33701 A G 21 21 12 35 ,,,,$,,,,.,,,,.,..,..g.,,gGggggGgggg ??@AA3>=3((>A?<-9<-8?9A>87;%6148+=2 6 | 6 45116 C M 11 65 28 14 .a,,a.AAAaAaa^!. ?C:9B8;<>>==4B 7 | 6 48270 T A 0 0 0 2 .^!a ?% 8 | 6 48271 T C 0 0 0 2 .c && 9 | 6 70415 C M 83 190 29 55 A$,$aa.AAaaA.A,A.,,A.A.aaA,A.AaaAa.a,a,A,.Aaa,,AaaaaAAA.^5. +<;@8A:>B>?6=>?6<=87,B>@:A?;$>B?>>,A??=<>;?9,6?;>@@>;=D 10 | 6 71768 C T 1 1 12 34 tt,.T..,,...,,.,,.T.,.,T..,.T..t,, =?0:>1>;3'74979=<2@<=?<>??=>;;;4=+ 11 | 6 76966 G T 7 7 10 1 t ( 12 | 6 76970 G C 0 0 7 2 c, %0 13 | 6 77243 A T 2 2 31 4 T... .%;: 14 | 6 90561 T A 4 4 7 2 A, /9 15 | 6 91279 T G 8 8 4 6 gGg,GG ?+;*5/ 16 | 6 92704 G C 57 87 19 21 ccCCCccCcccccccCCcc,C >:7<>=<6==85@4:=;<8.8 17 | 6 92848 C M 0 0 33 51 ,,,,.A,aA$..$.$.AAA..,.,,,A,.....a...A..aA...,,..,.,,, <7<6:A=B,?<==<89>7;644887<;.>??>??.?>:1<>=020=2<:97 18 | 6 92941 T Y 48 48 30 63 C$CcCc,,Cc,CC.$C....,Cc..,,cG.A,,..c,c.,.c,c,,,,,A,a,.C.c.c..cGc. ,=:,:9:'?;:0,'3'B9;58>7<96'=,3<>$895?:?<:38-8/;)8%6)?,*>3?&3:.= 19 | 6 93004 A M 44 44 37 35 ,$C,,..$T,cc.,,C,C,.C,c,C.c,,c,cc.,cC A>BA7;$C>??:>:@:@=;):?9A:A)4'<+?810 20 | 6 93039 T Y 62 62 43 27 .,$Cc.C,,,,,c,cCC,C,,.Ccccca A=%(+2@A=;;>%.:5=@7=:7" 21 | 6 93040 G R 69 69 42 28 .A$a$.A,,,,,ataAA,C,,.Aaaaaa^0.^~, 9-+%6>>@<-?(?55?%=+>?=;A.%7= 22 | 6 93097 G R 122 122 54 32 a,,$a,aa,.a.aA,,.Aa,A.,Aa.a....^ra^g, ==<==8<<&>5B1<=68A326:A<185534&* 23 | 6 93162 G R 57 66 44 32 a$,,.$CaAAa,,,a,,.a,aaa,.aA.A,,,,^7A +:;1$>5.:.9?9&;,?-3(5/+1.4>$'%7+ 24 | 6 93194 T T 97 0 52 35 ..$,$,,,G..,...,.,..-2GC...,.,.-2GC,,-2gc,....,.^s. 
&%><<<%%(=3'/@5>:2&4.<;;(@;=A,;:+.= 25 | -------------------------------------------------------------------------------- /inst/unitTests/cases/plp_refskip.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/unitTests/cases/plp_refskip.bam -------------------------------------------------------------------------------- /inst/unitTests/cases/plp_refskip.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/Rsamtools/316eefa7fe95fce366171bd7a7972bfd66a2a27c/inst/unitTests/cases/plp_refskip.bam.bai
--------------------------------------------------------------------------------
/inst/unitTests/test_BcfFile.R:
--------------------------------------------------------------------------------
## RUnit tests for BcfFile: open/close state, header parsing, and
## scanning with and without an index.

fl <- system.file("extdata", "ex1.bcf.gz", package="Rsamtools")

## open()/close() toggle isOpen(); closing a closed file is an error;
## re-opening an open file yields the same external pointer.
test_BcfFile_openclose <- function()
{
    .normalizePath <- Rsamtools:::.normalizePath
    bf <- BcfFile(fl, character(0)) # no index
    ## the test ends with 'bf' open; release it whatever happens
    on.exit(if (isOpen(bf)) close(bf))
    checkTrue(!isOpen(bf))
    open(bf)
    checkTrue(isOpen(bf))
    checkIdentical(.normalizePath(fl), path(bf))
    checkIdentical(character(0), index(bf, asNA=FALSE))
    checkIdentical(NA_character_, index(bf))
    close(bf)
    checkTrue(!isOpen(bf))
    checkException(close(bf), silent=TRUE)
    bf <- open(bf) # open a closed BcfFile
    checkTrue(isOpen(bf))
    bf1 <- open(bf) # (re)open BcfFile
    checkTrue(isOpen(bf1))
    checkTrue(identical(bf$.extptr, bf1$.extptr))

    checkTrue(isOpen(bf))
    checkIdentical(.normalizePath(fl), path(bf))
    ## checkIdentical(.normalizePath(fl), index(bf))
}

## scanBcfHeader() gives the same header for an open and an unopened
## BcfFile, and leaves an unopened file closed.
test_BcfFile_scanBcfHeader <- function()
{
    .chk <- function(h) {
        checkTrue(validObject(h))
        checkEquals(3L, length(h))
        checkEquals(2L, length(h[["Reference"]]))
        checkEquals("ex1.bam", h[["Sample"]])
        checkEquals(6L, length(h[["Header"]]))
    }
    opened <- open(BcfFile(fl, character(0)))
    on.exit(close(opened))
    .chk(scanBcfHeader(opened))
    unopened <- BcfFile(fl, character(0))
    .chk(scanBcfHeader(unopened))
    checkTrue(!isOpen(unopened))

    ## a header with a single, value-less FORMAT line must still convert
    header <- list(Reference = character(0),
                   Sample = character(0),
                   Header = "##FORMAT=")
    checkTrue(validObject(Rsamtools:::.bcfHeaderAsSimpleList(header)))
}

## Sample names are still reported for a VCF without ##SAMPLE lines.
test_BcfFile_scanBcfHeader_no_SAMPLE_header <- function()
{
    fl <- system.file(package="Rsamtools", "unitTests", "cases",
                      "no_SAMPLE_header.vcf.gz")
    bf <- open(BcfFile(fl, character(0)))
    on.exit(close(bf))
    sample <- scanBcfHeader(bf)[["Sample"]]
    exp <- c("NA00001", "NA00002", "NA00003")
    checkIdentical(exp, sample)
}

## Disabled (name lacks the 'test_' prefix): needs network access.
TEMPORARY_DISABLED_test_BcfFile_scanBcfHeader_remote <- function()
{
    if ("windows" == .Platform$OS.type) {
        DEACTIVATED("remote scanBcFHeader not supported on Windows")
        return(TRUE)
    }

    fl <- "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20110521/ALL.chr22.phase1_release_v3.20101123.snps_indels_svs.genotypes.vcf.gz"
    ## a warning while opening (e.g., server unreachable) skips the test
    file <- tryCatch({
        open(BcfFile(fl))
    }, warning=function(w) {
        message("'test_BcfFile_scanBcfHeader_remote' warning:\n ",
                conditionMessage(w))
        TRUE
    })
    if (is.logical(file))
        return(file)

    obs <- .Call(Rsamtools:::.scan_bcf_header, Rsamtools:::.extptr(file))
    close(file)
    exp <- setNames(c(0L, 1092L, 29L), c("Reference", "Sample", "Header"))
    checkIdentical(exp, lengths(obs))
}

## A VCF lacking the "#CHROM ..." line fails with an informative message.
test_BcfFile_scanBcfHeader_no_header_line <- function()
{
    fl <- system.file(package="Rsamtools", "unitTests", "cases",
                      "no_header_line.vcf.gz")
    msg <- NULL
    tryCatch(scanBcfHeader(fl), error=function(err) {
        msg <<- conditionMessage(err)
    })
    checkIdentical("no 'header' line \"#CHROM POS ID...\"?", msg)
}

## Without an index (and without 'which') every record is returned.
test_BcfFile_scan_noindex <- function()
{
    bf <- open(BcfFile(fl, character(0)))
    on.exit(close(bf))
    checkTrue(isOpen(bf))

    p <- ScanBcfParam()
    res <- scanBcf(bf, param=p)
    checkEquals(11L, length(res))
    checkEquals(3065L, res[["RecordsPerRange"]])
    checkEquals(3065L, unique(lengths(res[1:9])))
    checkEquals(1, length(res[["GENO"]]))
    checkEquals(3065L, length(res[["GENO"]][["PL"]]))

    checkIdentical(res, scanBcf(bf))
}

## With an index, 'which' restricts the scan to the requested ranges.
test_BcfFile_scan_index <- function()
{
    bf <- open(BcfFile(fl))
    on.exit(close(bf))
    which <- IRangesList(seq1=IRanges(c(1, 1000), width=10),
                         seq2=IRanges(c(100, 1000), width=10))
    p <- ScanBcfParam(which=which)
    res <- scanBcf(bf, param=p)
    checkEquals(11L, length(res))
    checkEquals(c(0, 10, 10, 20), res[["RecordsPerRange"]])
    checkEquals(30L, unique(sapply(res[1:9], length)))
    checkEquals(1, length(res[["GENO"]]))
    checkEquals(30L, length(res[["GENO"]][["PL"]]))
}
--------------------------------------------------------------------------------
/inst/unitTests/test_FaFile.R:
--------------------------------------------------------------------------------
## RUnit tests for FaFile: open/close, indexing, counting and scanning.

fl <- system.file("extdata", "ce2dict1.fa", package="Rsamtools")

## open() and close() return the file and toggle isOpen().
test_FaFile_openclose <- function()
{
    fa <- FaFile(fl)
    checkIdentical(FALSE, isOpen(fa))
    checkIdentical(TRUE, isOpen(open(fa)))
    checkIdentical(FALSE, isOpen(close(fa)))
}

## Opening an empty file is an error.
test_FaFile_emptyfile <- function()
{
    fl <- tempfile()
    on.exit(unlink(fl))
    file.create(fl)
    checkException(open(fa <- FaFile(fl)), silent=TRUE)
}

## A record with an empty identifier can be opened and closed.
test_FaFile_emptyid <- function()
{
    fl <- tempfile()
    ## presumably open() writes a '<fl>.fai' index next to the file --
    ## unlink() is a no-op for paths that do not exist
    on.exit(unlink(c(fl, paste(fl, "fai", sep="."))))
    cat(">\nACTA", file=fl)
    open(fa <- FaFile(fl))
    close(fa)
}

## Re-indexing keeps the index path unchanged.
test_FaFile_indexFa <- function()
{
    fa <- FaFile(fl)
    fai <- index(fa)
    fa <- indexFa(fa)
    checkIdentical(fai, index(fa))
}

test_FaFile_scanFaIndex <- function() 35 | { 36 | .checkIdx <- function(idx) { 37 | checkTrue(is(idx, "GRanges")) 38 | checkIdentical(5L, length(idx)) 39 | checkIdentical(116L, sum(width(idx))) 40 | } 41 | fa <- FaFile(fl) 42 | .checkIdx(scanFaIndex(fa)) 43 | .checkIdx(scanFaIndex(fl)) 44 | } 45 | 46 | test_FaFile_count <- function() 47 | { 48 | f <- FaFile(fl) 49 | status <- isOpen(f) 50 | checkIdentical(status, FALSE) 51 | checkIdentical(5L, countFa(FaFile(fl))) 52 | checkIdentical(status, isOpen(f)) 53 | checkIdentical(5L, countFa(open(FaFile(fl)))) 54 | checkIdentical(5L, countFa(fl)) 55 | } 56 | 57 | test_FaFile_scanFa <- function() 58 | { 59 | .checkRes <- function(res) { ## validate the scanFa() result itself, not the query 'idx' 60 | checkTrue(is(res, "DNAStringSet")) 61 | checkIdentical(5L, length(res)) 62 | checkIdentical(116L, sum(width(res))) 63 | } 64 | fa <- open(FaFile(fl)) 65 | idx <- scanFaIndex(fa)[1:5] 66 | .checkRes(scanFa(fa, idx)) 67 | .checkRes(scanFa(fl, idx)) 68 | 69 | ## scanFa,*,missing-methods 70 | checkTrue(validObject(scanFa(fa))) 71 | checkTrue(validObject(scanFa(fl))) 72 | 73 | ## GRanges 74 | exp0 <- subseq(scanFa(fa)[c(1, 3)], 5, 9) 75 | gr <- GRanges(c("pattern01", "pattern03"), IRanges(5, width=5)) 76 | checkIdentical(as.character(exp0), as.character(scanFa(fa, gr))) 77 | checkIdentical(as.character(DNAStringSet()), 78 | unname(as.character(scanFa(fa, GRanges())))) 79 | 80 | ## scanFa ignores strand 81 | strand(gr) <- c("-", "+") 82 | checkIdentical(as.character(exp0), as.character(scanFa(fa, gr))) 83 | 84 | ## IntegerRangesList 85 | rl <- as(gr, "IntegerRangesList") 86 | checkIdentical(as.character(exp0), as.character(scanFa(fa, rl))) 87 | } 88 | 89 | test_FaFile_getSeq <- function() 90 | { 91 | fa <- open(FaFile(fl)) 92 | exp0 <- subseq(scanFa(fa)[c(1, 3)], 5, 9) 93 | 94 | gr <- GRanges(c("pattern01", "pattern03"), IRanges(5, width=5)) 95 | checkIdentical(as.character(exp0), as.character(getSeq(fa, gr))) 96 | 97 | ## '-' strand is reverse complement 98 | strand(gr) <- c("-", "+") 99 | 
exp <- exp0 100 | exp[1] <- reverseComplement(exp[1]) 101 | checkIdentical(as.character(exp), as.character(getSeq(fa, gr))) 102 | 103 | ## IntegerRangesList 104 | rl <- as(gr, "IntegerRangesList") 105 | checkIdentical(as.character(exp0), as.character(getSeq(fa, rl))) 106 | } 107 | -------------------------------------------------------------------------------- /inst/unitTests/test_RsamtoolsFile.R: -------------------------------------------------------------------------------- 1 | test_RsamtoolsFile_constructor <- function() { 2 | fl <- system.file("extdata", "example.gtf.gz", package="Rsamtools") 3 | checkException(TabixFile(c(fl, fl)), silent=TRUE) 4 | 5 | checkTrue(validObject(tbx <- TabixFile(fl, character()))) 6 | checkIdentical(character(0), index(tbx, asNA=FALSE)) 7 | 8 | checkTrue(validObject(tbx <- TabixFile(fl, NA))) 9 | checkIdentical(character(0), index(tbx, asNA=FALSE)) 10 | 11 | tbx <- TabixFile(fl) 12 | checkIdentical(tbx, TabixFile(tbx)) 13 | checkTrue(validObject(TabixFile(tbx))) # idempotent 14 | } 15 | 16 | test_RsamtoolsFileList_constructor <- function() { 17 | fl <- system.file("extdata", "example.gtf.gz", package="Rsamtools") 18 | fls <- c(fl, fl) 19 | 20 | checkTrue(validObject(TabixFileList())) # 0-length 21 | 22 | checkTrue(validObject(tbx <- TabixFileList(fls))) 23 | checkIdentical(setNames(fls, basename(fls)), path(tbx)) 24 | checkIdentical(setNames(paste(fls, "tbi", sep="."), basename(fls)), 25 | index(tbx)) 26 | 27 | checkTrue(validObject(tbx <- TabixFileList(fls, character()))) 28 | checkIdentical(setNames(fls, basename(fls)), path(tbx)) 29 | checkIdentical(setNames(rep(NA_character_, 2), basename(fls)), 30 | index(tbx)) 31 | 32 | checkTrue(validObject(tbx <- TabixFileList(fls, NA))) 33 | checkIdentical(setNames(fls, basename(fls)), path(tbx)) 34 | checkIdentical(setNames(rep(NA_character_, 2), basename(fls)), 35 | index(tbx)) 36 | 37 | tbx <- TabixFile(fl) 38 | checkTrue(validObject(TabixFileList(tbx))) 39 | 
checkTrue(validObject(TabixFileList(tbx, tbx))) 40 | checkTrue(validObject(TabixFileList(list(tbx, tbx)))) 41 | 42 | tbx <- TabixFileList(TabixFile(fl)) 43 | checkIdentical(TabixFileList(tbx), tbx) # idempotent 44 | } 45 | -------------------------------------------------------------------------------- /inst/unitTests/test_TabixFile.R: -------------------------------------------------------------------------------- 1 | fl <- system.file("extdata", "example.gtf.gz", package="Rsamtools") 2 | 3 | test_TabixFile_openclose <- function() 4 | { 5 | .normalizePath <- Rsamtools:::.normalizePath 6 | tab <- TabixFile(fl) 7 | checkTrue(!isOpen(tab)) 8 | open(tab) 9 | checkTrue(isOpen(tab)) 10 | checkTrue(isOpen(TabixFile(tab))) 11 | 12 | checkIdentical(.normalizePath(fl), path(tab)) 13 | checkIdentical(sprintf("%s.tbi", .normalizePath(fl)), index(tab)) 14 | close(tab) 15 | checkTrue(!isOpen(tab)) 16 | checkException(close(tab), silent=TRUE) 17 | tab <- open(tab) # open a closed TabixFile 18 | checkTrue(isOpen(tab)) 19 | tab1 <- open(tab) # (re)open TabixFile 20 | checkTrue(isOpen(tab1)) 21 | checkTrue(identical(tab$.extptr, tab1$.extptr)) 22 | } 23 | 24 | test_TabixFile_scan <- function() 25 | { 26 | tab <- open(TabixFile(fl)) 27 | param <- GRanges("chr1", IRanges(1, 10000)) 28 | 29 | res <- scanTabix(tab, param=param) 30 | checkIdentical("chr1:1-10000", names(res)) 31 | checkIdentical(86L, length(res[[1]])) 32 | 33 | param <- GRanges(c("chr1", "chr2"), 34 | IRanges(c(1,1), width=100000)) 35 | 36 | res <- scanTabix(tab, param=param) 37 | checkIdentical(c("chr1:1-100000", "chr2:1-100000"), names(res)) 38 | checkIdentical(c(157L, 15L), as.vector(sapply(res, length))) 39 | } 40 | 41 | test_TabixFile_yield <- function() 42 | { 43 | tab <- open(TabixFile(fl, yieldSize=100)) 44 | it <- integer() 45 | while(length(res <- scanTabix(tab)[[1]])) 46 | it <- append(it, length(res)) 47 | close(tab) 48 | checkIdentical(c(100L, 100L, 37L), it) 49 | 50 | rng <- GRanges(c("seq1", "seq2"), 
IRanges(1, c(1575, 1584))) 51 | open(tab) 52 | checkException(scanTabix(tab, param=rng), silent=TRUE) 53 | close(tab) 54 | } 55 | 56 | test_TabixFile_header <- function() 57 | { 58 | hdr <- headerTabix(fl) 59 | exp <- c("seqnames", "indexColumns", "skip", "comment", "header") 60 | checkIdentical(exp, names(hdr)) 61 | checkIdentical(c("chr1", "chr2"), hdr$seqnames) 62 | checkIdentical(c(1L, 4L, 5L), unname(hdr$indexColumns)) 63 | checkIdentical(0L, hdr$skip) 64 | checkIdentical("#", hdr$comment) 65 | checkIdentical(hdr$seqnames, seqnamesTabix(fl)) 66 | checkIdentical(character(), hdr$header) 67 | } 68 | 69 | test_TabixFile_header_respects_first_line <- function() 70 | { 71 | tbx <- open(TabixFile(fl)) 72 | xx <- headerTabix(tbx) 73 | obs <- unlist(scanTabix(tbx)) 74 | close(tbx) 75 | exp <- unlist(scanTabix(tbx)) 76 | checkIdentical(obs, exp) 77 | } 78 | 79 | test_TabixFile_header_remote <- function() 80 | { 81 | if ("windows" == .Platform$OS.type) { 82 | DEACTIVATED("remote tabix not supported on Windows") 83 | return(TRUE) 84 | } 85 | 86 | fl <- sprintf("%s/%s", 87 | "http://1000genomes.s3.amazonaws.com/release", 88 | "20110521/ALL.chr22.phase1_release_v3.20101123.snps_indels_svs.genotypes.vcf.gz") 89 | 90 | if (!is.null(tryCatch({open(con <- url(fl)); close(con)}, 91 | error=function(...) 
FALSE))) 92 | return(TRUE) 93 | 94 | obs <- headerTabix(fl) 95 | checkIdentical("22", obs$seqnames) 96 | exp <- structure(c(1L, 2L, 0L), .Names = c("seq", "start", "end")) 97 | checkIdentical(exp, obs$indexColumns) 98 | checkIdentical(0L, obs$skip) 99 | checkIdentical("#", obs$comment) 100 | checkIdentical(30L, length(obs$header)) 101 | } 102 | -------------------------------------------------------------------------------- /inst/unitTests/test_asBam.R: -------------------------------------------------------------------------------- 1 | test_asBam <- function() 2 | { 3 | src <- system.file("unitTests", "cases", package="Rsamtools") 4 | fl <- file.path(src, "ex1.sam.gz") 5 | ofl <- tempfile() 6 | on.exit(unlink(ofl)) 7 | bam <- asBam(fl, ofl) 8 | checkIdentical(bam, paste(ofl, "bam", sep=".")) 9 | 10 | which <- GRanges("seq2", IRanges(1000, 2000)) 11 | res <- scanBam(bam, param=ScanBamParam(which=which, what="rname"))[[1]] 12 | checkIdentical(642L, length(res[["rname"]])) 13 | checkIdentical("seq2", as.character(unique(res[["rname"]]))) 14 | 15 | checkException(asBam(fl, ofl), silent=TRUE) 16 | 17 | ## asBam destination construction 18 | tmp_sam <- file.path(tempdir(), "ex1.sam.gz") 19 | file.copy(fl, tmp_sam) 20 | tmp_bam <- asBam(tmp_sam) 21 | tmp_bai <- paste0(tmp_bam, ".bai") 22 | checkTrue(file.exists(tmp_sam)) 23 | checkTrue(file.exists(tmp_bai)) 24 | file.remove(tmp_bam) 25 | file.remove(tmp_bai) 26 | file.remove(tmp_sam) 27 | } 28 | -------------------------------------------------------------------------------- /inst/unitTests/test_bam_count.R: -------------------------------------------------------------------------------- 1 | fl <- system.file("extdata", "ex1.bam", package="Rsamtools") 2 | 3 | as_data.frame <- function(x) { 4 | data.frame(space=factor(names(unlist(x)), 5 | levels=seqlevels(x)), 6 | start=start(unlist(x)), 7 | end=end(unlist(x)), 8 | width=width(unlist(x))) 9 | } 10 | 11 | test_countBam <- function() 12 | { 13 | 
checkEquals(data.frame(space=NA, start=NA, end=NA, width=NA, 14 | file=basename(fl), records=3307L, 15 | nucleotides=116551L), 16 | countBam(fl)) 17 | 18 | which <- IRangesList(seq1=IRanges(1000, 2000), 19 | seq2=IRanges(c(100, 1000), c(1000, 2000))) 20 | p1 <- ScanBamParam(which=which) 21 | exp <- cbind(as_data.frame(which), 22 | file=basename(fl), 23 | records=c(612L, 1169L, 642L), 24 | nucleotides=c(21549, 41235, 22640)) 25 | checkIdentical(exp, countBam(fl, param=p1)) 26 | 27 | which <- IRangesList(seq2=IRanges(c(100, 1000), c(1000, 2000)), 28 | seq1=IRanges(1000, 2000)) 29 | p2 <- ScanBamParam(which=which) 30 | exp <- merge(as_data.frame(which), exp, sort=FALSE) 31 | rownames(exp) <- NULL 32 | checkIdentical(exp, countBam(fl, param=p2)) 33 | } 34 | 35 | test_countBam_index <- function() 36 | { 37 | which <- IRangesList(seq1=IRanges(1000, 2000), 38 | seq2=IRanges(c(100, 1000), c(1000, 2000))) 39 | p1 <- ScanBamParam(which=which) 40 | exp <- cbind(as_data.frame(which), 41 | file="ex1_noindex.bam", 42 | records=c(612L, 1169L, 642L), 43 | nucleotides=c(21549, 41235, 22640)) 44 | 45 | src <- system.file("unitTests", "cases", package="Rsamtools") 46 | fl <- file.path(src, "ex1_noindex.bam") 47 | idx <- system.file("extdata", "ex1.bam", package="Rsamtools") 48 | checkIdentical(exp, countBam(fl, idx, param=p1)) 49 | 50 | checkException({ 51 | suppressWarnings(countBam(fl, tempfile(), param=p1)) 52 | }, silent=TRUE) 53 | } 54 | 55 | test_idxstatsBam <- function() 56 | { 57 | target = structure( 58 | list( 59 | seqnames = structure( 60 | 1:3, .Label = c("*", "seq1", "seq2"), class = "factor" 61 | ), 62 | seqlength = c(0L, 1575L, 1584L), 63 | mapped = c(0L, 1482, 1789), 64 | unmapped = c(0, 19, 17)), 65 | .Names = c("seqnames", "seqlength", "mapped", "unmapped"), 66 | row.names = c(NA, -3L), class = "data.frame") 67 | checkIdentical(target, idxstatsBam(fl)) 68 | 69 | checkException(idxstatsBam("cases/ex1_noindex.bam")) 70 | } 71 | 
-------------------------------------------------------------------------------- /inst/unitTests/test_bam_header.R: -------------------------------------------------------------------------------- 1 | fl <- system.file("extdata", "ex1.bam", package="Rsamtools") 2 | 3 | test_readBamHeader <- function() 4 | { 5 | res <- scanBamHeader(fl) 6 | checkIdentical(Rsamtools:::.normalizePath(fl), names(res)) 7 | exp <- structure(c(1575L, 1584L), .Names = c("seq1", "seq2")) 8 | checkIdentical(exp, res[[1]][["targets"]]) 9 | } 10 | -------------------------------------------------------------------------------- /inst/unitTests/test_compression.R: -------------------------------------------------------------------------------- 1 | test_bgzip_openclose <- function() 2 | { 3 | ## trying to determine that file handle has been cleaned up 4 | checkIdentical(TRUE, dir.create(d <- tempfile())) 5 | fin <- file.path(d, "in") 6 | fout <- file.path(d, "out") 7 | writeLines("123", con=fin) 8 | bgzip(fin, fout) 9 | checkIdentical(TRUE, file.remove(fin)) 10 | checkIdentical(TRUE, file.remove(fout)) 11 | checkIdentical(0L, unlink(d, recursive=TRUE)) 12 | } 13 | 14 | -------------------------------------------------------------------------------- /inst/unitTests/test_mapqfilter.R: -------------------------------------------------------------------------------- 1 | test_mapqFilter <- function() { 2 | fl <- system.file("extdata", "ex1.bam", package="Rsamtools") 3 | p1 <- ScanBamParam(what="mapq") 4 | p2 <- ScanBamParam(what="mapq", mapqFilter=74) 5 | mapq1 <- scanBam(fl, param=p1)[[1]][["mapq"]] 6 | mapq2 <- scanBam(fl, param=p2)[[1]][["mapq"]] 7 | checkTrue(length(mapq1[!is.na(mapq1) & mapq1 == bamMapqFilter(p2)]) > 0) 8 | checkIdentical(mapq2, mapq1[!is.na(mapq1) & mapq1 >= bamMapqFilter(p2)]) 9 | 10 | n <- countBam(fl, param=p2) 11 | checkIdentical(n$records, length(mapq2)) 12 | 13 | checkException(ScanBamParam(mapqFilter=-1), silent=TRUE) 14 | checkException(ScanBamParam(mapqFilter=1:2), 
silent=TRUE) 15 | 16 | checkIdentical(bamMapqFilter(p2), 74L) 17 | bamMapqFilter(p2) <- 75 18 | checkIdentical(bamMapqFilter(p2), 75L) 19 | bamMapqFilter(p2) <- "76.1" 20 | checkIdentical(bamMapqFilter(p2), 76L) 21 | } 22 | -------------------------------------------------------------------------------- /inst/unitTests/test_phred2ASCIIOffset.R: -------------------------------------------------------------------------------- 1 | test_phred2ASCIIOffset <- function() { 2 | schemes <- eval(formals(phred2ASCIIOffset)$scheme) 3 | 4 | exp <- structure(integer(), names=character()) 5 | checkIdentical(exp, phred2ASCIIOffset()) 6 | checkIdentical(exp, phred2ASCIIOffset(character())) 7 | checkIdentical(exp, phred2ASCIIOffset(integer())) 8 | for (scheme in schemes) { 9 | checkIdentical(exp, phred2ASCIIOffset(scheme=scheme)) 10 | checkIdentical(exp, phred2ASCIIOffset(integer(), scheme=scheme)) 11 | } 12 | 13 | offsets <- Rsamtools:::.ascii_offset() 14 | checkIdentical(offsets[1L + 0:40], phred2ASCIIOffset(0:40, "Sanger")) 15 | checkIdentical(offsets[32L + (-5):40], phred2ASCIIOffset((-5):40, "Solexa")) 16 | checkIdentical(offsets[32L + 0:40], 17 | phred2ASCIIOffset(0:40, "Illumina 1.3+")) 18 | checkIdentical(offsets[32L + 3:40], 19 | phred2ASCIIOffset(3:40, "Illumina 1.5+")) 20 | checkIdentical(offsets[1L + 0:41], phred2ASCIIOffset(0:41, "Illumina 1.8+")) 21 | 22 | checkIdentical(offsets, 23 | phred2ASCIIOffset(paste(names(offsets), collapse=""))) 24 | 25 | checkException(phred2ASCIIOffset(50)) 26 | checkException(phred2ASCIIOffset(-1)) 27 | checkException(phred2ASCIIOffset("")) 28 | checkException(phred2ASCIIOffset(c("A", "AA"))) 29 | } 30 | -------------------------------------------------------------------------------- /inst/unitTests/test_readPileup.R: -------------------------------------------------------------------------------- 1 | pileupFile <- system.file("extdata", "pileup.txt", package="Rsamtools") 2 | src <- system.file("unitTests", "cases", package="Rsamtools") 3 
| nostarsFile <- file.path(src, "pileup-no-stars.txt") 4 | 5 | 6 | .check_SNP_pileup <- function(tbl, rdf) 7 | { 8 | checkIdentical(nrow(tbl), length(rdf)) 9 | checkIdentical(as.character(tbl[[1]]), as.character(seqnames(rdf))) 10 | checkIdentical(tbl[[2]], start(rdf)) 11 | for (i in 3:8) 12 | checkIdentical(as.character(tbl[[i]]), 13 | as.character(mcols(rdf)[[i-2]])) 14 | } 15 | 16 | test_readPileup <- function() 17 | { 18 | tbl <- read.table(pileupFile, fill=TRUE, quote="", comment="", 19 | col.names=1:16) 20 | 21 | rdf <- readPileup(pileupFile, variant="indel") 22 | idx <- which(tbl[[3]]=="*") 23 | checkIdentical(length(idx), length(rdf)) 24 | 25 | rdf <- readPileup(pileupFile, variant="SNP") 26 | checkIdentical(nrow(tbl) - 2L * length(idx), length(rdf)) 27 | .check_SNP_pileup(tbl[-c(idx, idx-1),], rdf) 28 | } 29 | 30 | test_readPileup_nostars <- function() 31 | { 32 | rdf <- readPileup(nostarsFile) 33 | tbl <- read.table(nostarsFile) 34 | .check_SNP_pileup(tbl, rdf) 35 | 36 | rdf <- readPileup(nostarsFile, variant="indel") 37 | checkIdentical(0L, length(rdf)) 38 | } 39 | -------------------------------------------------------------------------------- /inst/unitTests/test_scanBamFlag.R: -------------------------------------------------------------------------------- 1 | scanBamFlag_test <- function() 2 | { 3 | checkIdentical(formalArgs(scanBamFlag), Rsamtools:::.FLAG_BITNAMES) 4 | 5 | ## HP: Is this really testing something about scanBamFlag? 
6 | isUnmappedQuery <- FALSE 7 | flags0 <- scanBamFlag(isUnmappedQuery=FALSE) 8 | flags1 <- scanBamFlag(isUnmappedQuery=isUnmappedQuery) 9 | checkIdentical(flags0, flags1) 10 | } 11 | -------------------------------------------------------------------------------- /inst/unitTests/test_sortBam_test.R: -------------------------------------------------------------------------------- 1 | test_sortBam <- function() { 2 | fl0 <- system.file("extdata", "ex1.bam", package="Rsamtools") 3 | src <- system.file("unitTests", "cases", package="Rsamtools") 4 | fl <- file.path(src, "ex1_unsort.bam") 5 | ofl <- tempfile() 6 | checkTrue(file.create(ofl)) 7 | on.exit(unlink(ofl)) 8 | sorted <- sortBam(fl, ofl) 9 | exp <- scanBam(fl0)[[1]] 10 | obs <- scanBam(sorted)[[1]] 11 | checkIdentical(exp[["rname"]], obs[["rname"]]) 12 | checkIdentical(Filter(Negate(is.na), exp[["pos"]]), 13 | Filter(Negate(is.na), obs[["pos"]])) 14 | } 15 | 16 | test_sortBam_not_BAM_input <- function() { 17 | fl0 <- system.file("extdata", "ex1.sam", package="Rsamtools") 18 | checkException(sortBam(fl0, tempfile()), silent=TRUE) 19 | } 20 | 21 | test_sortBam_byTag <- function() { 22 | src <- system.file("unitTests", "cases", package="Rsamtools") 23 | fl <- file.path(src, "ex1_unsort.bam") 24 | ofl <- tempfile() 25 | checkTrue(file.create(ofl)) 26 | on.exit(unlink(ofl)) 27 | 28 | # sort by integer Tag Aq 29 | sorted <- sortBam(fl, ofl, byTag = "Aq") 30 | obs <- scanBam(sorted, param = ScanBamParam(tag = "Aq"))[[1]] 31 | tVal <- obs$tag$Aq 32 | 33 | # reads without Aq tag are first records in sorted bam 34 | checkIdentical(which(is.na(tVal)), 1L:36L) 35 | validTags <- tVal[!is.na(tVal)] 36 | checkIdentical(validTags, sort(validTags)) 37 | 38 | checkException(sortBam(fl, ofl, byTag = 1), silent=TRUE) 39 | checkException(sortBam(fl, ofl, byTag = c("bogus", "input")), silent=TRUE) 40 | } 41 | 42 | test_sortBam_nThreads <- function() { 43 | src <- system.file("unitTests", "cases", package="Rsamtools") 44 | fl <- 
file.path(src, "ex1_unsort.bam") 45 | checkException(sortBam(fl, tempfile(), nThreads = 0), silent=TRUE) 46 | checkException(sortBam(fl, tempfile(), nThreads = c(0, 1)), silent=TRUE) 47 | } 48 | -------------------------------------------------------------------------------- /inst/unitTests/test_tagfilter.R: -------------------------------------------------------------------------------- 1 | ## suppressMessages({ 2 | ## library(Rsamtools) 3 | ## library(RUnit) 4 | ## }) 5 | 6 | fl <- system.file(package="Rsamtools", "extdata", "tagfilter.bam") 7 | bf <- BamFile(fl) 8 | 9 | msbp <- function(rnames) { 10 | ScanBamParam(which=GRanges(rnames, IRanges(1,10)), what="rname") 11 | } 12 | numrecs <- function(sbp) countBam(fl, param=sbp)[["records"]] 13 | 14 | test_notags <- function() { 15 | sbp <- msbp("notags") 16 | bamTagFilter(sbp) <- list(TT="bogus") 17 | res <- numrecs(sbp) 18 | checkIdentical(0L, res) 19 | } 20 | ##test_notags() 21 | 22 | test_shared_by_multiple_reads <- function() { 23 | sbp <- ScanBamParam(tagFilter=list(AA=c("a", "d"))) 24 | res <- numrecs(sbp) 25 | checkIdentical(2L, res) 26 | } 27 | ##test_shared_by_multiple_reads() 28 | 29 | test_multitags <- function() { 30 | ## two reads have AA:A:a or AA:A:d, but only one of the two has 31 | ## II:i:45 32 | sbp <- ScanBamParam(tagFilter=list(AA=c("a", "d"), II=45)) 33 | res <- numrecs(sbp) 34 | checkIdentical(1L, res) 35 | } 36 | ##test_multitags() 37 | 38 | test_integer <- function() { 39 | sbp <- msbp("itag") 40 | ## exclude all 41 | bamTagFilter(sbp) <- list(II=1) 42 | res <- numrecs(sbp) 43 | checkIdentical(0L, res) 44 | 45 | ## include 2 discontiguous 46 | bamTagFilter(sbp) <- list(II=c(42, 44)) 47 | res <- numrecs(sbp) 48 | checkIdentical(2L, res) 49 | 50 | ## exception for mismatch 51 | bamTagFilter(sbp) <- list(II="fun") 52 | checkException(numrecs(sbp)) 53 | } 54 | ##test_integer() 55 | 56 | ## per the SAM spec, single printable character is different from a 57 | ## string 58 | test_single_printable 
<- function() { 59 | sbp <- msbp("Atag") 60 | ## exclude all 61 | bamTagFilter(sbp) <- list(AA="d") 62 | res <- numrecs(sbp) 63 | checkIdentical(0L, res) 64 | 65 | ## include 2 discontiguous 66 | bamTagFilter(sbp) <- list(AA=c("a", "c")) 67 | res <- numrecs(sbp) 68 | checkIdentical(2L, res) 69 | 70 | ## exception for mismatch 71 | bamTagFilter(sbp) <- list(AA="fun") 72 | checkException(numrecs(sbp)) 73 | } 74 | ##test_single_printable() 75 | 76 | test_string <- function() { 77 | sbp <- msbp("Ztag") 78 | ## exclude all 79 | bamTagFilter(sbp) <- list(ZZ="wok") 80 | res <- numrecs(sbp) 81 | checkIdentical(0L, res) 82 | 83 | ## include 2 discontiguous 84 | bamTagFilter(sbp) <- list(ZZ=c("woo", "wow")) 85 | res <- numrecs(sbp) 86 | checkIdentical(2L, res) 87 | 88 | ## exception for mismatch 89 | bamTagFilter(sbp) <- list(ZZ=1) 90 | checkException(numrecs(sbp)) 91 | } 92 | ##test_string() 93 | 94 | ## confirm throwing error when user tries to filter on a tag that has 95 | ## an unsupported type in the BAM file 96 | test_unsupported_tag_types <- function() { 97 | ## floating point type 98 | checkException(countBam(fl, param=ScanBamParam(tagFilter=list(FF=13)))) 99 | ## hex array 100 | checkException(countBam(fl, param=ScanBamParam(tagFilter=list(HH="foo")))) 101 | ## integer or numeric *array* 102 | checkException(countBam(fl, param=ScanBamParam(tagFilter=list(BB="foo")))) 103 | } 104 | ##test_unsupported_tag_types() 105 | 106 | ## Input validation 107 | 108 | test_exception_names <- function() { 109 | ## Too many letters 110 | checkException(ScanBamParam(tagFilter=list(NNN=1))) 111 | ## Too few 112 | checkException(ScanBamParam(tagFilter=list(N=1))) 113 | ## No names 114 | checkException(ScanBamParam(tagFilter=list(1))) 115 | } 116 | ##test_exception_names() 117 | 118 | test_exception_floating_point <- function() { 119 | checkException(ScanBamParam(tagFilter=list(FF=13.001))) 120 | } 121 | ##test_exception_floating_point() 122 | 123 | test_exception_weird_values <- 
function() { 124 | checkException(ScanBamParam(tagFilter=list(FF=NULL))) 125 | checkException(ScanBamParam(tagFilter=list(FF=NA))) 126 | ## zero-length 127 | checkException(ScanBamParam(tagFilter=list(FF=character()))) 128 | ## empty string 129 | checkException(ScanBamParam(tagFilter=list(FF=""))) 130 | } 131 | ##test_exception_weird_values() 132 | -------------------------------------------------------------------------------- /inst/unitTests/test_testPairedEndBam.R: -------------------------------------------------------------------------------- 1 | fl <- system.file("extdata", "ex1.bam", package="Rsamtools") 2 | 3 | test_testPairedEndBam <- function() { 4 | checkTrue(testPairedEndBam(BamFile(fl))) 5 | checkTrue(testPairedEndBam(fl)) 6 | } 7 | -------------------------------------------------------------------------------- /inst/unitTests/test_utilities.R: -------------------------------------------------------------------------------- 1 | test_io_check_exists <- function() 2 | { 3 | .io_check_exists <- Rsamtools:::.io_check_exists 4 | 5 | fls <- c( 6 | tempfile(), tempfile(), 7 | "ftp://some.where/file", 8 | "http://some.where/file", 9 | "https://some.where/file", 10 | NA 11 | ) 12 | file.create(fls[1]) 13 | checkTrue(is.null(.io_check_exists(fls[1]))) 14 | checkException(.io_check_exists(fls[2]), silent = TRUE) 15 | checkTrue(is.null(.io_check_exists(fls[3:6]))) 16 | } 17 | 18 | test_catch_samtools <- function() 19 | { 20 | fl <- system.file("unitTests", "cases", "ex1_unsort.bam", 21 | package="Rsamtools") 22 | err <- warn <- FALSE 23 | tryCatch(suppressWarnings(withCallingHandlers({ 24 | indexBam(fl) 25 | }, warning=function(msg) { 26 | warn <<- TRUE 27 | })), error=function(msg) { 28 | err <<- TRUE 29 | }) 30 | checkTrue(isFALSE(warn)) 31 | checkTrue(err) 32 | } 33 | 34 | test_catch_samtools_504 <- function() 35 | { 36 | err <- FALSE 37 | tryCatch({ 38 | scanBam("http://httpbin.org/status/504") 39 | }, error=function(err) { 40 | txt <- "failed to open 
BamFile:" 41 | err <<- startsWith(conditionMessage(err), txt) 42 | }) 43 | checkTrue(err) 44 | } 45 | 46 | test_normalizePath <- function() 47 | { 48 | .normalizePath <- Rsamtools:::.normalizePath 49 | fl <- tempfile() 50 | checkIdentical(fl, .normalizePath(fl)) 51 | checkIdentical(fl, .normalizePath(factor(fl))) 52 | } 53 | -------------------------------------------------------------------------------- /man/BcfFile-class.Rd: -------------------------------------------------------------------------------- 1 | \name{BcfFile} 2 | \Rdversion{1.1} 3 | \docType{class} 4 | \alias{BcfFile-class} 5 | \alias{BcfFileList-class} 6 | % con/destructors 7 | \alias{BcfFile} 8 | \alias{BcfFileList} 9 | \alias{open.BcfFile} 10 | \alias{close.BcfFile} 11 | % accessors 12 | \alias{isOpen,BcfFile-method} 13 | \alias{bcfMode} 14 | % methods 15 | \alias{scanBcfHeader,BcfFile-method} 16 | \alias{scanBcf,BcfFile-method} 17 | \alias{indexBcf,BcfFile-method} 18 | 19 | \title{Manipulate BCF files.} 20 | 21 | \description{ 22 | 23 | Use \code{BcfFile()} to create a reference to a BCF (and optionally 24 | its index). The reference remains open across calls to methods, 25 | avoiding costly index re-loading. 26 | 27 | \code{BcfFileList()} provides a convenient way of managing a list of 28 | \code{BcfFile} instances. 29 | 30 | } 31 | 32 | \usage{ 33 | 34 | ## Constructors 35 | 36 | BcfFile(file, index = file, 37 | mode=ifelse(grepl("\\\\.bcf$", file), "rb", "r")) 38 | BcfFileList(...) 39 | 40 | ## Opening / closing 41 | 42 | \S3method{open}{BcfFile}(con, ...) 43 | \S3method{close}{BcfFile}(con, ...) 44 | 45 | ## accessors; also path(), index() 46 | 47 | \S4method{isOpen}{BcfFile}(con, rw="") 48 | bcfMode(object) 49 | 50 | ## actions 51 | 52 | \S4method{scanBcfHeader}{BcfFile}(file, ...) 53 | \S4method{scanBcf}{BcfFile}(file, ..., param=ScanBcfParam()) 54 | \S4method{indexBcf}{BcfFile}(file, ...) 
55 | 56 | } 57 | 58 | \arguments{ 59 | 60 | \item{con, object}{An instance of \code{BcfFile}.} 61 | 62 | \item{file}{A character(1) vector of the BCF file path or (for 63 | indexBcf) an instance of \code{BcfFile} pointing to a BCF file.} 64 | 65 | \item{index}{A character(1) vector of the BCF index.} 66 | 67 | \item{mode}{A character(1) vector; \code{mode="rb"} indicates a binary 68 | (BCF) file, \code{mode="r"} a text (VCF) file.} 69 | 70 | \item{param}{An optional \code{\linkS4class{ScanBcfParam}} instance to 71 | further influence scanning.} 72 | 73 | \item{...}{Additional arguments. For \code{BcfFileList}, this can 74 | either be a single character vector of paths to BCF files, or 75 | several instances of \code{BcfFile} objects.} 76 | 77 | \item{rw}{Mode of file; ignored.} 78 | 79 | } 80 | 81 | \section{Objects from the Class}{ 82 | 83 | Objects are created by calls of the form \code{BcfFile()}. 84 | 85 | } 86 | 87 | \section{Fields}{ 88 | 89 | The \code{BcfFile} class inherits fields from the 90 | \code{\linkS4class{RsamtoolsFile}} class. 91 | 92 | } 93 | 94 | \section{Functions and methods}{ 95 | 96 | \code{BcfFileList} inherits methods from 97 | \code{\link{RsamtoolsFileList}} and \code{\link{SimpleList}}. 98 | 99 | Opening / closing: 100 | \describe{ 101 | 102 | \item{open.BcfFile}{Opens the (local or remote) \code{path} and 103 | \code{index} (if \code{index} is not \code{character(0)}) 104 | files. Returns a \code{BcfFile} instance.} 105 | 106 | \item{close.BcfFile}{Closes the \code{BcfFile} \code{con}; returning 107 | (invisibly) the updated \code{BcfFile}. 
The instance may be 108 | re-opened with \code{open.BcfFile}.} 109 | 110 | } 111 | 112 | Accessors: 113 | \describe{ 114 | 115 | \item{path}{Returns a character(1) vector of the BCF path name.} 116 | 117 | \item{index}{Returns a character(1) vector of the BCF index name.} 118 | 119 | \item{bcfMode}{Returns a character(1) vector of the BCF mode.} 120 | 121 | } 122 | 123 | Methods: 124 | \describe{ 125 | 126 | \item{scanBcf}{Visit the path in \code{path(file)}, returning the 127 | result of \code{\link{scanBcf}} applied to the specified path.} 128 | 129 | \item{show}{Compactly display the object.} 130 | 131 | } 132 | 133 | } 134 | \author{Martin Morgan} 135 | 136 | \examples{ 137 | 138 | fl <- system.file("extdata", "ex1.bcf.gz", package="Rsamtools", 139 | mustWork=TRUE) 140 | bf <- BcfFile(fl) # implicit index 141 | bf 142 | identical(scanBcf(bf), scanBcf(fl)) 143 | 144 | rng <- GRanges(c("seq1", "seq2"), IRanges(1, c(1575, 1584))) 145 | param <- ScanBcfParam(which=rng) 146 | bcf <- scanBcf(bf, param=param) ## all ranges 147 | 148 | ## ranges one at a time 'bf' 149 | open(bf) 150 | sapply(seq_len(length(rng)), function(i, bcfFile, rng) { 151 | param <- ScanBcfParam(which=rng[i]) 152 | bcf <- scanBcf(bcfFile, param=param)[[1]] 153 | ## do extensive work with bcf 154 | isOpen(bcfFile) ## file remains open 155 | }, bf, rng) 156 | 157 | } 158 | 159 | \keyword{classes} 160 | -------------------------------------------------------------------------------- /man/PileupFiles-class.Rd: -------------------------------------------------------------------------------- 1 | \name{PileupFiles} 2 | \Rdversion{1.1} 3 | \docType{class} 4 | \alias{PileupFiles-class} 5 | % con/destructors 6 | \alias{PileupFiles} 7 | \alias{PileupFiles,character-method} 8 | \alias{PileupFiles,list-method} 9 | % accessors 10 | \alias{isOpen,PileupFiles-method} 11 | \alias{plpFiles} 12 | \alias{plpParam} 13 | % methods 14 | \alias{applyPileups,PileupFiles,ApplyPileupsParam-method} 15 | 
\alias{applyPileups,PileupFiles,missing-method} 16 | \alias{show,PileupFiles-method} 17 | 18 | \title{Represent BAM files for pileup summaries.} 19 | 20 | \description{ 21 | 22 | Use \code{PileupFiles()} to create a reference to BAM files (and 23 | their indices), to be used for calculating pile-up summaries. 24 | 25 | } 26 | 27 | \usage{ 28 | 29 | ## Constructors 30 | PileupFiles(files, ..., param=ApplyPileupsParam()) 31 | \S4method{PileupFiles}{character}(files, ..., param=ApplyPileupsParam()) 32 | \S4method{PileupFiles}{list}(files, ..., param=ApplyPileupsParam()) 33 | 34 | ## opening / closing 35 | ## open(con, ...) 36 | ## close(con, ...) 37 | 38 | ## accessors; also path() 39 | \S4method{isOpen}{PileupFiles}(con, rw="") 40 | plpFiles(object) 41 | plpParam(object) 42 | 43 | ## actions 44 | \S4method{applyPileups}{PileupFiles,missing}(files, FUN, ..., param) 45 | \S4method{applyPileups}{PileupFiles,ApplyPileupsParam}(files, FUN, ..., param) 46 | 47 | ## display 48 | \S4method{show}{PileupFiles}(object) 49 | 50 | } 51 | 52 | \arguments{ 53 | 54 | \item{files}{For \code{PileupFiles}, a \code{character()} or 55 | \code{list} of \code{BamFile} instances representing files to be 56 | included in the pileup. Using a \code{list} of \code{BamFile} allows 57 | indices to be specified when these are in non-standard format. All 58 | elements of \code{\ldots} must be the same type.
59 | 60 | For \code{applyPileups,PileupFiles-method}, a \code{PileupFiles} 61 | instance.} 62 | 63 | \item{\ldots}{Additional arguments, currently ignored.} 64 | 65 | \item{con, object}{An instance of \code{PileupFiles}.} 66 | 67 | \item{FUN}{A function of one argument; see \code{\link{applyPileups}}.} 68 | 69 | \item{param}{An instance of \code{\link{ApplyPileupsParam}}, 70 | to select which records to include in the pileup, and which summary 71 | information to return.} 72 | 73 | \item{rw}{character() indicating mode of file; not used for 74 | \code{PileupFiles}.} 75 | 76 | } 77 | 78 | \section{Objects from the Class}{ 79 | 80 | Objects are created by calls of the form \code{PileupFiles()}. 81 | 82 | } 83 | 84 | \section{Fields}{ 85 | 86 | The \code{PileupFiles} class is implemented as an S4 reference 87 | class. It has the following fields: 88 | 89 | \describe{ 90 | 91 | \item{files}{A list of \code{\link{BamFile}} instances.} 92 | 93 | \item{param}{An instance of \code{\link{ApplyPileupsParam}}.} 94 | 95 | } 96 | } 97 | 98 | \section{Functions and methods}{ 99 | 100 | Opening / closing: 101 | \describe{ 102 | 103 | \item{open.PileupFiles}{Opens the (local or remote) \code{path} and 104 | \code{index} of each file in the \code{PileupFiles} 105 | instance. Returns a \code{PileupFiles} instance.} 106 | 107 | \item{close.PileupFiles}{Closes each file in the \code{PileupFiles} 108 | instance; returning (invisibly) the updated 109 | \code{PileupFiles}.
The instance may be re-opened with 110 | \code{open.PileupFiles}.} 111 | 112 | } 113 | 114 | Accessors: 115 | \describe{ 116 | 117 | \item{plpFiles}{Returns the \code{list} of the files in the 118 | \code{PileupFiles} instance.} 119 | 120 | \item{plpParam}{Returns the \code{\link{ApplyPileupsParam}} content of the 121 | \code{PileupFiles} instance.} 122 | 123 | } 124 | 125 | Methods: 126 | \describe{ 127 | 128 | \item{applyPileups}{Calculate the pileup across all files in 129 | \code{files} according to criteria in \code{param} (or 130 | \code{plpParam(files)} if \code{param} is missing), invoking 131 | \code{FUN} on each range or collection of positions. See 132 | \code{\link{applyPileups}}.} 133 | 134 | \item{show}{Compactly display the object.} 135 | 136 | } 137 | 138 | } 139 | 140 | \author{Martin Morgan} 141 | 142 | \examples{ 143 | example(applyPileups) 144 | } 145 | 146 | \keyword{classes} 147 | -------------------------------------------------------------------------------- /man/Rsamtools-package.Rd: -------------------------------------------------------------------------------- 1 | \name{Rsamtools-package} 2 | \Rdversion{1.1} 3 | \alias{Rsamtools-package} 4 | \alias{Rsamtools} 5 | \alias{bzfile-class} 6 | \alias{fifo-class} 7 | \alias{gzfile-class} 8 | \alias{pipe-class} 9 | \alias{unz-class} 10 | \alias{url-class} 11 | 12 | \docType{package} 13 | \title{ 14 | 'samtools' aligned sequence utilities interface 15 | } 16 | \description{ 17 | This package provides facilities for parsing samtools BAM (binary) 18 | files representing aligned sequences. 19 | } 20 | \details{ 21 | 22 | See \code{packageDescription('Rsamtools')} for package details. A 23 | useful starting point is the \code{\link{scanBam}} manual page. 24 | 25 | } 26 | \author{ 27 | Author: Martin Morgan 28 | 29 | Maintainer: Biocore Team c/o BioC user list 30 | } 31 | \references{ 32 | 33 | The current source code for samtools and bcftools is from 34 | \url{https://github.com/samtools/samtools}. 
Additional material is at 35 | \url{http://samtools.sourceforge.net/}. 36 | 37 | } 38 | \note{ 39 | 40 | This package documents the following classes for purely internal 41 | reasons, see help pages in other packages: \code{bzfile}, \code{fifo}, 42 | \code{gzfile}, \code{pipe}, \code{unz}, \code{url}. 43 | 44 | } 45 | \keyword{ package } 46 | \examples{ 47 | packageDescription('Rsamtools') 48 | } 49 | -------------------------------------------------------------------------------- /man/RsamtoolsFile-class.Rd: -------------------------------------------------------------------------------- 1 | \name{RsamtoolsFile} 2 | \Rdversion{1.1} 3 | \docType{class} 4 | \alias{RsamtoolsFile-class} 5 | \alias{RsamtoolsFile} 6 | \alias{path} 7 | \alias{index} 8 | \alias{index<-} 9 | \alias{yieldSize} 10 | \alias{yieldSize<-} 11 | \alias{yieldSize<-,RsamtoolsFile-method} 12 | \alias{path,RsamtoolsFile-method} 13 | \alias{index,RsamtoolsFile-method} 14 | \alias{index<-,RsamtoolsFile-method} 15 | \alias{isOpen,RsamtoolsFile-method} 16 | \alias{show,RsamtoolsFile-method} 17 | \alias{yieldSize,RsamtoolsFile-method} 18 | 19 | \title{A base class for managing file references in Rsamtools} 20 | 21 | \description{ 22 | 23 | \code{RsamtoolsFile} is a base class for managing file references in 24 | \pkg{Rsamtools}; it is not intended for direct use by users -- see, e.g., 25 | \code{\link{BamFile}}. 26 | 27 | } 28 | 29 | \usage{ 30 | 31 | ## accessors 32 | \S4method{path}{RsamtoolsFile}(object, ...) 33 | \S4method{index}{RsamtoolsFile}(object, ..., asNA = TRUE) 34 | \S4method{isOpen}{RsamtoolsFile}(con, rw="") 35 | \S4method{yieldSize}{RsamtoolsFile}(object, ...) 36 | yieldSize(object, ...) 
<- value 37 | \S4method{show}{RsamtoolsFile}(object) 38 | 39 | } 40 | 41 | \arguments{ 42 | 43 | \item{con, object}{An instance of a class derived from 44 | \code{RsamtoolsFile}.} 45 | 46 | \item{asNA}{logical indicating if missing output should be NA or 47 | character()} 48 | 49 | \item{rw}{Mode of file; ignored.} 50 | 51 | \item{\dots}{Additional arguments, unused.} 52 | 53 | \item{value}{Replacement value.} 54 | } 55 | 56 | \section{Objects from the Class}{ 57 | 58 | Users do not directly create instances of this class; see, e.g., 59 | \code{\link{BamFile}-class}. 60 | 61 | } 62 | 63 | \section{Fields}{ 64 | 65 | The \code{RsamtoolsFile} class is implemented as an S4 reference 66 | class. It has the following fields: 67 | 68 | \describe{ 69 | 70 | \item{.extptr}{An \code{externalptr} initialized to an internal 71 | structure with opened bam file and bam index pointers.} 72 | 73 | \item{path}{A character(1) vector of the file name.} 74 | 75 | \item{index}{A character(1) vector of the index file name.} 76 | 77 | \item{yieldSize}{An integer(1) vector of the number of records to 78 | yield.} 79 | 80 | } 81 | } 82 | 83 | \section{Functions and methods}{ 84 | 85 | Accessors: 86 | \describe{ 87 | 88 | \item{path}{Returns a character(1) vector of path names.} 89 | 90 | \item{index}{Returns a character(1) vector of index path names.} 91 | 92 | \item{yieldSize, yieldSize<-}{Return or set an integer(1) vector 93 | indicating yield size.} 94 | 95 | } 96 | 97 | Methods: 98 | \describe{ 99 | 100 | \item{isOpen}{Report whether the file is currently open.} 101 | 102 | \item{show}{Compactly display the object.} 103 | 104 | } 105 | 106 | } 107 | \author{Martin Morgan} 108 | 109 | \keyword{classes} 110 | -------------------------------------------------------------------------------- /man/RsamtoolsFileList-class.Rd: -------------------------------------------------------------------------------- 1 | \name{RsamtoolsFileList} 2 | \Rdversion{1.1} 3 | \docType{class} 4 | 
\alias{RsamtoolsFileList-class} 5 | \alias{RsamtoolsFileList} 6 | \alias{path,RsamtoolsFileList-method} 7 | \alias{index,RsamtoolsFileList-method} 8 | \alias{index<-,RsamtoolsFileList-method} 9 | \alias{isOpen,RsamtoolsFileList-method} 10 | \alias{open.RsamtoolsFileList} 11 | \alias{close.RsamtoolsFileList} 12 | \alias{names,RsamtoolsFileList-method} 13 | \alias{yieldSize,RsamtoolsFileList-method} 14 | \alias{yieldSize<-,RsamtoolsFileList-method} 15 | 16 | \title{A base class for managing lists of Rsamtools file references} 17 | 18 | \description{ 19 | 20 | \code{RsamtoolsFileList} is a base class for managing lists of file 21 | references in \pkg{Rsamtools}; it is not intended for direct use -- 22 | see, e.g., \code{\link{BamFileList}}. 23 | 24 | } 25 | 26 | \usage{ 27 | 28 | \S4method{path}{RsamtoolsFileList}(object, ...) 29 | \S4method{index}{RsamtoolsFileList}(object, ..., asNA = TRUE) 30 | \S4method{isOpen}{RsamtoolsFileList}(con, rw="") 31 | \S3method{open}{RsamtoolsFileList}(con, ...) 32 | \S3method{close}{RsamtoolsFileList}(con, ...) 33 | \S4method{names}{RsamtoolsFileList}(x) 34 | \S4method{yieldSize}{RsamtoolsFileList}(object, ...) 35 | 36 | } 37 | 38 | \arguments{ 39 | 40 | \item{con, object, x}{An instance of a class derived from 41 | \code{RsamtoolsFileList}.} 42 | 43 | \item{asNA}{logical indicating if missing output should be NA or 44 | character()} 45 | 46 | \item{rw}{Mode of file; ignored.} 47 | 48 | \item{\dots}{Additional arguments.} 49 | 50 | } 51 | 52 | \section{Objects from the Class}{ 53 | 54 | Users do not directly create instances of this class; see, e.g., 55 | \code{\link{BamFileList}-class}. 56 | 57 | } 58 | 59 | \section{Functions and methods}{ 60 | 61 | This class inherits functions and methods for subseting, updating, and 62 | display from the \code{\link{SimpleList}} class. 
63 | 64 | Methods: 65 | \describe{ 66 | 67 | \item{isOpen:}{Report whether each file in the list is currently 68 | open.} 69 | 70 | \item{open:}{Attempt to open each file in the list.} 71 | 72 | \item{close:}{Attempt to close each file in the list.} 73 | 74 | \item{names:}{Names of each element of the list or, if names are 75 | \code{NULL}, the basename of the path of each element.} 76 | 77 | } 78 | 79 | } 80 | \author{Martin Morgan} 81 | 82 | \keyword{classes} 83 | -------------------------------------------------------------------------------- /man/ScanBcfParam-class.Rd: -------------------------------------------------------------------------------- 1 | \name{ScanBcfParam-class} 2 | \Rdversion{1.1} 3 | \docType{class} 4 | 5 | \alias{ScanBVcfParam-class} 6 | 7 | \alias{ScanBcfParam} 8 | \alias{ScanBcfParam-class} 9 | 10 | \alias{ScanBcfParam,GRanges-method} 11 | \alias{ScanBcfParam,GRangesList-method} 12 | \alias{ScanBcfParam,IntegerRangesList-method} 13 | \alias{ScanBcfParam,missing-method} 14 | \alias{bcfFixed} 15 | \alias{bcfInfo} 16 | \alias{bcfGeno} 17 | \alias{bcfSamples} 18 | \alias{bcfTrimEmpty} 19 | \alias{bcfWhich} 20 | 21 | \alias{show,ScanBVcfParam-method} 22 | 23 | \title{Parameters for scanning BCF files} 24 | \description{ 25 | 26 | Use \code{ScanBcfParam()} to create a parameter object influencing the 27 | \sQuote{INFO} and \sQuote{GENO} fields parsed, and which sample records are 28 | imported from a BCF file. Use of \code{which} requires that a BCF 29 | index file (\code{.bci}) exists. 30 | 31 | } 32 | 33 | \usage{ 34 | 35 | ScanBcfParam(fixed=character(), info=character(), geno=character(), 36 | samples=character(), trimEmpty=TRUE, which, ...) 37 | 38 | \S4method{ScanBcfParam}{missing}(fixed=character(), info=character(), geno=character(), 39 | samples=character(), trimEmpty=TRUE, which, ...) 
40 | \S4method{ScanBcfParam}{IntegerRangesList}(fixed=character(), info=character(), geno=character(), 41 | samples=character(), trimEmpty=TRUE, which, ...) 42 | \S4method{ScanBcfParam}{GRanges}(fixed=character(), info=character(), geno=character(), 43 | samples=character(), trimEmpty=TRUE, which, ...) 44 | \S4method{ScanBcfParam}{GRangesList}(fixed=character(), info=character(), geno=character(), 45 | samples=character(), trimEmpty=TRUE, which, ...) 46 | 47 | ## Accessors 48 | bcfFixed(object) 49 | bcfInfo(object) 50 | bcfGeno(object) 51 | bcfSamples(object) 52 | bcfTrimEmpty(object) 53 | bcfWhich(object) 54 | 55 | } 56 | 57 | \arguments{ 58 | 59 | \item{fixed}{A logical(1) for use with \code{ScanVcfParam} only.} 60 | 61 | \item{info}{A character() vector of \sQuote{INFO} fields (see 62 | \link[VariantAnnotation]{scanVcfHeader}) to be returned.} 63 | 64 | \item{geno}{A character() vector of \sQuote{GENO} fields (see 65 | \link[VariantAnnotation]{scanVcfHeader}) to be returned. \code{character(0)} 66 | returns all fields, \code{NA_character_} returns none.} 67 | 68 | \item{samples}{A character() vector of sample names (see 69 | \link[VariantAnnotation]{scanVcfHeader}) to be returned. \code{character(0)} 70 | returns all fields, \code{NA_character_} returns none.} 71 | 72 | \item{trimEmpty}{A logical(1) indicating whether \sQuote{GENO} fields 73 | with no values should be returned.} 74 | 75 | \item{which}{An object, for which a method is defined (see usage, 76 | above), describing the sequences and ranges to be queried. Variants 77 | whose \code{POS} lies in the interval(s) \code{[start, end)} are 78 | returned.} 79 | 80 | \item{object}{An instance of class \code{ScanBcfParam}.} 81 | 82 | \item{\dots}{Arguments used internally.} 83 | 84 | } 85 | 86 | \section{Objects from the Class}{ 87 | 88 | Objects can be created by calls of the form \code{ScanBcfParam()}. 
89 | 90 | } 91 | \section{Slots}{ 92 | \describe{ 93 | \item{\code{which}:}{Object of class \code{"IntegerRangesList"} indicating 94 | which reference sequence and coordinate variants must overlap.} 95 | \item{\code{info}:}{Object of class \code{"character"} indicating 96 | portions of \sQuote{INFO} to be returned.} 97 | \item{\code{geno}:}{Object of class \code{"character"} indicating 98 | portions of \sQuote{GENO} to be returned. } 99 | \item{\code{samples}:}{Object of class \code{"character"} indicating 100 | the samples to be returned. } 101 | \item{\code{trimEmpty}:}{Object of class \code{"logical"} indicating 102 | whether empty \sQuote{GENO} fields are to be returned.} 103 | \item{\code{fixed}:}{Object of class \code{"character"}. For use 104 | with \code{ScanVcfParam} only.} 105 | } 106 | } 107 | \section{Functions and methods}{ 108 | 109 | See 'Usage' for details on invocation. 110 | 111 | Constructor: 112 | \describe{ 113 | 114 | \item{ScanBcfParam:}{Returns a \code{ScanBcfParam} object. 
The 115 | \code{which} argument to the constructor can be one of several types, 116 | as documented above.} } 117 | 118 | Accessors: 119 | \describe{ 120 | 121 | \item{bcfInfo, bcfGeno, bcfTrimEmpty, bcfWhich:}{Return the 122 | corresponding field from \code{object}.} 123 | 124 | } 125 | 126 | Methods: 127 | \describe{ 128 | 129 | \item{show}{Compactly display the object.} 130 | 131 | } 132 | } 133 | \author{ 134 | Martin Morgan \url{mtmorgan@fhcrc.org} 135 | } 136 | \seealso{ 137 | 138 | \link[VariantAnnotation]{scanVcf} 139 | \link[VariantAnnotation]{ScanVcfParam} 140 | 141 | } 142 | 143 | \examples{ 144 | ## see ?ScanVcfParam examples 145 | } 146 | 147 | \keyword{classes} 148 | -------------------------------------------------------------------------------- /man/applyPileups.Rd: -------------------------------------------------------------------------------- 1 | \name{applyPileups} 2 | \Rdversion{1.1} 3 | \alias{applyPileups} 4 | % \alias{yieldBam} 5 | 6 | \title{ 7 | 8 | Apply a user-provided function to calculate pile-up statistics across 9 | multiple BAM files. 10 | 11 | } 12 | \description{ 13 | WARNING: Starting with Bioconductor 3.14, \code{applyPileups} is deprecated 14 | in favor of \code{\link{pileup}}. 15 | 16 | \code{applyPileups} scans one or more BAM files, returning 17 | position-specific sequence and quality summaries. 18 | 19 | } 20 | \usage{ 21 | applyPileups(files, FUN, ..., param) 22 | } 23 | 24 | \arguments{ 25 | 26 | \item{files}{A \code{\link{PileupFiles}} instances.} 27 | 28 | \item{FUN}{A function of 1 argument, \code{x}, to be evaluated for 29 | each yield (see \code{yieldSize}, \code{yieldBy}, 30 | \code{yieldAll}). The argument \code{x} is a \code{list}, with 31 | elements describing the current pile-up. 
The elements of the list 32 | are determined by the argument \code{what}, and include: 33 | 34 | \describe{ 35 | 36 | \item{seqnames:}{(Always returned) A named \code{integer()} 37 | representing the seqnames corresponding to each position 38 | reported in the pile-up. This is a run-length encoding, where 39 | the names of the elements represent the seqnames, and the values 40 | the number of successive positions corresponding to that 41 | seqname.} 42 | 43 | \item{pos:}{(Always returned) An \code{integer()} representing the 44 | genomic coordinate of each pile-up position.} 45 | 46 | \item{seq:}{An \code{array} of dimensions nucleotide x file x 47 | position. 48 | 49 | The \sQuote{nucleotide} dimension is length 5, corresponding to 50 | \sQuote{A}, \sQuote{C}, \sQuote{G}, \sQuote{T}, and \sQuote{N} 51 | respectively. 52 | 53 | Entries in the array represent the number of times the 54 | nucleotide occurred in reads in the file overlapping the 55 | position. 56 | 57 | } 58 | 59 | \item{qual:}{Like \code{seq}, but summarizing quality; the first 60 | dimension is the Phred-encoded quality score, ranging from 61 | \sQuote{!} (0) to \sQuote{~} (93).} 62 | 63 | } 64 | } 65 | 66 | \item{\dots}{Additional arguments, passed to methods.} 67 | 68 | \item{param}{An instance of the object returned by 69 | \code{ApplyPileupsParam}.} 70 | 71 | } 72 | 73 | \details{ 74 | 75 | Regardless of \code{param} values, the algorithm follows samtools by 76 | excluding reads flagged as unmapped, secondary, duplicate, or failing 77 | quality control. 78 | 79 | } 80 | 81 | 82 | \value{ 83 | 84 | \code{applyPileups} returns a \code{list} equal in length to the 85 | number of times \code{FUN} has been called, with each element 86 | containing the result of \code{FUN}. 87 | 88 | \code{ApplyPileupsParam} returns an object describing the parameters.
89 | 90 | } 91 | 92 | \references{\url{http://samtools.sourceforge.net/}} 93 | 94 | \author{Martin Morgan} 95 | 96 | \seealso{\code{\link{ApplyPileupsParam}}.} 97 | 98 | \examples{ 99 | ## The examples below are currently broken and have been disabled for now 100 | \dontrun{ 101 | fl <- system.file("extdata", "ex1.bam", package="Rsamtools", 102 | mustWork=TRUE) 103 | 104 | fls <- PileupFiles(c(fl, fl)) 105 | 106 | calcInfo <- 107 | function(x) 108 | { 109 | ## information at each pile-up position 110 | info <- apply(x[["seq"]], 2, function(y) { 111 | y <- y[c("A", "C", "G", "T"),,drop=FALSE] 112 | y <- y + 1L # continuity 113 | cvg <- colSums(y) 114 | p <- y / cvg[col(y)] 115 | h <- -colSums(p * log(p)) 116 | ifelse(cvg == 4L, NA, h) 117 | }) 118 | list(seqnames=x[["seqnames"]], pos=x[["pos"]], info=info) 119 | } 120 | which <- GRanges(c("seq1", "seq2"), IRanges(c(1000, 1000), 2000)) 121 | param <- ApplyPileupsParam(which=which, what="seq") 122 | res <- applyPileups(fls, calcInfo, param=param) 123 | str(res) 124 | head(res[[1]][["pos"]]) # positions matching param 125 | head(res[[1]][["info"]]) # inforamtion in each file 126 | 127 | ## 'param' as part of 'files' 128 | fls1 <- PileupFiles(c(fl, fl), param=param) 129 | res1 <- applyPileups(fls1, calcInfo) 130 | identical(res, res1) 131 | 132 | ## yield by position, across ranges 133 | param <- ApplyPileupsParam(which=which, yieldSize=500L, 134 | yieldBy="position", what="seq") 135 | res <- applyPileups(fls, calcInfo, param=param) 136 | sapply(res, "[[", "seqnames") 137 | } 138 | } 139 | 140 | \keyword{ manip } 141 | -------------------------------------------------------------------------------- /man/defunct.Rd: -------------------------------------------------------------------------------- 1 | \name{Deprecated and Defunct} 2 | \title{Rsamtools Deprecated and Defunct} 3 | 4 | \alias{quickCountBam} 5 | \alias{quickBamCounts} 6 | \alias{bgzipTabix} 7 | 8 | \description{ 9 | 10 | The function, class, or data object you 
have asked for has been 11 | deprecated or made defunct. 12 | 13 | } 14 | 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/deprecated.Rd: -------------------------------------------------------------------------------- 1 | \name{deprecated} 2 | \Rdversion{1.1} 3 | \alias{BamSampler} 4 | \alias{BamSampler-class} 5 | \alias{scanBam,BamSampler-method} 6 | \alias{show,BamSampler-method} 7 | \alias{yieldTabix} 8 | \alias{yieldTabix,TabixFile-method} 9 | \title{Deprecated functions} 10 | \description{Functions listed on this page are no longer supported.} 11 | \details{ 12 | 13 | For \code{yieldTabix}, use the \code{yieldSize} argument of 14 | \code{TabixFiles}. 15 | 16 | For \code{BamSampler}, use \code{REDUCEsampler} with 17 | \code{reduceByYield} in the \code{GenomicFiles} package. 18 | } 19 | 20 | \author{ 21 | 22 | Martin Morgan . 23 | 24 | } 25 | 26 | \keyword{ manip } 27 | -------------------------------------------------------------------------------- /man/headerTabix.Rd: -------------------------------------------------------------------------------- 1 | \name{headerTabix} 2 | \Rdversion{1.1} 3 | 4 | \alias{headerTabix} 5 | \alias{headerTabix,character-method} 6 | 7 | \title{ 8 | 9 | Retrieve sequence names defined in a tabix file. 10 | 11 | } 12 | \description{ 13 | 14 | This function queries a tabix file, returning the names of the 15 | \sQuote{sequences} used as a key when creating the file. 16 | 17 | } 18 | \usage{ 19 | 20 | headerTabix(file, ...) 21 | \S4method{headerTabix}{character}(file, ...) 
22 | 23 | } 24 | 25 | \arguments{ 26 | 27 | \item{file}{A \code{character(1)} file path or 28 | \code{\linkS4class{TabixFile}} instance pointing to a \sQuote{tabix} 29 | file.} 30 | 31 | \item{...}{Additional arguments, currently ignored.} 32 | 33 | } 34 | 35 | \value{ 36 | 37 | A \code{list(4)} of the sequence names, column indices used to sort 38 | the file, the number of lines skipped while indexing, and the comment 39 | character used while indexing. 40 | 41 | } 42 | 43 | \author{ 44 | 45 | Martin Morgan . 46 | 47 | } 48 | 49 | \examples{ 50 | fl <- system.file("extdata", "example.gtf.gz", package="Rsamtools", 51 | mustWork=TRUE) 52 | headerTabix(fl) 53 | } 54 | 55 | \keyword{ manip } 56 | -------------------------------------------------------------------------------- /man/indexTabix.Rd: -------------------------------------------------------------------------------- 1 | \name{indexTabix} 2 | \Rdversion{1.1} 3 | 4 | \alias{indexTabix} 5 | 6 | \title{ 7 | 8 | Compress and index tabix-compatible files. 9 | 10 | } 11 | \description{ 12 | 13 | Index (with \code{indexTabix}) files that have been sorted into 14 | ascending sequence, start and end position ordering. 15 | 16 | } 17 | \usage{ 18 | 19 | indexTabix(file, 20 | format=c("gff", "bed", "sam", "vcf", "vcf4", "psltbl"), 21 | seq=integer(), start=integer(), end=integer(), 22 | skip=0L, comment="#", zeroBased=FALSE, ...) 23 | 24 | } 25 | 26 | \arguments{ 27 | 28 | \item{file}{A character(1) path to a sorted, bgzip-compressed file.} 29 | 30 | \item{format}{The format of the data in the compressed file.
A 31 | character(1) matching one of the types named in the function 32 | signature.} 33 | 34 | \item{seq}{If \code{format} is missing, then \code{seq} indicates the 35 | column in which the \sQuote{sequence} identifier (e.g., chrq) is to 36 | be found.} 37 | 38 | \item{start}{If \code{format} is missing, \code{start} indicates the 39 | column containing the start coordinate of the feature to be 40 | indexed.} 41 | 42 | \item{end}{If \code{format} is missing, \code{end} indicates the 43 | column containing the ending coordinate of the feature to be 44 | indexed.} 45 | 46 | \item{skip}{The number of lines to be skipped at the beginning of the 47 | file.} 48 | 49 | \item{comment}{A single character which, when present as the first 50 | character in a line, indicates that the line is to be omitted from 51 | indexing.} 52 | 53 | \item{zeroBased}{A logical(1) indicating whether coordinates in the 54 | file are zero-based.} 55 | 56 | \item{...}{Additional arguments.} 57 | 58 | } 59 | 60 | \value{ 61 | 62 | The return value of \code{indexTabix} is an updated instance of 63 | \code{file} reflecting the newly-created index file. 64 | 65 | } 66 | 67 | \references{ 68 | 69 | \url{http://samtools.sourceforge.net/tabix.shtml} 70 | 71 | } 72 | 73 | \author{ 74 | 75 | Martin Morgan .
76 | 77 | } 78 | 79 | \examples{ 80 | from <- system.file("extdata", "ex1.sam", package="Rsamtools", 81 | mustWork=TRUE) 82 | to <- tempfile() 83 | zipped <- bgzip(from, to) 84 | idx <- indexTabix(zipped, "sam") 85 | 86 | tab <- TabixFile(zipped, idx) 87 | } 88 | 89 | \keyword{ manip } 90 | -------------------------------------------------------------------------------- /man/quickBamFlagSummary.Rd: -------------------------------------------------------------------------------- 1 | \name{quickBamFlagSummary} 2 | 3 | \alias{quickBamFlagSummary} 4 | \alias{quickBamFlagSummary,character-method} 5 | \alias{quickBamFlagSummary,list-method} 6 | 7 | \title{Group the records of a BAM file based on their flag bits and count 8 | the number of records in each group} 9 | 10 | \description{ 11 | \code{quickBamFlagSummary} groups the records of a BAM file based on their flag 12 | bits and counts the number of records in each group. 13 | } 14 | 15 | \usage{ 16 | quickBamFlagSummary(file, ..., param=ScanBamParam(), main.groups.only=FALSE) 17 | 18 | \S4method{quickBamFlagSummary}{character}(file, index=file, ..., param=ScanBamParam(), 19 | main.groups.only=FALSE) 20 | 21 | \S4method{quickBamFlagSummary}{list}(file, ..., param=ScanBamParam(), main.groups.only=FALSE) 22 | } 23 | 24 | \arguments{ 25 | 26 | \item{file, index}{For the character method, the path to the BAM file 27 | to read, and to the index file of the BAM file to read, 28 | respectively. 29 | 30 | For the list() method, \code{file} is a named list with elements 31 | \dQuote{qname} and \dQuote{flag} with content as from 32 | \code{\link{scanBam}}.} 33 | 34 | \item{...}{Additional arguments, perhaps used by methods.} 35 | 36 | \item{param}{An instance of \code{\linkS4class{ScanBamParam}}. 37 | This determines which records are considered in the counting.} 38 | 39 | \item{main.groups.only}{If \code{TRUE}, then the counting is performed 40 | for the main groups only. 
41 | } 42 | } 43 | 44 | \value{ 45 | Nothing is returned. A summary of the counts is printed to the console 46 | unless redirected by \code{\link{sink}}. 47 | } 48 | 49 | \author{Hervé Pagès} 50 | 51 | \references{ 52 | \url{http://samtools.sourceforge.net/} 53 | } 54 | 55 | \seealso{ 56 | \code{\link{scanBam}}, 57 | \code{\link{ScanBamParam}}. 58 | 59 | \code{\link{BamFile}} for a method for that class. 60 | } 61 | 62 | \examples{ 63 | bamfile <- system.file("extdata", "ex1.bam", package="Rsamtools", 64 | mustWork=TRUE) 65 | quickBamFlagSummary(bamfile) 66 | } 67 | 68 | \keyword{manip} 69 | -------------------------------------------------------------------------------- /man/readPileup.Rd: -------------------------------------------------------------------------------- 1 | \name{readPileup} 2 | \Rdversion{1.1} 3 | \alias{readPileup} 4 | \alias{readPileup,connection-method} 5 | \alias{readPileup,character-method} 6 | 7 | \title{ 8 | Import samtools 'pileup' files. 9 | } 10 | \description{ 11 | Import files created by evaluation of samtools' \code{pileup -cv} 12 | command. 13 | } 14 | \usage{ 15 | readPileup(file, ...) 16 | \S4method{readPileup}{connection}(file, ..., variant=c("SNP", "indel", "all")) 17 | } 18 | 19 | \arguments{ 20 | 21 | \item{file}{The file name, or 22 | \code{\link[base:connections]{connection}}, of the pileup output file 23 | to be parsed.} 24 | 25 | \item{\dots}{Additional arguments, passed to methods. For instance, 26 | specify \code{variant} for the readPileup,character-method.} 27 | 28 | \item{variant}{Type of variant to parse; select one.} 29 | 30 | } 31 | 32 | \value{ 33 | 34 | \code{readPileup} returns a \code{\link[GenomicRanges]{GRanges}} object. 
35 | 36 | The value returned by \code{variant="SNP"} or \code{variant="all"} 37 | contains: 38 | \describe{ 39 | 40 | \item{space:}{The chromosome names (fastq ids) of the reference 41 | sequence} 42 | 43 | \item{position:}{The nucleotide position (base 1) of the variant.} 44 | 45 | \item{referenceBase:}{The nucleotide in the reference sequence.} 46 | 47 | \item{consensusBase:}{The consensus nucleotide, as determined by 48 | samtools pileup.} 49 | 50 | \item{consensusQuality:}{The phred-scaled consensus quality.} 51 | 52 | \item{snpQuality:}{The phred-scaled SNP quality (probability of the 53 | consensus being identical to the reference).} 54 | 55 | \item{maxMappingQuality:}{The root mean square mapping quality of reads 56 | overlapping the site.} 57 | 58 | \item{coverage:}{The number of reads covering the site.} 59 | 60 | } 61 | 62 | The value returned by \code{variant="indel"} contains space, position, 63 | reference, consensus, consensusQuality, snpQuality, maxMappingQuality, 64 | and coverage fields, and: 65 | 66 | \describe{ 67 | 68 | \item{alleleOne, alleleTwo}{The first (typically, in the reference 69 | sequence) and second allelic variants.} 70 | 71 | \item{alleleOneSupport, alleleTwoSupport}{The number of reads 72 | supporting each allele.} 73 | 74 | \item{additionalIndels}{The number of additional indels present.} 75 | 76 | } 77 | 78 | } 79 | \references{ 80 | \url{http://samtools.sourceforge.net/} 81 | } 82 | \author{ 83 | Sean Davis 84 | } 85 | 86 | \examples{ 87 | 88 | fl <- system.file("extdata", "pileup.txt", package="Rsamtools", 89 | mustWork=TRUE) 90 | (res <- readPileup(fl)) 91 | xtabs(~referenceBase + consensusBase, mcols(res))[DNA_BASES,] 92 | 93 | \dontrun{## uses a pipe, and arguments passed to read.table 94 | ## three successive piles of 100 records each 95 | cmd <- "samtools pileup -cvf human_b36_female.fa.gz na19240_3M.bam" 96 | p <- pipe(cmd, "r") 97 | snp <- readPileup(p, nrow=100) # variant="SNP" 98 | indel <- readPileup(p, nrow=100,
variant="indel") 99 | all <- readPileup(p, nrow=100, variant="all") 100 | } 101 | 102 | } 103 | 104 | \keyword{ manip } 105 | -------------------------------------------------------------------------------- /man/scanBcf.Rd: -------------------------------------------------------------------------------- 1 | \name{BcfInput} 2 | \Rdversion{1.1} 3 | 4 | \alias{scanBcfHeader} 5 | \alias{scanBcfHeader,character-method} 6 | \alias{scanBcf} 7 | \alias{scanBcf,character-method} 8 | \alias{asBcf} 9 | \alias{asBcf,character-method} 10 | \alias{indexBcf} 11 | \alias{indexBcf,character-method} 12 | 13 | 14 | \title{ 15 | 16 | Operations on `BCF' files. 17 | 18 | } 19 | \description{ 20 | Import, coerce, or index variant call files in text or binary format. 21 | } 22 | \usage{ 23 | 24 | scanBcfHeader(file, ...) 25 | \S4method{scanBcfHeader}{character}(file, ...) 26 | 27 | scanBcf(file, ...) 28 | \S4method{scanBcf}{character}(file, index = file, ..., param=ScanBcfParam()) 29 | 30 | asBcf(file, dictionary, destination, ..., 31 | overwrite=FALSE, indexDestination=TRUE) 32 | \S4method{asBcf}{character}(file, dictionary, destination, ..., 33 | overwrite=FALSE, indexDestination=TRUE) 34 | 35 | indexBcf(file, ...) 36 | \S4method{indexBcf}{character}(file, ...) 37 | 38 | } 39 | 40 | \arguments{ 41 | 42 | \item{file}{For \code{scanBcf} and \code{scanBcfHeader}, the 43 | character() file name of the \sQuote{BCF} file to 44 | be processed, or an instance of class \code{\link{BcfFile}}.} 45 | 46 | \item{index}{The character() file name(s) of the `BCF' index to be 47 | processed.} 48 | 49 | \item{dictionary}{a character vector of the unique \dQuote{CHROM} 50 | names in the VCF file.} 51 | 52 | \item{destination}{The character(1) file name of the location where 53 | the BCF output file will be created. 
For \code{asBcf} this is 54 | without the \dQuote{.bcf} file suffix.} 55 | 56 | \item{param}{An instance of \code{\linkS4class{ScanBcfParam}} 57 | influencing which records are parsed and the \sQuote{INFO} and 58 | \sQuote{GENO} information returned.} 59 | 60 | \item{...}{Additional arguments, e.g., for 61 | \code{scanBcfHeader,character-method}, \code{mode} of 62 | \code{\link{BcfFile}}.} 63 | 64 | \item{overwrite}{A logical(1) indicating whether the destination 65 | can be over-written if it already exists.} 66 | 67 | \item{indexDestination}{A logical(1) indicating whether the created 68 | destination file should also be indexed.} 69 | 70 | } 71 | 72 | \details{ 73 | 74 | \code{bcf*} functions are restricted to the GENO fields supported by 75 | \sQuote{bcftools} (see documentation at the url below). The argument 76 | \code{param} allows portions of the file to be input, but requires 77 | that the file be BCF or bgzip'd and indexed as a 78 | \code{\linkS4class{TabixFile}}. For similar functions operating on VCF 79 | files see ?\code{scanVcf} in the \code{VariantAnnotation} package. 80 | 81 | } 82 | 83 | \value{ 84 | 85 | \code{scanBcfHeader} returns a list, with one element for each file 86 | named in \code{file}. Each element of the list is itself a list containing 87 | three elements. The \code{Reference} element is a character() vector with 88 | names of reference sequences. The \code{Sample} element is a character() 89 | vector of names of samples. The \code{Header} element is a DataFrameList 90 | with one DataFrame per unique key value in the header 91 | (preceded by \dQuote{##}). 92 | 93 | NOTE: In Rsamtools >=1.33.6, the \code{Header} DataFrameList no longer 94 | contains a DataFrame named "META". The META DataFrame contained all "simple" 95 | key-value header lines from a bcf / vcf file. These "simple" header 96 | lines are now parsed into individual DataFrames named for the unique 97 | key.
98 | 99 | \code{scanBcf} returns a list, with one element per file. Each list has 9 100 | elements, corresponding to the columns of the VCF specification: \code{CHROM}, 101 | \code{POS}, \code{ID}, \code{REF}, \code{ALT}, \code{QUAL}, \code{FILTER}, 102 | \code{INFO}, \code{FORMAT}, \code{GENO}. 103 | 104 | The \code{GENO} element is itself a list, with elements corresponding 105 | to fields supported by \sQuote{bcftools} (see documentation at the url below). 106 | 107 | \code{asBcf} creates a binary BCF file from a text VCF file. 108 | 109 | \code{indexBcf} creates an index into the BCF file. 110 | 111 | } 112 | 113 | \references{ 114 | 115 | \url{http://vcftools.sourceforge.net/specs.html} outlines the VCF 116 | specification. 117 | 118 | \url{http://samtools.sourceforge.net/mpileup.shtml} contains 119 | information on the portion of the specification implemented by 120 | \code{bcftools}. 121 | 122 | \url{http://samtools.sourceforge.net/} provides information on 123 | \code{samtools}. 124 | 125 | } 126 | 127 | \seealso{ 128 | 129 | \code{\link{BcfFile}}, \code{\link{TabixFile}} 130 | 131 | } 132 | 133 | \author{ 134 | 135 | Martin Morgan .
136 | 137 | } 138 | 139 | \examples{ 140 | fl <- system.file("extdata", "ex1.bcf.gz", package="Rsamtools", 141 | mustWork=TRUE) 142 | scanBcfHeader(fl) 143 | bcf <- scanBcf(fl) 144 | ## value: list-of-lists 145 | str(bcf[1:8]) 146 | names(bcf[["GENO"]]) 147 | str(head(bcf[["GENO"]][["PL"]])) 148 | example(BcfFile) 149 | } 150 | 151 | \keyword{ manip } 152 | -------------------------------------------------------------------------------- /man/scanFa.Rd: -------------------------------------------------------------------------------- 1 | \name{FaInput} 2 | \Rdversion{1.1} 3 | 4 | \alias{indexFa} 5 | \alias{indexFa,character-method} 6 | \alias{scanFaIndex} 7 | \alias{scanFaIndex,character-method} 8 | \alias{countFa} 9 | \alias{countFa,character-method} 10 | \alias{scanFa} 11 | \alias{scanFa,character,GRanges-method} 12 | \alias{scanFa,character,IntegerRangesList-method} 13 | \alias{scanFa,character,missing-method} 14 | 15 | \title{ 16 | 17 | Operations on indexed 'fasta' files. 18 | 19 | } 20 | \description{ 21 | 22 | Scan indexed fasta (or compressed fasta) files and their indices. 23 | 24 | } 25 | \usage{ 26 | 27 | indexFa(file, ...) 28 | \S4method{indexFa}{character}(file, ...) 29 | 30 | scanFaIndex(file, ...) 31 | \S4method{scanFaIndex}{character}(file, ...) 32 | 33 | countFa(file, ...) 34 | \S4method{countFa}{character}(file, ...)
35 | 36 | scanFa(file, param, ..., 37 | as=c("DNAStringSet", "RNAStringSet", "AAStringSet")) 38 | \S4method{scanFa}{character,GRanges}(file, param, ..., 39 | as=c("DNAStringSet", "RNAStringSet", "AAStringSet")) 40 | \S4method{scanFa}{character,IntegerRangesList}(file, param, ..., 41 | as=c("DNAStringSet", "RNAStringSet", "AAStringSet")) 42 | \S4method{scanFa}{character,missing}(file, param, ..., 43 | as=c("DNAStringSet", "RNAStringSet", "AAStringSet")) 44 | 45 | } 46 | 47 | \arguments{ 48 | 49 | \item{file}{A character(1) vector containing the fasta file path.} 50 | 51 | \item{param}{An optional \code{\linkS4class{GRanges}} or 52 | \code{\linkS4class{IntegerRangesList}} instance to select reads (and 53 | sub-sequences) for input.} 54 | 55 | \item{as}{A character(1) vector indicating the type of object to 56 | return; default \code{DNAStringSet}.} 57 | 58 | \item{...}{Additional arguments, passed to \code{readDNAStringSet} / 59 | \code{readRNAStringSet} / \code{readAAStringSet} when \code{param} 60 | is \sQuote{missing}.} 61 | 62 | } 63 | 64 | \value{ 65 | 66 | \code{indexFa} visits the path in \code{file} and creates an index file 67 | at the same location but with extension \sQuote{.fai}. 68 | 69 | \code{scanFaIndex} reads the sequence names and widths recorded 70 | in an indexed fasta file, returning the information as a 71 | \code{\linkS4class{GRanges}} object. 72 | 73 | \code{countFa} returns the number of records in the fasta file. 74 | 75 | \code{scanFa} returns the sequences indicated by \code{param} as a 76 | \code{\linkS4class{DNAStringSet}}, \code{\linkS4class{RNAStringSet}}, or 77 | \code{\linkS4class{AAStringSet}} instance. \code{seqnames(param)} 78 | selects the sequences to return; \code{start(param)} and 79 | \code{end(param)} define the (1-based) region of the sequence to 80 | return. Values of \code{end(param)} greater than the width of the 81 | sequence are set to the width of the sequence.
When \code{param} is 82 | missing, all records are selected. When \code{param} is 83 | \code{GRanges()}, no records are selected. 84 | 85 | } 86 | 87 | \references{ 88 | 89 | \url{http://samtools.sourceforge.net/} provides information on 90 | \code{samtools}. 91 | 92 | } 93 | 94 | \author{ 95 | 96 | Martin Morgan . 97 | 98 | } 99 | 100 | \examples{ 101 | fa <- system.file("extdata", "ce2dict1.fa", package="Rsamtools", 102 | mustWork=TRUE) 103 | countFa(fa) 104 | (idx <- scanFaIndex(fa)) 105 | (dna <- scanFa(fa, idx[1:2])) 106 | ranges(idx) <- narrow(ranges(idx), -10) # last 10 nucleotides 107 | (dna <- scanFa(fa, idx[1:2])) 108 | } 109 | 110 | \keyword{ manip } 111 | -------------------------------------------------------------------------------- /man/scanTabix.Rd: -------------------------------------------------------------------------------- 1 | \name{TabixInput} 2 | \Rdversion{1.1} 3 | 4 | \alias{scanTabix} 5 | \alias{scanTabix,character,GRanges-method} 6 | \alias{scanTabix,character,IntegerRangesList-method} 7 | 8 | \title{ 9 | 10 | Operations on `tabix' (indexed, tab-delimited) files. 11 | 12 | } 13 | \description{ 14 | Scan compressed, sorted, tabix-indexed, tab-delimited files. 15 | } 16 | \usage{ 17 | 18 | scanTabix(file, ..., param) 19 | \S4method{scanTabix}{character,IntegerRangesList}(file, ..., param) 20 | \S4method{scanTabix}{character,GRanges}(file, ..., param) 21 | 22 | } 23 | 24 | \arguments{ 25 | 26 | \item{file}{The character() file name(s) of the tabix file to be 27 | processed, or more flexibly an instance of class 28 | \code{\link{TabixFile}}.} 29 | 30 | \item{param}{An instance of \code{GRanges} or \code{IntegerRangesList} 31 | providing the sequence names and regions to be parsed.} 32 | 33 | 34 | \item{...}{Additional arguments, currently ignored.} 35 | 36 | } 37 | 38 | \value{ 39 | 40 | \code{scanTabix} returns a list, with one element per region. Each element 41 | of the list is a character vector representing records in the region.
If 42 | \code{param} is empty then all records will be returned. 43 | 44 | } 45 | 46 | 47 | \section{Error}{ 48 | 49 | \code{scanTabix} signals errors using \code{signalCondition}. The 50 | following errors are signaled: 51 | 52 | \describe{ 53 | 54 | \item{\code{scanTabix_param}}{\code{yieldSize(file)} must be NA when 55 | more than one range is specified.} 56 | 57 | \item{\code{scanTabix_io}}{A read error occurred while inputting the 58 | tabix file. This might be because the file is corrupt, or of 59 | incorrect format (e.g., when \code{path} points to a plain text 60 | file but \code{index} is present, implying that \code{path} should 61 | be a \code{bgzip}ed file). The error message may include an error 62 | code representing the logical OR of these cryptic signals: 1, 63 | BGZF_ERR_ZLIB; 2, BGZF_ERR_HEADER; 4, BGZF_ERR_IO; 8, 64 | BGZF_ERR_MISUSE.} 65 | 66 | } 67 | } 68 | 69 | \references{ 70 | 71 | \url{http://samtools.sourceforge.net/tabix.shtml} 72 | 73 | } 74 | 75 | \author{ 76 | 77 | Martin Morgan . 78 | 79 | } 80 | 81 | \examples{ 82 | example(TabixFile) 83 | } 84 | 85 | \keyword{ manip } 86 | -------------------------------------------------------------------------------- /man/seqnamesTabix.Rd: -------------------------------------------------------------------------------- 1 | \name{seqnamesTabix} 2 | \Rdversion{1.1} 3 | 4 | \alias{seqnamesTabix} 5 | \alias{seqnamesTabix,character-method} 6 | 7 | \title{ 8 | 9 | Retrieve sequence names defined in a tabix file. 10 | 11 | } 12 | \description{ 13 | 14 | This function queries a tabix file, returning the names of the 15 | \sQuote{sequences} used as a key when creating the file. 16 | 17 | } 18 | \usage{ 19 | 20 | seqnamesTabix(file, ...) 21 | \S4method{seqnamesTabix}{character}(file, ...)
22 | 23 | } 24 | 25 | \arguments{ 26 | 27 | \item{file}{A \code{character(1)} file path or 28 | \code{\linkS4class{TabixFile}} instance pointing to a \sQuote{tabix} 29 | file.} 30 | 31 | \item{...}{Additional arguments, currently ignored.} 32 | 33 | } 34 | 35 | \value{ 36 | 37 | A \code{character()} vector of sequence names present in the file. 38 | 39 | } 40 | 41 | \author{ 42 | 43 | Martin Morgan . 44 | 45 | } 46 | 47 | \examples{ 48 | fl <- system.file("extdata", "example.gtf.gz", package="Rsamtools", 49 | mustWork=TRUE) 50 | seqnamesTabix(fl) 51 | } 52 | 53 | \keyword{ manip } 54 | -------------------------------------------------------------------------------- /man/testPairedEndBam.Rd: -------------------------------------------------------------------------------- 1 | \name{testPairedEndBam} 2 | \alias{testPairedEndBam} 3 | \alias{testPairedEndBam,character-method} 4 | \alias{testPairedEndBam,BamFile-method} 5 | 6 | \title{ 7 | Quickly test if a BAM file has paired end reads 8 | } 9 | 10 | \description{ 11 | 12 | Iterate through a BAM file until a paired-end read is encountered or 13 | the end of file is reached; report the occurrence of paired-end 14 | reads to the user. 15 | 16 | } 17 | 18 | \usage{ 19 | testPairedEndBam(file, index=file, ...) 20 | } 21 | 22 | \arguments{ 23 | 24 | \item{file}{character(1) BAM file name, or a \code{\link{BamFile}} 25 | instance. Open \code{BamFile}s are closed; their yield size is 26 | respected when iterating through the file.} 27 | 28 | \item{index}{(optional) character(1) name of the index file of the 29 | 'BAM' file being processed; this is given \emph{without} the 30 | '.bai' extension.} 31 | 32 | \item{\dots}{Additional arguments, currently unused.} 33 | 34 | } 35 | 36 | \value{ 37 | 38 | A logical vector of length 1 containing TRUE is returned if BAM file 39 | contained paired end reads, FALSE otherwise. 
40 | 41 | } 42 | 43 | \author{ 44 | 45 | Martin Morgan \url{mailto:mtmorgan@fhcrc.org}, Sonali Arora 46 | \url{mailto:sarora@fhcrc.org} 47 | 48 | } 49 | 50 | \examples{ 51 | fl <- system.file("extdata", "ex1.bam", package="Rsamtools") 52 | testPairedEndBam(fl) 53 | } 54 | 55 | -------------------------------------------------------------------------------- /man/zip.Rd: -------------------------------------------------------------------------------- 1 | \name{Compression} 2 | \Rdversion{1.1} 3 | 4 | \alias{bgzip} 5 | \alias{razip} 6 | 7 | \title{ 8 | 9 | File compression for tabix (bgzip) and fasta (razip) files. 10 | 11 | IMPORTANT NOTE: Starting with Rsamtools 1.99.0 (Bioconductor 3.9), 12 | \code{razip()} is defunct. Please use \code{bgzip()} instead. 13 | } 14 | \description{ 15 | 16 | \code{bgzip} compresses tabix (e.g. SAM or VCF) or FASTA files 17 | to a format that allows indexing for later fast random-access. 18 | 19 | } 20 | \usage{ 21 | 22 | bgzip(file, dest=sprintf("\%s.bgz", sub("\\\\.gz$", "", file)), 23 | overwrite = FALSE) 24 | 25 | ## Defunct! 26 | razip(file, dest=sprintf("\%s.rz", sub("\\\\.gz$", "", file)), 27 | overwrite = FALSE) 28 | 29 | } 30 | 31 | \arguments{ 32 | 33 | \item{file}{A character(1) path to an existing uncompressed or 34 | gz-compressed file. This file will be compressed.} 35 | 36 | \item{dest}{A character(1) path to a file. This will be the compressed 37 | file. If \code{dest} exists, then it is only over-written when 38 | \code{overwrite=TRUE}.} 39 | 40 | \item{overwrite}{A logical(1) indicating whether \code{dest} should 41 | be over-written, if it already exists.} 42 | 43 | } 44 | 45 | \value{ 46 | 47 | The full path to \code{dest}. 48 | 49 | } 50 | \references{ 51 | \url{http://samtools.sourceforge.net/} 52 | } 53 | \author{ 54 | 55 | Martin Morgan 56 | 57 | } 58 | 59 | \seealso{ 60 | 61 | \code{\link{TabixFile}}, \code{\link{FaFile}}. 
62 | 63 | } 64 | 65 | \examples{ 66 | 67 | from <- system.file("extdata", "ex1.sam", package="Rsamtools", 68 | mustWork=TRUE) 69 | to <- tempfile() 70 | zipped <- bgzip(from, to) 71 | 72 | } 73 | 74 | \keyword{ manip } 75 | -------------------------------------------------------------------------------- /src/BamFileIterator.h: -------------------------------------------------------------------------------- 1 | // BamFileIterator.h: 2 | // Iterator used when reading a complete bam file. 3 | 4 | #ifndef BAMFILEITERATOR_H 5 | #define BAMFILEITERATOR_H 6 | 7 | #include "BamIterator.h" 8 | 9 | 10 | class BamFileIterator : public BamIterator { 11 | 12 | bool file_done; /* set once bam_read1() returns a negative value inside the read loop below */ 13 | 14 | /* Process records until the (tid, pos) of the next record differs from the one just processed and `complete` is non-empty, or until bam_read1() returns a negative value. */ void iterate_inprogress(bamFile bfile) { 15 | if (iter_done | file_done) 16 | return; 17 | if ((bam1_t *) NULL == bam) { // first record 18 | bam = bam_init1(); 19 | if (bam_read1(bfile, bam) < 0) { 20 | iter_done = true; 21 | return; 22 | } 23 | } 24 | 25 | bool done = false; 26 | do { 27 | process(bam); 28 | int tid = bam->core.tid; 29 | int pos = bam->core.pos; 30 | if (bam_read1(bfile, bam) < 0) { 31 | mate_touched_templates(); 32 | iter_done = file_done = done = true; 33 | } else { 34 | if ((bam->core.tid != tid) || (bam->core.pos != pos)) { 35 | mate_touched_templates(); 36 | done = !complete.empty(); 37 | } 38 | } 39 | } while (!done); 40 | } 41 | 42 | public: 43 | 44 | // constructor (no destructor is declared in this class) 45 | BamFileIterator(bamFile bfile, const bam_index_t *bindex) : 46 | BamIterator(bfile, bindex), file_done(false) {} 47 | 48 | }; 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /src/BamIterator.h: -------------------------------------------------------------------------------- 1 | // BamIterator.h: 2 | // Virtual iterator class with concrete subclasses of 3 | // BamRangeIterator and BamFileIterator.
4 | 5 | #ifndef BAMITERATOR_H 6 | #define BAMITERATOR_H 7 | 8 | #include 9 | #include 10 | #include "Template.h" 11 | #include "bam_data.h" 12 | 13 | class BamIterator { 14 | 15 | BAM_DATA bam_data; 16 | 17 | queue > ambiguous; 18 | queue > unmated; 19 | set touched_templates; 20 | 21 | protected: 22 | 23 | typedef map Templates; 24 | Templates templates; 25 | queue > complete; 26 | 27 | const bam_index_t *bindex; 28 | bam_header_t *header; 29 | bam1_t *bam; 30 | 31 | char qname_prefix_end() const { 32 | if (bam_data == (BAM_DATA) NULL) 33 | Rf_error("[qname_prefix_end] report to maintainer('Rsamtools')"); 34 | return bam_data->qnamePrefixEnd; 35 | } 36 | 37 | char qname_suffix_start() const { 38 | if (bam_data == (BAM_DATA) NULL) 39 | Rf_error("[qname_suffix_start] report to maintainer('Rsamtools')"); 40 | return bam_data->qnameSuffixStart; 41 | } 42 | 43 | /* mate every template touched since the last call, erase templates left empty, then reset the touched set */ void mate_touched_templates() { 44 | for (set::iterator it=touched_templates.begin(); 45 | it != touched_templates.end(); ++it) { 46 | templates[*it].mate(complete, header->target_len); 47 | if (templates[*it].empty()) 48 | templates.erase(*it); 49 | } 50 | touched_templates.clear(); 51 | } 52 | 53 | // process: apply the BAM_DATA filter, then add the record to the template keyed by its trimmed qname; the template is marked as touched when add_segment() returns true 54 | void process(const bam1_t *bam) { 55 | if (bam_data == (BAM_DATA) NULL) 56 | Rf_error("[process] report to maintainer('Rsamtools')"); 57 | if (!_filter1_BAM_DATA(bam, bam_data)) 58 | return; 59 | const char *trimmed_qname = 60 | Template::qname_trim(bam1_qname(bam), qname_prefix_end(), 61 | qname_suffix_start()); 62 | if (templates[trimmed_qname].add_segment(bam)) 63 | touched_templates.insert(trimmed_qname); 64 | } 65 | 66 | virtual void iterate_inprogress(bamFile bfile) = 0; 67 | 68 | virtual void finalize_inprogress(bamFile bfile) { 69 | Templates::iterator it; 70 | // transfer Template::ambiguous to BamIterator::ambiguous 71 | // transfer Template::inprogress and Template::invalid to 72 | // BamIterator::unmated 73 | for (it = templates.begin(); it != templates.end(); ++it) 74 |
it->second.cleanup(ambiguous, unmated); 75 | templates.clear(); 76 | } 77 | 78 | public: 79 | 80 | bool iter_done; 81 | 82 | // constructor / destructor 83 | BamIterator(bamFile bfile, const bam_index_t *bindex) : 84 | bam_data((BAM_DATA) NULL), bindex(bindex), 85 | bam((bam1_t *) NULL), iter_done(false) 86 | { 87 | (void) bam_seek(bfile, 0, 0); 88 | header = bam_header_read(bfile); 89 | } 90 | 91 | virtual ~BamIterator() { 92 | if ((bam1_t *) NULL != bam) 93 | bam_destroy1(bam); 94 | bam_header_destroy(header); 95 | } 96 | 97 | void set_bam_data(BAM_DATA bd) { 98 | this->bam_data = bd; 99 | } 100 | 101 | // yield: return the next group of records, taken from complete, then ambiguous, then unmated; the group is empty with status MATE_UNKNOWN when all three queues are empty 102 | void yield(bamFile bfile, bam_mates_t *result) { 103 | if (complete.empty() && !iter_done) 104 | iterate_inprogress(bfile); 105 | if (complete.empty() && !templates.empty()) 106 | finalize_inprogress(bfile); 107 | 108 | list elts; 109 | MATE_STATUS mated = MATE_UNKNOWN; 110 | if (!complete.empty()) { 111 | elts = complete.front(); 112 | complete.pop(); 113 | mated = MATE_MATED; 114 | } else if (!ambiguous.empty()) { 115 | elts = ambiguous.front(); 116 | ambiguous.pop(); 117 | mated = MATE_AMBIGUOUS; 118 | } else if (!unmated.empty()) { 119 | elts = unmated.front(); 120 | unmated.pop(); 121 | mated = MATE_UNMATED; 122 | } 123 | 124 | bam_mates_realloc(result, elts.size(), mated); 125 | for (int i = 0; !elts.empty(); ++i) { 126 | result->bams[i] = elts.front(); 127 | elts.pop_front(); 128 | } 129 | } 130 | 131 | }; 132 | 133 | #endif 134 | -------------------------------------------------------------------------------- /src/Biostrings_stubs.c: -------------------------------------------------------------------------------- 1 | #include "_Biostrings_stubs.c" 2 | -------------------------------------------------------------------------------- /src/COMPAT_bcf_hdr_read.h: -------------------------------------------------------------------------------- 1 | #ifndef _COMPAT_BCF_HDR_READ_H_ 2 | #define _COMPAT_BCF_HDR_READ_H_ 3 | 4 | #include 5 | #include 6 |
7 | bcf_hdr_t *COMPAT_bcf_hdr_read(htsFile *hfp); 8 | 9 | #endif /* _COMPAT_BCF_HDR_READ_H_ */ 10 | -------------------------------------------------------------------------------- /src/GenomicPosition.h: -------------------------------------------------------------------------------- 1 | #ifndef GENOMIC_POSITION_H 2 | #define GENOMIC_POSITION_H 3 | 4 | // identifier for genomic position; used for ordering of PosCache in 5 | // associative containers; positions order by tid first, then by pos 6 | struct GenomicPosition { 7 | int tid, pos; 8 | GenomicPosition(int tid_, int pos_) : tid(tid_), pos(pos_) { } 9 | bool operator<(const GenomicPosition& rhs) const { 10 | return tid < rhs.tid || (tid == rhs.tid && pos < rhs.pos); 11 | } 12 | bool operator==(const GenomicPosition& rhs) const { 13 | return tid == rhs.tid && pos == rhs.pos; 14 | } 15 | bool operator<=(const GenomicPosition& rhs) const { 16 | return *this < rhs || *this == rhs; 17 | } 18 | }; 19 | 20 | #endif // GENOMIC_POSITION_H 21 | -------------------------------------------------------------------------------- /src/IRanges_stubs.c: -------------------------------------------------------------------------------- 1 | #include "_IRanges_stubs.c" 2 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | ## This file uses GNU make syntax $(shell ...) so we need to 2 | ## have "SystemRequirements: GNU make" in the DESCRIPTION file. 3 | ## See Rhtslib's vignette for details.
4 | 5 | RHTSLIB_LIBS=$(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript" -e \ 6 | 'Rhtslib::pkgconfig("PKG_LIBS")') 7 | RHTSLIB_CPPFLAGS=$(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript" -e \ 8 | 'Rhtslib::pkgconfig("PKG_CPPFLAGS")') 9 | 10 | PKG_LIBS=$(RHTSLIB_LIBS) 11 | PKG_CPPFLAGS=$(RHTSLIB_CPPFLAGS) 12 | 13 | -------------------------------------------------------------------------------- /src/PileupBufferShim.h: -------------------------------------------------------------------------------- 1 | #ifndef PILEUPBUFFERSHIM_H 2 | #define PILEUPBUFFERSHIM_H 3 | 4 | #include "PileupBuffer.h" 5 | 6 | class PileupBufferShim { 7 | 8 | private: 9 | const SEXP regions; 10 | SEXP result; 11 | PileupBuffer &buffer; 12 | public: 13 | PileupBufferShim(SEXP _regions, SEXP _result, PileupBuffer &_buffer) : 14 | regions(_regions), result(_result), buffer(_buffer) 15 | {} 16 | 17 | /* start1: initialise the buffer for range irange; when regions is R_NilValue it is initialised with a NULL name and 0, 0. Element 0 of regions is read as strings, elements 1 and 2 as integers (presumably sequence name, start, end -- TODO confirm against PileupBuffer::init) */ void start1(const int irange) { 18 | if (R_NilValue == regions) { 19 | buffer.init((const char *) NULL, 0, 0); 20 | } else { 21 | buffer.init( 22 | CHAR(STRING_ELT(VECTOR_ELT(regions, 0), irange)), 23 | INTEGER(VECTOR_ELT(regions, 1))[irange], 24 | INTEGER(VECTOR_ELT(regions, 2))[irange]); 25 | } 26 | } 27 | /* finish1: push NULL, store buffer.yield() as element irange of result, then destroy the plbuf */ void finish1(const int irange) { 28 | plbuf_push((const bam1_t *) NULL); 29 | SET_VECTOR_ELT(result, irange, buffer.yield()); 30 | buffer.plbuf_destroy(); 31 | } 32 | // The only way to trigger running the callback function 33 | // (Pileup::insert in this case) is to push NULL to the buffer and 34 | // destroy it. Therefore, must destroy and recreate plbuf each 35 | // time yieldSize records are pushed.
36 | void process_yieldSize_chunk() { 37 | plbuf_push((const bam1_t *) NULL); 38 | buffer.plbuf_destroy(); // trigger run of Pileup::insert 39 | buffer.init((const char *) NULL, 0, 0); 40 | } 41 | // intended to be called from _pileup_bam after EOI message sent 42 | // to PileupBuffer; same as finish1 but no plbuf is in use 43 | void flush() { 44 | //Rprintf("flushing\n"); 45 | SET_VECTOR_ELT(result, 0, buffer.yield()); 46 | } 47 | void plbuf_push(const bam1_t *bam) { 48 | buffer.plbuf_push(bam); 49 | } 50 | }; 51 | 52 | #endif // PILEUPBUFFERSHIM_H 53 | -------------------------------------------------------------------------------- /src/PosCache.h: -------------------------------------------------------------------------------- 1 | #ifndef POS_CACHE_H 2 | #define POS_CACHE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "GenomicPosition.h" 11 | using std::map; 12 | using std::pair; 13 | using std::make_pair; 14 | 15 | struct BamTuple { // for sending info from Pileup::insert to ResultMgr 16 | char nuc, strand; 17 | int bin; 18 | BamTuple(char nuc_ = 'X', char strand_ = 'X', int bin_ = 0) 19 | : nuc(nuc_), strand(strand_), bin(bin_) { } 20 | /* orders by nuc, then strand, then bin */ bool operator<(const BamTuple& rhs) const { 21 | return nuc < rhs.nuc || (nuc == rhs.nuc && strand < rhs.strand) || 22 | (nuc == rhs.nuc && strand == rhs.strand && bin < rhs.bin); 23 | } 24 | }; 25 | 26 | struct PosCache { 27 | GenomicPosition genomicPosition; 28 | std::vector tupleVec; 29 | std::map nucCounts; 30 | typedef std::vector::const_iterator tuple_iter; 31 | typedef std::map::const_iterator counts_iter; 32 | PosCache(GenomicPosition genomicPosition_) 33 | : genomicPosition(genomicPosition_), tupleVec(), nucCounts() { } 34 | /* append bt and increment the count kept for bt.nuc (the entry is inserted with count 0 first if absent) */ void storeTuple(BamTuple& bt) { 35 | tupleVec.push_back(bt); 36 | nucCounts.insert(make_pair(bt.nuc,0)).first->second++; 37 | } 38 | // exposed so we can use std::accumulate on nucCounts map 39 | static int addSecond(int i, const pair& thePair) {
40 | return i + thePair.second; 41 | } 42 | /* sum of all per-nucleotide counts */ int totalNucFreq() const { 43 | return std::accumulate(nucCounts.begin(),nucCounts.end(),0,addSecond); 44 | } 45 | /* largest per-nucleotide count, or 0 when nucCounts is empty */ int primaryNucFreq() const { 46 | int maxCount = 0; 47 | typedef map::const_iterator iter; 48 | for(iter it = nucCounts.begin(); 49 | it != nucCounts.end(); ++it) { 50 | if(it->second > maxCount) 51 | maxCount = it->second; 52 | } 53 | return maxCount; 54 | } 55 | /* keys of nucCounts whose count is at least min */ std::set passingNucs(int min) const { 56 | std::set nucs; 57 | for(counts_iter it = nucCounts.begin(); it != nucCounts.end(); ++it) { 58 | if(it->second >= min) 59 | nucs.insert(it->first); 60 | } 61 | return nucs; 62 | } 63 | void clear() { 64 | tupleVec.clear(); 65 | nucCounts.clear(); 66 | } 67 | /* debugging aid: writes every stored tuple to stdout; bin is an int, so it is printed with %d */ void print() const { 68 | printf("tupleVec contents:\n"); 69 | for(tuple_iter it = tupleVec.begin(); it != tupleVec.end(); ++it) { 70 | printf("nuc %c str %c bin %d\n", it->nuc, it->strand, it->bin); 71 | } 72 | } 73 | }; 74 | 75 | #endif // POS_CACHE_H 76 | -------------------------------------------------------------------------------- /src/PosCacheColl.cpp: -------------------------------------------------------------------------------- 1 | #include "PosCacheColl.h" 2 | 3 | // because it's a free function we have to ensure it's only ever 4 | // defined once.
Including it in the .h triggers multiple definition 5 | // linking error 6 | void getPosCacheFromColl(PosCacheColl& pcc, PosCache*& posCachePtr) { 7 | PosCache* tmp = posCachePtr; // hold address pointed to by posCachePtr 8 | posCachePtr = pcc.fetchPosCache(posCachePtr); 9 | if(tmp != posCachePtr) { 10 | // found val in pcc, must deallocate incoming ptr's object 11 | delete tmp; 12 | tmp = (PosCache *) NULL; /* tmp is a local: nothing outside this function observes the reset */ 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/PosCacheColl.h: -------------------------------------------------------------------------------- 1 | #ifndef POS_CACHE_COLL_H 2 | #define POS_CACHE_COLL_H 3 | 4 | #include 5 | #include 6 | #include "PosCache.h" 7 | #include 8 | // because set of pointers, need to compare *dereferenced* values, 9 | // otherwise would compare pointer addresses! 10 | struct PosCachePtrLess { 11 | bool operator()(const PosCache* lhs, const PosCache* rhs) const { 12 | return lhs->genomicPosition < rhs->genomicPosition; 13 | } 14 | }; 15 | 16 | class PosCacheColl { 17 | private: 18 | std::set posCaches; 19 | public: 20 | typedef std::set::const_iterator const_iter; 21 | typedef std::set::iterator iter; 22 | PosCacheColl() : posCaches() { } 23 | /* deletes every PosCache still held by the collection */ ~PosCacheColl() { 24 | while(!posCaches.empty()) { 25 | iter it = posCaches.begin(); 26 | PosCache* tmp = *it; 27 | posCaches.erase(it); 28 | delete tmp; 29 | } 30 | } 31 | #ifdef PILEUP_DEBUG 32 | void printGenPositions() const { 33 | for(const_iter it = posCaches.begin(); it != posCaches.end(); ++it) { 34 | printf("tid %d pos %d\n", (*it)->genomicPosition.tid, 35 | (*it)->genomicPosition.pos); 36 | } 37 | } 38 | #endif // PILEUP_DEBUG 39 | /* inserts cachePtr into the set; raises an R error if an equivalent element is already present */ void storePosCache(PosCache *cachePtr) { 40 | //printf("storePosCache size %d\n", posCaches.size()); 41 | if(posCaches.find(cachePtr) != posCaches.end()) { 42 | Rf_error("internal: posCache already in PosCacheColl"); 43 | } 44 | posCaches.insert(cachePtr); 45 | cachePtr = (PosCache *) NULL; /* cachePtr is a by-value parameter: this clears only the local copy */ 46 | } 47 |
// precondition: val points to an already-allocated PosCache; 48 | // clients will use this to check if a PosCache with matching 49 | // GenomicPosition already exists. If it already exists, PosCache 50 | // pointed to by incoming pointer will be deallocated and incoming 51 | // pointer will be set to existing PosCache 52 | // postcondition: if val found in set, remove set element 53 | // containing pointer, return pointer to PosCache; if not found in 54 | // set, return NULL 55 | PosCache* fetchPosCache(PosCache* val) { 56 | iter it = posCaches.find(val); 57 | if(it == posCaches.end()) { 58 | return val; // not found in set, leave ptr unchanged 59 | } 60 | PosCache* tmp = *it; // tmp is pointer to heap memory of PosCache 61 | posCaches.erase(it); // remove set element 62 | return tmp; 63 | } 64 | PosCache* destructiveNext() { 65 | //printf("destructiveNext size %d\n", posCaches.size()); 66 | if(posCaches.empty()) { 67 | return NULL; 68 | } else { 69 | iter it = posCaches.begin(); 70 | PosCache* tmp = *it; 71 | posCaches.erase(it); 72 | return tmp; 73 | } 74 | } 75 | PosCache* destructiveNextLT(const GenomicPosition& gp) { 76 | if(posCaches.empty()) { 77 | return NULL; 78 | } else { 79 | iter it = posCaches.begin(); 80 | if((*it)->genomicPosition < gp) { 81 | PosCache* tmp = *it; 82 | posCaches.erase(it); 83 | return tmp; 84 | } else { 85 | return NULL; 86 | } 87 | } 88 | } 89 | // FIX ME: use STL algorithms!! 
90 | int numPosCachesLT(const GenomicPosition& gp) const { 91 | //printf("posCaches.size() %lu\n", posCaches.size()); 92 | //printf("gp gp.tid %d gp.pos %d\n", gp.tid, gp.pos); 93 | if(posCaches.empty()) 94 | return 0; 95 | int count = 0; 96 | for(const_iter it = posCaches.begin(); it != posCaches.end(); ++it) { 97 | if((*it)->genomicPosition < gp) 98 | ++count; 99 | else 100 | break; 101 | } 102 | //printf("COUNT: %d\n", count); 103 | return count; 104 | } 105 | }; 106 | 107 | // Destructive release of ownership of PosCache in PosCacheColl that 108 | // compares equal to posCachePtr's object 109 | // preconditions: 110 | // - posCachePtr points to valid initialized PosCache object 111 | // postconditions: 112 | // - if PosCache found in PosCacheColl: 113 | // a) the incoming posCachePtr's object is deallocated 114 | // b) posCachePtr is reassigned to found PosCache 115 | // c) posCacheColl no longer contains element pointing to returned 116 | // PosCache 117 | // - if PosCache NOT found in PosCacheColl 118 | // a) posCachePtr unchanged 119 | 120 | void getPosCacheFromColl(PosCacheColl& pcc, PosCache*& posCachePtr); 121 | 122 | #endif /* POS_CACHE_COLL_H */ 123 | -------------------------------------------------------------------------------- /src/R_init_Rsamtools.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "zip_compression.h" 3 | #include "utilities.h" 4 | #include "bamfile.h" 5 | #include "as_bam.h" 6 | #include "idxstats.h" 7 | #include "io_sam.h" 8 | #include "bcffile.h" 9 | #include "fafile.h" 10 | #include "tabixfile.h" 11 | #include "pileupbam.h" 12 | #include "pileup.h" 13 | 14 | static const R_CallMethodDef callMethods[] = { 15 | 16 | /* zip_compression.c */ 17 | {".bgzip", (DL_FUNC) & bgzip, 2}, 18 | /* utilities.c */ 19 | {".p_pairing", (DL_FUNC) & p_pairing, 12}, 20 | {".find_mate_within_groups", (DL_FUNC) & find_mate_within_groups, 6}, 21 | /* bamfile.c */ 22 | {".bamfile_init", 
(DL_FUNC) & bamfile_init, 0}, 23 | {".bamfile_open", (DL_FUNC) & bamfile_open, 3}, 24 | {".bamfile_close", (DL_FUNC) & bamfile_close, 1}, 25 | {".bamfile_isopen", (DL_FUNC) & bamfile_isopen, 1}, 26 | {".bamfile_isincomplete", (DL_FUNC) & bamfile_isincomplete, 1}, 27 | {".read_bamfile_header", (DL_FUNC) & read_bamfile_header, 2}, 28 | {".scan_bamfile", (DL_FUNC) & scan_bamfile, 13}, 29 | {".count_bamfile", (DL_FUNC) & count_bamfile, 6}, 30 | {".prefilter_bamfile", (DL_FUNC) & prefilter_bamfile, 11}, 31 | {".filter_bamfile", (DL_FUNC) & filter_bamfile, 8}, 32 | /* as_bam.c */ 33 | {".as_bam", (DL_FUNC) & as_bam, 3}, 34 | /* idxstats.c */ 35 | {".idxstats_bamfile", (DL_FUNC) & idxstats_bamfile, 1}, 36 | /* io_sam.c */ 37 | {".scan_bam_template", (DL_FUNC) & scan_bam_template, 2}, 38 | {".scan_bam_cleanup", (DL_FUNC) & scan_bam_cleanup, 0}, 39 | {".sort_bam", (DL_FUNC) & sort_bam, 6}, 40 | {".merge_bam", (DL_FUNC) & merge_bam, 8}, 41 | {".index_bam", (DL_FUNC) & index_bam, 1}, 42 | /* bcffile.c */ 43 | {".bcffile_init", (DL_FUNC) & bcffile_init, 0}, 44 | {".bcffile_open", (DL_FUNC) & bcffile_open, 3}, 45 | {".bcffile_close", (DL_FUNC) & bcffile_close, 1}, 46 | {".bcffile_isopen", (DL_FUNC) & bcffile_isopen, 1}, 47 | {".bcffile_isvcf", (DL_FUNC) & bcffile_isvcf, 1}, 48 | {".scan_bcf_header", (DL_FUNC) & scan_bcf_header, 1}, 49 | {".scan_bcf", (DL_FUNC) & scan_bcf, 3}, 50 | {".as_bcf", (DL_FUNC) & as_bcf, 3}, 51 | {".index_bcf", (DL_FUNC) & index_bcf, 1}, 52 | /* fafile.c */ 53 | {".fafile_init", (DL_FUNC) & fafile_init, 0}, 54 | {".fafile_open", (DL_FUNC) & fafile_open, 3}, 55 | {".fafile_close", (DL_FUNC) & fafile_close, 1}, 56 | {".fafile_isopen", (DL_FUNC) & fafile_isopen, 1}, 57 | {".index_fa", (DL_FUNC) & index_fa, 1}, 58 | {".n_fa", (DL_FUNC) & n_fa, 1}, 59 | {".scan_fa", (DL_FUNC) & scan_fa, 6}, 60 | /* tabixfile.c */ 61 | {".tabixfile_init", (DL_FUNC) & tabixfile_init, 0}, 62 | {".tabixfile_open", (DL_FUNC) & tabixfile_open, 2}, 63 | {".tabixfile_close", 
(DL_FUNC) & tabixfile_close, 1}, 64 | {".tabixfile_isopen", (DL_FUNC) & tabixfile_isopen, 1}, 65 | {".index_tabix", (DL_FUNC) & index_tabix, 8}, 66 | {".header_tabix", (DL_FUNC) & header_tabix, 1}, 67 | {".tabix_as_character", (DL_FUNC) & tabix_as_character, 6}, 68 | {".tabix_count", (DL_FUNC) & tabix_count, 6}, 69 | {".scan_tabix", (DL_FUNC) & scan_tabix, 6}, 70 | /* pileupbam.c */ 71 | {".apply_pileups", (DL_FUNC) & apply_pileups, 5}, 72 | /* bambuffer.c */ 73 | {".bambuffer_init", (DL_FUNC) & bambuffer_init, 0}, 74 | {".bambuffer", (DL_FUNC) & bambuffer, 1}, 75 | {".bambuffer_length", (DL_FUNC) & bambuffer_length, 1}, 76 | {".bambuffer_parse", (DL_FUNC) & bambuffer_parse, 9}, 77 | {".bambuffer_write", (DL_FUNC) & bambuffer_write, 3}, 78 | {".bambuffer_reset", (DL_FUNC) & bambuffer_reset, 1}, 79 | /* pileup.cpp */ 80 | {".c_Pileup", (DL_FUNC) & c_Pileup, 13}, 81 | {NULL, NULL, 0} 82 | }; 83 | 84 | void R_init_Rsamtools(DllInfo * info) 85 | { 86 | R_registerRoutines(info, NULL, callMethods, NULL, NULL); 87 | } 88 | 89 | void R_unload_Rsamtools(DllInfo *info) 90 | { 91 | (void) info; 92 | } 93 | -------------------------------------------------------------------------------- /src/S4Vectors_stubs.c: -------------------------------------------------------------------------------- 1 | #include "_S4Vectors_stubs.c" 2 | -------------------------------------------------------------------------------- /src/XVector_stubs.c: -------------------------------------------------------------------------------- 1 | #include "_XVector_stubs.c" 2 | -------------------------------------------------------------------------------- /src/as_bam.c: -------------------------------------------------------------------------------- 1 | #include "bamfile.h" 2 | 3 | int _as_bam(samfile_t * fin, samfile_t * fout) 4 | { 5 | bam1_t *b = bam_init1(); 6 | int r, count = 0; 7 | 8 | while (0 <= (r = samread(fin, b))) { 9 | samwrite(fout, b); 10 | count++; 11 | } 12 | bam_destroy1(b); 13 | 14 | return 
r >= -1 ? count : -1 * count; 15 | } 16 | 17 | SEXP as_bam(SEXP file, SEXP destination, SEXP binary) 18 | { 19 | if (!IS_CHARACTER(file) || 1 != LENGTH(file)) 20 | Rf_error("'file' must be character(1)"); 21 | if (!IS_CHARACTER(destination) || 1 != LENGTH(destination)) 22 | Rf_error("'destination' must be character(1)"); 23 | if (!IS_LOGICAL(binary) || 1 != LENGTH(binary)) 24 | Rf_error("'binary' must be logical(1)"); 25 | 26 | samfile_t *fin, *fout; 27 | if (LOGICAL(binary)[0]) { 28 | /* SAM --> BAM */ 29 | fin = _bam_tryopen(translateChar(STRING_ELT(file, 0)), "r", NULL); 30 | if (fin->header == 0) { 31 | samclose(fin); 32 | Rf_error("invalid header"); 33 | } 34 | fout = _bam_tryopen(translateChar(STRING_ELT(destination, 0)), "wb", 35 | fin->header); 36 | } else { 37 | /* BAM --> SAM */ 38 | fin = _bam_tryopen(translateChar(STRING_ELT(file, 0)), "rb", NULL); 39 | if (fin->header == 0) { 40 | samclose(fin); 41 | Rf_error("invalid header"); 42 | } 43 | fout = _bam_tryopen(translateChar(STRING_ELT(destination, 0)), "wh", 44 | fin->header); 45 | } 46 | 47 | int count = _as_bam(fin, fout); 48 | 49 | samclose(fin); 50 | samclose(fout); 51 | if (count < 0) 52 | Rf_error("truncated input file at record %d", -1 * count); 53 | 54 | return destination; 55 | } 56 | -------------------------------------------------------------------------------- /src/as_bam.h: -------------------------------------------------------------------------------- 1 | #ifndef _AS_BAM_H_ 2 | #define _AS_BAM_H_ 3 | 4 | #include 5 | 6 | SEXP as_bam(SEXP file, SEXP destination, SEXP binary); 7 | 8 | #endif /* _AS_BAM_H_ */ 9 | -------------------------------------------------------------------------------- /src/bam.c: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /src/bam_data.h: -------------------------------------------------------------------------------- 1 | #ifndef 
/*
 * Per-scan state handed to the _*_BAM_DATA() record callbacks.
 * Created by _init_BAM_DATA() and released by _Free_BAM_DATA().
 * NOTE(review): roles marked "presumably" are inferred from the field
 * names and the _init_BAM_DATA() parameter list -- confirm in bam_data.c.
 */
typedef struct {
    int BLOCKSIZE;              /* size to grow vectors */
    char *cigar_buf;            /* string representation of CIGAR */
    uint32_t cigar_buf_sz;      /* presumably the capacity of cigar_buf */

    int parse_status;
    BAM_FILE bfile;             /* accessor: _bam_file_BAM_DATA() */
    int irec, iparsed, irange, nrange;  /* presumably record / range counters */
    uint32_t keep_flag[2], cigar_flag;  /* presumably from the 'flag' and
                                           'isSimpleCigar' arguments */
    /* same names as the corresponding _init_BAM_DATA() arguments */
    int reverseComplement, yieldSize, obeyQname, asMates;
    char qnamePrefixEnd, qnameSuffixStart;
    C_TAGFILTER tagfilter;      /* see tagfilter.h */
    uint32_t mapqfilter;

    void *extra;                /* caller-supplied pointer ('extra' argument) */
} _BAM_DATA, *BAM_DATA;
#include 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | struct _bam_mate_iter_t { 12 | BamIterator *b_iter; 13 | }; 14 | 15 | // BamIterator methods 16 | void bam_mate_iter_destroy(bam_mate_iter_t iter) 17 | { 18 | delete iter->b_iter; 19 | R_Free(iter); 20 | } 21 | 22 | bam_mates_t *bam_mates_new() 23 | { 24 | bam_mates_t *mates = R_Calloc(1, bam_mates_t); 25 | mates->n = 0; 26 | mates->mated = MATE_UNKNOWN; 27 | mates->bams = (const bam1_t **) NULL; 28 | return mates; 29 | } 30 | 31 | void bam_mates_realloc(bam_mates_t *result, int n, MATE_STATUS mated) 32 | { 33 | for (int i = 0; i < result->n; ++i) { 34 | bam_destroy1((bam1_t *) result->bams[i]); 35 | result->bams[i] = (const bam1_t *) NULL; 36 | } 37 | 38 | // R_Realloc(p, 0, *) fails inappropriately 39 | if (n == 0) { 40 | R_Free(result->bams); 41 | result->bams = (const bam1_t **) NULL; 42 | } else { 43 | result->bams = R_Realloc(result->bams, n, const bam1_t *); 44 | } 45 | result->n = n; 46 | result->mated = mated; 47 | } 48 | 49 | void bam_mates_destroy(bam_mates_t *mates) 50 | { 51 | for (int i = 0; i < mates->n; ++i) 52 | bam_destroy1((bam1_t *) mates->bams[i]); 53 | R_Free(mates->bams); 54 | R_Free(mates); 55 | } 56 | 57 | int bam_mate_read(bamFile fb, bam_mate_iter_t iter, bam_mates_t *mates) 58 | { 59 | iter->b_iter->yield(fb, mates); 60 | return mates->n; 61 | } 62 | 63 | // BamRangeIterator methods 64 | bam_mate_iter_t bam_mate_range_iter_new(bamFile bfile, 65 | const bam_index_t *bindex, int tid, 66 | int beg, int end) 67 | { 68 | bam_mate_iter_t iter = R_Calloc(1, struct _bam_mate_iter_t); 69 | iter->b_iter = new BamRangeIterator(bfile, bindex, tid, beg, end); 70 | return iter; 71 | } 72 | 73 | int bam_fetch_mate(bamFile bf, const bam_index_t *idx, int tid, int beg, 74 | int end, void *data, bam_fetch_mate_f func) 75 | { 76 | BAM_DATA bd = (BAM_DATA) data; 77 | int n_rec; 78 | bam_mates_t *mates = bam_mates_new(); 79 | bam_mate_iter_t iter = bam_mate_range_iter_new(bf, idx, tid, 
beg, end); 80 | iter->b_iter->set_bam_data(bd); 81 | while ((n_rec = bam_mate_read(bf, iter, mates) > 0)) 82 | func(mates, data); 83 | bam_mate_iter_destroy(iter); 84 | bam_mates_destroy(mates); 85 | return n_rec; 86 | } 87 | 88 | // BamFileIterator methods 89 | bam_mate_iter_t bam_mate_file_iter_new(bamFile bfile, 90 | const bam_index_t *bindex) 91 | { 92 | bam_mate_iter_t iter = R_Calloc(1, struct _bam_mate_iter_t); 93 | iter->b_iter = new BamFileIterator(bfile, bindex); 94 | return iter; 95 | } 96 | 97 | int samread_mate(bamFile bfile, const bam_index_t *bindex, 98 | bam_mate_iter_t *iter_p, bam_mates_t *mates, 99 | void *data) 100 | { 101 | BAM_DATA bd = (BAM_DATA) data; 102 | bam_mate_iter_t iter; 103 | int status; 104 | if ((bam_mate_iter_t) NULL == *iter_p) 105 | *iter_p = bam_mate_file_iter_new(bfile, bindex); 106 | iter = *iter_p; 107 | iter->b_iter->set_bam_data(bd); 108 | iter->b_iter->iter_done = false; 109 | // single yield 110 | status = bam_mate_read(bfile, iter, mates); 111 | iter->b_iter->set_bam_data((BAM_DATA) NULL); 112 | return status; 113 | } 114 | 115 | 116 | #ifdef __cplusplus 117 | } 118 | #endif 119 | -------------------------------------------------------------------------------- /src/bam_mate_iter.h: -------------------------------------------------------------------------------- 1 | #ifndef BAM_MATE_ITER_H 2 | #define BAM_MATE_ITER_H 3 | 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef enum { 11 | MATE_UNKNOWN, MATE_MATED, MATE_AMBIGUOUS, MATE_UNMATED 12 | } MATE_STATUS; 13 | 14 | typedef struct _bam_mate_iter_t *bam_mate_iter_t; 15 | 16 | typedef struct { 17 | const bam1_t **bams; 18 | int n; 19 | MATE_STATUS mated; 20 | } bam_mates_t; 21 | 22 | typedef int (*bam_fetch_mate_f)(const bam_mates_t *b, void *data); 23 | 24 | bam_mates_t *bam_mates_new(); 25 | void bam_mates_realloc(bam_mates_t *mates, int n, MATE_STATUS mated); 26 | void bam_mates_destroy(bam_mates_t *mates); 27 | 28 | int 
/*
 * Buffer of BAM records with per-record bookkeeping arrays; reached
 * from an R external pointer through BAMBUFFER().
 * NOTE(review): field roles are inferred from the names and from the
 * bambuffer_new(n, as_mates) / bambuffer_push() prototypes -- confirm
 * in bambuffer.c.
 */
typedef struct {
    bam1_t **buffer;            /* array of record pointers */
    int *mates, *partition;     /* presumably per-record mate / partition ids */
    int i, n, as_mates, mate_flag, partition_id;
} _BAM_BUFFER, *BAM_BUFFER;

/* Extract the BAM_BUFFER stored in external pointer `b`. */
#define BAMBUFFER(b) ((BAM_BUFFER) R_ExternalPtrAddr(b))
/*
 * State stored behind a BamFile external pointer; see BAMFILE().
 */
typedef struct {
    samfile_t *file;            /* open SAM/BAM handle; the BAM stream
                                   is reached as file->x.bam */
    bam_index_t *index;         /* index used for hts_idx_* queries and
                                   the mate iterators */
    uint64_t pos0;              /* NOTE(review): presumably a saved file
                                   offset -- confirm in bamfile.c */
    int irange0;                /* NOTE(review): presumably a saved range
                                   counter -- confirm */
    bam_mate_iter_t iter;       /* mate iterator handle (bam_mate_iter.h) */
    void *pbuffer;              /* for buffered pileup; presumably released
                                   with pileup_pbuffer_destroy() */
} _BAM_FILE, *BAM_FILE;

/* Extract the BAM_FILE stored in external pointer `b`. */
#define BAMFILE(b) ((BAM_FILE) R_ExternalPtrAddr(b))
#ifndef _BCFFILE_H_ 2 | #define _BCFFILE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | /* io_bcf.c */ 9 | typedef struct { 10 | htsFile *file; 11 | hts_idx_t *index; 12 | } _BCF_FILE; 13 | 14 | #define BCFFILE(b) ((_BCF_FILE *) R_ExternalPtrAddr(b)) 15 | 16 | SEXP bcffile_init(); 17 | SEXP bcffile_open(SEXP filename, SEXP indexname, SEXP mode); 18 | SEXP bcffile_close(SEXP ext); 19 | SEXP bcffile_isopen(SEXP ext); 20 | SEXP bcffile_isvcf(SEXP ext); 21 | 22 | SEXP scan_bcf_header(SEXP ext); 23 | SEXP scan_bcf(SEXP ext, SEXP regions, SEXP tmpl); 24 | 25 | SEXP as_bcf(SEXP file, SEXP dictionary, SEXP destination); 26 | SEXP index_bcf(SEXP file); 27 | 28 | #endif /* _BCFFILE_H_ */ 29 | -------------------------------------------------------------------------------- /src/bedidx.c: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /src/encode.c: -------------------------------------------------------------------------------- 1 | #include "encode.h" 2 | #include "Biostrings_interface.h" 3 | 4 | unsigned char _bDecode(char); 5 | unsigned char _dnaDecode(char); 6 | unsigned char _rnaDecode(char); 7 | 8 | char _bEncode(char c) 9 | { 10 | return c; 11 | } 12 | 13 | #define _dnaEncode DNAencode; 14 | #define _rnaEncode RNAencode; 15 | 16 | ENCODE_FUNC _encoder(const char *base) 17 | { 18 | ENCODE_FUNC encode = NULL; 19 | if (strcmp(base, "DNAString") == 0) { 20 | encode = _dnaEncode; 21 | } else if (strcmp(base, "RNAString") == 0) { 22 | encode = _rnaEncode; 23 | } else if (strcmp(base, "BString") == 0) { 24 | encode = _bEncode; 25 | } else { 26 | Rf_error("internal: unknown '_encoder' class '%s'", base); 27 | } 28 | return encode; 29 | } 30 | -------------------------------------------------------------------------------- /src/encode.h: -------------------------------------------------------------------------------- 1 | #ifndef _RENCODE_H_ 2 
| #define _RENCODE_H_ 3 | 4 | typedef char (*ENCODE_FUNC) (char); /* DNAencode, RNAencode */ 5 | ENCODE_FUNC _encoder(const char *baseclass); /* RNAString, DNAString, BString */ 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/fafile.h: -------------------------------------------------------------------------------- 1 | #ifndef FAFILE_H 2 | #define FAFILE_H 3 | 4 | #include 5 | #include 6 | 7 | typedef struct { 8 | faidx_t *index; 9 | } _FA_FILE; 10 | 11 | #define FAFILE(f) ((_FA_FILE *) R_ExternalPtrAddr(f)) 12 | 13 | SEXP fafile_init(); 14 | SEXP fafile_open(SEXP filename, SEXP indexname, SEXP gzindexname); 15 | SEXP fafile_close(SEXP ext); 16 | /* SEXP fafile_reopen(SEXP ext, SEXP filename, SEXP indexname); */ 17 | SEXP fafile_isopen(SEXP ext); 18 | 19 | SEXP index_fa(SEXP filename); 20 | SEXP n_fa(SEXP ext); 21 | SEXP scan_fa(SEXP ext, SEXP seq, SEXP start, SEXP end, SEXP as, SEXP lkup); 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /src/idxstats.c: -------------------------------------------------------------------------------- 1 | #include "bamfile.h" 2 | #include "utilities.h" /* _checkext */ 3 | #include 4 | #include 5 | 6 | extern SEXP BAMFILE_TAG; 7 | 8 | SEXP idxstats_bamfile(SEXP ext) 9 | { 10 | BAM_FILE bfile; 11 | bamFile fp; 12 | bam_header_t *header; 13 | bam_index_t *idx; 14 | int32_t n; 15 | SEXP result, name, len, map, unmap; 16 | 17 | _checkext(ext, BAMFILE_TAG, "idxstats"); 18 | bfile = BAMFILE(ext); 19 | fp = bfile->file->x.bam; 20 | (void) bam_seek(fp, 0, 0); 21 | header = bam_header_read(fp); 22 | idx = bfile->index; 23 | n = hts_idx_get_n(idx); 24 | 25 | result = PROTECT(Rf_allocVector(VECSXP, 4)); 26 | name = Rf_allocVector(STRSXP, n + 1L); SET_VECTOR_ELT(result, 0, name); 27 | len = Rf_allocVector(INTSXP, n + 1L); SET_VECTOR_ELT(result, 1, len); 28 | map = Rf_allocVector(REALSXP, n + 1L); SET_VECTOR_ELT(result, 2, map); 29 | 
unmap = Rf_allocVector(REALSXP, n + 1L); SET_VECTOR_ELT(result, 3, unmap); 30 | 31 | for (int i = 0; i < n; ++i) { 32 | uint64_t mapped, unmapped; 33 | SET_STRING_ELT(name, i, mkChar(header->target_name[i])); 34 | INTEGER(len)[i] = header->target_len[i]; 35 | hts_idx_get_stat(idx, i, &mapped, &unmapped); 36 | REAL(map)[i] = (double) mapped; 37 | REAL(unmap)[i] = (double) unmapped; 38 | } 39 | /* unmapped reads */ 40 | SET_STRING_ELT(name, n , mkChar("*")); 41 | INTEGER(len)[n] = 0; 42 | REAL(map)[n] = 0; 43 | REAL(unmap)[n] = hts_idx_get_n_no_coor(idx); 44 | 45 | UNPROTECT(1); 46 | return result; 47 | } 48 | -------------------------------------------------------------------------------- /src/idxstats.h: -------------------------------------------------------------------------------- 1 | #ifndef _IDXSTATS_H 2 | #define _IDXSTATS_H 3 | 4 | #include 5 | 6 | SEXP idxstats_bamfile(SEXP bfile); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /src/io_sam.h: -------------------------------------------------------------------------------- 1 | #ifndef _IO_SAM_H_ 2 | #define _IO_SAM_H_ 3 | 4 | #include 5 | #include "bam_data.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | SEXP scan_bam_template(SEXP rname, SEXP tags); 12 | SEXP sort_bam(SEXP fname, SEXP destinationPrefix, SEXP isByQname, 13 | SEXP maxMemory, SEXP byTag, SEXP nThreads); 14 | SEXP merge_bam(SEXP fnames, SEXP destination, SEXP overwrite, 15 | SEXP hname, SEXP regionStr, SEXP isByQname, 16 | SEXP addRG, SEXP compressLevel1); 17 | SEXP index_bam(SEXP indexname); 18 | SEXP scan_bam_cleanup(); /* error handling only */ 19 | 20 | void _bam_check_template_list(SEXP template_list); 21 | SEXP _read_bam_header(SEXP ext, SEXP what); 22 | SEXP _scan_bam(SEXP bfile, SEXP regions, SEXP keepFlags, 23 | SEXP isSimpleCigar, SEXP tagFilter, SEXP mapqFilter, 24 | SEXP reverseComplement, SEXP yieldSize, 25 | SEXP template_list, SEXP obeyQname, SEXP asMates, 
26 | SEXP qnamePrefixEnd, SEXP qnameSuffixStart); 27 | SEXP _count_bam(SEXP bfile, SEXP regions, SEXP keepFlags, SEXP isSimpleCigar, 28 | SEXP tagFilter, SEXP mapqFilter); 29 | SEXP _prefilter_bam(SEXP bfile, SEXP regions, SEXP keepFlags, 30 | SEXP isSimpleCigar, SEXP tagFilter, SEXP mapqFilter, 31 | SEXP yieldSize, SEXP obeyQname, SEXP asMates, 32 | SEXP qnamePrefixEnd, SEXP qnameSuffixStart); 33 | SEXP _filter_bam(SEXP bfile, SEXP regions, SEXP keepFlags, 34 | SEXP isSimpleCigar, SEXP tagFilter, SEXP mapqFilter, 35 | SEXP fout_name, SEXP fout_mode); 36 | 37 | typedef void (_FINISH1_FUNC) (BAM_DATA); 38 | int _do_scan_bam(BAM_DATA bd, SEXP regions, bam_fetch_f parse1, 39 | bam_fetch_mate_f parse1_mate, _FINISH1_FUNC finish1); 40 | 41 | 42 | #ifdef __cplusplus 43 | } 44 | #endif 45 | 46 | #endif /* _IO_SAM_H_ */ 47 | -------------------------------------------------------------------------------- /src/pbuffer_wrapper.cpp: -------------------------------------------------------------------------------- 1 | #include "pbuffer_wrapper.h" 2 | #include "PosCacheColl.h" 3 | 4 | void pileup_pbuffer_destroy(void *pbuffer) { 5 | if(pbuffer != NULL) { 6 | delete (static_cast(pbuffer)); 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /src/pbuffer_wrapper.h: -------------------------------------------------------------------------------- 1 | #ifndef PBUFFER_WRAPPER_H 2 | #define PBUFFER_WRAPPER_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | // expose PosCacheColl dtor to C code 9 | void pileup_pbuffer_destroy(void *pbuffer); 10 | 11 | #ifdef __cplusplus 12 | } 13 | #endif /* __cplusplus */ 14 | 15 | #endif /* PBUFFER_WRAPPER_H */ 16 | -------------------------------------------------------------------------------- /src/pileup.h: -------------------------------------------------------------------------------- 1 | #ifndef PILEUP_H 2 | #define PILEUP_H 3 | 4 | #ifdef __cplusplus 5 | #define R_NO_REMAP 6 | #include 
"io_sam.h" 7 | #include "utilities.h" 8 | #include "PileupBufferShim.h" 9 | #ifdef PILEUP_DEBUG 10 | #include "nate_utilities.h" 11 | #endif 12 | 13 | extern "C" { 14 | #endif 15 | SEXP c_Pileup(SEXP ext, SEXP regions, SEXP keepFlags, 16 | SEXP isSimpleCigar, SEXP tagFilter, SEXP mapqFilter, 17 | SEXP reverseComplement, 18 | SEXP yieldSize, SEXP obeyQname, SEXP asMates, 19 | SEXP qnamePrefixEnd, SEXP qnameSuffixStart, 20 | SEXP pileupParams); 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /src/pileupbam.h: -------------------------------------------------------------------------------- 1 | #ifndef MPILEUPBAM_H 2 | #define MPILEUPBAM_H 3 | 4 | #include 5 | 6 | SEXP apply_pileups(SEXP files, SEXP names, SEXP regions, SEXP param, 7 | SEXP callback); 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /src/sam_opts.c: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /src/sam_utils.c: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /src/samtools_patch.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "samtools_patch.h" 3 | 4 | void _samtools_exit(int status) 5 | { 6 | Rf_error 7 | ("internal: samtools invoked 'exit(%d)'; see warnings() and restart R", 8 | status); 9 | } 10 | 11 | void _samtools_abort() 12 | { 13 | Rf_error 14 | ("internal: samtools invoked 'abort'; see warnings() and restart R"); 15 | } 16 | 17 | int _samtools_fprintf(FILE * file, const char *fmt, ...) 
/* true when format string `fmt` starts with the string literal `lit`;
 * the length comes from the literal itself rather than a hand count */
#define FMT_STARTS_WITH(fmt, lit) (0 == strncmp((lit), (fmt), sizeof(lit) - 1))

/*
 * fprintf() replacement used for samtools code.
 *
 * Output to any stream other than stderr is passed through to
 * vfprintf(). Messages for stderr are instead formatted (truncated to
 * the local buffer size) and reported through R: two known
 * informational messages are dropped, a failed khttp file open becomes
 * an R error, and everything else becomes an R warning.
 *
 * Returns the vfprintf()/vsnprintf() result, or 0 for a dropped message.
 */
int _samtools_fprintf(FILE * file, const char *fmt, ...)
{
    enum { MSG_BUFSIZE = 2048 };
    va_list argp;
    int n;

    if (stderr != file) {
        va_start(argp, fmt);
        n = vfprintf(file, fmt, argp);
        va_end(argp);
        return n;
    }

    /* silence some messages -- decided before any buffer is set up */
    if (FMT_STARTS_WITH(fmt, "[samopen] SAM header is present:") ||
        FMT_STARTS_WITH(fmt, "[fai_load] build FASTA index."))
        return 0;

    /* Rf_error() / Rf_warning() format their arguments before
     * returning or unwinding, so a stack buffer is sufficient */
    char buf[MSG_BUFSIZE];
    va_start(argp, fmt);
    n = vsnprintf(buf, sizeof buf, fmt, argp);
    va_end(argp);
    if (FMT_STARTS_WITH(fmt, "[khttp_connect_file] fail to open file"))
        Rf_error("%s", buf);
    Rf_warning("%s", buf);
    return n;
}
int len); 26 | void _finish1range_SCAN_BAM_DATA(SCAN_BAM_DATA sbd, bam_header_t *header, 27 | int irange); 28 | SEXP _scan_bam_result_init(SEXP template_list, SEXP names, SEXP regions, 29 | BAM_FILE bfile); 30 | SEXP _get_or_grow_SCAN_BAM_DATA(BAM_DATA bd, int len); 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /src/tabixfile.h: -------------------------------------------------------------------------------- 1 | #ifndef _TABIXFILE_H 2 | #define _TABIXFILE_H 3 | 4 | #include 5 | #include "htslib/tbx.h" 6 | 7 | typedef struct { 8 | htsFile *file; 9 | tbx_t *index; 10 | hts_itr_t *iter; 11 | } _TABIX_FILE; 12 | 13 | #define TABIXFILE(b) ((_TABIX_FILE *) R_ExternalPtrAddr(b)) 14 | 15 | typedef SEXP SCAN_FUN(htsFile *file, tbx_t *index, hts_itr_t *iter, 16 | const int yield, SEXP state, SEXP rownames); 17 | 18 | SCAN_FUN tabix_as_character; 19 | 20 | SCAN_FUN tabix_count; 21 | 22 | SEXP tabixfile_init(); 23 | SEXP tabixfile_open(SEXP filename, SEXP indexname); 24 | SEXP tabixfile_close(SEXP ext); 25 | SEXP tabixfile_isopen(SEXP ext); 26 | 27 | SEXP index_tabix(SEXP filename, SEXP format, 28 | SEXP seq, SEXP begin, SEXP end, 29 | SEXP skip, SEXP comment, SEXP zeroBased); 30 | SEXP header_tabix(SEXP ext); 31 | SEXP scan_tabix(SEXP ext, SEXP regions, SEXP yield, SEXP fun, 32 | SEXP state, SEXP rownames); 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /src/tagfilter.h: -------------------------------------------------------------------------------- 1 | #ifndef TAG_FILTER_H 2 | #define TAG_FILTER_H 3 | 4 | #include 5 | #include 6 | 7 | typedef enum { TAGFILT_T_UNSET = 0, TAGFILT_T_INT, 8 | TAGFILT_T_STRING } TagFilterType; 9 | 10 | typedef struct { 11 | int len; 12 | TagFilterType type; 13 | void* ptr; 14 | 15 | } _TAGFILTER_ELT, *TAGFILTER_ELT; 16 | 17 | typedef struct { 18 | int len; 19 | const char **tagnames; 20 | _TAGFILTER_ELT *elts; 21 | } _C_TAGFILTER, 
*C_TAGFILTER; 22 | 23 | C_TAGFILTER _tagFilter_as_C_types(SEXP tl); 24 | void _Free_C_TAGFILTER(C_TAGFILTER ctf); 25 | 26 | int _tagfilter(const bam1_t * bam, C_TAGFILTER tagfilter, int irec); 27 | 28 | #endif /* TAG_FILTER_H */ 29 | -------------------------------------------------------------------------------- /src/utilities.h: -------------------------------------------------------------------------------- 1 | #ifndef _UTILITIES_H_ 2 | #define _UTILITIES_H_ 3 | 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | /* robust memory re-allocation */ 11 | 12 | #define _Rs_Realloc(p, n, t) (t *) _Rs_Realloc_impl(p, n, sizeof(t)) 13 | 14 | void *_Rs_Realloc_impl(void *p, size_t n, size_t t); 15 | 16 | /* coercion */ 17 | 18 | SEXP _get_namespace(const char *pkg); 19 | SEXP _get_encoding_lookup(const char *from, const char *to); 20 | SEXP _get_lkup(const char *baseclass); 21 | void _as_factor_SEXP(SEXP vec, SEXP lvls); 22 | void _as_factor(SEXP vec, const char **lvls, const int n_lvls); 23 | void _as_strand(SEXP vec); 24 | void _as_rname(SEXP vec, const char **lvls, const int n_lvls); 25 | void _as_nucleotide(SEXP vec); 26 | void _as_seqlevels(SEXP vec, SEXP lvls); 27 | SEXP _as_XStringSet(const char **key, int len, const char *baseclass); 28 | SEXP _as_PhredQuality(const char **key, int len); 29 | void _reverse(char *buf, int len); 30 | void _reverseComplement(char *buf, int len); 31 | char *_rtrim(char *); 32 | int _delete_trailing_LFs_and_CRs(const char *buf, int buf_len); 33 | 34 | /* common checks */ 35 | 36 | void _checkext(SEXP ext, SEXP tag, const char *lbl); 37 | void _checknames(SEXP filename, SEXP indexname, SEXP filemode); 38 | void _checkparams(SEXP regions, SEXP keepFlags, SEXP isSimpleCigar); 39 | 40 | /* pairing */ 41 | 42 | SEXP p_pairing(SEXP x_qname, SEXP x_flag, SEXP x_rname, 43 | SEXP x_pos, SEXP x_rnext, SEXP x_pnext, 44 | SEXP y_qname, SEXP y_flag, SEXP y_rname, 45 | SEXP y_pos, SEXP y_rnext, SEXP y_pnext); 46 | 47 | SEXP 
find_mate_within_groups(SEXP group_sizes, 48 | SEXP x_flag, SEXP x_rname, 49 | SEXP x_pos, SEXP x_rnext, SEXP x_pnext); 50 | 51 | /* call-building macros */ 52 | 53 | #define NEW_CALL(S, T, NAME, ENV, N) \ 54 | PROTECT(S = T = allocList(N)); \ 55 | SET_TYPEOF(T, LANGSXP); \ 56 | SETCAR(T, findFun(install(NAME), ENV)); \ 57 | T = CDR(T) 58 | #define CSET_CDR(T, NAME, VALUE) \ 59 | SETCAR(T, VALUE); \ 60 | SET_TAG(T, install(NAME)); \ 61 | T = CDR(T) 62 | #define CEVAL_TO(S, ENV, GETS) \ 63 | GETS = eval(S, ENV); \ 64 | UNPROTECT(1) 65 | 66 | #ifdef __cplusplus 67 | } 68 | #endif 69 | 70 | #endif /* _UTILITIES_H_ */ 71 | -------------------------------------------------------------------------------- /src/zip_compression.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "zip_compression.h" 7 | 8 | static void _zip_error(const char *txt, const char *err, int infd, int outfd) 9 | { 10 | close(infd); 11 | close(outfd); 12 | err ? 
Rf_error(txt, err) : Rf_error("%s", txt); 13 | } 14 | 15 | static void _zip_open(SEXP file, SEXP dest, int *infd, int *outfd) 16 | { 17 | int iflag = O_RDONLY, oflag = O_WRONLY | O_CREAT | O_TRUNC; 18 | #ifdef _WIN32 19 | iflag |= O_BINARY; 20 | oflag |= O_BINARY; 21 | #endif 22 | 23 | if (!IS_CHARACTER(file) || 1L != Rf_length(file)) 24 | Rf_error("'file' must be character(1)"); 25 | if (!IS_CHARACTER(dest) || 1L != Rf_length(dest)) 26 | Rf_error("'dest' must be character(1)"); 27 | 28 | *infd = open(translateChar(STRING_ELT(file, 0)), iflag); 29 | if (0 > *infd) 30 | Rf_error("opening 'file': %s", strerror(errno)); 31 | 32 | /* we overwrite existing files here */ 33 | *outfd = open(translateChar(STRING_ELT(dest, 0)), oflag, 0666); 34 | if (0 > *outfd) { 35 | close(*infd); 36 | Rf_error("opening 'dest': %s", strerror(errno)); 37 | } 38 | } 39 | 40 | SEXP bgzip(SEXP file, SEXP dest) 41 | { 42 | static const int BUF_SIZE = 64 * 1024; 43 | void *buffer; 44 | int infd, outfd, cnt; 45 | gzFile in; 46 | BGZF *outp; 47 | 48 | buffer = R_alloc(BUF_SIZE, sizeof(void *)); 49 | 50 | _zip_open(file, dest, &infd, &outfd); 51 | in = gzdopen(infd, "rb"); 52 | if (NULL == in) 53 | _zip_error("opening input 'file'", NULL, infd, outfd); 54 | outp = bgzf_dopen(outfd, "w"); 55 | if (NULL == outp) 56 | _zip_error("opening output 'dest'", NULL, infd, outfd); 57 | 58 | while (0 < (cnt = gzread(in, buffer, BUF_SIZE))) 59 | if (0 > bgzf_write(outp, buffer, cnt)) 60 | _zip_error("writing compressed output", NULL, infd, outfd); 61 | if (0 > cnt) 62 | _zip_error("reading compressed input: %s", 63 | strerror(errno), infd, outfd); 64 | 65 | if (0 > bgzf_close(outp)) 66 | Rf_error("closing compressed output"); 67 | if (gzclose(in) != Z_OK) 68 | _zip_error("closing input after compression", NULL, infd, outfd); 69 | 70 | return dest; 71 | } 72 | 73 | -------------------------------------------------------------------------------- /src/zip_compression.h: 
-------------------------------------------------------------------------------- 1 | #ifndef ZIP_COMPRESSION_H 2 | #define ZIP_COMPRESSION_H 3 | 4 | #include 5 | 6 | SEXP bgzip(SEXP from, SEXP dest); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /tests/Rsamtools_unit_tests.R: -------------------------------------------------------------------------------- 1 | BiocGenerics:::testPackage('Rsamtools') 2 | --------------------------------------------------------------------------------