├── .gitmodules ├── test ├── test_unmapped.bam.bai ├── results │ └── .gitignore ├── tabix_reader │ ├── bad_header.txt.gz.tbi │ ├── bad_header.txt.gz │ ├── test_bed3.bed.gz │ ├── test_bed3.bed.gz.tbi │ ├── genomic_regions_header.txt.gz │ ├── genomic_positions_header.txt.gz │ ├── genomic_regions_header.txt.gz.tbi │ └── genomic_positions_header.txt.gz.tbi ├── test.bam ├── test.bcf ├── test_cram.fa.fai ├── test.bam.bai ├── test.bcf.csi ├── test_cram.bam ├── bam2sam_test.bam ├── test_cram.cram ├── test_left.vcf.gz ├── test_multi.bcf ├── bgzip │ ├── bgzip.vcf.gz │ ├── gzip.vcf.gz │ └── plain.vcf ├── test_cram.bam.bai ├── test_right.vcf.gz ├── test_unmapped.bam ├── test_unmapped.cram ├── test_cram.cram.crai ├── test_index_build.bam ├── test_left.vcf.gz.tbi ├── test_right.vcf.gz.tbi ├── test_spliced_reads.bam ├── test_unmapped.cram.crai ├── test_issue_156_no_text.bam ├── test_spliced_reads.bam.bai ├── test_different_index_name.bam ├── test_left.vcf ├── test_right.vcf ├── base_mods │ ├── MM-double.sam │ └── MM-orient.sam ├── test_non_diploid.vcf ├── test_paired.sam ├── test_cram.fa ├── test_headers.out.vcf ├── test_trailing_omitted_format.vcf ├── test_headers.vcf ├── test_missing.vcf ├── test_various.vcf ├── test_cram.sam ├── test_various.out.vcf ├── test_string.vcf ├── bam2sam_out.sam ├── bam2sam_expected.sam ├── test_orientation_supplementary.sam ├── obs-cornercase.vcf ├── test_nonstandard_orientation.sam └── test_svlen.vcf ├── src ├── htslib.rs ├── utils.rs ├── tpool.rs ├── bam │ ├── index.rs │ ├── header.rs │ ├── pileup.rs │ ├── buffer.rs │ └── record_serde.rs ├── bcf │ ├── index.rs │ ├── buffer.rs │ └── header.rs ├── lib.rs ├── errors.rs ├── faidx │ └── mod.rs ├── tbx │ └── mod.rs └── bgzf │ └── mod.rs ├── config.toml ├── .gitignore ├── .github ├── dependabot.yml └── workflows │ ├── conventional-prs.yml │ ├── release-please.yml │ └── rust.yml ├── LICENSE.md ├── Cargo.toml ├── README.md └── CHANGELOG.md /.gitmodules: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_unmapped.bam.bai: -------------------------------------------------------------------------------- 1 | BAI -------------------------------------------------------------------------------- /test/results/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /test/tabix_reader/bad_header.txt.gz.tbi: -------------------------------------------------------------------------------- 1 | INVALID INDEX DATA 2 | -------------------------------------------------------------------------------- /src/htslib.rs: -------------------------------------------------------------------------------- 1 | //! Re-export hts-sys htslib bindings 2 | pub use hts_sys::*; 3 | -------------------------------------------------------------------------------- /test/test.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test.bam -------------------------------------------------------------------------------- /test/test.bcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test.bcf -------------------------------------------------------------------------------- /config.toml: -------------------------------------------------------------------------------- 1 | [target.x86_64-unknown-linux-musl] 2 | linker = "x86_64-linux-musl-gcc" 3 | -------------------------------------------------------------------------------- /test/test_cram.fa.fai: -------------------------------------------------------------------------------- 1 | chr1 120 6 60 61 2 | chr2 120 134 60 61 3 | chr3 120 262 60 61 4 | 
-------------------------------------------------------------------------------- /test/test.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test.bam.bai -------------------------------------------------------------------------------- /test/test.bcf.csi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test.bcf.csi -------------------------------------------------------------------------------- /test/test_cram.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test_cram.bam -------------------------------------------------------------------------------- /test/bam2sam_test.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/bam2sam_test.bam -------------------------------------------------------------------------------- /test/test_cram.cram: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test_cram.cram -------------------------------------------------------------------------------- /test/test_left.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test_left.vcf.gz -------------------------------------------------------------------------------- /test/test_multi.bcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test_multi.bcf -------------------------------------------------------------------------------- /test/bgzip/bgzip.vcf.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/bgzip/bgzip.vcf.gz -------------------------------------------------------------------------------- /test/bgzip/gzip.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/bgzip/gzip.vcf.gz -------------------------------------------------------------------------------- /test/test_cram.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test_cram.bam.bai -------------------------------------------------------------------------------- /test/test_right.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test_right.vcf.gz -------------------------------------------------------------------------------- /test/test_unmapped.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test_unmapped.bam -------------------------------------------------------------------------------- /test/test_unmapped.cram: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test_unmapped.cram -------------------------------------------------------------------------------- /test/test_cram.cram.crai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test_cram.cram.crai -------------------------------------------------------------------------------- /test/test_index_build.bam: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test_index_build.bam -------------------------------------------------------------------------------- /test/test_left.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test_left.vcf.gz.tbi -------------------------------------------------------------------------------- /test/test_right.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test_right.vcf.gz.tbi -------------------------------------------------------------------------------- /test/test_spliced_reads.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test_spliced_reads.bam -------------------------------------------------------------------------------- /test/test_unmapped.cram.crai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test_unmapped.cram.crai -------------------------------------------------------------------------------- /test/test_issue_156_no_text.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test_issue_156_no_text.bam -------------------------------------------------------------------------------- /test/test_spliced_reads.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test_spliced_reads.bam.bai -------------------------------------------------------------------------------- /test/tabix_reader/bad_header.txt.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/tabix_reader/bad_header.txt.gz -------------------------------------------------------------------------------- /test/tabix_reader/test_bed3.bed.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/tabix_reader/test_bed3.bed.gz -------------------------------------------------------------------------------- /test/test_different_index_name.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/test_different_index_name.bam -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | .idea 3 | *~ 4 | target 5 | Cargo.lock 6 | htslib/* 7 | test/out.bam 8 | test/test_index_build.bam.csi 9 | -------------------------------------------------------------------------------- /test/tabix_reader/test_bed3.bed.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/tabix_reader/test_bed3.bed.gz.tbi -------------------------------------------------------------------------------- /test/tabix_reader/genomic_regions_header.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/tabix_reader/genomic_regions_header.txt.gz -------------------------------------------------------------------------------- /test/tabix_reader/genomic_positions_header.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/tabix_reader/genomic_positions_header.txt.gz 
-------------------------------------------------------------------------------- /test/tabix_reader/genomic_regions_header.txt.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/tabix_reader/genomic_regions_header.txt.gz.tbi -------------------------------------------------------------------------------- /test/tabix_reader/genomic_positions_header.txt.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bio/rust-htslib/HEAD/test/tabix_reader/genomic_positions_header.txt.gz.tbi -------------------------------------------------------------------------------- /test/test_left.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.1 2 | ##FORMAT= 3 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT one two 4 | 1 100 . C A . . . GT 0/1 . 5 | 1 101 . T G . . . GT 0/1 . 6 | -------------------------------------------------------------------------------- /test/bgzip/plain.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.1 2 | ##FORMAT= 3 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT one two 4 | 1 100 . C A . . . GT 0/1 . 5 | 1 101 . T G . . . GT 0/1 . 6 | -------------------------------------------------------------------------------- /test/test_right.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.1 2 | ##FORMAT= 3 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT three four 4 | 1 100 . C A . . . GT 1/1 . 5 | 1 102 . C G . . . GT 0/1 . 
6 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "cargo" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | 8 | - package-ecosystem: "github-actions" 9 | directory: "/" 10 | schedule: 11 | interval: "weekly" 12 | -------------------------------------------------------------------------------- /test/base_mods/MM-double.sam: -------------------------------------------------------------------------------- 1 | @CO Modifications called on both strands of the same record, 2 | @CO including potentially at the same location simultaneously. 3 | * 0 * 0 0 * * 0 0 AGGATCTCTAGCGGATCGGCGGGGGATATGCCATAT * Mm:Z:C+m,1,3,0;G-m,0,2,0,4;G+o,4; Ml:B:C,128,153,179,115,141,166,192,102 4 | -------------------------------------------------------------------------------- /test/test_non_diploid.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.1 2 | ##FORMAT= 3 | ##contig= 4 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT one two 5 | 1 100 . C A . . . GT 0 1 6 | 1 101 . T G . . . GT 0/1 1/1 7 | 1 102 . A G . . . 
GT 1|0 1/1|0 8 | -------------------------------------------------------------------------------- /test/test_paired.sam: -------------------------------------------------------------------------------- 1 | @SQ SN:xx LN:20 2 | a1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** 3 | b1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** 4 | c1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** 5 | a1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** 6 | b1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** 7 | c1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** 8 | -------------------------------------------------------------------------------- /.github/workflows/conventional-prs.yml: -------------------------------------------------------------------------------- 1 | name: PR 2 | on: 3 | pull_request_target: 4 | types: 5 | - opened 6 | - reopened 7 | - edited 8 | - synchronize 9 | 10 | jobs: 11 | title-format: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: amannn/action-semantic-pull-request@v6.1.1 15 | env: 16 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /test/test_cram.fa: -------------------------------------------------------------------------------- 1 | >chr1 2 | GGGCACAGCCTCACCCAGGAAAGCAGCTGGGGGTCCACTGGGCTCAGGGAAGACCCCCTG 3 | CCAGGGAGACCCCAGGCGCCTGAATGGCCACGGGAAGGAAAACCTACCAGCCCCTCCGTG 4 | >chr2 5 | AAGAAATAACTGCTAATTTAAAATTGAAGACTTCTGCTCTGCAAAAGACATTGTTAAGAT 6 | AATGAAAAGACAAGCCAAAGACTTGTAGAAAGTATTTGAAAAATAATCTCTGATAAATGG 7 | >chr3 8 | CCAACAAGCATTGGTGTGGCATTTCAGTGGAGAAGGAAACTTGGGGGGAAAAAGCCCATC 9 | AAGGTTGTAAGAAGACTCCCAATTTAACTGTCCCTTTCCCTATTTATCCACCATCCAAGA 10 | -------------------------------------------------------------------------------- /test/test_headers.out.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.1 2 | ##FILTER= 3 | ##FILTER= 4 | ##INFO= 5 | ##FORMAT= 6 | ##contig= 7 | ##SOME= 8 | ##Bar1=something 9 | #CHROM POS ID REF ALT QUAL 
FILTER INFO FORMAT one two 10 | -------------------------------------------------------------------------------- /test/base_mods/MM-orient.sam: -------------------------------------------------------------------------------- 1 | @CO Testing mods on top and bottom strand, but also in 2 | @CO original vs reverse-complemented orientation 3 | top-fwd 0 * 0 0 * * 0 0 AGGATCTCTAGCGGATCGGCGGGGGATATGCCATAT * Mm:Z:C+m,1,3,0; Ml:B:C,128,153,179 4 | top-rev 16 * 0 0 * * 0 0 ATATGGCATATCCCCCGCCGATCCGCTAGAGATCCT * Mm:Z:C+m,1,3,0; Ml:B:C,128,153,179 5 | bot-fwd 0 * 0 0 * * 0 0 AGGATCTCTAGCGGATCGGCGGGGGATATGCCATAT * Mm:Z:G-m,0,0,4,3; Ml:B:C,115,141,166,192 6 | bot-rev 16 * 0 0 * * 0 0 ATATGGCATATCCCCCGCCGATCCGCTAGAGATCCT * Mm:Z:G-m,0,0,4,3; Ml:B:C,115,141,166,192 7 | -------------------------------------------------------------------------------- /test/test_trailing_omitted_format.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.3 2 | ##contig= 3 | ##INFO= 4 | ##FORMAT= 5 | ##FORMAT= 6 | ##FORMAT= 7 | ##FORMAT= 8 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 9 | chr1 1234 . t a . . FOO=1 GT:STR:FLT:INT . 
10 | -------------------------------------------------------------------------------- /test/test_headers.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.1 2 | ##FILTER= 3 | ##FILTER= 4 | ##FILTER= 5 | ##INFO= 6 | ##INFO= 7 | ##FORMAT= 8 | ##contig= 9 | ##contig= 10 | ##SOME= 11 | ##SOME= 12 | ##Bar1=something 13 | ##Bar2=something else 14 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT one two 15 | -------------------------------------------------------------------------------- /test/test_missing.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.1 2 | ##INFO= 3 | ##INFO= 4 | ##INFO= 5 | ##FORMAT= 6 | ##FORMAT= 7 | ##FORMAT= 8 | ##FORMAT= 9 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT one two 10 | 19 3114951 rs1234 A AG . PASS S1=string1;N1=1;F1=1.0 GT:FS1:FN4:FF4 1/1:fourall:1,2,3,4:0.0,0.0,0.0,0.0 1/1:fourmissall:.,.,.,.:.,.,.,. 11 | 19 3114952 rs1234 A AG . PASS S1=string1;N1=1;F1=. GT:FS1:FN4:FF4 1/1:fourall:1,2,3,4:0.0,0.0,0.0,0.0 1/1:fourmiss:.:. 
12 | -------------------------------------------------------------------------------- /test/test_various.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.1 2 | ##FILTER= 3 | ##FILTER= 4 | ##INFO= 5 | ##INFO= 6 | ##INFO= 7 | ##INFO= 8 | ##FORMAT= 9 | ##FORMAT= 10 | ##FORMAT= 11 | ##FORMAT= 12 | ##FORMAT= 13 | ##contig= 14 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT one two 15 | -------------------------------------------------------------------------------- /test/test_cram.sam: -------------------------------------------------------------------------------- 1 | @HD VN:1.5 SO:coordinate 2 | @SQ SN:chr1 LN:120 M5:20a9a0fb770814e6c5e49946750f9724 UR:/stornext/snfs5/next-gen/scratch/farek/pkg/github/forks/rust-htslib/test/test_cram.fa 3 | @SQ SN:chr2 LN:120 M5:7a2006ccca94ea92b6dae5997e1b0d70 UR:/stornext/snfs5/next-gen/scratch/farek/pkg/github/forks/rust-htslib/test/test_cram.fa 4 | @SQ SN:chr3 LN:120 M5:a66b336bfe3ee8801c744c9545c87e24 UR:/stornext/snfs5/next-gen/scratch/farek/pkg/github/forks/rust-htslib/test/test_cram.fa 5 | chr1.1 163 chr1 5 0 20M = 35 50 ACAGCCTCACCCAGGAAAGC FFFFFFFFFFFFFFFFFFF: MD:Z:20 NM:i:0 6 | chr1.2 83 chr1 35 0 20M = 5 -50 CCACTGGGCTCAGGGAAGAC FFFFFFFFFFFFFFFFFF:: MD:Z:20 NM:i:0 7 | chr2.1 99 chr2 15 0 20M = 45 50 AATTTAAAATTGAAGACTTC FFFFFFFFFFFFFFFFFFF: MD:Z:20 NM:i:0 8 | chr2.2 147 chr2 45 0 15M1D5M = 15 -50 AAGACATTGTTAAGAAATGA FFFFFFFFFFFFFFFFFF:: MD:Z:15^T5 NM:i:1 9 | chr3.1 163 chr3 25 0 20M = 55 50 CAGTGGAGAAGGAAACTTGG FFFFFFFFFFFFFFFFFFF: MD:Z:20 NM:i:0 10 | chr3.2 83 chr3 55 0 15M1I4M = 25 -50 CCCATCAAGGTTGTATAGAA FFFFFFFFFFFFFFFFFF:: MD:Z:19 NM:i:1 11 | -------------------------------------------------------------------------------- /test/test_various.out.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.1 2 | ##FILTER= 3 | ##FILTER= 4 | ##FILTER= 5 | ##INFO= 6 | ##INFO= 7 | ##INFO= 8 | ##INFO= 9 | ##FORMAT= 
10 | ##FORMAT= 11 | ##FORMAT= 12 | ##FORMAT= 13 | ##FORMAT= 14 | ##contig= 15 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT one two 16 | 19 13 first_id;second_id C T,G 10 s50;q10 N1=32;F1=33;S1=fourtytwo;X1 GT:FS1:FF1:FN1:CH1 0/1:yes:43:42:A 1|1:no:11:10:B 17 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Johannes Köster, the Rust-Htslib team. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /test/test_string.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.1 2 | ##INFO= 3 | ##INFO= 4 | ##INFO= 5 | ##FORMAT= 6 | ##FORMAT= 7 | ##FORMAT= 8 | ##FORMAT= 9 | ##contig= 10 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT one two 11 | 19 3111939 rs1234 A AG . 
PASS S1=string1;N1=1;F1=1.0 GT:FS1:FN1 ./1:LongString1:1 1/1:ss1:2 12 | 19 3113255 rs2345 G GC . PASS S1=string2;N1=2;F1=2.0 GT:FS1:FN1 1|1:LongString2:1 1/1:ss2:2 13 | 19 3113259 rs2345 G GC . PASS S1=string3;N1=3;F1=3.0 GT:FS1:FN1 0/1:.:1 1/1:ss3:2 14 | 19 3113262 rs2345 G GC . PASS S1=string4;N1=4;F1=4.0 GT:FS1:FN1 0|1:LongString4:1 1/1:.:2 15 | 19 3113268 rs2345 G GC . PASS S1=string5;N1=5;F1=5.0 GT:FS1:FN1 1|.:evenlength:1 1/1:veenlength:2 16 | 19 3113272 rs2345 G GC . PASS S1=string6;N1=6;F1=6.0 GT:FS1:FN1 1/1:ss6:1 1/1:longstring6:2 17 | -------------------------------------------------------------------------------- /.github/workflows/release-please.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - master 5 | 6 | name: release-please 7 | 8 | jobs: 9 | release-please: 10 | if: github.repository_owner == 'rust-bio' 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v5 14 | with: 15 | submodules: recursive 16 | 17 | - uses: GoogleCloudPlatform/release-please-action@v4 18 | id: release_main 19 | with: 20 | release-type: rust 21 | package-name: rust-htslib 22 | bump-minor-pre-major: true 23 | 24 | - name: Install stable toolchain 25 | uses: dtolnay/rust-toolchain@stable 26 | if: ${{ steps.release_main.outputs.release_created }} 27 | with: 28 | toolchain: stable 29 | 30 | - name: Install system dependencies 31 | if: ${{ steps.release_main.outputs.release_created }} 32 | run: | 33 | sudo apt-get install --yes zlib1g-dev libbz2-dev musl musl-dev musl-tools clang libc6-dev 34 | 35 | - uses: Swatinem/rust-cache@v2 36 | if: ${{ steps.release_main.outputs.release_created }} 37 | 38 | - name: Publish rust-htslib 39 | if: ${{ steps.release_main.outputs.release_created }} 40 | run: cargo publish --token ${{ secrets.CRATES_IO_TOKEN }} 41 | 42 | # - uses: katyo/publish-crates@v1 43 | # if: ${{ steps.release_main.outputs.release_created || steps.release_sys.outputs.release_created }} 
44 | # with: 45 | # registry-token: ${{ secrets.CRATES_IO_TOKEN }} 46 | -------------------------------------------------------------------------------- /src/utils.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Christopher Schröder, Johannes Köster. 2 | // Licensed under the MIT license (http://opensource.org/licenses/MIT) 3 | // This file may not be copied, modified, or distributed 4 | // except according to those terms. 5 | 6 | //! Module with utility code. 7 | 8 | use crate::errors::{Error, Result}; 9 | use std::ffi; 10 | use std::path::Path; 11 | use std::ptr; 12 | 13 | /// Copies data from `src` to `dst` 14 | /// TODO remove once stable in standard library. 15 | /// 16 | /// Panics if the length of `dst` is less than the length of `src`. 17 | #[inline] 18 | pub fn copy_memory(src: &[u8], dst: &mut [u8]) { 19 | let len_src = src.len(); 20 | assert!( 21 | dst.len() >= len_src, 22 | "dst len {} < src len {}", 23 | dst.len(), 24 | src.len() 25 | ); 26 | // `dst` is unaliasable, so we know statically it doesn't overlap 27 | // with `src`. 28 | unsafe { 29 | ptr::copy_nonoverlapping(src.as_ptr(), dst.as_mut_ptr(), len_src); 30 | } 31 | } 32 | 33 | pub fn path_to_cstring>(path: &P) -> Option { 34 | path.as_ref() 35 | .to_str() 36 | .and_then(|p| ffi::CString::new(p).ok()) 37 | } 38 | 39 | /// Convert a path into a byte-vector 40 | pub fn path_as_bytes<'a, P: 'a + AsRef>(path: P, must_exist: bool) -> Result> { 41 | if path.as_ref().exists() || !must_exist { 42 | Ok(path 43 | .as_ref() 44 | .to_str() 45 | .ok_or(Error::NonUnicodePath)? 
46 | .as_bytes() 47 | .to_owned()) 48 | } else { 49 | Err(Error::FileNotFound { 50 | path: path.as_ref().to_owned(), 51 | }) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Christopher Schröder ", "Johannes Köster "] 3 | description = "This library provides HTSlib bindings and a high level Rust API for reading and writing BAM files." 4 | documentation = "https://docs.rs/rust-htslib" 5 | edition = "2018" 6 | include = ["src/**/*", "LICENSE.md", "README.md", "CHANGELOG.md"] 7 | keywords = ["htslib", "bam", "bioinformatics", "pileup", "sequencing"] 8 | license = "MIT" 9 | name = "rust-htslib" 10 | readme = "README.md" 11 | repository = "https://github.com/rust-bio/rust-htslib.git" 12 | version = "0.51.0" 13 | 14 | [package.metadata.release] 15 | pre-release-commit-message = "release version {{version}}" 16 | tag-message = "Version {{version}} of Rust-HTSlib." 
17 | 18 | [dependencies] 19 | libz-sys = ">=1.1.15" 20 | bio-types = ">=0.9" 21 | byteorder = "1.3" 22 | custom_derive = "0.1" 23 | derive-new = "0.7" 24 | hts-sys = {version = "2.2.0", default-features = false, features = ["bindgen"]} 25 | ieee754 = "0.2" 26 | lazy_static = "1.4" 27 | libc = "0.2" 28 | linear-map = "1.2" 29 | newtype_derive = "0.1" 30 | regex = "1.3" 31 | serde = {version = "^1", optional = true, features = ["derive"]} 32 | serde_bytes = {version = "0.11", optional = true} 33 | thiserror = {version = "^2" } 34 | url = "2.5" 35 | 36 | [features] 37 | bindgen = ["hts-sys/bindgen"] 38 | bzip2 = ["hts-sys/bzip2"] 39 | curl = ["hts-sys/curl"] 40 | default = ["bzip2", "lzma", "curl"] 41 | gcs = ["hts-sys/gcs"] 42 | libdeflate = ["hts-sys/libdeflate"] 43 | lzma = ["hts-sys/lzma"] 44 | s3 = ["hts-sys/s3"] 45 | serde_feature = ["serde", "serde_bytes", "bio-types/serde"] 46 | static = ["hts-sys/static"] 47 | 48 | [dev-dependencies] 49 | bincode = "1.2" 50 | pretty_assertions = "1.4" 51 | serde_json = "1.0" 52 | tempfile = "3.1.0" 53 | -------------------------------------------------------------------------------- /src/tpool.rs: -------------------------------------------------------------------------------- 1 | use std::cell::RefCell; 2 | use std::rc::Rc; 3 | 4 | pub use crate::errors::{Error, Result}; 5 | use crate::htslib; 6 | 7 | /// An HTSlib thread pool. Create a thread pool and use `set_thread_pool()` methods 8 | /// to share a thread pool across multiple BAM readers & writers. 9 | /// The Rust wrapper holds the htslib thread pool behind a Rc, and a Rc reference 10 | /// to the thread pool is held by each reader / writer so you don't need to 11 | /// explicitly manage the lifetime of the `ThreadPool`. 12 | #[derive(Clone, Debug)] 13 | pub struct ThreadPool { 14 | pub(crate) handle: Rc>, 15 | } 16 | 17 | impl ThreadPool { 18 | /// Create a new thread pool with `n_threads` threads. 
19 | pub fn new(n_threads: u32) -> Result { 20 | let ret = unsafe { htslib::hts_tpool_init(n_threads as i32) }; 21 | 22 | if ret.is_null() { 23 | Err(Error::ThreadPool) 24 | } else { 25 | let inner = htslib::htsThreadPool { 26 | pool: ret, 27 | // this matches the default size 28 | // used in hts_set_threads. 29 | qsize: n_threads as i32 * 2, 30 | }; 31 | let inner = InnerThreadPool { inner }; 32 | 33 | let handle = Rc::new(RefCell::new(inner)); 34 | Ok(ThreadPool { handle }) 35 | } 36 | } 37 | } 38 | 39 | /// Internal htsThreadPool 40 | #[derive(Clone, Debug)] 41 | pub struct InnerThreadPool { 42 | pub(crate) inner: htslib::htsThreadPool, 43 | } 44 | 45 | impl Drop for InnerThreadPool { 46 | fn drop(&mut self) { 47 | if !self.inner.pool.is_null() { 48 | unsafe { 49 | htslib::hts_tpool_destroy(self.inner.pool); 50 | } 51 | } 52 | 53 | self.inner.pool = std::ptr::null_mut(); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /test/bam2sam_out.sam: -------------------------------------------------------------------------------- 1 | @SQ SN:CHROMOSOME_I LN:15072423 2 | @SQ SN:CHROMOSOME_II LN:15279345 3 | @SQ SN:CHROMOSOME_III LN:13783700 4 | @SQ SN:CHROMOSOME_IV LN:17493793 5 | @SQ SN:CHROMOSOME_V LN:20924149 6 | I 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU 7 | II.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU 8 | III 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 
CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU 9 | IV 16 CHROMOSOME_I 2 40 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU 10 | V 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU 11 | VI 2048 CHROMOSOME_I 2 1 27M100000D73M * 0 0 ACTAAGCCTAAGCCTAAGCCTAAGCCAATTATCGATTTCTGAAAAAATTATCGAATTTTCTAGAAATTTTGCAAATTTTTTCATAAAATTATCGATTTTA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC 12 | -------------------------------------------------------------------------------- /test/bam2sam_expected.sam: -------------------------------------------------------------------------------- 1 | @SQ SN:CHROMOSOME_I LN:15072423 2 | @SQ SN:CHROMOSOME_II LN:15279345 3 | @SQ SN:CHROMOSOME_III LN:13783700 4 | @SQ SN:CHROMOSOME_IV LN:17493793 5 | @SQ SN:CHROMOSOME_V LN:20924149 6 | I 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU 7 | II.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA 
#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU 8 | III 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU 9 | IV 16 CHROMOSOME_I 2 40 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU 10 | V 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU 11 | VI 2048 CHROMOSOME_I 2 1 27M100000D73M * 0 0 ACTAAGCCTAAGCCTAAGCCTAAGCCAATTATCGATTTCTGAAAAAATTATCGAATTTTCTAGAAATTTTGCAAATTTTTTCATAAAATTATCGATTTTA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC 12 | -------------------------------------------------------------------------------- /test/test_orientation_supplementary.sam: -------------------------------------------------------------------------------- 1 | @HD VN:1.3 SO:coordinate 2 | @SQ SN:chr10 LN:135374737 3 | @SQ SN:chr11 LN:134452384 4 | @SQ SN:chr12 LN:132349534 5 | @SQ SN:chr13 LN:114142980 6 | @SQ SN:chr14 LN:106368585 7 | @SQ SN:chr15 LN:100338915 8 | @SQ SN:chr16 LN:88827254 9 | @SQ SN:chr17 LN:78774742 10 | @SQ SN:chr18 LN:76117153 11 | @SQ SN:chr19 LN:63811651 12 | @SQ SN:chr1 LN:247249719 13 | @SQ SN:chr20 LN:62435964 14 | @SQ SN:chr21 LN:46944323 15 | @SQ SN:chr22 LN:49691432 16 | @SQ SN:chr2 LN:242951149 17 | @SQ 
SN:chr3 LN:199501827 18 | @SQ SN:chr4 LN:191273063 19 | @SQ SN:chr5 LN:180857866 20 | @SQ SN:chr6 LN:170899992 21 | @SQ SN:chr7 LN:158821424 22 | @SQ SN:chr8 LN:146274826 23 | @SQ SN:chr9 LN:140273252 24 | @SQ SN:chrM LN:16571 25 | @SQ SN:chrX LN:154913754 26 | @SQ SN:chrY LN:57772954 27 | @RG ID:tumor SM:tumor 28 | @PG ID:bwa PN:bwa VN:0.7.16a-r1187-dirty CL:resources/bwa mem -t 8 -Y -R @RG\tID:tumor\tSM:tumor index/hg18/genome reads/simulated.tumor.1.fastq reads/simulated.tumor.2.fastq 29 | @PG ID:samtools PN:samtools PP:bwa VN:1.20 CL:samtools sort -n -o tests/resources/testcases/test16/tumor.bam.namesorted.bam tests/resources/testcases/test16/tumor.bam 30 | @PG ID:samtools.1 PN:samtools PP:samtools VN:1.20 CL:samtools fixmate tests/resources/testcases/test16/tumor.bam.namesorted.bam tests/resources/testcases/test16/tumor.bam.fixed.bam 31 | @PG ID:samtools.2 PN:samtools PP:samtools.1 VN:1.20 CL:samtools sort -o tests/resources/testcases/test16/tumor.bam tests/resources/testcases/test16/tumor.bam.fixed.bam 32 | @PG ID:samtools.3 PN:samtools PP:samtools.2 VN:1.20 CL:samtools view -H tumor.bam 33 | sim_Som2-5-1_chr1_1_1b5889 163 chr1 938 60 100M = 1143 264 CACTATACCACTTGAATGCTCAGAAGAAAAAAAAAAGAATTCAGAATATGTGTATTAAAATGGGTACAATAATGAGTAAAAAACTTGAAAGAAGCTGGAG GFGDECGFE?AA>FGADEEF)BDC>FDEC5EEBFEEB?D@BGDCAB:FBGGDE;DF-CBDDCD+>:ED@#?=?#>EADE,B#?A#FFC;CC@#:5DBA## NM:i:1 MD:Z:69A30 AS:i:95 XS:i:23 ZT:Z:95,1,72,72,0,0,69,0,1,0,0,0,1379,0 RG:Z:tumor MQ:i:60 MC:Z:59M41S 34 | sim_Som2-5-1_chr1_1_1b5889 83 chr1 1143 60 59M41S = 938 -264 ATCTTTCCTTTATCAACTATTGGTGTTAACCTTTGATTATATTTTTGCATAAGCATACAAAATATTGATCTTTAATTATACTAAGGAATCAATAGCCAAA F:##.##D#DFDA5D#*#:D?E7B?.>?GA>6?=CE:EEBEDAE=AF:5GF:G#ABGDGGAGEG=DAE?FF?GG7AGEG-BDEGDAGEDDEDF=>F5E*@ NM:i:2 MD:Z:2G13A42 AS:i:51 XS:i:20 SA:Z:chr1,73497948,-,58S42M,60,1; ZT:Z:51,2,31,11,0,0,42,0,3,0,0,0,419,0 RG:Z:tumor MQ:i:60 MC:Z:100M 35 | sim_Som2-5-1_chr1_1_1b5889 2131 chr1 1263 60 58S42M = 938 -367 
ATCTTTCCTTTATCAACTATTGGTGTTAACCTTTGATTATATTTTTGCATAAGCATACAAAATATTGATCTTTAATTATACTAAGGAATCAATAGCCAAA F:##.##D#DFDA5D#*#:D?E7B?.>?GA>6?=CE:EEBEDAE=AF:5GF:G#ABGDGGAGEG=DAE?FF?GG7AGEG-BDEGDAGEDDEDF=>F5E*@ NM:i:1 MD:Z:40C1 MC:Z:100M AS:i:40 XS:i:0 SA:Z:chr1,73497828,-,59M41S,60,2; ZT:Z:40,1,40,40,0,0,40,0,0,0,0,0,400,0 RG:Z:tumor 36 | -------------------------------------------------------------------------------- /src/bam/index.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Johannes Köster. 2 | // Licensed under the MIT license (http://opensource.org/licenses/MIT) 3 | // This file may not be copied, modified, or distributed 4 | // except according to those terms. 5 | 6 | //! Module for working with BAM or CRAM indices. 7 | 8 | use std::path::Path; 9 | use std::ptr; 10 | 11 | use crate::errors::{Error, Result}; 12 | use crate::htslib; 13 | use crate::utils; 14 | 15 | /// Index type to build. 16 | pub enum Type { 17 | /// BAI index 18 | Bai, 19 | /// CSI index, with given minimum shift 20 | Csi(u32), 21 | } 22 | 23 | /// Build a BAM index. 24 | pub fn build>( 25 | bam_path: P, 26 | idx_path: Option

, 27 | idx_type: Type, 28 | n_threads: u32, 29 | ) -> Result<()> { 30 | let min_shift = match idx_type { 31 | Type::Bai => 0, 32 | Type::Csi(min_shift) => min_shift as i32, 33 | }; 34 | let idx_path_cstr; 35 | let idx_path_ptr = if let Some(p) = idx_path { 36 | idx_path_cstr = 37 | utils::path_to_cstring(&p).expect("path_to_cstring unexpectedly returned with Err"); 38 | idx_path_cstr.as_ptr() 39 | } else { 40 | ptr::null() 41 | }; 42 | let ret = unsafe { 43 | htslib::sam_index_build3( 44 | utils::path_to_cstring(&bam_path).unwrap().as_ptr(), 45 | idx_path_ptr, 46 | min_shift, 47 | n_threads as i32, 48 | ) 49 | }; 50 | match ret { 51 | 0 => Ok(()), 52 | -1 => Err(Error::BamBuildIndex), 53 | -2 => Err(Error::BamOpen { 54 | target: bam_path.as_ref().to_str().unwrap().to_owned(), 55 | }), 56 | -3 => Err(Error::BamNotIndexable), 57 | -4 => Err(Error::BamWriteIndex), 58 | e => panic!("unexpected error code from sam_index_build3: {}", e), 59 | } 60 | } 61 | 62 | #[cfg(test)] 63 | mod tests { 64 | use super::*; 65 | 66 | #[test] 67 | fn test_index_build() { 68 | let test_bam = "test/test_index_build.bam"; 69 | 70 | // test BAI index creation with 1 thread 71 | let idx1 = "test/results/test1.bam.bai"; 72 | build(test_bam, Some(idx1), Type::Bai, 1).unwrap(); 73 | assert!(Path::new(idx1).exists()); 74 | 75 | // test BAI index creation with 2 threads 76 | let idx2 = "test/results/test2.bam.bai"; 77 | build(test_bam, Some(idx2), Type::Bai, 2).unwrap(); 78 | assert!(Path::new(idx2).exists()); 79 | 80 | // test CSI index creation with 2 threads 81 | let idx3 = "test/results/test3.bam.csi"; 82 | build(test_bam, Some(idx3), Type::Csi(2), 2).unwrap(); 83 | assert!(Path::new(idx3).exists()); 84 | 85 | // test CSI index creation with 2 threads and default file name 86 | build(test_bam, None, Type::Csi(5), 2).unwrap(); 87 | assert!(Path::new("test/test_index_build.bam.csi").exists()); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- 
/test/obs-cornercase.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.2 2 | ##FILTER= 3 | ##INFO= 4 | ##INFO= 5 | ##INFO= 6 | ##INFO= 7 | ##INFO= 8 | ##contig= 9 | ##INFO= 10 | ##INFO= 11 | ##INFO= 12 | ##INFO= 13 | ##INFO= 14 | ##INFO= 15 | ##INFO= 16 | ##INFO= 17 | ##INFO= 18 | ##varlociraptor_preprocess_args={"Preprocess":{"kind":{"Variants":{"reference":"ref.fa","candidates":"candidates.vcf","bam":"patient13.bam","reference_buffer_size":10,"min_bam_refetch_distance":1,"alignment_properties":null,"output":"patient13.obs.bcf","spurious_ins_rate":2.8e-6,"spurious_del_rate":5.1e-6,"spurious_insext_rate":0.0,"spurious_delext_rate":0.0,"protocol_strandedness":"Opposite","realignment_window":64,"max_depth":200,"omit_insert_size":false,"pairhmm_mode":"exact"}}}} 19 | ##varlociraptor_observation_format_version=5 20 | ##bcftools_viewVersion=1.11+htslib-1.11 21 | ##bcftools_viewCommand=view patient13.obs.bcf; Date=Tue Nov 17 13:48:51 2020 22 | #CHROM POS ID REF ALT QUAL FILTER INFO 23 | 10 1301 gridss33fb_1085o T T[10:2000[ 2.1391e+09 . 
EVENT=gridss33fb_1085;SVTYPE=BND;MATEID=gridss33fb_1085h;PROB_MAPPING=2,0,0,0,1,0,6982,48788,1,0,3556,48946;PROB_REF=2,0,0,0,1,0,29208,48945,1,0,29208,48945;PROB_ALT=2,0,0,0,0,0,53400,0,0,53537;PROB_MISSED_ALLELE=2,0,0,0,1,0,29208,49073,1,0,29208,49073;PROB_SAMPLE_ALT=2,0,0,0,1,0,64758,48272,1,0,64758,48272;PROB_DOUBLE_OVERLAP=2,0,0,0,0,0,64512,0,0,64512;STRAND=2,0,0,0,0,0,1,0;READ_ORIENTATION=2,0,0,0,0,0,0,0;SOFTCLIPPED=257,0,0,0,768,2,0,0,0 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Crates.io](https://img.shields.io/crates/d/rust-htslib.svg)](https://crates.io/crates/rust-htslib) 2 | [![Crates.io](https://img.shields.io/crates/v/rust-htslib.svg)](https://crates.io/crates/rust-htslib) 3 | [![Crates.io](https://img.shields.io/crates/l/rust-htslib.svg)](https://crates.io/crates/rust-htslib) 4 | [![docs.rs](https://docs.rs/rust-htslib/badge.svg)](https://docs.rs/rust-htslib) 5 | ![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/rust-bio/rust-htslib/rust.yml?branch=master&label=tests) 6 | [![Coverage Status](https://coveralls.io/repos/github/rust-bio/rust-htslib/badge.svg?branch=master)](https://coveralls.io/github/rust-bio/rust-htslib?branch=master) 7 | 8 | # HTSlib bindings for Rust 9 | 10 | This library provides HTSlib bindings and a high level Rust API for reading and writing BAM files. 11 | 12 | To clone this repository, issue 13 | 14 | ```shell 15 | $ git clone --recursive https://github.com/rust-bio/rust-htslib.git 16 | ``` 17 | 18 | ensuring that the HTSlib submodule is fetched, too. 19 | If you only want to use the library, there is no need to clone the repository. Go on to the **Usage** section in this case. 20 | 21 | ## Requirements 22 | 23 | rust-htslib comes with pre-built bindings to htslib for Mac and Linux. You will need a C toolchain compatible with the `cc` crate. 
The build script for this crate will automatically build and link htslib. 24 | 25 | ## Usage 26 | 27 | Add this to your `Cargo.toml`: 28 | ```toml 29 | [dependencies] 30 | rust-htslib = "*" 31 | ``` 32 | 33 | By default `rust-htslib` links to `bzip2-sys` and `lzma-sys` for full CRAM support. If you do not need CRAM support, or you do not need to support CRAM files 34 | with these compression methods, you can deactivate these features to reduce your dependency count: 35 | 36 | ```toml 37 | [dependencies] 38 | rust-htslib = { version = "*", default-features = false } 39 | ``` 40 | 41 | `rust-htslib` has optional support for `serde`, to allow (de)serialization of `bam::Record` via any serde-supported format. 42 | 43 | HTTP access to files is available with the `curl` feature. 44 | 45 | Beta-level S3 and Google Cloud Storage support is available with the `s3` and `gcs` features. 46 | 47 | `rust-htslib` can optionally use `bindgen` to generate bindings to htslib. This can slow down the build substantially. Enabling the `bindgen` feature will 48 | cause `hts-sys` to create a binding file for your architecture. Pre-built bindings are supplied for Mac and Linux. The `bindgen` feature on Windows is untested - please file a bug if you need help. 49 | 50 | 51 | 52 | ```toml 53 | [dependencies] 54 | rust-htslib = { version = "*", features = ["serde_feature"] } 55 | ``` 56 | 57 | For more information, please see the [docs](https://docs.rs/rust-htslib). 58 | 59 | # Alternatives 60 | 61 | There's [noodles](https://github.com/zaeleus/noodles) by [Michael Macias](https://github.com/zaeleus) which implements a large part of htslib's C functionality in pure Rust (still experimental though). 
62 | 63 | # Authors 64 | 65 | * [Johannes Köster](https://github.com/johanneskoester) 66 | * [Christopher Schröder](https://github.com/christopher-schroeder) 67 | * [Patrick Marks](https://github.com/pmarks) 68 | * [David Lähnemann](https://github.com/dlaehnemann) 69 | * [Manuel Holtgrewe](https://github.com/holtgrewe) 70 | * [Julian Gehring](https://github.com/juliangehring) 71 | 72 | For other contributors, see [here](https://github.com/rust-bio/rust-htslib/graphs/contributors). 73 | 74 | ## License 75 | 76 | Licensed under the MIT license https://opensource.org/licenses/MIT. This project may not be copied, modified, or distributed except according to those terms. 77 | Some test files are taken from https://github.com/samtools/htslib. 78 | -------------------------------------------------------------------------------- /src/bcf/index.rs: -------------------------------------------------------------------------------- 1 | use crate::{htslib, utils}; 2 | 3 | #[derive(Debug)] 4 | pub struct BcfBuildError { 5 | pub msg: String, 6 | } 7 | 8 | /// Index type to build. 9 | pub enum Type { 10 | /// Tabix index 11 | Tbx, 12 | /// CSI index, with given minimum shift 13 | Csi(u32), 14 | } 15 | 16 | impl Type { 17 | fn min_shift(&self) -> i32 { 18 | match self { 19 | Self::Tbx => 0, 20 | Self::Csi(x) => *x as i32, 21 | } 22 | } 23 | } 24 | 25 | impl BcfBuildError { 26 | pub fn error_message(error: i32) -> &'static str { 27 | match error { 28 | -1 => "indexing failed", 29 | -2 => "opening @fn failed", 30 | -3 => "format not indexable", 31 | -4 => "failed to create and/or save the index", 32 | _ => "unknown error", 33 | } 34 | } 35 | } 36 | impl std::error::Error for BcfBuildError {} 37 | 38 | impl std::fmt::Display for BcfBuildError { 39 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 40 | write!(f, "BcfBuildError{{msg: {}}}", self.msg) 41 | } 42 | } 43 | 44 | /// Build a bcf or vcf.gz index. 45 | /// Builds tbi or csi depending on index_type. 
46 | /// 47 | ///``` 48 | /// // Index a sorted bcf file with csi. 49 | /// let bcf_path = concat!(env!("CARGO_MANIFEST_DIR"), "/test/test_multi.bcf"); 50 | /// rust_htslib::bcf::index::build(&bcf_path, Some(&"built_test.csi"), /*n_threads=*/ 4u32, rust_htslib::bcf::index::Type::Csi(14)).expect("Failed to build csi index for bcf file."); 51 | /// assert!(std::path::Path::new(&"built_test.csi").exists()); 52 | /// 53 | /// // Index a bgzip-compresed vcf file with tabix. 54 | /// let vcf_path = concat!(env!("CARGO_MANIFEST_DIR"), "/test/test_left.vcf.gz"); 55 | /// rust_htslib::bcf::index::build(&vcf_path, Some(&"built_test_vcf.tbx"), /*n_threads=*/ 4u32, rust_htslib::bcf::index::Type::Tbx).expect("Failed to build tbx index for vcf file."); 56 | /// assert!(std::path::Path::new(&"built_test_vcf.tbx").exists()); 57 | /// 58 | /// // Cannot build a tbi index for a bcf file: returns an Err(BcfBuildError). 59 | /// assert!(std::panic::catch_unwind(|| rust_htslib::bcf::index::build(bcf_path, Some("built_test.tbi"), 4u32, rust_htslib::bcf::index::Type::Tbx).unwrap()).is_err()); 60 | /// 61 | /// // Cannot built a csi index for a vcf file: returns an Err(BcfBuildError). 62 | /// let vcf_path = concat!(env!("CARGO_MANIFEST_DIR"), "/test/test_various.vcf"); 63 | /// assert!(std::panic::catch_unwind(|| rust_htslib::bcf::index::build(&vcf_path, Some(&"built_test_vcf.csi"), /*n_threads=*/ 4u32, rust_htslib::bcf::index::Type::Csi(14)).expect("Failed to build csi index for vcf file.")).is_err()); 64 | ///``` 65 | /// 66 | pub fn build>( 67 | bcf_path: P, 68 | idx_path: Option

, 69 | n_threads: u32, 70 | index_type: Type, 71 | ) -> Result<(), BcfBuildError> { 72 | let min_shift = index_type.min_shift(); 73 | let idx_path_cstr = idx_path.and_then(|x| utils::path_to_cstring(&x)); 74 | let bcf_path = utils::path_to_cstring(&bcf_path).ok_or(BcfBuildError { 75 | msg: format!( 76 | "Failed to format bcf_path to cstring: {:?}", 77 | bcf_path.as_ref().display() 78 | ), 79 | })?; 80 | let return_code = unsafe { 81 | htslib::bcf_index_build3( 82 | bcf_path.as_ptr(), 83 | idx_path_cstr 84 | .as_ref() 85 | .map_or(std::ptr::null(), |p| p.as_ptr()), 86 | min_shift, 87 | n_threads as i32, 88 | ) 89 | }; 90 | if return_code == 0 { 91 | Ok(()) 92 | } else { 93 | Err(BcfBuildError { 94 | msg: format!( 95 | "Failed to build bcf index. Error: {return_code:?}/{}", 96 | BcfBuildError::error_message(return_code) 97 | ), 98 | }) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2014-2021 Johannes Köster. 2 | // Licensed under the MIT license (http://opensource.org/licenses/MIT) 3 | // This file may not be copied, modified, or distributed 4 | // except according to those terms. 5 | 6 | //! Rust-Htslib provides a high level API to working with the common HTS file formats. 7 | //! 8 | //! Htslib itself is the *de facto* standard implementation for reading and writing files for 9 | //! HTS alignments (SAM and BAM) as well as variant calls in VCF and BCF format. 10 | //! 11 | //! For example, let's say that we use samtools to view the header of a test file: 12 | //! 13 | //! ```bash 14 | //! samtools view -H test/test.bam 15 | //! @SQ SN:CHROMOSOME_I LN:15072423 16 | //! @SQ SN:CHROMOSOME_II LN:15279345 17 | //! @SQ SN:CHROMOSOME_III LN:13783700 18 | //! @SQ SN:CHROMOSOME_IV LN:17493793 19 | //! @SQ SN:CHROMOSOME_V LN:20924149 20 | //! ``` 21 | //! 22 | //! We can reproduce that with Rust-Htslib. 
Reading BAM files and printing the header 23 | //! to the screen is as easy as 24 | //! 25 | //! ``` 26 | //! use rust_htslib::{bam, bam::Read}; 27 | //! 28 | //! 29 | //! let bam = bam::Reader::from_path(&"test/test.bam").unwrap(); 30 | //! let header = bam::Header::from_template(bam.header()); 31 | //! 32 | //! // print header records to the terminal, akin to samtools 33 | //! for (key, records) in header.to_hashmap() { 34 | //! for record in records { 35 | //! println!("@{}\tSN:{}\tLN:{}", key, record["SN"], record["LN"]); 36 | //! } 37 | //! } 38 | //! ``` 39 | //! 40 | //! which results in the following output, equivalent to samtools. 41 | //! 42 | //! ```bash 43 | //! @SQ SN:CHROMOSOME_I LN:15072423 44 | //! @SQ SN:CHROMOSOME_II LN:15279345 45 | //! @SQ SN:CHROMOSOME_III LN:13783700 46 | //! @SQ SN:CHROMOSOME_IV LN:17493793 47 | //! @SQ SN:CHROMOSOME_V LN:20924149 48 | //! ``` 49 | //! 50 | //! We can also read directly from the BAM file and write to an output file 51 | //! 52 | //! ``` 53 | //! use rust_htslib::{bam, bam::Read}; 54 | //! 55 | //! let mut bam = bam::Reader::from_path(&"test/test.bam").unwrap(); 56 | //! let header = bam::Header::from_template(bam.header()); 57 | //! let mut out = bam::Writer::from_path(&"test/out.bam", &header, bam::Format::Bam).unwrap(); 58 | //! 59 | //! // copy reverse reads to new BAM file 60 | //! for r in bam.records() { 61 | //! let record = r.unwrap(); 62 | //! if record.is_reverse() { 63 | //! out.write(&record).unwrap(); 64 | //! } 65 | //! } 66 | //! ``` 67 | //! 68 | //! Pileups can be performed with 69 | //! 70 | //! ``` 71 | //! use rust_htslib::{bam, bam::Read}; 72 | //! 73 | //! let mut bam = bam::Reader::from_path(&"test/test.bam").unwrap(); 74 | //! 75 | //! // pileup over all covered sites 76 | //! for p in bam.pileup() { 77 | //! let pileup = p.unwrap(); 78 | //! println!("{}:{} depth {}", pileup.tid(), pileup.pos(), pileup.depth()); 79 | //! 80 | //! for alignment in pileup.alignments() { 81 | //! 
if !alignment.is_del() && !alignment.is_refskip() { 82 | //! println!("Base {}", alignment.record().seq()[alignment.qpos().unwrap()]); 83 | //! } 84 | //! // mark indel start 85 | //! match alignment.indel() { 86 | //! bam::pileup::Indel::Ins(len) => println!("Insertion of length {} between this and next position.", len), 87 | //! bam::pileup::Indel::Del(len) => println!("Deletion of length {} between this and next position.", len), 88 | //! bam::pileup::Indel::None => () 89 | //! } 90 | //! } 91 | //! } 92 | //! ``` 93 | //! 94 | //! In both cases, indexed BAM files can be seeked for specific regions using [`fetch`](bam/struct.IndexedReader.html#method.fetch), constraining either the record iterator or the pileups: 95 | //! 96 | //! ``` 97 | //! use rust_htslib::{bam, bam::Read}; 98 | //! 99 | //! let mut bam = bam::IndexedReader::from_path(&"test/test.bam").unwrap(); 100 | //! 101 | //! bam.fetch(("CHROMOSOME_I", 0, 20)).unwrap(); 102 | //! // afterwards, read or pileup in this region 103 | //! ``` 104 | //! 105 | //! See 106 | //! * [`fetch`](bam/struct.IndexedReader.html#method.fetch) 107 | //! * [`records`](bam/struct.IndexedReader.html#method.records) 108 | //! * [`read`](bam/struct.IndexedReader.html#method.read) 109 | //! 
* [`pileup`](bam/struct.IndexedReader.html#method.pileup) 110 | 111 | #[macro_use] 112 | extern crate custom_derive; 113 | 114 | #[macro_use] 115 | extern crate newtype_derive; 116 | 117 | #[cfg(feature = "serde_feature")] 118 | extern crate serde; 119 | 120 | #[cfg(test)] // <-- not needed in examples + integration tests 121 | #[macro_use] 122 | extern crate pretty_assertions; 123 | #[cfg(all(test, feature = "serde_feature"))] 124 | extern crate serde_json; 125 | 126 | pub mod bam; 127 | pub mod bcf; 128 | pub mod bgzf; 129 | pub mod errors; 130 | pub mod faidx; 131 | pub mod htslib; 132 | pub mod tbx; 133 | pub mod tpool; 134 | pub mod utils; 135 | -------------------------------------------------------------------------------- /src/bam/header.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Johannes Köster. 2 | // Licensed under the MIT license (http://opensource.org/licenses/MIT) 3 | // This file may not be copied, modified, or distributed 4 | // except according to those terms. 5 | 6 | use crate::bam::HeaderView; 7 | use lazy_static::lazy_static; 8 | use linear_map::LinearMap; 9 | use regex::Regex; 10 | use std::borrow::Cow; 11 | use std::collections::HashMap; 12 | 13 | /// A BAM header. 14 | #[derive(Debug, Clone)] 15 | pub struct Header { 16 | records: Vec>, 17 | } 18 | 19 | impl Default for Header { 20 | fn default() -> Self { 21 | Self::new() 22 | } 23 | } 24 | 25 | impl Header { 26 | /// Create a new header. 27 | pub fn new() -> Self { 28 | Header { 29 | records: Vec::new(), 30 | } 31 | } 32 | 33 | pub fn from_template(header: &HeaderView) -> Self { 34 | let mut record = header.as_bytes().to_owned(); 35 | // Strip off any trailing newline character. 
36 | // Otherwise there could be a blank line in the 37 | // header which samtools (<=1.6) will complain 38 | // about 39 | while let Some(&last_char) = record.last() { 40 | if last_char == b'\n' { 41 | record.pop(); 42 | } else { 43 | break; 44 | } 45 | } 46 | Header { 47 | records: vec![record], 48 | } 49 | } 50 | 51 | /// Add a record to the header. 52 | pub fn push_record(&mut self, record: &HeaderRecord<'_>) -> &mut Self { 53 | self.records.push(record.to_bytes()); 54 | self 55 | } 56 | 57 | /// Add a comment to the header. 58 | pub fn push_comment(&mut self, comment: &[u8]) -> &mut Self { 59 | self.records.push([&b"@CO"[..], comment].join(&b'\t')); 60 | self 61 | } 62 | 63 | pub fn to_bytes(&self) -> Vec { 64 | self.records.join(&b'\n') 65 | } 66 | 67 | /// This returns a header as a HashMap. 68 | /// Comment lines starting with "@CO" will NOT be included in the HashMap. 69 | /// Comment lines can be obtained by the `comments` function. 70 | pub fn to_hashmap(&self) -> HashMap>> { 71 | let mut header_map = HashMap::default(); 72 | 73 | lazy_static! 
{ 74 | static ref REC_TYPE_RE: Regex = Regex::new(r"@([A-Z][A-Z])").unwrap(); 75 | static ref TAG_RE: Regex = Regex::new(r"([A-Za-z][A-Za-z0-9]):([ -~]*)").unwrap(); 76 | } 77 | 78 | let header_string = String::from_utf8(self.to_bytes()).unwrap(); 79 | 80 | for line in header_string.split('\n').filter(|x| !x.is_empty()) { 81 | let parts: Vec<_> = line.split('\t').filter(|x| !x.is_empty()).collect(); 82 | // assert!(rec_type_re.is_match(parts[0])); 83 | let record_type = REC_TYPE_RE 84 | .captures(parts[0]) 85 | .unwrap() 86 | .get(1) 87 | .unwrap() 88 | .as_str() 89 | .to_owned(); 90 | if record_type.eq("CO") { 91 | continue; 92 | } 93 | let mut field = LinearMap::default(); 94 | for part in parts.iter().skip(1) { 95 | let cap = TAG_RE.captures(part).unwrap(); 96 | let tag = cap.get(1).unwrap().as_str().to_owned(); 97 | let value = cap.get(2).unwrap().as_str().to_owned(); 98 | field.insert(tag, value); 99 | } 100 | header_map 101 | .entry(record_type) 102 | .or_insert_with(Vec::new) 103 | .push(field); 104 | } 105 | header_map 106 | } 107 | 108 | /// Returns an iterator of comment lines. 109 | pub fn comments(&'_ self) -> impl Iterator> { 110 | self.records.iter().flat_map(|r| { 111 | r.split(|x| x == &b'\n') 112 | .filter(|x| x.starts_with(b"@CO\t")) 113 | .map(|x| String::from_utf8_lossy(&x[4..])) 114 | }) 115 | } 116 | } 117 | 118 | /// Header record. 119 | #[derive(Debug, Clone)] 120 | pub struct HeaderRecord<'a> { 121 | rec_type: Vec, 122 | tags: Vec<(&'a [u8], Vec)>, 123 | } 124 | 125 | impl<'a> HeaderRecord<'a> { 126 | /// Create a new header record. 127 | /// See SAM format specification for possible record types. 128 | pub fn new(rec_type: &'a [u8]) -> Self { 129 | HeaderRecord { 130 | rec_type: [&b"@"[..], rec_type].concat(), 131 | tags: Vec::new(), 132 | } 133 | } 134 | 135 | /// Add a new tag to the record. 136 | /// 137 | /// # Arguments 138 | /// 139 | /// * `tag` - the tag identifier 140 | /// * `value` - the value. 
Can be any type convertible into a string. Preferably numbers or 141 | /// strings. 142 | pub fn push_tag(&mut self, tag: &'a [u8], value: V) -> &mut Self { 143 | self.tags.push((tag, value.to_string().into_bytes())); 144 | self 145 | } 146 | 147 | fn to_bytes(&self) -> Vec { 148 | let mut out = Vec::new(); 149 | out.extend(self.rec_type.iter()); 150 | for &(tag, ref value) in self.tags.iter() { 151 | out.push(b'\t'); 152 | out.extend(tag.iter()); 153 | out.push(b':'); 154 | out.extend(value.iter()); 155 | } 156 | out 157 | } 158 | } 159 | 160 | #[cfg(test)] 161 | mod tests { 162 | use super::HeaderRecord; 163 | 164 | #[test] 165 | fn test_push_tag() { 166 | let mut record = HeaderRecord::new(b"HD"); 167 | record.push_tag(b"X1", 0); 168 | record.push_tag(b"X2", 0); 169 | 170 | let x = "x".to_string(); 171 | record.push_tag(b"X3", x.as_str()); 172 | record.push_tag(b"X4", &x); 173 | record.push_tag(b"X5", x); 174 | 175 | assert_eq!(record.to_bytes(), b"@HD\tX1:0\tX2:0\tX3:x\tX4:x\tX5:x"); 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /src/bam/pileup.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Johannes Köster. 2 | // Licensed under the MIT license (http://opensource.org/licenses/MIT) 3 | // This file may not be copied, modified, or distributed 4 | // except according to those terms. 5 | 6 | use std::fmt; 7 | use std::iter; 8 | use std::slice; 9 | 10 | use crate::htslib; 11 | 12 | use crate::bam; 13 | use crate::bam::record; 14 | use crate::errors::{Error, Result}; 15 | 16 | /// Iterator over alignments of a pileup. 17 | pub type Alignments<'a> = iter::Map< 18 | slice::Iter<'a, htslib::bam_pileup1_t>, 19 | fn(&'a htslib::bam_pileup1_t) -> Alignment<'a>, 20 | >; 21 | 22 | /// A pileup over one genomic position. 
23 | #[derive(Debug)] 24 | pub struct Pileup { 25 | inner: *const htslib::bam_pileup1_t, 26 | depth: u32, 27 | tid: u32, 28 | pos: u32, 29 | } 30 | 31 | impl Pileup { 32 | pub fn tid(&self) -> u32 { 33 | self.tid 34 | } 35 | 36 | pub fn pos(&self) -> u32 { 37 | self.pos 38 | } 39 | 40 | pub fn depth(&self) -> u32 { 41 | self.depth 42 | } 43 | 44 | pub fn alignments(&self) -> Alignments<'_> { 45 | self.inner().iter().map(Alignment::new) 46 | } 47 | 48 | fn inner(&self) -> &[htslib::bam_pileup1_t] { 49 | unsafe { 50 | slice::from_raw_parts( 51 | self.inner as *mut htslib::bam_pileup1_t, 52 | self.depth as usize, 53 | ) 54 | } 55 | } 56 | } 57 | 58 | /// An aligned read in a pileup. 59 | pub struct Alignment<'a> { 60 | inner: &'a htslib::bam_pileup1_t, 61 | } 62 | 63 | impl fmt::Debug for Alignment<'_> { 64 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 65 | write!(f, "Alignment") 66 | } 67 | } 68 | 69 | impl<'a> Alignment<'a> { 70 | pub fn new(inner: &'a htslib::bam_pileup1_t) -> Self { 71 | Alignment { inner } 72 | } 73 | 74 | /// Position within the read. None if either `is_del` or `is_refskip`. 75 | pub fn qpos(&self) -> Option { 76 | if self.is_del() || self.is_refskip() { 77 | // there is no alignment position in such a case 78 | None 79 | } else { 80 | Some(self.inner.qpos as usize) 81 | } 82 | } 83 | 84 | /// Insertion, deletion (with length) if indel starts at next base or None otherwise. 85 | pub fn indel(&self) -> Indel { 86 | match self.inner.indel { 87 | len if len < 0 => Indel::Del(-len as u32), 88 | len if len > 0 => Indel::Ins(len as u32), 89 | _ => Indel::None, 90 | } 91 | } 92 | 93 | /// Whether there is a deletion in the alignment at this position. 94 | pub fn is_del(&self) -> bool { 95 | self.inner.is_del() != 0 96 | } 97 | 98 | /// Whether the alignment starts at this position. 99 | pub fn is_head(&self) -> bool { 100 | self.inner.is_head() != 0 101 | } 102 | 103 | /// Whether the alignment ends at this position. 
104 | pub fn is_tail(&self) -> bool { 105 | self.inner.is_tail() != 0 106 | } 107 | 108 | /// Whether this position is marked as refskip in the CIGAR string. 109 | pub fn is_refskip(&self) -> bool { 110 | self.inner.is_refskip() != 0 111 | } 112 | 113 | /// The corresponding record. 114 | pub fn record(&self) -> record::Record { 115 | record::Record::from_inner(self.inner.b) 116 | } 117 | } 118 | 119 | #[derive(PartialEq, Eq, Debug, Copy, Clone, Hash)] 120 | pub enum Indel { 121 | Ins(u32), 122 | Del(u32), 123 | None, 124 | } 125 | 126 | /// Iterator over pileups. 127 | #[derive(Debug)] 128 | pub struct Pileups<'a, R: bam::Read> { 129 | #[allow(dead_code)] 130 | reader: &'a mut R, 131 | itr: htslib::bam_plp_t, 132 | } 133 | 134 | impl<'a, R: bam::Read> Pileups<'a, R> { 135 | pub fn new(reader: &'a mut R, itr: htslib::bam_plp_t) -> Self { 136 | Pileups { reader, itr } 137 | } 138 | 139 | /// Warning: because htslib internally uses signed integer for depth this method 140 | /// will panic if `depth` exceeds `i32::MAX`. 
141 | pub fn set_max_depth(&mut self, depth: u32) { 142 | if depth > i32::MAX as u32 { 143 | panic!( 144 | "Maximum value for pileup depth is {} but {} was provided", 145 | i32::MAX, 146 | depth 147 | ) 148 | } 149 | let intdepth = depth as i32; 150 | unsafe { 151 | htslib::bam_plp_set_maxcnt(self.itr, intdepth); 152 | } 153 | } 154 | } 155 | 156 | impl Iterator for Pileups<'_, R> { 157 | type Item = Result; 158 | 159 | #[allow(clippy::match_bool)] 160 | fn next(&mut self) -> Option> { 161 | let (mut tid, mut pos, mut depth) = (0i32, 0i32, 0i32); 162 | let inner = unsafe { htslib::bam_plp_auto(self.itr, &mut tid, &mut pos, &mut depth) }; 163 | 164 | match inner.is_null() { 165 | true if depth == -1 => Some(Err(Error::BamPileup)), 166 | true => None, 167 | false => Some(Ok(Pileup { 168 | inner, 169 | depth: depth as u32, 170 | tid: tid as u32, 171 | pos: pos as u32, 172 | })), 173 | } 174 | } 175 | } 176 | 177 | impl Drop for Pileups<'_, R> { 178 | fn drop(&mut self) { 179 | unsafe { 180 | htslib::bam_plp_reset(self.itr); 181 | htslib::bam_plp_destroy(self.itr); 182 | } 183 | } 184 | } 185 | 186 | #[cfg(test)] 187 | mod tests { 188 | 189 | use crate::bam; 190 | use crate::bam::Read; 191 | 192 | #[test] 193 | fn test_max_pileup() { 194 | let mut bam = bam::Reader::from_path("test/test.bam").unwrap(); 195 | let mut p = bam.pileup(); 196 | p.set_max_depth(0u32); 197 | p.set_max_depth(800u32); 198 | } 199 | 200 | #[test] 201 | #[should_panic] 202 | fn test_max_pileup_to_high() { 203 | let mut bam = bam::Reader::from_path("test/test.bam").unwrap(); 204 | let mut p = bam.pileup(); 205 | p.set_max_depth((i32::MAX as u32) + 1); 206 | } 207 | } 208 | -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | use thiserror::Error; 3 | 4 | /// Generic result type for functions in this crate with 5 | /// a global error class. 
6 | pub type Result = std::result::Result; 7 | 8 | #[derive(Error, Debug, PartialEq)] 9 | pub enum Error { 10 | // General errors 11 | #[error("file not found: {path}")] 12 | FileNotFound { path: PathBuf }, 13 | #[error("file could not be opened: {path}")] 14 | FileOpen { path: String }, 15 | #[error("invalid (non-unicode) characters in path")] 16 | NonUnicodePath, 17 | #[error("failed to fetch region")] 18 | Fetch, 19 | #[error("error seeking to file offset")] 20 | FileSeek, 21 | #[error("error seeking to {contig:?}:{start} in indexed file")] 22 | GenomicSeek { contig: String, start: u64 }, 23 | #[error("sequence {sequence} not found in index")] 24 | UnknownSequence { sequence: String }, 25 | #[error("error setting threads for file reading")] 26 | SetThreads, 27 | #[error("failed to create htslib thread pool")] 28 | ThreadPool, 29 | 30 | #[error("failed to write BAM/BCF record (out of disk space?)")] 31 | WriteRecord, 32 | 33 | // Errors for faidx 34 | #[error("The given position is too large to be converted to i64")] 35 | FaidxPositionTooLarge, 36 | #[error("bad conversion of sequence name")] 37 | FaidxBadSeqName, 38 | #[error("failed to build index for fasta file {path:?}")] 39 | FaidxBuildFailed { path: std::path::PathBuf }, 40 | 41 | // Errors for Tbx 42 | #[error("previous iterator generation failed")] 43 | TabixNoIter, 44 | #[error("truncated tabix record")] 45 | TabixTruncatedRecord, 46 | #[error("invalid tabix index")] 47 | TabixInvalidIndex, 48 | 49 | // Errors for BAM 50 | #[error("error parsing CIGAR string: {msg}")] 51 | BamParseCigar { msg: String }, 52 | #[error("unexpected CIGAR operation: {msg}")] 53 | BamUnexpectedCigarOperation { msg: String }, 54 | #[error("error parsing SAM record: {rec}")] 55 | BamParseSAM { rec: String }, 56 | #[error("invalid path to CRAM-reference {path}")] 57 | BamInvalidReferencePath { path: PathBuf }, 58 | #[error("invalid compression level {level}")] 59 | BamInvalidCompressionLevel { level: u32 }, 60 | #[error("unable 
to open SAM/BAM/CRAM file at {target}")] 61 | BamOpen { target: String }, 62 | #[error("unable to open SAM/BAM/CRAM index for {target}; please create an index")] 63 | BamInvalidIndex { target: String }, 64 | #[error("invalid record in SAM/BAM/CRAM file")] 65 | BamInvalidRecord, 66 | #[error("truncated record in SAM/BAM/CRAM file")] 67 | BamTruncatedRecord, 68 | #[error( 69 | "format not indexable by htslib (format is detected as something else than SAM/BAM/CRAM)" 70 | )] 71 | BamNotIndexable, 72 | #[error("failed to write BAM/CRAM index (out of disk space?)")] 73 | BamWriteIndex, 74 | #[error("failed to build BAM/CRAM index")] 75 | BamBuildIndex, 76 | #[error("failed to create SAM/BAM/CRAM pileup")] 77 | BamPileup, 78 | #[error("file is not sorted by position")] 79 | BamUnsorted, 80 | 81 | // Errors for BAM auxiliary fields 82 | #[error("failed to add aux field (out of memory?)")] 83 | BamAux, 84 | #[error("provided string contains internal 0 byte(s)")] 85 | BamAuxStringError, 86 | #[error("failed to parse aux data")] 87 | BamAuxParsingError, 88 | #[error("the specified tag does could not be found")] 89 | BamAuxTagNotFound, 90 | #[error("data type of aux field is not known")] 91 | BamAuxUnknownType, 92 | #[error("failed to add aux field, tag is already present")] 93 | BamAuxTagAlreadyPresent, 94 | #[error("updating the aux field for this datatype is not supported")] 95 | BamAuxTagUpdatingNotSupported, 96 | 97 | // Errors for base modification fields 98 | #[error("no base modification tag found for record")] 99 | BamBaseModificationTagNotFound, 100 | #[error("no base modification with the specified code found in record")] 101 | BamBaseModificationTypeNotFound, 102 | #[error("base modification iteration failed")] 103 | BamBaseModificationIterationFailed, 104 | #[error("base modification found too many modifications")] 105 | BamBaseModificationTooManyMods, 106 | 107 | // Errors for BCF 108 | #[error("error allocating internal data structure for BCF/VCF reader (out of 
memory?)")] 109 | BcfAllocationError, 110 | #[error("failed to open BCF/VCF from {target:?}")] 111 | BcfOpen { target: String }, 112 | #[error("invalid record in BCF/VCF file")] 113 | BcfInvalidRecord, 114 | #[error("tag {tag} undefined in BCF/VCF header")] 115 | BcfUndefinedTag { tag: String }, 116 | #[error("unexpected type for tag {tag} in BCF/VCF file")] 117 | BcfUnexpectedType { tag: String }, 118 | #[error("tag {tag} missing from record {record} in BCF/VCF file")] 119 | BcfMissingTag { tag: String, record: String }, 120 | #[error("error setting tag {tag} in BCF/VCF record (out of memory?)")] 121 | BcfSetTag { tag: String }, 122 | #[error("ID {rid} not found in BCF/VCF header")] 123 | BcfUnknownRID { rid: u32 }, 124 | #[error("contig {contig} not found in BCF/VCF header")] 125 | BcfUnknownContig { contig: String }, 126 | #[error("ID {id} not found in BCF/VCF header")] 127 | BcfUnknownID { id: String }, 128 | #[error("sample {name} not found in BCF/VCF header")] 129 | BcfUnknownSample { name: String }, 130 | #[error("duplicate sample names given for subsetting BCF/VCF")] 131 | BcfDuplicateSampleNames, 132 | #[error("failed to set values in BCF/VCF record (out of memory?)")] 133 | BcfSetValues, 134 | #[error("failed to remove alleles in BCF/VCF record")] 135 | BcfRemoveAlleles, 136 | #[error("failed to render BCF record as string")] 137 | BcfToString, 138 | 139 | #[error("invalid compression level {level}")] 140 | BgzfInvalidCompressionLevel { level: i8 }, 141 | #[error("failed setting hts reading options")] 142 | HtsSetOpt, 143 | #[error("failed calculating slow index statistics")] 144 | SlowIdxStats, 145 | #[error("invalid tid {tid}")] 146 | InvalidTid { tid: i32 }, 147 | #[error("No sequences in the reference")] 148 | NoSequencesInReference, 149 | } 150 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 
3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | branches: [master] 8 | 9 | jobs: 10 | Formatting: 11 | runs-on: ubuntu-22.04 12 | steps: 13 | - name: Checkout repository 14 | uses: actions/checkout@v5 15 | 16 | - name: Install stable toolchain 17 | uses: dtolnay/rust-toolchain@stable 18 | with: 19 | toolchain: stable 20 | components: rustfmt 21 | 22 | - name: Check format 23 | run: cargo fmt -- --check 24 | 25 | Linting: 26 | runs-on: ubuntu-22.04 27 | steps: 28 | - name: Checkout repository 29 | uses: actions/checkout@v5 30 | with: 31 | submodules: recursive 32 | 33 | - name: Install stable toolchain 34 | uses: dtolnay/rust-toolchain@stable 35 | with: 36 | toolchain: stable 37 | components: clippy 38 | 39 | - name: Lint with clippy 40 | run: RUSTFLAGS="-Dwarnings" cargo clippy --all-features --all-targets -- -Dclippy::all -Dunused_imports 41 | 42 | Testing: 43 | needs: Formatting 44 | strategy: 45 | matrix: 46 | os: [ubuntu-22.04, ubuntu-22.04-arm] 47 | runs-on: ${{ matrix.os }} 48 | steps: 49 | - name: Checkout repository 50 | uses: actions/checkout@v5 51 | with: 52 | submodules: recursive 53 | 54 | - name: Install nightly toolchain 55 | uses: dtolnay/rust-toolchain@stable 56 | with: 57 | toolchain: nightly 58 | 59 | - name: Install system dependencies 60 | run: | 61 | sudo apt-get install --yes zlib1g-dev libbz2-dev musl musl-dev musl-tools clang libc6-dev 62 | 63 | - uses: Swatinem/rust-cache@v2 64 | 65 | - name: Run cargo-tarpaulin 66 | run: | 67 | set -x 68 | cargo install cargo-tarpaulin 69 | cargo tarpaulin --all-features --tests --doc --out Lcov -- --test-threads 1 70 | 71 | - name: Upload coverage 72 | uses: coverallsapp/github-action@v2 73 | with: 74 | github-token: ${{ secrets.GITHUB_TOKEN }} 75 | path-to-lcov: ./lcov.info 76 | 77 | Testing-Features: 78 | needs: Formatting 79 | strategy: 80 | matrix: 81 | os: [ubuntu-22.04, ubuntu-22.04-arm] 82 | target: 83 | - no-default-features 84 | - all-features 85 | include: 86 | - target: 
no-default-features 87 | args: --no-default-features 88 | - target: all-features 89 | os: ubuntu-22.04 90 | args: --all-features 91 | toolchain_target: x86_64-unknown-linux-musl 92 | - target: all-features 93 | os: ubuntu-22.04-arm 94 | args: --all-features 95 | toolchain_target: aarch64-unknown-linux-musl 96 | runs-on: ${{ matrix.os }} 97 | 98 | steps: 99 | - name: Checkout repository 100 | uses: actions/checkout@v5 101 | with: 102 | submodules: recursive 103 | 104 | - name: Install stable toolchain 105 | uses: dtolnay/rust-toolchain@stable 106 | with: 107 | toolchain: stable 108 | 109 | - name: Install system dependencies 110 | run: | 111 | sudo apt-get install --yes zlib1g-dev libbz2-dev musl musl-dev musl-tools clang libc6-dev 112 | 113 | - uses: Swatinem/rust-cache@v2 114 | 115 | - name: Test 116 | run: | 117 | cargo test ${{ matrix.args }} 118 | 119 | Testing-MacOS: 120 | needs: Formatting 121 | runs-on: macos-latest 122 | strategy: 123 | matrix: 124 | target: 125 | - intel-monterey 126 | - intel-ventura 127 | - silicon-sonoma 128 | include: 129 | - target: intel-monterey 130 | os: macOS-12.0 131 | toolchain_target: x86_64-apple-darwin 132 | toolchain: stable 133 | aux_args: --target x86_64-apple-darwin 134 | default: true 135 | - target: intel-ventura 136 | os: macOS-13.0 137 | toolchain_target: x86_64-apple-darwin 138 | toolchain: stable 139 | aux_args: --target x86_64-apple-darwin 140 | default: true 141 | - target: silicon-sonoma 142 | os: macOS-14.0 143 | toolchain_target: aarch64-apple-darwin 144 | toolchain: stable 145 | aux_args: "" 146 | default: false 147 | 148 | steps: 149 | - name: Checkout repository 150 | uses: actions/checkout@v5 151 | with: 152 | submodules: recursive 153 | 154 | - name: Install stable toolchain 155 | uses: dtolnay/rust-toolchain@stable 156 | with: 157 | toolchain: ${{ matrix.toolchain }} 158 | targets: ${{ matrix.toolchain_target }} 159 | override: true 160 | 161 | - name: Install htslib dependencies 162 | run: brew install 
bzip2 zlib xz curl-openssl 163 | 164 | - name: Test 165 | run: | 166 | cargo test --release --all-features --verbose ${{ matrix.aux_args }} 167 | # Testing-OSX-MUSL-BigSur: 168 | # needs: Formatting 169 | # runs-on: macOS-11.0 170 | # steps: 171 | # Test MUSL builds on OSX 172 | # 173 | # - uses: actions-rs/toolchain@v1.0.6 174 | # with: 175 | # toolchain: stable 176 | # target: x86_64-unknown-linux-musl 177 | # override: true 178 | 179 | # - name: Install OSX musl-cross 180 | # run: brew install FiloSottile/musl-cross/musl-cross 181 | 182 | # # https://github.com/FiloSottile/homebrew-musl-cross/issues/16 183 | # - name: Provide musl-gcc symlink for the right musl arch 184 | # run: ln -sf /usr/local/opt/musl-cross/libexec/bin/x86_64-linux-musl-gcc /usr/local/bin/musl-gcc 185 | 186 | # - name: Test musl build without default features 187 | # uses: actions-rs/cargo@v1 188 | # with: 189 | # use-cross: false # cross is not supported on GHA OSX runner, see: https://github.community/t/why-is-docker-not-installed-on-macos/17017 190 | # command: test 191 | # args: --release --target x86_64-unknown-linux-musl --no-default-features 192 | 193 | # - name: Test musl build with all features and debug symbols (non --release) on OSX 194 | # uses: actions-rs/cargo@v1.0.1 195 | # with: 196 | # use-cross: false 197 | # command: test 198 | # args: --target x86_64-unknown-linux-musl --all-features --verbose 199 | -------------------------------------------------------------------------------- /src/bam/buffer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Johannes Köster. 2 | // Licensed under the MIT license (http://opensource.org/licenses/MIT) 3 | // This file may not be copied, modified, or distributed 4 | // except according to those terms. 
5 | 6 | use std::collections::{vec_deque, VecDeque}; 7 | use std::mem; 8 | use std::str; 9 | use std::sync::Arc; 10 | 11 | use crate::bam; 12 | use crate::bam::Read; 13 | use crate::errors::{Error, Result}; 14 | /// A buffer for BAM records. This allows access regions in a sorted BAM file while iterating 15 | /// over it in a single pass. 16 | /// The buffer is implemented as a ringbuffer, such that extension or movement to the right has 17 | /// linear complexity. The buffer makes use of indexed random access. Hence, when fetching a 18 | /// region at the very end of the BAM, everything before is omitted without cost. 19 | #[derive(Debug)] 20 | pub struct RecordBuffer { 21 | reader: bam::IndexedReader, 22 | inner: VecDeque>, 23 | overflow: Option>, 24 | cache_cigar: bool, 25 | min_refetch_distance: u64, 26 | buffer_record: Arc, 27 | start_pos: Option, 28 | } 29 | 30 | unsafe impl Sync for RecordBuffer {} 31 | unsafe impl Send for RecordBuffer {} 32 | 33 | impl RecordBuffer { 34 | /// Create a new `RecordBuffer`. 35 | /// 36 | /// # Arguments 37 | /// 38 | /// * `bam` - BAM reader 39 | /// * `cache_cigar` - whether to call `bam::Record::cache_cigar()` for each record. 40 | pub fn new(bam: bam::IndexedReader, cache_cigar: bool) -> Self { 41 | RecordBuffer { 42 | reader: bam, 43 | inner: VecDeque::new(), 44 | overflow: None, 45 | cache_cigar, 46 | min_refetch_distance: 1, 47 | buffer_record: Arc::new(bam::Record::new()), 48 | start_pos: Some(0), 49 | } 50 | } 51 | 52 | /// maximum distance to previous fetch window such that a 53 | /// new fetch operation is performed. If the distance is smaller, buffer will simply 54 | /// read through until the start of the new fetch window (probably saving some time 55 | /// by avoiding the random access). 
56 | pub fn set_min_refetch_distance(&mut self, min_refetch_distance: u64) { 57 | self.min_refetch_distance = min_refetch_distance; 58 | } 59 | 60 | /// Return start position of buffer 61 | pub fn start(&self) -> Option { 62 | self.inner.front().map(|rec| rec.pos() as u64) 63 | } 64 | 65 | /// Return end position of buffer. 66 | pub fn end(&self) -> Option { 67 | self.inner.back().map(|rec| rec.pos() as u64) 68 | } 69 | 70 | pub fn tid(&self) -> Option { 71 | self.inner.back().map(|rec| rec.tid()) 72 | } 73 | 74 | /// Fill buffer at the given interval. If the start coordinate is left of 75 | /// the previous start coordinate, this will use an additional BAM fetch IO operation. 76 | /// Coordinates are 0-based, and end is exclusive. 77 | /// Returns tuple with numbers of added and deleted records since the previous fetch. 78 | #[allow(unused_assignments)] // TODO this is needed because rustc thinks that deleted is unused 79 | pub fn fetch(&mut self, chrom: &[u8], start: u64, end: u64) -> Result<(usize, usize)> { 80 | let mut added = 0; 81 | // move overflow from last fetch into ringbuffer 82 | if self.overflow.is_some() { 83 | added += 1; 84 | self.inner.push_back(self.overflow.take().unwrap()); 85 | } 86 | 87 | if let Some(tid) = self.reader.header.tid(chrom) { 88 | let mut deleted = 0; 89 | let window_start = start; 90 | if self.inner.is_empty() 91 | || window_start.saturating_sub(self.end().unwrap()) >= self.min_refetch_distance 92 | || self.tid().unwrap() != tid as i32 93 | || self.start().unwrap() > self.start_pos.unwrap() 94 | { 95 | let end = self.reader.header.target_len(tid).unwrap(); 96 | self.reader.fetch((tid, window_start, end))?; 97 | deleted = self.inner.len(); 98 | self.inner.clear(); 99 | } else { 100 | // remove records too far left 101 | let to_remove = self 102 | .inner 103 | .iter() 104 | .take_while(|rec| rec.pos() < window_start as i64) 105 | .count(); 106 | for _ in 0..to_remove { 107 | self.inner.pop_front(); 108 | } 109 | deleted = 
to_remove; 110 | } 111 | 112 | // extend to the right 113 | loop { 114 | match self 115 | .reader 116 | .read(Arc::get_mut(&mut self.buffer_record).unwrap()) 117 | { 118 | None => break, 119 | Some(res) => res?, 120 | } 121 | 122 | if self.buffer_record.is_unmapped() { 123 | continue; 124 | } 125 | 126 | let pos = self.buffer_record.pos(); 127 | 128 | // skip records before the start 129 | if pos < start as i64 { 130 | continue; 131 | } 132 | 133 | // Record is kept, do not reuse it for next iteration 134 | // and thus create a new one. 135 | let mut record = 136 | mem::replace(&mut self.buffer_record, Arc::new(bam::Record::new())); 137 | 138 | if self.cache_cigar { 139 | Arc::get_mut(&mut record).unwrap().cache_cigar(); 140 | } 141 | 142 | if pos >= end as i64 { 143 | self.overflow = Some(record); 144 | break; 145 | } else { 146 | self.inner.push_back(record); 147 | added += 1; 148 | } 149 | } 150 | self.start_pos = Some(self.start().unwrap_or(window_start)); 151 | 152 | Ok((added, deleted)) 153 | } else { 154 | Err(Error::UnknownSequence { 155 | sequence: str::from_utf8(chrom).unwrap().to_owned(), 156 | }) 157 | } 158 | } 159 | 160 | /// Iterate over records that have been fetched with `fetch`. 161 | pub fn iter(&self) -> vec_deque::Iter<'_, Arc> { 162 | self.inner.iter() 163 | } 164 | 165 | /// Iterate over mutable references to records that have been fetched with `fetch`. 
166 | pub fn iter_mut(&mut self) -> vec_deque::IterMut<'_, Arc> { 167 | self.inner.iter_mut() 168 | } 169 | 170 | pub fn len(&self) -> usize { 171 | self.inner.len() 172 | } 173 | 174 | pub fn is_empty(&self) -> bool { 175 | self.len() == 0 176 | } 177 | } 178 | 179 | #[cfg(test)] 180 | mod tests { 181 | use super::*; 182 | use crate::bam; 183 | 184 | #[test] 185 | fn test_buffer() { 186 | let reader = bam::IndexedReader::from_path("test/test.bam").unwrap(); 187 | let mut buffer = RecordBuffer::new(reader, false); 188 | 189 | buffer.fetch(b"CHROMOSOME_I", 1, 5).unwrap(); 190 | { 191 | let records: Vec<_> = buffer.iter().collect(); 192 | assert_eq!(records.len(), 6); 193 | assert_eq!(records[0].pos(), 1); 194 | assert_eq!(records[1].pos(), 1); 195 | assert_eq!(records[2].pos(), 1); 196 | assert_eq!(records[3].pos(), 1); 197 | assert_eq!(records[4].pos(), 1); 198 | assert_eq!(records[5].pos(), 1); 199 | } 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /src/bcf/buffer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Johannes Köster. 2 | // Licensed under the MIT license (http://opensource.org/licenses/MIT) 3 | // This file may not be copied, modified, or distributed 4 | // except according to those terms. 5 | 6 | use std::cmp::Ordering; 7 | use std::collections::{vec_deque, VecDeque}; 8 | use std::mem; 9 | 10 | use crate::bcf::{self, Read}; 11 | use crate::errors::Result; 12 | 13 | /// A buffer for BCF records. This allows access regions in a sorted BCF file while iterating 14 | /// over it in a single pass. 15 | /// The buffer is implemented as a ringbuffer, such that extension or movement to the right has 16 | /// linear complexity. The buffer does not use any indexed random access. Hence, for getting a 17 | /// region at the very end of the BCF, you will have to wait until all records before have 18 | /// been read. 
19 | #[derive(Debug)] 20 | pub struct RecordBuffer { 21 | reader: bcf::Reader, 22 | ringbuffer: VecDeque, 23 | ringbuffer2: VecDeque, 24 | overflow: Option, 25 | } 26 | 27 | unsafe impl Sync for RecordBuffer {} 28 | unsafe impl Send for RecordBuffer {} 29 | 30 | impl RecordBuffer { 31 | /// Create new buffer. 32 | pub fn new(reader: bcf::Reader) -> Self { 33 | RecordBuffer { 34 | reader, 35 | ringbuffer: VecDeque::new(), 36 | ringbuffer2: VecDeque::new(), 37 | overflow: None, 38 | } 39 | } 40 | 41 | fn last_rid(&self) -> Option { 42 | self.ringbuffer.back().map(|rec| rec.rid().unwrap()) 43 | } 44 | 45 | fn next_rid(&self) -> Option { 46 | self.ringbuffer2.back().map(|rec| rec.rid().unwrap()) 47 | } 48 | 49 | fn swap_buffers(&mut self) { 50 | // swap with buffer for next rid 51 | mem::swap(&mut self.ringbuffer2, &mut self.ringbuffer); 52 | // clear second buffer 53 | self.ringbuffer2.clear(); 54 | } 55 | 56 | fn drain_left(&mut self, rid: u32, window_start: u64) -> usize { 57 | // remove records too far left or from wrong rid 58 | // rec.rid() will always yield Some(), because otherwise we won't put the rec into the 59 | // buffer. 60 | let to_remove = self 61 | .ringbuffer 62 | .iter() 63 | .take_while(|rec| (rec.pos() as u64) < window_start || rec.rid().unwrap() != rid) 64 | .count(); 65 | self.ringbuffer.drain(..to_remove); 66 | to_remove 67 | } 68 | 69 | /// Fill the buffer with variants in the given window. The start coordinate has to be right of 70 | /// the start coordinate of any previous `fill` operation. 71 | /// Coordinates are 0-based, and end is exclusive. 72 | /// Returns tuple with numbers of added and deleted records compared to previous fetch. 73 | pub fn fetch(&mut self, chrom: &[u8], start: u64, end: u64) -> Result<(usize, usize)> { 74 | // TODO panic if start is left of previous start or we have moved past the given chrom 75 | // before. 
76 | let rid = self.reader.header.name2rid(chrom)?; 77 | let mut added = 0; 78 | let mut deleted = 0; 79 | 80 | // shrink and swap 81 | match (self.last_rid(), self.next_rid()) { 82 | (Some(last_rid), _) => { 83 | if last_rid != rid { 84 | deleted = self.ringbuffer.len(); 85 | self.swap_buffers(); 86 | added = self.ringbuffer.len(); 87 | // TODO drain left? 88 | } else { 89 | deleted = self.drain_left(rid, start); 90 | } 91 | } 92 | (_, Some(_)) => { 93 | // TODO is this really necessary? If there was no fetch before, there is nothing 94 | // to delete. 95 | deleted = self.ringbuffer.len(); 96 | self.swap_buffers(); 97 | deleted += self.drain_left(rid, start); 98 | added = self.ringbuffer.len(); 99 | } 100 | _ => (), 101 | } 102 | 103 | if !self.ringbuffer2.is_empty() { 104 | // We have already read beyond the current rid. Hence we can't extend to the right for 105 | // this rid. 106 | return Ok((added, deleted)); 107 | } 108 | 109 | // move overflow from last fill into ringbuffer 110 | if self.overflow.is_some() { 111 | let pos = self.overflow.as_ref().unwrap().pos() as u64; 112 | if pos >= start { 113 | if pos <= end { 114 | self.ringbuffer.push_back(self.overflow.take().unwrap()); 115 | added += 1; 116 | } else { 117 | return Ok((added, deleted)); 118 | } 119 | } else { 120 | // discard overflow 121 | self.overflow.take(); 122 | } 123 | } 124 | 125 | // extend to the right 126 | loop { 127 | let mut rec = self.reader.empty_record(); 128 | 129 | if self.reader.read(&mut rec).is_none() { 130 | // EOF 131 | break; 132 | } 133 | let pos = rec.pos() as u64; 134 | if let Some(rec_rid) = rec.rid() { 135 | match rec_rid.cmp(&rid) { 136 | Ordering::Equal => { 137 | if pos >= end { 138 | // Record is beyond our window. Store it anyways but stop. 139 | self.overflow = Some(rec); 140 | break; 141 | } else if pos >= start { 142 | // Record is within our window. 
143 | self.ringbuffer.push_back(rec); 144 | added += 1; 145 | } else { 146 | // Record is upstream of our window, ignore it 147 | continue; 148 | } 149 | } 150 | Ordering::Greater => { 151 | // record comes from next rid. Store it in second buffer but stop filling. 152 | self.ringbuffer2.push_back(rec); 153 | break; 154 | } 155 | _ => { 156 | // Record comes from previous rid. Ignore it. 157 | continue; 158 | } 159 | } 160 | } else { 161 | // skip records without proper rid 162 | continue; 163 | } 164 | } 165 | 166 | Ok((added, deleted)) 167 | } 168 | 169 | /// Iterate over records that have been fetched with `fetch`. 170 | pub fn iter(&self) -> vec_deque::Iter<'_, bcf::Record> { 171 | self.ringbuffer.iter() 172 | } 173 | 174 | /// Iterate over mutable references to records that have been fetched with `fetch`. 175 | pub fn iter_mut(&mut self) -> vec_deque::IterMut<'_, bcf::Record> { 176 | self.ringbuffer.iter_mut() 177 | } 178 | 179 | pub fn len(&self) -> usize { 180 | self.ringbuffer.len() 181 | } 182 | 183 | pub fn is_empty(&self) -> bool { 184 | self.len() == 0 185 | } 186 | } 187 | 188 | #[cfg(test)] 189 | mod tests { 190 | use super::*; 191 | use crate::bcf; 192 | 193 | #[test] 194 | fn test_buffer() { 195 | let reader = bcf::Reader::from_path("test/test.bcf").unwrap(); 196 | let mut buffer = RecordBuffer::new(reader); 197 | 198 | buffer.fetch(b"1", 100, 10023).unwrap(); 199 | { 200 | let records: Vec<_> = buffer.iter().collect(); 201 | assert_eq!(records.len(), 2); 202 | assert_eq!(records[0].pos(), 10021); 203 | assert_eq!(records[1].pos(), 10022); 204 | } 205 | 206 | buffer.fetch(b"1", 10023, 10024).unwrap(); 207 | { 208 | let records: Vec<_> = buffer.iter().collect(); 209 | assert_eq!(records.len(), 1); 210 | assert_eq!(records[0].pos(), 10023); 211 | } 212 | } 213 | } 214 | -------------------------------------------------------------------------------- /test/test_nonstandard_orientation.sam: 
-------------------------------------------------------------------------------- 1 | @HD VN:1.6 SO:coordinate 2 | @SQ SN:1 LN:248956422 3 | @SQ SN:10 LN:133797422 4 | @SQ SN:11 LN:135086622 5 | @SQ SN:12 LN:133275309 6 | @SQ SN:13 LN:114364328 7 | @SQ SN:14 LN:107043718 8 | @SQ SN:15 LN:101991189 9 | @SQ SN:16 LN:90338345 10 | @SQ SN:17 LN:83257441 11 | @SQ SN:18 LN:80373285 12 | @SQ SN:19 LN:58617616 13 | @SQ SN:2 LN:242193529 14 | @SQ SN:20 LN:64444167 15 | @SQ SN:21 LN:46709983 16 | @SQ SN:22 LN:50818468 17 | @SQ SN:3 LN:198295559 18 | @SQ SN:4 LN:190214555 19 | @SQ SN:5 LN:181538259 20 | @SQ SN:6 LN:170805979 21 | @SQ SN:7 LN:159345973 22 | @SQ SN:8 LN:145138636 23 | @SQ SN:9 LN:138394717 24 | @SQ SN:MT LN:16569 25 | @SQ SN:X LN:156040895 26 | @SQ SN:Y LN:57227415 27 | @SQ SN:KI270728.1 LN:1872759 28 | @SQ SN:KI270727.1 LN:448248 29 | @SQ SN:KI270442.1 LN:392061 30 | @SQ SN:KI270729.1 LN:280839 31 | @SQ SN:GL000225.1 LN:211173 32 | @SQ SN:KI270743.1 LN:210658 33 | @SQ SN:GL000008.2 LN:209709 34 | @SQ SN:GL000009.2 LN:201709 35 | @SQ SN:KI270747.1 LN:198735 36 | @SQ SN:KI270722.1 LN:194050 37 | @SQ SN:GL000194.1 LN:191469 38 | @SQ SN:KI270742.1 LN:186739 39 | @SQ SN:GL000205.2 LN:185591 40 | @SQ SN:GL000195.1 LN:182896 41 | @SQ SN:KI270736.1 LN:181920 42 | @SQ SN:KI270733.1 LN:179772 43 | @SQ SN:GL000224.1 LN:179693 44 | @SQ SN:GL000219.1 LN:179198 45 | @SQ SN:KI270719.1 LN:176845 46 | @SQ SN:GL000216.2 LN:176608 47 | @SQ SN:KI270712.1 LN:176043 48 | @SQ SN:KI270706.1 LN:175055 49 | @SQ SN:KI270725.1 LN:172810 50 | @SQ SN:KI270744.1 LN:168472 51 | @SQ SN:KI270734.1 LN:165050 52 | @SQ SN:GL000213.1 LN:164239 53 | @SQ SN:GL000220.1 LN:161802 54 | @SQ SN:KI270715.1 LN:161471 55 | @SQ SN:GL000218.1 LN:161147 56 | @SQ SN:KI270749.1 LN:158759 57 | @SQ SN:KI270741.1 LN:157432 58 | @SQ SN:GL000221.1 LN:155397 59 | @SQ SN:KI270716.1 LN:153799 60 | @SQ SN:KI270731.1 LN:150754 61 | @SQ SN:KI270751.1 LN:150742 62 | @SQ SN:KI270750.1 LN:148850 63 | @SQ SN:KI270519.1 
LN:138126 64 | @SQ SN:GL000214.1 LN:137718 65 | @SQ SN:KI270708.1 LN:127682 66 | @SQ SN:KI270730.1 LN:112551 67 | @SQ SN:KI270438.1 LN:112505 68 | @SQ SN:KI270737.1 LN:103838 69 | @SQ SN:KI270721.1 LN:100316 70 | @SQ SN:KI270738.1 LN:99375 71 | @SQ SN:KI270748.1 LN:93321 72 | @SQ SN:KI270435.1 LN:92983 73 | @SQ SN:GL000208.1 LN:92689 74 | @SQ SN:KI270538.1 LN:91309 75 | @SQ SN:KI270756.1 LN:79590 76 | @SQ SN:KI270739.1 LN:73985 77 | @SQ SN:KI270757.1 LN:71251 78 | @SQ SN:KI270709.1 LN:66860 79 | @SQ SN:KI270746.1 LN:66486 80 | @SQ SN:KI270753.1 LN:62944 81 | @SQ SN:KI270589.1 LN:44474 82 | @SQ SN:KI270726.1 LN:43739 83 | @SQ SN:KI270735.1 LN:42811 84 | @SQ SN:KI270711.1 LN:42210 85 | @SQ SN:KI270745.1 LN:41891 86 | @SQ SN:KI270714.1 LN:41717 87 | @SQ SN:KI270732.1 LN:41543 88 | @SQ SN:KI270713.1 LN:40745 89 | @SQ SN:KI270754.1 LN:40191 90 | @SQ SN:KI270710.1 LN:40176 91 | @SQ SN:KI270717.1 LN:40062 92 | @SQ SN:KI270724.1 LN:39555 93 | @SQ SN:KI270720.1 LN:39050 94 | @SQ SN:KI270723.1 LN:38115 95 | @SQ SN:KI270718.1 LN:38054 96 | @SQ SN:KI270317.1 LN:37690 97 | @SQ SN:KI270740.1 LN:37240 98 | @SQ SN:KI270755.1 LN:36723 99 | @SQ SN:KI270707.1 LN:32032 100 | @SQ SN:KI270579.1 LN:31033 101 | @SQ SN:KI270752.1 LN:27745 102 | @SQ SN:KI270512.1 LN:22689 103 | @SQ SN:KI270322.1 LN:21476 104 | @SQ SN:GL000226.1 LN:15008 105 | @SQ SN:KI270311.1 LN:12399 106 | @SQ SN:KI270366.1 LN:8320 107 | @SQ SN:KI270511.1 LN:8127 108 | @SQ SN:KI270448.1 LN:7992 109 | @SQ SN:KI270521.1 LN:7642 110 | @SQ SN:KI270581.1 LN:7046 111 | @SQ SN:KI270582.1 LN:6504 112 | @SQ SN:KI270515.1 LN:6361 113 | @SQ SN:KI270588.1 LN:6158 114 | @SQ SN:KI270591.1 LN:5796 115 | @SQ SN:KI270522.1 LN:5674 116 | @SQ SN:KI270507.1 LN:5353 117 | @SQ SN:KI270590.1 LN:4685 118 | @SQ SN:KI270584.1 LN:4513 119 | @SQ SN:KI270320.1 LN:4416 120 | @SQ SN:KI270382.1 LN:4215 121 | @SQ SN:KI270468.1 LN:4055 122 | @SQ SN:KI270467.1 LN:3920 123 | @SQ SN:KI270362.1 LN:3530 124 | @SQ SN:KI270517.1 LN:3253 125 | @SQ SN:KI270593.1 
LN:3041 126 | @SQ SN:KI270528.1 LN:2983 127 | @SQ SN:KI270587.1 LN:2969 128 | @SQ SN:KI270364.1 LN:2855 129 | @SQ SN:KI270371.1 LN:2805 130 | @SQ SN:KI270333.1 LN:2699 131 | @SQ SN:KI270374.1 LN:2656 132 | @SQ SN:KI270411.1 LN:2646 133 | @SQ SN:KI270414.1 LN:2489 134 | @SQ SN:KI270510.1 LN:2415 135 | @SQ SN:KI270390.1 LN:2387 136 | @SQ SN:KI270375.1 LN:2378 137 | @SQ SN:KI270420.1 LN:2321 138 | @SQ SN:KI270509.1 LN:2318 139 | @SQ SN:KI270315.1 LN:2276 140 | @SQ SN:KI270302.1 LN:2274 141 | @SQ SN:KI270518.1 LN:2186 142 | @SQ SN:KI270530.1 LN:2168 143 | @SQ SN:KI270304.1 LN:2165 144 | @SQ SN:KI270418.1 LN:2145 145 | @SQ SN:KI270424.1 LN:2140 146 | @SQ SN:KI270417.1 LN:2043 147 | @SQ SN:KI270508.1 LN:1951 148 | @SQ SN:KI270303.1 LN:1942 149 | @SQ SN:KI270381.1 LN:1930 150 | @SQ SN:KI270529.1 LN:1899 151 | @SQ SN:KI270425.1 LN:1884 152 | @SQ SN:KI270396.1 LN:1880 153 | @SQ SN:KI270363.1 LN:1803 154 | @SQ SN:KI270386.1 LN:1788 155 | @SQ SN:KI270465.1 LN:1774 156 | @SQ SN:KI270383.1 LN:1750 157 | @SQ SN:KI270384.1 LN:1658 158 | @SQ SN:KI270330.1 LN:1652 159 | @SQ SN:KI270372.1 LN:1650 160 | @SQ SN:KI270548.1 LN:1599 161 | @SQ SN:KI270580.1 LN:1553 162 | @SQ SN:KI270387.1 LN:1537 163 | @SQ SN:KI270391.1 LN:1484 164 | @SQ SN:KI270305.1 LN:1472 165 | @SQ SN:KI270373.1 LN:1451 166 | @SQ SN:KI270422.1 LN:1445 167 | @SQ SN:KI270316.1 LN:1444 168 | @SQ SN:KI270340.1 LN:1428 169 | @SQ SN:KI270338.1 LN:1428 170 | @SQ SN:KI270583.1 LN:1400 171 | @SQ SN:KI270334.1 LN:1368 172 | @SQ SN:KI270429.1 LN:1361 173 | @SQ SN:KI270393.1 LN:1308 174 | @SQ SN:KI270516.1 LN:1300 175 | @SQ SN:KI270389.1 LN:1298 176 | @SQ SN:KI270466.1 LN:1233 177 | @SQ SN:KI270388.1 LN:1216 178 | @SQ SN:KI270544.1 LN:1202 179 | @SQ SN:KI270310.1 LN:1201 180 | @SQ SN:KI270412.1 LN:1179 181 | @SQ SN:KI270395.1 LN:1143 182 | @SQ SN:KI270376.1 LN:1136 183 | @SQ SN:KI270337.1 LN:1121 184 | @SQ SN:KI270335.1 LN:1048 185 | @SQ SN:KI270378.1 LN:1048 186 | @SQ SN:KI270379.1 LN:1045 187 | @SQ SN:KI270329.1 LN:1040 188 | 
@SQ SN:KI270419.1 LN:1029 189 | @SQ SN:KI270336.1 LN:1026 190 | @SQ SN:KI270312.1 LN:998 191 | @SQ SN:KI270539.1 LN:993 192 | @SQ SN:KI270385.1 LN:990 193 | @SQ SN:KI270423.1 LN:981 194 | @SQ SN:KI270392.1 LN:971 195 | @SQ SN:KI270394.1 LN:970 196 | @RG ID:chm SM:chm PL:ILLUMINA 197 | @PG ID:bwa PN:bwa VN:0.7.17-r1188 CL:bwa mem -t 8 -R @RG\tID:chm\tSM:chm\tPL:ILLUMINA resources/genome.fasta results/merged/chm_R1.fastq.gz results/merged/chm_R2.fastq.gz 198 | @PG ID:GATK ApplyBQSR VN:4.1.4.1 CL:ApplyBQSR --output results/recal/chm.sorted.bam --bqsr-recal-file /tmp/tmpuhppums6/recal_table.grp --input results/mapped/chm.sorted.bam --reference resources/genome.fasta --preserve-qscores-less-than 6 --use-original-qualities false --quantize-quals 0 --round-down-quantized false --emit-original-quals false --global-qscore-prior -1.0 --interval-set-rule UNION --interval-padding 0 --interval-exclusion-padding 0 --interval-merging-rule ALL --read-validation-stringency SILENT --seconds-between-progress-updates 10.0 --disable-sequence-dictionary-validation false --create-output-bam-index true --create-output-bam-md5 false --create-output-variant-index true --create-output-variant-md5 false --lenient false --add-output-sam-program-record true --add-output-vcf-command-line true --cloud-prefetch-buffer 40 --cloud-index-prefetch-buffer -1 --disable-bam-index-caching false --sites-only-vcf-output false --help false --version false --showHidden false --verbosity INFO --QUIET false --use-jdk-deflater false --use-jdk-inflater false --gcs-max-retries 20 --gcs-project-for-requester-pays --disable-tool-default-read-filters false PN:GATK ApplyBQSR 199 | @PG ID:samtools PN:samtools PP:GATK ApplyBQSR VN:1.20 CL:samtools view -H chm.bam 200 | @PG ID:samtools.1 PN:samtools PP:samtools VN:1.20 CL:samtools view -b nonstandard_orientation.sam 201 | @PG ID:samtools.2 PN:samtools PP:samtools.1 VN:1.20 CL:samtools view -h test_nonstandard_orientation.bam 202 | HK35WCCXX160124:1:2117:25885:46859 163 1 
231 46 113M38S = 351 271 CAACCTCAGTTGACTTGGGGGACAAGGCAGCAGGAGCACCAGACCCCTGCACCACCTCCTTCTGGGTGGGAGATGAGGCAGCAGGAGCACCAGGGCCCTTCACGACCTCTTTCCAGGTGGGGGGGGAGGGAGCAGGGGCAGGAGAGCGCGT =BB?BCBCABD@C@?D@AAABD@CB?B@C@ACABCAAC@BDAC@BCCCBBD@CD@CDCCCEC;BCCCBCCE?CCCBCDCECCECDDCCEA(.:BBCBBDAABBABBACCADCBCBB?CBBA1BABAAA@@B@AABBAAACB?9BBABA@DDAC2BBBBCBCDBCCFA=? XA:Z:1,-26345048,17S134M,8; MC:Z:113M38S MD:Z:1C7C5G0A28G5T4A46G32A14 RG:Z:chm NM:i:9 AS:i:109 XS:i:94 204 | -------------------------------------------------------------------------------- /src/faidx/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Manuel Landesfeind, Evotec International GmbH 2 | // Licensed under the MIT license (http://opensource.org/licenses/MIT) 3 | // This file may not be copied, modified, or distributed 4 | // except according to those terms. 5 | 6 | //! 7 | //! Module for working with faidx-indexed FASTA files. 8 | //! 9 | 10 | use std::ffi; 11 | use std::path::Path; 12 | use url::Url; 13 | 14 | use crate::htslib; 15 | 16 | use crate::errors::{Error, Result}; 17 | use crate::utils::path_as_bytes; 18 | 19 | /// A Fasta reader. 20 | #[derive(Debug)] 21 | pub struct Reader { 22 | inner: *mut htslib::faidx_t, 23 | } 24 | 25 | /// 26 | /// Build a faidx for input path. 27 | /// 28 | /// # Errors 29 | /// If indexing fails. Could be malformatted or file could not be accessible. 30 | /// 31 | ///``` 32 | /// use rust_htslib::faidx::build; 33 | /// let path = std::path::PathBuf::from(concat!(env!("CARGO_MANIFEST_DIR"),"/test/test_cram.fa")); 34 | /// build(&path).expect("Failed to build fasta index"); 35 | ///``` 36 | /// 37 | pub fn build( 38 | path: impl Into, 39 | ) -> Result<(), std::boxed::Box> { 40 | let path = path.into(); 41 | let os_path = std::ffi::CString::new(path.display().to_string())?; 42 | let rc = unsafe { htslib::fai_build(os_path.as_ptr()) }; 43 | if rc < 0 { 44 | Err(Error::FaidxBuildFailed { path })? 
45 | } else { 46 | Ok(()) 47 | } 48 | } 49 | 50 | impl Reader { 51 | /// Create a new Reader from a path. 52 | /// 53 | /// # Arguments 54 | /// 55 | /// * `path` - the path to open. 56 | pub fn from_path>(path: P) -> Result { 57 | Self::new(&path_as_bytes(path, true)?) 58 | } 59 | 60 | /// Create a new Reader from an URL. 61 | /// 62 | /// # Arguments 63 | /// 64 | /// * `url` - the url to open 65 | pub fn from_url(url: &Url) -> Result { 66 | Self::new(url.as_str().as_bytes()) 67 | } 68 | 69 | /// Internal function to create a Reader from some sort of path (could be file path but also URL). 70 | /// The path or URL will be handled by the c-implementation transparently. 71 | /// 72 | /// # Arguments 73 | /// 74 | /// * `path` - the path or URL to open 75 | fn new(path: &[u8]) -> Result { 76 | let cpath = ffi::CString::new(path).unwrap(); 77 | let inner = unsafe { htslib::fai_load(cpath.as_ptr()) }; 78 | Ok(Self { inner }) 79 | } 80 | 81 | /// Fetch the sequence as a byte array. 82 | /// 83 | /// # Arguments 84 | /// 85 | /// * `name` - the name of the template sequence (e.g., "chr1") 86 | /// * `begin` - the offset within the template sequence (starting with 0) 87 | /// * `end` - the end position to return (if smaller than `begin`, the behavior is undefined). 
88 | pub fn fetch_seq>(&self, name: N, begin: usize, end: usize) -> Result> { 89 | if begin > i64::MAX as usize { 90 | return Err(Error::FaidxPositionTooLarge); 91 | } 92 | if end > i64::MAX as usize { 93 | return Err(Error::FaidxPositionTooLarge); 94 | } 95 | let cname = ffi::CString::new(name.as_ref().as_bytes()).unwrap(); 96 | let mut len_out: htslib::hts_pos_t = 0; 97 | let ptr = unsafe { 98 | htslib::faidx_fetch_seq64( 99 | self.inner, //*const faidx_t, 100 | cname.as_ptr(), // c_name 101 | begin as htslib::hts_pos_t, // p_beg_i 102 | end as htslib::hts_pos_t, // p_end_i 103 | &mut len_out, //len 104 | ) 105 | }; 106 | let vec = 107 | unsafe { Vec::from_raw_parts(ptr as *mut u8, len_out as usize, len_out as usize) }; 108 | Ok(vec) 109 | } 110 | 111 | /// Fetches the sequence and returns it as string. 112 | /// 113 | /// # Arguments 114 | /// 115 | /// * `name` - the name of the template sequence (e.g., "chr1") 116 | /// * `begin` - the offset within the template sequence (starting with 0) 117 | /// * `end` - the end position to return (if smaller than `begin`, the behavior is undefined). 
118 | pub fn fetch_seq_string>( 119 | &self, 120 | name: N, 121 | begin: usize, 122 | end: usize, 123 | ) -> Result { 124 | let bytes = self.fetch_seq(name, begin, end)?; 125 | Ok(std::str::from_utf8(&bytes).unwrap().to_owned()) 126 | } 127 | 128 | /// Fetches the number of sequences in the fai index 129 | pub fn n_seqs(&self) -> u64 { 130 | let n = unsafe { htslib::faidx_nseq(self.inner) }; 131 | n as u64 132 | } 133 | 134 | /// Fetches the i-th sequence name 135 | /// 136 | /// # Arguments 137 | /// 138 | /// * `i` - index to query 139 | pub fn seq_name(&self, i: i32) -> Result { 140 | let cname = unsafe { 141 | let ptr = htslib::faidx_iseq(self.inner, i); 142 | ffi::CStr::from_ptr(ptr) 143 | }; 144 | 145 | let out = match cname.to_str() { 146 | Ok(s) => s.to_string(), 147 | Err(_) => { 148 | return Err(Error::FaidxBadSeqName); 149 | } 150 | }; 151 | 152 | Ok(out) 153 | } 154 | 155 | /// Fetches the length of the given sequence name. 156 | /// 157 | /// # Arguments 158 | /// 159 | /// * `name` - the name of the template sequence (e.g., "chr1") 160 | pub fn fetch_seq_len>(&self, name: N) -> u64 { 161 | let cname = ffi::CString::new(name.as_ref().as_bytes()).unwrap(); 162 | let seq_len = unsafe { htslib::faidx_seq_len(self.inner, cname.as_ptr()) }; 163 | seq_len as u64 164 | } 165 | 166 | /// Returns a Result> for all seq names. 167 | /// # Errors 168 | /// 169 | /// * `errors::Error::FaidxBadSeqName` - missing sequence name for sequence id. 170 | /// 171 | /// If thrown, the index is malformed, and the number of sequences in the index does not match the number of sequence names available. 
172 | ///``` 173 | /// use rust_htslib::faidx::build; 174 | /// let path = std::path::PathBuf::from(concat!(env!("CARGO_MANIFEST_DIR"),"/test/test_cram.fa")); 175 | /// build(&path).expect("Failed to build fasta index"); 176 | /// let reader = rust_htslib::faidx::Reader::from_path(path).expect("Failed to open faidx"); 177 | /// assert_eq!(reader.seq_names(), Ok(vec!["chr1".to_string(), "chr2".to_string(), "chr3".to_string()])); 178 | ///``` 179 | /// 180 | pub fn seq_names(&self) -> Result> { 181 | let num_seq = self.n_seqs(); 182 | let mut ret = Vec::with_capacity(num_seq as usize); 183 | for seq_id in 0..num_seq { 184 | ret.push(self.seq_name(seq_id as i32)?); 185 | } 186 | Ok(ret) 187 | } 188 | } 189 | 190 | impl Drop for Reader { 191 | fn drop(&mut self) { 192 | unsafe { 193 | htslib::fai_destroy(self.inner); 194 | } 195 | } 196 | } 197 | 198 | unsafe impl Send for Reader {} 199 | 200 | #[cfg(test)] 201 | mod tests { 202 | use super::*; 203 | 204 | fn open_reader() -> Reader { 205 | Reader::from_path(format!("{}/test/test_cram.fa", env!("CARGO_MANIFEST_DIR"))) 206 | .ok() 207 | .unwrap() 208 | } 209 | #[test] 210 | fn faidx_open() { 211 | open_reader(); 212 | } 213 | 214 | #[test] 215 | fn faidx_read_chr_first_base() { 216 | let r = open_reader(); 217 | 218 | let bseq = r.fetch_seq("chr1", 0, 0).unwrap(); 219 | assert_eq!(bseq.len(), 1); 220 | assert_eq!(bseq, b"G"); 221 | 222 | let seq = r.fetch_seq_string("chr1", 0, 0).unwrap(); 223 | assert_eq!(seq.len(), 1); 224 | assert_eq!(seq, "G"); 225 | } 226 | 227 | #[test] 228 | fn faidx_read_chr_start() { 229 | let r = open_reader(); 230 | 231 | //for _i in 0..100_000_000 { // loop to check for memory leaks 232 | let bseq = r.fetch_seq("chr1", 0, 9).unwrap(); 233 | assert_eq!(bseq.len(), 10); 234 | assert_eq!(bseq, b"GGGCACAGCC"); 235 | 236 | let seq = r.fetch_seq_string("chr1", 0, 9).unwrap(); 237 | assert_eq!(seq.len(), 10); 238 | assert_eq!(seq, "GGGCACAGCC"); 239 | //} 240 | } 241 | 242 | #[test] 243 | fn 
faidx_read_chr_between() { 244 | let r = open_reader(); 245 | 246 | let bseq = r.fetch_seq("chr1", 4, 14).unwrap(); 247 | assert_eq!(bseq.len(), 11); 248 | assert_eq!(bseq, b"ACAGCCTCACC"); 249 | 250 | let seq = r.fetch_seq_string("chr1", 4, 14).unwrap(); 251 | assert_eq!(seq.len(), 11); 252 | assert_eq!(seq, "ACAGCCTCACC"); 253 | } 254 | 255 | #[test] 256 | fn faidx_read_chr_end() { 257 | let r = open_reader(); 258 | 259 | let bseq = r.fetch_seq("chr1", 110, 120).unwrap(); 260 | assert_eq!(bseq.len(), 10); 261 | assert_eq!(bseq, b"CCCCTCCGTG"); 262 | 263 | let seq = r.fetch_seq_string("chr1", 110, 120).unwrap(); 264 | assert_eq!(seq.len(), 10); 265 | assert_eq!(seq, "CCCCTCCGTG"); 266 | } 267 | 268 | #[test] 269 | fn faidx_read_twice_string() { 270 | let r = open_reader(); 271 | let seq = r.fetch_seq_string("chr1", 110, 120).unwrap(); 272 | assert_eq!(seq.len(), 10); 273 | assert_eq!(seq, "CCCCTCCGTG"); 274 | 275 | let seq = r.fetch_seq_string("chr1", 5, 9).unwrap(); 276 | assert_eq!(seq.len(), 5); 277 | assert_eq!(seq, "CAGCC"); 278 | } 279 | 280 | #[test] 281 | fn faidx_read_twice_bytes() { 282 | let r = open_reader(); 283 | let seq = r.fetch_seq("chr1", 110, 120).unwrap(); 284 | assert_eq!(seq.len(), 10); 285 | assert_eq!(seq, b"CCCCTCCGTG"); 286 | 287 | let seq = r.fetch_seq("chr1", 5, 9).unwrap(); 288 | assert_eq!(seq.len(), 5); 289 | assert_eq!(seq, b"CAGCC"); 290 | } 291 | 292 | #[test] 293 | fn faidx_position_too_large() { 294 | let r = open_reader(); 295 | let position_too_large = i64::MAX as usize; 296 | let res = r.fetch_seq("chr1", position_too_large, position_too_large + 1); 297 | assert_eq!(res, Err(Error::FaidxPositionTooLarge)); 298 | } 299 | 300 | #[test] 301 | fn faidx_n_seqs() { 302 | let r = open_reader(); 303 | assert_eq!(r.n_seqs(), 3); 304 | } 305 | 306 | #[test] 307 | fn faidx_seq_name() { 308 | let r = open_reader(); 309 | let n = r.seq_name(1).unwrap(); 310 | assert_eq!(n, "chr2"); 311 | } 312 | 313 | #[test] 314 | fn faidx_get_seq_len() 
{ 315 | let r = open_reader(); 316 | let chr1_len = r.fetch_seq_len("chr1"); 317 | let chr2_len = r.fetch_seq_len("chr2"); 318 | assert_eq!(chr1_len, 120u64); 319 | assert_eq!(chr2_len, 120u64); 320 | } 321 | 322 | #[test] 323 | fn open_many_readers() { 324 | for _ in 0..500_000 { 325 | let reader = open_reader(); 326 | drop(reader); 327 | } 328 | } 329 | } 330 | -------------------------------------------------------------------------------- /test/test_svlen.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.1 2 | ##FILTER= 3 | ##fileDate=20181110 4 | ##source=GenerateSVCandidates 1.3.0 5 | ##reference=file:///vol/tiny/prosic/data/Homo_sapiens.GRCh38.dna.primary_assembly.fa 6 | ##contig= 7 | ##contig= 8 | ##contig= 9 | ##contig= 10 | ##contig= 11 | ##contig= 12 | ##contig= 13 | ##contig= 14 | ##contig= 15 | ##contig= 16 | ##contig= 17 | ##contig= 18 | ##contig= 19 | ##contig= 20 | ##contig= 21 | ##contig= 22 | ##contig= 23 | ##contig= 24 | ##contig= 25 | ##contig= 26 | ##contig= 27 | ##contig= 28 | ##contig= 29 | ##contig= 30 | ##contig= 31 | ##contig= 32 | ##contig= 33 | ##contig= 34 | ##contig= 35 | ##contig= 36 | ##contig= 37 | ##contig= 38 | ##contig= 39 | ##contig= 40 | ##contig= 41 | ##contig= 42 | ##contig= 43 | ##contig= 44 | ##contig= 45 | ##contig= 46 | ##contig= 47 | ##contig= 48 | ##contig= 49 | ##contig= 50 | ##contig= 51 | ##contig= 52 | ##contig= 53 | ##contig= 54 | ##contig= 55 | ##contig= 56 | ##contig= 57 | ##contig= 58 | ##contig= 59 | ##contig= 60 | ##contig= 61 | ##contig= 62 | ##contig= 63 | ##contig= 64 | ##contig= 65 | ##contig= 66 | ##contig= 67 | ##contig= 68 | ##contig= 69 | ##contig= 70 | ##contig= 71 | ##contig= 72 | ##contig= 73 | ##contig= 74 | ##contig= 75 | ##contig= 76 | ##contig= 77 | ##contig= 78 | ##contig= 79 | ##contig= 80 | ##contig= 81 | ##contig= 82 | ##contig= 83 | ##contig= 84 | ##contig= 85 | ##contig= 86 | ##contig= 87 | ##contig= 88 | ##contig= 89 | ##contig= 
90 | ##contig= 91 | ##contig= 92 | ##contig= 93 | ##contig= 94 | ##contig= 95 | ##contig= 96 | ##contig= 97 | ##contig= 98 | ##contig= 99 | ##contig= 100 | ##contig= 101 | ##contig= 102 | ##contig= 103 | ##contig= 104 | ##contig= 105 | ##contig= 106 | ##contig= 107 | ##contig= 108 | ##contig= 109 | ##contig= 110 | ##contig= 111 | ##contig= 112 | ##contig= 113 | ##contig= 114 | ##contig= 115 | ##contig= 116 | ##contig= 117 | ##contig= 118 | ##contig= 119 | ##contig= 120 | ##contig= 121 | ##contig= 122 | ##contig= 123 | ##contig= 124 | ##contig= 125 | ##contig= 126 | ##contig= 127 | ##contig= 128 | ##contig= 129 | ##contig= 130 | ##contig= 131 | ##contig= 132 | ##contig= 133 | ##contig= 134 | ##contig= 135 | ##contig= 136 | ##contig= 137 | ##contig= 138 | ##contig= 139 | ##contig= 140 | ##contig= 141 | ##contig= 142 | ##contig= 143 | ##contig= 144 | ##contig= 145 | ##contig= 146 | ##contig= 147 | ##contig= 148 | ##contig= 149 | ##contig= 150 | ##contig= 151 | ##contig= 152 | ##contig= 153 | ##contig= 154 | ##contig= 155 | ##contig= 156 | ##contig= 157 | ##contig= 158 | ##contig= 159 | ##contig= 160 | ##contig= 161 | ##contig= 162 | ##contig= 163 | ##contig= 164 | ##contig= 165 | ##contig= 166 | ##contig= 167 | ##contig= 168 | ##contig= 169 | ##contig= 170 | ##contig= 171 | ##contig= 172 | ##contig= 173 | ##contig= 174 | ##contig= 175 | ##contig= 176 | ##contig= 177 | ##contig= 178 | ##contig= 179 | ##contig= 180 | ##contig= 181 | ##contig= 182 | ##contig= 183 | ##contig= 184 | ##contig= 185 | ##contig= 186 | ##contig= 187 | ##contig= 188 | ##contig= 189 | ##contig= 190 | ##contig= 191 | ##contig= 192 | ##contig= 193 | ##contig= 194 | ##contig= 195 | ##contig= 196 | ##contig= 197 | ##contig= 198 | ##contig= 199 | ##contig= 200 | ##INFO= 201 | ##INFO= 202 | ##INFO= 203 | ##INFO= 204 | ##INFO= 205 | ##INFO= 206 | ##INFO= 207 | ##INFO= 208 | ##INFO= 209 | ##INFO= 210 | ##INFO= 211 | ##INFO= 212 | ##INFO= 213 | ##INFO= 214 | ##INFO= 215 | ##INFO= 216 | ##INFO= 217 | 
##INFO= 218 | ##INFO= 219 | ##INFO= 220 | ##INFO= 221 | ##ALT= 222 | ##ALT= 223 | ##ALT= 224 | ##ALT= 225 | ##ALT= 226 | ##cmdline=/vol/tiny/prosic/prosic-evaluation/.snakemake/conda/b4da40e3/bin/configManta.py --tumorBam mapped-bwa/COLO_829-Ill.tumor.hg38.sorted.bam --normalBam mapped-bwa/COLO_829-Ill.normal.hg38.sorted.bam --referenceFasta ../data/Homo_sapiens.GRCh38.dna.primary_assembly.fa --runDir manta/COLO_829-Ill 227 | ##bcftools_viewVersion=1.5+htslib-1.5 228 | ##bcftools_viewCommand=view -Ob -o manta/COLO_829-Ill.all.bcf manta/COLO_829-Ill/results/variants/candidateSV.vcf.gz; Date=Sun Nov 11 02:13:49 2018 229 | ##bcftools_viewVersion=1.9+htslib-1.9 230 | ##bcftools_viewCommand=view -h manta/COLO_829-Ill.all.bcf; Date=Mon Mar 4 11:55:14 2019 231 | #CHROM POS ID REF ALT QUAL FILTER INFO 232 | 1 231589485 MantaDEL:0:55805:55805:0:0:0 CAAACAAATCTAATTGGTCAAGGTATTCCTAAACGTTTTTGCATTCAGAGGCTCTTTCATCAATCATCTCCACCCCCCAACCCTGGCTTTACTGAAGTATGATTGACAAATAAAAATTGTACACATTT C . . END=231589612;SVTYPE=DEL;SVLEN=-127;CIGAR=1M127D;UPSTREAM_PAIR_COUNT=0;DOWNSTREAM_PAIR_COUNT=0;PAIR_COUNT=0 233 | -------------------------------------------------------------------------------- /src/bam/record_serde.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | use serde::de::{self, Deserialize, Deserializer, MapAccess, SeqAccess, Visitor}; 4 | use serde::ser::SerializeStruct; 5 | use serde::{Serialize, Serializer}; 6 | use serde_bytes::{ByteBuf, Bytes}; 7 | 8 | use crate::bam::record::Record; 9 | 10 | fn fix_l_extranul(rec: &mut Record) { 11 | // first, reset the number of extranuls to 0 for calling .qname(); then calculate how many we actually have 12 | rec.inner_mut().core.l_extranul = 0; 13 | let l_extranul = rec.qname().iter().rev().take_while(|x| **x == 0u8).count() as u8; 14 | rec.inner_mut().core.l_extranul = l_extranul; 15 | } 16 | 17 | impl Serialize for Record { 18 | fn serialize(&self, serializer: S) -> Result 19 | where 
20 | S: Serializer, 21 | { 22 | let core = self.inner().core; 23 | let mut state = serializer.serialize_struct("Record", 12)?; 24 | state.serialize_field("tid", &core.tid)?; 25 | state.serialize_field("pos", &core.pos)?; 26 | state.serialize_field("bin", &core.bin)?; 27 | state.serialize_field("mapq", &core.qual)?; 28 | state.serialize_field("qname_len", &core.l_qname)?; 29 | state.serialize_field("flag", &core.flag)?; 30 | state.serialize_field("n_cigar", &core.n_cigar)?; 31 | state.serialize_field("seq_len", &core.l_qseq)?; 32 | state.serialize_field("mtid", &core.mtid)?; 33 | state.serialize_field("mpos", &core.mpos)?; 34 | state.serialize_field("isize", &core.isize_)?; 35 | state.serialize_field("data", Bytes::new(self.data()))?; 36 | state.end() 37 | } 38 | } 39 | 40 | impl<'de> Deserialize<'de> for Record { 41 | fn deserialize(deserializer: D) -> Result 42 | where 43 | D: Deserializer<'de>, 44 | { 45 | enum Field { 46 | Tid, 47 | Pos, 48 | Bin, 49 | Mapq, 50 | QnameLen, 51 | Flag, 52 | NCigar, 53 | SeqLen, 54 | Mtid, 55 | Mpos, 56 | Isize, 57 | Data, 58 | } 59 | 60 | impl<'de> Deserialize<'de> for Field { 61 | fn deserialize(deserializer: D) -> Result 62 | where 63 | D: Deserializer<'de>, 64 | { 65 | struct FieldVisitor; 66 | 67 | impl Visitor<'_> for FieldVisitor { 68 | type Value = Field; 69 | 70 | fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { 71 | formatter.write_str("expecting a bam field") 72 | } 73 | 74 | fn visit_str(self, value: &str) -> Result 75 | where 76 | E: de::Error, 77 | { 78 | match value { 79 | "tid" => Ok(Field::Tid), 80 | "pos" => Ok(Field::Pos), 81 | "bin" => Ok(Field::Bin), 82 | "mapq" => Ok(Field::Mapq), 83 | "qname_len" => Ok(Field::QnameLen), 84 | "flag" => Ok(Field::Flag), 85 | "n_cigar" => Ok(Field::NCigar), 86 | "seq_len" => Ok(Field::SeqLen), 87 | "mtid" => Ok(Field::Mtid), 88 | "mpos" => Ok(Field::Mpos), 89 | "isize" => Ok(Field::Isize), 90 | "data" => Ok(Field::Data), 91 | _ => 
Err(de::Error::unknown_field(value, FIELDS)), 92 | } 93 | } 94 | } 95 | 96 | deserializer.deserialize_identifier(FieldVisitor) 97 | } 98 | } 99 | 100 | struct RecordVisitor; 101 | 102 | impl<'de> Visitor<'de> for RecordVisitor { 103 | type Value = Record; 104 | 105 | fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { 106 | formatter.write_str("struct Record") 107 | } 108 | 109 | fn visit_seq(self, mut seq: V) -> Result 110 | where 111 | V: SeqAccess<'de>, 112 | { 113 | let tid = seq 114 | .next_element()? 115 | .ok_or_else(|| de::Error::invalid_length(0, &self))?; 116 | let pos = seq 117 | .next_element()? 118 | .ok_or_else(|| de::Error::invalid_length(0, &self))?; 119 | let bin = seq 120 | .next_element()? 121 | .ok_or_else(|| de::Error::invalid_length(0, &self))?; 122 | let mapq = seq 123 | .next_element()? 124 | .ok_or_else(|| de::Error::invalid_length(0, &self))?; 125 | let qname_len = seq 126 | .next_element()? 127 | .ok_or_else(|| de::Error::invalid_length(0, &self))?; 128 | let flag = seq 129 | .next_element()? 130 | .ok_or_else(|| de::Error::invalid_length(0, &self))?; 131 | let n_cigar = seq 132 | .next_element()? 133 | .ok_or_else(|| de::Error::invalid_length(0, &self))?; 134 | let seq_len = seq 135 | .next_element()? 136 | .ok_or_else(|| de::Error::invalid_length(0, &self))?; 137 | let mtid = seq 138 | .next_element()? 139 | .ok_or_else(|| de::Error::invalid_length(0, &self))?; 140 | let mpos = seq 141 | .next_element()? 142 | .ok_or_else(|| de::Error::invalid_length(0, &self))?; 143 | let isize_ = seq 144 | .next_element()? 145 | .ok_or_else(|| de::Error::invalid_length(0, &self))?; 146 | let data = seq 147 | .next_element::()? 148 | .ok_or_else(|| de::Error::invalid_length(0, &self))? 
149 | .into_vec(); 150 | 151 | let mut rec = Record::new(); 152 | { 153 | let m = &mut rec.inner_mut().core; 154 | m.tid = tid; 155 | m.pos = pos; 156 | m.bin = bin; 157 | m.qual = mapq; 158 | m.l_qname = qname_len; 159 | m.flag = flag; 160 | m.n_cigar = n_cigar; 161 | m.l_qseq = seq_len; 162 | m.mtid = mtid; 163 | m.mpos = mpos; 164 | m.isize_ = isize_; 165 | } 166 | 167 | rec.set_data(&data); 168 | fix_l_extranul(&mut rec); 169 | Ok(rec) 170 | } 171 | 172 | fn visit_map(self, mut map: V) -> Result 173 | where 174 | V: MapAccess<'de>, 175 | { 176 | let mut tid = None; 177 | let mut pos = None; 178 | let mut bin = None; 179 | let mut mapq = None; 180 | let mut qname_len = None; 181 | let mut flag = None; 182 | let mut n_cigar = None; 183 | let mut seq_len = None; 184 | let mut mtid = None; 185 | let mut mpos = None; 186 | let mut isize = None; 187 | let mut data: Option = None; 188 | 189 | while let Some(key) = map.next_key()? { 190 | match key { 191 | Field::Tid => { 192 | if tid.is_some() { 193 | return Err(de::Error::duplicate_field("tid")); 194 | } 195 | tid = Some(map.next_value()?); 196 | } 197 | Field::Pos => { 198 | if pos.is_some() { 199 | return Err(de::Error::duplicate_field("pos")); 200 | } 201 | pos = Some(map.next_value()?); 202 | } 203 | Field::Bin => { 204 | if bin.is_some() { 205 | return Err(de::Error::duplicate_field("bin")); 206 | } 207 | bin = Some(map.next_value()?); 208 | } 209 | Field::Mapq => { 210 | if mapq.is_some() { 211 | return Err(de::Error::duplicate_field("mapq")); 212 | } 213 | mapq = Some(map.next_value()?); 214 | } 215 | Field::QnameLen => { 216 | if qname_len.is_some() { 217 | return Err(de::Error::duplicate_field("qname_len")); 218 | } 219 | qname_len = Some(map.next_value()?); 220 | } 221 | Field::Flag => { 222 | if flag.is_some() { 223 | return Err(de::Error::duplicate_field("flag")); 224 | } 225 | flag = Some(map.next_value()?); 226 | } 227 | Field::NCigar => { 228 | if n_cigar.is_some() { 229 | return 
Err(de::Error::duplicate_field("n_cigar")); 230 | } 231 | n_cigar = Some(map.next_value()?); 232 | } 233 | Field::SeqLen => { 234 | if seq_len.is_some() { 235 | return Err(de::Error::duplicate_field("seq_len")); 236 | } 237 | seq_len = Some(map.next_value()?); 238 | } 239 | Field::Mtid => { 240 | if mtid.is_some() { 241 | return Err(de::Error::duplicate_field("mtid")); 242 | } 243 | mtid = Some(map.next_value()?); 244 | } 245 | Field::Mpos => { 246 | if mpos.is_some() { 247 | return Err(de::Error::duplicate_field("mpos")); 248 | } 249 | mpos = Some(map.next_value()?); 250 | } 251 | Field::Isize => { 252 | if isize.is_some() { 253 | return Err(de::Error::duplicate_field("isize")); 254 | } 255 | isize = Some(map.next_value()?); 256 | } 257 | Field::Data => { 258 | if data.is_some() { 259 | return Err(de::Error::duplicate_field("data")); 260 | } 261 | data = Some(map.next_value()?); 262 | } 263 | } 264 | } 265 | 266 | let tid = tid.ok_or_else(|| de::Error::missing_field("tid"))?; 267 | let pos = pos.ok_or_else(|| de::Error::missing_field("pos"))?; 268 | let bin = bin.ok_or_else(|| de::Error::missing_field("bin"))?; 269 | let mapq = mapq.ok_or_else(|| de::Error::missing_field("mapq"))?; 270 | let qname_len = qname_len.ok_or_else(|| de::Error::missing_field("qname_len"))?; 271 | let flag = flag.ok_or_else(|| de::Error::missing_field("flag"))?; 272 | let n_cigar = n_cigar.ok_or_else(|| de::Error::missing_field("n_cigar"))?; 273 | let seq_len = seq_len.ok_or_else(|| de::Error::missing_field("seq_len"))?; 274 | let mtid = mtid.ok_or_else(|| de::Error::missing_field("mtid"))?; 275 | let mpos = mpos.ok_or_else(|| de::Error::missing_field("mpos"))?; 276 | let isize_ = isize.ok_or_else(|| de::Error::missing_field("isize"))?; 277 | let data = data 278 | .ok_or_else(|| de::Error::missing_field("data"))? 
279 | .into_vec(); 280 | 281 | let mut rec = Record::new(); 282 | { 283 | let m = &mut rec.inner_mut().core; 284 | m.tid = tid; 285 | m.pos = pos; 286 | m.bin = bin; 287 | m.qual = mapq; 288 | m.l_qname = qname_len; 289 | m.flag = flag; 290 | m.n_cigar = n_cigar; 291 | m.l_qseq = seq_len; 292 | m.mtid = mtid; 293 | m.mpos = mpos; 294 | m.isize_ = isize_; 295 | } 296 | 297 | rec.set_data(&data); 298 | fix_l_extranul(&mut rec); 299 | Ok(rec) 300 | } 301 | } 302 | 303 | const FIELDS: &[&str] = &[ 304 | "tid", "pos", "bin", "qual", "l_qname", "flag", "n_cigar", "seq_len", "mtid", "mpos", 305 | "isize", "data", 306 | ]; 307 | deserializer.deserialize_struct("Record", FIELDS, RecordVisitor) 308 | } 309 | } 310 | 311 | #[cfg(test)] 312 | mod tests { 313 | use crate::bam::record::Record; 314 | use crate::bam::Read; 315 | use crate::bam::Reader; 316 | 317 | use std::path::Path; 318 | 319 | use bincode::{deserialize, serialize}; 320 | use serde_json; 321 | 322 | #[test] 323 | fn test_bincode() { 324 | let mut bam = Reader::from_path(Path::new("test/test.bam")).expect("Error opening file."); 325 | 326 | let mut recs = Vec::new(); 327 | for record in bam.records() { 328 | recs.push(record.unwrap()); 329 | } 330 | 331 | let encoded: Vec = serialize(&recs).unwrap(); 332 | let decoded: Vec = deserialize(&encoded[..]).unwrap(); 333 | assert_eq!(recs, decoded); 334 | } 335 | 336 | #[test] 337 | fn test_serde_json() { 338 | let mut bam = Reader::from_path(Path::new("test/test.bam")).expect("Error opening file."); 339 | 340 | let mut recs = Vec::new(); 341 | for record in bam.records() { 342 | recs.push(record.unwrap()); 343 | } 344 | 345 | let encoded: String = serde_json::to_string(&recs).unwrap(); 346 | println!("encoded: {}", encoded); 347 | let decoded: Vec = serde_json::from_str(&encoded).unwrap(); 348 | assert_eq!(recs, decoded); 349 | } 350 | } 351 | -------------------------------------------------------------------------------- /src/tbx/mod.rs: 
-------------------------------------------------------------------------------- 1 | // Copyright 2018 Manuel Holtgrewe, Berlin Institute of Health. 2 | // Licensed under the MIT license (http://opensource.org/licenses/MIT) 3 | // This file may not be copied, modified, or distributed 4 | // except according to those terms. 5 | 6 | //! Module for working with tabix-indexed text files. 7 | //! 8 | //! This module allows reading tabix-indexed text files (such as BED) in a convenient, 9 | //! line-based (and thus format-agnostic) way. For accessing tabix-indexed VCF files, using the 10 | //! `bcf` module is probably a better choice as this module gives you lines from the text files 11 | //! which you then have to take care of parsing. 12 | //! 13 | //! In general, for reading tabix-indexed files, first open the file by creating a `tbx::Reader` 14 | //! object, possibly translate the chromosome name to its numeric ID in the file, fetch the region 15 | //! of interest using `fetch()`, and finally iterate over the records using `records()`. 16 | //! 17 | //! # Examples 18 | //! 19 | //! ```rust,no_run 20 | //! use rust_htslib::tbx::{self, Read}; 21 | //! 22 | //! // Create a tabix reader for reading a tabix-indexed BED file. 23 | //! let path_bed = "file.bed.gz"; 24 | //! let mut tbx_reader = tbx::Reader::from_path(&path_bed) 25 | //! .expect(&format!("Could not open {}", path_bed)); 26 | //! 27 | //! // Resolve chromosome name to numeric ID. 28 | //! let tid = match tbx_reader.tid("chr1") { 29 | //! Ok(tid) => tid, 30 | //! Err(_) => panic!("Could not resolve 'chr1' to contig ID"), 31 | //! }; 32 | //! 33 | //! // Set region to fetch. 34 | //! tbx_reader 35 | //! .fetch(tid, 0, 100_000) 36 | //! .expect("Could not seek to chr1:1-100,000"); 37 | //! 38 | //! // Read through all records in region. 39 | //! for record in tbx_reader.records() { 40 | //! // ... actually do some work 41 | //! } 42 | //!
``` 43 | 44 | use std::ffi; 45 | use std::path::Path; 46 | use std::ptr; 47 | use url::Url; 48 | 49 | use crate::errors::{Error, Result}; 50 | use crate::htslib; 51 | use crate::utils::path_as_bytes; 52 | 53 | /// A trait for a Tabix reader with a read method. 54 | pub trait Read: Sized { 55 | /// Read next line into the given `Vec` (i.e., ASCII string). 56 | /// 57 | /// Use this method in combination with a single allocated record to avoid the reallocations 58 | /// occurring with the iterator. 59 | /// 60 | /// # Arguments 61 | /// 62 | /// * `record` - the `Vec` to be filled 63 | /// 64 | /// # Returns 65 | /// Ok(true) if record was read, Ok(false) if no more record in file 66 | fn read(&mut self, record: &mut Vec) -> Result; 67 | 68 | /// Iterator over the lines/records of the seeked region. 69 | /// 70 | /// Note that, while being convenient, this is less efficient than pre-allocating a 71 | /// `Vec` and reading into it with the `read()` method, since every iteration involves 72 | /// the allocation of a new `Vec`. 73 | fn records(&mut self) -> Records<'_, Self>; 74 | 75 | /// Return the text headers, split by line. 76 | fn header(&self) -> &Vec; 77 | } 78 | 79 | /// A Tabix file reader. 80 | /// 81 | /// This struct and its associated functions are meant for reading plain-text tabix indexed 82 | /// by `tabix`. 83 | /// 84 | /// Note that the `tabix` command from `htslib` can actually several more things, including 85 | /// building indices and converting BCF to VCF text output. Both is out of scope here. 86 | #[derive(Debug)] 87 | pub struct Reader { 88 | /// The header lines (if any). 89 | header: Vec, 90 | 91 | /// The file to read from. 92 | hts_file: *mut htslib::htsFile, 93 | /// The file format information. 94 | hts_format: htslib::htsExactFormat, 95 | /// The tbx_t structure to read from. 96 | tbx: *mut htslib::tbx_t, 97 | /// The current buffer. 98 | buf: htslib::kstring_t, 99 | /// Iterator over the buffer. 
100 | itr: Option<*mut htslib::hts_itr_t>, 101 | 102 | /// The currently fetch region's tid. 103 | tid: i64, 104 | /// The currently fetch region's 0-based begin pos. 105 | start: i64, 106 | /// The currently fetch region's 0-based end pos. 107 | end: i64, 108 | } 109 | 110 | unsafe impl Send for Reader {} 111 | 112 | /// Redefinition of `KS_SEP_LINE` from `htslib/kseq.h`. 113 | const KS_SEP_LINE: i32 = 2; 114 | 115 | impl Reader { 116 | /// Create a new Reader from path. 117 | /// 118 | /// # Arguments 119 | /// 120 | /// * `path` - the path to open. 121 | pub fn from_path>(path: P) -> Result { 122 | Self::new(&path_as_bytes(path, true)?) 123 | } 124 | 125 | pub fn from_url(url: &Url) -> Result { 126 | Self::new(url.as_str().as_bytes()) 127 | } 128 | 129 | /// Create a new Reader. 130 | /// 131 | /// # Arguments 132 | /// 133 | /// * `path` - the path. 134 | fn new(path: &[u8]) -> Result { 135 | let path = ffi::CString::new(path).unwrap(); 136 | let c_str = ffi::CString::new("r").unwrap(); 137 | let hts_file = unsafe { htslib::hts_open(path.as_ptr(), c_str.as_ptr()) }; 138 | let hts_format: u32 = unsafe { 139 | let file_format: *const hts_sys::htsFormat = htslib::hts_get_format(hts_file); 140 | (*file_format).format 141 | }; 142 | 143 | let tbx = unsafe { htslib::tbx_index_load(path.as_ptr()) }; 144 | if tbx.is_null() { 145 | return Err(Error::TabixInvalidIndex); 146 | } 147 | let mut header = Vec::new(); 148 | let mut buf = htslib::kstring_t { 149 | l: 0, 150 | m: 0, 151 | s: ptr::null_mut(), 152 | }; 153 | unsafe { 154 | while htslib::hts_getline(hts_file, KS_SEP_LINE, &mut buf) >= 0 { 155 | if buf.l > 0 && i32::from(*buf.s) == (*tbx).conf.meta_char { 156 | header.push(String::from(ffi::CStr::from_ptr(buf.s).to_str().unwrap())); 157 | } else { 158 | break; 159 | } 160 | } 161 | } 162 | 163 | Ok(Reader { 164 | header, 165 | hts_file, 166 | hts_format, 167 | tbx, 168 | buf, 169 | itr: None, 170 | tid: -1, 171 | start: -1, 172 | end: -1, 173 | }) 174 | } 175 | 176 
| /// Get sequence/target ID from sequence name. 177 | pub fn tid(&self, name: &str) -> Result { 178 | let name_cstr = ffi::CString::new(name.as_bytes()).unwrap(); 179 | let res = unsafe { htslib::tbx_name2id(self.tbx, name_cstr.as_ptr()) }; 180 | if res < 0 { 181 | Err(Error::UnknownSequence { 182 | sequence: name.to_owned(), 183 | }) 184 | } else { 185 | Ok(res as u64) 186 | } 187 | } 188 | 189 | /// Fetch region given by numeric sequence number and 0-based begin and end position. 190 | pub fn fetch(&mut self, tid: u64, start: u64, end: u64) -> Result<()> { 191 | self.tid = tid as i64; 192 | self.start = start as i64; 193 | self.end = end as i64; 194 | 195 | if let Some(itr) = self.itr { 196 | unsafe { 197 | htslib::hts_itr_destroy(itr); 198 | } 199 | } 200 | let itr = unsafe { 201 | htslib::hts_itr_query( 202 | (*self.tbx).idx, 203 | tid as i32, 204 | start as i64, 205 | end as i64, 206 | Some(htslib::tbx_readrec), 207 | ) 208 | }; 209 | if itr.is_null() { 210 | self.itr = None; 211 | Err(Error::Fetch) 212 | } else { 213 | self.itr = Some(itr); 214 | Ok(()) 215 | } 216 | } 217 | 218 | /// Return the sequence contig names. 219 | pub fn seqnames(&self) -> Vec { 220 | let mut result = Vec::new(); 221 | 222 | let mut nseq: i32 = 0; 223 | let seqs = unsafe { htslib::tbx_seqnames(self.tbx, &mut nseq) }; 224 | for i in 0..nseq { 225 | unsafe { 226 | result.push(String::from( 227 | ffi::CStr::from_ptr(*seqs.offset(i as isize)) 228 | .to_str() 229 | .unwrap(), 230 | )); 231 | } 232 | } 233 | unsafe { 234 | libc::free(seqs as *mut libc::c_void); 235 | }; 236 | 237 | result 238 | } 239 | 240 | /// Activate multi-threaded BGZF read support in htslib. This should permit faster 241 | /// reading of large BGZF files. 
242 | /// 243 | /// # Arguments 244 | /// 245 | /// * `n_threads` - number of extra background reader threads to use 246 | pub fn set_threads(&mut self, n_threads: usize) -> Result<()> { 247 | assert!(n_threads > 0, "n_threads must be > 0"); 248 | 249 | let r = unsafe { htslib::hts_set_threads(self.hts_file, n_threads as i32) }; 250 | if r != 0 { 251 | Err(Error::SetThreads) 252 | } else { 253 | Ok(()) 254 | } 255 | } 256 | 257 | pub fn hts_format(&self) -> htslib::htsExactFormat { 258 | self.hts_format 259 | } 260 | } 261 | 262 | /// Return whether the two given genomic intervals overlap. 263 | fn overlap(tid1: i64, begin1: i64, end1: i64, tid2: i64, begin2: i64, end2: i64) -> bool { 264 | (tid1 == tid2) && (begin1 < end2) && (begin2 < end1) 265 | } 266 | 267 | impl Read for Reader { 268 | fn read(&mut self, record: &mut Vec) -> Result { 269 | match self.itr { 270 | Some(itr) => { 271 | loop { 272 | // Try to read next line. 273 | let ret = unsafe { 274 | htslib::hts_itr_next( 275 | htslib::hts_get_bgzfp(self.hts_file), 276 | itr, 277 | //mem::transmute(&mut self.buf), 278 | &mut self.buf as *mut htslib::kstring_t as *mut libc::c_void, 279 | //mem::transmute(self.tbx), 280 | self.tbx as *mut libc::c_void, 281 | ) 282 | }; 283 | // Handle errors first. 284 | if ret == -1 { 285 | return Ok(false); 286 | } else if ret == -2 { 287 | return Err(Error::TabixTruncatedRecord); 288 | } else if ret < 0 { 289 | panic!("Return value should not be <0 but was: {}", ret); 290 | } 291 | // Return first overlapping record (loop will stop when `hts_itr_next(...)` 292 | // returns `< 0`). 293 | let (tid, start, end) = 294 | unsafe { ((*itr).curr_tid, (*itr).curr_beg, (*itr).curr_end) }; 295 | // XXX: Careful with this tid conversion!!! 
296 | if overlap(self.tid, self.start, self.end, tid as i64, start, end) { 297 | *record = 298 | unsafe { Vec::from(ffi::CStr::from_ptr(self.buf.s).to_str().unwrap()) }; 299 | return Ok(true); 300 | } 301 | } 302 | } 303 | _ => Err(Error::TabixNoIter), 304 | } 305 | } 306 | 307 | fn records(&mut self) -> Records<'_, Self> { 308 | Records { reader: self } 309 | } 310 | 311 | fn header(&self) -> &Vec { 312 | &self.header 313 | } 314 | } 315 | 316 | impl Drop for Reader { 317 | fn drop(&mut self) { 318 | unsafe { 319 | if self.itr.is_some() { 320 | htslib::hts_itr_destroy(self.itr.unwrap()); 321 | } 322 | htslib::tbx_destroy(self.tbx); 323 | htslib::hts_close(self.hts_file); 324 | } 325 | } 326 | } 327 | 328 | /// Iterator over the lines of a tabix file. 329 | #[derive(Debug)] 330 | pub struct Records<'a, R: Read> { 331 | reader: &'a mut R, 332 | } 333 | 334 | impl Iterator for Records<'_, R> { 335 | type Item = Result>; 336 | 337 | #[allow(clippy::read_zero_byte_vec)] 338 | fn next(&mut self) -> Option>> { 339 | let mut record = Vec::new(); 340 | match self.reader.read(&mut record) { 341 | Ok(false) => None, 342 | Ok(true) => Some(Ok(record)), 343 | Err(err) => Some(Err(err)), 344 | } 345 | } 346 | } 347 | 348 | #[cfg(test)] 349 | mod tests { 350 | use super::*; 351 | 352 | #[test] 353 | fn bed_basic() { 354 | let reader = 355 | Reader::from_path("test/tabix_reader/test_bed3.bed.gz").expect("Error opening file."); 356 | 357 | // Check sequence name vector. 358 | assert_eq!( 359 | reader.seqnames(), 360 | vec![String::from("chr1"), String::from("chr2")] 361 | ); 362 | 363 | // Check mapping between name and idx. 
364 | assert_eq!(reader.tid("chr1").unwrap(), 0); 365 | assert_eq!(reader.tid("chr2").unwrap(), 1); 366 | assert!(reader.tid("chr3").is_err()); 367 | } 368 | 369 | #[test] 370 | fn bed_fetch_from_chr1_read_api() { 371 | let mut reader = 372 | Reader::from_path("test/tabix_reader/test_bed3.bed.gz").expect("Error opening file."); 373 | 374 | let chr1_id = reader.tid("chr1").unwrap(); 375 | assert!(reader.fetch(chr1_id, 1000, 1003).is_ok()); 376 | 377 | let mut record = Vec::new(); 378 | assert!(reader.read(&mut record).is_ok()); 379 | assert_eq!(record, Vec::from("chr1\t1001\t1002")); 380 | assert_eq!(reader.read(&mut record), Ok(false)); // EOF 381 | } 382 | 383 | #[test] 384 | fn bed_fetch_from_chr1_iterator_api() { 385 | let mut reader = 386 | Reader::from_path("test/tabix_reader/test_bed3.bed.gz").expect("Error opening file."); 387 | 388 | let chr1_id = reader.tid("chr1").unwrap(); 389 | assert!(reader.fetch(chr1_id, 1000, 1003).is_ok()); 390 | 391 | let records: Vec> = reader.records().map(|r| r.unwrap()).collect(); 392 | assert_eq!(records, vec![Vec::from("chr1\t1001\t1002")]); 393 | } 394 | 395 | #[test] 396 | fn test_fails_on_bam() { 397 | let reader = Reader::from_path("test/test.bam"); 398 | assert!(reader.is_err()); 399 | } 400 | 401 | #[test] 402 | fn test_fails_on_non_existiant() { 403 | let reader = Reader::from_path("test/no_such_file"); 404 | assert!(reader.is_err()); 405 | } 406 | 407 | #[test] 408 | fn test_fails_on_vcf() { 409 | let reader = Reader::from_path("test/test_left.vcf"); 410 | assert!(reader.is_err()); 411 | } 412 | 413 | #[test] 414 | fn test_text_header_regions() { 415 | // This file has chromosome, start, and end positions with a header line. 416 | Reader::from_path("test/tabix_reader/genomic_regions_header.txt.gz") 417 | .expect("Error opening file."); 418 | } 419 | 420 | #[test] 421 | fn test_text_header_positions() { 422 | // This file has chromosome and position with a header line, indexed with 423 | // `tabix -b2 -e2 `. 
424 | Reader::from_path("test/tabix_reader/genomic_positions_header.txt.gz") 425 | .expect("Error opening file."); 426 | } 427 | 428 | #[test] 429 | fn test_text_bad_header() { 430 | // This is a duplicate of the above file but the index file is nonsense text. 431 | Reader::from_path("test/tabix_reader/bad_header.txt.gz") 432 | .expect_err("Invalid index file should fail."); 433 | } 434 | } 435 | -------------------------------------------------------------------------------- /src/bcf/header.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Johannes Köster. 2 | // Licensed under the MIT license (http://opensource.org/licenses/MIT) 3 | // This file may not be copied, modified, or distributed 4 | // except according to those terms. 5 | //! Module for working with VCF or BCF headers. 6 | //! 7 | //! # Examples 8 | //! From the header of a VCF file we can 9 | //! - Output sample count of a VCF file 10 | //! - Output sample names of a VCF file 11 | //! - Output sample index given a sample name of a VCF file. 12 | //! ``` 13 | //! use crate::rust_htslib::bcf::{Reader, Read}; 14 | //! use std::io::Read as IoRead; 15 | //! 16 | //! let path = &"test/test_string.vcf"; 17 | //! let mut bcf = Reader::from_path(path).expect("Error opening file."); 18 | //! let header = bcf.header(); 19 | //! assert_eq!(header.sample_count(), 2); // Sample count 20 | //! let mut s = String::new(); 21 | //! for (i, mut x) in header.samples().into_iter().enumerate() { 22 | //! x.read_to_string(&mut s); // Read sample name in to `s` 23 | //! println!("{}", s); // output sample name 24 | //! } 25 | //! assert_eq!(header.sample_id(b"one").unwrap(), 0); // Sample index wrapped in Option 26 | //! assert_eq!(header.sample_id(b"two").unwrap(), 1); // Sample index wrapped in Option 27 | //! assert!(header.sample_id(b"non existent sample").is_none()); // Return none if not found 28 | //! 29 | //! 
assert_eq!(header.contig_count(), 1); // Number of contig in header. 30 | //! // obtain the data type of an INFO field 31 | //! let (tag_type, tag_length) = header.info_type(b"S1").unwrap(); 32 | //! let (fmt_type, fmt_length) = header.format_type(b"GT").unwrap(); 33 | //! ``` 34 | 35 | use std::ffi; 36 | use std::os::raw::c_char; 37 | use std::slice; 38 | use std::str; 39 | use std::sync::Arc; 40 | 41 | use crate::htslib; 42 | 43 | use linear_map::LinearMap; 44 | 45 | use crate::errors::{Error, Result}; 46 | 47 | pub type SampleSubset = Vec; 48 | 49 | custom_derive! { 50 | /// A newtype for IDs from BCF headers. 51 | #[derive( 52 | NewtypeFrom, 53 | NewtypeDeref, 54 | PartialEq, 55 | PartialOrd, 56 | Eq, 57 | Ord, 58 | Copy, 59 | Clone, 60 | Debug 61 | )] 62 | pub struct Id(pub u32); 63 | } 64 | 65 | /// A BCF header. 66 | #[derive(Debug)] 67 | pub struct Header { 68 | pub(crate) inner: *mut htslib::bcf_hdr_t, 69 | pub subset: Option, 70 | } 71 | 72 | unsafe impl Send for Header {} 73 | unsafe impl Sync for Header {} 74 | 75 | impl Default for Header { 76 | fn default() -> Self { 77 | Self::new() 78 | } 79 | } 80 | 81 | impl Header { 82 | /// Create a new (empty) `Header`. 83 | pub fn new() -> Self { 84 | let c_str = ffi::CString::new(&b"w"[..]).unwrap(); 85 | Header { 86 | inner: unsafe { htslib::bcf_hdr_init(c_str.as_ptr()) }, 87 | subset: None, 88 | } 89 | } 90 | 91 | /// Create a new `Header` using the given `HeaderView` as the template. 92 | /// 93 | /// After construction, you can modify the header independently from the template `header`. 94 | /// 95 | /// # Arguments 96 | /// 97 | /// - `header` - The `HeaderView` to use as the template. 98 | pub fn from_template(header: &HeaderView) -> Self { 99 | Header { 100 | inner: unsafe { htslib::bcf_hdr_dup(header.inner) }, 101 | subset: None, 102 | } 103 | } 104 | 105 | /// Create a new `Header` using the given `HeaderView` as as template, but subsetting to the 106 | /// given `samples`. 
107 | /// 108 | /// # Arguments 109 | /// 110 | /// - `header` - The `HeaderView` to use for the template. 111 | /// - `samples` - A slice of byte-encoded (`[u8]`) sample names. 112 | pub fn from_template_subset(header: &HeaderView, samples: &[&[u8]]) -> Result { 113 | let mut imap = vec![0; samples.len()]; 114 | let names: Vec<_> = samples 115 | .iter() 116 | .map(|&s| ffi::CString::new(s).unwrap()) 117 | .collect(); 118 | let name_pointers: Vec<_> = names.iter().map(|s| s.as_ptr() as *mut i8).collect(); 119 | #[allow(clippy::unnecessary_cast)] 120 | let name_pointers_ptr = name_pointers.as_ptr() as *const *mut c_char; 121 | let inner = unsafe { 122 | htslib::bcf_hdr_subset( 123 | header.inner, 124 | samples.len() as i32, 125 | name_pointers_ptr, 126 | imap.as_mut_ptr(), 127 | ) 128 | }; 129 | if inner.is_null() { 130 | Err(Error::BcfDuplicateSampleNames) 131 | } else { 132 | Ok(Header { 133 | inner, 134 | subset: Some(imap), 135 | }) 136 | } 137 | } 138 | 139 | /// Add a `sample` to the header. 140 | /// 141 | /// # Arguments 142 | /// 143 | /// - `sample` - Name of the sample to add (to the end of the sample list). 144 | pub fn push_sample(&mut self, sample: &[u8]) -> &mut Self { 145 | let c_str = ffi::CString::new(sample).unwrap(); 146 | unsafe { htslib::bcf_hdr_add_sample(self.inner, c_str.as_ptr()) }; 147 | self 148 | } 149 | 150 | /// Add a record to the header. 151 | /// 152 | /// # Arguments 153 | /// 154 | /// - `record` - String representation of the header line 155 | /// 156 | /// # Example 157 | /// 158 | /// ```rust,ignore 159 | /// header.push_record(format!("##contig=", "chrX", 155270560).as_bytes()); 160 | /// ``` 161 | pub fn push_record(&mut self, record: &[u8]) -> &mut Self { 162 | let c_str = ffi::CString::new(record).unwrap(); 163 | unsafe { htslib::bcf_hdr_append(self.inner, c_str.as_ptr()) }; 164 | self 165 | } 166 | 167 | /// Remove a `FILTER` entry from the header. 
168 | /// 169 | /// # Arguments 170 | /// 171 | /// - `tag` - Name of the `FLT` tag to remove. 172 | pub fn remove_filter(&mut self, tag: &[u8]) -> &mut Self { 173 | self.remove_impl(tag, htslib::BCF_HL_FLT) 174 | } 175 | 176 | /// Remove an `INFO` entry from the header. 177 | /// 178 | /// # Arguments 179 | /// 180 | /// - `tag` - Name of the `INFO` tag to remove. 181 | pub fn remove_info(&mut self, tag: &[u8]) -> &mut Self { 182 | self.remove_impl(tag, htslib::BCF_HL_INFO) 183 | } 184 | 185 | /// Remove a `FORMAT` entry from the header. 186 | /// 187 | /// # Arguments 188 | /// 189 | /// - `tag` - Name of the `FORMAT` tag to remove. 190 | pub fn remove_format(&mut self, tag: &[u8]) -> &mut Self { 191 | self.remove_impl(tag, htslib::BCF_HL_FMT) 192 | } 193 | 194 | /// Remove a contig entry from the header. 195 | /// 196 | /// # Arguments 197 | /// 198 | /// - `tag` - Name of the `FORMAT` tag to remove. 199 | pub fn remove_contig(&mut self, tag: &[u8]) -> &mut Self { 200 | self.remove_impl(tag, htslib::BCF_HL_CTG) 201 | } 202 | 203 | /// Remove a structured entry from the header. 204 | /// 205 | /// # Arguments 206 | /// 207 | /// - `tag` - Name of the structured tag to remove. 208 | pub fn remove_structured(&mut self, tag: &[u8]) -> &mut Self { 209 | self.remove_impl(tag, htslib::BCF_HL_STR) 210 | } 211 | 212 | /// Remove a generic entry from the header. 213 | /// 214 | /// # Arguments 215 | /// 216 | /// - `tag` - Name of the generic tag to remove. 217 | pub fn remove_generic(&mut self, tag: &[u8]) -> &mut Self { 218 | self.remove_impl(tag, htslib::BCF_HL_GEN) 219 | } 220 | 221 | /// Implementation of removing header tags. 
222 | fn remove_impl(&mut self, tag: &[u8], type_: u32) -> &mut Self { 223 | unsafe { 224 | let v = tag.to_vec(); 225 | let c_str = ffi::CString::new(v).unwrap(); 226 | htslib::bcf_hdr_remove(self.inner, type_ as i32, c_str.as_ptr()); 227 | } 228 | self 229 | } 230 | } 231 | 232 | impl Drop for Header { 233 | fn drop(&mut self) { 234 | unsafe { htslib::bcf_hdr_destroy(self.inner) }; 235 | } 236 | } 237 | 238 | /// A header record. 239 | #[derive(Debug)] 240 | pub enum HeaderRecord { 241 | /// A `FILTER` header record. 242 | Filter { 243 | key: String, 244 | values: LinearMap, 245 | }, 246 | /// An `INFO` header record. 247 | Info { 248 | key: String, 249 | values: LinearMap, 250 | }, 251 | /// A `FORMAT` header record. 252 | Format { 253 | key: String, 254 | values: LinearMap, 255 | }, 256 | /// A `contig` header record. 257 | Contig { 258 | key: String, 259 | values: LinearMap, 260 | }, 261 | /// A structured header record. 262 | Structured { 263 | key: String, 264 | values: LinearMap, 265 | }, 266 | /// A generic, unstructured header record. 267 | Generic { key: String, value: String }, 268 | } 269 | 270 | #[derive(Debug)] 271 | pub struct HeaderView { 272 | pub(crate) inner: *mut htslib::bcf_hdr_t, 273 | } 274 | 275 | unsafe impl Send for HeaderView {} 276 | unsafe impl Sync for HeaderView {} 277 | 278 | impl HeaderView { 279 | pub(crate) fn new(inner: *mut htslib::bcf_hdr_t) -> Self { 280 | HeaderView { inner } 281 | } 282 | 283 | #[inline] 284 | fn inner(&self) -> htslib::bcf_hdr_t { 285 | unsafe { *self.inner } 286 | } 287 | 288 | /// Get the number of samples defined in the header. 289 | pub fn sample_count(&self) -> u32 { 290 | self.inner().n[htslib::BCF_DT_SAMPLE as usize] as u32 291 | } 292 | 293 | /// Get vector of sample names defined in the header. 
294 | pub fn samples(&self) -> Vec<&[u8]> { 295 | let names = 296 | unsafe { slice::from_raw_parts(self.inner().samples, self.sample_count() as usize) }; 297 | names 298 | .iter() 299 | .map(|name| unsafe { ffi::CStr::from_ptr(*name).to_bytes() }) 300 | .collect() 301 | } 302 | 303 | /// Obtain id (column index) of given sample. 304 | /// Returns `None` if sample is not present in header. 305 | pub fn sample_id(&self, sample: &[u8]) -> Option { 306 | self.samples().iter().position(|s| *s == sample) 307 | } 308 | 309 | /// Get the number of contigs defined in the header. 310 | pub fn contig_count(&self) -> u32 { 311 | self.inner().n[htslib::BCF_DT_CTG as usize] as u32 312 | } 313 | 314 | pub fn rid2name(&self, rid: u32) -> Result<&[u8]> { 315 | if rid <= self.contig_count() { 316 | unsafe { 317 | let dict = self.inner().id[htslib::BCF_DT_CTG as usize]; 318 | let ptr = (*dict.offset(rid as isize)).key; 319 | Ok(ffi::CStr::from_ptr(ptr).to_bytes()) 320 | } 321 | } else { 322 | Err(Error::BcfUnknownRID { rid }) 323 | } 324 | } 325 | 326 | /// Retrieve the (internal) chromosome identifier 327 | /// # Examples 328 | /// ```rust 329 | /// use rust_htslib::bcf::header::Header; 330 | /// use rust_htslib::bcf::{Format, Writer}; 331 | /// 332 | /// let mut header = Header::new(); 333 | /// let contig_field = br#"##contig="#; 334 | /// header.push_record(contig_field); 335 | /// let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap(); 336 | /// let header_view = vcf.header(); 337 | /// let rid = header_view.name2rid(b"foo").unwrap(); 338 | /// assert_eq!(rid, 0); 339 | /// // try and retrieve a contig not in the header 340 | /// let result = header_view.name2rid(b"bar"); 341 | /// assert!(result.is_err()) 342 | /// ``` 343 | /// # Errors 344 | /// If `name` does not match a chromosome currently in the VCF header, returns [`Error::BcfUnknownContig`] 345 | pub fn name2rid(&self, name: &[u8]) -> Result { 346 | let c_str = ffi::CString::new(name).unwrap(); 347 | 
unsafe { 348 | match htslib::bcf_hdr_id2int( 349 | self.inner, 350 | htslib::BCF_DT_CTG as i32, 351 | c_str.as_ptr() as *mut c_char, 352 | ) { 353 | -1 => Err(Error::BcfUnknownContig { 354 | contig: str::from_utf8(name).unwrap().to_owned(), 355 | }), 356 | i => Ok(i as u32), 357 | } 358 | } 359 | } 360 | 361 | pub fn info_type(&self, tag: &[u8]) -> Result<(TagType, TagLength)> { 362 | self.tag_type(tag, htslib::BCF_HL_INFO) 363 | } 364 | 365 | pub fn format_type(&self, tag: &[u8]) -> Result<(TagType, TagLength)> { 366 | self.tag_type(tag, htslib::BCF_HL_FMT) 367 | } 368 | 369 | fn tag_type(&self, tag: &[u8], hdr_type: ::libc::c_uint) -> Result<(TagType, TagLength)> { 370 | let tag_desc = || str::from_utf8(tag).unwrap().to_owned(); 371 | let c_str_tag = ffi::CString::new(tag).unwrap(); 372 | let (_type, length, num_values) = unsafe { 373 | let id = htslib::bcf_hdr_id2int( 374 | self.inner, 375 | htslib::BCF_DT_ID as i32, 376 | c_str_tag.as_ptr() as *mut c_char, 377 | ); 378 | if id < 0 { 379 | return Err(Error::BcfUndefinedTag { tag: tag_desc() }); 380 | } 381 | let n = (*self.inner).n[htslib::BCF_DT_ID as usize] as usize; 382 | let entry = slice::from_raw_parts((*self.inner).id[htslib::BCF_DT_ID as usize], n); 383 | let d = (*entry[id as usize].val).info[hdr_type as usize]; 384 | ((d >> 4) & 0xf, (d >> 8) & 0xf, d >> 12) 385 | }; 386 | let _type = match _type as ::libc::c_uint { 387 | htslib::BCF_HT_FLAG => TagType::Flag, 388 | htslib::BCF_HT_INT => TagType::Integer, 389 | htslib::BCF_HT_REAL => TagType::Float, 390 | htslib::BCF_HT_STR => TagType::String, 391 | _ => return Err(Error::BcfUnexpectedType { tag: tag_desc() }), 392 | }; 393 | let length = match length as ::libc::c_uint { 394 | // XXX: Hacky "as u32" cast. 
Trace back through unsafe{} towards BCF struct and rollback to proper type 395 | htslib::BCF_VL_FIXED => TagLength::Fixed(num_values as u32), 396 | htslib::BCF_VL_VAR => TagLength::Variable, 397 | htslib::BCF_VL_A => TagLength::AltAlleles, 398 | htslib::BCF_VL_R => TagLength::Alleles, 399 | htslib::BCF_VL_G => TagLength::Genotypes, 400 | _ => return Err(Error::BcfUnexpectedType { tag: tag_desc() }), 401 | }; 402 | 403 | Ok((_type, length)) 404 | } 405 | 406 | /// Convert string ID (e.g., for a `FILTER` value) to its numeric identifier. 407 | pub fn name_to_id(&self, id: &[u8]) -> Result { 408 | let c_str = ffi::CString::new(id).unwrap(); 409 | unsafe { 410 | match htslib::bcf_hdr_id2int( 411 | self.inner, 412 | htslib::BCF_DT_ID as i32, 413 | c_str.as_ptr() as *const c_char, 414 | ) { 415 | -1 => Err(Error::BcfUnknownID { 416 | id: str::from_utf8(id).unwrap().to_owned(), 417 | }), 418 | i => Ok(Id(i as u32)), 419 | } 420 | } 421 | } 422 | 423 | /// Convert integer representing an identifier (e.g., a `FILTER` value) to its string 424 | /// name.bam. 425 | pub fn id_to_name(&self, id: Id) -> Vec { 426 | let key = unsafe { 427 | ffi::CStr::from_ptr( 428 | (*(*self.inner).id[htslib::BCF_DT_ID as usize].offset(*id as isize)).key, 429 | ) 430 | }; 431 | key.to_bytes().to_vec() 432 | } 433 | 434 | /// Convert string sample name to its numeric identifier. 435 | pub fn sample_to_id(&self, id: &[u8]) -> Result { 436 | let c_str = ffi::CString::new(id).unwrap(); 437 | unsafe { 438 | match htslib::bcf_hdr_id2int( 439 | self.inner, 440 | htslib::BCF_DT_SAMPLE as i32, 441 | c_str.as_ptr() as *const c_char, 442 | ) { 443 | -1 => Err(Error::BcfUnknownSample { 444 | name: str::from_utf8(id).unwrap().to_owned(), 445 | }), 446 | i => Ok(Id(i as u32)), 447 | } 448 | } 449 | } 450 | 451 | /// Convert integer representing an contig to its name. 
452 | pub fn id_to_sample(&self, id: Id) -> Vec { 453 | let key = unsafe { 454 | ffi::CStr::from_ptr( 455 | (*(*self.inner).id[htslib::BCF_DT_SAMPLE as usize].offset(*id as isize)).key, 456 | ) 457 | }; 458 | key.to_bytes().to_vec() 459 | } 460 | 461 | /// Return structured `HeaderRecord`s. 462 | pub fn header_records(&self) -> Vec { 463 | fn parse_kv(rec: &htslib::bcf_hrec_t) -> LinearMap { 464 | let mut result: LinearMap = LinearMap::new(); 465 | for i in 0_i32..(rec.nkeys) { 466 | let key = unsafe { 467 | ffi::CStr::from_ptr(*rec.keys.offset(i as isize)) 468 | .to_str() 469 | .unwrap() 470 | .to_string() 471 | }; 472 | let value = unsafe { 473 | ffi::CStr::from_ptr(*rec.vals.offset(i as isize)) 474 | .to_str() 475 | .unwrap() 476 | .to_string() 477 | }; 478 | result.insert(key, value); 479 | } 480 | result 481 | } 482 | 483 | let mut result: Vec = Vec::new(); 484 | for i in 0_i32..unsafe { (*self.inner).nhrec } { 485 | let rec = unsafe { &(**(*self.inner).hrec.offset(i as isize)) }; 486 | let key = unsafe { ffi::CStr::from_ptr(rec.key).to_str().unwrap().to_string() }; 487 | let record = match rec.type_ { 488 | 0 => HeaderRecord::Filter { 489 | key, 490 | values: parse_kv(rec), 491 | }, 492 | 1 => HeaderRecord::Info { 493 | key, 494 | values: parse_kv(rec), 495 | }, 496 | 2 => HeaderRecord::Format { 497 | key, 498 | values: parse_kv(rec), 499 | }, 500 | 3 => HeaderRecord::Contig { 501 | key, 502 | values: parse_kv(rec), 503 | }, 504 | 4 => HeaderRecord::Structured { 505 | key, 506 | values: parse_kv(rec), 507 | }, 508 | 5 => HeaderRecord::Generic { 509 | key, 510 | value: unsafe { ffi::CStr::from_ptr(rec.value).to_str().unwrap().to_string() }, 511 | }, 512 | _ => panic!("Unknown type: {}", rec.type_), 513 | }; 514 | result.push(record); 515 | } 516 | result 517 | } 518 | 519 | /// Create an empty record using this header view. 520 | /// 521 | /// The record can be reused multiple times. 
522 | pub fn empty_record(self: &Arc) -> crate::bcf::Record { 523 | crate::bcf::Record::new(self.clone()) 524 | } 525 | } 526 | 527 | impl Clone for HeaderView { 528 | fn clone(&self) -> Self { 529 | HeaderView { 530 | inner: unsafe { htslib::bcf_hdr_dup(self.inner) }, 531 | } 532 | } 533 | } 534 | 535 | impl Drop for HeaderView { 536 | fn drop(&mut self) { 537 | unsafe { 538 | htslib::bcf_hdr_destroy(self.inner); 539 | } 540 | } 541 | } 542 | 543 | #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] 544 | pub enum TagType { 545 | Flag, 546 | Integer, 547 | Float, 548 | String, 549 | } 550 | 551 | #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] 552 | pub enum TagLength { 553 | Fixed(u32), 554 | AltAlleles, 555 | Alleles, 556 | Genotypes, 557 | Variable, 558 | } 559 | 560 | #[cfg(test)] 561 | mod tests { 562 | use crate::bcf::Reader; 563 | 564 | #[test] 565 | fn test_header_view_empty_record() { 566 | // Open a VCF file to get a HeaderView 567 | let vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file"); 568 | let header_view = vcf.header.clone(); 569 | 570 | // Create an empty record from the HeaderView 571 | let record = header_view.empty_record(); 572 | eprintln!("{:?}", record.rid()); 573 | 574 | // Verify the record is properly initialized with default/empty values 575 | assert_eq!(record.rid(), Some(0)); // No chromosome/contig set 576 | assert_eq!(record.pos(), 0); // No position set 577 | assert_eq!(record.qual(), 0.0); // No quality score set 578 | } 579 | } 580 | -------------------------------------------------------------------------------- /src/bgzf/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Manuel Landesfeind, Evotec International GmbH 2 | // Licensed under the MIT license (http://opensource.org/licenses/MIT) 3 | // This file may not be copied, modified, or distributed 4 | // except according to those terms. 5 | 6 | //! 7 | //! 
Module for working with bgzipped file. 8 | //! 9 | 10 | use std::ffi; 11 | use std::path::Path; 12 | use url::Url; 13 | 14 | use crate::htslib; 15 | use crate::tpool::ThreadPool; 16 | 17 | use crate::errors::{Error, Result}; 18 | 19 | fn path_as_bytes<'a, P: 'a + AsRef>(path: P, must_exist: bool) -> Result> { 20 | if path.as_ref().exists() || !must_exist { 21 | Ok(path 22 | .as_ref() 23 | .to_str() 24 | .ok_or(Error::NonUnicodePath)? 25 | .as_bytes() 26 | .to_owned()) 27 | } else { 28 | Err(Error::FileNotFound { 29 | path: path.as_ref().to_owned(), 30 | }) 31 | } 32 | } 33 | 34 | /// Test if a file is a Bgzip compressed file 35 | /// 36 | /// # Arguments 37 | /// 38 | /// * `path` - the path to test. 39 | /// 40 | /// # Returns: 41 | /// Will return `Ok(true)` or `Ok(false)` if the file at `path` is BGZIP compressed. Will return an `Err` in 42 | /// cases where no testing is possible. 43 | pub fn is_bgzip>(path: P) -> Result { 44 | let byte_path = path_as_bytes(path, true)?; 45 | let cpath = ffi::CString::new(byte_path).unwrap(); 46 | let is_bgzf = unsafe { htslib::bgzf_is_bgzf(cpath.as_ptr()) == 1 }; 47 | Ok(is_bgzf) 48 | } 49 | 50 | /// A reader that transparently reads uncompressed, gzip, and bgzip files. 51 | #[derive(Debug)] 52 | pub struct Reader { 53 | inner: *mut htslib::BGZF, 54 | } 55 | 56 | impl Reader { 57 | /// Create a new Reader to read from stdin. 58 | pub fn from_stdin() -> Result { 59 | Self::new(b"-") 60 | } 61 | 62 | /// Create a new Reader from a path. 63 | /// 64 | /// # Arguments 65 | /// 66 | /// * `path` - the path to open. 67 | pub fn from_path>(path: P) -> Result { 68 | Self::new(&path_as_bytes(path, true)?) 69 | } 70 | 71 | /// Create a new Reader from an URL. 72 | /// 73 | /// # Arguments 74 | /// 75 | /// * `url` - the url to open 76 | pub fn from_url(url: &Url) -> Result { 77 | Self::new(url.as_str().as_bytes()) 78 | } 79 | 80 | /// Internal function to create a Reader from some sort of path (could be file path but also URL). 
81 | /// The path or URL will be handled by the c-implementation transparently. 82 | /// 83 | /// # Arguments 84 | /// 85 | /// * `path` - the path or URL to open 86 | fn new(path: &[u8]) -> Result { 87 | let mode = ffi::CString::new("r").unwrap(); 88 | let cpath = ffi::CString::new(path).unwrap(); 89 | let inner = unsafe { htslib::bgzf_open(cpath.as_ptr(), mode.as_ptr()) }; 90 | if !inner.is_null() { 91 | Ok(Self { inner }) 92 | } else { 93 | Err(Error::FileOpen { 94 | path: String::from_utf8(path.to_vec()).unwrap(), 95 | }) 96 | } 97 | } 98 | 99 | /// Set the thread pool to use for parallel decompression. 100 | /// 101 | /// # Arguments 102 | /// 103 | /// * `tpool` - the thread-pool to use 104 | pub fn set_thread_pool(&mut self, tpool: &ThreadPool) -> Result<()> { 105 | let b = tpool.handle.borrow_mut(); 106 | let r = unsafe { 107 | htslib::bgzf_thread_pool(self.inner, b.inner.pool as *mut _, 0) // let htslib decide on the queue-size 108 | }; 109 | 110 | if r != 0 { 111 | Err(Error::ThreadPool) 112 | } else { 113 | Ok(()) 114 | } 115 | } 116 | } 117 | 118 | impl std::io::Read for Reader { 119 | fn read(&mut self, buf: &mut [u8]) -> std::io::Result { 120 | let nbytes = unsafe { 121 | htslib::bgzf_read(self.inner, buf.as_mut_ptr() as *mut libc::c_void, buf.len()) 122 | }; 123 | if nbytes < 0 { 124 | Err(std::io::Error::other("Can not read")) 125 | } else { 126 | Ok(nbytes as usize) 127 | } 128 | } 129 | } 130 | 131 | /// The CompressionLevel used by the underlying GZIP writer 132 | /// Note that the special level NoCompression will not use the GZIP writer. 133 | /// Compression levels in BGZF files 134 | /// 135 | /// * Uncompressed: No compression, zlib level 0 136 | /// * Fastest: Lowest compression level, zlib level 1 137 | /// * Maximum: Highest compression level, zlib level 9 138 | /// * Default: Default compression level, zlib level 6 139 | /// * Level(i): Custom compression level in the range [0, 9] 140 | /// * NoCompression: No compression, zlib not used. 
/// Output will be identical to input
#[derive(Debug, Clone, Copy)]
pub enum CompressionLevel {
    /// Numeric level -1; the writer is opened with the plain mode `"w"`.
    Default,
    /// Numeric level -2; the writer is opened with mode `"wu"`.
    NoCompression,
    /// Numeric level 0 (mode `"w0"`).
    Uncompressed,
    /// Numeric level 1 (mode `"w1"`).
    Fastest,
    /// Numeric level 9 (mode `"w9"`).
    Maximum,
    /// Explicit numeric level; values outside `-2..=9` are rejected with
    /// `Error::BgzfInvalidCompressionLevel`.
    Level(i8),
}

impl CompressionLevel {
    /// Convert and check the variants of the `CompressionLevel` enum to a numeric level.
    ///
    /// Returns a value in `-2..=9`, or `Error::BgzfInvalidCompressionLevel` when a
    /// `Level(i)` lies outside that range.
    // NOTE(review): the `<i8>` type argument was missing in the reviewed text and is
    // reconstructed from the returned literals — confirm against the repository.
    fn convert(self) -> Result<i8> {
        match self {
            CompressionLevel::NoCompression => Ok(-2),
            CompressionLevel::Default => Ok(-1),
            CompressionLevel::Uncompressed => Ok(0),
            CompressionLevel::Fastest => Ok(1),
            CompressionLevel::Maximum => Ok(9),
            CompressionLevel::Level(i @ -2..=9) => Ok(i),
            CompressionLevel::Level(i) => Err(Error::BgzfInvalidCompressionLevel { level: i }),
        }
    }
}

/// A writer that writes uncompressed, gzip, and bgzip files.
#[derive(Debug)]
pub struct Writer {
    // Raw htslib handle returned by `bgzf_open`; closed in `Drop`.
    inner: *mut htslib::BGZF,
    // Clone of the pool handed to `set_thread_pool`, if any.
    tpool: Option<ThreadPool>,
}

impl Writer {
    /// Create a new Writer to write to stdout with default compression.
    pub fn from_stdout() -> Result<Self> {
        Self::from_stdout_with_compression(CompressionLevel::Default)
    }

    /// Create a new Writer to write to stdout with specific compression
    ///
    /// # Arguments
    ///
    /// * `level` the compression level to use
    pub fn from_stdout_with_compression(level: CompressionLevel) -> Result<Self> {
        Self::new(b"-", level)
    }

    /// Create a new Writer from a path with default compression.
    ///
    /// # Arguments
    ///
    /// * `path` - the path to open.
    // NOTE(review): the generic bound was missing in the reviewed text; `AsRef<Path>`
    // is assumed from the `path_as_bytes` call and the test call sites — confirm.
    pub fn from_path<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
        Self::from_path_with_level(path, CompressionLevel::Default)
    }

    /// Create a new Writer from a path with a specific compression level.
    ///
    /// # Arguments
    ///
    /// * `path` - the path to open.
    /// * `level` - the compression level to use
    pub fn from_path_with_level<P: AsRef<std::path::Path>>(
        path: P,
        level: CompressionLevel,
    ) -> Result<Self> {
        Self::new(&path_as_bytes(path, false)?, level)
    }

    /// Internal function to create a Writer from a file path
    ///
    /// # Arguments
    ///
    /// * `path` - the path or URL to open
    /// * `level` - the compression level, translated into the htslib open mode
    ///
    /// # Errors
    ///
    /// * `Error::BgzfInvalidCompressionLevel` if `level` is out of range.
    /// * `Error::FileOpen` if `path` contains an interior NUL byte or htslib
    ///   fails to open it.
    fn new(path: &[u8], level: CompressionLevel) -> Result<Self> {
        let mode = Self::get_open_mode(level)?;
        // The path is only turned into a `String` for error reporting, so a lossy
        // conversion is used: a non-UTF-8 path must not turn a failed open into a panic.
        let open_error = || Error::FileOpen {
            path: String::from_utf8_lossy(path).into_owned(),
        };
        // A path with an interior NUL cannot be passed to C; report it as an
        // unopenable file instead of panicking.
        let cpath = ffi::CString::new(path).map_err(|_| open_error())?;
        let inner = unsafe { htslib::bgzf_open(cpath.as_ptr(), mode.as_ptr()) };
        if inner.is_null() {
            Err(open_error())
        } else {
            Ok(Self { inner, tpool: None })
        }
    }

    /// Internal function to convert compression level to "mode"
    /// bgzf.c expects mode for writers to be one of: 'w', 'wu', 'w#', where # is 0-9.
    /// # Arguments
    ///
    /// * `level` - the level of compression to use
    fn get_open_mode(level: CompressionLevel) -> Result<ffi::CString> {
        let write_string = match level.convert()? {
            -2 => "wu".to_string(),
            -1 => "w".to_string(),
            n @ 0..=9 => format!("w{}", n),
            // This should be unreachable: `convert` only yields values in -2..=9.
            i => return Err(Error::BgzfInvalidCompressionLevel { level: i }),
        };
        // The mode strings built above never contain a NUL byte.
        Ok(ffi::CString::new(write_string).unwrap())
    }

    /// Set the thread pool to use for parallel compression.
    ///
    /// # Arguments
    ///
    /// * `tpool` - the thread-pool to use
    ///
    /// # Errors
    ///
    /// Returns `Error::ThreadPool` when `bgzf_thread_pool` reports a non-zero status.
    pub fn set_thread_pool(&mut self, tpool: &ThreadPool) -> Result<()> {
        // Keep a clone of the pool in the writer alongside the raw handle.
        self.tpool = Some(tpool.clone());
        let b = tpool.handle.borrow_mut();
        let r = unsafe {
            htslib::bgzf_thread_pool(self.inner, b.inner.pool as *mut _, 0) // let htslib decide on the queue-size
        };

        if r != 0 {
            Err(Error::ThreadPool)
        } else {
            Ok(())
        }
    }
}

impl std::io::Write for Writer {
    /// Pass `buf` to `bgzf_write` and return the number of bytes it reports as written.
    /// A negative return value from htslib is mapped to an `io::Error`.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let nbytes =
            unsafe { htslib::bgzf_write(self.inner, buf.as_ptr() as *mut libc::c_void, buf.len()) };
        if nbytes < 0 {
            Err(std::io::Error::other("Can not write"))
        } else {
            Ok(nbytes as usize)
        }
    }

    /// Call `bgzf_flush`; any non-zero exit code is mapped to an `io::Error`.
    fn flush(&mut self) -> std::io::Result<()> {
        let exit_code: i32 = unsafe { htslib::bgzf_flush(self.inner) };
        if exit_code == 0 {
            Ok(())
        } else {
            Err(std::io::Error::other("Can not flush"))
        }
    }
}

impl std::ops::Drop for Writer {
    /// Close the underlying BGZF handle; the status returned by `bgzf_close` is ignored.
    fn drop(&mut self) {
        unsafe {
            htslib::bgzf_close(self.inner);
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Read;
    use std::io::Write;

    // Define paths to the test files
    const FN_PLAIN: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/test/bgzip/plain.vcf");
    const FN_GZIP: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/test/bgzip/gzip.vcf.gz");
    const FN_BGZIP: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/test/bgzip/bgzip.vcf.gz");

    const CONTENT: &str = include_str!("../../test/bgzip/plain.vcf");

    #[test]
    fn test_is_bgzip_plain() {
        assert!(
            !is_bgzip(FN_PLAIN).unwrap(),
            "Plain file not detected as BGZIP"
        );
        assert!(
            !is_bgzip(FN_GZIP).unwrap(),
            "Zip file not detected as BGZIP"
314 | ); 315 | assert!(is_bgzip(FN_BGZIP).unwrap(), "Bgzip file detected as BGZIP"); 316 | } 317 | 318 | #[test] 319 | fn test_open_plain() { 320 | let r_result = Reader::from_path(FN_PLAIN); 321 | assert!(r_result.is_ok(), "Open plain file with Bgzip reader"); 322 | 323 | let mut my_content = String::new(); 324 | let reading_result = r_result.unwrap().read_to_string(&mut my_content); 325 | assert!( 326 | reading_result.is_ok(), 327 | "Reading plain file into buffer is ok" 328 | ); 329 | assert_eq!( 330 | reading_result.unwrap(), 331 | 190, 332 | "Reading plain file into buffer is correct size" 333 | ); 334 | assert_eq!( 335 | my_content, CONTENT, 336 | "Reading plain file with correct content" 337 | ); 338 | } 339 | 340 | #[test] 341 | fn test_open_gzip() { 342 | let r_result = Reader::from_path(FN_GZIP); 343 | assert!(r_result.is_ok(), "Open gzip file with Bgzip reader"); 344 | 345 | let mut my_content = String::new(); 346 | let reading_result = r_result.unwrap().read_to_string(&mut my_content); 347 | assert!( 348 | reading_result.is_ok(), 349 | "Reading gzip file into buffer is ok" 350 | ); 351 | assert_eq!( 352 | reading_result.unwrap(), 353 | 190, 354 | "Reading gzip file into buffer is correct size" 355 | ); 356 | assert_eq!( 357 | my_content, CONTENT, 358 | "Reading gzip file with correct content" 359 | ); 360 | } 361 | 362 | #[test] 363 | fn test_open_bgzip() { 364 | let r_result = Reader::from_path(FN_BGZIP); 365 | assert!(r_result.is_ok(), "Open bgzip file with Bgzip reader"); 366 | 367 | let mut my_content = String::new(); 368 | let reading_result = r_result.unwrap().read_to_string(&mut my_content); 369 | assert!( 370 | reading_result.is_ok(), 371 | "Reading bgzip file into buffer is ok" 372 | ); 373 | assert_eq!( 374 | reading_result.unwrap(), 375 | 190, 376 | "Reading bgzip file into buffer is correct size" 377 | ); 378 | assert_eq!( 379 | my_content, CONTENT, 380 | "Reading bgzip file with correct content" 381 | ); 382 | } 383 | #[test] 384 | fn 
test_set_threadpool() { 385 | let r_result = Reader::from_path(FN_BGZIP); 386 | assert!(r_result.is_ok(), "Open bgzip file with Bgzip reader"); 387 | let mut r = r_result.unwrap(); 388 | 389 | let tpool_result = ThreadPool::new(5); 390 | assert!(tpool_result.is_ok(), "Creating thread pool"); 391 | let tpool = tpool_result.unwrap(); 392 | 393 | let set_result = r.set_thread_pool(&tpool); 394 | assert_eq!(set_result, Ok(()), "Setting thread pool okay"); 395 | 396 | let mut my_content = String::new(); 397 | let reading_result = r.read_to_string(&mut my_content); 398 | assert!( 399 | reading_result.is_ok(), 400 | "Reading bgzip file into buffer is ok - using a threadpool" 401 | ); 402 | assert_eq!( 403 | reading_result.unwrap(), 404 | 190, 405 | "Reading bgzip file into buffer is correct size using a threadpool" 406 | ); 407 | assert_eq!( 408 | my_content, CONTENT, 409 | "Reading bgzip file with correct content using a threadpool" 410 | ); 411 | } 412 | 413 | #[test] 414 | fn test_write_plain() { 415 | let tmp = tempfile::Builder::new() 416 | .prefix("rust-htslib") 417 | .tempdir() 418 | .expect("Cannot create temp dir"); 419 | let out_path = tmp.path().join("test.vcf"); 420 | println!("{:?}", out_path); 421 | 422 | { 423 | let w_result = Writer::from_path_with_level(&out_path, CompressionLevel::NoCompression); 424 | if let Err(ref e) = w_result { 425 | println!("w_result is {}", e); 426 | } 427 | assert!(w_result.is_ok(), "Create plain file with Bgzip writer"); 428 | assert!(out_path.exists(), "Plain file is created with Bgzip writer"); 429 | let mut w = w_result.unwrap(); 430 | let write_result = w.write_all(CONTENT.as_bytes()); 431 | assert!( 432 | write_result.is_ok(), 433 | "Plain file can write with Bgzip writer" 434 | ); 435 | } // let Writer fall out of scope and implicitly close 436 | assert!( 437 | !is_bgzip(&out_path).unwrap(), 438 | "NoCompression file should not be detected as BGZIP" 439 | ); 440 | let my_content = 
std::fs::read_to_string(&out_path).unwrap(); 441 | assert_eq!( 442 | my_content, CONTENT, 443 | "Writing bgzip file with no compression" 444 | ); 445 | 446 | tmp.close().expect("Failed to delete temp dir"); 447 | } 448 | 449 | #[test] 450 | fn test_write_default() { 451 | let tmp = tempfile::Builder::new() 452 | .prefix("rust-htslib") 453 | .tempdir() 454 | .expect("Cannot create temp dir"); 455 | let out_path = tmp.path().join("test.vcf.bgzf"); 456 | println!("{:?}", out_path); 457 | { 458 | let w_result = Writer::from_path(&out_path); 459 | if let Err(ref e) = w_result { 460 | println!("w_result is {}", e); 461 | } 462 | assert!(w_result.is_ok(), "Create bgzip file with Bgzip writer"); 463 | assert!( 464 | std::path::Path::new(&out_path).exists(), 465 | "Bgzip file is created with Bgzip writer" 466 | ); 467 | let mut w = w_result.unwrap(); 468 | let write_result = w.write_all(CONTENT.as_bytes()); 469 | assert!( 470 | write_result.is_ok(), 471 | "Bgzip file can write with Bgzip writer" 472 | ); 473 | } // let Writer fall out of scope and implicitly close 474 | 475 | // Read in with bgzip reader 476 | let mut my_content = String::new(); 477 | Reader::from_path(&out_path) 478 | .unwrap() 479 | .read_to_string(&mut my_content) 480 | .unwrap(); 481 | assert_eq!( 482 | my_content, CONTENT, 483 | "Writing bgzip file with default compression" 484 | ); 485 | 486 | assert!( 487 | is_bgzip(&out_path).unwrap(), 488 | "Default BGZIP file detected as BGZIP" 489 | ); 490 | tmp.close().expect("Failed to delete temp dir"); 491 | } 492 | 493 | #[test] 494 | fn test_write_compression_levels() { 495 | let tmp = tempfile::Builder::new() 496 | .prefix("rust-htslib") 497 | .tempdir() 498 | .expect("Cannot create temp dir"); 499 | let out_path = tmp.path().join("test.vcf.bgzf"); 500 | 501 | // Test all levels except NoCompression 502 | let compression_levels = vec![ 503 | CompressionLevel::Fastest, 504 | CompressionLevel::Maximum, 505 | CompressionLevel::Uncompressed, 506 | ] 507 | 
.into_iter() 508 | .chain((-1..=9_i8).map(CompressionLevel::Level)); 509 | 510 | for level in compression_levels { 511 | { 512 | let w_result = Writer::from_path_with_level(&out_path, level); 513 | if let Err(ref e) = w_result { 514 | println!("w_result is {}", e); 515 | } 516 | assert!(w_result.is_ok(), "Create bgzip file with Bgzip writer"); 517 | assert!( 518 | std::path::Path::new(&out_path).exists(), 519 | "Bgzip file is created with Bgzip writer" 520 | ); 521 | let mut w = w_result.unwrap(); 522 | let write_result = w.write_all(CONTENT.as_bytes()); 523 | assert!( 524 | write_result.is_ok(), 525 | "Bgzip file can write with Bgzip writer" 526 | ); 527 | } // let Writer fall out of scope and implicitly close 528 | 529 | // Read in with bgzip reader 530 | let mut my_content = String::new(); 531 | Reader::from_path(&out_path) 532 | .unwrap() 533 | .read_to_string(&mut my_content) 534 | .unwrap(); 535 | assert_eq!( 536 | my_content, CONTENT, 537 | "Writing bgzip file with {:?} compression", 538 | level 539 | ); 540 | 541 | assert!( 542 | is_bgzip(&out_path).unwrap(), 543 | "Writing BGZIP file with {:?} compression detected as BGZIP", 544 | level 545 | ); 546 | } 547 | tmp.close().expect("Failed to delete temp dir"); 548 | } 549 | 550 | #[test] 551 | fn test_write_with_threadpool() { 552 | let tmp = tempfile::Builder::new() 553 | .prefix("rust-htslib") 554 | .tempdir() 555 | .expect("Cannot create temp dir"); 556 | let out_path = tmp.path().join("test.vcf.bgzf"); 557 | 558 | let content = CONTENT.as_bytes(); 559 | println!("{:?}", out_path); 560 | { 561 | let w_result = Writer::from_path(&out_path); 562 | if let Err(ref e) = w_result { 563 | println!("w_result is {}", e); 564 | } 565 | assert!(w_result.is_ok(), "Create bgzip file with Bgzip threadpool"); 566 | assert!( 567 | std::path::Path::new(&out_path).exists(), 568 | "Bgzip file is created with Bgzip threadpool" 569 | ); 570 | 571 | let mut w = w_result.unwrap(); 572 | let tpool_result = ThreadPool::new(5); 573 
| assert!(tpool_result.is_ok(), "Creating thread pool"); 574 | let tpool = tpool_result.unwrap(); 575 | 576 | let set_tpool_result = w.set_thread_pool(&tpool); 577 | assert!(set_tpool_result.is_ok(), "Setting thread pool"); 578 | 579 | let write_result = w.write_all(content); 580 | assert!( 581 | write_result.is_ok(), 582 | "Bgzip file can write with Bgzip threadpool" 583 | ); 584 | } // let Writer fall out of scope and implicitly close 585 | 586 | // Read in with bgzip reader 587 | let mut my_content = String::new(); 588 | Reader::from_path(&out_path) 589 | .unwrap() 590 | .read_to_string(&mut my_content) 591 | .unwrap(); 592 | assert_eq!(my_content, CONTENT, "Writing bgzip file with threadpool"); 593 | 594 | assert!( 595 | is_bgzip(&out_path).unwrap(), 596 | "Threadpool BGZIP file detected as BGZIP" 597 | ); 598 | 599 | tmp.close().expect("Failed to delete temp dir"); 600 | } 601 | } 602 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. 3 | This project adheres to [Semantic Versioning](http://semver.org/). 4 | 5 | ## [0.51.0](https://github.com/rust-bio/rust-htslib/compare/v0.50.0...v0.51.0) (2025-10-15) 6 | 7 | 8 | ### Features 9 | 10 | * Add support for updating existing aux tags inplace. 
([#481](https://github.com/rust-bio/rust-htslib/issues/481)) ([f4a1106](https://github.com/rust-bio/rust-htslib/commit/f4a1106df49bde257b323d966fe4228b1a5ece15)) 11 | 12 | ## [0.50.0](https://github.com/rust-bio/rust-htslib/compare/v0.49.0...v0.50.0) (2025-07-09) 13 | 14 | 15 | ### Features 16 | 17 | * Add bam::Record::set_cigar ([#477](https://github.com/rust-bio/rust-htslib/issues/477)) ([f1bb470](https://github.com/rust-bio/rust-htslib/commit/f1bb470d836e3cd8affae000f0fc14308f156c88)) 18 | * Allow for non-diploid genotypes ([#476](https://github.com/rust-bio/rust-htslib/issues/476)) ([1c22ac5](https://github.com/rust-bio/rust-htslib/commit/1c22ac598edaa9fc1be354b857dc8f8f5c892984)) 19 | 20 | 21 | ### Bug Fixes 22 | 23 | * clippy and fmt after merging PR [#466](https://github.com/rust-bio/rust-htslib/issues/466) and [#467](https://github.com/rust-bio/rust-htslib/issues/467) ([141e01c](https://github.com/rust-bio/rust-htslib/commit/141e01c74aa3d087f4c2e7fe380d06b1e272865b)) 24 | 25 | ## [0.49.0](https://github.com/rust-bio/rust-htslib/compare/v0.48.0...v0.49.0) (2024-12-02) 26 | 27 | 28 | ### Features 29 | 30 | * move empty_record to header ([#453](https://github.com/rust-bio/rust-htslib/issues/453)) ([797965c](https://github.com/rust-bio/rust-htslib/commit/797965c5ab815112b1a2a1bcdb32716c1bc1f92a)) 31 | 32 | 33 | ### Bug Fixes 34 | 35 | * memory leak in faidx when fetching sequences ([#455](https://github.com/rust-bio/rust-htslib/issues/455)) ([d9fe03a](https://github.com/rust-bio/rust-htslib/commit/d9fe03acfb81278a09a7e6091b9e1c38dd7b6cb4)) 36 | 37 | ## [0.48.0](https://github.com/rust-bio/rust-htslib/compare/v0.47.1...v0.48.0) (2024-11-12) 38 | 39 | 40 | ### Features 41 | 42 | * Add to_vcf_string method for bcf::Record ([#443](https://github.com/rust-bio/rust-htslib/issues/443)) ([489c0d7](https://github.com/rust-bio/rust-htslib/commit/489c0d7677445cfe38580c8d4c843ad4f4e2d827)) 43 | 44 | 45 | ### Bug Fixes 46 | 47 | * return error when bgzf_open fails to open a 
file ([#444](https://github.com/rust-bio/rust-htslib/issues/444)) ([9bda5f7](https://github.com/rust-bio/rust-htslib/commit/9bda5f768a5c54767c7e08ef1cafd28ff3f2a3b3)) 48 | 49 | ## [0.47.1](https://github.com/rust-bio/rust-htslib/compare/v0.47.0...v0.47.1) (2024-11-12) 50 | 51 | 52 | ### Bug Fixes 53 | 54 | * allow leading deletions in read_pos method of CigarStringView. ([#447](https://github.com/rust-bio/rust-htslib/issues/447)) ([2986713](https://github.com/rust-bio/rust-htslib/commit/298671382ffeab8f1f057cde83e8474963fbfb9a)) 55 | 56 | ## [0.47.0](https://github.com/rust-bio/rust-htslib/compare/v0.46.0...v0.47.0) (2024-05-22) 57 | 58 | 59 | ### Features 60 | 61 | * Add fasta::build function + FaidxBuildError ([#418](https://github.com/rust-bio/rust-htslib/issues/418)) ([7c575ef](https://github.com/rust-bio/rust-htslib/commit/7c575ef549908745f34d9371986551f3d70ed444)) 62 | * Add rust_htslib::bcf::index::build ([#408](https://github.com/rust-bio/rust-htslib/issues/408)) ([79d70cd](https://github.com/rust-bio/rust-htslib/commit/79d70cd6683f1a019e9052baa495dada709db432)) 63 | * derive PartialEq and Eq for bam:: and bcf::Format ([#428](https://github.com/rust-bio/rust-htslib/issues/428)) ([528e543](https://github.com/rust-bio/rust-htslib/commit/528e54367367487a28bbc2566bd37de995f8ed1d)) 64 | 65 | 66 | ### Bug Fixes 67 | 68 | * bam::Record:new should return a valid record ([#361](https://github.com/rust-bio/rust-htslib/issues/361)) ([87f2011](https://github.com/rust-bio/rust-htslib/commit/87f20116c4337eda17a416ebafb8976abc188d87)) 69 | * build for macOS ([#431](https://github.com/rust-bio/rust-htslib/issues/431)) ([d869fdd](https://github.com/rust-bio/rust-htslib/commit/d869fdda03900cafae0f4f60b033121dcd57b723)) 70 | * in bam record buffer, change the start of the window to the first added item in last iteration ([#430](https://github.com/rust-bio/rust-htslib/issues/430)) 
([56ee2bd](https://github.com/rust-bio/rust-htslib/commit/56ee2bd562788dad0dc8516d0e3db90ffa916320)) 71 | * Panic on trailing omitted FORMAT records ([#417](https://github.com/rust-bio/rust-htslib/issues/417)) ([9f575ee](https://github.com/rust-bio/rust-htslib/commit/9f575ee40e15737731bc8234812c0cf36c1157f4)) 72 | 73 | ## [0.46.0](https://github.com/rust-bio/rust-htslib/compare/v0.45.0...v0.46.0) (2024-02-22) 74 | 75 | 76 | ### Features 77 | 78 | * making several RecordBuffer methods public ([6757f52](https://github.com/rust-bio/rust-htslib/commit/6757f5219955fd4edba4f61e62978ce1e001068e)) 79 | 80 | 81 | ### Bug Fixes 82 | 83 | * fix building libz-sys ([#420](https://github.com/rust-bio/rust-htslib/issues/420)) ([01c8849](https://github.com/rust-bio/rust-htslib/commit/01c884945686e7a6756406b579fde28657f70b36)) 84 | 85 | ## [0.45.0](https://github.com/rust-bio/rust-htslib/compare/v0.44.1...v0.45.0) (2024-02-07) 86 | 87 | 88 | ### Features 89 | 90 | * adding function to get sequence length to faidx mod ([#410](https://github.com/rust-bio/rust-htslib/issues/410)) ([ae79eba](https://github.com/rust-bio/rust-htslib/commit/ae79eba82ef6929105bdbe08246a8e973660899e)) 91 | 92 | 93 | ### Bug Fixes 94 | 95 | * Loosen acceptable types to support current linux build on aarch64 ([#415](https://github.com/rust-bio/rust-htslib/issues/415)) ([1d78d12](https://github.com/rust-bio/rust-htslib/commit/1d78d1251a052461605d28cd8cf832ccad93ef73)) 96 | 97 | ## [0.44.1](https://github.com/rust-bio/rust-htslib/compare/v0.44.0...v0.44.1) (2023-06-21) 98 | 99 | 100 | ### Bug Fixes 101 | 102 | * use correct return value in bcf_get_format and bcf_get_info_values ([#398](https://github.com/rust-bio/rust-htslib/issues/398)) ([f9a1981](https://github.com/rust-bio/rust-htslib/commit/f9a1981fa84eef39e35f868ddfc773ea265b94b3)) 103 | 104 | ## [0.44.0](https://github.com/rust-bio/rust-htslib/compare/v0.43.1...v0.44.0) (2023-06-20) 105 | 106 | 107 | ### Features 108 | 109 | * implement Clone for 
bcf::Record ([#394](https://github.com/rust-bio/rust-htslib/issues/394)) ([e89538d](https://github.com/rust-bio/rust-htslib/commit/e89538d5a9971c6508ac38d92ac468f3d70241aa)) 110 | * implement htslib basemod api ([#385](https://github.com/rust-bio/rust-htslib/issues/385)) ([8beee14](https://github.com/rust-bio/rust-htslib/commit/8beee145a116f7ae936f1b6e36d876116dca18f1)) 111 | 112 | 113 | ### Bug Fixes 114 | 115 | * include doctests in test coverage calculations ([#397](https://github.com/rust-bio/rust-htslib/issues/397)) ([8ed0837](https://github.com/rust-bio/rust-htslib/commit/8ed083783fa1dce09535564a090d37f687fc832f)) 116 | 117 | ## [0.43.1](https://github.com/rust-bio/rust-htslib/compare/v0.43.0...v0.43.1) (2023-05-16) 118 | 119 | 120 | ### Bug Fixes 121 | 122 | * implement Drop for faidx::Reader, destroying the fai handle ([#391](https://github.com/rust-bio/rust-htslib/issues/391)) ([0e6d6ac](https://github.com/rust-bio/rust-htslib/commit/0e6d6acec9a6d24ed6baf810e56f02394737a046)) 123 | 124 | ## [0.43.0](https://github.com/rust-bio/rust-htslib/compare/v0.42.0...v0.43.0) (2023-05-12) 125 | 126 | 127 | ### Features 128 | 129 | * HeaderRecord::push_tag: Value may be owned ([#388](https://github.com/rust-bio/rust-htslib/issues/388)) ([b64537d](https://github.com/rust-bio/rust-htslib/commit/b64537db011b76e5ace73a5e74c598a62a0a018b)) 130 | * Index for `bam::IndexedReader` ([#387](https://github.com/rust-bio/rust-htslib/issues/387)) ([fb74387](https://github.com/rust-bio/rust-htslib/commit/fb743875182c9232894e07007367f09f05d6e275)) 131 | 132 | ## [0.42.0](https://github.com/rust-bio/rust-htslib/compare/v0.41.1...v0.42.0) (2023-03-30) 133 | 134 | 135 | ### Features 136 | 137 | * Add ability to fetch number of sequences and I-th sequence from FAI index ([#377](https://github.com/rust-bio/rust-htslib/issues/377)) ([6ecc4bd](https://github.com/rust-bio/rust-htslib/commit/6ecc4bd1f88110da278c7f934453024e4e64ac74)) 138 | 139 | ## 
[0.41.1](https://github.com/rust-bio/rust-htslib/compare/v0.41.0...v0.41.1) (2023-03-03) 140 | 141 | ### Features 142 | 143 | * Revised calculation of leading- and trailing-softclips ([#375](https://github.com/rust-bio/rust-htslib/issues/375)) ([b61dd2c](https://github.com/rust-bio/rust-htslib/commit/b61dd2cfb2b74c0180f2d76bbd4ed4eb14fa09b3)) 144 | 145 | 146 | ### Performance Improvements 147 | 148 | * update htslib and corresponding bindings to 1.16 ([#366](https://github.com/rust-bio/rust-htslib/issues/366)) ([f597ce0](https://github.com/rust-bio/rust-htslib/commit/f597ce0451e3f3c393166a7291486bbc2bde4c39)) 149 | 150 | 151 | ## [0.40.2](https://github.com/rust-bio/rust-htslib/compare/rust-htslib-v0.40.1...rust-htslib-v0.40.2) (2022-10-13) 152 | 153 | 154 | ### Performance Improvements 155 | 156 | * update htslib and corresponding bindings to 1.16 ([#366](https://github.com/rust-bio/rust-htslib/issues/366)) ([f597ce0](https://github.com/rust-bio/rust-htslib/commit/f597ce0451e3f3c393166a7291486bbc2bde4c39)) 157 | 158 | ## [0.40.1](https://github.com/rust-bio/rust-htslib/compare/rust-htslib-v0.40.0...rust-htslib-v0.40.1) (2022-08-24) 159 | 160 | 161 | ### Bug Fixes 162 | 163 | * Header::to_hashmap skips `@CO` tags, add `comments()` method ([#363](https://github.com/rust-bio/rust-htslib/issues/363)) ([c24a7f6](https://github.com/rust-bio/rust-htslib/commit/c24a7f69fbe5d2db4a6f1fbd6eda3922fe7f1c18)) 164 | 165 | ## [0.40.0](https://github.com/rust-bio/rust-htslib/compare/rust-htslib-v0.39.5...rust-htslib-v0.40.0) (2022-07-05) 166 | 167 | 168 | ### Features 169 | 170 | * Add wrapper of BGZF writer ([#349](https://github.com/rust-bio/rust-htslib/issues/349)) ([965ed88](https://github.com/rust-bio/rust-htslib/commit/965ed886a0c24ee3070e48cc192c0772ac5cbaf4)) 171 | 172 | 173 | ### Bug Fixes 174 | 175 | * update to latest release-please ([b130634](https://github.com/rust-bio/rust-htslib/commit/b130634b3d096e620dcfe59acae2200df3e4d847)) 176 | 177 | ### 
[0.39.5](https://www.github.com/rust-bio/rust-htslib/compare/rust-htslib-v0.39.4...rust-htslib-v0.39.5) (2022-05-09) 178 | 179 | 180 | ### Bug Fixes 181 | 182 | * set path in release-please config ([d8f7c6e](https://www.github.com/rust-bio/rust-htslib/commit/d8f7c6e8f31accb7576e150fa1439e177f7816cb)) 183 | 184 | ### [0.39.4](https://www.github.com/rust-bio/rust-htslib/compare/rust-htslib-v0.39.3...rust-htslib-v0.39.4) (2022-05-09) 185 | 186 | 187 | ### Bug Fixes 188 | 189 | * perform checkout before running release please ([cbc6a0a](https://www.github.com/rust-bio/rust-htslib/commit/cbc6a0ad37c5623d14f2ed0bcbb4c5289d012fcb)) 190 | 191 | ### [0.39.3](https://www.github.com/rust-bio/rust-htslib/compare/rust-htslib-v0.39.2...rust-htslib-v0.39.3) (2021-11-20) 192 | 193 | 194 | ### Bug Fixes 195 | 196 | * change the type to c_char so it can be compiled for aarch64 ([#337](https://www.github.com/rust-bio/rust-htslib/issues/337)) ([a21aff2](https://www.github.com/rust-bio/rust-htslib/commit/a21aff289bc03c7549afc7a958084ed57e8c93f2)) 197 | 198 | ### [0.39.2](https://www.github.com/rust-bio/rust-htslib/compare/rust-htslib-v0.39.1...rust-htslib-v0.39.2) (2021-08-23) 199 | 200 | 201 | ### Bug Fixes 202 | 203 | * Configuration when cross-compiling. Even when cross-compiling, build.rs runs on the build host. Hence within build.rs `#[cfg(target_os)]` always reflects the host, not the target. Use $CARGO_CFG_TARGET_OS instead to query target properties. 
([#329](https://www.github.com/rust-bio/rust-htslib/issues/329)) ([d5198e6](https://www.github.com/rust-bio/rust-htslib/commit/d5198e6c777fdbbfdd9c73a820f1be983a458ce2)) 204 | 205 | ### [0.39.1](https://www.github.com/rust-bio/rust-htslib/compare/rust-htslib-v0.39.0...rust-htslib-v0.39.1) (2021-07-06) 206 | 207 | 208 | ### Bug Fixes 209 | 210 | * bump hts-sys version to 2.0.1 ([336c8b8](https://www.github.com/rust-bio/rust-htslib/commit/336c8b8a1779422afea1065e37bcc44f54abac42)) 211 | 212 | ## [0.39.0](https://www.github.com/rust-bio/rust-htslib/compare/rust-htslib-vrust-htslib-0.38.3...rust-htslib-v0.39.0) (2021-07-06) 213 | 214 | 215 | ### ⚠ BREAKING CHANGES 216 | 217 | * dummy major version bump to move away from previous versions that were following htslib versions. 218 | * bump to new major version (for technical reasons). 219 | * dummy breaking change to increase hts-sys major version. 220 | 221 | ### Bug Fixes 222 | 223 | * bump to new major version (for technical reasons). ([9c6db30](https://www.github.com/rust-bio/rust-htslib/commit/9c6db3060818692070db1411d63e113dc7effd64)) 224 | * dummy breaking change to increase hts-sys major version. ([93415cb](https://www.github.com/rust-bio/rust-htslib/commit/93415cbb82e4f11d257a2b2cedba2664f86a034d)) 225 | * dummy changes ([3af5ede](https://www.github.com/rust-bio/rust-htslib/commit/3af5ede13a6b44ce5d1e7f0eb90836a692e711ec)) 226 | * dummy major version bump to move away from previous versions that were following htslib versions. 
([aaa70a8](https://www.github.com/rust-bio/rust-htslib/commit/aaa70a85ef9a908d3b101f23879189e84a15d23f)) 227 | * dummy release ([74d1565](https://www.github.com/rust-bio/rust-htslib/commit/74d1565329fc862f1172c0925c7b66ceb8bcf988)) 228 | * dummy release ([af2f84e](https://www.github.com/rust-bio/rust-htslib/commit/af2f84eb0411507f8866b3cc05e9a6ba9d81d172)) 229 | * dummy release ([b97915f](https://www.github.com/rust-bio/rust-htslib/commit/b97915f2c70da4c914f2e69861bf78eec5979baf)) 230 | * handle subcrate with release-please ([0a4605f](https://www.github.com/rust-bio/rust-htslib/commit/0a4605f165cb2edf4428d8fb39f7e4787585f4e1)) 231 | * trigger dummy release ([7c5a7de](https://www.github.com/rust-bio/rust-htslib/commit/7c5a7de33e2a92052126e5f44389d421974d1e02)) 232 | * update changelog ([deef08f](https://www.github.com/rust-bio/rust-htslib/commit/deef08feb0b5ba2d8abf98f2cc6d327236da8aef)) 233 | 234 | ### [0.38.3](https://www.github.com/rust-bio/rust-htslib/compare/v0.38.2...v0.38.3) (2021-07-06) 235 | 236 | 237 | ### Bug Fixes 238 | 239 | * dummy fix for triggering release ([e92e6b1](https://www.github.com/rust-bio/rust-htslib/commit/e92e6b10b0a7e5db50b12e2fbe2c42b467eb369e)) 240 | 241 | ### [0.38.2](https://www.github.com/rust-bio/rust-htslib/compare/v0.38.1...v0.38.2) (2021-07-06) 242 | 243 | 244 | ### Bug Fixes 245 | 246 | * add ID to automatic release handling ([1244393](https://www.github.com/rust-bio/rust-htslib/commit/124439300e1e3e01e1d847f7549747d560c01989)) 247 | 248 | ### [0.38.1](https://www.github.com/rust-bio/rust-htslib/compare/v0.38.0...v0.38.1) (2021-07-06) 249 | 250 | 251 | ### Bug Fixes 252 | 253 | * improved documentation ([cb0b66c](https://www.github.com/rust-bio/rust-htslib/commit/cb0b66c4a92d4f03debe38dfb2a014b154c7dd96)) 254 | 255 | ## [0.38.0](https://www.github.com/rust-bio/rust-htslib/compare/v0.37.0...v0.38.0) (2021-07-06) 256 | 257 | 258 | ### ⚠ BREAKING CHANGES 259 | 260 | * Improve bcf Record filter interface and improve docs (#306) 261 
| 262 | ### Features 263 | 264 | * Improve bcf Record filter interface and improve docs ([#306](https://www.github.com/rust-bio/rust-htslib/issues/306)) ([f45e91d](https://www.github.com/rust-bio/rust-htslib/commit/f45e91dfdc64ecb662d676f2996ed4f14c079995)) 265 | 266 | 267 | ### Bug Fixes 268 | 269 | * enum name usage in doc example ([#311](https://www.github.com/rust-bio/rust-htslib/issues/311)) ([6e9ba49](https://www.github.com/rust-bio/rust-htslib/commit/6e9ba4928b60c3105490a8179d074c705ea06fd7)) 270 | 271 | ## [Unreleased] 272 | ### Changes 273 | - `bcf::Record` methods `has_filter`, `remove_filter`, `push_filter`, and `set_filter` 274 | all now take a byte slice (`&[u8]`) or an `Id`. 275 | 276 | [Unreleased]: https://github.com/rust-bio/rust-htslib/compare/v0.37.0...HEAD 277 | 278 | ## [0.37.0] - 2021-07-05 279 | ### Added 280 | - `bcf::Record` methods `end`, `clear`, and `rlen` (@mbhall88). 281 | 282 | ### Changes 283 | - `bcf::IndexReader::fetch` parameter `end` is now an `Option`. This is in line with 284 | htslib regions, which do not require an end position (@mbhall88) 285 | - Removed unused dependencies (@sreenathkrishnan). 286 | - Improved documentation (@mbhall88). 287 | - Improved error message when failing to load index files (@johanneskoester). 288 | - Improved API for accessing AUX fields in BAM records (@jch-13). 289 | - Fixed compiler warnings (@fxwiegand). 290 | - BAM header representation is now always kept in sync between textual and binary (@jch-13). 291 | 292 | ## [0.36.0] - 2020-11-23 293 | ### Changes 294 | - Improved genotype API in VCF/BCF records (@MaltheSR). 295 | - Read pair orientation inference for BAM records (@johanneskoester). 296 | 297 | ## [0.35.2] - 2020-11-23 298 | ### Changes 299 | - let hts-sys dependency comply with semver. 300 | 301 | ## [0.35.1] - 2020-11-23 302 | ### Changes 303 | - Fixed wrongly defined missing value constants in bcf::record (@johanneskoester). 
304 | - Bump hts-sys dependency to the latest version, containing build fixes for macOS (@johanneskoester). 305 | 306 | 307 | ## [0.35.0] - 2020-11-19 308 | ### Changes 309 | - BREAKING: info and format field access in BCF records now allocates a separate buffer each time. In addition, it is also possible to pass a buffer that has been created manually before (@johanneskoester) 310 | - Fixes for building on macOS (@brainstorm) 311 | 312 | ### Added 313 | - ability to push genotypes into BCF record (@MaltheSR, @tedil). 314 | 315 | ## [0.34.0] - 2020-11-13 316 | ### Added 317 | - Ability to set minimum refetch distance in `bam::RecordBuffer`. 318 | 319 | ## [0.33.0] - 2020-11-04 320 | ### Changes 321 | - BREAKING: Rename feature 'serde' as 'serde_feature' (for technical reasons) 322 | - BREAKING: Consolidate module-wide errors into a crate-wide error module 323 | - Making `bcf::IndexedReader` always unpack records to reflect the behaviour of `bcf::Reader`. 324 | - Adding `bcf::errors::Error::FileNotFound` and using it. 325 | - Fixes for musl compilation (@brainstorm). 326 | - Improved BCF constants handling (@juliangehring) 327 | - Fixes for tabix reader (@felix-clark, @brainstorm). 328 | - Fixes for BCF handling (@holtgrewe, @tedil). 329 | - Documentation improvements (@vsoch, @brainstorm, @edmundlth). 330 | - BREAKING: Improved, more ergonomic BAM fetch API (@TiberiusPrime, @brainstorm, @tedil). 331 | - BREAKING: Let BamRecordExtensions return iterators instead of vectors (@TiberiusPrime). 332 | - Handle all errors via a unified single thiserror based enum (@landesfeind). 333 | - BREAKING: BAM read API now returns Option (@slazicoicr). 334 | ### Added 335 | - Support for reading indexed FASTA files (@landesfeind, @pmarks, @brainstorm). 336 | - Support for shared threadpools when reading and writing BAM (@pmarks, @nlhepler). 337 | - Serde support for Cigar strings (@FelixMoelder, @pmarks, @johanneskoester). 338 | - Expose bgzf functionality (@landesfeind). 
339 | - Iterator over BAM records using Rc-pointers (@TiberiusPrime, @tedil). 340 | - Ability to obtain pairs of read and genome intervals from BAM (aligned_block_pairs) (@TiberiusPrime, @brainstorm). 341 | 342 | ## [0.32.0] - 2020-07-09 343 | ### Changes 344 | - Method `seq_len()` of `bam::Record` is now public. 345 | - Speedup when parsing BAM headers (thanks to @juliangehring). 346 | - Compatibility fixes for older rust versions (thanks to @pmarks and @brainstorm). 347 | 348 | ## [0.31.0] - 2020-06-22 349 | ### Changes 350 | - Bam record buffer now returns reference counted (Rc) objects. This makes the API more ergonomic to use. 351 | - Switched to thiserror instead of snafu for error handling. 352 | - Various cleanups and little fixes. 353 | 354 | ## [0.30.0] - 2020-04-03 355 | ### Changes 356 | - Removed `fn header_mut()` from `bam::Read` trait. 357 | - Fixed a major performance regression when reading bam files (issue #195). 358 | 359 | ## [0.29.0] - 2020-03-26 360 | ### Changes 361 | - Migrate buffer intervals to u64. 362 | 363 | ## [0.28.0] - 2020-03-26 364 | ### Changes 365 | - Return u64 wherever htslib has migrated to using 64 bit. 366 | - Implement more bio-types (Interval, Locus, Strand). 367 | 368 | ## [0.27.0] - 2020-03-17 369 | ### Changes 370 | - Updated to Htslib 1.10.2. 371 | - bam::Record.set() will panic if seq.len() != qual.len(). Previously, mismatched length would cause 372 | uninitialized memory to be written into the BAM file. 373 | - use `serde_bytes` to serialize .data section of bam::Record when using serde - large speed improvement. 374 | - change build.rs to avoid re-running when htslib or wrapper.h haven't changed. 375 | - update some dependencies. 376 | - refactor native dependency into htslib-sys crate, for greater versioning flexibility 377 | - Record::from_sam require `&mut HeaderView`. Provide the appropriate accessor. 378 | - set() no longer invalidates tag data. 379 | - Various minor improvements. 
380 | 381 | ## [0.26.1] - 2019-12-03 382 | ### Changes 383 | - Various bug fixes in CIGAR string handling, INFO tag reading and FORMAT tag reading. 384 | 385 | ## [0.26.0] - 2019-09-27 386 | ### Changes 387 | - Allow caching of CIGAR in bam::RecordBuffer. 388 | 389 | ## [0.25.0] - 2019-09-27 390 | ### Changes 391 | - Migrated error handling to the snafu crate: https://docs.rs/snafu. 392 | - Cleaned up and simplified API (including breaking changes). 393 | - Allow writing SAM files from the bam::Writer. 394 | 395 | ## [0.24.0] - 2019-05-31 396 | ### Added 397 | - Allow setting unmapped BAM record (without Cigar string). 398 | - Various bug fixes and error handling improvements. 399 | - Various Pysam-derived methods for interpreting Cigar strings. 400 | 401 | ## [0.23.0] - 2019-05-02 402 | ### Added 403 | - Support for BAM indices that are not placed beside a file.bam as file.bam.bai 404 | - Implement SequenceRead trait for BAM records. 405 | - Add function to build an index for a BAM file. 406 | - CRAM support for BAM reader and writer. 407 | ### Changes 408 | - Allow to specify particular index filename when instantiating a BAM reader. 409 | - Various bug and API fixes. 410 | 411 | ## [0.22.0] - 2018-11-02 412 | ### Changes 413 | - Support compilation against musl. 414 | - Support for removing alleles. 415 | - Improvements to SyncedReader API. 416 | 417 | ## [0.21.0] - 2018-08-01 418 | ### Changes 419 | - Adding `bcf::synced::SyncedReader::fetch()`, changing error type for `bcf::synced::SyncedReader::read_next()`. 420 | - Adding `bcf::Record::unpack()` for explicitly unpacking BCF records. 421 | - Fixed `bcf::synced::SyncedReader::record()`. 422 | - `bam::Record::cigar()` now returns a reference (in constant time) and needs `bam::Record::unpack_cigar()` to be called first. 423 | - Allow to create Cigar string from `bio_types::Alignment`. 424 | - Provide a cached variant of obtaining cigar string. 425 | - Lots of small usability improvements.
426 | 427 | ## [0.20.0] - 2018-06-18 428 | ### Added 429 | - Initial implementation of synced BCF reader interface. 430 | - Several small helper methods for BAM readers. 431 | ### Changes 432 | - Not skipping `fileformat=` header any more. 433 | - BCF records are always unpacked when reading. 434 | 435 | ## [0.19.1] - 2018-06-07 436 | ### Changed 437 | - Moved unpacking of BCF records into constructor to prevent race conditions. 438 | - Fixed bug in retrieving BCF record id. 439 | - Fixed bug in the filter iterator of BCF. 440 | 441 | ## [0.19.0] - 2018-06-01 442 | ### Added 443 | - more push functions for BCF. 444 | 445 | ## [0.18.0] - 2018-05-04 446 | ### Added 447 | - bcf::IndexedReader 448 | - support for writing bcf FILTER field 449 | - setting thread count in all readers and writers 450 | - setting ID and alleles in bcf records 451 | - support for tabix indexes 452 | - convert CIGAR to and from strings 453 | 454 | ## [0.17.0] - 2018-02-22 455 | ### Added 456 | - Serde support for bam records. 457 | ### Changed 458 | - Various convenience improvements in the API. 459 | 460 | ## [0.16.0] - 2018-01-05 461 | ### Changed 462 | - Raw Htslib bindings are now generated on the fly. 463 | - Switched to Htslib 1.6. 464 | - Fixed a potential dangling pointer to the header in bcf records. 465 | - Various small API improvements. 466 | 467 | ## [0.15.0] - 2017-12-05 468 | ### Changed 469 | - HeaderView of bam and bcf can now be cloned. 470 | 471 | 472 | ## [0.14.0] - 2017-12-03 473 | ### Added 474 | - An efficient ringbuffer for accessing BCF regions 475 | - An efficient ringbuffer for accessing BAM regions 476 | ### Changed 477 | - Improved mutability annotation for readers. 478 | 479 | ## [0.13.0] - 2017-09-22 480 | ### Added 481 | - Ability to clone bam records. 482 | - Ability to set only qname. 483 | ### Changed 484 | - Further improved CIGAR string API. 485 | - Improved documentation. 
486 | 487 | 488 | ## [0.12.1] - 2017-06-12 489 | ### Changed 490 | - Adapt to changes in Rust 1.18 that caused compilation issues. 491 | 492 | 493 | ## [0.12.0] - 2017-06-01 494 | ### Added 495 | - Support seek and tell to handle virtual offsets. 496 | ### Changed 497 | - Renamed previous seek method into fetch (in line with pysam). 498 | - Improved CIGAR API. 499 | - Updated dependencies. 500 | 501 | ## [0.11.0] - 2017-05-01 502 | ### Added 503 | - A SAM writer. 504 | ### Changed 505 | - Improved CIGAR string API using a newtype wrapper. 506 | - Improved pileup API. 507 | - Support threaded writing for BAM files. 508 | 509 | 510 | ## [0.10.0] - 2016-11-10 511 | ### Added 512 | - Prelude module to easily import all relevant traits. 513 | ### Changed 514 | - fine-grained constructors for STDIN/STDOUT, paths and URLs 515 | - better template handling with bam files 516 | 517 | 518 | ## [0.9.0] - 2016-11-02 519 | ### Changed 520 | - improved genotype handling 521 | - improved error handling 522 | - improved missing value handling 523 | 524 | ## [0.8.1] - 2016-08-17 525 | ### Changed 526 | - Finally converted the last unit error types to real error types (really now!). 527 | 528 | ## [0.8.0] - 2016-08-17 529 | ### Changed 530 | - More error types. 531 | 532 | ## [0.7.0] - 2016-08-16 533 | ### Changed 534 | - Error types now properly implement the Display and the Error trait. 535 | 536 | ## [0.6.2] - 2016-07-22 537 | ### Changed 538 | - Mark all records as Send and Sync. 539 | 540 | ## [0.6.1] - 2016-07-20 541 | ### Changed 542 | - Improved error messages. 543 | - Check existence of BAM when instantiating Readers. 544 | 545 | ## [0.6.0] - 2016-06-01 546 | ### Changed 547 | - Improved handling of memory allocation for BCF and BAM records. 548 | - Fixed a memory leak occurring when creating a new BAM record (thanks to @andrelmartins). 549 | --------------------------------------------------------------------------------