├── .github └── workflows │ └── tests.yml ├── .gitignore ├── .gitmodules ├── .zenodo.json ├── BUILD.md ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── PackageInfo.cmake ├── README.md ├── build-common ├── .gitignore ├── LICENSE.txt ├── README.md ├── bin │ ├── dpkg-deps │ ├── gcovr │ └── gitrev.pl ├── cmake │ ├── FindCairo.cmake │ ├── FindCairomm.cmake │ ├── FindFreetype.cmake │ ├── FindHTSlib.cmake │ ├── FindSamtools.cmake │ ├── FindTRE.cmake │ ├── LibFindMacros.cmake │ ├── PreDepends.cmake │ ├── ProjectHelper.cmake │ ├── TestHelper.cmake │ └── VersionHelper.cmake ├── python │ ├── ctest_convert.py │ ├── integrationtest.py │ └── valgrindwrapper.py ├── resources │ └── ctest_conv.xsl └── vendor │ └── gtest-1.6.0.tar.gz ├── cmake ├── BuildBoost.cmake ├── BuildSamtools.cmake └── CodeCoverage.cmake ├── debian ├── postinst.in └── prerm.in ├── integration-test ├── CMakeLists.txt ├── bam-readcount_test.py └── testdata.py.in ├── src ├── exe │ └── bam-readcount │ │ ├── CMakeLists.txt │ │ └── bamreadcount.cpp └── lib │ └── bamrc │ ├── BasicStat.cpp │ ├── BasicStat.hpp │ ├── CMakeLists.txt │ ├── IndelQueue.cpp │ ├── IndelQueue.hpp │ ├── IndelQueueEntry.cpp │ ├── IndelQueueEntry.hpp │ ├── ReadWarnings.hpp │ └── auxfields.hpp ├── test-data ├── cram_site_test.sh ├── expected_all_lib ├── expected_insertion_centric_all_lib ├── expected_insertion_centric_per_lib ├── expected_per_lib ├── rand1k.fa ├── rand1k.fa.fai ├── ref.fa ├── ref.fa.fai ├── site_list ├── test.bam ├── test.bam.bai ├── test_bad_rg.bam ├── test_bad_rg.bam.bai ├── twolib.sorted.cram ├── twolib.sorted.cram.crai └── twolib_site_list.txt ├── test └── lib │ └── bamrc │ ├── CMakeLists.txt │ ├── TestAuxFields.cpp │ ├── TestIndelQueue.cpp │ ├── TestIndelQueueEntry.cpp │ └── TestReadWarnings.cpp ├── tutorial ├── .gitignore ├── README.md ├── plots │ ├── vaf_vs_avg_basequality.png │ └── vaf_vs_avg_pos_as_fraction.png └── scripts │ ├── clean.sh │ ├── commands.sh │ ├── make_commands.sh │ ├── parse_brc.py │ ├── plot_vaf_vs_avg_bq.py │ └── plot_vaf_vs_avg_pos.py ├── vendor ├── Makefile.disable_curl.patch ├── README.md ├── boost-1.55-bamrc.tar.gz ├── bzip2-1.0.8.tar.gz ├── curl-7.67.0.tar.gz ├── mbedtls-2.16.4-apache.tgz ├── samtools-1.10.tar.bz2 ├── xz-5.2.4.tar.gz └── zlib-1.2.11.tar.gz └── version └── version.h.in /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | on: [push] 3 | jobs: 4 | ubuntu-18_04: 5 | runs-on: ubuntu-18.04 6 | steps: 7 | - run: sudo apt-get install lcov 8 | - uses: actions/checkout@v2 9 | - uses: actions/setup-python@v2 10 | with: 11 | python-version: '2.7' 12 | architecture: 'x64' 13 | - name: Build 14 | run: mkdir build && (cd build && cmake -DCODE_COVERAGE=ON .. 
&& make) 15 | - name: Coverage 16 | run: (cd build && make codecoverage) 17 | - name: Coveralls 18 | uses: coverallsapp/github-action@master 19 | with: 20 | github-token: ${{ secrets.GITHUB_TOKEN }} 21 | path-to-lcov: 'build/codecoverage.info' 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .*.swp 2 | *.deb 3 | CMakeCache.txt 4 | CMakeFiles/ 5 | CPackConfig.cmake 6 | CPackSourceConfig.cmake 7 | CTestTestfile.cmake 8 | Testing/ 9 | _CPack_Packages/ 10 | bin/ 11 | build/ 12 | build-zeno/ 13 | cmake_install.cmake 14 | debian/postinst 15 | debian/prerm 16 | debian/control 17 | version/version.h 18 | install_manifest.txt 19 | debian/files 20 | debian/joinx*/ 21 | debian/*.log 22 | debian/*.substvars 23 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "build-common"] 2 | path = build-common 3 | url = git://github.com/genome/build-common.git 4 | -------------------------------------------------------------------------------- /.zenodo.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "name": "Ajay Khanna^[co-first author]", 5 | "affiliation": "Division of Oncology, Department of Internal Medicine, Washington University School of Medicine, St. Louis, MO" 6 | }, 7 | { 8 | "name": "David E. Larson^[co-first author]", 9 | "affiliation": "1. McDonnell Genome Institute, Washington University School of Medicine, St. Louis, MO, 2. Current Affiliation: Benson Hill, Inc. St. Louis, MO" 10 | }, 11 | { 12 | "name": "Sridhar Nonavinkere Srivatsan", 13 | "affiliation": "Division of Oncology, Department of Internal Medicine, Washington University School of Medicine, St. Louis, MO" 14 | }, 15 | { 16 | "name": "Matthew Mosior", 17 | "affiliation": "1. Division of Oncology, Department of Internal Medicine, Washington University School of Medicine, St. Louis, MO, 2. Moffitt Cancer Center, Tampa, FL" 18 | }, 19 | { 20 | "name": "Travis E. Abbott", 21 | "affiliation": "1. McDonnell Genome Institute, Washington University School of Medicine, St. Louis, MO, 2. Google, Inc. Mountain View, CA" 22 | }, 23 | { 24 | "name": "Susanna Kiwala", 25 | "affiliation": "McDonnell Genome Institute, Washington University School of Medicine, St. Louis, MO" 26 | }, 27 | { 28 | "name": "Timothy J. Ley", 29 | "affiliation": "1. Division of Oncology, Department of Internal Medicine, Washington University School of Medicine, St. Louis, MO, 2. Siteman Cancer Center, Washington University School of Medicine, St. Louis, MO" 30 | }, 31 | { 32 | "name": "Eric J. Duncavage", 33 | "affiliation": "Department of Pathology, Washington University School of Medicine, St. Louis, MO" 34 | }, 35 | { 36 | "name": "Matthew J. Walter", 37 | "affiliation": "1. Division of Oncology, Department of Internal Medicine, Washington University School of Medicine, St. Louis, MO, 2. Siteman Cancer Center, Washington University School of Medicine, St. Louis, MO" 38 | }, 39 | { 40 | "name": "Jason R. Walker", 41 | "affiliation": "McDonnell Genome Institute, Washington University School of Medicine, St. Louis, MO" 42 | }, 43 | { 44 | "name": "Obi L. Griffith", 45 | "affiliation": "1. Division of Oncology, Department of Internal Medicine, Washington University School of Medicine, St. 
Louis, MO, 2.McDonnell Genome Institute, Washington University School of Medicine, St. Louis, MO, 3. Siteman Cancer Center, Washington University School of Medicine, St. Louis, MO, 4.Department of Genetics, Washington University School of Medicine, St. Louis, MO" 46 | }, 47 | { 48 | "name": "Malachi Griffith", 49 | "affiliation": "1. Division of Oncology, Department of Internal Medicine, Washington University School of Medicine, St. Louis, MO, 2.McDonnell Genome Institute, Washington University School of Medicine, St. Louis, MO, 3. Siteman Cancer Center, Washington University School of Medicine, St. Louis, MO, 4.Department of Genetics, Washington University School of Medicine, St. Louis, MO" 50 | }, 51 | { 52 | "name": "Christopher A. Miller^[corresponding author]", 53 | "affiliation": "1. Division of Oncology, Department of Internal Medicine, Washington University School of Medicine, St. Louis, MO, 2. Siteman Cancer Center, Washington University School of Medicine, St. Louis, MO" 54 | } 55 | ], 56 | 57 | "license": "MIT" 58 | } 59 | -------------------------------------------------------------------------------- /BUILD.md: -------------------------------------------------------------------------------- 1 | Build instructions 2 | ================== 3 | 4 | 5 | Requirements 6 | ------------ 7 | 8 | A C++ toolchain 9 | cmake 10 | make 11 | 12 | On a Debian Linux-based system such as Ubuntu 13 | 14 | apt install build-essential cmake 15 | 16 | will install the required software. 17 | 18 | Builds are currently failing under OS X. 19 | 20 | All required libraries are included in the repository under `vendor/`. 21 | See [vendor/README.md](vendor/README.md) for more information. 22 | 23 | 24 | Build 25 | ----- 26 | 27 | Make a build directory 28 | 29 | mkdir build 30 | 31 | Run CMake from inside it 32 | 33 | cd build 34 | cmake .. 35 | 36 | Run Make 37 | 38 | make 39 | 40 | This will build all the vendored libraries as well as `bam-readcount`. 41 | The final binary, which can be moved anywhere, is 42 | 43 | bin/bam-readcount 44 | 45 | Try it on a test CRAM 46 | 47 | cd ../test-data 48 | ../build/bin/bam-readcount -f rand1k.fa twolib.sorted.cram 49 | 50 | 51 | Test data 52 | --------- 53 | 54 | There is a small two-library test CRAM file 55 | 56 | test-data/twolib.sorted.cram 57 | 58 | with associated reference 59 | 60 | test-data/rand1k.fa 61 | 62 | The reference is encoded in the CRAM as 63 | 64 | @SQ SN:rand1k LN:1000 M5:11e5d1f36a8e123feb3dd934cc05569a UR:rand1k.fa 65 | 66 | so `bam-readcount` should be run inside the `test-data` directory to 67 | find the reference. 
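Alternatively, because a reference given with `-f` overrides the `UR` path in
the CRAM header (see the CRAM notes in [README.md](README.md)), the same test
can be run from the repository root by pointing `-f` at the reference
explicitly, for example (assuming the `build/` directory created above):

    build/bin/bam-readcount -f test-data/rand1k.fa test-data/twolib.sorted.cram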
68 | 69 | 70 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.3) 2 | 3 | project(bam-readcount) 4 | 5 | if(NOT CMAKE_INSTALL_PREFIX) 6 | set(CMAKE_INSTALL_PREFIX "/usr") 7 | endif() 8 | 9 | set(CMAKE_MODULE_PATH 10 | ${CMAKE_SOURCE_DIR}/cmake 11 | ${CMAKE_SOURCE_DIR}/build-common/cmake 12 | ) 13 | 14 | include(CMakePrintHelpers) 15 | include(TestHelper) 16 | include(VersionHelper) 17 | include(ProjectHelper) 18 | include(PreDepends) 19 | PreDependsInit() 20 | 21 | #set(EXE_VERSION_SUFFIX ${FULL_VERSION}) 22 | 23 | configure_file(version/version.h.in version/version.h @ONLY) 24 | include_directories(${PROJECT_BINARY_DIR}/version) 25 | 26 | set(REQUIRED_BOOST_LIBS program_options regex) 27 | include(BuildBoost) 28 | include_directories(${Boost_INCLUDE_DIRS}) 29 | 30 | include(BuildSamtools) 31 | include_directories(${Samtools_INCLUDE_DIRS} ${Htslib_INCLUDE_DIRS}) 32 | 33 | option(CODE_COVERAGE "Enable coverage reporting" OFF) 34 | 35 | if(${CMAKE_CXX_COMPILER_ID} MATCHES "GNU|Clang") 36 | if(CODE_COVERAGE) 37 | include(CodeCoverage) 38 | set(CMAKE_CXX_FLAGS "-Wall -std=c++0x --coverage -g -O0") 39 | #append_coverage_compiler_flags() 40 | setup_target_for_coverage_lcov( 41 | NAME codecoverage 42 | EXECUTABLE ctest -V 43 | BASE_DIRECTORY "./" 44 | NO_DEMANGLE 45 | EXCLUDE "build/*" "test/*" 46 | ) 47 | else() 48 | set(CMAKE_CXX_FLAGS "-Wall -std=c++0x") 49 | endif() 50 | endif() 51 | 52 | find_package(Threads) 53 | link_libraries(${CMAKE_THREAD_LIBS_INIT}) 54 | 55 | # make sure to pick up headers from library dirs 56 | include_directories("src/lib") 57 | 58 | # main project 59 | 60 | set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) 61 | add_projects(src/lib) 62 | add_projects(src/exe) 63 | 64 | ########################################################################### 65 | ## Testing 66 | set(TEST_DATA_DIRECTORY ${CMAKE_SOURCE_DIR}/test-data) 67 | 68 | set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/test-bin) 69 | enable_testing(true) 70 | add_projects(test/lib) 71 | 72 | # integration testing 73 | add_subdirectory(integration-test) 74 | 75 | ########################################################################### 76 | ## Packaging 77 | include(PackageInfo.cmake) 78 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | We welcome improvements, extensions, bug reports and bug fixes to `bam-readcount`! 4 | 5 | ## Bug Reports 6 | To make bug reports most useful, please include: 7 | - a brief summary of the problem - what did you expect, and what actually happened? 8 | - precise steps to reproduce the problem, and a minimal bam/cram file on which the steps can be run, if possible. 9 | 10 | ## Fixes or Feature Additions 11 | Direct pull requests are just fine - fork the repository, create an new branch, then make a PR against this repo. 12 | 13 | Before investing time and effort, it is often best to create an issue in 14 | the github repository reporting the bug and/or proposing the change, and we'll be happy to discuss the 15 | details and feasibility with you. 
16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2009-2016 David Larson and Travis Abbott 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /PackageInfo.cmake: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | 3 | # .deb packaging 4 | set(ARCH "i686") 5 | if(${CMAKE_SIZEOF_VOID_P} MATCHES 8) 6 | set(ARCH "x86_64") 7 | endif () 8 | 9 | # The format of the description field is a short summary line followed by a 10 | # longer paragraph indented by a single space on each line 11 | set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Bam readcount 12 | Is awesome.") 13 | set(CPACK_PACKAGE_NAME "bam-readcount${EXE_VERSION_SUFFIX}") 14 | set(CPACK_PACKAGE_VENDOR "wugc") 15 | set(CPACK_PACKAGE_VERSION ${FULL_VERSION}${PACKAGE_VERSION_SUFFIX}) 16 | set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Dave Larson ") 17 | set(CPACK_SYSTEM_NAME "Linux-${ARCH}") 18 | set(CPACK_TOPLEVEL_TAG "Linux-${ARCH}") 19 | set(CPACK_DEBIAN_PACKAGE_SECTION science) 20 | set(CPACK_DEBIAN_PACKAGE_PRIORITY optional) 21 | set(CPACK_DEBIAN_PACKAGE_DEPENDS "libc6 (>= 2.4), libgcc1 (>= 1:4.1.1-21)") 22 | if (CMAKE_BUILD_TYPE MATCHES package) 23 | set(CPACK_GENERATOR "DEB") 24 | else(CMAKE_BUILD_TYPE MATCHES package) 25 | set(CPACK_GENERATOR "TGZ") 26 | endif(CMAKE_BUILD_TYPE MATCHES package) 27 | 28 | configure_file(debian/postinst.in debian/postinst @ONLY) 29 | configure_file(debian/prerm.in debian/prerm @ONLY) 30 | set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "debian/postinst;debian/prerm") 31 | 32 | include(CPack) 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | bam-readcount 2 | ============= 3 | 4 | [![DOI](https://joss.theoj.org/papers/10.21105/joss.03722/status.svg)](https://doi.org/10.21105/joss.03722) 5 | ![latest release](https://img.shields.io/github/v/release/genome/bam-readcount) 6 | ![tests](https://github.com/seqfu/bam-readcount/actions/workflows/tests.yml/badge.svg?branch=samtools-1.10) 7 | ![coverage](https://coveralls.io/repos/seqfu/bam-readcount/badge.svg?branch=samtools-1.10&service=github) 8 | ![Docker 
Pulls](https://img.shields.io/docker/pulls/mgibio/bam-readcount?style=plastic) 9 | ![GitHub](https://img.shields.io/github/license/genome/bam-readcount) 10 | 11 | 12 | `bam-readcount` is a utility that runs on a `BAM` or `CRAM` file and generates low-level information about 13 | sequencing data at specific nucleotide positions. Its outputs include observed bases, 14 | readcounts, summarized mapping and base qualities, strandedness information, 15 | mismatch counts, and position within the reads. (see "Output" section below) 16 | 17 | Originally designed to help filter genomic mutation calls, the metrics `bam-readcount` outputs 18 | are also useful as input for variant detection tools and for resolving ambiguity between 19 | variant callers. 20 | 21 | If you find `bam-readcount` useful in your work, please cite our [paper](https://doi.org/10.21105/joss.03722): 22 | 23 | > Khanna et al., (2022). Bam-readcount - rapid generation of basepair-resolution sequence metrics. _Journal of Open Source Software_, 7(69), 3722. https://doi.org/10.21105/joss.03722 24 | 25 | Contents 26 | --------- 27 | * [Installation](#installation) 28 | * [Usage](#usage) 29 | * [Tutorial](#tutorial) 30 | * [Support](#support) 31 | * [Contributing](#contributing) 32 | 33 | 34 | Installation 35 | ------------ 36 | 37 | ### Docker 38 | 39 | The latest release version of `bam-readcount` is available as a Docker image 40 | on [DockerHub](https://hub.docker.com/r/mgibio/bam-readcount) 41 | 42 | docker pull mgibio/bam-readcount 43 | 44 | For details see the 45 | [`docker-bam-readcount`](https://github.com/genome/docker-bam-readcount) 46 | repository. 47 | 48 | 49 | ### Build 50 | 51 | Requires a C++ toolchain and `cmake`. For details see 52 | [BUILD.md](BUILD.md). 53 | 54 | git clone https://github.com/genome/bam-readcount 55 | cd bam-readcount 56 | mkdir build 57 | cd build 58 | cmake .. 59 | make 60 | # Executable is 61 | bin/bam-readcount 62 | 63 | 64 | Usage 65 | ----- 66 | 67 | Run with no arguments for command-line help: 68 | 69 | $ bam-readcount 70 | 71 | Usage: bam-readcount [OPTIONS] [region] 72 | Generate metrics for bam_file at single nucleotide positions. 73 | Example: bam-readcount -f ref.fa some.bam 74 | 75 | Available options: 76 | -h [ --help ] produce this message 77 | -v [ --version ] output the version number 78 | -q [ --min-mapping-quality ] arg (=0) minimum mapping quality of reads used 79 | for counting. 80 | -b [ --min-base-quality ] arg (=0) minimum base quality at a position to 81 | use the read for counting. 82 | -d [ --max-count ] arg (=10000000) max depth to avoid excessive memory 83 | usage. 84 | -l [ --site-list ] arg file containing a list of regions to 85 | report readcounts within. 86 | -f [ --reference-fasta ] arg reference sequence in the fasta format. 87 | -D [ --print-individual-mapq ] arg report the mapping qualities as a comma 88 | separated list. 89 | -p [ --per-library ] report results by library. 90 | -w [ --max-warnings ] arg maximum number of warnings of each type 91 | to emit. -1 gives an unlimited number. 92 | -i [ --insertion-centric ] generate indel centric readcounts. 
93 | Reads containing insertions will not be 94 | included in per-base counts 95 | 96 | The optional `[region]` should be in the same format as `samtools`: 97 | 98 | chromosome:start-stop 99 | 100 | The optional `-l` (`--site-list`) file should be tab-separated, no 101 | header, one region per line: 102 | 103 | chromosome start end 104 | 105 | 106 | ### CRAM support 107 | 108 | When using CRAM files as input, if a reference is specified with `-f`, it will override whatever is in 109 | the CRAM header. Otherwise, the reference(s) encoded in the CRAM header or a lookup by 110 | MD5 at ENA will be used. 111 | 112 | ### Wrappers/Parsers 113 | 114 | Add bam-readcount counts to VCF 115 | - [VAtools](https://github.com/griffithlab/VAtools/blob/master/vatools/vcf_readcount_annotator.py) allows you to add read-counts to VCF from modern variant callers. [Additional details](https://vatools.readthedocs.io/en/latest/vcf_readcount_annotator.html) 116 | Create csv file 117 | - [brc-parser](https://github.com/sridhar0605/brc-parser) parser to convert bam-readcount output to comma seperated long format file. 118 | 119 | Output 120 | ------ 121 | 122 | Output is tab-separated with no header to `STDOUT`, one line per 123 | position: 124 | 125 | chr position reference_base depth base:count:avg_mapping_quality:avg_basequality:avg_se_mapping_quality:num_plus_strand:num_minus_strand:avg_pos_as_fraction:avg_num_mismatches_as_fraction:avg_sum_mismatch_qualities:num_q2_containing_reads:avg_distance_to_q2_start_in_q2_reads:avg_clipped_length:avg_distance_to_effective_3p_end ... 126 | 127 | There is one set of `:`-separated fields for each reported `base` with 128 | statistics on the set of reads containing that base: 129 | 130 | Field | Description 131 | ----- | ----------- 132 | base | The base, eg `C` 133 | count | Number of reads 134 | avg_mapping_quality | Mean mapping quality 135 | avg_basequality | Mean base quality 136 | avg_se_mapping_quality | Mean single ended mapping quality 137 | num_plus_strand | Number of reads on the plus/forward strand 138 | num_minus_strand | Number of reads on the minus/reverse strand 139 | avg_pos_as_fraction | Average position on the read as a fraction, calculated with respect to the length after clipping. 
This value is normalized to the center of the read: bases occurring strictly at the center of the read have a value of 1, those occurring strictly at the ends should approach a value of 0 140 | avg_num_mismatches_as_fraction | Average number of mismatches on these reads per base 141 | avg_sum_mismatch_qualities | Average sum of the base qualities of mismatches in the reads 142 | num_q2_containing_reads | Number of reads with q2 runs at the 3’ end 143 | avg_distance_to_q2_start_in_q2_reads | Average distance of position (as fraction of unclipped read length) to the start of the q2 run 144 | avg_clipped_length | Average clipped read length 145 | avg_distance_to_effective_3p_end | Average distance to the 3’ prime end of the read (as fraction of unclipped read length) 146 | 147 | 148 | ### Per-library output 149 | 150 | With the `-p` option, each output line will have a set of `{}`-delimited 151 | results, one for each library: 152 | 153 | chr position reference_base depth library_1_name { base:count:avg_mapping_quality:avg_basequality:avg_se_mapping_quality:num_plus_strand:num_minus_strand:avg_pos_as_fraction:avg_num_mismatches_as_fraction:avg_sum_mismatch_qualities:num_q2_containing_reads:avg_distance_to_q2_start_in_q2_reads:avg_clipped_length:avg_distance_to_effective_3p_end } ... library_N_name { base:count:avg_mapping_quality:avg_basequality:avg_se_mapping_quality:num_plus_strand:num_minus_strand:avg_pos_as_fraction:avg_num_mismatches_as_fraction:avg_sum_mismatch_qualities:num_q2_containing_reads:avg_distance_to_q2_start_in_q2_reads:avg_clipped_length:avg_distance_to_effective_3p_end } 154 | 155 | 156 | Tutorial 157 | ----- 158 | For those who learn best by example, a [brief tutorial is available here](https://github.com/genome/bam-readcount/tree/master/tutorial) that uses bam-readcount to identify the Omicron SARS-CoV-2 variant of concern from raw sequence data. 159 | 160 | 161 | Support 162 | ----- 163 | For support, please [search 164 | `bam-readcount`](https://www.biostars.org/post/search/?query=bam-readcount) on 165 | [Biostars](https://www.biostars.org) as many of the most frequently asked 166 | questions about `bam-readcount` have been answered there. For problems not addressed there, 167 | please open an github issue or make a BioStar post. 168 | 169 | 170 | Contributing 171 | ------ 172 | We welcome contributions! See [Contributing](https://github.com/genome/bam-readcount/blob/master/CONTRIBUTING.md) for more details 173 | -------------------------------------------------------------------------------- /build-common/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /build-common/LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2011-2015 Travis Abbott and David E. 
Larson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /build-common/README.md: -------------------------------------------------------------------------------- 1 | # build-common 2 | Common build scripts used in C/C++ projects. Intended to make compiling with CMake and handling dependencies slightly easier. 3 | -------------------------------------------------------------------------------- /build-common/bin/dpkg-deps: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Dpkg::Shlibs::Objdump; 7 | 8 | die "Usage: $0 \n" if @ARGV != 1; 9 | 10 | my $exe = shift @ARGV; 11 | my $obj = Dpkg::Shlibs::Objdump::Object->new($exe); 12 | my @libs = $obj->get_needed_libraries; 13 | my %pkgs; 14 | my @search_path = ("/lib", "/usr/lib"); 15 | for my $lib (@libs) { 16 | my @dirs = grep { -e "$_/$lib" } @search_path; 17 | die "Unable to find library $lib" unless @dirs; 18 | 19 | $lib = "$dirs[0]/$lib"; 20 | 21 | my $pkg = `dpkg --search $lib | head -n1 | cut -d: -f1 2> /dev/null`; 22 | die "Unable to locate package for library $lib" unless $pkg; 23 | chomp $pkg; 24 | my $ver = `dpkg -l $pkg 2> /dev/null | tail -n1 | awk '{print \$3}'`; 25 | chomp $ver; 26 | $pkgs{$pkg} = "$pkg (>= $ver)"; 27 | } 28 | 29 | print join(", ", sort values %pkgs) . "\n"; 30 | -------------------------------------------------------------------------------- /build-common/bin/gitrev.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use POSIX qw/WEXITSTATUS/; 4 | 5 | my $vtag = "v[0-9]*"; 6 | 7 | sub git { 8 | my $args = shift; 9 | my $output = `git $args 2>&1`; 10 | my $rc = WEXITSTATUS($?); 11 | die "git command '$args' returned $rc. 
output was '$output'" if $rc; 12 | chomp $output; 13 | return $output; 14 | } 15 | 16 | sub is_git_repo { 17 | eval { git("status"); }; 18 | return !$@; 19 | } 20 | 21 | sub is_dirty { 22 | my $status = git("status --porcelain"); 23 | return $status ne ""; 24 | } 25 | 26 | sub is_tagged { 27 | my $status = git("describe --always --dirty --long --tags"); 28 | return 0 if $status =~ /-dirty/; 29 | if ($status =~ /^.*-([0-9]+)-g[^-]+$/) { 30 | return $1 == 0; 31 | } 32 | return; 33 | } 34 | 35 | sub tagged_version { 36 | my $rev; 37 | eval { $rev = git("describe --long --tags --match $vtag"); }; 38 | return unless !$@ and $rev ne ""; 39 | return $rev; 40 | } 41 | 42 | sub untagged_version { 43 | my $rev = git("describe --always --tags"); 44 | return $rev; 45 | } 46 | 47 | sub parse_rev { 48 | my $rev = shift; 49 | 50 | $rev =~ s/^v//g; 51 | 52 | my $exe_suffix = "-unstable"; 53 | my $full_version = "$rev-unstable"; 54 | my $commit = $rev; 55 | my $dirty = is_dirty; 56 | 57 | if ($rev =~ /^([0-9]+)\.([0-9]+)\.([0-9]+)-([0-9]+)-g(.*)/) { 58 | $exe_suffix = "$1.$2"; 59 | $full_version = "$1.$2.$3"; 60 | $commit = $5; 61 | 62 | my $commits_past_tag = $4; 63 | if ($commits_past_tag > 0) { 64 | $exe_suffix = "$1.$2.$3-$commits_past_tag-unstable"; 65 | $full_version = "$1.$2.$3-unstable-$commits_past_tag-$commit"; 66 | } 67 | } 68 | if ($dirty) { 69 | $full_version .= "-dirty"; 70 | $commit .= "-dirty"; 71 | } 72 | return ($exe_suffix, $full_version, $commit, $dirty); 73 | } 74 | 75 | sub commit_hash { 76 | return git("rev-parse --short HEAD"); 77 | } 78 | 79 | if (!is_git_repo) { 80 | print "-unstable "; 81 | print "unstable "; 82 | print "nogit\n"; 83 | exit 0; 84 | } 85 | 86 | my $rev = tagged_version || untagged_version; 87 | 88 | my ($exe_suffix, $full_version, $commit, $dirty) = parse_rev($rev); 89 | print "$exe_suffix "; 90 | print "$full_version "; 91 | print "$commit\n"; 92 | -------------------------------------------------------------------------------- /build-common/cmake/FindCairo.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find Cairo 2 | # Once done, this will define 3 | # 4 | # Cairo_FOUND - system has Cairo 5 | # Cairo_INCLUDE_DIRS - the Cairo include directories 6 | # Cairo_LIBRARIES - link these to use Cairo 7 | 8 | include(LibFindMacros) 9 | 10 | # Dependencies 11 | libfind_package(Cairo Freetype) 12 | 13 | # Use pkg-config to get hints about paths 14 | libfind_pkg_check_modules(Cairo_PKGCONF cairo) 15 | 16 | # Include dir 17 | find_path(Cairo_INCLUDE_DIR 18 | NAMES cairo.h 19 | PATHS ${Cairo_PKGCONF_INCLUDE_DIRS} 20 | PATH_SUFFIXES cairo 21 | ) 22 | 23 | # Finally the library itself 24 | find_library(Cairo_LIBRARY 25 | NAMES cairo 26 | PATHS ${Cairo_PKGCONF_LIBRARY_DIRS} 27 | ) 28 | 29 | # Set the include dir variables and the libraries and let libfind_process do the rest. 30 | # NOTE: Singular variables for this library, plural for libraries this this lib depends on. 
31 | set(Cairo_PROCESS_INCLUDES Cairo_INCLUDE_DIR Freetype_INCLUDE_DIR) 32 | set(Cairo_PROCESS_LIBS Cairo_LIBRARY Freetype_LIBRARIES) 33 | libfind_process(Cairo) 34 | message(" - Cairo include dirs: ${Cairo_INCLUDE_DIRS}") 35 | message(" - Cairo libraries: ${Cairo_LIBRARIES}") 36 | 37 | -------------------------------------------------------------------------------- /build-common/cmake/FindCairomm.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find Cairomm 1.0 2 | # Once done, this will define 3 | # 4 | # Cairomm_FOUND - system has Cairomm 5 | # Cairomm_INCLUDE_DIRS - the Cairomm include directories 6 | # Cairomm_LIBRARIES - link these to use Cairomm 7 | 8 | include(LibFindMacros) 9 | 10 | # Dependencies 11 | libfind_package(Cairomm Cairo) 12 | 13 | # Use pkg-config to get hints about paths 14 | libfind_pkg_check_modules(Cairomm_PKGCONF cairomm-1.0) 15 | 16 | # Main include dir 17 | find_path(Cairomm_INCLUDE_DIR 18 | NAMES cairomm/cairomm.h 19 | PATHS ${Cairomm_PKGCONF_INCLUDE_DIRS} 20 | PATH_SUFFIXES cairomm-1.0 21 | ) 22 | 23 | libfind_library(Cairomm cairomm 1.0) 24 | 25 | # Set the include dir variables and the libraries and let libfind_process do the rest. 26 | # NOTE: Singular variables for this library, plural for libraries this this lib depends on. 27 | set(Cairomm_PROCESS_INCLUDES Cairomm_INCLUDE_DIR Cairo_INCLUDE_DIRS) 28 | set(Cairomm_PROCESS_LIBS Cairomm_LIBRARY Cairo_LIBRARIES) 29 | libfind_process(Cairomm) 30 | 31 | # FIXME: figure out how to let libfind locate libsigc++ 32 | set(Cairomm_INCLUDE_DIRS ${Cairomm_PKGCONF_INCLUDE_DIRS}) 33 | 34 | -------------------------------------------------------------------------------- /build-common/cmake/FindFreetype.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find Freetype2 2 | # Once done, this will define 3 | # 4 | # Freetype_FOUND - system has Freetype 5 | # Freetype_INCLUDE_DIRS - the Freetype include directories 6 | # Freetype_LIBRARIES - link these to use Freetype 7 | 8 | include(LibFindMacros) 9 | 10 | # Use pkg-config to get hints about paths 11 | libfind_pkg_check_modules(Freetype_PKGCONF freetype2) 12 | 13 | # Include dir 14 | find_path(Freetype_INCLUDE_DIR 15 | NAMES freetype/freetype.h 16 | PATHS ${Freetype_PKGCONF_INCLUDE_DIRS} 17 | PATH_SUFFIXES freetype2 18 | ) 19 | 20 | # Finally the library itself 21 | find_library(Freetype_LIBRARY 22 | NAMES freetype 23 | PATHS ${Freetype_PKGCONF_LIBRARY_DIRS} 24 | ) 25 | 26 | # Set the include dir variables and the libraries and let libfind_process do the rest. 27 | # NOTE: Singular variables for this library, plural for libraries this this lib depends on. 
28 | set(Freetype_PROCESS_INCLUDES Freetype_INCLUDE_DIR) 29 | set(Freetype_PROCESS_LIBS Freetype_LIBRARY) 30 | libfind_process(Freetype) 31 | message(" - Freetype libraries: ${Freetype_LIBRARIES}") 32 | -------------------------------------------------------------------------------- /build-common/cmake/FindHTSlib.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find htslib 2 | # Once done, this will define 3 | # 4 | # htslib_FOUND - system has htslib 5 | # htslib_INCLUDE_DIRS - the htslib include directories 6 | # htslib_LIBRARIES - link these to use htslib 7 | 8 | set(HTSLIB_SEARCH_DIRS 9 | ${HTSLIB_SEARCH_DIRS} 10 | $ENV{HTLSIB_ROOT} 11 | /gsc/pkg/bio/htslib 12 | /usr 13 | /usr/local 14 | ) 15 | 16 | set(_htslib_ver_path "htslib-${htslib_FIND_VERSION}") 17 | include(LibFindMacros) 18 | 19 | # Dependencies 20 | libfind_package(HTSlib ZLIB) 21 | 22 | # Include dir 23 | find_path(HTSlib_INCLUDE_DIR 24 | NAMES ${HTSLIB_ADDITIONAL_HEADERS} sam.h 25 | PATHS ${HTSLIB_SEARCH_DIRS} 26 | PATH_SUFFIXES 27 | include include/htslib htslib/${_htslib_ver_path}/htslib 28 | HINTS ENV HTSLIB_ROOT 29 | ) 30 | 31 | # Finally the library itself 32 | find_library(HTSlib_LIBRARY 33 | NAMES hts libhts.a hts.a 34 | PATHS ${HTSlib_INCLUDE_DIR} ${HTSLIB_SEARCH_DIRS} 35 | NO_DEFAULT_PATH 36 | PATH_SUFFIXES lib lib64 ${_htslib_ver_path} 37 | HINTS ENV HTSLIB_ROOT 38 | ) 39 | 40 | # Set the include dir variables and the libraries and let libfind_process do the rest. 41 | # NOTE: Singular variables for this library, plural for libraries this lib depends on. 42 | set(HTSlib_PROCESS_INCLUDES HTSlib_INCLUDE_DIR ZLIB_INCLUDE_DIR) 43 | set(HTSlib_PROCESS_LIBS HTSlib_LIBRARY ZLIB_LIBRARIES) 44 | libfind_process(HTSlib) 45 | message(STATUS " HTSlib include dirs: ${HTSlib_INCLUDE_DIRS}") 46 | message(STATUS " HTSlib libraries: ${HTSlib_LIBRARIES}") 47 | -------------------------------------------------------------------------------- /build-common/cmake/FindSamtools.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find Samtools 2 | # Once done, this will define 3 | # 4 | # Samtools_FOUND - system has Samtools 5 | # Samtools_INCLUDE_DIRS - the Samtools include directories 6 | # Samtools_LIBRARIES - link these to use Samtools 7 | 8 | set(SAMTOOLS_SEARCH_DIRS 9 | ${SAMTOOLS_SEARCH_DIRS} 10 | $ENV{SAMTOOLS_ROOT} 11 | /gsc/pkg/bio/samtools 12 | /usr 13 | ) 14 | 15 | set(_samtools_ver_path "samtools-${Samtools_FIND_VERSION}") 16 | include(LibFindMacros) 17 | 18 | # Dependencies 19 | libfind_package(Samtools ZLIB) 20 | 21 | # Include dir 22 | find_path(Samtools_INCLUDE_DIR 23 | NAMES ${SAMTOOLS_ADDITIONAL_HEADERS} bam.h 24 | PATHS ${SAMTOOLS_SEARCH_DIRS} 25 | PATH_SUFFIXES 26 | include include/sam include/bam include/samtools${_samtools_ver_path} 27 | include/samtools 28 | HINTS ENV SAMTOOLS_ROOT 29 | ) 30 | 31 | # Finally the library itself 32 | find_library(Samtools_LIBRARY 33 | NAMES bam libbam.a bam.a 34 | PATHS ${Samtools_INCLUDE_DIR} ${SAMTOOLS_SEARCH_DIRS} 35 | NO_DEFAULT_PATH 36 | PATH_SUFFIXES lib lib64 ${_samtools_ver_path} 37 | HINTS ENV SAMTOOLS_ROOT 38 | ) 39 | 40 | # Set the include dir variables and the libraries and let libfind_process do the rest. 41 | # NOTE: Singular variables for this library, plural for libraries this lib depends on. 
42 | set(Samtools_PROCESS_INCLUDES Samtools_INCLUDE_DIR ZLIB_INCLUDE_DIR) 43 | set(Samtools_PROCESS_LIBS Samtools_LIBRARY ZLIB_LIBRARIES) 44 | libfind_process(Samtools) 45 | message(" - Samtools include dirs: ${Samtools_INCLUDE_DIRS}") 46 | message(" - Samtools libraries: ${Samtools_LIBRARIES}") 47 | -------------------------------------------------------------------------------- /build-common/cmake/FindTRE.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find TRE 2 | # Once done, this will define 3 | # 4 | # TRE - system has TRE 5 | # TRE_INCLUDE_DIRS - the TRE include directories 6 | # TRE_LIBRARIES - link these to use TRE 7 | 8 | set(TRE_SEARCH_DIRS 9 | ${TRE_SEARCH_DIRS} 10 | $ENV{TRE_ROOT} 11 | /usr 12 | /usr/local 13 | /opt/local 14 | /gsc/pkg/bio/tre 15 | ) 16 | 17 | set(_tre_ver_path "tre-${TRE_FIND_VERSION}") 18 | include(LibFindMacros) 19 | 20 | # Dependencies 21 | libfind_package(TRE ZLIB) 22 | 23 | # Include dir 24 | find_path(TRE_INCLUDE_DIR 25 | NAMES tre/tre.h 26 | PATHS ${TRE_SEARCH_DIRS} 27 | PATH_SUFFIXES include ${_tre_ver_path} 28 | HINTS ENV TRE_ROOT 29 | ) 30 | 31 | # Finally the library itself 32 | find_library(TRE_LIBRARY 33 | NAMES tre libtre.a libtre.so 34 | PATHS lib ${TRE_INCLUDE_DIR} ${TRE_SEARCH_DIRS} 35 | NO_DEFAULT_PATH 36 | PATH_SUFFIXES lib ${_tre_ver_path} 37 | HINTS ENV TRE_ROOT 38 | ) 39 | 40 | # Set the include dir variables and the libraries and let libfind_process do the rest. 41 | # NOTE: Singular variables for this library, plural for libraries this lib depends on. 42 | set(TRE_PROCESS_INCLUDES TRE_INCLUDE_DIR) 43 | set(TRE_PROCESS_LIBS TRE_LIBRARY) 44 | libfind_process(TRE) 45 | message(" - TRE include dirs: ${TRE_INCLUDE_DIRS}") 46 | message(" - TRE libraries: ${TRE_LIBRARIES}") 47 | -------------------------------------------------------------------------------- /build-common/cmake/LibFindMacros.cmake: -------------------------------------------------------------------------------- 1 | # Works the same as find_package, but forwards the "REQUIRED" and "QUIET" arguments 2 | # used for the current package. For this to work, the first parameter must be the 3 | # prefix of the current package, then the prefix of the new package etc, which are 4 | # passed to find_package. 5 | macro (libfind_package PREFIX) 6 | set (LIBFIND_PACKAGE_ARGS ${ARGN}) 7 | if (${PREFIX}_FIND_QUIETLY) 8 | set (LIBFIND_PACKAGE_ARGS ${LIBFIND_PACKAGE_ARGS} QUIET) 9 | endif (${PREFIX}_FIND_QUIETLY) 10 | if (${PREFIX}_FIND_REQUIRED) 11 | set (LIBFIND_PACKAGE_ARGS ${LIBFIND_PACKAGE_ARGS} REQUIRED) 12 | endif (${PREFIX}_FIND_REQUIRED) 13 | find_package(${LIBFIND_PACKAGE_ARGS}) 14 | endmacro (libfind_package) 15 | 16 | # CMake developers made the UsePkgConfig system deprecated in the same release (2.6) 17 | # where they added pkg_check_modules. Consequently I need to support both in my scripts 18 | # to avoid those deprecated warnings. Here's a helper that does just that. 19 | # Works identically to pkg_check_modules, except that no checks are needed prior to use. 
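# A usage sketch, taken from FindCairo.cmake in this repository:
#
#   libfind_pkg_check_modules(Cairo_PKGCONF cairo)
#
# On success this fills hint variables such as Cairo_PKGCONF_INCLUDE_DIRS and
# Cairo_PKGCONF_LIBRARY_DIRS, which the Find module then passes to find_path()
# and find_library() as search hints.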
20 | macro (libfind_pkg_check_modules PREFIX PKGNAME) 21 | if (${CMAKE_MAJOR_VERSION} EQUAL 2 AND ${CMAKE_MINOR_VERSION} EQUAL 4) 22 | include(UsePkgConfig) 23 | pkgconfig(${PKGNAME} ${PREFIX}_INCLUDE_DIRS ${PREFIX}_LIBRARY_DIRS ${PREFIX}_LDFLAGS ${PREFIX}_CFLAGS) 24 | else (${CMAKE_MAJOR_VERSION} EQUAL 2 AND ${CMAKE_MINOR_VERSION} EQUAL 4) 25 | find_package(PkgConfig) 26 | if (PKG_CONFIG_FOUND) 27 | pkg_check_modules(${PREFIX} ${PKGNAME}) 28 | endif (PKG_CONFIG_FOUND) 29 | endif (${CMAKE_MAJOR_VERSION} EQUAL 2 AND ${CMAKE_MINOR_VERSION} EQUAL 4) 30 | endmacro (libfind_pkg_check_modules) 31 | 32 | # Do the final processing once the paths have been detected. 33 | # If include dirs are needed, ${PREFIX}_PROCESS_INCLUDES should be set to contain 34 | # all the variables, each of which contain one include directory. 35 | # Ditto for ${PREFIX}_PROCESS_LIBS and library files. 36 | # Will set ${PREFIX}_FOUND, ${PREFIX}_INCLUDE_DIRS and ${PREFIX}_LIBRARIES. 37 | # Also handles errors in case library detection was required, etc. 38 | macro (libfind_process PREFIX) 39 | # Skip processing if already processed during this run 40 | if (NOT ${PREFIX}_FOUND) 41 | # Start with the assumption that the library was found 42 | set (${PREFIX}_FOUND TRUE) 43 | 44 | # Process all includes and set _FOUND to false if any are missing 45 | foreach (i ${${PREFIX}_PROCESS_INCLUDES}) 46 | if (${i}) 47 | set (${PREFIX}_INCLUDE_DIRS ${${PREFIX}_INCLUDE_DIRS} ${${i}}) 48 | mark_as_advanced(${i}) 49 | else (${i}) 50 | set (${PREFIX}_FOUND FALSE) 51 | endif (${i}) 52 | endforeach (i) 53 | 54 | # Process all libraries and set _FOUND to false if any are missing 55 | foreach (i ${${PREFIX}_PROCESS_LIBS}) 56 | if (${i}) 57 | set (${PREFIX}_LIBRARIES ${${PREFIX}_LIBRARIES} ${${i}}) 58 | mark_as_advanced(${i}) 59 | else (${i}) 60 | set (${PREFIX}_FOUND FALSE) 61 | endif (${i}) 62 | endforeach (i) 63 | 64 | # Print message and/or exit on fatal error 65 | if (${PREFIX}_FOUND) 66 | if (NOT ${PREFIX}_FIND_QUIETLY) 67 | message (STATUS "Found ${PREFIX} ${${PREFIX}_VERSION}") 68 | endif (NOT ${PREFIX}_FIND_QUIETLY) 69 | else (${PREFIX}_FOUND) 70 | if (${PREFIX}_FIND_REQUIRED) 71 | foreach (i ${${PREFIX}_PROCESS_INCLUDES} ${${PREFIX}_PROCESS_LIBS}) 72 | message("${i}=${${i}}") 73 | endforeach (i) 74 | message (FATAL_ERROR "Required library ${PREFIX} NOT FOUND.\nInstall the library (dev version) and try again. If the library is already installed, use ccmake to set the missing variables manually.") 75 | endif (${PREFIX}_FIND_REQUIRED) 76 | endif (${PREFIX}_FOUND) 77 | endif (NOT ${PREFIX}_FOUND) 78 | endmacro (libfind_process) 79 | 80 | macro(libfind_library PREFIX basename) 81 | set(TMP "") 82 | if(MSVC80) 83 | set(TMP -vc80) 84 | endif(MSVC80) 85 | if(MSVC90) 86 | set(TMP -vc90) 87 | endif(MSVC90) 88 | set(${PREFIX}_LIBNAMES ${basename}${TMP}) 89 | if(${ARGC} GREATER 2) 90 | set(${PREFIX}_LIBNAMES ${basename}${TMP}-${ARGV2}) 91 | string(REGEX REPLACE "\\." 
"_" TMP ${${PREFIX}_LIBNAMES}) 92 | set(${PREFIX}_LIBNAMES ${${PREFIX}_LIBNAMES} ${TMP}) 93 | endif(${ARGC} GREATER 2) 94 | find_library(${PREFIX}_LIBRARY 95 | NAMES ${${PREFIX}_LIBNAMES} 96 | PATHS ${${PREFIX}_PKGCONF_LIBRARY_DIRS} 97 | ) 98 | endmacro(libfind_library) 99 | 100 | -------------------------------------------------------------------------------- /build-common/cmake/PreDepends.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | # PreDepends.cmake 3 | # 4 | # Macros to cope with the fact that cmake doesn't support the notion 5 | # of dependencies that must be built before any other target when 6 | # doing parallel builds. 7 | # 8 | # SYNOPSIS: 9 | # 10 | # Include this file in your top level CMakeLists.txt and call 11 | # 12 | # PredependsInit() 13 | # 14 | # precisely once. 15 | # 16 | # Then, use 17 | 18 | # ExternalDependency_Add( BUILD_BYPRODUCTS ARGS ...) 19 | # instead of 20 | # ExternalProject_Add( BUILD_BYPRODUCTS ...) 21 | 22 | # xadd_executable( ...) 23 | # instead of 24 | # add_executable( ...) 25 | 26 | # xadd_library( ...) 27 | # instead of 28 | # add_library( ...) 29 | 30 | 31 | # This will ensure that the external dependencies get built before 32 | # any targets added with xadd_{executable,library}. 33 | 34 | 35 | # METHODS: 36 | # This module sets up a dummy target "__bc_predepends" and provides a 37 | # macro: 38 | # 39 | # add_predepend(foo) 40 | # 41 | # This macro makes the __bc_predepends target depend on foo. It can 42 | # be used directly, e.g., 43 | # 44 | # ExternalProject_Add(foo BUILD_BYPRODUCTS path/to/libfoo.a ...) 45 | # add_predepend(foo) 46 | # 47 | # but the typical usage is to call 48 | # ExternalDependency_Add(foo BUILD_BYPRODUCTS path/to/libfoo.a ARGS ...) 49 | # instead. 50 | # 51 | # We provide another set of macros: 52 | # 53 | # xadd_executable(bar bar.c) 54 | # xadd_library(baz baz.c) 55 | # 56 | # These function exactly like the native add_executable/add_library 57 | # commands, but they also add a dependency on __bc_predepends 58 | # to the target being generated (bar and baz in this case). 59 | # This forces any registered predepends targets to be built first. 60 | # In the example at hand, we get the following dependency graph: 61 | # 62 | # bar -----------> __bc_predepends -> foo 63 | # libbaz.a ----` 64 | # 65 | # where a -> b is pronounced "a depends on b". 66 | # 67 | # NOTE: cmake 3.2+ is required to use the Ninja generator 68 | cmake_minimum_required(VERSION 2.8) 69 | 70 | set(PREDEPENDS_TARGET_NAME "__bc_predepends") 71 | 72 | # Deal with multiple inclusion of this file 73 | macro(PreDependsInit) 74 | add_custom_target(${PREDEPENDS_TARGET_NAME} ALL) 75 | endmacro() 76 | 77 | macro(add_predepend __TARGET_NAME) 78 | add_dependencies(${PREDEPENDS_TARGET_NAME} ${__TARGET_NAME}) 79 | endmacro() 80 | 81 | macro(xadd_executable __TARGET_NAME) 82 | add_executable(${__TARGET_NAME} ${ARGN}) 83 | add_dependencies(${__TARGET_NAME} ${PREDEPENDS_TARGET_NAME}) 84 | endmacro() 85 | 86 | macro(xadd_library __TARGET_NAME) 87 | add_library(${__TARGET_NAME} ${ARGN}) 88 | add_dependencies(${__TARGET_NAME} ${PREDEPENDS_TARGET_NAME}) 89 | endmacro() 90 | 91 | macro(ExternalDependency_Add NAME) 92 | set(multiValueArgs BUILD_BYPRODUCTS ARGS) 93 | cmake_parse_arguments(extdep_add "" "" "${multiValueArgs}" ${ARGN}) 94 | 95 | # Listing the byproducts is not needed for the "Unix Makefiles" generator. 
96 | # It is, however, required for Ninja. I don't know about any of the other 97 | # generators... 98 | unset(BYPRODUCTS_LIST) 99 | if (CMAKE_GENERATOR MATCHES "Ninja") 100 | if(CMAKE_VERSION VERSION_LESS "3.2") 101 | message(FATAL_ERROR "The Ninja generator requires CMake 3.2+. Try the \"Unix Makefiles\" generator instead.") 102 | endif() 103 | set(BYPRODUCTS_LIST BUILD_BYPRODUCTS "${extdep_add_BUILD_BYPRODUCTS}") 104 | endif() 105 | 106 | set(arg_list "${extdep_add_ARGS}") 107 | ExternalProject_Add( 108 | ${NAME} 109 | "${BYPRODUCTS_LIST}" 110 | "${arg_list}" 111 | ) 112 | add_predepend(${NAME}) 113 | endmacro() 114 | -------------------------------------------------------------------------------- /build-common/cmake/ProjectHelper.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_FLAGS_PACKAGE ${CMAKE_CXX_FLAGS_RELEASE}) 2 | set(CMAKE_C_FLAGS_PACKAGE ${CMAKE_CXX_FLAGS_RELEASE}) 3 | 4 | if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_COMPILER_IS_GNUCXX) 5 | SET(CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_DEBUG} --coverage") 6 | SET(CMAKE_C_FLAGS_COVERAGE "${CMAKE_C_FLAGS_DEBUG} --coverage") 7 | SET(CMAKE_EXE_LINKER_FLAGS_COVERAGE "${CMAKE_EXE_LINKER_FLAGS_DEBUG} --coverage") 8 | endif () 9 | 10 | 11 | if (NOT CMAKE_BUILD_TYPE) 12 | set(CMAKE_BUILD_TYPE release CACHE STRING 13 | "Options: None Debug Release Package RelWithDebInfo MinSizeRel Coverage." FORCE) 14 | message("No CMAKE_BUILD_TYPE specified, defaulting to release") 15 | endif () 16 | 17 | macro(add_projects dir) 18 | file(GLOB projects RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "${dir}/[a-z]*") 19 | foreach( proj ${projects} ) 20 | add_subdirectory( ${proj} ${PROJECT_BINARY_DIR}/build/${proj} ) 21 | endforeach(proj ${projects}) 22 | endmacro(add_projects dir) 23 | -------------------------------------------------------------------------------- /build-common/cmake/TestHelper.cmake: -------------------------------------------------------------------------------- 1 | # Build/configure gtest 2 | 3 | find_package(Threads REQUIRED) 4 | link_libraries(${CMAKE_THREAD_LIBS_INIT}) 5 | 6 | include(ExternalProject) 7 | set_directory_properties(PROPERTIES 8 | EP_PREFIX ${CMAKE_BINARY_DIR}/vendor) 9 | 10 | set(GTEST_LIB_DIR ${CMAKE_BINARY_DIR}/vendor/gtest160-build) 11 | ExternalProject_Add( 12 | gtest160 13 | URL ${CMAKE_CURRENT_SOURCE_DIR}/build-common/vendor/gtest-1.6.0.tar.gz 14 | INSTALL_COMMAND "" 15 | CMAKE_ARGS -DCMAKE_CXX_FLAGS=-DGTEST_USE_OWN_TR1_TUPLE=1 16 | BINARY_DIR ${GTEST_LIB_DIR} 17 | ) 18 | ExternalProject_Get_Property(gtest160 source_dir) 19 | include_directories(${source_dir}/include) 20 | 21 | set(GTEST_LIBRARY 22 | ${GTEST_LIB_DIR}/${CMAKE_FIND_LIBRARY_PREFIXES}gtest${CMAKE_STATIC_LIBRARY_SUFFIX} 23 | ) 24 | 25 | set(GTEST_MAIN_LIBRARY 26 | ${GTEST_LIB_DIR}/${CMAKE_FIND_LIBRARY_PREFIXES}gtest_main${CMAKE_STATIC_LIBRARY_SUFFIX} 27 | ) 28 | 29 | add_library(gtest STATIC IMPORTED) 30 | set_property(TARGET gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARY}) 31 | add_library(gtest_main STATIC IMPORTED) 32 | set_property(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARY}) 33 | 34 | macro(add_unit_tests test_name) 35 | set(src_files ${ARGN}) 36 | add_executable(${test_name} ${src_files} ${COMMON_SOURCES}) 37 | set_target_properties(${test_name} PROPERTIES COMPILE_FLAGS -DGTEST_USE_OWN_TR1_TUPLE=1) 38 | target_link_libraries(${test_name} ${TEST_LIBS} gtest gtest_main ${CMAKE_THREAD_LIBS_INIT}) 39 | add_dependencies(${test_name} gtest160) 40 | if($ENV{BC_UNIT_TEST_VG}) 41 | 
add_test( 42 | NAME ${test_name} 43 | COMMAND valgrind --leak-check=full --error-exitcode=1 $ 44 | ) 45 | else() 46 | add_test(NAME ${test_name} COMMAND ${test_name}) 47 | endif() 48 | 49 | set_tests_properties(${test_name} PROPERTIES LABELS unit) 50 | endmacro(add_unit_tests test_name src_files) 51 | 52 | macro(def_test testName) 53 | add_executable(Test${testName} Test${testName}.cpp ${COMMON_SOURCES}) 54 | set_target_properties(${test_name} PROPERTIES COMPILE_FLAGS -DGTEST_USE_OWN_TR1_TUPLE=1) 55 | target_link_libraries(Test${testName} ${TEST_LIBS} gtest gtest_main ${CMAKE_THREAD_LIBS_INIT}) 56 | add_dependencies(Test${testName} gtest160) 57 | if($ENV{BC_UNIT_TEST_VG}) 58 | add_test(NAME Test${testName} COMMAND valgrind --leak-check=full --error-exitcode=1 $) 59 | else() 60 | add_test(NAME Test${testName} COMMAND Test${testName}) 61 | endif() 62 | 63 | set_tests_properties(Test${testName} PROPERTIES LABELS unit) 64 | endmacro(def_test testName) 65 | 66 | macro(def_integration_test exe_tgt testName script) 67 | set(PPATH "${BC_PYTHONPATH_EXTRA}:${CMAKE_SOURCE_DIR}/build-common/python:$ENV{PYTHONPATH}") 68 | add_test( 69 | NAME ${testName} 70 | COMMAND sh -ec "PYTHONPATH='${PPATH}' ${CMAKE_CURRENT_SOURCE_DIR}/${script} $" 71 | ) 72 | set_tests_properties(${testName} PROPERTIES LABELS integration) 73 | endmacro(def_integration_test testName script) 74 | -------------------------------------------------------------------------------- /build-common/cmake/VersionHelper.cmake: -------------------------------------------------------------------------------- 1 | execute_process( 2 | COMMAND ${CMAKE_SOURCE_DIR}/build-common/bin/gitrev.pl 3 | WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} 4 | OUTPUT_VARIABLE VERSION_DATA 5 | ERROR_VARIABLE VERSION_ERR 6 | OUTPUT_STRIP_TRAILING_WHITESPACE 7 | ) 8 | separate_arguments(VERSION_DATA) 9 | list(GET VERSION_DATA 0 EXE_VERSION_SUFFIX) 10 | list(GET VERSION_DATA 1 FULL_VERSION) 11 | list(GET VERSION_DATA 2 COMMIT_HASH) 12 | 13 | set(DEFAULT_ETC_ALTERNATIVES_PRIORITY 10) 14 | if(${EXE_VERSION_SUFFIX} STREQUAL "-unstable") 15 | set(DEFAULT_ETC_ALTERNATIVES_PRIORITY 0) 16 | endif() 17 | 18 | if (NOT PACKAGE_VERSION_SUFFIX) 19 | set(PACKAGE_VERSION_SUFFIX "") 20 | endif () 21 | -------------------------------------------------------------------------------- /build-common/python/ctest_convert.py: -------------------------------------------------------------------------------- 1 | from lxml import etree 2 | import StringIO 3 | import sys 4 | 5 | TAGfile = open(sys.argv[1]+"/Testing/TAG", 'r') 6 | dirname = TAGfile.readline().strip() 7 | 8 | xmlfile = open(sys.argv[1]+"/Testing/"+dirname+"/Test.xml", 'r') 9 | xslfile = open(sys.argv[2], 'r') 10 | 11 | xmlcontent = xmlfile.read() 12 | xslcontent = xslfile.read() 13 | 14 | xmldoc = etree.parse(StringIO.StringIO(xmlcontent)) 15 | xslt_root = etree.XML(xslcontent) 16 | transform = etree.XSLT(xslt_root) 17 | 18 | result_tree = transform(xmldoc) 19 | print(result_tree) 20 | -------------------------------------------------------------------------------- /build-common/python/integrationtest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from glob import glob 4 | from subprocess import Popen, PIPE 5 | from valgrindwrapper import ValgrindWrapper 6 | import difflib 7 | import os 8 | import re 9 | import shlex 10 | import shutil 11 | import sys 12 | import tempfile 13 | import unittest 14 | 15 | class IntegrationTest(): 16 | exe_path = None 17 | def setUp(self): 
18 | self.test_dir = os.path.dirname(sys.argv[0]) 19 | self.data_dir = os.path.join(self.test_dir, "data") 20 | self.tmp_dir = tempfile.mkdtemp() 21 | 22 | def tearDown(self): 23 | shutil.rmtree(self.tmp_dir) 24 | 25 | def inputFiles(self, *names): 26 | rv = [] 27 | for n in names: 28 | rv.extend(sorted(glob(os.path.join(self.data_dir, n)))) 29 | if len(rv) == 0: 30 | raise IOError("No file matching %s not found in %s" %( 31 | ", ".join(names), 32 | self.data_dir) 33 | ) 34 | return rv 35 | 36 | def execute(self, args): 37 | cmdline = "%s %s" %(self.exe_path, " ".join(args)) 38 | vglog_file = self.tempFile("valgrind.log") 39 | return ValgrindWrapper(shlex.split(cmdline), vglog_file).run() 40 | 41 | def tempFile(self, name): 42 | return os.path.join(self.tmp_dir, name) 43 | 44 | def assertFilesEqual(self, first, second, msg=None, filter_regex=None): 45 | first_data = open(first).readlines() 46 | second_data = open(second).readlines() 47 | if filter_regex: 48 | first_data = [x for x in first_data if not re.match(filter_regex, x)] 49 | second_data = [x for x in second_data if not re.match(filter_regex, x)] 50 | self.assertMultiLineEqual("".join(first_data), "".join(second_data)) 51 | 52 | 53 | def assertMultiLineEqual(self, first, second, msg=None): 54 | """Assert that two multi-line strings are equal. 55 | If they aren't, show a nice diff. 56 | """ 57 | self.assertTrue(isinstance(first, str), 58 | 'First argument is not a string') 59 | self.assertTrue(isinstance(second, str), 60 | 'Second argument is not a string') 61 | 62 | if first != second: 63 | message = ''.join(difflib.ndiff(first.splitlines(True), 64 | second.splitlines(True))) 65 | if msg: 66 | message += " : " + msg 67 | self.fail("Multi-line strings are unequal:\n" + message) 68 | 69 | def main(): 70 | if len(sys.argv) < 2: 71 | print "Error: required argument (path to test executable) missing" 72 | sys.exit(1) 73 | IntegrationTest.exe_path = sys.argv.pop() 74 | unittest.main() 75 | -------------------------------------------------------------------------------- /build-common/python/valgrindwrapper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from subprocess import Popen, PIPE 4 | import os 5 | import re 6 | 7 | class ValgrindWrapper: 8 | vg_path = None 9 | disable_var = "BCINT_TEST_NO_VALGRIND" 10 | 11 | def __init__(self, command, vglog_file): 12 | self.command = command 13 | self.vglog_file = vglog_file 14 | if self.vg_path == None: 15 | for d in os.environ["PATH"].split(os.pathsep): 16 | path = os.path.join(d, "valgrind") 17 | if os.path.exists(path) and os.access(path, os.X_OK): 18 | self.vg_path = path 19 | break 20 | 21 | def have_valgrind(self): 22 | return self.vg_path != None and os.getenv(self.disable_var) == None 23 | 24 | def run(self): 25 | cmd = self.command 26 | 27 | if self.have_valgrind(): 28 | cmd[:0] = [ 29 | "valgrind", 30 | "--error-exitcode=1", 31 | "--leak-check=full", 32 | "--log-file=%s" %self.vglog_file, 33 | ] 34 | 35 | p = Popen(cmd, stderr=PIPE, close_fds=True) 36 | out, err = p.communicate(None) 37 | if not self.leak_free(): 38 | raise RuntimeError( 39 | "Possible memory leaks detected in command %s" %(" ".join(cmd)) 40 | ) 41 | return p.returncode, err 42 | 43 | def leak_free(self): 44 | if self.have_valgrind() == False: 45 | return True 46 | 47 | log_contents = open(self.vglog_file).read() 48 | m = re.search("no leaks are possible", log_contents) 49 | return m != None 50 | 
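# Usage sketch (mirroring IntegrationTest.execute() in integrationtest.py);
# the command and log path below are placeholders, not real test fixtures:
#
#   wrapper = ValgrindWrapper(["bam-readcount", "-f", "ref.fa", "test.bam"],
#                             "/tmp/valgrind.log")
#   returncode, stderr = wrapper.run()
#
# run() prepends valgrind to the command when a valgrind binary is found on
# PATH and BCINT_TEST_NO_VALGRIND is unset, and raises RuntimeError unless the
# valgrind log reports "no leaks are possible".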
-------------------------------------------------------------------------------- /build-common/resources/ctest_conv.xsl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | BuildName: 56 | BuildStamp: 57 | Name: 58 | Generator: 59 | CompilerName: 60 | OSName: 61 | Hostname: 62 | OSRelease: 63 | OSVersion: 64 | OSPlatform: 65 | Is64Bits: 66 | VendorString: 67 | VendorID: 68 | FamilyID: 69 | ModelID: 70 | ProcessorCacheSize: 71 | NumberOfLogicalCPU: 72 | NumberOfPhysicalCPU: 73 | TotalVirtualMemory: 74 | TotalPhysicalMemory: 75 | LogicalProcessorsPerPhysical: 76 | ProcessorClockFrequency: 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /build-common/vendor/gtest-1.6.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genome/bam-readcount/c7c76e6b84de9ed774ea3865fedc04b2d6276865/build-common/vendor/gtest-1.6.0.tar.gz -------------------------------------------------------------------------------- /cmake/BuildBoost.cmake: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | set(DEFAULT_BOOST_URL ${CMAKE_SOURCE_DIR}/vendor/boost-1.55-bamrc.tar.gz) 4 | if(NOT DEFINED BOOST_URL) 5 | set(BOOST_URL ${DEFAULT_BOOST_URL}) 6 | endif(NOT DEFINED BOOST_URL) 7 | 8 | 9 | set(BOOST_ROOT ${CMAKE_BINARY_DIR}/vendor/boost) 10 | set(BOOST_SRC ${CMAKE_BINARY_DIR}/vendor/boost-src) 11 | set(BOOST_LOG ${BOOST_SRC}/build.log) 12 | 13 | set(BOOST_BUILD_LIBS_STRING "") 14 | 15 | foreach(libname ${REQUIRED_BOOST_LIBS}) 16 | set(BOOST_BUILD_LIBS ${BOOST_BUILD_LIBS} --with-${libname}) 17 | set(Boost_LIBRARIES ${Boost_LIBRARIES} 18 | ${BOOST_ROOT}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}boost_${libname}${CMAKE_STATIC_LIBRARY_SUFFIX} 19 | ) 20 | endforeach(libname ${REQUIRED_BOOST_LIBS}) 21 | 22 | message("Extracting boost from ${BOOST_URL}") 23 | message("Boost build log can be found at ${BOOST_LOG}") 24 | 25 | ExternalDependency_Add( 26 | boost-1.55 27 | BUILD_BYPRODUCTS ${Boost_LIBRARIES} 28 | ARGS 29 | URL ${BOOST_URL} 30 | SOURCE_DIR ${BOOST_SRC} 31 | BINARY_DIR ${BOOST_SRC} 32 | CONFIGURE_COMMAND "./bootstrap.sh" 33 | BUILD_COMMAND 34 | echo "Building boost, build log is ${BOOST_LOG}" && 35 | ./b2 --prefix=${BOOST_ROOT} --layout=system link=static 36 | threading=multi install ${BOOST_BUILD_LIBS} > ${BOOST_LOG} 2>&1 37 | INSTALL_COMMAND "true" 38 | ) 39 | 40 | set(Boost_INCLUDE_DIRS ${BOOST_ROOT}/include) 41 | -------------------------------------------------------------------------------- /cmake/BuildSamtools.cmake: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | set(SAMTOOLS_VERSION 1.10) 4 | 5 | set(SAMTOOLS_ROOT ${CMAKE_BINARY_DIR}/vendor/samtools-${SAMTOOLS_VERSION}) 6 | set(SAMTOOLS_LOG ${CMAKE_BINARY_DIR}/cmake_samtools_build.log) 7 | set(HTSLIB_LOG ${CMAKE_BINARY_DIR}/cmake_htslib_build.log) 8 | set(SAMTOOLS_LIB 
${SAMTOOLS_ROOT}/${CMAKE_FIND_LIBRARY_PREFIXES}bam${CMAKE_STATIC_LIBRARY_SUFFIX}) 9 | #set(SAMTOOLS_BIN ${SAMTOOLS_ROOT}/samtools) 10 | 11 | set(HTSLIB_ROOT ${SAMTOOLS_ROOT}/htslib-${SAMTOOLS_VERSION}) 12 | set(HTSLIB_LIB ${HTSLIB_ROOT}/${CMAKE_FIND_LIBRARY_PREFIXES}hts${CMAKE_STATIC_LIBRARY_SUFFIX}) 13 | 14 | cmake_print_variables(SAMTOOLS_ROOT) 15 | cmake_print_variables(SAMTOOLS_LIB) 16 | cmake_print_variables(HTSLIB_LIB) 17 | 18 | set(ZLIB_ROOT ${CMAKE_BINARY_DIR}/vendor/zlib) 19 | set(ZLIB_SRC ${CMAKE_BINARY_DIR}/vendor/zlib-src) 20 | set(ZLIB_INCLUDE_DIRS ${ZLIB_ROOT}/include) 21 | set(ZLIB_LIBRARIES ${ZLIB_ROOT}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}z${CMAKE_STATIC_LIBRARY_SUFFIX}) 22 | ExternalProject_Add( 23 | zlib 24 | BUILD_BYPRODUCTS ${ZLIB_LIBRARIES} 25 | ARGS 26 | URL ${CMAKE_SOURCE_DIR}/vendor/zlib-1.2.11.tar.gz 27 | SOURCE_DIR ${ZLIB_SRC} 28 | BINARY_DIR ${ZLIB_SRC} 29 | CONFIGURE_COMMAND ./configure --prefix=${ZLIB_ROOT} 30 | BUILD_COMMAND make 31 | INSTALL_COMMAND make install 32 | ) 33 | 34 | set(XZ_ROOT ${CMAKE_BINARY_DIR}/vendor/xz) 35 | set(XZ_SRC ${CMAKE_BINARY_DIR}/vendor/xz-src) 36 | set(XZ_INCLUDE_DIRS ${XZ_ROOT}/include) 37 | set(XZ_LIBRARIES ${XZ_ROOT}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}lzma${CMAKE_STATIC_LIBRARY_SUFFIX}) 38 | ExternalProject_Add( 39 | xz 40 | BUILD_BYPRODUCTS ${XZ_LIBRARIES} 41 | ARGS 42 | URL ${CMAKE_SOURCE_DIR}/vendor/xz-5.2.4.tar.gz 43 | SOURCE_DIR ${XZ_SRC} 44 | BINARY_DIR ${XZ_SRC} 45 | CONFIGURE_COMMAND ./configure --prefix=${XZ_ROOT} 46 | BUILD_COMMAND make 47 | INSTALL_COMMAND make install 48 | ) 49 | 50 | set(BZIP2_ROOT ${CMAKE_BINARY_DIR}/vendor/bzip2) 51 | set(BZIP2_INCLUDE_DIRS ${BZIP2_ROOT}) 52 | set(BZIP2_LIBRARIES ${BZIP2_ROOT}/${CMAKE_FIND_LIBRARY_PREFIXES}bz2${CMAKE_STATIC_LIBRARY_SUFFIX}) 53 | ExternalProject_Add( 54 | bzip2 55 | BUILD_BYPRODUCTS ${BZIP2_LIBRARIES} 56 | ARGS 57 | URL ${CMAKE_SOURCE_DIR}/vendor/bzip2-1.0.8.tar.gz 58 | SOURCE_DIR ${BZIP2_ROOT} 59 | BINARY_DIR ${BZIP2_ROOT} 60 | CONFIGURE_COMMAND echo "Building bzip2 library" 61 | BUILD_COMMAND make 62 | INSTALL_COMMAND true 63 | ) 64 | 65 | set(MBEDTLS_ROOT ${CMAKE_BINARY_DIR}/vendor/mbedtls) 66 | set(MBEDTLS_SRC ${CMAKE_BINARY_DIR}/vendor/mbedtls-src) 67 | set(MBEDTLS_INCLUDE_DIRS ${MBEDTLS_ROOT}/include) 68 | set(MBEDTLS_LIBRARIES ${MBEDTLS_ROOT}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}mbedtls${CMAKE_STATIC_LIBRARY_SUFFIX} ${MBEDTLS_ROOT}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}mbedx509${CMAKE_STATIC_LIBRARY_SUFFIX} ${MBEDTLS_ROOT}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}mbedcrypto${CMAKE_STATIC_LIBRARY_SUFFIX}) 69 | ExternalProject_Add( 70 | mbedtls 71 | BUILD_BYPRODUCTS ${MBEDTLS_LIBRARIES} 72 | ARGS 73 | URL ${CMAKE_SOURCE_DIR}/vendor/mbedtls-2.16.4-apache.tgz 74 | SOURCE_DIR ${MBEDTLS_SRC} 75 | BINARY_DIR ${MBEDTLS_SRC} 76 | CONFIGURE_COMMAND echo "Building mbedTLS with make lib" 77 | BUILD_COMMAND make lib 78 | INSTALL_COMMAND make DESTDIR=${MBEDTLS_ROOT} install 79 | ) 80 | 81 | set(CURL_ROOT ${CMAKE_BINARY_DIR}/vendor/curl) 82 | set(CURL_SRC ${CMAKE_BINARY_DIR}/vendor/curl-src) 83 | set(CURL_INCLUDE_DIRS ${CURL_ROOT}/include) 84 | set(CURL_LIBRARIES ${CURL_ROOT}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}curl${CMAKE_STATIC_LIBRARY_SUFFIX}) 85 | ExternalProject_Add( 86 | curl 87 | BUILD_BYPRODUCTS ${CURL_LIBRARIES} 88 | ARGS 89 | URL ${CMAKE_SOURCE_DIR}/vendor/curl-7.67.0.tar.gz 90 | SOURCE_DIR ${CURL_SRC} 91 | BINARY_DIR ${CURL_SRC} 92 | # Disable everything we can except mbed with extreme prejudice 93 | # --disable-ldap and --disable-ldaps should take care of -lldap and -llber 94 | # 
which were causing problems on my OS X machine 95 | # RTSP remains enabled in the minimal build container, 96 | # so we leave out --disable-rtsp 97 | CONFIGURE_COMMAND ./configure --prefix=${CURL_ROOT} --with-mbedtls=${MBEDTLS_ROOT} --without-zlib --without-brotli --without-winssl --without-schannel --without-darwinssl --without-secure-transport --without-amissl --without-ssl --without-gnutls --without-wolfssl --without-mesalink --without-nss --without-libpsl --without-libmetalink --without-librtmp --without-winidn --without-libidn2 --without-nghttp2 --without-ngtcp2 --without-nghttp3 --without-quiche --without-zsh-functions-dir --without-fish-functions-dir --disable-ldap --disable-ldaps 98 | BUILD_COMMAND make 99 | INSTALL_COMMAND make install 100 | DEPENDS mbedtls 101 | ) 102 | 103 | ExternalProject_Add( 104 | samtools-lib 105 | BUILD_BYPRODUCTS ${SAMTOOLS_LIB} ${HTSLIB_LIB} 106 | ARGS 107 | URL ${CMAKE_SOURCE_DIR}/vendor/samtools-1.10.tar.bz2 108 | SOURCE_DIR ${SAMTOOLS_ROOT} 109 | BINARY_DIR ${SAMTOOLS_ROOT} 110 | #CONFIGURE_COMMAND C_INCLUDE_PATH=${ZLIB_INCLUDE_DIRS}:${BZIP2_INCLUDE_DIRS} ./configure --without-curses 111 | #CONFIGURE_COMMAND ./configure --without-curses 112 | #PATCH_COMMAND patch -p2 -t -N < ${CMAKE_SOURCE_DIR}/vendor/Makefile.disable_curl.patch 113 | CONFIGURE_COMMAND echo "Building samtools, build log at ${SAMTOOLS_LOG}" 114 | BUILD_COMMAND make libbam.a > ${SAMTOOLS_LOG} 2>&1 && 115 | cd htslib-${SAMTOOLS_VERSION} && 116 | C_INCLUDE_PATH=${ZLIB_INCLUDE_DIRS}:${BZIP2_INCLUDE_DIRS}:${XZ_INCLUDE_DIRS}:${CURL_INCLUDE_DIRS} make libhts.a > ${HTSLIB_LOG} 2>&1 117 | INSTALL_COMMAND true 118 | DEPENDS zlib bzip2 xz curl 119 | ) 120 | 121 | 122 | set(Samtools_INCLUDE_DIRS ${SAMTOOLS_ROOT}) 123 | set(Samtools_LIBRARIES ${SAMTOOLS_LIB}) 124 | 125 | set(Htslib_INCLUDE_DIRS ${HTSLIB_ROOT}) 126 | set(Htslib_LIBRARIES ${HTSLIB_LIB}) 127 | 128 | set(Support_INCLUDE_DIRS ${ZLIB_INCLUDE_DIRS} ${BZIP2_INCLUDE_DIRS} ${XZ_INCLUDE_DIRS} ${CURL_INCLUDE_DIRS} ${MBEDTLS_INCLUDE_DIRS}) 129 | set(Support_LIBRARIES pthread ${ZLIB_LIBRARIES} ${BZIP2_LIBRARIES} ${XZ_LIBRARIES} ${CURL_LIBRARIES} ${MBEDTLS_LIBRARIES}) 130 | -------------------------------------------------------------------------------- /cmake/CodeCoverage.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 - 2017, Lars Bilke 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without modification, 5 | # are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its contributors 15 | # may be used to endorse or promote products derived from this software without 16 | # specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 22 | # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # CHANGES: 30 | # 31 | # 2012-01-31, Lars Bilke 32 | # - Enable Code Coverage 33 | # 34 | # 2013-09-17, Joakim Söderberg 35 | # - Added support for Clang. 36 | # - Some additional usage instructions. 37 | # 38 | # 2016-02-03, Lars Bilke 39 | # - Refactored functions to use named parameters 40 | # 41 | # 2017-06-02, Lars Bilke 42 | # - Merged with modified version from github.com/ufz/ogs 43 | # 44 | # 2019-05-06, Anatolii Kurotych 45 | # - Remove unnecessary --coverage flag 46 | # 47 | # 2019-12-13, FeRD (Frank Dana) 48 | # - Deprecate COVERAGE_LCOVR_EXCLUDES and COVERAGE_GCOVR_EXCLUDES lists in favor 49 | # of tool-agnostic COVERAGE_EXCLUDES variable, or EXCLUDE setup arguments. 50 | # - CMake 3.4+: All excludes can be specified relative to BASE_DIRECTORY 51 | # - All setup functions: accept BASE_DIRECTORY, EXCLUDE list 52 | # - Set lcov basedir with -b argument 53 | # - Add automatic --demangle-cpp in lcovr, if 'c++filt' is available (can be 54 | # overridden with NO_DEMANGLE option in setup_target_for_coverage_lcovr().) 55 | # - Delete output dir, .info file on 'make clean' 56 | # - Remove Python detection, since version mismatches will break gcovr 57 | # - Minor cleanup (lowercase function names, update examples...) 58 | # 59 | # 2019-12-19, FeRD (Frank Dana) 60 | # - Rename Lcov outputs, make filtered file canonical, fix cleanup for targets 61 | # 62 | # 2020-01-19, Bob Apthorpe 63 | # - Added gfortran support 64 | # 65 | # 2020-02-17, FeRD (Frank Dana) 66 | # - Make all add_custom_target()s VERBATIM to auto-escape wildcard characters 67 | # in EXCLUDEs, and remove manual escaping from gcovr targets 68 | # 69 | # 2021-01-19, Robin Mueller 70 | # - Add CODE_COVERAGE_VERBOSE option which will allow to print out commands which are run 71 | # - Added the option for users to set the GCOVR_ADDITIONAL_ARGS variable to supply additional 72 | # flags to the gcovr command 73 | # 74 | # 2020-05-04, Mihchael Davis 75 | # - Add -fprofile-abs-path to make gcno files contain absolute paths 76 | # - Fix BASE_DIRECTORY not working when defined 77 | # - Change BYPRODUCT from folder to index.html to stop ninja from complaining about double defines 78 | # 79 | # 2021-05-10, Martin Stump 80 | # - Check if the generator is multi-config before warning about non-Debug builds 81 | # 82 | # USAGE: 83 | # 84 | # 1. Copy this file into your cmake modules path. 85 | # 86 | # 2. Add the following line to your CMakeLists.txt (best inside an if-condition 87 | # using a CMake option() to enable it just optionally): 88 | # include(CodeCoverage) 89 | # 90 | # 3. Append necessary compiler flags: 91 | # append_coverage_compiler_flags() 92 | # 93 | # 3.a (OPTIONAL) Set appropriate optimization flags, e.g. -O0, -O1 or -Og 94 | # 95 | # 4. If you need to exclude additional directories from the report, specify them 96 | # using full paths in the COVERAGE_EXCLUDES variable before calling 97 | # setup_target_for_coverage_*(). 
98 | # Example: 99 | # set(COVERAGE_EXCLUDES 100 | # '${PROJECT_SOURCE_DIR}/src/dir1/*' 101 | # '/path/to/my/src/dir2/*') 102 | # Or, use the EXCLUDE argument to setup_target_for_coverage_*(). 103 | # Example: 104 | # setup_target_for_coverage_lcov( 105 | # NAME coverage 106 | # EXECUTABLE testrunner 107 | # EXCLUDE "${PROJECT_SOURCE_DIR}/src/dir1/*" "/path/to/my/src/dir2/*") 108 | # 109 | # 4.a NOTE: With CMake 3.4+, COVERAGE_EXCLUDES or EXCLUDE can also be set 110 | # relative to the BASE_DIRECTORY (default: PROJECT_SOURCE_DIR) 111 | # Example: 112 | # set(COVERAGE_EXCLUDES "dir1/*") 113 | # setup_target_for_coverage_gcovr_html( 114 | # NAME coverage 115 | # EXECUTABLE testrunner 116 | # BASE_DIRECTORY "${PROJECT_SOURCE_DIR}/src" 117 | # EXCLUDE "dir2/*") 118 | # 119 | # 5. Use the functions described below to create a custom make target which 120 | # runs your test executable and produces a code coverage report. 121 | # 122 | # 6. Build a Debug build: 123 | # cmake -DCMAKE_BUILD_TYPE=Debug .. 124 | # make 125 | # make my_coverage_target 126 | # 127 | 128 | include(CMakeParseArguments) 129 | 130 | option(CODE_COVERAGE_VERBOSE "Verbose information" FALSE) 131 | 132 | # Check prereqs 133 | find_program( GCOV_PATH gcov ) 134 | find_program( LCOV_PATH NAMES lcov lcov.bat lcov.exe lcov.perl) 135 | find_program( FASTCOV_PATH NAMES fastcov fastcov.py ) 136 | find_program( GENHTML_PATH NAMES genhtml genhtml.perl genhtml.bat ) 137 | find_program( GCOVR_PATH gcovr PATHS ${CMAKE_SOURCE_DIR}/scripts/test) 138 | find_program( CPPFILT_PATH NAMES c++filt ) 139 | 140 | if(NOT GCOV_PATH) 141 | message(FATAL_ERROR "gcov not found! Aborting...") 142 | endif() # NOT GCOV_PATH 143 | 144 | get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) 145 | list(GET LANGUAGES 0 LANG) 146 | 147 | if("${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(Apple)?[Cc]lang") 148 | if("${CMAKE_${LANG}_COMPILER_VERSION}" VERSION_LESS 3) 149 | message(FATAL_ERROR "Clang version must be 3.0.0 or greater! Aborting...") 150 | endif() 151 | elseif(NOT CMAKE_COMPILER_IS_GNUCXX) 152 | if("${CMAKE_Fortran_COMPILER_ID}" MATCHES "[Ff]lang") 153 | # Do nothing; exit conditional without error if true 154 | elseif("${CMAKE_Fortran_COMPILER_ID}" MATCHES "GNU") 155 | # Do nothing; exit conditional without error if true 156 | else() 157 | message(FATAL_ERROR "Compiler is not GNU gcc! Aborting...") 158 | endif() 159 | endif() 160 | 161 | set(COVERAGE_COMPILER_FLAGS "-g -fprofile-arcs -ftest-coverage" 162 | CACHE INTERNAL "") 163 | if(CMAKE_CXX_COMPILER_ID MATCHES "(GNU|Clang)") 164 | include(CheckCXXCompilerFlag) 165 | check_cxx_compiler_flag(-fprofile-abs-path HAVE_fprofile_abs_path) 166 | if(HAVE_fprofile_abs_path) 167 | set(COVERAGE_COMPILER_FLAGS "${COVERAGE_COMPILER_FLAGS} -fprofile-abs-path") 168 | endif() 169 | endif() 170 | 171 | set(CMAKE_Fortran_FLAGS_COVERAGE 172 | ${COVERAGE_COMPILER_FLAGS} 173 | CACHE STRING "Flags used by the Fortran compiler during coverage builds." 174 | FORCE ) 175 | set(CMAKE_CXX_FLAGS_COVERAGE 176 | ${COVERAGE_COMPILER_FLAGS} 177 | CACHE STRING "Flags used by the C++ compiler during coverage builds." 178 | FORCE ) 179 | set(CMAKE_C_FLAGS_COVERAGE 180 | ${COVERAGE_COMPILER_FLAGS} 181 | CACHE STRING "Flags used by the C compiler during coverage builds." 182 | FORCE ) 183 | set(CMAKE_EXE_LINKER_FLAGS_COVERAGE 184 | "" 185 | CACHE STRING "Flags used for linking binaries during coverage builds." 
186 | FORCE ) 187 | set(CMAKE_SHARED_LINKER_FLAGS_COVERAGE 188 | "" 189 | CACHE STRING "Flags used by the shared libraries linker during coverage builds." 190 | FORCE ) 191 | mark_as_advanced( 192 | CMAKE_Fortran_FLAGS_COVERAGE 193 | CMAKE_CXX_FLAGS_COVERAGE 194 | CMAKE_C_FLAGS_COVERAGE 195 | CMAKE_EXE_LINKER_FLAGS_COVERAGE 196 | CMAKE_SHARED_LINKER_FLAGS_COVERAGE ) 197 | 198 | get_property(GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) 199 | if(NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR GENERATOR_IS_MULTI_CONFIG)) 200 | message(WARNING "Code coverage results with an optimised (non-Debug) build may be misleading") 201 | endif() # NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR GENERATOR_IS_MULTI_CONFIG) 202 | 203 | if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") 204 | link_libraries(gcov) 205 | endif() 206 | 207 | # Defines a target for running and collection code coverage information 208 | # Builds dependencies, runs the given executable and outputs reports. 209 | # NOTE! The executable should always have a ZERO as exit code otherwise 210 | # the coverage generation will not complete. 211 | # 212 | # setup_target_for_coverage_lcov( 213 | # NAME testrunner_coverage # New target name 214 | # EXECUTABLE testrunner -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR 215 | # DEPENDENCIES testrunner # Dependencies to build first 216 | # BASE_DIRECTORY "../" # Base directory for report 217 | # # (defaults to PROJECT_SOURCE_DIR) 218 | # EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative 219 | # # to BASE_DIRECTORY, with CMake 3.4+) 220 | # NO_DEMANGLE # Don't demangle C++ symbols 221 | # # even if c++filt is found 222 | # ) 223 | function(setup_target_for_coverage_lcov) 224 | 225 | set(options NO_DEMANGLE) 226 | set(oneValueArgs BASE_DIRECTORY NAME) 227 | set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES LCOV_ARGS GENHTML_ARGS) 228 | cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) 229 | 230 | if(NOT LCOV_PATH) 231 | message(FATAL_ERROR "lcov not found! Aborting...") 232 | endif() # NOT LCOV_PATH 233 | 234 | if(NOT GENHTML_PATH) 235 | message(FATAL_ERROR "genhtml not found! Aborting...") 236 | endif() # NOT GENHTML_PATH 237 | 238 | # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR 239 | if(DEFINED Coverage_BASE_DIRECTORY) 240 | get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) 241 | else() 242 | set(BASEDIR ${PROJECT_SOURCE_DIR}) 243 | endif() 244 | 245 | # Collect excludes (CMake 3.4+: Also compute absolute paths) 246 | set(LCOV_EXCLUDES "") 247 | foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_LCOV_EXCLUDES}) 248 | if(CMAKE_VERSION VERSION_GREATER 3.4) 249 | get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR}) 250 | endif() 251 | list(APPEND LCOV_EXCLUDES "${EXCLUDE}") 252 | endforeach() 253 | list(REMOVE_DUPLICATES LCOV_EXCLUDES) 254 | 255 | # Conditional arguments 256 | if(CPPFILT_PATH AND NOT ${Coverage_NO_DEMANGLE}) 257 | set(GENHTML_EXTRA_ARGS "--demangle-cpp") 258 | endif() 259 | 260 | # Setting up commands which will be run to generate coverage data. 261 | # Cleanup lcov 262 | set(LCOV_CLEAN_CMD 263 | ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -directory . 
264 | -b ${BASEDIR} --zerocounters 265 | ) 266 | # Create baseline to make sure untouched files show up in the report 267 | set(LCOV_BASELINE_CMD 268 | ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -c -i -d . -b 269 | ${BASEDIR} -o ${Coverage_NAME}.base 270 | ) 271 | # Run tests 272 | set(LCOV_EXEC_TESTS_CMD 273 | ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS} 274 | ) 275 | # Capturing lcov counters and generating report 276 | set(LCOV_CAPTURE_CMD 277 | ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} --directory . -b 278 | ${BASEDIR} --capture --output-file ${Coverage_NAME}.capture 279 | ) 280 | # add baseline counters 281 | set(LCOV_BASELINE_COUNT_CMD 282 | ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -a ${Coverage_NAME}.base 283 | -a ${Coverage_NAME}.capture --output-file ${Coverage_NAME}.total 284 | ) 285 | # filter collected data to final coverage report 286 | set(LCOV_FILTER_CMD 287 | ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} --remove 288 | ${Coverage_NAME}.total ${LCOV_EXCLUDES} --output-file ${Coverage_NAME}.info 289 | ) 290 | # Generate HTML output 291 | set(LCOV_GEN_HTML_CMD 292 | ${GENHTML_PATH} ${GENHTML_EXTRA_ARGS} ${Coverage_GENHTML_ARGS} -o 293 | ${Coverage_NAME} ${Coverage_NAME}.info 294 | ) 295 | 296 | 297 | if(CODE_COVERAGE_VERBOSE) 298 | message(STATUS "Executed command report") 299 | message(STATUS "Command to clean up lcov: ") 300 | string(REPLACE ";" " " LCOV_CLEAN_CMD_SPACED "${LCOV_CLEAN_CMD}") 301 | message(STATUS "${LCOV_CLEAN_CMD_SPACED}") 302 | 303 | message(STATUS "Command to create baseline: ") 304 | string(REPLACE ";" " " LCOV_BASELINE_CMD_SPACED "${LCOV_BASELINE_CMD}") 305 | message(STATUS "${LCOV_BASELINE_CMD_SPACED}") 306 | 307 | message(STATUS "Command to run the tests: ") 308 | string(REPLACE ";" " " LCOV_EXEC_TESTS_CMD_SPACED "${LCOV_EXEC_TESTS_CMD}") 309 | message(STATUS "${LCOV_EXEC_TESTS_CMD_SPACED}") 310 | 311 | message(STATUS "Command to capture counters and generate report: ") 312 | string(REPLACE ";" " " LCOV_CAPTURE_CMD_SPACED "${LCOV_CAPTURE_CMD}") 313 | message(STATUS "${LCOV_CAPTURE_CMD_SPACED}") 314 | 315 | message(STATUS "Command to add baseline counters: ") 316 | string(REPLACE ";" " " LCOV_BASELINE_COUNT_CMD_SPACED "${LCOV_BASELINE_COUNT_CMD}") 317 | message(STATUS "${LCOV_BASELINE_COUNT_CMD_SPACED}") 318 | 319 | message(STATUS "Command to filter collected data: ") 320 | string(REPLACE ";" " " LCOV_FILTER_CMD_SPACED "${LCOV_FILTER_CMD}") 321 | message(STATUS "${LCOV_FILTER_CMD_SPACED}") 322 | 323 | message(STATUS "Command to generate lcov HTML output: ") 324 | string(REPLACE ";" " " LCOV_GEN_HTML_CMD_SPACED "${LCOV_GEN_HTML_CMD}") 325 | message(STATUS "${LCOV_GEN_HTML_CMD_SPACED}") 326 | endif() 327 | 328 | # Setup target 329 | add_custom_target(${Coverage_NAME} 330 | COMMAND ${LCOV_CLEAN_CMD} 331 | COMMAND ${LCOV_BASELINE_CMD} 332 | COMMAND ${LCOV_EXEC_TESTS_CMD} 333 | COMMAND ${LCOV_CAPTURE_CMD} 334 | COMMAND ${LCOV_BASELINE_COUNT_CMD} 335 | COMMAND ${LCOV_FILTER_CMD} 336 | COMMAND ${LCOV_GEN_HTML_CMD} 337 | 338 | # Set output files as GENERATED (will be removed on 'make clean') 339 | BYPRODUCTS 340 | ${Coverage_NAME}.base 341 | ${Coverage_NAME}.capture 342 | ${Coverage_NAME}.total 343 | ${Coverage_NAME}.info 344 | ${Coverage_NAME}/index.html 345 | WORKING_DIRECTORY ${PROJECT_BINARY_DIR} 346 | DEPENDS ${Coverage_DEPENDENCIES} 347 | VERBATIM # Protect arguments to commands 348 | COMMENT "Resetting code coverage counters to zero.\nProcessing code coverage counters and 
generating report." 349 | ) 350 | 351 | # Show where to find the lcov info report 352 | add_custom_command(TARGET ${Coverage_NAME} POST_BUILD 353 | COMMAND ; 354 | COMMENT "Lcov code coverage info report saved in ${Coverage_NAME}.info." 355 | ) 356 | 357 | # Show info where to find the report 358 | add_custom_command(TARGET ${Coverage_NAME} POST_BUILD 359 | COMMAND ; 360 | COMMENT "Open ./${Coverage_NAME}/index.html in your browser to view the coverage report." 361 | ) 362 | 363 | endfunction() # setup_target_for_coverage_lcov 364 | 365 | # Defines a target for running and collection code coverage information 366 | # Builds dependencies, runs the given executable and outputs reports. 367 | # NOTE! The executable should always have a ZERO as exit code otherwise 368 | # the coverage generation will not complete. 369 | # 370 | # setup_target_for_coverage_gcovr_xml( 371 | # NAME ctest_coverage # New target name 372 | # EXECUTABLE ctest -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR 373 | # DEPENDENCIES executable_target # Dependencies to build first 374 | # BASE_DIRECTORY "../" # Base directory for report 375 | # # (defaults to PROJECT_SOURCE_DIR) 376 | # EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative 377 | # # to BASE_DIRECTORY, with CMake 3.4+) 378 | # ) 379 | # The user can set the variable GCOVR_ADDITIONAL_ARGS to supply additional flags to the 380 | # GCVOR command. 381 | function(setup_target_for_coverage_gcovr_xml) 382 | 383 | set(options NONE) 384 | set(oneValueArgs BASE_DIRECTORY NAME) 385 | set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES) 386 | cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) 387 | 388 | if(NOT GCOVR_PATH) 389 | message(FATAL_ERROR "gcovr not found! 
Aborting...") 390 | endif() # NOT GCOVR_PATH 391 | 392 | # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR 393 | if(DEFINED Coverage_BASE_DIRECTORY) 394 | get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) 395 | else() 396 | set(BASEDIR ${PROJECT_SOURCE_DIR}) 397 | endif() 398 | 399 | # Collect excludes (CMake 3.4+: Also compute absolute paths) 400 | set(GCOVR_EXCLUDES "") 401 | foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_GCOVR_EXCLUDES}) 402 | if(CMAKE_VERSION VERSION_GREATER 3.4) 403 | get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR}) 404 | endif() 405 | list(APPEND GCOVR_EXCLUDES "${EXCLUDE}") 406 | endforeach() 407 | list(REMOVE_DUPLICATES GCOVR_EXCLUDES) 408 | 409 | # Combine excludes to several -e arguments 410 | set(GCOVR_EXCLUDE_ARGS "") 411 | foreach(EXCLUDE ${GCOVR_EXCLUDES}) 412 | list(APPEND GCOVR_EXCLUDE_ARGS "-e") 413 | list(APPEND GCOVR_EXCLUDE_ARGS "${EXCLUDE}") 414 | endforeach() 415 | 416 | # Set up commands which will be run to generate coverage data 417 | # Run tests 418 | set(GCOVR_XML_EXEC_TESTS_CMD 419 | ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS} 420 | ) 421 | # Running gcovr 422 | set(GCOVR_XML_CMD 423 | ${GCOVR_PATH} --xml -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS} ${GCOVR_EXCLUDE_ARGS} 424 | --object-directory=${PROJECT_BINARY_DIR} -o ${Coverage_NAME}.xml 425 | ) 426 | 427 | if(CODE_COVERAGE_VERBOSE) 428 | message(STATUS "Executed command report") 429 | 430 | message(STATUS "Command to run tests: ") 431 | string(REPLACE ";" " " GCOVR_XML_EXEC_TESTS_CMD_SPACED "${GCOVR_XML_EXEC_TESTS_CMD}") 432 | message(STATUS "${GCOVR_XML_EXEC_TESTS_CMD_SPACED}") 433 | 434 | message(STATUS "Command to generate gcovr XML coverage data: ") 435 | string(REPLACE ";" " " GCOVR_XML_CMD_SPACED "${GCOVR_XML_CMD}") 436 | message(STATUS "${GCOVR_XML_CMD_SPACED}") 437 | endif() 438 | 439 | add_custom_target(${Coverage_NAME} 440 | COMMAND ${GCOVR_XML_EXEC_TESTS_CMD} 441 | COMMAND ${GCOVR_XML_CMD} 442 | 443 | BYPRODUCTS ${Coverage_NAME}.xml 444 | WORKING_DIRECTORY ${PROJECT_BINARY_DIR} 445 | DEPENDS ${Coverage_DEPENDENCIES} 446 | VERBATIM # Protect arguments to commands 447 | COMMENT "Running gcovr to produce Cobertura code coverage report." 448 | ) 449 | 450 | # Show info where to find the report 451 | add_custom_command(TARGET ${Coverage_NAME} POST_BUILD 452 | COMMAND ; 453 | COMMENT "Cobertura code coverage report saved in ${Coverage_NAME}.xml." 454 | ) 455 | endfunction() # setup_target_for_coverage_gcovr_xml 456 | 457 | # Defines a target for running and collection code coverage information 458 | # Builds dependencies, runs the given executable and outputs reports. 459 | # NOTE! The executable should always have a ZERO as exit code otherwise 460 | # the coverage generation will not complete. 461 | # 462 | # setup_target_for_coverage_gcovr_html( 463 | # NAME ctest_coverage # New target name 464 | # EXECUTABLE ctest -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR 465 | # DEPENDENCIES executable_target # Dependencies to build first 466 | # BASE_DIRECTORY "../" # Base directory for report 467 | # # (defaults to PROJECT_SOURCE_DIR) 468 | # EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative 469 | # # to BASE_DIRECTORY, with CMake 3.4+) 470 | # ) 471 | # The user can set the variable GCOVR_ADDITIONAL_ARGS to supply additional flags to the 472 | # GCVOR command. 
473 | function(setup_target_for_coverage_gcovr_html) 474 | 475 | set(options NONE) 476 | set(oneValueArgs BASE_DIRECTORY NAME) 477 | set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES) 478 | cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) 479 | 480 | if(NOT GCOVR_PATH) 481 | message(FATAL_ERROR "gcovr not found! Aborting...") 482 | endif() # NOT GCOVR_PATH 483 | 484 | # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR 485 | if(DEFINED Coverage_BASE_DIRECTORY) 486 | get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) 487 | else() 488 | set(BASEDIR ${PROJECT_SOURCE_DIR}) 489 | endif() 490 | 491 | # Collect excludes (CMake 3.4+: Also compute absolute paths) 492 | set(GCOVR_EXCLUDES "") 493 | foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_GCOVR_EXCLUDES}) 494 | if(CMAKE_VERSION VERSION_GREATER 3.4) 495 | get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR}) 496 | endif() 497 | list(APPEND GCOVR_EXCLUDES "${EXCLUDE}") 498 | endforeach() 499 | list(REMOVE_DUPLICATES GCOVR_EXCLUDES) 500 | 501 | # Combine excludes to several -e arguments 502 | set(GCOVR_EXCLUDE_ARGS "") 503 | foreach(EXCLUDE ${GCOVR_EXCLUDES}) 504 | list(APPEND GCOVR_EXCLUDE_ARGS "-e") 505 | list(APPEND GCOVR_EXCLUDE_ARGS "${EXCLUDE}") 506 | endforeach() 507 | 508 | # Set up commands which will be run to generate coverage data 509 | # Run tests 510 | set(GCOVR_HTML_EXEC_TESTS_CMD 511 | ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS} 512 | ) 513 | # Create folder 514 | set(GCOVR_HTML_FOLDER_CMD 515 | ${CMAKE_COMMAND} -E make_directory ${PROJECT_BINARY_DIR}/${Coverage_NAME} 516 | ) 517 | # Running gcovr 518 | set(GCOVR_HTML_CMD 519 | ${GCOVR_PATH} --html --html-details -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS} 520 | ${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR} 521 | -o ${Coverage_NAME}/index.html 522 | ) 523 | 524 | if(CODE_COVERAGE_VERBOSE) 525 | message(STATUS "Executed command report") 526 | 527 | message(STATUS "Command to run tests: ") 528 | string(REPLACE ";" " " GCOVR_HTML_EXEC_TESTS_CMD_SPACED "${GCOVR_HTML_EXEC_TESTS_CMD}") 529 | message(STATUS "${GCOVR_HTML_EXEC_TESTS_CMD_SPACED}") 530 | 531 | message(STATUS "Command to create a folder: ") 532 | string(REPLACE ";" " " GCOVR_HTML_FOLDER_CMD_SPACED "${GCOVR_HTML_FOLDER_CMD}") 533 | message(STATUS "${GCOVR_HTML_FOLDER_CMD_SPACED}") 534 | 535 | message(STATUS "Command to generate gcovr HTML coverage data: ") 536 | string(REPLACE ";" " " GCOVR_HTML_CMD_SPACED "${GCOVR_HTML_CMD}") 537 | message(STATUS "${GCOVR_HTML_CMD_SPACED}") 538 | endif() 539 | 540 | add_custom_target(${Coverage_NAME} 541 | COMMAND ${GCOVR_HTML_EXEC_TESTS_CMD} 542 | COMMAND ${GCOVR_HTML_FOLDER_CMD} 543 | COMMAND ${GCOVR_HTML_CMD} 544 | 545 | BYPRODUCTS ${PROJECT_BINARY_DIR}/${Coverage_NAME}/index.html # report directory 546 | WORKING_DIRECTORY ${PROJECT_BINARY_DIR} 547 | DEPENDS ${Coverage_DEPENDENCIES} 548 | VERBATIM # Protect arguments to commands 549 | COMMENT "Running gcovr to produce HTML code coverage report." 550 | ) 551 | 552 | # Show info where to find the report 553 | add_custom_command(TARGET ${Coverage_NAME} POST_BUILD 554 | COMMAND ; 555 | COMMENT "Open ./${Coverage_NAME}/index.html in your browser to view the coverage report." 
556 | ) 557 | 558 | endfunction() # setup_target_for_coverage_gcovr_html 559 | 560 | # Defines a target for running and collection code coverage information 561 | # Builds dependencies, runs the given executable and outputs reports. 562 | # NOTE! The executable should always have a ZERO as exit code otherwise 563 | # the coverage generation will not complete. 564 | # 565 | # setup_target_for_coverage_fastcov( 566 | # NAME testrunner_coverage # New target name 567 | # EXECUTABLE testrunner -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR 568 | # DEPENDENCIES testrunner # Dependencies to build first 569 | # BASE_DIRECTORY "../" # Base directory for report 570 | # # (defaults to PROJECT_SOURCE_DIR) 571 | # EXCLUDE "src/dir1/" "src/dir2/" # Patterns to exclude. 572 | # NO_DEMANGLE # Don't demangle C++ symbols 573 | # # even if c++filt is found 574 | # SKIP_HTML # Don't create html report 575 | # ) 576 | function(setup_target_for_coverage_fastcov) 577 | 578 | set(options NO_DEMANGLE SKIP_HTML) 579 | set(oneValueArgs BASE_DIRECTORY NAME) 580 | set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES FASTCOV_ARGS GENHTML_ARGS) 581 | cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) 582 | 583 | if(NOT FASTCOV_PATH) 584 | message(FATAL_ERROR "fastcov not found! Aborting...") 585 | endif() 586 | 587 | if(NOT GENHTML_PATH) 588 | message(FATAL_ERROR "genhtml not found! Aborting...") 589 | endif() 590 | 591 | # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR 592 | if(Coverage_BASE_DIRECTORY) 593 | get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) 594 | else() 595 | set(BASEDIR ${PROJECT_SOURCE_DIR}) 596 | endif() 597 | 598 | # Collect excludes (Patterns, not paths, for fastcov) 599 | set(FASTCOV_EXCLUDES "") 600 | foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_FASTCOV_EXCLUDES}) 601 | list(APPEND FASTCOV_EXCLUDES "${EXCLUDE}") 602 | endforeach() 603 | list(REMOVE_DUPLICATES FASTCOV_EXCLUDES) 604 | 605 | # Conditional arguments 606 | if(CPPFILT_PATH AND NOT ${Coverage_NO_DEMANGLE}) 607 | set(GENHTML_EXTRA_ARGS "--demangle-cpp") 608 | endif() 609 | 610 | # Set up commands which will be run to generate coverage data 611 | set(FASTCOV_EXEC_TESTS_CMD ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS}) 612 | 613 | set(FASTCOV_CAPTURE_CMD ${FASTCOV_PATH} ${Coverage_FASTCOV_ARGS} --gcov ${GCOV_PATH} 614 | --search-directory ${BASEDIR} 615 | --process-gcno 616 | --lcov 617 | --output ${Coverage_NAME}.info 618 | --exclude ${FASTCOV_EXCLUDES} 619 | --exclude ${FASTCOV_EXCLUDES} 620 | ) 621 | 622 | if(Coverage_SKIP_HTML) 623 | set(FASTCOV_HTML_CMD ";") 624 | else() 625 | set(FASTCOV_HTML_CMD ${GENHTML_PATH} ${GENHTML_EXTRA_ARGS} ${Coverage_GENHTML_ARGS} 626 | -o ${Coverage_NAME} ${Coverage_NAME}.info 627 | ) 628 | endif() 629 | 630 | if(CODE_COVERAGE_VERBOSE) 631 | message(STATUS "Code coverage commands for target ${Coverage_NAME} (fastcov):") 632 | 633 | message(" Running tests:") 634 | string(REPLACE ";" " " FASTCOV_EXEC_TESTS_CMD_SPACED "${FASTCOV_EXEC_TESTS_CMD}") 635 | message(" ${FASTCOV_EXEC_TESTS_CMD_SPACED}") 636 | 637 | message(" Capturing fastcov counters and generating report:") 638 | string(REPLACE ";" " " FASTCOV_CAPTURE_CMD_SPACED "${FASTCOV_CAPTURE_CMD}") 639 | message(" ${FASTCOV_CAPTURE_CMD_SPACED}") 640 | 641 | if(NOT Coverage_SKIP_HTML) 642 | message(" Generating HTML report: ") 643 | string(REPLACE ";" " " FASTCOV_HTML_CMD_SPACED "${FASTCOV_HTML_CMD}") 644 | message(" 
${FASTCOV_HTML_CMD_SPACED}") 645 | endif() 646 | endif() 647 | 648 | # Setup target 649 | add_custom_target(${Coverage_NAME} 650 | 651 | # Cleanup fastcov 652 | COMMAND ${FASTCOV_PATH} ${Coverage_FASTCOV_ARGS} --gcov ${GCOV_PATH} 653 | --search-directory ${BASEDIR} 654 | --zerocounters 655 | 656 | COMMAND ${FASTCOV_EXEC_TESTS_CMD} 657 | COMMAND ${FASTCOV_CAPTURE_CMD} 658 | COMMAND ${FASTCOV_HTML_CMD} 659 | 660 | # Set output files as GENERATED (will be removed on 'make clean') 661 | BYPRODUCTS 662 | ${Coverage_NAME}.info 663 | ${Coverage_NAME}/index.html # report directory 664 | 665 | WORKING_DIRECTORY ${PROJECT_BINARY_DIR} 666 | DEPENDS ${Coverage_DEPENDENCIES} 667 | VERBATIM # Protect arguments to commands 668 | COMMENT "Resetting code coverage counters to zero. Processing code coverage counters and generating report." 669 | ) 670 | 671 | set(INFO_MSG "fastcov code coverage info report saved in ${Coverage_NAME}.info.") 672 | if(NOT Coverage_SKIP_HTML) 673 | string(APPEND INFO_MSG " Open ${PROJECT_BINARY_DIR}/${Coverage_NAME}/index.html in your browser to view the coverage report.") 674 | endif() 675 | # Show where to find the fastcov info report 676 | add_custom_command(TARGET ${Coverage_NAME} POST_BUILD 677 | COMMAND ${CMAKE_COMMAND} -E echo ${INFO_MSG} 678 | ) 679 | 680 | endfunction() # setup_target_for_coverage_fastcov 681 | 682 | function(append_coverage_compiler_flags) 683 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE) 684 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE) 685 | set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE) 686 | message(STATUS "Appending code coverage compiler flags: ${COVERAGE_COMPILER_FLAGS}") 687 | endfunction() # append_coverage_compiler_flags 688 | -------------------------------------------------------------------------------- /debian/postinst.in: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # postinst script for joinx 3 | # 4 | # see: dh_installdeb(1) 5 | 6 | # summary of how this script can be called: 7 | # * `configure' 8 | # * `abort-upgrade' 9 | # * `abort-remove' `in-favour' 10 | # 11 | # * `abort-remove' 12 | # * `abort-deconfigure' `in-favour' 13 | # `removing' 14 | # 15 | # for details, see http://www.debian.org/doc/debian-policy/ or 16 | # the debian-policy package 17 | 18 | 19 | EXECUTABLES="bam-readcount" 20 | PRIORITY=@DEFAULT_ETC_ALTERNATIVES_PRIORITY@ 21 | 22 | set -e 23 | 24 | case "$1" in 25 | configure) 26 | for e in $EXECUTABLES 27 | do 28 | BARE_EXE=/usr/bin/$e 29 | VERSIONED_EXE="/usr/bin/${e}@EXE_VERSION_SUFFIX@" 30 | update-alternatives --install $BARE_EXE $e $VERSIONED_EXE $PRIORITY 31 | done 32 | ;; 33 | 34 | abort-upgrade|abort-remove|abort-deconfigure) 35 | ;; 36 | 37 | *) 38 | echo "postinst called with unknown argument \`$1'" >&2 39 | exit 1 40 | ;; 41 | esac 42 | 43 | # dh_installdeb will replace this with shell code automatically 44 | # generated by other debhelper scripts. 
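# Illustrative note (editor's sketch, not generated by debhelper): once the
# @...@ placeholders are filled in at package-build time, the configure branch
# above runs roughly
#   update-alternatives --install /usr/bin/bam-readcount bam-readcount \
#       /usr/bin/bam-readcount<EXE_VERSION_SUFFIX> <DEFAULT_ETC_ALTERNATIVES_PRIORITY>
# registering the versioned executable as an alternative for the bare
# /usr/bin/bam-readcount name; the prerm script below removes that
# alternative again when the package is removed or upgraded.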
45 | 46 | #DEBHELPER# 47 | 48 | exit 0 49 | -------------------------------------------------------------------------------- /debian/prerm.in: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # prerm script for joinx 3 | # 4 | # see: dh_installdeb(1) 5 | 6 | # summary of how this script can be called: 7 | # * `remove' 8 | # * `upgrade' 9 | # * `failed-upgrade' 10 | # * `remove' `in-favour' 11 | # * `deconfigure' `in-favour' 12 | # `removing' 13 | # 14 | # for details, see http://www.debian.org/doc/debian-policy/ or 15 | # the debian-policy package 16 | 17 | 18 | EXECUTABLES="bam-readcount" 19 | 20 | set -e 21 | 22 | case "$1" in 23 | remove|upgrade|deconfigure) 24 | for e in $EXECUTABLES 25 | do 26 | VERSIONED_EXE="/usr/bin/${e}@EXE_VERSION_SUFFIX@" 27 | update-alternatives --remove $e $VERSIONED_EXE 28 | done 29 | ;; 30 | 31 | failed-upgrade) 32 | ;; 33 | 34 | *) 35 | echo "prerm called with unknown argument \`$1'" >&2 36 | exit 1 37 | ;; 38 | esac 39 | 40 | # dh_installdeb will replace this with shell code automatically 41 | # generated by other debhelper scripts. 42 | 43 | #DEBHELPER# 44 | 45 | exit 0 46 | -------------------------------------------------------------------------------- /integration-test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | configure_file(testdata.py.in testdata.py @ONLY) 2 | set(BC_PYTHONPATH_EXTRA ${CMAKE_CURRENT_BUILD_DIR}) 3 | def_integration_test(bam-readcount RunBamReadcount bam-readcount_test.py) 4 | -------------------------------------------------------------------------------- /integration-test/bam-readcount_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """This is a simple python script to perform an integration 3 | test on bam-readcount. It uses a small BAM generated by merging 4 | two 1000 Genomes sequencing for NA12878 and NA12892. 5 | See https://github.com/genome/somatic-snv-test-data for info 6 | on the BAMs and reference. 
These are subsets of the ones from that repo.""" 7 | 8 | import os 9 | print "I AM IN", os.getcwd() 10 | from integrationtest import IntegrationTest, main 11 | from testdata import TEST_DATA_DIRECTORY 12 | import unittest 13 | import subprocess 14 | import os 15 | 16 | class TestBamReadcount(IntegrationTest, unittest.TestCase): 17 | 18 | def setUp(self): 19 | IntegrationTest.setUp(self) 20 | self.data_dir = TEST_DATA_DIRECTORY 21 | self.orig_path = os.path.realpath(os.getcwd()) 22 | self.exe_path = os.path.realpath(self.exe_path) 23 | os.chdir(self.data_dir) 24 | 25 | def tearDown(self): 26 | IntegrationTest.tearDown(self) 27 | os.chdir(self.orig_path) 28 | 29 | def test_bamreadcount_normal(self): 30 | """test default output is as expected""" 31 | expected_file = "expected_all_lib" 32 | bam_file = "test.bam" 33 | ref_fasta = "ref.fa" 34 | site_list = "site_list" 35 | output_file = self.tempFile("output") 36 | cmdline = " ".join([self.exe_path, '-w', '1', '-f', ref_fasta, '-l', site_list, bam_file, '>', output_file]) 37 | print "Executing", cmdline 38 | print "CWD", os.getcwd() 39 | rv = subprocess.call(cmdline, shell=True) 40 | print "Return value:", rv 41 | self.assertEqual(0, rv) 42 | self.assertFilesEqual(expected_file, output_file) 43 | def test_bamreadcount_perlib(self): 44 | """test per lib output is as expected""" 45 | expected_file = "expected_per_lib" 46 | bam_file = "test.bam" 47 | ref_fasta = "ref.fa" 48 | site_list = "site_list" 49 | output_file = self.tempFile("output") 50 | cmdline = " ".join([self.exe_path, '-w', '1', '-p', '-f', ref_fasta, '-l', site_list, bam_file, '>', output_file]) 51 | print "Executing", cmdline 52 | print "CWD", os.getcwd() 53 | rv = subprocess.call(cmdline, shell=True) 54 | print "Return value:", rv 55 | self.assertEqual(0, rv) 56 | self.assertFilesEqual(expected_file, output_file) 57 | 58 | def test_bamreadcount_normal_as_list(self): 59 | """test default output is as expected when list of regions passed at command-line""" 60 | expected_file = "expected_all_lib" 61 | bam_file = "test.bam" 62 | ref_fasta = "ref.fa" 63 | regions = "21:10402985-10402985 21:10405200-10405200" 64 | output_file = self.tempFile("output") 65 | cmdline = " ".join([self.exe_path, '-w', '1', '-f', ref_fasta, bam_file, regions, '>', output_file]) 66 | print "Executing", cmdline 67 | print "CWD", os.getcwd() 68 | rv = subprocess.call(cmdline, shell=True) 69 | print "Return value:", rv 70 | self.assertEqual(0, rv) 71 | self.assertFilesEqual(expected_file, output_file) 72 | 73 | def test_bamreadcount_when_lib_absent(self): 74 | """test that we can run if there was no LB in the header""" 75 | expected_file = "expected_all_lib" 76 | bam_file = "test_bad_rg.bam" 77 | ref_fasta = "ref.fa" 78 | regions = "21:10402985-10402985 21:10405200-10405200" 79 | output_file = self.tempFile("output") 80 | cmdline = " ".join([self.exe_path, '-w', '1', '-f', ref_fasta, bam_file, regions, '>', output_file]) 81 | print "Executing", cmdline 82 | print "CWD", os.getcwd() 83 | rv = subprocess.call(cmdline, shell=True) 84 | print "Return value:", rv 85 | self.assertEqual(0, rv) 86 | self.assertFilesEqual(expected_file, output_file) 87 | 88 | def test_bamreadcount_indel_centric_normal(self): 89 | """test all lib output, but with insertion centric counting""" 90 | expected_file = "expected_insertion_centric_all_lib" 91 | bam_file = "test.bam" 92 | ref_fasta = "ref.fa" 93 | site_list = "site_list" 94 | output_file = self.tempFile("output") 95 | cmdline = " ".join([self.exe_path, '-w', '1', '-i', '-f', 
ref_fasta, '-l', site_list, bam_file, '>', output_file]) 96 | print "Executing", cmdline 97 | print "CWD", os.getcwd() 98 | rv = subprocess.call(cmdline, shell=True) 99 | print "Return value:", rv 100 | self.assertEqual(0, rv) 101 | self.assertFilesEqual(expected_file, output_file) 102 | 103 | def test_bamreadcount_indel_centric_per_lib(self): 104 | """test all lib output, but with insertion centric counting""" 105 | expected_file = "expected_insertion_centric_per_lib" 106 | bam_file = "test.bam" 107 | ref_fasta = "ref.fa" 108 | site_list = "site_list" 109 | output_file = self.tempFile("output") 110 | cmdline = " ".join([self.exe_path, '-w', '1', '-i', '-p', '-f', ref_fasta, '-l', site_list, bam_file, '>', output_file]) 111 | print "Executing", cmdline 112 | print "CWD", os.getcwd() 113 | rv = subprocess.call(cmdline, shell=True) 114 | print "Return value:", rv 115 | self.assertEqual(0, rv) 116 | self.assertFilesEqual(expected_file, output_file) 117 | 118 | if __name__ == "__main__": 119 | main() 120 | -------------------------------------------------------------------------------- /integration-test/testdata.py.in: -------------------------------------------------------------------------------- 1 | TEST_DATA_DIRECTORY = "@TEST_DATA_DIRECTORY@" 2 | -------------------------------------------------------------------------------- /src/exe/bam-readcount/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | project(bam-readcount) 4 | 5 | set(SOURCES bamreadcount.cpp) 6 | 7 | set(EXECUTABLE_NAME bam-readcount) 8 | xadd_executable(${EXECUTABLE_NAME} ${SOURCES}) 9 | target_link_libraries(${EXECUTABLE_NAME} bamrc ${Samtools_LIBRARIES} ${Htslib_LIBRARIES} ${Support_LIBRARIES} ${Boost_LIBRARIES}) 10 | set_target_properties(${EXECUTABLE_NAME} PROPERTIES PACKAGE_OUTPUT_NAME ${EXECUTABLE_NAME}${EXE_VERSION_SUFFIX}) 11 | install(TARGETS ${EXECUTABLE_NAME} DESTINATION bin/) 12 | -------------------------------------------------------------------------------- /src/exe/bam-readcount/bamreadcount.cpp: -------------------------------------------------------------------------------- 1 | #ifndef _GNU_SOURCE 2 | # define _GNU_SOURCE 3 | #endif 4 | 5 | #include "bamrc/auxfields.hpp" 6 | #include "bamrc/ReadWarnings.hpp" 7 | #include "version.h" 8 | #include 9 | #include "bamrc/BasicStat.hpp" 10 | #include "bamrc/IndelQueueEntry.hpp" 11 | #include "bamrc/IndelQueue.hpp" 12 | 13 | #include 14 | #include 15 | #include 16 | #include "sam.h" 17 | #include "header.h" 18 | #include "htslib/faidx.h" 19 | #include "htslib/khash.h" 20 | //#include "sam_header.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | using namespace std; 31 | namespace po = boost::program_options; 32 | 33 | /* This will convert all iub codes in the reads to N */ 34 | char const* bam_canonical_nt_table = "=ACGTN"; 35 | unsigned char possible_calls = (unsigned char) strlen(bam_canonical_nt_table); 36 | unsigned char bam_nt16_canonical_table[16] = { 0,1,2,5, 37 | 3,5,5,5, 38 | 4,5,5,5, 39 | 5,5,5,5}; 40 | 41 | //below is for sam header 42 | KHASH_MAP_INIT_STR(s, int) 43 | 44 | KHASH_MAP_INIT_STR(str, const char *) 45 | 46 | struct LibraryCounts { 47 | std::map indel_stats; 48 | std::vector base_stats; 49 | LibraryCounts() : indel_stats(), base_stats(possible_calls) {} 50 | }; 51 | 52 | typedef std::map indel_queue_map_t; 53 | 54 | //Struct to store info to be passed around 55 | typedef 
struct { 56 | faidx_t *fai; //index into fasta file 57 | int tid; //reference id 58 | char *ref; //reference sequence 59 | int min_mapq; //minimum mapping qualitiy to use 60 | int min_bq; //minimum mapping qualitiy to use 61 | int beg,end; //start and stop of region 62 | int len; //length of currently loaded reference sequence 63 | int max_cnt; //maximum depth to set on the pileup buffer 64 | samfile_t *in; //bam file 65 | int distribution; //whether or not to display all mapping qualities 66 | bool per_lib; 67 | bool insertion_centric; 68 | std::set lib_names; 69 | indel_queue_map_t indel_queue_map; 70 | void * hash; 71 | } pileup_data_t; 72 | 73 | //struct to store reference for passing to fetch func 74 | typedef struct { 75 | const char* seq_name; 76 | int ref_len; 77 | char **ref_pointer; 78 | bam_plbuf_t* pileup_buffer; 79 | } fetch_data_t; 80 | 81 | std::auto_ptr WARN; 82 | 83 | static inline void load_reference(pileup_data_t* data, int ref) { 84 | if (data->fai && ref != data->tid) { 85 | free(data->ref); 86 | //would this be faster to just grab small chunks? Probably at some level, but not at others. How would chunking affect the indel allele calculations? Those assume that the indel allele is present in the ref and potentially occupy more than just the region of interest 87 | data->ref = fai_fetch(data->fai, data->in->header->target_name[ref], &data->len); 88 | data->tid = ref; 89 | } 90 | } 91 | 92 | std::set find_library_names(bam_header_t const* header) { 93 | std::set lib_names; 94 | sam_hdr_t * sam_hdr = sam_hdr_parse(header->l_text, header->text); 95 | sam_hrecs_t * sam_hrecs = sam_hdr->hrecs; 96 | sam_hrec_rg_t * rg = sam_hrecs->rg; 97 | int nrg = sam_hrecs->nrg; 98 | 99 | for (int i=0; itag; 101 | while (tag->next) { 102 | tag = tag->next; 103 | std::string tag_str = tag->str; 104 | if (tag_str.substr(0,2) == "LB") { 105 | lib_names.insert(tag_str.substr(3)); 106 | } 107 | } 108 | } 109 | 110 | return lib_names; 111 | } 112 | 113 | // callback for samfetch() 114 | static int fetch_func(const bam1_t *b, void *data) { 115 | //retrieve reference 116 | fetch_data_t* fetch_data = (fetch_data_t*) data; 117 | char *ref = *(fetch_data->ref_pointer); 118 | //FIXME Won't want to do this if refseq is not included 119 | 120 | //calculate single nucleotide mismatches and sum their qualities 121 | uint8_t *seq = bam1_seq(b); 122 | uint32_t *cigar = bam1_cigar(b); 123 | const bam1_core_t *core = &(b->core); 124 | int i, reference_position, read_position; 125 | uint32_t sum_of_mismatch_qualities=0; 126 | int left_clip = 0; 127 | int clipped_length = core->l_qseq; 128 | int right_clip = core->l_qseq; 129 | 130 | int last_mismatch_position = -1; 131 | int last_mismatch_qual = 0; 132 | 133 | for(i = read_position = 0, reference_position = core->pos; i < core->n_cigar; ++i) { 134 | int j; 135 | int op_length = cigar[i]>>4; 136 | int op = cigar[i]&0xf; 137 | 138 | if(op == BAM_CMATCH) { 139 | for(j = 0; j < op_length; j++) { 140 | int current_base_position = read_position + j; 141 | int read_base = bam1_seqi(seq, current_base_position); 142 | int refpos = reference_position + j; 143 | int ref_base; 144 | if(fetch_data->ref_len && refpos > fetch_data->ref_len) { 145 | fprintf(stderr, "WARNING: Request for position %d in sequence %s is > length of %d!\n", 146 | refpos, fetch_data->seq_name, fetch_data->ref_len); 147 | continue; 148 | } 149 | ref_base = bam_nt16_table[(int)ref[refpos]]; 150 | 151 | if(ref[refpos] == 0) break; //out of bounds on reference 152 | if(read_base != ref_base && ref_base 
!= 15 && read_base != 0) { 153 | //mismatch, so store the qualities 154 | int qual = bam1_qual(b)[current_base_position]; 155 | if(last_mismatch_position != -1) { 156 | if(last_mismatch_position + 1 != current_base_position) { 157 | //not an adjacent mismatch 158 | sum_of_mismatch_qualities += last_mismatch_qual; 159 | last_mismatch_qual = qual; 160 | last_mismatch_position = current_base_position; 161 | } 162 | else { 163 | if(last_mismatch_qual < qual) { 164 | last_mismatch_qual = qual; 165 | } 166 | last_mismatch_position = current_base_position; 167 | } 168 | } 169 | else { 170 | last_mismatch_position = current_base_position; 171 | last_mismatch_qual = qual; 172 | } 173 | } 174 | } 175 | if(j < op_length) break; 176 | reference_position += op_length; 177 | read_position += op_length; 178 | } else if(op == BAM_CDEL || op == BAM_CREF_SKIP) { //ignoring indels 179 | reference_position += op_length; 180 | } else if(op ==BAM_CINS) { //ignoring indels 181 | read_position += op_length; 182 | } 183 | else if(op == BAM_CSOFT_CLIP) { 184 | read_position += op_length; 185 | 186 | clipped_length -= op_length; 187 | if(i == 0) { 188 | left_clip += op_length; 189 | } 190 | else { 191 | right_clip -= op_length; 192 | } 193 | if(clipped_length < 0) { 194 | fprintf(stderr, "After removing the clipping the length is less than 0 for read %s\n",bam1_qname(b)); 195 | } 196 | } 197 | } 198 | //add in any remaining mismatch sums; should be 0 if no mismatch 199 | sum_of_mismatch_qualities += last_mismatch_qual; 200 | 201 | //inefficiently scan again to determine the distance in leftmost read coordinates to the first Q2 base 202 | int three_prime_index = -1; 203 | int q2_pos = -1; 204 | int k; 205 | int increment; 206 | uint8_t *qual = bam1_qual(b); 207 | if(core->flag & BAM_FREVERSE) { 208 | k = three_prime_index = 0; 209 | increment = 1; 210 | if(three_prime_index < left_clip) { 211 | three_prime_index = left_clip; 212 | } 213 | } 214 | else { 215 | k = three_prime_index = core->l_qseq - 1; 216 | increment = -1; 217 | if(three_prime_index > right_clip) { 218 | three_prime_index = right_clip; 219 | } 220 | 221 | } 222 | while(q2_pos < 0 && k >= 0 && k < core->l_qseq) { 223 | if(qual[k] != 2) { 224 | q2_pos = k-1; 225 | break; 226 | } 227 | k += increment; 228 | } 229 | if(core->flag & BAM_FREVERSE) { 230 | if(three_prime_index < q2_pos) { 231 | three_prime_index = q2_pos; 232 | } 233 | } 234 | else { 235 | if(three_prime_index > q2_pos && q2_pos != -1) { 236 | three_prime_index = q2_pos; 237 | } 238 | } 239 | uint8_t temp[5*4+1]; 240 | temp[5*4]=0; 241 | memcpy(temp, &sum_of_mismatch_qualities,4); 242 | memcpy(temp+4, &clipped_length,4); 243 | memcpy(temp+8, &left_clip,4); 244 | memcpy(temp+12, &three_prime_index,4); 245 | memcpy(temp+16, &q2_pos,4); 246 | 247 | //store the value on the read, we're assuming it is always absent. This assumption may fail. 
Future proof if this idea has value 248 | aux_zm_t zm; 249 | zm.sum_of_mismatch_qualities = sum_of_mismatch_qualities; 250 | zm.clipped_length = clipped_length; 251 | zm.left_clip = left_clip; 252 | zm.three_prime_index = three_prime_index; 253 | zm.q2_pos = q2_pos; 254 | std::string zm_str = zm.to_string(); 255 | bam_aux_append((bam1_t *)b, "Zm", 'Z', zm_str.size() + 1, (uint8_t*)&zm_str[0]); 256 | 257 | //This just pushes all reads into the pileup buffer 258 | bam_plbuf_t *buf = fetch_data->pileup_buffer; 259 | bam_plbuf_push(b, buf); 260 | return 0; 261 | } 262 | 263 | // callback for bam_plbuf_init() 264 | // TODO allow for a simplified version that calculates less 265 | static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) { 266 | pileup_data_t *tmp = (pileup_data_t*)data; 267 | load_reference(tmp, tid); 268 | 269 | if ((int)pos >= tmp->beg - 1 && (int)pos < tmp->end) { 270 | 271 | int mapq_n = 0; //this tracks the number of reads that passed the mapping quality threshold and the flag filters 272 | 273 | std::map lib_counts; 274 | 275 | //loop over the bases, recycling i here. 276 | for(int i = 0; i < n; ++i) { 277 | const bam_pileup1_t *base = pl + i; //get base index 278 | const char* library_name = "all"; 279 | if(tmp->per_lib) { 280 | library_name = bam_get_library(tmp->in->header, base->b); 281 | if(library_name == 0) { 282 | WARN->warn(ReadWarnings::LIBRARY_UNAVAILABLE, bam1_qname(base->b)); 283 | return 0; 284 | } 285 | } 286 | LibraryCounts ¤t_lib = lib_counts[library_name]; 287 | 288 | if(!base->is_del && base->b->core.qual >= tmp->min_mapq && bam1_qual(base->b)[base->qpos] >= tmp->min_bq) { 289 | 290 | // Flag filters 291 | // This should not happen; unmapped reads should not be 292 | // in the pileup 293 | // These should be combined for efficiency into a single 294 | // flag test 295 | if (base->b->core.flag & BAM_FUNMAP) { 296 | //fprintf(stderr, "BAM_FUNMAP\n"); 297 | continue; 298 | } 299 | if (base->b->core.flag & BAM_FSECONDARY) { 300 | //fprintf(stderr, "BAM_FSECONDARY\n"); 301 | continue; 302 | } 303 | if (base->b->core.flag & BAM_FQCFAIL) { 304 | //fprintf(stderr, "BAM_FQCFAIL\n"); 305 | continue; 306 | } 307 | if (base->b->core.flag & BAM_FDUP) { 308 | //fprintf(stderr, "BAM_FDUP\n"); 309 | continue; 310 | } 311 | 312 | mapq_n++; 313 | 314 | 315 | if(base->indel != 0 && tmp->ref) { 316 | //indel containing read exists here 317 | //need to: 1) add an indel counting mode so insertions aren't double counted or add separate "non-indel" tracking. 318 | //2) create a queue of indel counts and the positions where they /should/ be reported. These positions need to determine reporting as well. ie. if reporting on a deletion and the roi doesn't include the deletion, but we find it. then we still need to do the pileup for that position BUT, there are no guarantees that we know that until we see the data. 319 | //3) So deletions should get put into a queue and their emission position stored. 320 | //4) At each new position, check if we need to emit the indel. 321 | //5) if that position is a) in our target roi and already passed or b) the current position then we need to emit and this needs to be done in a loop until no more indels are candidates. Maybe two loops. 
322 | // 323 | std::string allele; 324 | if(base->indel > 0) { 325 | allele += "+"; 326 | for(int indel_base = 0; indel_base < base->indel; indel_base++) { 327 | //scan indel allele off the read 328 | allele += bam_canonical_nt_table[bam_nt16_canonical_table[bam1_seqi(bam1_seq(base->b), base->qpos + 1 + indel_base)]]; 329 | } 330 | } 331 | else { 332 | //deletion 333 | allele += "-"; 334 | for(int indel_base = 0; indel_base < abs(base->indel); indel_base++) { 335 | //scan indel allele off the reference 336 | //FIXME this will break with no reference 337 | allele += tmp->ref[pos + indel_base + 1]; 338 | } 339 | } 340 | current_lib.indel_stats[allele].is_indel=true; 341 | current_lib.indel_stats[allele].process_read(base); 342 | } 343 | if(base->indel < 1 || !tmp->insertion_centric) { 344 | unsigned char c = bam_nt16_canonical_table[bam1_seqi(bam1_seq(base->b), base->qpos)]; //convert to index 345 | (current_lib.base_stats)[c].process_read(base); 346 | } 347 | } 348 | } 349 | 350 | //print out information on position and reference base and depth 351 | std::string ref_name(tmp->in->header->target_name[tid]); 352 | std::string ref_base; 353 | ref_base += (tmp->ref && (int)pos < tmp->len) ? tmp->ref[pos] : 'N'; 354 | stringstream record; 355 | int extra_depth = 0; 356 | //print out the base information 357 | //Note that if there is 0 depth then that averages are reported as 0 358 | 359 | std::map::iterator lib_iter; 360 | for(lib_iter = lib_counts.begin(); lib_iter != lib_counts.end(); lib_iter++) { 361 | //print it 362 | if(tmp->per_lib) { 363 | record << "\t" << lib_iter->first << "\t{"; 364 | } 365 | for(unsigned char j = 0; j < possible_calls; ++j) { 366 | if(tmp->distribution) { 367 | throw "Not currently supporting distributions\n"; 368 | /* 369 | printf("\t%c:%d:", bam_canonical_nt_table[j], base_stat->read_counts[j]); 370 | for(iter = 0; iter < base_stat->num_mapping_qualities[j]; iter++) { 371 | if(iter != 0) { 372 | printf(","); 373 | } 374 | printf("%d",base_stat->mapping_qualities[j][iter]); 375 | } 376 | printf(":"); 377 | for(iter = 0; iter < base_stat->num_distances_to_3p[j]; iter++) { 378 | if(iter != 0) { 379 | printf(","); 380 | } 381 | printf("%0.02f",base_stat->distances_to_3p[j][iter]); 382 | } 383 | */ 384 | } 385 | else { 386 | record << "\t" << bam_canonical_nt_table[j] << ":" << lib_iter->second.base_stats[j]; 387 | } 388 | } 389 | std::map::iterator it; 390 | for(it = lib_iter->second.indel_stats.begin(); it != lib_iter->second.indel_stats.end(); ++it) { 391 | if(it->first[0] == '-') { 392 | //it's a deletion 393 | IndelQueueEntry new_entry(tid, pos + 1, it->second, it->first); 394 | IndelQueue &test = tmp->indel_queue_map[lib_iter->first]; 395 | test.push(new_entry); 396 | } 397 | else { 398 | //it's an insertion and it should be output at this position 399 | record << "\t" << it->first << ":" << it->second; 400 | } 401 | } 402 | 403 | std::map::iterator queued_it; 404 | queued_it = tmp->indel_queue_map.find(lib_iter->first); 405 | if(queued_it != tmp->indel_queue_map.end()) { 406 | //we have an indel queue for this library 407 | IndelQueue ¤t_lib_queue = queued_it->second; 408 | extra_depth += current_lib_queue.process(tid, pos, record); 409 | } 410 | if(tmp->per_lib) { 411 | record << "\t}"; 412 | } 413 | } 414 | if ((int)pos >= tmp->beg && (int)pos < tmp->end) { 415 | cout << ref_name << "\t" << pos + 1 << "\t" << ref_base << "\t" << mapq_n + extra_depth << record.str() << endl; 416 | } 417 | } 418 | return 0; 419 | } 420 | 421 | int main(int argc, char *argv[]) 
422 | { 423 | bool distribution = false; 424 | bool per_lib = false; 425 | bool insertion_centric = false; 426 | string fn_pos, fn_fa; 427 | int64_t max_warnings = -1; 428 | char * fn_list = 0; 429 | 430 | pileup_data_t d {}; 431 | fetch_data_t *f = (fetch_data_t*)calloc(1, sizeof(pileup_data_t)); 432 | d.tid = -1, d.min_bq = 0, d.max_cnt = 10000000; 433 | 434 | po::options_description desc("Usage: bam-readcount [OPTIONS] bam_file|cram_file [region]\nGenerate metrics for bam_file at single nucleotide positions.\nExample: bam-readcount -f ref.fa some.bam|some.cram\n\nAvailable options"); 435 | desc.add_options() 436 | ("help,h", "produce this message") 437 | ("version,v", "output the version number") 438 | ("min-mapping-quality,q", po::value(&(d.min_mapq))->default_value(0), "minimum mapping quality of reads used for counting.") 439 | ("min-base-quality,b", po::value(&(d.min_bq))->default_value(0), "minimum base quality at a position to use the read for counting.") 440 | ("max-count,d", po::value(&(d.max_cnt))->default_value(10000000), "max depth to avoid excessive memory usage.") 441 | ("site-list,l", po::value(&fn_pos), "file containing a list of regions to report readcounts within.") 442 | ("reference-fasta,f", po::value(&fn_fa), "reference sequence in the fasta format.") 443 | ("print-individual-mapq,D", po::value(&distribution), "report the mapping qualities as a comma separated list.") 444 | ("per-library,p", po::bool_switch(&per_lib), "report results by library.") 445 | ("max-warnings,w", po::value(&max_warnings), "maximum number of warnings of each type to emit. -1 gives an unlimited number.") 446 | ("insertion-centric,i", po::bool_switch(&insertion_centric), "generate indel centric readcounts. Reads containing insertions will not be included in per-base counts") 447 | ; 448 | 449 | po::options_description hidden("Hidden options"); 450 | hidden.add_options() 451 | ("bam-file", po::value(), "bam file") 452 | ("region", po::value< vector >(), "region(s) specification e.g. 2:1-50") 453 | ; 454 | 455 | po::options_description cmdline_options; 456 | cmdline_options.add(desc).add(hidden); 457 | 458 | po::positional_options_description p; 459 | p.add("bam-file", 1); 460 | p.add("region", -1); 461 | 462 | po::variables_map vm; 463 | po::store(po::command_line_parser(argc, argv). 
464 | options(cmdline_options).positional(p).run(), vm); 465 | po::notify(vm); 466 | 467 | if (vm.count("version")) { 468 | cout << "bam-readcount version: " << __g_prog_version << " (commit " << __g_commit_hash << ")\n"; 469 | return 1; 470 | } 471 | 472 | if (vm.count("help") || vm.count("bam-file") == 0) { 473 | cout << desc << "\n"; 474 | return 1; 475 | } 476 | 477 | cerr << "Minimum mapping quality is set to " << d.min_mapq << endl; 478 | /* 479 | if (argc - optind == 0) { 480 | fprintf(stderr, "\n"); 481 | fprintf(stderr, "Usage: bam-readcount bam_file>|cram_file [region]\n"); 482 | fprintf(stderr, " -q INT filtering reads with mapping quality less than INT [%d]\n", d.min_mapq); 483 | fprintf(stderr, " -b INT don't include reads where the base quality is less than INT [%d]\n", d.min_bq); 484 | fprintf(stderr, " -d INT max depth to avoid excessive memory usage [%d]\n", d.max_cnt); 485 | fprintf(stderr, " -f FILE reference sequence in the FASTA format\n"); 486 | fprintf(stderr, " -l FILE list of regions to report readcounts within.\n"); 487 | fprintf(stderr, " -D report the mapping qualities as a comma separated list\n"); 488 | fprintf(stderr, " -w maximum number of warnings of each type to emit [unlimited]\n\n"); 489 | fprintf(stderr, "This program reports readcounts for each base at each position requested.\n"); 490 | fprintf(stderr, "\nPositions should be requested via the -l option as chromosome, start, stop\nwhere the coordinates are 1-based and each field is separated by whitespace.\n"); 491 | fprintf(stderr, "\nA single region may be requested on the command-line similarly to samtools view\n(i.e. bam-readcount -f ref.fa some.bam 1:150-150).\n\n"); 492 | fprintf(stderr, "It also reports the average base quality of these bases and mapping qualities of\n"); 493 | fprintf(stderr, "the reads containing each base.\n\nThe format is as follows:\nchr\tposition\treference_base\tbase:count:avg_mapping_quality:avg_basequality:avg_se_mapping_quality:num_plus_strand:num_minus_strand:avg_pos_as_fraction:avg_num_mismatches_as_fraction:avg_sum_mismatch_qualitiest:num_q2_containing_reads:avg_distance_to_q2_start_in_q2_reads:avg_clipped_length:avg_distance_to_effective_3p_end...\n"); 494 | 495 | fprintf(stderr, "\n"); 496 | return 1; 497 | } 498 | */ 499 | WARN.reset(new ReadWarnings(std::cerr, max_warnings)); 500 | 501 | if (!fn_fa.empty()) d.fai = fai_load(fn_fa.c_str()); 502 | 503 | // Configure reference for CRAM 504 | if (fn_list == 0 && !fn_fa.empty()) { 505 | fn_list = samfaipath(fn_fa.c_str()); 506 | } 507 | 508 | d.beg = 0; d.end = 0x7fffffff; 509 | d.distribution = distribution; 510 | d.per_lib = per_lib; 511 | d.insertion_centric = insertion_centric; 512 | d.in = samopen(vm["bam-file"].as().c_str(), "rb", 0); 513 | if (d.in == 0) { 514 | fprintf(stderr, "Fail to open BAM file %s\n", argv[optind]); 515 | return 1; 516 | } 517 | 518 | // Set reference for CRAM 519 | if (fn_list) { 520 | if (hts_set_fai_filename(d.in->file, fn_list) != 0) { 521 | fprintf(stderr, "Fail to open reference file %s\n", fn_list); 522 | return 1; 523 | } 524 | } 525 | 526 | std::set lib_names = find_library_names(d.in->header); 527 | for(std::set::iterator it = lib_names.begin(); it != lib_names.end(); ++it) { 528 | cerr << "Expect library: " << *it << " in BAM" << endl; 529 | } 530 | d.lib_names = lib_names; 531 | d.indel_queue_map = indel_queue_map_t(); 532 | 533 | if(!fn_pos.empty()) { 534 | std::ifstream fp(fn_pos.c_str()); 535 | if(!fp.is_open()) { 536 | cerr << "Failed to open region list file: " << fn_pos 
<< endl; 537 | return 1; 538 | } 539 | 540 | // Load index 541 | hts_idx_t *idx; 542 | idx = sam_index_load3( 543 | d.in->file, 544 | (const char *) vm["bam-file"].as().c_str(), 545 | NULL, 546 | HTS_IDX_SAVE_REMOTE); 547 | 548 | if (idx == 0) { 549 | fprintf(stderr, "BAM indexing file is not available.\n"); 550 | return 1; 551 | } 552 | //Now iterate through and do calculations for each one 553 | std::string ref_name; 554 | int beg; 555 | int end; 556 | int ref; 557 | 558 | 559 | //initialize the header hash 560 | khiter_t iter; 561 | khash_t(s) *h; 562 | if (d.hash == 0) { 563 | int ret, i; 564 | khiter_t iter; 565 | khash_t(s) *h; 566 | d.hash = h = kh_init(s); 567 | for (i = 0; i < d.in->header->n_targets; ++i) { 568 | iter = kh_put(s, h, d.in->header->target_name[i], &ret); 569 | kh_value(h, iter) = i; 570 | } 571 | } 572 | h = (khash_t(s)*)d.hash; 573 | std::string lineBuf; 574 | while(getline(fp, lineBuf)) { 575 | std::stringstream ss(lineBuf); 576 | if (!(ss >> ref_name >> beg >> end)) 577 | continue; 578 | 579 | iter = kh_get(s, h, ref_name.c_str()); 580 | if(iter == kh_end(h)) { 581 | fprintf(stderr, "%s not found in bam file. Region %s %i %i skipped.\n",ref_name.c_str(),ref_name.c_str(),beg,end); 582 | } 583 | else { 584 | // ref id exists 585 | //fprintf(stderr, "%s %i %i scanned in\n",ref_name,beg,end); 586 | ref = kh_value(h,iter); 587 | //fprintf(stderr, "%i %i %i scanned in\n",ref,beg,end); 588 | d.beg = beg - 1; // make this 0-based 589 | d.end = end; 590 | load_reference(&d, ref); 591 | bam_plbuf_t *buf = bam_plbuf_init(pileup_func, &d); // initialize pileup 592 | bam_plp_set_maxcnt(buf->iter, d.max_cnt); 593 | f->pileup_buffer = buf; 594 | if (d.fai) { 595 | f->ref_len = d.len; 596 | f->seq_name = d.in->header->target_name[d.tid]; 597 | } else { 598 | f->ref_len = 0; 599 | f->seq_name = 0; 600 | } 601 | f->ref_pointer = &(d.ref); 602 | samfetch(d.in, idx, ref, d.beg-1, d.end, f, fetch_func); 603 | bam_plbuf_push(0, buf); // finalize pileup 604 | bam_plbuf_destroy(buf); 605 | d.indel_queue_map.clear(); 606 | 607 | } 608 | } 609 | hts_idx_destroy(idx); 610 | samclose(d.in); 611 | if(d.fai) { 612 | fai_destroy(d.fai); 613 | } 614 | if(d.ref) { 615 | free(d.ref); 616 | } 617 | free(f); 618 | //free(fn_pos); 619 | //free(fn_fa); 620 | return 0; 621 | } 622 | else { 623 | if (!vm.count("region")) { // if a region is not specified 624 | //FIXME this currently crashes and burns because it doesn't hit the pre-processing in fetch_func 625 | sampileup(d.in, -1, pileup_func, &d); 626 | } else { 627 | int ref; 628 | 629 | // load index 630 | hts_idx_t *idx; 631 | idx = sam_index_load3( 632 | d.in->file, 633 | (const char *) vm["bam-file"].as().c_str(), 634 | NULL, 635 | HTS_IDX_SAVE_REMOTE); 636 | 637 | if (idx == 0) { 638 | fprintf(stderr, "BAM indexing file is not available.\n"); 639 | return 1; 640 | } 641 | vector regions = vm["region"].as< vector >(); 642 | typedef vector::iterator region_iter; 643 | for(region_iter it = regions.begin(); it != regions.end(); ++it) { 644 | bam_parse_region(d.in->header, it->c_str(), &ref, &(d.beg), &(d.end)); // parse the region 645 | if (ref < 0) { 646 | fprintf(stderr, "Invalid region %s\n", it->c_str()); 647 | return 1; 648 | } 649 | load_reference(&d, ref); 650 | bam_plbuf_t *buf = bam_plbuf_init(pileup_func, &d); // initialize pileup 651 | bam_plp_set_maxcnt(buf->iter, d.max_cnt); 652 | f->pileup_buffer = buf; 653 | f->ref_pointer = &(d.ref); 654 | samfetch(d.in, idx, ref, d.beg-1, d.end, f, fetch_func); 655 | bam_plbuf_push(0, buf); // 
finalize pileup 656 | bam_plbuf_destroy(buf); 657 | } 658 | hts_idx_destroy(idx); 659 | } 660 | if(d.ref) { 661 | free(d.ref); 662 | } 663 | if(d.fai) { 664 | fai_destroy(d.fai); 665 | } 666 | free(f); 667 | } 668 | samclose(d.in); 669 | return 0; 670 | } 671 | 672 | 673 | -------------------------------------------------------------------------------- /src/lib/bamrc/BasicStat.cpp: -------------------------------------------------------------------------------- 1 | #include "BasicStat.hpp" 2 | #include "auxfields.hpp" 3 | #include "ReadWarnings.hpp" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | BasicStat::BasicStat(bool is_indel) 11 | : read_count(0) 12 | , sum_map_qualities(0) 13 | , sum_single_ended_map_qualities(0) 14 | , num_plus_strand(0) 15 | , num_minus_strand(0) 16 | , sum_event_location(0.0) 17 | , sum_q2_distance(0.0) 18 | , num_q2_reads(0) 19 | , sum_number_of_mismatches(0.0) 20 | , sum_of_mismatch_qualities(0) 21 | , sum_of_clipped_lengths(0) 22 | , sum_3p_distance(0.0) 23 | , sum_base_qualities(0) 24 | , is_indel(is_indel) 25 | { 26 | } 27 | 28 | void BasicStat::process_read(bam_pileup1_t const* base) { 29 | read_count++; 30 | sum_map_qualities += base->b->core.qual; 31 | if(base->b->core.flag & BAM_FREVERSE) { 32 | //mapped to the reverse strand 33 | num_minus_strand++; 34 | } 35 | else { 36 | //must be mapped to the plus strand 37 | num_plus_strand++; 38 | } 39 | 40 | int32_t left_clip = 0; 41 | int32_t clipped_length = base->b->core.l_qseq; 42 | int32_t mismatch_sum = 0; 43 | int32_t q2_val = 0; 44 | int32_t three_prime_index = 0; 45 | 46 | //hopefully grab out our calculated per/read values 47 | //FIXME these will be unavailable if there is no reference 48 | //TODO Make sure the defaults on the above are reasonable if there is nothing available 49 | uint8_t *tag_ptr = bam_aux_get(base->b, "Zm"); 50 | if(tag_ptr) { 51 | ++tag_ptr; 52 | aux_zm_t zm = aux_zm_t::from_string((char const*)tag_ptr); 53 | mismatch_sum = zm.sum_of_mismatch_qualities; 54 | clipped_length = zm.clipped_length; 55 | left_clip = zm.left_clip; 56 | three_prime_index = zm.three_prime_index; 57 | q2_val = zm.q2_pos; 58 | sum_of_mismatch_qualities += mismatch_sum; 59 | 60 | if(q2_val > -1) { 61 | //this is in read coordinates. 
Ignores clipping as q2 may be clipped 62 | sum_q2_distance += (float) std::abs(base->qpos - q2_val) / (float) base->b->core.l_qseq; 63 | num_q2_reads++; 64 | } 65 | distances_to_3p.push_back( (float) std::abs(base->qpos - three_prime_index) / (float) base->b->core.l_qseq); 66 | sum_3p_distance += (float) std::abs(base->qpos - three_prime_index) / (float) base->b->core.l_qseq; 67 | 68 | sum_of_clipped_lengths += clipped_length; 69 | float read_center = (float)clipped_length/2.0; 70 | sum_event_location += 1.0 - std::abs((float)(base->qpos - left_clip) - read_center)/read_center; 71 | 72 | } 73 | else { 74 | WARN->warn(ReadWarnings::Zm_TAG_MISSING, bam1_qname(base->b)); 75 | } 76 | 77 | //grab the single ended mapping qualities for testing 78 | if(base->b->core.flag & BAM_FPROPER_PAIR) { 79 | uint8_t *sm_tag_ptr = bam_aux_get(base->b, "SM"); 80 | if(sm_tag_ptr) { 81 | int32_t single_ended_map_qual = bam_aux2i(sm_tag_ptr); 82 | sum_single_ended_map_qualities += single_ended_map_qual; 83 | } 84 | else { 85 | WARN->warn(ReadWarnings::SM_TAG_MISSING, bam1_qname(base->b)); 86 | } 87 | } 88 | else { 89 | //just add in the mapping quality as the single ended quality 90 | sum_single_ended_map_qualities += base->b->core.qual; 91 | } 92 | 93 | //grab out the number of mismatches 94 | uint8_t *nm_tag_ptr = bam_aux_get(base->b, "NM"); 95 | if(nm_tag_ptr) { 96 | int32_t number_mismatches = bam_aux2i(nm_tag_ptr); 97 | sum_number_of_mismatches += number_mismatches / (float) clipped_length; 98 | } 99 | else { 100 | WARN->warn(ReadWarnings::NM_TAG_MISSING, bam1_qname(base->b)); 101 | } 102 | mapping_qualities.push_back(static_cast(base->b->core.qual)); 103 | if(!is_indel) { 104 | sum_base_qualities += bam1_qual(base->b)[base->qpos]; 105 | } 106 | 107 | } 108 | 109 | 110 | std::ostream& operator<<(std::ostream& s, const BasicStat& stat) { 111 | //http://www.umich.edu/~eecs381/handouts/formatting.pdf 112 | //http://stackoverflow.com/questions/1532640/which-iomanip-manipulators-are-sticky 113 | std::ios_base::fmtflags current_flags = s.flags(); //save previous format flags 114 | std::streamsize current_precision = s.precision(); // save previous precision setting 115 | 116 | s << std::fixed << std::setprecision(2); 117 | s << stat.read_count << ":"; 118 | if(stat.read_count > 0) { 119 | s << (float) stat.sum_map_qualities / stat.read_count << ":"; 120 | if(stat.is_indel) { 121 | s << 0.0 << ":"; 122 | } 123 | else { 124 | s << (float) stat.sum_base_qualities / stat.read_count << ":"; 125 | } 126 | s << (float) stat.sum_single_ended_map_qualities / stat.read_count << ":"; 127 | s << stat.num_plus_strand << ":"; 128 | s << stat.num_minus_strand << ":"; 129 | s << (float) stat.sum_event_location / stat.read_count << ":"; 130 | s << (float) stat.sum_number_of_mismatches / stat.read_count << ":"; 131 | s << (float) stat.sum_of_mismatch_qualities / stat.read_count << ":"; 132 | s << stat.num_q2_reads << ":"; 133 | if(stat.num_q2_reads > 0) { 134 | s << (float) stat.sum_q2_distance / stat.num_q2_reads << ":"; 135 | } 136 | else { 137 | s << 0.0 << ":"; 138 | } 139 | s << (float) stat.sum_of_clipped_lengths / stat.read_count << ":"; 140 | s << (float) stat.sum_3p_distance / stat.read_count; 141 | } 142 | else { 143 | s << 0.0 << ":"; 144 | s << 0.0 << ":"; 145 | s << 0.0 << ":"; 146 | s << 0 << ":"; 147 | s << 0 << ":"; 148 | s << 0.0 << ":"; 149 | s << 0.0 << ":"; 150 | s << 0.0 << ":"; 151 | s << 0 << ":"; 152 | s << 0.0 << ":"; 153 | s << 0.0 << ":"; 154 | s << 0.0; 155 | } 156 | s.flags(current_flags); //save 
previous format flags 157 | s.precision(current_precision); // save previous precision setting 158 | return s; 159 | } 160 | -------------------------------------------------------------------------------- /src/lib/bamrc/BasicStat.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include "sam.h" 6 | 7 | class BasicStat { 8 | public: 9 | BasicStat(bool is_indel = false); 10 | void process_read(bam_pileup1_t const* base); //may want other things here like clipping. 11 | 12 | mutable unsigned int read_count; //number of reads containing the indel 13 | mutable unsigned int sum_map_qualities; //sum of the mapping qualities of reads containing the indel 14 | mutable unsigned int sum_single_ended_map_qualities; //sum of the single ended mapping qualities; 15 | mutable unsigned int num_plus_strand; 16 | mutable unsigned int num_minus_strand; 17 | mutable float sum_event_location; 18 | mutable float sum_q2_distance; 19 | mutable unsigned int num_q2_reads; 20 | mutable float sum_number_of_mismatches; 21 | mutable unsigned int sum_of_mismatch_qualities; 22 | mutable unsigned int sum_of_clipped_lengths; 23 | mutable float sum_3p_distance; 24 | mutable unsigned int sum_base_qualities; 25 | mutable std::vector mapping_qualities; 26 | mutable std::vector distances_to_3p; 27 | bool is_indel; 28 | }; 29 | 30 | std::ostream& operator<<(std::ostream& s, const BasicStat& stat); 31 | -------------------------------------------------------------------------------- /src/lib/bamrc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | project(bamrc) 4 | 5 | set(SOURCES 6 | ReadWarnings.hpp 7 | auxfields.hpp 8 | BasicStat.hpp 9 | BasicStat.cpp 10 | IndelQueueEntry.hpp 11 | IndelQueueEntry.cpp 12 | IndelQueue.hpp 13 | IndelQueue.cpp 14 | ) 15 | 16 | xadd_library(bamrc ${SOURCES}) 17 | target_link_libraries(bamrc ${Boost_LIBRARIES} ${Samtools_LIBRARIES} ${Htslib_LIBRARIES} ${Support_LIBRARIES}) 18 | -------------------------------------------------------------------------------- /src/lib/bamrc/IndelQueue.cpp: -------------------------------------------------------------------------------- 1 | #include "bamrc/IndelQueue.hpp" 2 | 3 | int IndelQueue::process(uint32_t tid, uint32_t pos, std::ostream& stream) { 4 | int extra_depth = 0; 5 | while(!queue.empty() && ( (queue.front().tid == tid && queue.front().pos < pos) || (queue.front().tid != tid))) { 6 | queue.pop(); 7 | } 8 | 9 | while(!queue.empty() && queue.front().tid == tid && queue.front().pos == pos) { 10 | stream << "\t" << queue.front(); 11 | extra_depth += queue.front().indel_stats.read_count; 12 | queue.pop(); 13 | } 14 | return extra_depth; 15 | } 16 | 17 | -------------------------------------------------------------------------------- /src/lib/bamrc/IndelQueue.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "bamrc/BasicStat.hpp" 4 | #include "bamrc/IndelQueueEntry.hpp" 5 | 6 | #include 7 | #include 8 | 9 | typedef std::queue indel_queue_t; 10 | 11 | struct IndelQueue { 12 | indel_queue_t queue; 13 | void push(IndelQueueEntry entry) { queue.push(entry); }; 14 | int process(uint32_t tid, uint32_t pos, std::ostream& stream); 15 | }; 16 | -------------------------------------------------------------------------------- /src/lib/bamrc/IndelQueueEntry.cpp: 
-------------------------------------------------------------------------------- 1 | #include "bamrc/IndelQueueEntry.hpp" 2 | 3 | std::ostream& operator<<(std::ostream& s, const IndelQueueEntry& entry) { 4 | s << entry.allele; 5 | s << ":"; 6 | s << entry.indel_stats; 7 | return s; 8 | } 9 | -------------------------------------------------------------------------------- /src/lib/bamrc/IndelQueueEntry.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "bamrc/BasicStat.hpp" 4 | 5 | #include 6 | #include 7 | 8 | struct IndelQueueEntry { 9 | uint32_t tid; 10 | uint32_t pos; 11 | BasicStat indel_stats; 12 | std::string allele; 13 | IndelQueueEntry() : tid(0), pos(0), indel_stats(), allele() {} 14 | IndelQueueEntry(uint32_t tid, uint32_t pos, BasicStat indel_stats, std::string allele) : tid(tid), pos(pos), indel_stats(indel_stats), allele(allele) {} 15 | }; 16 | 17 | std::ostream& operator<<(std::ostream& s, const IndelQueueEntry& entry); 18 | -------------------------------------------------------------------------------- /src/lib/bamrc/ReadWarnings.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | class ReadWarnings { 11 | public: 12 | enum WarningType { 13 | SM_TAG_MISSING = 0, 14 | NM_TAG_MISSING, 15 | Zm_TAG_MISSING, 16 | LIBRARY_UNAVAILABLE, 17 | N_WARNING_TYPES 18 | }; 19 | 20 | public: 21 | ReadWarnings(std::ostream& stream, int64_t max_count_per_type) 22 | : _stream(stream) 23 | , _max_count_per_type(max_count_per_type) 24 | , _counts(N_WARNING_TYPES, 0) 25 | , _messages(N_WARNING_TYPES) 26 | { 27 | _messages[SM_TAG_MISSING] = "Couldn't find single-end mapping quality. " 28 | "Check to see if the SM tag is in BAM."; 29 | 30 | _messages[NM_TAG_MISSING] = "Couldn't find number of mismatches. " 31 | "Check to see if the NM tag is in BAM."; 32 | 33 | _messages[Zm_TAG_MISSING] = "Couldn't find the generated tag."; 34 | 35 | _messages[LIBRARY_UNAVAILABLE] = "Library unavailable. 
" 36 | "Check to make sure the LB tag is present in the @RG entries of the header."; 37 | } 38 | 39 | void warn(WarningType type, char const* read_name) { 40 | ++_counts[type]; 41 | if (_max_count_per_type >= 0 && _counts[type] > _max_count_per_type) { 42 | return; 43 | } 44 | _stream << "WARNING: In read " << read_name << ": " << _messages[type] << "\n"; 45 | 46 | if (_max_count_per_type >= 0 && _counts[type] == _max_count_per_type) { 47 | _stream << "The previous warning has been emitted " 48 | << _counts[type] << " times and will be disabled.\n"; 49 | } 50 | } 51 | 52 | private: 53 | std::ostream& _stream; 54 | int64_t _max_count_per_type; 55 | std::vector _counts; 56 | std::vector _messages; 57 | }; 58 | 59 | 60 | extern std::auto_ptr WARN; 61 | -------------------------------------------------------------------------------- /src/lib/bamrc/auxfields.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | struct aux_zm_t { 7 | int sum_of_mismatch_qualities; 8 | int clipped_length; 9 | int left_clip; 10 | int three_prime_index; 11 | int q2_pos; 12 | 13 | std::string to_string() const { 14 | std::stringstream ss; 15 | ss << sum_of_mismatch_qualities 16 | << " " << clipped_length 17 | << " " << left_clip 18 | << " " << three_prime_index 19 | << " " << q2_pos; 20 | return ss.str(); 21 | } 22 | 23 | static aux_zm_t from_string(char const* data) { 24 | std::stringstream ss(data); 25 | 26 | aux_zm_t zm; 27 | ss >> zm.sum_of_mismatch_qualities 28 | >> zm.clipped_length 29 | >> zm.left_clip 30 | >> zm.three_prime_index 31 | >> zm.q2_pos; 32 | 33 | return zm; 34 | } 35 | }; 36 | -------------------------------------------------------------------------------- /test-data/cram_site_test.sh: -------------------------------------------------------------------------------- 1 | ../build/bin/bam-readcount -l twolib_site_list.txt -f rand1k.fa twolib.sorted.cram > out 2 | -------------------------------------------------------------------------------- /test-data/expected_all_lib: -------------------------------------------------------------------------------- 1 | 21 10402985 G 344 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:3:58.33:11.33:0.00:2:1:0.48:0.05:43.67:2:0.51:237.67:0.54 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:341:52.80:26.00:0.16:189:152:0.50:0.01:11.75:295:0.39:237.76:0.39 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 +A:20:52.55:0.00:0.00:12:8:0.53:0.02:25.00:18:0.41:237.70:0.39 2 | 21 10405200 T 59 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:30:46.53:25.17:0.23:17:13:0.20:0.01:33.57:26:0.34:246.07:0.42 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:29:58.00:25.14:0.00:18:11:0.19:0.01:17.69:27:0.26:244.66:0.30 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 3 | -------------------------------------------------------------------------------- /test-data/expected_insertion_centric_all_lib: -------------------------------------------------------------------------------- 1 | 21 10402985 G 344 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:3:58.33:11.33:0.00:2:1:0.48:0.05:43.67:2:0.51:237.67:0.54 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:321:52.82:25.94:0.17:177:144:0.50:0.01:10.92:277:0.39:237.77:0.39 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 
+A:20:52.55:0.00:0.00:12:8:0.53:0.02:25.00:18:0.41:237.70:0.39 2 | 21 10405200 T 59 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:30:46.53:25.17:0.23:17:13:0.20:0.01:33.57:26:0.34:246.07:0.42 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:29:58.00:25.14:0.00:18:11:0.19:0.01:17.69:27:0.26:244.66:0.30 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 3 | -------------------------------------------------------------------------------- /test-data/expected_insertion_centric_per_lib: -------------------------------------------------------------------------------- 1 | 21 10402985 G 344 Solexa-135852 { =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:2:57.50:16.00:0.00:2:0:0.35:0.02:42.50:2:0.51:231.50:0.51 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:135:53.77:25.41:0.00:78:57:0.51:0.01:9.04:116:0.38:236.42:0.38 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 +A:20:52.55:0.00:0.00:12:8:0.53:0.02:25.00:18:0.41:237.70:0.39 } Solexa-135853 { =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:1:60.00:2.00:0.00:0:1:0.76:0.10:46.00:0:0.00:250.00:0.62 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:186:52.12:26.33:0.29:99:87:0.50:0.01:12.29:161:0.39:238.74:0.40 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 } 2 | 21 10405200 T 59 Solexa-135852 { =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:8:42.88:26.50:0.88:3:5:0.14:0.01:35.75:5:0.49:247.38:0.66 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:14:57.93:23.36:0.00:9:5:0.19:0.01:26.64:14:0.29:245.86:0.29 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 } Solexa-135853 { =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:22:47.86:24.68:0.00:14:8:0.22:0.01:32.77:21:0.30:245.59:0.33 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:15:58.07:26.80:0.00:9:6:0.19:0.01:9.33:13:0.23:243.53:0.31 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 } 3 | -------------------------------------------------------------------------------- /test-data/expected_per_lib: -------------------------------------------------------------------------------- 1 | 21 10402985 G 344 Solexa-135852 { =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:2:57.50:16.00:0.00:2:0:0.35:0.02:42.50:2:0.51:231.50:0.51 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:155:53.61:25.61:0.00:90:65:0.51:0.01:11.10:134:0.39:236.59:0.38 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 +A:20:52.55:0.00:0.00:12:8:0.53:0.02:25.00:18:0.41:237.70:0.39 } Solexa-135853 { =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:1:60.00:2.00:0.00:0:1:0.76:0.10:46.00:0:0.00:250.00:0.62 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:186:52.12:26.33:0.29:99:87:0.50:0.01:12.29:161:0.39:238.74:0.40 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 } 2 | 21 10405200 T 59 Solexa-135852 { =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:8:42.88:26.50:0.88:3:5:0.14:0.01:35.75:5:0.49:247.38:0.66 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:14:57.93:23.36:0.00:9:5:0.19:0.01:26.64:14:0.29:245.86:0.29 
N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 } Solexa-135853 { =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:22:47.86:24.68:0.00:14:8:0.22:0.01:32.77:21:0.30:245.59:0.33 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:15:58.07:26.80:0.00:9:6:0.19:0.01:9.33:13:0.23:243.53:0.31 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 } 3 | -------------------------------------------------------------------------------- /test-data/rand1k.fa: -------------------------------------------------------------------------------- 1 | > rand1k 2 | TCCTAATTCTGGGTAACCGCCGCCTGAAGCCAAAAAATAAGCCGGAGCCAAGGGGGAGTC 3 | ACACTGCTCCACGAACGCCTCTCATACAGCTTCGTCTTACAGGTGGAGATCATTGTCCCG 4 | GAGAGTCATGTGCCTTAGTTAAAAAGGTTTACTGCGCTCGGGGTCGAGTGCGGGAACTTC 5 | TCGGGTGGCTACGTACCGGGGCCTACCTTGCTATCTTTGACAATCAGCGTTCTGGATTGT 6 | CAGGCTCACTTCCGTAGCAGTTGCTGGAGAATATGGACATATCAGCTTTGACACACTGGG 7 | TTAAGGCGTAGGGTAGAGACGGAGTCCCTTCGCTGCCAATGTGGTGGTTTGGGACGAGTA 8 | TCATGTTGGTGCCCCAAGTTAACTTACTCCGCCCATGTCGTGCGATTACGCGAGAGTAGT 9 | AGATCGCTACGAGTATGTCCTCGGTGATCTAGTTAACTACTGTTACTGATGTCCGTTGCT 10 | CCACAGGTATACTCGGACACAATTCACGGGCTCCTCAAGCATACTAAAGAAGTCACGAGT 11 | GACGTCGGCGTAACCTCACATTAGTGGAGGAACCCGTGTGGAACATCATTCTAACGACAC 12 | TGTCGATCCCGGATGGATATGGTAGTCTTGATTATCCAGAGTCTTAGAGACATGGTAAGT 13 | TAGGAGCGCAGGACCATCAACTCTACTTTCCGGCAAATGTTAAGGGGTTTTGCTGACCAC 14 | CCGCATGCTTACAGTCCCGTTTCGCTAAGGTCTTCCTCGCTGCCTCTAGTTTTAGCGGAC 15 | GTTCCTTTCTCAACTAGTCTATTTGTTCATACTCATTTGGCACAGGTCTGTGTACGTCTA 16 | TCATGCGGACTAAATTACCCACAGAATGTCACAGGACAACATAGTGTTCATCATCCCTGT 17 | GGGATAACCGGTTACCTCGGATGAGGAGTATGAACTATATCTTAGCGTAGACCGATGTAT 18 | GGAAAGGCGCAGCCTCTGGCCGCCCACTATCGGAAATCGT 19 | -------------------------------------------------------------------------------- /test-data/rand1k.fa.fai: -------------------------------------------------------------------------------- 1 | rand1k 1000 9 60 61 2 | -------------------------------------------------------------------------------- /test-data/ref.fa.fai: -------------------------------------------------------------------------------- 1 | 21 10505000 4 60 61 2 | -------------------------------------------------------------------------------- /test-data/site_list: -------------------------------------------------------------------------------- 1 | 21 10402985 10402985 2 | 21 10405200 10405200 3 | -------------------------------------------------------------------------------- /test-data/test.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genome/bam-readcount/c7c76e6b84de9ed774ea3865fedc04b2d6276865/test-data/test.bam -------------------------------------------------------------------------------- /test-data/test.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genome/bam-readcount/c7c76e6b84de9ed774ea3865fedc04b2d6276865/test-data/test.bam.bai -------------------------------------------------------------------------------- /test-data/test_bad_rg.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genome/bam-readcount/c7c76e6b84de9ed774ea3865fedc04b2d6276865/test-data/test_bad_rg.bam -------------------------------------------------------------------------------- /test-data/test_bad_rg.bam.bai: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/genome/bam-readcount/c7c76e6b84de9ed774ea3865fedc04b2d6276865/test-data/test_bad_rg.bam.bai -------------------------------------------------------------------------------- /test-data/twolib.sorted.cram: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genome/bam-readcount/c7c76e6b84de9ed774ea3865fedc04b2d6276865/test-data/twolib.sorted.cram -------------------------------------------------------------------------------- /test-data/twolib.sorted.cram.crai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genome/bam-readcount/c7c76e6b84de9ed774ea3865fedc04b2d6276865/test-data/twolib.sorted.cram.crai -------------------------------------------------------------------------------- /test-data/twolib_site_list.txt: -------------------------------------------------------------------------------- 1 | rand1k 50 60 2 | -------------------------------------------------------------------------------- /test/lib/bamrc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | set(TEST_LIBS 4 | bamrc 5 | ) 6 | include_directories(${GTEST_INCLUDE_DIRS}) 7 | 8 | add_unit_tests(TestBamrcLib 9 | TestAuxFields.cpp 10 | TestReadWarnings.cpp 11 | TestIndelQueue.cpp 12 | TestIndelQueueEntry.cpp 13 | ) 14 | -------------------------------------------------------------------------------- /test/lib/bamrc/TestAuxFields.cpp: -------------------------------------------------------------------------------- 1 | #include "bamrc/auxfields.hpp" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | 9 | TEST(AuxFields, zm) { 10 | aux_zm_t zm; 11 | zm.sum_of_mismatch_qualities = 1; 12 | zm.clipped_length = 2; 13 | zm.left_clip = 3; 14 | zm.three_prime_index = 4; 15 | zm.q2_pos = 5; 16 | 17 | string s = zm.to_string(); 18 | ASSERT_EQ("1 2 3 4 5", s); 19 | 20 | aux_zm_t zm2 = aux_zm_t::from_string(s.c_str()); 21 | 22 | ASSERT_EQ(1, zm2.sum_of_mismatch_qualities); 23 | ASSERT_EQ(2, zm2.clipped_length); 24 | ASSERT_EQ(3, zm.left_clip); 25 | ASSERT_EQ(4, zm.three_prime_index); 26 | ASSERT_EQ(5, zm.q2_pos); 27 | } 28 | 29 | 30 | TEST(AuxFields, zmNegative) { 31 | aux_zm_t zm; 32 | zm.sum_of_mismatch_qualities = -1; 33 | zm.clipped_length = -2; 34 | zm.left_clip = -3; 35 | zm.three_prime_index = -4; 36 | zm.q2_pos = -5; 37 | 38 | string s = zm.to_string(); 39 | ASSERT_EQ("-1 -2 -3 -4 -5", s); 40 | 41 | aux_zm_t zm2 = aux_zm_t::from_string(s.c_str()); 42 | 43 | ASSERT_EQ(-1, zm2.sum_of_mismatch_qualities); 44 | ASSERT_EQ(-2, zm2.clipped_length); 45 | ASSERT_EQ(-3, zm.left_clip); 46 | ASSERT_EQ(-4, zm.three_prime_index); 47 | ASSERT_EQ(-5, zm.q2_pos); 48 | } 49 | -------------------------------------------------------------------------------- /test/lib/bamrc/TestIndelQueue.cpp: -------------------------------------------------------------------------------- 1 | #include "bamrc/IndelQueueEntry.hpp" 2 | #include "bamrc/IndelQueue.hpp" 3 | #include "bamrc/ReadWarnings.hpp" 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | TEST(IndelQueue, push) { 13 | IndelQueue test_queue; 14 | IndelQueueEntry entry; 15 | ASSERT_TRUE(test_queue.queue.empty()); 16 | test_queue.push(entry); 17 | ASSERT_EQ(1, test_queue.queue.size()); 18 | } 19 | 20 | 21 | TEST(IndelQueue, process_irrelevant) { 22 | //This test verifies the queue is cleared of sites 
that are passed by 23 | IndelQueue test_queue; 24 | 25 | IndelQueueEntry test1; 26 | test1.tid = 1; 27 | test1.pos = 1; 28 | 29 | test_queue.push(test1); 30 | 31 | IndelQueueEntry test2; 32 | test2.tid = 1; 33 | test2.pos = 5; 34 | 35 | test_queue.push(test2); 36 | 37 | stringstream stream; 38 | 39 | test_queue.process(1, 2, stream); 40 | ASSERT_EQ(1, test_queue.queue.size()); 41 | } 42 | 43 | TEST(IndelQueue, process_new_chromosome) { 44 | //This test verifies the queue is cleared of sites when the chromosome changes 45 | IndelQueue test_queue; 46 | 47 | IndelQueueEntry test1; 48 | test1.tid = 1; 49 | test1.pos = 1; 50 | 51 | test_queue.push(test1); 52 | 53 | IndelQueueEntry test2; 54 | test2.tid = 1; 55 | test2.pos = 5; 56 | 57 | test_queue.push(test2); 58 | 59 | stringstream stream; 60 | 61 | test_queue.process(10, 2, stream); 62 | ASSERT_EQ(0, test_queue.queue.size()); 63 | } 64 | 65 | TEST(IndelQueue, process_relevant) { 66 | IndelQueue test_queue; 67 | 68 | IndelQueueEntry test1; 69 | test1.tid = 1; 70 | test1.pos = 1; 71 | 72 | test_queue.push(test1); 73 | 74 | IndelQueueEntry test2; 75 | test2.tid = 1; 76 | test2.pos = 5; 77 | test2.indel_stats.read_count = 10; 78 | 79 | test_queue.push(test2); 80 | 81 | stringstream stream; 82 | ASSERT_EQ(0, stream.str().size()); 83 | 84 | int depth = test_queue.process(1, 5, stream); 85 | ASSERT_EQ(0, test_queue.queue.size()); 86 | ASSERT_EQ(10, depth); 87 | ASSERT_NE(0, stream.str().size()); 88 | 89 | } 90 | -------------------------------------------------------------------------------- /test/lib/bamrc/TestIndelQueueEntry.cpp: -------------------------------------------------------------------------------- 1 | #include "bamrc/IndelQueueEntry.hpp" 2 | #include "bamrc/ReadWarnings.hpp" 3 | #include "bamrc/BasicStat.hpp" 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | //FIXME This is required for all things in the test library 11 | //using the ReadWarnings class. 
12 | std::auto_ptr WARN; 13 | 14 | using namespace std; 15 | 16 | TEST(IndelQueueEntry, parameterized_constructor) { 17 | BasicStat indel_stat(true); 18 | IndelQueueEntry new_entry(1, 20, indel_stat, "-AA"); 19 | ASSERT_EQ(1, new_entry.tid); 20 | ASSERT_EQ(20, new_entry.pos); 21 | ASSERT_EQ("-AA", new_entry.allele); 22 | } 23 | 24 | TEST(IndelQueueEntry, stream_output) { 25 | stringstream stat_stream; 26 | stat_stream << "-AA:"; 27 | BasicStat indel_stat(true); 28 | stat_stream << indel_stat; 29 | 30 | stringstream entry_stream; 31 | IndelQueueEntry new_entry(1, 20, indel_stat, "-AA"); 32 | entry_stream << new_entry; 33 | ASSERT_EQ(stat_stream.str(), entry_stream.str()); 34 | } 35 | -------------------------------------------------------------------------------- /test/lib/bamrc/TestReadWarnings.cpp: -------------------------------------------------------------------------------- 1 | #include "bamrc/ReadWarnings.hpp" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | 9 | namespace { 10 | struct Results { 11 | Results(std::stringstream& s) 12 | : total(0) 13 | , num_sm(0) 14 | , num_nm(0) 15 | , num_zm(0) 16 | { 17 | string line; 18 | while (getline(s, line)) { 19 | ++total; 20 | if (line.find("SM tag") != string::npos) 21 | ++num_sm; 22 | else if (line.find("NM tag") != string::npos) 23 | ++num_nm; 24 | else if (line.find("generated tag") != string::npos) 25 | ++num_zm; 26 | } 27 | } 28 | 29 | int total; 30 | int num_sm; 31 | int num_nm; 32 | int num_zm; 33 | }; 34 | } 35 | 36 | TEST(ReadWarnings, unlimited) { 37 | stringstream ss; 38 | ReadWarnings w(ss, -1); 39 | 40 | for (int i = 0; i < 100; ++i) { 41 | w.warn(ReadWarnings::SM_TAG_MISSING, "x"); 42 | w.warn(ReadWarnings::NM_TAG_MISSING, "y"); 43 | w.warn(ReadWarnings::Zm_TAG_MISSING, "z"); 44 | } 45 | 46 | Results res(ss); 47 | ASSERT_EQ(300, res.total); 48 | ASSERT_EQ(100, res.num_sm); 49 | ASSERT_EQ(100, res.num_nm); 50 | ASSERT_EQ(100, res.num_zm); 51 | } 52 | 53 | TEST(ReadWarnings, fiveEach) { 54 | stringstream ss; 55 | ReadWarnings w(ss, 5); 56 | 57 | for (int i = 0; i < 100; ++i) { 58 | w.warn(ReadWarnings::SM_TAG_MISSING, "x"); 59 | w.warn(ReadWarnings::NM_TAG_MISSING, "y"); 60 | w.warn(ReadWarnings::Zm_TAG_MISSING, "z"); 61 | } 62 | 63 | Results res(ss); 64 | // there will be one extra line for each warning type mentioning that it 65 | // is now disabled 66 | ASSERT_EQ(15 + 3, res.total); 67 | ASSERT_EQ(5, res.num_sm); 68 | ASSERT_EQ(5, res.num_nm); 69 | ASSERT_EQ(5, res.num_zm); 70 | 71 | } 72 | -------------------------------------------------------------------------------- /tutorial/.gitignore: -------------------------------------------------------------------------------- 1 | v1 2 | CERI-KRISP-K032274* 3 | NC_045512.2* 4 | *.fastq 5 | -------------------------------------------------------------------------------- /tutorial/README.md: -------------------------------------------------------------------------------- 1 | # A `bam-readcount` tutorial 2 | 3 | On November 26, 2021, [WHO](https://www.who.int/) 4 | [classified](https://www.who.int/news/item/26-11-2021-classification-of-omicron-(b.1.1.529)-sars-cov-2-variant-of-concern) 5 | the 6 | [SARS-CoV-2](https://en.wikipedia.org/wiki/Severe_acute_respiratory_syndrome_coronavirus_2) 7 | [variant](https://covariants.org/) [`B.1.1.529`](https://covariants.org/variants/21K.Omicron) as a ["Variant of 8 | Concern"](https://www.who.int/en/activities/tracking-SARS-CoV-2-variants/) (VOC), designated 9 | "[Omicron](https://en.wikipedia.org/wiki/Omicron)". 
The variant was 10 | noted as having an unusually large number of mutations, including more 11 | than 30 in the 12 | [Spike protein](https://en.wikipedia.org/wiki/Coronavirus_spike_protein). 13 | 14 | On November 29, [Tulio de Oliveira](https://twitter.com/Tuliodna) 15 | of [CERI](https://ceri.org.za/) [posted](https://twitter.com/Tuliodna/status/1465338678264401934) a 16 | [link](https://www.ncbi.nlm.nih.gov/sra/?term=PRJNA784038) to the 17 | [FASTQ](https://en.wikipedia.org/wiki/FASTQ_format) sequencing files for some 18 | of the first sequenced Omicron cases. We can use 19 | [`bam-readcount`](https://github.com/genome/bam-readcount) along with the 20 | SARS-CoV-2 [reference genome](https://www.ncbi.nlm.nih.gov/nuccore/1798174254) and some common software to explore these mutations. Please 21 | note that this is just an exercise and would surely not be street-legal in a 22 | virology department! 23 | 24 | The only requirement is [Docker](https://www.docker.com/) and access to a Unix 25 | command line, although all the tools can be easily built locally instead. A 26 | ready-to-run Bash script containing all the commands is available 27 | [here](scripts/commands.sh) 28 | 29 | ## Docker setup 30 | 31 | All Docker commands use similar arguments to allow us to access files in the 32 | current working directory, so first we will define an alias with the 33 | common options 34 | 35 | # --rm remove container on exit 36 | # -w $(pwd) set working directory to current working directory 37 | # -v $(pwd):$(pwd) map current working directory inside container 38 | alias dockercmd='docker run --rm -w $(pwd) -v $(pwd):$(pwd)' 39 | 40 | and run Docker as 41 | 42 | dockercmd IMAGE ARGS... 43 | 44 | All the computations can be run quickly on an ordinary laptop; no command 45 | should take more than a minute to run. The Docker containers are also 46 | fairly small and should download quickly on a broadband connection. 47 | 48 | If you are using a shell that does not support this `alias` format, you 49 | can just substitute the full command for `dockercmd` in the command 50 | lines below. 
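For example, with the options shown in the alias above, the first download command in the next section could be written out in full as

    docker run --rm -w $(pwd) -v $(pwd):$(pwd) curlimages/curl -o NC_045512.2.fa "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_045512.2&rettype=fasta"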
51 | 52 | 53 | ## Download the reference and FASTQs 54 | 55 | # SARS-CoV-2 standard reference genome 56 | dockercmd curlimages/curl -o NC_045512.2.fa "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_045512.2&rettype=fasta" 57 | 58 | # CERI FASTQs via the SRA for case CERI-KRISP-K032274 59 | # We will use the SRA tools to download and convert to FASTQ 60 | # from the SRA format 61 | # URL: https://trace.ncbi.nlm.nih.gov/Traces/sra/?run=SRR17054502 62 | dockercmd ncbi/sra-tools fastq-dump --split-files --origfmt SRR17054502 63 | 64 | 65 | ## Index and align with `bwa` and `samtools` 66 | 67 | [bwa](https://github.com/lh3/bwa) to align the sequence and 68 | [samtools](http://www.htslib.org/) to convert, sort, and index the alignments 69 | 70 | # Index the reference genome 71 | dockercmd seqfu/alpine-bwa bwa index NC_045512.2.fa 72 | 73 | # Align the sequence 74 | dockercmd seqfu/alpine-bwa bwa mem NC_045512.2.fa SRR17054502_1.fastq SRR17054502_2.fastq > CERI-KRISP-K032274.sam 75 | 76 | # Convert to BAM and remove SAM output 77 | dockercmd seqfu/alpine-samtools-1.10 samtools view -b CERI-KRISP-K032274.sam > CERI-KRISP-K032274.bam 78 | rm CERI-KRISP-K032274.sam 79 | 80 | # Sort BAM and remove unsorted BAM 81 | dockercmd seqfu/alpine-samtools-1.10 samtools sort CERI-KRISP-K032274.bam > CERI-KRISP-K032274.sorted.bam 82 | rm CERI-KRISP-K032274.bam 83 | 84 | # Index sorted BAM 85 | dockercmd seqfu/alpine-samtools-1.10 samtools index CERI-KRISP-K032274.sorted.bam 86 | 87 | 88 | ## Run `bam-readcount` on the S protein region 89 | 90 | We're now ready to run `bam-readcount`. Here we will focus on the Spike (S) protein. 91 | The Genbank [reference](https://www.ncbi.nlm.nih.gov/gene/43740568) for S gives 92 | coordinates of `21563..25384` on the reference genome, so we will specify that 93 | range to restrict `bam-readcount` output to S. We could specify mapping or 94 | base quality filters, but we will run it wide open and see what we get 95 | 96 | # -w1 Show any warnings only once; bam-readcount likes to complain about 97 | # missing SM tags for every read 98 | # -f Specify the reference genome 99 | dockercmd mgibio/bam-readcount -w1 -f NC_045512.2.fa CERI-KRISP-K032274.sorted.bam NC_045512.2:21563-25384 > CERI-KRISP-K032274.brc.tsv 100 | 101 | 102 | ## A quick look at the output 103 | 104 | The `bam-readcount` output is discussed in more detail below, but we 105 | can quickly scan through the `bam-readcount` output and see what look like 106 | some variants, for example 107 | 108 | NC_045512.2 21765 T 1615 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:3:60.00:33.00:0.00:1:2:0.09:0.05:77.00:1:0.01:100.33:0.35 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 -TACATG:1612:60.00:0.00:0.19:805:807:0.58:0.05:66.17:805:0.41:184.78:0.48 109 | 110 | At position `21765`, of `1615` reads, `1612` have a deletion `-TACATG` 111 | 112 | -TACATG:1612:60.00:0.00:0.19:805:807:0.58:0.05:66.17:805:0.41:184.78:0.48 113 | 114 | `bam-readcount` provides a lot of information about each base or indel. 115 | For example, the `60.00` is the (good) average mapping quality of these reads. 116 | The `805` and `807` are counts of the forward and reverse reads showing the variant, 117 | about a 50/50 split which indicates no strand bias. 
`0.58` is 118 | the average distance of locus from the ends of the reads, indicating that 119 | it is not mostly at one end of the reads, where sequencing quality and 120 | alignment artifacts may be an issue. 121 | All signs that this variant may not be just a technical artifact. 122 | 123 | But this is hard to read from the raw `bam-readcount` output. 124 | 125 | 126 | ## Parsing `bam-readcount` output 127 | 128 | `bam-readcount` output is designed to be easy to parse. The output format 129 | is tab-separated, with four fields of overall data about the position 130 | followed by one field for each base or indel that is in turn `:`-separated. 131 | There are a variable number of fields depending on the presence of 132 | indels. For more details see https://github.com/genome/bam-readcount#output, 133 | but this is best illustrated by some Python parsing code. 134 | 135 | Here is a dictionary of all the per-base data fields 136 | 137 | ```python 138 | # Per-base/indel data fields 139 | base_fields = { 140 | 'base': str, 141 | 'count': int, 142 | 'avg_mapping_quality': float, 143 | 'avg_basequality': float, 144 | 'avg_se_mapping_quality': float, 145 | 'num_plus_strand': int, 146 | 'num_minus_strand': int, 147 | 'avg_pos_as_fraction': float, 148 | 'avg_num_mismatches_as_fraction': float, 149 | 'avg_sum_mismatch_qualities': float, 150 | 'num_q2_containing_reads': int, 151 | 'avg_distance_to_q2_start_in_q2_reads': float, 152 | 'avg_clipped_length': float, 153 | 'avg_distance_to_effective_3p_end': float 154 | } 155 | ``` 156 | 157 | And here is some code to parse the output line by line 158 | 159 | ```python 160 | # Open the bam-readcount output file and read it line by line 161 | # Note that the output has no header, so we consume every line 162 | with open(args.bam_readcount_output) as in_fh: 163 | for line in in_fh: 164 | # Strip newline from end of line 165 | line = line.strip() 166 | # Fields are tab-separated, so split into a list on \t 167 | fields = line.split('\t') 168 | # The first four fields contain overall information about the position 169 | chrom = fields[0] # Chromosome/reference name 170 | position = int(fields[1]) # Position (1-based) 171 | reference_base = fields[2] # Reference base 172 | depth = int(fields[3]) # Depth of coverage 173 | # The remaining fields are data for each base or indel 174 | # Iterate over each base/indel 175 | for base_data_string in fields[4:]: 176 | # We will store per-base/indel data in a dict 177 | base_data = {} 178 | # Split the base/indel data on ':' 179 | base_values = base_data_string.split(':') 180 | # Iterate over each field of the base/indel data 181 | # Note that this relies on Python 3.5+ to keep the keys in order 182 | for i, base_field in enumerate(base_fields.keys()): 183 | # Store each field of data, converting to the appropriate 184 | # data type 185 | base_data[base_field] = base_fields[base_field](base_values[i]) 186 | 187 | # Do something with base_data, store it, filter it, etc. 188 | ``` 189 | 190 | 191 | ## Parsing example 192 | 193 | For example, for the Omicron data, we could look for variants by printing 194 | data on every base with non-zero counts that meets a minimum coverage threshold 195 | and minimum frequency out of the total depth. 
We'll also print out 196 | `avg_pos_as_fraction` and `average_basequality` to use later 197 | 198 | ```python 199 | # Skip zero-depth bases 200 | if depth == 0: 201 | continue 202 | # Skip reference bases and bases with no counts 203 | if base_data['base'] == reference_base or base_data['count'] == 0: 204 | continue 205 | # Calculate an allele frequency (VAF) from the base counts 206 | vaf = base_data['count'] / depth 207 | # Filter on minimum depth and VAF 208 | if depth >= args.min_cov and vaf >= args.min_vaf: 209 | # Output count and VAF data as well as avg_pos_as_fraction 210 | print('\t'.join( 211 | str(x) for x in (chrom, position, reference_base, base_data['base'], 212 | '%0.2f' % (vaf), depth, base_data['count'], 213 | base_data['avg_basequality'], base_data['avg_pos_as_fraction']))) 214 | ``` 215 | 216 | Here's the [full script](scripts/parse_brc.py). 217 | Let's try to run it on the Omicron data 218 | 219 | # Download script 220 | dockercmd curlimages/curl -o parse_brc.py "https://raw.githubusercontent.com/genome/bam-readcount/master/tutorial/scripts/parse_brc.py" 221 | 222 | # Run under Python image and count lines of output 223 | dockercmd python:3.8.2-alpine python parse_brc.py CERI-KRISP-K032274.brc.tsv | wc -l 224 | 225 | This results in `2181` variant bases with some counts, so the unfiltered data 226 | seems to be noisy. Let's filter, requiring 10 reads of coverage and a 227 | minimum VAF of `0.1` 228 | 229 | # Run with filters and count lines of output 230 | dockercmd python:3.8.2-alpine python parse_brc.py --min-cov 10 --min-vaf 0.2 CERI-KRISP-K032274.brc.tsv | wc -l 231 | 232 | This results in `22`, which seems manageable to start with, let's take a look 233 | 234 | dockercmd python:3.8.2-alpine python parse_brc.py --min-cov 10 --min-vaf 0.2 CERI-KRISP-K032274.brc.tsv 235 | 236 | chrom position ref base vaf depth count avg_pos_as_fraction 237 | NC_045512.2 21762 C T 1.00 1716 1708 0.56 238 | NC_045512.2 21765 T -TACATG 1.00 1615 1612 0.58 239 | NC_045512.2 21846 C T 1.00 1446 1442 0.56 240 | NC_045512.2 21987 G -GTGTTTATT 1.00 45 45 0.63 241 | NC_045512.2 22194 A -ATT 0.98 85 83 0.47 242 | NC_045512.2 22204 T +GAGCCAGAA 0.82 79 65 0.58 243 | NC_045512.2 22992 G A 0.91 23 21 0.29 244 | NC_045512.2 22995 C A 1.00 24 24 0.32 245 | NC_045512.2 23013 A C 0.94 32 30 0.47 246 | NC_045512.2 23040 A G 0.94 47 44 0.52 247 | NC_045512.2 23048 G A 0.98 49 48 0.51 248 | NC_045512.2 23055 A G 1.00 52 52 0.49 249 | NC_045512.2 23063 A T 1.00 53 53 0.49 250 | NC_045512.2 23075 T C 1.00 50 50 0.53 251 | NC_045512.2 23202 C A 0.98 56 55 0.54 252 | NC_045512.2 23948 G T 0.79 38 30 0.67 253 | NC_045512.2 24130 C A 0.98 45 44 0.49 254 | NC_045512.2 24424 A T 1.00 142 142 0.53 255 | NC_045512.2 24469 T A 1.00 2286 2275 0.37 256 | NC_045512.2 24503 C T 1.00 2966 2954 0.56 257 | NC_045512.2 25000 C T 1.00 12 12 0.46 258 | 259 | We might have missed some variants with these filters, but we got what look 260 | like 22 pretty clean (high-VAF) candidates. If we compare some of these to the 261 | [write-up on Omicron](https://covariants.org/variants/21K.Omicron) at 262 | [CoVariants](https://covariants.org), it looks like we've found several characteristic 263 | Omicron variants. 
I haven't gone through all of these, but the first six 264 | (enough to identify Omicron) listed at CoVariants are present 265 | 266 | 21762 A67V 267 | 21765 del69-70 268 | 21846 T95I 269 | 21987 del142-144/Y145D 270 | 22194 del211/N212I 271 | 22204 ins214 272 | 273 | 274 | ## Using a position list with `bam-readcount` 275 | 276 | Let's output the filtered results to a file 277 | 278 | dockercmd python:3.8.2-alpine python parse_brc.py --min-cov 10 --min-vaf 0.2 CERI-KRISP-K032274.brc.tsv > CERI-KRISP-K032274.brc.filtered_table.tsv 279 | 280 | And munge that table into a tab-separated, headerless, position list with 281 | fields 282 | 283 | chromosome start end 284 | 285 | Here we'll use the position in the filtered table to specify single-base 286 | "regions", but the region could be any size 287 | 288 | paste <(cut -f1 CERI-KRISP-K032274.brc.filtered_table.tsv) <(cut -f2 CERI-KRISP-K032274.brc.filtered_table.tsv) <(cut -f2 CERI-KRISP-K032274.brc.filtered_table.tsv) | grep -v chrom > CERI-KRISP-K032274.variant_position_list.tsv 289 | 290 | This results in (tab-separated) 291 | 292 | NC_045512.2 21762 21762 293 | NC_045512.2 21765 21765 294 | NC_045512.2 21846 21846 295 | NC_045512.2 21987 21987 296 | NC_045512.2 22194 22194 297 | NC_045512.2 22204 22204 298 | NC_045512.2 22992 22992 299 | NC_045512.2 22995 22995 300 | NC_045512.2 23013 23013 301 | NC_045512.2 23040 23040 302 | NC_045512.2 23048 23048 303 | NC_045512.2 23055 23055 304 | NC_045512.2 23063 23063 305 | NC_045512.2 23075 23075 306 | NC_045512.2 23202 23202 307 | NC_045512.2 23948 23948 308 | NC_045512.2 24130 24130 309 | NC_045512.2 24424 24424 310 | NC_045512.2 24469 24469 311 | NC_045512.2 24503 24503 312 | NC_045512.2 25000 25000 313 | 314 | We supply this list to `bam-readcount` with the `-l` argument 315 | 316 | # -l List of regions to report 317 | # -w1 Show any warnings only once; bam-readcount likes to complain about 318 | # missing SM tags for every read 319 | # -f Specify the reference genome 320 | dockercmd mgibio/bam-readcount -l CERI-KRISP-K032274.variant_position_list.tsv -w1 -f NC_045512.2.fa CERI-KRISP-K032274.sorted.bam NC_045512.2:21563-25384 > CERI-KRISP-K032274.brc.variant_positions.tsv 321 | 322 | Now the `bam-readcount` output contains just the specified regions 323 | 324 | NC_045512.2 21762 C 1716 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:1:60.00:17.00:0.00:0:1:0.17:0.14:167.00:0:0.00:109.00:0.04 G:2:60.00:15.50:0.00:1:1:0.75:0.07:71.50:1:0.61:141.50:0.62 T:1708:60.00:37.21:0.18:853:855:0.56:0.04:64.62:853:0.40:182.47:0.48 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 -CTATAC:4:60.00:0.00:0.00:3:1:0.60:0.06:107.00:3:0.33:166.50:0.31 -CTATACA:1:60.00:0.00:0.00:1:0:0.68:0.05:84.00:1:0.65:220.00:0.65 325 | NC_045512.2 21765 T 1615 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:3:60.00:33.00:0.00:1:2:0.09:0.05:77.00:1:0.01:100.33:0.35 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 -TACATG:1612:60.00:0.00:0.19:805:807:0.58:0.05:66.17:805:0.41:184.78:0.48 326 | NC_045512.2 21846 C 1446 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:3:56.33:22.00:0.00:1:2:0.54:0.07:101.33:1:0.70:118.33:0.46 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:1:60.00:16.00:0.00:0:1:0.34:0.08:112.00:0:0.00:153.00:0.83 
T:1442:60.00:37.40:0.12:705:737:0.56:0.03:68.47:705:0.31:165.37:0.49 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 327 | NC_045512.2 21987 G 45 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 -GTGTTTATT:45:60.00:0.00:0.00:26:19:0.63:0.06:15.16:26:0.57:182.02:0.51 328 | NC_045512.2 22194 A 85 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:2:60.00:35.50:0.00:1:1:0.02:0.01:35.50:1:0.01:131.00:0.50 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 -ATT:83:60.00:0.00:0.00:41:42:0.47:0.07:9.29:41:0.46:168.27:0.49 329 | NC_045512.2 22204 T 79 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:79:60.00:36.37:0.00:37:42:0.49:0.07:9.76:37:0.45:169.61:0.52 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 +GAGCCAGAA:65:60.00:0.00:0.00:31:34:0.58:0.08:10.98:31:0.53:176.46:0.53 330 | NC_045512.2 22992 G 23 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:21:60.00:36.62:0.00:11:10:0.29:0.06:285.33:11:0.85:146.10:0.49 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:2:60.00:35.50:0.00:1:1:0.35:0.01:114.00:1:0.11:250.50:0.44 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 331 | NC_045512.2 22995 C 24 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:24:60.00:36.08:0.00:12:12:0.32:0.05:271.42:12:0.76:159.17:0.47 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 332 | NC_045512.2 23013 A 32 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:2:60.00:27.00:0.00:1:1:0.18:0.01:114.00:1:0.02:250.50:0.44 C:30:60.00:36.93:0.00:15:15:0.47:0.06:276.73:15:0.74:155.17:0.47 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 333 | NC_045512.2 23040 A 47 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:3:46.67:37.67:0.00:1:2:0.59:0.00:37.00:1:0.14:96.00:0.45 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:44:60.00:36.64:0.00:23:21:0.52:0.05:254.59:23:0.65:154.25:0.50 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 334 | NC_045512.2 23048 G 49 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:48:60.00:38.21:0.00:25:23:0.51:0.05:247.04:25:0.63:156.15:0.50 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:1:60.00:33.00:0.00:0:1:0.02:0.01:111.00:0:0.00:250.00:0.99 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 335 | NC_045512.2 23055 A 52 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:52:60.00:35.62:0.00:27:25:0.49:0.05:239.92:27:0.61:161.48:0.50 
T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 336 | NC_045512.2 23063 A 53 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:53:60.00:36.91:0.00:28:25:0.49:0.04:233.19:28:0.61:166.74:0.50 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 337 | NC_045512.2 23075 T 50 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:50:60.00:36.88:0.00:26:24:0.53:0.04:229.48:26:0.57:171.88:0.49 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 338 | NC_045512.2 23202 C 56 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:55:60.00:36.87:0.00:27:28:0.54:0.02:105.35:27:0.32:184.18:0.48 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:1:60.00:20.00:0.00:1:0:0.77:0.01:20.00:1:0.61:183.00:0.61 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 339 | NC_045512.2 23948 G 38 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:8:51.00:35.38:0.00:4:4:0.21:0.00:3.75:4:0.17:78.00:0.17 T:30:60.00:37.23:0.00:16:14:0.67:0.01:48.77:16:0.61:190.93:0.49 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 340 | NC_045512.2 24130 C 45 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:44:60.00:36.20:0.00:21:23:0.49:0.01:54.45:21:0.43:164.93:0.47 C:1:60.00:32.00:0.00:0:1:0.09:0.00:14.00:0:0.00:251.00:0.96 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 341 | NC_045512.2 24424 A 142 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:142:60.00:36.93:0.00:71:71:0.53:0.02:91.25:71:0.41:190.32:0.47 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 342 | NC_045512.2 24469 T 2286 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:2275:59.98:36.49:0.13:1203:1072:0.37:0.02:82.69:1203:0.79:166.69:0.51 C:2:60.00:23.00:0.00:1:1:0.47:0.05:108.50:1:0.95:110.50:0.54 G:1:60.00:16.00:0.00:1:0:0.27:0.11:416.00:1:0.86:251.00:0.86 T:8:60.00:27.50:0.00:7:1:0.40:0.01:44.25:7:0.79:142.50:0.71 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 343 | NC_045512.2 24503 C 2966 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:4:60.00:16.50:0.00:2:2:0.11:0.02:76.75:2:0.93:184.00:0.49 C:4:57.50:27.75:12.50:2:2:0.74:0.03:80.50:2:0.58:138.75:0.43 G:4:60.00:15.00:0.00:3:1:0.56:0.04:145.50:3:0.72:212.50:0.62 T:2954:59.98:36.78:0.16:1512:1442:0.56:0.01:74.20:1512:0.65:171.70:0.49 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 344 | NC_045512.2 25000 C 12 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:12:59.83:37.92:15.00:6:6:0.46:0.01:39.33:6:0.34:171.25:0.49 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 345 | 346 | 347 | ## Positions reported by `bam-readcount` 348 
| 349 | What positions does `bam-readcount` report on? 350 | 351 | The region specified for the S protein is `21563..25384`, `3822` bases. But 352 | 353 | wc -l CERI-KRISP-K032274.brc.tsv 354 | 3305 355 | 356 | This is because `bam-readcount` will only report positions that have nonzero 357 | coverage, with one caveat: when there is a deletion longer than 1 base, 358 | `bam-readcount` will report the indel at the starting base, followed by 359 | the other deleted bases. Here are two examples: 360 | 361 | First, here again is the `-TACATG` deletion at position `21765`. There are a few reads 362 | showing the reference at the start of the deletion and in the following bases, 363 | so the reported coverage is just for those bases 364 | 365 | NC_045512.2 21765 T 1615 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:3:60.00:33.00:0.00:1:2:0.09:0.05:77.00:1:0.01:100.33:0.35 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 -TACATG:1612:60.00:0.00:0.19:805:807:0.58:0.05:66.17:805:0.41:184.78:0.48 366 | NC_045512.2 21766 A 1 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:1:60.00:16.00:0.00:0:1:0.24:0.14:167.00:0:0.00:109.00:0.05 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 367 | NC_045512.2 21767 C 1 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 -CATGTC:1:60.00:0.00:0.00:0:1:0.24:0.14:167.00:0:0.00:109.00:0.05 368 | NC_045512.2 21768 A 4 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:1:60.00:18.00:0.00:1:0:0.27:0.04:38.00:1:0.12:160.00:0.12 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:3:60.00:33.33:0.00:2:1:0.70:0.07:130.00:2:0.42:168.67:0.37 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 369 | NC_045512.2 21769 T 5 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:5:60.00:30.60:0.00:4:1:0.62:0.06:102.40:4:0.40:177.20:0.37 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 370 | NC_045512.2 21770 G 5 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:1:60.00:38.00:0.00:1:0:0.25:0.04:38.00:1:0.11:160.00:0.11 C:1:60.00:17.00:0.00:1:0:0.89:0.08:206.00:1:0.50:229.00:0.50 G:2:60.00:17.00:0.00:2:0:0.68:0.06:91.50:2:0.48:180.00:0.48 T:1:60.00:18.00:0.00:0:1:0.58:0.07:85.00:0:0.00:137.00:0.29 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 +A:1:60.00:0.00:0.00:1:0:0.70:0.05:84.00:1:0.64:220.00:0.64 371 | 372 | Here is a different deletion, `-GTGTTTATT` at position `21987`.
In this case, 373 | every read except for one read in the last `T` position at `21995` shows the 374 | deletion, and the reported coverage for the other positions is zero 375 | 376 | NC_045512.2 21987 G 45 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 -GTGTTTATT:45:60.00:0.00:0.00:26:19:0.63:0.06:15.16:26:0.57:182.02:0.51 377 | NC_045512.2 21988 T 0 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 378 | NC_045512.2 21989 G 0 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 379 | NC_045512.2 21990 T 0 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 380 | NC_045512.2 21991 T 0 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 381 | NC_045512.2 21992 T 0 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 382 | NC_045512.2 21993 A 0 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 383 | NC_045512.2 21994 T 0 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 384 | NC_045512.2 21995 T 1 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 G:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 T:1:60.00:17.00:0.00:0:1:0.77:0.06:0.00:0:0.00:154.00:0.38 N:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 385 | 386 | Additional notes: 387 | 388 | - `avg_basequality` is set to `0.00` for
deletions 389 | 390 | - If a region or region list is provided, the following bases in a deletion will 391 | only be shown if they are included in the list 392 | 393 | 394 | ## Using `bam-readcount` statistics 395 | 396 | Let's process the `bam-readcount` output again, this time without any coverage 397 | or VAF filters, and write the parsed table to a file 398 | 399 | dockercmd python:3.8.2-alpine python parse_brc.py CERI-KRISP-K032274.brc.tsv > CERI-KRISP-K032274.brc.parsed.tsv 400 | 401 | We wrote the `bam-readcount` stats for `avg_pos_as_fraction` and 402 | `avg_basequality` to the output, and I've created scripts to plot each of 403 | these against the VAF. The scripts require some additional libraries, so 404 | we won't run them via the minimal Python Docker container, but they are 405 | available [here](scripts). 406 | 407 | ![vaf_vs_avg_basequality.png](plots/vaf_vs_avg_basequality.png) 408 | 409 | ![vaf_vs_avg_pos_as_fraction.png](plots/vaf_vs_avg_pos_as_fraction.png) 410 | 411 | For positions with coverage of 10 or more, we'd generally expect to see a low VAF 412 | when the variant reads are just sequencing errors, unless the position has a systematic bias. 413 | 414 | Looking at the `vaf vs avg_basequality` plot, it looks like nearly all VAFs 415 | above `0.75` have an `avg_basequality` over `35`. The exceptions are 416 | four variants with an `avg_basequality` of `0.00`, but recall that 417 | indels have this field set to `0.00` since there is no data to base it on, 418 | so these are probably the four indels discussed earlier. 419 | 420 | The `vaf vs avg_pos_as_fraction` plot is less clear. This metric 421 | approaches zero if the variant is, on average, found at either end of the 422 | read, and approaches one as the variant is, on average, found in the center 423 | of the read. Read quality and other issues arise at the ends of reads, so this 424 | is often used as a filter. Here, noise looks spread throughout the range 425 | of this statistic, although all the higher-VAF variants are above `0.2`. 426 | It's interesting that the higher-VAF variants are found in the middle of the range, 427 | but this could just be a sampling effect among the `22` out of `2181` positions 428 | with variant counts.
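These statistics can also be applied directly as filters on the parsed table. Here is a minimal sketch of one way to do that; it is not part of the tutorial scripts, and the cutoffs of `30` for `avg_basequality` and `0.2` for `avg_pos_as_fraction`, as well as the `filter_brc.py` filename, are illustrative assumptions rather than recommendations. It keeps variants passing both cutoffs and lets indels skip the base-quality check, since their `avg_basequality` is always reported as `0.00`

    #!/usr/bin/env python
    # Minimal sketch (assumed thresholds): filter the parse_brc.py table on
    # bam-readcount statistics. Indels are exempt from the avg_basequality
    # cutoff because bam-readcount reports 0.00 for that field on indels.
    import csv
    import sys

    MIN_AVG_BASEQUALITY = 30.0     # assumed threshold, not from the tutorial
    MIN_AVG_POS_AS_FRACTION = 0.2  # assumed threshold, not from the tutorial

    with open(sys.argv[1]) as in_fh:
        reader = csv.DictReader(in_fh, delimiter='\t')
        writer = csv.DictWriter(sys.stdout, fieldnames=reader.fieldnames, delimiter='\t')
        writer.writeheader()
        for row in reader:
            # Indel "bases" look like -TACATG or +GAGCCAGAA in bam-readcount output
            is_indel = row['base'].startswith(('+', '-'))
            if not is_indel and float(row['avg_basequality']) < MIN_AVG_BASEQUALITY:
                continue
            if float(row['avg_pos_as_fraction']) < MIN_AVG_POS_AS_FRACTION:
                continue
            writer.writerow(row)

You could run it the same way as `parse_brc.py`, for example `python filter_brc.py CERI-KRISP-K032274.brc.parsed.tsv`, if you save the sketch under that name.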
429 | 430 | 431 | 432 | -------------------------------------------------------------------------------- /tutorial/plots/vaf_vs_avg_basequality.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genome/bam-readcount/c7c76e6b84de9ed774ea3865fedc04b2d6276865/tutorial/plots/vaf_vs_avg_basequality.png -------------------------------------------------------------------------------- /tutorial/plots/vaf_vs_avg_pos_as_fraction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genome/bam-readcount/c7c76e6b84de9ed774ea3865fedc04b2d6276865/tutorial/plots/vaf_vs_avg_pos_as_fraction.png -------------------------------------------------------------------------------- /tutorial/scripts/clean.sh: -------------------------------------------------------------------------------- 1 | rm -f CERI-KRISP-K032274* NC_045512.2.fa* *.fastq *.gz parse_brc.py 2 | -------------------------------------------------------------------------------- /tutorial/scripts/commands.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | dockercmd () { 4 | docker run --rm -w $(pwd) -v $(pwd):$(pwd) "$@" 5 | } 6 | dockercmd curlimages/curl -o NC_045512.2.fa "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_045512.2&rettype=fasta" 7 | dockercmd ncbi/sra-tools fastq-dump --split-files --origfmt SRR17054502 8 | dockercmd seqfu/alpine-bwa bwa index NC_045512.2.fa 9 | dockercmd seqfu/alpine-bwa bwa mem NC_045512.2.fa SRR17054502_1.fastq SRR17054502_2.fastq > CERI-KRISP-K032274.sam 10 | dockercmd seqfu/alpine-samtools-1.10 samtools view -b CERI-KRISP-K032274.sam > CERI-KRISP-K032274.bam 11 | dockercmd seqfu/alpine-samtools-1.10 samtools sort CERI-KRISP-K032274.bam > CERI-KRISP-K032274.sorted.bam 12 | dockercmd seqfu/alpine-samtools-1.10 samtools index CERI-KRISP-K032274.sorted.bam 13 | dockercmd mgibio/bam-readcount -w1 -f NC_045512.2.fa CERI-KRISP-K032274.sorted.bam NC_045512.2:21563-25384 > CERI-KRISP-K032274.brc.tsv 14 | dockercmd curlimages/curl -o parse_brc.py "https://raw.githubusercontent.com/genome/bam-readcount/master/tutorial/scripts/parse_brc.py" 15 | dockercmd python:3.8.2-alpine python parse_brc.py CERI-KRISP-K032274.brc.tsv | wc -l 16 | dockercmd python:3.8.2-alpine python parse_brc.py --min-cov 10 --min-vaf 0.2 CERI-KRISP-K032274.brc.tsv | wc -l 17 | dockercmd python:3.8.2-alpine python parse_brc.py --min-cov 10 --min-vaf 0.2 CERI-KRISP-K032274.brc.tsv 18 | dockercmd python:3.8.2-alpine python parse_brc.py --min-cov 10 --min-vaf 0.2 CERI-KRISP-K032274.brc.tsv > CERI-KRISP-K032274.brc.filtered_table.tsv 19 | paste <(cut -f1 CERI-KRISP-K032274.brc.filtered_table.tsv) <(cut -f2 CERI-KRISP-K032274.brc.filtered_table.tsv) <(cut -f2 CERI-KRISP-K032274.brc.filtered_table.tsv) | grep -v chrom > CERI-KRISP-K032274.variant_position_list.tsv 20 | dockercmd mgibio/bam-readcount -l CERI-KRISP-K032274.variant_position_list.tsv -w1 -f NC_045512.2.fa CERI-KRISP-K032274.sorted.bam NC_045512.2:21563-25384 > CERI-KRISP-K032274.brc.variant_positions.tsv 21 | dockercmd python:3.8.2-alpine python parse_brc.py CERI-KRISP-K032274.brc.tsv > CERI-KRISP-K032274.brc.parsed.tsv 22 | -------------------------------------------------------------------------------- /tutorial/scripts/make_commands.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cat > scripts/commands.sh <> 
scripts/commands.sh 15 | 16 | chmod +x scripts/commands.sh 17 | 18 | -------------------------------------------------------------------------------- /tutorial/scripts/parse_brc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import argparse 5 | import logging 6 | 7 | SCRIPT_PATH = os.path.abspath(__file__) 8 | FORMAT = '[%(asctime)s] %(levelname)s %(message)s' 9 | l = logging.getLogger() 10 | lh = logging.StreamHandler() 11 | lh.setFormatter(logging.Formatter(FORMAT)) 12 | l.addHandler(lh) 13 | l.setLevel(logging.INFO) 14 | debug = l.debug 15 | info = l.info 16 | warning = l.warning 17 | error = l.error 18 | 19 | DESCRIPTION = ''' 20 | 21 | Parse bam-readcount output with optional filters 22 | 23 | 24 | ''' 25 | 26 | EPILOG = ''' 27 | ''' 28 | 29 | 30 | class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter, 31 | argparse.RawDescriptionHelpFormatter): 32 | pass 33 | 34 | 35 | parser = argparse.ArgumentParser(description=DESCRIPTION, 36 | epilog=EPILOG, 37 | formatter_class=CustomFormatter) 38 | 39 | parser.add_argument('bam_readcount_output') 40 | parser.add_argument('--min-cov', 41 | action='store', 42 | type=int, 43 | help='Minimum coverage to report variant', 44 | default=0) 45 | parser.add_argument('--min-vaf', 46 | action='store', 47 | type=float, 48 | help='Minimum VAF to report variant', 49 | default=0.0) 50 | parser.add_argument('-v', 51 | '--verbose', 52 | action='store_true', 53 | help='Set logging level to DEBUG') 54 | 55 | args = parser.parse_args() 56 | 57 | if args.verbose: 58 | l.setLevel(logging.DEBUG) 59 | 60 | debug('%s begin', SCRIPT_PATH) 61 | 62 | headers = [ 63 | 'chrom', 'position', 'ref', 'base', 'vaf', 'depth', 'count', 64 | 'avg_basequality', 'avg_pos_as_fraction' 65 | ] 66 | print('\t'.join(headers)) 67 | 68 | # Per-base/indel data fields 69 | # IMPORTANT: this relies on Python 3.6+ to maintain insertion order 70 | # Each field is a key with value a function to convert to the 71 | # appropriate data type 72 | base_fields = { 73 | 'base': str, 74 | 'count': int, 75 | 'avg_mapping_quality': float, 76 | 'avg_basequality': float, 77 | 'avg_se_mapping_quality': float, 78 | 'num_plus_strand': int, 79 | 'num_minus_strand': int, 80 | 'avg_pos_as_fraction': float, 81 | 'avg_num_mismatches_as_fraction': float, 82 | 'avg_sum_mismatch_qualities': float, 83 | 'num_q2_containing_reads': int, 84 | 'avg_distance_to_q2_start_in_q2_reads': float, 85 | 'avg_clipped_length': float, 86 | 'avg_distance_to_effective_3p_end': float 87 | } 88 | 89 | # Open the bam-readcount output file and read it line by line 90 | # Note that the output has no header, so we consume every line 91 | with open(args.bam_readcount_output) as in_fh: 92 | for line in in_fh: 93 | # Strip newline from end of line 94 | line = line.strip() 95 | # Fields are tab-separated, so split into a list on \t 96 | fields = line.split('\t') 97 | # The first four fields contain overall information about the position 98 | chrom = fields[0] # Chromosome/reference 99 | position = int(fields[1]) # Position (1-based) 100 | reference_base = fields[2] # Reference base 101 | depth = int(fields[3]) # Depth of coverage 102 | # The remaining fields are data for each base or indel 103 | # Iterate over each base/indel 104 | for base_data_string in fields[4:]: 105 | # We will store per-base/indel data in a dict 106 | base_data = {} 107 | # Split the base/indel data on ':' 108 | base_values = base_data_string.split(':') 109 | # Iterate over each field 
of the base/indel data 110 | for i, base_field in enumerate(base_fields.keys()): 111 | # Store each field of data, converting to the appropriate 112 | # data type 113 | base_data[base_field] = base_fields[base_field](base_values[i]) 114 | 115 | # Skip zero-depth bases 116 | if depth == 0: 117 | continue 118 | # Skip reference bases and bases with no counts 119 | if base_data['base'] == reference_base or base_data['count'] == 0: 120 | continue 121 | # Calculate an allele frequency (VAF) from the base counts 122 | vaf = base_data['count'] / depth 123 | # Filter on minimum depth and VAF 124 | if depth >= args.min_cov and vaf >= args.min_vaf: 125 | # Output count and VAF data as well as avg_pos_as_fraction 126 | print('\t'.join( 127 | str(x) for x in (chrom, position, reference_base, base_data['base'], 128 | '%0.2f' % (vaf), depth, base_data['count'], 129 | base_data['avg_basequality'], base_data['avg_pos_as_fraction']))) 130 | 131 | debug('%s end', (SCRIPT_PATH)) 132 | -------------------------------------------------------------------------------- /tutorial/scripts/plot_vaf_vs_avg_bq.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import argparse 5 | import logging 6 | import pandas as pd 7 | import matplotlib as mpl 8 | mpl.use('Agg') 9 | import matplotlib.pyplot as plt 10 | import seaborn as sns 11 | sns.set_style('whitegrid') 12 | 13 | SCRIPT_PATH = os.path.abspath(__file__) 14 | FORMAT = '[%(asctime)s] %(levelname)s %(message)s' 15 | l = logging.getLogger() 16 | lh = logging.StreamHandler() 17 | lh.setFormatter(logging.Formatter(FORMAT)) 18 | l.addHandler(lh) 19 | l.setLevel(logging.INFO) 20 | debug = l.debug; info = l.info; warning = l.warning; error = l.error 21 | 22 | DESCRIPTION = ''' 23 | ''' 24 | 25 | EPILOG = ''' 26 | ''' 27 | 28 | class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter, 29 | argparse.RawDescriptionHelpFormatter): 30 | pass 31 | parser = argparse.ArgumentParser(description=DESCRIPTION, epilog=EPILOG, 32 | formatter_class=CustomFormatter) 33 | 34 | parser.add_argument('variant_table') 35 | parser.add_argument('--min-var', action='store', type=int, 36 | help='Minimum number of variant reads', default=1) 37 | parser.add_argument('--min-depth', action='store', type=int, 38 | help='Minimum number of variant reads', default=1) 39 | parser.add_argument('-v', '--verbose', action='store_true', 40 | help='Set logging level to DEBUG') 41 | 42 | args = parser.parse_args() 43 | 44 | if args.verbose: 45 | l.setLevel(logging.DEBUG) 46 | 47 | debug('%s begin', SCRIPT_PATH) 48 | 49 | df = pd.read_table(args.variant_table) 50 | 51 | select = (df['depth'] >= args.min_depth) & (df['count'] >= args.min_var) 52 | df = df[select] 53 | 54 | plt.plot(df['avg_basequality'], df['vaf'], 'o', color='black', alpha=0.6) 55 | plt.title('vaf vs avg_basequality') 56 | plt.xlabel('avg_basequality') 57 | plt.ylabel('vaf') 58 | plt.savefig('vaf_vs_avg_basequality.png') 59 | 60 | 61 | debug('%s end', (SCRIPT_PATH)) 62 | -------------------------------------------------------------------------------- /tutorial/scripts/plot_vaf_vs_avg_pos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import argparse 5 | import logging 6 | import pandas as pd 7 | import matplotlib as mpl 8 | mpl.use('Agg') 9 | import matplotlib.pyplot as plt 10 | import seaborn as sns 11 | sns.set_style('whitegrid') 12 | 13 | SCRIPT_PATH = 
os.path.abspath(__file__) 14 | FORMAT = '[%(asctime)s] %(levelname)s %(message)s' 15 | l = logging.getLogger() 16 | lh = logging.StreamHandler() 17 | lh.setFormatter(logging.Formatter(FORMAT)) 18 | l.addHandler(lh) 19 | l.setLevel(logging.INFO) 20 | debug = l.debug; info = l.info; warning = l.warning; error = l.error 21 | 22 | DESCRIPTION = ''' 23 | ''' 24 | 25 | EPILOG = ''' 26 | ''' 27 | 28 | class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter, 29 | argparse.RawDescriptionHelpFormatter): 30 | pass 31 | parser = argparse.ArgumentParser(description=DESCRIPTION, epilog=EPILOG, 32 | formatter_class=CustomFormatter) 33 | 34 | parser.add_argument('variant_table') 35 | parser.add_argument('--min-var', action='store', type=int, 36 | help='Minimum number of variant reads', default=1) 37 | parser.add_argument('--min-depth', action='store', type=int, 38 | help='Minimum number of variant reads', default=1) 39 | parser.add_argument('--min-vaf', action='store', type=float, 40 | help='Minimum VAF', default=0) 41 | parser.add_argument('-v', '--verbose', action='store_true', 42 | help='Set logging level to DEBUG') 43 | 44 | args = parser.parse_args() 45 | 46 | if args.verbose: 47 | l.setLevel(logging.DEBUG) 48 | 49 | debug('%s begin', SCRIPT_PATH) 50 | 51 | df = pd.read_table(args.variant_table) 52 | 53 | select = (df['depth'] >= args.min_depth) & \ 54 | (df['count'] >= args.min_var) & \ 55 | (df['vaf'] >= args.min_vaf) 56 | 57 | df = df[select] 58 | 59 | plt.plot(df['avg_pos_as_fraction'], df['vaf'], 'o', color='black') 60 | plt.title('vaf vs avg_pos_as_fraction') 61 | plt.xlabel('avg_pos_as_fraction') 62 | plt.ylabel('vaf') 63 | plt.savefig('vaf_vs_avg_pos_as_fraction.png') 64 | 65 | 66 | debug('%s end', (SCRIPT_PATH)) 67 | -------------------------------------------------------------------------------- /vendor/Makefile.disable_curl.patch: -------------------------------------------------------------------------------- 1 | --- ../../samtools-1.10/htslib-1.10/Makefile 2019-11-26 10:01:13.000000000 -0600 2 | +++ vendor/samtools-1.10/htslib-1.10/Makefile 2019-12-19 14:28:10.000000000 -0600 3 | @@ -27,7 +27,8 @@ 4 | RANLIB = ranlib 5 | 6 | # Default libraries to link if configure is not used 7 | -htslib_default_libs = -lz -lm -lbz2 -llzma -lcurl 8 | +#htslib_default_libs = -lz -lm -lbz2 -llzma -lcurl 9 | +htslib_default_libs = -lz -lm -lbz2 -llzma 10 | 11 | CPPFLAGS = 12 | # TODO: probably update cram code to make it compile cleanly with -Wc++-compat 13 | @@ -179,7 +180,7 @@ 14 | # Without configure we wish to have a rich set of default figures, 15 | # but we still need conditional inclusion as we wish to still 16 | # support ./configure --disable-blah. 17 | -NONCONFIGURE_OBJS = hfile_libcurl.o 18 | +#NONCONFIGURE_OBJS = hfile_libcurl.o 19 | 20 | PLUGIN_EXT = 21 | PLUGIN_OBJS = 22 | @@ -219,7 +220,7 @@ 23 | echo '#define HAVE_LZMA_H 1' >> $@ 24 | echo '#endif' >> $@ 25 | echo '#define HAVE_DRAND48 1' >> $@ 26 | - echo '#define HAVE_LIBCURL 1' >> $@ 27 | +# echo '#define HAVE_LIBCURL 1' >> $@ 28 | 29 | # And similarly for htslib.pc.tmp ("pkg-config template"). No dependency 30 | # on htslib.pc.in listed, as if that file is newer the usual way to regenerate 31 | -------------------------------------------------------------------------------- /vendor/README.md: -------------------------------------------------------------------------------- 1 | Vendored libraries 2 | ================== 3 | 4 | 5 | Licenses 6 | -------- 7 | 8 | See each package for license information. 
9 | 10 | 11 | URLs 12 | ---- 13 | 14 | https://github.com/samtools/samtools/releases/download/1.10/samtools-1.10.tar.bz2 15 | 16 | https://www.zlib.net/zlib-1.2.11.tar.gz 17 | 18 | https://sourceware.org/pub/bzip2/bzip2-1.0.8.tar.gz 19 | 20 | https://tukaani.org/xz/xz-5.2.4.tar.gz 21 | 22 | https://curl.haxx.se/download/curl-7.67.0.tar.gz 23 | 24 | https://tls.mbed.org/download/mbedtls-2.16.4-apache.tgz 25 | 26 | `boost-1.55-bamrc.tar.gz` is a custom subset of the Boost library. 27 | 28 | `Makefile.disable_curl.patch` is not currently used, but can 29 | be enabled to disable building of `curl`. 30 | 31 | 32 | Notes 33 | ----- 34 | 35 | We chose `mbedtls-2.16.4-apache.tgz` for `libcurl` SSL support as it has 36 | no additional dependencies. This is used (via `https`) to query the ENA 37 | CRAM registry for reference hashes. 38 | 39 | Instead of `libcurl` another good option might be wolfSSL's tiny-cURL 40 | distribution, which builds a lighter `curl` library. 41 | 42 | 43 | -------------------------------------------------------------------------------- /vendor/boost-1.55-bamrc.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genome/bam-readcount/c7c76e6b84de9ed774ea3865fedc04b2d6276865/vendor/boost-1.55-bamrc.tar.gz -------------------------------------------------------------------------------- /vendor/bzip2-1.0.8.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genome/bam-readcount/c7c76e6b84de9ed774ea3865fedc04b2d6276865/vendor/bzip2-1.0.8.tar.gz -------------------------------------------------------------------------------- /vendor/curl-7.67.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genome/bam-readcount/c7c76e6b84de9ed774ea3865fedc04b2d6276865/vendor/curl-7.67.0.tar.gz -------------------------------------------------------------------------------- /vendor/mbedtls-2.16.4-apache.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genome/bam-readcount/c7c76e6b84de9ed774ea3865fedc04b2d6276865/vendor/mbedtls-2.16.4-apache.tgz -------------------------------------------------------------------------------- /vendor/samtools-1.10.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genome/bam-readcount/c7c76e6b84de9ed774ea3865fedc04b2d6276865/vendor/samtools-1.10.tar.bz2 -------------------------------------------------------------------------------- /vendor/xz-5.2.4.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genome/bam-readcount/c7c76e6b84de9ed774ea3865fedc04b2d6276865/vendor/xz-5.2.4.tar.gz -------------------------------------------------------------------------------- /vendor/zlib-1.2.11.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genome/bam-readcount/c7c76e6b84de9ed774ea3865fedc04b2d6276865/vendor/zlib-1.2.11.tar.gz -------------------------------------------------------------------------------- /version/version.h.in: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | const static char* __g_prog_version = "@FULL_VERSION@"; 8 | const static char* 
__g_commit_hash = "@COMMIT_HASH@"; 9 | 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | --------------------------------------------------------------------------------