├── .gitignore ├── .travis.yml ├── AUTHORS ├── Changelog ├── LICENSE ├── Makefile.am ├── NEWS ├── README.md ├── binary_installer.sh ├── bootstrap ├── configure.ac ├── devel-doc ├── FIXME ├── autotools.README ├── best-practices.txt ├── debug.README ├── dist.README ├── docker.README └── git.README ├── dist ├── LoFreq-0.2.tar.gz ├── LoFreq-0.3.1.tar.gz ├── LoFreq-0.3.2.tar.gz ├── LoFreq-0.3.tar.gz ├── LoFreq-0.4.0.tar.gz ├── lofreq-0.5.0.tar.gz ├── lofreq-v0.1.zip ├── lofreq_star-2.0.0-beta-2-linux-x86-64.tgz ├── lofreq_star-2.0.0-beta-2-macosx.tgz ├── lofreq_star-2.0.0-beta-3-linux-x86-64.tgz ├── lofreq_star-2.0.0-beta-3-macosx.tgz ├── lofreq_star-2.0.0-beta-3.tar.gz ├── lofreq_star-2.0.0-beta-linux-x86-64.tgz ├── lofreq_star-2.0.0-beta-macosx.tgz ├── lofreq_star-2.0.0-linux-x86-64.tgz ├── lofreq_star-2.0.0-macosx.tgz ├── lofreq_star-2.0.0-rc-1.tar.gz ├── lofreq_star-2.0.0.tar.gz ├── lofreq_star-2.1.0.tar.gz ├── lofreq_star-2.1.0_linux-x86-64.tgz ├── lofreq_star-2.1.0_macosx.tgz ├── lofreq_star-2.1.1.tar.gz ├── lofreq_star-2.1.1_linux-x86-64.tgz ├── lofreq_star-2.1.1_macosx.tgz ├── lofreq_star-2.1.2.tar.gz ├── lofreq_star-2.1.2_linux-x86-64.tgz ├── lofreq_star-2.1.2_macosx.tgz ├── lofreq_star-2.1.3.1.tar.gz ├── lofreq_star-2.1.3.1_linux-x86-64.tgz ├── lofreq_star-2.1.3.1_macosx.tgz ├── lofreq_star-2.1.4.tar.gz ├── lofreq_star-2.1.4_linux-x86-64.tgz ├── lofreq_star-2.1.5.tar.gz └── lofreq_star-2.1.5_linux-x86-64.tgz ├── m4 ├── ax_pthread.m4 └── ax_with_htslib.m4 ├── snakemake ├── Snakefile ├── cfg.yaml ├── data │ ├── ref.fa │ ├── regions.bed │ ├── sample1_R1.fastq.gz │ ├── sample1_R2.fastq.gz │ └── sample2.fastq.gz ├── dryrun.sh └── ecoli-cfg.yaml ├── src ├── cdflib90.README ├── cdflib90 │ ├── .gitignore │ ├── Makefile.am │ ├── cdflib.h │ ├── dcdflib.c │ ├── ipmpar.c │ └── readme ├── lofreq │ ├── .gitignore │ ├── Makefile.am │ ├── bam_index.c │ ├── bam_index.c.LICENSE │ ├── bam_index.c.README │ ├── bam_md_ext.c │ ├── bam_md_ext.h │ ├── bedidx.c │ ├── bedidx.c.LICENSE 
│ ├── binom.c │ ├── binom.h │ ├── defaults.h │ ├── fet.c │ ├── fet.c.LICENSE │ ├── fet.h │ ├── kprobaln_ext.c │ ├── kprobaln_ext.h │ ├── lofreq_alnqual.c │ ├── lofreq_alnqual.h │ ├── lofreq_bamstats.c │ ├── lofreq_bamstats.h │ ├── lofreq_call.c │ ├── lofreq_call.h │ ├── lofreq_checkref.c │ ├── lofreq_checkref.h │ ├── lofreq_filter.c │ ├── lofreq_filter.h │ ├── lofreq_indelqual.c │ ├── lofreq_indelqual.h │ ├── lofreq_index.c │ ├── lofreq_index.h │ ├── lofreq_main.c │ ├── lofreq_uniq.c │ ├── lofreq_uniq.h │ ├── lofreq_vcfset.c │ ├── lofreq_vcfset.h │ ├── lofreq_viterbi.c │ ├── lofreq_viterbi.h │ ├── log.c │ ├── log.h │ ├── multtest.c │ ├── multtest.h │ ├── plp.c │ ├── plp.h │ ├── samutils.c │ ├── samutils.h │ ├── snpcaller.c │ ├── snpcaller.h │ ├── utils.c │ ├── utils.h │ ├── vcf.c │ ├── vcf.h │ ├── viterbi.c │ └── viterbi.h ├── scripts │ ├── Makefile.am │ ├── README │ ├── lofreq2_call_pparallel.py │ ├── lofreq2_local.py │ └── lofreq2_somatic.py ├── tools │ ├── .gitignore │ ├── Makefile.am │ ├── README │ ├── lofreq_star │ │ ├── __init__.py │ │ ├── fdr.py │ │ ├── multiple_testing.py │ │ ├── multiple_testing.py.README │ │ ├── multiple_testing.py.org │ │ └── utils.py │ ├── phased_out │ │ ├── lofreq2_filter.py │ │ ├── lofreq2_vcfset.py │ │ ├── vcf.py │ │ └── vcf.py.README │ ├── scripts │ │ ├── lofreq2_add_fake_gt.py │ │ ├── lofreq2_add_sample.py │ │ ├── lofreq2_analyze_somatic_fn.py │ │ ├── lofreq2_bias.py │ │ ├── lofreq2_cluster.py │ │ ├── lofreq2_indel_ovlp.py │ │ ├── lofreq2_local.py │ │ ├── lofreq2_vcfplot.py │ │ └── mutect_alt_allele_in_normal.py │ ├── setup.py │ ├── setup_conf.py.README │ └── setup_conf.py.in └── uthash │ ├── .gitignore │ ├── LICENSE │ ├── Makefile.am │ └── uthash.h └── tests ├── .gitignore ├── af_tests.sh ├── alnqual.sh.FIXME ├── bamstats.sh.FIXME ├── baq-calls-less-than-nobaq.sh ├── bed.sh ├── bgzf_getline.supp ├── binom_vs_poisson.FIXME ├── bonf_auto_vs_dyn.sh ├── consvar_noqual_filter.sh ├── denv2-pseudoclonal-source-qual.sh ├── 
denv2-pseudoclonal.sh ├── denv2-simulation.sh ├── denv2-validation.sh ├── diff_opts_same_out.sh.OLD ├── doctest.sh ├── ecoli-clone_incl_parallel.sh ├── ecoli_spikein.sh ├── exome_in_silico.sh.FIXME ├── faidx_fetch_seq.supp ├── fdr.sh ├── filter.sh ├── filter_c.sh ├── filter_only_snvs_or_indels.sh ├── icgc-tcga-dream-indel_chr19.sh ├── icgc-tcga-dream-testproject.sh ├── indel_misc.sh ├── indel_qual.sh ├── indels.sh.FIXME ├── is_quiet.sh.FIXME ├── lewis_known.sh.FIXME ├── lib.sh ├── melanoma.sh.FIXME ├── no_snvs_on_cons_indels.sh.FIXME ├── not-matching-ref.sh ├── parallel.sh ├── pseudomonas_jade.sh.FIXME ├── pylint.rc ├── pylint.sh ├── run_all.sh ├── somatic_CHH966_chr22.sh ├── uniq.sh ├── valgrind_call.sh ├── valgrind_uniq.sh ├── valgrind_vcfset.sh ├── valid_vcf_output.sh ├── vcf_setop.sh └── viterbi.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # auto generated files 2 | MANIFEST 3 | 4 | # backup files 5 | *~ 6 | *# 7 | 8 | # objects 9 | *.so 10 | *.pyc 11 | build/ 12 | lofreq/.fuse_hidden* 13 | .fuse_hidden* 14 | 15 | # tmp 16 | tmp/ 17 | schmock* 18 | clang_output* 19 | nohup.out 20 | gmon.* 21 | _* 22 | 23 | # automake stuff which gets rebuild after autoreconf 24 | Makefile 25 | Makefile.in 26 | aclocal.m4 27 | autom4te.cache/ 28 | config.* 29 | configure 30 | install-sh 31 | libtool 32 | ltmain.sh 33 | missing 34 | depcomp 35 | INSTALL 36 | compile 37 | 38 | m4/*m4 39 | *.plist 40 | 41 | tests/*log 42 | snakemake/.snakemake/ 43 | *_flymake.py 44 | *.DS_Store 45 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | compiler: 3 | - gcc 4 | - clang 5 | # Change this to your needs 6 | # Change this to your needs 7 | before_script: 8 | - wget 'https://github.com/samtools/htslib/releases/download/1.9/htslib-1.9.tar.bz2' -O /tmp/htslib-1.9.tar.bz2 9 | - tar -xjf 
/tmp/htslib-1.9.tar.bz2 10 | - cd htslib-1.9/ 11 | - make libhts.a htslib_static.mk 12 | - cd .. 13 | script: libtoolize; ./bootstrap && ./configure --with-htslib=${PWD}/htslib-1.9 && make 14 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | LoFreq-Star authors: 2 | - Niranjan Nagarajan: 3 | had all the brilliant ideas 4 | - Grace Hui Ting Yeo: 5 | implemented and tested the indel calling functions 6 | - Andreas Wilm: 7 | did all the rest 8 | 9 | 10 | LoFreq-Star uses some BSD licensed pieces of external software. See 11 | READMEs in sub-directories for a list of authors whose software we 12 | incorporate. 13 | 14 | 15 | LoFreq-Star is based on LoFreq (<1.0), which is described here 16 | http://www.ncbi.nlm.nih.gov/pubmed/23066108. For that version: 17 | - Niranjan Nagarajan: 18 | sketched out the core algorithm and implemented the pruned DP 19 | properly 20 | - Grace Hui Ting Yeo: 21 | implemented an early prototype of the EM (NQ) caller 22 | - Andreas Wilm: 23 | did all the rest 24 | 25 | -------------------------------------------------------------------------------- /Changelog: -------------------------------------------------------------------------------- 1 | 2020-06-15 Changes in 2.1.5 Andreas Wilm 2 | * Fixed a long-standing Heisenbug related to indel formatting (closes #89) 3 | 4 | 2020-01-04 Changes in 2.1.4 Andreas Wilm 5 | * Now using htslib API thanks to John Marshall. Requires htslib >= 1.4. 
New configure flags 6 | replacing SAMTOOLS and HTSLIB: --with-htslib 7 | * Minor fixes in parallel caller closing #85 8 | * Fixed #58 9 | * Enhancements to lofreq2_add_fake_gt.py 10 | 11 | 2017-06-21 Changes in 2.1.3.1 Andreas Wilm 12 | * Fixed bug introduced last minute in 2.1.3 that creates segfault if call is used without -o 13 | 14 | 2017-06-21 Changes in 2.1.3 Andreas Wilm 15 | * Maintenance release before major rewrite 16 | * Added Python3 support 17 | * Added best practices snakemake workflow 18 | * Little easier on memory in high coverage situations 19 | * Added --force-overwrite option to 'call' 20 | 21 | 2015-05-19 Changes in 2.1.2 Andreas Wilm 22 | * 'indelqual' now allows to read bam from stdin 23 | * Fixed bug in 'call' which resulted in negative phred quality 24 | filter, when pvalue alpha was above 1 and number of tests was low 25 | * 'indelqual' dindel now deletes BI/BD before inserting 26 | * remove unnecessary dependency on kaln.h (not present in samtools 27 | 1.2) 28 | * 'uniq' now closing output vcf filehandle on error, thus always 29 | writing at least a header (reported by DNANexus) 30 | * Added HRUN info field to output vcf 31 | * Fixed calling of indel consvars 32 | * Removed options (and use of) cons-as-ref and skip-n. now reference 33 | is always used by default to call against and n's are always 34 | skipped. also means the consensus variants (CONSVAR) concept 35 | disappeared 36 | * Set DEFAULT_MIN_PLP_IDQ to zero 37 | * Caught yet another variant of the reference sequence name 38 | mismatch problem 39 | * 'viterbi': memory allocation now mainly dynamic. 40 | fixes observed segfault on pacbio reads (unclear why though) 41 | * Low AF false positive multi-allelic 1bp indel adjacent to 42 | poly-AT now filtered by default. 
43 | * 'indelqual': added support for adding uniform insertion and 44 | deletion qualities (instead of just indel qualities) 45 | * indel calling: fixed index violation while accessing pdi[u] in 46 | idaq happening while processing pacbio reads. added bound check as 47 | hack (idaq() mostly illumina specific anyway) 48 | * Removed MAX_READ_LEN globally 49 | * 'call': added special case for SB test: if ref is entirely missing 50 | and we have alts on only one strand fisher's exact test will 51 | return 0, which is most certainly not what we want. setting to 52 | INT_MAX instead 53 | * vcfset: only-[type] now correctly dealt with in vcf2 on top of vcf1. 54 | * vcfset: fixed bug which match vars even if they only overlapped 55 | partially (now also checking position instead of relying on 56 | tabix iterator) 57 | * Reference sequences now converted to uppercase after fetching to be safe. 58 | This also addresses the "AQ-bug" where low AQ values were reported 59 | because of a lower-case reference 60 | * 'pparallel': made bed reading function standard conform and more 61 | fault tolerant. e.g. now allowing browser and track lines 62 | * 'somatic': now also producing germline indels 63 | * 'somatic': min cov lowered to 7 64 | * 'somatic': normal stringent now has separate parameters, i.e. independent of 65 | tumor stringent (set to fdr 1%) 66 | * 'somatic': sq ignore normal now ignoring indels and snvs 67 | * 'somatic': Added support for multiple ignore vcf files 68 | * 'filter': corrected wrong info about default sb mtc method 69 | * Changed MQ0 prob to a generic 0.5 70 | * Fixed bug in fdr application in filter: previous versions 71 | called more tests significant than actually true. 
Fixed by setting 72 | all values to not significant right after calling fdr() 73 | * 'somatic': added --min-cov option 74 | 75 | Changes in versions before 2.1.2: See http://csb5.github.io/lofreq/blog/ 76 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | LoFreq is licensed under the MIT License (see below). 2 | 3 | Licenses for third party software that is part of the source: 4 | - cdflib90 (see src/cdflib90.README) 5 | - uthash (see src/uthash/LICENSE) 6 | 7 | Licenses external libraries (part of the statically compiled binary): 8 | - samtools (see src/samtools-1.1.LICENSE) 9 | - htslib (see src/htslib-1.1.LICENSE) 10 | 11 | ---------------------------------------------------------------------- 12 | 13 | The MIT License (MIT) 14 | 15 | Copyright (c) 2013-2017 Genome Institute of Singapore 16 | 17 | Permission is hereby granted, free of charge, to any person obtaining a copy 18 | of this software and associated documentation files (the "Software"), to deal 19 | in the Software without restriction, including without limitation the rights 20 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 21 | copies of the Software, and to permit persons to whom the Software is 22 | furnished to do so, subject to the following conditions: 23 | 24 | The above copyright notice and this permission notice shall be included in 25 | all copies or substantial portions of the Software. 26 | 27 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 28 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 29 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 30 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 31 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 32 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 33 | THE SOFTWARE. 34 | 35 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | # Top-level Makefile.am for LoFreq 2 | # 3 | # based on Makefile.am for Clustal Omega 4 | 5 | # we do not use GNU-style files: NEWS README AUTHORS ChangeLog 6 | # AUTOMAKE_OPTIONS = foreign 7 | 8 | if ENABLE_TOOLS 9 | TOOLS = src/tools 10 | endif 11 | 12 | LICENSES = LICENSE src/cdflib90.README src/uthash/LICENSE 13 | EXTRA_DIST = binary_installer.sh $(LICENSES) 14 | 15 | SUBDIRS = src/cdflib90 src/uthash src/lofreq src/scripts $(TOOLS) 16 | bug-tests: all 17 | cd tests && $(SHELL) run_all.sh; 18 | 19 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/NEWS -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LoFreq*: A sequence-quality aware, ultra-sensitive variant caller for NGS data 2 | 3 | ## Note 4 | 5 | Most users will want to install LoFreq via [BioConda](https://bioconda.github.io/)! 6 | The source code hosted here on Github is mainly for developers! 7 | 8 | LoFreq was published 10 years ago, is considered very stable and has 9 | almost 1000 citations at the time of writing this. I (Andreas) have 10 | long left academia and tried to maintain the code as best as I could 11 | until now, but find that I have no more time to do this. 
For bugs, 12 | suggestions, ideas, collaborations please contact [Niranjan Nagarajan](mailto:nagarajann@gis.a-star.edu.sg). 13 | 14 | 15 | ## Building the Source 16 | 17 | ### Current Build Status 18 | 19 | [![Build Status](https://travis-ci.org/CSB5/lofreq.svg?branch=master)](https://travis-ci.org/CSB5/lofreq) 20 | 21 | ### Prerequisites 22 | 23 | You will need: 24 | 25 | - a C compiler (e.g. gcc or clang) 26 | - a Python 3 interpreter 27 | - zlib developer files (zlib1g-dev on Ubuntu) 28 | - a compiled version of [HTSlib 1.4 or later](https://github.com/samtools/htslib) 29 | 30 | ### Compilation 31 | 32 | - Clone the repo (or download the current master as zip package and unpack) 33 | - Run `./bootstrap` to set up the required automake files 34 | - If you get an error like `required file './ltmain.sh' 35 | not found`, run `libtoolize` (or `glibtoolize`) first and then 36 | `bootstrap` again 37 | - Subsequent pulls won't require rerunning `./bootstrap`. This is 38 | only necessary when changing `configure.ac` or any of the `Makefile.am` files 39 | - Run `./configure` with the **absolute** path to HTSlib 40 | (e.g. `./configure --with-htslib=/home/user/miniconda [--prefix=inst-path]`) 41 | - Run `make` 42 | - At this point you can already start using lofreq: `./bin/lofreq` 43 | - Run `make install` to properly install the package 44 | - Default is `/usr/local/`. If `--prefix` was given to `configure`, 45 | the corresponding argument is used 46 | - Depending on the used prefix you might need to adjust your PATH (and PYTHONPATH). 47 | 48 | ## Documentation 49 | 50 | - Simply calling `lofreq` on the command line will display a list of 51 | subcommands 52 | - `lofreq cmd` will then display help for `cmd` 53 | - See [LoFreq's website](http://csb5.github.io/lofreq/) for full documentation 54 | 55 | ## License 56 | 57 | LoFreq is licensed under the MIT License (see LICENSE). 
58 | 59 | Licenses for third party software that is part of the source: 60 | - cdflib90 (see src/cdflib90.README) 61 | - uthash (see src/uthash/LICENSE) 62 | -------------------------------------------------------------------------------- /binary_installer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o pipefail 4 | 5 | PREFIX=/usr/local 6 | 7 | usage() { 8 | # keep in sync with arg parsing below 9 | cat <&2 45 | exit 1 46 | fi 47 | cp $f $prefix/bin/ || exit 1 48 | done 49 | 50 | echo "Installing Python tools" 51 | pushd ./src/tools >/dev/null 52 | pythonpath=$(python setup.py install --prefix $prefix | \ 53 | grep 'egg-info$' | head -n1 | cut -f 2 -d ' ' | sed -e 's,LoFreq.*,,') || exit 1 54 | 55 | echo "NOTE: Make sure $pythonpath is in your PYTHONPATH" 56 | popd >/dev/null 57 | 58 | echo "Successful exit" 59 | -------------------------------------------------------------------------------- /bootstrap: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # A bootstrapping script replacing autogen.sh and autoreconf. Brings 4 | # source tree into a state where end user can run configure and make. 5 | # 6 | # From https://www.sourceware.org/autobook/autobook/autobook_43.html: 7 | # "Autoconf comes with a program called autoreconf which essentially 8 | # does the work of the bootstrap script. autoreconf is rarely used 9 | # because, historically, has not been very well known, and only in 10 | # Autoconf 2.13 did it acquire the ability to work with Automake. 11 | # Unfortunately, even the Autoconf 2.13 autoreconf does not handle 12 | # libtoolize and some automake-related options that are frequently 13 | # nice to use. 14 | # 15 | # We recommend the bootstrap method, until autoreconf is fixed. 
At 16 | # this point bootstrap has not been standardized, so here is a version 17 | # of the script we used while writing this book" 18 | # 19 | 20 | aclocal && \ 21 | automake --gnu --add-missing && \ 22 | autoconf 23 | 24 | -------------------------------------------------------------------------------- /devel-doc/FIXME: -------------------------------------------------------------------------------- 1 | Todos: 2 | 3 | High Prio: 4 | ========== 5 | 6 | - fix/check af computation for indels, esp. in presence of head/tails 7 | - support of indels during ignore vcf loading and source qual computation 8 | 9 | 10 | Med Prio: 11 | ========= 12 | 13 | - what to do at high coverages where X>cap (e.g. Lewis' HepB)? replace already read values randomly? 14 | - add normal and tumor plp info to somatic calls. normal could be added in uniq 15 | - use indexing for source quality ignore vcfs if region was given 16 | 17 | Low Prio: 18 | ========= 19 | 20 | - keep var info field with hash 21 | - profile code e.g. 22 | $LOFREQ call -f mers_coronavirus_NC_019843.2.fa --verbose --debug NC_019843.2:2399-2399 dwgsim-ion-samba-mers-100kX-pool.bwamem.bam 23 | - parallelize viterbi avoiding overlapping reads. endpos of last read in last bin could be start pos for next bin 24 | - multiprocessing version of alnqual (best per chrom as we get overlapping reads if run per bin) -------------------------------------------------------------------------------- /devel-doc/autotools.README: -------------------------------------------------------------------------------- 1 | autotools 2 | ========= 3 | 4 | autotools allow to automagically generate the configure and Makefile 5 | files that allow you to semi-automatically configure, compile and 6 | install the source, like this (GNU triple jump): 7 | $ ./configure 8 | $ make 9 | $ make install 10 | 11 | The only files you will ever have to edit as a developer are the 12 | configure.ac and Makefile.am files. 
autoconf looks for a file called 13 | configure.ac (or configure.in). It then creates the configure script, 14 | based on the macros which it finds. After editing configure.ac you 15 | should run autoreconf (which usually also happens automatically). 16 | Changes to Makefile, Makefile.in and configure will be overwritten! 17 | 18 | 19 | Setting things up 20 | ================ 21 | 22 | After first checkout and each time a file was added/removed you have 23 | to do 24 | the following to set up the automake environment: 25 | $ autoreconf (best with -Wall) 26 | and possibly 27 | $ autoreconf -i 28 | 29 | If autoreconf complains: "required file `./ltmain.sh' not found" then 30 | run 31 | $ [g]libtoolize 32 | 33 | If autoreconf complains: "required file `./config.guess|./config.sub|' 34 | not 35 | found" then run 36 | $ automake --add-missing 37 | and run autoreconf again. 38 | 39 | NOTE: all the above has been replaced with the bootstrap script 40 | 41 | Adding source files 42 | =================== 43 | 44 | Normally, the only thing you will have to do is to just add your 45 | c-files to the corresponding Makefile.am _SOURCES variable. 46 | 47 | -------------------------------------------------------------------------------- /devel-doc/best-practices.txt: -------------------------------------------------------------------------------- 1 | - Align your reads with a good aligner. We recommend BWA-MEM. It's best to 2 | add a read-group (required if GATK is used later) during the alignment 3 | - Recommended: For PE reads fix mate-pair information and clean BAM file 4 | (Picard's FixMateInformation and CleanSam) 5 | (last two steps can be run with bwamem_wrapper.sh) 6 | 7 | - Recommended for Illumina data: realignment with 'lofreq viterbi'. 8 | 9 | - Mark PCR Duplicates with Picard's MarkDuplicates (Skip for high coverage amplicon data) 10 | - Left alignment of indels with GATK's IndelRealigner. 
(Untested alternative: Freebayes' bamleftalign) 11 | - Base-quality recalibration with GATK's BaseRecalibrator (also adds indel qualities 12 | in versions >= 2). FIXME note about known.vcf 13 | (last three steps can be run with ngs_pipeline.py) 14 | 15 | - If LoFreq is used to predict indels and if BQSR was not run (i.e. indel qualities are missing from the BAM 16 | file), use 17 | 'lofreq indelqual' 18 | 19 | - Somatic: 20 | -l for targeted sequencing and exomes etc (see elsewhere) 21 | -d dbsnp. dbsnp matching your reference genome version. we recommend to remove 22 | variants marked as somatic (zgrep SAO=[23]) and variants found in cosmic from it. 23 | 24 | - FIXME: lofreq parameters: 25 | source qual 26 | baq 27 | indel 28 | somatic 29 | parallel 30 | 31 | 32 | -------------------------------------------------------------------------------- /devel-doc/debug.README: -------------------------------------------------------------------------------- 1 | General 2 | ------- 3 | 4 | Try 5 | $ make CFLAGS='-std=c99 -Wall -pedantic' 6 | every now and then. 
Ignore the following warnings: 7 | "ISO C does not permit named variadic macros" 8 | 9 | Enable debugging every now and then and run 10 | $ ./configure --enable-debug; make clean; make 11 | 12 | 13 | Memory checks 14 | ------------- 15 | 16 | make clean; make CFLAGS='-O0 -g' LDFLAGS='-O0 -g' 17 | cd src/lofreq 18 | # and then for example 19 | valgrind --tool=memcheck --leak-check=full \ 20 | ./lofreq call -r consensus:100-200 -f ~/scratch/ref.fa ../../../lofreq-test-data/denv2-multiplex-replicates/ACAGTG_1.bam --verbose >/dev/null 21 | 22 | 23 | Profiling 24 | --------- 25 | 26 | make clean; make CFLAGS='-pg' LDFLAGS='-pg' 27 | and then for example 28 | ./src/lofreq/lofreq call \ 29 | -f tests/data/denv2-pseudoclonal/denv2-pseudoclonal_cons.fa \ 30 | -l tests/data/denv2-pseudoclonal/denv2-pseudoclonal_incl.bed \ 31 | tests/data/denv2-pseudoclonal/denv2-pseudoclonal.bam 32 | gprof ./src/lofreq/lofreq gmon.out > gmon.txt 33 | 34 | 35 | static code checker 36 | ------------------- 37 | 38 | To use clang's static code checker: 39 | $ scan-build ./configure 40 | $ scan-build make 41 | 42 | note, there are a lot of warnings produced for libbam 43 | -------------------------------------------------------------------------------- /devel-doc/dist.README: -------------------------------------------------------------------------------- 1 | To create a new distribution: 2 | 3 | - Make sure tests work (tests/run_all.sh) 4 | - Update version in configure.ac 5 | - Update top-level README and changelog 6 | - autoreconf 7 | - run 'make dist' to compile a tarball 8 | - Either 9 | - Upload source and update the websites with info on new usage/bug-fixes/new function 10 | - or 11 | - Unpack tarball 12 | - ./configure --enable-static 13 | - make 14 | - compile against static libz if necessary, check with ldd ./src/lofreq/lofreq (or otool -L) 15 | - bash binary_installer.sh -p somewhere and pack 16 | - Commit your changes 17 | - Tag this version 18 | (e.g. git tag -a v0.3.1 -m 'my version 0.3.1') 
19 | push and push origin --tags 20 | 21 | - use binary_installer for binary distributions 22 | -------------------------------------------------------------------------------- /devel-doc/docker.README: -------------------------------------------------------------------------------- 1 | boot2docker init 2 | boot2docker start 3 | $(boot2docker shellinit) 4 | 5 | # --- 6 | 7 | docker run -t -i ubuntu:14.04 /bin/bash 8 | 9 | apt-get update 10 | apt-get upgrade 11 | 12 | apt-get install make 13 | apt-get install libc6-dev libc-dev 14 | apt-get install zlib1g-dev 15 | apt-get install python2.7 16 | apt-get install libncurses-dev 17 | apt-get install wget 18 | 19 | ln -s /usr/bin/python2.7 /usr/bin/python 20 | 21 | cd /usr/local/src 22 | wget -nd 'http://downloads.sourceforge.net/project/lofreq/lofreq_star-2.1.1.tar.gz' 23 | # 24 | wget 'https://github.com/samtools/htslib/releases/download/1.9/htslib-1.9.tar.bz2' 25 | tar -xjf htslib-1.9.tar.bz2 26 | cd htslib-1.9 27 | make 28 | make install 29 | 30 | cd .. 
31 | tar xvzf lofreq_star-2.1.1.tar.gz 32 | cd lofreq_star-2.1.1 33 | ./configure --with-htslib=/usr/local 34 | make 35 | make install 36 | 37 | # --- 38 | 39 | docker commit -m="Added htslib 1.9 and lofreq 2.1.1" -a="Andreas Wilm" 78c85ef2e74a andreaswilm/lofreq:v2.1.1 40 | docker push andreaswilm/lofreq 41 | -------------------------------------------------------------------------------- /devel-doc/git.README: -------------------------------------------------------------------------------- 1 | Try to implement a rebase workflow AKA github workflow 2 | See 3 | http://mettadore.com/analysis/a-simple-git-rebase-workflow-explained/ 4 | http://mettadore.com/analysis/the-ever-deployable-github-workflow/ 5 | http://randyfay.com/content/rebase-workflow-git 6 | http://git-scm.com/book/en/Git-Branching-Rebasing 7 | http://isis.apache.org/contributors/git-cookbook.html 8 | -------------------------------------------------------------------------------- /dist/LoFreq-0.2.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/LoFreq-0.2.tar.gz -------------------------------------------------------------------------------- /dist/LoFreq-0.3.1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/LoFreq-0.3.1.tar.gz -------------------------------------------------------------------------------- /dist/LoFreq-0.3.2.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/LoFreq-0.3.2.tar.gz -------------------------------------------------------------------------------- /dist/LoFreq-0.3.tar.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/LoFreq-0.3.tar.gz -------------------------------------------------------------------------------- /dist/LoFreq-0.4.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/LoFreq-0.4.0.tar.gz -------------------------------------------------------------------------------- /dist/lofreq-0.5.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq-0.5.0.tar.gz -------------------------------------------------------------------------------- /dist/lofreq-v0.1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq-v0.1.zip -------------------------------------------------------------------------------- /dist/lofreq_star-2.0.0-beta-2-linux-x86-64.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.0.0-beta-2-linux-x86-64.tgz -------------------------------------------------------------------------------- /dist/lofreq_star-2.0.0-beta-2-macosx.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.0.0-beta-2-macosx.tgz -------------------------------------------------------------------------------- /dist/lofreq_star-2.0.0-beta-3-linux-x86-64.tgz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.0.0-beta-3-linux-x86-64.tgz -------------------------------------------------------------------------------- /dist/lofreq_star-2.0.0-beta-3-macosx.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.0.0-beta-3-macosx.tgz -------------------------------------------------------------------------------- /dist/lofreq_star-2.0.0-beta-3.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.0.0-beta-3.tar.gz -------------------------------------------------------------------------------- /dist/lofreq_star-2.0.0-beta-linux-x86-64.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.0.0-beta-linux-x86-64.tgz -------------------------------------------------------------------------------- /dist/lofreq_star-2.0.0-beta-macosx.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.0.0-beta-macosx.tgz -------------------------------------------------------------------------------- /dist/lofreq_star-2.0.0-linux-x86-64.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.0.0-linux-x86-64.tgz -------------------------------------------------------------------------------- /dist/lofreq_star-2.0.0-macosx.tgz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.0.0-macosx.tgz -------------------------------------------------------------------------------- /dist/lofreq_star-2.0.0-rc-1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.0.0-rc-1.tar.gz -------------------------------------------------------------------------------- /dist/lofreq_star-2.0.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.0.0.tar.gz -------------------------------------------------------------------------------- /dist/lofreq_star-2.1.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.1.0.tar.gz -------------------------------------------------------------------------------- /dist/lofreq_star-2.1.0_linux-x86-64.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.1.0_linux-x86-64.tgz -------------------------------------------------------------------------------- /dist/lofreq_star-2.1.0_macosx.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.1.0_macosx.tgz -------------------------------------------------------------------------------- /dist/lofreq_star-2.1.1.tar.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.1.1.tar.gz -------------------------------------------------------------------------------- /dist/lofreq_star-2.1.1_linux-x86-64.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.1.1_linux-x86-64.tgz -------------------------------------------------------------------------------- /dist/lofreq_star-2.1.1_macosx.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.1.1_macosx.tgz -------------------------------------------------------------------------------- /dist/lofreq_star-2.1.2.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.1.2.tar.gz -------------------------------------------------------------------------------- /dist/lofreq_star-2.1.2_linux-x86-64.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.1.2_linux-x86-64.tgz -------------------------------------------------------------------------------- /dist/lofreq_star-2.1.2_macosx.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.1.2_macosx.tgz -------------------------------------------------------------------------------- /dist/lofreq_star-2.1.3.1.tar.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.1.3.1.tar.gz -------------------------------------------------------------------------------- /dist/lofreq_star-2.1.3.1_linux-x86-64.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.1.3.1_linux-x86-64.tgz -------------------------------------------------------------------------------- /dist/lofreq_star-2.1.3.1_macosx.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.1.3.1_macosx.tgz -------------------------------------------------------------------------------- /dist/lofreq_star-2.1.4.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.1.4.tar.gz -------------------------------------------------------------------------------- /dist/lofreq_star-2.1.4_linux-x86-64.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.1.4_linux-x86-64.tgz -------------------------------------------------------------------------------- /dist/lofreq_star-2.1.5.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.1.5.tar.gz -------------------------------------------------------------------------------- /dist/lofreq_star-2.1.5_linux-x86-64.tgz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/dist/lofreq_star-2.1.5_linux-x86-64.tgz -------------------------------------------------------------------------------- /m4/ax_with_htslib.m4: -------------------------------------------------------------------------------- 1 | # =========================================================================== 2 | # http://www.gnu.org/software/autoconf-archive/ax_with_htslib.html 3 | # =========================================================================== 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_WITH_HTSLIB 8 | # 9 | # DESCRIPTION 10 | # 11 | # This macro checks whether HTSlib is installed 12 | # or nearby, and adds a --with-htslib=DIR option to the configure script 13 | # for specifying the location. It locates either an installation prefix 14 | # (with 'include' and 'lib' subdirectories) or an HTSlib source tree, as 15 | # HTSlib is fast-moving and users may wish to use an in-development tree. 16 | # 17 | # Different checks occur depending on the --with-htslib argument given: 18 | # 19 | # With --with-htslib=DIR, checks whether DIR is a source tree or contains 20 | # a working installation. 21 | # By default, searches for a source tree (with a name matching htslib*) 22 | # within or alongside $srcdir. Produces AC_MSG_ERROR if there are 23 | # several equally-likely candidates. If there are none, checks for 24 | # a working default installation. 25 | # With --with-htslib=system, checks for a working default installation. 26 | # 27 | # If a source tree is found or specified, it is added to AC_CONFIG_SUBDIRS 28 | # if either --enable-configure-htslib is set, or where htslib is included 29 | # in a subdirectory (for packages that want to supply an embedded htslib). 30 | # Unfortunately this may cause a "you should use literals" warning when 31 | # autoconf is run. 
32 | # 33 | # The following output variables are set by this macro: 34 | # 35 | # HTSDIR Directory containing HTSlib source tree 36 | # HTSLIB_CPPFLAGS Preprocessor flags for compiling with HTSlib 37 | # HTSLIB_LDFLAGS Linker flags for linking with HTSlib 38 | # 39 | # The following shell variables may be defined: 40 | # 41 | # ax_cv_htslib Set to "yes" if HTSlib was found 42 | # ax_cv_htslib_which Set to "source", "install", or "none" 43 | # 44 | # LICENSE 45 | # 46 | # Copyright (C) 2015,2017 Genome Research Ltd 47 | # 48 | # Copying and distribution of this file, with or without modification, are 49 | # permitted in any medium without royalty provided the copyright notice 50 | # and this notice are preserved. This file is offered as-is, without any 51 | # warranty. 52 | 53 | #serial 1 54 | 55 | AC_DEFUN([AX_WITH_HTSLIB], 56 | [AC_ARG_WITH([htslib], 57 | [AS_HELP_STRING([--with-htslib=DIR], 58 | [use the HTSlib source tree or installation in DIR]) 59 | dnl Not indented, to avoid extra whitespace outwith AS_HELP_STRING() 60 | AS_HELP_STRING([--with-htslib=system], 61 | [use only a system HTSlib installation])], 62 | [], [with_htslib=search]) 63 | AC_ARG_ENABLE([configure-htslib], 64 | [AS_HELP_STRING([--enable-configure-htslib], 65 | [run configure for htslib as well @<:@default=only_in_subdir@:>@])], 66 | [], [enable_configure_htslib=only_in_subdir]) 67 | 68 | case $with_htslib in 69 | yes|search) 70 | AC_MSG_CHECKING([location of HTSlib source tree]) 71 | case $srcdir in 72 | .) 
srcp= ;; 73 | *) srcp=$srcdir/ ;; 74 | esac 75 | found= 76 | for dir in ${srcp}htslib* -- ${srcp}../htslib -- ${srcp}../htslib* 77 | do 78 | if test "$dir" = "--"; then 79 | test -n "$found" && break 80 | elif test -f "$dir/hts.c" && test -f "$dir/htslib/hts.h"; then 81 | found="${found}1" 82 | HTSDIR=$dir 83 | fi 84 | done 85 | if test -z "$found"; then 86 | AC_MSG_RESULT([none found]) 87 | ax_cv_htslib_which=system 88 | elif test "$found" = 1; then 89 | AC_MSG_RESULT([$HTSDIR]) 90 | ax_cv_htslib_which=source 91 | if test "x$enable_configure_htslib" = "xonly_in_subdir" ; then 92 | case $HTSDIR in 93 | "${srcp}htslib"*) enable_configure_htslib=yes ;; 94 | *) ;; 95 | esac 96 | fi 97 | else 98 | AC_MSG_RESULT([several directories found]) 99 | AC_MSG_ERROR([use --with-htslib=DIR to select which HTSlib to use]) 100 | fi 101 | ;; 102 | no) ax_cv_htslib_which=none ;; 103 | system) ax_cv_htslib_which=system ;; 104 | *) 105 | HTSDIR=$with_htslib 106 | if test -f "$HTSDIR/hts.c" && test -f "$HTSDIR/htslib/hts.h"; then 107 | ax_cv_htslib_which=source 108 | else 109 | ax_cv_htslib_which=install 110 | fi 111 | ;; 112 | esac 113 | 114 | case $ax_cv_htslib_which in 115 | source) 116 | ax_cv_htslib=yes 117 | HTSLIB_CPPFLAGS="-I$HTSDIR" 118 | HTSLIB_LDFLAGS="-L$HTSDIR" 119 | if test "x$enable_configure_htslib" = "xyes"; then 120 | # We can't use a literal, because $HTSDIR is user-provided and variable 121 | AC_CONFIG_SUBDIRS($HTSDIR) 122 | fi 123 | ;; 124 | system) 125 | AC_CHECK_HEADER([htslib/sam.h], 126 | [AC_CHECK_LIB(hts, hts_version, [ax_cv_htslib=yes], [ax_cv_htslib=no])], 127 | [ax_cv_htslib=no], [;]) 128 | ax_cv_htslib_which=install 129 | HTSDIR= 130 | HTSLIB_CPPFLAGS= 131 | HTSLIB_LDFLAGS= 132 | ;; 133 | install) 134 | ax_saved_CPPFLAGS=$CPPFLAGS 135 | ax_saved_LDFLAGS=$LDFLAGS 136 | HTSLIB_CPPFLAGS="-I$HTSDIR/include" 137 | HTSLIB_LDFLAGS="-L$HTSDIR/lib" 138 | CPPFLAGS="$CPPFLAGS $HTSLIB_CPPFLAGS" 139 | LDFLAGS="$LDFLAGS $HTSLIB_LDFLAGS" 140 | 
AC_CHECK_HEADER([htslib/sam.h], 141 | [AC_CHECK_LIB(hts, hts_version, [ax_cv_htslib=yes], [ax_cv_htslib=no])], 142 | [ax_cv_htslib=no], [;]) 143 | HTSDIR= 144 | CPPFLAGS=$ax_saved_CPPFLAGS 145 | LDFLAGS=$ax_saved_LDFLAGS 146 | ;; 147 | none) 148 | ax_cv_htslib=no 149 | ;; 150 | esac 151 | 152 | AC_SUBST([HTSDIR]) 153 | AC_SUBST([HTSLIB_CPPFLAGS]) 154 | AC_SUBST([HTSLIB_LDFLAGS])]) 155 | -------------------------------------------------------------------------------- /snakemake/Snakefile: -------------------------------------------------------------------------------- 1 | """A best-practices variant calling implementation LoFreq ( # loosely 2 | based on https://github.com/gis-rpd/pipelines. Starts with short 3 | reads and finishes with a bgzipped vcf file. The workflow is kept 4 | simple, i.e. no tricks are applied to speed the analysis up 5 | (splitting fastq, running viterbi by chrom etc.). 6 | 7 | # Input: config-file with the following fields: 8 | - bool mark_short_splits: for bwa mem -M 9 | - string bed: for bed-file limiting analysis to certain regions 10 | - int optional 'maxdepth': to limit per-site coverage in analysis 11 | - dict 'samples': sample names as keys and one fastq-pair each as value 12 | - string reference: reference fasta file 13 | - string outdir: where to save output 14 | 15 | # Pre-installed programs: 16 | - lofreq 2.1.2 17 | - bwa (with mem support e.g. 0.7.12) 18 | - samtools >= 1.3 19 | 20 | Notes: 21 | - If missing, the workflow will try to index your reference with 22 | samtools and bwa. This can lead to race conditions so is best 23 | done in advance. 
24 | 25 | """ 26 | 27 | import os 28 | 29 | 30 | shell.executable("/bin/bash") 31 | shell.prefix("set -euo pipefail;") 32 | 33 | 34 | rule all: 35 | input: 36 | expand(os.path.join(config['outdir'], "{sample}/{sample}.bwamem.lofreq.vcf.gz"), 37 | sample=config['samples']) 38 | 39 | 40 | rule bwa_index: 41 | input: 42 | "{prefix}.{suffix}" 43 | output: 44 | "{prefix}.{suffix,(fasta|fa)}.pac", 45 | "{prefix}.{suffix,(fasta|fa)}.bwt", 46 | "{prefix}.{suffix,(fasta|fa)}.sa" 47 | log: 48 | "{prefix}.{suffix,(fasta|fa)}.index.log" 49 | shell: 50 | "bwa index {input} >& {log};" 51 | 52 | 53 | rule samtools_faidx: 54 | input: 55 | "{prefix}.{suffix}" 56 | output: 57 | "{prefix}.{suffix,(fasta|fa)}.fai", 58 | log: 59 | "{prefix}.{suffix,(fasta|fa)}.index.log" 60 | shell: 61 | "samtools faidx {input} >& {log};" 62 | 63 | 64 | rule samtools_index: 65 | input: 66 | "{prefix}.bam" 67 | output: 68 | "{prefix}.bam.bai", 69 | log: 70 | "{prefix}.bam.bai.log" 71 | shell: 72 | "samtools index {input} >& {log};" 73 | 74 | 75 | rule bwamem_align: 76 | input: 77 | reffa = config['reference'], 78 | bwaindex = config['reference'] + ".bwt", 79 | fastqs = lambda wc: config['samples'][wc.sample] 80 | output: 81 | bam = '{prefix}/{sample}.bwamem.bam' 82 | log: 83 | '{prefix}/{sample}.bwamem.bam.log' 84 | params: 85 | mark_short_splits = "-M" if config['mark_short_splits'] else "", 86 | message: 87 | 'Aligning PE reads, fixing mate information and converting to sorted BAM' 88 | threads: 89 | 8 90 | shell: 91 | "{{ bwa mem {params.mark_short_splits} -t {threads}" 92 | " {input.reffa} {input.fastqs} |" 93 | " samtools fixmate - - |" 94 | " samtools sort -o {output.bam} -T {output.bam}.tmp -; }} >& {log}" 95 | 96 | 97 | rule lofreq_bam_processing: 98 | """Runs BAM through full LoFreq preprocessing pipeline, 99 | i.e. viterbi, alnqual, indelqual, followed by sort (required by 100 | viterbi). 
101 | 102 | WARNING: running this on unsorted input files will be inefficient 103 | because of constant reloading of the reference 104 | """ 105 | input: 106 | bam = '{prefix}.bam', 107 | reffa = config['reference'], 108 | reffai = config['reference'] + ".fai" 109 | output: 110 | bam = '{prefix}.lofreq.bam' 111 | log: 112 | '{prefix}.lofreq.log' 113 | message: 114 | "Preprocessing BAMs with LoFreq" 115 | threads: 116 | 1 117 | shell: 118 | "{{ lofreq viterbi -f {input.reffa} {input.bam} | " 119 | " lofreq alnqual -u - {input.reffa} | " 120 | " lofreq indelqual --dindel -f {input.reffa} - | " 121 | " samtools sort -o {output.bam} -T {output.bam}.tmp -; }} >& {log}" 122 | 123 | 124 | rule lofreq_call: 125 | input: 126 | bam = '{prefix}.bam', 127 | bai = '{prefix}.bam.bai', 128 | reffa = config['reference'], 129 | refidx = config['reference'] + ".fai", 130 | output: 131 | vcf = '{prefix}.vcf.gz' 132 | log: 133 | '{prefix}.vcf.log' 134 | message: 135 | "Calling variants with LoFreq" 136 | threads: 137 | 8 138 | params: 139 | maxdepth = config.get('maxdepth', 10000), 140 | bed_arg = "-l {}".format(config['bed']) if config['bed'] else "" 141 | shell: 142 | "lofreq call-parallel --pp-threads {threads} --call-indels" 143 | " {params.bed_arg} -f {input.reffa} -o {output.vcf}" 144 | " -d {params.maxdepth} {input.bam} >& {log}" 145 | -------------------------------------------------------------------------------- /snakemake/cfg.yaml: -------------------------------------------------------------------------------- 1 | # sample definition with one fastq pair per sample 2 | samples: 3 | isolate1: 4 | - data/sample1_R1.fastq.gz 5 | - data/sample1_R2.fastq.gz 6 | isolate2: 7 | - data/sample2.fastq.gz 8 | outdir: out/ 9 | reference: 10 | data/ref.fa 11 | # optional: regions. 
leave blank if none 12 | bed: data/regions.bed 13 | # optional: max coverage (see snakefile for default) 14 | maxdepth: 10000 15 | # mark short split hits as secondary in BWA MEM 16 | mark_short_splits: true 17 | -------------------------------------------------------------------------------- /snakemake/data/ref.fa: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/snakemake/data/ref.fa -------------------------------------------------------------------------------- /snakemake/data/regions.bed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/snakemake/data/regions.bed -------------------------------------------------------------------------------- /snakemake/data/sample1_R1.fastq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/snakemake/data/sample1_R1.fastq.gz -------------------------------------------------------------------------------- /snakemake/data/sample1_R2.fastq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/snakemake/data/sample1_R2.fastq.gz -------------------------------------------------------------------------------- /snakemake/data/sample2.fastq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/snakemake/data/sample2.fastq.gz -------------------------------------------------------------------------------- /snakemake/dryrun.sh: -------------------------------------------------------------------------------- 1 | snakemake -T -p --dryrun --configfile 
cfg.yaml -s Snakefile 2 | -------------------------------------------------------------------------------- /snakemake/ecoli-cfg.yaml: -------------------------------------------------------------------------------- 1 | outdir: /mnt/projects/wilma/lofreq/tmp 2 | samples: 3 | sample1: 4 | - /mnt/projects/wilma/lofreq//testing/data/ecoli-clone/EColi_EAS20_8/EAS20_8_R1.1M.fastq.gz 5 | - /mnt/projects/wilma/lofreq//testing/data/ecoli-clone/EColi_EAS20_8/EAS20_8_R2.1M.fastq.gz 6 | sample2: 7 | # fake for SE testing 8 | - /mnt/projects/wilma/lofreq//testing/data/ecoli-clone/EColi_EAS20_8/EAS20_8_R2.1M.fastq.gz 9 | reference: 10 | /mnt/projects/wilma/lofreq//testing/data/ecoli-clone/ref/Ecoli_K12_MG1655_NC_000913.fa 11 | bed: 12 | maxdepth: 1000 13 | mark_short_splits: true 14 | 15 | -------------------------------------------------------------------------------- /src/cdflib90.README: -------------------------------------------------------------------------------- 1 | DCDFLIB: Library of Routines for Cumulative Distribution Functions, Inverses, and Other Parameters 2 | 3 | Downloaded from https://biostatistics.mdanderson.org/SoftwareDownload/SingleSoftware.aspx?Software_Id=21 4 | Quote from there: 5 | 6 | """ 7 | DCDFLIB (Double precision Cumulative Distribution Function LIBrary) is 8 | a collection of routines that calculate cumulative distribution 9 | functions, inverses, and parameters for common statistical 10 | distributions. DCDFLIB uses published algorithms where available, and 11 | literature citations are included in the documentation. Values 12 | associated with a statistical distribution include X, the upper limit 13 | of integration of the density, P, the cumulative distribution function 14 | evaluated at X, and auxiliary parameters such as degrees of freedom. 15 | Given all but one such value, a routine in DCDFLIB will calculate the 16 | one value. (CAVEAT: For the central and noncentral F, the cdf is 17 | sometimes not monotone in the degrees of freedom. 
For these 18 | parameters, there can be two answers. DCDFLIB finds an arbitrary one 19 | of the two in this case.) Routines are provided for the following 20 | distributions. 21 | 22 | Beta 23 | Binomial 24 | Chi-square 25 | Noncentral Chi-square 26 | F 27 | Noncentral F 28 | Gamma 29 | Negative Binomial 30 | Normal 31 | Poisson 32 | Student's t 33 | Noncentral t 34 | CDFLIB90 is a translation of DCDFLIB into Fortran 95 with improvements. Compared to the older package, it offers an improved packaging, improved speed (a better monotone function inverter is used), and perhaps a minute amount of increased accuracy. 35 | 36 | Contact: Barry W. Brown 37 | 38 | Software developed by John Venier and Dan Serachitopol 39 | """ 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /src/cdflib90/.gitignore: -------------------------------------------------------------------------------- 1 | # backup files 2 | *~ 3 | *# 4 | 5 | # objects 6 | *.o 7 | *.a 8 | *.lo 9 | *.la 10 | lofreq_samtools 11 | 12 | Makefile 13 | Makefile.log 14 | .deps 15 | .libs 16 | -------------------------------------------------------------------------------- /src/cdflib90/Makefile.am: -------------------------------------------------------------------------------- 1 | AM_CFLAGS = -O2 -Wall -fPIC @AM_CFLAGS@ 2 | AM_LDFLAGS = -shared -fPIC @AM_LDFLAGS@ 3 | noinst_LIBRARIES = libcdf.a 4 | libcdf_a_SOURCES = cdflib.h dcdflib.c ipmpar.c 5 | EXTRA_DIST = readme 6 | -------------------------------------------------------------------------------- /src/cdflib90/cdflib.h: -------------------------------------------------------------------------------- 1 | double algdiv(double*,double*); 2 | double alngam(double*); 3 | double alnrel(double*); 4 | double apser(double*,double*,double*,double*); 5 | double basym(double*,double*,double*,double*); 6 | double bcorr(double*,double*); 7 | double betaln(double*,double*); 8 | double 
bfrac(double*,double*,double*,double*,double*,double*); 9 | void bgrat(double*,double*,double*,double*,double*,double*,int*i); 10 | double bpser(double*,double*,double*,double*); 11 | void bratio(double*,double*,double*,double*,double*,double*,int*); 12 | double brcmp1(int*,double*,double*,double*,double*); 13 | double brcomp(double*,double*,double*,double*); 14 | double bup(double*,double*,double*,double*,int*,double*); 15 | void cdfbet(int*,double*,double*,double*,double*,double*,double*, 16 | int*,double*); 17 | void cdfbin(int*,double*,double*,double*,double*,double*,double*, 18 | int*,double*); 19 | void cdfchi(int*,double*,double*,double*,double*,int*,double*); 20 | void cdfchn(int*,double*,double*,double*,double*,double*,int*,double*); 21 | void cdff(int*,double*,double*,double*,double*,double*,int*,double*); 22 | void cdffnc(int*,double*,double*,double*,double*,double*,double*, 23 | int*s,double*); 24 | void cdfgam(int*,double*,double*,double*,double*,double*,int*,double*); 25 | void cdfnbn(int*,double*,double*,double*,double*,double*,double*, 26 | int*,double*); 27 | void cdfnor(int*,double*,double*,double*,double*,double*,int*,double*); 28 | void cdfpoi(int*,double*,double*,double*,double*,int*,double*); 29 | void cdft(int*,double*,double*,double*,double*,int*,double*); 30 | void cdftnc(int*,double*,double*,double*,double*,double*,int*,double*); 31 | void cumbet(double*,double*,double*,double*,double*,double*); 32 | void cumbin(double*,double*,double*,double*,double*,double*); 33 | void cumchi(double*,double*,double*,double*); 34 | void cumchn(double*,double*,double*,double*,double*); 35 | void cumf(double*,double*,double*,double*,double*); 36 | void cumfnc(double*,double*,double*,double*,double*,double*); 37 | void cumgam(double*,double*,double*,double*); 38 | void cumnbn(double*,double*,double*,double*,double*,double*); 39 | void cumnor(double*,double*,double*); 40 | void cumpoi(double*,double*,double*,double*); 41 | void 
cumt(double*,double*,double*,double*); 42 | void cumtnc(double*,double*,double*,double*,double*); 43 | double devlpl(double [],int*,double*); 44 | double dinvnr(double *p,double *q); 45 | #ifdef AW_VOID_COMPILER_COMPLAINTS 46 | static void E0000(int,int*,double*,double*,unsigned long*, 47 | unsigned long*,double*,double*,double*, 48 | double*,double*,double*,double*); 49 | #endif 50 | void dinvr(int*,double*,double*,unsigned long*,unsigned long*); 51 | void dstinv(double*,double*,double*,double*,double*,double*, 52 | double*); 53 | double dt1(double*,double*,double*); 54 | #ifdef AW_AVOID_COMPILER_COMPLAINTS 55 | static void E0001(int,int*,double*,double*,double*,double*, 56 | unsigned long*,unsigned long*,double*,double*, 57 | double*,double*); 58 | #endif 59 | void dzror(int*,double*,double*,double*,double *, 60 | unsigned long*,unsigned long*); 61 | void dstzr(double *zxlo,double *zxhi,double *zabstl,double *zreltl); 62 | double erf1(double*); 63 | double erfc1(int*,double*); 64 | double esum(int*,double*); 65 | double exparg(int*); 66 | double fpser(double*,double*,double*,double*); 67 | double gam1(double*); 68 | void gaminv(double*,double*,double*,double*,double*,int*); 69 | double gamln(double*); 70 | double gamln1(double*); 71 | double Xgamm(double*); 72 | void grat1(double*,double*,double*,double*,double*,double*); 73 | void gratio(double*,double*,double*,double*,int*); 74 | double gsumln(double*,double*); 75 | double psi(double*); 76 | double rcomp(double*,double*); 77 | double rexp(double*); 78 | double rlog(double*); 79 | double rlog1(double*); 80 | double spmpar(int*); 81 | double stvaln(double*); 82 | double fifdint(double); 83 | double fifdmax1(double,double); 84 | double fifdmin1(double,double); 85 | double fifdsign(double,double); 86 | long fifidint(double); 87 | long fifmod(long,long); 88 | void ftnstop(char*); 89 | extern int ipmpar(int*); 90 | 91 | -------------------------------------------------------------------------------- 
/src/lofreq/.gitignore: -------------------------------------------------------------------------------- 1 | # backup files 2 | *~ 3 | *# 4 | 5 | # objects 6 | *.o 7 | *.a 8 | *.lo 9 | *.la 10 | lofreq 11 | lofreq2 12 | 13 | Makefile 14 | Makefile.log 15 | .deps 16 | .libs 17 | clang_output* 18 | 19 | schmock* -------------------------------------------------------------------------------- /src/lofreq/Makefile.am: -------------------------------------------------------------------------------- 1 | AM_CFLAGS = -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -Wall -O3 -I../cdflib90/ -I../uthash $(HTSLIB_CPPFLAGS) @AM_CFLAGS@ 2 | AM_LDFLAGS = $(LDFLAGS_for_htslib) @AM_LDFLAGS@ 3 | bin_PROGRAMS = lofreq 4 | lofreq_SOURCES = bam_md_ext.c bam_md_ext.h \ 5 | bedidx.c bam_index.c \ 6 | binom.c binom.h \ 7 | defaults.h \ 8 | fet.c fet.h \ 9 | kprobaln_ext.c kprobaln_ext.h \ 10 | log.c log.h \ 11 | lofreq_alnqual.c lofreq_alnqual.h \ 12 | lofreq_index.c lofreq_index.h \ 13 | lofreq_uniq.h lofreq_uniq.c \ 14 | lofreq_checkref.h lofreq_checkref.c \ 15 | lofreq_indelqual.h lofreq_indelqual.c \ 16 | lofreq_main.c \ 17 | lofreq_viterbi.c lofreq_viterbi.h \ 18 | lofreq_vcfset.c lofreq_vcfset.h \ 19 | lofreq_filter.c lofreq_filter.h \ 20 | lofreq_call.c lofreq_call.h \ 21 | multtest.c multtest.h \ 22 | plp.c plp.h \ 23 | samutils.h samutils.c \ 24 | snpcaller.h snpcaller.c \ 25 | utils.c utils.h \ 26 | vcf.c vcf.h \ 27 | viterbi.c viterbi.h 28 | #lofreq_bamstats.h lofreq_bamstats.c 29 | 30 | if HTSLIB_IS_SRC 31 | 32 | include $(HTSDIR)/htslib_static.mk 33 | LDFLAGS_for_htslib = $(HTSLIB_static_LDFLAGS) 34 | LIBS_for_htslib = $(HTSDIR)/libhts.a $(HTSLIB_static_LIBS) 35 | 36 | else 37 | 38 | LDFLAGS_for_htslib = $(HTSLIB_LDFLAGS) 39 | LIBS_for_htslib = -lhts 40 | 41 | endif 42 | 43 | # note: order matters 44 | lofreq_LDADD = $(LIBS_for_htslib) ../cdflib90/libcdf.a 45 | # -l:libgsl.a -lm 46 | -------------------------------------------------------------------------------- 
/src/lofreq/bam_index.c: -------------------------------------------------------------------------------- 1 | /* bam_index.c -- index and idxstats subcommands. 2 | 3 | Copyright (C) 2008-2011, 2013, 2014 Genome Research Ltd. 4 | Portions copyright (C) 2010 Broad Institute. 5 | Portions copyright (C) 2013 Peter Cock, The James Hutton Institute. 6 | 7 | Author: Heng Li 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notices and this permission notice shall be included in 17 | all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 
*/ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #define __STDC_FORMAT_MACROS 33 | #include 34 | #include 35 | 36 | #define BAM_LIDX_SHIFT 14 37 | 38 | int bam_index_build2(const char *fn, const char *_fnidx) 39 | { 40 | fprintf(stderr, "Samtools-htslib-API: bam_index_build2() not yet implemented\n"); 41 | abort(); 42 | } 43 | 44 | static void index_usage(FILE *fp) 45 | { 46 | fprintf(fp, 47 | "Usage: samtools index [-bc] [-m INT] [out.index]\n" 48 | "Options:\n" 49 | " -b Generate BAI-format index for BAM files [default]\n" 50 | " -c Generate CSI-format index for BAM files\n" 51 | " -m INT Set minimum interval size for CSI indices to 2^INT [%d]\n", BAM_LIDX_SHIFT); 52 | } 53 | 54 | int bam_index(int argc, char *argv[]) 55 | { 56 | int csi = 0; 57 | int min_shift = BAM_LIDX_SHIFT; 58 | int c; 59 | 60 | while ((c = getopt(argc, argv, "bcm:")) >= 0) 61 | switch (c) { 62 | case 'b': csi = 0; break; 63 | case 'c': csi = 1; break; 64 | case 'm': csi = 1; min_shift = atoi(optarg); break; 65 | default: 66 | index_usage(stderr); 67 | return 1; 68 | } 69 | 70 | if (optind == argc) { 71 | index_usage(stdout); 72 | return 1; 73 | } 74 | if (argc - optind > 1) bam_index_build2(argv[optind], argv[optind+1]); 75 | else bam_index_build(argv[optind], csi? 
min_shift : 0); 76 | return 0; 77 | } 78 | 79 | int bam_idxstats(int argc, char *argv[]) 80 | { 81 | hts_idx_t* idx; 82 | bam_hdr_t* header; 83 | samFile* fp; 84 | 85 | if (argc < 2) { 86 | fprintf(stderr, "Usage: samtools idxstats \n"); 87 | return 1; 88 | } 89 | fp = sam_open(argv[1], "r"); 90 | if (fp == NULL) { fprintf(stderr, "[%s] fail to open BAM.\n", __func__); return 1; } 91 | header = sam_hdr_read(fp); 92 | idx = sam_index_load(fp, argv[1]); 93 | if (idx == NULL) { fprintf(stderr, "[%s] fail to load the index.\n", __func__); return 1; } 94 | 95 | int i; 96 | for (i = 0; i < header->n_targets; ++i) { 97 | // Print out contig name and length 98 | printf("%s\t%d", header->target_name[i], header->target_len[i]); 99 | // Now fetch info about it from the meta bin 100 | uint64_t u, v; 101 | hts_idx_get_stat(idx, i, &u, &v); 102 | printf("\t%" PRIu64 "\t%" PRIu64 "\n", u, v); 103 | } 104 | // Dump information about unmapped reads 105 | printf("*\t0\t0\t%" PRIu64 "\n", hts_idx_get_n_no_coor(idx)); 106 | bam_hdr_destroy(header); 107 | hts_idx_destroy(idx); 108 | sam_close(fp); 109 | return 0; 110 | } 111 | -------------------------------------------------------------------------------- /src/lofreq/bam_index.c.LICENSE: -------------------------------------------------------------------------------- 1 | Unchanged copy of corresponding file in samtools 1.1. 2 | 3 | --- 4 | 5 | The MIT/Expat License 6 | 7 | Copyright (C) 2008-2014 Genome Research Ltd. 
8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in 17 | all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | 27 | 28 | [The use of a range of years within a copyright notice in this distribution 29 | should be interpreted as being equivalent to a list of years including the 30 | first and last year specified and all consecutive years between them. 31 | 32 | For example, a copyright notice that reads "Copyright (C) 2005, 2007-2009, 33 | 2011-2012" should be interpreted as being identical to a notice that reads 34 | "Copyright (C) 2005, 2007, 2008, 2009, 2011, 2012" and a copyright notice 35 | that reads "Copyright (C) 2005-2012" should be interpreted as being identical 36 | to a notice that reads "Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 37 | 2011, 2012".] 
38 | -------------------------------------------------------------------------------- /src/lofreq/bam_index.c.README: -------------------------------------------------------------------------------- 1 | copied from samtools-1.1 2 | needed because idxstats is not part of the samtools API yet 3 | -------------------------------------------------------------------------------- /src/lofreq/bam_md_ext.h: -------------------------------------------------------------------------------- 1 | /* The MIT License 2 | 3 | Copyright (c) 2003-2006, 2008, 2009 by Heng Li 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 
24 | */ 25 | 26 | #ifndef BAM_MD_EXT_H 27 | #define BAM_MD_EXT_H 28 | 29 | 30 | int bam_prob_realn_core_ext(bam1_t *b, const char *ref, 31 | int baq_flag, int ext_baq, int idaq_flag); 32 | 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /src/lofreq/bedidx.c.LICENSE: -------------------------------------------------------------------------------- 1 | Unchanged copy of corresponding file in samtools 1.1. 2 | 3 | --- 4 | 5 | The MIT/Expat License 6 | 7 | Copyright (C) 2008-2014 Genome Research Ltd. 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in 17 | all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | 27 | 28 | [The use of a range of years within a copyright notice in this distribution 29 | should be interpreted as being equivalent to a list of years including the 30 | first and last year specified and all consecutive years between them. 
31 | 32 | For example, a copyright notice that reads "Copyright (C) 2005, 2007-2009, 33 | 2011-2012" should be interpreted as being identical to a notice that reads 34 | "Copyright (C) 2005, 2007, 2008, 2009, 2011, 2012" and a copyright notice 35 | that reads "Copyright (C) 2005-2012" should be interpreted as being identical 36 | to a notice that reads "Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 37 | 2011, 2012".] 38 | -------------------------------------------------------------------------------- /src/lofreq/binom.c: -------------------------------------------------------------------------------- 1 | /* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ 2 | /********************************************************************* 3 | * The MIT License (MIT) 4 | * 5 | * Copyright (c) 2013,2014 Genome Institute of Singapore 6 | * 7 | * Permission is hereby granted, free of charge, to any person 8 | * obtaining a copy of this software and associated documentation files 9 | * (the "Software"), to deal in the Software without restriction, 10 | * including without limitation the rights to use, copy, modify, merge, 11 | * publish, distribute, sublicense, and/or sell copies of the Software, 12 | * and to permit persons to whom the Software is furnished to do so, 13 | * subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be 16 | * included in all copies or substantial portions of the Software. 17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 22 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 23 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | * SOFTWARE. 26 | * 27 | ************************************************************************/ 28 | 29 | 30 | 31 | #include 32 | #include 33 | 34 | #include "cdflib.h" 35 | #include "binom.h" 36 | 37 | 38 | 39 | #define MIN(X,Y) ((X) < (Y) ? (X) : (Y)) 40 | 41 | 42 | 43 | /** 44 | * @brief Compute cdf and sf 45 | * 46 | * P is the cdf evaluated at X, Q is the compliment of the cdf 47 | * evaluated at X, i.e. 1-P (AKA sf) 48 | * 49 | * Returns non-zero status on failure 50 | * 51 | */ 52 | int binom(double *p, double *q, 53 | int num_trials, int num_success, double prob_success) 54 | { 55 | int which=1; 56 | int status=1; /* error by default */ 57 | double ompr = 1.0 - prob_success; 58 | double bound; 59 | double q2, p2; 60 | 61 | double s = (double)num_success; 62 | double xn = (double)num_trials; 63 | double pr = (double)prob_success; 64 | 65 | /* P is always the cdf evaluated at X, Q is always the compliment of the 66 | cdf evaluated at X, i.e. 1-P, and X is always the value at which the 67 | cdf is evaluated. 
*/ 68 | 69 | (void) cdfbin(&which, p?p:&p2, q?q:&q2, 70 | &s, &xn, &pr, &ompr, 71 | &status, &bound); 72 | 73 | #ifdef DEBUG 74 | 75 | fprintf(stderr, "DEBUG(%s:%s:%d): in num_success = %d\n", 76 | __FILE__, __FUNCTION__, __LINE__, num_success); 77 | fprintf(stderr, "DEBUG(%s:%s:%d): in num_trials = %d\n", 78 | __FILE__, __FUNCTION__, __LINE__, num_trials); 79 | fprintf(stderr, "DEBUG(%s:%s:%d): in pr = %g\n", 80 | __FILE__, __FUNCTION__, __LINE__, prob_success); 81 | fprintf(stderr, "DEBUG(%s:%s:%d): out p=%g\n", 82 | __FILE__, __FUNCTION__, __LINE__, p?*p:p2); 83 | fprintf(stderr, "DEBUG(%s:%s:%d): out q=%g\n", 84 | __FILE__, __FUNCTION__, __LINE__, q?*q:q2); 85 | fprintf(stderr, "DEBUG(%s:%s:%d): out status=%d\n", 86 | __FILE__, __FUNCTION__, __LINE__, status); 87 | fprintf(stderr, "DEBUG(%s:%s:%d): out bound=%g\n", 88 | __FILE__, __FUNCTION__, __LINE__, bound); 89 | #endif 90 | 91 | return status; 92 | } 93 | /* end of binom */ 94 | 95 | 96 | 97 | #ifdef BINOM_MAIN 98 | 99 | 100 | /* 101 | gcc -pedantic -Wall -g -std=gnu99 -O2 -DBINOM_MAIN -I../cdflib90/ -o binom binom.c utils.c log.c ../cdflib90/libcdf.a -lm 102 | */ 103 | #include 104 | #include "log.h" 105 | 106 | int main(int argc, char *argv[]) { 107 | int num_success; 108 | int num_trials; 109 | double prob_success; 110 | double sf_pvalue; 111 | double cdf_pvalue; 112 | 113 | if (argc<4) { 114 | fprintf(stderr, "need num_success num_trials and prob_success as args"); 115 | return -1; 116 | } 117 | 118 | num_success = atoi(argv[1]); 119 | num_trials = atoi(argv[2]); 120 | prob_success = atof(argv[3]); 121 | 122 | 123 | fprintf(stdout, "num_success=%d num_trials=%d prob_success=%f\n", num_success, num_trials, prob_success); 124 | if (0 != binom(&cdf_pvalue, &sf_pvalue, num_trials, num_success, prob_success)) { 125 | fprintf(stderr, "%s\n", "binom() failed"); 126 | return EXIT_FAILURE; 127 | } 128 | 129 | printf("sf: %g\tcdf: %g\n", sf_pvalue, cdf_pvalue); 130 | 131 | printf("sf should be identical to 
scipy.stats.binom.sf(%d, %d, %f)\n", num_success, num_trials, prob_success); 132 | printf("cdf should be identical to scipy.stats.binom.cdf(%d, %d, %f)\n", num_success, num_trials, prob_success); 133 | return EXIT_SUCCESS; 134 | } 135 | #endif 136 | -------------------------------------------------------------------------------- /src/lofreq/binom.h: -------------------------------------------------------------------------------- 1 | /* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */ 2 | /********************************************************************* 3 | * The MIT License (MIT) 4 | * 5 | * Copyright (c) 2013,2014 Genome Institute of Singapore 6 | * 7 | * Permission is hereby granted, free of charge, to any person 8 | * obtaining a copy of this software and associated documentation files 9 | * (the "Software"), to deal in the Software without restriction, 10 | * including without limitation the rights to use, copy, modify, merge, 11 | * publish, distribute, sublicense, and/or sell copies of the Software, 12 | * and to permit persons to whom the Software is furnished to do so, 13 | * subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be 16 | * included in all copies or substantial portions of the Software. 17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 22 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 23 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | * SOFTWARE. 
26 | * 27 | ************************************************************************/ 28 | 29 | #ifndef BINOM_H 30 | #define BINOM_H 31 | /* Binomial cdf (*p) and its complement 1-cdf (*q) for num_success successes in num_trials trials with per-trial probability prob_success. Parameter names and order match the definition in binom.c. Returns non-zero on failure. */ 32 | int binom(double *p, double *q, 33 | int num_trials, int num_success, double prob_success); 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /src/lofreq/defaults.h: -------------------------------------------------------------------------------- 1 | #ifndef LOFREQ_DEFAULTS_H 2 | #define LOFREQ_DEFAULTS_H 3 | /********************************************************************* 4 | * The MIT License (MIT) 5 | * 6 | * Copyright (c) 2013,2014 Genome Institute of Singapore 7 | * 8 | * Permission is hereby granted, free of charge, to any person 9 | * obtaining a copy of this software and associated documentation files 10 | * (the "Software"), to deal in the Software without restriction, 11 | * including without limitation the rights to use, copy, modify, merge, 12 | * publish, distribute, sublicense, and/or sell copies of the Software, 13 | * and to permit persons to whom the Software is furnished to do so, 14 | * subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be 17 | * included in all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 22 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 23 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 24 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 25 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 26 | * SOFTWARE. 
27 | * 28 | ************************************************************************/ 29 | 30 | 31 | 32 | #define SANGER_PHRED_MAX 93 33 | 34 | /* mapping quality filters: applied to all reads. don't set too high as 35 | * this is a mapper dependent value 36 | * in case of BWA it's also dependent on the alignment command used. 37 | */ 38 | #define DEFAULT_MIN_MQ 0 39 | #define DEFAULT_MAX_MQ 255 40 | 41 | /* minimum base quality of any base below which they are skipped. 42 | note: GATK doesn't recalibrate BQ <=5 */ 43 | #define DEFAULT_MIN_BQ 6 44 | /* minimum base quality for alt bases: alt bases below this value are skipped */ 45 | #define DEFAULT_MIN_ALT_BQ 6 46 | #define DEFAULT_DEF_ALT_BQ 0 47 | /* -1: ref median, 0: keep original, >0: replace with this value */ 48 | 49 | #define DEFAULT_MIN_JQ 0 50 | /* minimum merged quality for alt bases */ 51 | #define DEFAULT_MIN_ALT_JQ 0 52 | #define DEFAULT_DEF_ALT_JQ 0 53 | /* -1: ref median, 0: keep original, >0: replace with this value */ 54 | 55 | /* non match quality for source qual */ 56 | #define DEFAULT_DEF_NM_QUAL -1 57 | 58 | /* coverage thresholds */ 59 | #define DEFAULT_MIN_COV 1 60 | #define DEFAULT_MAX_PLP_DEPTH 1000000 61 | 62 | #define DEFAULT_BAQ_ON 1 63 | 64 | /* make lofreq blind to anything below this value */ 65 | #define DEFAULT_MIN_PLP_BQ 3 66 | #define DEFAULT_MIN_PLP_IDQ 0 67 | 68 | #define DEFAULT_SIG 0.01 69 | 70 | /* ---------------------------------------------------------------------- */ 71 | 72 | /* Four nucleotides, with one consensus, makes three 73 | non-consensus bases */ 74 | #define NUM_NONCONS_BASES 3 75 | /* the VARCALL_USE_* values are distinct powers of two (1, 2, 4, 8); presumably meant to be OR-ed into one mask - confirm against the users of these macros */ 76 | #define VARCALL_USE_BAQ 1 77 | #define VARCALL_USE_MQ 2 78 | #define VARCALL_USE_SQ 4 79 | /* indel alignment quality */ 80 | #define VARCALL_USE_IDAQ 8 81 | 82 | 83 | /* private tag for actual baq values: "l"ofreq "b"ase-alignment */ 84 | #define BAQ_TAG "lb" 85 | 86 | 87 | #ifndef MIN 88 | #define MIN(X,Y) ((X) < (Y) ? 
(X) : (Y)) 89 | #endif 90 | #ifndef MAX 91 | #define MAX(X,Y) ((X) > (Y) ? (X) : (Y)) 92 | #endif 93 | 94 | 95 | #define AI_TAG "ai" 96 | #define AD_TAG "ad" 97 | 98 | /* base insertion and deletion qualities. GATK uses BI and BD. 99 | * GATKs BI & BD: "are per-base quantities which estimate 100 | * the probability that the next base in the read was 101 | * mis-incorporated or mis-deleted (due to slippage, for 102 | * example)". See 103 | * http://www.broadinstitute.org/gatk/guide/article?id=44 104 | * and 105 | * http2://gatkforums.broadinstitute.org/discussion/1619/baserecalibratorprintreads-bd-and-bi-flags 106 | * 107 | */ 108 | #define BI_TAG "BI" 109 | #define BD_TAG "BD" 110 | 111 | #endif 112 | -------------------------------------------------------------------------------- /src/lofreq/fet.c: -------------------------------------------------------------------------------- 1 | /* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */ 2 | /* Taken from samtools 0.1.18 (r982:295) */ 3 | 4 | #include 5 | #include 6 | 7 | /* This program is implemented with ideas from this web page: 8 | * 9 | * http://www.langsrud.com/fisher.htm 10 | */ 11 | 12 | /* log\binom{n}{k} */ 13 | static double lbinom(int n, int k) 14 | { 15 | if (k == 0 || n == k) return 0; 16 | return lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1); 17 | } 18 | 19 | /* n11 n12 | n1_ 20 | n21 n22 | n2_ 21 | -----------+---- 22 | n_1 n_2 | n 23 | */ 24 | 25 | /* hypergeometric distribution */ 26 | static double hypergeo(int n11, int n1_, int n_1, int n) 27 | { 28 | return exp(lbinom(n1_, n11) + lbinom(n-n1_, n_1-n11) - lbinom(n, n_1)); 29 | } 30 | 31 | typedef struct { 32 | int n11, n1_, n_1, n; 33 | double p; 34 | } hgacc_t; 35 | 36 | /* incremental version of hypergenometric distribution */ 37 | static double hypergeo_acc(int n11, int n1_, int n_1, int n, hgacc_t *aux) 38 | { 39 | if (n1_ || n_1 || n) { 40 | aux->n11 = n11; aux->n1_ = n1_; aux->n_1 = n_1; aux->n = n; 41 | } else { /* then only n11 changed; 
the rest fixed */ 42 | if (n11%11 && n11 + aux->n - aux->n1_ - aux->n_1) { 43 | if (n11 == aux->n11 + 1) { /* incremental */ 44 | aux->p *= (double)(aux->n1_ - aux->n11) / n11 45 | * (aux->n_1 - aux->n11) / (n11 + aux->n - aux->n1_ - aux->n_1); 46 | aux->n11 = n11; 47 | return aux->p; 48 | } 49 | if (n11 == aux->n11 - 1) { /* incremental */ 50 | aux->p *= (double)aux->n11 / (aux->n1_ - n11) 51 | * (aux->n11 + aux->n - aux->n1_ - aux->n_1) / (aux->n_1 - n11); 52 | aux->n11 = n11; 53 | return aux->p; 54 | } 55 | } 56 | aux->n11 = n11; 57 | } 58 | aux->p = hypergeo(aux->n11, aux->n1_, aux->n_1, aux->n); 59 | return aux->p; 60 | } 61 | 62 | double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two) 63 | { 64 | int i, j, max, min; 65 | double p, q, left, right; 66 | hgacc_t aux; 67 | int n1_, n_1, n; 68 | 69 | n1_ = n11 + n12; n_1 = n11 + n21; n = n11 + n12 + n21 + n22; /* calculate n1_, n_1 and n */ 70 | max = (n_1 < n1_) ? n_1 : n1_; /* max n11, for right tail */ 71 | min = n1_ + n_1 - n; 72 | if (min < 0) min = 0; /* min n11, for left tail */ 73 | *two = *_left = *_right = 1.; 74 | if (min == max) return 1.; /* no need to do test */ 75 | q = hypergeo_acc(n11, n1_, n_1, n, &aux); /* the probability of the current table */ 76 | /* left tail */ 77 | p = hypergeo_acc(min, 0, 0, 0, &aux); 78 | for (left = 0., i = min + 1; p < 0.99999999 * q; ++i) /* loop until underflow */ 79 | left += p, p = hypergeo_acc(i, 0, 0, 0, &aux); 80 | --i; 81 | if (p < 1.00000001 * q) left += p; 82 | else --i; 83 | /* right tail */ 84 | p = hypergeo_acc(max, 0, 0, 0, &aux); 85 | for (right = 0., j = max - 1; p < 0.99999999 * q; --j) /* loop until underflow */ 86 | right += p, p = hypergeo_acc(j, 0, 0, 0, &aux); 87 | ++j; 88 | if (p < 1.00000001 * q) right += p; 89 | else ++j; 90 | /* two-tail */ 91 | *two = left + right; 92 | if (*two > 1.) *two = 1.; 93 | /* adjust left and right */ 94 | if (abs(i - n11) < abs(j - n11)) right = 1. 
- left + q; 95 | else left = 1.0 - right + q; 96 | *_left = left; *_right = right; 97 | return q; 98 | } 99 | 100 | #ifdef FET_MAIN 101 | #include 102 | 103 | int main(int argc, char *argv[]) 104 | { 105 | char id[1024]; 106 | int n11, n12, n21, n22; 107 | double left, right, twotail, prob; 108 | 109 | while (scanf("%s%d%d%d%d", id, &n11, &n12, &n21, &n22) == 5) { 110 | prob = kt_fisher_exact(n11, n12, n21, n22, &left, &right, &twotail); 111 | printf("%s\t%d\t%d\t%d\t%d\t%.6g\t%.6g\t%.6g\t%.6g\n", id, n11, n12, n21, n22, 112 | prob, left, right, twotail); 113 | } 114 | return 0; 115 | } 116 | #endif 117 | -------------------------------------------------------------------------------- /src/lofreq/fet.c.LICENSE: -------------------------------------------------------------------------------- 1 | Copied from from samtools 0.1.18 (r982:295) 2 | 3 | --- 4 | 5 | The MIT License 6 | 7 | Copyright (c) 2008-2009 Genome Research Ltd. 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in 17 | all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | THE SOFTWARE. -------------------------------------------------------------------------------- /src/lofreq/fet.h: -------------------------------------------------------------------------------- 1 | #ifndef FET_H 2 | #define FET_H 3 | 4 | double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two); 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /src/lofreq/kprobaln_ext.h: -------------------------------------------------------------------------------- 1 | /* The MIT License 2 | 3 | Copyright (c) 2003-2006, 2008, 2009 by Heng Li 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | */ 25 | 26 | #ifndef LH3_KPROBALN_EXT_H_ 27 | #define LH3_KPROBALN_EXT_H_ 28 | 29 | #include 30 | 31 | typedef struct { 32 | float d, e; 33 | int bw; 34 | } kpa_ext_par_t; 35 | 36 | #ifdef __cplusplus 37 | extern "C" { 38 | #endif 39 | 40 | int kpa_ext_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_query, 41 | const uint8_t *iqual, const kpa_ext_par_t *c, int *state, uint8_t *q, double **pd, 42 | int *ret_bw); 43 | 44 | #ifdef __cplusplus 45 | } 46 | #endif 47 | 48 | extern kpa_ext_par_t kpa_ext_par_def, kpa_ext_par_alt, kpa_ext_par_lofreq_illumina, kpa_ext_par_lofreq_pacbio; 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /src/lofreq/lofreq_alnqual.h: -------------------------------------------------------------------------------- 1 | /********************************************************************* 2 | * The MIT License (MIT) 3 | * 4 | * Copyright (c) 2013,2014 Genome Institute of Singapore 5 | * 6 | * Permission is hereby granted, free of charge, to any person 7 | * obtaining a copy of this software and associated documentation files 8 | * (the "Software"), to deal in the Software without restriction, 9 | * including without limitation the rights to use, copy, modify, merge, 10 | * publish, distribute, sublicense, and/or sell copies of the Software, 11 | * and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 
16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 22 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | * SOFTWARE. 25 | * 26 | ************************************************************************/ 27 | 28 | 29 | #ifndef LOFREQ_ALNQUAL_H 30 | #define LOFREQ_ALNQUAL_H 31 | 32 | /* function prototypes here */ 33 | int main_alnqual(int argc, char *argv[]); 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /src/lofreq/lofreq_bamstats.h: -------------------------------------------------------------------------------- 1 | /********************************************************************* 2 | * The MIT License (MIT) 3 | * 4 | * Copyright (c) 2013,2014 Genome Institute of Singapore 5 | * 6 | * Permission is hereby granted, free of charge, to any person 7 | * obtaining a copy of this software and associated documentation files 8 | * (the "Software"), to deal in the Software without restriction, 9 | * including without limitation the rights to use, copy, modify, merge, 10 | * publish, distribute, sublicense, and/or sell copies of the Software, 11 | * and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 
16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 22 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | * SOFTWARE. 25 | * 26 | ************************************************************************/ 27 | 28 | 29 | #ifndef LOFREQ_BAMSTATS_H 30 | #define LOFREQ_BAMSTATS_H 31 | 32 | int main_bamstats(int argc, char *argv[]); 33 | 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /src/lofreq/lofreq_call.h: -------------------------------------------------------------------------------- 1 | /********************************************************************* 2 | * The MIT License (MIT) 3 | * 4 | * Copyright (c) 2013,2014 Genome Institute of Singapore 5 | * 6 | * Permission is hereby granted, free of charge, to any person 7 | * obtaining a copy of this software and associated documentation files 8 | * (the "Software"), to deal in the Software without restriction, 9 | * including without limitation the rights to use, copy, modify, merge, 10 | * publish, distribute, sublicense, and/or sell copies of the Software, 11 | * and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 22 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | * SOFTWARE. 25 | * 26 | ************************************************************************/ 27 | 28 | #ifndef LOFREQ_CALL_H 29 | #define LOFREQ_CALL_H 30 | 31 | int main_call(int argc, char *argv[]); 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /src/lofreq/lofreq_checkref.c: -------------------------------------------------------------------------------- 1 | /* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */ 2 | /********************************************************************* 3 | * The MIT License (MIT) 4 | * 5 | * Copyright (c) 2013,2014 Genome Institute of Singapore 6 | * 7 | * Permission is hereby granted, free of charge, to any person 8 | * obtaining a copy of this software and associated documentation files 9 | * (the "Software"), to deal in the Software without restriction, 10 | * including without limitation the rights to use, copy, modify, merge, 11 | * publish, distribute, sublicense, and/or sell copies of the Software, 12 | * and to permit persons to whom the Software is furnished to do so, 13 | * subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be 16 | * included in all copies or substantial portions of the Software. 17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 22 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 23 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | * SOFTWARE. 26 | * 27 | ************************************************************************/ 28 | 29 | 30 | /* This is an almost one to one copy of the corresponding bits in 31 | * samtools' bam_index.c. NOTE(review): the code below only wraps checkref(); this remark may be a leftover from another file - confirm or drop. */ 32 | 33 | #include 34 | #include 35 | 36 | 37 | /* lofreq includes */ 38 | #include "log.h" 39 | #include "utils.h" 40 | #include "samutils.h" 41 | 42 | #define MYNAME "lofreq checkref" 43 | /* Print the one-line description and the usage synopsis of this subcommand to stderr. */ 44 | static void 45 | usage() 46 | { 47 | fprintf(stderr, 48 | "\n%s: Check whether given BAM file was created with given reference\n\n", MYNAME); 49 | fprintf(stderr,"Usage: %s ref.fa in.bam\n\n", MYNAME); 50 | } 51 | 52 | /* Entry point of the checkref subcommand. Expects exactly four argv entries, with the reference FASTA in argv[2] and the BAM file in argv[3]; otherwise prints usage and returns 1. Prints "OK" and returns 0 when checkref() returns zero, else prints "Failed" and returns 1. */ 53 | int main_checkref(int argc, char *argv[]) 54 | { 55 | char *bam_file; 56 | char *fasta_file; 57 | 58 | if (argc != 4) { 59 | usage(); 60 | return 1; 61 | } 62 | 63 | /* pick up the reference FASTA and the BAM file arguments 64 | */ 65 | fasta_file = argv[2]; 66 | bam_file = argv[3]; 67 | 68 | if (checkref(fasta_file, bam_file)) { 69 | printf("Failed\n"); 70 | return 1; 71 | } else { 72 | printf("OK\n"); 73 | return 0; 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/lofreq/lofreq_checkref.h: -------------------------------------------------------------------------------- 1 | /********************************************************************* 2 | * The MIT License (MIT) 3 | * 4 | * Copyright (c) 2013,2014 Genome Institute of Singapore 5 | * 6 | * Permission is hereby granted, free of charge, to any person 7 | * obtaining a copy of this software and associated documentation files 8 | * (the "Software"), to deal in the Software without restriction, 9 | * including without limitation the rights to use, copy, modify, merge, 10 | * publish, distribute, 
sublicense, and/or sell copies of the Software, 11 | * and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 22 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | * SOFTWARE. 25 | * 26 | ************************************************************************/ 27 | 28 | #ifndef LOFREQ_CHECKREF_H 29 | #define LOFREQ_CHECKREF_H 30 | 31 | int main_checkref(int argc, char *argv[]); 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /src/lofreq/lofreq_filter.h: -------------------------------------------------------------------------------- 1 | /********************************************************************* 2 | * The MIT License (MIT) 3 | * 4 | * Copyright (c) 2013,2014 Genome Institute of Singapore 5 | * 6 | * Permission is hereby granted, free of charge, to any person 7 | * obtaining a copy of this software and associated documentation files 8 | * (the "Software"), to deal in the Software without restriction, 9 | * including without limitation the rights to use, copy, modify, merge, 10 | * publish, distribute, sublicense, and/or sell copies of the Software, 11 | * and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * 
included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 22 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | * SOFTWARE. 25 | * 26 | ************************************************************************/ 27 | 28 | #ifndef LOFREQ_FILTER_H 29 | #define LOFREQ_FILTER_H 30 | 31 | int main_filter(int argc, char *argv[]); 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /src/lofreq/lofreq_indelqual.h: -------------------------------------------------------------------------------- 1 | /********************************************************************* 2 | * The MIT License (MIT) 3 | * 4 | * Copyright (c) 2013,2014 Genome Institute of Singapore 5 | * 6 | * Permission is hereby granted, free of charge, to any person 7 | * obtaining a copy of this software and associated documentation files 8 | * (the "Software"), to deal in the Software without restriction, 9 | * including without limitation the rights to use, copy, modify, merge, 10 | * publish, distribute, sublicense, and/or sell copies of the Software, 11 | * and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 
16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 22 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | * SOFTWARE. 25 | * 26 | ************************************************************************/ 27 | 28 | #ifndef LOFREQ_INDELQUAL 29 | #define LOFREQ_INDELQUAL 30 | 31 | int main_indelqual(int argc, char *argv[]); 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /src/lofreq/lofreq_index.c: -------------------------------------------------------------------------------- 1 | /* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */ 2 | 3 | /* This is an almost one to one copy of the corresponding bits in samtools */ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | /* htslib includes */ 10 | #include "htslib/faidx.h" 11 | #include "htslib/sam.h" 12 | 13 | /* bam_index actually part of API but bam_idxstats not */ 14 | int bam_index(int argc, char *argv[]); 15 | int bam_idxstats(int argc, char *argv[]); 16 | 17 | /* lofreq includes */ 18 | #include "log.h" 19 | 20 | 21 | #if 1 22 | #define MYNAME "lofreq" 23 | #else 24 | #define MYNAME PACKAGE 25 | #endif 26 | 27 | 28 | int main_faidx(int argc, char *argv[]) 29 | { 30 | char *fa; 31 | 32 | fa = argv[2]; 33 | if (fai_build(fa) < 0) { 34 | return 1; 35 | } 36 | 37 | return 0; 38 | } 39 | 40 | int 41 | main_index(int argc, char *argv[]) 42 | { 43 | char *b = argv[2]; 44 | return sam_index_build(b, 0); 45 | } 46 | 47 | int 48 | main_idxstats(int argc, char *argv[]) 49 | { 50 | return bam_idxstats(argc-1, argv+1); 51 | } 52 | 
-------------------------------------------------------------------------------- /src/lofreq/lofreq_index.h: -------------------------------------------------------------------------------- 1 | #ifndef LOFREQ_INDEX_H 2 | #define LOFREQ_INDEX_H 3 | 4 | int main_faidx(int argc, char *argv[]); 5 | int main_index(int argc, char *argv[]); 6 | int main_idxstats(int argc, char *argv[]); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /src/lofreq/lofreq_uniq.h: -------------------------------------------------------------------------------- 1 | /********************************************************************* 2 | * The MIT License (MIT) 3 | * 4 | * Copyright (c) 2013,2014 Genome Institute of Singapore 5 | * 6 | * Permission is hereby granted, free of charge, to any person 7 | * obtaining a copy of this software and associated documentation files 8 | * (the "Software"), to deal in the Software without restriction, 9 | * including without limitation the rights to use, copy, modify, merge, 10 | * publish, distribute, sublicense, and/or sell copies of the Software, 11 | * and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 22 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | * SOFTWARE. 
25 | * 26 | ************************************************************************/ 27 | 28 | #ifndef LOFREQ_UNIQ_H 29 | #define LOFREQ_UNIQ_H 30 | 31 | int main_uniq(int argc, char *argv[]); 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /src/lofreq/lofreq_vcfset.h: -------------------------------------------------------------------------------- 1 | /********************************************************************* 2 | * The MIT License (MIT) 3 | * 4 | * Copyright (c) 2013,2014 Genome Institute of Singapore 5 | * 6 | * Permission is hereby granted, free of charge, to any person 7 | * obtaining a copy of this software and associated documentation files 8 | * (the "Software"), to deal in the Software without restriction, 9 | * including without limitation the rights to use, copy, modify, merge, 10 | * publish, distribute, sublicense, and/or sell copies of the Software, 11 | * and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 22 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | * SOFTWARE. 
25 | * 26 | ************************************************************************/ 27 | 28 | #ifndef LOFREQ_VCFSET_H 29 | #define LOFREQ_VCFSET_H 30 | 31 | int main_vcfset(int argc, char *argv[]); 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /src/lofreq/lofreq_viterbi.h: -------------------------------------------------------------------------------- 1 | /********************************************************************* 2 | * The MIT License (MIT) 3 | * 4 | * Copyright (c) 2013,2014 Genome Institute of Singapore 5 | * 6 | * Permission is hereby granted, free of charge, to any person 7 | * obtaining a copy of this software and associated documentation files 8 | * (the "Software"), to deal in the Software without restriction, 9 | * including without limitation the rights to use, copy, modify, merge, 10 | * publish, distribute, sublicense, and/or sell copies of the Software, 11 | * and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 22 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | * SOFTWARE. 
25 | * 26 | ************************************************************************/ 27 | 28 | 29 | #ifndef LOFREQ_VITERBI_FILE 30 | #define LOFREQ_VITERBI_FILE 31 | 32 | /* function prototypes here */ 33 | int main_viterbi(int argc, char *argv[]); 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /src/lofreq/log.c: -------------------------------------------------------------------------------- 1 | /* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */ 2 | /********************************************************************* 3 | * The MIT License (MIT) 4 | * 5 | * Copyright (c) 2013,2014 Genome Institute of Singapore 6 | * 7 | * Permission is hereby granted, free of charge, to any person 8 | * obtaining a copy of this software and associated documentation files 9 | * (the "Software"), to deal in the Software without restriction, 10 | * including without limitation the rights to use, copy, modify, merge, 11 | * publish, distribute, sublicense, and/or sell copies of the Software, 12 | * and to permit persons to whom the Software is furnished to do so, 13 | * subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be 16 | * included in all copies or substantial portions of the Software. 17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 22 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 23 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | * SOFTWARE.
26 | * 27 | ************************************************************************/ 28 | 29 | 30 | #include "log.h" 31 | 32 | int debug = 0; 33 | int verbose = 0; 34 | 35 | /* Taken from the Linux kernel source and slightly modified. 36 | */ 37 | int 38 | vout(FILE *stream, const char *fmt, ...) 39 | { 40 | va_list args; 41 | int rc; 42 | 43 | va_start(args, fmt); 44 | rc = vfprintf(stream, fmt, args); 45 | va_end(args); 46 | return rc; 47 | } 48 | 49 | -------------------------------------------------------------------------------- /src/lofreq/log.h: -------------------------------------------------------------------------------- 1 | /********************************************************************* 2 | * The MIT License (MIT) 3 | * 4 | * Copyright (c) 2013,2014 Genome Institute of Singapore 5 | * 6 | * Permission is hereby granted, free of charge, to any person 7 | * obtaining a copy of this software and associated documentation files 8 | * (the "Software"), to deal in the Software without restriction, 9 | * including without limitation the rights to use, copy, modify, merge, 10 | * publish, distribute, sublicense, and/or sell copies of the Software, 11 | * and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 22 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | * SOFTWARE. 
25 | * 26 | ************************************************************************/ 27 | 28 | #ifndef LOG_H 29 | #define LOG_H 30 | 31 | #include 32 | #include 33 | 34 | extern int debug; 35 | extern int verbose; 36 | 37 | int 38 | vout(FILE *stream, const char *fmt, ...); 39 | 40 | /* print only if debug is true*/ 41 | #define LOG_DEBUG(fmt, args...) {if (debug) {(void)vout(stderr, "DEBUG(%s|%s): " fmt, __FILE__, __FUNCTION__, ## args);}} 42 | /* print only if verbose is true*/ 43 | #define LOG_VERBOSE(fmt, args...) {if (verbose) {(void)vout(stderr, fmt, ## args);}} 44 | /* always warn to stderr */ 45 | #define LOG_WARN(fmt, args...) (void)vout(stderr, "WARNING(%s|%s): " fmt, __FILE__, __FUNCTION__, ## args) 46 | /* always print errors to stderr*/ 47 | #define LOG_ERROR(fmt, args...) (void)vout(stderr, "ERROR(%s|%s:%d): " fmt, __FILE__, __FUNCTION__, __LINE__, ## args) 48 | /* always print errors to stderr*/ 49 | #define LOG_FATAL(fmt, args...) (void)vout(stderr, "FATAL(%s|%s:%d): " fmt, __FILE__, __FUNCTION__, __LINE__, ## args) 50 | /* always print fixme's */ 51 | #define LOG_FIXME(fmt, args...) (void)vout(stderr, "FIXME(%s|%s:%d): " fmt, __FILE__, __FUNCTION__, __LINE__, ## args) 52 | /* always print notes */ 53 | #define LOG_NOTE(fmt, args...) 
(void)vout(stderr, "NOTE(%s|%s:%d): " fmt, __FILE__, __FUNCTION__, __LINE__, ## args) 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /src/lofreq/multtest.h: -------------------------------------------------------------------------------- 1 | /* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */ 2 | /********************************************************************* 3 | * The MIT License (MIT) 4 | * 5 | * Copyright (c) 2013,2014 Genome Institute of Singapore 6 | * 7 | * Permission is hereby granted, free of charge, to any person 8 | * obtaining a copy of this software and associated documentation files 9 | * (the "Software"), to deal in the Software without restriction, 10 | * including without limitation the rights to use, copy, modify, merge, 11 | * publish, distribute, sublicense, and/or sell copies of the Software, 12 | * and to permit persons to whom the Software is furnished to do so, 13 | * subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be 16 | * included in all copies or substantial portions of the Software. 17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 22 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 23 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | * SOFTWARE. 
26 | * 27 | ************************************************************************/ 28 | 29 | #ifndef MULTTEST_H 30 | #define MULTTEST_H 31 | 32 | 33 | typedef enum 34 | { 35 | MTC_NONE, 36 | MTC_BONF, 37 | MTC_HOLMBONF, 38 | MTC_FDR 39 | } mtc_type_t; 40 | 41 | 42 | #define STR(name) # name 43 | 44 | static char *mtc_type_str[] = { 45 | STR(MTC_NONE), 46 | STR(MTC_BONF), 47 | STR(MTC_HOLMBONF), 48 | STR(MTC_FDR), 49 | }; 50 | 51 | 52 | void 53 | bonf_corr(double data[], long int size, long int num_tests); 54 | 55 | void 56 | holm_bonf_corr(double data[], long int size, double alpha, long int num_tests); 57 | 58 | long int 59 | fdr(double data[], long int size, double alpha, long int num_tests, long int **irejected); 60 | 61 | int 62 | mtc_str_to_type(char *t); 63 | 64 | void 65 | mtc_str(char *buf, int mtc_type); 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /src/lofreq/samutils.h: -------------------------------------------------------------------------------- 1 | /* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */ 2 | /********************************************************************* 3 | * The MIT License (MIT) 4 | * 5 | * Copyright (c) 2013,2014 Genome Institute of Singapore 6 | * 7 | * Permission is hereby granted, free of charge, to any person 8 | * obtaining a copy of this software and associated documentation files 9 | * (the "Software"), to deal in the Software without restriction, 10 | * including without limitation the rights to use, copy, modify, merge, 11 | * publish, distribute, sublicense, and/or sell copies of the Software, 12 | * and to permit persons to whom the Software is furnished to do so, 13 | * subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be 16 | * included in all copies or substantial portions of the Software. 
17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 22 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 23 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | * SOFTWARE. 26 | * 27 | ************************************************************************/ 28 | 29 | #ifndef SAMUTILS_H 30 | #define SAMUTILS_H 31 | 32 | #include "htslib/sam.h" 33 | 34 | 35 | 36 | 37 | typedef enum { 38 | OP_MATCH, 39 | OP_MISMATCH, 40 | OP_INS, 41 | OP_DEL, 42 | NUM_OP_CATS, 43 | } op_cat_t; 44 | 45 | #define STR(name) # name 46 | 47 | static char *op_cat_str[] = { 48 | STR(OP_MATCH), 49 | STR(OP_MISMATCH), 50 | STR(OP_INS), 51 | STR(OP_DEL), 52 | STR(NUM_OP_CATS) 53 | }; 54 | 55 | 56 | char * 57 | cigar_str_from_bam(const bam1_t *b); 58 | 59 | int 60 | count_cigar_ops(int *counts, int **quals, 61 | const bam1_t *b, const char *ref, int min_bq, 62 | char *target); 63 | 64 | 65 | #ifdef USE_ALNERRPROF 66 | 67 | typedef struct { 68 | int num_targets; /* bam_header->n_targets */ 69 | int *prop_len; /* one prop length per target: index is tid */ 70 | double **props; /* one prop array per target: index is tid */ 71 | } alnerrprof_t; 72 | 73 | 74 | void 75 | normalize_alnerrprof(alnerrprof_t *alnerrprof); 76 | 77 | int 78 | parse_alnerrprof_statsfile(alnerrprof_t *alnerrprof, const char *path, bam_hdr_t *bam_header); 79 | 80 | void 81 | calc_read_alnerrprof(double *alnerrprof, unsigned long int *used_pos, 82 | const bam1_t *b, const char *ref); 83 | 84 | void 85 | write_alnerrprof_stats(char *target_name, unsigned long int *alnerrprof_usedpos, 86 | double *alnerrprof, int max_obs_read_len, FILE *out); 87 | 88 | void 89 | 
free_alnerrprof(alnerrprof_t *alnerrprof); 90 | 91 | #endif 92 | 93 | int checkref(char *fasta_file, char *bam_file); 94 | 95 | #endif 96 | -------------------------------------------------------------------------------- /src/lofreq/snpcaller.h: -------------------------------------------------------------------------------- 1 | /* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */ 2 | /********************************************************************* 3 | * The MIT License (MIT) 4 | * 5 | * Copyright (c) 2013,2014 Genome Institute of Singapore 6 | * 7 | * Permission is hereby granted, free of charge, to any person 8 | * obtaining a copy of this software and associated documentation files 9 | * (the "Software"), to deal in the Software without restriction, 10 | * including without limitation the rights to use, copy, modify, merge, 11 | * publish, distribute, sublicense, and/or sell copies of the Software, 12 | * and to permit persons to whom the Software is furnished to do so, 13 | * subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be 16 | * included in all copies or substantial portions of the Software. 17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 22 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 23 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | * SOFTWARE. 
26 | * 27 | ************************************************************************/ 28 | 29 | #ifndef SNPCALLER_H 30 | #define SNPCALLER_H 31 | 32 | #include "vcf.h" 33 | #include "plp.h" 34 | #include "defaults.h" 35 | 36 | 37 | 38 | typedef struct { 39 | int min_bq; 40 | int min_alt_bq; 41 | int def_alt_bq; 42 | 43 | int min_jq; 44 | int min_alt_jq; 45 | int def_alt_jq; 46 | 47 | int bonf_dynamic; /* boolean: incr bonf as we go along. eventual 48 | * filtering of all has to be done by 49 | * caller! */ 50 | int min_cov; 51 | long long int bonf_subst; /* warning: changed dynamically ! */ 52 | long long int bonf_indel; 53 | float sig; 54 | vcf_file_t vcf_out; 55 | int flag; /* FIXME doc? */ 56 | 57 | /* FIXME the following two logically don't belong here but 58 | * would require a new structure */ 59 | int only_indels; 60 | int no_indels; 61 | 62 | int approx_threshold_n; /* when to use fast poisson binomial approximation for early exit */ 63 | } varcall_conf_t; 64 | 65 | 66 | double 67 | merge_srcq_baseq_and_mapq(const int sq, const int bq, const int mq); 68 | 69 | double 70 | merge_srcq_baseq_mapq_and_alnq(const int sq, const int bq, const int mq, const int aq); 71 | 72 | void 73 | plp_to_errprobs(double **err_probs, int *num_err_probs, 74 | int *alt_bases, int *alt_counts, int *alt_raw_counts, 75 | const plp_col_t *p, varcall_conf_t *conf); 76 | void 77 | plp_to_ins_errprobs(double **err_probs, int *num_err_probs, 78 | const plp_col_t *p, varcall_conf_t *conf, 79 | char key[MAX_INDELSIZE]); 80 | 81 | void 82 | plp_to_del_errprobs(double **err_probs, int *num_err_probs, 83 | const plp_col_t *p, varcall_conf_t *conf, 84 | char key[MAX_INDELSIZE]); 85 | 86 | void 87 | init_varcall_conf(varcall_conf_t *c); 88 | 89 | void 90 | dump_varcall_conf(const varcall_conf_t *c, FILE *stream) ; 91 | 92 | 93 | extern double * 94 | poissbin(long double *pvalue, const double *err_probs, 95 | const int num_err_probs, const int num_failures, 96 | const long long int bonf, const
double sig); 97 | extern int 98 | snpcaller(long double *snp_pvalues, const double *err_probs, 99 | const int num_err_probs, const int *noncons_counts, 100 | const long long int bonf_factor, 101 | const double sig_level, 102 | const int approx_treshold_n); 103 | 104 | 105 | #endif 106 | -------------------------------------------------------------------------------- /src/lofreq/utils.h: -------------------------------------------------------------------------------- 1 | /* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */ 2 | /********************************************************************* 3 | * The MIT License (MIT) 4 | * 5 | * Copyright (c) 2013,2014 Genome Institute of Singapore 6 | * 7 | * Permission is hereby granted, free of charge, to any person 8 | * obtaining a copy of this software and associated documentation files 9 | * (the "Software"), to deal in the Software without restriction, 10 | * including without limitation the rights to use, copy, modify, merge, 11 | * publish, distribute, sublicense, and/or sell copies of the Software, 12 | * and to permit persons to whom the Software is furnished to do so, 13 | * subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be 16 | * included in all copies or substantial portions of the Software. 17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 22 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 23 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | * SOFTWARE. 
26 | * 27 | ************************************************************************/ 28 | 29 | #ifndef UTILS_H 30 | #define UTILS_H 31 | 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | #define MAX_INDELSIZE 256 38 | 39 | #define HAS_GZIP_EXT(f) (strlen(f)>3 && 0==strncmp(& f[strlen(f)-3], ".gz", 3)) 40 | 41 | 42 | #define PHREDQUAL_TO_PROB(phred) (phred==INT_MAX ? DBL_MIN : pow(10.0, -1.0*(phred)/10.0)) 43 | 44 | /* requires that prob, as it comes out of our functions, is never zero! */ 45 | #define PROB_TO_PHREDQUAL(prob) (int)(-10.0 * log10l(prob)) 46 | #define PROB_TO_PHREDQUAL_SAFE(prob) (prob<=0.0 ? INT_MAX : (int)(-10.0 * log10l(prob))) 47 | 48 | #define BASECALLQUAL_VALID_RANGE(phred) ((phred)>=0 && (phred)<100) 49 | 50 | #define BASENAME(x) strrchr((x), '/') ? strrchr((x), '/')+1 : (x) 51 | 52 | int file_exists(const char *fname); 53 | int is_dir(const char *path); 54 | int ae_load_file_to_memory(const char *filename, char **result); 55 | int int_cmp(const void *a, const void *b); 56 | int dbl_cmp(const void *a, const void *b); 57 | int argmax_d(const double *arr, const int n); 58 | long int count_lines(const char *filename); 59 | 60 | typedef struct { 61 | unsigned long int n; /* number of elements stored */ 62 | int *data; /* actual array of data */ 63 | 64 | size_t grow_by_size; /* if needed grow array by this value.
will double previous size if <=1 */ 65 | size_t alloced; /* actually allocated size for data */ 66 | } int_varray_t; 67 | 68 | void int_varray_add_value(int_varray_t *a, const int value); 69 | void int_varray_free(int_varray_t *a); 70 | void int_varray_init(int_varray_t *a, 71 | const size_t grow_by_size); 72 | 73 | int 74 | ls_dir(char ***matches, const char *path, const char *pattern, 75 | const int sort_lexi); 76 | 77 | char * 78 | join_paths(char **p1, const char *p2); 79 | 80 | void 81 | chomp(char *s); 82 | 83 | char * 84 | readlink_malloc(const char *filename); 85 | 86 | char * 87 | resolved_path(const char *path); 88 | 89 | double 90 | dbl_median(double data[], int size); 91 | 92 | int 93 | int_median(int data[], int size); 94 | void 95 | strstrip(char *str); 96 | int 97 | is_newer(const char *p1, const char *p2); 98 | 99 | /* utility hash functions for indel calling */ 100 | 101 | typedef struct { 102 | char key[MAX_INDELSIZE]; 103 | int count; 104 | int cons_quals; 105 | int_varray_t ins_quals; 106 | int_varray_t ins_aln_quals; 107 | int_varray_t ins_map_quals; 108 | int_varray_t ins_source_quals; 109 | long int fw_rv[2]; 110 | UT_hash_handle hh_ins; 111 | } ins_event; 112 | 113 | void add_ins_sequence(ins_event **head_ins_count, char seq[], 114 | int ins_qual, int ins_aln_qual, int ins_map_qual, int ins_source_qual, 115 | int fw_rv); 116 | ins_event *find_ins_sequence(ins_event *const *head_ins_counts, char seq[]); 117 | void destruct_ins_event_counts(ins_event **head_ins_counts); 118 | 119 | typedef struct { 120 | char key[MAX_INDELSIZE]; 121 | int count; 122 | int cons_quals; 123 | int_varray_t del_quals; 124 | int_varray_t del_aln_quals; 125 | int_varray_t del_map_quals; 126 | int_varray_t del_source_quals; 127 | long int fw_rv[2]; 128 | UT_hash_handle hh_del; 129 | } del_event; 130 | 131 | void add_del_sequence(del_event **head_del_counts, char seq[], 132 | int del_qual, int del_aln_qual, int del_map_qual, int del_source_qual, 133 | int fw_rv); 134 | 
del_event * find_del_sequence(del_event *const *head_del_counts, char seq[]); 135 | void destruct_del_event_counts(del_event **head_del_counts); 136 | 137 | void 138 | strtoupper(char *s); 139 | 140 | 141 | #endif 142 | -------------------------------------------------------------------------------- /src/lofreq/vcf.h: -------------------------------------------------------------------------------- 1 | /* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */ 2 | /********************************************************************* 3 | * The MIT License (MIT) 4 | * 5 | * Copyright (c) 2013,2014 Genome Institute of Singapore 6 | * 7 | * Permission is hereby granted, free of charge, to any person 8 | * obtaining a copy of this software and associated documentation files 9 | * (the "Software"), to deal in the Software without restriction, 10 | * including without limitation the rights to use, copy, modify, merge, 11 | * publish, distribute, sublicense, and/or sell copies of the Software, 12 | * and to permit persons to whom the Software is furnished to do so, 13 | * subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be 16 | * included in all copies or substantial portions of the Software. 17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 22 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 23 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | * SOFTWARE. 
26 | * 27 | ************************************************************************/ 28 | 29 | #ifndef VCF_H 30 | #define VCF_H 31 | 32 | #include 33 | 34 | #include "htslib/bgzf.h" 35 | /*#include "zlib.h"*/ 36 | #include "uthash.h" 37 | 38 | 39 | typedef struct { 40 | char *path; 41 | int is_bgz; 42 | FILE *fh; 43 | BGZF *fh_bgz; 44 | char mode; 45 | } vcf_file_t; 46 | 47 | typedef struct { 48 | char *chrom; 49 | long int pos; /* zero offset */ 50 | char *id; 51 | char *ref; 52 | char *alt; 53 | int qual; 54 | char *filter; 55 | char *info; 56 | 57 | /* genotyping info (not used in lofreq) */ 58 | char *format; 59 | int num_samples; 60 | char **samples; 61 | } var_t; 62 | 63 | typedef struct { 64 | int ref_fw; 65 | int ref_rv; 66 | int alt_fw; 67 | int alt_rv; 68 | } dp4_counts_t; 69 | 70 | 71 | typedef struct { 72 | char *key; /* according to uthash doc this should be const but then we can't free it */ 73 | var_t *var; 74 | UT_hash_handle hh; 75 | } var_hash_t; 76 | 77 | 78 | void var_hash_add(var_hash_t **var_hash, char *key, var_t *var); 79 | void var_hash_free_elem(var_hash_t *hash_elem_ptr); 80 | void var_hash_free_table(var_hash_t *var_hash); 81 | 82 | 83 | #define VCF_MISSING_VAL_STR "." 
84 | #define VCF_MISSING_VAL_CHAR VCF_MISSING_VAL_STR[0] 85 | 86 | 87 | #define VCF_VAR_PASSES(v) ((v)->filter[0]==VCF_MISSING_VAL_CHAR || 0==strncmp((v)->filter, "PASS", 4)) 88 | 89 | 90 | 91 | int 92 | vcf_file_seek(vcf_file_t *f, long int offset, int whence); 93 | int 94 | vcf_file_open(vcf_file_t *f, const char *path, const int gzip, const char mode); 95 | int 96 | vcf_file_flush(vcf_file_t *f); 97 | int 98 | vcf_file_close(vcf_file_t *f); 99 | char * 100 | vcf_file_gets(vcf_file_t *f, int len, char *line); 101 | int 102 | vcf_printf(vcf_file_t *f, char *fmt, ...); 103 | 104 | int vcf_get_dp4(dp4_counts_t *dp4, var_t *var); 105 | 106 | void vcf_new_var(var_t **var); 107 | void vcf_free_var(var_t **var); 108 | void vcf_cp_var(var_t **dest, var_t *src); 109 | 110 | void vcf_var_key(char **key, var_t *var); 111 | void vcf_var_key_pos_only(char **key, var_t *var); 112 | 113 | int vcf_parse_header(char **header, vcf_file_t *vcf_file); 114 | int vcf_skip_header(vcf_file_t *vcf_file); 115 | int vcf_parse_var_from_line(char *line, var_t *var); 116 | int vcf_parse_var(vcf_file_t *vcf_file, var_t *var); 117 | int vcf_parse_vars(var_t ***vars, vcf_file_t *vcf_file, int only_passed); 118 | 119 | int vcf_var_is_indel(const var_t *var); 120 | int vcf_var_has_info_key(char **value, const var_t *var, const char *key); 121 | int vcf_var_filtered(const var_t *var); 122 | char *vcf_var_add_to_filter(var_t *var, const char *filter_name); 123 | char *vcf_var_add_to_info(var_t *var, const char *info_str); 124 | void vcf_var_sprintf_info(var_t *var, 125 | const int dp, const float af, const int sb, 126 | const dp4_counts_t *dp4, 127 | const int is_indel, const int hrun, const int is_consvar); 128 | void vcf_write_var(vcf_file_t *vcf_file, const var_t *var); 129 | void vcf_write_header(vcf_file_t *vcf_file, const char *header); 130 | void vcf_write_new_header(vcf_file_t *vcf_file, const char *srcprog, const char *reffa); 131 | void vcf_header_add(char **header, const char *info); 132 
| #endif 133 | -------------------------------------------------------------------------------- /src/lofreq/viterbi.h: -------------------------------------------------------------------------------- 1 | /********************************************************************* 2 | * The MIT License (MIT) 3 | * 4 | * Copyright (c) 2013,2014 Genome Institute of Singapore 5 | * 6 | * Permission is hereby granted, free of charge, to any person 7 | * obtaining a copy of this software and associated documentation files 8 | * (the "Software"), to deal in the Software without restriction, 9 | * including without limitation the rights to use, copy, modify, merge, 10 | * publish, distribute, sublicense, and/or sell copies of the Software, 11 | * and to permit persons to whom the Software is furnished to do so, 12 | * subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 22 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | * SOFTWARE. 
25 | * 26 | ************************************************************************/ 27 | 28 | #ifndef VITERBI_H 29 | #define VITERBI_H 30 | int left_align_indels(char *sref, char *squery, int slen, char *res); 31 | int viterbi(char *ref, char *query, char *bqual, char *aln, int quality); 32 | int viterbi_test(); 33 | #endif 34 | -------------------------------------------------------------------------------- /src/scripts/Makefile.am: -------------------------------------------------------------------------------- 1 | dist_bin_SCRIPTS = lofreq2_somatic.py lofreq2_call_pparallel.py 2 | EXTRA_DIST = lofreq2_local.py 3 | 4 | 5 | -------------------------------------------------------------------------------- /src/scripts/README: -------------------------------------------------------------------------------- 1 | Directory for LoFreq's entirely independent scripts -------------------------------------------------------------------------------- /src/scripts/lofreq2_local.py: -------------------------------------------------------------------------------- 1 | # add local dir to path to make source dir, i.e. not installed scripts 2 | # work straight-away 3 | 4 | import sys 5 | import os 6 | 7 | # Set sys.path/PYTHONPATH such that we find the local source dir first 8 | # by using: from lofreq_star import ... 9 | #d = os.path.normpath(os.path.join( 10 | # os.path.dirname(sys.argv[0]), '..')) 11 | #if os.path.exists(os.path.join(d, "lofreq_star")): 12 | # #sys.stderr.write("NOTE: Adding local dir %s to PYTHONPATH\n" % d) 13 | # sys.path.insert(0, d) 14 | 15 | # Set PATH such that we find lofreq binary first 16 | d = os.path.normpath(os.path.join( 17 | os.path.dirname(sys.argv[0]), '../lofreq')) 18 | if os.path.exists(os.path.join(d, 'lofreq')): 19 | #sys.stderr.write("NOTE: Adding local dir %s to PATH\n" % d) 20 | os.environ["PATH"] = d + os.pathsep + os.environ["PATH"] 21 | 22 | # In theory need to find scripts because the main binary knows about them. 
However, there are circular cases where script call the binary which then can't find the scripts again (e.g. in parallel wrapper),so: 23 | # 24 | #d = os.path.normpath(os.path.join( 25 | # os.path.dirname(sys.argv[0]), '../tools/scripts')) 26 | #if os.path.exists(d): 27 | # #sys.stderr.write("NOTE: Adding local dir %s to PATH\n" % d) 28 | # os.environ["PATH"] = d + os.pathsep + os.environ["PATH"] 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /src/tools/.gitignore: -------------------------------------------------------------------------------- 1 | ./build 2 | setup_conf.py 3 | -------------------------------------------------------------------------------- /src/tools/Makefile.am: -------------------------------------------------------------------------------- 1 | # This is really just a wrapper to setup.py since that script takes 2 | # care of building and installing the Python stuff 3 | # 4 | # Python scripts and other source has to be added manually here to 5 | # make sure they are incorporated when calling 'make dist' 6 | # 7 | # How to get this list: 8 | # source=$(python setup.py --dry-run sdist 2>/dev/null | grep 'hard linking' | cut -f 3 -d ' '); 9 | # source="$source setup_conf.py $(ls ./utils/*py)"; 10 | # echo $source | sed -e 's, , \\\n,g' | sed -e 's,^,\t,'; 11 | # 12 | EXTRA_DIST = setup.py \ 13 | lofreq_star/__init__.py \ 14 | lofreq_star/utils.py \ 15 | scripts/lofreq2_indel_ovlp.py \ 16 | scripts/lofreq2_local.py \ 17 | scripts/lofreq2_vcfplot.py \ 18 | setup_conf.py.in 19 | 20 | all: 21 | $(PYTHON) setup.py build 22 | 23 | # make sure to actually install stuff via python's setup.py 24 | install-exec-hook: 25 | $(PYTHON) setup.py install --prefix '$(prefix)' 26 | # FIXME tell users where stuff was installed and how to set PYTHONPATH 27 | 28 | # local clean target: call setup.py and remove ./build/ 29 | clean-local: 30 | $(PYTHON) setup.py clean 31 | rm -rf ./build 32 | 
def fdr(pvals, a=0.05, n=None):
    """Benjamini-Hochberg procedure (selection only, no p-value adjustment).

    Takes p-values and returns the indices (into the input order) of those
    that pass, ordered by ascending p-value.

    See http://sas-and-r.blogspot.sg/2012/05/example-931-exploring-multiple-testing.html
    for pseudocode.

    Args:
        pvals: iterable of p-values. Any iterable is accepted; it is
            materialised into a list once.
        a: FDR level (alpha).
        n: total number of tests. Defaults to len(pvals); may be larger
            when the list passed in has been clipped, never smaller.

    Returns:
        List of indices into pvals; empty if nothing passes.

    Raises:
        AssertionError: if n is given and is smaller than len(pvals).

    Test data from : http://udel.edu/~mcdonald/statmultcomp.html
    >>> import random
    >>> pvals = [0.6, 0.07, 0.49, 0.2, 0.48, 0.74, 0.68, 0.01, 0.97, 0.38, 0.032, 0.07]
    >>> random.shuffle(pvals)
    >>> sorted([pvals[i] for i in fdr(pvals, a=0.20)])
    [0.01, 0.032]
    >>> fdr([])
    []
    >>> fdr([1])
    []
    >>> fdr(iter([0.001, 0.9]))
    [0]
    """

    # Materialise once so generators/iterators work and len() is valid.
    pvals = list(pvals)
    num_pvals = len(pvals)

    if n is not None:
        assert n >= num_pvals
    else:
        n = num_pvals

    sorted_pvals_indices = sorted(range(num_pvals), key=lambda k: pvals[k])

    # Walk from the largest p-value down; the first rank whose p-value is
    # below its threshold rank*a/n fixes the cut-off: everything ranked at
    # or below it passes. float() guards against integer division in
    # Python 2 when a is passed as an int.
    alpha = float(a)
    for rank in range(num_pvals, 0, -1):
        if pvals[sorted_pvals_indices[rank - 1]] < rank * alpha / n:
            return sorted_pvals_indices[:rank]
    return []
from itertools import groupby


class AbstractCorrection(object):
    """Base class for the multiple-testing corrections in this module.

    Attributes:
        pvals: the raw p-values as a list (never modified).
        corrected_pvals: the p-values after set_correction() has run.
        n: number of tests used for the correction.
        a: type-1 error cutoff for each test.
    """

    def __init__(self, pvals, a=.05, n=None):
        """Store the p-values and apply the subclass's correction.

        Args:
            pvals: iterable of p-values.
            a: type-1 error cutoff for each test.
            n: number of multiple tests. When omitted (or 0) the number
                of p-values is used; when given it must not be smaller
                than the number of p-values (it is larger when the input
                was clipped).

        Raises:
            AssertionError: if n is given and smaller than len(pvals).
        """
        # Keep two independent lists: corrections that work in place
        # (HolmBonferroni) must not overwrite the raw p-values.
        self.pvals = list(pvals)
        self.corrected_pvals = list(self.pvals)

        # number of multiple tests
        if n:
            # n == len(pvals) is the normal unclipped case and must pass,
            # consistent with fdr() in fdr.py
            assert n >= len(self.pvals)
            self.n = n
        else:
            self.n = len(self.pvals)
        # type-1 error cutoff for each test
        self.a = a

        self.set_correction()

    def set_correction(self):
        """Hook for subclasses; the base class applies no correction."""
        # the purpose of multiple correction is to lower the alpha
        # instead of the canonical value (like .05)
        pass



class Bonferroni(AbstractCorrection):
    """http://en.wikipedia.org/wiki/Bonferroni_correction
    >>> ["%.4f" % v for v in Bonferroni([0.01, 0.01, 0.03, 0.05, 0.005], a=0.05).corrected_pvals]
    ['0.0500', '0.0500', '0.1500', '0.2500', '0.0250']
    """

    def set_correction(self):
        """Multiply every p-value by the number of tests."""
        self.corrected_pvals = [pv * self.n
                                for pv in self.corrected_pvals]



class Sidak(AbstractCorrection):
    """http://en.wikipedia.org/wiki/Bonferroni_correction
    >>> ["%.8f" % v for v in Sidak([0.01, 0.01, 0.03, 0.05, 0.005], a=0.05).corrected_pvals]
    ['0.04898974', '0.04898974', '0.14696923', '0.24494871', '0.02449487']
    """

    def set_correction(self):
        """Scale every p-value by a / (1 - (1 - a)**(1/n))."""
        if self.n != 0:
            correction = self.a * 1. / (1 - (1 - self.a) ** (1. / self.n))
        else:
            # no tests: leave the (empty) list untouched
            correction = 1
        self.corrected_pvals = [pv * correction
                                for pv in self.corrected_pvals]



class HolmBonferroni(AbstractCorrection):
    """http://en.wikipedia.org/wiki/Holm-Bonferroni_method
    given a list of pvals, perform the Holm-Bonferroni correction
    and return the indexes from original list that are significant.
    (cant use p-value as that may be repeated.)
    >>> ["%.4f" % v for v in HolmBonferroni([0.01, 0.01, 0.03, 0.05, 0.005], a=0.05).corrected_pvals]
    ['0.0400', '0.0400', '0.0600', '0.0500', '0.0250']
    """

    def set_correction(self):
        """Multiply each p-value selected by generate_significant() by its factor."""
        if len(self.pvals):
            for (i, c) in self.generate_significant():
                self.corrected_pvals[i] *= c

    def generate_significant(self):
        """Yield (index, factor) pairs for p-values passing the step-down test.

        P-values are visited in ascending order; equal p-values share the
        same factor. The factor starts at self.n and shrinks by the size
        of each group of ties after that group has been processed.
        """
        pvals = self.pvals
        pvals_idxs = sorted(zip(pvals, range(len(pvals))))

        #lp = len(self.pvals)
        lp = self.n

        for pval, idxs in groupby(pvals_idxs, lambda x: x[0]):
            idxs = list(idxs)
            for p, i in idxs:
                if p * 1. / lp < self.a:
                    yield (i, lp)
            lp -= len(idxs)

# also in the original file, but removed here:
#class FDR
#def calc_qval

if __name__ == '__main__':
    import doctest
    doctest.testmod()
6 | 7 | Original Authors: 8 | - Haibao Tang (tanghaibao), 9 | - Brent Pedersen (brentp), 10 | - Aurelien Naldi (aurelien-naldi) 11 | Email: tanghaibao@gmail.com 12 | License: BSD 13 | -------------------------------------------------------------------------------- /src/tools/lofreq_star/multiple_testing.py.org: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | """ 5 | A list of commonly used multiple correction routines 6 | """ 7 | 8 | import sys 9 | import random 10 | import fisher 11 | import numpy as np 12 | import go_enrichment 13 | 14 | 15 | class AbstractCorrection(object): 16 | 17 | def __init__(self, pvals, a=.05): 18 | self.pvals = self.corrected_pvals = np.array(pvals) 19 | self.n = len(self.pvals) # number of multiple tests 20 | self.a = a # type-1 error cutoff for each test 21 | 22 | self.set_correction() 23 | 24 | def set_correction(self): 25 | # the purpose of multiple correction is to lower the alpha 26 | # instead of the canonical value (like .05) 27 | pass 28 | 29 | 30 | class Bonferroni(AbstractCorrection): 31 | 32 | """ 33 | >>> Bonferroni([0.01, 0.01, 0.03, 0.05, 0.005], a=0.05).corrected_pvals 34 | array([ 0.05 , 0.05 , 0.15 , 0.25 , 0.025]) 35 | """ 36 | def set_correction(self): 37 | self.corrected_pvals *= self.n 38 | 39 | 40 | class Sidak(AbstractCorrection): 41 | 42 | """http://en.wikipedia.org/wiki/Bonferroni_correction 43 | >>> Sidak([0.01, 0.01, 0.03, 0.05, 0.005], a=0.05).corrected_pvals 44 | array([ 0.04898974, 0.04898974, 0.14696923, 0.24494871, 0.02449487]) 45 | """ 46 | def set_correction(self): 47 | if self.n != 0: 48 | correction = self.a * 1. / (1 - (1 - self.a) ** (1. 
/ self.n)) 49 | else: 50 | correction = 1 51 | self.corrected_pvals *= correction 52 | 53 | 54 | class HolmBonferroni(AbstractCorrection): 55 | 56 | """http://en.wikipedia.org/wiki/Holm-Bonferroni_method 57 | given a list of pvals, perform the Holm-Bonferroni correction 58 | and return the indexes from original list that are significant. 59 | (cant use p-value as that may be repeated.) 60 | >>> HolmBonferroni([0.01, 0.01, 0.03, 0.05, 0.005], a=0.05).corrected_pvals 61 | array([ 0.04 , 0.04 , 0.06 , 0.05 , 0.025]) 62 | """ 63 | def set_correction(self): 64 | if len(self.pvals): 65 | idxs, correction = zip(*self.generate_significant()) 66 | idxs = list(idxs) 67 | self.corrected_pvals[idxs] *= correction 68 | 69 | def generate_significant(self): 70 | 71 | pvals = self.pvals 72 | pvals_idxs = zip(pvals, xrange(len(pvals))) 73 | pvals_idxs.sort() 74 | 75 | lp = len(self.pvals) 76 | 77 | from itertools import groupby 78 | for pval, idxs in groupby(pvals_idxs, lambda x: x[0]): 79 | idxs = list(idxs) 80 | for p, i in idxs: 81 | if p * 1. 
/ lp < self.a: 82 | yield (i, lp) 83 | lp -= len(idxs) 84 | 85 | 86 | class FDR(object): 87 | def __init__(self, p_val_distribution, results, a=.05): 88 | self.corrected_pvals = fdr = [] 89 | for rec in results: 90 | q = sum(1 for x in p_val_distribution if x < rec.p_uncorrected) \ 91 | * 1./len(p_val_distribution) 92 | fdr.append(q) 93 | 94 | 95 | 96 | """ 97 | Generate a p-value distribution based on re-sampling, as described in: 98 | http://www.biomedcentral.com/1471-2105/6/168 99 | """ 100 | #class FalseDiscoveryRate(AbstractCorrection): 101 | def calc_qval(study_count, study_n, pop_count, pop_n, pop, assoc, term_pop, obo_dag): 102 | print >>sys.stderr, "generating p-value distribution for FDR calculation " \ 103 | "(this might take a while)" 104 | T = 1000 # number of samples 105 | distribution = [] 106 | for i in xrange(T): 107 | new_study = random.sample(pop, study_n) 108 | new_term_study = go_enrichment.count_terms(new_study, assoc, obo_dag) 109 | 110 | smallest_p = 1 111 | for term, study_count in new_term_study.items(): 112 | pop_count = term_pop[term] 113 | p = fisher.pvalue_population(study_count, study_n, pop_count, pop_n) 114 | if p.two_tail < smallest_p: smallest_p = p.two_tail 115 | 116 | distribution.append(smallest_p) 117 | print >>sys.stderr, i, smallest_p 118 | return distribution 119 | 120 | 121 | if __name__ == '__main__': 122 | import doctest 123 | doctest.testmod() 124 | -------------------------------------------------------------------------------- /src/tools/lofreq_star/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Generic utils for LoFreq 3 | """ 4 | 5 | 6 | __author__ = "Andreas Wilm" 7 | __email__ = "wilma@gis.a-star.edu.sg" 8 | __copyright__ = "2011 Genome Institute of Singapore" 9 | __license__ = "The MIT License" 10 | 11 | 12 | 13 | #--- standard library imports 14 | # 15 | from math import log10, log 16 | import sys 17 | from time import strftime 18 | 
import string

# instead of sys.maxint (which no longer exists in Python 3)
MAX_INT = 2147483647

#--- third-party imports
#
from scipy.stats import chi2


#--- project specific imports
#
# nothing should go here by definition


def now():
    """Return the current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    return strftime("%Y-%m-%d %H:%M:%S")



def fisher_comb(pv1, pv2):
    """
    Fisher's method for combining two p-values

    See for example
    http://en.wikipedia.org/wiki/Fisher's_method
    and
    breseq-0.18b:polymorphism_statistics.r

    Returns 0.0 if either input is exactly 0, otherwise the upper tail
    of a chi-square distribution with 4 degrees of freedom evaluated at
    -2 * (ln(pv1) + ln(pv2)).
    """

    if pv1 == 0 or pv2 == 0:
        # not sure if this is correct.
        # see also http://stats.stackexchange.com/questions/58537/fishers-method-when-p-value-0
        return 0.0

    comb_log = -2.0 * (log(pv1) + log(pv2))
    # Use the survival function rather than 1 - cdf: for very small
    # p-values cdf rounds to 1.0 and the subtraction would collapse the
    # result to exactly 0.
    comb_pv = chi2.sf(comb_log, 4)
    return comb_pv



def complement(strand, na_type='DNA'):
    """return complement of nucleic acid sequence

    original source http://stackoverflow.com/questions/1738633/more-pythonic-way-to-find-a-complementary-dna-strand
    Nadia Alramli

    Added DNA/RNA handling

    Raises ValueError for an na_type other than 'DNA' or 'RNA'.

    >>> complement("UTAGC")
    'AATCG'
    >>> complement("utagc")
    'aatcg'
    >>> complement("UTAGC", na_type="RNA")
    'AAUCG'
    >>> complement("utagc", na_type="RNA")
    'aaucg'
    """

    source_chars = 'UTAGCutagc'
    target_chars_by_type = {'DNA': 'AATCGaatcg',
                            'RNA': 'AAUCGaaucg'}
    if na_type not in target_chars_by_type:
        raise ValueError("Unknown NA type %s" % na_type)
    target_chars = target_chars_by_type[na_type]

    if sys.version_info[0] > 2:
        # pick the table type matching the input so that both text and
        # bytes sequences are translated with a proper table
        if isinstance(strand, (bytes, bytearray)):
            tr = bytes.maketrans(source_chars.encode('ascii'),
                                 target_chars.encode('ascii'))
        else:
            tr = str.maketrans(source_chars, target_chars)
    else:
        tr = string.maketrans(source_chars, target_chars)
    return strand.translate(tr)



def prob_to_phredqual(prob):
    """
    Turns an error probability into a phred value

    A probability of zero maps to MAX_INT.

    >>> prob_to_phredqual(0.01)
    20

    """

    assert prob >= 0.0, (
        "Probability can't be smaller than 0 but got %f" % prob)
    try:
        return int(round(-10.0 * log10(prob)))
    except ValueError:
        # prob is zero
        #return sys.maxint
        return MAX_INT



def phredqual_to_prob(phredqual):
    """
    Turns a phred quality into an error probability

    >>> '%.2f' % phredqual_to_prob(20)
    '0.01'

    """

    assert isinstance(phredqual, int)
    #assert phredqual >= 0, ("Phred-quality must be >= 0, but is %s" % phredqual)
    # also works for phredqual=0
    return 10**(-phredqual/10.0)


if __name__ == '__main__':
    import doctest
    doctest.testmod()
def cmdline_parser():
    """Build and return the argparse.ArgumentParser for this script.

    Besides -v/--verbose, every option names one VCF file and is
    mandatory.
    """

    # (option flag, destination attribute, help text), in display order
    required_vcf_options = (
        ("--fn", "vcf_fn", "FN vcf file"),
        ("--n-rlx", "vcf_nrlx", "Normal relaxed vcf file"),
        ("--n-str", "vcf_nstr", "Normal stringent vcf file"),
        ("--t-rlx", "vcf_trlx", "Tumor relaxed vcf file"),
        ("--t-str", "vcf_tstr", "Tumor stringent vcf file"),
        ("--s-raw", "vcf_sraw", "Somatic raw vcf file"),
        ("--s-final", "vcf_sfinal", "Somatic final vcf file"),
        ("--s-final-wo-dbsnp", "vcf_sfinal_wo_dbsnp",
         "Somatic final vcf file without dbSNP"),
    )

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-v", "--verbose",
                        action="store_true",
                        dest="verbose",
                        help="be verbose")
    for flag, dest_name, help_text in required_vcf_options:
        parser.add_argument(flag,
                            required=True,
                            dest=dest_name,
                            help=help_text)
    return parser
75 | args = parser.parse_args() 76 | 77 | for (k, v) in [ 78 | ('FN', args.vcf_fn), 79 | ('normal_rlx', args.vcf_nrlx), 80 | ('normal_str', args.vcf_nstr), 81 | ('tumor_rlx', args.vcf_trlx), 82 | ('tumor_str', args.vcf_tstr), 83 | ('somatic_raw', args.vcf_sraw), 84 | ('somatic_final', args.vcf_sfinal), 85 | ('somatic_final_minus_dbsnp', args.vcf_sfinal_wo_dbsnp)]: 86 | #vcf_files[k] = v 87 | try: 88 | vcf_fh[k] = vcf.VCFReader(filename=v) 89 | except: 90 | sys.stderr.write("Reading %s failed\n" % v) 91 | raise 92 | 93 | sys.stderr.write("Analyzing FN %s and friends\n" % vcf_fh['FN'].filename) 94 | 95 | ORDER = ['normal_rlx', 'normal_str', 'tumor_rlx', 'tumor_str', 'somatic_raw', 'somatic_final', 'somatic_final_minus_dbsnp'] 96 | 97 | 98 | print("#CHROM\tPOS\tREF\tALT\t%s" % ('\t'.join(ORDER))) 99 | for fn in vcf_fh['FN']: 100 | present_in = dict() 101 | for k in ORDER: 102 | present_in[k] = 0 103 | for t in vcf_fh[k].fetch(fn.CHROM, fn.POS-1, fn.POS): 104 | assert len(fn.REF) == len(t.REF) 105 | assert len(fn.ALT)==1 106 | assert len(t.ALT)==1 107 | if t.ALT[0] == fn.ALT[0]: 108 | if t.QUAL: 109 | q = t.QUAL 110 | else: 111 | q = "." 112 | try: 113 | present_in[k] = "Q=%s;SB=%s;DP=%d;AF=%f" % (q, t.INFO['SB'], t.INFO['DP'], t.INFO['AF']) 114 | except KeyError: 115 | sys.stderr.write("Key Error. 
#--- standard library imports
#
import sys
from collections import namedtuple
import gzip

#--- third-party imports
#
#/


VCFEntry = namedtuple('VCFEntry', ['chrom', 'pos', 'dbsnpid', 'ref', 'alt', 'qual', 'filter', 'info'])

# py2to3
MAX_INT = 2147483647


def write_var(var, fh=None):
    """Write one VCFEntry as a tab-separated line to fh (default: stdout)."""
    # Resolve stdout at call time, not at import time, so redirection of
    # sys.stdout is honoured.
    if fh is None:
        fh = sys.stdout
    var = var._replace(pos=str(var.pos))
    fh.write("%s\n" % '\t'.join(var))


def vcf_line_to_var(line):
    """Parse the first eight columns of a VCF data line into a VCFEntry.

    pos is converted to int; all other fields stay strings.
    """
    fields = line.rstrip().split('\t')[:8]
    e = VCFEntry._make(fields)
    return e._replace(pos=int(e.pos))


#def var_len(var):
#    return abs(len(var.alt)-len(var.ref))


def af_from_var(var):
    """Return the AF value from the INFO column as float, or None if absent."""
    for f in var.info.split(';'):
        if f.startswith('AF='):
            return float(f[3:])
    return None


def qual_from_var(var):
    """takes care of missing values, numeric conversion and ties in comparisons

    A missing QUAL ('.') ranks highest (MAX_INT). Otherwise the result is
    QUAL plus AF; AF is used only to break ties and counts as 0 when the
    INFO column has no AF entry.
    """
    if var.qual == ".":
        return MAX_INT
    # float() instead of int(): QUAL may legally be a non-integer
    af = af_from_var(var)
    if af is None:
        af = 0.0
    # add AF to deal with ties
    return float(var.qual) + af


def overlap(v1, v2):
    """determine whether affected positions of two variants overlap

    Each variant affects max(len(ref), len(alt)) consecutive positions
    starting at pos.
    """
    span1 = max(len(v1.ref), len(v1.alt))
    span2 = max(len(v2.ref), len(v2.alt))
    if span1 == 0 or span2 == 0:
        # a variant affecting no position cannot overlap anything
        return False
    # half-open intervals [pos, pos+span) intersect
    return v1.pos < v2.pos + span2 and v2.pos < v1.pos + span1


def _open_vcf(path):
    """Open path for reading text; '-' means stdin, '*.gz' is gunzipped."""
    if path == "-":
        return sys.stdin
    if path.endswith(".gz"):
        # Python 3 needs explicit text mode, otherwise lines are bytes
        mode = 'rt' if sys.version_info[0] > 2 else 'r'
        return gzip.open(path, mode)
    return open(path)


def _pick_best(variants, key_func):
    """Return the variant with the highest key; the first one wins ties."""
    return max(variants, key=key_func)


def main():
    """Read a VCF and, per run of overlapping variants, print only the best.

    Header lines are passed through unchanged. A variant is compared
    against the most recently read one only; consecutive overlapping
    variants on the same chromosome are collected and the one with the
    highest qual_from_var() value is written.
    """
    if len(sys.argv) != 2:
        sys.stderr.write("FATAL: Need (one) vcf file as only argument\n")
        sys.exit(1)

    fh = _open_vcf(sys.argv[1])

    #pic_best_func = af_from_var
    pick_best_func = qual_from_var

    prev_vars = []
    try:
        for line in fh:
            line = line.rstrip()
            if line.startswith('#'):
                print(line)
                continue
            if not line:
                # tolerate blank lines instead of failing on field count
                continue

            cur_var = vcf_line_to_var(line)
            if prev_vars and (cur_var.chrom != prev_vars[-1].chrom
                              or not overlap(prev_vars[-1], cur_var)):
                # pick highest qual/af from stack and empty stack
                write_var(_pick_best(prev_vars, pick_best_func))
                prev_vars = []
            prev_vars.append(cur_var)

        # don't forget remaining ones (none if the input had no variants)
        if prev_vars:
            write_var(_pick_best(prev_vars, pick_best_func))
    finally:
        # never close stdin; only close what we opened ourselves
        if fh is not sys.stdin:
            fh.close()


if __name__ == "__main__":
    main()
not installed scripts 2 | # work straight-away 3 | 4 | import sys 5 | import os 6 | 7 | # Set sys.path/PYTHONPATH such that we find the local source dir first 8 | # by using: from lofreq_star import ... 9 | d = os.path.normpath(os.path.join( 10 | os.path.dirname(sys.argv[0]), '..')) 11 | if os.path.exists(os.path.join(d, "lofreq_star")): 12 | #sys.stderr.write("NOTE: Adding local dir %s to PYTHONPATH\n" % d) 13 | sys.path.insert(0, d) 14 | 15 | # Set PATH such that we find lofreq binary first 16 | #d = os.path.normpath(os.path.join( 17 | # os.path.dirname(sys.argv[0]), '../../lofreq')) 18 | #if os.path.exists(os.path.join(d, 'lofreq')): 19 | # #sys.stderr.write("NOTE: Adding local dir %s to PATH\n" % d) 20 | # os.environ["PATH"] = d + os.pathsep + os.environ["PATH"] 21 | 22 | # In theory need to find scripts because the main binary knows about them. However, there are circular cases where script call the binary which then can't find the scripts again (e.g. in parallel wrapper),so: 23 | # 24 | #d = os.path.normpath(os.path.join( 25 | # os.path.dirname(sys.argv[0]), '../tools/scripts')) 26 | #if os.path.exists(d): 27 | # #sys.stderr.write("NOTE: Adding local dir %s to PATH\n" % d) 28 | # os.environ["PATH"] = d + os.pathsep + os.environ["PATH"] 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /src/tools/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | # see also http://docs.python.org/distutils/setupscript.html 3 | 4 | import os 5 | import sys 6 | #import subprocess 7 | 8 | import setup_conf 9 | 10 | DEBUG = False 11 | #DEBUG = True 12 | 13 | 14 | # checks 15 | # 16 | if sys.version_info < (2 , 6): 17 | sys.stderr.write("FATAL: sorry, Python versions" 18 | " below 2.6 are not supported\n") 19 | sys.exit(1) 20 | 21 | 22 | # where modules reside: 23 | #package_dir = {'': setup_conf.PACKAGE_NAME.lower()} 24 | #package_dir = {'': ''} 25 | 
26 | 27 | setup(name = setup_conf.PACKAGE_NAME, 28 | packages=[setup_conf.PACKAGE_NAME.lower()], 29 | version = setup_conf.PACKAGE_VERSION, 30 | description="Low frequency variant caller", 31 | author="Andreas Wilm", 32 | author_email=setup_conf.PACKAGE_BUGREPORT, 33 | long_description = """LoFreq-Star is a fast and sensitive variant-caller for inferring single-nucleotide variants (SNVs) from high-throughput sequencing data""", 34 | # doesn't seem to work 35 | # requires = ['pysam (>=0.7.5)', 'scipy (>=0.12.0)', 'numpy (>=1.7.1)', 'huddel'], 36 | #url='https://sourceforge.net/p/lofreq/', 37 | scripts = [ 38 | 'scripts/lofreq2_vcfplot.py', 39 | 'scripts/lofreq2_indel_ovlp.py' 40 | ], 41 | # http://pypi.python.org/pypi?%3Aaction=list_classifiers 42 | classifiers=['Environment :: Console', 43 | 'Intended Audience :: Science/Research', 44 | 'Natural Language :: English', 45 | 'Operating System :: Unix', 46 | 'Programming Language :: C', 47 | 'Programming Language :: Python :: 2.7', 48 | 'Topic :: Scientific/Engineering :: Bio-Informatics', 49 | ], 50 | keywords='bioinformatics' 51 | ) 52 | -------------------------------------------------------------------------------- /src/tools/setup_conf.py.README: -------------------------------------------------------------------------------- 1 | Automatically generated from setup_conf.py.in by autotools. 2 | Any changes made here will be overwritten! 
3 | -------------------------------------------------------------------------------- /src/tools/setup_conf.py.in: -------------------------------------------------------------------------------- 1 | # automagically set by autotools 2 | PACKAGE_NAME="@PACKAGE_NAME@" 3 | PACKAGE_TARNAME="@PACKAGE_TARNAME@" 4 | PACKAGE_VERSION="@PACKAGE_VERSION@" 5 | PACKAGE_STRING="@PACKAGE_STRING@" 6 | PACKAGE_BUGREPORT="@PACKAGE_BUGREPORT@" 7 | -------------------------------------------------------------------------------- /src/uthash/.gitignore: -------------------------------------------------------------------------------- 1 | ./uthash-* 2 | 3 | -------------------------------------------------------------------------------- /src/uthash/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2005-2013, Troy D. Hanson http://troydhanson.github.com/uthash/ 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 11 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 12 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 13 | PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER 14 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 15 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 16 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 17 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 18 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 19 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 20 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 21 | 22 | -------------------------------------------------------------------------------- /src/uthash/Makefile.am: -------------------------------------------------------------------------------- 1 | EXTRA_DIST = LICENSE 2 | noinst_HEADERS = uthash.h 3 | 4 | ## http://stackoverflow.com/questions/4316499/include-a-header-only-library-in-an-autotools-project?rq=1 5 | #install:; @: 6 | #install-exec:; @: 7 | #install-data:; @: 8 | #uninstall:; @: 9 | #clean:; @: 10 | 11 | 12 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | -------------------------------------------------------------------------------- /tests/af_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source lib.sh || exit 1 4 | 5 | KEEP_TMP=0 6 | REF=./data/af_tests/ref_fasta.fa 7 | outdir=$(mktemp -d -t $(basename $0).XXXXXX) 8 | 9 | # See ./data/af_tests/README for expected results 10 | failed=0 11 | 12 | 13 | echowarn "Only works with AQ off" 14 | 15 | 16 | # del test 17 | bam=./data/af_tests/test_deletions.bam 18 | log=$outdir/del_log.txt 19 | vcf=$outdir/del_out.vcf 20 | cmd="$LOFREQ call --call-indels --no-default-filter -A -B -f $REF -o $vcf $bam" 21 | #echodebug "cmd=$cmd" 22 | if ! eval $cmd > $log 2>&1; then 23 | echoerror "LoFreq failed. 
Check logfile $log. Command was $cmd" 24 | exit 1 25 | fi 26 | if ! awk '{if ($2=="1" && $4=="ACG" && $5=="A" && $8 ~ /AF=0.5/) {m=1; exit 0}} END {if (m) {exit 0} else {exit 1}}' $vcf; then 27 | echoerror "Expected deletion of AF=0.5 not found in $vcf" 28 | let failed=failed+1 29 | fi 30 | if ! awk '{if ($2=="1" && $4=="A" && $5=="T" && $8 ~ /AF=1.0/) {m=1; exit 0}} END {if (m) {exit 0} else {exit 1}}' $vcf; then 31 | echoerror "Expected SNV of AF=1.0 not found in $vcf" 32 | let failed=failed+1 33 | fi 34 | 35 | # ins test 36 | bam=./data/af_tests/test_insertion.bam 37 | log=$outdir/ins_log.txt 38 | vcf=$outdir/ins_out.vcf 39 | cmd="$LOFREQ call --call-indels --no-default-filter -a 0.5 -B -A -f $REF -o $vcf $bam" 40 | #echodebug "cmd=$cmd" 41 | if ! eval $cmd > $log 2>&1; then 42 | echoerror "LoFreq failed. Check logfile $log. Command was $cmd" 43 | exit 1 44 | fi 45 | if ! awk '{if ($2=="2" && $4=="C" && $5=="CAA" && $8 ~ /AF=0.5/) {m=1; exit 0}} END {if (m) {exit 0} else {exit 1}}' $vcf; then 46 | echoerror "Expected insertion of AF=0.5 not found in $vcf" 47 | let failed=failed+1 48 | fi 49 | if ! 
awk '{if ($2=="2" && $4=="C" && $5=="G" && $8 ~ /AF=0.25/) {m=1; exit 0}} END {if (m) {exit 0} else {exit 1}}' $vcf; then 50 | echoerror "Expected SNV of AF=0.25 not found in $vcf" 51 | let failed=failed+1 52 | fi 53 | 54 | 55 | # FIXME check output 56 | if [ $KEEP_TMP -ne 1 ] && [ $failed -eq 0 ]; then 57 | test -d $outdir && rm -rf $outdir 58 | fi 59 | 60 | -------------------------------------------------------------------------------- /tests/alnqual.sh.FIXME: -------------------------------------------------------------------------------- 1 | read with ID in cigar should get baq and a[id] tags 2 | read without shouldnt -------------------------------------------------------------------------------- /tests/bamstats.sh.FIXME: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Test that we get the number of expected SNVs on the pseudo-clonal data-set 4 | 5 | source lib.sh || exit 1 6 | 7 | 8 | basedir=data/bamstats 9 | bam=$basedir/bamstats.bam 10 | reffa=$basedir/bamstats.fa 11 | truebamstats=$basedir/bamstats.expected.bamstats 12 | 13 | outdir=$(mktemp -d -t $(basename $0).XXXXXX) 14 | outbamstats=$outdir/bamstats.txt 15 | log=$outdir/log.txt 16 | 17 | KEEP_TMP=0 18 | 19 | cmd="$LOFREQ bamstats -f $reffa -o $outbamstats $bam" 20 | if ! eval $cmd >> $log 2>&1; then 21 | echoerror "The following command failed (see $log for more): $cmd" 22 | exit 1 23 | fi 24 | 25 | if ! 
diff -q $outbamstats $truebamstats; then 26 | echoerror "Output differs from expected output ($outbamstats differs from $truebamstats)" 27 | exit 1 28 | else 29 | echook "Got expected output" 30 | fi 31 | 32 | 33 | 34 | if [ $KEEP_TMP -eq 1 ]; then 35 | echowarn "Not deleting tmp dir $outdir" 36 | else 37 | rm $outdir/* 38 | rmdir $outdir 39 | fi 40 | 41 | -------------------------------------------------------------------------------- /tests/baq-calls-less-than-nobaq.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Test that we get the number of expected SNVs on the pseudo-clonal data-set 4 | 5 | source lib.sh || exit 1 6 | 7 | 8 | basedir=data/denv2-pseudoclonal 9 | bam=$basedir/denv2-pseudoclonal.bam 10 | reffa=$basedir/denv2-pseudoclonal_cons.fa 11 | bed=$basedir/denv2-pseudoclonal_incl.bed 12 | #truesnv=$basedir/denv2-pseudoclonal_true-snp.vcf 13 | 14 | outdir=$(mktemp -d -t $(basename $0).XXXXXX) 15 | outraw_nobaq=$outdir/raw_nobaq.vcf 16 | outraw_baq=$outdir/raw_baq.vcf.gz 17 | log=$outdir/log.txt 18 | 19 | KEEP_TMP=0 20 | 21 | cmd="$LOFREQ call -B -f $reffa -l $bed -o $outraw_nobaq $bam" 22 | if ! eval $cmd >> $log 2>&1; then 23 | echoerror "The following command failed (see $log for more): $cmd" 24 | exit 1 25 | fi 26 | 27 | cmd="$LOFREQ call -f $reffa -l $bed -o $outraw_baq $bam" 28 | if ! 
eval $cmd >> $log 2>&1; then 29 | echoerror "The following command failed (see $log for more): $cmd" 30 | exit 1 31 | fi 32 | 33 | 34 | ndiff=$($LOFREQ vcfset -a complement -1 $outraw_nobaq -2 $outraw_baq | grep -c '^[^#]') 35 | if [ $ndiff -lt 1 ]; then 36 | echoerror "Expected more SNVs with BAQ switched off (check $outraw_nobaq and $outraw_baq)" 37 | exit 1 38 | else 39 | echook "Got $ndiff more SNVs if BAQ is off" 40 | fi 41 | 42 | 43 | 44 | if [ $KEEP_TMP -eq 1 ]; then 45 | echowarn "Not deleting tmp dir $outdir" 46 | else 47 | rm $outdir/* 48 | rmdir $outdir 49 | fi 50 | 51 | -------------------------------------------------------------------------------- /tests/bed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source lib.sh || exit 1 4 | 5 | 6 | python -c 'import sys; sys.path.insert(0, "../src/scripts/");import lofreq2_call_pparallel; print("\n".join([str(x) for x in lofreq2_call_pparallel.read_bed_coords("data/reg.bed")]))' > /dev/null 7 | if [ $? 
-eq 0 ]; then 8 | echook "bed reading function works" 9 | else 10 | echoerror "bed reading function failed" 11 | exit 1 12 | fi 13 | 14 | -------------------------------------------------------------------------------- /tests/bgzf_getline.supp: -------------------------------------------------------------------------------- 1 | { 2 | htslib bgzf_getline leak realloc and malloc 3 | Memcheck:Leak 4 | fun:malloc 5 | fun:realloc 6 | fun:bgzf_getline 7 | fun:tbx_readrec 8 | fun:hts_itr_next 9 | fun:main_vcfset 10 | fun:main 11 | } 12 | 13 | { 14 | htslib bgzf_getline leak realloc only 15 | Memcheck:Leak 16 | fun:realloc 17 | fun:bgzf_getline 18 | fun:tbx_readrec 19 | fun:hts_itr_next 20 | fun:main_vcfset 21 | fun:main 22 | } 23 | -------------------------------------------------------------------------------- /tests/binom_vs_poisson.FIXME: -------------------------------------------------------------------------------- 1 | binom_sf should be the same as poissbin 2 | Is that true only for small numbers? 3 | See pseudo_binomial() in snpcaller.c 4 | -------------------------------------------------------------------------------- /tests/bonf_auto_vs_dyn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Call SNVs on a BAM file with full coverage and change bonf settings. 4 | # Different settings (auto, dynamic and hard-coded) should give 5 | # identical results here. 
6 | 7 | source lib.sh || exit 1 8 | 9 | basedir=data/denv2-pseudoclonal 10 | bed=$basedir/denv2-pseudoclonal_incl.bed 11 | bam=$basedir/denv2-pseudoclonal.bam 12 | reffa=$basedir/denv2-pseudoclonal_cons.fa 13 | 14 | outdir=$(mktemp -d -t $(basename $0).XXXXXX) 15 | # not supported anymore out_auto=$outdir/snv_auto.vcf 16 | out_dynamic=$outdir/snv_dynamic.vcf.gz 17 | # bed_len.sh $be;# = 9909 * 3 = 29727 18 | out_29727=$outdir/snv_29727.vcf.gz 19 | log=$outdir/log.txt 20 | 21 | KEEP_TMP=0 22 | 23 | #cmd="$LOFREQ call -l $bed -b auto -f $reffa -o $out_auto $bam" 24 | #if ! eval $cmd >> $log 2>&1; then 25 | # echoerror "The following command failed (see $log for more): $cmd" 26 | # exit 1 27 | #fi 28 | 29 | cmd="$LOFREQ call -l $bed -b dynamic -f $reffa -o $out_dynamic $bam" 30 | if ! eval $cmd >> $log 2>&1; then 31 | echoerror "The following command failed (see $log for more): $cmd" 32 | exit 1 33 | fi 34 | 35 | cmd="$LOFREQ call -l $bed -b 29727 -f $reffa -o $out_29727 $bam" 36 | if ! eval $cmd >> $log 2>&1; then 37 | echoerror "The following command failed (see $log for more): $cmd" 38 | exit 1 39 | fi 40 | 41 | #echodebug "All calls done. 
No checking results" 42 | 43 | # make sure we got at least some snvs 44 | # 45 | #if [ $(grep -c '^[^#]' $out_auto) -eq 0 ]; then 46 | # echoerror "No SNVs predicted" 47 | # exit 1 48 | #fi 49 | 50 | #echodebug "out_auto=$out_auto" 51 | #echodebug "out_dynamic=$out_dynamic" 52 | #echodebug "out_29727=$out_29727" 53 | 54 | #ndiff=$($LOFREQ vcfset -a complement -1 $out_auto -2 $out_dynamic 2>>$log | grep -c '^[^#]') 55 | #if [ $ndiff -ne 0 ]; then 56 | # echoerror "Found differences between bonf auto and bonf dynamic outputs" 57 | # exit 1 58 | #fi 59 | #ndiff=$($LOFREQ vcfset -a complement -2 $out_dynamic -1 $out_auto 2>>$log | grep -c '^[^#]') 60 | #if [ $ndiff -ne 0 ]; then 61 | # echoerror "Found differences between bonf auto and bonf dynamic outputs" 62 | # exit 1 63 | #fi 64 | 65 | #ndiff=$($LOFREQ vcfset -a complement -1 $out_auto -2 $out_29727 2>>$log | grep -c '^[^#]') 66 | #if [ $ndiff -ne 0 ]; then 67 | # echoerror "Found differences between bonf auto and bonf 29727 outputs" 68 | # exit 1 69 | #fi 70 | 71 | ndiff=$($LOFREQ vcfset -a complement -2 $out_29727 -1 $out_dynamic 2>>$log | grep -c '^[^#]') 72 | if [ $ndiff -ne 0 ]; then 73 | echoerror "Found differences between bonf dynamic and bonf 29727 outputs" 74 | exit 1 75 | fi 76 | 77 | echook "Tests passed" 78 | 79 | if [ $KEEP_TMP -eq 1 ]; then 80 | echowarn "Not deleting tmp dir $outdir" 81 | else 82 | rm $outdir/* 83 | rmdir $outdir 84 | fi 85 | -------------------------------------------------------------------------------- /tests/consvar_noqual_filter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # FIXME:add-doc 4 | 5 | source lib.sh || exit 1 6 | 7 | VCF=data/vcf/consvar_only.vcf.gz 8 | 9 | num_in=$(zgrep -vc '^#' $VCF) 10 | num_out=$($LOFREQ filter --snvqual-thresh 1 --no-defaults -i $VCF | grep -vc '^#') 11 | if [ $num_in -ne $num_out ]; then 12 | echoerror "Some CONSVARs were filtered by snvqual-thresh." 
13 | else 14 | echook "CONSVARs untouched by snvqual-thresh filtering" 15 | fi 16 | 17 | -------------------------------------------------------------------------------- /tests/denv2-pseudoclonal-source-qual.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Test that we get the number of expected SNVs on the pseudo-clonal data-set 4 | 5 | source lib.sh || exit 1 6 | 7 | 8 | basedir=data/denv2-pseudoclonal 9 | bam=$basedir/denv2-pseudoclonal.bam 10 | reffa=$basedir/denv2-pseudoclonal_cons.fa 11 | bed=$basedir/denv2-pseudoclonal_incl.bed 12 | truesnv=$basedir/denv2-pseudoclonal_true-snp.vcf.gz 13 | 14 | outdir=$(mktemp -d -t $(basename $0).XXXXXX) 15 | outraw_def=$outdir/raw_def.vcf 16 | outfinal_def=$outdir/final_def.vcf 17 | log=$outdir/log.txt 18 | 19 | KEEP_TMP=0 20 | 21 | cmd="$LOFREQ call -b dynamic -f $reffa -l $bed -o $outraw_def -s -S $truesnv $bam" 22 | if ! eval $cmd >> $log 2>&1; then 23 | echoerror "The following command failed (see $log for more): $cmd" 24 | exit 1 25 | fi 26 | cmd="$LOFREQ filter -i $outraw_def -o $outfinal_def" 27 | if ! 
eval $cmd >> $log 2>&1; then 28 | echoerror "The following command failed (see $log for more): $cmd" 29 | exit 1 30 | fi 31 | 32 | 33 | ndiff=$($LOFREQ vcfset -a complement -1 $outfinal_def -2 $truesnv | grep -c '^[^#]') 34 | if [ $ndiff -ne 0 ]; then 35 | echoerror "Found FP SNVs (not part of the list of true SNVs)" 36 | exit 1 37 | fi 38 | 39 | ndiff=$($LOFREQ vcfset -a intersect -1 $outfinal_def -2 $truesnv | grep -c '^[^#]') 40 | #nexp=229 41 | nexp=219;# FIXME not sure if this is the exact number but this is what I saw first running src qual on this data-set 42 | if [ $ndiff -lt $nexp ]; then 43 | echoerror "Expected $nexp TP SNVs but got $ndiff" 44 | exit 1 45 | fi 46 | 47 | 48 | echook "Tests passed" 49 | 50 | if [ $KEEP_TMP -eq 1 ]; then 51 | echowarn "Not deleting tmp dir $outdir" 52 | else 53 | rm $outdir/* 54 | rmdir $outdir 55 | fi 56 | 57 | -------------------------------------------------------------------------------- /tests/denv2-pseudoclonal.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Test that we get the number of expected SNVs on the pseudo-clonal data-set 4 | 5 | source lib.sh || exit 1 6 | 7 | 8 | basedir=data/denv2-pseudoclonal 9 | bam=$basedir/denv2-pseudoclonal.bam 10 | reffa=$basedir/denv2-pseudoclonal_cons.fa 11 | bed=$basedir/denv2-pseudoclonal_incl.bed 12 | truesnv=$basedir/denv2-pseudoclonal_true-snp.vcf.gz 13 | 14 | outdir=$(mktemp -d -t $(basename $0).XXXXXX) 15 | outraw_def=$outdir/raw_def.vcf 16 | outfinal_def=$outdir/final_def.vcf 17 | log=$outdir/log.txt 18 | 19 | KEEP_TMP=0 20 | 21 | cmd="$LOFREQ call -b dynamic -f $reffa -l $bed -o $outraw_def $bam" 22 | if ! eval $cmd >> $log 2>&1; then 23 | echoerror "The following command failed (see $log for more): $cmd" 24 | exit 1 25 | fi 26 | cmd="$LOFREQ filter -i $outraw_def -o $outfinal_def" 27 | if ! 
eval $cmd >> $log 2>&1; then 28 | echoerror "The following command failed (see $log for more): $cmd" 29 | exit 1 30 | fi 31 | 32 | 33 | ndiff=$($LOFREQ vcfset -a complement -1 $outfinal_def -2 $truesnv | grep -c '^[^#]') 34 | if [ $ndiff -ne 0 ]; then 35 | echoerror "Found FP SNVs (not part of the list of true SNVs). Check $outdir" 36 | exit 1 37 | fi 38 | 39 | ndiff=$($LOFREQ vcfset -a intersect -1 $outfinal_def -2 $truesnv | grep -c '^[^#]') 40 | nexp=229 41 | if [ $ndiff -lt $nexp ]; then 42 | echoerror "Expected $nexp TP SNVs but got $ndiff. Check $outdir" 43 | exit 1 44 | fi 45 | 46 | 47 | echook "Tests passed" 48 | 49 | if [ $KEEP_TMP -eq 1 ]; then 50 | echowarn "Not deleting tmp dir $outdir" 51 | else 52 | rm $outdir/* 53 | rmdir $outdir 54 | fi 55 | 56 | -------------------------------------------------------------------------------- /tests/denv2-simulation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Call SNVs on simulated data and make sure we got the expected number 4 | # of SNVs 5 | 6 | source lib.sh || exit 1 7 | 8 | basedir=data/denv2-simulation 9 | bam=$basedir/denv2-10haplo.bam 10 | reffa=$basedir/denv2-refseq.fa 11 | truesnv=$basedir/denv2-10haplo_true-snp.vcf.gz 12 | # samtools mpileup $bam | wc -l;# *3 13 | bonf=32169 14 | 15 | outdir=$(mktemp -d -t $(basename $0).XXXXXX) 16 | outraw_def=$outdir/raw_def.vcf 17 | outfinal_def=$outdir/final_def.vcf.gz;# bgzip for complement 18 | outraw_nomq=$outdir/raw_nomq.vcf 19 | outfinal_nomq=$outdir/final_nomq.vcf.gz;# bgzip for complement 20 | log=$outdir/log.txt 21 | 22 | KEEP_TMP=0 23 | if [ $KEEP_TMP -eq 1 ]; then 24 | echowarn "Keeping tmp dir $outdir" 25 | fi 26 | 27 | cmd="$LOFREQ call -B -b $bonf -f $reffa -o $outraw_def $bam" 28 | if ! eval $cmd >> $log 2>&1; then 29 | echoerror "The following command failed (see $log for more): $cmd" 30 | exit 1 31 | fi 32 | cmd="$LOFREQ filter -i $outraw_def -o $outfinal_def" 33 | if ! 
eval $cmd >> $log 2>&1; then 34 | echoerror "The following command failed (see $log for more): $cmd" 35 | exit 1 36 | fi 37 | 38 | 39 | cmd="$LOFREQ call -B -b $bonf -f $reffa -o $outraw_nomq -N $bam" 40 | if ! eval $cmd >> $log 2>&1; then 41 | echoerror "The following command failed (see $log for more): $cmd" 42 | exit 1 43 | fi 44 | cmd="$LOFREQ filter -i $outraw_nomq -o $outfinal_nomq" 45 | if ! eval $cmd >> $log 2>&1; then 46 | echoerror "The following command failed (see $log for more): $cmd" 47 | exit 1 48 | fi 49 | 50 | 51 | #nexp=$(grep -v -c '^#' $truesnv) 52 | #nfinal_def=$(grep -v -c '^#' $outfinal_def) 53 | #nfinal_nomq=$(grep -v -c '^#' $outfinal_nomq) 54 | #echodebug "nexp=$nexp nfinal_def=$nfinal_def $nfinal_nomq=$nfinal_nomq" 55 | 56 | 57 | ndiff=$($LOFREQ vcfset -a complement -1 $outfinal_def -2 $truesnv | grep -c '^[^#]') 58 | if [ $ndiff -ne 0 ]; then 59 | echoerror "Found extra SNVs in default predictions, which are not part of the list of true SNVs" 60 | exit 1 61 | fi 62 | ndiff=$($LOFREQ vcfset -a complement -2 $outfinal_def -1 $truesnv | grep -c '^[^#]') 63 | nexp=15 64 | # BAQ on: 19 65 | # BAQ off: 15 66 | if [ $ndiff -ne $nexp ]; then 67 | echoerror "Expected $nexp missing SNVs in default predictions but got $ndiff" 68 | exit 1 69 | fi 70 | 71 | 72 | 73 | ndiff=$($LOFREQ vcfset -a complement -1 $outfinal_nomq -2 $truesnv | grep -c '^[^#]') 74 | if [ $ndiff -ne 0 ]; then 75 | echoerror "Found extra SNVs in no-mq predictions, which are not part of the list of true SNVs" 76 | exit 1 77 | fi 78 | ndiff=$($LOFREQ vcfset -a complement -2 $outfinal_nomq -1 $truesnv | grep -c '^[^#]') 79 | nexp=11 80 | # BAQ on: 14 81 | # BAQ off: 11 82 | if [ $ndiff -ne $nexp ]; then 83 | echoerror "Expected $nexp missing SNVs in no-mq predictions but got $ndiff" 84 | exit 1 85 | fi 86 | 87 | 88 | # FIXME outfinal should not look different, i.e. filtering shouldn't do much/anything. 
89 | # see /home/wilma/snpcaller/lofreq/lofreq-sourceforge.git/tests/denv2-simulation.sh 90 | 91 | echook "Tests passed" 92 | 93 | if [ $KEEP_TMP -ne 1 ]; then 94 | rm $outdir/* 95 | rmdir $outdir 96 | fi 97 | 98 | -------------------------------------------------------------------------------- /tests/denv2-validation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Test that we get the number of expected SNVs on the pseudo-clonal data-set 4 | 5 | source lib.sh || exit 1 6 | 7 | basedir=./data/denv2-dpcr-validated 8 | bam1=$basedir/CTTGTA_2_remap_razers-i92_peakrem_corr.bam 9 | bam2=$basedir/GGCTAC_2_remap_razers-i92_peakrem_corr.bam 10 | reffa=$basedir/consensus.fa 11 | 12 | outdir=$(mktemp -d -t $(basename $0).XXXXXX) 13 | vcfout1=$outdir/$(basename $bam1 .bam).vcf 14 | vcfout2=$outdir/$(basename $bam2 .bam).vcf.gz;# 2nd file has to be bgzipped for vcfset to work 15 | vcfinter=$outdir/intersection.vcf 16 | 17 | log=$outdir/log.txt 18 | 19 | KEEP_TMP=0 20 | 21 | # true var 1687 in $bam1 has Q62 which becomes 2% after bonf correction 22 | # i.e. default -a 0.01 swallows it 23 | 24 | cmd="$LOFREQ call -a 0.05 -B -f $reffa -o $vcfout1 $bam1" 25 | if ! eval $cmd >> $log 2>&1; then 26 | echoerror "The following command failed (see $log for more): $cmd" 27 | exit 1 28 | fi 29 | 30 | cmd="$LOFREQ call -a 0.05 -B -f $reffa -o $vcfout2 $bam2" 31 | if ! eval $cmd >> $log 2>&1; then 32 | echoerror "The following command failed (see $log for more): $cmd" 33 | exit 1 34 | fi 35 | 36 | cmd="$LOFREQ vcfset -a intersect -1 $vcfout1 -2 $vcfout2 -o $vcfinter" 37 | if ! 
eval $cmd >> $log 2>&1; then 38 | echoerror "The following command failed (see $log for more): $cmd" 39 | exit 1 40 | fi 41 | 42 | N_PRESENT=7 43 | n_present=$(for pos in 5914 6843 598 5025 1687 9941 4828; do grep "^consensus[^0-9]*$pos" $vcfinter; done | wc -l) 44 | N_ABSENT=0 45 | n_absent=$(for pos in 7035 7404; do grep "^consensus[^0-9]*$pos" $vcfinter; done | wc -l) 46 | 47 | if [ $n_present -ne $N_PRESENT ]; then 48 | echoerror "Expected $N_PRESENT but got $n_present SNVs (see $outdir)" 49 | exit 1 50 | fi 51 | 52 | if [ $n_absent -ne $N_ABSENT ]; then 53 | echoerror "Expected $N_ABSENT but got $n_absent SNVs (see $outdir)" 54 | exit 1 55 | fi 56 | 57 | echook "Got expected number of present/absent SNVs" 58 | 59 | if [ $KEEP_TMP -eq 1 ]; then 60 | echowarn "Not deleting tmp dir $outdir" 61 | else 62 | rm $outdir/* 63 | rmdir $outdir 64 | fi 65 | 66 | -------------------------------------------------------------------------------- /tests/diff_opts_same_out.sh.OLD: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Test to make sure that paralell computed results are identical to 4 | # the ones computed without paralllel option and that reading from 5 | # stdin also results in same result 6 | 7 | source lib.sh || exit 1 8 | 9 | 10 | echowarn "Better use a less high coverage data-set for faster completion" 11 | indir=../../lofreq-test-data/denv2-pseudoclonal/ 12 | bam=$indir/denv2-pseudoclonal.bam 13 | ref=$indir/denv2-pseudoclonal_cons.fa 14 | bed=$indir/denv2-pseudoclonal_incl.fake.bed 15 | 16 | 17 | CMD[1]="$LOFREQ call -l $bed -f $ref --verbose $bam" 18 | CMD[2]="$LOFREQ call -l $bed -f $ref --verbose --pseudo-parallel 4 $bam" 19 | CMD[3]="cat $bam | $LOFREQ call -f $ref -l $bed --verbose -" 20 | # cannot: CMD[4]="cat $bam | $LOFREQ call -f $ref -l $bed --verbose --pseudo-parallel 4 -" 21 | for i in $($seq 1 ${#CMD[@]}); do 22 | cmd=${CMD[$i]} 23 | out=$(mktemp -t $(basename $0).XXXXXX.vcf) 24 | 
log=$(mktemp -t $(basename $0).XXXXXX.log) 25 | 26 | #echodebug "Executing $cmd with output going to $out and $log" 27 | # remove source line from vcf which will change depending on call 28 | if ! eval $cmd 2>$log | grep -v 'source' >$out ; then 29 | echoerror "Executing following command failed (see $log for more info): $cmd" 30 | exit 1 31 | fi 32 | 33 | # make sure we predicted at least one snv. if output is always 34 | # empty tests would be successful otherwise 35 | if ! grep -q DP4 $out; then 36 | echoerror "No SNVs in output file $out found" 37 | exit 1 38 | fi 39 | 40 | # compare to output of previous cmd 41 | if [ -n "$prevout" ]; then 42 | if ! diff -q $out $prevout; then 43 | echoerror "Results between runs differed. Commands were:" 44 | echoerror " Current cmd: $cmd" 45 | echoerror " Current out: $out" 46 | echoerror " Previous cmd: $prevcmd" 47 | echoerror " Previous out: $prevout" 48 | exit 1 49 | fi 50 | fi 51 | 52 | prevcmd=$cmd 53 | test -s "$prevlog" && rm $prevlog 54 | test -s "$prevout" && rm $prevout 55 | prevlog=$log 56 | prevout=$out 57 | done 58 | test -s "$prevlog" && rm $prevlog 59 | test -s "$prevout" && rm $prevout 60 | 61 | 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /tests/doctest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | myname=$(basename $0) 4 | 5 | source lib.sh || exit 1 6 | 7 | PY_DIRS="../src/tools/lofreq_star" 8 | files=$(find $PY_DIRS -name \*py -not -name _\*) 9 | for f in $files; do 10 | echo "$myname: testing $f" 11 | python $f || echoerror "testing $f failed" 12 | done 13 | 14 | for f in $files; do 15 | echo "$myname: testing $f" 16 | python -m doctest $f || echoerror "testing $f failed" 17 | done 18 | -------------------------------------------------------------------------------- /tests/ecoli-clone_incl_parallel.sh: -------------------------------------------------------------------------------- 
1 | #!/bin/bash 2 | 3 | # Test that we get the number of expected SNVs on a clonal data-set 4 | # and also check whether running it in parallel (single genome!) works 5 | # and produces same results 6 | 7 | source lib.sh || exit 1 8 | 9 | 10 | basedir=data/ecoli-clone/ 11 | bam=$basedir/clone/EAS20_8.bwamem_pe.viterbi.mdups.realn.recal.bam 12 | reffa=$basedir/ref/Ecoli_K12_MG1655_NC_000913.fa 13 | #truesnv=$basedir/denv2-pseudoclonal_true-snp.vcf 14 | 15 | outdir=$(mktemp -d -t $(basename $0).XXXXXX) 16 | outvcf_p=$outdir/$(basename $bam .bam)_parallel.vcf.gz 17 | outvcf_s=$outdir/$(basename $bam .bam)_single.vcf.gz 18 | log=$outdir/log.txt 19 | 20 | KEEP_TMP=0 21 | 22 | cmd="$LOFREQ call-parallel --pp-threads $threads -f $reffa -o $outvcf_p $bam" 23 | #echodebug "cmd=$cmd" 24 | if ! eval $cmd >> $log 2>&1; then 25 | echoerror "The following command failed (see $log for more): $cmd" 26 | exit 1 27 | fi 28 | 29 | 30 | #MAX_SNVS=20 31 | MAX_SNVS=50 32 | 33 | 34 | # run in parallel (should work in single chromosome) 35 | # 36 | nsnvs=$(grep -c '^[^#]' $outvcf_p) 37 | if [ $nsnvs -ge $MAX_SNVS ]; then 38 | echoerror "Expected less then $MAX_SNVS on this clonal dataset but got $nsnvs (see $outdir)" 39 | exit 1 40 | else 41 | echook "Got $nsnvs SNVs for this clonal dataset which is okay (below limit of $MAX_SNVS)" 42 | fi 43 | 44 | 45 | 46 | # run single and compare results 47 | # 48 | cmd="$LOFREQ call -f $reffa -o $outvcf_s $bam" 49 | #echodebug "cmd=$cmd" 50 | if ! eval $cmd >> $log 2>&1; then 51 | echoerror "The following command failed (see $log for more): $cmd" 52 | exit 1 53 | fi 54 | 55 | 56 | nus=$($LOFREQ vcfset -a complement -1 $outvcf_s -2 $outvcf_p --count-only) 57 | nup=$($LOFREQ vcfset -a complement -2 $outvcf_s -1 $outvcf_p --count-only) 58 | # allowing one border line difference 59 | if [ $nus -gt 1 ] || [ $nup -gt 1 ]; then 60 | echoerror "Observed differences between parallel ($nup unique vars) and single ($nus unique vars) results. 
Check $outvcf_p and $outvcf_s" 61 | exit 1 62 | fi 63 | 64 | 65 | if [ $KEEP_TMP -eq 1 ]; then 66 | echowarn "Not deleting tmp dir $outdir" 67 | else 68 | rm $outdir/* 69 | rmdir $outdir 70 | fi 71 | -------------------------------------------------------------------------------- /tests/ecoli_spikein.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source lib.sh || exit 1 4 | 5 | KEEP_TMP=1 6 | BASEDIR=data/ecoli-clone/ 7 | BAM=$BASEDIR/spike-in/spike-in.bam 8 | REF=$BASEDIR/ref/Ecoli_K12_MG1655_NC_000913.fa 9 | TRUTH=$BASEDIR/spike-in/truth.laln.vcf.gz 10 | EVALUATOR=data/icgc-tcga-dream-support/evaluator.py 11 | 12 | for f in $BAM $REF $TRUTH $EVALUATOR; do 13 | if [ ! -e $f ]; then 14 | echoerror "Required file $f missing" 15 | exit 1 16 | fi 17 | done 18 | 19 | 20 | outdir=$(mktemp -d -t $(basename $0).XXXXXX) 21 | log=$outdir/log.txt 22 | outvcf=$outdir/out.vcf 23 | num_err=0 24 | 25 | cmd="$LOFREQ call-parallel --pp-threads 8 -f $REF -o $outvcf --verbose $BAM" 26 | # only needed as long as indels are disabled by default 27 | cmd="$cmd --call-indels" 28 | echodebug "cmd=$cmd" 29 | if ! eval $cmd > $log 2>&1; then 30 | echoerror "LoFreq failed. Check logfile $log. Command was $cmd" 31 | exit 1 32 | fi 33 | 34 | 35 | # this data set was created by running bamsurgeon addsnv first followed by addindel. 
36 | # since variants were dense and bamsurgeon stupid it replaces already inserted variants 37 | # added in the first step and recall is low 38 | # took results from v2.1.2a-87-g2d53817-dirty -1% 39 | res_ll=$($EVALUATOR -v $outvcf -t $TRUTH -m SNV | awk 'END {print $NF}') || exit 1 40 | res=$(echo $res_ll | \ 41 | awk -F, '{prec=$1; rec=$2; if (prec<0.945 || rec<0.664) {status="ERROR"} else {status="OK"} printf "%s: precision=%f recall=%f\n", status, prec, rec}') || exit 1 42 | if echo $res | grep -q ERROR; then 43 | let num_err=num_err+1 44 | fi 45 | echo "snvs: $res" 1>&2 46 | 47 | 48 | # based on results for v2.1.2a-69-g5bd5919 -1% 49 | res_ll=$($EVALUATOR -f $REF -v $outvcf -t $TRUTH -m INDEL | awk 'END {print $NF}') || exit 1 50 | res=$(echo $res_ll | \ 51 | awk -F, '{prec=$1; rec=$2; if (prec<0.956 || rec<0.917) {status="ERROR"} else {status="OK"} printf "%s: precision=%f recall=%f\n", status, prec, rec}') || exit 1 52 | if echo $res | grep -q ERROR; then 53 | let num_err=num_err+1 54 | fi 55 | echo "indels: $res" 1>&2 56 | 57 | 58 | 59 | if [ $KEEP_TMP -ne 1 ] && [ $num_err -eq 0 ]; then 60 | test -d $outdir && rm -rf $outdir 61 | else 62 | echowarn "Not deleting temporary output directory $outdir" 63 | fi 64 | if [ $num_err -ne 0 ]; then 65 | exit 1 66 | fi 67 | -------------------------------------------------------------------------------- /tests/exome_in_silico.sh.FIXME: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/tests/exome_in_silico.sh.FIXME -------------------------------------------------------------------------------- /tests/faidx_fetch_seq.supp: -------------------------------------------------------------------------------- 1 | { 2 | faidx_fetch_seq leak 3 | Memcheck:Leak 4 | fun:malloc 5 | fun:faidx_fetch_seq 6 | fun:mplp_func 7 | fun:bam_plp_auto 8 | fun:bam_mplp_auto 9 | fun:mpileup 10 | fun:main_call 11 | fun:main 12 | 
} 13 | -------------------------------------------------------------------------------- /tests/fdr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source lib.sh || exit 1 4 | 5 | VCF=data/vcf/fdr.vcf 6 | # 25 simulated variants extracted from mq-demo. 7 | # converted probabilities given in fdr example in 8 | # http://www.biostathandbook.com/multiplecomparisons.html 9 | # (see also multtest.c) to qualities and replace original values. 10 | 11 | 12 | # expecting following result which mimicks the same as in link and in multtest.c 13 | # expecting 5 significant results 14 | NEXP=5 15 | nres=$($LOFREQ filter --no-defaults -q fdr -r 0.25 -i $VCF | grep -vc '^#') 16 | if [ $nres -ne $NEXP ]; then 17 | echoerror "FDR filtering not producing expected results (got $nres instead of $NEXP)" 18 | exit 1 19 | fi 20 | 21 | # even after capping and setting #tests 22 | vcf=$(mktemp -t $(basename $0).XXXXXX.vcf) 23 | head -n 11 $VCF > $vcf 24 | nres=$($LOFREQ filter --no-defaults -q fdr -r 0.25 -s 25 -i $vcf | grep -vc '^#') 25 | if [ $nres -ne $NEXP ]; then 26 | echoerror "FDR filtering after capping not producing expected results (got $nres instead of $NEXP)" 27 | exit 1 28 | fi 29 | echook "FDR filtering produced expected results" 30 | -------------------------------------------------------------------------------- /tests/filter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Test that we get the number of expected SNVs on the pseudo-clonal data-set 4 | 5 | source lib.sh || exit 1 6 | 7 | 8 | vcf=data/vcf/CTTGTA_2_remap_razers-i92_peakrem_corr_nodeff.vcf.gz 9 | #outvcf=$(mktemp -t $(basename $0).XXXXXX) 10 | 11 | # FIXME base_cmd="$LOFREQ filter -i $vcf --no-defaults -o -" 12 | #base_cmd="../src/lofreq/lofreq_filter -i $vcf -o -" 13 | base_cmd="$LOFREQ filter -i $vcf -o -" 14 | 15 | ALPHA_LIST='0.01 0.0001 0.000001 0.00000001' 16 | NUMTEST_LIST='100 10000 
1000000' 17 | 18 | 19 | 20 | # snv quality with varying alpha 21 | # 22 | num_fail=0 23 | for cor in "bonf" "holm-bonf" "fdr"; do 24 | last_no=0 25 | for a in $ALPHA_LIST; do 26 | #cmd="$base_cmd --snv-qual $cor --snv-qual-alpha $a" 27 | cmd="$base_cmd --snvqual-mtc $cor --snvqual-alpha $a" 28 | #echodebug "cmd=$cmd" 29 | new_no=$(eval $cmd | grep -c 'snvqual.*\(bonf\|fdr\)') || exit 1 30 | #echodebug "$cor a=$a: new_no=$new_no last_no=$last_no";# cmd = $cmd" 31 | if [ $new_no -lt $last_no ]; then 32 | echoerror "snvqual: Got fewer SNVs when filtering with higher alpha (cmd=$cmd)" 33 | let num_fail=num_fail+1 34 | fi 35 | last_no=$new_no 36 | done 37 | done 38 | if [ $num_fail -eq 0 ]; then 39 | echook "snvqual (var alpha): all tests passed" 40 | else 41 | echoerror "snvqual (var alpha): $num_fail tests failed" 42 | fi 43 | 44 | 45 | # snv quality with varying num_tests 46 | # 47 | # fixed alpha 48 | num_fail=0 49 | a=0.00000001 50 | for cor in "bonf" "holm-bonf" "fdr"; do 51 | last_no=0 52 | for n in $NUMTEST_LIST; do 53 | #cmd="$base_cmd --snv-qual $cor --snv-qual-alpha $a --snv-qual-numtests $n" 54 | cmd="$base_cmd --snvqual-mtc $cor --snvqual-alpha $a --snvqual-ntests $n" 55 | #echodebug "cmd=$cmd" 56 | new_no=$(eval $cmd | grep -c 'snvqual.*\(bonf\|fdr\)') 57 | #echodebug "$cor a=$a n=$n: new_no=$new_no last_no=$last_no";# cmd = $cmd" 58 | if [ $new_no -lt $last_no ]; then 59 | echoerror "snvqual: Got fewer SNVs when filtering with higher num-tests (cmd=$cmd)" 60 | let num_fail=num_fail+1 61 | fi 62 | last_no=$new_no 63 | done 64 | done 65 | if [ $num_fail -eq 0 ]; then 66 | echook "snvqual (var num_tests): all tests passed" 67 | else 68 | echoerror "snvqual (var num_tests): $num_fail tests failed" 69 | fi 70 | 71 | 72 | 73 | # strandbias quality with varying alpha 74 | # 75 | num_fail=0 76 | for cor in "bonf" "holm-bonf"; do 77 | last_no=100000 78 | for a in $ALPHA_LIST; do 79 | #cmd="$base_cmd --strandbias $cor --strandbias-alpha $a" 80 | cmd="$base_cmd 
--sb-mtc $cor --sb-alpha $a" 81 | #echodebug "cmd=$cmd" 82 | new_no=$(eval $cmd | grep -c 'sb_bonf') 83 | #echodebug "$cor a=$a: new_no=$new_no last_no=$last_no";# cmd = $cmd" 84 | if [ $new_no -gt $last_no ]; then 85 | echoerror "strandbias: Got more SNVs when filtering with higher alpha (cmd=$cmd)" 86 | let num_fail=num_fail+1 87 | fi 88 | last_no=$new_no 89 | done 90 | done 91 | if [ $num_fail -eq 0 ]; then 92 | echook "strandbias: all tests passed" 93 | else 94 | echoerror "strandbias: $num_fail tests failed" 95 | fi 96 | 97 | # window filter 98 | # FIXME: not implemented in C version 99 | # 100 | #num_fail=0 101 | #base_cmd="$LOFREQ filter -i $vcf --no-defaults -o -" 102 | #cmd="$base_cmd --window 10" 103 | #num_reg=$(eval $cmd | grep '[^0-9,]85' | grep -c snvwin) || exit 1 104 | #num_exp=4 105 | #if [ $num_reg -ne $num_exp ]; then 106 | # echoerror "window: Got $num_reg but expected $num_exp SNVs (cmd = $cmd)" 107 | # let num_fail=num_fail+1 108 | #fi 109 | ## 110 | #cmd="$base_cmd --window 1" 111 | #num_reg=$(eval $cmd | grep '[^0-9,]85' | grep -c snvwin) || exit 1 112 | #num_exp=2 113 | #if [ $num_reg -ne $num_exp ]; then 114 | # echoerror "window: Got $num_reg but expected $num_exp SNVs (cmd = $cmd)" 115 | # let num_fail=num_fail+1 116 | #fi 117 | #if [ $num_fail -eq 0 ]; then 118 | # echook "window: all tests passed" 119 | #fi 120 | # 121 | 122 | exit 0 123 | -------------------------------------------------------------------------------- /tests/filter_only_snvs_or_indels.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source lib.sh || exit 1 4 | 5 | vcf=data/icgc-tcga-dream-testproject/strelka-1.0.13_snvs-indels-somatic.vcf 6 | 7 | num_total=$(grep -vc '^#' $vcf) 8 | num_snvs=$($LOFREQ filter --no-default --only-snvs -i $vcf | grep -vc '^#') 9 | num_indels=$($LOFREQ filter --no-default --only-indels -i $vcf | grep -vc '^#') 10 | 
vcf=tests/data/icgc-tcga-dream-testproject/strelka-1.0.13_snvs-indels-somatic.vcf 11 | msg="Number of SNVs ($num_snvs) and indels ($num_indels) extracted by filter" 12 | if [ $(expr $num_snvs + $num_indels) -ne $num_total ]; then 13 | echoerror "$msg don't add up to total number of variants ($num_total)" 14 | exit 1 15 | else 16 | echook "$msg add up to total number of variants ($num_total)" 17 | fi 18 | -------------------------------------------------------------------------------- /tests/icgc-tcga-dream-indel_chr19.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source lib.sh || exit 1 4 | 5 | KEEP_TMP=0 6 | REF=data/icgc-tcga-dream-support/Homo_sapiens_assembly19.fasta 7 | NORMAL=data/icgc-tcga-dream-indel_chr19/chr19.normal_didq_aq.bam 8 | TUMOR=data/icgc-tcga-dream-indel_chr19/chr19.tumor_didq_aq.bam 9 | BED=data/icgc-tcga-dream-indel_chr19/chr19.bed 10 | #BED=data/icgc-tcga-dream-indel_chr19/chr19-debug.bed 11 | DBSNP=data/icgc-tcga-dream-support/00-All.vcf.gz 12 | EVALUATOR=data/icgc-tcga-dream-support/evaluator.py 13 | #EVALUATOR=/mnt/pnsg10_projects/wilma/lofreq/somatic/dream-challenge/tools/ICGC-TCGA-DREAM-Mutation-Calling-challenge-tools/evaluator.py 14 | # for patched version with --classvcf support but no proper arg handling 15 | TRUTH=data/icgc-tcga-dream-indel_chr19/chr19.truth.vcf.gz 16 | 17 | # threads=16; echoinfo "overwriting default threads to $threads" 18 | 19 | outdir=$(mktemp -d -t $(basename $0).XXXXXX) 20 | outpref=$outdir/lofreq_test 21 | log=$outdir/log.txt 22 | 23 | cmd="$LOFREQ somatic -f $REF --threads $threads -n $NORMAL -t $TUMOR -o $outpref -l $BED -d $DBSNP --verbose" 24 | # only needed as long as indels are disabled by default 25 | cmd="$cmd --call-indels" 26 | echodebug "cmd=$cmd" 27 | if ! eval $cmd > $log 2>&1; then 28 | echoerror "LoFreq failed. Check logfile $log. 
Command was $cmd" 29 | exit 1 30 | fi 31 | 32 | num_err=0 33 | 34 | title="snvs" 35 | f=${outpref}somatic_final.snvs.vcf.gz 36 | res_ll=$($EVALUATOR -v $f -t $TRUTH -m SNV | awk 'END {print $NF}') || exit 1 37 | res=$(echo $res_ll | \ 38 | awk -F, '{prec=$1; rec=$2; if (prec<0.98 || rec<0.96) {status="ERROR"} else {status="OK"} printf "%s: precision=%f recall=%f\n", status, prec, rec}') || exit 1 39 | if echo $res | grep -q ERROR; then 40 | let num_err=num_err+1 41 | fi 42 | echo "$title: " $res 1>&2 43 | 44 | title="snvs after dbsnp removal" 45 | f=${outpref}somatic_final_minus-dbsnp.snvs.vcf.gz 46 | res_ll=$($EVALUATOR -v $f -t $TRUTH -m SNV | awk 'END {print $NF}') || exit 1 47 | res=$(echo $res_ll | \ 48 | awk -F, '{prec=$1; rec=$2; if (prec<0.96 || rec<0.96) {status="ERROR"} else {status="OK"} printf "%s: precision=%f recall=%f\n", status, prec, rec}') || exit 1 49 | if echo $res | grep -q ERROR; then 50 | let num_err=num_err+1 51 | fi 52 | echo "$title: " $res 1>&2 53 | 54 | 55 | # sens/spec limit based on v2.1.2a-54-g52e8097 and with -1% allowance 56 | 57 | title="indels" 58 | f=${outpref}somatic_final.indels.vcf.gz 59 | res_ll=$($EVALUATOR -v $f -t $TRUTH -m INDEL | awk 'END {print $NF}') || exit 1 60 | res=$(echo $res_ll | \ 61 | awk -F, '{prec=$1; rec=$2; if (prec<0.879 || rec<0.484) {status="ERROR"} else {status="OK"} printf "%s: precision=%f recall=%f\n", status, prec, rec}') || exit 1 62 | if echo $res | grep -q ERROR; then 63 | let num_err=num_err+1 64 | fi 65 | echo "$title: "$res 1>&2 66 | 67 | title="indels after dbsnp removal" 68 | f=${outpref}somatic_final_minus-dbsnp.indels.vcf.gz 69 | res_ll=$($EVALUATOR -v $f -t $TRUTH -m INDEL | awk 'END {print $NF}') || exit 1 70 | res=$(echo $res_ll | \ 71 | awk -F, '{prec=$1; rec=$2; if (prec<0.952 || rec<0.482) {status="ERROR"} else {status="OK"} printf "%s: precision=%f recall=%f\n", status, prec, rec}') || exit 1 72 | if echo $res | grep -q ERROR; then 73 | let num_err=num_err+1 74 | fi 75 | echo 
"$title: "$res 1>&2 76 | 77 | 78 | 79 | if [ $KEEP_TMP -ne 1 ] && [ $num_err -eq 0 ]; then 80 | test -d $outdir && rm -rf $outdir 81 | else 82 | echowarn "Not deleting temporary output directory $outdir" 83 | fi 84 | if [ $num_err -ne 0 ]; then 85 | exit 1 86 | fi 87 | -------------------------------------------------------------------------------- /tests/icgc-tcga-dream-testproject.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source lib.sh || exit 1 3 | 4 | KEEP_TMP=0 5 | BASE=data/icgc-tcga-dream-testproject/ 6 | #BASE=/projects/wilma/SOMATIC/dream-challenge/testproject/ 7 | #BASE=/mnt/userArchive/wilma/projects/somatic/testproject/ 8 | REF=data/icgc-tcga-dream-support/Homo_sapiens_assembly19.fasta 9 | TUMOR=${BASE}/tumor.chr20.bam 10 | NORMAL=${BASE}/normal.chr20.bam 11 | TRUTH=${BASE}/truth.chr20.vcf.gz 12 | BED=${BASE}/chr20.bed 13 | #EVALUATOR=/projects/wilma/SOMATIC/dream-challenge/tools/bamsurgeon.git/etc/evaluator.py 14 | EVALUATOR=data/icgc-tcga-dream-support/evaluator.py 15 | DEBUG=0 16 | 17 | # threads=16; echoinfo "overwriting default threads to $threads" 18 | 19 | 20 | for f in $REF $TUMOR $NORMAL $TRUTH $EVALUATOR; do 21 | if [ ! -s $f ]; then 22 | echoerror "Essential file $f missing" 23 | exit 1 24 | fi 25 | done 26 | out_pref=$(mktemp -t $(basename $0).XXXXXX) 27 | log=${out_pref}.exec.log 28 | vcf_out=${out_pref}somatic_final.snvs.vcf.gz 29 | if [ $DEBUG -eq 1 ]; then 30 | cp ${BASE}/snvs/lofreq/beta-4-8-g7b8b334-dirty_somatic_final.vcf $vcf_out 31 | else 32 | cmd="$LOFREQ somatic -l $BED -n $NORMAL -t $TUMOR -f $REF -o $out_pref --threads $threads" 33 | if ! eval $cmd > $log 2>&1; then 34 | echoerror "LoFreq failed. Check log $log and files with prefix $out_pref" 35 | exit 1 36 | fi 37 | echoinfo "lofreq somatic run completed. 
now checking results" 38 | fi 39 | 40 | num_err=0 41 | # use bamsurgeon evaluator 42 | # 43 | # example output 44 | # alternative to using evaluator is to run lofreq vcfset on a truth file only containing SNVs 45 | # tpcount, fpcount, subrecs, trurecs: 46 | # 1389 15 1404 1445 47 | # precision, recall, F1 score: 0.989316239316,0.96124567474,0.975078975079 48 | title="snvs before dbsnp removal" 49 | res_ll=$($EVALUATOR -t $TRUTH -v $vcf_out -m SNV | awk 'END {print $NF}') || exit 1 50 | 51 | res=$(echo $res_ll | \ 52 | awk -F, '{prec=$1; rec=$2; if (prec<0.98 || rec<0.96) {status="ERROR"} else {status="OK"} printf "%s: precision=%f recall=%f\n", status, prec, rec}') || exit 1 53 | if echo $res | grep -q ERROR; then 54 | let num_err=num_err+1 55 | fi 56 | echo "$title: "$res 1>&2 57 | 58 | 59 | if [ $KEEP_TMP -ne 1 ] && [ $num_err -eq 0 ]; then 60 | test -n "$out_pref" && rm -f ${out_pref}* # outputs share the file prefix $out_pref (there is no $outdir in this script); never glob on an empty prefix 61 | else 62 | echowarn "Not deleting temporary output files with prefix $out_pref" 63 | fi 64 | if [ $num_err -ne 0 ]; then 65 | exit 1 66 | fi 67 | 68 | -------------------------------------------------------------------------------- /tests/indel_misc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source lib.sh || exit 1 4 | 5 | KEEP_TMP=0 6 | REF=data/denv2-dpcr-validated/consensus.fa 7 | outdir=$(mktemp -d -t $(basename $0).XXXXXX) 8 | BAM=data/denv2-dpcr-validated/CTTGTA_2_remap_razers-i92_peakrem_corr.bam 9 | 10 | 11 | log=$outdir/log.txt 12 | vcf=$outdir/out.vcf 13 | cmd="$LOFREQ call --no-default-filter --only-indels --call-indels -f $REF -o $vcf $BAM" 14 | if ! eval $cmd > $log 2>&1; then 15 | echoerror "LoFreq failed. Check logfile $log. Command was $cmd" 16 | exit 1 17 | fi 18 | 19 | num_indels=$(grep -vc '^#' $vcf) 20 | if [ $num_indels -ne 0 ]; then 21 | echoerror "Got indels in indel free bam. See $vcf" 22 | exit 1 23 | else 24 | echook "Got no indels from indel free bam."
25 | fi 26 | 27 | if [ $KEEP_TMP -ne 1 ]; then 28 | test -d $outdir && rm -rf $outdir 29 | fi 30 | 31 | # FIXME call on ecoli as well or test there -------------------------------------------------------------------------------- /tests/indel_qual.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source lib.sh || exit 1 4 | 5 | set -o pipefail 6 | 7 | # test correct use of indel qualities 8 | 9 | sam=data/idq/delq_3lq.sam 10 | ref=data/idq/ref.fa 11 | res=$(samtools view -bS $sam 2>/dev/null | $LOFREQ call --call-indels -f $ref -b 1 -a 0.05 --no-default-filter -B -A - 2>&1) || exit 1 # exit must sit outside the command substitution; inside it only the subshell ends 12 | #echo "$res" 13 | if ! echo "$res" | grep -q 'ref[[:space:]]1'; then 14 | echoerror "Should have called indel at pos 1 but didn't (res was $res)" 15 | exit 1 16 | fi 17 | if echo "$res" | grep -q 'ref[[:space:]]3'; then 18 | echoerror "Shouldn't have called indel at pos 3 but did (res was $res)" 19 | exit 1 20 | fi 21 | 22 | sam=data/idq/delq_1lq.sam 23 | res=$(samtools view -bS $sam 2>/dev/null | $LOFREQ call --call-indels -f $ref -b 1 -a 0.05 --no-default-filter -B -A - 2>&1) || exit 1 # same as above: with pipefail a failing samtools or lofreq now aborts the test 24 | #echo "$res" 25 | if echo "$res" | grep -q 'ref[[:space:]]1'; then 26 | echoerror "Shouldn't have called indel at pos 1 but did (res was $res)" 27 | exit 1 28 | fi 29 | if !
echo "$res" | grep -q 'ref[[:space:]]3'; then 30 | echoerror "Should have called indel at pos 3 but didn't (res was $res)" 31 | exit 1 32 | fi 33 | 34 | echook "Indels predicted as expected" -------------------------------------------------------------------------------- /tests/indels.sh.FIXME: -------------------------------------------------------------------------------- 1 | Number of indel tests on razers alignments should be zero, because we ran it without indel support 2 | Number of indel tests performed: 0 3 | ./src/lofreq/lofreq call -f ../lofreq2-gis.git/tests/data/denv2-dpcr-validated/consensus.fa ../lofreq2-gis.git/tests/data/denv2-dpcr-validated/CTTGTA_2_remap_razers-i92_peakrem_corr.bam -r consensus:10000-11000 --debug --no-default-filter 4 | -------------------------------------------------------------------------------- /tests/is_quiet.sh.FIXME: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/tests/is_quiet.sh.FIXME -------------------------------------------------------------------------------- /tests/lewis_known.sh.FIXME: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/tests/lewis_known.sh.FIXME -------------------------------------------------------------------------------- /tests/lib.sh: -------------------------------------------------------------------------------- 1 | echoerror() { 2 | echo "ERROR: $@" 1>&2 3 | } 4 | echook() { 5 | echo "OK: $@" 1>&2 6 | } 7 | echowarn() { 8 | echo "WARN: $@" 1>&2 9 | } 10 | echoinfo() { 11 | echo "INFO: $@" 1>&2 12 | } 13 | echodebug() { 14 | echo "DEBUG: $@" 1>&2 15 | } 16 | 17 | # md5sum is md5 on mac 18 | md5=$(which md5sum 2>/dev/null || which md5) 19 | 20 | # zcat looks for .Z file on mac 21 | zcat="gzip -dc" 22 | 23 | seq=$(which seq 2>/dev/null || which gseq) 24 | 25 
| ncpus=$(sysctl -n hw.ncpu 2>/dev/null || grep -c ^processor /proc/cpuinfo 2>/dev/null || echo 1) # sysctl -n prints just the value (macOS/BSD); /proc/cpuinfo is the Linux fallback; default to 1 26 | # use 1/8 of available cpus at max but 4 min for parallel tasks 27 | threads=$(echo $ncpus | awk '{n=int($1/8); if (n<4) {n=4}; print n}') # int(): a thread count must be a whole number (e.g. 36 cpus would otherwise give 4.5) 28 | 29 | # if not user defined use local LoFreq 30 | if [ -z "$LOFREQ" ]; then 31 | LOFREQ=../src/lofreq/lofreq 32 | fi 33 | echoinfo "Using $LOFREQ" 34 | #LOFREQ=../lofreq_star-2.0.0-beta/lofreq/lofreq 35 | 36 | 37 | -------------------------------------------------------------------------------- /tests/melanoma.sh.FIXME: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/tests/melanoma.sh.FIXME -------------------------------------------------------------------------------- /tests/no_snvs_on_cons_indels.sh.FIXME: -------------------------------------------------------------------------------- 1 | NC_000913 3558478 . G C . . SB=0,DP4=0,1,2,5,CONSVAR,DP=554,AF=0.012635 2 | samtools tview EAS20_8.1k-snvs-1k-indels.postprocessed.viterbi-sorted.mdups.realn.recal.bam -p NC_000913:3558478-3558478 ref/Ecoli_K12_MG1655_NC_000913.fa 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /tests/not-matching-ref.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Test whether we can detect if the wrong reference was given 4 | 5 | source lib.sh || exit 1 6 | 7 | 8 | bam=data/denv2-pseudoclonal/denv2-pseudoclonal.bam 9 | reffa=data/denv2-simulation/denv2-refseq.fa 10 | 11 | 12 | cmd="$LOFREQ call -f $reffa $bed $bam" 13 | if eval $cmd 2>/dev/null; then 14 | echoerror "LoFreq should have failed but didn't.
Command was $cmd" 15 | exit 1 16 | else 17 | echook "LoFreq detected use of wrong reference" 18 | fi 19 | 20 | -------------------------------------------------------------------------------- /tests/parallel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Make sure the parallel wrapper produces the same result as the 4 | # default 5 | 6 | source lib.sh || exit 1 7 | 8 | 9 | 10 | BAM=data/icgc-tcga-first10kperchrom-syn1/dream-icgc-tcga-first10kperchrom-synthetic.challenge.set1.normal.v2.bam 11 | # don't bloody gzip your reference even though samtools happily indexes it 12 | REF=data/icgc-tcga-dream-support/Homo_sapiens_assembly19.fasta 13 | 14 | KEEP_TMP=0 15 | DEBUG=0 16 | SIMULATE=0 17 | 18 | outdir=$(mktemp -d -t $(basename $0).XXXXXX) 19 | outraw_parallel=$outdir/raw_parallel.vcf.gz 20 | outraw_single=$outdir/raw_single.vcf.gz 21 | log=$outdir/log.txt 22 | 23 | LOFREQ_PARALLEL="$(dirname $LOFREQ)/../scripts/lofreq2_call_pparallel.py" 24 | cmd="/usr/bin/time -p $LOFREQ_PARALLEL --pp-threads $threads -f $REF -o $outraw_parallel --verbose $BAM" 25 | test $SIMULATE -eq 1 && cmd="echo $cmd" 26 | test $DEBUG -eq 1 && echo "DEBUG: cmd=$cmd" 1>&2 27 | if ! eval $cmd >> $log 2>&1; then 28 | echoerror "The following command failed (see $log for more): $cmd" 29 | exit 1 30 | fi 31 | 32 | 33 | cmd="/usr/bin/time -p $LOFREQ call -f $REF -o $outraw_single --verbose $BAM" 34 | test $SIMULATE -eq 1 && cmd="echo $cmd" 35 | test $DEBUG -eq 1 && echo "DEBUG: cmd=$cmd" 1>&2 36 | if ! 
eval $cmd >> $log 2>&1; then 37 | echoerror "The following command failed (see $log for more): $cmd" 38 | exit 1 39 | fi 40 | 41 | 42 | if [ $SIMULATE -eq 1 ]; then 43 | nup=0 44 | nus=0 45 | else 46 | nup=$($LOFREQ vcfset -a complement -1 $outraw_parallel -2 $outraw_single --count-only) 47 | nus=$($LOFREQ vcfset -a complement -2 $outraw_parallel -1 $outraw_single --count-only) 48 | fi 49 | #if [ $nup -ne 0 ] || [ $nus -ne 0 ] ; then 50 | # there are occasional differences possible likely due to BAQ effects on region ends 51 | if [ $nup -gt 1 ] || [ $nus -gt 1 ] ; then 52 | echoerror "Observed some difference between parallel and single results. Check $outraw_parallel and $outraw_single" 53 | n_parallel=$(zgrep -vc '^#' $outraw_parallel) 54 | n_single=$(zgrep -vc '^#' $outraw_single) 55 | 56 | n_overlap=$($LOFREQ vcfset -a intersect -1 $outraw_parallel -2 $outraw_single --count-only) 57 | echoerror "$outraw_parallel has $n_parallel and $outraw_single has $n_single SNVS (both overlap by $n_overlap). Make sure these are all right on the --snvqual-thresh value." 58 | exit 1 59 | else 60 | echook "Parallel and single run give identical results." 
61 | fi 62 | 63 | 64 | 65 | if [ $KEEP_TMP -eq 1 ]; then 66 | echowarn "Not deleting tmp dir $outdir" 67 | else 68 | rm $outdir/* 69 | rmdir $outdir 70 | fi 71 | 72 | -------------------------------------------------------------------------------- /tests/pseudomonas_jade.sh.FIXME: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSB5/lofreq/54281cd875821257adfd92baa957c13d96fa58c5/tests/pseudomonas_jade.sh.FIXME -------------------------------------------------------------------------------- /tests/pylint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source lib.sh || exit 1 4 | 5 | #files_to_test=$(grep 'scripts/' ../src/lofreq_python/setup.py | tr -d "[,']" | tr -d '[\t ]' | sed -e 's,^,../,') 6 | #files_to_test=$(grep '^[^#].*\.py' ../src/lofreq_python/Makefile.am | grep -v PYTHON | cut -f 2 -d = | tr -d '\\' | tr -d '[\t ]') 7 | files_to_test=$(find ../src/scripts ../src/tools/scripts ../src/tools/lofreq_star -name \*py) 8 | PYLINT=$(which pylint 2>/dev/null || which pylint-2.7) || exit 1 9 | 10 | echoinfo "Using $PYLINT" 11 | log=$(mktemp -t pylint.XXXXX) 12 | for f in $files_to_test; do 13 | echoinfo "Testing $f" 14 | $PYLINT -E --rcfile pylint.rc $f >> $log 15 | done 16 | if [ -s $log ]; then 17 | echoerror "pylint produced errors:" 18 | cat $log 19 | exit 1 20 | else 21 | echook "pylint produced no errors" 22 | fi 23 | rm $log 24 | 25 | 26 | -------------------------------------------------------------------------------- /tests/run_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source lib.sh >/dev/null || exit 1 4 | 5 | #if hostname | grep -q aquila; then 6 | if set | grep -q SGE_CLUSTER_NAME; then 7 | on_cluster=1 8 | threads=8;# overriding default 9 | ln -sf /mnt/projects/wilma/lofreq/testing/data . 
10 | else 11 | on_cluster=0 12 | ln -sf /mnt/pnsg10_projects/wilma/lofreq/testing/data . 13 | fi 14 | 15 | #mail="-m bes -M wilma@gis.a-star.edu.sg" 16 | mail="" 17 | 18 | 19 | 20 | for f in $(ls *sh | grep -v run_all*sh | grep -v lib.sh); do 21 | if [ $on_cluster -eq 1 ]; then 22 | echo "*** Scheduling $f" 23 | name="lf-test-$(basename $f)" 24 | log=${f}.$(date +%Y%m%d-%H%M).log 25 | #cat</dev/null || mktemp -t $(basename $0).XXXXXX);#XXXXXX needed on linux? 15 | if [ $KEEP_TMP -eq 1 ]; then 16 | echowarn "Keeping tmp files with prefix $outprefix" 17 | fi 18 | 19 | finalout=${outprefix}somatic_final.snvs.vcf.gz 20 | cmd="$LOFREQ somatic --threads $threads -n $BAM_N -t $BAM_T -f $REF -l $BED -o $outprefix";#--verbose";# --debug" 21 | #echodebug "cmd = $cmd" 22 | if ! eval $cmd; then 23 | echoerror "The following command failed: $cmd" 24 | exit 1 25 | fi 26 | n_intersect=$($LOFREQ vcfset -1 $TRUESNV -2 $finalout -a intersect | grep -vc '^#') 27 | if [ "$n_intersect" -lt 2 ]; then 28 | echoerror "Expected at least two true predictions but got $n_intersect (compare $finalout and $TRUESNV)" 29 | exit 1 30 | else 31 | echook "Got $n_intersect true predictions" 32 | if [ $KEEP_TMP -ne 1 ]; then 33 | rm ${outprefix}*vcf* 34 | fi 35 | fi 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /tests/uniq.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # FIXME:add-doc 4 | 5 | source lib.sh || exit 1 6 | 7 | # test vs self prediction should give zero results 8 | 9 | bam=data/denv2-simulation/denv2-10haplo.bam 10 | vcf_in=data/denv2-simulation/denv2-10haplo_true-snp.vcf.gz 11 | vcf_out=$(mktemp -t $(basename $0).XXXXXX.vcf) 12 | rm $vcf_out 13 | 14 | $LOFREQ uniq -v $vcf_in $bam -o $vcf_out || exit 1 15 | num_snvs=$(grep -cv '^#' $vcf_out) 16 | if [ "$num_snvs" -ne 0 ]; then 17 | echoerror "Expected zero SNVs when checking variants predicted from same BAM but got 
$num_snvs" 18 | exit 1 19 | else 20 | echook "Got zero SNVs during self-comparison, as expected" 21 | fi 22 | rm $vcf_out 23 | 24 | 25 | vcf_in=data/vcf/denv2-10haplo-fake-filter-only.vcf.gz 26 | $LOFREQ uniq -v $vcf_in $bam -o $vcf_out || exit 1 27 | num_snvs=$(grep -cv '^#' $vcf_out) 28 | if [ "$num_snvs" -ne 0 ]; then 29 | echoerror "Expected zero SNVs when checking against indels and filtered variants only but got $num_snvs" 30 | exit 1 31 | else 32 | echook "Got zero SNVs when checking indels and filtered variants only" 33 | fi 34 | rm $vcf_out 35 | 36 | 37 | 38 | 39 | # no indels! 40 | vcf_in=data/vcf/CTTGTA_2_remap_razers-i92_peakrem_corr_nodeff.vcf.gz 41 | bam=data/denv2-dpcr-validated/GGCTAC_2_remap_razers-i92_peakrem_corr.bam 42 | 43 | # in == out with detlim 44 | # 45 | num_in=$(zgrep -cv '^#' $vcf_in) 46 | cmd="$LOFREQ uniq -v $vcf_in $bam --use-det-lim -o -" 47 | num_out=$(eval $cmd | grep -vc '^#') || exit 1 48 | if [ "$num_in" -ne "$num_out" ]; then 49 | echoerror "Expected same number of in and output vars when using --use-det-lim but go $num_in and $num_out resp. (cmd was $cmd)" 50 | fi 51 | 52 | # UQ= present even with --output-all 53 | cmd="$LOFREQ uniq -v $vcf_in $bam --output-all -o -" 54 | eval $cmd | grep -q 'UQ=' || echoerror "No UQ markup found" 55 | 56 | # in gt out in default mode 57 | num_in=$(zgrep -cv '^#' $vcf_in) 58 | cmd="$LOFREQ uniq -v $vcf_in $bam -o -" 59 | num_out=$(eval $cmd | grep -vc '^#') || exit 1 60 | if [ "$num_in" -le "$num_out" ]; then 61 | echoerror "Expected fewer number of vars in default output due to filtering but got $num_in and $num_out resp. 
(cmd was $cmd)" 62 | fi 63 | 64 | 65 | vcf_in=data/somatic_CHH966_chr22/hg19_chr22_true_snv.vcf.gz 66 | bam=data/somatic_CHH966_chr22/CHH966-tumor-100x-10pur-hg19.chr22-bed-only.bam 67 | $LOFREQ uniq -v $vcf_in $bam -o $vcf_out || exit 1 68 | # previously 4, but now 2 true snvs in vcf_in, which both should be unique 69 | num_snvs=$(grep -cv '^#' $vcf_out) 70 | if [ "$num_snvs" -ne 2 ]; then 71 | echoerror "Expected two SNVs from somatic check but got $num_snvs" 72 | exit 1 73 | else 74 | echook "Got expected number of SNVs from somatic check" 75 | fi 76 | 77 | rm $vcf_out 78 | #echo $vcf_out 79 | 80 | 81 | -------------------------------------------------------------------------------- /tests/valgrind_call.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # running valgrind on call incl indel calls on ecoli spikein 4 | 5 | source lib.sh || exit 1 6 | 7 | KEEP_TMP=0 8 | BASEDIR=data/ecoli-clone/ 9 | BAM=$BASEDIR/spike-in/spike-in.bam 10 | REF=$BASEDIR/ref/Ecoli_K12_MG1655_NC_000913.fa 11 | 12 | for f in $BAM $REF; do 13 | if [ ! 
-e $f ]; then
        echoerror "Required file $f missing"
        exit 1
    fi
done


# All outputs of this run (tool log, Valgrind log, VCF) live in one fresh
# temporary directory so that they can be removed, or kept, together.
outdir=$(mktemp -d -t $(basename $0).XXXXXX)
log=$outdir/log.txt
valgrindlog=$outdir/valgrind.log
vcf_out=$outdir/out.vcf


# how to get a region with true SNVs and indels close-by
#ipython
#import vcf
#vcfr = vcf.Reader(filename="truth.vcf.gz")
#vars = [v for v in vcfr]
#indel_highq = [v for v in vars_highq if v.is_indel]
#snv_highq = [v for v in vars_highq if v.is_snp]
#def argmin(iterable):
#    return min(enumerate(iterable), key=lambda x: x[1])[0]
#def closest(v, cmp_list):
#    dists = [abs(v.POS-c.POS) for c in cmp_list]
#    return argmin(dists)
#for i in indel_highq:
#    c = closest(i, snv_highq)
#    print i, snv_highq[c]
# and check that both are present in truth and lofreq prediction
# NOTE(review): vars_highq is never defined in the snippet above; presumably
# it is a quality-filtered subset of vars -- confirm.

# Call SNVs and indels on a small region under Valgrind's memcheck. The
# Valgrind report goes to $valgrindlog, the tool's own output to $log.
valgrind --suppressions=faidx_fetch_seq.supp --leak-check=full --tool=memcheck --log-file=$valgrindlog \
    $LOFREQ call --call-indels -f $REF $BAM -r 'NC_000913:2000-2600' -o $vcf_out >$log 2>&1 || exit 1

# Every one of these positions must show up (as a whole word) in the VCF.
for pos in 2000 2032 2214 2514 2572; do
    if ! grep -q -w $pos $vcf_out; then
        echoerror "Expected variant position $pos not found in vcf $vcf_out"
        exit 1
    fi
done
echook "All expected variant positions found"


# Count 'ERROR SUMMARY' lines that report anything other than ': 0 errors'.
num_err=$(grep 'ERROR SUMMARY' $valgrindlog | grep -cv ': 0 errors')
if [ "$num_err" -ne 0 ]; then
    echoerror "Found errors in Valgrind output $valgrindlog"
    exit 1
else
    echook "No errors found in Valgrind output"
fi

# Count 'lost' lines that report anything other than ': 0 bytes in 0 blocks'.
lost_bytes=$(grep 'lost' $valgrindlog | grep -cv ': 0 bytes in 0 blocks')
if [ "$lost_bytes" -ne 0 ]; then
    echoerror "Found lost bytes in Valgrind output $valgrindlog"
    exit 1
else
    echook "No lost bytes found in Valgrind output"
fi

# Clean up unless the user asked to keep temporary files.
if [ $KEEP_TMP -ne 1 ] && [ $num_err -eq 0 ]; then
    test -d $outdir && rm -rf $outdir
else
    echowarn "Not deleting temporary output directory $outdir"
fi

--------------------------------------------------------------------------------
/tests/valgrind_uniq.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Runs 'lofreq uniq' under Valgrind's memcheck on the first lines of a VCF
# and fails if the Valgrind log reports errors or lost bytes.
# echoerror/echook, $LOFREQ and $zcat are presumably provided by lib.sh.

source lib.sh || exit 1

# mktemp is only used to obtain unique names; the files themselves are
# removed again right away and re-created by valgrind/lofreq.
valgrind_log=$(mktemp -t $(basename $0).XXXXXX.valgrind)
vcf_out=$(mktemp -t $(basename $0).XXXXXX.vcf)
rm $vcf_out $valgrind_log

# FIXME better to use somatic SNVs
bam=data/denv2-simulation/denv2-10haplo.bam
vcf=data/denv2-simulation/denv2-10haplo_true-snp.vcf.gz

# use only head. otherwise too slow
$zcat $vcf | head | valgrind --log-file=$valgrind_log --tool=memcheck \
    $LOFREQ uniq -v - $bam -o $vcf_out || exit 1


# Count 'ERROR SUMMARY' lines that report anything other than ': 0 errors'.
num_err=$(grep 'ERROR SUMMARY' $valgrind_log | grep -cv ': 0 errors')
if [ "$num_err" -ne 0 ]; then
    echoerror "Found errors in Valgrind output $valgrind_log"
    exit 1
else
    echook "No errors found in Valgrind output"
fi

# Count 'lost' lines that report anything other than ': 0 bytes in 0 blocks'.
lost_bytes=$(grep 'lost' $valgrind_log | grep -cv ': 0 bytes in 0 blocks')
if [ "$lost_bytes" -ne 0 ]; then
    echoerror "Found lost bytes in Valgrind output $valgrind_log"
    exit 1
else
    echook "No lost bytes found in Valgrind output"
fi



# Only reached on success; on failure the files are left for inspection.
rm $vcf_out $valgrind_log

--------------------------------------------------------------------------------
/tests/valgrind_vcfset.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Runs 'lofreq vcfset -a complement' of a VCF against itself under Valgrind's
# memcheck and fails if the Valgrind log reports errors or lost bytes.
# echoerror/echook and $LOFREQ are presumably provided by lib.sh.

source lib.sh || exit 1

valgrind_log=$(mktemp -t $(basename $0).XXXXXX.valgrind)
vcf_in=data/vcf/CTTGTA_2_remap_razers-i92_peakrem_corr_nodeff.vcf.gz

# htslib's (1.1) bgzf_getline() always seems to leak even though we free the used memory. suppress errors here
valgrind --suppressions=bgzf_getline.supp --log-file=$valgrind_log --tool=memcheck --leak-check=full $LOFREQ vcfset -a complement -1 $vcf_in -2 $vcf_in >/dev/null || exit 1

# An empty log means Valgrind did not report anything at all.
test -s $valgrind_log || exit 1

# Count 'ERROR SUMMARY' lines that report anything other than ': 0 errors'.
num_err=$(grep 'ERROR SUMMARY' $valgrind_log | grep -cv ': 0 errors')
if [ "$num_err" -ne 0 ]; then
    echoerror "Found errors in Valgrind output $valgrind_log"
    exit 1
else
    echook "No errors found in Valgrind output"
fi

# Count 'lost' lines that report anything other than ': 0 bytes in 0 blocks'.
lost_bytes=$(grep 'lost' $valgrind_log | grep -cv ': 0 bytes in 0 blocks')
if [ "$lost_bytes" -ne 0 ]; then
    echoerror "Found lost bytes in Valgrind output $valgrind_log"
    exit 1
else
    echook "No lost bytes found in Valgrind output"
fi

# Only reached on success; on failure the log is left for inspection.
rm -f $valgrind_log

--------------------------------------------------------------------------------
/tests/valid_vcf_output.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Runs 'lofreq call' and checks that bcftools can read the resulting VCF.
# echoerror/echook and $LOFREQ are presumably provided by lib.sh.

source lib.sh || exit 1

bam=./data/denv2-pseudoclonal/denv2-pseudoclonal.bam
reffa=./data/denv2-pseudoclonal/denv2-pseudoclonal_cons.fa
bed=./data/denv2-pseudoclonal/denv2-pseudoclonal_incl.bed
# mktemp is only used to obtain a unique name; lofreq creates the file.
vcf=$(mktemp -t $(basename $0).XXXXXX.vcf)
rm -f $vcf

# index bam if necessary
test -s ${bam}.bai || samtools index $bam

$LOFREQ call -f $reffa -l $bed -o $vcf $bam || exit 1
# this tests 'filter' as well as it's part of call
#export PERL5LIB=/Users/wilma/local/lib/
#if perl -mVcf -e validate ../tests/denv2-pseudoclonal.vcf; then
if bcftools view $vcf >/dev/null; then
    echook "Got valid VCF output"
    rm -f $vcf
else
    echoerror "Invalid VCF output"
    # Leave $vcf in place for inspection and signal the failure to the caller.
    exit 1
fi

--------------------------------------------------------------------------------
/tests/vcf_setop.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Exercises the set operations of 'lofreq vcfset' (complement and intersect,
# with and without --only-passed / --only-pos) on small VCF files.
# echoerror/echook, $LOFREQ and $md5 are presumably provided by lib.sh.

source lib.sh || exit 1


vcf_t=data/vcf/CHH966-tumor-100x-100pur-hg19.bwa_6431925.vcf.gz
vcf_n=data/vcf/CHH966-normal-100x-100pur-hg19.bwa.renamed_6431925.vcf.gz
#vcf_t=data/vcf/CHH966-tumor-100x-100pur-hg19.bwa_6431925.vcf
#vcf_n=data/vcf/CHH966-normal-100x-100pur-hg19.bwa.renamed_6431925.vcf
vcf_out=$(mktemp -t $(basename $0).XXXXXX.vcf)

# Tumor-minus-normal complement on a larger data set; only the first seven
# columns are kept and compared against a stored reference result.
cmd="$LOFREQ vcfset -1 $vcf_t -2 $vcf_n -a complement -o -"

#echodebug "cmd=$cmd"
eval $cmd | cut -f 1-7 > $vcf_out

# Count the unified-diff lines starting with '+' or '-', skipping any line
# that contains '##'.
num_diffs=$(gzip -dc data/vcf/CHH966-tumor-only.f-7.vcf.gz | \
    diff -u $vcf_out - | grep -v '##' | grep '^[\+\-]' | wc -l)
# 10 = the two diff header lines ('---'/'+++') plus the eight records below.
exp_diffs=10
#--- test.vcf 2013-04-03 22:12:53.000000000 +0800
#+++ - 2013-04-03 22:22:06.000000000 +0800
#-chr12 30805918 . C G 23 .
#-chr13 107516488 . T G 22 .
#-chr16 69170707 . G C 23 .
#-chr17 8738690 . T G 23 .
#-chr2 42513376 . G C 23 .
#-chr4 186560162 . C G 22 .
#-chr6 42571331 . T A 24 .
#-chr6 106553829 . G A 26 .
#
# All diffs expected. vcf-isec only looks at chrom and pos, not the
# bases. NOTE(review): the original note was cut off after "not the";
# "bases" is the presumed ending -- confirm.

if [ $num_diffs -ne $exp_diffs ]; then
    echoerror "Expected $exp_diffs but got $num_diffs (keeping $vcf_out for your reference)."
else
    echook "Larger complement test produced expected results."
    rm $vcf_out
fi





vcf_1=data/vcf/vcf_set.vcf.gz
vcf_1_allfiltered=data/vcf/vcf_set_allfiltered.vcf.gz

# complement against self should give zero
cmd="$LOFREQ vcfset -1 $vcf_1 -2 $vcf_1 -a complement -o -"
# grep -c prints 0 for empty input, so a crashing vcfset would look like a
# pass. Hand vcfset's own exit status out of the command substitution and
# report a failed run separately.
if ! num_compl=$(eval $cmd | grep -vc '^#'; exit ${PIPESTATUS[0]}); then
    echoerror "vcfset failed (cmd = $cmd)"
elif [ $num_compl -ne 0 ]; then
    echoerror "Complement against self should give 0"
else
    echook "Complement against self returned 0"
fi


# intersect against self should give all
cmd="$LOFREQ vcfset -1 $vcf_1 -2 $vcf_1 -a intersect -o -"
md5_test=$(eval $cmd | grep -v '^#' | $md5)
md5_org=$(zgrep -v '^#' $vcf_1 | $md5)
if [ "$md5_test" != "$md5_org" ]; then
    echoerror "Intersect against self should give results identical to input (cmd: $cmd)"
    #echodebug "md5_test = $md5_test"
    #echodebug "md5_org = $md5_org"
else
    echook "Intersect against self gave results identical to input"
fi


# intersect with all filtered should give 0
cmd="$LOFREQ vcfset -1 $vcf_1 -2 $vcf_1_allfiltered -a intersect --only-passed -o -"
# Same exit-status check as above: zero records only counts as a pass if
# vcfset itself succeeded.
if ! num_inter=$(eval $cmd | grep -vc '^#'; exit ${PIPESTATUS[0]}); then
    echoerror "vcfset failed (cmd = $cmd)"
elif [ $num_inter -ne 0 ]; then
    echoerror "Intersect (only-passed) with all filtered should give 0 (but gave $num_inter; cmd = $cmd)"
else
    echook "intersect (only-passed) with all filtered returned 0"
fi

# complement with all filtered should give all
cmd="$LOFREQ vcfset -1 $vcf_1 -2 $vcf_1_allfiltered -a complement -o - --only-passed"
md5_test=$(eval $cmd | grep -v '^#' | $md5)
md5_org=$(zgrep -v '^#' $vcf_1 | grep 'PASS' | $md5)
#echodebug "$cmd test=$md5_test org=$md5_org"
if [ "$md5_test" != "$md5_org" ]; then
    echoerror "only-passed complement with all filtered should give results identical to input (cmd = $cmd)"
else
    echook "only-passed complement with all filtered gave results identical to input"
fi


#
vcf_org=data/vcf/vcf_set.vcf.gz
vcf_baseswap=data/vcf/vcf_set_altrefswap.vcf.gz
# Intersecting with the base-swapped file must return no variants here and
# some variants once --only-pos is added (presumably because the default
# comparison includes the bases -- confirm against the vcfset docs).
cmd="$LOFREQ vcfset -1 $vcf_org -2 $vcf_baseswap -a intersect -o -"
# grep -c prints 0 for empty input, so a crashing vcfset would look like a
# pass. Hand vcfset's own exit status out of the command substitution and
# report a failed run separately.
if ! num_out=$(eval $cmd | grep -cv '^#'; exit ${PIPESTATUS[0]}); then
    echoerror "vcfset failed (cmd = $cmd)"
elif [ $num_out -ne 0 ]; then
    echoerror "intersection with base swapped file returned variants (expected none)"
else
    echook "intersection with base swapped file returned zero variants"
fi

cmd="$cmd --only-pos"
num_out=$(eval $cmd | grep -cv '^#')
if [ $num_out -eq 0 ]; then
    echoerror "intersection with base swapped file when using only positions did not return any variants"
else
    echook "intersection with base swapped file when using only positions returned variants"
fi


--------------------------------------------------------------------------------
/tests/viterbi.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Runs 'lofreq viterbi' on a BAM file and checks that exactly two reads come
# out with a 75M CIGAR string.
# echoerror/echook and $LOFREQ are presumably provided by lib.sh.

source lib.sh || exit 1

BASEDIR=data/viterbi/
REF=$BASEDIR/NC_011770.fa
BAM=$BASEDIR/pseudomonas_pair_screwed_up_cigar.bam


# input contains two reads with near random cigar strings
# that are in fact perfect matches

# No '|| exit 1' on this pipeline: its status is that of 'grep -c', which is
# non-zero whenever the count is 0, so the script would exit silently instead
# of printing the diagnostic below.
ncorr=$($LOFREQ viterbi -f $REF $BAM | samtools view - 2>/dev/null | grep -cw 75M)
# Quoted so that an empty value still yields a valid test expression.
if [ "$ncorr" != "2" ]; then
    echoerror "Expected two fixed input reads but got $ncorr"
    exit 1
else
    echook "All reads correctly realigned"
fi

--------------------------------------------------------------------------------