├── .gitignore ├── .ycm_extra_conf.py ├── LICENSE ├── Makefile ├── README.md ├── bin ├── .gitkeep └── flye ├── docs ├── FAQ.md ├── INSTALL.md ├── NEWS.md ├── USAGE.md └── graph_example.png ├── flye ├── .main.py.swo ├── __build__.py ├── __init__.py ├── __version__.py ├── assembly │ ├── __init__.py │ ├── assemble.py │ ├── repeat_graph.py │ └── scaffolder.py ├── config │ ├── __init__.py │ ├── bin_cfg │ │ ├── asm_corrected_reads.cfg │ │ ├── asm_defaults.cfg │ │ ├── asm_hifi.cfg │ │ ├── asm_nano_hq.cfg │ │ ├── asm_raw_reads.cfg │ │ ├── asm_subasm.cfg │ │ ├── nano_r94_g36_homopolymers.mat │ │ ├── nano_r94_g36_substitutions.mat │ │ ├── nano_r94_homopolymers.mat │ │ ├── nano_r94_substitutions.mat │ │ ├── pacbio_chm13_homopolymers.mat │ │ ├── pacbio_chm13_substitutions.mat │ │ ├── pacbio_homopolymers.mat │ │ └── pacbio_substitutions.mat │ ├── configurator.py │ └── py_cfg.py ├── main.py ├── polishing │ ├── __init__.py │ ├── alignment.py │ ├── bubbles.py │ ├── consensus.py │ └── polish.py ├── repeat_graph │ ├── __init__.py │ ├── graph_alignment.py │ └── repeat_graph.py ├── short_plasmids │ ├── __init__.py │ ├── circular_sequences.py │ ├── plasmids.py │ ├── unmapped_reads.py │ └── utils.py ├── six.py ├── tests │ ├── __init__.py │ ├── data │ │ ├── ecoli_500kb.fasta │ │ ├── ecoli_500kb_reads.fastq.gz │ │ └── ecoli_500kb_reads_hifi.fastq.gz │ └── test_toy.py ├── trestle │ ├── __init__.py │ ├── divergence.py │ ├── graph_resolver.py │ ├── trestle.py │ └── trestle_config.py └── utils │ ├── __init__.py │ ├── bytes2human.py │ ├── fasta_parser.py │ ├── sam_parser.py │ └── utils.py ├── lib ├── interval_tree │ ├── IntervalTree.h │ ├── LICENSE │ └── README.md ├── lemon │ ├── AUTHORS │ ├── INSTALL │ ├── LICENSE │ ├── NEWS │ ├── README │ └── lemon │ │ ├── adaptors.h │ │ ├── arg_parser.h │ │ ├── assert.h │ │ ├── bellman_ford.h │ │ ├── bfs.h │ │ ├── bin_heap.h │ │ ├── binomial_heap.h │ │ ├── bits │ │ ├── alteration_notifier.h │ │ ├── array_map.h │ │ ├── bezier.h │ │ ├── default_map.h │ │ ├── 
edge_set_extender.h │ │ ├── enable_if.h │ │ ├── graph_adaptor_extender.h │ │ ├── graph_extender.h │ │ ├── lock.h │ │ ├── map_extender.h │ │ ├── path_dump.h │ │ ├── solver_bits.h │ │ ├── traits.h │ │ ├── variant.h │ │ ├── vector_map.h │ │ └── windows.h │ │ ├── bucket_heap.h │ │ ├── capacity_scaling.h │ │ ├── cbc.h │ │ ├── christofides_tsp.h │ │ ├── circulation.h │ │ ├── clp.h │ │ ├── color.h │ │ ├── concept_check.h │ │ ├── concepts │ │ ├── bpgraph.h │ │ ├── digraph.h │ │ ├── graph.h │ │ ├── graph_components.h │ │ ├── heap.h │ │ ├── maps.h │ │ └── path.h │ │ ├── config.h │ │ ├── connectivity.h │ │ ├── core.h │ │ ├── cost_scaling.h │ │ ├── counter.h │ │ ├── cplex.h │ │ ├── cycle_canceling.h │ │ ├── dfs.h │ │ ├── dheap.h │ │ ├── dijkstra.h │ │ ├── dim2.h │ │ ├── dimacs.h │ │ ├── edge_set.h │ │ ├── edmonds_karp.h │ │ ├── elevator.h │ │ ├── error.h │ │ ├── euler.h │ │ ├── fib_heap.h │ │ ├── fractional_matching.h │ │ ├── full_graph.h │ │ ├── glpk.h │ │ ├── gomory_hu.h │ │ ├── graph_to_eps.h │ │ ├── greedy_tsp.h │ │ ├── grid_graph.h │ │ ├── grosso_locatelli_pullan_mc.h │ │ ├── hao_orlin.h │ │ ├── hartmann_orlin_mmc.h │ │ ├── howard_mmc.h │ │ ├── hypercube_graph.h │ │ ├── insertion_tsp.h │ │ ├── karp_mmc.h │ │ ├── kruskal.h │ │ ├── lgf_reader.h │ │ ├── lgf_writer.h │ │ ├── list_graph.h │ │ ├── lp.h │ │ ├── lp_base.h │ │ ├── lp_skeleton.h │ │ ├── maps.h │ │ ├── matching.h │ │ ├── math.h │ │ ├── max_cardinality_search.h │ │ ├── min_cost_arborescence.h │ │ ├── nagamochi_ibaraki.h │ │ ├── nauty_reader.h │ │ ├── nearest_neighbor_tsp.h │ │ ├── network_simplex.h │ │ ├── opt2_tsp.h │ │ ├── pairing_heap.h │ │ ├── path.h │ │ ├── planarity.h │ │ ├── preflow.h │ │ ├── quad_heap.h │ │ ├── radix_heap.h │ │ ├── radix_sort.h │ │ ├── random.h │ │ ├── smart_graph.h │ │ ├── soplex.h │ │ ├── static_graph.h │ │ ├── suurballe.h │ │ ├── time_measure.h │ │ ├── tolerance.h │ │ └── unionfind.h ├── libcuckoo │ ├── LICENSE │ ├── README.md │ ├── cuckoohash_config.hh │ ├── cuckoohash_map.hh │ ├── 
cuckoohash_util.hh │ └── libcuckoo_bucket_container.hh ├── minimap2 │ ├── FAQ.md │ ├── LICENSE.txt │ ├── MANIFEST.in │ ├── Makefile │ ├── Makefile.simde │ ├── NEWS.md │ ├── README.md │ ├── align.c │ ├── bseq.c │ ├── bseq.h │ ├── chain.c │ ├── cookbook.md │ ├── esterr.c │ ├── example.c │ ├── format.c │ ├── hit.c │ ├── index.c │ ├── kalloc.c │ ├── kalloc.h │ ├── kdq.h │ ├── ketopt.h │ ├── khash.h │ ├── krmq.h │ ├── kseq.h │ ├── ksort.h │ ├── ksw2.h │ ├── ksw2_dispatch.c │ ├── ksw2_extd2_sse.c │ ├── ksw2_exts2_sse.c │ ├── ksw2_extz2_sse.c │ ├── ksw2_ll_sse.c │ ├── kthread.c │ ├── kthread.h │ ├── kvec.h │ ├── lchain.c │ ├── main.c │ ├── map.c │ ├── minimap.h │ ├── minimap2.1 │ ├── misc.c │ ├── misc │ │ ├── README.md │ │ ├── mmphase.js │ │ └── paftools.js │ ├── mmpriv.h │ ├── options.c │ ├── pe.c │ ├── python │ │ ├── README.rst │ │ ├── cmappy.h │ │ ├── cmappy.pxd │ │ ├── mappy.pyx │ │ └── minimap2.py │ ├── sdust.c │ ├── sdust.h │ ├── seed.c │ ├── setup.py │ ├── sketch.c │ ├── splitidx.c │ ├── sse2neon │ │ └── emmintrin.h │ └── test │ │ ├── MT-human.fa │ │ ├── MT-orang.fa │ │ ├── q-inv.fa │ │ ├── q2.fa │ │ ├── t-inv.fa │ │ └── t2.fa └── samtools-1.9 │ ├── AUTHORS │ ├── ChangeLog.old │ ├── INSTALL │ ├── LICENSE │ ├── Makefile │ ├── Makefile.mingw │ ├── NEWS │ ├── README │ ├── bam.c │ ├── bam.h │ ├── bam2bcf.c │ ├── bam2bcf.h │ ├── bam2bcf_indel.c │ ├── bam2depth.c │ ├── bam_addrprg.c │ ├── bam_aux.c │ ├── bam_cat.c │ ├── bam_color.c │ ├── bam_endian.h │ ├── bam_flags.c │ ├── bam_import.c │ ├── bam_index.c │ ├── bam_lpileup.c │ ├── bam_lpileup.h │ ├── bam_markdup.c │ ├── bam_mate.c │ ├── bam_md.c │ ├── bam_plbuf.c │ ├── bam_plbuf.h │ ├── bam_plcmd.c │ ├── bam_quickcheck.c │ ├── bam_reheader.c │ ├── bam_rmdup.c │ ├── bam_rmdupse.c │ ├── bam_sort.c │ ├── bam_split.c │ ├── bam_stat.c │ ├── bam_tview.c │ ├── bam_tview.h │ ├── bam_tview_curses.c │ ├── bam_tview_html.c │ ├── bamshuf.c │ ├── bamtk.c │ ├── bedcov.c │ ├── bedidx.c │ ├── bedidx.h │ ├── config.h.in │ ├── config.mk.in 
│ ├── configure │ ├── configure.ac │ ├── cut_target.c │ ├── dict.c │ ├── faidx.c │ ├── htslib-1.9 │ ├── INSTALL │ ├── LICENSE │ ├── Makefile │ ├── NEWS │ ├── README │ ├── bcf_sr_sort.c │ ├── bcf_sr_sort.h │ ├── bgzf.c │ ├── bgzip.1 │ ├── bgzip.c │ ├── config.h.in │ ├── config.mk.in │ ├── configure │ ├── configure.ac │ ├── cram │ │ ├── cram.h │ │ ├── cram_codecs.c │ │ ├── cram_codecs.h │ │ ├── cram_decode.c │ │ ├── cram_decode.h │ │ ├── cram_encode.c │ │ ├── cram_encode.h │ │ ├── cram_external.c │ │ ├── cram_index.c │ │ ├── cram_index.h │ │ ├── cram_io.c │ │ ├── cram_io.h │ │ ├── cram_samtools.c │ │ ├── cram_samtools.h │ │ ├── cram_stats.c │ │ ├── cram_stats.h │ │ ├── cram_structs.h │ │ ├── files.c │ │ ├── mFILE.c │ │ ├── mFILE.h │ │ ├── misc.h │ │ ├── open_trace_file.c │ │ ├── open_trace_file.h │ │ ├── os.h │ │ ├── pooled_alloc.c │ │ ├── pooled_alloc.h │ │ ├── rANS_byte.h │ │ ├── rANS_static.c │ │ ├── rANS_static.h │ │ ├── sam_header.c │ │ ├── sam_header.h │ │ ├── string_alloc.c │ │ └── string_alloc.h │ ├── errmod.c │ ├── faidx.5 │ ├── faidx.c │ ├── hfile.c │ ├── hfile_gcs.c │ ├── hfile_internal.h │ ├── hfile_libcurl.c │ ├── hfile_net.c │ ├── hfile_s3.c │ ├── hts.c │ ├── hts_internal.h │ ├── hts_os.c │ ├── htsfile.1 │ ├── htsfile.c │ ├── htslib.mk │ ├── htslib.pc.in │ ├── htslib │ │ ├── bgzf.h │ │ ├── cram.h │ │ ├── faidx.h │ │ ├── hfile.h │ │ ├── hts.h │ │ ├── hts_defs.h │ │ ├── hts_endian.h │ │ ├── hts_log.h │ │ ├── hts_os.h │ │ ├── kbitset.h │ │ ├── kfunc.h │ │ ├── khash.h │ │ ├── khash_str2int.h │ │ ├── klist.h │ │ ├── knetfile.h │ │ ├── kseq.h │ │ ├── ksort.h │ │ ├── kstring.h │ │ ├── regidx.h │ │ ├── sam.h │ │ ├── synced_bcf_reader.h │ │ ├── tbx.h │ │ ├── thread_pool.h │ │ ├── vcf.h │ │ ├── vcf_sweep.h │ │ └── vcfutils.h │ ├── htslib_vars.mk │ ├── kfunc.c │ ├── knetfile.c │ ├── kstring.c │ ├── m4 │ │ └── hts_prog_cc_warnings.m4 │ ├── md5.c │ ├── multipart.c │ ├── os │ │ ├── lzma_stub.h │ │ └── rand.c │ ├── plugin.c │ ├── probaln.c │ ├── realn.c │ ├── regidx.c 
│ ├── sam.5 │ ├── sam.c │ ├── synced_bcf_reader.c │ ├── tabix.1 │ ├── tabix.c │ ├── tbx.c │ ├── textutils.c │ ├── textutils_internal.h │ ├── thread_pool.c │ ├── thread_pool_internal.h │ ├── vcf.5 │ ├── vcf.c │ ├── vcf_sweep.c │ ├── vcfutils.c │ └── version.sh │ ├── install-sh │ ├── lz4 │ ├── LICENSE │ ├── lz4.c │ └── lz4.h │ ├── m4 │ ├── ax_with_curses.m4 │ └── ax_with_htslib.m4 │ ├── misc │ ├── HmmGlocal.java │ ├── ace2sam.c │ ├── blast2sam.pl │ ├── bowtie2sam.pl │ ├── export2sam.pl │ ├── interpolate_sam.pl │ ├── maq2sam.c │ ├── md5fa.c │ ├── md5sum-lite.c │ ├── novo2sam.pl │ ├── plot-bamstats │ ├── psl2sam.pl │ ├── r2plot.lua │ ├── sam2vcf.pl │ ├── samtools.pl │ ├── samtools_tab_completion │ ├── seq_cache_populate.pl │ ├── soap2sam.pl │ ├── varfilter.py │ ├── vcfutils.lua │ ├── wgsim.1 │ ├── wgsim.c │ ├── wgsim_eval.pl │ └── zoom2sam.pl │ ├── padding.c │ ├── phase.c │ ├── sam.c │ ├── sam.h │ ├── sam_header.c │ ├── sam_header.h │ ├── sam_opts.c │ ├── sam_opts.h │ ├── sam_utils.c │ ├── sam_view.c │ ├── sample.c │ ├── sample.h │ ├── samtools.1 │ ├── samtools.h │ ├── stats.c │ ├── stats_isize.c │ ├── stats_isize.h │ ├── tmp_file.c │ ├── tmp_file.h │ ├── version.sh │ └── win32 │ ├── xcurses.h │ ├── zconf.h │ └── zlib.h ├── requirements.txt ├── setup.py └── src ├── Makefile ├── assemble ├── chimera.cpp ├── chimera.h ├── extender.cpp ├── extender.h ├── main_assemble.cpp ├── parameters_estimator.cpp └── parameters_estimator.h ├── common ├── bfcontainer.h ├── config.h ├── disjoint_set.h ├── logger.h ├── matrix.h ├── memory_info.h ├── parallel.h ├── progress_bar.h └── utils.h ├── contigger ├── contig_extender.cpp ├── contig_extender.h └── main_contigger.cpp ├── main.cpp ├── polishing ├── alignment.cpp ├── alignment.h ├── bubble.h ├── bubble_processor.cpp ├── bubble_processor.h ├── dinucleotide_fixer.cpp ├── dinucleotide_fixer.h ├── general_polisher.cpp ├── general_polisher.h ├── homo_polisher.cpp ├── homo_polisher.h ├── main_polisher.cpp ├── subs_matrix.cpp ├── 
subs_matrix.h └── utility.h ├── repeat_graph ├── graph_processing.cpp ├── graph_processing.h ├── haplotype_resolver.cpp ├── haplotype_resolver.h ├── main_repeat.cpp ├── multiplicity_inferer.cpp ├── multiplicity_inferer.h ├── output_generator.cpp ├── output_generator.h ├── read_aligner.cpp ├── read_aligner.h ├── repeat_graph.cpp ├── repeat_graph.h ├── repeat_resolver.cpp └── repeat_resolver.h └── sequence ├── alignment.cpp ├── alignment.h ├── consensus_generator.cpp ├── consensus_generator.h ├── edlib.cpp ├── edlib.h ├── kmer.h ├── overlap.cpp ├── overlap.h ├── sequence.cpp ├── sequence.h ├── sequence_container.cpp ├── sequence_container.h ├── vertex_index.cpp └── vertex_index.h /.gitignore: -------------------------------------------------------------------------------- 1 | *.a 2 | *.pyc 3 | *.o 4 | *.swp 5 | *.so 6 | *.DS_Store 7 | *.egg-info 8 | build 9 | dist 10 | 11 | bin/flye-modules 12 | bin/flye-minimap2 13 | bin/flye-samtools 14 | 15 | lib/minimap2/minimap2 16 | 17 | lib/samtools-1.9/samtools 18 | lib/samtools-1.9/htslib-1.10/hts-object-files 19 | lib/samtools-1.9/htslib-1.10/htslib_static.mk 20 | lib/samtools-1.9/htslib-1.10/version.h 21 | lib/samtools-1.9/version.h 22 | 23 | lib/samtools-1.9/config.h 24 | lib/samtools-1.9/config.log 25 | lib/samtools-1.9/config.mk 26 | lib/samtools-1.9/config.status 27 | lib/samtools-1.9/htslib-1.9/config.h 28 | lib/samtools-1.9/htslib-1.9/version.h 29 | lib/samtools-1.9/htslib-1.9/config.log 30 | lib/samtools-1.9/htslib-1.9/config.mk 31 | lib/samtools-1.9/htslib-1.9/config.status 32 | lib/samtools-1.9/htslib-1.9/htslib.pc.tmp 33 | lib/samtools-1.9/htslib-1.9/htslib_static.mk 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, The Regents of the University of California 2 | License: BSD-3-Clause 3 | 4 | Project code contributors: 5 | - Mikhail Kolmogorov 6 | - Jeffrey Yuan 7 
| - Yu Lin 8 | - Evgeny Polevikov 9 | 10 | All rights reserved. 11 | 12 | Redistribution and use in source and binary forms, with or without 13 | modification, are permitted provided that the following conditions are met: 14 | 15 | * Redistributions of source code must retain the above copyright 16 | notice, this list of conditions and the following disclaimer. 17 | 18 | * Redistributions in binary form must reproduce the above copyright 19 | notice, this list of conditions and the following disclaimer in the 20 | documentation and/or other materials provided with the distribution. 21 | 22 | * Neither the name of the The Regents of the University of California nor the 23 | names of its contributors may be used to endorse or promote products 24 | derived from this software without specific prior written permission. 25 | 26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 27 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 28 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 29 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY 30 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 31 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 32 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 33 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 35 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
36 | 37 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ROOT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) 2 | THREADS := 4 3 | 4 | export LIBCUCKOO = -I${ROOT_DIR}/lib/libcuckoo 5 | export INTERVAL_TREE = -I${ROOT_DIR}/lib/interval_tree 6 | export LEMON = -I${ROOT_DIR}/lib/lemon 7 | export BIN_DIR = ${ROOT_DIR}/bin 8 | export MINIMAP2_DIR = ${ROOT_DIR}/lib/minimap2 9 | export SAMTOOLS_DIR = ${ROOT_DIR}/lib/samtools-1.9 10 | 11 | export CXXFLAGS += ${LIBCUCKOO} ${INTERVAL_TREE} ${LEMON} -I${MINIMAP2_DIR} 12 | export LDFLAGS += -lz -L${MINIMAP2_DIR} -lminimap2 13 | 14 | ifeq ($(shell uname -m),arm64) 15 | export arm_neon=1 16 | export aarch64=1 17 | endif 18 | 19 | .PHONY: clean all profile debug minimap2 samtools 20 | 21 | .DEFAULT_GOAL := all 22 | 23 | 24 | ${BIN_DIR}/flye-minimap2: 25 | make -C ${MINIMAP2_DIR} -j ${THREADS} 26 | cp ${MINIMAP2_DIR}/minimap2 ${BIN_DIR}/flye-minimap2 27 | 28 | minimap2: ${BIN_DIR}/flye-minimap2 29 | 30 | samtools: ${BIN_DIR}/flye-samtools 31 | 32 | ${BIN_DIR}/flye-samtools: 33 | cd ${SAMTOOLS_DIR} && ./configure --without-curses --disable-bz2 --disable-lzma --enable-plugins 34 | make samtools -C ${SAMTOOLS_DIR} -j ${THREADS} 35 | cp ${SAMTOOLS_DIR}/samtools ${BIN_DIR}/flye-samtools 36 | 37 | all: minimap2 samtools 38 | make release -C src -j ${THREADS} 39 | profile: minimap2 samtools 40 | make profile -C src -j ${THREADS} 41 | debug: minimap2 samtools 42 | make debug -C src -j ${THREADS} 43 | clean: 44 | make clean -C src 45 | make clean -C ${MINIMAP2_DIR} 46 | make clean-all -C ${SAMTOOLS_DIR} 47 | rm -f ${BIN_DIR}/flye-minimap2 48 | rm -f ${BIN_DIR}/flye-samtools 49 | -------------------------------------------------------------------------------- /bin/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mikolmogorov/Flye/886b8c17412cdf3a2868a28237bca6c5ad1da156/bin/.gitkeep -------------------------------------------------------------------------------- /bin/flye: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | #(c) 2016 by Authors 4 | #This file is a part of ABruijn program. 5 | #Released under the BSD license (see LICENSE file) 6 | 7 | """ 8 | This script sets up environment paths 9 | and invokes Flye without installation. 10 | """ 11 | 12 | import os 13 | import sys 14 | 15 | BIN_DIR = "bin" 16 | 17 | def main(): 18 | #Setting executable paths 19 | flye_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 20 | bin_absolute = os.path.join(flye_root, BIN_DIR) 21 | sys.path.insert(0, flye_root) 22 | os.environ["PATH"] = bin_absolute + os.pathsep + os.environ["PATH"] 23 | 24 | #Flye entry point 25 | from flye.main import main 26 | sys.exit(main()) 27 | 28 | 29 | if __name__ == "__main__": 30 | main() 31 | -------------------------------------------------------------------------------- /docs/INSTALL.md: -------------------------------------------------------------------------------- 1 | Flye Installation 2 | ================= 3 | 4 | Availability 5 | ------------ 6 | 7 | Flye is available for Linux and MacOS platforms. 8 | 9 | Bioconda Releases 10 | ----------------- 11 | 12 | You can get the latest stable release through Bioconda: 13 | 14 | conda install flye 15 | 16 | Alternatively, you can get a release version from the github releases page 17 | 18 | 19 | Building Requirements 20 | --------------------- 21 | 22 | * Python 2.7 or 3.5+ (with setuptools package installed) 23 | * C++ compiler with C++11 support (GCC 4.8+ / Clang 3.3+ / Apple Clang 5.0+) 24 | * GNU make 25 | * Git 26 | * Core OS development headers (zlib, ...) 
27 | 28 | 29 | Local building (without installation) 30 | ------------------------------------- 31 | 32 | You may use the package locally without system installation. 33 | To get and compile the latest git version, run: 34 | 35 | git clone https://github.com/fenderglass/Flye 36 | cd Flye 37 | make 38 | 39 | Then, Flye will be available as: 40 | 41 | python bin/flye 42 | 43 | Building on ARM architecture 44 | ---------------------------- 45 | 46 | In case building Flye on ARM architecture fails, you might need to modify minimap2 compilation parameters: 47 | https://github.com/fenderglass/Flye/issues/386 48 | 49 | Installing from source 50 | ---------------------- 51 | 52 | To install the Flye package into your system, run: 53 | 54 | git clone https://github.com/fenderglass/Flye 55 | cd Flye 56 | python setup.py install 57 | 58 | Depending on your OS, you might need to add 59 | ```--user``` or ```--prefix``` options to the 60 | install command for the local installation. 61 | 62 | After installation, Flye could be invoked via: 63 | 64 | flye 65 | 66 | Optionally, run some tests to ensure that installation was successful: 67 | 68 | python flye/tests/test_toy.py 69 | -------------------------------------------------------------------------------- /docs/graph_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikolmogorov/Flye/886b8c17412cdf3a2868a28237bca6c5ad1da156/docs/graph_example.png -------------------------------------------------------------------------------- /flye/.main.py.swo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikolmogorov/Flye/886b8c17412cdf3a2868a28237bca6c5ad1da156/flye/.main.py.swo -------------------------------------------------------------------------------- /flye/__build__.py: -------------------------------------------------------------------------------- 1 | __build__ = 1802 2 | 
-------------------------------------------------------------------------------- /flye/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikolmogorov/Flye/886b8c17412cdf3a2868a28237bca6c5ad1da156/flye/__init__.py -------------------------------------------------------------------------------- /flye/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = "2.9.6" 2 | -------------------------------------------------------------------------------- /flye/assembly/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikolmogorov/Flye/886b8c17412cdf3a2868a28237bca6c5ad1da156/flye/assembly/__init__.py -------------------------------------------------------------------------------- /flye/assembly/assemble.py: -------------------------------------------------------------------------------- 1 | #(c) 2016 by Authors 2 | #This file is a part of ABruijn program. 3 | #Released under the BSD license (see LICENSE file) 4 | 5 | """ 6 | Runs assemble binary 7 | """ 8 | 9 | from __future__ import absolute_import 10 | import subprocess 11 | import logging 12 | import os 13 | 14 | from flye.utils.utils import which 15 | 16 | ASSEMBLE_BIN = "flye-modules" 17 | logger = logging.getLogger() 18 | 19 | 20 | class AssembleException(Exception): 21 | pass 22 | 23 | 24 | def check_binaries(): 25 | if not which(ASSEMBLE_BIN): 26 | raise AssembleException("Assemble binary was not found. 
" 27 | "Did you run 'make'?") 28 | try: 29 | devnull = open(os.devnull, "w") 30 | subprocess.check_call([ASSEMBLE_BIN, "assemble", "-h"], stderr=devnull) 31 | except subprocess.CalledProcessError as e: 32 | if e.returncode == -9: 33 | logger.error("Looks like the system ran out of memory") 34 | raise AssembleException(str(e)) 35 | except OSError as e: 36 | raise AssembleException(str(e)) 37 | 38 | 39 | 40 | def assemble(args, run_params, out_file, log_file, config_path): 41 | logger.info("Assembling disjointigs") 42 | logger.debug("-----Begin assembly log------") 43 | cmdline = [ASSEMBLE_BIN, "assemble", "--reads", ",".join(args.reads), "--out-asm", out_file, 44 | "--config", config_path, "--log", log_file, "--threads", str(1 if args.deterministic else args.threads)] 45 | if args.debug: 46 | cmdline.append("--debug") 47 | if args.meta: 48 | cmdline.append("--meta") 49 | #if args.short_mode: 50 | # cmdline.append("--short") 51 | if args.genome_size: 52 | cmdline.extend(["--genome-size", str(args.genome_size)]) 53 | #if args.kmer_size: 54 | # cmdline.extend(["--kmer", str(args.kmer_size)]) 55 | 56 | cmdline.extend(["--min-ovlp", str(run_params["min_overlap"])]) 57 | if run_params["min_read_length"] > 0: 58 | cmdline.extend(["--min-read", str(run_params["min_read_length"])]) 59 | 60 | if args.extra_params: 61 | cmdline.extend(["--extra-params", args.extra_params]) 62 | 63 | #if args.min_kmer_count is not None: 64 | # cmdline.extend(["-m", str(args.min_kmer_count)]) 65 | #if args.max_kmer_count is not None: 66 | # cmdline.extend(["-x", str(args.max_kmer_count)]) 67 | 68 | try: 69 | logger.debug("Running: " + " ".join(cmdline)) 70 | subprocess.check_call(cmdline) 71 | except subprocess.CalledProcessError as e: 72 | if e.returncode == -9: 73 | logger.error("Looks like the system ran out of memory") 74 | raise AssembleException(str(e)) 75 | except OSError as e: 76 | raise AssembleException(str(e)) 77 | 
-------------------------------------------------------------------------------- /flye/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikolmogorov/Flye/886b8c17412cdf3a2868a28237bca6c5ad1da156/flye/config/__init__.py -------------------------------------------------------------------------------- /flye/config/bin_cfg/asm_corrected_reads.cfg: -------------------------------------------------------------------------------- 1 | %include asm_defaults.cfg 2 | 3 | #specific to raw read assembly 4 | low_cutoff_warning = 0 5 | 6 | #k-mer selection 7 | kmer_size = 17 8 | use_minimizers = 1 9 | minimizer_window = 5 10 | 11 | reads_base_alignment = 1 12 | 13 | #kmer selection in metagenomes 14 | meta_read_top_kmer_rate = 0.75 15 | 16 | #basic overlap parameters 17 | maximum_jump = 1500 18 | maximum_overhang = 500 19 | repeat_kmer_rate = 100 20 | 21 | #overlap similarity thresholds 22 | assemble_ovlp_divergence = 0.03 23 | assemble_divergence_relative = 1 24 | repeat_graph_ovlp_divergence = 0.03 25 | read_align_ovlp_divergence = 0.10 26 | hpc_scoring_on = 1 27 | 28 | #disjointig generation 29 | add_unassembled_reads = 0 30 | 31 | #repeat graph parameters 32 | extend_contigs_with_repeats = 0 33 | min_read_cov_cutoff = 3 34 | short_tip_length = 10000 35 | long_tip_length = 100000 36 | -------------------------------------------------------------------------------- /flye/config/bin_cfg/asm_defaults.cfg: -------------------------------------------------------------------------------- 1 | #index construction 2 | big_genome_threshold = 29000000 3 | 4 | #indexing 5 | meta_read_filter_kmer_freq = 100 6 | 7 | #mapping/alignment (match score = 1) 8 | chain_large_gap_penalty = 2 9 | chain_small_gap_penalty = 0.5 10 | chain_gap_jump_threshold = 100 11 | max_jump_gap = 500 12 | 13 | #read assembly parameters 14 | max_coverage_drop_rate = 5 15 | max_extensions_drop_rate = 5 16 | chimera_window = 100 17 
| chimera_overhang = 1000 18 | min_reads_in_disjointig = 4 19 | max_inner_reads = 10 20 | max_inner_fraction = 0.25 21 | aggressive_dup_filter = 1 22 | 23 | #repeat graph parameters 24 | max_separation = 500 25 | unique_edge_length = 50000 26 | min_repeat_res_support = 0.51 27 | out_paths_ratio = 5 28 | graph_cov_drop_rate = 5 29 | coverage_estimate_window = 100 30 | max_bubble_length = 50000 31 | 32 | loop_coverage_rate = 1.5 33 | repeat_edge_cov_mult = 1.75 34 | weak_detach_rate = 5 35 | tip_coverage_rate = 2 36 | tip_length_rate = 2 37 | 38 | output_gfa_before_rr = 1 39 | remove_alt_edges = 0 40 | -------------------------------------------------------------------------------- /flye/config/bin_cfg/asm_hifi.cfg: -------------------------------------------------------------------------------- 1 | %include asm_defaults.cfg 2 | 3 | #specific to raw read assembly 4 | low_cutoff_warning = 0 5 | 6 | #k-mer selection 7 | kmer_size = 17 8 | use_minimizers = 1 9 | minimizer_window = 10 10 | 11 | reads_base_alignment = 1 12 | 13 | #kmer selection in metagenomes 14 | meta_read_top_kmer_rate = 0.75 15 | 16 | #basic overlap parameters 17 | maximum_jump = 1500 18 | maximum_overhang = 500 19 | repeat_kmer_rate = 100 20 | 21 | #overlap similarity thresholds 22 | assemble_ovlp_divergence = 0.001 23 | assemble_divergence_relative = 0 24 | repeat_graph_ovlp_divergence = 0.001 25 | read_align_ovlp_divergence = 0.03 26 | hpc_scoring_on = 1 27 | 28 | #disjointig generation 29 | add_unassembled_reads = 0 30 | 31 | #repeat graph parameters 32 | extend_contigs_with_repeats = 0 33 | min_read_cov_cutoff = 3 34 | short_tip_length = 10000 35 | long_tip_length = 100000 36 | -------------------------------------------------------------------------------- /flye/config/bin_cfg/asm_nano_hq.cfg: -------------------------------------------------------------------------------- 1 | %include asm_defaults.cfg 2 | 3 | #specific to raw read assembly 4 | low_cutoff_warning = 0 5 | 6 | #k-mer selection 7 | 
kmer_size = 17 8 | use_minimizers = 1 9 | minimizer_window = 10 10 | 11 | reads_base_alignment = 1 12 | 13 | #kmer selection in metagenomes 14 | meta_read_top_kmer_rate = 0.75 15 | 16 | #basic overlap parameters 17 | maximum_jump = 1500 18 | maximum_overhang = 1500 19 | repeat_kmer_rate = 100 20 | 21 | #overlap similarity thresholds 22 | assemble_ovlp_divergence = 0.03 23 | assemble_divergence_relative = 1 24 | repeat_graph_ovlp_divergence = 0.03 25 | read_align_ovlp_divergence = 0.10 26 | hpc_scoring_on = 1 27 | 28 | #disjointig generation 29 | add_unassembled_reads = 0 30 | 31 | #repeat graph parameters 32 | extend_contigs_with_repeats = 0 33 | min_read_cov_cutoff = 3 34 | short_tip_length = 20000 35 | long_tip_length = 100000 36 | -------------------------------------------------------------------------------- /flye/config/bin_cfg/asm_raw_reads.cfg: -------------------------------------------------------------------------------- 1 | %include asm_defaults.cfg 2 | 3 | #specific to raw read assembly 4 | low_cutoff_warning = 1 5 | 6 | #k-mer selection 7 | kmer_size = 17 8 | use_minimizers = 0 9 | 10 | 11 | reads_base_alignment = 0 12 | 13 | #kmer selection in metagenomes 14 | meta_read_top_kmer_rate = 0.40 15 | 16 | #basic overlap parameters 17 | maximum_jump = 1500 18 | maximum_overhang = 1500 19 | repeat_kmer_rate = 100 20 | 21 | #overlap similarity thresholds 22 | assemble_ovlp_divergence = 0.10 23 | assemble_divergence_relative = 1 24 | repeat_graph_ovlp_divergence = 0.08 25 | read_align_ovlp_divergence = 0.25 26 | hpc_scoring_on = 0 27 | 28 | #disjointig generation 29 | add_unassembled_reads = 0 30 | 31 | #repeat graph parameters 32 | extend_contigs_with_repeats = 0 33 | min_read_cov_cutoff = 3 34 | short_tip_length = 20000 35 | long_tip_length = 100000 36 | -------------------------------------------------------------------------------- /flye/config/bin_cfg/asm_subasm.cfg: -------------------------------------------------------------------------------- 1 | 
%include asm_defaults.cfg 2 | 3 | #specific to raw read assembly 4 | low_cutoff_warning = 0 5 | 6 | #k-mer selection 7 | kmer_size = 31 8 | use_minimizers = 1 9 | minimizer_window = 10 10 | 11 | reads_base_alignment = 1 12 | 13 | #kmer selection in metagenomes 14 | meta_read_top_kmer_rate = 0.75 15 | 16 | #basic overlap parameters 17 | maximum_jump = 500 18 | maximum_overhang = 100 19 | repeat_kmer_rate = 100 20 | 21 | #overlap similarity thresholds 22 | assemble_ovlp_divergence = 0.02 23 | assemble_divergence_relative = 0 24 | repeat_graph_ovlp_divergence = 0.02 25 | read_align_ovlp_divergence = 0.02 26 | hpc_scoring_on = 0 27 | 28 | #disjointig generation 29 | add_unassembled_reads = 1 30 | 31 | #repeat graph parameters 32 | extend_contigs_with_repeats = 0 33 | min_read_cov_cutoff = 1 34 | short_tip_length = 10000 35 | long_tip_length = 100000 36 | -------------------------------------------------------------------------------- /flye/config/bin_cfg/nano_r94_g36_substitutions.mat: -------------------------------------------------------------------------------- 1 | mat A 0.9515568866749766 2 | mat C 0.9379644671182339 3 | mat G 0.9383886712902845 4 | mat T 0.9512080082968172 5 | mis A->C 0.003283629035591762 6 | mis A->G 0.010130265837293616 7 | mis A->T 0.0047766615770363065 8 | mis C->A 0.005869725795765441 9 | mis C->G 0.0031237709575749205 10 | mis C->T 0.017804046501055854 11 | mis G->A 0.01697057765939991 12 | mis G->C 0.003149718705856967 13 | mis G->T 0.005788774961197725 14 | mis T->A 0.004818179419876455 15 | mis T->C 0.010460167992456875 16 | mis T->G 0.003330697228627753 17 | del A 0.030252556875118505 18 | del C 0.03523798962712183 19 | del G 0.03570225738331406 20 | del T 0.030182947062206887 21 | ins A 0.004913355834137615 22 | ins C 0.0032979619517697362 23 | ins G 0.003354955001560512 24 | ins T 0.004767958456401918 25 | noins 0.983665768756182 26 | -------------------------------------------------------------------------------- 
/flye/config/bin_cfg/nano_r94_substitutions.mat: -------------------------------------------------------------------------------- 1 | mat A 0.90352852413 2 | mat C 0.899563198899 3 | mat G 0.899432537076 4 | mat T 0.903558166301 5 | mis A->C 0.00721554762111 6 | mis A->G 0.0285282839875 7 | mis A->T 0.007674510041 8 | mis C->A 0.010653409688 9 | mis C->G 0.00590756972495 10 | mis C->T 0.031881185559 11 | mis G->A 0.0301509836432 12 | mis G->C 0.0059966180506 13 | mis G->T 0.0104792084014 14 | mis T->A 0.00779400554697 15 | mis T->C 0.0294115994139 16 | mis T->G 0.00752739727204 17 | del A 0.0530531342202 18 | del C 0.0519946361291 19 | del G 0.0539406528286 20 | del T 0.0517088314665 21 | ins A 0.0085546218779 22 | ins C 0.00696690293149 23 | ins G 0.00709709153664 24 | ins T 0.00826245765424 25 | noins 0.969118926 26 | -------------------------------------------------------------------------------- /flye/config/bin_cfg/pacbio_chm13_substitutions.mat: -------------------------------------------------------------------------------- 1 | mat A 0.939941697387 2 | mat C 0.917845926537 3 | mat G 0.922177915958 4 | mat T 0.939407356769 5 | mis A->C 0.0100732648276 6 | mis A->G 0.00703137633255 7 | mis A->T 0.00827755986074 8 | mis C->A 0.0129093859312 9 | mis C->G 0.0104784550336 10 | mis C->T 0.0114416649249 11 | mis G->A 0.0104475505629 12 | mis G->C 0.0094218679978 13 | mis G->T 0.013245034868 14 | mis T->A 0.00828085763923 15 | mis T->C 0.00702042476737 16 | mis T->G 0.0105872368784 17 | del A 0.0346761015923 18 | del C 0.047324567573 19 | del G 0.0447076306133 20 | del T 0.0347041239458 21 | ins A 0.0241305917591 22 | ins C 0.0220118918111 23 | ins G 0.022099227642 24 | ins T 0.0242575113871 25 | noins 0.907500777401 26 | -------------------------------------------------------------------------------- /flye/config/bin_cfg/pacbio_substitutions.mat: -------------------------------------------------------------------------------- 1 | mat A 0.9582463498 2 | mat C 
0.9435934049 3 | mat T 0.9559668288 4 | mat G 0.9501232526 5 | mis C->G 0.0040725792 6 | mis A->T 0.0023891038 7 | mis T->A 0.0039490745 8 | mis A->G 0.0022850350 9 | mis C->T 0.0035703067 10 | mis T->C 0.0028326086 11 | mis G->A 0.0037474205 12 | mis G->T 0.0042757024 13 | mis C->A 0.0080860631 14 | mis G->C 0.0029070538 15 | mis T->G 0.0037853330 16 | mis A->C 0.0051434271 17 | del A 0.0319360844 18 | del C 0.0406776461 19 | del T 0.0334661551 20 | del G 0.0389465707 21 | ins A 0.0267382405 22 | ins C 0.0187951126 23 | ins T 0.0208484604 24 | ins G 0.0216606426 25 | noins 0.9119575439 26 | 27 | -------------------------------------------------------------------------------- /flye/config/py_cfg.py: -------------------------------------------------------------------------------- 1 | #(c) 2016 by Authors 2 | #This file is a part of ABruijn program. 3 | #Released under the BSD license (see LICENSE file) 4 | 5 | """ 6 | Configuration file for the Python part of the pipeline 7 | """ 8 | 9 | from __future__ import absolute_import 10 | import os 11 | 12 | vals = { 13 | "pkg_root" : os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 14 | "pipeline_version" : 3, 15 | 16 | #additional configuration files for binary modules 17 | "bin_cfg" : { 18 | "raw" : "config/bin_cfg/asm_raw_reads.cfg", 19 | "corrected" : "config/bin_cfg/asm_corrected_reads.cfg", 20 | "hifi" : "config/bin_cfg/asm_hifi.cfg", 21 | "nano_hq" : "config/bin_cfg/asm_nano_hq.cfg", 22 | "subasm" : "config/bin_cfg/asm_subasm.cfg" 23 | }, 24 | 25 | "min_overlap_range" : { 26 | "raw" : [1000, 10000], 27 | "corrected" : [1000, 10000], 28 | "hifi" : [1000, 10000], 29 | "nano_hq" : [1000, 10000], 30 | "subasm" : [1000, 1000] 31 | }, 32 | "max_meta_overlap" : 10000, 33 | 34 | #polishing 35 | "simple_kmer_length" : 4, 36 | "solid_kmer_length" : 10, 37 | "max_bubble_length" : 500, 38 | "max_bubble_branches" : 50, 39 | "max_read_coverage" : 1000, 40 | "min_polish_aln_len" : 500, 41 | 42 | #final coverage 
filtering 43 | "relative_minimum_coverage" : 5, 44 | "hard_minimum_coverage" : 3, 45 | 46 | "err_modes" : { 47 | "pacbio" : { 48 | "subs_matrix" : "config/bin_cfg/pacbio_chm13_substitutions.mat", 49 | "hopo_matrix" : "config/bin_cfg/pacbio_chm13_homopolymers.mat", 50 | "solid_missmatch" : 0.3, 51 | "solid_indel" : 0.3, 52 | "max_aln_error" : 0.25, 53 | "hopo_enabled" : False 54 | }, 55 | "nano" : { 56 | "subs_matrix" : "config/bin_cfg/nano_r94_substitutions.mat", 57 | "hopo_matrix" : "config/bin_cfg/nano_r94_g36_homopolymers.mat", 58 | "solid_missmatch" : 0.3, 59 | "solid_indel" : 0.3, 60 | "max_aln_error" : 0.25, 61 | "hopo_enabled" : False 62 | }, 63 | }, 64 | 65 | #scaffolding 66 | "scaffold_gap" : 100 67 | } 68 | -------------------------------------------------------------------------------- /flye/polishing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikolmogorov/Flye/886b8c17412cdf3a2868a28237bca6c5ad1da156/flye/polishing/__init__.py -------------------------------------------------------------------------------- /flye/repeat_graph/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikolmogorov/Flye/886b8c17412cdf3a2868a28237bca6c5ad1da156/flye/repeat_graph/__init__.py -------------------------------------------------------------------------------- /flye/repeat_graph/graph_alignment.py: -------------------------------------------------------------------------------- 1 | #(c) 2016 by Authors 2 | #This file is a part of ABruijn program. 
class OverlapRange(object):
    """
    Plain record holding the overlap fields of one "Aln" line.

    The twelve attributes are stored exactly as passed to the constructor;
    no validation or conversion happens here (iter_alignments() converts
    the numeric columns before constructing the record).
    """
    __slots__ = ("cur_id", "cur_len", "cur_start", "cur_end",
                 "ext_id", "ext_len", "ext_start", "ext_end",
                 "left_shift", "right_shift", "score", "divergence")

    def __init__(self, cur_id, cur_len, cur_start, cur_end,
                 ext_id, ext_len, ext_start, ext_end,
                 left_shift, right_shift, score, divergence):
        self.cur_id = cur_id
        self.cur_len = cur_len
        self.cur_start = cur_start
        self.cur_end = cur_end
        self.ext_id = ext_id
        self.ext_len = ext_len
        self.ext_start = ext_start
        self.ext_end = ext_end
        self.left_shift = left_shift
        self.right_shift = right_shift
        self.score = score
        self.divergence = divergence


class GraphAlignment(object):
    """
    One element of an alignment chain: a signed edge id paired with
    the OverlapRange parsed from the same "Aln" line.
    """
    __slots__ = ("edge_id", "overlap")

    def __init__(self, edge_id, overlap):
        self.edge_id = edge_id
        self.overlap = overlap


def iter_alignments(filename):
    """
    Returns alignment generator.

    The file is read line by line. A line whose first token is "Chain"
    closes the chain collected so far; a line whose first token is "Aln"
    must carry exactly 13 more whitespace-separated columns
    (edge_id, cur_id, cur_start, cur_end, cur_len, ext_id, ext_start,
    ext_end, ext_len, left_shift, right_shift, score, divergence) and
    appends one GraphAlignment to the current chain.

    Yields each non-empty chain as a list of GraphAlignment objects;
    chains without any "Aln" line are never yielded.

    Raises Exception for a line starting with any other token and
    ValueError when an "Aln" line has the wrong number of columns or
    a non-numeric value in a numeric column.
    """
    current_chain = []
    with open(filename, "r") as f:
        for line in f:
            tokens = line.split()
            #lines read from a file keep their newline, so emptiness has to
            #be checked on the token list: blank / whitespace-only lines
            #would otherwise fail on tokens[0] with IndexError
            if not tokens:
                continue

            if tokens[0] == "Chain":
                if current_chain:
                    yield current_chain
                    current_chain = []

            elif tokens[0] == "Aln":
                (edge_id, cur_id, cur_start, cur_end, cur_len,
                 ext_id, ext_start, ext_end, ext_len, left_shift,
                 right_shift, score, divergence) = tokens[1:]

                ovlp = OverlapRange(cur_id, int(cur_len), int(cur_start), int(cur_end),
                                    ext_id, int(ext_len), int(ext_start), int(ext_end),
                                    int(left_shift), int(right_shift), int(score),
                                    float(divergence))
                #edge ids are stored unsigned in the file
                current_chain.append(GraphAlignment(_to_signed_id(int(edge_id)), ovlp))

            else:
                raise Exception("Error parsing " + filename)

    #the last chain is not followed by another "Chain" line
    if current_chain:
        yield current_chain


#TODO:
#def write_alignments(alignments, filename):
#    pass


def _to_signed_id(unsigned_id):
    """
    Maps even unsigned ids 0, 2, 4, ... to 1, 2, 3, ...
    and odd unsigned ids 1, 3, 5, ... to -1, -2, -3, ...
    """
    return -(unsigned_id + 1) // 2 if unsigned_id % 2 else unsigned_id // 2 + 1


def _to_unsigned_id(signed_id):
    """
    Inverse of _to_signed_id(): 1 -> 0, -1 -> 1, 2 -> 2, -2 -> 3, ...
    """
    unsigned_id = abs(signed_id) * 2 - 2
    return unsigned_id + int(signed_id < 0)
def find_connected_components(graph):
    """
    Labels every vertex of `graph` with the index of its component.

    `graph` is indexed by vertex number (0 .. len(graph) - 1) and graph[v]
    is iterated to obtain the neighbours of v. Components are numbered from
    zero in the order in which their lowest-numbered unvisited vertex is met.

    Returns a pair (labels, n_components), where labels[v] is the component
    index assigned to vertex v.
    """
    n_vertices = len(graph)
    component_of = [0] * n_vertices
    visited = [False] * n_vertices
    n_components = 0

    for root in range(n_vertices):
        if visited[root]:
            continue

        #iterative depth-first traversal starting from the unvisited root
        visited[root] = True
        pending = [root]
        while pending:
            current = pending.pop()
            component_of[current] = n_components
            for adjacent in graph[current]:
                if not visited[adjacent]:
                    pending.append(adjacent)
                    visited[adjacent] = True
        n_components += 1

    return component_of, n_components
def test_toy():
    """
    Assembles the bundled toy dataset with the installed "flye" executable.

    Exits through sys.exit() with a message when "flye" is not found by
    shutil.which(). Otherwise runs flye on data/ecoli_500kb_reads_hifi.fastq.gz
    (resolved relative to this file), writing into "flye_toy_test" in the
    current directory, and removes that directory once the run succeeded.
    subprocess.CalledProcessError propagates if flye exits with non-zero code.
    """
    flye_binary = shutil.which("flye")
    if not flye_binary:
        sys.exit("flye is not installed!")

    print("Running toy test:\n")
    tests_root = os.path.dirname(os.path.realpath(__file__))
    toy_reads = os.path.join(tests_root, "data", "ecoli_500kb_reads_hifi.fastq.gz")
    work_dir = "flye_toy_test"

    command = ["flye", "--pacbio-corr", toy_reads, "-g", "500k",
               "-o", work_dir, "-t", "8", "-m", "1000"]
    subprocess.check_call(command)

    #only reached when flye finished successfully
    shutil.rmtree(work_dir)
    print("\nTEST SUCCESSFUL")


def main():
    """
    Script entry point: runs the toy test and reports success as exit code 0.
    """
    test_toy()
    return 0


if __name__ == "__main__":
    sys.exit(main())
3 | #Released under the BSD license (see LICENSE file) 4 | 5 | """ 6 | File with Trestle configurations 7 | """ 8 | 9 | vals = { 10 | "max_iter" : 10, 11 | "buffer_count" : 3, 12 | "min_edge_cov" : 10, 13 | "min_aln_rate" : 0.5, 14 | "min_bridge_count" : 5, 15 | "min_bridge_factor" : 2, 16 | "min_mult" : 2, 17 | "max_mult" : 3, 18 | "flanking_len" : 10000, 19 | "sub_thresh" : 0.1, 20 | "del_thresh" : 0.2, 21 | "ins_thresh" : 0.3, 22 | "cons_aln_rate" : 0.01, 23 | "min_supp_align_len" : 1000, 24 | "max_supp_align_overlap" : 100, 25 | "orientations_to_run" : "forward", 26 | "num_pol_iters" : 1 27 | } 28 | -------------------------------------------------------------------------------- /flye/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikolmogorov/Flye/886b8c17412cdf3a2868a28237bca6c5ad1da156/flye/utils/__init__.py -------------------------------------------------------------------------------- /flye/utils/utils.py: -------------------------------------------------------------------------------- 1 | #(c) 2013-2016 by Authors 2 | #This file is a part of ABruijn program. 
logger = logging.getLogger()


def which(program):
    """
    Mimics UNIX "which" command.

    If `program` contains a directory component it is checked as given.
    Otherwise each entry of the PATH environment variable is joined with
    `program` and checked in order; when PATH is not set, os.defpath is
    searched instead of failing with KeyError.

    Returns the first candidate that is a regular file with the executable
    permission set, or None if there is no such candidate.
    """
    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    fpath, _ = os.path.split(program)
    if fpath:
        if is_exe(program):
            return program
    else:
        search_path = os.environ.get("PATH", os.defpath)
        for path in search_path.split(os.pathsep):
            #entries may be wrapped in double quotes
            path = path.strip('"')
            exe_file = os.path.join(path, program)
            if is_exe(exe_file):
                return exe_file

    return None


def process_in_parallel(function, arguments, num_proc):
    """
    Run given function in `num_proc` separate processes (multiprocessing).

    Every process is created with the same target `function` and the same
    `arguments` tuple. All processes are started and then joined in
    creation order.

    Raises Exception as soon as a joined process reports a non-zero exit
    code. On KeyboardInterrupt all processes are terminated and the
    interrupt is re-raised.
    """
    #making sure the main process catches SIGINT
    #NOTE(review): the original handler is restored before start() is
    #called, so the handler swap only spans the construction of the Process
    #objects -- confirm whether start() was meant to be inside this window
    threads = []
    orig_sigint = signal.signal(signal.SIGINT, signal.SIG_IGN)
    for _ in range(num_proc):
        threads.append(multiprocessing.Process(target=function, args=arguments))
    signal.signal(signal.SIGINT, orig_sigint)

    for t in threads:
        t.start()
    try:
        for t in threads:
            t.join()
            #a negative exit code -N means the child was killed by signal N
            if t.exitcode == -9:
                logger.error("Looks like the system ran out of memory")
            if t.exitcode != 0:
                raise Exception("One of the processes exited with code: {0}"
                                .format(t.exitcode))
    except KeyboardInterrupt:
        for t in threads:
            t.terminate()
        raise


def get_median(lst):
    """
    Returns the median of a non-empty sequence of numbers.

    For an odd number of elements the middle element of the sorted sequence
    is returned as is; for an even number the mean of the two middle
    elements is returned as a float.

    Raises ValueError if `lst` is empty.
    """
    if not lst:
        raise ValueError("get_median() arg is an empty sequence")
    sorted_list = sorted(lst)
    middle = len(sorted_list) // 2
    if len(sorted_list) % 2 == 1:
        return sorted_list[middle]
    #dividing by a float keeps the result exact under Python 2 as well,
    #where int / int would truncate (this module does not import division)
    return (sorted_list[middle - 1] + sorted_list[middle]) / 2.0
-------------------------------------------------------------------------------- 1 | Copyright (c) 2011 Erik Garrison 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /lib/interval_tree/README.md: -------------------------------------------------------------------------------- 1 | # intervaltree 2 | 3 | ## Overview 4 | 5 | An interval tree can be used to efficiently find a set of numeric intervals overlapping or containing another interval. 6 | 7 | This library provides a basic implementation of an interval tree using C++ templates, allowing the insertion of arbitrary types into the tree. 8 | 9 | ## Usage 10 | 11 | Add `#include "IntervalTree.h"` to the source files in which you will use the interval tree. 
12 | 13 | To make an IntervalTree to contain objects of class T, use: 14 | 15 | ```c++ 16 | vector<Interval<T> > intervals; 17 | T a, b, c; 18 | intervals.push_back(Interval<T>(2, 10, a)); 19 | intervals.push_back(Interval<T>(3, 4, b)); 20 | intervals.push_back(Interval<T>(20, 100, c)); 21 | IntervalTree<T> tree; 22 | tree = IntervalTree<T>(intervals); 23 | ``` 24 | 25 | Now, it's possible to query the tree and obtain a set of intervals which are contained within the start and stop coordinates. 26 | 27 | ```c++ 28 | vector<Interval<T> > results; 29 | tree.findContained(start, stop, results); 30 | cout << "found " << results.size() << " overlapping intervals" << endl; 31 | ``` 32 | 33 | The function IntervalTree::findOverlapping provides a method to find all those intervals which are contained or partially overlap the interval (start, stop). 34 | 35 | ### Author: Erik Garrison 36 | 37 | ### License: MIT 38 | -------------------------------------------------------------------------------- /lib/lemon/AUTHORS: -------------------------------------------------------------------------------- 1 | The main developers of release series 1.x are 2 | 3 | * Balazs Dezso 4 | * Alpar Juttner 5 | * Peter Kovacs 6 | * Akos Ladanyi 7 | 8 | For more complete list of contributors, please visit the history of 9 | the LEMON source code repository: http://lemon.cs.elte.hu/hg/lemon 10 | 11 | Moreover, this version is heavily based on version 0.x of LEMON. Here 12 | is the list of people who contributed to those versions.
13 | 14 | * Mihaly Barasz 15 | * Johanna Becker 16 | * Attila Bernath 17 | * Balazs Dezso 18 | * Peter Hegyi 19 | * Alpar Juttner 20 | * Peter Kovacs 21 | * Akos Ladanyi 22 | * Marton Makai 23 | * Jacint Szabo 24 | 25 | Again, please visit the history of the old LEMON repository for more 26 | details: http://lemon.cs.elte.hu/hg/lemon-0.x -------------------------------------------------------------------------------- /lib/lemon/LICENSE: -------------------------------------------------------------------------------- 1 | LEMON code without an explicit copyright notice is covered by the following 2 | copyright/license. 3 | 4 | Copyright (C) 2003-2012 Egervary Jeno Kombinatorikus Optimalizalasi 5 | Kutatocsoport (Egervary Combinatorial Optimization Research Group, 6 | EGRES). 7 | 8 | =========================================================================== 9 | Boost Software License, Version 1.0 10 | =========================================================================== 11 | 12 | Permission is hereby granted, free of charge, to any person or organization 13 | obtaining a copy of the software and accompanying documentation covered by 14 | this license (the "Software") to use, reproduce, display, distribute, 15 | execute, and transmit the Software, and to prepare derivative works of the 16 | Software, and to permit third-parties to whom the Software is furnished to 17 | do so, all subject to the following: 18 | 19 | The copyright notices in the Software and this entire statement, including 20 | the above license grant, this restriction and the following disclaimer, 21 | must be included in all copies of the Software, in whole or in part, and 22 | all derivative works of the Software, unless such copies or derivative 23 | works are solely in the form of machine-executable object code generated by 24 | a source language processor. 
25 | 26 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 27 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 28 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 29 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 30 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 31 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 32 | DEALINGS IN THE SOFTWARE. 33 | -------------------------------------------------------------------------------- /lib/lemon/README: -------------------------------------------------------------------------------- 1 | ===================================================================== 2 | LEMON - a Library for Efficient Modeling and Optimization in Networks 3 | ===================================================================== 4 | 5 | LEMON is an open source library written in C++. It provides 6 | easy-to-use implementations of common data structures and algorithms 7 | in the area of optimization and helps implementing new ones. The main 8 | focus is on graphs and graph algorithms, thus it is especially 9 | suitable for solving design and optimization problems of 10 | telecommunication networks. To achieve wide usability its data 11 | structures and algorithms provide generic interfaces. 12 | 13 | Contents 14 | ======== 15 | 16 | LICENSE 17 | 18 | Copying, distribution and modification conditions and terms. 19 | 20 | NEWS 21 | 22 | News and version history. 23 | 24 | INSTALL 25 | 26 | General building and installation instructions. 27 | 28 | lemon/ 29 | 30 | Source code of LEMON library. 31 | 32 | doc/ 33 | 34 | Documentation of LEMON. The starting page is doc/html/index.html. 35 | 36 | demo/ 37 | 38 | Some example programs to make you easier to get familiar with LEMON. 39 | 40 | scripts/ 41 | 42 | Scripts that make it easier to develop LEMON. 
43 | 44 | test/ 45 | 46 | Programs to check the integrity and correctness of LEMON. 47 | 48 | tools/ 49 | 50 | Various utilities related to LEMON. 51 | -------------------------------------------------------------------------------- /lib/lemon/lemon/bits/lock.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C++; indent-tabs-mode: nil; -*- 2 | * 3 | * This file is a part of LEMON, a generic C++ optimization library. 4 | * 5 | * Copyright (C) 2003-2013 6 | * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport 7 | * (Egervary Research Group on Combinatorial Optimization, EGRES). 8 | * 9 | * Permission to use, modify and distribute this software is granted 10 | * provided that this copyright notice appears in all copies. For 11 | * precise terms see the accompanying LICENSE file. 12 | * 13 | * This software is provided "AS IS" with no warranty of any kind, 14 | * express or implied, and with no claim as to its suitability for any 15 | * purpose. 
16 | * 17 | */ 18 | 19 | #ifndef LEMON_BITS_LOCK_H 20 | #define LEMON_BITS_LOCK_H 21 | 22 | #include 23 | #if defined(LEMON_USE_PTHREAD) 24 | #include 25 | #elif defined(LEMON_USE_WIN32_THREADS) 26 | #include 27 | #endif 28 | 29 | namespace lemon { 30 | namespace bits { 31 | 32 | #if defined(LEMON_USE_PTHREAD) 33 | class Lock { 34 | public: 35 | Lock() { 36 | pthread_mutex_init(&_lock, 0); 37 | } 38 | ~Lock() { 39 | pthread_mutex_destroy(&_lock); 40 | } 41 | void lock() { 42 | pthread_mutex_lock(&_lock); 43 | } 44 | void unlock() { 45 | pthread_mutex_unlock(&_lock); 46 | } 47 | 48 | private: 49 | pthread_mutex_t _lock; 50 | }; 51 | #elif defined(LEMON_USE_WIN32_THREADS) 52 | class Lock : public WinLock {}; 53 | #else 54 | class Lock { 55 | public: 56 | Lock() {} 57 | ~Lock() {} 58 | void lock() {} 59 | void unlock() {} 60 | }; 61 | #endif 62 | } 63 | } 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /lib/lemon/lemon/bits/windows.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C++; indent-tabs-mode: nil; -*- 2 | * 3 | * This file is a part of LEMON, a generic C++ optimization library. 4 | * 5 | * Copyright (C) 2003-2013 6 | * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport 7 | * (Egervary Research Group on Combinatorial Optimization, EGRES). 8 | * 9 | * Permission to use, modify and distribute this software is granted 10 | * provided that this copyright notice appears in all copies. For 11 | * precise terms see the accompanying LICENSE file. 12 | * 13 | * This software is provided "AS IS" with no warranty of any kind, 14 | * express or implied, and with no claim as to its suitability for any 15 | * purpose. 
16 | * 17 | */ 18 | 19 | #ifndef LEMON_BITS_WINDOWS_H 20 | #define LEMON_BITS_WINDOWS_H 21 | 22 | #include 23 | 24 | namespace lemon { 25 | namespace bits { 26 | void getWinProcTimes(double &rtime, 27 | double &utime, double &stime, 28 | double &cutime, double &cstime); 29 | std::string getWinFormattedDate(); 30 | int getWinRndSeed(); 31 | 32 | class WinLock { 33 | public: 34 | WinLock(); 35 | ~WinLock(); 36 | void lock(); 37 | void unlock(); 38 | private: 39 | void *_repr; 40 | }; 41 | } 42 | } 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /lib/lemon/lemon/concept_check.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C++; indent-tabs-mode: nil; -*- 2 | * 3 | * This file is a part of LEMON, a generic C++ optimization library. 4 | * 5 | * Copyright (C) 2003-2013 6 | * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport 7 | * (Egervary Research Group on Combinatorial Optimization, EGRES). 8 | * 9 | * Permission to use, modify and distribute this software is granted 10 | * provided that this copyright notice appears in all copies. For 11 | * precise terms see the accompanying LICENSE file. 12 | * 13 | * This software is provided "AS IS" with no warranty of any kind, 14 | * express or implied, and with no claim as to its suitability for any 15 | * purpose. 16 | * 17 | */ 18 | 19 | // The contents of this file was inspired by the concept checking 20 | // utility of the BOOST library (http://www.boost.org). 21 | 22 | ///\file 23 | ///\brief Basic utilities for concept checking. 24 | /// 25 | 26 | #ifndef LEMON_CONCEPT_CHECK_H 27 | #define LEMON_CONCEPT_CHECK_H 28 | 29 | namespace lemon { 30 | 31 | /* 32 | "inline" is used for ignore_unused_variable_warning() 33 | and function_requires() to make sure there is no 34 | overtarget with g++. 
35 | */ 36 | 37 | template inline void ignore_unused_variable_warning(const T&) { } 38 | template 39 | inline void ignore_unused_variable_warning(const T1&, const T2&) { } 40 | template 41 | inline void ignore_unused_variable_warning(const T1&, const T2&, 42 | const T3&) { } 43 | template 44 | inline void ignore_unused_variable_warning(const T1&, const T2&, 45 | const T3&, const T4&) { } 46 | template 47 | inline void ignore_unused_variable_warning(const T1&, const T2&, 48 | const T3&, const T4&, 49 | const T5&) { } 50 | template 51 | inline void ignore_unused_variable_warning(const T1&, const T2&, 52 | const T3&, const T4&, 53 | const T5&, const T6&) { } 54 | 55 | ///\e 56 | template 57 | inline void function_requires() 58 | { 59 | #if !defined(NDEBUG) 60 | void (Concept::*x)() = & Concept::constraints; 61 | ::lemon::ignore_unused_variable_warning(x); 62 | #endif 63 | } 64 | 65 | ///\e 66 | template 67 | inline void checkConcept() { 68 | #if !defined(NDEBUG) 69 | typedef typename Concept::template Constraints ConceptCheck; 70 | void (ConceptCheck::*x)() = & ConceptCheck::constraints; 71 | ::lemon::ignore_unused_variable_warning(x); 72 | #endif 73 | } 74 | 75 | } // namespace lemon 76 | 77 | #endif // LEMON_CONCEPT_CHECK_H 78 | -------------------------------------------------------------------------------- /lib/lemon/lemon/config.h: -------------------------------------------------------------------------------- 1 | #define LEMON_VERSION "1.3.1" 2 | #define LEMON_HAVE_LONG_LONG 1 3 | 4 | /* #undef LEMON_HAVE_LP */ 5 | /* #undef LEMON_HAVE_MIP */ 6 | /* #undef LEMON_HAVE_GLPK */ 7 | /* #undef LEMON_HAVE_CPLEX */ 8 | /* #undef LEMON_HAVE_SOPLEX */ 9 | /* #undef LEMON_HAVE_CLP */ 10 | /* #undef LEMON_HAVE_CBC */ 11 | 12 | #define _LEMON_CPLEX 1 13 | #define _LEMON_CLP 2 14 | #define _LEMON_GLPK 3 15 | #define _LEMON_SOPLEX 4 16 | #define _LEMON_CBC 5 17 | 18 | /* #undef LEMON_DEFAULT_LP */ 19 | /* #undef LEMON_DEFAULT_MIP */ 20 | 21 | #define LEMON_USE_PTHREAD 1 22 | 
/* #undef LEMON_USE_WIN32_THREADS */ 23 | -------------------------------------------------------------------------------- /lib/lemon/lemon/lp.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C++; indent-tabs-mode: nil; -*- 2 | * 3 | * This file is a part of LEMON, a generic C++ optimization library. 4 | * 5 | * Copyright (C) 2003-2013 6 | * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport 7 | * (Egervary Research Group on Combinatorial Optimization, EGRES). 8 | * 9 | * Permission to use, modify and distribute this software is granted 10 | * provided that this copyright notice appears in all copies. For 11 | * precise terms see the accompanying LICENSE file. 12 | * 13 | * This software is provided "AS IS" with no warranty of any kind, 14 | * express or implied, and with no claim as to its suitability for any 15 | * purpose. 16 | * 17 | */ 18 | 19 | #ifndef LEMON_LP_H 20 | #define LEMON_LP_H 21 | 22 | #include 23 | 24 | 25 | #ifdef LEMON_HAVE_GLPK 26 | #include 27 | #elif LEMON_HAVE_CPLEX 28 | #include 29 | #elif LEMON_HAVE_SOPLEX 30 | #include 31 | #elif LEMON_HAVE_CLP 32 | #include 33 | #elif LEMON_HAVE_CBC 34 | #include 35 | #endif 36 | 37 | ///\file 38 | ///\brief Defines a default LP solver 39 | ///\ingroup lp_group 40 | namespace lemon { 41 | 42 | #ifdef DOXYGEN 43 | ///The default LP solver identifier 44 | 45 | ///The default LP solver identifier. 46 | ///\ingroup lp_group 47 | /// 48 | ///Currently, the possible values are \c _LEMON_GLPK, \c LEMON__CPLEX, 49 | ///\c _LEMON_SOPLEX or \c LEMON__CLP 50 | #define LEMON_DEFAULT_LP SOLVER 51 | ///The default LP solver 52 | 53 | ///The default LP solver. 54 | ///\ingroup lp_group 55 | /// 56 | ///Currently, it is either \c GlpkLp, \c CplexLp, \c SoplexLp or \c ClpLp 57 | typedef GlpkLp Lp; 58 | 59 | ///The default MIP solver identifier 60 | 61 | ///The default MIP solver identifier. 
62 | ///\ingroup lp_group 63 | /// 64 | ///Currently, the possible values are \c _LEMON_GLPK, \c LEMON__CPLEX 65 | ///or \c _LEMON_CBC 66 | #define LEMON_DEFAULT_MIP SOLVER 67 | ///The default MIP solver. 68 | 69 | ///The default MIP solver. 70 | ///\ingroup lp_group 71 | /// 72 | ///Currently, it is either \c GlpkMip, \c CplexMip , \c CbcMip 73 | typedef GlpkMip Mip; 74 | #else 75 | #if LEMON_DEFAULT_LP == _LEMON_GLPK 76 | typedef GlpkLp Lp; 77 | #elif LEMON_DEFAULT_LP == _LEMON_CPLEX 78 | typedef CplexLp Lp; 79 | #elif LEMON_DEFAULT_LP == _LEMON_SOPLEX 80 | typedef SoplexLp Lp; 81 | #elif LEMON_DEFAULT_LP == _LEMON_CLP 82 | typedef ClpLp Lp; 83 | #endif 84 | #if LEMON_DEFAULT_MIP == _LEMON_GLPK 85 | typedef GlpkMip Mip; 86 | #elif LEMON_DEFAULT_MIP == _LEMON_CPLEX 87 | typedef CplexMip Mip; 88 | #elif LEMON_DEFAULT_MIP == _LEMON_CBC 89 | typedef CbcMip Mip; 90 | #endif 91 | #endif 92 | 93 | } //namespace lemon 94 | 95 | #endif //LEMON_LP_H 96 | -------------------------------------------------------------------------------- /lib/lemon/lemon/math.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C++; indent-tabs-mode: nil; -*- 2 | * 3 | * This file is a part of LEMON, a generic C++ optimization library. 4 | * 5 | * Copyright (C) 2003-2013 6 | * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport 7 | * (Egervary Research Group on Combinatorial Optimization, EGRES). 8 | * 9 | * Permission to use, modify and distribute this software is granted 10 | * provided that this copyright notice appears in all copies. For 11 | * precise terms see the accompanying LICENSE file. 12 | * 13 | * This software is provided "AS IS" with no warranty of any kind, 14 | * express or implied, and with no claim as to its suitability for any 15 | * purpose. 16 | * 17 | */ 18 | 19 | #ifndef LEMON_MATH_H 20 | #define LEMON_MATH_H 21 | 22 | ///\ingroup misc 23 | ///\file 24 | ///\brief Some extensions to the standard \c cmath library. 
namespace lemon {

  /// \addtogroup misc
  /// @{

  /// Euler's number, e
  const long double E       = 2.7182818284590452353602874713526625L;
  /// log_2(e)
  const long double LOG2E   = 1.4426950408889634073599246810018921L;
  /// log_10(e)
  const long double LOG10E  = 0.4342944819032518276511289189166051L;
  /// ln(2)
  const long double LN2     = 0.6931471805599453094172321214581766L;
  /// ln(10)
  const long double LN10    = 2.3025850929940456840179914546843642L;
  /// pi
  const long double PI      = 3.1415926535897932384626433832795029L;
  /// pi/2
  const long double PI_2    = 1.5707963267948966192313216916397514L;
  /// pi/4
  const long double PI_4    = 0.7853981633974483096156608458198757L;
  /// sqrt(2)
  const long double SQRT2   = 1.4142135623730950488016887242096981L;
  /// 1/sqrt(2)
  const long double SQRT1_2 = 0.7071067811865475244008443621048490L;

  ///Check whether the parameter is NaN or not

  ///This function checks whether the parameter is NaN or not.
  ///It should be equivalent to std::isnan(), which is not
  ///provided by all compilers.
  inline bool isNaN(double v)
  {
    // NaN is the only value that does not compare equal to itself.
    return !(v == v);
  }

  ///Round a value to its closest integer

  ///Positive values are rounded with floor(r + 0.5), all other values
  ///with ceil(r - 0.5), so exact halves move away from zero.
  inline double round(double r) {
    if (r > 0.0) {
      return std::floor(r + 0.5);
    }
    return std::ceil(r - 0.5);
  }

  /// @}

} //namespace lemon
The default number of elements in an empty hash table 13 | constexpr size_t LIBCUCKOO_DEFAULT_SIZE = 14 | (1U << 16) * LIBCUCKOO_DEFAULT_SLOT_PER_BUCKET; 15 | 16 | //! The default minimum load factor that the table allows for automatic 17 | //! expansion. It must be a number between 0.0 and 1.0. The table will throw 18 | //! libcuckoo_load_factor_too_low if the load factor falls below this value 19 | //! during an automatic expansion. 20 | constexpr double LIBCUCKOO_DEFAULT_MINIMUM_LOAD_FACTOR = 0.05; 21 | 22 | //! An alias for the value that sets no limit on the maximum hashpower. If this 23 | //! value is set as the maximum hashpower limit, there will be no limit. This 24 | //! is also the default initial value for the maximum hashpower in a table. 25 | constexpr size_t LIBCUCKOO_NO_MAXIMUM_HASHPOWER = 26 | std::numeric_limits::max(); 27 | 28 | //! set LIBCUCKOO_DEBUG to 1 to enable debug output 29 | #define LIBCUCKOO_DEBUG 0 30 | 31 | #endif // _CUCKOOHASH_CONFIG_HH 32 | -------------------------------------------------------------------------------- /lib/minimap2/FAQ.md: -------------------------------------------------------------------------------- 1 | #### 1. Alignment different with option `-a` or `-c`? 2 | 3 | Without `-a`, `-c` or `--cs`, minimap2 only finds *approximate* mapping 4 | locations without detailed base alignment. In particular, the start and end 5 | positions of the alignment are imprecise. With one of those options, minimap2 6 | will perform base alignment, which is generally more accurate but is much 7 | slower. 8 | 9 | #### 2. How to map Illumina short reads to noisy long reads? 10 | 11 | No good solutions. The better approach is to assemble short reads into contigs 12 | and then map noisy reads to contigs. 13 | 14 | #### 3. The output SAM doesn't have a header. 15 | 16 | By default, minimap2 indexes 4 billion reference bases (4Gb) in a batch and maps 17 | all reads against each reference batch. 
Given a reference longer than 4Gb, 18 | minimap2 is unable to see all the sequences and thus can't produce a correct 19 | SAM header. In this case, minimap2 doesn't output any SAM header. There are two 20 | solutions to this issue. First, you may increase option `-I` to, for example, 21 | `-I8g` to index more reference bases in a batch. This is preferred if your 22 | machine has enough memory. Second, if your machine doesn't have enough memory 23 | to hold the reference index, you can use the `--split-prefix` option in a 24 | command line like: 25 | ```sh 26 | minimap2 -ax map-ont --split-prefix=tmp ref.fa reads.fq 27 | ``` 28 | This second approach uses less memory, but it is slower and requires temporary 29 | disk space. 30 | 31 | #### 4. The output SAM is malformatted. 32 | 33 | This typically happens when you use nohup to wrap a minimap2 command line. 34 | Nohup is discouraged as it breaks piping. If you have to use nohup, please 35 | specify an output file with option `-o`. 36 | 37 | #### 5. How to output one alignment per read? 38 | 39 | You can use `--secondary=no` to suppress secondary alignments (aka multiple 40 | mappings), but you can't suppress supplementary alignment (aka split or 41 | chimeric alignment) this way. You can use samtools to filter out these 42 | alignments: 43 | ```sh 44 | minimap2 -ax map-ont ref.fa reads.fq | samtools view -F0x900 45 | ``` 46 | However, this is discouraged as supplementary alignment is informative. 47 | -------------------------------------------------------------------------------- /lib/minimap2/LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2018- Dana-Farber Cancer Institute 4 | 2017-2018 Broad Institute, Inc. 
5 | 6 | Permission is hereby granted, free of charge, to any person obtaining 7 | a copy of this software and associated documentation files (the 8 | "Software"), to deal in the Software without restriction, including 9 | without limitation the rights to use, copy, modify, merge, publish, 10 | distribute, sublicense, and/or sell copies of the Software, and to 11 | permit persons to whom the Software is furnished to do so, subject to 12 | the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be 15 | included in all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 22 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | SOFTWARE. 
/*
 * Length of a query name with a trailing mate suffix ignored.
 *
 * When the name is at least three characters long and ends in '/' followed
 * by one decimal digit (e.g. "read/1"), the two suffix characters are not
 * counted; otherwise the full strlen() of the name is returned.
 */
static inline int mm_qname_len(const char *s)
{
	int len = strlen(s);
	if (len < 3)
		return len; /* too short to carry a "/<digit>" suffix */
	if (s[len - 2] != '/')
		return len;
	if (s[len - 1] < '0' || s[len - 1] > '9')
		return len;
	return len - 2;
}
>> 32 & 0xff; 39 | avg_k = (float)sum_k / n; 40 | 41 | for (i = 0; i < n_regs; ++i) { 42 | mm_reg1_t *r = ®s[i]; 43 | int32_t st, en, j, k, n_match, n_tot, l_ref; 44 | r->div = -1.0f; 45 | if (r->cnt == 0) continue; 46 | st = en = get_mini_idx(qlen, r->rev? &a[r->as + r->cnt - 1] : &a[r->as], n, mini_pos); 47 | if (st < 0) { 48 | if (mm_verbose >= 2) 49 | fprintf(stderr, "[WARNING] logic inconsistency in mm_est_err(). Please contact the developer.\n"); 50 | continue; 51 | } 52 | l_ref = mi->seq[r->rid].len; 53 | for (k = 1, j = st + 1, n_match = 1; j < n && k < r->cnt; ++j) { 54 | int32_t x; 55 | x = get_for_qpos(qlen, r->rev? &a[r->as + r->cnt - 1 - k] : &a[r->as + k]); 56 | if (x == (int32_t)mini_pos[j]) 57 | ++k, en = j, ++n_match; 58 | } 59 | n_tot = en - st + 1; 60 | if (r->qs > avg_k && r->rs > avg_k) ++n_tot; 61 | if (qlen - r->qs > avg_k && l_ref - r->re > avg_k) ++n_tot; 62 | r->div = n_match >= n_tot? 0.0f : (float)(1.0 - pow((double)n_match / n_tot, 1.0 / avg_k)); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /lib/minimap2/example.c: -------------------------------------------------------------------------------- 1 | // To compile: 2 | // gcc -g -O2 example.c libminimap2.a -lz 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "minimap.h" 9 | #include "kseq.h" 10 | KSEQ_INIT(gzFile, gzread) 11 | 12 | int main(int argc, char *argv[]) 13 | { 14 | mm_idxopt_t iopt; 15 | mm_mapopt_t mopt; 16 | int n_threads = 3; 17 | 18 | mm_verbose = 2; // disable message output to stderr 19 | mm_set_opt(0, &iopt, &mopt); 20 | mopt.flag |= MM_F_CIGAR; // perform alignment 21 | 22 | if (argc < 3) { 23 | fprintf(stderr, "Usage: minimap2-lite \n"); 24 | return 1; 25 | } 26 | 27 | // open query file for reading; you may use your favorite FASTA/Q parser 28 | gzFile f = gzopen(argv[2], "r"); 29 | assert(f); 30 | kseq_t *ks = kseq_init(f); 31 | 32 | // open index reader 33 | mm_idx_reader_t *r = 
mm_idx_reader_open(argv[1], &iopt, 0); 34 | mm_idx_t *mi; 35 | while ((mi = mm_idx_reader_read(r, n_threads)) != 0) { // traverse each part of the index 36 | mm_mapopt_update(&mopt, mi); // this sets the maximum minimizer occurrence; TODO: set a better default in mm_mapopt_init()! 37 | mm_tbuf_t *tbuf = mm_tbuf_init(); // thread buffer; for multi-threading, allocate one tbuf for each thread 38 | gzrewind(f); 39 | kseq_rewind(ks); 40 | while (kseq_read(ks) >= 0) { // each kseq_read() call reads one query sequence 41 | mm_reg1_t *reg; 42 | int j, i, n_reg; 43 | reg = mm_map(mi, ks->seq.l, ks->seq.s, &n_reg, tbuf, &mopt, 0); // get all hits for the query 44 | for (j = 0; j < n_reg; ++j) { // traverse hits and print them out 45 | mm_reg1_t *r = ®[j]; 46 | assert(r->p); // with MM_F_CIGAR, this should not be NULL 47 | printf("%s\t%d\t%d\t%d\t%c\t", ks->name.s, ks->seq.l, r->qs, r->qe, "+-"[r->rev]); 48 | printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\tcg:Z:", mi->seq[r->rid].name, mi->seq[r->rid].len, r->rs, r->re, r->mlen, r->blen, r->mapq); 49 | for (i = 0; i < r->p->n_cigar; ++i) // IMPORTANT: this gives the CIGAR in the aligned regions. NO soft/hard clippings! 
50 | printf("%d%c", r->p->cigar[i]>>4, MM_CIGAR_STR[r->p->cigar[i]&0xf]); 51 | putchar('\n'); 52 | free(r->p); 53 | } 54 | free(reg); 55 | } 56 | mm_tbuf_destroy(tbuf); 57 | mm_idx_destroy(mi); 58 | } 59 | mm_idx_reader_close(r); // close the index reader 60 | kseq_destroy(ks); // close the query file 61 | gzclose(f); 62 | return 0; 63 | } 64 | -------------------------------------------------------------------------------- /lib/minimap2/kalloc.h: -------------------------------------------------------------------------------- 1 | #ifndef _KALLOC_H_ 2 | #define _KALLOC_H_ 3 | 4 | #include /* for size_t */ 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef struct { 11 | size_t capacity, available, n_blocks, n_cores, largest; 12 | } km_stat_t; 13 | 14 | void *kmalloc(void *km, size_t size); 15 | void *krealloc(void *km, void *ptr, size_t size); 16 | void *kcalloc(void *km, size_t count, size_t size); 17 | void kfree(void *km, void *ptr); 18 | 19 | void *km_init(void); 20 | void *km_init2(void *km_par, size_t min_core_size); 21 | void km_destroy(void *km); 22 | void km_stat(const void *_km, km_stat_t *s); 23 | 24 | #ifdef __cplusplus 25 | } 26 | #endif 27 | 28 | #define KMALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))kmalloc((km), (len) * sizeof(*(ptr)))) 29 | #define KCALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))kcalloc((km), (len), sizeof(*(ptr)))) 30 | #define KREALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))krealloc((km), (ptr), (len) * sizeof(*(ptr)))) 31 | 32 | #define KEXPAND(km, a, m) do { \ 33 | (m) = (m) >= 4? 
(m) + ((m)>>1) : 16; \ 34 | KREALLOC((km), (a), (m)); \ 35 | } while (0) 36 | 37 | #ifndef klib_unused 38 | #if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) 39 | #define klib_unused __attribute__ ((__unused__)) 40 | #else 41 | #define klib_unused 42 | #endif 43 | #endif /* klib_unused */ 44 | 45 | #define KALLOC_POOL_INIT2(SCOPE, name, kmptype_t) \ 46 | typedef struct { \ 47 | size_t cnt, n, max; \ 48 | kmptype_t **buf; \ 49 | void *km; \ 50 | } kmp_##name##_t; \ 51 | SCOPE kmp_##name##_t *kmp_init_##name(void *km) { \ 52 | kmp_##name##_t *mp; \ 53 | KCALLOC(km, mp, 1); \ 54 | mp->km = km; \ 55 | return mp; \ 56 | } \ 57 | SCOPE void kmp_destroy_##name(kmp_##name##_t *mp) { \ 58 | size_t k; \ 59 | for (k = 0; k < mp->n; ++k) kfree(mp->km, mp->buf[k]); \ 60 | kfree(mp->km, mp->buf); kfree(mp->km, mp); \ 61 | } \ 62 | SCOPE kmptype_t *kmp_alloc_##name(kmp_##name##_t *mp) { \ 63 | ++mp->cnt; \ 64 | if (mp->n == 0) return (kmptype_t*)kcalloc(mp->km, 1, sizeof(kmptype_t)); \ 65 | return mp->buf[--mp->n]; \ 66 | } \ 67 | SCOPE void kmp_free_##name(kmp_##name##_t *mp, kmptype_t *p) { \ 68 | --mp->cnt; \ 69 | if (mp->n == mp->max) KEXPAND(mp->km, mp->buf, mp->max); \ 70 | mp->buf[mp->n++] = p; \ 71 | } 72 | 73 | #define KALLOC_POOL_INIT(name, kmptype_t) \ 74 | KALLOC_POOL_INIT2(static inline klib_unused, name, kmptype_t) 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /lib/minimap2/kthread.h: -------------------------------------------------------------------------------- 1 | #ifndef KTHREAD_H 2 | #define KTHREAD_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n); 9 | void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps); 10 | 11 | #ifdef __cplusplus 12 | } 13 | #endif 14 | 15 | #endif 16 | 
def main(argv):
    """Align every sequence of a query file against a reference with mappy.

    argv is the full argument vector (argv[0] is the program name). After the
    options, two positional arguments are required: the reference (sequence
    file or prebuilt index) and the query file. One tab-separated line
    (query name, query length, hit) is printed per hit.

    Exits with status 1 after printing usage when fewer than two positional
    arguments are given; raises Exception when the aligner cannot be built.
    """
    opts, args = getopt.getopt(argv[1:], "x:n:m:k:w:r:c")
    if len(args) < 2:
        print("Usage: minimap2.py [options] | ")
        print("Options:")
        print(" -x STR preset: sr, map-pb, map-ont, asm5, asm10 or splice")
        print(" -n INT mininum number of minimizers")
        print(" -m INT mininum chaining score")
        print(" -k INT k-mer length")
        print(" -w INT minimizer window length")
        print(" -r INT band width")
        print(" -c output the cs tag")
        sys.exit(1)

    # None means "not given on the command line"; passed through to Aligner as-is.
    preset = min_cnt = min_sc = k = w = bw = None
    out_cs = False
    for opt, arg in opts:
        if opt == '-x': preset = arg
        elif opt == '-n': min_cnt = int(arg)
        # Store into min_sc, the variable actually handed to the aligner below;
        # assigning to a differently named local silently dropped the -m value.
        elif opt == '-m': min_sc = int(arg)
        elif opt == '-r': bw = int(arg)
        elif opt == '-k': k = int(arg)
        elif opt == '-w': w = int(arg)
        elif opt == '-c': out_cs = True

    a = mp.Aligner(args[0], preset=preset, min_cnt=min_cnt, min_chain_score=min_sc, k=k, w=w, bw=bw)
    if not a: raise Exception("ERROR: failed to load/build index file '{}'".format(args[0]))
    for name, seq, qual in mp.fastx_read(args[1]): # read one sequence
        for h in a.map(seq, cs=out_cs): # traverse hits
            print('{}\t{}\t{}'.format(name, len(seq), h))
class LibMM2Build(build_ext):
    """build_ext variant that first builds libminimap2.a through the Makefile.

    Delegating to make avoids duplicating the logic that decides which
    objects to compile, at the price of requiring Make on the end user's
    machine (precompiled wheels are not distributed on PyPI).
    """

    def run(self):
        # 64-bit ARM machines get the NEON/aarch64 make variables.
        on_arm64 = platform.machine() in ["aarch64", "arm64"]
        make_vars = ["arm_neon=1", "aarch64=1"] if on_arm64 else []

        def run_make(*make_args, **kwargs):
            subprocess.check_call(['make', 'libminimap2.a'] + list(make_args))

        self.execute(run_make, make_vars, 'Compiling libminimap2 using Makefile')
        build_ext.run(self)
mi->index); 15 | if ((fp = fopen(fn, "wb")) == NULL) { 16 | if (mm_verbose >= 1) 17 | fprintf(stderr, "[ERROR]\033[1;31m failed to write to temporary file '%s'\033[0m: %s\n", fn, strerror(errno)); 18 | exit(1); 19 | } 20 | mm_err_fwrite(&k, 4, 1, fp); 21 | mm_err_fwrite(&mi->n_seq, 4, 1, fp); 22 | for (i = 0; i < mi->n_seq; ++i) { 23 | uint32_t l; 24 | l = strlen(mi->seq[i].name); 25 | mm_err_fwrite(&l, 1, 4, fp); 26 | mm_err_fwrite(mi->seq[i].name, 1, l, fp); 27 | mm_err_fwrite(&mi->seq[i].len, 4, 1, fp); 28 | } 29 | free(fn); 30 | return fp; 31 | } 32 | 33 | mm_idx_t *mm_split_merge_prep(const char *prefix, int n_splits, FILE **fp, uint32_t *n_seq_part) 34 | { 35 | mm_idx_t *mi = 0; 36 | char *fn; 37 | int i, j; 38 | 39 | if (n_splits < 1) return 0; 40 | fn = CALLOC(char, strlen(prefix) + 10); 41 | for (i = 0; i < n_splits; ++i) { 42 | sprintf(fn, "%s.%.4d.tmp", prefix, i); 43 | if ((fp[i] = fopen(fn, "rb")) == 0) { 44 | if (mm_verbose >= 1) 45 | fprintf(stderr, "ERROR: failed to open temporary file '%s': %s\n", fn, strerror(errno)); 46 | for (j = 0; j < i; ++j) 47 | fclose(fp[j]); 48 | free(fn); 49 | return 0; 50 | } 51 | } 52 | free(fn); 53 | 54 | mi = CALLOC(mm_idx_t, 1); 55 | for (i = 0; i < n_splits; ++i) { 56 | mm_err_fread(&mi->k, 4, 1, fp[i]); // TODO: check if k is all the same 57 | mm_err_fread(&n_seq_part[i], 4, 1, fp[i]); 58 | mi->n_seq += n_seq_part[i]; 59 | } 60 | mi->seq = CALLOC(mm_idx_seq_t, mi->n_seq); 61 | for (i = j = 0; i < n_splits; ++i) { 62 | uint32_t k; 63 | for (k = 0; k < n_seq_part[i]; ++k, ++j) { 64 | uint32_t l; 65 | mm_err_fread(&l, 1, 4, fp[i]); 66 | mi->seq[j].name = (char*)calloc(l + 1, 1); 67 | mm_err_fread(mi->seq[j].name, 1, l, fp[i]); 68 | mm_err_fread(&mi->seq[j].len, 4, 1, fp[i]); 69 | } 70 | } 71 | return mi; 72 | } 73 | 74 | void mm_split_rm_tmp(const char *prefix, int n_splits) 75 | { 76 | int i; 77 | char *fn; 78 | fn = CALLOC(char, strlen(prefix) + 10); 79 | for (i = 0; i < n_splits; ++i) { 80 | sprintf(fn, 
"%s.%.4d.tmp", prefix, i); 81 | remove(fn); 82 | } 83 | free(fn); 84 | } 85 | -------------------------------------------------------------------------------- /lib/minimap2/test/q2.fa: -------------------------------------------------------------------------------- 1 | >q2 2 | GGACATCCCGATGGTGCAGTCCTACCTGTACGAAAGGAC 3 | -------------------------------------------------------------------------------- /lib/minimap2/test/t2.fa: -------------------------------------------------------------------------------- 1 | >t2 2 | GGACATCCCGATGGTGCAGgtGCTATTAAAGGTTCGTTTGTTCAACGATTAAagTCCTACCTGTACGAAAGGAC 3 | -------------------------------------------------------------------------------- /lib/samtools-1.9/AUTHORS: -------------------------------------------------------------------------------- 1 | Heng Li from the Sanger Institute wrote most of the initial source codes 2 | of SAMtools and various converters. 3 | 4 | Bob Handsaker from the Broad Institute is a major contributor to the 5 | SAM/BAM specification. He designed and implemented the BGZF format, the 6 | underlying indexable compression format for the BAM format. BGZF does 7 | not support arithmetic between file offsets. 8 | 9 | Jue Ruan for the Beijing Genome Institute designed and implemented the 10 | RAZF format, an alternative indexable compression format. RAZF is no longer 11 | used by or provided with SAMtools. Source code remains available in older 12 | SAMtools 0.1.x releases and from the standalone branch in the repository. 13 | 14 | Colin Hercus updated novo2sam.pl to support gapped alignment by 15 | novoalign. 16 | 17 | Petr Danecek contributed the header parsing library sam_header.c and 18 | sam2vcf.pl script. 19 | -------------------------------------------------------------------------------- /lib/samtools-1.9/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT/Expat License 2 | 3 | Copyright (C) 2008-2018 Genome Research Ltd. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | DEALINGS IN THE SOFTWARE. 22 | 23 | 24 | [The use of a range of years within a copyright notice in this distribution 25 | should be interpreted as being equivalent to a list of years including the 26 | first and last year specified and all consecutive years between them. 27 | 28 | For example, a copyright notice that reads "Copyright (C) 2005, 2007-2009, 29 | 2011-2012" should be interpreted as being identical to a notice that reads 30 | "Copyright (C) 2005, 2007, 2008, 2009, 2011, 2012" and a copyright notice 31 | that reads "Copyright (C) 2005-2012" should be interpreted as being identical 32 | to a notice that reads "Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 33 | 2011, 2012".] 
34 | -------------------------------------------------------------------------------- /lib/samtools-1.9/Makefile.mingw: -------------------------------------------------------------------------------- 1 | CC= gcc.exe 2 | AR= ar.exe 3 | CFLAGS= -g -Wall -O2 4 | DFLAGS= -D_USE_KNETFILE -D_CURSES_LIB=2 5 | KNETFILE_O= knetfile.o 6 | LOBJS= bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \ 7 | bam_pileup.o bam_lpileup.o bam_md.o razf.o faidx.o \ 8 | $(KNETFILE_O) bam_sort.o sam_header.o bam_reheader.o kprobaln.o bedidx.o 9 | AOBJS= bam_tview.o bam_plcmd.o sam_view.o \ 10 | bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o \ 11 | bamtk.o kaln.o bam2bcf.o bam2bcf_indel.o errmod.o sample.o \ 12 | cut_target.o phase.o bam_cat.o bam2depth.o 13 | BCFOBJS= bcftools/bcf.o bcftools/fet.o bcftools/bcf2qcall.o bcftools/bcfutils.o \ 14 | bcftools/call1.o bcftools/index.o bcftools/kfunc.o bcftools/em.o \ 15 | bcftools/kmin.o bcftools/prob1.o bcftools/vcf.o bcftools/mut.o 16 | PROG= samtools.exe bcftools.exe 17 | INCLUDES= -I. -Iwin32 18 | SUBDIRS= . 19 | LIBPATH= 20 | 21 | .SUFFIXES:.c .o 22 | 23 | .c.o: 24 | $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@ 25 | 26 | all:$(PROG) 27 | 28 | .PHONY:all lib clean cleanlocal 29 | .PHONY:all-recur lib-recur clean-recur cleanlocal-recur install-recur 30 | 31 | lib:libbam.a 32 | 33 | libbam.a:$(LOBJS) 34 | $(AR) -cru $@ $(LOBJS) 35 | 36 | samtools.exe:$(AOBJS) libbam.a $(BCFOBJS) 37 | $(CC) $(CFLAGS) -o $@ $(AOBJS) $(BCFOBJS) $(LIBPATH) -lm -L. 
-lbam -Lwin32 -lz -lcurses -lws2_32 38 | 39 | bcftools.exe:$(BCFOBJS) bcftools/main.o kstring.o bgzf.o knetfile.o bedidx.o 40 | $(CC) $(CFLAGS) -o $@ $(BCFOBJS) bcftools/main.o kstring.o bgzf.o knetfile.o bedidx.o -lm -Lwin32 -lz -lws2_32 41 | 42 | razip.o:razf.h 43 | bam.o:bam.h razf.h bam_endian.h kstring.h sam_header.h 44 | sam.o:sam.h bam.h 45 | bam_import.o:bam.h kseq.h khash.h razf.h 46 | bam_pileup.o:bam.h razf.h ksort.h 47 | bam_plcmd.o:bam.h faidx.h bcftools/bcf.h bam2bcf.h 48 | bam_index.o:bam.h khash.h ksort.h razf.h bam_endian.h 49 | bam_lpileup.o:bam.h ksort.h 50 | bam_tview.o:bam.h faidx.h 51 | bam_sort.o:bam.h ksort.h razf.h 52 | bam_md.o:bam.h faidx.h 53 | sam_header.o:sam_header.h khash.h 54 | bcf.o:bcftools/bcf.h 55 | bam2bcf.o:bam2bcf.h errmod.h bcftools/bcf.h 56 | bam2bcf_indel.o:bam2bcf.h 57 | errmod.o:errmod.h 58 | 59 | faidx.o:faidx.h razf.h khash.h 60 | faidx_main.o:faidx.h razf.h 61 | 62 | clean: 63 | rm -fr gmon.out *.o a.out *.exe *.dSYM razip bgzip $(PROG) *~ *.a *.so.* *.so *.dylib 64 | -------------------------------------------------------------------------------- /lib/samtools-1.9/bam_aux.c: -------------------------------------------------------------------------------- 1 | /* bam_aux.c -- remaining aux field handling. 2 | 3 | Copyright (C) 2008-2010, 2013 Genome Research Ltd. 4 | Portions copyright (C) 2011 Broad Institute. 
5 | 6 | Author: Heng Li 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | DEALINGS IN THE SOFTWARE. 
*/ 25 | 26 | #include 27 | 28 | #include 29 | #include 30 | #include "bam.h" 31 | 32 | static inline int bam_aux_type2size(int x) 33 | { 34 | if (x == 'C' || x == 'c' || x == 'A') return 1; 35 | else if (x == 'S' || x == 's') return 2; 36 | else if (x == 'I' || x == 'i' || x == 'f' || x == 'F') return 4; 37 | else return 0; 38 | } 39 | 40 | #define __skip_tag(s) do { \ 41 | int type = toupper(*(s)); \ 42 | ++(s); \ 43 | if (type == 'Z' || type == 'H') { while (*(s)) ++(s); ++(s); } \ 44 | else if (type == 'B') (s) += 5 + bam_aux_type2size(*(s)) * (*(int32_t*)((s)+1)); \ 45 | else (s) += bam_aux_type2size(type); \ 46 | } while(0) 47 | 48 | 49 | int bam_aux_drop_other(bam1_t *b, uint8_t *s) 50 | { 51 | if (s) { 52 | uint8_t *p, *aux; 53 | aux = bam1_aux(b); 54 | p = s - 2; 55 | __skip_tag(s); 56 | memmove(aux, p, s - p); 57 | b->data_len -= bam_get_l_aux(b) - (s - p); 58 | } else { 59 | b->data_len -= bam_get_l_aux(b); 60 | } 61 | return 0; 62 | } 63 | 64 | int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *beg, int *end) 65 | { 66 | const char *name_lim = hts_parse_reg(str, beg, end); 67 | if (name_lim) { 68 | char *name = malloc(name_lim - str + 1); 69 | memcpy(name, str, name_lim - str); 70 | name[name_lim - str] = '\0'; 71 | *ref_id = bam_name2id(header, name); 72 | free(name); 73 | } 74 | else { 75 | // not parsable as a region, but possibly a sequence named "foo:a" 76 | *ref_id = bam_name2id(header, str); 77 | *beg = 0; *end = INT_MAX; 78 | } 79 | if (*ref_id == -1) return -1; 80 | return *beg <= *end? 0 : -1; 81 | } 82 | -------------------------------------------------------------------------------- /lib/samtools-1.9/bam_endian.h: -------------------------------------------------------------------------------- 1 | /* bam_endian.h -- endianness conversion functions. 2 | 3 | Copyright (C) 2008 Genome Research Ltd. 
4 | 5 | Author: Heng Li 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 
*/ 24 | 25 | #ifndef BAM_ENDIAN_H 26 | #define BAM_ENDIAN_H 27 | 28 | #include 29 | 30 | /* Returns nonzero on a big-endian host: stores 1 in a long and tests whether its lowest-addressed byte is zero. */ static inline int bam_is_big_endian() 31 | { 32 | long one= 1; 33 | return !(*((char *)(&one))); 34 | } 35 | /* Returns v with its two bytes exchanged. */ static inline uint16_t bam_swap_endian_2(uint16_t v) 36 | { 37 | return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); 38 | } 39 | /* Byte-swaps the uint16_t stored at x in place and returns x. */ static inline void *bam_swap_endian_2p(void *x) 40 | { 41 | *(uint16_t*)x = bam_swap_endian_2(*(uint16_t*)x); 42 | return x; 43 | } 44 | /* Returns v with its four bytes in reverse order: the 16-bit halves are swapped first, then the two bytes inside each half. */ static inline uint32_t bam_swap_endian_4(uint32_t v) 45 | { 46 | v = ((v & 0x0000FFFFU) << 16) | (v >> 16); 47 | return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); 48 | } 49 | /* Byte-swaps the uint32_t stored at x in place and returns x. */ static inline void *bam_swap_endian_4p(void *x) 50 | { 51 | *(uint32_t*)x = bam_swap_endian_4(*(uint32_t*)x); 52 | return x; 53 | } 54 | /* Returns v with its eight bytes in reverse order: 32-bit halves, then 16-bit quarters, then individual bytes are swapped. */ static inline uint64_t bam_swap_endian_8(uint64_t v) 55 | { 56 | v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32); 57 | v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16); 58 | return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8); 59 | } 60 | /* Byte-swaps the uint64_t stored at x in place and returns x. */ static inline void *bam_swap_endian_8p(void *x) 61 | { 62 | *(uint64_t*)x = bam_swap_endian_8(*(uint64_t*)x); 63 | return x; 64 | } 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /lib/samtools-1.9/bam_import.c: -------------------------------------------------------------------------------- 1 | /* bam_import.c -- SAM format parsing. 2 | 3 | Copyright (C) 2008-2013 Genome Research Ltd. 
4 | 5 | Author: Heng Li 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. */ 24 | 25 | #include 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include "htslib/kstring.h" 32 | #include "bam.h" 33 | #include "htslib/kseq.h" 34 | 35 | KSTREAM_INIT(gzFile, gzread, 16384) 36 |
/* Build a header from a list file of sequence names and lengths.
 *
 * fn  path of the list file, or "-" to read standard input.  The file is
 *     opened through zlib (gzopen/gzdopen).
 *
 * For each line the first two fields are read with ks_getuntil() using
 * delimiter code 0 (presumably "any whitespace" in kseq.h -- confirm).
 * The first field becomes SN:, the second is converted with atoi() and
 * becomes LN: of a synthesized "@SQ" line; the rest of the line is
 * discarded.  Reading stops at the first line whose name field is empty
 * or cannot be read.  The accumulated text is handed to sam_hdr_parse().
 *
 * Returns the parsed header, or 0 when fn is NULL, the file cannot be
 * opened, or the working buffers cannot be allocated.  The number of
 * sequences read is reported on stderr. */
bam_header_t *sam_header_read2(const char *fn)
{
    bam_header_t *header;
    int c, dret, n_targets = 0;
    gzFile fp;
    kstream_t *ks;
    kstring_t *str;
    kstring_t samstr = { 0, 0, NULL };

    if (fn == 0) return 0;
    fp = (strcmp(fn, "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(fn, "r");
    if (fp == 0) return 0;

    ks = ks_init(fp);
    str = (kstring_t*)calloc(1, sizeof(kstring_t));
    if (ks == 0 || str == 0) {
        /* Allocation failed: release whatever was obtained instead of
           dereferencing a null pointer in the loop below. */
        if (ks) ks_destroy(ks);
        gzclose(fp);
        free(str);
        return 0;
    }

    while (ks_getuntil(ks, 0, str, &dret) > 0) {
        size_t rec_start = samstr.l;    /* where this @SQ record begins */
        ksprintf(&samstr, "@SQ\tSN:%s", str->s);
        if (ks_getuntil(ks, 0, str, &dret) < 0) {
            /* Input ended after a name with no length field: drop the
               half-written record rather than emitting an LN: taken from
               whatever the buffer still holds. */
            samstr.l = rec_start;
            if (samstr.s) samstr.s[rec_start] = '\0';
            break;
        }
        ksprintf(&samstr, "\tLN:%d\n", atoi(str->s));
        n_targets++;
        if (dret != '\n')   /* skip any remaining fields on this line */
            while ((c = ks_getc(ks)) != '\n' && c != -1);
    }

    ks_destroy(ks);
    gzclose(fp);
    free(str->s); free(str);
    header = sam_hdr_parse(samstr.l, samstr.s? samstr.s : "");
    free(samstr.s);
    fprintf(stderr, "[sam_header_read2] %d sequences loaded.\n", n_targets);
    return header;
}
66 | -------------------------------------------------------------------------------- /lib/samtools-1.9/bam_lpileup.h: -------------------------------------------------------------------------------- 1 | /* bam_lpileup.h -- lplbuf routines (declarations copied from bam.h). 2 | 3 | Copyright (C) 2008, 2013 Genome Research Ltd. 4 | 5 | Author: Heng Li 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 20 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. */ 24 | 25 | #ifndef BAM_LPILEUP_H 26 | #define BAM_LPILEUP_H 27 | 28 | 29 | #include 30 | 31 | struct __bam_lplbuf_t; 32 | typedef struct __bam_lplbuf_t bam_lplbuf_t; 33 | 34 | #ifndef BAM_PILEUP_F_DEFINED 35 | #define BAM_PILEUP_F_DEFINED 36 | typedef int (*bam_pileup_f)(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data); 37 | #endif //BAM_PILEUP_F_DEFINED 38 | 39 | 40 | #ifdef __cplusplus 41 | extern "C" { 42 | #endif 43 | void bam_lplbuf_reset(bam_lplbuf_t *buf); 44 | 45 | /*! @abstract bam_plbuf_init() equivalent with level calculated. */ 46 | bam_lplbuf_t *bam_lplbuf_init(bam_pileup_f func, void *data); 47 | 48 | /*! @abstract bam_plbuf_destroy() equivalent with level calculated. */ 49 | void bam_lplbuf_destroy(bam_lplbuf_t *tv); 50 | 51 | /*! @abstract bam_plbuf_push() equivalent with level calculated. */ 52 | int bam_lplbuf_push(const bam1_t *b, bam_lplbuf_t *buf); 53 | #ifdef __cplusplus 54 | } 55 | #endif 56 | 57 | #endif // BAM_LPILEUP_H 58 | -------------------------------------------------------------------------------- /lib/samtools-1.9/bam_plbuf.c: -------------------------------------------------------------------------------- 1 | /* bam_plbuf.c -- plbuf routines (previously in bam_pileup.c). 2 | 3 | Copyright (C) 2008-2010, 2013 Genome Research Ltd. 
4 | 5 | Author: Heng Li 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 
*/ 24 | 25 | #include 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include "bam_plbuf.h" 33 | 34 | /***************** 35 | * callback APIs * 36 | *****************/ 37 | 38 | void bam_plbuf_reset(bam_plbuf_t *buf) 39 | { 40 | bam_plp_reset(buf->iter); 41 | } 42 | 43 | bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data) 44 | { 45 | bam_plbuf_t *buf; 46 | buf = calloc(1, sizeof(bam_plbuf_t)); 47 | buf->iter = bam_plp_init(0, 0); 48 | buf->func = func; 49 | buf->data = data; 50 | return buf; 51 | } 52 | 53 | void bam_plbuf_destroy(bam_plbuf_t *buf) 54 | { 55 | bam_plp_destroy(buf->iter); 56 | free(buf); 57 | } 58 | 59 | int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf) 60 | { 61 | int ret, n_plp, tid, pos; 62 | const bam_pileup1_t *plp; 63 | ret = bam_plp_push(buf->iter, b); 64 | if (ret < 0) return ret; 65 | while ((plp = bam_plp_next(buf->iter, &tid, &pos, &n_plp)) != 0) 66 | buf->func(tid, pos, n_plp, plp, buf->data); 67 | return 0; 68 | } 69 | -------------------------------------------------------------------------------- /lib/samtools-1.9/bam_plbuf.h: -------------------------------------------------------------------------------- 1 | /* bam_plbuf.h -- plbuf routines (declarations copied from bam.h). 2 | 3 | Copyright (C) 2008, 2013 Genome Research Ltd. 4 | 5 | Author: Heng Li 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. */ 24 | 25 | #ifndef BAM_PLBUF_H 26 | #define BAM_PLBUF_H 27 | 28 | #include 29 | 30 | #ifndef BAM_PILEUP_F_DEFINED 31 | #define BAM_PILEUP_F_DEFINED 32 | typedef int (*bam_pileup_f)(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data); 33 | #endif //BAM_PILEUP_F_DEFINED 34 | 35 | typedef struct { 36 | bam_plp_t iter; 37 | bam_pileup_f func; 38 | void *data; 39 | } bam_plbuf_t; 40 | 41 | #ifdef __cplusplus 42 | extern "C" { 43 | #endif 44 | void bam_plbuf_reset(bam_plbuf_t *buf); 45 | 46 | bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data); 47 | 48 | void bam_plbuf_destroy(bam_plbuf_t *buf); 49 | 50 | int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf); 51 | #ifdef __cplusplus 52 | } 53 | #endif 54 | 55 | #endif // BAM_PLBUF_H 56 | -------------------------------------------------------------------------------- /lib/samtools-1.9/bedidx.h: -------------------------------------------------------------------------------- 1 | /* bedidx.h -- BED file indexing header file. 2 | 3 | Copyright (C) 2017 Genome Research Ltd. 
4 | 5 | Author: Valeriu Ohan 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. */ 24 | 25 | #ifndef BEDIDX_H 26 | #define BEDIDX_H 27 | 28 | #include "htslib/hts.h" 29 | 30 | #define LIDX_SHIFT 13 31 | #define ALL 0 32 | #define FILTERED 1 33 | 34 | #define MIN(A,B) ( ( (A) < (B) ) ? (A) : (B) ) 35 | #define MAX(A,B) ( ( (A) > (B) ) ? 
(A) : (B) ) 36 | 37 | void *bed_read(const char *fn); 38 | void bed_destroy(void *_h); 39 | int bed_overlap(const void *_h, const char *chr, int beg, int end); 40 | void *bed_hash_regions(void *reg_hash, char **regs, int first, int last, int *op); 41 | const char* bed_get(void *reg_hash, int index, int filter); 42 | hts_reglist_t *bed_reglist(void *reg_hash, int filter, int *count_regs); 43 | void bed_unify(void *_h); 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /lib/samtools-1.9/config.h.in: -------------------------------------------------------------------------------- 1 | /* config.h.in. Generated from configure.ac by autoheader. */ 2 | 3 | /* If you use configure, this file provides #defines reflecting your 4 | configuration choices. If you have not run configure, suitable 5 | conservative defaults will be used. 6 | 7 | Autoheader adds a number of items to this template file that are not 8 | used by samtools: STDC_HEADERS and most HAVE_*_H header file defines 9 | are immaterial, as we assume standard ISO C headers and facilities; 10 | the PACKAGE_* defines are unused and are overridden by the more 11 | accurate PACKAGE_VERSION as computed by the Makefile. */ 12 | 13 | /* Define to 1 if a SysV or X/Open compatible Curses library is present */ 14 | #undef HAVE_CURSES 15 | 16 | /* Define to 1 if library supports color (enhanced functions) */ 17 | #undef HAVE_CURSES_COLOR 18 | 19 | /* Define to 1 if library supports X/Open Enhanced functions */ 20 | #undef HAVE_CURSES_ENHANCED 21 | 22 | /* Define to 1 if <curses.h> is present */ 23 | #undef HAVE_CURSES_H 24 | 25 | /* Define to 1 if library supports certain obsolete features */ 26 | #undef HAVE_CURSES_OBSOLETE 27 | 28 | /* Define to 1 if you have the `z' library (-lz). 
*/ 29 | #undef HAVE_LIBZ 30 | 31 | /* Define to 1 if the Ncurses library is present */ 32 | #undef HAVE_NCURSES 33 | 34 | /* Define to 1 if the NcursesW library is present */ 35 | #undef HAVE_NCURSESW 36 | 37 | /* Define to 1 if <ncursesw/curses.h> is present */ 38 | #undef HAVE_NCURSESW_CURSES_H 39 | 40 | /* Define to 1 if <ncursesw.h> is present */ 41 | #undef HAVE_NCURSESW_H 42 | 43 | /* Define to 1 if <ncurses/curses.h> is present */ 44 | #undef HAVE_NCURSES_CURSES_H 45 | 46 | /* Define to 1 if <ncurses.h> is present */ 47 | #undef HAVE_NCURSES_H 48 | 49 | /* Define to the address where bug reports for this package should be sent. */ 50 | #undef PACKAGE_BUGREPORT 51 | 52 | /* Define to the full name of this package. */ 53 | #undef PACKAGE_NAME 54 | 55 | /* Define to the full name and version of this package. */ 56 | #undef PACKAGE_STRING 57 | 58 | /* Define to the one symbol short name of this package. */ 59 | #undef PACKAGE_TARNAME 60 | 61 | /* Define to the home page for this package. */ 62 | #undef PACKAGE_URL 63 | 64 | /* Define to the version of this package. */ 65 | #undef PACKAGE_VERSION 66 | 67 | /* Number of bits in a file offset, on hosts where this is settable. */ 68 | #undef _FILE_OFFSET_BITS 69 | 70 | /* Define for large files, on AIX-style hosts. */ 71 | #undef _LARGE_FILES 72 | -------------------------------------------------------------------------------- /lib/samtools-1.9/config.mk.in: -------------------------------------------------------------------------------- 1 | # Optional configure Makefile overrides for samtools. 2 | # 3 | # Copyright (C) 2015,2017 Genome Research Ltd. 
4 | # 5 | # Author: John Marshall 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | # DEALINGS IN THE SOFTWARE. 24 | 25 | # This is @configure_input@ 26 | # 27 | # If you use configure, this file overrides variables and augments rules 28 | # in the Makefile to reflect your configuration choices. If you don't run 29 | # configure, the main Makefile contains suitable conservative defaults. 
30 | 31 | prefix = @prefix@ 32 | exec_prefix = @exec_prefix@ 33 | bindir = @bindir@ 34 | datarootdir = @datarootdir@ 35 | mandir = @mandir@ 36 | 37 | CC = @CC@ 38 | CPPFLAGS = @CPPFLAGS@ 39 | CFLAGS = @CFLAGS@ 40 | LDFLAGS = @LDFLAGS@ 41 | LIBS = @LIBS@ 42 | 43 | @Hsource@HTSDIR = @HTSDIR@ 44 | @Hsource@include $(HTSDIR)/htslib.mk 45 | @Hsource@include $(HTSDIR)/htslib_static.mk 46 | @Hsource@HTSLIB = $(HTSDIR)/libhts.a 47 | @Hsource@HTSLIB_LIB = $(HTSLIB) $(HTSLIB_static_LIBS) 48 | @Hsource@HTSLIB_LDFLAGS = $(HTSLIB_static_LDFLAGS) 49 | @Hsource@BGZIP = $(HTSDIR)/bgzip 50 | HTSLIB_CPPFLAGS = @HTSLIB_CPPFLAGS@ 51 | @Hinstall@HTSLIB_LDFLAGS = @HTSLIB_LDFLAGS@ 52 | @Hinstall@HTSLIB_LIB = -lhts 53 | 54 | CURSES_LIB = @CURSES_LIB@ 55 | -------------------------------------------------------------------------------- /lib/samtools-1.9/htslib-1.9/README: -------------------------------------------------------------------------------- 1 | HTSlib is an implementation of a unified C library for accessing common file 2 | formats, such as SAM, CRAM, VCF, and BCF, used for high-throughput sequencing 3 | data. It is the core library used by samtools and bcftools. 4 | 5 | See INSTALL for building and installation instructions. 6 | -------------------------------------------------------------------------------- /lib/samtools-1.9/htslib-1.9/cram/cram.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2012-2013 Genome Research Ltd. 3 | Author: James Bonfield 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 16 | Institute nor the names of its contributors may be used to endorse or promote 17 | products derived from this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | 31 | /*! \file 32 | * CRAM interface. 33 | * 34 | * Consider using the higher level hts_*() API for programs that wish to 35 | * be file format agnostic (see htslib/hts.h). 36 | * 37 | * This API should be used for CRAM specific code. The specifics of the 38 | * public API are implemented in cram_io.h, cram_encode.h and cram_decode.h 39 | * although these should not be included directly (use this file instead). 
40 | */ 41 | 42 | #ifndef _CRAM_H_ 43 | #define _CRAM_H_ 44 | 45 | #include "cram/cram_samtools.h" 46 | #include "cram/sam_header.h" 47 | #include "cram_structs.h" 48 | #include "cram_io.h" 49 | #include "cram_encode.h" 50 | #include "cram_decode.h" 51 | #include "cram_stats.h" 52 | #include "cram_codecs.h" 53 | #include "cram_index.h" 54 | 55 | // Validate against the external cram.h, 56 | // 57 | // This contains duplicated portions from cram_io.h and cram_structs.h, 58 | // so we want to ensure that the prototypes match. 59 | #include "htslib/cram.h" 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /lib/samtools-1.9/htslib-1.9/cram/cram_stats.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2012-2013 Genome Research Ltd. 3 | Author: James Bonfield 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 16 | Institute nor the names of its contributors may be used to endorse or promote 17 | products derived from this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | 31 | #ifndef _CRAM_STATS_H_ 32 | #define _CRAM_STATS_H_ 33 | 34 | #ifdef __cplusplus 35 | extern "C" { 36 | #endif 37 | 38 | cram_stats *cram_stats_create(void); 39 | void cram_stats_add(cram_stats *st, int32_t val); 40 | void cram_stats_del(cram_stats *st, int32_t val); 41 | void cram_stats_dump(cram_stats *st); 42 | void cram_stats_free(cram_stats *st); 43 | 44 | /* 45 | * Computes entropy from integer frequencies for various encoding methods and 46 | * picks the best encoding. 47 | * 48 | * FIXME: we could reuse some of the code here for the actual encoding 49 | * parameters too. Eg the best 'k' for SUBEXP or the code lengths for huffman. 50 | * 51 | * Returns the best codec to use. 
52 | */ 53 | enum cram_encoding cram_stats_encoding(cram_fd *fd, cram_stats *st); 54 | 55 | #ifdef __cplusplus 56 | } 57 | #endif 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /lib/samtools-1.9/htslib-1.9/cram/files.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 1994, 1996-1997, 2000, 2003 MEDICAL RESEARCH COUNCIL 3 | All rights reserved 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1 Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 2 Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3 Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF 16 | MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or 17 | promote products derived from this software without specific prior written 18 | permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 21 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 24 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 25 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 27 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | 32 | #include 33 | 34 | #include "cram/misc.h" 35 | 36 | #include 37 | #include 38 | /* Alliant's Concentrix is hugely deficient */ 39 | /* Define things we require in this program */ 40 | /* Methinks S_IFMT and S_IFDIR aren't defined in POSIX */ 41 | #ifndef S_ISDIR 42 | #define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR) 43 | #endif /*!S_ISDIR*/ 44 | #ifndef S_ISREG 45 | #define S_ISREG(m) (((m)&S_IFMT) == S_IFREG) 46 | #endif /*!S_ISREG*/ 47 | 48 | /* Returns nonzero if stat() succeeds on fn and its mode passes S_ISDIR; returns 0 otherwise, including when stat() fails. */ int is_directory(char * fn) 49 | { 50 | struct stat buf; 51 | if ( stat(fn,&buf) ) return 0; 52 | return S_ISDIR(buf.st_mode); 53 | } 54 | 55 | /* Returns nonzero if stat() succeeds on fn and its mode passes S_ISREG; returns 0 otherwise, including when stat() fails. */ int is_file(char * fn) 56 | { 57 | struct stat buf; 58 | if ( stat(fn,&buf) ) return 0; 59 | return S_ISREG(buf.st_mode); 60 | } 61 | 62 | /* Returns 1 if stat() succeeds on fn and 0 if it fails. */ int file_exists(char * fn) 63 | { 64 | struct stat buf; 65 | return ( stat(fn,&buf) == 0); 66 | } 67 | 68 | /* Returns the st_size reported by stat() for fn, converted to int, or 0 when stat() fails (so a failed stat cannot be told apart from an empty file). NOTE(review): the conversion to int presumably cannot represent very large files - confirm callers only use this where that is acceptable. */ int file_size(char * fn) 69 | { 70 | struct stat buf; 71 | if ( stat(fn,&buf) != 0) return 0; 72 | return buf.st_size; 73 | } 74 | 75 | -------------------------------------------------------------------------------- /lib/samtools-1.9/htslib-1.9/cram/pooled_alloc.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2009 Genome Research Ltd. 3 | Author: Rob Davies 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. 
Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 16 | Institute nor the names of its contributors may be used to endorse or promote 17 | products derived from this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | 31 | #ifndef _POOLED_ALLOC_H_ 32 | #define _POOLED_ALLOC_H_ 33 | 34 | #include 35 | 36 | #ifdef __cplusplus 37 | extern "C" { 38 | #endif 39 | 40 | /* 41 | * Implements a pooled block allocator where all items are the same size, 42 | * but we need many of them. 
43 | */ 44 | typedef struct { 45 | void *pool; 46 | size_t used; 47 | } pool_t; 48 | 49 | typedef struct { 50 | size_t dsize; 51 | size_t psize; 52 | size_t npools; 53 | pool_t *pools; 54 | void *free; 55 | } pool_alloc_t; 56 | 57 | pool_alloc_t *pool_create(size_t dsize); 58 | void pool_destroy(pool_alloc_t *p); 59 | void *pool_alloc(pool_alloc_t *p); 60 | void pool_free(pool_alloc_t *p, void *ptr); 61 | 62 | #ifdef __cplusplus 63 | } 64 | #endif 65 | 66 | #endif /*_POOLED_ALLOC_H_*/ 67 | -------------------------------------------------------------------------------- /lib/samtools-1.9/htslib-1.9/cram/rANS_static.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014 Genome Research Ltd. 3 | * Author(s): James Bonfield 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 | * Institute nor the names of its contributors may be used to endorse 18 | * or promote products derived from this software without specific 19 | * prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH 25 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | 35 | #ifndef RANS_STATIC_H 36 | #define RANS_STATIC_H 37 | 38 | #ifdef __cplusplus 39 | extern "C" { 40 | #endif 41 | 42 | unsigned char *rans_compress(unsigned char *in, unsigned int in_size, 43 | unsigned int *out_size, int order); 44 | unsigned char *rans_uncompress(unsigned char *in, unsigned int in_size, 45 | unsigned int *out_size); 46 | 47 | #ifdef __cplusplus 48 | } 49 | #endif 50 | 51 | #endif /* RANS_STATIC_H */ 52 | -------------------------------------------------------------------------------- /lib/samtools-1.9/htslib-1.9/cram/string_alloc.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2010 Genome Research Ltd. 3 | Author: Andrew Whitwham 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger 16 | Institute nor the names of its contributors may be used to endorse or promote 17 | products derived from this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | 31 | #ifndef _STRING_ALLOC_H_ 32 | #define _STRING_ALLOC_H_ 33 | 34 | #include 35 | 36 | #ifdef __cplusplus 37 | extern "C" { 38 | #endif 39 | 40 | /* 41 | * A pooled string allocator intended to cut down on the 42 | * memory overhead of many small string allocations. 43 | * 44 | * Andrew Whitwham, September 2010. 
45 | */ 46 | 47 | typedef struct { 48 | char *str; 49 | size_t used; 50 | } string_t; 51 | 52 | typedef struct { 53 | size_t max_length; 54 | size_t nstrings; 55 | string_t *strings; 56 | } string_alloc_t; 57 | 58 | string_alloc_t *string_pool_create(size_t max_length); 59 | void string_pool_destroy(string_alloc_t *a_str); 60 | char *string_alloc(string_alloc_t *a_str, size_t length); 61 | char *string_dup(string_alloc_t *a_str, char *instr); 62 | char *string_ndup(string_alloc_t *a_str, char *instr, size_t len); 63 | 64 | #ifdef __cplusplus 65 | } 66 | #endif 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /lib/samtools-1.9/htslib-1.9/hts_internal.h: -------------------------------------------------------------------------------- 1 | /* hts_internal.h -- internal functions; not part of the public API. 2 | 3 | Copyright (C) 2015-2016 Genome Research Ltd. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 18 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | DEALINGS IN THE SOFTWARE. */ 22 | 23 | #ifndef HTSLIB_HTS_INTERNAL_H 24 | #define HTSLIB_HTS_INTERNAL_H 25 | 26 | #include 27 | #include 28 | 29 | #include "htslib/hts.h" 30 | 31 | #include "textutils_internal.h" 32 | 33 | #ifdef __cplusplus 34 | extern "C" { 35 | #endif 36 | 37 | struct hFILE; 38 | 39 | struct hts_json_token { 40 | char type; ///< Token type 41 | char *str; ///< Value as a C string (filled in for all token types) 42 | // TODO Add other fields to fill in for particular data types, e.g. 43 | // int inum; 44 | // float fnum; 45 | }; 46 | 47 | struct cram_fd; 48 | 49 | char *hts_idx_getfn(const char *fn, const char *ext); 50 | 51 | // The CRAM implementation stores the loaded index within the cram_fd rather 52 | // than separately as is done elsewhere in htslib. So if p is a pointer to 53 | // an hts_idx_t with p->fmt == HTS_FMT_CRAI, then it actually points to an 54 | // hts_cram_idx_t and should be cast accordingly. 55 | typedef struct hts_cram_idx_t { 56 | int fmt; 57 | struct cram_fd *cram; 58 | } hts_cram_idx_t; 59 | 60 | 61 | // Entry point to hFILE_multipart backend. 
62 | struct hFILE *hopen_htsget_redirect(struct hFILE *hfile, const char *mode); 63 | 64 | struct hts_path_itr { 65 | kstring_t path, entry; 66 | void *dirv; // DIR * privately 67 | const char *pathdir, *prefix, *suffix; 68 | size_t prefix_len, suffix_len, entry_dir_l; 69 | }; 70 | 71 | void hts_path_itr_setup(struct hts_path_itr *itr, const char *path, 72 | const char *builtin_path, const char *prefix, size_t prefix_len, 73 | const char *suffix, size_t suffix_len); 74 | 75 | const char *hts_path_itr_next(struct hts_path_itr *itr); 76 | 77 | void *load_plugin(void **pluginp, const char *filename, const char *symbol); 78 | void *plugin_sym(void *plugin, const char *name, const char **errmsg); 79 | void close_plugin(void *plugin); 80 | 81 | #ifdef __cplusplus 82 | } 83 | #endif 84 | 85 | #endif 86 | -------------------------------------------------------------------------------- /lib/samtools-1.9/htslib-1.9/hts_os.c: -------------------------------------------------------------------------------- 1 | /// @file hts_os.c 2 | /// Operating System specific tweaks, for compatibility with POSIX. 3 | /* 4 | Copyright (C) 2017 Genome Research Ltd. 5 | 6 | Author: James Bonfield 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 
/*
 * Pass-through wrappers used when the C library already provides the
 * *rand48() family (HAVE_DRAND48 is defined); otherwise os/rand.c supplies
 * the same four entry points.
 */

/* Seed the library's shared 48-bit generator. */
void hts_srand48(long seed) { srand48(seed); }

/* Advance the caller-owned state xseed and return a double in [0, 1). */
double hts_erand48(unsigned short xseed[3]) { return erand48(xseed); }

/* Next double in [0, 1) from the shared generator. */
double hts_drand48(void) { return drand48(); }

/*
 * Next non-negative long from the shared generator.  The return type is
 * long so that this definition agrees with the prototype in
 * htslib/hts_os.h and with the fallback definition in os/rand.c; defining
 * it as double would make calls through the header's prototype undefined.
 */
long hts_lrand48(void) { return lrand48(); }
6 | static_ldflags=@static_LDFLAGS@ 7 | static_libs=@static_LIBS@ 8 | 9 | Name: htslib 10 | Description: C library for high-throughput sequencing data formats 11 | Version: @-PACKAGE_VERSION@ 12 | Cflags: -I${includedir} 13 | Libs: -L${libdir} -lhts 14 | Libs.private: -L${libdir} @private_LIBS@ -lhts -lm -lpthread 15 | Requires.private: zlib @pc_requires@ 16 | -------------------------------------------------------------------------------- /lib/samtools-1.9/htslib-1.9/htslib/hts_os.h: -------------------------------------------------------------------------------- 1 | /// @file hts_os.h 2 | /// Operating System specific tweaks, for compatibility with POSIX. 3 | /* 4 | Copyright (C) 2017 Genome Research Ltd. 5 | 6 | Author: James Bonfield 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | DEALINGS IN THE SOFTWARE. 
*/ 25 | 26 | #ifndef HTSLIB_HTS_OS_H 27 | #define HTSLIB_HTS_OS_H 28 | 29 | extern void hts_srand48(long seed); 30 | extern double hts_erand48(unsigned short xseed[3]); 31 | extern double hts_drand48(void); 32 | extern long hts_lrand48(void); 33 | 34 | #if defined(_WIN32) && !defined(__CYGWIN__) 35 | // Windows usually lacks *rand48(), but cygwin provides them. 36 | #define srand48(S) hts_srand48((S)) 37 | #define erand48(X) hts_erand48((X)) 38 | #define drand48() hts_drand48() 39 | #define lrand48() hts_lrand48() 40 | #endif 41 | 42 | #if 0 /* def _WIN32 - disabled for now, not currently used */ 43 | /* Check if the fd is a cygwin/msys's pty. */ 44 | extern int is_cygpty(int fd); 45 | #endif 46 | 47 | 48 | #if defined(__MINGW32__) 49 | #include 50 | #define mkdir(filename,mode) mkdir((filename)) 51 | #endif 52 | 53 | #ifdef _WIN32 54 | #include 55 | #define srandom srand 56 | #define random rand 57 | #endif 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /lib/samtools-1.9/htslib-1.9/htslib/kfunc.h: -------------------------------------------------------------------------------- 1 | /* The MIT License 2 | 3 | Copyright (C) 2010, 2013 Genome Research Ltd. 4 | Copyright (C) 2011 Attractive Chaos 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining 7 | a copy of this software and associated documentation files (the 8 | "Software"), to deal in the Software without restriction, including 9 | without limitation the rights to use, copy, modify, merge, publish, 10 | distribute, sublicense, and/or sell copies of the Software, and to 11 | permit persons to whom the Software is furnished to do so, subject to 12 | the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be 15 | included in all copies or substantial portions of the Software. 
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 22 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | SOFTWARE. 25 | */ 26 | 27 | #ifndef HTSLIB_KFUNC_H 28 | #define HTSLIB_KFUNC_H 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif 33 | 34 | /* Log gamma function 35 | * \log{\Gamma(z)} 36 | * AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245 37 | */ 38 | double kf_lgamma(double z); 39 | 40 | /* complementary error function 41 | * \frac{2}{\sqrt{\pi}} \int_x^{\infty} e^{-t^2} dt 42 | * AS66, 2nd algorithm, http://lib.stat.cmu.edu/apstat/66 43 | */ 44 | double kf_erfc(double x); 45 | 46 | /* The following computes regularized incomplete gamma functions. 47 | * Formulas are taken from Wiki, with additional input from Numerical 48 | * Recipes in C (for modified Lentz's algorithm) and AS245 49 | * (http://lib.stat.cmu.edu/apstat/245). 50 | * 51 | * A good online calculator is available at: 52 | * 53 | * http://www.danielsoper.com/statcalc/calc23.aspx 54 | * 55 | * It calculates upper incomplete gamma function, which equals 56 | * kf_gammaq(s,z)*tgamma(s). 57 | */ 58 | 59 | double kf_gammap(double s, double z); 60 | double kf_gammaq(double s, double z); 61 | 62 | /* Regularized incomplete beta function. The method is taken from 63 | * Numerical Recipe in C, 2nd edition, section 6.4. 
The following web 64 | * page calculates the incomplete beta function, which equals 65 | * kf_betai(a,b,x) * gamma(a) * gamma(b) / gamma(a+b): 66 | * 67 | * http://www.danielsoper.com/statcalc/calc36.aspx 68 | */ 69 | double kf_betai(double a, double b, double x); 70 | 71 | /* 72 | * n11 n12 | n1_ 73 | * n21 n22 | n2_ 74 | * -----------+---- 75 | * n_1 n_2 | n 76 | */ 77 | double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two); 78 | 79 | #ifdef __cplusplus 80 | } 81 | #endif 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /lib/samtools-1.9/htslib-1.9/htslib/vcf_sweep.h: -------------------------------------------------------------------------------- 1 | /// @file htslib/vcf_sweep.h 2 | /// Forward/reverse sweep API. 3 | /* 4 | Copyright (C) 2013 Genome Research Ltd. 5 | 6 | Author: Petr Danecek 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 21 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | DEALINGS IN THE SOFTWARE. */ 25 | 26 | #ifndef HTSLIB_VCF_SWEEP_H 27 | #define HTSLIB_VCF_SWEEP_H 28 | 29 | #include "hts.h" 30 | #include "vcf.h" 31 | 32 | #ifdef __cplusplus 33 | extern "C" { 34 | #endif 35 | 36 | typedef struct _bcf_sweep_t bcf_sweep_t; 37 | 38 | bcf_sweep_t *bcf_sweep_init(const char *fname); 39 | void bcf_sweep_destroy(bcf_sweep_t *sw); 40 | bcf_hdr_t *bcf_sweep_hdr(bcf_sweep_t *sw); 41 | bcf1_t *bcf_sweep_fwd(bcf_sweep_t *sw); 42 | bcf1_t *bcf_sweep_bwd(bcf_sweep_t *sw); 43 | 44 | #ifdef __cplusplus 45 | } 46 | #endif 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /lib/samtools-1.9/htslib-1.9/htslib_vars.mk: -------------------------------------------------------------------------------- 1 | # Makefile variables useful for third-party code using htslib's public API. 2 | # 3 | # Copyright (C) 2013-2017 Genome Research Ltd. 4 | # 5 | # Author: John Marshall 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 
16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | # DEALINGS IN THE SOFTWARE. 24 | 25 | # These variables can be used to express dependencies on htslib headers. 26 | # See htslib.mk for details. 27 | 28 | htslib_bgzf_h = $(HTSPREFIX)htslib/bgzf.h $(htslib_hts_defs_h) 29 | htslib_cram_h = $(HTSPREFIX)htslib/cram.h $(htslib_hts_h) 30 | htslib_faidx_h = $(HTSPREFIX)htslib/faidx.h $(htslib_hts_defs_h) 31 | htslib_hfile_h = $(HTSPREFIX)htslib/hfile.h $(htslib_hts_defs_h) 32 | htslib_hts_h = $(HTSPREFIX)htslib/hts.h $(htslib_hts_defs_h) $(htslib_hts_log_h) 33 | htslib_hts_defs_h = $(HTSPREFIX)htslib/hts_defs.h 34 | htslib_hts_endian_h = $(HTSPREFIX)htslib/hts_endian.h 35 | htslib_hts_log_h = $(HTSPREFIX)htslib/hts_log.h $(htslib_hts_defs_h) 36 | htslib_hts_os_h = $(HTSPREFIX)htslib/hts_os.h 37 | htslib_kbitset_h = $(HTSPREFIX)htslib/kbitset.h 38 | htslib_kfunc_h = $(HTSPREFIX)htslib/kfunc.h 39 | htslib_khash_h = $(HTSPREFIX)htslib/khash.h 40 | htslib_khash_str2int_h = $(HTSPREFIX)htslib/khash_str2int.h $(htslib_khash_h) 41 | htslib_klist_h = $(HTSPREFIX)htslib/klist.h 42 | htslib_knetfile_h = $(HTSPREFIX)htslib/knetfile.h 43 | htslib_kseq_h = $(HTSPREFIX)htslib/kseq.h 44 | htslib_ksort_h = $(HTSPREFIX)htslib/ksort.h 45 | htslib_kstring_h = $(HTSPREFIX)htslib/kstring.h 46 | htslib_regidx_h = $(HTSPREFIX)htslib/regidx.h 47 | htslib_sam_h = $(HTSPREFIX)htslib/sam.h $(htslib_hts_h) 48 | htslib_synced_bcf_reader_h = $(HTSPREFIX)htslib/synced_bcf_reader.h $(htslib_hts_h) $(htslib_vcf_h) $(htslib_tbx_h) 49 | htslib_tbx_h = $(HTSPREFIX)htslib/tbx.h 
 $(htslib_hts_h) 50 | htslib_thread_pool_h = $(HTSPREFIX)htslib/thread_pool.h 51 | htslib_vcf_h = $(HTSPREFIX)htslib/vcf.h $(htslib_hts_h) $(htslib_kstring_h) $(htslib_hts_defs_h) $(htslib_hts_endian_h) 52 | htslib_vcf_sweep_h = $(HTSPREFIX)htslib/vcf_sweep.h $(htslib_hts_h) $(htslib_vcf_h) 53 | htslib_vcfutils_h = $(HTSPREFIX)htslib/vcfutils.h $(htslib_vcf_h) 54 | -------------------------------------------------------------------------------- /lib/samtools-1.9/htslib-1.9/os/lzma_stub.h: -------------------------------------------------------------------------------- 1 | #ifndef LZMA_STUB_H 2 | #define LZMA_STUB_H 3 | 4 | /* Some platforms, notably macOS, ship a usable liblzma shared library but 5 | do not ship any LZMA header files. The <lzma.h> and <lzma/{*}.h> header 6 | files that come with the library contain the following statement: 7 | 8 | * 9 | * Author: Lasse Collin 10 | * 11 | * This file has been put into the public domain. 12 | * You can do whatever you want with this file. 13 | * 14 | 15 | Accordingly the following declarations have been copied and distilled 16 | from <lzma.h> and (primarily) <lzma/base.h> and are sufficient 17 | to compile cram/cram_io.c in the absence of proper LZMA headers. 18 | 19 | This file, lzma_stub.h, remains in the public domain. 
*/ 20 | 21 | #include 22 | 23 | #ifdef __cplusplus 24 | extern "C" { 25 | #endif 26 | 27 | typedef enum { LZMA_OK = 0, LZMA_STREAM_END = 1 } lzma_ret; 28 | typedef enum { LZMA_RUN = 0, LZMA_FINISH = 3 } lzma_action; 29 | typedef enum { LZMA_CHECK_CRC32 = 1 } lzma_check; 30 | typedef enum { LZMA_RESERVED_ENUM = 0 } lzma_reserved_enum; 31 | 32 | struct lzma_allocator; 33 | struct lzma_internal; 34 | 35 | typedef struct { 36 | const uint8_t *next_in; 37 | size_t avail_in; 38 | uint64_t total_in; 39 | 40 | uint8_t *next_out; 41 | size_t avail_out; 42 | uint64_t total_out; 43 | 44 | const struct lzma_allocator *allocator; 45 | struct lzma_internal *internal; 46 | 47 | void *reserved_ptr1; 48 | void *reserved_ptr2; 49 | void *reserved_ptr3; 50 | void *reserved_ptr4; 51 | uint64_t reserved_int1; 52 | uint64_t reserved_int2; 53 | size_t reserved_int3; 54 | size_t reserved_int4; 55 | lzma_reserved_enum reserved_enum1; 56 | lzma_reserved_enum reserved_enum2; 57 | } lzma_stream; 58 | 59 | #define LZMA_STREAM_INIT \ 60 | { NULL, 0, 0, NULL, 0, 0, NULL, NULL, \ 61 | NULL, NULL, NULL, NULL, 0, 0, 0, 0, \ 62 | LZMA_RESERVED_ENUM, LZMA_RESERVED_ENUM } 63 | 64 | extern size_t lzma_stream_buffer_bound(size_t uncompressed_size); 65 | 66 | extern lzma_ret lzma_easy_buffer_encode( 67 | uint32_t preset, lzma_check check, 68 | const struct lzma_allocator *allocator, 69 | const uint8_t *in, size_t in_size, 70 | uint8_t *out, size_t *out_pos, size_t out_size); 71 | 72 | extern lzma_ret lzma_stream_decoder( 73 | lzma_stream *strm, uint64_t memlimit, uint32_t flags); 74 | 75 | extern uint64_t lzma_easy_decoder_memusage(uint32_t preset); 76 | 77 | extern lzma_ret lzma_code(lzma_stream *strm, lzma_action action); 78 | 79 | extern void lzma_end(lzma_stream *strm); 80 | 81 | #ifdef __cplusplus 82 | } 83 | #endif 84 | 85 | #endif 86 | -------------------------------------------------------------------------------- /lib/samtools-1.9/htslib-1.9/os/rand.c: 
-------------------------------------------------------------------------------- 1 | /* rand.c -- drand48 implementation from the FreeBSD source tree. */ 2 | 3 | // This file is an amalgamation of the many small files in FreeBSD to do with 4 | // drand48 and friends implementations. 5 | // It comprises _rand48.c, rand48.h, srand48.c, drand48.c, erand48.c, lrand48.c 6 | 7 | /* 8 | * Copyright (c) 1993 Martin Birgmeier 9 | * All rights reserved. 10 | * 11 | * You may redistribute unmodified or modified versions of this source 12 | * code provided that the above copyright notice and this and the 13 | * following conditions are retained. 14 | * 15 | * This software is provided ``as is'', and comes with no warranties 16 | * of any kind. I shall in no event be liable for anything that happens 17 | * to anyone/anything when using this software. 18 | */ 19 | 20 | //#include 21 | //__FBSDID("$FreeBSD: src/lib/libc/gen/_rand48.c,v 1.2 2002/03/22 21:52:05 obrien Exp $"); 22 | 23 | #include 24 | 25 | #define RAND48_SEED_0 (0x330e) 26 | #define RAND48_SEED_1 (0xabcd) 27 | #define RAND48_SEED_2 (0x1234) 28 | #define RAND48_MULT_0 (0xe66d) 29 | #define RAND48_MULT_1 (0xdeec) 30 | #define RAND48_MULT_2 (0x0005) 31 | #define RAND48_ADD (0x000b) 32 | 33 | static unsigned short _rand48_seed[3] = { 34 | RAND48_SEED_0, 35 | RAND48_SEED_1, 36 | RAND48_SEED_2 37 | }; 38 | static unsigned short _rand48_mult[3] = { 39 | RAND48_MULT_0, 40 | RAND48_MULT_1, 41 | RAND48_MULT_2 42 | }; 43 | static unsigned short _rand48_add = RAND48_ADD; 44 | 45 | static void 46 | _dorand48(unsigned short xseed[3]) 47 | { 48 | unsigned long accu; 49 | unsigned short temp[2]; 50 | 51 | accu = (unsigned long) _rand48_mult[0] * (unsigned long) xseed[0] + 52 | (unsigned long) _rand48_add; 53 | temp[0] = (unsigned short) accu; /* lower 16 bits */ 54 | accu >>= sizeof(unsigned short) * 8; 55 | accu += (unsigned long) _rand48_mult[0] * (unsigned long) xseed[1] + 56 | (unsigned long) _rand48_mult[1] * (unsigned long) 
xseed[0]; 57 | temp[1] = (unsigned short) accu; /* middle 16 bits */ 58 | accu >>= sizeof(unsigned short) * 8; 59 | accu += _rand48_mult[0] * xseed[2] + _rand48_mult[1] * xseed[1] + _rand48_mult[2] * xseed[0]; 60 | xseed[0] = temp[0]; 61 | xseed[1] = temp[1]; 62 | xseed[2] = (unsigned short) accu; 63 | } 64 | 65 | void 66 | hts_srand48(long seed) 67 | { 68 | _rand48_seed[0] = RAND48_SEED_0; 69 | _rand48_seed[1] = (unsigned short) seed; 70 | _rand48_seed[2] = (unsigned short) (seed >> 16); 71 | _rand48_mult[0] = RAND48_MULT_0; 72 | _rand48_mult[1] = RAND48_MULT_1; 73 | _rand48_mult[2] = RAND48_MULT_2; 74 | _rand48_add = RAND48_ADD; 75 | } 76 | 77 | double 78 | hts_erand48(unsigned short xseed[3]) 79 | { 80 | _dorand48(xseed); 81 | return ldexp((double) xseed[0], -48) + 82 | ldexp((double) xseed[1], -32) + 83 | ldexp((double) xseed[2], -16); 84 | } 85 | 86 | double 87 | hts_drand48(void) 88 | { 89 | return hts_erand48(_rand48_seed); 90 | } 91 | 92 | long 93 | hts_lrand48(void) 94 | { 95 | _dorand48(_rand48_seed); 96 | return ((long) _rand48_seed[2] << 15) + ((long) _rand48_seed[1] >> 1); 97 | } 98 | -------------------------------------------------------------------------------- /lib/samtools-1.9/htslib-1.9/sam.5: -------------------------------------------------------------------------------- 1 | '\" t 2 | .TH sam 5 "August 2013" "htslib" "Bioinformatics formats" 3 | .SH NAME 4 | sam \- Sequence Alignment/Map file format 5 | .\" 6 | .\" Copyright (C) 2009, 2013 Genome Research Ltd. 
7 | .\" 8 | .\" Author: Heng Li 9 | .\" 10 | .\" Permission is hereby granted, free of charge, to any person obtaining a 11 | .\" copy of this software and associated documentation files (the "Software"), 12 | .\" to deal in the Software without restriction, including without limitation 13 | .\" the rights to use, copy, modify, merge, publish, distribute, sublicense, 14 | .\" and/or sell copies of the Software, and to permit persons to whom the 15 | .\" Software is furnished to do so, subject to the following conditions: 16 | .\" 17 | .\" The above copyright notice and this permission notice shall be included in 18 | .\" all copies or substantial portions of the Software. 19 | .\" 20 | .\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | .\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | .\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 23 | .\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | .\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 25 | .\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 26 | .\" DEALINGS IN THE SOFTWARE. 27 | .\" 28 | .SH DESCRIPTION 29 | Sequence Alignment/Map (SAM) format is TAB-delimited. Apart from the header lines, which are started 30 | with the `@' symbol, each alignment line consists of: 31 | .TS 32 | nlbl. 
 33 | 1 QNAME Query template/pair NAME 34 | 2 FLAG bitwise FLAG 35 | 3 RNAME Reference sequence NAME 36 | 4 POS 1-based leftmost POSition/coordinate of clipped sequence 37 | 5 MAPQ MAPping Quality (Phred-scaled) 38 | 6 CIGAR extended CIGAR string 39 | 7 MRNM Mate Reference sequence NaMe (`=' if same as RNAME) 40 | 8 MPOS 1-based Mate POSition 41 | 9 TLEN inferred Template LENgth (insert size) 42 | 10 SEQ query SEQuence on the same strand as the reference 43 | 11 QUAL query QUALity (ASCII-33 gives the Phred base quality) 44 | 12+ OPT variable OPTional fields in the format TAG:VTYPE:VALUE 45 | .TE 46 | .PP 47 | Each bit in the FLAG field is defined as: 48 | .TS 49 | lcbl. 50 | 0x0001 p the read is paired in sequencing 51 | 0x0002 P the read is mapped in a proper pair 52 | 0x0004 u the query sequence itself is unmapped 53 | 0x0008 U the mate is unmapped 54 | 0x0010 r strand of the query (1 for reverse) 55 | 0x0020 R strand of the mate 56 | 0x0040 1 the read is the first read in a pair 57 | 0x0080 2 the read is the second read in a pair 58 | 0x0100 s the alignment is not primary 59 | 0x0200 f the read fails platform/vendor quality checks 60 | 0x0400 d the read is either a PCR or an optical duplicate 61 | 0x0800 S the alignment is supplementary 62 | .TE 63 | .P 64 | where the second column gives the string representation of the FLAG field. 
#!/bin/sh
# Print the htslib version string.
#
# Usage: version.sh [numeric]
#   With no argument the full version is printed (the git description when
#   run inside a git clone, otherwise the master version below).
#   With "numeric" a purely numeric MAJOR.MINOR[.PATCH] form is printed.

# Master version, for use in tarballs or non-git source copies
VERSION=1.9

# If we have a git clone, then check against the current tag
if [ -e .git ]
then
    # If we ever get to 10.x this will need to be more liberal.
    # Only take git's answer when the command succeeds and prints something;
    # otherwise (no matching tag, git not installed) keep the master version
    # instead of ending up with an empty VERSION.
    v=`git describe --match '[0-9].[0-9]*' --dirty` && [ -n "$v" ] && VERSION=$v
fi

# Numeric version is for use in .dylib or .so libraries
#
# Follows the same logic from the Makefile commit c2e93911
# as non-numeric versions get bumped to patch level 255 to indicate
# an unknown value.
if [ "$1" = "numeric" ]
then
    v1=`expr "$VERSION" : '\([0-9]*\)'`
    v2=`expr "$VERSION" : '[0-9]*.\([0-9]*\)'`
    v3=`expr "$VERSION" : '[0-9]*.[0-9]*.\([0-9]*\)'`
    if [ -z "`expr "$VERSION" : '^\([0-9.]*\)$'`" ]
    then
        # VERSION contains something other than digits and dots.
        VERSION="$v1.$v2.255"
    else
        # Append ".PATCH" only when a patch component is present.
        VERSION="$v1.$v2${v3:+.}$v3"
    fi
fi

echo "$VERSION"
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. */ 24 | 25 | #include 26 | 27 | #include 28 | #include 29 | #include 30 | #include "htslib/hts.h" 31 | 32 | static void md5_one(const char *fn) 33 | { 34 | unsigned char buf[4096], digest[16]; 35 | char hex[33]; 36 | hts_md5_context *md5; 37 | int l; 38 | FILE *fp; 39 | 40 | fp = strcmp(fn, "-")? fopen(fn, "r") : stdin; 41 | if (fp == 0) { 42 | fprintf(stderr, "md5sum: %s: No such file or directory\n", fn); 43 | exit(1); 44 | } 45 | if (!(md5 = hts_md5_init())) { 46 | fprintf(stderr, "md5sum: %s: Failed to allocate md5 buffer\n", fn); 47 | exit(1); 48 | } 49 | while ((l = fread(buf, 1, 4096, fp)) > 0) 50 | hts_md5_update(md5, buf, l); 51 | hts_md5_final(digest, md5); 52 | if (fp != stdin) fclose(fp); 53 | hts_md5_hex(hex, digest); 54 | printf("%s %s\n", hex, fn); 55 | hts_md5_destroy(md5); 56 | } 57 | int main(int argc, char *argv[]) 58 | { 59 | int i; 60 | if (argc == 1) md5_one("-"); 61 | else for (i = 1; i < argc; ++i) md5_one(argv[i]); 62 | return 0; 63 | } 64 | -------------------------------------------------------------------------------- /lib/samtools-1.9/misc/r2plot.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env luajit 2 | 3 | function string:split(sep, n) 4 | local a, start = {}, 1; 5 | sep = sep or "%s+"; 6 | repeat 7 | local b, e = self:find(sep, start); 8 | if b == nil then 9 | table.insert(a, self:sub(start)); 10 | break 11 | end 12 | a[#a+1] = self:sub(start, b - 1); 13 | start = e + 1; 14 | if n and 
#a == n then 15 | table.insert(a, self:sub(start)); 16 | break 17 | end 18 | until start > #self; 19 | return a; 20 | end 21 | -- io.xopen: nil -> stdin; '-' -> stdin or stdout depending on mode; *.gz / *.bz2 -> pipe through gzip / bzip2; anything else -> io.open 22 | function io.xopen(fn, mode) 23 | mode = mode or 'r'; 24 | if fn == nil then return io.stdin; 25 | elseif fn == '-' then return (mode == 'r' and io.stdin) or io.stdout; 26 | elseif fn:sub(-3) == '.gz' then return (mode == 'r' and io.popen('gzip -dc ' .. fn, 'r')) or io.popen('gzip > ' .. fn, 'w'); 27 | elseif fn:sub(-4) == '.bz2' then return (mode == 'r' and io.popen('bzip2 -dc ' .. fn, 'r')) or io.popen('bzip2 > ' .. fn, 'w'); 28 | else return io.open(fn, mode) end 29 | end 30 | 31 | local eps = {}; 32 | 33 | function eps.func(fp) 34 | fp = fp or io.stdout 35 | fp:write("/C { dup 255 and 255 div exch dup -8 bitshift 255 and 255 div 3 1 roll -16 bitshift 255 and 255 div 3 1 roll setrgbcolor } bind def\n") 36 | fp:write("/L { 4 2 roll moveto lineto } bind def\n") 37 | fp:write("/LX { dup 4 -1 roll exch moveto lineto } bind def\n") 38 | fp:write("/LY { dup 4 -1 roll moveto exch lineto } bind def\n") 39 | fp:write("/LS { 3 1 roll moveto show } bind def\n") 40 | fp:write("/RS { dup stringwidth pop 4 -1 roll exch sub 3 -1 roll moveto show } bind def\n") 41 | fp:write("/B { 4 copy 3 1 roll exch 6 2 roll 8 -2 roll moveto lineto lineto lineto closepath } bind def\n") 42 | end 43 | 44 | function eps.font(ft, size, fp) 45 | fp = fp or io.stdout 46 | fp:write(string.format('/FS %d def\n', size)); 47 | fp:write('/FS4 FS 4 div def\n'); 48 | fp:write('/' .. ft .. ' findfont FS scalefont setfont\n'); 49 | end 50 | 51 | local scale = 8; 52 | 53 | if #arg == 0 then 54 | print("Usage: r2plot.lua "); 55 | os.exit(1) 56 | end 57 | 58 | local fp = io.xopen(arg[1]); 59 | local n = tonumber(fp:read()); 60 | 61 | print('%!PS-Adobe-3.0 EPSF-3.0'); 62 | print('%%' .. 
string.format('BoundingBox: -%d -%d %.3f %.3f\n', 10*scale, scale, (n+1)*scale, (n+1)*scale)); 63 | print(string.format('%.3f setlinewidth', scale)); 64 | print(string.format('/plot { setgray moveto 0 %d rlineto } def', scale)); 65 | print(string.format('/plothalf { setgray moveto 0 %.2f rlineto } def', scale/2)); 66 | eps.func(); 67 | eps.font('Helvetica', scale-1); 68 | 69 | local i = 1; 70 | for l in fp:lines() do 71 | local t = l:split('\t'); 72 | print(string.format("%d %d FS4 add (%s) RS", (i-1)*scale-2, (i-1)*scale, t[1])); 73 | for j = 2, #t do 74 | if tonumber(t[j]) > 0.01 then 75 | print(string.format('%.2f %.2f %.2f plot stroke', (i-1+.5)*scale, (j-2)*scale, 1.-t[j])); 76 | end 77 | end 78 | i = i + 1; 79 | end 80 | for j = 1, 21 do 81 | print(string.format('%.2f %.2f %.2f plothalf stroke', -8*scale, (j-1) * scale/2, 1.-(j-1)/20)); 82 | end 83 | print('showpage'); 84 | -------------------------------------------------------------------------------- /lib/samtools-1.9/misc/samtools_tab_completion: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2016 Genome Research Ltd. 3 | # 4 | # Author: George Hall 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining 7 | # a copy of this software and associated documentation files (the 8 | # "Software"), to deal in the Software without restriction, including 9 | # without limitation the rights to use, copy, modify, merge, publish, 10 | # distribute, sublicense, and/or sell copies of the Software, and to 11 | # permit persons to whom the Software is furnished to do so, subject to 12 | # the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included 15 | # in all copies or substantial portions of the Software. 
16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 21 | # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 | ################################################################################ 25 | 26 | # This script determines whether the user is trying to complete a subcommand or 27 | # a long-option, then determines and displays possible completions. For commands 28 | # with long-options, the initial '-' must first be typed in order to trigger 29 | # the completion mechanism. 30 | 31 | # This file must be sourced for the tab completions to work - it is advisable to 32 | # source it from your .bashrc. By default, it will only perform tab completion for 33 | # a binary called 'samtools'. To enable tab completion for other versions of 34 | # Samtools, or for Samtools binaries with different names, simply append 35 | # 'complete -F _samtools_options <name of samtools binary>' to the end of this file. This 36 | # file will then need to be sourced again. 
37 | 38 | _samtools_options() 39 | { 40 | 41 | SAMTOOLS_BIN="${COMP_WORDS[0]}" 42 | local CUR CURRENT_SUB OPTS 43 | COMPREPLY=() 44 | CUR="${COMP_WORDS[COMP_CWORD]}" 45 | 46 | if [[ $COMP_CWORD -eq 1 ]] ; then 47 | 48 | # If on the first word, generate possible subcommands, and tab complete those 49 | 50 | OPTS=$($SAMTOOLS_BIN 2>&1 | grep '^ ' | awk '{print $1}') 51 | 52 | COMPREPLY=($(compgen -W "${OPTS}" -- ${CUR})) 53 | return 0 54 | 55 | else 56 | 57 | # Complete long-options (if available) for current subcommand 58 | 59 | CURRENT_SUB="${COMP_WORDS[1]}" 60 | 61 | OPTS=$($SAMTOOLS_BIN $CURRENT_SUB 2>&1 | grep -oh "\(\-\-\)\([[:alnum:]]\|\-\)* " | \ 62 | xargs -I @ printf -- @) 63 | 64 | if [[ ${CUR} == -* ]] ; then 65 | COMPREPLY=($(compgen -W "${OPTS}" -- ${CUR})) 66 | return 0 67 | else 68 | # Assume the user wants normal file name completion 69 | COMPREPLY=($(compgen -o default)) 70 | fi 71 | 72 | fi 73 | 74 | } 75 | 76 | complete -F _samtools_options samtools 77 | 78 | -------------------------------------------------------------------------------- /lib/samtools-1.9/misc/wgsim.1: -------------------------------------------------------------------------------- 1 | .TH wgsim 1 "18 July 2018" "samtools-1.9" "Bioinformatics tools" 2 | .SH NAME 3 | wgsim \- Whole-genome sequencing read simulator 4 | .SH SYNOPSIS 5 | .B wgsim 6 | [\fI\,options\/\fR] \fI\,<in.ref.fa> <out.read1.fq> <out.read2.fq>\/\fR 7 | .PP 8 | <in.ref.fa> must be a fasta file containing a reference genome. 9 | .PP 10 | <out.read1.fq> and <out.read2.fq> are the first and second read output files. 
11 | .SH OPTIONS 12 | .TP 13 | \fB\-e\fR FLOAT 14 | base error rate [0.000] 15 | .TP 16 | \fB\-d\fR INT 17 | outer distance between the two ends [500] 18 | .TP 19 | \fB\-s\fR INT 20 | standard deviation [50] 21 | .TP 22 | \fB\-N\fR INT 23 | number of read pairs [1000000] 24 | .TP 25 | \fB\-1\fR INT 26 | length of the first read [70] 27 | .TP 28 | \fB\-2\fR INT 29 | length of the second read [70] 30 | .TP 31 | \fB\-r\fR FLOAT 32 | rate of mutations [0.0010] 33 | .TP 34 | \fB\-R\fR FLOAT 35 | fraction of indels [0.15] 36 | .TP 37 | \fB\-X\fR FLOAT 38 | probability an indel is extended [0.30] 39 | .TP 40 | \fB\-S\fR INT 41 | seed for random generator [\-1] 42 | .TP 43 | \fB\-A\fR FLOAT 44 | discard if the fraction of ambiguous bases higher than FLOAT [0.05] 45 | .TP 46 | \fB\-h\fR 47 | haplotype mode 48 | .TP 49 | Parameter defaults are given in square brackets. 50 | .SH AUTHOR 51 | Copyright 2008 Genome Research Limited. 52 | .br 53 | Copyright 2011 Heng Li. 54 | .TP 55 | wgsim is part of samtools, https://github.com/samtools/samtools 56 | -------------------------------------------------------------------------------- /lib/samtools-1.9/sam_header.h: -------------------------------------------------------------------------------- 1 | /* sam_header.h -- basic SAM/BAM header API. 2 | 3 | Copyright (C) 2009, 2012, 2013 Genome Research Ltd. 4 | 5 | Author: Petr Danecek 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. */ 24 | 25 | #ifndef __SAM_HEADER_H__ 26 | #define __SAM_HEADER_H__ 27 | 28 | #ifdef __cplusplus 29 | extern "C" { 30 | #endif 31 | 32 | void *sam_header_parse2(const char *headerText); 33 | void *sam_header_merge(int n, const void **dicts); 34 | void sam_header_free(void *header); 35 | char *sam_header_write(const void *headerDict); // returns a newly allocated string 36 | 37 | /* 38 | // Usage example 39 | const char *key, *val; 40 | void *iter = sam_header_parse2(bam->header->text); 41 | while ( iter = sam_header2key_val(iter, "RG","ID","SM", &key,&val) ) printf("%s\t%s\n", key,val); 42 | */ 43 | void *sam_header2key_val(void *iter, const char type[2], const char key_tag[2], const char value_tag[2], const char **key, const char **value); 44 | char **sam_header2list(const void *_dict, char type[2], char key_tag[2], int *_n); 45 | 46 | /* 47 | // Usage example 48 | int i, j, n; 49 | const char *tags[] = {"SN","LN","UR","M5",NULL}; 50 | void *dict = sam_header_parse2(bam->header->text); 51 | char **tbl = sam_header2tbl_n(dict, "SQ", tags, &n); 52 | for (i=0; i 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to 
do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. */ 24 | 25 | #include 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include "samtools.h" 33 | 34 | static void vprint_error_core(const char *subcommand, const char *format, va_list args, const char *extra) 35 | { 36 | fflush(stdout); 37 | if (subcommand && *subcommand) fprintf(stderr, "samtools %s: ", subcommand); 38 | else fprintf(stderr, "samtools: "); 39 | vfprintf(stderr, format, args); 40 | if (extra) fprintf(stderr, ": %s\n", extra); 41 | else fprintf(stderr, "\n"); 42 | fflush(stderr); 43 | } 44 | 45 | void print_error(const char *subcommand, const char *format, ...) 46 | { 47 | va_list args; 48 | va_start(args, format); 49 | vprint_error_core(subcommand, format, args, NULL); 50 | va_end(args); 51 | } 52 | 53 | void print_error_errno(const char *subcommand, const char *format, ...) 54 | { 55 | int err = errno; 56 | va_list args; 57 | va_start(args, format); 58 | vprint_error_core(subcommand, format, args, err? strerror(err) : NULL); 59 | va_end(args); 60 | } 61 | -------------------------------------------------------------------------------- /lib/samtools-1.9/sample.h: -------------------------------------------------------------------------------- 1 | /* sample.h -- group data by sample. 2 | 3 | Copyright (C) 2010 Broad Institute. 
4 | 5 | Author: Heng Li 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. */ 24 | 25 | #ifndef BAM_SAMPLE_H 26 | #define BAM_SAMPLE_H 27 | 28 | #include "htslib/kstring.h" 29 | 30 | typedef struct { 31 | int n, m; 32 | char **smpl; 33 | void *rg2smid, *sm2id; 34 | } bam_sample_t; 35 | 36 | bam_sample_t *bam_smpl_init(void); 37 | int bam_smpl_add(bam_sample_t *sm, const char *abs, const char *txt); 38 | int bam_smpl_rg2smid(const bam_sample_t *sm, const char *fn, const char *rg, kstring_t *str); 39 | void bam_smpl_destroy(bam_sample_t *sm); 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /lib/samtools-1.9/samtools.h: -------------------------------------------------------------------------------- 1 | /* samtools.h -- utility routines. 2 | 3 | Copyright (C) 2013-2015 Genome Research Ltd. 
4 | 5 | Author: Petr Danecek 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. */ 24 | 25 | #ifndef SAMTOOLS_H 26 | #define SAMTOOLS_H 27 | 28 | const char *samtools_version(void); 29 | 30 | #if defined __GNUC__ && __GNUC__ >= 2 31 | #define CHECK_PRINTF(fmt,args) __attribute__ ((format (printf, fmt, args))) 32 | #else 33 | #define CHECK_PRINTF(fmt,args) 34 | #endif 35 | 36 | void print_error(const char *subcommand, const char *format, ...) CHECK_PRINTF(2, 3); 37 | void print_error_errno(const char *subcommand, const char *format, ...) CHECK_PRINTF(2, 3); 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /lib/samtools-1.9/stats_isize.h: -------------------------------------------------------------------------------- 1 | /* stats_isize.h -- generalised insert size calculation for samtools stats. 2 | 3 | Copyright (C) 2014 Genome Research Ltd. 
4 | 5 | Author: Nicholas Clarke 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 
*/ 24 | 25 | #include 26 | #include 27 | 28 | typedef struct 29 | { 30 | int total; 31 | uint64_t *isize_inward, *isize_outward, *isize_other; 32 | } 33 | isize_dense_data_t; 34 | 35 | typedef struct 36 | { 37 | uint64_t isize_inward, isize_outward, isize_other; 38 | } 39 | isize_sparse_record_t; 40 | 41 | KHASH_MAP_INIT_INT(m32, isize_sparse_record_t *) 42 | 43 | typedef struct 44 | { 45 | int max; 46 | khash_t(m32) *array; 47 | } 48 | isize_sparse_data_t; 49 | 50 | typedef union { 51 | isize_sparse_data_t *sparse; 52 | isize_dense_data_t *dense; 53 | } isize_data_t; 54 | 55 | // Insert size structure 56 | typedef struct 57 | { 58 | isize_data_t data; 59 | 60 | // Maximum 61 | int (*nitems)(isize_data_t); 62 | 63 | // Fetch the number of inserts of a given size 64 | uint64_t (*inward)(isize_data_t, int); 65 | uint64_t (*outward)(isize_data_t, int); 66 | uint64_t (*other)(isize_data_t, int); 67 | 68 | // Set the number of inserts of a given size 69 | void (*set_inward)(isize_data_t, int, uint64_t); 70 | void (*set_outward)(isize_data_t, int, uint64_t); 71 | void (*set_other)(isize_data_t, int, uint64_t); 72 | 73 | // Increment the number of inserts of a given size 74 | void (*inc_inward)(isize_data_t, int); 75 | void (*inc_outward)(isize_data_t, int); 76 | void (*inc_other)(isize_data_t, int); 77 | 78 | // Free this structure 79 | void (*isize_free)(isize_data_t); 80 | } 81 | isize_t; 82 | 83 | isize_t *init_isize_t(int bound); 84 | -------------------------------------------------------------------------------- /lib/samtools-1.9/version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Master version, for use in tarballs or non-git source copies 4 | VERSION=1.9 5 | 6 | # If we have a git clone, then check against the current tag 7 | if [ -e .git ] 8 | then 9 | # If we ever get to 10.x this will need to be more liberal 10 | VERSION=`git describe --match '[0-9].[0-9]*' --dirty --always` 11 | fi 12 | 13 | 
echo $VERSION 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools 2 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all clean debug profile release 2 | 3 | CXXFLAGS += -Wall -Wextra -pthread -std=c++11 -g 4 | CXXFLAGS += -Wno-missing-field-initializers 5 | LDFLAGS += -pthread -std=c++11 -rdynamic 6 | 7 | MODULES_BIN := ${BIN_DIR}/flye-modules 8 | 9 | profile: CXXFLAGS += -pg 10 | profile: LDFLAGS += -pg 11 | profile: release 12 | 13 | release: CXXFLAGS += -O3 -DNDEBUG 14 | release: flye-modules 15 | 16 | SANITIZE_FLAGS += -D_GLIBCXX_SANITIZE_VECTOR -U_FORTIFY_SOURCE -fsanitize=address -fno-omit-frame-pointer -fsanitize=undefined 17 | #SANITIZE_FLAGS += -fsanitize=thread -fsanitize=leak -fsanitize=undefined 18 | debug: CXXFLAGS += -Og ${SANITIZE_FLAGS} 19 | #debug: CXXFLAGS += -D_GLIBCXX_DEBUG 20 | debug: LDFLAGS += ${SANITIZE_FLAGS} 21 | debug: flye-modules 22 | 23 | 24 | #sequence module 25 | sequence_obj := ${patsubst %.cpp,%.o,${wildcard sequence/*.cpp}} 26 | 27 | sequence/%.o: sequence/%.cpp sequence/*.h common/*.h 28 | ${CXX} -c ${CXXFLAGS} $< -o $@ 29 | 30 | #flye-assemble module 31 | assemble_obj := ${patsubst %.cpp,%.o,${wildcard assemble/*.cpp}} 32 | 33 | assemble/%.o: assemble/%.cpp assemble/*.h sequence/*.h common/*.h 34 | ${CXX} -c ${CXXFLAGS} $< -o $@ 35 | 36 | #flye-repeat module 37 | repeat_obj := ${patsubst %.cpp,%.o,${wildcard repeat_graph/*.cpp}} 38 | 39 | repeat_graph/%.o: repeat_graph/%.cpp repeat_graph/*.h sequence/*.h common/*.h 40 | ${CXX} -c ${CXXFLAGS} $< -o $@ 41 | 42 | #flye-contigger module (objects also depend on the module's own headers) 43 | contigger_obj := ${patsubst %.cpp,%.o,${wildcard contigger/*.cpp}} 44 | 45 | contigger/%.o: contigger/%.cpp contigger/*.h repeat_graph/*.h sequence/*.h common/*.h 46 | ${CXX} -c 
${CXXFLAGS} $< -o $@ 47 | 48 | 49 | #flye-polish module 50 | polish_obj := ${patsubst %.cpp,%.o,${wildcard polishing/*.cpp}} 51 | 52 | polishing/%.o: polishing/%.cpp bin/polisher.cpp polishing/*.h common/*h 53 | ${CXX} -c ${CXXFLAGS} $< -o $@ 54 | 55 | #main module 56 | #main_obj := ${patsubst %.cpp,%.o,${wildcard main/*.cpp}} 57 | main_obj := main.o 58 | flye-modules: ${assemble_obj} ${sequence_obj} ${repeat_obj} ${contigger_obj} ${polish_obj} ${main_obj} 59 | ${CXX} ${assemble_obj} ${sequence_obj} ${repeat_obj} ${contigger_obj} ${polish_obj} ${main_obj} -o ${MODULES_BIN} ${LDFLAGS} 60 | 61 | #main/%.o: main/%.cpp assemble/*.h sequence/*.h common/*.h repeat_graph/*.h contigger/*.h polishing/*.h 62 | main.o: main.cpp 63 | ${CXX} -c ${CXXFLAGS} $< -o $@ 64 | 65 | 66 | clean: 67 | rm -f ${repeat_obj} 68 | rm -f ${sequence_obj} 69 | rm -f ${assemble_obj} 70 | rm -f ${polish_obj} 71 | rm -f ${contigger_obj} 72 | rm -f ${main_obj} 73 | rm -f ${MODULES_BIN} 74 | -------------------------------------------------------------------------------- /src/assemble/chimera.h: -------------------------------------------------------------------------------- 1 | //(c) 2016 by Authors 2 | //This file is a part of ABruijn program. 
3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #pragma once 6 | 7 | #include "../sequence/overlap.h" 8 | #include "../sequence/sequence_container.h" 9 | #include 10 | 11 | class ChimeraDetector 12 | { 13 | public: 14 | ChimeraDetector(const SequenceContainer& readContainer, 15 | OverlapContainer& ovlpContainer): 16 | _seqContainer(readContainer), 17 | _ovlpContainer(ovlpContainer), 18 | _overlapCoverage(0) 19 | {} 20 | 21 | void estimateGlobalCoverage(); 22 | bool isChimeric(FastaRecord::Id readId, 23 | const std::vector& readOvlps); 24 | 25 | float maxCoverageDrop(FastaRecord::Id readId, 26 | const std::vector& readOvlps); 27 | 28 | int getOverlapCoverage() const {return _overlapCoverage;} 29 | int getRightTrim(FastaRecord::Id readId); 30 | bool isRepetitiveRegion(FastaRecord::Id readId, int32_t start, int32_t end, bool debug=false); 31 | 32 | private: 33 | std::vector getReadCoverage(FastaRecord::Id readId, 34 | const std::vector& readOvlps); 35 | 36 | bool testReadByCoverage(FastaRecord::Id readId, 37 | const std::vector& readOvlps); 38 | 39 | struct CachedCoverage 40 | { 41 | CachedCoverage(): 42 | cached(false) {} 43 | 44 | std::vector* coverageFullAln; 45 | std::vector* coverageIncomleteAln; 46 | bool cached; 47 | }; 48 | CachedCoverage getCachedCoverage(FastaRecord::Id readId); 49 | 50 | const SequenceContainer& _seqContainer; 51 | OverlapContainer& _ovlpContainer; 52 | cuckoohash_map _chimeras; 53 | cuckoohash_map _localOvlpsStorage; 54 | int _overlapCoverage; 55 | }; 56 | -------------------------------------------------------------------------------- /src/assemble/extender.h: -------------------------------------------------------------------------------- 1 | //(c) 2016 by Authors 2 | //This file is a part of ABruijn program. 
3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #include "../sequence/sequence_container.h" 10 | #include "../sequence/overlap.h" 11 | #include "../sequence/consensus_generator.h" 12 | #include "chimera.h" 13 | 14 | class Extender 15 | { 16 | public: 17 | Extender(const SequenceContainer& readsContainer, 18 | OverlapContainer& ovlpContainer, 19 | int safeOverlap): 20 | _safeOverlap(safeOverlap), 21 | _readsContainer(readsContainer), 22 | _ovlpContainer(ovlpContainer), 23 | _chimDetector(readsContainer, ovlpContainer) 24 | {} 25 | 26 | void assembleDisjointigs(); 27 | const std::vector& getDisjointigPaths() const 28 | {return _disjointigPaths;} 29 | 30 | private: 31 | struct ExtensionInfo 32 | { 33 | ExtensionInfo(): leftTip(false), rightTip(false), 34 | numSuspicious(0), meanOverlaps(0), stepsToTurn(0), 35 | assembledLength(0), singleton(false), 36 | avgOverlapSize(0), minOverlapSize(0), 37 | //leftAsmOverlap(0), rightAsmOverlap(0), 38 | shortExtensions(0) {} 39 | 40 | std::vector reads; 41 | bool leftTip; 42 | bool rightTip; 43 | int numSuspicious; 44 | int meanOverlaps; 45 | int stepsToTurn; 46 | int assembledLength; 47 | bool singleton; 48 | int avgOverlapSize; 49 | int minOverlapSize; 50 | //int leftAsmOverlap; 51 | //int rightAsmOverlap; 52 | int shortExtensions; 53 | }; 54 | 55 | const int _safeOverlap; 56 | 57 | ExtensionInfo extendDisjointig(FastaRecord::Id startingRead); 58 | //int countRightExtensions(FastaRecord::Id readId) const; 59 | int countRightExtensions(const std::vector&) const; 60 | int countLeftExtensions(const std::vector&) const; 61 | bool extendsRight(const OverlapRange& ovlp) const; 62 | bool extendsLeft(const OverlapRange& ovlp) const; 63 | void convertToDisjointigs(); 64 | std::vector 65 | getInnerReads(const std::vector& ovlps); 66 | 67 | const SequenceContainer& _readsContainer; 68 | OverlapContainer& _ovlpContainer; 69 | ChimeraDetector _chimDetector; 70 | 71 | std::vector 
_readLists; 72 | std::vector _disjointigPaths; 73 | cuckoohash_map _innerReads; 74 | }; 75 | -------------------------------------------------------------------------------- /src/assemble/parameters_estimator.cpp: -------------------------------------------------------------------------------- 1 | //(c) 2016 by Authors 2 | //This file is a part of ABruijn program. 3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #include "parameters_estimator.h" 6 | #include "../common/logger.h" 7 | 8 | 9 | size_t ParametersEstimator::genomeSizeEstimate() 10 | { 11 | return _takenKmers; 12 | } 13 | 14 | 15 | void ParametersEstimator::estimateMinKmerCount() 16 | { 17 | const int MIN_CUTOFF = 2; 18 | 19 | size_t takenKmers = 0; 20 | size_t cutoff = 0; 21 | size_t prevDiff = 0; 22 | for (auto mapPair = _vertexIndex.getKmerHist().rbegin(); 23 | mapPair != _vertexIndex.getKmerHist().rend(); ++mapPair) 24 | { 25 | takenKmers += mapPair->second; 26 | if (takenKmers >= _genomeSize) 27 | { 28 | if (std::max(takenKmers, _genomeSize) - 29 | std::min(takenKmers, _genomeSize) < prevDiff) 30 | { 31 | cutoff = mapPair->first; 32 | } 33 | else 34 | { 35 | cutoff = mapPair->first + 1; 36 | takenKmers -= mapPair->second; 37 | } 38 | break; 39 | } 40 | prevDiff = std::max(takenKmers, _genomeSize) - 41 | std::min(takenKmers, _genomeSize); 42 | } 43 | 44 | size_t filteredKmers = 0; 45 | for (auto itKmer : _vertexIndex.getKmerHist()) 46 | { 47 | if (itKmer.first >= cutoff) break; 48 | filteredKmers += itKmer.second; 49 | } 50 | 51 | if (cutoff < 2) 52 | { 53 | if ((bool)Config::get("low_cutoff_warning")) 54 | { 55 | Logger::get().warning() << "Unable to separate erroneous k-mers " 56 | "from solid k-mers. 
Possible reasons: \n" 57 | "\t(1) Incorrect expected assembly size parameter \n" 58 | "\t(2) Highly uneven coverage of the assembly \n" 59 | "\t(3) Running with error-corrected reads in raw reads mode\n" 60 | "\tAssembly will continue, but results might not be optimal"; 61 | } 62 | cutoff = MIN_CUTOFF; 63 | } 64 | 65 | Logger::get().debug() << "Estimated minimum kmer coverage: " << cutoff; 66 | //Logger::get().debug() << takenKmers << " unique kmers selected"; 67 | Logger::get().debug() << "Filtered " << filteredKmers << " erroneous k-mers"; 68 | 69 | _takenKmers = takenKmers; 70 | _minKmerCount = cutoff; 71 | } 72 | -------------------------------------------------------------------------------- /src/assemble/parameters_estimator.h: -------------------------------------------------------------------------------- 1 | //(c) 2016 by Authors 2 | //This file is a part of ABruijn program. 3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #include "../sequence/vertex_index.h" 6 | #include "../sequence/sequence_container.h" 7 | #include 8 | 9 | class ParametersEstimator 10 | { 11 | public: 12 | ParametersEstimator(const SequenceContainer& seqContainer, 13 | const VertexIndex& vertexIndex, size_t genomeSize): 14 | _vertexIndex(vertexIndex), 15 | _seqContainer(seqContainer), 16 | _genomeSize(genomeSize), 17 | _minKmerCount(std::numeric_limits::max()) 18 | {} 19 | 20 | void estimateMinKmerCount(); 21 | size_t genomeSizeEstimate(); 22 | size_t minKmerCount() {return _minKmerCount;} 23 | private: 24 | 25 | const VertexIndex& _vertexIndex; 26 | const SequenceContainer& _seqContainer; 27 | const size_t _genomeSize; 28 | size_t _takenKmers; 29 | size_t _minKmerCount; 30 | }; 31 | -------------------------------------------------------------------------------- /src/common/disjoint_set.h: -------------------------------------------------------------------------------- 1 | //(c) 2013-2014 by Authors 2 | //This file is a part of Ragout program. 
//Released under the BSD license (see LICENSE file)

#pragma once

#include <unordered_map>
#include <utility>
#include <vector>

//Node of a disjoint-set forest. A fresh node is the root of its own set.
template <class T>
struct SetNode
{
    SetNode(const T& data): parent(this), rank(0), data(data) {}

    SetNode* parent;
    int rank;
    T data;
};

//Returns the representative (root) of the set containing elem and
//re-points every node on the walked path directly at that root
template <class T>
SetNode<T>* findSet(SetNode<T>* elem)
{
    SetNode<T>* root = elem;
    while (root->parent != root) root = root->parent;

    //path compression, done iteratively to keep the stack depth constant
    while (elem->parent != root)
    {
        SetNode<T>* next = elem->parent;
        elem->parent = root;
        elem = next;
    }
    return root;
}

//Merges the sets containing node1 and node2 (union by rank)
template <class T>
void unionSet(SetNode<T>* node1, SetNode<T>* node2)
{
    SetNode<T>* root1 = findSet(node1);
    SetNode<T>* root2 = findSet(node2);
    if (root1 == root2) return;

    if (root1->rank > root2->rank)
    {
        root2->parent = root1;
    }
    else
    {
        root1->parent = root2;
        if (root1->rank == root2->rank)
        {
            ++root2->rank;
        }
    }
}

//Groups the payloads of the given nodes by the set they belong to.
//The map key is the representative node of each set.
template <class T>
std::unordered_map<SetNode<T>*, std::vector<T>>
    groupBySet(const std::vector<SetNode<T>*>& sets)
{
    std::unordered_map<SetNode<T>*, std::vector<T>> groups;
    for (auto& setNode : sets)
    {
        groups[findSet(setNode)].push_back(setNode->data);
    }
    return groups;
}

//Vector that stores the set nodes and automatically deletes
//them in the end. Does not have virtual table - do not use polymorphism!
//All set elements should be pushed before any set operations are
//applied (like union) - do not modify the vector afterwards.
//Copying is disabled because a copy would delete the same nodes twice;
//moving transfers ownership of the nodes.
template <class T>
class SetVec : public std::vector<SetNode<T>*>
{
    typedef std::vector<SetNode<T>*> Base;
public:
    SetVec() {}
    SetVec(const SetVec&) = delete;
    SetVec& operator=(const SetVec&) = delete;

    SetVec(SetVec&& other): Base(std::move(other))
    {
        other.clear();
    }
    SetVec& operator=(SetVec&& other)
    {
        if (this != &other)
        {
            this->deleteNodes();
            Base::operator=(std::move(other));
            other.clear();
        }
        return *this;
    }

    ~SetVec()
    {
        this->deleteNodes();
    }

private:
    void deleteNodes()
    {
        for (auto& x : *this) delete x;
        this->clear();
    }
};

//--------------------------------------------------------------------------------
// /src/common/logger.h:
//--------------------------------------------------------------------------------
//(c) 2013-2016 by Authors
//This file is a part of Ragout program.
//Released under the BSD license (see LICENSE file)

#pragma once

#include <ctime>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>

//Process-wide logger (singleton). Messages go to std::cerr and, once
//setOutputFile() succeeded, are also appended to the log file.
//Debug messages reach the console only if setDebugging(true) was called.
class Logger
{
public:
    static Logger& get()
    {
        static Logger instance;
        return instance;
    }

    //Starts appending log messages to the given file.
    //Throws std::runtime_error if the file cannot be opened; in that case
    //a previously configured log file (if any) stays in use.
    void setOutputFile(const std::string& filename)
    {
        std::ofstream newFile(filename,
                              std::ofstream::out | std::ofstream::app);
        if (!newFile.is_open())
        {
            throw std::runtime_error("Can't open log file");
        }
        //move-assignment closes the previously opened file, so calling
        //this function repeatedly keeps the stream usable
        _logFile = std::move(newFile);
        _logFileSet = true;
    }

    void setDebugging(bool debug) {_debug = debug;}

    //Writes one log record: the prefix (timestamp + level) on construction,
    //the streamed values afterwards, and the end of line on destruction
    class StreamWriter
    {
    public:
        StreamWriter(const std::string& level,
                     std::ostream* consoleStream = nullptr,
                     std::ostream* fileStream = nullptr):
            _fileStream(fileStream), _consoleStream(consoleStream)
        {
            if (_fileStream) *_fileStream << timestamp() << " " << level << " ";
            if (_consoleStream) *_consoleStream << timestamp()
                                                << " " << level << " ";
        }
        //the moved-from writer is detached, so the line is terminated
        //exactly once even when the return value is not elided
        StreamWriter(StreamWriter&& other):
            _fileStream(other._fileStream),
            _consoleStream(other._consoleStream)
        {
            other._fileStream = nullptr;
            other._consoleStream = nullptr;
        }
        StreamWriter(const StreamWriter&) = delete;
        StreamWriter& operator=(const StreamWriter&) = delete;

        ~StreamWriter()
        {
            //std::endl on purpose: every record is flushed immediately
            if (_fileStream) *_fileStream << std::endl;
            if (_consoleStream) *_consoleStream << std::endl;
        }

        template <class T>
        Logger::StreamWriter& operator<< (const T& val)
        {
            if (_fileStream) *_fileStream << val;
            if (_consoleStream) *_consoleStream << val;
            return *this;
        }

    private:
        std::ostream* _fileStream;
        std::ostream* _consoleStream;
    };

    StreamWriter info()
    {
        return StreamWriter("INFO:", &std::cerr, this->fileSink());
    }

    StreamWriter warning()
    {
        return StreamWriter("WARNING:", &std::cerr, this->fileSink());
    }

    StreamWriter error()
    {
        return StreamWriter("ERROR:", &std::cerr, this->fileSink());
    }

    StreamWriter debug()
    {
        std::ostream* consolePtr = _debug ? &std::cerr : nullptr;
        return StreamWriter("DEBUG:", consolePtr, this->fileSink());
    }

private:
    //Formats the current local time. localtime_r is used instead of
    //std::localtime because the latter returns a shared static buffer
    static std::string timestamp(const char* format = "[%Y-%m-%d %H:%M:%S]")
    {
        const std::time_t now = std::time(nullptr);
        std::tm localNow = std::tm();
        ::localtime_r(&now, &localNow);
        char cstr[256] = {0};
        std::strftime(cstr, sizeof(cstr), format, &localNow);
        return cstr;
    }

    std::ostream* fileSink()
    {
        return _logFileSet ? &_logFile : nullptr;
    }

    Logger():
        _debug(false), _logFileSet(false)
    {}
    ~Logger()
    {
        if (_logFileSet)
        {
            _logFile << "-----------End assembly log------------\n";
        }
    }

    bool _debug;
    bool _logFileSet;
    std::ofstream _logFile;
};

//--------------------------------------------------------------------------------
// /src/common/matrix.h:
//--------------------------------------------------------------------------------
//(c) 2016 by Authors
//This file is a part of ABruijn program.
//Released under the BSD license (see LICENSE file)

#pragma once

#include <algorithm>
#include <cstddef>
#include <stdexcept>
#include <utility>

//Dense row-major matrix that owns its storage.
//A default-constructed matrix is empty (0 x 0, no storage).
template <class T>
class Matrix
{
public:
    Matrix(): _rows(0), _cols(0), _data(nullptr) {}

    //Deep copy. Copying an empty matrix yields an empty matrix
    //(it must not go through the sized constructor, which rejects zeros)
    Matrix(const Matrix& other):
        _rows(other._rows), _cols(other._cols),
        _data(other._data ? new T[other._rows * other._cols] : nullptr)
    {
        if (_data) std::copy(other._data, other._data + _rows * _cols, _data);
    }
    Matrix(Matrix&& other) noexcept:
        Matrix()
    {
        this->swap(other);
    }
    //the previous contents end up in `other` and are released with it
    Matrix& operator=(Matrix&& other) noexcept
    {
        this->swap(other);
        return *this;
    }
    Matrix& operator=(const Matrix& other)
    {
        Matrix temp(other);
        this->swap(temp);
        return *this;
    }

    //Creates a rows x cols matrix with every element set to val.
    //Throws std::runtime_error if either dimension is zero.
    Matrix(size_t rows, size_t cols, T val = 0):
        _rows(rows), _cols(cols), _data(nullptr)
    {
        if (!rows || !cols)
            throw std::runtime_error("Zero matrix dimension");
        _data = new T[rows * cols];
        std::fill(_data, _data + rows * cols, val);
    }
    ~Matrix()
    {
        delete[] _data;
    }

    //element access is not bounds-checked
    T& at(size_t row, size_t col) {return _data[row * _cols + col];}
    const T& at(size_t row, size_t col) const {return _data[row * _cols + col];}
    size_t nrows() const {return _rows;}
    size_t ncols() const {return _cols;}

private:
    void swap(Matrix& other) noexcept
    {
        std::swap(_cols, other._cols);
        std::swap(_rows, other._rows);
        std::swap(_data, other._data);
    }

    size_t _rows;
    size_t _cols;
    T* _data;
};

//--------------------------------------------------------------------------------
// /src/common/parallel.h:
//--------------------------------------------------------------------------------
//(c) 2016 by Authors
//This file is a part of ABruijn program.
3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "progress_bar.h" 11 | 12 | //simple thread pool implementation 13 | //updateFun should be thread-safe! 14 | template 15 | void processInParallel(const std::vector& scheduledTasks, 16 | std::function updateFun, 17 | size_t maxThreads, bool progressBar) 18 | { 19 | if (scheduledTasks.empty()) return; 20 | 21 | std::atomic jobId(0); 22 | ProgressPercent progress(scheduledTasks.size()); 23 | if (progressBar) progress.advance(0); 24 | 25 | auto threadWorker = [&jobId, &scheduledTasks, &updateFun, 26 | &progress, progressBar]() 27 | { 28 | while (true) 29 | { 30 | size_t expected = 0; 31 | while(true) 32 | { 33 | expected = jobId; 34 | if (jobId == scheduledTasks.size()) 35 | { 36 | return; 37 | } 38 | if (jobId.compare_exchange_weak(expected, expected + 1)) 39 | { 40 | break; 41 | } 42 | } 43 | updateFun(scheduledTasks[expected]); 44 | if (progressBar) progress.advance(); 45 | } 46 | }; 47 | 48 | std::vector threads(std::min(maxThreads, 49 | scheduledTasks.size())); 50 | for (size_t i = 0; i < threads.size(); ++i) 51 | { 52 | threads[i] = std::thread(threadWorker); 53 | } 54 | for (size_t i = 0; i < threads.size(); ++i) 55 | { 56 | threads[i].join(); 57 | } 58 | } 59 | 60 | -------------------------------------------------------------------------------- /src/common/progress_bar.h: -------------------------------------------------------------------------------- 1 | //(c) 2013-2016 by Authors 2 | //This file is a part of Ragout program. 
//Released under the BSD license (see LICENSE file)

#pragma once

#include <atomic>
#include <cstddef>
#include <iostream>

//Prints progress to std::cerr in steps of 10% ("0% 10% ... 100% ").
//advance() may be called from several threads; once 100% has been
//printed all further updates are ignored.
class ProgressPercent
{
public:
    ProgressPercent(size_t finalCount = 0):
        _finalCount(finalCount), _curCount(0), _prevPercent(-1),
        _stopped(false)
    {}

    void setFinalCount(size_t finalCount) {_finalCount = finalCount;}
    void setValue(size_t value)
    {
        this->advance(value - _curCount);
    }
    void setDone()
    {
        this->setValue(_finalCount);
    }
    //Adds step to the current count and prints the new ten-percent mark
    //if one was reached. Nothing is printed while the final count is zero.
    void advance(size_t step = 1)
    {
        if (_stopped) return;

        const size_t current = (_curCount += step);
        if (_finalCount == 0) return;    //total unknown: avoid dividing by 0
        const int percent = static_cast<int>(10UL * current / _finalCount);

        //publish the mark unless another thread already reported the
        //same or a higher one
        int published = _prevPercent;
        while (percent > published)
        {
            if (_prevPercent.compare_exchange_strong(published, percent))
            {
                std::cerr << percent * 10 << "% ";
                if (percent >= 10)
                {
                    std::cerr << std::endl;
                    _stopped = true;
                }
                break;
            }
        }
    }

private:
    size_t _finalCount;
    std::atomic<size_t> _curCount;
    std::atomic<int> _prevPercent;
    std::atomic<bool> _stopped;
};

//--------------------------------------------------------------------------------
// /src/common/utils.h:
//--------------------------------------------------------------------------------
//(c) 2016 by Authors
//This file is a part of ABruijn program.
3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "logger.h" 13 | 14 | template 15 | void vecRemove(std::vector& v, T val) 16 | { 17 | v.erase(std::remove(v.begin(), v.end(), val), v.end()); 18 | } 19 | 20 | struct pairhash 21 | { 22 | public: 23 | template 24 | std::size_t operator()(const std::pair &x) const 25 | { 26 | return std::hash()(x.first) ^ std::hash()(x.second); 27 | } 28 | }; 29 | 30 | 31 | template 32 | T quantile(const std::vector& vec, int percent) 33 | { 34 | if (vec.empty()) return 0; 35 | //NOTE: there's a bug in libstdc++ nth_element, 36 | //that sometimes leads to a segfault. This is why 37 | //we have this inefficient impleemntation here 38 | //std::nth_element(vec.begin(), vec.begin() + vec.size() / 2, 39 | // vec.end()); 40 | auto sortedVec = vec; 41 | std::sort(sortedVec.begin(), sortedVec.end()); 42 | size_t targetId = std::min(vec.size() * (size_t)percent / 100, 43 | vec.size() - 1); 44 | return sortedVec[targetId]; 45 | } 46 | 47 | template 48 | T median(const std::vector& vec) 49 | { 50 | return quantile(vec, 50); 51 | } 52 | 53 | inline std::vector 54 | splitString(const std::string &s, char delim) 55 | { 56 | std::vector elems; 57 | std::stringstream ss(s); 58 | std::string item; 59 | while (std::getline(ss, item, delim)) elems.push_back(item); 60 | return elems; 61 | } 62 | 63 | inline bool fileExists(const std::string& path) 64 | { 65 | std::ifstream fin(path); 66 | return fin.good(); 67 | } 68 | 69 | inline void segfaultHandler(int signal __attribute__((unused))) 70 | { 71 | void *stackArray[20]; 72 | size_t size = backtrace(stackArray, 10); 73 | Logger::get().error() << "Segmentation fault! 
Backtrace:"; 74 | char** backtrace = backtrace_symbols(stackArray, size); 75 | for (size_t i = 0; i < size; ++i) 76 | { 77 | Logger::get().error() << "\t" << backtrace[i]; 78 | } 79 | abort(); 80 | } 81 | 82 | inline void exceptionHandler() 83 | { 84 | static bool triedThrow = false; 85 | try 86 | { 87 | if (!triedThrow) 88 | { 89 | triedThrow = true; 90 | throw; 91 | } 92 | } 93 | catch (const std::exception &e) 94 | { 95 | Logger::get().error() << "Caught unhandled exception: " << e.what(); 96 | } 97 | catch (...) {} 98 | 99 | void *stackArray[20]; 100 | size_t size = backtrace(stackArray, 10); 101 | char** backtrace = backtrace_symbols(stackArray, size); 102 | for (size_t i = 0; i < size; ++i) 103 | { 104 | Logger::get().error() << "\t" << backtrace[i]; 105 | } 106 | abort(); 107 | } 108 | 109 | -------------------------------------------------------------------------------- /src/contigger/contig_extender.h: -------------------------------------------------------------------------------- 1 | //(c) 2016-2017 by Authors 2 | //This file is a part of ABruijn program. 
3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #pragma once 6 | 7 | #include "../repeat_graph/repeat_graph.h" 8 | #include "../repeat_graph/read_aligner.h" 9 | #include "../repeat_graph/graph_processing.h" 10 | 11 | 12 | 13 | class ContigExtender 14 | { 15 | public: 16 | ContigExtender(RepeatGraph& graph, const ReadAligner& aligner, 17 | const SequenceContainer& asmSeqs, 18 | const SequenceContainer& readSeqs): 19 | _graph(graph), _aligner(aligner), 20 | _asmSeqs(asmSeqs), _readSeqs(readSeqs) {} 21 | 22 | void generateUnbranchingPaths(); 23 | void generateContigs(); 24 | void outputContigs(const std::string& filename); 25 | void outputStatsTable(const std::string& filename); 26 | void outputScaffoldConnections(const std::string& filename); 27 | void appendGfaPaths(const std::string& filename); 28 | //std::vector getContigPaths(); 29 | 30 | const std::vector& getUnbranchingPaths() 31 | {return _unbranchingPaths;} 32 | private: 33 | struct Contig 34 | { 35 | Contig(const UnbranchingPath& corePath): 36 | graphEdges(corePath), graphPaths({&corePath}) 37 | {} 38 | 39 | UnbranchingPath graphEdges; 40 | std::vector graphPaths; 41 | DnaSequence sequence; 42 | }; 43 | struct Scaffold 44 | { 45 | GraphEdge* leftContig; 46 | GraphEdge* rightContig; 47 | std::unordered_set repetitiveEdges; 48 | }; 49 | struct UpathAlignment 50 | { 51 | GraphAlignment aln; 52 | UnbranchingPath* upath; 53 | }; 54 | 55 | std::vector asUpaths(const GraphPath& path); 56 | std::vector asUpathAlignment(const GraphAlignment& aln); 57 | 58 | std::vector _unbranchingPaths; 59 | std::unordered_map _edgeToPath; 60 | std::vector _contigs; 61 | 62 | RepeatGraph& _graph; 63 | const ReadAligner& _aligner; 64 | const SequenceContainer& _asmSeqs; 65 | const SequenceContainer& _readSeqs; 66 | }; 67 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | //(c) 2020 by 
//Authors
//This file is a part of the Flye package.
//Released under the BSD license (see LICENSE file)

#include <iostream>
#include <string>

int assemble_main(int argc, char** argv);
int repeat_main(int argc, char** argv);
int contigger_main(int argc, char** argv);
int polisher_main(int argc, char** argv);

//Prints the usage line to stderr and returns the failure exit code
static int usageError()
{
    std::cerr << "Usage: flye-modules [assemble | repeat | contigger | polisher] ..."
              << std::endl;
    return 1;
}

//Dispatches to the module named by the first argument. The module
//receives the remaining arguments, with its own name as argv[0].
int main(int argc, char** argv)
{
    if (argc < 2) return usageError();

    const std::string module = argv[1];
    if (module == "assemble") return assemble_main(argc - 1, argv + 1);
    if (module == "repeat") return repeat_main(argc - 1, argv + 1);
    if (module == "contigger") return contigger_main(argc - 1, argv + 1);
    if (module == "polisher") return polisher_main(argc - 1, argv + 1);

    return usageError();
}

//--------------------------------------------------------------------------------
// /src/polishing/alignment.h:
//--------------------------------------------------------------------------------
//(c) 2016 by Authors
//This file is a part of ABruijn program.
3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "../common/matrix.h" 14 | #include "subs_matrix.h" 15 | 16 | 17 | class Alignment 18 | { 19 | 20 | public: 21 | Alignment(size_t size, const SubstitutionMatrix& sm); 22 | 23 | typedef Matrix ScoreMatrix; 24 | 25 | AlnScoreType globalAlignment(const std::string& consensus, 26 | const std::vector& reads); 27 | 28 | AlnScoreType addDeletion(unsigned int letterIndex) const; 29 | AlnScoreType addSubstitution(unsigned int letterIndex, 30 | char base, const std::vector& reads) const; 31 | AlnScoreType addInsertion(unsigned int positionIndex, 32 | char base, const std::vector& reads) const; 33 | 34 | private: 35 | std::vector _forwardScores; 36 | std::vector _reverseScores; 37 | const SubstitutionMatrix& _subsMatrix; 38 | 39 | AlnScoreType getScoringMatrix(const std::string& v, const std::string& w, 40 | ScoreMatrix& scoreMat); 41 | }; 42 | -------------------------------------------------------------------------------- /src/polishing/bubble.h: -------------------------------------------------------------------------------- 1 | //(c) 2016 by Authors 2 | //This file is a part of ABruijn program. 
3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | #include "subs_matrix.h" 11 | 12 | struct StepInfo 13 | { 14 | std::string sequence; 15 | AlnScoreType score; 16 | 17 | StepInfo(): score(0.0f) {} 18 | }; 19 | 20 | struct Bubble 21 | { 22 | std::string header; 23 | int position; 24 | int subPosition; 25 | 26 | std::string candidate; 27 | std::vector branches; 28 | std::vector polishSteps; 29 | }; 30 | -------------------------------------------------------------------------------- /src/polishing/bubble_processor.h: -------------------------------------------------------------------------------- 1 | //(c) 2016 by Authors 2 | //This file is a part of ABruijn program. 3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "subs_matrix.h" 14 | #include "bubble.h" 15 | #include "general_polisher.h" 16 | #include "homo_polisher.h" 17 | #include "utility.h" 18 | #include "../common/progress_bar.h" 19 | #include "dinucleotide_fixer.h" 20 | 21 | 22 | class BubbleProcessor 23 | { 24 | public: 25 | BubbleProcessor(const std::string& subsMatPath, 26 | const std::string& hopoMatrixPath, 27 | bool showProgress, bool hopoEndabled); 28 | void polishAll(const std::string& inBubbles, const std::string& outConsensus, 29 | int numThreads); 30 | void enableVerboseOutput(const std::string& filename); 31 | 32 | private: 33 | void parallelWorker(); 34 | void cacheBubbles(int numBubbles); 35 | void writeBubbles(const std::vector& bubbles); 36 | void writeLog(const std::vector& bubbles); 37 | 38 | const int BUBBLES_CACHE = 100; 39 | 40 | bool _verbose; 41 | bool _showProgress; 42 | bool _hopoEnabled; 43 | 44 | const SubstitutionMatrix _subsMatrix; 45 | const HopoMatrix _hopoMatrix; 46 | const GeneralPolisher _generalPolisher; 47 | const HomoPolisher _homoPolisher; 48 | const DinucleotideFixer _dinucFixer; 49 | 50 | 
ProgressPercent _progress; 51 | std::mutex _stateMutex; 52 | std::vector _cachedBubbles; 53 | 54 | std::ifstream _bubblesFile; 55 | std::ofstream _consensusFile; 56 | std::ofstream _logFile; 57 | }; 58 | -------------------------------------------------------------------------------- /src/polishing/dinucleotide_fixer.cpp: -------------------------------------------------------------------------------- 1 | //(c) 2016 by Authors 2 | //This file is a part of ABruijn program. 3 | //Released under the BSD license (see LICENSE file) 4 | // 5 | #include "dinucleotide_fixer.h" 6 | #include "alignment.h" 7 | 8 | void DinucleotideFixer::fixBubble(Bubble& bubble) const 9 | { 10 | auto likelihood = [this](const std::string& candidate, 11 | const std::vector& branches) 12 | { 13 | Alignment align(branches.size(), _subsMatrix); 14 | AlnScoreType score = align.globalAlignment(candidate, branches); 15 | return score; 16 | }; 17 | 18 | auto runPair = this->getDinucleotideRuns(bubble.candidate); 19 | if (runPair.second < 3) return; 20 | 21 | //try to increase / decrease dinucleotide polimer len and see what happens 22 | std::string increased = bubble.candidate; 23 | increased.insert(runPair.first, bubble.candidate.substr(runPair.first, 2)); 24 | 25 | std::string decreased = bubble.candidate; 26 | decreased.erase(runPair.first, 2); 27 | 28 | AlnScoreType normalScore = likelihood(bubble.candidate, bubble.branches); 29 | AlnScoreType increasedScore = likelihood(increased, bubble.branches); 30 | AlnScoreType decreasedScore = likelihood(decreased, bubble.branches); 31 | 32 | /* 33 | if (increasedScore > normalScore || decreasedScore > normalScore) 34 | { 35 | std::cerr << bubble.candidate << std::endl << increased 36 | << std::endl << decreased << std::endl; 37 | std::cerr << normalScore << " " 38 | << increasedScore << " " << decreasedScore << std::endl << std::endl; 39 | }*/ 40 | 41 | if (increasedScore > normalScore) 42 | { 43 | bubble.candidate = increased; 44 | StepInfo info; 45 | 
info.sequence = increased; 46 | bubble.polishSteps.push_back(info); 47 | } 48 | else if (decreasedScore > normalScore) 49 | { 50 | bubble.candidate = decreased; 51 | StepInfo info; 52 | info.sequence = decreased; 53 | bubble.polishSteps.push_back(info); 54 | } 55 | } 56 | 57 | std::pair 58 | DinucleotideFixer::getDinucleotideRuns(const std::string& sequence) const 59 | { 60 | int maxRun = 0; 61 | int maxPos = 0; 62 | if (sequence.length() < 2) return {0, 0}; 63 | 64 | for (size_t shift = 0; shift < 2; ++shift) 65 | { 66 | std::string prevDinuc = "--"; 67 | int curRun = 0; 68 | for (size_t pos = 0; pos < (sequence.length() - shift) / 2; ++pos) 69 | { 70 | std::string curDinuc = sequence.substr(pos * 2 + shift, 2); 71 | //std::cerr << curDinuc << " "; 72 | if (curDinuc != prevDinuc) 73 | { 74 | if (curRun > maxRun && prevDinuc[0] != prevDinuc[1]) 75 | { 76 | maxRun = curRun; 77 | maxPos = pos * 2 + shift - curRun * 2; 78 | } 79 | curRun = 1; 80 | prevDinuc = curDinuc; 81 | } 82 | else 83 | { 84 | ++curRun; 85 | } 86 | } 87 | //std::cerr << std::endl; 88 | 89 | } 90 | 91 | /*if (maxRun > 2) 92 | { 93 | std::cerr << sequence << std::endl; 94 | std::cerr << maxPos << " " << maxRun << std::endl; 95 | std::cerr << std::endl; 96 | }*/ 97 | 98 | return {maxPos, maxRun}; 99 | } 100 | -------------------------------------------------------------------------------- /src/polishing/dinucleotide_fixer.h: -------------------------------------------------------------------------------- 1 | //(c) 2016 by Authors 2 | //This file is a part of ABruijn program. 
3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #pragma once 6 | 7 | #include "subs_matrix.h" 8 | #include "bubble.h" 9 | 10 | class DinucleotideFixer 11 | { 12 | public: 13 | DinucleotideFixer(const SubstitutionMatrix& subsMatrix): 14 | _subsMatrix(subsMatrix) 15 | {} 16 | void fixBubble(Bubble& bubble) const; 17 | 18 | private: 19 | std::pair getDinucleotideRuns(const std::string& sequence) const; 20 | 21 | const SubstitutionMatrix& _subsMatrix; 22 | }; 23 | -------------------------------------------------------------------------------- /src/polishing/general_polisher.h: -------------------------------------------------------------------------------- 1 | //(c) 2016 by Authors 2 | //This file is a part of ABruijn program. 3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #pragma once 6 | 7 | #include "bubble.h" 8 | #include "subs_matrix.h" 9 | #include "alignment.h" 10 | 11 | class GeneralPolisher 12 | { 13 | public: 14 | GeneralPolisher(const SubstitutionMatrix& subsMatrix): 15 | _subsMatrix(subsMatrix) 16 | {} 17 | void polishBubble(Bubble& bubble) const; 18 | 19 | private: 20 | StepInfo makeStep(const std::string& candidate, 21 | const std::vector& branches, 22 | Alignment& align) const; 23 | 24 | const SubstitutionMatrix& _subsMatrix; 25 | }; 26 | -------------------------------------------------------------------------------- /src/polishing/homo_polisher.h: -------------------------------------------------------------------------------- 1 | //(c) 2016 by Authors 2 | //This file is a part of ABruijn program. 
3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #pragma once 6 | 7 | #include "subs_matrix.h" 8 | #include "bubble.h" 9 | 10 | class HomoPolisher 11 | { 12 | public: 13 | HomoPolisher(const SubstitutionMatrix& subsMatrix, 14 | const HopoMatrix& hopoMatrix): 15 | _subsMatrix(subsMatrix), _hopoMatrix(hopoMatrix) 16 | {} 17 | void polishBubble(Bubble& bubble) const; 18 | 19 | private: 20 | size_t mostLikelyLen(char nucleotide, 21 | const HopoMatrix::ObsVector& obs) const; 22 | size_t compareTopTwo(char nucleotide, size_t firstChoice, 23 | size_t secondChoice, 24 | const HopoMatrix::ObsVector& observations) const; 25 | AlnScoreType likelihood(HopoMatrix::State state, 26 | const HopoMatrix::ObsVector& observations) const; 27 | 28 | const SubstitutionMatrix& _subsMatrix; 29 | const HopoMatrix& _hopoMatrix; 30 | }; 31 | -------------------------------------------------------------------------------- /src/polishing/subs_matrix.h: -------------------------------------------------------------------------------- 1 | //(c) 2016 by Authors 2 | //This file is a part of ABruijn program. 
//Released under the BSD license (see LICENSE file)

#pragma once

#include <cstddef>
#include <cstdint>
#include <limits>
#include <sstream>
#include <string>
#include <vector>

typedef int64_t AlnScoreType;

//Score table indexed by a pair of characters, loaded from a file.
//Scores are stored in a flat array at index v * MAX_CHAR + w.
//NOTE(review): the characters are converted to size_t directly, so a
//negative char value would produce an out-of-range index - callers are
//presumably expected to pass plain ASCII letters only; confirm.
class SubstitutionMatrix
{
public:
    SubstitutionMatrix(const std::string& path);
    AlnScoreType getScore(char v, char w) const
    {
        return _matrix[(size_t)v * MAX_CHAR + (size_t)w];
    }

private:
    void loadMatrix(const std::string& path);
    void setScore(char v, char w, AlnScoreType score)
    {
        _matrix[(size_t)v * MAX_CHAR + (size_t)w] = score;
    }

    const size_t MAX_CHAR = std::numeric_limits<char>::max();
    std::vector<AlnScoreType> _matrix;
};

//Homopolymer model: per-state observation scores and genome scores,
//both looked up by numeric ids
class HopoMatrix
{
public:
    //State represents a homopolymer that is observed in
    //reference sequence
    struct State
    {
        State():
            nucl(0), length(0), id(0)
        {}

        State(char nucl, uint32_t length);

        State(const std::string& str, size_t start = 0,
              size_t end = std::string::npos);

        char nucl;
        uint32_t length;
        uint32_t id;
    };
    //Observation represents the read segment that corresponds
    //to a homopolymer in the reference (State). Might not be
    //a homopolymer, e.g. contain some other nucleotides, like
    //5A2X
    struct Observation
    {
        Observation(uint32_t id, bool extactMatch = false):
            id(id), extactMatch(extactMatch)
        {}
        uint32_t id;
        bool extactMatch;
    };
    typedef std::vector<Observation> ObsVector;

    HopoMatrix(const std::string& fileName, bool hopoEnabled);
    //lookups are not bounds-checked: ids must come from this matrix
    AlnScoreType getObsProb(State state, Observation observ) const
        {return _observationProbs[state.id][observ.id];}
    AlnScoreType getGenomeProb(State state) const
        {return _genomeProbs[state.id];}
    ObsVector knownObservations(State state) const;
    static Observation strToObs(char mainNucl, const std::string& dnaStr,
                                size_t start = 0,
                                size_t end = std::string::npos);

private:
    void loadMatrix(const std::string& filaName);

    std::vector<std::vector<AlnScoreType>> _observationProbs;
    std::vector<AlnScoreType> _genomeProbs;
};

//--------------------------------------------------------------------------------
// /src/polishing/utility.h:
//--------------------------------------------------------------------------------
//(c) 2013-2016 by Authors
//This file is a part of Ragout program.
//Released under the BSD license (see LICENSE file)

#pragma once

#include <sstream>
#include <string>
#include <vector>


//Splits s at every occurrence of delim (std::getline semantics:
//an empty field after a trailing delimiter is not reported)
inline std::vector<std::string>
splitString(const std::string &s, char delim)
{
    std::vector<std::string> elems;
    std::stringstream ss(s);
    std::string item;
    while (std::getline(ss, item, delim)) elems.push_back(item);
    return elems;
}

//--------------------------------------------------------------------------------
// /src/repeat_graph/graph_processing.h:
//--------------------------------------------------------------------------------
//(c) 2016 by Authors
//This file is a part of ABruijn program.
3 | //Released under the BSD license (see LICENSE file) 4 | 5 | //This module provides a few repeat graph processing functions 6 | //that do no require reads alignment (e.g. only graph sctructure is used) 7 | 8 | #pragma once 9 | 10 | #include "repeat_graph.h" 11 | #include "repeat_resolver.h" 12 | 13 | //Represents an unbranching path in the graph. 14 | //Used to represent contigs and more 15 | struct UnbranchingPath 16 | { 17 | UnbranchingPath(const GraphPath& path, 18 | FastaRecord::Id id = FastaRecord::ID_NONE, 19 | bool circular = false, int length = 0, int meanCoverage = 0, 20 | const std::string& prefix = "path_"): 21 | path(path), id(id), circular(circular), repetitive(false), 22 | length(length), meanCoverage(meanCoverage), prefix(prefix) {} 23 | 24 | std::string name() const 25 | { 26 | return prefix + std::to_string(id.signedId()); 27 | } 28 | 29 | std::string nameUnsigned() const 30 | { 31 | std::string idTag = id.strand() ? std::to_string(id.signedId()) : 32 | std::to_string(id.rc().signedId()); 33 | return prefix + idTag; 34 | } 35 | 36 | std::string edgesStr() const 37 | { 38 | if (path.empty()) return ""; 39 | 40 | std::string contentsStr; 41 | for (auto& edge : path) 42 | { 43 | contentsStr += edge->edgeDescr() + " -> "; 44 | } 45 | 46 | contentsStr.erase(contentsStr.size() - 4); 47 | return contentsStr; 48 | } 49 | 50 | std::string edgesStrLong() const 51 | { 52 | if (path.empty()) return ""; 53 | 54 | std::string contentsStr; 55 | for (auto& edge : path) 56 | { 57 | contentsStr += edge->edgeDescrLong() + " -> "; 58 | } 59 | 60 | contentsStr.erase(contentsStr.size() - 4); 61 | return contentsStr; 62 | } 63 | 64 | bool isLooped() const 65 | { 66 | return path.front()->nodeLeft == path.back()->nodeRight; 67 | } 68 | 69 | GraphNode*& nodeLeft() 70 | { 71 | return path.front()->nodeLeft; 72 | } 73 | 74 | GraphNode*& nodeRight() 75 | { 76 | return path.back()->nodeRight; 77 | } 78 | 79 | GraphPath path; 80 | FastaRecord::Id id; 81 | bool circular; 82 | 
bool repetitive; 83 | int32_t length; 84 | int32_t meanCoverage; 85 | std::string prefix; 86 | }; 87 | 88 | //A class for basic repeat graph processing 89 | //Condencing edges, collapsing bulges, trimming tips etc. 90 | class GraphProcessor 91 | { 92 | public: 93 | GraphProcessor(RepeatGraph& graph, const SequenceContainer& asmSeqs): 94 | _graph(graph), _asmSeqs(asmSeqs) {} 95 | 96 | void fixChimericJunctions(); 97 | //void trimTips(); 98 | std::vector getUnbranchingPaths() const; 99 | std::vector getEdgesPaths() const; 100 | 101 | private: 102 | //used during repeat graph construction only 103 | friend class RepeatGraph; 104 | void simplify(); 105 | int condenceEdges(); 106 | int collapseBulges(); 107 | // 108 | 109 | RepeatGraph& _graph; 110 | const SequenceContainer& _asmSeqs; 111 | }; 112 | -------------------------------------------------------------------------------- /src/repeat_graph/haplotype_resolver.h: -------------------------------------------------------------------------------- 1 | //(c) 2016-2019 by Authors 2 | //This file is a part of the Flye program. 
//Released under the BSD license (see LICENSE file)

#pragma once

#include "repeat_graph.h"
#include "read_aligner.h"




//Interface of the haplotype resolution step of repeat graph processing.
//Only declarations are visible in this header, so what each operation
//does exactly has to be confirmed against the implementation file.
//NOTE(review): several declarations below name std::vector,
//std::unordered_set and std::unordered_map without (complete) template
//arguments; they appear to have been lost when this text was extracted
//and must be restored from the original header.
class HaplotypeResolver
{
public:
    //Keeps references to the graph, the read aligner and both sequence
    //containers (all must outlive this object).
    //The alternative-group id counter starts at 2.
    HaplotypeResolver(RepeatGraph& graph, ReadAligner& aligner,
                      const SequenceContainer& asmSeqs,
                      const SequenceContainer& readSeqs):
        _graph(graph), _aligner(aligner), _asmSeqs(asmSeqs),
        _readSeqs(readSeqs), _nextAltGroupId(2) {}

    void resetEdges();
    //The find* functions return an int - presumably the number of
    //structures found; TODO confirm
    int findHeterozygousLoops();
    int findHeterozygousBulges();
    int findRoundabouts();
    int findSuperbubbles();
    void collapseHaplotypes();

private:
    DnaSequence pathSequence(GraphPath& path);
    void separeteAdjacentEdges(GraphEdge* inEdge, GraphEdge* outEdge);
    void separateDistantEdges(GraphEdge* inEdge, GraphEdge* outEdge,
                              EdgeSequence insSequence, FastaRecord::Id newId);

    //A graph alignment paired with an integer score.
    //The score has no default value and must be set by the user of the struct.
    struct PathWithScore
    {
        GraphAlignment path;
        int score;
    };

    //Result record of findVariantSegment();
    //both edge pointers are null after default construction
    struct VariantPaths
    {
        VariantPaths(): startEdge(nullptr), endEdge(nullptr) {}
        GraphEdge* startEdge;
        GraphEdge* endEdge;
        std::vector altPaths;
        DnaSequence bridgingSequence;
    };

    VariantPaths findVariantSegment(GraphEdge* startEdge,
                                    const std::vector& alnignments,
                                    const std::unordered_set& loopedEdges);

    RepeatGraph& _graph;
    ReadAligner& _aligner;
    const SequenceContainer& _asmSeqs;
    const SequenceContainer& _readSeqs;

    //sequences stored per key and hashed with pairhash
    //(the key type is not recoverable from this text)
    std::unordered_map,
                       DnaSequence, pairhash> _bridgingSeqs;
    int _nextAltGroupId;
};

--------------------------------------------------------------------------------
/src/repeat_graph/multiplicity_inferer.h:
--------------------------------------------------------------------------------
//(c) 2016 by Authors
//This file is a part of ABruijn program.
3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #pragma once 6 | 7 | #include "repeat_graph.h" 8 | #include "read_aligner.h" 9 | 10 | //A simple class that assigns edges multiplicity based on the coverage 11 | //and copmutes the mean coverage of all edges 12 | class MultiplicityInferer 13 | { 14 | public: 15 | MultiplicityInferer(RepeatGraph& graph, ReadAligner& aligner, 16 | const SequenceContainer& asmSeqs): 17 | _graph(graph), _aligner(aligner), _asmSeqs(asmSeqs), 18 | _uniqueCovThreshold(0), _meanCoverage(0) {} 19 | 20 | //coverage-related 21 | void estimateCoverage(); 22 | int getMeanCoverage() const {return _meanCoverage;} 23 | int getUniqueCovThreshold() const {return _uniqueCovThreshold;} 24 | 25 | //various simplifications 26 | //int maskUnsupportedEdges(); 27 | int removeUnsupportedEdges(bool onlyTips); 28 | 29 | int removeUnsupportedConnections(); 30 | int splitNodes(); 31 | int disconnectMinorPaths(); 32 | int resolveForks(); 33 | 34 | int trimTips() 35 | { 36 | const int MAX_ITER = 5; 37 | int totalShort = 0; 38 | int totalLong = 0; 39 | int numIter = 0; 40 | for (;numIter < MAX_ITER; ++numIter) 41 | { 42 | int iterShort = 0; 43 | int iterLong = 0; 44 | this->trimTipsIteration(iterShort, iterLong); 45 | totalShort += iterShort; 46 | totalLong += iterLong; 47 | if (iterShort + iterLong == 0) break; 48 | } 49 | 50 | _aligner.updateAlignments(); 51 | Logger::get().debug() << "[SIMPL] Clipped " << totalShort 52 | << " short and " << totalLong << " long tips"; 53 | //Logger::get().debug() << "Iterations: " << numIter; 54 | return totalShort + totalLong; 55 | } 56 | 57 | 58 | private: 59 | void trimTipsIteration(int& outShort, int& outLong); 60 | 61 | RepeatGraph& _graph; 62 | ReadAligner& _aligner; 63 | const SequenceContainer& _asmSeqs; 64 | //const SequenceContainer& _readSeqs; 65 | int _uniqueCovThreshold; 66 | int _meanCoverage; 67 | }; 68 | -------------------------------------------------------------------------------- 
/src/repeat_graph/output_generator.h:
--------------------------------------------------------------------------------
//(c) 2016 by Authors
//This file is a part of ABruijn program.
//Released under the BSD license (see LICENSE file)

//This module generates multiple types of output given the graph:
//it outputs edge sequences in FASTA format and the graph structure
//as dot or gfa. It also outputs information about the unresolved
//repeats for the subsequent analysis


#pragma once

#include "repeat_graph.h"
#include "graph_processing.h"

//Writes the given paths of the graph to files in different formats.
//NOTE(review): the std::vector parameters and return type below carry
//no template argument; it appears to have been lost when this text was
//extracted and must be restored from the original header.
class OutputGenerator
{
public:
    //Keeps references to the graph and the aligner;
    //both must outlive this object
    OutputGenerator(RepeatGraph& graph, const ReadAligner& aln):
        _graph(graph), _aligner(aln) {}

    //Each output* function takes the paths to write and the name
    //of the output file
    void outputDot(const std::vector& paths,
                   const std::string& filename);
    void outputGfa(const std::vector& paths,
                   const std::string& filename);
    //void outputGfaCompact(const std::vector& paths,
    //                      const std::string& filename);
    void outputFasta(const std::vector& paths,
                     const std::string& filename);
    std::vector
        generatePathSequences(const std::vector& paths) const;
private:

    RepeatGraph& _graph;
    const ReadAligner& _aligner;
    //const SequenceContainer& _asmSeqs;
    //const SequenceContainer& _readSeqs;
};

--------------------------------------------------------------------------------
/src/repeat_graph/read_aligner.h:
--------------------------------------------------------------------------------
//(c) 2016 by Authors
//This file is a part of ABruijn program.
//Released under the BSD license (see LICENSE file)


//Aligns reads to the graph, also updates alignments
//if the graph changes

#pragma once

#include "repeat_graph.h"

//NOTE(review): the typedefs and std::vector declarations in this header
//carry no (complete) template arguments; they appear to have been lost
//when this text was extracted and must be restored from the original header.

//An overlap together with the graph edge it refers to
struct EdgeAlignment
{
    OverlapRange overlap;
    GraphEdge* edge;
    //EdgeSequence segment;
};
typedef std::vector GraphAlignment;

class ReadAligner
{
public:
    //Keeps references to the graph and the read sequences;
    //both must outlive this object
    ReadAligner(RepeatGraph& graph, const SequenceContainer& readSeqs):
        _graph(graph), _readSeqs(readSeqs) {}

    void alignReads();
    void updateAlignments();
    //Read-only access to the stored alignments (no copy is made)
    const std::vector& getAlignments() const
        {return _readAlignments;}

    void storeAlignments(const std::string& filename);
    void loadAlignments(const std::string& filename);

    typedef std::unordered_map> AlnIndex;
    AlnIndex makeAlignmentIndex();

    typedef std::unordered_map> ConnIndex;
    ConnIndex getEdgeConnectivity() const;

private:
    std::vector
        chainReadAlignments(const std::vector& ovlps) const;

    float getChainBaseDivergence(const GraphAlignment& aln, bool realign);

    std::vector _readAlignments;

    RepeatGraph& _graph;
    //const SequenceContainer& _asmSeqs;
    const SequenceContainer& _readSeqs;
};

--------------------------------------------------------------------------------
/src/repeat_graph/repeat_resolver.h:
--------------------------------------------------------------------------------
//(c) 2016 by Authors
//This file is a part of ABruijn program.
//Released under the BSD license (see LICENSE file)

//Does the "conventional" resolution of bridged repeats.
//Also, classifies edges into unique and repetitive based
//on read alignment

#pragma once

#include "repeat_graph.h"
#include "read_aligner.h"
#include "multiplicity_inferer.h"

//NOTE(review): several declarations below name std::vector and
//std::unordered_map without (complete) template arguments; they appear
//to have been lost when this text was extracted and must be restored
//from the original header.
class RepeatResolver
{
public:
    //Keeps references to the graph, both sequence containers, the aligner
    //and the multiplicity inferer; all must outlive this object
    RepeatResolver(RepeatGraph& graph, const SequenceContainer& asmSeqs,
                   const SequenceContainer& readSeqs,
                   ReadAligner& aligner,
                   MultiplicityInferer& multInf):
        _graph(graph), _asmSeqs(asmSeqs), _readSeqs(readSeqs),
        _aligner(aligner), _multInf(multInf) {}

    void findRepeats();
    int resolveRepeats();
    int resolveSimpleRepeats();
    void finalizeGraph();

private:
    //A [start, end) range of a read, identified by the read id
    struct ReadSequence
    {
        FastaRecord::Id readId;
        int32_t start;
        int32_t end;

        //number of positions between start and end
        int32_t length() const {return end - start;}
    };
    //A graph path together with the read segment and flank length
    //associated with it
    struct Connection
    {
        GraphPath path;
        ReadSequence readSeq;
        int32_t flankLength;
    };

    int maskUnsupportedEdges();
    void separatePath(const GraphPath& path, EdgeSequence segment,
                      FastaRecord::Id startId);

    bool checkByReadExtension(const GraphEdge* edge,
                              const std::vector& alignments);
    bool checkForTandemCopies(const GraphEdge* checkEdge,
                              const std::vector& alignments);
    void clearResolvedRepeats();
    std::vector getConnections();
    int resolveConnections(const std::vector& conns,
                           float minSupport);

    //NOTE(review): the three containers are taken by value, so every call
    //copies them - confirm that this is intended
    bool checkPathConsistency(const GraphEdge* checkEdge, GraphEdge* maxConn,
                              std::unordered_map> visitedEdges,
                              std::unordered_map> outSpans,
                              std::vector hangingPaths);

    RepeatGraph& _graph;
    const SequenceContainer& _asmSeqs;
    const SequenceContainer& _readSeqs;
    ReadAligner& _aligner;
    MultiplicityInferer& _multInf;
    std::unordered_map _substractedCoverage;
};

--------------------------------------------------------------------------------
/src/sequence/alignment.h:
--------------------------------------------------------------------------------
//(c) 2016-2020 by Authors
//This file is a part of Flye program.
//Released under the BSD license (see LICENSE file)

#pragma once

#include "overlap.h"

//Declarations of the pairwise alignment helpers. Only the signatures are
//visible here; the meaning of the returned float values has to be taken
//from the implementation.
//NOTE(review): the std::vector types below carry no template argument;
//it appears to have been lost when this text was extracted and must be
//restored from the original header.

//Takes an overlap, the target and query sequences and a maximum
//alignment error; returns a float (presumably the alignment error
//of the overlap - TODO confirm)
float getAlignmentErrKsw(const OverlapRange& ovlp,
                         const DnaSequence& trgSeq,
                         const DnaSequence& qrySeq,
                         float maxAlnErr);

//Same arguments as getAlignmentErrKsw() plus the useHpc flag
float getAlignmentErrEdlib(const OverlapRange& ovlp,
                           const DnaSequence& trgSeq,
                           const DnaSequence& qrySeq,
                           float maxAlnErr,
                           bool useHpc);

//Takes the overlap by non-const reference, so it may be modified
std::vector
checkIdyAndTrim(OverlapRange& ovlp, const DnaSequence& curSeq,
                const DnaSequence& extSeq, float maxDivergence,
                int32_t minOverlap, bool useHpc);

//One CIGAR operation: the operation character and its length
struct CigOp
{
    char op;
    int len;
};

//Works on the given target and query ranges (begin + length);
//the CIGAR is returned through cigarOut
float getAlignmentCigarKsw(const DnaSequence& trgSeq, size_t trgBegin, size_t trgLen,
                           const DnaSequence& qrySeq, size_t qryBegin, size_t qryLen,
                           float maxAlnErr, std::vector& cigarOut);

//Takes a CIGAR and the start positions in both sequences; the two
//aligned strings are returned through outAlnTrg and outAlnQry
void decodeCigar(const std::vector& cigar, const DnaSequence& trgSeq, size_t trgBegin,
                 const DnaSequence& qrySeq, size_t qryBegin,
                 std::string& outAlnTrg, std::string& outAlnQry);

--------------------------------------------------------------------------------
/src/sequence/consensus_generator.h:
--------------------------------------------------------------------------------
//(c) 2016 by Authors
//This file is a part of ABruijn program.
//Released under the BSD license (see LICENSE file)

//Given contigs, represented as lists of possibly overlapping sequences,
//generate a single consensus sequence for each contig

#pragma once

//NOTE(review): the header name of the following include and the template
//arguments of std::vector / std::unordered_map / std::pair below appear
//to have been lost when this text was extracted; restore them from the
//original file.
#include

#include "../sequence/overlap.h"


//A contig given as a named list of sequences and overlaps,
//with both trim values initialized to zero
struct ContigPath
{
    ContigPath(): trimLeft(0), trimRight(0) {}

    std::string name;
    std::vector sequences;
    std::vector overlaps;
    int32_t trimLeft;
    int32_t trimRight;
};

class ConsensusGenerator
{
public:
    //Takes the contigs to process; "verbose" defaults to true
    std::vector
        generateConsensuses(const std::vector& contigs,
                            bool verbose = true);

private:
    //Two aligned strings and their start positions
    struct AlignmentInfo
    {
        std::string alnOne;
        std::string alnTwo;

        int32_t startOne;
        int32_t startTwo;
    };
    typedef std::unordered_map AlignmentsMap;

    FastaRecord generateLinear(const ContigPath& path,
                               const AlignmentsMap& alnMap);
    AlignmentsMap generateAlignments(const std::vector& contigs,
                                     bool verbose);
    std::pair getSwitchPositions(const AlignmentInfo& aln,
                                 int32_t prevSwitch);
};

--------------------------------------------------------------------------------
/src/sequence/sequence.cpp:
--------------------------------------------------------------------------------
#include "sequence.h"

//Out-of-class definitions of the static members of DnaSequence
//NOTE(review): the element type of std::vector is missing here as well
std::vector DnaSequence::_dnaTable;
DnaSequence::TableFiller DnaSequence::_filler;

--------------------------------------------------------------------------------