├── NEWS ├── COPYING ├── m4 └── keepme.m4 ├── README ├── AUTHORS ├── Makefile.am ├── .gitignore ├── src ├── RepeatIdComparator.hpp ├── TraceType.hpp ├── Links.hpp ├── ActiveElement.hpp ├── HetThreshold.hpp ├── DebruijnGraphBase.hpp ├── Node.hpp ├── Repeat.hpp ├── bamidrename.cpp ├── DebruijnGraphInterface.hpp ├── DotProduct.hpp ├── DebruijnGraphContainer.hpp ├── fillfasta.cpp ├── ComputeOffsetLikely.hpp ├── sortfasta.cpp ├── DecodedReadContainer.hpp ├── OffsetLikely.hpp ├── ChainSet.hpp ├── spikenoise.cpp ├── rlastobam.cpp ├── encodegraph.cpp ├── lasfilteralignments.cpp ├── bamfilterlongest.cpp ├── filtersym.cpp ├── Makefile.am ├── computeintrinsicqv.cpp ├── computeintrinsicqv2.cpp ├── maftobam.cpp ├── marktrue.cpp ├── wmap.cpp ├── filterchains.cpp └── computeextrinsicqv.cpp ├── bumpversion.sh ├── ChangeLog ├── configure.ac └── INSTALL /NEWS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | GPLv3 -------------------------------------------------------------------------------- /m4/keepme.m4: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | README.md -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | German Tischler 2 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | EXTRA_DIST = configure GPLv3 2 | SUBDIRS = src 3 | ACLOCAL_AMFLAGS=-I m4 4 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | doconfig.sh 3 | recomp.sh 4 | Makefile 5 | Makefile.in 6 | aclocal.m4 7 | autom4te.cache/ 8 | compile 9 | config.guess 10 | config.h 11 | config.h.in 12 | config.log 13 | config.status 14 | config.sub 15 | configure 16 | depcomp 17 | install-sh 18 | libtool 19 | ltmain.sh 20 | m4/libtool.m4 21 | m4/ltoptions.m4 22 | m4/ltsugar.m4 23 | m4/ltversion.m4 24 | m4/lt~obsolete.m4 25 | missing 26 | src/.deps/ 27 | src/Makefile 28 | src/Makefile.in 29 | src/daccord 30 | src/*.o 31 | stamp-h1 32 | src/computeintrinsicqv 33 | src/lasdetectsimplerepeats 34 | src/lasfilteralignments 35 | src/lasfilteralignmentsborderrepeats 36 | src/maftobam 37 | src/bamidrename 38 | src/generateperfectpiles 39 | src/checklas 40 | src/checkconsensus 41 | src/sortfasta 42 | src/mapconstoraw 43 | src/fillfasta 44 | src/canutolas 45 | data 46 | src/spikenoise 47 | check.sh 48 | prepare.sh 49 | -------------------------------------------------------------------------------- /src/RepeatIdComparator.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | */ 18 | #if ! 
defined(REPEATIDCOMPARATOR_HPP) 19 | #define REPEATIDCOMPARATOR_HPP 20 | 21 | #include 22 | 23 | struct RepeatIdComparator 24 | { 25 | bool operator()(Repeat const & A, Repeat const & B) const 26 | { 27 | return A.id < B.id; 28 | } 29 | }; 30 | #endif 31 | -------------------------------------------------------------------------------- /src/TraceType.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | */ 18 | #if ! 
defined(TRACETYPE_HPP) 19 | #define TRACETYPE_HPP 20 | 21 | #include 22 | 23 | typedef libmaus2::lcs::AlignmentTraceContainer trace_type; 24 | 25 | struct TraceTypeInfo 26 | { 27 | typedef trace_type element_type; 28 | typedef element_type::shared_ptr_type pointer_type; 29 | 30 | static pointer_type getNullPointer() 31 | { 32 | return pointer_type(); 33 | } 34 | 35 | static pointer_type deallocate(pointer_type /* p */) 36 | { 37 | return getNullPointer(); 38 | } 39 | }; 40 | 41 | struct TraceAllocator 42 | { 43 | typedef trace_type element_type; 44 | typedef element_type::shared_ptr_type pointer_type; 45 | 46 | pointer_type operator()() const 47 | { 48 | return pointer_type(new element_type); 49 | } 50 | }; 51 | #endif 52 | -------------------------------------------------------------------------------- /src/Links.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | libmaus2 3 | Copyright (C) 2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | */ 18 | #if ! 
/**
 * small fixed capacity list of (symbol,frequency) links
 *
 * Each link is packed into one 64 bit word as (freq << 8) | sym. Sorting the
 * words in descending order consequently orders by frequency first and by
 * symbol second. The upper 8 bits of freq are lost by the shift.
 **/
struct Links
{
	public:
	// packed links, only the first p entries are valid
	uint64_t A[4];
	// number of valid entries in A
	uint64_t p;

	public:
	// drop all links
	void reset()
	{
		p = 0;
	}

	/**
	 * append link (sym,freq)
	 *
	 * Links with frequency zero are not stored. The symbol is reduced to its
	 * low 8 bits (the part getSym() can return) so it cannot leak into the
	 * frequency bits. If the array is already full, the link is ignored
	 * instead of being written behind the end of A.
	 *
	 * @param sym symbol (low 8 bits are stored)
	 * @param freq frequency of the link
	 **/
	void push(uint64_t const sym, uint64_t const freq)
	{
		uint64_t const capacity = sizeof(A)/sizeof(A[0]);

		if ( freq && p < capacity )
			A[p++] = (freq << 8) | (sym & 0xFF);
	}

	/**
	 * set number of valid links (clamped to the capacity of A so that
	 * sort(), getFreq() and getSym() never address memory outside A)
	 **/
	void setSize(uint64_t const rp)
	{
		uint64_t const capacity = sizeof(A)/sizeof(A[0]);
		p = (rp < capacity) ? rp : capacity;
	}

	Links()
	{
		reset();
	}

	// sort links by descending packed value (frequency, then symbol)
	void sort()
	{
		if ( p <= 1 )
			return;

		std::sort(A,A+p,std::greater<uint64_t>());
	}

	// number of stored links
	uint64_t size() const
	{
		return p;
	}

	// frequency of i'th link (i is not range checked)
	uint64_t getFreq(uint64_t const i) const
	{
		return A[i] >> 8;
	}

	// symbol of i'th link (i is not range checked)
	uint64_t getSym(uint64_t const i) const
	{
		return A[i] & 0xFF;
	}
};
defined(ACTIVEELEMENT_HPP) 19 | #define ACTIVEELEMENT_HPP 20 | 21 | #include 22 | 23 | /** 24 | * active sequence in multiple alignment 25 | **/ 26 | struct ActiveElement 27 | { 28 | // base data for a sequence 29 | uint8_t const * ua; 30 | // base data for b sequence 31 | uint8_t const * ub; 32 | // current trace pointer between a and b sequence 33 | libmaus2::lcs::AlignmentTraceContainer::step_type const * ta; 34 | libmaus2::lcs::AlignmentTraceContainer::step_type const * te; 35 | // offset on b sequence 36 | uint64_t uboff; 37 | // error rate of alignment 38 | double erate; 39 | 40 | ActiveElement() {} 41 | ActiveElement( 42 | uint8_t const * rua, 43 | uint8_t const * rub, 44 | libmaus2::lcs::AlignmentTraceContainer::step_type const * rta, 45 | libmaus2::lcs::AlignmentTraceContainer::step_type const * rte, 46 | uint64_t const ruboff, 47 | double const rerate 48 | ) : ua(rua), ub(rub), ta(rta), te(rte), uboff(ruboff), erate(rerate) {} 49 | }; 50 | #endif 51 | -------------------------------------------------------------------------------- /src/HetThreshold.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | libmaus2 3 | Copyright (C) 2009-2013 German Tischler 4 | Copyright (C) 2011-2013 Genome Research Limited 5 | 6 | This program is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published by 8 | the Free Software Foundation, either version 3 of the License, or 9 | (at your option) any later version. 10 | 11 | This program is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with this program. If not, see . 18 | */ 19 | #if ! 
defined(HETTHRESHOLD_HPP) 20 | #define HETTHRESHOLD_HPP 21 | 22 | #include 23 | #include 24 | 25 | struct HetThreshold 26 | { 27 | static uint64_t getHetThreshold(uint64_t const d, double const e, double const thres = 0.995) 28 | { 29 | double const p = 1-e; 30 | std::vector VPO = libmaus2::random::Poisson(d).getVector(1e-8); 31 | 32 | double cs = 1.0; 33 | uint64_t detd = 1; 34 | for ( uint64_t dp = 0; true; ++dp ) 35 | { 36 | double s = 0.0; 37 | 38 | for ( uint64_t i = dp; i < VPO.size(); ++i ) 39 | { 40 | double const c = 41 | VPO[i] * 42 | libmaus2::math::gpow(p,dp) * 43 | libmaus2::math::gpow(1-p,i-dp) * 44 | libmaus2::math::Binom::binomialCoefficientInteger(dp,i); 45 | 46 | s += c; 47 | } 48 | 49 | if ( cs - s < thres ) 50 | { 51 | detd = dp; 52 | break; 53 | } 54 | else 55 | { 56 | cs -= s; 57 | } 58 | } 59 | 60 | return detd; 61 | } 62 | }; 63 | #endif 64 | -------------------------------------------------------------------------------- /src/DebruijnGraphBase.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | libmaus2 3 | Copyright (C) 2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | */ 18 | #if ! 
defined(DEBRUIJNGRAPHBASE_HPP) 19 | #define DEBRUIJNGRAPHBASE_HPP 20 | 21 | #include 22 | #include 23 | 24 | struct DebruijnGraphBase 25 | { 26 | static libmaus2::lcs::Aligner::unique_ptr_type getAligner() 27 | { 28 | std::set const S = libmaus2::lcs::AlignerFactory::getSupportedAligners(); 29 | 30 | libmaus2::lcs::AlignerFactory::aligner_type T[] = { 31 | libmaus2::lcs::AlignerFactory::libmaus2_lcs_AlignerFactory_y256_8, 32 | libmaus2::lcs::AlignerFactory::libmaus2_lcs_AlignerFactory_x128_8, 33 | libmaus2::lcs::AlignerFactory::libmaus2_lcs_AlignerFactory_NP 34 | }; 35 | 36 | for ( uint64_t i = 0; i < sizeof(T)/sizeof(T[0]); ++i ) 37 | if ( S.find(T[i]) != S.end() ) 38 | { 39 | libmaus2::lcs::Aligner::unique_ptr_type tptr(libmaus2::lcs::AlignerFactory::construct(T[i])); 40 | return UNIQUE_PTR_MOVE(tptr); 41 | } 42 | 43 | libmaus2::exception::LibMausException lme; 44 | lme.getStream() << "DebruijnGraph::getAligner: no suitable class found" << std::endl; 45 | lme.finish(); 46 | throw lme; 47 | } 48 | }; 49 | #endif 50 | -------------------------------------------------------------------------------- /bumpversion.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | VERSION=`grep AC_INIT < configure.ac | awk -F',' '{print $2}'` 3 | FIRST=`echo $VERSION | awk -F'.' '{print $1}'` 4 | SECOND=`echo $VERSION | awk -F'.' '{print $2}'` 5 | THIRD=`echo $VERSION | awk -F'.' '{print $3}'` 6 | NEXTTHIRD=`expr ${THIRD} + 1` 7 | export DEBEMAIL=tischler@mpi-cbg.de 8 | export DEBFULLNAME="German Tischler" 9 | 10 | function cleanup 11 | { 12 | if [ ! -z "${COMMITFILE}" ] ; then 13 | if [ -f "${COMMITFILE}" ] ; then 14 | rm -f "${COMMITFILE}" 15 | fi 16 | fi 17 | } 18 | 19 | COMMITFILE=commit_msg_$$.txt 20 | 21 | trap cleanup EXIT SIGINT SIGTERM 22 | 23 | # make sure we have the latest version 24 | git pull 25 | 26 | # create commit log message 27 | joe "${COMMITFILE}" 28 | 29 | if [ ! 
/**
 * record of unsigned 64 bit values describing one node (objects of this type
 * are handed out by DebruijnGraphInterface::getNodeVirtual)
 *
 * Every constructor initialises all members. Members not covered by a
 * constructor argument are set to zero, so printing or copying a
 * default constructed node never reads indeterminate values.
 **/
struct Node
{
	uint64_t v;
	uint64_t spo;
	uint64_t freq;
	uint64_t numsucc;
	uint64_t numsuccactive;
	uint64_t feaspos;
	uint64_t cfeaspos;
	uint64_t numfeaspos;
	uint64_t numcfeaspos;
	uint64_t pfostart;
	uint64_t pfosize;
	uint64_t cpfostart;
	uint64_t cpfosize;
	uint64_t plow;
	uint64_t phigh;
	uint64_t cplow;
	uint64_t cphigh;

	// all members zero
	Node()
	: v(0), spo(0), freq(0), numsucc(0), numsuccactive(0), feaspos(0), cfeaspos(0), numfeaspos(0), numcfeaspos(0),
	  pfostart(0), pfosize(0), cpfostart(0), cpfosize(0), plow(0), phigh(0), cplow(0), cphigh(0) {}

	// node with value rv, all other members zero (intentionally not explicit, as before)
	Node(uint64_t const rv)
	: v(rv), spo(0), freq(0), numsucc(0), numsuccactive(0), feaspos(0), cfeaspos(0), numfeaspos(0), numcfeaspos(0),
	  pfostart(0), pfosize(0), cpfostart(0), cpfosize(0), plow(0), phigh(0), cplow(0), cphigh(0) {}

	// full constructor; successor and feasible position counters start at zero
	Node(
		uint64_t const rv,
		uint64_t const rspo,
		uint64_t const rfreq,
		uint64_t const rpfostart,
		uint64_t const rpfosize,
		uint64_t const rcpfostart,
		uint64_t const rcpfosize,
		uint64_t const rplow,
		uint64_t const rphigh,
		uint64_t const rcplow,
		uint64_t const rcphigh
	)
	: v(rv), spo(rspo), freq(rfreq), numsucc(0), numsuccactive(0), feaspos(0), cfeaspos(0), numfeaspos(0), numcfeaspos(0),
	  pfostart(rpfostart), pfosize(rpfosize), cpfostart(rcpfostart), cpfosize(rcpfosize), plow(rplow), phigh(rphigh), cplow(rcplow), cphigh(rcphigh) {}
};

/**
 * output operator for Node
 *
 * Prints v, spo, freq, numsucc, numsuccactive, feaspos, numfeaspos, pfostart,
 * pfosize, plow and phigh. The c-prefixed members (cfeaspos, numcfeaspos,
 * cpfostart, cpfosize, cplow, cphigh) are not part of the output.
 **/
inline std::ostream & operator<<(std::ostream & out, Node const & N)
{
	return out << "Node(v=" << N.v
		<< ",spo=" << N.spo
		<< ",freq=" << N.freq
		<< ",numsucc=" << N.numsucc
		<< ",numsuccactive=" << N.numsuccactive
		<< ",feaspos=" << N.feaspos
		<< ",numfeaspos=" << N.numfeaspos
		<< ",pfostart=" << N.pfostart
		<< ",pfosize=" << N.pfosize
		<< ",plow=" << N.plow
		<< ",phigh=" << N.phigh
		<< ")";
}
23 | 24 | -- German Tischler Sat, 08 Jul 2017 00:29:07 +0200 25 | 26 | daccord (0.0.10-1) unstable; urgency=medium 27 | 28 | * add missing header files 29 | 30 | -- German Tischler Fri, 26 May 2017 17:07:08 +0200 31 | 32 | daccord (0.0.9-1) unstable; urgency=medium 33 | 34 | * add programs filterchains, split_agr and split_dis 35 | 36 | -- German Tischler Fri, 26 May 2017 16:46:01 +0200 37 | 38 | daccord (0.0.8-1) unstable; urgency=medium 39 | 40 | * update libmaus2 dependency 41 | * check for GMP support in libmaus2 42 | 43 | -- German Tischler Tue, 23 May 2017 15:38:36 +0200 44 | 45 | daccord (0.0.7-1) unstable; urgency=medium 46 | 47 | * update input filtering in daccord 48 | * refactor 49 | 50 | -- German Tischler Fri, 05 May 2017 10:25:01 +0200 51 | 52 | daccord (0.0.6-1) unstable; urgency=medium 53 | 54 | * avoid race condition while generating eprof file in daccord 55 | 56 | -- German Tischler Tue, 28 Feb 2017 15:41:34 +0100 57 | 58 | daccord (0.0.5-1) unstable; urgency=medium 59 | 60 | * update alignment scoring for input heap 61 | 62 | -- German Tischler Tue, 28 Feb 2017 15:07:33 +0100 63 | 64 | daccord (0.0.4-1) unstable; urgency=medium 65 | 66 | * change name of fasta index file in checkconsensus so it does not collide with other filenames 67 | * enable address sanitizer and stack protection in debug compilation if possible 68 | * add spikenoise program 69 | * fix off by one argument error in computeintrinsicqv 70 | 71 | -- German Tischler Tue, 28 Feb 2017 13:33:55 +0100 72 | 73 | daccord (0.0.3-1) unstable; urgency=medium 74 | 75 | * initial release 76 | 77 | -- German Tischler Mon, 06 Feb 2017 13:24:30 +0100 78 | 79 | daccord (0.0.0-1) unstable; urgency=medium 80 | 81 | * initial 82 | 83 | -- German Tischler Wed, 20 Jan 2017 17:00:00 +0100 84 | -------------------------------------------------------------------------------- /src/Repeat.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright 
(C) 2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | */ 18 | #if ! defined(REPEAT_HPP) 19 | #define REPEAT_HPP 20 | 21 | #include 22 | #include 23 | 24 | struct Repeat 25 | { 26 | uint64_t id; 27 | uint64_t abpos; 28 | uint64_t aepos; 29 | 30 | Repeat(uint64_t const rid = 0) : id(rid), abpos(0), aepos(0) 31 | {} 32 | 33 | Repeat(uint64_t const rid, 34 | uint64_t const rabpos, 35 | uint64_t const raepos 36 | ) : id(rid), abpos(rabpos), aepos(raepos) {} 37 | 38 | Repeat(std::istream & in) 39 | : 40 | id(libmaus2::util::NumberSerialisation::deserialiseNumber(in)), 41 | abpos(libmaus2::util::NumberSerialisation::deserialiseNumber(in)), 42 | aepos(libmaus2::util::NumberSerialisation::deserialiseNumber(in)) 43 | {} 44 | 45 | bool operator<(Repeat const & R) const 46 | { 47 | if ( id != R.id ) 48 | return id < R.id; 49 | else if ( abpos != R.abpos ) 50 | return abpos < R.abpos; 51 | else 52 | return aepos < R.aepos; 53 | } 54 | 55 | std::ostream & serialise(std::ostream & out) const 56 | { 57 | libmaus2::util::NumberSerialisation::serialiseNumber(out,id); 58 | libmaus2::util::NumberSerialisation::serialiseNumber(out,abpos); 59 | libmaus2::util::NumberSerialisation::serialiseNumber(out,aepos); 60 | return out; 61 | } 62 | 63 | std::istream & deserialise(std::istream & in) 64 | { 65 | id = libmaus2::util::NumberSerialisation::deserialiseNumber(in); 66 | abpos = 
libmaus2::util::NumberSerialisation::deserialiseNumber(in); 67 | aepos = libmaus2::util::NumberSerialisation::deserialiseNumber(in); 68 | return in; 69 | } 70 | 71 | static libmaus2::autoarray::AutoArray loadArray(std::istream & in) 72 | { 73 | uint64_t const recsize = 3*sizeof(uint64_t); 74 | in.clear(); 75 | in.seekg(0,std::ios::end); 76 | uint64_t const fs = in.tellg(); 77 | in.clear(); 78 | in.seekg(0,std::ios::beg); 79 | assert ( fs % recsize == 0 ); 80 | uint64_t const n = fs / recsize; 81 | libmaus2::autoarray::AutoArray A(n,false); 82 | for ( uint64_t i = 0; i < n; ++i ) 83 | A[i] = Repeat(in); 84 | return A; 85 | } 86 | 87 | static libmaus2::autoarray::AutoArray loadArray(std::string const & fn) 88 | { 89 | libmaus2::aio::InputStreamInstance ISI(fn); 90 | return loadArray(ISI); 91 | } 92 | }; 93 | #endif 94 | -------------------------------------------------------------------------------- /src/bamidrename.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2016-2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 
17 | */ 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | int bamidrename(libmaus2::util::ArgParser const & /* arg */, libmaus2::util::ArgInfo const & arginfo) 36 | { 37 | libmaus2::bambam::BamDecoder dec(std::cin); 38 | libmaus2::bambam::BamAlignment & algn = dec.getAlignment(); 39 | libmaus2::bambam::BamBlockWriterBase::unique_ptr_type Pwriter(libmaus2::bambam::BamBlockWriterBaseFactory::construct(dec.getHeader(),arginfo, 0)); 40 | uint64_t id = 0; 41 | 42 | while ( dec.readAlignment() ) 43 | { 44 | std::ostringstream ostr; 45 | 46 | ostr << "L0/" << id++ << "/0_" << algn.getLseq(); 47 | 48 | std::string const newname = ostr.str(); 49 | 50 | algn.replaceName(newname.begin(),newname.size()); 51 | 52 | Pwriter->writeAlignment(algn); 53 | } 54 | 55 | return EXIT_SUCCESS; 56 | } 57 | 58 | int main(int argc, char * argv[]) 59 | { 60 | try 61 | { 62 | libmaus2::util::ArgParser arg(argc,argv); 63 | 64 | if ( arg.uniqueArgPresent("v") || arg.uniqueArgPresent("version") ) 65 | { 66 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 67 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." << std::endl; 68 | return EXIT_SUCCESS; 69 | } 70 | else if ( arg.uniqueArgPresent("h") || arg.uniqueArgPresent("help") ) 71 | { 72 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 73 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." 
<< std::endl; 74 | std::cerr << "\n"; 75 | std::cerr << "usage: " << arg.progname << " [options] out.bam\n"; 76 | return EXIT_SUCCESS; 77 | } 78 | else 79 | { 80 | libmaus2::util::ArgInfo const arginfo(argc,argv); 81 | return bamidrename(arg,arginfo); 82 | } 83 | } 84 | catch(std::exception const & ex) 85 | { 86 | std::cerr << ex.what() << std::endl; 87 | return EXIT_FAILURE; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/DebruijnGraphInterface.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | */ 18 | #if ! 
/**
 * abstract interface for De Bruijn graph objects
 *
 * All member functions except the destructor are pure virtual. The types
 * OffsetLikely, Node and Links are declared elsewhere in the project.
 *
 * NOTE(review): the template arguments of unique_ptr/shared_ptr and of the
 * std::pair return/argument types below appear to have been lost when this
 * text was extracted; restore them from the original header before compiling.
 **/
struct DebruijnGraphInterface
{
	typedef DebruijnGraphInterface this_type;
	typedef libmaus2::util::unique_ptr::type unique_ptr_type;
	typedef libmaus2::util::shared_ptr::type shared_ptr_type;

	// virtual destructor so objects can be destroyed through an interface pointer
	virtual ~DebruijnGraphInterface() {}

	// I points to an array of (data pointer,number) pairs, o is presumably the number of pairs (confirm with implementation)
	virtual void setup(std::pair< uint8_t const *, uint64_t> const * I, uint64_t const o) = 0;
	virtual void filterFreq(uint64_t const f, uint64_t const no) = 0;
	// static double getDefaultComputeFeasibleKmerPositionsThres() { return 1e-3; }
	virtual void computeFeasibleKmerPositions(OffsetLikely const & offsetLikely, double const thres) = 0;
	virtual void getLevelSuccessors(unsigned int const s) = 0;
	virtual void setupNodes() = 0;
	virtual void setupAddHeap(uint64_t const o) = 0;
	// logstr is passed as a pointer, presumably so that it can be null to disable logging (confirm with implementation)
	virtual bool addNextFromHeap(std::ostream * logstr) = 0;
	virtual bool traverse(int64_t const lmin, int64_t const lmax, std::pair< uint8_t const *, uint64_t> const * MA, uint64_t const MAo,
		uint64_t const maxfrontpath, uint64_t const maxfullpath) = 0;
	virtual std::pair checkCandidates(std::pair< uint8_t const *, uint64_t> const * I, uint64_t const o) = 0;
	virtual std::pair checkCandidatesU(std::pair< uint8_t const *, uint64_t> const * I, uint64_t const o) = 0;
	// candidate access by index i, see getNumCandidates() for the number of candidates
	virtual std::pair getCandidate(uint64_t const i) const = 0;
	virtual uint64_t getNumCandidates() const = 0;
	// candidate error as double, candidate given explicitly as a pair
	virtual double getCandidateError(
		std::pair< uint8_t const *, uint64_t> const * I,
		uint64_t const o,
		std::pair Ucand
	) = 0;
	// candidate error as double, candidate given by number id
	virtual double getCandidateError(
		std::pair< uint8_t const *, uint64_t> const * I, uint64_t const o,
		uint64_t const id
	) = 0;
	virtual double getCandidateWeight(uint64_t const i) const = 0;
	virtual Node const * getNodeVirtual(uint64_t const v) const = 0;
	virtual bool isEdgeActiveVirtual(uint64_t const from, uint64_t const to) const = 0;
	// results are passed back via the Links object L
	virtual void getActiveSuccessorsVirtual(uint64_t const v, Links & L) const = 0;
	virtual void getSuccessorsVirtual(uint64_t const v, Links & L) const = 0;
	virtual uint64_t getKmerSize() const = 0;
	virtual std::string printNode(Node const & S) const = 0;
	virtual std::ostream & print(std::ostream & out) const = 0;

	// integer valued (uint64_t) variants of getCandidateError
	virtual uint64_t getCandidateErrorU(std::pair< uint8_t const *, uint64_t> const * I, uint64_t const o, std::pair Ucand) = 0;
	virtual uint64_t getCandidateErrorU(std::pair< uint8_t const *, uint64_t> const * I, uint64_t const o, uint64_t const id) = 0;
	virtual uint64_t getActiveEdgeWeightVirtual(uint64_t const from, uint64_t const to) const = 0;
	virtual void printStretches(uint64_t const first, std::ostream & ostr) const = 0;
	// NOTE(review): presumably writes the graph in GraphViz dot format, confirm with implementation
	virtual void toDot(std::ostream & out) const = 0;
};
/**
 * dot product
 *
 * Sparse representation of a vector of doubles: coefficient i of the vector
 * is V[i-firstsign] for firstsign <= i < firstsign+V.size() and zero otherwise.
 **/
struct DotProduct
{
	// number of bits used for the shifted (fixed point) coefficients
	static unsigned int getShift()
	{
		return 32;
	}
	// multiplier corresponding to getShift(), i.e. 2^getShift()
	static uint64_t getMult()
	{
		return (1ull<<getShift());
	}

	// index of first stored coefficient
	uint64_t firstsign;
	// vector of coefficients
	std::vector<double> V;
	// vector of shift coefficients
	// NOTE(review): the element type was lost in extraction and is reconstructed as uint64_t
	// (getMult() * V[i] suggests fixed point values); confirm against the original header
	std::vector<uint64_t> VS;

	// empty vector, firstsign is set to zero so that size() is well defined
	DotProduct() : firstsign(0) {}
	DotProduct(uint64_t const rfirstsign, std::vector<double> const & rV)
	: firstsign(rfirstsign), V(rV)
	{

	}

	// fill VS with the coefficients of V multiplied by getMult()
	void computeShifted()
	{
		VS.resize(V.size());
		double const mult = getMult();
		for ( uint64_t i = 0; i < V.size(); ++i )
			VS[i] = mult * V[i];
	}

	// print one line "index<TAB>value" for each non zero coefficient
	std::ostream & printData(std::ostream & out) const
	{
		for ( uint64_t i = 0; i < V.size(); ++i )
			if ( V[i] )
				out << i+firstsign << "\t" << V[i] << "\n";
		return out;
	}

	// one plus the index of the last stored coefficient
	uint64_t size() const
	{
		return firstsign + V.size();
	}

	// get coefficient i (zero outside of the stored range)
	double operator[](uint64_t const i) const
	{
		if ( i < firstsign )
			return 0.0;

		uint64_t j = i - firstsign;

		if ( j < V.size() )
			return V[j];
		else
			return 0.0;
	}

	// divide coefficient i by div if i is inside the stored range (no effect otherwise)
	void normaliseValue(uint64_t const i, double const div)
	{
		if ( i >= firstsign )
		{
			uint64_t const j = i-firstsign;

			if ( j < V.size() )
				V[j] /= div;
		}
	}

	// compute product with the Os values accessible via O
	template<typename iterator>
	double dotproduct(iterator O, uint64_t const Os) const
	{
		double s = 0;

		// iterate over length of V
		for ( uint64_t i = 0; i < V.size(); ++i )
		{
			// compute corresponding index j on O
			uint64_t const j = firstsign + i;

			// if j is in range for O
			if ( j < Os )
				s += V[i] * O[j];
			// j is too large, stop
			else
				break;
		}

		return s;
	}

	double dotproduct(std::vector < double > const & O) const
	{
		return dotproduct(O.begin(),O.size());
	}

	// normalise the vector (make dot product between the vector and itself 1)
	void normalise()
	{
		double s = 0.0;
		for ( uint64_t i = 0; i < V.size(); ++i )
			s += V[i]*V[i];

		// an all zero (or empty) vector cannot be normalised; leave it unchanged
		// instead of dividing by zero, which would turn every coefficient into NaN
		if ( !(s > 0.0) )
			return;

		double const c = std::sqrt(1.0/s);
		for ( uint64_t i = 0; i < V.size(); ++i )
			V[i] *= c;
	}
};

/**
 * output operator for dot product
 **/
inline std::ostream & operator<<(std::ostream & out, DotProduct const & DP)
{
	out << "DotProduct(firstsign=" << DP.firstsign << ",";

	for ( uint64_t j = 0; j < DP.V.size(); ++j )
		out << DP.V[j] << ";";
	out << ")";

	return out;
}
defined(DEBRUIJNGRAPHCONTAINER_HPP) 19 | #define DEBRUIJNGRAPHCONTAINER_HPP 20 | 21 | #include 22 | 23 | struct DebruijnGraphContainer 24 | { 25 | typedef DebruijnGraphContainer this_type; 26 | typedef libmaus2::util::unique_ptr::type unique_ptr_type; 27 | 28 | libmaus2::autoarray::AutoArray < DebruijnGraphInterface::unique_ptr_type > ADG; 29 | 30 | DebruijnGraphContainer( 31 | double const est_cor, 32 | uint64_t const kmersizelow, 33 | uint64_t const kmersizehigh, 34 | std::map < uint64_t, KmerLimit::shared_ptr_type > const & MKL 35 | ) : ADG( kmersizehigh-kmersizelow + 1) 36 | { 37 | for ( uint64_t k = kmersizelow; k <= kmersizehigh; ++k ) 38 | { 39 | uint64_t const j = k - kmersizelow; 40 | 41 | switch ( k ) 42 | { 43 | case 3: 44 | { 45 | DebruijnGraphInterface::unique_ptr_type tptr(new DebruijnGraph<3>(est_cor,*(MKL.find(3)->second))); 46 | ADG[j] = UNIQUE_PTR_MOVE(tptr); 47 | break; 48 | } 49 | case 4: 50 | { 51 | DebruijnGraphInterface::unique_ptr_type tptr(new DebruijnGraph<4>(est_cor,*(MKL.find(4)->second))); 52 | ADG[j] = UNIQUE_PTR_MOVE(tptr); 53 | break; 54 | } 55 | case 5: 56 | { 57 | DebruijnGraphInterface::unique_ptr_type tptr(new DebruijnGraph<5>(est_cor,*(MKL.find(5)->second))); 58 | ADG[j] = UNIQUE_PTR_MOVE(tptr); 59 | break; 60 | } 61 | case 6: 62 | { 63 | DebruijnGraphInterface::unique_ptr_type tptr(new DebruijnGraph<6>(est_cor,*(MKL.find(6)->second))); 64 | ADG[j] = UNIQUE_PTR_MOVE(tptr); 65 | break; 66 | } 67 | case 7: 68 | { 69 | DebruijnGraphInterface::unique_ptr_type tptr(new DebruijnGraph<7>(est_cor,*(MKL.find(7)->second))); 70 | ADG[j] = UNIQUE_PTR_MOVE(tptr); 71 | break; 72 | } 73 | case 8: 74 | { 75 | DebruijnGraphInterface::unique_ptr_type tptr(new DebruijnGraph<8>(est_cor,*(MKL.find(8)->second))); 76 | ADG[j] = UNIQUE_PTR_MOVE(tptr); 77 | break; 78 | } 79 | case 9: 80 | { 81 | DebruijnGraphInterface::unique_ptr_type tptr(new DebruijnGraph<9>(est_cor,*(MKL.find(9)->second))); 82 | ADG[j] = UNIQUE_PTR_MOVE(tptr); 83 | break; 84 | } 85 | 
case 10: 86 | { 87 | DebruijnGraphInterface::unique_ptr_type tptr(new DebruijnGraph<10>(est_cor,*(MKL.find(10)->second))); 88 | ADG[j] = UNIQUE_PTR_MOVE(tptr); 89 | break; 90 | } 91 | case 11: 92 | { 93 | DebruijnGraphInterface::unique_ptr_type tptr(new DebruijnGraph<11>(est_cor,*(MKL.find(11)->second))); 94 | ADG[j] = UNIQUE_PTR_MOVE(tptr); 95 | break; 96 | } 97 | case 12: 98 | { 99 | DebruijnGraphInterface::unique_ptr_type tptr(new DebruijnGraph<12>(est_cor,*(MKL.find(12)->second))); 100 | ADG[j] = UNIQUE_PTR_MOVE(tptr); 101 | break; 102 | } 103 | default: 104 | { 105 | libmaus2::exception::LibMausException lme; 106 | lme.getStream() << "k-mer size " << k << " is not compiled in" << std::endl; 107 | lme.finish(); 108 | throw lme; 109 | break; 110 | } 111 | } 112 | } 113 | } 114 | }; 115 | #endif 116 | -------------------------------------------------------------------------------- /src/fillfasta.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | consensus 3 | Copyright (C) 2016 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 
17 | */ 18 | #include 19 | #include 20 | #include 21 | 22 | uint64_t getId(libmaus2::fastx::FastAReader::pattern_type const & pat) 23 | { 24 | std::string sid = pat.getShortStringId(); 25 | assert ( sid.find('_') != std::string::npos ); 26 | sid = sid.substr(sid.find('_')+1); 27 | std::istringstream istr(sid); 28 | uint64_t id; 29 | istr >> id; 30 | assert ( istr && istr.peek() == std::istream::traits_type::eof() ); 31 | return id; 32 | } 33 | 34 | int fillfasta(libmaus2::util::ArgParser const & arg) 35 | { 36 | std::string const fullfn = arg[0]; 37 | std::string const partfn = arg[1]; 38 | 39 | libmaus2::fastx::FastAReader::pattern_type fullpat; 40 | libmaus2::fastx::FastAReader::pattern_type partpat; 41 | 42 | libmaus2::fastx::FastaPeeker fullpeek(fullfn); 43 | libmaus2::fastx::FastaPeeker partpeek(partfn); 44 | 45 | while ( fullpeek.peekNext(fullpat) && partpeek.peekNext(partpat) ) 46 | { 47 | uint64_t const fullid = getId(fullpat); 48 | uint64_t const partid = getId(partpat); 49 | 50 | if ( fullid < partid ) 51 | { 52 | fullpeek.getNext(fullpat); 53 | 54 | for ( uint64_t i = 0; i < fullpat.spattern.size(); ++i ) 55 | fullpat.spattern[i] = ::tolower(fullpat.spattern[i]); 56 | fullpat.pattern = fullpat.spattern.c_str(); 57 | 58 | fullpat.printMultiLine(std::cout,80); 59 | } 60 | else if ( partid < fullid ) 61 | { 62 | partpeek.getNext(partpat); 63 | assert ( false ); 64 | } 65 | else 66 | { 67 | assert ( partid == fullid ); 68 | fullpeek.getNext(fullpat); 69 | partpeek.getNext(partpat); 70 | partpat.printMultiLine(std::cout,80); 71 | } 72 | } 73 | 74 | assert ( ! 
partpeek.peekNext(partpat) ); 75 | 76 | while ( fullpeek.getNext(fullpat) ) 77 | { 78 | for ( uint64_t i = 0; i < fullpat.spattern.size(); ++i ) 79 | fullpat.spattern[i] = ::tolower(fullpat.spattern[i]); 80 | fullpat.pattern = fullpat.spattern.c_str(); 81 | 82 | fullpat.printMultiLine(std::cout,80); 83 | } 84 | 85 | return EXIT_SUCCESS; 86 | } 87 | 88 | int main(int argc, char * argv[]) 89 | { 90 | try 91 | { 92 | libmaus2::util::ArgParser const arg(argc,argv); 93 | 94 | if ( arg.uniqueArgPresent("v") || arg.uniqueArgPresent("version") ) 95 | { 96 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 97 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." << std::endl; 98 | return EXIT_SUCCESS; 99 | } 100 | else if ( arg.uniqueArgPresent("h") || arg.uniqueArgPresent("help") || arg.size() < 2 ) 101 | { 102 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 103 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." << std::endl; 104 | std::cerr << "\n"; 105 | std::cerr << "usage: " << arg.progname << " uncorrected.fasta corrected_mapped.fasta\n"; 106 | return EXIT_SUCCESS; 107 | } 108 | else 109 | { 110 | return fillfasta(arg); 111 | } 112 | } 113 | catch(std::exception const & ex) 114 | { 115 | std::cerr << ex.what() << std::endl; 116 | return EXIT_FAILURE; 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/ComputeOffsetLikely.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 
9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | */ 18 | #if ! defined(COMPUTEOFFSETLIKELY_HPP) 19 | #define COMPUTEOFFSETLIKELY_HPP 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | OffsetLikely computeOffsetLikely( 27 | // maximum length considered 28 | uint64_t const maxl, 29 | // I prob 30 | double const p_i, 31 | // D prob 32 | double const p_d 33 | ) 34 | { 35 | // std::vector < DotProduct > VD; 36 | OffsetLikely VD; 37 | 38 | // compute probability bins for number of insertions at/before a single position 39 | // double const p_i = 0.1; 40 | double const q_i = 1.0 - p_i; 41 | double f_i = q_i; 42 | 43 | std::vector < double > P_I; 44 | while ( f_i >= 1e-7 ) 45 | { 46 | P_I.push_back(f_i); 47 | f_i *= p_i; 48 | } 49 | 50 | // convolution accumulation vector for insertions 51 | std::vector < double > C_I(1,1.0); 52 | 53 | // double const p_d = 0.02; 54 | 55 | // iterate over length 56 | for ( uint64_t l = 0; l <= maxl; ++l ) 57 | { 58 | // std::cerr << std::string(40,'*') << l << std::string(40,'*') << std::endl; 59 | // 60 | // add another round of insertions 61 | #if 1 62 | C_I = libmaus2::math::Convolution::convolutionFFTRef(C_I,P_I); 63 | #else 64 | C_I = libmaus2::math::Convolution::convolutionFFT(C_I,P_I); 65 | #endif 66 | 67 | // likelihood of deletion count 68 | std::vector < libmaus2::math::GmpFloat > V_D = libmaus2::math::Binom::binomVector(p_d, l, 512); 69 | assert ( V_D.size() ); 70 | 71 | // insertion vector with V_D.size()-1 leading zeroes 72 | std::vector < double > V_I (V_D.size()-1+C_I.size()); 73 | std::copy( 74 | C_I.begin(),C_I.end(), 75 | V_I.begin() + (V_D.size()-1) 76 | ); 77 | 78 | // 
reverse deletion vector 79 | std::reverse(V_D.begin(),V_D.end()); 80 | 81 | #if 0 82 | for ( uint64_t j = 0; j < V_D.size(); ++j ) 83 | if ( static_cast(V_D[j]) >= 1e-5 ) 84 | std::cerr << "D j=" << j << " V_D=" << V_D[j] << std::endl; 85 | for ( uint64_t j = 0; j < V_I.size(); ++j ) 86 | if ( V_I[j] >= 1e-5 ) 87 | std::cerr << "I j=" << j << " V_I=" << V_I[j] << std::endl; 88 | #endif 89 | 90 | // compute convolution 91 | std::vector < double > F_I = libmaus2::math::Convolution::convolutionFFTRef(V_D,V_I); 92 | // first significant value found 93 | bool signfound = false; 94 | // index of first significant value 95 | int64_t firstsign = std::numeric_limits::min(); 96 | // value vector 97 | std::vector < double > VP; 98 | 99 | for ( uint64_t j = 0; j < F_I.size(); ++j ) 100 | if ( F_I[j] >= 1e-5 ) 101 | { 102 | if ( ! signfound ) 103 | { 104 | signfound = true; 105 | firstsign = static_cast(j)-static_cast(l); 106 | } 107 | 108 | uint64_t const offset = 109 | static_cast(j)-static_cast(l) - firstsign; 110 | 111 | while ( ! 
(offset < VP.size()) ) 112 | VP.push_back(0); 113 | VP[offset] = F_I[j]; 114 | 115 | // std::cerr << "F j=" << static_cast(j)-static_cast(2*l) << " abs=" << j-l << " off=" << offset << " F_I=" << F_I[j] << std::endl; 116 | } 117 | assert ( firstsign >= 0 ); 118 | 119 | VD.push_back(DotProduct(firstsign,VP)); 120 | // std::cerr << "first sign " << firstsign << std::endl; 121 | 122 | // add another round of insertions 123 | //C_I = libmaus2::math::Convolution::convolutionFFT(C_I,P_I); 124 | } 125 | 126 | #if 0 127 | for ( uint64_t i = 0; i < VD.size(); ++i ) 128 | VD[i].normalise(); 129 | #endif 130 | 131 | VD.setup(); 132 | 133 | return VD; 134 | } 135 | #endif 136 | -------------------------------------------------------------------------------- /src/sortfasta.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | consensus 3 | Copyright (C) 2016 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 
17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | struct FastAEntry 26 | { 27 | std::string id; 28 | std::string data; 29 | 30 | FastAEntry() {} 31 | FastAEntry(std::string const & rid, std::string const & rdata) : id(rid), data(rdata) {} 32 | FastAEntry(std::istream & in) 33 | : 34 | id(libmaus2::util::StringSerialisation::deserialiseString(in)), 35 | data(libmaus2::util::StringSerialisation::deserialiseString(in)) 36 | {} 37 | 38 | std::ostream & serialise(std::ostream & out) const 39 | { 40 | libmaus2::util::StringSerialisation::serialiseString(out,id); 41 | libmaus2::util::StringSerialisation::serialiseString(out,data); 42 | return out; 43 | } 44 | 45 | std::istream & deserialise(std::istream & in) 46 | { 47 | *this = FastAEntry(in); 48 | return in; 49 | } 50 | 51 | bool operator<(FastAEntry const & O) const 52 | { 53 | return libmaus2::bambam::StrCmpNum::strcmpnum(id.c_str(),O.id.c_str()) < 0; 54 | } 55 | }; 56 | 57 | int sortfasta(libmaus2::util::ArgParser const & arg) 58 | { 59 | libmaus2::fastx::FastAReader::pattern_type fullpat; 60 | libmaus2::fastx::StreamFastAReaderWrapper fullread(std::cin); 61 | std::string const tmpfilebase = arg.uniqueArgPresent("T") ? 
arg["T"] : libmaus2::util::ArgInfo::getDefaultTmpFileName(arg.progname); 62 | std::string const sortfile = tmpfilebase + ".sort"; 63 | libmaus2::util::TempFileRemovalContainer::addTempFile(sortfile); 64 | libmaus2::sorting::SerialisingSortingBufferedOutputFile SBO(sortfile); 65 | while ( fullread.getNextPatternUnlocked(fullpat) ) 66 | SBO.put(FastAEntry(fullpat.sid,fullpat.spattern)); 67 | libmaus2::sorting::SerialisingSortingBufferedOutputFile::merger_ptr_type Pmerger(SBO.getMerger()); 68 | FastAEntry O; 69 | while ( Pmerger->getNext(O) ) 70 | { 71 | std::cout << '>' << O.id << "\n"; 72 | std::cout << O.data << "\n"; 73 | } 74 | 75 | return EXIT_SUCCESS; 76 | } 77 | 78 | template 79 | static std::string formatRHS(std::string const & description, default_type def) 80 | { 81 | std::ostringstream ostr; 82 | ostr << description << " (default " << def << ")"; 83 | return ostr.str(); 84 | } 85 | 86 | static std::string helpMessage(libmaus2::util::ArgParser const & arg) 87 | { 88 | std::vector < std::pair < std::string, std::string > > optionMap; 89 | optionMap . 
push_back ( std::pair < std::string, std::string >("T", formatRHS("temporary file prefix",libmaus2::util::ArgInfo::getDefaultTmpFileName(arg.progname)))); 90 | 91 | uint64_t maxlhs = 0; 92 | for ( std::vector < std::pair < std::string, std::string > >::const_iterator ita = optionMap.begin(); ita != optionMap.end(); ++ita ) 93 | { 94 | assert ( ita->first.size() ); 95 | 96 | if ( ita->first.size() == 1 ) 97 | maxlhs = std::max(maxlhs,static_cast(ita->first.size()+1)); 98 | else 99 | maxlhs = std::max(maxlhs,static_cast(ita->first.size()+2)); 100 | } 101 | 102 | std::ostringstream messtr; 103 | for ( std::vector < std::pair < std::string, std::string > >::const_iterator ita = optionMap.begin(); ita != optionMap.end(); ++ita ) 104 | { 105 | std::string const key = ita->first; 106 | 107 | messtr << "\t"; 108 | messtr << std::setw(maxlhs) << std::setfill(' '); 109 | if ( key.size() == 1 ) 110 | messtr << (std::string("-")+key); 111 | else 112 | messtr << (std::string("--")+key); 113 | 114 | messtr << std::setw(0); 115 | 116 | messtr << ": "; 117 | 118 | messtr << ita->second; 119 | messtr << "\n"; 120 | } 121 | 122 | return messtr.str(); 123 | } 124 | 125 | 126 | int main(int argc, char * argv[]) 127 | { 128 | try 129 | { 130 | libmaus2::util::ArgParser const arg(argc,argv); 131 | 132 | if ( arg.uniqueArgPresent("v") || arg.uniqueArgPresent("version") ) 133 | { 134 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 135 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." << std::endl; 136 | return EXIT_SUCCESS; 137 | } 138 | else if ( arg.uniqueArgPresent("h") || arg.uniqueArgPresent("help") ) 139 | { 140 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 141 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." 
<< std::endl; 142 | std::cerr << "\n"; 143 | std::cerr << "usage: " << arg.progname << " [options] out.fasta\n"; 144 | std::cerr << "\n"; 145 | std::cerr << "The following options can be used (no space between option name and parameter allowed):\n\n"; 146 | std::cerr << helpMessage(arg); 147 | return EXIT_SUCCESS; 148 | } 149 | else 150 | { 151 | return sortfasta(arg); 152 | } 153 | 154 | 155 | } 156 | catch(std::exception const & ex) 157 | { 158 | std::cerr << ex.what() << std::endl; 159 | return EXIT_FAILURE; 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/DecodedReadContainer.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | */ 18 | #if ! 
defined(DECODEDREADCONTAINER_HPP) 19 | #define DECODEDREADCONTAINER_HPP 20 | 21 | #include 22 | #include 23 | 24 | struct ReadData 25 | { 26 | typedef ReadData this_type; 27 | typedef libmaus2::util::unique_ptr::type unique_ptr_type; 28 | typedef libmaus2::util::shared_ptr::type shared_ptr_type; 29 | 30 | libmaus2::autoarray::AutoArray A; 31 | uint64_t l; 32 | 33 | ReadData() 34 | { 35 | } 36 | }; 37 | 38 | struct ReadDataTypeInfo 39 | { 40 | typedef ReadData element_type; 41 | typedef element_type::shared_ptr_type pointer_type; 42 | 43 | static pointer_type getNullPointer() 44 | { 45 | return pointer_type(); 46 | } 47 | 48 | static pointer_type deallocate(pointer_type /* p */) 49 | { 50 | return getNullPointer(); 51 | } 52 | }; 53 | 54 | struct ReadDataAllocator 55 | { 56 | typedef ReadData element_type; 57 | typedef element_type::shared_ptr_type pointer_type; 58 | 59 | pointer_type operator()() const 60 | { 61 | return pointer_type(new element_type); 62 | } 63 | }; 64 | 65 | struct ReadDecoder 66 | { 67 | typedef ReadDecoder this_type; 68 | typedef libmaus2::util::unique_ptr::type unique_ptr_type; 69 | typedef libmaus2::util::shared_ptr::type shared_ptr_type; 70 | 71 | libmaus2::dazzler::db::DatabaseFile const & DB; 72 | libmaus2::aio::InputStream::unique_ptr_type bpsfile; 73 | libmaus2::aio::InputStream::unique_ptr_type idxfile; 74 | 75 | ReadDecoder(libmaus2::dazzler::db::DatabaseFile const & rDB) 76 | : DB(rDB), bpsfile(DB.openBaseStream()), idxfile(DB.openIndexStream()) 77 | { 78 | 79 | } 80 | 81 | size_t decodeRead(uint64_t const id, libmaus2::autoarray::AutoArray & A) const 82 | { 83 | return DB.decodeReadAndRC(*idxfile,*bpsfile,id,A); 84 | } 85 | }; 86 | 87 | struct ReadDecoderTypeInfo 88 | { 89 | typedef ReadDecoder element_type; 90 | typedef element_type::shared_ptr_type pointer_type; 91 | 92 | static pointer_type getNullPointer() 93 | { 94 | return pointer_type(); 95 | } 96 | 97 | static pointer_type deallocate(pointer_type /* p */) 98 | { 99 | return 
getNullPointer(); 100 | } 101 | }; 102 | 103 | struct ReadDecoderAllocator 104 | { 105 | typedef ReadDecoder element_type; 106 | typedef element_type::shared_ptr_type pointer_type; 107 | 108 | libmaus2::dazzler::db::DatabaseFile const * DB; 109 | 110 | ReadDecoderAllocator(libmaus2::dazzler::db::DatabaseFile const * rDB) 111 | : DB(rDB) 112 | { 113 | 114 | } 115 | 116 | pointer_type operator()() const 117 | { 118 | return pointer_type(new element_type(*DB)); 119 | } 120 | }; 121 | 122 | struct DecodedReadContainer 123 | { 124 | std::map M; 125 | std::vector V; 126 | 127 | libmaus2::parallel::LockedGrowingFreeList & readDataFreeList; 128 | libmaus2::parallel::LockedGrowingFreeList & readDecoderFreeList; 129 | 130 | ReadDecoder::shared_ptr_type decoder; 131 | 132 | DecodedReadContainer( 133 | libmaus2::parallel::LockedGrowingFreeList & rreadDataFreeList, 134 | libmaus2::parallel::LockedGrowingFreeList & rreadDecoderFreeList 135 | ) : readDataFreeList(rreadDataFreeList), readDecoderFreeList(rreadDecoderFreeList), decoder(readDecoderFreeList.get()) 136 | { 137 | } 138 | 139 | ~DecodedReadContainer() 140 | { 141 | for ( uint64_t i = 0; i < V.size(); ++i ) 142 | if ( V[i] ) 143 | readDataFreeList.put(V[i]); 144 | readDecoderFreeList.put(decoder); 145 | } 146 | 147 | void erase(uint64_t const id) 148 | { 149 | std::map::iterator it = M.find(id); 150 | 151 | if ( it != M.end() ) 152 | { 153 | ReadData::shared_ptr_type R = V[it->second]; 154 | readDataFreeList.put(R); 155 | V[it->second] = ReadData::shared_ptr_type(); 156 | M.erase(it); 157 | } 158 | } 159 | 160 | uint64_t ensurePresent(uint64_t const id) 161 | { 162 | std::map::iterator it = M.find(id); 163 | 164 | if ( it == M.end() ) 165 | { 166 | ReadData::shared_ptr_type R = readDataFreeList.get(); 167 | R->l = decoder->decodeRead(id,R->A); 168 | 169 | uint64_t const vid = V.size(); 170 | V.push_back(R); 171 | M[id] = vid; 172 | } 173 | 174 | it = M.find(id); 175 | assert ( it != M.end() ); 176 | 177 | return 
it->second; 178 | } 179 | 180 | char const * getForwardRead(uint64_t const id) 181 | { 182 | uint64_t const vid = ensurePresent(id); 183 | ReadData::shared_ptr_type const & R = V [ vid ]; 184 | return R->A.begin(); 185 | } 186 | 187 | char const * getReverseComplementRead(uint64_t const id) 188 | { 189 | uint64_t const vid = ensurePresent(id); 190 | ReadData::shared_ptr_type const & R = V [ vid ]; 191 | return R->A.begin() + R->l; 192 | } 193 | 194 | uint64_t getReadLength(uint64_t const id) 195 | { 196 | uint64_t const vid = ensurePresent(id); 197 | ReadData::shared_ptr_type const & R = V [ vid ]; 198 | return R->l; 199 | } 200 | }; 201 | #endif 202 | -------------------------------------------------------------------------------- /src/OffsetLikely.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2016-2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | */ 18 | #if ! 
defined(OFFSETLIKELY_HPP) 19 | #define OFFSETLIKELY_HPP 20 | 21 | #include 22 | #include 23 | 24 | struct OffsetLikely 25 | { 26 | std::vector DP; 27 | std::vector < double > dsum; 28 | std::vector DPnorm; 29 | // read position [] is relevant for reference positions in given interval 30 | std::vector< std::pair > Vsupport; 31 | std::vector DPnormSquare; 32 | 33 | // get supporting ref position lower bound for read position i 34 | uint64_t getSupportLow(int64_t const i) const 35 | { 36 | return expect_true(i < static_cast(Vsupport.size())) ? Vsupport[i].first : DPnorm.size(); 37 | } 38 | 39 | uint64_t getSupportHigh(int64_t const i) const 40 | { 41 | return expect_true(i < static_cast(Vsupport.size())) ? Vsupport[i].second : DPnorm.size(); 42 | } 43 | 44 | uint64_t size() const 45 | { 46 | return DP.size(); 47 | } 48 | 49 | void push_back(DotProduct const & D) 50 | { 51 | DP.push_back(D); 52 | } 53 | 54 | DotProduct const & operator[](uint64_t const i) const 55 | { 56 | return DP[i]; 57 | } 58 | 59 | void setup() 60 | { 61 | dsum.resize(0); 62 | 63 | // maximum size of any dot product (max read position supported + 1) 64 | uint64_t maxsize = 0; 65 | for ( uint64_t i = 0; i < size(); ++i ) 66 | maxsize = std::max(maxsize,DP[i].size()); 67 | 68 | for ( uint64_t i = 0; i < maxsize; ++i ) 69 | { 70 | double sum = 0.0; 71 | 72 | for ( uint64_t j = 0; j < size(); ++j ) 73 | sum += (*this)[j][i]; 74 | 75 | dsum.push_back(sum); 76 | } 77 | 78 | DPnorm = DP; 79 | for ( uint64_t i = 0; i < DPnorm.size(); ++i ) 80 | for ( uint64_t j = 0; j < maxsize; ++j ) 81 | DPnorm[i].normaliseValue(j,dsum[j]); 82 | 83 | uint64_t j = 0, k = 0; 84 | for ( uint64_t i = 0; i < maxsize; ++i ) 85 | { 86 | // update lower end 87 | while ( j < DPnorm.size() && i >= DPnorm[j].firstsign + DPnorm[j].V.size() ) 88 | ++j; 89 | // update upper end 90 | while ( k < DPnorm.size() && DPnorm[k].firstsign <= i ) 91 | ++k; 92 | 93 | Vsupport.push_back(std::pair(j,k)); 94 | } 95 | 96 | DPnormSquare = DP; 97 | 
for ( uint64_t i = 0; i < DPnormSquare.size(); ++i ) 98 | DPnormSquare[i].normalise(); 99 | } 100 | 101 | void plotData(std::string const & prefix) const 102 | { 103 | std::vector Vfn; 104 | std::string const gplfn = prefix + ".gpl"; 105 | libmaus2::util::TempFileRemovalContainer::addTempFile(gplfn); 106 | libmaus2::aio::OutputStreamInstance::unique_ptr_type POSI( 107 | new libmaus2::aio::OutputStreamInstance(gplfn) 108 | ); 109 | 110 | *POSI << "set terminal pdf\n"; 111 | *POSI << "set xlabel \"position in read\"\n"; 112 | *POSI << "set ylabel \"probability\"\n"; 113 | *POSI << "plot [0 to 55] "; 114 | for ( uint64_t i = 0; i < DP.size(); ++i ) 115 | { 116 | std::ostringstream fnostr; 117 | fnostr << prefix << "_" << i << ".gpl"; 118 | std::string const fn = fnostr.str(); 119 | Vfn.push_back(fn); 120 | libmaus2::util::TempFileRemovalContainer::addTempFile(fn); 121 | libmaus2::aio::OutputStreamInstance OSI(fn); 122 | DP[i].printData(OSI); 123 | 124 | if ( i > 0 ) 125 | *POSI << ","; 126 | 127 | *POSI << "\"" << fn << "\" smooth bezier title \"\""; 128 | } 129 | *POSI << "\n"; 130 | POSI->flush(); 131 | POSI.reset(); 132 | 133 | std::ostringstream comstr; 134 | comstr << "gnuplot <" << gplfn << " >" << prefix << ".pdf"; 135 | std::string const com = comstr.str(); 136 | 137 | int const r = system(com.c_str()); 138 | if ( r != EXIT_SUCCESS ) 139 | { 140 | 141 | } 142 | } 143 | 144 | void plotDataSingle(std::string const & prefix, uint64_t const z) const 145 | { 146 | std::vector Vfn; 147 | std::string const gplfn = prefix + ".gpl"; 148 | libmaus2::util::TempFileRemovalContainer::addTempFile(gplfn); 149 | libmaus2::aio::OutputStreamInstance::unique_ptr_type POSI( 150 | new libmaus2::aio::OutputStreamInstance(gplfn) 151 | ); 152 | 153 | *POSI << "set terminal pdf\n"; 154 | *POSI << "set xlabel \"position in read\"\n"; 155 | *POSI << "set ylabel \"probability\"\n"; 156 | *POSI << "plot [0 to 30] "; 157 | bool first = true; 158 | for ( uint64_t i = z; i < DP.size() && i < 
z+1; ++i ) 159 | { 160 | std::ostringstream fnostr; 161 | fnostr << prefix << "_" << i << ".gpl"; 162 | std::string const fn = fnostr.str(); 163 | Vfn.push_back(fn); 164 | libmaus2::util::TempFileRemovalContainer::addTempFile(fn); 165 | libmaus2::aio::OutputStreamInstance OSI(fn); 166 | DP[i].printData(OSI); 167 | 168 | if ( ! first ) 169 | *POSI << ","; 170 | 171 | *POSI << "\"" << fn << "\" smooth bezier title \"position " << z << " in true sequence\""; 172 | first = false; 173 | } 174 | *POSI << "\n"; 175 | POSI->flush(); 176 | POSI.reset(); 177 | 178 | std::ostringstream comstr; 179 | comstr << "gnuplot <" << gplfn << " >" << prefix << ".pdf"; 180 | std::string const com = comstr.str(); 181 | 182 | int const r = system(com.c_str()); 183 | if ( r != EXIT_SUCCESS ) 184 | { 185 | 186 | } 187 | } 188 | }; 189 | #endif 190 | -------------------------------------------------------------------------------- /src/ChainSet.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | */ 18 | #if ! 
defined(CHAINSET_HPP) 19 | #define CHAINSET_HPP 20 | 21 | #include 22 | #include 23 | 24 | struct ChainSet 25 | { 26 | uint64_t const f; 27 | std::vector chainprev; 28 | std::vector childcount; 29 | std::vector childoffsets; 30 | std::vector childlinks; 31 | std::vector childdiagdif; 32 | std::vector chains; 33 | std::vector chainlengths; 34 | uint64_t numchains; 35 | 36 | ChainSet(libmaus2::dazzler::align::Overlap const * RO, uint64_t const rf) 37 | : f(rf), chainprev(f,-1), childcount(f+1,0), childoffsets(f+1,0), childlinks(), childdiagdif(f,0), chains(), chainlengths(), numchains(0) 38 | { 39 | uint64_t b_low = 0; 40 | 41 | while ( b_low < f ) 42 | { 43 | uint64_t b_high = b_low+1; 44 | 45 | while ( b_high < f && RO[b_high].bread == RO[b_low].bread && RO[b_high].isInverse() == RO[b_low].isInverse() ) 46 | ++b_high; 47 | 48 | // std::cerr << "bread=" << RO[b_low].bread << " cnt=" << b_high-b_low << std::endl; 49 | 50 | // std::sort(RO+b_low,RO+b_high,OverlapPosComparator()); 51 | 52 | for ( uint64_t i = b_low; i < b_high; ++i ) 53 | { 54 | int64_t mindif = std::numeric_limits::max(); 55 | 56 | for ( uint64_t j = b_low; j < i; ++j ) 57 | { 58 | if ( RO[j].path.aepos <= RO[i].path.abpos ) 59 | { 60 | #if 0 61 | int64_t const diagend = RO[j].path.aepos - RO[j].path.bepos; 62 | int64_t const diagstart = RO[i].path.abpos - RO[i].path.bbpos; 63 | #endif 64 | int64_t const difa = RO[i].path.abpos - RO[j].path.aepos; 65 | int64_t const difb = RO[i].path.bbpos - RO[j].path.bepos; 66 | assert ( difa >= 0 ); 67 | 68 | if ( difb >= 0 ) 69 | { 70 | int64_t const diagdif = difa+difb; // std::abs(diagstart - diagend); 71 | 72 | if ( diagdif < mindif ) 73 | { 74 | // std::cerr << "check " << RO[j] << " < " << RO[i] << " diagdif=" << diagdif << std::endl; 75 | mindif = diagdif; 76 | chainprev[i] = j; 77 | childdiagdif[i] = diagdif; 78 | } 79 | } 80 | } 81 | } 82 | } 83 | 84 | b_low = b_high; 85 | } 86 | 87 | for ( uint64_t i = 0; i < f; ++i ) 88 | if ( chainprev[i] >= 0 ) 89 | 
assert ( RO[i].bread == RO[chainprev[i]].bread ); 90 | 91 | for ( uint64_t i = 0; i < f; ++i ) 92 | if ( chainprev[i] >= 0 ) 93 | childcount[chainprev[i]]++; 94 | 95 | childoffsets = childcount; 96 | libmaus2::util::PrefixSums::prefixSums(childoffsets.begin(),childoffsets.end()); 97 | 98 | childlinks.resize(childoffsets.back(),-1); 99 | 100 | for ( uint64_t i = 0; i < f; ++i ) 101 | if ( chainprev[i] >= 0 ) 102 | { 103 | uint64_t const parent = chainprev[i]; 104 | uint64_t const offset = childoffsets[ parent ]++; 105 | assert ( childlinks[offset] == -1 ); 106 | childlinks [ offset ] = i; 107 | } 108 | 109 | // restore childoffsets 110 | childoffsets = childcount; 111 | libmaus2::util::PrefixSums::prefixSums(childoffsets.begin(),childoffsets.end()); 112 | 113 | for ( uint64_t i = 0; i < f; ++i ) 114 | for ( uint64_t j = 0; j < childcount[i]; ++j ) 115 | assert ( RO[i].bread == RO[childlinks[childoffsets[i]+j]].bread ); 116 | 117 | // sort childlinks by increasing order of diagdif 118 | for ( uint64_t i = 0; i < f; ++i ) 119 | if ( childcount[i] ) 120 | { 121 | std::vector < std::pair > V(childcount[i]); 122 | 123 | for ( uint64_t j = 0; j < childcount[i]; ++j ) 124 | { 125 | int64_t const to = childlinks[childoffsets[i]+j]; 126 | int64_t const diagdif = childdiagdif[to]; 127 | 128 | V.at(j) = std::pair(diagdif,to); 129 | } 130 | 131 | std::sort(V.begin(),V.end()); 132 | 133 | for ( uint64_t j = 0; j < childcount[i]; ++j ) 134 | childlinks[childoffsets[i]+j] = V[j].second; 135 | } 136 | 137 | #if 0 138 | for ( uint64_t i = 0; i < f; ++i ) 139 | for ( uint64_t j = 0; j < childcount[i]; ++j ) 140 | { 141 | int64_t const from = i; 142 | int64_t const to = childlinks[childoffsets[i]+j]; 143 | std::cerr << from << " " << to << " " << childdiagdif[to] << " " 144 | << RO[from].getHeader() << " -> " << RO[to].getHeader() << std::endl; 145 | } 146 | #endif 147 | 148 | std::set sunused; 149 | for ( uint64_t i = 0; i < f; ++i ) 150 | sunused.insert(i); 151 | 152 | while ( 
sunused.size() ) 153 | { 154 | int64_t cur = *(sunused.begin()); 155 | uint64_t len = 0; 156 | 157 | while ( cur >= 0 ) 158 | { 159 | len += 1; 160 | chains.push_back(cur); 161 | sunused.erase(cur); 162 | 163 | if ( childcount[cur] ) 164 | { 165 | int64_t next = childlinks[childoffsets[cur]]; 166 | childoffsets[cur]++; 167 | childcount[cur]--; 168 | cur = next; 169 | } 170 | else 171 | { 172 | cur = -1; 173 | } 174 | } 175 | 176 | chainlengths.push_back(len); 177 | } 178 | numchains = chainlengths.size(); 179 | chainlengths.push_back(0); 180 | 181 | libmaus2::util::PrefixSums::prefixSums(chainlengths.begin(),chainlengths.end()); 182 | } 183 | 184 | uint64_t size() const 185 | { 186 | return numchains; 187 | } 188 | 189 | uint64_t size(uint64_t const i) const 190 | { 191 | assert ( i < size() ); 192 | return chainlengths[i+1] - chainlengths[i]; 193 | } 194 | 195 | uint64_t operator()(uint64_t const i, uint64_t const j) const 196 | { 197 | return chains [ chainlengths[i] + j ]; 198 | } 199 | }; 200 | #endif 201 | -------------------------------------------------------------------------------- /src/spikenoise.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2016-2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 
17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | static double getDefaultSubstRate() 26 | { 27 | return 1; 28 | } 29 | 30 | static double getDefaultInsRate() 31 | { 32 | return 0.00; 33 | } 34 | 35 | static double getDefaultDelRate() 36 | { 37 | return 0.00; 38 | } 39 | 40 | static double getDefaultErrorRate() 41 | { 42 | return 0.01; 43 | } 44 | 45 | static double getDefaultErrorRateStdDev() 46 | { 47 | return 0; 48 | } 49 | 50 | int spikenoise(libmaus2::util::ArgParser const & arg) 51 | { 52 | struct timeval tv; 53 | gettimeofday(&tv,NULL); 54 | libmaus2::random::Random::setup(static_cast((static_cast(tv.tv_sec) ^ static_cast(tv.tv_usec)))); 55 | 56 | double const substrate = arg.uniqueArgPresent("s") ? arg.getParsedArg("s") : getDefaultSubstRate(); 57 | double const insrate = arg.uniqueArgPresent("i") ? arg.getParsedArg("i") : getDefaultInsRate(); 58 | double const delrate = arg.uniqueArgPresent("d") ? arg.getParsedArg("d") : getDefaultDelRate(); 59 | double const erate = arg.uniqueArgPresent("e") ? arg.getParsedArg("e") : getDefaultErrorRate(); 60 | double const eratestddev = arg.uniqueArgPresent("stddev") ? arg.getParsedArg("stddev") : getDefaultErrorRateStdDev(); 61 | bool const omitoriginal = arg.uniqueArgPresent("omitoriginal"); 62 | 63 | libmaus2::fastx::StreamFastAReaderWrapper SFAR(std::cin); 64 | libmaus2::fastx::StreamFastAReaderWrapper::pattern_type pattern; 65 | 66 | // read sequence 67 | while ( SFAR.getNextPatternUnlocked(pattern) ) 68 | { 69 | if ( ! 
omitoriginal ) 70 | { 71 | // output unmodified sequence 72 | std::cout << pattern; 73 | } 74 | 75 | // compute modified sequence + operations applied 76 | std::pair const P = libmaus2::random::DNABaseNoiseSpiker::modifyAndComment(pattern.spattern,substrate,insrate,delrate,0.0 /* homopol */,erate,eratestddev); 77 | 78 | // set data in pattern object 79 | pattern.sid = P.second; 80 | pattern.spattern = P.first; 81 | pattern.pattern = pattern.spattern.c_str(); 82 | 83 | // output modified sequence 84 | std::cout << pattern; 85 | } 86 | 87 | return EXIT_SUCCESS; 88 | } 89 | 90 | template 91 | static std::string formatRHS(std::string const & description, default_type def) 92 | { 93 | std::ostringstream ostr; 94 | ostr << description << " (default " << def << ")"; 95 | return ostr.str(); 96 | } 97 | 98 | static std::string helpMessage(libmaus2::util::ArgParser const & /* arg */) 99 | { 100 | std::vector < std::pair < std::string, std::string > > optionMap; 101 | optionMap . push_back ( std::pair < std::string, std::string >("s", formatRHS("substitution rate",getDefaultSubstRate()))); 102 | optionMap . push_back ( std::pair < std::string, std::string >("i", formatRHS("insertion rate",getDefaultInsRate()))); 103 | optionMap . push_back ( std::pair < std::string, std::string >("d", formatRHS("deletion rate",getDefaultDelRate()))); 104 | optionMap . push_back ( std::pair < std::string, std::string >("e", formatRHS("error rate",getDefaultErrorRate()))); 105 | optionMap . 
push_back ( std::pair < std::string, std::string >("stddev", formatRHS("error rate standard deviation",getDefaultErrorRateStdDev()))); 106 | 107 | uint64_t maxlhs = 0; 108 | for ( std::vector < std::pair < std::string, std::string > >::const_iterator ita = optionMap.begin(); ita != optionMap.end(); ++ita ) 109 | { 110 | assert ( ita->first.size() ); 111 | 112 | if ( ita->first.size() == 1 ) 113 | maxlhs = std::max(maxlhs,static_cast(ita->first.size()+1)); 114 | else 115 | maxlhs = std::max(maxlhs,static_cast(ita->first.size()+2)); 116 | } 117 | 118 | std::ostringstream messtr; 119 | for ( std::vector < std::pair < std::string, std::string > >::const_iterator ita = optionMap.begin(); ita != optionMap.end(); ++ita ) 120 | { 121 | std::string const key = ita->first; 122 | 123 | messtr << "\t"; 124 | messtr << std::setw(maxlhs) << std::setfill(' '); 125 | if ( key.size() == 1 ) 126 | messtr << (std::string("-")+key); 127 | else 128 | messtr << (std::string("--")+key); 129 | 130 | messtr << std::setw(0); 131 | 132 | messtr << ": "; 133 | 134 | messtr << ita->second; 135 | messtr << "\n"; 136 | } 137 | 138 | return messtr.str(); 139 | } 140 | 141 | 142 | int main(int argc, char * argv[]) 143 | { 144 | try 145 | { 146 | libmaus2::util::ArgParser const arg(argc,argv); 147 | 148 | if ( arg.uniqueArgPresent("v") || arg.uniqueArgPresent("version") ) 149 | { 150 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 151 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." << std::endl; 152 | return EXIT_SUCCESS; 153 | } 154 | else if ( arg.uniqueArgPresent("h") || arg.uniqueArgPresent("help") ) 155 | { 156 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 157 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." 
<< std::endl; 158 | std::cerr << "\n"; 159 | std::cerr << "usage: " << arg.progname << " [options] out.fasta\n"; 160 | std::cerr << "\n"; 161 | std::cerr << "The following options can be used (no space between option name and parameter allowed):\n\n"; 162 | std::cerr << helpMessage(arg); 163 | return EXIT_SUCCESS; 164 | } 165 | else 166 | { 167 | return spikenoise(arg); 168 | } 169 | } 170 | catch(std::exception const & ex) 171 | { 172 | std::cerr << ex.what() << std::endl; 173 | return EXIT_FAILURE; 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /src/rlastobam.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2018 German Tischler-Höhle 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 
17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | void process( 30 | std::vector < libmaus2::dazzler::align::Overlap > & VOVL, 31 | libmaus2::dazzler::db::DatabaseFile & DBa, 32 | libmaus2::dazzler::db::DatabaseFile & DBb, 33 | int64_t const tspace, 34 | libmaus2::bambam::BamBlockWriterBase & writer, 35 | uint64_t const numthreads 36 | ) 37 | { 38 | std::vector < ::libmaus2::fastx::UCharBuffer::shared_ptr_type > VB(VOVL.size()); 39 | 40 | #if defined(_OPENMP) 41 | #pragma omp parallel for num_threads(numthreads) schedule(dynamic,1) 42 | #endif 43 | for ( uint64_t i = 0; i < VOVL.size(); ++i ) 44 | { 45 | libmaus2::dazzler::align::Overlap const & OVL = VOVL[i]; 46 | 47 | libmaus2::lcs::NPLinMem np; 48 | libmaus2::lcs::AlignmentTraceContainer ATC; 49 | libmaus2::autoarray::AutoArray< std::pair > Aopblocks; 50 | libmaus2::autoarray::AutoArray Aop; 51 | ::libmaus2::fastx::UCharBuffer::shared_ptr_type sbuffer(new ::libmaus2::fastx::UCharBuffer); 52 | ::libmaus2::fastx::UCharBuffer & buffer = *sbuffer; 53 | VB[i] = sbuffer; 54 | libmaus2::bambam::BamSeqEncodeTable seqenc; 55 | 56 | std::string const a = DBa.decodeRead(OVL.aread,false); 57 | std::string const b = DBb.decodeRead(OVL.bread,OVL.isInverse()); 58 | std::string const s = DBb.getReadName(OVL.bread); 59 | std::string const h = std::string(b.size(),'H'); 60 | 61 | OVL.computeTrace( 62 | reinterpret_cast(a.c_str()), 63 | reinterpret_cast(b.c_str()), 64 | tspace, 65 | ATC, 66 | np 67 | ); 68 | 69 | uint64_t const ciglen = libmaus2::bambam::CigarStringParser::traceToCigar(ATC,Aopblocks,Aop,0,OVL.path.bbpos,b.size() - OVL.path.bepos,0); 70 | 71 | libmaus2::bambam::BamAlignmentEncoderBase::encodeAlignment( 72 | buffer,seqenc, 73 | s.c_str(), 74 | s.size(), 75 | OVL.aread, 76 | OVL.path.abpos, 77 | 255 /* mapq */, 78 | OVL.isInverse() ? 
libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_FREVERSE : 0, 79 | Aop.begin(),ciglen, 80 | -1 /* nextrefid */, 81 | -1 /* nextpos */, 82 | 0 /* template length */, 83 | b.begin(), 84 | b.size(), 85 | h.begin() 86 | ); 87 | 88 | libmaus2::bambam::BamAlignmentEncoderBase::putAuxString(buffer,"RG","RGID"); 89 | libmaus2::bambam::BamAlignmentEncoderBase::putAuxNumber(buffer,"tr",'i',OVL.isTrue()); 90 | } 91 | 92 | for ( uint64_t i = 0; i < VOVL.size(); ++i ) 93 | { 94 | ::libmaus2::fastx::UCharBuffer & buffer = *(VB[i]); 95 | writer.writeBamBlock(buffer.buffer,buffer.length); 96 | } 97 | 98 | VOVL.resize(0); 99 | } 100 | 101 | int main(int argc, char * argv[]) 102 | { 103 | try 104 | { 105 | libmaus2::util::ArgParser const arg(argc,argv); 106 | libmaus2::util::ArgInfo arginfo(argc,argv); 107 | 108 | if ( arg.size() < 4 ) 109 | { 110 | std::cerr << "usage: " << argv[0] << " [<-tnumthreads>] >out.bam"; 111 | return EXIT_FAILURE; 112 | } 113 | 114 | std::string const dba = arg[0]; 115 | std::string const dbb = arg[1]; 116 | std::string const ref = arg[2]; 117 | std::string const las = arg[3]; 118 | 119 | uint64_t const numthreads = arg.uniqueArgPresent("t") ? 
arg.getUnsignedNumericArg("t") : libmaus2::parallel::NumCpus::getNumLogicalProcessors(); 120 | 121 | arginfo.replaceKey("level","0"); 122 | 123 | std::map M; 124 | { 125 | libmaus2::fastx::FastAReader R(ref); 126 | libmaus2::fastx::FastAReader::pattern_type pattern; 127 | for ( uint64_t i = 0; R.getNextPatternUnlocked(pattern); ++i ) 128 | M [ i ] = pattern.getShortStringId(); 129 | } 130 | 131 | libmaus2::dazzler::db::DatabaseFile DBa(dba); 132 | DBa.computeTrimVector(); 133 | libmaus2::dazzler::db::DatabaseFile DBb(dbb); 134 | DBb.computeTrimVector(); 135 | 136 | libmaus2::dazzler::align::AlignmentFileRegion::unique_ptr_type tptr(libmaus2::dazzler::align::OverlapIndexer::openAlignmentFileWithoutIndex(las)); 137 | libmaus2::dazzler::align::Overlap OVL; 138 | int64_t const tspace = libmaus2::dazzler::align::AlignmentFile::getTSpace(las); 139 | 140 | std::ostringstream headerstream; 141 | headerstream << "@HD\tVN:1.5\tSO:unknown\n"; 142 | for ( uint64_t i = 0; i < DBa.size(); ++i ) 143 | { 144 | uint64_t const l = DBa[i].size(); 145 | std::string const s = M.find(i)->second; 146 | headerstream << "@SQ\tSN:" << s << "\tLN:" << l << "\n"; 147 | } 148 | headerstream << "@RG\tID:RGID\tSM:SAMPLE\n"; 149 | 150 | libmaus2::bambam::BamHeader bamheader(headerstream.str()); 151 | libmaus2::bambam::BamBlockWriterBase::unique_ptr_type writer(libmaus2::bambam::BamBlockWriterBaseFactory::construct(bamheader, arginfo)); 152 | 153 | std::vector < libmaus2::dazzler::align::Overlap > VOVL; 154 | 155 | while ( tptr->getNextOverlap(OVL) ) 156 | { 157 | VOVL.push_back(OVL); 158 | 159 | if ( VOVL.size() >= 1024 ) 160 | { 161 | process(VOVL,DBa,DBb,tspace,*writer,numthreads); 162 | } 163 | 164 | } 165 | 166 | process(VOVL,DBa,DBb,tspace,*writer,numthreads); 167 | 168 | writer.reset(); 169 | } 170 | catch(std::exception const & ex) 171 | { 172 | std::cerr << ex.what() << std::endl; 173 | return EXIT_FAILURE; 174 | } 175 | } 176 | 
-------------------------------------------------------------------------------- /src/encodegraph.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | static void checkGraph(std::istream & istr, std::vector const & arg) 24 | { 25 | libmaus2::dazzler::align::GraphDecoder GD(istr); 26 | 27 | libmaus2::dazzler::align::GraphDecoderContext::shared_ptr_type scontext = GD.getContext(); 28 | libmaus2::dazzler::align::GraphDecoderContext & context = *scontext; 29 | 30 | for ( uint64_t i = 0; i < arg.size(); ++i ) 31 | { 32 | libmaus2::dazzler::align::AlignmentFileRegion::unique_ptr_type AF(libmaus2::dazzler::align::OverlapIndexer::openAlignmentFileWithoutIndex(arg[i])); 33 | libmaus2::dazzler::align::Overlap OVL; 34 | 35 | while ( AF->peekNextOverlap(OVL) ) 36 | { 37 | int64_t const aid = OVL.aread; 38 | std::vector < libmaus2::dazzler::align::Overlap > V; 39 | 40 | while ( AF->peekNextOverlap(OVL) && OVL.aread == aid ) 41 | { 42 | AF->getNextOverlap(OVL); 43 | V.push_back(OVL); 44 | } 45 | 46 | GD.decode(istr,aid,context); 47 | 48 | assert ( context.size() == V.size() ); 49 | 50 | for ( uint64_t j = 0; j < V.size(); ++j ) 51 | { 52 | assert ( V[j].isInverse() == context[j].inv ); 53 | 
assert ( V[j].aread == context[j].aread ); 54 | assert ( V[j].bread == context[j].bread ); 55 | assert ( V[j].path.abpos == context[j].abpos ); 56 | assert ( V[j].path.aepos == context[j].aepos ); 57 | assert ( V[j].path.bbpos == context[j].bbpos ); 58 | assert ( V[j].path.bepos == context[j].bepos ); 59 | assert ( V[j].path.diffs == context[j].diffs ); 60 | } 61 | } 62 | } 63 | 64 | GD.returnContext(scontext); 65 | 66 | } 67 | 68 | std::string getTmpFileBase(libmaus2::util::ArgParser const & arg) 69 | { 70 | std::string const tmpfilebase = arg.uniqueArgPresent("T") ? arg["T"] : libmaus2::util::ArgInfo::getDefaultTmpFileName(arg.progname); 71 | return tmpfilebase; 72 | } 73 | 74 | static int getDefaultVerbose() 75 | { 76 | return 0; 77 | } 78 | 79 | static int getDefaultCheck() 80 | { 81 | return 0; 82 | } 83 | 84 | template 85 | static std::string formatRHS(std::string const & description, default_type def) 86 | { 87 | std::ostringstream ostr; 88 | ostr << description << " (default " << def << ")"; 89 | return ostr.str(); 90 | } 91 | 92 | static std::string helpMessage(libmaus2::util::ArgParser const & arg) 93 | { 94 | std::vector < std::pair < std::string, std::string > > optionMap; 95 | optionMap . push_back ( std::pair < std::string, std::string >("verbose", formatRHS("verbosity",getDefaultVerbose()))); 96 | optionMap . push_back ( std::pair < std::string, std::string >("check", formatRHS("check graph after creating it",getDefaultCheck()))); 97 | optionMap . 
push_back ( std::pair < std::string, std::string >("T", formatRHS("temporary file prefix",libmaus2::util::ArgInfo::getDefaultTmpFileName(arg.progname)))); 98 | 99 | uint64_t maxlhs = 0; 100 | for ( std::vector < std::pair < std::string, std::string > >::const_iterator ita = optionMap.begin(); ita != optionMap.end(); ++ita ) 101 | { 102 | assert ( ita->first.size() ); 103 | 104 | if ( ita->first.size() == 1 ) 105 | maxlhs = std::max(maxlhs,static_cast(ita->first.size()+1)); 106 | else 107 | maxlhs = std::max(maxlhs,static_cast(ita->first.size()+2)); 108 | } 109 | 110 | std::ostringstream messtr; 111 | for ( std::vector < std::pair < std::string, std::string > >::const_iterator ita = optionMap.begin(); ita != optionMap.end(); ++ita ) 112 | { 113 | std::string const key = ita->first; 114 | 115 | messtr << "\t"; 116 | messtr << std::setw(maxlhs) << std::setfill(' '); 117 | if ( key.size() == 1 ) 118 | messtr << (std::string("-")+key); 119 | else 120 | messtr << (std::string("--")+key); 121 | 122 | messtr << std::setw(0); 123 | 124 | messtr << ": "; 125 | 126 | messtr << ita->second; 127 | messtr << "\n"; 128 | } 129 | 130 | return messtr.str(); 131 | } 132 | 133 | 134 | int main(int argc, char * argv[]) 135 | { 136 | try 137 | { 138 | libmaus2::util::ArgParser arg(argc,argv); 139 | 140 | if ( arg.uniqueArgPresent("v") || arg.uniqueArgPresent("version") ) 141 | { 142 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 143 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." << std::endl; 144 | return EXIT_SUCCESS; 145 | } 146 | else if ( arg.uniqueArgPresent("h") || arg.uniqueArgPresent("help") || arg.size() < 1 ) 147 | { 148 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 149 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." 
<< std::endl; 150 | std::cerr << "\n"; 151 | std::cerr << "usage: " << arg.progname << " [options] out.graph in.las ...\n"; 152 | std::cerr << "\n"; 153 | std::cerr << "The following options can be used (no space between option name and parameter allowed):\n\n"; 154 | std::cerr << helpMessage(arg); 155 | return EXIT_SUCCESS; 156 | } 157 | else 158 | { 159 | libmaus2::timing::RealTimeClock rtc; 160 | rtc.start(); 161 | 162 | std::string const out = arg[0]; 163 | std::vector Vin; 164 | for ( uint64_t i = 1; i < arg.size(); ++i ) 165 | Vin.push_back(arg[i]); 166 | 167 | int const verbose = arg.uniqueArgPresent("verbose"); 168 | libmaus2::dazzler::align::GraphEncoder::encodegraph(out,Vin,getTmpFileBase(arg),verbose); 169 | 170 | std::cerr << "[V] processing time " << rtc.formatTime(rtc.getElapsedSeconds()) << std::endl; 171 | 172 | int const check = arg.uniqueArgPresent("check"); 173 | 174 | if ( check ) 175 | { 176 | std::cerr << "[V] checking graph..."; 177 | libmaus2::aio::InputStreamInstance istr(out); 178 | checkGraph(istr,Vin); 179 | std::cerr << "done." << std::endl; 180 | } 181 | 182 | return EXIT_SUCCESS; 183 | } 184 | } 185 | catch(std::exception const & ex) 186 | { 187 | std::cerr << ex.what() << std::endl; 188 | return EXIT_FAILURE; 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /src/lasfilteralignments.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2016-2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | */ 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | static double getDefaultTermVal() 33 | { 34 | return 0.35; 35 | } 36 | 37 | struct NamedInterval : std::pair 38 | { 39 | std::string name; 40 | 41 | NamedInterval() {} 42 | NamedInterval( 43 | std::pair const & rI, 44 | std::string const & rname 45 | ) : std::pair(rI), name(rname) {} 46 | }; 47 | 48 | 49 | int lasfilteralignments(libmaus2::util::ArgParser const & arg) 50 | { 51 | double const termval = arg.uniqueArgPresent("e") ? arg.getParsedArg("e") : getDefaultTermVal(); 52 | std::string const tmpfilebase = arg.uniqueArgPresent("T") ? arg["T"] : libmaus2::util::ArgInfo::getDefaultTmpFileName(arg.progname); 53 | 54 | std::string const outfn = arg[0]; 55 | std::string const symkillfn = outfn + ".sym"; 56 | std::string const dbfn = arg[1]; 57 | 58 | std::vector Vinfn; 59 | for ( uint64_t i = 2; i < arg.size(); ++i ) 60 | Vinfn.push_back(arg[i]); 61 | 62 | int64_t const tspace = libmaus2::dazzler::align::AlignmentFile::getTSpace(Vinfn); 63 | 64 | libmaus2::dazzler::db::DatabaseFile DB(dbfn); 65 | if ( DB.part != 0 ) 66 | { 67 | std::cerr << "Partial databases are not supported." 
<< std::endl; 68 | return EXIT_FAILURE; 69 | } 70 | 71 | DB.computeTrimVector(); 72 | 73 | std::vector RL; 74 | DB.getAllReadLengths(RL); 75 | 76 | libmaus2::dazzler::db::Track::unique_ptr_type Ptrack(DB.readTrack("inqual",0)); 77 | libmaus2::dazzler::align::OverlapProperCheck OPC(RL,*Ptrack,termval); 78 | libmaus2::aio::OutputStreamInstance::unique_ptr_type Psymkill(new libmaus2::aio::OutputStreamInstance(symkillfn)); 79 | 80 | std::vector < NamedInterval > VNI; 81 | 82 | libmaus2::dazzler::align::AlignmentWriter::unique_ptr_type AW(new libmaus2::dazzler::align::AlignmentWriter(outfn,tspace,false /* index */)); 83 | 84 | for ( uint64_t i = 0; i < Vinfn.size(); ++i ) 85 | { 86 | std::string const infn = Vinfn[i]; 87 | libmaus2::dazzler::align::AlignmentFileRegion::unique_ptr_type Plas(libmaus2::dazzler::align::OverlapIndexer::openAlignmentFileWithoutIndex(infn)); 88 | 89 | libmaus2::dazzler::align::Overlap OVL; 90 | 91 | int64_t prevread = -1; 92 | uint64_t kept = 0; 93 | uint64_t removed = 0; 94 | 95 | for ( uint64_t c = 0 ; Plas->getNextOverlap(OVL) ; ++c ) 96 | { 97 | if ( OVL.aread != prevread ) 98 | { 99 | if ( prevread >= 0 ) 100 | { 101 | if ( prevread > OVL.aread ) 102 | { 103 | libmaus2::exception::LibMausException lme; 104 | lme.getStream() << "[E] file " << infn << " is unsorted" << std::endl; 105 | lme.finish(); 106 | throw lme; 107 | } 108 | } 109 | 110 | if ( kept || removed ) 111 | std::cerr << "[V] " << prevread << " kept " <put(OVL); 122 | ++kept; 123 | } 124 | else 125 | { 126 | libmaus2::dazzler::align::OverlapInfo info = OVL.getHeader().getInfo().swapped(); 127 | 128 | if ( OVL.isInverse() ) 129 | { 130 | uint64_t const alen = (*(OPC.RL))[info.aread >> 1]; 131 | uint64_t const blen = (*(OPC.RL))[info.bread >> 1]; 132 | info = info.inverse(alen,blen); 133 | } 134 | 135 | assert ( (info.aread & 1) == 0 ); 136 | 137 | info.serialise(*Psymkill); 138 | 139 | ++removed; 140 | } 141 | } 142 | 143 | if ( kept || removed ) 144 | std::cerr << "[V] " << 
prevread << " kept " <::sort(outfn,tmpfilebase); 153 | 154 | Psymkill->flush(); 155 | Psymkill.reset(); 156 | 157 | libmaus2::sorting::SerialisingSortingBufferedOutputFile::sort(symkillfn,16*1024*1024); 158 | libmaus2::dazzler::align::OverlapInfoIndexer::createInfoIndex(symkillfn,DB.size()); 159 | libmaus2::dazzler::align::OverlapIndexer::constructIndex(outfn); 160 | 161 | return EXIT_SUCCESS; 162 | } 163 | 164 | int main(int argc, char * argv[]) 165 | { 166 | try 167 | { 168 | libmaus2::util::ArgParser arg(argc,argv); 169 | 170 | if ( arg.uniqueArgPresent("v") || arg.uniqueArgPresent("version") ) 171 | { 172 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 173 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." << std::endl; 174 | return EXIT_SUCCESS; 175 | } 176 | else if ( arg.uniqueArgPresent("h") || arg.uniqueArgPresent("help") || arg.size() < 3 ) 177 | { 178 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 179 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." 
<< std::endl; 180 | std::cerr << "\n"; 181 | std::cerr << "usage: " << arg.progname << " [options] out.las in.db in1.las ...\n"; 182 | std::cerr << std::endl; 183 | std::cerr << "optional parameters:" << std::endl << std::endl; 184 | std::cerr << " -e: error threshold for proper alignment termination (default: " << getDefaultTermVal() << ")" << std::endl; 185 | return EXIT_SUCCESS; 186 | } 187 | else 188 | { 189 | return lasfilteralignments(arg); 190 | } 191 | } 192 | catch(std::exception const & ex) 193 | { 194 | std::cerr << ex.what() << std::endl; 195 | return EXIT_FAILURE; 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /src/bamfilterlongest.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2018 German Tischler-Höhle 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 
17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | #include 39 | 40 | static uint64_t getDefaultNumThreads() 41 | { 42 | return libmaus2::parallel::NumCpus::getNumLogicalProcessors(); 43 | } 44 | 45 | std::string getTmpFileBase(libmaus2::util::ArgParser const & arg) 46 | { 47 | std::string const tmpfilebase = arg.uniqueArgPresent("T") ? arg["T"] : libmaus2::util::ArgInfo::getDefaultTmpFileName(arg.progname); 48 | return tmpfilebase; 49 | } 50 | 51 | ::libmaus2::bambam::BamHeader::unique_ptr_type updateHeader( 52 | ::libmaus2::util::ArgInfo const & arginfo, 53 | ::libmaus2::bambam::BamHeader const & header 54 | ) 55 | { 56 | std::string const headertext(header.text); 57 | 58 | // add PG line to header 59 | std::string const upheadtext = ::libmaus2::bambam::ProgramHeaderLineSet::addProgramLine( 60 | headertext, 61 | "bammerge", // ID 62 | "bammerge", // PN 63 | arginfo.commandline, // CL 64 | ::libmaus2::bambam::ProgramHeaderLineSet(headertext).getLastIdInChain(), // PP 65 | std::string(PACKAGE_VERSION) // VN 66 | ); 67 | // construct new header 68 | ::libmaus2::bambam::BamHeader::unique_ptr_type uphead(new ::libmaus2::bambam::BamHeader(upheadtext)); 69 | 70 | return UNIQUE_PTR_MOVE(uphead); 71 | } 72 | 73 | struct SimpleThreadPoolTerminate 74 | { 75 | libmaus2::parallel::SimpleThreadPool * STP; 76 | 77 | SimpleThreadPoolTerminate(libmaus2::parallel::SimpleThreadPool * rSTP) 78 | : STP(rSTP) {} 79 | 80 | ~SimpleThreadPoolTerminate() 81 | { 82 | if ( STP ) 83 | { 84 | STP->terminate(); 85 | STP->join(); 86 | } 87 | } 88 | }; 89 | 90 | int main(int argc, char * argv[]) 91 | { 92 | try 93 | { 94 | libmaus2::util::ArgParser const arg(argc,argv); 95 | 96 | // number of threads 97 | uint64_t const numthreads = arg.uniqueArgPresent("t") ? 
arg.getUnsignedNumericArg("t") : getDefaultNumThreads(); 98 | 99 | std::string const tmpfilebase = getTmpFileBase(arg); 100 | 101 | libmaus2::util::ArgInfo arginfo(argc,argv); 102 | { 103 | std::ostringstream ostr; 104 | ostr << numthreads; 105 | arginfo.replaceKey("inputthreads",ostr.str()); 106 | arginfo.replaceKey("outputthreads",ostr.str()); 107 | } 108 | 109 | libmaus2::parallel::SimpleThreadPool::unique_ptr_type PSTP; 110 | if ( numthreads > 1 ) 111 | { 112 | libmaus2::parallel::SimpleThreadPool::unique_ptr_type TSTP( 113 | new libmaus2::parallel::SimpleThreadPool(numthreads) 114 | ); 115 | PSTP = UNIQUE_PTR_MOVE(TSTP); 116 | 117 | libmaus2::bambam::BamAlignmentDecoderFactory::setThreadPool(PSTP.get()); 118 | } 119 | 120 | SimpleThreadPoolTerminate STPT(PSTP ? PSTP.get() : 0); 121 | 122 | libmaus2::bambam::BamAlignmentDecoderWrapper::unique_ptr_type decwrapper( 123 | libmaus2::bambam::BamMultiAlignmentDecoderFactory::construct( 124 | arginfo,false, // do not put rank 125 | 0, /* copy stream */ 126 | std::cin, /* standard input */ 127 | true, /* concatenate instead of merging */ 128 | false /* streaming */ 129 | ) 130 | ); 131 | ::libmaus2::bambam::BamAlignmentDecoder * ppdec = &(decwrapper->getDecoder()); 132 | ::libmaus2::bambam::BamAlignmentDecoder & dec = *ppdec; 133 | ::libmaus2::bambam::BamHeader const & header = dec.getHeader(); 134 | 135 | std::string const headertext(header.text); 136 | 137 | // add PG line to header 138 | std::string const upheadtext = ::libmaus2::bambam::ProgramHeaderLineSet::addProgramLine( 139 | headertext, 140 | "bamfilterlongest", // ID 141 | "bamfilterlongest", // PN 142 | arg.commandline, // CL 143 | ::libmaus2::bambam::ProgramHeaderLineSet(headertext).getLastIdInChain(), // PP 144 | std::string(PACKAGE_VERSION) // VN 145 | ); 146 | // construct new header 147 | ::libmaus2::bambam::BamHeader uphead(upheadtext); 148 | 149 | libmaus2::bambam::BamPeeker BP(dec); 150 | 151 | libmaus2::bambam::BamAlignment algn; 152 | 153 | 
std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > * Pcbs = 0; 154 | libmaus2::bambam::BamBlockWriterBase::unique_ptr_type Pwriter( 155 | libmaus2::bambam::BamBlockWriterBaseFactory::construct(uphead,arginfo,Pcbs)); 156 | 157 | while ( BP.peekNext(algn) ) 158 | { 159 | int64_t const refid = algn.getRefID(); 160 | 161 | std::vector < libmaus2::bambam::BamAlignment::shared_ptr_type > V; 162 | 163 | std::map < std::string, uint64_t> M; 164 | 165 | while ( BP.peekNext(algn) && algn.getRefID() == refid ) 166 | { 167 | BP.getNext(algn); 168 | libmaus2::bambam::BamAlignment::shared_ptr_type sptr(algn.sclone()); 169 | V.push_back(sptr); 170 | 171 | std::string const name = algn.getName(); 172 | 173 | if ( M.find(name) == M.end() ) 174 | { 175 | M[name] = V.size()-1; 176 | } 177 | else 178 | { 179 | uint64_t const previd = M.find(name)->second; 180 | 181 | if ( algn.getReferenceLength() > V[previd]->getReferenceLength() ) 182 | M[name] = V.size()-1; 183 | } 184 | } 185 | 186 | for ( std::map < std::string, uint64_t>::const_iterator it = M.begin(); 187 | it != M.end(); ++it ) 188 | { 189 | Pwriter->writeAlignment(*V[it->second]); 190 | } 191 | } 192 | 193 | Pwriter.reset(); 194 | } 195 | catch(std::exception const & ex) 196 | { 197 | std::cerr << ex.what() << std::endl; 198 | return EXIT_FAILURE; 199 | } 200 | } 201 | -------------------------------------------------------------------------------- /src/filtersym.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2016-2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 
9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | */ 18 | #include 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 |
/*
 * Remove from the alignment file outfn every overlap whose OverlapInfo also
 * occurs in the kill list symkillmerge, then rebuild the index of outfn.
 *
 * arg           command line; only the "verbose" switch is queried
 * outfn         alignment file, rewritten in place via outfn + ".tmp"
 * symkillmerge  serialised OverlapInfo records; symkillmerge + ".index" is
 *               read at byte offset aread * sizeof(uint64_t) to obtain the
 *               position at which reading of the kill list starts
 *               (presumably the offset of the first record for that a-read;
 *               confirm against the tool that writes the index)
 *
 * Both streams must be sorted by OverlapInfo; a LibMausException is thrown
 * when an ordering violation is detected. Returns without touching outfn if
 * the alignment file contains no overlaps.
 */
void filterBySym(libmaus2::util::ArgParser const & arg, std::string const outfn, std::string const symkillmerge)
{
	int64_t const tspace = libmaus2::dazzler::align::AlignmentFile::getTSpace(outfn);
	// queried for parity with the command line interface, not used below
	bool const verbose = arg.uniqueArgPresent("verbose");
	(void)verbose;

	// id of the first a-read in the alignment file, -1 if the file is empty
	int64_t firstaread = -1;

	{
		libmaus2::dazzler::align::AlignmentFileRegion::unique_ptr_type Ain(libmaus2::dazzler::align::OverlapIndexer::openAlignmentFileWithoutIndex(outfn));
		libmaus2::dazzler::align::Overlap OVL;
		if ( Ain->peekNextOverlap(OVL) )
			firstaread = OVL.aread;
	}

	// nothing to filter
	if ( firstaread < 0 )
		return;

	// start position in the kill list as stored in the index for firstaread
	uint64_t p = 0;

	{
		std::string const indexfn = symkillmerge + ".index";
		libmaus2::aio::InputStreamInstance ISI(indexfn);
		ISI.clear();
		ISI.seekg(firstaread * sizeof(uint64_t));
		p = libmaus2::util::NumberSerialisation::deserialiseNumber(ISI);
	}

	libmaus2::dazzler::align::AlignmentFileRegion::unique_ptr_type Ain(libmaus2::dazzler::align::OverlapIndexer::openAlignmentFileWithoutIndex(outfn));
	libmaus2::aio::InputStreamInstance ISIsymkill(symkillmerge);
	ISIsymkill.clear();
	ISIsymkill.seekg(p);
	// NOTE(review): the template argument of SerialisedPeeker is not visible in this
	// listing; OverlapInfo is inferred from the peekNext(info) calls below - confirm
	libmaus2::aio::SerialisedPeeker<libmaus2::dazzler::align::OverlapInfo> AP(ISIsymkill);
	libmaus2::dazzler::align::Overlap OVL;
	libmaus2::dazzler::align::OverlapInfo info;

	// filtered output goes to a temporary file which replaces outfn at the end
	std::string const tmpoutfn = outfn + ".tmp";
	libmaus2::util::TempFileRemovalContainer::addTempFile(tmpoutfn);
	libmaus2::dazzler::align::AlignmentWriter::unique_ptr_type AWtmp(new libmaus2::dazzler::align::AlignmentWriter(tmpoutfn,tspace,false /* index */));

	// key of the previously handled group in each stream (for the order check)
	libmaus2::dazzler::align::OverlapInfo OVLprev;
	bool OVLprevvalid = false;
	libmaus2::dazzler::align::OverlapInfo infoprev;
	bool infoprevvalid = false;

	// merge the two sorted streams group by group
	while ( Ain->peekNextOverlap(OVL) && AP.peekNext(info) )
	{
		libmaus2::dazzler::align::OverlapInfo const OVLinfo = OVL.getHeader().getInfo();

		bool const ovlok = (!OVLprevvalid) || (OVLprev < OVLinfo);
		bool const infook = (!infoprevvalid) || (infoprev < info);

		if ( ! ovlok )
		{
			libmaus2::exception::LibMausException lme;
			lme.getStream() << "[E] error in overlap stream " << OVLprev << " " << OVLinfo << std::endl;
			lme.finish();
			throw lme;
		}
		if ( ! infook )
		{
			libmaus2::exception::LibMausException lme;
			lme.getStream() << "[E] error in filter stream " << infoprev << " " << info << std::endl;
			lme.finish();
			throw lme;
		}

		OVLprev = OVLinfo;
		OVLprevvalid = true;
		infoprev = info;
		infoprevvalid = true;

		if ( OVLinfo < info )
		{
			// overlap group is not in the kill list: keep all of it
			while ( Ain->peekNextOverlap(OVL) && OVL.getHeader().getInfo() == OVLprev )
			{
				Ain->getNextOverlap(OVL);
				AWtmp->put(OVL);
			}
			// info was not consumed and will be seen again in the next round
			infoprevvalid = false;
		}
		else if ( info < OVLinfo )
		{
			// kill list entry without matching overlap: skip it
			while ( AP.peekNext(info) && info == infoprev )
			{
				AP.getNext(info);
			}
			// the overlap was not consumed and will be seen again in the next round
			OVLprevvalid = false;
		}
		else
		{
			assert ( info == OVLinfo );

			// match: drop the overlap group and consume the kill list entries
			while ( Ain->peekNextOverlap(OVL) && OVL.getHeader().getInfo() == OVLprev )
				Ain->getNextOverlap(OVL);
			while ( AP.peekNext(info) && info == infoprev )
				AP.getNext(info);
		}

	}

	// kill list exhausted: copy the remaining overlaps. Consecutive overlaps may
	// carry the same OverlapInfo (the merge loop above handles such runs as one
	// group), so only a decreasing key is an ordering error here.
	while ( Ain->getNextOverlap(OVL) )
	{
		libmaus2::dazzler::align::OverlapInfo const OVLinfo = OVL.getHeader().getInfo();
		bool const ovlok = (!OVLprevvalid) || !(OVLinfo < OVLprev);

		if ( ! ovlok )
		{
			libmaus2::exception::LibMausException lme;
			lme.getStream() << "[E] error in overlap stream " << OVLprev << " " << OVLinfo << std::endl;
			lme.finish();
			throw lme;
		}

		OVLprev = OVLinfo;
		OVLprevvalid = true;

		AWtmp->put(OVL);
	}

	// close the writer before the file is moved into place
	AWtmp.reset();

	libmaus2::aio::OutputStreamFactoryContainer::rename(tmpoutfn,outfn);
	libmaus2::dazzler::align::OverlapIndexer::constructIndex(outfn);
}

/*
 * Run the filter for the command line "in.las in.sym": arg[0] is the alignment
 * file which is filtered in place, arg[1] the kill list. Returns EXIT_SUCCESS;
 * errors are reported via exceptions.
 */
int filtersym(libmaus2::util::ArgParser const & arg)
{
	std::string const outfn = arg[0];
	std::string const symfn = arg[1];
	// temporary file prefix (-T); computed but not used by the filter
	std::string const tmpfilebase = arg.uniqueArgPresent("T") ? arg["T"] : libmaus2::util::ArgInfo::getDefaultTmpFileName(arg.progname);

	filterBySym(arg,outfn,symfn);

	return EXIT_SUCCESS;
}

170 | int main(int argc, char * argv[]) 171 | { 172 | try 173 | { 174 | libmaus2::util::ArgParser arg(argc,argv); 175 | 176 | if ( arg.uniqueArgPresent("v") || arg.uniqueArgPresent("version") ) 177 | { 178 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 179 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." << std::endl; 180 | return EXIT_SUCCESS; 181 | } 182 | else if ( arg.uniqueArgPresent("h") || arg.uniqueArgPresent("help") || arg.size() < 2 ) 183 | { 184 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 185 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL."
<< std::endl; 186 | std::cerr << "\n"; 187 | std::cerr << "usage: " << arg.progname << " [options] in.las in.sym\n"; 188 | std::cerr << std::endl; 189 | std::cerr << "optional parameters:" << std::endl << std::endl; 190 | return EXIT_SUCCESS; 191 | } 192 | else 193 | { 194 | return filtersym(arg); 195 | } 196 | } 197 | catch(std::exception const & ex) 198 | { 199 | std::cerr << ex.what() << std::endl; 200 | return EXIT_FAILURE; 201 | } 202 | } 203 | -------------------------------------------------------------------------------- /src/Makefile.am: -------------------------------------------------------------------------------- 1 | ACLOCAL_AMFLAGS=-I m4 2 | 3 | noinst_HEADERS = \ 4 | ActiveElement.hpp \ 5 | ChainSet.hpp \ 6 | ComputeOffsetLikely.hpp \ 7 | DebruijnGraphBase.hpp \ 8 | DebruijnGraphContainer.hpp \ 9 | DebruijnGraph.hpp \ 10 | DebruijnGraphInterface.hpp \ 11 | DecodedReadContainer.hpp \ 12 | DotProduct.hpp \ 13 | HandleContext.hpp \ 14 | HetThreshold.hpp \ 15 | Links.hpp \ 16 | Node.hpp \ 17 | OffsetLikely.hpp \ 18 | Repeat.hpp \ 19 | RepeatIdComparator.hpp \ 20 | TraceType.hpp 21 | 22 | MANPAGES = 23 | 24 | man_MANS = ${MANPAGES} 25 | 26 | EXTRA_DIST = ${MANPAGES} 27 | 28 | bin_PROGRAMS = daccord computeintrinsicqv lasdetectsimplerepeats lasfilteralignments lasfilteralignmentsborderrepeats maftobam generateperfectpiles bamidrename \ 29 | checklas checkconsensus sortfasta mapconstoraw fillfasta canutolas spikenoise computeextrinsicqv marktrue encodegraph \ 30 | split_agr split_dis filterchains wgsimtobam rlastobam bamfilterlongest computeintrinsicqv2 filtersym wmap 31 | 32 | 33 | daccord_SOURCES = daccord.cpp 34 | daccord_LDADD = ${LIBMAUS2LIBS} 35 | daccord_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 36 | daccord_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 37 | 38 | computeintrinsicqv_SOURCES = computeintrinsicqv.cpp 39 | computeintrinsicqv_LDADD = ${LIBMAUS2LIBS} 40 | computeintrinsicqv_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 
${LIBMAUS2LDFLAGS} 41 | computeintrinsicqv_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 42 | 43 | lasdetectsimplerepeats_SOURCES = lasdetectsimplerepeats.cpp 44 | lasdetectsimplerepeats_LDADD = ${LIBMAUS2LIBS} 45 | lasdetectsimplerepeats_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 46 | lasdetectsimplerepeats_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 47 | 48 | lasfilteralignments_SOURCES = lasfilteralignments.cpp 49 | lasfilteralignments_LDADD = ${LIBMAUS2LIBS} 50 | lasfilteralignments_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 51 | lasfilteralignments_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 52 | 53 | lasfilteralignmentsborderrepeats_SOURCES = lasfilteralignmentsborderrepeats.cpp 54 | lasfilteralignmentsborderrepeats_LDADD = ${LIBMAUS2LIBS} 55 | lasfilteralignmentsborderrepeats_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 56 | lasfilteralignmentsborderrepeats_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 57 | 58 | maftobam_SOURCES = maftobam.cpp 59 | maftobam_LDADD = ${LIBMAUS2LIBS} 60 | maftobam_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 61 | maftobam_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 62 | 63 | generateperfectpiles_SOURCES = generateperfectpiles.cpp 64 | generateperfectpiles_LDADD = ${LIBMAUS2LIBS} 65 | generateperfectpiles_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 66 | generateperfectpiles_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 67 | 68 | bamidrename_SOURCES = bamidrename.cpp 69 | bamidrename_LDADD = ${LIBMAUS2LIBS} 70 | bamidrename_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 71 | bamidrename_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 72 | 73 | checklas_SOURCES = checklas.cpp 74 | checklas_LDADD = ${LIBMAUS2LIBS} 75 | checklas_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 76 | checklas_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 77 | 78 | checkconsensus_SOURCES = checkconsensus.cpp 79 | 
checkconsensus_LDADD = ${LIBMAUS2LIBS} 80 | checkconsensus_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 81 | checkconsensus_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 82 | 83 | sortfasta_SOURCES = sortfasta.cpp 84 | sortfasta_LDADD = ${LIBMAUS2LIBS} 85 | sortfasta_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 86 | sortfasta_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 87 | 88 | mapconstoraw_SOURCES = mapconstoraw.cpp 89 | mapconstoraw_LDADD = ${LIBMAUS2LIBS} 90 | mapconstoraw_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 91 | mapconstoraw_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 92 | 93 | fillfasta_SOURCES = fillfasta.cpp 94 | fillfasta_LDADD = ${LIBMAUS2LIBS} 95 | fillfasta_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 96 | fillfasta_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 97 | 98 | canutolas_SOURCES = canutolas.cpp 99 | canutolas_LDADD = ${LIBMAUS2LIBS} 100 | canutolas_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 101 | canutolas_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 102 | 103 | spikenoise_SOURCES = spikenoise.cpp 104 | spikenoise_LDADD = ${LIBMAUS2LIBS} 105 | spikenoise_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 106 | spikenoise_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 107 | 108 | computeextrinsicqv_SOURCES = computeextrinsicqv.cpp 109 | computeextrinsicqv_LDADD = ${LIBMAUS2LIBS} 110 | computeextrinsicqv_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 111 | computeextrinsicqv_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 112 | 113 | marktrue_SOURCES = marktrue.cpp 114 | marktrue_LDADD = ${LIBMAUS2LIBS} 115 | marktrue_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 116 | marktrue_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 117 | 118 | encodegraph_SOURCES = encodegraph.cpp 119 | encodegraph_LDADD = ${LIBMAUS2LIBS} 120 | encodegraph_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 
${LIBMAUS2LDFLAGS} 121 | encodegraph_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 122 | 123 | split_agr_SOURCES = split_agr.cpp 124 | split_agr_LDADD = ${LIBMAUS2LIBS} 125 | split_agr_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 126 | split_agr_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 127 | 128 | split_dis_SOURCES = split_dis.cpp 129 | split_dis_LDADD = ${LIBMAUS2LIBS} 130 | split_dis_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 131 | split_dis_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 132 | 133 | filterchains_SOURCES = filterchains.cpp 134 | filterchains_LDADD = ${LIBMAUS2LIBS} 135 | filterchains_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 136 | filterchains_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 137 | 138 | wgsimtobam_SOURCES = wgsimtobam.cpp 139 | wgsimtobam_LDADD = ${LIBMAUS2LIBS} 140 | wgsimtobam_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 141 | wgsimtobam_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 142 | 143 | rlastobam_SOURCES = rlastobam.cpp 144 | rlastobam_LDADD = ${LIBMAUS2LIBS} 145 | rlastobam_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 146 | rlastobam_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 147 | 148 | bamfilterlongest_SOURCES = bamfilterlongest.cpp 149 | bamfilterlongest_LDADD = ${LIBMAUS2LIBS} 150 | bamfilterlongest_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 151 | bamfilterlongest_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 152 | 153 | computeintrinsicqv2_SOURCES = computeintrinsicqv2.cpp 154 | computeintrinsicqv2_LDADD = ${LIBMAUS2LIBS} 155 | computeintrinsicqv2_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 156 | computeintrinsicqv2_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 157 | 158 | filtersym_SOURCES = filtersym.cpp 159 | filtersym_LDADD = ${LIBMAUS2LIBS} 160 | filtersym_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 161 | filtersym_CPPFLAGS = ${AM_CPPFLAGS} 
${LIBMAUS2CPPFLAGS} 162 | 163 | wmap_SOURCES = wmap.cpp 164 | wmap_LDADD = ${LIBMAUS2LIBS} 165 | wmap_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} 166 | wmap_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} 167 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_INIT(daccord,0.0.14,[tischler@mpi-cbg.de],[daccord],[http://www.mpi-cbg.de]) 2 | AC_CANONICAL_SYSTEM 3 | AC_PROG_LIBTOOL 4 | 5 | AM_INIT_AUTOMAKE([std-options subdir-objects parallel-tests]) 6 | AM_CONFIG_HEADER(config.h) 7 | AC_CONFIG_MACRO_DIR([m4]) 8 | 9 | AC_CANONICAL_SYSTEM 10 | 11 | AC_ARG_ENABLE(optimization, 12 | AS_HELP_STRING([--enable-optimization],[use compiler optimization (default yes)]), 13 | [optimization=${enableval}],[optimization=yes]) 14 | AC_ARG_ENABLE(debug, 15 | AS_HELP_STRING([--enable-debug],[use compiler debug flags (default no)]), 16 | [debug=${enableval}],[debug=no]) 17 | AC_ARG_ENABLE(profile, 18 | AS_HELP_STRING([--enable-profile],[use compiler profiling flags (default no)]), 19 | [profile=${enableval}],[profile=no]) 20 | AC_ARG_ENABLE(fast, 21 | AS_HELP_STRING([--enable-fast],[disable evaluation of assertions (default no)]), 22 | [fast=${enableval}],[fast=no]) 23 | 24 | # check for C compiler 25 | CFLAGS_SAVE="${CFLAGS}" 26 | CFLAGS="" 27 | AC_PROG_CC 28 | CFLAGS="${CFLAGS_SAVE}" 29 | 30 | CFLAGS_SAVE="${CFLAGS}" 31 | CFLAGS="" 32 | CXXFLAGS_SAVE="${CXXFLAGS}" 33 | CXXFLAGS="" 34 | AM_PROG_CC_C_O 35 | CFLAGS="${CFLAGS_SAVE}" 36 | CXXFLAGS="${CXXFLAGS_SAVE}" 37 | 38 | # check for C++ compiler 39 | CXXFLAGS_SAVE="${CXXFLAGS}" 40 | CXXFLAGS="" 41 | AC_PROG_CXX 42 | CXXFLAGS="${CXXFLAGS_SAVE}" 43 | 44 | if test "${debug}" = "yes" ; then 45 | CXXDEBUGFLAGS="-g -O0" 46 | CDEBUGFLAGS="-g -O0" 47 | 48 | case ${CC} in 49 | gcc) 50 | CXXDEBUGFLAGS="${CXXDEBUGFLAGS} -rdynamic" 51 | CDEBUGFLAGS="${CDEBUGFLAGS} -rdynamic" 52 | ;; 53 | 
*-mingw32msvc-gcc) 54 | CXXDEBUGFLAGS="${CXXDEBUGFLAGS} -rdynamic" 55 | CDEBUGFLAGS="${CDEBUGFLAGS} -rdynamic" 56 | ;; 57 | esac 58 | 59 | CXXFLAGS="${CXXFLAGS} ${CXXDEBUGFLAGS}" 60 | CFLAGS="${CFLAGS} ${CDEBUGFLAGS}" 61 | openmp="no" 62 | 63 | AC_LANG_PUSH([C++]) 64 | CXXFLAGS_SAVE="${CXXFLAGS}" 65 | CXXFLAGS="-fsanitize=address" 66 | AC_MSG_CHECKING([whether the C++ compiler supports the -fsanitize=address flag]) 67 | AC_TRY_COMPILE([],[],[sanitizeaddress=yes],[sanitizeaddress=no]) 68 | AC_MSG_RESULT([${sanitizeaddress}]) 69 | CXXFLAGS="${CXXFLAGS_SAVE}" 70 | AC_LANG_POP 71 | 72 | if test "${sanitizeaddress}" = "yes" ; then 73 | CXXFLAGS="${CXXFLAGS} -fsanitize=address" 74 | CFLAGS="${CFLAGS} -fsanitize=address" 75 | fi 76 | 77 | AC_LANG_PUSH([C++]) 78 | CXXFLAGS_SAVE="${CXXFLAGS}" 79 | CXXFLAGS="-fstack-protector" 80 | AC_MSG_CHECKING([whether the C++ compiler supports the -fstack-protector flag]) 81 | AC_TRY_COMPILE([],[],[stackprotect=yes],[stackprotect=no]) 82 | AC_MSG_RESULT([${stackprotect}]) 83 | CXXFLAGS="${CXXFLAGS_SAVE}" 84 | AC_LANG_POP 85 | 86 | if test "${stackprotect}" = "yes" ; then 87 | STACKPROTECT="-fstack-protector" 88 | CXXFLAGS="${CXXFLAGS} ${STACKPROTECT}" 89 | CFLAGS="${CFLAGS} ${STACKPROTECT}" 90 | fi 91 | 92 | 93 | AC_MSG_NOTICE([Adding debug options ${CDEBUGFLAGS} to C compiler ${CC} flags, full list now ${CFLAGS}]) 94 | AC_MSG_NOTICE([Adding debug options ${CXXDEBUGFLAGS} to C compiler ${CXX} flags, full list now ${CXXFLAGS}]) 95 | else 96 | if test "${profile}" = "yes" ; then 97 | CXXPROFFLAGS="-g -pg" 98 | CPROFFLAGS="-g -pg" 99 | 100 | CXXFLAGS="${CXXPROFFLAGS} -O3 -rdynamic" 101 | CFLAGS="${CPROFFLAGS} -O3 -rdynamic" 102 | 103 | AC_MSG_NOTICE([Adding profiling options ${CPROFFLAGS} to C compiler ${CC} flags, full list now ${CFLAGS}]) 104 | AC_MSG_NOTICE([Adding profiling options ${CXXPROFFLAGS} to C compiler ${CXX} flags, full list now ${CXXFLAGS}]) 105 | else 106 | if test "${optimization}" = "yes" ; then 107 | COPTFLAGS= 108 | 
case ${CC} in 109 | gcc) 110 | COPTFLAGS="-O3 -g -rdynamic" 111 | ;; 112 | *-mingw32msvc-gcc) 113 | COPTFLAGS="-O3 -g -rdynamic" 114 | ;; 115 | cl.exe) 116 | COPTFLAGS="-O2 -Ob2 -Ot -Oy" 117 | ;; 118 | esac 119 | CFLAGS="${CFLAGS} ${COPTFLAGS}" 120 | AC_MSG_NOTICE([Adding optimisation options ${COPTFLAGS} to C compiler ${CC} flags, full list now ${CFLAGS}]) 121 | 122 | CXXOPTFLAGS= 123 | case ${CXX} in 124 | g++) 125 | CXXOPTFLAGS="-O3 -g -rdynamic" 126 | ;; 127 | *-mingw32msvc-g++) 128 | CXXOPTFLAGS="-O3 -g -rdynamic" 129 | ;; 130 | cl.exe) 131 | CXXOPTFLAGS="-O2 -Ob2 -Ot -Oy" 132 | ;; 133 | esac 134 | CXXFLAGS="${CXXFLAGS} ${CXXOPTFLAGS}" 135 | 136 | AC_MSG_NOTICE([Adding optimisation options ${CXXOPTFLAGS} to C++ compiler ${CXX} flags, full list now ${CXXFLAGS}]) 137 | fi 138 | fi 139 | fi 140 | 141 | 142 | # check for pkg-config tool 143 | PKG_PROG_PKG_CONFIG 144 | 145 | if test "${fast}" = "yes" ; then 146 | CPPFLAGS="${CPPFLAGS} -DNDEBUG" 147 | fi 148 | 149 | # check for standard headers 150 | AC_LANG_PUSH([C++]) 151 | AC_HEADER_STDC 152 | AC_LANG_POP 153 | 154 | if test "${CXX}" = "g++" ; then 155 | CPPFLAGS="${CPPFLAGS} -W -Wall" 156 | fi 157 | 158 | AC_ARG_WITH([libmaus2], 159 | [AS_HELP_STRING([--with-libmaus2@<:@=PATH@:>@], [path to installed libmaus2 library @<:@default=@:>@])], 160 | [with_libmaus2=${withval}], 161 | [with_libmaus2=]) 162 | 163 | if test ! -z "${with_libmaus2}" ; then 164 | PKGCONFIGPATHSAVE="${PKG_CONFIG_PATH}" 165 | if test -z "${PKG_CONFIG_PATH}" ; then 166 | export PKG_CONFIG_PATH="${with_libmaus2}/lib/pkgconfig" 167 | else 168 | export PKG_CONFIG_PATH="${with_libmaus2}/lib/pkgconfig:${PKG_CONFIG_PATH}" 169 | fi 170 | fi 171 | 172 | PKG_CHECK_MODULES([libmaus2],[libmaus2 >= 2.0.352]) 173 | 174 | if test ! -z "${with_libmaus2}" ; then 175 | if test ! 
-z "${PKGCONFIGPATHSAVE}" ; then 176 | export PKG_CONFIG_PATH="${PKGCONFIGPATHSAVE}" 177 | fi 178 | fi 179 | 180 | LIBMAUS2CPPFLAGS="${libmaus2_CFLAGS}" 181 | LIBMAUS2LIBS="${libmaus2_LIBS}" 182 | 183 | CPPFLAGS_SAVE="${CPPFLAGS}" 184 | LDFLAGS_SAVE="${LDFLAGS}" 185 | LIBS_SAVE="${LIBS}" 186 | 187 | CPPFLAGS="${CPPFLAGS} ${libmaus2_CFLAGS}" 188 | LIBS="${LIBS} ${libmaus2_LIBS}" 189 | 190 | AC_LANG_PUSH([C++]) 191 | AC_MSG_CHECKING([whether we can compile a program using libmaus2]) 192 | AC_LINK_IFELSE([AC_LANG_SOURCE([ 193 | #include 194 | 195 | int main(int argc, char * argv[[]]) { 196 | ::libmaus2::util::ArgInfo const arginfo(argc,argv); 197 | return 0; 198 | }])], 199 | have_libmaus2=yes, 200 | have_libmaus2=no 201 | ) 202 | AC_LANG_POP 203 | AC_MSG_RESULT($have_libmaus2) 204 | 205 | if test "${have_libmaus2}" = "no" ; then 206 | AC_MSG_ERROR([Required libmaus2 is not available.]) 207 | fi 208 | 209 | AC_LANG_PUSH([C++]) 210 | AC_MSG_CHECKING([whether libmaus2 was compiled with support for the GMP library]) 211 | AC_LINK_IFELSE([AC_LANG_SOURCE([ 212 | #include 213 | 214 | int main(int argc, char * argv[[]]) { 215 | ::libmaus2::util::ArgInfo const arginfo(argc,argv); 216 | #if ! 
defined(LIBMAUS2_HAVE_GMP) 217 | #error "libmaus2 has no support for GMP compiled in" 218 | #endif 219 | return 0; 220 | }])], 221 | have_libmaus2_gmp=yes, 222 | have_libmaus2_gmp=no 223 | ) 224 | AC_LANG_POP 225 | AC_MSG_RESULT($have_libmaus2_gmp) 226 | 227 | if test "${have_libmaus2_gmp}" = "no" ; then 228 | AC_MSG_ERROR([Required GMP support in libmaus2 is not available.]) 229 | fi 230 | 231 | AC_MSG_NOTICE([Using flags ${CFLAGS} for C compiler ${CC}]) 232 | AC_MSG_NOTICE([Using flags ${CXXFLAGS} for C++ compiler ${CXX}]) 233 | 234 | PACKAGE_NAME=${PACKAGE} 235 | PACKAGE_VERSION=${VERSION} 236 | 237 | AC_SUBST([PACKAGE_NAME]) 238 | AC_SUBST([PACKAGE_VERSION]) 239 | # libmaus2 240 | AC_SUBST([LIBMAUS2CPPFLAGS]) 241 | AC_SUBST([LIBMAUS2LIBS]) 242 | # 243 | AC_OUTPUT(Makefile src/Makefile) 244 | -------------------------------------------------------------------------------- /src/computeintrinsicqv.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2016-2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 
17 | */ 18 | #include 19 | #include 20 | #include 21 |
/*
 * Turn the error histogram collected for A-read cura into one quality byte per
 * trace block, append the bytes to odata and record their number in annosize.
 *
 * H          block offset -> (number of errors -> count); cleared on return
 * cura       id of the A-read
 * tspace     trace point distance (block length)
 * RL         read lengths, indexed by read id
 * d          depth parameter; the best max(d/4,1) alignments per block are averaged
 * annosize   annosize[cura - blockintv.first] receives the number of blocks
 * odata      receives one byte per block: round(255 * error rate)
 * blockintv  interval of read ids covered by the current database block
 *
 * Blocks covered by fewer than max(d/4,1) alignments keep the error rate 1.0.
 *
 * NOTE(review): the template arguments of H, RL, annosize, blockintv and of
 * numeric_limits below are not visible in this listing and were reconstructed
 * from their use - confirm against the upstream source.
 */
void handleOverlaps(
	// error histogram
	std::map< uint64_t, std::map<uint64_t,uint64_t> > & H,
	// aread id
	int64_t const cura,
	// trace point distance
	int64_t const tspace,
	// read length array
	std::vector<uint64_t> const & RL,
	uint64_t const d,
	libmaus2::autoarray::AutoArray<uint64_t> & annosize,
	std::ostream & odata,
	std::pair<uint64_t,uint64_t> const & blockintv
)
{
	uint64_t const alen = RL[cura];
	uint64_t const ablocks = (alen + tspace - 1) / tspace;

	// error rate per block, 1.0 unless enough alignments cover the block
	std::vector < double > D( ablocks , 1.0 );

	// number of best alignments averaged per block. Never 0: with d < 4 a
	// threshold of d/4 == 0 would select nothing and divide 0 by 0 below.
	uint64_t const dthres = std::max(static_cast<uint64_t>(d/4),static_cast<uint64_t>(1));

	for ( std::map< uint64_t, std::map<uint64_t,uint64_t> >::const_iterator ita = H.begin(); ita != H.end(); ++ita )
	{
		// block offset
		// NOTE(review): used unchecked as index into D - confirm it is always < ablocks
		uint64_t const off = ita->first;
		// error histogram
		std::map<uint64_t,uint64_t> const & hist = ita->second;
		// depth
		uint64_t depth = 0;
		for ( std::map<uint64_t,uint64_t>::const_iterator hita = hist.begin(); hita != hist.end(); ++hita )
			depth += hita->second;

		// if depth is sufficient
		if ( depth >= dthres )
		{
			// compute average error over best dthres (the map is ordered by
			// number of errors, so the lowest counts are visited first)
			uint64_t s = 0;
			uint64_t a = 0;
			for ( std::map<uint64_t,uint64_t>::const_iterator hita = hist.begin(); hita != hist.end() && a < dthres; ++hita )
			{
				uint64_t const add = std::min(dthres-a,hita->second);
				s += add * hita->first;
				a += add;
			}
			assert ( a == dthres );
			// average number of errors in block
			double const avgerr = static_cast<double>(s) / a;
			// error rate
			double const erate = avgerr / tspace;

			assert ( erate >= 0 );
			assert ( erate <= 1.0 );

			// store error rate
			D[off] = erate;
		}
	}


	// std::cerr << "read " << cura << std::endl;

	#if 0
	for ( uint64_t i = 0; i < D.size(); ++i )
		std::cerr << "(" << i << "," << D[i] << ")";
	std::cerr << std::endl;
	#endif

	// implicit quality values
	annosize [ cura - blockintv.first ] = D.size();
	uint64_t const maxval = std::numeric_limits<uint8_t>::max();
	for ( uint64_t i = 0; i < D.size(); ++i )
	{
		// scale to a byte, rounding to nearest
		uint64_t const e = ::std::floor(D[i] * 255.0 + 0.5);
		//std::cerr << D[i] << " " << e << std::endl;
		odata.put(std::min(e,maxval));
	}

	// the histogram is reused by the caller for the next read
	H.clear();
}

102 | int main(int argc, char *argv[]) 103 | { 104 | try 105 | { 106 | libmaus2::util::ArgParser const arg(argc,argv); 107 | 108 | if ( arg.size() < 2 ) 109 | { 110 | std::cerr << "usage: " << argv[0] << " -d " << std::endl; 111 | return EXIT_FAILURE; 112 | } 113 | 114 | std::string const dbfn = arg[0]; 115 | 116 | if ( ! arg.uniqueArgPresent("d") ) 117 | { 118 | std::cerr << "[V] argument -d required" << std::endl; 119 | return EXIT_FAILURE; 120 | } 121 | 122 | uint64_t const d = arg.getUnsignedNumericArg("d"); 123 | 124 | libmaus2::dazzler::db::DatabaseFile DB(dbfn); 125 | DB.computeTrimVector(); 126 | 127 | if ( DB.part != 0 ) 128 | { 129 | std::cerr << "Partial databases are not supported."
<< std::endl; 130 | return EXIT_FAILURE; 131 | } 132 | 133 | // read all meta data 134 | std::vector RL; 135 | DB.getAllReadLengths(RL); 136 | 137 | int64_t const tspace = libmaus2::dazzler::align::AlignmentFile::getTSpace( 138 | std::vector(arg.restargs.begin()+1,arg.restargs.end()) 139 | ); 140 | 141 | for ( uint64_t a = 1; a < arg.size(); ++a ) 142 | { 143 | std::string const aligns = arg[a]; 144 | 145 | std::cerr << "[V] processing " << aligns << std::endl; 146 | 147 | int64_t blockid = 0; 148 | if ( aligns.find('.') != std::string::npos ) 149 | { 150 | std::string const suffix = aligns.substr(aligns.find_first_of('.')+1); 151 | if ( suffix.size() && isdigit(suffix[0]) ) 152 | { 153 | char const * p = suffix.c_str(); 154 | char const * pe = p; 155 | 156 | while ( *pe && isdigit(*pe) ) 157 | { 158 | char const d = *pe; 159 | int64_t const digit = d - '0'; 160 | blockid *= 10; 161 | blockid += digit; 162 | ++pe; 163 | } 164 | } 165 | } 166 | 167 | std::pair const blockintv = DB.getTrimmedBlockInterval(blockid); 168 | 169 | std::string const annofilename = DB.getBlockTrackAnnoFileName("inqual",blockid); 170 | std::string const datafilename = DB.getBlockTrackDataFileName("inqual",blockid); 171 | 172 | libmaus2::autoarray::AutoArray annosize(blockintv.second-blockintv.first,true); 173 | 174 | libmaus2::aio::OutputStreamInstance annofile(annofilename); 175 | libmaus2::aio::OutputStreamInstance datafile(datafilename); 176 | 177 | // open alignment file 178 | libmaus2::aio::InputStream::unique_ptr_type Palgnfile(libmaus2::aio::InputStreamFactoryContainer::constructUnique(aligns)); 179 | libmaus2::dazzler::align::AlignmentFile algn(*Palgnfile); 180 | 181 | libmaus2::dazzler::align::Overlap OVL; 182 | 183 | // current a-read id 184 | int64_t preva = std::numeric_limits::min(); 185 | int64_t cura = -1; 186 | int64_t nextexptd = blockintv.first; 187 | 188 | std::map< uint64_t, std::map > H; 189 | std::map< uint64_t, std::map > Hempty; 190 | 191 | int64_t lp = 0; 192 | 193 
| // get next overlap 194 | while ( algn.getNextOverlap(*Palgnfile,OVL) ) 195 | { 196 | if ( OVL.aread < preva ) 197 | { 198 | libmaus2::exception::LibMausException lme; 199 | lme.getStream() << "ids of a reads are not increasing" << std::endl; 200 | lme.finish(); 201 | throw lme; 202 | } 203 | preva = OVL.aread; 204 | 205 | if ( OVL.aread != cura && cura >= 0 ) 206 | { 207 | while ( nextexptd < cura ) 208 | { 209 | // std::cerr << "[V] filling empty " << nextexptd << std::endl; 210 | handleOverlaps(Hempty,nextexptd++,tspace,RL,d,annosize,datafile,blockintv); 211 | } 212 | 213 | assert ( nextexptd == cura ); 214 | handleOverlaps(H,cura,tspace,RL,d,annosize,datafile,blockintv); 215 | nextexptd++; 216 | 217 | if ( cura/1024 != lp/1024 ) 218 | { 219 | lp = cura; 220 | std::cerr << "[V] " << lp << std::endl; 221 | } 222 | } 223 | 224 | cura = OVL.aread; 225 | OVL.fillErrorHistogram(tspace,H,RL[OVL.aread]); 226 | } 227 | 228 | if ( H.size() ) 229 | { 230 | while ( nextexptd < cura ) 231 | { 232 | // std::cerr << "[V] filling empty " << nextexptd << std::endl; 233 | handleOverlaps(Hempty,nextexptd++,tspace,RL,d,annosize,datafile,blockintv); 234 | } 235 | 236 | assert ( nextexptd == cura ); 237 | handleOverlaps(H,cura,tspace,RL,d,annosize,datafile,blockintv); 238 | nextexptd++; 239 | } 240 | 241 | while ( nextexptd < static_cast(blockintv.second) ) 242 | { 243 | // std::cerr << "[V] filling empty " << nextexptd << std::endl; 244 | handleOverlaps(Hempty,nextexptd++,tspace,RL,d,annosize,datafile,blockintv); 245 | } 246 | assert ( nextexptd == static_cast(blockintv.second) ); 247 | 248 | // flush and close inqual data file 249 | datafile.flush(); 250 | 251 | // write inqual anno file 252 | uint64_t annooff = 0; 253 | libmaus2::dazzler::db::OutputBase::putLittleEndianInteger4(annofile,annosize.size() /* tracklen */,annooff); 254 | libmaus2::dazzler::db::OutputBase::putLittleEndianInteger4(annofile,8 /* size of pointer */,annooff); 255 | uint64_t s = 0; 256 | for ( uint64_t i 
= 0; i < annosize.size(); ++i ) 257 | { 258 | libmaus2::dazzler::db::OutputBase::putLittleEndianInteger8(annofile,s,annooff); 259 | s += annosize[i]; 260 | } 261 | libmaus2::dazzler::db::OutputBase::putLittleEndianInteger8(annofile,s,annooff); 262 | annofile.flush(); 263 | } 264 | } 265 | catch(std::exception const & ex) 266 | { 267 | std::cerr << ex.what() << std::endl; 268 | return EXIT_FAILURE; 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /src/computeintrinsicqv2.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2016-2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 
17 | */ 18 | #include 19 | #include 20 | #include 21 |
/*
 * Compute the quality bytes of A-read cura from its per-block error rates,
 * append them to odata and record their number in annosize.
 *
 * AH         entries [0,oh) are pairs (block offset, error rate); sorted in place
 * oh         number of valid entries in AH
 * cura       id of the A-read
 * tspace     trace point distance (block length)
 * RL         read lengths, indexed by read id
 * d          depth parameter; the best max(d/4,1) entries per block are averaged
 * annosize   annosize[cura - blockintv.first] receives the number of blocks
 * odata      receives one byte per block: round(255 * error rate)
 * blockintv  interval of read ids covered by the current database block
 * spanS      per block value which must reach the same threshold as the depth
 *            (presumably the number of alignments spanning the block - confirm)
 *
 * NOTE(review): the template arguments of RL, annosize, blockintv and of the
 * casts / numeric_limits below are not visible in this listing and were
 * reconstructed from their use - confirm against the upstream source.
 */
void handleOverlaps(
	// error histogram
	libmaus2::autoarray::AutoArray < std::pair < uint64_t, double > > & AH,
	uint64_t const oh,
	// aread id
	int64_t const cura,
	// trace point distance
	int64_t const tspace,
	// read length array
	std::vector<uint64_t> const & RL,
	uint64_t const d,
	libmaus2::autoarray::AutoArray<uint64_t> & annosize,
	std::ostream & odata,
	std::pair<uint64_t,uint64_t> const & blockintv,
	uint64_t const * spanS
)
{
	// pairs compare lexicographically: equal offsets become adjacent and are
	// ordered by increasing error rate
	std::sort(AH.begin(),AH.begin()+oh);

	uint64_t const alen = RL[cura];
	uint64_t const ablocks = (alen + tspace - 1) / tspace;

	// error rate per block, 1.0 unless the block is covered deeply enough
	std::vector < double > D( ablocks , 1.0 );
	for ( uint64_t i = 0; i < ablocks; ++i )
		assert ( D[i] == 1.0 );

	// number of best entries averaged per block, at least one
	uint64_t const dthres = std::max(static_cast<uint64_t>(d/4),static_cast<uint64_t>(1));

	// walk over the runs of entries sharing one block offset
	for ( uint64_t runBegin = 0, runEnd = 0; runBegin < oh; runBegin = runEnd )
	{
		uint64_t const off = AH[runBegin].first;

		runEnd = runBegin + 1;
		while ( runEnd < oh && AH[runEnd].first == off )
			++runEnd;

		uint64_t const depth = runEnd - runBegin;

		bool const deepEnough = depth >= dthres && spanS[off] >= dthres;

		if ( deepEnough )
		{
			// mean of the dthres smallest error rates of this block
			double s = 0.0;
			for ( uint64_t i = 0; i < dthres; ++i )
				s += AH[runBegin+i].second;
			double const erate = s / dthres;

			bool const ok = erate >= 0 && erate <= 1.0;

			// print the offending values before the assertion fires
			if ( ! ok )
			{
				std::cerr << "s=" << s << std::endl;
				std::cerr << "dthres=" << dthres << std::endl;
				assert ( ok );
			}

			// store error rate
			D[off] = erate;
		}

		#if 0
		if ( depth >= dthres && spanS[off] < dthres )
		{
			std::cerr << "[V] cura=" << cura << " off=" << off << " depth=" << depth << " spanS[off]=" << spanS[off] << std::endl;
		}
		#endif
	}

	#if 0
	if ( cura == 10797 || cura == 17917 )
	for ( uint64_t i = 0; i < D.size(); ++i )
		std::cerr << cura << "(" << i << "," << D[i] << ")" << std::endl;
	//std::cerr << std::endl;
	#endif

	// implicit quality values
	annosize [ cura - blockintv.first ] = D.size();
	uint64_t const maxval = std::numeric_limits<uint8_t>::max();
	for ( uint64_t i = 0; i < D.size(); ++i )
	{
		#if 0
		if ( D[i] > 0.4 )
			std::cerr << "aread=" << cura << " block=" << i << " erate=" << D[i] << std::endl;
		#endif

		// scale to a byte, rounding to nearest
		uint64_t const e = ::std::floor(D[i] * 255.0 + 0.5);
		//std::cerr << D[i] << " " << e << std::endl;
		odata.put(std::min(e,maxval));
	}
}

114 | int main(int argc, char *argv[]) 115 | { 116 | try 117 | { 118 | libmaus2::util::ArgParser const arg(argc,argv); 119 | 120 | if ( arg.size() < 2 ) 121 | { 122 | std::cerr << "usage: " << argv[0] << " -d " << std::endl; 123 | return EXIT_FAILURE; 124 | } 125 | 126 | std::string const dbfn = arg[0]; 127 | 128 | if ( ! arg.uniqueArgPresent("d") ) 129 | { 130 | std::cerr << "[V] argument -d required" << std::endl; 131 | return EXIT_FAILURE; 132 | } 133 | 134 | uint64_t const d = arg.getUnsignedNumericArg("d"); 135 | 136 | libmaus2::dazzler::db::DatabaseFile DB(dbfn); 137 | DB.computeTrimVector(); 138 | 139 | if ( DB.part != 0 ) 140 | { 141 | std::cerr << "Partial databases are not supported."
<< std::endl; 142 | return EXIT_FAILURE; 143 | } 144 | 145 | // read all meta data 146 | std::vector RL; 147 | DB.getAllReadLengths(RL); 148 | 149 | int64_t const tspace = libmaus2::dazzler::align::AlignmentFile::getTSpace( 150 | std::vector(arg.restargs.begin()+1,arg.restargs.end()) 151 | ); 152 | 153 | for ( uint64_t a = 1; a < arg.size(); ++a ) 154 | { 155 | std::string const aligns = arg[a]; 156 | 157 | std::cerr << "[V] processing " << aligns << std::endl; 158 | 159 | int64_t blockid = 0; 160 | if ( aligns.find('.') != std::string::npos ) 161 | { 162 | std::string const suffix = aligns.substr(aligns.find_first_of('.')+1); 163 | if ( suffix.size() && isdigit(suffix[0]) ) 164 | { 165 | char const * p = suffix.c_str(); 166 | char const * pe = p; 167 | 168 | while ( *pe && isdigit(*pe) ) 169 | { 170 | char const d = *pe; 171 | int64_t const digit = d - '0'; 172 | blockid *= 10; 173 | blockid += digit; 174 | ++pe; 175 | } 176 | } 177 | } 178 | 179 | std::pair const blockintv = DB.getTrimmedBlockInterval(blockid); 180 | 181 | std::string const annofilename = DB.getBlockTrackAnnoFileName("inqual",blockid); 182 | std::string const datafilename = DB.getBlockTrackDataFileName("inqual",blockid); 183 | 184 | libmaus2::autoarray::AutoArray annosize(blockintv.second-blockintv.first,true); 185 | 186 | libmaus2::aio::OutputStreamInstance annofile(annofilename); 187 | libmaus2::aio::OutputStreamInstance datafile(datafilename); 188 | 189 | // open alignment file 190 | libmaus2::aio::InputStream::unique_ptr_type Palgnfile(libmaus2::aio::InputStreamFactoryContainer::constructUnique(aligns)); 191 | libmaus2::dazzler::align::AlignmentFile algn(*Palgnfile); 192 | 193 | libmaus2::dazzler::align::Overlap OVL; 194 | 195 | // current a-read id 196 | int64_t preva = std::numeric_limits::min(); 197 | int64_t cura = -1; 198 | int64_t nextexptd = blockintv.first; 199 | 200 | uint64_t o = 0; 201 | libmaus2::autoarray::AutoArray < std::pair < uint64_t, double > > A; 202 | 203 | int64_t lp = 
0; 204 | 205 | libmaus2::autoarray::AutoArray < bool > spanA; 206 | libmaus2::autoarray::AutoArray < uint64_t > spanS; 207 | 208 | // get next overlap 209 | while ( algn.getNextOverlap(*Palgnfile,OVL) ) 210 | { 211 | if ( OVL.aread < preva ) 212 | { 213 | libmaus2::exception::LibMausException lme; 214 | lme.getStream() << "ids of a reads are not increasing: OVL.aread=" << OVL.aread << " preva=" << preva << std::endl; 215 | lme.finish(); 216 | throw lme; 217 | } 218 | preva = OVL.aread; 219 | 220 | if ( OVL.aread != cura && cura >= 0 ) 221 | { 222 | while ( nextexptd < cura ) 223 | { 224 | // std::cerr << "[V] filling empty " << nextexptd << std::endl; 225 | handleOverlaps(A,0,nextexptd++,tspace,RL,d,annosize,datafile,blockintv,spanS.begin()); 226 | } 227 | 228 | assert ( nextexptd == cura ); 229 | handleOverlaps(A,o,cura,tspace,RL,d,annosize,datafile,blockintv,spanS.begin()); 230 | 231 | #if 0 232 | if ( cura == 10797 || cura == 17917 ) 233 | for ( uint64_t i = 0; i < spanS.size(); ++i ) 234 | { 235 | std::cerr << "cura=" << cura << " i=" << i << " spanS[i]=" << spanS[i] << std::endl; 236 | } 237 | #endif 238 | 239 | nextexptd++; 240 | 241 | if ( cura/1024 != lp/1024 ) 242 | { 243 | lp = cura; 244 | std::cerr << "[V] " << lp << std::endl; 245 | } 246 | } 247 | 248 | if ( OVL.aread != cura ) 249 | { 250 | o = 0; 251 | int64_t const numnewblocks = (RL[OVL.aread] + tspace - 1)/tspace; 252 | spanA.resize(numnewblocks); 253 | std::fill(spanA.begin(),spanA.end(),0ull); 254 | spanS.resize(numnewblocks); 255 | std::fill(spanS.begin(),spanS.end(),0ull); 256 | } 257 | 258 | cura = OVL.aread; 259 | o = OVL.fillErrorHistogram(tspace,A,o,RL[OVL.aread]); 260 | OVL.fillSpanHistogram(tspace,RL[OVL.aread],0.3 /* ethres */,1 /* bthres */,spanA,spanS); 261 | } 262 | 263 | if ( o ) 264 | { 265 | while ( nextexptd < cura ) 266 | { 267 | // std::cerr << "[V] filling empty " << nextexptd << std::endl; 268 | 
handleOverlaps(A,0,nextexptd++,tspace,RL,d,annosize,datafile,blockintv,spanS.begin()); 269 | } 270 | 271 | assert ( nextexptd == cura ); 272 | handleOverlaps(A,o,cura,tspace,RL,d,annosize,datafile,blockintv,spanS.begin()); 273 | o = 0; 274 | nextexptd++; 275 | } 276 | 277 | while ( nextexptd < static_cast(blockintv.second) ) 278 | { 279 | // std::cerr << "[V] filling empty " << nextexptd << std::endl; 280 | handleOverlaps(A,0,nextexptd++,tspace,RL,d,annosize,datafile,blockintv,spanS.begin()); 281 | } 282 | assert ( nextexptd == static_cast(blockintv.second) ); 283 | 284 | // flush and close inqual data file 285 | datafile.flush(); 286 | 287 | // write inqual anno file 288 | uint64_t annooff = 0; 289 | libmaus2::dazzler::db::OutputBase::putLittleEndianInteger4(annofile,annosize.size() /* tracklen */,annooff); 290 | libmaus2::dazzler::db::OutputBase::putLittleEndianInteger4(annofile,8 /* size of pointer */,annooff); 291 | uint64_t s = 0; 292 | for ( uint64_t i = 0; i < annosize.size(); ++i ) 293 | { 294 | libmaus2::dazzler::db::OutputBase::putLittleEndianInteger8(annofile,s,annooff); 295 | s += annosize[i]; 296 | } 297 | libmaus2::dazzler::db::OutputBase::putLittleEndianInteger8(annofile,s,annooff); 298 | annofile.flush(); 299 | } 300 | 301 | return EXIT_SUCCESS; 302 | } 303 | catch(std::exception const & ex) 304 | { 305 | std::cerr << ex.what() << std::endl; 306 | return EXIT_FAILURE; 307 | } 308 | } 309 | -------------------------------------------------------------------------------- /src/maftobam.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2016-2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 
9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | static bool startsWith(std::string const & line, std::string const & prefix) 32 | { 33 | return line.size() >= prefix.size() && 34 | line.substr(0,prefix.size()) == prefix; 35 | } 36 | 37 | static std::vector readText(std::string const & textfn) 38 | { 39 | libmaus2::fastx::FastAReader FA(textfn); 40 | std::vector V; 41 | libmaus2::fastx::FastAReader::pattern_type pat; 42 | while ( FA.getNextPatternUnlocked(pat) ) 43 | { 44 | V.push_back(pat); 45 | } 46 | 47 | return V; 48 | } 49 | 50 | static std::vector split(std::string const & line) 51 | { 52 | std::vector tok; 53 | uint64_t l = 0; 54 | 55 | while ( l < line.size() ) 56 | { 57 | while ( l < line.size() && ::std::isspace(line[l]) ) 58 | ++l; 59 | 60 | uint64_t h = l; 61 | while ( h < line.size() && !::std::isspace(line[h]) ) 62 | ++h; 63 | 64 | if ( l != h ) 65 | tok.push_back(line.substr(l,h-l)); 66 | 67 | l = h; 68 | } 69 | 70 | return tok; 71 | } 72 | 73 | struct MatchLine 74 | { 75 | std::string name; 76 | int64_t start; 77 | uint64_t size; 78 | bool strand; 79 | uint64_t seqsize; 80 | std::string text; 81 | 82 | MatchLine() 83 | {} 84 | MatchLine( 85 | std::string const & rname, 86 | int64_t const & rstart, 87 | uint64_t const & rsize, 88 | bool const & rstrand, 89 | uint64_t const & rseqsize, 90 | std::string const & rtext 91 | ) : name(rname), start(rstart), size(rsize), strand(rstrand), seqsize(rseqsize), text(rtext) {} 92 | }; 93 | 94 | struct Match 
95 | { 96 | std::vector < MatchLine > ML; 97 | 98 | void handle( 99 | libmaus2::bambam::BamBlockWriterBase & writer, 100 | std::map < std::string, uint64_t> const & M, std::vector const & /* Vref */, 101 | std::map const & replmap 102 | ) 103 | { 104 | if ( ML.size() == 2 ) 105 | { 106 | for ( uint64_t i = 1; i < ML.size(); ++i ) 107 | { 108 | bool const ok = ( ML[i].text.size() == ML[0].text.size() ); 109 | if ( ! ok ) 110 | { 111 | std::cerr << "[E] malformed alignment" << std::endl; 112 | return; 113 | } 114 | } 115 | 116 | if ( !ML[0].strand ) 117 | { 118 | std::cerr << "[E] cannot handle RC on refseq" << std::endl; 119 | return; 120 | } 121 | 122 | std::vector < libmaus2::bambam::BamFlagBase::bam_cigar_ops > Vop; 123 | std::ostringstream readstr; 124 | std::ostringstream refstr; 125 | for ( uint64_t i = 0; i < ML[0].text.size(); ++i ) 126 | { 127 | if ( ML[1].text[i] != '-' ) 128 | readstr.put(ML[1].text[i]); 129 | if ( ML[0].text[i] != '-' ) 130 | refstr.put(ML[0].text[i]); 131 | 132 | if ( ML[0].text[i] == ML[1].text[i] ) 133 | { 134 | assert ( ML[0].text[i] != '-' ); 135 | #if defined(CIGARDEBUG) 136 | Vop.push_back(libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CMATCH); 137 | #else 138 | Vop.push_back(libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CEQUAL); 139 | #endif 140 | } 141 | else 142 | { 143 | assert ( ML[0].text[i] != ML[1].text[i] ); 144 | 145 | if ( ML[0].text[i] == '-' ) 146 | Vop.push_back(libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CINS); 147 | else if ( ML[1].text[i] == '-' ) 148 | Vop.push_back(libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CDEL); 149 | else 150 | { 151 | #if defined(CIGARDEBUG) 152 | Vop.push_back(libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CMATCH); 153 | #else 154 | Vop.push_back(libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CDIFF); 155 | #endif 156 | } 157 | } 158 | } 159 | 160 | 161 | uint64_t l = 0; 162 | std::ostringstream cigstream; 163 | if ( ML[1].start ) 164 | { 165 | cigstream << ML[1].start << 'H'; 166 | 
} 167 | while ( l < Vop.size() ) 168 | { 169 | uint64_t h = l+1; 170 | while ( h < Vop.size() && Vop[h] == Vop[l] ) 171 | ++h; 172 | 173 | cigstream << (h-l) << Vop[l]; 174 | 175 | l = h; 176 | } 177 | 178 | if ( ML[1].start + ML[1].size < ML[1].seqsize ) 179 | { 180 | cigstream << (ML[1].seqsize-(ML[1].start + ML[1].size)) << 'H'; 181 | } 182 | 183 | std::string const readdata = readstr.str(); 184 | std::string const cigstr = cigstream.str(); 185 | std::string const qual(readdata.size(),255); 186 | 187 | if ( replmap.find(ML[0].name) != replmap.end() ) 188 | ML[0].name = replmap.find(ML[0].name)->second; 189 | 190 | if ( M.find(ML[0].name) == M.end() ) 191 | { 192 | std::cerr << "[E] unknown ref seq " << ML[0].name << std::endl; 193 | return; 194 | } 195 | 196 | uint64_t const refid = M.find(ML[0].name)->second; 197 | 198 | ::libmaus2::fastx::UCharBuffer buffer; 199 | libmaus2::bambam::BamSeqEncodeTable seqenc; 200 | 201 | #if 0 202 | std::cerr << "refstr=" << refstr.str() << std::endl; 203 | std::cerr << "compare=" << Vref[refid].spattern.substr(ML[0].start,ML[0].size) << std::endl; 204 | #endif 205 | 206 | libmaus2::bambam::BamAlignmentEncoderBase::encodeAlignment( 207 | buffer,seqenc, 208 | ML[1].name, 209 | refid, 210 | ML[0].start, 211 | 255 /* map */, 212 | ML[1].strand ? 
0 : libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_FREVERSE, 213 | cigstr, 214 | -1, 215 | -1, 216 | 0, 217 | readdata, 218 | qual, 219 | 0, 220 | true 221 | ); 222 | 223 | ::libmaus2::bambam::MdStringComputationContext mdcontext; 224 | std::string const ref = refstr.str(); 225 | 226 | // std::cerr << ref << std::endl; 227 | 228 | libmaus2::bambam::BamAlignmentDecoderBase::calculateMd(buffer.buffer,buffer.length,mdcontext,ref.begin()); 229 | 230 | libmaus2::bambam::BamAlignmentEncoderBase::putAuxString(buffer,"MD",mdcontext.md.get()); 231 | libmaus2::bambam::BamAlignmentEncoderBase::putAuxNumber(buffer,"NM",'i',mdcontext.nm); 232 | 233 | #if 0 234 | ::libmaus2::bambam::BamFormatAuxiliary aux; 235 | libmaus2::bambam::BamAlignmentDecoderBase::formatAlignment( 236 | std::cout, 237 | buffer.buffer, 238 | buffer.length, 239 | header, 240 | aux 241 | ); 242 | 243 | std::cout << std::endl; 244 | #endif 245 | 246 | writer.writeBamBlock(buffer.buffer,buffer.length); 247 | } 248 | else if ( ML.size() ) 249 | { 250 | std::cerr << "[E] cannot handle multi alignment" << std::endl; 251 | } 252 | } 253 | }; 254 | 255 | int main(int argc, char * argv[]) 256 | { 257 | try 258 | { 259 | libmaus2::util::ArgInfo const arginfo(argc,argv); 260 | libmaus2::util::ArgParser const arg(argc,argv); 261 | 262 | std::map replmap; 263 | if ( arg.size() > 1 ) 264 | { 265 | std::string replname = arg[1]; 266 | libmaus2::aio::InputStreamInstance ISI(replname); 267 | while ( ISI ) 268 | { 269 | std::string line; 270 | std::getline(ISI,line); 271 | if ( line.size() && line.find('\t') != std::string::npos ) 272 | { 273 | uint64_t const p = line.find('\t'); 274 | std::string const key = line.substr(0,p); 275 | std::string const value = line.substr(p+1); 276 | replmap[key] = value; 277 | } 278 | } 279 | } 280 | 281 | libmaus2::util::LineBuffer LB(std::cin); 282 | 283 | std::vector Vref = readText(arg[0]); 284 | std::map < std::string, uint64_t> M; 285 | std::ostringstream headerstream; 286 | 
headerstream << "@HD\tVN:1.5\tSO:unknown\n"; 287 | for ( uint64_t i = 0; i < Vref.size(); ++i ) 288 | { 289 | M [ Vref[i].getShortStringId() ] = i; 290 | headerstream << "@SQ\tSN:" << Vref[i].getShortStringId() << "\tLN:" << Vref[i].spattern.size() << "\n"; 291 | } 292 | 293 | std::string const headertext = headerstream.str(); 294 | //std::cout << headertext; 295 | 296 | libmaus2::bambam::BamHeader bamheader(headertext); 297 | libmaus2::bambam::BamBlockWriterBase::unique_ptr_type writer(libmaus2::bambam::BamBlockWriterBaseFactory::construct(bamheader, arginfo)); 298 | 299 | char const * a = NULL; 300 | char const * e = NULL; 301 | Match match; 302 | 303 | while ( LB.getline(&a,&e) ) 304 | { 305 | // std::cerr << std::string(a,e) << std::endl; 306 | std::string const line(a,e); 307 | 308 | if ( startsWith(line,"s") || startsWith(line,"a") ) 309 | { 310 | std::vector tokens = split(line); 311 | 312 | if ( tokens.size() >= 7 && tokens[0] == "s" ) 313 | { 314 | std::string const readname = tokens[1]; 315 | int64_t const start = atol(tokens[2].c_str()); 316 | uint64_t const size = atol(tokens[3].c_str()); 317 | std::string const strand = tokens[4]; 318 | 319 | if ( strand != "+" && strand != "-" ) 320 | { 321 | std::cerr << "[E] unknown strand " << strand << std::endl; 322 | continue; 323 | } 324 | 325 | uint64_t const seqsize = atol(tokens[5].c_str()); 326 | std::string const text = tokens[6]; 327 | 328 | MatchLine ML(readname,start,size,strand == "+",seqsize,text); 329 | match.ML.push_back(ML); 330 | } 331 | if ( tokens.size() >= 1 && tokens[0] == "a" ) 332 | { 333 | match.handle(*writer,M,Vref,replmap); 334 | match.ML.resize(0); 335 | } 336 | } 337 | } 338 | 339 | if ( match.ML.size() ) 340 | match.handle(*writer,M,Vref,replmap); 341 | 342 | writer.reset(); 343 | } 344 | catch(std::exception const & ex) 345 | { 346 | std::cerr << ex.what() << std::endl; 347 | return EXIT_FAILURE; 348 | } 349 | } 350 | 
-------------------------------------------------------------------------------- /src/marktrue.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | static uint64_t getDefaultNumThreads() 35 | { 36 | return libmaus2::parallel::NumCpus::getNumLogicalProcessors(); 37 | } 38 | 39 | template 40 | static std::string formatRHS(std::string const & description, default_type def) 41 | { 42 | std::ostringstream ostr; 43 | ostr << description << " (default " << def << ")"; 44 | return ostr.str(); 45 | } 46 | 47 | /* 48 | parameters: 49 | 50 | -t : default number of logical cores, threads 51 | */ 52 | 53 | static std::string helpMessage(libmaus2::util::ArgParser const & arg) 54 | { 55 | std::vector < std::pair < std::string, std::string > > optionMap; 56 | optionMap . push_back ( std::pair < std::string, std::string >("t", formatRHS("number of threads",getDefaultNumThreads()))); 57 | optionMap . push_back ( std::pair < std::string, std::string >("verbose", formatRHS("verbosity",false))); 58 | optionMap . 
push_back ( std::pair < std::string, std::string >("T", formatRHS("temporary file prefix",libmaus2::util::ArgInfo::getDefaultTmpFileName(arg.progname)))); 59 | 60 | uint64_t maxlhs = 0; 61 | for ( std::vector < std::pair < std::string, std::string > >::const_iterator ita = optionMap.begin(); ita != optionMap.end(); ++ita ) 62 | { 63 | assert ( ita->first.size() ); 64 | 65 | if ( ita->first.size() == 1 ) 66 | maxlhs = std::max(maxlhs,static_cast(ita->first.size()+1)); 67 | else 68 | maxlhs = std::max(maxlhs,static_cast(ita->first.size()+2)); 69 | } 70 | 71 | std::ostringstream messtr; 72 | for ( std::vector < std::pair < std::string, std::string > >::const_iterator ita = optionMap.begin(); ita != optionMap.end(); ++ita ) 73 | { 74 | std::string const key = ita->first; 75 | 76 | messtr << "\t"; 77 | messtr << std::setw(maxlhs) << std::setfill(' '); 78 | if ( key.size() == 1 ) 79 | messtr << (std::string("-")+key); 80 | else 81 | messtr << (std::string("--")+key); 82 | 83 | messtr << std::setw(0); 84 | 85 | messtr << ": "; 86 | 87 | messtr << ita->second; 88 | messtr << "\n"; 89 | } 90 | 91 | return messtr.str(); 92 | } 93 | 94 | struct BamAlignmentTypeInfo 95 | { 96 | typedef libmaus2::bambam::BamAlignment element_type; 97 | typedef element_type::shared_ptr_type pointer_type; 98 | 99 | static pointer_type getNullPointer() 100 | { 101 | return pointer_type(); 102 | } 103 | 104 | static pointer_type deallocate(pointer_type /* p */) 105 | { 106 | return getNullPointer(); 107 | } 108 | }; 109 | 110 | struct BamAlignmentAllocator 111 | { 112 | typedef libmaus2::bambam::BamAlignment element_type; 113 | typedef element_type::shared_ptr_type pointer_type; 114 | 115 | pointer_type operator()() const 116 | { 117 | return pointer_type(new element_type); 118 | } 119 | }; 120 | 121 | struct BamAlignmentContainer 122 | { 123 | typedef BamAlignmentContainer this_type; 124 | typedef libmaus2::util::unique_ptr::type unique_ptr_type; 125 | typedef libmaus2::util::shared_ptr::type 
shared_ptr_type; 126 | 127 | std::string const bamfn; 128 | libmaus2::bambam::BamNumericalIndexDecoder indexdec; 129 | libmaus2::parallel::LockedGrowingFreeList & bamAlignmentFreeList; 130 | std::map < uint64_t, libmaus2::bambam::BamAlignment::shared_ptr_type > M; 131 | 132 | BamAlignmentContainer( 133 | std::string const & rbamfn, 134 | libmaus2::parallel::LockedGrowingFreeList & rbamAlignmentFreeList 135 | ) : bamfn(rbamfn), indexdec(libmaus2::bambam::BamNumericalIndexBase::getIndexName(bamfn)), bamAlignmentFreeList(rbamAlignmentFreeList) 136 | { 137 | 138 | } 139 | 140 | void clear() 141 | { 142 | for ( std::map < uint64_t, libmaus2::bambam::BamAlignment::shared_ptr_type >::iterator it = M.begin(); it != M.end(); ++it ) 143 | bamAlignmentFreeList.put(it->second); 144 | M.clear(); 145 | } 146 | 147 | libmaus2::bambam::BamAlignment const & operator[](uint64_t const i) 148 | { 149 | if ( M.find(i) == M.end() ) 150 | { 151 | libmaus2::bambam::BamAccessor A(bamfn,indexdec,i); 152 | libmaus2::bambam::BamAlignment::shared_ptr_type P = bamAlignmentFreeList.get(); 153 | libmaus2::bambam::BamAlignment & B = A[i]; 154 | P->swap(B); 155 | M[i] = P; 156 | } 157 | 158 | assert ( M.find(i) != M.end() ); 159 | 160 | return *(M.find(i)->second); 161 | } 162 | }; 163 | 164 | std::string getTmpFileBase(libmaus2::util::ArgParser const & arg) 165 | { 166 | std::string const tmpfilebase = arg.uniqueArgPresent("T") ? arg["T"] : libmaus2::util::ArgInfo::getDefaultTmpFileName(arg.progname); 167 | return tmpfilebase; 168 | } 169 | 170 | int marktrue(libmaus2::util::ArgParser const & arg) 171 | { 172 | std::string const outfn = arg[0]; 173 | std::string const db = arg[1]; 174 | std::string const bam = arg[2]; 175 | std::vector VI(arg.restargs.begin()+3,arg.restargs.end()); 176 | 177 | uint64_t const numthreads = arg.uniqueArgPresent("t") ? 
arg.getUnsignedNumericArg("t") : getDefaultNumThreads(); 178 | 179 | libmaus2::dazzler::db::DatabaseFile DB(db); 180 | DB.computeTrimVector(); 181 | 182 | libmaus2::parallel::LockedGrowingFreeList bamAlignmentFreeList; 183 | libmaus2::autoarray::AutoArray < BamAlignmentContainer::unique_ptr_type > ABAC(numthreads); 184 | libmaus2::autoarray::AutoArray < libmaus2::dazzler::align::TrueOverlap::unique_ptr_type > ATO(numthreads); 185 | int64_t const tspace = libmaus2::dazzler::align::AlignmentFile::getTSpace(VI); 186 | libmaus2::dazzler::align::TrueOverlapStats TOS; 187 | 188 | for ( uint64_t i = 0; i < numthreads; ++i ) 189 | { 190 | BamAlignmentContainer::unique_ptr_type t(new BamAlignmentContainer(bam,bamAlignmentFreeList)); 191 | ABAC[i] = UNIQUE_PTR_MOVE(t); 192 | 193 | libmaus2::dazzler::align::TrueOverlap::unique_ptr_type o(new libmaus2::dazzler::align::TrueOverlap(TOS,DB,tspace)); 194 | ATO[i] = UNIQUE_PTR_MOVE(o); 195 | } 196 | 197 | std::string const tmpprefix = getTmpFileBase(arg); 198 | libmaus2::dazzler::align::AlignmentWriterArray AWA(tmpprefix + "AWA",numthreads,tspace); 199 | 200 | for ( uint64_t i = 0; i < VI.size(); ++i ) 201 | { 202 | std::string const & las = VI[i]; 203 | std::string const indalignerlasindexname = libmaus2::dazzler::align::DalignerIndexDecoder::getDalignerIndexName(las); 204 | 205 | if ( 206 | ! 
libmaus2::util::GetFileSize::fileExists(indalignerlasindexname) 207 | || 208 | libmaus2::util::GetFileSize::isOlder(indalignerlasindexname,las) 209 | ) 210 | { 211 | libmaus2::dazzler::align::OverlapIndexer::constructIndex(las,&std::cerr); 212 | } 213 | 214 | libmaus2::autoarray::AutoArray Adalindex(numthreads); 215 | 216 | for ( uint64_t i = 0; i < Adalindex.size(); ++i ) 217 | { 218 | libmaus2::dazzler::align::DalignerIndexDecoder::unique_ptr_type Pdalindex( 219 | new libmaus2::dazzler::align::DalignerIndexDecoder(las,indalignerlasindexname) 220 | ); 221 | Adalindex[i] = UNIQUE_PTR_MOVE(Pdalindex); 222 | } 223 | 224 | 225 | int64_t const minaread = libmaus2::dazzler::align::OverlapIndexer::getMinimumARead(las); 226 | int64_t const maxaread = libmaus2::dazzler::align::OverlapIndexer::getMaximumARead(las); 227 | 228 | #if defined(_OPENMP) 229 | #pragma omp parallel for schedule(dynamic,1) 230 | #endif 231 | for ( int64_t j = minaread; j <= maxaread; ++j ) 232 | { 233 | #if defined(_OPENMP) 234 | uint64_t const tid = omp_get_thread_num(); 235 | #else 236 | uint64_t const tid = 0; 237 | #endif 238 | 239 | libmaus2::dazzler::align::AlignmentFileDecoder::unique_ptr_type pdec(libmaus2::dazzler::align::OverlapIndexer::openAlignmentFileAt(las,j,j+1,*Adalindex[tid])); 240 | 241 | BamAlignmentContainer & BAC = *ABAC[tid]; 242 | BAC.clear(); 243 | libmaus2::dazzler::align::TrueOverlap & TO = *ATO[tid]; 244 | 245 | libmaus2::bambam::BamAlignment const & bam_a = BAC[j]; 246 | libmaus2::dazzler::align::AlignmentWriter & AW = AWA[tid]; 247 | 248 | libmaus2::dazzler::align::Overlap OVL; 249 | while ( pdec->getNextOverlap(OVL) ) 250 | { 251 | libmaus2::bambam::BamAlignment const & bam_b = BAC[OVL.bread]; 252 | bool const istrue = TO.trueOverlap(OVL,bam_a,bam_b); 253 | 254 | if ( istrue ) 255 | OVL.flags |= OVL.getTrueFlag(); 256 | else 257 | OVL.flags &= (~static_cast(OVL.getTrueFlag())); 258 | 259 | AW.put(OVL); 260 | } 261 | 262 | { 263 | libmaus2::parallel::ScopePosixSpinLock 
slock(libmaus2::aio::StreamLock::cerrlock); 264 | std::cerr << "[V] " << j << std::endl; 265 | } 266 | } 267 | } 268 | 269 | AWA.merge(outfn,tmpprefix+"_AWA_merge"); 270 | 271 | return EXIT_SUCCESS; 272 | } 273 | 274 | int main(int argc, char * argv[]) 275 | { 276 | try 277 | { 278 | libmaus2::util::ArgParser const arg(argc,argv); 279 | 280 | if ( arg.uniqueArgPresent("v") || arg.uniqueArgPresent("version") ) 281 | { 282 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 283 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." << std::endl; 284 | return EXIT_SUCCESS; 285 | } 286 | else if ( arg.uniqueArgPresent("h") || arg.uniqueArgPresent("help") || arg.size() < 3 ) 287 | { 288 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 289 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." << std::endl; 290 | std::cerr << "\n"; 291 | std::cerr << "usage: " << arg.progname << " [options] out.las in.db in.bam in.las ...\n"; 292 | std::cerr << "\n"; 293 | std::cerr << "The following options can be used (no space between option name and parameter allowed):\n\n"; 294 | std::cerr << helpMessage(arg); 295 | return EXIT_SUCCESS; 296 | } 297 | else 298 | { 299 | return marktrue(arg); 300 | } 301 | } 302 | catch(std::exception const & ex) 303 | { 304 | std::cerr << ex.what() << std::endl; 305 | return EXIT_FAILURE; 306 | } 307 | } 308 | -------------------------------------------------------------------------------- /src/wmap.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2018 German Tischler-Höhle 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 
9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | int64_t getRawId(libmaus2::fastx::FastAReader::pattern_type const & P) 29 | { 30 | std::string s = P.getShortStringId(); 31 | std::deque tokens = libmaus2::util::stringFunctions::tokenize(s,std::string("/")); 32 | 33 | if ( 1 < tokens.size() ) 34 | { 35 | std::istringstream istr(tokens[1]); 36 | int64_t i; 37 | istr >> i; 38 | 39 | if ( istr && istr.peek() == std::istream::traits_type::eof() ) 40 | return i; 41 | } 42 | 43 | std::cerr << "[W] unable to parse " << s << std::endl; 44 | 45 | return -1; 46 | } 47 | 48 | int64_t getConsId(libmaus2::fastx::FastAReader::pattern_type const & P) 49 | { 50 | std::string s = P.getShortStringId(); 51 | std::deque tokens = libmaus2::util::stringFunctions::tokenize(s,std::string("/")); 52 | 53 | if ( 0 < tokens.size() ) 54 | { 55 | std::istringstream istr(tokens[0]); 56 | int64_t i; 57 | istr >> i; 58 | 59 | if ( istr && istr.peek() == std::istream::traits_type::eof() ) 60 | return i-1; 61 | } 62 | 63 | std::cerr << "[W] unable to parse " << s << std::endl; 64 | 65 | return -1; 66 | } 67 | 68 | std::vector < std::string > stok(std::string const & s) 69 | { 70 | std::vector < std::string > V; 71 | uint64_t low = 0; 72 | 73 | while ( low < s.size() ) 74 | { 75 | while ( low < s.size() && ::isspace(s[low]) ) 76 | ++low; 77 | 78 | uint64_t start = low; 79 | while ( low < s.size() && !::isspace(s[low]) ) 80 | ++low; 81 | 82 | if ( low > start ) 83 | V.push_back(s.substr(start,low-start)); 84 | } 85 | 86 | return V; 87 | } 
88 | 89 | libmaus2::math::IntegerInterval getInterval(libmaus2::fastx::FastAReader::pattern_type const & P) 90 | { 91 | std::vector < std::string > VT = stok(P.sid); 92 | 93 | if ( 1 < VT.size() ) 94 | { 95 | std::string A = VT[1]; 96 | 97 | if ( A.size() >= 3 && A[0] == 'A' && A[1] == '=' && A[2] == '[' && A[A.size()-1] == ']' ) 98 | { 99 | A = A.substr(3); 100 | A = A.substr(0,A.size()-1); 101 | 102 | std::deque tokens = libmaus2::util::stringFunctions::tokenize(A,std::string(",")); 103 | 104 | if ( tokens.size() == 2 ) 105 | { 106 | std::istringstream astr(tokens[0]); 107 | std::istringstream bstr(tokens[1]); 108 | int64_t ia; 109 | int64_t ib; 110 | astr >> ia; 111 | bstr >> ib; 112 | 113 | if ( 114 | astr && astr.peek() == std::istream::traits_type::eof() && 115 | bstr && bstr.peek() == std::istream::traits_type::eof() 116 | ) 117 | { 118 | return libmaus2::math::IntegerInterval(ia,ib); 119 | } 120 | else 121 | { 122 | return libmaus2::math::IntegerInterval::empty(); 123 | } 124 | } 125 | else 126 | { 127 | return libmaus2::math::IntegerInterval::empty(); 128 | } 129 | } 130 | else 131 | { 132 | return libmaus2::math::IntegerInterval::empty(); 133 | } 134 | } 135 | else 136 | { 137 | return libmaus2::math::IntegerInterval::empty(); 138 | } 139 | // A=[0,6552] 140 | } 141 | 142 | static uint64_t getDefaultNumThreads() 143 | { 144 | return libmaus2::parallel::NumCpus::getNumLogicalProcessors(); 145 | } 146 | 147 | int main(int argc, char * argv[]) 148 | { 149 | try 150 | { 151 | libmaus2::util::ArgParser const arg(argc,argv); 152 | std::string const outfn = arg[0]; 153 | std::string const dbfn = arg[1]; 154 | std::string const rawfast = arg[2]; 155 | std::string const consfast = arg[3]; 156 | std::string const lasin = arg[4]; 157 | 158 | // number of threads 159 | uint64_t const numthreads = arg.uniqueArgPresent("t") ? 
arg.getUnsignedNumericArg("t") : getDefaultNumThreads(); 160 | 161 | libmaus2::dazzler::db::DatabaseFile DB(dbfn); 162 | DB.computeTrimVector(); 163 | 164 | int64_t const tspace = libmaus2::dazzler::align::AlignmentFile::getTSpace(lasin); 165 | 166 | libmaus2::fastx::FastaPeeker::unique_ptr_type praw(new libmaus2::fastx::FastaPeeker(rawfast)); 167 | libmaus2::fastx::FastaPeeker::unique_ptr_type pcons(new libmaus2::fastx::FastaPeeker(consfast)); 168 | 169 | libmaus2::dazzler::align::AlignmentFileRegion::unique_ptr_type plasin( 170 | libmaus2::dazzler::align::OverlapIndexer::openAlignmentFileWithoutIndex(lasin) 171 | ); 172 | 173 | libmaus2::dazzler::align::AlignmentWriter AW(outfn,tspace); 174 | 175 | libmaus2::fastx::FastAReader::pattern_type pa, pb; 176 | while ( 177 | praw->peekNext(pa) 178 | && 179 | pcons->peekNext(pb) 180 | ) 181 | { 182 | if ( getRawId(pa) < getConsId(pb) ) 183 | { 184 | praw->getNext(pa); 185 | } 186 | else if ( getConsId(pb) < getRawId(pa) ) 187 | { 188 | pcons->getNext(pb); 189 | } 190 | else 191 | { 192 | int64_t const id = getRawId(pa); 193 | 194 | libmaus2::dazzler::align::Overlap OVL; 195 | while ( plasin->peekNextOverlap(OVL) && OVL.aread < id ) 196 | plasin->getNextOverlap(OVL); 197 | 198 | std::vector < libmaus2::dazzler::align::Overlap > VOVL; 199 | while ( plasin->peekNextOverlap(OVL) && OVL.aread == id ) 200 | { 201 | plasin->getNextOverlap(OVL); 202 | VOVL.push_back(OVL); 203 | } 204 | 205 | 206 | std::vector < libmaus2::fastx::FastAReader::pattern_type > VRAW; 207 | std::vector < libmaus2::fastx::FastAReader::pattern_type > VCONS; 208 | std::vector < libmaus2::math::IntegerInterval > VI; 209 | 210 | while ( praw->peekNext(pa) && getRawId(pa) == id ) 211 | { 212 | praw->getNext(pa); 213 | VRAW.push_back(pa); 214 | } 215 | while ( pcons->peekNext(pb) && getConsId(pb) == id ) 216 | { 217 | pcons->getNext(pb); 218 | VCONS.push_back(pb); 219 | VI.push_back(getInterval(pb)); 220 | } 221 | 222 | assert ( VRAW.size() == 1 ); 223 | 224 
| int64_t const l = VRAW[0].spattern.size(); 225 | 226 | libmaus2::math::IntegerInterval IF(0,l-1); 227 | 228 | std::vector< libmaus2::math::IntegerInterval > VC = libmaus2::math::IntegerInterval::difference(IF,VI); 229 | 230 | struct MarkedInterval 231 | { 232 | bool raw; 233 | libmaus2::math::IntegerInterval I; 234 | uint64_t id; 235 | 236 | MarkedInterval() {} 237 | MarkedInterval(bool const rraw, libmaus2::math::IntegerInterval const & rI, uint64_t const rid) : raw(rraw), I(rI), id(rid) {} 238 | 239 | bool operator<(MarkedInterval const & M) const 240 | { 241 | return I < M.I; 242 | } 243 | }; 244 | 245 | std::vector < MarkedInterval > VR; 246 | for ( uint64_t i = 0; i < VI.size(); ++i ) 247 | VR.push_back(MarkedInterval(false,VI[i],i)); 248 | for ( uint64_t i = 0; i < VC.size(); ++i ) 249 | VR.push_back(MarkedInterval(true,VC[i],i)); 250 | 251 | std::sort(VR.begin(),VR.end()); 252 | 253 | std::ostringstream basestr; 254 | for ( uint64_t i = 0; i < VR.size(); ++i ) 255 | { 256 | if ( VR[i].raw ) 257 | basestr << VRAW[0].spattern.substr(VR[i].I.from,VR[i].I.diameter()); 258 | else 259 | basestr << VCONS[VR[i].id].spattern; 260 | } 261 | 262 | std::string const sraw = VRAW[0].spattern; 263 | std::string const spatch = basestr.str(); 264 | 265 | libmaus2::lcs::NPLinMem np; 266 | // A raw 267 | // B patch 268 | np.np(sraw.begin(),sraw.end(),spatch.begin(),spatch.end()); 269 | 270 | std::cerr << id << " " << np.getAlignmentStatistics() << " " << VOVL.size() << std::endl; 271 | 272 | std::cout << ">" << VRAW[0].sid << "\n"; 273 | uint64_t p = 0; 274 | while ( p < spatch.size() ) 275 | { 276 | uint64_t const cols = 80; 277 | uint64_t const l = spatch.size(); 278 | uint64_t const rest = l - p; 279 | uint64_t const toprint = std::min(rest,cols); 280 | std::cout << spatch.substr(p,toprint) << "\n"; 281 | p += toprint; 282 | } 283 | 284 | #if defined(_OPENMP) 285 | #pragma omp parallel for num_threads(numthreads) schedule(dynamic,1) 286 | #endif 287 | for ( uint64_t i = 
0; i < VOVL.size(); ++i ) 288 | { 289 | libmaus2::dazzler::align::Overlap & OVL = VOVL[i]; 290 | 291 | int64_t abpos = OVL.path.abpos; 292 | int64_t aepos = OVL.path.aepos; 293 | 294 | std::pair const adva = libmaus2::lcs::AlignmentTraceContainer::advanceMaxA(np.ta,np.te,abpos); 295 | std::pair SLA = libmaus2::lcs::AlignmentTraceContainer::getStringLengthUsed(np.ta,np.ta + adva.second); 296 | std::pair const advb = libmaus2::lcs::AlignmentTraceContainer::advanceMaxA(np.ta,np.te,aepos); 297 | std::pair SLB = libmaus2::lcs::AlignmentTraceContainer::getStringLengthUsed(np.ta,np.ta + advb.second); 298 | 299 | OVL.path.abpos = SLA.second; 300 | OVL.path.aepos = SLB.second; 301 | 302 | libmaus2::lcs::NPLinMem nploc; 303 | std::string const b = DB.decodeRead(OVL.bread,OVL.isInverse()); 304 | nploc.np( 305 | spatch.begin() + OVL.path.abpos, 306 | spatch.begin() + OVL.path.aepos, 307 | b.begin() + OVL.path.bbpos, 308 | b.begin() + OVL.path.bepos 309 | ); 310 | 311 | 312 | libmaus2::dazzler::align::Overlap const NOVL = libmaus2::dazzler::align::Overlap::computeOverlap( 313 | OVL.flags, 314 | OVL.aread, // ref 315 | OVL.bread, // read 316 | OVL.path.abpos, 317 | OVL.path.aepos, 318 | OVL.path.bbpos, 319 | OVL.path.bepos, 320 | tspace, 321 | nploc 322 | ); 323 | 324 | // std::cerr << nploc.getAlignmentStatistics() << std::endl; 325 | 326 | VOVL[i] = NOVL; 327 | } 328 | 329 | { 330 | libmaus2::lcs::NPLinMem nploc; 331 | std::string const b = DB.decodeRead(id,false); 332 | nploc.np( 333 | spatch.begin(), 334 | spatch.end(), 335 | b.begin(), 336 | b.end() 337 | ); 338 | libmaus2::dazzler::align::Overlap const NOVL = libmaus2::dazzler::align::Overlap::computeOverlap( 339 | libmaus2::dazzler::align::Overlap::getPrimaryFlag(), 340 | id, // ref 341 | id, // read 342 | 0, 343 | spatch.size(), 344 | 0, 345 | b.size(), 346 | tspace, 347 | nploc 348 | ); 349 | 350 | AW.put(NOVL); 351 | } 352 | 353 | for ( uint64_t i = 0; i < VOVL.size(); ++i ) 354 | AW.put(VOVL[i]); 355 | } 356 | } 357 | 
} 358 | catch(std::exception const & ex) 359 | { 360 | std::cerr << ex.what() << std::endl; 361 | return EXIT_FAILURE; 362 | } 363 | } 364 | -------------------------------------------------------------------------------- /src/filterchains.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | uint64_t getDefaultMinLength() 28 | { 29 | return 4000; 30 | } 31 | 32 | void patternToUpper(libmaus2::fastx::Pattern & pattern) 33 | { 34 | for ( uint64_t i = 0; i < pattern.spattern.size(); ++i ) 35 | pattern.spattern[i] = ::toupper(pattern.spattern[i]); 36 | 37 | pattern.pattern = pattern.spattern.c_str(); 38 | } 39 | 40 | std::string getTmpFileBase(libmaus2::util::ArgParser const & arg) 41 | { 42 | std::string const tmpfilebase = arg.uniqueArgPresent("T") ? 
arg["T"] : libmaus2::util::ArgInfo::getDefaultTmpFileName(arg.progname); 43 | return tmpfilebase; 44 | } 45 | 46 | static std::string getFastAIndexFileName(std::string const & consfn) 47 | { 48 | std::string const consfnindex = consfn + ".phaser.index"; 49 | return consfnindex; 50 | } 51 | 52 | 53 | int generateindex( 54 | libmaus2::util::ArgParser const & arg, 55 | std::string const & consfn, 56 | uint64_t const fastaindexmod 57 | ) 58 | { 59 | 60 | std::string const consfnindex = getFastAIndexFileName(consfn); 61 | std::string const tmpfilebase = getTmpFileBase(arg); 62 | 63 | if ( 64 | ! libmaus2::util::GetFileSize::fileExists(consfnindex) 65 | || 66 | libmaus2::util::GetFileSize::isOlder(consfnindex,consfn) 67 | ) 68 | { 69 | std::string const tmpfn = tmpfilebase + ".faindextmp"; 70 | libmaus2::fastx::FastAReader::enumerateOffsets(consfn,tmpfn,fastaindexmod); 71 | libmaus2::aio::OutputStreamFactoryContainer::rename(tmpfn,consfnindex); 72 | } 73 | 74 | return EXIT_SUCCESS; 75 | } 76 | 77 | struct ReadAccessor 78 | { 79 | std::string const consfn; 80 | std::string const consfnindex; 81 | uint64_t const mod; 82 | 83 | ReadAccessor(std::string const & rconsfn, std::string const & rconsfnindex, uint64_t const rmod) 84 | : consfn(rconsfn), consfnindex(rconsfnindex), mod(rmod) 85 | { 86 | 87 | } 88 | 89 | libmaus2::fastx::FastAReader::pattern_type operator[](uint64_t const i) 90 | { 91 | uint64_t const b = i / mod; 92 | libmaus2::aio::InputStreamInstance IISI(consfnindex); 93 | IISI.seekg(b * sizeof(libmaus2::fastx::FastInterval)); 94 | libmaus2::fastx::FastInterval FI(IISI); 95 | libmaus2::fastx::FastAReader FARE(consfn,FI); 96 | uint64_t o = i - b * mod; 97 | for ( uint64_t j = 0; j < o; ++j ) 98 | FARE.skipPattern(); 99 | libmaus2::fastx::FastAReader::pattern_type P; 100 | bool const ok = FARE.getNextPatternUnlocked(P); 101 | 102 | if ( ! 
ok ) 103 | { 104 | libmaus2::exception::LibMausException lme; 105 | lme.getStream() << "ReadAccessor::operator[]: failed to get read number " << i << std::endl; 106 | lme.finish(); 107 | throw lme; 108 | } 109 | 110 | patternToUpper(P); 111 | return P; 112 | } 113 | 114 | std::string getName(uint64_t const i) 115 | { 116 | libmaus2::fastx::FastAReader::pattern_type const P = (*this)[i]; 117 | return P.sid; 118 | } 119 | 120 | static uint64_t getReadId(std::string name) 121 | { 122 | if ( name.find('/') != std::string::npos ) 123 | name = name.substr(0,name.find('/')); 124 | 125 | std::istringstream istr(name); 126 | uint64_t id; 127 | istr >> id; 128 | 129 | assert ( istr && istr.peek() == std::istream::traits_type::eof() ); 130 | 131 | assert ( id ); 132 | 133 | return id-1; 134 | } 135 | 136 | uint64_t getReadId(uint64_t const i) 137 | { 138 | std::string name = getName(i); 139 | return getReadId(name); 140 | } 141 | }; 142 | 143 | uint64_t getId(std::string sid, libmaus2::fastx::FastAReader::pattern_type const & pattern) 144 | { 145 | if ( sid.find('/') == std::string::npos ) 146 | { 147 | libmaus2::exception::LibMausException lme; 148 | lme.getStream() << "[E] unparsable read name (no slash) " << sid << std::endl; 149 | lme.finish(); 150 | throw lme; 151 | } 152 | 153 | sid = sid.substr(0,sid.find('/')); 154 | 155 | for ( uint64_t i = 0; i < sid.size(); ++i ) 156 | if ( ! 
::isdigit(sid[i]) ) 157 | { 158 | libmaus2::exception::LibMausException lme; 159 | lme.getStream() << "[E] unparsable read name (string " << sid << " before slash not numerical) " << pattern.sid << std::endl; 160 | lme.finish(); 161 | throw lme; 162 | } 163 | 164 | std::istringstream istr(sid); 165 | uint64_t id; 166 | istr >> id; 167 | 168 | assert ( istr.peek() == std::istream::traits_type::eof() ); 169 | 170 | assert ( id ); 171 | 172 | return id - 1; 173 | } 174 | 175 | std::vector countConsensus(std::string const & consin) 176 | { 177 | libmaus2::fastx::FastAReader FA(consin); 178 | libmaus2::fastx::FastAReader::pattern_type pattern; 179 | std::vector V; 180 | 181 | while ( FA.getNextPatternUnlocked(pattern) ) 182 | { 183 | // >2/24/500_15948 A=[500,17349] 184 | 185 | std::string sid = pattern.sid; 186 | 187 | uint64_t const id = getId(sid,pattern); 188 | 189 | while ( ! (id const & VCNT; 221 | std::string const & consin; 222 | std::string const & consfnindex; 223 | uint64_t const fastaindexmod; 224 | std::vector < libmaus2::fastx::FastAReader::pattern_type > Vpat; 225 | std::vector < ReadFragment > fragments; 226 | std::vector < libmaus2::math::IntegerInterval > VIV; 227 | 228 | FragmentContainer( 229 | std::vector const & rVCNT, 230 | std::string const & rconsin, 231 | std::string const & rconsfnindex, 232 | uint64_t const rfastaindexmod 233 | ) : cur(-1), VCNT(rVCNT), consin(rconsin), consfnindex(rconsfnindex), fastaindexmod(rfastaindexmod) {} 234 | 235 | void load(int64_t const aread) 236 | { 237 | if ( aread != cur && aread + 1 < static_cast(VCNT.size()) ) 238 | { 239 | // std::cerr << "z=" << z << " " << VCNT[z] << " " << VCNT[z+1] << std::endl; 240 | 241 | uint64_t const low = VCNT[aread]; 242 | uint64_t const high = VCNT[aread+1]; 243 | ReadAccessor RA(consin, consfnindex, fastaindexmod); 244 | 245 | Vpat.resize(0); 246 | fragments.resize(0); 247 | VIV.resize(0); 248 | for ( uint64_t i = low; i < high; ++i ) 249 | { 250 | Vpat.push_back(RA[i]); 251 | 
252 | assert ( 253 | static_cast(getId( 254 | Vpat.back().sid, 255 | Vpat.back() 256 | )) 257 | == 258 | aread 259 | ); 260 | 261 | // std::cerr << "got " << Vpat.back().sid << " for " << z << std::endl; 262 | } 263 | 264 | for ( uint64_t i = 0; i < Vpat.size(); ++i ) 265 | { 266 | std::string sid = Vpat[i].sid; 267 | 268 | assert ( sid.find(" A=[") != std::string::npos ); 269 | sid = sid.substr(sid.find(" A=[") + strlen(" A=[")); 270 | 271 | std::istringstream istr(sid); 272 | uint64_t first = 0; 273 | istr >> first; 274 | assert ( istr && istr.peek() == ',' ); 275 | istr.get(); 276 | uint64_t last = 0; 277 | istr >> last; 278 | assert ( istr && istr.peek() == ']' ); 279 | 280 | std::string const SCO = Vpat[i].spattern; 281 | 282 | ReadFragment RF(first,last+1,SCO); 283 | 284 | fragments.push_back(RF); 285 | 286 | VIV.push_back(libmaus2::math::IntegerInterval(fragments.back().from,fragments.back().to-1)); 287 | } 288 | 289 | cur = aread; 290 | } 291 | } 292 | }; 293 | 294 | struct OverlapPosComparator 295 | { 296 | bool operator()(libmaus2::dazzler::align::Overlap const & lhs, libmaus2::dazzler::align::Overlap const & rhs) const 297 | { 298 | return lhs.path.abpos < rhs.path.abpos; 299 | } 300 | }; 301 | 302 | struct DepthLine 303 | { 304 | uint64_t from; 305 | uint64_t to; 306 | int64_t d; 307 | 308 | DepthLine() {} 309 | DepthLine(uint64_t const rfrom, uint64_t const rto, int64_t const rd) : from(rfrom), to(rto), d(rd) {} 310 | }; 311 | 312 | std::ostream & operator<<(std::ostream & out, DepthLine const & D) 313 | { 314 | return out << "DepthLine(" << D.from << "," << D.to << "," << D.d << ")"; 315 | } 316 | 317 | void handle(libmaus2::dazzler::align::Overlap * A, uint64_t const f, libmaus2::dazzler::align::AlignmentWriter & AW, int64_t const /* dthres */) 318 | { 319 | std::sort(A,A+f,libmaus2::dazzler::align::OverlapFullComparator()); 320 | for ( uint64_t i = 0; i < f; ++i ) 321 | AW.put(A[i]); 322 | } 323 | 324 | int filterchains(libmaus2::util::ArgParser 
const & arg) 325 | { 326 | std::string const outfn = arg[0]; 327 | std::string const consin = arg[1]; 328 | int64_t const dthres = 10; 329 | 330 | uint64_t const fastaindexmod = 1; 331 | generateindex(arg,consin,fastaindexmod); 332 | std::vector VCNT = countConsensus(consin); 333 | std::string const consfnindex = getFastAIndexFileName(consin); 334 | 335 | std::vector Vin; 336 | for ( uint64_t i = 2; i < arg.size(); ++i ) 337 | Vin.push_back(arg[i]); 338 | int64_t const tspace = libmaus2::dazzler::align::AlignmentFile::getTSpace(Vin); 339 | libmaus2::dazzler::align::AlignmentWriter AW(outfn,tspace); 340 | std::pair prevaread(-1,-1); 341 | libmaus2::autoarray::AutoArray < libmaus2::dazzler::align::Overlap > A; 342 | libmaus2::autoarray::AutoArray < libmaus2::dazzler::align::Overlap > B; 343 | uint64_t fb = 0; 344 | int64_t idb = -1; 345 | uint64_t const minlength = arg.uniqueArgPresent("l") ? arg.getUnsignedNumericArg("l") : getDefaultMinLength(); 346 | 347 | FragmentContainer FC(VCNT,consin,consfnindex,fastaindexmod); 348 | 349 | for ( uint64_t z = 0; z < Vin.size(); ++z ) 350 | { 351 | libmaus2::dazzler::align::AlignmentFileRegion::unique_ptr_type AFR(libmaus2::dazzler::align::OverlapIndexer::openAlignmentFileWithoutIndex(Vin[z])); 352 | 353 | libmaus2::dazzler::align::Overlap refOVL, OVL; 354 | while ( AFR->peekNextOverlap(refOVL) ) 355 | { 356 | assert ( (refOVL.aread > prevaread.first) || (refOVL.aread == prevaread.first && refOVL.bread > prevaread.second) ); 357 | 358 | uint64_t f = 0; 359 | while ( AFR->peekNextOverlap(OVL) && OVL.aread == refOVL.aread && OVL.bread == refOVL.bread ) 360 | { 361 | AFR->getNextOverlap(OVL); 362 | A.push(f,OVL); 363 | } 364 | 365 | prevaread = std::pair(refOVL.aread,refOVL.bread); 366 | 367 | if ( prevaread.first != idb ) 368 | { 369 | if ( fb ) 370 | handle(B.begin(),fb,AW,dthres); 371 | idb = prevaread.first; 372 | fb = 0; 373 | } 374 | 375 | FC.load(refOVL.aread); 376 | ChainSet CS(A.begin(),f); 377 | 378 | for ( uint64_t 
chainid = 0; chainid < CS.size(); ++chainid ) 379 | { 380 | uint64_t s = 0; 381 | 382 | for ( uint64_t chainsubid = 0; chainsubid < CS.size(chainid); ++chainsubid ) 383 | { 384 | uint64_t const i = CS(chainid,chainsubid); 385 | libmaus2::math::IntegerInterval I(A[i].path.abpos,A[i].path.aepos-1); 386 | 387 | for ( uint64_t i = 0; i < FC.VIV.size(); ++i ) 388 | s += I.intersection(FC.VIV[i]).diameter(); 389 | } 390 | 391 | // std::cerr << prevaread.first << "," << prevaread.second << " s=" << s << std::endl; 392 | 393 | if ( s >= minlength ) 394 | { 395 | for ( uint64_t chainsubid = 0; chainsubid < CS.size(chainid); ++chainsubid ) 396 | { 397 | uint64_t const i = CS(chainid,chainsubid); 398 | B.push(fb,A[i]); 399 | // AW.put(A[i]); 400 | } 401 | } 402 | } 403 | 404 | // std::cerr << prevaread.first << "," << prevaread.second << " " << f << std::endl; 405 | } 406 | }; 407 | 408 | if ( fb ) 409 | handle(B.begin(),fb,AW,dthres); 410 | 411 | return EXIT_SUCCESS; 412 | } 413 | 414 | 415 | template 416 | static std::string formatRHS(std::string const & description, default_type def) 417 | { 418 | std::ostringstream ostr; 419 | ostr << description << " (default " << def << ")"; 420 | return ostr.str(); 421 | } 422 | 423 | /* 424 | parameters: 425 | */ 426 | static std::string helpMessage(libmaus2::util::ArgParser const & /* arg */) 427 | { 428 | std::vector < std::pair < std::string, std::string > > optionMap; 429 | // optionMap . push_back ( std::pair < std::string, std::string >("t", formatRHS("number of threads",getDefaultNumThreads()))); 430 | optionMap . 
push_back ( std::pair < std::string, std::string >("l", formatRHS("minimum chain length",getDefaultMinLength()))); 431 | 432 | uint64_t maxlhs = 0; 433 | for ( std::vector < std::pair < std::string, std::string > >::const_iterator ita = optionMap.begin(); ita != optionMap.end(); ++ita ) 434 | { 435 | assert ( ita->first.size() ); 436 | 437 | if ( ita->first.size() == 1 ) 438 | maxlhs = std::max(maxlhs,static_cast(ita->first.size()+1)); 439 | else 440 | maxlhs = std::max(maxlhs,static_cast(ita->first.size()+2)); 441 | } 442 | 443 | std::ostringstream messtr; 444 | for ( std::vector < std::pair < std::string, std::string > >::const_iterator ita = optionMap.begin(); ita != optionMap.end(); ++ita ) 445 | { 446 | std::string const key = ita->first; 447 | 448 | messtr << "\t"; 449 | messtr << std::setw(maxlhs) << std::setfill(' '); 450 | if ( key.size() == 1 ) 451 | messtr << (std::string("-")+key); 452 | else 453 | messtr << (std::string("--")+key); 454 | 455 | messtr << std::setw(0); 456 | 457 | messtr << ": "; 458 | 459 | messtr << ita->second; 460 | messtr << "\n"; 461 | } 462 | 463 | return messtr.str(); 464 | } 465 | 466 | 467 | 468 | int main(int argc, char * argv[]) 469 | { 470 | try 471 | { 472 | libmaus2::util::ArgParser arg(argc,argv); 473 | 474 | if ( arg.uniqueArgPresent("v") || arg.uniqueArgPresent("version") ) 475 | { 476 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 477 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." << std::endl; 478 | return EXIT_SUCCESS; 479 | } 480 | else if ( arg.uniqueArgPresent("h") || arg.uniqueArgPresent("help") || arg.size() < 1 ) 481 | { 482 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 483 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." 
<< std::endl; 484 | std::cerr << "\n"; 485 | std::cerr << "usage: " << arg.progname << " [options] out.las cons.fasta in1.las ...\n"; 486 | std::cerr << "\n"; 487 | std::cerr << "The following options can be used (no space between option name and parameter allowed):\n\n"; 488 | std::cerr << helpMessage(arg); 489 | return EXIT_SUCCESS; 490 | } 491 | else 492 | { 493 | libmaus2::timing::RealTimeClock rtc; 494 | rtc.start(); 495 | 496 | int r = EXIT_FAILURE; 497 | 498 | r = filterchains(arg); 499 | 500 | std::cerr << "[V] processing time " << rtc.formatTime(rtc.getElapsedSeconds()) << std::endl; 501 | 502 | return r; 503 | } 504 | } 505 | catch(std::exception const & ex) 506 | { 507 | std::cerr << "[E] " << ex.what() << std::endl; 508 | return EXIT_FAILURE; 509 | } 510 | } 511 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | Installation Instructions 2 | ************************* 3 | 4 | Copyright (C) 1994-1996, 1999-2002, 2004-2013 Free Software Foundation, 5 | Inc. 6 | 7 | Copying and distribution of this file, with or without modification, 8 | are permitted in any medium without royalty provided the copyright 9 | notice and this notice are preserved. This file is offered as-is, 10 | without warranty of any kind. 11 | 12 | Basic Installation 13 | ================== 14 | 15 | Briefly, the shell command `./configure && make && make install' 16 | should configure, build, and install this package. The following 17 | more-detailed instructions are generic; see the `README' file for 18 | instructions specific to this package. Some packages provide this 19 | `INSTALL' file but do not implement all of the features documented 20 | below. The lack of an optional feature in a given package is not 21 | necessarily a bug. More recommendations for GNU packages can be found 22 | in *note Makefile Conventions: (standards)Makefile Conventions. 
23 | 24 | The `configure' shell script attempts to guess correct values for 25 | various system-dependent variables used during compilation. It uses 26 | those values to create a `Makefile' in each directory of the package. 27 | It may also create one or more `.h' files containing system-dependent 28 | definitions. Finally, it creates a shell script `config.status' that 29 | you can run in the future to recreate the current configuration, and a 30 | file `config.log' containing compiler output (useful mainly for 31 | debugging `configure'). 32 | 33 | It can also use an optional file (typically called `config.cache' 34 | and enabled with `--cache-file=config.cache' or simply `-C') that saves 35 | the results of its tests to speed up reconfiguring. Caching is 36 | disabled by default to prevent problems with accidental use of stale 37 | cache files. 38 | 39 | If you need to do unusual things to compile the package, please try 40 | to figure out how `configure' could check whether to do them, and mail 41 | diffs or instructions to the address given in the `README' so they can 42 | be considered for the next release. If you are using the cache, and at 43 | some point `config.cache' contains results you don't want to keep, you 44 | may remove or edit it. 45 | 46 | The file `configure.ac' (or `configure.in') is used to create 47 | `configure' by a program called `autoconf'. You need `configure.ac' if 48 | you want to change it or regenerate `configure' using a newer version 49 | of `autoconf'. 50 | 51 | The simplest way to compile this package is: 52 | 53 | 1. `cd' to the directory containing the package's source code and type 54 | `./configure' to configure the package for your system. 55 | 56 | Running `configure' might take a while. While running, it prints 57 | some messages telling which features it is checking for. 58 | 59 | 2. Type `make' to compile the package. 60 | 61 | 3. 
Optionally, type `make check' to run any self-tests that come with 62 | the package, generally using the just-built uninstalled binaries. 63 | 64 | 4. Type `make install' to install the programs and any data files and 65 | documentation. When installing into a prefix owned by root, it is 66 | recommended that the package be configured and built as a regular 67 | user, and only the `make install' phase executed with root 68 | privileges. 69 | 70 | 5. Optionally, type `make installcheck' to repeat any self-tests, but 71 | this time using the binaries in their final installed location. 72 | This target does not install anything. Running this target as a 73 | regular user, particularly if the prior `make install' required 74 | root privileges, verifies that the installation completed 75 | correctly. 76 | 77 | 6. You can remove the program binaries and object files from the 78 | source code directory by typing `make clean'. To also remove the 79 | files that `configure' created (so you can compile the package for 80 | a different kind of computer), type `make distclean'. There is 81 | also a `make maintainer-clean' target, but that is intended mainly 82 | for the package's developers. If you use it, you may have to get 83 | all sorts of other programs in order to regenerate files that came 84 | with the distribution. 85 | 86 | 7. Often, you can also type `make uninstall' to remove the installed 87 | files again. In practice, not all packages have tested that 88 | uninstallation works correctly, even though it is required by the 89 | GNU Coding Standards. 90 | 91 | 8. Some packages, particularly those that use Automake, provide `make 92 | distcheck', which can be used by developers to test that all other 93 | targets like `make install' and `make uninstall' work correctly. 94 | This target is generally not run by end users.
95 | 96 | Compilers and Options 97 | ===================== 98 | 99 | Some systems require unusual options for compilation or linking that 100 | the `configure' script does not know about. Run `./configure --help' 101 | for details on some of the pertinent environment variables. 102 | 103 | You can give `configure' initial values for configuration parameters 104 | by setting variables in the command line or in the environment. Here 105 | is an example: 106 | 107 | ./configure CC=c99 CFLAGS=-g LIBS=-lposix 108 | 109 | *Note Defining Variables::, for more details. 110 | 111 | Compiling For Multiple Architectures 112 | ==================================== 113 | 114 | You can compile the package for more than one kind of computer at the 115 | same time, by placing the object files for each architecture in their 116 | own directory. To do this, you can use GNU `make'. `cd' to the 117 | directory where you want the object files and executables to go and run 118 | the `configure' script. `configure' automatically checks for the 119 | source code in the directory that `configure' is in and in `..'. This 120 | is known as a "VPATH" build. 121 | 122 | With a non-GNU `make', it is safer to compile the package for one 123 | architecture at a time in the source code directory. After you have 124 | installed the package for one architecture, use `make distclean' before 125 | reconfiguring for another architecture. 126 | 127 | On MacOS X 10.5 and later systems, you can create libraries and 128 | executables that work on multiple system types--known as "fat" or 129 | "universal" binaries--by specifying multiple `-arch' options to the 130 | compiler but only a single `-arch' option to the preprocessor. 
Like 131 | this: 132 | 133 | ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ 134 | CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ 135 | CPP="gcc -E" CXXCPP="g++ -E" 136 | 137 | This is not guaranteed to produce working output in all cases, you 138 | may have to build one architecture at a time and combine the results 139 | using the `lipo' tool if you have problems. 140 | 141 | Installation Names 142 | ================== 143 | 144 | By default, `make install' installs the package's commands under 145 | `/usr/local/bin', include files under `/usr/local/include', etc. You 146 | can specify an installation prefix other than `/usr/local' by giving 147 | `configure' the option `--prefix=PREFIX', where PREFIX must be an 148 | absolute file name. 149 | 150 | You can specify separate installation prefixes for 151 | architecture-specific files and architecture-independent files. If you 152 | pass the option `--exec-prefix=PREFIX' to `configure', the package uses 153 | PREFIX as the prefix for installing programs and libraries. 154 | Documentation and other data files still use the regular prefix. 155 | 156 | In addition, if you use an unusual directory layout you can give 157 | options like `--bindir=DIR' to specify different values for particular 158 | kinds of files. Run `configure --help' for a list of the directories 159 | you can set and what kinds of files go in them. In general, the 160 | default for these options is expressed in terms of `${prefix}', so that 161 | specifying just `--prefix' will affect all of the other directory 162 | specifications that were not explicitly provided. 163 | 164 | The most portable way to affect installation locations is to pass the 165 | correct locations to `configure'; however, many packages provide one or 166 | both of the following shortcuts of passing variable assignments to the 167 | `make install' command line to change installation locations without 168 | having to reconfigure or recompile. 
169 | 170 | The first method involves providing an override variable for each 171 | affected directory. For example, `make install 172 | prefix=/alternate/directory' will choose an alternate location for all 173 | directory configuration variables that were expressed in terms of 174 | `${prefix}'. Any directories that were specified during `configure', 175 | but not in terms of `${prefix}', must each be overridden at install 176 | time for the entire installation to be relocated. The approach of 177 | makefile variable overrides for each directory variable is required by 178 | the GNU Coding Standards, and ideally causes no recompilation. 179 | However, some platforms have known limitations with the semantics of 180 | shared libraries that end up requiring recompilation when using this 181 | method, particularly noticeable in packages that use GNU Libtool. 182 | 183 | The second method involves providing the `DESTDIR' variable. For 184 | example, `make install DESTDIR=/alternate/directory' will prepend 185 | `/alternate/directory' before all installation names. The approach of 186 | `DESTDIR' overrides is not required by the GNU Coding Standards, and 187 | does not work on platforms that have drive letters. On the other hand, 188 | it does better at avoiding recompilation issues, and works well even 189 | when some directory options were not specified in terms of `${prefix}' 190 | at `configure' time. 191 | 192 | Optional Features 193 | ================= 194 | 195 | If the package supports it, you can cause programs to be installed 196 | with an extra prefix or suffix on their names by giving `configure' the 197 | option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. 198 | 199 | Some packages pay attention to `--enable-FEATURE' options to 200 | `configure', where FEATURE indicates an optional part of the package. 201 | They may also pay attention to `--with-PACKAGE' options, where PACKAGE 202 | is something like `gnu-as' or `x' (for the X Window System). 
The 203 | `README' should mention any `--enable-' and `--with-' options that the 204 | package recognizes. 205 | 206 | For packages that use the X Window System, `configure' can usually 207 | find the X include and library files automatically, but if it doesn't, 208 | you can use the `configure' options `--x-includes=DIR' and 209 | `--x-libraries=DIR' to specify their locations. 210 | 211 | Some packages offer the ability to configure how verbose the 212 | execution of `make' will be. For these packages, running `./configure 213 | --enable-silent-rules' sets the default to minimal output, which can be 214 | overridden with `make V=1'; while running `./configure 215 | --disable-silent-rules' sets the default to verbose, which can be 216 | overridden with `make V=0'. 217 | 218 | Particular systems 219 | ================== 220 | 221 | On HP-UX, the default C compiler is not ANSI C compatible. If GNU 222 | CC is not installed, it is recommended to use the following options in 223 | order to use an ANSI C compiler: 224 | 225 | ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" 226 | 227 | and if that doesn't work, install pre-built binaries of GCC for HP-UX. 228 | 229 | HP-UX `make' updates targets which have the same time stamps as 230 | their prerequisites, which makes it generally unusable when shipped 231 | generated files such as `configure' are involved. Use GNU `make' 232 | instead. 233 | 234 | On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot 235 | parse its `<wchar.h>' header file. The option `-nodtk' can be used as 236 | a workaround. If GNU CC is not installed, it is therefore recommended 237 | to try 238 | 239 | ./configure CC="cc" 240 | 241 | and if that doesn't work, try 242 | 243 | ./configure CC="cc -nodtk" 244 | 245 | On Solaris, don't put `/usr/ucb' early in your `PATH'. This 246 | directory contains several dysfunctional programs; working variants of 247 | these programs are available in `/usr/bin'.
So, if you need `/usr/ucb' 248 | in your `PATH', put it _after_ `/usr/bin'. 249 | 250 | On Haiku, software installed for all users goes in `/boot/common', 251 | not `/usr/local'. It is recommended to use the following options: 252 | 253 | ./configure --prefix=/boot/common 254 | 255 | Specifying the System Type 256 | ========================== 257 | 258 | There may be some features `configure' cannot figure out 259 | automatically, but needs to determine by the type of machine the package 260 | will run on. Usually, assuming the package is built to be run on the 261 | _same_ architectures, `configure' can figure that out, but if it prints 262 | a message saying it cannot guess the machine type, give it the 263 | `--build=TYPE' option. TYPE can either be a short name for the system 264 | type, such as `sun4', or a canonical name which has the form: 265 | 266 | CPU-COMPANY-SYSTEM 267 | 268 | where SYSTEM can have one of these forms: 269 | 270 | OS 271 | KERNEL-OS 272 | 273 | See the file `config.sub' for the possible values of each field. If 274 | `config.sub' isn't included in this package, then this package doesn't 275 | need to know the machine type. 276 | 277 | If you are _building_ compiler tools for cross-compiling, you should 278 | use the option `--target=TYPE' to select the type of system they will 279 | produce code for. 280 | 281 | If you want to _use_ a cross compiler, that generates code for a 282 | platform different from the build platform, you should specify the 283 | "host" platform (i.e., that on which the generated programs will 284 | eventually be run) with `--host=TYPE'. 285 | 286 | Sharing Defaults 287 | ================ 288 | 289 | If you want to set default values for `configure' scripts to share, 290 | you can create a site shell script called `config.site' that gives 291 | default values for variables like `CC', `cache_file', and `prefix'. 
292 | `configure' looks for `PREFIX/share/config.site' if it exists, then 293 | `PREFIX/etc/config.site' if it exists. Or, you can set the 294 | `CONFIG_SITE' environment variable to the location of the site script. 295 | A warning: not all `configure' scripts look for a site script. 296 | 297 | Defining Variables 298 | ================== 299 | 300 | Variables not defined in a site shell script can be set in the 301 | environment passed to `configure'. However, some packages may run 302 | configure again during the build, and the customized values of these 303 | variables may be lost. In order to avoid this problem, you should set 304 | them in the `configure' command line, using `VAR=value'. For example: 305 | 306 | ./configure CC=/usr/local2/bin/gcc 307 | 308 | causes the specified `gcc' to be used as the C compiler (unless it is 309 | overridden in the site shell script). 310 | 311 | Unfortunately, this technique does not work for `CONFIG_SHELL' due to 312 | an Autoconf limitation. Until the limitation is lifted, you can use 313 | this workaround: 314 | 315 | CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash 316 | 317 | `configure' Invocation 318 | ====================== 319 | 320 | `configure' recognizes the following options to control how it 321 | operates. 322 | 323 | `--help' 324 | `-h' 325 | Print a summary of all of the options to `configure', and exit. 326 | 327 | `--help=short' 328 | `--help=recursive' 329 | Print a summary of the options unique to this package's 330 | `configure', and exit. The `short' variant lists options used 331 | only in the top level, while the `recursive' variant lists options 332 | also present in any nested packages. 333 | 334 | `--version' 335 | `-V' 336 | Print the version of Autoconf used to generate the `configure' 337 | script, and exit. 338 | 339 | `--cache-file=FILE' 340 | Enable the cache: use and save the results of the tests in FILE, 341 | traditionally `config.cache'. 
FILE defaults to `/dev/null' to 342 | disable caching. 343 | 344 | `--config-cache' 345 | `-C' 346 | Alias for `--cache-file=config.cache'. 347 | 348 | `--quiet' 349 | `--silent' 350 | `-q' 351 | Do not print messages saying which checks are being made. To 352 | suppress all normal output, redirect it to `/dev/null' (any error 353 | messages will still be shown). 354 | 355 | `--srcdir=DIR' 356 | Look for the package's source code in directory DIR. Usually 357 | `configure' can determine that directory automatically. 358 | 359 | `--prefix=DIR' 360 | Use DIR as the installation prefix. *note Installation Names:: 361 | for more details, including other options available for fine-tuning 362 | the installation locations. 363 | 364 | `--no-create' 365 | `-n' 366 | Run the configure checks, but stop before creating any output 367 | files. 368 | 369 | `configure' also accepts some other, not widely useful, options. Run 370 | `configure --help' for more details. 371 | -------------------------------------------------------------------------------- /src/computeextrinsicqv.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | daccord 3 | Copyright (C) 2017 German Tischler 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | std::string getTmpFileBase(libmaus2::util::ArgParser const & arg) 30 | { 31 | std::string const tmpfilebase = arg.uniqueArgPresent("T") ? arg["T"] : libmaus2::util::ArgInfo::getDefaultTmpFileName(arg.progname); 32 | return tmpfilebase; 33 | } 34 | 35 | static uint64_t getDefaultNumThreads() 36 | { 37 | return libmaus2::parallel::NumCpus::getNumLogicalProcessors(); 38 | } 39 | 40 | int64_t getId(libmaus2::fastx::FastAReader::pattern_type const & pb) 41 | { 42 | std::string sid = pb.getShortStringId(); 43 | 44 | if ( sid.find('/') != std::string::npos ) 45 | { 46 | sid = sid.substr(0,sid.find('/')); 47 | 48 | std::istringstream istr(sid); 49 | int64_t id; 50 | istr >> id; 51 | 52 | if ( istr && istr.peek() == std::istream::traits_type::eof() ) 53 | return id-1; 54 | else 55 | return -1; 56 | } 57 | else 58 | { 59 | return -1; 60 | } 61 | } 62 | 63 | template 64 | static std::string formatRHS(std::string const & description, default_type def) 65 | { 66 | std::ostringstream ostr; 67 | ostr << description << " (default " << def << ")"; 68 | return ostr.str(); 69 | } 70 | 71 | 72 | int64_t getDefaultTSpace() 73 | { 74 | return 100; 75 | } 76 | 77 | static std::string helpMessage(libmaus2::util::ArgParser const & /* arg */) 78 | { 79 | std::vector < std::pair < std::string, std::string > > optionMap; 80 | optionMap . 
push_back ( std::pair < std::string, std::string >("tspace", formatRHS("trace point spacing",getDefaultTSpace()))); 81 | uint64_t maxlhs = 0; 82 | for ( std::vector < std::pair < std::string, std::string > >::const_iterator ita = optionMap.begin(); ita != optionMap.end(); ++ita ) 83 | { 84 | assert ( ita->first.size() ); 85 | 86 | if ( ita->first.size() == 1 ) 87 | maxlhs = std::max(maxlhs,static_cast(ita->first.size()+1)); 88 | else 89 | maxlhs = std::max(maxlhs,static_cast(ita->first.size()+2)); 90 | } 91 | 92 | std::ostringstream messtr; 93 | for ( std::vector < std::pair < std::string, std::string > >::const_iterator ita = optionMap.begin(); ita != optionMap.end(); ++ita ) 94 | { 95 | std::string const key = ita->first; 96 | 97 | messtr << "\t"; 98 | messtr << std::setw(maxlhs) << std::setfill(' '); 99 | if ( key.size() == 1 ) 100 | messtr << (std::string("-")+key); 101 | else 102 | messtr << (std::string("--")+key); 103 | 104 | messtr << std::setw(0); 105 | 106 | messtr << ": "; 107 | 108 | messtr << ita->second; 109 | messtr << "\n"; 110 | } 111 | 112 | return messtr.str(); 113 | } 114 | 115 | struct IndexEntry 116 | { 117 | uint64_t offset; 118 | uint64_t count; 119 | 120 | IndexEntry() 121 | {} 122 | 123 | IndexEntry(uint64_t const roffset, uint64_t const rcount) : offset(roffset), count(rcount) {} 124 | IndexEntry(std::istream & in) { deserialise(in); } 125 | 126 | std::istream & deserialise(std::istream & in) 127 | { 128 | offset = libmaus2::util::NumberSerialisation::deserialiseNumber(in); 129 | count = libmaus2::util::NumberSerialisation::deserialiseNumber(in); 130 | return in; 131 | } 132 | 133 | std::ostream & serialise(std::ostream & out) const 134 | { 135 | libmaus2::util::NumberSerialisation::serialiseNumber(out,offset); 136 | libmaus2::util::NumberSerialisation::serialiseNumber(out,count); 137 | return out; 138 | } 139 | }; 140 | 141 | static std::vector indexConsFile(std::string const & fn, uint64_t const nr) 142 | { 143 | std::vector V(nr); 144 
| libmaus2::fastx::FastAReader FA(fn); 145 | libmaus2::fastx::FastAReader::pattern_type pattern; 146 | 147 | int64_t prevread = std::numeric_limits::min(); 148 | uint64_t prevcount = 0; 149 | uint64_t prevo = 0; 150 | uint64_t next = 0; 151 | 152 | while ( FA.foundnextmarker ) 153 | { 154 | uint64_t const o = FA.getC()-1; 155 | 156 | bool const ok = FA.getNextPatternUnlocked(pattern); 157 | assert ( ok ); 158 | 159 | int64_t const id = getId(pattern); 160 | assert ( id >= 0 ); 161 | 162 | if ( id != prevread ) 163 | { 164 | if ( prevread >= 0 ) 165 | { 166 | while ( static_cast(next) < prevread ) 167 | { 168 | V[next] = IndexEntry(0,0); 169 | ++next; 170 | } 171 | // std::cerr << "o=" << prevo << " id=" << prevread << " count=" << prevcount << std::endl; 172 | V[next] = IndexEntry(prevo,prevcount); 173 | 174 | ++next; 175 | } 176 | 177 | prevo = o; 178 | prevread = id; 179 | prevcount = 0; 180 | } 181 | 182 | ++prevcount; 183 | } 184 | 185 | if ( prevread >= 0 ) 186 | { 187 | while ( static_cast(next) < prevread ) 188 | { 189 | V[next] = IndexEntry(0,0); 190 | ++next; 191 | } 192 | // std::cerr << "o=" << prevo << " id=" << prevread << " count=" << prevcount << std::endl; 193 | V[next] = IndexEntry(prevo,prevcount); 194 | ++next; 195 | } 196 | 197 | while ( next < nr ) 198 | { 199 | V[next] = IndexEntry(0,0); 200 | ++next; 201 | } 202 | 203 | return V; 204 | } 205 | 206 | struct StringId 207 | { 208 | uint64_t id; 209 | uint64_t len; 210 | libmaus2::autoarray::AutoArray A; 211 | 212 | StringId() 213 | { 214 | 215 | } 216 | 217 | template 218 | StringId(uint64_t const rid, uint64_t const rlen, iterator rA) 219 | : id(rid), len(rlen), A(len,false) 220 | { 221 | std::copy(rA,rA+rlen,A.begin()); 222 | } 223 | 224 | StringId(std::istream & in) 225 | { 226 | deserialise(in); 227 | } 228 | 229 | std::istream & deserialise(std::istream & in) 230 | { 231 | id = libmaus2::util::NumberSerialisation::deserialiseNumber(in); 232 | len = 
libmaus2::util::NumberSerialisation::deserialiseNumber(in); 233 | 234 | A.resize(len); 235 | in.read(reinterpret_cast(A.begin()),len); 236 | assert ( in.gcount() == static_cast(len) ); 237 | 238 | return in; 239 | } 240 | 241 | std::ostream & serialise(std::ostream & out) const 242 | { 243 | libmaus2::util::NumberSerialisation::serialiseNumber(out,id); 244 | libmaus2::util::NumberSerialisation::serialiseNumber(out,len); 245 | out.write(reinterpret_cast(A.begin()),len); 246 | return out; 247 | } 248 | 249 | bool operator<(StringId const & O) const 250 | { 251 | return id < O.id; 252 | } 253 | }; 254 | 255 | int computequality(libmaus2::util::ArgParser const & arg) 256 | { 257 | std::string const db0name = arg[1]; 258 | 259 | std::cerr << "[V] loading data for " << db0name << " to memory..."; 260 | libmaus2::dazzler::db::DatabaseFile::DBArrayFileSet::unique_ptr_type Pdb0data( 261 | libmaus2::dazzler::db::DatabaseFile::copyToArrays(db0name) 262 | ); 263 | libmaus2::dazzler::db::DatabaseFile::DBArrayFileSet const * db0data = Pdb0data.get(); 264 | std::cerr << "done." << std::endl; 265 | 266 | libmaus2::dazzler::db::DatabaseFile::unique_ptr_type PDB0( 267 | new libmaus2::dazzler::db::DatabaseFile(db0data->getDBURL()) 268 | ); 269 | libmaus2::dazzler::db::DatabaseFile & DB = *PDB0; 270 | DB.computeTrimVector(); 271 | std::vector RL; 272 | DB.getAllReadLengths(RL); 273 | 274 | std::vector const Vindex = indexConsFile(arg[0],DB.size()); 275 | 276 | if ( arg.uniqueArgPresent("indexonly") ) 277 | return EXIT_SUCCESS; 278 | 279 | uint64_t const numthreads = arg.uniqueArgPresent("t") ? 
arg.getUnsignedNumericArg("t") : getDefaultNumThreads(); 280 | std::string const tmpfilebase = getTmpFileBase(arg); 281 | 282 | std::vector < std::string > Vtmp(numthreads); 283 | libmaus2::autoarray::AutoArray < libmaus2::aio::OutputStreamInstance::unique_ptr_type > Aout(numthreads); 284 | for ( uint64_t i = 0; i < numthreads; ++i ) 285 | { 286 | std::ostringstream fnostr; 287 | fnostr << tmpfilebase << "_" << i << "_datatmp"; 288 | std::string const fn = fnostr.str(); 289 | Vtmp[i] = fn; 290 | libmaus2::util::TempFileRemovalContainer::addTempFile(fn); 291 | 292 | libmaus2::aio::OutputStreamInstance::unique_ptr_type tptr( 293 | new libmaus2::aio::OutputStreamInstance(fn) 294 | ); 295 | 296 | Aout[i] = UNIQUE_PTR_MOVE(tptr); 297 | } 298 | 299 | int64_t const tspace = arg.uniqueArgPresent("tspace") ? arg.getUnsignedNumericArg("tspace") : getDefaultTSpace(); 300 | 301 | // int64_t first = -1; 302 | 303 | int64_t Icnt = 0; 304 | int64_t Idiv = 1; 305 | if ( arg.uniqueArgPresent("J") ) 306 | { 307 | 308 | std::string const Js = arg["J"]; 309 | std::istringstream istr(Js); 310 | istr >> Icnt; 311 | 312 | if ( ! istr ) 313 | { 314 | libmaus2::exception::LibMausException lme; 315 | lme.getStream() << "[E] unable to parse " << Js << std::endl; 316 | lme.finish(); 317 | throw lme; 318 | } 319 | 320 | int const c = istr.get(); 321 | 322 | if ( ! istr || c == std::istream::traits_type::eof() || c != ',' ) 323 | { 324 | libmaus2::exception::LibMausException lme; 325 | lme.getStream() << "[E] unable to parse " << Js << std::endl; 326 | lme.finish(); 327 | throw lme; 328 | } 329 | 330 | istr >> Idiv; 331 | 332 | if ( ! 
istr || istr.peek() != std::istream::traits_type::eof() ) 333 | { 334 | libmaus2::exception::LibMausException lme; 335 | lme.getStream() << "[E] unable to parse " << Js << std::endl; 336 | lme.finish(); 337 | throw lme; 338 | } 339 | 340 | if ( !Idiv ) 341 | { 342 | libmaus2::exception::LibMausException lme; 343 | lme.getStream() << "[E] denominator of J argument cannot be zero" << std::endl; 344 | lme.finish(); 345 | throw lme; 346 | } 347 | } 348 | 349 | uint64_t const n = DB.size(); 350 | uint64_t const readsperpack = (n + Idiv - 1) / Idiv; 351 | 352 | uint64_t const alow = std::min(Icnt * readsperpack,n); 353 | uint64_t const ahigh = std::min(alow + readsperpack,n); 354 | 355 | libmaus2::autoarray::AutoArray annosize(ahigh-alow); 356 | std::fill(annosize.begin(),annosize.end(),0ull); 357 | 358 | std::cerr << "[V] processing [" << alow << "," << ahigh << ")" << std::endl; 359 | 360 | #if defined(_OPENMP) 361 | #pragma omp parallel for schedule(dynamic,1) num_threads(numthreads) 362 | #endif 363 | for ( uint64_t aid = alow; aid < ahigh; ++aid ) 364 | { 365 | uint64_t const l = RL[aid]; 366 | // number of trace point intervals 367 | uint64_t const nt = (l + tspace - 1)/tspace; 368 | annosize[aid - alow] = nt; 369 | std::vector V(nt,std::numeric_limits::max()); 370 | 371 | libmaus2::fastx::FastAReader::pattern_type pb; 372 | libmaus2::lcs::NNPCorL np; 373 | 374 | libmaus2::aio::InputStreamInstance ISI(arg[0]); 375 | ISI.seekg(Vindex[aid].offset); 376 | libmaus2::fastx::StreamFastAReaderWrapper SFC(ISI); 377 | 378 | std::string const afull = DB[aid]; 379 | 380 | for ( uint64_t z = 0; z < Vindex[aid].count; ++z ) 381 | { 382 | bool const ok = SFC.getNextPatternUnlocked(pb); 383 | assert ( ok ); 384 | assert ( getId(pb) == static_cast(aid) ); 385 | // FC.getNext(pb); 386 | 387 | assert ( pb.sid.find("A=[") != std::string::npos ); 388 | std::string sid = pb.sid; 389 | sid = sid.substr(sid.find("A=[") + strlen("A=[")); 390 | assert ( sid.find("]") != std::string::npos 
); 391 | sid = sid.substr(0,sid.find("]")); 392 | 393 | std::istringstream istr(sid); 394 | int64_t from, to; 395 | 396 | istr >> from; 397 | assert ( istr.peek() == ',' ); 398 | istr.get(); 399 | istr >> to; 400 | to += 1; 401 | to = std::min(to,static_cast(RL[aid])); 402 | assert ( istr.peek() == std::istream::traits_type::eof() ); 403 | 404 | std::string const asub = afull.substr(from,to-from); 405 | std::string const bsub = pb.spattern; 406 | 407 | np.np(asub.begin(),asub.end(),bsub.begin(),bsub.end()); 408 | 409 | #if 0 410 | libmaus2::lcs::AlignmentPrint::printAlignmentLines( 411 | std::cerr, 412 | asub.begin(), 413 | asub.size(), 414 | bsub.begin(), 415 | bsub.size(), 416 | 80, 417 | np.ta, 418 | np.te 419 | ); 420 | #endif 421 | 422 | if ( from % tspace != 0 ) 423 | { 424 | std::pair const P = libmaus2::lcs::AlignmentTraceContainer::advanceA(np.ta,np.te,tspace - (from % tspace)); 425 | 426 | from += P.first; 427 | np.ta += P.second; 428 | } 429 | 430 | assert ( from == to || (from % tspace == 0) ); 431 | 432 | while ( to-from >= tspace ) 433 | { 434 | assert ( from % tspace == 0 ); 435 | 436 | std::pair const P = libmaus2::lcs::AlignmentTraceContainer::advanceA(np.ta,np.te,tspace); 437 | 438 | bool const ok = static_cast(P.first) == tspace; 439 | if ( ! 
ok ) 440 | { 441 | std::cerr << "Failure for " << aid << " tspace=" << tspace << " P.first=" << P.first << " from=" << from << " to=" << to << " RL=" << RL[aid] << std::endl; 442 | } 443 | assert ( ok ); 444 | 445 | libmaus2::lcs::AlignmentStatistics AS = libmaus2::lcs::AlignmentTraceContainer::getAlignmentStatistics(np.ta,np.ta+P.second); 446 | 447 | uint64_t const e = std::floor(AS.getErrorRate() * std::numeric_limits::max() + 0.5); 448 | assert ( e <= std::numeric_limits::max() ); 449 | 450 | V [ from / tspace ] = std::min(V[from/tspace],static_cast(e)); 451 | 452 | from += tspace; 453 | np.ta += P.second; 454 | } 455 | 456 | if ( (from % tspace == 0) && (to > from) && (to == static_cast(l)) ) 457 | { 458 | uint64_t const d = to-from; 459 | assert ( static_cast(d) < tspace ); 460 | 461 | std::pair const P = libmaus2::lcs::AlignmentTraceContainer::advanceA(np.ta,np.te,d); 462 | assert ( static_cast(P.first) == static_cast(d) ); 463 | 464 | libmaus2::lcs::AlignmentStatistics AS = libmaus2::lcs::AlignmentTraceContainer::getAlignmentStatistics(np.ta,np.ta+P.second); 465 | 466 | uint64_t const e = std::floor(AS.getErrorRate() * std::numeric_limits::max() + 0.5); 467 | assert ( e <= std::numeric_limits::max() ); 468 | 469 | V [ from / tspace ] = std::min(V[from/tspace],static_cast(e)); 470 | 471 | from += d; 472 | np.ta += P.second; 473 | } 474 | } 475 | 476 | #if 0 477 | for ( uint64_t i = 0; i < V.size(); ++i ) 478 | { 479 | // std::cerr << aid << " " << i << " " << (double)V[i]/255.0 << std::endl; 480 | dataOSI.put(V[i]); 481 | } 482 | #endif 483 | 484 | StringId SI(aid,V.size(),V.begin()); 485 | 486 | #if defined(_OPENMP) 487 | uint64_t const tid = omp_get_thread_num(); 488 | #else 489 | uint64_t const tid = 0; 490 | #endif 491 | 492 | SI.serialise(*(Aout[tid])); 493 | 494 | if ( (aid-alow) % 1024 == 0 ) 495 | { 496 | libmaus2::parallel::ScopePosixSpinLock slock(libmaus2::aio::StreamLock::cerrlock); 497 | std::cerr << "[V] " << (aid-alow) << std::endl; 498 | } 499 
| } 500 | 501 | for ( uint64_t i = 0; i < numthreads; ++i ) 502 | { 503 | Aout[i]->flush(); 504 | Aout[i].reset(); 505 | } 506 | 507 | std::cerr << "[V] " << (ahigh-alow) << std::endl; 508 | 509 | PDB0.reset(); 510 | libmaus2::dazzler::db::DatabaseFile ondiskDB(db0name); 511 | 512 | std::string const annofn = ondiskDB.getBlockTrackAnnoFileName("exqual",(Idiv > 1) ? (Icnt+1) : 0); 513 | std::string const datafn = ondiskDB.getBlockTrackDataFileName("exqual",(Idiv > 1) ? (Icnt+1) : 0); 514 | 515 | std::ostringstream fnostr; 516 | fnostr << tmpfilebase << "_mergetmp"; 517 | std::string const mergetmpfn = fnostr.str(); 518 | libmaus2::util::TempFileRemovalContainer::addTempFile(mergetmpfn); 519 | libmaus2::sorting::SerialisingSortingBufferedOutputFile::reduce(Vtmp,mergetmpfn); 520 | 521 | for ( uint64_t i = 0; i < numthreads; ++i ) 522 | libmaus2::aio::FileRemoval::removeFile(Vtmp[i]); 523 | 524 | libmaus2::aio::InputStreamInstance dataISI(mergetmpfn); 525 | libmaus2::aio::OutputStreamInstance dataOSI(datafn); 526 | 527 | while ( dataISI && (dataISI.peek() != std::istream::traits_type::eof()) ) 528 | { 529 | StringId SI(dataISI); 530 | for ( uint64_t i = 0; i < SI.len; ++i ) 531 | dataOSI.put(SI.A[i]); 532 | } 533 | 534 | uint64_t const p = dataOSI.tellp(); 535 | assert ( p == std::accumulate(annosize.begin(),annosize.end(),0ull) ); 536 | 537 | dataOSI.flush(); 538 | 539 | libmaus2::aio::OutputStreamInstance annoOSI(annofn); 540 | 541 | // write inqual anno file 542 | uint64_t annooff = 0; 543 | libmaus2::dazzler::db::OutputBase::putLittleEndianInteger4(annoOSI,annosize.size() /* tracklen */,annooff); 544 | libmaus2::dazzler::db::OutputBase::putLittleEndianInteger4(annoOSI,8 /* size of pointer */,annooff); 545 | uint64_t s = 0; 546 | for ( uint64_t i = 0; i < annosize.size(); ++i ) 547 | { 548 | libmaus2::dazzler::db::OutputBase::putLittleEndianInteger8(annoOSI,s,annooff); 549 | s += annosize[i]; 550 | } 551 | 
libmaus2::dazzler::db::OutputBase::putLittleEndianInteger8(annoOSI,s,annooff); 552 | annoOSI.flush(); 553 | 554 | return EXIT_SUCCESS; 555 | } 556 | 557 | int main(int argc, char * argv[]) 558 | { 559 | try 560 | { 561 | libmaus2::util::ArgParser arg(argc,argv); 562 | 563 | if ( arg.uniqueArgPresent("v") || arg.uniqueArgPresent("version") ) 564 | { 565 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 566 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." << std::endl; 567 | return EXIT_SUCCESS; 568 | } 569 | else if ( arg.uniqueArgPresent("h") || arg.uniqueArgPresent("help") || arg.size() < 2 ) 570 | { 571 | std::cerr << "This is " << PACKAGE_NAME << " version " << PACKAGE_VERSION << "." << std::endl; 572 | std::cerr << PACKAGE_NAME << " is distributed under version 3 of the GPL." << std::endl; 573 | std::cerr << "\n"; 574 | std::cerr << "usage: " << arg.progname << " [options] reads_cons.fasta reads.db\n"; 575 | std::cerr << "\n"; 576 | std::cerr << "The following options can be used (no space between option name and parameter allowed):\n\n"; 577 | std::cerr << helpMessage(arg); 578 | return EXIT_SUCCESS; 579 | } 580 | else 581 | { 582 | return computequality(arg); 583 | } 584 | } 585 | catch(std::exception const & ex) 586 | { 587 | std::cerr << ex.what() << std::endl; 588 | return EXIT_FAILURE; 589 | } 590 | 591 | } 592 | --------------------------------------------------------------------------------