├── ccan ├── hash │ ├── LICENSE │ ├── _info │ └── test │ │ └── run.c └── build_assert │ ├── LICENSE │ ├── test │ ├── compile_ok.c │ ├── compile_fail.c │ ├── compile_fail-expr.c │ └── run-BUILD_ASSERT_OR_ZERO.c │ ├── build_assert.h │ └── _info ├── config.h ├── githash.cc ├── sysdeps ├── Darwin.inc └── Linux.inc ├── ext ├── fmt-8.0.1 │ ├── include │ │ └── fmt │ │ │ └── locale.h │ └── src │ │ └── format.cc ├── html │ ├── jquery-ui-1.10.4.custom │ │ └── css │ │ │ └── ui-lightness │ │ │ └── images │ │ │ ├── animated-overlay.gif │ │ │ ├── ui-icons_222222_256x240.png │ │ │ ├── ui-icons_228ef1_256x240.png │ │ │ ├── ui-icons_ef8c08_256x240.png │ │ │ ├── ui-icons_ffd27a_256x240.png │ │ │ ├── ui-icons_ffffff_256x240.png │ │ │ ├── ui-bg_flat_10_000000_40x100.png │ │ │ ├── ui-bg_glass_65_ffffff_1x400.png │ │ │ ├── ui-bg_glass_100_f6f6f6_1x400.png │ │ │ ├── ui-bg_glass_100_fdf5ce_1x400.png │ │ │ ├── ui-bg_gloss-wave_35_f6a828_500x100.png │ │ │ ├── ui-bg_diagonals-thick_18_b81900_40x40.png │ │ │ ├── ui-bg_diagonals-thick_20_666666_40x40.png │ │ │ ├── ui-bg_highlight-soft_100_eeeeee_1x100.png │ │ │ └── ui-bg_highlight-soft_75_ffe45c_1x100.png │ └── nvd3 │ │ ├── .gitignore │ │ └── LICENSE.md ├── libmba │ ├── mba │ │ ├── iterator.h │ │ ├── dbug.h │ │ ├── suba.h │ │ ├── diff.h │ │ ├── varray.h │ │ ├── msgno.h │ │ ├── hashmap.h │ │ └── allocator.h │ └── allocator.c ├── nr_c304 │ └── code │ │ └── amoeba.h └── nhpup_1.1.js ├── testrunner.cc ├── antonie.hh ├── compat.hh ├── .gitignore ├── invert.cc ├── stitchalg.hh ├── test-taxoreader_cc.cc ├── update-git-hash-if-necessary ├── taxoreader.hh ├── .travis.yml ├── testrun.sh ├── .github └── workflows │ └── c-cpp.yml ├── test-saminfra_cc.cc ├── 16ssearcher.hh ├── fastqindex.hh ├── tclap ├── Makefile.am ├── Visitor.h ├── IgnoreRestVisitor.h ├── OptionalUnlabeledTracker.h ├── Constraint.h ├── CmdLineOutput.h ├── HelpVisitor.h ├── VersionVisitor.h ├── ArgTraits.h ├── ValuesConstraint.h ├── CmdLineInterface.h ├── XorHandler.h └── StandardTraits.h 
├── test-misc_hh.cc ├── fqgrep.cc ├── bridge.cc ├── strdiff.c ├── refgenome2.hh ├── stitcher.cc ├── gendump.cc ├── test-dnamisc_cc.cc ├── misc.hh ├── saminfra.hh ├── gcscan.cc ├── geneannotated.hh ├── gfftool.cc ├── digisplice.cc ├── fagrep.cc ├── taxoreader.cc ├── fastq.hh ├── chromopic.cc ├── gffedit.cc ├── support.js ├── refgenome2.cc ├── dino.cc ├── zstuff.hh ├── charsample.cc ├── pfqgrep.cc ├── fastqindex.cc ├── cpgstats.cc ├── cluster.cc ├── viewer.html ├── chagstats.cc ├── nwunsch.cc ├── exoexplore.cc ├── gfflookup.cc ├── misc.cc ├── fastq.cc ├── refgenome.hh ├── test-nucstore_cc.cc ├── nucstore.hh ├── AntonieLaunch.py ├── genbankparser.cc ├── cor2.cc ├── dnamisc.cc └── stitchalg.cc /ccan/hash/LICENSE: -------------------------------------------------------------------------------- 1 | ../../licenses/CC0 -------------------------------------------------------------------------------- /ccan/build_assert/LICENSE: -------------------------------------------------------------------------------- 1 | ../../licenses/CC0 -------------------------------------------------------------------------------- /config.h: -------------------------------------------------------------------------------- 1 | #define HAVE_LITTLE_ENDIAN 1 2 | -------------------------------------------------------------------------------- /githash.cc: -------------------------------------------------------------------------------- 1 | #include "githash.h" 2 | 3 | const char* g_gitHash=GIT_HASH; 4 | -------------------------------------------------------------------------------- /sysdeps/Darwin.inc: -------------------------------------------------------------------------------- 1 | CXX2011FLAGS=-std=c++11 -stdlib=libc++ -I/usr/local/include/ -ftemplate-depth=1000 -------------------------------------------------------------------------------- /sysdeps/Linux.inc: -------------------------------------------------------------------------------- 1 | CXX2014FLAGS=-std=gnu++17 2 | STATICFLAGS 
?=-Wl,-Bstatic -lstdc++ -lgcc -Wl,-Bdynamic -static-libgcc -lm -lc 3 | -------------------------------------------------------------------------------- /ext/fmt-8.0.1/include/fmt/locale.h: -------------------------------------------------------------------------------- 1 | #include "xchar.h" 2 | #warning fmt/locale.h is deprecated, include fmt/format.h or fmt/xchar.h instead 3 | -------------------------------------------------------------------------------- /testrunner.cc: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_DYN_LINK 2 | #define BOOST_TEST_MAIN 3 | #define BOOST_TEST_MODULE unit 4 | 5 | #include 6 | -------------------------------------------------------------------------------- /antonie.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | typedef uint32_t dnapos_t; 5 | extern unsigned int dnanpos; 6 | extern const char* phiXFastA; 7 | -------------------------------------------------------------------------------- /ccan/build_assert/test/compile_ok.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char *argv[]) 4 | { 5 | BUILD_ASSERT(1 == 1); 6 | return 0; 7 | } 8 | -------------------------------------------------------------------------------- /compat.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifdef _WIN32 3 | #define getcwd(x,y) GetCurrentDirectory((y),(x)) 4 | #else 5 | #include 6 | #include 7 | #endif 8 | 9 | 10 | -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/animated-overlay.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie2/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/animated-overlay.gif 
-------------------------------------------------------------------------------- /ccan/build_assert/test/compile_fail.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char *argv[]) 4 | { 5 | #ifdef FAIL 6 | BUILD_ASSERT(1 == 0); 7 | #endif 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_222222_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie2/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_222222_256x240.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_228ef1_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie2/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_228ef1_256x240.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_ef8c08_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie2/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_ef8c08_256x240.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_ffd27a_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie2/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_ffd27a_256x240.png 
-------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_ffffff_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie2/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_ffffff_256x240.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_flat_10_000000_40x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie2/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_flat_10_000000_40x100.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_glass_65_ffffff_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie2/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_glass_65_ffffff_1x400.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | 6 | # Compiled Dynamic libraries 7 | *.so 8 | *.dylib 9 | 10 | # Compiled Static libraries 11 | *.lai 12 | *.la 13 | *.a 14 | 15 | # other 16 | *.d 17 | *~ 18 | -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_glass_100_f6f6f6_1x400.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/berthubert/antonie2/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_glass_100_f6f6f6_1x400.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_glass_100_fdf5ce_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie2/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_glass_100_fdf5ce_1x400.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_gloss-wave_35_f6a828_500x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie2/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_gloss-wave_35_f6a828_500x100.png -------------------------------------------------------------------------------- /ccan/build_assert/test/compile_fail-expr.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char *argv[]) 4 | { 5 | #ifdef FAIL 6 | return BUILD_ASSERT_OR_ZERO(1 == 0); 7 | #else 8 | return 0; 9 | #endif 10 | } 11 | -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_diagonals-thick_18_b81900_40x40.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie2/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_diagonals-thick_18_b81900_40x40.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_diagonals-thick_20_666666_40x40.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie2/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_diagonals-thick_20_666666_40x40.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_highlight-soft_100_eeeeee_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie2/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_highlight-soft_100_eeeeee_1x100.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_highlight-soft_75_ffe45c_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie2/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_highlight-soft_75_ffe45c_1x100.png -------------------------------------------------------------------------------- /ccan/build_assert/test/run-BUILD_ASSERT_OR_ZERO.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char *argv[]) 5 | { 6 | plan_tests(1); 7 | ok1(BUILD_ASSERT_OR_ZERO(1 == 1) == 0); 8 | return exit_status(); 9 | } 10 | -------------------------------------------------------------------------------- /invert.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "misc.hh" 3 | using namespace std; 4 | 5 | int main(int argc, char**argv) 6 | { 7 | for(int n = 1 ; n < argc; ++n) { 8 | string nucs(argv[n]); 9 | reverseNucleotides(&nucs); 10 | cout< 3 | #include "fastqindex.hh" 4 | 5 | std::string doStitch(const std::map > >& fhpos, 6 | const std::string& startseed_, 7 | const std::string& 
endseed, unsigned int maxlen, int chunklen, bool verbose); 8 | int dnaDiff(const std::string& a, const std::string& b); 9 | -------------------------------------------------------------------------------- /test-taxoreader_cc.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "taxoreader.hh" 3 | BOOST_AUTO_TEST_SUITE(taxoreader_hh) 4 | 5 | BOOST_AUTO_TEST_CASE(test_taxoreader) { 6 | /* 7 | TaxoReader tr("./taxonomy/new/fullnamelineage.dmp"); 8 | auto parts = tr.get(1915799); 9 | BOOST_CHECK_EQUAL(*parts.rbegin(), "unclassified Candidatus Poseidoniales"); 10 | */ 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /update-git-hash-if-necessary: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | HASH=$(git describe --always --dirty=+ | tr -d '\n') 3 | 4 | echo \#define GIT_HASH \"$HASH\" > githash.h.tmp 5 | echo $HASH > githash 6 | 7 | cmp -s githash.h githash.h.tmp > /dev/null 8 | 9 | if [ "$?" 
-ne "0" ] 10 | then 11 | mv githash.h.tmp githash.h 12 | echo updated githash.h 13 | else 14 | rm githash.h.tmp 15 | fi 16 | 17 | 18 | -------------------------------------------------------------------------------- /taxoreader.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | class TaxoReader 6 | { 7 | public: 8 | explicit TaxoReader(const std::string& fname); 9 | std::vector get(int id) const; 10 | size_t size() const 11 | { 12 | return d_store.size(); 13 | } 14 | private: 15 | std::map > d_store; 16 | }; 17 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | compiler: 3 | - gcc 4 | - clang 5 | before_script: 6 | - sudo apt-get update 7 | - sudo apt-get install libboost-test-dev libz-dev 8 | - wget http://ds9a.nl/antonie/test-files.tar.bz2 9 | - tar xf test-files.tar.bz2 10 | script: 11 | - make -j 4 12 | - make -j 4 check 13 | - ./testrun.sh 14 | notifications: 15 | email: 16 | - bert.hubert@netherlabs.nl 17 | 18 | -------------------------------------------------------------------------------- /ext/libmba/mba/iterator.h: -------------------------------------------------------------------------------- 1 | #ifndef MBA_ITERATOR_H 2 | #define MBA_ITERATOR_H 3 | 4 | /* iter - container for iterator state 5 | */ 6 | 7 | typedef struct _iter { 8 | unsigned long i1; 9 | unsigned long i2; 10 | unsigned long i3; 11 | void *p; 12 | } iter_t; 13 | 14 | typedef void (*iterate_fn)(void *obj, iter_t *iter); 15 | typedef void *(*iterate_next_fn)(void *obj, iter_t *iter); 16 | 17 | #endif /* MBA_ITERATOR_H */ 18 | -------------------------------------------------------------------------------- /ext/html/nvd3/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Jekyll Files # 3 | 
################ 4 | _site 5 | 6 | 7 | # Random Files # 8 | ################ 9 | *.swp 10 | *~ 11 | *.log 12 | 13 | 14 | # Private Test Data # 15 | ##################### 16 | *REALDATA* 17 | 18 | 19 | # OS generated files # 20 | ###################### 21 | .DS_Store* 22 | ehthumbs.db 23 | Icon? 24 | Thumbs.db 25 | # nodejs packages # 26 | ###################### 27 | node_modules 28 | -------------------------------------------------------------------------------- /testrun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | rm -f data.js loci.0.js 4 | ./antonie -1 sbw25/P1-1-35_S5_L001_R1_001.fastq -2 sbw25/P1-1-35_S5_L001_R2_001.fastq -r sbw25/NC_012660.fna -a sbw25/NC_012660.gbk 5 | 6 | if grep 895351 data.js -q 7 | then 8 | echo Found SNP 895151 9 | else 10 | echo missed SNP 895151 11 | exit 1 12 | fi 13 | 14 | if grep YP_002874253.1 data.js -q 15 | then 16 | echo Found YP_002874253.1 17 | else 18 | echo missed YP_002874253.1 19 | exit 1 20 | fi 21 | 22 | 23 | exit 0 24 | -------------------------------------------------------------------------------- /.github/workflows/c-cpp.yml: -------------------------------------------------------------------------------- 1 | name: C/C++ CI 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: deps 17 | run: | 18 | sudo apt-get update 19 | sudo apt-get install libboost-dev libz-dev libboost-test-dev 20 | - name: make 21 | run: make 22 | - name: make check 23 | run: make check 24 | -------------------------------------------------------------------------------- /test-saminfra_cc.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "saminfra.hh" 3 | BOOST_AUTO_TEST_SUITE(saminfra_hh) 4 | using std::string; 5 | 6 | BOOST_AUTO_TEST_CASE(test_bamCompress) { 7 | 
BOOST_CHECK_EQUAL(bamCompress("AAAA"), string("\x11\x11", 2)); 8 | BOOST_CHECK_EQUAL(bamCompress("CCCC"), string("\x22\x22", 2)); 9 | BOOST_CHECK_EQUAL(bamCompress("ACACACAC"), string("\x12\x12\x12\x12", 4)); 10 | BOOST_CHECK_EQUAL(bamCompress("NNNN"), string("\xff\xff", 2)); 11 | BOOST_CHECK_EQUAL(bamCompress("PPPP"), string("\xff\xff", 2)); 12 | } 13 | 14 | 15 | BOOST_AUTO_TEST_SUITE_END() 16 | -------------------------------------------------------------------------------- /16ssearcher.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "zstuff.hh" 8 | 9 | //! A class to match reads to a 16S database 10 | class Search16S 11 | { 12 | public: 13 | struct Entry 14 | { 15 | uint32_t id; 16 | std::string nucs; 17 | std::string name; 18 | bool operator<(const Entry& rhs) const 19 | { 20 | return id < rhs.id; 21 | } 22 | }; 23 | 24 | Search16S(const std::string& src); 25 | bool get(Entry* entry); 26 | 27 | private: 28 | std::unique_ptr d_linereader; 29 | }; 30 | -------------------------------------------------------------------------------- /fastqindex.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include "fastq.hh" 6 | 7 | struct HashedPos 8 | { 9 | uint32_t hash; 10 | uint64_t position; 11 | bool operator<(const HashedPos& b) const { 12 | return hash < b.hash; 13 | } 14 | bool operator<(uint32_t h) const { 15 | return hash < h; 16 | } 17 | }__attribute__((packed)); 18 | 19 | std::unique_ptr > indexFASTQ(FASTQReader* fqreader, const std::string& fname, int chunklen); 20 | 21 | std::vector getConsensusMatches(const std::string& consensus, const std::map > >& fhpos, int chunklen); 22 | -------------------------------------------------------------------------------- /tclap/Makefile.am: 
-------------------------------------------------------------------------------- 1 | 2 | libtclapincludedir = $(includedir)/tclap 3 | 4 | libtclapinclude_HEADERS = \ 5 | CmdLineInterface.h \ 6 | ArgException.h \ 7 | CmdLine.h \ 8 | XorHandler.h \ 9 | MultiArg.h \ 10 | UnlabeledMultiArg.h \ 11 | ValueArg.h \ 12 | UnlabeledValueArg.h \ 13 | Visitor.h Arg.h \ 14 | HelpVisitor.h \ 15 | SwitchArg.h \ 16 | MultiSwitchArg.h \ 17 | VersionVisitor.h \ 18 | IgnoreRestVisitor.h \ 19 | CmdLineOutput.h \ 20 | StdOutput.h \ 21 | DocBookOutput.h \ 22 | ZshCompletionOutput.h \ 23 | OptionalUnlabeledTracker.h \ 24 | Constraint.h \ 25 | ValuesConstraint.h \ 26 | ArgTraits.h \ 27 | StandardTraits.h 28 | 29 | -------------------------------------------------------------------------------- /test-misc_hh.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "misc.hh" 3 | BOOST_AUTO_TEST_SUITE(misc_hh) 4 | 5 | BOOST_AUTO_TEST_CASE(test_VarMeanEstimator) { 6 | VarMeanEstimator vme; 7 | vme(0); 8 | BOOST_CHECK_CLOSE(mean(vme), 0.0, 0.001); 9 | BOOST_CHECK_CLOSE(variance(vme), 0.0, 0.001); 10 | 11 | for(auto d : {1,2,3,4}) 12 | vme(d); 13 | BOOST_CHECK_CLOSE(mean(vme), 2.0, 0.001); 14 | BOOST_CHECK_CLOSE(variance(vme), 2.0, 0.001); 15 | 16 | } 17 | BOOST_AUTO_TEST_CASE(test_reverseNucleotides) { 18 | std::string tst{"TTTTGGGCCA"}; 19 | reverseNucleotides(&tst); 20 | BOOST_CHECK_EQUAL(tst, "TGGCCCAAAA"); 21 | tst.clear(); 22 | reverseNucleotides(&tst); 23 | BOOST_CHECK_EQUAL(tst, ""); 24 | } 25 | 26 | BOOST_AUTO_TEST_SUITE_END() 27 | -------------------------------------------------------------------------------- /fqgrep.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "misc.hh" 3 | #include "fastq.hh" 4 | using namespace std; 5 | 6 | // greps through fastq files, also matching reverse complement 7 | 8 | int main(int argc, char**argv) 9 | { 10 | string search(argv[1]); 11 | 
string rsearch(search); 12 | reverseNucleotides(&rsearch); 13 | 14 | for(int n = 2 ; n < argc; ++n) { 15 | FASTQReader fqreader(argv[n], 33); 16 | FastQRead fqr; 17 | 18 | while(fqreader.getRead(&fqr)) { 19 | auto pos = fqr.d_nucleotides.find(search); 20 | if(pos != string::npos) { 21 | cout< 2 | #include 3 | 4 | /** 5 | * hash - routines for hashing bytes 6 | * 7 | * When creating a hash table it's important to have a hash function 8 | * which mixes well and is fast. This package supplies such functions. 9 | * 10 | * The hash functions come in two flavors: the normal ones and the 11 | * stable ones. The normal ones can vary from machine-to-machine and 12 | * may change if we find better or faster hash algorithms in future. 13 | * The stable ones will always give the same results on any computer, 14 | * and on any version of this package. 15 | * 16 | * License: CC0 (Public domain) 17 | * Maintainer: Rusty Russell 18 | * Author: Bob Jenkins 19 | */ 20 | int main(int argc, char *argv[]) 21 | { 22 | if (argc != 2) 23 | return 1; 24 | 25 | if (strcmp(argv[1], "depends") == 0) { 26 | printf("ccan/build_assert\n"); 27 | return 0; 28 | } 29 | 30 | return 1; 31 | } 32 | -------------------------------------------------------------------------------- /bridge.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace std; 7 | 8 | string greet(const std::string& in) 9 | { 10 | return in +", " + in; 11 | } 12 | 13 | vector more() 14 | { 15 | return vector{"boeh", "bah", "beh"}; 16 | } 17 | 18 | boost::python::list moredoub() 19 | { 20 | boost::python::list ret; 21 | for(double n = 0 ; n < 1000000; ++n) 22 | ret.append(n); 23 | 24 | return ret; 25 | } 26 | 27 | BOOST_PYTHON_MODULE(libbridge) 28 | { 29 | using namespace boost::python; 30 | def("greet", greet); 31 | 32 | class_ >("stl_vector_string") 33 | .def(vector_indexing_suite >()) 34 | ; 35 | 36 | class_ 
>("stl_vector_double") 37 | .def(vector_indexing_suite >()) 38 | ; 39 | 40 | 41 | def("more", more); 42 | def("moredoub", moredoub); 43 | 44 | 45 | } 46 | -------------------------------------------------------------------------------- /ext/libmba/mba/dbug.h: -------------------------------------------------------------------------------- 1 | #ifndef MBA_DBUG_H 2 | #define MBA_DBUG_H 3 | 4 | /* dbug - resolve symbols and print stack traces w/ x86 GNUC 5 | */ 6 | 7 | #include 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | #ifndef LIBMBA_API 14 | #ifdef WIN32 15 | # ifdef LIBMBA_EXPORTS 16 | # define LIBMBA_API __declspec(dllexport) 17 | # else /* LIBMBA_EXPORTS */ 18 | # define LIBMBA_API __declspec(dllimport) 19 | # endif /* LIBMBA_EXPORTS */ 20 | #else /* WIN32 */ 21 | # define LIBMBA_API extern 22 | #endif /* WIN32 */ 23 | #endif /* LIBMBA_API */ 24 | 25 | extern int dbug_stacktrace(void **buf, int off, int n); 26 | extern unsigned char *dbug_resolve_symbol(void *sym, unsigned char *buf, unsigned char *blim); 27 | extern int dbug_sprint_stacktrace(unsigned char *str, 28 | unsigned char *slim, 29 | void **syms, 30 | int sn, 31 | const unsigned char *msg); 32 | extern int dbug_fprint_stacktrace(FILE *stream, int off, int n, const char *msg); 33 | 34 | #ifdef __cplusplus 35 | } 36 | #endif 37 | 38 | #endif /* MBA_DBUG_H */ 39 | -------------------------------------------------------------------------------- /ext/libmba/mba/suba.h: -------------------------------------------------------------------------------- 1 | #ifndef MBA_SUBA_H 2 | #define MBA_SUBA_H 3 | 4 | /* suba - sub-allocate memory from larger chunk of memory 5 | */ 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #ifndef LIBMBA_API 12 | #ifdef WIN32 13 | # ifdef LIBMBA_EXPORTS 14 | # define LIBMBA_API __declspec(dllexport) 15 | # else /* LIBMBA_EXPORTS */ 16 | # define LIBMBA_API __declspec(dllimport) 17 | # endif /* LIBMBA_EXPORTS */ 18 | #else /* WIN32 */ 19 | # 
define LIBMBA_API extern 20 | #endif /* WIN32 */ 21 | #endif /* LIBMBA_API */ 22 | 23 | #include 24 | #include 25 | 26 | #define SUBA_PTR_SIZE(ptr) ((ptr) ? (*((size_t *)(ptr) - 1)) : 0) 27 | 28 | extern struct allocator *suba_init(void *mem, size_t size, int rst, size_t mincell); 29 | extern void *suba_alloc(struct allocator *suba, size_t size, int zero); 30 | extern void *suba_realloc(struct allocator *suba, void *ptr, size_t size); 31 | extern int suba_free(void *suba, void *ptr); 32 | 33 | extern void *suba_addr(const struct allocator *suba, const ref_t ref); 34 | extern ref_t suba_ref(const struct allocator *suba, const void *ptr); 35 | 36 | #ifdef __cplusplus 37 | } 38 | #endif 39 | 40 | #endif /* MBA_SUBA_H */ 41 | 42 | -------------------------------------------------------------------------------- /strdiff.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | int 9 | main(int argc, char *argv[]) 10 | { 11 | const char *a = argv[1]; 12 | const char *b = argv[2]; 13 | int n, m, d; 14 | int sn, i; 15 | struct varray *ses = varray_new(sizeof(struct diff_edit), NULL); 16 | 17 | if (argc < 3) { 18 | fprintf(stderr, "usage: %s \n", argv[0]); 19 | return EXIT_FAILURE; 20 | } 21 | 22 | n = strlen(a); 23 | m = strlen(b); 24 | if ((d = diff(a, 0, n, b, 0, m, NULL, NULL, NULL, 0, ses, &sn, NULL)) == -1) { 25 | MMNO(errno); 26 | return EXIT_FAILURE; 27 | } 28 | 29 | printf("d=%d sn=%d\n", d, sn); 30 | for (i = 0; i < sn; i++) { 31 | struct diff_edit *e = varray_get(ses, i); 32 | 33 | switch (e->op) { 34 | case DIFF_MATCH: 35 | printf("MAT: "); 36 | fwrite(a + e->off, 1, e->len, stdout); 37 | break; 38 | case DIFF_INSERT: 39 | printf("INS: "); 40 | fwrite(b + e->off, 1, e->len, stdout); 41 | break; 42 | case DIFF_DELETE: 43 | printf("DEL: "); 44 | fwrite(a + e->off, 1, e->len, stdout); 45 | break; 46 | } 47 | printf("\n"); 48 | } 49 | 50 | varray_del(ses); 
51 | return EXIT_SUCCESS; 52 | } 53 | 54 | -------------------------------------------------------------------------------- /ext/libmba/mba/diff.h: -------------------------------------------------------------------------------- 1 | #ifndef MBA_DIFF_H 2 | #define MBA_DIFF_H 3 | 4 | /* diff - compute a shortest edit script (SES) given two sequences 5 | */ 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #ifndef LIBMBA_API 12 | #ifdef WIN32 13 | # ifdef LIBMBA_EXPORTS 14 | # define LIBMBA_API __declspec(dllexport) 15 | # else /* LIBMBA_EXPORTS */ 16 | # define LIBMBA_API __declspec(dllimport) 17 | # endif /* LIBMBA_EXPORTS */ 18 | #else /* WIN32 */ 19 | # define LIBMBA_API extern 20 | #endif /* WIN32 */ 21 | #endif /* LIBMBA_API */ 22 | 23 | #include 24 | #include /* cmp_fn */ 25 | 26 | typedef const void *(*idx_fn)(const void *s, int idx, void *context); 27 | 28 | typedef enum { 29 | DIFF_MATCH = 1, 30 | DIFF_DELETE, 31 | DIFF_INSERT 32 | } diff_op; 33 | 34 | struct diff_edit { 35 | short op; 36 | int off; /* off into s1 if MATCH or DELETE but s2 if INSERT */ 37 | int len; 38 | }; 39 | 40 | /* consider alternate behavior for each NULL parameter 41 | */ 42 | extern int diff(const void *a, int aoff, int n, 43 | const void *b, int boff, int m, 44 | idx_fn idx, cmp_fn cmp, void *context, int dmax, 45 | struct varray *ses, int *sn, 46 | struct varray *buf); 47 | 48 | #ifdef __cplusplus 49 | } 50 | #endif 51 | 52 | #endif /* MBA_DIFF_H */ 53 | -------------------------------------------------------------------------------- /refgenome2.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include "nucstore.hh" 5 | #include 6 | 7 | class ReferenceGenome 8 | { 9 | public: 10 | struct Chromosome 11 | { 12 | std::string fullname; 13 | uint32_t offset; 14 | NucleotideStore chromosome; 15 | }; 16 | 17 | ReferenceGenome(const boost::string_ref& fname, 18 | std::function 
idx=std::function()); 19 | 20 | std::string d_fname; 21 | NucleotideStore getRange(uint32_t offset, uint32_t len) const; 22 | const Chromosome* getChromosome(const std::string& name) const 23 | { 24 | if(!d_genome.count(name)) 25 | return 0; 26 | auto str=d_genome.find(name); 27 | return &str->second; 28 | } 29 | uint32_t numChromosomes() 30 | { 31 | return d_genome.size(); 32 | } 33 | 34 | uint32_t numNucleotides() const 35 | { 36 | if(d_lookup.empty()) 37 | return 0; 38 | return (*d_lookup.rbegin())->offset + (*d_lookup.rbegin())->chromosome.size(); 39 | } 40 | 41 | const std::map& getAllChromosomes() 42 | { 43 | return d_genome; 44 | } 45 | 46 | private: 47 | 48 | std::map d_genome; 49 | std::vector d_lookup; 50 | }; 51 | -------------------------------------------------------------------------------- /ccan/build_assert/build_assert.h: -------------------------------------------------------------------------------- 1 | /* CC0 (Public domain) - see LICENSE file for details */ 2 | #ifndef CCAN_BUILD_ASSERT_H 3 | #define CCAN_BUILD_ASSERT_H 4 | 5 | /** 6 | * BUILD_ASSERT - assert a build-time dependency. 7 | * @cond: the compile-time condition which must be true. 8 | * 9 | * Your compile will fail if the condition isn't true, or can't be evaluated 10 | * by the compiler. This can only be used within a function. 11 | * 12 | * Example: 13 | * #include 14 | * ... 15 | * static char *foo_to_char(struct foo *foo) 16 | * { 17 | * // This code needs string to be at start of foo. 18 | * BUILD_ASSERT(offsetof(struct foo, string) == 0); 19 | * return (char *)foo; 20 | * } 21 | */ 22 | #define BUILD_ASSERT(cond) \ 23 | do { (void) sizeof(char [1 - 2*!(cond)]); } while(0) 24 | 25 | /** 26 | * BUILD_ASSERT_OR_ZERO - assert a build-time dependency, as an expression. 27 | * @cond: the compile-time condition which must be true. 28 | * 29 | * Your compile will fail if the condition isn't true, or can't be evaluated 30 | * by the compiler. 
This can be used in an expression: its value is "0". 31 | * 32 | * Example: 33 | * #define foo_to_char(foo) \ 34 | * ((char *)(foo) \ 35 | * + BUILD_ASSERT_OR_ZERO(offsetof(struct foo, string) == 0)) 36 | */ 37 | #define BUILD_ASSERT_OR_ZERO(cond) \ 38 | (sizeof(char [1 - 2*!(cond)]) - 1) 39 | 40 | #endif /* CCAN_BUILD_ASSERT_H */ 41 | -------------------------------------------------------------------------------- /tclap/Visitor.h: -------------------------------------------------------------------------------- 1 | 2 | /****************************************************************************** 3 | * 4 | * file: Visitor.h 5 | * 6 | * Copyright (c) 2003, Michael E. Smoot . 7 | * All rights reverved. 8 | * 9 | * See the file COPYING in the top directory of this distribution for 10 | * more information. 11 | * 12 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 13 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 15 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 17 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 18 | * DEALINGS IN THE SOFTWARE. 19 | * 20 | *****************************************************************************/ 21 | 22 | 23 | #ifndef TCLAP_VISITOR_H 24 | #define TCLAP_VISITOR_H 25 | 26 | namespace TCLAP { 27 | 28 | /** 29 | * A base class that defines the interface for visitors. 30 | */ 31 | class Visitor 32 | { 33 | public: 34 | 35 | /** 36 | * Constructor. Does nothing. 37 | */ 38 | Visitor() { } 39 | 40 | /** 41 | * Destructor. Does nothing. 42 | */ 43 | virtual ~Visitor() { } 44 | 45 | /** 46 | * Does nothing. Should be overridden by child. 
47 | */ 48 | virtual void visit() { } 49 | }; 50 | 51 | } 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /stitcher.cc: -------------------------------------------------------------------------------- 1 | #include "refgenome.hh" 2 | #include "geneannotated.hh" 3 | #include 4 | #include "misc.hh" 5 | #include 6 | #include 7 | #include "dnamisc.hh" 8 | #include 9 | #include "fastqindex.hh" 10 | #include 11 | #include "stitchalg.hh" 12 | extern "C" { 13 | #include "hash.h" 14 | } 15 | 16 | using namespace std; 17 | 18 | int g_maxdepth; 19 | 20 | dnapos_t g_record=0; 21 | 22 | set > g_beenthere; 23 | 24 | string g_bestcontig; 25 | 26 | set g_candidates; 27 | 28 | 29 | // stitcher fasta startpos fastq fastq 30 | int main(int argc, char**argv) 31 | { 32 | if(argc < 4) { 33 | cerr<<"Syntax: stitcher reference.fasta startoffset|startsnippet endsnippet fastq fastq"< > > fhpos; 50 | 51 | FASTQReader* fqreader; 52 | 53 | for(int f = 4; f < argc; ++f) { 54 | fqreader = new FASTQReader(argv[f], 33); 55 | fhpos[fqreader]=indexFASTQ(fqreader, argv[f], chunklen); 56 | } 57 | setbuf(stdout, 0); 58 | doStitch(fhpos, startseed, endseed, 10000, chunklen, false); 59 | } 60 | 61 | -------------------------------------------------------------------------------- /ccan/build_assert/_info: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "config.h" 4 | 5 | /** 6 | * build_assert - routines for build-time assertions 7 | * 8 | * This code provides routines which will cause compilation to fail should some 9 | * assertion be untrue: such failures are preferable to run-time assertions, 10 | * but much more limited since they can only depend on compile-time constants.
11 | * 12 | * These assertions are most useful when two parts of the code must be kept in 13 | * sync: it is better to avoid such cases if possible, but second best is to 14 | * detect invalid changes at build time. 15 | * 16 | * For example, a tricky piece of code might rely on a certain element being at 17 | * the start of the structure. To ensure that future changes don't break it, 18 | * you would catch such changes in your code like so: 19 | * 20 | * Example: 21 | * #include 22 | * #include 23 | * 24 | * struct foo { 25 | * char string[5]; 26 | * int x; 27 | * }; 28 | * 29 | * static char *foo_string(struct foo *foo) 30 | * { 31 | * // This trick requires that the string be first in the structure 32 | * BUILD_ASSERT(offsetof(struct foo, string) == 0); 33 | * return (char *)foo; 34 | * } 35 | * 36 | * License: CC0 (Public domain) 37 | * Author: Rusty Russell 38 | */ 39 | int main(int argc, char *argv[]) 40 | { 41 | if (argc != 2) 42 | return 1; 43 | 44 | if (strcmp(argv[1], "depends") == 0) 45 | /* Nothing. */ 46 | return 0; 47 | 48 | return 1; 49 | } 50 | -------------------------------------------------------------------------------- /tclap/IgnoreRestVisitor.h: -------------------------------------------------------------------------------- 1 | 2 | /****************************************************************************** 3 | * 4 | * file: IgnoreRestVisitor.h 5 | * 6 | * Copyright (c) 2003, Michael E. Smoot . 7 | * All rights reverved. 8 | * 9 | * See the file COPYING in the top directory of this distribution for 10 | * more information. 11 | * 12 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 13 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL 15 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 17 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 18 | * DEALINGS IN THE SOFTWARE. 19 | * 20 | *****************************************************************************/ 21 | 22 | 23 | #ifndef TCLAP_IGNORE_REST_VISITOR_H 24 | #define TCLAP_IGNORE_REST_VISITOR_H 25 | 26 | #include 27 | #include 28 | 29 | namespace TCLAP { 30 | 31 | /** 32 | * A Visitor that tells the CmdLine to begin ignoring arguments after 33 | * this one is parsed. 34 | */ 35 | class IgnoreRestVisitor: public Visitor 36 | { 37 | public: 38 | 39 | /** 40 | * Constructor. 41 | */ 42 | IgnoreRestVisitor() : Visitor() {} 43 | 44 | /** 45 | * Sets Arg::_ignoreRest. 46 | */ 47 | void visit() { Arg::beginIgnoring(); } 48 | }; 49 | 50 | } 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /gendump.cc: -------------------------------------------------------------------------------- 1 | #include "refgenome2.hh" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "dnamisc.hh" 7 | #include 8 | #include 9 | #include 10 | #include "misc.hh" 11 | #include 12 | #include 13 | 14 | using namespace std; 15 | 16 | 17 | int main(int argc, char **argv) 18 | { 19 | if(argc < 3) { 20 | cerr<<"Syntax: gendump refgenome.fna chromosome offset1 offset2"<chromosome.getRange(atoi(argv[3]), atoi(argv[4]) - atoi(argv[3])); 26 | 27 | cout<<"DNA"< 2 | #include "dnamisc.hh" 3 | #include "misc.hh" 4 | #include 5 | using namespace std; 6 | 7 | BOOST_AUTO_TEST_SUITE(misc_hh) 8 | 9 | BOOST_AUTO_TEST_CASE(test_kmerMapper) { 10 | BOOST_CHECK_EQUAL(kmerMapper("AAAA", 0, 4), 0U); 11 | BOOST_CHECK_EQUAL(kmerMapper("AAAAAAAA", 0, 8), 0U); 12 | BOOST_CHECK_EQUAL(kmerMapper("AAAAAAAAAAAA", 0, 12), 0U); 13 | BOOST_CHECK_EQUAL(kmerMapper("AAAAAAAAAAAAAAAA", 0, 16), 0U); 14 | 15
| BOOST_CHECK_EQUAL(kmerMapper("CCCC", 0, 4), 85U); 16 | BOOST_CHECK_EQUAL(kmerMapper("CCCCCCCC", 0, 8), 21845U); 17 | BOOST_CHECK_EQUAL(kmerMapper("CCCCCCCCCCCC", 0, 12), 5592405U); 18 | BOOST_CHECK_EQUAL(kmerMapper("CCCCCCCCCCCCCCCC", 0, 16), 1431655765U); 19 | 20 | BOOST_CHECK_EQUAL(DNAToAminoAcid("GCC"), 'A'); 21 | BOOST_CHECK_EQUAL(AminoAcidName('A'), "Alanine"); 22 | } 23 | 24 | BOOST_AUTO_TEST_CASE(test_visitAllNgrams) { 25 | set all; 26 | auto func = [&all](const std::string& ngram) { 27 | all.insert(ngram); 28 | }; 29 | visitAllNgrams(func, 3); 30 | BOOST_CHECK_EQUAL(all.size(), 64); 31 | all.clear(); 32 | visitAllNgrams(func, 1); 33 | BOOST_CHECK_EQUAL(all.size(), 4); 34 | BOOST_CHECK_EQUAL(all.count("A"), 1); 35 | BOOST_CHECK_EQUAL(all.count("C"), 1); 36 | BOOST_CHECK_EQUAL(all.count("G"), 1); 37 | BOOST_CHECK_EQUAL(all.count("T"), 1); 38 | 39 | all.clear(); 40 | visitAllNgrams(func, 6); 41 | BOOST_CHECK_EQUAL(all.size(), 4096); 42 | BOOST_CHECK_EQUAL(all.count("AAACCC"), 1); 43 | BOOST_CHECK_EQUAL(all.count("TTTTTT"), 1); 44 | 45 | 46 | 47 | } 48 | 49 | BOOST_AUTO_TEST_SUITE_END() 50 | -------------------------------------------------------------------------------- /misc.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | void chomp(char* line); 10 | char* sfgets(char* p, int num, FILE* fp); 11 | void reverseNucleotides(std::string* nucleotides); 12 | uint64_t filesize(const char* name); 13 | bool stringfgets(FILE* fp, std::string* line); 14 | bool stringfgets(gzFile fp, std::string* line); 15 | 16 | /** Rapid estimator of variance and mean of a series of doubles. 
17 | API compatible with a, sadly, far slower boost::accumulator_set 18 | doing the same thing*/ 19 | class VarMeanEstimator 20 | { 21 | public: 22 | VarMeanEstimator() : N(0), xTot(0), x2Tot(0) {} 23 | void operator()(double val) 24 | { 25 | ++N; 26 | xTot += val; 27 | x2Tot += val*val; 28 | } 29 | bool valid() const 30 | { 31 | return N>0; 32 | } 33 | friend double mean(const VarMeanEstimator& vme); 34 | friend double variance(const VarMeanEstimator& vme); 35 | private: 36 | uint64_t N; 37 | double xTot; 38 | double x2Tot; 39 | }; 40 | 41 | //! extract 'mean' from a VarMeanEstimator; computed as xTot/N, so only meaningful when valid() is true (N>0) 42 | inline double mean(const VarMeanEstimator& vme) 43 | { 44 | return vme.xTot/vme.N; 45 | } 46 | 47 | //! extract 'variance' from a VarMeanEstimator; population variance (x2Tot - xTot*xTot/N)/N, so only meaningful when valid() is true (N>0) 48 | inline double variance(const VarMeanEstimator& vme) 49 | { 50 | return (vme.x2Tot - vme.xTot*vme.xTot/vme.N)/vme.N; 51 | } 52 | 53 | std::string compilerVersion(); 54 | void reverseNucleotides(std::string* nucleotides); 55 | 56 | std::vector expandArguments(int argc, char** argv); 57 | void visitAllNgrams(std::function exec, unsigned int chars, std::string start=""); 58 | -------------------------------------------------------------------------------- /saminfra.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include "antonie.hh" 5 | #include 6 | #include "fastq.hh" 7 | #include "zstuff.hh" 8 | 9 | //! Write SAM files, with support for paired-end read mappings 10 | class SAMWriter 11 | { 12 | public: 13 | SAMWriter(const std::string& fname, const std::string& genome, dnapos_t len); 14 | ~SAMWriter(); 15 | void write(dnapos_t pos, const FastQRead& fqfrag, int indel=0, int flags=0, const std::string& rnext="*", dnapos_t pnext=0, int32_t tlen=0 ); 16 | private: 17 | FILE* d_fp; 18 | std::string d_fname; 19 | std::string d_genomeName; 20 | }; 21 | 22 | 23 | //!
Write BAM files, with support for paired-end read mappings 24 | class BAMWriter 25 | { 26 | public: 27 | BAMWriter(const std::string& fname, const std::string& genome, dnapos_t len); 28 | ~BAMWriter(); 29 | uint64_t write(dnapos_t pos, const FastQRead& fqfrag, int indel=0, int flags=0, const std::string& rnext="*", dnapos_t pnext=0, int32_t tlen=0 ); 30 | void qwrite(dnapos_t pos, const FastQRead& fqfrag, int indel=0, int flags=0, const std::string& rnext="*", dnapos_t pnext=0, int32_t tlen=0 ); 31 | void runQueue(StereoFASTQReader& sfq); 32 | private: 33 | 34 | std::string d_fname; 35 | std::string d_genomeName; 36 | BGZFWriter d_zw; 37 | FILE* d_baifp; 38 | struct Write 39 | { 40 | bool operator<(const Write& rhs) const 41 | { 42 | return pos < rhs.pos; 43 | } 44 | dnapos_t pos; 45 | uint64_t fpos; 46 | bool reversed; 47 | int indel; 48 | int flags; 49 | std::string rnext; 50 | dnapos_t pnext; 51 | int tlen; 52 | uint64_t voffset; 53 | unsigned int bin; 54 | }; 55 | std::vector d_queue; 56 | }; 57 | 58 | std::string bamCompress(const std::string& dna); 59 | -------------------------------------------------------------------------------- /gcscan.cc: -------------------------------------------------------------------------------- 1 | #include "refgenome2.hh" 2 | #include "geneannotated.hh" 3 | 4 | #include 5 | #include "misc.hh" 6 | #include 7 | #include 8 | #include "dnamisc.hh" 9 | #include 10 | #include 11 | #include 12 | #include "nucstore.hh" 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "bzlib.h" 21 | 22 | using namespace std; 23 | 24 | // reads one chromosome and bunches statistics about them together in two csv files 25 | // scan.csv: 26 | // codons.csv: 27 | 28 | int main(int argc, char**argv) 29 | { 30 | if(argc < 2) { 31 | cerr<<"Syntax: gcscan reference.fasta"< 28 | 29 | namespace TCLAP { 30 | 31 | class OptionalUnlabeledTracker 32 | { 33 | 34 | public: 35 | 36 | static void check( bool 
req, const std::string& argName ); 37 | 38 | static void gotOptional() { alreadyOptionalRef() = true; } 39 | 40 | static bool& alreadyOptional() { return alreadyOptionalRef(); } 41 | 42 | private: 43 | 44 | static bool& alreadyOptionalRef() { static bool ct = false; return ct; } 45 | }; 46 | 47 | 48 | inline void OptionalUnlabeledTracker::check( bool req, const std::string& argName ) 49 | { 50 | if ( OptionalUnlabeledTracker::alreadyOptional() ) 51 | throw( SpecificationException( 52 | "You can't specify ANY Unlabeled Arg following an optional Unlabeled Arg", 53 | argName ) ); 54 | 55 | if ( !req ) 56 | OptionalUnlabeledTracker::gotOptional(); 57 | } 58 | 59 | 60 | } // namespace TCLAP 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /geneannotated.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | //! A Gene annotation 10 | struct GeneAnnotation 11 | { 12 | boost::flyweight chromosome; 13 | std::string tag; 14 | boost::flyweight id; 15 | boost::flyweight parent; 16 | boost::flyweight type; 17 | boost::flyweight gene_biotype; 18 | boost::flyweight enclosing_gene; 19 | std::string name; 20 | bool strand; 21 | uint64_t startPos; 22 | uint64_t stopPos; 23 | bool gene{false}; 24 | }; 25 | 26 | inline bool operator<(const GeneAnnotation&A, const GeneAnnotation& B) 27 | { 28 | return A.startPos < B.startPos; 29 | } 30 | 31 | //! 
Provides GeneAnnotation objects as read from a GFF3 file 32 | class GeneAnnotationReader 33 | { 34 | public: 35 | GeneAnnotationReader(const std::string& fname); //!< Parse GFF3 from fname 36 | std::vector lookup(std::string_view chromosome, uint64_t pos); //!< Get all annotations for pos 37 | std::vector lookup(std::string_view chromosome, uint64_t pos1, uint64_t pos2); //!< Get all annotations for pos 38 | 39 | std::vector getAll(std::string_view chromosome); 40 | size_t countAll(std::string_view chromosome) const; 41 | uint64_t size() const 42 | { 43 | size_t ret{0}; 44 | for(const auto& ga : d_gas) 45 | ret += countAll(ga.first); 46 | return ret; 47 | } //!< Number of annotations known 48 | 49 | std::vector getChromosomes() 50 | { 51 | std::vector ret; 52 | for(const auto& ga : d_gas) 53 | ret.push_back(ga.first); 54 | return ret; 55 | } 56 | 57 | int32_t d_taxonID{-1}; 58 | private: 59 | typedef IntervalTree gas_t; 60 | void parseGenBank(const std::string& fname); 61 | std::map d_gas; 62 | }; 63 | 64 | std::vector parseGenBankString(const std::string& bank); 65 | -------------------------------------------------------------------------------- /gfftool.cc: -------------------------------------------------------------------------------- 1 | #include "refgenome2.hh" 2 | #include "geneannotated.hh" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "dnamisc.hh" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "misc.hh" 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | 18 | struct Node 19 | { 20 | std::string parent; 21 | vector children; 22 | std::string type; 23 | std::string tag; 24 | }; 25 | 26 | std::unordered_map nodes; 27 | 28 | int main(int argc, char **argv) 29 | { 30 | if(argc < 3) { 31 | cerr<<"Syntax: gfftool annotations.gff refgenome.fna offset1 [offset2]"<fullname<<" have "<chromosome.size()<<" nucleotides,"; 53 | map tcounts; 54 | unordered_set genes; 55 | for(const auto& a : anns) { 56 | 
if(a.type=="gene") { 57 | genes.insert(a.name); 58 | cout<<' '; 59 | if(a.strand) 60 | cout<<'+'; 61 | cout< 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | namespace TCLAP { 33 | 34 | /** 35 | * The interface that defines the interaction between the Arg and Constraint. 36 | */ 37 | template 38 | class Constraint 39 | { 40 | 41 | public: 42 | /** 43 | * Returns a description of the Constraint. 44 | */ 45 | virtual std::string description() const =0; 46 | 47 | /** 48 | * Returns the short ID for the Constraint. 49 | */ 50 | virtual std::string shortID() const =0; 51 | 52 | /** 53 | * The method used to verify that the value parsed from the command 54 | * line meets the constraint. 55 | * \param value - The value that will be checked. 56 | */ 57 | virtual bool check(const T& value) const =0; 58 | 59 | /** 60 | * Destructor. 61 | * Silences warnings about Constraint being a base class with virtual 62 | * functions but without a virtual destructor. 63 | */ 64 | virtual ~Constraint() { ; } 65 | }; 66 | 67 | } //namespace TCLAP 68 | #endif 69 | -------------------------------------------------------------------------------- /ext/libmba/mba/varray.h: -------------------------------------------------------------------------------- 1 | #ifndef MBA_VARRAY_H 2 | #define MBA_VARRAY_H 3 | 4 | /* varray - a variable sized array 5 | */ 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #ifndef LIBMBA_API 12 | #ifdef WIN32 13 | # ifdef LIBMBA_EXPORTS 14 | # define LIBMBA_API __declspec(dllexport) 15 | # else /* LIBMBA_EXPORTS */ 16 | # define LIBMBA_API __declspec(dllimport) 17 | # endif /* LIBMBA_EXPORTS */ 18 | #else /* WIN32 */ 19 | # define LIBMBA_API extern 20 | #endif /* WIN32 */ 21 | #endif /* LIBMBA_API */ 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | /* 28 | 0 1 32768 29 | 1 2 65536 30 | 2 4 131072 31 | 3 8 262144 32 | 4 16 524288 33 | 5 32 1048576 2^5 is default VARRAY_INIT_SIZE 34 | 6 64 2097152 35 | 7 128 4194304 
36 | 8 256 8388608 37 | 9 512 38 | 10 1024 39 | 11 2048 40 | 12 4096 41 | 13 8192 42 | 14 16384 43 | 15 32768 44 | 16 65536 45 | 17 131072 46 | 18 262144 47 | 19 524288 48 | 20 1048576 49 | 21 2097152 50 | 22 4194304 51 | 23 8388608 52 | */ 53 | 54 | #ifndef VARRAY_INIT_SIZE 55 | #define VARRAY_INIT_SIZE 5 56 | #endif 57 | 58 | struct varray { 59 | size_t size; /* element size */ 60 | ptrdiff_t al; /* relative offset of this object to allocator of bins */ 61 | ref_t bins[16]; /* 0 to 2^20 elements */ 62 | }; 63 | 64 | extern int varray_init(struct varray *va, size_t membsize, struct allocator *al); 65 | extern int varray_reinit(struct varray *va, struct allocator *al); 66 | extern int varray_deinit(struct varray *va); 67 | extern struct varray *varray_new(size_t membsize, struct allocator *al); 68 | extern int varray_del(void *va); 69 | extern int varray_release(struct varray *va, unsigned int from); 70 | extern void *varray_get(struct varray *va, unsigned int idx); 71 | extern int varray_index(struct varray *va, void *elem); 72 | extern void varray_iterate(void *va, iter_t *iter); 73 | extern void *varray_next(void *va, iter_t *iter); 74 | 75 | #ifdef __cplusplus 76 | } 77 | #endif 78 | 79 | #endif /* MBA_VARRAY_H */ 80 | -------------------------------------------------------------------------------- /digisplice.cc: -------------------------------------------------------------------------------- 1 | #include "refgenome.hh" 2 | #include "geneannotated.hh" 3 | #include 4 | #include "misc.hh" 5 | using namespace std; 6 | 7 | string 
casette{"GGCCTGGTGATGATGGCGGGATCGTTGTATATTTCTTGACACCTTTTCGGCATCGCCCTAAAATTCGGCGTCCTCATATTGTGTGAGGACGTTTTATTACGTGTTTACGAAGCAAAAGCTAAAACCAGGAGCTATTTAATGGCAACAGTTAACCAGCTGGTACGCAAACCACGTGCTCGCAAAGTTGCGAAAAGCAACGTGCCTGCGCTGGAAGCATGCCCGCAAAAACGTGGCGTATGTACTCGTGTATATACTACCACTCCTAAAAAACCGAACTCCGCGCTGCGTAAAGTATGCCGTGTTCGTCTGACTAACGGTTTCGAAGTGACTTCCTACATCGGTGGTGAAGGTCACAACCTGCAGGAGCACTCCGTGATCCTGATCCGTGGCGGTCGTGTTAAAGACCTCCCGGGTGTTCGTTACCACACCGTACGTGGTGCGCTTGACTGCTCCGGCGTTAAAGACCGTAAGCAGGCTCGTTCCAAGTATGGCGTGAAGCGTCCTAAGGCTTAAGGAGGACAATCATGATTGAACAAGATGGATTGCACGCAGGTTCTCCGGCCGCTTGGGTGGAGAGGCTATTCGGCTATGACTGGGCACAACAGACAATCGGCTGCTCTGATGCCGCCGTGTTCCGGCTGTCAGCGCAGGGGCGCCCGGTTCTTTTTGTCAAGACCGACCTGTCCGGTGCCCTGAATGAACTGCAGGACGAGGCAGCGCGGCTATCGTGGCTGGCCACGACGGGCGTTCCTTGCGCAGCTGTGCTCGACGTTGTCACTGAAGCGGGAAGGGACTGGCTGCTATTGGGCGAAGTGCCGGGGCAGGATCTCCTGTCATCTCACCTTGCTCCTGCCGAGAAAGTATCCATCATGGCTGATGCAATGCGGCGGCTGCATACGCTTGATCCGGCTACCTGCCCATTCGACCACCAAGCGAAACATCGCATCGAGCGAGCACGTACTCGGATGGAAGCCGGTCTTGTCGATCAGGATGATCTGGACGAAGAGCATCAGGGGCTCGCGCCAGCCGAACTGTTCGCCAGGCTCAAGGCGCGCATGCCCGACGGCGAGGATCTCGTCGTGACCCATGGCGATGCCTGCTTGCCGAATATCATGGTGGAAAATGGCCGCTTTTCTGGATTCATCGACTGTGGCCGGCTGGGTGTGGCGGACCGCTATCAGGACATAGCGTTGGCTACCCGTGATATTGCTGAAGAGCTTGGCGGCGAATGGGCTGACCGCTTCCTCGTGCTTTACGGTATCGCCGCTCCCGATTCGCAGCGCATCGCCTTCTATCGCCTTCTTGACGAGTTCTTCTGA"}; 8 | 9 | 10 | int main(int argc, char**argv) 11 | { 12 | ReferenceChromosome rg(argv[1]); 13 | // GeneAnnotationReader gar(argv[2]); 14 | cerr<"< 5 | #include "misc.hh" 6 | #include 7 | #include 8 | #include "dnamisc.hh" 9 | #include 10 | #include 11 | #include 12 | #include "nucstore.hh" 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "bzlib.h" 22 | #include "taxoreader.hh" 23 | 24 | using namespace std; 25 | 26 | /* grep a fasta file, per chromosome 27 | * 28 | */ 29 | 30 | int main(int argc, char**argv) 31 | { 32 | if(argc < 2) { 33 | cerr<<"Syntax: fagrep nucleotides fasta 
[nucleotides]"< 3) 40 | srch2 = NucleotideStore(argv[3]); 41 | 42 | string fnaname(argv[2]); 43 | ReferenceGenome rg(fnaname); 44 | 45 | // this is a pretty weak effort, full of bugs 46 | 47 | cout<<"Done reading reference genome from '"< 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | namespace TCLAP { 34 | 35 | class CmdLineInterface; 36 | class ArgException; 37 | 38 | /** 39 | * The interface that any output object must implement. 40 | */ 41 | class CmdLineOutput 42 | { 43 | 44 | public: 45 | 46 | /** 47 | * Virtual destructor. 48 | */ 49 | virtual ~CmdLineOutput() {} 50 | 51 | /** 52 | * Generates some sort of output for the USAGE. 53 | * \param c - The CmdLine object the output is generated for. 54 | */ 55 | virtual void usage(CmdLineInterface& c)=0; 56 | 57 | /** 58 | * Generates some sort of output for the version. 59 | * \param c - The CmdLine object the output is generated for. 60 | */ 61 | virtual void version(CmdLineInterface& c)=0; 62 | 63 | /** 64 | * Generates some sort of output for a failure. 65 | * \param c - The CmdLine object the output is generated for. 66 | * \param e - The ArgException that caused the failure. 67 | */ 68 | virtual void failure( CmdLineInterface& c, 69 | ArgException& e )=0; 70 | 71 | }; 72 | 73 | } //namespace TCLAP 74 | #endif 75 | -------------------------------------------------------------------------------- /ext/html/nvd3/LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | ##nvd3.js License 3 | 4 | Copyright (c) 2011, 2012 [Novus Partners, Inc.][novus] 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 
8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | 18 | [novus]: https://www.novus.com/ 19 | 20 | 21 | 22 | ##d3.js License 23 | 24 | Copyright (c) 2012, Michael Bostock 25 | All rights reserved. 26 | 27 | Redistribution and use in source and binary forms, with or without 28 | modification, are permitted provided that the following conditions are met: 29 | 30 | * Redistributions of source code must retain the above copyright notice, this 31 | list of conditions and the following disclaimer. 32 | 33 | * Redistributions in binary form must reproduce the above copyright notice, 34 | this list of conditions and the following disclaimer in the documentation 35 | and/or other materials provided with the distribution. 36 | 37 | * The name Michael Bostock may not be used to endorse or promote products 38 | derived from this software without specific prior written permission. 39 | 40 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 41 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 43 | DISCLAIMED. 
IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT, 44 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 45 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 46 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 47 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 48 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 49 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 50 | -------------------------------------------------------------------------------- /tclap/HelpVisitor.h: -------------------------------------------------------------------------------- 1 | 2 | /****************************************************************************** 3 | * 4 | * file: HelpVisitor.h 5 | * 6 | * Copyright (c) 2003, Michael E. Smoot . 7 | * All rights reverved. 8 | * 9 | * See the file COPYING in the top directory of this distribution for 10 | * more information. 11 | * 12 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 13 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 15 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 17 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 18 | * DEALINGS IN THE SOFTWARE. 19 | * 20 | *****************************************************************************/ 21 | 22 | #ifndef TCLAP_HELP_VISITOR_H 23 | #define TCLAP_HELP_VISITOR_H 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | namespace TCLAP { 30 | 31 | /** 32 | * A Visitor object that calls the usage method of the given CmdLineOutput 33 | * object for the specified CmdLine object. 
34 | */ 35 | class HelpVisitor: public Visitor 36 | { 37 | private: 38 | /** 39 | * Prevent accidental copying. 40 | */ 41 | HelpVisitor(const HelpVisitor& rhs); 42 | HelpVisitor& operator=(const HelpVisitor& rhs); 43 | 44 | protected: 45 | 46 | /** 47 | * The CmdLine the output will be generated for. 48 | */ 49 | CmdLineInterface* _cmd; 50 | 51 | /** 52 | * The output object. 53 | */ 54 | CmdLineOutput** _out; 55 | 56 | public: 57 | 58 | /** 59 | * Constructor. 60 | * \param cmd - The CmdLine the output will be generated for. 61 | * \param out - The type of output. 62 | */ 63 | HelpVisitor(CmdLineInterface* cmd, CmdLineOutput** out) 64 | : Visitor(), _cmd( cmd ), _out( out ) { } 65 | 66 | /** 67 | * Calls the usage method of the CmdLineOutput for the 68 | * specified CmdLine. 69 | */ 70 | void visit() { (*_out)->usage(*_cmd); throw ExitException(0); } 71 | 72 | }; 73 | 74 | } 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /taxoreader.cc: -------------------------------------------------------------------------------- 1 | #include "taxoreader.hh" 2 | #include "misc.hh" 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | 9 | vector vstringtok (const string_view in, 10 | const char * const delimiters = " \t\n") 11 | { 12 | const string::size_type len = in.length(); 13 | string::size_type i = 0; 14 | vector container; 15 | while (i parts = vstringtok(line, "|"); 49 | if(parts.size() < 3) 50 | continue; 51 | id=atoi(&(parts[0][0])); 52 | // cout<<"\n\tTaxonomy: "; 53 | if(parts.size() > 2) { 54 | vector parts2 = vstringtok(parts[2], ";"); 55 | vector store; 56 | store.reserve(parts2.size()); 57 | for(const auto& p2: parts2) { 58 | // cout<<"'"< TaxoReader::get(int id) const 71 | { 72 | vector ret; 73 | 74 | if(auto iter = d_store.find(id); iter != d_store.end()) 75 | return iter->second; 76 | return ret; 77 | } 78 | 
-------------------------------------------------------------------------------- /tclap/VersionVisitor.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | /****************************************************************************** 4 | * 5 | * file: VersionVisitor.h 6 | * 7 | * Copyright (c) 2003, Michael E. Smoot . 8 | * All rights reverved. 9 | * 10 | * See the file COPYING in the top directory of this distribution for 11 | * more information. 12 | * 13 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | * DEALINGS IN THE SOFTWARE. 20 | * 21 | *****************************************************************************/ 22 | 23 | 24 | #ifndef TCLAP_VERSION_VISITOR_H 25 | #define TCLAP_VERSION_VISITOR_H 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | namespace TCLAP { 32 | 33 | /** 34 | * A Visitor that will call the version method of the given CmdLineOutput 35 | * for the specified CmdLine object and then exit. 36 | */ 37 | class VersionVisitor: public Visitor 38 | { 39 | private: 40 | /** 41 | * Prevent accidental copying 42 | */ 43 | VersionVisitor(const VersionVisitor& rhs); 44 | VersionVisitor& operator=(const VersionVisitor& rhs); 45 | 46 | protected: 47 | 48 | /** 49 | * The CmdLine of interest. 50 | */ 51 | CmdLineInterface* _cmd; 52 | 53 | /** 54 | * The output object. 55 | */ 56 | CmdLineOutput** _out; 57 | 58 | public: 59 | 60 | /** 61 | * Constructor. 62 | * \param cmd - The CmdLine the output is generated for.
63 | * \param out - The type of output. 64 | */ 65 | VersionVisitor( CmdLineInterface* cmd, CmdLineOutput** out ) 66 | : Visitor(), _cmd( cmd ), _out( out ) { } 67 | 68 | /** 69 | * Calls the version method of the output object using the 70 | * specified CmdLine. 71 | */ 72 | void visit() { 73 | (*_out)->version(*_cmd); 74 | throw ExitException(0); 75 | } 76 | 77 | }; 78 | 79 | } 80 | 81 | #endif 82 | -------------------------------------------------------------------------------- /ext/libmba/mba/msgno.h: -------------------------------------------------------------------------------- 1 | #ifndef MBA_MSGNO_H 2 | #define MBA_MSGNO_H 3 | 4 | /* msgno - managing error codes and associated messages across 5 | * separate C libraries 6 | */ 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | #ifndef LIBMBA_API 13 | #ifdef WIN32 14 | # ifdef LIBMBA_EXPORTS 15 | # define LIBMBA_API __declspec(dllexport) 16 | # else /* LIBMBA_EXPORTS */ 17 | # define LIBMBA_API __declspec(dllimport) 18 | # endif /* LIBMBA_EXPORTS */ 19 | #else /* WIN32 */ 20 | # define LIBMBA_API extern 21 | #endif /* WIN32 */ 22 | #endif /* LIBMBA_API */ 23 | 24 | #define STR0(s) #s 25 | #define STR1(s) STR0(s) 26 | #define LINE_STRING STR1(__LINE__) 27 | #if (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) 28 | #define LOC0 __FILE__ ":" LINE_STRING ":" 29 | #define LOC1 __func__ 30 | #else 31 | #define LOC0 __FILE__ ":" 32 | #define LOC1 LINE_STRING 33 | #endif 34 | 35 | #define MMSG msgno_loc0(LOC0, LOC1); msgno_mmsg0 36 | #define MMNO msgno_loc0(LOC0, LOC1); msgno_mmno0 37 | #define MMNF msgno_loc0(LOC0, LOC1); msgno_mmnf0 38 | #define PMSG msgno_loc0("!" LOC0, LOC1); msgno_amsg0 39 | #define PMNO msgno_loc0("!" LOC0, LOC1); msgno_amno0 40 | #define PMNF msgno_loc0("!" 
LOC0, LOC1); msgno_amnf0 41 | #define AMSG msgno_loc0(LOC0, LOC1); msgno_amsg0 42 | #define AMNO msgno_loc0(LOC0, LOC1); msgno_amno0 43 | #define AMNF msgno_loc0(LOC0, LOC1); msgno_amnf0 44 | 45 | #define MCLR (msgno_buf[msgno_buf_idx = 0] = 0) 46 | 47 | #define NULL_POINTER_ERR msgno_builtin_codes[0].msgno 48 | 49 | struct msgno_entry { 50 | int msgno; 51 | const char *msg; 52 | }; 53 | 54 | extern struct msgno_entry msgno_builtin_codes[]; 55 | extern char msgno_buf[]; 56 | extern int msgno_buf_idx; 57 | extern int msgno_append(const char *src, int n); 58 | extern int msgno_loc0(const char *loc0, const char *loc1); 59 | extern int msgno_mmsg0(const char *fmt, ...); 60 | extern int msgno_mmno0(int msgno); 61 | extern int msgno_mmnf0(int msgno, const char *fmt, ...); 62 | extern int msgno_amsg0(const char *fmt, ...); 63 | extern int msgno_amno0(int msgno); 64 | extern int msgno_amnf0(int msgno, const char *fmt, ...); 65 | extern int msgno_hdlr_stderr(const char *fmt, ...); 66 | 67 | extern int (*msgno_hdlr)(const char *fmt, ...); 68 | extern int msgno_add_codes(struct msgno_entry *lst); 69 | extern const char *msgno_msg(int msgno); 70 | 71 | #ifdef __cplusplus 72 | } 73 | #endif 74 | 75 | #endif /* MBA_MSGNO_H */ 76 | -------------------------------------------------------------------------------- /fastq.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "zstuff.hh" 7 | 8 | //! Represents a FastQRead. Can be reversed or not. 9 | struct FastQRead 10 | { 11 | FastQRead() : reversed(false), position(0) {} 12 | std::string d_nucleotides; 13 | std::string d_quality; 14 | std::string d_header; 15 | std::string getNameFromHeader() const; 16 | bool exceedsQuality(unsigned int); 17 | std::string getSangerQualityString() const; 18 | void reverse(); 19 | bool reversed; 20 | uint64_t position; //!< Position in the source file. 
The 64 bits may encode the file too, it is not a number for the end user to use. Feed it to a FastQReader. 21 | 22 | bool operator<(const FastQRead& rhs) const 23 | { 24 | return std::tie(d_nucleotides, d_quality, reversed, position) < 25 | std::tie(rhs.d_nucleotides, rhs.d_quality, rhs.reversed, rhs.position); 26 | } 27 | 28 | }; 29 | 30 | //! Reads a single FASTQ file, and can seek in it. Does adapation of quality scores (Sanger by default) and and can also snip off first n or last n bases. 31 | class FASTQReader 32 | { 33 | public: 34 | FASTQReader(const std::string& str, unsigned int qoffset); 35 | void setTrim(unsigned int trimLeft, unsigned int trimRight) 36 | { 37 | d_snipLeft = trimLeft; 38 | d_snipRight = trimRight; 39 | } 40 | void seek(uint64_t pos) 41 | { 42 | d_reader->seek(pos); 43 | } 44 | uint64_t estimateReads(); 45 | unsigned int getRead(FastQRead* fq); //!< Get a FastQRead, return number of bytes read 46 | private: 47 | unsigned int d_qoffset; 48 | unsigned int d_snipLeft, d_snipRight; 49 | std::unique_ptr d_reader; 50 | }; 51 | 52 | //! Reads FASTQs from two (synchronised) files at a time. Does magic with 64 bits offsets to encode which of the two FASTQReader to read from. 
53 | class StereoFASTQReader 54 | { 55 | public: 56 | StereoFASTQReader(const std::string& name1, const std::string& name2, 57 | unsigned int qoffset) : d_fq1(name1, qoffset), d_fq2(name2, qoffset) 58 | {} 59 | 60 | void setTrim(unsigned int trimLeft, unsigned int trimRight); 61 | void seek(uint64_t pos); 62 | uint64_t estimateReads(); 63 | unsigned int getRead(uint64_t pos, FastQRead* fq2); 64 | unsigned int getReadPair(FastQRead* fq1, FastQRead* fq2); 65 | private: 66 | FASTQReader d_fq1, d_fq2; 67 | static uint64_t s_mask; 68 | }; 69 | -------------------------------------------------------------------------------- /chromopic.cc: -------------------------------------------------------------------------------- 1 | #include "refgenome2.hh" 2 | #include "geneannotated.hh" 3 | 4 | #include 5 | #include "misc.hh" 6 | #include 7 | #include 8 | #include "dnamisc.hh" 9 | #include 10 | #include 11 | #include 12 | #include "nucstore.hh" 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "bzlib.h" 21 | #include "taxoreader.hh" 22 | #include "csv-parser/csv.hpp" 23 | 24 | using namespace std; 25 | 26 | 27 | int main(int argc, char**argv) 28 | { 29 | if(argc < 2) { 30 | cerr<<"Syntax: chromopic reference.fasta"< a.startPos; pos--) { 72 | if(!(pos % 16)) 73 | piccsv< 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "misc.hh" 11 | 12 | using namespace std; 13 | // NOTE(review): main() uses argv[1], argv[2] and argv[3] without first checking argc 14 | // gffedit fasta gff newgff [insertpos insertlen] 15 | int main(int argc, char **argv) 16 | { 17 | ReferenceChromosome rg(argv[1]); 18 | FILE* fp =fopen(argv[2], "rb"); 19 | if(!fp) 20 | throw runtime_error("Unable to open '"+string(argv[2])+"' for reading GFF3: "+string(strerror(errno))); 21 | 22 | string line; 23 | 24 | map scounts; 25 | ofstream newgff(argv[3]); 26 | dnapos_t startInsert=argc > 4 ? atoi(argv[4]) : 0; 27 | int shiftInsert=argc > 5 ? 
atoi(argv[5]) : 0; 28 | while(stringfgets(fp, &line)) { 29 | GeneAnnotation ga; 30 | if(!line.empty() && line[0]=='#') { 31 | newgff< startInsert) 48 | ga.startPos += shiftInsert; 49 | newgff< startInsert) 54 | ga.stopPos += shiftInsert; 55 | 56 | newgff<> rcounts; 81 | int totCount=0; 82 | for(const auto& c : scounts) { 83 | rcounts.push_back({c.second, c.first}); 84 | totCount+=c.second; 85 | } 86 | sort(rcounts.begin(), rcounts.end()); 87 | for(auto c = rcounts.rbegin(); c != rcounts.rend(); ++c) { 88 | cout<<100.0*c->first/totCount<<"%\t"<second<<'\n'; 89 | if(c-rcounts.rbegin() > 10) 90 | break; 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /ext/libmba/mba/hashmap.h: -------------------------------------------------------------------------------- 1 | #ifndef MBA_HASHMAP_H 2 | #define MBA_HASHMAP_H 3 | 4 | /* hashmap - a rehashing hash map 5 | */ 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #ifndef LIBMBA_API 12 | #ifdef WIN32 13 | # ifdef LIBMBA_EXPORTS 14 | # define LIBMBA_API __declspec(dllexport) 15 | # else /* LIBMBA_EXPORTS */ 16 | # define LIBMBA_API __declspec(dllimport) 17 | # endif /* LIBMBA_EXPORTS */ 18 | #else /* WIN32 */ 19 | # define LIBMBA_API extern 20 | #endif /* WIN32 */ 21 | #endif /* LIBMBA_API */ 22 | 23 | #include 24 | #include 25 | 26 | #if USE_WCHAR 27 | #define hash_text hash_wcs 28 | #define cmp_text cmp_wcs 29 | #else 30 | #define hash_text hash_str 31 | #define cmp_text cmp_str 32 | #endif 33 | 34 | typedef unsigned long (*hash_fn)(const void *object, void *context); 35 | typedef int (*cmp_fn)(const void *object1, const void *object2, void *context); 36 | 37 | extern const int table_sizes[]; 38 | 39 | struct entry; 40 | 41 | struct hashmap { 42 | int table_size_index; 43 | ref_t hash; 44 | ref_t cmp; 45 | ref_t context; 46 | unsigned int size; 47 | unsigned int load_factor_high; 48 | unsigned int load_factor_low; 49 | ptrdiff_t al; 50 | ref_t table; 51 | }; 52 
| 53 | extern unsigned long hash_str(const void *str, void *context); 54 | extern unsigned long hash_wcs(const void *wcs, void *context); 55 | extern int cmp_str(const void *object1, const void *object2, void *context); 56 | extern int cmp_wcs(const void *object1, const void *object2, void *context); 57 | 58 | extern int hashmap_init(struct hashmap *h, 59 | unsigned int load_factor, 60 | hash_fn hash, 61 | cmp_fn cmp, 62 | void *context, 63 | struct allocator *al); 64 | 65 | extern int hashmap_deinit(struct hashmap *h, del_fn key_del, del_fn data_del, void *context); 66 | extern struct hashmap *hashmap_new(hash_fn hash, cmp_fn cmp, void *context, struct allocator *al); 67 | extern int hashmap_del(struct hashmap *h, del_fn key_del, del_fn data_del, void *context); 68 | extern int hashmap_clear(struct hashmap *h, del_fn key_del, del_fn data_del, void *context); 69 | extern int hashmap_clean(struct hashmap *h); 70 | 71 | extern int hashmap_put(struct hashmap *h, void *key, void *data); 72 | extern int hashmap_is_empty(struct hashmap *h); 73 | extern unsigned int hashmap_size(struct hashmap *h); 74 | extern void *hashmap_get(const struct hashmap *h, const void *key); 75 | extern void hashmap_iterate(void *h, iter_t *iter); 76 | extern void *hashmap_next(void *h, iter_t *iter); 77 | extern int hashmap_remove(struct hashmap *h, void **key, void **data); 78 | 79 | #ifdef __cplusplus 80 | } 81 | #endif 82 | 83 | #endif /* MBA_HASHMAP_H */ 84 | 85 | -------------------------------------------------------------------------------- /support.js: -------------------------------------------------------------------------------- 1 | function getPoints(item1, name1, item2, name2, item3, name3, item4, name4) { 2 | var ret = [ 3 | { 4 | values: [], 5 | key: name1, 6 | color: "#ff7f0e", 7 | }]; 8 | // NOTE(review): the fixed indices ret[1]..ret[3] below assume no earlier itemN was skipped; e.g. passing item3 without item2 leaves ret[2] undefined 9 | for (var i = 0; i < item1.length; i++) { 10 | ret[0].values.push({x: item1[i][0], y: item1[i][1] }); 11 | } 12 | if(typeof item2 != 'undefined') { 13 | ret.push({ values: [], key: name2, 
color: "#0f7f0e" }); 14 | for (var i = 0; i < item2.length; i++) { 15 | ret[1].values.push({x: item2[i][0], y: item2[i][1] }); 16 | } 17 | } 18 | 19 | if(typeof item3 != 'undefined') { 20 | ret.push({ values: [], key: name3, color: "#cccccc" }); 21 | for (var i = 0; i < item3.length; i++) { 22 | ret[2].values.push({x: item3[i][0], y: item3[i][1] }); 23 | } 24 | } 25 | 26 | if(typeof item4 != 'undefined') { 27 | ret.push({ values: [], key: name4, color: "#cccccc" }); 28 | for (var i = 0; i < item4.length; i++) { 29 | ret[3].values.push({x: item4[i][0], y: item4[i][1] }); 30 | } 31 | } 32 | 33 | return ret; 34 | } 35 | 36 | // Despite the name, getPoints5 accepts up to six (item, name) series. 37 | function getPoints5(item1, name1, item2, name2, item3, name3, item4, name4, item5, name5, item6, name6) { 38 | var ret = [ 39 | { 40 | values: [], 41 | key: name1, 42 | color: "#ff7f0e", 43 | }]; 44 | 45 | for (var i = 0; i < item1.length; i++) { 46 | ret[0].values.push({x: item1[i][0], y: item1[i][1] }); 47 | } 48 | if(typeof item2 != 'undefined') { 49 | ret.push({ values: [], key: name2, color: "#ff00ff" }); 50 | for (var i = 0; i < item2.length; i++) { 51 | ret[1].values.push({x: item2[i][0], y: item2[i][1] }); 52 | } 53 | } 54 | 55 | if(typeof item3 != 'undefined') { 56 | ret.push({ values: [], key: name3, color: "#ff0000" }); 57 | for (var i = 0; i < item3.length; i++) { 58 | ret[2].values.push({x: item3[i][0], y: item3[i][1] }); 59 | } 60 | } 61 | 62 | if(typeof item4 != 'undefined') { 63 | ret.push({ values: [], key: name4, color: "#00ff00" }); 64 | for (var i = 0; i < item4.length; i++) { 65 | ret[3].values.push({x: item4[i][0], y: item4[i][1] }); 66 | } 67 | } 68 | 69 | if(typeof item5 != 'undefined') { 70 | ret.push({ values: [], key: name5, color: "#0000ff" }); 71 | for (var i = 0; i < item5.length; i++) { 72 | ret[4].values.push({x: item5[i][0], y: item5[i][1] }); 73 | } 74 | } 75 | 76 | if(typeof item6 != 'undefined') { 77 | ret.push({ values: [], key: name6, color: "#000000" }); 78 | for (var i = 0; i < item6.length; i++) { 79 | 
ret[5].values.push({x: item6[i][0], y: item6[i][1] }); 80 | } 81 | } 82 | 83 | 84 | return ret; 85 | } 86 | 87 | -------------------------------------------------------------------------------- /ext/libmba/mba/allocator.h: -------------------------------------------------------------------------------- 1 | #ifndef MBA_ALLOCATOR_H 2 | #define MBA_ALLOCATOR_H 3 | 4 | /* allocator - allocate and free memory 5 | */ 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #ifndef LIBMBA_API 12 | #ifdef WIN32 13 | # ifdef LIBMBA_EXPORTS 14 | # define LIBMBA_API __declspec(dllexport) 15 | # else /* LIBMBA_EXPORTS */ 16 | # define LIBMBA_API __declspec(dllimport) 17 | # endif /* LIBMBA_EXPORTS */ 18 | #else /* WIN32 */ 19 | # define LIBMBA_API extern 20 | #endif /* WIN32 */ 21 | #endif /* LIBMBA_API */ 22 | 23 | #include 24 | 25 | #define ALAL(a) ((a) && (a) != stdlib_allocator ? (a) : (global_allocator ? global_allocator : 0)) 26 | #define ALREF(a,p) ((ref_t)((p) ? (char *)(p) - (char *)ALAL(a) : 0)) 27 | #define ALADR(a,r) ((void *)((r) ? 
(char *)ALAL(a) + (r) : NULL)) 28 | 29 | typedef size_t ref_t; /* suba offset from start of memory to object */ 30 | 31 | struct allocator; 32 | 33 | typedef void *(*alloc_fn)(struct allocator *al, size_t size, int flags); 34 | typedef void *(*realloc_fn)(struct allocator *al, void *obj, size_t size); 35 | typedef int (*free_fn)(void *al, void *obj); 36 | typedef int (*reclaim_fn)(struct allocator *al, void *arg, int attempt); 37 | typedef void *(*new_fn)(void *context, size_t size, int flags); 38 | typedef int (*del_fn)(void *context, void *object); 39 | 40 | struct allocator { 41 | unsigned char magic[8]; /* suba header identifier */ 42 | ref_t tail; /* offset to first cell in free list */ 43 | size_t mincell; /* min cell size must be at least sizeof cell */ 44 | size_t size; /* total size of memory area */ 45 | size_t alloc_total; /* total bytes utilized from this allocator */ 46 | size_t free_total; /* total bytes released from this allocator */ 47 | size_t size_total; /* total bytes requested from this allocator */ 48 | /* utilization = size_total / alloc_total * 100 49 | * e.g. 
50000.0 / 50911.0 * 100.0 = 98.2% 50 | */ 51 | size_t max_free; /* for debugging - any cell larger throws err */ 52 | alloc_fn alloc; 53 | realloc_fn realloc; 54 | free_fn free; 55 | /* for reaping memory from pool, varray, etc */ 56 | reclaim_fn reclaim; 57 | void *reclaim_arg; 58 | int reclaim_depth; 59 | ref_t userref; 60 | }; 61 | 62 | extern struct allocator *global_allocator; 63 | extern struct allocator *stdlib_allocator; 64 | 65 | extern void *allocator_alloc(struct allocator *al, size_t size, int flags); 66 | extern void *allocator_realloc(struct allocator *al, void *obj, size_t size); 67 | extern int allocator_free(void *al, void *obj); 68 | extern void allocator_set_reclaim(struct allocator *al, reclaim_fn recl, void *arg); 69 | 70 | #ifdef __cplusplus 71 | } 72 | #endif 73 | 74 | #endif /* MBA_ALLOCATOR_H */ 75 | 76 | -------------------------------------------------------------------------------- /refgenome2.cc: -------------------------------------------------------------------------------- 1 | #include "refgenome2.hh" 2 | #include 3 | #include 4 | #include 5 | #include "misc.hh" 6 | #include 7 | 8 | using namespace std; 9 | 10 | NucleotideStore ReferenceGenome::getRange(uint32_t offset, uint32_t len) const 11 | { 12 | auto iter=std::upper_bound(d_lookup.begin(), d_lookup.end(), offset, [](uint32_t offset, const auto& b) { 13 | return offset< b->offset; 14 | }); 15 | // NOTE(review): upper_bound yields end() for any offset at or beyond the last entry's offset, so the check below throws before the last entry can be tried -- confirm d_lookup carries a trailing sentinel 16 | if(iter == d_lookup.end()) 17 | throw std::range_error("Could not find chromosome for offset "+std::to_string(offset)+" and length "+std::to_string(len)); 18 | --iter; 19 | if((*iter)->offset <= offset && offset < (*iter)->offset + (*iter)->chromosome.size()) 20 | return (*iter)->chromosome.getRange(offset - (*iter)->offset, len); 21 | else 22 | throw std::range_error("Could not find chromosome for offset "+std::to_string(offset)+" and length "+std::to_string(len)); 23 | } 24 | 25 | ReferenceGenome::ReferenceGenome(const boost::string_ref& fname, std::function idx) : d_fname(fname) 
26 | { 27 | gzFile fp = gzopen(d_fname.c_str(), "rb"); 28 | if(!fp) 29 | throw runtime_error("Unable to open reference genome file '"+d_fname+"': "+string(strerror(errno))); 30 | 31 | 32 | char line[256]=""; 33 | string name; 34 | ReferenceGenome::Chromosome* chromosome=0; 35 | 36 | vector running; 37 | uint32_t seenSoFar=0; 38 | 39 | while(gzgets(fp, line, sizeof(line))) { 40 | chomp(line); 41 | 42 | if(line[0] == '>') { 43 | if(chromosome && idx) { 44 | running.emplace_back(idx, chromosome, name); 45 | } 46 | 47 | string fullname=line+1; 48 | 49 | char* spacepos=strchr(line+1, ' '); 50 | 51 | if(spacepos) 52 | *spacepos=0; 53 | name=line+1; 54 | 55 | if(chromosome) 56 | seenSoFar += chromosome->chromosome.size(); 57 | d_genome[name].offset = seenSoFar; 58 | d_genome[name].fullname = fullname; 59 | 60 | chromosome = &d_genome[name]; 61 | 62 | cout<<"Reading chromosome "<chromosome.append(line); 67 | } 68 | catch(std::exception& e) { 69 | cerr<<"Problem storing line "<offset < b->offset; 85 | }); 86 | 87 | 88 | 89 | cout<<"Done reading, awaiting threads"< 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | // this allows us to make a Case Insensitive container 14 | struct CIStringCompare: public std::binary_function 15 | { 16 | bool operator()(const string& a, const string& b) const 17 | { 18 | if(std::all_of(a.begin(), a.end(), ::isdigit) && 19 | std::all_of(b.begin(), b.end(), ::isdigit)) 20 | return atoi(a.c_str()) < atoi(b.c_str()); 21 | return strcasecmp(a.c_str(), b.c_str()) < 0; 22 | } 23 | }; 24 | 25 | int main(int argc, char**argv) 26 | { 27 | typedef map, CIStringCompare> presence_t; 28 | presence_t presence; 29 | 30 | string line; 31 | cout << '\t'; 32 | for(int n = 1; n < argc; ++n) { 33 | cout << argv[n] << '\t'; 34 | ifstream ifs(argv[n]); 35 | if(!ifs) { 36 | cerr<<"Unable to open '"<(argc-1))).first; 47 | } 48 | iter->second[n-1]=1; 49 | } 50 | } 51 | cout << '\n'; 52 | 53 | // this 
is where we store the reverse map, 'presence groups', so which lines were present in file1, but not file2 etc 54 | typedef map, vector > revpresence_t; 55 | revpresence_t revpresence; 56 | 57 | for(const auto& val : presence) { 58 | revpresence[val.second].push_back(val.first); 59 | cout << val.first << '\t'; 60 | for (boost::dynamic_bitset<>::size_type i = 0; i < val.second.size(); ++i) { 61 | cout << val.second[i] << '\t'; 62 | } 63 | cout << endl; 64 | } 65 | 66 | cout << "\nPer group output\t\n"; 67 | for(const auto& val : revpresence) { 68 | cout<<"\nGroup: \t"; 69 | for (boost::dynamic_bitset<>::size_type i = 0; i < val.first.size(); ++i) { 70 | cout << val.first[i] << '\t'; 71 | } 72 | cout << endl << " \t"; 73 | for (boost::dynamic_bitset<>::size_type i = 0; i < val.first.size(); ++i) { 74 | if(val.first[i]) 75 | cout< 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | //! Virtual base for seekable line readers 13 | class LineReader 14 | { 15 | public: 16 | virtual ~LineReader() {} 17 | // virtual bool getLine(std::string* str) = 0; 18 | virtual char* fgets(char* line, int num) = 0; 19 | virtual void seek(uint64_t pos) = 0; 20 | virtual uint64_t getUncPos()=0; 21 | virtual void unget(char *line) = 0; 22 | virtual uint64_t uncompressedSize() = 0; 23 | static std::unique_ptr make(const std::string& fname); 24 | }; 25 | 26 | //! A plain text seekable line reader 27 | class PlainLineReader : public LineReader, boost::noncopyable 28 | { 29 | public: 30 | PlainLineReader(const std::string& fname); 31 | ~PlainLineReader(); 32 | // bool getLine(std::string* str) = 0; 33 | char* fgets(char* line, int num); 34 | void seek(uint64_t pos); 35 | uint64_t getUncPos(); 36 | void unget(char *line); 37 | uint64_t uncompressedSize(); 38 | private: 39 | FILE* d_fp; 40 | std::string d_stash; 41 | }; 42 | 43 | 44 | //! 
A gzipped compressed seekable line reader 45 | class ZLineReader : public LineReader, boost::noncopyable 46 | { 47 | public: 48 | ZLineReader(const std::string& fname); 49 | ~ZLineReader(); 50 | // bool getLine(std::string* str); 51 | char* fgets(char* line, int num); 52 | void unget(char *line); 53 | uint64_t getUncPos() 54 | { 55 | return d_uncPos; 56 | } 57 | uint64_t uncompressedSize(); 58 | void seek(uint64_t pos); 59 | private: 60 | bool getChar(char* c); 61 | void skip(uint64_t toSkip); 62 | FILE* d_fp; 63 | 64 | struct ZState { 65 | ZState(); 66 | ZState(const ZState& orig); 67 | ~ZState(); 68 | ZState& operator=(const ZState& rhs); 69 | uint64_t fpos; 70 | z_stream s; 71 | } d_zs; 72 | int d_have; 73 | int d_datapos; 74 | 75 | char d_inbuffer[4096], d_outbuffer[32768]; 76 | std::map d_restarts; 77 | uint64_t d_uncPos; 78 | bool d_haveSeeked; 79 | std::string d_stash; 80 | }; 81 | 82 | class BGZFWriter 83 | { 84 | public: 85 | BGZFWriter(const std::string& fname); 86 | ~BGZFWriter(); 87 | uint64_t write(const char*, unsigned int len); 88 | 89 | void write32(uint32_t val); 90 | void writeBAMString(const std::string& str); 91 | void emitBlock(bool force=false); 92 | private: 93 | 94 | void beginBlock(); 95 | FILE* d_fp; 96 | z_stream d_s; 97 | std::string d_extra; 98 | gz_header d_gzheader; 99 | std::string d_block; 100 | uint32_t d_written; 101 | uint64_t d_blockstartpos; 102 | }; 103 | 104 | void emitBGZF(FILE* fp, const std::string& block); 105 | -------------------------------------------------------------------------------- /charsample.cc: -------------------------------------------------------------------------------- 1 | #include "refgenome2.hh" 2 | #include "geneannotated.hh" 3 | 4 | #include 5 | #include "misc.hh" 6 | #include 7 | #include 8 | #include "dnamisc.hh" 9 | #include 10 | #include 11 | #include 12 | #include "nucstore.hh" 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 
"bzlib.h" 21 | #include "taxoreader.hh" 22 | 23 | using namespace std; 24 | 25 | /* reads a genome and samples it for c/g deltas 26 | * This code ignores chromosomes smaller than 1 million bp, which mostly rids us of confusing plasmids, viruses etc 27 | */ 28 | 29 | int main(int argc, char**argv) 30 | { 31 | if(argc < 2) { 32 | cerr<<"Syntax: charsample reference.fasta"<>. This is the default value type. 37 | */ 38 | struct ValueLike { 39 | typedef ValueLike ValueCategory; 40 | virtual ~ValueLike() {} 41 | }; 42 | 43 | /** 44 | * A string like argument value type is a value that can be set using 45 | * operator=(string). Useful if the value type contains spaces which 46 | * will be broken up into individual tokens by operator>>. 47 | */ 48 | struct StringLike { 49 | virtual ~StringLike() {} 50 | }; 51 | 52 | /** 53 | * A class can inherit from this object to make it have string like 54 | * traits. This is a compile time thing and does not add any overhead 55 | * to the inheriting class. 56 | */ 57 | struct StringLikeTrait { 58 | typedef StringLike ValueCategory; 59 | virtual ~StringLikeTrait() {} 60 | }; 61 | 62 | /** 63 | * A class can inherit from this object to make it have value like 64 | * traits. This is a compile time thing and does not add any overhead 65 | * to the inheriting class. 66 | */ 67 | struct ValueLikeTrait { 68 | typedef ValueLike ValueCategory; 69 | virtual ~ValueLikeTrait() {} 70 | }; 71 | 72 | /** 73 | * Arg traits are used to get compile type specialization when parsing 74 | * argument values. Using an ArgTraits you can specify the way that 75 | * values gets assigned to any particular type during parsing. The two 76 | * supported types are StringLike and ValueLike. 
77 | */ 78 | template 79 | struct ArgTraits { 80 | typedef typename T::ValueCategory ValueCategory; 81 | virtual ~ArgTraits() {} 82 | //typedef ValueLike ValueCategory; 83 | }; 84 | 85 | #endif 86 | 87 | } // namespace 88 | -------------------------------------------------------------------------------- /pfqgrep.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "misc.hh" 3 | #include "fastq.hh" 4 | #include 5 | using namespace std; 6 | 7 | map g_overlaps; 8 | 9 | bool tryMerge(const FastQRead& one, const FastQRead& two, FastQRead* together) 10 | { 11 | FastQRead inv(two); 12 | inv.reverse(); 13 | 14 | if(inv.d_nucleotides.find(one.d_nucleotides.substr(0, 19)) != string::npos) { 15 | for(int overlap = one.d_nucleotides.length() ; overlap > 19; --overlap) { 16 | if(one.d_nucleotides.substr(0, overlap) == inv.d_nucleotides.substr(inv.d_nucleotides.length()-overlap)) { 17 | g_overlaps[overlap]++; 18 | // cerr<<"Got overlap of "<d_nucleotides = inv.d_nucleotides; 22 | together->d_nucleotides = two.d_nucleotides.substr(overlap); 23 | // NOTE(review): line 22 assigns with '=' (from `two`) where the second branch (line 41) appends with '+=' (from `inv`), so the value stored on line 21 is overwritten -- confirm intent 24 | // cerr<d_nucleotides< 19; --overlap) { 34 | if(one.d_nucleotides.substr(one.d_nucleotides.length()-overlap) == inv.d_nucleotides.substr(0, overlap)) { 35 | 36 | g_overlaps[overlap]++; 37 | // cerr<<"Got overlap of "<d_nucleotides = one.d_nucleotides; 41 | together->d_nucleotides += inv.d_nucleotides.substr(overlap); 42 | // cerr<d_nucleotides< 14 | int format_float(char* buf, std::size_t size, const char* format, int precision, 15 | T value) { 16 | #ifdef FMT_FUZZ 17 | if (precision > 100000) 18 | throw std::runtime_error( 19 | "fuzz mode - avoid large allocation inside snprintf"); 20 | #endif 21 | // Suppress the warning about nonliteral format string. 22 | int (*snprintf_ptr)(char*, size_t, const char*, ...) = FMT_SNPRINTF; 23 | return precision < 0 ? 
snprintf_ptr(buf, size, format, value) 24 | : snprintf_ptr(buf, size, format, precision, value); 25 | } 26 | 27 | template FMT_API dragonbox::decimal_fp dragonbox::to_decimal(float x) 28 | FMT_NOEXCEPT; 29 | template FMT_API dragonbox::decimal_fp dragonbox::to_decimal(double x) 30 | FMT_NOEXCEPT; 31 | } // namespace detail 32 | 33 | // Workaround a bug in MSVC2013 that prevents instantiation of format_float. 34 | int (*instantiate_format_float)(double, int, detail::float_specs, 35 | detail::buffer&) = detail::format_float; 36 | 37 | #ifndef FMT_STATIC_THOUSANDS_SEPARATOR 38 | template FMT_API detail::locale_ref::locale_ref(const std::locale& loc); 39 | template FMT_API std::locale detail::locale_ref::get() const; 40 | #endif 41 | 42 | // Explicit instantiations for char. 43 | 44 | template FMT_API auto detail::thousands_sep_impl(locale_ref) 45 | -> thousands_sep_result; 46 | template FMT_API char detail::decimal_point_impl(locale_ref); 47 | 48 | template FMT_API void detail::buffer::append(const char*, const char*); 49 | 50 | // DEPRECATED! 51 | // There is no correspondent extern template in format.h because of 52 | // incompatibility between clang and gcc (#2377). 53 | template FMT_API void detail::vformat_to( 54 | detail::buffer&, string_view, 55 | basic_format_args, detail::locale_ref); 56 | 57 | template FMT_API int detail::snprintf_float(double, int, detail::float_specs, 58 | detail::buffer&); 59 | template FMT_API int detail::snprintf_float(long double, int, 60 | detail::float_specs, 61 | detail::buffer&); 62 | template FMT_API int detail::format_float(double, int, detail::float_specs, 63 | detail::buffer&); 64 | template FMT_API int detail::format_float(long double, int, detail::float_specs, 65 | detail::buffer&); 66 | 67 | // Explicit instantiations for wchar_t. 
68 | 69 | template FMT_API auto detail::thousands_sep_impl(locale_ref) 70 | -> thousands_sep_result; 71 | template FMT_API wchar_t detail::decimal_point_impl(locale_ref); 72 | 73 | template FMT_API void detail::buffer::append(const wchar_t*, 74 | const wchar_t*); 75 | 76 | template struct detail::basic_data; 77 | 78 | FMT_END_NAMESPACE 79 | -------------------------------------------------------------------------------- /fastqindex.cc: -------------------------------------------------------------------------------- 1 | #include "fastqindex.hh" 2 | #include "misc.hh" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | using namespace std; 9 | 10 | extern "C" { 11 | #include "hash.h" 12 | } 13 | 14 | unique_ptr > indexFASTQ(FASTQReader* fqreader, const std::string& fname, int chunklen) 15 | { 16 | unique_ptr > hpos(new vector()); 17 | FILE* fp=fopen((fname+".index").c_str(), "rb"); 18 | 19 | if(fp) { 20 | auto size=filesize((fname+".index").c_str()); 21 | if(size % sizeof(HashedPos)) { 22 | fclose(fp); 23 | throw runtime_error("Index has wrong size. 
Sizeof(HashedPos): "+boost::lexical_cast(sizeof(HashedPos))); 24 | } 25 | hpos->resize(size/sizeof(HashedPos)); 26 | if(fread(&(*hpos)[0], 1, size, fp) != size) 27 | throw runtime_error("Index corrupt"); // NOTE(review): fp is still open when this throws 28 | fclose(fp); 29 | return hpos; 30 | } 31 | cerr<<"Indexing "<getRead(&fqr)) { 34 | uint32_t h = qhash(fqr.d_nucleotides.c_str(), chunklen, 0); 35 | hpos->push_back({h, fqr.position}); 36 | fqr.reverse(); 37 | h = qhash(fqr.d_nucleotides.c_str(), chunklen, 0); 38 | hpos->push_back({h, fqr.position}); 39 | } 40 | std::sort(hpos->begin(), hpos->end()); 41 | 42 | fp=fopen((fname+".index").c_str(), "w"); // NOTE(review): the result is not checked before the fwrite calls below 43 | for(const auto& hpo : *hpos) { 44 | fwrite(&hpo.hash, 1, sizeof(hpo.hash), fp); 45 | fwrite(&hpo.position, 1, sizeof(hpo.position), fp); // NOTE(review): fields are written one by one, but the loader above sizes and reads whole HashedPos structs -- these agree only if HashedPos has no padding; confirm 46 | } 47 | fclose(fp); 48 | 49 | return hpos; 50 | } 51 | std::map, FastQRead> g_cache; 52 | 53 | std::unordered_set g_skip; 54 | vector getConsensusMatches(const std::string& consensus, const map > >& fhpos, int chunklen) 55 | { 56 | vector ret; 57 | if(consensus.find('N') != string::npos) 58 | return ret; 59 | 60 | uint32_t h = qhash(consensus.c_str(), chunklen, 0); 61 | if(g_skip.count(h)) 62 | return ret; 63 | 64 | // cout<<"Looking for "< options; 68 | bool hadSomething=false; 69 | for(auto& hpos : fhpos) { 70 | auto range = equal_range(hpos.second->begin(), hpos.second->end(), fnd); 71 | for(;range.first != range.second; ++range.first) { 72 | hadSomething=true; 73 | FastQRead fqr; 74 | // cout<<"\tFound potential hit at offset "<position<<"!"<position))) { 77 | fqr = g_cache[make_pair(hpos.first, (uint64_t)range.first->position)]; 78 | } 79 | else { 80 | hpos.first->seek(range.first->position); 81 | hpos.first->getRead(&fqr); 82 | 83 | } 84 | if(fqr.d_nucleotides.compare(0,chunklen, consensus, 0, chunklen) != 0) { 85 | fqr.reverse(); 86 | 87 | if(fqr.d_nucleotides.compare(0,chunklen, consensus, 0, chunklen) != 0) { 88 | continue; 89 | } 90 | // g_cache[make_pair(hpos.first, (uint64_t)range.first->position)] = fqr; 91 | 
} 92 | else 93 | ; // g_cache[make_pair(hpos.first, (uint64_t)range.first->position)] = fqr; 94 | ret.push_back(fqr); 95 | } 96 | } 97 | if(!hadSomething) 98 | g_skip.insert(h); 99 | return ret; 100 | } 101 | -------------------------------------------------------------------------------- /ext/nr_c304/code/amoeba.h: -------------------------------------------------------------------------------- 1 | bool g_pleaseStop; 2 | // NOTE(review): g_pleaseStop is a non-inline global defined in a header; including this file from more than one translation unit would duplicate the definition 3 | struct Amoeba { 4 | const Doub ftol; 5 | Int nfunc; 6 | Int mpts; 7 | Int ndim; 8 | Doub fmin; 9 | VecDoub y; 10 | MatDoub p; 11 | Amoeba(const Doub ftoll) : ftol(ftoll) {} 12 | template 13 | VecDoub minimize(VecDoub_I &point, const Doub del, T &func) 14 | { 15 | VecDoub dels(point.size(),del); 16 | return minimize(point,dels,func); 17 | } 18 | template 19 | VecDoub minimize(VecDoub_I &point, VecDoub_I &dels, T &func) 20 | { 21 | Int ndim=point.size(); 22 | MatDoub pp(ndim+1,ndim); 23 | for (Int i=0;i 31 | VecDoub minimize(MatDoub_I &pp, T &func) 32 | { 33 | const Int NMAX=64; 34 | const Doub TINY=1.0e-10; 35 | Int ihi,ilo,inhi; 36 | mpts=pp.nrows(); 37 | ndim=pp.ncols(); 38 | VecDoub psum(ndim),pmin(ndim),x(ndim); 39 | p=pp; 40 | y.resize(mpts); 41 | for (Int i=0;iy[1] ? 
(inhi=1,0) : (inhi=0,1); 52 | 53 | 54 | 55 | for (Int i=0;i y[ihi]) { 58 | inhi=ihi; 59 | ihi=i; 60 | } else if (y[i] > y[inhi] && i != ihi) inhi=i; 61 | } 62 | 63 | if (!(nfunc%NMAX)) { 64 | // cout << y[ihi] << " - " << y[ilo] << endl; 65 | } 66 | 67 | Doub rtol=2.0*abs(y[ihi]-y[ilo])/(abs(y[ihi])+abs(y[ilo])+TINY); 68 | if (rtol < ftol || g_pleaseStop) { 69 | SWAP(y[0],y[ilo]); 70 | for (Int i=0;i= y[inhi]) { 82 | Doub ysave=y[ihi]; 83 | ytry=amotry(p,y,psum,ihi,0.5,func); 84 | if (ytry >= ysave) { 85 | for (Int i=0;i 108 | Doub amotry(MatDoub_IO &p, VecDoub_O &y, VecDoub_IO &psum, 109 | const Int ihi, const Doub fac, T &func) 110 | { 111 | VecDoub ptry(ndim); 112 | Doub fac1=(1.0-fac)/ndim; 113 | Doub fac2=fac1-fac; 114 | for (Int j=0;j 5 | #include "misc.hh" 6 | #include 7 | #include 8 | #include "dnamisc.hh" 9 | #include 10 | #include 11 | #include 12 | #include "nucstore.hh" 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "bzlib.h" 22 | #include "taxoreader.hh" 23 | 24 | using namespace std; 25 | 26 | /* reads a whole bunch of genomes, and it construes a filename with gff3 for them too 27 | * based on this it emits a big cpgstats.csv file with statistics on frequency of CpG 28 | * 29 | * The GFF is used to determine if we are in a gene or not 30 | * 31 | */ 32 | 33 | int main(int argc, char**argv) 34 | { 35 | if(argc < 2) { 36 | cerr<<"Syntax: cpgstats reference.fasta"< gpos, gneg; 69 | 70 | vector sgpos, sgneg; 71 | for(const auto& a: annos) { 72 | if(a.type=="gene" && a.strand == 1) 73 | sgpos.push_back(a.startPos); 74 | if(a.type=="gene" && a.strand == 0) 75 | sgneg.push_back(a.stopPos); 76 | 77 | } 78 | sort(sgpos.begin(), sgpos.end()); 79 | sort(sgneg.begin(), sgneg.end()); 80 | int gcount=0; 81 | for(const auto& sgp : sgpos) 82 | gpos[sgp]=gcount++; 83 | gcount=0; 84 | for(const auto& sgn : sgneg) 85 | gneg[sgn]=gcount++; 86 | 87 | int cgcount=0, gccount=0; 88 | char prev = 
chr.get(0); 89 | 90 | for(uint64_t s = 1 ; s < chr.size(); ++s) { 91 | 92 | char cur = chr.get(s); 93 | if(prev=='C' && cur=='G') 94 | cgcount++; 95 | 96 | prev = cur; 97 | 98 | if(!(s%256)) { 99 | int gposcount=gpos.rbegin()->second, gnegcount=gneg.rbegin()->second; // NOTE(review): rbegin() is dereferenced unconditionally; if no matching gene annotation filled gpos or gneg the map is empty here -- confirm this cannot happen 100 | if(auto iter = gpos.lower_bound(s); iter != gpos.end()) 101 | gposcount=iter->second; 102 | if(auto iter = gneg.lower_bound(s); iter != gneg.end()) 103 | gnegcount=iter->second; 104 | cpgstats< 5 | #include 6 | #include 7 | #include 8 | #include "dnamisc.hh" 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "misc.hh" 14 | #include 15 | #include 16 | 17 | using namespace std; 18 | 19 | 20 | int main(int argc, char **argv) 21 | { 22 | if(argc != 2) { 23 | cerr<<"Syntax: cluster intronsfile"< introns; 54 | for (csv::CSVRow& row: reader) { // Input iterator 55 | introns.push_back({ 56 | row[chrpos].get(), 57 | row[genepos].get(), 58 | row[startpos].get(), 59 | row[stoppos].get(), 60 | row[sensepos].get(), 61 | NucleotideStore(row[intronpos].get())}); 62 | } 63 | cout<<"Have "< 0.15 * a.dna.size()) { 79 | cout<<"A: "<> iIndex; 95 | unsigned int slen=20; 96 | unsigned int ctr=0; 97 | for(auto iter = introns.cbegin() ; iter != introns.cend(); ++iter) { 98 | if(iter->size() < slen) 99 | continue; 100 | for(unsigned int p = 0 ; p < iter->size() - slen; ++p) { 101 | auto r = iter->getRange(p, slen).toASCII(); 102 | iIndex[r].insert(iter); 103 | } 104 | if(!((++ctr)%256)) { 105 | cout<<"\r"<> rindex; 112 | for(const auto& i : iIndex) { 113 | rindex.emplace_back(i.second.size(), i.first); 114 | } 115 | sort(rindex.begin(), rindex.end()); 116 | for(auto iter = rindex.rbegin(); iter != rindex.rend(); ++iter) { 117 | cout<first<<' '<second<<'\n'; 118 | } 119 | #endif 120 | } 121 | -------------------------------------------------------------------------------- /tclap/ValuesConstraint.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | 
/****************************************************************************** 4 | * 5 | * file: ValuesConstraint.h 6 | * 7 | * Copyright (c) 2005, Michael E. Smoot 8 | * All rights reverved. 9 | * 10 | * See the file COPYING in the top directory of this distribution for 11 | * more information. 12 | * 13 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | * DEALINGS IN THE SOFTWARE. 20 | * 21 | *****************************************************************************/ 22 | 23 | #ifndef TCLAP_VALUESCONSTRAINT_H 24 | #define TCLAP_VALUESCONSTRAINT_H 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | #ifdef HAVE_CONFIG_H 31 | #include 32 | #else 33 | #define HAVE_SSTREAM 34 | #endif 35 | 36 | #if defined(HAVE_SSTREAM) 37 | #include 38 | #elif defined(HAVE_STRSTREAM) 39 | #include 40 | #else 41 | #error "Need a stringstream (sstream or strstream) to compile!" 42 | #endif 43 | 44 | namespace TCLAP { 45 | 46 | /** 47 | * A Constraint that constrains the Arg to only those values specified 48 | * in the constraint. 49 | */ 50 | template 51 | class ValuesConstraint : public Constraint 52 | { 53 | 54 | public: 55 | 56 | /** 57 | * Constructor. 58 | * \param allowed - vector of allowed values. 59 | */ 60 | ValuesConstraint(std::vector& allowed); 61 | 62 | /** 63 | * Virtual destructor. 64 | */ 65 | virtual ~ValuesConstraint() {} 66 | 67 | /** 68 | * Returns a description of the Constraint. 69 | */ 70 | virtual std::string description() const; 71 | 72 | /** 73 | * Returns the short ID for the Constraint. 
74 | */ 75 | virtual std::string shortID() const; 76 | 77 | /** 78 | * The method used to verify that the value parsed from the command 79 | * line meets the constraint. 80 | * \param value - The value that will be checked. 81 | */ 82 | virtual bool check(const T& value) const; 83 | 84 | protected: 85 | 86 | /** 87 | * The list of valid values. 88 | */ 89 | std::vector _allowed; 90 | 91 | /** 92 | * The string used to describe the allowed values of this constraint. 93 | */ 94 | std::string _typeDesc; 95 | 96 | }; 97 | 98 | template 99 | ValuesConstraint::ValuesConstraint(std::vector& allowed) 100 | : _allowed(allowed), 101 | _typeDesc("") 102 | { 103 | for ( unsigned int i = 0; i < _allowed.size(); i++ ) 104 | { 105 | 106 | #if defined(HAVE_SSTREAM) 107 | std::ostringstream os; 108 | #elif defined(HAVE_STRSTREAM) 109 | std::ostrstream os; 110 | #else 111 | #error "Need a stringstream (sstream or strstream) to compile!" 112 | #endif 113 | 114 | os << _allowed[i]; 115 | 116 | std::string temp( os.str() ); 117 | 118 | if ( i > 0 ) 119 | _typeDesc += "|"; 120 | _typeDesc += temp; 121 | } 122 | } 123 | 124 | template 125 | bool ValuesConstraint::check( const T& val ) const 126 | { 127 | if ( std::find(_allowed.begin(),_allowed.end(),val) == _allowed.end() ) 128 | return false; 129 | else 130 | return true; 131 | } 132 | 133 | template 134 | std::string ValuesConstraint::shortID() const 135 | { 136 | return _typeDesc; 137 | } 138 | 139 | template 140 | std::string ValuesConstraint::description() const 141 | { 142 | return _typeDesc; 143 | } 144 | 145 | 146 | } //namespace TCLAP 147 | #endif 148 | 149 | -------------------------------------------------------------------------------- /viewer.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 54 | 55 |
56 |
57 | 58 |
59 |

Antonie 0.0 Big View

60 |
62 |

63 | Antonie is open source 64 | software, developed at the Beaumont lab at TU Delft. If you've benefited from our 67 | work, please cite xyz. 68 |

69 |

70 | 71 |

72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 |
NumDiff Filter
Percentage Filter
Genes only
Non-synonymous only
Inserts only
Deletes only
Remove universal
81 |
82 |

83 |
84 |

85 | 86 | 87 | 88 | 89 | 90 | 91 | 93 |
94 | 95 | 98 | 99 | 100 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /chagstats.cc: -------------------------------------------------------------------------------- 1 | #include "refgenome2.hh" 2 | #include "geneannotated.hh" 3 | 4 | #include 5 | #include "misc.hh" 6 | #include 7 | #include 8 | #include "dnamisc.hh" 9 | #include 10 | #include 11 | #include 12 | #include "nucstore.hh" 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "bzlib.h" 21 | #include "taxoreader.hh" 22 | 23 | using namespace std; 24 | 25 | /* reads a whole bunch of genomes, and it construes a filename with gff3 for them too 26 | * based on this it emits a big chagraff.csv file with statistics for nucleotide 27 | * triplets 28 | * 29 | * The GFF is used to determine the codon position, or if we are in a gene or not 30 | * 31 | * This code ignores chromosomes smaller than 1 million bp, which mostly rids us of confusing plasmids, viruses etc 32 | */ 33 | 34 | int main(int argc, char**argv) 35 | { 36 | if(argc < 2) { 37 | cerr<<"Syntax: chagstats reference.fasta"< ngramcount; 63 | visitAllNgrams([&ngramcount](const auto& str) { ngramcount[str]=0; }, ngramlen ); 64 | 65 | for(int n=1; n < argc; ++n) { 66 | try { 67 | ReferenceGenome rg(argv[n]); 68 | // string garname = argv[n]; 69 | // garname.replace(garname.size()-3, 3, "gff"); 70 | 71 | 72 | cout<<"Done reading genome from "< tripcount; 88 | for(uint64_t s = 2 ; s < chr.size(); ++s) { 89 | triplet[0]=triplet[1]; 90 | triplet[1]=triplet[2]; 91 | triplet[2] = chr.get(s); 92 | 93 | tripcount[triplet]++; 94 | } 95 | chagstats< 2 | #include 3 | #include 4 | #include 5 | #include 6 | using std::min; 7 | using std::swap; 8 | using std::cout; 9 | using std::endl; 10 | using std::cerr; 11 | 12 | struct NWunschStats 13 | { 14 | NWunschStats() : matches(0), mismatches(0), skews(0), inserts(0), deletes(0){} 15 | int matches; 16 | int 
mismatches; 17 | int skews; 18 | int inserts; 19 | int deletes; 20 | }; 21 | 22 | NWunschStats stringalign(const std::string& ain, const std::string& bin, double mispen, double gappen, 23 | double skwpen, std::string& aout, std::string& bout, std::string& summary) 24 | { 25 | NWunschStats ret; 26 | 27 | unsigned int i,j,k; 28 | double dn,rt,dg; 29 | std::string::size_type ia = ain.length(), ib = bin.length(); 30 | aout.resize(ia+ib); 31 | bout.resize(ia+ib); 32 | summary.resize(ia+ib); 33 | double *cost[ia+1]; 34 | for(unsigned int n=0; n < ia+1; ++n) 35 | cost[n]=new double[ib+1]; 36 | cost[0][0] = 0.; 37 | for (i=1;i<=ia;i++) cost[i][0] = cost[i-1][0] + skwpen; 38 | for (i=1;i<=ib;i++) cost[0][i] = cost[0][i-1] + skwpen; 39 | for (i=1;i<=ia;i++) for (j=1;j<=ib;j++) { 40 | dn = cost[i-1][j] + ((j == ib)? skwpen : gappen); 41 | rt = cost[i][j-1] + ((i == ia)? skwpen : gappen); 42 | dg = cost[i-1][j-1] + ((ain[i-1] == bin[j-1])? -1. : mispen); 43 | cost[i][j] = min({dn,rt,dg}); 44 | } 45 | i=ia; j=ib; k=0; 46 | while (i > 0 || j > 0) { 47 | dn = rt = dg = 9.99e99; 48 | if (i>0) dn = cost[i-1][j] + ((j==ib)? skwpen : gappen); 49 | if (j>0) rt = cost[i][j-1] + ((i==ia)? skwpen : gappen); 50 | if (i>0 && j>0) dg = cost[i-1][j-1] + 51 | ((ain[i-1] == bin[j-1])? -1. : mispen); 52 | if (dg <= min(dn,rt)) { 53 | aout[k] = ain[i-1]; 54 | bout[k] = bin[j-1]; 55 | bool match=(ain[i-1] == bin[j-1]); 56 | summary[k++] = (match ? 
'=' : '!'); 57 | if(match) 58 | ret.matches++; 59 | else 60 | ret.mismatches++; 61 | 62 | i--; j--; 63 | } 64 | else if (dn < rt) { 65 | aout[k] = ain[i-1]; 66 | bout[k] = ' '; 67 | summary[k++] = ' '; 68 | if(j==ib) 69 | ret.skews++; 70 | else 71 | ret.deletes++; 72 | i--; 73 | } 74 | else { 75 | aout[k] = ' '; 76 | bout[k] = bin[j-1]; 77 | summary[k++] = ' '; 78 | if(i==ia) 79 | ret.skews++; 80 | else 81 | ret.inserts++; 82 | j--; 83 | } 84 | } 85 | for (i=0;i 4 | #include 5 | #include 6 | #include 7 | #include "dnamisc.hh" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "misc.hh" 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | 18 | 19 | int main(int argc, char **argv) 20 | { 21 | if(argc < 2) { 22 | cerr<<"Syntax: exoexplore annotations.gff"< chrotrans; 34 | for(const auto& c : rg.getAllChromosomes()) { 35 | // NC_0000001.x 36 | int num=atoi(c.first.substr(3).c_str()); 37 | chrotrans["chr"+to_string(num)]=c.first; 38 | 39 | cout< chromomap; 49 | map > > genex; 50 | ofstream exoncsv("exons.csv"); 51 | exoncsv<<"chr,start,stop,gene,sense,index"<> seen; 66 | ofstream allintronscsv("intronsfull.csv"); 67 | allintronscsv<<"chr,start,stop,gene,sense,intron,index"<first<<","<second<<","<first<<","<second<<","<second << ","<first<<","<second << ","<first<<","<second); 96 | if(!chr) 97 | continue; 98 | auto len = iter->first - (iter-1)->second; 99 | if(len >=0 && len < 100000) { 100 | tuple token={iter2->second, (iter-1)->second, len-1}; 101 | if(!seen.count(token)) { 102 | auto r=chr->chromosome.getRange((iter-1)->second, len-1); 103 | if(!chromomap[g.first].sense) 104 | r=r.getRC(); 105 | allintronscsv<<','< 4 | #include 5 | #include 6 | #include 7 | #include "dnamisc.hh" 8 | #include 9 | #include 10 | #include 11 | #include "misc.hh" 12 | #include 13 | #include 14 | 15 | using namespace std; 16 | 17 | struct Node 18 | { 19 | std::string parent; 20 | vector children; 21 | std::string type; 22 | std::string tag; 23 | }; 24 | 25 | 
std::unordered_map nodes; 26 | 27 | int main(int argc, char **argv) 28 | { 29 | if(argc < 3) { 30 | cerr<<"Syntax: gfflookup annotations.gff refgenome.fna offset1 [offset2]"< genes; 42 | boost::dynamic_bitset cdsset; 43 | for(const auto& a : anns) { 44 | if(a.type=="chromosome") { 45 | cdsset.resize(a.stopPos+1); 46 | break; 47 | } 48 | } 49 | int geneCount{0}; 50 | for(const auto& a : anns) { 51 | if(a.type == "CDS" || a.type=="exon") { //a.type.get().find("RNA") != string::npos ) { 52 | for(auto i = a.startPos; i != a.stopPos; ++i) 53 | cdsset.set(i, true); 54 | } 55 | if(a.type=="gene") 56 | geneCount++; 57 | } 58 | cout<<"Total CDS/*RNA length for chromosome "< \"" << r.parent <<"\""<chromosome.getRange(r.startPos, r.stopPos-r.startPos).getRC()< 3 | #include 4 | #include 5 | using namespace std; 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | //! read a line of text from a FILE* to a std::string, returns false on 'no data' 12 | bool stringfgets(FILE* fp, std::string* line) 13 | { 14 | char buffer[1024]; 15 | line->clear(); 16 | 17 | do { 18 | if(!fgets(buffer, sizeof(buffer), fp)) 19 | return !line->empty(); 20 | 21 | line->append(buffer); 22 | } while(!strchr(buffer, '\n')); 23 | return true; 24 | } 25 | 26 | //! 
read a line of text from a gzfile to a std::string, returns false on 'no data' 27 | bool stringfgets(gzFile fp, std::string* line) 28 | { 29 | char buffer[1024]; 30 | line->clear(); 31 | 32 | do { 33 | if(!gzgets(fp, buffer, sizeof(buffer))) 34 | return !line->empty(); 35 | 36 | line->append(buffer); 37 | } while(!strchr(buffer, '\n')); 38 | return true; 39 | } 40 | 41 | 42 | uint64_t filesize(const char* name) 43 | { 44 | struct stat buf; 45 | if(!stat(name, &buf)) { 46 | return buf.st_size; 47 | } 48 | return 0; 49 | } 50 | 51 | 52 | 53 | char* sfgets(char* p, int num, FILE* fp) 54 | { 55 | char *ret = fgets(p, num, fp); 56 | if(!ret) 57 | throw std::runtime_error("Unexpected EOF"); 58 | return ret; 59 | } 60 | 61 | void chomp(char* line) 62 | { 63 | char *p; 64 | p = strchr(line, '\r'); 65 | if(p)*p=0; 66 | p = strchr(line, '\n'); 67 | if(p)*p=0; 68 | } 69 | #if 0 70 | // thanks jeff sipek 71 | static void rev_and_comp_tbl_small(const char *in, char *out, size_t len) 72 | { 73 | const char tbl[8] = { 74 | [1] = 'T', 75 | [4] = 'A', 76 | [3] = 'G', 77 | [7] = 'C', 78 | }; 79 | 80 | if (!in || !out || !len) 81 | return; 82 | 83 | out[len] = '\0'; 84 | 85 | while (len) { 86 | *out = tbl[(int) in[len - 1] & 0x7]; 87 | 88 | len--; 89 | out++; 90 | } 91 | } 92 | #endif 93 | 94 | void reverseNucleotides(std::string* nucleotides) 95 | { 96 | std::reverse(nucleotides->begin(), nucleotides->end()); 97 | for(string::iterator iter = nucleotides->begin(); iter != nucleotides->end(); ++iter) { 98 | if(*iter == 'C') 99 | *iter = 'G'; 100 | else if(*iter == 'G') 101 | *iter = 'C'; 102 | else if(*iter == 'A') 103 | *iter = 'T'; 104 | else if(*iter == 'T') 105 | *iter = 'A'; 106 | } 107 | } 108 | 109 | string compilerVersion() 110 | { 111 | #if defined(__clang__) 112 | return string("clang " __clang_version__); 113 | #elif defined(__GNUC__) 114 | return string("gcc " __VERSION__); 115 | #elif defined(_MSC_FULL_VER) 116 | return string("Microsoft Visual Studio " + 
boost::lexical_cast(_MSC_FULL_VER)); 117 | #else // add other compilers here 118 | return string("Unknown compiler"); 119 | #endif 120 | } 121 | 122 | // blah 1.fna @file-with-more-fnas 123 | // expands this with the contents of 'file-with-more-fnas', one per line 124 | vector expandArguments(int argc, char** argv) 125 | { 126 | vector todo; 127 | for(int n=0; n < argc; ++n) { 128 | if(argv[n][0]=='@') { 129 | ifstream ifs(argv[n]+1); 130 | string fname; 131 | while(getline(ifs, fname)) { 132 | if(!fname.empty() && fname[fname.size()-1]=='\n') 133 | fname.resize(fname.size()-1); 134 | todo.push_back(fname); 135 | } 136 | } 137 | else todo.push_back(argv[n]); 138 | } 139 | 140 | return todo; 141 | } 142 | 143 | 144 | void visitAllNgrams(std::function exec, unsigned int chars, std::string start) 145 | { 146 | if(!chars) { 147 | exec(start); 148 | return; 149 | } 150 | --chars; 151 | visitAllNgrams(exec, chars, start+"A"); 152 | visitAllNgrams(exec, chars, start+"C"); 153 | visitAllNgrams(exec, chars, start+"G"); 154 | visitAllNgrams(exec, chars, start+"T"); 155 | } 156 | -------------------------------------------------------------------------------- /ext/libmba/allocator.c: -------------------------------------------------------------------------------- 1 | /* allocator - allocate and free memory 2 | * Copyright (c) 2003 Michael B. 
Allen 3 | * 4 | * The MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, including without limitation 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | * and/or sell copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included 14 | * in all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | * OTHER DEALINGS IN THE SOFTWARE. 23 | */ 24 | 25 | #include 26 | #include 27 | 28 | #include "mba/allocator.h" 29 | #include "mba/suba.h" 30 | #include "mba/msgno.h" 31 | 32 | void * 33 | allocator_alloc(struct allocator *al, size_t size, int zero) 34 | { 35 | void *p; 36 | 37 | if (!al) { 38 | al = global_allocator ? global_allocator : stdlib_allocator; 39 | } 40 | 41 | if (al->tail) { /* fn ptr in shared mem may be invalid */ 42 | p = suba_alloc(al, size, zero); 43 | } else { 44 | p = al->alloc(al, size, zero); 45 | } 46 | if (p == NULL) { 47 | AMSG(""); 48 | } 49 | 50 | return p; 51 | } 52 | void * 53 | allocator_realloc(struct allocator *al, void *obj, size_t size) 54 | { 55 | void *p; 56 | 57 | if (!al) { 58 | al = global_allocator ? 
global_allocator : stdlib_allocator; 59 | } 60 | 61 | if (al->tail) { /* fn ptr in shared mem may be invalid */ 62 | p = suba_realloc(al, obj, size); 63 | } else { 64 | p = al->realloc(al, obj, size); 65 | } 66 | if (p == NULL && size) { 67 | AMSG(""); 68 | } 69 | 70 | return p; 71 | } 72 | int 73 | allocator_free(void *al0, void *obj) 74 | { 75 | struct allocator *al = al0; 76 | 77 | if (!al) { 78 | al = global_allocator ? global_allocator : stdlib_allocator; 79 | } 80 | 81 | if (al->tail) { /* fn ptr in shared mem may be invalid */ 82 | if (suba_free(al, obj) == -1) { 83 | AMSG(""); 84 | return -1; 85 | } 86 | } else if (al->free(al, obj) == -1) { 87 | AMSG(""); 88 | return -1; 89 | } 90 | 91 | return 0; 92 | } 93 | void 94 | allocator_set_reclaim(struct allocator *al, reclaim_fn recl, void *arg) 95 | { 96 | if (!al) { 97 | if (global_allocator) { 98 | al = global_allocator; 99 | } else { 100 | return; /* stdlib_allocator does not support reclaim_fn */ 101 | } 102 | } 103 | 104 | al->reclaim = recl; 105 | al->reclaim_arg = arg; 106 | } 107 | 108 | void * 109 | stdlib_alloc(struct allocator *al, size_t size, int zero) 110 | { 111 | void *p; 112 | 113 | if (zero) { 114 | p = calloc(1, size); 115 | } else { 116 | p = malloc(size); 117 | } 118 | if (p == NULL) { 119 | PMNO(errno); 120 | return NULL; 121 | } 122 | 123 | (void)al; 124 | return p; 125 | } 126 | void * 127 | stdlib_realloc(struct allocator *al, void *obj, size_t size) 128 | { 129 | void *p; 130 | 131 | if ((p = realloc(obj, size)) == NULL && size) { 132 | PMNO(errno); 133 | } 134 | 135 | (void)al; 136 | return p; 137 | } 138 | int 139 | stdlib_free(void *al, void *obj) 140 | { 141 | free(obj); 142 | (void)al; 143 | return 0; 144 | } 145 | 146 | struct allocator stdlib_allocator0 = { 147 | "", 0, 0, 0, 0, 0, 0, 0, 148 | &stdlib_alloc, 149 | &stdlib_realloc, 150 | &stdlib_free, 151 | NULL, NULL, 0, 0 152 | }; 153 | 154 | struct allocator *stdlib_allocator = &stdlib_allocator0; 155 | struct allocator 
*global_allocator = NULL; 156 | 157 | -------------------------------------------------------------------------------- /fastq.cc: -------------------------------------------------------------------------------- 1 | #include "fastq.hh" 2 | #include 3 | #include 4 | #include 5 | #include "misc.hh" 6 | #include 7 | using namespace std; 8 | 9 | uint64_t StereoFASTQReader::s_mask= ~(1ULL<<63); 10 | 11 | FASTQReader::FASTQReader(const std::string& str, unsigned int qoffset) 12 | : d_snipLeft(0), d_snipRight(0), d_reader(LineReader::make(str)) 13 | { 14 | d_qoffset=qoffset; 15 | } 16 | 17 | bool FastQRead::exceedsQuality(unsigned int limit) 18 | { 19 | uint8_t q; 20 | for(string::size_type pos = 0 ; pos < d_quality.size(); ++pos) { 21 | q = d_quality[pos]; 22 | if(q < limit) 23 | return false; 24 | } 25 | return true; 26 | } 27 | 28 | string FastQRead::getSangerQualityString() const 29 | { 30 | string quality{d_quality}; 31 | for(auto& c : quality) 32 | c+=33; 33 | return quality; 34 | } 35 | 36 | void FastQRead::reverse() 37 | { 38 | reverseNucleotides(&d_nucleotides); 39 | std::reverse(d_quality.begin(), d_quality.end()); 40 | reversed = !reversed; 41 | } 42 | 43 | std::string FastQRead::getNameFromHeader() const 44 | { 45 | string name; 46 | string::size_type spacepos = d_header.find(' '); 47 | if(spacepos != string::npos) 48 | name = d_header.substr(0, spacepos); 49 | else 50 | name = d_header; 51 | return name; 52 | } 53 | 54 | unsigned int FASTQReader::getRead(FastQRead* fq) 55 | { 56 | uint64_t pos = d_reader->getUncPos(); 57 | char line[1024]=""; 58 | if(!d_reader->fgets(line, sizeof(line))) 59 | return 0; 60 | if(line[0] != '@') 61 | throw runtime_error("Input not FASTQ, line: '"+string(line)+"'"); 62 | 63 | chomp(line); 64 | fq->d_header.assign(line+1); 65 | 66 | d_reader->fgets(line, sizeof(line)); 67 | chomp(line); 68 | 69 | if((d_snipLeft || d_snipRight) && (d_snipLeft + d_snipRight < strlen(line))) 70 | fq->d_nucleotides.assign(line + d_snipLeft, 
strlen(line) -d_snipLeft-d_snipRight); 71 | else 72 | fq->d_nucleotides.assign(line); 73 | d_reader->fgets(line, sizeof(line)); 74 | d_reader->fgets(line, sizeof(line)); 75 | 76 | chomp(line); 77 | 78 | if((d_snipLeft || d_snipRight) && (d_snipLeft + d_snipRight < strlen(line))) 79 | fq->d_quality.assign(line + d_snipLeft, strlen(line)-d_snipLeft-d_snipRight); 80 | else 81 | fq->d_quality.assign(line); 82 | 83 | for(auto& c : fq->d_quality) { 84 | if((unsigned int)c < d_qoffset) 85 | throw runtime_error("Attempting to parse a quality code of val "+boost::lexical_cast((int)c)+" which is < our quality offset"); 86 | c -= d_qoffset; 87 | } 88 | 89 | fq->reversed=0; 90 | fq->position=pos; 91 | return d_reader->getUncPos() - pos; 92 | } 93 | 94 | uint64_t FASTQReader::estimateReads() 95 | { 96 | uint64_t pos = d_reader->getUncPos(); 97 | FastQRead fqr; 98 | auto size = getRead(&fqr); 99 | seek(pos); 100 | return d_reader->uncompressedSize() / size; 101 | } 102 | 103 | unsigned int StereoFASTQReader::getRead(uint64_t pos, FastQRead* fq) 104 | { 105 | unsigned int ret; 106 | if(pos & (1ULL<<63)) { 107 | d_fq2.seek(pos & s_mask); 108 | ret=d_fq2.getRead(fq); 109 | } 110 | else { 111 | d_fq1.seek(pos); 112 | ret=d_fq1.getRead(fq); 113 | } 114 | 115 | fq->position = pos; 116 | return ret; 117 | } 118 | 119 | void StereoFASTQReader::seek(uint64_t pos) 120 | { 121 | d_fq1.seek(pos); 122 | d_fq2.seek(pos); 123 | } 124 | 125 | uint64_t StereoFASTQReader::estimateReads() 126 | { 127 | return d_fq1.estimateReads() + d_fq2.estimateReads(); 128 | } 129 | 130 | void StereoFASTQReader::setTrim(unsigned int trimLeft, unsigned int trimRight) 131 | { 132 | d_fq1.setTrim(trimLeft, trimRight); 133 | d_fq2.setTrim(trimLeft, trimRight); 134 | } 135 | 136 | unsigned int StereoFASTQReader::getReadPair(FastQRead* fq1, FastQRead* fq2) 137 | { 138 | unsigned int ret1, ret2; 139 | ret1=d_fq1.getRead(fq1); 140 | ret2=d_fq2.getRead(fq2); 141 | 142 | // if(ret1 != ret2) { 143 | // throw 
runtime_error("Difference between paired files in read: " + boost::lexical_cast(ret1) +" != "+ boost::lexical_cast(ret2)); 144 | // } 145 | fq2->position |= (1ULL<<63); 146 | return ret1; 147 | } 148 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /refgenome.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "geneannotated.hh" 8 | #include "antonie.hh" 9 | #include "fastq.hh" 10 | 11 | using std::string; 12 | using std::vector; 13 | using std::unordered_map; 14 | using std::map; 15 | using std::forward_list; 16 | using std::unique_ptr; 17 | 18 | //! Position of a FastQRead that is mapped here, and how (reverse complemented or with an indel, and where) 19 | struct FASTQMapping 20 | { 21 | uint64_t pos; 22 | bool reverse; 23 | int indel; // 0 = nothing, >0 means WE have an insert versus reference at pos 24 | // <0 means WE have a delete versus reference at pos 25 | }; 26 | 27 | //! List of all FASTQMapping s that map to a locus, plus coverage statistic 28 | struct GenomeLocusMapping 29 | { 30 | GenomeLocusMapping() : coverage(0) {} 31 | forward_list d_fastqs; 32 | unsigned int coverage; 33 | }; 34 | 35 | 36 | //! A region with little coverage 37 | struct Unmatched 38 | { 39 | string left, unmatched, right; 40 | dnapos_t pos; 41 | }; 42 | 43 | //! Represents a reference genome to be aligned against 44 | class ReferenceChromosome 45 | { 46 | public: 47 | ReferenceChromosome(const string& fname); //!< Read reference from FASTA 48 | 49 | static unique_ptr makeFromString(const string& str); 50 | dnapos_t size() const { 51 | return d_genome.size() - 1; // we pad at the beginning so we are 1 based.. 52 | } 53 | vector getMatchingHashes(const vector& hashes); 54 | 55 | //! 
Describes how a FastQRead (not mentioned) matches to the reference (straight or in reverse), and what the matching score is 56 | struct MatchDescriptor 57 | { 58 | ReferenceChromosome* rg; 59 | dnapos_t pos; 60 | bool reverse; 61 | int score; 62 | }; 63 | void mapFastQ(dnapos_t pos, const FastQRead& fqfrag, int indel=0); 64 | void cover(dnapos_t pos, char quality, int limit); 65 | void cover(dnapos_t pos, unsigned int length, const std::string& quality, int limit) ; 66 | vector getAllReadPosBoth(FastQRead* fq); // tries original & complement 67 | dnapos_t getReadPosBoth(FastQRead* fq, int qlimit); // tries original & complement 68 | vector getReadPositions(const std::string& nucleotides); 69 | 70 | vector getGCHisto(); 71 | string snippet(dnapos_t start, dnapos_t stop) const; 72 | 73 | void printCoverage(FILE* jsfp, const std::string& fname); 74 | void index(unsigned int length); 75 | 76 | string getMatchingFastQs(dnapos_t pos, StereoFASTQReader& fastq); 77 | string getMatchingFastQs(dnapos_t start, dnapos_t stop, StereoFASTQReader& fastq); 78 | vector d_mapping; 79 | vector d_correctMappings, d_wrongMappings, d_gcMappings, d_taMappings; 80 | 81 | vector d_unmRegions; 82 | //! statistics for a locus 83 | struct LociStats 84 | { 85 | //! 
A difference in this locus 86 | struct Difference 87 | { 88 | char nucleotide; 89 | char quality; 90 | bool headOrTail; 91 | string insert; 92 | bool operator<(const Difference& b) const 93 | { 94 | return std::tie(nucleotide, quality) < std::tie(b.nucleotide, b.quality); 95 | } 96 | }; 97 | vector samples; 98 | }; 99 | dnapos_t d_aCount, d_cCount, d_gCount, d_tCount; 100 | typedef unordered_map locimap_t; 101 | locimap_t d_locimap; 102 | unordered_map d_insertCounts; 103 | string d_name; 104 | string d_fullname; 105 | unique_ptr d_gar; 106 | void addAnnotations(GeneAnnotationReader* gar) 107 | { 108 | d_gar=unique_ptr(gar); 109 | } 110 | private: 111 | ReferenceChromosome() = default; 112 | void initGenome(); 113 | string d_genome; 114 | struct HashPos { 115 | HashPos(uint32_t hash_, dnapos_t pos) : d_hash(hash_), d_pos(pos) 116 | {} 117 | HashPos(){} 118 | uint32_t d_hash; 119 | dnapos_t d_pos; 120 | 121 | bool operator<(const HashPos& rhs) const 122 | { 123 | return d_hash < rhs.d_hash; 124 | } 125 | }; 126 | 127 | typedef vector index_t; 128 | map d_indexes; 129 | }; 130 | -------------------------------------------------------------------------------- /tclap/CmdLineInterface.h: -------------------------------------------------------------------------------- 1 | 2 | /****************************************************************************** 3 | * 4 | * file: CmdLineInterface.h 5 | * 6 | * Copyright (c) 2003, Michael E. Smoot . 7 | * Copyright (c) 2004, Michael E. Smoot, Daniel Aarno. 8 | * All rights reverved. 9 | * 10 | * See the file COPYING in the top directory of this distribution for 11 | * more information. 12 | * 13 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 16 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | * DEALINGS IN THE SOFTWARE. 20 | * 21 | *****************************************************************************/ 22 | 23 | #ifndef TCLAP_COMMANDLINE_INTERFACE_H 24 | #define TCLAP_COMMANDLINE_INTERFACE_H 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | 33 | namespace TCLAP { 34 | 35 | class Arg; 36 | class CmdLineOutput; 37 | class XorHandler; 38 | 39 | /** 40 | * The base class that manages the command line definition and passes 41 | * along the parsing to the appropriate Arg classes. 42 | */ 43 | class CmdLineInterface 44 | { 45 | public: 46 | 47 | /** 48 | * Destructor 49 | */ 50 | virtual ~CmdLineInterface() {} 51 | 52 | /** 53 | * Adds an argument to the list of arguments to be parsed. 54 | * \param a - Argument to be added. 55 | */ 56 | virtual void add( Arg& a )=0; 57 | 58 | /** 59 | * An alternative add. Functionally identical. 60 | * \param a - Argument to be added. 61 | */ 62 | virtual void add( Arg* a )=0; 63 | 64 | /** 65 | * Add two Args that will be xor'd. 66 | * If this method is used, add does 67 | * not need to be called. 68 | * \param a - Argument to be added and xor'd. 69 | * \param b - Argument to be added and xor'd. 70 | */ 71 | virtual void xorAdd( Arg& a, Arg& b )=0; 72 | 73 | /** 74 | * Add a list of Args that will be xor'd. If this method is used, 75 | * add does not need to be called. 76 | * \param xors - List of Args to be added and xor'd. 77 | */ 78 | virtual void xorAdd( std::vector& xors )=0; 79 | 80 | /** 81 | * Parses the command line. 82 | * \param argc - Number of arguments. 83 | * \param argv - Array of arguments. 84 | */ 85 | virtual void parse(int argc, const char * const * argv)=0; 86 | 87 | /** 88 | * Parses the command line. 
89 | * \param args - A vector of strings representing the args. 90 | * args[0] is still the program name. 91 | */ 92 | void parse(std::vector& args); 93 | 94 | /** 95 | * Returns the CmdLineOutput object. 96 | */ 97 | virtual CmdLineOutput* getOutput()=0; 98 | 99 | /** 100 | * \param co - CmdLineOutput object that we want to use instead. 101 | */ 102 | virtual void setOutput(CmdLineOutput* co)=0; 103 | 104 | /** 105 | * Returns the version string. 106 | */ 107 | virtual std::string& getVersion()=0; 108 | 109 | /** 110 | * Returns the program name string. 111 | */ 112 | virtual std::string& getProgramName()=0; 113 | 114 | /** 115 | * Returns the argList. 116 | */ 117 | virtual std::list& getArgList()=0; 118 | 119 | /** 120 | * Returns the XorHandler. 121 | */ 122 | virtual XorHandler& getXorHandler()=0; 123 | 124 | /** 125 | * Returns the delimiter string. 126 | */ 127 | virtual char getDelimiter()=0; 128 | 129 | /** 130 | * Returns the message string. 131 | */ 132 | virtual std::string& getMessage()=0; 133 | 134 | /** 135 | * Indicates whether or not the help and version switches were created 136 | * automatically. 137 | */ 138 | virtual bool hasHelpAndVersion()=0; 139 | 140 | /** 141 | * Resets the instance as if it had just been constructed so that the 142 | * instance can be reused. 
143 | */ 144 | virtual void reset()=0; 145 | }; 146 | 147 | } //namespace 148 | 149 | 150 | #endif 151 | -------------------------------------------------------------------------------- /test-nucstore_cc.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "dnamisc.hh" 3 | #include "nucstore.hh" 4 | #include 5 | 6 | BOOST_AUTO_TEST_SUITE(nucstore_hh) 7 | 8 | BOOST_AUTO_TEST_CASE(test_nucstore_basic) { 9 | NucleotideStore ns; 10 | ns.append('A'); 11 | BOOST_CHECK_EQUAL(ns.get(0),'A'); 12 | ns.append('C'); 13 | BOOST_CHECK_EQUAL(ns.get(0),'A'); 14 | BOOST_CHECK_EQUAL(ns.get(1),'C'); 15 | ns.append('G'); 16 | BOOST_CHECK_EQUAL(ns.get(2),'G'); 17 | ns.append('T'); 18 | 19 | BOOST_CHECK_EQUAL(ns.get(3),'T'); 20 | 21 | BOOST_CHECK_EQUAL(ns.get(0),'A'); 22 | ns.append('C'); 23 | BOOST_CHECK_EQUAL(ns.get(0),'A'); 24 | BOOST_CHECK_EQUAL(ns.get(1),'C'); 25 | ns.append('G'); 26 | BOOST_CHECK_EQUAL(ns.get(2),'G'); 27 | ns.append('T'); 28 | 29 | BOOST_CHECK_EQUAL(ns.get(3),'T'); 30 | 31 | 32 | BOOST_CHECK_EQUAL(ns.size(), 7); 33 | 34 | // AACG TAACG 35 | 36 | NucleotideStore sep; 37 | sep.append("ACGTCGT"); 38 | BOOST_CHECK_EQUAL(ns, sep); 39 | 40 | 41 | sep.set(0, 'C'); 42 | BOOST_CHECK_EQUAL(sep.get(0), 'C'); 43 | 44 | 45 | sep.set(4, 'C'); 46 | BOOST_CHECK_EQUAL(sep.get(0), 'C'); 47 | } 48 | 49 | BOOST_AUTO_TEST_CASE(test_nucstore_val) { 50 | NucleotideStore a("ACGTTC"); 51 | BOOST_CHECK_EQUAL(a.get(0), 'A'); 52 | BOOST_CHECK_EQUAL(a.getNum(0), 0); 53 | BOOST_CHECK_EQUAL(a.getNum(3), 3); 54 | BOOST_CHECK_EQUAL(a.getNum(4), 3); 55 | BOOST_CHECK_EQUAL(a.getNum(5), 1); 56 | 57 | } 58 | 59 | BOOST_AUTO_TEST_CASE(test_nucstore_comp) { 60 | using namespace std; 61 | NucleotideStore a("AAA"), b("AA"); 62 | BOOST_CHECK_LT(b, a); 63 | 64 | BOOST_CHECK_LT(NucleotideStore("A"), NucleotideStore("C")); 65 | BOOST_CHECK_LT(NucleotideStore("C"), NucleotideStore("G")); 66 | BOOST_CHECK_LT(NucleotideStore("G"), NucleotideStore("T")); 
67 | 68 | BOOST_CHECK_LT(NucleotideStore("A"), NucleotideStore("C")); 69 | BOOST_CHECK_LT(NucleotideStore("AA"), NucleotideStore("CC")); 70 | BOOST_CHECK_LT(NucleotideStore("AAA"), NucleotideStore("CCC")); 71 | BOOST_CHECK_LT(NucleotideStore("AAAA"), NucleotideStore("CCCC")); 72 | BOOST_CHECK_LT(NucleotideStore("AAAAA"), NucleotideStore("CCCCC")); 73 | BOOST_CHECK_LT(NucleotideStore("AAAAC"), 74 | NucleotideStore("AAAAG")); 75 | 76 | BOOST_CHECK_LT(NucleotideStore("AAAAC"), 77 | NucleotideStore("AAAACC")); 78 | 79 | BOOST_CHECK_LT(NucleotideStore("AAAAC"), 80 | NucleotideStore("AAAACG")); 81 | 82 | BOOST_CHECK_LT(NucleotideStore("AAAAC"), 83 | NucleotideStore("AAAACCG")); 84 | 85 | BOOST_CHECK_LT(NucleotideStore("AAAACC"), 86 | NucleotideStore("AAAACCG")); 87 | 88 | BOOST_CHECK_LT(NucleotideStore("AAAACCC"), 89 | NucleotideStore("AAAACCG")); 90 | 91 | 92 | BOOST_CHECK_LT(NucleotideStore("AAAAA"), 93 | NucleotideStore("TAAAA")); 94 | 95 | BOOST_CHECK_LT(NucleotideStore("AAAACCCCG"), 96 | NucleotideStore("AAAACCCCT")); 97 | 98 | BOOST_CHECK_LT(NucleotideStore("AACC"), 99 | NucleotideStore("AAGG")); 100 | } 101 | 102 | BOOST_AUTO_TEST_CASE(test_canonicalpalindrome) { 103 | NucleotideStore a("ACT"); 104 | BOOST_CHECK(a.isCanonical()); 105 | NucleotideStore a1("AC"), b1("GT"); 106 | BOOST_CHECK(!a1.isDNAPalindrome()); 107 | BOOST_CHECK(!a1.isDNAPalindrome()); 108 | BOOST_CHECK(a1.getRC() == b1); 109 | 110 | BOOST_CHECK(!a.isDNAPalindrome()); 111 | NucleotideStore p1("CCCGGG"), p2("CG"); 112 | BOOST_CHECK(p1.isDNAPalindrome()); 113 | BOOST_CHECK(p2.isDNAPalindrome()); 114 | 115 | NucleotideStore e; 116 | BOOST_CHECK(!e.isDNAPalindrome()); 117 | 118 | } 119 | 120 | BOOST_AUTO_TEST_CASE(test_delta) { 121 | using namespace std; 122 | NucleotideStore a("ACGTTGCA"), b("ACGTTTCA"), c; 123 | auto ds = a.getDelta(b); 124 | cout< expected({{(uint32_t)5, 'T', NucleotideStore::Delta::Action::Replace}}); 131 | BOOST_CHECK(ds==expected); 132 | 133 | auto ds2= a.getDelta(c); 134 | 
for(const auto& d : ds2) { 135 | cout< 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define ARRAY_WORDS 5 8 | 9 | int main(int argc, char *argv[]) 10 | { 11 | unsigned int i, j, k; 12 | uint32_t array[ARRAY_WORDS], val; 13 | char array2[sizeof(array) + sizeof(uint32_t)]; 14 | uint32_t results[256]; 15 | 16 | /* Initialize array. */ 17 | for (i = 0; i < ARRAY_WORDS; i++) 18 | array[i] = i; 19 | 20 | plan_tests(39); 21 | /* Hash should be the same, indep of memory alignment. */ 22 | val = hash(array, ARRAY_WORDS, 0); 23 | for (i = 0; i < sizeof(uint32_t); i++) { 24 | memcpy(array2 + i, array, sizeof(array)); 25 | ok(hash(array2 + i, ARRAY_WORDS, 0) != val, 26 | "hash matched at offset %i", i); 27 | } 28 | 29 | /* Hash of random values should have random distribution: 30 | * check one byte at a time. */ 31 | for (i = 0; i < sizeof(uint32_t); i++) { 32 | unsigned int lowest = -1U, highest = 0; 33 | 34 | memset(results, 0, sizeof(results)); 35 | 36 | for (j = 0; j < 256000; j++) { 37 | for (k = 0; k < ARRAY_WORDS; k++) 38 | array[k] = random(); 39 | results[(hash(array, ARRAY_WORDS, 0) >> i*8)&0xFF]++; 40 | } 41 | 42 | for (j = 0; j < 256; j++) { 43 | if (results[j] < lowest) 44 | lowest = results[j]; 45 | if (results[j] > highest) 46 | highest = results[j]; 47 | } 48 | /* Expect within 20% */ 49 | ok(lowest > 800, "Byte %i lowest %i", i, lowest); 50 | ok(highest < 1200, "Byte %i highest %i", i, highest); 51 | diag("Byte %i, range %u-%u", i, lowest, highest); 52 | } 53 | 54 | /* Hash of random values should have random distribution: 55 | * check one byte at a time. 
*/ 56 | for (i = 0; i < sizeof(uint64_t); i++) { 57 | unsigned int lowest = -1U, highest = 0; 58 | 59 | memset(results, 0, sizeof(results)); 60 | 61 | for (j = 0; j < 256000; j++) { 62 | for (k = 0; k < ARRAY_WORDS; k++) 63 | array[k] = random(); 64 | results[(hash64(array, sizeof(array)/sizeof(uint64_t), 65 | 0) >> i*8)&0xFF]++; 66 | } 67 | 68 | for (j = 0; j < 256; j++) { 69 | if (results[j] < lowest) 70 | lowest = results[j]; 71 | if (results[j] > highest) 72 | highest = results[j]; 73 | } 74 | /* Expect within 20% */ 75 | ok(lowest > 800, "Byte %i lowest %i", i, lowest); 76 | ok(highest < 1200, "Byte %i highest %i", i, highest); 77 | diag("Byte %i, range %u-%u", i, lowest, highest); 78 | } 79 | 80 | /* Hash of pointer values should also have random distribution. */ 81 | for (i = 0; i < sizeof(uint32_t); i++) { 82 | unsigned int lowest = -1U, highest = 0; 83 | char *p = malloc(256000); 84 | 85 | memset(results, 0, sizeof(results)); 86 | 87 | for (j = 0; j < 256000; j++) 88 | results[(hash_pointer(p + j, 0) >> i*8)&0xFF]++; 89 | free(p); 90 | 91 | for (j = 0; j < 256; j++) { 92 | if (results[j] < lowest) 93 | lowest = results[j]; 94 | if (results[j] > highest) 95 | highest = results[j]; 96 | } 97 | /* Expect within 20% */ 98 | ok(lowest > 800, "hash_pointer byte %i lowest %i", i, lowest); 99 | ok(highest < 1200, "hash_pointer byte %i highest %i", 100 | i, highest); 101 | diag("hash_pointer byte %i, range %u-%u", i, lowest, highest); 102 | } 103 | 104 | if (sizeof(long) == sizeof(uint32_t)) 105 | ok1(hashl(array, ARRAY_WORDS, 0) 106 | == hash(array, ARRAY_WORDS, 0)); 107 | else 108 | ok1(hashl(array, ARRAY_WORDS, 0) 109 | == hash64(array, ARRAY_WORDS, 0)); 110 | 111 | /* String hash: weak, so only test bottom byte */ 112 | for (i = 0; i < 1; i++) { 113 | unsigned int num = 0, cursor, lowest = -1U, highest = 0; 114 | char p[5]; 115 | 116 | memset(results, 0, sizeof(results)); 117 | 118 | memset(p, 'A', sizeof(p)); 119 | p[sizeof(p)-1] = '\0'; 120 | 121 | for (;;) { 
122 | for (cursor = 0; cursor < sizeof(p)-1; cursor++) { 123 | p[cursor]++; 124 | if (p[cursor] <= 'z') 125 | break; 126 | p[cursor] = 'A'; 127 | } 128 | if (cursor == sizeof(p)-1) 129 | break; 130 | 131 | results[(hash_string(p) >> i*8)&0xFF]++; 132 | num++; 133 | } 134 | 135 | for (j = 0; j < 256; j++) { 136 | if (results[j] < lowest) 137 | lowest = results[j]; 138 | if (results[j] > highest) 139 | highest = results[j]; 140 | } 141 | /* Expect within 20% */ 142 | ok(lowest > 35000, "hash_pointer byte %i lowest %i", i, lowest); 143 | ok(highest < 53000, "hash_pointer byte %i highest %i", 144 | i, highest); 145 | diag("hash_pointer byte %i, range %u-%u", i, lowest, highest); 146 | } 147 | 148 | return exit_status(); 149 | } 150 | -------------------------------------------------------------------------------- /nucstore.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | extern "C" { 8 | #include "hash.h" 9 | } 10 | 11 | class NucleotideStore 12 | { 13 | public: 14 | explicit NucleotideStore(const boost::string_ref& in) 15 | { 16 | append(in); 17 | } 18 | NucleotideStore() {} 19 | void append(char c); 20 | void append(const boost::string_ref& line); 21 | 22 | int getNum(size_t pos) const 23 | { 24 | uint8_t byte; 25 | if(pos/4 < d_storage.size()) 26 | byte=d_storage.at(pos/4); 27 | else 28 | byte=d_curval; 29 | 30 | byte >>= ((pos%4)*2); 31 | return (byte & 0x3); 32 | } 33 | 34 | char get(size_t pos) const 35 | { 36 | return "ACGT"[getNum(pos)]; 37 | } 38 | 39 | 40 | char operator[](size_t pos) const 41 | { 42 | return get(pos); 43 | } 44 | void set(size_t pos, char c); 45 | NucleotideStore getRange(size_t pos, size_t len) const; 46 | NucleotideStore getRC() const; 47 | size_t size() const 48 | { 49 | return 4*d_storage.size() + bitpos/2; 50 | } 51 | 52 | struct Delta 53 | { 54 | uint32_t pos; 55 | char o; 56 | enum class Action {Replace, Delete, 
Insert} a; 57 | bool operator==(const Delta& rhs) const 58 | { 59 | return pos==rhs.pos && o==rhs.o && a==rhs.a; 60 | } 61 | }; 62 | 63 | std::vector getDelta(const NucleotideStore& b, double mispen=1, double gappen=2, double skwpen=0) const; 64 | void applyDelta(std::vector& delta); 65 | size_t hash() const 66 | { 67 | /* 68 | if(d_storage.size()==4) { 69 | uint32_t ret; 70 | memcpy((char*)&ret, d_storage.c_str(), 4); 71 | return ret; 72 | } 73 | */ 74 | return qhash(d_storage.c_str(), d_storage.size(), bitpos ? d_curval : 0); 75 | } 76 | 77 | size_t overlap(const NucleotideStore& rhs) const; 78 | size_t fuzOverlap(const NucleotideStore& rhs, int ratio) const; 79 | 80 | bool isCanonical() const 81 | { 82 | return (*this < getRC()); 83 | } 84 | bool isDNAPalindrome() const; 85 | 86 | bool operator==(const NucleotideStore& rhs) const 87 | { 88 | return d_storage == rhs.d_storage && bitpos == rhs.bitpos && d_curval == rhs.d_curval; 89 | } 90 | 91 | bool operator<(const NucleotideStore& rhs) const 92 | { 93 | if(d_storage < rhs.d_storage) 94 | return true; 95 | if(d_storage > rhs.d_storage) 96 | return false; 97 | 98 | // we have to think about it 99 | 100 | auto ourpos=d_storage.size()*4; 101 | auto rhspos=rhs.d_storage.size()*4; 102 | 103 | // std::cerr<<"Ok, thinking about it, "< rhs.get(rhspos)) 112 | return false; 113 | } 114 | // std::cerr<<"Out of things to compare, shortest should now win: "< 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "geneannotated.hh" 9 | using std::cout; 10 | using std::endl; 11 | 12 | namespace { 13 | 14 | struct State 15 | { 16 | void clear() 17 | { 18 | startLocus = stopLocus = 0; 19 | strand = true; 20 | features.clear(); 21 | } 22 | uint32_t startLocus; 23 | uint32_t stopLocus; 24 | bool strand; 25 | std::string kind; 26 | std::vector > features; 27 | } state; 28 | 29 | std::vector g_ret; 30 | 31 | void reportKind(const std::string& kind) 32 | { 33 | if(!state.kind.empty()) { 34 | 
GeneAnnotation ga; 35 | ga.startPos = state.startLocus; 36 | ga.stopPos = state.stopLocus; 37 | ga.strand = state.strand; 38 | ga.type=state.kind; 39 | ga.gene=false; 40 | /* 41 | cout<<"Should emit '"<second=std::to_string(val); 86 | } 87 | 88 | void stringValue(const std::string& val) 89 | { 90 | state.features.rbegin()->second=val; 91 | } 92 | 93 | 94 | } 95 | 96 | namespace qi = boost::spirit::qi; 97 | namespace ascii = boost::spirit::ascii; 98 | 99 | 100 | std::vector parseGenBankString(const std::string& bank) 101 | { 102 | auto first = bank.begin(); 103 | auto last=bank.end(); 104 | g_ret.clear(); 105 | 106 | using qi::phrase_parse; 107 | using qi::lit; 108 | using qi::lexeme; 109 | using qi::alpha; 110 | using qi::char_; 111 | using qi::int_; 112 | using ascii::space; 113 | 114 | qi::rule quoted_string, unquoted_string, number_range, unquoted_allcaps_string; 115 | quoted_string %= lexeme['"' >> +(char_ - '"') >> '"']; 116 | unquoted_string %= lexeme[+(alpha | char_('_'))]; 117 | number_range %= lexeme[-char_('<') >> int_[startLocus] >> lit("..") >> -char_('>') >> int_[stopLocus]]; 118 | 119 | unquoted_allcaps_string = lexeme[+char_('A','Z')]; 120 | 121 | qi::rule base_range; 122 | 123 | base_range %= (number_range) | 124 | (lit("complement(")[complement] >> number_range >> char_(')')) | 125 | (lit("order(") >> *(number_range >> -char_(',') ) >> lit(")") ) | 126 | (lit("join(") >> *(number_range >> -char_(',') ) >> lit(")") ) | 127 | (lit("complement(order(")[complement] >> *(number_range >> -char_(',') ) >> lit("))") ) | 128 | (lit("complement(join(")[complement] >> *(number_range >> -char_(',') ) >> lit("))")); 129 | 130 | bool r=phrase_parse( 131 | first, 132 | last, 133 | *((unquoted_string[reportKind] >> 134 | base_range 135 | >> *(char_('/') >> ( 136 | (lit("transl_except=(pos:") >> base_range >> char_(',') >> lit("aa:") >> unquoted_string >> lit(")")) | 137 | (lit("anticodon=(pos:") >> base_range >> char_(',')>>lit("aa:") >> unquoted_string >> 
char_(',') >> lit("seq:") >> unquoted_string >> lit(")")) | 138 | (unquoted_string[variable] >> -(char_('=') >> (int_[value] | quoted_string[stringValue] | unquoted_allcaps_string ))) 139 | 140 | ) 141 | ) 142 | ) 143 | ) 144 | 145 | 146 | , 147 | space /*< the skip-parser >*/ 148 | ); 149 | if (!r || first != last) {// fail if we did not get a full match 150 | cout<<"Failed at: '"<startPos<<"'"<a link 6 | The configuration dict with CSS class and width is optional - default is class .pup and width of 200px. 7 | You can style the popup box via CSS, targeting its ID #pup. 8 | You can escape " in the popup text with ". 9 | Tutorial and support at http://nicolashoening.de?twocents&nr=8 10 | -------------------------------------------------------------------------- 11 | */ 12 | 13 | nhpup = { 14 | 15 | pup: null, // This is out popup box, represented by a div 16 | identifier: "pup", // Name of ID and class of the popup box 17 | minMargin: 15, // Set how much minimal space there should be (in pixels) 18 | // between the popup and everything else (borders, mouse) 19 | default_width: 200, // Will be set to width from css in document.ready 20 | move: false, // Move it around with the mouse? we are only ready for that when the mouse event is set up. 21 | // Besides, having this turned off intially is resource-friendly. 
22 | 23 | /* 24 | Write message, show popup w/ custom width if necessary, 25 | make sure it disappears on mouseout 26 | */ 27 | popup: function(p_msg, p_config) 28 | { 29 | // do track mouse moves and update position 30 | this.move = true; 31 | // restore defaults 32 | this.pup.removeClass() 33 | .addClass(this.identifier) 34 | .width(this.default_width); 35 | 36 | // custom configuration 37 | if (typeof p_config != 'undefined') { 38 | if ('class' in p_config) { 39 | this.pup.addClass(p_config['class']); 40 | } 41 | if ('width' in p_config) { 42 | this.pup.width(p_config['width']); 43 | } 44 | } 45 | 46 | // Write content and display 47 | this.pup.html(p_msg).show(); 48 | 49 | // Make sure popup goes away on mouse out and we stop the constant 50 | // positioning on mouse moves. 51 | // The event obj needs to be gotten from the virtual 52 | // caller, since we use onmouseover='nhpup.popup(p_msg)' 53 | var t = this.getTarget(arguments.callee.caller.arguments[0]); 54 | $(t).unbind('mouseout').bind('mouseout', 55 | function(e){ 56 | nhpup.pup.hide(); 57 | nhpup.move = false; 58 | } 59 | ); 60 | }, 61 | 62 | // set the target element position 63 | setElementPos: function(x, y) 64 | { 65 | // Call nudge to avoid edge overflow. 
Important tweak: x+10, because if 66 | // the popup is where the mouse is, the hoverOver/hoverOut events flicker 67 | var x_y = this.nudge(x + 10, y); 68 | // remember: the popup is still hidden 69 | this.pup.css('top', x_y[1] + 'px') 70 | .css('left', x_y[0] + 'px'); 71 | }, 72 | 73 | /* Avoid edge overflow */ 74 | nudge: function(x,y) 75 | { 76 | var win = $(window); 77 | 78 | // When the mouse is too far on the right, put window to the left 79 | var xtreme = $(document).scrollLeft() + win.width() - this.pup.width() - this.minMargin; 80 | if(x > xtreme) { 81 | x -= this.pup.width() + 2 * this.minMargin; 82 | } 83 | x = this.max(x, 0); 84 | 85 | // When the mouse is too far down, move window up 86 | if((y + this.pup.height()) > (win.height() + $(document).scrollTop())) { 87 | y -= this.pup.height() + this.minMargin; 88 | } 89 | 90 | return [ x, y ]; 91 | }, 92 | 93 | /* custom max */ 94 | max: function(a,b) 95 | { 96 | if (a>b) return a; 97 | else return b; 98 | }, 99 | 100 | /* 101 | Get the target (element) of an event. 
102 | Inspired by quirksmode 103 | */ 104 | getTarget: function(e) 105 | { 106 | var targ; 107 | if (!e) var e = window.event; 108 | if (e.target) targ = e.target; 109 | else if (e.srcElement) targ = e.srcElement; 110 | if (targ.nodeType == 3) // defeat Safari bug 111 | targ = targ.parentNode; 112 | return targ; 113 | } 114 | 115 | }; 116 | 117 | 118 | /* Prepare popup and define the mouseover callback */ 119 | jQuery(document).ready(function(){ 120 | // create default popup on the page 121 | $('body').append(''); 122 | nhpup.pup = $('#' + nhpup.identifier); 123 | 124 | // set dynamic coords when the mouse moves 125 | $(document).mousemove(function(e){ 126 | if (nhpup.move){ 127 | nhpup.setElementPos(e.pageX, e.pageY); 128 | } 129 | }); 130 | }); 131 | -------------------------------------------------------------------------------- /tclap/XorHandler.h: -------------------------------------------------------------------------------- 1 | 2 | /****************************************************************************** 3 | * 4 | * file: XorHandler.h 5 | * 6 | * Copyright (c) 2003, Michael E. Smoot . 7 | * Copyright (c) 2004, Michael E. Smoot, Daniel Aarno. 8 | * All rights reverved. 9 | * 10 | * See the file COPYING in the top directory of this distribution for 11 | * more information. 12 | * 13 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | * DEALINGS IN THE SOFTWARE. 
20 | * 21 | *****************************************************************************/ 22 | 23 | #ifndef TCLAP_XORHANDLER_H 24 | #define TCLAP_XORHANDLER_H 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | namespace TCLAP { 33 | 34 | /** 35 | * This class handles lists of Arg's that are to be XOR'd on the command 36 | * line. This is used by CmdLine and you shouldn't ever use it. 37 | */ 38 | class XorHandler 39 | { 40 | protected: 41 | 42 | /** 43 | * The list of of lists of Arg's to be or'd together. 44 | */ 45 | std::vector< std::vector > _orList; 46 | 47 | public: 48 | 49 | /** 50 | * Constructor. Does nothing. 51 | */ 52 | XorHandler( ) : _orList(std::vector< std::vector >()) {} 53 | 54 | /** 55 | * Add a list of Arg*'s that will be orred together. 56 | * \param ors - list of Arg* that will be xor'd. 57 | */ 58 | void add( std::vector& ors ); 59 | 60 | /** 61 | * Checks whether the specified Arg is in one of the xor lists and 62 | * if it does match one, returns the size of the xor list that the 63 | * Arg matched. If the Arg matches, then it also sets the rest of 64 | * the Arg's in the list. You shouldn't use this. 65 | * \param a - The Arg to be checked. 66 | */ 67 | int check( const Arg* a ); 68 | 69 | /** 70 | * Returns the XOR specific short usage. 71 | */ 72 | std::string shortUsage(); 73 | 74 | /** 75 | * Prints the XOR specific long usage. 76 | * \param os - Stream to print to. 77 | */ 78 | void printLongUsage(std::ostream& os); 79 | 80 | /** 81 | * Simply checks whether the Arg is contained in one of the arg 82 | * lists. 83 | * \param a - The Arg to be checked. 
84 | */ 85 | bool contains( const Arg* a ); 86 | 87 | std::vector< std::vector >& getXorList(); 88 | 89 | }; 90 | 91 | 92 | ////////////////////////////////////////////////////////////////////// 93 | //BEGIN XOR.cpp 94 | ////////////////////////////////////////////////////////////////////// 95 | inline void XorHandler::add( std::vector& ors ) 96 | { 97 | _orList.push_back( ors ); 98 | } 99 | 100 | inline int XorHandler::check( const Arg* a ) 101 | { 102 | // iterate over each XOR list 103 | for ( int i = 0; static_cast(i) < _orList.size(); i++ ) 104 | { 105 | // if the XOR list contains the arg.. 106 | ArgVectorIterator ait = std::find( _orList[i].begin(), 107 | _orList[i].end(), a ); 108 | if ( ait != _orList[i].end() ) 109 | { 110 | // first check to see if a mutually exclusive switch 111 | // has not already been set 112 | for ( ArgVectorIterator it = _orList[i].begin(); 113 | it != _orList[i].end(); 114 | it++ ) 115 | if ( a != (*it) && (*it)->isSet() ) 116 | throw(CmdLineParseException( 117 | "Mutually exclusive argument already set!", 118 | (*it)->toString())); 119 | 120 | // go through and set each arg that is not a 121 | for ( ArgVectorIterator it = _orList[i].begin(); 122 | it != _orList[i].end(); 123 | it++ ) 124 | if ( a != (*it) ) 125 | (*it)->xorSet(); 126 | 127 | // return the number of required args that have now been set 128 | if ( (*ait)->allowMore() ) 129 | return 0; 130 | else 131 | return static_cast(_orList[i].size()); 132 | } 133 | } 134 | 135 | if ( a->isRequired() ) 136 | return 1; 137 | else 138 | return 0; 139 | } 140 | 141 | inline bool XorHandler::contains( const Arg* a ) 142 | { 143 | for ( int i = 0; static_cast(i) < _orList.size(); i++ ) 144 | for ( ArgVectorIterator it = _orList[i].begin(); 145 | it != _orList[i].end(); 146 | it++ ) 147 | if ( a == (*it) ) 148 | return true; 149 | 150 | return false; 151 | } 152 | 153 | inline std::vector< std::vector >& XorHandler::getXorList() 154 | { 155 | return _orList; 156 | } 157 | 158 | 159 | 
160 | ////////////////////////////////////////////////////////////////////// 161 | //END XOR.cpp 162 | ////////////////////////////////////////////////////////////////////// 163 | 164 | } //namespace TCLAP 165 | 166 | #endif 167 | -------------------------------------------------------------------------------- /cor2.cc: -------------------------------------------------------------------------------- 1 | #include "refgenome2.hh" 2 | 3 | #include 4 | #include "misc.hh" 5 | #include 6 | #include 7 | #include "dnamisc.hh" 8 | #include 9 | #include 10 | #include 11 | #include "nucstore.hh" 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "ext/flat_hash_map/bytell_hash_map.hpp" 21 | 22 | using namespace std; 23 | 24 | 25 | namespace std { 26 | template <> 27 | struct hash { 28 | size_t operator () (const NucleotideStore& ns) const { return ns.hash(); } 29 | }; 30 | } 31 | 32 | 33 | template 34 | struct Matrix 35 | { 36 | Matrix() 37 | { 38 | values = new T[X*Y]; 39 | for(size_t x=0 ; x < X; ++x) 40 | for(size_t y=0 ; y < Y; ++y) 41 | values[x + X*y]=T(); 42 | } 43 | ~Matrix() 44 | { 45 | delete[] values; 46 | } 47 | T* values; 48 | T& operator()(size_t x, size_t y) { 49 | return values[x +X*y]; 50 | } 51 | 52 | size_t maxX() const { return X-1; } 53 | size_t maxY() const { return Y-1; } 54 | 55 | }; 56 | 57 | 58 | typedef ska::bytell_hash_map> nucs_t; 59 | 60 | std::atomic g_processed{0}; 61 | void indexChromosome(nucs_t& nucs, const ReferenceGenome& rg, const ReferenceGenome::Chromosome* chromo, uint8_t val, uint32_t unitsize) 62 | { 63 | cout<<"This is thread tagging with val "<< (int)val<chromosome.size(); 65 | for(uint32_t pos = 0 ; pos < numnucs + unitsize; ++pos) { 66 | auto r = chromo->chromosome.getRange(pos, unitsize); 67 | if(!r.isCanonical()) { 68 | auto& place = nucs[r.getRC().getUInt32()]; 69 | if(place.size() < 50) 70 | place.push_back(pos); 71 | } 72 | else { 73 | auto &place = 
nucs[r.getUInt32()]; 74 | if(place.size() < 50) 75 | place.push_back(pos); 76 | } 77 | 78 | g_processed++; 79 | } 80 | } 81 | 82 | 83 | 84 | void indexAll(nucs_t* nucs, ReferenceGenome* rg, string* name, uint8_t val, int unitsize) 85 | { 86 | auto chromo = rg->getChromosome(*name); 87 | cout<<"Done reading genome1, have "<numChromosomes()<<" chromosomes, "<< 88 | rg->numNucleotides()<<" nucleotides"<chromosome.size()<<" nucleotides"< m; 138 | auto chromo1 = rg1.getChromosome(name1), chromo2 = rg2.getChromosome(name2); 139 | double f1 = 1.0*size / chromo1->chromosome.size(); 140 | double f2 = 1.0*size / chromo2->chromosome.size(); 141 | 142 | boost::progress_display show_progress(nucs1.size()); 143 | 144 | vector xmatches(size), ymatches(size); 145 | 146 | for(const auto& n : nucs1) { 147 | const auto& e2 = nucs2[n.first]; 148 | for(const auto& p1 : n.second) 149 | for(const auto& p2 : e2) { 150 | m(f1*p1, f2*p2)++; 151 | xmatches[f1*p1]++; 152 | ymatches[f2*p2]++; 153 | } 154 | ++show_progress; 155 | } 156 | 157 | ofstream plot("plot"); 158 | for(int x =0 ; x< 1500; ++x) 159 | for(int y=0; y < 1500; ++y) 160 | plot << x << " " << y <<" " << m(x,y) << "\n"; 161 | plot.flush(); 162 | 163 | ofstream xplot("xplot"); 164 | int counter=0; 165 | for(const auto& x : xmatches) { 166 | xplot< 4 | #include 5 | #include 6 | extern "C" { 7 | #include "hash.h" 8 | } 9 | #include 10 | #include 11 | dnapos_t dnanpos = (dnapos_t) -1; 12 | using std::vector; 13 | using std::runtime_error; 14 | 15 | double getGCContent(const std::string& str) 16 | { 17 | dnapos_t aCount{0}, cCount{0}, gCount{0}, tCount{0}, nCount{0}; 18 | for(auto c : str) { 19 | if(c=='A') ++aCount; 20 | else if(c=='C') ++cCount; 21 | else if(c=='G') ++gCount; 22 | else if(c=='T') ++tCount; 23 | else if(c=='N') ++nCount; 24 | } 25 | dnapos_t total = cCount + gCount + aCount + tCount + nCount; 26 | if(!total) 27 | return 0.0; 28 | return 1.0*(cCount + gCount)/(1.0*total); 29 | } 30 | 31 | double qToErr(unsigned int 
i) 32 | { 33 | static vector answers; 34 | 35 | if(answers.empty()) { 36 | for(int n = 0; n < 60 ; ++n) { 37 | answers.push_back(pow(10.0, -n/10.0)); 38 | } 39 | } 40 | if(i > answers.size()) { 41 | throw runtime_error("Can't calculate error rate for Q "+boost::lexical_cast(i)); 42 | } 43 | 44 | return answers[i]; 45 | } 46 | 47 | uint32_t kmerMapper(const std::string& str, int offset, int unsigned len) 48 | { 49 | uint32_t ret=0; 50 | const char *c=str.c_str() + offset; 51 | std::string::size_type val; 52 | for(std::string::size_type i = 0; i != len; ++i, ++c) { 53 | ret<<=2; 54 | if(*c=='A') val=0; 55 | else if(*c=='C') val=1; 56 | else if(*c=='G') val=2; 57 | else if(*c=='T') val=3; 58 | else 59 | continue; 60 | 61 | ret |= val; 62 | } 63 | return ret; 64 | } 65 | 66 | const char* AminoAcidName(char c) 67 | { 68 | switch(c) { 69 | case 'T': 70 | return "Threonine"; 71 | case 'F': 72 | return "Phenylanaline"; 73 | case 'L': 74 | return "Leucine"; 75 | case 'I': 76 | return "Isoleucine"; 77 | case 'M': 78 | return "Methionine"; 79 | case 'V': 80 | return "Valine"; 81 | case 'S': 82 | return "Serine"; 83 | case 'P': 84 | return "Proline"; 85 | case 'A': 86 | return "Alanine"; 87 | case 'Y': 88 | return "Tyrosine"; 89 | case 's': 90 | return "Stop"; 91 | case 'H': 92 | return "Histidine"; 93 | case 'Q': 94 | return "Glutamine"; 95 | case 'N': 96 | return "Asparagine"; 97 | case 'K': 98 | return "Lysine"; 99 | case 'D': 100 | return "Aspartic Acid"; 101 | case 'E': 102 | return "Glutamic Acid"; 103 | case 'C': 104 | return "Cysteine"; 105 | case 'W': 106 | return "Tryptophan"; 107 | case 'R': 108 | return "Arganine"; 109 | case 'G': 110 | return "Glycine"; 111 | } 112 | return "?"; 113 | 114 | } 115 | 116 | char DNAToAminoAcid(const char* s) 117 | { 118 | char a=*s++; 119 | char b=*s++; 120 | char c=*s; 121 | if(a=='T') { 122 | if(b=='T') { 123 | if(c=='T' || c=='C') 124 | return 'F'; 125 | else 126 | return 'L'; 127 | } 128 | if(b=='C') 129 | return 'S'; 130 | 
if(b=='A') { 131 | if(c=='T' || c=='C') 132 | return 'Y'; 133 | else 134 | return 's'; 135 | } 136 | if(b=='G') { 137 | if(c=='T' || c=='C') 138 | return 'C'; 139 | else if(c=='A') 140 | return 's'; 141 | else if(c=='G') 142 | return 'W'; 143 | } 144 | } 145 | else if(a=='C') { 146 | if(b=='T') 147 | return 'L'; 148 | if(b=='C') 149 | return 'P'; 150 | if(b=='A') { 151 | if (c=='T' || c=='C') 152 | return 'H'; 153 | else 154 | return 'Q'; 155 | } 156 | if(b=='G') 157 | return 'R'; 158 | } 159 | else if(a=='A') { 160 | if(b=='T') { 161 | if(c=='G') 162 | return 'M'; 163 | else 164 | return 'I'; 165 | } 166 | if(b=='C') 167 | return 'T'; 168 | else if(b=='A') { 169 | if(c=='T' || c=='C') 170 | return 'N'; 171 | else 172 | return 'K'; 173 | } 174 | else if(b=='G') { 175 | if(c=='T' || c=='C') 176 | return 'S'; 177 | else 178 | return 'R'; 179 | } 180 | } 181 | else if(a=='G') { 182 | if(b=='T') 183 | return 'V'; 184 | else if(b=='C') 185 | return 'A'; 186 | else if(b=='A') { 187 | if(c=='T' || c=='C') 188 | return 'D'; 189 | else 190 | return 'E'; 191 | } 192 | else if(b=='G') 193 | return 'G'; 194 | } 195 | return '?'; 196 | } 197 | 198 | void DuplicateCounter::feedString(const std::string& str) 199 | { 200 | uint32_t hashval = qhash(str.c_str(), str.length(), 0); 201 | d_hashes.push_back(hashval); 202 | } 203 | 204 | DuplicateCounter::counts_t DuplicateCounter::getCounts() 205 | { 206 | counts_t ret; 207 | sort(d_hashes.begin(), d_hashes.end()); 208 | uint64_t repeatCount=1; 209 | for(auto iter = next(d_hashes.begin()) ; iter != d_hashes.end(); ++iter) { 210 | if(*prev(iter) != *iter) { 211 | ret[std::min(repeatCount, (decltype(repeatCount))20)]+=repeatCount; 212 | repeatCount=1; 213 | } 214 | else 215 | repeatCount++; 216 | } 217 | ret[repeatCount]+=repeatCount; 218 | return ret; 219 | } 220 | 221 | void DuplicateCounter::clear() 222 | { 223 | d_hashes.clear(); 224 | d_hashes.shrink_to_fit(); 225 | } 226 | 
-------------------------------------------------------------------------------- /tclap/StandardTraits.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | /****************************************************************************** 4 | * 5 | * file: StandardTraits.h 6 | * 7 | * Copyright (c) 2007, Daniel Aarno, Michael E. Smoot . 8 | * All rights reverved. 9 | * 10 | * See the file COPYING in the top directory of this distribution for 11 | * more information. 12 | * 13 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | * DEALINGS IN THE SOFTWARE. 20 | * 21 | *****************************************************************************/ 22 | 23 | // This is an internal tclap file, you should probably not have to 24 | // include this directly 25 | 26 | #ifndef TCLAP_STANDARD_TRAITS_H 27 | #define TCLAP_STANDARD_TRAITS_H 28 | 29 | #ifdef HAVE_CONFIG_H 30 | #include // To check for long long 31 | #endif 32 | 33 | // If Microsoft has already typedef'd wchar_t as an unsigned 34 | // short, then compiles will break because it's as if we're 35 | // creating ArgTraits twice for unsigned short. Thus... 
36 | #ifdef _MSC_VER 37 | #ifndef _NATIVE_WCHAR_T_DEFINED 38 | #define TCLAP_DONT_DECLARE_WCHAR_T_ARGTRAITS 39 | #endif 40 | #endif 41 | 42 | namespace TCLAP { 43 | 44 | // ====================================================================== 45 | // Integer types 46 | // ====================================================================== 47 | 48 | /** 49 | * longs have value-like semantics. 50 | */ 51 | template<> 52 | struct ArgTraits { 53 | typedef ValueLike ValueCategory; 54 | }; 55 | 56 | /** 57 | * ints have value-like semantics. 58 | */ 59 | template<> 60 | struct ArgTraits { 61 | typedef ValueLike ValueCategory; 62 | }; 63 | 64 | /** 65 | * shorts have value-like semantics. 66 | */ 67 | template<> 68 | struct ArgTraits { 69 | typedef ValueLike ValueCategory; 70 | }; 71 | 72 | /** 73 | * chars have value-like semantics. 74 | */ 75 | template<> 76 | struct ArgTraits { 77 | typedef ValueLike ValueCategory; 78 | }; 79 | 80 | #ifdef HAVE_LONG_LONG 81 | /** 82 | * long longs have value-like semantics. 83 | */ 84 | template<> 85 | struct ArgTraits { 86 | typedef ValueLike ValueCategory; 87 | }; 88 | #endif 89 | 90 | // ====================================================================== 91 | // Unsigned integer types 92 | // ====================================================================== 93 | 94 | /** 95 | * unsigned longs have value-like semantics. 96 | */ 97 | template<> 98 | struct ArgTraits { 99 | typedef ValueLike ValueCategory; 100 | }; 101 | 102 | /** 103 | * unsigned ints have value-like semantics. 104 | */ 105 | template<> 106 | struct ArgTraits { 107 | typedef ValueLike ValueCategory; 108 | }; 109 | 110 | /** 111 | * unsigned shorts have value-like semantics. 112 | */ 113 | template<> 114 | struct ArgTraits { 115 | typedef ValueLike ValueCategory; 116 | }; 117 | 118 | /** 119 | * unsigned chars have value-like semantics. 
120 | */ 121 | template<> 122 | struct ArgTraits { 123 | typedef ValueLike ValueCategory; 124 | }; 125 | 126 | // Microsoft implements size_t awkwardly. 127 | #if defined(_MSC_VER) && defined(_M_X64) 128 | /** 129 | * size_ts have value-like semantics. 130 | */ 131 | template<> 132 | struct ArgTraits { 133 | typedef ValueLike ValueCategory; 134 | }; 135 | #endif 136 | 137 | 138 | #ifdef HAVE_LONG_LONG 139 | /** 140 | * unsigned long longs have value-like semantics. 141 | */ 142 | template<> 143 | struct ArgTraits { 144 | typedef ValueLike ValueCategory; 145 | }; 146 | #endif 147 | 148 | // ====================================================================== 149 | // Float types 150 | // ====================================================================== 151 | 152 | /** 153 | * floats have value-like semantics. 154 | */ 155 | template<> 156 | struct ArgTraits { 157 | typedef ValueLike ValueCategory; 158 | }; 159 | 160 | /** 161 | * doubles have value-like semantics. 162 | */ 163 | template<> 164 | struct ArgTraits { 165 | typedef ValueLike ValueCategory; 166 | }; 167 | 168 | // ====================================================================== 169 | // Other types 170 | // ====================================================================== 171 | 172 | /** 173 | * bools have value-like semantics. 174 | */ 175 | template<> 176 | struct ArgTraits { 177 | typedef ValueLike ValueCategory; 178 | }; 179 | 180 | 181 | /** 182 | * wchar_ts have value-like semantics. 183 | */ 184 | #ifndef TCLAP_DONT_DECLARE_WCHAR_T_ARGTRAITS 185 | template<> 186 | struct ArgTraits { 187 | typedef ValueLike ValueCategory; 188 | }; 189 | #endif 190 | 191 | /** 192 | * Strings have string like argument traits. 
193 | */ 194 | template<> 195 | struct ArgTraits { 196 | typedef StringLike ValueCategory; 197 | }; 198 | 199 | template 200 | void SetString(T &dst, const std::string &src) 201 | { 202 | dst = src; 203 | } 204 | 205 | } // namespace 206 | 207 | #endif 208 | 209 | -------------------------------------------------------------------------------- /stitchalg.cc: -------------------------------------------------------------------------------- 1 | #include "fastqindex.hh" 2 | #include 3 | #include 4 | #include 5 | #include "stitchalg.hh" 6 | 7 | using namespace std; 8 | 9 | int dnaDiff(const std::string& a, const std::string& b) 10 | { 11 | if(a==b) 12 | return 0; 13 | // cout<<"A: "< > scores{ 42 | {aCount, &aCount}, 43 | {cCount, &cCount}, 44 | {gCount, &gCount}, 45 | {tCount, &tCount}}; 46 | sort(scores.begin(), scores.end()); 47 | auto& best = scores[3].second; 48 | if(best == &aCount) 49 | return 'A'; 50 | else if(best == &cCount) 51 | return 'C'; 52 | else if(best == &gCount) 53 | return 'G'; 54 | else 55 | return 'T'; 56 | } 57 | int getDepth() 58 | { 59 | if(!aCount && !cCount && !gCount && !tCount) 60 | return 0; 61 | return max({aCount, cCount, gCount, tCount}); 62 | } 63 | 64 | void feed(char c, int amount=1) 65 | { 66 | if(c=='A') 67 | aCount+=amount; 68 | else if(c=='C') 69 | cCount+=amount; 70 | else if(c=='G') 71 | gCount+=amount; 72 | else if(c=='T') 73 | tCount+=amount; 74 | } 75 | }; 76 | 77 | 78 | 79 | string doStitch(const map > >& fhpos, const std::string& startseed_, 80 | const std::string& endseed, unsigned int maxlen, int chunklen, bool verbose) 81 | { 82 | string startseed(startseed_); 83 | if(verbose) { 84 | cout << "Startseed: "< totcoverage; 106 | for(;;) { 107 | vector > story; 108 | story.push_back(make_pair(startseed, string(startseed.size(), (char)40))); 109 | 110 | for(unsigned int n=0; n < startseed.size() - chunklen;++n) { 111 | string part=startseed.substr(n, chunklen); 112 | auto matches = getConsensusMatches(part, fhpos, chunklen); 
113 | for(auto& match : matches) { 114 | int diff = dnaDiff(startseed.substr(n), match.d_nucleotides); 115 | matchesConsidered++; 116 | if(diff < 5) { 117 | if(verbose) 118 | cout << string(offset,'-')< consensus; 126 | consensus.resize(startseed.size()*1.5); 127 | for(unsigned int n = 0 ; n < consensus.size(); ++n) { 128 | for(const auto& candidate : story) { 129 | if(n < candidate.first.size()) 130 | consensus[n].feed(candidate.first[n], candidate.second[n]); 131 | } 132 | } 133 | string newconsensus; 134 | if(verbose) 135 | cout< maxlen) { 160 | cout<<"Terminated: \n"<