├── ccan ├── hash │ ├── LICENSE │ ├── _info │ └── test │ │ └── run.c └── build_assert │ ├── LICENSE │ ├── test │ ├── compile_ok.c │ ├── compile_fail.c │ ├── compile_fail-expr.c │ └── run-BUILD_ASSERT_OR_ZERO.c │ ├── build_assert.h │ └── _info ├── config.h ├── githash.cc ├── sysdeps ├── Darwin.inc └── Linux.inc ├── testrunner.cc ├── antonie.hh ├── compat.hh ├── ext ├── html │ ├── jquery-ui-1.10.4.custom │ │ └── css │ │ │ └── ui-lightness │ │ │ └── images │ │ │ ├── animated-overlay.gif │ │ │ ├── ui-icons_222222_256x240.png │ │ │ ├── ui-icons_228ef1_256x240.png │ │ │ ├── ui-icons_ef8c08_256x240.png │ │ │ ├── ui-icons_ffd27a_256x240.png │ │ │ ├── ui-icons_ffffff_256x240.png │ │ │ ├── ui-bg_flat_10_000000_40x100.png │ │ │ ├── ui-bg_glass_65_ffffff_1x400.png │ │ │ ├── ui-bg_glass_100_f6f6f6_1x400.png │ │ │ ├── ui-bg_glass_100_fdf5ce_1x400.png │ │ │ ├── ui-bg_gloss-wave_35_f6a828_500x100.png │ │ │ ├── ui-bg_diagonals-thick_18_b81900_40x40.png │ │ │ ├── ui-bg_diagonals-thick_20_666666_40x40.png │ │ │ ├── ui-bg_highlight-soft_100_eeeeee_1x100.png │ │ │ └── ui-bg_highlight-soft_75_ffe45c_1x100.png │ └── nvd3 │ │ ├── .gitignore │ │ └── LICENSE.md ├── libmba │ ├── mba │ │ ├── iterator.h │ │ ├── dbug.h │ │ ├── suba.h │ │ ├── diff.h │ │ ├── varray.h │ │ ├── msgno.h │ │ ├── hashmap.h │ │ └── allocator.h │ ├── allocator.c │ └── varray.c └── nhpup_1.1.js ├── .gitignore ├── invert.cc ├── stitchalg.hh ├── update-git-hash-if-necessary ├── .travis.yml ├── testrun.sh ├── test-saminfra_cc.cc ├── 16ssearcher.hh ├── fastqindex.hh ├── tclap ├── Makefile.am ├── Visitor.h ├── IgnoreRestVisitor.h ├── OptionalUnlabeledTracker.h ├── Constraint.h ├── CmdLineOutput.h ├── HelpVisitor.h ├── VersionVisitor.h ├── ArgTraits.h ├── ValuesConstraint.h ├── CmdLineInterface.h ├── XorHandler.h ├── StandardTraits.h ├── ArgException.h └── MultiSwitchArg.h ├── test-misc_hh.cc ├── fqgrep.cc ├── test-dnamisc_cc.cc ├── 16ssearcher.md ├── strdiff.c ├── gendump.cc ├── refgenome2.hh ├── misc.hh ├── stitcher.cc ├── 
saminfra.hh ├── geneannotated.hh ├── test-nucstore_cc.cc ├── digisplice.cc ├── fastq.hh ├── gffedit.cc ├── nucstore.hh ├── misc.cc ├── support.js ├── refgenome2.cc ├── dino.cc ├── zstuff.hh ├── pfqgrep.cc ├── nwunsch.cc ├── fastqindex.cc ├── viewer.html ├── gfflookup.cc ├── fastq.cc ├── refgenome.hh ├── gtfreader.cc ├── AntonieLaunch.py ├── geneannotated.cc ├── genbankparser.cc ├── Makefile ├── dnamisc.cc ├── stitchalg.cc ├── phi-x174.cc ├── dnamisc.hh └── viewer.js /ccan/hash/LICENSE: -------------------------------------------------------------------------------- 1 | ../../licenses/CC0 -------------------------------------------------------------------------------- /ccan/build_assert/LICENSE: -------------------------------------------------------------------------------- 1 | ../../licenses/CC0 -------------------------------------------------------------------------------- /config.h: -------------------------------------------------------------------------------- 1 | #define HAVE_LITTLE_ENDIAN 1 2 | -------------------------------------------------------------------------------- /githash.cc: -------------------------------------------------------------------------------- 1 | #include "githash.h" 2 | 3 | const char* g_gitHash=GIT_HASH; 4 | -------------------------------------------------------------------------------- /sysdeps/Darwin.inc: -------------------------------------------------------------------------------- 1 | CXX2011FLAGS=-std=c++11 -stdlib=libc++ -I/usr/local/include/ -ftemplate-depth=1000 -------------------------------------------------------------------------------- /sysdeps/Linux.inc: -------------------------------------------------------------------------------- 1 | CXX2014FLAGS=-std=gnu++17 2 | STATICFLAGS ?=-Wl,-Bstatic -lstdc++ -lgcc -Wl,-Bdynamic -static-libgcc -lm -lc 3 | -------------------------------------------------------------------------------- /testrunner.cc: 
-------------------------------------------------------------------------------- 1 | #define BOOST_TEST_DYN_LINK 2 | #define BOOST_TEST_MAIN 3 | #define BOOST_TEST_MODULE unit 4 | 5 | #include 6 | -------------------------------------------------------------------------------- /antonie.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | typedef uint32_t dnapos_t; 5 | extern unsigned int dnanpos; 6 | extern const char* phiXFastA; 7 | -------------------------------------------------------------------------------- /ccan/build_assert/test/compile_ok.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char *argv[]) 4 | { 5 | BUILD_ASSERT(1 == 1); 6 | return 0; 7 | } 8 | -------------------------------------------------------------------------------- /compat.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifdef _WIN32 3 | #define getcwd(x,y) GetCurrentDirectory((y),(x)) 4 | #else 5 | #include 6 | #include 7 | #endif 8 | 9 | 10 | -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/animated-overlay.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/animated-overlay.gif -------------------------------------------------------------------------------- /ccan/build_assert/test/compile_fail.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char *argv[]) 4 | { 5 | #ifdef FAIL 6 | BUILD_ASSERT(1 == 0); 7 | #endif 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- 
/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_222222_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_222222_256x240.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_228ef1_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_228ef1_256x240.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_ef8c08_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_ef8c08_256x240.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_ffd27a_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_ffd27a_256x240.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_ffffff_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-icons_ffffff_256x240.png -------------------------------------------------------------------------------- 
/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_flat_10_000000_40x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_flat_10_000000_40x100.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_glass_65_ffffff_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_glass_65_ffffff_1x400.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | 6 | # Compiled Dynamic libraries 7 | *.so 8 | *.dylib 9 | 10 | # Compiled Static libraries 11 | *.lai 12 | *.la 13 | *.a 14 | 15 | # other 16 | *.d 17 | *~ 18 | -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_glass_100_f6f6f6_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_glass_100_f6f6f6_1x400.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_glass_100_fdf5ce_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_glass_100_fdf5ce_1x400.png 
-------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_gloss-wave_35_f6a828_500x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_gloss-wave_35_f6a828_500x100.png -------------------------------------------------------------------------------- /ccan/build_assert/test/compile_fail-expr.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char *argv[]) 4 | { 5 | #ifdef FAIL 6 | return BUILD_ASSERT_OR_ZERO(1 == 0); 7 | #else 8 | return 0; 9 | #endif 10 | } 11 | -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_diagonals-thick_18_b81900_40x40.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_diagonals-thick_18_b81900_40x40.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_diagonals-thick_20_666666_40x40.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_diagonals-thick_20_666666_40x40.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_highlight-soft_100_eeeeee_1x100.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/berthubert/antonie/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_highlight-soft_100_eeeeee_1x100.png -------------------------------------------------------------------------------- /ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_highlight-soft_75_ffe45c_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/antonie/master/ext/html/jquery-ui-1.10.4.custom/css/ui-lightness/images/ui-bg_highlight-soft_75_ffe45c_1x100.png -------------------------------------------------------------------------------- /ccan/build_assert/test/run-BUILD_ASSERT_OR_ZERO.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char *argv[]) 5 | { 6 | plan_tests(1); 7 | ok1(BUILD_ASSERT_OR_ZERO(1 == 1) == 0); 8 | return exit_status(); 9 | } 10 | -------------------------------------------------------------------------------- /invert.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "misc.hh" 3 | using namespace std; 4 | 5 | int main(int argc, char**argv) 6 | { 7 | for(int n = 1 ; n < argc; ++n) { 8 | string nucs(argv[n]); 9 | reverseNucleotides(&nucs); 10 | cout< 3 | #include "fastqindex.hh" 4 | 5 | std::string doStitch(const std::map > >& fhpos, 6 | const std::string& startseed_, 7 | const std::string& endseed, unsigned int maxlen, int chunklen, bool verbose); 8 | int dnaDiff(const std::string& a, const std::string& b); 9 | -------------------------------------------------------------------------------- /update-git-hash-if-necessary: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | HASH=$(git describe --always --dirty=+ | tr -d '\n') 3 | 4 | echo \#define GIT_HASH \"$HASH\" > githash.h.tmp 5 | echo $HASH > githash 6 | 7 | cmp -s githash.h 
githash.h.tmp > /dev/null 8 | 9 | if [ "$?" -ne "0" ] 10 | then 11 | mv githash.h.tmp githash.h 12 | echo updated githash.h 13 | else 14 | rm githash.h.tmp 15 | fi 16 | 17 | 18 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | compiler: 3 | - gcc 4 | - clang 5 | before_script: 6 | - sudo apt-get update 7 | - sudo apt-get install libboost-test-dev libz-dev 8 | - wget http://ds9a.nl/antonie/test-files.tar.bz2 9 | - tar xf test-files.tar.bz2 10 | script: 11 | - make -j 4 12 | - make -j 4 check 13 | - ./testrun.sh 14 | notifications: 15 | email: 16 | - bert.hubert@netherlabs.nl 17 | 18 | -------------------------------------------------------------------------------- /ext/libmba/mba/iterator.h: -------------------------------------------------------------------------------- 1 | #ifndef MBA_ITERATOR_H 2 | #define MBA_ITERATOR_H 3 | 4 | /* iter - container for iterator state 5 | */ 6 | 7 | typedef struct _iter { 8 | unsigned long i1; 9 | unsigned long i2; 10 | unsigned long i3; 11 | void *p; 12 | } iter_t; 13 | 14 | typedef void (*iterate_fn)(void *obj, iter_t *iter); 15 | typedef void *(*iterate_next_fn)(void *obj, iter_t *iter); 16 | 17 | #endif /* MBA_ITERATOR_H */ 18 | -------------------------------------------------------------------------------- /ext/html/nvd3/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Jekyll Files # 3 | ################ 4 | _site 5 | 6 | 7 | # Random Files # 8 | ################ 9 | *.swp 10 | *~ 11 | *.log 12 | 13 | 14 | # Private Test Data # 15 | ##################### 16 | *REALDATA* 17 | 18 | 19 | # OS generated files # 20 | ###################### 21 | .DS_Store* 22 | ehthumbs.db 23 | Icon? 
24 | Thumbs.db 25 | # nodejs packages # 26 | ###################### 27 | node_modules 28 | -------------------------------------------------------------------------------- /testrun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | rm -f data.js loci.0.js 4 | ./antonie -1 sbw25/P1-1-35_S5_L001_R1_001.fastq -2 sbw25/P1-1-35_S5_L001_R2_001.fastq -r sbw25/NC_012660.fna -a sbw25/NC_012660.gbk 5 | 6 | if grep 895351 data.js -q 7 | then 8 | echo Found SNP 895151 9 | else 10 | echo missed SNP 895151 11 | exit 1 12 | fi 13 | 14 | if grep YP_002874253.1 data.js -q 15 | then 16 | echo Found YP_002874253.1 17 | else 18 | echo missed YP_002874253.1 19 | exit 1 20 | fi 21 | 22 | 23 | exit 0 24 | -------------------------------------------------------------------------------- /test-saminfra_cc.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "saminfra.hh" 3 | BOOST_AUTO_TEST_SUITE(saminfra_hh) 4 | using std::string; 5 | 6 | BOOST_AUTO_TEST_CASE(test_bamCompress) { 7 | BOOST_CHECK_EQUAL(bamCompress("AAAA"), string("\x11\x11", 2)); 8 | BOOST_CHECK_EQUAL(bamCompress("CCCC"), string("\x22\x22", 2)); 9 | BOOST_CHECK_EQUAL(bamCompress("ACACACAC"), string("\x12\x12\x12\x12", 4)); 10 | BOOST_CHECK_EQUAL(bamCompress("NNNN"), string("\xff\xff", 2)); 11 | BOOST_CHECK_EQUAL(bamCompress("PPPP"), string("\xff\xff", 2)); 12 | } 13 | 14 | 15 | BOOST_AUTO_TEST_SUITE_END() 16 | -------------------------------------------------------------------------------- /16ssearcher.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "zstuff.hh" 8 | 9 | //! 
A class to match reads to a 16S database 10 | class Search16S 11 | { 12 | public: 13 | struct Entry 14 | { 15 | uint32_t id; 16 | std::string nucs; 17 | std::string name; 18 | bool operator<(const Entry& rhs) const 19 | { 20 | return id < rhs.id; 21 | } 22 | }; 23 | 24 | Search16S(const std::string& src); 25 | bool get(Entry* entry); 26 | 27 | private: 28 | std::unique_ptr d_linereader; 29 | }; 30 | -------------------------------------------------------------------------------- /fastqindex.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include "fastq.hh" 6 | 7 | struct HashedPos 8 | { 9 | uint32_t hash; 10 | uint64_t position; 11 | bool operator<(const HashedPos& b) const { 12 | return hash < b.hash; 13 | } 14 | bool operator<(uint32_t h) const { 15 | return hash < h; 16 | } 17 | }__attribute__((packed)); 18 | 19 | std::unique_ptr > indexFASTQ(FASTQReader* fqreader, const std::string& fname, int chunklen); 20 | 21 | std::vector getConsensusMatches(const std::string& consensus, const std::map > >& fhpos, int chunklen); 22 | -------------------------------------------------------------------------------- /tclap/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | libtclapincludedir = $(includedir)/tclap 3 | 4 | libtclapinclude_HEADERS = \ 5 | CmdLineInterface.h \ 6 | ArgException.h \ 7 | CmdLine.h \ 8 | XorHandler.h \ 9 | MultiArg.h \ 10 | UnlabeledMultiArg.h \ 11 | ValueArg.h \ 12 | UnlabeledValueArg.h \ 13 | Visitor.h Arg.h \ 14 | HelpVisitor.h \ 15 | SwitchArg.h \ 16 | MultiSwitchArg.h \ 17 | VersionVisitor.h \ 18 | IgnoreRestVisitor.h \ 19 | CmdLineOutput.h \ 20 | StdOutput.h \ 21 | DocBookOutput.h \ 22 | ZshCompletionOutput.h \ 23 | OptionalUnlabeledTracker.h \ 24 | Constraint.h \ 25 | ValuesConstraint.h \ 26 | ArgTraits.h \ 27 | StandardTraits.h 28 | 29 | 
-------------------------------------------------------------------------------- /test-misc_hh.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "misc.hh" 3 | BOOST_AUTO_TEST_SUITE(misc_hh) 4 | 5 | BOOST_AUTO_TEST_CASE(test_VarMeanEstimator) { 6 | VarMeanEstimator vme; 7 | vme(0); 8 | BOOST_CHECK_CLOSE(mean(vme), 0.0, 0.001); 9 | BOOST_CHECK_CLOSE(variance(vme), 0.0, 0.001); 10 | 11 | for(auto d : {1,2,3,4}) 12 | vme(d); 13 | BOOST_CHECK_CLOSE(mean(vme), 2.0, 0.001); 14 | BOOST_CHECK_CLOSE(variance(vme), 2.0, 0.001); 15 | 16 | } 17 | BOOST_AUTO_TEST_CASE(test_reverseNucleotides) { 18 | std::string tst{"TTTTGGGCCA"}; 19 | reverseNucleotides(&tst); 20 | BOOST_CHECK_EQUAL(tst, "TGGCCCAAAA"); 21 | tst.clear(); 22 | reverseNucleotides(&tst); 23 | BOOST_CHECK_EQUAL(tst, ""); 24 | } 25 | 26 | BOOST_AUTO_TEST_SUITE_END() 27 | -------------------------------------------------------------------------------- /fqgrep.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "misc.hh" 3 | #include "fastq.hh" 4 | using namespace std; 5 | 6 | int main(int argc, char**argv) 7 | { 8 | string search(argv[1]); 9 | string rsearch(search); 10 | reverseNucleotides(&rsearch); 11 | 12 | for(int n = 2 ; n < argc; ++n) { 13 | FASTQReader fqreader(argv[n], 33); 14 | FastQRead fqr; 15 | 16 | while(fqreader.getRead(&fqr)) { 17 | auto pos = fqr.d_nucleotides.find(search); 18 | if(pos != string::npos) { 19 | cout< 2 | #include "dnamisc.hh" 3 | BOOST_AUTO_TEST_SUITE(misc_hh) 4 | 5 | BOOST_AUTO_TEST_CASE(test_kmerMapper) { 6 | BOOST_CHECK_EQUAL(kmerMapper("AAAA", 0, 4), 0U); 7 | BOOST_CHECK_EQUAL(kmerMapper("AAAAAAAA", 0, 8), 0U); 8 | BOOST_CHECK_EQUAL(kmerMapper("AAAAAAAAAAAA", 0, 12), 0U); 9 | BOOST_CHECK_EQUAL(kmerMapper("AAAAAAAAAAAAAAAA", 0, 16), 0U); 10 | 11 | BOOST_CHECK_EQUAL(kmerMapper("CCCC", 0, 4), 85U); 12 | BOOST_CHECK_EQUAL(kmerMapper("CCCCCCCC", 0, 8), 21845U); 13 | 
BOOST_CHECK_EQUAL(kmerMapper("CCCCCCCCCCCC", 0, 12), 5592405U); 14 | BOOST_CHECK_EQUAL(kmerMapper("CCCCCCCCCCCCCCCC", 0, 16), 1431655765U); 15 | 16 | BOOST_CHECK_EQUAL(DNAToAminoAcid("GCC"), 'A'); 17 | BOOST_CHECK_EQUAL(AminoAcidName('A'), "Alanine"); 18 | } 19 | 20 | BOOST_AUTO_TEST_SUITE_END() 21 | -------------------------------------------------------------------------------- /16ssearcher.md: -------------------------------------------------------------------------------- 1 | # 16SSEARCHER 2 | 3 | Sequencing runs ideally only contain the intended DNA. Often, however, they 4 | are contaminated with unintended bacteria. 5 | 6 | 16ssearcher matches FASTQ files to databases of 16S rRNA, and generates 7 | a list of bacteria (or archaea or fungi) present in your reads. 8 | 9 | ## Databases 10 | For [Green Genes](http://greengenes.secondgenome.com/downloads), 11 | download gg_xx_y.fasta.gz and gg_xx_y_accessions.txt.gz. 12 | 13 | For the [Ribosomal Database Project](http://rdp.cme.msu.edu/), download the 14 | unaligned FASTA.gz. 15 | 16 | There is no need to decompress the files, leave them as is. 17 | 18 | ## Syntax 19 | 20 | The --help output is instructive, but in short: 21 | 22 | $ 16ssearcher --mode gg gg_13_5.fasta.gz gg_13_5_accessions.txt.gz *.fastq 23 | 24 | or 25 | 26 | $ 16ssearcher --mode rdp release11_1_Bacteria_unaligned.fa.gz *.fastq 27 | 28 | A running output is emitted to stderr, a final summary to stdout. 29 | 30 | -------------------------------------------------------------------------------- /ccan/hash/_info: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /** 5 | * hash - routines for hashing bytes 6 | * 7 | * When creating a hash table it's important to have a hash function 8 | * which mixes well and is fast. This package supplies such functions. 9 | * 10 | * The hash functions come in two flavors: the normal ones and the 11 | * stable ones. 
The normal ones can vary from machine-to-machine and 12 | * may change if we find better or faster hash algorithms in future. 13 | * The stable ones will always give the same results on any computer, 14 | * and on any version of this package. 15 | * 16 | * License: CC0 (Public domain) 17 | * Maintainer: Rusty Russell 18 | * Author: Bob Jenkins 19 | */ 20 | int main(int argc, char *argv[]) 21 | { 22 | if (argc != 2) 23 | return 1; 24 | 25 | if (strcmp(argv[1], "depends") == 0) { 26 | printf("ccan/build_assert\n"); 27 | return 0; 28 | } 29 | 30 | return 1; 31 | } 32 | -------------------------------------------------------------------------------- /ext/libmba/mba/dbug.h: -------------------------------------------------------------------------------- 1 | #ifndef MBA_DBUG_H 2 | #define MBA_DBUG_H 3 | 4 | /* dbug - resolve symbols and print stack traces w/ x86 GNUC 5 | */ 6 | 7 | #include 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | #ifndef LIBMBA_API 14 | #ifdef WIN32 15 | # ifdef LIBMBA_EXPORTS 16 | # define LIBMBA_API __declspec(dllexport) 17 | # else /* LIBMBA_EXPORTS */ 18 | # define LIBMBA_API __declspec(dllimport) 19 | # endif /* LIBMBA_EXPORTS */ 20 | #else /* WIN32 */ 21 | # define LIBMBA_API extern 22 | #endif /* WIN32 */ 23 | #endif /* LIBMBA_API */ 24 | 25 | extern int dbug_stacktrace(void **buf, int off, int n); 26 | extern unsigned char *dbug_resolve_symbol(void *sym, unsigned char *buf, unsigned char *blim); 27 | extern int dbug_sprint_stacktrace(unsigned char *str, 28 | unsigned char *slim, 29 | void **syms, 30 | int sn, 31 | const unsigned char *msg); 32 | extern int dbug_fprint_stacktrace(FILE *stream, int off, int n, const char *msg); 33 | 34 | #ifdef __cplusplus 35 | } 36 | #endif 37 | 38 | #endif /* MBA_DBUG_H */ 39 | -------------------------------------------------------------------------------- /ext/libmba/mba/suba.h: -------------------------------------------------------------------------------- 1 | #ifndef MBA_SUBA_H 2 | 
#define MBA_SUBA_H 3 | 4 | /* suba - sub-allocate memory from larger chunk of memory 5 | */ 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #ifndef LIBMBA_API 12 | #ifdef WIN32 13 | # ifdef LIBMBA_EXPORTS 14 | # define LIBMBA_API __declspec(dllexport) 15 | # else /* LIBMBA_EXPORTS */ 16 | # define LIBMBA_API __declspec(dllimport) 17 | # endif /* LIBMBA_EXPORTS */ 18 | #else /* WIN32 */ 19 | # define LIBMBA_API extern 20 | #endif /* WIN32 */ 21 | #endif /* LIBMBA_API */ 22 | 23 | #include 24 | #include 25 | 26 | #define SUBA_PTR_SIZE(ptr) ((ptr) ? (*((size_t *)(ptr) - 1)) : 0) 27 | 28 | extern struct allocator *suba_init(void *mem, size_t size, int rst, size_t mincell); 29 | extern void *suba_alloc(struct allocator *suba, size_t size, int zero); 30 | extern void *suba_realloc(struct allocator *suba, void *ptr, size_t size); 31 | extern int suba_free(void *suba, void *ptr); 32 | 33 | extern void *suba_addr(const struct allocator *suba, const ref_t ref); 34 | extern ref_t suba_ref(const struct allocator *suba, const void *ptr); 35 | 36 | #ifdef __cplusplus 37 | } 38 | #endif 39 | 40 | #endif /* MBA_SUBA_H */ 41 | 42 | -------------------------------------------------------------------------------- /strdiff.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | int 9 | main(int argc, char *argv[]) 10 | { 11 | const char *a = argv[1]; 12 | const char *b = argv[2]; 13 | int n, m, d; 14 | int sn, i; 15 | struct varray *ses = varray_new(sizeof(struct diff_edit), NULL); 16 | 17 | if (argc < 3) { 18 | fprintf(stderr, "usage: %s \n", argv[0]); 19 | return EXIT_FAILURE; 20 | } 21 | 22 | n = strlen(a); 23 | m = strlen(b); 24 | if ((d = diff(a, 0, n, b, 0, m, NULL, NULL, NULL, 0, ses, &sn, NULL)) == -1) { 25 | MMNO(errno); 26 | return EXIT_FAILURE; 27 | } 28 | 29 | printf("d=%d sn=%d\n", d, sn); 30 | for (i = 0; i < sn; i++) { 31 | struct diff_edit 
*e = varray_get(ses, i); 32 | 33 | switch (e->op) { 34 | case DIFF_MATCH: 35 | printf("MAT: "); 36 | fwrite(a + e->off, 1, e->len, stdout); 37 | break; 38 | case DIFF_INSERT: 39 | printf("INS: "); 40 | fwrite(b + e->off, 1, e->len, stdout); 41 | break; 42 | case DIFF_DELETE: 43 | printf("DEL: "); 44 | fwrite(a + e->off, 1, e->len, stdout); 45 | break; 46 | } 47 | printf("\n"); 48 | } 49 | 50 | varray_del(ses); 51 | return EXIT_SUCCESS; 52 | } 53 | 54 | -------------------------------------------------------------------------------- /ext/libmba/mba/diff.h: -------------------------------------------------------------------------------- 1 | #ifndef MBA_DIFF_H 2 | #define MBA_DIFF_H 3 | 4 | /* diff - compute a shortest edit script (SES) given two sequences 5 | */ 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #ifndef LIBMBA_API 12 | #ifdef WIN32 13 | # ifdef LIBMBA_EXPORTS 14 | # define LIBMBA_API __declspec(dllexport) 15 | # else /* LIBMBA_EXPORTS */ 16 | # define LIBMBA_API __declspec(dllimport) 17 | # endif /* LIBMBA_EXPORTS */ 18 | #else /* WIN32 */ 19 | # define LIBMBA_API extern 20 | #endif /* WIN32 */ 21 | #endif /* LIBMBA_API */ 22 | 23 | #include 24 | #include /* cmp_fn */ 25 | 26 | typedef const void *(*idx_fn)(const void *s, int idx, void *context); 27 | 28 | typedef enum { 29 | DIFF_MATCH = 1, 30 | DIFF_DELETE, 31 | DIFF_INSERT 32 | } diff_op; 33 | 34 | struct diff_edit { 35 | short op; 36 | int off; /* off into s1 if MATCH or DELETE but s2 if INSERT */ 37 | int len; 38 | }; 39 | 40 | /* consider alternate behavior for each NULL parameter 41 | */ 42 | extern int diff(const void *a, int aoff, int n, 43 | const void *b, int boff, int m, 44 | idx_fn idx, cmp_fn cmp, void *context, int dmax, 45 | struct varray *ses, int *sn, 46 | struct varray *buf); 47 | 48 | #ifdef __cplusplus 49 | } 50 | #endif 51 | 52 | #endif /* MBA_DIFF_H */ 53 | -------------------------------------------------------------------------------- /gendump.cc: 
-------------------------------------------------------------------------------- 1 | #include "refgenome2.hh" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "dnamisc.hh" 7 | #include 8 | #include 9 | #include 10 | #include "misc.hh" 11 | #include 12 | #include 13 | 14 | using namespace std; 15 | 16 | 17 | int main(int argc, char **argv) 18 | { 19 | if(argc < 3) { 20 | cerr<<"Syntax: gfflookup refgenome.fna chromosome offset1 offset2"<chromosome.getRange(atoi(argv[3]), atoi(argv[4]) - atoi(argv[3])); 26 | 27 | cout< 3 | #include 4 | #include "nucstore.hh" 5 | #include 6 | 7 | class ReferenceGenome 8 | { 9 | public: 10 | struct Chromosome 11 | { 12 | std::string fullname; 13 | uint32_t offset; 14 | NucleotideStore chromosome; 15 | }; 16 | 17 | ReferenceGenome(const boost::string_ref& fname, 18 | std::function idx=std::function()); 19 | 20 | std::string d_fname; 21 | NucleotideStore getRange(uint32_t offset, uint32_t len) const; 22 | const Chromosome* getChromosome(const std::string& name) const 23 | { 24 | if(!d_genome.count(name)) 25 | return 0; 26 | auto str=d_genome.find(name); 27 | return &str->second; 28 | } 29 | uint32_t numChromosomes() 30 | { 31 | return d_genome.size(); 32 | } 33 | 34 | uint32_t numNucleotides() const 35 | { 36 | if(d_lookup.empty()) 37 | return 0; 38 | return (*d_lookup.rbegin())->offset + (*d_lookup.rbegin())->chromosome.size(); 39 | } 40 | 41 | const std::map& getAllChromosomes() 42 | { 43 | return d_genome; 44 | } 45 | 46 | private: 47 | 48 | std::map d_genome; 49 | std::vector d_lookup; 50 | }; 51 | -------------------------------------------------------------------------------- /misc.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | void chomp(char* line); 7 | char* sfgets(char* p, int num, FILE* fp); 8 | void reverseNucleotides(std::string* nucleotides); 9 | uint64_t filesize(const char* name); 10 | bool 
/** Rapid estimator of variance and mean of a series of doubles.
    API compatible with a, sadly, far slower boost::accumulator_set
    doing the same thing.

    Only the sample count, the running sum and the running sum of squares
    are kept, so feeding a value is O(1) and no samples are stored.
    Call valid() before asking for mean() or variance(): with no samples
    both divide zero by zero and yield NaN under IEEE-754 arithmetic. */
class VarMeanEstimator
{
public:
  VarMeanEstimator() : N(0), xTot(0), x2Tot(0) {}

  //! Feed one sample into the estimator
  void operator()(double val)
  {
    ++N;
    xTot += val;
    x2Tot += val*val;
  }

  //! True once at least one sample has been fed
  bool valid() const
  {
    return N>0;
  }
  friend double mean(const VarMeanEstimator& vme);
  friend double variance(const VarMeanEstimator& vme);
private:
  uint64_t N;    //!< number of samples seen
  double xTot;   //!< sum of all samples
  double x2Tot;  //!< sum of the squares of all samples
};

//! extract 'mean' from a VarMeanEstimator (NaN if no samples were fed)
inline double mean(const VarMeanEstimator& vme)
{
  return vme.xTot/vme.N;
}

//! extract the (population) 'variance' from a VarMeanEstimator
/*! Computed as E[x^2] - E[x]^2. For (nearly) constant series the two terms
    cancel and rounding can leave a tiny negative remainder; that is clamped
    to zero so callers can safely take the square root. An empty estimator
    still yields NaN (the comparison below is false for NaN). */
inline double variance(const VarMeanEstimator& vme)
{
  const double var = (vme.x2Tot - vme.xTot*vme.xTot/vme.N)/vme.N;
  return var < 0 ? 0 : var;
}
18 | * BUILD_ASSERT(offsetof(struct foo, string) == 0); 19 | * return (char *)foo; 20 | * } 21 | */ 22 | #define BUILD_ASSERT(cond) \ 23 | do { (void) sizeof(char [1 - 2*!(cond)]); } while(0) 24 | 25 | /** 26 | * BUILD_ASSERT_OR_ZERO - assert a build-time dependency, as an expression. 27 | * @cond: the compile-time condition which must be true. 28 | * 29 | * Your compile will fail if the condition isn't true, or can't be evaluated 30 | * by the compiler. This can be used in an expression: its value is "0". 31 | * 32 | * Example: 33 | * #define foo_to_char(foo) \ 34 | * ((char *)(foo) \ 35 | * + BUILD_ASSERT_OR_ZERO(offsetof(struct foo, string) == 0)) 36 | */ 37 | #define BUILD_ASSERT_OR_ZERO(cond) \ 38 | (sizeof(char [1 - 2*!(cond)]) - 1) 39 | 40 | #endif /* CCAN_BUILD_ASSERT_H */ 41 | -------------------------------------------------------------------------------- /tclap/Visitor.h: -------------------------------------------------------------------------------- 1 | 2 | /****************************************************************************** 3 | * 4 | * file: Visitor.h 5 | * 6 | * Copyright (c) 2003, Michael E. Smoot . 7 | * All rights reverved. 8 | * 9 | * See the file COPYING in the top directory of this distribution for 10 | * more information. 11 | * 12 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 13 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 15 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 17 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 18 | * DEALINGS IN THE SOFTWARE. 
19 | * 20 | *****************************************************************************/ 21 | 22 | 23 | #ifndef TCLAP_VISITOR_H 24 | #define TCLAP_VISITOR_H 25 | 26 | namespace TCLAP { 27 | 28 | /** 29 | * A base class that defines the interface for visitors. 30 | */ 31 | class Visitor 32 | { 33 | public: 34 | 35 | /** 36 | * Constructor. Does nothing. 37 | */ 38 | Visitor() { } 39 | 40 | /** 41 | * Destructor. Does nothing. 42 | */ 43 | virtual ~Visitor() { } 44 | 45 | /** 46 | * Does nothing. Should be overridden by child. 47 | */ 48 | virtual void visit() { } 49 | }; 50 | 51 | } 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /stitcher.cc: -------------------------------------------------------------------------------- 1 | #include "refgenome.hh" 2 | #include "geneannotated.hh" 3 | #include 4 | #include "misc.hh" 5 | #include 6 | #include 7 | #include "dnamisc.hh" 8 | #include 9 | #include "fastqindex.hh" 10 | #include 11 | #include "stitchalg.hh" 12 | extern "C" { 13 | #include "hash.h" 14 | } 15 | 16 | using namespace std; 17 | 18 | int g_maxdepth; 19 | 20 | dnapos_t g_record=0; 21 | 22 | set > g_beenthere; 23 | 24 | string g_bestcontig; 25 | 26 | set g_candidates; 27 | 28 | 29 | // stitcher fasta startpos fastq fastq 30 | int main(int argc, char**argv) 31 | { 32 | if(argc < 4) { 33 | cerr<<"Syntax: stitcher reference.fasta startoffset|startsnippet endsnippet fastq fastq"< > > fhpos; 50 | 51 | FASTQReader* fqreader; 52 | 53 | for(int f = 4; f < argc; ++f) { 54 | fqreader = new FASTQReader(argv[f], 33); 55 | fhpos[fqreader]=indexFASTQ(fqreader, argv[f], chunklen); 56 | } 57 | setbuf(stdout, 0); 58 | doStitch(fhpos, startseed, endseed, 10000, chunklen, false); 59 | } 60 | 61 | -------------------------------------------------------------------------------- /ccan/build_assert/_info: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 
"config.h" 4 | 5 | /** 6 | * build_assert - routines for build-time assertions 7 | * 8 | * This code provides routines which will cause compilation to fail should some 9 | * assertion be untrue: such failures are preferable to run-time assertions, 10 | * but much more limited since they can only depends on compile-time constants. 11 | * 12 | * These assertions are most useful when two parts of the code must be kept in 13 | * sync: it is better to avoid such cases if possible, but seconds best is to 14 | * detect invalid changes at build time. 15 | * 16 | * For example, a tricky piece of code might rely on a certain element being at 17 | * the start of the structure. To ensure that future changes don't break it, 18 | * you would catch such changes in your code like so: 19 | * 20 | * Example: 21 | * #include 22 | * #include 23 | * 24 | * struct foo { 25 | * char string[5]; 26 | * int x; 27 | * }; 28 | * 29 | * static char *foo_string(struct foo *foo) 30 | * { 31 | * // This trick requires that the string be first in the structure 32 | * BUILD_ASSERT(offsetof(struct foo, string) == 0); 33 | * return (char *)foo; 34 | * } 35 | * 36 | * License: CC0 (Public domain) 37 | * Author: Rusty Russell 38 | */ 39 | int main(int argc, char *argv[]) 40 | { 41 | if (argc != 2) 42 | return 1; 43 | 44 | if (strcmp(argv[1], "depends") == 0) 45 | /* Nothing. */ 46 | return 0; 47 | 48 | return 1; 49 | } 50 | -------------------------------------------------------------------------------- /tclap/IgnoreRestVisitor.h: -------------------------------------------------------------------------------- 1 | 2 | /****************************************************************************** 3 | * 4 | * file: IgnoreRestVisitor.h 5 | * 6 | * Copyright (c) 2003, Michael E. Smoot . 7 | * All rights reverved. 8 | * 9 | * See the file COPYING in the top directory of this distribution for 10 | * more information. 
11 | * 12 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 13 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 15 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 17 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 18 | * DEALINGS IN THE SOFTWARE. 19 | * 20 | *****************************************************************************/ 21 | 22 | 23 | #ifndef TCLAP_IGNORE_REST_VISITOR_H 24 | #define TCLAP_IGNORE_REST_VISITOR_H 25 | 26 | #include 27 | #include 28 | 29 | namespace TCLAP { 30 | 31 | /** 32 | * A Vistor that tells the CmdLine to begin ignoring arguments after 33 | * this one is parsed. 34 | */ 35 | class IgnoreRestVisitor: public Visitor 36 | { 37 | public: 38 | 39 | /** 40 | * Constructor. 41 | */ 42 | IgnoreRestVisitor() : Visitor() {} 43 | 44 | /** 45 | * Sets Arg::_ignoreRest. 46 | */ 47 | void visit() { Arg::beginIgnoring(); } 48 | }; 49 | 50 | } 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /saminfra.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include "antonie.hh" 5 | #include 6 | #include "fastq.hh" 7 | #include "zstuff.hh" 8 | 9 | //! Write SAM files, with support for paired-end read mappings 10 | class SAMWriter 11 | { 12 | public: 13 | SAMWriter(const std::string& fname, const std::string& genome, dnapos_t len); 14 | ~SAMWriter(); 15 | void write(dnapos_t pos, const FastQRead& fqfrag, int indel=0, int flags=0, const std::string& rnext="*", dnapos_t pnext=0, int32_t tlen=0 ); 16 | private: 17 | FILE* d_fp; 18 | std::string d_fname; 19 | std::string d_genomeName; 20 | }; 21 | 22 | 23 | //! 
Write BAM files, with support for paired-end read mappings 24 | class BAMWriter 25 | { 26 | public: 27 | BAMWriter(const std::string& fname, const std::string& genome, dnapos_t len); 28 | ~BAMWriter(); 29 | uint64_t write(dnapos_t pos, const FastQRead& fqfrag, int indel=0, int flags=0, const std::string& rnext="*", dnapos_t pnext=0, int32_t tlen=0 ); 30 | void qwrite(dnapos_t pos, const FastQRead& fqfrag, int indel=0, int flags=0, const std::string& rnext="*", dnapos_t pnext=0, int32_t tlen=0 ); 31 | void runQueue(StereoFASTQReader& sfq); 32 | private: 33 | 34 | std::string d_fname; 35 | std::string d_genomeName; 36 | BGZFWriter d_zw; 37 | FILE* d_baifp; 38 | struct Write 39 | { 40 | bool operator<(const Write& rhs) const 41 | { 42 | return pos < rhs.pos; 43 | } 44 | dnapos_t pos; 45 | uint64_t fpos; 46 | bool reversed; 47 | int indel; 48 | int flags; 49 | std::string rnext; 50 | dnapos_t pnext; 51 | int tlen; 52 | uint64_t voffset; 53 | unsigned int bin; 54 | }; 55 | std::vector d_queue; 56 | }; 57 | 58 | std::string bamCompress(const std::string& dna); 59 | -------------------------------------------------------------------------------- /geneannotated.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | //! A Gene annotation 10 | struct GeneAnnotation 11 | { 12 | boost::flyweight chromosome; 13 | std::string tag; 14 | boost::flyweight id; 15 | boost::flyweight parent; 16 | boost::flyweight type; 17 | std::string name; 18 | bool strand; 19 | uint64_t startPos; 20 | uint64_t stopPos; 21 | bool gene; 22 | }; 23 | 24 | inline bool operator<(const GeneAnnotation&A, const GeneAnnotation& B) 25 | { 26 | return A.startPos < B.startPos; 27 | } 28 | 29 | //! 
Provides GeneAnnotation objects as read from a GFF3 file 30 | class GeneAnnotationReader 31 | { 32 | public: 33 | GeneAnnotationReader(const std::string& fname); //!< Parse GFF3 from fname 34 | std::vector lookup(std::string_view chromosome, uint64_t pos); //!< Get all annotations for pos 35 | std::vector lookup(std::string_view chromosome, uint64_t pos1, uint64_t pos2); //!< Get all annotations for pos 36 | 37 | std::vector getAll(std::string_view chromosome); 38 | uint64_t size() const 39 | { 40 | size_t ret{0}; 41 | // for(const auto& ga : d_gas) 42 | // ret += ga.second.size(); 43 | return ret; 44 | } //!< Number of annotations known 45 | 46 | std::vector getChromosomes() 47 | { 48 | std::vector ret; 49 | for(const auto& ga : d_gas) 50 | ret.push_back(ga.first); 51 | return ret; 52 | } 53 | 54 | private: 55 | typedef IntervalTree gas_t; 56 | void parseGenBank(const std::string& fname); 57 | std::map d_gas; 58 | }; 59 | 60 | std::vector parseGenBankString(const std::string& bank); 61 | -------------------------------------------------------------------------------- /tclap/OptionalUnlabeledTracker.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | /****************************************************************************** 4 | * 5 | * file: OptionalUnlabeledTracker.h 6 | * 7 | * Copyright (c) 2005, Michael E. Smoot . 8 | * All rights reverved. 9 | * 10 | * See the file COPYING in the top directory of this distribution for 11 | * more information. 12 | * 13 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 16 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | * DEALINGS IN THE SOFTWARE. 20 | * 21 | *****************************************************************************/ 22 | 23 | 24 | #ifndef TCLAP_OPTIONAL_UNLABELED_TRACKER_H 25 | #define TCLAP_OPTIONAL_UNLABELED_TRACKER_H 26 | 27 | #include 28 | 29 | namespace TCLAP { 30 | 31 | class OptionalUnlabeledTracker 32 | { 33 | 34 | public: 35 | 36 | static void check( bool req, const std::string& argName ); 37 | 38 | static void gotOptional() { alreadyOptionalRef() = true; } 39 | 40 | static bool& alreadyOptional() { return alreadyOptionalRef(); } 41 | 42 | private: 43 | 44 | static bool& alreadyOptionalRef() { static bool ct = false; return ct; } 45 | }; 46 | 47 | 48 | inline void OptionalUnlabeledTracker::check( bool req, const std::string& argName ) 49 | { 50 | if ( OptionalUnlabeledTracker::alreadyOptional() ) 51 | throw( SpecificationException( 52 | "You can't specify ANY Unlabeled Arg following an optional Unlabeled Arg", 53 | argName ) ); 54 | 55 | if ( !req ) 56 | OptionalUnlabeledTracker::gotOptional(); 57 | } 58 | 59 | 60 | } // namespace TCLAP 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /tclap/Constraint.h: -------------------------------------------------------------------------------- 1 | 2 | /****************************************************************************** 3 | * 4 | * file: Constraint.h 5 | * 6 | * Copyright (c) 2005, Michael E. Smoot 7 | * All rights reverved. 8 | * 9 | * See the file COPYING in the top directory of this distribution for 10 | * more information. 
11 | * 12 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 13 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 15 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 17 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 18 | * DEALINGS IN THE SOFTWARE. 19 | * 20 | *****************************************************************************/ 21 | 22 | #ifndef TCLAP_CONSTRAINT_H 23 | #define TCLAP_CONSTRAINT_H 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | namespace TCLAP { 33 | 34 | /** 35 | * The interface that defines the interaction between the Arg and Constraint. 36 | */ 37 | template 38 | class Constraint 39 | { 40 | 41 | public: 42 | /** 43 | * Returns a description of the Constraint. 44 | */ 45 | virtual std::string description() const =0; 46 | 47 | /** 48 | * Returns the short ID for the Constraint. 49 | */ 50 | virtual std::string shortID() const =0; 51 | 52 | /** 53 | * The method used to verify that the value parsed from the command 54 | * line meets the constraint. 55 | * \param value - The value that will be checked. 56 | */ 57 | virtual bool check(const T& value) const =0; 58 | 59 | /** 60 | * Destructor. 61 | * Silences warnings about Constraint being a base class with virtual 62 | * functions but without a virtual destructor. 
63 | */ 64 | virtual ~Constraint() { ; } 65 | }; 66 | 67 | } //namespace TCLAP 68 | #endif 69 | -------------------------------------------------------------------------------- /test-nucstore_cc.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "dnamisc.hh" 3 | #include "nucstore.hh" 4 | #include 5 | 6 | BOOST_AUTO_TEST_SUITE(nucstore_hh) 7 | 8 | BOOST_AUTO_TEST_CASE(test_nucstore) { 9 | NucleotideStore ns; 10 | ns.append('A'); 11 | BOOST_CHECK_EQUAL(ns.get(0),'A'); 12 | ns.append('C'); 13 | BOOST_CHECK_EQUAL(ns.get(0),'A'); 14 | BOOST_CHECK_EQUAL(ns.get(1),'C'); 15 | ns.append('G'); 16 | BOOST_CHECK_EQUAL(ns.get(2),'G'); 17 | ns.append('T'); 18 | 19 | BOOST_CHECK_EQUAL(ns.get(3),'T'); 20 | 21 | BOOST_CHECK_EQUAL(ns.get(0),'A'); 22 | ns.append('C'); 23 | BOOST_CHECK_EQUAL(ns.get(0),'A'); 24 | BOOST_CHECK_EQUAL(ns.get(1),'C'); 25 | ns.append('G'); 26 | BOOST_CHECK_EQUAL(ns.get(2),'G'); 27 | ns.append('T'); 28 | 29 | BOOST_CHECK_EQUAL(ns.get(3),'T'); 30 | 31 | 32 | BOOST_CHECK_EQUAL(ns.size(), 7); 33 | 34 | // AACG TAACG 35 | 36 | NucleotideStore sep; 37 | sep.append("ACGTCGT"); 38 | BOOST_CHECK_EQUAL(ns, sep); 39 | 40 | 41 | sep.set(0, 'C'); 42 | BOOST_CHECK_EQUAL(sep.get(0), 'C'); 43 | 44 | 45 | sep.set(4, 'C'); 46 | BOOST_CHECK_EQUAL(sep.get(0), 'C'); 47 | 48 | 49 | 50 | } 51 | 52 | 53 | BOOST_AUTO_TEST_CASE(test_delta) { 54 | using namespace std; 55 | NucleotideStore a("ACGTTGCA"), b("ACGTTTCA"), c; 56 | auto ds = a.getDelta(b); 57 | cout< expected({{(uint32_t)5, 'T', NucleotideStore::Delta::Action::Replace}}); 64 | BOOST_CHECK(ds==expected); 65 | 66 | auto ds2= a.getDelta(c); 67 | for(const auto& d : ds2) { 68 | cout< 24 | #include 25 | #include 26 | 27 | /* 28 | 0 1 32768 29 | 1 2 65536 30 | 2 4 131072 31 | 3 8 262144 32 | 4 16 524288 33 | 5 32 1048576 2^5 is default VARRAY_INIT_SIZE 34 | 6 64 2097152 35 | 7 128 4194304 36 | 8 256 8388608 37 | 9 512 38 | 10 1024 39 | 11 2048 40 | 12 4096 41 | 13 8192 
42 | 14 16384 43 | 15 32768 44 | 16 65536 45 | 17 131072 46 | 18 262144 47 | 19 524288 48 | 20 1048576 49 | 21 2097152 50 | 22 4194304 51 | 23 8388608 52 | */ 53 | 54 | #ifndef VARRAY_INIT_SIZE 55 | #define VARRAY_INIT_SIZE 5 56 | #endif 57 | 58 | struct varray { 59 | size_t size; /* element size */ 60 | ptrdiff_t al; /* relative offset of this object to allocator of bins */ 61 | ref_t bins[16]; /* 0 to 2^20 elements */ 62 | }; 63 | 64 | extern int varray_init(struct varray *va, size_t membsize, struct allocator *al); 65 | extern int varray_reinit(struct varray *va, struct allocator *al); 66 | extern int varray_deinit(struct varray *va); 67 | extern struct varray *varray_new(size_t membsize, struct allocator *al); 68 | extern int varray_del(void *va); 69 | extern int varray_release(struct varray *va, unsigned int from); 70 | extern void *varray_get(struct varray *va, unsigned int idx); 71 | extern int varray_index(struct varray *va, void *elem); 72 | extern void varray_iterate(void *va, iter_t *iter); 73 | extern void *varray_next(void *va, iter_t *iter); 74 | 75 | #ifdef __cplusplus 76 | } 77 | #endif 78 | 79 | #endif /* MBA_VARRAY_H */ 80 | -------------------------------------------------------------------------------- /digisplice.cc: -------------------------------------------------------------------------------- 1 | #include "refgenome.hh" 2 | #include "geneannotated.hh" 3 | #include 4 | #include "misc.hh" 5 | using namespace std; 6 | 7 | string 
casette{"GGCCTGGTGATGATGGCGGGATCGTTGTATATTTCTTGACACCTTTTCGGCATCGCCCTAAAATTCGGCGTCCTCATATTGTGTGAGGACGTTTTATTACGTGTTTACGAAGCAAAAGCTAAAACCAGGAGCTATTTAATGGCAACAGTTAACCAGCTGGTACGCAAACCACGTGCTCGCAAAGTTGCGAAAAGCAACGTGCCTGCGCTGGAAGCATGCCCGCAAAAACGTGGCGTATGTACTCGTGTATATACTACCACTCCTAAAAAACCGAACTCCGCGCTGCGTAAAGTATGCCGTGTTCGTCTGACTAACGGTTTCGAAGTGACTTCCTACATCGGTGGTGAAGGTCACAACCTGCAGGAGCACTCCGTGATCCTGATCCGTGGCGGTCGTGTTAAAGACCTCCCGGGTGTTCGTTACCACACCGTACGTGGTGCGCTTGACTGCTCCGGCGTTAAAGACCGTAAGCAGGCTCGTTCCAAGTATGGCGTGAAGCGTCCTAAGGCTTAAGGAGGACAATCATGATTGAACAAGATGGATTGCACGCAGGTTCTCCGGCCGCTTGGGTGGAGAGGCTATTCGGCTATGACTGGGCACAACAGACAATCGGCTGCTCTGATGCCGCCGTGTTCCGGCTGTCAGCGCAGGGGCGCCCGGTTCTTTTTGTCAAGACCGACCTGTCCGGTGCCCTGAATGAACTGCAGGACGAGGCAGCGCGGCTATCGTGGCTGGCCACGACGGGCGTTCCTTGCGCAGCTGTGCTCGACGTTGTCACTGAAGCGGGAAGGGACTGGCTGCTATTGGGCGAAGTGCCGGGGCAGGATCTCCTGTCATCTCACCTTGCTCCTGCCGAGAAAGTATCCATCATGGCTGATGCAATGCGGCGGCTGCATACGCTTGATCCGGCTACCTGCCCATTCGACCACCAAGCGAAACATCGCATCGAGCGAGCACGTACTCGGATGGAAGCCGGTCTTGTCGATCAGGATGATCTGGACGAAGAGCATCAGGGGCTCGCGCCAGCCGAACTGTTCGCCAGGCTCAAGGCGCGCATGCCCGACGGCGAGGATCTCGTCGTGACCCATGGCGATGCCTGCTTGCCGAATATCATGGTGGAAAATGGCCGCTTTTCTGGATTCATCGACTGTGGCCGGCTGGGTGTGGCGGACCGCTATCAGGACATAGCGTTGGCTACCCGTGATATTGCTGAAGAGCTTGGCGGCGAATGGGCTGACCGCTTCCTCGTGCTTTACGGTATCGCCGCTCCCGATTCGCAGCGCATCGCCTTCTATCGCCTTCTTGACGAGTTCTTCTGA"}; 8 | 9 | 10 | int main(int argc, char**argv) 11 | { 12 | ReferenceChromosome rg(argv[1]); 13 | // GeneAnnotationReader gar(argv[2]); 14 | cerr<"< 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | namespace TCLAP { 34 | 35 | class CmdLineInterface; 36 | class ArgException; 37 | 38 | /** 39 | * The interface that any output object must implement. 40 | */ 41 | class CmdLineOutput 42 | { 43 | 44 | public: 45 | 46 | /** 47 | * Virtual destructor. 48 | */ 49 | virtual ~CmdLineOutput() {} 50 | 51 | /** 52 | * Generates some sort of output for the USAGE. 53 | * \param c - The CmdLine object the output is generated for. 
54 | */ 55 | virtual void usage(CmdLineInterface& c)=0; 56 | 57 | /** 58 | * Generates some sort of output for the version. 59 | * \param c - The CmdLine object the output is generated for. 60 | */ 61 | virtual void version(CmdLineInterface& c)=0; 62 | 63 | /** 64 | * Generates some sort of output for a failure. 65 | * \param c - The CmdLine object the output is generated for. 66 | * \param e - The ArgException that caused the failure. 67 | */ 68 | virtual void failure( CmdLineInterface& c, 69 | ArgException& e )=0; 70 | 71 | }; 72 | 73 | } //namespace TCLAP 74 | #endif 75 | -------------------------------------------------------------------------------- /ext/html/nvd3/LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | ##nvd3.js License 3 | 4 | Copyright (c) 2011, 2012 [Novus Partners, Inc.][novus] 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | 18 | [novus]: https://www.novus.com/ 19 | 20 | 21 | 22 | ##d3.js License 23 | 24 | Copyright (c) 2012, Michael Bostock 25 | All rights reserved. 26 | 27 | Redistribution and use in source and binary forms, with or without 28 | modification, are permitted provided that the following conditions are met: 29 | 30 | * Redistributions of source code must retain the above copyright notice, this 31 | list of conditions and the following disclaimer. 
32 | 33 | * Redistributions in binary form must reproduce the above copyright notice, 34 | this list of conditions and the following disclaimer in the documentation 35 | and/or other materials provided with the distribution. 36 | 37 | * The name Michael Bostock may not be used to endorse or promote products 38 | derived from this software without specific prior written permission. 39 | 40 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 41 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 43 | DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT, 44 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 45 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 46 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 47 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 48 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 49 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 50 | -------------------------------------------------------------------------------- /tclap/HelpVisitor.h: -------------------------------------------------------------------------------- 1 | 2 | /****************************************************************************** 3 | * 4 | * file: HelpVisitor.h 5 | * 6 | * Copyright (c) 2003, Michael E. Smoot . 7 | * All rights reverved. 8 | * 9 | * See the file COPYING in the top directory of this distribution for 10 | * more information. 11 | * 12 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 13 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 15 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 17 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 18 | * DEALINGS IN THE SOFTWARE. 19 | * 20 | *****************************************************************************/ 21 | 22 | #ifndef TCLAP_HELP_VISITOR_H 23 | #define TCLAP_HELP_VISITOR_H 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | namespace TCLAP { 30 | 31 | /** 32 | * A Visitor object that calls the usage method of the given CmdLineOutput 33 | * object for the specified CmdLine object. 34 | */ 35 | class HelpVisitor: public Visitor 36 | { 37 | private: 38 | /** 39 | * Prevent accidental copying. 40 | */ 41 | HelpVisitor(const HelpVisitor& rhs); 42 | HelpVisitor& operator=(const HelpVisitor& rhs); 43 | 44 | protected: 45 | 46 | /** 47 | * The CmdLine the output will be generated for. 48 | */ 49 | CmdLineInterface* _cmd; 50 | 51 | /** 52 | * The output object. 53 | */ 54 | CmdLineOutput** _out; 55 | 56 | public: 57 | 58 | /** 59 | * Constructor. 60 | * \param cmd - The CmdLine the output will be generated for. 61 | * \param out - The type of output. 62 | */ 63 | HelpVisitor(CmdLineInterface* cmd, CmdLineOutput** out) 64 | : Visitor(), _cmd( cmd ), _out( out ) { } 65 | 66 | /** 67 | * Calls the usage method of the CmdLineOutput for the 68 | * specified CmdLine. 69 | */ 70 | void visit() { (*_out)->usage(*_cmd); throw ExitException(0); } 71 | 72 | }; 73 | 74 | } 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /tclap/VersionVisitor.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | /****************************************************************************** 4 | * 5 | * file: VersionVisitor.h 6 | * 7 | * Copyright (c) 2003, Michael E. Smoot . 
8 | * All rights reverved. 9 | * 10 | * See the file COPYING in the top directory of this distribution for 11 | * more information. 12 | * 13 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | * DEALINGS IN THE SOFTWARE. 20 | * 21 | *****************************************************************************/ 22 | 23 | 24 | #ifndef TCLAP_VERSION_VISITOR_H 25 | #define TCLAP_VERSION_VISITOR_H 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | namespace TCLAP { 32 | 33 | /** 34 | * A Vistor that will call the version method of the given CmdLineOutput 35 | * for the specified CmdLine object and then exit. 36 | */ 37 | class VersionVisitor: public Visitor 38 | { 39 | private: 40 | /** 41 | * Prevent accidental copying 42 | */ 43 | VersionVisitor(const VersionVisitor& rhs); 44 | VersionVisitor& operator=(const VersionVisitor& rhs); 45 | 46 | protected: 47 | 48 | /** 49 | * The CmdLine of interest. 50 | */ 51 | CmdLineInterface* _cmd; 52 | 53 | /** 54 | * The output object. 55 | */ 56 | CmdLineOutput** _out; 57 | 58 | public: 59 | 60 | /** 61 | * Constructor. 62 | * \param cmd - The CmdLine the output is generated for. 63 | * \param out - The type of output. 64 | */ 65 | VersionVisitor( CmdLineInterface* cmd, CmdLineOutput** out ) 66 | : Visitor(), _cmd( cmd ), _out( out ) { } 67 | 68 | /** 69 | * Calls the version method of the output object using the 70 | * specified CmdLine. 
71 | */ 72 | void visit() { 73 | (*_out)->version(*_cmd); 74 | throw ExitException(0); 75 | } 76 | 77 | }; 78 | 79 | } 80 | 81 | #endif 82 | -------------------------------------------------------------------------------- /ext/libmba/mba/msgno.h: -------------------------------------------------------------------------------- 1 | #ifndef MBA_MSGNO_H 2 | #define MBA_MSGNO_H 3 | 4 | /* msgno - managing error codes and associated messages across 5 | * separate C libraries 6 | */ 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | #ifndef LIBMBA_API 13 | #ifdef WIN32 14 | # ifdef LIBMBA_EXPORTS 15 | # define LIBMBA_API __declspec(dllexport) 16 | # else /* LIBMBA_EXPORTS */ 17 | # define LIBMBA_API __declspec(dllimport) 18 | # endif /* LIBMBA_EXPORTS */ 19 | #else /* WIN32 */ 20 | # define LIBMBA_API extern 21 | #endif /* WIN32 */ 22 | #endif /* LIBMBA_API */ 23 | 24 | #define STR0(s) #s 25 | #define STR1(s) STR0(s) 26 | #define LINE_STRING STR1(__LINE__) 27 | #if (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) 28 | #define LOC0 __FILE__ ":" LINE_STRING ":" 29 | #define LOC1 __func__ 30 | #else 31 | #define LOC0 __FILE__ ":" 32 | #define LOC1 LINE_STRING 33 | #endif 34 | 35 | #define MMSG msgno_loc0(LOC0, LOC1); msgno_mmsg0 36 | #define MMNO msgno_loc0(LOC0, LOC1); msgno_mmno0 37 | #define MMNF msgno_loc0(LOC0, LOC1); msgno_mmnf0 38 | #define PMSG msgno_loc0("!" LOC0, LOC1); msgno_amsg0 39 | #define PMNO msgno_loc0("!" LOC0, LOC1); msgno_amno0 40 | #define PMNF msgno_loc0("!" 
//! One FASTQ record: nucleotides, quality string and header, plus a 'reversed' flag and source position.
struct FastQRead
{
  FastQRead() : reversed(false), position(0) {}
  std::string d_nucleotides;
  std::string d_quality;
  std::string d_header;
  std::string getNameFromHeader() const;
  bool exceedsQuality(unsigned int);
  std::string getSangerQualityString() const;
  void reverse();
  bool reversed;
  uint64_t position; //!< Position in the source file. The 64 bits may encode the file too, it is not a number for the end user to use. Feed it to a FastQReader.

  //! Lexicographic ordering on (nucleotides, quality, reversed, position); the header does not take part.
  bool operator<(const FastQRead& rhs) const
  {
    if(d_nucleotides != rhs.d_nucleotides)
      return d_nucleotides < rhs.d_nucleotides;
    if(d_quality != rhs.d_quality)
      return d_quality < rhs.d_quality;
    if(reversed != rhs.reversed)
      return reversed < rhs.reversed;
    return position < rhs.position;
  }

};
53 | class StereoFASTQReader 54 | { 55 | public: 56 | StereoFASTQReader(const std::string& name1, const std::string& name2, 57 | unsigned int qoffset) : d_fq1(name1, qoffset), d_fq2(name2, qoffset) 58 | {} 59 | 60 | void setTrim(unsigned int trimLeft, unsigned int trimRight); 61 | void seek(uint64_t pos); 62 | uint64_t estimateReads(); 63 | unsigned int getRead(uint64_t pos, FastQRead* fq2); 64 | unsigned int getReadPair(FastQRead* fq1, FastQRead* fq2); 65 | private: 66 | FASTQReader d_fq1, d_fq2; 67 | static uint64_t s_mask; 68 | }; 69 | -------------------------------------------------------------------------------- /gffedit.cc: -------------------------------------------------------------------------------- 1 | #include "refgenome.hh" 2 | #include "geneannotated.hh" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "misc.hh" 11 | 12 | using namespace std; 13 | 14 | // gffedit fasta gff newgff [insertpos insertlen] 15 | int main(int argc, char **argv) 16 | { 17 | ReferenceChromosome rg(argv[1]); 18 | FILE* fp =fopen(argv[2], "rb"); 19 | if(!fp) 20 | throw runtime_error("Unable to open '"+string(argv[2])+"' for reading GFF3: "+string(strerror(errno))); 21 | 22 | string line; 23 | 24 | map scounts; 25 | ofstream newgff(argv[3]); 26 | dnapos_t startInsert=argc > 4 ? atoi(argv[4]) : 0; 27 | int shiftInsert=argc > 5 ? 
//! Container for a nucleotide sequence kept in packed form.
/*! Going by size(), every byte of d_storage accounts for four nucleotides and
    bitpos/2 further nucleotides are pending in d_curval. The packing itself is
    done by append()/set(), which are defined outside this header. */
class NucleotideStore
{
public:
  //! Builds a store by appending every character of 'in'.
  explicit NucleotideStore(const boost::string_ref& in)
  {
    append(in);
  }
  //! Builds an empty store.
  NucleotideStore() {}
  void append(char c);
  void append(const boost::string_ref& line);
  char get(size_t pos) const;
  //! Read-only element access; forwards to get().
  char operator[](size_t pos) const
  {
    return get(pos);
  }
  void set(size_t pos, char c);
  NucleotideStore getRange(size_t pos, size_t len) const;
  NucleotideStore getRC() const;
  //! Number of nucleotides held: four per stored byte plus the ones pending in d_curval.
  size_t size() const
  {
    return 4*d_storage.size() + bitpos/2;
  }

  //! One edit step as produced by getDelta() and consumed by applyDelta().
  struct Delta
  {
    uint32_t pos;   //!< position the edit applies to
    char o;         //!< nucleotide belonging to the edit -- NOTE(review): exact meaning per action not visible here
    enum class Action {Replace, Delete, Insert} a;
    //! Two deltas are equal when position, nucleotide and action all match.
    bool operator==(const Delta& rhs) const
    {
      return pos==rhs.pos && o==rhs.o && a==rhs.a;
    }
  };

  std::vector getDelta(const NucleotideStore& b, double mispen=1, double gappen=2, double skwpen=0) const;
  void applyDelta(std::vector& delta);
  //! Hash over the packed bytes, seeded with d_curval when a partial byte is pending (0 otherwise).
  size_t hash() const
  {
    /*
    if(d_storage.size()==4) {
      uint32_t ret;
      memcpy((char*)&ret, d_storage.c_str(), 4);
      return ret;
    }
    */
    return qhash(d_storage.c_str(), d_storage.size(), bitpos ? d_curval : 0);
  }

  size_t overlap(const NucleotideStore& rhs) const;
  size_t fuzOverlap(const NucleotideStore& rhs, int ratio) const;

  //! True when this store orders strictly before getRC() of itself (see the caveat on operator<).
  bool isCanonical() const
  {
    return (*this < getRC());
  }
  //! Equality over the packed bytes, the pending bit count and the pending value.
  bool operator==(const NucleotideStore& rhs) const
  {
    return d_storage == rhs.d_storage && bitpos == rhs.bitpos && d_curval == rhs.d_curval;
  }

  //! Orders by the packed bytes only; the pending partial byte is ignored.
  bool operator<(const NucleotideStore& rhs) const
  {
    return d_storage < rhs.d_storage; // XXX ONLY WORKS FOR EQUAL LENGTH, NO CURVAL
  }

  static char getVal(char c);
  //! Copy of the packed bytes; the pending partial byte is not included.
  std::string getString() const { return d_storage; }
  //! Replaces the packed bytes; d_curval and bitpos are left untouched.
  void setString(const std::string& str) { d_storage = str; }
  std::string toASCII() const;
private:
  uint8_t d_curval{0};   //!< partially filled value not yet moved into d_storage
  uint8_t bitpos{0};     //!< bits used in d_curval (size() counts bitpos/2 nucleotides for it)
  std::string d_storage; //!< completed packed bytes
};
/* Bookkeeping for one hash map. Several members are ref_t values rather
 * than pointers; ref_t is the allocator-relative offset type declared in
 * allocator.h.
 */
struct hashmap {
	int table_size_index;          /* presumably an index into table_sizes[] -- confirm in hashmap.c */
	ref_t hash;                    /* hash callback, stored as a ref_t */
	ref_t cmp;                     /* compare callback, stored as a ref_t */
	ref_t context;                 /* caller context, stored as a ref_t */
	unsigned int size;             /* presumably the entry count reported by hashmap_size() -- confirm */
	unsigned int load_factor_high; /* upper load-factor bound -- NOTE(review): units not visible in this header */
	unsigned int load_factor_low;  /* lower load-factor bound -- NOTE(review): units not visible in this header */
	ptrdiff_t al;                  /* allocator handle kept as a pointer difference, not a pointer */
	ref_t table;                   /* the entry table, stored as a ref_t */
};
//! Cut a C string in place at its first carriage return and at its first line feed.
void chomp(char* line)
{
  // Same order as always: '\r' first, then '\n' within what remains.
  static const char terminators[] = {'\r', '\n'};
  for(char stop : terminators) {
    if(char* hit = strchr(line, stop))
      *hit = 0;
  }
}
/**
 * Convert up to four point lists into an array of chart series.
 *
 * Each itemN is an array of [x, y] pairs and nameN is the key of the
 * resulting series. item1 is mandatory; item2..item4 are optional and are
 * skipped when undefined.
 *
 * Every series is filled through its own object instead of a hard-coded
 * index into the result, so leaving out an earlier optional series while
 * passing a later one no longer throws or fills the wrong series.
 *
 * @returns {Array} one {values, key, color} object per supplied item, in
 *                  argument order; values holds {x, y} objects.
 */
function getPoints(item1, name1, item2, name2, item3, name3, item4, name4) {
    // Colour is tied to the argument slot, not to the position in the result.
    var palette = ["#ff7f0e", "#0f7f0e", "#cccccc", "#cccccc"];
    var inputs = [[item1, name1], [item2, name2], [item3, name3], [item4, name4]];
    var ret = [];

    for (var s = 0; s < inputs.length; s++) {
        var item = inputs[s][0];
        // Only the optional slots may be skipped; a missing item1 still fails loudly below.
        if (s > 0 && typeof item == 'undefined') {
            continue;
        }
        var series = { values: [], key: inputs[s][1], color: palette[s] };
        for (var i = 0; i < item.length; i++) {
            series.values.push({x: item[i][0], y: item[i][1] });
        }
        ret.push(series);
    }

    return ret;
}
"#ff00ff" }); 50 | for (var i = 0; i < item2.length; i++) { 51 | ret[1].values.push({x: item2[i][0], y: item2[i][1] }); 52 | } 53 | } 54 | 55 | if(typeof item3 != 'undefined') { 56 | ret.push({ values: [], key: name3, color: "#ff0000" }); 57 | for (var i = 0; i < item3.length; i++) { 58 | ret[2].values.push({x: item3[i][0], y: item3[i][1] }); 59 | } 60 | } 61 | 62 | if(typeof item4 != 'undefined') { 63 | ret.push({ values: [], key: name4, color: "#00ff00" }); 64 | for (var i = 0; i < item4.length; i++) { 65 | ret[3].values.push({x: item4[i][0], y: item4[i][1] }); 66 | } 67 | } 68 | 69 | if(typeof item5 != 'undefined') { 70 | ret.push({ values: [], key: name5, color: "#0000ff" }); 71 | for (var i = 0; i < item5.length; i++) { 72 | ret[4].values.push({x: item5[i][0], y: item5[i][1] }); 73 | } 74 | } 75 | 76 | if(typeof item6 != 'undefined') { 77 | ret.push({ values: [], key: name6, color: "#000000" }); 78 | for (var i = 0; i < item6.length; i++) { 79 | ret[5].values.push({x: item6[i][0], y: item6[i][1] }); 80 | } 81 | } 82 | 83 | 84 | return ret; 85 | } 86 | 87 | -------------------------------------------------------------------------------- /refgenome2.cc: -------------------------------------------------------------------------------- 1 | #include "refgenome2.hh" 2 | #include 3 | #include 4 | #include 5 | #include "misc.hh" 6 | 7 | using namespace std; 8 | 9 | NucleotideStore ReferenceGenome::getRange(uint32_t offset, uint32_t len) const 10 | { 11 | auto iter=std::upper_bound(d_lookup.begin(), d_lookup.end(), offset, [](uint32_t offset, const auto& b) { 12 | return offset< b->offset; 13 | }); 14 | 15 | if(iter == d_lookup.end()) 16 | throw std::range_error("Could not find chromosome for offset "+std::to_string(offset)+" and length "+std::to_string(len)); 17 | --iter; 18 | if((*iter)->offset <= offset && offset < (*iter)->offset + (*iter)->chromosome.size()) 19 | return (*iter)->chromosome.getRange(offset - (*iter)->offset, len); 20 | else 21 | throw 
std::range_error("Could not find chromosome for offset "+std::to_string(offset)+" and length "+std::to_string(len)); 22 | } 23 | 24 | ReferenceGenome::ReferenceGenome(const boost::string_ref& fname, std::function idx) : d_fname(fname) 25 | { 26 | FILE* fp = fopen(d_fname.c_str(), "rb"); 27 | if(!fp) 28 | throw runtime_error("Unable to open reference genome file '"+d_fname+"'"); 29 | 30 | 31 | char line[256]=""; 32 | string name; 33 | ReferenceGenome::Chromosome* chromosome=0; 34 | 35 | vector running; 36 | uint32_t seenSoFar=0; 37 | 38 | while(fgets(line, sizeof(line), fp)) { 39 | chomp(line); 40 | 41 | if(line[0] == '>') { 42 | if(chromosome && idx) { 43 | running.emplace_back(idx, chromosome, name); 44 | } 45 | 46 | string fullname=line+1; 47 | 48 | char* spacepos=strchr(line+1, ' '); 49 | 50 | if(spacepos) 51 | *spacepos=0; 52 | name=line+1; 53 | 54 | if(chromosome) 55 | seenSoFar += chromosome->chromosome.size(); 56 | d_genome[name].offset = seenSoFar; 57 | d_genome[name].fullname = fullname; 58 | 59 | chromosome = &d_genome[name]; 60 | 61 | cout<<"Reading chromosome "<chromosome.append(line); 66 | } 67 | catch(std::exception& e) { 68 | cerr<<"Problem storing line "<offset < b->offset; 84 | }); 85 | 86 | 87 | 88 | cout<<"Done reading, awaiting threads"< 24 | 25 | #define ALAL(a) ((a) && (a) != stdlib_allocator ? (a) : (global_allocator ? global_allocator : 0)) 26 | #define ALREF(a,p) ((ref_t)((p) ? (char *)(p) - (char *)ALAL(a) : 0)) 27 | #define ALADR(a,r) ((void *)((r) ? 
(char *)ALAL(a) + (r) : NULL)) 28 | 29 | typedef size_t ref_t; /* suba offset from start of memory to object */ 30 | 31 | struct allocator; 32 | 33 | typedef void *(*alloc_fn)(struct allocator *al, size_t size, int flags); 34 | typedef void *(*realloc_fn)(struct allocator *al, void *obj, size_t size); 35 | typedef int (*free_fn)(void *al, void *obj); 36 | typedef int (*reclaim_fn)(struct allocator *al, void *arg, int attempt); 37 | typedef void *(*new_fn)(void *context, size_t size, int flags); 38 | typedef int (*del_fn)(void *context, void *object); 39 | 40 | struct allocator { 41 | unsigned char magic[8]; /* suba header identifier */ 42 | ref_t tail; /* offset to first cell in free list */ 43 | size_t mincell; /* min cell size must be at least sizeof cell */ 44 | size_t size; /* total size of memory area */ 45 | size_t alloc_total; /* total bytes utilized from this allocator */ 46 | size_t free_total; /* total bytes released from this allocator */ 47 | size_t size_total; /* total bytes requested from this allocator */ 48 | /* utilization = size_total / alloc_total * 100 49 | * e.g. 
// this allows us to make a Case Insensitive container
//
// Ordering implemented here (a strict weak ordering, as std::map requires):
//   1. strings made up of digits only come first, ordered by numeric value,
//      so "9" < "10" and "007" is equivalent to "7";
//   2. all other strings follow, ordered case-insensitively.
// An empty string counts as digits-only with value zero.
//
// Digit strings used to be compared numerically only against each other and
// textually against everything else, which produced cycles such as
// "9" < "10" < "1a" < "9"; using such a comparator in a std::map is undefined
// behaviour. Numbers are now compared by length and digits instead of atoi(),
// so values beyond the range of int are ordered correctly as well.
struct CIStringCompare
{
  // The typedefs std::binary_function used to provide; that base class was
  // removed from the standard library in C++17.
  typedef std::string first_argument_type;
  typedef std::string second_argument_type;
  typedef bool result_type;

  bool operator()(const std::string& a, const std::string& b) const
  {
    const bool aNum = isNumber(a), bNum = isNumber(b);
    if(aNum && bNum)
      return numericLess(a, b);
    if(aNum != bNum)
      return aNum;            // numbers sort before text
    return strcasecmp(a.c_str(), b.c_str()) < 0;
  }

private:
  // True when every character is a decimal digit (vacuously true for "").
  static bool isNumber(const std::string& s)
  {
    // isdigit() needs a value representable as unsigned char, hence the parameter type
    return std::all_of(s.begin(), s.end(), [](unsigned char c) { return ::isdigit(c) != 0; });
  }

  // Numeric '<' for digit-only strings of any length: drop leading zeros, then
  // the shorter number is smaller and equal lengths compare digit by digit.
  static bool numericLess(const std::string& a, const std::string& b)
  {
    const std::string::size_type aSkip = std::min(a.find_first_not_of('0'), a.size());
    const std::string::size_type bSkip = std::min(b.find_first_not_of('0'), b.size());
    const std::string::size_type aLen = a.size() - aSkip, bLen = b.size() - bSkip;
    if(aLen != bLen)
      return aLen < bLen;
    return a.compare(aSkip, aLen, b, bSkip, bLen) < 0;
  }
};
//! Virtual base for seekable line readers
/*! Every operation is pure virtual; the concrete readers and the make()
    factory are implemented outside this class. */
class LineReader
{
public:
  //! Virtual so that a reader can be destroyed through a LineReader pointer.
  virtual ~LineReader() {}
  //  virtual bool getLine(std::string* str) = 0;
  //! Read into 'line', given a size of 'num' -- presumably with the semantics of ::fgets; confirm in the implementations.
  virtual char* fgets(char* line, int num) = 0;
  //! Reposition the reader to 'pos'.
  virtual void seek(uint64_t pos) = 0;
  //! Current position -- presumably in uncompressed bytes, going by the name; confirm.
  virtual uint64_t getUncPos()=0;
  //! Hand 'line' back to the reader -- presumably so that the next fgets() returns it again; confirm.
  virtual void unget(char *line) = 0;
  //! Size of the input once uncompressed.
  virtual uint64_t uncompressedSize() = 0;
  //! Factory: returns a reader for 'fname'; which concrete reader is chosen is decided in the implementation file.
  static std::unique_ptr make(const std::string& fname);
};
A plain text seekable line reader 27 | class PlainLineReader : public LineReader, boost::noncopyable 28 | { 29 | public: 30 | PlainLineReader(const std::string& fname); 31 | ~PlainLineReader(); 32 | // bool getLine(std::string* str) = 0; 33 | char* fgets(char* line, int num); 34 | void seek(uint64_t pos); 35 | uint64_t getUncPos(); 36 | void unget(char *line); 37 | uint64_t uncompressedSize(); 38 | private: 39 | FILE* d_fp; 40 | std::string d_stash; 41 | }; 42 | 43 | 44 | //! A gzipped compressed seekable line reader 45 | class ZLineReader : public LineReader, boost::noncopyable 46 | { 47 | public: 48 | ZLineReader(const std::string& fname); 49 | ~ZLineReader(); 50 | // bool getLine(std::string* str); 51 | char* fgets(char* line, int num); 52 | void unget(char *line); 53 | uint64_t getUncPos() 54 | { 55 | return d_uncPos; 56 | } 57 | uint64_t uncompressedSize(); 58 | void seek(uint64_t pos); 59 | private: 60 | bool getChar(char* c); 61 | void skip(uint64_t toSkip); 62 | FILE* d_fp; 63 | 64 | struct ZState { 65 | ZState(); 66 | ZState(const ZState& orig); 67 | ~ZState(); 68 | ZState& operator=(const ZState& rhs); 69 | uint64_t fpos; 70 | z_stream s; 71 | } d_zs; 72 | int d_have; 73 | int d_datapos; 74 | 75 | char d_inbuffer[4096], d_outbuffer[32768]; 76 | std::map d_restarts; 77 | uint64_t d_uncPos; 78 | bool d_haveSeeked; 79 | std::string d_stash; 80 | }; 81 | 82 | class BGZFWriter 83 | { 84 | public: 85 | BGZFWriter(const std::string& fname); 86 | ~BGZFWriter(); 87 | uint64_t write(const char*, unsigned int len); 88 | 89 | void write32(uint32_t val); 90 | void writeBAMString(const std::string& str); 91 | void emitBlock(bool force=false); 92 | private: 93 | 94 | void beginBlock(); 95 | FILE* d_fp; 96 | z_stream d_s; 97 | std::string d_extra; 98 | gz_header d_gzheader; 99 | std::string d_block; 100 | uint32_t d_written; 101 | uint64_t d_blockstartpos; 102 | }; 103 | 104 | void emitBGZF(FILE* fp, const std::string& block); 105 | 
/**
 * Category tag for argument value types that are set through operator>>.
 * This is the default category; the type names itself as its own
 * ValueCategory.
 */
struct ValueLike {
    virtual ~ValueLike() {}
    typedef ValueLike ValueCategory;
};
47 | */ 48 | struct StringLike { 49 | virtual ~StringLike() {} 50 | }; 51 | 52 | /** 53 | * A class can inherit from this object to make it have string like 54 | * traits. This is a compile time thing and does not add any overhead 55 | * to the inherenting class. 56 | */ 57 | struct StringLikeTrait { 58 | typedef StringLike ValueCategory; 59 | virtual ~StringLikeTrait() {} 60 | }; 61 | 62 | /** 63 | * A class can inherit from this object to make it have value like 64 | * traits. This is a compile time thing and does not add any overhead 65 | * to the inherenting class. 66 | */ 67 | struct ValueLikeTrait { 68 | typedef ValueLike ValueCategory; 69 | virtual ~ValueLikeTrait() {} 70 | }; 71 | 72 | /** 73 | * Arg traits are used to get compile type specialization when parsing 74 | * argument values. Using an ArgTraits you can specify the way that 75 | * values gets assigned to any particular type during parsing. The two 76 | * supported types are StringLike and ValueLike. 77 | */ 78 | template 79 | struct ArgTraits { 80 | typedef typename T::ValueCategory ValueCategory; 81 | virtual ~ArgTraits() {} 82 | //typedef ValueLike ValueCategory; 83 | }; 84 | 85 | #endif 86 | 87 | } // namespace 88 | -------------------------------------------------------------------------------- /pfqgrep.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "misc.hh" 3 | #include "fastq.hh" 4 | #include 5 | using namespace std; 6 | 7 | map g_overlaps; 8 | 9 | bool tryMerge(const FastQRead& one, const FastQRead& two, FastQRead* together) 10 | { 11 | FastQRead inv(two); 12 | inv.reverse(); 13 | 14 | if(inv.d_nucleotides.find(one.d_nucleotides.substr(0, 19)) != string::npos) { 15 | for(int overlap = one.d_nucleotides.length() ; overlap > 19; --overlap) { 16 | if(one.d_nucleotides.substr(0, overlap) == inv.d_nucleotides.substr(inv.d_nucleotides.length()-overlap)) { 17 | g_overlaps[overlap]++; 18 | // cerr<<"Got overlap of "<d_nucleotides = 
//! Counters describing one alignment; every counter starts at zero.
struct NWunschStats
{
  NWunschStats() {}
  int matches{0};     //!< aligned positions with equal characters
  int mismatches{0};  //!< aligned positions with different characters
  int skews{0};       //!< gaps charged with the skew penalty
  int inserts{0};     //!< gaps counted as insertions
  int deletes{0};     //!< gaps counted as deletions
};
skwpen : gappen); 50 | if (i>0 && j>0) dg = cost[i-1][j-1] + 51 | ((ain[i-1] == bin[j-1])? -1. : mispen); 52 | if (dg <= min(dn,rt)) { 53 | aout[k] = ain[i-1]; 54 | bout[k] = bin[j-1]; 55 | bool match=(ain[i-1] == bin[j-1]); 56 | summary[k++] = (match ? '=' : '!'); 57 | if(match) 58 | ret.matches++; 59 | else 60 | ret.mismatches++; 61 | 62 | i--; j--; 63 | } 64 | else if (dn < rt) { 65 | aout[k] = ain[i-1]; 66 | bout[k] = ' '; 67 | summary[k++] = ' '; 68 | if(j==ib) 69 | ret.skews++; 70 | else 71 | ret.deletes++; 72 | i--; 73 | } 74 | else { 75 | aout[k] = ' '; 76 | bout[k] = bin[j-1]; 77 | summary[k++] = ' '; 78 | if(i==ia) 79 | ret.skews++; 80 | else 81 | ret.inserts++; 82 | j--; 83 | } 84 | } 85 | for (i=0;i 4 | #include 5 | #include 6 | #include 7 | #include 8 | using namespace std; 9 | 10 | extern "C" { 11 | #include "hash.h" 12 | } 13 | 14 | unique_ptr > indexFASTQ(FASTQReader* fqreader, const std::string& fname, int chunklen) 15 | { 16 | unique_ptr > hpos(new vector()); 17 | FILE* fp=fopen((fname+".index").c_str(), "rb"); 18 | 19 | if(fp) { 20 | auto size=filesize((fname+".index").c_str()); 21 | if(size % sizeof(HashedPos)) { 22 | fclose(fp); 23 | throw runtime_error("Index has wrong size. 
Sizeof(HashedPos): "+boost::lexical_cast(sizeof(HashedPos))); 24 | } 25 | hpos->resize(size/sizeof(HashedPos)); 26 | if(fread(&(*hpos)[0], 1, size, fp) != size) 27 | throw runtime_error("Index corrupt"); 28 | fclose(fp); 29 | return hpos; 30 | } 31 | cerr<<"Indexing "<getRead(&fqr)) { 34 | uint32_t h = qhash(fqr.d_nucleotides.c_str(), chunklen, 0); 35 | hpos->push_back({h, fqr.position}); 36 | fqr.reverse(); 37 | h = qhash(fqr.d_nucleotides.c_str(), chunklen, 0); 38 | hpos->push_back({h, fqr.position}); 39 | } 40 | std::sort(hpos->begin(), hpos->end()); 41 | 42 | fp=fopen((fname+".index").c_str(), "w"); 43 | for(const auto& hpo : *hpos) { 44 | fwrite(&hpo.hash, 1, sizeof(hpo.hash), fp); 45 | fwrite(&hpo.position, 1, sizeof(hpo.position), fp); 46 | } 47 | fclose(fp); 48 | 49 | return hpos; 50 | } 51 | std::map, FastQRead> g_cache; 52 | 53 | std::unordered_set g_skip; 54 | vector getConsensusMatches(const std::string& consensus, const map > >& fhpos, int chunklen) 55 | { 56 | vector ret; 57 | if(consensus.find('N') != string::npos) 58 | return ret; 59 | 60 | uint32_t h = qhash(consensus.c_str(), chunklen, 0); 61 | if(g_skip.count(h)) 62 | return ret; 63 | 64 | // cout<<"Looking for "< options; 68 | bool hadSomething=false; 69 | for(auto& hpos : fhpos) { 70 | auto range = equal_range(hpos.second->begin(), hpos.second->end(), fnd); 71 | for(;range.first != range.second; ++range.first) { 72 | hadSomething=true; 73 | FastQRead fqr; 74 | // cout<<"\tFound potential hit at offset "<position<<"!"<position))) { 77 | fqr = g_cache[make_pair(hpos.first, (uint64_t)range.first->position)]; 78 | } 79 | else { 80 | hpos.first->seek(range.first->position); 81 | hpos.first->getRead(&fqr); 82 | 83 | } 84 | if(fqr.d_nucleotides.compare(0,chunklen, consensus, 0, chunklen) != 0) { 85 | fqr.reverse(); 86 | 87 | if(fqr.d_nucleotides.compare(0,chunklen, consensus, 0, chunklen) != 0) { 88 | continue; 89 | } 90 | // g_cache[make_pair(hpos.first, (uint64_t)range.first->position)] = fqr; 91 | 
} 92 | else 93 | ; // g_cache[make_pair(hpos.first, (uint64_t)range.first->position)] = fqr; 94 | ret.push_back(fqr); 95 | } 96 | } 97 | if(!hadSomething) 98 | g_skip.insert(h); 99 | return ret; 100 | } 101 | -------------------------------------------------------------------------------- /tclap/ValuesConstraint.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | /****************************************************************************** 4 | * 5 | * file: ValuesConstraint.h 6 | * 7 | * Copyright (c) 2005, Michael E. Smoot 8 | * All rights reverved. 9 | * 10 | * See the file COPYING in the top directory of this distribution for 11 | * more information. 12 | * 13 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | * DEALINGS IN THE SOFTWARE. 20 | * 21 | *****************************************************************************/ 22 | 23 | #ifndef TCLAP_VALUESCONSTRAINT_H 24 | #define TCLAP_VALUESCONSTRAINT_H 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | #ifdef HAVE_CONFIG_H 31 | #include 32 | #else 33 | #define HAVE_SSTREAM 34 | #endif 35 | 36 | #if defined(HAVE_SSTREAM) 37 | #include 38 | #elif defined(HAVE_STRSTREAM) 39 | #include 40 | #else 41 | #error "Need a stringstream (sstream or strstream) to compile!" 42 | #endif 43 | 44 | namespace TCLAP { 45 | 46 | /** 47 | * A Constraint that constrains the Arg to only those values specified 48 | * in the constraint. 
49 | */ 50 | template 51 | class ValuesConstraint : public Constraint 52 | { 53 | 54 | public: 55 | 56 | /** 57 | * Constructor. 58 | * \param allowed - vector of allowed values. 59 | */ 60 | ValuesConstraint(std::vector& allowed); 61 | 62 | /** 63 | * Virtual destructor. 64 | */ 65 | virtual ~ValuesConstraint() {} 66 | 67 | /** 68 | * Returns a description of the Constraint. 69 | */ 70 | virtual std::string description() const; 71 | 72 | /** 73 | * Returns the short ID for the Constraint. 74 | */ 75 | virtual std::string shortID() const; 76 | 77 | /** 78 | * The method used to verify that the value parsed from the command 79 | * line meets the constraint. 80 | * \param value - The value that will be checked. 81 | */ 82 | virtual bool check(const T& value) const; 83 | 84 | protected: 85 | 86 | /** 87 | * The list of valid values. 88 | */ 89 | std::vector _allowed; 90 | 91 | /** 92 | * The string used to describe the allowed values of this constraint. 93 | */ 94 | std::string _typeDesc; 95 | 96 | }; 97 | 98 | template 99 | ValuesConstraint::ValuesConstraint(std::vector& allowed) 100 | : _allowed(allowed), 101 | _typeDesc("") 102 | { 103 | for ( unsigned int i = 0; i < _allowed.size(); i++ ) 104 | { 105 | 106 | #if defined(HAVE_SSTREAM) 107 | std::ostringstream os; 108 | #elif defined(HAVE_STRSTREAM) 109 | std::ostrstream os; 110 | #else 111 | #error "Need a stringstream (sstream or strstream) to compile!" 
112 | #endif 113 | 114 | os << _allowed[i]; 115 | 116 | std::string temp( os.str() ); 117 | 118 | if ( i > 0 ) 119 | _typeDesc += "|"; 120 | _typeDesc += temp; 121 | } 122 | } 123 | 124 | template 125 | bool ValuesConstraint::check( const T& val ) const 126 | { 127 | if ( std::find(_allowed.begin(),_allowed.end(),val) == _allowed.end() ) 128 | return false; 129 | else 130 | return true; 131 | } 132 | 133 | template 134 | std::string ValuesConstraint::shortID() const 135 | { 136 | return _typeDesc; 137 | } 138 | 139 | template 140 | std::string ValuesConstraint::description() const 141 | { 142 | return _typeDesc; 143 | } 144 | 145 | 146 | } //namespace TCLAP 147 | #endif 148 | 149 | -------------------------------------------------------------------------------- /viewer.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 54 | 55 |
56 |
57 | 58 |
59 |

Antonie 0.0 Big View

60 |
62 |

63 | Antonie is open source 64 | software, developed at the Beaumont lab at TU Delft. If you've benefited from our 67 | work, please cite xyz. 68 |

69 |

70 | 71 |

72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 |
NumDiff Filter
Percentage Filter
Genes only
Non-synonymous only
Inserts only
Deletes only
Remove universal
81 |
82 |

83 |
84 |

85 | 86 | 87 | 88 | 89 | 90 | 91 | 93 |
94 | 95 | 98 | 99 | 100 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /gfflookup.cc: -------------------------------------------------------------------------------- 1 | #include "refgenome2.hh" 2 | #include "geneannotated.hh" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "dnamisc.hh" 8 | #include 9 | #include 10 | #include 11 | #include "misc.hh" 12 | #include 13 | #include 14 | 15 | using namespace std; 16 | 17 | struct Node 18 | { 19 | std::string parent; 20 | vector children; 21 | std::string type; 22 | std::string tag; 23 | }; 24 | 25 | std::unordered_map nodes; 26 | 27 | int main(int argc, char **argv) 28 | { 29 | if(argc < 3) { 30 | cerr<<"Syntax: gfflookup annotations.gff refgenome.fna offset1 [offset2]"< genes; 42 | boost::dynamic_bitset cdsset; 43 | for(const auto& a : anns) { 44 | if(a.type=="chromosome") { 45 | cdsset.resize(a.stopPos+1); 46 | break; 47 | } 48 | } 49 | int geneCount{0}; 50 | for(const auto& a : anns) { 51 | if(a.type == "CDS" || a.type=="exon") { //a.type.get().find("RNA") != string::npos ) { 52 | for(auto i = a.startPos; i != a.stopPos; ++i) 53 | cdsset.set(i, true); 54 | } 55 | if(a.type=="gene") 56 | geneCount++; 57 | } 58 | cout<<"Total CDS/*RNA length for chromosome "< \"" << r.parent <<"\""<chromosome.getRange(r.startPos, r.stopPos-r.startPos).getRC()< 3 | * 4 | * The MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, including without limitation 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | * and/or sell copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included 14 | * in all copies or 
substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | * OTHER DEALINGS IN THE SOFTWARE. 23 | */ 24 | 25 | #include 26 | #include 27 | 28 | #include "mba/allocator.h" 29 | #include "mba/suba.h" 30 | #include "mba/msgno.h" 31 | 32 | void * 33 | allocator_alloc(struct allocator *al, size_t size, int zero) 34 | { 35 | void *p; 36 | 37 | if (!al) { 38 | al = global_allocator ? global_allocator : stdlib_allocator; 39 | } 40 | 41 | if (al->tail) { /* fn ptr in shared mem may be invalid */ 42 | p = suba_alloc(al, size, zero); 43 | } else { 44 | p = al->alloc(al, size, zero); 45 | } 46 | if (p == NULL) { 47 | AMSG(""); 48 | } 49 | 50 | return p; 51 | } 52 | void * 53 | allocator_realloc(struct allocator *al, void *obj, size_t size) 54 | { 55 | void *p; 56 | 57 | if (!al) { 58 | al = global_allocator ? global_allocator : stdlib_allocator; 59 | } 60 | 61 | if (al->tail) { /* fn ptr in shared mem may be invalid */ 62 | p = suba_realloc(al, obj, size); 63 | } else { 64 | p = al->realloc(al, obj, size); 65 | } 66 | if (p == NULL && size) { 67 | AMSG(""); 68 | } 69 | 70 | return p; 71 | } 72 | int 73 | allocator_free(void *al0, void *obj) 74 | { 75 | struct allocator *al = al0; 76 | 77 | if (!al) { 78 | al = global_allocator ? 
global_allocator : stdlib_allocator; 79 | } 80 | 81 | if (al->tail) { /* fn ptr in shared mem may be invalid */ 82 | if (suba_free(al, obj) == -1) { 83 | AMSG(""); 84 | return -1; 85 | } 86 | } else if (al->free(al, obj) == -1) { 87 | AMSG(""); 88 | return -1; 89 | } 90 | 91 | return 0; 92 | } 93 | void 94 | allocator_set_reclaim(struct allocator *al, reclaim_fn recl, void *arg) 95 | { 96 | if (!al) { 97 | if (global_allocator) { 98 | al = global_allocator; 99 | } else { 100 | return; /* stdlib_allocator does not support reclaim_fn */ 101 | } 102 | } 103 | 104 | al->reclaim = recl; 105 | al->reclaim_arg = arg; 106 | } 107 | 108 | void * 109 | stdlib_alloc(struct allocator *al, size_t size, int zero) 110 | { 111 | void *p; 112 | 113 | if (zero) { 114 | p = calloc(1, size); 115 | } else { 116 | p = malloc(size); 117 | } 118 | if (p == NULL) { 119 | PMNO(errno); 120 | return NULL; 121 | } 122 | 123 | (void)al; 124 | return p; 125 | } 126 | void * 127 | stdlib_realloc(struct allocator *al, void *obj, size_t size) 128 | { 129 | void *p; 130 | 131 | if ((p = realloc(obj, size)) == NULL && size) { 132 | PMNO(errno); 133 | } 134 | 135 | (void)al; 136 | return p; 137 | } 138 | int 139 | stdlib_free(void *al, void *obj) 140 | { 141 | free(obj); 142 | (void)al; 143 | return 0; 144 | } 145 | 146 | struct allocator stdlib_allocator0 = { 147 | "", 0, 0, 0, 0, 0, 0, 0, 148 | &stdlib_alloc, 149 | &stdlib_realloc, 150 | &stdlib_free, 151 | NULL, NULL, 0, 0 152 | }; 153 | 154 | struct allocator *stdlib_allocator = &stdlib_allocator0; 155 | struct allocator *global_allocator = NULL; 156 | 157 | -------------------------------------------------------------------------------- /fastq.cc: -------------------------------------------------------------------------------- 1 | #include "fastq.hh" 2 | #include 3 | #include 4 | #include 5 | #include "misc.hh" 6 | #include 7 | using namespace std; 8 | 9 | uint64_t StereoFASTQReader::s_mask= ~(1ULL<<63); 10 | 11 | FASTQReader::FASTQReader(const 
std::string& str, unsigned int qoffset) 12 | : d_snipLeft(0), d_snipRight(0), d_reader(LineReader::make(str)) 13 | { 14 | d_qoffset=qoffset; 15 | } 16 | 17 | bool FastQRead::exceedsQuality(unsigned int limit) 18 | { 19 | uint8_t q; 20 | for(string::size_type pos = 0 ; pos < d_quality.size(); ++pos) { 21 | q = d_quality[pos]; 22 | if(q < limit) 23 | return false; 24 | } 25 | return true; 26 | } 27 | 28 | string FastQRead::getSangerQualityString() const 29 | { 30 | string quality{d_quality}; 31 | for(auto& c : quality) 32 | c+=33; 33 | return quality; 34 | } 35 | 36 | void FastQRead::reverse() 37 | { 38 | reverseNucleotides(&d_nucleotides); 39 | std::reverse(d_quality.begin(), d_quality.end()); 40 | reversed = !reversed; 41 | } 42 | 43 | std::string FastQRead::getNameFromHeader() const 44 | { 45 | string name; 46 | string::size_type spacepos = d_header.find(' '); 47 | if(spacepos != string::npos) 48 | name = d_header.substr(0, spacepos); 49 | else 50 | name = d_header; 51 | return name; 52 | } 53 | 54 | unsigned int FASTQReader::getRead(FastQRead* fq) 55 | { 56 | uint64_t pos = d_reader->getUncPos(); 57 | char line[1024]=""; 58 | if(!d_reader->fgets(line, sizeof(line))) 59 | return 0; 60 | if(line[0] != '@') 61 | throw runtime_error("Input not FASTQ, line: '"+string(line)+"'"); 62 | 63 | chomp(line); 64 | fq->d_header.assign(line+1); 65 | 66 | d_reader->fgets(line, sizeof(line)); 67 | chomp(line); 68 | 69 | if((d_snipLeft || d_snipRight) && (d_snipLeft + d_snipRight < strlen(line))) 70 | fq->d_nucleotides.assign(line + d_snipLeft, strlen(line) -d_snipLeft-d_snipRight); 71 | else 72 | fq->d_nucleotides.assign(line); 73 | d_reader->fgets(line, sizeof(line)); 74 | d_reader->fgets(line, sizeof(line)); 75 | 76 | chomp(line); 77 | 78 | if((d_snipLeft || d_snipRight) && (d_snipLeft + d_snipRight < strlen(line))) 79 | fq->d_quality.assign(line + d_snipLeft, strlen(line)-d_snipLeft-d_snipRight); 80 | else 81 | fq->d_quality.assign(line); 82 | 83 | for(auto& c : fq->d_quality) 
{ 84 | if((unsigned int)c < d_qoffset) 85 | throw runtime_error("Attempting to parse a quality code of val "+boost::lexical_cast((int)c)+" which is < our quality offset"); 86 | c -= d_qoffset; 87 | } 88 | 89 | fq->reversed=0; 90 | fq->position=pos; 91 | return d_reader->getUncPos() - pos; 92 | } 93 | 94 | uint64_t FASTQReader::estimateReads() 95 | { 96 | uint64_t pos = d_reader->getUncPos(); 97 | FastQRead fqr; 98 | auto size = getRead(&fqr); 99 | seek(pos); 100 | return d_reader->uncompressedSize() / size; 101 | } 102 | 103 | unsigned int StereoFASTQReader::getRead(uint64_t pos, FastQRead* fq) 104 | { 105 | unsigned int ret; 106 | if(pos & (1ULL<<63)) { 107 | d_fq2.seek(pos & s_mask); 108 | ret=d_fq2.getRead(fq); 109 | } 110 | else { 111 | d_fq1.seek(pos); 112 | ret=d_fq1.getRead(fq); 113 | } 114 | 115 | fq->position = pos; 116 | return ret; 117 | } 118 | 119 | void StereoFASTQReader::seek(uint64_t pos) 120 | { 121 | d_fq1.seek(pos); 122 | d_fq2.seek(pos); 123 | } 124 | 125 | uint64_t StereoFASTQReader::estimateReads() 126 | { 127 | return d_fq1.estimateReads() + d_fq2.estimateReads(); 128 | } 129 | 130 | void StereoFASTQReader::setTrim(unsigned int trimLeft, unsigned int trimRight) 131 | { 132 | d_fq1.setTrim(trimLeft, trimRight); 133 | d_fq2.setTrim(trimLeft, trimRight); 134 | } 135 | 136 | unsigned int StereoFASTQReader::getReadPair(FastQRead* fq1, FastQRead* fq2) 137 | { 138 | unsigned int ret1, ret2; 139 | ret1=d_fq1.getRead(fq1); 140 | ret2=d_fq2.getRead(fq2); 141 | 142 | // if(ret1 != ret2) { 143 | // throw runtime_error("Difference between paired files in read: " + boost::lexical_cast(ret1) +" != "+ boost::lexical_cast(ret2)); 144 | // } 145 | fq2->position |= (1ULL<<63); 146 | return ret1; 147 | } 148 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /refgenome.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 
#include 6 | #include 7 | #include "geneannotated.hh" 8 | #include "antonie.hh" 9 | #include "fastq.hh" 10 | 11 | using std::string; 12 | using std::vector; 13 | using std::unordered_map; 14 | using std::map; 15 | using std::forward_list; 16 | using std::unique_ptr; 17 | 18 | //! Position of a FastQRead that is mapped here, and how (reverse complemented or with an indel, and where) 19 | struct FASTQMapping 20 | { 21 | uint64_t pos; 22 | bool reverse; 23 | int indel; // 0 = nothing, >0 means WE have an insert versus reference at pos 24 | // <0 means WE have a delete versus reference at pos 25 | }; 26 | 27 | //! List of all FASTQMapping s that map to a locus, plus coverage statistic 28 | struct GenomeLocusMapping 29 | { 30 | GenomeLocusMapping() : coverage(0) {} 31 | forward_list d_fastqs; 32 | unsigned int coverage; 33 | }; 34 | 35 | 36 | //! A region with little coverage 37 | struct Unmatched 38 | { 39 | string left, unmatched, right; 40 | dnapos_t pos; 41 | }; 42 | 43 | //! Represents a reference genome to be aligned against 44 | class ReferenceChromosome 45 | { 46 | public: 47 | ReferenceChromosome(const string& fname); //!< Read reference from FASTA 48 | 49 | static unique_ptr makeFromString(const string& str); 50 | dnapos_t size() const { 51 | return d_genome.size() - 1; // we pad at the beginning so we are 1 based.. 52 | } 53 | vector getMatchingHashes(const vector& hashes); 54 | 55 | //! 
Describes how a FastQRead (not mentioned) matches to the reference (straight or in reverse), and what the matching score is 56 | struct MatchDescriptor 57 | { 58 | ReferenceChromosome* rg; 59 | dnapos_t pos; 60 | bool reverse; 61 | int score; 62 | }; 63 | void mapFastQ(dnapos_t pos, const FastQRead& fqfrag, int indel=0); 64 | void cover(dnapos_t pos, char quality, int limit); 65 | void cover(dnapos_t pos, unsigned int length, const std::string& quality, int limit) ; 66 | vector getAllReadPosBoth(FastQRead* fq); // tries original & complement 67 | dnapos_t getReadPosBoth(FastQRead* fq, int qlimit); // tries original & complement 68 | vector getReadPositions(const std::string& nucleotides); 69 | 70 | vector getGCHisto(); 71 | string snippet(dnapos_t start, dnapos_t stop) const; 72 | 73 | void printCoverage(FILE* jsfp, const std::string& fname); 74 | void index(unsigned int length); 75 | 76 | string getMatchingFastQs(dnapos_t pos, StereoFASTQReader& fastq); 77 | string getMatchingFastQs(dnapos_t start, dnapos_t stop, StereoFASTQReader& fastq); 78 | vector d_mapping; 79 | vector d_correctMappings, d_wrongMappings, d_gcMappings, d_taMappings; 80 | 81 | vector d_unmRegions; 82 | //! statistics for a locus 83 | struct LociStats 84 | { 85 | //! 
A difference in this locus 86 | struct Difference 87 | { 88 | char nucleotide; 89 | char quality; 90 | bool headOrTail; 91 | string insert; 92 | bool operator<(const Difference& b) const 93 | { 94 | return std::tie(nucleotide, quality) < std::tie(b.nucleotide, b.quality); 95 | } 96 | }; 97 | vector samples; 98 | }; 99 | dnapos_t d_aCount, d_cCount, d_gCount, d_tCount; 100 | typedef unordered_map locimap_t; 101 | locimap_t d_locimap; 102 | unordered_map d_insertCounts; 103 | string d_name; 104 | string d_fullname; 105 | unique_ptr d_gar; 106 | void addAnnotations(GeneAnnotationReader* gar) 107 | { 108 | d_gar=unique_ptr(gar); 109 | } 110 | private: 111 | ReferenceChromosome() = default; 112 | void initGenome(); 113 | string d_genome; 114 | struct HashPos { 115 | HashPos(uint32_t hash_, dnapos_t pos) : d_hash(hash_), d_pos(pos) 116 | {} 117 | HashPos(){} 118 | uint32_t d_hash; 119 | dnapos_t d_pos; 120 | 121 | bool operator<(const HashPos& rhs) const 122 | { 123 | return d_hash < rhs.d_hash; 124 | } 125 | }; 126 | 127 | typedef vector index_t; 128 | map d_indexes; 129 | }; 130 | -------------------------------------------------------------------------------- /tclap/CmdLineInterface.h: -------------------------------------------------------------------------------- 1 | 2 | /****************************************************************************** 3 | * 4 | * file: CmdLineInterface.h 5 | * 6 | * Copyright (c) 2003, Michael E. Smoot . 7 | * Copyright (c) 2004, Michael E. Smoot, Daniel Aarno. 8 | * All rights reverved. 9 | * 10 | * See the file COPYING in the top directory of this distribution for 11 | * more information. 12 | * 13 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 16 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | * DEALINGS IN THE SOFTWARE. 20 | * 21 | *****************************************************************************/ 22 | 23 | #ifndef TCLAP_COMMANDLINE_INTERFACE_H 24 | #define TCLAP_COMMANDLINE_INTERFACE_H 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | 33 | namespace TCLAP { 34 | 35 | class Arg; 36 | class CmdLineOutput; 37 | class XorHandler; 38 | 39 | /** 40 | * The base class that manages the command line definition and passes 41 | * along the parsing to the appropriate Arg classes. 42 | */ 43 | class CmdLineInterface 44 | { 45 | public: 46 | 47 | /** 48 | * Destructor 49 | */ 50 | virtual ~CmdLineInterface() {} 51 | 52 | /** 53 | * Adds an argument to the list of arguments to be parsed. 54 | * \param a - Argument to be added. 55 | */ 56 | virtual void add( Arg& a )=0; 57 | 58 | /** 59 | * An alternative add. Functionally identical. 60 | * \param a - Argument to be added. 61 | */ 62 | virtual void add( Arg* a )=0; 63 | 64 | /** 65 | * Add two Args that will be xor'd. 66 | * If this method is used, add does 67 | * not need to be called. 68 | * \param a - Argument to be added and xor'd. 69 | * \param b - Argument to be added and xor'd. 70 | */ 71 | virtual void xorAdd( Arg& a, Arg& b )=0; 72 | 73 | /** 74 | * Add a list of Args that will be xor'd. If this method is used, 75 | * add does not need to be called. 76 | * \param xors - List of Args to be added and xor'd. 77 | */ 78 | virtual void xorAdd( std::vector& xors )=0; 79 | 80 | /** 81 | * Parses the command line. 82 | * \param argc - Number of arguments. 83 | * \param argv - Array of arguments. 84 | */ 85 | virtual void parse(int argc, const char * const * argv)=0; 86 | 87 | /** 88 | * Parses the command line. 
89 | * \param args - A vector of strings representing the args. 90 | * args[0] is still the program name. 91 | */ 92 | void parse(std::vector& args); 93 | 94 | /** 95 | * Returns the CmdLineOutput object. 96 | */ 97 | virtual CmdLineOutput* getOutput()=0; 98 | 99 | /** 100 | * \param co - CmdLineOutput object that we want to use instead. 101 | */ 102 | virtual void setOutput(CmdLineOutput* co)=0; 103 | 104 | /** 105 | * Returns the version string. 106 | */ 107 | virtual std::string& getVersion()=0; 108 | 109 | /** 110 | * Returns the program name string. 111 | */ 112 | virtual std::string& getProgramName()=0; 113 | 114 | /** 115 | * Returns the argList. 116 | */ 117 | virtual std::list& getArgList()=0; 118 | 119 | /** 120 | * Returns the XorHandler. 121 | */ 122 | virtual XorHandler& getXorHandler()=0; 123 | 124 | /** 125 | * Returns the delimiter string. 126 | */ 127 | virtual char getDelimiter()=0; 128 | 129 | /** 130 | * Returns the message string. 131 | */ 132 | virtual std::string& getMessage()=0; 133 | 134 | /** 135 | * Indicates whether or not the help and version switches were created 136 | * automatically. 137 | */ 138 | virtual bool hasHelpAndVersion()=0; 139 | 140 | /** 141 | * Resets the instance as if it had just been constructed so that the 142 | * instance can be reused. 143 | */ 144 | virtual void reset()=0; 145 | }; 146 | 147 | } //namespace 148 | 149 | 150 | #endif 151 | -------------------------------------------------------------------------------- /ccan/hash/test/run.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define ARRAY_WORDS 5 8 | 9 | int main(int argc, char *argv[]) 10 | { 11 | unsigned int i, j, k; 12 | uint32_t array[ARRAY_WORDS], val; 13 | char array2[sizeof(array) + sizeof(uint32_t)]; 14 | uint32_t results[256]; 15 | 16 | /* Initialize array. 
*/ 17 | for (i = 0; i < ARRAY_WORDS; i++) 18 | array[i] = i; 19 | 20 | plan_tests(39); 21 | /* Hash should be the same, indep of memory alignment. */ 22 | val = hash(array, ARRAY_WORDS, 0); 23 | for (i = 0; i < sizeof(uint32_t); i++) { 24 | memcpy(array2 + i, array, sizeof(array)); 25 | ok(hash(array2 + i, ARRAY_WORDS, 0) != val, 26 | "hash matched at offset %i", i); 27 | } 28 | 29 | /* Hash of random values should have random distribution: 30 | * check one byte at a time. */ 31 | for (i = 0; i < sizeof(uint32_t); i++) { 32 | unsigned int lowest = -1U, highest = 0; 33 | 34 | memset(results, 0, sizeof(results)); 35 | 36 | for (j = 0; j < 256000; j++) { 37 | for (k = 0; k < ARRAY_WORDS; k++) 38 | array[k] = random(); 39 | results[(hash(array, ARRAY_WORDS, 0) >> i*8)&0xFF]++; 40 | } 41 | 42 | for (j = 0; j < 256; j++) { 43 | if (results[j] < lowest) 44 | lowest = results[j]; 45 | if (results[j] > highest) 46 | highest = results[j]; 47 | } 48 | /* Expect within 20% */ 49 | ok(lowest > 800, "Byte %i lowest %i", i, lowest); 50 | ok(highest < 1200, "Byte %i highest %i", i, highest); 51 | diag("Byte %i, range %u-%u", i, lowest, highest); 52 | } 53 | 54 | /* Hash of random values should have random distribution: 55 | * check one byte at a time. 
*/ 56 | for (i = 0; i < sizeof(uint64_t); i++) { 57 | unsigned int lowest = -1U, highest = 0; 58 | 59 | memset(results, 0, sizeof(results)); 60 | 61 | for (j = 0; j < 256000; j++) { 62 | for (k = 0; k < ARRAY_WORDS; k++) 63 | array[k] = random(); 64 | results[(hash64(array, sizeof(array)/sizeof(uint64_t), 65 | 0) >> i*8)&0xFF]++; 66 | } 67 | 68 | for (j = 0; j < 256; j++) { 69 | if (results[j] < lowest) 70 | lowest = results[j]; 71 | if (results[j] > highest) 72 | highest = results[j]; 73 | } 74 | /* Expect within 20% */ 75 | ok(lowest > 800, "Byte %i lowest %i", i, lowest); 76 | ok(highest < 1200, "Byte %i highest %i", i, highest); 77 | diag("Byte %i, range %u-%u", i, lowest, highest); 78 | } 79 | 80 | /* Hash of pointer values should also have random distribution. */ 81 | for (i = 0; i < sizeof(uint32_t); i++) { 82 | unsigned int lowest = -1U, highest = 0; 83 | char *p = malloc(256000); 84 | 85 | memset(results, 0, sizeof(results)); 86 | 87 | for (j = 0; j < 256000; j++) 88 | results[(hash_pointer(p + j, 0) >> i*8)&0xFF]++; 89 | free(p); 90 | 91 | for (j = 0; j < 256; j++) { 92 | if (results[j] < lowest) 93 | lowest = results[j]; 94 | if (results[j] > highest) 95 | highest = results[j]; 96 | } 97 | /* Expect within 20% */ 98 | ok(lowest > 800, "hash_pointer byte %i lowest %i", i, lowest); 99 | ok(highest < 1200, "hash_pointer byte %i highest %i", 100 | i, highest); 101 | diag("hash_pointer byte %i, range %u-%u", i, lowest, highest); 102 | } 103 | 104 | if (sizeof(long) == sizeof(uint32_t)) 105 | ok1(hashl(array, ARRAY_WORDS, 0) 106 | == hash(array, ARRAY_WORDS, 0)); 107 | else 108 | ok1(hashl(array, ARRAY_WORDS, 0) 109 | == hash64(array, ARRAY_WORDS, 0)); 110 | 111 | /* String hash: weak, so only test bottom byte */ 112 | for (i = 0; i < 1; i++) { 113 | unsigned int num = 0, cursor, lowest = -1U, highest = 0; 114 | char p[5]; 115 | 116 | memset(results, 0, sizeof(results)); 117 | 118 | memset(p, 'A', sizeof(p)); 119 | p[sizeof(p)-1] = '\0'; 120 | 121 | for (;;) { 
122 | for (cursor = 0; cursor < sizeof(p)-1; cursor++) { 123 | p[cursor]++; 124 | if (p[cursor] <= 'z') 125 | break; 126 | p[cursor] = 'A'; 127 | } 128 | if (cursor == sizeof(p)-1) 129 | break; 130 | 131 | results[(hash_string(p) >> i*8)&0xFF]++; 132 | num++; 133 | } 134 | 135 | for (j = 0; j < 256; j++) { 136 | if (results[j] < lowest) 137 | lowest = results[j]; 138 | if (results[j] > highest) 139 | highest = results[j]; 140 | } 141 | /* Expect within 20% */ 142 | ok(lowest > 35000, "hash_pointer byte %i lowest %i", i, lowest); 143 | ok(highest < 53000, "hash_pointer byte %i highest %i", 144 | i, highest); 145 | diag("hash_pointer byte %i, range %u-%u", i, lowest, highest); 146 | } 147 | 148 | return exit_status(); 149 | } 150 | -------------------------------------------------------------------------------- /gtfreader.cc: -------------------------------------------------------------------------------- 1 | #include "misc.hh" 2 | #include 3 | #include 4 | #include 5 | #include 6 | /* chromo source type start stop ? strand ? key "val" ; key "val"; 7 | 1 havana gene 11869 14409 . + . 
/**
 * Return the next blank-delimited token of `line`, scanning from *pos.
 *
 * Leading spaces and tabs are skipped, then characters are consumed up to
 * (not including) the next space or tab.  *pos is advanced in place and ends
 * up on that delimiter.  The returned view refers to the storage of `line`.
 *
 * @throws std::out_of_range when the scan reaches the end of `line` before a
 *         delimiter is found (all accesses are bounds-checked); *pos then
 *         equals line.size().
 */
std::string_view getNextWord(const std::string& line, std::string::size_type* pos)
{
  const auto blankAt = [&line](std::string::size_type idx) {
    const char ch = line.at(idx);
    return ch == ' ' || ch == '\t';
  };

  for(; blankAt(*pos); ++*pos) {
  }
  const auto begin = *pos;
  for(; !blankAt(*pos); ++*pos) {
  }

  return std::string_view(line).substr(begin, *pos - begin);
}


/**
 * Return the contents of the next double-quoted token of `line`, scanning
 * from *pos.
 *
 * Leading spaces and tabs are skipped; the next character must be '"'.  The
 * text up to the closing '"' is returned without the quotes and *pos is left
 * just behind the closing quote.  The returned view refers to the storage of
 * `line`.
 *
 * @throws std::runtime_error when the token does not start with '"'.
 * @throws std::out_of_range  when the end of `line` is reached while skipping
 *         blanks or looking for the closing quote.
 */
std::string_view getNextQuotedWord(const std::string& line, std::string::size_type* pos)
{
  const auto blankAt = [&line](std::string::size_type idx) {
    const char ch = line.at(idx);
    return ch == ' ' || ch == '\t';
  };

  for(; blankAt(*pos); ++*pos) {
  }

  if(line.at(*pos) != '"')
    throw std::runtime_error("Quoted word was not quoted: " + line.substr(*pos));

  const auto begin = ++*pos;          // first character after the opening quote
  for(; line.at(*pos) != '"'; ++*pos) {
  }
  const auto length = *pos - begin;
  ++*pos;                             // step over the closing quote

  return std::string_view(line).substr(begin, length);
}
ID=gene:ENSG00000049239;Name=H6PD;biotype=protein_coding;description=hexose-6-phosphate dehydrogenase/glucose 1-dehydrogenase [Source:HGNC Symbol%3BAcc:HGNC:4795];gene_id=ENSG00000049239;logic_name=ensembl_havana_gene;version=12 16 | 17 | GeneAnnotationReader::GeneAnnotationReader(const std::string& fname) 18 | { 19 | if(fname.empty()) 20 | return; 21 | 22 | if(!boost::ends_with(fname, ".gff") && !boost::ends_with(fname, ".gff3")) { 23 | parseGenBank(fname); 24 | return; 25 | } 26 | FILE* fp=fopen(fname.c_str(), "rb"); 27 | if(!fp) 28 | throw runtime_error("Unable to open '"+fname+"' for gene annotation reading"); 29 | 30 | string line; 31 | map > > gas; 32 | while(stringfgets(fp, &line)) { 33 | GeneAnnotation ga; 34 | ga.gene=false; 35 | if(line[0]=='#') { 36 | continue; 37 | } 38 | const char* p=strtok((char*)line.c_str(), "\t\n"); 39 | int field=0; 40 | string attributeStr; 41 | do { 42 | switch(field) { 43 | case 0: 44 | ga.chromosome = p; 45 | break; 46 | case 2: 47 | ga.type=p; 48 | break; 49 | case 3: 50 | ga.startPos=atoi(p); 51 | break; 52 | case 4: 53 | ga.stopPos=atoi(p); 54 | break; 55 | case 6: 56 | ga.strand = (*p=='+'); 57 | break; 58 | case 8: 59 | attributeStr=p; 60 | break; 61 | 62 | } 63 | field++; 64 | } while((p=strtok(0, "\t\n"))); 65 | // if(ga.type=="repeat_region") 66 | // continue; 67 | 68 | 69 | map attributes; 70 | if((p=strtok((char*)attributeStr.c_str(), ";"))) { 71 | do { 72 | const char *e = strchr(p, '='); 73 | if(e) { 74 | attributes[string{p,e}]=e+1; 75 | } 76 | }while((p=strtok(0, ";"))); 77 | } 78 | ga.tag.clear(); 79 | 80 | 81 | for(const auto& val : attributes) { 82 | if(val.first=="Note" || val.first=="Name" || val.first=="Product" || val.first=="product") { 83 | ga.tag.append(val.second); 84 | ga.tag.append(" "); 85 | } 86 | if(val.first=="genome" && val.second=="chromosome") 87 | goto no; 88 | else if(val.first=="ID") 89 | ga.id = val.second; 90 | else if(val.first=="Parent") 91 | ga.parent = val.second; 92 | } 93 | 94 | 
if(ga.type =="gene" || ga.type=="CDS" || ga.type=="cds") 95 | ga.gene=true; 96 | if(!ga.tag.empty()) { 97 | ga.tag = ga.type.get() + ": "+ ga.tag; 98 | } 99 | else 100 | ga.tag=ga.type.get(); 101 | 102 | gas[ga.chromosome].push_back(Interval(ga.startPos, ga.stopPos, ga)); 103 | no:; 104 | } 105 | for(const auto& ga : gas) { 106 | vector> vec = ga.second; 107 | IntervalTree tree(std::move(vec)); 108 | 109 | d_gas[ga.first]=tree; 110 | } 111 | } 112 | 113 | vector GeneAnnotationReader::getAll(string_view chromo) 114 | { 115 | vector ret; 116 | d_gas[(string)chromo].visit_all([&ret](const auto& i) { 117 | ret.push_back(i.value); 118 | }); 119 | return ret; 120 | } 121 | 122 | vector GeneAnnotationReader::lookup(string_view chromo, uint64_t pos1) 123 | { 124 | vector ret; 125 | 126 | auto results = d_gas[(string)chromo].findOverlapping(pos1, pos1); 127 | for(const auto& res : results) { 128 | ret.push_back(res.value); 129 | } 130 | return ret; 131 | } 132 | 133 | vector GeneAnnotationReader::lookup(string_view chromo, uint64_t pos1, uint64_t pos2) 134 | { 135 | vector ret; 136 | 137 | auto results = d_gas[(string)chromo].findContained(pos1, pos2); 138 | for(const auto& res : results) { 139 | ret.push_back(res.value); 140 | } 141 | return ret; 142 | } 143 | 144 | 145 | void GeneAnnotationReader::parseGenBank(const std::string& fname) 146 | { 147 | FILE* fp=fopen(fname.c_str(), "rb"); 148 | if(!fp) 149 | throw runtime_error("Unable to open '"+fname+"' for gene annotation reading"); 150 | 151 | string line; 152 | 153 | // 154 | while(stringfgets(fp, &line)) { 155 | if(line.find("FEATURES") == 0) 156 | break; 157 | } 158 | 159 | string genbank; 160 | while(stringfgets(fp, &line)) { 161 | if(!isspace(line[0])) 162 | break; 163 | 164 | boost::trim_right(line); 165 | 166 | genbank+=line+"\n"; 167 | } 168 | // d_gas[""]=parseGenBankString(genbank); 169 | } 170 | -------------------------------------------------------------------------------- /genbankparser.cc: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "geneannotated.hh" 9 | using std::cout; 10 | using std::endl; 11 | 12 | namespace { 13 | 14 | struct State 15 | { 16 | void clear() 17 | { 18 | startLocus = stopLocus = 0; 19 | strand = true; 20 | features.clear(); 21 | } 22 | uint32_t startLocus; 23 | uint32_t stopLocus; 24 | bool strand; 25 | std::string kind; 26 | std::vector > features; 27 | } state; 28 | 29 | std::vector g_ret; 30 | 31 | void reportKind(const std::string& kind) 32 | { 33 | if(!state.kind.empty()) { 34 | GeneAnnotation ga; 35 | ga.startPos = state.startLocus; 36 | ga.stopPos = state.stopLocus; 37 | ga.strand = state.strand; 38 | ga.type=state.kind; 39 | ga.gene=false; 40 | /* 41 | cout<<"Should emit '"<second=std::to_string(val); 86 | } 87 | 88 | void stringValue(const std::string& val) 89 | { 90 | state.features.rbegin()->second=val; 91 | } 92 | 93 | 94 | } 95 | 96 | namespace qi = boost::spirit::qi; 97 | namespace ascii = boost::spirit::ascii; 98 | 99 | 100 | std::vector parseGenBankString(const std::string& bank) 101 | { 102 | auto first = bank.begin(); 103 | auto last=bank.end(); 104 | g_ret.clear(); 105 | 106 | using qi::phrase_parse; 107 | using qi::lit; 108 | using qi::lexeme; 109 | using qi::alpha; 110 | using qi::char_; 111 | using qi::int_; 112 | using ascii::space; 113 | 114 | qi::rule quoted_string, unquoted_string, number_range, unquoted_allcaps_string; 115 | quoted_string %= lexeme['"' >> +(char_ - '"') >> '"']; 116 | unquoted_string %= lexeme[+(alpha | char_('_'))]; 117 | number_range %= lexeme[-char_('<') >> int_[startLocus] >> lit("..") >> -char_('>') >> int_[stopLocus]]; 118 | 119 | unquoted_allcaps_string = lexeme[+char_('A','Z')]; 120 | 121 | qi::rule base_range; 122 | 123 | base_range %= (number_range) | 124 | (lit("complement(")[complement] >> number_range >> char_(')')) | 125 | 
(lit("order(") >> *(number_range >> -char_(',') ) >> lit(")") ) | 126 | (lit("join(") >> *(number_range >> -char_(',') ) >> lit(")") ) | 127 | (lit("complement(order(")[complement] >> *(number_range >> -char_(',') ) >> lit("))") ) | 128 | (lit("complement(join(")[complement] >> *(number_range >> -char_(',') ) >> lit("))")); 129 | 130 | bool r=phrase_parse( 131 | first, 132 | last, 133 | *((unquoted_string[reportKind] >> 134 | base_range 135 | >> *(char_('/') >> ( 136 | (lit("transl_except=(pos:") >> base_range >> char_(',') >> lit("aa:") >> unquoted_string >> lit(")")) | 137 | (lit("anticodon=(pos:") >> base_range >> char_(',')>>lit("aa:") >> unquoted_string >> char_(',') >> lit("seq:") >> unquoted_string >> lit(")")) | 138 | (unquoted_string[variable] >> -(char_('=') >> (int_[value] | quoted_string[stringValue] | unquoted_allcaps_string ))) 139 | 140 | ) 141 | ) 142 | ) 143 | ) 144 | 145 | 146 | , 147 | space /*< the skip-parser >*/ 148 | ); 149 | if (!r || first != last) {// fail if we did not get a full match 150 | cout<<"Failed at: '"<startPos<<"'"<a link 6 | The configuration dict with CSS class and width is optional - default is class .pup and width of 200px. 7 | You can style the popup box via CSS, targeting its ID #pup. 8 | You can escape " in the popup text with ". 9 | Tutorial and support at http://nicolashoening.de?twocents&nr=8 10 | -------------------------------------------------------------------------- 11 | */ 12 | 13 | nhpup = { 14 | 15 | pup: null, // This is out popup box, represented by a div 16 | identifier: "pup", // Name of ID and class of the popup box 17 | minMargin: 15, // Set how much minimal space there should be (in pixels) 18 | // between the popup and everything else (borders, mouse) 19 | default_width: 200, // Will be set to width from css in document.ready 20 | move: false, // Move it around with the mouse? we are only ready for that when the mouse event is set up. 
21 | // Besides, having this turned off intially is resource-friendly. 22 | 23 | /* 24 | Write message, show popup w/ custom width if necessary, 25 | make sure it disappears on mouseout 26 | */ 27 | popup: function(p_msg, p_config) 28 | { 29 | // do track mouse moves and update position 30 | this.move = true; 31 | // restore defaults 32 | this.pup.removeClass() 33 | .addClass(this.identifier) 34 | .width(this.default_width); 35 | 36 | // custom configuration 37 | if (typeof p_config != 'undefined') { 38 | if ('class' in p_config) { 39 | this.pup.addClass(p_config['class']); 40 | } 41 | if ('width' in p_config) { 42 | this.pup.width(p_config['width']); 43 | } 44 | } 45 | 46 | // Write content and display 47 | this.pup.html(p_msg).show(); 48 | 49 | // Make sure popup goes away on mouse out and we stop the constant 50 | // positioning on mouse moves. 51 | // The event obj needs to be gotten from the virtual 52 | // caller, since we use onmouseover='nhpup.popup(p_msg)' 53 | var t = this.getTarget(arguments.callee.caller.arguments[0]); 54 | $(t).unbind('mouseout').bind('mouseout', 55 | function(e){ 56 | nhpup.pup.hide(); 57 | nhpup.move = false; 58 | } 59 | ); 60 | }, 61 | 62 | // set the target element position 63 | setElementPos: function(x, y) 64 | { 65 | // Call nudge to avoid edge overflow. 
Important tweak: x+10, because if 66 | // the popup is where the mouse is, the hoverOver/hoverOut events flicker 67 | var x_y = this.nudge(x + 10, y); 68 | // remember: the popup is still hidden 69 | this.pup.css('top', x_y[1] + 'px') 70 | .css('left', x_y[0] + 'px'); 71 | }, 72 | 73 | /* Avoid edge overflow */ 74 | nudge: function(x,y) 75 | { 76 | var win = $(window); 77 | 78 | // When the mouse is too far on the right, put window to the left 79 | var xtreme = $(document).scrollLeft() + win.width() - this.pup.width() - this.minMargin; 80 | if(x > xtreme) { 81 | x -= this.pup.width() + 2 * this.minMargin; 82 | } 83 | x = this.max(x, 0); 84 | 85 | // When the mouse is too far down, move window up 86 | if((y + this.pup.height()) > (win.height() + $(document).scrollTop())) { 87 | y -= this.pup.height() + this.minMargin; 88 | } 89 | 90 | return [ x, y ]; 91 | }, 92 | 93 | /* custom max */ 94 | max: function(a,b) 95 | { 96 | if (a>b) return a; 97 | else return b; 98 | }, 99 | 100 | /* 101 | Get the target (element) of an event. 
102 | Inspired by quirksmode 103 | */ 104 | getTarget: function(e) 105 | { 106 | var targ; 107 | if (!e) var e = window.event; 108 | if (e.target) targ = e.target; 109 | else if (e.srcElement) targ = e.srcElement; 110 | if (targ.nodeType == 3) // defeat Safari bug 111 | targ = targ.parentNode; 112 | return targ; 113 | } 114 | 115 | }; 116 | 117 | 118 | /* Prepare popup and define the mouseover callback */ 119 | jQuery(document).ready(function(){ 120 | // create default popup on the page 121 | $('body').append(''); 122 | nhpup.pup = $('#' + nhpup.identifier); 123 | 124 | // set dynamic coords when the mouse moves 125 | $(document).mousemove(function(e){ 126 | if (nhpup.move){ 127 | nhpup.setElementPos(e.pageX, e.pageY); 128 | } 129 | }); 130 | }); 131 | -------------------------------------------------------------------------------- /tclap/XorHandler.h: -------------------------------------------------------------------------------- 1 | 2 | /****************************************************************************** 3 | * 4 | * file: XorHandler.h 5 | * 6 | * Copyright (c) 2003, Michael E. Smoot . 7 | * Copyright (c) 2004, Michael E. Smoot, Daniel Aarno. 8 | * All rights reverved. 9 | * 10 | * See the file COPYING in the top directory of this distribution for 11 | * more information. 12 | * 13 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | * DEALINGS IN THE SOFTWARE. 
20 | * 21 | *****************************************************************************/ 22 | 23 | #ifndef TCLAP_XORHANDLER_H 24 | #define TCLAP_XORHANDLER_H 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | namespace TCLAP { 33 | 34 | /** 35 | * This class handles lists of Arg's that are to be XOR'd on the command 36 | * line. This is used by CmdLine and you shouldn't ever use it. 37 | */ 38 | class XorHandler 39 | { 40 | protected: 41 | 42 | /** 43 | * The list of of lists of Arg's to be or'd together. 44 | */ 45 | std::vector< std::vector > _orList; 46 | 47 | public: 48 | 49 | /** 50 | * Constructor. Does nothing. 51 | */ 52 | XorHandler( ) : _orList(std::vector< std::vector >()) {} 53 | 54 | /** 55 | * Add a list of Arg*'s that will be orred together. 56 | * \param ors - list of Arg* that will be xor'd. 57 | */ 58 | void add( std::vector& ors ); 59 | 60 | /** 61 | * Checks whether the specified Arg is in one of the xor lists and 62 | * if it does match one, returns the size of the xor list that the 63 | * Arg matched. If the Arg matches, then it also sets the rest of 64 | * the Arg's in the list. You shouldn't use this. 65 | * \param a - The Arg to be checked. 66 | */ 67 | int check( const Arg* a ); 68 | 69 | /** 70 | * Returns the XOR specific short usage. 71 | */ 72 | std::string shortUsage(); 73 | 74 | /** 75 | * Prints the XOR specific long usage. 76 | * \param os - Stream to print to. 77 | */ 78 | void printLongUsage(std::ostream& os); 79 | 80 | /** 81 | * Simply checks whether the Arg is contained in one of the arg 82 | * lists. 83 | * \param a - The Arg to be checked. 
84 | */ 85 | bool contains( const Arg* a ); 86 | 87 | std::vector< std::vector >& getXorList(); 88 | 89 | }; 90 | 91 | 92 | ////////////////////////////////////////////////////////////////////// 93 | //BEGIN XOR.cpp 94 | ////////////////////////////////////////////////////////////////////// 95 | inline void XorHandler::add( std::vector& ors ) 96 | { 97 | _orList.push_back( ors ); 98 | } 99 | 100 | inline int XorHandler::check( const Arg* a ) 101 | { 102 | // iterate over each XOR list 103 | for ( int i = 0; static_cast(i) < _orList.size(); i++ ) 104 | { 105 | // if the XOR list contains the arg.. 106 | ArgVectorIterator ait = std::find( _orList[i].begin(), 107 | _orList[i].end(), a ); 108 | if ( ait != _orList[i].end() ) 109 | { 110 | // first check to see if a mutually exclusive switch 111 | // has not already been set 112 | for ( ArgVectorIterator it = _orList[i].begin(); 113 | it != _orList[i].end(); 114 | it++ ) 115 | if ( a != (*it) && (*it)->isSet() ) 116 | throw(CmdLineParseException( 117 | "Mutually exclusive argument already set!", 118 | (*it)->toString())); 119 | 120 | // go through and set each arg that is not a 121 | for ( ArgVectorIterator it = _orList[i].begin(); 122 | it != _orList[i].end(); 123 | it++ ) 124 | if ( a != (*it) ) 125 | (*it)->xorSet(); 126 | 127 | // return the number of required args that have now been set 128 | if ( (*ait)->allowMore() ) 129 | return 0; 130 | else 131 | return static_cast(_orList[i].size()); 132 | } 133 | } 134 | 135 | if ( a->isRequired() ) 136 | return 1; 137 | else 138 | return 0; 139 | } 140 | 141 | inline bool XorHandler::contains( const Arg* a ) 142 | { 143 | for ( int i = 0; static_cast(i) < _orList.size(); i++ ) 144 | for ( ArgVectorIterator it = _orList[i].begin(); 145 | it != _orList[i].end(); 146 | it++ ) 147 | if ( a == (*it) ) 148 | return true; 149 | 150 | return false; 151 | } 152 | 153 | inline std::vector< std::vector >& XorHandler::getXorList() 154 | { 155 | return _orList; 156 | } 157 | 158 | 159 | 
160 | ////////////////////////////////////////////////////////////////////// 161 | //END XOR.cpp 162 | ////////////////////////////////////////////////////////////////////// 163 | 164 | } //namespace TCLAP 165 | 166 | #endif 167 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | -include sysdeps/$(shell uname).inc 2 | 3 | VERSION=0.1 4 | CXXFLAGS?=-Wall -O3 -fPIC -I/usr/include/python3.6m -ggdb -I. -Iext -Iext/libmba -MMD -MP -pthread $(CXX2014FLAGS) -Wno-strict-aliasing # -Wno-unused-local-typedefs 5 | CFLAGS=-Wall -I. -Iext/libmba -O3 -MMD -MP 6 | LDFLAGS=$(CXX2014FLAGS) -pthread # -Wl,-Bstatic -lstdc++ -lgcc -lz -Wl,-Bdynamic -static-libgcc -lm -lc 7 | CHEAT_ARG := $(shell ./update-git-hash-if-necessary) 8 | 9 | SHIPPROGRAMS=antonie 16ssearcher stitcher fqgrep pfqgrep genex 10 | PROGRAMS=$(SHIPPROGRAMS) digisplice gffedit gfflookup nwunsch fogsaa gtfreader 11 | 12 | ifeq ($(CC),clang) 13 | CXXFLAGS+=-ftemplate-depth=1000 14 | endif 15 | 16 | all: $(PROGRAMS) 17 | 18 | -include *.d 19 | 20 | .PHONY: antonie.exe codedocs/html/index.html check 21 | 22 | MBA_OBJECTS = ext/libmba/allocator.o ext/libmba/diff.o ext/libmba/msgno.o ext/libmba/suba.o ext/libmba/varray.o 23 | ANTONIE_OBJECTS = antonie.o refgenome.o hash.o geneannotated.o misc.o fastq.o saminfra.o dnamisc.o githash.o phi-x174.o zstuff.o genbankparser.o $(MBA_OBJECTS) 24 | 25 | dino: dino.o 26 | $(CXX) $^ -o $@ 27 | 28 | strdiff: strdiff.o $(MBA_OBJECTS) 29 | $(CC) strdiff.o $(MBA_OBJECTS) -o $@ 30 | 31 | antonie: $(ANTONIE_OBJECTS) 32 | $(CXX) $(ANTONIE_OBJECTS) $(LDFLAGS) $(STATICFLAGS) -lz -o $@ 33 | 34 | SEARCHER_OBJECTS=16ssearcher.o hash.o misc.o fastq.o zstuff.o githash.o fastqindex.o stitchalg.o 35 | 36 | 16ssearcher: $(SEARCHER_OBJECTS) 37 | $(CXX) $(SEARCHER_OBJECTS) -lz $(LDFLAGS) $(STATICFLAGS) -o $@ 38 | 39 | digisplice: digisplice.o refgenome.o misc.o fastq.o hash.o 
zstuff.o dnamisc.o geneannotated.o genbankparser.o 40 | $(CXX) $(LDFLAGS) $^ -lz $(STATICFLAGS) -o $@ 41 | 42 | stitcher: stitcher.o refgenome.o misc.o fastq.o hash.o zstuff.o dnamisc.o geneannotated.o genbankparser.o fastqindex.o stitchalg.o 43 | $(CXX) $(LDFLAGS) $^ -lz -pthread $(STATICFLAGS) -o $@ 44 | 45 | #renovo: renovo.o refgenome.o misc.o fastq.o hash.o zstuff.o dnamisc.o geneannotated.o genbankparser.o fastqindex.o stitchalg.o 46 | # $(CXX) $(LDFLAGS) $^ -lz -pthread $(STATICFLAGS) -o $@ 47 | 48 | 49 | libbridge.so: bridge.o 50 | g++ -shared -Wl,-soname,"libhello.so" bridge.o -lboost_python3 -fpic -o libbridge.so 51 | 52 | 53 | invert: invert.o misc.o 54 | $(CXX) $(LDFLAGS) $(STATICFLAGS) $^ -o $@ 55 | 56 | fqgrep: fqgrep.o misc.o fastq.o dnamisc.o zstuff.o hash.o 57 | $(CXX) $(LDFLAGS) $^ -lz $(STATICFLAGS) -o $@ 58 | 59 | pfqgrep: pfqgrep.o misc.o fastq.o dnamisc.o zstuff.o hash.o 60 | $(CXX) $(LDFLAGS) $^ -lz $(STATICFLAGS) -o $@ 61 | 62 | genex: genex.o dnamisc.o zstuff.o misc.o hash.o nucstore.o refgenome2.o 63 | $(CXX) $(LDFLAGS) $^ -lz $(STATICFLAGS) -pthread -o $@ 64 | 65 | correlo: correlo.o dnamisc.o zstuff.o misc.o hash.o nucstore.o refgenome2.o 66 | $(CXX) $(LDFLAGS) $^ -lz $(STATICFLAGS) -pthread -lbz2 -o $@ 67 | 68 | 69 | gffedit: gffedit.o refgenome.o fastq.o dnamisc.o zstuff.o misc.o hash.o 70 | $(CXX) $(LDFLAGS) $^ -lz $(STATICFLAGS) -o $@ 71 | 72 | gfflookup: gfflookup.o geneannotated.o genbankparser.o refgenome2.o nucstore.o fastq.o dnamisc.o zstuff.o misc.o hash.o 73 | $(CXX) $(LDFLAGS) $^ -lz $(STATICFLAGS) -o $@ 74 | 75 | gtfreader: gtfreader.o geneannotated.o genbankparser.o refgenome2.o nucstore.o fastq.o dnamisc.o zstuff.o misc.o hash.o 76 | $(CXX) $(LDFLAGS) $^ -lz $(STATICFLAGS) -o $@ 77 | 78 | 79 | gendump: gendump.o geneannotated.o genbankparser.o refgenome2.o nucstore.o fastq.o dnamisc.o zstuff.o misc.o hash.o 80 | $(CXX) $(LDFLAGS) $^ -lz $(STATICFLAGS) -o $@ 81 | 82 | 83 | nwunsch: nwunsch.o 84 | $(CXX) $(LDFLAGS) $^ -lz 
$(STATICFLAGS) -o $@ 85 | 86 | fogsaa: fogsaaimp.o 87 | $(CXX) $(LDFLAGS) $^ -lz $(STATICFLAGS) -o $@ 88 | 89 | 90 | install: antonie 91 | mkdir -p $(DESTDIR)/usr/bin/ 92 | mkdir -p $(DESTDIR)/usr/share/doc/antonie/ 93 | mkdir -p $(DESTDIR)/usr/share/doc/antonie/ext 94 | install -s $(SHIPPROGRAMS) $(DESTDIR)/usr/bin/ 95 | cp report.html $(DESTDIR)/usr/share/doc/antonie 96 | cp -r ext/html $(DESTDIR)/usr/share/doc/antonie/ext 97 | 98 | clean: 99 | rm -f *~ *.o $(MBA_OBJECTS) *.d $(PROGRAMS) githash.h 100 | 101 | package: all 102 | rm -rf dist 103 | DESTDIR=dist make install 104 | fpm -s dir -f -t rpm -n antonie -v 1.g$(shell cat githash) -C dist . 105 | fpm -s dir -f -t deb -n antonie -v 1.g$(shell cat githash) -C dist . 106 | rm -rf dist 107 | 108 | codedocs: codedocs/html/index.html 109 | 110 | codedocs/html/index.html: 111 | doxygen 112 | 113 | antonie.exe: 114 | make clean 115 | STATICFLAGS="-static -static-libgcc -static-libstdc++" CXX=i686-w64-mingw32-g++ CC=i686-w64-mingw32-gcc make antonie 116 | mv antonie antonie.exe 117 | 118 | 16ssearcher.exe: 119 | make clean 120 | CXXFLAGS="-Wall -O3 -I. 
/**
 * Fraction of G and C among the recognised bases of @p str.
 *
 * Only the upper-case letters A, C, G, T and N are counted; every other
 * character is ignored.
 *
 * @return (C + G) / (A + C + G + T + N), or 0.0 when no recognised base is
 *         present.
 */
double getGCContent(const std::string& str)
{
  // Both counters are bounded by str.size(), so std::size_t cannot overflow.
  std::size_t gc = 0;
  std::size_t total = 0;

  for(const char c : str) {
    switch(c) {
    case 'C':
    case 'G':
      ++gc;
      ++total;
      break;
    case 'A':
    case 'T':
    case 'N':
      ++total;
      break;
    default:
      break;
    }
  }

  if(total == 0)
    return 0.0;
  return static_cast<double>(gc) / static_cast<double>(total);
}

/**
 * Converts a quality score Q into the error rate 10^(-Q/10).
 *
 * Values come from a table covering Q = 0 .. 59.
 *
 * @throws std::runtime_error for Q >= 60.
 */
double qToErr(unsigned int i)
{
  // Built once on first use; initialisation of a function-local static is
  // thread-safe, unlike filling a shared vector lazily.
  static const std::vector<double> answers = [] {
    std::vector<double> table;
    table.reserve(60);
    for(int n = 0; n < 60; ++n)
      table.push_back(std::pow(10.0, -n/10.0));
    return table;
  }();

  // '>=' rather than '>': index answers.size() is already one past the end.
  if(i >= answers.size()) {
    throw std::runtime_error("Can't calculate error rate for Q " + std::to_string(i));
  }

  return answers[i];
}

/**
 * Packs @p len bases of @p str, starting at @p offset, into an integer using
 * two bits per base (A=0, C=1, G=2, T=3), first base in the most significant
 * position.
 *
 * Any other character contributes 00, the same as 'A'. Positions outside the
 * string are treated the same way instead of being read from memory. With
 * @p len above 16 the oldest bases are shifted out of the 32-bit result.
 */
uint32_t kmerMapper(const std::string& str, int offset, unsigned int len)
{
  uint32_t ret = 0;
  const auto begin = static_cast<std::string::size_type>(offset);

  for(unsigned int i = 0; i != len; ++i) {
    const std::string::size_type idx = begin + i;
    const char base = idx < str.size() ? str[idx] : '\0';

    ret <<= 2;
    switch(base) {
    case 'C': ret |= 1; break;
    case 'G': ret |= 2; break;
    case 'T': ret |= 3; break;
    default:  break;   // 'A' and unrecognised characters add 00
    }
  }
  return ret;
}
/**
 * Maps a one-letter amino-acid code, as produced by DNAToAminoAcid(), to its
 * full name. The lower-case 's' stands for a stop codon.
 *
 * @return a string literal; "?" for any code that is not in the table.
 */
const char* AminoAcidName(char c)
{
  switch(c) {
  case 'A': return "Alanine";
  case 'C': return "Cysteine";
  case 'D': return "Aspartic Acid";
  case 'E': return "Glutamic Acid";
  case 'F': return "Phenylalanine";   // was misspelled "Phenylanaline"
  case 'G': return "Glycine";
  case 'H': return "Histidine";
  case 'I': return "Isoleucine";
  case 'K': return "Lysine";
  case 'L': return "Leucine";
  case 'M': return "Methionine";
  case 'N': return "Asparagine";
  case 'P': return "Proline";
  case 'Q': return "Glutamine";
  case 'R': return "Arginine";        // was misspelled "Arganine"
  case 'S': return "Serine";
  case 'T': return "Threonine";
  case 'V': return "Valine";
  case 'W': return "Tryptophan";
  case 'Y': return "Tyrosine";
  case 's': return "Stop";
  default:  return "?";
  }
}

/**
 * Translates the codon formed by the three characters at @p s into a
 * one-letter amino-acid code.
 *
 * Only upper-case T, C, A and G are recognised. The result is 's' for a stop
 * codon and '?' when the first or second character is not a recognised base.
 * Where the third base selects between two amino acids, T or C picks the
 * first and any other character picks the second. The one exception is
 * "TG" + an unrecognised third character, which yields '?'.
 *
 * Characters are read one at a time and reading stops at the first
 * unrecognised first or second character, so a C string shorter than three
 * characters is never read past its terminator.
 */
char DNAToAminoAcid(const char* s)
{
  const auto isBase = [](char x) {
    return x=='T' || x=='C' || x=='A' || x=='G';
  };

  const char first = s[0];
  if(!isBase(first))
    return '?';

  const char second = s[1];
  if(!isBase(second))
    return '?';

  const char third = s[2];
  const bool pyrimidine = (third=='T' || third=='C');

  switch(first) {
  case 'T':
    switch(second) {
    case 'T': return pyrimidine ? 'F' : 'L';
    case 'C': return 'S';
    case 'A': return pyrimidine ? 'Y' : 's';
    default:  // 'G'
      if(pyrimidine)
        return 'C';
      if(third=='A')
        return 's';
      if(third=='G')
        return 'W';
      return '?';
    }
  case 'C':
    switch(second) {
    case 'T': return 'L';
    case 'C': return 'P';
    case 'A': return pyrimidine ? 'H' : 'Q';
    default:  return 'R';   // 'G'
    }
  case 'A':
    switch(second) {
    case 'T': return third=='G' ? 'M' : 'I';
    case 'C': return 'T';
    case 'A': return pyrimidine ? 'N' : 'K';
    default:  return pyrimidine ? 'S' : 'R';   // 'G'
    }
  default:  // 'G'
    switch(second) {
    case 'T': return 'V';
    case 'C': return 'A';
    case 'A': return pyrimidine ? 'D' : 'E';
    default:  return 'G';   // 'G'
    }
  }
}
return 'I'; 165 | } 166 | if(b=='C') 167 | return 'T'; 168 | else if(b=='A') { 169 | if(c=='T' || c=='C') 170 | return 'N'; 171 | else 172 | return 'K'; 173 | } 174 | else if(b=='G') { 175 | if(c=='T' || c=='C') 176 | return 'S'; 177 | else 178 | return 'R'; 179 | } 180 | } 181 | else if(a=='G') { 182 | if(b=='T') 183 | return 'V'; 184 | else if(b=='C') 185 | return 'A'; 186 | else if(b=='A') { 187 | if(c=='T' || c=='C') 188 | return 'D'; 189 | else 190 | return 'E'; 191 | } 192 | else if(b=='G') 193 | return 'G'; 194 | } 195 | return '?'; 196 | } 197 | 198 | void DuplicateCounter::feedString(const std::string& str) 199 | { 200 | uint32_t hashval = qhash(str.c_str(), str.length(), 0); 201 | d_hashes.push_back(hashval); 202 | } 203 | 204 | DuplicateCounter::counts_t DuplicateCounter::getCounts() 205 | { 206 | counts_t ret; 207 | sort(d_hashes.begin(), d_hashes.end()); 208 | uint64_t repeatCount=1; 209 | for(auto iter = next(d_hashes.begin()) ; iter != d_hashes.end(); ++iter) { 210 | if(*prev(iter) != *iter) { 211 | ret[std::min(repeatCount, (decltype(repeatCount))20)]+=repeatCount; 212 | repeatCount=1; 213 | } 214 | else 215 | repeatCount++; 216 | } 217 | ret[repeatCount]+=repeatCount; 218 | return ret; 219 | } 220 | 221 | void DuplicateCounter::clear() 222 | { 223 | d_hashes.clear(); 224 | d_hashes.shrink_to_fit(); 225 | } 226 | -------------------------------------------------------------------------------- /tclap/StandardTraits.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | /****************************************************************************** 4 | * 5 | * file: StandardTraits.h 6 | * 7 | * Copyright (c) 2007, Daniel Aarno, Michael E. Smoot . 8 | * All rights reverved. 9 | * 10 | * See the file COPYING in the top directory of this distribution for 11 | * more information. 
12 | * 13 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | * DEALINGS IN THE SOFTWARE. 20 | * 21 | *****************************************************************************/ 22 | 23 | // This is an internal tclap file, you should probably not have to 24 | // include this directly 25 | 26 | #ifndef TCLAP_STANDARD_TRAITS_H 27 | #define TCLAP_STANDARD_TRAITS_H 28 | 29 | #ifdef HAVE_CONFIG_H 30 | #include // To check for long long 31 | #endif 32 | 33 | // If Microsoft has already typedef'd wchar_t as an unsigned 34 | // short, then compiles will break because it's as if we're 35 | // creating ArgTraits twice for unsigned short. Thus... 36 | #ifdef _MSC_VER 37 | #ifndef _NATIVE_WCHAR_T_DEFINED 38 | #define TCLAP_DONT_DECLARE_WCHAR_T_ARGTRAITS 39 | #endif 40 | #endif 41 | 42 | namespace TCLAP { 43 | 44 | // ====================================================================== 45 | // Integer types 46 | // ====================================================================== 47 | 48 | /** 49 | * longs have value-like semantics. 50 | */ 51 | template<> 52 | struct ArgTraits { 53 | typedef ValueLike ValueCategory; 54 | }; 55 | 56 | /** 57 | * ints have value-like semantics. 58 | */ 59 | template<> 60 | struct ArgTraits { 61 | typedef ValueLike ValueCategory; 62 | }; 63 | 64 | /** 65 | * shorts have value-like semantics. 66 | */ 67 | template<> 68 | struct ArgTraits { 69 | typedef ValueLike ValueCategory; 70 | }; 71 | 72 | /** 73 | * chars have value-like semantics. 
74 | */ 75 | template<> 76 | struct ArgTraits { 77 | typedef ValueLike ValueCategory; 78 | }; 79 | 80 | #ifdef HAVE_LONG_LONG 81 | /** 82 | * long longs have value-like semantics. 83 | */ 84 | template<> 85 | struct ArgTraits { 86 | typedef ValueLike ValueCategory; 87 | }; 88 | #endif 89 | 90 | // ====================================================================== 91 | // Unsigned integer types 92 | // ====================================================================== 93 | 94 | /** 95 | * unsigned longs have value-like semantics. 96 | */ 97 | template<> 98 | struct ArgTraits { 99 | typedef ValueLike ValueCategory; 100 | }; 101 | 102 | /** 103 | * unsigned ints have value-like semantics. 104 | */ 105 | template<> 106 | struct ArgTraits { 107 | typedef ValueLike ValueCategory; 108 | }; 109 | 110 | /** 111 | * unsigned shorts have value-like semantics. 112 | */ 113 | template<> 114 | struct ArgTraits { 115 | typedef ValueLike ValueCategory; 116 | }; 117 | 118 | /** 119 | * unsigned chars have value-like semantics. 120 | */ 121 | template<> 122 | struct ArgTraits { 123 | typedef ValueLike ValueCategory; 124 | }; 125 | 126 | // Microsoft implements size_t awkwardly. 127 | #if defined(_MSC_VER) && defined(_M_X64) 128 | /** 129 | * size_ts have value-like semantics. 130 | */ 131 | template<> 132 | struct ArgTraits { 133 | typedef ValueLike ValueCategory; 134 | }; 135 | #endif 136 | 137 | 138 | #ifdef HAVE_LONG_LONG 139 | /** 140 | * unsigned long longs have value-like semantics. 141 | */ 142 | template<> 143 | struct ArgTraits { 144 | typedef ValueLike ValueCategory; 145 | }; 146 | #endif 147 | 148 | // ====================================================================== 149 | // Float types 150 | // ====================================================================== 151 | 152 | /** 153 | * floats have value-like semantics. 
154 | */ 155 | template<> 156 | struct ArgTraits { 157 | typedef ValueLike ValueCategory; 158 | }; 159 | 160 | /** 161 | * doubles have value-like semantics. 162 | */ 163 | template<> 164 | struct ArgTraits { 165 | typedef ValueLike ValueCategory; 166 | }; 167 | 168 | // ====================================================================== 169 | // Other types 170 | // ====================================================================== 171 | 172 | /** 173 | * bools have value-like semantics. 174 | */ 175 | template<> 176 | struct ArgTraits { 177 | typedef ValueLike ValueCategory; 178 | }; 179 | 180 | 181 | /** 182 | * wchar_ts have value-like semantics. 183 | */ 184 | #ifndef TCLAP_DONT_DECLARE_WCHAR_T_ARGTRAITS 185 | template<> 186 | struct ArgTraits { 187 | typedef ValueLike ValueCategory; 188 | }; 189 | #endif 190 | 191 | /** 192 | * Strings have string like argument traits. 193 | */ 194 | template<> 195 | struct ArgTraits { 196 | typedef StringLike ValueCategory; 197 | }; 198 | 199 | template 200 | void SetString(T &dst, const std::string &src) 201 | { 202 | dst = src; 203 | } 204 | 205 | } // namespace 206 | 207 | #endif 208 | 209 | -------------------------------------------------------------------------------- /stitchalg.cc: -------------------------------------------------------------------------------- 1 | #include "fastqindex.hh" 2 | #include 3 | #include 4 | #include 5 | #include "stitchalg.hh" 6 | 7 | using namespace std; 8 | 9 | int dnaDiff(const std::string& a, const std::string& b) 10 | { 11 | if(a==b) 12 | return 0; 13 | // cout<<"A: "< > scores{ 42 | {aCount, &aCount}, 43 | {cCount, &cCount}, 44 | {gCount, &gCount}, 45 | {tCount, &tCount}}; 46 | sort(scores.begin(), scores.end()); 47 | auto& best = scores[3].second; 48 | if(best == &aCount) 49 | return 'A'; 50 | else if(best == &cCount) 51 | return 'C'; 52 | else if(best == &gCount) 53 | return 'G'; 54 | else 55 | return 'T'; 56 | } 57 | int getDepth() 58 | { 59 | if(!aCount && !cCount && 
!gCount && !tCount) 60 | return 0; 61 | return max({aCount, cCount, gCount, tCount}); 62 | } 63 | 64 | void feed(char c, int amount=1) 65 | { 66 | if(c=='A') 67 | aCount+=amount; 68 | else if(c=='C') 69 | cCount+=amount; 70 | else if(c=='G') 71 | gCount+=amount; 72 | else if(c=='T') 73 | tCount+=amount; 74 | } 75 | }; 76 | 77 | 78 | 79 | string doStitch(const map > >& fhpos, const std::string& startseed_, 80 | const std::string& endseed, unsigned int maxlen, int chunklen, bool verbose) 81 | { 82 | string startseed(startseed_); 83 | if(verbose) { 84 | cout << "Startseed: "< totcoverage; 106 | for(;;) { 107 | vector > story; 108 | story.push_back(make_pair(startseed, string(startseed.size(), (char)40))); 109 | 110 | for(unsigned int n=0; n < startseed.size() - chunklen;++n) { 111 | string part=startseed.substr(n, chunklen); 112 | auto matches = getConsensusMatches(part, fhpos, chunklen); 113 | for(auto& match : matches) { 114 | int diff = dnaDiff(startseed.substr(n), match.d_nucleotides); 115 | matchesConsidered++; 116 | if(diff < 5) { 117 | if(verbose) 118 | cout << string(offset,'-')< consensus; 126 | consensus.resize(startseed.size()*1.5); 127 | for(unsigned int n = 0 ; n < consensus.size(); ++n) { 128 | for(const auto& candidate : story) { 129 | if(n < candidate.first.size()) 130 | consensus[n].feed(candidate.first[n], candidate.second[n]); 131 | } 132 | } 133 | string newconsensus; 134 | if(verbose) 135 | cout< maxlen) { 160 | cout<<"Terminated: \n"<gi|216019|gb|J02482.1|PX1CG Coliphage phi-X174, complete genome 2 | GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT 3 | GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA 4 | ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG 5 | TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA 6 | GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC 7 | TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT 8 | 
TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT 9 | CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT 10 | TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG 11 | TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC 12 | GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA 13 | CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCGGAAGGAG 14 | TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT 15 | AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC 16 | CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA 17 | TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC 18 | TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA 19 | CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA 20 | GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT 21 | GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA 22 | ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC 23 | TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT 24 | TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC 25 | ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCATGATGTTATTTCTTCATTTGGAGGTAAAAC 26 | CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT 27 | GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC 28 | CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC 29 | TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG 30 | TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT 31 | TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA 32 | AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT 33 | TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT 34 | 
ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC 35 | GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC 36 | TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT 37 | TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA 38 | TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG 39 | TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC 40 | CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG 41 | AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC 42 | CGGGCAATAACGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT 43 | TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG 44 | CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA 45 | AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT 46 | GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG 47 | GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA 48 | TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT 49 | CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG 50 | TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA 51 | GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC 52 | CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA 53 | TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA 54 | AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC 55 | TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT 56 | CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA 57 | TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG 58 | TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT 59 | CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT 60 | 
TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC 61 | ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG 62 | TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA 63 | ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG 64 | GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC 65 | CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT 66 | GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTGCTATTCAGCGTTTGATGAATGCAATGCGACAG 67 | GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT 68 | ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG 69 | CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC 70 | CGTCTTCATTTCCATGCGGTGCACTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC 71 | GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT 72 | CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG 73 | CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA 74 | TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT 75 | TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG 76 | TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC 77 | AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC 78 | TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA)"}; 79 | -------------------------------------------------------------------------------- /ext/libmba/varray.c: -------------------------------------------------------------------------------- 1 | /* varray - a variable sized array 2 | * Copyright (c) 2003 Michael B. 
Allen 3 | * 4 | * The MIT License 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, including without limitation 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | * and/or sell copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included 14 | * in all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | * OTHER DEALINGS IN THE SOFTWARE. 23 | */ 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #include "mba/iterator.h" 30 | #include "mba/varray.h" 31 | #include "mba/msgno.h" 32 | 33 | #define VAAL(va) ((struct allocator *)((va)->al ? (char *)(va) - (ptrdiff_t)(va)->al : NULL)) 34 | #define BINSIZ(i) ((i) ? 
1 << ((i) + (VARRAY_INIT_SIZE - 1)) : (1 << VARRAY_INIT_SIZE)) 35 | 36 | int 37 | varray_init(struct varray *va, size_t membsize, struct allocator *al) 38 | { 39 | if (va == NULL || membsize == 0) { 40 | PMNO(errno = EINVAL); 41 | return -1; 42 | } 43 | 44 | memset(va, 0, sizeof *va); 45 | va->size = membsize; 46 | if (al && al->tail) { /* al is a suba allocator */ 47 | va->al = (char *)va - (char *)al; 48 | } 49 | 50 | return 0; 51 | } 52 | int 53 | varray_deinit(struct varray *va) 54 | { 55 | if (varray_release(va, 0) == -1) { 56 | AMSG(""); 57 | return -1; 58 | } 59 | return 0; 60 | } 61 | struct varray * 62 | varray_new(size_t membsize, struct allocator *al) 63 | { 64 | struct varray *va; 65 | 66 | if ((va = allocator_alloc(al, sizeof *va, 0)) == NULL) { 67 | AMSG(""); 68 | return NULL; 69 | } 70 | if (varray_init(va, membsize, al) == -1) { 71 | AMSG(""); 72 | allocator_free(al, va); 73 | return NULL; 74 | } 75 | 76 | return va; 77 | } 78 | int 79 | varray_del(void *va) 80 | { 81 | int ret = 0; 82 | 83 | if (va) { 84 | ret += varray_release(va, 0); 85 | ret += allocator_free(VAAL((struct varray *)va), va); 86 | } 87 | 88 | if (ret) { 89 | AMSG(""); 90 | return -1; 91 | } 92 | 93 | return 0; 94 | } 95 | int 96 | varray_release(struct varray *va, unsigned int from) 97 | { 98 | unsigned int r, i; 99 | int ret = 0; 100 | 101 | if (va == NULL) { 102 | return 0; 103 | } 104 | 105 | r = (1 << VARRAY_INIT_SIZE); 106 | for (i = 0; i < 16; i++) { 107 | if (from <= r) { 108 | break; 109 | } 110 | r *= 2; 111 | } 112 | if (from != 0) i++; 113 | for ( ; i < 16; i++) { 114 | if (va->bins[i]) { 115 | struct allocator *al = VAAL(va); 116 | ret += allocator_free(al, ALADR(al, va->bins[i])); 117 | va->bins[i] = 0; 118 | } 119 | } 120 | 121 | if (ret) { 122 | AMSG(""); 123 | return -1; 124 | } 125 | 126 | return 0; 127 | } 128 | void * 129 | varray_get(struct varray *va, unsigned int idx) 130 | { 131 | unsigned int r, i, n; 132 | struct allocator *al; 133 | 134 | if (va == NULL) 
{ 135 | PMNO(errno = EINVAL); 136 | return NULL; 137 | } 138 | 139 | r = (1 << VARRAY_INIT_SIZE); /* First and second bins hold 32 then 64,128,256,... */ 140 | for (i = 0; i < 16; i++) { 141 | if (r > idx) { 142 | break; 143 | } 144 | r *= 2; 145 | } 146 | if (i == 16) { 147 | PMNO(errno = ERANGE); 148 | return NULL; 149 | } 150 | 151 | al = VAAL(va); 152 | n = BINSIZ(i); /* n is nmemb in bin i */ 153 | 154 | if (va->bins[i] == 0) { 155 | char *mem = allocator_alloc(al, n * va->size, 1); 156 | if (mem == NULL) { 157 | AMSG(""); 158 | return NULL; 159 | } 160 | va->bins[i] = ALREF(al, mem); 161 | } 162 | 163 | return (char *)ALADR(al, va->bins[i]) + (i ? idx - n : idx) * va->size; 164 | } 165 | int 166 | varray_index(struct varray *va, void *elem) 167 | { 168 | ref_t er = ALREF(VAAL(va), elem); 169 | int i; 170 | 171 | for (i = 0; i < 16; i++) { 172 | if (va->bins[i]) { 173 | size_t n = BINSIZ(i); 174 | ref_t start = va->bins[i]; 175 | ref_t end = start + n * va->size; 176 | if (er >= start && er < end) { 177 | return (i ? n : 0) + ((er - start) / va->size); 178 | } 179 | } 180 | } 181 | 182 | PMNO(errno = EFAULT); 183 | return -1; 184 | } 185 | void 186 | varray_iterate(void *va, iter_t *iter) 187 | { 188 | if (va && iter) { 189 | iter->i1 = iter->i2 = 0; 190 | } 191 | } 192 | void * 193 | varray_next(void *va0, iter_t *iter) 194 | { 195 | struct varray *va = va0; 196 | unsigned int n; 197 | 198 | if (va == NULL || iter == NULL) { 199 | PMNO(errno = EINVAL); 200 | return NULL; 201 | } 202 | 203 | /* n is nmemb in iter->i1 */ 204 | n = iter->i1 == 0 ? 
(1 << VARRAY_INIT_SIZE) : 205 | 1 << (iter->i1 + (VARRAY_INIT_SIZE - 1)); 206 | 207 | if (iter->i2 == n) { 208 | iter->i2 = 0; 209 | iter->i1++; 210 | } 211 | while (va->bins[iter->i1] == 0) { 212 | iter->i1++; 213 | if (iter->i1 == 16) { 214 | return NULL; 215 | } 216 | } 217 | 218 | return (char *)ALADR(VAAL(va), va->bins[iter->i1]) + iter->i2++ * va->size; 219 | } 220 | 221 | -------------------------------------------------------------------------------- /tclap/ArgException.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | /****************************************************************************** 4 | * 5 | * file: ArgException.h 6 | * 7 | * Copyright (c) 2003, Michael E. Smoot . 8 | * All rights reverved. 9 | * 10 | * See the file COPYING in the top directory of this distribution for 11 | * more information. 12 | * 13 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | * DEALINGS IN THE SOFTWARE. 20 | * 21 | *****************************************************************************/ 22 | 23 | 24 | #ifndef TCLAP_ARG_EXCEPTION_H 25 | #define TCLAP_ARG_EXCEPTION_H 26 | 27 | #include 28 | #include 29 | 30 | namespace TCLAP { 31 | 32 | /** 33 | * A simple class that defines and argument exception. Should be caught 34 | * whenever a CmdLine is created and parsed. 35 | */ 36 | class ArgException : public std::exception 37 | { 38 | public: 39 | 40 | /** 41 | * Constructor. 42 | * \param text - The text of the exception. 
43 | * \param id - The text identifying the argument source. 44 | * \param td - Text describing the type of ArgException it is. 45 | * of the exception. 46 | */ 47 | ArgException( const std::string& text = "undefined exception", 48 | const std::string& id = "undefined", 49 | const std::string& td = "Generic ArgException") 50 | : std::exception(), 51 | _errorText(text), 52 | _argId( id ), 53 | _typeDescription(td) 54 | { } 55 | 56 | /** 57 | * Destructor. 58 | */ 59 | virtual ~ArgException() throw() { } 60 | 61 | /** 62 | * Returns the error text. 63 | */ 64 | std::string error() const { return ( _errorText ); } 65 | 66 | /** 67 | * Returns the argument id. 68 | */ 69 | std::string argId() const 70 | { 71 | if ( _argId == "undefined" ) 72 | return " "; 73 | else 74 | return ( "Argument: " + _argId ); 75 | } 76 | 77 | /** 78 | * Returns the arg id and error text. 79 | */ 80 | const char* what() const throw() 81 | { 82 | static std::string ex; 83 | ex = _argId + " -- " + _errorText; 84 | return ex.c_str(); 85 | } 86 | 87 | /** 88 | * Returns the type of the exception. Used to explain and distinguish 89 | * between different child exceptions. 90 | */ 91 | std::string typeDescription() const 92 | { 93 | return _typeDescription; 94 | } 95 | 96 | 97 | private: 98 | 99 | /** 100 | * The text of the exception message. 101 | */ 102 | std::string _errorText; 103 | 104 | /** 105 | * The argument related to this exception. 106 | */ 107 | std::string _argId; 108 | 109 | /** 110 | * Describes the type of the exception. Used to distinguish 111 | * between different child exceptions. 112 | */ 113 | std::string _typeDescription; 114 | 115 | }; 116 | 117 | /** 118 | * Thrown from within the child Arg classes when it fails to properly 119 | * parse the argument it has been passed. 120 | */ 121 | class ArgParseException : public ArgException 122 | { 123 | public: 124 | /** 125 | * Constructor. 126 | * \param text - The text of the exception. 
127 | * \param id - The text identifying the argument source 128 | * of the exception. 129 | */ 130 | ArgParseException( const std::string& text = "undefined exception", 131 | const std::string& id = "undefined" ) 132 | : ArgException( text, 133 | id, 134 | std::string( "Exception found while parsing " ) + 135 | std::string( "the value the Arg has been passed." )) 136 | { } 137 | }; 138 | 139 | /** 140 | * Thrown from CmdLine when the arguments on the command line are not 141 | * properly specified, e.g. too many arguments, required argument missing, etc. 142 | */ 143 | class CmdLineParseException : public ArgException 144 | { 145 | public: 146 | /** 147 | * Constructor. 148 | * \param text - The text of the exception. 149 | * \param id - The text identifying the argument source 150 | * of the exception. 151 | */ 152 | CmdLineParseException( const std::string& text = "undefined exception", 153 | const std::string& id = "undefined" ) 154 | : ArgException( text, 155 | id, 156 | std::string( "Exception found when the values ") + 157 | std::string( "on the command line do not meet ") + 158 | std::string( "the requirements of the defined ") + 159 | std::string( "Args." )) 160 | { } 161 | }; 162 | 163 | /** 164 | * Thrown from Arg and CmdLine when an Arg is improperly specified, e.g. 165 | * same flag as another Arg, same name, etc. 166 | */ 167 | class SpecificationException : public ArgException 168 | { 169 | public: 170 | /** 171 | * Constructor. 172 | * \param text - The text of the exception. 173 | * \param id - The text identifying the argument source 174 | * of the exception. 175 | */ 176 | SpecificationException( const std::string& text = "undefined exception", 177 | const std::string& id = "undefined" ) 178 | : ArgException( text, 179 | id, 180 | std::string("Exception found when an Arg object ")+ 181 | std::string("is improperly defined by the ") + 182 | std::string("developer." 
)) 183 | { } 184 | 185 | }; 186 | 187 | class ExitException { 188 | public: 189 | ExitException(int estat) : _estat(estat) {} 190 | 191 | int getExitStatus() const { return _estat; } 192 | 193 | private: 194 | int _estat; 195 | }; 196 | 197 | } // namespace TCLAP 198 | 199 | #endif 200 | 201 | -------------------------------------------------------------------------------- /dnamisc.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "antonie.hh" 10 | 11 | extern const char* g_gitHash; 12 | 13 | //! convert a Sanger Q-score into an error probability. Uses a cache to be fast. 14 | double qToErr(unsigned int i); 15 | 16 | //! returns GC fraction of nucleotides in str 17 | double getGCContent(const std::string& str); 18 | 19 | 20 | //! Generic class to cluster objects that are 'close by' 21 | template 22 | class Clusterer 23 | { 24 | public: 25 | //! Cluster objects that are less than 'limit' apart together 26 | explicit Clusterer(int limit) : d_limit(limit) 27 | {} 28 | 29 | //! Feed an object 30 | void feed(const T& t) 31 | { 32 | if(d_clusters.empty() || t.pos - d_clusters.rbegin()->getEnd() > d_limit) { 33 | d_clusters.push_back(cluster()); 34 | } 35 | d_clusters.rbegin()->d_members.push_back(t); 36 | } 37 | 38 | //! Represents a cluster 39 | struct cluster 40 | { 41 | int getBegin() 42 | { 43 | return d_members.begin()->pos; 44 | } 45 | int getEnd() 46 | { 47 | return d_members.rbegin()->pos; 48 | } 49 | int getMiddle() 50 | { 51 | return (getBegin()+getEnd())/2; 52 | } 53 | 54 | std::vector d_members; //!< members of this cluster 55 | }; 56 | 57 | //! number of clusters 58 | unsigned int numClusters() 59 | { 60 | return d_clusters.size(); 61 | } 62 | 63 | //! 
number of entries 64 | unsigned int numEntries() 65 | { 66 | unsigned int ret=0; 67 | for(const auto& c: d_clusters) 68 | ret += c.d_members.size(); 69 | return ret; 70 | } 71 | 72 | 73 | //! The clusters we made for you 74 | std::vector d_clusters; 75 | private: 76 | unsigned int d_limit; 77 | }; 78 | 79 | typedef std::function acgt_t; 80 | inline void acgtDo(char c, const acgt_t& aDo, 81 | const acgt_t& cDo, const acgt_t& gDo, const acgt_t& tDo) 82 | { 83 | switch(c) { 84 | case 'A': 85 | aDo(); 86 | break; 87 | case 'C': 88 | cDo(); 89 | break; 90 | case 'G': 91 | gDo(); 92 | break; 93 | case 'T': 94 | tDo(); 95 | break; 96 | } 97 | } 98 | 99 | 100 | typedef std::function acgt_t; 101 | inline void acgtxDo(char c, acgt_t aDo, acgt_t cDo, acgt_t gDo, acgt_t tDo, acgt_t xDo) 102 | { 103 | switch(c) { 104 | case 'A': 105 | aDo(); 106 | break; 107 | case 'C': 108 | cDo(); 109 | break; 110 | case 'G': 111 | gDo(); 112 | break; 113 | case 'T': 114 | tDo(); 115 | break; 116 | case 'X': 117 | xDo(); 118 | break; 119 | } 120 | } 121 | 122 | 123 | //! 
Little utility to pick a random element from a container 124 | template 125 | const typename T::value_type& pickRandom(const T& t) 126 | { 127 | return t[rand() % t.size()]; 128 | } 129 | 130 | inline std::string jsonVectorPair(const std::vector >& in) 131 | { 132 | std::ostringstream str; 133 | str<<"["; 134 | bool first=true; 135 | for(const auto& ent : in) { 136 | if(!first) 137 | str<<","; 138 | first=false; 139 | str<<"["< 152 | std::string jsonVector(const std::vector& v, const std::string& name, 153 | std::function yAdjust = [](dnapos_t d){return 1.0*d;}, 154 | std::function xAdjust = [](int i){return 1.0*i;}) 155 | { 156 | std::ostringstream ret; 157 | ret < 170 | std::string jsonVectorD(const std::vector& v, 171 | std::function yAdjust = [](double d){return d;}, 172 | std::function xAdjust = [](int i){return 1.0*i;}) 173 | { 174 | std::ostringstream ret; 175 | ret<<"["; 176 | for(auto iter = v.begin(); iter != v.end(); ++iter) { 177 | if(iter != v.begin()) 178 | ret<<','; 179 | ret << '[' << std::fixed<< xAdjust(iter - v.begin()) <<','; 180 | ret.unsetf(std::ios_base::floatfield); 181 | ret << yAdjust(*iter)<<']'; 182 | } 183 | ret <<"]"; 184 | return ret.str(); 185 | } 186 | 187 | template 188 | std::string jsonVectorX(const std::vector& v, 189 | std::function xAdjust = [](int i){return i;}) 190 | { 191 | std::ostringstream ret; 192 | ret<<"["; 193 | for(auto iter = v.begin(); iter != v.end(); ++iter) { 194 | if(iter != v.begin()) 195 | ret<<','; 196 | ret << '[' << xAdjust(iter - v.begin()) <<','; 197 | ret << *iter<<']'; 198 | } 199 | ret <<"]"; 200 | return ret.str(); 201 | } 202 | 203 | 204 | //! maps 'len' nucleotides from 'str' at offset offset to a 32 bit string. At most 16 nuclotides therefore! 205 | uint32_t kmerMapper(const std::string& str, int offset, int unsigned len); 206 | 207 | char DNAToAminoAcid(const char* s); 208 | const char* AminoAcidName(char c); 209 | 210 | 211 | //! Very simple duplicate count estimator using a 32 bit hash. 
Also provides statistics 212 | class DuplicateCounter 213 | { 214 | public: 215 | DuplicateCounter(int estimate=1000000) 216 | { 217 | d_hashes.reserve(estimate); 218 | } 219 | void feedString(const std::string& str); //! do statistics on str 220 | void clear(); //! clean ourselves up 221 | typedef std::map counts_t; 222 | 223 | counts_t getCounts(); //! in position 0, everyone with no duplicates, in position 1 single duplicates etc 224 | private: 225 | std::vector d_hashes; 226 | }; 227 | 228 | 229 | -------------------------------------------------------------------------------- /viewer.js: -------------------------------------------------------------------------------- 1 | 2 | function makeSNPStat() 3 | { 4 | var ret={}; 5 | ret.counts=0; 6 | ret.present=[]; 7 | return ret; 8 | } 9 | 10 | function UpdateTable() 11 | { 12 | var numpools=0, numMuts=0, filteredSNPs=0, nonGene=0; 13 | var snps={}; 14 | var numDiffLimit = document.getElementById("numDiff").value; 15 | var percDiffLimit = document.getElementById("percDiff").value; 16 | var geneFilter = document.getElementById("geneFilter").checked; 17 | var nonsynFilter = document.getElementById("nonsynFilter").checked; 18 | var nonuniversalFilter = document.getElementById("nonuniversalFilter").checked; 19 | var deletesOnly = document.getElementById("deletesOnly").checked; 20 | var insertsOnly = document.getElementById("insertsOnly").checked; 21 | // console.log("The gene filter "+geneFilter); 22 | 23 | $.each(loci, function(pool, poollocus) { 24 | numpools++; 25 | numMuts+= poollocus.length; 26 | $.each(poollocus, function(pos, locus) { 27 | if(geneFilter && !locus.gene) { 28 | nonGene++; 29 | return; 30 | } 31 | if(snps[locus.locus] == undefined) { 32 | snps[locus.locus]=makeSNPStat(); 33 | try { 34 | snps[locus.locus].description = decodeURIComponent(locus.annotation); 35 | } 36 | catch(e) { 37 | snps[locus.locus].description = locus.annotation; 38 | console.log(locus.annotation); 39 | } 40 | } 41 | 42 | 
snps[locus.locus].count++; 43 | snps[locus.locus].present.push([pool, locus]); 44 | }); 45 | }); 46 | 47 | var distinct=0; 48 | $.each(snps, function() { 49 | distinct++; 50 | }); 51 | 52 | 53 | var table=""; 54 | for(pool in loci) { 55 | table+=(""); 56 | } 57 | var realCount=0; 58 | $.each(snps, function(locus, snp) { 59 | row=""; 60 | var real=false; 61 | var totfound=false; 62 | var universal=snp.present.length > 0; 63 | var totCount=0; 64 | for(pool in loci) { 65 | var found=false; 66 | 67 | for(pos in snp.present) { 68 | if(snp.present[pos][0]==pool) { 69 | var aminoReport = snp.present[pos][1].aminoReport; 70 | 71 | var mouseOver="onmouseover='nhpup.popup(\""; 72 | mouseOver+=snp.present[pos][1].summary+" "; 73 | mouseOver+= "Numdiff: "+snp.present[pos][1].numDiff; 74 | mouseOver+= ", "; 75 | if(snp.present[pos][1].xCount) 76 | mouseOver += snp.present[pos][1].xCount+" deletes, "; 77 | if(snp.present[pos][1].insertReport != "") { 78 | mouseOver += "Inserts: "+snp.present[pos][1].insertReport+", "; 79 | } 80 | var totPresent=snp.present[pos][1].numDiff + snp.present[pos][1].depth; 81 | mouseOver+=(100.0*snp.present[pos][1].numDiff/totPresent).toFixed(2); 82 | mouseOver+= "%, depth: " + totPresent; 83 | mouseOver +=", "+aminoReport; 84 | mouseOver+="\");'"; 85 | 86 | var onClick='onclick="'; 87 | onClick+="drawGraph('" +pool+"',"+ locus + ");"; 88 | onClick+='"'; 89 | 90 | if(nonsynFilter) { 91 | var re=/([A-Z][a-z ]+) -> ([A-Z][a-z ]+) $/; 92 | var result = re.exec(aminoReport); 93 | // console.log(aminoReport +": "+result); 94 | if(result != null && result[1]==result[2]) { 95 | // console.log("Neutral:'"+result[1]+"' '"+result[2]+"'"); 96 | continue; 97 | } 98 | } 99 | if(deletesOnly && snp.present[pos][1].xCount == 0) 100 | continue; 101 | if(insertsOnly && snp.present[pos][1].insertReport=='') 102 | continue; 103 | totCount+=snp.present[pos][1].numDiff; 104 | var percentage = 100.0* snp.present[pos][1].numDiff / snp.present[pos][1].depth; 105 | 
if(snp.present[pos][1].numDiff < numDiffLimit || percentage < percDiffLimit) 106 | row+=''; 107 | else if(snp.present[pos][1].numDiff > 10 && percentage >= percDiffLimit) { 108 | row+=''; 109 | real=1; 110 | } 111 | else { 112 | row+=""; 113 | real=1; 114 | } 115 | found=true; 116 | break; 117 | } 118 | } 119 | if(!found) { 120 | row+=""; 121 | universal=false; 122 | } 123 | } 124 | 125 | row+=""; 126 | if((real || (totCount > 3*numDiffLimit && totCount > 20)) && (!nonuniversalFilter || !universal)) { 127 | table+=row; 128 | realCount++; 129 | } 130 | }); 131 | 132 | table+=("
LocusDescription"+pool+"
"+locus+""+snp.description+"'+pool+''+pool+'"+pool+"
"); 133 | d3.select("#toctable").html(table); 134 | 135 | var resp = "There are "+numpools+ " pools, "+numMuts+" candidate mutations, " 136 | resp += nonGene + " non-genes, "+realCount+" distinct left"; 137 | d3.select("#log").text(resp); 138 | 139 | return false; 140 | } 141 | 142 | function drawGraph(pool, locus) 143 | { 144 | $("#dialog").dialog("open"); 145 | 146 | var item={}; 147 | $.each(loci[pool], function(key, val) { 148 | if(val.locus==locus) 149 | item=val; 150 | }); 151 | console.log(item); 152 | d3.select("#dialog").html(''); 153 | nv.addGraph((function() { 154 | var chart = nv.models.lineChart() 155 | .options({ 156 | margin: {left: 100, bottom: 75}, 157 | x: function(d,i) { return d.x}, 158 | showXAxis: true, 159 | showYAxis: true, 160 | transitionDuration: 250 161 | }) 162 | .forceY([0]); 163 | 164 | chart.xAxis.axisLabel("Position (p)").tickFormat(d3.format(',d')); 165 | chart.yAxis.axisLabel('Depth').tickFormat(d3.format(',d')); 166 | 167 | var ourdiv=d3.select('#dialog').append('div').attr('class','chart').attr('id',"region"+item); 168 | ourdiv.append('p').html("Pool "+pool+", locus "+locus+", depth "+item.depth+", differences: "+item.numDiff+", fraction forward: "+item.fraction); 169 | ourdiv.append('p').html(item.annotation); 170 | ourdiv.append('p').html("Change summary: "+item.summary); 171 | ourdiv.append('p').html("Amino acid change: "+item.aminoReport+", inserts: "+item.insertReport+", deletes: " + item.xCount); 172 | ourdiv.append('svg').datum(getPoints5(item.graph, "Depth", item.aProb, "aProb", item.cProb, "cProb", item.gProb, "gProb", item.tProb, "tProb", item.xProb, "xProb")).call(chart); 173 | nv.utils.windowResize(chart.update); 174 | 175 | return chart; 176 | })()); 177 | } 178 | UpdateTable(); 179 | -------------------------------------------------------------------------------- /tclap/MultiSwitchArg.h: -------------------------------------------------------------------------------- 1 | 2 | 
/****************************************************************************** 3 | * 4 | * file: MultiSwitchArg.h 5 | * 6 | * Copyright (c) 2003, Michael E. Smoot . 7 | * Copyright (c) 2004, Michael E. Smoot, Daniel Aarno. 8 | * Copyright (c) 2005, Michael E. Smoot, Daniel Aarno, Erik Zeek. 9 | * All rights reverved. 10 | * 11 | * See the file COPYING in the top directory of this distribution for 12 | * more information. 13 | * 14 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | *****************************************************************************/ 23 | 24 | 25 | #ifndef TCLAP_MULTI_SWITCH_ARG_H 26 | #define TCLAP_MULTI_SWITCH_ARG_H 27 | 28 | #include 29 | #include 30 | 31 | #include 32 | 33 | namespace TCLAP { 34 | 35 | /** 36 | * A multiple switch argument. If the switch is set on the command line, then 37 | * the getValue method will return the number of times the switch appears. 38 | */ 39 | class MultiSwitchArg : public SwitchArg 40 | { 41 | protected: 42 | 43 | /** 44 | * The value of the switch. 45 | */ 46 | int _value; 47 | 48 | /** 49 | * Used to support the reset() method so that ValueArg can be 50 | * reset to their constructed value. 51 | */ 52 | int _default; 53 | 54 | public: 55 | 56 | /** 57 | * MultiSwitchArg constructor. 58 | * \param flag - The one character flag that identifies this 59 | * argument on the command line. 60 | * \param name - A one word name for the argument. Can be 61 | * used as a long flag on the command line. 
62 | * \param desc - A description of what the argument is for or 63 | * does. 64 | * \param init - Optional. The initial/default value of this Arg. 65 | * Defaults to 0. 66 | * \param v - An optional visitor. You probably should not 67 | * use this unless you have a very good reason. 68 | */ 69 | MultiSwitchArg(const std::string& flag, 70 | const std::string& name, 71 | const std::string& desc, 72 | int init = 0, 73 | Visitor* v = NULL); 74 | 75 | 76 | /** 77 | * MultiSwitchArg constructor. 78 | * \param flag - The one character flag that identifies this 79 | * argument on the command line. 80 | * \param name - A one word name for the argument. Can be 81 | * used as a long flag on the command line. 82 | * \param desc - A description of what the argument is for or 83 | * does. 84 | * \param parser - A CmdLine parser object to add this Arg to 85 | * \param init - Optional. The initial/default value of this Arg. 86 | * Defaults to 0. 87 | * \param v - An optional visitor. You probably should not 88 | * use this unless you have a very good reason. 89 | */ 90 | MultiSwitchArg(const std::string& flag, 91 | const std::string& name, 92 | const std::string& desc, 93 | CmdLineInterface& parser, 94 | int init = 0, 95 | Visitor* v = NULL); 96 | 97 | 98 | /** 99 | * Handles the processing of the argument. 100 | * This re-implements the SwitchArg version of this method to set the 101 | * _value of the argument appropriately. 102 | * \param i - Pointer the the current argument in the list. 103 | * \param args - Mutable list of strings. Passed 104 | * in from main(). 105 | */ 106 | virtual bool processArg(int* i, std::vector& args); 107 | 108 | /** 109 | * Returns int, the number of times the switch has been set. 110 | */ 111 | int getValue(); 112 | 113 | /** 114 | * Returns the shortID for this Arg. 115 | */ 116 | std::string shortID(const std::string& val) const; 117 | 118 | /** 119 | * Returns the longID for this Arg. 
120 | */ 121 | std::string longID(const std::string& val) const; 122 | 123 | void reset(); 124 | 125 | }; 126 | 127 | ////////////////////////////////////////////////////////////////////// 128 | //BEGIN MultiSwitchArg.cpp 129 | ////////////////////////////////////////////////////////////////////// 130 | inline MultiSwitchArg::MultiSwitchArg(const std::string& flag, 131 | const std::string& name, 132 | const std::string& desc, 133 | int init, 134 | Visitor* v ) 135 | : SwitchArg(flag, name, desc, false, v), 136 | _value( init ), 137 | _default( init ) 138 | { } 139 | 140 | inline MultiSwitchArg::MultiSwitchArg(const std::string& flag, 141 | const std::string& name, 142 | const std::string& desc, 143 | CmdLineInterface& parser, 144 | int init, 145 | Visitor* v ) 146 | : SwitchArg(flag, name, desc, false, v), 147 | _value( init ), 148 | _default( init ) 149 | { 150 | parser.add( this ); 151 | } 152 | 153 | inline int MultiSwitchArg::getValue() { return _value; } 154 | 155 | inline bool MultiSwitchArg::processArg(int *i, std::vector& args) 156 | { 157 | if ( _ignoreable && Arg::ignoreRest() ) 158 | return false; 159 | 160 | if ( argMatches( args[*i] )) 161 | { 162 | // so the isSet() method will work 163 | _alreadySet = true; 164 | 165 | // Matched argument: increment value. 166 | ++_value; 167 | 168 | _checkWithVisitor(); 169 | 170 | return true; 171 | } 172 | else if ( combinedSwitchesMatch( args[*i] ) ) 173 | { 174 | // so the isSet() method will work 175 | _alreadySet = true; 176 | 177 | // Matched argument: increment value. 178 | ++_value; 179 | 180 | // Check for more in argument and increment value. 181 | while ( combinedSwitchesMatch( args[*i] ) ) 182 | ++_value; 183 | 184 | _checkWithVisitor(); 185 | 186 | return false; 187 | } 188 | else 189 | return false; 190 | } 191 | 192 | inline std::string 193 | MultiSwitchArg::shortID(const std::string& val) const 194 | { 195 | return Arg::shortID(val) + " ... 
"; 196 | } 197 | 198 | inline std::string 199 | MultiSwitchArg::longID(const std::string& val) const 200 | { 201 | return Arg::longID(val) + " (accepted multiple times)"; 202 | } 203 | 204 | inline void 205 | MultiSwitchArg::reset() 206 | { 207 | MultiSwitchArg::_value = MultiSwitchArg::_default; 208 | } 209 | 210 | ////////////////////////////////////////////////////////////////////// 211 | //END MultiSwitchArg.cpp 212 | ////////////////////////////////////////////////////////////////////// 213 | 214 | } //namespace TCLAP 215 | 216 | #endif 217 | --------------------------------------------------------------------------------