├── src ├── dawg.rc ├── dawg.ico ├── dawg.png ├── dawg-256.png ├── dawg.desktop ├── include │ └── dawg │ │ ├── details │ │ ├── config.h.cmake │ │ ├── aliases.xmh │ │ ├── cumtree.h │ │ ├── global.xmh │ │ ├── xm.h │ │ ├── dawgma.xmh │ │ └── subst_dna.h │ │ ├── bark.h │ │ ├── utils │ │ ├── specfunc.h │ │ ├── aliastable.h │ │ └── vecio.h │ │ ├── log.h │ │ ├── global.h │ │ ├── utils.h │ │ ├── root.h │ │ ├── ma.h │ │ ├── output.h │ │ ├── wood_parse.h │ │ ├── wood.h │ │ ├── trick.h │ │ ├── rate.h │ │ ├── matic.h │ │ └── trick_parse.h ├── version.h.in.in ├── lib │ ├── meson.build │ ├── models.cpp │ ├── parse.cpp │ └── ma.cpp ├── meson.build ├── dawg.h ├── dawg_app.h ├── dawgarg.xmh └── dawg.cpp ├── tests ├── speed-long-branch.sh ├── speed-short-branch.sh ├── test0.sh ├── Dawg │ └── RunTest.cmake.in ├── speed-long-branch-gamma.sh ├── speed-long-branch-indels.sh ├── test0.dawg ├── test0.fasta ├── CMakeLists.txt ├── Unit │ ├── Dawg │ │ ├── parse_test.cpp │ │ └── testu01.cpp │ ├── CMakeLists.txt │ └── boost_test_helper.h ├── speed-256-gamma.sh └── CheckProcessTest.cmake ├── .clang-format ├── releng ├── txt2c.pl ├── build-rel-unix.sh ├── mingw32.cmake ├── build-rel-win.bat └── releng.cmake ├── examples ├── CMakeLists.txt ├── basic-dna-2.dawg ├── recombination.dawg ├── dna-with-gaps.dawg ├── pseudogene.dawg ├── segments.dawg ├── multiple-models.dawg ├── basic-dna.dawg ├── basic-dna-zero-rate.dawg ├── dna.dawg ├── rna.dawg ├── error.dawg ├── mitochondria.dawg └── aa.dawg ├── utils ├── bal256.R ├── cpp_encodeaa.pl ├── cpp_encodedna.pl ├── paml2dawg │ ├── paml2dawg.R │ ├── lg_LG.PAML.txt │ ├── jtt-dcmut.dat │ ├── dayhoff-dcmut.dat │ ├── dayhoff-molphy.dat │ ├── wag.dat │ └── wagstar.dat ├── cpp_tripletdiff.pl └── cpp_encodetri.pl ├── postflight.sh.in ├── news.txt ├── varrep.pl ├── meson.build ├── .gitignore ├── contrib └── meson.build ├── dawg.sublime-project.in ├── nexus2fasta.pl ├── doc ├── footer.html ├── mainpage.hpp ├── header.html.in └── meson.build ├── Modules ├── 
cmake_uninstall.cmake.in ├── cmake_ext_boost_bootstrap.cmake.in ├── FindTestU01.cmake ├── DawgDevel.cmake ├── FindLibunwind.cmake ├── CPackBefore.cmake ├── ExternalDep.cmake └── FindGperftools.cmake ├── outsplit.pl ├── changelog.txt └── dawg.txt /src/dawg.rc: -------------------------------------------------------------------------------- 1 | IDI_ICON1 ICON DISCARDABLE "dawg.ico" 2 | -------------------------------------------------------------------------------- /src/dawg.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CartwrightLab/dawg/HEAD/src/dawg.ico -------------------------------------------------------------------------------- /src/dawg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CartwrightLab/dawg/HEAD/src/dawg.png -------------------------------------------------------------------------------- /src/dawg-256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CartwrightLab/dawg/HEAD/src/dawg-256.png -------------------------------------------------------------------------------- /tests/speed-long-branch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | dawg=$1 4 | 5 | ${dawg} - <<EOF > /dev/null 6 | Root.Length = 10000000 7 | Tree.Tree = (A:10)B; 8 | Sim.Reps = 10 9 | EOF 10 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | BasedOnStyle: Google 4 | ColumnLimit: 80 5 | IndentCaseLabels: false 6 | IndentWidth: 4 7 | SpaceBeforeParens: Never 8 | ... 
9 | -------------------------------------------------------------------------------- /tests/speed-short-branch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | dawg=$1 4 | 5 | ${dawg} - <<EOF > /dev/null 6 | Root.Length = 100000000 7 | Tree.Tree = (A:0.1)B; 8 | Sim.Reps = 100 9 | EOF 10 | -------------------------------------------------------------------------------- /releng/txt2c.pl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/perl -w 2 | 3 | use strict; 4 | 5 | while(<>) 6 | { 7 | chomp; 8 | s/\s+$//; 9 | s/\\/\\\\/g; 10 | s/"/\\"/g; 11 | print qq("$_\\n" \\\n); 12 | } 13 | print qq(\n); 14 | -------------------------------------------------------------------------------- /tests/test0.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | 3 | : ${DAWG=dawg} 4 | : ${DIFF=diff} 5 | 6 | ${DAWG} ${top_srcdir}/tests/test0.dawg | ${DIFF} - \ 7 | ${top_srcdir}/tests/test0.fasta 8 | result=$? 
9 | 10 | exit $result 11 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | INSTALL(FILES 2 | segments.dawg 3 | basic-dna.dawg 4 | basic-dna-2.dawg 5 | multiple-models.dawg 6 | pseudogene.dawg 7 | recombination.dawg 8 | DESTINATION "${CMAKE_DATA_DIR}/examples") 9 | 10 | -------------------------------------------------------------------------------- /tests/Dawg/RunTest.cmake.in: -------------------------------------------------------------------------------- 1 | set(Help-CMD @DAWG_CALL_EXE@ --help) 2 | set(Help-WD ".") 3 | set(Help-RESULT 0) 4 | set(Help-STDERR "Usage:\n dawg") 5 | 6 | include("@CMAKE_CURRENT_SOURCE_DIR@/CheckProcessTest.cmake") 7 | 8 | CheckProcessTests(Dawg.Run Help) -------------------------------------------------------------------------------- /tests/speed-long-branch-gamma.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | dawg=$1 4 | 5 | ${dawg} - <<EOF > /dev/null 6 | Root.Length = 10000000 7 | Tree.Tree = (A:10)B; 8 | Sim.Reps = 10 9 | Subst.Rate.Model = "gamma" 10 | Subst.Rate.Params = 0.5, 0.0, 16 11 | EOF 12 | -------------------------------------------------------------------------------- /utils/bal256.R: -------------------------------------------------------------------------------- 1 | s <- sprintf("s%X", 0:255) 2 | while(length(s) > 1) { 3 | x <- seq(1,length(s),2) 4 | s <- sprintf("(%s:1,%s:1)", s[x],s[x+1]) 5 | } 6 | s <- sprintf("%s;", s) 7 | f <- file("balanced256.tree") 8 | write(s,f) 9 | close(f) 10 | 11 | 12 | -------------------------------------------------------------------------------- /tests/speed-long-branch-indels.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | dawg=$1 4 | 5 | ${dawg} - <<EOF > /dev/null 6 | Root.Length = 1000000 7 | Tree.Tree = (A:1)B; 8 | Sim.Reps = 300 9 | 
Indel.Model.Del = power 10 | Indel.Rate.Del = 0.1 11 | Indel.Params.Del = 1.5, 10000 12 | EOF 13 | -------------------------------------------------------------------------------- /postflight.sh.in: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | app1="$2/@PROJ_BUNDLE_NAME@.app/Contents/MacOS/@PROJ_BUNDLE_NAME@" 3 | app2="$2/@PROJ_BUNDLE_NAME@/bin/dawg" 4 | if [ -x "$app1" ]; then 5 | ln -s "$app1" /usr/bin/dawg 6 | elif [ -x "$app2" ]; then 7 | ln -s "$app2" /usr/bin/dawg 8 | fi 9 | exit 0 10 | 11 | -------------------------------------------------------------------------------- /tests/test0.dawg: -------------------------------------------------------------------------------- 1 | #example0.fud 2 | #simple tree with set seed 3 | #used to check dawg 4 | #because every node is labeled every node is in the output 5 | 6 | Tree = ((A:0.1,B:0.1)D:0.1,C:0.2)R; 7 | Lambda = 0.1 8 | GapModel = "NB" 9 | GapParams = {1,0.5} 10 | Seed = 1000 11 | Format = "Fasta" 12 | 13 | -------------------------------------------------------------------------------- /news.txt: -------------------------------------------------------------------------------- 1 | 04/26/2009 - Version 1.2 Released 2 | 3 | 08/14/2008 - Version 1.1.2 Released 4 | 5 | 05/22/2008 - Version 1.1.1 Released 6 | 7 | 09/05/2006 - Version 1.1 Released 8 | 1-STABLE branch created 9 | 2-CURRENT branch created 10 | 11 | 02/18/2005 - Version 1.0.0 Released 12 | -------------------------------------------------------------------------------- /examples/basic-dna-2.dawg: -------------------------------------------------------------------------------- 1 | # Example: Simulate DNA evolution along a tree 2 | # Identical model as basic-dna.dawg 3 | 4 | Tree.Tree = "((Man:0.1,Monkey:0.1):0.2,Dawg:0.25);" 5 | Subst.Model = "HKY" 6 | Subst.Params = 2.0, 1.0 7 | Subst.Freqs = 0.3, 0.2, 0.2, 0.3 8 | Root.Length = 1000 9 | Sim.Reps = 10 10 | 11 | 
-------------------------------------------------------------------------------- /src/dawg.desktop: -------------------------------------------------------------------------------- 1 | [Desktop Entry] 2 | Version=1.0 3 | Encoding=UTF-8 4 | Name=Dawg 5 | Comment=Sequence Evolution Simulator 6 | Exec=dawg %f 7 | TryExec=dawg 8 | Icon=dawg 9 | Terminal=true 10 | X-MultipleArgs=false 11 | Type=Application 12 | Categories=Application;Education;Science;Biology; 13 | StartupNotify=false 14 | -------------------------------------------------------------------------------- /examples/recombination.dawg: -------------------------------------------------------------------------------- 1 | # Example: Specifying a recombination event and using autonamed sections 2 | 3 | [[-]] 4 | Subst.Model = jc 5 | Root.Segment = 1 6 | Root.Length = 60 7 | Tree.Tree = ((A:0.02,B:0.02):0.2,(C:0.02):0.2); 8 | 9 | [[-]] 10 | Root.Code = 1 11 | Root.Segment = 0 12 | Root.Length = 60 13 | Tree.Tree = ((A:0.02):0.2,(B:0.02,C:0.02):0.2); 14 | 15 | [[-]] 16 | Root.Segment = 2 17 | 18 | -------------------------------------------------------------------------------- /src/include/dawg/details/config.h.cmake: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DAWG_DETAILS_CONFIG_H 3 | #define DAWG_DETAILS_CONFIG_H 4 | 5 | #cmakedefine RANDOM_GEN_HEADER_X @RANDOM_GEN_HEADER_X@ 6 | 7 | #ifndef RANDOM_GEN_HEADER 8 | # define RANDOM_GEN_HEADER RANDOM_GEN_HEADER_X 9 | #endif 10 | 11 | #ifndef PACKAGE_STRING 12 | #cmakedefine PACKAGE_STRING "@PACKAGE_STRING@" 13 | #endif 14 | 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /examples/dna-with-gaps.dawg: -------------------------------------------------------------------------------- 1 | # Test DNA evolutionary model 2 | 3 | Tree.Tree = "((A:0.3, B:0.2)C:0.1, (D:0.3, E:0.2)F:0.1)R;" 4 | 5 | Root.Length = 100 6 | 7 | Subst.Model = "GTR" 8 | 
Subst.Freqs = 0.2, 0.3, 0.3, 0.2 9 | Subst.Params = 2.0, 1.0, 3.0, 1.0, 1.0, 1.0 10 | 11 | 12 | Indel.Model = "GEO" 13 | Indel.Rate = 0.1 14 | Indel.Max = 20 15 | Indel.Params = 0.9 16 | 17 | Sim.Seed = 20240708 18 | Sim.Reps = 10 -------------------------------------------------------------------------------- /src/version.h.in.in: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DAWG_VERSION_H 3 | #define DAWG_VERSION_H 4 | 5 | #define DAWG_VERSION_STRING "@version_vcs@" 6 | #define DAWG_PACKAGE_STRING "@package_string@" 7 | #define DAWG_VERSION_INTEGER @version_int@ 8 | #define DAWG_VERSION_MAJOR @version_major@ 9 | #define DAWG_VERSION_MINOR @version_minor@ 10 | #define DAWG_VERSION_PATCH @version_patch@ 11 | 12 | #endif // DAWG_VERSION_H 13 | -------------------------------------------------------------------------------- /src/lib/meson.build: -------------------------------------------------------------------------------- 1 | 2 | libdawg_sources = files([ 3 | 'ma.cpp', 4 | 'matic.cpp', 5 | 'models.cpp', 6 | 'output.cpp', 7 | 'parse.cpp' 8 | ]) 9 | 10 | libdawg_deps = [boost_dep, cli_dep, doctest_dep, random_dep, ryml_dep] 11 | 12 | libdawg = static_library('libdawg', [libdawg_sources, version_file], 13 | include_directories : inc, 14 | dependencies : libdawg_deps, 15 | cpp_args : ['-DDOCTEST_CONFIG_DISABLE'] 16 | ) 17 | -------------------------------------------------------------------------------- /src/include/dawg/bark.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DAWG_BARK_H 3 | #define DAWG_BARK_H 4 | /**************************************************************************** 5 | * Copyright (C) 2009 Reed A. 
Cartwright, PhD * 6 | ****************************************************************************/ 7 | 8 | #include 9 | 10 | namespace dawg { 11 | 12 | struct bark: virtual boost::exception, virtual std::exception { }; 13 | 14 | } // namespace dawg 15 | 16 | 17 | #endif 18 | 19 | -------------------------------------------------------------------------------- /utils/cpp_encodeaa.pl: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | my @a = qw(A C D E F G H I K L M N P Q R S T V W Y); 5 | my @b = map {ord($_)-ord('0')} @a; 6 | my @q = (63) x 80; 7 | 8 | my $j = 0; 9 | 10 | my @x = (63) x 80; 11 | foreach(@a) { 12 | $x[ord(lc($_))-ord('0')] = $j; 13 | $x[ord($_)-ord('0')] = $j; 14 | $j++; 15 | } 16 | 17 | @x = map { sprintf("% 2s", $_) } @x; 18 | print join(",", @x[ 0..19]) . ",\n" . 19 | join(",", @x[20..39]) . ",\n" . 20 | join(",", @x[40..59]) . ",\n" . 21 | join(",", @x[60..79]) . "\n" 22 | ; 23 | 24 | 25 | -------------------------------------------------------------------------------- /utils/cpp_encodedna.pl: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | my @a = qw(A C G T); 5 | my @b = map {ord($_)-ord('0')} @a; 6 | my @q = (63) x 80; 7 | 8 | my $j = 0; 9 | 10 | my @x = (63) x 80; 11 | foreach(@a) { 12 | $x[ord(lc($_))-ord('0')] = $j; 13 | $x[ord($_)-ord('0')] = $j; 14 | $j++; 15 | } 16 | $x[ord('u')-ord('0')] = 3; 17 | $x[ord('U')-ord('0')] = 3; 18 | 19 | @x = map { sprintf("% 2s", $_) } @x; 20 | print join(",", @x[ 0..19]) . ",\n" . 21 | join(",", @x[20..39]) . ",\n" . 22 | join(",", @x[40..59]) . ",\n" . 23 | join(",", @x[60..79]) . "\n" 24 | ; 25 | 26 | 27 | -------------------------------------------------------------------------------- /varrep.pl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/perl -w -ibak 2 | # Copyright (2005) Reed A. Cartwright. 
All rights reserved. 3 | # 4 | # varrep.pl is used to substitute varables in files 5 | # 6 | # usage: perl varrep.pl file 7 | # 8 | # right now the only variable supported is #NUM# 9 | # useful for modifying the code blocks of Nexus files produced by Dawg 10 | # 11 | # Distributed under the same license as DAWG 12 | # 13 | 14 | 15 | use strict; 16 | 17 | my %vars = (NUM => 0); 18 | 19 | while(<>) 20 | { 21 | $vars{NUM} = $1 if(/\[DataSet (\d+)\]/); 22 | s/#(\w+)#/$vars{$1}/ge; 23 | print $_; 24 | } -------------------------------------------------------------------------------- /src/lib/models.cpp: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | * Copyright (C) 2012-2013 Reed A. Cartwright, PhD * 3 | ****************************************************************************/ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "dawg/subst.h" 10 | 11 | bool dawg::subst_model::create_alias_tables() { 12 | stat_dist_table_.create(&freqs[0], &freqs[64]); 13 | for(std::size_t k = 0; k < 64; ++k) 14 | mutation_table_[k].create(&table[k][0], &table[k][64]); 15 | return true; 16 | } 17 | -------------------------------------------------------------------------------- /tests/test0.fasta: -------------------------------------------------------------------------------- 1 | >A 2 | AGGAACTGGTCACGTTCTGCTATACGTAGTTCACGTGACCGCATTCAGAAAAATGCGACT 3 | AGTATTGTGATCAGGCATATCAGTGACTTGCGGCAGGAGAGC 4 | 5 | >B 6 | AGCAACTTGTCACGTTCTGCGATACGTAGTTCACGTGACCGCATTCACGCAAACACTACT 7 | TGTGCT--GACCATGCATATCAGGGACTTGCGACATGTGGGC 8 | 9 | >C 10 | AGCATGGTGTCAAGTTCTGCGTTCAGTAGTTAACCAGACGGCATTAACGCTAATACATC- 11 | -GTGTT--GACCTACCAACGTACGGACTTGCGTAATGTGGTC 12 | 13 | >D 14 | AGCAACTTGTCACGTTCTGCGATACGTAGTTCACGTGACCGCATTCACGCAAATACTACT 15 | TGTGTT--GACCAGGCATATCAGGGACTTGCGACATGAGGGC 16 | 17 | >R 18 | AGCATGGTGTCAAGTTCTGCGATCCGTAGTTCACGTGACCGCATTAACGCAAATACTACC 19 | 
TGTGTT--GATCAGGCAACTTAGGGACTTGCGAAATGGGGGC 20 | 21 | -------------------------------------------------------------------------------- /examples/pseudogene.dawg: -------------------------------------------------------------------------------- 1 | # Example: How to simulate using multiple models on a tree. 2 | 3 | # Our phylogeny is ((Man:0.1,Monkey:0.1):0.2,Dawg:0.25). And we want the 4 | # primate lineage to contain a pseudogene. We will thus partition the 5 | # simulation into multiple sections (tasks). 6 | 7 | # NOT YET IMPLEMENTED 8 | 9 | [[Gene]] 10 | Root.Length = 40 11 | Tree.Tree = (~Primate:0.1,Dawg:0.25); 12 | Subst.Model = codmg 13 | Subst.Params = 0.1, 1, 1, 1, 1, 1, 1 14 | Subst.Freqs = 0.2, 0.3, 0.3, 0.2 15 | 16 | [[Pseudogene]] 17 | Tree.Tree = ((Man:0.1,Monkey:0.1):0.1)~Primate; 18 | Subst.Model = F81 19 | Subst.Freqs = 0.2, 0.3, 0.3, 0.2 20 | 21 | -------------------------------------------------------------------------------- /meson.build: -------------------------------------------------------------------------------- 1 | 2 | # Dawg's Project Configuration 3 | 4 | # Init Project 5 | project('Dawg', 'cpp', 6 | version : '2.0.9000', 7 | license : 'GPL-2.0-or-later', 8 | meson_version : '>=0.58.0', 9 | default_options : [ 'buildtype=debugoptimized', 'cpp_std=c++17' ] 10 | ) 11 | 12 | subdir('contrib') 13 | subdir('src') 14 | subdir('doc') 15 | # subdir('tests') 16 | 17 | configure_file(input : 'dawg.sublime-project.in', 18 | output : 'dawg.sublime-project', 19 | configuration : { 20 | 'source_dir' : meson.project_source_root(), 21 | 'project_name' : meson.project_name(), 22 | 'build_dir' : meson.project_build_root() 23 | } 24 | ) 25 | -------------------------------------------------------------------------------- /src/include/dawg/utils/specfunc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DAWG_SPECFUNC_H 3 | #define DAWG_SPECFUNC_H 4 | 
/**************************************************************************** 5 | * Copyright (C) 2009,2012,2013 Reed A. Cartwright, PhD * 6 | ****************************************************************************/ 7 | 8 | #include 9 | #include 10 | 11 | namespace dawg { 12 | 13 | inline double zeta(double z) { 14 | return boost::math::zeta(z); 15 | } 16 | 17 | inline double beta(double a, double b) { 18 | return boost::math::beta(a,b); 19 | } 20 | 21 | }; 22 | 23 | #endif 24 | 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # KDE stuff 2 | *kdev4* 3 | *kdev4/ 4 | 5 | # intelliJ stuff 6 | .idea/ 7 | 8 | # KDE stuff 9 | *kde* 10 | 11 | # Compiled Object files 12 | *.slo 13 | *.lo 14 | *.o 15 | 16 | # Compiled Dynamic libraries 17 | *.so 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | 24 | # Data files 25 | *.glf 26 | *.sam 27 | *.fa 28 | *.fai 29 | 30 | # misc 31 | *~ 32 | *temp* 33 | .DS_* 34 | *.chain 35 | 36 | bk 37 | .gitignore 38 | .Rhistory 39 | 40 | # cmake stuff 41 | CMakeCache.txt 42 | CMakeFiles 43 | Makefile 44 | cmake_install.cmake 45 | install_manifest.txt 46 | src/version.h 47 | src/config.h 48 | 49 | # Ignore the build directory 50 | build/* 51 | builddir/* 52 | 53 | # Ignore workspace 54 | *.sublime-workspace 55 | -------------------------------------------------------------------------------- /contrib/meson.build: -------------------------------------------------------------------------------- 1 | libryml = static_library( 2 | 'ryml', ['rapidyaml/rapidyaml.hpp'], 3 | include_directories : include_directories('rapidyaml') 4 | ) 5 | 6 | ryml_dep = declare_dependency( 7 | include_directories : include_directories('rapidyaml'), 8 | link_with : libryml 9 | ) 10 | 11 | librandom = static_library( 12 | 'random', ['random/random.cpp'], 13 | include_directories : include_directories('random') 
14 | ) 15 | 16 | random_dep = declare_dependency( 17 | include_directories : include_directories('random'), 18 | link_with : librandom 19 | ) 20 | 21 | cli_dep = declare_dependency(include_directories : include_directories('CLI')) 22 | 23 | doctest_dep = declare_dependency(include_directories: include_directories('doctest')) 24 | -------------------------------------------------------------------------------- /dawg.sublime-project.in: -------------------------------------------------------------------------------- 1 | { 2 | "folders": 3 | [ 4 | { 5 | "path": "@source_dir@", 6 | "folder_exclude_patterns":["builddir"] 7 | } 8 | ], 9 | "settings": { 10 | "tab_size": 4, 11 | "translate_tabs_to_spaces": true 12 | // "ClangFormat": { 13 | // "format_on_save": true 14 | // } 15 | }, 16 | "build_systems": 17 | [ 18 | { 19 | "name": "@project_name@ - default", 20 | "cmd": ["meson", "compile", "-C", "@build_dir@"], 21 | "working_dir": "${project_path}", 22 | "file_regex": "^(..[^:]*):([0-9]+):?([0-9]+)?:? (.*)$" 23 | }, 24 | { 25 | "name": "@project_name@ - test", 26 | "cmd": ["meson", "test", "-C", "@build_dir@"], 27 | "working_dir": "${project_path}", 28 | "file_regex": "^(..[^:]*):([0-9]+):?([0-9]+)?:? (.*)$" 29 | } 30 | ] 31 | } 32 | -------------------------------------------------------------------------------- /nexus2fasta.pl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/perl -w 2 | # Copyright (2005) Reed A. Cartwright. All rights reserved. 
3 | # 4 | # converts nexus sequences to fasta 5 | # 6 | # usage: perl nexus2fasta.pl < infile > outfile 7 | # 8 | # Distributed under the same license as DAWG 9 | # 10 | 11 | 12 | my $state = 0; 13 | 14 | my %seqs = (); 15 | 16 | local $/; 17 | my $text = <>; 18 | 19 | my ($data) = $text =~ /begin\s+data;.+?matrix\s*(.*?);\s*end;/is; 20 | my @lines = split(/\n/, $data); 21 | 22 | foreach(@lines) 23 | { 24 | s/^\s+//; 25 | s/\s+$//; 26 | next unless(/\w/); 27 | my @sec = split(/\s+/, $_); 28 | my $name = shift(@sec); 29 | $seqs{$name} |= ''; 30 | $seqs{$name} .= join('', @sec); 31 | } 32 | 33 | print ">$_\n$seqs{$_}\n\n" foreach(sort(keys(%seqs))); 34 | 35 | -------------------------------------------------------------------------------- /doc/footer.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 12 | 13 | 14 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /examples/segments.dawg: -------------------------------------------------------------------------------- 1 | # Example: Using sections to simulate both coding and noncoding sequences 2 | 3 | # Use "_initial_" section to specify tree and reps 4 | #Tree.Tree = "(A:0.3)D;" 5 | Sim.Reps = 5 6 | Root.Segment = 1 7 | #Root.Length = 0 8 | 9 | # "Noncoding" section to control segment 1 of the sequence 10 | [[Noncoding]] 11 | Tree.Tree = "(A:0.3)D;" 12 | Root.Segment = 1 13 | Root.Length = 10 14 | Subst.Model = JC 15 | 16 | # "Coding" section to control segment 2 of the sequence 17 | [[Coding]] 18 | Root.Segment = 2 19 | Root.Length = 10 20 | Subst.Model = codmg 21 | Subst.Params = 0.1, 1, 1, 1, 1, 1, 1 22 | Subst.Freqs = 0.2, 0.3, 0.3, 0.2 23 | # Root.Code = 100 24 | 25 | # Another section to control segment 3. Clone parameters from "Noncoding" 26 | # section. 
27 | [[Noncoding2 = Noncoding]] 28 | Root.Segment = 3 29 | -------------------------------------------------------------------------------- /examples/multiple-models.dawg: -------------------------------------------------------------------------------- 1 | # Example: How to simulate using multiple models on a tree. 2 | 3 | # Our phylogeny is ((Man:0.02,Monkey:0.02):0.1,(Katt:0.02,Dawg:0.02):0.1). 4 | # And we want the primate branch to contain a signal of selection. 5 | # We will thus partition the simulation into multiple sections (tasks). 6 | # 7 | # Node names that begin with ~ are not printed in the alignment. 8 | 9 | Root.Length = 100 10 | Subst.Model = codmg 11 | Subst.Freqs = 0.2, 0.3, 0.3, 0.2 12 | 13 | [[Ancestor]] 14 | Tree.Tree = (~PrimateA:0.01,(Katt:0.02,Dawg:0.02):0.1)~Root; 15 | Subst.Params = 0.1, 1, 1, 1, 1, 1, 1 16 | 17 | [[Adaptation]] 18 | Tree.Tree = (~PrimateB:0.08)~PrimateA; 19 | Subst.Params = 1.1, 1, 1, 1, 1, 1, 1 20 | 21 | 22 | [[Primates]] 23 | Tree.Tree = ((Man:0.02,Monkey:0.02):0.01)~PrimateB; 24 | Subst.Params = 0.1, 1, 1, 1, 1, 1, 1 25 | 26 | -------------------------------------------------------------------------------- /Modules/cmake_uninstall.cmake.in: -------------------------------------------------------------------------------- 1 | if(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 2 | message(FATAL_ERROR "Cannot find install manifest: '@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt'") 3 | endif() 4 | 5 | file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files) 6 | string(REPLACE "\n" ";" files "${files}") 7 | 8 | foreach(file ${files}) 9 | set(dfile "$ENV{DESTDIR}${file}") 10 | message(STATUS "Uninstalling: ${dfile}") 11 | if(IS_SYMLINK "${dfile}" OR EXISTS "${dfile}") 12 | execute_process(COMMAND ${CMAKE_COMMAND} -E remove "${dfile}" 13 | OUTPUT_VARIABLE rm_out 14 | RESULT_VARIABLE rm_retval 15 | ) 16 | if(NOT "${rm_retval}" STREQUAL 0) 17 | message(FATAL_ERROR "Problem when removing '${dfile}'") 18 
| ENDIF() 19 | else() 20 | message(STATUS "File '${dfile}' does not exist.") 21 | endif() 22 | endforeach() 23 | 24 | -------------------------------------------------------------------------------- /releng/build-rel-unix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Download and build a package from GitHub 3 | 4 | CMAKE=cmake 5 | umask 077 6 | 7 | # Process command line arguments 8 | build_toolchain= 9 | while getopts t: name; do 10 | case $name in 11 | t) build_toolchain="$OPTARG" ;; 12 | ?) printf "Usage: %s [-t toolchain] [tag]\n" $0 13 | exit 2;; 14 | esac 15 | done 16 | shift `expr $OPTIND - 1` 17 | 18 | # Determine the archive tag 19 | build_archive=${1-current} 20 | build_args="-DRELENG_TAG=${build_archive}" 21 | 22 | # if toolchain is m32 or m64 use the flags and not a toolchain 23 | if [ ! -z "${build_toolchain}" ]; then 24 | case "${build_toolchain}" in 25 | m32|M32) build_args="${build_args} -DRELENG_M32=on" ;; 26 | m64|M64) build_args="${build_args} -DRELENG_M64=on" ;; 27 | *) build_args="${build_args} -DRELENG_TOOLCHAIN=${build_toolchain}" ;; 28 | esac 29 | fi 30 | 31 | $CMAKE $build_args -P releng.cmake 32 | 33 | -------------------------------------------------------------------------------- /doc/mainpage.hpp: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////// 2 | /// \mainpage 3 | /// 4 | /// \section welcome Welcome 5 | /// Welcome to the official DAWG documentation. Here you will find a detailed 6 | /// view of all the DAWG classes and functions.
7 | /// If you are looking for tutorials, you can check out some of the programs in the examples 8 | /// directory. 9 | /// 10 | /// \section example Short example 11 | /// Here is a short example to show you how simple it is to use DAWG. 12 | /// This example is borrowed from SpartaABC. 13 | /// 14 | /// \code 15 | /// 16 | /// #include 17 | /// 18 | /// int main() 19 | /// { 20 | /// // Use the code routines from SpartaABC that use DAWG 21 | /// 22 | /// 23 | /// return EXIT_SUCCESS; 24 | /// } 25 | /// \endcode 26 | //////////////////////////////////////////////////////////// -------------------------------------------------------------------------------- /src/include/dawg/log.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DAWG_LOG_H 3 | #define DAWG_LOG_H 4 | /**************************************************************************** 5 | * Copyright (C) 2009 Reed A. Cartwright, PhD * 6 | ****************************************************************************/ 7 | 8 | #include 9 | #include 10 | 11 | #ifdef NDEBUG 12 | # define DAWG_ERROR(err_msg) ((std::cerr << "ERROR: " << err_msg << std::endl), false) 13 | # define DAWG_WARN(warn_msg) ((std::cerr << "WARNING: " << warn_msg << std::endl), false) 14 | #else 15 | # define DAWG_ERROR(err_msg) ((std::cerr << "ERROR: " << err_msg << std::endl \ 16 | << "(" << __FILE__ << ":" << __LINE__ << ")" << std::endl ), false) 17 | # define DAWG_WARN(warn_msg) ((std::cerr << "WARNING: " << warn_msg << std::endl \ 18 | << "(" << __FILE__ << ":" << __LINE__ << ")" << std::endl ), false) 19 | #endif 20 | 21 | #endif /* DAWG_LOG_H */ 22 | -------------------------------------------------------------------------------- /utils/paml2dawg/paml2dawg.R: -------------------------------------------------------------------------------- 1 | aa <- "A R N D C Q E G H I L K M F P S T W Y V" 2 | aa <- strsplit(aa, "\\s+")[[1]] 3 | m <- scan("wag.txt") 4 | ss <- m[1:190] 5 | 
p <- m[191:210] 6 | 7 | s <- matrix(0,20,20) 8 | s[upper.tri(s)] <- ss 9 | s <- t(s) 10 | s[upper.tri(s)] <- ss 11 | 12 | o <- order(aa) 13 | s <- s[o,][,o] 14 | 15 | ss.new <- s[lower.tri(s)] 16 | 17 | p.new <- p[o] 18 | 19 | m <- matrix(sprintf("%.7f", ss.new),nrow=10) 20 | m <- apply(m,2,paste, collapse=", ") 21 | m <- paste(m, collapse=",\n") 22 | cat(m); cat("\n\n") 23 | 24 | m <- matrix(sprintf("%.7f", p.new),nrow=10) 25 | m <- apply(m,2,paste, collapse=", ") 26 | m <- paste(m, collapse=",\n") 27 | cat(m); cat("\n\n") 28 | 29 | # p2 <- p.new/sum(p.new) 30 | # s2 <- t(p2*s)*p2 31 | # s2 <- s2/sum(s2) 32 | # s2 <- s2/p2 33 | # r2 <- rowSums(s2) 34 | # r3 <- max(r2) 35 | # diag(s2) <- r3-r2 36 | # s2 <- s2/rowSums(s2) 37 | -------------------------------------------------------------------------------- /releng/mingw32.cmake: -------------------------------------------------------------------------------- 1 | # Choose an appropriate compiler prefix 2 | SET(COMPILER_PREFIX i486-mingw32) # Arch Linux 3 | 4 | # the name of the target operating system 5 | SET(CMAKE_SYSTEM_NAME Windows) 6 | SET(CMAKE_SYSTEM_PROCESSOR x86) 7 | 8 | # which compilers to use for C and C++ 9 | find_program(CMAKE_RC_COMPILER NAMES ${COMPILER_PREFIX}-windres) 10 | find_program(CMAKE_C_COMPILER NAMES ${COMPILER_PREFIX}-gcc) 11 | find_program(CMAKE_CXX_COMPILER NAMES ${COMPILER_PREFIX}-g++) 12 | 13 | # where the target environment is located (derived from COMPILER_PREFIX above) 14 | #SET(USER_ROOT_PATH) 15 | SET(CMAKE_FIND_ROOT_PATH /usr/${COMPILER_PREFIX} /usr/local/${COMPILER_PREFIX}) 16 | 17 | # adjust the default behaviour of the FIND_XXX() commands: 18 | # search headers and libraries in the target environment, search 19 | # programs in the host environment 20 | set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) 21 | set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) 22 | set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) 23 | 24 | -------------------------------------------------------------------------------- /doc/header.html.in: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | DAWG - DNA Analysis with Gaps 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 18 |
-------------------------------------------------------------------------------- /src/meson.build: -------------------------------------------------------------------------------- 1 | # Process version information 2 | semver = meson.project_version().split('.') 3 | version_src = configure_file( 4 | input : 'version.h.in.in', 5 | output : 'version.h.in', 6 | configuration : { 7 | 'version_major' : semver[0], 8 | 'version_minor' : semver[1], 9 | 'version_patch' : semver[2], 10 | 'version_int' : (semver[0].to_int()*1000+ semver[1].to_int())*1000+ \ 11 | semver[2].to_int(), 12 | 'version_vcs' : '@VCS_TAG@', 13 | 'package_string' : 'dawg ' + '@VCS_TAG@' 14 | }) 15 | 16 | version_file = vcs_tag(input : version_src, output : 'version.h') 17 | 18 | inc = include_directories(['.', 'include']) 19 | 20 | boost_dep = dependency('boost', 21 | version : '>=1.47.0', 22 | required : true 23 | ) 24 | 25 | subdir('lib') 26 | 27 | executable('dawg', ['dawg.cpp', version_file], 28 | include_directories : inc, 29 | link_with : [libdawg], 30 | cpp_args : ['-DDOCTEST_CONFIG_DISABLE'], 31 | dependencies : [libdawg_deps], 32 | install : true 33 | ) 34 | -------------------------------------------------------------------------------- /examples/basic-dna.dawg: -------------------------------------------------------------------------------- 1 | # Example: Simulate DNA evolution along a tree 2 | # See readme.txt for an explanation of the structure of an input file. 3 | # Simulation results are sent to stdout. 4 | 5 | ## Tree Section ################################################################ 6 | # 7 | # Use a constant tree. 8 | 9 | [Tree] 10 | Tree = "((Man:0.1,Monkey:0.1):0.2,Dawg:0.25);" 11 | 12 | ## Subst Section ############################################################### 13 | # 14 | # Use an HKY substitution model with a transition rate of 2.0 and a transversion 15 | # rate of 1.0. Allele frequencies are 0.3 A, 0.2 C, 0.2 G, and 0.3 T. 
16 | 17 | [Subst] 18 | Model = "HKY" 19 | Params = 2.0, 1.0 20 | Freqs = 0.3, 0.2, 0.2, 0.3 21 | 22 | ## Root Section ################################################################ 23 | # 24 | # Simulate a sequence that is 1000 nt long. 25 | 26 | [Root] 27 | Length = 1000 28 | 29 | ## Sim Section ################################################################# 30 | # 31 | # Simulate 10 alignments 32 | 33 | [Sim] 34 | Reps = 10 35 | 36 | -------------------------------------------------------------------------------- /src/lib/parse.cpp: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | * Copyright (C) 2010 Reed A. Cartwright, PhD * 3 | ****************************************************************************/ 4 | 5 | #if _MSC_VER 6 | #pragma warning(disable : 4127) 7 | #endif 8 | 9 | #include 10 | #include 11 | 12 | #include "dawg/trick_parse.h" 13 | #include "dawg/wood_parse.h" 14 | 15 | using namespace dawg; 16 | 17 | bool trick::parse_file(trick &p, const char *cs) { 18 | bool ret; 19 | if(cs == nullptr || strcmp(cs, "") == 0 || strcmp(cs, "-") == 0) { 20 | ret = p.parse_stream(std::cin); 21 | } else { 22 | std::ifstream is(cs); 23 | if(!is.is_open()) 24 | return DAWG_ERROR("unable to open input file '" << cs << "'"); 25 | ret = p.parse_stream(is); 26 | } 27 | if(!ret) return DAWG_ERROR("unable to parse input '" << cs << "'"); 28 | return true; 29 | } 30 | 31 | bool wood::parse_string(wood &w, const std::string &ss) { return w.parse(ss); } 32 | -------------------------------------------------------------------------------- /Modules/cmake_ext_boost_bootstrap.cmake.in: -------------------------------------------------------------------------------- 1 | set(work "@CMAKE_CURRENT_BINARY_DIR@/@EXT_PREFIX@/boost/src/ext_boost") 2 | set(build "${work}/tools/build/src/engine") 3 | 4 | set(ENV{CC} "@CMAKE_C_COMPILER@") 5 | execute_process( 6 | COMMAND 
sh "${build}/build.sh" "@EXT_BOOST_BOOTSTRAP_TOOLSET@" 7 | WORKING_DIRECTORY "${build}" 8 | RESULT_VARIABLE result 9 | ERROR_FILE ${work}/bootstrap.log 10 | OUTPUT_FILE ${work}/bootstrap.log 11 | ) 12 | if(result) 13 | MESSAGE(FATAL_ERROR "Unable to boostrap ext_boost. See '${work}/bootstrap.log' for details. Error code: ${result}") 14 | endif() 15 | 16 | execute_process( 17 | COMMAND "${build}/bootstrap/jam0" -d0 -f build.jam "--toolset=@EXT_BOOST_BOOTSTRAP_TOOLSET@" 18 | --toolset-root= --show-locate-target 19 | WORKING_DIRECTORY "${build}" 20 | OUTPUT_VARIABLE output 21 | OUTPUT_STRIP_TRAILING_WHITESPACE 22 | ) 23 | file(COPY "${build}/${output}/b2" DESTINATION "${work}") 24 | 25 | file(WRITE "${work}/tools/build/src/user-config.jam" 26 | "using @EXT_BOOST_CXX_TOOLSET@ : @CMAKE_CXX_COMPILER_VERSION@ : @CMAKE_CXX_COMPILER@ : ;" 27 | ) -------------------------------------------------------------------------------- /examples/basic-dna-zero-rate.dawg: -------------------------------------------------------------------------------- 1 | # Example: Simulate DNA evolution along a tree 2 | # See readme.txt for an explanation on the structure of an input file. 3 | # Simulation results are sent to stdout. 4 | 5 | ## Tree Section ################################################################ 6 | # 7 | # Use a constant tree. 8 | 9 | [Tree] 10 | Tree = "((Man:0.1,Monkey:0.1):0.2,Dawg:0.25);" 11 | 12 | ## Subst Section ############################################################### 13 | # 14 | # Use an HKY substitution model with a transition rate of 2.0 and a transversion 15 | # rate of 1.0. Allele frequences are 0.3 A, 0.2 C, 0.2 G, and 0.3 T. 16 | 17 | [Subst] 18 | Model = "HKY" 19 | Params = 2.0, 1.0 20 | Freqs = 0.3, 0.2, 0.2, 0.3 21 | Rate.Model = "ZERO" 22 | 23 | ## Root Section ################################################################ 24 | # 25 | # Simulate a sequence that is 1000 nt long. 
26 | 27 | [Root] 28 | Length = 1000 29 | 30 | ## Sim Section ################################################################# 31 | # 32 | # Simulate 10 alignments 33 | 34 | [Sim] 35 | Reps = 10 -------------------------------------------------------------------------------- /src/include/dawg/global.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DAWG_GLOBAL_H 3 | #define DAWG_GLOBAL_H 4 | /**************************************************************************** 5 | * Copyright (C) 2010 Reed A. Cartwright, PhD * 6 | ****************************************************************************/ 7 | 8 | #include 9 | #include 10 | 11 | #include "dawg/trick.h" 12 | 13 | namespace dawg { 14 | 15 | struct global_options { 16 | #define XM(name, type, def, desc) type XV(name); 17 | #include "dawg/details/global.xmh" 18 | #undef XM 19 | 20 | global_options() 21 | : 22 | #define XM(name, type, def, desc) XV(name)(def), 23 | #include "dawg/details/global.xmh" 24 | #undef XM 25 | _unused() { 26 | } 27 | 28 | void read_section(const trick::data_type::value_type &sec) { 29 | #define XM(name, type, def, desc) sec.get(XP(name), XV(name)); 30 | #include "dawg/details/global.xmh" 31 | #undef XM 32 | } 33 | 34 | private: 35 | char _unused; 36 | }; 37 | 38 | } // namespace dawg 39 | 40 | #endif // DAWG_GLOBAL_H 41 | -------------------------------------------------------------------------------- /doc/meson.build: -------------------------------------------------------------------------------- 1 | fs = import('fs') 2 | 3 | doxygen = find_program('doxygen', required : false) 4 | 5 | if doxygen.found() 6 | semver = meson.project_version().split('.') 7 | 8 | doxy_conf = configuration_data() 9 | doxy_conf.set('DOXYGEN_INPUT_DIR', meson.project_source_root()) 10 | doxy_conf.set('DOXYGEN_OUTPUT_DIR', meson.current_build_dir()) 11 | doxy_conf.set('DAWG_DESCRIPTION', 'Sequence Evolution Simulator') 12 | 
doxy_conf.set('SOURCE_DIR', meson.project_source_root()) 13 | doxy_conf.set('DAWG_VERSION_MAJOR', semver[0]) 14 | doxy_conf.set('DAWG_VERSION_MINOR', semver[1]) 15 | doxy_conf.set('DAWG_VERSION_PATCH', semver[2]) 16 | 17 | doxyfile = configure_file( 18 | input : 'doxyfile.in', 19 | output : 'doxyfile', 20 | configuration : doxy_conf, 21 | install : false 22 | ) 23 | 24 | html_header = configure_file( 25 | input : 'header.html.in', 26 | output : 'header.html', 27 | configuration : doxy_conf, 28 | install : false 29 | ) 30 | 31 | custom_target('doc', 32 | command : [doxygen, doxyfile, html_header], 33 | output : ['html'], 34 | build_by_default : false 35 | ) 36 | else 37 | message('Documentation disabled, doxygen not found') 38 | endif 39 | -------------------------------------------------------------------------------- /src/include/dawg/details/aliases.xmh: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | * Copyright (C) 2010 Reed A. 
Cartwright, PhD * 3 | ****************************************************************************/ 4 | 5 | #include "dawg/details/xm.h" 6 | 7 | /*************************************************************************** 8 | * X-Macro List * 9 | ***************************************************************************/ 10 | 11 | XM((indel)(model), (indel)(model)(ins)) 12 | XM((indel)(model), (indel)(model)(del)) 13 | XM((indel)(params), (indel)(params)(ins)) 14 | XM((indel)(params), (indel)(params)(del)) 15 | XM((indel)(rate), (indel)(rate)(ins)) 16 | XM((indel)(rate), (indel)(rate)(del)) 17 | XM((indel)(max), (indel)(max)(ins)) 18 | XM((indel)(max), (indel)(max)(del)) 19 | 20 | /*************************************************************************** 21 | * cleanup * 22 | ***************************************************************************/ 23 | #include "dawg/details/xm.h" 24 | 25 | -------------------------------------------------------------------------------- /examples/dna.dawg: -------------------------------------------------------------------------------- 1 | # Test DNA evolutionary models 2 | 3 | Tree.Scale = 0.99999 4 | Root.Length = 100 5 | Root.Code = 1 6 | Root.Seq = "AATGCTTTTTTTTTTGGCCAA" 7 | Sim.Reps = 10 8 | Sim.Seed = 42 9 | Root.Seg = 0 10 | 11 | [[-]] 12 | Tree.Tree = "(D:0.3, E:0.2)C;" 13 | Subst.Model = "gtr" 14 | Subst.Freqs = 0.2, 0.3, 0.3, 0.2 15 | Subst.Params = 2.0, 1.0, 3.0, 1.0, 1.0, 1.0 16 | Subst.Rate.Model = "ZERO" 17 | Root.Seg = 1 18 | 19 | [[-]] 20 | Tree.Tree = "(A:0.3, B:0.2)C;" 21 | Subst.Model = "jc" 22 | Subst.Rate.Model = "CONST" 23 | Root.Seg = 1 24 | 25 | [[-]] 26 | Tree.Tree = "(F:0.3, G:0.2)C;" 27 | Subst.Model = "k2p" 28 | Subst.Rate.Model = "ZERO" 29 | Root.Seg = 3 30 | 31 | [[-]] 32 | Tree.Tree = "(H:0.3, I:0.2)C;" 33 | Subst.Model = "hky" 34 | Subst.Rate.Model = "ZERO" 35 | Root.Seg = 2 36 | 37 | [[-]] 38 | Tree.Tree = "(J:0.3, K:0.2)C;" 39 | Subst.Model = "f84" 40 | Subst.Rate.Model = "ZERO" 41 |
Root.Seg = 5 42 | 43 | [[-]] 44 | Tree.Tree = "(L:0.3, M:0.2)C;" 45 | Subst.Model = "f81" 46 | Subst.Rate.Model = "ZERO" 47 | Root.Seg = 6 48 | 49 | [[-]] 50 | Tree.Tree = "(N:0.3, O:0.2)C;" 51 | Subst.Model = "tn" 52 | Subst.Rate.Model = "ZERO" 53 | Root.Seg = 7 54 | 55 | [[-]] 56 | Tree.Tree = "(P:0.3, Q:0.2)C;" 57 | Subst.Model = "tn-f04" 58 | Subst.Rate.Model = "ZERO" 59 | Root.Seg = 8 60 | -------------------------------------------------------------------------------- /outsplit.pl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/perl -w 2 | # Copyright (2005) Reed A. Cartwright. All rights reserved. 3 | # 4 | # outsplit.pl is used to extract sequences from Fasta and Phylip files 5 | # 6 | # usage: perl -w outsplit.pl 7 | # 8 | # if is "all" it it creates a directory and 9 | # creates a new file for each alignment 10 | # 11 | # Distributed under the same license as DAWG 12 | # 13 | 14 | use strict; 15 | use File::Basename; 16 | use File::Path; 17 | use File::Spec::Functions; 18 | 19 | my ($file, $id) = @ARGV; 20 | 21 | local $/; 22 | 23 | open( FILE, $file) or die("Error opening $file."); 24 | 25 | my $text = ; 26 | 27 | close(FILE); 28 | 29 | my @blocks = split(/\[DataSet \d+\].*\n/, $text); 30 | 31 | if($id ne 'all') 32 | { 33 | print $blocks[$id]; 34 | } 35 | else 36 | { 37 | my ($name,$dir,$ext) = fileparse($file, qr{\..*}); 38 | my $outdir = catdir($dir, $name); 39 | print "Creating directory $outdir\n"; 40 | mkpath($outdir) unless (-d $outdir); 41 | chdir($outdir) or die("Unable to change directory."); 42 | 43 | foreach my $i (1..$#blocks) 44 | { 45 | my $out = "${name}_$i$ext"; 46 | print "Creating file $out\n"; 47 | open(OUT, ">$out") or die("Unable to open file."); 48 | print OUT $blocks[0]; 49 | print OUT $blocks[$i]; 50 | close(OUT); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/include/dawg/utils.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DAWG_UTILS_H 3 | #define DAWG_UTILS_H 4 | /**************************************************************************** 5 | * Copyright (C) 2009 Reed A. Cartwright, PhD * 6 | ****************************************************************************/ 7 | 8 | #include 9 | #include 10 | 11 | namespace dawg { 12 | 13 | template 14 | std::size_t key_switch(A &ss, const B (&key)[N]) { 15 | using boost::algorithm::istarts_with; 16 | for(std::size_t i=0;i 24 | inline std::size_t search_binary_cont(It first, It last, const V &v) { 25 | std::size_t r = 0; 26 | for(std::size_t u = (last-first)/2; u > 0; u /= 2) { 27 | if(v >= first[r+u-1]) 28 | r += u; 29 | } 30 | return r; 31 | } 32 | 33 | template 34 | inline std::size_t search_binary_cont(V (&a)[N], const V &v) { 35 | return search_binary_cont(&a[0], &a[N], v); 36 | } 37 | 38 | template 39 | inline T upper_binary(T u) { 40 | u--; 41 | for(unsigned int i=1;i<8*sizeof(u);i*=2) 42 | u |= u >> i; 43 | u++; 44 | return u; 45 | } 46 | 47 | } /* namespace dawg */ 48 | 49 | #endif /* DAWG_UTILS_H */ 50 | 51 | -------------------------------------------------------------------------------- /utils/cpp_tripletdiff.pl: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | my @n = qw(T C A G); 5 | my %n = (T => 0, C => 1, A => 2, G => 3); 6 | my @e = ([-1,0,1,2], [-1,-1,3,4], [-1,-1,-1,5], [-1,-1,-1,-1]); 7 | 8 | my @d; 9 | 10 | my @row = (); 11 | # enumerate over upper triangle of 64x64 codon matrix 12 | foreach my $i(0..62) { 13 | # convert codon to triplet 14 | my @a = @n[int($i/16)%4, int($i/4) % 4, $i % 4]; 15 | foreach my $j(($i+1)..63) { 16 | # convert codon to triplet 17 | my @b = @n[int($j/16)%4, int($j/4) % 4, $j % 4]; 18 | # count differences between codons 19 | my $x = 0; 20 | ++$x if($a[0] ne $b[0]); 21 | ++$x if($a[1] ne $b[1]); 22 | ++$x 
if($a[2] ne $b[2]); 23 | # if differences are more than one return -1 24 | # else use 5 bits (aabbb) to encode the position of the difference (aa) 25 | # and the substitution type of the difference (bbb) 26 | if($x > 1) { 27 | push(@row, -1); 28 | } elsif($a[0] ne $b[0]) { 29 | push(@row, 0+$e[$n{$a[0]}][$n{$b[0]}]); 30 | } elsif($a[1] ne $b[1]) { 31 | push(@row, 8+$e[$n{$a[1]}][$n{$b[1]}]); 32 | } else { 33 | push(@row, 16+$e[$n{$a[2]}][$n{$b[2]}]); 34 | } 35 | } 36 | } 37 | 38 | @row = map { sprintf("% 2s", $_) } @row; 39 | 40 | my @rrow = (); 41 | my $y = int($#row / 24); 42 | foreach my $r(0..$y) { 43 | my $a = ($r*24); 44 | my $b = (($r+1)*24-1); 45 | $b = @row-1 if($b >= @row); 46 | push(@rrow, join(",", @row[$a..$b])); 47 | } 48 | 49 | print join(",\n", @rrow) . "\n"; 50 | 51 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Determine the location of the executables/scripts and any data sets required for full package testing 3 | set(DAWG_EXE ${CMAKE_BINARY_DIR}/src/dawg) 4 | 5 | ############################################################################### 6 | # AddCMakeTest 7 | # A macro to create a test based on a .cmake.in file 8 | # Example: AddCMakeTest(DngCall.Bam "" "Build.dng-call;Build.testdata") 9 | # - Creates a test called DngCall.Bam 10 | # - Makes that test depend on Build.dng-call and Build.testdata 11 | # - Processes the file ./DngCall/BamTest.cmake.in 12 | # - When the test is called, it calls the .cmake script with possible PREARGS and post ARGs 13 | 14 | macro(AddCMakeTest NAME PREARGS DEPENDS) 15 | string(REPLACE "." 
"/" pathname "${NAME}") 16 | configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${pathname}Test.cmake.in" 17 | "${CMAKE_CURRENT_BINARY_DIR}/${pathname}Test.cmake" @ONLY) 18 | add_test(NAME ${NAME} 19 | COMMAND ${CMAKE_COMMAND} ${PREARGS} 20 | -P "${CMAKE_CURRENT_BINARY_DIR}/${pathname}Test.cmake" ${ARGN}) 21 | if("${DEPENDS}") 22 | set_tests_properties(${NAME} PROPERTIES DEPENDS "${DEPENDS}") 23 | endif() 24 | endmacro() 25 | 26 | ############################################################################### 27 | # Add Tests Here 28 | 29 | add_test(Source.CheckBuildDir ${CMAKE_COMMAND} -E md5sum "${CMAKE_SOURCE_DIR}/build/.gitkeep") 30 | set_tests_properties(Source.CheckBuildDir PROPERTIES 31 | PASS_REGULAR_EXPRESSION "d41d8cd98f00b204e9800998ecf8427e" 32 | ) 33 | 34 | add_subdirectory(Unit) 35 | -------------------------------------------------------------------------------- /examples/rna.dawg: -------------------------------------------------------------------------------- 1 | # Test RNA evolutionary models 2 | 3 | Output.Rna = 1 4 | Output.lowercase = 1 5 | Output.markins = 1 6 | Output.keepempty = 0 7 | Tree.Tree = "(X:0.3, Y:0.2)D;" 8 | Tree.Scale = 0.99999 9 | Root.Length = 100 10 | Root.Code = 2 11 | Root.Seq = "ACGTACGTTTAAAATTAAAAAAAAAAAAATTAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTGGGGGGGGGGGGGGGCCCCCCCCCCCCAATTA" 12 | Sim.Reps = 4 13 | Sim.Seed = 42 14 | Root.Seg = 0 15 | 16 | [Indel] 17 | Params.Ins = 1.039000, 100 18 | Model.Ins = "GEO" 19 | Rate.Ins = 0.1 20 | Max.Ins = 100.0 21 | Params.Del = 1.039000, 10 22 | Model.Del = "POWER-LAW" 23 | Rate.Del = 0.01 24 | Max.Del = 10.0 25 | 26 | [[-]] 27 | Tree.Tree = "(D:0.3, E:0.2)C;" 28 | Subst.Model = "gtr" 29 | Subst.Freqs = 0.2, 0.3, 0.3, 0.2 30 | Subst.Params = 2.0, 1.0, 3.0, 1.0, 1.0, 1.0 31 | Subst.Rate.Model = "ZERO" 32 | Root.Seg = 1 33 | 34 | [[-]] 35 | Tree.Tree = "(A:0.3, B:0.2)C;" 36 | Subst.Model = "jc" 37 | Subst.Rate.Model = "CONST" 38 | Root.Seg = 1 39 | 40 | [[-]] 41 | Tree.Tree = "(F:0.3, G:0.2)C;" 42 
| Subst.Model = "k2p" 43 | Subst.Rate.Model = "ZERO" 44 | Root.Seg = 3 45 | 46 | [[-]] 47 | Tree.Tree = "(H:0.3, I:0.2)C;" 48 | Subst.Model = "hky" 49 | Subst.Rate.Model = "ZERO" 50 | Root.Seg = 2 51 | 52 | [[-]] 53 | Tree.Tree = "(J:0.3, K:0.2)C;" 54 | Subst.Model = "f84" 55 | Subst.Rate.Model = "ZERO" 56 | Root.Seg = 5 57 | 58 | [[-]] 59 | Tree.Tree = "(L:0.3, M:0.2)C;" 60 | Subst.Model = "f81" 61 | Subst.Rate.Model = "ZERO" 62 | Root.Seg = 6 63 | 64 | [[-]] 65 | Tree.Tree = "(N:0.3, O:0.2)C;" 66 | Subst.Model = "tn" 67 | Subst.Rate.Model = "ZERO" 68 | Root.Seg = 7 69 | 70 | [[-]] 71 | Tree.Tree = "(P:0.3, Q:0.2)C;" 72 | Subst.Model = "tn-f04" 73 | Subst.Rate.Model = "ZERO" 74 | Root.Seg = 8 75 | -------------------------------------------------------------------------------- /Modules/FindTestU01.cmake: -------------------------------------------------------------------------------- 1 | # Locate the testu01 library 2 | # 3 | # This module defines the following variables: 4 | # 5 | # TESTU01_LIBRARY the name of the library; 6 | # TESTU01_INCLUDE_DIR where to find testu01 include files. 7 | # TESTU01_FOUND true if both the TESTU01_LIBRARY and TESTU01_INCLUDE_DIR have been found. 8 | # 9 | # To help locate the library and include file, you can define a 10 | # variable called TESTU01_ROOT which points to the root of the TESTU01 library 11 | # installation. 
12 | # 13 | 14 | # default search dirs 15 | set( _testu01_HEADER_SEARCH_DIRS 16 | "/usr/include" 17 | "/usr/local/include" 18 | "C:/Program Files (x86)/testu01/include" ) 19 | set( _testu01_LIB_SEARCH_DIRS 20 | "/usr/lib" 21 | "/usr/local/lib" 22 | "C:/Program Files (x86)/testu01/lib-msvc110" ) 23 | 24 | # Check environment for root search directory 25 | set( _testu01_ENV_ROOT $ENV{testu01_ROOT} ) 26 | if( NOT TESTU01_ROOT AND _testu01_ENV_ROOT ) 27 | set(TESTU01_ROOT ${_testu01_ENV_ROOT} ) 28 | endif() 29 | 30 | # Put user specified location at beginning of search 31 | if( TESTU01_ROOT ) 32 | list( INSERT _testu01_HEADER_SEARCH_DIRS 0 "${TESTU01_ROOT}/include" ) 33 | list( INSERT _testu01_LIB_SEARCH_DIRS 0 "${TESTU01_ROOT}/testu01" ) 34 | endif() 35 | 36 | # Search for the header 37 | FIND_PATH(TESTU01_INCLUDE_DIR "TestU01.h" 38 | PATHS ${_testu01_HEADER_SEARCH_DIRS} 39 | PATH_SUFFIXES TestU01) 40 | 41 | # Search for the library 42 | FIND_LIBRARY(TESTU01_LIBRARY NAMES testu01 43 | PATHS ${_testu01_LIB_SEARCH_DIRS} ) 44 | 45 | INCLUDE(FindPackageHandleStandardArgs) 46 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(TESTU01 DEFAULT_MSG 47 | TESTU01_LIBRARY TESTU01_INCLUDE_DIR) -------------------------------------------------------------------------------- /changelog.txt: -------------------------------------------------------------------------------- 1 | Version 1.2 2 | * Migration from Autoconf/Automake to CMake for building. Easier to develop 3 | and build packages for modern operating systems. 4 | * Bug fix for homoplasious insertions. In very large trees (>8000 branches) 5 | the bug can still affect the output in very rare cases. Email me for instructions 6 | on avoiding this limitation. 7 | * All empty columns in the alignment are removed by default. 8 | * New KeepEmpty variable to preserve empty columns. GapPlus is true forces 9 | KeepEmpty to be true. 10 | * New KeepFlank option to control long deletions into flanking regions. 
11 | 12 | Version 1.1.2 13 | * Bug fix in reading root sequence from input file 14 | 15 | Version 1.1.1 16 | * More bug fixes in deletion code 17 | 18 | Version 1.1 19 | * Bug fixes in deletion code 20 | * Bug fixes in recombination code 21 | * Portability Fixes 22 | * Clustal default format 23 | * New [Header] format 24 | * New [.SubHeader] format 25 | * Shorter format names: aln, phy, nex, fas, txt, out, poo 26 | * Format detection based on output file name 27 | * Output file can be overridden by "-o filename" flag 28 | * NexusCode deprecated 29 | * Out.Block.Head, Out.Block.Before, Out.Block.After, Out.Block.Tail added 30 | * Variable Substitution enabled for Out.Block.* 31 | * Out.Subst boolean to turn substitution on and off 32 | * Getopt library support 33 | * Quiet mode (disable errors and warnings) 34 | * Output file commandline override 35 | * Similar to Python, triple quotes specify multiline strings. 36 | - Triple double quotes remove initial and final newlines 37 | - Triple single quotes preserve initial and final newlines 38 | * Lambda.pl has been updated. It is still a work in progress. 39 | 40 | Version 1.0 41 | * Initial Release 42 | -------------------------------------------------------------------------------- /Modules/DawgDevel.cmake: -------------------------------------------------------------------------------- 1 | # This CMake File defines several useful developer options 2 | 3 | SET(DAWG_DEVEL_ENABLE_GPERFTOOLS OFF CACHE BOOL "Enable profiling with gperftools.") 4 | 5 | SET(dawg_devel_LIBRARIES) 6 | if(DAWG_DEVEL_ENABLE_GPERFTOOLS) 7 | find_package(Gperftools COMPONENTS profiler) 8 | if(GPERFTOOLS_FOUND) 9 | message(STATUS "DAWG_DEVEL: Profiling with gperftools enabled. Use CPUPROFILE environmental variable to turn on profiling and specify output file.") 10 | set(dawg_devel_LIBRARIES ${dawg_devel_LIBRARIES} GPERFTOOLS::GPERFTOOLS) 11 | else() 12 | message(FATAL_ERROR "Gperftools was not found.
Please disable the flag DAWG_DEVEL_ENABLE_GPERFTOOLS and try again.") 13 | endif() 14 | endif() 15 | 16 | SET(DAWG_DEVEL_ENABLE_COVERAGE_REPORT OFF CACHE BOOL "Enable code coverage reporting.") 17 | 18 | if (DAWG_DEVEL_ENABLE_COVERAGE_REPORT) 19 | ## Only compatible with debug builds 20 | if(CMAKE_BUILD_TYPE) 21 | string(TOLOWER "${CMAKE_BUILD_TYPE}" cmake_build_type_tolower) 22 | if(NOT cmake_build_type_tolower STREQUAL "debug") 23 | message(FATAL_ERROR "Unsupported build type \"${CMAKE_BUILD_TYPE}\". DAWG_DEVEL_ENABLE_COVERAGE_REPORT can only be used with a debug build.") 24 | else() 25 | message(STATUS "DAWG_DEVEL: Coverage report enabled.") 26 | SET(COVERAGE_FLAGS --coverage) 27 | SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${COVERAGE_FLAGS}") 28 | SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${COVERAGE_FLAGS}") 29 | SET(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} ${COVERAGE_FLAGS}") 30 | SET(CMAKE_MODULE_LINKER_FLAGS_DEBUG "${CMAKE_MODULE_LINKER_FLAGS_DEBUG} ${COVERAGE_FLAGS}") 31 | SET(CMAKE_SHARED_LINKER_FLAGS_DEBUG "${CMAKE_SHARED_LINKER_FLAGS_DEBUG} ${COVERAGE_FLAGS}") 32 | endif() 33 | endif() 34 | endif() 35 | -------------------------------------------------------------------------------- /tests/Unit/Dawg/parse_test.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_MODULE Dawg::parse_test 2 | #define BOOST_TEST_DYN_LINK 3 | 4 | #include 5 | 6 | #include "../boost_test_helper.h" 7 | //#include 8 | 9 | #include 10 | #include 11 | 12 | BOOST_AUTO_TEST_SUITE(test_examples) 13 | 14 | static std::string dir_prefix = "tests/Unit/Dawg/"; 15 | 16 | BOOST_AUTO_TEST_CASE(test_basic_dna) 17 | { 18 | dawg::trick input; 19 | std::string exampleFile (dir_prefix + "basic-dna.dawg"); 20 | BOOST_CHECK_EQUAL(dawg::trick::parse_file(input, exampleFile.c_str()), true); 21 | } 22 | 23 | BOOST_AUTO_TEST_CASE(test_basic_dna_2) 24 | { 25 | dawg::trick input; 26 | std::string exampleFile 
(dir_prefix + "basic-dna-2.dawg"); 27 | BOOST_CHECK_EQUAL(dawg::trick::parse_file(input, exampleFile.c_str()), true); 28 | } 29 | 30 | BOOST_AUTO_TEST_CASE(test_basic_dna_zero_rate) 31 | { 32 | dawg::trick input; 33 | std::string exampleFile (dir_prefix + "basic-dna-zero-rate.dawg"); 34 | BOOST_CHECK_EQUAL(dawg::trick::parse_file(input, exampleFile.c_str()), true); 35 | } 36 | 37 | BOOST_AUTO_TEST_CASE(test_multiple_models) 38 | { 39 | dawg::trick input; 40 | std::string exampleFile (dir_prefix + "multiple-models.dawg"); 41 | BOOST_CHECK_EQUAL(dawg::trick::parse_file(input, exampleFile.c_str()), true); 42 | } 43 | 44 | BOOST_AUTO_TEST_CASE(test_recombination) 45 | { 46 | dawg::trick input; 47 | std::string exampleFile (dir_prefix + "recombination.dawg"); 48 | BOOST_CHECK_EQUAL(dawg::trick::parse_file(input, exampleFile.c_str()), true); 49 | } 50 | 51 | BOOST_AUTO_TEST_CASE(test_segments) 52 | { 53 | dawg::trick input; 54 | std::string exampleFile (dir_prefix + "segments.dawg"); 55 | BOOST_CHECK_EQUAL(dawg::trick::parse_file(input, exampleFile.c_str()), true); 56 | } 57 | 58 | BOOST_AUTO_TEST_SUITE_END() -------------------------------------------------------------------------------- /utils/paml2dawg/lg_LG.PAML.txt: -------------------------------------------------------------------------------- 1 | 2 | 0.425093 3 | 0.276818 0.751878 4 | 0.395144 0.123954 5.076149 5 | 2.489084 0.534551 0.528768 0.062556 6 | 0.969894 2.807908 1.695752 0.523386 0.084808 7 | 1.038545 0.363970 0.541712 5.243870 0.003499 4.128591 8 | 2.066040 0.390192 1.437645 0.844926 0.569265 0.267959 0.348847 9 | 0.358858 2.426601 4.509238 0.927114 0.640543 4.813505 0.423881 0.311484 10 | 0.149830 0.126991 0.191503 0.010690 0.320627 0.072854 0.044265 0.008705 0.108882 11 | 0.395337 0.301848 0.068427 0.015076 0.594007 0.582457 0.069673 0.044261 0.366317 4.145067 12 | 0.536518 6.326067 2.145078 0.282959 0.013266 3.234294 1.807177 0.296636 0.697264 0.159069 0.137500 13 | 1.124035 0.484133 0.371004 
0.025548 0.893680 1.672569 0.173735 0.139538 0.442472 4.273607 6.312358 0.656604 14 | 0.253701 0.052722 0.089525 0.017416 1.105251 0.035855 0.018811 0.089586 0.682139 1.112727 2.592692 0.023918 1.798853 15 | 1.177651 0.332533 0.161787 0.394456 0.075382 0.624294 0.419409 0.196961 0.508851 0.078281 0.249060 0.390322 0.099849 0.094464 16 | 4.727182 0.858151 4.008358 1.240275 2.784478 1.223828 0.611973 1.739990 0.990012 0.064105 0.182287 0.748683 0.346960 0.361819 1.338132 17 | 2.139501 0.578987 2.000679 0.425860 1.143480 1.080136 0.604545 0.129836 0.584262 1.033739 0.302936 1.136863 2.020366 0.165001 0.571468 6.472279 18 | 0.180717 0.593607 0.045376 0.029890 0.670128 0.236199 0.077852 0.268491 0.597054 0.111660 0.619632 0.049906 0.696175 2.457121 0.095131 0.248862 0.140825 19 | 0.218959 0.314440 0.612025 0.135107 1.165532 0.257336 0.120037 0.054679 5.306834 0.232523 0.299648 0.131932 0.481306 7.803902 0.089613 0.400547 0.245841 3.151815 20 | 2.547870 0.170887 0.083688 0.037967 1.959291 0.210332 0.245034 0.076701 0.119013 10.649107 1.702745 0.185202 1.898718 0.654683 0.296501 0.098369 2.188158 0.189510 0.249313 21 | 22 | 0.079066 0.055941 0.041977 0.053052 0.012937 0.040767 0.071586 0.057337 0.022355 0.062157 0.099081 0.064600 0.022951 0.042302 0.044040 0.061197 0.053287 0.012066 0.034155 0.069147 -------------------------------------------------------------------------------- /src/dawg.h: -------------------------------------------------------------------------------- 1 | // dawg.h - Copyright (c) 2004-2009 Reed A. 
Cartwright (all rights reserved) 2 | 3 | #ifndef DAWG_DAWG_H 4 | #define DAWG_DAWG_H 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | // Error Reporting 29 | bool DawgError(const char* csErr, ...); //always returns false 30 | bool DawgWarn(const char* csErr, ...); //always returns false 31 | 32 | // Utility functions 33 | #define CERRORR(err_msg) ((std::cerr << "ERROR: " << err_msg << std::endl), false); 34 | #define CERROR(err_msg) (std::cerr << "ERROR: " << err_msg << std::endl); 35 | 36 | bool SetFormat(unsigned int fmt, int nNum, 37 | const char* csHead, const char* csBefore, 38 | const char* csAfter, const char* csTail, 39 | bool bSubst); 40 | void DawgIniOutput(std::ostream& os); 41 | void DawgFinOutput(std::ostream& os); 42 | 43 | // File Formats 44 | const unsigned int FormatFasta = 0; 45 | const unsigned int FormatNexus = 1; 46 | const unsigned int FormatPhylip = 2; 47 | const unsigned int FormatClustal = 3; 48 | 49 | // Output Flags 50 | const unsigned int FlagOutLowerCase = 1; // 00001 51 | const unsigned int FlagOutGapPlus = 2; // 00010 52 | const unsigned int FlagOutGapSingleChar = 4; // 00100 53 | const unsigned int FlagOutTranslate = 8; // 01000 54 | const unsigned int FlagOutKeepEmpty = 16; // 10000 55 | 56 | // Nucleotide Numbers 57 | const int NumAdenine = 0; 58 | const int NumCytosine = 1; 59 | const int NumGuanine = 2; 60 | const int NumThymine = 3; 61 | 62 | template class SumValue 63 | { 64 | private: 65 | Type m_Sum; 66 | public: 67 | SumValue () : m_Sum((Type)0) { } 68 | void operator ( ) ( const Type& elem ) {m_Sum += elem;} 69 | operator Type() const { return m_Sum; } 70 | }; 71 | 72 | #endif 73 | -------------------------------------------------------------------------------- 
/src/include/dawg/details/cumtree.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DAWG_DETAILS_CUMTREE_H 3 | #define DAWG_DETAILS_CUMTREE_H 4 | /**************************************************************************** 5 | * Copyright (C) 2009 Reed A. Cartwright, PhD * 6 | ****************************************************************************/ 7 | 8 | #include 9 | #include 10 | 11 | namespace dawg { namespace details { 12 | 13 | template > 14 | class cum_tree { 15 | public: 16 | typedef typename std::vector<_Tp, _Alloc> storage_type; 17 | typedef typename storage_type::size_type size_type; 18 | typedef typename storage_type::value_type value_type; 19 | 20 | cum_tree() : mid(0) { } 21 | 22 | cum_tree(size_type n) : data(upper_bound(n), value_type(0)) { 23 | mid = data.size()/2; 24 | } 25 | 26 | template 27 | cum_tree(It first, It last) { 28 | assign(first,last); 29 | } 30 | 31 | /* Resize the storage to upper_bound(last-first) zeroed slots, then store the running (cumulative) sums of [first,last) in the leading slots. */ template 32 | void assign(It first, It last) { 33 | data.assign(upper_bound(last-first),value_type(0)); 34 | mid = data.size()/2; 35 | typename storage_type::iterator it = data.begin(); 36 | value_type v = value_type(0); 37 | /* walk the caller's range; the loop previously referenced undeclared names b and e */ for(; first != last; ++first) { 38 | v += *first; 39 | *(it++) = v; 40 | } 41 | } 42 | 43 | void increase(size_type u, const value_type& x) { 44 | for(; u < data.size(); ++u) 45 | data[u] += x; 46 | } 47 | 48 | // TODO: Try std::lower_bound 49 | size_type operator()(const value_type& v) const { 50 | size_type r = 0; 51 | for(size_type u = mid; u > 0; u /= 2) { 52 | if(v >= data[r+u-1]) 53 | r += u; 54 | } 55 | return r; 56 | } 57 | 58 | template 59 | size_type operator()(const value_type& v) const { 60 | size_type r = 0; 61 | for(size_type u = n; u > 0; u /= 2) { 62 | if(v >= data[r+u-1]) 63 | r += u; 64 | } 65 | return r; 66 | } 67 | 68 | protected: 69 | size_type upper_bound(size_type n) { 70 | size_type u = 4; 71 | for(;u * 3 | ****************************************************************************/ 4 | 5
| #include "dawg/details/xm.h" 6 | 7 | /*************************************************************************** 8 | * X-Macro List * 9 | * Defines the core model for Dawg, simulate sequences down a tree * 10 | * using substitution and indel models * 11 | * * 12 | * XM(name, type, def, help) * 13 | ***************************************************************************/ 14 | 15 | XM( (output)(block)(head), std::string, , 16 | "Text that will be written to the beginning of output.") 17 | XM( (output)(block)(tail), std::string, , 18 | "Text that will be written to the end of output.") 19 | XM( (output)(block)(before), std::string, , 20 | "Text that will be written before every replicate.") 21 | XM( (output)(block)(after), std::string, , 22 | "Text that will be written after every replicate.") 23 | XM( (output)(block)(between), std::string, , 24 | "Text that will be written between replicates.") 25 | 26 | //XM( (output)(vars), bool, true) 27 | XM( (output)(file), std::string, , 28 | "Path to the output file.") 29 | XM( (output)(split), bool, false, 30 | "Output each replicate to its own file.") 31 | XM( (output)(append), bool, false, 32 | "Append results to existing file.") 33 | XM( (output)(label), bool, false, 34 | "label each simulation with a unique id.") 35 | 36 | XM( (sim)(reps), unsigned int, 1, 37 | "Number of simulation replicates.") 38 | XM( (sim)(seed), std::vector, , 39 | "The seed of the random number generator") 40 | 41 | /*************************************************************************** 42 | * cleanup * 43 | ***************************************************************************/ 44 | #include "dawg/details/xm.h" 45 | -------------------------------------------------------------------------------- /src/lib/ma.cpp: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | * Copyright (C) 2009 Reed A. 
Cartwright, PhD * 3 | ****************************************************************************/ 4 | #include "dawg/ma.h" 5 | 6 | #include 7 | 8 | #include "dawg/log.h" 9 | #include "dawg/wood.h" 10 | 11 | using namespace dawg; 12 | using namespace std; 13 | 14 | // Read a section: copy its name and fetch the value of each XM-listed parameter 15 | void dawg::ma::read_section(const trick::data_type::value_type &sec) { 16 | name = sec.name; 17 | #define XM(aname, type, def, desc) sec.get(XP(aname), XV(aname)); 18 | #include 19 | #undef XM 20 | } 21 | 22 | // Reads the trick format into a vector of dawg::mas 23 | // Use inheritance and defaults 24 | bool dawg::ma::from_trick(const trick &trk, vector &v) { 25 | // create lookup map so we can know if something has been touched 26 | typedef map map_t; 27 | map_t lookup; 28 | // we reserve space to preserve pointers 29 | v.reserve(trk.data.size()); 30 | // create default object and put it in the map 31 | const ma def("_default_"); 32 | lookup[def.name] = &def; 33 | 34 | for(trick::data_type::const_iterator secit = trk.data.begin(); 35 | secit != trk.data.end(); ++secit) { 36 | // lookup parent section 37 | map_t::const_iterator iit = lookup.find(secit->inherits); 38 | if(iit == lookup.end()) 39 | return DAWG_ERROR("section '" << secit->inherits 40 | << "' not found (inherited by '" 41 | << secit->name << "')"); 42 | // lookup section 43 | pair me = 44 | lookup.insert(make_pair(secit->name, (dawg::ma *)nullptr)); 45 | if(!me.second) 46 | return DAWG_ERROR("section '" << secit->name 47 | << "' specified more than once."); 48 | 49 | // read inherited ma 50 | v.push_back(*iit->second); 51 | // read the section 52 | v.back().read_section(*secit); 53 | // add section to map 54 | me.first->second = &v.back(); 55 | } 56 | return true; 57 | } 58 | -------------------------------------------------------------------------------- /src/include/dawg/root.h: -------------------------------------------------------------------------------- 1 | #pragma once 2
| #ifndef DAWG_ROOT_H 3 | #define DAWG_ROOT_H 4 | /**************************************************************************** 5 | * Copyright (C) 2010 Reed A. Cartwright, PhD * 6 | ****************************************************************************/ 7 | 8 | #include 9 | #include 10 | 11 | #include "dawg/rate.h" 12 | #include "dawg/residue.h" 13 | #include "dawg/subst.h" 14 | 15 | namespace dawg { 16 | 17 | using random_t = fragmites::random::Random; 18 | 19 | class root_model { 20 | public: 21 | bool create(unsigned int len, sequence &root_seq) { 22 | this->root_seq = root_seq; 23 | root_len = len; 24 | 25 | if(root_seq.empty()) { 26 | do_op = &root_model::do_stat; 27 | name = "stationary"; 28 | } else { 29 | do_op = &root_model::do_user_seq; 30 | name = "user_seq"; 31 | } 32 | 33 | return true; 34 | } 35 | 36 | inline void operator()(sequence &seq, random_t &m, const subst_model &s, 37 | const rate_model &r, residue::data_type b) const { 38 | (this->*do_op)(seq, m, s, r, b); 39 | } 40 | 41 | inline const std::string &label() const { return name; } 42 | 43 | private: 44 | // pointer that will hold our method 45 | void (root_model::*do_op)(sequence &seq, random_t &m, const subst_model &s, 46 | const rate_model &r, residue::data_type b) const; 47 | 48 | void do_stat(sequence &seq, random_t &m, const subst_model &s, 49 | const rate_model &r, residue::data_type b) const { 50 | seq.resize(root_len); 51 | for(sequence::iterator it = seq.begin(); it != seq.end(); ++it) { 52 | it->base(s(m)); 53 | it->rate_cat(r(m)); 54 | it->branch(b); 55 | } 56 | } 57 | 58 | void do_user_seq(sequence &seq, random_t &m, const subst_model &s, 59 | const rate_model &r, residue::data_type b) const { 60 | seq = root_seq; 61 | for(size_t i = 0; i != seq.size(); ++i) { 62 | seq.at(i).rate_cat(r(m)); 63 | seq.at(i).branch(b); 64 | } 65 | } 66 | 67 | unsigned int root_len; 68 | std::string name; 69 | sequence root_seq; 70 | std::vector rates; 71 | }; 72 | 73 | } // namespace dawg 74 | 
75 | #endif // DAWG_ROOT_H 76 | -------------------------------------------------------------------------------- /src/dawg_app.h: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | * Copyright (C) 2009 Reed A. Cartwright, PhD * 3 | * * 4 | * This program is free software: you can redistribute it and/or modify * 5 | * it under the terms of the GNU General Public License as published by * 6 | * the Free Software Foundation, either version 3 of the License, or * 7 | * (at your option) any later version. * 8 | * * 9 | * This program is distributed in the hope that it will be useful, * 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 12 | * GNU General Public License for more details. * 13 | * * 14 | * You should have received a copy of the GNU General Public License * 15 | * along with this program. If not, see . 
* 16 | ****************************************************************************/ 17 | 18 | #ifndef DAWG_APP_H 19 | #define DAWG_APP_H 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | /**************************************************************************** 26 | * class dawg_app * 27 | ****************************************************************************/ 28 | 29 | class dawg_app { 30 | public: 31 | dawg_app(int argc, char* argv[]); 32 | dawg_app(const dawg_app&) = delete; // copy constructor 33 | dawg_app& operator=(const dawg_app&) = delete; // copy assignment operator 34 | dawg_app(dawg_app&&) = delete; // move constructor 35 | dawg_app& operator=(dawg_app&&) = delete; // move assignment operator 36 | virtual ~dawg_app() = default; // destructor 37 | 38 | virtual int run(); 39 | 40 | struct args { 41 | // use X-Macros to specify argument variables 42 | #define XM(lname, sname, desc, type, def) type XV(lname){def}; 43 | #define XF(lname, sname, desc, type, def) type XV(lname){def}; 44 | #include "dawgarg.xmh" 45 | #undef XM 46 | #undef XF 47 | std::vector input; 48 | }; 49 | 50 | private: 51 | args arg; 52 | CLI::App cli_app; 53 | std::string runname{""}; 54 | }; 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /examples/error.dawg: -------------------------------------------------------------------------------- 1 | # Test using different sequence types in a single trick file 2 | # Try to cause an error 3 | 4 | Tree.Scale = 4.99999 5 | Root.Length = 100 6 | Root.Code = 2 7 | Root.Seq = "GHAGCVFFYTDYSIGDGFCCMVMYC" 8 | Sim.Reps = 10 9 | Sim.Seed = 42 10 | Root.Seg = 0 11 | Output.Markins=1 12 | 13 | [[ProteinSequence]] 14 | Tree.Tree = "(X:0.3, Y:0.2)Z;" 15 | Subst.Model = "aagtr" 16 | Subst.Freqs = 17 | 0.443, 0.3113, 0.3, 0.2, 0.888, 0.6520, 0.9112, 0.3323, 0.2998, 0.7171, 18 | 0.443, 0.3113, 0.3003, 0.211, 0.5588, 0.43520, 0.487112, 0.76323, 0.1998, 0.44171 19 | Subst.Params = 20 | 
100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 21 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 22 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 23 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 24 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 25 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 26 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 27 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 28 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 29 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 30 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 31 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 32 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 33 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 34 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 35 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 36 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 37 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 38 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171 39 | Subst.Rate.Model = "CONST" 40 | Root.Seg = 1 41 | Root.Code=13 42 | [Indel] 43 | Params.Ins = 1.039000, 50 44 | Model.Ins = "GEO" 45 | Rate.Ins = 0.01 46 | Max.Ins = 50.0 47 | Params.Del = 1.039000, 50 48 | Model.Del = "POWER-LAW" 49 | Rate.Del = 0.01 50 | Max.Del = 50.0 51 | 52 | # [[DnaSequence]] 53 | # Tree.Tree = "(A:0.3, B:0.2)Z;" 54 | # 
Subst.Model = "gtr" 55 | # Subst.Freqs = 0.2, 0.3, 0.3, 0.2 56 | # Subst.Params = 2.0, 1.0, 3.0, 1.0, 1.0, 1.0 57 | # Subst.Rate.Model = "ZERO" 58 | # Root.Seg = 2 59 | # Root.Code = 4 60 | -------------------------------------------------------------------------------- /tests/speed-256-gamma.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | dawg=$1 4 | 5 | ${dawg} -o poo:- - < /dev/null 6 | Root.Length = 100000 7 | Tree.Tree = "((((((((s0:1,s1:1):1,(s2:1,s3:1):1):1,((s4:1,s5:1):1,(s6:1,s7:1):1):1):1,(((s8:1,s9:1):1,(sA:1,sB:1):1):1,((sC:1,sD:1):1,(sE:1,sF:1):1):1):1):1,((((s10:1,s11:1):1,(s12:1,s13:1):1):1,((s14:1,s15:1):1,(s16:1,s17:1):1):1):1,(((s18:1,s19:1):1,(s1A:1,s1B:1):1):1,((s1C:1,s1D:1):1,(s1E:1,s1F:1):1):1):1):1):1,(((((s20:1,s21:1):1,(s22:1,s23:1):1):1,((s24:1,s25:1):1,(s26:1,s27:1):1):1):1,(((s28:1,s29:1):1,(s2A:1,s2B:1):1):1,((s2C:1,s2D:1):1,(s2E:1,s2F:1):1):1):1):1,((((s30:1,s31:1):1,(s32:1,s33:1):1):1,((s34:1,s35:1):1,(s36:1,s37:1):1):1):1,(((s38:1,s39:1):1,(s3A:1,s3B:1):1):1,((s3C:1,s3D:1):1,(s3E:1,s3F:1):1):1):1):1):1):1,((((((s40:1,s41:1):1,(s42:1,s43:1):1):1,((s44:1,s45:1):1,(s46:1,s47:1):1):1):1,(((s48:1,s49:1):1,(s4A:1,s4B:1):1):1,((s4C:1,s4D:1):1,(s4E:1,s4F:1):1):1):1):1,((((s50:1,s51:1):1,(s52:1,s53:1):1):1,((s54:1,s55:1):1,(s56:1,s57:1):1):1):1,(((s58:1,s59:1):1,(s5A:1,s5B:1):1):1,((s5C:1,s5D:1):1,(s5E:1,s5F:1):1):1):1):1):1,(((((s60:1,s61:1):1,(s62:1,s63:1):1):1,((s64:1,s65:1):1,(s66:1,s67:1):1):1):1,(((s68:1,s69:1):1,(s6A:1,s6B:1):1):1,((s6C:1,s6D:1):1,(s6E:1,s6F:1):1):1):1):1,((((s70:1,s71:1):1,(s72:1,s73:1):1):1,((s74:1,s75:1):1,(s76:1,s77:1):1):1):1,(((s78:1,s79:1):1,(s7A:1,s7B:1):1):1,((s7C:1,s7D:1):1,(s7E:1,s7F:1):1):1):1):1):1):1):1,(((((((s80:1,s81:1):1,(s82:1,s83:1):1):1,((s84:1,s85:1):1,(s86:1,s87:1):1):1):1,(((s88:1,s89:1):1,(s8A:1,s8B:1):1):1,((s8C:1,s8D:1):1,(s8E:1,s8F:1):1):1):1):1,((((s90:1,s91:1):1,(s92:1,s93:1):1):1,((s94:1,s95:1):1,(s96:1,s97:1):1):1):1,(((s98:1,s99:
1):1,(s9A:1,s9B:1):1):1,((s9C:1,s9D:1):1,(s9E:1,s9F:1):1):1):1):1):1,(((((sA0:1,sA1:1):1,(sA2:1,sA3:1):1):1,((sA4:1,sA5:1):1,(sA6:1,sA7:1):1):1):1,(((sA8:1,sA9:1):1,(sAA:1,sAB:1):1):1,((sAC:1,sAD:1):1,(sAE:1,sAF:1):1):1):1):1,((((sB0:1,sB1:1):1,(sB2:1,sB3:1):1):1,((sB4:1,sB5:1):1,(sB6:1,sB7:1):1):1):1,(((sB8:1,sB9:1):1,(sBA:1,sBB:1):1):1,((sBC:1,sBD:1):1,(sBE:1,sBF:1):1):1):1):1):1):1,((((((sC0:1,sC1:1):1,(sC2:1,sC3:1):1):1,((sC4:1,sC5:1):1,(sC6:1,sC7:1):1):1):1,(((sC8:1,sC9:1):1,(sCA:1,sCB:1):1):1,((sCC:1,sCD:1):1,(sCE:1,sCF:1):1):1):1):1,((((sD0:1,sD1:1):1,(sD2:1,sD3:1):1):1,((sD4:1,sD5:1):1,(sD6:1,sD7:1):1):1):1,(((sD8:1,sD9:1):1,(sDA:1,sDB:1):1):1,((sDC:1,sDD:1):1,(sDE:1,sDF:1):1):1):1):1):1,(((((sE0:1,sE1:1):1,(sE2:1,sE3:1):1):1,((sE4:1,sE5:1):1,(sE6:1,sE7:1):1):1):1,(((sE8:1,sE9:1):1,(sEA:1,sEB:1):1):1,((sEC:1,sED:1):1,(sEE:1,sEF:1):1):1):1):1,((((sF0:1,sF1:1):1,(sF2:1,sF3:1):1):1,((sF4:1,sF5:1):1,(sF6:1,sF7:1):1):1):1,(((sF8:1,sF9:1):1,(sFA:1,sFB:1):1):1,((sFC:1,sFD:1):1,(sFE:1,sFF:1):1):1):1):1):1):1):1);" 8 | Tree.Scale = 0.1 9 | Sim.Reps = 20 10 | Subst.Rate.Model = gamma 11 | Subst.Rate.Params = 0.5, 0.0, 16 12 | EOF 13 | 14 | -------------------------------------------------------------------------------- /src/include/dawg/ma.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DAWG_MA_H 3 | #define DAWG_MA_H 4 | /**************************************************************************** 5 | * Copyright (C) 2009 Reed A. 
Cartwright, PhD * 6 | ****************************************************************************/ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "dawg/trick.h" 14 | #include "dawg/utils/vecio.h" 15 | 16 | namespace dawg { 17 | 18 | // dawg::ma is a "model argument" structure 19 | struct ma { 20 | #define XM(name, type, def, desc) type XV(name); 21 | #include "dawg/details/dawgma.xmh" 22 | #undef XM 23 | std::string name; 24 | 25 | ma(const std::string &_n = std::string()) 26 | : 27 | #define XM(name, type, def, desc) XV(name)(def), 28 | #include "dawg/details/dawgma.xmh" 29 | #undef XM 30 | name(_n) { 31 | } 32 | 33 | static bool from_trick(const dawg::trick &trk, std::vector &v); 34 | 35 | void read_section(const trick::data_type::value_type &sec); 36 | 37 | template 38 | static void help(std::basic_ostream &o); 39 | 40 | private: 41 | }; 42 | 43 | template 44 | inline std::basic_ostream &operator<<( 45 | std::basic_ostream &o, const ma &a) { 46 | if(!o.good()) return o; 47 | 48 | o << set_open('\x7f') << set_close('\x7f') << set_delimiter(','); 49 | 50 | o << "[[ " << a.name << " ]]" << std::endl; 51 | #define XM(name, type, def, desc) \ 52 | o << XP(name) " = " << a.XV(name) << std::endl; 53 | #include "dawg/details/dawgma.xmh" 54 | #undef XM 55 | 56 | return o; 57 | } 58 | 59 | namespace details { 60 | inline std::string ma_help_name(const char *cs) { 61 | std::string ret(cs); 62 | typedef boost::iterator_range range; 63 | range r(ret.begin(), ret.begin() + 1); 64 | boost::to_upper(r); 65 | std::size_t pos = 0; 66 | while((pos = ret.find('.', pos + 1)) != std::string::npos) { 67 | r = range(ret.begin() + pos + 1, ret.begin() + pos + 2); 68 | boost::to_upper(r); 69 | } 70 | return ret; 71 | } 72 | }; // namespace details 73 | 74 | template 75 | void ma::help(std::basic_ostream &o) { 76 | o << "[REGULAR PARAMETERS]" << std::endl; 77 | #define XM(name, type, def, desc) \ 78 | o << details::ma_help_name(XP(name)) << " - " << (desc) 
<< std::endl; 79 | #include "dawg/details/dawgma.xmh" 80 | #undef XM 81 | o << "\n[GLOBAL PARAMETERS]" << std::endl; 82 | #define XM(name, type, def, desc) \ 83 | o << details::ma_help_name(XP(name)) << " - " << (desc) << std::endl; 84 | #include "dawg/details/global.xmh" 85 | #undef XM 86 | } 87 | 88 | } // namespace dawg 89 | #endif // DAWG_MA_H 90 | -------------------------------------------------------------------------------- /tests/Unit/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(DAWG_DIR "Dawg") 2 | 3 | find_package(TestU01) 4 | 5 | ############################################################################### 6 | # AddUnitTest 7 | # A macro that will add a unit test to the testing queue 8 | # Example: AddUnitTest(Dawg::parse_test) 9 | # - creates the `Unit.Dawg::parse_test` test 10 | # - creates the `unittest_Dawg_parse_test` target from the file ./Dawg/parse_test.cpp 11 | # - builds the target and runs the resulting executable as the test command 12 | # - the target is linked against the Boost unit test framework and libdawg2 13 | 14 | macro(AddUnitTest NAME) 15 | string(REPLACE "::" "/" head "${NAME}") 16 | string(REPLACE "::" "_" exe "${NAME}") 17 | set(target "unittest_${exe}") 18 | add_executable("${target}" EXCLUDE_FROM_ALL "${head}.cpp") 19 | # TODO WIN64: add .exe if needed 20 | set_target_properties("${target}" PROPERTIES OUTPUT_NAME "${head}") 21 | get_filename_component(dir "${head}" DIRECTORY) 22 | file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${dir}") 23 | 24 | target_include_directories("${target}" BEFORE PRIVATE "${CMAKE_SOURCE_DIR}/src/include") 25 | 26 | target_link_libraries("${target}" 27 | libdawg2 28 | ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} 29 | ) 30 | 31 | add_test("Unit.${NAME}" ${CMAKE_CTEST_COMMAND} 32 | --build-and-test "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" 33 | --build-generator ${CMAKE_GENERATOR} 34 | --build-nocmake 35 | --build-noclean 36 | --build-project "${CMAKE_PROJECT_NAME}" 37 | --build-target "${target}"
38 | --test-command "${CMAKE_CURRENT_BINARY_DIR}/${head}" --log_level=unit_scope 39 | ) 40 | endmacro() 41 | 42 | file(COPY "${CMAKE_SOURCE_DIR}/examples/basic-dna.dawg" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/${DAWG_DIR}") 43 | file(COPY "${CMAKE_SOURCE_DIR}/examples/basic-dna-2.dawg" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/${DAWG_DIR}") 44 | file(COPY "${CMAKE_SOURCE_DIR}/examples/basic-dna-zero-rate.dawg" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/${DAWG_DIR}") 45 | file(COPY "${CMAKE_SOURCE_DIR}/examples/multiple-models.dawg" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/${DAWG_DIR}") 46 | file(COPY "${CMAKE_SOURCE_DIR}/examples/pseudogene.dawg" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/${DAWG_DIR}") 47 | file(COPY "${CMAKE_SOURCE_DIR}/examples/recombination.dawg" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/${DAWG_DIR}") 48 | file(COPY "${CMAKE_SOURCE_DIR}/examples/segments.dawg" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/${DAWG_DIR}") 49 | 50 | ############################################################################### 51 | # Add Unit tests based on CXX namespaces 52 | 53 | AddUnitTest(${DAWG_DIR}::parse_test) 54 | 55 | if(TESTU01_FOUND) 56 | AddUnitTest(${DAWG_DIR}::testu01) 57 | target_link_libraries(unittest_${DAWG_DIR}_testu01 ${TESTU01_LIBRARY}) 58 | target_include_directories(unittest_${DAWG_DIR}_testu01 BEFORE PRIVATE ${TESTU01_INCLUDE_DIR}) 59 | endif(TESTU01_FOUND) 60 | -------------------------------------------------------------------------------- /examples/mitochondria.dawg: -------------------------------------------------------------------------------- 1 | # Test mitochondria root code 2 | # Genetic codes -- https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi#SG2 3 | # 2 = The Vertebrate Mitochondrial Code 4 | 5 | Tree.Scale = 0.99999 6 | Root.Length = 100 7 | Root.Code = 2 8 | Root.Seq =
"ACTTTGAATGGGATATTTTCATACACACCGTCTTTGAAGTGTACTGACCTCTTCATGATAATAGGTCTGGTCATGATATGCTACTTCTCTTAGCTGTCGTTCTAATCATCTGTTGGCACAACATTGCGTAATGGAAATGGATAACTTTTCTGTTTAGTTTGCTCTGTGGCTCTATCTAGAAAATTAAAGGATACAAAACTAATTAACCATTCTTATAAATCACTAAGCCAGCACAATGGTTTCTACTGCTGTCAGCCTTTATTTTCAATTTTTCTTAGTCCCCTAGCTTTTCCGTAAACACCGAACACTTCTCTCGTATGATTGGTGCAAACCGTTTGGTGAAAAAAAGCACATTTGCATATACAAAAAATTAACAAATCTATCATATAATAATTAAGATATAGAGTTCATGATCCTTGTGCCATTGATCAAGCCATCGGCGATGATGATAAACCTTTTATTGATACCTTTTTTACCTAGTATTTTTTGCAAATGCTTCATTGTTACACCTCCGAGGAATACTACGAGTGCAATATCCTCTGCAGCATGAAATGTCCCATTAGATCCGTCTATGGACCGTCTATTATTGCTCTTGATGCTTGCATTGATCTTTTTTGTAATGGTTCCCGGGACCCATATACTCTCTTCGATCGCATGTGAGTCTCCATATAACTGTTCAATTAAATCCTCCGTTTGAGAAACTCTAGGTTCTCTTGACAGTATAAAAGGCTGCTGCGAGGAATAATTATGGAACAAGATAGACCTATCATATAACATCTGAACCAGTCTCATTGTCAAGGGCACTACACCACTGTAAGCAAAAGTGGCTTCCGAGAAGTCATCGTTCTCATTTGTGATTTTATCGGCAGCATGCTCGTCTTCTATGGGTACTGTATTGAACCATTGTGAAATATAACGGTATTCTTTCTGTAATCTTGTTGACTTACTTGTTGTAATATGCAAATCAGTTTTTCCCGTTTTGCTAGTGAAAAATCCTAAGCCAATGATTCAAGTTGAAAGCATTTTTCAATGCCCCAAGAGTCGATCATAAAGGTTCTTAGATATTCATAATCCTTTTCTCGAAGTGAATTTTTACAAAGAGACAATAAACAGATTAATCTGAGAATCTTCTTTTGATCAACCTCGTACTGGTACTGCAACTCGAGTATAATATCTTCAATGTCGTTATTAAGTGTATTTCCCATCAAAATTTCCAGCTCTAACTCTAAGATTTTATTGAAAGATCCGTACTCTTCAGTCTCTACCACTTTCAAAACGTCGGATGATAAGGTTGTGTGATTTTTCAAAAAAGCTTGCCTTTGTTGCAAAGAACCTAAGGAATCAACGAATTCTTTGATTTCATGTACGCTCTCGGCTTTATGCCTTGTATCATATTGCGTTTGTAGTTCCTTTGCCAATTTATTTAACTGCGGCCCAATCGACCCAAAATTTAAAAATTTCAAATCATTCCATATTTTATCTTCCTTATAATTGAA" 9 | Sim.Reps = 10 10 | Sim.Seed = 42 11 | Root.Seg = 0 12 | 13 | [[-]] 14 | Tree.Tree = "(D:0.3, E:0.2)C;" 15 | Subst.Model = "gtr" 16 | Subst.Freqs = 0.2, 0.3, 0.3, 0.2 17 | Subst.Params = 2.0, 1.0, 3.0, 1.0, 1.0, 1.0 18 | Subst.Rate.Model = "ZERO" 19 | Root.Seg = 1 20 | 21 | [[-]] 22 | Tree.Tree = "(A:0.3, B:0.2)C;" 23 | Subst.Model = "jc" 24 | Subst.Rate.Model = "CONST" 25 | Root.Seg = 1 26 | 27 | [[-]] 28 | Tree.Tree = "(F:0.3, G:0.2)C;" 29 | Subst.Model = "k2p" 30 | Subst.Rate.Model = 
"ZERO" 31 | Root.Seg = 3 32 | 33 | [[-]] 34 | Tree.Tree = "(H:0.3, I:0.2)C;" 35 | Subst.Model = "hky" 36 | Subst.Rate.Model = "ZERO" 37 | Root.Seg = 2 38 | [Indel] 39 | Params.Ins = 1.039000, 50 40 | Model.Ins = "GEO" 41 | Rate.Ins = 0.01 42 | Max.Ins = 50.0 43 | Params.Del = 1.039000, 50 44 | Model.Del = "POWER-LAW" 45 | Rate.Del = 0.01 46 | Max.Del = 50.0 47 | 48 | [[-]] 49 | Tree.Tree = "(J:0.3, K:0.2)C;" 50 | Subst.Model = "f84" 51 | Subst.Rate.Model = "ZERO" 52 | Root.Seg = 5 53 | 54 | [[-]] 55 | Tree.Tree = "(L:0.3, M:0.2)C;" 56 | Subst.Model = "f81" 57 | Subst.Rate.Model = "ZERO" 58 | Root.Seg = 6 59 | 60 | [[-]] 61 | Tree.Tree = "(N:0.3, O:0.2)C;" 62 | Subst.Model = "tn" 63 | Subst.Rate.Model = "ZERO" 64 | Root.Seg = 7 65 | 66 | [[-]] 67 | Tree.Tree = "(P:0.3, Q:0.2)C;" 68 | Subst.Model = "tn-f04" 69 | Subst.Rate.Model = "ZERO" 70 | Root.Seg = 8 71 | -------------------------------------------------------------------------------- /releng/build-rel-win.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | set ProductDir= 3 | set toolchain= 4 | set arch= 5 | set archivetag=current 6 | 7 | if "%~1"=="-t" goto TOOLCHAIN 8 | goto BEGINVCVAR 9 | :TOOLCHAIN 10 | if "%~2"=="M32" goto M32 11 | if "%~2"=="m32" goto M32 12 | if "%~2"=="M64" goto M64 13 | if "%~2"=="m64" goto M64 14 | set toolchain="%~f2" 15 | goto ENDTOOLCHAIN 16 | :M32 17 | set arch=x86 18 | goto ENDTOOLCHAIN 19 | :M64 20 | set arch=amd64 21 | :ENDTOOLCHAIN 22 | shift 23 | shift 24 | 25 | rem Find Visual Studio Install 26 | :BEGINVCVAR 27 | if DEFINED VCINSTALLDIR goto ENDVCVAR 28 | if NOT DEFINED PROCESSOR_ARCHITECTURE goto X86 29 | if /I %PROCESSOR_ARCHITECTURE% == x86 goto X86 30 | :AMD64 31 | FOR /F "tokens=2*" %%A IN ('REG.EXE QUERY "HKLM\Software\WOW6432Node\Microsoft\VCExpress\8.0\Setup\VC" /V "ProductDir" 2^>NUL ^| FIND "REG_SZ"') DO SET ProductDir=%%B 32 | FOR /F "tokens=2*" %%A IN ('REG.EXE QUERY 
"HKLM\Software\WOW6432Node\Microsoft\VisualStudio\8.0\Setup\VC" /V "ProductDir" 2^>NUL ^| FIND "REG_SZ"') DO SET ProductDir=%%B 33 | FOR /F "tokens=2*" %%A IN ('REG.EXE QUERY "HKLM\Software\WOW6432Node\Microsoft\VCExpress\9.0\Setup\VC" /V "ProductDir" 2^>NUL ^| FIND "REG_SZ"') DO SET ProductDir=%%B 34 | FOR /F "tokens=2*" %%A IN ('REG.EXE QUERY "HKLM\Software\WOW6432Node\Microsoft\VisualStudio\9.0\Setup\VC" /V "ProductDir" 2^>NUL ^| FIND "REG_SZ"') DO SET ProductDir=%%B 35 | FOR /F "tokens=2*" %%A IN ('REG.EXE QUERY "HKLM\Software\WOW6432Node\Microsoft\VisualStudio\10.0\Setup\VC" /V "ProductDir" 2^>NUL ^| FIND "REG_SZ"') DO SET ProductDir=%%B 36 | FOR /F "tokens=2*" %%A IN ('REG.EXE QUERY "HKLM\Software\WOW6432Node\Microsoft\VisualStudio\11.0\Setup\VC" /V "ProductDir" 2^>NUL ^| FIND "REG_SZ"') DO SET ProductDir=%%B 37 | goto REST 38 | :X86 39 | FOR /F "tokens=2*" %%A IN ('REG.EXE QUERY "HKLM\Software\Microsoft\VCExpress\8.0\Setup\VC" /V "ProductDir" 2^>NUL ^| FIND "REG_SZ"') DO SET ProductDir=%%B 40 | FOR /F "tokens=2*" %%A IN ('REG.EXE QUERY "HKLM\Software\Microsoft\VisualStudio\8.0\Setup\VC" /V "ProductDir" 2^>NUL ^| FIND "REG_SZ"') DO SET ProductDir=%%B 41 | FOR /F "tokens=2*" %%A IN ('REG.EXE QUERY "HKLM\Software\Microsoft\VCExpress\9.0\Setup\VC" /V "ProductDir" 2^>NUL ^| FIND "REG_SZ"') DO SET ProductDir=%%B 42 | FOR /F "tokens=2*" %%A IN ('REG.EXE QUERY "HKLM\Software\Microsoft\VisualStudio\9.0\Setup\VC" /V "ProductDir" 2^>NUL ^| FIND "REG_SZ"') DO SET ProductDir=%%B 43 | FOR /F "tokens=2*" %%A IN ('REG.EXE QUERY "HKLM\Software\Microsoft\VisualStudio\10.0\Setup\VC" /V "ProductDir" 2^>NUL ^| FIND "REG_SZ"') DO SET ProductDir=%%B 44 | FOR /F "tokens=2*" %%A IN ('REG.EXE QUERY "HKLM\Software\Microsoft\VisualStudio\11.0\Setup\VC" /V "ProductDir" 2^>NUL ^| FIND "REG_SZ"') DO SET ProductDir=%%B 45 | :REST 46 | if NOT DEFINED ProductDir goto ENDVCVAR 47 | :VCVAR 48 | call "%ProductDir%\vcvarsall.bat" %arch% 49 | :ENDVCVAR 50 | 51 | if NOT [%1]==[] set archivetag=%1 
52 | set buildargs=-DRELENG_TAG=%archivetag% 53 | if DEFINED toolchain set buildargs=%buildargs% -DRELENG_TOOLCHAIN=%toolchain% 54 | 55 | cmake %buildargs% -P releng.cmake 56 | 57 | :EOF 58 | -------------------------------------------------------------------------------- /utils/paml2dawg/jtt-dcmut.dat: -------------------------------------------------------------------------------- 1 | 0.531678 2 | 0.557967 0.451095 3 | 0.827445 0.154899 5.549530 4 | 0.574478 1.019843 0.313311 0.105625 5 | 0.556725 3.021995 0.768834 0.521646 0.091304 6 | 1.066681 0.318483 0.578115 7.766557 0.053907 3.417706 7 | 1.740159 1.359652 0.773313 1.272434 0.546389 0.231294 1.115632 8 | 0.219970 3.210671 4.025778 1.032342 0.724998 5.684080 0.243768 0.201696 9 | 0.361684 0.239195 0.491003 0.115968 0.150559 0.078270 0.111773 0.053769 0.181788 10 | 0.310007 0.372261 0.137289 0.061486 0.164593 0.709004 0.097485 0.069492 0.540571 2.335139 11 | 0.369437 6.529255 2.529517 0.282466 0.049009 2.966732 1.731684 0.269840 0.525096 0.202562 0.146481 12 | 0.469395 0.431045 0.330720 0.190001 0.409202 0.456901 0.175084 0.130379 0.329660 4.831666 3.856906 0.624581 13 | 0.138293 0.065314 0.073481 0.032522 0.678335 0.045683 0.043829 0.050212 0.453428 0.777090 2.500294 0.024521 0.436181 14 | 1.959599 0.710489 0.121804 0.127164 0.123653 1.608126 0.191994 0.208081 1.141961 0.098580 1.060504 0.216345 0.164215 0.148483 15 | 3.887095 1.001551 5.057964 0.589268 2.155331 0.548807 0.312449 1.874296 0.743458 0.405119 0.592511 0.474478 0.285564 0.943971 2.788406 16 | 4.582565 0.650282 2.351311 0.425159 0.469823 0.523825 0.331584 0.316862 0.477355 2.553806 0.272514 0.965641 2.114728 0.138904 1.176961 4.777647 17 | 0.084329 1.257961 0.027700 0.057466 1.104181 0.172206 0.114381 0.544180 0.128193 0.134510 0.530324 0.089134 0.201334 0.537922 0.069965 0.310927 0.080556 18 | 0.139492 0.235601 0.700693 0.453952 2.114852 0.254745 0.063452 0.052500 5.848400 0.303445 0.241094 0.087904 0.189870 5.484236 0.113850 0.628608 0.201094 
0.747889 19 | 2.924161 0.171995 0.164525 0.315261 0.621323 0.179771 0.465271 0.470140 0.121827 9.533943 1.761439 0.124066 3.038533 0.593478 0.211561 0.408532 1.143980 0.239697 0.165473 20 | 21 | 22 | 0.076862 0.051057 0.042546 0.051269 0.020279 0.041061 0.061820 0.074714 0.022983 0.052569 0.091111 0.059498 0.023414 0.040530 0.050532 0.068225 0.058518 0.014336 0.032303 0.066374 23 | 24 | 25 | A R N D C Q E G H I L K M F P S T W Y V 26 | Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val 27 | 28 | 29 | JTT rate matrix prepared using the DCMut method* 30 | ------------------------------------------------ 31 | 32 | The first part above indicates the symmetric 'exchangeability' parameters s_ij, 33 | where s_ij = s_ji. 34 | The second part gives the amino acid equilibrium frequencies pi_i. 35 | The net replacement rate from i to j is q_ij = pi_j*s_ij. 36 | 37 | This model is usually scaled so that the mean rate of change at 38 | equilibrium, Sum_i Sum_j!=i pi_i*q_ij, equals 1. You should check this 39 | scaling before using the matrix above. The PAML package will perform 40 | this scaling. 41 | 42 | // 43 | 44 | *Prepared by Carolin Kosiol and Nick Goldman, December 2003. 45 | 46 | See the following paper for more details: 47 | Kosiol, C., and Goldman, N. 2005. Different versions of the Dayhoff rate matrix. 48 | Molecular Biology and Evolution 22:193-199. 
49 | 50 | See also http://www.ebi.ac.uk/goldman/dayhoff 51 | -------------------------------------------------------------------------------- /utils/paml2dawg/dayhoff-dcmut.dat: -------------------------------------------------------------------------------- 1 | 0.267828 2 | 0.984474 0.327059 3 | 1.199805 0.000000 8.931515 4 | 0.360016 0.232374 0.000000 0.000000 5 | 0.887753 2.439939 1.028509 1.348551 0.000000 6 | 1.961167 0.000000 1.493409 11.388659 0.000000 7.086022 7 | 2.386111 0.087791 1.385352 1.240981 0.107278 0.281581 0.811907 8 | 0.228116 2.383148 5.290024 0.868241 0.282729 6.011613 0.439469 0.106802 9 | 0.653416 0.632629 0.768024 0.239248 0.438074 0.180393 0.609526 0.000000 0.076981 10 | 0.406431 0.154924 0.341113 0.000000 0.000000 0.730772 0.112880 0.071514 0.443504 2.556685 11 | 0.258635 4.610124 3.148371 0.716913 0.000000 1.519078 0.830078 0.267683 0.270475 0.460857 0.180629 12 | 0.717840 0.896321 0.000000 0.000000 0.000000 1.127499 0.304803 0.170372 0.000000 3.332732 5.230115 2.411739 13 | 0.183641 0.136906 0.138503 0.000000 0.000000 0.000000 0.000000 0.153478 0.475927 1.951951 1.565160 0.000000 0.921860 14 | 2.485920 1.028313 0.419244 0.133940 0.187550 1.526188 0.507003 0.347153 0.933709 0.119152 0.316258 0.335419 0.170205 0.110506 15 | 4.051870 1.531590 4.885892 0.956097 1.598356 0.561828 0.793999 2.322243 0.353643 0.247955 0.171432 0.954557 0.619951 0.459901 2.427202 16 | 3.680365 0.265745 2.271697 0.660930 0.162366 0.525651 0.340156 0.306662 0.226333 1.900739 0.331090 1.350599 1.031534 0.136655 0.782857 5.436674 17 | 0.000000 2.001375 0.224968 0.000000 0.000000 0.000000 0.000000 0.000000 0.270564 0.000000 0.461776 0.000000 0.000000 0.762354 0.000000 0.740819 0.000000 18 | 0.244139 0.078012 0.946940 0.000000 0.953164 0.000000 0.214717 0.000000 1.265400 0.374834 0.286572 0.132142 0.000000 6.952629 0.000000 0.336289 0.417839 0.608070 19 | 2.059564 0.240368 0.158067 0.178316 0.484678 0.346983 0.367250 0.538165 0.438715 8.810038 1.745156 0.103850 
2.565955 0.123606 0.485026 0.303836 1.561997 0.000000 0.279379 20 | 21 | 22 | 0.087127 0.040904 0.040432 0.046872 0.033474 0.038255 0.049530 0.088612 0.033619 0.036886 0.085357 0.080481 0.014753 0.039772 0.050680 0.069577 0.058542 0.010494 0.029916 0.064718 23 | 24 | 25 | A R N D C Q E G H I L K M F P S T W Y V 26 | Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val 27 | 28 | 29 | Dayhoff rate matrix prepared using the DCMut method* 30 | ---------------------------------------------------- 31 | 32 | The first part above indicates the symmetric 'exchangeability' parameters s_ij, 33 | where s_ij = s_ji. 34 | The second part gives the amino acid equilibrium frequencies pi_i. 35 | The net replacement rate from i to j is q_ij = pi_j*s_ij. 36 | 37 | This model is usually scaled so that the mean rate of change at 38 | equilibrium, Sum_i Sum_j!=i pi_i*q_ij, equals 1. You should check this 39 | scaling before using the matrix above. The PAML package will perform 40 | this scaling. 41 | 42 | // 43 | 44 | *Prepared by Carolin Kosiol and Nick Goldman, December 2003. 45 | 46 | See the following paper for more details: 47 | Kosiol, C., and Goldman, N. 2005. Different versions of the Dayhoff rate matrix. 48 | Molecular Biology and Evolution 22:193-199. 
49 | 50 | See also http://www.ebi.ac.uk/goldman/dayhoff 51 | -------------------------------------------------------------------------------- /utils/paml2dawg/dayhoff-molphy.dat: -------------------------------------------------------------------------------- 1 | 0.267257 2 | 0.995319 0.329397 3 | 1.196794 0.016493 8.992246 4 | 0.365255 0.234872 0.020464 0.007072 5 | 0.893910 2.447538 1.045293 1.352218 0.006067 6 | 1.943156 0.015501 1.493525 11.236928 0.004083 7.057870 7 | 2.376209 0.087085 1.392474 1.230667 0.108197 0.281880 0.799778 8 | 0.225596 2.347959 5.280613 0.855075 0.283222 5.976661 0.429931 0.105015 9 | 0.651534 0.628428 0.772963 0.237558 0.442435 0.180812 0.601184 0.006405 0.075779 10 | 0.408223 0.155010 0.345815 0.007180 0.010824 0.737865 0.112160 0.071409 0.439814 2.556320 11 | 0.260244 4.621249 3.197574 0.718366 0.002234 1.536592 0.826204 0.267785 0.268704 0.461627 0.182251 12 | 0.706162 0.878398 0.052957 0.032318 0.039211 1.114993 0.296582 0.166619 0.034749 3.263592 5.159102 2.383284 13 | 0.182627 0.135634 0.139025 0.003597 0.024074 0.005001 0.005491 0.151745 0.467311 1.932390 1.560815 0.001843 0.900346 14 | 2.470839 1.018202 0.420586 0.132571 0.188814 1.524901 0.498468 0.343043 0.916303 0.117884 0.315206 0.334907 0.166172 0.109043 15 | 4.028242 1.516886 4.902751 0.946552 1.609487 0.561469 0.780814 2.295303 0.347137 0.245388 0.170901 0.953314 0.605287 0.453952 2.394418 16 | 3.715525 0.267254 2.314805 0.664453 0.166031 0.533471 0.339684 0.307796 0.225599 1.910180 0.335177 1.369705 1.022709 0.136975 0.784224 5.447514 17 | 0.010673 2.092641 0.238328 0.008979 0.096295 0.026659 0.020261 0.035705 0.280386 0.027380 0.486002 0.009493 0.044996 0.794428 0.009932 0.771731 0.011637 18 | 0.243501 0.077505 0.953314 0.022541 0.962946 0.027880 0.211851 0.004742 1.246168 0.372170 0.286614 0.132407 0.031862 6.885035 0.012138 0.332907 0.420039 0.635541 19 | 2.051045 0.238484 0.158886 0.176829 0.488888 0.347358 0.361776 0.532821 0.431365 8.733750 1.742707 0.103887 
2.509573 0.122215 0.479300 0.300312 1.567787 0.035082 0.277033 20 | 21 | 22 | 0.087000 0.041000 0.040000 0.047000 0.033000 0.038000 0.050000 0.089000 0.034000 0.037000 0.085000 0.080000 0.015000 0.040000 0.051000 0.070000 0.058000 0.010000 0.030000 0.065000 23 | 24 | 25 | A R N D C Q E G H I L K M F P S T W Y V 26 | Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val 27 | 28 | 29 | Dayhoff rate matrix prepared using the Molphy method* 30 | ----------------------------------------------------- 31 | 32 | The first part above indicates the symmetric 'exchangeability' parameters s_ij, 33 | where s_ij = s_ji. 34 | The second part gives the amino acid equilibrium frequencies pi_i. 35 | The net replacement rate from i to j is q_ij = pi_j*s_ij. 36 | 37 | This model is usually scaled so that the mean rate of change at 38 | equilibrium, Sum_i Sum_j!=i pi_i*q_ij, equals 1. You should check this 39 | scaling before using the matrix above. The PAML package will perform 40 | this scaling. 41 | 42 | // 43 | 44 | *Prepared by Carolin Kosiol and Nick Goldman, December 2003. 45 | 46 | See the following paper for more details: 47 | Kosiol, C., and Goldman, N. 2005. Different versions of the Dayhoff rate matrix. 48 | Molecular Biology and Evolution 22:193-199. 
49 | 50 | See also http://www.ebi.ac.uk/goldman/dayhoff 51 | -------------------------------------------------------------------------------- /utils/paml2dawg/wag.dat: -------------------------------------------------------------------------------- 1 | 0.551571 2 | 0.509848 0.635346 3 | 0.738998 0.147304 5.429420 4 | 1.027040 0.528191 0.265256 0.0302949 5 | 0.908598 3.035500 1.543640 0.616783 0.0988179 6 | 1.582850 0.439157 0.947198 6.174160 0.021352 5.469470 7 | 1.416720 0.584665 1.125560 0.865584 0.306674 0.330052 0.567717 8 | 0.316954 2.137150 3.956290 0.930676 0.248972 4.294110 0.570025 0.249410 9 | 0.193335 0.186979 0.554236 0.039437 0.170135 0.113917 0.127395 0.0304501 0.138190 10 | 0.397915 0.497671 0.131528 0.0848047 0.384287 0.869489 0.154263 0.0613037 0.499462 3.170970 11 | 0.906265 5.351420 3.012010 0.479855 0.0740339 3.894900 2.584430 0.373558 0.890432 0.323832 0.257555 12 | 0.893496 0.683162 0.198221 0.103754 0.390482 1.545260 0.315124 0.174100 0.404141 4.257460 4.854020 0.934276 13 | 0.210494 0.102711 0.0961621 0.0467304 0.398020 0.0999208 0.0811339 0.049931 0.679371 1.059470 2.115170 0.088836 1.190630 14 | 1.438550 0.679489 0.195081 0.423984 0.109404 0.933372 0.682355 0.243570 0.696198 0.0999288 0.415844 0.556896 0.171329 0.161444 15 | 3.370790 1.224190 3.974230 1.071760 1.407660 1.028870 0.704939 1.341820 0.740169 0.319440 0.344739 0.967130 0.493905 0.545931 1.613280 16 | 2.121110 0.554413 2.030060 0.374866 0.512984 0.857928 0.822765 0.225833 0.473307 1.458160 0.326622 1.386980 1.516120 0.171903 0.795384 4.378020 17 | 0.113133 1.163920 0.0719167 0.129767 0.717070 0.215737 0.156557 0.336983 0.262569 0.212483 0.665309 0.137505 0.515706 1.529640 0.139405 0.523742 0.110864 18 | 0.240735 0.381533 1.086000 0.325711 0.543833 0.227710 0.196303 0.103604 3.873440 0.420170 0.398618 0.133264 0.428437 6.454280 0.216046 0.786993 0.291148 2.485390 19 | 2.006010 0.251849 0.196246 0.152335 1.002140 0.301281 0.588731 0.187247 0.118358 7.821300 1.800340 0.305434 
2.058450 0.649892 0.314887 0.232739 1.388230 0.365369 0.314730 20 | 21 | 0.0866279 0.043972 0.0390894 0.0570451 0.0193078 0.0367281 0.0580589 0.0832518 0.0244313 0.048466 0.086209 0.0620286 0.0195027 0.0384319 0.0457631 0.0695179 0.0610127 0.0143859 0.0352742 0.0708956 22 | 23 | 24 | A R N D C Q E G H I L K M F P S T W Y V 25 | Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val 26 | 27 | 28 | Symmetrical part of the WAG rate matrix and aa frequencies, 29 | estimated from 3905 globular protein amino acid sequences forming 182 30 | protein families. 31 | The first part above indicates the symmetric 'exchangeability' 32 | parameters, where s_ij = s_ji. The s_ij above are not scaled, but the 33 | PAML package will perform this scaling. 34 | The second part gives the amino acid frequencies (pi_i) 35 | estimated from the 3905 sequences. The net replacement rate from i to 36 | j is Q_ij = s_ij*pi_j. 37 | Prepared by Simon Whelan and Nick Goldman, December 2000. 38 | 39 | Citation: 40 | Whelan, S. and N. Goldman. 2001. A general empirical model of 41 | protein evolution derived from multiple protein families using 42 | a maximum likelihood approach. Molecular Biology and 43 | Evolution 18:691-699. 
44 | -------------------------------------------------------------------------------- /examples/aa.dawg: -------------------------------------------------------------------------------- 1 | # Test aa evaluationary models 2 | 3 | Tree.Scale = 0.666 4 | Root.Length = 10000 5 | Root.Code = 2 6 | Root.Seq = "AATGCTTTTTTTTTTGGCCAAAATTTTGGGGAAATTGGCCCCCCACACACACATTTTTATATATGG" 7 | Sim.Reps = 10 8 | Sim.Seed = 420 9 | Root.Seg = 0 10 | 11 | [Indel] 12 | Params.Ins = 1.039000, 50 13 | Model.Ins = "GEO" 14 | Rate.Ins = 0.01 15 | Max.Ins = 50.0 16 | Params.Del = 1.039000, 50 17 | Model.Del = "POWER-LAW" 18 | Rate.Del = 0.01 19 | Max.Del = 50.0 20 | 21 | [[-]] 22 | Tree.Tree = "(A:0.3, B:0.2)C;" 23 | Subst.Model = "aagtr" 24 | Subst.Freqs = 25 | 0.443, 0.3113, 0.3, 0.2, 0.888, 0.6520, 0.9112, 0.3323, 0.2998, 0.7171, 26 | 0.443, 0.3113, 0.3003, 0.211, 0.5588, 0.43520, 0.487112, 0.76323, 0.1998, 0.44171 27 | Subst.Params = 28 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 29 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 30 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 31 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 32 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 33 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 34 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 35 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 36 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 37 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 38 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 39 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 40 | 100.443, 49.3113, 66.3, 
10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 41 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 42 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 43 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 44 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 45 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171, 46 | 100.443, 49.3113, 66.3, 10.2, 40.888, 3.6520, 49.9112, 22.3323, 48.2998, 33.7171 47 | Subst.Rate.Model = "ZERO" 48 | 49 | [[-]] 50 | Tree.Tree = "(D:0.3, E:0.2)C;" 51 | Subst.Model = "equ" 52 | Subst.Rate.Model = "CONST" 53 | Root.Seg = 1 54 | 55 | [[-]] 56 | Tree.Tree = "(F:0.3, G:0.2)C;" 57 | Subst.Model = "lg" 58 | Subst.Rate.Model = "ZERO" 59 | Root.Seg = 2 60 | 61 | [[-]] 62 | Tree.Tree = "(H:0.3, I:0.2)C;" 63 | Subst.Model = "wag" 64 | Subst.Rate.Model = "GAMMA" 65 | Subst.Rate.Params = 8.0 66 | Root.Seg = 1 67 | 68 | [[-]] 69 | Tree.Tree = "(J:0.3, K:0.2)C;" 70 | Subst.Model = "wagstar" 71 | Subst.Rate.Model = "CONST" 72 | 73 | [[-]] 74 | Tree.Tree = "(L:0.3, M:0.2)C;" 75 | Subst.Model = "jtt-dcmut" 76 | Subst.Rate.Model = "ZERO" 77 | Root.Seg = 1 78 | 79 | [[-]] 80 | Tree.Tree = "(N:0.3, O:0.2)C;" 81 | Subst.Model = "dayhoff-dcmut" 82 | Subst.Rate.Model = "ZERO" 83 | Root.Seg = 0 84 | 85 | [[-]] 86 | Tree.Tree = "(P:0.3, Q:0.2)C;" 87 | Subst.Model = "molphy" 88 | Subst.Rate.Model = "GAMMA" 89 | -------------------------------------------------------------------------------- /utils/paml2dawg/wagstar.dat: -------------------------------------------------------------------------------- 1 | 0.589718 2 | 0.514347 0.67416 3 | 0.731152 0.159054 5.30821 4 | 1.21324 0.568449 0.233527 0.0379056 5 | 1.03344 3.02808 1.62299 0.657364 0.0999068 6 | 1.55788 0.443685 1.00122 6.04299 0.0284956 5.6037 7 | 1.41993 0.629768 1.12717 0.88357 0.312544 0.346823 0.588609 8 | 0.317684 2.31211 
3.9337 0.958529 0.341479 4.87366 0.599188 0.279542 9 | 0.214596 0.187262 0.527321 0.0390513 0.198958 0.125999 0.124553 0.0310522 0.162975 10 | 0.400822 0.51821 0.144354 0.0869637 0.451124 0.873266 0.154936 0.067443 0.508952 3.1554 11 | 0.881639 5.74119 2.88102 0.480308 0.0719929 4.19125 2.45392 0.381514 0.854485 0.320597 0.255092 12 | 0.887458 0.660816 0.198404 0.0992829 0.428648 1.64018 0.294481 0.184545 0.40117 3.94646 4.81956 0.877057 13 | 0.213179 0.122792 0.0848492 0.0458258 0.485001 0.109241 0.0873936 0.0552962 0.631713 1.06458 2.10414 0.0832422 1.14516 14 | 1.51861 0.711498 0.204905 0.444152 0.109081 0.913179 0.720567 0.254626 0.722123 0.111722 0.422851 0.588203 0.179858 0.165205 15 | 3.52499 1.35611 3.90127 1.09965 1.35221 0.87908 0.822025 1.33618 0.876688 0.321774 0.351913 1.05314 0.554077 0.563999 1.54694 16 | 2.24161 0.594177 2.06787 0.395176 0.522957 0.829315 0.889765 0.236489 0.54992 1.48876 0.351564 1.45173 1.56873 0.188237 0.802531 4.02507 17 | 0.135395 1.24086 0.0746093 0.142159 0.728065 0.208163 0.176397 0.366467 0.261223 0.259584 0.706082 0.159261 0.565299 1.58681 0.135024 0.528249 0.118584 18 | 0.270321 0.386714 1.05269 0.326191 0.481954 0.210494 0.209621 0.108982 4.31772 0.44009 0.427718 0.155623 0.437069 6.49269 0.212945 0.742154 0.286443 2.42261 19 | 1.92496 0.282892 0.193323 0.155419 1.10899 0.32893 0.588443 0.190095 0.119749 7.48376 1.82105 0.300343 2.03324 0.653015 0.325745 0.23769 1.4088 0.396884 0.353358 20 | 21 | 0.0866279 0.043972 0.0390894 0.0570451 0.0193078 0.0367281 0.0580589 0.0832518 0.0244313 0.048466 0.086209 0.0620286 0.0195027 0.0384319 0.0457631 0.0695179 0.0610127 0.0143859 0.0352742 0.0708956 22 | 23 | 24 | A R N D C Q E G H I L K M F P S T W Y V 25 | Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val 26 | 27 | 28 | Symmetrical part of the WAG* rate matrix and aa frequencies, 29 | estimated from 3905 globular protein amino acid sequences forming 182 30 | protein families. 
31 | The first part above indicates the symmetric 'exchangeability' 32 | parameters, where s_ij = s_ji. The s_ij above are not scaled, but the 33 | PAML package will perform this scaling. 34 | The second part gives the amino acid frequencies (pi_i) 35 | estimated from the 3905 sequences. The net replacement rate from i to 36 | j is Q_ij = s_ij*pi_j. 37 | Prepared by Simon Whelan and Nick Goldman, December 2000. 38 | 39 | Citation: 40 | Whelan, S. and N. Goldman. 2001. A general empirical model of 41 | protein evolution derived from multiple protein families using 42 | a maximum likelihood approach. Molecular Biology and 43 | Evolution 18:691-699. 44 | -------------------------------------------------------------------------------- /tests/Unit/Dawg/testu01.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_MODULE Dawg::testu01 2 | #define BOOST_TEST_DYN_LINK 3 | 4 | #include "../boost_test_helper.h" 5 | 6 | // Test for uniformity 7 | #define DAWG_NO_CONFIG_H 8 | #ifndef TEST_GEN 9 | # define TEST_GEN 10 | #endif 11 | 12 | #define RANDOM_GEN_HEADER TEST_GEN 13 | 14 | #define XSTR(s) XSTR_(s) 15 | #define XSTR_(s) #s 16 | #define GEN_NAME XSTR(RANDOM_GEN_HEADER) 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | extern "C" { 23 | #include 24 | #include 25 | } 26 | 27 | #include 28 | 29 | using namespace dawg; 30 | using namespace dawg::details; 31 | // Reverses the bit order of a 64-bit value, one byte per loop iteration. 32 | uint64_t revbits(uint64_t x) { 33 | uint64_t y = 0; 34 | for(int i=0;i<8;++i) { 35 | unsigned char b = (unsigned char)x; // lowest remaining byte of x 36 | b = ((b * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32; // reverse the bits within the byte 37 | y = (y << 8) + b; // bytes taken earlier (lower in x) end up higher in y 38 | x = x >> 8; 39 | } 40 | return y; 41 | } 42 | // Identity overload: a 32-bit value is returned unchanged. 43 | uint32_t to_32(uint32_t x) { 44 | return x; 45 | } 46 | uint32_t to_32(uint64_t x) { // Narrows to 32 bits: upper half by default, lower half under TEST_LOWER; bits are reversed first under TEST_REV. 47 | #ifdef TEST_REV 48 | x = revbits(x) ;//<< 4; 49 | #endif 50 | #ifdef TEST_LOWER 51 | return (x & 0xFFFFFFFFUL); 52 | #else 53 | return ((x >> 32) & 0xFFFFFFFFUL); 54 | #endif 55 | } 56 | 57 | 
unsigned long get_bits(void *params, void *state) { 58 | mutt_gen_default *g = static_cast(state); 59 | #ifndef TEST_MIX 60 | return to_32(g->rand_native()); 61 | #else 62 | unsigned long u=0; 63 | for(int i=0;i<8;++i) 64 | u = u << 8 | (g->rand_native() & 255); 65 | return u; 66 | #endif 67 | } 68 | 69 | double get_u01(void *params, void *state) { 70 | mutt_gen_default *g = static_cast(state); 71 | return g->rand_real(); 72 | } 73 | 74 | void write_gen(void *state) { 75 | printf("N/A"); 76 | } 77 | 78 | unif01_Gen *create_gen(unsigned int u) { 79 | static char name[] = GEN_NAME 80 | #ifdef DAWG_DISABLE_WEYL_GENERATOR 81 | "-now" 82 | #endif 83 | #ifdef TEST_LOWER 84 | "-low" 85 | #endif 86 | #ifdef TEST_REV 87 | "-rev" 88 | #endif 89 | #ifdef TEST_MIX 90 | "-mix" 91 | #endif 92 | ; 93 | unif01_Gen *gen = new unif01_Gen; 94 | mutt_gen_default *g = new mutt_gen_default; 95 | g->seed(u); 96 | gen->state = g; 97 | gen->name = &name[0]; 98 | gen->param = nullptr; 99 | gen->GetU01 = &get_u01; 100 | gen->GetBits = &get_bits; 101 | gen->Write = &write_gen; 102 | return gen; 103 | } 104 | 105 | void delete_gen(unif01_Gen *gen) { 106 | if(nullptr == gen) 107 | return; 108 | delete static_cast(gen->state); 109 | delete gen; 110 | } 111 | 112 | BOOST_AUTO_TEST_CASE(test_small_crush) 113 | { 114 | unsigned int u = 1276590013; 115 | unif01_Gen *g = create_gen(u); 116 | bbattery_SmallCrush(g); 117 | delete_gen(g); 118 | } 119 | 120 | //BOOST_AUTO_TEST_CASE(test_crush) 121 | //{ 122 | // unsigned int u = 1276590013; 123 | // unif01_Gen *g = create_gen(u); 124 | // bbattery_Crush(g); 125 | // delete_gen(g); 126 | //} 127 | 128 | //BOOST_AUTO_TEST_CASE(test_big_crush) 129 | //{ 130 | // unsigned int u = 1276590013; 131 | // unif01_Gen *g = create_gen(u); 132 | // bbattery_BigCrush(g); 133 | // delete_gen(g); 134 | //} 135 | 136 | -------------------------------------------------------------------------------- /dawg.txt: 
-------------------------------------------------------------------------------- 1 | dawg -[scubvhqew?] [-o outputfile] file1 [file2...] 2 | -s: process files serially [default] 3 | -c: process files combined together 4 | -u: unbuffered output 5 | -b: buffered output [default] 6 | -q: disable error and warning reports (quiet) 7 | -e: enable error reports [default] 8 | -w: enable warning reports [default] 9 | -v: display version information 10 | -h: display help information 11 | -?: same as -h 12 | -o outputfile: override output filename in the configuration file 13 | 14 | Dawg will read stdin if filename is "-". 15 | 16 | FILE FORMAT 17 | The file format takes a series of statements in the form of 18 | "name = value," where "name" is alphanumeric and value can be a string, 19 | number, boolean, tree, or vector of values. A single variable is 20 | equivalent to a vector of a single entry. 21 | 22 | string: "[char-sequence]" 23 | '[char-sequence]' 24 | """[multi-line char-sequence]""" (rm initial and final newlines) 25 | '''[multi-line char-sequence]''' (kp initial and final newlines) 26 | number: [sign]digits[.digits][(e|E)[sign]digits] 27 | boolean: true|false 28 | tree: Newick Format 29 | vector: { value, value, ...} 30 | 31 | OPTIONS 32 | 33 | Name Type Description 34 | -------------------------------------------------------------------------- 35 | Tree VT phylogeny 36 | TreeScale N coefficient to scale branch lengths by 37 | Sequence VS root sequences 38 | Length VN length of generated root sequences 39 | Rates VVN rate of evolution of each root nucleotide 40 | Model S model of evolution: GTR|JC|K2P|K3P|HKY|F81|F84|TN 41 | Freqs VN nucleotide (ACGT) frequencies 42 | Params VN parameters for the model of evolution 43 | Width N block width for indels and recombination 44 | Scale VN block position scales 45 | Gamma VN coefficients of variance for rate heterogeneity 46 | Alpha VN shape parameters 47 | Iota VN proportions of invariant sites 48 | GapModel VS models of 
indel formation: NB|PL|US 49 | Lambda VN rates of indel formation 50 | GapParams VVN parameter for the indel model 51 | Reps N number of data sets to output 52 | File S output file 53 | Format S output format: Fasta|Nexus|Phylip|Clustal 54 | GapSingleChar B output gaps as a single character 55 | GapPlus B distinguish insertions from deletions in alignment 56 | KeepFlank N undeletable flanking regions N nucs from sequence 57 | KeepEmpty B preserve empty columns in final alignment 58 | LowerCase B output sequences in lowercase 59 | Translate B translate outputted sequences to amino acids 60 | Seed VN pseudo-random-number-generator seed (integers) 61 | Out.Block.Head S string to insert at the start of the output 62 | Out.Block.Tail S string to insert at the end of the output 63 | Out.Block.Before S string to insert before a sequence set in the output 64 | Out.Block.After S string to insert after a sequence set in the output 65 | Out.Subst B do variable substitution in Out.Block.* 66 | 67 | -------------------------------------------------------------------------------- /Modules/FindLibunwind.cmake: -------------------------------------------------------------------------------- 1 | # This file downloaded from https://raw.githubusercontent.com/m-a-d-n-e-s-s/madness/master/cmake/modules/FindLibunwind.cmake 2 | # - Try to find Libunwind 3 | # Input variables: 4 | # LIBUNWIND_ROOT_DIR - The libunwind install directory 5 | # LIBUNWIND_INCLUDE_DIR - The libunwind include directory 6 | # LIBUNWIND_LIBRARY - The libunwind library directory 7 | # Output variables: 8 | # LIBUNWIND_FOUND - System has libunwind 9 | # LIBUNWIND_INCLUDE_DIRS - The libunwind include directories 10 | # LIBUNWIND_LIBRARIES - The libraries needed to use libunwind 11 | # LIBUNWIND_VERSION - The version string for libunwind 12 | 13 | include(FindPackageHandleStandardArgs) 14 | 15 | if(NOT DEFINED LIBUNWIND_FOUND) 16 | 17 | # Set default search paths for libunwind 18 | if(LIBUNWIND_ROOT_DIR) 19 | 
set(LIBUNWIND_INCLUDE_DIR ${LIBUNWIND_ROOT_DIR}/include CACHE PATH "The include directory for libunwind") 20 | if(CMAKE_SIZEOF_VOID_P EQUAL 8 AND CMAKE_SYSTEM_NAME STREQUAL "Linux") 21 | set(LIBUNWIND_LIBRARY ${LIBUNWIND_ROOT_DIR}/lib64;${LIBUNWIND_ROOT_DIR}/lib CACHE PATH "The library directory for libunwind") 22 | else() 23 | set(LIBUNWIND_LIBRARY ${LIBUNWIND_ROOT_DIR}/lib CACHE PATH "The library directory for libunwind") 24 | endif() 25 | endif() 26 | 27 | find_path(LIBUNWIND_INCLUDE_DIRS NAMES libunwind.h 28 | HINTS ${LIBUNWIND_INCLUDE_DIR}) 29 | 30 | find_library(LIBUNWIND_LIBRARIES unwind 31 | HINTS ${LIBUNWIND_LIBRARY}) 32 | 33 | # Get libunwind version 34 | if(EXISTS "${LIBUNWIND_INCLUDE_DIRS}/libunwind-common.h") 35 | file(READ "${LIBUNWIND_INCLUDE_DIRS}/libunwind-common.h" _libunwind_version_header) 36 | string(REGEX REPLACE ".*define[ \t]+UNW_VERSION_MAJOR[ \t]+([0-9]+).*" "\\1" 37 | LIBUNWIND_MAJOR_VERSION "${_libunwind_version_header}") 38 | string(REGEX REPLACE ".*define[ \t]+UNW_VERSION_MINOR[ \t]+([0-9]+).*" "\\1" 39 | LIBUNWIND_MINOR_VERSION "${_libunwind_version_header}") 40 | string(REGEX REPLACE ".*define[ \t]+UNW_VERSION_EXTRA[ \t]+([0-9]*).*" "\\1" 41 | LIBUNWIND_MICRO_VERSION "${_libunwind_version_header}") 42 | if(LIBUNWIND_MICRO_VERSION) 43 | set(LIBUNWIND_VERSION "${LIBUNWIND_MAJOR_VERSION}.${LIBUNWIND_MINOR_VERSION}.${LIBUNWIND_MICRO_VERSION}") 44 | else() 45 | set(LIBUNWIND_VERSION "${LIBUNWIND_MAJOR_VERSION}.${LIBUNWIND_MINOR_VERSION}") 46 | endif() 47 | unset(_libunwind_version_header) 48 | endif() 49 | 50 | # handle the QUIETLY and REQUIRED arguments and set LIBUNWIND_FOUND to TRUE 51 | # if all listed variables are TRUE 52 | find_package_handle_standard_args(Libunwind 53 | FOUND_VAR LIBUNWIND_FOUND 54 | VERSION_VAR LIBUNWIND_VERSION 55 | REQUIRED_VARS LIBUNWIND_LIBRARIES LIBUNWIND_INCLUDE_DIRS) 56 | 57 | mark_as_advanced(LIBUNWIND_INCLUDE_DIR LIBUNWIND_LIBRARY 58 | LIBUNWIND_INCLUDE_DIRS LIBUNWIND_LIBRARIES) 59 | 60 | endif() 61 | 62 
| if(LIBUNWIND_FOUND) 63 | if(NOT TARGET LIBUNWIND::LIBUNWIND) 64 | add_library(LIBUNWIND::LIBUNWIND UNKNOWN IMPORTED) 65 | set_target_properties(LIBUNWIND::LIBUNWIND PROPERTIES 66 | INTERFACE_INCLUDE_DIRECTORIES "${LIBUNWIND_INCLUDE_DIRS}" 67 | IMPORTED_LOCATION "${LIBUNWIND_LIBRARIES}" ) 68 | endif() 69 | endif() -------------------------------------------------------------------------------- /src/dawgarg.xmh: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | * Copyright (C) 2009 Reed A. Cartwright, PhD * 3 | * * 4 | * This program is free software: you can redistribute it and/or modify * 5 | * it under the terms of the GNU General Public License as published by * 6 | * the Free Software Foundation, either version 3 of the License, or * 7 | * (at your option) any later version. * 8 | * * 9 | * This program is distributed in the hope that it will be useful, * 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 12 | * GNU General Public License for more details. * 13 | * * 14 | * You should have received a copy of the GNU General Public License * 15 | * along with this program. If not, see . 
* 16 | ****************************************************************************/ 17 | 18 | #include "dawg/details/xm.h" 19 | 20 | /*************************************************************************** 21 | * X-Macro List * 22 | * * 23 | * XM(lname, sname, desc, type, def) (option) * 24 | * XF(lname, sname, desc, type, def) (flag) * 25 | ***************************************************************************/ 26 | 27 | XF((version), , "display version information", bool, false) 28 | XF((help)(trick), , "display description of common control variables", bool, 29 | false) 30 | 31 | XM((output), (o), "output to this file", std::string, "") 32 | XM((seed), , "PRNG seed", unsigned int, 0) 33 | XM((reps), , "the number of alignments to generate", unsigned int, 0) 34 | XF((split), , "split output into separate files", bool, false) 35 | XF((append), , "append output to file", bool, false) 36 | XF((label), , "label each simulation with a unique id", bool, false) 37 | 38 | XM((arg)(file), , "read arguments from file", std::string, "") 39 | 40 | // XCMD((serial), (s), "Process input files serially", bool, true) 41 | // XCMD((combined),(c), "process input files together", bool, false) 42 | // XCMD((buffer), (b), "buffer output", bool, true) 43 | // XCMD((unbuffer), (u), "unbuffered output", bool, false) 44 | 45 | // Standard options 46 | // XM((quiet), (q), "disable all warnings and error messages", bool, 47 | // false) XM((warn), (w), "dsable warnings", bool, false) XM((error), 48 | // (e), "disable error messages", bool, false) XM((input), , "input 49 | // files", std::vector< std::string >, DL(std::vector< std::string >(1, 50 | // std::string("-")), "-") ) XM((input), , "input files", std::vector< 51 | // std::string >, DL(std::vector< std::string >(), "") ) 52 | 53 | /*************************************************************************** 54 | * cleanup * 55 | ***************************************************************************/ 56 | #include 
"dawg/details/xm.h" 57 | -------------------------------------------------------------------------------- /releng/releng.cmake: -------------------------------------------------------------------------------- 1 | # Portable Release Engineering Script 2 | 3 | set(PROJECT_VERSION "") # Default Version 4 | set(PROJECT_NAME "dawg") 5 | set(PROJECT_TITLE "Dawg") 6 | set(PROJECT_DISTS "dawg-2*") 7 | set(SVN_URL "svn://scit.us/${PROJECT_NAME}/") 8 | 9 | find_program(SVN_BIN svn) 10 | 11 | if(NOT SVN_BIN) 12 | message(FATAL_ERROR "Could not find Subversion binary.") 13 | endif() 14 | 15 | set(ENV{LC_ALL} C) 16 | 17 | # Script Options 18 | # RELENG_TAG 19 | # RELENG_M32 20 | # RELENG_M64 21 | # RELENG_TOOLCHAIN 22 | 23 | if(NOT RELENG_TAG) 24 | set(RELENG_TAG "current") 25 | endif() 26 | 27 | # Identify Temporary Directory 28 | if(WIN32 AND NOT UNIX) 29 | set(TMPDIR $ENV{TEMP}) 30 | if(NOT TMPDIR) 31 | set(TMPDIR "c:/Temp") 32 | endif() 33 | else() 34 | set(TMPDIR $ENV{TMPDIR}) 35 | if(NOT TMPDIR) 36 | set(TMPDIR "/tmp") 37 | endif() 38 | endif() 39 | 40 | string(RANDOM TMP) 41 | set(RELENG_DIR "${TMPDIR}/${PROJECT_NAME}-releng-${TMP}/") 42 | set(ARCHIVE_DIR "${RELENG_DIR}/dawg") 43 | 44 | message(STATUS "Using ${RELENG_DIR} to build packages ...") 45 | file(MAKE_DIRECTORY "${RELENG_DIR}") 46 | 47 | message(STATUS "Exporting ${PROJECT_TITLE} '${RELENG_TAG}' from SVN...") 48 | execute_process(COMMAND ${SVN_BIN} export "${SVN_URL}/${RELENG_TAG}" "${ARCHIVE_DIR}" 49 | OUTPUT_VARIABLE SVN_OUTPUT 50 | ) 51 | 52 | if(SVN_OUTPUT MATCHES "\nExported revision (.*)\\.\n") 53 | set(PROJECT_SVNREV ${CMAKE_MATCH_1}) 54 | else() 55 | message(FATAL_ERROR "Unable to identify revision") 56 | endif() 57 | 58 | message(STATUS "Configuring ${PROJECT_TITLE} '${RELENG_TAG}' ...") 59 | set(CMAKE_DEFS 60 | -DCMAKE_BUILD_TYPE=Release 61 | -DUSE_STATIC_LIBS=on 62 | -DNEW_PACKAGE_VERSION_REV=${PROJECT_SVNREV} 63 | ) 64 | set(CMAKE_ARGS "") 65 | if(WIN32 AND NOT UNIX) 66 | set(CMAKE_ARGS -G "NMake 
Makefiles" ${CMAKE_ARGS}) 67 | set(MAKE_BIN nmake) 68 | elseif(APPLE) 69 | SET(CMAKE_DEFS ${CMAKE_DEFS} 70 | "-DCMAKE_OSX_ARCHITECTURES=x86_64\\;i386" 71 | -DCMAKE_OSX_DEPLOYMENT_TARGET=10.5 72 | -DCPACK_SYSTEM_NAME=Darwin64-universal 73 | ) 74 | set(MAKE_BIN make) 75 | else() 76 | set(MAKE_BIN make) 77 | endif() 78 | 79 | if(RELENG_M32) 80 | set(CMAKE_DEFS ${CMAKE_DEFS} -DCMAKE_C_FLAGS=-m32) 81 | endif() 82 | if(RELENG_M64) 83 | set(CMAKE_DEFS ${CMAKE_DEFS} -DCMAKE_C_FLAGS=-m64) 84 | endif() 85 | if(RELENG_TOOLCHAIN) 86 | get_filename_component(RELENG_TOOLCHAIN ${RELENG_TOOLCHAIN} REALPATH) 87 | set(CMAKE_DEFS ${CMAKE_DEFS} -DCMAKE_TOOLCHAIN_FILE=${RELENG_TOOLCHAIN}) 88 | endif() 89 | 90 | execute_process(COMMAND ${CMAKE_COMMAND} ${CMAKE_ARGS} .. ${CMAKE_DEFS} 91 | WORKING_DIRECTORY "${ARCHIVE_DIR}/build") 92 | 93 | message(STATUS "Building packages ...") 94 | 95 | execute_process(COMMAND ${MAKE_BIN} ${PROJECT_NAME} WORKING_DIRECTORY "${ARCHIVE_DIR}/build") 96 | execute_process(COMMAND ${MAKE_BIN} new_package WORKING_DIRECTORY "${ARCHIVE_DIR}/build") 97 | execute_process(COMMAND ${MAKE_BIN} new_package_source WORKING_DIRECTORY "${ARCHIVE_DIR}/build") 98 | 99 | message(STATUS "Relocating packages ...") 100 | file(GLOB DISTS "${ARCHIVE_DIR}/build/${PROJECT_DISTS}") 101 | foreach(dist ${DISTS}) 102 | execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different ${dist} ${CMAKE_CURRENT_BINARY_DIR}) 103 | endforeach() 104 | 105 | if(NOT NO_CLEAN) 106 | message(STATUS "Cleaning up ...") 107 | execute_process(COMMAND ${CMAKE_COMMAND} -E remove_directory "${RELENG_DIR}") 108 | endif() 109 | 110 | #get_cmake_property(_variableNames VARIABLES) 111 | #foreach (_variableName ${_variableNames}) 112 | # message(STATUS "${_variableName}=${${_variableName}}") 113 | #endforeach() 114 | 115 | -------------------------------------------------------------------------------- /src/include/dawg/details/xm.h: 
-------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2007-2009 Reed A. Cartwright, PhD * 3 | * * 4 | * Permission is hereby granted, free of charge, to any person obtaining a * 5 | * copy of this software and associated documentation files (the "Software"), * 6 | * to deal in the Software without restriction, including without limitation * 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, * 8 | * and/or sell copies of the Software, and to permit persons to whom the * 9 | * Software is furnished to do so, subject to the following conditions: * 10 | * * 11 | * The above copyright notice and this permission notice shall be included in * 12 | * all copies or substantial portions of the Software. * 13 | * * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * 20 | * DEALINGS IN THE SOFTWARE. 
* 21 | ******************************************************************************/ 22 | 23 | #include 24 | 25 | #ifndef XMACROS_HELPERS 26 | #define XMACROS_HELPERS 27 | 28 | /****************************************************************************** 29 | * X-Helpers List * 30 | ******************************************************************************/ 31 | 32 | // The JS macro cats a seq 'seq' with separator 'sep' 33 | 34 | #define JS_OP(s, data, elem) BOOST_PP_CAT(data, elem) 35 | 36 | #define JS(sep, seq) \ 37 | BOOST_PP_IF(BOOST_PP_EQUAL(BOOST_PP_SEQ_SIZE(seq), 1), JS_1, JS_2)(sep, seq) 38 | 39 | #define JS_1(sep, seq) BOOST_PP_SEQ_HEAD(seq) 40 | 41 | #define JS_2(sep, seq) \ 42 | BOOST_PP_SEQ_CAT((BOOST_PP_SEQ_HEAD(seq))BOOST_PP_SEQ_TRANSFORM( \ 43 | JS_OP, sep, BOOST_PP_SEQ_TAIL(seq))) 44 | 45 | // The SS macro is similiar to _JS except that it stringizes everything 46 | 47 | #define SS_OP(r, data, elem) data BOOST_PP_STRINGIZE(elem) 48 | 49 | #define SS(sep, seq) \ 50 | BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(seq)) \ 51 | BOOST_PP_SEQ_FOR_EACH(SS_OP, sep, BOOST_PP_SEQ_TAIL(seq)) 52 | 53 | // Output result if seq is defined 54 | 55 | #define IFD(seq, res) \ 56 | BOOST_PP_EXPR_IF(BOOST_PP_GREATER(BOOST_PP_SEQ_SIZE((_)seq), 1), res) 57 | 58 | #define XV(lname) JS(_, lname) 59 | #define XS(lname) SS("-", lname) 60 | #define XP(lname) SS(".", lname) 61 | #define DL(a, b) a, b 62 | 63 | #else 64 | 65 | /****************************************************************************** 66 | * Cleanup * 67 | ******************************************************************************/ 68 | 69 | #undef XMACROS_HELPERS 70 | #undef JS_OP 71 | #undef JS 72 | #undef SS 73 | #undef IFD 74 | #undef XV 75 | #undef XS 76 | #undef XP 77 | #undef DL 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /src/include/dawg/output.h: -------------------------------------------------------------------------------- 1 | #pragma 
once 2 | #ifndef DAWG_OUTPUT_H 3 | #define DAWG_OUTPUT_H 4 | /**************************************************************************** 5 | * Copyright (C) 2010 Reed A. Cartwright, PhD * 6 | ****************************************************************************/ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "dawg/residue.h" 14 | #include "dawg/utils.h" 15 | #include "version.h" 16 | 17 | namespace dawg { 18 | // Writes alignments to an output stream or file in a selectable format, wrapping each one in caller-supplied block text. 19 | class output { 20 | public: 21 | output() 22 | : do_op(&output::print_aln), 23 | p_out(nullptr), 24 | format_id(0), 25 | rep(0), 26 | label_width(0), last_rep(0), 27 | do_append(false), 28 | do_split(false), 29 | do_label(false), 30 | split_id_offset(0) {} 31 | 32 | bool open(const char *file_name, unsigned int max_rep = 0, 33 | bool split = false, bool append = false, bool label = false); 34 | // Writes one alignment with its surrounding block text, then flushes; returns false when no stream is available. 35 | inline bool operator()(const alignment &aln) { 36 | open_next(); 37 | if(p_out == nullptr) return false; 38 | std::ostream &out = *p_out; 39 | if(!do_split) out << ((rep == 0) ? 
block_head : block_between); 40 | out << block_before; 41 | (this->*do_op)(aln); 42 | out << block_after; 43 | if(!do_split && rep == last_rep) out << block_tail; // was `!do_split == 0`, which is true only when splitting; now mirrors the head/between test above. NOTE(review): confirm split-mode output is not expected to receive block_tail here. 44 | ++rep; 45 | out.flush(); 46 | return true; 47 | } 48 | // Selects the printer by format name; defined after the class. 49 | template <typename T> 50 | bool set_format(T format); 51 | 52 | inline void set_ostream(std::ostream &os) { p_out = &os; } 53 | inline void set_ostream(std::ostream *os) { p_out = os; } 54 | 55 | inline void set_blocks(const char *h, const char *w, const char *t, 56 | const char *b, const char *a) { 57 | block_head.assign(h); 58 | block_between.assign(w); 59 | block_tail.assign(t); 60 | block_before.assign(b); 61 | block_after.assign(a); 62 | } 63 | 64 | private: 65 | void (output::*do_op)(const alignment &aln); 66 | 67 | void print_aln(const alignment &aln); 68 | void print_poo(const alignment &aln); 69 | void print_fasta(const alignment &aln); 70 | void print_nexus(const alignment &aln); 71 | void print_phylip(const alignment &aln); 72 | 73 | protected: 74 | bool open_file(const char *file_name); 75 | bool open_next(); 76 | 77 | std::ostream *p_out; 78 | std::ofstream fout; 79 | std::size_t format_id; 80 | unsigned int rep, label_width, last_rep; 81 | bool do_append, do_split, do_label; 82 | std::string current_label, split_file_name; 83 | std::string::size_type split_id_offset; 84 | 85 | std::string block_head; 86 | std::string block_between; 87 | std::string block_tail; 88 | std::string block_before; 89 | std::string block_after; 90 | }; 91 | // Looks up format in format_keys; an unknown key resets the printer to print_aln and returns false. 92 | template <typename T> 93 | bool output::set_format(T format) { 94 | static constexpr char format_keys[][10] = {"aln", "poo", "fasta", 95 | "fsa", "nexus", "phylip"}; 96 | static void (output::*format_ops[])(const alignment &aln) = { 97 | &output::print_aln, &output::print_poo, &output::print_fasta, 98 | &output::print_fasta, &output::print_nexus, &output::print_phylip}; 99 | format_id = key_switch(format, format_keys); 100 | if(format_id == (std::size_t)-1) { 101 | format_id = 0; 102 | do_op = &output::print_aln; 
# Rewrite the variable named by STR so that backslashes, newlines and tabs
# appear as visible escape sequences.  This is a macro so that the caller's
# variable is modified in place.
macro(ESCAPE_STRING STR)
  string(REPLACE "\\" "\\\\" ${STR} "${${STR}}")
  string(REPLACE "\n" "\\n" ${STR} "${${STR}}")
  string(REPLACE "\t" "\\t" ${STR} "${${STR}}")
endmacro()

# Abort with HEADLINE followed by the process result and captured output.
# Reads `result`, `out` and `err` from the calling CheckProcessTest scope.
function(_CheckProcessTest_Fail headline)
  message(FATAL_ERROR
    "${headline}\n"
    "Test result: ${result}\n"
    "Test output:\n"
    "${out}\n"
    "${err}" )
endfunction()

# Run the command stored in ${TEST}-CMD (working directory ${TEST}-WD) and
# check it against whichever expectation variables are defined:
#   ${TEST}-RESULT         exit result must equal this value
#   ${TEST}-RESULT-FAIL    exit result must differ from this value
#   ${TEST}-STDERR         every regex in the list must match stderr
#   ${TEST}-STDERR-FAIL    no regex in the list may match stderr
#   ${TEST}-STDOUT         every regex in the list must match stdout
#   ${TEST}-STDOUT-FAIL    no regex in the list may match stdout
# The first violated expectation raises a FATAL_ERROR.
function(CheckProcessTest PREFIX TEST)
  message(STATUS "Test ${PREFIX}.${TEST}...")

  execute_process( COMMAND ${${TEST}-CMD}
    OUTPUT_VARIABLE stdout
    ERROR_VARIABLE stderr
    RESULT_VARIABLE result
    WORKING_DIRECTORY "${${TEST}-WD}"
  )
  # Prefix every captured line so the two streams can be told apart.
  string(REPLACE "\n" "\n out> " out " out> ${stdout}")
  string(REPLACE "\n" "\n err> " err " err> ${stderr}")

  if(DEFINED ${TEST}-RESULT AND
     NOT "${result}" STREQUAL "${${TEST}-RESULT}")
    _CheckProcessTest_Fail(
      "Test result does not match \"${${TEST}-RESULT}\".")
  endif()

  if(DEFINED ${TEST}-RESULT-FAIL AND
     "${result}" STREQUAL "${${TEST}-RESULT-FAIL}")
    _CheckProcessTest_Fail(
      "Test result unexpectedly matches \"${${TEST}-RESULT-FAIL}\".")
  endif()

  # stderr expectations are checked before stdout ones; within a stream the
  # must-match list is checked before the must-not-match list.
  foreach(stream stderr stdout)
    string(TOUPPER "${stream}" key)

    if(DEFINED ${TEST}-${key})
      foreach(test_str ${${TEST}-${key}})
        if(NOT "${${stream}}" MATCHES "${test_str}")
          ESCAPE_STRING(test_str)
          _CheckProcessTest_Fail(
            "Test ${stream} does not match \"${test_str}\".")
        endif()
      endforeach()
    endif()

    if(DEFINED ${TEST}-${key}-FAIL)
      foreach(test_str ${${TEST}-${key}-FAIL})
        if("${${stream}}" MATCHES "${test_str}")
          ESCAPE_STRING(test_str)
          _CheckProcessTest_Fail(
            "Test ${stream} unexpectedly matches \"${test_str}\".")
        endif()
      endforeach()
    endif()
  endforeach()

endfunction()

# Run CheckProcessTest for every test name passed after PREFIX.
function(CheckProcessTests PREFIX)
  foreach(TEST ${ARGN})
    CheckProcessTest("${PREFIX}" "${TEST}")
  endforeach()
endfunction()
Cartwright, PhD * 3 | ****************************************************************************/ 4 | 5 | #include "dawg/details/xm.h" 6 | 7 | /*************************************************************************** 8 | * X-Macro List * 9 | * Defines the core model for Dawg, simulate sequences down a tree * 10 | * using substitution and indel models * 11 | * * 12 | * XM(name, type, def, help) * 13 | ***************************************************************************/ 14 | 15 | XM((subst)(model), std::string, "jc", 16 | "The identifier of the substitution model, e.g. JC, GTR, WAG,\n" 17 | " CODGY." ) 18 | XM((subst)(params), std::vector, , 19 | "A list specifying the parameters of the substitution model.\n" 20 | " Model Dependant." ) 21 | XM((subst)(freqs), std::vector, , 22 | "A list specifying the stationary frequencies of nucleotides,\n" 23 | " amino acids, or codons. Model Dependant.") 24 | XM((subst)(rate)(model), std::string, "const", 25 | "The identifier of the heterogeneous rate model, e.g.\n" 26 | " CONST, GAMMA, or ZERO." ) 27 | XM((subst)(rate)(params), std::vector, , 28 | "The parameters of the rate model. Model Dependant.") 29 | 30 | XM((indel)(model)(ins), std::vector, DL(1, "user"), 31 | "The identifiers of the insertion models, e.g. USER, GEO,\n" 32 | " POWER-LAW.") 33 | XM((indel)(params)(ins), std::vector, DL(1, 1.0), 34 | "The parameters of the insertion models. Model Dependant.") 35 | XM((indel)(rate)(ins), std::vector, , 36 | "The per-substitution rates of the mixture of insertion models.") 37 | XM((indel)(max)(ins), unsigned int, 100, 38 | "The maximum size of an insertion") 39 | 40 | XM((indel)(model)(del), std::vector, DL(1, "user"), 41 | "The identifiers of the deletion models, e.g. USER, GEO,\n" 42 | " POWER-LAW.") 43 | XM((indel)(params)(del), std::vector, DL(1, 1.0), 44 | "The parameters of the deletion models. 
Model Dependant.") 45 | XM((indel)(rate)(del), std::vector, , 46 | "The per-substitution rates of the mixture of deletion models.") 47 | XM((indel)(max)(del), unsigned int, 100, 48 | "The maximum size of a deletion.") 49 | 50 | XM((tree)(model), std::string, "user", 51 | "The identifier of the tree model.") 52 | XM((tree)(params), std::vector, , 53 | "The parameters of the tree model. Model Dependant.") 54 | XM((tree)(tree), std::string, , 55 | "The tree or tree template.") 56 | XM((tree)(scale), double, 1.0, 57 | "Branch-lengths are scaled by this number in the simulation.") 58 | 59 | XM((root)(length), unsigned int, 0, 60 | "The length of a randomly generated root sequence.") 61 | XM((root)(seq), std::string, , 62 | "A specific root sequence.") 63 | XM((root)(code), unsigned int, 0, 64 | "The genetic code used when simulating codon evolution.") 65 | XM((root)(segment), unsigned int, 0, 66 | "The segment number that the root belongs too.") 67 | XM((root)(gapoverlap), bool, true, 68 | "Allow upstream deletions to affect this segment.") 69 | 70 | XM((output)(markins), bool, false, 71 | "Distinguish insertions from deletions.") 72 | XM((output)(keepempty), bool, true, 73 | "Keep empty columns instead of deleting them in the alignment.") 74 | XM((output)(lowercase), bool, false, 75 | "Use lowercase for sequence output.") 76 | XM((output)(rna), bool, false, 77 | "Output an RNA sequence instead of a DNA sequence") 78 | //XM((output)(translate), bool, false) 79 | //XM((output)(gapsinglechar), bool, false) 80 | 81 | /*************************************************************************** 82 | * cleanup * 83 | ***************************************************************************/ 84 | #include "dawg/details/xm.h" 85 | -------------------------------------------------------------------------------- /tests/Unit/boost_test_helper.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Steven H. 
Wu 3 | * Authors: Steven H. Wu 4 | * 5 | * This file is part of DeNovoGear. 6 | * 7 | * DeNovoGear is free software: you can redistribute it and/or modify it under 8 | * the terms of the GNU General Public License as published by the Free Software 9 | * Foundation; either version 3 of the License, or (at your option) any later 10 | * version. 11 | * 12 | * This program is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 14 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License along with 17 | * this program. If not, see . 18 | */ 19 | 20 | #ifndef DENOVOGEAR_BOOST_TEST_HELPER_H 21 | #define DENOVOGEAR_BOOST_TEST_HELPER_H 22 | 23 | #include 24 | #include 25 | 26 | //FIXME: too many global 27 | //TODO(Reed): I think we should make this configurable somehow from a CMake run. It is important to test both absolute equality and threshold-based equality. 
//TODO(SHW): Issue #157
// Tolerance used by the assert-based helpers (AssertNear).
const double BOOST_ABS_THRESHOLD = 1e-5;
// Tolerance, in percent, handed to BOOST_CHECK_CLOSE.
const double BOOST_CLOSE_PERCENTAGE_THRESHOLD = 1e-5;


/// Check that `expected` has `expected_size` elements, that `result` has the
/// same size, and that the elements agree within the percentage threshold.
template <typename A>
void boost_check_array(A &expected, A &result, int expected_size) {

    BOOST_CHECK_EQUAL(expected_size, expected.size());
    BOOST_CHECK_EQUAL(expected.size(), result.size());
    // Index with the container's own size type to avoid a signed/unsigned
    // comparison.
    for (decltype(expected.size()) i = 0; i < expected.size(); ++i) {
        BOOST_CHECK_CLOSE(expected[i], result[i], BOOST_CLOSE_PERCENTAGE_THRESHOLD);
    }

}

/// Check two matrices (rows()/cols()/operator()(i,j)) element by element.
/// Entries whose expected value is exactly zero are compared for equality,
/// because a percentage tolerance is meaningless around zero.
template <typename M>
void boost_check_matrix(M &expected, M &result) {
    BOOST_CHECK_EQUAL(expected.rows(), result.rows());
    BOOST_CHECK_EQUAL(expected.cols(), result.cols());

    for (int i = 0; i < expected.rows(); i++) {
        for (int j = 0; j < expected.cols(); j++) {
            if (expected(i, j) == 0) {
                BOOST_CHECK_EQUAL(0, result(i,j));
            } else {
                BOOST_CHECK_CLOSE(expected(i, j), result(i, j),
                                  BOOST_CLOSE_PERCENTAGE_THRESHOLD);
            }
        }
    }
}


/// As boost_check_matrix(expected, result), additionally checking the
/// dimensions of `expected`.
template <typename M>
void boost_check_matrix(M &expected, M &result, int expected_rows,
                        int expected_cols) {
    BOOST_CHECK_EQUAL(expected_rows, expected.rows());
    BOOST_CHECK_EQUAL(expected_cols, expected.cols());

    boost_check_matrix(expected, result);
}

/// Check two vector-like containers for closeness; exact zeros in `expected`
/// are compared for equality.
template <typename V, typename V2>
void boost_check_close_vector(V &expected, V2 &result) {

    BOOST_CHECK_EQUAL(expected.size(), result.size());
    for (decltype(expected.size()) i = 0; i < expected.size(); ++i) {
        if(expected[i] == 0){
            BOOST_CHECK_EQUAL(0, result[i]);
        }
        else {
            BOOST_CHECK_CLOSE(expected[i], result[i], BOOST_CLOSE_PERCENTAGE_THRESHOLD);
        }
    }
}

/// Check two vector-like containers for element-wise equality.
template <typename V, typename V2>
void boost_check_equal_vector(V &expected, V2 &result) {

    BOOST_CHECK_EQUAL(expected.size(), result.size());
    for (decltype(expected.size()) i = 0; i < expected.size(); ++i) {
        BOOST_CHECK(expected[i] == result[i]);
    }
}



/// assert() that the two values compare equal.
template <typename A, typename B>
void AssertTrue(A expected, B actual){
    assert(expected==actual);
}

/// assert() that `actual` is within BOOST_ABS_THRESHOLD of `expected`.
/// The error is relative to `expected`, or absolute when `expected` is zero
/// (the relative error would divide by zero).  The magnitude of the error is
/// used, so overshoot and undershoot are both caught.
template <typename A>
void AssertNear(A expected, A actual){
    A error = expected - actual;
    if (!(expected == 0)) {
        error = error / expected;
    }
    if (error < 0) {
        error = -error;
    }
    assert(error < BOOST_ABS_THRESHOLD);
    (void)error;  // keeps NDEBUG builds free of unused-variable warnings
}

/// AssertNear applied to every entry of two equally sized matrices.
template <typename A>
void AssertEigenMatrixNear(A expected, A actual){
    for (int j = 0; j < expected.rows(); ++j) {
        for (int k = 0; k < expected.cols(); ++k) {
            AssertNear(expected(j,k), actual(j,k));
        }
    }
}
"\n" 21 | ; 22 | 23 | my @nord = ('T', 'C', 'A', 'G'); 24 | my @codes = ( 25 | # 0 The Standard Code 26 | "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", 27 | # 1 The Standard Code 28 | "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", 29 | # 2 The Vertebrate Mitochondrial Code 30 | "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG", 31 | # 3 The Yeast Mitochondrial Code 32 | "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG", 33 | # 4 The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code 34 | "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", 35 | # 5 The Invertebrate Mitochondrial Code 36 | "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG", 37 | # 6 The Ciliate, Dasycladacean and Hexamita Nuclear Code 38 | "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", 39 | "", 40 | "", 41 | # 9 The Echinoderm and Flatworm Mitochondrial Code 42 | "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", 43 | #10 The Euplotid Nuclear Code 44 | "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", 45 | #11 The Bacterial, Archaeal, and Plant Plastid Code 46 | "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", 47 | #12 The Alternative Yeast Nuclear Code 48 | "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", 49 | #13 The Ascidian Mitochondrial Code 50 | "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG", 51 | #14 The Alternative Flatworm Mitochondrial Code 52 | "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", 53 | #15 The Blepharisma Nuclear Code 54 | "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", 55 | #16 The Chlorophycean Mitochondrial Code 56 | "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", 57 | "", 58 | "", 59 | "", 60 | "", 61 | #21 The Trematode Mitochondrial Code 62 | 
"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG", 63 | #22 The Scenedesmus obliquus mitochondrial Code 64 | "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", 65 | #23 The Thraustochytrium Mitochondrial Code 66 | "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" 67 | ); 68 | 69 | @a = split(//, 'ABCDEFGHIJ@=KLOMNPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!-'); 70 | 71 | 72 | print("\n// base -> char\n"); 73 | foreach(@codes) { 74 | my @c = split(//); 75 | my $j = 0; 76 | my $i = 0; 77 | my @x = (64) x 64; 78 | my $gap = 0; 79 | foreach(@c) { 80 | if($_ ne '*') { 81 | $x[$j] = $i; 82 | } elsif($gap) { 83 | $x[$j] = 64; 84 | } else { 85 | $x[$j] = 65; 86 | $gap = 1; 87 | } 88 | $i++; 89 | $j++; 90 | } 91 | print "\t\t\"" . join('', @a[@x]) . "\"\n"; 92 | } 93 | 94 | print("\n// char -> base\n"); 95 | foreach(@codes) { 96 | my @c = split(//); 97 | my $j = 0; 98 | my $i = 0; 99 | my @x = (-1) x 80; 100 | foreach(@c) { 101 | $x[ord($a[$i])-ord('0')] = $j++ if($_ ne '*'); 102 | $i++; 103 | } 104 | @x = map { sprintf("% 2s", $_) } @x; 105 | print join(",", @x[ 0..19]) . ",\n" . 106 | join(",", @x[20..39]) . ",\n" . 107 | join(",", @x[40..59]) . ",\n" . 108 | join(",", @x[60..79]) . "\n" 109 | ; 110 | } 111 | 112 | print("\n// removing stops\n"); 113 | foreach(@codes) { 114 | my @x = (); 115 | while(/([*])/g) { 116 | push(@x, pos()-length($1)); 117 | } 118 | @x = reverse(@x); 119 | push(@x, 0) while(@x < 5); 120 | @x = map { sprintf("% 2s", $_) } @x; 121 | print "\t\t" . join(',', @x) . 
",\n"; 122 | } 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /src/include/dawg/utils/aliastable.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef ALIASTABLE_H 3 | #define ALIASTABLE_H 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | class alias_table { 13 | public: 14 | typedef boost::uint64_t uint64; 15 | typedef boost::uint32_t uint32; 16 | typedef uint32 category_type; 17 | 18 | alias_table() { } 19 | 20 | template< typename T > 21 | explicit alias_table(const T &v) { 22 | create(v); 23 | } 24 | 25 | category_type get(uint64 u) const { 26 | uint32 x = static_cast(u >> shr_); 27 | uint32 y = static_cast(u); 28 | return ( y < p_[x]) ? x : a_[x]; 29 | } 30 | 31 | const std::vector& a() const { return a_;} 32 | const std::vector& p() const { return p_;} 33 | 34 | category_type operator()(uint64 u) const { 35 | return get(u); 36 | } 37 | 38 | // create the alias table 39 | template< typename T > 40 | void create(const T &v) { 41 | std::vector vv(boost::begin(v),boost::end(v)); 42 | create_inplace(vv); 43 | } 44 | 45 | template< typename T > 46 | void create(T first, T last) { 47 | std::vector vv(first,last); 48 | create_inplace(vv); 49 | } 50 | 51 | // create the alias table 52 | void create_inplace(std::vector &v) { 53 | assert(v.size() <= std::numeric_limits::max()); 54 | // round the size of vector up to the nearest power of two 55 | std::pair::size_type,int> ru = round_up(v.size()); 56 | const std::vector::size_type sz = ru.first; 57 | v.resize(sz,0.0); 58 | a_.resize(sz,0); 59 | p_.resize(sz,0); 60 | // use the number of bits to calculate the right shift operand 61 | shr_ = 64 - ru.second; 62 | 63 | // find scale for input vector 64 | double d = std::accumulate(v.begin(),v.end(),0.0)/sz; 65 | 66 | // find first large and small values 67 | // g: current large value index 68 | // m: current small 
value index 69 | // mm: next possible small value index 70 | std::vector::size_type g,m,mm; 71 | for(g=0; g= d; ++m) 74 | /*noop*/; 75 | mm = m+1; 76 | 77 | // contruct table 78 | while(g < sz && m < sz) { 79 | assert(v[m] < d); 80 | p_[m] = static_cast(4294967296.0/d*v[m]); 81 | a_[m] = static_cast(g); 82 | v[g] = (v[g]+v[m])-d; 83 | if(v[g] >= d || mm <= g) { 84 | for(m=mm; m= d; ++m) 85 | /*noop*/; 86 | mm = m+1; 87 | } else 88 | m = g; 89 | for(; g::max(); 95 | a_[g] = static_cast(g); 96 | for(g=g+1; g::max(); 100 | a_[g] = static_cast(g); 101 | } 102 | } 103 | // if we stopped early fill in the rest 104 | if(m < sz) { 105 | p_[m] = std::numeric_limits::max(); 106 | a_[m] = static_cast(m); 107 | for(m=mm; m d) 109 | continue; 110 | p_[m] = std::numeric_limits::max(); 111 | a_[m] = static_cast(m); 112 | } 113 | } 114 | } 115 | 116 | template 117 | void print_table(std::basic_ostream& o) { 118 | for(std::size_t n = 0; n < a_.size(); ++n) { 119 | o << n << "\t" << a_[n] << "\t" << p_[n] << "\n"; 120 | } 121 | } 122 | 123 | private: 124 | template 125 | inline static std::pair round_up(T x) { 126 | T y = static_cast(2); 127 | int k = 1; 128 | for(;y < x;y*=2,++k) 129 | /*noop*/; 130 | return std::make_pair(y,k); 131 | } 132 | 133 | std::vector a_,p_; 134 | int shr_; 135 | }; 136 | 137 | template 138 | inline std::basic_ostream& 139 | operator<<(std::basic_ostream& o, const alias_table &a) { 140 | 141 | } 142 | 143 | 144 | #endif //ALIASTABLE_H 145 | -------------------------------------------------------------------------------- /src/include/dawg/utils/vecio.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DAWG_VECIO_H 3 | #define DAWG_VECIO_H 4 | /**************************************************************************** 5 | * Copyright (C) 2009 Reed A. 
namespace dawg {

namespace details {

// vecio_info adapted from boost/tuple/tuple_io.hpp
// Copyright (C) 2001 Jaakko Jarvi (jaakko.jarvi@cs.utu.fi)
//               2001 Gary Powell (gary.powell@sierra.com)
//
// Stores the opening, closing and delimiter characters used when printing a
// std::vector in the per-stream iword() storage of the stream itself.
class vecio_info {
public:
    enum manipulator_type { open, close, delimiter, number_of_manipulators };

private:
    // One iword slot per manipulator kind, allocated once on first use.
    static int get_stream_index(int m) {
        static const int stream_index[number_of_manipulators] = {
            std::ios::xalloc(), std::ios::xalloc(), std::ios::xalloc()};
        return stream_index[m];
    }

    // Declared but never defined: the class is a namespace for statics only.
    vecio_info(const vecio_info&);
    vecio_info();

public:
    /// Return the character configured on stream `i` for manipulator `m`.
    /// An unset slot (value 0) yields the defaults '(' , ')' and ' '.
    template <class CharType, class CharTrait>
    static CharType get_manipulator(std::basic_ios<CharType, CharTrait>& i,
                                    manipulator_type m) {
        CharType c = static_cast<CharType>(i.iword(get_stream_index(m)));
        // parentheses and space are the default manipulators
        if(!c) {
            switch(m) {
            case open:
                c = i.widen('(');
                break;
            case close:
                c = i.widen(')');
                break;
            case delimiter:
                c = i.widen(' ');
                break;
            default:
                break;
            }
        }
        return c;
    }

    /// Store character `c` for manipulator `m` on stream `i`.
    template <class CharType, class CharTrait>
    static void set_manipulator(std::basic_ios<CharType, CharTrait>& i,
                                manipulator_type m, CharType c) {
        i.iword(get_stream_index(m)) = static_cast<long>(c);
    }
};

}  // namespace details

/// Stream manipulator object produced by set_open(), set_close() and
/// set_delimiter(); inserting it into a stream records its character there.
template <class CharType = char>
class vecio_manipulator {
    const details::vecio_info::manipulator_type mt;
    CharType f_c;

public:
    // The character is taken as CharType: accepting a plain `char` here
    // truncated wide characters before they reached the stream.
    explicit vecio_manipulator(details::vecio_info::manipulator_type m,
                               const CharType c = CharType())
        : mt(m), f_c(c) {}

    template <class CharTrait>
    void set(std::basic_ios<CharType, CharTrait>& io) const {
        details::vecio_info::set_manipulator(io, mt, f_c);
    }
};

/// Set the character printed before the first element.
template <class CharType>
inline vecio_manipulator<CharType> set_open(const CharType c) {
    return vecio_manipulator<CharType>(details::vecio_info::open, c);
}

/// Set the character printed after the last element.
template <class CharType>
inline vecio_manipulator<CharType> set_close(const CharType c) {
    return vecio_manipulator<CharType>(details::vecio_info::close, c);
}

/// Set the character printed between elements.
template <class CharType>
inline vecio_manipulator<CharType> set_delimiter(const CharType c) {
    return vecio_manipulator<CharType>(details::vecio_info::delimiter, c);
}

/// Apply a vecio manipulator to the stream.
template <class CharType, class CharTrait>
inline std::basic_ostream<CharType, CharTrait>& operator<<(
    std::basic_ostream<CharType, CharTrait>& o,
    const vecio_manipulator<CharType>& m) {
    m.set(o);
    return o;
}

/// Print the elements of `v` framed by the stream's open/close characters and
/// separated by its delimiter.  A configured character value of 127 means
/// "print nothing" for that position.  Nothing is written if the stream is
/// not good().
template <class CharType, class CharTrait, class T, class A>
inline std::basic_ostream<CharType, CharTrait>& operator<<(
    std::basic_ostream<CharType, CharTrait>& o, const std::vector<T, A>& v) {
    if(!o.good()) return o;

    const CharType l =
        details::vecio_info::get_manipulator(o, details::vecio_info::open);
    const CharType d =
        details::vecio_info::get_manipulator(o, details::vecio_info::delimiter);
    const CharType r =
        details::vecio_info::get_manipulator(o, details::vecio_info::close);

    if(l != 127) o << l;
    typename std::vector<T, A>::const_iterator it = v.begin();
    // The first element never gets a leading delimiter.
    if(it != v.end()) {
        o << *it;
        ++it;
    }
    if(d != 127) {
        for(; it != v.end(); ++it) o << d << *it;
    } else {
        for(; it != v.end(); ++it) o << *it;
    }
    if(r != 127) o << r;

    return o;
}

}  // namespace dawg
# Derive CPACK_SYSTEM_NAME ("<system>-<processor>") unless the caller already
# provided one.  An explicit -m32/-m64 in CMAKE_C_FLAGS overrides the
# processor reported by the host.
IF(NOT DEFINED CPACK_SYSTEM_NAME)
  if(APPLE) # work around a bug in CMakeDetermineSystem.cmake
    # execute_process replaces the deprecated exec_program command; the
    # trailing newline of `uname -m` has to be stripped explicitly.
    execute_process(COMMAND uname -m
      OUTPUT_VARIABLE CPACK_SYSTEM_PROCESSOR
      RESULT_VARIABLE val
      OUTPUT_STRIP_TRAILING_WHITESPACE)
    IF("${CMAKE_C_FLAGS}" MATCHES "[-]m32")
      SET(CPACK_SYSTEM_PROCESSOR i386)
    ELSEIF("${CMAKE_C_FLAGS}" MATCHES "[-]m64")
      SET(CPACK_SYSTEM_PROCESSOR x86_64)
    ENDIF()
    if(CPACK_SYSTEM_PROCESSOR STREQUAL "x86_64")
      set(CPACK_SYSTEM_NAME Darwin64)
    else()
      set(CPACK_SYSTEM_NAME Darwin)
    endif()
    SET(CPACK_SYSTEM_NAME ${CPACK_SYSTEM_NAME}-${CPACK_SYSTEM_PROCESSOR})
  elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "CYGWIN")
    SET(CPACK_SYSTEM_NAME Cygwin-x86)
  elseif(UNIX)
    set(CPACK_SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR})
    # Check for building 32-bit binaries on 64-bit machines
    # Adjust names as appropriate
    IF("${CMAKE_C_FLAGS}" MATCHES "[-]m32")
      IF(CPACK_SYSTEM_PROCESSOR MATCHES "x86")
        SET(CPACK_SYSTEM_PROCESSOR x86_32)
      ELSE()
        SET(CPACK_SYSTEM_PROCESSOR i386)
      ENDIF()
    # Check for building 64-bit binaries on 32-bit machines
    # Adjust names as appropriate
    ELSEIF("${CMAKE_C_FLAGS}" MATCHES "[-]m64")
      IF(CPACK_SYSTEM_PROCESSOR MATCHES "x86")
        SET(CPACK_SYSTEM_PROCESSOR x86_64)
      ELSE()
        SET(CPACK_SYSTEM_PROCESSOR amd64)
      ENDIF()
    ENDIF()
    SET(CPACK_SYSTEM_NAME ${CMAKE_SYSTEM_NAME}-${CPACK_SYSTEM_PROCESSOR})
  ELSE()
    SET(CPACK_SYSTEM_NAME ${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR})
  ENDIF()
ENDIF()

# Windows packages use fixed names.  The value is quoted so that an empty or
# list-valued CPACK_SYSTEM_NAME cannot produce a malformed IF() expression.
IF("${CPACK_SYSTEM_NAME}" MATCHES "Windows")
  IF(CMAKE_CL_64)
    SET(CPACK_SYSTEM_NAME Win64-amd64)
    SET(CPACK_NSIS_INSTALL_ROOT "$PROGRAMFILES64")
  ELSE()
    SET(CPACK_SYSTEM_NAME Win32-x86)
  ENDIF()
ENDIF()
-------------------------------------------------------------------------------- /src/include/dawg/wood_parse.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DAWG_WOOD_PARSE_H 3 | #define DAWG_WOOD_PARSE_H 4 | /**************************************************************************** 5 | * Copyright (C) 2009-2010 Reed A. Cartwright, PhD * 6 | ****************************************************************************/ 7 | 8 | #define BOOST_SPIRIT_USE_PHOENIX_V3 1 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #if SPIRIT_VERSION < 0x2020 17 | #error Spirit version 2.2 or greater required. 18 | #endif 19 | 20 | #ifdef _MSC_VER 21 | #pragma warning(push) 22 | #pragma warning(disable : 4127) 23 | #endif 24 | 25 | #include 26 | 27 | #ifdef _MSC_VER 28 | #pragma warning(pop) 29 | #endif 30 | 31 | #include 32 | 33 | #include "dawg/wood.h" 34 | 35 | namespace dawg { 36 | 37 | namespace qi = boost::spirit::qi; 38 | namespace standard = boost::spirit::standard; 39 | namespace phoenix = boost::phoenix; 40 | 41 | struct make_inode_impl { 42 | typedef void result_type; 43 | 44 | template 45 | void operator()(V& vec, const C& width) const { 46 | vec.back().anc = 1; 47 | wood_node::id_t w = static_cast(width + 1); 48 | (vec.end() - w)->anc = w; 49 | vec.push_back(wood_node(w)); 50 | } 51 | }; 52 | const phoenix::function make_inode; 53 | 54 | template 55 | struct newick_grammar 56 | : qi::grammar { 57 | // http://evolution.genetics.washington.edu/phylip/newick_doc.html 58 | typedef wood::data_type::size_type size_type; 59 | newick_grammar() : newick_grammar::base_type(start) { 60 | using phoenix::back; 61 | using phoenix::bind; 62 | using phoenix::construct; 63 | using phoenix::push_back; 64 | using phoenix::size; 65 | using qi::_1; 66 | using qi::_2; 67 | using qi::_r1; 68 | using qi::_val; 69 | using qi::eps; 70 | using qi::float_; 71 | using qi::lexeme; 72 | using 
qi::omit; 73 | using qi::raw; 74 | using standard::char_; 75 | using standard::space; 76 | 77 | start = omit[node(_val)] >> ';'; 78 | node = tip(_r1) | inode(_r1); 79 | tip = 80 | label[push_back(_r1, construct(_1))][_val = 1] >> 81 | -(':' >> float_[phoenix::bind(&wood_node::length, back(_r1)) = _1]); 82 | inode = '(' >> node(_r1)[_val = _1] >> 83 | (+(',' >> node(_r1)[make_inode(_r1, _1)][_val += _1 + 1]) | 84 | eps[make_inode(_r1, 0)][_val += 1]) >> 85 | ')' >> 86 | -(label[phoenix::bind(&wood_node::label, back(_r1)) = _1] || 87 | (':' >> 88 | float_[phoenix::bind(&wood_node::length, back(_r1)) = _1])); 89 | label = unquoted | quoted; 90 | // Due to the way hidden nodes are constructed, 91 | // unquoted labels should not begin with {, |, or }. 92 | unquoted = lexeme[(char_ - (char_(":,)(;'[]|{}") | space)) >> 93 | *(char_ - (char_(":,)(;'[]") | space))]; 94 | quoted = 95 | raw[lexeme['\'' >> *(char_ - '\'') >> 96 | *(standard::string("\'\'") >> *(char_ - '\'')) >> '\'']]; 97 | } 98 | 99 | qi::rule start; 100 | qi::rule node; 101 | qi::rule tip; 102 | qi::rule inode; 103 | qi::rule label; 104 | qi::rule unquoted; 105 | qi::rule quoted; 106 | }; 107 | 108 | template 109 | bool wood::parse(Iterator first, Iterator last) { 110 | using standard::space; 111 | newick_grammar newick_parser; 112 | bool r = qi::phrase_parse(first, last, newick_parser, space, _data); 113 | if(first != last || !r) return false; 114 | 115 | // The parser produces a tree that is tips first. 116 | // Transform to root first. 
117 | std::reverse(_data.begin(), _data.end()); 118 | 119 | if(!autolabel()) return false; 120 | 121 | return true; 122 | } 123 | 124 | } // namespace dawg 125 | #endif // DAWG_WOOD_PARSE_H 126 | -------------------------------------------------------------------------------- /src/include/dawg/wood.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DAWG_WOOD_H 3 | #define DAWG_WOOD_H 4 | /**************************************************************************** 5 | * Copyright (C) 2009 Reed A. Cartwright, PhD * 6 | ****************************************************************************/ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "dawg/log.h" 15 | 16 | namespace dawg { 17 | 18 | struct wood_node { 19 | typedef uint32_t id_t; 20 | std::string label; 21 | float length; 22 | id_t anc; 23 | id_t right; 24 | 25 | wood_node() : label(), length(), anc(), right() {} 26 | wood_node(id_t r) : label(), length(), anc(), right(r) {} 27 | wood_node(const std::string &lab, float len = 0.0f) 28 | : label(lab), length(len), anc(0), right(0) {} 29 | wood_node(const std::string &lab, const boost::optional &len) 30 | : label(lab), length(len.get_value_or(0.0f)), anc(0), right(0) {} 31 | inline bool unlabeled() const { return (label.empty() || label[0] == '('); } 32 | inline bool terminal() const { return (right == 0); } 33 | inline bool one_child() const { return (right == 1); } 34 | inline bool two_child() const { return (right > 1); } 35 | }; 36 | 37 | class wood { 38 | public: 39 | typedef wood_node node; 40 | typedef std::vector data_type; 41 | 42 | inline const data_type &data() const { return _data; } 43 | inline const std::string &root_label() const { return root_name; } 44 | inline bool has_desc(const std::string &d) const { 45 | return desc_names.count(d) != 0; 46 | } 47 | inline const std::set &desc_labels() const { 48 | return desc_names; 49 | } 50 | 51 | 
template 52 | bool parse(Iterator first, Iterator last); 53 | 54 | template 55 | bool parse(const char (&str)[N]) { 56 | return parse(&str[0], &str[N]); 57 | } 58 | bool parse(const std::string &str) { return parse(str.begin(), str.end()); } 59 | static bool parse_string(wood &w, const std::string &str); 60 | 61 | inline void scale(double d) { 62 | for(data_type::iterator it = _data.begin(); it != _data.end(); ++it) { 63 | it->length *= static_cast(d); 64 | } 65 | } 66 | 67 | bool autolabel() { 68 | if(_data.empty()) return true; 69 | desc_names.clear(); 70 | for(data_type::reverse_iterator it = _data.rbegin(); it != _data.rend(); 71 | ++it) { 72 | if(it->unlabeled() && !it->terminal()) { 73 | if(it->one_child()) { 74 | it->label = "{" + get_right(it)->label + "}"; 75 | } else { 76 | std::string &a = get_left(it)->label; 77 | std::string &b = get_right(it)->label; 78 | if(b > a) 79 | it->label = "{" + a + "," + b + "}"; 80 | else 81 | it->label = "{" + b + "," + a + "}"; 82 | } 83 | } 84 | // add label to descendents list 85 | if(!desc_names.insert(it->label).second) 86 | return DAWG_ERROR("invalid tree; node label '" 87 | << it->label 88 | << "' used more than once by Tree.Tree."); 89 | } 90 | 91 | // set root name and remove it from the desc set 92 | root_name = _data.front().label; 93 | desc_names.erase(root_name); 94 | return true; 95 | } 96 | 97 | template 98 | static Iterator get_left(Iterator me) { 99 | std::advance(me, 1); 100 | return me; 101 | } 102 | template 103 | static Iterator get_right(Iterator me) { 104 | std::advance(me, static_cast(me->right)); 105 | return me; 106 | } 107 | template 108 | static Iterator get_anc(Iterator me) { 109 | std::advance(me, -static_cast(me->anc)); 110 | return me; 111 | } 112 | template 113 | static std::reverse_iterator get_left( 114 | std::reverse_iterator me) { 115 | std::advance(me, -1); 116 | return me; 117 | } 118 | template 119 | static std::reverse_iterator get_right( 120 | std::reverse_iterator me) { 121 | 
std::advance(me, -static_cast(me->right)); 122 | return me; 123 | } 124 | template 125 | static std::reverse_iterator get_anc( 126 | std::reverse_iterator me) { 127 | std::advance(me, static_cast(me->anc)); 128 | return me; 129 | } 130 | 131 | protected: 132 | data_type _data; 133 | std::set desc_names; 134 | std::string root_name; 135 | }; 136 | 137 | } // namespace dawg 138 | 139 | #endif // DAWG_WOOD_H 140 | -------------------------------------------------------------------------------- /src/include/dawg/trick.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DAWG_TRICK_H 3 | #define DAWG_TRICK_H 4 | /**************************************************************************** 5 | * Copyright (C) 2009 Reed A. Cartwright, PhD * 6 | ****************************************************************************/ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "dawg/log.h" 16 | 17 | namespace dawg { 18 | 19 | struct trick { 20 | struct section { 21 | typedef std::vector value_type; 22 | typedef std::map db_type; 23 | std::string name; 24 | std::string inherits; 25 | db_type db; 26 | 27 | template 28 | inline void get(const std::string& k, T& r) const; 29 | template 30 | inline void get(const std::string& k, std::vector& r) const; 31 | 32 | // inline void get(const std::string& k, trick::section& r) const; 33 | 34 | inline void read_aliases(); 35 | 36 | private: 37 | static inline void conv(const std::string& ss, std::string& r); 38 | static inline void conv(const std::string& ss, double& r); 39 | static inline void conv(const std::string& ss, bool& r); 40 | static inline void conv(const std::string& ss, unsigned int& r); 41 | static inline void conv(const std::string& ss, int& r); 42 | 43 | inline void read_alias(const std::string& a, const std::string& b); 44 | }; 45 | typedef std::vector
data_type; 46 | data_type data; 47 | 48 | static bool parse_file(trick& p, const char* cs); 49 | template 50 | bool parse(Iterator first, Iterator last); 51 | template 52 | inline bool parse_stream(std::basic_istream& is); 53 | 54 | trick() { 55 | data.push_back(section()); 56 | data.back().name = "_initial_"; 57 | data.back().inherits = "_default_"; 58 | } 59 | 60 | inline void read_aliases(); 61 | }; 62 | 63 | template 64 | inline void trick::section::get(const std::string& k, T& r) const { 65 | db_type::const_iterator it; 66 | if((it = db.find(k)) != db.end() && !it->second.empty()) { 67 | section::conv(it->second.front(), r); 68 | } 69 | } 70 | 71 | template 72 | inline void trick::section::get(const std::string& k, 73 | std::vector& r) const { 74 | db_type::const_iterator it; 75 | if((it = db.find(k)) != db.end()) { 76 | T x; 77 | r.clear(); 78 | for(const std::string& ss : it->second) { 79 | section::conv(ss, x); 80 | r.push_back(x); 81 | } 82 | } 83 | } 84 | 85 | template <> 86 | inline void trick::section::get(const std::string& k, trick::section& r) const { 87 | using boost::algorithm::erase_head_copy; 88 | using boost::algorithm::starts_with; 89 | r.name = k; 90 | r.inherits = "_nothing_"; 91 | r.db.clear(); 92 | db_type::const_iterator first = db.lower_bound(k); 93 | db_type::const_iterator last; 94 | for(last = first; last != db.end() && starts_with(last->first, k); ++last) { 95 | r.db.insert(r.db.end(), 96 | make_pair(erase_head_copy(last->first, 97 | static_cast(k.length())), 98 | last->second)); 99 | } 100 | } 101 | 102 | inline void trick::section::conv(const std::string& ss, std::string& r) { 103 | r = ss; 104 | } 105 | 106 | inline void trick::section::conv(const std::string& ss, double& r) { 107 | r = strtod(ss.c_str(), nullptr); 108 | } 109 | 110 | inline void trick::section::conv(const std::string& ss, unsigned int& r) { 111 | r = strtoul(ss.c_str(), nullptr, 0); 112 | } 113 | 114 | inline void trick::section::conv(const std::string& ss, int& 
r) { 115 | r = strtol(ss.c_str(), nullptr, 0); 116 | } 117 | 118 | // A value is false if it is equal to 0, f, false, off, no, or blank 119 | inline void trick::section::conv(const std::string& ss, bool& r) { 120 | using boost::algorithm::iequals; 121 | r = !(ss.empty() || iequals(ss, "false") || iequals(ss, "0") || 122 | iequals(ss, "f") || iequals(ss, "off") || iequals(ss, "no")); 123 | } 124 | 125 | inline void trick::section::read_alias(const std::string& a, 126 | const std::string& b) { 127 | db_type::const_iterator it; 128 | // if b exists or a doesn't, stop 129 | if(db.find(b) != db.end() || (it = db.find(a)) == db.end()) return; 130 | db[b] = it->second; 131 | } 132 | 133 | inline void trick::section::read_aliases() { 134 | #define XM(aname, bname) read_alias(XP(aname), XP(bname)); 135 | #include "dawg/details/aliases.xmh" 136 | #undef XM 137 | } 138 | 139 | inline void trick::read_aliases() { 140 | for(section& sec : data) { 141 | sec.read_aliases(); 142 | } 143 | } 144 | 145 | } // namespace dawg 146 | 147 | #endif // DAWG_TRICK_H 148 | -------------------------------------------------------------------------------- /src/include/dawg/rate.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DAWG_RATE_H 3 | #define DAWG_RATE_H 4 | /**************************************************************************** 5 | * Copyright (C) 2009,2013 Reed A. Cartwright, PhD * 6 | ****************************************************************************/ 7 | 8 | // We will approximate the cont. gamma by a large disc. gamma. 
9 | #define DAWG_GAMMA_CONT_SIZE 4095 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include "dawg/utils/aliastable.h" 16 | 17 | namespace dawg { 18 | 19 | class rate_model { 20 | public: 21 | typedef alias_table::category_type category_type; 22 | 23 | template 24 | bool create(const std::string& rname, It first, It last) { 25 | static const std::string name_keys[] = {std::string("const"), 26 | std::string("gamma-invariant"), 27 | std::string("zero")}; 28 | switch(key_switch(rname, name_keys)) { 29 | case 0: 30 | return create_const(first, last); 31 | case 1: 32 | return create_gamma(first, last); 33 | case 2: 34 | return create_zero(first, last); 35 | }; 36 | return DAWG_ERROR("Invalid rate model; no model named '" << rname 37 | << "'"); 38 | } 39 | 40 | template 41 | inline bool create_const(It first, It last) { 42 | name_ = "const"; 43 | weights_.assign(1, 1.0); 44 | sample_.create(weights_); 45 | values_.assign(1, 1.0f); 46 | return true; 47 | } 48 | 49 | template 50 | bool create_gamma(It first, It last) { 51 | if(first == last) 52 | return DAWG_ERROR( 53 | "Invalid rate model; gamma-invariant requires at least 1 " 54 | "parameter"); 55 | double alpha = *first++; 56 | if(alpha < 0.0) 57 | return DAWG_ERROR( 58 | "Invalid rate model; first gamma-invariant parameter '" 59 | << alpha << "' is not >= 0."); 60 | double iota = 0.0; 61 | if(first != last) { 62 | iota = *first++; 63 | if(iota < 0.0 || iota >= 1.0) 64 | return DAWG_ERROR( 65 | "Invalid rate model; second gamma-invariant parameter '" 66 | << iota << "' is not [0,1)."); 67 | } 68 | int sz = DAWG_GAMMA_CONT_SIZE; 69 | if(first != last) { 70 | sz = static_cast(*first++); 71 | // use an upper limit to catch user mistakes. 
72 | if(sz < 1 || sz > 65535) 73 | return DAWG_ERROR( 74 | "Invalid rate model; third gamma-invariant parameter '" 75 | << sz << "' is not in [1,65535]."); 76 | } 77 | bool do_median = false; 78 | if(first != last) do_median = (*first++ != 0.0); 79 | 80 | // construct weights 81 | double gw = (1.0 - iota) / sz; 82 | weights_.assign(1 + sz, gw); 83 | weights_[0] = iota; 84 | sample_.create(weights_); 85 | 86 | // construct values 87 | values_.assign(1 + sz, 0.0f); 88 | 89 | if(do_median) { 90 | boost::math::gamma_distribution<> gamma_dist(alpha, 1.0 / alpha); 91 | for(int k = 0; k < sz; ++k) 92 | values_[k + 1] = 93 | boost::math::quantile(gamma_dist, (2 * k + 1) / (2.0 * sz)); 94 | } else { 95 | std::vector g(sz); 96 | boost::math::gamma_distribution<> gamma_dist(alpha, 1.0 / alpha); 97 | boost::math::gamma_distribution<> gamma_dist2(alpha + 1.0, 98 | 1.0 / alpha); 99 | 100 | for(int k = 0; k < sz - 1; ++k) 101 | g[k] = boost::math::quantile(gamma_dist, (k + 1.0) / sz); 102 | for(int k = 0; k < sz - 1; ++k) 103 | g[k] = boost::math::cdf(gamma_dist2, g[k]); 104 | values_[1] = g[0] * sz; 105 | for(int k = 1; k < sz - 1; ++k) 106 | values_[k + 1] = (g[k] - g[k - 1]) * sz; 107 | values_[sz] = (1.0 - g[sz - 2]) * sz; 108 | } 109 | 110 | // rescale values so that the expected value is exactly 1.0 111 | double d = 0.0; 112 | for(size_t k = 1; k < values_.size(); ++k) d += values_[k] * gw; 113 | for(size_t k = 1; k < values_.size(); ++k) values_[k] /= d; 114 | 115 | name_ = "gamma-invariant"; 116 | return true; 117 | } 118 | 119 | template 120 | inline bool create_zero(It first, It last) { 121 | name_ = "zero"; 122 | weights_.assign(1, 1.0f); 123 | sample_.create(weights_); 124 | values_.assign(1, 0.0f); 125 | return true; 126 | } 127 | 128 | inline const std::string& label() const { return name_; } 129 | 130 | category_type operator()(fragmites::random::Random& m) const { 131 | return sample_(m.u64()); 132 | } 133 | 134 | const std::vector& values() const { return 
values_; } 135 | 136 | private: 137 | alias_table sample_; 138 | std::vector weights_; 139 | std::vector values_; 140 | std::string name_; 141 | }; 142 | 143 | } /* namespace dawg */ 144 | 145 | #endif /* DAWG_RATE_H */ 146 | -------------------------------------------------------------------------------- /src/dawg.cpp: -------------------------------------------------------------------------------- 1 | /* Dawg - DNA Assembly with Gaps - Simulating Sequence Evolution 2 | Copyright (c) 2004-2012 Reed A. Cartwright, PhD 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | #include "dawg.h" 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | #include "../version.h" 25 | #include "dawg/global.h" 26 | #include "dawg/ma.h" 27 | #include "dawg/matic.h" 28 | #include "dawg/output.h" 29 | #include "dawg/trick.h" 30 | #include "dawg_app.h" 31 | 32 | #define VERSION_MSG \ 33 | DAWG_PACKAGE_STRING \ 34 | "\n" \ 35 | " Copyright (C) 2004-2013 Reed A. 
Cartwright, PhD " \ 36 | "\n" 37 | 38 | int main(int argc, char *argv[]) { 39 | int ret = EXIT_FAILURE; 40 | try { 41 | dawg_app app(argc, argv); 42 | ret = app.run(); 43 | } catch(std::exception &e) { 44 | CERROR(e.what()); 45 | } 46 | return ret; 47 | } 48 | 49 | dawg_app::dawg_app(int argc, char *argv[]) : runname(argv[0]) { 50 | // set_cli_options 51 | this->cli_app.add_option("input", arg.input, "input files"); 52 | #define XM(lname, sname, desc, type, def) \ 53 | this->cli_app.add_option( \ 54 | IFD(sname, "-" BOOST_PP_STRINGIZE sname ",") "--" XS(lname), \ 55 | arg.XV(lname), desc, def); 56 | #define XF(lname, sname, desc, type, def) \ 57 | this->cli_app.add_flag( \ 58 | IFD(sname, "-" BOOST_PP_STRINGIZE sname ",") "--" XS(lname), \ 59 | arg.XV(lname), desc); 60 | #include "dawgarg.xmh" 61 | #undef XM 62 | #undef XF 63 | 64 | try { 65 | this->cli_app.parse(argc, argv); 66 | } catch(const CLI::CallForHelp &e) { 67 | exit(this->cli_app.exit(e)); 68 | } 69 | } 70 | 71 | int dawg_app::run() { 72 | // std::string _temp(" I love %r/%R/%%/%. 
Do you?"), _out; 73 | //_out = boost::algorithm::replace_all_regex_copy(_temp, 74 | // boost::regex("%(r)|%(R)|%(%)"), std::string("?1x:?2y:z"), match_default 75 | // | format_all); cout << _out << endl << endl; 76 | 77 | if(arg.version) { 78 | std::cerr << std::endl << VERSION_MSG << std::endl << std::endl; 79 | return EXIT_SUCCESS; 80 | } 81 | if(arg.help_trick) { 82 | std::cerr << std::endl << VERSION_MSG << std::endl << std::endl; 83 | dawg::ma::help(std::cerr); 84 | return EXIT_SUCCESS; 85 | } 86 | if(arg.input.empty()) { 87 | std::cerr << std::endl << VERSION_MSG << std::endl << std::endl; 88 | std::cerr << std::endl << this->cli_app.help() << std::endl; 89 | return EXIT_SUCCESS; 90 | } 91 | 92 | // if(arg.quiet) 93 | // cerr.clear(ios::failbit); 94 | dawg::trick input; 95 | 96 | bool ret = true; 97 | for(std::string &ss : arg.input) { 98 | ret &= dawg::trick::parse_file(input, ss.c_str()); 99 | } 100 | 101 | if(!ret) return EXIT_FAILURE; 102 | // process aliases 103 | input.read_aliases(); 104 | 105 | dawg::global_options glopts; 106 | glopts.read_section(input.data.front()); 107 | 108 | unsigned int num_reps = (arg.reps > 0) ? arg.reps : glopts.sim_reps; 109 | 110 | dawg::output write_aln; 111 | const char *file_name = 112 | arg.output.empty() ? glopts.output_file.c_str() : arg.output.c_str(); 113 | 114 | if(!write_aln.open(file_name, num_reps - 1, arg.split, arg.append, 115 | arg.label)) { 116 | DAWG_ERROR("bad configuration"); 117 | return EXIT_FAILURE; 118 | } 119 | write_aln.set_blocks( 120 | glopts.output_block_head.c_str(), glopts.output_block_between.c_str(), 121 | glopts.output_block_tail.c_str(), glopts.output_block_before.c_str(), 122 | glopts.output_block_after.c_str()); 123 | 124 | std::vector configs; 125 | if(!dawg::ma::from_trick(input, configs)) { 126 | DAWG_ERROR("bad configuration"); 127 | return EXIT_FAILURE; 128 | } 129 | 130 | // Create the object that will do all the simulation 131 | // work for us. Configure its sections. 
132 | dawg::matic simulation; 133 | // if a seed was specified, use it 134 | if(arg.seed != 0) { 135 | simulation.seed(arg.seed); 136 | } else if(!glopts.sim_seed.empty()) { 137 | simulation.seed(glopts.sim_seed.begin(), glopts.sim_seed.end()); 138 | } else { 139 | simulation.auto_seed_seq(); 140 | } 141 | 142 | if(!simulation.configure(configs.begin(), configs.end())) { 143 | DAWG_ERROR("bad configuration"); 144 | return EXIT_FAILURE; 145 | } 146 | // create sets of aligned sequences; 147 | dawg::alignment aln; 148 | simulation.pre_walk(aln); 149 | for(unsigned int i = 0; i < num_reps; ++i) { 150 | simulation.walk(aln); 151 | write_aln(aln); 152 | } 153 | return EXIT_SUCCESS; 154 | } 155 | -------------------------------------------------------------------------------- /src/include/dawg/matic.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DAWG_MATIC_H 3 | #define DAWG_MATIC_H 4 | /**************************************************************************** 5 | * Copyright (C) 2009 Reed A. 
Cartwright, PhD * 6 | ****************************************************************************/ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "dawg/indel.h" 16 | #include "dawg/ma.h" 17 | #include "dawg/rate.h" 18 | #include "dawg/residue.h" 19 | #include "dawg/root.h" 20 | #include "dawg/subst.h" 21 | #include "dawg/wood.h" 22 | 23 | namespace dawg { 24 | 25 | using random_t = fragmites::random::Random; 26 | 27 | namespace details { 28 | 29 | struct indel_data { 30 | typedef std::pair element; 31 | typedef std::stack stack; 32 | 33 | stack ins; 34 | stack del; 35 | 36 | inline void clear() { 37 | while(!ins.empty()) ins.pop(); 38 | while(!del.empty()) del.pop(); 39 | } 40 | }; 41 | 42 | class matic_section { 43 | public: 44 | typedef std::vector wood_meta_type; 45 | 46 | wood usertree; 47 | wood_meta_type metatree; 48 | double tree_scale; 49 | 50 | bool gap_overlap; 51 | 52 | subst_model sub_mod; 53 | rate_model rat_mod; 54 | root_model rut_mod; 55 | indel_model ins_mod; 56 | indel_model del_mod; 57 | 58 | residue::data_type gap_base; 59 | 60 | void evolve(sequence &child, indel_data &indels, double T, 61 | residue::data_type branch_color, sequence::const_iterator first, 62 | sequence::const_iterator last, random_t &m) const; 63 | void evolve_upstream(sequence &child, indel_data &indels, double T, 64 | residue::data_type branch_color, random_t &m) const; 65 | dawg::sequence::const_iterator evolve_indels( 66 | sequence &child, indel_data &indels, double T, 67 | residue::data_type branch_color, sequence::const_iterator first, 68 | sequence::const_iterator last, random_t &m) const; 69 | 70 | inline boost::uint32_t next_indel(double d, double &f, bool bDel) const { 71 | double ins_rate = ins_mod.rate(); 72 | double del_rate = del_mod.rate(); 73 | double indel_rate = ins_rate + del_rate; 74 | if(bDel) { 75 | if(d < ins_rate) { 76 | f = d; 77 | return 2; 78 | } 79 | d -= ins_rate; 80 | } 81 | f = modf(d / 
indel_rate, &d); 82 | f *= (indel_rate); 83 | boost::uint32_t x = 2 * static_cast(d); 84 | if(f < del_rate) return x + ((bDel) ? 3 : 1); 85 | f -= del_rate; 86 | return x + ((bDel) ? 4 : 2); 87 | } 88 | 89 | inline boost::uint32_t mark_del(boost::uint32_t u, sequence &child, 90 | sequence::const_iterator &first, 91 | sequence::const_iterator last) const { 92 | boost::uint32_t uu; 93 | for(uu = 0; uu != u && first != last; ++first) { 94 | child.push_back(*first); 95 | if(first->base() == gap_base) continue; 96 | child.back().base(gap_base); 97 | child.back().rate_cat(0); 98 | ++uu; 99 | } 100 | return uu; 101 | } 102 | }; 103 | 104 | struct sequence_data { 105 | sequence seq; 106 | dawg::details::indel_data indels; 107 | }; 108 | 109 | typedef std::map seq_map; 110 | 111 | } // namespace details 112 | 113 | template 114 | inline std::basic_ostream &operator<<( 115 | std::basic_ostream &o, const alignment &aln) { 116 | for(const alignment::value_type &v : aln) { 117 | o << v.label << "\t" << v.seq << std::endl; 118 | } 119 | return o; 120 | } 121 | 122 | // Core simulation algorithm class 123 | class matic { 124 | public: 125 | // Configure Simulation 126 | inline bool configure(const dawg::ma &ma) { 127 | clear_configuration(); 128 | return (add_config_section(ma) && finalize_configuration()); 129 | } 130 | inline void clear_configuration() { configs.clear(); } 131 | 132 | template 133 | inline bool configure(It first, It last) { 134 | clear_configuration(); 135 | for(; first != last; ++first) 136 | if(!add_config_section(*first)) 137 | return DAWG_ERROR("Configuration section '" 138 | << first->name << "' failed to process."); 139 | return finalize_configuration(); 140 | } 141 | 142 | // Run the simulation 143 | void walk(alignment &aln); 144 | 145 | // Precalculate stuff for simulation 146 | void pre_walk(alignment &aln); 147 | 148 | template 149 | void seed(_It first, _It last) { 150 | fragmites::random::seed_seq_t seed(first, last); 151 | maxx.Seed(seed); 152 | 
} 153 | template 154 | void seed(T t) { 155 | maxx.Seed(t); 156 | } 157 | 158 | void auto_seed_seq() { maxx.Seed(fragmites::random::auto_seed_seq()); } 159 | 160 | matic() : branch_color(0) {} 161 | 162 | protected: 163 | typedef dawg::details::matic_section section; 164 | struct segment : public std::vector> { 165 | residue_exchange rex; 166 | }; 167 | typedef std::vector segment_vector; 168 | 169 | typedef std::map 170 | label_to_index_type; 171 | typedef std::vector seq_buffers_type; 172 | 173 | seq_buffers_type seqs; 174 | 175 | segment_vector configs; 176 | // mutt maxx; 177 | random_t maxx; 178 | 179 | residue::data_type branch_color; 180 | 181 | label_to_index_type label_union; 182 | alignment::size_type aln_size; 183 | 184 | bool add_config_section(const dawg::ma &ma); 185 | bool finalize_configuration(); 186 | 187 | void align(alignment &aln, const seq_buffers_type &seqs, 188 | const residue_exchange &rex); 189 | }; 190 | 191 | } // namespace dawg 192 | #endif // DAWG_MATIC_H 193 | -------------------------------------------------------------------------------- /Modules/ExternalDep.cmake: -------------------------------------------------------------------------------- 1 | # This CMake module finds packages and, if not found, builds them 2 | include(ExternalProject) 3 | 4 | # Prelim settings 5 | SET(EXT_PREFIX ext_deps) 6 | 7 | IF(NOT "${CMAKE_VERSION}" VERSION_LESS 3.2) 8 | SET(use_byproducts true) 9 | ENDIF() 10 | 11 | SET(EXT_CFLAGS "${CMAKE_C_FLAGS}") 12 | SET(EXT_LDFLAGS "${CMAKE_STATIC_LINKER_FLAGS}") 13 | IF(CMAKE_BUILD_TYPE) 14 | STRING(TOUPPER "${CMAKE_BUILD_TYPE}" cmake_build_type_toupper) 15 | SET(EXT_CFLAGS "${EXT_CFLAGS} ${CMAKE_C_FLAGS_${cmake_build_type_toupper}}") 16 | SET(EXT_LDFLAGS "${EXT_LDFLAGS} ${CMAKE_STATIC_LINKER_FLAGS_${cmake_build_type_toupper}}") 17 | 18 | ## Turn off debugging in libraries 19 | IF(cmake_build_type_toupper STREQUAL "RELEASE" OR 20 | cmake_build_type_toupper STREQUAL "RELWITHDEBINFO") 21 | 
ADD_DEFINITIONS("-DBOOST_DISABLE_ASSERTS") 22 | SET(boost_variant variant=release) 23 | ELSE() 24 | SET(boost_variant variant=debug) 25 | ENDIF() 26 | ENDIF() 27 | 28 | IF(NOT BUILD_EXTERNAL_PROJECTS) 29 | SET(REQ REQUIRED) 30 | SET(QUI ) 31 | ELSE() 32 | SET(REQ ) 33 | SET(QUI QUIET) 34 | STRING(TOUPPER "${BUILD_EXTERNAL_PROJECTS}" build_external_projects_toupper) 35 | IF(build_external_projects_toupper STREQUAL "FORCE") 36 | SET(BUILD_EXTERNAL_PROJECTS_FORCED ON) 37 | ENDIF() 38 | ENDIF() 39 | SET(missing_ext_deps FALSE) 40 | 41 | IF(USE_STATIC_LIBS) 42 | SET(Boost_USE_STATIC_LIBS ON) 43 | SET(CMAKE_FIND_LIBRARY_SUFFIXES ".a") 44 | ENDIF(USE_STATIC_LIBS) 45 | 46 | ADD_CUSTOM_TARGET(ext_projects) 47 | 48 | ################################################################################ 49 | # THREADS 50 | # 51 | 52 | SET(THREADS_PREFER_PTHREAD_FLAG ON) 53 | FIND_PACKAGE(Threads) 54 | 55 | ################################################################################ 56 | 57 | ################################################################################ 58 | # BOOST 59 | # 60 | 61 | IF(NOT BUILD_EXTERNAL_PROJECTS_FORCED) 62 | #IF(DEVEL_MODE) 63 | # SET(boost_devel timer) 64 | #ENDIF() 65 | FIND_PACKAGE(Boost 1.47.0 ${REQ} COMPONENTS 66 | program_options 67 | unit_test_framework 68 | #${boost_devel} 69 | ) 70 | 71 | IF(Boost_FOUND) 72 | message(STATUS "Boost library: ${Boost_LIBRARY_DIRS}") 73 | message(STATUS "Boost headers: ${Boost_INCLUDE_DIRS}") 74 | ENDIF(Boost_FOUND) 75 | ENDIF() 76 | 77 | IF(BUILD_EXTERNAL_PROJECTS AND NOT Boost_FOUND) 78 | SET(boost_ext_libdir "${CMAKE_BINARY_DIR}/${EXT_PREFIX}/boost/lib") 79 | SET(Boost_FOUND TRUE) 80 | SET(Boost_VERSION 1.60.0) 81 | SET(Boost_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/${EXT_PREFIX}/boost/include/") 82 | FILE(MAKE_DIRECTORY "${Boost_INCLUDE_DIRS}") 83 | SET(Boost_LIBRARY_DIRS "") 84 | SET(BOOST_EXT_TARGET ext_boost) 85 | SET(Boost_USE_STATIC_LIBS TRUE) 86 | 87 | SET(Boost_LIBRARIES) 88 | FOREACH(ext_boost_name 
PROGRAM_OPTIONS UNIT_TEST_FRAMEWORK) 89 | STRING(TOLOWER "${ext_boost_name}" ext_boost_lowname) 90 | SET(Boost_${ext_boost_name}_FOUND On) 91 | SET(Boost_${ext_boost_name}_LIBRARY "${boost_ext_libdir}/libboost_${ext_boost_lowname}.a") 92 | SET(Boost_LIBRARIES ${Boost_LIBRARIES} ${Boost_${ext_boost_name}_LIBRARY}) 93 | ENDFOREACH() 94 | 95 | IF(use_byproducts) 96 | SET(byproducts BUILD_BYPRODUCTS ${Boost_LIBRARIES}) 97 | ENDIF() 98 | 99 | IF(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$") 100 | SET(EXT_BOOST_CXX_TOOLSET "clang") 101 | ELSEIF(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") 102 | SET(EXT_BOOST_CXX_TOOLSET "gcc") 103 | ELSEIF(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") 104 | SET(EXT_BOOST_CXX_TOOLSET "intel") 105 | ELSE() 106 | SET(EXT_BOOST_CXX_TOOLSET "cc") 107 | ENDIF() 108 | 109 | IF(NOT DEFINED EXT_BOOST_TOOLSET) 110 | SET(EXT_BOOST_TOOLSET "${EXT_BOOST_CXX_TOOLSET}-${CMAKE_CXX_COMPILER_VERSION}" CACHE STRING "Toolset to use when building ext_boost.") 111 | ENDIF() 112 | 113 | IF(EXT_BOOST_TOOLSET) 114 | SET(boost_toolset "toolset=${EXT_BOOST_TOOLSET}") 115 | ENDIF() 116 | 117 | SET(EXT_BOOST_BOOTSTRAP_TOOLSET "cc" CACHE STRING "Toolset to use when bootstrapping ext_boost.") 118 | 119 | CONFIGURE_FILE( 120 | "${CMAKE_SOURCE_DIR}/Modules/cmake_ext_boost_bootstrap.cmake.in" 121 | "${CMAKE_BINARY_DIR}/${EXT_PREFIX}/cmake_ext_boost_bootstrap.cmake" 122 | IMMEDIATE @ONLY 123 | ) 124 | 125 | SET(boost_bootstrap 126 | "${CMAKE_COMMAND}" -P "${CMAKE_BINARY_DIR}/${EXT_PREFIX}/cmake_ext_boost_bootstrap.cmake" 127 | ) 128 | 129 | MARK_AS_ADVANCED(EXT_BOOST_TOOLSET EXT_BOOST_BOOTSTRAP_TOOLSET) 130 | 131 | IF(cxx_std_flag) 132 | SET(boost_cxxflags "cxxflags=${cxx_std_flag}") 133 | ENDIF() 134 | 135 | SET(boost_build 136 | ./b2 install 137 | --prefix= 138 | --with-program_options 139 | --with-test 140 | --disable-icu 141 | --ignore-site-config 142 | threading=multi 143 | link=static 144 | runtime-link=shared 145 | optimization=speed 146 | ${boost_toolset} 147 | 
${boost_cxxflags} 148 | ${boost_variant} 149 | ) 150 | 151 | string(REPLACE "." "_" boost_file_version "${Boost_VERSION}") 152 | 153 | ExternalProject_add(ext_boost 154 | URL http://downloads.sourceforge.net/project/boost/boost/${Boost_VERSION}/boost_${boost_file_version}.tar.bz2 155 | URL_MD5 65a840e1a0b13a558ff19eeb2c4f0cbe 156 | PREFIX "${EXT_PREFIX}/boost" 157 | BUILD_IN_SOURCE TRUE 158 | CONFIGURE_COMMAND ${boost_bootstrap} 159 | BUILD_COMMAND ${boost_build} 160 | INSTALL_COMMAND "" 161 | ${byproducts} 162 | ) 163 | ADD_DEPENDENCIES(ext_projects ext_boost) 164 | 165 | MESSAGE(STATUS "Building Boost ${Boost_VERSION} as external dependency") 166 | ENDIF() 167 | 168 | IF(Boost_FOUND) 169 | FOREACH(ext_boost_name PROGRAM_OPTIONS UNIT_TEST_FRAMEWORK) 170 | if(Boost_${ext_boost_name}_FOUND AND NOT TARGET Boost::${ext_boost_name}) 171 | add_library(Boost::${ext_boost_name} UNKNOWN IMPORTED) 172 | set_target_properties(Boost::${ext_boost_name} PROPERTIES 173 | INTERFACE_INCLUDE_DIRECTORIES "${Boost_INCLUDE_DIRS}" 174 | IMPORTED_LOCATION "${Boost_${ext_boost_name}_LIBRARY}" 175 | ) 176 | if(BOOST_EXT_TARGET) 177 | ADD_DEPENDENCIES(Boost::${ext_boost_name} ext_boost) 178 | endif() 179 | endif() 180 | ENDFOREACH() 181 | ADD_DEFINITIONS(-DBOOST_ALL_NO_LIB -DBOOST_PROGRAM_OPTIONS_NO_LIB) 182 | INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) 183 | LINK_DIRECTORIES(${Boost_LIBRARY_DIRS}) 184 | IF(NOT Boost_USE_STATIC_LIBS) 185 | ADD_DEFINITIONS(-DBOOST_DYN_LINK -DBOOST_PROGRAM_OPTIONS_DYN_LINK -DBOOST_TEST_DYN_LINK) 186 | ENDIF(NOT Boost_USE_STATIC_LIBS) 187 | ELSE() 188 | SET(missing_ext_deps TRUE) 189 | ENDIF(Boost_FOUND) -------------------------------------------------------------------------------- /src/include/dawg/details/subst_dna.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DAWG_SUBST_DNA_H 3 | #define DAWG_SUBST_DNA_H 4 | /**************************************************************************** 5 | 
* Copyright (C) 2009-2012 Reed A. Cartwright, PhD * 6 | ****************************************************************************/ 7 | 8 | namespace dawg { 9 | 10 | // name, followed by params, then freqs 11 | template 12 | bool subst_model::create_gtr(const char *mod_name, unsigned int code, It1 first1, It1 last1, It2 first2, It2 last2) { 13 | double d = 0.0; 14 | int u = 0; 15 | 16 | type_ = residue_exchange::DNA; 17 | code_ = code; 18 | 19 | // do freqs first 20 | if(!create_freqs(mod_name, first2, last2, &freqs[0], &freqs[4])) 21 | return false; 22 | for(int i=4;i<64;++i) 23 | freqs[i] = 0.0; 24 | 25 | // fill params array 26 | double params[6]; 27 | u = 0; 28 | for(;first1 != last1 && u<6;++first1,++u) { 29 | if(*first1 < 0) 30 | return DAWG_ERROR("Invalid subst model; gtr parameter #" << u 31 | << " '" << *first1 << "' is not >= 0."); 32 | params[u] = *first1; 33 | } 34 | if(u != 6) 35 | return DAWG_ERROR("Invalid subst model; gtr requires six parameters."); 36 | 37 | // construct substitution matrix 38 | double rs[4]; 39 | table[0][0] = table[1][1] = table[2][2] = table[3][3] = 0.0; 40 | table[0][1] = table[1][0] = params[0]; // A-C 41 | table[0][2] = table[2][0] = params[1]; // A-G 42 | table[0][3] = table[3][0] = params[2]; // A-T 43 | table[1][2] = table[2][1] = params[3]; // C-G 44 | table[1][3] = table[3][1] = params[4]; // C-T 45 | table[2][3] = table[3][2] = params[5]; // G-T 46 | // scale the matrix to substitution time and uniformize 47 | d = 0.0; 48 | uni_scale = 0.0; 49 | for(int i=0;i<4;++i) { 50 | for(int j=0;j<4;++j) { 51 | table[i][j] *= freqs[j]; 52 | d += table[i][j]*freqs[i]; 53 | } 54 | } 55 | for(int i=0;i<4;++i) { 56 | rs[i] = 0.0; 57 | for(int j=0;j<4;++j) { 58 | table[i][j] /= d; 59 | rs[i] += table[i][j]; 60 | } 61 | uni_scale = std::max(uni_scale, rs[i]); 62 | } 63 | // create pseudosubstitutions and transition frequencies 64 | for(int i=0;i<4;++i) 65 | table[i][i] = uni_scale - rs[i]; 66 | for(int i=0;i<4;++i) { 67 | for(int 
j=0;j<4;++j) 68 | table[i][j] /= uni_scale; 69 | } 70 | // fill in the rest of the table matrix 71 | for(int i=0;i<4;++i) 72 | for(int j=4;j<64;++j) 73 | table[i][j] = 0.0; 74 | for(int i=4;i<64;++i) 75 | for(int j=0;j<64;++j) 76 | table[i][j] = 1.0/64.0; 77 | 78 | if(!create_alias_tables()) 79 | return DAWG_ERROR("unable to create alias tables"); 80 | name = mod_name; 81 | return true; 82 | } 83 | 84 | template 85 | bool subst_model::create_jc(const char *, unsigned int code, It1 first1, It1 last1, It2 first2, It2 last2) { 86 | // equal rates and frequencies 87 | static const double ones[6] = {1.0,1.0,1.0,1.0,1.0,1.0}; 88 | return create_gtr("jc", code, &ones[0], &ones[6], &ones[0], &ones[4]); 89 | } 90 | template 91 | bool subst_model::create_f81(const char *, unsigned int code, It1 first1, It1 last1, It2 first2, It2 last2) { 92 | // equal rates and frequencies 93 | static const double ones[6] = {1.0,1.0,1.0,1.0,1.0,1.0}; 94 | return create_gtr("f81", code, &ones[0], &ones[6], first2, last2); 95 | } 96 | template 97 | bool subst_model::create_k2p(const char *, unsigned int code, It1 first1, It1 last1, It2 first2, It2 last2) { 98 | // equal rates and frequencies 99 | static const double ones[4] = {1.0,1.0,1.0,1.0}; 100 | double p[6], a, b=0.5; // this default for b means that a=r if b is not specified 101 | if(first1 == last1) 102 | return DAWG_ERROR("Invalid subst model; k2p requires one or two parameters."); 103 | a = *first1++; 104 | if(first1 != last1) 105 | b = *first1; 106 | p[1] = p[4] = a; 107 | p[0] = p[2] = p[3] = p[5] = b; 108 | return create_gtr("k2p", code, &p[0], &p[6], &ones[0], &ones[4]); 109 | } 110 | template 111 | bool subst_model::create_tn(const char *mod_name, unsigned int code, It1 first1, It1 last1, It2 first2, It2 last2) { 112 | double p[6], f[4], fr, fy, ay, ar, b; 113 | // read frequencies 114 | if(!create_freqs(mod_name, first2, last2, &f[0], &f[4])) 115 | return false; 116 | fr = f[0]+f[2]; 117 | fy = f[1]+f[3]; 118 | if(first1 == 
last1) 119 | return DAWG_ERROR("Invalid subst model; " << mod_name << " requires two or three parameters."); 120 | ay = *first1++; 121 | if(first1 == last1) 122 | return DAWG_ERROR("Invalid subst model; " << mod_name << " requires two or three parameters."); 123 | ar = *first1++; 124 | if(first1 == last1) { 125 | // two parameters 126 | double R = ay, rho=ar; 127 | ay = (fr*fy*R-f[0]*f[2]-f[1]*f[3])/ 128 | (2.0*(1.0+R)*(fy*f[0]*f[2]*rho+fr*f[1]*f[3])); 129 | ar = rho*ay; 130 | b = 0.5/(fr*fy*(1.0+R)); 131 | ar = ar/fr+b; 132 | ay = ay/fy+b; 133 | 134 | } else { 135 | // three parameters 136 | b = *first1; 137 | } 138 | p[1] = ar; 139 | p[4] = ay; 140 | p[0] = p[2] = p[3] = p[5] = b; 141 | return create_gtr(mod_name, code, &p[0], &p[6], &f[0], &f[4]); 142 | } 143 | 144 | template 145 | bool subst_model::create_tn_f04(const char *mod_name, unsigned int code, It1 first1, It1 last1, It2 first2, It2 last2) { 146 | double p[6], f[4], fr, fy, ay, ar, b; 147 | // read frequencies 148 | if(!create_freqs(mod_name, first2, last2, &f[0], &f[4])) 149 | return false; 150 | fr = f[0]+f[2]; 151 | fy = f[1]+f[3]; 152 | if(first1 == last1) 153 | return DAWG_ERROR("Invalid subst model; " << mod_name << " requires two or three parameters."); 154 | ay = *first1++; 155 | if(first1 == last1) 156 | return DAWG_ERROR("Invalid subst model; " << mod_name << " requires two or three parameters."); 157 | ar = *first1++; 158 | if(first1 == last1) { 159 | // two parameters 160 | double R = ay, rho=ar; 161 | ay = (fr*fy*R-f[0]*f[2]-f[1]*f[3])/ 162 | (2.0*(1.0+R)*(fy*f[0]*f[2]*rho+fr*f[1]*f[3])); 163 | ar = rho*ay; 164 | b = 0.5/(fr*fy*(1.0+R)); 165 | } else { 166 | // three parameters 167 | b = *first1; 168 | } 169 | p[1] = ar/fr+b; 170 | p[4] = ay/fy+b; 171 | p[0] = p[2] = p[3] = p[5] = b; 172 | return create_gtr(mod_name, code, &p[0], &p[6], &f[0], &f[4]); 173 | } 174 | 175 | template 176 | bool subst_model::create_f84(const char *, unsigned int code, It1 first1, It1 last1, It2 first2, It2 
last2) { 177 | double p[3]; 178 | if(first1 == last1) 179 | return DAWG_ERROR("Invalid subst model; f84 requires one or two parameters."); 180 | double a = *first1++; 181 | if(first1 == last1) { 182 | p[0] = a; 183 | p[1] = 1.0; 184 | return create_tn_f04("f84", code, &p[0], &p[2], first2, last2); 185 | } 186 | double b = *first1; 187 | p[0] = p[1] = a; 188 | p[2] = b; 189 | return create_tn_f04("f84", code, &p[0], &p[3], first2, last2); 190 | } 191 | 192 | template 193 | bool subst_model::create_hky(const char *, unsigned int code, It1 first1, It1 last1, It2 first2, It2 last2) { 194 | double p[3]; 195 | if(first1 == last1) 196 | return DAWG_ERROR("Invalid subst model; hky requires one or two parameters."); 197 | double a = *first1++; 198 | if(first1 == last1) { 199 | p[0] = a; 200 | p[1] = 1.0; 201 | return create_tn("hky", code, &p[0], &p[2], first2, last2); 202 | } 203 | double b = *first1; 204 | p[0] = p[1] = a; 205 | p[2] = b; 206 | return create_tn("hky", code, &p[0], &p[3], first2, last2); 207 | } 208 | 209 | }; 210 | 211 | #endif // DAWG_SUBST_DNA_H 212 | 213 | -------------------------------------------------------------------------------- /Modules/FindGperftools.cmake: -------------------------------------------------------------------------------- 1 | # File Copied from https://raw.githubusercontent.com/m-a-d-n-e-s-s/madness/master/cmake/modules/FindGperftools.cmake 2 | # 3 | # - Try to find Google performance tools (gperftools) 4 | # Input variables: 5 | # GPERFTOOLS_ROOT_DIR - The gperftools install directory 6 | # GPERFTOOLS_INCLUDE_DIR - The gperftools include directory 7 | # GPERFTOOLS_LIBRARY - The gperftools library directory 8 | # Components: profiler, and tcmalloc or tcmalloc_minimal 9 | # Output variables: 10 | # GPERFTOOLS_FOUND - System has gperftools 11 | # GPERFTOOLS_INCLUDE_DIRS - The gperftools include directories 12 | # GPERFTOOLS_LIBRARIES - The libraries needed to use gperftools 13 | # GPERFTOOLS_VERSION - The version string for 
gperftools

include(FindPackageHandleStandardArgs)

if(NOT DEFINED GPERFTOOLS_FOUND)

  # Check to see if libunwind is required
  set(GPERFTOOLS_DISABLE_PROFILER FALSE)
  if((";${Gperftools_FIND_COMPONENTS};" MATCHES ";profiler;") AND
      (CMAKE_SYSTEM_NAME MATCHES "Linux" OR
       CMAKE_SYSTEM_NAME MATCHES "BlueGeneQ" OR
       CMAKE_SYSTEM_NAME MATCHES "BlueGeneP") AND
      (CMAKE_SIZEOF_VOID_P EQUAL 8))

    # Libunwind is required by profiler on this platform
    if(Gperftools_FIND_REQUIRED_profiler OR Gperftools_FIND_REQUIRED_tcmalloc_and_profiler)
      find_package(Libunwind 0.99 REQUIRED)
    else()
      # The profiler was requested as optional: without a suitable libunwind
      # it is dropped further down instead of failing the configure step.
      find_package(Libunwind)
      if(NOT LIBUNWIND_FOUND OR LIBUNWIND_VERSION VERSION_LESS 0.99)
        set(GPERFTOOLS_DISABLE_PROFILER TRUE)
      endif()
    endif()
  endif()

  # Check for invalid components
  foreach(_comp ${Gperftools_FIND_COMPONENTS})
    if((NOT _comp STREQUAL "tcmalloc_and_profiler") AND
        (NOT _comp STREQUAL "tcmalloc") AND
        (NOT _comp STREQUAL "tcmalloc_minimal") AND
        (NOT _comp STREQUAL "profiler"))
      message(FATAL_ERROR "Invalid component specified for Gperftools: ${_comp}")
    endif()
  endforeach()

  # Check for valid component combinations
  if(";${Gperftools_FIND_COMPONENTS};" MATCHES ";tcmalloc_and_profiler;" AND
      (";${Gperftools_FIND_COMPONENTS};" MATCHES ";tcmalloc;" OR
       ";${Gperftools_FIND_COMPONENTS};" MATCHES ";tcmalloc_minimal;" OR
       ";${Gperftools_FIND_COMPONENTS};" MATCHES ";profiler;"))
    message("ERROR: Invalid component selection for Gperftools: ${Gperftools_FIND_COMPONENTS}")
    message("ERROR: Gperftools cannot link both tcmalloc_and_profiler with the tcmalloc, tcmalloc_minimal, or profiler libraries")
    message(FATAL_ERROR "Gperftools component list is invalid")
  endif()
  if(";${Gperftools_FIND_COMPONENTS};" MATCHES ";tcmalloc;" AND ";${Gperftools_FIND_COMPONENTS};" MATCHES ";tcmalloc_minimal;")
    message("ERROR: Invalid component selection for Gperftools: ${Gperftools_FIND_COMPONENTS}")
    message("ERROR: Gperftools cannot link both tcmalloc and tcmalloc_minimal")
    message(FATAL_ERROR "Gperftools component list is invalid")
  endif()

  # Set default search paths for gperftools
  if(GPERFTOOLS_ROOT_DIR)
    set(GPERFTOOLS_INCLUDE_DIR ${GPERFTOOLS_ROOT_DIR}/include CACHE PATH "The include directory for gperftools")
    if(CMAKE_SIZEOF_VOID_P EQUAL 8 AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
      set(GPERFTOOLS_LIBRARY ${GPERFTOOLS_ROOT_DIR}/lib64;${GPERFTOOLS_ROOT_DIR}/lib CACHE PATH "The library directory for gperftools")
    else()
      set(GPERFTOOLS_LIBRARY ${GPERFTOOLS_ROOT_DIR}/lib CACHE PATH "The library directory for gperftools")
    endif()
  endif()

  find_path(GPERFTOOLS_INCLUDE_DIRS NAMES gperftools/malloc_extension.h
    HINTS ${GPERFTOOLS_INCLUDE_DIR})

  # Search for component libraries
  foreach(_comp tcmalloc_and_profiler tcmalloc tcmalloc_minimal profiler)
    find_library(GPERFTOOLS_${_comp}_LIBRARY ${_comp}
      HINTS ${GPERFTOOLS_LIBRARY})
    if(GPERFTOOLS_${_comp}_LIBRARY)
      set(GPERFTOOLS_${_comp}_FOUND TRUE)
    else()
      set(GPERFTOOLS_${_comp}_FOUND FALSE)
    endif()

    # Exclude profiler from the found list if libunwind is required but not found
    if(GPERFTOOLS_${_comp}_FOUND AND ${_comp} MATCHES "profiler" AND GPERFTOOLS_DISABLE_PROFILER)
      set(GPERFTOOLS_${_comp}_FOUND FALSE)
      set(GPERFTOOLS_${_comp}_LIBRARY "GPERFTOOLS_${_comp}_LIBRARY-NOTFOUND")
      message("WARNING: Gperftools '${_comp}' requires libunwind 0.99 or later.")
      message("WARNING: Gperftools '${_comp}' will be disabled.")
    endif()

    # Only explicitly requested components are collected here
    if(";${Gperftools_FIND_COMPONENTS};" MATCHES ";${_comp};" AND GPERFTOOLS_${_comp}_FOUND)
      list(APPEND GPERFTOOLS_LIBRARIES "${GPERFTOOLS_${_comp}_LIBRARY}")
    endif()
  endforeach()

  # Set gperftools libraries if not set based on component list
  if(NOT GPERFTOOLS_LIBRARIES)
    if(GPERFTOOLS_tcmalloc_and_profiler_FOUND)
      set(GPERFTOOLS_LIBRARIES "${GPERFTOOLS_tcmalloc_and_profiler_LIBRARY}")
    elseif(GPERFTOOLS_tcmalloc_FOUND AND GPERFTOOLS_profiler_FOUND)
      set(GPERFTOOLS_LIBRARIES "${GPERFTOOLS_tcmalloc_LIBRARY}" "${GPERFTOOLS_profiler_LIBRARY}")
    elseif(GPERFTOOLS_profiler_FOUND)
      set(GPERFTOOLS_LIBRARIES "${GPERFTOOLS_profiler_LIBRARY}")
    elseif(GPERFTOOLS_tcmalloc_FOUND)
      set(GPERFTOOLS_LIBRARIES "${GPERFTOOLS_tcmalloc_LIBRARY}")
    elseif(GPERFTOOLS_tcmalloc_minimal_FOUND)
      set(GPERFTOOLS_LIBRARIES "${GPERFTOOLS_tcmalloc_minimal_LIBRARY}")
    endif()
  endif()

  # handle the QUIETLY and REQUIRED arguments and set GPERFTOOLS_FOUND to TRUE
  # if all listed variables are TRUE
  find_package_handle_standard_args(GPERFTOOLS
    FOUND_VAR GPERFTOOLS_FOUND
    REQUIRED_VARS GPERFTOOLS_LIBRARIES GPERFTOOLS_INCLUDE_DIRS
    HANDLE_COMPONENTS)

  mark_as_advanced(GPERFTOOLS_INCLUDE_DIR GPERFTOOLS_LIBRARY
    GPERFTOOLS_INCLUDE_DIRS GPERFTOOLS_LIBRARIES)

  # Add linker flags that instruct the compiler to exclude built in memory
  # allocation functions. This works for GNU, Intel, and Clang. Other compilers
  # may need to be added in the future.
  if(GPERFTOOLS_LIBRARIES MATCHES "tcmalloc")
    if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR
        (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") OR
        (CMAKE_CXX_COMPILER_ID MATCHES "Clang") OR
        ((CMAKE_CXX_COMPILER_ID MATCHES "Intel") AND (NOT CMAKE_CXX_PLATFORM_ID MATCHES "Windows")))
      list(APPEND GPERFTOOLS_LIBRARIES "-fno-builtin-malloc"
        "-fno-builtin-calloc" "-fno-builtin-realloc" "-fno-builtin-free")
    endif()
  endif()

  # Add libunwind flags to gperftools if the profiler is being used
  if(GPERFTOOLS_LIBRARIES MATCHES "profiler" AND LIBUNWIND_FOUND)
    #list(APPEND GPERFTOOLS_INCLUDE_DIRS "${LIBUNWIND_INCLUDE_DIRS}")
    #list(APPEND GPERFTOOLS_LIBRARIES "${LIBUNWIND_LIBRARIES}")
  endif()

  unset(GPERFTOOLS_DISABLE_PROFILER)

endif()

if(GPERFTOOLS_FOUND)
  if(NOT TARGET GPERFTOOLS::GPERFTOOLS)
    # GPERFTOOLS_LIBRARIES is a list: it can hold two libraries and, for
    # tcmalloc, the -fno-builtin-* flags appended above. IMPORTED_LOCATION
    # names exactly one file, so the list is carried as the link items of an
    # INTERFACE IMPORTED target instead.
    add_library(GPERFTOOLS::GPERFTOOLS INTERFACE IMPORTED)
    set_target_properties(GPERFTOOLS::GPERFTOOLS PROPERTIES
      INTERFACE_INCLUDE_DIRECTORIES "${GPERFTOOLS_INCLUDE_DIRS}"
      INTERFACE_LINK_LIBRARIES "${GPERFTOOLS_LIBRARIES}")
    if(GPERFTOOLS_LIBRARIES MATCHES "profiler" AND LIBUNWIND_FOUND)
      # APPEND so the libraries set just above are kept
      set_property(TARGET GPERFTOOLS::GPERFTOOLS APPEND PROPERTY
        INTERFACE_LINK_LIBRARIES LIBUNWIND::LIBUNWIND)
      add_dependencies(GPERFTOOLS::GPERFTOOLS LIBUNWIND::LIBUNWIND)
    endif()
  endif()
endif()
--------------------------------------------------------------------------------
/src/include/dawg/trick_parse.h:
--------------------------------------------------------------------------------
#pragma once
#ifndef DAWG_TRICK_PARSE_H
#define DAWG_TRICK_PARSE_H
/****************************************************************************
 * Copyright (C) 2009-2010 Reed A. 
Cartwright, PhD * 6 | ****************************************************************************/ 7 | 8 | #define BOOST_SPIRIT_USE_PHOENIX_V3 1 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include "dawg/trick.h" 15 | #if SPIRIT_VERSION < 0x2020 16 | #error Spirit version 2.2 or greater required. 17 | #endif 18 | 19 | #include 20 | #include 21 | 22 | #ifdef _MSC_VER 23 | #pragma warning(push) 24 | #pragma warning(disable : 4127) 25 | #endif 26 | 27 | #include 28 | 29 | #ifdef _MSC_VER 30 | #pragma warning(pop) 31 | #endif 32 | 33 | #include 34 | #include 35 | 36 | namespace dawg { 37 | 38 | namespace qi = boost::spirit::qi; 39 | namespace standard = boost::spirit::standard; 40 | namespace phoenix = boost::phoenix; 41 | 42 | namespace details { 43 | using line_type = std::pair >; 44 | typedef std::vector subsection_body_type; 45 | typedef std::pair subsection_type; 46 | typedef std::vector section_body_type; 47 | typedef std::pair section_header_type; 48 | typedef std::pair section_type; 49 | typedef std::vector trick_raw_type; 50 | } // namespace details 51 | // Skip parser: matches a single whitespace character, or a '#' comment running up to (not including) the end of the line. 52 | template 53 | struct white_space : qi::grammar { 54 | white_space() : white_space::base_type(start) { 55 | using qi::eoi; 56 | using qi::eol; 57 | using standard::char_; 58 | using standard::space; 59 | start = space | ('#' >> *(char_ - eol)); 60 | } 61 | qi::rule start; 62 | }; 63 | // Grammar for trick input: zero or more sections; each is a [[name]] or [[name=parent]] header and/or a body made of [subsection] headers and 'id = value, value, ...' lines. 64 | template 65 | struct trick_grammar 66 | : qi::grammar { 67 | trick_grammar() : trick_grammar::base_type(start) { 68 | using qi::eol; 69 | using qi::lexeme; 70 | using qi::raw; 71 | using standard::alnum; 72 | using standard::char_; 73 | using standard::graph; 74 | using standard::print; 75 | using standard::space; 76 | 77 | start = *section; 78 | section = section_header || section_body; 79 | section_header = "[[" >> id >> -('=' >> id) >> "]]"; 80 | section_body = +subsection; 81 | subsection = subsection_header || subsection_body; 82 | subsection_header = '[' >> -id >> ']'; 83 | subsection_body = +line; 84 
| line = id >> '=' >> (trick_string % ','); 85 | id = lexeme[+(alnum | char_("._-"))]; 86 | trick_string = 87 | qqquoted_string | quoted_string | tree_string | bare_string; 88 | bare_string = lexeme[+(graph - char_(",#\"[]=()"))]; 89 | tree_string = lexeme[char_("(") >> +(char_ - ';') >> char_(";")]; 90 | quoted_string = lexeme['"' >> *(print - '"') >> '"']; 91 | qqquoted_string = lexeme["\"\"\"" >> *(char_ - "\"\"\"") >> "\"\"\""]; 92 | } 93 | 94 | qi::rule start; 95 | qi::rule section; 96 | qi::rule 97 | section_header; 98 | qi::rule section_body; 99 | qi::rule subsection; 100 | qi::rule 101 | subsection_body; 102 | qi::rule line; 103 | qi::rule subsection_header; 104 | qi::rule id; 105 | qi::rule trick_string; 106 | qi::rule bare_string; 107 | qi::rule tree_string; 108 | qi::rule quoted_string; 109 | qi::rule qqquoted_string; 110 | }; 111 | /* Parses [first,last) with trick_grammar (white_space as skipper) and stores every 'id = values' line in the current section's db under the lower-cased key 'subheader.id'. Returns the result of DAWG_ERROR("parsing failed.") unless the whole range is consumed. */ 112 | template 113 | bool trick::parse(Iterator first, Iterator last) { 114 | using boost::algorithm::starts_with; 115 | using boost::algorithm::to_lower; 116 | details::trick_raw_type pyle; 117 | white_space wasp; 118 | trick_grammar > pyler; 119 | bool r = qi::phrase_parse(first, last, pyler, wasp, pyle); 120 | if(first != last || !r) return DAWG_ERROR("parsing failed."); 121 | std::string header("_initial_"), subheader(""); 122 | int autonum = 1; // TODO: What happens with multiple trick files? 123 | section *psec = &data.front(); // lines seen before any [[header]] go to data.front(); NOTE(review): assumes data is not empty here - confirm 124 | for(details::trick_raw_type::const_iterator it = pyle.begin(); 125 | it != pyle.end(); ++it) { 126 | const details::section_type &sec = *it; 127 | // if section header is not blank, we have a new section 128 | if(!sec.first.first.empty()) { 129 | data.push_back(section()); 130 | psec = &data.back(); 131 | // if section parent is blank, inherit from previous 132 | psec->inherits = 133 | sec.first.second.empty() ? 
header : sec.first.second; 134 | // set new header and reset subheader 135 | if(sec.first.first != "-") { 136 | header = sec.first.first; 137 | } else { // a header of "-" is replaced by a generated name, "Unnamed Section N" 138 | using boost::spirit::karma::generate; 139 | using boost::spirit::karma::int_; 140 | char x[16 + 12] = "Unnamed Section "; 141 | char *p = x + 16; 142 | generate(p, int_, autonum++); 143 | *p = 0; 144 | header = x; 145 | } 146 | psec->name = header; 147 | subheader.clear(); 148 | } 149 | const details::section_body_type &body = sec.second; 150 | for(details::section_body_type::const_iterator ssit = body.begin(); 151 | ssit != body.end(); ++ssit) { 152 | const std::string &h = ssit->first; 153 | if(h.empty()) // if h is empty, clear subheader 154 | subheader.clear(); 155 | else if(starts_with(h, "..")) { // if h starts with '..', drop one trailing '.'-separated component of subheader per '..' 156 | std::string hh = h; 157 | do { // repeat as needed 158 | size_t pos = subheader.rfind('.'); 159 | subheader.erase((pos == std::string::npos) ? 0 : pos); 160 | hh.erase(0, 2); 161 | } while(starts_with(hh, "..")); 162 | if(!subheader.empty() && !hh.empty()) subheader.append(1, '.'); 163 | subheader.append(hh); 164 | } else if(starts_with(h, 165 | ".")) { // if h begins with a single period, 166 | // append it to existing subheader 167 | if(!subheader.empty() && h.size() > 1) 168 | subheader.append(1, '.').append(h.begin() + 1, h.end()); 169 | else 170 | subheader.append(h); 171 | } else { // set subheader 172 | subheader = h; 173 | } 174 | // cycle through lines 175 | for(details::subsection_body_type::const_iterator lit = 176 | ssit->second.begin(); 177 | lit != ssit->second.end(); ++lit) { 178 | std::string hh(subheader); 179 | if(!hh.empty()) hh.append(1, '.'); 180 | hh.append(lit->first); 181 | to_lower(hh); 182 | psec->db[hh] = lit->second; 183 | } 184 | } 185 | } 186 | return true; 187 | } 188 | // Parses everything readable from `is` by wrapping it in stream iterators and calling parse(); skipws is cleared on the stream first. 189 | template 190 | inline bool trick::parse_stream(std::basic_istream &is) { 191 | is.unsetf(std::ios::skipws); 192 | boost::spirit::basic_istream_iterator first(is), last; 
193 | return parse(first, last); 194 | } 195 | 196 | } // namespace dawg 197 | 198 | #endif // DAWG_TRICK_PARSE_H 199 | --------------------------------------------------------------------------------