├── .circleci
    └── config.yml
├── .gitignore
├── .travis.yml
├── AUTHORS
├── COPYING
├── ChangeLog
├── INSTALL
├── LICENSE
├── Makefile.am
├── Makefile.in
├── README.md
├── REVISION_HISTORY.md
├── Sequence
    ├── AlignStream.hpp
    ├── Alignment.hpp
    ├── AlleleCountMatrix.hpp
    ├── Clustalw.hpp
    ├── Coalescent
    │   ├── Coalesce.hpp
    │   ├── Coalescent.hpp
    │   ├── DemographicModels.hpp
    │   ├── FragmentsRescaling.hpp
    │   ├── Initialize.hpp
    │   ├── Makefile.am
    │   ├── Makefile.in
    │   ├── Mutation.hpp
    │   ├── NeutralSample.hpp
    │   ├── Recombination.hpp
    │   ├── SimTypes.hpp
    │   ├── Trajectories.hpp
    │   ├── TreeOperations.hpp
    │   └── bits
    │   │   ├── Coalesce.tcc
    │   │   ├── DemographicModels.tcc
    │   │   ├── Makefile.am
    │   │   ├── Makefile.in
    │   │   ├── Mutation.tcc
    │   │   ├── Recombination.tcc
    │   │   └── Trajectories.tcc
    ├── CodonTable.hpp
    ├── Comeron95.hpp
    ├── Comparisons.hpp
    ├── ComplementBase.hpp
    ├── CountingOperators.hpp
    ├── FST.hpp
    ├── Fasta.hpp
    ├── Grantham.hpp
    ├── GranthamWeights.hpp
    ├── HKA.hpp
    ├── Hudson2001.hpp
    ├── Kimura80.hpp
    ├── Makefile.am
    ├── Makefile.in
    ├── NonOwningCapsules.hpp
    ├── PathwayHelper.hpp
    ├── PolyFunctional.hpp
    ├── PolySIM.hpp
    ├── PolySNP.hpp
    ├── PolySNPimpl.hpp
    ├── PolySites.hpp
    ├── PolyTable.hpp
    ├── PolyTableFunctions.hpp
    ├── PolyTableSlice.hpp
    ├── Recombination.hpp
    ├── RedundancyCom95.hpp
    ├── Seq.hpp
    ├── SeqAlphabets.hpp
    ├── SeqConstants.hpp
    ├── SeqEnums.hpp
    ├── SeqFunctors.hpp
    ├── SeqProperties.hpp
    ├── SeqRegexes.hpp
    ├── SeqUtilities.hpp
    ├── SimData.hpp
    ├── SimParams.hpp
    ├── SimpleSNP.hpp
    ├── SingleSub.hpp
    ├── Sites.hpp
    ├── StateCounts.hpp
    ├── SummStatsDeprecated.hpp
    ├── SummStatsDeprecated
    │   ├── Garud.hpp
    │   ├── Makefile.am
    │   ├── Makefile.in
    │   ├── Snn.hpp
    │   ├── lHaf.hpp
    │   └── nSL.hpp
    ├── ThreeSubs.hpp
    ├── Translate.hpp
    ├── Translate2.hpp
    ├── TwoSubs.hpp
    ├── Unweighted.hpp
    ├── VariantMatrix.hpp
    ├── VariantMatrixCapsule.hpp
    ├── VariantMatrixViews.hpp
    ├── VectorCapsules.hpp
    ├── WeightingSchemes.hpp
    ├── bamreader.hpp
    ├── bamrecord.hpp
    ├── bits
    │   ├── AlignStream.tcc
    │   ├── Alignment.tcc
    │   ├── Clustalw.tcc
    │   ├── CountingOperators.tcc
    │   ├── Makefile.am
    │   ├── Makefile.in
    │   ├── PolySites.tcc
    │   ├── PolyTable.tcc
    │   ├── PolyTableFunctions.tcc
    │   ├── PolyTableSlice.tcc
    │   ├── Snn.tcc
    │   ├── col_view_iterator.hpp
    │   ├── descriptiveStats.tcc
    │   ├── phylipData.tcc
    │   └── variant_matrix_views_internal.hpp
    ├── descriptiveStats.hpp
    ├── fastq.hpp
    ├── phylipData.hpp
    ├── polySiteVector.hpp
    ├── samflag.hpp
    ├── samfunctions.hpp
    ├── samrecord.hpp
    ├── shortestPath.hpp
    ├── stateCounter.hpp
    ├── summstats.hpp
    ├── summstats
    │   ├── Makefile.am
    │   ├── Makefile.in
    │   ├── algorithm.hpp
    │   ├── allele_counts.hpp
    │   ├── auxillary.hpp
    │   ├── classics.hpp
    │   ├── garud.hpp
    │   ├── generic.hpp
    │   ├── ld.hpp
    │   ├── lhaf.hpp
    │   ├── nSLiHS.hpp
    │   ├── nsl.hpp
    │   ├── nslx.hpp
    │   ├── nvariablesites.hpp
    │   ├── thetah.hpp
    │   ├── thetal.hpp
    │   ├── thetapi.hpp
    │   ├── thetaw.hpp
    │   └── util.hpp
    ├── typedefs.hpp
    └── variant_matrix
    │   ├── Makefile.am
    │   ├── Makefile.in
    │   ├── filtering.hpp
    │   ├── msformat.hpp
    │   └── windows.hpp
├── aclocal.m4
├── compile
├── config-h.in.in
├── config.guess
├── config.h.in
├── config.sub
├── configure
├── configure.ac
├── depcomp
├── doc
    ├── Makefile
    ├── images
    │   ├── 2subs
    │   ├── 2subs.jpg
    │   ├── 2subs.pdf
    │   ├── 3subs
    │   ├── 3subs.jpg
    │   └── 3subs.pdf
    ├── libsequence.bib
    ├── libsequence.doxygen.in
    └── md
    │   └── tutorial.md
├── examples
    ├── Makefile.am
    ├── Makefile.in
    ├── Makefile.old
    ├── PolyTableIterators.cc
    ├── baseComp.cc
    ├── codons.cc
    ├── correlations.cc
    ├── int_handler.cc
    ├── int_handler.hpp
    ├── mean_nSLx.cc
    ├── ms_to_VariantMatrix.cc
    ├── msstats.cc
    ├── nSL_from_ms.cc
    ├── nSL_vs_nSLx.cc
    ├── polySiteVector_test.cc
    ├── slidingWindow.cc
    ├── slidingWindow2.cc
    ├── test_SimDataIO.cc
    ├── translateTest.cc
    ├── ufs.cc
    └── valid_dna.cc
├── index.md
├── init_autotools.sh
├── install-sh
├── ltmain.sh
├── m4
    ├── ax_cxx_compile_stdxx_11.m4
    ├── libtool.m4
    ├── ltoptions.m4
    ├── ltsugar.m4
    ├── ltversion.m4
    └── lt~obsolete.m4
├── missing
├── pandoc.css
├── src
    ├── ChangeLog
    ├── Coalescent
    │   ├── CoalescentCoalesce.cc
    │   ├── CoalescentFragmentsRescaling.cc
    │   ├── CoalescentInitialize.cc
    │   ├── CoalescentMutation.cc
    │   ├── CoalescentRecombination.cc
    │   ├── CoalescentSimTypes.cc
    │   └── CoalescentTreeOperations.cc
    ├── CodonTable.cc
    ├── Comeron95.cc
    ├── Comparisons.cc
    ├── ComplementBase.cc
    ├── Grantham.cc
    ├── GranthamWeights.cc
    ├── Kimura80.cc
    ├── Makefile.am
    ├── Makefile.in
    ├── PathwayHelper.cc
    ├── PolySites.cc
    ├── PolyTable.cc
    ├── PolyTableFunctions.cc
    ├── PolyTableManip.cc
    ├── RedundancyCom95.cc
    ├── Seq
    │   ├── Fasta.cc
    │   ├── Seq.cc
    │   └── fastq.cc
    ├── SeqAlphabets.cc
    ├── SeqConstants.cc
    ├── SimData.cc
    ├── SimParams.cc
    ├── SimpleSNP.cc
    ├── SingleSub.cc
    ├── Sites.cc
    ├── Specializations.cc
    ├── ThreeSubs.cc
    ├── Translate.cc
    ├── TwoSubs.cc
    ├── Unweighted.cc
    ├── hts
    │   ├── bamreader.cc
    │   ├── bamrecord.cc
    │   ├── samflag.cc
    │   ├── samfunctions.cc
    │   └── samrecord.cc
    ├── libsequenceConfig.cc
    ├── polySiteVector.cc
    ├── shortestPath.cc
    ├── stateCounter.cc
    ├── summstats
    │   ├── algorithm.hpp
    │   ├── allele_counts.cc
    │   ├── auxillary.cc
    │   ├── faywuh.cc
    │   ├── garud.cc
    │   ├── generic.cc
    │   ├── haplotype_statistics.cc
    │   ├── hprime.cc
    │   ├── hprime_faywuh_aggregator.hpp
    │   ├── ld.cc
    │   ├── lhaf.cc
    │   ├── nsl.cc
    │   ├── nsl_common.hpp
    │   ├── nslx.cc
    │   ├── nvariablesites.cc
    │   ├── rmin.cc
    │   ├── tajd.cc
    │   ├── thetah_thetal.cc
    │   ├── thetapi.cc
    │   └── thetaw.cc
    ├── summstats_deprecated
    │   ├── FST.cc
    │   ├── Garud.cc
    │   ├── HKA.cc
    │   ├── PolySIM.cc
    │   ├── PolySNP.cc
    │   ├── Recombination.cc
    │   ├── Snn.cc
    │   ├── SummStats.cc
    │   ├── lHaf.cc
    │   └── nSL.cc
    └── variant_matrix
    │   ├── AlleleCountMatrix.cc
    │   ├── StateCounts.cc
    │   ├── VariantMatrix.cc
    │   ├── VariantMatrixViews.cc
    │   ├── capsule.cc
    │   ├── filtering.cc
    │   ├── nonowningcapsules.cc
    │   └── windows.cc
├── test-driver
└── test
    ├── AlignStreamTest.cc
    ├── AlignmentTest.cc
    ├── ComparisonsTest.cc
    ├── CountingOperators.cc
    ├── FastaConstructors.cc
    ├── FastaExplicitIO.cc
    ├── FastaIO.cc
    ├── FastaOperations.cc
    ├── Makefile.am
    ├── Makefile.in
    ├── PolySIMtest.cc
    ├── PolySNPtest.cc
    ├── PolySitesIO.cc
    ├── PolyTableBadBehavior.cc
    ├── PolyTableConversions.cc
    ├── PolyTableSliceTest.cc
    ├── PolyTableTweaking.cc
    ├── README.md
    ├── RedundancyCom95test.cc
    ├── Seq8test.cc
    ├── SeqConversions.cc
    ├── SimpleSNPIO.cc
    ├── VariantMatrixFixture.hpp
    ├── VariantMatrixTest.cc
    ├── alphabets.cc
    ├── data
        ├── CG15644-Z.aln
        ├── README.md
        ├── data.fastq
        ├── phylip_input.txt
        └── single_ms.txt
    ├── fastqConstructors.cc
    ├── fastqIO.cc
    ├── libseq_unit_tests.cc
    ├── msformatdata.cc
    ├── msformatdata.hpp
    ├── msprime_data_fixture.hpp
    ├── polySiteVectorTest.cc
    ├── runTests.sh
    ├── stateCounterTest.cc
    ├── testAlleleCountMatrix.cc
    ├── testClassicSummstats.cc
    ├── testClassicSummstatsEmptyVariantMatrix.cc
    ├── testGarudStatistics.cc
    ├── testLD.cc
    └── testVariantMatrixWindows.cc


/.circleci/config.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | jobs:
 3 |   build:
 4 |     docker:
 5 |       - image: circleci/python:3.6-stretch
 6 |     working_directory: /home/circleci/libsequence
 7 |     steps:
 8 |       - checkout
 9 |       - run: sudo chown -R circleci:circleci *
10 |       - restore_cache:
11 |           key: libsequence-{{ .Branch }}
12 |       - run:
13 |           name: Checkout submodules
14 |           command: |
15 |             git submodule update --init --recursive 
16 |             # Write out the status for debugging purposes. Are we checked out at tags?
17 |             git submodule status --recursive
18 |       - run:
19 |           name: Install dependencies and set path
20 |           command: |
21 |             sudo apt-get update
22 |             sudo apt-get install -y libboost-*dev  # -y: CI has no terminal to answer apt's confirmation prompt
23 |             # way to set path persistently https://circleci.com/docs/2.0/env-vars/#setting-path
24 |             echo 'export PATH=/home/circleci/.local/bin:$PATH' >> $BASH_ENV
25 |       - save_cache:
26 |           key: libsequence-{{ .Branch }}
27 |           paths:
28 |             - "/home/circleci/.local"
29 | 
30 |       - run:
31 |           name: Compile package
32 |           command: |
33 |               ./configure
34 |               make
35 | 
36 |       - run:
37 |           name: Run test suite
38 |           command: |
39 |               make check
40 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.o
 2 | *.lo
 3 | *~
 4 | Makefile
 5 | *.deps
 6 | *.libs
 7 | *.cache
 8 | config.h
 9 | *.log
10 | *.status
11 | libtool
12 | *.dirstamp
13 | *.la
14 | *.trs
15 | *.doxygen
16 | stamp-h1
17 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: false
 2 | language: cpp
 3 | 
 4 | matrix:
 5 |     include:
 6 |     - os: linux
 7 |       dist: trusty
 8 |       env:
 9 |         - MATRIX_EVAL="CC=gcc-5 && CXX=g++-5"
10 |       addons:
11 |         apt:
12 |           sources:
13 |             - ubuntu-toolchain-r-test
14 |           packages:
15 |           - g++-5
16 |           - gcc-5
17 |           - zlib1g
18 |           - zlib1g-dev
19 |           - libboost-dev
20 |           - libboost-system-dev
21 |           - libboost-test-dev
22 |           - libtbb-dev
23 | 
24 |     - os: linux
25 |       dist: trusty
26 |       env:
27 |         - MATRIX_EVAL="CC=gcc-4.8 && CXX=g++-4.8"
28 |       addons:
29 |         apt:
30 |           sources:
31 |             - ubuntu-toolchain-r-test
32 |           packages:
33 |             - g++-4.8
34 |             - gcc-4.8
35 |             - zlib1g
36 |             - zlib1g-dev
37 |             - libboost-dev
38 |             - libboost-system-dev
39 |             - libboost-test-dev
40 |             - libtbb-dev
41 | 
42 | 
43 |     - os: linux
44 |       dist: trusty
45 |       addons:
46 |         apt:
47 |           sources:
48 |             - ubuntu-toolchain-r-test
49 |           packages:
50 |             - g++-6
51 |             - zlib1g
52 |             - zlib1g-dev
53 |             - libboost-dev
54 |             - libboost-system-dev
55 |             - libboost-test-dev
56 |             - libtbb-dev
57 |       env:
58 |         - MATRIX_EVAL="CC=gcc-6 && CXX=g++-6"
59 | 
60 |     - os: linux
61 |       dist: trusty
62 |       addons:
63 |         apt:
64 |           sources:
65 |             - ubuntu-toolchain-r-test
66 |           packages:
67 |             - g++-7
68 |             - zlib1g
69 |             - zlib1g-dev
70 |             - libboost-dev
71 |             - libboost-system-dev
72 |             - libboost-test-dev
73 |             - libtbb-dev
74 |       env:
75 |         - MATRIX_EVAL="CC=gcc-7 && CXX=g++-7"
76 | 
77 | before_install:
78 |     - eval "${MATRIX_EVAL}"
79 | 
80 | notifications:
81 |   email: false
82 | 
83 | script:
84 |  - export LD_LIBRARY_PATH=$HOME/lib
85 |  - ./configure --prefix=$HOME && make  && make install
86 |  - make check
87 |  - $HOME/bin/libsequenceConfig --version
88 | 


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
1 | Kevin Thornton <kt234@cornell.edu>
2 | 


--------------------------------------------------------------------------------
/Makefile.am:
--------------------------------------------------------------------------------
 1 | AUTOMAKE_OPTIONS = foreign
 2 | ACLOCAL_AMFLAGS = -I m4
 3 | LIBTOOL_DEPS = @LIBTOOL_DEPS@	
 4 | SUBDIRS=src Sequence test examples
 5 | includedir=$(oldincludedir) -ISequence
 6 | EXTRA_DIST=doc/libsequence.doxygen doc/Makefile doc/images/* examples/Makefile examples/*.cc examples/*.hpp README.md config.h.in
 7 | 
 8 | libtool: $(LIBTOOL_DEPS)
 9 | 	$(SHELL) ./config.status libtool
10 | 


--------------------------------------------------------------------------------
/Sequence/AlleleCountMatrix.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SEQUENCE_ALLELE_COUNT_MATRIX_HPP
 2 | #define SEQUENCE_ALLELE_COUNT_MATRIX_HPP
 3 | 
 4 | #include <cstdint>
 5 | #include <vector>
 6 | #include <utility>
 7 | #include <stdexcept>
 8 | #include <Sequence/VariantMatrix.hpp>
 9 | 
10 | namespace Sequence
11 | {
12 |     class AlleleCountMatrix
13 |     /// \brief Matrix representation of allele counts in a VariantMatrix
14 |     /// Built either directly from a VariantMatrix or from pre-computed counts plus explicit dimensions.
15 |     {
16 |       private:
17 |         static std::vector<std::int32_t> init_counts(const VariantMatrix& m);
18 | 
19 |       public:
20 |         const std::vector<std::int32_t> counts; // flat storage; the templated constructor requires size ncol * nrow
21 |         using value_type = std::vector<std::int32_t>::value_type;
22 |         const std::size_t ncol;
23 |         const std::size_t nrow;
24 |         const std::size_t nsam;
25 |         explicit AlleleCountMatrix(const VariantMatrix& m);
26 | 
27 |         /// This constructor is for advanced use only,
28 |         /// such as constructing from a slice of a pre-existing
29 |         /// AlleleCountMatrix.  Throws std::invalid_argument if nc_ * nr_ != counts.size().
30 |         template <typename T>
31 |         AlleleCountMatrix(T&& t, const std::size_t nc_, const std::size_t nr_,
32 |                           const std::size_t n_)
33 |             : counts(std::forward<T>(t)), ncol{ nc_ }, nrow{ nr_ }, nsam{ n_ }
34 |         {
35 |             if (ncol * nrow != counts.size())
36 |                 {
37 |                     throw std::invalid_argument(
38 |                         "incorrect dimensions for AlleleCountMatrix");
39 |                 }
40 |         }
41 |         std::pair<std::vector<std::int32_t>::const_iterator,
42 |                   std::vector<std::int32_t>::const_iterator>
43 |         row(const std::size_t) const; // NOTE(review): presumably the [begin, end) range of one row of counts; confirm in the .cc
44 |     };
45 | } // namespace Sequence
46 | 
47 | #endif
48 | 


--------------------------------------------------------------------------------
/Sequence/Coalescent/Coalesce.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef __SEQUENCE_COALESCENT_COALESCE_HPP__
 2 | #define __SEQUENCE_COALESCENT_COALESCE_HPP__
 3 | 
 4 | #include <Sequence/Coalescent/SimTypes.hpp>
 5 | #include <utility>
 6 | #include <functional>
 7 | 
 8 | namespace Sequence
 9 | {
10 |   namespace coalsim {
11 |     template<typename uniform_generator>
12 |     std::pair<int,int> pick2_in_deme( uniform_generator & uni, 
13 | 				      const std::vector<Sequence::coalsim::chromosome> & sample,
14 | 				      const int & ttl_nsam,
15 | 				      const int & deme_nsam,
16 | 				      const int & deme );
17 | 
18 |     template<typename uniform_generator>
19 |     std::pair<int,int> pick2_in_deme( const uniform_generator & uni, 
20 | 				      const std::vector<Sequence::coalsim::chromosome> & sample,
21 | 				      const int & ttl_nsam,
22 | 				      const int & deme_nsam,
23 | 				      const int & deme );
24 | 
25 |     template<typename uniform_generator>
26 |     std::pair<int,int> pick2( uniform_generator & uni, const int & nsam);
27 | 
28 |     template<typename uniform_generator>
29 |     std::pair<int,int> pick2( const uniform_generator & uni, const int & nsam);
30 | 
31 |     bool isseg( chromosome::const_iterator seg, const unsigned & nsegs,
32 | 		const int & pos, unsigned * offset );
33 | 
34 |     int coalesce(const double & time,
35 | 		 const int & ttl_nsam,
36 | 		 const int & current_nsam,
37 | 		 const int & c1,
38 | 		 const int & c2,
39 | 		 const int & nsites,
40 | 		 int * nlinks,
41 | 		 std::vector<chromosome> * sample,
42 | 		 arg * sample_history);
43 |   }
44 | }
45 | #endif
46 | #include <Sequence/Coalescent/bits/Coalesce.tcc>
47 | 


--------------------------------------------------------------------------------
/Sequence/Coalescent/Coalescent.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef __SEQUENCE_COALESCENT_COALECENT_HPP__
 2 | #define __SEQUENCE_COALESCENT_COALECENT_HPP__
 3 | 
 4 | /*!
 5 |   \defgroup coalescent Classes and functions related to simulating data under coalescent models
 6 |   \ingroup popgen
 7 | */
 8 | /*! \file Coalescent.hpp
 9 |   @brief A lazy header to include the headers needed to start writing simulations.
10 |   Includes:
11 |    <Sequence/Coalescent/SimTypes.hpp>
12 |    <Sequence/Coalescent/Coalesce.hpp>
13 |    <Sequence/Coalescent/Recombination.hpp>
14 |    <Sequence/Coalescent/Mutation.hpp>
15 |    <Sequence/Coalescent/Initialize.hpp>
16 |    <Sequence/Coalescent/DemographicModels.hpp>
17 |    <Sequence/Coalescent/FragmentsRescaling.hpp>  <Sequence/Coalescent/Trajectories.hpp>
18 | */
19 | /*! \example freerec.cc
20 |   Coalescent simulation with free recombination
21 | */
22 | /*! \example msmm.cc
23 |   Coalescent simulation
24 | */
25 | /*! \example bottleneck.cc
26 |   Example of using the  Sequence::bottleneck template function
27 | */
28 | /*! \example fragments.cc
29 |   Example of simulating partially linked fragments in neutral models.
30 | */
31 | 
32 | /*!
33 |   \namespace Sequence::coalsim @brief Routines for coalescent simulation
34 |  */
35 | 
36 | #include <Sequence/Coalescent/SimTypes.hpp>
37 | #include <Sequence/Coalescent/Coalesce.hpp>
38 | #include <Sequence/Coalescent/Recombination.hpp>
39 | #include <Sequence/Coalescent/Mutation.hpp>
40 | #include <Sequence/Coalescent/Initialize.hpp>
41 | #include <Sequence/Coalescent/DemographicModels.hpp>
42 | #include <Sequence/Coalescent/FragmentsRescaling.hpp>
43 | #include <Sequence/Coalescent/Trajectories.hpp>
44 | #endif
45 | 


--------------------------------------------------------------------------------
/Sequence/Coalescent/FragmentsRescaling.hpp:
--------------------------------------------------------------------------------
 1 | /*! \file FragmentsRescaling.hpp
 2 |   \brief Helper functions for simulating partially linked fragments
 3 |   One often wants to simulate partially linked fragments under neutral models.
 4 |   An efficient way to do this is to simulate a contiguous fragment, but
 5 |   with the recombination rate varying along the region (to represent
 6 |   the variable genetic distances between fragments).  This header file
 7 |   declares functions that make this task easier, particularly the 
 8 |   operations of rescaling the positions of mutations/marginal trees
 9 |   from the genetic map back to the physical map
10 | */
11 | 
12 | #ifndef __SEQUENCE_COALESCENT_FRAGMENTS_RESCALING_HPP__
13 | #define __SEQUENCE_COALESCENT_FRAGMENTS_RESCALING_HPP__
14 | 
15 | #include <Sequence/Coalescent/SimTypes.hpp>
16 | #include <vector>
17 | #include <utility>
18 | 
19 | namespace Sequence
20 | {
21 |   class SimData; //fwd declaration
22 |   namespace coalsim {
23 |     int sample_length( const std::vector< std::pair<int,int> > & fragments );
24 |     int total_length( const std::vector< std::pair<int,int> > & fragments );
25 |     void calculate_scales(const std::vector< std::pair<int,int> > & fragments,
26 | 			  std::vector< std::pair<double,double> > * sample_scale,
27 | 			  std::vector< std::pair<double,double> > * mutation_scale );
28 |     void rescale_mutation_positions(Sequence::SimData * d,
29 | 				    const std::vector< std::pair<double,double> > & sample_scale, 
30 | 				    const std::vector< std::pair<double,double> > & mutation_scale )__attribute__((deprecated));
31 |     void rescale_arg( arg * sample_history,
32 | 		    const std::vector< std::pair<int,int> > & fragments );
33 |     double integrate_genetic_map( const std::vector<chromosome> & sample,
34 | 				  const int & current_nsam,
35 | 				  const std::vector<double> & genetic_map,
36 | 				  std::vector<double> * reclens);
37 |   }
38 | }
39 | #endif
40 | 


--------------------------------------------------------------------------------
/Sequence/Coalescent/Initialize.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef __SEQUENCE_COALESCENT_INIT_ARG_FUNCTIONS_HPP__
 2 | #define __SEQUENCE_COALESCENT_INIT_ARG_FUNCTIONS_HPP__
 3 | 
 4 | #include <Sequence/Coalescent/SimTypes.hpp>
 5 | #include <vector>
 6 | 
 7 | namespace Sequence
 8 | {
 9 |   namespace coalsim {
10 |     std::vector<chromosome> init_sample( const std::vector<int> & pop_config,
11 | 				       const int & nsites );
12 |     marginal init_marginal( const int & nsam );
13 |   }
14 | }
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/Sequence/Coalescent/Makefile.am:
--------------------------------------------------------------------------------
 1 | SUBDIRS = bits
 2 | 
 3 | pkgincludedir=$(prefix)/include/Sequence/Coalescent
 4 | 
 5 | pkginclude_HEADERS = Coalesce.hpp\
 6 | 	Initialize.hpp\
 7 | 	Mutation.hpp\
 8 | 	NeutralSample.hpp\
 9 | 	Recombination.hpp\
10 | 	SimTypes.hpp\
11 | 	TreeOperations.hpp\
12 | 	Coalescent.hpp\
13 | 	DemographicModels.hpp\
14 | 	FragmentsRescaling.hpp\
15 | 	Trajectories.hpp


--------------------------------------------------------------------------------
/Sequence/Coalescent/Recombination.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef __SEQUENCE_COALESCENT_RECOMBINATION_HPP__
 2 | #define __SEQUENCE_COALESCENT_RECOMBINATION_HPP__
 3 | 
 4 | #include <Sequence/Coalescent/SimTypes.hpp>
 5 | namespace Sequence
 6 | {
 7 |   namespace coalsim {
 8 |     int crossover( const int & current_nsam,
 9 | 		   const int & chromo,
10 | 		   const int & pos,
11 | 		   std::vector<chromosome> * sample,
12 | 		   arg * sample_history);
13 | 
14 |     std::pair<int,int> pick_uniform_spot(const double & random_01,
15 | 					 const int & nlinks,
16 | 					 std::vector<chromosome>::const_iterator sample_begin,
17 | 					 const unsigned & current_nsam);
18 | 
19 |     template<typename uniform01_generator>
20 |     std::pair<int,int> pick_spot( uniform01_generator & uni01,
21 | 				  const double & total_reclen,
22 | 				  const std::vector<double> & reclens,
23 | 				  std::vector<chromosome>::const_iterator sample_begin,
24 | 				  const unsigned & current_nsam,
25 | 				  const double * rec_map);
26 | 
27 |     template<typename uniform01_generator>
28 |     std::pair<int,int> pick_spot( const uniform01_generator & uni01,
29 | 				  const double & total_reclen,
30 | 				  const std::vector<double> & reclens,
31 | 				  std::vector<chromosome>::const_iterator sample_begin,
32 | 				  const unsigned & current_nsam,
33 | 				  const double * rec_map);
34 |   }
35 | }
36 | #endif
37 | #include <Sequence/Coalescent/bits/Recombination.tcc>
38 | 


--------------------------------------------------------------------------------
/Sequence/Coalescent/TreeOperations.hpp:
--------------------------------------------------------------------------------
 1 | /*! \file TreeOperations.hpp
 2 |   Things you may want to do with marginal trees in coalescent simulations
 3 | */
 4 | #ifndef __SEQUENCE_COALESCENT_TREE_OPERATIONS_HPP__
 5 | #define __SEQUENCE_COALESCENT_TREE_OPERATIONS_HPP__
 6 | 
 7 | #include <Sequence/Coalescent/SimTypes.hpp>
 8 | #include <vector>
 9 | #include <memory>
10 | namespace Sequence
11 | {
12 |   namespace coalsim {
13 |     double total_time( const marginal::const_iterator beg,
14 | 		       const int & nsam );
15 |   
16 |     int pick_branch( marginal::const_iterator beg,
17 | 		     const int & nsam,
18 | 		     const double & rtime);
19 |   
20 |     std::vector<int> get_all_descendants (marginal::const_iterator beg,
21 | 					  const int & nsam,
22 | 					  const int & branch);
23 | 
24 |     bool is_descendant( marginal::const_iterator beg,
25 | 			const int & ind,
26 | 			const int & branch );
27 | 
28 |     double total_time_on_arg( const Sequence::coalsim::arg & sample_history,
29 | 			      const int & total_number_of_sites );
30 | 
31 |     void minimize_arg( arg * sample_history );
32 | 
33 |     class sfs_times_impl;
34 |     class sfs_times
35 |     {
36 |     private:
37 |       std::unique_ptr<sfs_times_impl> impl;
38 |     public:
39 |       sfs_times();
40 |       sfs_times(arg::const_iterator sample_history_beg,
41 | 		const arg::size_type & nsegs,
42 | 		const int & total_nsites_simulated,
43 | 		bool folded = false);
44 |       sfs_times(const sfs_times &);
45 |       ~sfs_times();
46 |     
47 |       double operator[]( std::vector<double>::size_type const & ) const;
48 |       sfs_times & operator=(const sfs_times &);
49 |       bool operator==(const sfs_times & rhs) const;
50 |       double ttime() const;
51 |       size_t size() const;
52 |       typedef std::vector<double>::const_iterator const_iterator;
53 |       const_iterator begin() const;
54 |       const_iterator end() const;
55 |     };
56 |   }
57 | }
58 | #endif
59 | 


--------------------------------------------------------------------------------
/Sequence/Coalescent/bits/Makefile.am:
--------------------------------------------------------------------------------
1 | pkgincludedir=$(prefix)/include/Sequence/Coalescent/bits
2 | 
3 | pkginclude_HEADERS = Mutation.tcc \
4 | 	DemographicModels.tcc \
5 | 	Recombination.tcc \
6 | 	Coalesce.tcc \
7 | 	Trajectories.tcc


--------------------------------------------------------------------------------
/Sequence/CodonTable.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef _CODON_TABLE_H_
25 | #define _CODON_TABLE_H_
26 | #include <Sequence/typedefs.hpp>
27 | 
28 | /*! \file CodonTable.hpp
29 |   \deprecated
30 |   @brief facility to count codons in CDS sequence, function Sequence::makeCodonUsageTable
31 | */
32 | 
33 | namespace Sequence
34 | {
35 |   /*!
36 |     \c #include \c <Sequence/CodonTable.hpp>
37 |     A codon usage table is a list of the codons and
38 |     the number of times each codon occurs in a sequence.
39 |     This is represented by the type Sequence::CodonUsageTable,
40 |     which is a vector< pair<string,int> >.
41 |     To output a codon table:
42 |     \code
43 |     Sequence::Fasta sequence;
44 |     cin >> sequence;
45 |     CodonUsageTable x = makeCodonUsageTable(&sequence);
46 |     for(unsigned i = 0 ; i < x.size() ; ++i)
47 |     {
48 |     cout << x[i].first << '\t' << x[i].second << '\n';
49 |     }
50 |     \endcode
51 |   */
52 |   class Seq;
53 |   CodonUsageTable makeCodonUsageTable(const Seq* sequence);
54 |   CodonUsageTable makeCodonUsageTable(const std::string &sequence);
55 |   CodonUsageTable makeCodonUsageTable(std::string::const_iterator beg,
56 | 				      std::string::const_iterator end);
57 | 
58 | }
59 | #endif
60 | 


--------------------------------------------------------------------------------
/Sequence/Comparisons.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef COMPARISONS_H
25 | #define COMPARISONS_H
26 | /*! \file Comparisons.hpp
27 |   @brief declaration of routines for comparing DNA sequences
28 |   This file declares a set of functions useful for comparing two bits
29 |   of sequence data--sequences, nucleotides, etc.
30 |  
31 |   @short Routines to compare bases, sequences, etc. 
32 |   Declares Sequence::TsTv,Sequence::NumDiffs,Sequence::Gapped,
33 |   Sequence::NotAGap
34 |   \ingroup misc
35 | */
36 | 
37 | #include <string>
38 | #include <algorithm>
39 | #include <type_traits>
40 | #include <Sequence/SeqEnums.hpp>
41 | 
42 | namespace Sequence
43 | {
44 |   Mutations TsTv(const char & i, const char & j);
45 |   Mutations TsTv(const int & i,const int & j);
46 |   bool Different (const std::string & seq1,
47 | 		  const std::string & seq2,
48 | 		  const bool & skip_missing = true,
49 | 		  const bool & nucleic_acid = true);
50 |   
51 |   int NumDiffs(const std::string & seq1,
52 | 	       const std::string & seq2,
53 | 	       const bool & skip_missing = true ,
54 | 	       const bool & nucleic_acid = true);
55 | 
56 |   bool Gapped(const std::string &s);
57 | 
58 |   template<typename Iterator> bool Gapped(Iterator beg,Iterator end,
59 | 					  const char & gapchar = '-')
60 |     /*!
61 |       \param beg an iterator
62 |       \param end an iterator
63 |       \param gapchar a character representing an alignment gap
64 |       \return true if \a gapchar is present in the range [beg,end), false otherwise
65 |     */
66 |   {
67 |     Iterator itr = std::find(beg,end,gapchar);
68 |     return (itr!=end);
69 |   }
70 | 
71 |   bool NotAGap(const char &c);
72 | }
73 | #endif
74 | 


--------------------------------------------------------------------------------
/Sequence/ComplementBase.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef __COMPLEMENT_BASE_H__
25 | #define __COMPLEMENT_BASE_H__
26 | #include <functional>
27 | 
28 | /*! \file ComplementBase.hpp
29 |   @brief Declaration of Sequence::ComplementBase, a function object to return the complement of a DNA nucleotide
30 | */
31 | /*! 
32 |   \struct Sequence::ComplementBase Sequence/ComplementBase.hpp
33 |   \ingroup functors
34 |   a functor to complement a sequence\n
35 |   example use:
36 |   \code
37 |   //reverse and complement a std::string
38 |   #include <string>
39 |   #include <algorithm>
40 |   #include <Sequence/SeqFunctors.hpp>
41 |   
42 |   int main ()
43 |   {
44 |   std::string seq;
45 |   //fill seq with DNA characters
46 |   std::reverse(seq.begin(),seq.end());
47 |   std::for_each(seq.begin(),seq.end(),Sequence::ComplementBase());
48 |   }
49 |   \endcode
50 | */
51 | namespace Sequence
52 |   {
53 |     struct ComplementBase { // no longer derives from std::unary_function<char,void> (deprecated in C++11, removed in C++17)
54 |       typedef char argument_type; typedef void result_type; // the member types the old base class used to supply
55 |       void operator()(char &ch) const; // takes the nucleotide by reference and returns nothing (defined outside this header)
56 |     };
57 | }
58 | #endif
59 | 


--------------------------------------------------------------------------------
/Sequence/FST.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef __FST_H__
25 | #define __FST_H__
26 | 
27 | #include <utility>
28 | #include <set>
29 | #include <memory>
30 | #include <Sequence/stateCounter.hpp>
31 | 
32 | /*! \file FST.hpp
33 |   @brief declaration of a class (Sequence::FST) to analyze population structure
34 | 
35 |   \deprecated Will be removed in libsequence 2.0
36 | */
37 | namespace Sequence
38 | {
39 |   class PolyTable;
40 |   struct FSTimpl; // only forward-declared here; the definition lives outside this header
41 |   class __attribute__ ((deprecated)) FST
42 |   {
43 |   private:
44 |     std::unique_ptr<FSTimpl> impl; // owning pointer to the implementation object
45 |   public:
46 |     explicit FST(const PolyTable *data, unsigned npop, const unsigned *config=nullptr,
47 | 		 const double *weights=nullptr, bool haveOutgroup = false,
48 | 		 unsigned outgroup = 0); // nullptr replaces NULL, which this header never declared
49 |     FST(const FST &) = delete; // objects of this class cannot be copied
50 |     FST & operator=(const FST &) = delete;
51 |     ~FST(void);
52 |     double HSM(void) const;
53 |     double Slatkin(void) const;
54 |     double HBK(void) const;
55 |     double piB(void) const;
56 |     double piT(void) const;
57 |     double piS(void) const;
58 |     double piD(void) const;
59 |     std::set<double> shared(unsigned pop1, unsigned pop2) const; // NOTE(review): presumably positions shared by pop1 and pop2 -- confirm
60 |     std::set<double> fixed(unsigned pop1, unsigned pop2) const; // NOTE(review): presumably positions fixed between pop1 and pop2 -- confirm
61 |     std::pair< std::set<double>,std::set<double> > Private(unsigned pop1, unsigned pop2) const; // one set per population (presumably) -- confirm
62 |   };
63 | }
64 | #endif
65 | 


--------------------------------------------------------------------------------
/Sequence/Fasta.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | /*! \file Fasta.hpp
25 |   @brief Declaration of Sequence::Fasta streams
26 | */
27 | 
28 | /*!
29 |   \class Sequence::Fasta Sequence/Fasta.hpp
30 |   \ingroup seqio
31 |   Publicly derived from Sequence::Seq, this class defines
32 |   how to read and print sequences in FASTA format, which looks like:\n
33 |   >sequence name 1\n
34 |   ATGATGATCAGATAGACATAGCAGATACATGT\n
35 |   >sequence name 2\n
36 |   ATGTTGGTTTTTTTTTAGAGATGTTTATAGGT\n
37 |   ETC... 
38 |  
39 |   @short FASTA sequence stream
40 | */
41 | 
42 | #ifndef FASTA_H
43 | #define FASTA_H
44 | 
45 | #include <Sequence/Seq.hpp>
46 | 
47 | namespace Sequence
48 |   {
49 |   class Fasta : public Seq
50 |     {
51 |     private:
52 |     public:
53 |       using Seq::Seq; //inherit the constructors of Sequence::Seq
54 |       Fasta();
55 |       Fasta (const Seq & s); //construct from any Sequence::Seq
56 |       Fasta( Fasta && ) = default;
57 |       Fasta( Seq && ); //construct from an rvalue Sequence::Seq
58 |       Fasta( const Fasta & ) = default;
59 |       ~Fasta()/*! placeholder for vtable */ {}
60 |       Fasta & operator=(Fasta &&) = default;
61 |       Fasta & operator=(const Fasta &) = default;
62 |       /*! Read sequence data in FASTA format from the std::istream \a s.
63 | 	\exception Sequence::SeqException if memory can't be allocated. 
64 | 	(This is because the data are temporarily read into char *, 
65 | 	because that was found to be faster).
66 | 	\exception Sequence::badFormat if the input stream is not
67 | 	in FASTA format
68 |       */
69 |       std::istream&  read(std::istream &s);
70 |       /*!
71 | 	\param s a std::ostream
72 | 	write the sequence in FASTA format to \a s
73 |       */
74 |       std::ostream& print(std::ostream& s) const;
75 |     };
76 | }
77 | #endif
78 | 


--------------------------------------------------------------------------------
/Sequence/Grantham.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef GRANTHAM_H
25 | #define GRANTHAM_H
26 | /*! \file Grantham.hpp
27 |   @brief Grantham's distances (Sequence::Grantham)
28 | */
29 | 
30 | /*! \class Sequence::Grantham Sequence/Grantham.hpp
31 |   A functor to return the Grantham's distance between
32 |   two amino acids.
33 |  
34 |   @short Grantham's distances
35 | */
36 | #include <limits>
37 | 
38 | namespace Sequence
39 |   {
40 |   class Grantham
41 |     {
42 |     private:
43 |       double D[60][60]; //60x60 table of doubles; NOTE(review): presumably the pairwise distances, indexing scheme not visible in this header
44 |       const double stopweight; //NOTE(review): presumably initialized from the constructor argument stop -- confirm in the implementation
45 |     public:
46 |       Grantham(const double stop = std::numeric_limits<double>::max()); //stop defaults to the largest finite double
47 |       double operator()(char aa1, char aa2) const; //Grantham's distance between the amino acids aa1 and aa2 (per the class documentation)
48 |     };
49 | }
50 | #endif
51 | 


--------------------------------------------------------------------------------
/Sequence/GranthamWeights.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef __GRANTHAMWEIGHTS_H__
25 | #define __GRANTHAMWEIGHTS_H__
26 | 
27 | /*! \file GranthamWeights.hpp
28 |   @brief declaration of classes to weight codons by Grantham distance (i.e. for Sequence::Comeron95). Declares 
29 |   Sequence::GranthamWeights2 and  Sequence::GranthamWeights3
30 | */
31 | 
32 | /*!
33 |   \class Sequence::GranthamWeights2 Sequence/GranthamWeights.hpp
34 |   \ingroup weights
35 |   @short Weights paths by Grantham's distances for codons differing at 2 sites
36 | */
37 | 
38 | /*!
39 |   \class Sequence::GranthamWeights3 Sequence/GranthamWeights.hpp
40 |   \ingroup weights
41 |   @short Weights paths by Grantham's distances for codons differing at 3 sites
42 | */
43 | #include <Sequence/SeqEnums.hpp>
44 | #include <Sequence/WeightingSchemes.hpp>
45 | 
46 | namespace Sequence
47 |   {
48 |   class Grantham; //forward declaration only; not used by the declarations in this header
49 |   struct GranthamWeights2 : public WeightingScheme2 //weighting for codons differing at 2 sites (per the class documentation)
50 |     {
51 |       weights2_t operator()(const std::string &codon1, const std::string &codon2,Sequence::GeneticCodes genetic_code) const;
52 |     };
53 | 
54 |   struct GranthamWeights3 : public WeightingScheme3 //weighting for codons differing at 3 sites (per the class documentation)
55 |     {
56 |       weights3_t operator()(const std::string &codon1, const std::string &codon2,Sequence::GeneticCodes genetic_code) const;
57 |     };
58 | }
59 | #endif
60 | 


--------------------------------------------------------------------------------
/Sequence/Hudson2001.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef HUDSON2001_H
25 | #define HUDSON2001_H
26 | 
27 | /*! \file Hudson2001.hpp
28 |   \deprecated
29 | */
30 | #include <Sequence/SimpleSNP.hpp>
31 | #warning "This header is deprecated. Please use <Sequence/SimpleSNP.hpp>"
32 | 
33 | #endif
34 | 


--------------------------------------------------------------------------------
/Sequence/Kimura80.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | /*! \file Kimura80.hpp
25 |   @brief declaration of Sequence::Kimura80
26 | */
27 | 
28 | /*! \class Sequence::Kimura80 Sequence/Kimura80.hpp
29 |   \ingroup divergence
30 |   Calculate a measure of sequence divergence using Kimura's 1980 method.\n
31 |   The reference is: Kimura, M (1980) J. Mol. Evol 16: 111-120.\n
32 |   The calculation only depends on 3 numbers:\n
33 |   1.) the number of sites in the sequence\n
34 |   2.) the number of transitions between the two sequences\n
35 |   3.) the number of transversions between the two sequences\n
36 |   \n
37 |   The implementation of this class does the following:\n
38 |   1.) compare each position in both sequences, counting transitions and transversions\n
39 |   2.) calculate distance using Kimura's formula\n
40 |   \n
41 |   \exception Sequence::SeqException if the two sequences are of unequal length.
42 |  
43 |   @short Kimura's 2-parameter distance
44 | */
45 | #ifndef KIMURA80_H
46 | #define KIMURA80_H
47 | 
48 | namespace Sequence
49 |   {
50 |   class Seq;
51 |   class Kimura80
52 |     {
53 |     private:
54 |       unsigned num_Ts, num_Tv;
55 |       size_t seqlen;		//total sequence length
56 |       size_t sites_compared;	//number of ungapped sites in the data
57 |       void Compute (const Sequence::Seq *seq1, const Sequence::Seq *seq2);
58 |       double divergence, P, Q;
59 |     public:
60 |       explicit Kimura80 (const Sequence::Seq * seqa,const  Sequence::Seq * seqb);
61 |       double K() const;
62 |       size_t sites (void) const;
63 |     };
64 | }
65 | #endif
66 | 


--------------------------------------------------------------------------------
/Sequence/Makefile.am:
--------------------------------------------------------------------------------
 1 | SUBDIRS = bits SummStatsDeprecated variant_matrix summstats
 2 | # Headers listed in pkginclude_HEADERS are installed into the directory set here.
 3 | pkgincludedir=$(prefix)/include/Sequence
 4 | 
 5 | pkginclude_HEADERS = AlignStream.hpp\
 6 | 	Alignment.hpp\
 7 | 	Clustalw.hpp\
 8 | 	phylipData.hpp\
 9 | 	CodonTable.hpp\
10 | 	Comeron95.hpp\
11 | 	Comparisons.hpp\
12 | 	ComplementBase.hpp\
13 | 	CountingOperators.hpp\
14 | 	FST.hpp\
15 | 	Fasta.hpp\
16 | 	fastq.hpp\
17 | 	Grantham.hpp\
18 | 	GranthamWeights.hpp\
19 | 	SimpleSNP.hpp\
20 | 	Hudson2001.hpp\
21 | 	Kimura80.hpp\
22 | 	PathwayHelper.hpp\
23 | 	PolySIM.hpp\
24 | 	PolySNP.hpp\
25 | 	PolySNPimpl.hpp\
26 | 	PolySites.hpp\
27 | 	PolyTable.hpp\
28 | 	PolyTableFunctions.hpp\
29 | 	PolyFunctional.hpp\
30 | 	PolyTableSlice.hpp\
31 | 	polySiteVector.hpp \
32 | 	Recombination.hpp\
33 | 	RedundancyCom95.hpp\
34 | 	Seq.hpp\
35 | 	SeqConstants.hpp\
36 | 	SeqEnums.hpp\
37 | 	SeqFunctors.hpp\
38 | 	SeqProperties.hpp\
39 | 	SeqRegexes.hpp\
40 | 	SeqUtilities.hpp\
41 | 	SimData.hpp\
42 | 	SimParams.hpp\
43 | 	SingleSub.hpp\
44 | 	Sites.hpp\
45 | 	ThreeSubs.hpp\
46 | 	Translate.hpp\
47 | 	Translate2.hpp\
48 | 	TwoSubs.hpp\
49 | 	Unweighted.hpp\
50 | 	WeightingSchemes.hpp\
51 | 	stateCounter.hpp\
52 | 	shortestPath.hpp\
53 | 	descriptiveStats.hpp\
54 | 	HKA.hpp\
55 | 	typedefs.hpp \
56 | 	SummStatsDeprecated.hpp \
57 | 	SeqAlphabets.hpp \
58 | 	VariantMatrix.hpp \
59 | 	VariantMatrixCapsule.hpp \
60 | 	NonOwningCapsules.hpp \
61 | 	VectorCapsules.hpp \
62 | 	VariantMatrixViews.hpp \
63 | 	AlleleCountMatrix.hpp \
64 | 	summstats.hpp \
65 | 	StateCounts.hpp
66 | 


--------------------------------------------------------------------------------
/Sequence/PathwayHelper.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef __PATHWAYHELPER_H__
25 | #define __PATHWAYHELPER_H__
26 | /*! \file PathwayHelper.hpp
27 |   @brief declarations of Sequence::Intermediates2 and Sequence::Intermediates3
28 | */
29 | /*!
30 |   \defgroup CodonPaths Classes and functions to aid in the calculations of the pathways between two codons
31 |   This group of classes and functions deals with determining 
32 |   either the counts of silent and replacement differences between codons
33 |   or the intermedate codons that occurs between two different codons
34 | */
35 | #include <string>
36 | #include <array>
37 | namespace Sequence
38 | {
39 |   using Inter2_t = std::array<std::string,2>; //!< fixed-size array of 2 strings, the return type of Intermediates2
40 |   using Inter3_t = std::array<std::string,9>; //!< fixed-size array of 9 strings, the return type of Intermediates3
41 |   Inter2_t Intermediates2(const std::string &codon1, const std::string &codon2); //!< NOTE(review): presumably the intermediate codons for codons differing at 2 sites -- confirm
42 |   Inter3_t Intermediates3(const std::string &codon1, const std::string &codon2); //!< NOTE(review): presumably the intermediate codons for codons differing at 3 sites -- confirm
43 | }
44 | #endif
45 | 
46 | 


--------------------------------------------------------------------------------
/Sequence/PolySNPimpl.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | #pragma once
24 | #include <Sequence/PolyTable.hpp>
25 | #include <Sequence/stateCounter.hpp>
26 | #include <string>
27 | #include <mutex>
28 | namespace Sequence
29 | {
30 |   struct _PolySNPImpl
31 |   /*!
32 |     Implementation details for PolySNP.  This class is visible
33 |     so that it can be accessed from classes derived from PolySNP.
34 |     A PolySNP object contains a pointer to an instance of this class
35 |     that is storage class protected.
36 |   */
37 |   {
38 |     const PolyTable* _data; // non-owning pointer to a const table
39 |     unsigned _nsites,_nsam,_outgroup;
40 |     bool _haveOutgroup, _totMuts;
41 |     unsigned _totsam;
42 |     unsigned _DVK;
43 |     double _DVH;
44 |     bool _counted_singletons;
45 |     bool _know_pi;
46 |     bool _CalculatedDandV;
47 |     double _pi;
48 |     unsigned _singletons;
49 |     unsigned _walls_Bprime,_NumPoly;
50 |     double _walls_B,_walls_Q;
51 |     bool _calculated_wall_stats;
52 |     std::vector< Sequence::stateCounter > _counts;
53 |     std::vector< std::pair< bool, Sequence::stateCounter > > _derivedCounts;
54 |     std::mutex instance_lock; // std::mutex member: the struct is therefore neither copyable nor movable
55 |     bool _preprocessed;
56 |     void preprocess(void);
57 | 
58 |     _PolySNPImpl (const Sequence::PolyTable * data, const bool & haveOutgroup ,
59 | 		  const unsigned & outgroup, const bool & totMuts);
60 |   };
61 | }
62 | 


--------------------------------------------------------------------------------
/Sequence/PolySites.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef POLYSITES_H
25 | #define POLYSITES_H
26 | /*! \file PolySites.hpp
27 |   @brief Sequence::PolySites, generates polymorphism tables from data
28 | */
29 | #include <Sequence/PolyTable.hpp>
30 | namespace Sequence
31 |   {
32 |   class Fasta;
33 |   class PolySites : public PolyTable
34 |     {
35 |     private:
36 |       /*!
37 |       PolySites::fillIt() is the function that actually fills the polymorphism table.
38 |       */
39 |       template<class DataType>
40 |       void fillIt(const std::vector < DataType >&alignment,
41 |                          bool strictInfSites = false,
42 |                          bool ignoregaps = true,bool skipMissing=false,
43 |                          unsigned freqfilter=0);
44 |     public:
45 |       PolySites (void);
46 |       template<typename DataType>
47 |       PolySites (const std::vector < DataType >&alignment,
48 | 		 bool strictInfSites = false,
49 | 		 bool ignoregaps = true,
50 | 		 bool skipMissing=false,
51 |                  bool skipAdjSNP=false,
52 | 		 unsigned freqfilter=0); //builds the table from an alignment; defined in Sequence/bits/PolySites.tcc (included below)
53 |       //PolySites (const std::vector < double > &List, const std::vector < std::string > &stringList);
54 |       PolySites ( std::vector < double >  List, std::vector < std::string > stringList); //both vectors are taken by value
55 |       PolySites (PolyTable::const_site_iterator beg,
56 | 		 PolyTable::const_site_iterator end); //construct from a range of site iterators
57 |       PolySites( PolySites && );
58 |       PolySites( const PolySites & );
59 |       PolySites & operator=( PolySites && );
60 |       PolySites & operator=( const PolySites & );
61 |       ~PolySites(void){}
62 |       std::istream & read(std::istream &s) ;
63 |       std::ostream & print(std::ostream &stream) const;
64 |     };
65 | }
66 | #include <Sequence/bits/PolySites.tcc>
67 | #endif
68 | 


--------------------------------------------------------------------------------
/Sequence/SeqConstants.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef __SEQCONSTANTS_HPP__
25 | #define __SEQCONSTANTS_HPP__
26 | /*!
27 |   \file SeqConstants.hpp
28 |   A file defining constants used in various places in libsequence.
29 | */
30 | 
31 | namespace Sequence
32 | {
33 | extern const unsigned SEQMAXUNSIGNED; //!< declared here, defined elsewhere; NOTE(review): presumably the largest unsigned value -- confirm
34 | extern const double SEQMAXDOUBLE; //!< declared here, defined elsewhere; NOTE(review): presumably the largest double value -- confirm
35 | }
36 | 
37 | #endif
38 | 


--------------------------------------------------------------------------------
/Sequence/SeqEnums.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef SEQENUMS_H
25 | #define SEQENUMS_H
26 | #include <cstdint>
27 | /*! \file SeqEnums.hpp
28 |   Defines a handful of enumeration types useful
29 |   for sequence data.
30 |   @brief Definition of enumeration types
31 | */
32 | 
33 | namespace Sequence
34 |   {
35 |     /*! \enum Sequence::GeneticCodes
36 |       Identifies a genetic code; UNIVERSAL (= 0) is the only one currently supported.
37 |       The enumerators follow the order of NCBI's code tables, available at
38 |       http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
39 |     */
40 |     enum class GeneticCodes : std::int16_t { UNIVERSAL = 0 };
41 |     /*! \enum Sequence::Mutations
42 |       Values: Unknown (= 0), Ts (= 1) and Tv (= 2).\n
43 |       Unknown means unknown, Ts means transition, Tv means transversion
44 |     */
45 |     enum class Mutations : std::int8_t { Unknown = 0, Ts = 1, Tv = 2 };
46 | }
47 | #endif
48 | 


--------------------------------------------------------------------------------
/Sequence/SeqFunctors.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef __SEQ_FUNCTORS_H__
25 | #define __SEQ_FUNCTORS_H__
26 | #include <Sequence/stateCounter.hpp>
27 | #include <Sequence/ComplementBase.hpp>
28 | /*! \file SeqFunctors.hpp
29 |   \ingroup functors
30 |   This file is a bit of a catch-all for function objects defined in namespace Sequence.
31 |   Currently, including the header brings the definitions of Sequence::ComplementBase
32 |   and Sequence::stateCounter into scope
33 |  */
34 | /*!
35 |   \defgroup functors Function objects defined in the library
36 | */
37 | #endif
38 | 


--------------------------------------------------------------------------------
/Sequence/SeqProperties.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef __SEQ_PROPERTIES_HPP__
25 | #define __SEQ_PROPERTIES_HPP__
26 | /*! \file SeqProperties.hpp
27 |   \deprecated
28 |  */
29 | #include <Sequence/SeqAlphabets.hpp>
30 | #include <Sequence/Comparisons.hpp>
31 | 
32 | #endif
33 | 


--------------------------------------------------------------------------------
/Sequence/SimParams.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef SIMPARAMS_H
25 | #define SIMPARAMS_H
26 | /*! \file SimParams.hpp
27 |   @brief Sequence::SimParams reads in the parameters of Dick Hudson's coalescent simulation program.  Used in conjunction with Sequence::SimData
28 | */
29 | 
30 | /*! \class Sequence::SimParams Sequence/SimParams.hpp
31 |   \ingroup coalescent
32 |   include SimParams.h
33 |   Allows reading in and printing out of the parameter
34 |   list that Hudson's coalescent simulation program spits
35 |   out at the beginning of its execution.  An example of use
36 |   is found in tajd.cc in the Examples section.
37 |  
38 |   @author Kevin Thornton
39 |   @short Parameters for Hudson's simulation program
40 | */
41 | #include <iosfwd>
42 | #include <string>
43 | #include <vector>
44 | #include <cstdio>
45 | 
46 | namespace Sequence
47 | {
48 |   //! Holds the command line, total sample size and number of replicates
49 |   //! reported by Hudson's coalescent simulation program
50 |   class SimParams
51 |   {
52 |     //! output operator; note that it takes the object by non-const reference
53 |     friend std::ostream& operator<<(std::ostream&,class SimParams &object);
54 |   private:
55 |     std::string _command_line;
56 |     unsigned _howmany, tsam;
57 |   public:
58 |     SimParams();
59 |     //! read from the input stream \a s (defined outside this header)
60 |     std::istream& read(std::istream& s);
61 |     //! read from the C stdio stream \a openfile (defined outside this header)
62 |     int fromfile ( FILE * openfile );
63 | 
64 |     /*!
65 |       \return the command-line input to ms
66 |       \note for complicated models, this can be parsed
67 |       with a stringstream to figure out what the parameters are
68 |     */
69 |     std::string params() const { return _command_line; }
70 | 
71 |     /*!
72 |       \return the total sample size (# gametes)
73 |     */
74 |     unsigned totsam() const { return tsam; }
75 | 
76 |     /*!
77 |       \return number of genealogies to generate
78 |     */
79 |     unsigned runs() const { return _howmany; }
80 |   };
81 | 
82 |   //! input operator for SimParams (defined outside this header)
83 |   std::istream& operator>>(std::istream& s,  SimParams& c);
84 | }
85 | #endif
86 | 


--------------------------------------------------------------------------------
/Sequence/SingleSub.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 |   Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 |   Remove the brackets to email me.
 6 | 
 7 |   This file is part of libsequence.
 8 | 
 9 |   libsequence is free software: you can redistribute it and/or modify
10 |   it under the terms of the GNU General Public License as published by
11 |   the Free Software Foundation, either version 3 of the License, or
12 |   (at your option) any later version.
13 | 
14 |   libsequence is distributed in the hope that it will be useful,
15 |   but WITHOUT ANY WARRANTY; without even the implied warranty of
16 |   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 |   GNU General Public License for more details.
18 | 
19 |   You should have received a copy of the GNU General Public License
20 |   along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef SINGLESUB_H
25 | #define SINGLESUB_H
26 | /*! \file SingleSub.hpp
27 |   @brief used by Sequence::Comeron95, class Sequence::SingleSub calculates divergence between codons that differ at one site
28 | */
29 | 
30 | /*!
31 |   \class Sequence::SingleSub Sequence/SingleSub.hpp
32 |   \ingroup kaks
33 |   A functor to obtain divergence statistics for Comeron's method for codons that differ at one position.  Used by
34 |   Sequence::Comeron95
35 | 
36 |   @author Kevin Thornton
37 |   @short Deal with codons differing at 1 position
38 | */
39 | #include <string>
40 | #include <memory>
41 | namespace Sequence
42 | {
43 |   class RedundancyCom95;
44 |     
45 |   class SingleSub
46 |   {
47 |   private:
48 |     struct SingleSubImpl; //!< implementation type; only forward-declared in this header
49 |     std::unique_ptr<SingleSubImpl> impl; //!< owning pointer to the hidden implementation
50 |   public:
51 |     explicit SingleSub(void);
52 |     void operator()(const RedundancyCom95 & sitesObj,
53 | 		    const std::string &cod1,
54 | 		    const std::string &cod2); //!< NOTE(review): presumably processes the codon pair cod1/cod2 (per the class docs, codons differing at one position) -- confirm
55 |     ~SingleSub(); //!< declared here, defined elsewhere
56 |     double P0(void) const; //!< NOTE(review): the P*/Q* accessors presumably report per-site-class transition (P) and transversion (Q) values -- confirm
57 |     double P2S(void) const;
58 |     double P2V(void) const;
59 |     double P4(void) const;
60 |     double Q0(void) const;
61 |     double Q2S(void) const;
62 |     double Q2V(void) const;
63 |     double Q4(void) const;
64 |   };
65 | }
66 | #endif
67 | 


--------------------------------------------------------------------------------
/Sequence/StateCounts.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SEQUENCE_VARIANTMATRIX_STATECOUNTS_HPP__
 2 | #define SEQUENCE_VARIANTMATRIX_STATECOUNTS_HPP__
 3 | 
 4 | #include "VariantMatrix.hpp"
 5 | #include "VariantMatrixViews.hpp"
 6 | #include <limits>
 7 | #include <vector>
 8 | 
 9 | namespace Sequence
10 | {
11 |     struct StateCounts
12 |     /// \brief Track character state occurrence at a site in a VariantMatrix.
13 |     ///
14 |     /// This class keeps track of how many times each character state occurs
15 |     /// at a variable site in a VariantMatrix.  All missing data (negative
16 |     /// state values) are considered equivalent and collapsed into the single
17 |     /// missing value of -1.
18 |     ///
19 |     /// When constructed, the sample size at a site is considered to be the
20 |     /// sum of the number of occurrences of all non-missing states.
21 |     ///
22 |     /// \ingroup variantmatrix
23 |     {
24 |         static constexpr VariantMatrix::value_type max_allele
25 |             = std::numeric_limits<VariantMatrix::value_type>::max(); // largest value representable by VariantMatrix::value_type
26 |         /// Occurrence counts of the character states (NOTE(review): a flat vector, presumably indexed by state value -- confirm)
27 |         std::vector<std::int32_t> counts;
28 |         /// The max allelic value seen
29 |         std::size_t max_allele_idx;
30 |         /// The sample size at this site.  Excludes missing data.
31 |         std::uint32_t n;
32 |         /// The reference state for this site.  Needed for certain summary
33 |         /// statistics. Default is -1 (missing).
34 |         std::int8_t refstate;
35 | 
36 |         /// Construct with an explicit reference state for the site.
37 |         /// NOTE(review): the no-argument constructor presumably uses -1 (missing) -- confirm.
38 |         StateCounts(const std::int8_t refstate_);
39 |         StateCounts();
40 |         void operator()(ConstRowView &); // NOTE(review): presumably tallies the states found in the given row view -- confirm
41 |         void operator()(const RowView &); // same operation for a RowView argument (presumably) -- confirm
42 |     };
43 | 
44 |     /// Create a vector of StateCounts from a VariantMatrix.
45 |     /// If `refstates` is not empty and differs in length
46 |     /// from `m.nsites`, then `std::invalid_argument` is thrown.
47 |     /// \ingroup variantmatrix
48 |     std::vector<StateCounts>
49 |     process_variable_sites(const VariantMatrix& m,
50 |                            const std::vector<std::int8_t>& refstates);
51 |     /// Create a vector of StateCounts with a specific reference state
52 |     /// used for all sites
53 |     /// \ingroup variantmatrix
54 |     std::vector<StateCounts>
55 |     process_variable_sites(const VariantMatrix& m, const std::int8_t refstate);
56 |     /// Create a vector of StateCounts with a reference state of -1
57 |     /// used for all sites
58 |     /// \ingroup variantmatrix
59 |     std::vector<StateCounts> process_variable_sites(const VariantMatrix& m);
60 | } // namespace Sequence
61 | 
62 | #endif
63 | 


--------------------------------------------------------------------------------
/Sequence/SummStatsDeprecated.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef __SEQUENCE_SUMMSTATS_HPP__
 2 | #define __SEQUENCE_SUMMSTATS_HPP__
 3 | 
 4 | /*! \file SummStatsDeprecated.hpp
 5 |   Header file for summary statistic of variation data.
 6 | */
 7 | 
 8 | #include <Sequence/SummStatsDeprecated/nSL.hpp>
 9 | #include <Sequence/SummStatsDeprecated/Garud.hpp>
10 | #include <Sequence/SummStatsDeprecated/lHaf.hpp>
11 | #include <Sequence/SummStatsDeprecated/Snn.hpp>
12 | 
13 | #endif
14 | 


--------------------------------------------------------------------------------
/Sequence/SummStatsDeprecated/Garud.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef __SEQUENCE_GARUD_HPP__
 2 | #define __SEQUENCE_GARUD_HPP__
 3 | 
 4 | #include <Sequence/SimData.hpp>
 5 | #include <Sequence/summstats/garud.hpp>
 6 | 
 7 | namespace Sequence
 8 | {
 9 |   /*!
10 |     Garud et al. DOI: 10.1371/journal.pgen.1005004
11 |     Messer & Petrov DOI: 10.1016/j.tree.2013.08.003
12 |     Note that H1 = 1 - haplotype homozygosity, e.g. Depaulis and Veuille's "H"
13 |     \ingroup popgenanalysis
14 |     \return An object of type Sequence::GarudStats
15 |     \deprecated This declaration carries the deprecated attribute. */
16 |   GarudStats H1H12(const SimData & d)__attribute__ ((deprecated));
17 | }
18 | 
19 | #endif
20 | 


--------------------------------------------------------------------------------
/Sequence/SummStatsDeprecated/Makefile.am:
--------------------------------------------------------------------------------
1 | pkgincludedir=$(prefix)/include/Sequence/SummStatsDeprecated
2 | 
3 | pkginclude_HEADERS = nSL.hpp Garud.hpp lHaf.hpp Snn.hpp
4 | 


--------------------------------------------------------------------------------
/Sequence/SummStatsDeprecated/Snn.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef __SEQUENCE_SNN_HPP__
25 | #define __SEQUENCE_SNN_HPP__
26 | 
27 | #include <Sequence/Comparisons.hpp>
28 | #include <Sequence/PolyTable.hpp>
29 | #include <vector>
30 | #include <utility>
31 | #include <cassert>
32 | 
33 | namespace Sequence
34 | {
35 |   /*!
36 |     Test statistic from Hudson (2000) Genetics 155(4):2011
37 |     \deprecated All declarations in this header carry the deprecated attribute. */
38 |   double Snn_statistic( const unsigned individuals[],
39 | 			const std::vector< std::vector<double> > & dkj,
40 | 			const unsigned config[],
41 | 			const size_t & npop,
42 | 			const unsigned & nsam )__attribute__ ((deprecated));
43 |   //! NOTE(review): the templates below are presumably defined in Sequence/bits/Snn.tcc, included at the end of this header -- confirm
44 |   template< typename shuffler >
45 |   std::pair<double,double>
46 |   Snn_test(const PolyTable & snpTable,
47 | 	   const unsigned config[],
48 | 	   const size_t & npop,
49 | 	   shuffler & s,
50 | 	   const unsigned & nperms = 10000)__attribute__ ((deprecated));
51 | 
52 |   template< typename shuffler >
53 |   std::vector< std::vector<double> >
54 |   Snn_test_pairwise(const PolyTable & snpTable,
55 | 		    const unsigned config[],
56 | 		    const size_t & npop,
57 | 		    shuffler & s,
58 | 		    const unsigned & nperms = 10000)__attribute__ ((deprecated));
59 | }
60 | #include <Sequence/bits/Snn.tcc> // kept inside the include guard so a repeated include of this header cannot pull it in again
61 | #endif
62 | 


--------------------------------------------------------------------------------
/Sequence/SummStatsDeprecated/lHaf.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef __SEQUENCE_SUMMSTATS_LHAP_HPP__
 2 | #define __SEQUENCE_SUMMSTATS_LHAP_HPP__
 3 | 
 4 | #include <Sequence/SimData.hpp>
 5 | 
 6 | namespace Sequence 
 7 | {
 8 |   /*!
 9 |     See doi:10.1371/journal.pgen.1005527.g001
10 |     \ingroup popgenanalysis
11 |   */
12 |   std::vector<double> lHaf( const SimData & data, const double l );
13 | }
14 | 
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/Sequence/SummStatsDeprecated/nSL.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef __SEQUENCE_SUMMSTATS_NSL_HPP__
 2 | #define __SEQUENCE_SUMMSTATS_NSL_HPP__
 3 | 
 4 | /*! \file nSL.hpp
 5 |    @brief The nSL statistic of doi: 10.1093/molbev/msu077
 6 | */
 7 | 
 8 | #include <Sequence/PolySIM.hpp>
 9 | #include <unordered_map>
10 | #include <tuple>
11 | #include <cstdint>
12 | 
13 | namespace Sequence
14 | {
15 |     /*!
16 |       The nSL statistic of Ferrer-Admetlla et al. doi: 10.1093/molbev/msu077.
17 |       \param core The index of the "focal/core" SNP
18 |       \param d An object of type Sequence::SimData
19 |       \param gmap The positions of every marker in d on the genetic map.  If
20 |       std::unordered_map<double,double>() is passed,
21 |       iHS is calculated using SNP positions.
22 |       \return nSL and iHs, with the latter as defined in doi:
23 |       10.1093/molbev/msu077.
24 |       \note This routine was validated by comparing to code provided by
25 |       Ferrer-Admetlla et al.
26 |       \warning The use of 'gmap' is untested.
27 |       \ingroup popgenanalysis
28 |      */
29 |     std::pair<double, double>
30 |     nSL(const std::size_t &core, const SimData &d,
31 |         const std::unordered_map<double, double> &gmap
32 |         = std::unordered_map<double, double>())__attribute__ ((deprecated));
33 | }
34 | #endif
35 | 


--------------------------------------------------------------------------------
/Sequence/Translate.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef __TRANSLATE_HPP__
25 | #define __TRANSLATE_HPP__
26 | #include <string>
27 | #include <Sequence/SeqEnums.hpp>
28 | /*! \file Translate.hpp
29 |   @brief declares Sequence::Translate,a function to translate CDS sequences into peptide sequences
30 | */
31 | 
32 | /*!
33 |   \defgroup misc Miscellany
34 |  */
35 | namespace Sequence
36 |   {
37 |   /*!
38 |     \ingroup misc
39 |     \param beg an iterator to the beginning of the region to translate
40 |     \param end an iterator to 1 past the end of the region to translate
41 |     \param genetic_code must be a value from the enumeration list Sequence::GeneticCodes (defaults to GeneticCodes::UNIVERSAL)
42 |     \param gapchar a character representing an alignment gap (defaults to '-')
43 |     \return a string representing the translation of the range
44 |     \throw std::runtime_error if \a genetic_code is invalid
45 |     \code
46 |     #include <Sequence/Translate.hpp>
47 |     \endcode
48 |   */
49 |     std::string Translate(std::string::const_iterator beg,
50 | 			  std::string::const_iterator end,
51 | 			  Sequence::GeneticCodes  genetic_code = GeneticCodes::UNIVERSAL,
52 | 			  const char & gapchar = '-');
53 | }
54 | #endif
55 | 


--------------------------------------------------------------------------------
/Sequence/Translate2.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #include <Sequence/Translate.hpp>
25 | /*! \file Translate2.hpp
26 |   \short Deprecated header declaring routines to translate sequences.  Including it includes the current header and issues a compiler warning.
27 |  */
28 | #warning "Using deprecated header <Sequence/Translate2.hpp>, please use <Sequence/Translate.hpp>"
29 | 
30 | 


--------------------------------------------------------------------------------
/Sequence/Unweighted.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef __UNWEIGHTED_H__
25 | #define __UNWEIGHTED_H__
26 | 
27 | #include <Sequence/WeightingSchemes.hpp>
28 | /*! \file Unweighted.hpp
29 |   @brief declares Sequence::Unweighted2 and  Sequence::Unweighted3
30 | */
31 | 
32 | /*!
33 |   \class Sequence::Unweighted2 Sequence/Unweighted.hpp
34 |   \ingroup weights
35 |   @short weights all pathways equally
36 |   \note This is generally not what you want to use (it biases the result to a higher Ka/Ks ratio)
37 | */
38 | 
39 | /*!
40 |   \class Sequence::Unweighted3 Sequence/Unweighted.hpp
41 |   \ingroup weights
42 |   @short weights all pathways equally
43 |   \note This is generally not what you want to use (it biases the result to a higher Ka/Ks ratio)
44 | */
45 | namespace Sequence
46 |   {
47 |   struct Unweighted2 : public WeightingScheme2
48 |     {
49 |       weights2_t operator()(const std::string &codon1, const std::string &codon2,Sequence::GeneticCodes genetic_code) const;
50 |     };
51 | 
52 |     struct Unweighted3 : public WeightingScheme3
53 |     {
54 |     public:
55 |       weights3_t operator()(const std::string &codon1, const std::string &codon2,Sequence::GeneticCodes genetic_code) const;
56 |     };
57 | }
58 | #endif
59 | 
60 | 


--------------------------------------------------------------------------------
/Sequence/VariantMatrixCapsule.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SEQUENCE_VARIANT_MATRIX_CAPSULE
 2 | #define SEQUENCE_VARIANT_MATRIX_CAPSULE
 3 | 
 4 | #include <cstdint>
 5 | #include <memory>
 6 | #include <stdexcept>
 7 | #include <vector>
 8 | namespace Sequence
 9 | {
10 |     template <typename T> struct Capsule // abstract interface giving pointer-based access (data/begin/end) to a sequence of T
11 |     {
12 |         virtual ~Capsule() = default;
13 |         // Following two may not be needed
14 |         //virtual T& get(std::size_t site, std::size_t sample) = 0;
15 |         //virtual const T& get(std::size_t site,
16 |         //                               std::size_t sample) const = 0;
17 |         virtual T* data() = 0;
18 |         virtual const T* data() const = 0;
19 |         virtual const T* cdata() const = 0;
20 |         virtual T* begin() = 0;
21 |         virtual const T* begin() const = 0;
22 |         virtual T* end() = 0;
23 |         virtual const T* end() const = 0;
24 |         virtual const T* cbegin() const = 0;
25 |         virtual const T* cend() const = 0;
26 |         virtual bool empty() const = 0;
27 |         virtual std::size_t size() const = 0;
28 | 
29 |         virtual bool resizable() const = 0;
30 | 
31 |         /// Override iff resizable() returns true; this default always throws std::runtime_error
32 |         virtual void
33 |         resize(bool)
34 |         {
35 |             throw std::runtime_error("Capsule cannot be resized");
36 |         }
37 |     };
38 | 
39 |     struct GenotypeCapsule : public Capsule<std::int8_t>
40 |     {
41 |         virtual ~GenotypeCapsule() = default;
42 |         virtual std::size_t nsites() const = 0;
43 |         virtual std::size_t nsam() const = 0;
44 |         virtual std::size_t& nsites() = 0;
45 |         virtual std::size_t& nsam() = 0;
46 |         virtual std::size_t row_offset() const = 0;
47 |         virtual std::size_t col_offset() const = 0;
48 |         virtual std::size_t stride() const = 0;
49 |         virtual std::unique_ptr<GenotypeCapsule> clone() const = 0;
50 |         virtual std::int8_t& operator()(std::size_t, std::size_t) = 0;
51 |         virtual const std::int8_t& operator()(std::size_t,
52 |                                               std::size_t) const = 0;
53 |     };
54 | 
55 |     struct PositionCapsule : public Capsule<double>
56 |     {
57 |         virtual ~PositionCapsule() = default;
58 |         virtual std::size_t nsites() const = 0;
59 |         virtual std::unique_ptr<PositionCapsule> clone() const = 0;
60 |         virtual double& operator[](std::size_t) = 0;
61 |         virtual const double& operator[](std::size_t) const = 0;
62 |     };
63 | 
64 | } // namespace Sequence
65 | 
66 | #endif
67 | 


--------------------------------------------------------------------------------
/Sequence/bits/Makefile.am:
--------------------------------------------------------------------------------
 1 | pkgincludedir=$(prefix)/include/Sequence/bits
 2 | 
 3 | pkginclude_HEADERS = PolySites.tcc\
 4 | 		PolyTable.tcc\
 5 | 		PolyTableSlice.tcc\
 6 | 		CountingOperators.tcc\
 7 | 		AlignStream.tcc\
 8 | 		Alignment.tcc\
 9 | 		Clustalw.tcc\
10 | 		phylipData.tcc\
11 | 		descriptiveStats.tcc\
12 | 		PolyTableFunctions.tcc\
13 | 		Snn.tcc \
14 | 		variant_matrix_views_internal.hpp \
15 | 		col_view_iterator.hpp
16 | 


--------------------------------------------------------------------------------
/Sequence/bits/PolyTable.tcc:
--------------------------------------------------------------------------------
 1 | // Code for the -*- C++ -*- namespace Sequence::PolyTable template members
 2 | 
 3 | /*
 4 | 
 5 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 6 | 
 7 | Remove the brackets to email me.
 8 | 
 9 | This file is part of libsequence.
10 | 
11 | libsequence is free software: you can redistribute it and/or modify
12 | it under the terms of the GNU General Public License as published by
13 | the Free Software Foundation, either version 3 of the License, or
14 | (at your option) any later version.
15 | 
16 | libsequence is distributed in the hope that it will be useful,
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 | GNU General Public License for more details.
20 | 
21 | You should have received a copy of the GNU General Public License
22 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
23 | 
24 | */
25 | 
26 | 
27 | #ifndef __POLY_TABLE_TCC__
28 | #define __POLY_TABLE_TCC__
29 | 
30 | #include <iterator>
31 | 
32 | namespace Sequence
33 | {
34 |   //! Replace the table's positions and sequences with copies of the arrays.
35 |   //! \return false (leaving both containers empty) if any sequence's length
36 |   //! differs from \a _num_positions, true otherwise.
37 |   template<typename numeric_type, typename string_type>
38 |   bool PolyTable::assign( const numeric_type * _positions,
39 | 			  const size_t & _num_positions,
40 | 			  const string_type * _data,
41 | 			  const size_t & _num_individuals )
42 |   {
43 |     //The numeric array must be convertible to double
44 |     static_assert( std::is_convertible<numeric_type,double>::value,
45 | 			"numeric_type must be convertible to double");
46 |     //The character type must be either char * or std::string
47 |     static_assert( (std::is_same<string_type,char*>::value ||
48 | 		    std::is_same<string_type,std::string>::value),
49 | 		   "string_type must be char * or std::string");
50 |     //assign() discards the previous contents, so no resize() is needed first
51 |     first.assign(_positions,_positions+_num_positions);
52 |     second.assign(_data,_data+_num_individuals);
53 |     non_const_access = true;
54 |     //Every sequence must have exactly one character per position
55 |     for(const auto & seq : second)
56 |       {
57 | 	if (seq.length() != _num_positions)
58 | 	  {
59 | 	    first.clear();
60 | 	    second.clear();
61 | 	    return false;
62 | 	  }
63 |       }
64 |     return true;
65 |   }
66 | }
67 | 
68 | #endif
69 | 


--------------------------------------------------------------------------------
/Sequence/fastq.hpp:
--------------------------------------------------------------------------------
 1 | /*! 
 2 |   \file fastq.hpp
 3 |   @brief FASTQ class
 4 | */
 5 | 
 6 | /*!
 7 |   \class Sequence::fastq Sequence/fastq.hpp
 8 |   \ingroup seqio
 9 |   Publicly derived from Sequence::Seq.
10 | */
11 | #ifndef __SEQUENCE_FASTQ_HPP__
12 | #define __SEQUENCE_FASTQ_HPP__
13 | 
14 | #include <Sequence/Seq.hpp>
15 | 
16 | namespace Sequence
17 |   {
18 |   class fastq : public Seq
19 |     {
20 |     public:
21 |       std::string quality; //!< quality string (left empty by the Seq-based constructors; see their warnings)
22 |     private:
23 |       bool repeat_name; //!< NOTE(review): presumably the flag set through repname() -- confirm
24 |     public:
25 |       using Seq::Seq;
26 |       fastq(void);
27 |       fastq (const std::string &name, const std::string &seq,
28 | 	     const std::string & qual);
29 |       fastq (std::string && name, std::string && seq,
30 | 	     std::string && qual);
31 |       //! \warning Quality string will be left empty
32 |       fastq (const Seq & s);
33 |       fastq (const fastq & s) = default;
34 |       fastq ( fastq && s) = default;
35 |       //! \warning Quality string will be left empty
36 |       fastq ( Seq && s);
37 |       fastq & operator=(const fastq & ) = default;
38 |       fastq & operator=( fastq && ) = default;
39 |       ~fastq()/*! placeholder for vtable */ {}
40 | 
41 |       //! Set to true or false for repeating the seq name on third line of output
42 |       void repname(const bool &);
43 |       /*!
44 | 	\param s the input stream to read a record from
45 | 	\exception Sequence::SeqException if memory can't be allocated
46 | 	(the data are temporarily read into char *, which was found to be faster).
47 | 	\exception Sequence::badFormat if the input stream is not
48 | 	in FASTQ format
49 |       */
50 |       std::istream & read(std::istream &s);
51 |       /*!
52 | 	\param s a std::ostream
53 | 	write the sequence in FASTQ format to \a s
54 |       */
55 |       std::ostream & print(std::ostream& s) const;
56 |     };
57 | }
58 | 
59 | #endif
60 | 
61 | 


--------------------------------------------------------------------------------
/Sequence/phylipData.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | /*! \file phylipData.hpp
25 |   \short Sequence::phylipData -- read in phylip alignments
26 | */
27 | #ifndef __PHYLIPDATA_HPP__
28 | #define __PHYLIPDATA_HPP__
29 | 
30 | #include <Sequence/AlignStream.hpp>
31 | #include <utility>
32 | #include <string>
33 | namespace Sequence
34 | {
35 |   template < typename T >
36 |   class phylipData: public AlignStream < T >
37 | 		   /*!
38 | 		     Input of phylip-format alignments
39 | 		    */
40 |   {
41 |   public:
42 |     phylipData (): AlignStream<T>(){}
43 |     phylipData(const std::vector<T> & _data): AlignStream<T>(_data)
44 |     {
45 |     }
46 |     phylipData (const AlignStream<T> &a) : AlignStream<T>(a)
47 |     {
48 |     }
49 |     phylipData (const phylipData<T> &a) : AlignStream<T>(a)
50 |     {
51 |     }
52 |     phylipData( AlignStream<T> && a) : AlignStream<T>(std::move(a))
53 |     {
54 |     }
55 |     phylipData( phylipData<T> && a) : AlignStream<T>(std::move(a))
56 |     {
57 |     }
58 |     phylipData( std::vector<T> && a) : AlignStream<T>(std::move(a))
59 |     {
60 |     }
61 |     ~phylipData(void)
62 |     {}
63 |     std::istream & read (std::istream & s);
64 |     std::ostream & print (std::ostream & s) const;
65 |     phylipData<T> & operator=( const AlignStream<T> & rhs);
66 |   };
67 | }
68 | #include <Sequence/bits/phylipData.tcc>
69 | #endif
70 | 


--------------------------------------------------------------------------------
/Sequence/polySiteVector.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef __POLYSITEVECTOR_MANIP_HPP__
25 | #define __POLYSITEVECTOR_MANIP_HPP__
26 | 
27 | 
28 | /*! \file Sequence/polySiteVector.hpp
29 |   @brief Site-major variation tables in ASCII format
30 | */
31 | 
32 | #include <string>
33 | #include <utility>
34 | #include <vector>
35 | 
36 | namespace Sequence
37 | {
38 |   class PolyTable;
39 | 
40 |   /*!
41 |     For polymorphism data, a Site can be represented as
42 |     a position (a double) and the characters at 
43 |     that positions (a std::string)
44 |   */
45 |   using polymorphicSite = std::pair< double, std::string >;
46 | 
47 |   /*!
48 |     A polymorphism data set can be represented as
49 |     a vector containing a sequence of polymorphicSite
50 |   */
51 |   using polySiteVector = std::vector< polymorphicSite >;
52 | 
53 |   polySiteVector make_polySiteVector(const Sequence::PolyTable & data)__attribute__((deprecated));
54 | }
55 | #endif
56 | 


--------------------------------------------------------------------------------
/Sequence/samflag.hpp:
--------------------------------------------------------------------------------
 1 | //! \file Sequence/samflag.hpp @brief SAM flags
 2 | #ifndef __LIBSEQ_SAMFLAG_HPP__
 3 | #define __LIBSEQ_SAMFLAG_HPP__
 4 | 
 5 | #include <iosfwd>
 6 | #include <cstdio>
 7 | #include <cstdint>
 8 | namespace Sequence
 9 | {
10 |   /*!
11 |     \namespace Sequence::sambits
12 | 
13 |     \brief Stores the hex flags used by a SAM file flag field in an easy-to-read format
14 | 
15 |     \ingroup HTS
16 |   */
17 |   namespace sambits
18 |   {
19 |     static const int is_paired=0x0001;
20 |     static const int is_proper_pair=0x0002;
21 |     static const int query_unmapped=0x0004;
22 |     static const int mate_unmapped=0x0008;
23 |     static const int qstrand = 0x0010;
24 |     static const int mstrand = 0x0020;
25 |     static const int first_read = 0x0040;
26 |     static const int second_read = 0x0080;
27 |     static const int not_primary = 0x0100;
28 |     static const int qcfail = 0x0200;
29 |     static const int duplicate = 0x0400;
30 |     static const int suppalign = 0x0800;  //Supplementary alignment
31 |   }
32 | 
33 |   /*!
34 |     \class Sequence::samflag Sequence/samflag.hpp
35 |     \brief The flag field of a SAM record
36 |     
37 |     A SAM file's FLAG field is stored as an integer that is the sum of
38 |     a series of flags (defined in namespace Sequence::sambits).
39 | 
40 |     This class simply takes that integer and stores a set of boolean 
41 |     variables based on the value of the integer.
42 |   */
43 |   class samflag
44 |   {
45 |   private:
46 |     void process_bits(); // NOTE(review): presumably fills the bool members below from flag; defined outside this header -- confirm
47 |   public:
48 |     /*!
49 |       The flag value, i.e. the integer stored in the FLAG field of the SAM record
50 |     */
51 |     std::int32_t flag;
52 |     bool is_paired,is_proper_pair,query_unmapped, // Boolean variables derived from the value of flag (see class description).
53 |       mate_unmapped,qstrand,mstrand,first_read, // NOTE(review): names mirror the constants in namespace sambits -- confirm the 1:1 mapping
54 |       second_read,not_primary,qcfail,duplicate,supp_alignment; // NOTE(review): supp_alignment presumably pairs with sambits::suppalign -- confirm
55 |     samflag(const std::int32_t & __flag); // Construct from an integer flag value
56 |     samflag(); // NOTE(review): initial value of flag is set in the implementation file -- not visible here
57 |     operator std::int32_t() const; // Conversion to a 32-bit integer. NOTE(review): presumably returns flag -- confirm
58 |     std::istream & read( std::istream & i); // Reads from stream i and returns a stream reference; input format is defined in the implementation
59 |   };
60 | 
61 |   std::ostream & operator<<(std::ostream & o, const samflag & s);
62 |   std::istream & operator>>(std::istream & i, samflag & s);
63 | }
64 | 
65 | #endif
66 | 


--------------------------------------------------------------------------------
/Sequence/samfunctions.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef __SEQ_SAMFUNCTIONS_HPP__
 2 | #define __SEQ_SAMFUNCTIONS_HPP__
 3 | 
 4 | #include <Sequence/samrecord.hpp>
 5 | #include <Sequence/bamrecord.hpp>
 6 | 
 7 | namespace Sequence
 8 | {
 9 |   unsigned alignment_length( const samrecord & b );
10 |   unsigned insertion_distance( const samrecord & b );
11 |   unsigned deletion_distance( const samrecord & b );
12 |   unsigned ngaps( const samrecord & b );
13 |   unsigned mismatches( const samrecord & b );
14 | #ifdef HAVE_HTSLIB
15 |   unsigned alignment_length( const bamrecord & b );
16 |   unsigned insertion_distance( const bamrecord & b );
17 |   unsigned deletion_distance( const bamrecord & b );
18 |   unsigned ngaps( const bamrecord & b );
19 |   unsigned mismatches( const bamrecord & b );
20 | #endif
21 | }
22 | #endif
23 | 


--------------------------------------------------------------------------------
/Sequence/stateCounter.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #ifndef _STATE_COUNTER_H_
25 | #define _STATE_COUNTER_H_
26 | #include <functional>
27 | /*! \file stateCounter.hpp
28 | @brief declaration of Sequence::stateCounter, a class to keep track of nucleotide counts either at a site
29 | in an alignment, or along a sequence
30 | */
31 | /*!
32 |   \class Sequence::stateCounter Sequence/stateCounter.hpp
33 |   \ingroup functors
34 |   \warning class data are public.  Use responsibly.
35 |   @short keep track of state counts at a site in an alignment or along a sequence
36 | */
37 | namespace Sequence
38 |   {
39 |     class __attribute__ ((deprecated))stateCounter : public std::unary_function<char,void>
40 |     {
41 |     public:
42 |       typedef unsigned size_type;
43 |       size_type a,g,c,t,zero,one,gap,n;
44 |       bool ndna;
45 |     private:
46 |       char _gap;
47 |     public:
48 |       stateCounter(char gapchar = '-');
49 |       stateCounter(stateCounter &&) = default;
50 |       stateCounter(const stateCounter &) = default;
51 |       stateCounter & operator=(const stateCounter &)=default;
52 |       stateCounter & operator=( stateCounter &&)=default;
53 |       void operator()(const char &ch);
54 |       size_type nStates(void) const;
55 |     };
56 | }
57 | #endif
58 | 


--------------------------------------------------------------------------------
/Sequence/summstats.hpp:
--------------------------------------------------------------------------------
 1 | /// @file Sequence/summstats.hpp
 2 | /// \brief Include all summary statistic functions and types
 3 | #ifndef SEQUENCE_SUMMSTATS_HPP__
 4 | #define SEQUENCE_SUMMSTATS_HPP__
 5 | 
 6 | /*!
 7 |  *  \defgroup popgenanalysis Analysis of molecular population genetic data
 8 |  *  \brief Summary statistics and other analysis of Sequence::VariantMatrix
 9 |  *  \ingroup popgen
10 |  *
11 |  *  See @ref md_md_tutorial.
12 |  * 
13 | */
14 | 
15 | #include "summstats/generic.hpp"
16 | #include "summstats/classics.hpp"
17 | #include "summstats/nsl.hpp"
18 | #include "summstats/nslx.hpp"
19 | #include "summstats/ld.hpp"
20 | #include "summstats/lhaf.hpp"
21 | #include "summstats/garud.hpp"
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/Sequence/summstats/Makefile.am:
--------------------------------------------------------------------------------
1 | pkgincludedir=$(prefix)/include/Sequence/summstats
2 | 
3 | pkginclude_HEADERS = classics.hpp thetapi.hpp thetaw.hpp thetah.hpp thetal.hpp auxillary.hpp nvariablesites.hpp allele_counts.hpp \
4 | 					 util.hpp ld.hpp nSLiHS.hpp nsl.hpp nslx.hpp garud.hpp generic.hpp lhaf.hpp \
5 | 					 algorithm.hpp
6 | 


--------------------------------------------------------------------------------
/Sequence/summstats/allele_counts.hpp:
--------------------------------------------------------------------------------
 1 | /// \file Sequence/summstats/allele_counts.hpp
 2 | /// \brief Count alleles at variable sites.
 3 | #ifndef SEQUENCE_SUMMSTATS_ALLELE_COUNTS_HPP__
 4 | #define SEQUENCE_SUMMSTATS_ALLELE_COUNTS_HPP__
 5 | 
 6 | #include <vector>
 7 | #include <utility>
 8 | #include <cstdint>
 9 | #include <Sequence/AlleleCountMatrix.hpp>
10 | 
11 | namespace Sequence
12 | {
13 |     struct AlleleCounts
14 |     /// Tracks the number of states at a site
15 |     /// \ingroup popgenanalysis
16 |     {
17 |         /// Number of non-missing states
18 |         int nstates;
19 |         /// Number of samples with missing states
20 |         int nmissing;
21 |     };
22 | 
23 |     /*! \brief Count number of alleles at each site
24 |      * \param m An AlleleCountMatrix
25 |      * \ingroup popgenanalysis
26 |      */
27 |     std::vector<AlleleCounts> allele_counts(const AlleleCountMatrix& m);
28 | 
29 |     /*! \brief Count number of non-reference alleles at each site
30 |      * \param m An AlleleCountMatrix
31 |      * \param refstate The reference state for all sites.
32 |      * \ingroup popgenanalysis
33 |      */
34 |     std::vector<AlleleCounts>
35 |     non_reference_allele_counts(const AlleleCountMatrix& m,
36 |                                 const std::int8_t refstate);
37 | 
38 |     /*! \brief Count number of non-reference alleles at each site
39 |      * \param m An AlleleCountMatrix
40 |      * \param refstates The reference state at each site.
41 |      * \ingroup popgenanalysis
42 |      */
43 |     std::vector<AlleleCounts>
44 |     non_reference_allele_counts(const AlleleCountMatrix& m,
45 |                                 const std::vector<std::int8_t>& refstates);
46 | } // namespace Sequence
47 | #endif
48 | 


--------------------------------------------------------------------------------
/Sequence/summstats/auxillary.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SEQUENCE_SUMMSTATS_AUXILLARY_HPP__
 2 | #define SEQUENCE_SUMMSTATS_AUXILLARY_HPP__
 3 | 
 4 | #include <cstdint>
 5 | 
 6 | namespace Sequence
 7 | {
 8 |     namespace summstats_aux
 9 |     {
10 |         double a_sub_n(const std::uint32_t);
11 |         double b_sub_n(const std::uint32_t nsam);
12 |         double b_sub_n_plus1(const std::uint32_t nsam);
13 |     } // namespace summstats_aux
14 | } // namespace Sequence
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/Sequence/summstats/garud.hpp:
--------------------------------------------------------------------------------
 1 | /// \file Sequence/summstats/garud.hpp
 2 | /// \brief H1, H12, and H2/H1 stats
 3 | #ifndef SEQUENCE_SUMMSTATS_GARUD_HPP
 4 | #define SEQUENCE_SUMMSTATS_GARUD_HPP
 5 | 
 6 | #include <Sequence/VariantMatrix.hpp>
 7 | 
 8 | namespace Sequence
 9 | {
10 |   struct GarudStats
11 |   /*! 
12 |     Statistics from \cite Garud2015-ob
13 |     \note H1 = 1 - haplotype homozygosity, e.g. "H" from \cite Depaulis1998-ol
14 |     \ingroup popgenanalysis
15 |   */
16 |   {
17 |     double H1,H12,H2H1;
18 |     GarudStats();
19 |     GarudStats(const double, const double, const double);
20 |   };
21 | 
22 |   /*! \brief Calculate H1, H12, and H2/H1
23 |    * \param m A VariantMatrix
24 |    * \return GarudStats
25 |    *
26 |    * See \cite Garud2015-ob for details.
27 |    */
28 |   GarudStats garud_statistics(const VariantMatrix & m);
29 | }
30 | 
31 | #endif
32 | 


--------------------------------------------------------------------------------
/Sequence/summstats/generic.hpp:
--------------------------------------------------------------------------------
 1 | /// \file Sequence/summstats/generic.hpp
 2 | /// \brief Generic utilities for calculating summary statistics
 3 | #ifndef SEQUENCE_SUMMSTATS_GENERIC_HPP
 4 | #define SEQUENCE_SUMMSTATS_GENERIC_HPP
 5 | 
 6 | #include <unordered_map>
 7 | #include <cstdint>
 8 | 
 9 | namespace Sequence
10 | {
11 |     /*! \brief Calculate heterozygosity/diversity from count data
12 |      * \param counts An unordered map holding the count data.
13 |      * \param nsam the sample size
14 |      * \return diversity = 1 - homozygosity
15 |      */
16 |     double diversity_from_counts(
17 |         const std::unordered_map<std::int32_t, std::int32_t>& counts,
18 |         const std::size_t nsam);
19 | } // namespace Sequence
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/Sequence/summstats/ld.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SEQUENCE_SUMMSTATS_LD_HPP__
 2 | #define SEQUENCE_SUMMSTATS_LD_HPP__
 3 | 
 4 | #include <cstdint>
 5 | #include <vector>
 6 | #include <Sequence/VariantMatrix.hpp>
 7 | 
 8 | namespace Sequence
 9 | {
10 |     struct TwoLocusCounts // Element type of the vector returned by two_locus_haplotype_counts
11 |     {
12 |         std::int8_t i, j; // NOTE(review): presumably the states observed at sitei and sitej -- confirm
13 |         int n; // NOTE(review): presumably the number of samples carrying the (i, j) combination -- confirm
14 |         TwoLocusCounts(std::int8_t i_, std::int8_t j_, int n_); // Takes one value per data member, in declaration order
15 |     };
16 | 
17 |     std::vector<TwoLocusCounts>
18 |     two_locus_haplotype_counts(const VariantMatrix& m, std::size_t sitei,
19 |                                const std::size_t sitej,
20 |                                const bool skip_missing);
21 | } // namespace Sequence
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/Sequence/summstats/lhaf.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SEQUENCE_SUMMSTATS_LHAF_HPP
 2 | #define SEQUENCE_SUMMSTATS_LHAF_HPP
 3 | 
 4 | #include <cstdint>
 5 | #include <vector>
 6 | #include <Sequence/VariantMatrix.hpp>
 7 | 
 8 | namespace Sequence
 9 | {
10 |     /*! \brief l-Haf statistic of \cite Ronen2015-te
11 |     * \param m A VariantMatrix
12 |     * \param refstate The ancestral state
13 |     * \param l The power parameter
14 |     * \return vector of the statistic
15 |     * \ingroup popgenanalysis
16 |     */
17 |     std::vector<double> lhaf(const VariantMatrix &m,
18 |                              const std::int8_t refstate, const double l);
19 | } // namespace Sequence
20 | #endif
21 | 


--------------------------------------------------------------------------------
/Sequence/summstats/nSLiHS.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SEQUENCE_SUMMSTATS_NSLIHS_HPP
 2 | #define SEQUENCE_SUMMSTATS_NSLIHS_HPP
 3 | 
 4 | #include <cstdint>
 5 | 
 6 | namespace Sequence
 7 | {
 8 |     struct nSLiHS
 9 |     /// Stores the results of nSL and iHS calculations.
10 |     /// See Sequence::nsl for details.
11 |     ///
12 |     /// \note This type is usually forward-declared in other headers,
13 |     /// meaning this header will need inclusion in relevant translation
14 |     /// units.
15 |     ///
16 |     /// \ingroup popgenanalysis
17 |     {
18 |         /// The nSL statistic \cite Ferrer-Admetlla2014-wa
19 |         double nsl;
20 |         /// The iHS statistic, calculated according to \cite Ferrer-Admetlla2014-wa
21 |         double ihs;
22 |         /// Count of non-reference,
23 |         /// non-missing allele.
24 |         std::int32_t core_count;
25 |     };
26 | } // namespace Sequence
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/Sequence/summstats/nsl.hpp:
--------------------------------------------------------------------------------
 1 | /// \file Sequence/summstats/nsl.hpp
 2 | /// \brief nSL and iHS
 3 | #ifndef SEQUENCE_SUMMSTATS_NSL_HPP__
 4 | #define SEQUENCE_SUMMSTATS_NSL_HPP__
 5 | 
 6 | #include <vector>
 7 | #include <cstdint>
 8 | #include <Sequence/VariantMatrix.hpp>
 9 | #include "nSLiHS.hpp"
10 | 
11 | namespace Sequence
12 | {
13 | 
14 |     /*! \brief nSL and iHS statistics
15 |      * \param m A VariantMatrix
16 |      * \param core The index of the core site
17 |      * \param refstate The value of the reference/ancestral allelic state
18 |      *
19 |      * \return an nSLiHS object
20 |      * \ingroup popgenanalysis
21 |      *
22 |      * See nSL_from_ms.cc for example
23 |      *
24 |      * See \cite Ferrer-Admetlla2014-wa for details.
25 |      */
26 |     nSLiHS nsl(const VariantMatrix& m, const std::size_t core,
27 |                const std::int8_t refstate);
28 | 
29 |     /*! \brief nSL and iHS statistics
30 |      * \param m A VariantMatrix
31 |      * \param refstate The value of the reference/ancestral allelic state
32 |      *
33 |      * \return vector of nSLiHS objects (one for each site)
34 |      * \ingroup popgenanalysis
35 |      *
36 |      * This function differs from the version working 
37 |      * on a core site in that it uses an efficient
38 |      * method to dynamically update suffix lengths as each 
39 |      * core site is processed.  The result is a huge runtime
40 |      * reduction compared to calculating the statistic
41 |      * for each core site on its own.
42 |      *
43 |      * See \cite Ferrer-Admetlla2014-wa for details.
44 |      */
45 |     std::vector<nSLiHS> nsl(const VariantMatrix& m,
46 |                             const std::int8_t refstate);
47 | } // namespace Sequence
48 | 
49 | #endif
50 | 


--------------------------------------------------------------------------------
/Sequence/summstats/nslx.hpp:
--------------------------------------------------------------------------------
 1 | /// \file Sequence/summstats/nslx.hpp
 2 | /// \brief A variation on nSL/iHS
 3 | #ifndef SEQUENCE_SUMMSTATS_NSLX_HPP
 4 | #define SEQUENCE_SUMMSTATS_NSLX_HPP
 5 | 
 6 | #include <vector>
 7 | #include <cstdint>
 8 | #include <Sequence/VariantMatrix.hpp>
 9 | #include "nSLiHS.hpp"
10 | 
11 | namespace Sequence
12 | {
13 |     /*! \brief A variation on nSL/iHS 
14 |      * \param m A VariantMatrix
15 |      * \param refstate The ancestral state
16 |      * \param x Non-reference allele count
17 |      *
18 |      * \return vector of nSLiHS
19 |      *
20 |      * This variant on nSL only allows suffix lengths
21 |      * to be broken by variants where the derived 
22 |      * (non-refstate) allele is present <= \a x times.
23 |      *
24 |      * When \a x is 1, this statistic is a proxy for the 
25 |      * SDS score of \cite Field2016-so.
26 |      */
27 |     std::vector<nSLiHS> nslx(const VariantMatrix& m,
28 |                              const std::int8_t refstate, const int x);
29 | } // namespace Sequence
30 | 
31 | #endif
32 | 


--------------------------------------------------------------------------------
/Sequence/summstats/nvariablesites.hpp:
--------------------------------------------------------------------------------
 1 | /// \file Sequence/summstats/nvariablesites.hpp
 2 | /// \brief Calculate total numbers of polymorphisms
 3 | #ifndef SEQUENCE_SUMMSTATS_NVARIABLESITES_HPP__
 4 | #define SEQUENCE_SUMMSTATS_NVARIABLESITES_HPP__
 5 | 
 6 | #include <cstdint>
 7 | #include <Sequence/AlleleCountMatrix.hpp>
 8 | 
 9 | namespace Sequence
10 | {
11 |     /*! \brief Number of polymorphic sites
12 |      *
13 |      * Returns the number of sites with more than one non-missing state
14 |      * \param m An AlleleCountMatrix
15 |      * \return std::uint32_t
16 |      * \ingroup popgenanalysis
17 |      */
18 |     std::uint32_t nvariable_sites(const AlleleCountMatrix& m);
19 | 
20 |     /*! \brief Number of bi-allelic sites
21 |      *
22 |      * Return the number of sites with exactly two non-missing states.
23 |      * \param m An AlleleCountMatrix
24 |      * \return std::uint32_t
25 |      * \ingroup popgenanalysis
26 |      */
27 |     std::uint32_t nbiallelic_sites(const AlleleCountMatrix& m);
28 | 
29 |     /*! \brief Total number of mutations in the sample
30 |      *
31 |      * Return \f$\sum_{i=0}^{i=m.nsites-1}I(i)\f$ where \f$I(i)\f$
32 |      * is \f$k_i - 1\f$ if \f$k_i\f$, the number of states at the \f$i^{th}\f$ site,
33 |      * is greater than one, and zero otherwise.
34 |      *
35 |      * \param m An AlleleCountMatrix
36 |      * \return std::uint32_t
37 |      * \ingroup popgenanalysis
38 |      */
39 |     std::uint32_t total_number_of_mutations(const AlleleCountMatrix& m);
40 | } // namespace Sequence
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/Sequence/summstats/thetah.hpp:
--------------------------------------------------------------------------------
 1 | /// \file Sequence/summstats/thetah.hpp
 2 | /// \brief Fay and Wu's \f$\hat\theta_H\f$.
 3 | #ifndef SEQUENCE_SUMMSTATS_THETAH_HPP__
 4 | #define SEQUENCE_SUMMSTATS_THETAH_HPP__
 5 | 
 6 | #include <vector>
 7 | #include <Sequence/AlleleCountMatrix.hpp>
 8 | 
 9 | namespace Sequence
10 | {
11 |     /*! \brief Fay and Wu's \f$\hat\theta_H\f$.
12 |      * \param ac An AlleleCountMatrix
13 |      * \param refstate The ancestral state
14 |      * \return double
15 |      *
16 |      * See \cite Fay2000-ef for details.
17 |      * \ingroup popgenanalysis
18 |      */
19 |     double thetah(const AlleleCountMatrix& ac, const std::int8_t refstate);
20 | 
21 |     /*! \brief Fay and Wu's \f$\hat\theta_H\f$.
22 |      * \param m An AlleleCountMatrix
23 |      * \param refstates Vector of ancestral states.
24 |      * \return double
25 |      *
26 |      * See \cite Fay2000-ef for details.
27 |      * \ingroup popgenanalysis
28 |      */
29 |     double thetah(const AlleleCountMatrix& m,
30 |                   const std::vector<std::int8_t>& refstates);
31 | } // namespace Sequence
32 | 
33 | #endif
34 | 


--------------------------------------------------------------------------------
/Sequence/summstats/thetal.hpp:
--------------------------------------------------------------------------------
 1 | /// \file Sequence/summstats/thetal.hpp
 2 | /// \brief Zeng et al. \f$\hat\theta_L\f$
 3 | #ifndef SEQUENCE_SUMMSTATS_THETAL_HPP__
 4 | #define SEQUENCE_SUMMSTATS_THETAL_HPP__
 5 | 
 6 | #include <vector>
 7 | #include <Sequence/AlleleCountMatrix.hpp>
 8 | 
 9 | namespace Sequence
10 | {
11 |     /*! \brief Zeng et al. \f$\hat\theta_L\f$
12 |      * \param ac An AlleleCountMatrix
13 |      * \param refstate The ancestral state
14 |      * \return double
15 |      *
16 |      * See \cite Zeng2006-is for details.
17 |      * \ingroup popgenanalysis
18 |      */
19 |     double thetal(const AlleleCountMatrix& ac, const std::int8_t refstate);
20 | 
21 |     /*! \brief Zeng et al. \f$\hat\theta_L\f$
22 |      * \param m An AlleleCountMatrix
23 |      * \param refstates Vector of ancestral states.
24 |      * \return double
25 |      *
26 |      * See \cite Zeng2006-is for details.
27 |      * \ingroup popgenanalysis
28 |      */
29 |     double thetal(const AlleleCountMatrix& m,
30 |                   const std::vector<std::int8_t>& refstates);
31 | } // namespace Sequence
32 | 
33 | #endif
34 | 


--------------------------------------------------------------------------------
/Sequence/summstats/thetapi.hpp:
--------------------------------------------------------------------------------
 1 | /*!
 2 | \file summstats/thetapi.hpp
 3 | */
 4 | #ifndef SEQUENCE_SUMMSTATS_THETAPI_HPP__
 5 | #define SEQUENCE_SUMMSTATS_THETAPI_HPP__
 6 | 
 7 | #include <Sequence/AlleleCountMatrix.hpp>
 8 | 
 9 | namespace Sequence
10 | {
11 |     /*! \brief Mean pairwise differences
12 |      * \param ac An AlleleCountMatrix
13 |      * \return Mean pairwise differences
14 |      * \note Calculated as sum over one minus site homozygosity
15 |      *
16 |      * This function is included via Sequence/summstats.hpp,
17 |      * Sequence/summstats/classics.hpp or
18 |      * Sequence/summstats/thetapi.hpp
19 |      *
20 |      * See \cite Tajima1983-it for details.
21 |      * \ingroup popgenanalysis
22 |      */
23 |     double thetapi(const AlleleCountMatrix& ac);
24 | } // namespace Sequence
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/Sequence/summstats/thetaw.hpp:
--------------------------------------------------------------------------------
 1 | /// \file Sequence/summstats/thetaw.hpp
 2 | /// \brief Watterson's theta
 3 | #ifndef SEQUENCE_SUMMSTATS_THETAW_HPP__
 4 | #define SEQUENCE_SUMMSTATS_THETAW_HPP__
 5 | 
 6 | #include <Sequence/AlleleCountMatrix.hpp>
 7 | 
 8 | namespace Sequence
 9 | {
10 |     /*! \brief Watterson's theta
11 |      * \param ac An AlleleCountMatrix
12 |      * \returns Watterson's theta, a double
13 |      * 
14 |      * \note For a site with \f$k\f$ states,
15 |      * \f$k-1\f$ is added to the number of inferred mutations.
16 |      * In other words, the calculation is based on the total
17 |      * number of mutations.
18 |      *
19 |      * See \cite Watterson1975-ej for details.
20 |      * \ingroup popgenanalysis
21 |      */
22 |     double thetaw(const AlleleCountMatrix& ac);
23 | } // namespace Sequence
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/Sequence/summstats/util.hpp:
--------------------------------------------------------------------------------
 1 | /*!
 2 |  * \file Sequence/summstats/util.hpp
 3 |  * \brief Helper functions for implementing summary statistics
 4 |  * 
 5 |  * This file must be included directly.  No other header file
 6 |  * includes it.
 7 | */
 8 | #ifndef SEQUENCE_SUMMSTATS_UTIl_HPP__
 9 | #define SEQUENCE_SUMMSTATS_UTIl_HPP__
10 | 
11 | #include <cstdint>
12 | #include <algorithm>
13 | 
14 | namespace Sequence
15 | {
16 |     template <typename T>
17 |     inline bool
18 |     all_missing(const T& t)
19 |     /// Returns true if all elements in t encode missing data.
20 |     /// T should be a model of a VariantMatrix, RowView, or ColumnView
21 |     {
22 |         return std::all_of(
23 |             t.begin(), t.end(),
24 |             [](const typename T::value_type v) { return v < 0; });
25 |     }
26 | } // namespace Sequence
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/Sequence/typedefs.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | /*! \file typedefs.hpp
25 |   Typedefs used in the library are defined here.
26 |   Wherever possible, types from namespace std
27 |   are given forward declarations. 
28 |   @brief typedefs used by libsequence
29 | */
30 | #ifndef __SEQUENCE_TYPEDEFS_HPP
31 | #define __SEQUENCE_TYPEDEFS_HPP
32 | #include <vector>
33 | #include <utility>
34 | #include <string>
35 | namespace Sequence
36 | {
37 |   /*! 
38 |     A CodonUsageTable is a vector of pairs.  In each pair,
39 |     the first element is the codon, and the second element
40 |     is an integer counting the number of occurrences of 
41 |     the codon
42 |   */
43 |   typedef std::vector< std::pair<std::string,int> > CodonUsageTable;
44 | }
45 | #endif
46 | 


--------------------------------------------------------------------------------
/Sequence/variant_matrix/Makefile.am:
--------------------------------------------------------------------------------
1 | pkgincludedir=$(prefix)/include/Sequence/variant_matrix
2 | 
3 | pkginclude_HEADERS = filtering.hpp windows.hpp msformat.hpp
4 | 


--------------------------------------------------------------------------------
/Sequence/variant_matrix/filtering.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SEQUENCE_VARIANT_MATRIX_FILTERING_HPP_
 2 | #define SEQUENCE_VARIANT_MATRIX_FILTERING_HPP_
 3 | 
 4 | #include <Sequence/VariantMatrix.hpp>
 5 | #include <Sequence/VariantMatrixViews.hpp>
 6 | #include <functional>
 7 | #include <cstdint>
 8 | 
 9 | namespace Sequence
10 | {
11 |     std::int32_t filter_sites(VariantMatrix &m,
12 |                               const std::function<bool(const RowView &)> &f);
13 | 
14 |     std::int32_t
15 |     filter_haplotypes(VariantMatrix &m,
16 |                       const std::function<bool(const ColView &)> &f);
17 | 
18 |     std::int32_t filter_sites(VariantMatrix &m,
19 |                               const std::function<bool(const ConstRowView &)> &f);
20 | 
21 |     std::int32_t
22 |     filter_haplotypes(VariantMatrix &m,
23 |                       const std::function<bool(const ConstColView &)> &f);
24 | }
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/Sequence/variant_matrix/windows.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SEQUENCE_VARIANT_MATRIX_WINDOWS_HPP
 2 | #define SEQUENCE_VARIANT_MATRIX_WINDOWS_HPP
 3 | 
 4 | #include <algorithm>
 5 | #include <vector>
 6 | #include <stdexcept>
 7 | #include <Sequence/VariantMatrix.hpp>
 8 | #include <Sequence/VariantMatrixViews.hpp>
 9 | 
10 | namespace Sequence
11 | {
12 |     /*! \brief Return a window from a VariantMatrix
13 |      * \param m A VariantMatrix
14 |      * \param beg Beginning of window
15 |      * \param end End of window
16 |      *
17 |      * \note The window intervals are closed, [beg,end]
18 |      */
19 |     VariantMatrix make_window(const VariantMatrix& m, const double beg,
20 |                               const double end);
21 |     /*! \brief Return a slice from a VariantMatrix
22 |      * \param m A VariantMatrix
23 |      * \param beg Beginning of window
24 |      * \param end End of window
25 |      * \param i index of first haplotype to include
26 |      * \param j one past last haplotype to include
27 |      *
28 |      * The result is a variant matrix including positions [beg,end]
29 |      * and samples [i,j) from \a m.  Note that the sample interval is 
30 |      * half-open!
31 |      */
32 |     
33 |     VariantMatrix make_slice(const VariantMatrix& m, const double beg,
34 |                              const double end,
35 |                              const std::size_t i,
36 |                              const std::size_t j);
37 | } // namespace Sequence
38 | 
39 | #endif
40 | 


--------------------------------------------------------------------------------
/config-h.in.in:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molpopgen/libsequence/ff1a4f698ed2fac40301ff466a6253c82889fd6d/config-h.in.in


--------------------------------------------------------------------------------
/config.h.in:
--------------------------------------------------------------------------------
 1 | /* config.h.in.  Generated from configure.ac by autoheader.  */
 2 | 
 3 | /* define if the compiler supports basic C++11 syntax */
 4 | #undef HAVE_CXX11
 5 | 
 6 | /* Define to 1 if you have the <dlfcn.h> header file. */
 7 | #undef HAVE_DLFCN_H
 8 | 
 9 | /* Define to 1 if you have the <inttypes.h> header file. */
10 | #undef HAVE_INTTYPES_H
11 | 
12 | /* Define to 1 if you have the `z' library (-lz). */
13 | #undef HAVE_LIBZ
14 | 
15 | /* Define to 1 if you have the <memory.h> header file. */
16 | #undef HAVE_MEMORY_H
17 | 
18 | /* Define to 1 if you have the <stdint.h> header file. */
19 | #undef HAVE_STDINT_H
20 | 
21 | /* Define to 1 if you have the <stdlib.h> header file. */
22 | #undef HAVE_STDLIB_H
23 | 
24 | /* Define to 1 if you have the <strings.h> header file. */
25 | #undef HAVE_STRINGS_H
26 | 
27 | /* Define to 1 if you have the <string.h> header file. */
28 | #undef HAVE_STRING_H
29 | 
30 | /* Define to 1 if you have the <sys/stat.h> header file. */
31 | #undef HAVE_SYS_STAT_H
32 | 
33 | /* Define to 1 if you have the <sys/types.h> header file. */
34 | #undef HAVE_SYS_TYPES_H
35 | 
36 | /* Define to 1 if you have the <unistd.h> header file. */
37 | #undef HAVE_UNISTD_H
38 | 
39 | /* Define to the sub-directory in which libtool stores uninstalled libraries.
40 |    */
41 | #undef LT_OBJDIR
42 | 
43 | /* Name of package */
44 | #undef PACKAGE
45 | 
46 | /* Define to the address where bug reports for this package should be sent. */
47 | #undef PACKAGE_BUGREPORT
48 | 
49 | /* Define to the full name of this package. */
50 | #undef PACKAGE_NAME
51 | 
52 | /* Define to the full name and version of this package. */
53 | #undef PACKAGE_STRING
54 | 
55 | /* Define to the one symbol short name of this package. */
56 | #undef PACKAGE_TARNAME
57 | 
58 | /* Define to the home page for this package. */
59 | #undef PACKAGE_URL
60 | 
61 | /* Define to the version of this package. */
62 | #undef PACKAGE_VERSION
63 | 
64 | /* Define to 1 if you have the ANSI C header files. */
65 | #undef STDC_HEADERS
66 | 
67 | /* Version number of package */
68 | #undef VERSION
69 | 
70 | /* Define to empty if `const' does not conform to ANSI C. */
71 | #undef const
72 | 


--------------------------------------------------------------------------------
/configure.ac:
--------------------------------------------------------------------------------
 1 | AC_PREREQ(2.59) dnl minimum autoconf version accepted
 2 | 
 3 | AC_INIT([libsequence], [1.9.8], [https://github.com/molpopgen/libsequence]) dnl package name, version, bug-report address
 4 | AC_CONFIG_SRCDIR([src/Seq/Seq.cc]) dnl NOTE(review): AC_CONFIG_SRCDIR is invoked three times in a row; confirm all three are intended
 5 | AC_CONFIG_SRCDIR([test/FastaIO.cc])
 6 | AC_CONFIG_SRCDIR([examples/msstats.cc])
 7 | AM_INIT_AUTOMAKE([subdir-objects])
 8 | AC_CONFIG_HEADERS([config.h]) dnl the feature macros listed in config.h.in end up in this header
 9 | 
10 | AC_CONFIG_MACRO_DIR([m4]) dnl local macros (e.g. the libtool m4 files) live in m4/
11 | 
12 | AC_PROG_CC
13 | AC_C_CONST
14 | AC_PROG_CXX
15 | AX_CXX_COMPILE_STDCXX_11([noext],[mandatory]) dnl a C++11 compiler is mandatory
16 | 
17 | AM_MAINTAINER_MODE([disable])
18 | 
19 | LT_INIT
20 | AC_PROG_LIBTOOL dnl NOTE(review): looks redundant next to LT_INIT on the previous line; confirm before removing
21 | AC_LANG(C++) dnl the header check further down is made with the C++ compiler
22 | AC_CONFIG_FILES([Makefile src/Makefile Sequence/Makefile Sequence/bits/Makefile Sequence/SummStatsDeprecated/Makefile
23 | 				 Sequence/variant_matrix/Makefile Sequence/summstats/Makefile test/Makefile examples/Makefile doc/libsequence.doxygen])
24 | 
25 | dnl AC_ARG_ENABLE(debug,
26 | dnl [  --enable-debug    Turn on debugging],
27 | dnl [case "${enableval}" in
28 | dnl   yes) debug=true ;;
29 | dnl   no)  debug=false ;;
30 | dnl   *) AC_MSG_ERROR(bad value ${enableval} for --enable-debug) ;;
31 | dnl esac],[debug=false])
32 | dnl AM_CONDITIONAL(DEBUG, test x$debug = xtrue)
33 | dnl 
34 | dnl AC_ARG_ENABLE(profiling,
35 | dnl [  --enable-profiling    Turn on profiling],
36 | dnl [case "${enableval}" in
37 | dnl   yes) profiling=true ;;
38 | dnl   no)  profiling=false ;;
39 | dnl   *) AC_MSG_ERROR(bad value ${enableval} for --enable-profiling) ;;
40 | dnl esac],[profiling=false])
41 | dnl AM_CONDITIONAL(PROFILING, test x$profiling = xtrue)
42 | 
43 | 
44 | dnl zlib header
45 | dnl AC_CHECK_HEADER(zlib.h,,[AC_MSG_ERROR([zlib headers missing - cannot continue])])
46 | 
47 | dnl zlib runtime
48 | dnl AC_CHECK_LIB([z],gzungetc,,[echo "zlib run time library not found";exit 1])
49 | 
50 | dnl boost unit test library (optional): BUNITTEST=1 when the header is found; the BUNIT_TEST_PRESENT conditional is true only in that case
51 | AC_CHECK_HEADER(boost/test/unit_test.hpp, BUNITTEST=1,[echo "boost/test/unit_test.hpp not found. Unit tests will not be compiled."])
52 | AM_CONDITIONAL([BUNIT_TEST_PRESENT], test x$BUNITTEST = x1)
53 | 
54 | dnl check for Intel TBB headers that we need
55 | dnl AC_CHECK_HEADER(tbb/parallel_for.h,,[AC_MSG_ERROR([tbb/parallel_for.h not found.  Please either install Intel's TBB library or make sure your CXXFLAGS are set correctly])])
56 | dnl AC_CHECK_HEADER(tbb/task_scheduler_init.h,,[AC_MSG_ERROR([tbb/task_scheduler_init.h not found.  Please either install Intel's TBB library or make sure your CXXFLAGS are set correctly])])
57 | 
58 | dnl check for Intel TBB library
59 | dnl AC_CHECK_LIB([tbb],main,FOUND_TBB_RUNTIME=1,[AC_MSG_ERROR([tbb runtime library not found.  Please install Intel's TBB library.])])
60 | AC_OUTPUT
61 | 


--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
 1 | # Build, clean and install the doxygen reference manual.
 2 | DOX=doxygen
 3 | # Where the HTML manual is installed.  Override on the command line if needed,
 4 | # e.g. `make install DOCDIR=/opt/doc/libsequence`; DESTDIR is honoured for
 5 | # staged installs and is empty by default.
 6 | DOCDIR=/usr/share/doc/libsequence
 7 | 
 8 | .PHONY: all clean install
 9 | 
10 | all:
11 | 	$(DOX) libsequence.doxygen
12 | #	cd latex && make
13 | 
14 | clean:
15 | 	rm -rf html latex
16 | 
17 | install:
18 | 	install -d $(DESTDIR)$(DOCDIR)/html
19 | 	install -m 644 html/* $(DESTDIR)$(DOCDIR)/html
20 | #	install  latex/refman.pdf $(DESTDIR)$(DOCDIR)/libsequence-ref.pdf
21 | 


--------------------------------------------------------------------------------
/doc/images/2subs:
--------------------------------------------------------------------------------
 1 | #FIG 3.2
 2 | Landscape
 3 | Center
 4 | Metric
 5 | A4      
 6 | 100.00
 7 | Single
 8 | -2
 9 | 1200 2
10 | 2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
11 | 	1 1 1.00 90.00 120.00
12 | 	 5400 3150 4500 4320
13 | 2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
14 | 	1 1 1.00 90.00 120.00
15 | 	 6030 3150 6930 4230
16 | 2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
17 | 	1 1 1.00 90.00 120.00
18 | 	 4500 4500 5400 5580
19 | 2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
20 | 	1 1 1.00 90.00 120.00
21 | 	 7020 4500 6120 5670
22 | 4 0 0 50 0 0 14 0.0000 4 150 675 5400 5805 Codon2\001
23 | 4 0 0 50 0 0 14 0.0000 4 150 675 5400 3150 Codon1\001
24 | 4 0 0 50 0 0 14 0.0000 4 150 1215 4050 4500 Intermediate1\001
25 | 4 0 0 50 0 0 14 0.0000 4 150 1215 6300 4500 Intermediate2\001
26 | 4 0 0 50 0 0 14 0.0000 4 150 210 6480 3600 b3\001
27 | 4 0 0 50 0 0 14 0.0000 4 150 210 4770 3600 b1\001
28 | 4 0 0 50 0 0 14 0.0000 4 150 210 4680 5130 b2\001
29 | 4 0 0 50 0 0 14 0.0000 4 150 210 6750 5130 b4\001
30 | 


--------------------------------------------------------------------------------
/doc/images/2subs.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molpopgen/libsequence/ff1a4f698ed2fac40301ff466a6253c82889fd6d/doc/images/2subs.jpg


--------------------------------------------------------------------------------
/doc/images/2subs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molpopgen/libsequence/ff1a4f698ed2fac40301ff466a6253c82889fd6d/doc/images/2subs.pdf


--------------------------------------------------------------------------------
/doc/images/3subs.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molpopgen/libsequence/ff1a4f698ed2fac40301ff466a6253c82889fd6d/doc/images/3subs.jpg


--------------------------------------------------------------------------------
/doc/images/3subs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molpopgen/libsequence/ff1a4f698ed2fac40301ff466a6253c82889fd6d/doc/images/3subs.pdf


--------------------------------------------------------------------------------
/doc/md/tutorial.md:
--------------------------------------------------------------------------------
 1 | # Tutorial/overview
 2 | 
 3 | [TOC]
 4 | 
 5 | The citation for the library is \cite Thornton2003-wj
 6 | 
 7 | ## Creation and manipulation of a VariantMatrix
 8 | 
 9 | TBW
10 | 
11 | ## Calculation of summary statistics from a VariantMatrix
12 | 
13 | In libsequence, variation data are represented as a Sequence::VariantMatrix.
14 | The library provides functions for many standard analyses based on input
15 | data in this format.  The following headers are relevant:
16 | 
17 | 1. Sequence/summstats.hpp
18 | 
19 | Clicking on the above headers will reveal the existence of other headers.
20 | The intent is that you may only wish to bring some names into scope. 
21 | For example, if you implement a new analysis where mean pairwise differences
22 | are needed, you may include Sequence/summstats/thetapi.hpp instead of every 
23 | single summary statistic function provided by the library.
24 | 
25 | TBW
26 | 


--------------------------------------------------------------------------------
/examples/Makefile.am:
--------------------------------------------------------------------------------
 1 | check_PROGRAMS= ms_to_VariantMatrix \
 2 | 	nSL_from_ms \
 3 | 	mean_nSLx \
 4 | 	nSL_vs_nSLx \
 5 | 	baseComp valid_dna translateTest \
 6 | 	slidingWindow slidingWindow2 PolyTableIterators \
 7 | 	ufs \
 8 | 	msstats polySiteVector_test
 9 | 
10 | ms_to_VariantMatrix_SOURCES=ms_to_VariantMatrix.cc
11 | nSL_from_ms_SOURCES=nSL_from_ms.cc
12 | mean_nSLx_SOURCES=mean_nSLx.cc
13 | nSL_vs_nSLx_SOURCES=nSL_vs_nSLx.cc
14 | baseComp_SOURCES=baseComp.cc
15 | valid_dna_SOURCES=valid_dna.cc
16 | translateTest_SOURCES=translateTest.cc
17 | slidingWindow_SOURCES=slidingWindow.cc
18 | slidingWindow2_SOURCES=slidingWindow2.cc
19 | PolyTableIterators_SOURCES=PolyTableIterators.cc
20 | ufs_SOURCES=ufs.cc
21 | msstats_SOURCES=msstats.cc
22 | polySiteVector_test_SOURCES=polySiteVector_test.cc
23 | 
24 | AM_CXXFLAGS=
25 | AM_LIBS=-lsequence
26 | AM_LDFLAGS=-L../src/.libs -Wl,-rpath,../src/.libs
27 | 
28 | #if DEBUG
29 | #AM_CXXFLAGS+=-g
30 | #else
31 | #AM_CXXFLAGS+=-DNDEBUG
32 | #endif
33 | #
34 | #if PROFILING
35 | #PROFILE= -pg
36 | #else
37 | #PROFILE=
38 | #endif
39 | 
40 | LIBS+=$(AM_LIBS)
41 | 


--------------------------------------------------------------------------------
/examples/Makefile.old:
--------------------------------------------------------------------------------
 1 | #CC=cc
 2 | #CXX=c++
 3 | CFLAGS = -O3 -Wall -pedantic -ansi -Wtraditional -I..
 4 | CXXFLAGS = -O3 -Wall -W -I.. -std=c++11
 5 | LIBS= -L/usr/local/lib -Wl,-rpath -Wl,/usr/local/lib -lsequence -lz
 6 | #all: codons.o gestimator.o msstats.o int_handler.o getopt.o getopt1.o baseComp.o valid_dna.o FastaExplicit.o translateTest.o slidingWindow.o slidingWindow2.o PolyTableIterators.o correlations.o critical_values.o ufs.o ms--.o msbeta.o freerec.o bottleneck.o fragments.o test_SimDataIO.o
 7 | #all: codons.o msstats.o int_handler.o baseComp.o valid_dna.o FastaExplicit.o translateTest.o slidingWindow.o slidingWindow2.o PolyTableIterators.o correlations.o critical_values.o ufs.o ms--.o msbeta.o freerec.o bottleneck.o fragments.o test_SimDataIO.o
 8 | all: codons.o msstats.o int_handler.o baseComp.o FastaExplicit.o translateTest.o slidingWindow.o slidingWindow2.o PolyTableIterators.o correlations.o critical_values.o ufs.o ms--.o freerec.o bottleneck.o fragments.o test_SimDataIO.o valid_dna.o Ptable_test.o
 9 | 	$(CXX) $(CXXFLAGS)   ufs.o -o ufs $(LDFLAGS) $(LIBS)
10 | 	$(CXX) $(CXXFLAGS)   codons.o -o codons $(LDFLAGS) $(LIBS)
11 | 	$(CXX) $(CXXFLAGS)   msstats.o -o msstats $(LDFLAGS) $(LIBS)
12 | #	$(CXX) $(CXXFLAGS)   gestimator.o int_handler.o getopt.o getopt1.o -o gestimator $(LDFLAGS) $(LIBS)
13 | 	$(CXX) $(CXXFLAGS)   baseComp.o -o baseComp $(LDFLAGS) $(LIBS)
14 | 	$(CXX) $(CXXFLAGS)   FastaExplicit.o -o FastaExplicit $(LDFLAGS) $(LIBS)
15 | 	$(CXX) $(CXXFLAGS)   translateTest.o -o translateTest $(LDFLAGS) $(LIBS)
16 | 	$(CXX) $(CXXFLAGS)   slidingWindow.o -o slidingWindow $(LDFLAGS) $(LIBS)
17 | 	$(CXX) $(CXXFLAGS)   slidingWindow2.o -o slidingWindow2 $(LDFLAGS) $(LIBS)
18 | 	$(CXX) $(CXXFLAGS)   PolyTableIterators.o -o PolyTableIterators $(LDFLAGS) $(LIBS)
19 | 	$(CXX) $(CXXFLAGS)   correlations.o -o correlations  $(LDFLAGS) $(LIBS)
20 | 	$(CXX) $(CXXFLAGS)   critical_values.o -o critical_values $(LDFLAGS) $(LIBS)
21 | 	$(CXX) $(CXXFLAGS)   ms--.o -o ms-- $(LDFLAGS) $(LIBS) 
22 | #	$(CXX) $(CXXFLAGS)   msbeta.o -o msbeta $(LDFLAGS) $(LIBS) 
23 | 	$(CXX) $(CXXFLAGS)   freerec.o -o freerec $(LDFLAGS) $(LIBS) 
24 | 	$(CXX) $(CXXFLAGS)   bottleneck.o -o bottleneck $(LDFLAGS) $(LIBS) 
25 | 	$(CXX) $(CXXFLAGS)   fragments.o -o fragments $(LDFLAGS) $(LIBS) 
26 | 	$(CXX) $(CXXFLAGS)   valid_dna.o -o valid_dna $(LDFLAGS) $(LIBS) 
27 | 	$(CXX) $(CXXFLAGS)   test_SimDataIO.o -o test_SimDataIO $(LDFLAGS) $(LIBS)
28 | 	$(CXX) $(CXXFLAGS)   Ptable_test.o -o Ptable_test $(LDFLAGS) $(LIBS)
29 | 
30 | clean: # remove the objects and every program linked by the `all` target
31 | 	rm -f *.o codons msstats baseComp valid_dna FastaExplicit translateTest \
32 | 	slidingWindow slidingWindow2 PolyTableIterators  correlations \
33 | 	critical_values ufs ms-- msbeta freerec bottleneck fragments test_SimDataIO Ptable_test
34 | 
35 | 


--------------------------------------------------------------------------------
/examples/int_handler.cc:
--------------------------------------------------------------------------------
 1 | #include "int_handler.hpp"
 2 | #include <iostream>
 3 | /*
 4 |   Interrupt handler for the example programs.  Asks on stderr whether the
 5 |   user really wants to quit and reads a one-character answer from stdin:
 6 |   'y' or 'Y' exits with status 0, anything else re-installs this handler
 7 |   for SIGINT and returns.
 8 | 
 9 |   NOTE(review): iostream calls and exit() are not async-signal-safe, so
10 |   this is only appropriate for interactive example code.
11 | */
12 | void cntrl_c_handler(int sig)
13 | {
14 |   (void)sig; // the signal number is not needed
15 |   // Default to "continue": if the read below fails (e.g. stdin is closed)
16 |   // the switch must not inspect an uninitialised value.
17 |   char answer = 'n';
18 |   std::cerr << "Interrupt signal received.\n";
19 |   std::cerr << "Do you really want to quit [y or n]?\n";
20 |   std::cin >> answer;
21 |   switch (answer)
22 |     {
23 |     case 'Y':
24 |     case 'y':
25 |       exit(0);
26 |       break;
27 |     default:
28 |       signal(SIGINT,cntrl_c_handler);
29 |       std::cerr << "continuing"<<std::endl;
30 |       break;
31 |     }
32 | }
33 | 


--------------------------------------------------------------------------------
/examples/int_handler.hpp:
--------------------------------------------------------------------------------
1 | #include <signal.h>
2 | #include <cstdio>
3 | #include <cstdlib>
4 | 
5 | using namespace std;
6 | 
7 | void cntrl_c_handler(int sig); // interactive interrupt handler, defined in int_handler.cc: prompts on stderr, exits on 'y'/'Y'
8 | 


--------------------------------------------------------------------------------
/examples/mean_nSLx.cc:
--------------------------------------------------------------------------------
 1 | /*! \include mean_nSLx.cc */
 2 | #include <cmath>
 3 | #include <iostream>
 4 | #include <cassert>
 5 | #include <cstdlib>
 6 | #include <Sequence/VariantMatrix.hpp>
 7 | #include <Sequence/VariantMatrixViews.hpp>
 8 | #include <Sequence/variant_matrix/msformat.hpp>
 9 | #include <Sequence/summstats/nslx.hpp>
10 | 
11 | /*
12 |   Usage: mean_nSLx x < ms_output
13 | 
14 |   Until stdin reports end-of-file, reads ms-format data with
15 |   Sequence::from_msformat, calls Sequence::nslx(vm, 0, x) and prints the
16 |   mean of the finite nsl values it returns, one line per data set.
17 |   x is the integer given as the first command-line argument.
18 | */
19 | int
20 | main(int argc, char** argv)
21 | {
22 |     if (argc < 2)
23 |         {
24 |             // argv[1] is a null pointer when no argument is given.
25 |             std::cerr << "usage: mean_nSLx x < ms_output\n";
26 |             return 1;
27 |         }
28 |     int x = std::atoi(argv[1]);
29 |     while (!std::cin.eof())
30 |         {
31 |             auto vm = Sequence::from_msformat(std::cin);
32 |             auto nsl_stats = Sequence::nslx(vm, 0, x);
33 |             double sum = 0.0;
34 |             unsigned n = 0;
35 |             // Non-finite values (NaN/inf) are left out of the mean.
36 |             for (auto& i : nsl_stats)
37 |                 {
38 |                     if (std::isfinite(i.nsl))
39 |                         {
40 |                             sum += i.nsl;
41 |                             ++n;
42 |                         }
43 |                 }
44 |             std::cout << sum / static_cast<double>(n) << '\n';
45 |         }
46 | }
47 | 


--------------------------------------------------------------------------------
/examples/ms_to_VariantMatrix.cc:
--------------------------------------------------------------------------------
 1 | /*! \include ms_to_VariantMatrix.cc */
 2 | #include <iostream>
 3 | #include <Sequence/VariantMatrix.hpp>
 4 | #include <Sequence/VariantMatrixViews.hpp>
 5 | #include <Sequence/variant_matrix/msformat.hpp>
 6 | 
 7 | // Round trip through VariantMatrix: whatever Sequence::from_msformat reads
 8 | // from stdin is written back to stdout with Sequence::to_msformat, followed
 9 | // by a newline.  The body always runs once; it repeats until stdin is at
10 | // end-of-file.
11 | int
12 | main(int argc, char** argv)
13 | {
14 |     for (;;)
15 |         {
16 |             auto matrix = Sequence::from_msformat(std::cin);
17 |             Sequence::to_msformat(matrix, std::cout);
18 |             std::cout << '\n';
19 |             if (std::cin.eof())
20 |                 {
21 |                     break;
22 |                 }
23 |         }
24 | }
25 | 


--------------------------------------------------------------------------------
/examples/msstats.cc:
--------------------------------------------------------------------------------
 1 | /* 
 2 |    msstats - read data from ms via stdin, calculate common summary statistics
 3 | 
 4 |    Copyright (C) 2002 Kevin Thornton
 5 | 
 6 |    This program is free software; you can redistribute it and/or modify
 7 |    it under the terms of the GNU General Public License as published by
 8 |    the Free Software Foundation; either version 2, or (at your option)
 9 |    any later version.
10 | 
11 |    This program is distributed in the hope that it will be useful,
12 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
13 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 |    GNU General Public License for more details.
15 | 
16 |    You should have received a copy of the GNU General Public License
17 |    along with this program; if not, write to the Free Software Foundation,
18 |    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  
19 | 
20 | */
21 | 
22 | #include <iostream>
23 | #include <vector>
24 | #include <Sequence/SimParams.hpp>
25 | #include <Sequence/SimData.hpp>
26 | #include <Sequence/PolySIM.hpp>
27 | #include <cstdio>
28 | 
29 | using namespace std;
30 | using namespace Sequence;
31 | 
32 | /*
33 |   Reads a SimParams header from stdin, then every data set that
34 |   SimData::fromfile can read from the same stream, and prints one
35 |   tab-separated line per data set: NumPoly, ThetaW, ThetaPi, ThetaH,
36 |   TajimasD, FuLiD, FuLiF, FuLiDStar, FuLiFStar.
37 | */
38 | int main(int argc, char *argv[])
39 | {
40 |   // The header is read through cin and the data through the C stream stdin,
41 |   // so the two must stay synchronised.  The request has to come before the
42 |   // first I/O operation: its effect afterwards is implementation-defined.
43 |   std::ios_base::sync_with_stdio(true);
44 | 
45 |   SimParams p;
46 |   cin >> p;
47 |   SimData d;
48 | 
49 |   while( d.fromfile(stdin) != EOF )
50 |     {
51 |       PolySIM P(&d);
52 |       cout << P.NumPoly()   << '\t'
53 |            << P.ThetaW()    << '\t'
54 |            << P.ThetaPi()   << '\t'
55 |            << P.ThetaH()    << '\t'
56 |            << P.TajimasD()  << '\t'
57 |            << P.FuLiD()     << '\t'
58 |            << P.FuLiF()     << '\t'
59 |            << P.FuLiDStar() << '\t'
60 |            << P.FuLiFStar() << endl;
61 |     }
62 | }
63 | 


--------------------------------------------------------------------------------
/examples/nSL_from_ms.cc:
--------------------------------------------------------------------------------
 1 | /*! \include nSL_from_ms.cc */
 2 | #include <cmath>
 3 | #include <iostream>
 4 | #include <cassert>
 5 | #include <Sequence/VariantMatrix.hpp>
 6 | #include <Sequence/VariantMatrixViews.hpp>
 7 | #include <Sequence/variant_matrix/msformat.hpp>
 8 | #include <Sequence/summstats/nsl.hpp>
 9 | 
10 | int
11 | main(int argc, char** argv) // argc and argv are unused; all input comes from stdin
12 | {
13 |     auto vm = Sequence::from_msformat(std::cin); // read ms-format input from stdin
14 |     auto nsl_stats = Sequence::nsl(vm, 0); // computed but not printed: the only consumer is the disabled cross-check below
15 |     //assert(nsl_stats.size() == vm.nsites);
16 |     //for (std::size_t i = 0; i < vm.nsites; ++i)
17 |     //    {
18 |     //        auto n = Sequence::nsl(vm, i, 0);
19 |     //        if (!std::isnan(n.nsl))
20 |     //            {
21 |     //                std::cout << vm.positions[i] << ' ' << n.nsl << ' '
22 |     //                          << n.ihs << ' ' << n.core_count << ' '
23 |     //                          << nsl_stats[i].nsl << ' ' << nsl_stats[i].ihs
24 |     //                          << ' ' << nsl_stats[i].core_count << '\n';
25 |     //            }
26 |     //        else
27 |     //            {
28 |     //                assert(std::isnan(nsl_stats[i].nsl));
29 |     //            }
30 |     //    }
31 | }
32 | 


--------------------------------------------------------------------------------
/examples/nSL_vs_nSLx.cc:
--------------------------------------------------------------------------------
 1 | /*! \include nSL_vs_nSLx.cc */
 2 | #include <cmath>
 3 | #include <cstdlib>
 4 | #include <iostream>
 5 | #include <cassert>
 6 | #include <Sequence/VariantMatrix.hpp>
 7 | #include <Sequence/VariantMatrixViews.hpp>
 8 | #include <Sequence/variant_matrix/msformat.hpp>
 9 | #include <Sequence/summstats/nsl.hpp>
10 | #include <Sequence/summstats/nslx.hpp>
11 | 
12 | /*
13 |   Usage: nSL_vs_nSLx x < ms_output
14 | 
15 |   Reads ms-format data from stdin once, calls Sequence::nslx(vm, 0, x) and
16 |   prints the nsl, ihs and core_count fields of every element of the result,
17 |   space-separated, one element per line.  x is the integer given as the
18 |   first command-line argument.
19 | */
20 | int
21 | main(int argc, char** argv)
22 | {
23 |     if (argc < 2)
24 |         {
25 |             // argv[1] is a null pointer when no argument is given.
26 |             std::cerr << "usage: nSL_vs_nSLx x < ms_output\n";
27 |             return 1;
28 |         }
29 |     int x = std::atoi(argv[1]);
30 |     auto vm = Sequence::from_msformat(std::cin);
31 |     auto nsl_stats = Sequence::nslx(vm, 0, x);
32 |     for (auto& s : nsl_stats)
33 |         {
34 |             std::cout << s.nsl << ' ' << s.ihs << ' ' << s.core_count << '\n';
35 |         }
36 | }
37 | 


--------------------------------------------------------------------------------
/examples/slidingWindow.cc:
--------------------------------------------------------------------------------
 1 | #include <Sequence/PolySites.hpp>
 2 | #include <Sequence/Fasta.hpp>
 3 | #include <Sequence/Alignment.hpp>
 4 | #include <Sequence/PolySNP.hpp>
 5 | #include <Sequence/PolyTableSlice.hpp>
 6 | #include <vector>
 7 | #include <iostream>
 8 | 
 9 | /*! \include slidingWindow.cc */
10 | 
11 | //Read in a data set of aligned sequence in Fasta
12 | //format.  Create a polymorphism table.  Calculate
13 | //Tajima's D for the whole table.  Then, run a sliding
14 | //window of 1 segregating site (with a jump size of 1)
15 | //along the SNP table, and use that to calculate Tajima's
16 | //D for each site.
17 | 
18 | //This is a somewhat contrived example, but it illustrates
19 | //the sliding window code.
20 | 
21 | // Usage: slidingWindow alignment.fasta
22 | // Prints Tajima's D for the whole alignment, then for every window of one
23 | // segregating site (step size one) along the SNP table.  Nothing is printed
24 | // when the input is not a valid alignment or has no polymorphic sites.
25 | int main(int argc, char **argv)
26 | {
27 |   if (argc < 2)
28 |     {
29 |       // argv[1] is a null pointer when no file name is given.
30 |       std::cerr << "usage: slidingWindow alignment.fasta\n";
31 |       return 1;
32 |     }
33 |   const char * infilename = argv[1];
34 |   std::vector<Sequence::Fasta> data;
35 |   Sequence::Alignment::GetData(data,infilename);
36 | 
37 |   if ( Sequence::Alignment::IsAlignment(data) &&
38 |        Sequence::Alignment::validForPolyAnalysis(data.begin(),data.end()) )
39 |     {
40 |       Sequence::PolySites SNPtable(data);
41 |       if (! SNPtable.empty())
42 |         {
43 |           Sequence::PolySNP analyzeRegion(&SNPtable);
44 |           std::cout << "Tajima's D for the region is: "<< analyzeRegion.TajimasD() << std::endl;
45 | 
46 |           // window size 1 and step size 1
47 |           Sequence::PolyTableSlice<Sequence::PolySites> windows(SNPtable.sbegin(),
48 |                                                                 SNPtable.send(),1u,1u);
49 |           Sequence::PolyTableSlice<Sequence::PolySites>::const_iterator itr = windows.cbegin();
50 |           while(itr < windows.cend())
51 |             {
52 |               Sequence::PolySites window = windows.get_slice(itr);
53 |               Sequence::PolySNP analyzeWindow(&window);
54 |               std::cout << "D for window "
55 |                         << itr-windows.cbegin()
56 |                         << " is: "
57 |                         << analyzeWindow.TajimasD()
58 |                         << std::endl;
59 |               ++itr;
60 |             }
61 |         }
62 |     }
63 | }
64 | 


--------------------------------------------------------------------------------
/examples/slidingWindow2.cc:
--------------------------------------------------------------------------------
 1 | #include <Sequence/Fasta.hpp>
 2 | #include <Sequence/Alignment.hpp>
 3 | #include <Sequence/PolySites.hpp>
 4 | #include <Sequence/PolySNP.hpp>
 5 | #include <Sequence/PolyTableSlice.hpp>
 6 | #include <iostream>
 7 | 
 8 | 
 9 | //these are made explicit for example purposes
10 | using std::vector;
11 | using std::cout;
12 | using std::endl;
13 | 
14 | //run a non-overlapping 100bp window over a SNP data set
15 | 
16 | // Usage: slidingWindow2 alignment.fasta
17 | // Prints Tajima's D for the whole data set, then for each window produced by
18 | // PolyTableSlice with window length 100 and step size 100 over the alignment.
19 | int main(int argc, char **argv)
20 | {
21 |   if (argc < 2)
22 |     {
23 |       // argv[1] is a null pointer when no file name is given.
24 |       std::cerr << "usage: slidingWindow2 alignment.fasta\n";
25 |       return 1;
26 |     }
27 |   const char *infilename = argv[1];
28 | 
29 |   vector<Sequence::Fasta> data;
30 | 
31 |   Sequence::Alignment::GetData(data,infilename);
32 | 
33 |   if ( Sequence::Alignment::IsAlignment(data) &&
34 |        Sequence::Alignment::validForPolyAnalysis(data.begin(),data.end()) )
35 |     {
36 |       // length of the first sequence, passed to PolyTableSlice as the alignment length
37 |       const unsigned alignmentLength = data[0].length();
38 | 
39 |       Sequence::PolySites SNPtable(data);
40 | 
41 |       Sequence::PolySNP analyzeRegion(&SNPtable);
42 | 
43 |       cout << "Tajima's D for the whole dataset is: "
44 |            << analyzeRegion.TajimasD()
45 |            << endl;
46 | 
47 |       Sequence::PolyTableSlice<Sequence::PolySites> windows(SNPtable.sbegin(),
48 |                                                             SNPtable.send(),
49 |                                                             100, //window length (bp)
50 |                                                             100, //step size (bp)
51 |                                                             alignmentLength);
52 | 
53 |       for(unsigned i = 0 ; i < windows.size() ; ++i)
54 |         {
55 |           Sequence::PolySites window(windows[i]); //use copy constructor
56 |           Sequence::PolySNP analyzeWindow(&window);
57 |           cout << "Tajima's D for window "
58 |                << i
59 |                << " is: "
60 |                << analyzeWindow.TajimasD()
61 |                << endl;
62 |         }
63 |     }
64 | }
65 | 


--------------------------------------------------------------------------------
/examples/test_SimDataIO.cc:
--------------------------------------------------------------------------------
 1 | #include <Sequence/SimDataIO.hpp>
 2 | #include <iostream>
 3 | #include <fstream>
 4 | 
 5 | using namespace std;
 6 | using namespace Sequence;
 7 | 
 8 | void print_problems( const SimData & d,
 9 | 		     const SimData & d2 );
10 | 
11 | int main( int argc, char ** argv ) // round-trip check: gzip text, raw binary, and gzip binary copies are compared with the data read from stdin
12 | {
13 |   SimData d;
14 |   while(!cin.eof()) // NOTE(review): only eof() is tested; the success of the extraction on the next line is not checked
15 |     {
16 |       cin >> d >> ws; // read one data set, then skip trailing whitespace
17 | 
18 |       cerr << "Writing in gzip\n";
19 |       gzFile gzf = gzopen("test_zlib_out.gz","w"); // NOTE(review): none of the gzopen results in this function are checked
20 |       write_SimData_gz(gzf, d);
21 |       gzclose(gzf);
22 | 
23 |       cerr << "Reading from gzip\n";
24 |       //now, try to read it
25 |       gzf = gzopen("test_zlib_out.gz","r");
26 |       SimData d2 = read_SimData_gz(gzf);
27 |       gzclose(gzf);
28 | 
29 |       cerr << "Writing in binary\n";
30 |       //write it in binary
31 |       ofstream obin("test_binary_out.bin",ios::binary);
32 |       write_SimData_binary(obin,d);
33 |       obin.close();
34 | 
35 |       cerr << "Reading from binary\n";
36 |       //read it
37 |       ifstream ibin("test_binary_out.bin",ios::binary);
38 |       SimData d3 = read_SimData_binary(ibin);
39 |       ibin.close();
40 |   
41 |       cerr << "Writing to binary + gzip\n";
42 |       gzf = gzopen("test_zlib_out.bin.gz","bw");
43 |       write_SimData_gz(gzf, d, true); // third argument presumably selects binary output -- confirm in SimDataIO.hpp
44 |       gzclose(gzf);
45 | 
46 |       cerr << "Reading from binary + gzip\n";
47 |       //now, try to read it
48 |       gzf = gzopen("test_zlib_out.bin.gz","r");
49 |       SimData d4 = read_SimData_gz(gzf,true);
50 |       gzclose(gzf);
51 | 
52 |       if( d != d2 ) // each copy that differs from the original is reported on stderr
53 | 	{
54 | 	  cerr << "Error: d != d2\n";
55 | 	  print_problems(d,d2);
56 | 	}
57 |       if(d != d3)
58 | 	{
59 | 	  cerr << "Error: d != d3\n";
60 | 	  print_problems(d,d3);
61 | 	}
62 |       if(d != d4)
63 | 	{
64 | 	  cerr << "Error: d != d4\n";
65 | 	  print_problems(d,d4);
66 | 	}
67 |     }
68 | }
69 | 
70 | // Report on stderr where d2 differs from d: site positions first, then
71 | // haplotypes.  Only indices present in both objects are compared; a
72 | // mismatch in the number of sites or haplotypes is reported separately.
73 | void print_problems( const SimData & d,
74 |                      const SimData & d2 )
75 | {
76 |   if( d.numsites() != d2.numsites() )
77 |     cerr << "Number of sites: " << d.numsites() << ' ' << d2.numsites() << '\n';
78 |   const unsigned nsites = (d.numsites() < d2.numsites()) ? d.numsites() : d2.numsites();
79 |   for( unsigned i = 0 ; i < nsites ; ++i )
80 |     {
81 |       if( d.position(i) != d2.position(i) )
82 |         cerr << "Position " << i << ": " << d.position(i) << ' ' << d2.position(i) << '\n';
83 |     }
84 | 
85 |   if( d.size() != d2.size() )
86 |     cerr << "Number of haplotypes: " << d.size() << ' ' << d2.size() << '\n';
87 |   const unsigned nhaps = (d.size() < d2.size()) ? d.size() : d2.size();
88 |   // Haplotypes are independent of sites, so they are compared in their own loop.
89 |   for( unsigned h = 0 ; h < nhaps ; ++h )
90 |     {
91 |       if( d[h] != d2[h] )
92 |         cerr << "Haplotype " << h << ": " << d[h] << "\n\n" << d2[h] << '\n';
93 |     }
94 | }
95 | 


--------------------------------------------------------------------------------
/examples/translateTest.cc:
--------------------------------------------------------------------------------
 1 | #include <Sequence/Translate.hpp>
 2 | #include <iostream>
 3 | #include <string>
 4 | 
 5 | const char alphabet[4] = {'A','G','C','T'};
 6 | 
 7 | // Prints all 64 three-letter combinations of the characters in `alphabet`,
 8 | // each followed by a tab and the result of Sequence::Translate for it.
 9 | int main(int argc, char **argv)
10 | {
11 |   for (const char first : alphabet)
12 |     {
13 |       for (const char second : alphabet)
14 |         {
15 |           for (const char third : alphabet)
16 |             {
17 |               std::string codon{first, second, third};
18 |               std::cout << codon << '\t';
19 |               std::cout << Sequence::Translate(codon.begin(),codon.end());
20 |               std::cout << std::endl;
21 |             }
22 |         }
23 |     }
24 | }
25 | 


--------------------------------------------------------------------------------
/examples/valid_dna.cc:
--------------------------------------------------------------------------------
 1 | /*! \include valid_dna.cc */
 2 | #include <Sequence/Fasta.hpp>
 3 | #include <Sequence/SeqRegexes.hpp>
 4 | #include <Sequence/SeqAlphabets.hpp>
 5 | #include <fstream>
 6 | #include <iostream>
 7 | #include <algorithm>
 8 | 
 9 | // Usage: valid_dna sequences.fasta
10 | // For each Fasta record read from the file, prints three tab-separated
11 | // values: validSeq called without an explicit alphabet, validSeq against
12 | // Sequence::full_dna_alphabet, and whether any character satisfies
13 | // Sequence::ambiguousNucleotide.
14 | int main(int argc, char **argv)
15 | {
16 |   if (argc < 2)
17 |     {
18 |       // argv[1] is a null pointer when no file name is given.
19 |       std::cerr << "usage: valid_dna sequences.fasta\n";
20 |       return 1;
21 |     }
22 |   std::ifstream in(argv[1]);
23 |   if (!in)
24 |     {
25 |       // Distinguish an unreadable file from an empty one.
26 |       std::cerr << "valid_dna: cannot open " << argv[1] << '\n';
27 |       return 1;
28 |     }
29 |   Sequence::Fasta seq;
30 |   while (in >> seq)
31 |     {
32 |       std::cout << Sequence::validSeq(seq.begin(),seq.end())
33 |                 << '\t'
34 |                 << Sequence::validSeq(seq.begin(),seq.end(),Sequence::full_dna_alphabet)
35 |                 << '\t'
36 |                 << (std::find_if(seq.begin(),seq.end(),Sequence::ambiguousNucleotide())
37 |                     != seq.end())
38 |                 <<'\n';
39 |     }
40 | }
41 | 


--------------------------------------------------------------------------------
/index.md:
--------------------------------------------------------------------------------
 1 | # The libsequence home page
 2 | 
 3 | This is the homepage for __libsequence__, which is a C++ library for evolutionary genetics.
 4 | 
 5 | ## Versions
 6 | 
 7 | The current "master" branch of the library is 1.8.3.  This version represents a major change to the code base.  The biggest change is that the library no longer requires [boost](http://www.boost.org) or the [GSL](http://gnu.org/software/gsl) libraries.  It still requires [zlib](http://zlib.net).  See the Revision History section in the README, which you can find at the [GitHub repo](https://github.com/molpopgen/libsequence) for the project.
 8 | 
 9 | ## Source code
10 | 
11 | The library code is [here](https://github.com/molpopgen/libsequence)
12 | 
13 | ## Library documentation
14 | 
15 | [Reference manual](doc/html/index.html)
16 | 
17 | ## Programs using the library
18 | 
19 | There are lots of these:
20 | 
21 | * [Thornton lab software page](http://molpopgen.org/software.html)
22 | * [Kevin Thornton](https://github.com/molpopgen)'s GitHub account
23 | * [Thornton lab](https://github.com/ThorntonLab) GitHub account
24 | 
25 | Some specific projects using the library
26 | 
27 | * [pylibseq](http://molpopgen.github.io/pylibseq) - A Python interface to libsequence.
28 | * [fwdpp](http://molpopgen.github.io/fwdpp) - Forward simulation library described in Thornton (2014, doi: 10.1534/genetics.114.165019)
29 | * [pecnv](https://github.com/molpopgen/pecnv) - code from Rogers et al. (2014, doi: 10.1093/molbev/msu124) and Cridland et al (2013, doi: 10.1093/molbev/mst129) for detecting structural variants and transposable element insertions
30 | * [baldwin_brown_2014](https://github.com/molpopgen/baldwin_brown_2014) - Code from Baldwin-Brown et al. (2014, doi: 10.1093/molbev/msu048) for simulating "evolve and resequence" experiments
31 | * [analysis](https://github.com/molpopgen/analysis)
32 | * [sequtils](https://github.com/molpopgen/sequtils)
33 | * [msstats](https://github.com/molpopgen/msstats)
34 | * [sweepsims](https://github.com/molpopgen/sweepsims) - selective sweep code from Jensen et al. (2008, doi: 10.1371/journal.pgen.1000198) and Thornton and Jensen (2007, doi: 10.1534/genetics.106.064642)
35 | * [newgene](https://github.com/molpopgen/newgene) - copy-number variant simulation code from Thornton (2007, doi: 10.1534/genetics.107.074948)
36 | 
37 | ## Citation
38 | 
39 | If you use __libsequence__ for your research, or programs depending upon it, please cite the following paper:
40 | 
41 | * Thornton, K. (2003) libsequence, a C++ class library for evolutionary genetic analysis. Bioinformatics __19__(17): 2325-2327  PMID 14630667 [Manuscript](http://bioinformatics.oxfordjournals.org/content/19/17/2325.short), [Software](https://github.com/molpopgen/libsequence)
42 | 
43 | 


--------------------------------------------------------------------------------
/init_autotools.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Regenerate the autotools build files by running libtoolize, autoreconf,
3 | # autoheader and automake in that order.
4 | # Stop at the first failing step instead of running later tools on stale output.
5 | set -e
6 | libtoolize --force --copy
7 | autoreconf -fi
8 | autoheader
9 | automake --add-missing --copy


--------------------------------------------------------------------------------
/m4/ltversion.m4:
--------------------------------------------------------------------------------
 1 | # ltversion.m4 -- version numbers			-*- Autoconf -*-
 2 | #
 3 | #   Copyright (C) 2004 Free Software Foundation, Inc.
 4 | #   Written by Scott James Remnant, 2004
 5 | #
 6 | # This file is free software; the Free Software Foundation gives
 7 | # unlimited permission to copy and/or distribute it, with or without
 8 | # modifications, as long as this notice is preserved.
 9 | 
10 | # @configure_input@
11 | 
12 | # serial 3337 ltversion.m4
13 | # This file is part of GNU Libtool
14 | 
15 | m4_define([LT_PACKAGE_VERSION], [2.4.2])
16 | m4_define([LT_PACKAGE_REVISION], [1.3337])
17 | 
18 | AC_DEFUN([LTVERSION_VERSION],
19 | [macro_version='2.4.2'
20 | macro_revision='1.3337'
21 | _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
22 | _LT_DECL(, macro_revision, 0)
23 | ])
24 | 


--------------------------------------------------------------------------------
/src/Coalescent/CoalescentMutation.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 |   Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 |   Remove the brackets to email me.
 6 | 
 7 |   This file is part of libsequence.
 8 | 
 9 |   libsequence is free software: you can redistribute it and/or modify
10 |   it under the terms of the GNU General Public License as published by
11 |   the Free Software Foundation, either version 3 of the License, or
12 |   (at your option) any later version.
13 | 
14 |   libsequence is distributed in the hope that it will be useful,
15 |   but WITHOUT ANY WARRANTY; without even the implied warranty of
16 |   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 |   GNU General Public License for more details.
18 | 
19 |   You should have received a copy of the GNU General Public License
20 |   along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #include <Sequence/Coalescent/Mutation.hpp>
25 | 
26 | namespace Sequence
27 | {
28 |   namespace coalsim {
29 |     void output_gametes(FILE * fp,
30 |                         const unsigned & segsites,
31 |                         const unsigned & nsam,
32 |                         const gamete_storage_type & gametes)
33 |     /*!
34 |       @brief Print a gamete_storage_type to a C-style file stream as text
35 |       The output starts with a "//" line.  With no segregating sites it is
36 |       followed by "segsites: 0" only.  Otherwise it is followed by the
37 |       "segsites:" line, a "positions:" line holding the first \a segsites
38 |       entries of \a gametes.first, and then \a nsam lines holding the first
39 |       \a segsites characters of each entry of \a gametes.second.
40 |       \param fp pointer to an open C-style output stream
41 |       \param segsites the number of segregating sites in \a gametes
42 |       \param nsam the number of individuals in \a gametes
43 |       \param gametes the simulated sample.  Must hold at least
44 |       \a segsites positions, and \a nsam strings of length \a segsites
45 |     */
46 |     {
47 |       fprintf(fp,"//\n");
48 |       if ( segsites == 0 )
49 |         {
50 |           //nothing to list: emit the count and stop
51 |           fprintf(fp,"segsites: 0\n");
52 |           return;
53 |         }
54 | 
55 |       fprintf(fp,"segsites: %u\npositions: ",segsites);
56 |       for(unsigned site=0;site<segsites;++site)
57 |         {
58 |           fprintf(fp,"%f ",gametes.first[site]);
59 |         }
60 |       fprintf(fp,"\n");
61 | 
62 |       //one line per sampled individual
63 |       for(unsigned ind=0;ind<nsam;++ind)
64 |         {
65 |           for(unsigned site=0;site<segsites;++site)
66 |             {
67 |               fprintf(fp,"%c",gametes.second[ind][site]);
68 |             }
69 |           fprintf(fp,"\n");
70 |         }
71 |     }
72 |   }
73 | }
74 | 


--------------------------------------------------------------------------------
/src/Makefile.am:
--------------------------------------------------------------------------------
 1 | lib_LTLIBRARIES=libsequence.la
 2 | bin_PROGRAMS=libsequenceConfig
 3 | 
 4 | libsequenceConfig_SOURCES=libsequenceConfig.cc
 5 | 
 6 | libsequence_la_SOURCES=  Grantham.cc\
 7 | 	PathwayHelper.cc\
 8 | 	summstats_deprecated/Recombination.cc\
 9 | 	SimParams.cc\
10 | 	Translate.cc\
11 | 	Comeron95.cc\
12 | 	GranthamWeights.cc\
13 | 	summstats_deprecated/PolySNP.cc\
14 | 	summstats_deprecated/PolySIM.cc\
15 | 	RedundancyCom95.cc\
16 | 	SingleSub.cc\
17 | 	TwoSubs.cc\
18 | 	stateCounter.cc\
19 | 	summstats_deprecated/FST.cc\
20 | 	Comparisons.cc\
21 | 	SimpleSNP.cc\
22 | 	PolyTable.cc\
23 | 	PolyTableFunctions.cc\
24 | 	Seq/Seq.cc\
25 | 	ComplementBase.cc\
26 | 	Sites.cc\
27 | 	Unweighted.cc\
28 | 	Seq/Fasta.cc\
29 | 	Seq/fastq.cc\
30 | 	Kimura80.cc\
31 | 	PolySites.cc\
32 | 	SimData.cc\
33 | 	ThreeSubs.cc\
34 | 	CodonTable.cc\
35 | 	Specializations.cc\
36 | 	SeqConstants.cc\
37 | 	shortestPath.cc\
38 | 	summstats_deprecated/HKA.cc\
39 | 	summstats_deprecated/Snn.cc\
40 | 	polySiteVector.cc \
41 | 	summstats_deprecated/SummStats.cc \
42 | 	summstats_deprecated/nSL.cc \
43 | 	summstats_deprecated/Garud.cc\
44 | 	SeqAlphabets.cc \
45 | 	summstats_deprecated/lHaf.cc \
46 | 	variant_matrix/VariantMatrix.cc \
47 | 	variant_matrix/VariantMatrixViews.cc \
48 | 	variant_matrix/AlleleCountMatrix.cc \
49 | 	variant_matrix/StateCounts.cc \
50 | 	variant_matrix/filtering.cc \
51 | 	variant_matrix/windows.cc \
52 | 	variant_matrix/capsule.cc \
53 | 	variant_matrix/nonowningcapsules.cc \
54 | 	summstats/thetapi.cc \
55 | 	summstats/thetaw.cc \
56 | 	summstats/tajd.cc \
57 | 	summstats/thetah_thetal.cc \
58 | 	summstats/faywuh.cc \
59 | 	summstats/hprime.cc \
60 | 	summstats/nvariablesites.cc \
61 | 	summstats/allele_counts.cc \
62 | 	summstats/haplotype_statistics.cc \
63 | 	summstats/ld.cc \
64 | 	summstats/rmin.cc \
65 | 	summstats/nsl.cc \
66 | 	summstats/nslx.cc \
67 | 	summstats/garud.cc \
68 | 	summstats/generic.cc \
69 | 	summstats/lhaf.cc \
70 | 	summstats/auxillary.cc
71 | 
72 | 
73 | AM_LDFLAGS=-version-info 20:0:0
74 | 
75 | AM_CXXFLAGS= -Wall -W -Woverloaded-virtual  -Wnon-virtual-dtor -Wcast-qual -Wconversion -Wsign-conversion -Wsign-promo -Wsynth
76 | 
77 | #if DEBUG
78 | #AM_CXXFLAGS+=-g
79 | #else
80 | #AM_CXXFLAGS+=-DNDEBUG
81 | #endif
82 | #
83 | #if PROFILING
84 | #PROFILE= -pg
85 | #else
86 | #PROFILE=
87 | #endif
88 | 
89 | 


--------------------------------------------------------------------------------
/src/PolyTableFunctions.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #include <Sequence/PolyTableFunctions.hpp>
25 | #include <Sequence/SeqAlphabets.hpp>
26 | #include <Sequence/PolyTable.hpp>
27 | #include <algorithm>
28 | #include <set>
29 | #include <cctype>
30 | 
31 | namespace Sequence
32 | {
33 |   bool containsCharacter(const PolyTable * t,
34 |   			 const char ch)
35 |   {
36 |     for( PolyTable::const_data_iterator itr = t->begin() ;
37 |   	 itr < t->end() ;
38 |   	 ++itr )
39 |       {
40 |   	if ( itr->find(ch) != std::string::npos )
41 |   	  {
42 |   	    return true;
43 |   	  }
44 |       }
45 |     return false;
46 |   }
47 | 
48 |   bool polyTableValid(const PolyTable * table)
49 |   {
50 |     for ( PolyTable::const_data_iterator itr = table->begin() ;
51 | 	  itr < table->end() ; 
52 | 	  ++itr )
53 |       {
54 | 	if ( (std::find_if(itr->begin(),itr->end(),invalidPolyChar()) != itr->end())
55 | 	     || ( itr->length() != table->numsites() ) )
56 | 	  {
57 | 	    return false;
58 | 	  }
59 |       }
60 |     return true;
61 |   }
62 | }
63 | 


--------------------------------------------------------------------------------
/src/PolyTableManip.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #include <Sequence/PolyTable.hpp>
25 | #include <Sequence/PolyTableManip.hpp>
26 | 
27 | namespace Sequence
28 | {
29 |   polySiteVector rotatePolyTable(const Sequence::PolyTable *data)
30 |   /*!
31 |     Rotate a polymorphism table
32 |     into a vector of pairs, where the
33 |     pairs are of type std::pair<double, string>,
34 |     representing the site position and the characters
35 |     at that site
36 |     \param data a pointer to a Sequence::PolyTable
37 |     \ingroup polytables 
38 |   */
39 |   {
40 |     polySiteVector L;
41 |     for (unsigned i = 0 ; i < data->numsites() ; ++i)
42 |       {
43 |         std::string s;
44 |         for(unsigned j = 0 ; j < data->size() ; ++j)
45 |           {
46 |             s += (*data)[j][i];
47 |           }
48 |         L.push_back( polymorphicSite(data->position(i), s) );
49 |       }
50 |     return L;
51 |   }
52 | }
53 | 


--------------------------------------------------------------------------------
/src/Seq/Fasta.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #include <Sequence/Fasta.hpp>
25 | #include <stdexcept>
26 | #include <iostream>
27 | #include <functional>
28 | 
29 | namespace Sequence
30 | {
  /*! Default constructor: delegates to the default constructor of Seq */
  Fasta::Fasta() : Seq() {}

  Fasta::Fasta (const Seq & seq) : Seq(seq) 
    /*! Construct from a Seq by copy-constructing the Seq base from \a seq */
  {}
 
  Fasta::Fasta( Seq && seq ) : Seq(std::move(seq))
    /*! Construct from an rvalue Seq by move-constructing the Seq base from \a seq */
  {
  }
40 | 
41 |   std::istream & Fasta::read (std::istream & stream) 
42 |   {
43 |     name.clear();
44 |     seq.clear();
45 |     std::string temp;
46 |     int ch = stream.peek();
47 |     if( stream.eof() ) { return stream; }
48 |     if (char(ch) != '>')
49 |       {
50 |         throw std::runtime_error("Fasta.cc: error, file not in FASTA format");
51 |       }
52 |     //Read in name
53 |     //stream >> ch >> std::ws;
54 |     ch = stream.get();
55 |     std::getline(stream,name);
56 |     stream >> std::ws;
57 |     seq.reserve(1000);
58 |     while( char( ch = stream.peek() ) != '>' && ! stream.eof() )
59 |       {
60 | 	std::getline(stream,temp);
61 | 	seq += temp;
62 |       }
63 |     return (stream);
64 |   }
65 | 
66 |   std::ostream & Fasta::print (std::ostream & stream) const
67 |   {
68 |     stream << '>'
69 | 	   << name
70 | 	   << '\n'
71 | 	   << seq;
72 |     return stream;
73 |   }
74 | }
75 | 


--------------------------------------------------------------------------------
/src/Seq/fastq.cc:
--------------------------------------------------------------------------------
 1 | #include <Sequence/fastq.hpp>
 2 | #include <functional>
 3 | #include <iostream>
 4 | 
 5 | namespace Sequence
 6 | {
 7 |     fastq::fastq(void) : Seq(), quality(std::string()), repeat_name(true) {}
 8 | 
 9 |     fastq::fastq(const std::string &name, const std::string &seq,
10 |                  const std::string &qual)
11 |         : Seq(name, seq), quality(qual), repeat_name(true)
12 |     {
13 |     }
14 | 
15 |     fastq::fastq(std::string &&name, std::string &&seq, std::string &&qual)
16 |         : Seq(std::move(name), std::move(seq)), quality(std::move(qual)),
17 |           repeat_name(true)
18 |     {
19 |     }
20 | 
21 |     fastq::fastq(const Seq &s)
22 |         : Seq(s.name, s.seq), quality(std::string()), repeat_name(true)
23 |     {
24 |     }
25 | 
26 |     fastq::fastq(Seq &&s)
27 |         : Seq(std::move(s)), quality(std::string()), repeat_name(true)
28 |     {
29 |     }
30 | 
    // Set whether print() repeats the record name after the '+' on the
    // third line of the output.
    void
    fastq::repname(const bool &b)
    {
        repeat_name = b;
    }
36 | 
37 |     std::istream &
38 |     fastq::read(std::istream &stream)
39 |     {
40 |         if (stream.peek() == EOF)
41 |             return stream;
42 |         if (char(stream.peek()) != '@')
43 |             throw std::runtime_error("Sequence::fastq::read - error: record "
44 |                                      "did not begin with \'@\'");
45 |         std::string temp;
46 |         stream.ignore(1, '@');
47 |         std::getline(stream, name);
48 |         std::getline(stream, seq);
49 |         stream >> std::ws;
50 |         if (char(stream.peek()) != '+')
51 |             throw std::runtime_error("Sequence::fastq::read - error: third "
52 |                                      "line did not begin with \'+\'");
53 |         stream >> temp >> std::ws;
54 |         if (temp.size() == 1)
55 |             repeat_name = false;
56 |         quality.resize(seq.length());
57 |         stream.read(&quality[0], std::streamsize(seq.length()));
58 |         stream >> std::ws;
59 |         if (seq.length() != quality.length())
60 |             throw std::runtime_error("Sequence::fastq::read - error: sequence "
61 |                                      "and quality strings differ in length");
62 |         return stream;
63 |     }
64 | 
65 |     std::ostream &
66 |     fastq::print(std::ostream &stream) const
67 |     {
68 |         stream << '@' << name << '\n' << seq << '\n' << '+';
69 |         if (this->repeat_name)
70 |             {
71 |                 stream << name;
72 |             }
73 |         stream << '\n' << quality;
74 |         return stream;
75 |     }
76 | } //ns Sequence
77 | 


--------------------------------------------------------------------------------
/src/SeqAlphabets.cc:
--------------------------------------------------------------------------------
 1 | //! \file src/SeqAlphabets.cc
 2 | #include <Sequence/SeqAlphabets.hpp>
 3 | #include <algorithm>
 4 | #include <cctype>
 5 | #include <array>
 6 | 
 7 | namespace Sequence {
  //! Nucleotide alphabet.  The four bases A, C, G, T occupy indices 0-3;
  //! every later entry (ambiguity codes, 'N' and the gap '-') has an
  //! index greater than 3.
  const alphabet_t dna_alphabet{ {'A','C','G','T',
	'R','Y','S','W',
	'K','M','B','D',
	'H','V','N','-'} };

  //! Characters used in polymorphism tables: the four bases, the binary
  //! states '0' and '1', the gap '-', 'N', and a terminating '\0'.
  //! NOTE(review): any slots of alphabet_t beyond the nine initialisers are
  //! zero-initialised -- confirm the size of alphabet_t in SeqAlphabets.hpp.
  const alphabet_t dna_poly_alphabet{ {'A','C','G','T', //0-3
	'0','1','-','N', //4-7
	'\0', //8
	} };

  //! The number of slots in dna_poly_alphabet, i.e. one past its last valid index.
  const alphabet_t::size_type NOTPOLYCHAR = dna_poly_alphabet.size();
  
  //! Index of the first '\0' found in dna_poly_alphabet (index 8 in the initialiser above).
  const alphabet_t::size_type POLYEOS = alphabet_t::size_type( std::distance(dna_poly_alphabet.begin(),
									     std::find(dna_poly_alphabet.begin(),
										       dna_poly_alphabet.end(),
										       '\0')
									     ) );
25 |   bool isDNA( const char & ch) 
26 |   {
27 |     return std::find( dna_alphabet.begin(),
28 | 		      dna_alphabet.end(),
29 | 		      std::toupper(ch) ) != dna_alphabet.end();
30 |   }
31 | 
32 |   bool ambiguousNucleotide::operator()(const char & c) const
33 |   {
34 |     return std::distance( dna_alphabet.begin(),
35 | 			  std::find(dna_alphabet.begin(),
36 | 				    dna_alphabet.end(),
37 | 				    std::toupper(c)) ) > 3;
38 |     /*
39 |     const char ch = char(std::toupper(c));
40 |     return (ch != 'A' &&
41 | 	    ch != 'G' &&
42 | 	    ch != 'T' &&
43 | 	    ch != 'C' );
44 |     */
45 |   }
46 |   
47 |   bool invalidPolyChar::operator()(const char & nucleotide) const
48 |     {
49 |       auto itr = std::find(dna_poly_alphabet.begin(),
50 | 			   dna_poly_alphabet.end(),
51 | 			   std::toupper(nucleotide));
52 |       if(itr == dna_poly_alphabet.end()) return 1;
53 |       auto d = std::distance( dna_alphabet.begin(),
54 | 			      itr );
55 |       return ( d > 3 && d < 14 ); 
56 |     }
57 | }
58 | 


--------------------------------------------------------------------------------
/src/SeqConstants.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #include <Sequence/SeqConstants.hpp>
25 | #include <limits>
26 | 
27 | namespace Sequence
28 | {
  /*! \var const unsigned SEQMAXUNSIGNED
    The maximum value of an unsigned integer, as reported by
    std::numeric_limits<unsigned>::max().
  */
  const unsigned SEQMAXUNSIGNED = std::numeric_limits<unsigned>::max();
  /*! \var const double SEQMAXDOUBLE
    The maximum finite value of a double, as reported by
    std::numeric_limits<double>::max().
  */
  const double SEQMAXDOUBLE = std::numeric_limits<double>::max();
37 | }
38 | 


--------------------------------------------------------------------------------
/src/Unweighted.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 |   Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 |   Remove the brackets to email me.
 6 | 
 7 |   This file is part of libsequence.
 8 | 
 9 |   libsequence is free software: you can redistribute it and/or modify
10 |   it under the terms of the GNU General Public License as published by
11 |   the Free Software Foundation, either version 3 of the License, or
12 |   (at your option) any later version.
13 | 
14 |   libsequence is distributed in the hope that it will be useful,
15 |   but WITHOUT ANY WARRANTY; without even the implied warranty of
16 |   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 |   GNU General Public License for more details.
18 | 
19 |   You should have received a copy of the GNU General Public License
20 |   along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #include <Sequence/Unweighted.hpp>
25 | #include <limits>
26 | 
27 | namespace Sequence
28 | {
  WeightingScheme2::weights2_t Unweighted2::operator()(const std::string &, const std::string &,Sequence::GeneticCodes) const
  /*!
    Return the weights for each branch under the unweighted scheme.
    All three arguments are ignored: the result is always two weights,
    both equal to 1.
    \param codon1 a std::string of length 3 representing a sense codon (unused)
    \param codon2 a std::string of length 3 representing a sense codon (unused)
    \return a weights2_t holding {1., 1.}
  */
  {
    return weights2_t({{1.,1.}});
  }
38 | 
  WeightingScheme3::weights3_t Unweighted3::operator()(const std::string &, const std::string &,Sequence::GeneticCodes ) const
  /*!
    Return the weights for each branch under the unweighted scheme.
    All three arguments are ignored: the result is always six weights,
    all equal to 1.
    \param codon1 a std::string of length 3 representing a sense codon (unused)
    \param codon2 a std::string of length 3 representing a sense codon (unused)
    \return a weights3_t holding six values of 1.
  */
  {
    return weights3_t({{1.,1.,1.,1.,1.,1.}});
  }
48 | }
49 | 


--------------------------------------------------------------------------------
/src/libsequenceConfig.cc:
--------------------------------------------------------------------------------
 1 | #include <config.h>
 2 | #include <iostream>
 3 | #include <string>
 4 | #include <cstdlib>
 5 | 
 6 | using namespace std;
 7 | 
 8 | //From config.h
 9 | static const std::string LIBSEQ_VERSION(VERSION);
10 | 
11 | int main(int argc, char ** argv)
12 | {
13 |   if(argc==1)
14 |     {
15 |       cerr << "usage:\n"
16 | 	   << "\t--version\tPrint out version number and exit\n";
17 |       exit(EXIT_SUCCESS);
18 |     }
19 | 
20 |   string av1(argv[1]);
21 |   if( av1 == "--version" ) cout << LIBSEQ_VERSION << '\n';
22 | 
23 |   exit(EXIT_SUCCESS);
24 | }
25 | 


--------------------------------------------------------------------------------
/src/polySiteVector.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | #include <Sequence/polySiteVector.hpp>
24 | #include <Sequence/PolyTable.hpp>
25 | 
26 | 
27 | namespace Sequence
28 | {
29 |   polySiteVector make_polySiteVector(const Sequence::PolyTable & data)
30 |   /*!
31 |     Rotate a polymorphism table
32 |     into a vector of pairs, where the
33 |     pairs are of type std::pair<double, string>,
34 |     representing the site position and the characters
35 |     at that site
36 |     \param data a pointer to a Sequence::PolyTable
37 |     \ingroup polytables 
38 |   */
39 |   {
40 |     polySiteVector L;
41 |     for (unsigned i = 0 ; i < data.numsites() ; ++i)
42 |       {
43 | 	std::string s;
44 | 	for(unsigned j = 0 ; j < data.size() ; ++j)
45 |           {
46 |             s += data[j][i];
47 |           }
48 | 	L.emplace_back( polymorphicSite(data.position(i), s));
49 |       }
50 |     return L;
51 |   }
52 | }
53 | 


--------------------------------------------------------------------------------
/src/summstats/algorithm.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SEQUENCE_SUMMSTATS_ALGORITHM
 2 | #define SEQUENCE_SUMMSTATS_ALGORITHM
 3 | 
 4 | #include <cstdint>
 5 | #include <utility>
 6 | #include <algorithm>
 7 | 
 8 | namespace Sequence
 9 | {
10 |     namespace summstats_algo
11 |     {
12 |         template <typename iterator>
13 |         inline std::pair<iterator, iterator>
14 |         mismatch_skip_missing(iterator beg, iterator end, iterator beg2)
15 |         {
16 |             auto m = std::mismatch(beg, end, beg2);
17 |             while (m.first < end && (*m.first < 0 || *m.second < 0))
18 |                 {
19 |                     m = std::mismatch(m.first + 1, end, m.second + 1);
20 |                 }
21 |             return m;
22 |         }
23 | 
24 |         template <typename iterator>
25 |         inline std::int32_t
26 |         ndiff_skip_missing(iterator beg, iterator end, iterator beg2)
27 |         {
28 |             std::int32_t ndiffs = 0;
29 |             auto m = mismatch_skip_missing(beg, end, beg2);
30 | 
31 |             while (m.first < end)
32 |                 {
33 |                     ++ndiffs;
34 |                     m = mismatch_skip_missing(m.first + 1, end, m.second + 1);
35 |                 }
36 |             return ndiffs;
37 |         }
38 |     } // namespace summstats_algo
39 | } // namespace Sequence
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/src/summstats/auxillary.cc:
--------------------------------------------------------------------------------
 1 | #include <cstdint>
 2 | #include <cmath>
 3 | 
 4 | namespace Sequence
 5 | {
 6 |     namespace summstats_aux
 7 |     {
 8 |         double
 9 |         a_sub_n(const std::uint32_t nsam)
10 |         {
11 |             double rv = 0.0;
12 |             for (std::uint32_t i = 1; i < nsam; ++i)
13 |                 {
14 |                     rv += 1.0 / static_cast<double>(i);
15 |                 }
16 |             return rv;
17 |         }
18 | 
19 |         double
20 |         b_sub_n(const std::uint32_t nsam)
21 |         {
22 |             double rv = 0.0;
23 |             for (std::uint32_t i = 1; i < nsam; ++i)
24 |                 {
25 |                     rv += 1.0 / std::pow(static_cast<double>(i), 2.0);
26 |                 }
27 |             return rv;
28 |         }
29 | 
30 |         double
31 |         b_sub_n_plus1(const std::uint32_t nsam)
32 |         {
33 |             double rv = 0.0;
34 |             for (std::uint32_t i = 1; i < nsam + 1; ++i)
35 |                 {
36 |                     rv += 1.0 / std::pow(static_cast<double>(i), 2.0);
37 |                 }
38 |             return rv;
39 |         }
40 |     } // namespace summstats_aux
41 | } // namespace Sequence
42 | 


--------------------------------------------------------------------------------
/src/summstats/garud.cc:
--------------------------------------------------------------------------------
 1 | #include <cstdint>
 2 | #include <limits>
 3 | #include <vector>
 4 | #include <algorithm>
 5 | #include <unordered_map>
 6 | #include <Sequence/summstats/garud.hpp>
 7 | #include <Sequence/summstats/generic.hpp>
 8 | #include <Sequence/VariantMatrix.hpp>
 9 | #include <Sequence/summstats/classics.hpp>
10 | 
11 | namespace Sequence
12 | {
    // Default constructor: H1 starts at 1 while H12 and H2H1 start as quiet
    // NaN.  garud_statistics returns an object in this state when it cannot
    // compute the statistics.
    GarudStats::GarudStats()
        : H1(1.), H12(std::numeric_limits<double>::quiet_NaN()),
          H2H1(std::numeric_limits<double>::quiet_NaN())
    {
    }
18 | 
19 |     GarudStats::GarudStats(const double __h1, const double __h12,
20 |                            const double __h2h1)
21 |         : H1(__h1), H12(__h12), H2H1(__h2h1)
22 |     {
23 |     }
24 | 
25 |     GarudStats
26 |     garud_statistics(const VariantMatrix& m)
27 |     {
28 |         GarudStats rv;
29 |         if (m.empty() || !m.nsam())
30 |             {
31 |                 return rv;
32 |             }
33 |         // Although one of the stats is haplotype diversity,
34 |         // w
35 |         auto labels = label_haplotypes(m);
36 |         std::unordered_map<std::int32_t, std::int32_t> counts;
37 |         std::size_t nmissing = 0;
38 |         for (auto l : labels)
39 |             {
40 |                 if (l < 0)
41 |                     {
42 |                         ++nmissing;
43 |                     }
44 |                 else
45 |                     {
46 |                         counts[l]++;
47 |                     }
48 |             }
49 |         if (counts.size() < 2)
50 |             {
51 |                 return rv;
52 |             }
53 |         rv.H1 = 1.0 - diversity_from_counts(counts, m.nsam() - nmissing);
54 |         std::vector<std::pair<std::int32_t, std::int32_t>> vcounts(
55 |             counts.begin(), counts.end());
56 |         std::sort(vcounts.begin(), vcounts.end(),
57 |                   [](const std::pair<std::int32_t, std::int32_t>& a,
58 |                      const std::pair<std::int32_t, std::int32_t>& b) {
59 |                       return a.second > b.second;
60 |                   });
61 |         double nsam
62 |             = static_cast<double>(m.nsam()) - static_cast<double>(nmissing);
63 |         rv.H12 = rv.H1
64 |                  + 2. * static_cast<double>(vcounts[0].second)
65 |                        * static_cast<double>(vcounts[1].second)
66 |                        / (nsam * (nsam - 1.0));
67 |         rv.H2H1 = (rv.H1
68 |                    - static_cast<double>(vcounts[0].second
69 |                                          * (vcounts[0].second - 1))
70 |                          / (nsam * (nsam - 1)))
71 |                   / rv.H1;
72 |         return rv;
73 |     }
74 | } // namespace Sequence
75 | 


--------------------------------------------------------------------------------
/src/summstats/generic.cc:
--------------------------------------------------------------------------------
 1 | #include <unordered_map>
 2 | #include <algorithm>
 3 | #include <cstdint>
 4 | #include <limits>
 5 | #include <vector>
 6 | 
 7 | namespace Sequence
 8 | {
 9 |     double
10 |     diversity_from_counts(
11 |         const std::unordered_map<std::int32_t, std::int32_t>& counts,
12 |         const std::size_t nsam)
13 |     {
14 |         if (counts.empty() || !nsam)
15 |             {
16 |                 return std::numeric_limits<double>::quiet_NaN();
17 |             }
18 |         double hom = 0.0;
19 |         for (auto&& c : counts)
20 |             {
21 |                 hom += static_cast<double>(c.second * (c.second - 1));
22 |             }
23 |         hom /= static_cast<double>(nsam * (nsam - 1));
24 |         return 1.0 - hom;
25 |     }
26 | } // namespace Sequence
27 | 


--------------------------------------------------------------------------------
/src/summstats/ld.cc:
--------------------------------------------------------------------------------
 1 | #include <cstdint>
 2 | #include <vector>
 3 | #include <algorithm>
 4 | #include <Sequence/summstats/ld.hpp>
 5 | #include <Sequence/VariantMatrix.hpp>
 6 | #include <Sequence/VariantMatrixViews.hpp>
 7 | 
 8 | namespace Sequence
 9 | {
    // Store the state at the first site (i_), the state at the second site
    // (j_), and the number of times that pair of states was seen (n_).
    TwoLocusCounts::TwoLocusCounts(std::int8_t i_, std::int8_t j_, int n_)
        : i{ i_ }, j{ j_ }, n{ n_ }
    {
    }
14 | 
15 |     std::vector<TwoLocusCounts>
16 |     two_locus_haplotype_counts(const VariantMatrix& m, std::size_t sitei,
17 |                                const std::size_t sitej,
18 |                                const bool skip_missing)
19 |     {
20 |         auto ri = get_ConstRowView(m, sitei);
21 |         auto rj = get_ConstRowView(m, sitej);
22 |         std::vector<TwoLocusCounts> rv;
23 |         for (auto i = ri.begin(), j = rj.begin(); i < ri.end(); ++i, ++j)
24 |             {
25 |                 if (!skip_missing || ((*i >= 0 || *j >= 0) && skip_missing))
26 |                     {
27 |                         auto exists
28 |                             = std::find_if(rv.begin(), rv.end(),
29 |                                            [i, j](const TwoLocusCounts& t) {
30 |                                                return t.i == *i && t.j == *j;
31 |                                            });
32 |                         if (exists == rv.end())
33 |                             {
34 |                                 rv.emplace_back(*i, *j, 1);
35 |                             }
36 |                         else
37 |                             {
38 |                                 exists->n++;
39 |                             }
40 |                     }
41 |             }
42 |         return rv;
43 |     }
44 | } // namespace Sequence
45 | 


--------------------------------------------------------------------------------
/src/summstats/lhaf.cc:
--------------------------------------------------------------------------------
 1 | #include <cstdint>
 2 | #include <algorithm>
 3 | #include <numeric>
 4 | #include <cmath>
 5 | #include <Sequence/VariantMatrix.hpp>
 6 | #include <Sequence/VariantMatrixViews.hpp>
 7 | 
 8 | namespace Sequence
 9 | {
10 |     std::vector<double>
11 |     lhaf(const VariantMatrix &m, const std::int8_t refstate, const double l)
12 |     {
13 |         std::vector<long int> dcounts;
14 |         dcounts.reserve(m.nsites());
15 |         const auto find_nonref = [refstate](const std::int8_t x) {
16 |             return x != refstate && !(x < 0);
17 |         };
18 |         for (std::size_t i = 0; i < m.nsites(); ++i)
19 |             {
20 |                 auto r = get_ConstRowView(m, i);
21 |                 dcounts.push_back(
22 |                     std::count_if(r.begin(), r.end(), find_nonref));
23 |             }
24 | 
25 |         // Get the values for each element in the data
26 |         std::vector<double> rv;
27 |         rv.reserve(m.nsam());
28 |         for (std::size_t i = 0; i < m.nsam(); ++i)
29 |             {
30 |                 auto c = get_ConstColView(m, i);
31 |                 auto j = std::find_if(c.cbegin(), c.cend(), find_nonref);
32 |                 double score = 0.0;
33 |                 while (j != c.cend())
34 |                     {
35 |                         size_t d2 = static_cast<std::size_t>(
36 |                             std::distance(c.cbegin(), j));
37 |                         score += std::pow(static_cast<double>(dcounts[d2]), l);
38 |                         j = std::find_if(j + 1, c.cend(), find_nonref);
39 |                     }
40 |                 rv.push_back(score);
41 |             }
42 |         return rv;
43 |     }
44 | } // namespace Sequence
45 | 


--------------------------------------------------------------------------------
/src/summstats/nsl_common.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SEQUENCE_SUMMSTATS_NSL_COMMON_HPP
 2 | #define SEQUENCE_SUMMSTATS_NSL_COMMON_HPP
 3 | 
 4 | // These functions are not exported.
 5 | // They are used internally.
 6 | 
 7 | #include <cstdint>
 8 | #include <vector>
 9 | #include <algorithm>
10 | #include <cmath>
11 | #include <Sequence/summstats/nSLiHS.hpp>
12 | #include <Sequence/VariantMatrixViews.hpp>
13 | 
14 | namespace Sequence
15 | {
16 |     namespace summstats_details
17 |     {
18 |         struct suffix_edges
19 |         {
20 |             std::int64_t left, right;
21 |             suffix_edges() : left(-1), right(-1) {}
22 |         };
23 | 
24 |         static void
25 |         update_counts(double nsl_values[2], double ihs_values[2],
26 |                       int counts[2], const std::size_t nsites,
27 |                       // NOTE: code smell here -- dangerous
28 |                       const double * positions,
29 |                       const std::size_t index, const std::int64_t left,
30 |                       const std::int64_t right)
31 |         {
32 |             if (left >= 0 && static_cast<std::size_t>(right) < nsites)
33 |                 //Then there are SNPs differentiating
34 |                 //i and j within the region
35 |                 {
36 |                     nsl_values[index] += static_cast<double>(right - left);
37 |                     //TODO: check if we need to add one?
38 |                     ihs_values[index]
39 |                         += positions[static_cast<std::size_t>(right)]
40 |                            - positions[static_cast<std::size_t>(left)];
41 |                     counts[index]++;
42 |                 }
43 |         }
44 | 
45 |         inline nSLiHS
46 |         get_stat(const ConstRowView& core_view, const std::int8_t refstate,
47 |                  const double nsl_values[2], const double ihs_values[2],
48 |                  const int counts[2])
49 |         {
50 | 
51 |             double nSL_den = nsl_values[0] / static_cast<double>(counts[0]);
52 |             double nSL_num = nsl_values[1] / static_cast<double>(counts[1]);
53 |             double iHS_den = ihs_values[0] / static_cast<double>(counts[0]);
54 |             double iHS_num = ihs_values[1] / static_cast<double>(counts[1]);
55 |             auto nonrefcount = static_cast<std::int32_t>(
56 |                 std::count_if(core_view.begin(), core_view.end(),
57 |                               [refstate](const std::int8_t i) {
58 |                                   return i >= 0 && i != refstate;
59 |                               }));
60 |             return nSLiHS{ std::log(nSL_num) - std::log(nSL_den),
61 |                            std::log(iHS_num) - std::log(iHS_den),
62 |                            nonrefcount };
63 |         }
64 |     } // namespace summstats_details
65 | } // namespace Sequence
66 | 
67 | #endif
68 | 


--------------------------------------------------------------------------------
/src/summstats/nvariablesites.cc:
--------------------------------------------------------------------------------
 1 | #include <cstdint>
 2 | #include <algorithm>
 3 | #include <Sequence/AlleleCountMatrix.hpp>
 4 | 
 5 | namespace Sequence
 6 | {
 7 |     std::uint32_t
 8 |     nvariable_sites(const AlleleCountMatrix& m)
 9 |     {
10 |         std::uint32_t nv = 0;
11 |         for (std::size_t site = 0; site < m.nrow; ++site)
12 |             {
13 |                 auto r = m.row(site);
14 |                 auto nstates
15 |                     = std::count_if(r.first, r.second,
16 |                                     [](const AlleleCountMatrix::value_type c) {
17 |                                         return c > 0;
18 |                                     });
19 |                 if (nstates > 1)
20 |                     {
21 |                         ++nv;
22 |                     }
23 |             }
24 |         return nv;
25 |     }
26 | 
27 |     std::uint32_t
28 |     nbiallelic_sites(const AlleleCountMatrix& m)
29 |     {
30 |         std::uint32_t nv = 0;
31 |         for (std::size_t site = 0; site < m.nrow; ++site)
32 |             {
33 |                 auto r = m.row(site);
34 |                 auto nstates
35 |                     = std::count_if(r.first, r.second,
36 |                                     [](const AlleleCountMatrix::value_type c) {
37 |                                         return c > 0;
38 |                                     });
39 |                 if (nstates == 2)
40 |                     {
41 |                         ++nv;
42 |                     }
43 |             }
44 |         return nv;
45 |     }
46 | 
47 |     std::uint32_t
48 |     total_number_of_mutations(const AlleleCountMatrix& m)
49 |     {
50 |         std::uint32_t nv = 0;
51 |         for (std::size_t site = 0; site < m.nrow; ++site)
52 |             {
53 |                 auto r = m.row(site);
54 |                 auto nstates
55 |                     = std::count_if(r.first, r.second,
56 |                                     [](const AlleleCountMatrix::value_type c) {
57 |                                         return c > 0;
58 |                                     });
59 |                 if (nstates > 1)
60 |                     {
61 |                         nv += static_cast<decltype(nv)>(nstates) - 1;
62 |                     }
63 |             }
64 |         return nv;
65 |     }
66 | } // namespace Sequence
67 | 


--------------------------------------------------------------------------------
/src/summstats/rmin.cc:
--------------------------------------------------------------------------------
 1 | #include <cstdint>
 2 | #include <Sequence/summstats/ld.hpp>
 3 | #include <Sequence/summstats/allele_counts.hpp>
 4 | #include <Sequence/VariantMatrix.hpp>
 5 | #include <Sequence/AlleleCountMatrix.hpp>
 6 | #include <Sequence/VariantMatrixViews.hpp>
 7 | 
 8 | namespace Sequence
 9 | {
10 |     std::int32_t
11 |     rmin(const VariantMatrix& m)
12 |     {
13 |         if (m.nsites() < 2)
14 |             {
15 |                 return -1;
16 |             }
17 |         Sequence::AlleleCountMatrix acm(m);
18 |         auto ac = allele_counts(acm);
19 |         std::vector<std::size_t> biallelic_site_indexes;
20 |         for (std::size_t i = 0; i < ac.size(); ++i)
21 |             {
22 |                 if (ac[i].nstates == 2)
23 |                     {
24 |                         biallelic_site_indexes.push_back(i);
25 |                     }
26 |             }
27 |         if (biallelic_site_indexes.size() < 2)
28 |             {
29 |                 return 0;
30 |             }
31 |         bool flag = false;
32 |         std::size_t x = 0;
33 |         std::int32_t rv = 0;
34 |         for (std::size_t a = x + 1; a < biallelic_site_indexes.size(); ++a)
35 |             {
36 |                 for (std::size_t b = (!flag) ? x : a - 1; b < a; ++b)
37 |                     {
38 |                         flag = false;
39 |                         // We do not allow missing data to result in
40 |                         // additional haplotypes
41 |                         auto tl = two_locus_haplotype_counts(m, a, b, true);
42 |                         if (tl.size() == 4)
43 |                             {
44 |                                 ++rv;
45 |                                 flag = true;
46 |                                 break;
47 |                             }
48 |                     }
49 |                 if (flag == true)
50 |                     {
51 |                         x = a;
52 |                     }
53 |             }
54 |         return rv;
55 |     }
56 | } // namespace Sequence
57 | 


--------------------------------------------------------------------------------
/src/summstats/tajd.cc:
--------------------------------------------------------------------------------
 1 | #include <cmath>
 2 | #include <limits>
 3 | #include <Sequence/AlleleCountMatrix.hpp>
 4 | #include <Sequence/summstats/auxillary.hpp>
 5 | 
 6 | namespace Sequence
 7 | {
 8 |     double
 9 |     tajd(const AlleleCountMatrix& ac)
10 |     {
11 |         double pi = 0.0;
12 |         int S = 0;
13 |         std::int32_t max_nsam = 0;
14 |         for (std::size_t i = 0; i < ac.counts.size(); i += ac.ncol)
15 |             {
16 |                 std::int32_t nsam = 0;
17 |                 double homozygosity = 0.0;
18 |                 int nstates = 0;
19 |                 for (std::size_t j = i; j < i + ac.ncol; ++j)
20 |                     {
21 |                         if (ac.counts[j] > 0)
22 |                             {
23 |                                 ++nstates;
24 |                                 nsam += ac.counts[j];
25 |                                 homozygosity += static_cast<double>(
26 |                                     ac.counts[j] * (ac.counts[j] - 1));
27 |                             }
28 |                     }
29 | 
30 |                 if (nstates)
31 |                     {
32 |                         max_nsam = std::max(max_nsam, nsam);
33 |                         S += nstates - 1;
34 |                         pi += 1.0
35 |                               - homozygosity
36 |                                     / static_cast<double>(nsam * (nsam - 1));
37 |                     }
38 |             }
39 |         if (!S)
40 |             {
41 |                 return std::numeric_limits<double>::quiet_NaN();
42 |             }
43 |         auto a1 = summstats_aux::a_sub_n(static_cast<std::uint32_t>(max_nsam));
44 |         double w = static_cast<double>(S) / a1;
45 |         auto a2 = summstats_aux::b_sub_n(static_cast<std::uint32_t>(max_nsam));
46 |         auto dn = static_cast<double>(max_nsam);
47 |         double b1 = (dn + 1.0) / (3.0 * (dn - 1.0));
48 |         double b2
49 |             = (2.0 * (std::pow(dn, 2.0) + dn + 3.0)) / (9.0 * dn * (dn - 1.0));
50 |         double c1 = b1 - 1.0 / a1;
51 |         double c2 = b2 - (dn + 2.0) / (a1 * dn) + a2 / std::pow(a1, 2.0);
52 |         double e1 = c1 / a1;
53 |         double e2 = c2 / (std::pow(a1, 2.0) + a2);
54 |         double denominator = std::pow((e1 * S + e2 * S * (S - 1.0)), 0.5);
55 |         return (pi - w) / denominator;
56 |     }
57 | } // namespace Sequence
58 | 


--------------------------------------------------------------------------------
/src/summstats/thetapi.cc:
--------------------------------------------------------------------------------
 1 | #include <stdexcept>
 2 | #include <Sequence/AlleleCountMatrix.hpp>
 3 | 
 4 | namespace Sequence
 5 | {
 6 |     double
 7 |     thetapi(const AlleleCountMatrix& ac)
 8 |     {
 9 |         double pi = 0.0;
10 |         for (std::size_t i = 0; i < ac.counts.size(); i += ac.ncol)
11 |             {
12 |                 std::int32_t nsam = 0;
13 |                 double homozygosity = 0.0;
14 |                 for (std::size_t j = i; j < i + ac.ncol; ++j)
15 |                     {
16 |                         nsam += ac.counts[j];
17 |                         homozygosity += static_cast<double>(
18 |                             ac.counts[j] * (ac.counts[j] - 1));
19 |                     }
20 |                 pi += 1.0
21 |                       - homozygosity / static_cast<double>(nsam * (nsam - 1));
22 |             }
23 |         return pi;
24 |     }
25 | 
26 | } // namespace Sequence
27 | 


--------------------------------------------------------------------------------
/src/summstats/thetaw.cc:
--------------------------------------------------------------------------------
 1 | #include <algorithm>
 2 | #include <stdexcept>
 3 | #include <Sequence/AlleleCountMatrix.hpp>
 4 | #include <Sequence/summstats/auxillary.hpp>
 5 | 
 6 | namespace Sequence
 7 | {
 8 |     double
 9 |     thetaw(const AlleleCountMatrix& ac)
10 |     {
11 |         double w = 0.0;
12 |         for (std::size_t i = 0; i < ac.counts.size(); i += ac.ncol)
13 |             {
14 |                 std::uint32_t nsam = 0, nstates = 0;
15 |                 for (std::size_t j = i; j < i + ac.ncol; ++j)
16 |                     {
17 |                         if (ac.counts[j] > 0)
18 |                             {
19 |                                 nsam += static_cast<std::uint32_t>(
20 |                                     ac.counts[j]);
21 |                                 nstates++;
22 |                             }
23 |                     }
24 |                 if (nstates > 1)
25 |                     {
26 |                         auto denom = summstats_aux::a_sub_n(nsam);
27 |                         w += static_cast<double>(nstates - 1) / denom;
28 |                     }
29 |             }
30 |         return w;
31 |     }
32 | 
33 | } // namespace Sequence
34 | 


--------------------------------------------------------------------------------
/src/summstats_deprecated/Garud.cc:
--------------------------------------------------------------------------------
 1 | #include <algorithm>
 2 | #include <set>
 3 | #include <string>
 4 | #include <cmath>
 5 | #include <numeric>
 6 | #include <Sequence/SummStatsDeprecated/Garud.hpp>
 7 | 
 8 | using namespace std;
 9 | 
10 | namespace Sequence
11 | {
12 |     GarudStats
13 |     H1H12(const SimData &d)
14 |     /*!
15 |       H1 is total haplotype homozygosity.
16 |       H2 is haplotype homozygosity, combining two most common haplotypes.  H2 =
17 |       H1 + 2p1p2
18 |       H2H1 = H2/H1, where H2 is haplotype homozygosity for all but most common
19 |       haplotype.
20 |       H2H1 = (H1 - p1^2)/H1
21 |      */
22 |     {
23 |         if (d.empty())
24 |             return GarudStats();
25 |         set<string> uhaps(d.begin(), d.end());
26 |         vector<string> vuhaps(uhaps.size());
27 |         std::move(uhaps.begin(), uhaps.end(), vuhaps.begin());
28 |         vector<double> hapcounts;
29 | 		hapcounts.reserve(uhaps.size());
30 | 		for(auto & uh : uhaps)
31 | 		{
32 | 			hapcounts.push_back(static_cast<double>(std::count(d.begin(),d.end(),uh)));
33 | 		}
34 |         const double denom = static_cast<double>(d.size() * (d.size() - 1));
35 |         double H1 = 0.0;
36 | 		for(auto c : hapcounts)
37 | 		{
38 | 			H1 += c*(c-1.0);
39 | 		}
40 | 		H1 /= denom;
41 |         
42 |         sort(hapcounts.begin(), hapcounts.end(),
43 |              std::bind(greater<double>(), std::placeholders::_1,
44 |                        std::placeholders::_2));
45 |         double H12 = H1
46 |                      + 2. * hapcounts[0] * hapcounts[1]
47 |                            / std::pow(double(d.size()), 2.);
48 |         double H2H1 = (H1
49 |                        - double(hapcounts[0] * (hapcounts[0] - 1))
50 |                              / double(d.size() * (d.size() - 1)))
51 |                       / H1;
52 |         return GarudStats(H1, H12, H2H1);
53 |     }
54 | }
55 | 


--------------------------------------------------------------------------------
/src/summstats_deprecated/Snn.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
 4 | 
 5 | Remove the brackets to email me.
 6 | 
 7 | This file is part of libsequence.
 8 | 
 9 | libsequence is free software: you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation, either version 3 of the License, or
12 | (at your option) any later version.
13 | 
14 | libsequence is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | GNU General Public License for more details.
18 | 
19 | You should have received a copy of the GNU General Public License
 20 | along with libsequence.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | */
23 | 
24 | #include <Sequence/SummStatsDeprecated/Snn.hpp>
25 | 
26 | namespace Sequence
27 | {
28 |   double Snn_statistic( const unsigned individuals[],
29 | 			const std::vector< std::vector<double> > & dkj,
30 | 			const unsigned config[],
31 | 			const size_t & npop,
32 | 			const unsigned & nsam)
33 | {
34 |   /*
35 |     notation for variables follows Hudson's paper
36 |   */
37 |   double snn = 0.;
38 | 
39 |   //store the d_kj for the whole sample
40 |   double * d_kj = new double[nsam-1];
41 | 
42 |   //store d_kj for within-population comparisons
43 |   std::vector<double> d_kj_win;
44 |   for(unsigned k=0; k<nsam ; ++k)
45 |     {
46 |       //find out which pop ind k is in
47 |       unsigned pop = 0,ttl=0;
48 |       while (pop < npop)
49 | 	{
50 | 	  ttl += config[pop];
51 | 	  if (k < ttl)
52 | 	    break;
53 | 	  pop++;
54 | 	}
55 |       d_kj_win.clear();
56 |       for(unsigned j = 0,dummy=0; j < nsam ; ++j)
57 | 	{
58 | 	  if (k!=j)
59 | 	    {
60 | 	      unsigned a=*(individuals+j),b=*(individuals+k);
61 | 	      if (a>b)
62 | 		std::swap(a,b);
63 | 
64 | 	      double ndiffs = dkj[a][b];
65 | 	      d_kj[dummy++] = ndiffs;
66 | 	      //figure out what pop j is in;
67 | 	      unsigned pop_j=0,ttl=0;
68 | 	      while (pop_j < npop)
69 | 		{
70 | 		  ttl += config[pop_j];
71 | 		  if (j < ttl)
72 | 		    break;
73 | 		  pop_j++;
74 | 		}
75 | 	      if (pop==pop_j)
76 | 		d_kj_win.push_back(ndiffs);
77 | 	    }
78 | 	}
79 |       //Calculate T_k
80 |       double min = d_kj[0];
81 |       for (unsigned j = 1 ; j < nsam-1 ; ++j)
82 | 	if (d_kj[j] < min) min = d_kj[j];
83 |       
84 |       std::ptrdiff_t T_k = std::count(d_kj,d_kj+(nsam-1),min);
85 | 
86 |       //Calculate M_k
87 |       std::ptrdiff_t M_k = std::count(d_kj_win.begin(),
88 | 				d_kj_win.end(),min);
89 |       snn += double(M_k)/double(T_k);
90 |     }
91 |   delete [] d_kj;
92 |   return snn/double(nsam);
93 | }
94 | 
95 | }
96 | 


--------------------------------------------------------------------------------
/src/summstats_deprecated/SummStats.cc:
--------------------------------------------------------------------------------
 1 | #include <Sequence/polySiteVector.hpp>
 2 | #include <Sequence/SimData.hpp>
 3 | #include <algorithm>
 4 | #include <numeric>
 5 | #include <cctype>
 6 | #include <stdexcept>
 7 | using namespace std;
 8 | 
 9 | namespace Sequence
10 | {
11 |   double Dij(const polymorphicSite & p, const std::vector< unsigned > & config, const unsigned & i, const unsigned & j)
12 |   {
13 |     unsigned rv = 0;
14 |     unsigned N = 0;
15 |     unsigned start1 = accumulate(config.begin(),config.begin()+i,0u),
16 |       start2 = accumulate(config.begin(),config.begin()+j,0u);
17 |     for( unsigned x = start1 ; x < start1 + config[i] ; ++x )
18 |       {
19 | 	for(unsigned y = start2 ; y < start2 + config[j] ; ++y)
20 | 	  {
21 | 	    char ch1 = char(std::toupper(p.second[x])),ch2=char(std::toupper(p.second[y]));
22 | 	    if(ch1 != 'N' && ch2 != 'N')
23 | 	      {
24 | 		rv += (ch1 != ch2) ? 1u : 0u;
25 | 	      }
26 | 	    else
27 | 	      {
28 | 		++N;
29 | 	      }
30 | 	  }
31 |       }
32 |     return double(rv)/(double(config[i]+config[j]-N));
33 |   }
34 |   
35 |   double Gmin(const polySiteVector & , const std::vector< unsigned > & )
36 |   {
37 |     throw std::runtime_error("not implemented yet");
38 |     unsigned mdxy = numeric_limits<unsigned>::max();
39 |     return mdxy;
40 |   }
41 | }//ns Sequence
42 | 


--------------------------------------------------------------------------------
/src/summstats_deprecated/lHaf.cc:
--------------------------------------------------------------------------------
 1 | #include <Sequence/SummStatsDeprecated/lHaf.hpp>
 2 | #include <algorithm>
 3 | #include <numeric>
 4 | #include <cmath>
 5 | 
 6 | namespace Sequence
 7 | {
 8 |     std::vector<double>
 9 |     lHaf(const SimData &data, const double l)
10 |     {
11 |         //using range_type = tbb::blocked_range<SimData::const_site_iterator>;
12 |         //using data_range_type
13 |         //    = tbb::blocked_range<std::vector<std::string>::const_iterator>;
14 |         // Get derived mutation frequency counts per site
15 |         std::vector<unsigned> dcounts;
16 |         dcounts.reserve(data.numsites());
17 |         for (auto i = data.sbegin(); i < data.send(); ++i)
18 |             {
19 |                 dcounts.push_back(static_cast<unsigned>(
20 |                     std::count(i->second.begin(), i->second.end(), '1')));
21 |             }
22 |         // Get the values for each element in the data
23 |         std::vector<double> rv;
24 |         rv.reserve(data.size());
25 |         for (auto &i : data)
26 |             {
27 |                 auto j
28 |                     = std::find_if(i.cbegin(), i.cend(),
29 |                                    [](const char &ch) { return ch == '1'; });
30 |                 double score = 0.0;
31 |                 while (j != i.cend())
32 |                     {
33 |                         size_t d2 = size_t(j - i.cbegin());
34 |                         score += std::pow(static_cast<double>(dcounts[d2]), l);
35 |                         j = std::find(j + 1, i.cend(), '1');
36 |                     }
37 |                 rv.push_back(score);
38 |             }
39 |         return rv;
40 |     }
41 | } // namespace Sequence
42 | 


--------------------------------------------------------------------------------
/src/variant_matrix/AlleleCountMatrix.cc:
--------------------------------------------------------------------------------
 1 | #include <stdexcept>
 2 | #include <Sequence/AlleleCountMatrix.hpp>
 3 | #include <Sequence/StateCounts.hpp>
 4 | 
 5 | namespace Sequence
 6 | {
 7 |     std::vector<std::int32_t>
 8 |     AlleleCountMatrix::init_counts(const VariantMatrix& m)
 9 |     {
10 |         if (m.max_allele() < 0)
11 |             {
12 |                 throw std::invalid_argument("matrix max_allele must be >= 0");
13 |             }
14 |         std::vector<std::int32_t> counts;
15 |         counts.reserve(m.nsam() * static_cast<std::size_t>(m.max_allele() + 1));
16 |         StateCounts c;
17 |         for (std::size_t i = 0; i < m.nsites(); ++i)
18 |             {
19 |                 auto r = get_ConstRowView(m, i);
20 |                 if (static_cast<std::int8_t>(c.max_allele_idx) > m.max_allele())
21 |                     {
22 |                         throw std::runtime_error("found allele value greater "
23 |                                                  "than matrix.max_allele");
24 |                     }
25 |                 c(r);
26 |                 for (std::size_t j = 0;
27 |                      j < static_cast<std::size_t>(m.max_allele() + 1); ++j)
28 |                     {
29 |                         counts.push_back(c.counts[j]);
30 |                     }
31 |             }
32 |         return counts;
33 |     }
34 | 
35 |     AlleleCountMatrix::AlleleCountMatrix(const VariantMatrix& m)
36 |         : counts(init_counts(m)),
37 |           ncol(!m.empty() ? static_cast<std::size_t>(m.max_allele()) + 1
38 |                                : 0),
39 |           nrow(!m.empty() ? counts.size() / ncol : 0), nsam(m.nsam())
40 |     {
41 |     }
42 | 
43 |     std::pair<std::vector<std::int32_t>::const_iterator,
44 |               std::vector<std::int32_t>::const_iterator>
45 |     AlleleCountMatrix::row(const std::size_t i) const
46 |     {
47 |         if (i >= nrow)
48 |             {
49 |                 throw std::out_of_range("row index out of range");
50 |             }
51 |         return std::make_pair(counts.begin() + i * ncol,
52 |                               counts.begin() + i * ncol + ncol);
53 |     }
54 | } // namespace Sequence
55 | 


--------------------------------------------------------------------------------
/src/variant_matrix/windows.cc:
--------------------------------------------------------------------------------
 1 | #include <Sequence/NonOwningCapsules.hpp>
 2 | #include <Sequence/variant_matrix/windows.hpp>
 3 | 
 4 | namespace Sequence
 5 | {
 6 |     VariantMatrix
 7 |     make_window(const VariantMatrix& m, const double beg, const double end)
 8 |     {
 9 |         return make_slice(m, beg, end, 0, m.nsam());
10 |     }
11 | 
12 |     VariantMatrix
13 |     make_slice(const VariantMatrix& m, const double beg, const double end,
14 |                const std::size_t i, const std::size_t j)
15 |     {
16 |         if (end < beg)
17 |             {
18 |                 throw std::invalid_argument("end must be >= beg");
19 |             }
20 |         if (!(j > i))
21 |             {
22 |                 throw std::invalid_argument("i must be < j");
23 |             }
24 |         if (j > m.nsam())
25 |             {
26 |                 throw std::invalid_argument("slice indexes out of range");
27 |             }
28 |         auto pb = std::lower_bound(m.pbegin(), m.pend(), beg);
29 |         auto pe = std::upper_bound(pb, m.pend(), end);
30 |         if (pb == m.pend())
31 |             {
32 |                 std::unique_ptr<GenotypeCapsule> gc(
33 |                     new NonOwningGenotypeCapsule(m.cdata(), 0, 0, 0, 0, 0));
34 |                 std::unique_ptr<PositionCapsule> pc(
35 |                     new NonOwningPositionCapsule(pb, 0));
36 |                 return VariantMatrix(std::move(gc), std::move(pc), -1);
37 |             }
38 |         std::size_t nsites = pe - pb;
39 |         std::size_t nsam = j - i;
40 |         std::size_t row_offset = pb - m.pbegin();
41 |         std::unique_ptr<GenotypeCapsule> gc(new NonOwningGenotypeCapsule(
42 |             m.cdata(), nsites, nsam, row_offset, i, m.nsam()));
43 |         std::unique_ptr<PositionCapsule> pc(
44 |             new NonOwningPositionCapsule(pb, pe - pb));
45 |         return VariantMatrix(std::move(gc), std::move(pc), m.max_allele());
46 |     }
47 | } // namespace Sequence
48 | 


--------------------------------------------------------------------------------
/test/CountingOperators.cc:
--------------------------------------------------------------------------------
 1 | #include <Sequence/CountingOperators.hpp>
 2 | #include <boost/test/unit_test.hpp>
 3 | #include <algorithm>
 4 | #include <functional>
 5 | #include <string>
 6 | 
 7 | BOOST_AUTO_TEST_SUITE(CountingOperatorsTest)
 8 | 
 9 | BOOST_AUTO_TEST_CASE( test_counting_operators_map_plus )
10 | {
11 |   using Sequence::operator+;
12 |   std::map<char,unsigned> baseCounts,baseCounts2;
13 |   baseCounts['A'] = 5;
14 |   baseCounts['G'] = 10;
15 |   baseCounts2['A'] = 11;
16 |   baseCounts2['C'] = 17;
17 |   std::map<char,unsigned> baseCounts3 = baseCounts + baseCounts2;
18 | 
19 |   BOOST_REQUIRE_EQUAL( baseCounts3['A'], 16 );
20 |   BOOST_REQUIRE_EQUAL( baseCounts3['G'], 10 );
21 |   BOOST_REQUIRE_EQUAL( baseCounts3['C'], 17 );
22 | }
23 | 
24 | BOOST_AUTO_TEST_CASE( test_counting_operators_map_plus_equal )
25 | {
26 |   using Sequence::operator+=;
27 |   std::map<char,unsigned> baseCounts,baseCounts2;
28 |   baseCounts['A'] = 5;
29 |   baseCounts['G'] = 10;
30 |   baseCounts2['A'] = 11;
31 |   baseCounts2['C'] = 17;
32 |   baseCounts += baseCounts2;
33 | 
34 |   BOOST_REQUIRE_EQUAL( baseCounts['A'], 16 );
35 |   BOOST_REQUIRE_EQUAL( baseCounts['G'], 10 );
36 |   BOOST_REQUIRE_EQUAL( baseCounts['C'], 17 );
37 | }
38 | 
39 | BOOST_AUTO_TEST_CASE( test_counting_operators_vector_plus )
40 | {
41 |   using Sequence::operator+;
42 |   std::vector< std::pair<char,unsigned> > baseCounts,baseCounts2;
43 |   baseCounts.push_back(std::make_pair('A',5u));
44 |   baseCounts.push_back(std::make_pair('G',10u));
45 |   baseCounts2.push_back(std::make_pair('A',11u));
46 |   baseCounts2.push_back(std::make_pair('C',17u));
47 | 
48 |   auto baseCounts3 = baseCounts + baseCounts2;
49 | 
50 |   std::string bases = {'A','G','C'};
51 | 
52 |   auto i = std::find_if(baseCounts3.cbegin(),
53 | 			baseCounts3.cend(),
54 | 			[](const std::pair<char,unsigned> & __p) {
55 | 			  return __p.first == 'A';
56 | 			});
57 |   BOOST_REQUIRE( i != baseCounts3.cend() );
58 |   BOOST_REQUIRE_EQUAL( i->second,16 );
59 | 
60 |   i = std::find_if(baseCounts3.cbegin(),
61 | 		   baseCounts3.cend(),
62 | 		   [](const std::pair<char,unsigned> & __p) {
63 | 		     return __p.first == 'G';
64 | 		   });
65 |   BOOST_REQUIRE( i != baseCounts3.cend() );
66 |   BOOST_REQUIRE_EQUAL( i->second,10 );
67 |   
68 |   i = std::find_if(baseCounts3.cbegin(),
69 | 		   baseCounts3.cend(),
70 | 		   [](const std::pair<char,unsigned> & __p) {
71 | 		     return __p.first == 'C';
72 | 		   });
73 |   BOOST_REQUIRE( i != baseCounts3.cend() );
74 |   BOOST_REQUIRE_EQUAL( i->second,17 );
75 | 
76 | }
77 | BOOST_AUTO_TEST_SUITE_END()
78 | 


--------------------------------------------------------------------------------
/test/FastaConstructors.cc:
--------------------------------------------------------------------------------
 1 | //! \file FastaConstructors.cc
 2 | 
 3 | #include <Sequence/Fasta.hpp>
 4 | #include <string>
 5 | #include <boost/test/unit_test.hpp>
 6 | #include <iostream>
 7 | #include <functional>
 8 | 
 9 | struct fasta_constructors_fixture
10 | {
11 |     std::string name, seq;
12 |     fasta_constructors_fixture()
13 |         : name{ "seqname" }, seq{ "AGCGTAGACAGTAGAGTGAT" }
14 |     {
15 |     }
16 | };
17 | 
18 | BOOST_FIXTURE_TEST_SUITE(FastaConstructorsTest, fasta_constructors_fixture)
19 | 
20 | BOOST_AUTO_TEST_CASE(empty)
21 | {
22 |     Sequence::Fasta f;
23 |     BOOST_REQUIRE(f.name.empty());
24 |     BOOST_REQUIRE(f.seq.empty());
25 | }
26 | 
27 | BOOST_AUTO_TEST_CASE(string_con)
28 | {
29 |     Sequence::Fasta f = Sequence::Fasta(name, seq);
30 |     BOOST_CHECK(f.name == name);
31 |     BOOST_CHECK(f.seq == seq);
32 | }
33 | 
34 | BOOST_AUTO_TEST_CASE(copy_con)
35 | {
36 |     Sequence::Fasta f = Sequence::Fasta(name.c_str(), seq.c_str());
37 |     BOOST_CHECK(f.name == name);
38 |     BOOST_CHECK(f.seq == seq);
39 | 
40 |     Sequence::Fasta f2(f);
41 |     BOOST_REQUIRE(f == f2);
42 | }
43 | 
44 | BOOST_AUTO_TEST_CASE(move_con)
45 | {
46 |     Sequence::Fasta f = Sequence::Fasta(name.c_str(), seq.c_str());
47 |     BOOST_CHECK(f.name == name);
48 |     BOOST_CHECK(f.seq == seq);
49 | 
50 |     Sequence::Fasta f2(std::move(f));
51 |     BOOST_CHECK(f2.name == name);
52 |     BOOST_CHECK(f2.seq == seq);
53 |     BOOST_CHECK(f.length() == 0);
54 |     BOOST_CHECK(f.name.empty());
55 | }
56 | 
57 | BOOST_AUTO_TEST_CASE(move_con2)
58 | //This "should" work???
59 | {
60 |     std::string a(name), b(seq);
61 |     Sequence::Fasta f = Sequence::Fasta(std::move(a), std::move(b));
62 |     BOOST_CHECK(f.name == name);
63 |     BOOST_CHECK(f.seq == seq);
64 |     BOOST_CHECK(a.empty());
65 |     BOOST_CHECK(b.empty());
66 | }
67 | 
68 | BOOST_AUTO_TEST_CASE(move_assign)
69 | {
70 |     Sequence::Fasta f = Sequence::Fasta(name, seq);
71 |     BOOST_CHECK(f.name == name);
72 |     BOOST_CHECK(f.seq == seq);
73 | 
74 |     Sequence::Fasta f2;
75 |     f2 = std::move(f);
76 |     BOOST_CHECK(f2.name == name);
77 |     BOOST_CHECK(f2.seq == seq);
78 |     BOOST_CHECK(f.length() == 0);
79 |     BOOST_CHECK(f.name.empty());
80 | }
81 | BOOST_AUTO_TEST_SUITE_END()
82 | //EOF
83 | 


--------------------------------------------------------------------------------
/test/FastaOperations.cc:
--------------------------------------------------------------------------------
  1 | //! \file FastaOperations.cc
  2 | 
  3 | #include <Sequence/Fasta.hpp>
  4 | #include <string>
  5 | #include <iostream>
  6 | #include <algorithm>
  7 | #include <numeric>
  8 | #include <boost/test/unit_test.hpp>
  9 | 
 10 | struct fasta_operations_fixture
 11 | {
 12 |     std::string name, seq;
 13 |     fasta_operations_fixture()
 14 |         : name{ "seqname" }, seq{ "AGCGTAGACAGTAGAGTGAT" }
 15 |     {
 16 |     }
 17 | };
 18 | 
 19 | BOOST_FIXTURE_TEST_SUITE(FastaOperationsTest, fasta_operations_fixture)
 20 | 
 21 | // Reference reverse-complement written for this test; non-ACGT characters become 'N'.
 22 | std::string rcom( const std::string & s )
 23 | {
 24 |   std::string result;
 25 |   // Walk the input back to front, appending the complement of each base.
 26 |   for (auto it = s.rbegin(); it != s.rend(); ++it)
 27 |     {
 28 |       char complement = 'N';
 29 |       if (*it == 'A')
 30 |         {
 31 |           complement = 'T';
 32 |         }
 33 |       else if (*it == 'G')
 34 |         {
 35 |           complement = 'C';
 36 |         }
 37 |       else if (*it == 'C')
 38 |         {
 39 |           complement = 'G';
 40 |         }
 41 |       else if (*it == 'T')
 42 |         {
 43 |           complement = 'A';
 44 |         }
 45 |       result.push_back(complement);
 46 |     }
 47 |   return result;
 48 | }
 49 | 
 50 | 
 51 | BOOST_AUTO_TEST_CASE( revcom )
 52 | {
 53 |   // name/seq are the fixture members; local copies would only shadow them.
 54 |   Sequence::Fasta original(name,seq);
 55 |   // Revcom() changes the object it is called on, so work on a copy.
 56 |   Sequence::Fasta reversed = original;
 57 |   reversed.Revcom();
 58 |   // Cross-check against the independent rcom() helper defined in this file.
 59 |   BOOST_REQUIRE( reversed.seq == rcom(seq) );
 60 | }
 61 | 
 62 | BOOST_AUTO_TEST_CASE( subseq )
 63 | {
 64 |   // name/seq are the fixture members; local copies would only shadow them.
 65 |   Sequence::Fasta f(name,seq);
 66 | 
 67 |   Sequence::Fasta f3(f);
 68 |   f3.Subseq(1,3);
 69 |   // The fixture sequence starts "AGCG...", so the retained bases are expected to be "GCG".
 70 |   BOOST_REQUIRE( f3.seq == "GCG" );
 71 | 
 72 |   f3.Complement();
 73 |   // Complementing "GCG" is expected to give "CGC".
 74 |   BOOST_REQUIRE( f3.seq == "CGC" );
 75 | 
 76 |   BOOST_REQUIRE( std::string(f3) == "CGC" ); //operator string()
 77 | 
 78 | }
 79 | 
 80 | 
 81 | BOOST_AUTO_TEST_CASE( gapped )
 82 | {
 83 |   Sequence::Fasta rec("seqname","GCG");
 84 | 
 85 |   // A freshly built record contains no gap characters.
 86 |   BOOST_REQUIRE( !rec.IsGapped() );
 87 | 
 88 |   // Append one gap: the total length grows, the ungapped length does not.
 89 |   rec.seq.push_back('-');
 90 |   BOOST_REQUIRE( rec.IsGapped() );
 91 | 
 92 |   BOOST_REQUIRE( rec.length() == 4 );
 93 |   BOOST_REQUIRE( rec.UngappedLength() == 3 );
 94 | 
 95 |   // Remove the gap again; both lengths should now agree.
 96 |   const std::string::size_type gap_at = rec.seq.find('-');
 97 |   rec.seq.erase( gap_at, 1 );
 98 |   BOOST_REQUIRE( rec.length() == 3 );
 99 | 
100 |   BOOST_REQUIRE( rec.UngappedLength() == 3 );
101 | }
102 | 
103 | BOOST_AUTO_TEST_CASE( cpp11access_1 )
104 | {
105 |   // Overwrite every base through Fasta's range-for (begin/end) interface.
106 |   Sequence::Fasta rec("seqname","GCG");
107 |   for( auto & base : rec )
108 |     base = 'A';
109 | 
110 |   BOOST_REQUIRE_EQUAL(rec.seq,"AAA");
111 | }
112 | 
113 | BOOST_AUTO_TEST_SUITE_END()
114 | //EOF
115 | 


--------------------------------------------------------------------------------
/test/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUNIT_TEST_PRESENT
 2 | # Everything below is active only when the BUNIT_TEST_PRESENT conditional is true.
 3 | check_PROGRAMS=libseq_unit_tests
 4 | # "make check" runs every program listed in TESTS.
 5 | TESTS=$(check_PROGRAMS)
 6 | # Debug symbols; link against the library in ../src/.libs and record that path as an rpath.
 7 | AM_CXXFLAGS=-g
 8 | AM_LDFLAGS=-L../src/.libs -Wl,-rpath,../src/.libs
 9 | AM_LIBS=-lsequence
10 | # The DEBUG and PROFILING conditionals below are currently commented out.
11 | #if DEBUG
12 | #AM_CXXFLAGS+=-g
13 | #else
14 | #AM_CXXFLAGS+=-DNDEBUG
15 | #endif
16 | #
17 | #if PROFILING
18 | #PROFILE= -pg
19 | #else
20 | #PROFILE=
21 | #endif
22 | # Append -lsequence (AM_LIBS) to the libraries used at link time.
23 | LIBS+=$(AM_LIBS)
24 | # All test translation units are linked into the single libseq_unit_tests program.
25 | libseq_unit_tests_SOURCES=libseq_unit_tests.cc \
26 | FastaConstructors.cc \
27 | FastaIO.cc \
28 | FastaOperations.cc \
29 | AlignStreamTest.cc \
30 | CountingOperators.cc \
31 | PolyTableConversions.cc \
32 | PolyTableTweaking.cc \
33 | PolyTableBadBehavior.cc \
34 | PolySitesIO.cc \
35 | SimpleSNPIO.cc \
36 | PolySIMtest.cc \
37 | PolySNPtest.cc \
38 | ComparisonsTest.cc \
39 | AlignmentTest.cc \
40 | fastqIO.cc \
41 | fastqConstructors.cc \
42 | SeqConversions.cc \
43 | RedundancyCom95test.cc \
44 | alphabets.cc \
45 | polySiteVectorTest.cc \
46 | PolyTableSliceTest.cc \
47 | stateCounterTest.cc \
48 | VariantMatrixTest.cc \
49 | testAlleleCountMatrix.cc \
50 | testClassicSummstats.cc \
51 | testClassicSummstatsEmptyVariantMatrix.cc \
52 | testLD.cc \
53 | testGarudStatistics.cc \
54 | msformatdata.cc \
55 | testVariantMatrixWindows.cc
56 | 
57 | endif #if BUNIT_TEST_PRESENT
58 | 


--------------------------------------------------------------------------------
/test/PolySitesIO.cc:
--------------------------------------------------------------------------------
 1 | #include <Sequence/PolySites.hpp>
 2 | #include <boost/test/unit_test.hpp>
 3 | #include <sstream>
 4 | #include <fstream>
 5 | #include <unistd.h>
 6 | 
 7 | BOOST_AUTO_TEST_SUITE(PolySitesIOTest)
 8 | 
 9 | BOOST_AUTO_TEST_CASE( polysites_io )
10 | {
11 |   // Test table: 4 sequences x 5 sites.
12 |   std::vector<double> pos = {1,2,3,4,5};
13 |   std::vector<std::string> data = {"AAAAA", "AAGAA", "CTGAA", "NAACT"};
14 | 
15 |   // from_stream / from_file both start empty so each round trip is verified independently.
16 |   Sequence::PolySites ps(std::move(pos),std::move(data)),from_stream,from_file;
17 | 
18 |   // Round trip 1: through an in-memory string stream.
19 |   std::ostringstream o;
20 |   o << ps << '\n';
21 |   std::istringstream in(o.str());
22 |   BOOST_REQUIRE_NO_THROW( in >> from_stream >> std::ws );
23 |   BOOST_REQUIRE( ps == from_stream );
24 | 
25 |   // Round trip 2: through a file on disk.  A fresh object is read into, because an
26 |   // object that already equals ps would hide a read that silently did nothing.
27 |   const char * fn = "psitesio.txt";
28 |   std::ofstream of(fn);
29 |   of << ps << '\n';
30 |   of.close();
31 |   std::ifstream inf(fn);
32 |   BOOST_REQUIRE_NO_THROW(inf >> from_file >> std::ws);
33 |   inf.close();
34 |   unlink(fn);  // remove the scratch file before the final assertion so a failure cannot leave it behind
35 |   BOOST_REQUIRE( ps == from_file );
36 | }
37 | BOOST_AUTO_TEST_SUITE_END()
38 | 


--------------------------------------------------------------------------------
/test/PolyTableSliceTest.cc:
--------------------------------------------------------------------------------
 1 | //! \file PolyTableSliceTest.cc @brief Tests for Sequence/PolyTableSlice.hpp
 2 | #include <boost/test/unit_test.hpp>
 3 | #include <Sequence/SimData.hpp>
 4 | #include <iostream>
 5 | #include <Sequence/PolyTableSlice.hpp>
 6 | #include <vector>
 7 | #include <utility>
 8 | #include <string>
 9 | 
10 | using namespace std;
11 | using namespace Sequence;
12 | 
13 | BOOST_AUTO_TEST_SUITE(PolyTableSliceTest)
14 | 
15 | BOOST_AUTO_TEST_CASE( lastwindows1 )
16 | {
17 |   // Identical sites at positions 0.05, 0.06, ... (all below 0.9).
18 |   vector<pair<double,string> > sites;
19 |   for(double pos = 0.05 ; pos < 0.9 ; pos += 0.01 )
20 |     sites.push_back(pair<double,string>(pos,"001000"));
21 |   SimData d(sites.begin(),sites.end());
22 |   // NOTE(review): 0.1, 0.001, 0., 1. are presumably window size, step, start and end -- confirm against PolyTableSlice.
23 |   PolyTableSlice<SimData> w(d.sbegin(),d.send(),0.1,0.001,0.,1.);
24 |   BOOST_REQUIRE_EQUAL(w.size(),static_cast<unsigned>(1./0.001));
25 | }
26 | 
27 | BOOST_AUTO_TEST_CASE( nwindows1 )
28 | {
29 |   vector<pair<double,string> > sites;
30 |   for(double pos = 0.05 ; pos < 0.9 ; pos += 0.01 )
31 |     sites.push_back(pair<double,string>(pos,"001000"));
32 |   SimData d(sites.begin(),sites.end());
33 |   PolyTableSlice<SimData> w(d.sbegin(),d.send(),64);
34 |   const double nsites = double(d.numsites());
35 |   unsigned ewindows = std::ceil(nsites/64);
36 |   BOOST_REQUIRE_EQUAL(w.size(),std::ceil(nsites/double(ewindows)));
37 |   // No slice may be empty.
38 |   for(auto itr = w.cbegin();itr!=w.cend();++itr)
39 |     {
40 |       BOOST_CHECK( w.get_slice(itr).empty() == false );
41 |     }
42 | }
43 | 
44 | BOOST_AUTO_TEST_CASE( nwindows2 )
45 | {
46 |   //Same as nwindows1, but with a 10x smaller step (0.001) between positions
47 |   vector<pair<double,string> > sites;
48 |   for(double pos = 0.05 ; pos < 0.9 ; pos += 0.001 )
49 |     sites.push_back(pair<double,string>(pos,"001000"));
50 |   SimData d(sites.begin(),sites.end());
51 |   PolyTableSlice<SimData> w(d.sbegin(),d.send(),64);
52 |   const double nsites = double(d.numsites());
53 |   unsigned ewindows = std::ceil(nsites/64);
54 |   BOOST_REQUIRE_EQUAL(w.size(),std::ceil(nsites/double(ewindows)));
55 |   // No slice may be empty.
56 |   for(auto itr = w.cbegin();itr!=w.cend();++itr)
57 |     {
58 |       BOOST_CHECK( w.get_slice(itr).empty() == false );
59 |     }
60 | }
61 | 
62 | 
63 | BOOST_AUTO_TEST_SUITE_END()
64 | 


--------------------------------------------------------------------------------
/test/README.md:
--------------------------------------------------------------------------------
 1 | # Unit tests for libsequence
 2 | 
 3 | ## Dependencies
 4 | 
 5 | 1. Make sure that libsequence is compiled in the parent directory
 6 | 2. The [boost](http://boost.org) unit testing library is used by these tests.  Currently, autoconf does _not_ check for this dependency.  Make sure that the library is installed
 7 | 
 8 | ### A word of caution
 9 | 
10 | I develop the library and the tests on an Ubuntu Linux machine.  The library is written in C++11 and tested primarily using GCC and secondarily using clang++ (the default compiler on current-era OS X).  On Ubuntu 14.04, I have observed that the unit tests fail to compile with clang++.  I have not tracked down whether this is due to the Ubuntu boost packages being compiled with GCC without C++11 awareness, to some issue with clang++ and boost's unit testing library, or to some complex interaction amongst those possibilities.  However, I have confirmed that the unit tests compile and work fine on OS X Yosemite using clang++.
11 | 
12 | ## Compiling the tests
13 | 
14 | ```
15 | make check
16 | ```
17 | 
18 | ## Running the tests
19 | 
20 | ```
21 | sh runTests.sh
22 | ```
23 | 
24 | If you really want all the details, then execute this instead:
25 | 
26 | ```
27 | BOOST_TEST_LOG_LEVEL=all sh runTests.sh
28 | ```
29 | 
30 | The boost unit testing library will report any errors in any testing modules.
31 | 
32 | Note that some tests may intentionally cause errors.  When that is the case, a message stating that the error is intentional will appear on screen along with the error.
33 | 
34 | ## Notes
35 | 
36 | * The tests are statically-linked against the version of libsequence compiled in the parent directory.  This is done so that there is no confusion that the tests are testing the current code, and not some other version of the library installed on your system.
37 | * More tests are needed!


--------------------------------------------------------------------------------
/test/SeqConversions.cc:
--------------------------------------------------------------------------------
 1 | #include <Sequence/Fasta.hpp>
 2 | #include <Sequence/fastq.hpp>
 3 | #include <string>
 4 | #include <fstream>
 5 | #include <iostream>
 6 | #include <boost/test/unit_test.hpp>
 7 | BOOST_AUTO_TEST_SUITE(SeqConversionsTest)
 8 | 
 9 | BOOST_AUTO_TEST_CASE( fastq2fasta )
10 | {
11 |   std::ifstream in("data/data.fastq");
12 |   BOOST_REQUIRE( in.is_open() ); // without the data file every check below would pass on empty records
13 |   Sequence::fastq fq;
14 |   Sequence::Fasta fa;
15 |   in >> fq >> std::ws;
16 |   // Assigning a fastq to a Fasta must carry over the name and the sequence.
17 |   fa = fq;
18 | 
19 |   BOOST_CHECK_EQUAL( fq.name , fa.name );
20 |   BOOST_CHECK_EQUAL( fq.seq , fa.seq );
21 | }
22 | 
23 | BOOST_AUTO_TEST_CASE( fastq2fasta2 )
24 | {
25 |   std::ifstream in("data/data.fastq");
26 |   BOOST_REQUIRE( in.is_open() ); // without the data file every check below would pass on empty records
27 |   Sequence::fastq fq;
28 | 
29 |   in >> fq >> std::ws;
30 |   // Constructing a Fasta from a fastq must carry over the name and the sequence.
31 |   Sequence::Fasta fa(fq);
32 |   BOOST_CHECK_EQUAL( fq.name , fa.name );
33 |   BOOST_CHECK_EQUAL( fq.seq , fa.seq );
34 | }
35 | 
36 | BOOST_AUTO_TEST_CASE( fastq2fasta3 )
37 | {
38 |   std::ifstream in("data/data.fastq");
39 |   BOOST_REQUIRE( in.is_open() ); // a never-filled fastq would satisfy the "empty" checks below trivially
40 |   Sequence::fastq fq;
41 | 
42 |   in >> fq >> std::ws;
43 |   // Move-construct a Fasta; the moved-from fastq is expected to be left empty.
44 |   Sequence::Fasta fa(std::move(fq));
45 |   BOOST_CHECK (fq.length() == 0);
46 |   BOOST_CHECK (fq.name.empty());
47 | }
48 | 
49 | BOOST_AUTO_TEST_CASE( fasta2fastq_1 )
50 | {
51 |   // Copy-convert a Fasta into a fastq; the quality string must stay empty.
52 |   Sequence::Fasta src = {"name","ATGC"};
53 |   Sequence::fastq converted = src;
54 |   BOOST_CHECK_EQUAL( converted.name , src.name );
55 |   BOOST_CHECK_EQUAL( converted.seq , src.seq );
56 |   BOOST_CHECK( converted.quality.empty() );
57 | }
58 | 
59 | BOOST_AUTO_TEST_CASE( fasta2fastq_2 )
60 | {
61 |   // Move-convert via copy-initialisation; compare against the literals used to build the source.
62 |   Sequence::Fasta src = {"name","ATGC"};
63 |   Sequence::fastq converted = std::move(src);
64 |   BOOST_CHECK( converted.name == "name" );
65 |   BOOST_CHECK( converted.seq == "ATGC" );
66 |   BOOST_CHECK( converted.quality.empty() );
67 | }
68 | 
69 | BOOST_AUTO_TEST_CASE( fasta2fastq_3 )
70 | {
71 |   // Move-convert via direct initialisation; compare against the literals used to build the source.
72 |   Sequence::Fasta src = {"name","ATGC"};
73 |   Sequence::fastq converted(std::move(src));
74 |   BOOST_CHECK( converted.name == "name" );
75 |   BOOST_CHECK( converted.seq == "ATGC" );
76 |   BOOST_CHECK( converted.quality.empty() );
77 | }
78 | BOOST_AUTO_TEST_SUITE_END()
79 | 


--------------------------------------------------------------------------------
/test/SimpleSNPIO.cc:
--------------------------------------------------------------------------------
 1 | #include <Sequence/PolySites.hpp>
 2 | #include <Sequence/SimpleSNP.hpp>
 3 | #include <boost/test/unit_test.hpp>
 4 | #include <sstream>
 5 | #include <fstream>
 6 | #include <unistd.h>
 7 | 
 8 | BOOST_AUTO_TEST_SUITE(SimpleSNPIOTest)
 9 | 
10 | BOOST_AUTO_TEST_CASE( polysites_io )
11 | {
12 |   std::vector<double> pos = {1,2,3,4,5};
13 |   std::vector<std::string> data = {"AAAAA",
14 | 				   "AAGAA",
15 | 				   "CTGAA",
16 | 				   "NAACT"};
17 |   
18 |   Sequence::PolySites temp(std::move(pos),std::move(data));
19 |   // ps and ps3 receive the same sites from temp; ps2 is the target of every read below.
20 |   Sequence::SimpleSNP ps,ps2,ps3;
21 |   ps.assign(temp.sbegin(),temp.send());
22 |   ps3.assign(temp.sbegin(),temp.send());
23 |   // Round trip 1: through an in-memory string stream.
24 |   std::ostringstream o;
25 |   o << ps << '\n';
26 |   std::istringstream in(o.str());
27 | 
28 |   BOOST_REQUIRE_NO_THROW( in >> ps2 >> std::ws );
29 | 
30 |   BOOST_REQUIRE( ps == ps2 );
31 |   // Round trip 2: through a scratch file that is removed afterwards.
32 |   const char * fn = "simplesnpio.txt";
33 | 
34 |   std::ofstream of(fn);
35 |   of << ps << '\n';
36 |   of.close();
37 |   std::ifstream inf(fn);
38 |   BOOST_REQUIRE_NO_THROW(inf >> ps2 >> std::ws);
39 |   BOOST_REQUIRE( ps == ps2 ); // NOTE(review): ps2 already equals ps here, so a read that did nothing would go unnoticed
40 |   inf.close();
41 |   unlink(fn);
42 | 
43 |   const char * fn2 = "simplesnpio2.txt";
44 |   //Round trip 3: call set_outgroup(true) on ps and repeat the file round trip
45 |   ps.set_outgroup(true);
46 |   of.open(fn2);
47 |   of << ps << '\n';
48 |   of.close();
49 |   inf.open(fn2);
50 |   BOOST_REQUIRE_NO_THROW(inf >> ps2 >> std::ws);
51 |   inf.close();
52 |   BOOST_REQUIRE( ps == ps2 );
53 |   BOOST_REQUIRE( ps == ps3 ); // NOTE(review): set_outgroup was never called on ps3, so operator== presumably ignores that flag -- confirm
54 | 
55 |   unlink(fn2);
56 | }
57 | BOOST_AUTO_TEST_SUITE_END()
58 | 


--------------------------------------------------------------------------------
/test/VariantMatrixFixture.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef LIBSEQUENCE_TESTS_VARIANTMATRIXFIXTURE_HPP
 2 | #define LIBSEQUENCE_TESTS_VARIANTMATRIXFIXTURE_HPP
 3 | 
 4 | #include <Sequence/VariantMatrix.hpp>
 5 | #include <Sequence/AlleleCountMatrix.hpp>
 6 | 
 7 | struct invariantdataset // fixture: an empty matrix and an all-ones matrix, plus an AlleleCountMatrix built from each
 8 | {
 9 |     using data_type = std::vector<std::int8_t>;
10 |     using positions_type = std::vector<double>;
11 |     Sequence::VariantMatrix empty, invariant; // declared before the count matrices, which are constructed from them
12 |     Sequence::AlleleCountMatrix empty_counts, invariant_counts;
13 |     invariantdataset()
14 |         : empty{ data_type{}, positions_type{} }, // no data and no positions
15 |           invariant{ data_type{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 24 entries, all equal to 1
16 |                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
17 |                      positions_type{ 0.1, 0.2, 0.3 } }, // 3 positions; presumably 3 sites x 8 samples -- confirm
18 |           empty_counts(empty), invariant_counts(invariant)
19 |     {
20 |     }
21 | };
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/test/alphabets.cc:
--------------------------------------------------------------------------------
 1 | /*! \file alphabets.cc @brief Unit tests for Sequence/SeqAlphabets.hpp */
 2 | 
 3 | #include <Sequence/SeqAlphabets.hpp>
 4 | #include <Sequence/Fasta.hpp>
 5 | #include <boost/test/unit_test.hpp>
 6 | #include <algorithm>
 7 | #include <iterator>
 8 | BOOST_AUTO_TEST_SUITE(AlphabetTest)
 9 | 
10 | BOOST_AUTO_TEST_CASE( check_dna_alphabet )
11 | {
12 |   // Each of the four bases must be found within the first four alphabet entries.
13 |   for ( auto base : {'A','G','C','T'} )
14 |     {
15 |       auto hit = std::find( Sequence::dna_alphabet.begin(), Sequence::dna_alphabet.end(), base );
16 |       BOOST_REQUIRE( std::distance(Sequence::dna_alphabet.begin(), hit) < 4 );
17 |     }
18 | }
19 | 
20 | BOOST_AUTO_TEST_CASE( check_isDNA_1 )
21 | {
22 |   // Every member of dna_alphabet must be accepted by isDNA();
23 |   // the alphabet is walked with explicit iterators.
24 |   for (auto itr = Sequence::dna_alphabet.begin(); itr != Sequence::dna_alphabet.end(); ++itr )
25 |     BOOST_REQUIRE( Sequence::isDNA(*itr) );
26 | }
27 | 
28 | BOOST_AUTO_TEST_CASE( check_isDNA_2 )
29 | {
30 |   // Predicate shared by both algorithms below: true for any non-DNA character.
31 |   auto not_dna = [](const char & ch) { return !Sequence::isDNA(ch); };
32 | 
33 |   Sequence::Fasta f = { "name","ATGCZAGC" };  //Z is a non-DNA character
34 | 
35 |   // The first offending character ('Z') sits at index 4.
36 |   auto itr = std::find_if( f.begin(),f.end(), not_dna );
37 |   BOOST_REQUIRE_EQUAL( std::distance(f.begin(),itr),4 );
38 | 
39 |   // Erase-remove idiom: strip every non-DNA character from the sequence.
40 |   f.seq.erase( std::remove_if(f.begin(), f.end(), not_dna),
41 |                f.seq.end() );
42 |   BOOST_REQUIRE_EQUAL(f.seq,"ATGCAGC");
43 | }
44 | 
45 | //dna_poly_alphabet must contain a '\0' entry
46 | BOOST_AUTO_TEST_CASE( dna_poly_alphabet_1 )
47 | {
48 |   const auto last = Sequence::dna_poly_alphabet.end();
49 |   BOOST_REQUIRE( std::find( Sequence::dna_poly_alphabet.begin(), last, '\0' ) != last );
50 | }
51 | 
52 | BOOST_AUTO_TEST_CASE( dna_poly_alphabet_2 )
53 | {
54 |   BOOST_CHECK_EQUAL( Sequence::POLYEOS, 8 ); // pins the constant's value; the neighbouring tests use it as an index bound into dna_poly_alphabet
55 | }
56 | 
57 | BOOST_AUTO_TEST_CASE( dna_poly_alphabet_3 )
58 | {
59 |   // Each accepted character must be found at an index below POLYEOS.
60 |   for ( auto ch : {'A','C','G','T','N','0','1','-'} )
61 |     {
62 |       auto hit = std::find( Sequence::dna_poly_alphabet.begin(), Sequence::dna_poly_alphabet.end(), ch );
63 |       BOOST_CHECK( std::distance( Sequence::dna_poly_alphabet.begin(), hit ) < Sequence::POLYEOS );
64 |     }
65 | }
66 | 
67 | BOOST_AUTO_TEST_CASE( dna_poly_alphabet_4 )
68 | {
69 |   // Lower-case bases and the codes W/K must not be found at an index below POLYEOS.
70 |   for ( auto ch : {'a','c','g','t','n','W','K'} )
71 |     {
72 |       auto hit = std::find( Sequence::dna_poly_alphabet.begin(), Sequence::dna_poly_alphabet.end(), ch );
73 |       BOOST_CHECK( std::distance( Sequence::dna_poly_alphabet.begin(), hit ) >= Sequence::POLYEOS );
74 |     }
75 | }
76 | 
77 | BOOST_AUTO_TEST_CASE( dna_poly_alphabet_5 )
78 | {
79 |   // Same characters as dna_poly_alphabet_4, compared against NOTPOLYCHAR instead of POLYEOS.
80 |   for ( auto ch : {'a','c','g','t','n','W','K'} )
81 |     {
82 |       auto hit = std::find( Sequence::dna_poly_alphabet.begin(), Sequence::dna_poly_alphabet.end(), ch );
83 |       BOOST_CHECK( std::distance( Sequence::dna_poly_alphabet.begin(), hit ) >= Sequence::NOTPOLYCHAR );
84 |     }
85 | }
86 | BOOST_AUTO_TEST_SUITE_END()
87 | 


--------------------------------------------------------------------------------
/test/data/README.md:
--------------------------------------------------------------------------------
1 | # Data files for unit tests
2 | 
3 | * phylip_input.txt - copied from http://evolution.genetics.washington.edu/phylip/doc/main.html
4 | * single_ms.txt - output from Hudson's "ms" program
5 | * CG15644-Z.aln - Variation data from a Drosophila Zimbabwe population sample.  In clustalw format


--------------------------------------------------------------------------------
/test/data/phylip_input.txt:
--------------------------------------------------------------------------------
 1 |     6   39
 2 | Archaeopt CGATGCTTAC CGCCGATGCT
 3 | HesperorniCGTTACTCGT TGTCGTTACT
 4 | BaluchitheTAATGTTAAT TGTTAATGTT
 5 | B. virginiTAATGTTCGT TGTTAATGTT
 6 | BrontosaurCAAAACCCAT CATCAAAACC
 7 | B.subtilisGGCAGCCAAT CACGGCAGCC
 8 | 
 9 | TACCGCCGAT GCTTACCGC
10 | CGTTGTCGTT ACTCGTTGT
11 | AATTGTTAAT GTTAATTGT
12 | CGTTGTTAAT GTTCGTTGT
13 | CATCATCAAA ACCCATCAT
14 | AATCACGGCA GCCAATCAC
15 | 


--------------------------------------------------------------------------------
/test/fastqConstructors.cc:
--------------------------------------------------------------------------------
 1 | //\file fastqConstructors.cc
 2 | 
 3 | #include <Sequence/fastq.hpp>
 4 | #include <fstream>
 5 | #include <boost/test/unit_test.hpp>
 6 | #include <unistd.h>
 7 | #include <iterator>
 8 | #include <iostream>
 9 | #include <stdexcept>
10 | 
11 | BOOST_AUTO_TEST_SUITE(FastqConstructorsTest)
12 | 
13 | BOOST_AUTO_TEST_CASE( move_construction )
14 | {
15 |   std::ifstream in("data/data.fastq");
16 |   // Fail only this test case when the data file is missing; calling exit() here
17 |   // would terminate the whole test runner and skip every remaining test.
18 |   BOOST_REQUIRE_MESSAGE( in.is_open(), "Error, couldn't find input file!" );
19 |   Sequence::fastq f;
20 | 
21 |   in >> f >> std::ws;
22 | 
23 |   // Move-construct a second record from the one just read.
24 |   Sequence::fastq f2(std::move(f));
25 | 
26 |   // The moved-from record is expected to be left empty.
27 |   BOOST_CHECK_EQUAL(f.length(),0);
28 | }
29 | BOOST_AUTO_TEST_SUITE_END()
30 | 


--------------------------------------------------------------------------------
/test/fastqIO.cc:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <Sequence/fastq.hpp>
 3 | #include <fstream>
 4 | #include <boost/test/unit_test.hpp>
 5 | #include <unistd.h>
 6 | #include <iterator>
 7 | #include <iostream>
 8 | #include <stdexcept>
 9 | 
10 | BOOST_AUTO_TEST_SUITE(FASTQIOTest)
11 | 
12 | BOOST_AUTO_TEST_CASE( input_test )
13 | {
14 |   std::ifstream in("data/data.fastq");
15 |   // Fail only this test case when the data file is missing; calling exit() here
16 |   // would terminate the whole test runner and skip every remaining test.
17 |   BOOST_REQUIRE_MESSAGE( in.is_open(), "Error, couldn't find input file!" );
18 | 
19 |   Sequence::fastq f;
20 |   unsigned count = 0;
21 | 
22 |   // Read until the stream stops being good: std::ws sets eofbit after the last record, and a failed read also ends the loop.
23 |   BOOST_REQUIRE_NO_THROW 
24 |     (
25 |      while(in.good())
26 |        {
27 | 	 in >> f >> std::ws;
28 | 	 ++count;
29 |        }
30 |      );
31 |   BOOST_CHECK_EQUAL(count,50);
32 | }
33 | 
34 | BOOST_AUTO_TEST_CASE( input_test2 )
35 | {
36 |   std::ifstream in("data/data.fastq");
37 |   // Fail only this test case when the data file is missing; calling exit() here
38 |   // would terminate the whole test runner and skip every remaining test.
39 |   BOOST_REQUIRE_MESSAGE( in.is_open(), "Error, couldn't find input file!" );
40 | 
41 |   // Count the records by advancing an istream_iterator to its end-of-stream value.
42 |   // A single counter is used; the record count is checked once the read loop has finished.
43 |   unsigned count = 0;
44 | 
45 |   BOOST_REQUIRE_NO_THROW 
46 |     (
47 |      std::istream_iterator<Sequence::fastq> i(in);
48 |      for( ; i != std::istream_iterator<Sequence::fastq>() ; ++i )
49 |        {
50 | 	 ++count;  
51 |        }
52 |      in.close();
53 |      ); 
54 | 
55 |   BOOST_CHECK_EQUAL(count,50);
56 | }
57 | 
58 | BOOST_AUTO_TEST_CASE( output_test )
59 | {
60 |   std::ifstream in("data/data.fastq");
61 |   // Checked outside the no-throw block: a failed REQUIRE aborts the test case by throwing.
62 |   BOOST_REQUIRE_MESSAGE( in.is_open(), "Error, couldn't find input file!" );
63 | 
64 |   BOOST_REQUIRE_NO_THROW 
65 |     (
66 |      Sequence::fastq f;
67 |      std::vector<Sequence::fastq> vf;
68 |      std::ofstream out("fastqIOtest.txt");
69 |      unsigned count = 0;
70 |      // Pass 1: read every record, keep a copy, and write it to the scratch file.
71 |      while(in.good())
72 |        {
73 | 	 in >> f >> std::ws;
74 | 	 f.repname(false);
75 | 	 vf.push_back(f);
76 | 	 out << f << '\n';
77 | 	 ++count;
78 |        }
79 |      BOOST_CHECK_EQUAL(count,50);
80 |      out.close();
81 |      in.close();
82 |      // Pass 2: read the scratch file back and compare record by record.
83 |      in.open("fastqIOtest.txt");
84 |      count = 0;
85 |      while(in.good())
86 |        {
87 | 	 in >> f >> std::ws;
88 | 	 // Never index past the stored records; a surplus is reported by the size check below.
89 | 	 if (count < vf.size()) { BOOST_CHECK_EQUAL(f,vf[count]); }
90 | 	 ++count;
91 |        }
92 |      BOOST_CHECK_EQUAL(count,vf.size());
93 |      in.close(); 
94 |      unlink("fastqIOtest.txt");
95 |      );
96 | }
97 | BOOST_AUTO_TEST_SUITE_END()
98 | 


--------------------------------------------------------------------------------
/test/libseq_unit_tests.cc:
--------------------------------------------------------------------------------
1 | #define BOOST_TEST_MODULE libsequence_unit_tests
2 | #include <boost/test/included/unit_test.hpp>
3 | 
4 | 


--------------------------------------------------------------------------------
/test/msformatdata.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef LIBSEQUENCE_TEST_MSFORMAT_HPP
 2 | #define LIBSEQUENCE_TEST_MSFORMAT_HPP
 3 | 
 4 | #include <string>
 5 | 
 6 | std::string get_msformat_data(); // text that msprime_data_fixture.hpp parses with Sequence::from_msformat
 7 | std::string get_msformat_stream(); // text that msprime_data_fixture.hpp wraps in a std::istringstream (msprime_stream)
 8 | 
 9 | #endif
10 | 


--------------------------------------------------------------------------------
/test/msprime_data_fixture.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef LIBSEQUENCE_TEST_MSPRIME_DATA_FIXTURE_HPP
 2 | #define LIBSEQUENCE_TEST_MSPRIME_DATA_FIXTURE_HPP
 3 | 
 4 | #include <sstream>
 5 | #include "msformatdata.hpp"
 6 | #include <Sequence/VariantMatrix.hpp>
 7 | #include <Sequence/AlleleCountMatrix.hpp>
 8 | #include <Sequence/variant_matrix/msformat.hpp>
 9 | 
10 | struct vmatrix_from_msprime // fixture: a VariantMatrix parsed from get_msformat_data() plus an AlleleCountMatrix built from it
11 | {
12 |     Sequence::VariantMatrix m;
13 |     Sequence::AlleleCountMatrix c; // constructed from m in the initializer list, so m must be declared first
14 |     // Parses the text returned by get_msformat_data() with Sequence::from_msformat.
15 |     static Sequence::VariantMatrix
16 |     read()
17 |     {
18 |         std::istringstream in(get_msformat_data());
19 |         return Sequence::from_msformat(in);
20 |     }
21 | 
22 |     vmatrix_from_msprime() : m(read()), c(m) {}
23 | };
24 | 
25 | // Fixture holding an input string stream over the text returned by get_msformat_stream().
26 | struct msprime_stream {
27 |     msprime_stream() : in{ get_msformat_stream() } {}
28 |     std::istringstream in;
29 | };
30 | 
31 | #endif
32 | 


--------------------------------------------------------------------------------
/test/polySiteVectorTest.cc:
--------------------------------------------------------------------------------
 1 | //! \file polySiteVectorTest.cc @brief Unit tests for Sequence::polySiteVector
 2 | #include <Sequence/polySiteVector.hpp>
 3 | #include <Sequence/PolySites.hpp>
 4 | #include <Sequence/SeqAlphabets.hpp>
 5 | #include <boost/test/unit_test.hpp>
 6 | #include <cstdio>
 7 | #include <cstdlib>
 8 | #include <cctype>
 9 | #include <iterator>
10 | #include <iostream>
11 | #include <algorithm>
12 | #include <functional>
13 | 
14 | using psite = Sequence::polymorphicSite;
15 | using Ptable = Sequence::polySiteVector;
16 | 
17 | BOOST_AUTO_TEST_SUITE(PolySiteVectorTest)
18 | 
19 | BOOST_AUTO_TEST_CASE( ptable_remove_1 )
20 | {
21 |   // Two sites; only the first consists solely of valid characters.
22 |   Ptable t = { psite(1.,"AAGC"),
23 |                psite(2.,"ACZA") }; //site 2 has a non-DNA character
24 | 
25 |   BOOST_CHECK_EQUAL( t.size(), 2 );
26 | 
27 |   // Predicate: the site's string contains a character flagged by invalidPolyChar.
28 |   auto has_invalid_char = []( const psite & site ) {
29 |     return std::find_if(site.second.begin(), site.second.end(),
30 |                         Sequence::invalidPolyChar()) != site.second.end();
31 |   };
32 | 
33 |   // Erase-remove idiom: only the first site should survive.
34 |   t.erase( std::remove_if( t.begin(), t.end(), has_invalid_char ), t.end() );
35 | 
36 |   BOOST_CHECK_EQUAL( t.size(), 1 );
37 | }
38 | 
39 | BOOST_AUTO_TEST_CASE( ptable_make_from_polytable )
40 | {
41 |   // Two fully valid sites.
42 |   Ptable t = { psite(1.,"AAGC"),
43 |                psite(2.,"ACAA") };
44 | 
45 |   // Build a PolySites from the site range.
46 |   Sequence::PolySites ps(t.begin(),t.end());
47 | 
48 |   // Both containers must expose the same number of sites.
49 |   BOOST_REQUIRE( std::distance(t.begin(),t.end()) ==
50 |                  std::distance(ps.sbegin(),ps.send()) );
51 | 
52 |   // Walk the two site ranges in lock-step, comparing the first and second members.
53 |   auto ps_i = ps.sbegin();
54 |   for( auto t_i = t.begin() ; t_i < t.end() ; ++t_i, ++ps_i )
55 |     {
56 |       BOOST_CHECK_EQUAL(t_i->first,ps_i->first);
57 |       BOOST_CHECK_EQUAL(t_i->second,ps_i->second);
58 |     }
59 | 
60 |   // Converting the PolySites back must reproduce the original vector.
61 |   Ptable t2(Sequence::make_polySiteVector(ps));
62 | 
63 |   BOOST_REQUIRE( t == t2 );
64 | }
65 | BOOST_AUTO_TEST_SUITE_END()
66 | 


--------------------------------------------------------------------------------
/test/runTests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Run every regular file below the current directory that has any execute bit set.
3 | for i in $(find . -type f \( -perm -100 -o -perm -010 -o -perm -001 \))
4 | do
5 | "$i"
6 | done
7 | 


--------------------------------------------------------------------------------
/test/stateCounterTest.cc:
--------------------------------------------------------------------------------
 1 | #include <Sequence/stateCounter.hpp>
 2 | #include <string>
 3 | #include <boost/test/unit_test.hpp>
 4 | #include <iostream>
 5 | #include <algorithm>
 6 | #include <functional>
 7 | 
 8 | using namespace std;
 9 | using namespace Sequence;
10 | BOOST_AUTO_TEST_SUITE(stateCounterTest)
11 | 
12 | BOOST_AUTO_TEST_CASE( test1 )
13 | {
14 |   // One each of A, G, C, T, N and a gap character; nothing else.
15 |   string bases("AGCTN-");
16 |   auto tally = for_each(bases.begin(),bases.end(),stateCounter());
17 |   BOOST_CHECK_EQUAL(tally.a,1);
18 |   BOOST_CHECK_EQUAL(tally.g,1);
19 |   BOOST_CHECK_EQUAL(tally.c,1);
20 |   BOOST_CHECK_EQUAL(tally.t,1);
21 |   BOOST_CHECK_EQUAL(tally.n,1);
22 |   BOOST_CHECK_EQUAL(tally.gap,1);
23 |   BOOST_CHECK_EQUAL(tally.ndna,0);
24 | }
25 | BOOST_AUTO_TEST_SUITE_END()
26 | 


--------------------------------------------------------------------------------
/test/testAlleleCountMatrix.cc:
--------------------------------------------------------------------------------
 1 | //! \file testAlleleCountMatrix.cc @brief Tests for Sequence/AlleleCountMatrix.hpp
 2 | #include "msprime_data_fixture.hpp"
 3 | #include <Sequence/AlleleCountMatrix.hpp>
 4 | #include <Sequence/variant_matrix/windows.hpp>
 5 | #include <boost/test/unit_test.hpp>
 6 | #include <algorithm>
 7 | #include <numeric> //for std::iota
 8 | #include <iterator>
 9 | 
10 | BOOST_FIXTURE_TEST_SUITE(test_allele_count_matrix, vmatrix_from_msprime)
11 | 
12 | BOOST_AUTO_TEST_CASE(test_max_allele_exception)
13 | {
14 |     // Overwrite the first stored state with 5 so that it exceeds m.max_allele.
15 |     auto &first_state = m.data()[0];
16 |     first_state = 5;
17 |     BOOST_REQUIRE_THROW(Sequence::AlleleCountMatrix ac(m), std::runtime_error);
18 | }
19 | 
20 | BOOST_AUTO_TEST_CASE(counts_from_windows)
21 | {
22 |     for (std::size_t site = 0; site != m.nsites(); ++site) // one window per site, both bounds at that site's position
23 |         {
24 |             auto single_site_window = Sequence::make_window(m, m.position(site), m.position(site));
25 |             BOOST_REQUIRE_NO_THROW(Sequence::AlleleCountMatrix ac(single_site_window));
26 |         }
27 | }
28 | 
29 | BOOST_AUTO_TEST_SUITE_END()
30 | 
31 | 


--------------------------------------------------------------------------------
/test/testLD.cc:
--------------------------------------------------------------------------------
 1 | //! \file testLD.cc @brief unit tests for LD-related calculations
 2 | 
 3 | #include <cmath>
 4 | #include <algorithm>
 5 | #include <vector>
 6 | #include <iostream>
 7 | #include <Sequence/VariantMatrix.hpp>
 8 | #include <Sequence/VariantMatrixViews.hpp>
 9 | #include <Sequence/summstats/ld.hpp>
10 | #include <boost/test/unit_test.hpp>
11 | #include "msprime_data_fixture.hpp"
12 | 
13 | BOOST_FIXTURE_TEST_SUITE(test_LD, vmatrix_from_msprime)
14 | 
15 | BOOST_AUTO_TEST_CASE(test_two_locus_haplotype_counts)
16 | {
17 |     // For every pair of sites i < j: the number of haplotype counts returned must equal the number of distinct (state_i, state_j) pairs.
18 |     for (std::size_t i = 0; i + 1 < m.nsites(); ++i) // "i + 1 <" cannot wrap around when nsites() == 0
19 |         {
20 |             for (std::size_t j = i + 1; j < m.nsites(); ++j)
21 |                 {
22 |                     std::vector<std::pair<std::int8_t, std::int8_t>> haps;
23 |                     auto hc
24 |                         = Sequence::two_locus_haplotype_counts(m, i, j, true);
25 |                     auto ri = Sequence::get_ConstRowView(m, i);
26 |                     auto rj = Sequence::get_ConstRowView(m, j);
27 |                     for (std::size_t k = 0; k < ri.size(); ++k)
28 |                         {
29 |                             haps.emplace_back(ri[k], rj[k]);
30 |                         }
31 |                     std::sort(haps.begin(), haps.end());
32 |                     auto end_of_unique_haps
33 |                         = std::unique(haps.begin(), haps.end());
34 |                     BOOST_REQUIRE_EQUAL(
35 |                         hc.size(), static_cast<std::size_t>(std::distance(
36 |                                        haps.begin(), end_of_unique_haps)));
37 |                 }
38 |         }
39 | }
40 | 
41 | BOOST_AUTO_TEST_SUITE_END()
42 | 


--------------------------------------------------------------------------------
/test/testVariantMatrixWindows.cc:
--------------------------------------------------------------------------------
 1 | #include <Sequence/VariantMatrix.hpp>
 2 | #include <Sequence/VariantMatrixViews.hpp>
 3 | #include <Sequence/variant_matrix/windows.hpp>
 4 | #include <Sequence/variant_matrix/msformat.hpp>
 5 | #include <boost/test/unit_test.hpp>
 6 | #include <algorithm>
 7 | #include <sstream>
 8 | #include <iostream>
 9 | #include "msformatdata.hpp"
10 | 
11 | BOOST_AUTO_TEST_SUITE(testVariantMatrixWindows)
12 | 
13 | BOOST_AUTO_TEST_CASE(test_windows)
14 | {
15 |     std::istringstream input(get_msformat_data());
16 |     auto vm = Sequence::from_msformat(input);
17 |     // Windows [left, left + 0.1] for left = 0.0, 0.1, ... while left < 1.0 - 1e-4.
18 |     for (double left = 0.0; left < 1.0 - 1e-4; left += 0.1)
19 |         {
20 |             auto w = Sequence::make_window(vm, left, left + 0.1);
21 |             // Index within vm of the first position that is not less than left.
22 |             std::size_t offset = std::lower_bound(vm.pbegin(), vm.pend(), left) - vm.pbegin();
23 |             for (std::size_t site = 0; site < w.nsites(); ++site)
24 |                 {
25 |                     auto window_site = Sequence::get_ConstRowView(w, site);
26 |                     auto matrix_site = Sequence::get_ConstRowView(vm, offset + site);
27 |                     // The window row must match the corresponding full-matrix row
28 |                     // element by element (no mismatch before the window row ends).
29 |                     auto m = std::mismatch(window_site.begin(), window_site.end(), matrix_site.begin());
30 |                     BOOST_REQUIRE_EQUAL(m.first == window_site.end(), true);
31 |                 }
32 |         }
33 | }
34 | 
35 | BOOST_AUTO_TEST_CASE(test_slices)
36 | {
37 |     std::istringstream input(get_msformat_data());
38 |     auto vm = Sequence::from_msformat(input);
39 |     // from/to are forwarded to make_slice; `from` is also the offset applied to the full-matrix row below.
40 |     std::size_t from = 53, to = 70;
41 |     for (double left = 0.0; left < 1.0 - 1e-4; left += 0.1)
42 |         {
43 |             auto w = Sequence::make_slice(vm, left, left + 0.1, from, to);
44 |             // Index within vm of the first position that is not less than left.
45 |             std::size_t offset = std::lower_bound(vm.pbegin(), vm.pend(), left) - vm.pbegin();
46 |             for (std::size_t site = 0; site < w.nsites(); ++site)
47 |                 {
48 |                     auto window_site = Sequence::get_ConstRowView(w, site);
49 |                     auto matrix_site = Sequence::get_ConstRowView(vm, offset + site);
50 |                     // The slice row must match the full-matrix row starting at
51 |                     // element `from` (no mismatch before the slice row ends).
52 |                     auto m = std::mismatch(window_site.begin(), window_site.end(),
53 |                                            matrix_site.begin() + from);
54 |                     BOOST_REQUIRE_EQUAL(m.first == window_site.end(), true);
55 |                 }
56 |         }
57 | }
58 | 
59 | BOOST_AUTO_TEST_SUITE_END()
60 | 


--------------------------------------------------------------------------------