├── .circleci └── config.yml ├── .gitignore ├── .travis.yml ├── AUTHORS ├── COPYING ├── ChangeLog ├── INSTALL ├── LICENSE ├── Makefile.am ├── Makefile.in ├── README.md ├── REVISION_HISTORY.md ├── Sequence ├── AlignStream.hpp ├── Alignment.hpp ├── AlleleCountMatrix.hpp ├── Clustalw.hpp ├── Coalescent │ ├── Coalesce.hpp │ ├── Coalescent.hpp │ ├── DemographicModels.hpp │ ├── FragmentsRescaling.hpp │ ├── Initialize.hpp │ ├── Makefile.am │ ├── Makefile.in │ ├── Mutation.hpp │ ├── NeutralSample.hpp │ ├── Recombination.hpp │ ├── SimTypes.hpp │ ├── Trajectories.hpp │ ├── TreeOperations.hpp │ └── bits │ │ ├── Coalesce.tcc │ │ ├── DemographicModels.tcc │ │ ├── Makefile.am │ │ ├── Makefile.in │ │ ├── Mutation.tcc │ │ ├── Recombination.tcc │ │ └── Trajectories.tcc ├── CodonTable.hpp ├── Comeron95.hpp ├── Comparisons.hpp ├── ComplementBase.hpp ├── CountingOperators.hpp ├── FST.hpp ├── Fasta.hpp ├── Grantham.hpp ├── GranthamWeights.hpp ├── HKA.hpp ├── Hudson2001.hpp ├── Kimura80.hpp ├── Makefile.am ├── Makefile.in ├── NonOwningCapsules.hpp ├── PathwayHelper.hpp ├── PolyFunctional.hpp ├── PolySIM.hpp ├── PolySNP.hpp ├── PolySNPimpl.hpp ├── PolySites.hpp ├── PolyTable.hpp ├── PolyTableFunctions.hpp ├── PolyTableSlice.hpp ├── Recombination.hpp ├── RedundancyCom95.hpp ├── Seq.hpp ├── SeqAlphabets.hpp ├── SeqConstants.hpp ├── SeqEnums.hpp ├── SeqFunctors.hpp ├── SeqProperties.hpp ├── SeqRegexes.hpp ├── SeqUtilities.hpp ├── SimData.hpp ├── SimParams.hpp ├── SimpleSNP.hpp ├── SingleSub.hpp ├── Sites.hpp ├── StateCounts.hpp ├── SummStatsDeprecated.hpp ├── SummStatsDeprecated │ ├── Garud.hpp │ ├── Makefile.am │ ├── Makefile.in │ ├── Snn.hpp │ ├── lHaf.hpp │ └── nSL.hpp ├── ThreeSubs.hpp ├── Translate.hpp ├── Translate2.hpp ├── TwoSubs.hpp ├── Unweighted.hpp ├── VariantMatrix.hpp ├── VariantMatrixCapsule.hpp ├── VariantMatrixViews.hpp ├── VectorCapsules.hpp ├── WeightingSchemes.hpp ├── bamreader.hpp ├── bamrecord.hpp ├── bits │ ├── AlignStream.tcc │ ├── Alignment.tcc │ ├── Clustalw.tcc │ 
├── CountingOperators.tcc │ ├── Makefile.am │ ├── Makefile.in │ ├── PolySites.tcc │ ├── PolyTable.tcc │ ├── PolyTableFunctions.tcc │ ├── PolyTableSlice.tcc │ ├── Snn.tcc │ ├── col_view_iterator.hpp │ ├── descriptiveStats.tcc │ ├── phylipData.tcc │ └── variant_matrix_views_internal.hpp ├── descriptiveStats.hpp ├── fastq.hpp ├── phylipData.hpp ├── polySiteVector.hpp ├── samflag.hpp ├── samfunctions.hpp ├── samrecord.hpp ├── shortestPath.hpp ├── stateCounter.hpp ├── summstats.hpp ├── summstats │ ├── Makefile.am │ ├── Makefile.in │ ├── algorithm.hpp │ ├── allele_counts.hpp │ ├── auxillary.hpp │ ├── classics.hpp │ ├── garud.hpp │ ├── generic.hpp │ ├── ld.hpp │ ├── lhaf.hpp │ ├── nSLiHS.hpp │ ├── nsl.hpp │ ├── nslx.hpp │ ├── nvariablesites.hpp │ ├── thetah.hpp │ ├── thetal.hpp │ ├── thetapi.hpp │ ├── thetaw.hpp │ └── util.hpp ├── typedefs.hpp └── variant_matrix │ ├── Makefile.am │ ├── Makefile.in │ ├── filtering.hpp │ ├── msformat.hpp │ └── windows.hpp ├── aclocal.m4 ├── compile ├── config-h.in.in ├── config.guess ├── config.h.in ├── config.sub ├── configure ├── configure.ac ├── depcomp ├── doc ├── Makefile ├── images │ ├── 2subs │ ├── 2subs.jpg │ ├── 2subs.pdf │ ├── 3subs │ ├── 3subs.jpg │ └── 3subs.pdf ├── libsequence.bib ├── libsequence.doxygen.in └── md │ └── tutorial.md ├── examples ├── Makefile.am ├── Makefile.in ├── Makefile.old ├── PolyTableIterators.cc ├── baseComp.cc ├── codons.cc ├── correlations.cc ├── int_handler.cc ├── int_handler.hpp ├── mean_nSLx.cc ├── ms_to_VariantMatrix.cc ├── msstats.cc ├── nSL_from_ms.cc ├── nSL_vs_nSLx.cc ├── polySiteVector_test.cc ├── slidingWindow.cc ├── slidingWindow2.cc ├── test_SimDataIO.cc ├── translateTest.cc ├── ufs.cc └── valid_dna.cc ├── index.md ├── init_autotools.sh ├── install-sh ├── ltmain.sh ├── m4 ├── ax_cxx_compile_stdxx_11.m4 ├── libtool.m4 ├── ltoptions.m4 ├── ltsugar.m4 ├── ltversion.m4 └── lt~obsolete.m4 ├── missing ├── pandoc.css ├── src ├── ChangeLog ├── Coalescent │ ├── CoalescentCoalesce.cc │ ├── 
CoalescentFragmentsRescaling.cc │ ├── CoalescentInitialize.cc │ ├── CoalescentMutation.cc │ ├── CoalescentRecombination.cc │ ├── CoalescentSimTypes.cc │ └── CoalescentTreeOperations.cc ├── CodonTable.cc ├── Comeron95.cc ├── Comparisons.cc ├── ComplementBase.cc ├── Grantham.cc ├── GranthamWeights.cc ├── Kimura80.cc ├── Makefile.am ├── Makefile.in ├── PathwayHelper.cc ├── PolySites.cc ├── PolyTable.cc ├── PolyTableFunctions.cc ├── PolyTableManip.cc ├── RedundancyCom95.cc ├── Seq │ ├── Fasta.cc │ ├── Seq.cc │ └── fastq.cc ├── SeqAlphabets.cc ├── SeqConstants.cc ├── SimData.cc ├── SimParams.cc ├── SimpleSNP.cc ├── SingleSub.cc ├── Sites.cc ├── Specializations.cc ├── ThreeSubs.cc ├── Translate.cc ├── TwoSubs.cc ├── Unweighted.cc ├── hts │ ├── bamreader.cc │ ├── bamrecord.cc │ ├── samflag.cc │ ├── samfunctions.cc │ └── samrecord.cc ├── libsequenceConfig.cc ├── polySiteVector.cc ├── shortestPath.cc ├── stateCounter.cc ├── summstats │ ├── algorithm.hpp │ ├── allele_counts.cc │ ├── auxillary.cc │ ├── faywuh.cc │ ├── garud.cc │ ├── generic.cc │ ├── haplotype_statistics.cc │ ├── hprime.cc │ ├── hprime_faywuh_aggregator.hpp │ ├── ld.cc │ ├── lhaf.cc │ ├── nsl.cc │ ├── nsl_common.hpp │ ├── nslx.cc │ ├── nvariablesites.cc │ ├── rmin.cc │ ├── tajd.cc │ ├── thetah_thetal.cc │ ├── thetapi.cc │ └── thetaw.cc ├── summstats_deprecated │ ├── FST.cc │ ├── Garud.cc │ ├── HKA.cc │ ├── PolySIM.cc │ ├── PolySNP.cc │ ├── Recombination.cc │ ├── Snn.cc │ ├── SummStats.cc │ ├── lHaf.cc │ └── nSL.cc └── variant_matrix │ ├── AlleleCountMatrix.cc │ ├── StateCounts.cc │ ├── VariantMatrix.cc │ ├── VariantMatrixViews.cc │ ├── capsule.cc │ ├── filtering.cc │ ├── nonowningcapsules.cc │ └── windows.cc ├── test-driver └── test ├── AlignStreamTest.cc ├── AlignmentTest.cc ├── ComparisonsTest.cc ├── CountingOperators.cc ├── FastaConstructors.cc ├── FastaExplicitIO.cc ├── FastaIO.cc ├── FastaOperations.cc ├── Makefile.am ├── Makefile.in ├── PolySIMtest.cc ├── PolySNPtest.cc ├── PolySitesIO.cc ├── 
PolyTableBadBehavior.cc ├── PolyTableConversions.cc ├── PolyTableSliceTest.cc ├── PolyTableTweaking.cc ├── README.md ├── RedundancyCom95test.cc ├── Seq8test.cc ├── SeqConversions.cc ├── SimpleSNPIO.cc ├── VariantMatrixFixture.hpp ├── VariantMatrixTest.cc ├── alphabets.cc ├── data ├── CG15644-Z.aln ├── README.md ├── data.fastq ├── phylip_input.txt └── single_ms.txt ├── fastqConstructors.cc ├── fastqIO.cc ├── libseq_unit_tests.cc ├── msformatdata.cc ├── msformatdata.hpp ├── msprime_data_fixture.hpp ├── polySiteVectorTest.cc ├── runTests.sh ├── stateCounterTest.cc ├── testAlleleCountMatrix.cc ├── testClassicSummstats.cc ├── testClassicSummstatsEmptyVariantMatrix.cc ├── testGarudStatistics.cc ├── testLD.cc └── testVariantMatrixWindows.cc /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | build: 4 | docker: 5 | - image: circleci/python:3.6-stretch 6 | working_directory: /home/circleci/libsequence 7 | steps: 8 | - checkout 9 | - run: sudo chown -R circleci:circleci * 10 | - restore_cache: 11 | key: libsequence-{{ .Branch }} 12 | - run: 13 | name: Checkout submodules 14 | command: | 15 | git submodule update --init --recursive 16 | # Write out the status for debugging purposes. Are we checked out at tags? 
17 | git submodule status --recursive 18 | - run: 19 | name: Install dependencies and set path 20 | command: | 21 | sudo apt-get update 22 | sudo apt-get install libboost-*dev 23 | # way to set path persistently https://circleci.com/docs/2.0/env-vars/#setting-path 24 | echo 'export PATH=/home/circleci/.local/bin:$PATH' >> $BASH_ENV 25 | - save_cache: 26 | key: libsequence-{{ .Branch }} 27 | paths: 28 | - "/home/circleci/.local" 29 | 30 | - run: 31 | name: Compile package 32 | command: | 33 | ./configure 34 | make 35 | 36 | - run: 37 | name: Run test suite 38 | command: | 39 | make check 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.lo 3 | *~ 4 | Makefile 5 | *.deps 6 | *.libs 7 | *.cache 8 | config.h 9 | *.log 10 | *.status 11 | libtool 12 | *.dirstamp 13 | *.la 14 | *.trs 15 | *.doxygen 16 | stamp-h1 17 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: cpp 3 | 4 | matrix: 5 | include: 6 | - os: linux 7 | dist: trusty 8 | env: 9 | - MATRIX_EVAL="CC=gcc-5 && CXX=g++-5" 10 | addons: 11 | apt: 12 | sources: 13 | - ubuntu-toolchain-r-test 14 | packages: 15 | - g++-5 16 | - gcc-5 17 | - zlib1g 18 | - zlib1g-dev 19 | - libboost-dev 20 | - libboost-system-dev 21 | - libboost-test-dev 22 | - libtbb-dev 23 | 24 | - os: linux 25 | dist: trusty 26 | env: 27 | - MATRIX_EVAL="CC=gcc-4.8 && CXX=g++-4.8" 28 | addons: 29 | apt: 30 | sources: 31 | - ubuntu-toolchain-r-test 32 | packages: 33 | - g++-4.8 34 | - gcc-4.8 35 | - zlib1g 36 | - zlib1g-dev 37 | - libboost-dev 38 | - libboost-system-dev 39 | - libboost-test-dev 40 | - libtbb-dev 41 | 42 | 43 | - os: linux 44 | dist: trusty 45 | addons: 46 | apt: 47 | sources: 48 | - ubuntu-toolchain-r-test 49 | packages: 50 | - g++-6 51 | - 
zlib1g 52 | - zlib1g-dev 53 | - libboost-dev 54 | - libboost-system-dev 55 | - libboost-test-dev 56 | - libtbb-dev 57 | env: 58 | - MATRIX_EVAL="CC=gcc-6 && CXX=g++-6" 59 | 60 | - os: linux 61 | dist: trusty 62 | addons: 63 | apt: 64 | sources: 65 | - ubuntu-toolchain-r-test 66 | packages: 67 | - g++-7 68 | - zlib1g 69 | - zlib1g-dev 70 | - libboost-dev 71 | - libboost-system-dev 72 | - libboost-test-dev 73 | - libtbb-dev 74 | env: 75 | - MATRIX_EVAL="CC=gcc-7 && CXX=g++-7" 76 | 77 | before_install: 78 | - eval "${MATRIX_EVAL}" 79 | 80 | notifications: 81 | email: false 82 | 83 | script: 84 | - export LD_LIBRARY_PATH=$HOME/lib 85 | - ./configure --prefix=$HOME && make && make install 86 | - make check 87 | - $HOME/bin/libsequenceConfig --version 88 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Kevin Thornton 2 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | AUTOMAKE_OPTIONS = foreign 2 | ACLOCAL_AMFLAGS = -I m4 3 | LIBTOOL_DEPS = @LIBTOOL_DEPS@ 4 | SUBDIRS=src Sequence test examples 5 | includedir=$(oldincludedir) -ISequence 6 | EXTRA_DIST=doc/libsequence.doxygen doc/Makefile doc/images/* examples/Makefile examples/*.cc examples/*.hpp README.md config.h.in 7 | 8 | libtool: $(LIBTOOL_DEPS) 9 | $(SHELL) ./config.status libtool 10 | -------------------------------------------------------------------------------- /Sequence/AlleleCountMatrix.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SEQUENCE_ALLELE_COUNT_MATRIX_HPP 2 | #define SEQUENCE_ALLELE_COUNT_MATRIX_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace Sequence 11 | { 12 | class AlleleCountMatrix 13 | /// \brief Matrix representation of allele counts in a 
VariantMatrix 14 | /// To be constructed 15 | { 16 | private: 17 | static std::vector init_counts(const VariantMatrix& m); 18 | 19 | public: 20 | const std::vector counts; 21 | using value_type = std::vector::value_type; 22 | const std::size_t ncol; 23 | const std::size_t nrow; 24 | const std::size_t nsam; 25 | explicit AlleleCountMatrix(const VariantMatrix& m); 26 | 27 | /// This constructor is for advanced use only, 28 | /// such as constructing from a slice of a 29 | /// pre-existing AlleleCountMatrix. 30 | template 31 | AlleleCountMatrix(T&& t, const std::size_t nc_, const std::size_t nr_, 32 | const std::size_t n_) 33 | : counts(std::forward(t)), ncol{ nc_ }, nrow{ nr_ }, nsam{ n_ } 34 | { 35 | if (ncol * nrow != counts.size()) 36 | { 37 | throw std::invalid_argument( 38 | "incorrect dimensions for AlleleCountMatrix"); 39 | } 40 | } 41 | std::pair::const_iterator, 42 | std::vector::const_iterator> 43 | row(const std::size_t) const; 44 | }; 45 | } // namespace Sequence 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /Sequence/Coalescent/Coalesce.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __SEQUENCE_COALESCENT_COALESCE_HPP__ 2 | #define __SEQUENCE_COALESCENT_COALESCE_HPP__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace Sequence 9 | { 10 | namespace coalsim { 11 | template 12 | std::pair pick2_in_deme( uniform_generator & uni, 13 | const std::vector & sample, 14 | const int & ttl_nsam, 15 | const int & deme_nsam, 16 | const int & deme ); 17 | 18 | template 19 | std::pair pick2_in_deme( const uniform_generator & uni, 20 | const std::vector & sample, 21 | const int & ttl_nsam, 22 | const int & deme_nsam, 23 | const int & deme ); 24 | 25 | template 26 | std::pair pick2( uniform_generator & uni, const int & nsam); 27 | 28 | template 29 | std::pair pick2( const uniform_generator & uni, const int & nsam); 30 | 31 | bool isseg( 
chromosome::const_iterator seg, const unsigned & nsegs, 32 | const int & pos, unsigned * offset ); 33 | 34 | int coalesce(const double & time, 35 | const int & ttl_nsam, 36 | const int & current_nsam, 37 | const int & c1, 38 | const int & c2, 39 | const int & nsites, 40 | int * nlinks, 41 | std::vector * sample, 42 | arg * sample_history); 43 | } 44 | } 45 | #endif 46 | #include 47 | -------------------------------------------------------------------------------- /Sequence/Coalescent/Coalescent.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __SEQUENCE_COALESCENT_COALECENT_HPP__ 2 | #define __SEQUENCE_COALESCENT_COALECENT_HPP__ 3 | 4 | /*! 5 | \defgroup coalescent Classes and functions related to simulating data under coalescent models 6 | \ingroup popgen 7 | */ 8 | /*! \file Coalescent.hpp 9 | @brief A lazy header to include the headers needed to start writing simulations. 10 | Includes: 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | */ 19 | /*! \example freerec.cc 20 | Coalescent simulation with free recombination 21 | */ 22 | /*! \example msmm.cc 23 | Coalescent simulation 24 | */ 25 | /*! \example bottleneck.cc 26 | Example of using the Sequence::bottleneck template function 27 | */ 28 | /*! \example fragments.cc 29 | Example of simulating partially linked fragments in neutral models. 30 | */ 31 | 32 | /*! 33 | \namespace Sequence::coalsim @brief Routines for coalescent simulation 34 | */ 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #endif 45 | -------------------------------------------------------------------------------- /Sequence/Coalescent/FragmentsRescaling.hpp: -------------------------------------------------------------------------------- 1 | /*! \file FragmentsRescaling.hpp 2 | \brief Helper functions for simulating partially linked fragments 3 | One often wants to simulate partially linked fragments under neutral models. 
4 | An efficient way to do this is to simulate a contiguous fragment, but 5 | with the recombination rate varying along the region (to represent 6 | the variable genetic distances between fragments). This header file 7 | declares functions that make this task easier, particularly the 8 | operations of rescaling the positions of mutations/marginal trees 9 | from the genetic map back to the physical map 10 | */ 11 | 12 | #ifndef __SEQUENCE_COALESCENT_FRAGMENTS_RESCALING_HPP__ 13 | #define __SEQUENCE_COALESCENT_FRAGMENTS_RESCALING_HPP__ 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | namespace Sequence 20 | { 21 | class SimData; //fwd declaration 22 | namespace coalsim { 23 | int sample_length( const std::vector< std::pair > & fragments ); 24 | int total_length( const std::vector< std::pair > & fragments ); 25 | void calculate_scales(const std::vector< std::pair > & fragments, 26 | std::vector< std::pair > * sample_scale, 27 | std::vector< std::pair > * mutation_scale ); 28 | void rescale_mutation_positions(Sequence::SimData * d, 29 | const std::vector< std::pair > & sample_scale, 30 | const std::vector< std::pair > & mutation_scale )__attribute__((deprecated)); 31 | void rescale_arg( arg * sample_history, 32 | const std::vector< std::pair > & fragments ); 33 | double integrate_genetic_map( const std::vector & sample, 34 | const int & current_nsam, 35 | const std::vector & genetic_map, 36 | std::vector * reclens); 37 | } 38 | } 39 | #endif 40 | -------------------------------------------------------------------------------- /Sequence/Coalescent/Initialize.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __SEQUENCE_COALESCENT_INIT_ARG_FUNCTIONS_HPP__ 2 | #define __SEQUENCE_COALESCENT_INIT_ARG_FUNCTIONS_HPP__ 3 | 4 | #include 5 | #include 6 | 7 | namespace Sequence 8 | { 9 | namespace coalsim { 10 | std::vector init_sample( const std::vector & pop_config, 11 | const int & nsites ); 12 | marginal init_marginal( 
const int & nsam ); 13 | } 14 | } 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /Sequence/Coalescent/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = bits 2 | 3 | pkgincludedir=$(prefix)/include/Sequence/Coalescent 4 | 5 | pkginclude_HEADERS = Coalesce.hpp\ 6 | Initialize.hpp\ 7 | Mutation.hpp\ 8 | NeutralSample.hpp\ 9 | Recombination.hpp\ 10 | SimTypes.hpp\ 11 | TreeOperations.hpp\ 12 | Coalescent.hpp\ 13 | DemographicModels.hpp\ 14 | FragmentsRescaling.hpp\ 15 | Trajectories.hpp -------------------------------------------------------------------------------- /Sequence/Coalescent/Recombination.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __SEQUENCE_COALESCENT_RECOMBINATION_HPP__ 2 | #define __SEQUENCE_COALESCENT_RECOMBINATION_HPP__ 3 | 4 | #include 5 | namespace Sequence 6 | { 7 | namespace coalsim { 8 | int crossover( const int & current_nsam, 9 | const int & chromo, 10 | const int & pos, 11 | std::vector * sample, 12 | arg * sample_history); 13 | 14 | std::pair pick_uniform_spot(const double & random_01, 15 | const int & nlinks, 16 | std::vector::const_iterator sample_begin, 17 | const unsigned & current_nsam); 18 | 19 | template 20 | std::pair pick_spot( uniform01_generator & uni01, 21 | const double & total_reclen, 22 | const std::vector & reclens, 23 | std::vector::const_iterator sample_begin, 24 | const unsigned & current_nsam, 25 | const double * rec_map); 26 | 27 | template 28 | std::pair pick_spot( const uniform01_generator & uni01, 29 | const double & total_reclen, 30 | const std::vector & reclens, 31 | std::vector::const_iterator sample_begin, 32 | const unsigned & current_nsam, 33 | const double * rec_map); 34 | } 35 | } 36 | #endif 37 | #include 38 | -------------------------------------------------------------------------------- /Sequence/Coalescent/TreeOperations.hpp: 
-------------------------------------------------------------------------------- 1 | /*! \file TreeOperations.hpp 2 | Things you may want to do with marginal trees in a coalescent simulations 3 | */ 4 | #ifndef __SEQUENCE_COALESCENT_TREE_OPERATIONS_HPP__ 5 | #define __SEQUENCE_COALESCENT_TREE_OPERATIONS_HPP__ 6 | 7 | #include 8 | #include 9 | #include 10 | namespace Sequence 11 | { 12 | namespace coalsim { 13 | double total_time( const marginal::const_iterator beg, 14 | const int & nsam ); 15 | 16 | int pick_branch( marginal::const_iterator beg, 17 | const int & nsam, 18 | const double & rtime); 19 | 20 | std::vector get_all_descendants (marginal::const_iterator beg, 21 | const int & nsam, 22 | const int & branch); 23 | 24 | bool is_descendant( marginal::const_iterator beg, 25 | const int & ind, 26 | const int & branch ); 27 | 28 | double total_time_on_arg( const Sequence::coalsim::arg & sample_history, 29 | const int & total_number_of_sites ); 30 | 31 | void minimize_arg( arg * sample_history ); 32 | 33 | class sfs_times_impl; 34 | class sfs_times 35 | { 36 | private: 37 | std::unique_ptr impl; 38 | public: 39 | sfs_times(); 40 | sfs_times(arg::const_iterator sample_history_beg, 41 | const arg::size_type & nsegs, 42 | const int & total_nsites_simulated, 43 | bool folded = false); 44 | sfs_times(const sfs_times &); 45 | ~sfs_times(); 46 | 47 | double operator[]( std::vector::size_type const & ) const; 48 | sfs_times & operator=(const sfs_times &); 49 | bool operator==(const sfs_times & rhs) const; 50 | double ttime() const; 51 | size_t size() const; 52 | typedef std::vector::const_iterator const_iterator; 53 | const_iterator begin() const; 54 | const_iterator end() const; 55 | }; 56 | } 57 | } 58 | #endif 59 | -------------------------------------------------------------------------------- /Sequence/Coalescent/bits/Makefile.am: -------------------------------------------------------------------------------- 1 | 
pkgincludedir=$(prefix)/include/Sequence/Coalescent/bits 2 | 3 | pkginclude_HEADERS = Mutation.tcc \ 4 | DemographicModels.tcc \ 5 | Recombination.tcc \ 6 | Coalesce.tcc \ 7 | Trajectories.tcc -------------------------------------------------------------------------------- /Sequence/CodonTable.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | along with libsequence. If not, see <https://www.gnu.org/licenses/>. 21 | 22 | */ 23 | 24 | #ifndef _CODON_TABLE_H_ 25 | #define _CODON_TABLE_H_ 26 | #include 27 | 28 | /*! \file CodonTable.hpp 29 | \deprecated 30 | @brief facility to count codons in CDS sequence, function Sequence::makeCodonUsageTable 31 | */ 32 | 33 | namespace Sequence 34 | { 35 | /*! 36 | \c #include \c 37 | A codon usage table is a list of the codons and 38 | the number of times each codon occurs in a sequence. 39 | This is represented by the type Sequence::CodonUsageTable, 40 | which is a vector of (codon, count) pairs. 
41 | To output a codon table: 42 | \code 43 | Sequence::Fasta sequence; 44 | cin >> sequence; 45 | CodonUsageTable x = makeCodonUsageTable(&sequence); 46 | for(unsigned i = 0 ; i < x.size() ; ++i) 47 | { 48 | cout << x[i].first << '\t' << x[i].second << '\n'; 49 | } 50 | \endcode 51 | */ 52 | class Seq; 53 | CodonUsageTable makeCodonUsageTable(const Seq* sequence); 54 | CodonUsageTable makeCodonUsageTable(const std::string &sequence); 55 | CodonUsageTable makeCodonUsageTable(std::string::const_iterator beg, 56 | std::string::const_iterator end); 57 | 58 | } 59 | #endif 60 | -------------------------------------------------------------------------------- /Sequence/Comparisons.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | along with libsequence. If not, see <https://www.gnu.org/licenses/>. 21 | 22 | */ 23 | 24 | #ifndef COMPARISONS_H 25 | #define COMPARISONS_H 26 | /*! \file Comparisons.hpp 27 | @brief declaration of routines for comparing DNA sequences 28 | This file declares a set of functions useful for comparing two bits 29 | of sequence data--sequences, nucleotides, etc. 30 | 31 | @short Routines to compare bases, sequences, etc. 
32 | Declares Sequence::TsTv,Sequence::NumDiffs,Sequence::Gapped, 33 | Sequence::NotAGap 34 | \ingroup misc 35 | */ 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | 42 | namespace Sequence 43 | { 44 | Mutations TsTv(const char & i, const char & j); 45 | Mutations TsTv(const int & i,const int & j); 46 | bool Different (const std::string & seq1, 47 | const std::string & seq2, 48 | const bool & skip_missing = true, 49 | const bool & nucleic_acid = true); 50 | 51 | int NumDiffs(const std::string & seq1, 52 | const std::string & seq2, 53 | const bool & skip_missing = true , 54 | const bool & nucleic_acid = true); 55 | 56 | bool Gapped(const std::string &s); 57 | 58 | template bool Gapped(Iterator beg,Iterator end, 59 | const char & gapchar = '-') 60 | /*! 61 | \param beg an iterator 62 | \param end an iterator 63 | \param gapchar a character representing an alignment gap 64 | \return true if \a gapchar is present in the range [beg,end), false otherwise 65 | */ 66 | { 67 | Iterator itr = std::find(beg,end,gapchar); 68 | return (itr!=end); 69 | } 70 | 71 | bool NotAGap(const char &c); 72 | } 73 | #endif 74 | -------------------------------------------------------------------------------- /Sequence/ComplementBase.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | along with libsequence. If not, see <https://www.gnu.org/licenses/>. 21 | 22 | */ 23 | 24 | #ifndef __COMPLEMENT_BASE_H__ 25 | #define __COMPLEMENT_BASE_H__ 26 | #include 27 | 28 | /*! \file ComplementBase.hpp 29 | @brief Declaration of Sequence::ComplementBase, a function object to return the complement of a DNA nucleotide 30 | */ 31 | /*! 32 | \struct Sequence::ComplementBase Sequence/ComplementBase.hpp 33 | \ingroup functors 34 | a functor to complement a sequence\n 35 | example use: 36 | \code 37 | //reverse and complement a std::string 38 | #include 39 | #include 40 | #include 41 | 42 | int main () 43 | { 44 | std::string seq; 45 | //fill seq with DNA characters 46 | std::reverse(seq.begin(),seq.end()); 47 | std::for_each(seq.begin(),seq.end(),Sequence::ComplementBase()); 48 | } 49 | \endcode 50 | */ 51 | namespace Sequence 52 | { 53 | struct ComplementBase : public std::unary_function 54 | { 55 | void operator()(char &ch) const; 56 | }; 57 | } 58 | #endif 59 | -------------------------------------------------------------------------------- /Sequence/FST.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 
18 | 19 | You should have received a copy of the GNU General Public License 20 | along with libsequence. If not, see <https://www.gnu.org/licenses/>. 21 | 22 | */ 23 | 24 | #ifndef __FST_H__ 25 | #define __FST_H__ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | /*! \file FST.hpp 33 | @brief declaration of a class (Sequence::FST) to analyze population structure 34 | 35 | \deprecated Will be removed in libsequence 2.0 36 | */ 37 | namespace Sequence 38 | { 39 | class PolyTable; 40 | struct FSTimpl; 41 | class __attribute__ ((deprecated)) FST 42 | { 43 | private: 44 | std::unique_ptr impl; 45 | public: 46 | explicit FST(const PolyTable *data, unsigned npop, const unsigned *config=NULL, 47 | const double *weights=NULL, bool haveOutgroup = false, 48 | unsigned outgroup = 0); 49 | FST(const FST &) = delete; 50 | FST & operator=(const FST &) = delete; 51 | ~FST(void); 52 | double HSM(void) const; 53 | double Slatkin(void) const; 54 | double HBK(void) const; 55 | double piB(void) const; 56 | double piT(void) const; 57 | double piS(void) const; 58 | double piD(void) const; 59 | std::set shared(unsigned pop1, unsigned pop2) const; 60 | std::set fixed(unsigned pop1, unsigned pop2) const; 61 | std::pair< std::set,std::set > Private(unsigned pop1, unsigned pop2) const; 62 | }; 63 | } 64 | #endif 65 | -------------------------------------------------------------------------------- /Sequence/Fasta.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 
13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | along with libsequence. If not, see <https://www.gnu.org/licenses/>. 21 | 22 | */ 23 | 24 | /*! \file Fasta.hpp 25 | @brief Declaration of Sequence::Fasta streams 26 | */ 27 | 28 | /*! 29 | \class Sequence::Fasta Sequence/Fasta.hpp 30 | \ingroup seqio 31 | Publicly derived from Sequence::Seq, this class defines 32 | how to read and print sequences in FASTA format, which looks like:\n 33 | >sequence name 1\n 34 | ATGATGATCAGATAGACATAGCAGATACATGT\n 35 | >sequence name 2\n 36 | ATGTTGGTTTTTTTTTAGAGATGTTTATAGGT\n 37 | ETC... 38 | 39 | @short FASTA sequence stream 40 | */ 41 | 42 | #ifndef FASTA_H 43 | #define FASTA_H 44 | 45 | #include 46 | 47 | namespace Sequence 48 | { 49 | class Fasta : public Seq 50 | { 51 | private: 52 | public: 53 | using Seq::Seq; 54 | Fasta(); 55 | Fasta (const Seq & s); 56 | Fasta( Fasta && ) = default; 57 | Fasta( Seq && ); 58 | Fasta( const Fasta & ) = default; 59 | ~Fasta()/*! placeholder for vtable */ {} 60 | Fasta & operator=(Fasta &&) = default; 61 | Fasta & operator=(const Fasta &) = default; 62 | /*! 63 | \exception Sequence::SeqException if memory can't be allocated. 64 | (This is because the data are temporarily read into char *, 65 | because that was found to be faster). 66 | \exception Sequence::badFormat if the input stream is not 67 | in FASTA format 68 | */ 69 | std::istream& read(std::istream &s); 70 | /*! 
71 | \param stream a std::ostream 72 | write the sequence in FASTA format to \a stream 73 | */ 74 | std::ostream& print(std::ostream& s) const; 75 | }; 76 | } 77 | #endif 78 | -------------------------------------------------------------------------------- /Sequence/Grantham.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #ifndef GRANTHAM_H 25 | #define GRANTHAM_H 26 | /*! \file Grantham.hpp 27 | @brief Grantham's distances (Sequence::Grantham) 28 | */ 29 | 30 | /*! \class Sequence::Grantham Sequence/Grantham.hpp 31 | A functor to return the Grantham's distance between 32 | two amino acids. 
33 | 34 | @short Grantham's distances 35 | */ 36 | #include 37 | 38 | namespace Sequence 39 | { 40 | class Grantham 41 | { 42 | private: 43 | double D[60][60]; 44 | const double stopweight; 45 | public: 46 | Grantham(const double stop = std::numeric_limits::max()); 47 | double operator()(char aa1, char aa2) const; 48 | }; 49 | } 50 | #endif 51 | -------------------------------------------------------------------------------- /Sequence/GranthamWeights.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #ifndef __GRANTHAMWEIGHTS_H__ 25 | #define __GRANTHAMWEIGHTS_H__ 26 | 27 | /*! \file GranthamWeights.hpp 28 | @brief declaration of classes to weight codons by Grantham distance (i.e. for Sequence::Comeron95). Declares 29 | Sequence::GranthamWeights2 and Sequence::GranthamWeights3 30 | */ 31 | 32 | /*! 33 | \class Sequence::GranthamWeights2 Sequence/GranthamWeights.hpp 34 | \ingroup weights 35 | @short Weights paths by Grantham's distances for codons differing at 2 sites 36 | */ 37 | 38 | /*! 
39 | \class Sequence::GranthamWeights3 Sequence/GranthamWeights.hpp 40 | \ingroup weights 41 | @short Weights paths by Grantham's distances for codons differing at 3 sites 42 | */ 43 | #include 44 | #include 45 | 46 | namespace Sequence 47 | { 48 | class Grantham; 49 | struct GranthamWeights2 : public WeightingScheme2 50 | { 51 | weights2_t operator()(const std::string &codon1, const std::string &codon2,Sequence::GeneticCodes genetic_code) const; 52 | }; 53 | 54 | struct GranthamWeights3 : public WeightingScheme3 55 | { 56 | weights3_t operator()(const std::string &codon1, const std::string &codon2,Sequence::GeneticCodes genetic_code) const; 57 | }; 58 | } 59 | #endif 60 | -------------------------------------------------------------------------------- /Sequence/Hudson2001.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #ifndef HUDSON2001_H 25 | #define HUDSON2001_H 26 | 27 | /*! \file Hudson2001.hpp 28 | \deprecated 29 | */ 30 | #include 31 | #warning "This header is deprecated. 
Please use " 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /Sequence/Kimura80.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | /*! \file Kimura80.hpp 25 | @brief declaration of Sequence::Kimura80 26 | */ 27 | 28 | /*! \class Sequence::Kimura80 Sequence/Kimura80.hpp 29 | \ingroup divergence 30 | Calculate a measure of sequence divergence using Kimura's 1980 method.\n 31 | The reference is: Kimura, M (1980) J. Mol. Evol 16: 111-120.\n 32 | The calculation only depends on 3 numbers:\n 33 | 1.) the number of sites in the sequence\n 34 | 2.) the number of transitions between the two sequences\n 35 | 3.) the number of transversions between the two sequences\n 36 | \n 37 | The implementation of this class does the following:\n 38 | 1.) compare each position in both sequences, counting transitions and transversions\n 39 | 2.) calculate distance using Kimura's formula\n 40 | \n 41 | \exception Sequence::SeqException if the two sequences are of unequal length. 
42 | 43 | @short Kimura's 2-parameter distance 44 | */ 45 | #ifndef KIMURA80_H 46 | #define KIMURA80_H 47 | 48 | namespace Sequence 49 | { 50 | class Seq; 51 | class Kimura80 52 | { 53 | private: 54 | unsigned num_Ts, num_Tv; 55 | size_t seqlen; //total sequence length 56 | size_t sites_compared; //number of ungapped sites in the data 57 | void Compute (const Sequence::Seq *seq1, const Sequence::Seq *seq2); 58 | double divergence, P, Q; 59 | public: 60 | explicit Kimura80 (const Sequence::Seq * seqa,const Sequence::Seq * seqb); 61 | double K() const; 62 | size_t sites (void) const; 63 | }; 64 | } 65 | #endif 66 | -------------------------------------------------------------------------------- /Sequence/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = bits SummStatsDeprecated variant_matrix summstats 2 | 3 | pkgincludedir=$(prefix)/include/Sequence 4 | 5 | pkginclude_HEADERS = AlignStream.hpp\ 6 | Alignment.hpp\ 7 | Clustalw.hpp\ 8 | phylipData.hpp\ 9 | CodonTable.hpp\ 10 | Comeron95.hpp\ 11 | Comparisons.hpp\ 12 | ComplementBase.hpp\ 13 | CountingOperators.hpp\ 14 | FST.hpp\ 15 | Fasta.hpp\ 16 | fastq.hpp\ 17 | Grantham.hpp\ 18 | GranthamWeights.hpp\ 19 | SimpleSNP.hpp\ 20 | Hudson2001.hpp\ 21 | Kimura80.hpp\ 22 | PathwayHelper.hpp\ 23 | PolySIM.hpp\ 24 | PolySNP.hpp\ 25 | PolySNPimpl.hpp\ 26 | PolySites.hpp\ 27 | PolyTable.hpp\ 28 | PolyTableFunctions.hpp\ 29 | PolyFunctional.hpp\ 30 | PolyTableSlice.hpp\ 31 | polySiteVector.hpp \ 32 | Recombination.hpp\ 33 | RedundancyCom95.hpp\ 34 | Seq.hpp\ 35 | SeqConstants.hpp\ 36 | SeqEnums.hpp\ 37 | SeqFunctors.hpp\ 38 | SeqProperties.hpp\ 39 | SeqRegexes.hpp\ 40 | SeqUtilities.hpp\ 41 | SimData.hpp\ 42 | SimParams.hpp\ 43 | SingleSub.hpp\ 44 | Sites.hpp\ 45 | ThreeSubs.hpp\ 46 | Translate.hpp\ 47 | Translate2.hpp\ 48 | TwoSubs.hpp\ 49 | Unweighted.hpp\ 50 | WeightingSchemes.hpp\ 51 | stateCounter.hpp\ 52 | shortestPath.hpp\ 53 | descriptiveStats.hpp\ 54 | HKA.hpp\ 
55 | typedefs.hpp \ 56 | SummStatsDeprecated.hpp \ 57 | SeqAlphabets.hpp \ 58 | VariantMatrix.hpp \ 59 | VariantMatrixCapsule.hpp \ 60 | NonOwningCapsules.hpp \ 61 | VectorCapsules.hpp \ 62 | VariantMatrixViews.hpp \ 63 | AlleleCountMatrix.hpp \ 64 | summstats.hpp \ 65 | StateCounts.hpp 66 | -------------------------------------------------------------------------------- /Sequence/PathwayHelper.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #ifndef __PATHWAYHELPER_H__ 25 | #define __PATHWAYHELPER_H__ 26 | /*! \file PathwayHelper.hpp 27 | @brief declarations of Sequence::Intermediates2 and Sequence::Intermediates3 28 | */ 29 | /*! 
30 | \defgroup CodonPaths Classes and functions to aid in the calculations of the pathways between two codons 31 | This group of classes and functions deals with determining 32 | either the counts of silent and replacement differences between codons 33 | or the intermediate codons that occur between two different codons 34 | */ 35 | #include 36 | #include 37 | namespace Sequence 38 | { 39 | using Inter2_t = std::array; 40 | using Inter3_t = std::array; 41 | Inter2_t Intermediates2(const std::string &codon1, const std::string &codon2); 42 | Inter3_t Intermediates3(const std::string &codon1, const std::string &codon2); 43 | } 44 | #endif 45 | 46 | -------------------------------------------------------------------------------- /Sequence/PolySNPimpl.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | namespace Sequence 29 | { 30 | struct _PolySNPImpl 31 | /*! 32 | Implementation details for PolySNP. This class is visible 33 | so that it can be accessed from classes derived from PolySNP. 
34 | A PolySNP object contains a pointer to an instance of this class 35 | that is storage class protected. 36 | */ 37 | { 38 | const PolyTable* _data; 39 | unsigned _nsites,_nsam,_outgroup; 40 | bool _haveOutgroup, _totMuts; 41 | unsigned _totsam; 42 | unsigned _DVK; 43 | double _DVH; 44 | bool _counted_singletons; 45 | bool _know_pi; 46 | bool _CalculatedDandV; 47 | double _pi; 48 | unsigned _singletons; 49 | unsigned _walls_Bprime,_NumPoly; 50 | double _walls_B,_walls_Q; 51 | bool _calculated_wall_stats; 52 | std::vector< Sequence::stateCounter > _counts; 53 | std::vector< std::pair< bool, Sequence::stateCounter > > _derivedCounts; 54 | std::mutex instance_lock; 55 | bool _preprocessed; 56 | void preprocess(void); 57 | 58 | _PolySNPImpl (const Sequence::PolyTable * data, const bool & haveOutgroup , 59 | const unsigned & outgroup, const bool & totMuts); 60 | }; 61 | } 62 | -------------------------------------------------------------------------------- /Sequence/PolySites.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #ifndef POLYSITES_H 25 | #define POLYSITES_H 26 | /*! 
\file PolySites.hpp 27 | @brief Sequence::PolySites, generates polymorphism tables from data 28 | */ 29 | #include 30 | namespace Sequence 31 | { 32 | class Fasta; 33 | class PolySites : public PolyTable 34 | { 35 | private: 36 | /*! 37 | PolySites::fillIt() is the function that actually fills the polymorphism table. 38 | */ 39 | template 40 | void fillIt(const std::vector < __DataType >&alignment, 41 | bool strictInfSites = 0, 42 | bool ignoregaps = 1,bool skipMissing=false, 43 | unsigned freqfilter=0); 44 | public: 45 | PolySites (void); 46 | template 47 | PolySites (const std::vector < __DataType >&alignment, 48 | bool strictInfSites = 0, 49 | bool ignoregaps = 1, 50 | bool skipMissing=false, 51 | bool skipAdjSNP=false, 52 | unsigned freqfilter=0); 53 | //PolySites (const std::vector < double > &List, const std::vector < std::string > &stringList); 54 | PolySites ( std::vector < double > List, std::vector < std::string > stringList); 55 | PolySites (PolyTable::const_site_iterator beg, 56 | PolyTable::const_site_iterator end); 57 | PolySites( PolySites && ); 58 | PolySites( const PolySites & ); 59 | PolySites & operator=( PolySites && ); 60 | PolySites & operator=( const PolySites & ); 61 | ~PolySites(void){}; 62 | std::istream & read(std::istream &s) ; 63 | std::ostream & print(std::ostream &stream) const; 64 | }; 65 | } 66 | #include 67 | #endif 68 | -------------------------------------------------------------------------------- /Sequence/SeqConstants.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 
13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #ifndef __SEQCONSTANTS_HPP__ 25 | #define __SEQCONSTANTS_HPP__ 26 | /*! 27 | \file SeqConstants.hpp 28 | A file defining constants used in various places in libsequence. 29 | */ 30 | 31 | namespace Sequence 32 | { 33 | extern const unsigned SEQMAXUNSIGNED; 34 | extern const double SEQMAXDOUBLE; 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /Sequence/SeqEnums.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #ifndef SEQENUMS_H 25 | #define SEQENUMS_H 26 | #include 27 | /*! \file SeqEnums.hpp 28 | Defines a handful of enumeration types useful 29 | for sequence data. 30 | @brief Definition of enumeration types 31 | */ 32 | 33 | namespace Sequence 34 | { 35 | /*! 
\enum Sequence::GeneticCodes 36 | Only UNIVERSAL (= 0) is currently supported. 37 | The order of the genetic codes is that of NCBI's code tables, available at 38 | http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi 39 | */ 40 | enum class GeneticCodes : std::int16_t {UNIVERSAL}; 41 | /*! \enum Sequence::Mutations 42 | Values: Unknown=0,Ts, and Tv.\n 43 | Unknown means unknown, Ts means transition, Tv means transversion 44 | */ 45 | enum class Mutations : std::int8_t {Unknown,Ts,Tv}; 46 | } 47 | #endif 48 | -------------------------------------------------------------------------------- /Sequence/SeqFunctors.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #ifndef __SEQ_FUNCTORS_H__ 25 | #define __SEQ_FUNCTORS_H__ 26 | #include 27 | #include 28 | /*! \file SeqFunctors.hpp 29 | \ingroup functors 30 | This file is a bit of a catch-all for function objects defined in namespace Sequence. 31 | Currently, including the header brings the definitions of Sequence::ComplementBase 32 | and Sequence::stateCounter into scope 33 | */ 34 | /*! 
35 | \defgroup functors Function objects defined in the library 36 | */ 37 | #endif 38 | -------------------------------------------------------------------------------- /Sequence/SeqProperties.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #ifndef __SEQ_PROPERTIES_HPP__ 25 | #define __SEQ_PROPERTIES_HPP__ 26 | /*! \file SeqProperties.hpp 27 | \deprecated 28 | */ 29 | #include 30 | #include 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /Sequence/SimParams.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 
13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #ifndef SIMPARAMS_H 25 | #define SIMPARAMS_H 26 | /*! \file SimParams.hpp 27 | @brief Sequence::SimParams reads in the parameters of Dick Hudson's coalescent simulation program. Used in conjunction with Sequence::SimData 28 | */ 29 | 30 | /*! \class Sequence::SimParams Sequence/SimParams.hpp 31 | \ingroup coalescent 32 | include SimParams.h 33 | Allows reading in and printing out of the parameter 34 | list that Hudson's coalescent simulation program spits 35 | out at the beginning of its execution. An example of use 36 | is found in tajd.cc in the Examples section. 37 | 38 | @author Kevin Thornton 39 | @short Parameters for Hudson's simulation program 40 | */ 41 | #include 42 | #include 43 | #include 44 | #include 45 | 46 | namespace Sequence 47 | { 48 | class SimParams 49 | { 50 | friend std::ostream& operator<<(std::ostream&,class SimParams &object); 51 | private: 52 | std::string _command_line; 53 | unsigned _howmany, tsam; 54 | public: 55 | SimParams(void); 56 | std::istream& read(std::istream& s); 57 | int fromfile ( FILE * openfile ); 58 | std::string params (void) const 59 | /*! 60 | \return the command-line input to ms 61 | \note for complicated models, this can be parsed 62 | with a stringstream to figure out what the parameters are 63 | */ 64 | { 65 | return _command_line; 66 | } 67 | unsigned totsam (void) const 68 | /*! 69 | \return the total sample size (# gametes) 70 | */ 71 | { 72 | return (tsam); 73 | } 74 | unsigned runs (void) const 75 | /*! 
76 | \return number of genealogies to generate 77 | */ 78 | { 79 | return (_howmany); 80 | } 81 | }; 82 | 83 | std::istream& operator>>(std::istream& s, SimParams& c); 84 | } 85 | #endif 86 | -------------------------------------------------------------------------------- /Sequence/SingleSub.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #ifndef SINGLESUB_H 25 | #define SINGLESUB_H 26 | /*! \file SingleSub.hpp 27 | @brief used by Sequence::Comeron95, class Sequence::SingleSub calculates divergence between codons that differ at one site 28 | */ 29 | 30 | /*! 31 | \class Sequence::SingleSub Sequence/SingleSub.hpp 32 | \ingroup kaks 33 | A functor to obtain divergence statistics for Comeron's method for codons that differ at one position. 
Used by 34 | Sequence::Comeron95 35 | 36 | @author Kevin Thornton 37 | @short Deal with codons differing at 1 position 38 | */ 39 | #include 40 | #include 41 | namespace Sequence 42 | { 43 | class RedundancyCom95; 44 | 45 | class SingleSub 46 | { 47 | private: 48 | struct SingleSubImpl; 49 | std::unique_ptr impl; 50 | public: 51 | explicit SingleSub(void); 52 | void operator()(const RedundancyCom95 & sitesObj, 53 | const std::string &cod1, 54 | const std::string &cod2); 55 | ~SingleSub(); 56 | double P0(void) const; 57 | double P2S(void) const; 58 | double P2V(void) const; 59 | double P4(void) const; 60 | double Q0(void) const; 61 | double Q2S(void) const; 62 | double Q2V(void) const; 63 | double Q4(void) const; 64 | }; 65 | } 66 | #endif 67 | -------------------------------------------------------------------------------- /Sequence/StateCounts.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SEQUENCE_VARIANTMATRIX_STATECOUNTS_HPP__ 2 | #define SEQUENCE_VARIANTMATRIX_STATECOUNTS_HPP__ 3 | 4 | #include "VariantMatrix.hpp" 5 | #include "VariantMatrixViews.hpp" 6 | #include 7 | #include 8 | 9 | namespace Sequence 10 | { 11 | struct StateCounts 12 | /// \brief Track character state occurrence at a site in a VariantMatrix. 13 | /// 14 | /// This class keeps track of how many times each character state occurs 15 | /// at a variable site in a VariantMatrix. All missing data (negative 16 | /// state values) are considered equivalent and collapsed into the single 17 | /// missing value of -1. 18 | /// 19 | /// When constructed, the sample size at a site is considered to be the 20 | /// sum of the number of occurrences of all non-missing states. 
21 | /// 22 | /// \ingroup variantmatrix 23 | { 24 | static constexpr VariantMatrix::value_type max_allele 25 | = std::numeric_limits::max(); 26 | /// Keep track of (state, count) pairs 27 | std::vector counts; 28 | /// The max allelic value seen 29 | std::size_t max_allele_idx; 30 | /// The sample size at this site. Excludes missing data. 31 | std::uint32_t n; 32 | /// The reference state for this site. Needed for certain summary 33 | /// statistics. Default is -1 (missing). 34 | std::int8_t refstate; 35 | 36 | /// Construct with an explicit reference state, or (no-argument overload) 37 | /// without one; the member note above gives -1 (missing) as the default -- TODO confirm in the implementation. 38 | StateCounts(const std::int8_t refstate_); 39 | StateCounts(); 40 | void operator()(ConstRowView &); 41 | void operator()(const RowView &); 42 | }; 43 | 44 | /// Create a vector of StateCounts from a VariantMatrix. 45 | /// If `refstates` is not empty and differs in length 46 | /// from `m.nsites`, then `std::invalid_argument` is thrown. 47 | /// \ingroup variantmatrix 48 | std::vector 49 | process_variable_sites(const VariantMatrix& m, 50 | const std::vector& refstates); 51 | /// Create a vector of StateCounts with a specific reference state 52 | /// used for all sites 53 | /// \ingroup variantmatrix 54 | std::vector 55 | process_variable_sites(const VariantMatrix& m, const std::int8_t refstate); 56 | /// Create a vector of StateCounts with a reference state of -1 57 | /// used for all sites 58 | /// \ingroup variantmatrix 59 | std::vector process_variable_sites(const VariantMatrix& m); 60 | } // namespace Sequence 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /Sequence/SummStatsDeprecated.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __SEQUENCE_SUMMSTATS_HPP__ 2 | #define __SEQUENCE_SUMMSTATS_HPP__ 3 | 4 | /*! 
\file SummStatsDeprecated.hpp 5 | Header file for summary statistic of variation data. 
6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /Sequence/SummStatsDeprecated/Garud.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __SEQUENCE_GARUD_HPP__ 2 | #define __SEQUENCE_GARUD_HPP__ 3 | 4 | #include 5 | #include 6 | 7 | namespace Sequence 8 | { 9 | /* 10 | Garud et al. DOI: 10.1371/journal.pgen.1005004 11 | Messer & Petrov DOI: 10.1016/j.tree.2013.08.003 12 | Note that H1 = 1 - haplotype homozygosity, e.g. Depaulis and Veuille's "H" 13 | \ingroup popgenanalysis 14 | \return An object of type Sequence::GarudStats 15 | */ 16 | GarudStats H1H12(const SimData & d)__attribute__ ((deprecated)); 17 | } 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /Sequence/SummStatsDeprecated/Makefile.am: -------------------------------------------------------------------------------- 1 | pkgincludedir=$(prefix)/include/Sequence/SummStatsDeprecated 2 | 3 | pkginclude_HEADERS = nSL.hpp Garud.hpp lHaf.hpp Snn.hpp 4 | -------------------------------------------------------------------------------- /Sequence/SummStatsDeprecated/Snn.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #ifndef __SEQUENCE_SNN_HPP__ 25 | #define __SEQUENCE_SNN_HPP__ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | namespace Sequence 34 | { 35 | /*! 36 | Test statistic from Hudson (2000) Genetics 155(4):2011 37 | */ 38 | double Snn_statistic( const unsigned individuals[], 39 | const std::vector< std::vector > & dkj, 40 | const unsigned config[], 41 | const size_t & npop, 42 | const unsigned & nsam )__attribute__ ((deprecated)); 43 | 44 | template< typename shuffler > 45 | std::pair 46 | Snn_test(const PolyTable & snpTable, 47 | const unsigned config[], 48 | const size_t & npop, 49 | shuffler & s, 50 | const unsigned & nperms = 10000)__attribute__ ((deprecated)); 51 | 52 | template< typename shuffler > 53 | std::vector< std::vector > 54 | Snn_test_pairwise(const PolyTable & snpTable, 55 | const unsigned config[], 56 | const size_t & npop, 57 | shuffler & s, 58 | const unsigned & nperms = 10000)__attribute__ ((deprecated)); 59 | } 60 | #endif 61 | #include 62 | -------------------------------------------------------------------------------- /Sequence/SummStatsDeprecated/lHaf.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __SEQUENCE_SUMMSTATS_LHAP_HPP__ 2 | #define __SEQUENCE_SUMMSTATS_LHAP_HPP__ 3 | 4 | #include 5 | 6 | namespace Sequence 7 | { 8 | /* 9 | ! 
doi:10.1371/journal.pgen.1005527.g001 10 | \ingroup popgenanalysis 11 | */ 12 | std::vector lHaf( const SimData & data, const double l ); 13 | } 14 | 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /Sequence/SummStatsDeprecated/nSL.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __SEQUENCE_SUMMSTATS_NSL_HPP__ 2 | #define __SEQUENCE_SUMMSTATS_NSL_HPP__ 3 | 4 | /* \file nSL.hpp 5 | @brief The nSL statistic of doi: 10.1093/molbev/msu077 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace Sequence 14 | { 15 | /*! 16 | The nSL statistic of Ferrer-Admetlla et al. doi: 10.1093/molbev/msu077. 17 | \param core The index of the "focal/core" SNP 18 | \param d An object of type Sequence::SimData 19 | \param gmap The positions of every marker in d on the genetic map. If 20 | std::unordered_map() is passed, 21 | iHS is calculated using SNP positions. 22 | \return nSL and iHs, with the latter as defined in doi: 23 | 10.1093/molbev/msu077. 24 | \note This routine was validated by comparing to code provided by 25 | Ferrer-Admetlla et al. 26 | \warning The use of 'gmap' is untested. 27 | \ingroup popgenanalysis 28 | */ 29 | std::pair 30 | nSL(const std::size_t &core, const SimData &d, 31 | const std::unordered_map &gmap 32 | = std::unordered_map())__attribute__ ((deprecated)); 33 | } 34 | #endif 35 | -------------------------------------------------------------------------------- /Sequence/Translate.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 
8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #ifndef __TRANSLATE_HPP__ 25 | #define __TRANSLATE_HPP__ 26 | #include 27 | #include 28 | /*! \file Translate.hpp 29 | @brief declares Sequence::Translate,a function to translate CDS sequences into peptide sequences 30 | */ 31 | 32 | /*! 33 | \defgroup misc Miscellany 34 | */ 35 | namespace Sequence 36 | { 37 | /*! 38 | \ingroup misc 39 | \param beg a pointer to the beginning of the region to translate 40 | \param end a pointer to 1 past the end of the region to translate 41 | \param genetic_code must be a value from the enumeration list Sequence::GeneticCodes 42 | \param gapchar a character representing an alignment gap 43 | \return a string representing the translation of the range 44 | \throw std::runtime_error if \a genetic_code is invalid 45 | \code 46 | #include 47 | \endcode 48 | */ 49 | std::string Translate(std::string::const_iterator beg, 50 | std::string::const_iterator end, 51 | Sequence::GeneticCodes genetic_code = GeneticCodes::UNIVERSAL, 52 | const char & gapchar = '-'); 53 | } 54 | #endif 55 | -------------------------------------------------------------------------------- /Sequence/Translate2.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 
6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | along with libsequence. If not, see <http://www.gnu.org/licenses/>. 21 | 22 | */ 23 | 24 | #include 25 | /*! \file Translate2.hpp 26 | \short Deprecated header declaring routines to translate sequences. Including it includes the current header and issues a compiler warning. 27 | */ 28 | #warning "Using deprecated header , please use " 29 | 30 | -------------------------------------------------------------------------------- /Sequence/Unweighted.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | along with libsequence. If not, see <http://www.gnu.org/licenses/>. 
21 | 22 | */ 23 | 24 | #ifndef __UNWEIGHTED_H__ 25 | #define __UNWEIGHTED_H__ 26 | 27 | #include 28 | /*! \file Unweighted.hpp" 29 | @brief declares Sequence::Unweighted2 and Sequence::Unweighted3 30 | */ 31 | 32 | /*! 33 | \class Sequence::Unweighted2 Sequence/Unweighted.hpp 34 | \ingroup weights 35 | @short weights all pathways equally 36 | \note This is generally not what you want to use (it biases the result to a higher Ka/Ks ratio) 37 | */ 38 | 39 | /*! 40 | \class Sequence::Unweighted3 Sequence/Unweighted.hpp 41 | \ingroup weights 42 | @short weights all pathways equally 43 | \note This is generally not what you want to use (it biases the result to a higher Ka/Ks ratio) 44 | */ 45 | namespace Sequence 46 | { 47 | struct Unweighted2 : public WeightingScheme2 48 | { 49 | weights2_t operator()(const std::string &codon1, const std::string &codon2,Sequence::GeneticCodes genetic_code) const; 50 | }; 51 | 52 | struct Unweighted3 : public WeightingScheme3 53 | { 54 | public: 55 | weights3_t operator()(const std::string &codon1, const std::string &codon2,Sequence::GeneticCodes genetic_code) const; 56 | }; 57 | } 58 | #endif 59 | 60 | -------------------------------------------------------------------------------- /Sequence/VariantMatrixCapsule.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SEQUENCE_VARIANT_MATRIX_CAPSULE 2 | #define SEQUENCE_VARIANT_MATRIX_CAPSULE 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace Sequence 9 | { 10 | template struct Capsule 11 | { 12 | virtual ~Capsule() = default; 13 | // Following two may not be needed 14 | //virtual T& get(std::size_t site, std::size_t sample) = 0; 15 | //virtual const T& get(std::size_t site, 16 | // std::size_t sample) const = 0; 17 | virtual T* data() = 0; 18 | virtual const T* data() const = 0; 19 | virtual const T* cdata() const = 0; 20 | virtual T* begin() = 0; 21 | virtual const T* begin() const = 0; 22 | virtual T* end() = 0; 23 | virtual const T* 
end() const = 0; 24 | virtual const T* cbegin() const = 0; 25 | virtual const T* cend() const = 0; 26 | virtual bool empty() const = 0; 27 | virtual std::size_t size() const = 0; 28 | 29 | virtual bool resizable() const = 0; 30 | 31 | /// Overload iff resizable() returns true 32 | virtual void 33 | resize(bool) 34 | { 35 | throw std::runtime_error("Capsule cannot be resized"); 36 | } 37 | }; 38 | 39 | struct GenotypeCapsule : public Capsule 40 | { 41 | virtual ~GenotypeCapsule() = default; 42 | virtual std::size_t nsites() const = 0; 43 | virtual std::size_t nsam() const = 0; 44 | virtual std::size_t& nsites() = 0; 45 | virtual std::size_t& nsam() = 0; 46 | virtual std::size_t row_offset() const = 0; 47 | virtual std::size_t col_offset() const = 0; 48 | virtual std::size_t stride() const = 0; 49 | virtual std::unique_ptr clone() const = 0; 50 | virtual std::int8_t& operator()(std::size_t, std::size_t) = 0; 51 | virtual const std::int8_t& operator()(std::size_t, 52 | std::size_t) const = 0; 53 | }; 54 | 55 | struct PositionCapsule : public Capsule 56 | { 57 | virtual ~PositionCapsule() = default; 58 | virtual std::size_t nsites() const = 0; 59 | virtual std::unique_ptr clone() const = 0; 60 | virtual double& operator[](std::size_t) = 0; 61 | virtual const double& operator[](std::size_t) const = 0; 62 | }; 63 | 64 | } // namespace Sequence 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /Sequence/bits/Makefile.am: -------------------------------------------------------------------------------- 1 | pkgincludedir=$(prefix)/include/Sequence/bits 2 | 3 | pkginclude_HEADERS = PolySites.tcc\ 4 | PolyTable.tcc\ 5 | PolyTableSlice.tcc\ 6 | CountingOperators.tcc\ 7 | AlignStream.tcc\ 8 | Alignment.tcc\ 9 | Clustalw.tcc\ 10 | phylipData.tcc\ 11 | descriptiveStats.tcc\ 12 | PolyTableFunctions.tcc\ 13 | Snn.tcc \ 14 | variant_matrix_views_internal.hpp \ 15 | col_view_iterator.hpp 16 | 
-------------------------------------------------------------------------------- /Sequence/bits/PolyTable.tcc: -------------------------------------------------------------------------------- 1 | // Code for the -*- C++ -*- namespace Sequence::PolyTable template members 2 | 3 | /* 4 | 5 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 6 | 7 | Remove the brackets to email me. 8 | 9 | This file is part of libsequence. 10 | 11 | libsequence is free software: you can redistribute it and/or modify 12 | it under the terms of the GNU General Public License as published by 13 | the Free Software Foundation, either version 3 of the License, or 14 | (at your option) any later version. 15 | 16 | libsequence is distributed in the hope that it will be useful, 17 | but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | GNU General Public License for more details. 20 | 21 | You should have received a copy of the GNU General Public License 22 | long with libsequence. If not, see . 
23 | 24 | */ 25 | 26 | 27 | #ifndef __POLY_TABLE_TCC__ 28 | #define __POLY_TABLE_TCC__ 29 | 30 | #include 31 | 32 | namespace Sequence 33 | { 34 | template 36 | bool PolyTable::assign( const numeric_type * _positions, 37 | const size_t & _num_positions, 38 | const string_type * _data, 39 | const size_t & _num_individuals ) 40 | { 41 | //The numeric array must be convertible to double 42 | static_assert( std::is_convertible::value, 43 | "numeric_type must be convertible to double"); 44 | //The character type must be either char * or std::string 45 | static_assert( (std::is_same::value || 46 | std::is_same::value), 47 | "string_type must be char * or std::string"); 48 | 49 | first.resize(_num_positions); 50 | second.resize(_num_individuals); 51 | first.assign(_positions,_positions+_num_positions); 52 | second.assign(_data,_data+_num_individuals); 53 | non_const_access = true; 54 | for(std::vector::const_iterator itr = second.begin() ; 55 | itr < second.end() ; ++itr) 56 | { 57 | if (itr->length() != _num_positions) 58 | { 59 | first.clear(); 60 | second.clear(); 61 | return false; 62 | } 63 | } 64 | return true; 65 | } 66 | } 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /Sequence/fastq.hpp: -------------------------------------------------------------------------------- 1 | /*! 2 | \file fastq.hpp 3 | @brief FASTQ class 4 | */ 5 | 6 | /*! 7 | \class Sequence::fastq Sequence/fastq.hpp 8 | \ingroup seqio 9 | Publicly derived from Sequence::Seq. 
10 | */ 11 | #ifndef __SEQUENCE_FASTQ_HPP__ 12 | #define __SEQUENCE_FASTQ_HPP__ 13 | 14 | #include 15 | 16 | namespace Sequence 17 | { 18 | class fastq : public Seq 19 | { 20 | public: 21 | std::string quality; 22 | private: 23 | bool repeat_name; 24 | public: 25 | using Seq::Seq; 26 | fastq(void); 27 | fastq (const std::string &name, const std::string &seq, 28 | const std::string & qual); 29 | fastq (std::string && name, std::string && seq, 30 | std::string && qual); 31 | //! \warning Quality string will be left empty 32 | fastq (const Seq & s); 33 | fastq (const fastq & s) = default; 34 | fastq ( fastq && s) = default; 35 | //! \warning Quality string will be left empty 36 | fastq ( Seq && s); 37 | fastq & operator=(const fastq & ) = default; 38 | fastq & operator=( fastq && ) = default; 39 | ~fastq()/*! placeholder for vtable */ {} 40 | 41 | //! Set to true or false for repeating the seq name on third line of output 42 | void repname(const bool &); 43 | /*! 44 | \exception Sequence::SeqException if memory can't be allocated. 45 | (This is because the data are temporarily read into char *, 46 | because that was found to be faster). 47 | \exception Sequence::badFormat if the input stream is not 48 | in FASTQ format 49 | */ 50 | std::istream & read(std::istream &s); 51 | /*! 52 | \param stream a std::ostream 53 | write the sequence in FASTQ format to \a stream 54 | */ 55 | std::ostream & print(std::ostream& s) const; 56 | }; 57 | } 58 | 59 | #endif 60 | 61 | -------------------------------------------------------------------------------- /Sequence/phylipData.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 
8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | /*! \file phylipData.hpp 25 | \short Sequence::phylipData -- read in phylip alignments 26 | */ 27 | #ifndef __PHYLIPDATA_HPP__ 28 | #define __PHYLIPDATA_HPP__ 29 | 30 | #include 31 | #include 32 | #include 33 | namespace Sequence 34 | { 35 | template < typename T > 36 | class phylipData: public AlignStream < T > 37 | /*! 38 | Input of phylip-format alignments 39 | */ 40 | { 41 | public: 42 | phylipData (): AlignStream(){} 43 | phylipData(const std::vector & _data): AlignStream(_data) 44 | { 45 | } 46 | phylipData (const AlignStream &a) : AlignStream(a) 47 | { 48 | } 49 | phylipData (const phylipData &a) : AlignStream(a) 50 | { 51 | } 52 | phylipData( AlignStream && a) : AlignStream(std::move(a)) 53 | { 54 | } 55 | phylipData( phylipData && a) : AlignStream(std::move(a)) 56 | { 57 | } 58 | phylipData( std::vector && a) : AlignStream(std::move(a)) 59 | { 60 | } 61 | ~phylipData(void) 62 | {} 63 | std::istream & read (std::istream & s); 64 | std::ostream & print (std::ostream & s) const; 65 | phylipData & operator=( const AlignStream & rhs); 66 | }; 67 | } 68 | #include 69 | #endif 70 | -------------------------------------------------------------------------------- /Sequence/polySiteVector.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin 
Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #ifndef __POLYSITEVECTOR_MANIP_HPP__ 25 | #define __POLYSITEVECTOR_MANIP_HPP__ 26 | 27 | 28 | /*! \file Sequence/polySiteVector.hpp 29 | @brief Site-major variation tables in ASCII format 30 | */ 31 | 32 | #include 33 | #include 34 | #include 35 | 36 | namespace Sequence 37 | { 38 | class PolyTable; 39 | 40 | /*! 41 | For polymorphism data, a Site can be represented as 42 | a position (a double) and the characters at 43 | that positions (a std::string) 44 | */ 45 | using polymorphicSite = std::pair< double, std::string >; 46 | 47 | /*! 48 | A polymorphism data set can be represented as 49 | a vector containing a sequence of polymorphicSite 50 | */ 51 | using polySiteVector = std::vector< polymorphicSite >; 52 | 53 | polySiteVector make_polySiteVector(const Sequence::PolyTable & data)__attribute__((deprecated)); 54 | } 55 | #endif 56 | -------------------------------------------------------------------------------- /Sequence/samflag.hpp: -------------------------------------------------------------------------------- 1 | //! 
\file Sequence/samflag.hpp @brief SAM flags 2 | #ifndef __LIBSEQ_SAMFLAG_HPP__ 3 | #define __LIBSEQ_SAMFLAG_HPP__ 4 | 5 | #include 6 | #include 7 | #include 8 | namespace Sequence 9 | { 10 | /*! 11 | \namespace Sequence::sambits 12 | 13 | \brief Stores the hex flags used by a SAM file flag field in an easy-to-read format 14 | 15 | \ingroup HTS 16 | */ 17 | namespace sambits 18 | { 19 | static const int is_paired=0x0001; 20 | static const int is_proper_pair=0x0002; 21 | static const int query_unmapped=0x0004; 22 | static const int mate_unmapped=0x0008; 23 | static const int qstrand = 0x0010; 24 | static const int mstrand = 0x0020; 25 | static const int first_read = 0x0040; 26 | static const int second_read = 0x0080; 27 | static const int not_primary = 0x0100; 28 | static const int qcfail = 0x0200; 29 | static const int duplicate = 0x0400; 30 | static const int suppalign = 0x0800; //Supplementary alignment 31 | } 32 | 33 | /*! 34 | \class Sequence::samflag Sequence/samflag.hpp 35 | \brief The flag field of a SAM record 36 | 37 | A SAM file's FLAG field is stored as an integer that is the sum of 38 | a series of flags (defined in namespace Sequence::sambits). 39 | 40 | This class simply takes that integer and stores a set of boolean 41 | variables based on the value of the integer. 42 | */ 43 | class samflag 44 | { 45 | private: 46 | void process_bits(); 47 | public: 48 | /*! 
49 | The flag value 50 | */ 51 | std::int32_t flag; 52 | bool is_paired,is_proper_pair,query_unmapped, 53 | mate_unmapped,qstrand,mstrand,first_read, 54 | second_read,not_primary,qcfail,duplicate,supp_alignment; 55 | samflag(const std::int32_t & __flag); 56 | samflag(); 57 | operator std::int32_t() const; 58 | std::istream & read( std::istream & i); 59 | }; 60 | 61 | std::ostream & operator<<(std::ostream & o, const samflag & s); 62 | std::istream & operator>>(std::istream & i, samflag & s); 63 | } 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /Sequence/samfunctions.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __SEQ_SAMFUNCTIONS_HPP__ 2 | #define __SEQ_SAMFUNCTIONS_HPP__ 3 | 4 | #include 5 | #include 6 | 7 | namespace Sequence 8 | { 9 | unsigned alignment_length( const samrecord & b ); 10 | unsigned insertion_distance( const samrecord & b ); 11 | unsigned deletion_distance( const samrecord & b ); 12 | unsigned ngaps( const samrecord & b ); 13 | unsigned mismatches( const samrecord & b ); 14 | #ifdef HAVE_HTSLIB 15 | unsigned alignment_length( const bamrecord & b ); 16 | unsigned insertion_distance( const bamrecord & b ); 17 | unsigned deletion_distance( const bamrecord & b ); 18 | unsigned ngaps( const bamrecord & b ); 19 | unsigned mismatches( const bamrecord & b ); 20 | #endif 21 | } 22 | #endif 23 | -------------------------------------------------------------------------------- /Sequence/stateCounter.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 
8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #ifndef _STATE_COUNTER_H_ 25 | #define _STATE_COUNTER_H_ 26 | #include 27 | /*! \file stateCounter.hpp 28 | @brief declaration of Sequence::stateCounter, a class to keep track of nucleotide counts either at a site 29 | in an alignment, or along a sequence 30 | */ 31 | /*! 32 | \class Sequence::stateCounter Sequence/stateCounter.hpp 33 | \ingroup functors 34 | \warning class data are public. Use responsibly. 
35 | @short keep track of state counts at a site in an alignment or along a sequence 36 | */ 37 | namespace Sequence 38 | { 39 | class __attribute__ ((deprecated))stateCounter : public std::unary_function 40 | { 41 | public: 42 | typedef unsigned size_type; 43 | size_type a,g,c,t,zero,one,gap,n; 44 | bool ndna; 45 | private: 46 | char _gap; 47 | public: 48 | stateCounter(char gapchar = '-'); 49 | stateCounter(stateCounter &&) = default; 50 | stateCounter(const stateCounter &) = default; 51 | stateCounter & operator=(const stateCounter &)=default; 52 | stateCounter & operator=( stateCounter &&)=default; 53 | void operator()(const char &ch); 54 | size_type nStates(void) const; 55 | }; 56 | } 57 | #endif 58 | -------------------------------------------------------------------------------- /Sequence/summstats.hpp: -------------------------------------------------------------------------------- 1 | /// @file Sequence/summstats.hpp 2 | /// \brief Include all summary statistic functions and types 3 | #ifndef SEQUENCE_SUMMSTATS_HPP__ 4 | #define SEQUENCE_SUMMSTATS_HPP__ 5 | 6 | /*! 7 | * \defgroup popgenanalysis Analysis of molecular population genetic data 8 | * \brief Summary statistics and other analysis of Sequence::VariantMatrix 9 | * \ingroup popgen 10 | * 11 | * See @ref md_md_tutorial. 
12 | * 13 | */ 14 | 15 | #include "summstats/generic.hpp" 16 | #include "summstats/classics.hpp" 17 | #include "summstats/nsl.hpp" 18 | #include "summstats/nslx.hpp" 19 | #include "summstats/ld.hpp" 20 | #include "summstats/lhaf.hpp" 21 | #include "summstats/garud.hpp" 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /Sequence/summstats/Makefile.am: -------------------------------------------------------------------------------- 1 | pkgincludedir=$(prefix)/include/Sequence/summstats 2 | 3 | pkginclude_HEADERS = classics.hpp thetapi.hpp thetaw.hpp thetah.hpp thetal.hpp auxillary.hpp nvariablesites.hpp allele_counts.hpp \ 4 | util.hpp ld.hpp nSLiHS.hpp nsl.hpp nslx.hpp garud.hpp generic.hpp lhaf.hpp \ 5 | algorithm.hpp 6 | -------------------------------------------------------------------------------- /Sequence/summstats/allele_counts.hpp: -------------------------------------------------------------------------------- 1 | /// \file Sequence/summstats/allele_counts.hpp 2 | /// \brief Count alleles at variable sites. 3 | #ifndef SEQUENCE_SUMMSTATS_ALLELE_COUNTS_HPP__ 4 | #define SEQUENCE_SUMMSTATS_ALLELE_COUNTS_HPP__ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace Sequence 12 | { 13 | struct AlleleCounts 14 | /// Tracks the number of states at a site 15 | /// \ingroup popgenanalysis 16 | { 17 | /// Number of non-missing states 18 | int nstates; 19 | /// Number of samples with missing states 20 | int nmissing; 21 | }; 22 | 23 | /*! \brief Count number of alleles at each site 24 | * \param m An AlleleCountMatrix 25 | * \ingroup popgenanalysis 26 | */ 27 | std::vector allele_counts(const AlleleCountMatrix& m); 28 | 29 | /*! \brief Count number of non-reference alleles at each site 30 | * \param m An AlleleCountMatrix 31 | * \param refstate The reference state for all sites. 
32 | * \ingroup popgenanalysis 33 | */ 34 | std::vector 35 | non_reference_allele_counts(const AlleleCountMatrix& m, 36 | const std::int8_t refstate); 37 | 38 | /*! \brief Count number of non-reference alleles at each site 39 | * \param m An AlleleCountMatrix 40 | * \param refstates The reference state at each site. 41 | * \ingroup popgenanalysis 42 | */ 43 | std::vector 44 | non_reference_allele_counts(const AlleleCountMatrix& m, 45 | const std::vector& refstates); 46 | } // namespace Sequence 47 | #endif 48 | -------------------------------------------------------------------------------- /Sequence/summstats/auxillary.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SEQUENCE_SUMMSTATS_AUXILLARY_HPP__ 2 | #define SEQUENCE_SUMMSTATS_AUXILLARY_HPP__ 3 | 4 | #include 5 | 6 | namespace Sequence 7 | { 8 | namespace summstats_aux 9 | { 10 | double a_sub_n(const std::uint32_t); 11 | double b_sub_n(const std::uint32_t nsam); 12 | double b_sub_n_plus1(const std::uint32_t nsam); 13 | } // namespace summstats_aux 14 | } // namespace Sequence 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /Sequence/summstats/garud.hpp: -------------------------------------------------------------------------------- 1 | /// \file Sequence/summstats/garud.hpp 2 | /// \brief H1, H12, and H2/H1 stats 3 | #ifndef SEQUENCE_SUMMSTATS_GARUD_HPP 4 | #define SEQUENCE_SUMMSTATS_GARUD_HPP 5 | 6 | #include 7 | 8 | namespace Sequence 9 | { 10 | struct GarudStats 11 | /*! 12 | Statistics from \cite Garud2015-ob 13 | \note H1 = 1 - haplotype homozygosity, e.g. "H" from \cite Depaulis1998-ol 14 | \ingroup popgenanalysis 15 | */ 16 | { 17 | double H1,H12,H2H1; 18 | GarudStats(); 19 | GarudStats(const double, const double, const double); 20 | }; 21 | 22 | /*! \brief Calculate H1, H12, and H2/H1 23 | * \param m A VariantMatrix 24 | * \return GarudStats 25 | * 26 | * See \cite Garud2015-ob for details. 
27 | */ 28 | GarudStats garud_statistics(const VariantMatrix & m); 29 | } 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /Sequence/summstats/generic.hpp: -------------------------------------------------------------------------------- 1 | /// \file Sequence/summstats/generic.hpp 2 | /// \brief Generic utilities for calculating summary statistics 3 | #ifndef SEQUENCE_SUMMSTATS_GENERIC_HPP 4 | #define SEQUENCE_SUMMSTATS_GENERIC_HPP 5 | 6 | #include 7 | #include 8 | 9 | namespace Sequence 10 | { 11 | /*! \brief Calculate heterozygosity/diversity from count data 12 | * \param counts a vector counts. 13 | * \param nsam the sample size 14 | * \return diversity = 1 - homozygosity 15 | */ 16 | double diversity_from_counts( 17 | const std::unordered_map& counts, 18 | const std::size_t nsam); 19 | } // namespace Sequence 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /Sequence/summstats/ld.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SEQUENCE_SUMMSTATS_LD_HPP__ 2 | #define SEQUENCE_SUMMSTATS_LD_HPP__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace Sequence 9 | { 10 | struct TwoLocusCounts 11 | { 12 | std::int8_t i, j; 13 | int n; 14 | TwoLocusCounts(std::int8_t i_, std::int8_t j_, int n_); 15 | }; 16 | 17 | std::vector 18 | two_locus_haplotype_counts(const VariantMatrix& m, std::size_t sitei, 19 | const std::size_t sitej, 20 | const bool skip_missing); 21 | } // namespace Sequence 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /Sequence/summstats/lhaf.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SEQUENCE_SUMMSTATS_LHAF_HPP 2 | #define SEQUENCE_SUMMSTATS_LHAF_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace Sequence 9 | { 10 | /*! 
\brief l-Haf statistic of \cite Ronen2015-te 11 | * \param m A VariantMatrix 12 | * \param refstate The ancestral state 13 | * \param l The power parameter 14 | * \return vector of the statistic 15 | * \ingroup popgenanalysis 16 | */ 17 | std::vector lhaf(const VariantMatrix &m, 18 | const std::int8_t refstate, const double l); 19 | } // namespace Sequence 20 | #endif 21 | -------------------------------------------------------------------------------- /Sequence/summstats/nSLiHS.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SEQUENCE_SUMMSTATS_NSLIHS_HPP 2 | #define SEQUENCE_SUMMSTATS_NSLIHS_HPP 3 | 4 | #include 5 | 6 | namespace Sequence 7 | { 8 | struct nSLiHS 9 | /// Stores the results of nSL and iHS calculations. 10 | /// See Sequence::nsl for details. 11 | /// 12 | /// \note This type is usually forward-declared in other headers, 13 | /// meaning this header will need inclusion in relevant translation 14 | /// units. 15 | /// 16 | /// \ingroup popgenanalysis 17 | { 18 | /// The nSL statistic \cite Ferrer-Admetlla2014-wa 19 | double nsl; 20 | /// The iHS statistic, calculated according to \cite Ferrer-Admetlla2014-wa 21 | double ihs; 22 | /// Count of non-reference, 23 | /// non-missing allele. 24 | std::int32_t core_count; 25 | }; 26 | } // namespace Sequence 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /Sequence/summstats/nsl.hpp: -------------------------------------------------------------------------------- 1 | /// \file Sequence/summstats/nsl.hpp 2 | /// \brief nSL and iHS 3 | #ifndef SEQUENCE_SUMMSTATS_NSL_HPP__ 4 | #define SEQUENCE_SUMMSTATS_NSL_HPP__ 5 | 6 | #include 7 | #include 8 | #include 9 | #include "nSLiHS.hpp" 10 | 11 | namespace Sequence 12 | { 13 | 14 | /*! 
\brief nSL and iHS statistics 15 | * \param m A VariantMatrix 16 | * \param core The index of the core site 17 | * \param refstate The value of the reference/ancestral allelic state 18 | * 19 | * \return an nSLiHS object 20 | * \ingroup popgenanalysis 21 | * 22 | * See nSL_from_ms.cc for example 23 | * 24 | * See \cite Ferrer-Admetlla2014-wa for details. 25 | */ 26 | nSLiHS nsl(const VariantMatrix& m, const std::size_t core, 27 | const std::int8_t refstate); 28 | 29 | /*! \brief nSL and iHS statistics 30 | * \param m A VariantMatrix 31 | * \param refstate The value of the reference/ancestral allelic state 32 | * 33 | * \return vector of nSLiHS objects (one for each site) 34 | * \ingroup popgenanalysis 35 | * 36 | * This function differs from the version working 37 | * on a core site in that it uses an efficient 38 | * method to dynamically update suffix lengths as each 39 | * core site is processed. The result is a huge runtime 40 | * reduction compared to calculating the statistic 41 | * for each core site on its own. 42 | * 43 | * See \cite Ferrer-Admetlla2014-wa for details. 44 | */ 45 | std::vector nsl(const VariantMatrix& m, 46 | const std::int8_t refstate); 47 | } // namespace Sequence 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /Sequence/summstats/nslx.hpp: -------------------------------------------------------------------------------- 1 | /// \file Sequence/summstats/nslx.hpp 2 | /// \brief nSL and iHS 3 | #ifndef SEQUENCE_SUMMSTATS_NSLX_HPP 4 | #define SEQUENCE_SUMMSTATS_NSLX_HPP 5 | 6 | #include 7 | #include 8 | #include 9 | #include "nSLiHS.hpp" 10 | 11 | namespace Sequence 12 | { 13 | /*! 
\brief A variation on nSL/iHS 14 | * \param m A VariantMatrix 15 | * \param refstate The ancestral state 16 | * \param x Non-reference allele count 17 | * 18 | * \return vector of nSLiHS 19 | * 20 | * This variant on nSL only allows suffix lengths 21 | * to be broken by variants where the derived 22 | * (non-refstate) allele is present <= \a x times. 23 | * 24 | * When \a x is 1, this statistic is a proxy for the 25 | * SDS score of \cite Field2016-so. 26 | */ 27 | std::vector nslx(const VariantMatrix& m, 28 | const std::int8_t refstate, const int x); 29 | } // namespace Sequence 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /Sequence/summstats/nvariablesites.hpp: -------------------------------------------------------------------------------- 1 | /// \file Sequence/summstats/nvariablesites.hpp 2 | /// \brief Calculate total numbers of polymorphisms 3 | #ifndef SEQUENCE_SUMMSTATS_NVARIABLESITES_HPP__ 4 | #define SEQUENCE_SUMMSTATS_NVARIABLESITES_HPP__ 5 | 6 | #include 7 | #include 8 | 9 | namespace Sequence 10 | { 11 | /*! \brief Number of polymorphic sites 12 | * 13 | * Returns the number of sites with more than one non-missing state 14 | * \param m An AlleleCountMatrix 15 | * \return std::uint32_t 16 | * \ingroup popgenanalysis 17 | */ 18 | std::uint32_t nvariable_sites(const AlleleCountMatrix& m); 19 | 20 | /*! \brief Number of bi-allelic sites 21 | * 22 | * Return the number of sites with exactly two non-missing states. 23 | * \param m An AlleleCountMatrix 24 | * \return std::uint32_t 25 | * \ingroup popgenanalysis 26 | */ 27 | std::uint32_t nbiallelic_sites(const AlleleCountMatrix& m); 28 | 29 | /*! \brief Total number of mutations in the sample 30 | * 31 | * Return \f$\sum_{i=0}^{i=m.nsites-1}I(i)\f$ where \f$I(i)\f$ 32 | * is \f$k_i - 1\f$ if \f$k_i\f$, the number of states at the \f$i^{th}\f$ site, 33 | * is greater than one, and zero otherwise.
34 | * 35 | * \param m An AlleleCountMatrix 36 | * \return std::uint32_t 37 | * \ingroup popgenanalysis 38 | */ 39 | std::uint32_t total_number_of_mutations(const AlleleCountMatrix& m); 40 | } // namespace Sequence 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /Sequence/summstats/thetah.hpp: -------------------------------------------------------------------------------- 1 | /// \file Sequence/summstats/thetah.hpp 2 | /// \brief Fay and Wu's \f$\hat\theta_H\f$. 3 | #ifndef SEQUENCE_SUMMSTATS_THETAH_HPP__ 4 | #define SEQUENCE_SUMMSTATS_THETAH_HPP__ 5 | 6 | #include 7 | #include 8 | 9 | namespace Sequence 10 | { 11 | /*! \brief Fay and Wu's \f$\hat\theta_H\f$. 12 | * \param ac An AlleleCountMatrix 13 | * \param refstate The ancestral state 14 | * \return double 15 | * 16 | * See \cite Fay2000-ef for details. 17 | * \ingroup popgenanalysis 18 | */ 19 | double thetah(const AlleleCountMatrix& ac, const std::int8_t refstate); 20 | 21 | /*! \brief Fay and Wu's \f$\hat\theta_H\f$. 22 | * \param m An AlleleCountMatrix 23 | * \param refstates Vector of ancestral states. 24 | * \return double 25 | * 26 | * See \cite Fay2000-ef for details. 27 | * \ingroup popgenanalysis 28 | */ 29 | double thetah(const AlleleCountMatrix& m, 30 | const std::vector& refstates); 31 | } // namespace Sequence 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /Sequence/summstats/thetal.hpp: -------------------------------------------------------------------------------- 1 | /// \file Sequence/summstats/thetal.hpp 2 | /// \brief Zeng et al. \f$\hat\theta_L\f$ 3 | #ifndef SEQUENCE_SUMMSTATS_THETAL_HPP__ 4 | #define SEQUENCE_SUMMSTATS_THETAL_HPP__ 5 | 6 | #include 7 | #include 8 | 9 | namespace Sequence 10 | { 11 | /*! \brief Zeng et al.
\f$\hat\theta_L\f$ 12 | * \param ac An AlleleCountMatrix 13 | * \param refstate The ancestral state 14 | * \return double 15 | * 16 | * See \cite Zeng2006-is for details. 17 | * \ingroup popgenanalysis 18 | */ 19 | double thetal(const AlleleCountMatrix& ac, const std::int8_t refstate); 20 | 21 | /*! \brief Zeng et al. \f$\hat\theta_L\f$ 22 | * \param m An AlleleCountMatrix 23 | * \param refstates Vector of ancestral states. 24 | * \return double 25 | * 26 | * See \cite Zeng2006-is for details. 27 | * \ingroup popgenanalysis 28 | */ 29 | double thetal(const AlleleCountMatrix& m, 30 | const std::vector& refstates); 31 | } // namespace Sequence 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /Sequence/summstats/thetapi.hpp: -------------------------------------------------------------------------------- 1 | /*! 2 | \file summstats/thetapi.hpp 3 | */ 4 | #ifndef SEQUENCE_SUMMSTATS_THETAPI_HPP__ 5 | #define SEQUENCE_SUMMSTATS_THETAPI_HPP__ 6 | 7 | #include 8 | 9 | namespace Sequence 10 | { 11 | /*! \brief Mean pairwise differences 12 | * \param ac An AlleleCountMatrix 13 | * \return Mean pairwise differences 14 | * \note Calculated as sum over one minus site homozygosity 15 | * 16 | * This function is included via Sequence/summstats.hpp, 17 | * Sequence/summstats/classics.hpp or 18 | * Sequence/summstats/thetapi.hpp 19 | * 20 | * See \cite Tajima1983-it for details. 21 | * \ingroup popgenanalysis 22 | */ 23 | double thetapi(const AlleleCountMatrix& ac); 24 | } // namespace Sequence 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /Sequence/summstats/thetaw.hpp: -------------------------------------------------------------------------------- 1 | /// \file Sequence/summstats/thetaw.hpp 2 | /// \brief Watterson's theta 3 | #ifndef SEQUENCE_SUMMSTATS_THETAW_HPP__ 4 | #define SEQUENCE_SUMMSTATS_THETAW_HPP__ 5 | 6 | #include 7 | 8 | namespace Sequence 9 | { 10 | /*!
template <typename T>
inline bool
all_missing(const T& t)
/// Reports whether every element of \a t encodes missing data, i.e.
/// whether every element compares less than zero. An empty range
/// yields true.
/// T should be a model of a VariantMatrix, RowView, or ColumnView
{
    for (const typename T::value_type state : t)
        {
            // The first element that is not negative settles the answer.
            if (!(state < 0))
                {
                    return false;
                }
        }
    return true;
}
8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | along with libsequence. If not, see <http://www.gnu.org/licenses/>. 21 | 22 | */ 23 | 24 | /*! \file typedefs.hpp 25 | Typedefs used in the library are defined here. 26 | Wherever possible, types from namespace std 27 | are given forward declarations. 28 | @brief typedefs used by libsequence 29 | */ 30 | #ifndef __SEQUENCE_TYPEDEFS_HPP 31 | #define __SEQUENCE_TYPEDEFS_HPP 32 | #include 33 | #include 34 | #include 35 | namespace Sequence 36 | { 37 | /*! 38 | A CodonUsageTable is a vector of pairs.
In each pair, 39 | the first element is the codon, and the second element 40 | is an integer counting the number of occurrences of 41 | the codon 42 | */ 43 | typedef std::vector< std::pair > CodonUsageTable; 44 | } 45 | #endif 46 | -------------------------------------------------------------------------------- /Sequence/variant_matrix/Makefile.am: -------------------------------------------------------------------------------- 1 | pkgincludedir=$(prefix)/include/Sequence/variant_matrix 2 | 3 | pkginclude_HEADERS = filtering.hpp windows.hpp msformat.hpp 4 | -------------------------------------------------------------------------------- /Sequence/variant_matrix/filtering.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SEQUENCE_VARIANT_MATRIX_FILTERING_HPP_ 2 | #define SEQUENCE_VARIANT_MATRIX_FILTERING_HPP_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace Sequence 10 | { 11 | std::int32_t filter_sites(VariantMatrix &m, 12 | const std::function &f); 13 | 14 | std::int32_t 15 | filter_haplotypes(VariantMatrix &m, 16 | const std::function &f); 17 | 18 | std::int32_t filter_sites(VariantMatrix &m, 19 | const std::function &f); 20 | 21 | std::int32_t 22 | filter_haplotypes(VariantMatrix &m, 23 | const std::function &f); 24 | } 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /Sequence/variant_matrix/windows.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SEQUENCE_VARIANT_MATRIX_WINDOWS_HPP 2 | #define SEQUENCE_VARIANT_MATRIX_WINDOWS_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace Sequence 11 | { 12 | /*! 
\brief Return a window from a VariantMatrix 13 | * \param m A VariantMatrix 14 | * \param beg Beginning of window 15 | * \param end End of window 16 | * 17 | * \note The window intervals are closed, [beg,end] 18 | */ 19 | VariantMatrix make_window(const VariantMatrix& m, const double beg, 20 | const double end); 21 | /*! \brief Return a slice from a VariantMatrix 22 | * \param m A VariantMatrix 23 | * \param beg Beginning of window 24 | * \param end End of window 25 | * \param i index of first haplotype to include 26 | * \param j one past last haplotype to include 27 | * 28 | * The result is a variant matrix including positions [beg,end] 29 | * and samples [i,j) from \a m. Note that the sample interval is 30 | * half-open! 31 | */ 32 | 33 | VariantMatrix make_slice(const VariantMatrix& m, const double beg, 34 | const double end, 35 | const std::size_t i, 36 | const std::size_t j); 37 | } // namespace Sequence 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /config-h.in.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/molpopgen/libsequence/ff1a4f698ed2fac40301ff466a6253c82889fd6d/config-h.in.in -------------------------------------------------------------------------------- /config.h.in: -------------------------------------------------------------------------------- 1 | /* config.h.in. Generated from configure.ac by autoheader. */ 2 | 3 | /* define if the compiler supports basic C++11 syntax */ 4 | #undef HAVE_CXX11 5 | 6 | /* Define to 1 if you have the header file. */ 7 | #undef HAVE_DLFCN_H 8 | 9 | /* Define to 1 if you have the header file. */ 10 | #undef HAVE_INTTYPES_H 11 | 12 | /* Define to 1 if you have the `z' library (-lz). */ 13 | #undef HAVE_LIBZ 14 | 15 | /* Define to 1 if you have the header file. */ 16 | #undef HAVE_MEMORY_H 17 | 18 | /* Define to 1 if you have the header file.
*/ 19 | #undef HAVE_STDINT_H 20 | 21 | /* Define to 1 if you have the header file. */ 22 | #undef HAVE_STDLIB_H 23 | 24 | /* Define to 1 if you have the header file. */ 25 | #undef HAVE_STRINGS_H 26 | 27 | /* Define to 1 if you have the header file. */ 28 | #undef HAVE_STRING_H 29 | 30 | /* Define to 1 if you have the header file. */ 31 | #undef HAVE_SYS_STAT_H 32 | 33 | /* Define to 1 if you have the header file. */ 34 | #undef HAVE_SYS_TYPES_H 35 | 36 | /* Define to 1 if you have the header file. */ 37 | #undef HAVE_UNISTD_H 38 | 39 | /* Define to the sub-directory in which libtool stores uninstalled libraries. 40 | */ 41 | #undef LT_OBJDIR 42 | 43 | /* Name of package */ 44 | #undef PACKAGE 45 | 46 | /* Define to the address where bug reports for this package should be sent. */ 47 | #undef PACKAGE_BUGREPORT 48 | 49 | /* Define to the full name of this package. */ 50 | #undef PACKAGE_NAME 51 | 52 | /* Define to the full name and version of this package. */ 53 | #undef PACKAGE_STRING 54 | 55 | /* Define to the one symbol short name of this package. */ 56 | #undef PACKAGE_TARNAME 57 | 58 | /* Define to the home page for this package. */ 59 | #undef PACKAGE_URL 60 | 61 | /* Define to the version of this package. */ 62 | #undef PACKAGE_VERSION 63 | 64 | /* Define to 1 if you have the ANSI C header files. */ 65 | #undef STDC_HEADERS 66 | 67 | /* Version number of package */ 68 | #undef VERSION 69 | 70 | /* Define to empty if `const' does not conform to ANSI C. 
*/ 71 | #undef const 72 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_PREREQ(2.59) 2 | 3 | AC_INIT([libsequence], [1.9.8], [https://github.com/molpopgen/libsequence]) 4 | AC_CONFIG_SRCDIR([src/Seq/Seq.cc]) 5 | AC_CONFIG_SRCDIR([test/FastaIO.cc]) 6 | AC_CONFIG_SRCDIR([examples/msstats.cc]) 7 | AM_INIT_AUTOMAKE([subdir-objects]) 8 | AC_CONFIG_HEADERS([config.h]) 9 | 10 | AC_CONFIG_MACRO_DIR([m4]) 11 | 12 | AC_PROG_CC 13 | AC_C_CONST 14 | AC_PROG_CXX 15 | AX_CXX_COMPILE_STDCXX_11([noext],[mandatory]) 16 | 17 | AM_MAINTAINER_MODE([disable]) 18 | 19 | LT_INIT 20 | AC_PROG_LIBTOOL 21 | AC_LANG(C++) 22 | AC_CONFIG_FILES([Makefile src/Makefile Sequence/Makefile Sequence/bits/Makefile Sequence/SummStatsDeprecated/Makefile 23 | Sequence/variant_matrix/Makefile Sequence/summstats/Makefile test/Makefile examples/Makefile doc/libsequence.doxygen]) 24 | 25 | dnl AC_ARG_ENABLE(debug, 26 | dnl [ --enable-debug Turn on debugging], 27 | dnl [case "${enableval}" in 28 | dnl yes) debug=true ;; 29 | dnl no) debug=false ;; 30 | dnl *) AC_MSG_ERROR(bad value ${enableval} for --enable-debug) ;; 31 | dnl esac],[debug=false]) 32 | dnl AM_CONDITIONAL(DEBUG, test x$debug = xtrue) 33 | dnl 34 | dnl AC_ARG_ENABLE(profiling, 35 | dnl [ --enable-profiling Turn on profiling], 36 | dnl [case "${enableval}" in 37 | dnl yes) profiling=true ;; 38 | dnl no) profiling=false ;; 39 | dnl *) AC_MSG_ERROR(bad value ${enableval} for --enable-profiling) ;; 40 | dnl esac],[profiling=false]) 41 | dnl AM_CONDITIONAL(PROFILING, test x$profiling = xtrue) 42 | 43 | 44 | dnl zlib header 45 | dnl AC_CHECK_HEADER(zlib.h,,[AC_MSG_ERROR([zlib headers missing - cannot continue])]) 46 | 47 | dnl zlib runtime 48 | dnl AC_CHECK_LIB([z],gzungetc,,[echo "zlib run time library not found";exit 1]) 49 | 50 | dnl boost unit test library 51 | AC_CHECK_HEADER(boost/test/unit_test.hpp, 
BUNITTEST=1,[echo "boost/test/unit_test.hpp not found. Unit tests will not be compiled."]) 52 | AM_CONDITIONAL([BUNIT_TEST_PRESENT], test x$BUNITTEST = x1) 53 | 54 | dnl check for Intel TBB headers that we need 55 | dnl AC_CHECK_HEADER(tbb/parallel_for.h,,[AC_MSG_ERROR([tbb/parallel_for.h not found. Please either install Intel's TBB library or make sure your CXXFLAGS are set correctly])]) 56 | dnl AC_CHECK_HEADER(tbb/task_scheduler_init.h,,[AC_MSG_ERROR([tbb/task_scheduler_init.h not found. Please either install Intel's TBB library or make sure your CXXFLAGS are set correctly])]) 57 | 58 | dnl check for Intel TBB library 59 | dnl AC_CHECK_LIB([tbb],main,FOUND_TBB_RUNTIME=1,[AC_MSG_ERROR([tbb runtime library not found. Please install Intel's TBB library.])]) 60 | AC_OUTPUT 61 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | DOX=doxygen 2 | all: 3 | $(DOX) libsequence.doxygen 4 | # cd latex && make 5 | cd .. 
6 | clean: 7 | rm -rf html latex 8 | install: 9 | install -d html /usr/share/doc/libsequence/html 10 | install html/* /usr/share/doc/libsequence/html 11 | # install latex/refman.pdf /usr/share/doc/libsequence/libsequence-ref.pdf 12 | -------------------------------------------------------------------------------- /doc/images/2subs: -------------------------------------------------------------------------------- 1 | #FIG 3.2 2 | Landscape 3 | Center 4 | Metric 5 | A4 6 | 100.00 7 | Single 8 | -2 9 | 1200 2 10 | 2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 11 | 1 1 1.00 90.00 120.00 12 | 5400 3150 4500 4320 13 | 2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 14 | 1 1 1.00 90.00 120.00 15 | 6030 3150 6930 4230 16 | 2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 17 | 1 1 1.00 90.00 120.00 18 | 4500 4500 5400 5580 19 | 2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 20 | 1 1 1.00 90.00 120.00 21 | 7020 4500 6120 5670 22 | 4 0 0 50 0 0 14 0.0000 4 150 675 5400 5805 Codon2\001 23 | 4 0 0 50 0 0 14 0.0000 4 150 675 5400 3150 Codon1\001 24 | 4 0 0 50 0 0 14 0.0000 4 150 1215 4050 4500 Intermediate1\001 25 | 4 0 0 50 0 0 14 0.0000 4 150 1215 6300 4500 Intermediate2\001 26 | 4 0 0 50 0 0 14 0.0000 4 150 210 6480 3600 b3\001 27 | 4 0 0 50 0 0 14 0.0000 4 150 210 4770 3600 b1\001 28 | 4 0 0 50 0 0 14 0.0000 4 150 210 4680 5130 b2\001 29 | 4 0 0 50 0 0 14 0.0000 4 150 210 6750 5130 b4\001 30 | -------------------------------------------------------------------------------- /doc/images/2subs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/molpopgen/libsequence/ff1a4f698ed2fac40301ff466a6253c82889fd6d/doc/images/2subs.jpg -------------------------------------------------------------------------------- /doc/images/2subs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/molpopgen/libsequence/ff1a4f698ed2fac40301ff466a6253c82889fd6d/doc/images/2subs.pdf 
-------------------------------------------------------------------------------- /doc/images/3subs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/molpopgen/libsequence/ff1a4f698ed2fac40301ff466a6253c82889fd6d/doc/images/3subs.jpg -------------------------------------------------------------------------------- /doc/images/3subs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/molpopgen/libsequence/ff1a4f698ed2fac40301ff466a6253c82889fd6d/doc/images/3subs.pdf -------------------------------------------------------------------------------- /doc/md/tutorial.md: -------------------------------------------------------------------------------- 1 | # Tutorial/overview 2 | 3 | [TOC] 4 | 5 | The citation for the library is \cite Thornton2003-wj 6 | 7 | ## Creation and manipulation of a VariantMatrix 8 | 9 | TBW 10 | 11 | ## Calculation of summary statistics from a VariantMatrix 12 | 13 | In libsequence, variation data are represented as a Sequence::VariantMatrix. 14 | The library provides functions for many standard analyses based on input 15 | data in this format. The following headers are relevant: 16 | 17 | 1. Sequence/summstats.hpp 18 | 19 | Clicking on the above headers will reveal the existence of other headers. 20 | The intent is that you may only wish to bring some names into scope. 21 | For example, if you implement a new analysis where mean pairwise differences 22 | are needed, you may include Sequence/summstats/thetapi.hpp instead of every 23 | single summary statistic function provided by the library.
24 | 25 | TBW 26 | -------------------------------------------------------------------------------- /examples/Makefile.am: -------------------------------------------------------------------------------- 1 | check_PROGRAMS= ms_to_VariantMatrix \ 2 | nSL_from_ms \ 3 | mean_nSLx \ 4 | nSL_vs_nSLx \ 5 | baseComp valid_dna translateTest \ 6 | slidingWindow slidingWindow2 PolyTableIterators \ 7 | ufs \ 8 | msstats polySiteVector_test 9 | 10 | ms_to_VariantMatrix_SOURCES=ms_to_VariantMatrix.cc 11 | nSL_from_ms_SOURCES=nSL_from_ms.cc 12 | mean_nSLx_SOURCES=mean_nSLx.cc 13 | nSL_vs_nSLx_SOURCES=nSL_vs_nSLx.cc 14 | baseComp_SOURCES=baseComp.cc 15 | valid_dna_SOURCES=valid_dna.cc 16 | translateTest_SOURCES=translateTest.cc 17 | slidingWindow_SOURCES=slidingWindow.cc 18 | slidingWindow2_SOURCES=slidingWindow2.cc 19 | PolyTableIterators_SOURCES=PolyTableIterators.cc 20 | ufs_SOURCES=ufs.cc 21 | msstats_SOURCES=msstats.cc 22 | polySiteVector_test_SOURCES=polySiteVector_test.cc 23 | 24 | AM_CXXFLAGS= 25 | AM_LIBS=-lsequence 26 | AM_LDFLAGS=-L../src/.libs -Wl,-rpath,../src/.libs 27 | 28 | #if DEBUG 29 | #AM_CXXFLAGS+=-g 30 | #else 31 | #AM_CXXFLAGS+=-DNDEBUG 32 | #endif 33 | # 34 | #if PROFILING 35 | #PROFILE= -pg 36 | #else 37 | #PROFILE= 38 | #endif 39 | 40 | LIBS+=$(AM_LIBS) 41 | -------------------------------------------------------------------------------- /examples/Makefile.old: -------------------------------------------------------------------------------- 1 | #CC=cc 2 | #CXX=c++ 3 | CFLAGS = -O3 -Wall -pedantic -ansi -Wtraditional -I.. 4 | CXXFLAGS = -O3 -Wall -W -I.. 
-std=c++11 5 | LIBS= -L/usr/local/lib -Wl,-rpath -Wl,/usr/local/lib -lsequence -lz 6 | #all: codons.o gestimator.o msstats.o int_handler.o getopt.o getopt1.o baseComp.o valid_dna.o FastaExplicit.o translateTest.o slidingWindow.o slidingWindow2.o PolyTableIterators.o correlations.o critical_values.o ufs.o ms--.o msbeta.o freerec.o bottleneck.o fragments.o test_SimDataIO.o 7 | #all: codons.o msstats.o int_handler.o baseComp.o valid_dna.o FastaExplicit.o translateTest.o slidingWindow.o slidingWindow2.o PolyTableIterators.o correlations.o critical_values.o ufs.o ms--.o msbeta.o freerec.o bottleneck.o fragments.o test_SimDataIO.o 8 | all: codons.o msstats.o int_handler.o baseComp.o FastaExplicit.o translateTest.o slidingWindow.o slidingWindow2.o PolyTableIterators.o correlations.o critical_values.o ufs.o ms--.o freerec.o bottleneck.o fragments.o test_SimDataIO.o valid_dna.o Ptable_test.o 9 | $(CXX) $(CXXFLAGS) ufs.o -o ufs $(LDFLAGS) $(LIBS) 10 | $(CXX) $(CXXFLAGS) codons.o -o codons $(LDFLAGS) $(LIBS) 11 | $(CXX) $(CXXFLAGS) msstats.o -o msstats $(LDFLAGS) $(LIBS) 12 | # $(CXX) $(CXXFLAGS) gestimator.o int_handler.o getopt.o getopt1.o -o gestimator $(LDFLAGS) $(LIBS) 13 | $(CXX) $(CXXFLAGS) baseComp.o -o baseComp $(LDFLAGS) $(LIBS) 14 | $(CXX) $(CXXFLAGS) FastaExplicit.o -o FastaExplicit $(LDFLAGS) $(LIBS) 15 | $(CXX) $(CXXFLAGS) translateTest.o -o translateTest $(LDFLAGS) $(LIBS) 16 | $(CXX) $(CXXFLAGS) slidingWindow.o -o slidingWindow $(LDFLAGS) $(LIBS) 17 | $(CXX) $(CXXFLAGS) slidingWindow2.o -o slidingWindow2 $(LDFLAGS) $(LIBS) 18 | $(CXX) $(CXXFLAGS) PolyTableIterators.o -o PolyTableIterators $(LDFLAGS) $(LIBS) 19 | $(CXX) $(CXXFLAGS) correlations.o -o correlations $(LDFLAGS) $(LIBS) 20 | $(CXX) $(CXXFLAGS) critical_values.o -o critical_values $(LDFLAGS) $(LIBS) 21 | $(CXX) $(CXXFLAGS) ms--.o -o ms-- $(LDFLAGS) $(LIBS) 22 | # $(CXX) $(CXXFLAGS) msbeta.o -o msbeta $(LDFLAGS) $(LIBS) 23 | $(CXX) $(CXXFLAGS) freerec.o -o freerec $(LDFLAGS) $(LIBS) 24 | $(CXX) 
$(CXXFLAGS) bottleneck.o -o bottleneck $(LDFLAGS) $(LIBS) 25 | $(CXX) $(CXXFLAGS) fragments.o -o fragments $(LDFLAGS) $(LIBS) 26 | $(CXX) $(CXXFLAGS) valid_dna.o -o valid_dna $(LDFLAGS) $(LIBS) 27 | $(CXX) $(CXXFLAGS) test_SimDataIO.o -o test_SimDataIO $(LDFLAGS) $(LIBS) 28 | $(CXX) $(CXXFLAGS) Ptable_test.o -o Ptable_test $(LDFLAGS) $(LIBS) 29 | 30 | clean: 31 | rm -f *.o codons msstats baseComp valid_dna FastaExplicit translateTest \ 32 | slidingWindow slidingWindow2 PolyTableIterators correlations \ 33 | critical_values ufs ms-- msbeta freerec bottleneck fragments 34 | 35 | -------------------------------------------------------------------------------- /examples/int_handler.cc: -------------------------------------------------------------------------------- 1 | #include "int_handler.hpp" 2 | #include 3 | void cntrl_c_handler(int sig) 4 | { 5 | char answer; 6 | std::cerr << "Interrupt signal received.\n"; 7 | std::cerr << "Do you really want to quit [y or n]?\n"; 8 | std::cin >> answer; 9 | switch (answer) 10 | { 11 | case 'Y': 12 | exit(0); 13 | break; 14 | case 'y': 15 | exit(0); 16 | break; 17 | default: 18 | signal(SIGINT,cntrl_c_handler); 19 | std::cerr << "continuing"< 2 | #include 3 | #include 4 | 5 | using namespace std; 6 | 7 | void cntrl_c_handler(int sig); 8 | -------------------------------------------------------------------------------- /examples/mean_nSLx.cc: -------------------------------------------------------------------------------- 1 | /*! 
\include mean_nSLx.cc */ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | int 12 | main(int argc, char** argv) 13 | { 14 | int x = std::atoi(argv[1]); 15 | while (!std::cin.eof()) 16 | { 17 | auto vm = Sequence::from_msformat(std::cin); 18 | auto nsl_stats = Sequence::nslx(vm, 0, x); 19 | double sum = 0.0; 20 | unsigned n = 0; 21 | for (auto& i : nsl_stats) 22 | { 23 | if (std::isfinite(i.nsl)) 24 | { 25 | sum += i.nsl; 26 | ++n; 27 | } 28 | } 29 | std::cout << sum / static_cast(n) << '\n'; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /examples/ms_to_VariantMatrix.cc: -------------------------------------------------------------------------------- 1 | /*! \include ms_to_VariantMatrix.cc */ 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int 8 | main(int argc, char** argv) 9 | { 10 | do 11 | { 12 | auto vm = Sequence::from_msformat(std::cin); 13 | Sequence::to_msformat(vm, std::cout); 14 | std::cout << '\n'; 15 | } 16 | while (!std::cin.eof()); 17 | } 18 | -------------------------------------------------------------------------------- /examples/msstats.cc: -------------------------------------------------------------------------------- 1 | /* 2 | msstats - read data from ms via stdin, calculate common summary statistics 3 | 4 | Copyright (C) 2002 Kevin Thornton 5 | 6 | This program is free software; you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published by 8 | the Free Software Foundation; either version 2, or (at your option) 9 | any later version. 10 | 11 | This program is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU General Public License for more details. 
/// Example driver for Sequence::nsl: reads one ms-format replicate from
/// standard input and computes nSL/iHS for every site, using 0 as the
/// reference state.
///
/// The result is currently not printed. The disabled block below
/// cross-checks the all-sites overload against the per-core-site
/// overload and prints both sets of values.
int
main(int argc, char** argv)
{
    auto vm = Sequence::from_msformat(std::cin);
    auto nsl_stats = Sequence::nsl(vm, 0);
    // Disabled consistency check between the two nsl overloads:
    //assert(nsl_stats.size() == vm.nsites);
    //for (std::size_t i = 0; i < vm.nsites; ++i)
    //    {
    //        auto n = Sequence::nsl(vm, i, 0);
    //        if (!std::isnan(n.nsl))
    //            {
    //                std::cout << vm.positions[i] << ' ' << n.nsl << ' '
    //                          << n.ihs << ' ' << n.core_count << ' '
    //                          << nsl_stats[i].nsl << ' ' << nsl_stats[i].ihs
    //                          << ' ' << nsl_stats[i].core_count << '\n';
    //            }
    //        else
    //            {
    //                assert(std::isnan(nsl_stats[i].nsl));
    //            }
    //    }
}
\include nSL_from_ms.cc */ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | int 12 | main(int argc, char** argv) 13 | { 14 | int x = std::atoi(argv[1]); 15 | auto vm = Sequence::from_msformat(std::cin); 16 | auto nsl_stats = Sequence::nslx(vm, 0, x); 17 | for(auto & s : nsl_stats){std::cout << s.nsl << ' ' << s.ihs << ' ' << s.core_count << '\n'; } 18 | } 19 | 20 | -------------------------------------------------------------------------------- /examples/slidingWindow.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /*! \include slidingWindow.cc */ 10 | 11 | //Read in a data set of aligned sequence in Fasta 12 | //format. Create a polymorphism table. Calculate 13 | //Tajima's D for the whole table. Then, run a sliding 14 | //window of 1 segregating site (with a jump size of 1) 15 | //along the SNP table, and use that to calculate Tajima's 16 | //D for each site. 17 | 18 | //This is a somewhat contrived example, but it illustrates 19 | //the sliding window code. 20 | 21 | int main(int argc, char **argv) 22 | { 23 | const char * infilename = argv[1]; 24 | std::vector data; 25 | Sequence::Alignment::GetData(data,infilename); 26 | 27 | if ( Sequence::Alignment::IsAlignment(data) && 28 | Sequence::Alignment::validForPolyAnalysis(data.begin(),data.end()) ) 29 | { 30 | Sequence::PolySites SNPtable(data); 31 | if (! 
SNPtable.empty()) 32 | { 33 | Sequence::PolySNP analyzeRegion(&SNPtable); 34 | std::cout << "Tajima's D for the region is: "<< analyzeRegion.TajimasD() << std::endl; 35 | 36 | Sequence::PolyTableSlice windows(SNPtable.sbegin(), 37 | SNPtable.send(),1u,1u); 38 | Sequence::PolyTableSlice::const_iterator itr = windows.cbegin(); 39 | while(itr < windows.cend()) 40 | { 41 | Sequence::PolySites window = windows.get_slice(itr); 42 | Sequence::PolySNP analyzeWindow(&window); 43 | std::cout << "D for window " 44 | << itr-windows.cbegin() 45 | << " is: " 46 | << analyzeWindow.TajimasD() 47 | << std::endl; 48 | ++itr; 49 | } 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /examples/slidingWindow2.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | //these are made explicit for example purposes 10 | using std::vector; 11 | using std::cout; 12 | using std::endl; 13 | 14 | //run a non-overlapping 100bp window over a SNP data set 15 | 16 | int main(int argc, char **argv) 17 | { 18 | const char *infilename = argv[1]; 19 | 20 | vector data; 21 | 22 | Sequence::Alignment::GetData(data,infilename); 23 | 24 | if ( Sequence::Alignment::IsAlignment(data) && 25 | Sequence::Alignment::validForPolyAnalysis(data.begin(),data.end()) ) 26 | { 27 | const unsigned alignmentLength = data[0].length(); 28 | 29 | Sequence::PolySites SNPtable(data); 30 | 31 | Sequence::PolySNP analyzeRegion(&SNPtable); 32 | 33 | cout << "Tajima's D for the whole dataset is: " 34 | << analyzeRegion.TajimasD() 35 | << endl; 36 | 37 | Sequence::PolyTableSlice windows(SNPtable.sbegin(), 38 | SNPtable.send(), 39 | 100, //window length (bp) 40 | 100, //step size (bp) 41 | alignmentLength); 42 | 43 | for(unsigned i = 0 ; i < windows.size() ; ++i) 44 | { 45 | Sequence::PolySites window(windows[i]); //use copy constructor 46 | Sequence::PolySNP 
analyzeWindow(&window); 47 | cout << "Tajima's D for window " 48 | << i 49 | << " is: " 50 | << analyzeWindow.TajimasD() 51 | << endl; 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /examples/test_SimDataIO.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | using namespace std; 6 | using namespace Sequence; 7 | 8 | void print_problems( const SimData & d, 9 | const SimData & d2 ); 10 | 11 | int main( int argc, char ** argv ) 12 | { 13 | SimData d; 14 | while(!cin.eof()) 15 | { 16 | cin >> d >> ws; 17 | 18 | cerr << "Writing in gzip\n"; 19 | gzFile gzf = gzopen("test_zlib_out.gz","w"); 20 | write_SimData_gz(gzf, d); 21 | gzclose(gzf); 22 | 23 | cerr << "Reading from gzip\n"; 24 | //now, try to read it 25 | gzf = gzopen("test_zlib_out.gz","r"); 26 | SimData d2 = read_SimData_gz(gzf); 27 | gzclose(gzf); 28 | 29 | cerr << "Writing in binary\n"; 30 | //write it in binary 31 | ofstream obin("test_binary_out.bin",ios::binary); 32 | write_SimData_binary(obin,d); 33 | obin.close(); 34 | 35 | cerr << "Reading from binary\n"; 36 | //read it 37 | ifstream ibin("test_binary_out.bin",ios::binary); 38 | SimData d3 = read_SimData_binary(ibin); 39 | ibin.close(); 40 | 41 | cerr << "Writing to binary + gzip\n"; 42 | gzf = gzopen("test_zlib_out.bin.gz","bw"); 43 | write_SimData_gz(gzf, d, true); 44 | gzclose(gzf); 45 | 46 | cerr << "Reading from binary + gzip\n"; 47 | //now, try to read it 48 | gzf = gzopen("test_zlib_out.bin.gz","r"); 49 | SimData d4 = read_SimData_gz(gzf,true); 50 | gzclose(gzf); 51 | 52 | if( d != d2 ) 53 | { 54 | cerr << "Error: d != d2\n"; 55 | print_problems(d,d2); 56 | } 57 | if(d != d3) 58 | { 59 | cerr << "Error: d != d3\n"; 60 | print_problems(d,d3); 61 | } 62 | if(d != d4) 63 | { 64 | cerr << "Error: d != d4\n"; 65 | print_problems(d,d4); 66 | } 67 | } 68 | } 69 | 70 | void print_problems( const SimData & d, 71 | const 
SimData & d2 ) 72 | { 73 | for( unsigned i = 0 ; i < d.numsites() ; ++i ) 74 | { 75 | if( d.position(i) != d2.position(i) ) 76 | { 77 | cerr << "Position " << i << ": " << d.position(i) << ' ' << d2.position(i) << '\n'; 78 | } 79 | for( unsigned i = 0 ; i < d.size() ; ++i ) 80 | { 81 | if( d[i] != d2[i] ) 82 | { 83 | cerr << "Haplotype " << i << ": " << d[i] << "\n\n" << d2[i] << '\n'; 84 | } 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /examples/translateTest.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | const char alphabet[4] = {'A','G','C','T'}; 6 | 7 | int main(int argc, char **argv) 8 | { 9 | std::string codon; 10 | codon.resize(3); 11 | for (unsigned first = 0 ; first < 4 ; ++first) 12 | { 13 | for (unsigned second = 0 ; second < 4 ; ++second) 14 | { 15 | for (unsigned third = 0 ; third < 4 ; ++third) 16 | { 17 | codon[0] = alphabet[first]; 18 | codon[1] = alphabet[second]; 19 | codon[2] = alphabet[third]; 20 | std::cout << codon 21 | << '\t' 22 | << Sequence::Translate(codon.begin(),codon.end()) 23 | << std::endl; 24 | } 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /examples/valid_dna.cc: -------------------------------------------------------------------------------- 1 | /*! 
\include valid_dna.cc */ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int main(int argc, char **argv) 10 | { 11 | std::ifstream in(argv[1]); 12 | Sequence::Fasta seq; 13 | while (in >> seq) 14 | { 15 | std::cout << Sequence::validSeq(seq.begin(),seq.end()) 16 | << '\t' 17 | << Sequence::validSeq(seq.begin(),seq.end(),Sequence::full_dna_alphabet) 18 | << '\t' 19 | << (std::find_if(seq.begin(),seq.end(),Sequence::ambiguousNucleotide()) 20 | != seq.end()) 21 | <<'\n'; 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /index.md: -------------------------------------------------------------------------------- 1 | # The libsequence home page 2 | 3 | This is the homepage for __libsequence__, which is a C++ library for evolutionary genetics. 4 | 5 | ## Versions 6 | 7 | The current "master" branch of the library is 1.8.3. This version represents a major change to the code base. The biggest change is that the library no longer requires [boost](http://www.boost.org) or the [GSL](http://gnu.org/software/gsl) libraries. It still requires [zlib](http://zlib.net). See the Revision History section in the README, which you can find at the [GitHub repo](https://github.com/molpopgen/libsequence) for the project. 8 | 9 | ## Source code 10 | 11 | The library code is [here](https://github.com/molpopgen/libsequence) 12 | 13 | ## Library documentation 14 | 15 | [Reference manual](doc/html/index.html) 16 | 17 | ## Programs using the library 18 | 19 | There are lots of these: 20 | 21 | * [Thornton lab software page](http://molpopgen.org/software.html) 22 | * [Kevin Thornton](https://github.com/molpopgen)'s GitHub account 23 | * [Thornton lab](https://github.com/ThorntonLab) GitHub account 24 | 25 | Some specific projects using the library 26 | 27 | * [pylibseq](http://molpopgen.github.io/pylibseq) - A Python interface to libsequence. 
28 | * [fwdpp](http://molpopgen.github.io/fwdpp) - Forward simulation library described in Thornton (2014, doi: 10.1534/genetics.114.165019) 29 | * [pecnv](https://github.com/molpopgen/pecnv) - code from Rogers et al. (2014, doi: 10.1093/molbev/msu124) and Cridland et al. (2013, doi: 10.1093/molbev/mst129) for detecting structural variants and transposable element insertions 30 | * [baldwin_brown_2014](https://github.com/molpopgen/baldwin_brown_2014) - Code from Baldwin-Brown et al. (2014, doi: 10.1093/molbev/msu048) for simulating "evolve and resequence" experiments 31 | * [analysis](https://github.com/molpopgen/analysis) 32 | * [sequtils](https://github.com/molpopgen/sequtils) 33 | * [msstats](https://github.com/molpopgen/msstats) 34 | * [sweepsims](https://github.com/molpopgen/sweepsims) - selective sweep code from Jensen et al. (2008, doi: 10.1371/journal.pgen.1000198), Thornton and Jensen (2007, doi: 10.1534/genetics.106.064642) 35 | * [newgene](https://github.com/molpopgen/newgene) - copy-number variant simulation code from Thornton (2007, doi: 10.1534/genetics.107.074948) 36 | 37 | ## Citation 38 | 39 | If you use __libsequence__ for your research, or programs depending upon it, please cite the following paper: 40 | 41 | * Thornton, K. (2003) libsequence, a C++ class library for evolutionary genetic analysis. 
Bioinformatics __19__(17): 2325-2327 PMID 14630667 [Manuscript](http://bioinformatics.oxfordjournals.org/content/19/17/2325.short), [Software](https://github.com/molpopgen/libsequence) 42 | 43 | -------------------------------------------------------------------------------- /init_autotools.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | libtoolize --force --copy 3 | autoreconf -fi 4 | autoheader 5 | automake --add-missing --copy -------------------------------------------------------------------------------- /m4/ltversion.m4: -------------------------------------------------------------------------------- 1 | # ltversion.m4 -- version numbers -*- Autoconf -*- 2 | # 3 | # Copyright (C) 2004 Free Software Foundation, Inc. 4 | # Written by Scott James Remnant, 2004 5 | # 6 | # This file is free software; the Free Software Foundation gives 7 | # unlimited permission to copy and/or distribute it, with or without 8 | # modifications, as long as this notice is preserved. 9 | 10 | # @configure_input@ 11 | 12 | # serial 3337 ltversion.m4 13 | # This file is part of GNU Libtool 14 | 15 | m4_define([LT_PACKAGE_VERSION], [2.4.2]) 16 | m4_define([LT_PACKAGE_REVISION], [1.3337]) 17 | 18 | AC_DEFUN([LTVERSION_VERSION], 19 | [macro_version='2.4.2' 20 | macro_revision='1.3337' 21 | _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) 22 | _LT_DECL(, macro_revision, 0) 23 | ]) 24 | -------------------------------------------------------------------------------- /src/Coalescent/CoalescentMutation.cc: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 
8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #include 25 | 26 | namespace Sequence 27 | { 28 | namespace coalsim { 29 | void output_gametes(FILE * fp, 30 | const unsigned & segsites, 31 | const unsigned & nsam, 32 | const gamete_storage_type & gametes) 33 | /*! 34 | @brief Write an object of type gamete_storage type to a C-style file stream 35 | This function is used when you need to output simulated gametes using a 36 | method faster than the operator<< for class SimData. 37 | \param fp pointer to an open C-style output stream 38 | \param segsites the number of segregating sites in \a gametes 39 | \param nsam the number of individuals in \a gametes 40 | \param gametes the simulated sample. Must be allocated to hold at least 41 | \a segsites positions, and \a nsam strings of length \a segsites 42 | */ 43 | { 44 | fprintf(fp,"//\n"); 45 | if ( segsites > 0 ) 46 | { 47 | fprintf(fp,"segsites: %u\npositions: ",segsites); 48 | for(unsigned i=0;i. 
21 | 22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | namespace Sequence 32 | { 33 | bool containsCharacter(const PolyTable * t, 34 | const char ch) 35 | { 36 | for( PolyTable::const_data_iterator itr = t->begin() ; 37 | itr < t->end() ; 38 | ++itr ) 39 | { 40 | if ( itr->find(ch) != std::string::npos ) 41 | { 42 | return true; 43 | } 44 | } 45 | return false; 46 | } 47 | 48 | bool polyTableValid(const PolyTable * table) 49 | { 50 | for ( PolyTable::const_data_iterator itr = table->begin() ; 51 | itr < table->end() ; 52 | ++itr ) 53 | { 54 | if ( (std::find_if(itr->begin(),itr->end(),invalidPolyChar()) != itr->end()) 55 | || ( itr->length() != table->numsites() ) ) 56 | { 57 | return false; 58 | } 59 | } 60 | return true; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/PolyTableManip.cc: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #include 25 | #include 26 | 27 | namespace Sequence 28 | { 29 | polySiteVector rotatePolyTable(const Sequence::PolyTable *data) 30 | /*! 
31 | Rotate a polymorphism table 32 | into a vector of pairs, where the 33 | pairs are of type std::pair, 34 | representing the site position and the characters 35 | at that site 36 | \param data a pointer to a Sequence::PolyTable 37 | \ingroup polytables 38 | */ 39 | { 40 | polySiteVector L; 41 | for (unsigned i = 0 ; i < data->numsites() ; ++i) 42 | { 43 | std::string s; 44 | for(unsigned j = 0 ; j < data->size() ; ++j) 45 | { 46 | s += (*data)[j][i]; 47 | } 48 | L.push_back( polymorphicSite(data->position(i), s) ); 49 | } 50 | return L; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/Seq/Fasta.cc: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | namespace Sequence 30 | { 31 | Fasta::Fasta() : Seq() {} 32 | 33 | Fasta::Fasta (const Seq & seq) : Seq(seq) 34 | /*! 
copy constructor */ 35 | {} 36 | 37 | Fasta::Fasta( Seq && seq ) : Seq(std::move(seq)) 38 | { 39 | } 40 | 41 | std::istream & Fasta::read (std::istream & stream) 42 | { 43 | name.clear(); 44 | seq.clear(); 45 | std::string temp; 46 | int ch = stream.peek(); 47 | if( stream.eof() ) { return stream; } 48 | if (char(ch) != '>') 49 | { 50 | throw std::runtime_error("Fasta.cc: error, file not in FASTA format"); 51 | } 52 | //Read in name 53 | //stream >> ch >> std::ws; 54 | ch = stream.get(); 55 | std::getline(stream,name); 56 | stream >> std::ws; 57 | seq.reserve(1000); 58 | while( char( ch = stream.peek() ) != '>' && ! stream.eof() ) 59 | { 60 | std::getline(stream,temp); 61 | seq += temp; 62 | } 63 | return (stream); 64 | } 65 | 66 | std::ostream & Fasta::print (std::ostream & stream) const 67 | { 68 | stream << '>' 69 | << name 70 | << '\n' 71 | << seq; 72 | return stream; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/Seq/fastq.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace Sequence 6 | { 7 | fastq::fastq(void) : Seq(), quality(std::string()), repeat_name(true) {} 8 | 9 | fastq::fastq(const std::string &name, const std::string &seq, 10 | const std::string &qual) 11 | : Seq(name, seq), quality(qual), repeat_name(true) 12 | { 13 | } 14 | 15 | fastq::fastq(std::string &&name, std::string &&seq, std::string &&qual) 16 | : Seq(std::move(name), std::move(seq)), quality(std::move(qual)), 17 | repeat_name(true) 18 | { 19 | } 20 | 21 | fastq::fastq(const Seq &s) 22 | : Seq(s.name, s.seq), quality(std::string()), repeat_name(true) 23 | { 24 | } 25 | 26 | fastq::fastq(Seq &&s) 27 | : Seq(std::move(s)), quality(std::string()), repeat_name(true) 28 | { 29 | } 30 | 31 | void 32 | fastq::repname(const bool &b) 33 | { 34 | repeat_name = b; 35 | } 36 | 37 | std::istream & 38 | fastq::read(std::istream &stream) 39 | { 40 | if 
(stream.peek() == EOF) 41 | return stream; 42 | if (char(stream.peek()) != '@') 43 | throw std::runtime_error("Sequence::fastq::read - error: record " 44 | "did not begin with \'@\'"); 45 | std::string temp; 46 | stream.ignore(1, '@'); 47 | std::getline(stream, name); 48 | std::getline(stream, seq); 49 | stream >> std::ws; 50 | if (char(stream.peek()) != '+') 51 | throw std::runtime_error("Sequence::fastq::read - error: third " 52 | "line did not begin with \'+\'"); 53 | stream >> temp >> std::ws; 54 | if (temp.size() == 1) 55 | repeat_name = false; 56 | quality.resize(seq.length()); 57 | stream.read(&quality[0], std::streamsize(seq.length())); 58 | stream >> std::ws; 59 | if (seq.length() != quality.length()) 60 | throw std::runtime_error("Sequence::fastq::read - error: sequence " 61 | "and quality strings differ in length"); 62 | return stream; 63 | } 64 | 65 | std::ostream & 66 | fastq::print(std::ostream &stream) const 67 | { 68 | stream << '@' << name << '\n' << seq << '\n' << '+'; 69 | if (this->repeat_name) 70 | { 71 | stream << name; 72 | } 73 | stream << '\n' << quality; 74 | return stream; 75 | } 76 | } //ns Sequence 77 | -------------------------------------------------------------------------------- /src/SeqAlphabets.cc: -------------------------------------------------------------------------------- 1 | //! 
\file src/SeqAlphabets.cc 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace Sequence { 8 | const alphabet_t dna_alphabet{ {'A','C','G','T', 9 | 'R','Y','S','W', 10 | 'K','M','B','D', 11 | 'H','V','N','-'} }; 12 | 13 | const alphabet_t dna_poly_alphabet{ {'A','C','G','T', //0-3 14 | '0','1','-','N', //4-7 15 | '\0', //8 16 | } }; 17 | 18 | const alphabet_t::size_type NOTPOLYCHAR = dna_poly_alphabet.size(); 19 | 20 | const alphabet_t::size_type POLYEOS = alphabet_t::size_type( std::distance(dna_poly_alphabet.begin(), 21 | std::find(dna_poly_alphabet.begin(), 22 | dna_poly_alphabet.end(), 23 | '\0') 24 | ) ); 25 | bool isDNA( const char & ch) 26 | { 27 | return std::find( dna_alphabet.begin(), 28 | dna_alphabet.end(), 29 | std::toupper(ch) ) != dna_alphabet.end(); 30 | } 31 | 32 | bool ambiguousNucleotide::operator()(const char & c) const 33 | { 34 | return std::distance( dna_alphabet.begin(), 35 | std::find(dna_alphabet.begin(), 36 | dna_alphabet.end(), 37 | std::toupper(c)) ) > 3; 38 | /* 39 | const char ch = char(std::toupper(c)); 40 | return (ch != 'A' && 41 | ch != 'G' && 42 | ch != 'T' && 43 | ch != 'C' ); 44 | */ 45 | } 46 | 47 | bool invalidPolyChar::operator()(const char & nucleotide) const 48 | { 49 | auto itr = std::find(dna_poly_alphabet.begin(), 50 | dna_poly_alphabet.end(), 51 | std::toupper(nucleotide)); 52 | if(itr == dna_poly_alphabet.end()) return 1; 53 | auto d = std::distance( dna_alphabet.begin(), 54 | itr ); 55 | return ( d > 3 && d < 14 ); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/SeqConstants.cc: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 
8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | along with libsequence. If not, see <http://www.gnu.org/licenses/>. 21 | 22 | */ 23 | 24 | #include 25 | #include 26 | 27 | namespace Sequence 28 | { 29 | /*! \var const unsigned SEQMAXUNSIGNED 30 | The maximum value of an unsigned integer. 31 | */ 32 | const unsigned SEQMAXUNSIGNED = std::numeric_limits::max(); 33 | /*! \var const double SEQMAXDOUBLE 34 | The maximum value of a double 35 | */ 36 | const double SEQMAXDOUBLE = std::numeric_limits::max(); 37 | } 38 | -------------------------------------------------------------------------------- /src/Unweighted.cc: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 
18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #include 25 | #include 26 | 27 | namespace Sequence 28 | { 29 | WeightingScheme2::weights2_t Unweighted2::operator()(const std::string &, const std::string &,Sequence::GeneticCodes) const 30 | /*! 31 | Calculate actually calculates the weights for each branch 32 | \param codon1 a std::string of length 3 representing a sense codon 33 | \param codon2 a std::string of length 3 representing a sense codon 34 | */ 35 | { 36 | return weights2_t({{1.,1.}}); 37 | } 38 | 39 | WeightingScheme3::weights3_t Unweighted3::operator()(const std::string &, const std::string &,Sequence::GeneticCodes ) const 40 | /*! 41 | Calculate actually calculates the weights for each branch 42 | \param codon1 a std::string of length 3 representing a sense codon 43 | \param codon2 a std::string of length 3 representing a sense codon 44 | */ 45 | { 46 | return weights3_t({{1.,1.,1.,1.,1.,1.}}); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/libsequenceConfig.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace std; 7 | 8 | //From config.h 9 | static const std::string LIBSEQ_VERSION(VERSION); 10 | 11 | int main(int argc, char ** argv) 12 | { 13 | if(argc==1) 14 | { 15 | cerr << "usage:\n" 16 | << "\t--version\tPrint out version number and exit\n"; 17 | exit(EXIT_SUCCESS); 18 | } 19 | 20 | string av1(argv[1]); 21 | if( av1 == "--version" ) cout << LIBSEQ_VERSION << '\n'; 22 | 23 | exit(EXIT_SUCCESS); 24 | } 25 | -------------------------------------------------------------------------------- /src/polySiteVector.cc: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to 
email me. 6 | 7 | This file is part of libsequence. 8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | #include 24 | #include 25 | 26 | 27 | namespace Sequence 28 | { 29 | polySiteVector make_polySiteVector(const Sequence::PolyTable & data) 30 | /*! 31 | Rotate a polymorphism table 32 | into a vector of pairs, where the 33 | pairs are of type std::pair, 34 | representing the site position and the characters 35 | at that site 36 | \param data a pointer to a Sequence::PolyTable 37 | \ingroup polytables 38 | */ 39 | { 40 | polySiteVector L; 41 | for (unsigned i = 0 ; i < data.numsites() ; ++i) 42 | { 43 | std::string s; 44 | for(unsigned j = 0 ; j < data.size() ; ++j) 45 | { 46 | s += data[j][i]; 47 | } 48 | L.emplace_back( polymorphicSite(data.position(i), s)); 49 | } 50 | return L; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/summstats/algorithm.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SEQUENCE_SUMMSTATS_ALGORITHM 2 | #define SEQUENCE_SUMMSTATS_ALGORITHM 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace Sequence 9 | { 10 | namespace summstats_algo 11 | { 12 | template 13 | inline std::pair 14 | mismatch_skip_missing(iterator beg, iterator end, iterator beg2) 15 | { 16 | auto m = std::mismatch(beg, end, beg2); 17 | while (m.first < end && (*m.first 
< 0 || *m.second < 0)) 18 | { 19 | m = std::mismatch(m.first + 1, end, m.second + 1); 20 | } 21 | return m; 22 | } 23 | 24 | template 25 | inline std::int32_t 26 | ndiff_skip_missing(iterator beg, iterator end, iterator beg2) 27 | { 28 | std::int32_t ndiffs = 0; 29 | auto m = mismatch_skip_missing(beg, end, beg2); 30 | 31 | while (m.first < end) 32 | { 33 | ++ndiffs; 34 | m = mismatch_skip_missing(m.first + 1, end, m.second + 1); 35 | } 36 | return ndiffs; 37 | } 38 | } // namespace summstats_algo 39 | } // namespace Sequence 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /src/summstats/auxillary.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace Sequence 5 | { 6 | namespace summstats_aux 7 | { 8 | double 9 | a_sub_n(const std::uint32_t nsam) 10 | { 11 | double rv = 0.0; 12 | for (std::uint32_t i = 1; i < nsam; ++i) 13 | { 14 | rv += 1.0 / static_cast(i); 15 | } 16 | return rv; 17 | } 18 | 19 | double 20 | b_sub_n(const std::uint32_t nsam) 21 | { 22 | double rv = 0.0; 23 | for (std::uint32_t i = 1; i < nsam; ++i) 24 | { 25 | rv += 1.0 / std::pow(static_cast(i), 2.0); 26 | } 27 | return rv; 28 | } 29 | 30 | double 31 | b_sub_n_plus1(const std::uint32_t nsam) 32 | { 33 | double rv = 0.0; 34 | for (std::uint32_t i = 1; i < nsam + 1; ++i) 35 | { 36 | rv += 1.0 / std::pow(static_cast(i), 2.0); 37 | } 38 | return rv; 39 | } 40 | } // namespace summstats_aux 41 | } // namespace Sequence 42 | -------------------------------------------------------------------------------- /src/summstats/garud.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace Sequence 12 | { 13 | GarudStats::GarudStats() 14 | : H1(1.), H12(std::numeric_limits::quiet_NaN()), 15 | 
H2H1(std::numeric_limits::quiet_NaN()) 16 | { 17 | } 18 | 19 | GarudStats::GarudStats(const double __h1, const double __h12, 20 | const double __h2h1) 21 | : H1(__h1), H12(__h12), H2H1(__h2h1) 22 | { 23 | } 24 | 25 | GarudStats 26 | garud_statistics(const VariantMatrix& m) 27 | { 28 | GarudStats rv; 29 | if (m.empty() || !m.nsam()) 30 | { 31 | return rv; 32 | } 33 | // Although one of the stats is haplotype diversity, 34 | // w 35 | auto labels = label_haplotypes(m); 36 | std::unordered_map counts; 37 | std::size_t nmissing = 0; 38 | for (auto l : labels) 39 | { 40 | if (l < 0) 41 | { 42 | ++nmissing; 43 | } 44 | else 45 | { 46 | counts[l]++; 47 | } 48 | } 49 | if (counts.size() < 2) 50 | { 51 | return rv; 52 | } 53 | rv.H1 = 1.0 - diversity_from_counts(counts, m.nsam() - nmissing); 54 | std::vector> vcounts( 55 | counts.begin(), counts.end()); 56 | std::sort(vcounts.begin(), vcounts.end(), 57 | [](const std::pair& a, 58 | const std::pair& b) { 59 | return a.second > b.second; 60 | }); 61 | double nsam 62 | = static_cast(m.nsam()) - static_cast(nmissing); 63 | rv.H12 = rv.H1 64 | + 2. 
* static_cast(vcounts[0].second) 65 | * static_cast(vcounts[1].second) 66 | / (nsam * (nsam - 1.0)); 67 | rv.H2H1 = (rv.H1 68 | - static_cast(vcounts[0].second 69 | * (vcounts[0].second - 1)) 70 | / (nsam * (nsam - 1))) 71 | / rv.H1; 72 | return rv; 73 | } 74 | } // namespace Sequence 75 | -------------------------------------------------------------------------------- /src/summstats/generic.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace Sequence 8 | { 9 | double 10 | diversity_from_counts( 11 | const std::unordered_map& counts, 12 | const std::size_t nsam) 13 | { 14 | if (counts.empty() || !nsam) 15 | { 16 | return std::numeric_limits::quiet_NaN(); 17 | } 18 | double hom = 0.0; 19 | for (auto&& c : counts) 20 | { 21 | hom += static_cast(c.second * (c.second - 1)); 22 | } 23 | hom /= static_cast(nsam * (nsam - 1)); 24 | return 1.0 - hom; 25 | } 26 | } // namespace Sequence 27 | -------------------------------------------------------------------------------- /src/summstats/ld.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace Sequence 9 | { 10 | TwoLocusCounts::TwoLocusCounts(std::int8_t i_, std::int8_t j_, int n_) 11 | : i{ i_ }, j{ j_ }, n{ n_ } 12 | { 13 | } 14 | 15 | std::vector 16 | two_locus_haplotype_counts(const VariantMatrix& m, std::size_t sitei, 17 | const std::size_t sitej, 18 | const bool skip_missing) 19 | { 20 | auto ri = get_ConstRowView(m, sitei); 21 | auto rj = get_ConstRowView(m, sitej); 22 | std::vector rv; 23 | for (auto i = ri.begin(), j = rj.begin(); i < ri.end(); ++i, ++j) 24 | { // with skip_missing, a sample is dropped when EITHER site is missing (negative state) 25 | if (!skip_missing || (*i >= 0 && *j >= 0)) 26 | { 27 | auto exists 28 | = std::find_if(rv.begin(), rv.end(), 29 | [i, j](const TwoLocusCounts& t) { 30 | return t.i == *i && t.j == *j; 31 | }); 32 | 
if (exists == rv.end()) 33 | { 34 | rv.emplace_back(*i, *j, 1); 35 | } 36 | else 37 | { 38 | exists->n++; 39 | } 40 | } 41 | } 42 | return rv; 43 | } 44 | } // namespace Sequence 45 | -------------------------------------------------------------------------------- /src/summstats/lhaf.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace Sequence 9 | { 10 | std::vector 11 | lhaf(const VariantMatrix &m, const std::int8_t refstate, const double l) 12 | { 13 | std::vector dcounts; 14 | dcounts.reserve(m.nsites()); 15 | const auto find_nonref = [refstate](const std::int8_t x) { 16 | return x != refstate && !(x < 0); 17 | }; 18 | for (std::size_t i = 0; i < m.nsites(); ++i) 19 | { 20 | auto r = get_ConstRowView(m, i); 21 | dcounts.push_back( 22 | std::count_if(r.begin(), r.end(), find_nonref)); 23 | } 24 | 25 | // Get the values for each element in the data 26 | std::vector rv; 27 | rv.reserve(m.nsam()); 28 | for (std::size_t i = 0; i < m.nsam(); ++i) 29 | { 30 | auto c = get_ConstColView(m, i); 31 | auto j = std::find_if(c.cbegin(), c.cend(), find_nonref); 32 | double score = 0.0; 33 | while (j != c.cend()) 34 | { 35 | size_t d2 = static_cast( 36 | std::distance(c.cbegin(), j)); 37 | score += std::pow(static_cast(dcounts[d2]), l); 38 | j = std::find_if(j + 1, c.cend(), find_nonref); 39 | } 40 | rv.push_back(score); 41 | } 42 | return rv; 43 | } 44 | } // namespace Sequence 45 | -------------------------------------------------------------------------------- /src/summstats/nsl_common.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SEQUENCE_SUMMSTATS_NSL_COMMON_HPP 2 | #define SEQUENCE_SUMMSTATS_NSL_COMMON_HPP 3 | 4 | // These functions are not exported. 5 | // They are used internally. 
6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace Sequence 15 | { 16 | namespace summstats_details 17 | { 18 | struct suffix_edges 19 | { 20 | std::int64_t left, right; 21 | suffix_edges() : left(-1), right(-1) {} 22 | }; 23 | 24 | static void 25 | update_counts(double nsl_values[2], double ihs_values[2], 26 | int counts[2], const std::size_t nsites, 27 | // NOTE: code smell here -- dangerous 28 | const double * positions, 29 | const std::size_t index, const std::int64_t left, 30 | const std::int64_t right) 31 | { 32 | if (left >= 0 && static_cast(right) < nsites) 33 | //Then there are SNPs differentiating 34 | //i and j within the region 35 | { 36 | nsl_values[index] += static_cast(right - left); 37 | //TODO: check if we need to add one? 38 | ihs_values[index] 39 | += positions[static_cast(right)] 40 | - positions[static_cast(left)]; 41 | counts[index]++; 42 | } 43 | } 44 | 45 | inline nSLiHS 46 | get_stat(const ConstRowView& core_view, const std::int8_t refstate, 47 | const double nsl_values[2], const double ihs_values[2], 48 | const int counts[2]) 49 | { 50 | 51 | double nSL_den = nsl_values[0] / static_cast(counts[0]); 52 | double nSL_num = nsl_values[1] / static_cast(counts[1]); 53 | double iHS_den = ihs_values[0] / static_cast(counts[0]); 54 | double iHS_num = ihs_values[1] / static_cast(counts[1]); 55 | auto nonrefcount = static_cast( 56 | std::count_if(core_view.begin(), core_view.end(), 57 | [refstate](const std::int8_t i) { 58 | return i >= 0 && i != refstate; 59 | })); 60 | return nSLiHS{ std::log(nSL_num) - std::log(nSL_den), 61 | std::log(iHS_num) - std::log(iHS_den), 62 | nonrefcount }; 63 | } 64 | } // namespace summstats_details 65 | } // namespace Sequence 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /src/summstats/nvariablesites.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 
#include 3 | #include 4 | 5 | namespace Sequence 6 | { 7 | std::uint32_t 8 | nvariable_sites(const AlleleCountMatrix& m) 9 | { 10 | std::uint32_t nv = 0; 11 | for (std::size_t site = 0; site < m.nrow; ++site) 12 | { 13 | auto r = m.row(site); 14 | auto nstates 15 | = std::count_if(r.first, r.second, 16 | [](const AlleleCountMatrix::value_type c) { 17 | return c > 0; 18 | }); 19 | if (nstates > 1) 20 | { 21 | ++nv; 22 | } 23 | } 24 | return nv; 25 | } 26 | 27 | std::uint32_t 28 | nbiallelic_sites(const AlleleCountMatrix& m) 29 | { 30 | std::uint32_t nv = 0; 31 | for (std::size_t site = 0; site < m.nrow; ++site) 32 | { 33 | auto r = m.row(site); 34 | auto nstates 35 | = std::count_if(r.first, r.second, 36 | [](const AlleleCountMatrix::value_type c) { 37 | return c > 0; 38 | }); 39 | if (nstates == 2) 40 | { 41 | ++nv; 42 | } 43 | } 44 | return nv; 45 | } 46 | 47 | std::uint32_t 48 | total_number_of_mutations(const AlleleCountMatrix& m) 49 | { 50 | std::uint32_t nv = 0; 51 | for (std::size_t site = 0; site < m.nrow; ++site) 52 | { 53 | auto r = m.row(site); 54 | auto nstates 55 | = std::count_if(r.first, r.second, 56 | [](const AlleleCountMatrix::value_type c) { 57 | return c > 0; 58 | }); 59 | if (nstates > 1) 60 | { 61 | nv += static_cast(nstates) - 1; 62 | } 63 | } 64 | return nv; 65 | } 66 | } // namespace Sequence 67 | -------------------------------------------------------------------------------- /src/summstats/rmin.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace Sequence 9 | { 10 | std::int32_t 11 | rmin(const VariantMatrix& m) 12 | { 13 | if (m.nsites() < 2) 14 | { 15 | return -1; 16 | } 17 | Sequence::AlleleCountMatrix acm(m); 18 | auto ac = allele_counts(acm); 19 | std::vector biallelic_site_indexes; 20 | for (std::size_t i = 0; i < ac.size(); ++i) 21 | { 22 | if (ac[i].nstates == 2) 23 | { 24 | 
biallelic_site_indexes.push_back(i); 25 | } 26 | } 27 | if (biallelic_site_indexes.size() < 2) 28 | { 29 | return 0; 30 | } 31 | /* x is the position (within biallelic_site_indexes) where the interval being scanned starts; it moves to a each time a pair of sites shows all four haplotypes */ bool flag = false; 32 | std::size_t x = 0; 33 | std::int32_t rv = 0; 34 | for (std::size_t a = x + 1; a < biallelic_site_indexes.size(); ++a) 35 | { 36 | for (std::size_t b = (!flag) ? x : a - 1; b < a; ++b) 37 | { 38 | flag = false; 39 | // We do not allow missing data to result in 40 | // additional haplotypes 41 | auto tl = two_locus_haplotype_counts(m, biallelic_site_indexes[a], biallelic_site_indexes[b], true); /* a and b are positions in biallelic_site_indexes; map them back to site indexes of m so that only biallelic sites are compared */ 42 | if (tl.size() == 4) 43 | { 44 | ++rv; 45 | flag = true; 46 | break; 47 | } 48 | } 49 | if (flag == true) 50 | { 51 | x = a; 52 | } 53 | } 54 | return rv; 55 | } 56 | } // namespace Sequence 57 | -------------------------------------------------------------------------------- /src/summstats/tajd.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | namespace Sequence 7 | { 8 | double 9 | tajd(const AlleleCountMatrix& ac) 10 | { 11 | double pi = 0.0; 12 | int S = 0; 13 | std::int32_t max_nsam = 0; 14 | for (std::size_t i = 0; i < ac.counts.size(); i += ac.ncol) 15 | { 16 | std::int32_t nsam = 0; 17 | double homozygosity = 0.0; 18 | int nstates = 0; 19 | for (std::size_t j = i; j < i + ac.ncol; ++j) 20 | { 21 | if (ac.counts[j] > 0) 22 | { 23 | ++nstates; 24 | nsam += ac.counts[j]; 25 | homozygosity += static_cast( 26 | ac.counts[j] * (ac.counts[j] - 1)); 27 | } 28 | } 29 | 30 | if (nstates) 31 | { 32 | max_nsam = std::max(max_nsam, nsam); 33 | S += nstates - 1; 34 | pi += 1.0 35 | - homozygosity 36 | / static_cast(nsam * (nsam - 1)); 37 | } 38 | } 39 | if (!S) 40 | { 41 | return std::numeric_limits::quiet_NaN(); 42 | } 43 | auto a1 = summstats_aux::a_sub_n(static_cast(max_nsam)); 44 | double w = static_cast(S) / a1; 45 | auto a2 = summstats_aux::b_sub_n(static_cast(max_nsam)); 46 | auto dn = static_cast(max_nsam); 47 | double b1 = (dn + 1.0) / (3.0 * (dn - 1.0)); 48 | double b2 49
| = (2.0 * (std::pow(dn, 2.0) + dn + 3.0)) / (9.0 * dn * (dn - 1.0)); 50 | double c1 = b1 - 1.0 / a1; 51 | double c2 = b2 - (dn + 2.0) / (a1 * dn) + a2 / std::pow(a1, 2.0); 52 | double e1 = c1 / a1; 53 | double e2 = c2 / (std::pow(a1, 2.0) + a2); 54 | double denominator = std::pow((e1 * S + e2 * S * (S - 1.0)), 0.5); 55 | return (pi - w) / denominator; 56 | } 57 | } // namespace Sequence 58 | -------------------------------------------------------------------------------- /src/summstats/thetapi.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace Sequence 5 | { 6 | /* Returns the sum, over consecutive groups of ac.ncol entries of ac.counts, of 1 - sum_k c_k (c_k - 1) / (n (n - 1)), where c_k are the counts in the group and n is their total. Groups with n < 2 contribute nothing. */ double 7 | thetapi(const AlleleCountMatrix& ac) 8 | { 9 | double pi = 0.0; 10 | for (std::size_t i = 0; i < ac.counts.size(); i += ac.ncol) 11 | { 12 | std::int32_t nsam = 0; 13 | double homozygosity = 0.0; 14 | for (std::size_t j = i; j < i + ac.ncol; ++j) 15 | { 16 | nsam += ac.counts[j]; 17 | homozygosity += static_cast( 18 | ac.counts[j] * (ac.counts[j] - 1)); 19 | } 20 | if (nsam > 1) /* with fewer than two observed alleles nsam * (nsam - 1) is 0 and the term would be 0/0; skip the site instead of adding NaN to the total */ pi += 1.0 21 | - homozygosity / static_cast(nsam * (nsam - 1)); 22 | } 23 | return pi; 24 | } 25 | 26 | } // namespace Sequence 27 | -------------------------------------------------------------------------------- /src/summstats/thetaw.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | namespace Sequence 7 | { 8 | double 9 | thetaw(const AlleleCountMatrix& ac) 10 | { 11 | double w = 0.0; 12 | for (std::size_t i = 0; i < ac.counts.size(); i += ac.ncol) 13 | { 14 | std::uint32_t nsam = 0, nstates = 0; 15 | for (std::size_t j = i; j < i + ac.ncol; ++j) 16 | { 17 | if (ac.counts[j] > 0) 18 | { 19 | nsam += static_cast( 20 | ac.counts[j]); 21 | nstates++; 22 | } 23 | } 24 | if (nstates > 1) 25 | { 26 | auto denom = summstats_aux::a_sub_n(nsam); 27 | w += static_cast(nstates - 1) / denom; 28 | } 29 | } 30 | return w; 31 | } 32 | 33 | } // namespace Sequence 34 |
-------------------------------------------------------------------------------- /src/summstats_deprecated/Garud.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | namespace Sequence 11 | { 12 | GarudStats 13 | H1H12(const SimData &d) 14 | /*! 15 | H1 is total haplotype homozygosity. 16 | H12 is haplotype homozygosity, combining two most common haplotypes. H12 = 17 | H1 + 2p1p2 18 | H2H1 = H2/H1, where H2 is haplotype homozygosity for all but most common 19 | haplotype. 20 | H2H1 = (H1 - p1^2)/H1 21 | */ 22 | { 23 | if (d.empty()) 24 | return GarudStats(); 25 | set uhaps(d.begin(), d.end()); 26 | /* One count per distinct haplotype: the number of elements of d 27 | that compare equal to it. */ 28 | vector hapcounts; 29 | hapcounts.reserve(uhaps.size()); 30 | for(auto & uh : uhaps) 31 | { 32 | hapcounts.push_back(static_cast(std::count(d.begin(),d.end(),uh))); 33 | } 34 | const double denom = static_cast(d.size() * (d.size() - 1)); /* NOTE(review): 0 when d holds a single sequence, so H1 is 0/0 in that case */ 35 | double H1 = 0.0; 36 | for(auto c : hapcounts) 37 | { 38 | H1 += c*(c-1.0); 39 | } 40 | H1 /= denom; 41 | 42 | sort(hapcounts.begin(), hapcounts.end(), 43 | std::bind(greater(), std::placeholders::_1, 44 | std::placeholders::_2)); 45 | const double second = (hapcounts.size() > 1) ? hapcounts[1] : 0.0; /* when every sequence is identical there is no second haplotype class; use 0 rather than reading past the end of hapcounts */ double H12 = H1 46 | + 2. * hapcounts[0] * second 47 | / std::pow(double(d.size()), 2.); 48 | double H2H1 = (H1 49 | - double(hapcounts[0] * (hapcounts[0] - 1)) 50 | / double(d.size() * (d.size() - 1))) 51 | / H1; 52 | return GarudStats(H1, H12, H2H1); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/summstats_deprecated/Snn.cc: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu 4 | 5 | Remove the brackets to email me. 6 | 7 | This file is part of libsequence.
8 | 9 | libsequence is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | libsequence is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | long with libsequence. If not, see . 21 | 22 | */ 23 | 24 | #include 25 | 26 | namespace Sequence 27 | { 28 | double Snn_statistic( const unsigned individuals[], 29 | const std::vector< std::vector > & dkj, 30 | const unsigned config[], 31 | const size_t & npop, 32 | const unsigned & nsam) 33 | { 34 | /* 35 | notation for variables follows Hudson's paper 36 | */ 37 | double snn = 0.; 38 | 39 | //store the d_kj for the whole sample 40 | double * d_kj = new double[nsam-1]; 41 | 42 | //store d_kj for within-population comparisons 43 | std::vector d_kj_win; 44 | for(unsigned k=0; kb) 62 | std::swap(a,b); 63 | 64 | double ndiffs = dkj[a][b]; 65 | d_kj[dummy++] = ndiffs; 66 | //figure out what pop j is in; 67 | unsigned pop_j=0,ttl=0; 68 | while (pop_j < npop) 69 | { 70 | ttl += config[pop_j]; 71 | if (j < ttl) 72 | break; 73 | pop_j++; 74 | } 75 | if (pop==pop_j) 76 | d_kj_win.push_back(ndiffs); 77 | } 78 | } 79 | //Calculate T_k 80 | double min = d_kj[0]; 81 | for (unsigned j = 1 ; j < nsam-1 ; ++j) 82 | if (d_kj[j] < min) min = d_kj[j]; 83 | 84 | std::ptrdiff_t T_k = std::count(d_kj,d_kj+(nsam-1),min); 85 | 86 | //Calculate M_k 87 | std::ptrdiff_t M_k = std::count(d_kj_win.begin(), 88 | d_kj_win.end(),min); 89 | snn += double(M_k)/double(T_k); 90 | } 91 | delete [] d_kj; 92 | return snn/double(nsam); 93 | } 94 | 95 | } 96 | 
-------------------------------------------------------------------------------- /src/summstats_deprecated/SummStats.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | using namespace std; 8 | 9 | namespace Sequence 10 | { 11 | double Dij(const polymorphicSite & p, const std::vector< unsigned > & config, const unsigned & i, const unsigned & j) 12 | { 13 | unsigned rv = 0; 14 | unsigned N = 0; 15 | unsigned start1 = accumulate(config.begin(),config.begin()+i,0u), 16 | start2 = accumulate(config.begin(),config.begin()+j,0u); 17 | for( unsigned x = start1 ; x < start1 + config[i] ; ++x ) 18 | { 19 | for(unsigned y = start2 ; y < start2 + config[j] ; ++y) 20 | { 21 | char ch1 = char(std::toupper(p.second[x])),ch2=char(std::toupper(p.second[y])); 22 | if(ch1 != 'N' && ch2 != 'N') 23 | { 24 | rv += (ch1 != ch2) ? 1u : 0u; 25 | } 26 | else 27 | { 28 | ++N; 29 | } 30 | } 31 | } 32 | return double(rv)/(double(config[i]+config[j]-N)); 33 | } 34 | 35 | double Gmin(const polySiteVector & , const std::vector< unsigned > & ) 36 | { 37 | throw std::runtime_error("not implemented yet"); 38 | unsigned mdxy = numeric_limits::max(); 39 | return mdxy; 40 | } 41 | }//ns Sequence 42 | -------------------------------------------------------------------------------- /src/summstats_deprecated/lHaf.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | namespace Sequence 7 | { 8 | std::vector 9 | lHaf(const SimData &data, const double l) 10 | { 11 | //using range_type = tbb::blocked_range; 12 | //using data_range_type 13 | // = tbb::blocked_range::const_iterator>; 14 | // Get derived mutation frequency counts per site 15 | std::vector dcounts; 16 | dcounts.reserve(data.numsites()); 17 | for (auto i = data.sbegin(); i < data.send(); ++i) 18 | { 19 | dcounts.push_back(static_cast( 20 | 
std::count(i->second.begin(), i->second.end(), '1'))); 21 | } 22 | // Get the values for each element in the data 23 | std::vector rv; 24 | rv.reserve(data.size()); 25 | for (auto &i : data) 26 | { 27 | auto j 28 | = std::find_if(i.cbegin(), i.cend(), 29 | [](const char &ch) { return ch == '1'; }); 30 | double score = 0.0; 31 | while (j != i.cend()) 32 | { 33 | size_t d2 = size_t(j - i.cbegin()); 34 | score += std::pow(static_cast(dcounts[d2]), l); 35 | j = std::find(j + 1, i.cend(), '1'); 36 | } 37 | rv.push_back(score); 38 | } 39 | return rv; 40 | } 41 | } // namespace Sequence 42 | -------------------------------------------------------------------------------- /src/variant_matrix/AlleleCountMatrix.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace Sequence 6 | { 7 | /* Flattens per-site allele counts into one vector: for every row of m, max_allele() + 1 consecutive entries copied from a StateCounts object applied to that row. Throws std::invalid_argument when m.max_allele() is negative and std::runtime_error when, after counting a row, c.max_allele_idx exceeds m.max_allele(). */ std::vector 8 | AlleleCountMatrix::init_counts(const VariantMatrix& m) 9 | { 10 | if (m.max_allele() < 0) 11 | { 12 | throw std::invalid_argument("matrix max_allele must be >= 0"); 13 | } 14 | std::vector counts; 15 | counts.reserve(m.nsites() * static_cast(m.max_allele() + 1)); /* one group of max_allele() + 1 counts is pushed per site */ 16 | StateCounts c; 17 | for (std::size_t i = 0; i < m.nsites(); ++i) 18 | { 19 | auto r = get_ConstRowView(m, i); 20 | c(r); /* count this row first so that the check below sees the current row, including the last one */ 21 | if (static_cast(c.max_allele_idx) > m.max_allele()) 22 | { 23 | throw std::runtime_error("found allele value greater " 24 | "than matrix.max_allele"); 25 | } 26 | for (std::size_t j = 0; 27 | j < static_cast(m.max_allele() + 1); ++j) 28 | { 29 | counts.push_back(c.counts[j]); 30 | } 31 | } 32 | return counts; 33 | } 34 | 35 | AlleleCountMatrix::AlleleCountMatrix(const VariantMatrix& m) 36 | : counts(init_counts(m)), 37 | ncol(!m.empty() ? static_cast(m.max_allele()) + 1 38 | : 0), 39 | nrow(!m.empty() ?
counts.size() / ncol : 0), nsam(m.nsam()) 40 | { 41 | } 42 | 43 | std::pair::const_iterator, 44 | std::vector::const_iterator> 45 | AlleleCountMatrix::row(const std::size_t i) const 46 | { 47 | if (i >= nrow) 48 | { 49 | throw std::out_of_range("row index out of range"); 50 | } 51 | return std::make_pair(counts.begin() + i * ncol, 52 | counts.begin() + i * ncol + ncol); 53 | } 54 | } // namespace Sequence 55 | -------------------------------------------------------------------------------- /src/variant_matrix/windows.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace Sequence 5 | { 6 | VariantMatrix 7 | make_window(const VariantMatrix& m, const double beg, const double end) 8 | { 9 | return make_slice(m, beg, end, 0, m.nsam()); 10 | } 11 | 12 | VariantMatrix 13 | make_slice(const VariantMatrix& m, const double beg, const double end, 14 | const std::size_t i, const std::size_t j) 15 | { 16 | if (end < beg) 17 | { 18 | throw std::invalid_argument("end must be >= beg"); 19 | } 20 | if (!(j > i)) 21 | { 22 | throw std::invalid_argument("i must be < j"); 23 | } 24 | if (j > m.nsam()) 25 | { 26 | throw std::invalid_argument("slice indexes out of range"); 27 | } 28 | auto pb = std::lower_bound(m.pbegin(), m.pend(), beg); 29 | auto pe = std::upper_bound(pb, m.pend(), end); 30 | if (pb == m.pend()) 31 | { 32 | std::unique_ptr gc( 33 | new NonOwningGenotypeCapsule(m.cdata(), 0, 0, 0, 0, 0)); 34 | std::unique_ptr pc( 35 | new NonOwningPositionCapsule(pb, 0)); 36 | return VariantMatrix(std::move(gc), std::move(pc), -1); 37 | } 38 | std::size_t nsites = pe - pb; 39 | std::size_t nsam = j - i; 40 | std::size_t row_offset = pb - m.pbegin(); 41 | std::unique_ptr gc(new NonOwningGenotypeCapsule( 42 | m.cdata(), nsites, nsam, row_offset, i, m.nsam())); 43 | std::unique_ptr pc( 44 | new NonOwningPositionCapsule(pb, pe - pb)); 45 | return VariantMatrix(std::move(gc), std::move(pc), m.max_allele()); 46 | } 47 
| } // namespace Sequence 48 | -------------------------------------------------------------------------------- /test/CountingOperators.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | BOOST_AUTO_TEST_SUITE(CountingOperatorsTest) 8 | 9 | BOOST_AUTO_TEST_CASE( test_counting_operators_map_plus ) 10 | { 11 | using Sequence::operator+; 12 | std::map baseCounts,baseCounts2; 13 | baseCounts['A'] = 5; 14 | baseCounts['G'] = 10; 15 | baseCounts2['A'] = 11; 16 | baseCounts2['C'] = 17; 17 | std::map baseCounts3 = baseCounts + baseCounts2; 18 | 19 | BOOST_REQUIRE_EQUAL( baseCounts3['A'], 16 ); 20 | BOOST_REQUIRE_EQUAL( baseCounts3['G'], 10 ); 21 | BOOST_REQUIRE_EQUAL( baseCounts3['C'], 17 ); 22 | } 23 | 24 | BOOST_AUTO_TEST_CASE( test_counting_operators_map_plus_equal ) 25 | { 26 | using Sequence::operator+=; 27 | std::map baseCounts,baseCounts2; 28 | baseCounts['A'] = 5; 29 | baseCounts['G'] = 10; 30 | baseCounts2['A'] = 11; 31 | baseCounts2['C'] = 17; 32 | baseCounts += baseCounts2; 33 | 34 | BOOST_REQUIRE_EQUAL( baseCounts['A'], 16 ); 35 | BOOST_REQUIRE_EQUAL( baseCounts['G'], 10 ); 36 | BOOST_REQUIRE_EQUAL( baseCounts['C'], 17 ); 37 | } 38 | 39 | BOOST_AUTO_TEST_CASE( test_counting_operators_vector_plus ) 40 | { 41 | using Sequence::operator+; 42 | std::vector< std::pair > baseCounts,baseCounts2; 43 | baseCounts.push_back(std::make_pair('A',5u)); 44 | baseCounts.push_back(std::make_pair('G',10u)); 45 | baseCounts2.push_back(std::make_pair('A',11u)); 46 | baseCounts2.push_back(std::make_pair('C',17u)); 47 | 48 | auto baseCounts3 = baseCounts + baseCounts2; 49 | 50 | std::string bases = {'A','G','C'}; 51 | 52 | auto i = std::find_if(baseCounts3.cbegin(), 53 | baseCounts3.cend(), 54 | [](const std::pair & __p) { 55 | return __p.first == 'A'; 56 | }); 57 | BOOST_REQUIRE( i != baseCounts3.cend() ); 58 | BOOST_REQUIRE_EQUAL( i->second,16 ); 59 | 60 | i = 
std::find_if(baseCounts3.cbegin(), 61 | baseCounts3.cend(), 62 | [](const std::pair & __p) { 63 | return __p.first == 'G'; 64 | }); 65 | BOOST_REQUIRE( i != baseCounts3.cend() ); 66 | BOOST_REQUIRE_EQUAL( i->second,10 ); 67 | 68 | i = std::find_if(baseCounts3.cbegin(), 69 | baseCounts3.cend(), 70 | [](const std::pair & __p) { 71 | return __p.first == 'C'; 72 | }); 73 | BOOST_REQUIRE( i != baseCounts3.cend() ); 74 | BOOST_REQUIRE_EQUAL( i->second,17 ); 75 | 76 | } 77 | BOOST_AUTO_TEST_SUITE_END() 78 | -------------------------------------------------------------------------------- /test/FastaConstructors.cc: -------------------------------------------------------------------------------- 1 | //!\ file FastaConstructors.cc 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | struct fasta_constructors_fixture 10 | { 11 | std::string name, seq; 12 | fasta_constructors_fixture() 13 | : name{ "seqname" }, seq{ "AGCGTAGACAGTAGAGTGAT" } 14 | { 15 | } 16 | }; 17 | 18 | BOOST_FIXTURE_TEST_SUITE(FastaConstructorsTest, fasta_constructors_fixture) 19 | 20 | BOOST_AUTO_TEST_CASE(empty) 21 | { 22 | Sequence::Fasta f; 23 | BOOST_REQUIRE(f.name.empty()); 24 | BOOST_REQUIRE(f.seq.empty()); 25 | } 26 | 27 | BOOST_AUTO_TEST_CASE(string_con) 28 | { 29 | Sequence::Fasta f = Sequence::Fasta(name, seq); 30 | BOOST_CHECK(f.name == name); 31 | BOOST_CHECK(f.seq == seq); 32 | } 33 | 34 | BOOST_AUTO_TEST_CASE(copy_con) 35 | { 36 | Sequence::Fasta f = Sequence::Fasta(name.c_str(), seq.c_str()); 37 | BOOST_CHECK(f.name == name); 38 | BOOST_CHECK(f.seq == seq); 39 | 40 | Sequence::Fasta f2(f); 41 | BOOST_REQUIRE(f == f2); 42 | } 43 | 44 | BOOST_AUTO_TEST_CASE(move_con) 45 | { 46 | Sequence::Fasta f = Sequence::Fasta(name.c_str(), seq.c_str()); 47 | BOOST_CHECK(f.name == name); 48 | BOOST_CHECK(f.seq == seq); 49 | 50 | Sequence::Fasta f2(std::move(f)); 51 | BOOST_CHECK(f2.name == name); 52 | BOOST_CHECK(f2.seq == seq); 53 | BOOST_CHECK(f.length() == 0); 54 | 
BOOST_CHECK(f.name.empty()); 55 | } 56 | 57 | BOOST_AUTO_TEST_CASE(move_con2) 58 | //This "should" work??? 59 | { 60 | std::string a(name), b(seq); 61 | Sequence::Fasta f = Sequence::Fasta(std::move(a), std::move(b)); 62 | BOOST_CHECK(f.name == name); 63 | BOOST_CHECK(f.seq == seq); 64 | BOOST_CHECK(a.empty()); 65 | BOOST_CHECK(b.empty()); 66 | } 67 | 68 | BOOST_AUTO_TEST_CASE(move_assign) 69 | { 70 | Sequence::Fasta f = Sequence::Fasta(name, seq); 71 | BOOST_CHECK(f.name == name); 72 | BOOST_CHECK(f.seq == seq); 73 | 74 | Sequence::Fasta f2; 75 | f2 = std::move(f); 76 | BOOST_CHECK(f2.name == name); 77 | BOOST_CHECK(f2.seq == seq); 78 | BOOST_CHECK(f.length() == 0); 79 | BOOST_CHECK(f.name.empty()); 80 | } 81 | BOOST_AUTO_TEST_SUITE_END() 82 | //EOF 83 | -------------------------------------------------------------------------------- /test/FastaOperations.cc: -------------------------------------------------------------------------------- 1 | //\file FastaOperations.cc 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | struct fasta_operations_fixture 11 | { 12 | std::string name, seq; 13 | fasta_operations_fixture() 14 | : name{ "seqname" }, seq{ "AGCGTAGACAGTAGAGTGAT" } 15 | { 16 | } 17 | }; 18 | 19 | BOOST_FIXTURE_TEST_SUITE(FastaOperationsTest, fasta_operations_fixture) 20 | 21 | //A generic revcom routine written for this test 22 | std::string rcom( const std::string & s ) 23 | { 24 | std::string rv(s); 25 | std::reverse(rv.begin(),rv.end()); 26 | std::transform(rv.begin(),rv.end(), 27 | rv.begin(), 28 | [](const char & ch) 29 | { 30 | switch(ch) 31 | { 32 | case 'A': 33 | return 'T'; 34 | break; 35 | case 'G': 36 | return 'C'; 37 | break; 38 | case 'C': 39 | return 'G'; 40 | break; 41 | case 'T': 42 | return 'A'; 43 | break; 44 | } 45 | return 'N'; 46 | }); 47 | return rv; 48 | } 49 | 50 | 51 | BOOST_AUTO_TEST_CASE( revcom ) 52 | { 53 | std::string name("seqname"),seq("AGCGTAGACAGTAGAGTGAT"); 54 | Sequence::Fasta 
f(name,seq); 55 | 56 | Sequence::Fasta f2 = f; 57 | f2.Revcom(); 58 | 59 | BOOST_REQUIRE( f2.seq == rcom(seq) ); 60 | } 61 | 62 | BOOST_AUTO_TEST_CASE( subseq ) 63 | { 64 | std::string name("seqname"),seq("AGCGTAGACAGTAGAGTGAT"); 65 | Sequence::Fasta f(name,seq); 66 | 67 | Sequence::Fasta f3(f); 68 | f3.Subseq(1,3); 69 | 70 | BOOST_REQUIRE( f3.seq == "GCG" ); 71 | 72 | f3.Complement(); 73 | 74 | BOOST_REQUIRE( f3.seq == "CGC" ); 75 | 76 | BOOST_REQUIRE( std::string(f3) == "CGC" ); //operator string() 77 | 78 | } 79 | 80 | 81 | BOOST_AUTO_TEST_CASE( gapped ) 82 | { 83 | Sequence::Fasta f3("seqname","GCG"); 84 | 85 | BOOST_REQUIRE( !f3.IsGapped() ); 86 | 87 | f3.seq += '-'; 88 | 89 | BOOST_REQUIRE( f3.IsGapped() ); 90 | 91 | BOOST_REQUIRE( f3.length() == 4 ); 92 | 93 | BOOST_REQUIRE( f3.UngappedLength() == 3 ); 94 | 95 | //Remove the gap 96 | f3.seq.erase( f3.seq.find('-'), 1 ); 97 | 98 | BOOST_REQUIRE( f3.length() == 3 ); 99 | 100 | BOOST_REQUIRE( f3.UngappedLength() == 3 ); 101 | } 102 | 103 | BOOST_AUTO_TEST_CASE( cpp11access_1 ) 104 | { 105 | Sequence::Fasta f3("seqname","GCG"); 106 | for( auto & d : f3 ) 107 | { 108 | d = 'A'; 109 | } 110 | BOOST_REQUIRE_EQUAL(f3.seq,"AAA"); 111 | } 112 | 113 | BOOST_AUTO_TEST_SUITE_END() 114 | //EOF 115 | -------------------------------------------------------------------------------- /test/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUNIT_TEST_PRESENT 2 | 3 | check_PROGRAMS=libseq_unit_tests 4 | 5 | TESTS=$(check_PROGRAMS) 6 | 7 | AM_CXXFLAGS=-g 8 | AM_LDFLAGS=-L../src/.libs -Wl,-rpath,../src/.libs 9 | AM_LIBS=-lsequence 10 | 11 | #if DEBUG 12 | #AM_CXXFLAGS+=-g 13 | #else 14 | #AM_CXXFLAGS+=-DNDEBUG 15 | #endif 16 | # 17 | #if PROFILING 18 | #PROFILE= -pg 19 | #else 20 | #PROFILE= 21 | #endif 22 | 23 | LIBS+=$(AM_LIBS) 24 | 25 | libseq_unit_tests_SOURCES=libseq_unit_tests.cc \ 26 | FastaConstructors.cc \ 27 | FastaIO.cc \ 28 | FastaOperations.cc \ 29 | 
AlignStreamTest.cc \ 30 | CountingOperators.cc \ 31 | PolyTableConversions.cc \ 32 | PolyTableTweaking.cc \ 33 | PolyTableBadBehavior.cc \ 34 | PolySitesIO.cc \ 35 | SimpleSNPIO.cc \ 36 | PolySIMtest.cc \ 37 | PolySNPtest.cc \ 38 | ComparisonsTest.cc \ 39 | AlignmentTest.cc \ 40 | fastqIO.cc \ 41 | fastqConstructors.cc \ 42 | SeqConversions.cc \ 43 | RedundancyCom95test.cc \ 44 | alphabets.cc \ 45 | polySiteVectorTest.cc \ 46 | PolyTableSliceTest.cc \ 47 | stateCounterTest.cc \ 48 | VariantMatrixTest.cc \ 49 | testAlleleCountMatrix.cc \ 50 | testClassicSummstats.cc \ 51 | testClassicSummstatsEmptyVariantMatrix.cc \ 52 | testLD.cc \ 53 | testGarudStatistics.cc \ 54 | msformatdata.cc \ 55 | testVariantMatrixWindows.cc 56 | 57 | endif #if BUNIT_TEST_PRESENT 58 | -------------------------------------------------------------------------------- /test/PolySitesIO.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | BOOST_AUTO_TEST_SUITE(PolySitesIOTest) 8 | 9 | BOOST_AUTO_TEST_CASE( polysites_io ) 10 | { 11 | std::vector pos = {1,2,3,4,5}; 12 | std::vector data = {"AAAAA", 13 | "AAGAA", 14 | "CTGAA", 15 | "NAACT"}; 16 | 17 | Sequence::PolySites ps(std::move(pos),std::move(data)),ps2; 18 | 19 | std::ostringstream o; 20 | o << ps << '\n'; 21 | std::istringstream in(o.str()); 22 | 23 | BOOST_REQUIRE_NO_THROW( in >> ps2 >> std::ws ); 24 | 25 | BOOST_REQUIRE( ps == ps2 ); 26 | 27 | const char * fn = "psitesio.txt"; 28 | 29 | std::ofstream of(fn); 30 | of << ps << '\n'; 31 | of.close(); 32 | std::ifstream inf(fn); 33 | BOOST_REQUIRE_NO_THROW(inf >> ps2 >> std::ws); 34 | BOOST_REQUIRE( ps == ps2 ); 35 | unlink(fn); 36 | } 37 | BOOST_AUTO_TEST_SUITE_END() 38 | -------------------------------------------------------------------------------- /test/PolyTableSliceTest.cc: -------------------------------------------------------------------------------- 1 | //! 
\file PolyTableSliceTest.cc @brief Tests for Sequence/PolyTableSlice.hpp 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | using namespace Sequence; 12 | 13 | BOOST_AUTO_TEST_SUITE(PolyTableSliceTest) 14 | 15 | BOOST_AUTO_TEST_CASE( lastwindows1 ) 16 | { 17 | vector > data; 18 | for(double i = 0.05 ; i < 0.9 ; i += 0.01 ) 19 | data.push_back(make_pair(i,string("001000"))); 20 | 21 | SimData d(data.begin(),data.end()); 22 | PolyTableSlice w(d.sbegin(),d.send(),0.1,0.001,0.,1.); 23 | unsigned nwindows = unsigned(1./0.001); 24 | BOOST_REQUIRE_EQUAL(w.size(),nwindows); 25 | } 26 | 27 | BOOST_AUTO_TEST_CASE( nwindows1 ) 28 | { 29 | vector > data; 30 | for(double i = 0.05 ; i < 0.9 ; i += 0.01 ) 31 | data.push_back(make_pair(i,string("001000"))); 32 | 33 | SimData d(data.begin(),data.end()); 34 | PolyTableSlice w(d.sbegin(),d.send(),64); 35 | unsigned ewindows = std::ceil(double(d.numsites())/64); 36 | BOOST_REQUIRE_EQUAL(w.size(),std::ceil(double(d.numsites())/double(ewindows))); 37 | for(auto i = w.cbegin();i!=w.cend();++i) 38 | { 39 | auto wi = w.get_slice(i); 40 | BOOST_CHECK( wi.empty() == false ); 41 | } 42 | } 43 | 44 | BOOST_AUTO_TEST_CASE( nwindows2 ) 45 | { 46 | //Make 10x as many SNPs 47 | vector > data; 48 | for(double i = 0.05 ; i < 0.9 ; i += 0.001 ) 49 | data.push_back(make_pair(i,string("001000"))); 50 | 51 | SimData d(data.begin(),data.end()); 52 | PolyTableSlice w(d.sbegin(),d.send(),64); 53 | unsigned ewindows = std::ceil(double(d.numsites())/64); 54 | BOOST_REQUIRE_EQUAL(w.size(),std::ceil(double(d.numsites())/double(ewindows))); 55 | for(auto i = w.cbegin();i!=w.cend();++i) 56 | { 57 | auto wi = w.get_slice(i); 58 | BOOST_CHECK( wi.empty() == false ); 59 | } 60 | } 61 | 62 | 63 | BOOST_AUTO_TEST_SUITE_END() 64 | -------------------------------------------------------------------------------- /test/README.md: 
-------------------------------------------------------------------------------- 1 | # Unit tests for libsequence 2 | 3 | ## Dependencies 4 | 5 | 1. Make sure that libsequence is compiled in the parent directory. 6 | 2. The [boost](http://boost.org) unit testing library is used by these tests. Currently, autoconf does _not_ check for this dependency. Make sure that the library is installed. 7 | 8 | ### A word of caution 9 | 10 | I develop the library and the tests on an Ubuntu Linux machine. The library is written in C++11 and tested primarily using GCC and secondarily using clang++ (the default compiler on current-era OS X). On Ubuntu 14.04, I have observed that the unit tests fail to compile with clang++. I have not tracked down whether this is due to the Ubuntu boost packages being compiled with GCC, without C++11 awareness, some issue with clang++ and boost's unit testing library, or some complex interaction amongst those possibilities. However, I have confirmed that the unit tests compile and work fine on OS X Yosemite using clang++. 11 | 12 | ## Compiling the tests 13 | 14 | ``` 15 | make check 16 | ``` 17 | 18 | ## Running the tests 19 | 20 | ``` 21 | sh runTests.sh 22 | ``` 23 | 24 | If you really want all the details, then execute this instead: 25 | 26 | ``` 27 | BOOST_TEST_LOG_LEVEL=all sh runTests.sh 28 | ``` 29 | 30 | The boost unit testing library will report any errors in any testing modules. 31 | 32 | Note that some tests may intentionally cause errors. When that is the case, a message stating that the error is intentional will appear on screen along with the error. 33 | 34 | ## Notes 35 | 36 | * The tests are statically-linked against the version of libsequence compiled in the parent directory. This is done so that there is no confusion that the tests are testing the current code, and not some other version of the library installed on your system. 37 | * More tests are needed!
-------------------------------------------------------------------------------- /test/SeqConversions.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | BOOST_AUTO_TEST_SUITE(SeqConversionsTest) 8 | 9 | BOOST_AUTO_TEST_CASE( fastq2fasta ) 10 | { 11 | std::ifstream in("data/data.fastq"); 12 | Sequence::fastq fq; 13 | Sequence::Fasta fa; 14 | 15 | in >> fq >> std::ws; 16 | 17 | fa = fq; 18 | 19 | BOOST_CHECK_EQUAL( fq.name , fa.name ); 20 | BOOST_CHECK_EQUAL( fq.seq , fa.seq ); 21 | } 22 | 23 | BOOST_AUTO_TEST_CASE( fastq2fasta2 ) 24 | { 25 | std::ifstream in("data/data.fastq"); 26 | Sequence::fastq fq; 27 | 28 | in >> fq >> std::ws; 29 | 30 | Sequence::Fasta fa(fq); 31 | 32 | BOOST_CHECK_EQUAL( fq.name , fa.name ); 33 | BOOST_CHECK_EQUAL( fq.seq , fa.seq ); 34 | } 35 | 36 | BOOST_AUTO_TEST_CASE( fastq2fasta3 ) 37 | { 38 | std::ifstream in("data/data.fastq"); 39 | Sequence::fastq fq; 40 | 41 | in >> fq >> std::ws; 42 | 43 | Sequence::Fasta fa(std::move(fq)); 44 | 45 | BOOST_CHECK (fq.length() == 0); 46 | BOOST_CHECK (fq.name.empty()); 47 | } 48 | 49 | BOOST_AUTO_TEST_CASE( fasta2fastq_1 ) 50 | { 51 | Sequence::Fasta fa = {"name","ATGC"}; 52 | Sequence::fastq fq = fa; 53 | 54 | BOOST_CHECK_EQUAL( fq.name , fa.name ); 55 | BOOST_CHECK_EQUAL( fq.seq , fa.seq ); 56 | BOOST_CHECK( fq.quality.empty() ); 57 | } 58 | 59 | BOOST_AUTO_TEST_CASE( fasta2fastq_2 ) 60 | { 61 | Sequence::Fasta fa = {"name","ATGC"}; 62 | Sequence::fastq fq = std::move(fa); 63 | 64 | BOOST_CHECK( fq.name == "name" ); 65 | BOOST_CHECK( fq.seq == "ATGC" ); 66 | BOOST_CHECK( fq.quality.empty() ); 67 | } 68 | 69 | BOOST_AUTO_TEST_CASE( fasta2fastq_3 ) 70 | { 71 | Sequence::Fasta fa = {"name","ATGC"}; 72 | Sequence::fastq fq(std::move(fa)); 73 | 74 | BOOST_CHECK( fq.name == "name" ); 75 | BOOST_CHECK( fq.seq == "ATGC" ); 76 | BOOST_CHECK( fq.quality.empty() ); 77 | } 78 | 
BOOST_AUTO_TEST_SUITE_END() 79 | -------------------------------------------------------------------------------- /test/SimpleSNPIO.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | BOOST_AUTO_TEST_SUITE(SimpleSNPIOTest) 9 | 10 | BOOST_AUTO_TEST_CASE( polysites_io ) 11 | { 12 | std::vector pos = {1,2,3,4,5}; 13 | std::vector data = {"AAAAA", 14 | "AAGAA", 15 | "CTGAA", 16 | "NAACT"}; 17 | 18 | Sequence::PolySites temp(std::move(pos),std::move(data)); 19 | 20 | Sequence::SimpleSNP ps,ps2,ps3; 21 | ps.assign(temp.sbegin(),temp.send()); 22 | ps3.assign(temp.sbegin(),temp.send()); 23 | 24 | std::ostringstream o; 25 | o << ps << '\n'; 26 | std::istringstream in(o.str()); 27 | 28 | BOOST_REQUIRE_NO_THROW( in >> ps2 >> std::ws ); 29 | 30 | BOOST_REQUIRE( ps == ps2 ); 31 | 32 | const char * fn = "simplesnpio.txt"; 33 | 34 | std::ofstream of(fn); 35 | of << ps << '\n'; 36 | of.close(); 37 | std::ifstream inf(fn); 38 | BOOST_REQUIRE_NO_THROW(inf >> ps2 >> std::ws); 39 | BOOST_REQUIRE( ps == ps2 ); 40 | inf.close(); 41 | unlink(fn); 42 | 43 | const char * fn2 = "simplesnpio2.txt"; 44 | //Now, change the outgroup 45 | ps.set_outgroup(true); 46 | of.open(fn2); 47 | of << ps << '\n'; 48 | of.close(); 49 | inf.open(fn2); 50 | BOOST_REQUIRE_NO_THROW(inf >> ps2 >> std::ws); 51 | inf.close(); 52 | BOOST_REQUIRE( ps == ps2 ); 53 | BOOST_REQUIRE( ps == ps3 ); 54 | 55 | unlink(fn2); 56 | } 57 | BOOST_AUTO_TEST_SUITE_END() 58 | -------------------------------------------------------------------------------- /test/VariantMatrixFixture.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LIBSEQUENCE_TESTS_VARIANTMATRIXFIXTURE_HPP 2 | #define LIBSEQUENCE_TESTS_VARIANTMATRIXFIXTURE_HPP 3 | 4 | #include 5 | #include 6 | 7 | struct invariantdataset 8 | { 9 | using data_type = std::vector; 10 | using positions_type = std::vector; 
11 | Sequence::VariantMatrix empty, invariant; 12 | Sequence::AlleleCountMatrix empty_counts, invariant_counts; 13 | invariantdataset() 14 | : empty{ data_type{}, positions_type{} }, 15 | invariant{ data_type{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, 17 | positions_type{ 0.1, 0.2, 0.3 } }, 18 | empty_counts(empty), invariant_counts(invariant) 19 | { 20 | } 21 | }; 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /test/alphabets.cc: -------------------------------------------------------------------------------- 1 | /*! \file alphabets.cc @brief Unit tests for Sequence/SeqAlphabets.hpp */ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | BOOST_AUTO_TEST_SUITE(AlphabetTest) 9 | 10 | BOOST_AUTO_TEST_CASE( check_dna_alphabet ) 11 | { 12 | for ( auto c : {'A','G','C','T'} ) 13 | { 14 | BOOST_REQUIRE( std::distance(Sequence::dna_alphabet.begin(), 15 | std::find( Sequence::dna_alphabet.begin(), 16 | Sequence::dna_alphabet.end(), c ) ) < 4 ); 17 | } 18 | } 19 | 20 | BOOST_AUTO_TEST_CASE( check_isDNA_1 ) 21 | { 22 | for (auto c : Sequence::dna_alphabet ) 23 | { 24 | BOOST_REQUIRE( Sequence::isDNA(c) ); 25 | } 26 | } 27 | 28 | BOOST_AUTO_TEST_CASE( check_isDNA_2 ) 29 | { 30 | Sequence::Fasta f = { "name","ATGCZAGC" }; //Z is a non-DNA character 31 | auto itr = std::find_if( f.begin(),f.end(), 32 | [](const char & __ch) { 33 | return !Sequence::isDNA(__ch); 34 | } ); 35 | BOOST_REQUIRE_EQUAL( std::distance(f.begin(),itr),4 ); 36 | 37 | f.seq.erase( std::remove_if(f.begin(), 38 | f.end(), 39 | [](const char & __ch) { 40 | return !Sequence::isDNA(__ch); 41 | }), f.seq.end() ); 42 | BOOST_REQUIRE_EQUAL(f.seq,"ATGCAGC"); 43 | } 44 | 45 | //Test of dna_poly_alphabet 46 | BOOST_AUTO_TEST_CASE( dna_poly_alphabet_1 ) 47 | { 48 | BOOST_REQUIRE( std::find( Sequence::dna_poly_alphabet.begin(), 49 | Sequence::dna_poly_alphabet.end(),'\0' ) != 
# Data files for unit tests

* phylip_input.txt - copied from http://evolution.genetics.washington.edu/phylip/doc/main.html
* single_ms.txt - output from Hudson's "ms" program
* CG15644-Z.aln - variation data from a Drosophila Zimbabwe population sample, in ClustalW format
virginiTAATGTTCGT TGTTAATGTT 6 | BrontosaurCAAAACCCAT CATCAAAACC 7 | B.subtilisGGCAGCCAAT CACGGCAGCC 8 | 9 | TACCGCCGAT GCTTACCGC 10 | CGTTGTCGTT ACTCGTTGT 11 | AATTGTTAAT GTTAATTGT 12 | CGTTGTTAAT GTTCGTTGT 13 | CATCATCAAA ACCCATCAT 14 | AATCACGGCA GCCAATCAC 15 | -------------------------------------------------------------------------------- /test/fastqConstructors.cc: -------------------------------------------------------------------------------- 1 | //\file fastqConstructors.cc 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | BOOST_AUTO_TEST_SUITE(FastqConstructorsTest) 12 | 13 | BOOST_AUTO_TEST_CASE( move_construction ) 14 | { 15 | std::ifstream in("data/data.fastq"); 16 | if (!in) 17 | { 18 | std::cerr << "Error, couldn't find input file!\n"; 19 | exit(1); 20 | } 21 | Sequence::fastq f; 22 | 23 | in >> f >> std::ws; 24 | 25 | Sequence::fastq f2(std::move(f)); 26 | 27 | BOOST_CHECK_EQUAL(f.length(),0); 28 | } 29 | BOOST_AUTO_TEST_SUITE_END() 30 | -------------------------------------------------------------------------------- /test/fastqIO.cc: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | BOOST_AUTO_TEST_SUITE(FASTQIOTest) 11 | 12 | BOOST_AUTO_TEST_CASE( input_test ) 13 | { 14 | std::ifstream in("data/data.fastq"); 15 | if (!in) 16 | { 17 | std::cerr << "Error, couldn't find input file!\n"; 18 | exit(1); 19 | } 20 | Sequence::fastq f; 21 | 22 | unsigned count = 0; 23 | BOOST_REQUIRE_NO_THROW 24 | ( 25 | while(!in.eof()) 26 | { 27 | in >> f >> std::ws; 28 | ++count; 29 | } 30 | ); 31 | BOOST_CHECK_EQUAL(count,50); 32 | } 33 | 34 | BOOST_AUTO_TEST_CASE( input_test2 ) 35 | { 36 | std::ifstream in("data/data.fastq"); 37 | if (!in) 38 | { 39 | std::cerr << "Error, couldn't find input file!\n"; 40 | exit(1); 41 | } 42 | Sequence::fastq f; 43 | 44 | unsigned count = 0; 45 | 
BOOST_REQUIRE_NO_THROW 46 | ( 47 | unsigned count = 0; 48 | std::istream_iterator i(in); 49 | for( ; i != std::istream_iterator() ; ++i ) 50 | { 51 | ++count; 52 | } 53 | BOOST_CHECK_EQUAL(count,50); 54 | in.close(); 55 | ); 56 | } 57 | 58 | BOOST_AUTO_TEST_CASE( output_test ) 59 | { 60 | BOOST_REQUIRE_NO_THROW 61 | ( 62 | std::ifstream in("data/data.fastq"); 63 | if (!in) 64 | { 65 | std::cerr << "Error, couldn't find input file!\n"; 66 | exit(1); 67 | } 68 | 69 | Sequence::fastq f; 70 | 71 | std::vector vf; 72 | std::ofstream out("fastqIOtest.txt"); 73 | unsigned count = 0; 74 | while(!in.eof()) 75 | { 76 | in >> f >> std::ws; 77 | f.repname(false); 78 | vf.push_back(f); 79 | out << f << '\n'; 80 | ++count; 81 | } 82 | BOOST_CHECK_EQUAL(count,50); 83 | out.close(); 84 | in.close(); 85 | in.open("fastqIOtest.txt"); 86 | count = 0; 87 | while(!in.eof()) 88 | { 89 | in >> f >> std::ws; 90 | BOOST_CHECK_EQUAL(f,vf[count]); 91 | ++count; 92 | } 93 | unlink("fastqIOtest.txt"); 94 | in.close(); 95 | ); 96 | } 97 | BOOST_AUTO_TEST_SUITE_END() 98 | -------------------------------------------------------------------------------- /test/libseq_unit_tests.cc: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_MODULE libsequence_unit_tests 2 | #include 3 | 4 | -------------------------------------------------------------------------------- /test/msformatdata.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LIBSEQUENCE_TEST_MSFORMAT_HPP 2 | #define LIBSEQUENCE_TEST_MSFORMAT_HPP 3 | 4 | #include 5 | 6 | std::string get_msformat_data(); 7 | std::string get_msformat_stream(); 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /test/msprime_data_fixture.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LIBSEQUENCE_TEST_MSPRIME_DATA_FIXTURE_HPP 2 | #define 
LIBSEQUENCE_TEST_MSPRIME_DATA_FIXTURE_HPP 3 | 4 | #include 5 | #include "msformatdata.hpp" 6 | #include 7 | #include 8 | #include 9 | 10 | struct vmatrix_from_msprime 11 | { 12 | Sequence::VariantMatrix m; 13 | Sequence::AlleleCountMatrix c; 14 | 15 | static Sequence::VariantMatrix 16 | read() 17 | { 18 | std::istringstream in(get_msformat_data()); 19 | return Sequence::from_msformat(in); 20 | } 21 | 22 | vmatrix_from_msprime() : m(read()), c(m) {} 23 | }; 24 | 25 | struct msprime_stream 26 | { 27 | std::istringstream in; 28 | msprime_stream() : in(get_msformat_stream()) {} 29 | }; 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /test/polySiteVectorTest.cc: -------------------------------------------------------------------------------- 1 | //! \file polySiteVectorTest.cc @brief Unit tests for Sequence::polySiteVector 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | using psite = Sequence::polymorphicSite; 15 | using Ptable = Sequence::polySiteVector; 16 | 17 | BOOST_AUTO_TEST_SUITE(PolySiteVectorTest) 18 | 19 | BOOST_AUTO_TEST_CASE( ptable_remove_1 ) 20 | { 21 | 22 | Ptable t = { psite(1.,"AAGC"), 23 | psite(2.,"ACZA") }; //site 2 has a non-DNA character 24 | 25 | BOOST_CHECK_EQUAL( t.size(), 2 ); 26 | 27 | t.erase( std::remove_if( t.begin(), 28 | t.end(), 29 | []( const psite & __p ) { 30 | return std::find_if(__p.second.begin(), 31 | __p.second.end(), 32 | Sequence::invalidPolyChar()) 33 | != __p.second.end(); 34 | } ), 35 | t.end() ); 36 | BOOST_CHECK_EQUAL( t.size(), 1 ); 37 | } 38 | 39 | BOOST_AUTO_TEST_CASE( ptable_make_from_polytable ) 40 | { 41 | using psite = Sequence::polymorphicSite; 42 | Ptable t = { psite(1.,"AAGC"), 43 | psite(2.,"ACAA") }; 44 | 45 | Sequence::PolySites ps(t.begin(),t.end()); 46 | 47 | BOOST_REQUIRE( std::distance(t.begin(),t.end()) == 48 | 
#!/bin/sh
#
# Run every executable regular file found below the current directory.
#
# Exit status: non-zero if any of the executed programs returned non-zero
# (find propagates a failing "-exec ... {} +" invocation), zero otherwise.
#
# Notes:
#  * The three "-perm -NNN" tests together mean "any execute bit set".
#    They replace the old GNU-only "-perm +111" spelling, which current
#    GNU find rejects.
#  * This script is excluded from the search so that it cannot invoke
#    itself recursively when it carries an execute bit.
#  * Paths are passed as positional parameters and quoted, so names with
#    whitespace are handled.

find . -type f \( -perm -100 -o -perm -010 -o -perm -001 \) ! -name runTests.sh \
    -exec sh -c '
        status=0
        for test_bin in "$@"
        do
            "$test_bin" || status=1
        done
        exit "$status"
    ' sh {} +
\file testAlleleCountMatrix.cc @brief Tests for Sequence/VariantMatrix.hpp 2 | #include "msprime_data_fixture.hpp" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include //for std::iota 8 | #include 9 | 10 | BOOST_FIXTURE_TEST_SUITE(test_allele_count_matrix, vmatrix_from_msprime) 11 | 12 | BOOST_AUTO_TEST_CASE(test_max_allele_exception) 13 | { 14 | //Change some data in m so that m[i] > m.max_allele 15 | m.data()[0] = 5; 16 | 17 | BOOST_REQUIRE_THROW(Sequence::AlleleCountMatrix ac(m), std::runtime_error); 18 | } 19 | 20 | BOOST_AUTO_TEST_CASE(counts_from_windows) 21 | { 22 | for (std::size_t i = 0; i < m.nsites(); ++i) 23 | { 24 | auto w = Sequence::make_window(m, m.position(i), m.position(i)); 25 | BOOST_REQUIRE_NO_THROW(Sequence::AlleleCountMatrix ac(w)); 26 | } 27 | } 28 | 29 | BOOST_AUTO_TEST_SUITE_END() 30 | 31 | -------------------------------------------------------------------------------- /test/testLD.cc: -------------------------------------------------------------------------------- 1 | //! 
\file testLD.cc @brief unit tests for LD-related calculations 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "msprime_data_fixture.hpp" 12 | 13 | BOOST_FIXTURE_TEST_SUITE(test_LD, vmatrix_from_msprime) 14 | 15 | BOOST_AUTO_TEST_CASE(test_two_locus_haplotype_counts) 16 | { 17 | std::vector results; 18 | for (std::size_t i = 0; i < m.nsites() - 1; ++i) 19 | { 20 | for (std::size_t j = i + 1; j < m.nsites(); ++j) 21 | { 22 | std::vector> haps; 23 | auto hc 24 | = Sequence::two_locus_haplotype_counts(m, i, j, true); 25 | auto ri = Sequence::get_ConstRowView(m, i); 26 | auto rj = Sequence::get_ConstRowView(m, j); 27 | for (std::size_t k = 0; k < ri.size(); ++k) 28 | { 29 | haps.emplace_back(ri[k], rj[k]); 30 | } 31 | std::sort(haps.begin(), haps.end()); 32 | auto end_of_unique_haps 33 | = std::unique(haps.begin(), haps.end()); 34 | BOOST_REQUIRE_EQUAL( 35 | hc.size(), static_cast(std::distance( 36 | haps.begin(), end_of_unique_haps))); 37 | } 38 | } 39 | } 40 | 41 | BOOST_AUTO_TEST_SUITE_END() 42 | -------------------------------------------------------------------------------- /test/testVariantMatrixWindows.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "msformatdata.hpp" 10 | 11 | BOOST_AUTO_TEST_SUITE(testVariantMatrixWindows) 12 | 13 | BOOST_AUTO_TEST_CASE(test_windows) 14 | { 15 | std::istringstream i(get_msformat_data()); 16 | auto vm = Sequence::from_msformat(i); 17 | for (double i = 0.0; i < 1.0 - 1e-4; i += 0.1) 18 | { 19 | auto w = Sequence::make_window(vm, i, i + 0.1); 20 | auto pb = std::lower_bound(vm.pbegin(), vm.pend(), i); 21 | std::size_t offset = pb - vm.pbegin(); 22 | for (std::size_t site = 0; site < w.nsites(); ++site) 23 | { 24 | auto window_site = Sequence::get_ConstRowView(w, site); 25 | auto matrix_site 26 | = 
Sequence::get_ConstRowView(vm, offset + site); 27 | auto m 28 | = std::mismatch(window_site.begin(), window_site.end(), 29 | matrix_site.begin()); 30 | BOOST_REQUIRE_EQUAL(m.first == window_site.end(), true); 31 | } 32 | } 33 | } 34 | 35 | BOOST_AUTO_TEST_CASE(test_slices) 36 | { 37 | std::istringstream i(get_msformat_data()); 38 | auto vm = Sequence::from_msformat(i); 39 | std::size_t from = 53, to = 70; 40 | for (double i = 0.0; i < 1.0 - 1e-4; i += 0.1) 41 | { 42 | auto w = Sequence::make_slice(vm, i, i + 0.1, from, to); 43 | auto pb = std::lower_bound(vm.pbegin(), vm.pend(), i); 44 | std::size_t offset = pb - vm.pbegin(); 45 | for (std::size_t site = 0; site < w.nsites(); ++site) 46 | { 47 | auto window_site = Sequence::get_ConstRowView(w, site); 48 | auto matrix_site 49 | = Sequence::get_ConstRowView(vm, offset + site); 50 | auto m = std::mismatch(window_site.begin(), 51 | window_site.end(), 52 | matrix_site.begin() + from); 53 | BOOST_REQUIRE_EQUAL(m.first == window_site.end(), 54 | true); 55 | } 56 | } 57 | } 58 | 59 | BOOST_AUTO_TEST_SUITE_END() 60 | --------------------------------------------------------------------------------