├── .coveragerc
├── .editorconfig
├── .github
└── workflows
│ ├── docs.yml
│ ├── publish.yml
│ └── tests.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.rst
├── autodocs.sh
├── docs
├── Makefile
├── citation.rst
├── conf.py
├── development.rst
├── examples.rst
├── favicon.ico
├── images
│ ├── logo.png
│ ├── logo.svg
│ └── report.png
├── index.rst
├── installation.rst
├── make.bat
├── quickstart.rst
├── reference.rst
├── testFiles.rst
└── usage.rst
├── examples
├── data
│ ├── example.csv
│ ├── example.fasta
│ ├── example.tree
│ ├── example.tsv
│ ├── example.xlsx
│ ├── ice_viruses.fasta
│ ├── ice_viruses.fasta.treefile
│ ├── ice_viruses_cleaned.fasta
│ ├── ice_viruses_cleaned.fasta.treefile
│ └── invalid.fasta
├── example.py
└── self_contained.py
├── mkdocs.sh
├── phytest
├── __init__.py
├── bio
│ ├── __init__.py
│ ├── alignment.py
│ ├── data.py
│ ├── sequence.py
│ └── tree.py
├── cli.py
├── main.py
├── report
│ └── logo.css
└── utils.py
├── poetry.lock
├── pyproject.toml
└── tests
├── __init__.py
├── input
├── alignment.py
├── basic.py
└── testfile1.py
├── test_alignments.py
├── test_cli.py
├── test_data.py
├── test_main.py
├── test_self_contained.py
├── test_sequences.py
└── test_trees.py
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | source = phytest
3 |
4 | [report]
5 | precision = 2
6 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | # EditorConfig: https://EditorConfig.org
2 |
3 | root = true
4 |
5 | [*]
6 | end_of_line = lf
7 | insert_final_newline = true
8 | charset = utf-8
9 |
10 | [{*.py,*.smk,Snakemake}]
11 | indent_style = space
12 | indent_size = 4
13 |
14 | [*.{yml,yaml}]
15 | indent_style = space
16 | indent_size = 2
17 |
--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
1 | name: docs
2 |
3 | on:
4 | push:
5 | branches: main
6 |
7 | jobs:
8 | build:
9 |
10 | runs-on: ubuntu-latest
11 | strategy:
12 | fail-fast: false
13 | matrix:
14 | python-version: ['3.9']
15 |
16 | steps:
17 | - uses: actions/checkout@v3
18 | - name: Install poetry
19 | run: pipx install poetry
20 | - name: Install dependencies for Python ${{ matrix.python-version }}
21 | uses: actions/setup-python@v3
22 | with:
23 | python-version: ${{ matrix.python-version }}
24 | cache: 'poetry'
25 | - run: poetry install
26 | - name: Docs
27 | run: |
28 | poetry run sphinx-build -b html docs gh-pages
29 | - name: Coverage
30 | run: |
31 | poetry run coverage run -m pytest
32 | poetry run coverage html --directory gh-pages/coverage
33 | echo "COVERAGE=$(poetry run coverage report --precision 2 | grep TOTAL | tr -s ' ' | cut -f 4 -d " ")" >> $GITHUB_ENV
34 | - name: Create Badge
35 | uses: schneegans/dynamic-badges-action@v1.1.0 # instructions here: https://github.com/Schneegans/dynamic-badges-action
36 | with:
37 | auth: ${{ secrets.GIST_SECRET }}
38 | gistID: e8160655e03d9015b1e93b97ed611f4f
39 | filename: coverage-badge.json
40 | label: coverage
41 | message: ${{ env.COVERAGE }}
42 | color: green
43 | - name: Deploy 🚀
44 | uses: JamesIves/github-pages-deploy-action@4.1.5
45 | with:
46 | branch: gh-pages # The branch the action should deploy to.
47 | folder: gh-pages # The folder the action should deploy.
48 |
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: publish
2 | on:
3 | push:
4 | tags:
5 | - 'v*.*.*'
6 | jobs:
7 | build:
8 | runs-on: ubuntu-latest
9 | steps:
10 | #----------------------------------------------
11 | # check-out repo and set-up python
12 | #----------------------------------------------
13 | - name: Check out repository
14 | uses: actions/checkout@v2
15 | - name: Set up python ${{ matrix.python-version }}
16 | uses: actions/setup-python@v2
17 | with:
18 | python-version: ${{ matrix.python-version }}
19 | #----------------------------------------------
20 | # ----- install & configure poetry -----
21 | #----------------------------------------------
22 | - name: Install Poetry
23 | uses: snok/install-poetry@v1
24 | with:
25 | virtualenvs-create: true
26 | virtualenvs-in-project: true
27 | #----------------------------------------------
28 | # load cached venv if cache exists
29 | #----------------------------------------------
30 | - name: Load cached venv
31 | id: cached-poetry-dependencies
32 | uses: actions/cache@v2
33 | with:
34 | path: .venv
35 | key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}
36 | #----------------------------------------------
37 | # install dependencies if cache does not exist
38 | #----------------------------------------------
39 | - name: Install dependencies
40 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
41 | run: poetry install --no-interaction --no-root
42 | #----------------------------------------------
43 | # install your root project, if required
44 | #----------------------------------------------
45 | - name: Install library
46 | run: poetry install --no-interaction
47 | - name: Build library
48 | run: poetry build
49 | - name: Publish library
50 | env:
51 | PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
52 | run: |
53 | poetry config pypi-token.pypi $PYPI_TOKEN
54 | poetry publish
55 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | # Based on https://github.com/actions/starter-workflows/blob/main/ci/python-package.yml
2 | name: tests
3 |
4 | on: [push]
5 | jobs:
6 | build:
7 |
8 | runs-on: ubuntu-latest
9 | strategy:
10 | fail-fast: false
11 | matrix:
12 | python-version: ['3.8', '3.9', '3.10', '3.11']
13 |
14 | steps:
15 | - uses: actions/checkout@v3
16 | - name: Install poetry
17 | run: pipx install poetry
18 | - name: Install dependencies for Python ${{ matrix.python-version }}
19 | uses: actions/setup-python@v3
20 | with:
21 | python-version: ${{ matrix.python-version }}
22 | cache: 'poetry'
23 | - name: Install dependencies for Python ${{ matrix.python-version }}
24 | run: |
25 | poetry env use "${{ matrix.python-version }}"
26 | poetry install
27 | - name: Tests
28 | run: |
29 | poetry env info
30 | poetry run pytest
31 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | .envrc
3 | *.pyc
4 | *.html
5 | .coverage
6 | dist/
7 | docs/_build/
8 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pre-commit/pre-commit-hooks
3 | rev: v4.2.0
4 | hooks:
5 | - id: check-yaml
6 | - id: end-of-file-fixer
7 | - id: trailing-whitespace
8 | - repo: https://github.com/psf/black
9 | rev: 22.3.0
10 | hooks:
11 | - id: black
12 | - repo: https://github.com/PyCQA/isort.git
13 | rev: 5.12.0
14 | hooks:
15 | - id: isort
16 | # - repo: https://github.com/python-poetry/poetry
17 | # rev: '1.2.0b2'
18 | # hooks:
19 | # - id: poetry-check
20 | # - id: poetry-lock
21 |
22 | # ci:
23 | # skip: [poetry-lock]
24 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2022 Wytamma Wirth
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | .. image:: https://raw.githubusercontent.com/phytest-devs/phytest/main/docs/images/logo.png
2 | :alt: Phytest logo
3 |
4 | .. start-badges
5 |
6 | |pypi badge| |tests badge| |coverage badge| |docs badge| |black badge| |pre-commit badge| |doi badge|
7 |
8 |
9 | .. |pypi badge| image:: https://img.shields.io/pypi/v/phytest.svg
10 | :target: https://pypi.org/project/phytest/
11 |
12 | .. |tests badge| image:: https://github.com/phytest-devs/phytest/workflows/tests/badge.svg
13 | :target: https://github.com/phytest-devs/phytest/actions
14 |
15 | .. |docs badge| image:: https://github.com/phytest-devs/phytest/workflows/docs/badge.svg
16 | :target: https://phytest-devs.github.io/phytest/
17 |
18 | .. |black badge| image:: https://img.shields.io/badge/code%20style-black-000000.svg
19 | :target: https://github.com/psf/black
20 |
21 | .. |coverage badge| image:: https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/smutch/e8160655e03d9015b1e93b97ed611f4f/raw/coverage-badge.json
22 | :target: https://phytest-devs.github.io/phytest/coverage/
23 |
24 | .. |pre-commit badge| image:: https://results.pre-commit.ci/badge/github/phytest-devs/phytest/main.svg
25 | :target: https://results.pre-commit.ci/latest/github/phytest-devs/phytest/main
26 |
27 | .. |doi badge| image:: https://img.shields.io/badge/DOI-10.1093%2Fbioinformatics%2Fbtac664-success.svg
28 | :target: https://academic.oup.com/bioinformatics/article/38/22/5124/6751773
29 |
30 | .. end-badges
31 |
32 |
33 |
34 | Phytest: Quality Control for Phylogenetic Analyses.
35 |
36 | ----
37 |
38 | Documentation: https://phytest-devs.github.io/phytest
39 |
40 | Code: https://github.com/phytest-devs/phytest
41 |
42 | Tutorials: https://github.com/phytest-devs?q=example
43 |
44 | ----
45 |
46 | .. start-quickstart
47 |
48 | Installation
49 | ============
50 | Install phytest using pip:
51 |
52 | .. code-block:: bash
53 |
54 | pip install phytest
55 |
56 |
57 | Quick Start
58 | ============
59 |
60 | Phytest is a tool for automating quality control checks on sequence, tree and metadata files during phylogenetic analyses.
61 | Phytest ensures that phylogenetic analyses meet user-defined quality control tests.
62 |
63 | Here we will create example data files to run our tests on.
64 |
65 | Create an alignment fasta file :code:`example.fasta`
66 |
67 | .. code-block:: text
68 |
69 | >Sequence_A
70 | ATGAGATCCCCGATAGCGAGCTAGCGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG
71 | >Sequence_B
72 | ATGAGATCCCCGATAGCGAGCTAGXGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG
73 | >Sequence_C
74 | ATGAGA--CCCGATAGCGAGCTAGCGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG
75 | >Sequence_D
76 | ATGAGATCCCCGATAGCGAGCTAGCGATNNNNNNNNNNNNNNNNNTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG
77 |
78 | Create a tree newick file :code:`example.tree`
79 |
80 | .. code-block:: text
81 |
82 | (Sequence_A:1,Sequence_B:0.2,(Sequence_C:0.3,Sequence_D:0.4):0.5);
83 |
84 | Writing a test file
85 | ########################
86 |
87 | We want to enforce the following constraints on our data:
88 | 1. The alignment has 4 sequences
89 | 2. The sequences have a length of 100
90 | 3. The sequences only contain the characters A, T, G, C, N and -
91 | 4. The sequences are allowed to only contain single base deletions
92 | 5. The longest stretch of Ns is 10
93 | 6. The tree has 4 tips
94 | 7. The tree is bifurcating
95 | 8. The alignment and tree have the same names
96 | 9. All internal branches are longer than the given threshold
97 | 10. There are no outlier branches in the tree
98 |
99 | We can write these tests in a Python file :code:`example.py`
100 |
101 | .. code-block:: python
102 |
103 | from phytest import Alignment, Sequence, Tree
104 |
105 |
106 | def test_alignment_has_4_sequences(alignment: Alignment):
107 | alignment.assert_length(4)
108 |
109 |
110 | def test_alignment_has_a_width_of_100(alignment: Alignment):
111 | alignment.assert_width(100)
112 |
113 |
114 | def test_sequences_only_contains_the_characters(sequence: Sequence):
115 | sequence.assert_valid_alphabet(alphabet="ATGCN-")
116 |
117 |
118 | def test_single_base_deletions(sequence: Sequence):
119 | sequence.assert_longest_stretch_gaps(max=1)
120 |
121 |
122 | def test_longest_stretch_of_Ns_is_10(sequence: Sequence):
123 | sequence.assert_longest_stretch_Ns(max=10)
124 |
125 |
126 | def test_tree_has_4_tips(tree: Tree):
127 | tree.assert_number_of_tips(4)
128 |
129 |
130 | def test_tree_is_bifurcating(tree: Tree):
131 | tree.assert_is_bifurcating()
132 |
133 |
134 | def test_aln_tree_match_names(alignment: Alignment, tree: Tree):
135 | aln_names = [i.name for i in alignment]
136 | tree.assert_tip_names(aln_names)
137 |
138 |
139 | def test_all_internal_branches_lengths_above_threshold(tree: Tree, threshold=1e-4):
140 | tree.assert_internal_branch_lengths(min=threshold)
141 |
142 |
143 | def test_outlier_branches(tree: Tree):
144 | # Here we create a custom function to detect outliers
145 | import statistics
146 |
147 | tips = tree.get_terminals()
148 | branch_lengths = [t.branch_length for t in tips]
149 | cut_off = statistics.mean(branch_lengths) + statistics.stdev(branch_lengths)
150 | for tip in tips:
151 | assert tip.branch_length < cut_off, f"Outlier tip '{tip.name}' (branch length = {tip.branch_length})!"
152 |
153 | Running Phytest
154 | ################
155 |
156 | We can then run these tests on our data with :code:`phytest`:
157 |
158 | .. code-block:: bash
159 |
160 | phytest examples/example.py -s examples/data/example.fasta -t examples/data/example.tree
161 |
162 | Generate a report by adding :code:`--report report.html`.
163 |
164 | .. image:: https://raw.githubusercontent.com/phytest-devs/phytest/main/docs/images/report.png
165 | :alt: HTML Report
166 |
167 | From the output we can see several tests failed:
168 |
169 | .. code-block::
170 |
171 | FAILED examples/example.py::test_sequences_only_contains_the_characters[Sequence_B] - AssertionError: Invalid pattern found in 'Sequence_B'!
172 | FAILED examples/example.py::test_single_base_deletions[Sequence_C] - AssertionError: Longest stretch of '-' in 'Sequence_C' > 1!
173 | FAILED examples/example.py::test_longest_stretch_of_Ns_is_10[Sequence_D] - AssertionError: Longest stretch of 'N' in 'Sequence_D' > 10!
174 | FAILED examples/example.py::test_outlier_branches - AssertionError: Outlier tip 'Sequence_A' (branch length = 1.0)!
175 |
176 | Results (0.07s):
177 | 15 passed
178 | 4 failed
179 | - examples/example.py:12 test_sequences_only_contains_the_characters[Sequence_B]
180 | - examples/example.py:16 test_single_base_deletions[Sequence_C]
181 | - examples/example.py:20 test_longest_stretch_of_Ns_is_10[Sequence_D]
182 | - examples/example.py:32 test_outlier_branches
183 |
184 |
185 |
186 | .. end-quickstart
187 |
188 | See docs for more information https://phytest-devs.github.io/phytest.
189 |
190 | Citation
191 | ============
192 |
193 | .. start-citation
194 |
195 | If you use phytest, please cite the following paper:
196 |
197 | Wytamma Wirth, Simon Mutch, Robert Turnbull, Sebastian Duchene, Phytest: quality control for phylogenetic analyses, Bioinformatics, Volume 38, Issue 22, 15 November 2022, Pages 5124–5125, https://doi.org/10.1093/bioinformatics/btac664
198 |
199 |
200 | .. code-block:: bibtex
201 |
202 | @article{10.1093/bioinformatics/btac664,
203 | author = {Wirth, Wytamma and Mutch, Simon and Turnbull, Robert and Duchene, Sebastian},
204 | title = "{{Phytest: quality control for phylogenetic analyses}}",
205 | journal = {Bioinformatics},
206 | volume = {38},
207 | number = {22},
208 | pages = {5124-5125},
209 | year = {2022},
210 | month = {10},
211 | issn = {1367-4803},
212 | doi = {10.1093/bioinformatics/btac664},
213 | url = {https://doi.org/10.1093/bioinformatics/btac664},
214 | eprint = {https://academic.oup.com/bioinformatics/article-pdf/38/22/5124/47153886/btac664.pdf},
215 | }
216 |
217 |
218 | .. end-citation
219 |
--------------------------------------------------------------------------------
/autodocs.sh:
--------------------------------------------------------------------------------
1 | poetry run sphinx-autobuild docs docs/_build/html --open-browser
2 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/citation.rst:
--------------------------------------------------------------------------------
1 | ============
2 | Citation
3 | ============
4 |
5 | .. include:: ../README.rst
6 | :start-after: start-citation
7 | :end-before: end-citation
8 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | # import os
14 | # import sys
15 | # sys.path.insert(0, os.path.abspath('.'))
16 |
17 |
18 | # -- Project information -----------------------------------------------------
19 |
20 | project = 'phytest'
21 | copyright = '2022, Wytamma Wirth, Simon Mutch, Robert Turnbull, Sebastian Duchene'
22 | author = 'Wytamma Wirth, Simon Mutch, Robert Turnbull, Sebastian Duchene'
23 |
24 | html_favicon = 'favicon.ico'
25 |
26 | # The full version, including alpha/beta/rc tags
27 | release = '0.1.0'
28 |
29 |
30 | # -- General configuration ---------------------------------------------------
31 |
32 | # Add any Sphinx extension module names here, as strings. They can be
33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
34 | # ones.
35 | extensions = [
36 | "sphinx_rtd_theme",
37 | "nbsphinx",
38 | "sphinx.ext.mathjax",
39 | "sphinx.ext.githubpages",
40 | "myst_parser",
41 | "sphinx.ext.autodoc",
42 | "sphinx.ext.coverage",
43 | "sphinx.ext.napoleon",
44 | "sphinx_copybutton",
45 | "sphinx.ext.autosummary",
46 | ]
47 |
48 | github_username = 'phytest-devs'
49 | github_repository = 'phytest'
50 |
51 | html_context = {
52 | 'display_github': True,
53 | 'github_user': 'phytest-devs',
54 | 'github_repo': 'phytest',
55 | 'github_version': 'main/docs/',
56 | }
57 |
58 | # Add any paths that contain templates here, relative to this directory.
59 | templates_path = ['_templates']
60 |
61 | # List of patterns, relative to source directory, that match files and
62 | # directories to ignore when looking for source files.
63 | # This pattern also affects html_static_path and html_extra_path.
64 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
65 |
66 |
67 | # -- Options for HTML output -------------------------------------------------
68 |
69 | # The theme to use for HTML and HTML Help pages. See the documentation for
70 | # a list of builtin themes.
71 | #
72 | html_theme = 'sphinx_rtd_theme'
73 |
74 | # Add any paths that contain custom static files (such as style sheets) here,
75 | # relative to this directory. They are copied after the builtin static files,
76 | # so a file named "default.css" will overwrite the builtin "default.css".
77 | html_static_path = ['_static']
78 |
--------------------------------------------------------------------------------
/docs/development.rst:
--------------------------------------------------------------------------------
1 | ============
2 | Development
3 | ============
4 |
5 | Install poetry (https://python-poetry.org/)
6 |
7 | Clone the repository:
8 |
9 | .. code-block:: bash
10 |
11 | git clone https://github.com/phytest-devs/phytest.git && cd phytest
12 |
13 | .. code-block:: bash
14 |
15 | poetry install
16 | poetry shell
17 |
18 | Test the code with pytest:
19 |
20 | .. code-block:: bash
21 |
22 | pytest
23 |
--------------------------------------------------------------------------------
/docs/examples.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | Examples
3 | ==============
4 |
5 | The Phytest organisation on GitHub contains several example repositories (https://github.com/phytest-devs) that show how Phytest can integrate into standard phylogenetic analyses and scenarios.
6 |
7 | Nextstrain Example
8 | ------------------
9 |
10 | The Nextstrain pipeline is widely used for pathogen phylogenetic analysis.
11 | In this example we modify the Nextstrain zika-tutorial (https://github.com/nextstrain/zika-tutorial)
12 | to include testing with Phytest. This modified pipeline is available as an example repository in the phytest-devs GitHub organisation
13 | https://github.com/phytest-devs/phytest-nextstrain-example and provides an example of using Phytest for quality control in a Snakemake
14 | pipeline.
15 |
16 | Phytest is included in the pipeline to ensure the alignment and maximum likelihood tree meet explicit quality
17 | requirements before proceeding through the pipeline. Only if all the tests pass will the pipeline continue, thus saving computational resources.
18 | The resulting HTML report provides details of any failed tests so that the offending data can be removed.
19 | While Augur (the Nextstrain toolkit) has some ability to refine/filter tree and alignment files,
20 | Phytest adds a highly customizable testing framework to the pipeline that ensures the quality of the analysis.
21 |
22 | Temporal Signal Example
23 | -----------------------
24 |
25 | A repository containing the code for this example can be found at https://github.com/phytest-devs/phytest-temporal-signal-example.
26 |
27 | Temporal signal is an important prerequisite to many Bayesian phylogenetic analyses. In this example we use Phytest to ensure the
28 | data-set meets the minimum temporal signal requirements for Bayesian analyses. Temporal signal analysis can help to detect
29 | problematic sequences and potential issues before heading on to a Bayesian phylogenetic analysis e.g. with BEAST.
30 | Here, we use data from the TempEst tutorial https://beast.community/tempest\_tutorial.
31 |
32 | TempEst is a useful program for performing temporal signal analysis; however, it is not possible to easily automate the TempEst graphical user interface.
33 | Internally, Phytest uses TreeTime to perform a root-to-tip regression, allowing users to automate temporal signal testing.
34 | The :code:`Tree.assert_root_to_tip` method is used for testing temporal signal and provides arguments for testing the
35 | coefficient of determination, estimated rate and root date. The Phytest Tree class also implements methods for exploring and plotting results.
36 |
37 | Continuous Testing Example
38 | --------------------------
39 |
40 | In this example Phytest is used to test data shared on GitHub every time the data is updated (https://github.com/phytest-devs/phytest-continuous-testing-example).
41 |
42 | Tests are run against the phylogenetic data using the Continuous Integration features that are freely available
43 | through GitHub (other services are also available). Using Phytest through GitHub Actions (https://github.com/features/actions)
44 | ensures that any time the data changes (common during development) it still meets the requirements defined in the tests.
45 |
--------------------------------------------------------------------------------
/docs/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phytest-devs/phytest/b199e2fcc2bbd2a2e82ce71a73c472d28ea474ec/docs/favicon.ico
--------------------------------------------------------------------------------
/docs/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phytest-devs/phytest/b199e2fcc2bbd2a2e82ce71a73c472d28ea474ec/docs/images/logo.png
--------------------------------------------------------------------------------
/docs/images/logo.svg:
--------------------------------------------------------------------------------
1 |
2 |
19 |
--------------------------------------------------------------------------------
/docs/images/report.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phytest-devs/phytest/b199e2fcc2bbd2a2e82ce71a73c472d28ea474ec/docs/images/report.png
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. phytest documentation master file, created by
2 | sphinx-quickstart on Wed Apr 13 15:22:21 2022.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | .. image:: images/logo.png
7 | :alt: Phytest logo
8 |
9 | .. include:: ../README.rst
10 | :start-after: start-badges
11 | :end-before: end-badges
12 |
13 | Phytest
14 | ==================
15 |
16 | Phytest: Quality Control for Phylogenetic Analyses.
17 |
18 | Phytest is a tool for automating quality control checks on sequence, tree and metadata files during phylogenetic analyses. Phytest ensures that phylogenetic analyses meet user-defined quality control tests.
19 |
20 | ----
21 |
22 | Documentation: https://phytest-devs.github.io/phytest
23 |
24 | Code: https://github.com/phytest-devs/phytest
25 |
26 | ----
27 |
28 | .. toctree::
29 | :maxdepth: 2
30 | :caption: Contents
31 |
32 | quickstart
33 | installation
34 | testFiles
35 | usage
36 | examples
37 | reference
38 | development
39 | citation
40 |
41 | Indices and tables
42 | ==================
43 |
44 | * :ref:`genindex`
45 | * :ref:`modindex`
46 | * :ref:`search`
47 |
--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
1 | ============
2 | Installation
3 | ============
4 |
5 | Install Phytest using pip:
6 |
7 | .. code-block:: bash
8 |
9 | pip install phytest
10 |
11 |
12 | .. NOTE::
13 | Requires Python >=3.8 & <3.11
14 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | echo.
16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | echo.installed, then set the SPHINXBUILD environment variable to point
18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | echo.may add the Sphinx directory to PATH.
20 | echo.
21 | echo.If you don't have Sphinx installed, grab it from
22 | echo.https://www.sphinx-doc.org/
23 | exit /b 1
24 | )
25 |
26 | if "%1" == "" goto help
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/quickstart.rst:
--------------------------------------------------------------------------------
1 | ============
2 | Quickstart
3 | ============
4 |
5 |
6 | .. include:: ../README.rst
7 | :start-after: start-quickstart
8 | :end-before: end-quickstart
9 |
10 | .. |report image| image:: images/report.png
11 | :alt: HTML Report
12 |
--------------------------------------------------------------------------------
/docs/reference.rst:
--------------------------------------------------------------------------------
1 | =======================
2 | API Reference
3 | =======================
4 |
5 |
6 | Sequence
7 | ======================
8 |
9 | .. autoclass:: phytest.bio.sequence.Sequence
10 | :members:
11 |
12 |
13 | Alignment
14 | ======================
15 |
16 | .. autoclass:: phytest.bio.alignment.Alignment
17 | :members:
18 |
19 | Tree
20 | ======================
21 |
22 | .. autoclass:: phytest.bio.tree.Tree
23 | :members:
24 |
25 | Data
26 | ======================
27 |
28 | .. autoclass:: phytest.bio.data.Data
29 | :members:
30 |
--------------------------------------------------------------------------------
/docs/testFiles.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | Writing Tests
3 | ==============
4 |
5 | Phytest is easily extendable and provides a simple interface for writing custom phylogenetic tests.
6 | The interface follows the Pytest model of testing i.e. tests are defined as Python functions (or class methods)
7 | containing assert statements that are collected and evaluated at run-time. Tests that fail are captured and reported
8 | to the user allowing for repeatable and automated testing.
9 | Phytest provides many convenient helper functions for testing phylogenetic analyses including methods for testing sequences,
10 | alignments, trees and metadata files.
11 |
12 | Phytest fixtures
13 | =================
14 |
15 | Phytest injects special fixture objects into test functions, allowing for easy evaluation and
16 | testing of phylogenetic data structures. These fixtures provide the standard Biopython (sequences and trees) and Pandas (metadata)
17 | class methods as well as special assert methods for testing these data structures.
18 |
19 | Only functions that require the fixtures will have the Phytest objects passed to them. For example, consider the following tests.
20 |
21 | .. code-block:: python
22 |
23 | from phytest import Sequence
24 |
25 | def test_example(sequence: Sequence):
26 | ...
27 |
28 | Test functions must start with the prefix :code:`test_`; this allows Pytest to identify and collect the tests.
29 | Fixtures are required using one of the special arguments i.e. the lower case of the class name.
30 |
31 | Here the :code:`sequence` argument is used to require the sequences passed from the command line
32 | (see below for information on how to pass files to Phytest). Phytest will identify which test functions
33 | require which fixtures and pass the Phytest objects to them for testing.
34 |
35 | Using Phytest classes for type hints is not required; however, it makes for a better development experience.
36 | For example the following is a valid Phytest test and will be passed a Sequence object.
37 |
38 | .. code-block:: python
39 |
40 | def test_example(sequence):
41 | ...
42 |
43 | Fixtures can be combined to make more complex tests across multiple data types e.g.
44 |
45 | .. code-block:: python
46 |
47 | from phytest import Sequence, Tree
48 |
49 | def test_example(sequence: Sequence, tree: Tree):
50 | # test tree and sequence objects together
51 | ...
52 |
53 | Sequence
54 | ---------
55 |
56 | The Phytest Sequence class is a sub-class of the Biopython SeqRecord class. This class uses the fixture :code:`sequence`.
57 |
58 | .. code-block:: python
59 |
60 | from phytest import Sequence
61 |
62 | def test_example(sequence: Sequence):
63 | ...
64 |
65 | Any tests requiring the class will be run for every sequence in the file. For example if the fasta file below is passed to Phytest
66 | the :code:`test_example` function above would be run 4 times (Sequence_A-Sequence_D).
67 |
68 | .. code-block:: text
69 |
70 | >Sequence_A
71 | ATGAGATCCCCGATAGCGAGCTAGCGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG
72 | >Sequence_B
73 | ATGAGATCCCCGATAGCGAGCTAGXGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG
74 | >Sequence_C
75 | ATGAGA--CCCGATAGCGAGCTAGCGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG
76 | >Sequence_D
77 | ATGAGATCCCCGATAGCGAGCTAGCGATNNNNNNNNNNNNNNNNNTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG
78 |
79 | .. code-block:: bash
80 |
81 | $ phytest test.py --sequence sequences.fasta
82 |
83 | Test session starts (platform: darwin, Python 3.9.12, pytest 7.1.1, pytest-sugar 0.9.4)
84 | rootdir: /Users/wytamma/programming/phytest, configfile: pyproject.toml
85 | plugins: sugar-0.9.4, html-3.1.1, cov-3.0.0
86 | collecting ...
87 | test.py ✓✓✓✓ 100% ██████████
88 |
89 | Results (0.03s):
90 | 4 passed
91 |
92 |
93 | Alternative file formats can be specified using the :code:`--sequence-format` flag.
94 |
95 | Alignment
96 | ---------
97 |
98 | The Phytest Alignment class is a sub-class of the Biopython MultipleSeqAlignment class. This class uses the fixture :code:`alignment`.
99 |
100 | .. code-block:: python
101 |
102 | from phytest import Alignment
103 |
104 | def test_example(alignment: Alignment):
105 | ...
106 |
107 | Tests using the alignment file will be run once i.e. you will have access to the entire alignment during the test.
108 | Alignments are also passed to Phytest using the :code:`--sequence` flag; however, they are required to be valid
109 | alignments e.g. all sequences must be the same length.
110 |
111 | .. code-block:: bash
112 |
113 | phytest test.py --sequence sequences.fasta
114 |
115 | Test session starts (platform: darwin, Python 3.9.12, pytest 7.1.1, pytest-sugar 0.9.4)
116 | rootdir: /Users/wytamma/programming/phytest, configfile: pyproject.toml
117 | plugins: sugar-0.9.4, html-3.1.1, cov-3.0.0
118 | collecting ...
119 | test.py ✓ 100% ██████████
120 |
121 | Results (0.02s):
122 | 1 passed
123 |
124 |
125 | Alternative file formats can be specified using the :code:`--sequence-format` flag.
126 |
127 | Tree
128 | -----
129 |
130 | The Phytest Tree class is a sub-class of the Biopython Tree class. This class uses the fixture :code:`tree`.
131 |
132 | .. code-block:: python
133 |
134 | from phytest import Tree
135 |
136 | def test_example(tree: Tree):
137 | ...
138 |
139 | Tests using the tree fixture will be run once per tree in the file. Tree files are passed to Phytest using the :code:`--tree` flag.
140 |
141 | .. code-block:: text
142 |
143 | (Sequence_A:1,Sequence_B:0.2,(Sequence_C:0.3,Sequence_D:0.4):0.5);
144 | (Sequence_A:1,Sequence_B:0.3,(Sequence_C:0.3,Sequence_D:0.4):0.5);
145 |
146 |
147 | .. code-block:: bash
148 |
149 | phytest test.py --tree tree.newick
150 |
151 | Test session starts (platform: darwin, Python 3.9.12, pytest 7.1.1, pytest-sugar 0.9.4)
152 | rootdir: /Users/wytamma/programming/phytest, configfile: pyproject.toml
153 | plugins: sugar-0.9.4, html-3.1.1, cov-3.0.0
154 | collecting ...
155 | test.py ✓✓ 100% ██████████
156 |
157 | Results (0.02s):
158 | 2 passed
159 |
160 | Alternative file formats can be specified using the :code:`--tree-format` flag.
161 |
162 | Data
163 | -----
164 |
165 | The Phytest Data class is a sub-class of the Pandas DataFrame class. This class uses the fixture :code:`data`.
166 |
167 | .. code-block:: python
168 |
169 | from phytest import Data
170 |
171 | def test_example(data: Data):
172 | ...
173 |
174 | Tests using the data file will be run once. Data files are passed to Phytest using the :code:`--data` flag.
175 |
176 | .. code-block:: bash
177 |
178 | phytest test.py --data metadata.csv
179 |
180 | Test session starts (platform: darwin, Python 3.9.12, pytest 7.1.1, pytest-sugar 0.9.4)
181 | rootdir: /Users/wytamma/programming/phytest, configfile: pyproject.toml
182 | plugins: sugar-0.9.4, html-3.1.1, cov-3.0.0
183 | collecting ...
184 | test.py ✓ 100% ██████████
185 |
186 | Results (0.02s):
187 | 1 passed
188 |
189 |
190 | Alternative file formats can be specified using the :code:`--data-format` flag.
191 |
192 |
193 | Built-in asserts
194 | =================
195 |
196 | Phytest provides many convenient helper functions for testing phylogenetic analyses including methods for testing sequences,
197 | alignments, trees and metadata files.
198 |
199 | .. code-block:: python
200 |
201 | from phytest import Sequence
202 |
203 | def test_GC_content(sequence: Sequence):
204 | sequence.assert_percent_GC(38)
205 |
206 | For example, the Phytest Sequence class implements the method :code:`Sequence.assert_percent_GC`.
207 | Calling this method with the expected GC-content e.g. :code:`sequence.assert_percent_GC(38)` will
208 | raise an error if the percent of G and C nucleotides in the sequence is not equal to 38%.
209 | Many methods also provide maximum and minimum arguments so the upper and lower bounds can be tested
210 | e.g. :code:`sequence.assert_percent_GC(min=30, max=40)`.
211 |
212 | .. code-block:: python
213 |
214 | from phytest import Sequence
215 |
216 | def test_GC_content(sequence: Sequence):
217 | sequence.assert_percent_GC(min=30, max=40)
218 |
219 | All Phytest assert methods also provide a warning flag e.g. :code:`sequence.assert_percent_GC(38, warn=True)`
220 | causing the method to raise a warning instead of an error if the test fails. In an automated pipeline,
221 | this provides a way to inform the user of potential problems without causing the pipeline to fail.
222 | The warning flag can be set automatically by calling the method with the :code:`warn_` prefix instead
223 | of :code:`assert_` e.g. :code:`sequence.warn_percent_GC(38)`.
224 |
225 | .. code-block:: python
226 |
227 | from phytest import Sequence
228 |
229 | def test_GC_content(sequence: Sequence):
230 | sequence.warn_percent_GC(38)
231 |
232 | See the documentation for a full list of built-in assert methods (https://phytest-devs.github.io/phytest/reference.html).
233 |
234 |
235 | Custom asserts
236 | =================
237 |
238 | As Phytest is running Pytest under the hood, it is trivial to write your own custom asserts using the Phytest fixtures.
239 |
240 | .. code-block:: python
241 |
242 | def test_outlier_branches(tree: Tree):
243 | # Here we create a custom function to detect outliers
244 | import statistics
245 |
246 | tips = tree.get_terminals()
247 | branch_lengths = [t.branch_length for t in tips]
248 | cut_off = statistics.mean(branch_lengths) + statistics.stdev(branch_lengths)
249 | for tip in tips:
250 | assert tip.branch_length < cut_off, f"Outlier tip '{tip.name}' (branch length = {tip.branch_length})!"
251 |
--------------------------------------------------------------------------------
/docs/usage.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | Running Tests
3 | ==============
4 |
5 | Phytest has been developed as a command-line interface, Python module, and Pytest plugin, providing multiple methods of invocation.
6 |
7 | Phytest CLI
8 | ===========
9 |
10 | Phytest provides a command line interface (CLI) for running tests on specific data files.
11 |
12 | .. code-block:: python
13 |
14 | from phytest import Sequence
15 |
16 | def test_gc_content(sequence: Sequence):
17 | sequence.assert_percent_GC(
18 | min=30,
19 | max=40
20 | )
21 |
22 | The Phytest CLI requires a path to the file containing user defined tests and has optional flags for specifying sequence/alignment, tree and metadata files:
23 |
24 | .. code-block:: bash
25 |
26 | phytest test.py --sequence sequences.fasta --tree tree.newick --data metadata.csv
27 |
28 | Alternative file formats can be specified with :code:`--sequence-format`, :code:`--tree-format`, :code:`--data-format` flags.
29 | Supported formats include those supported by Biopython (sequences and trees) and TSV and Excel (metadata).
30 |
31 | Phytest Module
32 | ================
33 |
34 | The Phytest module can be imported into a script so that tests are self-contained i.e. data files are specified in the tests.
35 |
36 | .. code-block:: python
37 |
38 | import phytest
39 |
40 | def test_gc_content(sequence: phytest.Sequence):
41 | sequence.assert_percent_GC(
42 | min=30,
43 | max=40
44 | )
45 |
46 | if __name__ == "__main__":
47 | sys.exit(phytest.main(sequence='examples/data/ice_viruses.fasta'))
48 |
49 | This test style uses the :code:`if __name__ == "__main__"` Python convention to only run the tests when invoked from the command line using the python command.
50 |
51 | .. code-block:: bash
52 |
53 | python test.py
54 |
55 | The :code:`phytest.main` function will run the tests and return an exit status (0 or 1) that is passed to :code:`sys.exit` to ensure the tests exit correctly.
56 |
57 |
58 | Pytest Plugin
59 | ================
60 |
61 | Phytest can also be used as a Pytest plugin. Simply install Phytest and then run Pytest on the test file with the appropriate flags.
62 |
63 | .. code-block:: bash
64 |
65 | pytest test.py --sequence sequences.fasta
66 |
67 | .. NOTE::
68 | Short hand flags must be capitalised when running Phytest through Pytest e.g. :code:`pytest test.py -S sequences.fasta`
69 |
--------------------------------------------------------------------------------
/examples/data/example.csv:
--------------------------------------------------------------------------------
1 | name,date
2 | Sequence_A,2020-05-04
3 | Sequence_B,2020-05-04
4 | Sequence_C,2020-05-04
5 | Sequence_D,2020-05-04
6 |
--------------------------------------------------------------------------------
/examples/data/example.fasta:
--------------------------------------------------------------------------------
1 | >Sequence_A
2 | ATGAGATCCCCGATAGCGAGCTAGCGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG
3 | >Sequence_B
4 | ATGAGATCCCCGATAGCGAGCTAGXGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG
5 | >Sequence_C
6 | ATGAGA--CCCGATAGCGAGCTAGCGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG
7 | >Sequence_D
8 | ATGAGATCCCCGATAGCGAGCTAGCGATNNNNNNNNNNNNNNNNNTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG
9 |
--------------------------------------------------------------------------------
/examples/data/example.tree:
--------------------------------------------------------------------------------
1 | (Sequence_A:1,Sequence_B:0.2,(Sequence_C:0.3,Sequence_D:0.4):0.5);
2 |
--------------------------------------------------------------------------------
/examples/data/example.tsv:
--------------------------------------------------------------------------------
1 | name date
2 | Sequence_A 4/5/2020
3 | Sequence_B 4/5/2020
4 | Sequence_C 4/5/2020
5 | Sequence_D 4/5/2020
6 |
--------------------------------------------------------------------------------
/examples/data/example.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phytest-devs/phytest/b199e2fcc2bbd2a2e82ce71a73c472d28ea474ec/examples/data/example.xlsx
--------------------------------------------------------------------------------
/examples/data/ice_viruses.fasta:
--------------------------------------------------------------------------------
1 | >A.Fiji.15899.83.AJ289702_1983
2 | GTCAACCTACTTGAGGACAATCACAACGGGAAACTATGCAAACTAAAAGGAATAGCGCCACTACAATTGGGGAAATGCAACATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAAGAACTGAGGGAACAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGTTCATGGCCCAACCACAACGTAACCAAAGGAGTAACGGCATCATGTTCCCATAAGGGGAAAAGCAGTTTTTACAGAAACTTGCTATGGCTGACGGAGAAAAATGGCTCGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT
3 | >A.Kiev.59.1979.M38353_1979
4 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGCAGACTGAAAGGAATAGCTCCACTACAATTGGGGAAATGCAGCATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGATATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACGTAACCAGAGGAGTAACGGCATCATGCTCCCATAAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAAAATGGCTCGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT
5 | >A.FortMonmouth.1.1947.U02085_1947
6 | GTAAACCTACTCGAAGACAGCCACAACGGGAAATTATGCAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGCTTTCTAAGAGATCATGGTCCTACATTGCAGAAACACCAAACTCTGAGAATGGAGCATGTTACCCAGGAGATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACATAACCAGAGGAGTAACGGCAGCATGCTCCCATGCGGGGAAAAGCAGTTTTTACAAAAATTTGCTCTGGCTGACGGAGACAGATGGCTCATACCCAAAGCTGAGCAAGTCCTATGTGAACAATAAAGAGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCACCCGTCT
7 | >A.BrevigMission.1.18.AF116575_1918
8 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAAATTAAAAGGAATAGCCCCATTACAATTGGGGAAATGTAATATCGCCGGATGGCTCTTGGGAAACCCGGAATGCGATTTACTGCTCACAGCGAGCTCATGGTCCTATATTGTAGAAACATCGAACTCAGAGAATGGAACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGGGAGCAATTGAGCTCAGTGTCATCGTTCGAAAAATTCGAAATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAAAGGTGTAACGGCAGCATGCTCCTATGCGGGAGCAAGCAGTTTTTACAGAAATTTGCTGTGGCTGACAAAGAAGGGAAGCTCATACCCAAAGCTTAGCAAGTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCATCCGCCT
9 | >A.SouthCarolina.6.1988.L19025_1988
10 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGTCGACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAGTTGAGTTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCGTAACCAAAGGAGTAACGGCATCATGCTCCCATAAGGGGAGAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGCGTTCATCACCCGTCT
11 | >A.Yamagata.32.1989.D31949_1989
12 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGTCGACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGGAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAAGAAAGATCATGGCCCAACCACACCGTAACCAAAGGAGTAACGGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT
13 | >A.Finland.4.1984.L33491_1984
14 | GTCNACCTACTTGAGGACAGTCACAACGGAAAACTATGCAGACTAAAAGGAATAGCCCCACTACAATTGGGGAAATGCAGCATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATNNTTCGAGAGATTCGAAATATTCCCCAAGGAAAGTTCATGGCCCAAACACAACATAACCAAAGGAGTAACGGCATCATGCTCCCATAAGGGNAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAAAATGGCTTGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT
15 | >A.USSR.90.1977.K01331_1977
16 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGCAGACTAAAAGGGATAGCCCCACTACAATTGGGGAAATGCAACATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGATATTTCGCCGACTATGAGGAATTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACGTAACCAGAGGCGTAACGGCATCATGCTCCCATAAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAAAATGGCTCGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT
17 | >A.Lepine.1948.AB043479_1948
18 | GTCAACCTACTCGAGGACAGTCACAACGGAAAACTATGCAGACTAAAAGGAATAGCCCCACTACAATTGGGGAAATGCAACATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCTGAGAATGGAACATGTTACCCAGGATATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTTTCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGGTCATGGCCCAAACACAACGTAACCAGAGGAGTAACGGCAGCATGCTCCCATAAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAAAATGGCTCGTACCCAACTCTGAGCAAGTCCTATGTGAACAATAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCAACCGTCT
19 | >A.Meguro.1.1956.AB043485_1956
20 | GTAAACCTACTCGAAGACAGCCACAATGGGAAATTATGCAGATTAAAAGGAAAAGCCCCACTACAATTGGGGAAATGTAACATTGCCGGATGGGTCTTAGGAAACCCAGAATGTGAATCATTGCTTTCCAATAGATCATGGTCCTACATTGCAGAAACACCAAACCCTGAGAATGGGACATGTTACCCAGGAGATTTCACCAACTATGAGGAACTGAAGGAGCAATTGAGCTCAGTGTCATCACTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACACAACCAGAGGAGTGACGGCAGCATGCTCCCATGCGAGGAAAAGCAGTTTCTACAAAAATTTGGTCTGGCTGACGGAGGCAAATGGCTCATACCCAACTCTGAGCAATTCCTATGTGAACAATCAAGAGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCACCCGTCT
21 | >A.Kojiya.1.1952.AB043482_1952
22 | GTAAACCTACTCGAAGACAGCCACAATGGGAAATTATGCAGATTAAAAGGAAAAGCCCCACTACAATTGGGGAAATGTAACATTGCCGGATGGGTCTTAGGAAACCCAGAATGCGAATCATTGCTTTCCAATAGATCATGGTCCTACATTGCAGAAACACCAAACTCTGAGAATGGGACATGTTACCCAGGAGATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACACAACCAGAGGAGTGACGGCAGCATGCTCCCATGCGGGGAAAAGCAGTTTTTACAAAAATTTGGTCTGGCTGACGGAGGCAAATGGCTCATACCCAAATCTGAGCAAGTCCTATGTGAACAATCAAGAGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCACCCGTCT
23 | >A.Huston.43.AF494251_1943
24 | GTCAACCTACTCGAAGACAGCCACAACGGGAAATTATGTAGATTAAAAGGAATAGCCCCACTACAATTGAGGAAATGTAACATTGCTGGATGGATCCTGGGAAACCCAGAATGCGAATCACTGCTTTCAGAGAGATCATGGTCCTACATTGTTGAAACACCAAACTCTGAGAATGGAACATGTTACCCAGGAGATTTTACCAACTATGAGGAATTGAGGGAGCAATTGAGCTCTGTATCATCATTCGAAAGATTCGAAATATTCCCCAAGGAAAGCTCATGGCCCAAACACAACACAACCAGAGGAGTAACGGCAGCATGCTCCCATGCGGGAAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGATGGCTCATATCCGAATCTGAACAATTCCTATGTGAACAAGAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCC
25 | >A.BuenosAires.T114.97.AF534026_1997
26 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGCCGACTAAAAGGAACAGCCCCACTACAATTGGGTAATTGCAGCATTGCCGGATGGATCTTAGGAAATCCAGAATGCGAATCACTGTTTTCTAGGGAATCATGGTCCTACATTGCAGAAACACCAAACCCTGAAAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTTGAAATATTCCCCAAGGAAAGCTCATGGCCCAACCACACCGTAACCAAAGGAGTGACGGCATCATGCTCCCATAATGGGAAAAGCAGCTTTTACAAAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAAGTCCTATGTAAACAACAAGGGGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCACCCGTCT
27 | >A.PuertoRico.8.34.J02144_1934
28 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACCCACTGCTTCCAGTGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAATATGTTATCCAGGAGATTTCATCGACTATGAGGAGCTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAAATATTTCCCAAAGAAAGCTCATGGCCCAACCACAACACAAAC---GGAGTAACGGCAGCATGCTCCCATGAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGAGGGCTCATACCCAAAGCTGAAAAATTCTTATGTGAACAAAAAAGGGAAAGAAGTCCTTGTACTGTGGGGTATTCATCACCCGCCT
29 | >A.PuertoRico.8.1934.J04572_1934
30 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACCCACTGCTTCCAGTGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAATATGTTATCCAGGAGATTTCATCGACTATGAGGAGCTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAAATATTTCCCAAAGAAAGCTCATGGCCCAACCACAACACAACCAAAGGAGTAACGGCAGCATGCTCCCATGCGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGAGGGCTCATACCCAAAGCTGAAAAATTCTTATGTGAACAAGAAAGGGAAAGAAGTCCTTGTACTGTGGGGTATTCATCACCCGTCT
31 | >A.PuertoRico.8.1934.EF467821_1934
32 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACCCACTGCTTCCAGTGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAATATGTTATCCAGGAGATTTCATCGACTATGAGGAGCTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAAATATTTCCCAAAGAAAGCTCATGGCCCAACCACAACACAAAC---GGAGTAACGGCAGCATGCTCCCATGAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGAGGGCTCATACCCAAAGCTGAAAAATTCTTATGTGAACAAAAAAGGGAAAGAAGTCCTTGTACTGTGGGGTATTCATCACCCGCCT
33 | >A.Mongolia.153.1988.Z54287_1988
34 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACCCACTGCTTCCAGTGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAATATGTTATCCAGGAGATTTCATCGACTATGAGGAGCTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAAATATTTCCCAAAGAAAGCTCATGGCCCAACCACAACACAAAC---GGAGTAACGGCAGCATGCTCCCATGAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGAGGGCTCATACCCAAAGCTGAAAAATTCTTATGTGAACAAAAAAGGGAAAGAAGTCCTTGTACTGTGGGGTATTCATCACCCGCCT
35 | >A.Mongolia.111.1991.Z54288_1991
36 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCCTGGGAAACCCAGAATGCGACCCACTGCTTCCAGTGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAATATGTTATCCAGGAGATTTCATCGACTATGAGGAGCTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAAATATTTCCCAAAGAAAGCTCATGGCCCAACCACAACACAAAC---GGAGTAACGGCAGCATGCTCCCATGAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGAGGGCTCATACCCAAAGCTGAAAAATTCTTATGTGAACAAAAAAGGGAAAGAAGTCCTTGTACTGTGGGGTATTCATCACCCGCCT
37 | >A.Saga.2.1957.AB043486_1957
38 | GTAAACCTACTCGAAGACAGCCACAATGGGAAATTATGCAGATTAAAAGGAAAAGCCCCACTACAATTGGGGAACTGTAACATTGCCGGATGGGTCTTAGGAAACCCAGAATGTGAATCATTGCTTTCCGGTAGATCATGGTCCTACATTGCAGAAACACCAAACTCTGAGAATGGGACGTGCTACCCAGGGGATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAACCAC---ACAACCAGAGGAGTGACGGCAGCATGCCCCCATGCGAAGAAAAGCAGTTTTTACAAAAATTTGGTCTGGCTGACGGAGGCAAATGGCTCATACCCAAATCTGAGCAGGTCCTATGTGAACAATCAGGAGAAAGAAGTCCTTGTGCTATGGGGAGTTCATCACCCGTCT
39 | >A.Beijing.262.95.AY289928_1995
40 | GTCAACCTACTTGAGGACAGTCACAATGGAAAACTATGTCTACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGATTTCTAAGGAATCATGGTCCTACATTGTAGAGACACCAAACCCTGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATCATTTGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAAACACACCGTAACA---GGAGTAACGGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAATTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCATCT
41 | >A.Alaska.1173.00.AY029287_2000
42 | GTCAACCTACTTGAGGACAGTCACAATGGAAAACTATGTCTACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATTACTGATTTCCAAGGAATCATGGTCCTACATTGTAGAAACACCAAATCCTGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCTTCATTTGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCGTAACC---GGAGTATCAGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGGGAAGAATGGTTTGTACCCAAGCCTGAGCAAGTCCTATGCAAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGCCT
43 | >A.Tokyo.3.1967.U38242_1967
44 | GTTAACCTGCTCGAAGACAGCCACAACGGGAAACTATGTAAATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACTCACTGCTTCCAGCGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAGCATGTTATCCAGGAGATTTCATCGACTATGAGGAACTGAAGGAGCAATTGAGCTCAGTATCATCATTAGAAAGATTCGAAATATTTCCCAAGGAAAGTTCATGGCCCAACCACAACACACTCAAAGGAGTAACAGCATCATGCTCCCATGGGGGAAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGAAAACGGAGGACTCATACCCAAAGCTGAGCAATTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT
45 | >A.WS.33.U08904_1933
46 | GTTAACCTGCTCGAAGACAGCCACAACGGGAAACTATGTAAATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACTCACTGCTTCCAGCGAAATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAGCATGTTATCCAGGAGATTTCATCGACTATGAGGAACTGAAGGAGCAATTGAGCTCAGTATCATCATTAGAAAGATTCGAAATATTTCCCAAGGAAAGTTCATGGCCCAACCACAACACACTCAAAGGAGTAACAGCAGCATGCTCCCATAGGGGAAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGAAAACGGGGGACTCATACCCAAAGCTGAACAATTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT
47 | >A.WSN.1933.CY010788_1933
48 | GTTAACCTGCTCGAAGACAGACACAACGGGAAACTATGTAAATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCACCGGATGGCTCTTGGGAAATCCAGAATGCGACTCACTGCTTCCAGCGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAGCATGTTATCCAGGAGATTTCATCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTAGAAAGATTCGAAATATTTCCCAAGGAAAGTTCATGGCCCAACCACACATTCAAC---GGAGTAACAGTATCATGCTCCCATAGGGGAAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGAAGAAGGGGGATTCATACCCAAAGCTGACCAATTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT
49 | >A.WilsonSmith.1933.DQ508905_1933
50 | GTTAACCTGCTCGAAGACAGACACAACGGGAAACTATGTAAATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCACCGGATGGCTCTTGGGAAATCCAGAATGCGACTCACTGCTTCCAGCGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAGCATGTTATCCAGGAGATTTCATCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTAGAAAGATTCGAAATATTTCCCAAGGAAAGTTCATGGCCCAACCACACATTCAAC---GGAGTAACAGCATCATGCTCCCATAGGGGAAAAAGCAGTTTTTACAGAAATTTGATATGGCTGACGAAGAAGGGGGATTCATACCCAAAGCTGACCAATTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT
51 | >A.goose.HongKong.8.1976.U46021_1976
52 | GTTAATTTACTCGAAAACAGCCATAATGGAAAACTCTGCAGCCTGAATGGAATAGCCCCTTTACAACTAGGGAAATGCAACGTGGCGGGGTGGCTCCTTGGCAACCCAGAATGTGACCTGCTGCTCACTGCGAGTTCATGGTCCTACATAATAGAGACTTCAAATTCAGAAAACGGAACATGCTACCCCGGAGAGTTCATTGATTATGAAGAGTTAAGGGAACAGCTAAGTTCAGTGTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAATCTCATGGCCAAATCATGAAACAACCAAAGGTGTCACAGCTGCATGCTCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTATGGATAACAAAGAAAGGAACTTCCTACCCTAAACTCAGCAAATCATACACGAACAACAAAGGGAAAGAAGTGCTTGTACTTTGGGGGGTGCATCATCCTCCA
53 | >A.duck.Australia.749.80.AF091312_1980
54 | GTTAATTTACTTGAAAACAGCCATAATGGAAAACTTTGCAGCCTGAATGGAATAGCCCCATTACAACTAGGGAAATGCAACGTGGCGGGGTGGCTCCTTGGCAACCTAGAATGTGACCTGTTGCTCACTGCGAATTCATGGTCTTATATAATAGAGACTTCAAATTCAGAAAACGGAACATGTTACCCCGGGGAGTTCATCGATTATGAGGAATTAAGAGAACAGCTAAGTTCAGTGTCTTCATTTGAGAAATTTGAAATTTTCCCGAAGGCAAGCTCATGGCCAAATCATGAGACAACCAAAGGTGTCACAGCTGCATGCTCTTACTTGGGAGCTAGCAGCTTTTATCGGAATTTGCTATGGATGACAAAGAAGGGAACTTCCTATCCTAAACTCAGCAAATCATATACGAACAACAAAGGGAAAGAAGTGCTTGTACTTTGGGGGGTGCATCACCCTCCC
55 | >A.mallard.Tennessee.11464.85.AF091311_1985
56 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGTCTGAACGGGATAGCTCCCCTACAATTGGGGAAGTGCAATGTAGCGGGATGGCTCCTTGGCAATCCAGAGTGTGACCTTCTACTCACTGCAAACTCATGGTCCTACATAATAGAGACTTCCAATTCAGAAAACGGGACATGCTACCCCGGTGAATTCATAGATTATGAAGAATTAAGAGAGCAGCTAAGTTCAGTTTCTTCATTTGAAAGGTTTGAAATTTTCCCGAAGGCAAACTCATGGCCAAATCATGAGACAACTAAAGGTGTTACAGCCGCATGCTCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGATAACAAAGAAGGGAACGTCATATCCAAAACTCAGCAAATCATACACGAACAATAAAGGGAAAGAAGTACTCGTGCTCTGGGGAGTGCACCACCCTCCA
57 | >A.duck.Alberta.35.76.U47310_1976
58 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTAAACGGGATACCTCCCCTACAACTGGGAAAGTGCAATGTGGCGGGATGGCTCCTGGGCAATCCAGAGTGTGATCTTCTACTCACTGCAAACTCATGGTCCTACATAATAGAAACTTCAAACTCAGAAAACGGAACATGCTACCCCGGTGAATTCATAGATTATGAAGAATTAAGAGAGCAGCTAAGTTCAGTTTCTTCATTTGAAAAATTTGAAATTTTCCCGAAGGCAAGCTCATGGCCAAATCATGAGACAACTAAAGGTGTTACAGCTGCATGCTCTTACTCTGGAGCCAGCAGTTTTTACCGGAATTTGCTGTGGATAACAAAGAAAGGGACTTCATATCCAAAACTCAGCAAATCATACACGAACAATAAGGGGAAAGAAGTGCTTGTGCTCTGGGGGGTGCACCACCCTCCA
59 | >A.duck.HK.196.1977.D00839_1977
60 | GTTAATTTACTAGAAAACAGCCATAATGGAAAACTCTGCAGACTGAATGGAATAGCCCCCTTACAGCTAGGGAAATGCAACGTGGCAGGATGGATCCTTGGCAACCCAGAGTGTGATCTATTGCTCACAGCGAATTCATGGTCTTACATAATAGAGACTTCAAATTCAGAGAATGGAACATGCTACCCCGGAGAGTTCAATGATTATGAAGAATTAAGGGAACAGCTGAGTTCGGTGTCTTCATTTGAAAAGTTTGAAATTTTCCCAAAGGCTAGCTCATGGCCAAATCATGAGACAACTAAAGGTATTACAGCTGCATGTCCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTATGGATAACAAAGAAGGGAACTTCATACCCTAAACTCAGCAAATCATACACAAACAACAAAGGGAAAGAAGTGCTTGTAATCTGGGGAGTGCACCACCCTCCA
61 | >A.teal.Alberta.141.1992.CY004539_1992
62 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTGAACGGGATAGCTCCTCTACAATTGGGGAAGTGCAATGTAGCGGGGTGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAATGGGACATGCTATCCCGGTGAGTTCATAGATTATGAAGAATTAAGAGAGCAGCTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCGAAGGCAAGCTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGATAACAAAGAAGGGAACTTCATATCCAAAACTCAGCAAATCATACACAAACAATAAGGGGAAGGAAGTGCTCGTGCTCTGGGGAGTGCACCACCCTCCA
63 | >A.pintail.Ohio.25.1999.CY017725_1999
64 | GTGAATTTGCTCGAAAACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTTTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG
65 | >A.Teal.Ohio.72.1999.CY017717_1999
66 | GTGAATTTGCTCGAAAACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTTTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG
67 | >A.BrantGoose.1.1917.AY095226_1917
68 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG
69 | >A.mallard.Ohio.66.1999.CY016955_1999
70 | GTGAATTTGCTCGAAAACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTTTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG
71 | >A.mallard.Ohio.56.1999.CY012824_1999
72 | GTGAATTTGCTCGAAAACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTTTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG
73 | >A.mallard.ALB.267.1996.CY004504_1996
74 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAACTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGGACATGCTATCCCGGTGAATTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGATAACAAAGAAGGGAACTTCATATCCAAAACTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTGCTCTGGGGAGTGCACCATCCTCCG
75 | >A.duck.NJ.771770.1995.EU026110_1995
76 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAATTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCTTACTCTGGAGCAAGCAGTTTTTATCGGAATTTGCTGTGGATAACAAAGAAGGGAACTTCATATCCAAAACTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTGCTCTGGGGAGTGCACCATCCTCCG
77 | >A.mallard.Alberta.211.1998.AY633212_1998
78 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACATCCAATTCAGAGAACGGGACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGGGAGCAATTGAGTTCGGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCTTACTCTGGGGCCAGCAGTTTTTATCGAAATTTGCTGTGGATAATAAAGAAGGGAACTTCATATCCAAAACTCAGCAAGTCATACACGAACAATAAGGGAAAAGAAGTGCTTGTGCTCTGGGGAGTGCACCATCCTCCG
79 | >A.mallard.MD.403.2002.EU026082_2002
80 | GTGAATTTGCTCGAAGACCGCCATAATGGGAAGCTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACGTCCAATTCAGAGAACGGGACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGGGAGCAATTGAGTTCAGTTTCTTCTTTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGGGCCAGCAGTTTTTATCGAAATTTGCTGTGGATAGTAAAGAAGGGAACTTCATACCCGAAACTCAGCAAGTCATACACGAACAATAAGGGAAAAGAAGTGCTTGTGCTCTGGGGAGTGCACCATCCTCCG
81 | >A.swine.29.37.U04857_1937
82 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTAGGGGGAATAGCCCCATTGCAACTGGGTAAATGTAATATTGCCGGATGCGTCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAACTCATGGTCCTATATTGTAGAAACATCGAACTCAGATAATGGGACATGTTACCCAGGAGATTTCATTGACTATGAAGAACTGAGAGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAGATATTTCCCAAAACAAGTTCGTGGCCCAATCATGAAACAACCAGAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTTTACAGAAATTTACTATGGCTGGTGAAGAAGGGAGATTCATATCCAAAGCTCAGCAAATCCTATGTTAACAATAAAGGGAAAGAAGTCCTTGTGCTATGGGGGGTTCACCATCCNCCT
83 | >A.swine.Ehime.1.80.X57494_1980
84 | GTTAACCTTCTTGAAGACAGACATAACGGGAAACTATGTAAACTAGGGGGGATAGCCCCATTGCATCTGGGTAAATGTAACATTGCCGGATGGCTTTTGGGAAATCCAGAATGTGAATTACTATTCACAGTAAGCTCATGGTCTTACATTGTGGAAACATCGAACTCAGACAATGGGACATGTTACCCAGGGGATTTCATCAATTATGAAGAGCTGAGAGAGCAGTTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTCCCCAAGACAAGTTCGTGGCCCAATCATGAAACAAACAGAGGTGTGACGGCAGCATGCCCTTATGCTGGAGCAAACAGTTTCTACAGAAATTTAATATGGCTTGTGAAAAAAGAAAACTCATATCCAAAGCTCCGCAAATCCTATGTTAACAATAAGGGGAAGGAAGTCCTTGTGCTATGGGGCATTCACCATCCACCT
85 | >A.swine.Illinois.63.X57493_1963
86 | GTTAATCTTCTTGAAGACAGACACAACGGGAAACTATGTAAACTAGGGGGAATAGCCCCATTGCACCTAGGTAAATGTAACATTGCCGGATGGCTTTTGGGAAACCCAGAATGTGAATTACTGCTCACAGTAAGCTCATGGTCTTATATTGTGGAAACATCGAACTCAGACAATGGGACATGTTACCCAGGAGATTTCATCAATTATGAAGAGCTGAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTTCCCAAGATAAGTTCGTGGCCCAATCATGAAACAAACAGAGGTGTAACGGCAGCATGCCCTTATGCTGGAGCAAACAGCTTTTTCAGAAATTTAATATGGCTGGTGAAGAAGGAGAGTTCATACCCAAAGCTCAGCAAATCCTATGTTAACAATAAAGGGAAGGAAGTCCTTGTGCTATGGGGTATTCACCATCCGCCT
87 | >A.swine.NewJersey.11.76.K00992_1976
88 | GTTAATCTTCTTGAAGACAGACATAACGGGAAACTATGTAAACTGGGGGGGATAGCCCCATTGCACTTGGGTAAATGTAACATTGCCGGTAGGCTTTTGGGAAACCCAGAATGTGAATTACTACTCACAGTAAGCTCATGGTCTTACATTGTGGAAACATCGAAATCAGACAATGGGACATGTTACCCAGGAGATTTCATCAATTATGAAGAGCTGAGAGAGCAGTTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTCCCCAAGACAAGTTCGTGGCCCAATCATGAAACAAACAGAGGTGTGACGGCAGCATGCCCTTATGCTGGAGCAAACAGCTTCTACAGAAATTTAATATGGCTGGTGAAAAAAGAAAATTCATACCCAAAGCTCAGCAAATCCTATGTTAACAATAAAGGGAAGGAAGTCCTTGTGCTATGGGGCATTCATCATCCACCT
89 | >A.swine.Wisconsin.1.61.AF091307_1961
90 | GTTAATCTGCTTGAAGACAGACACAACGGGAAACTATGTAAACTAGGGGGAATAGCCCCATTGCACCTAGGTAAATGTAACATTGCCGGATGGCTTTTGGGAAACCCAGAATGTGAATTACTGCTCACAGTAAGCTCATGGTCTTATATTGTGGAAACATCGAACTCAGACAATGGGACATGTTACCCAGGGGATTTCATCAATTATGAAGAGCTGAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTTCCCAAGACAAGTTCGTGGCCCAATCATGAAACAAACAGAGGTGTAACGGCAGCATGCCCTTATGCTGGAGCAAACAGCTTTTACAGAAATTTAATATGGCTGGTGAAGAAGGAGAGTTCATACCCAAAGCTCAGCAAATCCTATGTTAACAATAAAGGGAAGGAAGTCCTTGTGCTATGGGGTATTCACCATCCGCCT
91 | >A.swine.Thailand.271.2005.EF101749_2005
92 | GTTAACCTTCTAGAAGACAGGCACAATGGGAAGCTATGTAACCTAAGGGGGGAAGCCCCACTGCATTTGGGTAAATGTAACATTGCCGGATGGCTCCTAGGAAACCCAGAATGCGAATTACTATTTGCAGTAAACTCATGGTCTTACATTGTGGAAACATCGAACTCAGACAATGGGACATGTTACCCAGGAGATTTCACCAGTTATGAAGAGCTAAGAGAACAATTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTCCCCAAAGCAAGCTCTTGGCCCAACCATGAAACAAACAGAGGTGTAACGGCAGCATGCCCTTATGCTGGAACAAACAGCTTCTACAGGAATTTGATATGGCTAGTAAAAAAGGGAAACTCATATCCAAAGCTCAGTAAATCCTATGTTAATAATAAGAAGAAGGAAGTCCTTGTACTATGGGGCATCCACCATCCACCC
93 | >A.swine.HongKong.273.1994.U45452_1994
94 | GTTAACCTTCTAGAAGACAGACATAACGGGAAACTATGTAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGATGGCTCCTGGGAAATCCAGAGTGTGAATTACTATTCACAGCAAGCTCATGGTCTTACATTGTGGAAACATCTAATTCAGACAATGGGACATGTTACCCAGGAGATTTCATCAATTATGAAGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGATTTGAGATGTTCCCCAAGTCAAGTTCATGGCCCAATCATGAAACGAACAGAGGTGTGACGGCAGCATGTCCTTATGCTGGAGCAAACAGCTTCTACAGAAATTTAATATGGCTTGTAAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTATATTAACAATAAGGAGAAAGAAGTCCTCGTGCTATGGGGAATTCACCATCCACCT
95 | >A.swine.Iowa.15.1930.U47305_1930
96 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTAGGGGGAATAGCCCCATTACAACTGGGGAAATGTAATATTGCCGGATGGATCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAGCTCATGGTCCTATATTGTAGAAACATCGAACTCAGATAATGGGACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGAGAGCAACTAAGCTCAGTGTCATCATTCGAAAAATTCGAGATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAGAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTCTACAGAAATTTACTATGGCTGGTAAAGAAGGAAAATTCATACCCAAAGCTTAGCAAATCCTATGTTAACAATAAAGGGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCATCCGCCT
97 | >A.swine.Iowa.15.1930.X57492_1930
98 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTGGGAGGAATAGCCCCATTACAACTGGGGAAATGTAATATTGCCGGATGGCTCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAGCTCATGGTCCTATATTGTAGAAACACGGACCTCAGATAATGGGACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGAGAGCAACTGAACTCAATGTCATCATTCGAAAAATTCGAGATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAAAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTTTACAGAAACTTACTATGGCTGGTAAAGAAGGAAAATTCATACCCAAAGCTTAGCAAATCCTATGTTAACAATAAAGGAAAAGAAGTCCTTGTGCTATGGGGTGTTCATCATCCGCCT
99 | >A.swine.Iowa.15.1930.EU139823_1930
100 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTAGGGGGAATAGCCCCATTACAACTGGGGAAATGTAATATTGCCGGATGGCTCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAGCTCATGGTCCTATATTGTAGAAACATCGAACTCAGATAATGGGACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGAGAGCAACTAAGCTCAGTGTCATCATTCGAAAAATTCGAGATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAGAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTTTACAGAAATTTACTATGGCTGGTAAAGAAGGAAAATTCATACCCAAAGCTTAGCAAATCCTATGTTAACAATAAAGGGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCATCCGCCT
101 | >A.AlmaAta.1417.1984.S62154_1984
102 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTGGGAGGAATAGCCCCATTACAACTGGGGAAATGTAATATTGCCGGTAGGCTCTTGGGAAACCCAGAATGCGAATTGCTGCTCACGGTGAGCTCATGGTCCTATATTGTAGAAACATCGGACTCAGATAATGGGACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGAGAGCAACTGAACTCAATGTCATCATTCGAAAAATTCGAGATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAAAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTTTACAGAAACTTACTATGGCTGGTAAAGAAGGAAAATTCATACCCAAAGCTTAGCAAATCCTATGTTAACAATAAAGGAAAAGAAGTCCTTGTGCTATGGGGTGTTCATCATCCGCCT
103 | >A.swine.StHyacinthe.148.1990.U11703_1990
104 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTAGGGGGAATAGCCCCATTACAACTGGGGAAATGTAATATTGCCGGATGGCTCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAGCTCATGGTCCTATATTGTAGAAACATCGAACTCAGATAATGGGACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGAGAGCAACTAAGCTCAGTGTCATCATTCGAAAAATTCGAGATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAGAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTTTACAGAAATTTACTATGGCTGGTAAAGAAGGAAANTTCATACCCAAAGCTTAGCAAATCCTATGTTAACAATAAAGGGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCATCCGCCT
105 |
--------------------------------------------------------------------------------
/examples/data/ice_viruses.fasta.treefile:
--------------------------------------------------------------------------------
1 | (A.Fiji.15899.83.AJ289702_1983:0.0225365992,(A.Kiev.59.1979.M38353_1979:0.0044063580,((((A.FortMonmouth.1.1947.U02085_1947:0.0060795414,((A.Meguro.1.1956.AB043485_1956:0.0188631900,A.Saga.2.1957.AB043486_1957:0.0255477164):0.0035469108,A.Kojiya.1.1952.AB043482_1952:0.0000010000):0.0274954280):0.0112137610,((((A.BrevigMission.1.18.AF116575_1918:0.0061671626,(((A.goose.HongKong.8.1976.U46021_1976:0.0173913752,(((A.mallard.Tennessee.11464.85.AF091311_1985:0.0173712916,(A.teal.Alberta.141.1992.CY004539_1992:0.0085090458,((((((A.pintail.Ohio.25.1999.CY017725_1999:0.0000000000,A.mallard.Ohio.56.1999.CY012824_1999:0.0000000000):0.0000000000,A.mallard.Ohio.66.1999.CY016955_1999:0.0000000000):0.0000010000,A.Teal.Ohio.72.1999.CY017717_1999:0.0000010000):0.0000010000,A.BrantGoose.1.1917.AY095226_1917:0.0000010000):0.0140357840,(A.mallard.Alberta.211.1998.AY633212_1998:0.0020287280,A.mallard.MD.403.2002.EU026082_2002:0.0160944617):0.0124102429):0.0000020609,(A.mallard.ALB.267.1996.CY004504_1996:0.0020053127,A.duck.NJ.771770.1995.EU026110_1995:0.0039400706):0.0018940840):0.0250570402):0.0214717374):0.0101504815,A.duck.Alberta.35.76.U47310_1976:0.0253721489):0.0736037938,A.duck.HK.196.1977.D00839_1977:0.0532728540):0.0147304465):0.0290343792,A.duck.Australia.749.80.AF091312_1980:0.0193538639):0.3007691507,(((A.swine.29.37.U04857_1937:0.0197444807,((((A.swine.Ehime.1.80.X57494_1980:0.0101060184,(A.swine.Thailand.271.2005.EF101749_2005:0.0816472254,A.swine.HongKong.273.1994.U45452_1994:0.0384696748):0.0237732993):0.0135430554,A.swine.NewJersey.11.76.K00992_1976:0.0120939688):0.0327961058,A.swine.Illinois.63.X57493_1963:0.0041529782):0.0023752980,A.swine.Wisconsin.1.61.AF091307_1961:0.0014907340):0.0622118742):0.0165006167,(A.swine.Iowa.15.1930.U47305_1930:0.0039689088,(A.swine.Iowa.15.1930.EU139823_1930:0.0000010000,A.swine.StHyacinthe.148.1990.U11703_1990:0.0000010000):0.0000010000):0.0019675273):0.0027110493,(A.swine.Iowa.15.1930.X57492_1930:0.0060967682,A.AlmaAta.1417.1984.S
62154_1984:0.0102470201):0.0115511712):0.0554586947):0.0033674386):0.0836106020,((A.Tokyo.3.1967.U38242_1967:0.0052010471,A.WS.33.U08904_1933:0.0047809277):0.0093388386,(A.WSN.1933.CY010788_1933:0.0019834304,A.WilsonSmith.1933.DQ508905_1933:0.0020081950):0.0267044517):0.0196615124):0.0040324481,((((A.PuertoRico.8.34.J02144_1934:0.0000000000,A.Mongolia.153.1988.Z54287_1988:0.0000000000):0.0000010000,A.PuertoRico.8.1934.EF467821_1934:0.0000010000):0.0000010000,A.Mongolia.111.1991.Z54288_1991:0.0019811635):0.0080130157,A.PuertoRico.8.1934.J04572_1934:0.0000010000):0.0256731110):0.0339931412,A.Huston.43.AF494251_1943:0.0274881011):0.0309406850):0.0303861545,A.Lepine.1948.AB043479_1948:0.0083093368):0.0081943988,A.USSR.90.1977.K01331_1977:0.0060750547):0.0035899200):0.0061715438,((A.SouthCarolina.6.1988.L19025_1988:0.0058921898,(A.Yamagata.32.1989.D31949_1989:0.0019932881,(A.BuenosAires.T114.97.AF534026_1997:0.0315205588,(A.Beijing.262.95.AY289928_1995:0.0104412671,A.Alaska.1173.00.AY029287_2000:0.0274942819):0.0130006148):0.0036889941):0.0041257183):0.0186435452,A.Finland.4.1984.L33491_1984:0.0020744044):0.0025350891);
2 |
--------------------------------------------------------------------------------
/examples/data/ice_viruses_cleaned.fasta:
--------------------------------------------------------------------------------
1 | >A.Fiji.15899.83.AJ289702_1983
2 | GTCAACCTACTTGAGGACAATCACAACGGGAAACTATGCAAACTAAAAGGAATAGCGCCACTACAATTGGGGAAATGCAACATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAAGAACTGAGGGAACAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGTTCATGGCCCAACCACAACGTAACCAAAGGAGTAACGGCATCATGTTCCCATAAGGGGAAAAGCAGTTTTTACAGAAACTTGCTATGGCTGACGGAGAAAAATGGCTCGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT
3 | >A.Kiev.59.1979.M38353_1979
4 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGCAGACTGAAAGGAATAGCTCCACTACAATTGGGGAAATGCAGCATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGATATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACGTAACCAGAGGAGTAACGGCATCATGCTCCCATAAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAAAATGGCTCGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT
5 | >A.FortMonmouth.1.1947.U02085_1947
6 | GTAAACCTACTCGAAGACAGCCACAACGGGAAATTATGCAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGCTTTCTAAGAGATCATGGTCCTACATTGCAGAAACACCAAACTCTGAGAATGGAGCATGTTACCCAGGAGATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACATAACCAGAGGAGTAACGGCAGCATGCTCCCATGCGGGGAAAAGCAGTTTTTACAAAAATTTGCTCTGGCTGACGGAGACAGATGGCTCATACCCAAAGCTGAGCAAGTCCTATGTGAACAATAAAGAGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCACCCGTCT
7 | >A.BrevigMission.1.18.AF116575_1918
8 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAAATTAAAAGGAATAGCCCCATTACAATTGGGGAAATGTAATATCGCCGGATGGCTCTTGGGAAACCCGGAATGCGATTTACTGCTCACAGCGAGCTCATGGTCCTATATTGTAGAAACATCGAACTCAGAGAATGGAACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGGGAGCAATTGAGCTCAGTGTCATCGTTCGAAAAATTCGAAATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAAAGGTGTAACGGCAGCATGCTCCTATGCGGGAGCAAGCAGTTTTTACAGAAATTTGCTGTGGCTGACAAAGAAGGGAAGCTCATACCCAAAGCTTAGCAAGTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCATCCGCCT
9 | >A.SouthCarolina.6.1988.L19025_1988
10 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGTCGACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAGTTGAGTTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCGTAACCAAAGGAGTAACGGCATCATGCTCCCATAAGGGGAGAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGCGTTCATCACCCGTCT
11 | >A.Yamagata.32.1989.D31949_1989
12 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGTCGACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGGAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAAGAAAGATCATGGCCCAACCACACCGTAACCAAAGGAGTAACGGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT
13 | >A.Finland.4.1984.L33491_1984
14 | GTCNACCTACTTGAGGACAGTCACAACGGAAAACTATGCAGACTAAAAGGAATAGCCCCACTACAATTGGGGAAATGCAGCATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATNNTTCGAGAGATTCGAAATATTCCCCAAGGAAAGTTCATGGCCCAAACACAACATAACCAAAGGAGTAACGGCATCATGCTCCCATAAGGGNAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAAAATGGCTTGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT
15 | >A.USSR.90.1977.K01331_1977
16 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGCAGACTAAAAGGGATAGCCCCACTACAATTGGGGAAATGCAACATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGATATTTCGCCGACTATGAGGAATTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACGTAACCAGAGGCGTAACGGCATCATGCTCCCATAAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAAAATGGCTCGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT
17 | >A.Lepine.1948.AB043479_1948
18 | GTCAACCTACTCGAGGACAGTCACAACGGAAAACTATGCAGACTAAAAGGAATAGCCCCACTACAATTGGGGAAATGCAACATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCTGAGAATGGAACATGTTACCCAGGATATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTTTCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGGTCATGGCCCAAACACAACGTAACCAGAGGAGTAACGGCAGCATGCTCCCATAAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAAAATGGCTCGTACCCAACTCTGAGCAAGTCCTATGTGAACAATAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCAACCGTCT
19 | >A.Meguro.1.1956.AB043485_1956
20 | GTAAACCTACTCGAAGACAGCCACAATGGGAAATTATGCAGATTAAAAGGAAAAGCCCCACTACAATTGGGGAAATGTAACATTGCCGGATGGGTCTTAGGAAACCCAGAATGTGAATCATTGCTTTCCAATAGATCATGGTCCTACATTGCAGAAACACCAAACCCTGAGAATGGGACATGTTACCCAGGAGATTTCACCAACTATGAGGAACTGAAGGAGCAATTGAGCTCAGTGTCATCACTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACACAACCAGAGGAGTGACGGCAGCATGCTCCCATGCGAGGAAAAGCAGTTTCTACAAAAATTTGGTCTGGCTGACGGAGGCAAATGGCTCATACCCAACTCTGAGCAATTCCTATGTGAACAATCAAGAGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCACCCGTCT
21 | >A.Kojiya.1.1952.AB043482_1952
22 | GTAAACCTACTCGAAGACAGCCACAATGGGAAATTATGCAGATTAAAAGGAAAAGCCCCACTACAATTGGGGAAATGTAACATTGCCGGATGGGTCTTAGGAAACCCAGAATGCGAATCATTGCTTTCCAATAGATCATGGTCCTACATTGCAGAAACACCAAACTCTGAGAATGGGACATGTTACCCAGGAGATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACACAACCAGAGGAGTGACGGCAGCATGCTCCCATGCGGGGAAAAGCAGTTTTTACAAAAATTTGGTCTGGCTGACGGAGGCAAATGGCTCATACCCAAATCTGAGCAAGTCCTATGTGAACAATCAAGAGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCACCCGTCT
23 | >A.Huston.43.AF494251_1943
24 | GTCAACCTACTCGAAGACAGCCACAACGGGAAATTATGTAGATTAAAAGGAATAGCCCCACTACAATTGAGGAAATGTAACATTGCTGGATGGATCCTGGGAAACCCAGAATGCGAATCACTGCTTTCAGAGAGATCATGGTCCTACATTGTTGAAACACCAAACTCTGAGAATGGAACATGTTACCCAGGAGATTTTACCAACTATGAGGAATTGAGGGAGCAATTGAGCTCTGTATCATCATTCGAAAGATTCGAAATATTCCCCAAGGAAAGCTCATGGCCCAAACACAACACAACCAGAGGAGTAACGGCAGCATGCTCCCATGCGGGAAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGATGGCTCATATCCGAATCTGAACAATTCCTATGTGAACAAGAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCC
25 | >A.BuenosAires.T114.97.AF534026_1997
26 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGCCGACTAAAAGGAACAGCCCCACTACAATTGGGTAATTGCAGCATTGCCGGATGGATCTTAGGAAATCCAGAATGCGAATCACTGTTTTCTAGGGAATCATGGTCCTACATTGCAGAAACACCAAACCCTGAAAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTTGAAATATTCCCCAAGGAAAGCTCATGGCCCAACCACACCGTAACCAAAGGAGTGACGGCATCATGCTCCCATAATGGGAAAAGCAGCTTTTACAAAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAAGTCCTATGTAAACAACAAGGGGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCACCCGTCT
27 | >A.PuertoRico.8.34.J02144_1934
28 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACCCACTGCTTCCAGTGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAATATGTTATCCAGGAGATTTCATCGACTATGAGGAGCTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAAATATTTCCCAAAGAAAGCTCATGGCCCAACCACAACACAAAC---GGAGTAACGGCAGCATGCTCCCATGAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGAGGGCTCATACCCAAAGCTGAAAAATTCTTATGTGAACAAAAAAGGGAAAGAAGTCCTTGTACTGTGGGGTATTCATCACCCGCCT
29 | >A.PuertoRico.8.1934.J04572_1934
30 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACCCACTGCTTCCAGTGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAATATGTTATCCAGGAGATTTCATCGACTATGAGGAGCTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAAATATTTCCCAAAGAAAGCTCATGGCCCAACCACAACACAACCAAAGGAGTAACGGCAGCATGCTCCCATGCGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGAGGGCTCATACCCAAAGCTGAAAAATTCTTATGTGAACAAGAAAGGGAAAGAAGTCCTTGTACTGTGGGGTATTCATCACCCGTCT
31 | >A.PuertoRico.8.1934.EF467821_1934
32 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACCCACTGCTTCCAGTGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAATATGTTATCCAGGAGATTTCATCGACTATGAGGAGCTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAAATATTTCCCAAAGAAAGCTCATGGCCCAACCACAACACAAAC---GGAGTAACGGCAGCATGCTCCCATGAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGAGGGCTCATACCCAAAGCTGAAAAATTCTTATGTGAACAAAAAAGGGAAAGAAGTCCTTGTACTGTGGGGTATTCATCACCCGCCT
33 | >A.Saga.2.1957.AB043486_1957
34 | GTAAACCTACTCGAAGACAGCCACAATGGGAAATTATGCAGATTAAAAGGAAAAGCCCCACTACAATTGGGGAACTGTAACATTGCCGGATGGGTCTTAGGAAACCCAGAATGTGAATCATTGCTTTCCGGTAGATCATGGTCCTACATTGCAGAAACACCAAACTCTGAGAATGGGACGTGCTACCCAGGGGATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAACCAC---ACAACCAGAGGAGTGACGGCAGCATGCCCCCATGCGAAGAAAAGCAGTTTTTACAAAAATTTGGTCTGGCTGACGGAGGCAAATGGCTCATACCCAAATCTGAGCAGGTCCTATGTGAACAATCAGGAGAAAGAAGTCCTTGTGCTATGGGGAGTTCATCACCCGTCT
35 | >A.Beijing.262.95.AY289928_1995
36 | GTCAACCTACTTGAGGACAGTCACAATGGAAAACTATGTCTACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGATTTCTAAGGAATCATGGTCCTACATTGTAGAGACACCAAACCCTGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATCATTTGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAAACACACCGTAACA---GGAGTAACGGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAATTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCATCT
37 | >A.Alaska.1173.00.AY029287_2000
38 | GTCAACCTACTTGAGGACAGTCACAATGGAAAACTATGTCTACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATTACTGATTTCCAAGGAATCATGGTCCTACATTGTAGAAACACCAAATCCTGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCTTCATTTGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCGTAACC---GGAGTATCAGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGGGAAGAATGGTTTGTACCCAAGCCTGAGCAAGTCCTATGCAAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGCCT
39 | >A.WS.33.U08904_1933
40 | GTTAACCTGCTCGAAGACAGCCACAACGGGAAACTATGTAAATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACTCACTGCTTCCAGCGAAATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAGCATGTTATCCAGGAGATTTCATCGACTATGAGGAACTGAAGGAGCAATTGAGCTCAGTATCATCATTAGAAAGATTCGAAATATTTCCCAAGGAAAGTTCATGGCCCAACCACAACACACTCAAAGGAGTAACAGCAGCATGCTCCCATAGGGGAAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGAAAACGGGGGACTCATACCCAAAGCTGAACAATTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT
41 | >A.WSN.1933.CY010788_1933
42 | GTTAACCTGCTCGAAGACAGACACAACGGGAAACTATGTAAATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCACCGGATGGCTCTTGGGAAATCCAGAATGCGACTCACTGCTTCCAGCGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAGCATGTTATCCAGGAGATTTCATCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTAGAAAGATTCGAAATATTTCCCAAGGAAAGTTCATGGCCCAACCACACATTCAAC---GGAGTAACAGTATCATGCTCCCATAGGGGAAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGAAGAAGGGGGATTCATACCCAAAGCTGACCAATTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT
43 | >A.WilsonSmith.1933.DQ508905_1933
44 | GTTAACCTGCTCGAAGACAGACACAACGGGAAACTATGTAAATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCACCGGATGGCTCTTGGGAAATCCAGAATGCGACTCACTGCTTCCAGCGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAGCATGTTATCCAGGAGATTTCATCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTAGAAAGATTCGAAATATTTCCCAAGGAAAGTTCATGGCCCAACCACACATTCAAC---GGAGTAACAGCATCATGCTCCCATAGGGGAAAAAGCAGTTTTTACAGAAATTTGATATGGCTGACGAAGAAGGGGGATTCATACCCAAAGCTGACCAATTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT
45 | >A.goose.HongKong.8.1976.U46021_1976
46 | GTTAATTTACTCGAAAACAGCCATAATGGAAAACTCTGCAGCCTGAATGGAATAGCCCCTTTACAACTAGGGAAATGCAACGTGGCGGGGTGGCTCCTTGGCAACCCAGAATGTGACCTGCTGCTCACTGCGAGTTCATGGTCCTACATAATAGAGACTTCAAATTCAGAAAACGGAACATGCTACCCCGGAGAGTTCATTGATTATGAAGAGTTAAGGGAACAGCTAAGTTCAGTGTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAATCTCATGGCCAAATCATGAAACAACCAAAGGTGTCACAGCTGCATGCTCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTATGGATAACAAAGAAAGGAACTTCCTACCCTAAACTCAGCAAATCATACACGAACAACAAAGGGAAAGAAGTGCTTGTACTTTGGGGGGTGCATCATCCTCCA
47 | >A.duck.Australia.749.80.AF091312_1980
48 | GTTAATTTACTTGAAAACAGCCATAATGGAAAACTTTGCAGCCTGAATGGAATAGCCCCATTACAACTAGGGAAATGCAACGTGGCGGGGTGGCTCCTTGGCAACCTAGAATGTGACCTGTTGCTCACTGCGAATTCATGGTCTTATATAATAGAGACTTCAAATTCAGAAAACGGAACATGTTACCCCGGGGAGTTCATCGATTATGAGGAATTAAGAGAACAGCTAAGTTCAGTGTCTTCATTTGAGAAATTTGAAATTTTCCCGAAGGCAAGCTCATGGCCAAATCATGAGACAACCAAAGGTGTCACAGCTGCATGCTCTTACTTGGGAGCTAGCAGCTTTTATCGGAATTTGCTATGGATGACAAAGAAGGGAACTTCCTATCCTAAACTCAGCAAATCATATACGAACAACAAAGGGAAAGAAGTGCTTGTACTTTGGGGGGTGCATCACCCTCCC
49 | >A.mallard.Tennessee.11464.85.AF091311_1985
50 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGTCTGAACGGGATAGCTCCCCTACAATTGGGGAAGTGCAATGTAGCGGGATGGCTCCTTGGCAATCCAGAGTGTGACCTTCTACTCACTGCAAACTCATGGTCCTACATAATAGAGACTTCCAATTCAGAAAACGGGACATGCTACCCCGGTGAATTCATAGATTATGAAGAATTAAGAGAGCAGCTAAGTTCAGTTTCTTCATTTGAAAGGTTTGAAATTTTCCCGAAGGCAAACTCATGGCCAAATCATGAGACAACTAAAGGTGTTACAGCCGCATGCTCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGATAACAAAGAAGGGAACGTCATATCCAAAACTCAGCAAATCATACACGAACAATAAAGGGAAAGAAGTACTCGTGCTCTGGGGAGTGCACCACCCTCCA
51 | >A.duck.HK.196.1977.D00839_1977
52 | GTTAATTTACTAGAAAACAGCCATAATGGAAAACTCTGCAGACTGAATGGAATAGCCCCCTTACAGCTAGGGAAATGCAACGTGGCAGGATGGATCCTTGGCAACCCAGAGTGTGATCTATTGCTCACAGCGAATTCATGGTCTTACATAATAGAGACTTCAAATTCAGAGAATGGAACATGCTACCCCGGAGAGTTCAATGATTATGAAGAATTAAGGGAACAGCTGAGTTCGGTGTCTTCATTTGAAAAGTTTGAAATTTTCCCAAAGGCTAGCTCATGGCCAAATCATGAGACAACTAAAGGTATTACAGCTGCATGTCCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTATGGATAACAAAGAAGGGAACTTCATACCCTAAACTCAGCAAATCATACACAAACAACAAAGGGAAAGAAGTGCTTGTAATCTGGGGAGTGCACCACCCTCCA
53 | >A.teal.Alberta.141.1992.CY004539_1992
54 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTGAACGGGATAGCTCCTCTACAATTGGGGAAGTGCAATGTAGCGGGGTGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAATGGGACATGCTATCCCGGTGAGTTCATAGATTATGAAGAATTAAGAGAGCAGCTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCGAAGGCAAGCTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGATAACAAAGAAGGGAACTTCATATCCAAAACTCAGCAAATCATACACAAACAATAAGGGGAAGGAAGTGCTCGTGCTCTGGGGAGTGCACCACCCTCCA
55 | >A.pintail.Ohio.25.1999.CY017725_1999
56 | GTGAATTTGCTCGAAAACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTTTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG
57 | >A.Teal.Ohio.72.1999.CY017717_1999
58 | GTGAATTTGCTCGAAAACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTTTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG
59 | >A.mallard.Ohio.66.1999.CY016955_1999
60 | GTGAATTTGCTCGAAAACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTTTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG
61 | >A.mallard.Ohio.56.1999.CY012824_1999
62 | GTGAATTTGCTCGAAAACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTTTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG
63 | >A.mallard.ALB.267.1996.CY004504_1996
64 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAACTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGGACATGCTATCCCGGTGAATTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGATAACAAAGAAGGGAACTTCATATCCAAAACTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTGCTCTGGGGAGTGCACCATCCTCCG
65 | >A.duck.NJ.771770.1995.EU026110_1995
66 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAATTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCTTACTCTGGAGCAAGCAGTTTTTATCGGAATTTGCTGTGGATAACAAAGAAGGGAACTTCATATCCAAAACTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTGCTCTGGGGAGTGCACCATCCTCCG
67 | >A.mallard.Alberta.211.1998.AY633212_1998
68 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACATCCAATTCAGAGAACGGGACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGGGAGCAATTGAGTTCGGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCTTACTCTGGGGCCAGCAGTTTTTATCGAAATTTGCTGTGGATAATAAAGAAGGGAACTTCATATCCAAAACTCAGCAAGTCATACACGAACAATAAGGGAAAAGAAGTGCTTGTGCTCTGGGGAGTGCACCATCCTCCG
69 | >A.mallard.MD.403.2002.EU026082_2002
70 | GTGAATTTGCTCGAAGACCGCCATAATGGGAAGCTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACGTCCAATTCAGAGAACGGGACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGGGAGCAATTGAGTTCAGTTTCTTCTTTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGGGCCAGCAGTTTTTATCGAAATTTGCTGTGGATAGTAAAGAAGGGAACTTCATACCCGAAACTCAGCAAGTCATACACGAACAATAAGGGAAAAGAAGTGCTTGTGCTCTGGGGAGTGCACCATCCTCCG
71 | >A.swine.29.37.U04857_1937
72 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTAGGGGGAATAGCCCCATTGCAACTGGGTAAATGTAATATTGCCGGATGCGTCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAACTCATGGTCCTATATTGTAGAAACATCGAACTCAGATAATGGGACATGTTACCCAGGAGATTTCATTGACTATGAAGAACTGAGAGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAGATATTTCCCAAAACAAGTTCGTGGCCCAATCATGAAACAACCAGAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTTTACAGAAATTTACTATGGCTGGTGAAGAAGGGAGATTCATATCCAAAGCTCAGCAAATCCTATGTTAACAATAAAGGGAAAGAAGTCCTTGTGCTATGGGGGGTTCACCATCCNCCT
73 | >A.swine.Ehime.1.80.X57494_1980
74 | GTTAACCTTCTTGAAGACAGACATAACGGGAAACTATGTAAACTAGGGGGGATAGCCCCATTGCATCTGGGTAAATGTAACATTGCCGGATGGCTTTTGGGAAATCCAGAATGTGAATTACTATTCACAGTAAGCTCATGGTCTTACATTGTGGAAACATCGAACTCAGACAATGGGACATGTTACCCAGGGGATTTCATCAATTATGAAGAGCTGAGAGAGCAGTTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTCCCCAAGACAAGTTCGTGGCCCAATCATGAAACAAACAGAGGTGTGACGGCAGCATGCCCTTATGCTGGAGCAAACAGTTTCTACAGAAATTTAATATGGCTTGTGAAAAAAGAAAACTCATATCCAAAGCTCCGCAAATCCTATGTTAACAATAAGGGGAAGGAAGTCCTTGTGCTATGGGGCATTCACCATCCACCT
75 | >A.swine.Illinois.63.X57493_1963
76 | GTTAATCTTCTTGAAGACAGACACAACGGGAAACTATGTAAACTAGGGGGAATAGCCCCATTGCACCTAGGTAAATGTAACATTGCCGGATGGCTTTTGGGAAACCCAGAATGTGAATTACTGCTCACAGTAAGCTCATGGTCTTATATTGTGGAAACATCGAACTCAGACAATGGGACATGTTACCCAGGAGATTTCATCAATTATGAAGAGCTGAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTTCCCAAGATAAGTTCGTGGCCCAATCATGAAACAAACAGAGGTGTAACGGCAGCATGCCCTTATGCTGGAGCAAACAGCTTTTTCAGAAATTTAATATGGCTGGTGAAGAAGGAGAGTTCATACCCAAAGCTCAGCAAATCCTATGTTAACAATAAAGGGAAGGAAGTCCTTGTGCTATGGGGTATTCACCATCCGCCT
77 | >A.swine.NewJersey.11.76.K00992_1976
78 | GTTAATCTTCTTGAAGACAGACATAACGGGAAACTATGTAAACTGGGGGGGATAGCCCCATTGCACTTGGGTAAATGTAACATTGCCGGTAGGCTTTTGGGAAACCCAGAATGTGAATTACTACTCACAGTAAGCTCATGGTCTTACATTGTGGAAACATCGAAATCAGACAATGGGACATGTTACCCAGGAGATTTCATCAATTATGAAGAGCTGAGAGAGCAGTTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTCCCCAAGACAAGTTCGTGGCCCAATCATGAAACAAACAGAGGTGTGACGGCAGCATGCCCTTATGCTGGAGCAAACAGCTTCTACAGAAATTTAATATGGCTGGTGAAAAAAGAAAATTCATACCCAAAGCTCAGCAAATCCTATGTTAACAATAAAGGGAAGGAAGTCCTTGTGCTATGGGGCATTCATCATCCACCT
79 | >A.swine.Wisconsin.1.61.AF091307_1961
80 | GTTAATCTGCTTGAAGACAGACACAACGGGAAACTATGTAAACTAGGGGGAATAGCCCCATTGCACCTAGGTAAATGTAACATTGCCGGATGGCTTTTGGGAAACCCAGAATGTGAATTACTGCTCACAGTAAGCTCATGGTCTTATATTGTGGAAACATCGAACTCAGACAATGGGACATGTTACCCAGGGGATTTCATCAATTATGAAGAGCTGAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTTCCCAAGACAAGTTCGTGGCCCAATCATGAAACAAACAGAGGTGTAACGGCAGCATGCCCTTATGCTGGAGCAAACAGCTTTTACAGAAATTTAATATGGCTGGTGAAGAAGGAGAGTTCATACCCAAAGCTCAGCAAATCCTATGTTAACAATAAAGGGAAGGAAGTCCTTGTGCTATGGGGTATTCACCATCCGCCT
81 | >A.swine.Thailand.271.2005.EF101749_2005
82 | GTTAACCTTCTAGAAGACAGGCACAATGGGAAGCTATGTAACCTAAGGGGGGAAGCCCCACTGCATTTGGGTAAATGTAACATTGCCGGATGGCTCCTAGGAAACCCAGAATGCGAATTACTATTTGCAGTAAACTCATGGTCTTACATTGTGGAAACATCGAACTCAGACAATGGGACATGTTACCCAGGAGATTTCACCAGTTATGAAGAGCTAAGAGAACAATTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTCCCCAAAGCAAGCTCTTGGCCCAACCATGAAACAAACAGAGGTGTAACGGCAGCATGCCCTTATGCTGGAACAAACAGCTTCTACAGGAATTTGATATGGCTAGTAAAAAAGGGAAACTCATATCCAAAGCTCAGTAAATCCTATGTTAATAATAAGAAGAAGGAAGTCCTTGTACTATGGGGCATCCACCATCCACCC
83 | >A.swine.HongKong.273.1994.U45452_1994
84 | GTTAACCTTCTAGAAGACAGACATAACGGGAAACTATGTAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGATGGCTCCTGGGAAATCCAGAGTGTGAATTACTATTCACAGCAAGCTCATGGTCTTACATTGTGGAAACATCTAATTCAGACAATGGGACATGTTACCCAGGAGATTTCATCAATTATGAAGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGATTTGAGATGTTCCCCAAGTCAAGTTCATGGCCCAATCATGAAACGAACAGAGGTGTGACGGCAGCATGTCCTTATGCTGGAGCAAACAGCTTCTACAGAAATTTAATATGGCTTGTAAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTATATTAACAATAAGGAGAAAGAAGTCCTCGTGCTATGGGGAATTCACCATCCACCT
85 | >A.swine.Iowa.15.1930.U47305_1930
86 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTAGGGGGAATAGCCCCATTACAACTGGGGAAATGTAATATTGCCGGATGGATCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAGCTCATGGTCCTATATTGTAGAAACATCGAACTCAGATAATGGGACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGAGAGCAACTAAGCTCAGTGTCATCATTCGAAAAATTCGAGATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAGAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTCTACAGAAATTTACTATGGCTGGTAAAGAAGGAAAATTCATACCCAAAGCTTAGCAAATCCTATGTTAACAATAAAGGGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCATCCGCCT
87 | >A.swine.Iowa.15.1930.X57492_1930
88 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTGGGAGGAATAGCCCCATTACAACTGGGGAAATGTAATATTGCCGGATGGCTCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAGCTCATGGTCCTATATTGTAGAAACACGGACCTCAGATAATGGGACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGAGAGCAACTGAACTCAATGTCATCATTCGAAAAATTCGAGATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAAAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTTTACAGAAACTTACTATGGCTGGTAAAGAAGGAAAATTCATACCCAAAGCTTAGCAAATCCTATGTTAACAATAAAGGAAAAGAAGTCCTTGTGCTATGGGGTGTTCATCATCCGCCT
89 | >A.swine.Iowa.15.1930.EU139823_1930
90 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTAGGGGGAATAGCCCCATTACAACTGGGGAAATGTAATATTGCCGGATGGCTCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAGCTCATGGTCCTATATTGTAGAAACATCGAACTCAGATAATGGGACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGAGAGCAACTAAGCTCAGTGTCATCATTCGAAAAATTCGAGATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAGAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTTTACAGAAATTTACTATGGCTGGTAAAGAAGGAAAATTCATACCCAAAGCTTAGCAAATCCTATGTTAACAATAAAGGGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCATCCGCCT
91 |
--------------------------------------------------------------------------------
/examples/data/ice_viruses_cleaned.fasta.treefile:
--------------------------------------------------------------------------------
1 | (A.Fiji.15899.83.AJ289702_1983:0.0225066451,(A.Kiev.59.1979.M38353_1979:0.0043867334,((((A.FortMonmouth.1.1947.U02085_1947:0.0060469005,((A.Meguro.1.1956.AB043485_1956:0.0188000079,A.Saga.2.1957.AB043486_1957:0.0254876478):0.0035208056,A.Kojiya.1.1952.AB043482_1952:0.0000010000):0.0274963883):0.0111458950,((((A.BrevigMission.1.18.AF116575_1918:0.0058389439,(((A.goose.HongKong.8.1976.U46021_1976:0.0170135328,((A.mallard.Tennessee.11464.85.AF091311_1985:0.0216301767,(A.teal.Alberta.141.1992.CY004539_1992:0.0084453155,(((((A.pintail.Ohio.25.1999.CY017725_1999:0.0000000000,A.mallard.Ohio.56.1999.CY012824_1999:0.0000000000):0.0000000000,A.mallard.Ohio.66.1999.CY016955_1999:0.0000000000):0.0000010000,A.Teal.Ohio.72.1999.CY017717_1999:0.0000010000):0.0139369554,(A.mallard.ALB.267.1996.CY004504_1996:0.0019874608,A.duck.NJ.771770.1995.EU026110_1995:0.0039298726):0.0019473678):0.0000020212,(A.mallard.Alberta.211.1998.AY633212_1998:0.0020549333,A.mallard.MD.403.2002.EU026082_2002:0.0160201914):0.0123046887):0.0249392287):0.0167112382):0.0797578005,A.duck.HK.196.1977.D00839_1977:0.0540106401):0.0141688170):0.0296933079,A.duck.Australia.749.80.AF091312_1980:0.0186920171):0.3016056265,(((A.swine.29.37.U04857_1937:0.0198096674,((((A.swine.Ehime.1.80.X57494_1980:0.0100776239,(A.swine.Thailand.271.2005.EF101749_2005:0.0812944476,A.swine.HongKong.273.1994.U45452_1994:0.0382951989):0.0238395020):0.0134469971,A.swine.NewJersey.11.76.K00992_1976:0.0120608173):0.0326587861,A.swine.Illinois.63.X57493_1963:0.0041348454):0.0023593120,A.swine.Wisconsin.1.61.AF091307_1961:0.0014986072):0.0619865326):0.0163570435,(A.swine.Iowa.15.1930.U47305_1930:0.0039530040,A.swine.Iowa.15.1930.EU139823_1930:0.0000010000):0.0019932448):0.0016388727,A.swine.Iowa.15.1930.X57492_1930:0.0189049204):0.0569543689):0.0029119936):0.0833447630,(A.WS.33.U08904_1933:0.0113878430,(A.WSN.1933.CY010788_1933:0.0019762617,A.WilsonSmith.1933.DQ508905_1933:0.0020001487):0.0257285722):0.0200884663):0.0045915750,((A.PuertoR
ico.8.34.J02144_1934:0.0000010000,A.PuertoRico.8.1934.EF467821_1934:0.0000010000):0.0079816073,A.PuertoRico.8.1934.J04572_1934:0.0000020465):0.0253069847):0.0338418911,A.Huston.43.AF494251_1943:0.0272099316):0.0311305152):0.0302688369,A.Lepine.1948.AB043479_1948:0.0082842905):0.0081599174,A.USSR.90.1977.K01331_1977:0.0060504852):0.0035758745):0.0061509031,((A.SouthCarolina.6.1988.L19025_1988:0.0058647462,(A.Yamagata.32.1989.D31949_1989:0.0019840935,(A.BuenosAires.T114.97.AF534026_1997:0.0313954883,(A.Beijing.262.95.AY289928_1995:0.0104492633,A.Alaska.1173.00.AY029287_2000:0.0273554037):0.0129145023):0.0036475476):0.0041044694):0.0185559133,A.Finland.4.1984.L33491_1984:0.0020695783):0.0025283110);
2 |
--------------------------------------------------------------------------------
/examples/data/invalid.fasta:
--------------------------------------------------------------------------------
1 | >sequence_1-pass
2 | AGTAGATCCCCGATAGCGAGCTAGCGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNNNN
3 | >sequence_2-fail
4 | AGTAGATCCCCGATAGCGAGCTAGXGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNNNN
5 | >sequence_3-pass
6 | AGTAGATCCCCGATAGCGAGCTAGCGATNNNNNNNNNNNNNNNNNTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNNNN
7 |
--------------------------------------------------------------------------------
/examples/example.py:
--------------------------------------------------------------------------------
1 | from phytest import Alignment, Sequence, Tree
2 |
3 |
4 | def test_alignment_has_4_sequences(alignment: Alignment):
5 | alignment.assert_length(4)
6 |
7 |
8 | def test_alignment_has_a_width_of_100(alignment: Alignment):
9 | alignment.assert_width(100)
10 |
11 |
12 | def test_sequences_only_contains_the_characters(sequence: Sequence):
13 | sequence.assert_valid_alphabet(alphabet="ATGCN-")
14 |
15 |
16 | def test_single_base_deletions(sequence: Sequence):
17 | sequence.assert_longest_stretch_gaps(max=1)
18 |
19 |
20 | def test_longest_stretch_of_Ns_is_10(sequence: Sequence):
21 | sequence.assert_longest_stretch_Ns(max=10)
22 |
23 |
24 | def test_tree_has_4_tips(tree: Tree):
25 | tree.assert_number_of_tips(4)
26 |
27 |
28 | def test_tree_is_bifurcating(tree: Tree):
29 | tree.assert_is_bifurcating()
30 |
31 |
32 | def test_aln_tree_match_names(alignment: Alignment, tree: Tree):
33 | aln_names = [i.name for i in alignment]
34 | tree.assert_tip_names(aln_names)
35 |
36 |
37 | def test_any_internal_branch_lengths_above_threshold(tree: Tree, threshold=1e-4):
38 | tree.assert_internal_branch_lengths(min=threshold)
39 |
40 |
41 | def test_outlier_branches(tree: Tree):
42 | # Here we create a custom function to detect outliers
43 | import statistics
44 |
45 | tips = tree.get_terminals()
46 | branch_lengths = [t.branch_length for t in tips]
47 | cut_off = statistics.mean(branch_lengths) + statistics.stdev(branch_lengths)
48 | for tip in tips:
49 | assert tip.branch_length < cut_off, f"Outlier tip '{tip.name}' (branch length = {tip.branch_length})!"
50 |
--------------------------------------------------------------------------------
/examples/self_contained.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | import phytest
4 |
5 |
6 | def test_length(sequence: phytest.Sequence):
7 | sequence.assert_length(length=462)
8 |
9 |
if __name__ == "__main__":
    # Run the tests in this file against the bundled example and propagate
    # phytest's return value as the process exit status.
    exit_status = phytest.main(sequence='examples/data/ice_viruses.fasta')
    sys.exit(exit_status)
12 |
--------------------------------------------------------------------------------
/mkdocs.sh:
--------------------------------------------------------------------------------
# Build the HTML documentation from docs/ into docs/_build/html.
#   -E  do not reuse the saved environment; re-read all source files
#   -a  write all output files, not only new and changed ones
poetry run sphinx-build -E -a -b html docs docs/_build/html
2 |
--------------------------------------------------------------------------------
/phytest/__init__.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pytest
4 | # from py.xml import html
5 |
6 | from .bio import Alignment, Data, Sequence, Tree
7 | from .main import main as main
8 |
9 |
def pytest_addoption(parser):
    """
    Registers the phytest command-line options.

    Each input (sequence, tree, data) has a path option with a short alias and
    a companion ``--<input>-format`` option.

    Args:
        parser: The option parser that the options are added to.
    """
    # (flags, default, help text) for every option that stores a value.
    value_options = (
        (("--sequence", "-S"), None, "sequence file"),
        (("--sequence-format",), 'fasta', "sequence file format"),
        (("--tree", "-T"), None, "tree file"),
        (("--tree-format",), 'newick', "tree file format"),
        (("--data", "-D"), None, "data file"),
        (("--data-format",), 'csv', "data file format"),
    )
    for flags, default, help_text in value_options:
        parser.addoption(*flags, action="store", default=default, help=help_text)

    parser.addoption(
        "--apply-fixes",
        action="store_true",
        default=None,
        help="automatically apply fixes where possible",
    )
20 |
21 |
22 | def pytest_generate_tests(metafunc):
23 | sequence_path = metafunc.config.getoption("sequence")
24 | if 'alignment' in metafunc.fixturenames:
25 | if sequence_path is None:
26 | raise ValueError(f"{metafunc.function.__name__} requires an alignment file")
27 | fpth = Path(sequence_path)
28 | if not fpth.exists():
29 | raise FileNotFoundError(f"Unable to locate requested alignment file ({fpth})! 😱")
30 | tree_path = metafunc.config.getoption("tree")
31 | if 'tree' in metafunc.fixturenames:
32 | if tree_path is None:
33 | raise ValueError(f"{metafunc.function.__name__} requires a tree file")
34 | fpth = Path(tree_path)
35 | if not fpth.exists():
36 | raise FileNotFoundError(f"Unable to locate requested tree file ({fpth})! 😱")
37 | tree_format = metafunc.config.getoption("--tree-format")
38 | trees = Tree.parse(tree_path, tree_format)
39 | metafunc.parametrize("tree", trees, ids=lambda t: t.name)
40 | data_path = metafunc.config.getoption("data")
41 | if 'data' in metafunc.fixturenames:
42 | if data_path is None:
43 | raise ValueError(f"{metafunc.function.__name__} requires a data file")
44 | fpth = Path(data_path)
45 | if not fpth.exists():
46 | raise FileNotFoundError(f"Unable to locate requested data file ({fpth})! 😱")
47 | if "sequence" in metafunc.fixturenames:
48 | if sequence_path is None:
49 | raise ValueError(f"{metafunc.function.__name__} requires a sequence file")
50 | fpth = Path(sequence_path)
51 | if not fpth.exists():
52 | raise FileNotFoundError(f"Unable to locate requested sequence file ({fpth})! 😱")
53 | alignment_format = metafunc.config.getoption("--sequence-format")
54 | sequences = Sequence.parse(sequence_path, alignment_format)
55 | metafunc.parametrize("sequence", sequences, ids=lambda s: s.id)
56 |
57 |
@pytest.fixture(scope="session", name="alignment")
def _alignment_fixture(request):
    """Session fixture: the alignment read from the ``sequence`` option in the configured format."""
    config = request.config
    path = config.getoption("sequence")
    file_format = config.getoption("--sequence-format")
    return Alignment.read(path, file_format)
64 |
65 |
@pytest.fixture(scope="session", name="data")
def _data_fixture(request):
    """Session fixture: the data table read from the ``data`` option in the configured format."""
    config = request.config
    path = config.getoption("data")
    file_format = config.getoption("--data-format")
    return Data.read(path, file_format)
72 |
73 |
def pytest_html_report_title(report):
    """Sets the title attribute of the given report object to "report"."""
    setattr(report, "title", "report")
76 |
--------------------------------------------------------------------------------
/phytest/bio/__init__.py:
--------------------------------------------------------------------------------
1 | from .alignment import Alignment
2 | from .data import Data
3 | from .sequence import Sequence
4 | from .tree import Tree
5 |
--------------------------------------------------------------------------------
/phytest/bio/alignment.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | from warnings import warn
3 |
4 | from Bio import AlignIO
5 | from Bio.Align import MultipleSeqAlignment
6 |
7 | from ..utils import PhytestObject, assert_or_warn
8 |
9 |
class Alignment(PhytestObject, MultipleSeqAlignment):
    """
    A Biopython MultipleSeqAlignment extended with phytest assertion methods.
    """

    @classmethod
    def read(cls, alignment_path, alignment_format) -> 'Alignment':
        """
        Reads a single alignment from a file.

        Args:
            alignment_path: The path of the alignment file, passed to ``AlignIO.read``.
            alignment_format: The file format name, passed to ``AlignIO.read``.

        Returns:
            Alignment: A new Alignment built from the records and annotations that were read.
        """
        alignment = AlignIO.read(alignment_path, alignment_format)
        return Alignment(
            alignment._records, annotations=alignment.annotations, column_annotations=alignment.column_annotations
        )

    def assert_width(
        self,
        width: Optional[int] = None,
        *,
        min: Optional[int] = None,
        max: Optional[int] = None,
        warning: bool = False,
    ) -> None:
        """
        Asserts that the alignment width (the number of bases in the sequences) meets the specified criteria.

        Args:
            width (int, optional): If set, then alignment width must be equal to this value. Defaults to None.
            min (int, optional): If set, then alignment width must be equal to or greater than this value. Defaults to None.
            max (int, optional): If set, then alignment width must be equal to or less than this value. Defaults to None.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        alignment_width = self.get_alignment_length()
        summary = f"The width of the alignment is {alignment_width}."

        if width is not None:
            assert_or_warn(
                alignment_width == width,
                warning,
                summary,
                f"This is not equal to the required width of {width}.",
            )
        if min is not None:
            assert_or_warn(
                alignment_width >= min,
                warning,
                summary,
                f"This is less than the minimum width of {min}.",
            )
        if max is not None:
            assert_or_warn(
                alignment_width <= max,
                warning,
                summary,
                f"This is greater than the maximum width of {max}.",
            )

    def assert_length(
        self,
        length: Optional[int] = None,
        *,
        min: Optional[int] = None,
        max: Optional[int] = None,
        warning: bool = False,
    ) -> None:
        """
        Asserts that the alignment length (the number of sequences in the alignment) meets the specified criteria.

        Args:
            length (int, optional): If set, then alignment length must be equal to this value. Defaults to None.
            min (int, optional): If set, then alignment length must be equal to or greater than this value. Defaults to None.
            max (int, optional): If set, then alignment length must be equal to or less than this value. Defaults to None.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        alignment_length = len(self)
        summary = f"The number of sequences in the alignment is {alignment_length}."

        if length is not None:
            # The equality check also fails when there are too many sequences,
            # so the message must not claim the count is "less than" required.
            assert_or_warn(
                alignment_length == length,
                warning,
                summary,
                f"This is not equal to the required number of {length}.",
            )
        if min is not None:
            assert_or_warn(
                alignment_length >= min,
                warning,
                summary,
                f"This is less than the minimum {min}.",
            )
        if max is not None:
            assert_or_warn(
                alignment_length <= max,
                warning,
                summary,
                f"This is greater than the maximum {max}.",
            )
103 |
--------------------------------------------------------------------------------
/phytest/bio/data.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import List, Union
3 |
4 | import pandas as pd
5 | from pandas import DataFrame
6 |
7 | from ..utils import PhytestObject, assert_or_warn
8 |
9 |
class Data(PhytestObject, DataFrame):
    """
    A pandas DataFrame extended with phytest assertion methods.
    """

    @classmethod
    def read(cls, data_path, data_format) -> 'Data':
        """
        Reads a data file into a Data object.

        Args:
            data_path: The path of the data file.
            data_format (str): The file format; one of 'csv', 'tsv' or 'excel'.

        Returns:
            Data: The contents of the file.

        Raises:
            ValueError: If ``data_format`` is not one of the allowed formats.
        """
        allowed_formats = ['csv', 'tsv', 'excel']
        if data_format not in allowed_formats:
            raise ValueError(f'Data format must be one of {", ".join(allowed_formats)}.')
        if data_format == 'csv':
            df = pd.read_csv(data_path)
        elif data_format == 'tsv':
            df = pd.read_csv(data_path, sep='\t')
        elif data_format == 'excel':
            df = pd.read_excel(data_path, engine='openpyxl')
        return Data(df)

    def assert_contains(
        self,
        column: str,
        value: str,
        *,
        warning: bool = False,
    ) -> None:
        """
        Asserts that specified column contains the specified value.

        Args:
            column (str, required): The column to check.
            value (str, required): the value to look for.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        column_values = self[column].values
        summary = f"The values of column '{column}' are '{column_values}'."
        assert_or_warn(
            value in column_values,
            warning,
            summary,
            f"The column '{column}' does not contain '{value}'.",
        )

    def assert_match(
        self,
        column: str,
        pattern: str,
        *,
        warning: bool = False,
    ) -> None:
        """
        Asserts that all values of the specified column match the specified pattern.

        Args:
            column (str, required): The column to check.
            pattern (str, required): The pattern to match.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        column_values = self[column].values
        summary = f"The values of column '{column}' are '{column_values}'."
        not_matched = self[~self[column].str.contains(re.compile(pattern))].index.values
        assert_or_warn(
            len(not_matched) == 0,
            warning,
            summary,
            f"The row(s) '{not_matched}' of the column '{column}' do not match the pattern '{pattern}'.",
        )

    def assert_columns(
        self,
        allowed_columns: List[str],
        *,
        exact: bool = False,
        warning: bool = False,
    ) -> None:
        """
        Asserts that the specified column(s) are in the DataFrame.

        Args:
            allowed_columns (List[str], required): The list of allowed columns.
            exact (bool): If True, the list of allowed columns must be exactly the same as the list of columns in the DataFrame.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        columns = self.columns.values
        summary = f"The names of the columns are '{columns}'."
        if exact:
            not_allowed = list(set(allowed_columns).symmetric_difference(set(columns)))
            message = f"The column names do not exactly match the list of allowed columns '{allowed_columns}'."
        else:
            not_allowed = [column for column in columns if column not in allowed_columns]
            message = f"The columns '{not_allowed}' are not in the list of allowed columns '{allowed_columns}'."
        assert_or_warn(len(not_allowed) == 0, warning, summary, message)

    def assert_values(
        self,
        column: str,
        values: list,
        *,
        allow_nan: bool = False,
        exact: bool = False,
        warning: bool = False,
    ) -> None:
        """
        Asserts that all values of the specified column are in the specified list of allowed values.

        The ``values`` list passed by the caller is never modified.

        Args:
            column (str, required): The column to check.
            values (list, required): The list of allowed values.
            allow_nan (bool): If True, allow NaN values.
            exact (bool): If True, the list of allowed values must be exactly the same as the list of values in the DataFrame.
                When combined with ``allow_nan``, NaN entries of the column are ignored for this comparison.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """

        column_values = self[column].values
        summary = f"The values of column '{column}' are '{column_values}'."
        # Work on a copy so that the caller's list does not grow a NaN per call.
        allowed = list(values)
        if exact:
            observed = self[column]
            if allow_nan:
                # NaN never compares equal to itself, so a NaN placed in the
                # allowed set could never cancel out in the symmetric
                # difference; drop the permitted NaNs from the column instead.
                observed = observed.dropna()
            not_allowed = list(set(allowed).symmetric_difference(set(observed.values)))
            message = f"The values column '{column}' do not exactly match the allowed values '{allowed}'"
        else:
            if allow_nan:
                allowed.append(float('nan'))
            not_allowed = self[~self[column].isin(allowed)].index.values
            message = (
                f"The row(s) '{not_allowed}' of the column '{column}' are not in the list of allowed values '{allowed}'."
            )
        assert_or_warn(len(not_allowed) == 0, warning, summary, message)

    def assert_range(
        self,
        column: str,
        *,
        min: Union[int, float, None] = None,
        max: Union[int, float, None] = None,
        warning: bool = False,
    ) -> None:
        """
        Asserts that all values of the specified column are in the specified range.

        Args:
            column (str, required): The column to check.
            min (Union[int, float], optional): The minimum value of the range. Not checked if None.
            max (Union[int, float], optional): The maximum value of the range. Not checked if None.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        column_values = self[column].values
        summary = f"The values of column '{column}' are '{column_values}'."
        if min is not None:
            assert_or_warn(
                min <= column_values.min(),
                warning,
                summary,
                f"The minimum value of column '{column}' is '{column_values.min()}', which is less than '{min}'.",
            )
        if max is not None:
            assert_or_warn(
                max >= column_values.max(),
                warning,
                summary,
                f"The maximum value of column '{column}' is '{column_values.max()}', which is greater than '{max}'.",
            )
170 |
--------------------------------------------------------------------------------
/phytest/bio/sequence.py:
--------------------------------------------------------------------------------
1 | import re
2 | from builtins import max as builtin_max
3 | from typing import List, Optional, Union
4 |
5 | from Bio import AlignIO
6 | from Bio import SeqIO as SeqIO
7 | from Bio.SeqRecord import SeqRecord
8 |
9 | from ..utils import PhytestObject, assert_or_warn
10 |
11 |
12 | class Sequence(PhytestObject, SeqRecord):
13 | @classmethod
14 | def parse(cls, alignment_path, alignment_format) -> 'Sequence':
15 | return (
16 | Sequence(
17 | r.seq,
18 | id=r.id,
19 | name=r.name,
20 | description=r.description,
21 | dbxrefs=r.dbxrefs,
22 | features=r.features,
23 | annotations=r.annotations,
24 | letter_annotations=r.letter_annotations,
25 | )
26 | for r in SeqIO.parse(alignment_path, alignment_format)
27 | )
28 |
29 | def assert_valid_alphabet(self, alphabet: str = "ATCGN-", *, warning: bool = False) -> None:
30 | """
31 | Asserts that that the sequence only contains particular charaters.
32 |
33 | Args:
34 | alphabet (str): A string containing legal charaters. Defaults to 'ATCGN-'.
35 | warning (bool): If True, raise a warning instead of an exception. Defaults to False.
36 | This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
37 | """
38 | regex_invalid = re.compile(f"[^{re.escape(alphabet)}]")
39 | result = regex_invalid.search(str(self.seq))
40 | if result:
41 | assert_or_warn(
42 | not result,
43 | warning,
44 | f"Invalid pattern found in '{self.id}'.",
45 | f"Character '{result.group(0)}' at position {result.start(0)+1} found which is not in alphabet '{alphabet}'.",
46 | )
47 |
48 | def assert_length(
49 | self,
50 | length: Optional[int] = None,
51 | *,
52 | min: Optional[int] = None,
53 | max: Optional[int] = None,
54 | warning: bool = False,
55 | ) -> None:
56 | """
57 | Asserts that that the sequence length meets the specified criteria.
58 |
59 | Args:
60 | length (int, optional): If set, then sequence length must be equal to this value. Defaults to None.
61 | min (int, optional): If set, then sequence length must be equal to or greater than this value. Defaults to None.
62 | max (int, optional): If set, then sequence length must be equal to or less than this value. Defaults to None.
63 | warning (bool): If True, raise a warning instead of an exception. Defaults to False.
64 | This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
65 | """
66 | sequence_length = len(self.seq)
67 | if length is not None:
68 | assert_or_warn(
69 | sequence_length == length,
70 | warning,
71 | f"Sequence length of '{self.id}' ({sequence_length}) is not equal to the required length of {length}.",
72 | )
73 | if min is not None:
74 | assert_or_warn(
75 | sequence_length >= min,
76 | warning,
77 | f"Sequence length of '{self.id}' ({sequence_length}) is less than the minimum {min}.",
78 | )
79 | if max is not None:
80 | assert_or_warn(
81 | sequence_length <= max,
82 | warning,
83 | f"Sequence length of '{self.id}' ({sequence_length}) is greater than the maximum {max}.",
84 | )
85 |
86 | def assert_count(
87 | self,
88 | pattern: str,
89 | *,
90 | count: Optional[int] = None,
91 | min: Optional[int] = None,
92 | max: Optional[int] = None,
93 | warning: bool = False,
94 | ) -> None:
95 | """
96 | Asserts that the count of a pattern in the sequence meets the specified criteria.
97 |
98 | Args:
99 | pattern: (str): the pattern to count in the the sequence.
100 | count (int, optional): If set, then pattern count must be equal to this value. Defaults to None.
101 | min (int, optional): If set, then pattern count must be equal to or greater than this value. Defaults to None.
102 | max (int, optional): If set, then pattern count must be equal to or less than this value. Defaults to None.
103 | warning (bool): If True, raise a warning instead of an exception. Defaults to False.
104 | This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
105 | """
106 | base_count = self.seq.count(pattern)
107 | summary = f"Sequence '{self.id}' matches pattern '{pattern}' {base_count} time(s)."
108 | if count is not None:
109 | assert_or_warn(
110 | base_count == count,
111 | warning,
112 | summary,
113 | f"This is not equal to the required number of {count}.",
114 | )
115 | if min is not None:
116 | assert_or_warn(
117 | base_count >= min,
118 | warning,
119 | summary,
120 | f"This is less than the minimum {min}.",
121 | )
122 | if max is not None:
123 | assert_or_warn(
124 | base_count <= max,
125 | warning,
126 | summary,
127 | f"This is greater than the maximum {max}.",
128 | )
129 |
130 | def assert_percent(
131 | self,
132 | nucleotide: Union[str, List[str]],
133 | *,
134 | percent: Optional[float] = None,
135 | min: Optional[float] = None,
136 | max: Optional[float] = None,
137 | warning: bool = False,
138 | ) -> None:
139 | """
140 | Asserts that the percentage of a nucleotide in the sequence meets the specified criteria.
141 |
142 | Args:
143 | nucleotide: (Union[str, List[str]]): The nucleotide(s) to count in the the sequence.
144 | percent (float, optional): If set, then nucleotide percentage must be equal to this value. Defaults to None.
145 | min (float, optional): If set, then nucleotide percentage must be equal to or greater than this value. Defaults to None.
146 | max (float, optional): If set, then nucleotide percentage must be equal to or less than this value. Defaults to None.
147 | warning (bool): If True, raise a warning instead of an exception. Defaults to False.
148 | This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
149 | """
150 | try:
151 | if isinstance(nucleotide, str):
152 | if len(nucleotide) > 1:
153 | raise ValueError(
154 | f"The length of the requested nucleotide '{nucleotide}' is more than a single character. "
155 | "This value should either be a single character (i.e. A, G, C, T) or a list of single characters."
156 | )
157 | base_percent = (self.seq.count(nucleotide) * 100.0) / len(self.seq)
158 | elif isinstance(nucleotide, list):
159 | base_percent = (sum(self.seq.count(x) for x in nucleotide) * 100) / len(self.seq)
160 | nucleotide = ', '.join(nucleotide)
161 | else:
162 | raise ValueError(f"Nucleotide must be str or list and cannot be of type '{type(nucleotide)}'.")
163 | except ZeroDivisionError:
164 | base_percent = 0.0
165 | summary = f"Sequence '{self.id}' contains {base_percent} percent '{nucleotide}'."
166 | if percent is not None:
167 | assert_or_warn(
168 | base_percent == percent,
169 | warning,
170 | summary,
171 | f"This is not equal to the required percentage of {percent}.",
172 | )
173 | if min is not None:
174 | assert_or_warn(
175 | base_percent >= min,
176 | warning,
177 | summary,
178 | f"This is less than the minimum {min}.",
179 | )
180 | if max is not None:
181 | assert_or_warn(
182 | base_percent <= max,
183 | warning,
184 | summary,
185 | f"This is greater than the maximum {max}.",
186 | )
187 |
188 | def assert_percent_GC(
189 | self,
190 | percent: Optional[int] = None,
191 | *,
192 | min: Optional[int] = None,
193 | max: Optional[int] = None,
194 | warning: bool = False,
195 | ) -> None:
196 | """
197 | Asserts that the percent of GC's (ambiguous nucleotide S) in the sequence meets the specified criteria.
198 |
199 | Args:
200 | percent (float, optional): If set, then the percentage of GC's must be equal to this value. Defaults to None.
201 | min (float, optional): If set, then the percentage of GC's must be equal to or greater than this value. Defaults to None.
202 | max (float, optional): If set, then the percentage of GC's must be equal to or less than this value. Defaults to None.
203 | warning (bool): If True, raise a warning instead of an exception. Defaults to False.
204 | This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
205 | """
206 | self.assert_percent(
207 | nucleotide=["G", "C", "g", "c", "S", "s"], percent=percent, min=min, max=max, warning=warning
208 | )
209 |
210 | def assert_percent_N(
211 | self,
212 | percent: Optional[int] = None,
213 | *,
214 | min: Optional[int] = None,
215 | max: Optional[int] = None,
216 | warning: bool = False,
217 | ) -> None:
218 | """
219 | Asserts that the percent of N's in the sequence meets the specified criteria.
220 |
221 | Args:
222 | percent (float, optional): If set, then the percentage of N's must be equal to this value. Defaults to None.
223 | min (float, optional): If set, then the percentage of N's must be equal to or greater than this value. Defaults to None.
224 | max (float, optional): If set, then the percentage of N's must be equal to or less than this value. Defaults to None.
225 | warning (bool): If True, raise a warning instead of an exception. Defaults to False.
226 | This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
227 | """
228 | self.assert_percent(nucleotide=["N", "n"], percent=percent, min=min, max=max, warning=warning)
229 |
230 | def assert_percent_gaps(
231 | self,
232 | percent: Optional[int] = None,
233 | *,
234 | min: Optional[int] = None,
235 | max: Optional[int] = None,
236 | warning: bool = False,
237 | ) -> None:
238 | """
239 | Asserts that the percent of gaps (-) in the sequence meets the specified criteria.
240 |
241 | Args:
242 | percent (float, optional): If set, then the percentage of gaps must be equal to this value. Defaults to None.
243 | min (float, optional): If set, then the percentage of gaps must be equal to or greater than this value. Defaults to None.
244 | max (float, optional): If set, then the percentage of gaps must be equal to or less than this value. Defaults to None.
245 | warning (bool): If True, raise a warning instead of an exception. Defaults to False.
246 | This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
247 | """
248 | self.assert_percent(nucleotide='-', percent=percent, min=min, max=max, warning=warning)
249 |
def assert_count_Ns(
    self,
    count: Optional[int] = None,
    *,
    min: Optional[int] = None,
    max: Optional[int] = None,
    warning: bool = False,
) -> None:
    """
    Checks the number of N's in the sequence against the specified criteria.

    Delegates to `assert_count` using the pattern 'N'.

    Args:
        count (int, optional): If set, then the number of N's must be equal to this value. Defaults to None.
        min (int, optional): If set, then the number of N's must be equal to or greater than this value. Defaults to None.
        max (int, optional): If set, then the number of N's must be equal to or less than this value. Defaults to None.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    criteria = {"count": count, "min": min, "max": max}
    self.assert_count(pattern='N', warning=warning, **criteria)
269 |
def assert_count_gaps(
    self,
    count: Optional[int] = None,
    *,
    min: Optional[int] = None,
    max: Optional[int] = None,
    warning: bool = False,
) -> None:
    """
    Checks the number of gaps (-) in the sequence against the specified criteria.

    Delegates to `assert_count` using the pattern '-'.

    Args:
        count (int, optional): If set, then the number of gaps (-) must be equal to this value. Defaults to None.
        min (int, optional): If set, then the number of gaps (-) must be equal to or greater than this value. Defaults to None.
        max (int, optional): If set, then the number of gaps (-) must be equal to or less than this value. Defaults to None.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    criteria = {"count": count, "min": min, "max": max}
    self.assert_count(pattern='-', warning=warning, **criteria)
289 |
def assert_longest_stretch(
    self,
    pattern: str,
    *,
    count: Optional[int] = None,
    min: Optional[int] = None,
    max: Optional[int] = None,
    warning: bool = False,
) -> None:
    """
    Asserts that the longest stretch of a pattern in the sequence meets the specified criteria.

    A stretch is a run of one or more consecutive repeats of the pattern and its length is
    measured in characters, e.g. the longest stretch of N's in 'ANNNANNA' is 3.

    Args:
        pattern: (str): The pattern to search for in the sequence. It is interpreted as a regular expression.
        count (int, optional): If set, then the longest stretch of the pattern must be equal to this value. Defaults to None.
        min (int, optional): If set, then the longest stretch of the pattern must be equal to or greater than this value. Defaults to None.
        max (int, optional): If set, then the longest stretch of the pattern must be equal to or less than this value. Defaults to None.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    # The non-capturing group makes `+` repeat the whole pattern rather than only its
    # last atom, and keeps groups inside the pattern from changing what is measured.
    stretch_regex = re.compile(f'(?:{pattern})+')

    # Compare stretches by length. An explicit loop is used because the builtin `max`
    # is shadowed by the `max` parameter of this method.
    longest_stretch = 0
    for stretch in stretch_regex.finditer(str(self.seq)):
        stretch_length = stretch.end() - stretch.start()
        if stretch_length > longest_stretch:
            longest_stretch = stretch_length

    summary = f"The longest stretch of pattern '{pattern}' in sequence '{self.id}' is {longest_stretch}."
    if count is not None:
        assert_or_warn(
            longest_stretch == count,
            warning,
            summary,
            f"This is not equal to the required number of {count}.",
        )
    if min is not None:
        assert_or_warn(
            longest_stretch >= min,
            warning,
            summary,
            f"This is less than the minimum {min}.",
        )
    if max is not None:
        assert_or_warn(
            longest_stretch <= max,
            warning,
            summary,
            f"This is greater than the maximum {max}.",
        )
336 |
def assert_longest_stretch_Ns(
    self,
    count: Optional[int] = None,
    *,
    min: Optional[int] = None,
    max: Optional[int] = None,
    warning: bool = False,
):
    """
    Checks the longest stretch of N's in the sequence against the specified criteria.

    e.g. the longest stretch of N's in 'ANNNANNA' is 3.

    Delegates to `assert_longest_stretch` using the pattern 'N'.

    Args:
        count (int, optional): If set, then the longest stretch of N's must be equal to this value. Defaults to None.
        min (int, optional): If set, then the longest stretch of N's must be equal to or greater than this value. Defaults to None.
        max (int, optional): If set, then the longest stretch of N's must be equal to or less than this value. Defaults to None.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    criteria = {"count": count, "min": min, "max": max}
    self.assert_longest_stretch(pattern='N', warning=warning, **criteria)
358 |
def assert_longest_stretch_gaps(
    self,
    count: Optional[int] = None,
    *,
    min: Optional[int] = None,
    max: Optional[int] = None,
    warning: bool = False,
):
    """
    Checks the longest stretch of gaps (-) in the sequence against the specified criteria.

    e.g. the longest stretch of gaps (-) in 'A---A--A' is 3.

    Delegates to `assert_longest_stretch` using the pattern '-'.

    Args:
        count (int, optional): If set, then the longest stretch of gaps (-) must be equal to this value. Defaults to None.
        min (int, optional): If set, then the longest stretch of gaps (-) must be equal to or greater than this value. Defaults to None.
        max (int, optional): If set, then the longest stretch of gaps (-) must be equal to or less than this value. Defaults to None.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    criteria = {"count": count, "min": min, "max": max}
    self.assert_longest_stretch(pattern='-', warning=warning, **criteria)
380 |
def assert_startswith(self, pattern: str, *, warning: bool = False):
    """
    Checks that the sequence begins with the given pattern.

    Args:
        pattern (str): The sequence must start with this value.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    has_prefix = self.seq.startswith(pattern)
    failure_message = f"Sequence '{self.id}' does not start with '{pattern}'."
    assert_or_warn(has_prefix, warning, failure_message)
395 |
def assert_endswith(self, pattern: str, *, warning: bool = False):
    """
    Checks that the sequence finishes with the given pattern.

    Args:
        pattern (str): The sequence must end with this value.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    has_suffix = self.seq.endswith(pattern)
    failure_message = f"Sequence '{self.id}' does not end with '{pattern}'."
    assert_or_warn(has_suffix, warning, failure_message)
410 |
def assert_contains(self, pattern: str, *, warning: bool = False):
    """
    Checks that the pattern occurs at least once in the sequence.

    Implemented as `assert_count` with a minimum count of 1.

    Args:
        pattern (str): The sequence must contain this value.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    minimum_occurrences = 1
    self.assert_count(pattern=pattern, min=minimum_occurrences, warning=warning)
421 |
--------------------------------------------------------------------------------
/phytest/bio/tree.py:
--------------------------------------------------------------------------------
import copy
import re
import unittest
from datetime import datetime
from io import BytesIO, StringIO
from pathlib import Path
from typing import Dict, Iterator, List, Optional, Union
from warnings import warn

from Bio import Phylo as Phylo
from Bio.Align import MultipleSeqAlignment
from Bio.Phylo.BaseTree import Clade
from Bio.Phylo.BaseTree import Tree as BioTree
from dateutil.parser import parse
from pytest_html import extras
from treetime import GTR, TreeTime
from treetime.utils import DateConversion, datetime_from_numeric, numeric_date

from ..utils import (
    PhytestAssertion,
    PhytestObject,
    PhytestWarning,
    assert_or_warn,
    default_date_patterns,
)
26 |
27 |
28 | class Tree(PhytestObject, BioTree):
@classmethod
def read(cls, tree_path, tree_format) -> 'Tree':
    """
    Reads a single tree with `Bio.Phylo.read` and re-wraps it as an instance of this class.

    Args:
        tree_path: The path or file handle passed on to `Phylo.read`.
        tree_format: The tree format name passed on to `Phylo.read`.

    Returns:
        Tree: A new instance built from the root, rooted flag, id and name of the parsed tree.
    """
    parsed = Phylo.read(tree_path, tree_format)
    attributes = {"root": parsed.root, "rooted": parsed.rooted, "id": parsed.id, "name": parsed.name}
    return cls(**attributes)
33 |
@classmethod
def parse(cls, tree_path, tree_format) -> Iterator['Tree']:
    """
    Lazily reads every tree in a file with `Bio.Phylo.parse`, re-wrapping each as an instance of this class.

    Args:
        tree_path: The path or file handle passed on to `Phylo.parse`.
        tree_format: The tree format name passed on to `Phylo.parse`.

    Returns:
        Iterator[Tree]: A generator yielding one instance per parsed tree.
            Nothing is converted until the generator is iterated.
    """
    trees = Phylo.parse(tree_path, tree_format)
    # A generator expression (not a list) keeps the lazy behaviour of `Phylo.parse`.
    return (cls(root=tree.root, rooted=tree.rooted, id=tree.id, name=tree.name) for tree in trees)
38 |
@classmethod
def read_str(cls, tree_str: str, tree_format: str = "newick") -> 'Tree':
    """
    Builds a tree from a string instead of a file.

    Args:
        tree_str (str): The text of the tree.
        tree_format (str): The tree format name. Defaults to "newick".

    Returns:
        Tree: The result of `read` applied to an in-memory text buffer holding `tree_str`.
    """
    return cls.read(StringIO(tree_str), tree_format)
43 |
@property
def tips(self):
    """The terminal nodes of the tree, as returned by `get_terminals`."""
    terminals = self.get_terminals()
    return terminals
47 |
def parse_tip_dates(
    self,
    *,
    patterns=None,
    date_format: Optional[str] = None,
    decimal_year: bool = False,
):
    """
    Extracts a date for each tip from its name using regular expression patterns.

    For every tip the patterns are tried in order and the first one that matches is used.
    Tips without a name, or whose name matches none of the patterns, are left out of the result.

    Args:
        patterns (str or list of str, optional): The regex pattern(s) used to find the date within a tip name.
            The whole match is taken as the date text.
            If not set, the patterns from `default_date_patterns` are used.
        date_format (str, optional): A `datetime.strptime` format used to interpret the matched text.
            It is only applied to matches that are not plain numbers.
            If not set, the text is interpreted with `dateutil.parser.parse`.
        decimal_year (bool): If True, the dates are converted to decimal years with `numeric_date`.
            Defaults to False.

    Returns:
        dict: The tip names mapped to their dates.
    """
    patterns = patterns or default_date_patterns()
    if isinstance(patterns, str):
        patterns = [patterns]

    compiled_patterns = [re.compile(pattern_string) for pattern_string in patterns]

    dates = {}
    for tip in self.find_elements(terminal=True):
        # An unnamed tip cannot carry a date, and `search(None)` would raise a TypeError.
        if tip.name is None:
            continue

        for pattern in compiled_patterns:
            m = pattern.search(tip.name)
            if not m:
                continue

            matched_str = m.group(0)
            if re.match(r"^\d+\.?\d*$", matched_str):
                # A plain number is treated as a decimal year.
                date = datetime_from_numeric(float(matched_str))
            elif date_format:
                # The second positional argument of `dateutil.parser.parse` is a `parserinfo`
                # object, not a format string, so explicit formats go through `strptime`.
                date = datetime.strptime(matched_str, date_format)
            else:
                date = parse(matched_str)

            dates[tip.name] = date
            break

    if decimal_year:
        dates = {key: numeric_date(value) for key, value in dates.items()}

    return dates
79 |
def assert_number_of_tips(
    self,
    tips: Optional[int] = None,
    *,
    min: Optional[int] = None,
    max: Optional[int] = None,
    warning: bool = False,
):
    """
    Checks the number of tips (terminal nodes) in the tree against the specified criteria.

    Each criterion that is set is checked independently, in the order `tips`, `min`, `max`.

    Args:
        tips (int, optional): If set, then number of tips must be equal to this value. Defaults to None.
        min (int, optional): If set, then number of tips must be equal to or greater than this value. Defaults to None.
        max (int, optional): If set, then number of tips must be equal to or less than this value. Defaults to None.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    tip_count = len(self.get_terminals())

    if tips is not None:
        is_exact = tip_count == tips
        message = f"The number of tips ({tip_count}) which is different from the required number of tips ({tips})."
        assert_or_warn(is_exact, warning, message)

    if min is not None:
        is_enough = tip_count >= min
        message = f"The number of tips ({tip_count}) is less than the minimum ({min})."
        assert_or_warn(is_enough, warning, message)

    if max is not None:
        is_within_limit = tip_count <= max
        message = f"The number of tips ({tip_count}) is greater than the maximum ({max})."
        assert_or_warn(is_within_limit, warning, message)
117 |
def assert_unique_tips(self, *, warning: bool = False):
    """
    Checks that no two tips share the same name.

    Args:
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.

    """
    names = [terminal.name for terminal in self.get_terminals()]
    total = len(names)
    distinct = len(set(names))
    assert_or_warn(
        total == distinct,
        warning,
        f"The tree contains {total} tips, however, {distinct} are unique.",
    )
133 |
def assert_is_rooted(self, *, warning: bool = False):
    """
    Checks the `rooted` attribute of the tree.

    Args:
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    failure_message = "The tree is not rooted."
    assert_or_warn(self.rooted, warning, failure_message)
147 |
def assert_is_bifurcating(self, *, warning: bool = False):
    """
    Checks that the tree is bifurcating, as reported by `is_bifurcating`.

    The root may have 3 descendents and still be considered part of a bifurcating tree, because it has no ancestor.

    Args:
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    bifurcating = self.is_bifurcating()
    failure_message = "The tree is not bifurcating."
    assert_or_warn(bifurcating, warning, failure_message)
163 |
def assert_is_monophyletic(self, tips: List[Clade], *, warning: bool = False):
    """
    Checks that the given tips form a monophyletic group, as reported by `is_monophyletic`.

    Args:
        tips (List[Clade]): List of terminal nodes (tips).
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    monophyletic = self.is_monophyletic(tips)
    group_names = ', '.join([tip.name for tip in tips])
    assert_or_warn(
        monophyletic,
        warning,
        f"The group '{group_names}' is paraphyletic!",
    )
178 |
def assert_branch_lengths(
    self,
    *,
    min: Optional[float] = None,
    max: Optional[float] = None,
    terminal: Optional[bool] = None,
    warning: bool = False,
):
    """
    Asserts that all the branch lengths meet the specified criteria.

    The root is never checked because it has no parent branch.

    Args:
        min (float, optional): If set, then each branch length must be equal to or greater than this value. Defaults to None.
        max (float, optional): If set, then each branch length must be equal to or less than this value. Defaults to None.
        terminal (bool, optional): True searches for only terminal nodes, False excludes terminal nodes, and the default, None,
            searches both terminal and non-terminal nodes, as well as any tree elements lacking the is_terminal method.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    for node in self.find_clades(terminal=terminal):
        # Skip the root by identity instead of dropping the first element of the search:
        # when only terminal nodes are requested the first element is a tip, not the root,
        # and it must be checked like every other tip.
        if node is self.root:
            continue

        if min is not None:
            assert_or_warn(
                node.branch_length >= min,
                warning,
                f"An internal branch in the tree is less than the minimum ({min}).",
            )
        if max is not None:
            assert_or_warn(
                node.branch_length <= max,
                warning,
                f"An internal branch in the tree is greater than the maximum ({max}).",
            )
213 |
def assert_terminal_branch_lengths(
    self,
    *,
    min: Optional[float] = None,
    max: Optional[float] = None,
    warning: bool = False,
):
    """
    Checks the branch lengths of the terminal nodes against the specified criteria.

    Delegates to `assert_branch_lengths` with `terminal=True`.

    Args:
        min (float, optional): If set, then each terminal branch length must be equal to or greater than this value. Defaults to None.
        max (float, optional): If set, then each terminal branch length must be equal to or less than this value. Defaults to None.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    limits = {"min": min, "max": max}
    self.assert_branch_lengths(terminal=True, warning=warning, **limits)
231 |
def assert_internal_branch_lengths(
    self,
    *,
    min: Optional[float] = None,
    max: Optional[float] = None,
    warning: bool = False,
):
    """
    Checks the branch lengths of the internal (non-terminal) nodes against the specified criteria.

    Delegates to `assert_branch_lengths` with `terminal=False`.

    Args:
        min (float, optional): If set, then each internal branch length must be equal to or greater than this value. Defaults to None.
        max (float, optional): If set, then each internal branch length must be equal to or less than this value. Defaults to None.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    limits = {"min": min, "max": max}
    self.assert_branch_lengths(terminal=False, warning=warning, **limits)
249 |
def assert_no_negatives(
    self,
    *,
    terminal: Optional[bool] = None,
    warning: bool = False,
):
    """
    Checks that no branch has a negative length.

    Delegates to `assert_branch_lengths` with a minimum of 0.

    Args:
        terminal (bool, optional): True searches for only terminal nodes, False excludes terminal nodes, and the default, None,
            searches both terminal and non-terminal nodes, as well as any tree elements lacking the is_terminal method.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    smallest_allowed = 0
    self.assert_branch_lengths(min=smallest_allowed, terminal=terminal, warning=warning)
266 |
def assert_total_branch_length(
    self,
    length: Optional[float] = None,
    *,
    min: Optional[float] = None,
    max: Optional[float] = None,
    warning: bool = False,
):
    """
    Checks the total branch length of the tree against the specified criteria.

    Each criterion that is set is checked independently, in the order `length`, `min`, `max`.

    Args:
        length (float, optional): If set, then total branch length must be equal to this value. Defaults to None.
        min (float, optional): If set, then total branch length must be equal to or greater than this value. Defaults to None.
        max (float, optional): If set, then total branch length must be equal to or less than this value. Defaults to None.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    tree_length = self.total_branch_length()

    if length is not None:
        is_exact = tree_length == length
        message = f"The total branch length ({tree_length}) is not equal to the required length ({length})."
        assert_or_warn(is_exact, warning, message)

    if min is not None:
        is_long_enough = tree_length >= min
        message = f"The total branch length ({tree_length}) is less than the minimum ({min})."
        assert_or_warn(is_long_enough, warning, message)

    if max is not None:
        is_short_enough = tree_length <= max
        message = f"The total branch length ({tree_length}) is greater than the maximum ({max})."
        assert_or_warn(is_short_enough, warning, message)
304 |
def assert_tip_regex(
    self,
    patterns: Union[List[str], str],
    *,
    warning: bool = False,
):
    """
    Checks that the name of every tip matches at least one regular expression pattern.

    Args:
        patterns (Union[List[str], str]): The regex pattern(s) to match to.
            If a string, then every tip must match that pattern.
            If a list then each tip must match at least one pattern in the list.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    pattern_list = [patterns] if isinstance(patterns, str) else patterns
    regexes = [re.compile(pattern_string) for pattern_string in pattern_list]

    for tip in self.find_elements(terminal=True):
        # `any` stops at the first pattern that is found in the tip name.
        tip_matches = any(regex.search(tip.name) for regex in regexes)
        assert_or_warn(
            tip_matches,
            warning,
            f"Tip {tip.name} does not match any of the regex patterns in: '{pattern_list}'.",
        )
337 |
def assert_tip_names(self, names: List[str], warning: bool = False):
    """
    Asserts that the tree tip names match the supplied names.

    Two checks are made: the number of tips must equal the number of supplied names, and
    the set of tip names must equal the set of supplied names.

    Args:
        names (List[str]): The names to match.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    tip_names = [t.name for t in self.get_terminals()]
    assert_or_warn(
        len(tip_names) == len(names),
        warning,
        f"The tree contains {len(tip_names)} tips, however, {len(names)} names were supplied.",
    )
    # The symmetric difference catches mismatches in both directions. Checking only the tips
    # missing from `names` lets a supplied name that is absent from the tree slip through
    # whenever duplicates make the two lengths equal.
    diff = set(tip_names).symmetric_difference(names)
    # Sorting makes the message deterministic; `str` guards against unnamed tips.
    difference_text = ', '.join(sorted(str(name) for name in diff))
    assert_or_warn(
        not diff,
        warning,
        f"There was a difference ({difference_text}) between the supplied names and tree tip names.",
    )
359 |
def copy(self):
    """Returns an independent deep copy of this tree."""
    return copy.deepcopy(self)
364 |
def root_to_tip_regression(
    self,
    *,
    dates: Optional[Dict] = None,
    alignment: Optional[MultipleSeqAlignment] = None,
    sequence_length: Optional[int] = None,
    clock_filter: float = 3.0,
    gtr: Union[GTR, str] = 'JC69',
    root_method: str = 'least-squares',
    allow_negative_rate: bool = False,
    keep_root: bool = False,
    covariation: bool = False,
) -> TreeTime:
    """
    Performs a root-to-tip regression to determine how clock-like a tree is.

    The regression is run on a deep copy of this tree, so this tree itself is not passed to TreeTime.

    Args:
        dates (Dict, optional): The tip dates as a dictionary with the tip name as the key and the date as the value.
            If not set, then it parses the tip dates to generate this dictionary using the `parse_tip_dates` method.
            Values which are `datetime` objects are converted to decimal years.
        alignment (MultipleSeqAlignment, optional): The alignment associated with this tree. Defaults to None.
        sequence_length (int, optional): The sequence length of the alignment. Defaults to None.
        clock_filter (float, optional): The number of interquartile ranges from regression beyond which to ignore.
            This provides a way to ignore tips that don't follow a loose clock.
            A falsy value (e.g. 0) skips the clock filter.
            Defaults to 3.0.
        gtr (GTR, str, optional): The molecular evolution model. Defaults to 'JC69'.
        allow_negative_rate (bool, optional): Whether or not a negative clock rate is allowed.
            For trees with little temporal signal, it can be set to True to achieve essentially mid-point rooting.
            Defaults to False.
        keep_root (bool, optional): Keeps the current root of the tree.
            If False, then a new optimal root is sought. Defaults to False.
        root_method (str, optional): The method used to reroot the tree if `keep_root` is False.
            Valid choices are: 'min_dev', 'least-squares', and 'oldest'.
            Defaults to 'least-squares'.
        covariation (bool, optional): Accounts for covariation when estimating rates or rerooting. Defaults to False.

    Returns:
        TreeTime: The TreeTime object after its clock model has been computed.

    Raises:
        PhytestAssertion: If `covariation` is True and neither `alignment` nor `sequence_length` is given.
        AssertionError: If `keep_root` is False and `root_method` is not one of the valid choices.
    """

    if covariation and (alignment is None and sequence_length is None):
        raise PhytestAssertion(
            "Cannot perform root-to-tip regression with `covariation` as True if no alignment of sequence length is provided."
        )

    dates = dates or self.parse_tip_dates()

    # Convert datetimes to floats with decimal years if necessary
    dates = {name: numeric_date(date) if isinstance(date, datetime) else date for name, date in dates.items()}

    # TreeTime is given a copy of the tree rather than this object.
    regression = TreeTime(
        dates=dates,
        tree=self.copy(),
        aln=alignment,
        gtr=gtr,
        seq_len=sequence_length,
    )

    if clock_filter:
        # Record which tips are flagged as bad before and after filtering so that
        # only the tips newly flagged by the clock filter are reported.
        bad_nodes = [node.name for node in regression.tree.get_terminals() if node.bad_branch]
        regression.clock_filter(n_iqd=clock_filter, reroot=root_method or 'least-squares')
        bad_nodes_after = [node.name for node in regression.tree.get_terminals() if node.bad_branch]
        if len(bad_nodes_after) > len(bad_nodes):
            warn(
                "The following leaves don't follow a loose clock and "
                "will be ignored in rate estimation:\n\t" + "\n\t".join(set(bad_nodes_after).difference(bad_nodes)),
                PhytestWarning,
            )

    if not keep_root:
        if covariation:  # this requires branch length estimates
            regression.run(root="least-squares", max_iter=0, use_covariation=covariation)

        # NOTE(review): `root_method` is only validated here, after it has already been passed
        # to `clock_filter` above, and this bare `assert` is skipped when Python runs with -O.
        assert root_method in ['min_dev', 'least-squares', 'oldest']
        regression.reroot(root_method, force_positive=not allow_negative_rate)

    regression.get_clock_model(covariation=covariation)
    return regression
439 |
def plot_root_to_tip(
    self,
    filename: Union[str, Path],
    *,
    format: Optional[str] = None,
    regression: Optional[TreeTime] = None,
    add_internal: bool = False,
    label: bool = True,
    ax=None,
    **kwargs,
):
    """
    Draws the root-to-tip regression for this tree and saves it with matplotlib.

    Args:
        filename (str, Path): The path to save the plot as an image.
        format (str, optional): The image format passed on to `savefig`. Default: None.
        regression (TreeTime, optional): The root-to-tip regression for this tree.
            If None, then this regression is calculated using the `root_to_tip_regression` method.
        add_internal (bool): Whether or not to plot the internal node positions. Default: False.
        label (bool): Whether or not to label the points. Default: True.
        ax (matplotlib axes): Uses matplotlib axes if provided. Default: None.
        **kwargs: Keyword arguments for the `root_to_tip_regression` method.
    """
    if not regression:
        regression = self.root_to_tip_regression(**kwargs)

    # matplotlib is imported here rather than at module level, so it is only loaded when plotting.
    from matplotlib import pyplot as plt

    regression.plot_root_to_tip(add_internal=add_internal, label=label, ax=ax)

    # Path objects are converted to strings; anything else is handed to `savefig` untouched.
    destination = str(filename) if isinstance(filename, Path) else filename
    plt.savefig(destination, format=format)
471 |
def assert_root_to_tip(
    self,
    *,
    regression: Optional[TreeTime] = None,
    min_r_squared: Optional[float] = None,
    min_rate: Optional[float] = None,
    max_rate: Optional[float] = None,
    min_root_date: Optional[float] = None,
    max_root_date: Optional[float] = None,
    valid_confidence: Optional[bool] = None,
    extra: Optional[List] = None,
    warning: bool = False,
    **kwargs,
):
    """
    Checks inferred values from a root-to-tip regression.

    Args:
        regression (TreeTime, optional): The root-to-tip regression for this tree.
            If None, then this regression is calculated using the `root_to_tip_regression` method.
        min_r_squared (float, optional): If set, then R^2 must be equal or greater than this value. Defaults to None.
        min_rate (float, optional): If set, then the clock rate must be equal or greater than this value. Defaults to None.
        max_rate (float, optional): If set, then the clock rate must be equal or less than this value. Defaults to None.
        min_root_date (float, optional): If set, then the interpolated root date must be equal or greater than this value. Defaults to None.
        max_root_date (float, optional): If set, then the interpolated root date must be equal or less than this value. Defaults to None.
        valid_confidence (bool, optional): Checks that the `valid_confidence` value in the regression is equal to this boolean value.
            Defaults to None which does not perform a check.
        extra (List): The pytest-html extra fixture for adding in root-to-tip regression plot.
            If given, an SVG of the regression plot is appended to it before any check is made.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        **kwargs: Keyword arguments for the `root_to_tip_regression` method.
    """
    regression = regression or self.root_to_tip_regression(**kwargs)
    clock_model = DateConversion.from_regression(regression.clock_model)
    # The root date is the date at a distance of zero from the root.
    root_date = clock_model.numdate_from_dist2root(0.0)

    if extra is not None:
        # Render the plot into an in-memory buffer so that it can be embedded in the HTML report.
        f = StringIO()
        self.plot_root_to_tip(filename=f, format="svg", regression=regression)
        svg = f.getvalue()
        extra.append(extras.html(svg))

    if min_r_squared is not None:
        r_squared = clock_model.r_val**2
        assert_or_warn(
            r_squared >= min_r_squared,
            warning,
            # Both fragments need the `f` prefix; without it on the second one the
            # placeholder `{min_r_squared}` is printed literally instead of the value.
            f"The R-squared value from the root-to-tip regression '{r_squared}' "
            f"is less than the minimum allowed R-squarred '{min_r_squared}'.",
        )

    if min_rate is not None:
        assert_or_warn(
            clock_model.clock_rate >= min_rate,
            warning,
            f"Inferred clock rate '{clock_model.clock_rate}' is less than the minimum allowed clock rate '{min_rate}'.",
        )

    if max_rate is not None:
        assert_or_warn(
            clock_model.clock_rate <= max_rate,
            warning,
            f"Inferred clock rate '{clock_model.clock_rate}' is greater than the maximum allowed clock rate '{max_rate}'.",
        )

    if min_root_date is not None:
        assert_or_warn(
            root_date >= min_root_date,
            warning,
            f"Inferred root date '{root_date}' is less than the minimum allowed root date '{min_root_date}'.",
        )

    if max_root_date is not None:
        assert_or_warn(
            root_date <= max_root_date,
            warning,
            f"Inferred root date '{root_date}' is greater than the maximum allowed root date: '{max_root_date}'.",
        )

    if valid_confidence is not None:
        assert_or_warn(
            clock_model.valid_confidence == valid_confidence,
            warning,
            f"The `clock_model.valid_confidence` variable is not {valid_confidence}.",
        )
556 |
--------------------------------------------------------------------------------
/phytest/cli.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Optional
3 |
4 | from Bio.AlignIO import _FormatToIterator as supported_alignment_formats
5 | from Bio.Phylo._io import supported_formats as supported_tree_formats
6 | from Bio.SeqIO import _FormatToIterator as supported_sequence_formats
7 |
# Add the alignment formats to the sequence formats so that `--sequence-format` accepts both.
# NOTE(review): `supported_sequence_formats` is an alias of `Bio.SeqIO._FormatToIterator`, so this
# presumably mutates Biopython's own registry in place — confirm that this is intended.
supported_sequence_formats.update(supported_alignment_formats)
import typer

from .main import main

# The Typer application on which the `cli` command is registered.
app = typer.Typer()
14 |
15 |
def sequence_format_callback(value: str):
    """
    Validates the value given for the sequence format option.

    Args:
        value (str): The format name supplied on the command line.

    Returns:
        str: The value unchanged, if it is a key of `supported_sequence_formats`.

    Raises:
        typer.BadParameter: If the value is not a supported sequence format.
    """
    if value in supported_sequence_formats:
        return value

    valid_formats = ', '.join(supported_sequence_formats.keys())
    raise typer.BadParameter(f"'{value}' is not a valid sequence format. Must be one of {valid_formats}.")
22 |
23 |
def tree_format_callback(value: str):
    """
    Validates the value given for the tree format option.

    Args:
        value (str): The format name supplied on the command line.

    Returns:
        str: The value unchanged, if it is a key of `supported_tree_formats`.

    Raises:
        typer.BadParameter: If the value is not a supported tree format.
    """
    if value in supported_tree_formats:
        return value

    valid_formats = ', '.join(supported_tree_formats.keys())
    raise typer.BadParameter(f"'{value}' is not a valid tree format. Must be one of {valid_formats}.")
30 |
31 |
def data_format_callback(value: str):
    """
    Validates the value given for the data format option.

    Args:
        value (str): The format name supplied on the command line.

    Returns:
        str: The value unchanged, if it is one of 'csv', 'tsv' or 'excel'.

    Raises:
        typer.BadParameter: If the value is not one of the accepted data formats.
    """
    accepted_formats = ['csv', 'tsv', 'excel']
    if value in accepted_formats:
        return value
    raise typer.BadParameter(f"'{value}' is not a valid data format. Must be one of csv, tsv, excel.")
36 |
37 |
# Command line entry point: collects the options and hands them to `main`.
# No docstring is added to this function on purpose, because Typer would show it as
# the help text of the command and so change the output of `--help`.
@app.command(context_settings={"help_option_names": ["-h", "--help"]})
def cli(
    testfile: Path = typer.Argument(..., help="Path to test file."),
    sequence: Optional[Path] = typer.Option(
        None, "--sequence", "-s", dir_okay=False, exists=True, help="Path to sequence file."
    ),
    # The format options are plain strings checked by their callbacks. The path-only
    # validators (`dir_okay`, `exists`) do not apply to string options, so they are not set.
    sequence_format: Optional[str] = typer.Option(
        'fasta',
        "--sequence-format",
        help=f"{', '.join(supported_sequence_formats.keys())}.",
        callback=sequence_format_callback,
    ),
    tree: Optional[Path] = typer.Option(None, "--tree", "-t", dir_okay=False, exists=True, help="Path to tree file."),
    tree_format: Optional[str] = typer.Option(
        'newick',
        "--tree-format",
        help=f"{', '.join(supported_tree_formats.keys())}.",
        callback=tree_format_callback,
    ),
    data: Optional[Path] = typer.Option(None, "--data", "-d", dir_okay=False, exists=True, help="Path to data file."),
    data_format: Optional[str] = typer.Option(
        'csv', "--data-format", help="csv, tsv, excel.", callback=data_format_callback
    ),
    report: Optional[Path] = typer.Option(
        None, "--report", "-r", dir_okay=False, exists=False, help="Path to HTML report to generate."
    ),
    verbose: Optional[bool] = typer.Option(False, "--verbose", "-v", help="Verbose output"),
    expression: Optional[str] = typer.Option(
        None, "-k", help="Only run tests which match the given substring expression."
    ),
    cores: Optional[str] = typer.Option(
        None,
        "-n",
        help="Number of cores. Use 'auto' to spawn a number of workers processes equal to the number of available CPUs.",
    ),
):
    exit_code = main(
        testfile=testfile,
        sequence=sequence,
        sequence_format=sequence_format,
        tree=tree,
        tree_format=tree_format,
        data=data,
        data_format=data_format,
        verbose=verbose,
        report=report,
        expression=expression,
        cores=cores,
    )
    # Exit with the code returned by `main`.
    raise typer.Exit(code=exit_code)
92 |
--------------------------------------------------------------------------------
/phytest/main.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import os
3 | from pathlib import Path
4 | from typing import Optional
5 |
6 | import pytest
7 |
8 |
def main(
    testfile: Optional[Path] = None,
    sequence: Optional[Path] = None,
    sequence_format: Optional[str] = 'fasta',
    tree: Optional[Path] = None,
    tree_format: Optional[str] = 'newick',
    data: Optional[Path] = None,
    data_format: Optional[str] = 'csv',
    verbose: bool = False,
    report: Optional[Path] = None,
    expression: Optional[str] = None,
    cores: Optional[str] = None,
):
    """
    Assemble a pytest command line from the arguments and run it with the ``phytest`` plugin.

    Args:
        testfile: Test file handed to pytest. When falsy, the file of the code calling ``main`` is used.
        sequence: Sequence file, forwarded as ``--sequence``.
        sequence_format: Forwarded as ``--sequence-format`` when ``sequence`` is given; ``None`` omits the flag.
        tree: Tree file, forwarded as ``--tree``.
        tree_format: Forwarded as ``--tree-format`` when ``tree`` is given; ``None`` omits the flag.
        data: Data file, forwarded as ``--data``.
        data_format: Forwarded as ``--data-format`` when ``data`` is given; ``None`` omits the flag.
        verbose: Adds ``-v`` when true; otherwise ``-ra --tb=no --no-header`` is used.
        report: Destination of an HTML report. Must end with ``.html``.
        expression: Forwarded to pytest's ``-k`` option.
        cores: Forwarded to ``-n`` (pytest-xdist). Non-string values such as integers are converted to text.

    Returns:
        Whatever ``pytest.main`` returns.

    Raises:
        ValueError: If ``report`` is given and does not end with ``.html``.
    """
    if not testfile:
        # No test file supplied: use the file of the direct caller (stack frame 1).
        testfile = Path(os.path.abspath(inspect.stack()[1].filename))

    args = [testfile]
    if verbose:
        args.append("-v")
    else:
        args.extend(["-ra", "--tb=no", "--no-header"])

    inputs = (
        ("sequence", sequence, sequence_format),
        ("tree", tree, tree_format),
        ("data", data, data_format),
    )
    for option, path, file_format in inputs:
        if path is None:
            continue
        args.extend([f"--{option}", path])
        if file_format is not None:
            # None cannot be placed on a command line, so the flag is left off instead.
            # NOTE(review): presumably the plugin then applies its own default - confirm
            # against the plugin's option definitions.
            args.extend([f"--{option}-format", file_format])

    if report:
        if not str(report).endswith('.html'):
            raise ValueError("Report must use .html extension.")
        stylesheet = Path(__file__).parent / 'report/logo.css'
        args.extend([f"--html={report}", "--self-contained-html", f"--css={stylesheet}"])
    if expression:
        # only run tests which match the given substring expression
        # see the pytest help
        args.extend(["-k", expression])
    if cores:
        # parallel with pytest-xdist; str() lets API callers pass an int such as 2
        args.extend(["-n", str(cores)])
    return pytest.main(args, plugins=['phytest'])
51 |
--------------------------------------------------------------------------------
/phytest/report/logo.css:
--------------------------------------------------------------------------------
/* Heading style for the generated HTML report: reserves room on the left for a
   background image and aligns the heading content to the bottom of the box. */
h1 {
  /* NOTE(review): url() is empty here - presumably the image data is missing from this view; confirm against the repository file. */
  background-image: url();
  background-repeat: no-repeat;
  display: flex;
  height: 3em;
  /* image height is 2em; its width follows from the aspect ratio */
  background-size: auto 2em;
  align-items: end;
  padding-left: 30px;
}
10 |
--------------------------------------------------------------------------------
/phytest/utils.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | from typing import List
3 | from warnings import warn
4 |
5 |
class PhytestWarning(Warning):
    """Warning category used by ``assert_or_warn`` when a failed check is reported as a warning."""
8 |
9 |
class PhytestAssertion(AssertionError):
    """Exception raised by ``assert_or_warn`` when a failed check is reported as an error."""
12 |
13 |
def assert_or_warn(statement, warning, *messages):
    """
    Report a failed check either as a warning or as an exception.

    Nothing happens when ``statement`` is truthy. Otherwise ``messages`` are joined
    with newlines and, depending on ``warning``, either issued as a ``PhytestWarning``
    or raised as a ``PhytestAssertion``.
    """
    if not statement:
        text = "\n".join(messages)
        if not warning:
            raise PhytestAssertion(text)
        warn(text, PhytestWarning)
23 |
24 |
def default_date_patterns():
    """
    Return a new list holding the two default date regular expressions.
    """
    # four digits, an optional '.', then any further digits, anchored at the end of the text
    year_at_end = r"\d{4}\.?\d*$"
    # four digits, dash, two digits, dash, two digits
    dashed_date = r"\d{4}-\d{2}-\d{2}"
    return [year_at_end, dashed_date]
30 |
31 |
class PhytestObject:
    """
    Mixin that gives every ``assert_*`` method a ``warn_*`` counterpart.

    On construction, each callable attribute whose name starts with ``assert_`` gets a
    sibling instance attribute named ``warn_<rest>`` that calls the same method with
    ``warning=True``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # "assert_length" -> "warn_length": drop the "assert" prefix, keep the rest.
        prefix_length = len("assert")
        for name in self.assertion_method_names():
            warning_variant = partial(getattr(self, name), warning=True)
            setattr(self, f"warn{name[prefix_length:]}", warning_variant)

    def assertion_method_names(self) -> List[str]:
        """
        Returns a list with the names of the methods used to make assertion statements.
        """
        names = []
        for attribute in dir(self):
            if not attribute.startswith("assert_"):
                continue
            if callable(getattr(self, attribute)):
                names.append(attribute)
        return names
52 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "phytest"
3 | version = "1.4.1"
4 | description = "Quality control for phylogenetic pipelines using pytest"
5 | authors = ["Wytamma Wirth ", "Simon Mutch ", "Rob Turnbull "]
6 | readme = "README.rst"
7 | classifiers = [
8 | "Framework :: Pytest",
9 | ]
10 |
11 | [tool.poetry.scripts]
12 | phytest = "phytest.cli:app"
13 |
14 | [tool.poetry.dependencies]
15 | python = ">=3.8,<3.12"
16 | pytest = ">=7.1.1"
17 | pytest-sugar = ">=0.9.4"
18 | pytest-html = ">=3.1.1"
19 | typer = ">=0.4.1"
20 | biopython = ">=1.79"
21 | phylo-treetime = ">=0.8.6"
22 | scipy = ">=1.8.0"
23 | numpy = ">=1.22.3"
24 | pytest-xdist = ">=3.2.0"
25 |
26 | [tool.poetry.dev-dependencies]
27 | coverage = "^5.5"
28 | Sphinx = "^4.2.0"
29 | nbsphinx = "^0.8.7"
30 | sphinx-rtd-theme = "^1.0.0"
31 | sphinx-autobuild = "^2021.3.14"
32 | myst-parser = "^0.15.2"
33 | pre-commit = "^2.15.0"
34 | sphinx-copybutton = "^0.4.0"
35 | typing-extensions = "^4.1.1"
36 | pytest-cov = "^3.0.0"
37 | openpyxl = "^3.0.10"
38 |
39 | [build-system]
40 | requires = ["poetry-core>=1.0.0"]
41 | build-backend = "poetry.core.masonry.api"
42 |
43 | [tool.black]
44 | line-length = 120
45 | skip_string_normalization = true
46 |
47 | [tool.isort]
48 | profile = "black"
49 |
50 | [tool.pytest.ini_options]
51 | minversion = "6.0"
52 | testpaths = [
53 | "tests",
54 | ]
55 |
56 | [tool.poetry.plugins."pytest11"]
57 | "phytest" = "phytest"
58 |
59 |
60 | [tool.pylint.messages_control]
61 | disable = "C0330, C0326"
62 |
63 | [tool.pylint.format]
64 | max-line-length = "120"
65 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phytest-devs/phytest/b199e2fcc2bbd2a2e82ce71a73c472d28ea474ec/tests/__init__.py
--------------------------------------------------------------------------------
/tests/input/alignment.py:
--------------------------------------------------------------------------------
1 | from phytest import Alignment, Data, Sequence, Tree
2 |
3 |
def test_alignment_length(alignment: Alignment):
    """Check the supplied alignment with ``assert_length(length=4)``."""
    expected_length = 4
    alignment.assert_length(length=expected_length)
6 |
--------------------------------------------------------------------------------
/tests/input/basic.py:
--------------------------------------------------------------------------------
1 | from phytest import Alignment, Data, Sequence, Tree
2 |
3 |
def test_length(sequence: Sequence):
    """Check the supplied sequence with ``assert_length(length=100)``."""
    expected_length = 100
    sequence.assert_length(length=expected_length)
6 |
7 |
def test_alignment_length(alignment: Alignment):
    """Check the supplied alignment with ``assert_length(length=4)``."""
    expected_length = 4
    alignment.assert_length(length=expected_length)
10 |
11 |
def test_tree_number_of_tips(tree: Tree):
    """Check the supplied tree with ``assert_number_of_tips(4)``."""
    expected_tips = 4
    tree.assert_number_of_tips(expected_tips)
14 |
15 |
def test_data_number_of_rows(data: Data):
    """
    Check the 'name' column of the supplied data against the pattern ``Sequence_[A-D]``.

    NOTE(review): despite the function name, no row count is checked here.
    """
    column = 'name'
    pattern = 'Sequence_[A-D]'
    data.assert_match(column, pattern)
18 |
--------------------------------------------------------------------------------
/tests/input/testfile1.py:
--------------------------------------------------------------------------------
def test_dummy():
    """Trivially passing test that needs no input files."""
    one = 1
    assert one == 1
3 |
--------------------------------------------------------------------------------
/tests/test_alignments.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | import pytest
4 |
5 | from phytest import Alignment
6 | from phytest.utils import PhytestAssertion, PhytestWarning
7 |
8 |
def test_assert_alignment_width():
    """``assert_width`` passes for matching limits and fails or warns with the expected messages."""
    alignment = Alignment.read('examples/data/invalid.fasta', 'fasta')
    alignment.assert_width(width=100, min=99, max=101)

    failing_cases = (
        ({"width": 99}, "The width of the alignment is 100.\nThis is not equal to the required width of 99."),
        ({"min": 101}, "The width of the alignment is 100.\nThis is less than the minimum width of 101."),
        ({"max": 99}, "The width of the alignment is 100.\nThis is greater than the maximum width of 99."),
    )
    for limits, expected_message in failing_cases:
        with pytest.raises(PhytestAssertion, match=re.escape(expected_message)):
            alignment.assert_width(**limits)

    # The warn_ variant reports the same message as a warning instead of raising.
    too_wide = "The width of the alignment is 100.\nThis is greater than the maximum width of 99."
    with pytest.warns(PhytestWarning, match=re.escape(too_wide)):
        alignment.warn_width(max=99)
34 |
35 |
def test_assert_alignment_length():
    """``assert_length`` passes for matching limits and fails with the expected messages."""
    alignment = Alignment.read('examples/data/invalid.fasta', 'fasta')
    alignment.assert_length(length=3, min=2, max=4)

    failing_cases = (
        # NOTE(review): the expected text says "less than" although 3 is greater than 1.
        ({"length": 1}, "The number of sequences in the alignment is 3.\nThis is less than required number of 1."),
        ({"min": 4}, "The number of sequences in the alignment is 3.\nThis is less than the minimum 4."),
        ({"max": 2}, "The number of sequences in the alignment is 3.\nThis is greater than the maximum 2."),
    )
    for limits, expected_message in failing_cases:
        with pytest.raises(PhytestAssertion, match=re.escape(expected_message)):
            alignment.assert_length(**limits)
55 |
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pytest
4 | from typer.testing import CliRunner
5 |
6 | from phytest.cli import app
7 |
8 | runner = CliRunner()
9 |
10 |
def test_cli_help():
    """``--help`` exits with code 0 and lists the required TESTFILE argument."""
    outcome = runner.invoke(app, ["--help"])
    expected_line = "TESTFILE Path to test file. [required]"
    assert outcome.exit_code == 0
    assert expected_line in outcome.stdout
15 |
16 |
def test_cli_no_input_file(request: pytest.FixtureRequest):
    """A test file that needs no inputs runs and passes without any input options."""
    testfile = request.path.parent / "input/testfile1.py"
    outcome = runner.invoke(app, [str(testfile)])
    assert outcome.exit_code == 0
    for fragment in ("testfile1.py", "1 passed"):
        assert fragment in outcome.stdout
22 |
23 |
def test_cli_basic(request: pytest.FixtureRequest):
    """Running basic.py with sequence, tree and data files reports seven passing tests."""
    testfile = request.path.parent / "input/basic.py"
    argv = [str(testfile)]
    argv += ["-s", "examples/data/example.fasta"]
    argv += ["-t", "examples/data/example.tree"]
    argv += ["-d", "examples/data/example.csv"]
    outcome = runner.invoke(app, argv)
    assert "7 passed" in outcome.stdout
38 |
39 |
def test_cli_basic_expression(request: pytest.FixtureRequest):
    """``-k`` restricts the run to the named test and deselects the other six."""
    testfile = request.path.parent / "input/basic.py"
    argv = [str(testfile)]
    argv += ["-s", "examples/data/example.fasta"]
    argv += ["-t", "examples/data/example.tree"]
    argv += ["-d", "examples/data/example.csv"]
    argv += ["-k", "test_tree_number_of_tips"]
    outcome = runner.invoke(app, argv)
    assert "1 passed" in outcome.stdout
    assert "6 deselected" in outcome.stdout
57 |
58 |
def test_cli_parallel(request: pytest.FixtureRequest):
    """``-n 2`` makes the output mention two workers."""
    testfile = request.path.parent / "input/basic.py"
    argv = [str(testfile)]
    argv += ["-s", "examples/data/example.fasta"]
    argv += ["-t", "examples/data/example.tree"]
    argv += ["-d", "examples/data/example.csv"]
    argv += ["-n", "2"]
    outcome = runner.invoke(app, argv)
    assert "2 workers" in outcome.stdout
75 |
76 |
def test_cli_report(request: pytest.FixtureRequest, tmp_path: Path):
    """
    ``-r`` writes an HTML report to the requested path.

    The report goes into pytest's per-test temporary directory, so a file left
    behind by an earlier run cannot satisfy the assertion and the working
    directory is not polluted.
    """
    report_path = tmp_path / "pytest-report.html"
    result = runner.invoke(
        app,
        [
            str(request.path.parent / "input/basic.py"),
            "-s",
            "examples/data/example.fasta",
            "-t",
            "examples/data/example.tree",
            "-d",
            "examples/data/example.csv",
            "-r",
            str(report_path),
        ],
    )
    assert report_path.exists()
93 |
94 |
def test_cli_report_invalid(request: pytest.FixtureRequest):
    """A report path without the .html extension surfaces a ``ValueError``."""
    testfile = request.path.parent / "input/basic.py"
    argv = [str(testfile)]
    argv += ["-s", "examples/data/example.fasta"]
    argv += ["-t", "examples/data/example.tree"]
    argv += ["-d", "examples/data/example.csv"]
    argv += ["-r", "pytest-report.txt"]
    outcome = runner.invoke(app, argv)
    error = outcome.exception
    assert isinstance(error, ValueError)
    assert str(error) == "Report must use .html extension."
112 |
113 |
def test_cli_missing_sequence_file(request: pytest.FixtureRequest):
    """Without ``-s`` the output reports that test_length requires a sequence file."""
    testfile = request.path.parent / "input/basic.py"
    argv = [str(testfile)]
    argv += ["-t", "examples/data/example.tree"]
    argv += ["-d", "examples/data/example.csv"]
    argv.append("-v")
    outcome = runner.invoke(app, argv)
    assert "ValueError: test_length requires a sequence file" in outcome.stdout
127 |
128 |
def test_cli_invalid_tree_format(request: pytest.FixtureRequest):
    """An unknown ``--tree-format`` value is rejected with the list of accepted formats."""
    testfile = request.path.parent / "input/basic.py"
    argv = [str(testfile)]
    argv += ["-t", "examples/data/example.tree"]
    argv += ["-s", "examples/data/example.fasta"]
    argv += ["-d", "examples/data/example.csv"]
    argv += ["--tree-format", "excel"]
    argv.append("-v")
    outcome = runner.invoke(app, argv)
    expected_error = "Error: Invalid value for '--tree-format': 'excel' is not a valid tree format. Must be one of newick, nexus, phyloxml, nexml."
    assert expected_error in outcome.stdout
149 |
150 |
def test_cli_invalid_data_format(request: pytest.FixtureRequest):
    """An unknown ``--data-format`` value is rejected with the list of accepted formats."""
    testfile = request.path.parent / "input/basic.py"
    argv = [str(testfile)]
    argv += ["-t", "examples/data/example.tree"]
    argv += ["-s", "examples/data/example.fasta"]
    argv += ["-d", "examples/data/example.csv"]
    argv += ["--data-format", "pdf"]
    argv.append("-v")
    outcome = runner.invoke(app, argv)
    expected_error = "Error: Invalid value for '--data-format': 'pdf' is not a valid data format. Must be one of csv, tsv, excel"
    assert expected_error in outcome.stdout
171 |
172 |
def test_cli_invalid_sequence_format(request: pytest.FixtureRequest):
    """An unknown ``--sequence-format`` value is rejected as an invalid option value."""
    testfile = request.path.parent / "input/basic.py"
    argv = [str(testfile)]
    argv += ["-t", "examples/data/example.tree"]
    argv += ["-s", "examples/data/example.fasta"]
    argv += ["-d", "examples/data/example.csv"]
    argv += ["--sequence-format", "pdf"]
    argv.append("-v")
    outcome = runner.invoke(app, argv)
    assert "Error: Invalid value for '--sequence-format': 'pdf' is not" in outcome.stdout
190 |
191 |
def test_cli_invalid_data(request: pytest.FixtureRequest):
    """With Python sources passed as tree and data and no sequence file, the missing-sequence error is reported."""
    testfile = request.path.parent / "input/basic.py"
    argv = [
        str(testfile),
        "-t",
        "phytest/bio/tree.py",  # should not be read
        "-d",
        "phytest/bio/data.py",
        "-v",
    ]
    outcome = runner.invoke(app, argv)
    assert "ValueError: test_length requires a sequence file" in outcome.stdout
205 |
206 |
def test_cli_missing_tree_file(request: pytest.FixtureRequest):
    """Without ``-t`` the output reports that test_tree_number_of_tips requires a tree file."""
    testfile = request.path.parent / "input/basic.py"
    argv = [str(testfile)]
    argv += ["-s", "examples/data/example.fasta"]
    argv += ["-d", "examples/data/example.csv"]
    argv.append("-v")
    outcome = runner.invoke(app, argv)
    assert "ValueError: test_tree_number_of_tips requires a tree file" in outcome.stdout
220 |
221 |
def test_cli_missing_data_file(request: pytest.FixtureRequest):
    """Without ``-d`` the output reports that test_data_number_of_rows requires a data file."""
    testfile = request.path.parent / "input/basic.py"
    argv = [str(testfile)]
    argv += ["-s", "examples/data/example.fasta"]
    argv += ["-t", "examples/data/example.tree"]
    argv.append("-v")
    outcome = runner.invoke(app, argv)
    assert "ValueError: test_data_number_of_rows requires a data file" in outcome.stdout
235 |
236 |
def test_cli_missing_alignment_file(request: pytest.FixtureRequest):
    """Without ``-s`` the output reports that test_alignment_length requires an alignment file."""
    testfile = request.path.parent / "input/alignment.py"
    argv = [str(testfile)]
    argv += ["-t", "examples/data/example.tree"]
    argv += ["-d", "examples/data/example.csv"]
    argv.append("-v")
    outcome = runner.invoke(app, argv)
    assert "ValueError: test_alignment_length requires an alignment file" in outcome.stdout
250 |
251 |
def test_cli_alignment(request: pytest.FixtureRequest):
    """The alignment test file passes when only a sequence file is supplied."""
    testfile = request.path.parent / "input/alignment.py"
    argv = [str(testfile), "-s", "examples/data/example.fasta", "-v"]
    outcome = runner.invoke(app, argv)
    assert "1 passed" in outcome.stdout
263 |
--------------------------------------------------------------------------------
/tests/test_data.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | import pytest
4 |
5 | from phytest import Data
6 | from phytest.utils import PhytestAssertion, PhytestWarning
7 |
8 |
def test_data_read():
    """``Data.read`` accepts the csv, tsv and excel example files without raising."""
    examples = (
        ('examples/data/example.csv', 'csv'),
        ('examples/data/example.tsv', 'tsv'),
        ('examples/data/example.xlsx', 'excel'),
    )
    for data_path, data_format in examples:
        data = Data.read(data_path, data_format)
16 |
17 |
def test_data_read_invalid():
    """``Data.read`` raises ``ValueError`` for the unsupported format 'txt'."""
    expected_message = "Data format must be one of csv, tsv, excel"
    with pytest.raises(ValueError, match=expected_message):
        Data.read('examples/data/example.csv', 'txt')
22 |
23 |
def test_assert_data_contains():
    """``assert_contains`` passes for a present value and fails with the expected message for an absent one."""
    data = Data.read('examples/data/example.csv', 'csv')
    data.assert_contains('name', 'Sequence_A')

    expected_message = "The values of column 'name' are '['Sequence_A' 'Sequence_B' 'Sequence_C' 'Sequence_D']'.\nThe column 'name' does not contain 'Sequence_X'."
    with pytest.raises(PhytestAssertion, match=re.escape(expected_message)):
        data.assert_contains('name', 'Sequence_X')
35 |
36 |
def test_assert_data_match():
    """``assert_match`` passes for a pattern covering all names and fails when row 3 does not match."""
    data = Data.read('examples/data/example.csv', 'csv')
    data.assert_match('name', 'Sequence_.')

    expected_message = "The values of column 'name' are '['Sequence_A' 'Sequence_B' 'Sequence_C' 'Sequence_D']'.\nThe row(s) '[3]' of the column 'name' do not match the pattern 'Sequence_[A-C]'."
    with pytest.raises(PhytestAssertion, match=re.escape(expected_message)):
        data.assert_match('name', 'Sequence_[A-C]')
48 |
49 |
def test_assert_data_allowed_columns():
    """``assert_columns`` accepts a covering list and rejects a missing column or an inexact match."""
    data = Data.read('examples/data/example.csv', 'csv')
    data.assert_columns(['name', 'date', 'sequence'])

    failing_cases = (
        (
            (['name', 'sequence'],),
            {},
            "The columns '['date']' are not in the list of allowed columns '['name', 'sequence']'.",
        ),
        (
            (['name', 'date', 'sequence'],),
            {"exact": True},
            "The column names do not exactly match the list of allowed columns",
        ),
    )
    for positional, keywords, expected_message in failing_cases:
        with pytest.raises(PhytestAssertion, match=re.escape(expected_message)):
            data.assert_columns(*positional, **keywords)
64 |
65 |
def test_assert_data_allowed_values():
    """``assert_values`` is checked for plain, exact and NaN-tolerant matching of the 'name' column."""
    data = Data.read('examples/data/example.csv', 'csv')
    first_three = ['Sequence_A', 'Sequence_B', 'Sequence_C']
    first_five = ['Sequence_A', 'Sequence_B', 'Sequence_C', 'Sequence_D', 'Sequence_E']

    data.assert_values('name', first_five)
    not_allowed = "The row(s) '[3]' of the column 'name' are not in the list of allowed values '['Sequence_A', 'Sequence_B', 'Sequence_C']"
    with pytest.raises(PhytestAssertion, match=re.escape(not_allowed)):
        data.assert_values('name', first_three)

    # exact
    not_exact = "The values column 'name' do not exactly match the allowed values"
    with pytest.raises(PhytestAssertion, match=re.escape(not_exact)):
        data.assert_values('name', first_five, exact=True)

    # allow nan
    # The table is modified in place here, so the checks below must run after this line.
    data.replace('Sequence_D', float('nan'), inplace=True)
    data.assert_values('name', first_three, allow_nan=True)
    nan_not_allowed = "The row(s) '[3]' of the column 'name' are not in the list of allowed values '['Sequence_A', 'Sequence_B', 'Sequence_C']'."
    with pytest.raises(PhytestAssertion, match=re.escape(nan_not_allowed)):
        data.assert_values('name', first_three)
95 |
96 |
def test_assert_range():
    """``assert_range`` passes inside the limits and fails with the expected messages outside them."""
    data = Data.read('examples/data/example.csv', 'csv')
    data['value'] = [1, 2, 3, 4]
    data.assert_range('value', min=1, max=5)

    failing_cases = (
        ({"max": 3}, "The maximum value of column 'value' is '4', which is greater than '3'."),
        ({"min": 2}, "The minimum value of column 'value' is '1', which is less than '2'."),
    )
    for limits, expected_message in failing_cases:
        with pytest.raises(PhytestAssertion, match=re.escape(expected_message)):
            data.assert_range('value', **limits)
112 |
--------------------------------------------------------------------------------
/tests/test_main.py:
--------------------------------------------------------------------------------
1 | from unittest.mock import patch
2 |
3 | import pytest
4 |
5 | from phytest import main
6 |
7 |
def test_main_basic(request: pytest.FixtureRequest):
    """``main`` returns an exit code whose value is 0 when all example inputs are supplied."""
    testfile = str(request.path.parent / "input/basic.py")
    inputs = {
        "sequence": "examples/data/example.fasta",
        "tree": "examples/data/example.tree",
        "data": "examples/data/example.csv",
    }
    exit_code = main(testfile, **inputs)
    assert exit_code.value == 0
16 |
17 |
def test_tree_not_found(request: pytest.FixtureRequest, capsys):
    """A tree path that does not exist yields a FileNotFoundError in the output and a non-zero exit code."""
    testfile = str(request.path.parent / "input/basic.py")
    inputs = {
        "sequence": "examples/data/example.fasta",
        "tree": "examples/data/NOTFOUND.tree",
        "data": "examples/data/example.csv",
    }
    exit_code = main(testfile, **inputs)
    printed = capsys.readouterr().out
    assert "FileNotFoundError: Unable to locate requested t" in printed
    assert exit_code.value != 0
28 |
29 |
def test_data_not_found(request: pytest.FixtureRequest, capsys):
    """A data path that does not exist yields a FileNotFoundError in the output and a non-zero exit code."""
    testfile = str(request.path.parent / "input/basic.py")
    inputs = {
        "sequence": "examples/data/example.fasta",
        "tree": "examples/data/example.tree",
        "data": "examples/data/NOTFOUND.csv",
    }
    exit_code = main(testfile, **inputs)
    printed = capsys.readouterr().out
    assert "FileNotFoundError: Unable to locate requested d" in printed
    assert exit_code.value != 0
40 |
41 |
def test_sequence_not_found(request: pytest.FixtureRequest, capsys):
    """A sequence path that does not exist yields a FileNotFoundError in the output and a non-zero exit code."""
    testfile = str(request.path.parent / "input/basic.py")
    inputs = {
        "sequence": "examples/data/NOTFOUND.fasta",
        "tree": "examples/data/example.tree",
        "data": "examples/data/example.csv",
    }
    exit_code = main(testfile, **inputs)
    printed = capsys.readouterr().out
    assert "FileNotFoundError: Unable to locate requested s" in printed
    assert exit_code.value != 0
52 |
53 |
def test_alignment_not_found(capsys):
    """Running examples/example.py with a missing sequence file reports a FileNotFoundError and a non-zero exit code."""
    missing_sequence = "examples/data/NOTFOUND.fasta"
    exit_code = main("examples/example.py", sequence=missing_sequence)
    printed = capsys.readouterr().out
    assert "FileNotFoundError: Unable to locate requested al" in printed
    assert exit_code.value != 0
62 |
63 |
@patch.object(pytest, 'main')
def test_auto_testfile(pytest_main):
    """Called without a test file, ``main`` passes this very file as the first pytest argument."""
    # main() must be called directly from this function: it looks up its caller's file.
    main()
    pytest_main.assert_called_once()
    positional, _keywords = pytest_main.call_args
    pytest_argv = positional[0]
    assert pytest_argv[0].name == "test_main.py"
69 |
--------------------------------------------------------------------------------
/tests/test_self_contained.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 |
3 |
def test_self_contained():
    """
    Running examples/self_contained.py as a script exits with status 0 and reports 52 passing tests.

    The script is started with the interpreter that is running this test, without a
    shell, so it cannot pick up a different ``python`` from PATH.
    """
    import sys

    testfile = 'examples/self_contained.py'
    completed = subprocess.run([sys.executable, testfile], stdout=subprocess.PIPE)
    assert completed.returncode == 0
    # Decode instead of searching the repr of a bytes object.
    assert '52 passed' in completed.stdout.decode(errors="replace")
11 |
--------------------------------------------------------------------------------
/tests/test_sequences.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | import pytest
4 | from Bio.Seq import Seq
5 |
6 | from phytest import Sequence
7 | from phytest.utils import PhytestAssertion, PhytestWarning
8 |
9 |
def test_assert_valid_alphabet():
    """``assert_valid_alphabet`` is checked with the default and a custom alphabet on DNA and protein records."""
    dna = Sequence(
        Seq("ACGTACGTACGT"),
        id="DNAID",
        name="TEST",
        description="Test dna sequence",
    )
    protein = Sequence(
        Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF"),
        id="PROTEINID",
        name="TEST Protein",
        description="Test protein sequence",
    )

    # NOTE(review): the two patterns below are passed to ``match`` without re.escape,
    # so '.' acts as a regex wildcard.
    dna_pattern = "Invalid pattern found in 'DNAID'.\nCharacter 'G' at position 3 found which is not in alphabet 'ABCDE'."
    protein_pattern = "Invalid pattern found in 'PROTEINID'.\nCharacter 'M' at position 1 found which is not in alphabet 'ATCGN-'."

    dna.assert_valid_alphabet()
    with pytest.raises(PhytestAssertion, match=dna_pattern):
        dna.assert_valid_alphabet(alphabet="ABCDE")

    protein.assert_valid_alphabet(alphabet="ACDEFGHIKLMNPQRSTVWYXBZJ")
    with pytest.raises(PhytestAssertion, match=protein_pattern):
        protein.assert_valid_alphabet()
36 |
37 |
def test_assert_length():
    """``assert_length`` passes for matching limits, fails with the expected messages, and ``warn_length`` warns."""
    dna = Sequence(Seq("A" * 100), id="DNAID", name="TEST", description="Test dna sequence")
    dna.assert_length(length=100, min=99, max=101)

    failing_cases = (
        ({"length": 1}, "Sequence length of 'DNAID' (100) is not equal to the required length of 1."),
        ({"min": 101}, "Sequence length of 'DNAID' (100) is less than the minimum 101."),
        ({"max": 99}, "Sequence length of 'DNAID' (100) is greater than the maximum 99."),
    )
    for limits, expected_message in failing_cases:
        with pytest.raises(PhytestAssertion, match=re.escape(expected_message)):
            dna.assert_length(**limits)

    too_long = "Sequence length of 'DNAID' (100) is greater than the maximum 99."
    with pytest.warns(PhytestWarning, match=re.escape(too_long)):
        dna.warn_length(max=99)
66 |
67 |
def test_assert_count():
    """``assert_count`` passes for matching limits and fails with the expected messages."""
    dna = Sequence(Seq("ATG" * 100), id="DNAID", name="TEST", description="Test dna sequence")
    dna.assert_count(pattern='A', count=100, min=99, max=101)

    failing_cases = (
        (
            {"count": 1},
            "Sequence 'DNAID' matches pattern 'A' 100 time(s).\nThis is not equal to the required number of 1.",
        ),
        ({"min": 101}, "Sequence 'DNAID' matches pattern 'A' 100 time(s).\nThis is less than the minimum 101."),
        ({"max": 99}, "Sequence 'DNAID' matches pattern 'A' 100 time(s).\nThis is greater than the maximum 99."),
    )
    for limits, expected_message in failing_cases:
        with pytest.raises(PhytestAssertion, match=re.escape(expected_message)):
            dna.assert_count(pattern='A', **limits)
93 |
94 |
def test_assert_percent():
    """``assert_percent`` passes for matching limits, fails with the expected messages, and accepts 0.0 for an absent letter."""
    dna = Sequence(Seq("ATGC" * 100), id="DNAID", name="TEST", description="Test dna sequence")
    dna.assert_percent(nucleotide='A', percent=25, min=24.9, max=25.1)

    failing_cases = (
        (
            {"percent": 24},
            "Sequence 'DNAID' contains 25.0 percent 'A'.\nThis is not equal to the required percentage of 24.",
        ),
        ({"min": 25.1}, "Sequence 'DNAID' contains 25.0 percent 'A'.\nThis is less than the minimum 25.1."),
        ({"max": 24.9}, "Sequence 'DNAID' contains 25.0 percent 'A'.\nThis is greater than the maximum 24.9."),
    )
    for limits, expected_message in failing_cases:
        with pytest.raises(PhytestAssertion, match=re.escape(expected_message)):
            dna.assert_percent(nucleotide='A', **limits)

    dna.assert_percent(nucleotide='N', percent=0.0)
122 |
123 |
def test_assert_percent_zero_length():
    """``assert_percent`` accepts ``percent=0.0`` for an empty sequence."""
    empty = Sequence(Seq(""), id="empty", name="empty", description="empty dna sequence")
    empty.assert_percent(nucleotide='A', percent=0.0)
132 |
133 |
def test_assert_percent_errors():
    """``assert_percent`` raises ``ValueError`` for a two-letter nucleotide and for a non-string nucleotide."""
    dna = Sequence(Seq("ATGC" * 100), id="DNAID", name="TEST", description="Test dna sequence")

    invalid_cases = (
        ('AA', "The length of the requested nucleotide"),
        (10, "Nucleotide must be str or list and cannot be of type"),
    )
    for nucleotide, expected_message in invalid_cases:
        with pytest.raises(ValueError, match=re.escape(expected_message)):
            dna.assert_percent(nucleotide=nucleotide, percent=25, min=24.9, max=25.1)
152 |
153 |
def test_assert_percent_N():
    """``assert_percent_N`` passes for matching limits and fails with the expected messages."""
    dna = Sequence(Seq("ATNN" * 100), id="DNAID", name="TEST", description="Test dna sequence")
    dna.assert_percent_N(percent=50, min=49.9, max=50.1)

    failing_cases = (
        (
            {"percent": 49},
            "Sequence 'DNAID' contains 50.0 percent 'N, n'.\nThis is not equal to the required percentage of 49.",
        ),
        ({"min": 50.1}, "Sequence 'DNAID' contains 50.0 percent 'N, n'.\nThis is less than the minimum 50.1."),
        ({"max": 49.9}, "Sequence 'DNAID' contains 50.0 percent 'N, n'.\nThis is greater than the maximum 49.9."),
    )
    for limits, expected_message in failing_cases:
        with pytest.raises(PhytestAssertion, match=re.escape(expected_message)):
            dna.assert_percent_N(**limits)
179 |
180 |
def test_assert_percent_gaps():
    """``assert_percent_gaps`` passes for matching limits and fails with the expected messages."""
    dna = Sequence(Seq("AT--" * 100), id="DNAID", name="TEST", description="Test dna sequence")
    dna.assert_percent_gaps(percent=50, min=49.9, max=50.1)

    failing_cases = (
        (
            {"percent": 49},
            "Sequence 'DNAID' contains 50.0 percent '-'.\nThis is not equal to the required percentage of 49.",
        ),
        ({"min": 50.1}, "Sequence 'DNAID' contains 50.0 percent '-'.\nThis is less than the minimum 50.1."),
        ({"max": 49.9}, "Sequence 'DNAID' contains 50.0 percent '-'.\nThis is greater than the maximum 49.9."),
    )
    for limits, expected_message in failing_cases:
        with pytest.raises(PhytestAssertion, match=re.escape(expected_message)):
            dna.assert_percent_gaps(**limits)
206 |
207 |
def test_assert_percent_GC():
    """``assert_percent_GC`` passes for matching limits and fails with the expected messages."""
    dna = Sequence(Seq("ATGC" * 100), id="DNAID", name="TEST", description="Test dna sequence")
    dna.assert_percent_GC(percent=50, min=49.9, max=50.1)

    failing_cases = (
        (
            {"percent": 49},
            "Sequence 'DNAID' contains 50.0 percent 'G, C, g, c, S, s'.\nThis is not equal to the required percentage of 49.",
        ),
        (
            {"min": 50.1},
            "Sequence 'DNAID' contains 50.0 percent 'G, C, g, c, S, s'.\nThis is less than the minimum 50.1.",
        ),
        (
            {"max": 49.9},
            "Sequence 'DNAID' contains 50.0 percent 'G, C, g, c, S, s'.\nThis is greater than the maximum 49.9.",
        ),
    )
    for limits, expected_message in failing_cases:
        with pytest.raises(PhytestAssertion, match=re.escape(expected_message)):
            dna.assert_percent_GC(**limits)
237 |
238 |
def test_assert_count_Ns():
    """``assert_count_Ns`` passes for matching limits and fails with the expected messages."""
    dna = Sequence(Seq("ATGN" * 100), id="DNAID", name="TEST", description="Test dna sequence")
    dna.assert_count_Ns(count=100, min=99, max=101)

    failing_cases = (
        (
            {"count": 1},
            "Sequence 'DNAID' matches pattern 'N' 100 time(s).\nThis is not equal to the required number of 1.",
        ),
        ({"min": 101}, "Sequence 'DNAID' matches pattern 'N' 100 time(s).\nThis is less than the minimum 101."),
        ({"max": 99}, "Sequence 'DNAID' matches pattern 'N' 100 time(s).\nThis is greater than the maximum 99."),
    )
    for limits, expected_message in failing_cases:
        with pytest.raises(PhytestAssertion, match=re.escape(expected_message)):
            dna.assert_count_Ns(**limits)
264 |
265 |
def test_assert_count_gaps():
    """``assert_count_gaps`` counts the 100 '-' characters and reports each violated bound."""
    gapped = Sequence(
        Seq("ATG-" * 100),
        id="DNAID",
        name="TEST",
        description="Test dna sequence",
    )

    # One gap per four-character repeat gives exactly 100 matches.
    gapped.assert_count_gaps(count=100, min=99, max=101)

    wrong_count = "Sequence 'DNAID' matches pattern '-' 100 time(s).\nThis is not equal to the required number of 1."
    below_minimum = "Sequence 'DNAID' matches pattern '-' 100 time(s).\nThis is less than the minimum 101."
    above_maximum = "Sequence 'DNAID' matches pattern '-' 100 time(s).\nThis is greater than the maximum 99."

    with pytest.raises(PhytestAssertion, match=re.escape(wrong_count)):
        gapped.assert_count_gaps(count=1)

    with pytest.raises(PhytestAssertion, match=re.escape(below_minimum)):
        gapped.assert_count_gaps(min=101)

    with pytest.raises(PhytestAssertion, match=re.escape(above_maximum)):
        gapped.assert_count_gaps(max=99)
291 |
292 |
def test_assert_sequence_longest_stretch():
    """``assert_longest_stretch`` measures the 10-base run of 'A' and reports each violated bound.

    The fixture is ten 'A', three '-' and ten 'N', so the longest stretch of
    pattern 'A' is 10.
    """
    sequence = Sequence(
        Seq("A" * 10 + "-" * 3 + "N" * 10),
        id="DNAID",
        name="TEST",
        description="Test dna sequence",
    )
    sequence.assert_longest_stretch(pattern='A', count=10, min=9, max=11)

    # The "not equal" message now ends with its full stop, in line with the
    # sibling Ns/gaps tests, so the whole sentence is pinned rather than a prefix.
    failing_cases = [
        (
            {"count": 1},
            "The longest stretch of pattern 'A' in sequence 'DNAID' is 10.\nThis is not equal to the required number of 1.",
        ),
        (
            {"min": 11},
            "The longest stretch of pattern 'A' in sequence 'DNAID' is 10.\nThis is less than the minimum 11.",
        ),
        (
            {"max": 9},
            "The longest stretch of pattern 'A' in sequence 'DNAID' is 10.\nThis is greater than the maximum 9.",
        ),
    ]
    for kwargs, message in failing_cases:
        with pytest.raises(PhytestAssertion, match=re.escape(message)):
            sequence.assert_longest_stretch(pattern='A', **kwargs)
322 |
323 |
def test_assert_sequence_longest_Ns():
    """``assert_longest_stretch_Ns`` measures the 10-base run of 'N' and reports each violated bound."""
    record = Sequence(
        Seq("A" * 10 + "-" * 3 + "N" * 10),
        id="DNAID",
        name="TEST",
        description="Test dna sequence",
    )

    # The fixture ends with ten consecutive 'N' characters.
    record.assert_longest_stretch_Ns(count=10, min=9, max=11)

    failing_cases = [
        (
            {"count": 1},
            "The longest stretch of pattern 'N' in sequence 'DNAID' is 10.\nThis is not equal to the required number of 1.",
        ),
        (
            {"min": 11},
            "The longest stretch of pattern 'N' in sequence 'DNAID' is 10.\nThis is less than the minimum 11.",
        ),
        (
            {"max": 9},
            "The longest stretch of pattern 'N' in sequence 'DNAID' is 10.\nThis is greater than the maximum 9.",
        ),
    ]
    for kwargs, message in failing_cases:
        with pytest.raises(PhytestAssertion, match=re.escape(message)):
            record.assert_longest_stretch_Ns(**kwargs)
353 |
354 |
def test_assert_sequence_longest_gaps():
    """``assert_longest_stretch_gaps`` measures the 3-character gap run and reports each violated bound."""
    gapped = Sequence(
        Seq("A" * 10 + "-" * 3 + "N" * 10),
        id="DNAID",
        name="TEST",
        description="Test dna sequence",
    )

    # The only gap run in the fixture is the three '-' characters in the middle.
    gapped.assert_longest_stretch_gaps(count=3, min=2, max=4)

    wrong_count = (
        "The longest stretch of pattern '-' in sequence 'DNAID' is 3.\nThis is not equal to the required number of 1."
    )
    below_minimum = "The longest stretch of pattern '-' in sequence 'DNAID' is 3.\nThis is less than the minimum 4."
    above_maximum = "The longest stretch of pattern '-' in sequence 'DNAID' is 3.\nThis is greater than the maximum 2."

    with pytest.raises(PhytestAssertion, match=re.escape(wrong_count)):
        gapped.assert_longest_stretch_gaps(count=1)

    with pytest.raises(PhytestAssertion, match=re.escape(below_minimum)):
        gapped.assert_longest_stretch_gaps(min=4)

    with pytest.raises(PhytestAssertion, match=re.escape(above_maximum)):
        gapped.assert_longest_stretch_gaps(max=2)
384 |
385 |
def test_assert_sequence_startswith():
    """``assert_startswith`` accepts the real prefix 'ATG' and rejects 'UGA'."""
    record = Sequence(
        Seq("ATG" + "-" * 3 + "UGA"),
        id="DNAID",
        name="TEST",
        description="Test dna sequence",
    )
    record.assert_startswith(pattern='ATG')

    # 'UGA' is the suffix of the fixture, not its prefix.
    expected_message = re.escape("Sequence 'DNAID' does not start with 'UGA'.")
    with pytest.raises(PhytestAssertion, match=expected_message):
        record.assert_startswith(pattern='UGA')
396 |
397 |
def test_assert_sequence_endswith():
    """``assert_endswith`` accepts the real suffix 'UGA' and rejects 'ATG'."""
    record = Sequence(
        Seq("ATG" + "-" * 3 + "UGA"),
        id="DNAID",
        name="TEST",
        description="Test dna sequence",
    )
    record.assert_endswith(pattern='UGA')

    # 'ATG' is the prefix of the fixture, not its suffix.
    expected_message = re.escape("Sequence 'DNAID' does not end with 'ATG'.")
    with pytest.raises(PhytestAssertion, match=expected_message):
        record.assert_endswith(pattern='ATG')
408 |
409 |
def test_assert_sequence_contains():
    """``assert_contains`` accepts an embedded motif and reports a zero-match failure for an absent one."""
    record = Sequence(
        Seq("ATG" + "TGACGT" + "UGA"),
        id="DNAID",
        name="TEST",
        description="Test dna sequence",
    )
    record.assert_contains(pattern='TGACGT')

    # 'CAGCTG' never occurs in the fixture, so the failure reports 0 matches against a minimum of 1.
    expected_message = re.escape(
        "Sequence 'DNAID' matches pattern 'CAGCTG' 0 time(s).\nThis is less than the minimum 1."
    )
    with pytest.raises(PhytestAssertion, match=expected_message):
        record.assert_contains(pattern='CAGCTG')
423 |
--------------------------------------------------------------------------------
/tests/test_trees.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from datetime import datetime
3 | from pathlib import Path
4 | from tempfile import NamedTemporaryFile
5 |
6 | import pytest
7 |
8 | from phytest import Tree
9 | from phytest.utils import PhytestAssertion, PhytestWarning, default_date_patterns
10 |
11 |
def test_tips_property():
    """``Tree.tips`` yields the terminal nodes 'A' and 'B' of a two-tip tree, in that order."""
    two_tip_tree = Tree.read_str("(A:0.1,B:0.2);")
    tip_names = [tip.name for tip in two_tip_tree.tips]
    assert tip_names == ['A', 'B']
15 |
16 |
def test_assert_tree_number_of_tips():
    """``assert_number_of_tips`` accepts the true count of 7 and rejects each violated bound."""
    primates = Tree.read_str(
        "(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, (P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Rodent:1.21460);"
    )
    primates.assert_number_of_tips(tips=7, min=6, max=8)

    # Wrong exact count, minimum above 7, maximum below 7.
    for bad_kwargs in ({"tips": 1}, {"min": 8}, {"max": 6}):
        with pytest.raises(AssertionError):
            primates.assert_number_of_tips(**bad_kwargs)
28 |
29 |
def test_assert_unique_tips():
    """``assert_unique_tips`` passes for distinct tip names and fails when 'Bovine' appears twice."""
    distinct_names = Tree.read_str(
        "(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, (P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Rodent:1.21460);"
    )
    distinct_names.assert_unique_tips()

    # Same topology, but the last tip reuses the name 'Bovine'.
    repeated_name = Tree.read_str(
        "(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, (P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Bovine:1.21460);"
    )
    with pytest.raises(AssertionError):
        repeated_name.assert_unique_tips()
40 |
41 |
def test_assert_tree_is_rooted():
    """``assert_is_rooted`` fails on the tree as parsed and passes after ``root_at_midpoint``."""
    four_tip_tree = Tree.read_str("((A:0.1,B:0.2):0.3,(C:0.3,D:0.4):0.5);")

    with pytest.raises(AssertionError):
        four_tip_tree.assert_is_rooted()

    # Rooting mutates the tree in place; the same object must now pass.
    four_tip_tree.root_at_midpoint()
    four_tip_tree.assert_is_rooted()
48 |
49 |
def test_assert_tree_is_bifurcating():
    """``assert_is_bifurcating`` passes for the seven-tip example tree."""
    newick = "(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, (P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Rodent:1.21460);"
    primates = Tree.read_str(newick)
    primates.assert_is_bifurcating()
55 |
56 |
def test_assert_tree_is_monophyletic():
    """``assert_is_monophyletic`` accepts a clade's own tips and rejects tips drawn from different clades.

    'P._paniscus' and 'H._sapiens' share an immediate parent in the Newick
    string, so they pass. 'Pongo' and 'H._sapiens' are expected to fail.
    """
    tree = Tree.read_str(
        "(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, (P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Rodent:1.21460);"
    )

    sister_tips = [tip for tip in tree.get_terminals() if tip.name in ("P._paniscus", "H._sapiens")]
    tree.assert_is_monophyletic(sister_tips)

    # Select the tips outside the ``raises`` block so that only the assertion
    # under test, not the fixture setup, can satisfy the expected failure.
    scattered_tips = [tip for tip in tree.get_terminals() if tip.name in ("Pongo", "H._sapiens")]
    with pytest.raises(AssertionError):
        tree.assert_is_monophyletic(scattered_tips)
67 |
68 |
def test_assert_branch_lengths():
    """``assert_branch_lengths`` accepts [0, 1] when every branch is 1 and rejects bounds that exclude 1."""
    unit_tree = Tree.read_str(
        "(Bovine:1,(Hylobates:1,(Pongo:1,(G._Gorilla:1, (P._paniscus:1,H._sapiens:1):1):1):1):1, Rodent:1);"
    )
    unit_tree.assert_branch_lengths(min=0, max=1)

    # A minimum above 1 and a maximum below 1 must both fail.
    for bad_kwargs in ({"min": 2}, {"max": 0}):
        with pytest.raises(AssertionError):
            unit_tree.assert_branch_lengths(**bad_kwargs)
78 |
79 |
def test_assert_no_negative_branch_lengths():
    """``assert_no_negatives`` passes when all branches are 1 and fails when one internal branch is -1."""
    all_positive = Tree.read_str(
        "(Bovine:1,(Hylobates:1,(Pongo:1,(G._Gorilla:1, (P._paniscus:1,H._sapiens:1):1):1):1):1, Rodent:1);"
    )
    all_positive.assert_no_negatives()

    # Identical topology, except the (P._paniscus, H._sapiens) clade has length -1.
    one_negative = Tree.read_str(
        "(Bovine:1,(Hylobates:1,(Pongo:1,(G._Gorilla:1, (P._paniscus:1,H._sapiens:1):-1):1):1):1, Rodent:1);"
    )
    with pytest.raises(AssertionError):
        one_negative.assert_no_negatives()
90 |
91 |
def test_assert_terminal_branch_lengths():
    """``assert_terminal_branch_lengths`` accepts [0, 1] for a tree whose tips are all length 1.

    One internal branch has length 5, so the passing call with ``max=1``
    shows that internal branches are not checked.
    """
    fixture = Tree.read_str(
        "(Bovine:1,(Hylobates:1,(Pongo:1,(G._Gorilla:1, (P._paniscus:1,H._sapiens:1):1):5):1):1, Rodent:1);"
    )
    fixture.assert_terminal_branch_lengths(min=0, max=1)

    # A minimum above 1 and a maximum below 1 must both fail.
    for bad_kwargs in ({"min": 2}, {"max": 0}):
        with pytest.raises(AssertionError):
            fixture.assert_terminal_branch_lengths(**bad_kwargs)
101 |
102 |
def test_assert_internal_branch_lengths():
    """``assert_internal_branch_lengths`` accepts [0, 2] for a tree whose internal branches are all length 2.

    The 'Bovine' tip has length 4, so the passing call with ``max=2`` shows
    that terminal branches are not checked.
    """
    fixture = Tree.read_str(
        "(Bovine:4,(Hylobates:1,(Pongo:1,(G._Gorilla:1, (P._paniscus:1,H._sapiens:1):2):2):2):2, Rodent:1);"
    )
    fixture.assert_internal_branch_lengths(min=0, max=2)

    # A minimum above 2 and a maximum below 2 must both fail.
    for bad_kwargs in ({"min": 3}, {"max": 1}):
        with pytest.raises(AssertionError):
            fixture.assert_internal_branch_lengths(**bad_kwargs)
112 |
113 |
def test_assert_tree_total_branch_length():
    """``assert_total_branch_length`` accepts the true total of 11 and rejects each violated bound.

    The fixture has seven tip branches and four internal branches, each of length 1.
    """
    unit_tree = Tree.read_str(
        "(Bovine:1,(Hylobates:1,(Pongo:1,(G._Gorilla:1, (P._paniscus:1,H._sapiens:1):1):1):1):1, Rodent:1);"
    )
    unit_tree.assert_total_branch_length(length=11, min=10, max=12)

    # Wrong exact length, minimum above 11, maximum below 11.
    for bad_kwargs in ({"length": 1}, {"min": 12}, {"max": 10}):
        with pytest.raises(AssertionError):
            unit_tree.assert_total_branch_length(**bad_kwargs)
125 |
126 |
def test_assert_tip_regex():
    """``assert_tip_regex`` fails for each default date pattern alone and passes for all of them together."""
    mixed_dates = Tree.read_str("(A_1993.3, (B_1998-07-02,C_1992-12-31));")
    date_patterns = default_date_patterns()

    # The tips mix date conventions, so no single pattern can match every tip.
    for single_pattern in date_patterns:
        with pytest.raises(AssertionError):
            mixed_dates.assert_tip_regex(single_pattern)

    # Supplying the full set of patterns covers every tip.
    mixed_dates.assert_tip_regex(date_patterns)
138 |
139 |
def test_assert_tip_names():
    """``assert_tip_names`` accepts the exact tip names and rejects a duplicated or substituted name."""
    unit_tree = Tree.read_str(
        "(Bovine:1,(Hylobates:1,(Pongo:1,(G._Gorilla:1, (P._paniscus:1,H._sapiens:1):1):1):1):1, Rodent:1);"
    )
    unit_tree.assert_tip_names(
        names=['Bovine', 'Hylobates', 'Pongo', 'G._Gorilla', 'P._paniscus', 'H._sapiens', 'Rodent']
    )

    # 'Bovine' listed twice.
    with_duplicate = ['Bovine', 'Bovine', 'Hylobates', 'Pongo', 'G._Gorilla', 'P._paniscus', 'H._sapiens', 'Rodent']
    with pytest.raises(AssertionError):
        unit_tree.assert_tip_names(names=with_duplicate)

    # 'Bovine' replaced by a name that is not in the tree.
    with_substitute = ['Different', 'Hylobates', 'Pongo', 'G._Gorilla', 'P._paniscus', 'H._sapiens', 'Rodent']
    with pytest.raises(AssertionError):
        unit_tree.assert_tip_names(names=with_substitute)
153 |
154 |
def test_parse_tip_dates():
    """``parse_tip_dates`` returns per-tip dates as ``datetime`` objects or decimal years.

    Covers the default patterns in both output forms, then an explicit
    pattern under which only the decimal-year tip is returned.
    """
    dated_tree = Tree.read_str("(A_1993.3, (B_1998-07-02,C_1992-10-01));")

    expected_datetimes = {
        'A_1993.3': datetime(1993, 4, 20, 0, 0),
        'B_1998-07-02': datetime(1998, 7, 2, 0, 0),
        'C_1992-10-01': datetime(1992, 10, 1, 0, 0),
    }
    assert dated_tree.parse_tip_dates() == expected_datetimes

    expected_decimal_years = {
        'A_1993.3': 1993.3,
        'B_1998-07-02': 1998.5,
        'C_1992-10-01': 1992.75,
    }
    assert dated_tree.parse_tip_dates(decimal_year=True) == expected_decimal_years

    # Setting pattern explicitly
    only_decimal_tip = dated_tree.parse_tip_dates(patterns=r"\d{4}\.?\d*$", decimal_year=True)
    assert only_decimal_tip == {
        'A_1993.3': 1993.3,
    }
174 |
175 |
def test_plot_root_to_tip():
    """``plot_root_to_tip`` writes an SVG of more than 30,000 bytes to the requested path."""
    ice_tree = Tree.read("examples/data/ice_viruses.fasta.treefile", tree_format="newick")

    # All checks stay inside the context manager: the temporary file is removed on exit.
    with NamedTemporaryFile(suffix=".svg") as handle:
        svg_path = Path(handle.name)
        ice_tree.plot_root_to_tip(svg_path, covariation=True, sequence_length=463)

        assert svg_path.exists()
        assert svg_path.stat().st_size > 30_000

        contents = svg_path.read_text()
        assert "!DOCTYPE svg PUBLIC" in contents
185 |
186 |
def test_assert_root_to_tip_min_r_squared():
    """``assert_root_to_tip`` accepts ``min_r_squared=0.35`` for the ice-virus tree and rejects 0.40."""
    ice_tree = Tree.read("examples/data/ice_viruses.fasta.treefile", tree_format="newick")

    ice_tree.assert_root_to_tip(min_r_squared=0.35)

    with pytest.raises(PhytestAssertion):
        ice_tree.assert_root_to_tip(min_r_squared=0.40)
192 |
193 |
def test_assert_root_to_tip_rate():
    """``assert_root_to_tip`` accepts a rate window of 1.5e-03 to 1.6e-03 and rejects bounds outside it."""
    ice_tree = Tree.read("examples/data/ice_viruses.fasta.treefile", tree_format="newick")

    ice_tree.assert_root_to_tip(min_rate=1.5e-03, max_rate=1.6e-03)

    # Using the window's lower edge as a maximum, or its upper edge as a minimum, must fail.
    for bad_kwargs in ({"max_rate": 1.5e-03}, {"min_rate": 1.6e-03}):
        with pytest.raises(PhytestAssertion):
            ice_tree.assert_root_to_tip(**bad_kwargs)
201 |
202 |
def test_assert_root_to_tip_root_date():
    """Root-date bounds are enforced by ``assert_root_to_tip`` and reported by ``warn_root_to_tip``.

    The window 1772.0 to 1773.0 passes for the ice-virus tree. Using 1772.0
    as a maximum or 1773.0 as a minimum must fail, and the warning variant
    must emit the same message as a ``PhytestWarning``.
    """
    tree = Tree.read("examples/data/ice_viruses.fasta.treefile", tree_format="newick")

    tree.assert_root_to_tip(min_root_date=1772.0, max_root_date=1773.0)
    with pytest.raises(PhytestAssertion):
        tree.assert_root_to_tip(max_root_date=1772.0)

    # ``match`` is a regular expression applied with ``re.search``. The literal
    # dots are escaped so they cannot match arbitrary characters; only the
    # fractional digits of the inferred date stay a wildcard.
    too_early = r"Inferred root date '1772\.\d*' is less than the minimum allowed root date '1773\.0'\."
    with pytest.raises(PhytestAssertion, match=too_early):
        tree.assert_root_to_tip(min_root_date=1773.0)

    with pytest.warns(PhytestWarning, match=too_early):
        tree.warn_root_to_tip(min_root_date=1773.0)
218 |
219 |
def test_assert_root_to_tip_covariation():
    """``valid_confidence`` expectations are checked with and without ``covariation``.

    With ``covariation=True`` and a sequence length, ``valid_confidence=True``
    passes and ``valid_confidence=False`` fails. Without covariation,
    ``valid_confidence=False`` passes. Requesting covariation without a
    sequence length fails with a dedicated message.
    """
    tree = Tree.read("examples/data/ice_viruses.fasta.treefile", tree_format="newick")
    tree.assert_root_to_tip(covariation=True, sequence_length=463, valid_confidence=True)
    tree.assert_root_to_tip(valid_confidence=False)

    # ``match`` patterns are regular expressions, so the literal dots are
    # escaped to stop them matching arbitrary characters.
    with pytest.raises(PhytestAssertion, match=r"The `clock_model\.valid_confidence` variable is not False\."):
        tree.assert_root_to_tip(covariation=True, sequence_length=463, valid_confidence=False)

    with pytest.raises(
        PhytestAssertion,
        match=r"Cannot perform root-to-tip regression with `covariation` as True if no alignment of sequence length is provided\.",
    ):
        tree.assert_root_to_tip(covariation=True, valid_confidence=True)
232 |
233 |
def test_assert_root_to_tip_root_extra():
    """``assert_root_to_tip`` appends an HTML entry to the caller-supplied ``extra`` list."""
    ice_tree = Tree.read("examples/data/ice_viruses.fasta.treefile", tree_format="newick")

    report_extras = []
    ice_tree.assert_root_to_tip(min_root_date=1772.0, max_root_date=1773.0, extra=report_extras)

    first_entry = report_extras[0]
    assert first_entry['format_type'] == 'html'
    # NOTE(review): ``startswith('')`` is true for every string, so this only
    # checks that 'content' has a ``startswith`` method. The intended prefix
    # is not visible here -- confirm and tighten.
    assert first_entry['content'].startswith('')
241 |
242 |
def test_assert_root_to_tip_clock_filter():
    """``clock_filter=1.0`` emits a ``PhytestWarning``; ``clock_filter=3.0`` emits no warning at all."""
    ice_tree = Tree.read("examples/data/ice_viruses.fasta.treefile", tree_format="newick")

    with pytest.warns(PhytestWarning):
        ice_tree.assert_root_to_tip(clock_filter=1.0)

    # Turning warnings into errors makes any warning from the 3.0 call fail the test.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        ice_tree.assert_root_to_tip(clock_filter=3.0)
251 |
--------------------------------------------------------------------------------