├── .coveragerc ├── .editorconfig ├── .github └── workflows │ ├── docs.yml │ ├── publish.yml │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.rst ├── autodocs.sh ├── docs ├── Makefile ├── citation.rst ├── conf.py ├── development.rst ├── examples.rst ├── favicon.ico ├── images │ ├── logo.png │ ├── logo.svg │ └── report.png ├── index.rst ├── installation.rst ├── make.bat ├── quickstart.rst ├── reference.rst ├── testFiles.rst └── usage.rst ├── examples ├── data │ ├── example.csv │ ├── example.fasta │ ├── example.tree │ ├── example.tsv │ ├── example.xlsx │ ├── ice_viruses.fasta │ ├── ice_viruses.fasta.treefile │ ├── ice_viruses_cleaned.fasta │ ├── ice_viruses_cleaned.fasta.treefile │ └── invalid.fasta ├── example.py └── self_contained.py ├── mkdocs.sh ├── phytest ├── __init__.py ├── bio │ ├── __init__.py │ ├── alignment.py │ ├── data.py │ ├── sequence.py │ └── tree.py ├── cli.py ├── main.py ├── report │ └── logo.css └── utils.py ├── poetry.lock ├── pyproject.toml └── tests ├── __init__.py ├── input ├── alignment.py ├── basic.py └── testfile1.py ├── test_alignments.py ├── test_cli.py ├── test_data.py ├── test_main.py ├── test_self_contained.py ├── test_sequences.py └── test_trees.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = phytest 3 | 4 | [report] 5 | precision = 2 6 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig: https://EditorConfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | end_of_line = lf 7 | insert_final_newline = true 8 | charset = utf-8 9 | 10 | [{*.py,*.smk,Snakemake}] 11 | indent_style = space 12 | indent_size = 4 13 | 14 | [*.{yml,yaml}] 15 | indent_style = space 16 | indent_size = 2 17 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: 
-------------------------------------------------------------------------------- 1 | name: docs 2 | 3 | on: 4 | push: 5 | branches: main 6 | 7 | jobs: 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | python-version: ['3.9'] 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | - name: Install poetry 19 | run: pipx install poetry 20 | - name: Install dependencies for Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v3 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | cache: 'poetry' 25 | - run: poetry install 26 | - name: Docs 27 | run: | 28 | poetry run sphinx-build -b html docs gh-pages 29 | - name: Coverage 30 | run: | 31 | poetry run coverage run -m pytest 32 | poetry run coverage html --directory gh-pages/coverage 33 | echo "COVERAGE=$(poetry run coverage report --precision 2 | grep TOTAL | tr -s ' ' | cut -f 4 -d " ")" >> $GITHUB_ENV 34 | - name: Create Badge 35 | uses: schneegans/dynamic-badges-action@v1.1.0 # instructions here: https://github.com/Schneegans/dynamic-badges-action 36 | with: 37 | auth: ${{ secrets.GIST_SECRET }} 38 | gistID: e8160655e03d9015b1e93b97ed611f4f 39 | filename: coverage-badge.json 40 | label: coverage 41 | message: ${{ env.COVERAGE }} 42 | color: green 43 | - name: Deploy 🚀 44 | uses: JamesIves/github-pages-deploy-action@4.1.5 45 | with: 46 | branch: gh-pages # The branch the action should deploy to. 47 | folder: gh-pages # The folder the action should deploy. 
48 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: publish 2 | on: 3 | push: 4 | tags: 5 | - 'v*.*.*' 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | steps: 10 | #---------------------------------------------- 11 | # check-out repo and set-up python 12 | #---------------------------------------------- 13 | - name: Check out repository 14 | uses: actions/checkout@v2 15 | - name: Set up python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | #---------------------------------------------- 20 | # ----- install & configure poetry ----- 21 | #---------------------------------------------- 22 | - name: Install Poetry 23 | uses: snok/install-poetry@v1 24 | with: 25 | virtualenvs-create: true 26 | virtualenvs-in-project: true 27 | #---------------------------------------------- 28 | # load cached venv if cache exists 29 | #---------------------------------------------- 30 | - name: Load cached venv 31 | id: cached-poetry-dependencies 32 | uses: actions/cache@v2 33 | with: 34 | path: .venv 35 | key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }} 36 | #---------------------------------------------- 37 | # install dependencies if cache does not exist 38 | #---------------------------------------------- 39 | - name: Install dependencies 40 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 41 | run: poetry install --no-interaction --no-root 42 | #---------------------------------------------- 43 | # install your root project, if required 44 | #---------------------------------------------- 45 | - name: Install library 46 | run: poetry install --no-interaction 47 | - name: Build library 48 | run: poetry build 49 | - name: Publish library 50 | env: 51 | PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} 52 | run: | 53 | poetry 
config pypi-token.pypi $PYPI_TOKEN 54 | poetry publish 55 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | # Based on https://github.com/actions/starter-workflows/blob/main/ci/python-package.yml 2 | name: tests 3 | 4 | on: [push] 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | python-version: ['3.8', '3.9', '3.10', '3.11'] 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: Install poetry 17 | run: pipx install poetry 18 | - name: Install dependencies for Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v3 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | cache: 'poetry' 23 | - name: Install dependencies for Python ${{ matrix.python-version }} 24 | run: | 25 | poetry env use "${{ matrix.python-version }}" 26 | poetry install 27 | - name: Tests 28 | run: | 29 | poetry env info 30 | poetry run pytest 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .envrc 3 | *.pyc 4 | *.html 5 | .coverage 6 | dist/ 7 | docs/_build/ 8 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.2.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://github.com/psf/black 9 | rev: 22.3.0 10 | hooks: 11 | - id: black 12 | - repo: https://github.com/PyCQA/isort.git 13 | rev: 5.12.0 14 | hooks: 15 | - id: isort 16 | # - repo: https://github.com/python-poetry/poetry 17 | # rev: '1.2.0b2' 18 | # hooks: 19 | # - id: poetry-check 20 | 
# - id: poetry-lock 21 | 22 | # ci: 23 | # skip: [poetry-lock] 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2022 Wytamma Wirth 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://raw.githubusercontent.com/phytest-devs/phytest/main/docs/images/logo.png 2 | :alt: Phytest logo 3 | 4 | .. start-badges 5 | 6 | |pypi badge| |tests badge| |coverage badge| |docs badge| |black badge| |pre-commit badge| |doi badge| 7 | 8 | 9 | .. |pypi badge| image:: https://img.shields.io/pypi/v/phytest.svg 10 | :target: https://pypi.org/project/phytest/ 11 | 12 | .. 
|tests badge| image:: https://github.com/phytest-devs/phytest/workflows/tests/badge.svg 13 | :target: https://github.com/phytest-devs/phytest/actions 14 | 15 | .. |docs badge| image:: https://github.com/phytest-devs/phytest/workflows/docs/badge.svg 16 | :target: https://phytest-devs.github.io/phytest/ 17 | 18 | .. |black badge| image:: https://img.shields.io/badge/code%20style-black-000000.svg 19 | :target: https://github.com/psf/black 20 | 21 | .. |coverage badge| image:: https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/smutch/e8160655e03d9015b1e93b97ed611f4f/raw/coverage-badge.json 22 | :target: https://phytest-devs.github.io/phytest/coverage/ 23 | 24 | .. |pre-commit badge| image:: https://results.pre-commit.ci/badge/github/phytest-devs/phytest/main.svg 25 | :target: https://results.pre-commit.ci/latest/github/phytest-devs/phytest/main 26 | 27 | .. |doi badge| image:: https://img.shields.io/badge/DOI-10.1093%2Fbioinformatics%2Fbtac664-success.svg 28 | :target: https://academic.oup.com/bioinformatics/article/38/22/5124/6751773 29 | 30 | .. end-badges 31 | 32 | 33 | 34 | Phytest: Quality Control for Phylogenetic Analyses. 35 | 36 | ---- 37 | 38 | Documentation: https://phytest-devs.github.io/phytest 39 | 40 | Code: https://github.com/phytest-devs/phytest 41 | 42 | Tutorials: https://github.com/phytest-devs?q=example 43 | 44 | ---- 45 | 46 | .. start-quickstart 47 | 48 | Installation 49 | ============ 50 | Install phytest using pip: 51 | 52 | .. code-block:: bash 53 | 54 | pip install phytest 55 | 56 | 57 | Quick Start 58 | ============ 59 | 60 | Phytest is a tool for automating quality control checks on sequence, tree and metadata files during phylogenetic analyses. 61 | Phytest ensures that phylogenetic analyses meet user-defined quality control tests. 62 | 63 | Here we will create example data files to run our tests on. 64 | 65 | Create an alignment fasta file :code:`example.fasta` 66 | 67 | .. 
code-block:: text 68 | 69 | >Sequence_A 70 | ATGAGATCCCCGATAGCGAGCTAGCGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG 71 | >Sequence_B 72 | ATGAGATCCCCGATAGCGAGCTAGXGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG 73 | >Sequence_C 74 | ATGAGA--CCCGATAGCGAGCTAGCGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG 75 | >Sequence_D 76 | ATGAGATCCCCGATAGCGAGCTAGCGATNNNNNNNNNNNNNNNNNTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG 77 | 78 | Create a tree newick file :code:`example.tree` 79 | 80 | .. code-block:: text 81 | 82 | (Sequence_A:1,Sequence_B:0.2,(Sequence_C:0.3,Sequence_D:0.4):0.5); 83 | 84 | Writing a test file 85 | ######################## 86 | 87 | We want to enforce the following constraints on our data: 88 | 1. The alignment has 4 sequences 89 | 2. The sequences have a length of 100 90 | 3. The sequences only contain the characters A, T, G, C, N and - 91 | 4. The sequences are allowed to only contain single base deletions 92 | 5. The longest stretch of Ns is 10 93 | 6. The tree has 4 tips 94 | 7. The tree is bifurcating 95 | 8. The alignment and tree have the same names 96 | 9. All internal branches are longer than the given threshold 97 | 10. There are no outlier branches in the tree 98 | 99 | We can write these tests in a Python file :code:`example.py` 100 | 101 | .. 
code-block:: python 102 | 103 | from phytest import Alignment, Sequence, Tree 104 | 105 | 106 | def test_alignment_has_4_sequences(alignment: Alignment): 107 | alignment.assert_length(4) 108 | 109 | 110 | def test_alignment_has_a_width_of_100(alignment: Alignment): 111 | alignment.assert_width(100) 112 | 113 | 114 | def test_sequences_only_contains_the_characters(sequence: Sequence): 115 | sequence.assert_valid_alphabet(alphabet="ATGCN-") 116 | 117 | 118 | def test_single_base_deletions(sequence: Sequence): 119 | sequence.assert_longest_stretch_gaps(max=1) 120 | 121 | 122 | def test_longest_stretch_of_Ns_is_10(sequence: Sequence): 123 | sequence.assert_longest_stretch_Ns(max=10) 124 | 125 | 126 | def test_tree_has_4_tips(tree: Tree): 127 | tree.assert_number_of_tips(4) 128 | 129 | 130 | def test_tree_is_bifurcating(tree: Tree): 131 | tree.assert_is_bifurcating() 132 | 133 | 134 | def test_aln_tree_match_names(alignment: Alignment, tree: Tree): 135 | aln_names = [i.name for i in alignment] 136 | tree.assert_tip_names(aln_names) 137 | 138 | 139 | def test_all_internal_branches_lengths_above_threshold(tree: Tree, threshold=1e-4): 140 | tree.assert_internal_branch_lengths(min=threshold) 141 | 142 | 143 | def test_outlier_branches(tree: Tree): 144 | # Here we create a custom function to detect outliers 145 | import statistics 146 | 147 | tips = tree.get_terminals() 148 | branch_lengths = [t.branch_length for t in tips] 149 | cut_off = statistics.mean(branch_lengths) + statistics.stdev(branch_lengths) 150 | for tip in tips: 151 | assert tip.branch_length < cut_off, f"Outlier tip '{tip.name}' (branch length = {tip.branch_length})!" 152 | 153 | Running Phytest 154 | ################ 155 | 156 | We can then run these tests on our data with :code:`phytest`: 157 | 158 | .. code-block:: bash 159 | 160 | phytest examples/example.py -s examples/data/example.fasta -t examples/data/example.tree 161 | 162 | Generate a report by adding :code:`--report report.html`. 163 | 164 | .. 
image:: https://raw.githubusercontent.com/phytest-devs/phytest/main/docs/images/report.png 165 | :alt: HTML Report 166 | 167 | From the output we can see several tests failed: 168 | 169 | .. code-block:: 170 | 171 | FAILED examples/example.py::test_sequences_only_contains_the_characters[Sequence_B] - AssertionError: Invalid pattern found in 'Sequence_B'! 172 | FAILED examples/example.py::test_single_base_deletions[Sequence_C] - AssertionError: Longest stretch of '-' in 'Sequence_C' > 1! 173 | FAILED examples/example.py::test_longest_stretch_of_Ns_is_10[Sequence_D] - AssertionError: Longest stretch of 'N' in 'Sequence_D' > 10! 174 | FAILED examples/example.py::test_outlier_branches - AssertionError: Outlier tip 'Sequence_A' (branch length = 1.0)! 175 | 176 | Results (0.07s): 177 | 15 passed 178 | 4 failed 179 | - examples/example.py:12 test_sequences_only_contains_the_characters[Sequence_B] 180 | - examples/example.py:16 test_single_base_deletions[Sequence_C] 181 | - examples/example.py:20 test_longest_stretch_of_Ns_is_10[Sequence_D] 182 | - examples/example.py:32 test_outlier_branches 183 | 184 | 185 | 186 | .. end-quickstart 187 | 188 | See docs for more information https://phytest-devs.github.io/phytest. 189 | 190 | Citation 191 | ============ 192 | 193 | .. start-citation 194 | 195 | If you use phytest, please cite the following paper: 196 | 197 | Wytamma Wirth, Simon Mutch, Robert Turnbull, Sebastian Duchene, Phytest: quality control for phylogenetic analyses, Bioinformatics, Volume 38, Issue 22, 15 November 2022, Pages 5124–5125, https://doi.org/10.1093/bioinformatics/btac664 198 | 199 | 200 | .. 
code-block:: bibtex 201 | 202 | @article{10.1093/bioinformatics/btac664, 203 | author = {Wirth, Wytamma and Mutch, Simon and Turnbull, Robert and Duchene, Sebastian}, 204 | title = "{{Phytest: quality control for phylogenetic analyses}}", 205 | journal = {Bioinformatics}, 206 | volume = {38}, 207 | number = {22}, 208 | pages = {5124-5125}, 209 | year = {2022}, 210 | month = {10}, 211 | issn = {1367-4803}, 212 | doi = {10.1093/bioinformatics/btac664}, 213 | url = {https://doi.org/10.1093/bioinformatics/btac664}, 214 | eprint = {https://academic.oup.com/bioinformatics/article-pdf/38/22/5124/47153886/btac664.pdf}, 215 | } 216 | 217 | 218 | .. end-citation 219 | -------------------------------------------------------------------------------- /autodocs.sh: -------------------------------------------------------------------------------- 1 | poetry run sphinx-autobuild docs docs/_build/html --open-browser 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/citation.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Citation 3 | ============ 4 | 5 | .. include:: ../README.rst 6 | :start-after: start-citation 7 | :end-before: end-citation 8 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'phytest' 21 | copyright = '2022, Wytamma Wirth, Simon Mutch, Robert Turnbull, Sebastian Duchene' 22 | author = 'Wytamma Wirth, Simon Mutch, Robert Turnbull, Sebastian Duchene' 23 | 24 | html_favicon = 'favicon.ico' 25 | 26 | # The full version, including alpha/beta/rc tags 27 | release = '0.1.0' 28 | 29 | 30 | # -- General configuration --------------------------------------------------- 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 
35 | extensions = [ 36 | "sphinx_rtd_theme", 37 | "nbsphinx", 38 | "sphinx.ext.mathjax", 39 | "sphinx.ext.githubpages", 40 | "myst_parser", 41 | "sphinx.ext.autodoc", 42 | "sphinx.ext.coverage", 43 | "sphinx.ext.napoleon", 44 | "sphinx_copybutton", 45 | "sphinx.ext.autosummary", 46 | ] 47 | 48 | github_username = 'phytest-devs' 49 | github_repository = 'phytest' 50 | 51 | html_context = { 52 | 'display_github': True, 53 | 'github_user': 'phytest-devs', 54 | 'github_repo': 'phytest', 55 | 'github_version': 'main/docs/', 56 | } 57 | 58 | # Add any paths that contain templates here, relative to this directory. 59 | templates_path = ['_templates'] 60 | 61 | # List of patterns, relative to source directory, that match files and 62 | # directories to ignore when looking for source files. 63 | # This pattern also affects html_static_path and html_extra_path. 64 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 65 | 66 | 67 | # -- Options for HTML output ------------------------------------------------- 68 | 69 | # The theme to use for HTML and HTML Help pages. See the documentation for 70 | # a list of builtin themes. 71 | # 72 | html_theme = 'sphinx_rtd_theme' 73 | 74 | # Add any paths that contain custom static files (such as style sheets) here, 75 | # relative to this directory. They are copied after the builtin static files, 76 | # so a file named "default.css" will overwrite the builtin "default.css". 77 | html_static_path = ['_static'] 78 | -------------------------------------------------------------------------------- /docs/development.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Development 3 | ============ 4 | 5 | Install poetry (https://python-poetry.org/) 6 | 7 | Clone the repository: 8 | 9 | .. code-block:: bash 10 | 11 | git clone https://github.com/phytest-devs/phytest.git && cd phytest 12 | 13 | .. 
code-block:: bash 14 | 15 | poetry install 16 | poetry shell 17 | 18 | Test the code with pytest: 19 | 20 | .. code-block:: bash 21 | 22 | pytest 23 | -------------------------------------------------------------------------------- /docs/examples.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Examples 3 | ============== 4 | 5 | The Phytest organisation on GitHub contains several example repositories (https://github.com/phytest-devs) that show how Phytest can integrate into standard phylogenetic analyses and scenarios. 6 | 7 | Nextstrain Example 8 | ------------------ 9 | 10 | The Nextstrain pipeline is widely used for pathogen phylogenetic analysis. 11 | In this example we modify the Nextstrain zika-tutorial (https://github.com/nextstrain/zika-tutorial) 12 | to include testing with Phytest. This modified pipeline is available as an example repository in the phytest-devs GitHub organisation 13 | https://github.com/phytest-devs/phytest-nextstrain-example and provides an example of using Phytest for quality control in a Snakemake 14 | pipeline. 15 | 16 | Phytest is included in the pipeline to ensure the alignment and maximum likelihood tree meet explicit quality 17 | requirements before proceeding through the pipeline. Only if all the tests pass will the pipeline continue, thus saving computational resources. 18 | The resulting HTML report provides details of any failed tests so that the offending data can be removed. 19 | While Augur (the Nextstrain toolkit) has some ability to refine/filter tree and alignment files, 20 | Phytest adds a highly customizable testing framework to the pipeline that ensures the quality of the analysis. 21 | 22 | Temporal Signal Example 23 | ----------------------- 24 | 25 | A repository containing the code for this example can be found at https://github.com/phytest-devs/phytest-temporal-signal-example. 
26 | 27 | Temporal signal is an important prerequisite to many Bayesian phylogenetic analyses. In this example we use Phytest to ensure the 28 | data-set meets the minimum temporal signal requirements for Bayesian analyses. Temporal signal analysis can help to detect 29 | problematic sequences and potential issues before heading on to a Bayesian phylogenetic analysis e.g. with BEAST. 30 | Here, we use data from the TempEst tutorial https://beast.community/tempest\_tutorial. 31 | 32 | TempEst is a useful program for performing temporal signal analysis; however, it is not possible to easily automate the TempEst graphical user interface. 33 | Internally, Phytest uses TreeTime to perform a root-to-tip regression, allowing users to automate temporal signal testing. 34 | The :code:`Tree.assert_root_to_tip` method is used for testing temporal signal and provides arguments for testing the 35 | coefficient of determination, estimated rate and root date. The Phytest Tree class also implements methods for exploring and plotting results. 36 | 37 | Continuous Testing Example 38 | -------------------------- 39 | 40 | In this example Phytest is used to test data shared on GitHub every time the data is updated (https://github.com/phytest-devs/phytest-continuous-testing-example). 41 | 42 | Tests are run against the phylogenetic data using the Continuous Integration features that are freely available 43 | through GitHub (other services are also available). Using Phytest through GitHub Actions (https://github.com/features/actions) 44 | ensures that any time the data changes (common during development) it still meets the requirements defined in the tests. 
45 | -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phytest-devs/phytest/b199e2fcc2bbd2a2e82ce71a73c472d28ea474ec/docs/favicon.ico -------------------------------------------------------------------------------- /docs/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phytest-devs/phytest/b199e2fcc2bbd2a2e82ce71a73c472d28ea474ec/docs/images/logo.png -------------------------------------------------------------------------------- /docs/images/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | logo 7 | 8 | tree 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | Phytest 17 | 18 | 19 | -------------------------------------------------------------------------------- /docs/images/report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phytest-devs/phytest/b199e2fcc2bbd2a2e82ce71a73c472d28ea474ec/docs/images/report.png -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. phytest documentation master file, created by 2 | sphinx-quickstart on Wed Apr 13 15:22:21 2022. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. image:: images/logo.png 7 | :alt: Phytest logo 8 | 9 | .. include:: ../README.rst 10 | :start-after: start-badges 11 | :end-before: end-badges 12 | 13 | Phytest 14 | ================== 15 | 16 | Phytest: Quality Control for Phylogenetic Analyses. 17 | 18 | Phytest is a tool for automating quality control checks on sequence, tree and metadata files during phylogenetic analyses. 
Phytest ensures that phylogenetic analyses meet user-defined quality control tests. 19 | 20 | ---- 21 | 22 | Documentation: https://phytest-devs.github.io/phytest 23 | 24 | Code: https://github.com/phytest-devs/phytest 25 | 26 | ---- 27 | 28 | .. toctree:: 29 | :maxdepth: 2 30 | :caption: Contents 31 | 32 | quickstart 33 | installation 34 | testFiles 35 | usage 36 | examples 37 | reference 38 | development 39 | citation 40 | 41 | Indices and tables 42 | ================== 43 | 44 | * :ref:`genindex` 45 | * :ref:`modindex` 46 | * :ref:`search` 47 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Installation 3 | ============ 4 | 5 | Install Phytest using pip: 6 | 7 | .. code-block:: bash 8 | 9 | pip install phytest 10 | 11 | 12 | .. NOTE:: 13 | Requires Python >=3.8 & <3.11 14 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Quickstart 3 | ============ 4 | 5 | 6 | .. include:: ../README.rst 7 | :start-after: start-quickstart 8 | :end-before: end-quickstart 9 | 10 | .. |report image| image:: images/report.png 11 | :alt: HTML Report 12 | -------------------------------------------------------------------------------- /docs/reference.rst: -------------------------------------------------------------------------------- 1 | ======================= 2 | API Reference 3 | ======================= 4 | 5 | 6 | Sequence 7 | ====================== 8 | 9 | .. autoclass:: phytest.bio.sequence.Sequence 10 | :members: 11 | 12 | 13 | Alignment 14 | ====================== 15 | 16 | .. autoclass:: phytest.bio.alignment.Alignment 17 | :members: 18 | 19 | Tree 20 | ====================== 21 | 22 | .. autoclass:: phytest.bio.tree.Tree 23 | :members: 24 | 25 | Data 26 | ====================== 27 | 28 | .. autoclass:: phytest.bio.data.Data 29 | :members: 30 | -------------------------------------------------------------------------------- /docs/testFiles.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Writing Tests 3 | ============== 4 | 5 | Phytest is easily extendable and provides a simple interface for writing custom phylogenetic tests. 6 | The interface follows the Pytest model of testing i.e. 
tests are defined as Python functions (or class methods) 7 | containing assert statements that are collected and evaluated at run-time. Tests that fail are captured and reported 8 | to the user allowing for repeatable and automated testing. 9 | Phytest provides many convenient helper functions for testing phylogenetic analyses including methods for testing sequences, 10 | alignments, trees and metadata files. 11 | 12 | Phytest fixtures 13 | ================= 14 | 15 | Phytest injects special fixture objects into test functions, allowing for easy evaluation and 16 | testing of phylogenetic data structures. These fixtures provide the standard Biopython (sequences and trees) and Pandas (metadata) 17 | class methods as well as special assert methods for testing these data structures. 18 | 19 | Only functions that require the fixtures will have the Phytest objects passed to them. For example consider the following tests. 20 | 21 | .. code-block:: python 22 | 23 | from phytest import Sequence 24 | 25 | def test_example(sequence: Sequence): 26 | ... 27 | 28 | Test functions must start with the keyword :code:`test_`; this allows Pytest to identify and collect the tests. 29 | Fixtures are required using one of the special arguments i.e. the lower case of the class name. 30 | 31 | Here the :code:`sequence` argument is used to require the sequences passed from the command line 32 | (see below for information on how to pass files to Phytest). Phytest will identify which test functions 33 | require which fixtures and pass the Phytest objects to them for testing. 34 | 35 | Using Phytest classes for type hints is not required; however, it makes for a better development experience. 36 | For example the following is a valid Phytest test and will be passed a Sequence object. 37 | 38 | .. code-block:: python 39 | 40 | def test_example(sequence): 41 | ... 42 | 43 | Fixtures can be combined to make more complex tests across multiple data types e.g. 44 | 45 | .. 
code-block:: python 46 | 47 | from phytest import Sequence, Tree 48 | 49 | def test_example(sequence: Sequence, tree: Tree): 50 | # test tree and sequence objects together 51 | ... 52 | 53 | Sequence 54 | --------- 55 | 56 | The Phytest Sequence class is a sub-class of the Biopython SeqRecord class. This class uses the fixture :code:`sequence`. 57 | 58 | .. code-block:: python 59 | 60 | from phytest import Sequence 61 | 62 | def test_example(sequence: Sequence): 63 | ... 64 | 65 | Any tests requiring the class will be run for every sequence in the file. For example if the fasta file below is passed to Phytest 66 | the :code:`test_example` function above would be run 4 times (Sequence_A-Sequence_D). 67 | 68 | .. code-block:: text 69 | 70 | >Sequence_A 71 | ATGAGATCCCCGATAGCGAGCTAGCGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG 72 | >Sequence_B 73 | ATGAGATCCCCGATAGCGAGCTAGXGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG 74 | >Sequence_C 75 | ATGAGA--CCCGATAGCGAGCTAGCGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG 76 | >Sequence_D 77 | ATGAGATCCCCGATAGCGAGCTAGCGATNNNNNNNNNNNNNNNNNTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG 78 | 79 | .. code-block:: bash 80 | 81 | $ phytest test.py --sequence sequences.fasta 82 | 83 | Test session starts (platform: darwin, Python 3.9.12, pytest 7.1.1, pytest-sugar 0.9.4) 84 | rootdir: /Users/wytamma/programming/phytest, configfile: pyproject.toml 85 | plugins: sugar-0.9.4, html-3.1.1, cov-3.0.0 86 | collecting ... 87 | test.py ✓✓✓✓ 100% ██████████ 88 | 89 | Results (0.03s): 90 | 4 passed 91 | 92 | 93 | Alternative file formats can be specified using the :code:`--sequence-format` flag. 94 | 95 | Alignment 96 | --------- 97 | 98 | The Phytest Alignment class is a sub-class of the Biopython MultipleSeqAlignment class. This class uses the fixture :code:`alignment`. 99 | 100 | .. 
code-block:: python 101 | 102 | from phytest import Alignment 103 | 104 | def test_example(alignment: Alignment): 105 | ... 106 | 107 | Tests using the alignment file will be run once i.e. you will have access to the entire alignment during the test. 108 | Alignments are also passed to Phytest using the :code:`--sequence` flag; however, they are required to be valid 109 | alignments e.g. all sequences must be the same length. 110 | 111 | .. code-block:: bash 112 | 113 | phytest test.py --sequence sequences.fasta 114 | 115 | Test session starts (platform: darwin, Python 3.9.12, pytest 7.1.1, pytest-sugar 0.9.4) 116 | rootdir: /Users/wytamma/programming/phytest, configfile: pyproject.toml 117 | plugins: sugar-0.9.4, html-3.1.1, cov-3.0.0 118 | collecting ... 119 | test.py ✓ 100% ██████████ 120 | 121 | Results (0.02s): 122 | 1 passed 123 | 124 | 125 | Alternative file formats can be specified using the :code:`--sequence-format` flag. 126 | 127 | Tree 128 | ----- 129 | 130 | The Phytest Tree class is a sub-class of the Biopython Tree class. This class uses the fixture :code:`tree`. 131 | 132 | .. code-block:: python 133 | 134 | from phytest import Tree 135 | 136 | def test_example(tree: Tree): 137 | ... 138 | 139 | Tests using the tree fixture will be run once per tree in the file. Tree files are passed to Phytest using the :code:`--tree` flag. 140 | 141 | .. code-block:: text 142 | 143 | (Sequence_A:1,Sequence_B:0.2,(Sequence_C:0.3,Sequence_D:0.4):0.5); 144 | (Sequence_A:1,Sequence_B:0.3,(Sequence_C:0.3,Sequence_D:0.4):0.5); 145 | 146 | 147 | .. code-block:: bash 148 | 149 | phytest test.py --tree tree.newick 150 | 151 | Test session starts (platform: darwin, Python 3.9.12, pytest 7.1.1, pytest-sugar 0.9.4) 152 | rootdir: /Users/wytamma/programming/phytest, configfile: pyproject.toml 153 | plugins: sugar-0.9.4, html-3.1.1, cov-3.0.0 154 | collecting ... 
155 | test.py ✓✓ 100% ██████████ 156 | 157 | Results (0.02s): 158 | 2 passed 159 | 160 | Alternative file formats can be specified using the :code:`--tree-format` flag. 161 | 162 | Data 163 | ----- 164 | 165 | The Phytest Data class is a sub-class of the Pandas DataFrame class. This class uses the fixture :code:`data`. 166 | 167 | .. code-block:: python 168 | 169 | from phytest import Data 170 | 171 | def test_example(data: Data): 172 | ... 173 | 174 | Tests using the data file will be run once. Data files are passed to Phytest using the :code:`--data` flag. 175 | 176 | .. code-block:: bash 177 | 178 | phytest test.py --data metadata.csv 179 | 180 | Test session starts (platform: darwin, Python 3.9.12, pytest 7.1.1, pytest-sugar 0.9.4) 181 | rootdir: /Users/wytamma/programming/phytest, configfile: pyproject.toml 182 | plugins: sugar-0.9.4, html-3.1.1, cov-3.0.0 183 | collecting ... 184 | test.py ✓ 100% ██████████ 185 | 186 | Results (0.02s): 187 | 1 passed 188 | 189 | 190 | Alternative file formats can be specified using the :code:`--data-format` flag. 191 | 192 | 193 | Built-in asserts 194 | ================= 195 | 196 | Phytest provides many convenient helper functions for testing phylogenetic analyses including methods for testing sequences, 197 | alignments, trees and metadata files. 198 | 199 | .. code-block:: python 200 | 201 | from phytest import Sequence 202 | 203 | def test_GC_content(sequence: Sequence): 204 | sequence.assert_percent_GC(38) 205 | 206 | For example, the Phytest Sequence class implements the method :code:`Sequence.assert_percent_GC`. 207 | Calling this method with the expected GC-content e.g. :code:`sequence.assert_percent_GC(38)` will 208 | raise an error if the percent of G and C nucleotides in the sequence is not equal to 38%. 209 | Many methods also provide maximum and minimum arguments so the upper and lower bounds can be tested 210 | e.g. :code:`sequence.assert_percent_GC(min=30, max=40)`. 211 | 212 | .. 
code-block:: python 213 | 214 | from phytest import Sequence 215 | 216 | def test_GC_content(sequence: Sequence): 217 | sequence.assert_percent_GC(min=30, max=40) 218 | 219 | All Phytest assert methods also provide a warning flag e.g. :code:`sequence.assert_percent_GC(38, warn=True)` 220 | causing the method to raise a warning instead of an error if the test fails. In an automated pipeline, 221 | this provides a way to inform the user of potential problems without causing the pipeline to fail. 222 | The warning flag can be set automatically by calling the method with the :code:`warn_` prefix instead 223 | of :code:`assert_` e.g. :code:`sequence.warn_percent_GC(38)`. 224 | 225 | .. code-block:: python 226 | 227 | from phytest import Sequence 228 | 229 | def test_GC_content(sequence: Sequence): 230 | sequence.warn_percent_GC(38) 231 | 232 | See the documentation for a full list of built-in assert methods (https://phytest-devs.github.io/phytest/reference.html). 233 | 234 | 235 | Custom asserts 236 | ================= 237 | 238 | As Phytest is running Pytest under the hood it is trivial to write your own custom asserts using the Phytest fixtures. 239 | 240 | .. code-block:: python 241 | 242 | def test_outlier_branches(tree: Tree): 243 | # Here we create a custom function to detect outliers 244 | import statistics 245 | 246 | tips = tree.get_terminals() 247 | branch_lengths = [t.branch_length for t in tips] 248 | cut_off = statistics.mean(branch_lengths) + statistics.stdev(branch_lengths) 249 | for tip in tips: 250 | assert tip.branch_length < cut_off, f"Outlier tip '{tip.name}' (branch length = {tip.branch_length})!" 
251 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Running Tests 3 | ============== 4 | 5 | Phytest has been developed as a command-line interface, Python module, and Pytest plugin, providing multiple methods of invocation. 6 | 7 | Phytest CLI 8 | =========== 9 | 10 | Phytest provides a command line interface (CLI) for running tests on specific data files. 11 | 12 | .. code-block:: python 13 | 14 | from phytest import Sequence 15 | 16 | def test_gc_content(sequence: Sequence): 17 | sequence.assert_percent_GC( 18 | min=30, 19 | max=40 20 | ) 21 | 22 | The Phytest CLI requires a path to the file containing user defined tests and has optional flags for specifying sequence/alignment, tree and metadata files: 23 | 24 | .. code-block:: bash 25 | 26 | phytest test.py --sequence sequences.fasta --tree tree.newick --data metadata.csv 27 | 28 | Alternative file formats can be specified with :code:`--sequence-format`, :code:`--tree-format`, :code:`--data-format` flags. 29 | Supported formats include those supported by Biopython (sequences and trees) and TSV and Excel (metadata). 30 | 31 | Phytest Module 32 | ================ 33 | 34 | The Phytest module can be imported into a script so that tests are self-contained i.e. data files are specified in the tests. 35 | 36 | .. code-block:: python 37 | 38 | import phytest 39 | 40 | def test_gc_content(sequence: phytest.Sequence): 41 | sequence.assert_percent_GC( 42 | min=30, 43 | max=40 44 | ) 45 | 46 | if __name__ == "__main__": 47 | sys.exit(phytest.main(sequence='examples/data/ice_viruses.fasta')) 48 | 49 | This test style uses the :code:`if __name__ == "__main__"` Python convention to only run the tests when invoked from the command line using the python command. 50 | 51 | .. 
code-block:: bash 52 | 53 | python test.py 54 | 55 | The :code:`phytest.main` function will run the tests and return an exit status (0 or 1) that is passed to :code:`sys.exit` to ensure the tests exit correctly. 56 | 57 | 58 | Pytest Plugin 59 | ================ 60 | 61 | Phytest can also be used as a Pytest plugin. Simply install Phytest and then run Pytest on the test file with the appropriate flags. 62 | 63 | .. code-block:: bash 64 | 65 | pytest test.py --sequence sequences.fasta 66 | 67 | .. NOTE:: 68 | Short hand flags must be capitalised when running Phytest through Pytest e.g. :code:`pytest test.py -S sequences.fasta` 69 | -------------------------------------------------------------------------------- /examples/data/example.csv: -------------------------------------------------------------------------------- 1 | name,date 2 | Sequence_A,2020-05-04 3 | Sequence_B,2020-05-04 4 | Sequence_C,2020-05-04 5 | Sequence_D,2020-05-04 6 | -------------------------------------------------------------------------------- /examples/data/example.fasta: -------------------------------------------------------------------------------- 1 | >Sequence_A 2 | ATGAGATCCCCGATAGCGAGCTAGCGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG 3 | >Sequence_B 4 | ATGAGATCCCCGATAGCGAGCTAGXGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG 5 | >Sequence_C 6 | ATGAGA--CCCGATAGCGAGCTAGCGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG 7 | >Sequence_D 8 | ATGAGATCCCCGATAGCGAGCTAGCGATNNNNNNNNNNNNNNNNNTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNTAG 9 | -------------------------------------------------------------------------------- /examples/data/example.tree: -------------------------------------------------------------------------------- 1 | (Sequence_A:1,Sequence_B:0.2,(Sequence_C:0.3,Sequence_D:0.4):0.5); 2 | -------------------------------------------------------------------------------- 
/examples/data/example.tsv: -------------------------------------------------------------------------------- 1 | name date 2 | Sequence_A 4/5/2020 3 | Sequence_B 4/5/2020 4 | Sequence_C 4/5/2020 5 | Sequence_D 4/5/2020 6 | -------------------------------------------------------------------------------- /examples/data/example.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phytest-devs/phytest/b199e2fcc2bbd2a2e82ce71a73c472d28ea474ec/examples/data/example.xlsx -------------------------------------------------------------------------------- /examples/data/ice_viruses.fasta: -------------------------------------------------------------------------------- 1 | >A.Fiji.15899.83.AJ289702_1983 2 | GTCAACCTACTTGAGGACAATCACAACGGGAAACTATGCAAACTAAAAGGAATAGCGCCACTACAATTGGGGAAATGCAACATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAAGAACTGAGGGAACAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGTTCATGGCCCAACCACAACGTAACCAAAGGAGTAACGGCATCATGTTCCCATAAGGGGAAAAGCAGTTTTTACAGAAACTTGCTATGGCTGACGGAGAAAAATGGCTCGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT 3 | >A.Kiev.59.1979.M38353_1979 4 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGCAGACTGAAAGGAATAGCTCCACTACAATTGGGGAAATGCAGCATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGATATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACGTAACCAGAGGAGTAACGGCATCATGCTCCCATAAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAAAATGGCTCGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT 5 | >A.FortMonmouth.1.1947.U02085_1947 6 | 
GTAAACCTACTCGAAGACAGCCACAACGGGAAATTATGCAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGCTTTCTAAGAGATCATGGTCCTACATTGCAGAAACACCAAACTCTGAGAATGGAGCATGTTACCCAGGAGATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACATAACCAGAGGAGTAACGGCAGCATGCTCCCATGCGGGGAAAAGCAGTTTTTACAAAAATTTGCTCTGGCTGACGGAGACAGATGGCTCATACCCAAAGCTGAGCAAGTCCTATGTGAACAATAAAGAGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCACCCGTCT 7 | >A.BrevigMission.1.18.AF116575_1918 8 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAAATTAAAAGGAATAGCCCCATTACAATTGGGGAAATGTAATATCGCCGGATGGCTCTTGGGAAACCCGGAATGCGATTTACTGCTCACAGCGAGCTCATGGTCCTATATTGTAGAAACATCGAACTCAGAGAATGGAACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGGGAGCAATTGAGCTCAGTGTCATCGTTCGAAAAATTCGAAATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAAAGGTGTAACGGCAGCATGCTCCTATGCGGGAGCAAGCAGTTTTTACAGAAATTTGCTGTGGCTGACAAAGAAGGGAAGCTCATACCCAAAGCTTAGCAAGTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCATCCGCCT 9 | >A.SouthCarolina.6.1988.L19025_1988 10 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGTCGACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAGTTGAGTTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCGTAACCAAAGGAGTAACGGCATCATGCTCCCATAAGGGGAGAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGCGTTCATCACCCGTCT 11 | >A.Yamagata.32.1989.D31949_1989 12 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGTCGACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGGAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAAGAAAGATCATGGCCCAACCACACCGTAACCAAAGGAGTAACGGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT 13 | 
>A.Finland.4.1984.L33491_1984 14 | GTCNACCTACTTGAGGACAGTCACAACGGAAAACTATGCAGACTAAAAGGAATAGCCCCACTACAATTGGGGAAATGCAGCATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATNNTTCGAGAGATTCGAAATATTCCCCAAGGAAAGTTCATGGCCCAAACACAACATAACCAAAGGAGTAACGGCATCATGCTCCCATAAGGGNAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAAAATGGCTTGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT 15 | >A.USSR.90.1977.K01331_1977 16 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGCAGACTAAAAGGGATAGCCCCACTACAATTGGGGAAATGCAACATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGATATTTCGCCGACTATGAGGAATTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACGTAACCAGAGGCGTAACGGCATCATGCTCCCATAAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAAAATGGCTCGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT 17 | >A.Lepine.1948.AB043479_1948 18 | GTCAACCTACTCGAGGACAGTCACAACGGAAAACTATGCAGACTAAAAGGAATAGCCCCACTACAATTGGGGAAATGCAACATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCTGAGAATGGAACATGTTACCCAGGATATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTTTCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGGTCATGGCCCAAACACAACGTAACCAGAGGAGTAACGGCAGCATGCTCCCATAAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAAAATGGCTCGTACCCAACTCTGAGCAAGTCCTATGTGAACAATAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCAACCGTCT 19 | >A.Meguro.1.1956.AB043485_1956 20 | 
GTAAACCTACTCGAAGACAGCCACAATGGGAAATTATGCAGATTAAAAGGAAAAGCCCCACTACAATTGGGGAAATGTAACATTGCCGGATGGGTCTTAGGAAACCCAGAATGTGAATCATTGCTTTCCAATAGATCATGGTCCTACATTGCAGAAACACCAAACCCTGAGAATGGGACATGTTACCCAGGAGATTTCACCAACTATGAGGAACTGAAGGAGCAATTGAGCTCAGTGTCATCACTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACACAACCAGAGGAGTGACGGCAGCATGCTCCCATGCGAGGAAAAGCAGTTTCTACAAAAATTTGGTCTGGCTGACGGAGGCAAATGGCTCATACCCAACTCTGAGCAATTCCTATGTGAACAATCAAGAGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCACCCGTCT 21 | >A.Kojiya.1.1952.AB043482_1952 22 | GTAAACCTACTCGAAGACAGCCACAATGGGAAATTATGCAGATTAAAAGGAAAAGCCCCACTACAATTGGGGAAATGTAACATTGCCGGATGGGTCTTAGGAAACCCAGAATGCGAATCATTGCTTTCCAATAGATCATGGTCCTACATTGCAGAAACACCAAACTCTGAGAATGGGACATGTTACCCAGGAGATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACACAACCAGAGGAGTGACGGCAGCATGCTCCCATGCGGGGAAAAGCAGTTTTTACAAAAATTTGGTCTGGCTGACGGAGGCAAATGGCTCATACCCAAATCTGAGCAAGTCCTATGTGAACAATCAAGAGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCACCCGTCT 23 | >A.Huston.43.AF494251_1943 24 | GTCAACCTACTCGAAGACAGCCACAACGGGAAATTATGTAGATTAAAAGGAATAGCCCCACTACAATTGAGGAAATGTAACATTGCTGGATGGATCCTGGGAAACCCAGAATGCGAATCACTGCTTTCAGAGAGATCATGGTCCTACATTGTTGAAACACCAAACTCTGAGAATGGAACATGTTACCCAGGAGATTTTACCAACTATGAGGAATTGAGGGAGCAATTGAGCTCTGTATCATCATTCGAAAGATTCGAAATATTCCCCAAGGAAAGCTCATGGCCCAAACACAACACAACCAGAGGAGTAACGGCAGCATGCTCCCATGCGGGAAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGATGGCTCATATCCGAATCTGAACAATTCCTATGTGAACAAGAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCC 25 | >A.BuenosAires.T114.97.AF534026_1997 26 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGCCGACTAAAAGGAACAGCCCCACTACAATTGGGTAATTGCAGCATTGCCGGATGGATCTTAGGAAATCCAGAATGCGAATCACTGTTTTCTAGGGAATCATGGTCCTACATTGCAGAAACACCAAACCCTGAAAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTTGAAATATTCCCCAAGGAAAGCTCATGGCCCAACCACACCGTAACCAAAGGAGTGACGGCATCATGCTCCCATAATGGGAAAAGCAGCTTTTACAAAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAAGTCCTATGTAAACAACAAGGGGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCACCCGTCT 27 | 
>A.PuertoRico.8.34.J02144_1934 28 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACCCACTGCTTCCAGTGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAATATGTTATCCAGGAGATTTCATCGACTATGAGGAGCTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAAATATTTCCCAAAGAAAGCTCATGGCCCAACCACAACACAAAC---GGAGTAACGGCAGCATGCTCCCATGAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGAGGGCTCATACCCAAAGCTGAAAAATTCTTATGTGAACAAAAAAGGGAAAGAAGTCCTTGTACTGTGGGGTATTCATCACCCGCCT 29 | >A.PuertoRico.8.1934.J04572_1934 30 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACCCACTGCTTCCAGTGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAATATGTTATCCAGGAGATTTCATCGACTATGAGGAGCTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAAATATTTCCCAAAGAAAGCTCATGGCCCAACCACAACACAACCAAAGGAGTAACGGCAGCATGCTCCCATGCGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGAGGGCTCATACCCAAAGCTGAAAAATTCTTATGTGAACAAGAAAGGGAAAGAAGTCCTTGTACTGTGGGGTATTCATCACCCGTCT 31 | >A.PuertoRico.8.1934.EF467821_1934 32 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACCCACTGCTTCCAGTGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAATATGTTATCCAGGAGATTTCATCGACTATGAGGAGCTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAAATATTTCCCAAAGAAAGCTCATGGCCCAACCACAACACAAAC---GGAGTAACGGCAGCATGCTCCCATGAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGAGGGCTCATACCCAAAGCTGAAAAATTCTTATGTGAACAAAAAAGGGAAAGAAGTCCTTGTACTGTGGGGTATTCATCACCCGCCT 33 | >A.Mongolia.153.1988.Z54287_1988 34 | 
GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACCCACTGCTTCCAGTGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAATATGTTATCCAGGAGATTTCATCGACTATGAGGAGCTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAAATATTTCCCAAAGAAAGCTCATGGCCCAACCACAACACAAAC---GGAGTAACGGCAGCATGCTCCCATGAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGAGGGCTCATACCCAAAGCTGAAAAATTCTTATGTGAACAAAAAAGGGAAAGAAGTCCTTGTACTGTGGGGTATTCATCACCCGCCT 35 | >A.Mongolia.111.1991.Z54288_1991 36 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCCTGGGAAACCCAGAATGCGACCCACTGCTTCCAGTGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAATATGTTATCCAGGAGATTTCATCGACTATGAGGAGCTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAAATATTTCCCAAAGAAAGCTCATGGCCCAACCACAACACAAAC---GGAGTAACGGCAGCATGCTCCCATGAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGAGGGCTCATACCCAAAGCTGAAAAATTCTTATGTGAACAAAAAAGGGAAAGAAGTCCTTGTACTGTGGGGTATTCATCACCCGCCT 37 | >A.Saga.2.1957.AB043486_1957 38 | GTAAACCTACTCGAAGACAGCCACAATGGGAAATTATGCAGATTAAAAGGAAAAGCCCCACTACAATTGGGGAACTGTAACATTGCCGGATGGGTCTTAGGAAACCCAGAATGTGAATCATTGCTTTCCGGTAGATCATGGTCCTACATTGCAGAAACACCAAACTCTGAGAATGGGACGTGCTACCCAGGGGATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAACCAC---ACAACCAGAGGAGTGACGGCAGCATGCCCCCATGCGAAGAAAAGCAGTTTTTACAAAAATTTGGTCTGGCTGACGGAGGCAAATGGCTCATACCCAAATCTGAGCAGGTCCTATGTGAACAATCAGGAGAAAGAAGTCCTTGTGCTATGGGGAGTTCATCACCCGTCT 39 | >A.Beijing.262.95.AY289928_1995 40 | GTCAACCTACTTGAGGACAGTCACAATGGAAAACTATGTCTACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGATTTCTAAGGAATCATGGTCCTACATTGTAGAGACACCAAACCCTGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATCATTTGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAAACACACCGTAACA---GGAGTAACGGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAATTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCATCT 41 | 
>A.Alaska.1173.00.AY029287_2000 42 | GTCAACCTACTTGAGGACAGTCACAATGGAAAACTATGTCTACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATTACTGATTTCCAAGGAATCATGGTCCTACATTGTAGAAACACCAAATCCTGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCTTCATTTGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCGTAACC---GGAGTATCAGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGGGAAGAATGGTTTGTACCCAAGCCTGAGCAAGTCCTATGCAAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGCCT 43 | >A.Tokyo.3.1967.U38242_1967 44 | GTTAACCTGCTCGAAGACAGCCACAACGGGAAACTATGTAAATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACTCACTGCTTCCAGCGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAGCATGTTATCCAGGAGATTTCATCGACTATGAGGAACTGAAGGAGCAATTGAGCTCAGTATCATCATTAGAAAGATTCGAAATATTTCCCAAGGAAAGTTCATGGCCCAACCACAACACACTCAAAGGAGTAACAGCATCATGCTCCCATGGGGGAAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGAAAACGGAGGACTCATACCCAAAGCTGAGCAATTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT 45 | >A.WS.33.U08904_1933 46 | GTTAACCTGCTCGAAGACAGCCACAACGGGAAACTATGTAAATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACTCACTGCTTCCAGCGAAATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAGCATGTTATCCAGGAGATTTCATCGACTATGAGGAACTGAAGGAGCAATTGAGCTCAGTATCATCATTAGAAAGATTCGAAATATTTCCCAAGGAAAGTTCATGGCCCAACCACAACACACTCAAAGGAGTAACAGCAGCATGCTCCCATAGGGGAAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGAAAACGGGGGACTCATACCCAAAGCTGAACAATTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT 47 | >A.WSN.1933.CY010788_1933 48 | GTTAACCTGCTCGAAGACAGACACAACGGGAAACTATGTAAATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCACCGGATGGCTCTTGGGAAATCCAGAATGCGACTCACTGCTTCCAGCGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAGCATGTTATCCAGGAGATTTCATCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTAGAAAGATTCGAAATATTTCCCAAGGAAAGTTCATGGCCCAACCACACATTCAAC---GGAGTAACAGTATCATGCTCCCATAGGGGAAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGAAGAAGGGGGATTCATACCCAAAGCTGACCAATTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT 49 | 
>A.WilsonSmith.1933.DQ508905_1933 50 | GTTAACCTGCTCGAAGACAGACACAACGGGAAACTATGTAAATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCACCGGATGGCTCTTGGGAAATCCAGAATGCGACTCACTGCTTCCAGCGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAGCATGTTATCCAGGAGATTTCATCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTAGAAAGATTCGAAATATTTCCCAAGGAAAGTTCATGGCCCAACCACACATTCAAC---GGAGTAACAGCATCATGCTCCCATAGGGGAAAAAGCAGTTTTTACAGAAATTTGATATGGCTGACGAAGAAGGGGGATTCATACCCAAAGCTGACCAATTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT 51 | >A.goose.HongKong.8.1976.U46021_1976 52 | GTTAATTTACTCGAAAACAGCCATAATGGAAAACTCTGCAGCCTGAATGGAATAGCCCCTTTACAACTAGGGAAATGCAACGTGGCGGGGTGGCTCCTTGGCAACCCAGAATGTGACCTGCTGCTCACTGCGAGTTCATGGTCCTACATAATAGAGACTTCAAATTCAGAAAACGGAACATGCTACCCCGGAGAGTTCATTGATTATGAAGAGTTAAGGGAACAGCTAAGTTCAGTGTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAATCTCATGGCCAAATCATGAAACAACCAAAGGTGTCACAGCTGCATGCTCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTATGGATAACAAAGAAAGGAACTTCCTACCCTAAACTCAGCAAATCATACACGAACAACAAAGGGAAAGAAGTGCTTGTACTTTGGGGGGTGCATCATCCTCCA 53 | >A.duck.Australia.749.80.AF091312_1980 54 | GTTAATTTACTTGAAAACAGCCATAATGGAAAACTTTGCAGCCTGAATGGAATAGCCCCATTACAACTAGGGAAATGCAACGTGGCGGGGTGGCTCCTTGGCAACCTAGAATGTGACCTGTTGCTCACTGCGAATTCATGGTCTTATATAATAGAGACTTCAAATTCAGAAAACGGAACATGTTACCCCGGGGAGTTCATCGATTATGAGGAATTAAGAGAACAGCTAAGTTCAGTGTCTTCATTTGAGAAATTTGAAATTTTCCCGAAGGCAAGCTCATGGCCAAATCATGAGACAACCAAAGGTGTCACAGCTGCATGCTCTTACTTGGGAGCTAGCAGCTTTTATCGGAATTTGCTATGGATGACAAAGAAGGGAACTTCCTATCCTAAACTCAGCAAATCATATACGAACAACAAAGGGAAAGAAGTGCTTGTACTTTGGGGGGTGCATCACCCTCCC 55 | >A.mallard.Tennessee.11464.85.AF091311_1985 56 | 
GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGTCTGAACGGGATAGCTCCCCTACAATTGGGGAAGTGCAATGTAGCGGGATGGCTCCTTGGCAATCCAGAGTGTGACCTTCTACTCACTGCAAACTCATGGTCCTACATAATAGAGACTTCCAATTCAGAAAACGGGACATGCTACCCCGGTGAATTCATAGATTATGAAGAATTAAGAGAGCAGCTAAGTTCAGTTTCTTCATTTGAAAGGTTTGAAATTTTCCCGAAGGCAAACTCATGGCCAAATCATGAGACAACTAAAGGTGTTACAGCCGCATGCTCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGATAACAAAGAAGGGAACGTCATATCCAAAACTCAGCAAATCATACACGAACAATAAAGGGAAAGAAGTACTCGTGCTCTGGGGAGTGCACCACCCTCCA 57 | >A.duck.Alberta.35.76.U47310_1976 58 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTAAACGGGATACCTCCCCTACAACTGGGAAAGTGCAATGTGGCGGGATGGCTCCTGGGCAATCCAGAGTGTGATCTTCTACTCACTGCAAACTCATGGTCCTACATAATAGAAACTTCAAACTCAGAAAACGGAACATGCTACCCCGGTGAATTCATAGATTATGAAGAATTAAGAGAGCAGCTAAGTTCAGTTTCTTCATTTGAAAAATTTGAAATTTTCCCGAAGGCAAGCTCATGGCCAAATCATGAGACAACTAAAGGTGTTACAGCTGCATGCTCTTACTCTGGAGCCAGCAGTTTTTACCGGAATTTGCTGTGGATAACAAAGAAAGGGACTTCATATCCAAAACTCAGCAAATCATACACGAACAATAAGGGGAAAGAAGTGCTTGTGCTCTGGGGGGTGCACCACCCTCCA 59 | >A.duck.HK.196.1977.D00839_1977 60 | GTTAATTTACTAGAAAACAGCCATAATGGAAAACTCTGCAGACTGAATGGAATAGCCCCCTTACAGCTAGGGAAATGCAACGTGGCAGGATGGATCCTTGGCAACCCAGAGTGTGATCTATTGCTCACAGCGAATTCATGGTCTTACATAATAGAGACTTCAAATTCAGAGAATGGAACATGCTACCCCGGAGAGTTCAATGATTATGAAGAATTAAGGGAACAGCTGAGTTCGGTGTCTTCATTTGAAAAGTTTGAAATTTTCCCAAAGGCTAGCTCATGGCCAAATCATGAGACAACTAAAGGTATTACAGCTGCATGTCCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTATGGATAACAAAGAAGGGAACTTCATACCCTAAACTCAGCAAATCATACACAAACAACAAAGGGAAAGAAGTGCTTGTAATCTGGGGAGTGCACCACCCTCCA 61 | >A.teal.Alberta.141.1992.CY004539_1992 62 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTGAACGGGATAGCTCCTCTACAATTGGGGAAGTGCAATGTAGCGGGGTGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAATGGGACATGCTATCCCGGTGAGTTCATAGATTATGAAGAATTAAGAGAGCAGCTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCGAAGGCAAGCTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGATAACAAAGAAGGGAACTTCATATCCAAAACTCAGCAAATCATACACAAACAATAAGGGGAAGGAAGTGCTCGTGCTCTGGGGAGTGCACCACCCTCCA 63 | 
>A.pintail.Ohio.25.1999.CY017725_1999 64 | GTGAATTTGCTCGAAAACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTTTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG 65 | >A.Teal.Ohio.72.1999.CY017717_1999 66 | GTGAATTTGCTCGAAAACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTTTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG 67 | >A.BrantGoose.1.1917.AY095226_1917 68 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG 69 | >A.mallard.Ohio.66.1999.CY016955_1999 70 | 
GTGAATTTGCTCGAAAACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTTTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG 71 | >A.mallard.Ohio.56.1999.CY012824_1999 72 | GTGAATTTGCTCGAAAACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTTTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG 73 | >A.mallard.ALB.267.1996.CY004504_1996 74 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAACTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGGACATGCTATCCCGGTGAATTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGATAACAAAGAAGGGAACTTCATATCCAAAACTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTGCTCTGGGGAGTGCACCATCCTCCG 75 | >A.duck.NJ.771770.1995.EU026110_1995 76 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAATTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCTTACTCTGGAGCAAGCAGTTTTTATCGGAATTTGCTGTGGATAACAAAGAAGGGAACTTCATATCCAAAACTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTGCTCTGGGGAGTGCACCATCCTCCG 77 | 
>A.mallard.Alberta.211.1998.AY633212_1998 78 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACATCCAATTCAGAGAACGGGACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGGGAGCAATTGAGTTCGGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCTTACTCTGGGGCCAGCAGTTTTTATCGAAATTTGCTGTGGATAATAAAGAAGGGAACTTCATATCCAAAACTCAGCAAGTCATACACGAACAATAAGGGAAAAGAAGTGCTTGTGCTCTGGGGAGTGCACCATCCTCCG 79 | >A.mallard.MD.403.2002.EU026082_2002 80 | GTGAATTTGCTCGAAGACCGCCATAATGGGAAGCTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACGTCCAATTCAGAGAACGGGACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGGGAGCAATTGAGTTCAGTTTCTTCTTTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGGGCCAGCAGTTTTTATCGAAATTTGCTGTGGATAGTAAAGAAGGGAACTTCATACCCGAAACTCAGCAAGTCATACACGAACAATAAGGGAAAAGAAGTGCTTGTGCTCTGGGGAGTGCACCATCCTCCG 81 | >A.swine.29.37.U04857_1937 82 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTAGGGGGAATAGCCCCATTGCAACTGGGTAAATGTAATATTGCCGGATGCGTCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAACTCATGGTCCTATATTGTAGAAACATCGAACTCAGATAATGGGACATGTTACCCAGGAGATTTCATTGACTATGAAGAACTGAGAGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAGATATTTCCCAAAACAAGTTCGTGGCCCAATCATGAAACAACCAGAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTTTACAGAAATTTACTATGGCTGGTGAAGAAGGGAGATTCATATCCAAAGCTCAGCAAATCCTATGTTAACAATAAAGGGAAAGAAGTCCTTGTGCTATGGGGGGTTCACCATCCNCCT 83 | >A.swine.Ehime.1.80.X57494_1980 84 | 
GTTAACCTTCTTGAAGACAGACATAACGGGAAACTATGTAAACTAGGGGGGATAGCCCCATTGCATCTGGGTAAATGTAACATTGCCGGATGGCTTTTGGGAAATCCAGAATGTGAATTACTATTCACAGTAAGCTCATGGTCTTACATTGTGGAAACATCGAACTCAGACAATGGGACATGTTACCCAGGGGATTTCATCAATTATGAAGAGCTGAGAGAGCAGTTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTCCCCAAGACAAGTTCGTGGCCCAATCATGAAACAAACAGAGGTGTGACGGCAGCATGCCCTTATGCTGGAGCAAACAGTTTCTACAGAAATTTAATATGGCTTGTGAAAAAAGAAAACTCATATCCAAAGCTCCGCAAATCCTATGTTAACAATAAGGGGAAGGAAGTCCTTGTGCTATGGGGCATTCACCATCCACCT 85 | >A.swine.Illinois.63.X57493_1963 86 | GTTAATCTTCTTGAAGACAGACACAACGGGAAACTATGTAAACTAGGGGGAATAGCCCCATTGCACCTAGGTAAATGTAACATTGCCGGATGGCTTTTGGGAAACCCAGAATGTGAATTACTGCTCACAGTAAGCTCATGGTCTTATATTGTGGAAACATCGAACTCAGACAATGGGACATGTTACCCAGGAGATTTCATCAATTATGAAGAGCTGAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTTCCCAAGATAAGTTCGTGGCCCAATCATGAAACAAACAGAGGTGTAACGGCAGCATGCCCTTATGCTGGAGCAAACAGCTTTTTCAGAAATTTAATATGGCTGGTGAAGAAGGAGAGTTCATACCCAAAGCTCAGCAAATCCTATGTTAACAATAAAGGGAAGGAAGTCCTTGTGCTATGGGGTATTCACCATCCGCCT 87 | >A.swine.NewJersey.11.76.K00992_1976 88 | GTTAATCTTCTTGAAGACAGACATAACGGGAAACTATGTAAACTGGGGGGGATAGCCCCATTGCACTTGGGTAAATGTAACATTGCCGGTAGGCTTTTGGGAAACCCAGAATGTGAATTACTACTCACAGTAAGCTCATGGTCTTACATTGTGGAAACATCGAAATCAGACAATGGGACATGTTACCCAGGAGATTTCATCAATTATGAAGAGCTGAGAGAGCAGTTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTCCCCAAGACAAGTTCGTGGCCCAATCATGAAACAAACAGAGGTGTGACGGCAGCATGCCCTTATGCTGGAGCAAACAGCTTCTACAGAAATTTAATATGGCTGGTGAAAAAAGAAAATTCATACCCAAAGCTCAGCAAATCCTATGTTAACAATAAAGGGAAGGAAGTCCTTGTGCTATGGGGCATTCATCATCCACCT 89 | >A.swine.Wisconsin.1.61.AF091307_1961 90 | GTTAATCTGCTTGAAGACAGACACAACGGGAAACTATGTAAACTAGGGGGAATAGCCCCATTGCACCTAGGTAAATGTAACATTGCCGGATGGCTTTTGGGAAACCCAGAATGTGAATTACTGCTCACAGTAAGCTCATGGTCTTATATTGTGGAAACATCGAACTCAGACAATGGGACATGTTACCCAGGGGATTTCATCAATTATGAAGAGCTGAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTTCCCAAGACAAGTTCGTGGCCCAATCATGAAACAAACAGAGGTGTAACGGCAGCATGCCCTTATGCTGGAGCAAACAGCTTTTACAGAAATTTAATATGGCTGGTGAAGAAGGAGAGTTCATACCCAAAGCTCAGCAAATCCTATGTTAACAATAAAGGGAAGGAAGTCCTTGTGCTATGGGGTATTCACCATCCGCCT 91 | 
>A.swine.Thailand.271.2005.EF101749_2005 92 | GTTAACCTTCTAGAAGACAGGCACAATGGGAAGCTATGTAACCTAAGGGGGGAAGCCCCACTGCATTTGGGTAAATGTAACATTGCCGGATGGCTCCTAGGAAACCCAGAATGCGAATTACTATTTGCAGTAAACTCATGGTCTTACATTGTGGAAACATCGAACTCAGACAATGGGACATGTTACCCAGGAGATTTCACCAGTTATGAAGAGCTAAGAGAACAATTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTCCCCAAAGCAAGCTCTTGGCCCAACCATGAAACAAACAGAGGTGTAACGGCAGCATGCCCTTATGCTGGAACAAACAGCTTCTACAGGAATTTGATATGGCTAGTAAAAAAGGGAAACTCATATCCAAAGCTCAGTAAATCCTATGTTAATAATAAGAAGAAGGAAGTCCTTGTACTATGGGGCATCCACCATCCACCC 93 | >A.swine.HongKong.273.1994.U45452_1994 94 | GTTAACCTTCTAGAAGACAGACATAACGGGAAACTATGTAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGATGGCTCCTGGGAAATCCAGAGTGTGAATTACTATTCACAGCAAGCTCATGGTCTTACATTGTGGAAACATCTAATTCAGACAATGGGACATGTTACCCAGGAGATTTCATCAATTATGAAGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGATTTGAGATGTTCCCCAAGTCAAGTTCATGGCCCAATCATGAAACGAACAGAGGTGTGACGGCAGCATGTCCTTATGCTGGAGCAAACAGCTTCTACAGAAATTTAATATGGCTTGTAAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTATATTAACAATAAGGAGAAAGAAGTCCTCGTGCTATGGGGAATTCACCATCCACCT 95 | >A.swine.Iowa.15.1930.U47305_1930 96 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTAGGGGGAATAGCCCCATTACAACTGGGGAAATGTAATATTGCCGGATGGATCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAGCTCATGGTCCTATATTGTAGAAACATCGAACTCAGATAATGGGACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGAGAGCAACTAAGCTCAGTGTCATCATTCGAAAAATTCGAGATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAGAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTCTACAGAAATTTACTATGGCTGGTAAAGAAGGAAAATTCATACCCAAAGCTTAGCAAATCCTATGTTAACAATAAAGGGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCATCCGCCT 97 | >A.swine.Iowa.15.1930.X57492_1930 98 | 
GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTGGGAGGAATAGCCCCATTACAACTGGGGAAATGTAATATTGCCGGATGGCTCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAGCTCATGGTCCTATATTGTAGAAACACGGACCTCAGATAATGGGACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGAGAGCAACTGAACTCAATGTCATCATTCGAAAAATTCGAGATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAAAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTTTACAGAAACTTACTATGGCTGGTAAAGAAGGAAAATTCATACCCAAAGCTTAGCAAATCCTATGTTAACAATAAAGGAAAAGAAGTCCTTGTGCTATGGGGTGTTCATCATCCGCCT 99 | >A.swine.Iowa.15.1930.EU139823_1930 100 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTAGGGGGAATAGCCCCATTACAACTGGGGAAATGTAATATTGCCGGATGGCTCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAGCTCATGGTCCTATATTGTAGAAACATCGAACTCAGATAATGGGACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGAGAGCAACTAAGCTCAGTGTCATCATTCGAAAAATTCGAGATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAGAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTTTACAGAAATTTACTATGGCTGGTAAAGAAGGAAAATTCATACCCAAAGCTTAGCAAATCCTATGTTAACAATAAAGGGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCATCCGCCT 101 | >A.AlmaAta.1417.1984.S62154_1984 102 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTGGGAGGAATAGCCCCATTACAACTGGGGAAATGTAATATTGCCGGTAGGCTCTTGGGAAACCCAGAATGCGAATTGCTGCTCACGGTGAGCTCATGGTCCTATATTGTAGAAACATCGGACTCAGATAATGGGACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGAGAGCAACTGAACTCAATGTCATCATTCGAAAAATTCGAGATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAAAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTTTACAGAAACTTACTATGGCTGGTAAAGAAGGAAAATTCATACCCAAAGCTTAGCAAATCCTATGTTAACAATAAAGGAAAAGAAGTCCTTGTGCTATGGGGTGTTCATCATCCGCCT 103 | >A.swine.StHyacinthe.148.1990.U11703_1990 104 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTAGGGGGAATAGCCCCATTACAACTGGGGAAATGTAATATTGCCGGATGGCTCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAGCTCATGGTCCTATATTGTAGAAACATCGAACTCAGATAATGGGACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGAGAGCAACTAAGCTCAGTGTCATCATTCGAAAAATTCGAGATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAGAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTTTACAGAAATTTACTATGGCTGGTAAAGAAGGAAANTTCATACCCAAAGCTTAGCAAATCCTATGTTAACAATAAAGGGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCATCCGCCT 
105 | -------------------------------------------------------------------------------- /examples/data/ice_viruses.fasta.treefile: -------------------------------------------------------------------------------- 1 | (A.Fiji.15899.83.AJ289702_1983:0.0225365992,(A.Kiev.59.1979.M38353_1979:0.0044063580,((((A.FortMonmouth.1.1947.U02085_1947:0.0060795414,((A.Meguro.1.1956.AB043485_1956:0.0188631900,A.Saga.2.1957.AB043486_1957:0.0255477164):0.0035469108,A.Kojiya.1.1952.AB043482_1952:0.0000010000):0.0274954280):0.0112137610,((((A.BrevigMission.1.18.AF116575_1918:0.0061671626,(((A.goose.HongKong.8.1976.U46021_1976:0.0173913752,(((A.mallard.Tennessee.11464.85.AF091311_1985:0.0173712916,(A.teal.Alberta.141.1992.CY004539_1992:0.0085090458,((((((A.pintail.Ohio.25.1999.CY017725_1999:0.0000000000,A.mallard.Ohio.56.1999.CY012824_1999:0.0000000000):0.0000000000,A.mallard.Ohio.66.1999.CY016955_1999:0.0000000000):0.0000010000,A.Teal.Ohio.72.1999.CY017717_1999:0.0000010000):0.0000010000,A.BrantGoose.1.1917.AY095226_1917:0.0000010000):0.0140357840,(A.mallard.Alberta.211.1998.AY633212_1998:0.0020287280,A.mallard.MD.403.2002.EU026082_2002:0.0160944617):0.0124102429):0.0000020609,(A.mallard.ALB.267.1996.CY004504_1996:0.0020053127,A.duck.NJ.771770.1995.EU026110_1995:0.0039400706):0.0018940840):0.0250570402):0.0214717374):0.0101504815,A.duck.Alberta.35.76.U47310_1976:0.0253721489):0.0736037938,A.duck.HK.196.1977.D00839_1977:0.0532728540):0.0147304465):0.0290343792,A.duck.Australia.749.80.AF091312_1980:0.0193538639):0.3007691507,(((A.swine.29.37.U04857_1937:0.0197444807,((((A.swine.Ehime.1.80.X57494_1980:0.0101060184,(A.swine.Thailand.271.2005.EF101749_2005:0.0816472254,A.swine.HongKong.273.1994.U45452_1994:0.0384696748):0.0237732993):0.0135430554,A.swine.NewJersey.11.76.K00992_1976:0.0120939688):0.0327961058,A.swine.Illinois.63.X57493_1963:0.0041529782):0.0023752980,A.swine.Wisconsin.1.61.AF091307_1961:0.0014907340):0.0622118742):0.0165006167,(A.swine.Iowa.15.1930.U47305_1930:0.0039689088,(A
.swine.Iowa.15.1930.EU139823_1930:0.0000010000,A.swine.StHyacinthe.148.1990.U11703_1990:0.0000010000):0.0000010000):0.0019675273):0.0027110493,(A.swine.Iowa.15.1930.X57492_1930:0.0060967682,A.AlmaAta.1417.1984.S62154_1984:0.0102470201):0.0115511712):0.0554586947):0.0033674386):0.0836106020,((A.Tokyo.3.1967.U38242_1967:0.0052010471,A.WS.33.U08904_1933:0.0047809277):0.0093388386,(A.WSN.1933.CY010788_1933:0.0019834304,A.WilsonSmith.1933.DQ508905_1933:0.0020081950):0.0267044517):0.0196615124):0.0040324481,((((A.PuertoRico.8.34.J02144_1934:0.0000000000,A.Mongolia.153.1988.Z54287_1988:0.0000000000):0.0000010000,A.PuertoRico.8.1934.EF467821_1934:0.0000010000):0.0000010000,A.Mongolia.111.1991.Z54288_1991:0.0019811635):0.0080130157,A.PuertoRico.8.1934.J04572_1934:0.0000010000):0.0256731110):0.0339931412,A.Huston.43.AF494251_1943:0.0274881011):0.0309406850):0.0303861545,A.Lepine.1948.AB043479_1948:0.0083093368):0.0081943988,A.USSR.90.1977.K01331_1977:0.0060750547):0.0035899200):0.0061715438,((A.SouthCarolina.6.1988.L19025_1988:0.0058921898,(A.Yamagata.32.1989.D31949_1989:0.0019932881,(A.BuenosAires.T114.97.AF534026_1997:0.0315205588,(A.Beijing.262.95.AY289928_1995:0.0104412671,A.Alaska.1173.00.AY029287_2000:0.0274942819):0.0130006148):0.0036889941):0.0041257183):0.0186435452,A.Finland.4.1984.L33491_1984:0.0020744044):0.0025350891); 2 | -------------------------------------------------------------------------------- /examples/data/ice_viruses_cleaned.fasta: -------------------------------------------------------------------------------- 1 | >A.Fiji.15899.83.AJ289702_1983 2 | 
GTCAACCTACTTGAGGACAATCACAACGGGAAACTATGCAAACTAAAAGGAATAGCGCCACTACAATTGGGGAAATGCAACATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAAGAACTGAGGGAACAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGTTCATGGCCCAACCACAACGTAACCAAAGGAGTAACGGCATCATGTTCCCATAAGGGGAAAAGCAGTTTTTACAGAAACTTGCTATGGCTGACGGAGAAAAATGGCTCGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT 3 | >A.Kiev.59.1979.M38353_1979 4 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGCAGACTGAAAGGAATAGCTCCACTACAATTGGGGAAATGCAGCATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGATATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACGTAACCAGAGGAGTAACGGCATCATGCTCCCATAAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAAAATGGCTCGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT 5 | >A.FortMonmouth.1.1947.U02085_1947 6 | GTAAACCTACTCGAAGACAGCCACAACGGGAAATTATGCAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGCTTTCTAAGAGATCATGGTCCTACATTGCAGAAACACCAAACTCTGAGAATGGAGCATGTTACCCAGGAGATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACATAACCAGAGGAGTAACGGCAGCATGCTCCCATGCGGGGAAAAGCAGTTTTTACAAAAATTTGCTCTGGCTGACGGAGACAGATGGCTCATACCCAAAGCTGAGCAAGTCCTATGTGAACAATAAAGAGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCACCCGTCT 7 | >A.BrevigMission.1.18.AF116575_1918 8 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAAATTAAAAGGAATAGCCCCATTACAATTGGGGAAATGTAATATCGCCGGATGGCTCTTGGGAAACCCGGAATGCGATTTACTGCTCACAGCGAGCTCATGGTCCTATATTGTAGAAACATCGAACTCAGAGAATGGAACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGGGAGCAATTGAGCTCAGTGTCATCGTTCGAAAAATTCGAAATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAAAGGTGTAACGGCAGCATGCTCCTATGCGGGAGCAAGCAGTTTTTACAGAAATTTGCTGTGGCTGACAAAGAAGGGAAGCTCATACCCAAAGCTTAGCAAGTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCATCCGCCT 9 | 
>A.SouthCarolina.6.1988.L19025_1988 10 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGTCGACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAGTTGAGTTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCGTAACCAAAGGAGTAACGGCATCATGCTCCCATAAGGGGAGAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGCGTTCATCACCCGTCT 11 | >A.Yamagata.32.1989.D31949_1989 12 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGTCGACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGGAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAAGAAAGATCATGGCCCAACCACACCGTAACCAAAGGAGTAACGGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT 13 | >A.Finland.4.1984.L33491_1984 14 | GTCNACCTACTTGAGGACAGTCACAACGGAAAACTATGCAGACTAAAAGGAATAGCCCCACTACAATTGGGGAAATGCAGCATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATNNTTCGAGAGATTCGAAATATTCCCCAAGGAAAGTTCATGGCCCAAACACAACATAACCAAAGGAGTAACGGCATCATGCTCCCATAAGGGNAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAAAATGGCTTGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT 15 | >A.USSR.90.1977.K01331_1977 16 | 
GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGCAGACTAAAAGGGATAGCCCCACTACAATTGGGGAAATGCAACATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCCGAGAATGGAACATGTTACCCAGGATATTTCGCCGACTATGAGGAATTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACGTAACCAGAGGCGTAACGGCATCATGCTCCCATAAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAAAATGGCTCGTACCCAAATCTGAGCAAGTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT 17 | >A.Lepine.1948.AB043479_1948 18 | GTCAACCTACTCGAGGACAGTCACAACGGAAAACTATGCAGACTAAAAGGAATAGCCCCACTACAATTGGGGAAATGCAACATTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGTTTTCTAAGAAATCATGGTCCTACATTGCAGAAACACCAAACTCTGAGAATGGAACATGTTACCCAGGATATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTTTCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGGTCATGGCCCAAACACAACGTAACCAGAGGAGTAACGGCAGCATGCTCCCATAAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAAAATGGCTCGTACCCAACTCTGAGCAAGTCCTATGTGAACAATAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCAACCGTCT 19 | >A.Meguro.1.1956.AB043485_1956 20 | GTAAACCTACTCGAAGACAGCCACAATGGGAAATTATGCAGATTAAAAGGAAAAGCCCCACTACAATTGGGGAAATGTAACATTGCCGGATGGGTCTTAGGAAACCCAGAATGTGAATCATTGCTTTCCAATAGATCATGGTCCTACATTGCAGAAACACCAAACCCTGAGAATGGGACATGTTACCCAGGAGATTTCACCAACTATGAGGAACTGAAGGAGCAATTGAGCTCAGTGTCATCACTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACACAACCAGAGGAGTGACGGCAGCATGCTCCCATGCGAGGAAAAGCAGTTTCTACAAAAATTTGGTCTGGCTGACGGAGGCAAATGGCTCATACCCAACTCTGAGCAATTCCTATGTGAACAATCAAGAGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCACCCGTCT 21 | >A.Kojiya.1.1952.AB043482_1952 22 | GTAAACCTACTCGAAGACAGCCACAATGGGAAATTATGCAGATTAAAAGGAAAAGCCCCACTACAATTGGGGAAATGTAACATTGCCGGATGGGTCTTAGGAAACCCAGAATGCGAATCATTGCTTTCCAATAGATCATGGTCCTACATTGCAGAAACACCAAACTCTGAGAATGGGACATGTTACCCAGGAGATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAAACACAACACAACCAGAGGAGTGACGGCAGCATGCTCCCATGCGGGGAAAAGCAGTTTTTACAAAAATTTGGTCTGGCTGACGGAGGCAAATGGCTCATACCCAAATCTGAGCAAGTCCTATGTGAACAATCAAGAGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCACCCGTCT 23 | 
>A.Huston.43.AF494251_1943 24 | GTCAACCTACTCGAAGACAGCCACAACGGGAAATTATGTAGATTAAAAGGAATAGCCCCACTACAATTGAGGAAATGTAACATTGCTGGATGGATCCTGGGAAACCCAGAATGCGAATCACTGCTTTCAGAGAGATCATGGTCCTACATTGTTGAAACACCAAACTCTGAGAATGGAACATGTTACCCAGGAGATTTTACCAACTATGAGGAATTGAGGGAGCAATTGAGCTCTGTATCATCATTCGAAAGATTCGAAATATTCCCCAAGGAAAGCTCATGGCCCAAACACAACACAACCAGAGGAGTAACGGCAGCATGCTCCCATGCGGGAAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGATGGCTCATATCCGAATCTGAACAATTCCTATGTGAACAAGAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCC 25 | >A.BuenosAires.T114.97.AF534026_1997 26 | GTCAACCTACTTGAGGACAGTCACAACGGAAAACTATGCCGACTAAAAGGAACAGCCCCACTACAATTGGGTAATTGCAGCATTGCCGGATGGATCTTAGGAAATCCAGAATGCGAATCACTGTTTTCTAGGGAATCATGGTCCTACATTGCAGAAACACCAAACCCTGAAAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTTGAAATATTCCCCAAGGAAAGCTCATGGCCCAACCACACCGTAACCAAAGGAGTGACGGCATCATGCTCCCATAATGGGAAAAGCAGCTTTTACAAAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAAGTCCTATGTAAACAACAAGGGGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCACCCGTCT 27 | >A.PuertoRico.8.34.J02144_1934 28 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACCCACTGCTTCCAGTGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAATATGTTATCCAGGAGATTTCATCGACTATGAGGAGCTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAAATATTTCCCAAAGAAAGCTCATGGCCCAACCACAACACAAAC---GGAGTAACGGCAGCATGCTCCCATGAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGAGGGCTCATACCCAAAGCTGAAAAATTCTTATGTGAACAAAAAAGGGAAAGAAGTCCTTGTACTGTGGGGTATTCATCACCCGCCT 29 | >A.PuertoRico.8.1934.J04572_1934 30 | 
GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACCCACTGCTTCCAGTGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAATATGTTATCCAGGAGATTTCATCGACTATGAGGAGCTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAAATATTTCCCAAAGAAAGCTCATGGCCCAACCACAACACAACCAAAGGAGTAACGGCAGCATGCTCCCATGCGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGAGGGCTCATACCCAAAGCTGAAAAATTCTTATGTGAACAAGAAAGGGAAAGAAGTCCTTGTACTGTGGGGTATTCATCACCCGTCT 31 | >A.PuertoRico.8.1934.EF467821_1934 32 | GTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAGATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACCCACTGCTTCCAGTGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAATATGTTATCCAGGAGATTTCATCGACTATGAGGAGCTGAGGGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAAATATTTCCCAAAGAAAGCTCATGGCCCAACCACAACACAAAC---GGAGTAACGGCAGCATGCTCCCATGAGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGAGGGCTCATACCCAAAGCTGAAAAATTCTTATGTGAACAAAAAAGGGAAAGAAGTCCTTGTACTGTGGGGTATTCATCACCCGCCT 33 | >A.Saga.2.1957.AB043486_1957 34 | GTAAACCTACTCGAAGACAGCCACAATGGGAAATTATGCAGATTAAAAGGAAAAGCCCCACTACAATTGGGGAACTGTAACATTGCCGGATGGGTCTTAGGAAACCCAGAATGTGAATCATTGCTTTCCGGTAGATCATGGTCCTACATTGCAGAAACACCAAACTCTGAGAATGGGACGTGCTACCCAGGGGATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTCGAGAGATTCGAAATATTCCCCAAGGAAAGATCATGGCCCAACCAC---ACAACCAGAGGAGTGACGGCAGCATGCCCCCATGCGAAGAAAAGCAGTTTTTACAAAAATTTGGTCTGGCTGACGGAGGCAAATGGCTCATACCCAAATCTGAGCAGGTCCTATGTGAACAATCAGGAGAAAGAAGTCCTTGTGCTATGGGGAGTTCATCACCCGTCT 35 | >A.Beijing.262.95.AY289928_1995 36 | GTCAACCTACTTGAGGACAGTCACAATGGAAAACTATGTCTACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATCACTGATTTCTAAGGAATCATGGTCCTACATTGTAGAGACACCAAACCCTGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCATCATTTGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAAACACACCGTAACA---GGAGTAACGGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGAATGGCTTGTACCCAAATCTGAGCAATTCCTATGTGAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCATCT 37 | 
>A.Alaska.1173.00.AY029287_2000 38 | GTCAACCTACTTGAGGACAGTCACAATGGAAAACTATGTCTACTAAAAGGAATAGCCCCACTACAATTGGGTAATTGCAGCGTTGCCGGATGGATCTTAGGAAACCCAGAATGCGAATTACTGATTTCCAAGGAATCATGGTCCTACATTGTAGAAACACCAAATCCTGAGAATGGAACATGTTACCCAGGGTATTTCGCCGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCTTCATTTGAGAGATTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCGTAACC---GGAGTATCAGCATCATGCTCCCATAATGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGGGAAGAATGGTTTGTACCCAAGCCTGAGCAAGTCCTATGCAAACAACAAAGAGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGCCT 39 | >A.WS.33.U08904_1933 40 | GTTAACCTGCTCGAAGACAGCCACAACGGGAAACTATGTAAATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACTCACTGCTTCCAGCGAAATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAGCATGTTATCCAGGAGATTTCATCGACTATGAGGAACTGAAGGAGCAATTGAGCTCAGTATCATCATTAGAAAGATTCGAAATATTTCCCAAGGAAAGTTCATGGCCCAACCACAACACACTCAAAGGAGTAACAGCAGCATGCTCCCATAGGGGAAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGAAAACGGGGGACTCATACCCAAAGCTGAACAATTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT 41 | >A.WSN.1933.CY010788_1933 42 | GTTAACCTGCTCGAAGACAGACACAACGGGAAACTATGTAAATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCACCGGATGGCTCTTGGGAAATCCAGAATGCGACTCACTGCTTCCAGCGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAGCATGTTATCCAGGAGATTTCATCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTAGAAAGATTCGAAATATTTCCCAAGGAAAGTTCATGGCCCAACCACACATTCAAC---GGAGTAACAGTATCATGCTCCCATAGGGGAAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGAAGAAGGGGGATTCATACCCAAAGCTGACCAATTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT 43 | >A.WilsonSmith.1933.DQ508905_1933 44 | GTTAACCTGCTCGAAGACAGACACAACGGGAAACTATGTAAATTAAAAGGAATAGCCCCACTACAATTGGGGAAATGTAACATCACCGGATGGCTCTTGGGAAATCCAGAATGCGACTCACTGCTTCCAGCGAGATCATGGTCCTACATTGTAGAAACACCAAACTCTGAGAATGGAGCATGTTATCCAGGAGATTTCATCGACTATGAGGAACTGAGGGAGCAATTGAGCTCAGTATCATCATTAGAAAGATTCGAAATATTTCCCAAGGAAAGTTCATGGCCCAACCACACATTCAAC---GGAGTAACAGCATCATGCTCCCATAGGGGAAAAAGCAGTTTTTACAGAAATTTGATATGGCTGACGAAGAAGGGGGATTCATACCCAAAGCTGACCAATTCCTATGTGAACAATAAAGGGAAAGAAGTCCTTGTACTATGGGGTGTTCATCACCCGTCT 
45 | >A.goose.HongKong.8.1976.U46021_1976 46 | GTTAATTTACTCGAAAACAGCCATAATGGAAAACTCTGCAGCCTGAATGGAATAGCCCCTTTACAACTAGGGAAATGCAACGTGGCGGGGTGGCTCCTTGGCAACCCAGAATGTGACCTGCTGCTCACTGCGAGTTCATGGTCCTACATAATAGAGACTTCAAATTCAGAAAACGGAACATGCTACCCCGGAGAGTTCATTGATTATGAAGAGTTAAGGGAACAGCTAAGTTCAGTGTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAATCTCATGGCCAAATCATGAAACAACCAAAGGTGTCACAGCTGCATGCTCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTATGGATAACAAAGAAAGGAACTTCCTACCCTAAACTCAGCAAATCATACACGAACAACAAAGGGAAAGAAGTGCTTGTACTTTGGGGGGTGCATCATCCTCCA 47 | >A.duck.Australia.749.80.AF091312_1980 48 | GTTAATTTACTTGAAAACAGCCATAATGGAAAACTTTGCAGCCTGAATGGAATAGCCCCATTACAACTAGGGAAATGCAACGTGGCGGGGTGGCTCCTTGGCAACCTAGAATGTGACCTGTTGCTCACTGCGAATTCATGGTCTTATATAATAGAGACTTCAAATTCAGAAAACGGAACATGTTACCCCGGGGAGTTCATCGATTATGAGGAATTAAGAGAACAGCTAAGTTCAGTGTCTTCATTTGAGAAATTTGAAATTTTCCCGAAGGCAAGCTCATGGCCAAATCATGAGACAACCAAAGGTGTCACAGCTGCATGCTCTTACTTGGGAGCTAGCAGCTTTTATCGGAATTTGCTATGGATGACAAAGAAGGGAACTTCCTATCCTAAACTCAGCAAATCATATACGAACAACAAAGGGAAAGAAGTGCTTGTACTTTGGGGGGTGCATCACCCTCCC 49 | >A.mallard.Tennessee.11464.85.AF091311_1985 50 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGTCTGAACGGGATAGCTCCCCTACAATTGGGGAAGTGCAATGTAGCGGGATGGCTCCTTGGCAATCCAGAGTGTGACCTTCTACTCACTGCAAACTCATGGTCCTACATAATAGAGACTTCCAATTCAGAAAACGGGACATGCTACCCCGGTGAATTCATAGATTATGAAGAATTAAGAGAGCAGCTAAGTTCAGTTTCTTCATTTGAAAGGTTTGAAATTTTCCCGAAGGCAAACTCATGGCCAAATCATGAGACAACTAAAGGTGTTACAGCCGCATGCTCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGATAACAAAGAAGGGAACGTCATATCCAAAACTCAGCAAATCATACACGAACAATAAAGGGAAAGAAGTACTCGTGCTCTGGGGAGTGCACCACCCTCCA 51 | >A.duck.HK.196.1977.D00839_1977 52 | 
GTTAATTTACTAGAAAACAGCCATAATGGAAAACTCTGCAGACTGAATGGAATAGCCCCCTTACAGCTAGGGAAATGCAACGTGGCAGGATGGATCCTTGGCAACCCAGAGTGTGATCTATTGCTCACAGCGAATTCATGGTCTTACATAATAGAGACTTCAAATTCAGAGAATGGAACATGCTACCCCGGAGAGTTCAATGATTATGAAGAATTAAGGGAACAGCTGAGTTCGGTGTCTTCATTTGAAAAGTTTGAAATTTTCCCAAAGGCTAGCTCATGGCCAAATCATGAGACAACTAAAGGTATTACAGCTGCATGTCCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTATGGATAACAAAGAAGGGAACTTCATACCCTAAACTCAGCAAATCATACACAAACAACAAAGGGAAAGAAGTGCTTGTAATCTGGGGAGTGCACCACCCTCCA 53 | >A.teal.Alberta.141.1992.CY004539_1992 54 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTGAACGGGATAGCTCCTCTACAATTGGGGAAGTGCAATGTAGCGGGGTGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAATGGGACATGCTATCCCGGTGAGTTCATAGATTATGAAGAATTAAGAGAGCAGCTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCGAAGGCAAGCTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGATAACAAAGAAGGGAACTTCATATCCAAAACTCAGCAAATCATACACAAACAATAAGGGGAAGGAAGTGCTCGTGCTCTGGGGAGTGCACCACCCTCCA 55 | >A.pintail.Ohio.25.1999.CY017725_1999 56 | GTGAATTTGCTCGAAAACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTTTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG 57 | >A.Teal.Ohio.72.1999.CY017717_1999 58 | GTGAATTTGCTCGAAAACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTTTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG 59 | 
>A.mallard.Ohio.66.1999.CY016955_1999 60 | GTGAATTTGCTCGAAAACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTTTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG 61 | >A.mallard.Ohio.56.1999.CY012824_1999 62 | GTGAATTTGCTCGAAAACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTTTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGCTAACAAAGAAGGGAACTTCATATCCAAAGCTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTACTCTGGGGAGTGCACCATCCTCCG 63 | >A.mallard.ALB.267.1996.CY004504_1996 64 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAACTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGGACATGCTATCCCGGTGAATTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCTTACTCTGGAGCCAGCAGTTTTTATCGGAATTTGCTGTGGATAACAAAGAAGGGAACTTCATATCCAAAACTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTGCTCTGGGGAGTGCACCATCCTCCG 65 | >A.duck.NJ.771770.1995.EU026110_1995 66 | 
GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACTTCCAATTCAGAGAACGGAACATGCTATCCCGGTGAATTCATAGATTATGAGGAATTAAGAGAGCAATTGAGTTCAGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCTTACTCTGGAGCAAGCAGTTTTTATCGGAATTTGCTGTGGATAACAAAGAAGGGAACTTCATATCCAAAACTCAGCAAATCATACACGAACAATAAGGGAAAAGAAGTGCTTGTGCTCTGGGGAGTGCACCATCCTCCG 67 | >A.mallard.Alberta.211.1998.AY633212_1998 68 | GTGAATTTGCTCGAAGACAGCCATAATGGGAAACTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACATCCAATTCAGAGAACGGGACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGGGAGCAATTGAGTTCGGTTTCTTCATTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCTTACTCTGGGGCCAGCAGTTTTTATCGAAATTTGCTGTGGATAATAAAGAAGGGAACTTCATATCCAAAACTCAGCAAGTCATACACGAACAATAAGGGAAAAGAAGTGCTTGTGCTCTGGGGAGTGCACCATCCTCCG 69 | >A.mallard.MD.403.2002.EU026082_2002 70 | GTGAATTTGCTCGAAGACCGCCATAATGGGAAGCTCTGCAGCCTGAATGGGATAGCTCCTTTACAATTGGGGAAGTGTAATGTAGCGGGATGGCTCCTGGGCAACCCAGAATGTGACCTTCTACTCACTGCAAACTCATGGTCCTATATAATAGAGACGTCCAATTCAGAGAACGGGACATGCTATCCCGGTGAGTTCATAGATTATGAGGAATTAAGGGAGCAATTGAGTTCAGTTTCTTCTTTTGAAAAGTTTGAAATTTTCCCCAAGGCAAACTCATGGCCAAACCATGAGACAACTAAAGGTGTTACAGCTGCCTGCTCCTACTCTGGGGCCAGCAGTTTTTATCGAAATTTGCTGTGGATAGTAAAGAAGGGAACTTCATACCCGAAACTCAGCAAGTCATACACGAACAATAAGGGAAAAGAAGTGCTTGTGCTCTGGGGAGTGCACCATCCTCCG 71 | >A.swine.29.37.U04857_1937 72 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTAGGGGGAATAGCCCCATTGCAACTGGGTAAATGTAATATTGCCGGATGCGTCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAACTCATGGTCCTATATTGTAGAAACATCGAACTCAGATAATGGGACATGTTACCCAGGAGATTTCATTGACTATGAAGAACTGAGAGAGCAATTGAGCTCAGTGTCATCATTCGAAAGATTCGAGATATTTCCCAAAACAAGTTCGTGGCCCAATCATGAAACAACCAGAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTTTACAGAAATTTACTATGGCTGGTGAAGAAGGGAGATTCATATCCAAAGCTCAGCAAATCCTATGTTAACAATAAAGGGAAAGAAGTCCTTGTGCTATGGGGGGTTCACCATCCNCCT 73 | 
>A.swine.Ehime.1.80.X57494_1980 74 | GTTAACCTTCTTGAAGACAGACATAACGGGAAACTATGTAAACTAGGGGGGATAGCCCCATTGCATCTGGGTAAATGTAACATTGCCGGATGGCTTTTGGGAAATCCAGAATGTGAATTACTATTCACAGTAAGCTCATGGTCTTACATTGTGGAAACATCGAACTCAGACAATGGGACATGTTACCCAGGGGATTTCATCAATTATGAAGAGCTGAGAGAGCAGTTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTCCCCAAGACAAGTTCGTGGCCCAATCATGAAACAAACAGAGGTGTGACGGCAGCATGCCCTTATGCTGGAGCAAACAGTTTCTACAGAAATTTAATATGGCTTGTGAAAAAAGAAAACTCATATCCAAAGCTCCGCAAATCCTATGTTAACAATAAGGGGAAGGAAGTCCTTGTGCTATGGGGCATTCACCATCCACCT 75 | >A.swine.Illinois.63.X57493_1963 76 | GTTAATCTTCTTGAAGACAGACACAACGGGAAACTATGTAAACTAGGGGGAATAGCCCCATTGCACCTAGGTAAATGTAACATTGCCGGATGGCTTTTGGGAAACCCAGAATGTGAATTACTGCTCACAGTAAGCTCATGGTCTTATATTGTGGAAACATCGAACTCAGACAATGGGACATGTTACCCAGGAGATTTCATCAATTATGAAGAGCTGAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTTCCCAAGATAAGTTCGTGGCCCAATCATGAAACAAACAGAGGTGTAACGGCAGCATGCCCTTATGCTGGAGCAAACAGCTTTTTCAGAAATTTAATATGGCTGGTGAAGAAGGAGAGTTCATACCCAAAGCTCAGCAAATCCTATGTTAACAATAAAGGGAAGGAAGTCCTTGTGCTATGGGGTATTCACCATCCGCCT 77 | >A.swine.NewJersey.11.76.K00992_1976 78 | GTTAATCTTCTTGAAGACAGACATAACGGGAAACTATGTAAACTGGGGGGGATAGCCCCATTGCACTTGGGTAAATGTAACATTGCCGGTAGGCTTTTGGGAAACCCAGAATGTGAATTACTACTCACAGTAAGCTCATGGTCTTACATTGTGGAAACATCGAAATCAGACAATGGGACATGTTACCCAGGAGATTTCATCAATTATGAAGAGCTGAGAGAGCAGTTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTCCCCAAGACAAGTTCGTGGCCCAATCATGAAACAAACAGAGGTGTGACGGCAGCATGCCCTTATGCTGGAGCAAACAGCTTCTACAGAAATTTAATATGGCTGGTGAAAAAAGAAAATTCATACCCAAAGCTCAGCAAATCCTATGTTAACAATAAAGGGAAGGAAGTCCTTGTGCTATGGGGCATTCATCATCCACCT 79 | >A.swine.Wisconsin.1.61.AF091307_1961 80 | 
GTTAATCTGCTTGAAGACAGACACAACGGGAAACTATGTAAACTAGGGGGAATAGCCCCATTGCACCTAGGTAAATGTAACATTGCCGGATGGCTTTTGGGAAACCCAGAATGTGAATTACTGCTCACAGTAAGCTCATGGTCTTATATTGTGGAAACATCGAACTCAGACAATGGGACATGTTACCCAGGGGATTTCATCAATTATGAAGAGCTGAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTTCCCAAGACAAGTTCGTGGCCCAATCATGAAACAAACAGAGGTGTAACGGCAGCATGCCCTTATGCTGGAGCAAACAGCTTTTACAGAAATTTAATATGGCTGGTGAAGAAGGAGAGTTCATACCCAAAGCTCAGCAAATCCTATGTTAACAATAAAGGGAAGGAAGTCCTTGTGCTATGGGGTATTCACCATCCGCCT 81 | >A.swine.Thailand.271.2005.EF101749_2005 82 | GTTAACCTTCTAGAAGACAGGCACAATGGGAAGCTATGTAACCTAAGGGGGGAAGCCCCACTGCATTTGGGTAAATGTAACATTGCCGGATGGCTCCTAGGAAACCCAGAATGCGAATTACTATTTGCAGTAAACTCATGGTCTTACATTGTGGAAACATCGAACTCAGACAATGGGACATGTTACCCAGGAGATTTCACCAGTTATGAAGAGCTAAGAGAACAATTGAGCTCAGTGTCATCATTTGAAAGATTCGAGATATTCCCCAAAGCAAGCTCTTGGCCCAACCATGAAACAAACAGAGGTGTAACGGCAGCATGCCCTTATGCTGGAACAAACAGCTTCTACAGGAATTTGATATGGCTAGTAAAAAAGGGAAACTCATATCCAAAGCTCAGTAAATCCTATGTTAATAATAAGAAGAAGGAAGTCCTTGTACTATGGGGCATCCACCATCCACCC 83 | >A.swine.HongKong.273.1994.U45452_1994 84 | GTTAACCTTCTAGAAGACAGACATAACGGGAAACTATGTAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGATGGCTCCTGGGAAATCCAGAGTGTGAATTACTATTCACAGCAAGCTCATGGTCTTACATTGTGGAAACATCTAATTCAGACAATGGGACATGTTACCCAGGAGATTTCATCAATTATGAAGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGATTTGAGATGTTCCCCAAGTCAAGTTCATGGCCCAATCATGAAACGAACAGAGGTGTGACGGCAGCATGTCCTTATGCTGGAGCAAACAGCTTCTACAGAAATTTAATATGGCTTGTAAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTATATTAACAATAAGGAGAAAGAAGTCCTCGTGCTATGGGGAATTCACCATCCACCT 85 | >A.swine.Iowa.15.1930.U47305_1930 86 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTAGGGGGAATAGCCCCATTACAACTGGGGAAATGTAATATTGCCGGATGGATCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAGCTCATGGTCCTATATTGTAGAAACATCGAACTCAGATAATGGGACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGAGAGCAACTAAGCTCAGTGTCATCATTCGAAAAATTCGAGATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAGAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTCTACAGAAATTTACTATGGCTGGTAAAGAAGGAAAATTCATACCCAAAGCTTAGCAAATCCTATGTTAACAATAAAGGGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCATCCGCCT 87 
| >A.swine.Iowa.15.1930.X57492_1930 88 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTGGGAGGAATAGCCCCATTACAACTGGGGAAATGTAATATTGCCGGATGGCTCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAGCTCATGGTCCTATATTGTAGAAACACGGACCTCAGATAATGGGACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGAGAGCAACTGAACTCAATGTCATCATTCGAAAAATTCGAGATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAAAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTTTACAGAAACTTACTATGGCTGGTAAAGAAGGAAAATTCATACCCAAAGCTTAGCAAATCCTATGTTAACAATAAAGGAAAAGAAGTCCTTGTGCTATGGGGTGTTCATCATCCGCCT 89 | >A.swine.Iowa.15.1930.EU139823_1930 90 | GTTAACCTGCTTGAAGACAGCCACAACGGAAAACTATGTAGACTAGGGGGAATAGCCCCATTACAACTGGGGAAATGTAATATTGCCGGATGGCTCTTGGGAAACCCAGAATGCGATTTGCTGCTCACAGTGAGCTCATGGTCCTATATTGTAGAAACATCGAACTCAGATAATGGGACATGTTACCCAGGAGATTTCATCGACTATGAAGAACTGAGAGAGCAACTAAGCTCAGTGTCATCATTCGAAAAATTCGAGATATTTCCCAAGACAAGCTCGTGGCCCAATCATGAAACAACCAGAGGTGTAACGGCAGCATGCCCCTATGCTGGAGCAAGCAGCTTTTACAGAAATTTACTATGGCTGGTAAAGAAGGAAAATTCATACCCAAAGCTTAGCAAATCCTATGTTAACAATAAAGGGAAAGAAGTCCTTGTGCTATGGGGTGTTCATCATCCGCCT 91 | -------------------------------------------------------------------------------- /examples/data/ice_viruses_cleaned.fasta.treefile: -------------------------------------------------------------------------------- 1 | 
(A.Fiji.15899.83.AJ289702_1983:0.0225066451,(A.Kiev.59.1979.M38353_1979:0.0043867334,((((A.FortMonmouth.1.1947.U02085_1947:0.0060469005,((A.Meguro.1.1956.AB043485_1956:0.0188000079,A.Saga.2.1957.AB043486_1957:0.0254876478):0.0035208056,A.Kojiya.1.1952.AB043482_1952:0.0000010000):0.0274963883):0.0111458950,((((A.BrevigMission.1.18.AF116575_1918:0.0058389439,(((A.goose.HongKong.8.1976.U46021_1976:0.0170135328,((A.mallard.Tennessee.11464.85.AF091311_1985:0.0216301767,(A.teal.Alberta.141.1992.CY004539_1992:0.0084453155,(((((A.pintail.Ohio.25.1999.CY017725_1999:0.0000000000,A.mallard.Ohio.56.1999.CY012824_1999:0.0000000000):0.0000000000,A.mallard.Ohio.66.1999.CY016955_1999:0.0000000000):0.0000010000,A.Teal.Ohio.72.1999.CY017717_1999:0.0000010000):0.0139369554,(A.mallard.ALB.267.1996.CY004504_1996:0.0019874608,A.duck.NJ.771770.1995.EU026110_1995:0.0039298726):0.0019473678):0.0000020212,(A.mallard.Alberta.211.1998.AY633212_1998:0.0020549333,A.mallard.MD.403.2002.EU026082_2002:0.0160201914):0.0123046887):0.0249392287):0.0167112382):0.0797578005,A.duck.HK.196.1977.D00839_1977:0.0540106401):0.0141688170):0.0296933079,A.duck.Australia.749.80.AF091312_1980:0.0186920171):0.3016056265,(((A.swine.29.37.U04857_1937:0.0198096674,((((A.swine.Ehime.1.80.X57494_1980:0.0100776239,(A.swine.Thailand.271.2005.EF101749_2005:0.0812944476,A.swine.HongKong.273.1994.U45452_1994:0.0382951989):0.0238395020):0.0134469971,A.swine.NewJersey.11.76.K00992_1976:0.0120608173):0.0326587861,A.swine.Illinois.63.X57493_1963:0.0041348454):0.0023593120,A.swine.Wisconsin.1.61.AF091307_1961:0.0014986072):0.0619865326):0.0163570435,(A.swine.Iowa.15.1930.U47305_1930:0.0039530040,A.swine.Iowa.15.1930.EU139823_1930:0.0000010000):0.0019932448):0.0016388727,A.swine.Iowa.15.1930.X57492_1930:0.0189049204):0.0569543689):0.0029119936):0.0833447630,(A.WS.33.U08904_1933:0.0113878430,(A.WSN.1933.CY010788_1933:0.0019762617,A.WilsonSmith.1933.DQ508905_1933:0.0020001487):0.0257285722):0.0200884663):0.0045915750,((A.PuertoRico.
8.34.J02144_1934:0.0000010000,A.PuertoRico.8.1934.EF467821_1934:0.0000010000):0.0079816073,A.PuertoRico.8.1934.J04572_1934:0.0000020465):0.0253069847):0.0338418911,A.Huston.43.AF494251_1943:0.0272099316):0.0311305152):0.0302688369,A.Lepine.1948.AB043479_1948:0.0082842905):0.0081599174,A.USSR.90.1977.K01331_1977:0.0060504852):0.0035758745):0.0061509031,((A.SouthCarolina.6.1988.L19025_1988:0.0058647462,(A.Yamagata.32.1989.D31949_1989:0.0019840935,(A.BuenosAires.T114.97.AF534026_1997:0.0313954883,(A.Beijing.262.95.AY289928_1995:0.0104492633,A.Alaska.1173.00.AY029287_2000:0.0273554037):0.0129145023):0.0036475476):0.0041044694):0.0185559133,A.Finland.4.1984.L33491_1984:0.0020695783):0.0025283110); 2 | -------------------------------------------------------------------------------- /examples/data/invalid.fasta: -------------------------------------------------------------------------------- 1 | >sequence_1-pass 2 | AGTAGATCCCCGATAGCGAGCTAGCGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNNNN 3 | >sequence_2-fail 4 | AGTAGATCCCCGATAGCGAGCTAGXGATCGCAGCGACTCAGCAGCTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNNNN 5 | >sequence_3-pass 6 | AGTAGATCCCCGATAGCGAGCTAGCGATNNNNNNNNNNNNNNNNNTACAGCGCAGAGGAGAGAGAGGCCCCTATTTACTAGAGCTCCAGATATAGNNNNN 7 | -------------------------------------------------------------------------------- /examples/example.py: -------------------------------------------------------------------------------- 1 | from phytest import Alignment, Sequence, Tree 2 | 3 | 4 | def test_alignment_has_4_sequences(alignment: Alignment): 5 | alignment.assert_length(4) 6 | 7 | 8 | def test_alignment_has_a_width_of_100(alignment: Alignment): 9 | alignment.assert_width(100) 10 | 11 | 12 | def test_sequences_only_contains_the_characters(sequence: Sequence): 13 | sequence.assert_valid_alphabet(alphabet="ATGCN-") 14 | 15 | 16 | def test_single_base_deletions(sequence: Sequence): 17 | sequence.assert_longest_stretch_gaps(max=1) 18 | 19 | 20 | def 
test_longest_stretch_of_Ns_is_10(sequence: Sequence): 21 | sequence.assert_longest_stretch_Ns(max=10) 22 | 23 | 24 | def test_tree_has_4_tips(tree: Tree): 25 | tree.assert_number_of_tips(4) 26 | 27 | 28 | def test_tree_is_bifurcating(tree: Tree): 29 | tree.assert_is_bifurcating() 30 | 31 | 32 | def test_aln_tree_match_names(alignment: Alignment, tree: Tree): 33 | aln_names = [i.name for i in alignment] 34 | tree.assert_tip_names(aln_names) 35 | 36 | 37 | def test_any_internal_branch_lengths_above_threshold(tree: Tree, threshold=1e-4): 38 | tree.assert_internal_branch_lengths(min=threshold) 39 | 40 | 41 | def test_outlier_branches(tree: Tree): 42 | # Here we create a custom function to detect outliers 43 | import statistics 44 | 45 | tips = tree.get_terminals() 46 | branch_lengths = [t.branch_length for t in tips] 47 | cut_off = statistics.mean(branch_lengths) + statistics.stdev(branch_lengths) 48 | for tip in tips: 49 | assert tip.branch_length < cut_off, f"Outlier tip '{tip.name}' (branch length = {tip.branch_length})!" 
50 | -------------------------------------------------------------------------------- /examples/self_contained.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import phytest 4 | 5 | 6 | def test_length(sequence: phytest.Sequence): 7 | sequence.assert_length(length=462) 8 | 9 | 10 | if __name__ == "__main__": 11 | sys.exit(phytest.main(sequence='examples/data/ice_viruses.fasta')) 12 | -------------------------------------------------------------------------------- /mkdocs.sh: -------------------------------------------------------------------------------- 1 | poetry run sphinx-build -b html docs docs/_build/html -E -a 2 | -------------------------------------------------------------------------------- /phytest/__init__.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | # from py.xml import html 5 | 6 | from .bio import Alignment, Data, Sequence, Tree 7 | from .main import main as main 8 | 9 | 10 | def pytest_addoption(parser): 11 | parser.addoption("--sequence", "-S", action="store", default=None, help="sequence file") 12 | parser.addoption("--sequence-format", action="store", default='fasta', help="sequence file format") 13 | parser.addoption("--tree", "-T", action="store", default=None, help="tree file") 14 | parser.addoption("--tree-format", action="store", default='newick', help="tree file format") 15 | parser.addoption("--data", "-D", action="store", default=None, help="data file") 16 | parser.addoption("--data-format", action="store", default='csv', help="data file format") 17 | parser.addoption( 18 | "--apply-fixes", action="store_true", default=None, help="automatically apply fixes where possible" 19 | ) 20 | 21 | 22 | def pytest_generate_tests(metafunc): 23 | sequence_path = metafunc.config.getoption("sequence") 24 | if 'alignment' in metafunc.fixturenames: 25 | if sequence_path is None: 26 | raise 
ValueError(f"{metafunc.function.__name__} requires an alignment file") 27 | fpth = Path(sequence_path) 28 | if not fpth.exists(): 29 | raise FileNotFoundError(f"Unable to locate requested alignment file ({fpth})! 😱") 30 | tree_path = metafunc.config.getoption("tree") 31 | if 'tree' in metafunc.fixturenames: 32 | if tree_path is None: 33 | raise ValueError(f"{metafunc.function.__name__} requires a tree file") 34 | fpth = Path(tree_path) 35 | if not fpth.exists(): 36 | raise FileNotFoundError(f"Unable to locate requested tree file ({fpth})! 😱") 37 | tree_format = metafunc.config.getoption("--tree-format") 38 | trees = Tree.parse(tree_path, tree_format) 39 | metafunc.parametrize("tree", trees, ids=lambda t: t.name) 40 | data_path = metafunc.config.getoption("data") 41 | if 'data' in metafunc.fixturenames: 42 | if data_path is None: 43 | raise ValueError(f"{metafunc.function.__name__} requires a data file") 44 | fpth = Path(data_path) 45 | if not fpth.exists(): 46 | raise FileNotFoundError(f"Unable to locate requested data file ({fpth})! 😱") 47 | if "sequence" in metafunc.fixturenames: 48 | if sequence_path is None: 49 | raise ValueError(f"{metafunc.function.__name__} requires a sequence file") 50 | fpth = Path(sequence_path) 51 | if not fpth.exists(): 52 | raise FileNotFoundError(f"Unable to locate requested sequence file ({fpth})! 
😱") 53 | alignment_format = metafunc.config.getoption("--sequence-format") 54 | sequences = Sequence.parse(sequence_path, alignment_format) 55 | metafunc.parametrize("sequence", sequences, ids=lambda s: s.id) 56 | 57 | 58 | @pytest.fixture(scope="session", name="alignment") 59 | def _alignment_fixture(request): 60 | alignment_path = request.config.getoption("sequence") 61 | alignment_format = request.config.getoption("--sequence-format") 62 | alignment = Alignment.read(alignment_path, alignment_format) 63 | return alignment 64 | 65 | 66 | @pytest.fixture(scope="session", name="data") 67 | def _data_fixture(request): 68 | data_path = request.config.getoption("data") 69 | data_format = request.config.getoption("--data-format") 70 | data = Data.read(data_path, data_format) 71 | return data 72 | 73 | 74 | def pytest_html_report_title(report): 75 | report.title = "report" 76 | -------------------------------------------------------------------------------- /phytest/bio/__init__.py: -------------------------------------------------------------------------------- 1 | from .alignment import Alignment 2 | from .data import Data 3 | from .sequence import Sequence 4 | from .tree import Tree 5 | -------------------------------------------------------------------------------- /phytest/bio/alignment.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from warnings import warn 3 | 4 | from Bio import AlignIO 5 | from Bio.Align import MultipleSeqAlignment 6 | 7 | from ..utils import PhytestObject, assert_or_warn 8 | 9 | 10 | class Alignment(PhytestObject, MultipleSeqAlignment): 11 | @classmethod 12 | def read(cls, alignment_path, alignment_format) -> 'Alignment': 13 | alignment = AlignIO.read(alignment_path, alignment_format) 14 | return Alignment( 15 | alignment._records, annotations=alignment.annotations, column_annotations=alignment.column_annotations 16 | ) 17 | 18 | def assert_width( 19 | self, 20 | width: 
def assert_length(
    self,
    length: Optional[int] = None,
    *,
    min: Optional[int] = None,
    max: Optional[int] = None,
    warning: bool = False,
) -> None:
    """
    Asserts that the alignment length (the number of sequences in the alignment) meets the specified criteria.

    Args:
        length (int, optional): If set, then alignment length must be equal to this value. Defaults to None.
        min (int, optional): If set, then alignment length must be equal to or greater than this value. Defaults to None.
        max (int, optional): If set, then alignment length must be equal to or less than this value. Defaults to None.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    alignment_length = len(self)
    summary = f"The number of sequences in the alignment is {alignment_length}."

    if length is not None:
        # An exact-length mismatch can be in either direction, so the message
        # must not claim the alignment is "less than" the requirement.
        assert_or_warn(
            alignment_length == length,
            warning,
            summary,
            f"This is not equal to the required number of {length}.",
        )
    if min is not None:
        assert_or_warn(
            alignment_length >= min,
            warning,
            summary,
            f"This is less than the minimum {min}.",
        )
    if max is not None:
        assert_or_warn(
            alignment_length <= max,
            warning,
            summary,
            f"This is greater than the maximum {max}.",
        )
def assert_match(
    self,
    column: str,
    pattern: str,
    *,
    warning: bool = False,
) -> None:
    """
    Asserts that all values of the specified column match the specified pattern.

    A value matches when the regular expression is found anywhere in it
    (`Series.str.contains` semantics); anchor the pattern with `^` and `$`
    to require the whole value to match. Missing values never match.

    Args:
        column (str, required): The column to check.
        pattern (str, required): The pattern to match.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    column_values = self[column].values
    summary = f"The values of column '{column}' are '{column_values}'."
    # na=False reports a missing value as "not matched". Without it the mask
    # contains NaN for missing values and cannot be inverted with `~`.
    matched = self[column].str.contains(re.compile(pattern), na=False)
    not_matched = self[~matched].index.values
    assert_or_warn(
        len(not_matched) == 0,
        warning,
        summary,
        f"The row(s) '{not_matched}' of the column '{column}' do not match the pattern '{pattern}'.",
    )
def assert_values(
    self,
    column: str,
    values: list,
    *,
    allow_nan: bool = False,
    exact: bool = False,
    warning: bool = False,
) -> None:
    """
    Asserts that all values of the specified column are in the specified list of allowed values.

    The list passed as `values` is never modified.

    Args:
        column (str, required): The column to check.
        values (list, required): The list of allowed values.
        allow_nan (bool): If True, allow NaN values. With `exact`, missing values in the
            column are ignored and only the non-missing values are compared.
        exact (bool): If True, the list of allowed values must be exactly the same as the list of values in the DataFrame.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    series = self[column]
    column_values = series.values
    summary = f"The values of column '{column}' are '{column_values}'."
    # Work on a copy: appending NaN to the caller's list would leak into
    # later calls that reuse the same list.
    allowed = list(values)
    if allow_nan:
        allowed.append(float('nan'))
    if exact:
        if allow_nan:
            # NaN is not equal to itself, so a set comparison can never pair the
            # allowed NaN with the NaNs in the column. Compare non-missing values only.
            expected = {value for value in values if not pd.isna(value)}
            observed = set(series.dropna().values)
        else:
            expected = set(values)
            observed = set(column_values)
        not_allowed = list(expected.symmetric_difference(observed))
        message = f"The values column '{column}' do not exactly match the allowed values '{allowed}'"
    else:
        not_allowed = self[~series.isin(allowed)].index.values
        message = (
            f"The row(s) '{not_allowed}' of the column '{column}' are not in the list of allowed values '{allowed}'."
        )
    assert_or_warn(len(not_allowed) == 0, warning, summary, message)
156 | if min is not None: 157 | assert_or_warn( 158 | min <= column_values.min(), 159 | warning, 160 | summary, 161 | f"The minimum value of column '{column}' is '{column_values.min()}', which is less than '{min}'.", 162 | ) 163 | if max is not None: 164 | assert_or_warn( 165 | max >= column_values.max(), 166 | warning, 167 | summary, 168 | f"The maximum value of column '{column}' is '{column_values.max()}', which is greater than '{max}'.", 169 | ) 170 | -------------------------------------------------------------------------------- /phytest/bio/sequence.py: -------------------------------------------------------------------------------- 1 | import re 2 | from builtins import max as builtin_max 3 | from typing import List, Optional, Union 4 | 5 | from Bio import AlignIO 6 | from Bio import SeqIO as SeqIO 7 | from Bio.SeqRecord import SeqRecord 8 | 9 | from ..utils import PhytestObject, assert_or_warn 10 | 11 | 12 | class Sequence(PhytestObject, SeqRecord): 13 | @classmethod 14 | def parse(cls, alignment_path, alignment_format) -> 'Sequence': 15 | return ( 16 | Sequence( 17 | r.seq, 18 | id=r.id, 19 | name=r.name, 20 | description=r.description, 21 | dbxrefs=r.dbxrefs, 22 | features=r.features, 23 | annotations=r.annotations, 24 | letter_annotations=r.letter_annotations, 25 | ) 26 | for r in SeqIO.parse(alignment_path, alignment_format) 27 | ) 28 | 29 | def assert_valid_alphabet(self, alphabet: str = "ATCGN-", *, warning: bool = False) -> None: 30 | """ 31 | Asserts that that the sequence only contains particular charaters. 32 | 33 | Args: 34 | alphabet (str): A string containing legal charaters. Defaults to 'ATCGN-'. 35 | warning (bool): If True, raise a warning instead of an exception. Defaults to False. 36 | This flag can be set by running this method with the prefix `warn_` instead of `assert_`. 
def assert_length(
    self,
    length: Optional[int] = None,
    *,
    min: Optional[int] = None,
    max: Optional[int] = None,
    warning: bool = False,
) -> None:
    """
    Checks the number of characters in the sequence against the given bounds.

    Each criterion that is not None is checked in the order `length`, `min`, `max`.

    Args:
        length (int, optional): Exact sequence length required. Defaults to None.
        min (int, optional): Smallest sequence length accepted (inclusive). Defaults to None.
        max (int, optional): Largest sequence length accepted (inclusive). Defaults to None.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    observed = len(self.seq)
    # (bound, lazily evaluated check, failure message). The comparison is
    # deferred so it only runs for bounds that were actually supplied.
    rules = (
        (
            length,
            lambda: observed == length,
            f"Sequence length of '{self.id}' ({observed}) is not equal to the required length of {length}.",
        ),
        (
            min,
            lambda: observed >= min,
            f"Sequence length of '{self.id}' ({observed}) is less than the minimum {min}.",
        ),
        (
            max,
            lambda: observed <= max,
            f"Sequence length of '{self.id}' ({observed}) is greater than the maximum {max}.",
        ),
    )
    for bound, holds, failure in rules:
        if bound is None:
            continue
        assert_or_warn(holds(), warning, failure)
def assert_percent(
    self,
    nucleotide: Union[str, List[str]],
    *,
    percent: Optional[float] = None,
    min: Optional[float] = None,
    max: Optional[float] = None,
    warning: bool = False,
) -> None:
    """
    Asserts that the percentage of a nucleotide in the sequence meets the specified criteria.

    An empty sequence is treated as containing 0.0 percent of any nucleotide.

    Args:
        nucleotide: (Union[str, List[str]]): The nucleotide(s) to count in the sequence.
            Either a single character or a list of characters whose counts are summed.
        percent (float, optional): If set, then nucleotide percentage must be equal to this value. Defaults to None.
        min (float, optional): If set, then nucleotide percentage must be equal to or greater than this value. Defaults to None.
        max (float, optional): If set, then nucleotide percentage must be equal to or less than this value. Defaults to None.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.

    Raises:
        ValueError: If `nucleotide` is a string longer than one character, or is neither a str nor a list.
    """
    # Validate the argument before any arithmetic so that argument errors are
    # kept apart from the empty-sequence case.
    if isinstance(nucleotide, str):
        if len(nucleotide) > 1:
            raise ValueError(
                f"The length of the requested nucleotide '{nucleotide}' is more than a single character. "
                "This value should either be a single character (i.e. A, G, C, T) or a list of single characters."
            )
        targets = [nucleotide]
    elif isinstance(nucleotide, list):
        targets = nucleotide
    else:
        raise ValueError(f"Nucleotide must be str or list and cannot be of type '{type(nucleotide)}'.")

    # Always build the label, so the summary reads the same for an empty sequence.
    label = ', '.join(targets)
    sequence_length = len(self.seq)
    if sequence_length:
        base_percent = (sum(self.seq.count(x) for x in targets) * 100.0) / sequence_length
    else:
        base_percent = 0.0

    summary = f"Sequence '{self.id}' contains {base_percent} percent '{label}'."
    if percent is not None:
        assert_or_warn(
            base_percent == percent,
            warning,
            summary,
            f"This is not equal to the required percentage of {percent}.",
        )
    if min is not None:
        assert_or_warn(
            base_percent >= min,
            warning,
            summary,
            f"This is less than the minimum {min}.",
        )
    if max is not None:
        assert_or_warn(
            base_percent <= max,
            warning,
            summary,
            f"This is greater than the maximum {max}.",
        )
def assert_percent_N(
    self,
    percent: Optional[int] = None,
    *,
    min: Optional[int] = None,
    max: Optional[int] = None,
    warning: bool = False,
) -> None:
    """
    Checks the percentage of N's (upper- or lower-case) in the sequence.

    This forwards to `assert_percent` with the nucleotides 'N' and 'n'.

    Args:
        percent (float, optional): Exact percentage of N's required. Defaults to None.
        min (float, optional): Smallest percentage of N's accepted (inclusive). Defaults to None.
        max (float, optional): Largest percentage of N's accepted (inclusive). Defaults to None.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    thresholds = {"percent": percent, "min": min, "max": max}
    self.assert_percent(nucleotide=["N", "n"], warning=warning, **thresholds)
def assert_count_Ns(
    self,
    count: Optional[int] = None,
    *,
    min: Optional[int] = None,
    max: Optional[int] = None,
    warning: bool = False,
) -> None:
    """
    Checks how many N's the sequence contains.

    This forwards to `assert_count` with the pattern 'N' (upper-case only).

    Args:
        count (int, optional): Exact number of N's required. Defaults to None.
        min (int, optional): Smallest number of N's accepted (inclusive). Defaults to None.
        max (int, optional): Largest number of N's accepted (inclusive). Defaults to None.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    bounds = {"count": count, "min": min, "max": max}
    self.assert_count(pattern='N', warning=warning, **bounds)
def assert_longest_stretch(
    self,
    pattern: str,
    *,
    count: Optional[int] = None,
    min: Optional[int] = None,
    max: Optional[int] = None,
    warning: bool = False,
):
    """
    Asserts that the longest stretch of a pattern in the sequence meets the specified criteria.

    e.g. the longest stretch of N's in 'ANNNANNA' is 3.

    The pattern is interpreted as a regular expression and the stretch is measured
    in characters, e.g. the longest stretch of 'AT' in 'ATATGAT' is 4. If the
    pattern does not occur at all, the longest stretch is 0.

    Args:
        pattern: (str): the pattern to count in the sequence.
        count (int, optional): If set, then the longest stretch of the pattern must be equal to this value. Defaults to None.
        min (int, optional): If set, then the longest stretch of the pattern must be equal to or greater than this value. Defaults to None.
        max (int, optional): If set, then the longest stretch of the pattern must be equal to or less than this value. Defaults to None.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    # Group the pattern so that `+` repeats all of it, not just its last atom.
    stretch_regex = re.compile(f'(?:{pattern})+')
    # Compare match lengths, not the matched strings: the lexicographically
    # greatest match is not necessarily the longest. finditer is used so that
    # capture groups inside `pattern` cannot change what is measured.
    longest_stretch = builtin_max(
        (match.end() - match.start() for match in stretch_regex.finditer(str(self.seq))),
        default=0,
    )
    summary = f"The longest stretch of pattern '{pattern}' in sequence '{self.id}' is {longest_stretch}."
    if count is not None:
        assert_or_warn(
            longest_stretch == count,
            warning,
            summary,
            f"This is not equal to the required number of {count}.",
        )
    if min is not None:
        assert_or_warn(
            longest_stretch >= min,
            warning,
            summary,
            f"This is less than the minimum {min}.",
        )
    if max is not None:
        assert_or_warn(
            longest_stretch <= max,
            warning,
            summary,
            f"This is greater than the maximum {max}.",
        )
def assert_startswith(self, pattern: str, *, warning: bool = False):
    """
    Asserts that the sequence starts with a particular pattern.

    The check is delegated to the `startswith` method of the underlying sequence.

    Args:
        pattern (str): The sequence must start with this value.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    has_prefix = self.seq.startswith(pattern)
    failure_message = f"Sequence '{self.id}' does not start with '{pattern}'."
    assert_or_warn(has_prefix, warning, failure_message)


def assert_endswith(self, pattern: str, *, warning: bool = False):
    """
    Asserts that the sequence ends with a particular pattern.

    The check is delegated to the `endswith` method of the underlying sequence.

    Args:
        pattern (str): The sequence must end with this value.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    has_suffix = self.seq.endswith(pattern)
    failure_message = f"Sequence '{self.id}' does not end with '{pattern}'."
    assert_or_warn(has_suffix, warning, failure_message)


def assert_contains(self, pattern: str, *, warning: bool = False):
    """
    Asserts that the sequence contains a particular pattern.

    Implemented as `assert_count` with a minimum count of one.

    Args:
        pattern (str): The sequence must contain this value.
        warning (bool): If True, raise a warning instead of an exception. Defaults to False.
            This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
    """
    at_least_once = {'min': 1, 'warning': warning}
    self.assert_count(pattern=pattern, **at_least_once)
import copy
import re
import unittest
from datetime import datetime
from io import BytesIO, StringIO
from pathlib import Path
from typing import Dict, Iterator, List, Optional, Union
from warnings import warn

from Bio import Phylo as Phylo
from Bio.Align import MultipleSeqAlignment
from Bio.Phylo.BaseTree import Clade
from Bio.Phylo.BaseTree import Tree as BioTree
from dateutil.parser import parse
from pytest_html import extras
from treetime import GTR, TreeTime
from treetime.utils import DateConversion, datetime_from_numeric, numeric_date

from ..utils import (
    PhytestAssertion,
    PhytestObject,
    PhytestWarning,
    assert_or_warn,
    default_date_patterns,
)


class Tree(PhytestObject, BioTree):
    """A Bio.Phylo tree extended with phytest assertion methods."""

    @classmethod
    def read(cls, tree_path, tree_format) -> 'Tree':
        """
        Reads a single tree with `Bio.Phylo.read` and wraps it in this class.

        Args:
            tree_path: The path or file handle passed on to `Bio.Phylo.read`.
            tree_format: The tree format name passed on to `Bio.Phylo.read`.

        Returns:
            Tree: A new tree built from the root, rooted flag, id and name of the tree that was read.
        """
        tree = Phylo.read(tree_path, tree_format)
        return cls(root=tree.root, rooted=tree.rooted, id=tree.id, name=tree.name)

    @classmethod
    def parse(cls, tree_path, tree_format) -> Iterator['Tree']:
        """
        Lazily reads every tree found by `Bio.Phylo.parse` and wraps each one in this class.

        Args:
            tree_path: The path or file handle passed on to `Bio.Phylo.parse`.
            tree_format: The tree format name passed on to `Bio.Phylo.parse`.

        Returns:
            Iterator[Tree]: A generator yielding one tree per parsed tree.
        """
        trees = Phylo.parse(tree_path, tree_format)
        return (cls(root=tree.root, rooted=tree.rooted, id=tree.id, name=tree.name) for tree in trees)

    @classmethod
    def read_str(cls, tree_str: str, tree_format: str = "newick") -> 'Tree':
        """
        Reads a single tree from a string.

        Args:
            tree_str (str): The text of the tree.
            tree_format (str): The tree format name. Defaults to "newick".

        Returns:
            Tree: The tree read from the string.
        """
        data = StringIO(tree_str)
        return cls.read(data, tree_format)

    @property
    def tips(self):
        """The terminal nodes of the tree as returned by `get_terminals`."""
        return self.get_terminals()

    def parse_tip_dates(
        self,
        *,
        patterns=None,
        date_format: Optional[str] = None,
        decimal_year: bool = False,
    ):
        """
        Extracts a date for each tip from the tip names.

        Each tip name is searched with the patterns in order and the first match is used.
        Tips which match none of the patterns are left out of the result.
        A match made only of digits with an optional decimal part is converted with
        `datetime_from_numeric`; any other match is parsed as a date string.

        Args:
            patterns (str or list of str, optional): The regex pattern(s) used to find the date in a tip name.
                If not set, then the patterns from `default_date_patterns` are used.
            date_format (str, optional): A `datetime.strptime` format for date strings.
                If None, then the format is guessed by `dateutil`. Defaults to None.
            decimal_year (bool): If True, the dates are converted with `numeric_date` before being returned.
                Defaults to False.

        Returns:
            dict: The dates with the tip names as keys.
        """
        patterns = patterns or default_date_patterns()
        if isinstance(patterns, str):
            patterns = [patterns]

        dates = {}

        compiled_patterns = [re.compile(pattern_string) for pattern_string in patterns]
        numeric_pattern = re.compile(r"^\d+\.?\d*$")
        for tip in self.find_elements(terminal=True):
            for pattern in compiled_patterns:
                m = pattern.search(tip.name)
                if m:
                    matched_str = m.group(0)
                    if numeric_pattern.match(matched_str):
                        date = datetime_from_numeric(float(matched_str))
                    elif date_format is not None:
                        # The second positional argument of dateutil's `parse` is a `parserinfo`
                        # object, not a format string, so an explicit format needs `strptime`.
                        date = datetime.strptime(matched_str, date_format)
                    else:
                        date = parse(matched_str)

                    dates[tip.name] = date
                    break

        if decimal_year:
            dates = {key: numeric_date(value) for key, value in dates.items()}

        return dates

    def assert_number_of_tips(
        self,
        tips: Optional[int] = None,
        *,
        min: Optional[int] = None,
        max: Optional[int] = None,
        warning: bool = False,
    ):
        """
        Asserts that the number of tips meets the specified criteria.

        Args:
            tips (int, optional): If set, then number of tips must be equal to this value. Defaults to None.
            min (int, optional): If set, then number of tips must be equal to or greater than this value. Defaults to None.
            max (int, optional): If set, then number of tips must be equal to or less than this value. Defaults to None.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        number_of_tips = len(self.get_terminals())
        if tips is not None:
            assert_or_warn(
                number_of_tips == tips,
                warning,
                f"The number of tips ({number_of_tips}) which is different from the required number of tips ({tips}).",
            )
        if min is not None:
            assert_or_warn(
                number_of_tips >= min,
                warning,
                f"The number of tips ({number_of_tips}) is less than the minimum ({min}).",
            )
        if max is not None:
            assert_or_warn(
                number_of_tips <= max,
                warning,
                f"The number of tips ({number_of_tips}) is greater than the maximum ({max}).",
            )

    def assert_unique_tips(self, *, warning: bool = False):
        """
        Asserts that all the tip names are unique.

        Args:
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        tip_names = [t.name for t in self.get_terminals()]
        unique_names = set(tip_names)
        assert_or_warn(
            len(tip_names) == len(unique_names),
            warning,
            f"The tree contains {len(tip_names)} tips, however, {len(unique_names)} are unique.",
        )

    def assert_is_rooted(self, *, warning: bool = False):
        """
        Asserts that the tree is rooted.

        Args:
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        assert_or_warn(
            self.rooted,
            warning,
            "The tree is not rooted.",
        )

    def assert_is_bifurcating(self, *, warning: bool = False):
        """
        Asserts that the tree is bifurcating.

        The root may have 3 descendents and still be considered part of a bifurcating tree, because it has no ancestor.

        Args:
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        assert_or_warn(
            self.is_bifurcating(),
            warning,
            "The tree is not bifurcating.",
        )

    def assert_is_monophyletic(self, tips: List[Clade], *, warning: bool = False):
        """
        Asserts that the specified tips form a monophyletic group.

        Args:
            tips (List[Clade]): List of terminal nodes (tips).
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        group = ', '.join([tip.name for tip in tips])
        assert_or_warn(
            self.is_monophyletic(tips),
            warning,
            f"The group '{group}' is paraphyletic!",
        )

    def assert_branch_lengths(
        self,
        *,
        min: Optional[float] = None,
        max: Optional[float] = None,
        terminal: Optional[bool] = None,
        warning: bool = False,
    ):
        """
        Asserts that all the branch lengths meet the specified criteria.

        The root of the tree is excluded from the check.

        Args:
            min (float, optional): If set, then each branch length must be equal to or greater than this value. Defaults to None.
            max (float, optional): If set, then each branch length must be equal to or less than this value. Defaults to None.
            terminal (bool, optional): True searches for only terminal nodes, False excludes terminal nodes, and the default, None,
                searches both terminal and non-terminal nodes, as well as any tree elements lacking the is_terminal method.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        for node in self.find_clades(terminal=terminal):
            # Skip the root by identity instead of dropping the first result of the search:
            # when only terminal nodes are requested the first result is a tip, not the root.
            if node is self.root:
                continue
            if min is not None:
                assert_or_warn(
                    node.branch_length >= min,
                    warning,
                    f"An internal branch in the tree is less than the minimum ({min}).",
                )
            if max is not None:
                assert_or_warn(
                    node.branch_length <= max,
                    warning,
                    f"An internal branch in the tree is greater than the maximum ({max}).",
                )

    def assert_terminal_branch_lengths(
        self,
        *,
        min: Optional[float] = None,
        max: Optional[float] = None,
        warning: bool = False,
    ):
        """
        Asserts that the terminal branch lengths meet the specified criteria.

        Args:
            min (float, optional): If set, then each terminal branch length must be equal to or greater than this value. Defaults to None.
            max (float, optional): If set, then each terminal branch length must be equal to or less than this value. Defaults to None.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        self.assert_branch_lengths(min=min, max=max, terminal=True, warning=warning)

    def assert_internal_branch_lengths(
        self,
        *,
        min: Optional[float] = None,
        max: Optional[float] = None,
        warning: bool = False,
    ):
        """
        Asserts that the internal branch lengths meet the specified criteria.

        Args:
            min (float, optional): If set, then each internal branch length must be equal to or greater than this value. Defaults to None.
            max (float, optional): If set, then each internal branch length must be equal to or less than this value. Defaults to None.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        self.assert_branch_lengths(min=min, max=max, terminal=False, warning=warning)

    def assert_no_negatives(
        self,
        *,
        terminal: Optional[bool] = None,
        warning: bool = False,
    ):
        """
        Asserts that there are no negative branches.

        Args:
            terminal (bool, optional): True searches for only terminal nodes, False excludes terminal nodes, and the default, None,
                searches both terminal and non-terminal nodes, as well as any tree elements lacking the is_terminal method.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        self.assert_branch_lengths(min=0, terminal=terminal, warning=warning)

    def assert_total_branch_length(
        self,
        length: Optional[float] = None,
        *,
        min: Optional[float] = None,
        max: Optional[float] = None,
        warning: bool = False,
    ):
        """
        Asserts that the total branch length meets the specified criteria.

        Args:
            length (float, optional): If set, then total branch length must be equal to this value. Defaults to None.
            min (float, optional): If set, then total branch length must be equal to or greater than this value. Defaults to None.
            max (float, optional): If set, then total branch length must be equal to or less than this value. Defaults to None.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        total_branch_length = self.total_branch_length()
        if length is not None:
            assert_or_warn(
                total_branch_length == length,
                warning,
                f"The total branch length ({total_branch_length}) is not equal to the required length ({length}).",
            )
        if min is not None:
            assert_or_warn(
                total_branch_length >= min,
                warning,
                f"The total branch length ({total_branch_length}) is less than the minimum ({min}).",
            )
        if max is not None:
            assert_or_warn(
                total_branch_length <= max,
                warning,
                f"The total branch length ({total_branch_length}) is greater than the maximum ({max}).",
            )

    def assert_tip_regex(
        self,
        patterns: Union[List[str], str],
        *,
        warning: bool = False,
    ):
        """
        Asserts that all the tips match at least one of a list of regular expression patterns.

        Args:
            patterns (Union[List[str], str]): The regex pattern(s) to match to.
                If a string, then every tip must match that pattern.
                If a list then each tip must match at least one pattern in the list.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        if isinstance(patterns, str):
            patterns = [patterns]

        compiled_patterns = [re.compile(pattern_string) for pattern_string in patterns]

        for tip in self.find_elements(terminal=True):
            matches = any(pattern.search(tip.name) for pattern in compiled_patterns)
            assert_or_warn(
                matches,
                warning,
                f"Tip {tip.name} does not match any of the regex patterns in: '{patterns}'.",
            )

    def assert_tip_names(self, names: List[str], warning: bool = False):
        """
        Asserts that the tree tip names match the supplied names.

        Both the number of names and the names themselves are compared. A name which is only
        in the tree or only in the supplied list counts as a difference.

        Args:
            names (List[str]): The names to match.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        tip_names = [t.name for t in self.get_terminals()]
        assert_or_warn(
            len(tip_names) == len(names),
            warning,
            f"The tree contains {len(tip_names)} tips, however, {len(names)} names were supplied.",
        )
        # A one-sided difference would miss supplied names that are absent from the tree
        # whenever duplicated names make the two lists the same length.
        diff = set(tip_names).symmetric_difference(names)
        assert_or_warn(
            not diff,
            warning,
            f"There was a difference ({', '.join(sorted(str(name) for name in diff))}) "
            "between the supplied names and tree tip names.",
        )

    def copy(self):
        """Makes a deep copy of this tree."""
        new_copy = copy.deepcopy(self)
        return new_copy

    def root_to_tip_regression(
        self,
        *,
        dates: Optional[Dict] = None,
        alignment: Optional[MultipleSeqAlignment] = None,
        sequence_length: Optional[int] = None,
        clock_filter: float = 3.0,
        gtr: Union[GTR, str] = 'JC69',
        root_method: str = 'least-squares',
        allow_negative_rate: bool = False,
        keep_root: bool = False,
        covariation: bool = False,
    ):
        """
        Performs a root-to-tip regression to determine how clock-like a tree is.

        The regression is run on a copy of this tree, so this tree is not modified.

        Args:
            dates (Dict, optional): The tip dates as a dictionary with the tip name as the key and the date as the value.
                If not set, then it parses the tip dates to generate this dictionary using the `parse_tip_dates` method.
            alignment (MultipleSeqAlignment, optional): The alignment associated with this tree. Defaults to None.
            sequence_length (int, optional): The sequence length of the alignment. Defaults to None.
            clock_filter (float, optional): The number of interquartile ranges from regression beyond which to ignore.
                This provides a way to ignore tips that don't follow a loose clock.
                Defaults to 3.0.
            gtr (GTR, str, optional): The molecular evolution model. Defaults to 'JC69'.
            root_method (str, optional): The method used to reroot the tree if `keep_root` is False.
                Valid choices are: 'min_dev', 'least-squares', and 'oldest'.
                Defaults to 'least-squares'.
            allow_negative_rate (bool, optional): Whether or not a negative clock rate is allowed.
                For trees with little temporal signal, it can be set to True to achieve essentially mid-point rooting.
                Defaults to False.
            keep_root (bool, optional): Keeps the current root of the tree.
                If False, then a new optimal root is sought. Defaults to False.
            covariation (bool, optional): Accounts for covariation when estimating rates or rerooting. Defaults to False.

        Returns:
            TreeTime: The TreeTime object after its clock model has been estimated.

        Raises:
            PhytestAssertion: If `covariation` is True but neither an alignment nor a sequence length is given,
                or if the tree is to be rerooted with an unknown `root_method`.
        """

        if covariation and (alignment is None and sequence_length is None):
            raise PhytestAssertion(
                "Cannot perform root-to-tip regression with `covariation` as True if no alignment or sequence length is provided."
            )

        dates = dates or self.parse_tip_dates()

        # Convert datetimes to floats with decimal years if necessary
        dates = {name: numeric_date(date) if isinstance(date, datetime) else date for name, date in dates.items()}

        regression = TreeTime(
            dates=dates,
            tree=self.copy(),
            aln=alignment,
            gtr=gtr,
            seq_len=sequence_length,
        )

        if clock_filter:
            bad_nodes = [node.name for node in regression.tree.get_terminals() if node.bad_branch]
            regression.clock_filter(n_iqd=clock_filter, reroot=root_method or 'least-squares')
            bad_nodes_after = [node.name for node in regression.tree.get_terminals() if node.bad_branch]
            if len(bad_nodes_after) > len(bad_nodes):
                warn(
                    "The following leaves don't follow a loose clock and "
                    "will be ignored in rate estimation:\n\t" + "\n\t".join(set(bad_nodes_after).difference(bad_nodes)),
                    PhytestWarning,
                )

        if not keep_root:
            if covariation:  # this requires branch length estimates
                regression.run(root="least-squares", max_iter=0, use_covariation=covariation)

            # Raised explicitly so that the check is not stripped when Python runs with -O.
            # PhytestAssertion is an AssertionError, as the previous `assert` statement raised.
            valid_root_methods = ['min_dev', 'least-squares', 'oldest']
            if root_method not in valid_root_methods:
                raise PhytestAssertion(
                    f"The root method '{root_method}' is not valid. Must be one of {', '.join(valid_root_methods)}."
                )
            regression.reroot(root_method, force_positive=not allow_negative_rate)

        regression.get_clock_model(covariation=covariation)
        return regression

    def plot_root_to_tip(
        self,
        filename: Union[str, Path],
        *,
        format: Optional[str] = None,
        regression: Optional[TreeTime] = None,
        add_internal: bool = False,
        label: bool = True,
        ax=None,
        **kwargs,
    ):
        """
        Plots a root-to-tip regression.

        Args:
            filename (str, Path): The path to save the plot as an image.
            format (str, optional): The image format passed on to matplotlib's `savefig`. Defaults to None.
            regression (TreeTime, optional): The root-to-tip regression for this tree.
                If None, then this regression is calculated using the `root_to_tip_regression` method.
            add_internal (bool): Whether or not to plot the internal node positions. Default: False.
            label (bool): Whether or not to label the points. Default: True.
            ax (matplotlib axes): Uses matplotlib axes if provided. Default: None.
            **kwargs: Keyword arguments for the `root_to_tip_regression` method.
        """
        regression = regression or self.root_to_tip_regression(**kwargs)
        # matplotlib is imported here so that it is only needed when a plot is requested
        from matplotlib import pyplot as plt

        regression.plot_root_to_tip(add_internal=add_internal, label=label, ax=ax)
        if isinstance(filename, Path):
            filename = str(filename)

        plt.savefig(filename, format=format)

    def assert_root_to_tip(
        self,
        *,
        regression: Optional[TreeTime] = None,
        min_r_squared: Optional[float] = None,
        min_rate: Optional[float] = None,
        max_rate: Optional[float] = None,
        min_root_date: Optional[float] = None,
        max_root_date: Optional[float] = None,
        valid_confidence: Optional[bool] = None,
        extra: Optional[List] = None,
        warning: bool = False,
        **kwargs,
    ):
        """
        Checks inferred values from a root-to-tip regression.

        Args:
            regression (TreeTime, optional): The root-to-tip regression for this tree.
                If None, then this regression is calculated using the `root_to_tip_regression` method.
            min_r_squared (float, optional): If set, then R^2 must be equal or greater than this value. Defaults to None.
            min_rate (float, optional): If set, then the clock rate must be equal or greater than this value. Defaults to None.
            max_rate (float, optional): If set, then the clock rate must be equal or less than this value. Defaults to None.
            min_root_date (float, optional): If set, then the interpolated root date must be equal or greater than this value. Defaults to None.
            max_root_date (float, optional): If set, then the interpolated root date must be equal or less than this value. Defaults to None.
            valid_confidence (bool, optional): Checks that the `valid_confidence` value in the regression is equal to this boolean value.
                Defaults to None which does not perform a check.
            extra (List): The pytest-html extra fixture for adding in root-to-tip regression plot.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
            **kwargs: Keyword arguments for the `root_to_tip_regression` method.
        """
        regression = regression or self.root_to_tip_regression(**kwargs)
        clock_model = DateConversion.from_regression(regression.clock_model)
        root_date = clock_model.numdate_from_dist2root(0.0)

        if extra is not None:
            # Render the plot in memory as SVG text and attach it to the HTML report
            f = StringIO()
            self.plot_root_to_tip(filename=f, format="svg", regression=regression)
            svg = f.getvalue()
            extra.append(extras.html(svg))

        if min_r_squared is not None:
            r_squared = clock_model.r_val**2
            assert_or_warn(
                r_squared >= min_r_squared,
                warning,
                f"The R-squared value from the root-to-tip regression '{r_squared}' "
                f"is less than the minimum allowed R-squared '{min_r_squared}'.",
            )

        if min_rate is not None:
            assert_or_warn(
                clock_model.clock_rate >= min_rate,
                warning,
                f"Inferred clock rate '{clock_model.clock_rate}' is less than the minimum allowed clock rate '{min_rate}'.",
            )

        if max_rate is not None:
            assert_or_warn(
                clock_model.clock_rate <= max_rate,
                warning,
                f"Inferred clock rate '{clock_model.clock_rate}' is greater than the maximum allowed clock rate '{max_rate}'.",
            )

        if min_root_date is not None:
            assert_or_warn(
                root_date >= min_root_date,
                warning,
                f"Inferred root date '{root_date}' is less than the minimum allowed root date '{min_root_date}'.",
            )

        if max_root_date is not None:
            assert_or_warn(
                root_date <= max_root_date,
                warning,
                f"Inferred root date '{root_date}' is greater than the maximum allowed root date: '{max_root_date}'.",
            )

        if valid_confidence is not None:
            assert_or_warn(
                clock_model.valid_confidence == valid_confidence,
                warning,
                f"The `clock_model.valid_confidence` variable is not {valid_confidence}.",
            )
from pathlib import Path
from typing import Optional

import typer
from Bio.AlignIO import _FormatToIterator as supported_alignment_formats
from Bio.Phylo._io import supported_formats as supported_tree_formats
from Bio.SeqIO import _FormatToIterator as supported_sequence_formats

from .main import main

# Alignment formats are also accepted wherever a sequence format is expected.
# NOTE(review): `supported_sequence_formats` is the dictionary object imported from Bio.SeqIO,
# so this update changes that dictionary in place for the whole process - confirm this is intended.
supported_sequence_formats.update(supported_alignment_formats)

app = typer.Typer()


def sequence_format_callback(value: str):
    """
    Validates the value of the `--sequence-format` option.

    Args:
        value (str): The format name given on the command line.

    Returns:
        str: The unchanged value if it is a supported sequence or alignment format.

    Raises:
        typer.BadParameter: If the value is not one of the supported formats.
    """
    if value not in supported_sequence_formats:
        raise typer.BadParameter(
            f"'{value}' is not a valid sequence format. Must be one of {', '.join(supported_sequence_formats.keys())}."
        )
    return value


def tree_format_callback(value: str):
    """
    Validates the value of the `--tree-format` option.

    Args:
        value (str): The format name given on the command line.

    Returns:
        str: The unchanged value if it is a supported tree format.

    Raises:
        typer.BadParameter: If the value is not one of the supported formats.
    """
    if value not in supported_tree_formats:
        raise typer.BadParameter(
            f"'{value}' is not a valid tree format. Must be one of {', '.join(supported_tree_formats.keys())}."
        )
    return value


def data_format_callback(value: str):
    """
    Validates the value of the `--data-format` option.

    Args:
        value (str): The format name given on the command line.

    Returns:
        str: The unchanged value if it is 'csv', 'tsv' or 'excel'.

    Raises:
        typer.BadParameter: If the value is not one of the three accepted formats.
    """
    if value not in ['csv', 'tsv', 'excel']:
        raise typer.BadParameter(f"'{value}' is not a valid data format. Must be one of csv, tsv, excel.")
    return value


# No docstring is given to `cli` on purpose: typer would display it as the help text of the command.
# The format options are plain strings checked by their callbacks, so they do not take
# the path validators (`dir_okay`, `exists`) which are only meaningful for the Path options.
@app.command(context_settings={"help_option_names": ["-h", "--help"]})
def cli(
    testfile: Path = typer.Argument(..., help="Path to test file."),
    sequence: Optional[Path] = typer.Option(
        None, "--sequence", "-s", dir_okay=False, exists=True, help="Path to sequence file."
    ),
    sequence_format: Optional[str] = typer.Option(
        'fasta',
        "--sequence-format",
        help=f"{', '.join(supported_sequence_formats.keys())}.",
        callback=sequence_format_callback,
    ),
    tree: Optional[Path] = typer.Option(None, "--tree", "-t", dir_okay=False, exists=True, help="Path to tree file."),
    tree_format: Optional[str] = typer.Option(
        'newick',
        "--tree-format",
        help=f"{', '.join(supported_tree_formats.keys())}.",
        callback=tree_format_callback,
    ),
    data: Optional[Path] = typer.Option(None, "--data", "-d", dir_okay=False, exists=True, help="Path to data file."),
    data_format: Optional[str] = typer.Option(
        'csv', "--data-format", help="csv, tsv, excel.", callback=data_format_callback
    ),
    report: Optional[Path] = typer.Option(
        None, "--report", "-r", dir_okay=False, exists=False, help="Path to HTML report to generate."
    ),
    verbose: Optional[bool] = typer.Option(False, "--verbose", "-v", help="Verbose output"),
    expression: Optional[str] = typer.Option(
        None, "-k", help="Only run tests which match the given substring expression."
    ),
    cores: Optional[str] = typer.Option(
        None,
        "-n",
        help="Number of cores. Use 'auto' to spawn a number of workers processes equal to the number of available CPUs.",
    ),
):
    # Hand everything over to `main` and exit with the code it returns
    exit_code = main(
        testfile=testfile,
        sequence=sequence,
        sequence_format=sequence_format,
        tree=tree,
        tree_format=tree_format,
        data=data,
        data_format=data_format,
        verbose=verbose,
        report=report,
        expression=expression,
        cores=cores,
    )
    raise typer.Exit(code=exit_code)
import inspect
import os
from pathlib import Path
from typing import Optional

import pytest


def main(
    testfile: Optional[Path] = None,
    sequence: Optional[Path] = None,
    sequence_format: Optional[str] = 'fasta',
    tree: Optional[Path] = None,
    tree_format: Optional[str] = 'newick',
    data: Optional[Path] = None,
    data_format: Optional[str] = 'csv',
    verbose: bool = False,
    report: Optional[Path] = None,
    expression: Optional[str] = None,
    cores: Optional[str] = None,
):
    """
    Builds the pytest command line for a phytest run and executes it.

    Args:
        testfile (Path, optional): The test file to run. If not given, the file of the caller of this function is used.
        sequence (Path, optional): If given, passed on with `--sequence` together with `--sequence-format`.
        sequence_format (str, optional): The sequence format name. Defaults to 'fasta'.
        tree (Path, optional): If given, passed on with `--tree` together with `--tree-format`.
        tree_format (str, optional): The tree format name. Defaults to 'newick'.
        data (Path, optional): If given, passed on with `--data` together with `--data-format`.
        data_format (str, optional): The data format name. Defaults to 'csv'.
        verbose (bool): If True, pytest is run with `-v`, otherwise with `-ra --tb=no --no-header`. Defaults to False.
        report (Path, optional): If given, a self-contained HTML report is requested at this path.
        expression (str, optional): If given, passed on with `-k`.
        cores (str, optional): If given, passed on with `-n`.

    Returns:
        The exit code returned by `pytest.main`.

    Raises:
        ValueError: If `report` does not end with '.html'.
    """
    if not testfile:
        # Index 1 of the stack is the caller of this function and index 1 of the frame info is its filename
        caller_filename = inspect.stack()[1][1]
        testfile = Path(os.path.abspath(caller_filename))

    args = [testfile]
    args += ["-v"] if verbose else ["-ra", "--tb=no", "--no-header"]

    # Each input file is forwarded together with its format, but only when the file was given
    inputs = (
        ("--sequence", sequence, "--sequence-format", sequence_format),
        ("--tree", tree, "--tree-format", tree_format),
        ("--data", data, "--data-format", data_format),
    )
    for path_flag, path, format_flag, format_name in inputs:
        if path is not None:
            args += [path_flag, path, format_flag, format_name]

    if report:
        if not str(report).endswith('.html'):
            raise ValueError("Report must use .html extension.")
        stylesheet = Path(__file__).parent / 'report/logo.css'
        args += [f"--html={report}", "--self-contained-html", f"--css={stylesheet}"]

    if expression:
        # only run tests which match the given substring expression
        # see the pytest help
        args += ["-k", expression]

    if cores:
        # parallel with pytest-xdist
        args += ["-n", cores]

    return pytest.main(args, plugins=['phytest'])
expression: 43 | # only run tests which match the given substring expression 44 | # see the pytest help 45 | args.extend(["-k", expression]) 46 | if cores: 47 | # parallel with pytest-xdist 48 | args.extend(["-n", cores]) 49 | exit_code = pytest.main(args, plugins=['phytest']) 50 | return exit_code 51 | -------------------------------------------------------------------------------- /phytest/report/logo.css: -------------------------------------------------------------------------------- 1 | h1 { 2 | background-image: url(data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4KPHN2ZyB2aWV3Qm94PSIxMDUuNTMzIDc1LjA4NyAxMDk0LjM2MyAzMjUuNjM3IiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciPgogIDxyZWN0IHg9Ii0zMC4xNDQiIHk9Ii02LjY2MSIgd2lkdGg9IjEzOTYuMTI1IiBoZWlnaHQ9IjQ4Ni4yNzQiIHN0eWxlPSJzdHJva2U6IHJnYigwLCAwLCAwKTsgdmlzaWJpbGl0eTogaGlkZGVuOyIvPgogIDxnIHRyYW5zZm9ybT0ibWF0cml4KDEsIDAsIDAsIDEsIC0zLjg2NTg1MywgMTYuMTM3NzkzKSI+CiAgICA8Zz4KICAgICAgPHRpdGxlPmxvZ288L3RpdGxlPgogICAgICA8Zz4KICAgICAgICA8dGl0bGU+dHJlZTwvdGl0bGU+CiAgICAgICAgPHBvbHlsaW5lIHN0eWxlPSJmaWxsOiBub25lOyBzdHJva2UtbGluZWNhcDogcm91bmQ7IHN0cm9rZS1saW5lam9pbjogcm91bmQ7IHN0cm9rZTogcmdiKDEyNywgMTQwLCAxNDEpOyBzdHJva2Utd2lkdGg6IDZweDsiIHBvaW50cz0iMjg3LjgyMyAyMjUuMTU5IDIzNS45OTQgMjI1LjE1OSAyMzUuOTk0IDE0Ni4wNSAyODcuODk3IDE0Ni4wNSIvPgogICAgICAgIDxwb2x5bGluZSBzdHlsZT0iZmlsbDogbm9uZTsgc3Ryb2tlLWxpbmVjYXA6IHJvdW5kOyBzdHJva2UtbGluZWpvaW46IHJvdW5kOyBzdHJva2U6IHJnYigxMjcsIDE0MCwgMTQxKTsgc3Ryb2tlLXdpZHRoOiA2cHg7IiBwb2ludHM9IjIzNS42MzkgMTg1LjcxNiAxNzguMDg3IDE4NS43MTYgMTc4LjA4NyAyOTUuNDk0IDI4Ny44NTkgMjk1LjQ5NCIvPgogICAgICA8L2c+CiAgICAgIDxwYXRoIHN0cm9rZS1saW5lY2FwPSJyb3VuZCIgc3Ryb2tlLWxpbmVqb2luPSJyb3VuZCIgZD0iTSAzMDQuMDMzIDE0NS4yNzEgTCAzMDYuMjg1IDE0Ny41MjMgTCAzMTAuNzg5IDE0My4wMTkgTSAzMTcuNTQ0IDE0NS4yNzEgQyAzMTcuNTQ0IDE1My4wNzEgMzA5LjEgMTU3Ljk0OCAzMDIuMzQ1IDE1NC4wNDYgQyAyOTkuMjEgMTUyLjIzNyAyOTcuMjc4IDE0OC44OTIgMjk3LjI3OCAxNDUuMjcxIEMgMjk3LjI3OCAxMzcuNDcxIDMwNS43MjIgMTMyLjU5NiAzMTIuNDc4IDEzNi40OTYgQyAzMTUuNjEzID
EzOC4zMDYgMzE3LjU0NCAxNDEuNjUxIDMxNy41NDQgMTQ1LjI3MSBaIiBzdHlsZT0iZmlsbDogbm9uZTsgc3Ryb2tlOiByZ2IoNDYsIDIwNCwgMTEzKTsgc3Ryb2tlLXdpZHRoOiAzcHg7Ii8+CiAgICAgIDxwYXRoIHN0cm9rZS1saW5lY2FwPSJyb3VuZCIgc3Ryb2tlLWxpbmVqb2luPSJyb3VuZCIgZD0iTSAzMDQuMDMzIDI5NS41ODIgTCAzMDYuMjg1IDI5Ny44MzQgTCAzMTAuNzg5IDI5My4zMyBNIDMxNy41NDQgMjk1LjU4MiBDIDMxNy41NDQgMzAzLjM4MiAzMDkuMSAzMDguMjU5IDMwMi4zNDUgMzA0LjM1NyBDIDI5OS4yMSAzMDIuNTQ4IDI5Ny4yNzggMjk5LjIwMyAyOTcuMjc4IDI5NS41ODIgQyAyOTcuMjc4IDI4Ny43ODIgMzA1LjcyMiAyODIuOTA3IDMxMi40NzggMjg2LjgwNyBDIDMxNS42MTMgMjg4LjYxNyAzMTcuNTQ0IDI5MS45NjMgMzE3LjU0NCAyOTUuNTgyIFoiIHN0eWxlPSJmaWxsOiBub25lOyBzdHJva2U6IHJnYig0NiwgMjA0LCAxMTMpOyBzdHJva2Utd2lkdGg6IDNweDsiLz4KICAgICAgPHBhdGggc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIiBkPSJNIDMwNS4xNTkgMjI3LjU4MSBMIDMwNy40MTEgMjI1LjMyOSBNIDMwNy40MTEgMjI1LjMyOSBMIDMwOS42NjMgMjIzLjA3NyBNIDMwNy40MTEgMjI1LjMyOSBMIDMwNS4xNTkgMjIzLjA3NyBNIDMwNy40MTEgMjI1LjMyOSBMIDMwOS42NjMgMjI3LjU4MSBNIDMxNy41NDQgMjI1LjMyOSBDIDMxNy41NDQgMjMzLjEyOSAzMDkuMSAyMzguMDA0IDMwMi4zNDUgMjM0LjEwNCBDIDI5OS4yMSAyMzIuMjk0IDI5Ny4yNzggMjI4Ljk0OSAyOTcuMjc4IDIyNS4zMjkgQyAyOTcuMjc4IDIxNy41MjggMzA1LjcyMiAyMTIuNjUzIDMxMi40NzggMjE2LjU1MyBDIDMxNS42MTMgMjE4LjM2MyAzMTcuNTQ0IDIyMS43MDggMzE3LjU0NCAyMjUuMzI5IFoiIHN0eWxlPSJmaWxsOiBub25lOyBzdHJva2U6IHJnYigyMzEsIDc2LCA2MCk7IHN0cm9rZS13aWR0aDogM3B4OyIvPgogICAgPC9nPgogICAgPHRleHQgc3R5bGU9ImZpbGw6IHJnYig0NCwgNjIsIDgwKTsgZm9udC1mYW1pbHk6IEFyaWFsLCBzYW5zLXNlcmlmOyBmb250LXNpemU6IDI0NC45cHg7IHN0cm9rZTogcmdiKDEyNywgMTQwLCAxNDEpOyBzdHJva2UtbGluZWNhcDogcm91bmQ7IHN0cm9rZS1saW5lam9pbjogcm91bmQ7IHN0cm9rZS13aWR0aDogNHB4OyB3aGl0ZS1zcGFjZTogcHJlOyIgeD0iMzE4LjM1OCIgeT0iMzA2LjY5MSI+UGh5dGVzdDwvdGV4dD4KICA8L2c+Cjwvc3ZnPgo=); 3 | background-repeat: no-repeat; 4 | display: flex; 5 | height: 3em; 6 | background-size: auto 2em; 7 | align-items: end; 8 | padding-left: 30px; 9 | } 10 | -------------------------------------------------------------------------------- /phytest/utils.py: 
"""Shared helpers for phytest: assertion/warning plumbing and the ``PhytestObject`` mixin."""
from functools import partial
from typing import List
from warnings import warn


class PhytestWarning(Warning):
    """Warning category emitted by :func:`assert_or_warn` when ``warning`` is truthy."""


class PhytestAssertion(AssertionError):
    """Exception raised by :func:`assert_or_warn` when ``warning`` is falsy."""


def assert_or_warn(statement, warning, *messages):
    """
    Check ``statement`` and report a failure either as a warning or as an exception.

    Args:
        statement: The condition being checked. If it is truthy nothing happens.
        warning: If truthy, a failed check emits a ``PhytestWarning``;
            otherwise it raises a ``PhytestAssertion``.
        *messages: Parts of the failure message. Each part is converted with ``str()``
            and the parts are joined with newlines.

    Raises:
        PhytestAssertion: If ``statement`` is falsy and ``warning`` is falsy.
    """
    if statement:
        return

    # Convert every part so that a number (or any other object) passed as a message
    # part is reported, instead of the join failing with a TypeError that would hide
    # the real failure.
    message = "\n".join(str(part) for part in messages)
    if warning:
        # stacklevel=2 attributes the warning to the code that called assert_or_warn
        # rather than to this helper module.
        warn(message, PhytestWarning, stacklevel=2)
    else:
        raise PhytestAssertion(message)


def default_date_patterns():
    """
    Returns a new list with the default regular expressions used to find dates.

    The first pattern matches four digits, an optional ``.`` and any further digits
    at the end of the string; the second matches ``dddd-dd-dd``.
    """
    return [
        r"\d{4}\.?\d*$",
        r"\d{4}-\d{2}-\d{2}",
    ]


class PhytestObject:
    """
    Mixin that gives every ``assert_*`` method a ``warn_*`` counterpart.

    For each callable attribute whose name starts with ``assert_`` an instance attribute
    is created in which ``assert`` is replaced by ``warn`` (``assert_length`` becomes
    ``warn_length``). Calling it is the same as calling the assertion method with
    ``warning=True``.
    """

    def __init__(self, *args, **kwargs):
        # Forward all arguments to the next class in the MRO.
        super().__init__(*args, **kwargs)

        # Add partial methods with the warning flag set to True
        for method_name in self.assertion_method_names():
            method = getattr(self, method_name)
            # Keep the leading underscore: "assert_length" -> "_length" -> "warn_length".
            truncated_name = method_name[len("assert") :]
            warning_name = f"warn{truncated_name}"
            setattr(self, warning_name, partial(method, warning=True))

    def assertion_method_names(self) -> List[str]:
        """
        Returns a list with the names of the methods used to make assertion statements.

        These are the names of all callable attributes that start with ``assert_``.
        """
        return [
            attribute
            for attribute in dir(self)
            if attribute.startswith("assert_") and callable(getattr(self, attribute))
        ]
/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phytest-devs/phytest/b199e2fcc2bbd2a2e82ce71a73c472d28ea474ec/tests/__init__.py -------------------------------------------------------------------------------- /tests/input/alignment.py: -------------------------------------------------------------------------------- 1 | from phytest import Alignment, Data, Sequence, Tree 2 | 3 | 4 | def test_alignment_length(alignment: Alignment): 5 | alignment.assert_length(length=4) 6 | -------------------------------------------------------------------------------- /tests/input/basic.py: -------------------------------------------------------------------------------- 1 | from phytest import Alignment, Data, Sequence, Tree 2 | 3 | 4 | def test_length(sequence: Sequence): 5 | sequence.assert_length(length=100) 6 | 7 | 8 | def test_alignment_length(alignment: Alignment): 9 | alignment.assert_length(length=4) 10 | 11 | 12 | def test_tree_number_of_tips(tree: Tree): 13 | tree.assert_number_of_tips(4) 14 | 15 | 16 | def test_data_number_of_rows(data: Data): 17 | data.assert_match('name', 'Sequence_[A-D]') 18 | -------------------------------------------------------------------------------- /tests/input/testfile1.py: -------------------------------------------------------------------------------- 1 | def test_dummy(): 2 | assert 1 == 1 3 | -------------------------------------------------------------------------------- /tests/test_alignments.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import pytest 4 | 5 | from phytest import Alignment 6 | from phytest.utils import PhytestAssertion, PhytestWarning 7 | 8 | 9 | def test_assert_alignment_width(): 10 | alignment_path = 'examples/data/invalid.fasta' 11 | alignment = Alignment.read(alignment_path, 'fasta') 12 | alignment.assert_width(width=100, min=99, max=101) 13 | with pytest.raises( 14 | 
PhytestAssertion, 15 | match=re.escape("The width of the alignment is 100.\nThis is not equal to the required width of 99."), 16 | ): 17 | alignment.assert_width(width=99) 18 | with pytest.raises( 19 | PhytestAssertion, 20 | match=re.escape("The width of the alignment is 100.\nThis is less than the minimum width of 101."), 21 | ): 22 | alignment.assert_width(min=101) 23 | with pytest.raises( 24 | PhytestAssertion, 25 | match=re.escape("The width of the alignment is 100.\nThis is greater than the maximum width of 99."), 26 | ): 27 | alignment.assert_width(max=99) 28 | 29 | with pytest.warns( 30 | PhytestWarning, 31 | match=re.escape("The width of the alignment is 100.\nThis is greater than the maximum width of 99."), 32 | ): 33 | alignment.warn_width(max=99) 34 | 35 | 36 | def test_assert_alignment_length(): 37 | alignment_path = 'examples/data/invalid.fasta' 38 | alignment = Alignment.read(alignment_path, 'fasta') 39 | alignment.assert_length(length=3, min=2, max=4) 40 | with pytest.raises( 41 | PhytestAssertion, 42 | match=re.escape("The number of sequences in the alignment is 3.\nThis is less than required number of 1."), 43 | ): 44 | alignment.assert_length(length=1) 45 | with pytest.raises( 46 | PhytestAssertion, 47 | match=re.escape("The number of sequences in the alignment is 3.\nThis is less than the minimum 4."), 48 | ): 49 | alignment.assert_length(min=4) 50 | with pytest.raises( 51 | PhytestAssertion, 52 | match=re.escape("The number of sequences in the alignment is 3.\nThis is greater than the maximum 2."), 53 | ): 54 | alignment.assert_length(max=2) 55 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | from typer.testing import CliRunner 5 | 6 | from phytest.cli import app 7 | 8 | runner = CliRunner() 9 | 10 | 11 | def test_cli_help(): 12 | result = runner.invoke(app, 
def test_cli_report(request: pytest.FixtureRequest):
    """Check that the ``-r`` option makes the CLI write an HTML report to the given path."""
    report = Path("pytest-report.html")
    # Remove a report left behind by an earlier run. Without this the existence check
    # below would pass even if this invocation wrote nothing.
    report.unlink(missing_ok=True)

    runner.invoke(
        app,
        [
            str(request.path.parent / "input/basic.py"),
            "-s",
            "examples/data/example.fasta",
            "-t",
            "examples/data/example.tree",
            "-d",
            "examples/data/example.csv",
            "-r",
            str(report),
        ],
    )
    assert report.exists()
95 | def test_cli_report_invalid(request: pytest.FixtureRequest): 96 | result = runner.invoke( 97 | app, 98 | [ 99 | str(request.path.parent / "input/basic.py"), 100 | "-s", 101 | "examples/data/example.fasta", 102 | "-t", 103 | "examples/data/example.tree", 104 | "-d", 105 | "examples/data/example.csv", 106 | "-r", 107 | "pytest-report.txt", 108 | ], 109 | ) 110 | assert isinstance(result.exception, ValueError) 111 | assert str(result.exception) == "Report must use .html extension." 112 | 113 | 114 | def test_cli_missing_sequence_file(request: pytest.FixtureRequest): 115 | result = runner.invoke( 116 | app, 117 | [ 118 | str(request.path.parent / "input/basic.py"), 119 | "-t", 120 | "examples/data/example.tree", 121 | "-d", 122 | "examples/data/example.csv", 123 | "-v", 124 | ], 125 | ) 126 | assert "ValueError: test_length requires a sequence file" in result.stdout 127 | 128 | 129 | def test_cli_invalid_tree_format(request: pytest.FixtureRequest): 130 | result = runner.invoke( 131 | app, 132 | [ 133 | str(request.path.parent / "input/basic.py"), 134 | "-t", 135 | "examples/data/example.tree", 136 | "-s", 137 | "examples/data/example.fasta", 138 | "-d", 139 | "examples/data/example.csv", 140 | "--tree-format", 141 | "excel", 142 | "-v", 143 | ], 144 | ) 145 | assert ( 146 | "Error: Invalid value for '--tree-format': 'excel' is not a valid tree format. Must be one of newick, nexus, phyloxml, nexml." 147 | in result.stdout 148 | ) 149 | 150 | 151 | def test_cli_invalid_data_format(request: pytest.FixtureRequest): 152 | result = runner.invoke( 153 | app, 154 | [ 155 | str(request.path.parent / "input/basic.py"), 156 | "-t", 157 | "examples/data/example.tree", 158 | "-s", 159 | "examples/data/example.fasta", 160 | "-d", 161 | "examples/data/example.csv", 162 | "--data-format", 163 | "pdf", 164 | "-v", 165 | ], 166 | ) 167 | assert ( 168 | "Error: Invalid value for '--data-format': 'pdf' is not a valid data format. 
Must be one of csv, tsv, excel" 169 | in result.stdout 170 | ) 171 | 172 | 173 | def test_cli_invalid_sequence_format(request: pytest.FixtureRequest): 174 | result = runner.invoke( 175 | app, 176 | [ 177 | str(request.path.parent / "input/basic.py"), 178 | "-t", 179 | "examples/data/example.tree", 180 | "-s", 181 | "examples/data/example.fasta", 182 | "-d", 183 | "examples/data/example.csv", 184 | "--sequence-format", 185 | "pdf", 186 | "-v", 187 | ], 188 | ) 189 | assert "Error: Invalid value for '--sequence-format': 'pdf' is not" in result.stdout 190 | 191 | 192 | def test_cli_invalid_data(request: pytest.FixtureRequest): 193 | result = runner.invoke( 194 | app, 195 | [ 196 | str(request.path.parent / "input/basic.py"), 197 | "-t", 198 | "phytest/bio/tree.py", # should not be read 199 | "-d", 200 | "phytest/bio/data.py", 201 | "-v", 202 | ], 203 | ) 204 | assert "ValueError: test_length requires a sequence file" in result.stdout 205 | 206 | 207 | def test_cli_missing_tree_file(request: pytest.FixtureRequest): 208 | result = runner.invoke( 209 | app, 210 | [ 211 | str(request.path.parent / "input/basic.py"), 212 | "-s", 213 | "examples/data/example.fasta", 214 | "-d", 215 | "examples/data/example.csv", 216 | "-v", 217 | ], 218 | ) 219 | assert "ValueError: test_tree_number_of_tips requires a tree file" in result.stdout 220 | 221 | 222 | def test_cli_missing_data_file(request: pytest.FixtureRequest): 223 | result = runner.invoke( 224 | app, 225 | [ 226 | str(request.path.parent / "input/basic.py"), 227 | "-s", 228 | "examples/data/example.fasta", 229 | "-t", 230 | "examples/data/example.tree", 231 | "-v", 232 | ], 233 | ) 234 | assert "ValueError: test_data_number_of_rows requires a data file" in result.stdout 235 | 236 | 237 | def test_cli_missing_alignment_file(request: pytest.FixtureRequest): 238 | result = runner.invoke( 239 | app, 240 | [ 241 | str(request.path.parent / "input/alignment.py"), 242 | "-t", 243 | "examples/data/example.tree", 244 | "-d", 245 | 
"examples/data/example.csv", 246 | "-v", 247 | ], 248 | ) 249 | assert "ValueError: test_alignment_length requires an alignment file" in result.stdout 250 | 251 | 252 | def test_cli_alignment(request: pytest.FixtureRequest): 253 | result = runner.invoke( 254 | app, 255 | [ 256 | str(request.path.parent / "input/alignment.py"), 257 | "-s", 258 | "examples/data/example.fasta", 259 | "-v", 260 | ], 261 | ) 262 | assert "1 passed" in result.stdout 263 | -------------------------------------------------------------------------------- /tests/test_data.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import pytest 4 | 5 | from phytest import Data 6 | from phytest.utils import PhytestAssertion, PhytestWarning 7 | 8 | 9 | def test_data_read(): 10 | data_path = 'examples/data/example.csv' 11 | data = Data.read(data_path, 'csv') 12 | data_path = 'examples/data/example.tsv' 13 | data = Data.read(data_path, 'tsv') 14 | data_path = 'examples/data/example.xlsx' 15 | data = Data.read(data_path, 'excel') 16 | 17 | 18 | def test_data_read_invalid(): 19 | data_path = 'examples/data/example.csv' 20 | with pytest.raises(ValueError, match="Data format must be one of csv, tsv, excel"): 21 | Data.read(data_path, 'txt') 22 | 23 | 24 | def test_assert_data_contains(): 25 | data_path = 'examples/data/example.csv' 26 | data = Data.read(data_path, 'csv') 27 | data.assert_contains('name', 'Sequence_A') 28 | with pytest.raises( 29 | PhytestAssertion, 30 | match=re.escape( 31 | "The values of column 'name' are '['Sequence_A' 'Sequence_B' 'Sequence_C' 'Sequence_D']'.\nThe column 'name' does not contain 'Sequence_X'." 
32 | ), 33 | ): 34 | data.assert_contains('name', 'Sequence_X') 35 | 36 | 37 | def test_assert_data_match(): 38 | data_path = 'examples/data/example.csv' 39 | data = Data.read(data_path, 'csv') 40 | data.assert_match('name', 'Sequence_.') 41 | with pytest.raises( 42 | PhytestAssertion, 43 | match=re.escape( 44 | "The values of column 'name' are '['Sequence_A' 'Sequence_B' 'Sequence_C' 'Sequence_D']'.\nThe row(s) '[3]' of the column 'name' do not match the pattern 'Sequence_[A-C]'." 45 | ), 46 | ): 47 | data.assert_match('name', 'Sequence_[A-C]') 48 | 49 | 50 | def test_assert_data_allowed_columns(): 51 | data_path = 'examples/data/example.csv' 52 | data = Data.read(data_path, 'csv') 53 | data.assert_columns(['name', 'date', 'sequence']) 54 | with pytest.raises( 55 | PhytestAssertion, 56 | match=re.escape("The columns '['date']' are not in the list of allowed columns '['name', 'sequence']'."), 57 | ): 58 | data.assert_columns(['name', 'sequence']) 59 | with pytest.raises( 60 | PhytestAssertion, 61 | match=re.escape("The column names do not exactly match the list of allowed columns"), 62 | ): 63 | data.assert_columns(['name', 'date', 'sequence'], exact=True) 64 | 65 | 66 | def test_assert_data_allowed_values(): 67 | data_path = 'examples/data/example.csv' 68 | data = Data.read(data_path, 'csv') 69 | data.assert_values('name', ['Sequence_A', 'Sequence_B', 'Sequence_C', 'Sequence_D', 'Sequence_E']) 70 | with pytest.raises( 71 | PhytestAssertion, 72 | match=re.escape( 73 | "The row(s) '[3]' of the column 'name' are not in the list of allowed values '['Sequence_A', 'Sequence_B', 'Sequence_C']" 74 | ), 75 | ): 76 | data.assert_values('name', ['Sequence_A', 'Sequence_B', 'Sequence_C']) 77 | 78 | # exact 79 | with pytest.raises( 80 | PhytestAssertion, 81 | match=re.escape("The values column 'name' do not exactly match the allowed values"), 82 | ): 83 | data.assert_values('name', ['Sequence_A', 'Sequence_B', 'Sequence_C', 'Sequence_D', 'Sequence_E'], exact=True) 84 | 85 | # 
allow nan 86 | data.replace('Sequence_D', float('nan'), inplace=True) 87 | data.assert_values('name', ['Sequence_A', 'Sequence_B', 'Sequence_C'], allow_nan=True) 88 | with pytest.raises( 89 | PhytestAssertion, 90 | match=re.escape( 91 | "The row(s) '[3]' of the column 'name' are not in the list of allowed values '['Sequence_A', 'Sequence_B', 'Sequence_C']'." 92 | ), 93 | ): 94 | data.assert_values('name', ['Sequence_A', 'Sequence_B', 'Sequence_C']) 95 | 96 | 97 | def test_assert_range(): 98 | data_path = 'examples/data/example.csv' 99 | data = Data.read(data_path, 'csv') 100 | data['value'] = [1, 2, 3, 4] 101 | data.assert_range('value', min=1, max=5) 102 | with pytest.raises( 103 | PhytestAssertion, 104 | match=re.escape("The maximum value of column 'value' is '4', which is greater than '3'."), 105 | ): 106 | data.assert_range('value', max=3) 107 | with pytest.raises( 108 | PhytestAssertion, 109 | match=re.escape("The minimum value of column 'value' is '1', which is less than '2'."), 110 | ): 111 | data.assert_range('value', min=2) 112 | -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | 5 | from phytest import main 6 | 7 | 8 | def test_main_basic(request: pytest.FixtureRequest): 9 | result = main( 10 | str(request.path.parent / "input/basic.py"), 11 | sequence="examples/data/example.fasta", 12 | tree="examples/data/example.tree", 13 | data="examples/data/example.csv", 14 | ) 15 | assert result.value == 0 16 | 17 | 18 | def test_tree_not_found(request: pytest.FixtureRequest, capsys): 19 | result = main( 20 | str(request.path.parent / "input/basic.py"), 21 | sequence="examples/data/example.fasta", 22 | tree="examples/data/NOTFOUND.tree", 23 | data="examples/data/example.csv", 24 | ) 25 | captured = capsys.readouterr() 26 | assert "FileNotFoundError: Unable to locate 
import subprocess
import sys


def test_self_contained():
    """Run the self-contained example as a script and check the summary it prints."""
    testfile = 'examples/self_contained.py'
    # Run the example with the interpreter that is running this test suite rather than
    # whatever ``python`` resolves to on PATH, and pass an argument list so that no
    # shell is involved.
    completed = subprocess.run([sys.executable, testfile], stdout=subprocess.PIPE)
    assert completed.returncode == 0
    # stdout is compared as bytes so the check does not depend on the locale encoding.
    assert b'52 passed' in completed.stdout
def test_assert_valid_alphabet():
    """Check ``assert_valid_alphabet`` with the default alphabet and with explicit ones."""
    sequence = Sequence(
        Seq("ACGTACGTACGT"),
        id="DNAID",
        name="TEST",
        description="Test dna sequence",
    )
    protein = Sequence(
        Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF"),
        id="PROTEINID",
        name="TEST Protein",
        description="Test protein sequence",
    )
    sequence.assert_valid_alphabet()
    # The expected messages are escaped, as in the other tests of this module, so that
    # "." is matched literally instead of acting as a regex wildcard.
    with pytest.raises(
        PhytestAssertion,
        match=re.escape(
            "Invalid pattern found in 'DNAID'.\nCharacter 'G' at position 3 found which is not in alphabet 'ABCDE'."
        ),
    ):
        sequence.assert_valid_alphabet(alphabet="ABCDE")

    protein.assert_valid_alphabet(alphabet="ACDEFGHIKLMNPQRSTVWYXBZJ")
    with pytest.raises(
        PhytestAssertion,
        match=re.escape(
            "Invalid pattern found in 'PROTEINID'.\nCharacter 'M' at position 1 found which is not in alphabet 'ATCGN-'."
        ),
    ):
        protein.assert_valid_alphabet()
99.") 59 | ): 60 | sequence.assert_length(max=99) 61 | 62 | with pytest.warns( 63 | PhytestWarning, match=re.escape("Sequence length of 'DNAID' (100) is greater than the maximum 99.") 64 | ): 65 | sequence.warn_length(max=99) 66 | 67 | 68 | def test_assert_count(): 69 | sequence = Sequence( 70 | Seq("ATG" * 100), 71 | id="DNAID", 72 | name="TEST", 73 | description="Test dna sequence", 74 | ) 75 | sequence.assert_count(pattern='A', count=100, min=99, max=101) 76 | with pytest.raises( 77 | PhytestAssertion, 78 | match=re.escape( 79 | "Sequence 'DNAID' matches pattern 'A' 100 time(s).\nThis is not equal to the required number of 1." 80 | ), 81 | ): 82 | sequence.assert_count(pattern='A', count=1) 83 | with pytest.raises( 84 | PhytestAssertion, 85 | match=re.escape("Sequence 'DNAID' matches pattern 'A' 100 time(s).\nThis is less than the minimum 101."), 86 | ): 87 | sequence.assert_count(pattern='A', min=101) 88 | with pytest.raises( 89 | PhytestAssertion, 90 | match=re.escape("Sequence 'DNAID' matches pattern 'A' 100 time(s).\nThis is greater than the maximum 99."), 91 | ): 92 | sequence.assert_count(pattern='A', max=99) 93 | 94 | 95 | def test_assert_percent(): 96 | sequence = Sequence( 97 | Seq("ATGC" * 100), 98 | id="DNAID", 99 | name="TEST", 100 | description="Test dna sequence", 101 | ) 102 | sequence.assert_percent(nucleotide='A', percent=25, min=24.9, max=25.1) 103 | with pytest.raises( 104 | PhytestAssertion, 105 | match=re.escape( 106 | "Sequence 'DNAID' contains 25.0 percent 'A'.\nThis is not equal to the required percentage of 24." 
107 | ), 108 | ): 109 | sequence.assert_percent(nucleotide='A', percent=24) 110 | with pytest.raises( 111 | PhytestAssertion, 112 | match=re.escape("Sequence 'DNAID' contains 25.0 percent 'A'.\nThis is less than the minimum 25.1."), 113 | ): 114 | sequence.assert_percent(nucleotide='A', min=25.1) 115 | with pytest.raises( 116 | PhytestAssertion, 117 | match=re.escape("Sequence 'DNAID' contains 25.0 percent 'A'.\nThis is greater than the maximum 24.9."), 118 | ): 119 | sequence.assert_percent(nucleotide='A', max=24.9) 120 | 121 | sequence.assert_percent(nucleotide='N', percent=0.0) 122 | 123 | 124 | def test_assert_percent_zero_length(): 125 | sequence = Sequence( 126 | Seq(""), 127 | id="empty", 128 | name="empty", 129 | description="empty dna sequence", 130 | ) 131 | sequence.assert_percent(nucleotide='A', percent=0.0) 132 | 133 | 134 | def test_assert_percent_errors(): 135 | sequence = Sequence( 136 | Seq("ATGC" * 100), 137 | id="DNAID", 138 | name="TEST", 139 | description="Test dna sequence", 140 | ) 141 | with pytest.raises( 142 | ValueError, 143 | match=re.escape("The length of the requested nucleotide"), 144 | ): 145 | sequence.assert_percent(nucleotide='AA', percent=25, min=24.9, max=25.1) 146 | 147 | with pytest.raises( 148 | ValueError, 149 | match=re.escape("Nucleotide must be str or list and cannot be of type"), 150 | ): 151 | sequence.assert_percent(nucleotide=10, percent=25, min=24.9, max=25.1) 152 | 153 | 154 | def test_assert_percent_N(): 155 | sequence = Sequence( 156 | Seq("ATNN" * 100), 157 | id="DNAID", 158 | name="TEST", 159 | description="Test dna sequence", 160 | ) 161 | sequence.assert_percent_N(percent=50, min=49.9, max=50.1) 162 | with pytest.raises( 163 | PhytestAssertion, 164 | match=re.escape( 165 | "Sequence 'DNAID' contains 50.0 percent 'N, n'.\nThis is not equal to the required percentage of 49." 
166 | ), 167 | ): 168 | sequence.assert_percent_N(percent=49) 169 | with pytest.raises( 170 | PhytestAssertion, 171 | match=re.escape("Sequence 'DNAID' contains 50.0 percent 'N, n'.\nThis is less than the minimum 50.1."), 172 | ): 173 | sequence.assert_percent_N(min=50.1) 174 | with pytest.raises( 175 | PhytestAssertion, 176 | match=re.escape("Sequence 'DNAID' contains 50.0 percent 'N, n'.\nThis is greater than the maximum 49.9."), 177 | ): 178 | sequence.assert_percent_N(max=49.9) 179 | 180 | 181 | def test_assert_percent_gaps(): 182 | sequence = Sequence( 183 | Seq("AT--" * 100), 184 | id="DNAID", 185 | name="TEST", 186 | description="Test dna sequence", 187 | ) 188 | sequence.assert_percent_gaps(percent=50, min=49.9, max=50.1) 189 | with pytest.raises( 190 | PhytestAssertion, 191 | match=re.escape( 192 | "Sequence 'DNAID' contains 50.0 percent '-'.\nThis is not equal to the required percentage of 49." 193 | ), 194 | ): 195 | sequence.assert_percent_gaps(percent=49) 196 | with pytest.raises( 197 | PhytestAssertion, 198 | match=re.escape("Sequence 'DNAID' contains 50.0 percent '-'.\nThis is less than the minimum 50.1."), 199 | ): 200 | sequence.assert_percent_gaps(min=50.1) 201 | with pytest.raises( 202 | PhytestAssertion, 203 | match=re.escape("Sequence 'DNAID' contains 50.0 percent '-'.\nThis is greater than the maximum 49.9."), 204 | ): 205 | sequence.assert_percent_gaps(max=49.9) 206 | 207 | 208 | def test_assert_percent_GC(): 209 | sequence = Sequence( 210 | Seq("ATGC" * 100), 211 | id="DNAID", 212 | name="TEST", 213 | description="Test dna sequence", 214 | ) 215 | sequence.assert_percent_GC(percent=50, min=49.9, max=50.1) 216 | with pytest.raises( 217 | PhytestAssertion, 218 | match=re.escape( 219 | "Sequence 'DNAID' contains 50.0 percent 'G, C, g, c, S, s'.\nThis is not equal to the required percentage of 49." 
220 | ), 221 | ): 222 | sequence.assert_percent_GC(percent=49) 223 | with pytest.raises( 224 | PhytestAssertion, 225 | match=re.escape( 226 | "Sequence 'DNAID' contains 50.0 percent 'G, C, g, c, S, s'.\nThis is less than the minimum 50.1." 227 | ), 228 | ): 229 | sequence.assert_percent_GC(min=50.1) 230 | with pytest.raises( 231 | PhytestAssertion, 232 | match=re.escape( 233 | "Sequence 'DNAID' contains 50.0 percent 'G, C, g, c, S, s'.\nThis is greater than the maximum 49.9." 234 | ), 235 | ): 236 | sequence.assert_percent_GC(max=49.9) 237 | 238 | 239 | def test_assert_count_Ns(): 240 | sequence = Sequence( 241 | Seq("ATGN" * 100), 242 | id="DNAID", 243 | name="TEST", 244 | description="Test dna sequence", 245 | ) 246 | sequence.assert_count_Ns(count=100, min=99, max=101) 247 | with pytest.raises( 248 | PhytestAssertion, 249 | match=re.escape( 250 | "Sequence 'DNAID' matches pattern 'N' 100 time(s).\nThis is not equal to the required number of 1." 251 | ), 252 | ): 253 | sequence.assert_count_Ns(count=1) 254 | with pytest.raises( 255 | PhytestAssertion, 256 | match=re.escape("Sequence 'DNAID' matches pattern 'N' 100 time(s).\nThis is less than the minimum 101."), 257 | ): 258 | sequence.assert_count_Ns(min=101) 259 | with pytest.raises( 260 | PhytestAssertion, 261 | match=re.escape("Sequence 'DNAID' matches pattern 'N' 100 time(s).\nThis is greater than the maximum 99."), 262 | ): 263 | sequence.assert_count_Ns(max=99) 264 | 265 | 266 | def test_assert_count_gaps(): 267 | sequence = Sequence( 268 | Seq("ATG-" * 100), 269 | id="DNAID", 270 | name="TEST", 271 | description="Test dna sequence", 272 | ) 273 | sequence.assert_count_gaps(count=100, min=99, max=101) 274 | with pytest.raises( 275 | PhytestAssertion, 276 | match=re.escape( 277 | "Sequence 'DNAID' matches pattern '-' 100 time(s).\nThis is not equal to the required number of 1." 
278 | ), 279 | ): 280 | sequence.assert_count_gaps(count=1) 281 | with pytest.raises( 282 | PhytestAssertion, 283 | match=re.escape("Sequence 'DNAID' matches pattern '-' 100 time(s).\nThis is less than the minimum 101."), 284 | ): 285 | sequence.assert_count_gaps(min=101) 286 | with pytest.raises( 287 | PhytestAssertion, 288 | match=re.escape("Sequence 'DNAID' matches pattern '-' 100 time(s).\nThis is greater than the maximum 99."), 289 | ): 290 | sequence.assert_count_gaps(max=99) 291 | 292 | 293 | def test_assert_sequence_longest_stretch(): 294 | sequence = Sequence( 295 | Seq("A" * 10 + "-" * 3 + "N" * 10), 296 | id="DNAID", 297 | name="TEST", 298 | description="Test dna sequence", 299 | ) 300 | sequence.assert_longest_stretch(pattern='A', count=10, min=9, max=11) 301 | with pytest.raises( 302 | PhytestAssertion, 303 | match=re.escape( 304 | "The longest stretch of pattern 'A' in sequence 'DNAID' is 10.\nThis is not equal to the required number of 1" 305 | ), 306 | ): 307 | sequence.assert_longest_stretch(pattern='A', count=1) 308 | with pytest.raises( 309 | PhytestAssertion, 310 | match=re.escape( 311 | "The longest stretch of pattern 'A' in sequence 'DNAID' is 10.\nThis is less than the minimum 11." 312 | ), 313 | ): 314 | sequence.assert_longest_stretch(pattern='A', min=11) 315 | with pytest.raises( 316 | PhytestAssertion, 317 | match=re.escape( 318 | "The longest stretch of pattern 'A' in sequence 'DNAID' is 10.\nThis is greater than the maximum 9." 
def test_assert_sequence_longest_Ns():
    """Exercise `assert_longest_stretch_Ns` on ten 'A', three '-' and ten 'N'.

    One passing call with count=10, min=9, max=11, then one failing call per
    constraint, each checked against its expected message.
    """
    record = Sequence(
        Seq("A" * 10 + "-" * 3 + "N" * 10), id="DNAID", name="TEST", description="Test dna sequence"
    )

    # Expected failure messages, escaped so they are matched literally.
    wrong_count = re.escape(
        "The longest stretch of pattern 'N' in sequence 'DNAID' is 10.\nThis is not equal to the required number of 1."
    )
    below_minimum = re.escape(
        "The longest stretch of pattern 'N' in sequence 'DNAID' is 10.\nThis is less than the minimum 11."
    )
    above_maximum = re.escape(
        "The longest stretch of pattern 'N' in sequence 'DNAID' is 10.\nThis is greater than the maximum 9."
    )

    record.assert_longest_stretch_Ns(count=10, min=9, max=11)

    with pytest.raises(PhytestAssertion, match=wrong_count):
        record.assert_longest_stretch_Ns(count=1)
    with pytest.raises(PhytestAssertion, match=below_minimum):
        record.assert_longest_stretch_Ns(min=11)
    with pytest.raises(PhytestAssertion, match=above_maximum):
        record.assert_longest_stretch_Ns(max=9)


def test_assert_sequence_longest_gaps():
    """Exercise `assert_longest_stretch_gaps` on ten 'A', three '-' and ten 'N'.

    One passing call with count=3, min=2, max=4, then one failing call per
    constraint, each checked against its expected message.
    """
    record = Sequence(
        Seq("A" * 10 + "-" * 3 + "N" * 10), id="DNAID", name="TEST", description="Test dna sequence"
    )

    # Expected failure messages, escaped so they are matched literally.
    wrong_count = re.escape(
        "The longest stretch of pattern '-' in sequence 'DNAID' is 3.\nThis is not equal to the required number of 1."
    )
    below_minimum = re.escape(
        "The longest stretch of pattern '-' in sequence 'DNAID' is 3.\nThis is less than the minimum 4."
    )
    above_maximum = re.escape(
        "The longest stretch of pattern '-' in sequence 'DNAID' is 3.\nThis is greater than the maximum 2."
    )

    record.assert_longest_stretch_gaps(count=3, min=2, max=4)

    with pytest.raises(PhytestAssertion, match=wrong_count):
        record.assert_longest_stretch_gaps(count=1)
    with pytest.raises(PhytestAssertion, match=below_minimum):
        record.assert_longest_stretch_gaps(min=4)
    with pytest.raises(PhytestAssertion, match=above_maximum):
        record.assert_longest_stretch_gaps(max=2)
def test_assert_sequence_startswith():
    """`assert_startswith` accepts the real prefix 'ATG' and rejects 'UGA'."""
    record = Sequence(Seq("ATG" + "-" * 3 + "UGA"), id="DNAID", name="TEST", description="Test dna sequence")
    rejection = re.escape("Sequence 'DNAID' does not start with 'UGA'.")

    record.assert_startswith(pattern='ATG')

    with pytest.raises(PhytestAssertion, match=rejection):
        record.assert_startswith(pattern='UGA')


def test_assert_sequence_endswith():
    """`assert_endswith` accepts the real suffix 'UGA' and rejects 'ATG'."""
    record = Sequence(Seq("ATG" + "-" * 3 + "UGA"), id="DNAID", name="TEST", description="Test dna sequence")
    rejection = re.escape("Sequence 'DNAID' does not end with 'ATG'.")

    record.assert_endswith(pattern='UGA')

    with pytest.raises(PhytestAssertion, match=rejection):
        record.assert_endswith(pattern='ATG')


def test_assert_sequence_contains():
    """`assert_contains` accepts an embedded motif and rejects an absent one."""
    record = Sequence(Seq("ATG" + "TGACGT" + "UGA"), id="DNAID", name="TEST", description="Test dna sequence")
    # The failure is reported as a match count of zero falling below a minimum of one.
    rejection = re.escape("Sequence 'DNAID' matches pattern 'CAGCTG' 0 time(s).\nThis is less than the minimum 1.")

    record.assert_contains(pattern='TGACGT')

    with pytest.raises(PhytestAssertion, match=rejection):
        record.assert_contains(pattern='CAGCTG')
def test_tips_property():
    """`Tree.tips` yields the tips of a two-leaf tree, here named 'A' then 'B'."""
    tree = Tree.read_str("(A:0.1,B:0.2);")
    assert [t.name for t in tree.tips] == ['A', 'B']


def test_assert_tree_number_of_tips():
    """`assert_number_of_tips` on a seven-tip tree.

    The call with tips=7, min=6, max=8 is expected to pass; a wrong exact
    count, a minimum of 8 and a maximum of 6 must each raise.
    """
    tree = Tree.read_str(
        "(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, (P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Rodent:1.21460);"
    )
    tree.assert_number_of_tips(tips=7, min=6, max=8)
    with pytest.raises(AssertionError):
        tree.assert_number_of_tips(tips=1)
    with pytest.raises(AssertionError):
        tree.assert_number_of_tips(min=8)
    with pytest.raises(AssertionError):
        tree.assert_number_of_tips(max=6)


def test_assert_unique_tips():
    """`assert_unique_tips` passes for distinct names and raises on a duplicate."""
    tree = Tree.read_str(
        "(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, (P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Rodent:1.21460);"
    )
    tree.assert_unique_tips()
    # Same topology, but the last tip is renamed so that 'Bovine' appears twice.
    tree = Tree.read_str(
        "(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, (P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Bovine:1.21460);"
    )
    with pytest.raises(AssertionError):
        tree.assert_unique_tips()


def test_assert_tree_is_rooted():
    """`assert_is_rooted` is expected to raise before `root_at_midpoint` and pass after it."""
    tree = Tree.read_str("((A:0.1,B:0.2):0.3,(C:0.3,D:0.4):0.5);")
    with pytest.raises(AssertionError):
        tree.assert_is_rooted()
    tree.root_at_midpoint()
    tree.assert_is_rooted()


def test_assert_tree_is_bifurcating():
    """`assert_is_bifurcating` is expected to pass for the seven-tip example tree."""
    tree = Tree.read_str(
        "(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, (P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Rodent:1.21460);"
    )
    tree.assert_is_bifurcating()


def test_assert_tree_is_monophyletic():
    """`assert_is_monophyletic` accepts sister tips and rejects a split pair.

    'P._paniscus' and 'H._sapiens' share a parent in the Newick string, so
    they are expected to pass; 'Pongo' and 'H._sapiens' are expected to raise.
    """
    tree = Tree.read_str(
        "(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, (P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Rodent:1.21460);"
    )
    sister_tips = [tip for tip in tree.get_terminals() if tip.name in ("P._paniscus", "H._sapiens")]
    tree.assert_is_monophyletic(sister_tips)

    # Select the tips outside the `raises` block so that only the assertion
    # under test can satisfy the expected AssertionError.
    split_tips = [tip for tip in tree.get_terminals() if tip.name in ("Pongo", "H._sapiens")]
    with pytest.raises(AssertionError):
        tree.assert_is_monophyletic(split_tips)


def test_assert_branch_lengths():
    """`assert_branch_lengths` on a tree whose branches all have length 1."""
    tree = Tree.read_str(
        "(Bovine:1,(Hylobates:1,(Pongo:1,(G._Gorilla:1, (P._paniscus:1,H._sapiens:1):1):1):1):1, Rodent:1);"
    )
    tree.assert_branch_lengths(min=0, max=1)
    with pytest.raises(AssertionError):
        tree.assert_branch_lengths(min=2)
    with pytest.raises(AssertionError):
        tree.assert_branch_lengths(max=0)


def test_assert_no_negative_branch_lengths():
    """`assert_no_negatives` passes for all-positive lengths and raises once one branch is -1."""
    tree = Tree.read_str(
        "(Bovine:1,(Hylobates:1,(Pongo:1,(G._Gorilla:1, (P._paniscus:1,H._sapiens:1):1):1):1):1, Rodent:1);"
    )
    tree.assert_no_negatives()
    # Same tree except that the (P._paniscus, H._sapiens) clade has branch length -1.
    tree = Tree.read_str(
        "(Bovine:1,(Hylobates:1,(Pongo:1,(G._Gorilla:1, (P._paniscus:1,H._sapiens:1):-1):1):1):1, Rodent:1);"
    )
    with pytest.raises(AssertionError):
        tree.assert_no_negatives()


def test_assert_terminal_branch_lengths():
    """`assert_terminal_branch_lengths` with every tip branch at length 1.

    One internal branch has length 5, so the passing max=1 call shows that
    internal branches are not counted by this assertion.
    """
    tree = Tree.read_str(
        "(Bovine:1,(Hylobates:1,(Pongo:1,(G._Gorilla:1, (P._paniscus:1,H._sapiens:1):1):5):1):1, Rodent:1);"
    )
    tree.assert_terminal_branch_lengths(min=0, max=1)
    with pytest.raises(AssertionError):
        tree.assert_terminal_branch_lengths(min=2)
    with pytest.raises(AssertionError):
        tree.assert_terminal_branch_lengths(max=0)


def test_assert_internal_branch_lengths():
    """`assert_internal_branch_lengths` with every internal branch at length 2.

    The 'Bovine' tip branch has length 4, so the passing max=2 call shows
    that tip branches are not counted by this assertion.
    """
    tree = Tree.read_str(
        "(Bovine:4,(Hylobates:1,(Pongo:1,(G._Gorilla:1, (P._paniscus:1,H._sapiens:1):2):2):2):2, Rodent:1);"
    )
    tree.assert_internal_branch_lengths(min=0, max=2)
    with pytest.raises(AssertionError):
        tree.assert_internal_branch_lengths(min=3)
    with pytest.raises(AssertionError):
        tree.assert_internal_branch_lengths(max=1)
def test_assert_tree_total_branch_length():
    """`assert_total_branch_length` on a tree whose eleven branches all have length 1.

    The call with length=11, min=10, max=12 is expected to pass; each
    keyword on its own, set to a value the tree cannot meet, must raise.
    """
    newick = "(Bovine:1,(Hylobates:1,(Pongo:1,(G._Gorilla:1, (P._paniscus:1,H._sapiens:1):1):1):1):1, Rodent:1);"
    unit_tree = Tree.read_str(newick)

    unit_tree.assert_total_branch_length(length=11, min=10, max=12)

    # Checked in the same order as before: wrong exact length, minimum too high, maximum too low.
    for unmet in ({"length": 1}, {"min": 12}, {"max": 10}):
        with pytest.raises(AssertionError):
            unit_tree.assert_total_branch_length(**unmet)


def test_assert_tip_regex():
    """`assert_tip_regex` needs every default date pattern to cover tips in mixed formats."""
    mixed_tree = Tree.read_str("(A_1993.3, (B_1998-07-02,C_1992-12-31));")
    date_patterns = default_date_patterns()

    # The tip names follow both date conventions, so no pattern suffices on its own.
    for single_pattern in date_patterns:
        with pytest.raises(AssertionError):
            mixed_tree.assert_tip_regex(single_pattern)

    # Supplying all patterns together is expected to pass.
    mixed_tree.assert_tip_regex(date_patterns)


def test_assert_tip_names():
    """`assert_tip_names` accepts the exact tip names and rejects a duplicated or a replaced name."""
    newick = "(Bovine:1,(Hylobates:1,(Pongo:1,(G._Gorilla:1, (P._paniscus:1,H._sapiens:1):1):1):1):1, Rodent:1);"
    named_tree = Tree.read_str(newick)

    exact_names = ['Bovine', 'Hylobates', 'Pongo', 'G._Gorilla', 'P._paniscus', 'H._sapiens', 'Rodent']
    named_tree.assert_tip_names(names=exact_names)

    # 'Bovine' listed twice.
    with_duplicate = ['Bovine', 'Bovine', 'Hylobates', 'Pongo', 'G._Gorilla', 'P._paniscus', 'H._sapiens', 'Rodent']
    with pytest.raises(AssertionError):
        named_tree.assert_tip_names(names=with_duplicate)

    # 'Bovine' replaced by a name that is not in the tree.
    with_stranger = ['Different', 'Hylobates', 'Pongo', 'G._Gorilla', 'P._paniscus', 'H._sapiens', 'Rodent']
    with pytest.raises(AssertionError):
        named_tree.assert_tip_names(names=with_stranger)


def test_parse_tip_dates():
    """`parse_tip_dates` results as datetimes, as decimal years, and with an explicit pattern."""
    dated_tree = Tree.read_str("(A_1993.3, (B_1998-07-02,C_1992-10-01));")

    as_datetimes = dated_tree.parse_tip_dates()
    expected_datetimes = {
        'A_1993.3': datetime(1993, 4, 20, 0, 0),
        'B_1998-07-02': datetime(1998, 7, 2, 0, 0),
        'C_1992-10-01': datetime(1992, 10, 1, 0, 0),
    }
    assert as_datetimes == expected_datetimes

    as_decimal_years = dated_tree.parse_tip_dates(decimal_year=True)
    expected_decimal_years = {
        'A_1993.3': 1993.3,
        'B_1998-07-02': 1998.5,
        'C_1992-10-01': 1992.75,
    }
    assert as_decimal_years == expected_decimal_years

    # With this explicit pattern only the tip named 'A_1993.3' is expected in the result.
    explicit_only = dated_tree.parse_tip_dates(patterns=r"\d{4}\.?\d*$", decimal_year=True)
    assert explicit_only == {
        'A_1993.3': 1993.3,
    }
def test_plot_root_to_tip():
    """`plot_root_to_tip` writes an SVG file of the expected size and type.

    The plot goes to a fresh path inside a temporary directory. Writing to
    the name of a still-open `NamedTemporaryFile` is not portable (the file
    cannot be reopened by name on Windows), and because that file already
    exists it would make the `exists()` check meaningless.
    """
    # Local import so this function stays self-contained.
    from tempfile import TemporaryDirectory

    tree = Tree.read("examples/data/ice_viruses.fasta.treefile", tree_format="newick")
    with TemporaryDirectory() as tmp_dir:
        path = Path(tmp_dir) / "root_to_tip.svg"
        tree.plot_root_to_tip(path, covariation=True, sequence_length=463)
        assert path.exists()
        assert path.stat().st_size > 30_000
        svg = path.read_text()
        assert "!DOCTYPE svg PUBLIC" in svg


def test_assert_root_to_tip_min_r_squared():
    """`assert_root_to_tip` is expected to accept min_r_squared=0.35 and reject 0.40 for the example tree."""
    tree = Tree.read("examples/data/ice_viruses.fasta.treefile", tree_format="newick")
    tree.assert_root_to_tip(min_r_squared=0.35)
    with pytest.raises(PhytestAssertion):
        tree.assert_root_to_tip(min_r_squared=0.40)


def test_assert_root_to_tip_rate():
    """`assert_root_to_tip` rate bounds: [1.5e-03, 1.6e-03] is expected to pass, bounds outside it to raise."""
    tree = Tree.read("examples/data/ice_viruses.fasta.treefile", tree_format="newick")
    tree.assert_root_to_tip(min_rate=1.5e-03, max_rate=1.6e-03)
    with pytest.raises(PhytestAssertion):
        tree.assert_root_to_tip(max_rate=1.5e-03)
    with pytest.raises(PhytestAssertion):
        tree.assert_root_to_tip(min_rate=1.6e-03)


def test_assert_root_to_tip_root_date():
    """`assert_root_to_tip` / `warn_root_to_tip` root-date bounds for the example tree.

    The window 1772.0-1773.0 is expected to pass. A maximum of 1772.0 must
    raise, and a minimum of 1773.0 must raise (assert variant) or warn
    (warn variant) with the message matched below.
    """
    tree = Tree.read("examples/data/ice_viruses.fasta.treefile", tree_format="newick")

    tree.assert_root_to_tip(min_root_date=1772.0, max_root_date=1773.0)
    with pytest.raises(PhytestAssertion):
        tree.assert_root_to_tip(max_root_date=1772.0)
    with pytest.raises(
        PhytestAssertion, match=r"Inferred root date '1772.\d*' is less than the minimum allowed root date '1773.0'."
    ):
        tree.assert_root_to_tip(min_root_date=1773.0)

    # The warn_* variant is expected to emit a PhytestWarning with the same text instead of raising.
    with pytest.warns(
        PhytestWarning, match=r"Inferred root date '1772.\d*' is less than the minimum allowed root date '1773.0'."
    ):
        tree.warn_root_to_tip(min_root_date=1773.0)
def test_assert_root_to_tip_covariation():
    """`assert_root_to_tip` with `covariation` and `valid_confidence`.

    Covariation with a sequence length is expected to give valid confidence,
    and the default call to give invalid confidence. Asking for the opposite
    must raise, as must covariation with no sequence length supplied.
    """
    tree = Tree.read("examples/data/ice_viruses.fasta.treefile", tree_format="newick")
    tree.assert_root_to_tip(covariation=True, sequence_length=463, valid_confidence=True)
    tree.assert_root_to_tip(valid_confidence=False)
    with pytest.raises(PhytestAssertion, match=r"The `clock_model.valid_confidence` variable is not False."):
        tree.assert_root_to_tip(covariation=True, sequence_length=463, valid_confidence=False)

    with pytest.raises(
        PhytestAssertion,
        match=r"Cannot perform root-to-tip regression with `covariation` as True if no alignment of sequence length is provided.",
    ):
        tree.assert_root_to_tip(covariation=True, valid_confidence=True)


def test_assert_root_to_tip_root_extra():
    """`assert_root_to_tip` appends an 'html' entry to the caller-supplied `extra` list."""
    tree = Tree.read("examples/data/ice_viruses.fasta.treefile", tree_format="newick")

    extra = []
    tree.assert_root_to_tip(min_root_date=1772.0, max_root_date=1773.0, extra=extra)
    assert extra[0]['format_type'] == 'html'

    content = extra[0]['content']
    # The previous check, `content.startswith('')`, is true for every string and
    # so verified nothing about the payload. Require a non-empty string instead.
    # NOTE(review): the intended prefix literal appears to be missing; restore it
    # here once the expected start of the HTML content is confirmed.
    assert isinstance(content, str) and content


def test_assert_root_to_tip_clock_filter():
    """`assert_root_to_tip` is expected to warn with clock_filter=1.0 and stay silent with 3.0."""
    tree = Tree.read("examples/data/ice_viruses.fasta.treefile", tree_format="newick")
    with pytest.warns(PhytestWarning):
        tree.assert_root_to_tip(clock_filter=1.0)

    # Escalate warnings to errors so that any warning from the second call fails the test.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        tree.assert_root_to_tip(clock_filter=3.0)
--------------------------------------------------------------------------------