├── tests
    ├── __init__.py
    ├── scripts
    │   ├── __init__.py
    │   └── test_cli.py
    ├── conftest.py
    ├── testdata
    │   └── example.faa
    └── test_msaviz.py
├── src
    └── pymsaviz
    │   ├── scripts
    │       ├── __init__.py
    │       └── cli.py
    │   ├── config
    │       ├── testdata
    │       │   ├── HIGD2A.fa
    │       │   └── MRGPRG.fa
    │       ├── __init__.py
    │       └── color_schemes.tsv
    │   ├── __init__.py
    │   └── msaviz.py
├── .gitattributes
├── example
    ├── example.zip
    ├── cli_example_run.sh
    ├── HIGD2A.fa
    └── MRGPRG.fa
├── docs
    ├── images
    │   ├── api_example01.png
    │   ├── api_example02.png
    │   ├── api_example03.png
    │   ├── cli_example01.png
    │   ├── cli_example02.png
    │   └── cli_example03.png
    ├── api-docs
    │   └── msaviz.md
    ├── index.md
    └── cli-docs
    │   └── pymsaviz.md
├── CITATION.cff
├── .pre-commit-config.yaml
├── .github
    └── workflows
    │   ├── publish_mkdocs.yml
    │   ├── publish_to_pypi.yml
    │   └── ci.yml
├── requirements.lock
├── LICENSE
├── mkdocs.yml
├── pyproject.toml
├── .gitignore
├── README.md
└── requirements-dev.lock


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/pymsaviz/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.ipynb linguist-documentation
2 | 


--------------------------------------------------------------------------------
/example/example.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyMSAviz/HEAD/example/example.zip


--------------------------------------------------------------------------------
/docs/images/api_example01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyMSAviz/HEAD/docs/images/api_example01.png


--------------------------------------------------------------------------------
/docs/images/api_example02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyMSAviz/HEAD/docs/images/api_example02.png


--------------------------------------------------------------------------------
/docs/images/api_example03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyMSAviz/HEAD/docs/images/api_example03.png


--------------------------------------------------------------------------------
/docs/images/cli_example01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyMSAviz/HEAD/docs/images/cli_example01.png


--------------------------------------------------------------------------------
/docs/images/cli_example02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyMSAviz/HEAD/docs/images/cli_example02.png


--------------------------------------------------------------------------------
/docs/images/cli_example03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyMSAviz/HEAD/docs/images/cli_example03.png


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | message: If you use this software, please cite it as below.
3 | authors:
4 |   - family-names: Shimoyama
5 |     given-names: Yuki
6 | title: "pyMSAviz: MSA visualization python package for sequence analysis"
7 | date-released: 2022-11-13
8 | url: https://github.com/moshi4/pyMSAviz
9 | 


--------------------------------------------------------------------------------
/docs/api-docs/msaviz.md:
--------------------------------------------------------------------------------
 1 | # MsaViz Class
 2 | 
 3 | ::: pymsaviz.msaviz.MsaViz
 4 |     options:
 5 |       members:
 6 |         - available_color_schemes
 7 |         - set_plot_params
 8 |         - set_custom_color_scheme
 9 |         - set_custom_color_func
10 |         - set_highlight_pos
11 |         - set_highlight_pos_by_ident_thr
12 |         - add_markers
13 |         - add_text_annotation
14 |         - plotfig
15 |         - savefig
16 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # See https://pre-commit.com for more information
 2 | # See https://pre-commit.com/hooks.html for more hooks
 3 | repos:
 4 |   - repo: https://github.com/astral-sh/ruff-pre-commit
 5 |     rev: v0.8.1
 6 |     hooks:
 7 |       - id: ruff
 8 |         name: ruff lint check
 9 |         types_or: [python, pyi]
10 |         args: [--fix]
11 |       - id: ruff-format
12 |         name: ruff format check
13 |         types_or: [python, pyi]
14 | 


--------------------------------------------------------------------------------
/.github/workflows/publish_mkdocs.yml:
--------------------------------------------------------------------------------
 1 | name: Publish MkDocs
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [released]
 6 |   workflow_dispatch:
 7 | 
 8 | jobs:
 9 |   publish_mkdocs:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - name: Checkout
13 |         uses: actions/checkout@v4
14 | 
15 |       - name: Install Rye
16 |         run: |
17 |           curl -sSf https://rye.astral.sh/get | RYE_INSTALL_OPTION="--yes" bash
18 |           echo "$HOME/.rye/shims" >> $GITHUB_PATH
19 | 
20 |       - name: Install Python & MkDocs & Plugins
21 |         run: rye sync
22 | 
23 |       - name: Publish document
24 |         run: rye run mkdocs gh-deploy --force
25 | 


--------------------------------------------------------------------------------
/example/cli_example_run.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Example 01
 4 | echo "Run pyMSAviz CLI example 01..."
 5 | pymsaviz -i HIGD2A.fa -o cli_example01.png --color_scheme Identity --dpi 100
 6 | 
 7 | # Example 02
 8 | echo "Run pyMSAviz CLI example 02..."
 9 | pymsaviz -i MRGPRG.fa -o cli_example02.png --wrap_length 80 --dpi 100 \
10 |          --color_scheme Taylor --show_consensus --show_count
11 | 
12 | # Example 03
13 | echo "Run pyMSAviz CLI example 03..."
14 | pymsaviz -i MRGPRG.fa -o cli_example03.png --start 100 --end 160 --dpi 100 \
15 |          --color_scheme Flower --show_grid --show_consensus --consensus_color tomato
16 | 
17 | echo -e "\nFinished all example CLI run."
18 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import pytest
 4 | from Bio.Align import MultipleSeqAlignment
 5 | from Bio.Seq import Seq
 6 | from Bio.SeqRecord import SeqRecord
 7 | 
 8 | 
 9 | @pytest.fixture
10 | def testdata_dir() -> Path:
11 |     """Path to the ``testdata`` directory located next to this conftest file"""
12 |     return Path(__file__).parent / "testdata"
13 | 
14 | 
15 | @pytest.fixture
16 | def msa_fasta_file(testdata_dir: Path) -> Path:
17 |     """Path to the example MSA fasta file (``example.faa``) in ``testdata_dir``"""
18 |     return testdata_dir / "example.faa"
19 | 
20 | 
21 | @pytest.fixture
22 | def dummy_msa() -> MultipleSeqAlignment:
23 |     """Minimal MSA object built from two identical ``ATGC`` sequence records"""
24 |     return MultipleSeqAlignment([SeqRecord(Seq("ATGC")), SeqRecord(Seq("ATGC"))])
25 | 


--------------------------------------------------------------------------------
/.github/workflows/publish_to_pypi.yml:
--------------------------------------------------------------------------------
 1 | name: Publish to PyPI
 2 | on:
 3 |   release:
 4 |     types: [released]
 5 |   workflow_dispatch:
 6 | 
 7 | jobs:
 8 |   publish_to_pypi:
 9 |     name: Publish to PyPI
10 |     runs-on: ubuntu-latest
11 |     env:
12 |       PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
13 |       PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
14 |     steps:
15 |       - name: Checkout
16 |         uses: actions/checkout@v4
17 | 
18 |       - name: Install Rye
19 |         run: |
20 |           curl -sSf https://rye.astral.sh/get | RYE_INSTALL_OPTION="--yes" bash
21 |           echo "$HOME/.rye/shims" >> $GITHUB_PATH
22 | 
23 |       - name: Build
24 |         run: rye build
25 | 
26 |       - name: Publish
27 |         run: rye publish -u $PYPI_USERNAME --token $PYPI_PASSWORD -y
28 | 


--------------------------------------------------------------------------------
/example/HIGD2A.fa:
--------------------------------------------------------------------------------
 1 | >GorillaGorilla
 2 | MATPGPVIPEVPFEPSKPPVIEGLSPTVYRNPESFKEKFLRKTRENPVVPIGCLATAAAL
 3 | TYGLYSFHRGNSQRSQLMMRTRIAAQGFTVAAILLGLAVTAMKSRP------------
 4 | >HomoSapiens
 5 | MATPGPVIPEVPFEPSKPPVIEGLSPTVYRNPESFKEKFVRKTRENPVVPIGCLATAAAL
 6 | TYGLYSFHRGNSQRSQLMMRTRIAAQGFTVAAILLGLAVTAMKSRP------------
 7 | >NomascusLeucogenys
 8 | MATPGPVIPEVPFEPSKPPVIEGFSPTVYRNPESFKGKFLRKTRENPVVPIGCLATAAAL
 9 | TYGLYSFHRGDSQRSQLMMRTRIAAQGFTVAAILLGLAVTAMKSRPSAQGLASKAPQK
10 | >PanPaniscus
11 | MATPGPVIPEVPFEPSKPPVIEGLSPTVYRNPESFKEKFVRKTRENPVVPIGCLATAAAL
12 | TYGLYSFHRGNSQRSQLMMRTRIAAQGFTVAAILLGLAVTAMKSRP------------
13 | >PanTroglodytes
14 | MATPGPVIQEVPFEPSKPPVIEGLSPTVYRNPESFKEKFVRKTRENPVVPIGCLATAAAL
15 | TYGLYSFHRGNSQRSQLMMRTRIAAQGFTVAAILLGLAVTAMKSRP------------
16 | >PongoAbelii
17 | MATPGPVIPKVPFEPSKPPVIEGLSPTVYRNPESFKEKFLRKTRENPVVPIGCLATATAL
18 | SYGLYSFHRGNSQRSQLMMRTRIAAQGFTVAAILLGLAVTAMKSRP------------
19 | 


--------------------------------------------------------------------------------
/src/pymsaviz/config/testdata/HIGD2A.fa:
--------------------------------------------------------------------------------
 1 | >GorillaGorilla
 2 | MATPGPVIPEVPFEPSKPPVIEGLSPTVYRNPESFKEKFLRKTRENPVVPIGCLATAAAL
 3 | TYGLYSFHRGNSQRSQLMMRTRIAAQGFTVAAILLGLAVTAMKSRP------------
 4 | >HomoSapiens
 5 | MATPGPVIPEVPFEPSKPPVIEGLSPTVYRNPESFKEKFVRKTRENPVVPIGCLATAAAL
 6 | TYGLYSFHRGNSQRSQLMMRTRIAAQGFTVAAILLGLAVTAMKSRP------------
 7 | >NomascusLeucogenys
 8 | MATPGPVIPEVPFEPSKPPVIEGFSPTVYRNPESFKGKFLRKTRENPVVPIGCLATAAAL
 9 | TYGLYSFHRGDSQRSQLMMRTRIAAQGFTVAAILLGLAVTAMKSRPSAQGLASKAPQK
10 | >PanPaniscus
11 | MATPGPVIPEVPFEPSKPPVIEGLSPTVYRNPESFKEKFVRKTRENPVVPIGCLATAAAL
12 | TYGLYSFHRGNSQRSQLMMRTRIAAQGFTVAAILLGLAVTAMKSRP------------
13 | >PanTroglodytes
14 | MATPGPVIQEVPFEPSKPPVIEGLSPTVYRNPESFKEKFVRKTRENPVVPIGCLATAAAL
15 | TYGLYSFHRGNSQRSQLMMRTRIAAQGFTVAAILLGLAVTAMKSRP------------
16 | >PongoAbelii
17 | MATPGPVIPKVPFEPSKPPVIEGLSPTVYRNPESFKEKFLRKTRENPVVPIGCLATATAL
18 | SYGLYSFHRGNSQRSQLMMRTRIAAQGFTVAAILLGLAVTAMKSRP------------
19 | 


--------------------------------------------------------------------------------
/src/pymsaviz/__init__.py:
--------------------------------------------------------------------------------
 1 | import warnings
 2 | 
 3 | import matplotlib as mpl
 4 | 
 5 | from pymsaviz.config import get_msa_testdata
 6 | from pymsaviz.msaviz import MsaViz
 7 | 
 8 | warnings.filterwarnings("ignore")  # NOTE: silences all warnings process-wide
 9 | 
10 | __all__ = [
11 |     "MsaViz",
12 |     "get_msa_testdata",
13 | ]
14 | 
15 | __version__ = "0.5.0"  # read by hatch (tool.hatch.version in pyproject.toml)
16 | 
17 | # Override matplotlib rc (runtime configuration) parameters at import time
18 | # https://matplotlib.org/stable/tutorials/introductory/customizing.html
19 | mpl_rc_params = {
20 |     # Legend
21 |     "legend.loc": "upper left",  # Default: best
22 |     "legend.frameon": False,  # Default: True
23 |     "legend.handlelength": 1,  # Default: 2.0
24 |     "legend.handleheight": 1,  # Default: 0.7
25 |     # Savefig
26 |     "savefig.bbox": "tight",  # Default: None
27 |     "savefig.pad_inches": 0.5,  # Default: 0.1
28 |     # SVG
29 |     "svg.fonttype": "none",  # Default: path
30 | }
31 | mpl.rcParams.update(mpl_rc_params)
32 | 


--------------------------------------------------------------------------------
/tests/scripts/test_cli.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | from pathlib import Path
 3 | 
 4 | 
 5 | def test_cli_default_run(msa_fasta_file: Path, tmp_path: Path):
 6 |     """Test CLI with default option"""
 7 |     fig_outfile = tmp_path / "test.png"
 8 | 
 9 |     cmd = f"pymsaviz -i {msa_fasta_file} -o {fig_outfile}"
10 |     subprocess.run(cmd, shell=True)
11 | 
12 |     assert fig_outfile.exists()
13 | 
14 | 
15 | def test_cli_full_option_run(msa_fasta_file: Path, tmp_path: Path):
16 |     """Test CLI with full option"""
17 |     fig_outfile = tmp_path / "test.png"
18 | 
19 |     cmd = f"pymsaviz -i {msa_fasta_file} -o {fig_outfile} --format fasta "
20 |     cmd += "--color_scheme Taylor --start 50 --end 250 --wrap_length 100 "
21 |     cmd += "--wrap_space_size 3.0 --show_grid --show_count --show_consensus "
22 |     cmd += "--consensus_color green --consensus_size 2.0 --sort --dpi 100"
23 |     subprocess.run(cmd, shell=True)
24 | 
25 |     assert fig_outfile.exists()
26 | 


--------------------------------------------------------------------------------
/requirements.lock:
--------------------------------------------------------------------------------
 1 | # generated by rye
 2 | # use `rye lock` or `rye sync` to update this lockfile
 3 | #
 4 | # last locked with the following flags:
 5 | #   pre: false
 6 | #   features: []
 7 | #   all-features: true
 8 | #   with-sources: false
 9 | #   generate-hashes: false
10 | #   universal: false
11 | 
12 | -e file:.
13 | biopython==1.84
14 |     # via pymsaviz
15 | contourpy==1.3.0
16 |     # via matplotlib
17 | cycler==0.12.1
18 |     # via matplotlib
19 | fonttools==4.55.0
20 |     # via matplotlib
21 | importlib-resources==6.4.5
22 |     # via matplotlib
23 | kiwisolver==1.4.7
24 |     # via matplotlib
25 | matplotlib==3.9.3
26 |     # via pymsaviz
27 | numpy==2.0.2
28 |     # via biopython
29 |     # via contourpy
30 |     # via matplotlib
31 | packaging==24.2
32 |     # via matplotlib
33 | pillow==11.0.0
34 |     # via matplotlib
35 | pyparsing==3.2.0
36 |     # via matplotlib
37 | python-dateutil==2.9.0.post0
38 |     # via matplotlib
39 | six==1.16.0
40 |     # via python-dateutil
41 | zipp==3.21.0
42 |     # via importlib-resources
43 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 moshi
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | on:
 3 |   push:
 4 |     branches: [main, develop]
 5 |     paths: ["src/**", "tests/**", ".github/workflows/ci.yml"]
 6 |   pull_request:
 7 |     branches: [main, develop]
 8 |     paths: ["src/**", "tests/**", ".github/workflows/ci.yml"]
 9 |   workflow_dispatch:
10 | 
11 | jobs:
12 |   CI:
13 |     runs-on: ${{ matrix.os }}
14 |     strategy:
15 |       matrix:
16 |         os: [ubuntu-latest, macos-latest]
17 |         python-version: ["3.9", "3.10", "3.11", "3.12"]
18 |     steps:
19 |       - name: Checkout
20 |         uses: actions/checkout@v4
21 | 
22 |       - name: Install Rye
23 |         run: |
24 |           curl -sSf https://rye.astral.sh/get | RYE_INSTALL_OPTION="--yes" bash
25 |           echo "$HOME/.rye/shims" >> $GITHUB_PATH
26 | 
27 |       - name: Setup Python ${{matrix.python-version}} & Dependencies
28 |         run: |
29 |           rye pin ${{ matrix.python-version }}
30 |           rye sync --update-all --all-features
31 | 
32 |       - name: Run ruff lint check
33 |         run: rye run ruff check --diff
34 | 
35 |       - name: Run ruff format check
36 |         run: rye run ruff format --check --diff
37 | 
38 |       - name: Run pytest
39 |         run: rye run pytest
40 | 


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
 1 | # pyMSAviz
 2 | 
 3 | ![Python3](https://img.shields.io/badge/Language-Python3-steelblue)
 4 | ![OS](https://img.shields.io/badge/OS-_Windows_|_Mac_|_Linux-steelblue)
 5 | ![License](https://img.shields.io/badge/License-MIT-steelblue)
 6 | [![Latest PyPI version](https://img.shields.io/pypi/v/pymsaviz.svg)](https://pypi.python.org/pypi/pymsaviz)
 7 | [![Bioconda](https://img.shields.io/conda/vn/bioconda/pymsaviz.svg?color=green)](https://anaconda.org/bioconda/pymsaviz)  
 8 | 
 9 | ## Overview
10 | 
11 | pyMSAviz is an MSA (Multiple Sequence Alignment) visualization Python package for sequence analysis, built on matplotlib.
12 | It was developed to make plotting MSAs in Python easy and visually appealing.
13 | It can also add markers, text annotations, and highlights to specific positions and ranges in an MSA.
14 | pyMSAviz was inspired by [Jalview](https://www.jalview.org/) and [ggmsa](https://github.com/YuLab-SMU/ggmsa).
15 | 
16 | <figure markdown>
17 |   ![example.png](./images/api_example01.png)
18 |   <figcaption>Fig.1 Simple visualization result</figcaption>
19 | </figure>
20 | 
21 | <figure markdown>
22 |   ![example.png](./images/api_example03.png)
23 |   <figcaption>Fig.2 Customized visualization result</figcaption>
24 | </figure>
25 | 
26 | ## Installation
27 | 
28 | `Python 3.9 or later` is required for installation.
29 | 
30 | **Install PyPI package:**
31 | 
32 |     pip install pymsaviz
33 | 
34 | **Install bioconda package:**
35 | 
36 |     conda install -c conda-forge -c bioconda pymsaviz
37 | 


--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
 1 | site_name: pyMSAviz
 2 | site_description: MSA visualization python package for sequence analysis
 3 | site_author: moshi4
 4 | repo_name: moshi4/pyMSAviz
 5 | repo_url: https://github.com/moshi4/pyMSAviz
 6 | edit_uri: ""
 7 | use_directory_urls: true
 8 | 
 9 | nav:
10 |   - Home: index.md
11 |   - Getting Started: getting_started.ipynb
12 |   - Color Schemes: color_schemes.ipynb
13 |   - API Docs: api-docs/msaviz.md
14 |   - CLI Docs: cli-docs/pymsaviz.md
15 | 
16 | theme:
17 |   name: material # material, readthedocs, mkdocs
18 |   features:
19 |     - navigation.top
20 |     - navigation.expand
21 |     # - navigation.tabs
22 |     - navigation.tabs.sticky
23 |     - navigation.sections
24 | 
25 | markdown_extensions:
26 |   - pymdownx.highlight:
27 |       anchor_linenums: true
28 |   - pymdownx.inlinehilite
29 |   - pymdownx.snippets
30 |   - pymdownx.superfences
31 |   - pymdownx.details
32 |   - admonition
33 |   - attr_list
34 |   - md_in_html
35 | 
36 | plugins:
37 |   - search
38 |   - mkdocs-jupyter:
39 |       execute: False
40 |   - mkdocstrings:
41 |       handlers:
42 |         python:
43 |           # Reference: https://mkdocstrings.github.io/python/usage/
44 |           options:
45 |             # Heading options
46 |             heading_level: 2
47 |             show_root_full_path: False
48 |             show_root_heading: True
49 |             # Member options
50 |             members_order: source # alphabetical, source
51 |             # Docstrings options
52 |             docstring_style: numpy
53 |             docstring_section_style: spacy # table, list, spacy
54 |             line_length: 89
55 |             merge_init_into_class: True
56 |             # Signatures/annotations options
57 |             show_signature_annotations: True
58 |             separate_signature: True
59 |             # Additional options
60 |             show_source: False
61 | 


--------------------------------------------------------------------------------
/example/MRGPRG.fa:
--------------------------------------------------------------------------------
 1 | >GorillaGorilla
 2 | MFGLFGLWRTFHSVVFYLTLIVGLGGPVGNGLVLWNLSFHVKKGPFSINLLHLAAADFLF
 3 | LSCRVGFSVAQAALG---------------------------RCLSDLFPA---------
 4 | ---VLCTLVWAPTLPAVLLPANACGLLCISARPLVCLRYHVASVTWFLVLARVAWTAGVV
 5 | LFVWVTCCSTRLQPRLYGIVLGALLLLFLCGLPLVFYWSLQPLLNFLLPMFSPLATLLAC
 6 | VNSSSKPLIYSGLGRQPGKRESLRSVLRRALGEGAKLGARGQSLPMGLL
 7 | >HomoSapiens
 8 | MFGLFGLWRTFDSVVFYLTLIVGLGGPVGNGLVLWNLGFRIKKGPFSIYLLHLAAADFLF
 9 | LSCRVGFSVAQAALGAQDTLYFVLTFLWFAVGLWLLAAFSVERCLSDLFPACYQGCRPRH
10 | ASAVLCALVWTPTLPAVPLPANACGLLRNSACPLVCPRYHVASVTWFLVLARVAWTAGVV
11 | LFVWVTCCSTRPRPRLYGIVLGALLLLFFCGLPSVFYWSLQPLLNFLLPVFSPLATLLAC
12 | VNSSSKPLIYSGLGRQPGKREPLRSVLRRALGEGAELGARGQSLPMGLL
13 | >NomascusLeucogenys
14 | MFGLFGLWRTFDSVVFYLTLIVGLGGLVGNGLVLWNLGFHIKKGPFSVYLLHLAAADFLF
15 | LSCHVGFSVAQAALGAQDTLYFVLTFLWFAVGLWLVVAFSVERCLSDLFPACYQGCRPRH
16 | TSVILCALVWALTLPAVLLPANACGLLHNSARPLVCLRYHVASVTWFLVLACVAWTAGVV
17 | LFVWVTCCSTRPRPRLYGIVLGALLLLFFCGLPLVLYWSLQPLLNFLLPMFSPLATLLAC
18 | VNSSSKPLIYXXXXRQPGKREPLRVVLWRALGEGAELSARGQSLPMGLL
19 | >PanPaniscus
20 | MFGLFGLWRTFDSVVFYLTLIVGLGGPVGNGLVLWNLGFHIKKGTFSIYLLHLAAADFLF
21 | LSCPVGFSVAQAALGAQDTLYFVLTFLWFAVGLWLLAAFSVERSLSDLFPACYQGCRPRH
22 | ASAVLCALVWTPTLPALPLPANACGLLRNSACPLVCLRYHVASVTWFLVLARVAWTAGVV
23 | LFVWVTCCSTRPRPRLYGIVLGALLLLFFCGLPSVFYWRLQPLLNFLLPMFSPLATLLAC
24 | VNSSSKPLIYSGLGRQPGKREPLRSVLRRALGEGAELGARGQSLPMGLL
25 | >PanTroglodytes
26 | MFGLFGLWRTFDSVVFYLTPIVGLGGPVGNGLVLWNLGFHIKKGPFSIYLLHLAAADFLF
27 | LSCPVGFSVAQAALGAQDTLYFVLTFLWFAVGLWLLAAFSVERCLSDLFPACYQGCRPRH
28 | ASAVLCALVWTPTLPALPLPANACGLLRNSACPLVCLRYHVASVTWFLVLARVAWTAGVV
29 | LFVWVTCCSTRPRPRLYGIVLGALLLLFFCGLPSVFYWSLQPLLNFLLPMFSPLATLLAC
30 | VNSSSKPLIYSGLGRQPGKREPLRSVLRRALGEGAELGARGQSLPMGLL
31 | >PongoAbelii
32 | MFGLFGLWRTFDSVVFYLTLIVGLGGLVGNGLVLWNLGFHIKKGPFSVYLLHLAAADFLF
33 | LSCHVGFSVAQAALGAQDTLYFVLTFLWFAVGLWLLAAFSVERCLSDLFPACYHGCRPRH
34 | ASAVLCALVWALTLPAVLLPANACGLLRNSARPLVCLRYHVPASPGSCAGLR-AWTAGVV
35 | LFVLVTCCSMRARPR-YGIVLGALLLLF-CGLPLVFYWSLQPLLNFLLPMFSPLAMLLAC
36 | VNSSSKPLIYLGLGRQPGKREPLRVVLQRALGEGAELGARGQSLPMGLL
37 | 


--------------------------------------------------------------------------------
/src/pymsaviz/config/testdata/MRGPRG.fa:
--------------------------------------------------------------------------------
 1 | >GorillaGorilla
 2 | MFGLFGLWRTFHSVVFYLTLIVGLGGPVGNGLVLWNLSFHVKKGPFSINLLHLAAADFLF
 3 | LSCRVGFSVAQAALG---------------------------RCLSDLFPA---------
 4 | ---VLCTLVWAPTLPAVLLPANACGLLCISARPLVCLRYHVASVTWFLVLARVAWTAGVV
 5 | LFVWVTCCSTRLQPRLYGIVLGALLLLFLCGLPLVFYWSLQPLLNFLLPMFSPLATLLAC
 6 | VNSSSKPLIYSGLGRQPGKRESLRSVLRRALGEGAKLGARGQSLPMGLL
 7 | >HomoSapiens
 8 | MFGLFGLWRTFDSVVFYLTLIVGLGGPVGNGLVLWNLGFRIKKGPFSIYLLHLAAADFLF
 9 | LSCRVGFSVAQAALGAQDTLYFVLTFLWFAVGLWLLAAFSVERCLSDLFPACYQGCRPRH
10 | ASAVLCALVWTPTLPAVPLPANACGLLRNSACPLVCPRYHVASVTWFLVLARVAWTAGVV
11 | LFVWVTCCSTRPRPRLYGIVLGALLLLFFCGLPSVFYWSLQPLLNFLLPVFSPLATLLAC
12 | VNSSSKPLIYSGLGRQPGKREPLRSVLRRALGEGAELGARGQSLPMGLL
13 | >NomascusLeucogenys
14 | MFGLFGLWRTFDSVVFYLTLIVGLGGLVGNGLVLWNLGFHIKKGPFSVYLLHLAAADFLF
15 | LSCHVGFSVAQAALGAQDTLYFVLTFLWFAVGLWLVVAFSVERCLSDLFPACYQGCRPRH
16 | TSVILCALVWALTLPAVLLPANACGLLHNSARPLVCLRYHVASVTWFLVLACVAWTAGVV
17 | LFVWVTCCSTRPRPRLYGIVLGALLLLFFCGLPLVLYWSLQPLLNFLLPMFSPLATLLAC
18 | VNSSSKPLIYXXXXRQPGKREPLRVVLWRALGEGAELSARGQSLPMGLL
19 | >PanPaniscus
20 | MFGLFGLWRTFDSVVFYLTLIVGLGGPVGNGLVLWNLGFHIKKGTFSIYLLHLAAADFLF
21 | LSCPVGFSVAQAALGAQDTLYFVLTFLWFAVGLWLLAAFSVERSLSDLFPACYQGCRPRH
22 | ASAVLCALVWTPTLPALPLPANACGLLRNSACPLVCLRYHVASVTWFLVLARVAWTAGVV
23 | LFVWVTCCSTRPRPRLYGIVLGALLLLFFCGLPSVFYWRLQPLLNFLLPMFSPLATLLAC
24 | VNSSSKPLIYSGLGRQPGKREPLRSVLRRALGEGAELGARGQSLPMGLL
25 | >PanTroglodytes
26 | MFGLFGLWRTFDSVVFYLTPIVGLGGPVGNGLVLWNLGFHIKKGPFSIYLLHLAAADFLF
27 | LSCPVGFSVAQAALGAQDTLYFVLTFLWFAVGLWLLAAFSVERCLSDLFPACYQGCRPRH
28 | ASAVLCALVWTPTLPALPLPANACGLLRNSACPLVCLRYHVASVTWFLVLARVAWTAGVV
29 | LFVWVTCCSTRPRPRLYGIVLGALLLLFFCGLPSVFYWSLQPLLNFLLPMFSPLATLLAC
30 | VNSSSKPLIYSGLGRQPGKREPLRSVLRRALGEGAELGARGQSLPMGLL
31 | >PongoAbelii
32 | MFGLFGLWRTFDSVVFYLTLIVGLGGLVGNGLVLWNLGFHIKKGPFSVYLLHLAAADFLF
33 | LSCHVGFSVAQAALGAQDTLYFVLTFLWFAVGLWLLAAFSVERCLSDLFPACYHGCRPRH
34 | ASAVLCALVWALTLPAVLLPANACGLLRNSARPLVCLRYHVPASPGSCAGLR-AWTAGVV
35 | LFVLVTCCSMRARPR-YGIVLGALLLLF-CGLPLVFYWSLQPLLNFLLPMFSPLAMLLAC
36 | VNSSSKPLIYLGLGRQPGKREPLRVVLQRALGEGAELGARGQSLPMGLL
37 | 


--------------------------------------------------------------------------------
/tests/testdata/example.faa:
--------------------------------------------------------------------------------
 1 | >GorillaGorilla_ENSGGOP00000051206.1
 2 | MFGLFGLWRTFHSVVFYLTLIVGLGGPVGNGLVLWNLSFHVKKGPFSINLLHLAAADFLF
 3 | LSCRVGFSVAQAALG---------------------------RCLSDLFPA---------
 4 | ---VLCTLVWAPTLPAVLLPANACGLLCISARPLVCLRYHVASVTWFLVLARVAWTAGVV
 5 | LFVWVTCCSTRLQPRLYGIVLGALLLLFLCGLPLVFYWSLQPLLNFLLPMFSPLATLLAC
 6 | VNSSSKPLIYSGLGRQPGKRESLRSVLRRALGEGAKLGARGQSLPMGLL
 7 | >HomoSapiens_ENSP00000330612.3
 8 | MFGLFGLWRTFDSVVFYLTLIVGLGGPVGNGLVLWNLGFRIKKGPFSIYLLHLAAADFLF
 9 | LSCRVGFSVAQAALGAQDTLYFVLTFLWFAVGLWLLAAFSVERCLSDLFPACYQGCRPRH
10 | ASAVLCALVWTPTLPAVPLPANACGLLRNSACPLVCPRYHVASVTWFLVLARVAWTAGVV
11 | LFVWVTCCSTRPRPRLYGIVLGALLLLFFCGLPSVFYWSLQPLLNFLLPVFSPLATLLAC
12 | VNSSSKPLIYSGLGRQPGKREPLRSVLRRALGEGAELGARGQSLPMGLL
13 | >NomascusLeucogenys_ENSNLEP00000013130.1
14 | MFGLFGLWRTFDSVVFYLTLIVGLGGLVGNGLVLWNLGFHIKKGPFSVYLLHLAAADFLF
15 | LSCHVGFSVAQAALGAQDTLYFVLTFLWFAVGLWLVVAFSVERCLSDLFPACYQGCRPRH
16 | TSVILCALVWALTLPAVLLPANACGLLHNSARPLVCLRYHVASVTWFLVLACVAWTAGVV
17 | LFVWVTCCSTRPRPRLYGIVLGALLLLFFCGLPLVLYWSLQPLLNFLLPMFSPLATLLAC
18 | VNSSSKPLIYXXXXRQPGKREPLRVVLWRALGEGAELSARGQSLPMGLL
19 | >PanPaniscus_ENSPPAP00000002623.1
20 | MFGLFGLWRTFDSVVFYLTLIVGLGGPVGNGLVLWNLGFHIKKGTFSIYLLHLAAADFLF
21 | LSCPVGFSVAQAALGAQDTLYFVLTFLWFAVGLWLLAAFSVERSLSDLFPACYQGCRPRH
22 | ASAVLCALVWTPTLPALPLPANACGLLRNSACPLVCLRYHVASVTWFLVLARVAWTAGVV
23 | LFVWVTCCSTRPRPRLYGIVLGALLLLFFCGLPSVFYWRLQPLLNFLLPMFSPLATLLAC
24 | VNSSSKPLIYSGLGRQPGKREPLRSVLRRALGEGAELGARGQSLPMGLL
25 | >PanTroglodytes_ENSPTRP00000048296.3
26 | MFGLFGLWRTFDSVVFYLTPIVGLGGPVGNGLVLWNLGFHIKKGPFSIYLLHLAAADFLF
27 | LSCPVGFSVAQAALGAQDTLYFVLTFLWFAVGLWLLAAFSVERCLSDLFPACYQGCRPRH
28 | ASAVLCALVWTPTLPALPLPANACGLLRNSACPLVCLRYHVASVTWFLVLARVAWTAGVV
29 | LFVWVTCCSTRPRPRLYGIVLGALLLLFFCGLPSVFYWSLQPLLNFLLPMFSPLATLLAC
30 | VNSSSKPLIYSGLGRQPGKREPLRSVLRRALGEGAELGARGQSLPMGLL
31 | >PongoAbelii_ENSPPYP00000003417.1
32 | MFGLFGLWRTFDSVVFYLTLIVGLGGLVGNGLVLWNLGFHIKKGPFSVYLLHLAAADFLF
33 | LSCHVGFSVAQAALGAQDTLYFVLTFLWFAVGLWLLAAFSVERCLSDLFPACYHGCRPRH
34 | ASAVLCALVWALTLPAVLLPANACGLLRNSARPLVCLRYHVPASPGSCAGLR-AWTAGVV
35 | LFVLVTCCSMRARPR-YGIVLGALLLLF-CGLPLVFYWSLQPLLNFLLPMFSPLAMLLAC
36 | VNSSSKPLIYLGLGRQPGKREPLRVVLQRALGEGAELGARGQSLPMGLL
37 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "pyMSAviz"
 3 | dynamic = ["version"]
 4 | description = "MSA visualization python package for sequence analysis"
 5 | authors = [{ name = "moshi4", email = "" }]
 6 | license = "MIT"
 7 | readme = "README.md"
 8 | keywords = [
 9 |     "bioinformatics",
10 |     "matplotlib",
11 |     "visualization",
12 |     "sequence-alignment",
13 |     "sequence-analysis",
14 | ]
15 | classifiers = [
16 |     "Intended Audience :: Science/Research",
17 |     "Topic :: Scientific/Engineering :: Bio-Informatics",
18 |     "Framework :: Matplotlib",
19 | ]
20 | requires-python = ">=3.9"
21 | dependencies = ["matplotlib>=3.5.2", "biopython>=1.79"]
22 | 
23 | [project.urls]
24 | homepage = "https://moshi4.github.io/pyMSAviz/"
25 | repository = "https://github.com/moshi4/pyMSAviz/"
26 | 
27 | [project.scripts]
28 | pymsaviz = "pymsaviz.scripts.cli:main"
29 | 
30 | [tool.hatch.version]
31 | path = "src/pymsaviz/__init__.py"
32 | 
33 | [tool.rye]
34 | managed = true
35 | dev-dependencies = [
36 |     "ruff>=0.4.0",
37 |     "pre-commit>=3.5.0",
38 |     "pytest>=8.0.0",
39 |     "pytest-cov>=4.0.0",
40 |     "ipykernel>=6.13.0",
41 |     # docs
42 |     "mkdocs>=1.2",
43 |     "mkdocstrings[python]>=0.19.0",
44 |     "mkdocs-jupyter>=0.21.0",
45 |     "mkdocs-material>=8.2",
46 |     "black>=22.3.0",
47 | ]
48 | 
49 | [tool.pytest.ini_options]
50 | minversion = "6.0"
51 | addopts = "--cov=src --tb=line --cov-report=xml --cov-report=term"
52 | testpaths = ["tests"]
53 | 
54 | [tool.ruff]
55 | include = ["src/**.py", "tests/**.py"]
56 | line-length = 88
57 | 
58 | # Lint Rules: https://docs.astral.sh/ruff/rules/
59 | [tool.ruff.lint]
60 | select = [
61 |     "F", # pyflakes
62 |     "E", # pycodestyle (Error)
63 |     "W", # pycodestyle (Warning)
64 |     "I", # isort
65 |     "D", # pydocstyle
66 | ]
67 | ignore = [
68 |     "D100", # Missing docstring in public module
69 |     "D101", # Missing docstring in public class
70 |     "D104", # Missing docstring in public package
71 |     "D105", # Missing docstring in magic method
72 |     "D205", # 1 blank line required between summary line and description
73 |     "D400", # First line should end with a period
74 |     "D401", # First line should be in imperative mood
75 |     "D403", # First word of the first line should be properly capitalized
76 |     "D415", # First line should end with a period, question mark, or exclamation point
77 | ]
78 | 
79 | [tool.ruff.lint.pydocstyle]
80 | convention = "numpy"
81 | 
82 | [build-system]
83 | requires = ["hatchling"]
84 | build-backend = "hatchling.build"
85 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | example
  2 | .vscode/
  3 | notebooks/
  4 | 
  5 | # Byte-compiled / optimized / DLL files
  6 | __pycache__/
  7 | *.py[cod]
  8 | *$py.class
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | pip-wheel-metadata/
 28 | share/python-wheels/
 29 | *.egg-info/
 30 | .installed.cfg
 31 | *.egg
 32 | MANIFEST
 33 | 
 34 | # PyInstaller
 35 | #  Usually these files are written by a python script from a template
 36 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 37 | *.manifest
 38 | *.spec
 39 | 
 40 | # Installer logs
 41 | pip-log.txt
 42 | pip-delete-this-directory.txt
 43 | 
 44 | # Unit test / coverage reports
 45 | htmlcov/
 46 | .tox/
 47 | .nox/
 48 | .coverage
 49 | .coverage.*
 50 | .cache
 51 | nosetests.xml
 52 | coverage.xml
 53 | *.cover
 54 | *.py,cover
 55 | .hypothesis/
 56 | .pytest_cache/
 57 | 
 58 | # Translations
 59 | *.mo
 60 | *.pot
 61 | 
 62 | # Django stuff:
 63 | *.log
 64 | local_settings.py
 65 | db.sqlite3
 66 | db.sqlite3-journal
 67 | 
 68 | # Flask stuff:
 69 | instance/
 70 | .webassets-cache
 71 | 
 72 | # Scrapy stuff:
 73 | .scrapy
 74 | 
 75 | # Sphinx documentation
 76 | docs/_build/
 77 | 
 78 | # PyBuilder
 79 | target/
 80 | 
 81 | # Jupyter Notebook
 82 | .ipynb_checkpoints
 83 | 
 84 | # IPython
 85 | profile_default/
 86 | ipython_config.py
 87 | 
 88 | # pyenv
 89 | .python-version
 90 | 
 91 | # pipenv
 92 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 93 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 94 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 95 | #   install all needed dependencies.
 96 | #Pipfile.lock
 97 | 
 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 99 | __pypackages__/
100 | 
101 | # Celery stuff
102 | celerybeat-schedule
103 | celerybeat.pid
104 | 
105 | # SageMath parsed files
106 | *.sage.py
107 | 
108 | # Environments
109 | .env
110 | .venv
111 | env/
112 | venv/
113 | ENV/
114 | env.bak/
115 | venv.bak/
116 | 
117 | # Spyder project settings
118 | .spyderproject
119 | .spyproject
120 | 
121 | # Rope project settings
122 | .ropeproject
123 | 
124 | # mkdocs documentation
125 | /site
126 | 
127 | # mypy
128 | .mypy_cache/
129 | .dmypy.json
130 | dmypy.json
131 | 
132 | # Pyre type checker
133 | .pyre/
134 | 


--------------------------------------------------------------------------------
/src/pymsaviz/config/__init__.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import csv
 4 | from enum import IntEnum, auto
 5 | from pathlib import Path
 6 | 
 7 | ###########################################################
 8 | # Color Schemes Config
 9 | ###########################################################
10 | 
11 | 
def get_color_schemes() -> dict[str, dict[str, str]]:
    """Get color schemes

    Reads `color_schemes.tsv` located next to this module. The header row
    lists the sequence letters after the first (name) column, and every
    following row holds a scheme name followed by one color per letter.

    Returns
    -------
    name2color_scheme : dict[str, dict[str, str]]
        Color schemes dict (scheme name -> {letter: color})
    """
    color_schemes_file = Path(__file__).parent / "color_schemes.tsv"
    name2color_scheme: dict[str, dict[str, str]] = {}
    # `newline=""` is what the csv module expects from its input file, and the
    # explicit encoding keeps parsing independent of the platform locale.
    with open(color_schemes_file, newline="", encoding="utf-8") as f:
        reader = csv.reader(f, delimiter="\t")
        letters = next(reader)[1:]
        for row in reader:
            if not row:
                # Blank line: nothing to parse (would fail on `row[0]`)
                continue
            name, colors = row[0], row[1:]
            name2color_scheme[name] = dict(zip(letters, colors))
    return name2color_scheme
33 | 
34 | 
# Color schemes table (scheme name -> {letter: color}), built once at import time
COLOR_SCHEMES = get_color_schemes()
36 | 
37 | ###########################################################
38 | # Plot Config
39 | ###########################################################
40 | 
41 | 
class AxesType(IntEnum):
    """Plot axes type enum

    Members receive consecutive integer values from `auto()`, in the order
    they are defined below.
    """

    # NOTE(review): member meanings are inferred from their names only;
    # confirm against the plotting code that consumes this enum.
    MSA = auto()  # presumably the alignment plot area
    CONSENSUS = auto()  # presumably the consensus plot area
    SPACE = auto()  # presumably a spacer between plot areas
    WRAP_SPACE = auto()  # presumably the spacer between wrapped plot areas
49 | 
50 | 
51 | ###########################################################
52 | # Example MSA Dataset
53 | ###########################################################
54 | 
55 | 
def get_msa_testdata(name: str = "MRGPRG.fa") -> Path:
    """Get the path of a bundled MSA testdata file

    Bundled MSA testdata filenames
    - `HIGD2A.fa` (6 species genes, 118 alignment length)
    - `MRGPRG.fa` (6 species genes, 289 alignment length)

    Parameters
    ----------
    name : str, optional
        Testdata filename to look up in the `testdata` directory

    Returns
    -------
    msa_testdata_file : Path
        Path of the requested MSA testdata file

    Raises
    ------
    ValueError
        If no entry named `name` exists in the `testdata` directory
    """
    # Index every entry of the `testdata` directory next to this module by name
    name2dataset_file = {
        path.name: path for path in (Path(__file__).parent / "testdata").glob("*")
    }
    if name in name2dataset_file:
        return name2dataset_file[name]

    available_names = list(name2dataset_file.keys())
    raise ValueError(
        f"Dataset name = '{name}' not found. "
        f"Available testdata name = {available_names}"
    )
81 | 


--------------------------------------------------------------------------------
/docs/cli-docs/pymsaviz.md:
--------------------------------------------------------------------------------
 1 | # pymsaviz CLI Document
 2 | 
 3 | ## Usage
 4 | 
 5 | ### Basic Command
 6 | 
 7 |     pymsaviz -i [MSA file] -o [MSA visualization file]
 8 | 
 9 | ### Options
10 | 
11 |     $ pymsaviz --help
12 |     usage: pymsaviz [options] -i msa.fa -o msa_viz.png
13 | 
14 |     MSA(Multiple Sequence Alignment) visualization CLI tool
15 | 
16 |     optional arguments:
17 |       -i I, --infile I    Input MSA file
18 |       -o O, --outfile O   Output MSA visualization file (*.png|*.jpg|*.svg|*.pdf)
19 |       --format            MSA file format (Default: 'fasta')
20 |       --color_scheme      Color scheme (Default: 'Zappo')
21 |       --start             Start position of MSA visualization (Default: 1)
22 |       --end               End position of MSA visualization (Default: 'MSA Length')
23 |       --wrap_length       Wrap length (Default: None)
24 |       --wrap_space_size   Space size between wrap MSA plot area (Default: 3.0)
25 |       --label_type        Label type ('id'[default]|'description')
26 |       --show_grid         Show grid (Default: OFF)
27 |       --show_count        Show seq char count without gap on right side (Default: OFF)
28 |       --show_consensus    Show consensus sequence (Default: OFF)
29 |       --consensus_color   Consensus identity bar color (Default: '#1f77b4')
30 |       --consensus_size    Consensus identity bar height size (Default: 2.0)
31 |       --sort              Sort MSA order by NJ tree constructed from MSA distance matrix (Default: OFF)
32 |       --dpi               Figure DPI (Default: 300)
33 |       -v, --version       Print version information
34 |       -h, --help          Show this help message and exit
35 | 
36 |     Available Color Schemes:
37 |     ['Clustal', 'Zappo', 'Taylor', 'Flower', 'Blossom', 'Sunset', 'Ocean', 'Hydrophobicity', 'HelixPropensity', 'StrandPropensity', 'TurnPropensity', 'BuriedIndex', 'Nucleotide', 'Purine/Pyrimidine', 'Identity', 'None']
38 | 
39 | ### Example Command
40 | 
41 | Click [here](https://github.com/moshi4/pyMSAviz/raw/main/example/example.zip) to download example MSA files.  
42 | 
43 | #### Example 1
44 | 
45 |     pymsaviz -i ./example/HIGD2A.fa -o cli_example01.png --color_scheme Identity
46 | 
47 | ![example01.png](../images/cli_example01.png)  
48 | 
49 | #### Example 2
50 | 
51 |     pymsaviz -i ./example/MRGPRG.fa -o cli_example02.png --wrap_length 80 \
52 |              --color_scheme Taylor --show_consensus --show_count
53 | 
54 | ![example02.png](../images/cli_example02.png)  
55 | 
56 | #### Example 3
57 | 
58 |     pymsaviz -i ./example/MRGPRG.fa -o cli_example03.png --start 100 --end 160 \
59 |              --color_scheme Flower --show_grid --show_consensus --consensus_color tomato 
60 | 
61 | ![example03.png](../images/cli_example03.png)  
62 | 


--------------------------------------------------------------------------------
/src/pymsaviz/config/color_schemes.tsv:
--------------------------------------------------------------------------------
 1 | ColorScheme	A	R	N	D	C	Q	E	G	H	I	L	K	M	F	P	S	T	W	Y	V	B	X	Z	J	O	U	-
 2 | Clustal	#80A0F0	#F01505	#00FF00	#C048C0	#F08080	#00FF00	#C048C0	#F09048	#15A4A4	#80A0F0	#80A0F0	#F01505	#80A0F0	#80A0F0	#FFFF00	#00FF00	#00FF00	#80A0F0	#15A4A4	#80A0F0	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF
 3 | Zappo	#FFAFAF	#6464FF	#00FF00	#FF0000	#FFFF00	#00FF00	#FF0000	#FF00FF	#6464FF	#FFAFAF	#FFAFAF	#6464FF	#FFAFAF	#FFC800	#FF00FF	#00FF00	#00FF00	#FFC800	#FFC800	#FFAFAF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF
 4 | Taylor	#CCFF00	#0000FF	#CC00FF	#FF0000	#FFFF00	#FF00CC	#FF0066	#FF9900	#0066FF	#66FF00	#33FF00	#6600FF	#00FF00	#00FF66	#FFCC00	#FF3300	#FF6600	#00CCFF	#00FFCC	#99FF00	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF
 5 | Flower	#B18A51	#83BFF1	#38CEC6	#29A578	#F85604	#7295AE	#2DA0A1	#B1C23C	#2E94F9	#F27663	#DF6E75	#7FC3D7	#FA9DB0	#F9559D	#4FA32A	#B4BD9B	#D2B576	#F92CED	#C96ECF	#FA997B	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF
 6 | Blossom	#8BC4B4	#F99504	#B5C207	#5FA504	#2E93FE	#BF8526	#DBB501	#36D382	#F85604	#9ABAF3	#CDA5DC	#FAA527	#F5A1B8	#F74FA8	#35D631	#7E9D59	#2AA39B	#F907FB	#F84E7A	#87C0E4	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF
 7 | Sunset	#FBA0FD	#85746A	#ABC8F4	#2E7BBE	#F90BFE	#8C6E81	#677892	#3099FF	#DBC58E	#F821A1	#E01E82	#DEBECC	#D13E7B	#F8375D	#5766F9	#E7B4FD	#A658B7	#F83704	#CB5339	#F951B8	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF
 8 | Ocean	#C6CA9B	#2BA0A8	#3DDFC3	#4CDFA1	#C68136	#8BD3D1	#60DAC9	#33A551	#3CCFFE	#F2BAAA	#BB8A83	#40A090	#A48B88	#AB88AF	#AFD364	#6D9B74	#8D9566	#758AEE	#BAC3FC	#E9BEA4	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF
 9 | Hydrophobicity	#AD0052	#0000FF	#0C00F3	#0C00F3	#C2003D	#0C00F3	#0C00F3	#6A0095	#1500EA	#FF0000	#EA0015	#0000FF	#B0004F	#CB0034	#4600B9	#5E00A1	#61009E	#5B00A4	#4F00B0	#F60009	#0C00F3	#680097	#0C00F3	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF
10 | HelixPropensity	#E718E7	#6F906F	#39E41A	#778877	#37DC23	#926D92	#F905FF	#41FF00	#758A75	#8A758A	#AE51AE	#A05FA0	#EF10EF	#986798	#41FF00	#36C936	#47B847	#8A758A	#38DE20	#857A85	#49B649	#758A75	#C936C9	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF
11 | StrandPropensity	#5858A7	#6B6B94	#64649C	#2121DE	#9D9D62	#8C8C73	#1D00FF	#4949B6	#60609F	#ECEC14	#B2B24D	#4747B8	#82827D	#C2C23C	#2323DB	#4949B6	#9D9D62	#C0C03E	#D3D32C	#FEFF03	#4343BC	#797986	#4747B8	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF
12 | TurnPropensity	#3AD3D3	#708F8F	#F81502	#E81717	#A85757	#3FC0C0	#778888	#F81502	#708F8F	#48FEFF	#40E3E3	#7E8181	#3FE1E1	#3FE1E1	#F61507	#E11E1E	#738C8C	#738C8C	#9D6262	#46F8F8	#F3140B	#7C8383	#5BA4A4	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF
13 | BuriedIndex	#28A35C	#40FC03	#3BEB13	#3BEB13	#1D00FF	#3DF10D	#3DF10D	#269D62	#35D529	#1954AB	#1F7C84	#41FF00	#259768	#218778	#38E01F	#35D529	#37DB23	#29A857	#3AE619	#1A5FA0	#3BEB13	#2DB649	#3DF10D	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF
14 | Nucleotide	#64F740	#FFFFFF	#FFFFFF	#FFFFFF	#FAB340	#FFFFFF	#FFFFFF	#EB413B	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#3C88EE	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#3C88EE	#FFFFFF
15 | Purine/Pyrimidine	#FA82FA	#FA82FA	#FFFFFF	#FFFFFF	#40E0D0	#FFFFFF	#FFFFFF	#FA82FA	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#40E0D0	#FFFFFF	#40E0D0	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#40E0D0	#FFFFFF
16 | Identity	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF
17 | None	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF	#FFFFFF
18 | 


--------------------------------------------------------------------------------
/tests/test_msaviz.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | 
  3 | import pytest
  4 | from Bio.Align import MultipleSeqAlignment
  5 | from Bio.Seq import Seq
  6 | from Bio.SeqRecord import SeqRecord
  7 | 
  8 | from pymsaviz import MsaViz, get_msa_testdata
  9 | 
 10 | 
 11 | def test_simple_all_run(msa_fasta_file: Path, tmp_path: Path):
 12 |     """Test simple all run (Only check if no error occurs)"""
 13 |     mv = MsaViz(msa_fasta_file)
 14 | 
 15 |     fig_outfile = tmp_path / "test.png"
 16 |     mv.savefig(fig_outfile)
 17 | 
 18 |     assert fig_outfile.exists()
 19 | 
 20 | 
 21 | def test_all_run_with_options(msa_fasta_file: Path, tmp_path: Path):
 22 |     """Test all run with options (Only check if no error occurs)"""
 23 |     mv = MsaViz(
 24 |         msa_fasta_file,
 25 |         color_scheme="Identity",
 26 |         wrap_length=50,
 27 |         show_label=False,
 28 |         show_seq_char=False,
 29 |         sort=True,
 30 |     )
 31 |     mv.set_highlight_pos([1, 5, (10, 13), 18])
 32 |     mv.set_highlight_pos_by_ident_thr(min_thr=80, max_thr=100)
 33 |     mv.add_markers([50, 51, 52, (60, 70), 80], marker="x", color="blue", size=6)
 34 |     mv.add_text_annotation(
 35 |         (100, 120), text="test", text_color="blue", text_size=10, range_color="blue"
 36 |     )
 37 | 
 38 |     fig_outfile = tmp_path / "test.png"
 39 |     mv.savefig(fig_outfile)
 40 | 
 41 |     assert fig_outfile.exists()
 42 | 
 43 | 
 44 | def test_basic_property():
 45 |     """Test basic property"""
 46 |     msa = MultipleSeqAlignment([])
 47 |     id_list = ["first", "second", "third", "fourth"]
 48 |     seq_list = [
 49 |         "CDNIPGFED",
 50 |         "ADNIPGFED",
 51 |         "BDNIPGFED",
 52 |         "DDNIPGFED",
 53 |     ]
 54 |     for id, seq in zip(id_list, seq_list):
 55 |         msa.append(SeqRecord(Seq(seq), id=id))
 56 | 
 57 |     mv = MsaViz(msa)
 58 |     assert mv.msa_count == 4
 59 |     assert mv.alignment_length == 9
 60 |     assert mv.id_list == id_list
 61 |     assert mv.seq_list == seq_list
 62 |     assert mv.wrap_num == 0
 63 |     assert mv.consensus_seq == "XDNIPGFED"
 64 | 
 65 | 
 66 | def test_set_custom_color_scheme(dummy_msa: MultipleSeqAlignment):
 67 |     """Test set_custom_color_scheme"""
 68 |     mv = MsaViz(dummy_msa)
 69 |     # Case1: Set correct custom color scheme
 70 |     custom_color_scheme = {"A": "red", "T": "blue", "G": "green", "C": "orange"}
 71 |     mv.set_custom_color_scheme(custom_color_scheme)
 72 |     assert mv.color_scheme == custom_color_scheme
 73 | 
 74 |     # Case2: Set invalid custom color scheme
 75 |     invalid_color_scheme = {"A": "invalid", "T": "blue", "G": "green", "C": "orange"}
 76 |     with pytest.raises(ValueError):
 77 |         mv.set_custom_color_scheme(invalid_color_scheme)
 78 | 
 79 | 
 80 | def test_set_custom_color_func(msa_fasta_file: Path, tmp_path: Path):
 81 |     """Test set_custom_color_func"""
 82 |     mv = MsaViz(msa_fasta_file)
 83 | 
 84 |     def custom_color_func(
 85 |         row_pos: int, col_pos: int, seq_char: str, msa: MultipleSeqAlignment
 86 |     ) -> str:
 87 |         if col_pos < 60 and seq_char != "-":
 88 |             return "salmon"
 89 |         if col_pos >= 60 and 1 <= row_pos <= 4:
 90 |             return "lime"
 91 |         return "white"
 92 | 
 93 |     mv.set_custom_color_func(custom_color_func)
 94 | 
 95 |     fig_outfile = tmp_path / "test.png"
 96 |     mv.savefig(fig_outfile)
 97 | 
 98 |     assert fig_outfile.exists()
 99 | 
100 | 
def test_consensus_identity():
    """Test consensus sequence and per-column consensus identity"""
    # Expected result for each column of the test MSA (read top to bottom)
    # 1: 'ABCDE' (all chars differ) => 'X' [20 %]
    # 2: 'GGGGG' (all 'G') => 'G' [100 %]
    # 3: '-----' (all gaps) => 'X' [0 %]
    # 4: '--V--' (one char & gaps) => 'V' [20 %]
    # 5: '-AAAC' ('A' is most common) => 'A' [60 %]
    # 6: 'RRTTI' ('R' & 'T' are tied for most common) => 'X' [40 %]
    # 7: 'XXAX-' ('X' is most common) => 'X' [60 %]
    rows = [
        "AG---RX",
        "BG--ARX",
        "CG-VATA",
        "DG--ATX",
        "EG--CI-",
    ]
    msa = MultipleSeqAlignment([])
    for row in rows:
        msa.append(SeqRecord(Seq(row)))

    viz = MsaViz(msa)

    assert viz.consensus_seq == "XGXVAXX"
    assert viz._get_consensus_identity_list() == [20, 100, 0, 20, 60, 40, 60]
127 | 
128 | 
def test_is_aa_msa():
    """Test that amino acid and nucleotide MSAs are told apart"""

    def build_msa(*seqs: str) -> MultipleSeqAlignment:
        return MultipleSeqAlignment([SeqRecord(Seq(seq)) for seq in seqs])

    # Case1: AA MSA
    aa_viz = MsaViz(build_msa("MFLTALLCRGRI", "MFLT---TRGVI"))
    assert aa_viz._is_aa_msa() is True

    # Case2: NT MSA
    nt_viz = MsaViz(build_msa("ATGC--TGCA", "AAGCTCTGCA"))
    assert nt_viz._is_aa_msa() is False
148 | 
149 | 
def test_parse_positions(dummy_msa: MultipleSeqAlignment):
    """Test that 1-based positions/ranges map to 0-based index lists"""
    viz = MsaViz(dummy_msa)
    # (input positions, expected 0-based indices)
    cases = [
        # Case1: int value
        ([1], [0]),
        # Case2: int values
        ([1, 5, 10, 20], [0, 4, 9, 19]),
        # Case3: tuple range (both ends included)
        ([(5, 9)], [4, 5, 6, 7, 8]),
        # Case4: int values & tuple range
        ([1, 5, (10, 13), 18], [0, 4, 9, 10, 11, 12, 17]),
    ]
    for positions, expected in cases:
        assert viz._parse_positions(positions) == expected
161 | 
162 | 
def test_get_msa_testdata():
    """Test get_msa_testdata for the default, a named and an unknown dataset"""
    default_file = get_msa_testdata()
    assert default_file.exists()

    named_file = get_msa_testdata("HIGD2A.fa")
    assert named_file.exists()

    # Unknown dataset names must be rejected
    with pytest.raises(ValueError):
        get_msa_testdata("invalid_name")
169 | 


--------------------------------------------------------------------------------
/src/pymsaviz/scripts/cli.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import argparse
  4 | from pathlib import Path
  5 | 
  6 | from pymsaviz import MsaViz, __version__
  7 | 
  8 | 
  9 | def main():
 10 |     """Main function called from CLI"""
 11 |     args = get_args()
 12 |     run(**args.__dict__)
 13 | 
 14 | 
 15 | def run(
 16 |     infile: str | Path,
 17 |     outfile: str | Path,
 18 |     format: str = "fasta",
 19 |     color_scheme: str = "Zappo",
 20 |     start: int = 1,
 21 |     end: int | None = None,
 22 |     wrap_length: int | None = None,
 23 |     wrap_space_size: float = 3.0,
 24 |     label_type: str = "id",
 25 |     show_grid: bool = False,
 26 |     show_count: bool = False,
 27 |     show_consensus: bool = False,
 28 |     consensus_color: str = "#1f77b4",
 29 |     consensus_size: float = 2.0,
 30 |     sort: bool = False,
 31 |     dpi: int = 300,
 32 | ):
 33 |     """Run MSA visualization"""
 34 |     mv = MsaViz(
 35 |         msa=infile,
 36 |         format=format,
 37 |         start=start,
 38 |         end=end,
 39 |         wrap_length=wrap_length,
 40 |         wrap_space_size=wrap_space_size,
 41 |         label_type=label_type,
 42 |         color_scheme=color_scheme,
 43 |         show_grid=show_grid,
 44 |         show_count=show_count,
 45 |         show_consensus=show_consensus,
 46 |         consensus_color=consensus_color,
 47 |         consensus_size=consensus_size,
 48 |         sort=sort,
 49 |     )
 50 |     mv.savefig(outfile, dpi=dpi)
 51 | 
 52 | 
 53 | def get_args() -> argparse.Namespace:
 54 |     """Get arguments
 55 | 
 56 |     Returns
 57 |     -------
 58 |     args : argparse.Namespace
 59 |         Argument parameters
 60 |     """
 61 |     description = "MSA(Multiple Sequence Alignment) visualization CLI tool"
 62 |     parser = argparse.ArgumentParser(
 63 |         description=description,
 64 |         usage="pymsaviz [options] -i msa.fa -o msa_viz.png",
 65 |         add_help=False,
 66 |         epilog=f"Available Color Schemes:\n{MsaViz.available_color_schemes()}",
 67 |         formatter_class=argparse.RawDescriptionHelpFormatter,
 68 |     )
 69 | 
 70 |     parser.add_argument(
 71 |         "-i",
 72 |         "--infile",
 73 |         type=Path,
 74 |         help="Input MSA file",
 75 |         metavar="I",
 76 |     )
 77 |     parser.add_argument(
 78 |         "-o",
 79 |         "--outfile",
 80 |         type=Path,
 81 |         help="Output MSA visualization file (*.png|*.jpg|*.svg|*.pdf)",
 82 |         required=True,
 83 |         metavar="O",
 84 |     )
 85 |     default_msa_format = "fasta"
 86 |     parser.add_argument(
 87 |         "--format",
 88 |         type=str,
 89 |         help=f"MSA file format (Default: '{default_msa_format}')",
 90 |         default=default_msa_format,
 91 |         metavar="",
 92 |     )
 93 |     default_color_scheme = "Zappo"
 94 |     parser.add_argument(
 95 |         "--color_scheme",
 96 |         type=str,
 97 |         help=f"Color scheme (Default: '{default_color_scheme}')",
 98 |         default=default_color_scheme,
 99 |         choices=MsaViz.available_color_schemes(),
100 |         metavar="",
101 |     )
102 |     default_start = 1
103 |     parser.add_argument(
104 |         "--start",
105 |         type=int,
106 |         help=f"Start position of MSA visualization (Default: {default_start})",
107 |         default=default_start,
108 |         metavar="",
109 |     )
110 |     default_end = None
111 |     parser.add_argument(
112 |         "--end",
113 |         type=int,
114 |         help="End position of MSA visualization (Default: 'MSA Length')",
115 |         default=default_end,
116 |         metavar="",
117 |     )
118 |     default_wrap_length = None
119 |     parser.add_argument(
120 |         "--wrap_length",
121 |         type=int,
122 |         help=f"Wrap length (Default: {default_wrap_length})",
123 |         default=default_wrap_length,
124 |         metavar="",
125 |     )
126 |     default_wrap_space_size = 3.0
127 |     parser.add_argument(
128 |         "--wrap_space_size",
129 |         type=float,
130 |         help="Space size between wrap MSA plot area "
131 |         f"(Default: {default_wrap_space_size})",
132 |         default=default_wrap_space_size,
133 |         metavar="",
134 |     )
135 |     default_label_type = "id"
136 |     parser.add_argument(
137 |         "--label_type",
138 |         type=str,
139 |         help="Label type ('id'[default]|'description')",
140 |         default=default_label_type,
141 |         choices=("id", "description"),
142 |         metavar="",
143 |     )
144 |     parser.add_argument(
145 |         "--show_grid",
146 |         help="Show grid (Default: OFF)",
147 |         action="store_true",
148 |     )
149 |     parser.add_argument(
150 |         "--show_count",
151 |         help="Show seq char count without gap on right side (Default: OFF)",
152 |         action="store_true",
153 |     )
154 |     parser.add_argument(
155 |         "--show_consensus",
156 |         help="Show consensus sequence (Default: OFF)",
157 |         action="store_true",
158 |     )
159 |     default_consensus_color = "#1f77b4"
160 |     parser.add_argument(
161 |         "--consensus_color",
162 |         type=str,
163 |         help=f"Consensus identity bar color (Default: '{default_consensus_color}')",
164 |         default=default_consensus_color,
165 |         metavar="",
166 |     )
167 |     default_consensus_size = 2.0
168 |     parser.add_argument(
169 |         "--consensus_size",
170 |         type=float,
171 |         help=f"Consensus identity bar height size (Default: {default_consensus_size})",
172 |         default=default_consensus_size,
173 |         metavar="",
174 |     )
175 |     parser.add_argument(
176 |         "--sort",
177 |         help="Sort MSA order by NJ tree constructed from MSA distance matrix "
178 |         "(Default: OFF)",
179 |         action="store_true",
180 |     )
181 |     default_dpi = 300
182 |     parser.add_argument(
183 |         "--dpi",
184 |         type=int,
185 |         help=f"Figure DPI (Default: {default_dpi})",
186 |         default=default_dpi,
187 |         metavar="",
188 |     )
189 |     parser.add_argument(
190 |         "-v",
191 |         "--version",
192 |         version=f"v{__version__}",
193 |         help="Print version information",
194 |         action="version",
195 |     )
196 |     parser.add_argument(
197 |         "-h",
198 |         "--help",
199 |         help="Show this help message and exit",
200 |         action="help",
201 |     )
202 |     return parser.parse_args()
203 | 
204 | 
# Run the CLI only when this file is executed as a script, not when imported
if __name__ == "__main__":
    main()
207 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # pyMSAviz
  2 | 
  3 | ![Python3](https://img.shields.io/badge/Language-Python3-steelblue)
  4 | ![OS](https://img.shields.io/badge/OS-_Windows_|_Mac_|_Linux-steelblue)
  5 | ![License](https://img.shields.io/badge/License-MIT-steelblue)
  6 | [![Latest PyPI version](https://img.shields.io/pypi/v/pymsaviz.svg)](https://pypi.python.org/pypi/pymsaviz)
  7 | [![Bioconda](https://img.shields.io/conda/vn/bioconda/pymsaviz.svg?color=green)](https://anaconda.org/bioconda/pymsaviz)
  8 | [![CI](https://github.com/moshi4/pyMSAviz/actions/workflows/ci.yml/badge.svg)](https://github.com/moshi4/pyMSAviz/actions/workflows/ci.yml)
  9 | 
 10 | ## Table of contents
 11 | 
 12 | - [Overview](#overview)
 13 | - [Installation](#installation)
 14 | - [API Usage](#api-usage)
 15 | - [CLI Usage](#cli-usage)
 16 | 
 17 | ## Overview
 18 | 
 19 | pyMSAviz is an MSA (Multiple Sequence Alignment) visualization Python package for sequence analysis, implemented on top of matplotlib.
 20 | This package is developed for the purpose of easily and beautifully plotting MSA in Python.
 21 | It also implements the functionality to add markers, text annotations, and highlights to specific positions and ranges in MSA.
 22 | The development of pyMSAviz was inspired by [Jalview](https://www.jalview.org/) and [ggmsa](https://github.com/YuLab-SMU/ggmsa).
 23 | More detailed documentation is available [here](https://moshi4.github.io/pyMSAviz/).
 24 | 
 25 | ![example01.png](https://raw.githubusercontent.com/moshi4/pyMSAviz/main/docs/images/api_example01.png)  
 26 | **Fig.1 Simple visualization result**
 27 | 
 28 | ![example03.png](https://raw.githubusercontent.com/moshi4/pyMSAviz/main/docs/images/api_example03.png)  
 29 | **Fig.2 Customized visualization result**
 30 | 
 31 | ## Installation
 32 | 
 33 | `Python 3.9 or later` is required for installation.
 34 | 
 35 | **Install PyPI package:**
 36 | 
 37 |     pip install pymsaviz
 38 | 
 39 | **Install bioconda package:**
 40 | 
 41 |     conda install -c conda-forge -c bioconda pymsaviz
 42 | 
 43 | ## API Usage
 44 | 
 45 | Only simple example usage is described in this section.
 46 | For more details, please see [Getting Started](https://moshi4.github.io/pyMSAviz/getting_started/) and [API Docs](https://moshi4.github.io/pyMSAviz/api-docs/msaviz/).
 47 | 
 48 | ### API Example
 49 | 
 50 | #### API Example 1
 51 | 
 52 | ```python
 53 | from pymsaviz import MsaViz, get_msa_testdata
 54 | 
 55 | msa_file = get_msa_testdata("HIGD2A.fa")
 56 | mv = MsaViz(msa_file, wrap_length=60, show_count=True)
 57 | mv.savefig("api_example01.png")
 58 | ```
 59 | 
 60 | ![example01.png](https://raw.githubusercontent.com/moshi4/pyMSAviz/main/docs/images/api_example01.png)  
 61 | 
 62 | #### API Example 2
 63 | 
 64 | ```python
 65 | from pymsaviz import MsaViz, get_msa_testdata
 66 | 
 67 | msa_file = get_msa_testdata("MRGPRG.fa")
 68 | mv = MsaViz(msa_file, color_scheme="Taylor", wrap_length=80, show_grid=True, show_consensus=True)
 69 | mv.savefig("api_example02.png")
 70 | ```
 71 | 
 72 | ![example02.png](https://raw.githubusercontent.com/moshi4/pyMSAviz/main/docs/images/api_example02.png)  
 73 | 
 74 | #### API Example 3
 75 | 
 76 | ```python
 77 | from pymsaviz import MsaViz, get_msa_testdata
 78 | 
 79 | msa_file = get_msa_testdata("MRGPRG.fa")
 80 | mv = MsaViz(msa_file, end=180, wrap_length=60, show_consensus=True)
 81 | 
 82 | # Extract MSA positions less than 50% consensus identity
 83 | pos_ident_less_than_50 = []
 84 | ident_list = mv._get_consensus_identity_list()
 85 | for pos, ident in enumerate(ident_list, 1):
 86 |     if ident <= 50:
 87 |         pos_ident_less_than_50.append(pos)
 88 | 
 89 | # Add markers
 90 | mv.add_markers([1])
 91 | mv.add_markers([10, 20], color="orange", marker="o")
 92 | mv.add_markers([30, (40, 50), 55], color="green", marker="+")
 93 | mv.add_markers(pos_ident_less_than_50, marker="x", color="blue")
 94 | # Add text annotations
 95 | mv.add_text_annotation((76, 102), "Gap Region", text_color="red", range_color="red")
 96 | mv.add_text_annotation((112, 123), "Gap Region", text_color="green", range_color="green")
 97 | 
 98 | mv.savefig("api_example03.png")
 99 | ```
100 | 
101 | ![example03.png](https://raw.githubusercontent.com/moshi4/pyMSAviz/main/docs/images/api_example03.png)  
102 | 
103 | ## CLI Usage
104 | 
105 | pyMSAviz provides a simple MSA visualization CLI.
106 | 
107 | ### Basic Command
108 | 
109 |     pymsaviz -i [MSA file] -o [MSA visualization file]
110 | 
111 | ### Options
112 | 
113 |     $ pymsaviz --help
114 |     usage: pymsaviz [options] -i msa.fa -o msa_viz.png
115 | 
116 |     MSA(Multiple Sequence Alignment) visualization CLI tool
117 | 
118 |     optional arguments:
119 |       -i I, --infile I    Input MSA file
120 |       -o O, --outfile O   Output MSA visualization file (*.png|*.jpg|*.svg|*.pdf)
121 |       --format            MSA file format (Default: 'fasta')
122 |       --color_scheme      Color scheme (Default: 'Zappo')
123 |       --start             Start position of MSA visualization (Default: 1)
124 |       --end               End position of MSA visualization (Default: 'MSA Length')
125 |       --wrap_length       Wrap length (Default: None)
126 |       --wrap_space_size   Space size between wrap MSA plot area (Default: 3.0)
127 |       --label_type        Label type ('id'[default]|'description')
128 |       --show_grid         Show grid (Default: OFF)
129 |       --show_count        Show seq char count without gap on right side (Default: OFF)
130 |       --show_consensus    Show consensus sequence (Default: OFF)
131 |       --consensus_color   Consensus identity bar color (Default: '#1f77b4')
132 |       --consensus_size    Consensus identity bar height size (Default: 2.0)
133 |       --sort              Sort MSA order by NJ tree constructed from MSA distance matrix (Default: OFF)
134 |       --dpi               Figure DPI (Default: 300)
135 |       -v, --version       Print version information
136 |       -h, --help          Show this help message and exit
137 | 
138 |     Available Color Schemes:
139 |     ['Clustal', 'Zappo', 'Taylor', 'Flower', 'Blossom', 'Sunset', 'Ocean', 'Hydrophobicity', 'HelixPropensity', 'StrandPropensity', 'TurnPropensity', 'BuriedIndex', 'Nucleotide', 'Purine/Pyrimidine', 'Identity', 'None']
140 | 
141 | ### CLI Example
142 | 
143 | Click [here](https://github.com/moshi4/pyMSAviz/raw/main/example/example.zip) to download example MSA files.  
144 | 
145 | #### CLI Example 1
146 | 
147 |     pymsaviz -i ./example/HIGD2A.fa -o cli_example01.png --color_scheme Identity
148 | 
149 | ![example01.png](https://raw.githubusercontent.com/moshi4/pyMSAviz/main/docs/images/cli_example01.png)  
150 | 
151 | #### CLI Example 2
152 | 
153 |     pymsaviz -i ./example/MRGPRG.fa -o cli_example02.png --wrap_length 80 \
154 |              --color_scheme Taylor --show_consensus --show_count
155 | 
156 | ![example02.png](https://raw.githubusercontent.com/moshi4/pyMSAviz/main/docs/images/cli_example02.png)  
157 | 
158 | #### CLI Example 3
159 | 
160 |     pymsaviz -i ./example/MRGPRG.fa -o cli_example03.png --start 100 --end 160 \
161 |              --color_scheme Flower --show_grid --show_consensus --consensus_color tomato 
162 | 
163 | ![example03.png](https://raw.githubusercontent.com/moshi4/pyMSAviz/main/docs/images/cli_example03.png)  
164 | 
165 | ## Star History
166 | 
167 | [![Star History Chart](https://api.star-history.com/svg?repos=moshi4/pyMSAviz&type=Date)](https://star-history.com/#moshi4/pyMSAviz&Date)
168 | 


--------------------------------------------------------------------------------
/requirements-dev.lock:
--------------------------------------------------------------------------------
  1 | # generated by rye
  2 | # use `rye lock` or `rye sync` to update this lockfile
  3 | #
  4 | # last locked with the following flags:
  5 | #   pre: false
  6 | #   features: []
  7 | #   all-features: true
  8 | #   with-sources: false
  9 | #   generate-hashes: false
 10 | #   universal: false
 11 | 
 12 | -e file:.
 13 | asttokens==3.0.0
 14 |     # via stack-data
 15 | attrs==24.2.0
 16 |     # via jsonschema
 17 |     # via referencing
 18 | babel==2.16.0
 19 |     # via mkdocs-material
 20 | beautifulsoup4==4.12.3
 21 |     # via nbconvert
 22 | biopython==1.84
 23 |     # via pymsaviz
 24 | black==24.10.0
 25 | bleach==6.2.0
 26 |     # via nbconvert
 27 | certifi==2024.8.30
 28 |     # via requests
 29 | cfgv==3.4.0
 30 |     # via pre-commit
 31 | charset-normalizer==3.4.0
 32 |     # via requests
 33 | click==8.1.7
 34 |     # via black
 35 |     # via mkdocs
 36 |     # via mkdocstrings
 37 | colorama==0.4.6
 38 |     # via griffe
 39 |     # via mkdocs-material
 40 | comm==0.2.2
 41 |     # via ipykernel
 42 | contourpy==1.3.0
 43 |     # via matplotlib
 44 | coverage==7.6.8
 45 |     # via pytest-cov
 46 | cycler==0.12.1
 47 |     # via matplotlib
 48 | debugpy==1.8.9
 49 |     # via ipykernel
 50 | decorator==5.1.1
 51 |     # via ipython
 52 | defusedxml==0.7.1
 53 |     # via nbconvert
 54 | distlib==0.3.9
 55 |     # via virtualenv
 56 | exceptiongroup==1.2.2
 57 |     # via ipython
 58 |     # via pytest
 59 | executing==2.1.0
 60 |     # via stack-data
 61 | fastjsonschema==2.21.0
 62 |     # via nbformat
 63 | filelock==3.16.1
 64 |     # via virtualenv
 65 | fonttools==4.55.0
 66 |     # via matplotlib
 67 | ghp-import==2.1.0
 68 |     # via mkdocs
 69 | griffe==1.5.1
 70 |     # via mkdocstrings-python
 71 | identify==2.6.3
 72 |     # via pre-commit
 73 | idna==3.10
 74 |     # via requests
 75 | importlib-metadata==8.5.0
 76 |     # via jupyter-client
 77 |     # via markdown
 78 |     # via mkdocs
 79 |     # via mkdocs-get-deps
 80 |     # via mkdocstrings
 81 |     # via nbconvert
 82 | importlib-resources==6.4.5
 83 |     # via matplotlib
 84 | iniconfig==2.0.0
 85 |     # via pytest
 86 | ipykernel==6.29.5
 87 |     # via mkdocs-jupyter
 88 | ipython==8.18.1
 89 |     # via ipykernel
 90 | jedi==0.19.2
 91 |     # via ipython
 92 | jinja2==3.1.4
 93 |     # via mkdocs
 94 |     # via mkdocs-material
 95 |     # via mkdocstrings
 96 |     # via nbconvert
 97 | jsonschema==4.23.0
 98 |     # via nbformat
 99 | jsonschema-specifications==2024.10.1
100 |     # via jsonschema
101 | jupyter-client==8.6.3
102 |     # via ipykernel
103 |     # via nbclient
104 | jupyter-core==5.7.2
105 |     # via ipykernel
106 |     # via jupyter-client
107 |     # via nbclient
108 |     # via nbconvert
109 |     # via nbformat
110 | jupyterlab-pygments==0.3.0
111 |     # via nbconvert
112 | jupytext==1.16.4
113 |     # via mkdocs-jupyter
114 | kiwisolver==1.4.7
115 |     # via matplotlib
116 | markdown==3.7
117 |     # via mkdocs
118 |     # via mkdocs-autorefs
119 |     # via mkdocs-material
120 |     # via mkdocstrings
121 |     # via pymdown-extensions
122 | markdown-it-py==3.0.0
123 |     # via jupytext
124 |     # via mdit-py-plugins
125 | markupsafe==3.0.2
126 |     # via jinja2
127 |     # via mkdocs
128 |     # via mkdocs-autorefs
129 |     # via mkdocstrings
130 |     # via nbconvert
131 | matplotlib==3.9.3
132 |     # via pymsaviz
133 | matplotlib-inline==0.1.7
134 |     # via ipykernel
135 |     # via ipython
136 | mdit-py-plugins==0.4.2
137 |     # via jupytext
138 | mdurl==0.1.2
139 |     # via markdown-it-py
140 | mergedeep==1.3.4
141 |     # via mkdocs
142 |     # via mkdocs-get-deps
143 | mistune==3.0.2
144 |     # via nbconvert
145 | mkdocs==1.6.1
146 |     # via mkdocs-autorefs
147 |     # via mkdocs-jupyter
148 |     # via mkdocs-material
149 |     # via mkdocstrings
150 | mkdocs-autorefs==1.2.0
151 |     # via mkdocstrings
152 |     # via mkdocstrings-python
153 | mkdocs-get-deps==0.2.0
154 |     # via mkdocs
155 | mkdocs-jupyter==0.25.1
156 | mkdocs-material==9.5.46
157 |     # via mkdocs-jupyter
158 | mkdocs-material-extensions==1.3.1
159 |     # via mkdocs-material
160 | mkdocstrings==0.27.0
161 |     # via mkdocstrings-python
162 | mkdocstrings-python==1.12.2
163 |     # via mkdocstrings
164 | mypy-extensions==1.0.0
165 |     # via black
166 | nbclient==0.10.1
167 |     # via nbconvert
168 | nbconvert==7.16.4
169 |     # via mkdocs-jupyter
170 | nbformat==5.10.4
171 |     # via jupytext
172 |     # via nbclient
173 |     # via nbconvert
174 | nest-asyncio==1.6.0
175 |     # via ipykernel
176 | nodeenv==1.9.1
177 |     # via pre-commit
178 | numpy==2.0.2
179 |     # via biopython
180 |     # via contourpy
181 |     # via matplotlib
182 | packaging==24.2
183 |     # via black
184 |     # via ipykernel
185 |     # via jupytext
186 |     # via matplotlib
187 |     # via mkdocs
188 |     # via nbconvert
189 |     # via pytest
190 | paginate==0.5.7
191 |     # via mkdocs-material
192 | pandocfilters==1.5.1
193 |     # via nbconvert
194 | parso==0.8.4
195 |     # via jedi
196 | pathspec==0.12.1
197 |     # via black
198 |     # via mkdocs
199 | pexpect==4.9.0
200 |     # via ipython
201 | pillow==11.0.0
202 |     # via matplotlib
203 | platformdirs==4.3.6
204 |     # via black
205 |     # via jupyter-core
206 |     # via mkdocs-get-deps
207 |     # via mkdocstrings
208 |     # via virtualenv
209 | pluggy==1.5.0
210 |     # via pytest
211 | pre-commit==4.0.1
212 | prompt-toolkit==3.0.48
213 |     # via ipython
214 | psutil==6.1.0
215 |     # via ipykernel
216 | ptyprocess==0.7.0
217 |     # via pexpect
218 | pure-eval==0.2.3
219 |     # via stack-data
220 | pygments==2.18.0
221 |     # via ipython
222 |     # via mkdocs-jupyter
223 |     # via mkdocs-material
224 |     # via nbconvert
225 | pymdown-extensions==10.12
226 |     # via mkdocs-material
227 |     # via mkdocstrings
228 | pyparsing==3.2.0
229 |     # via matplotlib
230 | pytest==8.3.3
231 |     # via pytest-cov
232 | pytest-cov==6.0.0
233 | python-dateutil==2.9.0.post0
234 |     # via ghp-import
235 |     # via jupyter-client
236 |     # via matplotlib
237 | pyyaml==6.0.2
238 |     # via jupytext
239 |     # via mkdocs
240 |     # via mkdocs-get-deps
241 |     # via pre-commit
242 |     # via pymdown-extensions
243 |     # via pyyaml-env-tag
244 | pyyaml-env-tag==0.1
245 |     # via mkdocs
246 | pyzmq==26.2.0
247 |     # via ipykernel
248 |     # via jupyter-client
249 | referencing==0.35.1
250 |     # via jsonschema
251 |     # via jsonschema-specifications
252 | regex==2024.11.6
253 |     # via mkdocs-material
254 | requests==2.32.3
255 |     # via mkdocs-material
256 | rpds-py==0.21.0
257 |     # via jsonschema
258 |     # via referencing
259 | ruff==0.8.1
260 | six==1.16.0
261 |     # via python-dateutil
262 | soupsieve==2.6
263 |     # via beautifulsoup4
264 | stack-data==0.6.3
265 |     # via ipython
266 | tinycss2==1.4.0
267 |     # via nbconvert
268 | tomli==2.2.1
269 |     # via black
270 |     # via coverage
271 |     # via jupytext
272 |     # via pytest
273 | tornado==6.4.2
274 |     # via ipykernel
275 |     # via jupyter-client
276 | traitlets==5.14.3
277 |     # via comm
278 |     # via ipykernel
279 |     # via ipython
280 |     # via jupyter-client
281 |     # via jupyter-core
282 |     # via matplotlib-inline
283 |     # via nbclient
284 |     # via nbconvert
285 |     # via nbformat
286 | typing-extensions==4.12.2
287 |     # via black
288 |     # via ipython
289 |     # via mkdocstrings
290 | urllib3==2.2.3
291 |     # via requests
292 | virtualenv==20.28.0
293 |     # via pre-commit
294 | watchdog==6.0.0
295 |     # via mkdocs
296 | wcwidth==0.2.13
297 |     # via prompt-toolkit
298 | webencodings==0.5.1
299 |     # via bleach
300 |     # via tinycss2
301 | zipp==3.21.0
302 |     # via importlib-metadata
303 |     # via importlib-resources
304 | 


--------------------------------------------------------------------------------
/src/pymsaviz/msaviz.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import math
  4 | from collections import Counter
  5 | from io import StringIO
  6 | from pathlib import Path
  7 | from typing import Any, Callable
  8 | from urllib.parse import urlparse
  9 | from urllib.request import urlopen
 10 | 
 11 | import matplotlib.pyplot as plt
 12 | from Bio import AlignIO
 13 | from Bio.AlignIO import MultipleSeqAlignment as MSA
 14 | from Bio.Phylo.BaseTree import Tree
 15 | from Bio.Phylo.TreeConstruction import DistanceCalculator, DistanceTreeConstructor
 16 | from Bio.SeqRecord import SeqRecord
 17 | from matplotlib import colors
 18 | from matplotlib.axes import Axes
 19 | from matplotlib.collections import PatchCollection
 20 | from matplotlib.colors import is_color_like
 21 | from matplotlib.figure import Figure
 22 | from matplotlib.gridspec import GridSpec
 23 | from matplotlib.patches import Rectangle
 24 | 
 25 | from pymsaviz.config import COLOR_SCHEMES, AxesType
 26 | 
 27 | 
 28 | class MsaViz:
 29 |     """Multiple Sequence Alignment Visualization"""
 30 | 
 31 |     def __init__(
 32 |         self,
 33 |         msa: str | Path | MSA,
 34 |         *,
 35 |         format: str = "fasta",
 36 |         color_scheme: str | None = None,
 37 |         start: int = 1,
 38 |         end: int | None = None,
 39 |         wrap_length: int | None = None,
 40 |         wrap_space_size: float = 3.0,
 41 |         show_label: bool = True,
 42 |         label_type: str = "id",
 43 |         show_seq_char: bool = True,
 44 |         show_grid: bool = False,
 45 |         show_count: bool = False,
 46 |         show_consensus: bool = False,
 47 |         consensus_color: str = "#1f77b4",
 48 |         consensus_size: float = 2.0,
 49 |         sort: bool = False,
 50 |     ):
 51 |         """
 52 |         Parameters
 53 |         ----------
 54 |         msa : str | Path | MultipleSeqAlignment
 55 |             MSA file, URL MSA file, MSA object
 56 |         format : str, optional
 57 |             Alignment file format (e.g. `fasta`, `phylip`, `clustal`, `emboss`, etc...)
 58 |         color_scheme : str | None, optional
 59 |             Color scheme. If None, `Zappo`(AA) or `Nucleotide`(NT) is set.
 60 |             [`Clustal`|`Zappo`|`Taylor`|`Flower`|`Blossom`|`Sunset`|`Ocean`|
 61 |             `Hydrophobicity`|`HelixPropensity`|`StrandPropensity`|`TurnPropensity`|
 62 |             `BuriedIndex`|`Nucleotide`|`Purine/Pyrimidine`|`Identity`|`None`]
 63 |         start : int, optional
 64 |             Start position of visualization (one-based coordinates)
 65 |         end : int | None, optional
 66 |             End position of visualization (one-based coordinates)
 67 |         wrap_length : int | None, optional
 68 |             Wrap sequence length. If None, no wrapping sequence.
 69 |         wrap_space_size: float, optional
 70 |             Space size between wrap MSA plot area
 71 |         show_label : bool, optional
 72 |             If True, show label
 73 |         label_type : str, optional
 74 |             Label type (`id`|`description`) to be shown when show_label=True.
 75 |             If `label_type="id"`, show omitted id label.
 76 |             If `label_type="description"`, show full description label.
 77 |         show_seq_char : bool, optional
 78 |             If True, show sequence character
 79 |         show_grid : bool, optional
 80 |             If True, show grid
 81 |         show_count : bool, optional
 82 |             If True, show seq char count without gap on right side
 83 |         show_consensus : bool, optional
 84 |             If True, show consensus sequence
 85 |         consensus_color : str, optional
 86 |             Consensus identity bar color
 87 |         consensus_size : float, optional
 88 |             Consensus identity bar height size
 89 |         sort : bool, optional
 90 |             Sort MSA order by NJ tree constructed from MSA distance matrix
 91 |         """
 92 |         # Load MSA
 93 |         if isinstance(msa, MSA):
 94 |             self._msa = msa
 95 |         elif isinstance(msa, str) and urlparse(msa).scheme in ("http", "https"):
 96 |             content = urlopen(msa).read().decode("utf-8")
 97 |             self._msa = AlignIO.read(StringIO(content), format)
 98 |         else:
 99 |             self._msa: MSA = AlignIO.read(msa, format)
100 |         if sort:
101 |             self._msa = self._sorted_msa_by_njtree(self._msa)
102 |         self._consensus_seq = self._get_consensus_seq(self._msa)
103 |         self._color_scheme_name = color_scheme
104 | 
105 |         # Check & Set start, end position
106 |         end = self.alignment_length if end is None else end
107 |         if not 1 <= start <= end <= self.alignment_length:
108 |             err_msg = f"{start=}, {end=} is invalid MSA range "
109 |             err_msg += f"(1 <= start <= end <= {self.alignment_length})"
110 |             raise ValueError(err_msg)
111 |         self._start, self._end = start - 1, end
112 |         self._length = self._end - self._start
113 | 
114 |         # Set user-specified plot configs
115 |         if wrap_length in (0, None) or wrap_length > self._length:
116 |             self._wrap_length = self._length
117 |         else:
118 |             self._wrap_length = wrap_length
119 |         self._wrap_space_size = wrap_space_size
120 |         self._show_seq_char = show_seq_char
121 |         self._show_label = show_label
122 |         self._label_type = label_type
123 |         self._show_grid = show_grid
124 |         self._show_count = show_count
125 |         self._show_consensus = show_consensus
126 |         self._consensus_color = consensus_color
127 |         self._consensus_size = consensus_size
128 |         self._highlight_positions = None
129 |         self._custom_color_func: Callable[[int, int, str, MSA], str | None] | None = (
130 |             None
131 |         )
132 |         self._pos2marker_kws: dict[int, dict[str, Any]] = {}
133 |         self._pos2text_kws: dict[int, dict[str, Any]] = {}
134 |         self.set_plot_params()
135 | 
136 |         # Set color scheme
137 |         if color_scheme is None:
138 |             color_scheme = "Zappo" if self._is_aa_msa() else "Nucleotide"
139 |         if color_scheme not in self.available_color_schemes():
140 |             err_msg = f"{color_scheme=} is invalid.\n"
141 |             err_msg += f"Available color scheme = {self.available_color_schemes()}"
142 |             raise ValueError(err_msg)
143 |         self._color_scheme = COLOR_SCHEMES[color_scheme]
144 | 
145 |     ############################################################
146 |     # Property
147 |     ############################################################
148 | 
149 |     @property
150 |     def msa(self) -> MSA:
151 |         """Multiple Sequence Alignment object (BioPython)"""
152 |         return self._msa
153 | 
154 |     @property
155 |     def msa_count(self) -> int:
156 |         """MSA count"""
157 |         return len(self._msa)
158 | 
159 |     @property
160 |     def alignment_length(self) -> int:
161 |         """Alignment length"""
162 |         return self._msa.get_alignment_length()
163 | 
164 |     @property
165 |     def id_list(self) -> list[str]:
166 |         """MSA ID list"""
167 |         return [rec.id for rec in self._msa]
168 | 
169 |     @property
170 |     def desc_list(self) -> list[str]:
171 |         """MSA description list"""
172 |         return [rec.description for rec in self._msa]
173 | 
174 |     @property
175 |     def seq_list(self) -> list[str]:
176 |         """MSA sequence list"""
177 |         return [str(rec.seq) for rec in self._msa]
178 | 
179 |     @property
180 |     def wrap_num(self) -> int:
181 |         """Wrap number"""
182 |         if self._wrap_length is None:
183 |             return 0
184 |         else:
185 |             return math.ceil(self._length / self._wrap_length) - 1
186 | 
187 |     @property
188 |     def consensus_seq(self) -> str:
189 |         """Consensus sequence"""
190 |         return self._consensus_seq
191 | 
192 |     @property
193 |     def color_scheme(self) -> dict[str, str]:
194 |         """Color scheme"""
195 |         return self._color_scheme
196 | 
197 |     ############################################################
198 |     # Public Method
199 |     ############################################################
200 | 
201 |     @staticmethod
202 |     def available_color_schemes() -> list[str]:
203 |         """Get available color schemes
204 | 
205 |         Returns
206 |         -------
207 |         color_scheme_names : list[str]
208 |             Available color schemes
209 |         """
210 |         return list(COLOR_SCHEMES.keys())
211 | 
212 |     def set_plot_params(
213 |         self,
214 |         *,
215 |         ticks_interval: int | None = 10,
216 |         x_unit_size: float = 0.14,
217 |         y_unit_size: float = 0.20,
218 |         grid_color: str = "lightgrey",
219 |         show_consensus_char: bool = True,
220 |         identity_color: str = "#A3A5FF",
221 |         identity_color_min_thr: float = 30,
222 |     ) -> None:
223 |         """Set plot parameters to adjust figure appearence in detail
224 | 
225 |         Parameters
226 |         ----------
227 |         ticks_interval : int | None, optional
228 |             Ticks interval. If None, ticks interval is not displayed.
229 |         x_unit_size : float, optional
230 |             X-axis unit size of seq char rectangle
231 |         y_unit_size : float, optional
232 |             Y-axis unit size of seq char rectangle
233 |         grid_color : str, optional
234 |             Grid color
235 |         show_consensus_char : bool, optional
236 |             If True, show consensus character
237 |         identity_color : str, optional
238 |             Base color for `Identity` color scheme
239 |         identity_color_min_thr : float, optional
240 |             Min identity color threshold for `Identity` color scheme
241 |         """
242 |         self._ticks_interval = ticks_interval
243 |         self._x_unit_size = x_unit_size
244 |         self._y_unit_size = y_unit_size
245 |         self._grid_color = grid_color
246 |         self._show_consensus_char = show_consensus_char
247 |         self._identity_color = identity_color
248 |         self._identity_color_min_thr = identity_color_min_thr
249 | 
250 |     def set_custom_color_scheme(self, color_scheme: dict[str, str]) -> None:
251 |         """Set user-defined custom color scheme (Overwrite color scheme setting)
252 | 
253 |         Parameters
254 |         ----------
255 |         color_scheme : dict[str, str]
256 |             Custom color scheme dict (e.g. `{"A": "red", "R": "#F01505", ...}`)
257 |         """
258 |         if isinstance(color_scheme, dict):
259 |             if not all(map(is_color_like, color_scheme.values())):
260 |                 raise ValueError(f"{color_scheme=} contains invalid color code.")
261 |             self._color_scheme = color_scheme
262 |         else:
263 |             raise ValueError(f"{color_scheme=} is not dict type.")
264 | 
265 |     def set_custom_color_func(
266 |         self,
267 |         custom_color_func: Callable[[int, int, str, MSA], str | None],
268 |     ):
269 |         """Set user-defined custom color func (Overwrite all other color setting)
270 | 
271 |         User can change the color of each residue specified
272 |         by the row and column position of the MSA.
273 | 
274 |         Parameters
275 |         ----------
276 |         custom_color_func : Callable[[int, int, str, MSA], str | None]
277 |             Custom color function.
278 |             `Callable[[int, int, str, MSA], str | None]` means
279 |             `Callable[[row_pos, col_pos, seq_char, msa], hexcolor | None]`
280 |         """
281 |         self._custom_color_func = custom_color_func
282 | 
283 |     def set_highlight_pos(self, positions: list[tuple[int, int] | int]) -> None:
284 |         """Set user-defined highlight MSA positions
285 | 
286 |         Parameters
287 |         ----------
288 |         positions : list[tuple[int, int] | int]
289 |             Highlight positions. int and tuple range mixture positions can be specified.
290 |             (e.g. If `[1, 5, (10, 13), 18]` is set, `1, 5, 10, 11, 12, 13, 18`
291 |             positions are highlighted)
292 |         """
293 |         self._highlight_positions = self._parse_positions(positions)
294 | 
295 |     def set_highlight_pos_by_ident_thr(
296 |         self, min_thr: float = 0, max_thr: float = 100
297 |     ) -> None:
298 |         """Set highlight MSA positions by consensus identity threshold
299 | 
300 |         Parameters
301 |         ----------
302 |         min_thr : float, optional
303 |             Min identity threshold for highlight position selection
304 |         max_thr : float, optional
305 |             Max identity threshold for highlight position selection
306 |         """
307 |         ident_list = self._get_consensus_identity_list()
308 |         highlight_positions: list[int] = []
309 |         for pos, ident in enumerate(ident_list):
310 |             if min_thr <= ident <= max_thr:
311 |                 highlight_positions.append(pos)
312 |         self._highlight_positions = highlight_positions
313 | 
314 |     def add_markers(
315 |         self,
316 |         positions: list[tuple[int, int] | int],
317 |         marker: str = "v",
318 |         color: str = "black",
319 |         size: float = 6,
320 |     ) -> None:
321 |         """Add markers on specified positions
322 | 
323 |         Parameters
324 |         ----------
325 |         positions : list[tuple[int, int] | int]
326 |             Marker positions. int and tuple range mixture positions can be specified.
327 |             (e.g. If `[1, 5, (10, 13), 18]` is set, markers are plotted on
328 |             `1, 5, 10, 11, 12, 13, 18` positions)
329 |         marker : str, optional
330 |             Marker type of matplotlib.
331 |             See <https://matplotlib.org/stable/api/markers_api.html> for details.
332 |         color : str, optional
333 |             Marker color
334 |         size : float, optional
335 |             Marker size
336 |         """
337 |         for pos in self._parse_positions(positions):
338 |             self._pos2marker_kws[pos] = dict(
339 |                 marker=marker,
340 |                 color=color,
341 |                 markersize=size,
342 |                 clip_on=False,
343 |             )
344 | 
345 |     def add_text_annotation(
346 |         self,
347 |         range: tuple[int, int],
348 |         text: str,
349 |         *,
350 |         text_color: str = "black",
351 |         text_size: float = 10,
352 |         range_color: str = "black",
353 |     ) -> None:
354 |         """Add text annotation in specified range
355 | 
356 |         Parameters
357 |         ----------
358 |         range : tuple[int, int]
359 |             Annotation start-end range tuple
360 |         text : str
361 |             Annotation text
362 |         text_color : str, optional
363 |             Text color
364 |         text_size : float, optional
365 |             Text size
366 |         range_color : str, optional
367 |             Annotation range line color
368 |         """
369 |         # Add annotation text
370 |         start, end = range[0] - 1, range[1]
371 |         x = (start + end) / 2
372 |         pos = int(x)
373 |         self._pos2text_kws[pos] = dict(
374 |             x=x,
375 |             y=self.msa_count + 0.75,
376 |             s=text,
377 |             color=text_color,
378 |             size=text_size,
379 |             ha="center",
380 |             va="bottom",
381 |         )
382 |         # Add annotation range line markers
383 |         marker_size = 10 * (self._x_unit_size / 0.14)
384 |         self.add_markers([range], marker="_", color=range_color, size=marker_size)
385 | 
386 |     def plotfig(self, dpi: int = 100) -> Figure:
387 |         """Plot figure
388 | 
389 |         Parameters
390 |         ----------
391 |         dpi : int, optional
392 |             Figure DPI
393 | 
394 |         Returns
395 |         -------
396 |         fig : Figure
397 |             Figure
398 |         """
399 |         # Setup plot figure configs
400 |         ax_type2y_size = {
401 |             AxesType.MSA: self.msa_count * self._y_unit_size,
402 |             AxesType.SPACE: self._y_unit_size * 1.5,
403 |             AxesType.CONSENSUS: self._y_unit_size * self._consensus_size,
404 |             AxesType.WRAP_SPACE: self._y_unit_size * self._wrap_space_size,
405 |         }
406 | 
407 |         plot_ax_types = []
408 |         for wrap_idx in range(self.wrap_num + 1):
409 |             plot_ax_types.append(AxesType.MSA)
410 |             if self._show_consensus:
411 |                 plot_ax_types.append(AxesType.SPACE)
412 |                 plot_ax_types.append(AxesType.CONSENSUS)
413 |             if wrap_idx != self.wrap_num:
414 |                 plot_ax_types.append(AxesType.WRAP_SPACE)
415 | 
416 |         y_size_list = [ax_type2y_size[t] for t in plot_ax_types]
417 |         figsize = (self._wrap_length * self._x_unit_size, sum(y_size_list))
418 |         fig: Figure = plt.figure(figsize=figsize, dpi=dpi)  # type: ignore
419 |         fig.tight_layout()
420 |         gs = GridSpec(nrows=len(plot_ax_types), ncols=1, height_ratios=y_size_list)
421 |         gs.update(left=0, right=1, bottom=0, top=1, hspace=0, wspace=0)
422 | 
423 |         # Plot figure
424 |         wrap_cnt = 0
425 |         for idx, plot_ax_type in enumerate(plot_ax_types):
426 |             ax: Axes = fig.add_subplot(gs[idx])
427 |             if not isinstance(ax, Axes):
428 |                 raise TypeError("Error: Not matplotlib Axes class instance.")
429 | 
430 |             start = self._start + self._wrap_length * wrap_cnt
431 |             end = self._start + self._wrap_length * (wrap_cnt + 1)
432 |             end = self._end if end > self._end else end
433 | 
434 |             if plot_ax_type == AxesType.MSA:
435 |                 self._plot_msa(ax, start, end)
436 |             elif plot_ax_type == AxesType.CONSENSUS:
437 |                 self._plot_consensus(ax, start, end)
438 |             elif plot_ax_type == AxesType.SPACE:
439 |                 ax.axis("off")
440 |             elif plot_ax_type == AxesType.WRAP_SPACE:
441 |                 ax.axis("off")
442 |                 wrap_cnt += 1
443 |             else:
444 |                 raise NotImplementedError(f"{plot_ax_type=} is invalid.")
445 | 
446 |         return fig
447 | 
448 |     def savefig(
449 |         self,
450 |         savefile: str | Path,
451 |         dpi: int = 100,
452 |         pad_inches: float = 0.5,
453 |     ) -> None:
454 |         """Save figure to file
455 | 
456 |         Parameters
457 |         ----------
458 |         savefile : str | Path
459 |             Save file
460 |         dpi : int, optional
461 |             DPI
462 |         pad_inches : float, optional
463 |             Padding inches
464 |         """
465 |         fig = self.plotfig(dpi=dpi)
466 |         fig.savefig(
467 |             fname=str(savefile),
468 |             dpi=dpi,
469 |             pad_inches=pad_inches,
470 |         )
471 |         # Clear & close figure to suppress memory leak
472 |         fig.clear()
473 |         plt.close(fig)
474 | 
475 |     ############################################################
476 |     # Private Method
477 |     ############################################################
478 | 
479 |     def _plot_msa(
480 |         self, ax: Axes, start: int | None = None, end: int | None = None
481 |     ) -> None:
482 |         """Plot MSA
483 | 
484 |         Parameters
485 |         ----------
486 |         ax : Axes
487 |             Matplotlib axes to be plotted
488 |         start : int | None, optional
489 |             Start position. If None, `0` is set.
490 |         end : int | None, optional
491 |             End position. If None, `alignment_length` is set.
492 |         """
493 |         # Set xlim, ylim
494 |         start = 0 if start is None else start
495 |         end = self.alignment_length if end is None else end
496 |         ax.set_xlim(start, start + self._wrap_length)
497 |         ax.set_ylim(0, self.msa_count)
498 | 
499 |         # Set spines & tick params (Only show bottom ticklables)
500 |         for pos in ("left", "right", "top", "bottom"):
501 |             ax.spines[pos].set_visible(False)
502 |         ax.tick_params(left=False, labelleft=False)
503 | 
504 |         # Plot alignment position every 10 chars on xticks
505 |         ticks_interval = self._ticks_interval
506 |         if ticks_interval is None:
507 |             ax.tick_params(bottom=False, labelbottom=False)
508 |         else:
509 |             tick_ranges = range(start + 1, end + 1)
510 |             xticklabels = list(filter(lambda n: n % ticks_interval == 0, tick_ranges))
511 |             xticks = [n - 0.5 for n in xticklabels]
512 |             ax.set_xticks(xticks, xticklabels, size=8)  # type: ignore
513 | 
514 |         plot_patches = []
515 |         for cnt in range(self.msa_count):
516 |             msa_seq = self.seq_list[cnt]
517 |             y_lower = self.msa_count - (cnt + 1)
518 |             y_center = y_lower + 0.5
519 |             # Plot label text
520 |             if self._show_label:
521 |                 if self._label_type == "id":
522 |                     label = self.id_list[cnt]
523 |                 elif self._label_type == "description":
524 |                     label = self.desc_list[cnt]
525 |                 else:
526 |                     err_msg = f"{self._label_type=} is invalid (`id`|`description`)"
527 |                     raise ValueError(err_msg)
528 |                 ax.text(start - 1, y_center, label, ha="right", va="center", size=10)
529 |             # Plot count text
530 |             if self._show_count:
531 |                 scale = end - self._start - msa_seq[self._start : end].count("-")
532 |                 ax.text(end + 1, y_center, str(scale), ha="left", va="center", size=10)
533 |             for x_left in range(start, end):
534 |                 # Add colored rectangle patch
535 |                 seq_char = msa_seq[x_left]
536 |                 rect_prop: dict = dict(
537 |                     xy=(x_left, y_lower), width=1, height=1, color="none", lw=0
538 |                 )
539 |                 highlight_positions = self._highlight_positions
540 |                 if highlight_positions is None or x_left in highlight_positions:
541 |                     color = self.color_scheme.get(seq_char, "#FFFFFF")
542 |                     if self._color_scheme_name == "Identity":
543 |                         color = self._get_identity_color(seq_char, x_left)
544 |                     if self._custom_color_func is not None:
545 |                         custom_color = self._custom_color_func(
546 |                             cnt, x_left, seq_char, self.msa
547 |                         )
548 |                         color = color if custom_color is None else custom_color
549 |                     rect_prop.update(**dict(color=color, lw=0, fill=True))
550 |                 if self._show_grid:
551 |                     rect_prop.update(**dict(ec=self._grid_color, lw=0.5))
552 |                 plot_patches.append(Rectangle(**rect_prop))
553 | 
554 |                 # Plot seq char text
555 |                 x_center = x_left + 0.5
556 |                 if self._show_seq_char:
557 |                     ax.text(
558 |                         x_center, y_center, seq_char, ha="center", va="center", size=10
559 |                     )
560 |                 # Plot marker
561 |                 if cnt == 0 and x_left in self._pos2marker_kws:
562 |                     marker_kws = self._pos2marker_kws[x_left]
563 |                     ax.plot(x_center, y_center + 1, **marker_kws)
564 |                 # Plot text annotation
565 |                 if cnt == 0 and x_left in self._pos2text_kws:
566 |                     text_kws = self._pos2text_kws[x_left]
567 |                     ax.text(**text_kws)
568 | 
569 |         # Plot colored rectangle patch collection (Use collection for speedup)
570 |         collection = PatchCollection(plot_patches, match_original=True, clip_on=False)
571 |         ax.add_collection(collection)  # type: ignore
572 | 
573 |     def _plot_consensus(
574 |         self, ax: Axes, start: int | None = None, end: int | None = None
575 |     ) -> None:
576 |         """Plot consensus seq char & identity bar
577 | 
578 |         Parameters
579 |         ----------
580 |         ax : Axes
581 |             Matplotlib axes to be plotted
582 |         start : int | None, optional
583 |             Start position. If None, `0` is set.
584 |         end : int | None, optional
585 |             End position. If None, `alignment_length` is set.
586 |         """
587 |         # Set xlim, ylim
588 |         start = 0 if start is None else start
589 |         end = self.alignment_length if end is None else end
590 |         ax.set_xlim(start, start + self._wrap_length)
591 |         ax.set_ylim(0, 100)  # 0 - 100 [%]
592 | 
593 |         # Plot label text
594 |         if self._show_label and self._consensus_size != 0:
595 |             ax.text(start - 1, 40, "Consensus", ha="right", va="center", size=10)
596 | 
597 |         # Set spines & tick params
598 |         for pos in ("left", "right", "top", "bottom"):
599 |             ax.spines[pos].set_visible(False)
600 |         ax.tick_params(bottom=False, left=False, labelleft=False, pad=0)
601 | 
602 |         # Plot consensus seq chars on xticks
603 |         xticks = list(map(lambda n: n + 0.5, range(start, end)))
604 |         if self._show_consensus_char:
605 |             xticklabels = list(self.consensus_seq[start:end])
606 |             ax.set_xticks(xticks, xticklabels, size=10)  # type: ignore
607 |         else:
608 |             ax.axis("off")
609 | 
610 |         # Plot consensus identity bar
611 |         ident_list = self._get_consensus_identity_list(start, end)
612 |         color_list = self._get_interpolate_colors(self._consensus_color, ident_list)
613 |         ax.bar(xticks, ident_list, width=1, color=color_list, ec="white", lw=0.5)
614 | 
615 |     def _get_consensus_seq(self, msa: MSA) -> str:
616 |         """Get consensus sequence
617 | 
618 |         Parameters
619 |         ----------
620 |         msa : MSA
621 |             Multiple sequence alignment
622 | 
623 |         Returns
624 |         -------
625 |         consensus_seq : str
626 |             Consensus suquence
627 |         """
628 |         consensus_seq = ""
629 |         ambiguous_char = "X"
630 |         aln_len = msa.get_alignment_length()
631 | 
632 |         for idx in range(aln_len):
633 |             chars = ""
634 |             for record in self._msa:
635 |                 char = str(record.seq)[idx]
636 |                 if char != "-" and char != ".":
637 |                     chars += str(record.seq)[idx]
638 |             if len(chars) == 0:
639 |                 consensus_seq += ambiguous_char
640 |                 continue
641 | 
642 |             char2count = Counter(chars)
643 |             most_freq_chars = []
644 |             most_freq_count = char2count.most_common()[0][1]
645 |             for char, count in char2count.most_common():
646 |                 if count == most_freq_count:
647 |                     most_freq_chars.append(char)
648 | 
649 |             if len(most_freq_chars) == 1:
650 |                 consensus_seq += most_freq_chars[0]
651 |             else:
652 |                 consensus_seq += ambiguous_char
653 | 
654 |         return consensus_seq
655 | 
656 |     def _get_consensus_identity_list(
657 |         self, start: int | None = None, end: int | None = None
658 |     ) -> list[float]:
659 |         """Get consensus identity list
660 | 
661 |         Parameters
662 |         ----------
663 |         start : int | None, optional
664 |             Start position. If None, `0` is set.
665 |         end : int | None, optional
666 |             End position. If None, `alignment_length` is set.
667 | 
668 |         Returns
669 |         -------
670 |         consensus_identity_list : list[float]
671 |             Consensus identity list (0 - 100 [%])
672 |         """
673 |         start = 0 if start is None else start
674 |         end = self.alignment_length if end is None else end
675 |         consensus_identity_list = []
676 |         for idx, _ in enumerate(self.consensus_seq[start:end], start):
677 |             column_chars = str(self.msa[:, idx])
678 |             counter = Counter(filter(lambda c: c not in ("-", "*"), column_chars))
679 |             count = counter.most_common()[0][1] if len(counter) != 0 else 0
680 |             consensus_identity = (count / self.msa_count) * 100
681 |             consensus_identity_list.append(consensus_identity)
682 |         return consensus_identity_list
683 | 
684 |     def _get_interpolate_colors(
685 |         self,
686 |         color: str,
687 |         values: list[float],
688 |         vmin: float = 0,
689 |         vmax: float = 100,
690 |     ) -> list[str]:
691 |         """Interpolate colors by size of values
692 | 
693 |         Parameters
694 |         ----------
695 |         color : str
696 |             Base color for interpolation
697 |         values : list[float]
698 |             Values for interpolation
699 |         vmin : float, optional
700 |             Min value
701 |         vmax : float, optional
702 |             Max value
703 | 
704 |         Returns
705 |         -------
706 |         interpolated_colors : list[str]
707 |             Interpolated colors based on values
708 |         """
709 |         cmap = colors.LinearSegmentedColormap.from_list("m", ["white", color])
710 |         norm = colors.Normalize(vmin=vmin, vmax=vmax)
711 |         return [colors.to_hex(cmap(norm(v))) for v in values]  # type: ignore
712 | 
713 |     def _get_identity_color(self, seq_char: str, pos: int) -> str:
714 |         """Get identity color for `Identity` color scheme
715 | 
716 |         Parameters
717 |         ----------
718 |         seq_char : str
719 |             Seq character
720 |         pos : int
721 |             Seq character position
722 | 
723 |         Returns
724 |         -------
725 |         identity_color : str
726 |             Identity color
727 |         """
728 |         # Exclude characters color
729 |         exclude_chars = ("-", "*", "X")
730 |         if seq_char in exclude_chars:
731 |             return "#FFFFFF"
732 |         # Get most common characters in target MSA position
733 |         column_chars = str(self.msa[:, pos])
734 |         counter = Counter(filter(lambda c: c not in exclude_chars, column_chars))
735 |         most_common_count = counter.most_common()[0][1]
736 |         most_common_chars = []
737 |         for char, count in counter.most_common():
738 |             if count == most_common_count:
739 |                 most_common_chars.append(char)
740 |         # Calculate identity & color if target seq char is most common
741 |         identity = (most_common_count / len(column_chars)) * 100
742 |         if seq_char in most_common_chars and identity >= self._identity_color_min_thr:
743 |             color, color_thr = self._identity_color, self._identity_color_min_thr
744 |             return self._get_interpolate_colors(color, [identity], vmin=color_thr)[0]
745 |         else:
746 |             return "#FFFFFF"
747 | 
748 |     def _is_aa_msa(self) -> bool:
749 |         """Check MSA is `aa` or `nt`
750 | 
751 |         If the ratio of `ATGCUN` char is less than 90%, return True.
752 | 
753 |         Returns
754 |         -------
755 |         check_result : bool
756 |             Check result
757 |         """
758 |         nt_count, all_count = 0, 0
759 |         for seq in self.seq_list:
760 |             for seq_char in seq:
761 |                 if seq_char == "-":
762 |                     continue
763 |                 all_count += 1
764 |                 if seq_char in "ATGCUN":
765 |                     nt_count += 1
766 |         return nt_count / all_count < 0.9
767 | 
768 |     def _parse_positions(self, positions: list[tuple[int, int] | int]) -> list[int]:
769 |         """Parse int and tuple range mixture positions
770 | 
771 |         e.g. `[1, 5, (10, 13), 18]` means `1, 5, 10, 11, 12, 13, 18` positions
772 | 
773 |         Parameters
774 |         ----------
775 |         positions : list[tuple[int, int] | int]
776 |             int and tuple range mixture positions (one-based coordinates)
777 | 
778 |         Returns
779 |         -------
780 |         result_positions : list[int]
781 |             Parse result int positions (zero-based coordinates)
782 |         """
783 |         result_positions: list[int] = []
784 |         for pos in positions:
785 |             if isinstance(pos, (tuple, list)):
786 |                 result_positions.extend(list(range(pos[0] - 1, pos[1])))
787 |             elif isinstance(pos, int):
788 |                 result_positions.append(pos - 1)
789 |             else:
790 |                 raise ValueError(f"{positions=} is invalid.")
791 |         return sorted(set(result_positions))
792 | 
793 |     def _sorted_msa_by_njtree(self, msa: MSA) -> MSA:
794 |         """Sort MSA order by NJ tree constructed from MSA distance matrix
795 | 
796 |         Parameters
797 |         ----------
798 |         msa : MultipleSeqAlignment
799 |             MSA
800 | 
801 |         Returns
802 |         -------
803 |         sorted_msa : MultipleSeqAlignment
804 |             Sorted MSA
805 |         """
806 |         # Set unique id for MSA records to avoid duplicate name error
807 |         uid2id = {}
808 |         for idx, rec in enumerate(msa):
809 |             uid = f"seq{idx}"
810 |             uid2id[uid] = rec.id
811 |             rec.id = uid
812 |         uid2seq = {rec.id: rec.seq for rec in msa}
813 |         uid2desc = {rec.id: rec.description for rec in msa}
814 |         # Sort MSA order by NJ tree
815 |         njtree = self._construct_njtree(msa)
816 |         sorted_msa = MSA([])
817 |         for leaf in njtree.get_terminals():
818 |             uid = str(leaf.name)
819 |             id, seq, desc = uid2id[uid], uid2seq[uid], uid2desc[uid]
820 |             sorted_msa.append(SeqRecord(seq, id=id, description=desc))
821 |         return sorted_msa
822 | 
823 |     def _construct_njtree(self, msa: MSA) -> Tree:
824 |         """Construct NJ tree from MSA distance matrix
825 | 
826 |         Parameters
827 |         ----------
828 |         msa : MultipleSeqAlignment
829 |             MSA
830 | 
831 |         Returns
832 |         -------
833 |         njtree : Tree
834 |             NJ tree
835 |         """
836 |         # Calculate MSA distance matrix & construct NJ tree
837 |         model = "blosum62" if self._is_aa_msa() else "identity"
838 |         distance_matrix = DistanceCalculator(model).get_distance(msa)
839 |         njtree = DistanceTreeConstructor().nj(distance_matrix)
840 |         njtree.root_at_midpoint()
841 |         return njtree
842 | 


--------------------------------------------------------------------------------