├── tests
    ├── __init__.py
    ├── utils
    │   ├── __init__.py
    │   ├── test_helper.py
    │   └── test_dataset.py
    ├── parser
    │   ├── __init__.py
    │   ├── test_gff.py
    │   ├── test_genbank.py
    │   ├── test_table.py
    │   └── test_matrix.py
    ├── testdata
    │   ├── prokaryote
    │   │   ├── mycoplasma_alvi.gbk.gz
    │   │   ├── mycoplasma_alvi.gff.gz
    │   │   └── mycoplasma_alvi_nocomment.gff.gz
    │   └── eukaryote
    │   │   └── hg38
    │   │       └── hg38_chr.bed
    ├── test_track.py
    ├── conftest.py
    ├── test_sector.py
    └── test_circos.py
├── docs
    ├── index.md
    ├── api-docs
    │   ├── track.md
    │   ├── circos.md
    │   ├── sector.md
    │   ├── treeviz.md
    │   └── utils.md
    └── images
    │   ├── example01.png
    │   ├── example02.png
    │   ├── example03.png
    │   ├── example04.png
    │   ├── example05.png
    │   ├── pyCirclize_gallery.png
    │   └── pyCirclize_tooltip.gif
├── .gitattributes
├── src
    └── pycirclize
    │   ├── __init__.py
    │   ├── utils
    │       ├── example_data
    │       │   ├── images
    │       │   │   └── python_logo.png
    │       │   └── trees
    │       │   │   ├── small_example.nwk
    │       │   │   ├── alphabet.nwk
    │       │   │   ├── medium_example.nwk
    │       │   │   └── large_example.nwk
    │       ├── __init__.py
    │       ├── helper.py
    │       ├── plot.py
    │       └── dataset.py
    │   ├── typing.py
    │   ├── parser
    │       ├── __init__.py
    │       ├── bed.py
    │       ├── table.py
    │       ├── matrix.py
    │       ├── genbank.py
    │       └── gff.py
    │   ├── config.py
    │   ├── annotation.py
    │   ├── tooltip.py
    │   ├── patches.py
    │   └── sector.py
├── CITATION.cff
├── .pre-commit-config.yaml
├── .github
    └── workflows
    │   ├── publish_mkdocs.yml
    │   ├── publish_to_pypi.yml
    │   └── ci.yml
├── LICENSE
├── mkdocs.yml
├── .gitignore
├── pyproject.toml
└── README.md


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/parser/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | --8<-- "README.md"
2 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.ipynb linguist-documentation
2 | 


--------------------------------------------------------------------------------
/docs/api-docs/track.md:
--------------------------------------------------------------------------------
1 | # Track Class
2 | 
3 | ::: pycirclize.track.Track
4 | 


--------------------------------------------------------------------------------
/docs/api-docs/circos.md:
--------------------------------------------------------------------------------
1 | # Circos Class
2 | 
3 | ::: pycirclize.circos.Circos
4 | 


--------------------------------------------------------------------------------
/docs/api-docs/sector.md:
--------------------------------------------------------------------------------
1 | # Sector Class
2 | 
3 | ::: pycirclize.sector.Sector
4 | 


--------------------------------------------------------------------------------
/docs/api-docs/treeviz.md:
--------------------------------------------------------------------------------
1 | # TreeViz Class
2 | 
3 | ::: pycirclize.tree.TreeViz
4 | 


--------------------------------------------------------------------------------
/docs/images/example01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyCirclize/HEAD/docs/images/example01.png


--------------------------------------------------------------------------------
/docs/images/example02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyCirclize/HEAD/docs/images/example02.png


--------------------------------------------------------------------------------
/docs/images/example03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyCirclize/HEAD/docs/images/example03.png


--------------------------------------------------------------------------------
/docs/images/example04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyCirclize/HEAD/docs/images/example04.png


--------------------------------------------------------------------------------
/docs/images/example05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyCirclize/HEAD/docs/images/example05.png


--------------------------------------------------------------------------------
/docs/images/pyCirclize_gallery.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyCirclize/HEAD/docs/images/pyCirclize_gallery.png


--------------------------------------------------------------------------------
/docs/images/pyCirclize_tooltip.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyCirclize/HEAD/docs/images/pyCirclize_tooltip.gif


--------------------------------------------------------------------------------
/src/pycirclize/__init__.py:
--------------------------------------------------------------------------------
1 | from pycirclize.circos import Circos
2 | 
3 | __version__ = "1.10.1"
4 | 
5 | __all__ = [
6 |     "Circos",
7 | ]
8 | 


--------------------------------------------------------------------------------
/tests/testdata/prokaryote/mycoplasma_alvi.gbk.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyCirclize/HEAD/tests/testdata/prokaryote/mycoplasma_alvi.gbk.gz


--------------------------------------------------------------------------------
/tests/testdata/prokaryote/mycoplasma_alvi.gff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyCirclize/HEAD/tests/testdata/prokaryote/mycoplasma_alvi.gff.gz


--------------------------------------------------------------------------------
/src/pycirclize/utils/example_data/images/python_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyCirclize/HEAD/src/pycirclize/utils/example_data/images/python_logo.png


--------------------------------------------------------------------------------
/tests/testdata/prokaryote/mycoplasma_alvi_nocomment.gff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moshi4/pyCirclize/HEAD/tests/testdata/prokaryote/mycoplasma_alvi_nocomment.gff.gz


--------------------------------------------------------------------------------
/src/pycirclize/typing.py:
--------------------------------------------------------------------------------
1 | from collections.abc import Sequence
2 | 
3 | from numpy.typing import NDArray
4 | 
5 | Numeric = int | float
6 | NumericArrayLike = Sequence[Numeric] | NDArray
7 | 


--------------------------------------------------------------------------------
/src/pycirclize/utils/example_data/trees/small_example.nwk:
--------------------------------------------------------------------------------
1 | ((Hylobates_moloch:0.333,Nomascus_leucogenys:0.3123)1.00:0.6897,(Pongo_abelii:0.8478,(Gorilla_gorilla:0.4021,(Homo_sapiens:0.3164,(Pan_troglodytes:0.1144,Pan_paniscus:0.1106)0.97:0.1865)0.99:0.1052)1.00:0.3929)1.00:0.114)1.00;
2 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | message: If you use this software, please cite it as below.
3 | authors:
4 |   - family-names: Shimoyama
5 |     given-names: Yuki
6 | title: "pyCirclize: Circular visualization in Python"
7 | date-released: 2022-12-20
8 | url: https://github.com/moshi4/pyCirclize
9 | 


--------------------------------------------------------------------------------
/src/pycirclize/parser/__init__.py:
--------------------------------------------------------------------------------
 1 | from pycirclize.parser.bed import Bed
 2 | from pycirclize.parser.genbank import Genbank
 3 | from pycirclize.parser.gff import Gff
 4 | from pycirclize.parser.matrix import Matrix
 5 | from pycirclize.parser.table import RadarTable, StackedBarTable
 6 | 
 7 | __all__ = [
 8 |     "Bed",
 9 |     "Genbank",
10 |     "Gff",
11 |     "Matrix",
12 |     "RadarTable",
13 |     "StackedBarTable",
14 | ]
15 | 


--------------------------------------------------------------------------------
/docs/api-docs/utils.md:
--------------------------------------------------------------------------------
 1 | # Utility Class
 2 | 
 3 | ::: pycirclize.utils.dataset.load_prokaryote_example_file
 4 | ::: pycirclize.utils.dataset.load_eukaryote_example_dataset
 5 | ::: pycirclize.utils.dataset.load_example_tree_file
 6 | ::: pycirclize.utils.dataset.load_example_image_file
 7 | ::: pycirclize.utils.dataset.fetch_genbank_by_accid
 8 | 
 9 | ::: pycirclize.utils.helper.calc_group_spaces
10 | ::: pycirclize.utils.helper.ColorCycler
11 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # See https://pre-commit.com for more information
 2 | # See https://pre-commit.com/hooks.html for more hooks
 3 | repos:
 4 |   - repo: https://github.com/astral-sh/ruff-pre-commit
 5 |     rev: v0.12.7
 6 |     hooks:
 7 |       - id: ruff
 8 |         name: ruff lint check
 9 |         types_or: [python, pyi]
10 |         args: [--fix]
11 |       - id: ruff-format
12 |         name: ruff format check
13 |         types_or: [python, pyi]
14 | 


--------------------------------------------------------------------------------
/.github/workflows/publish_mkdocs.yml:
--------------------------------------------------------------------------------
 1 | name: Publish MkDocs
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [released]
 6 |   workflow_dispatch:
 7 | 
 8 | jobs:
 9 |   publish_mkdocs:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - name: Checkout
13 |         uses: actions/checkout@v4
14 | 
15 |       - name: Install uv
16 |         uses: astral-sh/setup-uv@v6
17 | 
18 |       - name: Install Python & MkDocs & Plugins
19 |         run: uv sync
20 | 
21 |       - name: Publish document
22 |         run: uv run mkdocs gh-deploy --force
23 | 


--------------------------------------------------------------------------------
/src/pycirclize/utils/example_data/trees/alphabet.nwk:
--------------------------------------------------------------------------------
1 | (((A:1.90623,(B:0.77315,(C:0.78332,(D:2.24573,(E:1.25976,F:1.85780)95:0.00000)99:0.15046)99:1.04012)100:0.90160)99:0.72056,((((G:0.59616,H:1.70149)91:1.58325,I:1.00419)98:1.19115,(J:0.91877,K:1.31367)91:0.88856)99:1.55681,L:0.88143)99:0.74292)95:1.02610,((M:1.46117,((N:0.12272,(O:1.12953,P:0.58300)99:0.73047)91:0.70152,(Q:0.71106,((((R:0.53759,S:0.71617)90:1.49010,T:1.40058)100:1.09120,(U:0.80607,V:1.06576)99:0.73818)100:0.94044,W:1.90216)91:1.65416)93:1.75184)90:0.90540)91:1.56327,(X:0.59332,(Y:1.23427,Z:1.60350)93:0.98802)90:0.87582)97:0.47830)100:0.00000;
2 | 


--------------------------------------------------------------------------------
/.github/workflows/publish_to_pypi.yml:
--------------------------------------------------------------------------------
 1 | name: Publish to PyPI
 2 | on:
 3 |   release:
 4 |     types: [released]
 5 |   workflow_dispatch:
 6 | 
 7 | jobs:
 8 |   publish_to_pypi:
 9 |     name: Publish to PyPI
10 |     runs-on: ubuntu-latest
11 |     env:
12 |       PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
13 |       PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
14 |     steps:
15 |       - name: Checkout
16 |         uses: actions/checkout@v4
17 | 
18 |       - name: Install uv
19 |         uses: astral-sh/setup-uv@v6
20 | 
21 |       - name: Build
22 |         run: uv build
23 | 
24 |       - name: Publish
25 |         run: uv publish --token $PYPI_PASSWORD
26 | 


--------------------------------------------------------------------------------
/src/pycirclize/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | from pycirclize.utils import plot
 2 | from pycirclize.utils.dataset import (
 3 |     fetch_genbank_by_accid,
 4 |     load_eukaryote_example_dataset,
 5 |     load_example_image_file,
 6 |     load_example_tree_file,
 7 |     load_prokaryote_example_file,
 8 | )
 9 | from pycirclize.utils.helper import (
10 |     ColorCycler,
11 |     calc_group_spaces,
12 |     is_pseudo_feature,
13 |     load_image,
14 | )
15 | 
16 | __all__ = [
17 |     "ColorCycler",
18 |     "calc_group_spaces",
19 |     "fetch_genbank_by_accid",
20 |     "is_pseudo_feature",
21 |     "load_eukaryote_example_dataset",
22 |     "load_example_image_file",
23 |     "load_example_tree_file",
24 |     "load_image",
25 |     "load_prokaryote_example_file",
26 |     "plot",
27 | ]
28 | 


--------------------------------------------------------------------------------
/src/pycirclize/utils/example_data/trees/medium_example.nwk:
--------------------------------------------------------------------------------
1 | (((Hylobates_moloch:0.00333,Nomascus_leucogenys:0.003123):0.006897,(Pongo_abelii:0.008478,(Gorilla_gorilla:0.004021,(Homo_sapiens:0.003164,(Pan_troglodytes:0.001144,Pan_paniscus:0.001106):0.001865):0.001052):0.003929):0.00114):0.004949,(((Colobus_angolensis:0.005275,Piliocolobus_tephrosceles:0.004777):0.001021,(Trachypithecus_francoisi:0.004061,(Rhinopithecus_roxellana:0.000994,Rhinopithecus_bieti:0.002571):0.00248):0.001915):0.002828,(Chlorocebus_sabaeus:0.005852,((Macaca_nemestrina:0.002166,(Macaca_thibetana:0.001527,(Macaca_fascicularis:0.001199,Macaca_mulatta:0.001166):0.000396):3.7e-05):0.001884,((Mandrillus_leucophaeus:0.00342,Cercocebus_atys:0.003303):0.000458,(Papio_anubis:0.002032,Theropithecus_gelada:0.001939):0.000891):0.00056):0.00146):0.00244):0.009713);
2 | 


--------------------------------------------------------------------------------
/tests/testdata/eukaryote/hg38/hg38_chr.bed:
--------------------------------------------------------------------------------
 1 | #chrom	chromStart	chromEnd	name
 2 | chr1	0	248956422	NC_000001.11
 3 | chr2	0	242193529	NC_000002.12
 4 | chr3	0	198295559	NC_000003.12
 5 | chr4	0	190214555	NC_000004.12
 6 | chr5	0	181538259	NC_000005.10
 7 | chr6	0	170805979	NC_000006.12
 8 | chr7	0	159345973	NC_000007.14
 9 | chr8	0	145138636	NC_000008.11
10 | chr9	0	138394717	NC_000009.12
11 | chr10	0	133797422	NC_000010.11
12 | chr11	0	135086622	NC_000011.10
13 | chr12	0	133275309	NC_000012.12
14 | chr13	0	114364328	NC_000013.11
15 | chr14	0	107043718	NC_000014.9
16 | chr15	0	101991189	NC_000015.10
17 | chr16	0	90338345	NC_000016.10
18 | chr17	0	83257441	NC_000017.11
19 | chr18	0	80373285	NC_000018.10
20 | chr19	0	58617616	NC_000019.10
21 | chr20	0	64444167	NC_000020.11
22 | chr21	0	46709983	NC_000021.9
23 | chr22	0	50818468	NC_000022.11
24 | chrX	0	156040895	NC_000023.11
25 | chrY	0	57227415	NC_000024.10
26 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 moshi
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/tests/test_track.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import math
 4 | 
 5 | import pytest
 6 | 
 7 | from pycirclize.sector import Sector
 8 | from pycirclize.track import Track
 9 | 
10 | 
@pytest.fixture
def track() -> Track:
    """Build a track with radius range 90-100 on a sector of size 1000

    The parent sector covers the radian range 0 to pi.
    """
    half_circle_sector = Sector(name="A", size=1000, rad_lim=(0, math.pi))
    return Track(
        name="Track01",
        r_lim=(90, 100),
        r_pad_ratio=0.1,
        parent_sector=half_circle_sector,
    )
26 | 
27 | 
def test_track_property(track: Track) -> None:
    """Check the values exposed by the Track properties"""
    assert track.name == "Track01"

    # size / start / end / center
    assert (track.size, track.start, track.end, track.center) == (1000, 0, 1000, 500)

    # Radius size and limits, then center and plot area
    assert (track.r_size, track.r_lim) == (10, (90, 100))
    assert (track.r_center, track.r_plot_size, track.r_plot_lim) == (
        95,
        9,
        (90.5, 99.5),
    )

    # Angular range in radian and in degree
    assert (track.rad_size, track.rad_lim) == (math.pi, (0, math.pi))
    assert (track.deg_size, track.deg_lim) == (180, (0, 180))
44 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | on:
 3 |   push:
 4 |     branches: [main, develop]
 5 |     paths: ["src/**", "tests/**", ".github/workflows/ci.yml"]
 6 |   pull_request:
 7 |     branches: [main, develop]
 8 |     paths: ["src/**", "tests/**", ".github/workflows/ci.yml"]
 9 |   workflow_dispatch:
10 | 
11 | jobs:
12 |   CI:
13 |     runs-on: ${{ matrix.os }}
14 |     defaults:
15 |       run:
16 |         shell: bash
17 |     strategy:
18 |       matrix:
19 |         os: [ubuntu-latest, macos-latest]
20 |         python-version: ["3.10", "3.11", "3.12", "3.13"]
21 |     steps:
22 |       - name: Checkout
23 |         uses: actions/checkout@v4
24 | 
25 |       - name: Install uv
26 |         uses: astral-sh/setup-uv@v6
27 | 
28 |       - name: Install Python ${{ matrix.python-version }} & dependencies
29 |         run: uv sync --all-extras --all-groups --upgrade --python ${{ matrix.python-version }}
30 | 
31 |       - name: Run ruff lint check
32 |         run: uv run ruff check src tests
33 | 
34 |       - name: Run ruff format check
35 |         run: uv run ruff format src tests
36 | 
37 |       - name: Run pytest
38 |         run: uv run pytest --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov=src | tee pytest-coverage.txt
39 | 
40 |       - name: Pytest Coverage Comment
41 |         uses: MishaKav/pytest-coverage-comment@main
42 |         with:
43 |           pytest-coverage-path: pytest-coverage.txt
44 |           junitxml-path: pytest.xml
45 |         if: ${{ matrix.os=='ubuntu-latest' && matrix.python-version=='3.12' }}
46 | 


--------------------------------------------------------------------------------
/src/pycirclize/parser/bed.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import csv
 4 | from dataclasses import dataclass
 5 | from typing import TYPE_CHECKING
 6 | 
 7 | if TYPE_CHECKING:
 8 |     from pathlib import Path
 9 | 
10 | 
class Bed:
    """Parser class that loads the records of a BED format file"""

    def __init__(self, bed_file: str | Path) -> None:
        """
        Parameters
        ----------
        bed_file : str | Path
            BED format file (parsed immediately with `BedRecord.parse`)
        """
        self._records = BedRecord.parse(bed_file)
        # Keep the source path alongside the parsed records
        self._bed_file = bed_file

    @property
    def records(self) -> list[BedRecord]:
        """Records parsed from the BED file given at construction"""
        return self._records
28 | 
29 | 
@dataclass
class BedRecord:
    """Single interval parsed from one line of a BED format file

    `chr`, `start` and `end` come from the first three columns.
    `name` and `score` hold the 4th and 5th columns and stay `None`
    when the line does not provide them.
    """

    chr: str
    start: int
    end: int
    name: str | None = None
    score: str | None = None

    @property
    def size(self) -> int:
        """Interval size (`end - start`)"""
        return self.end - self.start

    @staticmethod
    def parse(bed_file: str | Path) -> list[BedRecord]:
        """Parse BED format file

        Blank lines, lines starting with `#`, lines with fewer than three
        tab-separated columns and lines whose start/end columns are not
        integers are skipped.

        Parameters
        ----------
        bed_file : str | Path
            BED format file

        Returns
        -------
        bed_records : list[BedRecord]
            BED records
        """
        bed_records = []
        # newline="" is the csv module's recommended way to open input files
        with open(bed_file, encoding="utf-8", newline="") as f:
            for row in csv.reader(f, delimiter="\t"):
                # Check the column count first: csv yields `[]` for a blank
                # line, so indexing `row[0]` before this check would raise
                if len(row) < 3 or row[0].startswith("#"):
                    continue
                try:
                    start, end = int(row[1]), int(row[2])
                except ValueError:
                    # Non-numeric coordinates (e.g. an uncommented header)
                    continue
                # name and score are independent optional columns
                name = row[3] if len(row) >= 4 else None
                score = row[4] if len(row) >= 5 else None
                bed_records.append(BedRecord(row[0], start, end, name, score))
        return bed_records
73 | 


--------------------------------------------------------------------------------
/tests/parser/test_gff.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import pytest
 4 | 
 5 | from pycirclize.parser import Gff
 6 | 
 7 | 
 8 | def test_parse_complete_genome(prokaryote_testdata_dir: Path) -> None:
 9 |     """Test parse complete genome"""
10 |     gff_file = prokaryote_testdata_dir / "enterobacteria_phage.gff"
11 |     gff = Gff(gff_file)
12 |     seqid = "NC_000902.1"
13 |     assert gff.target_seqid == seqid
14 |     max_genome_size = 60942
15 |     assert gff.range_size == max_genome_size
16 |     assert gff.seq_region == (0, max_genome_size)
17 |     assert gff.seqid_list == [seqid]
18 |     assert gff.get_seqid2size() == {seqid: max_genome_size}
19 | 
20 | 
@pytest.mark.parametrize(
    "gff_filename",
    ["mycoplasma_alvi.gff.gz", "mycoplasma_alvi_nocomment.gff.gz"],
)
def test_parse_contig_genomes(prokaryote_testdata_dir: Path, gff_filename: str) -> None:
    """Check Gff properties and feature counts for a multi-seqid GFF file"""
    gff = Gff(prokaryote_testdata_dir / gff_filename)

    expected_seqid2size = {
        "NZ_JNJU01000001.1": 264665,
        "NZ_JNJU01000002.1": 190782,
        "NZ_KL370824.1": 158240,
        "NZ_KL370825.1": 155515,
        "NZ_JNJU01000007.1": 67647,
        "NZ_JNJU01000008.1": 2683,
        "NZ_JNJU01000009.1": 1108,
    }
    first_seqid, first_size = next(iter(expected_seqid2size.items()))

    # The first seqid is expected to be the parse target
    assert gff.target_seqid == first_seqid
    assert gff.range_size == first_size
    assert gff.seq_region == (0, first_size)
    assert gff.seqid_list == list(expected_seqid2size)
    assert gff.get_seqid2size() == expected_seqid2size

    # Default feature type (presumably CDS, per the original naming - confirm)
    default_features_by_seqid = gff.get_seqid2features()
    assert len(next(iter(default_features_by_seqid.values()))) == 204

    trna_features_by_seqid = gff.get_seqid2features("tRNA")
    assert len(next(iter(trna_features_by_seqid.values()))) == 12
57 | 


--------------------------------------------------------------------------------
/tests/utils/test_helper.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from pycirclize.utils import ColorCycler, calc_group_spaces
 4 | 
 5 | 
 6 | def test_color_cycler() -> None:
 7 |     """Test color cycler"""
 8 |     # Check get color list length
 9 |     ColorCycler.set_cmap("tab10")
10 |     assert len(ColorCycler.get_color_list()) == 10
11 |     assert len(ColorCycler.get_color_list(5)) == 5
12 |     assert len(ColorCycler.get_color_list(20)) == 20
13 | 
14 |     # Check cycle index, color
15 |     assert ColorCycler(0) != ColorCycler(1)
16 |     assert ColorCycler(0) == ColorCycler(10)
17 |     assert ColorCycler(15) == ColorCycler(25)
18 | 
19 |     # Check cycle counter
20 |     assert ColorCycler() != ColorCycler()
21 |     assert ColorCycler.counter == 2
22 | 
23 |     # Check reset cycle
24 |     ColorCycler.reset_cycle()
25 |     assert ColorCycler.counter == 0
26 | 
27 |     # Check cmap change
28 |     ColorCycler.set_cmap("tab20")
29 |     with pytest.raises(KeyError):
30 |         ColorCycler.set_cmap("invalid name")
31 |     assert len(ColorCycler.get_color_list()) == 20
32 | 
33 | 
def test_calc_group_spaces() -> None:
    """Test `calc_group_spaces` with empty, single-group and multi-group input"""
    # Case1. Blank list (error)
    with pytest.raises(ValueError):
        calc_group_spaces([])

    # (groups, keyword arguments, expected spaces)
    cases = [
        # Case2. List length = 1 (endspace=True)
        ([5], {}, [2, 2, 2, 2, 2]),
        # Case3. List length = 1 (endspace=False)
        ([5], {"space_in_group": 3, "endspace": False}, [3, 3, 3, 3]),
        # Case4. List length > 1 (endspace=True)
        ([4, 3, 3], {}, [2, 2, 2, 15, 2, 2, 15, 2, 2, 15]),
        # Case5. List length > 1 (endspace=False)
        (
            [4, 3, 3],
            {"space_bw_group": 8, "space_in_group": 1, "endspace": False},
            [1, 1, 1, 8, 1, 1, 8, 1, 1],
        ),
    ]
    for groups, options, expected_spaces in cases:
        assert calc_group_spaces(groups, **options) == expected_spaces
61 | 


--------------------------------------------------------------------------------
/tests/utils/test_dataset.py:
--------------------------------------------------------------------------------
 1 | from urllib.request import urlopen
 2 | 
 3 | import pytest
 4 | 
 5 | from pycirclize.utils import (
 6 |     load_eukaryote_example_dataset,
 7 |     load_example_image_file,
 8 |     load_example_tree_file,
 9 |     load_prokaryote_example_file,
10 | )
11 | 
12 | 
def check_network_conn(
    url: str = "https://github.com/moshi4/pyCirclize",
    timeout: float = 10.0,
) -> bool:
    """Check network connection

    Parameters
    ----------
    url : str, optional
        Check target URL
    timeout : float, optional
        Seconds to wait before giving up. Without a timeout an unresponsive
        host could block for an unbounded time.

    Returns
    -------
    check_result : bool
        Network connection check result (`False` on any failure to open `url`)
    """
    try:
        with urlopen(url, timeout=timeout):
            return True
    except Exception:
        # Best-effort probe: any failure (unreachable host, timeout, bad URL)
        # is reported as "no connection" rather than raised
        return False
31 | 
32 | 
@pytest.mark.skipif(not check_network_conn(), reason="No network connection.")
def test_load_prokaryote_example_file() -> None:
    """Check that `load_prokaryote_example_file()` returns an existing file"""
    assert load_prokaryote_example_file("enterobacteria_phage.gbk").exists()
41 | 
42 | 
@pytest.mark.skipif(not check_network_conn(), reason="No network connection.")
def test_load_eukaryote_example_dataset() -> None:
    """Check that `load_eukaryote_example_dataset()` returns existing files"""
    # The third returned item is not checked here
    bed_file, cytoband_file, _unchecked = load_eukaryote_example_dataset("hg38")
    for dataset_file in (bed_file, cytoband_file):
        assert dataset_file.exists()
52 | 
53 | 
def test_load_example_image_file() -> None:
    """Check `load_example_image_file()` for a known and an unknown file name"""
    # Known file name: an existing file is returned
    assert load_example_image_file("python_logo.png").exists()

    # Unknown file name: FileNotFoundError is raised
    with pytest.raises(FileNotFoundError):
        load_example_image_file("noexists.png")
63 | 
64 | 
def test_load_example_tree_file() -> None:
    """Check `load_example_tree_file()` for a known and an unknown file name"""
    # Known file name: an existing file is returned
    assert load_example_tree_file("alphabet.nwk").exists()

    # Unknown file name: FileNotFoundError is raised
    with pytest.raises(FileNotFoundError):
        load_example_tree_file("noexists.nwk")
74 | 


--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
 1 | site_name: pyCirclize
 2 | site_description: Circular visualization in Python
 3 | site_author: moshi4
 4 | repo_name: moshi4/pyCirclize
 5 | repo_url: https://github.com/moshi4/pyCirclize
 6 | edit_uri: ""
 7 | use_directory_urls: true
 8 | watch:
 9 |   - src
10 | 
11 | nav:
12 |   - Home: index.md
13 |   - Getting Started: getting_started.ipynb
14 |   - Plot API Example: plot_api_example.ipynb
15 |   - Chord Diagram: chord_diagram.ipynb
16 |   - Radar Chart: radar_chart.ipynb
17 |   - Circos Plot (Genomics): circos_plot.ipynb
18 |   - Comparative Genomics: comparative_genomics.ipynb
19 |   - Phylogenetic Tree: phylogenetic_tree.ipynb
20 |   - Plot Tips: plot_tips.ipynb
21 |   - API Docs:
22 |       - Circos: api-docs/circos.md
23 |       - Sector: api-docs/sector.md
24 |       - Track: api-docs/track.md
25 |       - TreeViz: api-docs/treeviz.md
26 |       - utils: api-docs/utils.md
27 | 
28 | theme:
29 |   name: material # material, readthedocs, mkdocs
30 |   features:
31 |     - navigation.top
32 |     - navigation.expand
33 |     # - navigation.tabs
34 |     - navigation.tabs.sticky
35 |     - navigation.sections
36 | 
37 | markdown_extensions:
38 |   - pymdownx.highlight:
39 |       anchor_linenums: true
40 |   - pymdownx.inlinehilite
41 |   - pymdownx.snippets
42 |   - pymdownx.superfences
43 |   - pymdownx.details
44 |   - admonition
45 |   - attr_list
46 |   - md_in_html
47 |   - github-callouts
48 | 
49 | plugins:
50 |   - search
51 |   - mkdocs-jupyter:
52 |       execute: False
53 |   - mkdocstrings:
54 |       handlers:
55 |         python:
56 |           # Reference: https://mkdocstrings.github.io/python/usage/
57 |           options:
58 |             # Heading options
59 |             heading_level: 2
60 |             show_root_full_path: False
61 |             show_root_heading: True
62 |             # Member options
63 |             members_order: source # alphabetical, source
64 |             # Docstrings options
65 |             docstring_style: numpy
66 |             docstring_section_style: spacy # table, list, spacy
67 |             line_length: 89
68 |             merge_init_into_class: True
69 |             # Signatures/annotations options
70 |             show_signature_annotations: True
71 |             separate_signature: True
72 |             # Additional options
73 |             show_source: False
74 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | examples
  2 | .vscode/
  3 | notebooks/
  4 | 
  5 | # Byte-compiled / optimized / DLL files
  6 | __pycache__/
  7 | *.py[cod]
  8 | *$py.class
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | pip-wheel-metadata/
 28 | share/python-wheels/
 29 | *.egg-info/
 30 | .installed.cfg
 31 | *.egg
 32 | MANIFEST
 33 | 
 34 | # PyInstaller
 35 | #  Usually these files are written by a python script from a template
 36 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 37 | *.manifest
 38 | *.spec
 39 | 
 40 | # Installer logs
 41 | pip-log.txt
 42 | pip-delete-this-directory.txt
 43 | 
 44 | # Unit test / coverage reports
 45 | htmlcov/
 46 | .tox/
 47 | .nox/
 48 | .coverage
 49 | .coverage.*
 50 | .cache
 51 | nosetests.xml
 52 | coverage.xml
 53 | *.cover
 54 | *.py,cover
 55 | .hypothesis/
 56 | .pytest_cache/
 57 | 
 58 | # Translations
 59 | *.mo
 60 | *.pot
 61 | 
 62 | # Django stuff:
 63 | *.log
 64 | local_settings.py
 65 | db.sqlite3
 66 | db.sqlite3-journal
 67 | 
 68 | # Flask stuff:
 69 | instance/
 70 | .webassets-cache
 71 | 
 72 | # Scrapy stuff:
 73 | .scrapy
 74 | 
 75 | # Sphinx documentation
 76 | docs/_build/
 77 | 
 78 | # PyBuilder
 79 | target/
 80 | 
 81 | # Jupyter Notebook
 82 | .ipynb_checkpoints
 83 | 
 84 | # IPython
 85 | profile_default/
 86 | ipython_config.py
 87 | 
 88 | # pyenv
 89 | .python-version
 90 | 
 91 | # pipenv
 92 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 93 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 94 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 95 | #   install all needed dependencies.
 96 | #Pipfile.lock
 97 | 
 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 99 | __pypackages__/
100 | 
101 | # Celery stuff
102 | celerybeat-schedule
103 | celerybeat.pid
104 | 
105 | # SageMath parsed files
106 | *.sage.py
107 | 
108 | # Environments
109 | .env
110 | .venv
111 | env/
112 | venv/
113 | ENV/
114 | env.bak/
115 | venv.bak/
116 | 
117 | # Spyder project settings
118 | .spyderproject
119 | .spyproject
120 | 
121 | # Rope project settings
122 | .ropeproject
123 | 
124 | # mkdocs documentation
125 | /site
126 | 
127 | # mypy
128 | .mypy_cache/
129 | .dmypy.json
130 | dmypy.json
131 | 
132 | # Pyre type checker
133 | .pyre/
134 | 


--------------------------------------------------------------------------------
/tests/parser/test_genbank.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from pycirclize.parser import Genbank
 4 | 
 5 | 
 6 | def test_parse_complete_genome(prokaryote_testdata_dir: Path) -> None:
 7 |     """Test parsing a complete genome Genbank file (single sequence record)"""
 8 |     gbk_file = prokaryote_testdata_dir / "enterobacteria_phage.gbk"
 9 |     gbk = Genbank(gbk_file)
10 |     seqid = "NC_000902.1"
11 |     assert gbk.name == "enterobacteria_phage"  # file name without `.gbk`
12 |     max_genome_size = 60942
13 |     assert gbk.range_size == max_genome_size
14 |     assert gbk.genome_length == max_genome_size
15 |     assert gbk.full_genome_length == max_genome_size
16 |     assert gbk.get_seqid2size() == {seqid: max_genome_size}  # single record
17 | 
18 | 
19 | def test_parse_contig_genomes(prokaryote_testdata_dir: Path) -> None:
20 |     """Test parsing a Genbank file (`.gbk.gz`) that contains multiple contigs"""
21 |     gbk_file = prokaryote_testdata_dir / "mycoplasma_alvi.gbk.gz"
22 |     gbk = Genbank(gbk_file)
23 |     seqid2size = {
24 |         "NZ_JNJU01000001.1": 264665,
25 |         "NZ_JNJU01000002.1": 190782,
26 |         "NZ_KL370824.1": 158240,
27 |         "NZ_KL370825.1": 155515,
28 |         "NZ_JNJU01000007.1": 67647,
29 |         "NZ_JNJU01000008.1": 2683,
30 |         "NZ_JNJU01000009.1": 1108,
31 |     }
32 |     size_list = list(seqid2size.values())
33 | 
34 |     assert gbk.name == "mycoplasma_alvi"
35 |     assert gbk.range_size == size_list[0]  # size of the first contig
36 |     assert gbk.get_seqid2size() == seqid2size
37 | 
38 |     seqid2cds_features = gbk.get_seqid2features()  # no arg; presumably CDS default
39 |     first_contig_cds_features = next(iter(seqid2cds_features.values()))
40 |     assert len(first_contig_cds_features) == 204
41 | 
42 |     seqid2trna_features = gbk.get_seqid2features("tRNA")
43 |     first_contig_trna_features = next(iter(seqid2trna_features.values()))
44 |     assert len(first_contig_trna_features) == 12
45 | 
46 | 
47 | def test_calc_gc(prokaryote_testdata_dir: Path) -> None:
48 |     """Test that GC skew & GC content calculations run without error"""
49 |     gbk_file = prokaryote_testdata_dir / "enterobacteria_phage.gbk"
50 |     gbk = Genbank(gbk_file)
51 |     gbk.calc_gc_skew()
52 |     gbk.calc_gc_content()
53 | 
54 |     # Pass a user-specified short sequence (400 bases, < 1000)
55 |     # Check that no zero-length error of window_size & step_size occurs
56 |     seq = "ATGC" * 100
57 |     gbk.calc_gc_skew(seq=seq)
58 |     gbk.calc_gc_content(seq=seq)
59 | 
60 | 
61 | def test_write_cds_fasta(prokaryote_testdata_dir: Path, fig_outfile: Path) -> None:
62 |     """Test `gbk.write_cds_fasta()` (only checks the output file is created)"""
63 |     gbk_file = prokaryote_testdata_dir / "enterobacteria_phage.gbk"
64 |     gbk = Genbank(gbk_file)
65 |     gbk.write_cds_fasta(fig_outfile)  # reuses the `fig_outfile` temp path as output
66 |     assert fig_outfile.exists()
67 | 
68 | 
69 | def test_genome_fasta(prokaryote_testdata_dir: Path, fig_outfile: Path) -> None:
70 |     """Test `gbk.write_genome_fasta()` (only checks the output file is created)"""
71 |     gbk_file = prokaryote_testdata_dir / "enterobacteria_phage.gbk"
72 |     gbk = Genbank(gbk_file)
73 |     gbk.write_genome_fasta(fig_outfile)  # reuses the `fig_outfile` temp path
74 |     assert fig_outfile.exists()
75 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | 
  3 | import pandas as pd
  4 | import pytest
  5 | 
  6 | 
  7 | @pytest.fixture
  8 | def testdata_dir() -> Path:
  9 |     """Path of the `testdata` directory located next to this conftest file"""
 10 |     return Path(__file__).parent / "testdata"
 11 | 
 12 | 
 13 | @pytest.fixture
 14 | def fig_outfile(tmp_path: Path) -> Path:
 15 |     """Path `figure.png` inside pytest's `tmp_path` (the file is not created)"""
 16 |     return tmp_path / "figure.png"
 17 | 
 18 | 
 19 | @pytest.fixture
 20 | def hg38_testdata_dir(testdata_dir: Path) -> Path:
 21 |     """Path of the `eukaryote/hg38` directory under the testdata directory"""
 22 |     return testdata_dir / "eukaryote" / "hg38"
 23 | 
 24 | 
 25 | @pytest.fixture
 26 | def prokaryote_testdata_dir(testdata_dir: Path) -> Path:
 27 |     """Path of the `prokaryote` directory under the testdata directory"""
 28 |     return testdata_dir / "prokaryote"
 29 | 
 30 | 
 31 | @pytest.fixture
 32 | def matrix_df() -> pd.DataFrame:
 33 |     """3 x 6 integer matrix dataframe (rows: S1-S3, columns: E1-E6)"""
 34 |     matrix_data = [
 35 |         [4, 14, 13, 17, 5, 2],
 36 |         [7, 1, 6, 8, 12, 15],
 37 |         [9, 10, 3, 16, 11, 18],
 38 |     ]
 39 |     row_names = ["S1", "S2", "S3"]
 40 |     col_names = ["E1", "E2", "E3", "E4", "E5", "E6"]
 41 |     matrix_df = pd.DataFrame(matrix_data, index=row_names, columns=col_names)
 42 |     return matrix_df
 43 | 
 44 | 
 45 | @pytest.fixture
 46 | def fromto_table_df() -> pd.DataFrame:
 47 |     """From-to table dataframe with (from, to, value) rows and no column names"""
 48 |     return pd.DataFrame(
 49 |         data=[
 50 |             ["A", "B", 10],
 51 |             ["A", "C", 5],
 52 |             ["A", "D", 15],
 53 |             ["A", "E", 20],
 54 |             ["A", "F", 3],
 55 |             ["B", "A", 3],
 56 |             ["B", "G", 15],
 57 |             ["F", "D", 13],
 58 |             ["F", "E", 2],
 59 |             ["E", "A", 20],
 60 |             ["E", "D", 6],
 61 |         ],
 62 |     )
 63 | 
 64 | 
 65 | @pytest.fixture
 66 | def radar_table_df() -> pd.DataFrame:
 67 |     """3 x 6 radar table dataframe (rows: Hero/Warrior/Wizard, columns: 6 stats)"""
 68 |     return pd.DataFrame(
 69 |         data=[
 70 |             [80, 80, 80, 80, 80, 80],
 71 |             [90, 95, 95, 30, 30, 80],
 72 |             [60, 20, 20, 100, 90, 50],
 73 |         ],
 74 |         index=["Hero", "Warrior", "Wizard"],
 75 |         columns=["HP", "ATK", "DEF", "SP.ATK", "SP.DEF", "SPD"],
 76 |     )
 77 | 
 78 | 
 79 | @pytest.fixture
 80 | def csv_matrix_file(matrix_df: pd.DataFrame, tmp_path: Path) -> Path:
 81 |     """Write `matrix_df` to `matrix.csv` in `tmp_path` and return the file path"""
 82 |     csv_matrix_file = tmp_path / "matrix.csv"
 83 |     matrix_df.to_csv(csv_matrix_file)
 84 |     return csv_matrix_file
 85 | 
 86 | 
 87 | @pytest.fixture
 88 | def tsv_matrix_file(matrix_df: pd.DataFrame, tmp_path: Path) -> Path:
 89 |     """Write `matrix_df` tab-separated to `matrix.tsv` in `tmp_path`"""
 90 |     tsv_matrix_file = tmp_path / "matrix.tsv"
 91 |     matrix_df.to_csv(tsv_matrix_file, sep="\t")
 92 |     return tsv_matrix_file
 93 | 
 94 | 
 95 | @pytest.fixture
 96 | def tsv_radar_table_file(radar_table_df: pd.DataFrame, tmp_path: Path) -> Path:
 97 |     """Write `radar_table_df` tab-separated to `radar_table.tsv` in `tmp_path`"""
 98 |     tsv_radar_table_file = tmp_path / "radar_table.tsv"
 99 |     radar_table_df.to_csv(tsv_radar_table_file, sep="\t")
100 |     return tsv_radar_table_file
101 | 


--------------------------------------------------------------------------------
/tests/test_sector.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | 
 3 | import pytest
 4 | 
 5 | from pycirclize import config
 6 | from pycirclize.sector import Sector
 7 | 
 8 | 
 9 | @pytest.fixture
10 | def sector() -> Sector:
11 |     """Sector named `test` with size 1000 and radian limits (0, pi)"""
12 |     return Sector("test", 1000, (0, math.pi))
13 | 
14 | 
15 | def test_property() -> None:
16 |     """Test sector property"""
17 |     # Case1: Set int size
18 |     name, size, rad_lim = "test", 1000, (0, math.pi)
19 |     sector_case1 = Sector(name, size, rad_lim)
20 |     assert sector_case1.name == name
21 |     assert sector_case1.size == size
22 |     assert sector_case1.start == 0
23 |     assert sector_case1.end == size
24 |     assert sector_case1.center == 500
25 |     assert sector_case1.rad_size == math.pi
26 |     assert sector_case1.rad_lim == rad_lim
27 |     assert sector_case1.deg_size == 180
28 |     assert sector_case1.deg_lim == (0, 180)
29 |     assert sector_case1.tracks == []
30 |     assert sector_case1.patches == []
31 |     assert sector_case1.plot_funcs == []
32 | 
33 |     # Case2: Set tuple[float, float] range
34 |     name, range, rad_lim = "test", (100, 1100), (0, math.pi)
35 |     sector_case2 = Sector(name, range, rad_lim)
36 |     assert sector_case2.size == range[1] - range[0]
37 |     assert sector_case2.start == range[0]
38 |     assert sector_case2.end == range[1]
39 |     assert sector_case2.center == 600
40 | 
41 | 
42 | def test_add_track(sector: Sector) -> None:
43 |     """Test `add_track()` with an explicit name and with no name given"""
44 |     sector.add_track((90, 100), name="Test01")
45 |     sector.add_track((80, 90))  # no name given; expected to be named `Track02`
46 |     assert len(sector.tracks) == 2
47 |     assert [t.name for t in sector.tracks] == ["Test01", "Track02"]
48 | 
49 | 
50 | def test_get_track(sector: Sector) -> None:
51 |     """Test `get_track()` lookup by track name"""
52 |     # Case1: No tracks (Error)
53 |     with pytest.raises(ValueError):
54 |         sector.get_track("error")
55 |     # Case2: Target name track does not exist (Error)
56 |     sector.add_track((90, 100))
57 |     sector.add_track((80, 90))
58 |     with pytest.raises(ValueError):
59 |         sector.get_track("error")
60 |     # Case3: Found track (No error)
61 |     sector.get_track("Track02")
62 | 
63 | 
64 | def test_get_lowest_r(sector: Sector) -> None:
65 |     """Test `get_lowest_r()` without tracks and after adding tracks"""
66 |     # Case1: No tracks (expects `config.MAX_R`)
67 |     assert sector.get_lowest_r() == config.MAX_R
68 |     # Case2: Add tracks (expects the smallest lower radius among the tracks)
69 |     sector.add_track((90, 100))
70 |     sector.add_track((50, 70))
71 |     assert sector.get_lowest_r() == 50
72 | 
73 | 
74 | def test_x_to_pad() -> None:
75 |     """Test `x_to_pad()`"""
76 |     # Case1: Set int size
77 |     sector = Sector("test", 1000, (0, math.pi))
78 |     assert sector.x_to_rad(0) == 0
79 |     assert sector.x_to_rad(250) == math.pi / 4
80 |     assert sector.x_to_rad(500) == math.pi / 2
81 |     assert sector.x_to_rad(1000) == math.pi
82 |     with pytest.raises(ValueError):
83 |         sector.x_to_rad(sector.end + 1)
84 | 
85 |     # Case2: Set tuple[float, float] range
86 |     sector = Sector("test", (100, 1100), (0, math.pi))
87 |     assert sector.x_to_rad(350) == math.pi / 4
88 |     assert sector.x_to_rad(600) == math.pi / 2
89 |     assert sector.x_to_rad(1100) == math.pi
90 |     with pytest.raises(ValueError):
91 |         assert sector.x_to_rad(0) == 0
92 |     with pytest.raises(ValueError):
93 |         sector.x_to_rad(sector.end + 1)
94 | 


--------------------------------------------------------------------------------
/tests/test_circos.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import pytest
 4 | 
 5 | from pycirclize import Circos
 6 | 
 7 | 
 8 | def test_circos_init() -> None:
 9 |     """Test circos initialization with int size and (start, end) range sectors"""
10 |     circos = Circos({"A": 10, "B": 20, "C": 15})
11 |     assert [s.name for s in circos.sectors] == ["A", "B", "C"]
12 |     assert [s.size for s in circos.sectors] == [10, 20, 15]
13 | 
14 |     circos = Circos(dict(D=10, E=(10, 20), F=(30, 50), G=100))
15 |     assert [s.name for s in circos.sectors] == ["D", "E", "F", "G"]
16 |     assert [s.size for s in circos.sectors] == [10, 10, 20, 100]  # end - start
17 | 
18 | 
19 | @pytest.mark.parametrize(
20 |     "start, end",
21 |     [
22 |         (-10, 360),  # End - Start > 360
23 |         (0, -90),  # Start > End
24 |         (-400, -200),  # Start < -360
25 |         (200, 400),  # End > 360
26 |     ],
27 | )
28 | def test_circos_init_range_error(start: float, end: float) -> None:
29 |     """Test that each invalid `start`/`end` degree pair raises `ValueError`"""
30 |     with pytest.raises(ValueError):
31 |         Circos({s: 10 for s in "ABC"}, start=start, end=end)
32 | 
33 | 
34 | @pytest.mark.parametrize(
35 |     "space, endspace, success",
36 |     [
37 |         # If `sector num = 3` and `endspace = True`
38 |         # List of space length must be 3
39 |         ([5], True, False),
40 |         ([5, 10], True, False),
41 |         ([5, 10, 15], True, True),
42 |         ([5, 10, 15, 20], True, False),
43 |         # If `sector num = 3` and `endspace = False`
44 |         # List of space length must be 2
45 |         ([5], False, False),
46 |         ([5, 10], False, True),
47 |         ([5, 10, 15], False, False),
48 |     ],
49 | )
50 | def test_circos_init_space_list(
51 |     space: list[float], endspace: bool, success: bool
52 | ) -> None:
53 |     """Test circos initialization space list length error"""
54 |     sectors = {s: 10 for s in "ABC"}
55 |     if success:
56 |         Circos(sectors, space=space, endspace=endspace)
57 |     else:
58 |         with pytest.raises(ValueError):
59 |             Circos({s: 10 for s in "ABC"}, space=space, endspace=endspace)
60 | 
61 | 
62 | def test_get_sector() -> None:
63 |     """Test `get_sector()` lookup by sector name"""
64 |     sectors = {"A": 10, "B": 20, "C": 15}
65 |     circos = Circos(sectors)
66 |     # Case1: Successfully get sector (every defined name must be found)
67 |     for sector_name in sectors:
68 |         circos.get_sector(sector_name)
69 |     # Case2: Failed to get sector (unknown name raises `ValueError`)
70 |     with pytest.raises(ValueError):
71 |         circos.get_sector("error")
72 | 
73 | 
74 | def test_get_group_sectors_deg_lim() -> None:
75 |     """Test `get_group_sectors_deg_lim()` on rounded (start, end) degree limits"""
76 |     sectors = dict(A=10, B=10, C=10, D=10, E=10, F=10, G=10, H=10)  # 8 equal sizes
77 | 
78 |     group1 = list("BCD")  # contiguous group
79 |     circos = Circos(sectors)
80 |     group1_deg_lim = circos.get_group_sectors_deg_lim(group1)
81 |     assert tuple(map(round, group1_deg_lim)) == (45, 180)
82 | 
83 |     group2 = list("HEF")  # unordered group that does not list G
84 |     circos = Circos(sectors, start=20, end=340)
85 |     group2_deg_lim = circos.get_group_sectors_deg_lim(group2)
86 |     assert tuple(map(round, group2_deg_lim)) == (180, 340)
87 | 
88 | 
89 | def test_ax_property() -> None:
90 |     """Test that `circos.ax` is only available after `circos.plotfig()`"""
91 |     sectors = {"A": 10, "B": 20, "C": 15}
92 |     circos = Circos(sectors)
93 |     # Raise error before calling `circos.plotfig()` method
94 |     with pytest.raises(ValueError):
95 |         assert circos.ax  # property access itself is expected to raise
96 |     circos.plotfig()
97 |     assert circos.ax
98 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [project]
  2 | name = "pyCirclize"
  3 | dynamic = ["version"]
  4 | description = "Circular visualization in Python"
  5 | authors = [{ name = "moshi4" }]
  6 | license = "MIT"
  7 | readme = "README.md"
  8 | keywords = [
  9 |     "matplotlib",
 10 |     "visualization",
 11 |     "bioinformatics",
 12 |     "circos",
 13 |     "chord-diagram",
 14 | ]
 15 | classifiers = [
 16 |     "Intended Audience :: Science/Research",
 17 |     "Topic :: Scientific/Engineering :: Bio-Informatics",
 18 |     "Framework :: Matplotlib",
 19 | ]
 20 | requires-python = ">=3.10"
 21 | dependencies = [
 22 |     "matplotlib>=3.6.3",
 23 |     "biopython>=1.80",
 24 |     "numpy>=1.21",
 25 |     "pandas>=1.3.5",
 26 | ]
 27 | 
 28 | [project.urls]
 29 | homepage = "https://moshi4.github.io/pyCirclize/"
 30 | repository = "https://github.com/moshi4/pyCirclize/"
 31 | 
 32 | [project.optional-dependencies]
 33 | tooltip = ["ipympl>=0.9.7"]
 34 | 
 35 | [dependency-groups]
 36 | dev = [
 37 |     { include-group = "test" },
 38 |     { include-group = "lint" },
 39 |     { include-group = "jupyter" },
 40 |     { include-group = "docs" },
 41 |     { include-group = "genomics" },
 42 | ]
 43 | test = ["pytest>=8.0.0", "pytest-cov>=4.0.0"]
 44 | lint = ["ruff>=0.4.0", "pre-commit>=3.5.0"]
 45 | jupyter = ["ipykernel>=6.13.0"]
 46 | docs = [
 47 |     "mkdocs>=1.2",
 48 |     "mkdocstrings[python]>=0.19.0",
 49 |     "mkdocs-jupyter>=0.21.0",
 50 |     "mkdocs-material>=8.2",
 51 |     "markdown-callouts>=0.4.0",
 52 |     "black>=22.3.0",
 53 | ]
 54 | genomics = ["pygenomeviz>=1.5.0"]
 55 | 
 56 | [tool.hatch.version]
 57 | path = "src/pycirclize/__init__.py"
 58 | 
 59 | [tool.pytest.ini_options]
 60 | minversion = "6.0"
 61 | addopts = "--cov=src --tb=long -vv --cov-report=xml --cov-report=term"
 62 | testpaths = ["tests"]
 63 | 
 64 | [tool.ruff]
 65 | line-length = 88
 66 | 
 67 | # Lint Rules: https://docs.astral.sh/ruff/rules/
 68 | [tool.ruff.lint]
 69 | select = [
 70 |     "I",   # isort
 71 |     "F",   # pyflakes
 72 |     "E",   # pycodestyle (Error)
 73 |     "W",   # pycodestyle (Warning)
 74 |     "D",   # pydocstyle
 75 |     "UP",  # pyupgrade
 76 |     "PL",  # Pylint
 77 |     "ANN", # flake8-annotations
 78 |     "TC",  # flake8-type-checking
 79 |     "B",   # flake8-bugbear
 80 |     "SIM", # flake8-simplify
 81 |     "ARG", # flake8-unused-arguments
 82 |     "PTH", # flake8-use-pathlib
 83 |     "RUF", # Ruff-specific rules
 84 | ]
 85 | ignore = [
 86 |     "D100",    # Missing docstring in public module
 87 |     "D101",    # Missing docstring in public class
 88 |     "D104",    # Missing docstring in public package
 89 |     "D105",    # Missing docstring in magic method
 90 |     "D205",    # 1 blank line required between summary line and description
 91 |     "D400",    # First line should end with a period
 92 |     "D401",    # First line should be in imperative mood
 93 |     "D403",    # First word of the first line should be properly capitalized
 94 |     "D415",    # First line should end with a period, question mark, or exclamation point
 95 |     "ANN002",  # Missing type annotation for *{name}
 96 |     "ANN003",  # Missing type annotation for **{name}
 97 |     "PTH123",  # open() should be replaced by Path.open()
 98 |     "PLR0913", # Too many arguments in function definition ({c_args} > {max_args})
 99 |     "PLR2004", # Magic value used in comparison, consider replacing {value} with a constant variable
100 |     "PLW2901", # Outer {outer_kind} variable {name} overwritten by inner {inner_kind} target
101 |     "UP038",   # Deprecated: Use X | Y in {} call instead of (X, Y)
102 | ]
103 | 
104 | [tool.ruff.lint.pydocstyle]
105 | convention = "numpy"
106 | 
107 | [build-system]
108 | requires = ["hatchling"]
109 | build-backend = "hatchling.build"
110 | 


--------------------------------------------------------------------------------
/tests/parser/test_table.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import pandas as pd
  4 | import pytest
  5 | 
  6 | from pycirclize.parser import RadarTable, StackedBarTable
  7 | 
  8 | 
  9 | class TestStackedBarTable:
 10 |     """Test StackedBarTable Class"""
 11 | 
 12 |     @pytest.fixture
 13 |     def sb_table(self) -> StackedBarTable:
 14 |         """StackedBarTable fixture built from a 3 rows x 4 columns dataframe"""
 15 |         table_df = pd.DataFrame(
 16 |             [
 17 |                 [1, 4, 7, 10],
 18 |                 [2, 5, 8, 11],
 19 |                 [3, 6, 9, 12],
 20 |             ],
 21 |             index=["row1", "row2", "row3"],
 22 |             columns=["col1", "col2", "col3", "col4"],
 23 |         )
 24 |         return StackedBarTable(table_df)
 25 | 
 26 |     def test_properties(self, sb_table: StackedBarTable) -> None:
 27 |         """Test name, count, sum and stacked bar properties of the fixture"""
 28 |         assert sb_table.row_names == ["row1", "row2", "row3"]
 29 |         assert sb_table.col_names == ["col1", "col2", "col3", "col4"]
 30 |         assert sb_table.row_num == 3
 31 |         assert sb_table.col_num == 4
 32 |         assert sb_table.row_sum_vmax == 3 + 6 + 9 + 12  # largest row sum (row3)
 33 |         assert sb_table.row_name2sum == dict(row1=22, row2=26, row3=30)
 34 |         assert sb_table.stacked_bar_heights == [  # one list per column
 35 |             [1, 2, 3],
 36 |             [4, 5, 6],
 37 |             [7, 8, 9],
 38 |             [10, 11, 12],
 39 |         ]
 40 |         assert sb_table.stacked_bar_bottoms == [  # running sum of heights
 41 |             [0, 0, 0],
 42 |             [1, 2, 3],
 43 |             [5, 7, 9],
 44 |             [12, 15, 18],
 45 |         ]
 46 | 
 47 |     @pytest.mark.parametrize(
 48 |         "track_size, expected_x_list",
 49 |         [
 50 |             (30, [5, 15, 25]),
 51 |             (150, [25, 75, 125]),
 52 |         ],
 53 |     )
 54 |     def test_calc_bar_label_x_list(
 55 |         self,
 56 |         sb_table: StackedBarTable,
 57 |         track_size: float,
 58 |         expected_x_list: list[float],
 59 |     ) -> None:
 60 |         """Test `calc_bar_label_x_list()` (x = center of each bar slot)"""
 61 |         x_list = sb_table.calc_bar_label_x_list(track_size)
 62 |         assert x_list == expected_x_list
 63 | 
 64 |     @pytest.mark.parametrize(
 65 |         "track_r_lim, expected_r_list",
 66 |         [
 67 |             ((70, 100), [95, 85, 75]),
 68 |             ((10, 70), [60, 40, 20]),
 69 |         ],
 70 |     )
 71 |     def test_calc_barh_label_r_list(
 72 |         self,
 73 |         sb_table: StackedBarTable,
 74 |         track_r_lim: tuple[float, float],
 75 |         expected_r_list: list[float],
 76 |     ) -> None:
 77 |         """Test `calc_barh_label_r_list()` (r centers, outermost first)"""
 78 |         r_list = sb_table.calc_barh_label_r_list(track_r_lim)
 79 |         assert r_list == expected_r_list
 80 | 
 81 |     @pytest.mark.parametrize(
 82 |         "track_r_lim, width, expected_r_lim_list",
 83 |         [
 84 |             ((70, 100), 1.0, [(90, 100), (80, 90), (70, 80)]),
 85 |             ((10, 70), 0.8, [(52, 68), (32, 48), (12, 28)]),
 86 |         ],
 87 |     )
 88 |     def test_calc_barh_r_lim_list(
 89 |         self,
 90 |         sb_table: StackedBarTable,
 91 |         track_r_lim: tuple[float, float],
 92 |         width: float,
 93 |         expected_r_lim_list: list[tuple[float, float]],
 94 |     ) -> None:
 95 |         """Test `calc_barh_r_lim_list()` (`width` shrinks bars inside slots)"""
 96 |         r_lim_list = sb_table.calc_barh_r_lim_list(track_r_lim, width)
 97 |         assert r_lim_list == expected_r_lim_list
 98 | 
 99 | 
100 | class TestRaderTable:
101 |     """Test RadarTable Class"""
102 | 
103 |     @pytest.fixture
104 |     def radar_table(self) -> RadarTable:
105 |         """RadarTable fixture built from a 3 rows x 6 columns dataframe"""
106 |         table_df = pd.DataFrame(
107 |             data=[
108 |                 [80, 80, 80, 80, 80, 80],
109 |                 [90, 95, 95, 30, 30, 80],
110 |                 [60, 20, 20, 100, 90, 50],
111 |             ],
112 |             index=["Hero", "Warrior", "Wizard"],
113 |             columns=["HP", "ATK", "DEF", "SP.ATK", "SP.DEF", "SPD"],
114 |         )
115 |         return RadarTable(table_df)
116 | 
117 |     def test_row_name2values(self, radar_table: RadarTable) -> None:
118 |         """Test `row_name2values` property (row name -> list of row values)"""
119 |         assert radar_table.row_name2values == dict(
120 |             Hero=[80, 80, 80, 80, 80, 80],
121 |             Warrior=[90, 95, 95, 30, 30, 80],
122 |             Wizard=[60, 20, 20, 100, 90, 50],
123 |         )
124 | 


--------------------------------------------------------------------------------
/tests/parser/test_matrix.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | 
  3 | import pandas as pd
  4 | 
  5 | from pycirclize.parser import Matrix
  6 | 
  7 | 
  8 | def test_load_dataframe_matrix(matrix_df: pd.DataFrame) -> None:
  9 |     """Test loading a pandas dataframe matrix"""
 10 |     # Load pandas matrix dataframe
 11 |     matrix = Matrix(matrix_df)
 12 | 
 13 |     # Test row & column names
 14 |     row_names = ["S1", "S2", "S3"]
 15 |     col_names = ["E1", "E2", "E3", "E4", "E5", "E6"]
 16 |     assert matrix.all_names == row_names + col_names
 17 |     assert matrix.row_names == row_names
 18 |     assert matrix.col_names == col_names
 19 | 
 20 |     # Only test that these functions can be called successfully
 21 |     matrix.to_sectors()
 22 |     matrix.to_links()
 23 | 
 24 | 
 25 | def test_parse_fromto_table(fromto_table_df: pd.DataFrame) -> None:
 26 |     """Test parsing a from-to table dataframe into a matrix"""
 27 |     # Parse from-to table dataframe
 28 |     matrix = Matrix.parse_fromto_table(fromto_table_df)
 29 | 
 30 |     # Test row & column names (rows and columns are expected to share all names)
 31 |     expected_names = list("ABCDEFG")
 32 |     assert matrix.all_names == expected_names
 33 |     assert matrix.row_names == expected_names
 34 |     assert matrix.col_names == expected_names
 35 | 
 36 |     # Only test that these functions can be called successfully
 37 |     matrix.to_sectors()
 38 |     matrix.to_links()
 39 | 
 40 | 
 41 | def test_load_tsv_matrix(tsv_matrix_file: Path) -> None:
 42 |     """Test loading a TSV matrix file (no `delimiter` argument given)"""
 43 |     # Load tsv format matrix file
 44 |     matrix = Matrix(tsv_matrix_file)
 45 | 
 46 |     # Test row & column names
 47 |     row_names = ["S1", "S2", "S3"]
 48 |     col_names = ["E1", "E2", "E3", "E4", "E5", "E6"]
 49 |     assert matrix.all_names == row_names + col_names
 50 |     assert matrix.row_names == row_names
 51 |     assert matrix.col_names == col_names
 52 | 
 53 |     # Only test that these functions can be called successfully
 54 |     matrix.to_sectors()
 55 |     matrix.to_links()
 56 | 
 57 | 
 58 | def test_load_csv_matrix(csv_matrix_file: Path) -> None:
 59 |     """Test loading a CSV matrix file with an explicit `delimiter=","`"""
 60 |     # Load csv format matrix file
 61 |     matrix = Matrix(csv_matrix_file, delimiter=",")
 62 | 
 63 |     # Test row & column names
 64 |     row_names = ["S1", "S2", "S3"]
 65 |     col_names = ["E1", "E2", "E3", "E4", "E5", "E6"]
 66 |     assert matrix.all_names == row_names + col_names
 67 |     assert matrix.row_names == row_names
 68 |     assert matrix.col_names == col_names
 69 | 
 70 |     # Only test that these functions can be called successfully
 71 |     matrix.to_sectors()
 72 |     matrix.to_links()
 73 | 
 74 | 
 75 | def test_matrix_sort() -> None:
 76 |     """Test `matrix.sort()` with `asc`, `desc` and a user-specified name order"""
 77 |     matrix_df = pd.DataFrame(
 78 |         [
 79 |             [1, 2],
 80 |             [3, 4],
 81 |         ],
 82 |         index=["R1", "R2"],
 83 |         columns=["C1", "C2"],
 84 |     )
 85 |     matrix = Matrix(matrix_df)
 86 | 
 87 |     # Ascending Sort (presumably by each name's total: R1=3, C1=4, C2=6, R2=7)
 88 |     expected_asc_matrix_df = pd.DataFrame(
 89 |         [
 90 |             [0, 1, 2, 0],
 91 |             [0, 0, 0, 0],
 92 |             [0, 0, 0, 0],
 93 |             [0, 3, 4, 0],
 94 |         ],
 95 |         index=["R1", "C1", "C2", "R2"],
 96 |         columns=["R1", "C1", "C2", "R2"],
 97 |     )
 98 |     asc_matrix_df = matrix.sort("asc").dataframe
 99 |     assert asc_matrix_df.equals(expected_asc_matrix_df)
100 | 
101 |     # Descending Sort (reverse of the ascending name order)
102 |     expected_desc_matrix_df = pd.DataFrame(
103 |         [
104 |             [0, 4, 3, 0],
105 |             [0, 0, 0, 0],
106 |             [0, 0, 0, 0],
107 |             [0, 2, 1, 0],
108 |         ],
109 |         index=["R2", "C2", "C1", "R1"],
110 |         columns=["R2", "C2", "C1", "R1"],
111 |     )
112 |     desc_matrix_df = matrix.sort("desc").dataframe
113 |     assert desc_matrix_df.equals(expected_desc_matrix_df)
114 | 
115 |     # User-specified Order Sort (names listed explicitly)
116 |     expected_sort_matrix_df = pd.DataFrame(
117 |         [
118 |             [0, 0, 0, 0],
119 |             [0, 0, 0, 0],
120 |             [1, 2, 0, 0],
121 |             [3, 4, 0, 0],
122 |         ],
123 |         index=["C1", "C2", "R1", "R2"],
124 |         columns=["C1", "C2", "R1", "R2"],
125 |     )
126 |     sort_matrix_df = matrix.sort(["C1", "C2", "R1", "R2"]).dataframe
127 |     assert sort_matrix_df.equals(expected_sort_matrix_df)
128 | 
129 | 
130 | def test_to_fromto_table(tsv_matrix_file: Path) -> None:
131 |     """Test `matrix.to_fromto_table()` (one `from, to, value` row per cell)"""
132 |     matrix = Matrix(tsv_matrix_file)
133 |     expected_table_df = pd.DataFrame(
134 |         [
135 |             ["S1", "E1", 4],
136 |             ["S1", "E2", 14],
137 |             ["S1", "E3", 13],
138 |             ["S1", "E4", 17],
139 |             ["S1", "E5", 5],
140 |             ["S1", "E6", 2],
141 |             ["S2", "E1", 7],
142 |             ["S2", "E2", 1],
143 |             ["S2", "E3", 6],
144 |             ["S2", "E4", 8],
145 |             ["S2", "E5", 12],
146 |             ["S2", "E6", 15],
147 |             ["S3", "E1", 9],
148 |             ["S3", "E2", 10],
149 |             ["S3", "E3", 3],
150 |             ["S3", "E4", 16],
151 |             ["S3", "E5", 11],
152 |             ["S3", "E6", 18],
153 |         ],
154 |         columns=["from", "to", "value"],
155 |     )
156 |     assert matrix.to_fromto_table().equals(expected_table_df)
157 | 


--------------------------------------------------------------------------------
/src/pycirclize/config.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import math
  4 | from enum import IntEnum
  5 | from typing import ClassVar
  6 | 
  7 | import matplotlib as mpl
  8 | 
  9 | ###########################################################
 10 | # Constant Value Config
 11 | ###########################################################
 12 | 
# Fundamental Plot Parameters
# NOTE(review): the names suggest MIN_R/MAX_R bound the radial axis and
# R_PLOT_MARGIN pads it - confirm against the plotting code that reads them.
MIN_R = 0
MAX_R = 100
R_PLOT_MARGIN = 5
ARC_RADIAN_STEP = 0.01
# (min, max) pair built from the two bounds above
R_LIM = (MIN_R, MAX_R)
# Each dict sets both the short alias (`ec` / `fc`) and the long name
# (`edgecolor` / `facecolor`) to "none".
AXIS_FACE_PARAM = dict(zorder=0.99, ec="none", edgecolor="none")
AXIS_EDGE_PARAM = dict(zorder=1.01, fc="none", facecolor="none")
EPSILON = 1e-10
 22 | 
# Circos Color Scheme
# http://circos.ca/tutorials/lessons/configuration/colors/
# Maps a cytoband stain name to its hex color (the RGB triple is in the comment).
# NOTE(review): `gpos25` and `gpos50` share the same color (200,200,200);
# verify against the Circos color definitions linked above before changing it.
CYTOBAND_COLORMAP = {
    "gpos100": "#000000",  # 0,0,0
    "gpos": "#000000",  # 0,0,0
    "gpos75": "#828282",  # 130,130,130
    "gpos66": "#A0A0A0",  # 160,160,160
    "gpos50": "#C8C8C8",  # 200,200,200
    "gpos33": "#D2D2D2",  # 210,210,210
    "gpos25": "#C8C8C8",  # 200,200,200
    "gvar": "#DCDCDC",  # 220,220,220
    "gneg": "#FFFFFF",  # 255,255,255
    "acen": "#D92F27",  # 217,47,39
    "stalk": "#647FA4",  # 100,127,164
}
 38 | 
 39 | 
class Direction(IntEnum):
    """Link BezierCurve Direction Enum

    Integer-valued enum (`IntEnum`), so members compare equal to their plain
    int values (`-1`, `0`, `1`, `2`).
    """

    # NOTE(review): exact arrow semantics of each member are defined by the
    # link drawing code, not here - confirm there before relying on them.
    REVERSE = -1
    NONE = 0
    FORWARD = 1
    BIDIRECTIONAL = 2
 47 | 
 48 | 
 49 | ###########################################################
 50 | # Mutable Value Config (Mainly for Developer)
 51 | ###########################################################
 52 | 
 53 | 
class _AnnotationAdjustConfig:
    """Annotation Position Adjustment Config

    All settings are class-level attributes (`ClassVar`), so they are read and
    overridden on the class itself rather than on an instance.
    Angular settings (`drad`, `max_rad_shift`) are stored in radians.
    """

    enable: ClassVar[bool] = True
    """Enable Annotation position adjustment (default: `True`)"""
    limit: ClassVar[int] = 200
    """Limit of Annotation number for position adjustment (default: `200`)"""
    max_iter: ClassVar[int] = 1000
    """Max iteration number for Annotation position adjustment (default: `1000`)"""
    drad: ClassVar[float] = math.radians(0.1)
    """Delta radian for iterative position adjustment (default: `math.radians(0.1)`)"""
    dr: ClassVar[float] = 0.1
    """Delta radius for iterative position adjustment (default: `0.1`)"""
    expand: ClassVar[tuple[float, float]] = (1.2, 1.2)
    """Expand width & height factor of text bbox (default: `(1.2, 1.2)`)"""
    max_rad_shift: ClassVar[float] = math.radians(3.0)
    """Max radian of Annotation position shift (default: `math.radians(3.0)`)"""
 71 | 
 72 | 
class _TooltipConfig:
    """Tooltip Display Config

    All settings are class-level attributes (`ClassVar`), so they are read and
    overridden on the class itself rather than on an instance.
    `enabled` defaults to `False`. `fc` defaults to `None`, which the attribute
    note below describes as using the facecolor of the hovered patch.
    """

    enabled: ClassVar[bool] = False
    """Enabled tooltip display"""
    fontsize: ClassVar[int] = 9
    """Font size (default: `9`)"""
    fc: ClassVar[str | None] = None
    """Facecolor (default: `cursor hover patch facecolor`)"""
    lw: ClassVar[float] = 0.5
    """Linewidth (default: `0.5`)"""
    margin: ClassVar[float] = 15
    """Margin between tooltip and cursor (default: `15`)"""
    boxstyle: ClassVar[str] = "round"
    """Boxstyle (default: `round`)"""
 88 | 
 89 | 
clear_savefig: bool = True
"""
By default, after saving a figure using the `savefig()` method, figure object is
automatically deleted to avoid memory leaks (no display on jupyter notebook)
If you want to display the figure on jupyter notebook using `savefig()` method,
set clear_savefig=False.
"""
# The config classes themselves (not instances) are exposed under short names,
# so settings are read and overridden as class attributes
# (e.g. `config.ann_adjust.limit`, `config.tooltip.enabled`).
ann_adjust = _AnnotationAdjustConfig
tooltip = _TooltipConfig
 99 | 
100 | 
101 | ###########################################################
102 | # Matplotlib Runtime Configuration
103 | ###########################################################
104 | 
# Setting matplotlib rc(runtime configuration) parameters
# https://matplotlib.org/stable/tutorials/introductory/customizing.html
mpl_rc_params = {
    # Savefig
    "savefig.bbox": "tight",  # Default: None
    "savefig.pad_inches": 0.5,  # Default: 0.1
    # SVG
    "svg.fonttype": "none",
}
# Import-time side effect: importing this module updates the process-wide
# matplotlib rcParams with the values above.
mpl.rcParams.update(mpl_rc_params)
115 | 
116 | ###########################################################
117 | # GitHub Eukaryote & Prokaryote Dataset Config
118 | ###########################################################
119 | 
# Base URL of the raw dataset files in the pycirclize-data GitHub repository
GITHUB_DATA_URL = "https://raw.githubusercontent.com/moshi4/pycirclize-data/master/"

# Genome name -> dataset file names.
# Every genome has the same three files, named `<genome>_<suffix>`.
EUKARYOTE_DATASET = {
    genome: [
        f"{genome}_{suffix}"
        for suffix in ("chr.bed", "cytoband.tsv", "genomic_link.tsv")
    ]
    for genome in ("hg38", "hs1", "mm10", "mm39")
}

# One GenBank (`.gbk`) and one GFF (`.gff`) file per organism;
# only the escherichia_coli files are gzip-compressed.
PROKARYOTE_FILES = [
    f"{organism}.{fmt}{compression}"
    for organism, compression in (
        ("enterobacteria_phage", ""),
        ("mycoplasma_alvi", ""),
        ("escherichia_coli", ".gz"),
    )
    for fmt in ("gbk", "gff")
]
153 | 


--------------------------------------------------------------------------------
/src/pycirclize/utils/helper.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | from pathlib import Path
  4 | from typing import TYPE_CHECKING
  5 | from urllib.parse import urlparse
  6 | from urllib.request import urlopen
  7 | 
  8 | import matplotlib as mpl
  9 | import numpy as np
 10 | from matplotlib.colors import Colormap, to_hex
 11 | from PIL import Image
 12 | 
 13 | if TYPE_CHECKING:
 14 |     from Bio.SeqFeature import SeqFeature
 15 | 
 16 | 
 17 | class ColorCycler:
 18 |     """Color Cycler Class"""
 19 | 
 20 |     counter = 0
 21 |     cmap: Colormap = mpl.colormaps["tab10"]  # type: ignore
 22 | 
 23 |     def __new__(cls, n: int | None = None) -> str:
 24 |         """Get hexcolor cyclically from cmap by counter or user specified number
 25 | 
 26 |         `ColorCycler()` works same as `ColorCycler.get_color()`
 27 | 
 28 |         Parameters
 29 |         ----------
 30 |         n : int | None, optional
 31 |             Number for color cycle. If None, counter class variable is used.
 32 | 
 33 |         Returns
 34 |         -------
 35 |         hexcolor : str
 36 |             Cyclic hexcolor string
 37 |         """
 38 |         return cls.get_color(n)
 39 | 
 40 |     @classmethod
 41 |     def reset_cycle(cls) -> None:
 42 |         """Reset cycle counter"""
 43 |         cls.counter = 0
 44 | 
 45 |     @classmethod
 46 |     def set_cmap(cls, name: str) -> None:
 47 |         """Set colormap (Default: `tab10`)"""
 48 |         cls.cmap = mpl.colormaps[name]  # type: ignore
 49 |         cls.counter = 0
 50 | 
 51 |     @classmethod
 52 |     def get_color(cls, n: int | None = None) -> str:
 53 |         """Get hexcolor cyclically from cmap by counter or user specified number
 54 | 
 55 |         Parameters
 56 |         ----------
 57 |         n : int | None, optional
 58 |             Number for color cycle. If None, counter class variable is used.
 59 | 
 60 |         Returns
 61 |         -------
 62 |         hexcolor : str
 63 |             Cyclic hexcolor string
 64 |         """
 65 |         if n is None:
 66 |             n = cls.counter
 67 |             cls.counter += 1
 68 |         return to_hex(cls.cmap(n % cls.cmap.N), keep_alpha=True)  # type: ignore
 69 | 
 70 |     @classmethod
 71 |     def get_color_list(cls, n: int | None = None) -> list[str]:
 72 |         """Get hexcolor list of colormap
 73 | 
 74 |         Parameters
 75 |         ----------
 76 |         n : int | None, optional
 77 |             If n is None, all(=cmap.N) hexcolors are extracted from colormap.
 78 |             If n is specified, hexcolors are extracted from n equally divided colormap.
 79 | 
 80 |         Returns
 81 |         -------
 82 |         hexcolor_list : list[str]
 83 |             Hexcolor list
 84 |         """
 85 |         if n is None:
 86 |             cmap_idx_list = list(range(0, cls.cmap.N))  # type: ignore
 87 |         elif n > 0:
 88 |             cmap_idx_list = [int(i) for i in np.linspace(0, cls.cmap.N, n)]  # type: ignore
 89 |         else:
 90 |             raise ValueError(f"{n=} is invalid number (Must be 'n > 0').")
 91 | 
 92 |         return [to_hex(cls.cmap(i), keep_alpha=True) for i in cmap_idx_list]  # type: ignore
 93 | 
 94 | 
 95 | def calc_group_spaces(
 96 |     groups: list[int],
 97 |     *,
 98 |     space_bw_group: float = 15,
 99 |     space_in_group: float = 2,
100 |     endspace: bool = True,
101 | ) -> list[float]:
102 |     """Calculate spaces between/within groups
103 | 
104 |     This function can be used to easily calculate the space size
105 |     when data is separated into multiple groups for display.
106 |     For example, to set up a space to divide `[A, B, C, D, E, F, G, H, I, J]`
107 |     into three groups such as `[(A, B, C, D), (E, F, G), (H, I, J)]`,
108 |     set `groups=[4, 3, 3]`.
109 | 
110 |     Parameters
111 |     ----------
112 |     groups : list[int]
113 |         List of each group number (e.g. `[4, 3, 3]`)
114 |     space_bw_group : float, optional
115 |         Space size between group
116 |     space_in_group : float, optional
117 |         Space size within group
118 |     endspace : bool, optional
119 |         If True, insert space after the end group
120 | 
121 |     Returns
122 |     -------
123 |     spaces : list[float]
124 |         Spaces between/within groups
125 |     """
126 |     if len(groups) == 0:
127 |         raise ValueError(f"{len(groups)=} is invalid.")
128 |     elif len(groups) == 1:
129 |         group_num = groups[0]
130 |         spaces = [space_in_group] * group_num
131 |     else:
132 |         spaces: list[float] = []
133 |         for group_num in groups:
134 |             group_spaces = [space_in_group] * (group_num - 1)
135 |             group_spaces.extend([space_bw_group])
136 |             spaces.extend(group_spaces)
137 |     if endspace:
138 |         return spaces
139 |     else:
140 |         return spaces[:-1]
141 | 
142 | 
def load_image(img: str | Path | Image.Image) -> Image.Image:
    """Load target image as PIL Image

    Resources opened by this function (HTTP response, image file) are closed
    before returning. An image object passed in by the caller is never closed.

    Parameters
    ----------
    img : str | Path | Image.Image
        Load target image (`File Path`|`URL`|`PIL Image`)

    Returns
    -------
    im : Image.Image
        PIL Image (mode=`RGBA`)
    """
    if isinstance(img, str) and urlparse(img).scheme in ("http", "https"):
        # Convert while the response is still open, then release the connection
        with urlopen(img) as response:
            return Image.open(response).convert("RGBA")
    if isinstance(img, (str, Path)):
        # `convert()` returns a new image, so the source file can be closed
        with Image.open(str(img)) as im:
            return im.convert("RGBA")
    # Already a PIL image owned by the caller
    return img.convert("RGBA")
163 | 
164 | 
def is_pseudo_feature(feature: SeqFeature) -> bool:
    """Tell whether a feature is flagged as pseudo in its qualifiers

    Parameters
    ----------
    feature : SeqFeature
        Target feature

    Returns
    -------
    check_result : bool
        True if the qualifiers contain a `pseudo` or `pseudogene` key
    """
    pseudo_tags = ("pseudo", "pseudogene")
    return any(tag in feature.qualifiers for tag in pseudo_tags)
180 | 


--------------------------------------------------------------------------------
/src/pycirclize/annotation.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import warnings
  4 | from collections import defaultdict
  5 | from typing import TYPE_CHECKING
  6 | 
  7 | import numpy as np
  8 | from matplotlib.text import Annotation, Text
  9 | 
 10 | from pycirclize import config, utils
 11 | from pycirclize.utils.plot import degrees
 12 | 
 13 | if TYPE_CHECKING:
 14 |     from matplotlib.projections.polar import PolarAxes
 15 |     from matplotlib.transforms import Bbox
 16 |     from numpy.typing import NDArray
 17 | 
 18 | 
def adjust_annotation(ax: PolarAxes) -> None:
    """Adjust annotation text position

    Annotations are processed in the order returned by `_get_sorted_ann_list()`.
    The first one is left untouched; each following one is checked for bbox
    overlap against all annotations before it in that order and, if needed,
    moved in place by reassigning `ann.xyann`:

    1. Radian shift (only when `_is_ann_rad_shift_target_loc()` is True):
       try the precomputed candidate radians, keeping the original radius.
    2. Radius shift: while still overlapping, add `config.ann_adjust.dr`
       to the radius.

    Both phases share one iteration budget (`config.ann_adjust.max_iter`) per
    annotation.

    Nothing is done when there are no annotations or `max_iter <= 0`.
    When the number of annotations exceeds `config.ann_adjust.limit`,
    a warning is issued and no adjustment is done.

    Parameters
    ----------
    ax : PolarAxes
        Axes whose annotations are adjusted in place
    """
    # Get sorted annotation list for position adjustment
    ann_list = _get_sorted_ann_list(ax)
    if len(ann_list) == 0 or config.ann_adjust.max_iter <= 0:
        return
    if len(ann_list) > config.ann_adjust.limit:
        warnings.warn(
            f"Too many annotations(={len(ann_list)}). Annotation position adjustment is not done.",  # noqa: E501
            stacklevel=2,
        )
        return

    def get_ann_window_extent(ann: Annotation) -> Bbox:
        """Get text bbox of annotation expanded by `config.ann_adjust.expand`"""
        # Calls `Text.get_window_extent` directly instead of the Annotation method
        # NOTE(review): presumably to measure the text only (no arrow) - confirm
        return Text.get_window_extent(ann).expanded(*config.ann_adjust.expand)

    # Iterate annotation position adjustment
    # Draw once without rendering before window extents are queried below
    ax.figure.draw_without_rendering()  # type: ignore
    ann2rad_shift_candidates = _get_ann2rad_shift_candidates(ann_list)
    # Start from the 2nd annotation: the 1st one has nothing before it to overlap
    for idx, ann in enumerate(ann_list[1:], 1):
        orig_rad, orig_r = ann.xyann
        ann_bbox = get_ann_window_extent(ann)
        # Overlap is checked only against annotations earlier in the sorted order
        adj_ann_list = ann_list[:idx]
        adj_ann_bboxes = [get_ann_window_extent(ann) for ann in adj_ann_list]

        # Adjust radian position
        # `iter` is shared with the radius adjustment loop below (not reset)
        iter, max_iter = 0, config.ann_adjust.max_iter
        if _is_ann_rad_shift_target_loc(orig_rad):
            for rad_shift_candidate in ann2rad_shift_candidates[str(ann)]:
                ann.xyann = (rad_shift_candidate, orig_r)
                ann_bbox = get_ann_window_extent(ann)
                # On break, the annotation stays at the current candidate position
                if ann_bbox.count_overlaps(adj_ann_bboxes) == 0 or iter > max_iter:
                    break
                else:
                    # Candidate overlaps: restore the original position
                    ann.xyann = (orig_rad, orig_r)
                iter += 1
            # NOTE(review): if every candidate overlaps, `xyann` is back at the
            # original position but `ann_bbox` still holds the last candidate bbox

        # Adjust radius position
        while ann_bbox.count_overlaps(adj_ann_bboxes) > 0 and iter <= max_iter:
            rad, r = ann.xyann
            ann.xyann = (rad, r + config.ann_adjust.dr)
            ann_bbox = get_ann_window_extent(ann)
            iter += 1

    # Plot annotation text bbox for developer check
    # for ann in ann_list:
    #     utils.plot.plot_bbox(get_ann_window_extent(ann), ax)
 66 | 
 67 | 
 68 | def _get_sorted_ann_list(ax: PolarAxes) -> list[Annotation]:
 69 |     """Sorted annotation list
 70 | 
 71 |     Sorting per 4 sections for adjusting annotation text position
 72 |     """
 73 |     ann_list = [t for t in ax.texts if isinstance(t, Annotation)]
 74 |     loc2ann_list: dict[str, list[Annotation]] = defaultdict(list)
 75 |     for ann in ann_list:
 76 |         loc = utils.plot.get_loc(ann.xyann[0])
 77 |         loc2ann_list[loc].append(ann)
 78 | 
 79 |     def sort_by_ann_rad(ann: Annotation) -> float:
 80 |         return utils.plot.degrees(ann.xyann[0])
 81 | 
 82 |     return (
 83 |         sorted(loc2ann_list["upper-right"], key=sort_by_ann_rad, reverse=True)
 84 |         + sorted(loc2ann_list["lower-right"], key=sort_by_ann_rad, reverse=False)
 85 |         + sorted(loc2ann_list["lower-left"], key=sort_by_ann_rad, reverse=True)
 86 |         + sorted(loc2ann_list["upper-left"], key=sort_by_ann_rad, reverse=False)
 87 |     )
 88 | 
 89 | 
 90 | def _is_ann_rad_shift_target_loc(rad: float) -> bool:
 91 |     """Check radian is annotation radian shift target or not
 92 | 
 93 |     Parameters
 94 |     ----------
 95 |     rad : float
 96 |         Annotation radian position
 97 | 
 98 |     Returns
 99 |     -------
100 |     result : bool
101 |         Target or not
102 |     """
103 |     deg = degrees(rad)
104 |     return 30 <= deg <= 150 or 210 <= deg <= 330
105 | 
106 | 
def _get_ann2rad_shift_candidates(
    ann_list: list[Annotation],
) -> dict[str, NDArray[np.float64]]:
    """Get candidate radian shift position of annotation text

    For each annotation, the candidate range is bounded on each side by the
    neighboring annotation (in degree order) when it is within
    `config.ann_adjust.max_rad_shift`, otherwise by the maximum shift itself.
    Candidates are spaced by `config.ann_adjust.drad`, the current radian is
    appended, and the result is ordered by distance from the current radian.

    Parameters
    ----------
    ann_list : list[Annotation]
        Annotation list

    Returns
    -------
    ann2shift_rad_candidates : dict[str, NDArray[np.float64]]
        Annotation & candidate radian shift position dict (keyed by `str(ann)`)
    """
    shift_limit = config.ann_adjust.max_rad_shift
    step = config.ann_adjust.drad

    ordered = sorted(ann_list, key=lambda a: utils.plot.degrees(a.xyann[0]))
    last_pos = len(ordered) - 1
    candidates_by_ann: dict[str, NDArray[np.float64]] = {}
    for pos, ann in enumerate(ordered):
        rad = ann.xyann[0]
        # First/last annotation has no neighbor on that side: use itself
        prev_rad = rad if pos == 0 else ordered[pos - 1].xyann[0]
        next_rad = rad if pos == last_pos else ordered[pos + 1].xyann[0]

        # Do not shift past the neighbor, nor further than the shift limit
        lower = rad - shift_limit if abs(rad - prev_rad) > shift_limit else prev_rad
        upper = rad + shift_limit if abs(next_rad - rad) > shift_limit else next_rad

        # Nearest-to-current candidates come first
        grid = np.append(np.arange(lower, upper + step, step), rad)
        nearest_first = np.argsort(np.abs(grid - rad))
        candidates_by_ann[str(ann)] = grid[nearest_first]
    return candidates_by_ann
151 | 


--------------------------------------------------------------------------------
/src/pycirclize/utils/example_data/trees/large_example.nwk:
--------------------------------------------------------------------------------
1 | ((Tachyglossus_aculeatus:0.041039,Ornithorhynchus_anatinus:0.032667):0.151655,(((Monodelphis_domestica:0.023161,Gracilinanus_agilis:0.019479):0.044743,((Dromiciops_gliroides:0.04538,(Antechinus_flavipes:0.012552,Sarcophilus_harrisii:0.013098):0.049783):0.0,(Trichosurus_vulpecula:0.031435,(Phascolarctos_cinereus:0.019468,Vombatus_ursinus:0.0201):0.013778):0.009855):0.011865):0.14674,(((Choloepus_didactylus:0.051362,Dasypus_novemcinctus:0.0628):0.030968,((Trichechus_manatus:0.034498,(Elephas_maximus:0.001934,Loxodonta_africana:0.002948):0.038159):0.014145,((Orycteropus_afer:0.060603,Elephantulus_edwardii:0.111994):0.0,(Echinops_telfairi:0.122896,Chrysochloris_asiatica:0.077968):0.004724):0.003366):0.028953):0.003561,((((Galeopterus_variegatus:0.058224,Tupaia_chinensis:0.096364):0.0,((Otolemur_garnettii:0.061401,(Propithecus_coquereli:0.020446,(Microcebus_murinus:0.028666,Lemur_catta:0.021612):0.0):0.020103):0.016933,(Carlito_syrichta:0.074345,((Aotus_nancymaae:0.012121,(Callithrix_jacchus:0.015702,(Saimiri_boliviensis:0.013572,(Sapajus_apella:0.002932,Cebus_imitator:0.003296):0.008564):0.001747):0.0):0.019315,(((Hylobates_moloch:0.00333,Nomascus_leucogenys:0.003123):0.006897,(Pongo_abelii:0.008478,(Gorilla_gorilla:0.004021,(Homo_sapiens:0.003164,(Pan_troglodytes:0.001144,Pan_paniscus:0.001106):0.001865):0.001052):0.003929):0.00114):0.004949,(((Colobus_angolensis:0.005275,Piliocolobus_tephrosceles:0.004777):0.001021,(Trachypithecus_francoisi:0.004061,(Rhinopithecus_roxellana:9.94E-4,Rhinopithecus_bieti:0.002571):0.00248):0.001915):0.002828,(Chlorocebus_sabaeus:0.005852,((Macaca_nemestrina:0.002166,(Macaca_thibetana:0.001527,(Macaca_fascicularis:0.001199,Macaca_mulatta:0.001166):3.96E-4):3.7E-5):0.001884,((Mandrillus_leucophaeus:0.00342,Cercocebus_atys:0.003303):4.58E-4,(Papio_anubis:0.002032,Theropithecus_gelada:0.001939):8.91E-4):5.6E-4):0.00146):0.00244):0.009713):0.009444):0.032854):0.003177):0.007546):8.37E-4,((Oryctolagus_cuniculus:0.058469,(Ochotona_curzoniae
:0.018974,Ochotona_princeps:0.017454):0.087191):0.055735,((((Heterocephalus_glaber:0.031255,Fukomys_damarensis:0.040089):0.016404,(Cavia_porcellus:0.059037,(Chinchilla_lanigera:0.040164,Octodon_degus:0.057341):0.005638):0.012104):0.056969,((Urocitellus_parryii:0.004837,Ictidomys_tridecemlineatus:0.005738):0.00319,(Marmota_marmota:0.002762,(Marmota_monax:0.002463,Marmota_flaviventris:0.002079):0.0):0.004337):0.067595):0.0,((Castor_canadensis:0.064132,(Perognathus_longimembris:0.047365,(Dipodomys_spectabilis:0.005613,Dipodomys_ordii:0.007268):0.03364):0.069847):0.007988,(Jaculus_jaculus:0.099935,(Nannospalax_galili:0.067109,(((Acomys_russatus:0.053457,Meriones_unguiculatus:0.054028):0.006455,((Rattus_norvegicus:0.005842,Rattus_rattus:0.006673):0.034271,((Grammomys_surdaster:0.018483,Arvicanthis_niloticus:0.018568):0.011928,(Apodemus_sylvaticus:0.037346,(Mastomys_coucha:0.028128,(Mus_pahari:0.018467,(Mus_musculus:0.00926,Mus_caroli:0.009222):0.009302):0.013992):0.001743):0.002912):0.002225):0.026092):0.006639,((Onychomys_torridus:0.020338,(Peromyscus_californicus:0.010724,(Peromyscus_maniculatus:0.006007,Peromyscus_leucopus:0.00573):0.010541):0.005231):0.022089,((Phodopus_roborovskii:0.031875,(Cricetulus_griseus:0.024581,Mesocricetus_auratus:0.030155):0.003172):0.013321,(Myodes_glareolus:0.017788,(Arvicola_amphibius:0.015589,(Microtus_fortis:0.00926,(Microtus_ochrogaster:0.007048,Microtus_oregoni:0.006925):0.002372):0.007538):0.002737):0.032918):5.51E-4):0.012794):0.044807):0.020382):0.020997):0.00865):0.013867):0.004763):0.009498,(((Talpa_occidentalis:0.034179,Condylura_cristata:0.0477):0.051753,(Erinaceus_europaeus:0.138675,(Suncus_etruscus:0.101524,Sorex_araneus:0.08657):0.082828):0.01047):0.01438,((((Rhinolophus_ferrumequinum:0.031471,Hipposideros_armiger:0.032399):0.021734,(Rousettus_aegyptiacus:0.022455,(Pteropus_alecto:0.003701,(Pteropus_vampyrus:0.002,Pteropus_giganteus:0.002104):0.001403):0.011809):0.040275):0.004114,((Desmodus_rotundus:0.026345,((Phyllostomus
_discolor:0.010528,Phyllostomus_hastatus:0.00889):0.021179,(Artibeus_jamaicensis:0.020278,Sturnira_hondurensis:0.021968):0.011022):0.006395):0.038395,(Molossus_molossus:0.048736,(Miniopterus_natalensis:0.047506,((Pipistrellus_kuhlii:0.045693,Eptesicus_fuscus:0.012037):0.008305,((Myotis_davidii:0.01195,Myotis_myotis:0.007549):0.003267,(Myotis_lucifugus:0.005237,Myotis_brandtii:0.006754):0.001948):0.011622):0.033082):0.003907):0.006671):0.015341):0.01496,(((Ceratotherium_simum:0.030622,((Equus_asinus:0.001561,Equus_quagga:0.001671):0.001588,(Equus_caballus:7.55E-4,Equus_przewalskii:0.001271):0.002407):0.03485):0.018203,((Vicugna_pacos:0.007718,(Camelus_dromedarius:0.002158,(Camelus_ferus:5.56E-4,Camelus_bactrianus:0.00138):8.61E-4):0.005174):0.046126,(Sus_scrofa:0.058797,(((Odocoileus_virginianus:0.011514,(Cervus_canadensis:0.001134,Cervus_elaphus:9.53E-4):0.007974):0.01094,((Oryx_dammah:0.010016,(Budorcas_taxicolor:0.005618,(Capra_hircus:0.004961,Ovis_aries:0.005211):8.56E-4):0.004159):0.006643,(Bubalus_bubalis:0.007784,((Bison_bison:0.002949,Bos_mutus:0.00256):7.75E-4,(Bos_taurus:0.001308,Bos_indicus:0.00139):0.001056):0.005658):0.006567):0.004971):0.045229,((Balaenoptera_musculus:0.003928,Balaenoptera_acutorostrata:0.005117):0.006735,(Physeter_catodon:0.011701,(Lipotes_vexillifer:0.00984,(((Monodon_monoceros:0.001652,Delphinapterus_leucas:0.001686):0.002258,(Neophocaena_asiaeorientalis:0.001875,Phocoena_sinus:0.001814):0.003335):0.001772,(Orcinus_orca:0.002651,(Lagenorhynchus_obliquidens:0.002594,(Tursiops_truncatus:0.002347,Globicephala_melas:0.002389):1.6E-4):3.39E-4):0.003465):0.002546):0.004627):0.001056):0.021301):0.008221):0.003537):0.021401):2.63E-4,((Manis_pentadactyla:0.008628,Manis_javanica:0.010455):0.066826,(((Hyaena_hyaena:0.025938,Suricata_suricatta:0.033926):0.009827,((Panthera_tigris:0.001588,(Panthera_uncia:0.002026,(Panthera_leo:0.001604,Panthera_pardus:0.001384):5.89E-4):0.0):0.0032,(Leopardus_geoffroyi:0.004188,((Felis_catus:0.00344,(Lynx_canade
nsis:0.003815,Prionailurus_bengalensis:0.003718):0.0):2.06E-4,(Acinonyx_jubatus:0.002889,(Puma_yagouaroundi:0.002737,Puma_concolor:0.003565):4.61E-4):0.001087):3.86E-4):6.66E-4):0.018981):0.019759,((Canis_lupus:0.005075,(Vulpes_lagopus:0.00214,Vulpes_vulpes:0.00204):0.003802):0.038703,((Meles_meles:0.013989,((Lontra_canadensis:0.007132,Enhydra_lutris:0.006685):0.003833,(Neogale_vison:0.00697,(Mustela_erminea:0.003844,Mustela_putorius:0.004712):0.003036):0.005606):0.00278):0.027582,((Ailuropoda_melanoleuca:0.010243,(Ursus_americanus:0.001562,(Ursus_maritimus:0.001219,Ursus_arctos:9.48E-4):4.69E-4):0.006924):0.018488,((Odobenus_rosmarus:0.005668,(Callorhinus_ursinus:0.00245,(Zalophus_californianus:0.001235,Eumetopias_jubatus:0.001192):0.001217):0.003828):0.006737,((Phoca_vitulina:0.001376,Halichoerus_grypus:0.001964):0.00413,(Neomonachus_schauinslandi:0.003865,(Leptonychotes_weddellii:0.003686,(Mirounga_leonina:9.7E-4,Mirounga_angustirostris:0.001092):0.003679):3.11E-4):0.002457):0.005457):0.012989):0.0):0.009482):0.008295):0.025497):0.001876):4.5E-4):0.003616):0.011384):0.007673):0.16004):0.151655);
2 | 


--------------------------------------------------------------------------------
/src/pycirclize/utils/plot.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import math
  4 | from typing import TYPE_CHECKING, Any, Literal
  5 | 
  6 | from matplotlib.colors import Normalize, to_rgb
  7 | 
  8 | if TYPE_CHECKING:
  9 |     from matplotlib.projections import PolarAxes
 10 |     from matplotlib.transforms import Bbox
 11 |     from matplotlib.typing import ColorType
 12 | 
 13 | 
 14 | def degrees(rad: float) -> float:
 15 |     """Convert radian to positive degree (`0 - 360`)
 16 | 
 17 |     Parameters
 18 |     ----------
 19 |     rad : float
 20 |         Target radian
 21 | 
 22 |     Returns
 23 |     -------
 24 |     deg : float
 25 |         Positive degree (`0 - 360`)
 26 |     """
 27 |     # Radian to degree
 28 |     deg = math.degrees(rad)
 29 |     min_thr = 1e-10
 30 |     if abs(deg) < min_thr:
 31 |         deg = 0
 32 |     # Normalize degree in 0 - 360 range
 33 |     deg = deg % 360
 34 |     # Negative to positive
 35 |     if deg < 0:
 36 |         deg += 360
 37 |     return deg
 38 | 
 39 | 
 40 | def is_lower_loc(rad: float) -> bool:
 41 |     """Check target radian is lower location or not
 42 | 
 43 |     Parameters
 44 |     ----------
 45 |     rad : float
 46 |         Target radian
 47 | 
 48 |     Returns
 49 |     -------
 50 |     result : bool
 51 |         Lower location or not
 52 |     """
 53 |     deg = math.degrees(rad)
 54 |     return -270 <= deg < -90 or 90 <= deg < 270
 55 | 
 56 | 
 57 | def is_right_loc(rad: float) -> bool:
 58 |     """Check target radian is right location or not
 59 | 
 60 |     Parameters
 61 |     ----------
 62 |     rad : float
 63 |         Target radian
 64 | 
 65 |     Returns
 66 |     -------
 67 |     result : bool
 68 |         Right location or not
 69 |     """
 70 |     deg = math.degrees(rad)
 71 |     return -360 <= deg < -180 or 0 <= deg < 180
 72 | 
 73 | 
 74 | def select_textcolor(fc: ColorType) -> str:
 75 |     """Select `black` or `white` appropriate textcolor from facecolor relative luminance
 76 | 
 77 |     Relative luminance: <https://www.w3.org/TR/WCAG21/#dfn-relative-luminance>
 78 | 
 79 |     Parameters
 80 |     ----------
 81 |     fc : ColorType
 82 |         Target facecolor
 83 | 
 84 |     Returns
 85 |     -------
 86 |     textcolor : str
 87 |         `black` or `white`
 88 |     """
 89 | 
 90 |     def calc_relative_luminance(color: ColorType) -> float:
 91 |         """Calculate relative luminance (0.0 - 1.0)"""
 92 |         color = to_rgb(color)
 93 |         r, g, b = map(
 94 |             lambda v: v / 12.92 if v <= 0.04045 else ((v + 0.055) / 1.055) ** 2.4,
 95 |             color,
 96 |         )
 97 |         return 0.2126 * r + 0.7152 * g + 0.0722 * b
 98 | 
 99 |     lum = calc_relative_luminance(fc)
100 |     return "black" if lum > 0.5 else "white"
101 | 
102 | 
def get_loc(
    rad: float,
) -> Literal["upper-right", "lower-right", "lower-left", "upper-left"]:
    """Get which of the 4 sections a radian position belongs to

    Parameters
    ----------
    rad : float
        Target radian

    Returns
    -------
    loc : str
        Location (`upper-right`|`lower-right`|`lower-left`|`upper-left`)
    """
    deg = degrees(rad)
    # Each entry covers the 90 degree wide range starting at its lower bound
    section_starts = (
        (0, "upper-right"),
        (90, "lower-right"),
        (180, "lower-left"),
    )
    for start, loc in section_starts:
        if start <= deg < start + 90:
            return loc
    return "upper-left"
122 | 
123 | 
def get_ann_relpos(rad: float) -> tuple[float, float]:
    """Get relative position for annotate from the radian text position

    Parameters
    ----------
    rad : float
        Radian text position

    Returns
    -------
    relpos : tuple[float, float]
        Relative position. For `0 - 180` degrees, x is `0.0` and y is the
        degree normalized over `0 - 180`. Otherwise, x is `1.0` and y is
        `1.0` minus the degree normalized over `180 - 360`.
    """
    deg = degrees(rad)
    if not 0 <= deg <= 180:
        left_half_norm = Normalize(180, 360)
        return 1.0, 1.0 - left_half_norm(deg)
    right_half_norm = Normalize(0, 180)
    return 0.0, right_half_norm(deg)
142 | 
143 | 
def plot_bbox(bbox: Bbox, ax: PolarAxes, **kwargs) -> None:
    """Draw the outline of a bbox on the axes (development check helper)

    Parameters
    ----------
    bbox : Bbox
        Bounding box
    ax : PolarAxes
        Polar axes
    **kwargs : dict, optional
        Axes.plot properties (e.g. `color="red", lw=0.5, ls="--", ...`)
        <https://matplotlib.org/stable/api/_as_gen/matplotlib.axes.Axes.plot.html>
    """
    # Express the bbox in axes coordinates, the system used for plotting below
    axes_bbox = bbox.transformed(ax.transAxes.inverted())
    kwargs.setdefault("clip_on", False)
    left, bottom = axes_bbox.x0, axes_bbox.y0
    right, top = axes_bbox.x1, axes_bbox.y1
    # Closed rectangle path: the first corner is repeated at the end
    xs = [left, right, right, left, left]
    ys = [bottom, bottom, top, top, bottom]
    ax.plot(xs, ys, transform=ax.transAxes, **kwargs)
162 | 
163 | 
def get_label_params_by_rad(
    rad: float,
    orientation: str,
    outer: bool = True,
    only_rotation: bool = False,
) -> dict[str, Any]:
    """Get label text parameters suited to a radian position

    Parameters
    ----------
    rad : float
        Radian coordinate
    orientation : str
        Label orientation (`horizontal` or `vertical`)
    outer : bool, optional
        If True, show on `outer` style. Else, show on `inner` style.
    only_rotation : bool, optional
        If True, Only return rotation parameter

    Returns
    -------
    dict_param : dict[str, Any]
        `va`, `ha`, `rotation`, `rotation_mode` dict

    Raises
    ------
    ValueError
        If `orientation` is neither `horizontal` nor `vertical`
    """
    deg = math.degrees(rad)
    lower, right = is_lower_loc(rad), is_right_loc(rad)
    outer_style = bool(outer)

    if orientation == "horizontal":
        ha = "center"
        rotation = 180 - deg if lower else -deg
        # outer: top on the lower side / inner: top on the upper side
        va = "top" if lower == outer_style else "bottom"
    elif orientation == "vertical":
        va = "center_baseline"
        rotation = 90 - deg if right else 270 - deg
        # outer: left on the right side / inner: left on the left side
        ha = "left" if right == outer_style else "right"
    else:
        err_msg = f"'{orientation=} is invalid ('horizontal' or 'vertical')"
        raise ValueError(err_msg)

    if only_rotation:
        return {"rotation": rotation}
    return {"va": va, "ha": ha, "rotation": rotation, "rotation_mode": "anchor"}
214 | 
215 | 
def set_axis_default_kwargs(**kwargs) -> dict[str, Any]:
    """Fill in default axis keyword arguments

    A default is only added when neither the short nor the long spelling of
    the property was given by the caller. Defaults are a simple black axis:
    `fc="none", ec="black", lw=0.5`.

    Returns
    -------
    kwargs : dict[str, Any]
        Given keyword arguments completed with the missing defaults
    """
    # (short key, long key, default value stored under the short key)
    defaults = (
        ("fc", "facecolor", "none"),
        ("ec", "edgecolor", "black"),
        ("lw", "linewidth", 0.5),
    )
    for short_key, long_key, default_value in defaults:
        if short_key in kwargs or long_key in kwargs:
            continue
        kwargs[short_key] = default_value
    return kwargs
233 | 


--------------------------------------------------------------------------------
/src/pycirclize/parser/table.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | from pathlib import Path
  4 | 
  5 | import pandas as pd
  6 | 
  7 | from pycirclize.utils import ColorCycler
  8 | 
  9 | 
 10 | class Table:
 11 |     """Table Parser Base Class"""
 12 | 
 13 |     def __init__(
 14 |         self,
 15 |         table_data: str | Path | pd.DataFrame,
 16 |         *,
 17 |         delimiter: str = "\t",
 18 |     ) -> None:
 19 |         """
 20 |         Parameters
 21 |         ----------
 22 |         table_data : str | Path | pd.DataFrame
 23 |             Table file or Table DataFrame
 24 |         delimiter : str, optional
 25 |             Table file delimiter. By default, `tab` delimiter.
 26 |         """
 27 |         if isinstance(table_data, (str, Path)):
 28 |             table_data = pd.read_csv(table_data, sep=delimiter, index_col=0)
 29 |         self._dataframe = table_data
 30 | 
 31 |     @property
 32 |     def dataframe(self) -> pd.DataFrame:
 33 |         """Table dataframe"""
 34 |         return self._dataframe
 35 | 
 36 |     @property
 37 |     def row_names(self) -> list[str]:
 38 |         """Table row names"""
 39 |         return list(map(str, self.dataframe.index))
 40 | 
 41 |     @property
 42 |     def col_names(self) -> list[str]:
 43 |         """Table column names"""
 44 |         return list(map(str, self.dataframe.columns))
 45 | 
 46 |     @property
 47 |     def row_num(self) -> int:
 48 |         """Table row count number"""
 49 |         return len(self.dataframe.index)
 50 | 
 51 |     @property
 52 |     def col_num(self) -> int:
 53 |         """Table column count number"""
 54 |         return len(self.dataframe.columns)
 55 | 
 56 |     def get_col_name2color(self, cmap: str = "tab10") -> dict[str, str]:
 57 |         """Get column name & color dict
 58 | 
 59 |         Parameters
 60 |         ----------
 61 |         cmap : str, optional
 62 |             Colormap (e.g. `tab10`, `Set3`)
 63 | 
 64 |         Returns
 65 |         -------
 66 |         col_name2color : dict[str, str]
 67 |             Column name & color dict
 68 |         """
 69 |         ColorCycler.set_cmap(cmap)
 70 |         return {n: ColorCycler.get_color() for n in self.col_names}
 71 | 
 72 |     def get_row_name2color(self, cmap: str = "tab10") -> dict[str, str]:
 73 |         """Get row name & color dict
 74 | 
 75 |         Parameters
 76 |         ----------
 77 |         cmap : str, optional
 78 |             Colormap (e.g. `tab10`, `Set3`)
 79 | 
 80 |         Returns
 81 |         -------
 82 |         col_name2color : dict[str, str]
 83 |             Column name & color dict
 84 |         """
 85 |         ColorCycler.set_cmap(cmap)
 86 |         return {n: ColorCycler.get_color() for n in self.row_names}
 87 | 
 88 |     def __str__(self) -> str:
 89 |         return str(self.dataframe)
 90 | 
 91 | 
 92 | class StackedBarTable(Table):
 93 |     """Table Parser Class
 94 | 
 95 |     Basically used for plotting stacked bar chart
 96 |     """
 97 | 
 98 |     @property
 99 |     def row_sum_vmax(self) -> float:
100 |         """Max value in each row values sum"""
101 |         return max(map(sum, self.dataframe.itertuples(index=False)))
102 | 
103 |     @property
104 |     def row_name2sum(self) -> dict[str, float]:
105 |         """Row name & sum dict"""
106 |         row_sum_list = list(map(sum, self.dataframe.itertuples(index=False)))
107 |         return dict(zip(self.row_names, row_sum_list, strict=True))
108 | 
109 |     @property
110 |     def stacked_bar_heights(self) -> list[list[float]]:
111 |         """Stacked bar heights"""
112 |         return [list(self.dataframe[col_name]) for col_name in self.col_names]
113 | 
114 |     @property
115 |     def stacked_bar_bottoms(self) -> list[list[float]]:
116 |         """Stacked bar bottoms"""
117 |         bottoms: list[list[float]] = []
118 |         row_name2stack_value = {name: 0.0 for name in self.row_names}
119 |         for col_name in self.col_names:
120 |             bottom = [row_name2stack_value[name] for name in self.row_names]
121 |             for row_name in self.row_names:
122 |                 value = float(self.dataframe.at[row_name, col_name])
123 |                 row_name2stack_value[row_name] += value
124 |             bottoms.append(bottom)
125 |         return bottoms
126 | 
127 |     def calc_bar_label_x_list(
128 |         self,
129 |         track_size: float,
130 |     ) -> list[float]:
131 |         """Calculate list of x position for bar label plot
132 | 
133 |         Parameters
134 |         ----------
135 |         track_size : float
136 |             Track size
137 | 
138 |         Returns
139 |         -------
140 |         x_list : list[float]
141 |             List of x position for bar label plot
142 |         """
143 |         interval = track_size / len(self.row_names)
144 |         return [cnt * interval + (interval / 2) for cnt in range(len(self.row_names))]
145 | 
146 |     def calc_barh_label_r_list(
147 |         self,
148 |         track_r_lim: tuple[float, float],
149 |     ) -> list[float]:
150 |         """Calculate list of radius position for horizontal bar label plot
151 | 
152 |         Parameters
153 |         ----------
154 |         track_r_lim : tuple[float, float]
155 |             Track radius limit region
156 | 
157 |         Returns
158 |         -------
159 |         bar_label_r_list : list[float]
160 |             List of radius position for horizontal bar label plot
161 |         """
162 |         rmin, rmax = track_r_lim
163 |         interval = (rmax - rmin) / len(self.row_names)
164 |         bar_label_r_list: list[float] = []
165 |         for cnt in range(len(self.row_names)):
166 |             r_center = rmax - (interval * cnt) - (interval / 2)
167 |             bar_label_r_list.append(r_center)
168 |         return bar_label_r_list
169 | 
170 |     def calc_barh_r_lim_list(
171 |         self,
172 |         track_r_lim: tuple[float, float],
173 |         width: float = 0.8,
174 |     ) -> list[tuple[float, float]]:
175 |         """Calculate list of radius limit for horizontal bar plot
176 | 
177 |         Parameters
178 |         ----------
179 |         track_r_lim : tuple[float, float]
180 |             Track radius limit region
181 |         width : float, optional
182 |             Bar width ratio (0.0 - 1.0)
183 | 
184 |         Returns
185 |         -------
186 |         list[tuple[float, float]]
187 |             List of radius limit for horizontal bar plot
188 |         """
189 |         rmin, rmax = track_r_lim
190 |         interval = (rmax - rmin) / len(self.row_names)
191 |         bar_r_lim_list: list[tuple[float, float]] = []
192 |         for cnt in range(len(self.row_names)):
193 |             r_center = rmax - (interval * cnt) - (interval / 2)
194 |             r_top = r_center + (interval / 2) * width
195 |             r_bottom = r_center - (interval / 2) * width
196 |             bar_r_lim_list.append((r_bottom, r_top))
197 |         return bar_r_lim_list
198 | 
199 | 
class RadarTable(Table):
    """Radar Table Parser Class"""

    @property
    def row_name2values(self) -> dict[str, list[float]]:
        """Row name & values

        Rows are read by position, so dataframes whose index labels are not
        strings (e.g. integers) work with the stringified `row_names`.
        """
        return {
            row_name: list(self.dataframe.iloc[pos])
            for pos, row_name in enumerate(self.row_names)
        }

    def get_row_tooltip(self, target_row: str) -> list[str]:
        """Get target row tooltip

        Parameters
        ----------
        target_row : str
            Target row name (`KeyError` is raised if not in `row_names`)

        Returns
        -------
        tooltip : list[str]
            One `"{row}\\n{column}:{value}"` text per column
        """
        values = self.row_name2values[target_row]
        return [
            f"{target_row}\n{col_name}:{v}"
            for col_name, v in zip(self.col_names, values, strict=True)
        ]
218 | 


--------------------------------------------------------------------------------
/src/pycirclize/parser/matrix.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | from collections import defaultdict
  4 | from pathlib import Path
  5 | 
  6 | import pandas as pd
  7 | 
  8 | 
  9 | class Matrix:
 10 |     """Matrix Parser Class"""
 11 | 
 12 |     def __init__(
 13 |         self,
 14 |         matrix: str | Path | pd.DataFrame,
 15 |         *,
 16 |         delimiter: str = "\t",
 17 |     ) -> None:
 18 |         """
 19 |         Parameters
 20 |         ----------
 21 |         matrix : str | Path | pd.DataFrame
 22 |             Matrix file or Matrix DataFrame
 23 |         delimiter : str, optional
 24 |             Matrix file delimiter. By default, `tab` delimiter.
 25 |         """
 26 |         # If input matrix is file path, convert to pandas dataframe
 27 |         if isinstance(matrix, (str, Path)):
 28 |             matrix = pd.read_csv(matrix, delimiter=delimiter, index_col=0)
 29 | 
 30 |         # Calculate data size & link positions
 31 |         rev_matrix = matrix.iloc[::-1, ::-1]
 32 |         name2size, links = defaultdict(float), []
 33 |         for row_name, row in zip(rev_matrix.index, rev_matrix.values, strict=True):
 34 |             for col_name, value in zip(rev_matrix.columns, row, strict=True):
 35 |                 if value <= 0:
 36 |                     continue
 37 |                 row_size, col_size = name2size[row_name], name2size[col_name]
 38 |                 if row_name == col_name:
 39 |                     link_row = (row_name, row_size, row_size + value)
 40 |                     link_col = (col_name, col_size + (value * 2), col_size + value)
 41 |                 else:
 42 |                     link_row = (row_name, row_size, row_size + value)
 43 |                     link_col = (col_name, col_size + value, col_size)
 44 |                 links.append((link_row, link_col))
 45 |                 name2size[row_name] += value
 46 |                 name2size[col_name] += value
 47 | 
 48 |         self._matrix = matrix
 49 |         self._col_names = list(map(str, matrix.columns))
 50 |         self._row_names = list(map(str, matrix.index))
 51 |         self._links = links
 52 |         self._name2size = name2size
 53 | 
 54 |     @staticmethod
 55 |     def parse_fromto_table(
 56 |         fromto_table: str | Path | pd.DataFrame,
 57 |         *,
 58 |         order: str | list[str] | None = None,
 59 |         delimiter: str = "\t",
 60 |         header: bool = True,
 61 |     ) -> Matrix:
 62 |         """Parse from-to table and convert to Matrix
 63 | 
 64 |         ```
 65 |         From-to Table Example
 66 |         # from  to  value
 67 |         #    A   B     10
 68 |         #    A   C      5
 69 |         #    A   D     15
 70 |         #    B   D      8
 71 |         #    C   D      6
 72 |         ```
 73 | 
 74 |         Parameters
 75 |         ----------
 76 |         fromto_table : str | Path | pd.DataFrame
 77 |             From-to table file or DataFrame
 78 |         order : str | list[str] | None, optional
 79 |             Sort order of matrix for plotting Chord Diagram. If `None`, no sorting.
 80 |             If `asc`|`desc`, sort in ascending(or descending) order by node size.
 81 |             If node name list is set, sort in user specified node order.
 82 |         delimiter : str, optional
 83 |             From-to table delimiter
 84 |         header : bool, optional
 85 |             If True, from-to table file first line is parsed as header line.
 86 | 
 87 |         Returns
 88 |         -------
 89 |         matrix : Matrix
 90 |             Matrix converted from from-to table
 91 |         """
 92 |         # If input from-to table is file path, convert to pandas dataframe
 93 |         if isinstance(fromto_table, (str, Path)):
 94 |             fromto_table = pd.read_csv(
 95 |                 fromto_table,
 96 |                 delimiter=delimiter,
 97 |                 header=0 if header else None,
 98 |             )
 99 | 
100 |         # Parse from-to table dataframe
101 |         label2value_sum = defaultdict(int)
102 |         fromto2value = defaultdict(int)
103 |         for row in fromto_table.itertuples():
104 |             from_label, to_label, value = str(row[1]), str(row[2]), row[3]
105 |             if float(value) >= 0:
106 |                 fromto = f"{from_label}-->{to_label}"
107 |                 fromto2value[fromto] = value
108 |                 label2value_sum[from_label] += value
109 |                 label2value_sum[to_label] += value
110 |         all_labels = list(map(str, label2value_sum.keys()))
111 | 
112 |         # Set user specified label order
113 |         if isinstance(order, (list, tuple)):
114 |             if set(all_labels) == set(order):
115 |                 all_labels = order
116 |             else:
117 |                 raise ValueError(f"'order' is not match 'all_labels' in from-to table.\n{order=}\n{all_labels=}")  # fmt: skip  # noqa: E501
118 |         elif order in ("asc", "desc"):
119 |             items = label2value_sum.items()
120 |             sorted_items = sorted(items, key=lambda v: v[1], reverse=order == "desc")
121 |             all_labels = [item[0] for item in sorted_items]
122 |         elif order is not None:
123 |             raise ValueError(f"{order=} is invalid (list[str]|`asc`|`desc`).")
124 | 
125 |         # Convert from-to table to matrix
126 |         matrix_data = []
127 |         for row_label in all_labels:
128 |             row_data = []
129 |             for col_label in all_labels:
130 |                 row_data.append(fromto2value[f"{row_label}-->{col_label}"])
131 |             matrix_data.append(row_data)
132 |         matrix_df = pd.DataFrame(matrix_data, index=all_labels, columns=all_labels)
133 | 
134 |         return Matrix(matrix_df)
135 | 
136 |     @property
137 |     def all_names(self) -> list[str]:
138 |         """Row + Column all names"""
139 |         return list(self.to_sectors().keys())
140 | 
141 |     @property
142 |     def col_names(self) -> list[str]:
143 |         """Column names"""
144 |         return self._col_names
145 | 
146 |     @property
147 |     def row_names(self) -> list[str]:
148 |         """Row names"""
149 |         return self._row_names
150 | 
151 |     @property
152 |     def dataframe(self) -> pd.DataFrame:
153 |         """Matrix dataframe"""
154 |         return self._matrix
155 | 
156 |     def sort(self, order: str | list[str] = "asc") -> Matrix:
157 |         """Sort order of matrix
158 | 
159 |         Parameters
160 |         ----------
161 |         order : str | list[str], optional
162 |             Sort order of matrix for plotting Chord Diagram.
163 |             If `asc`|`desc`, sort in ascending(or descending) order by node size.
164 |             If node name list is set, sort in user specified node order.
165 | 
166 |         Returns
167 |         -------
168 |         matrix : Matrix
169 |             Sorted matrix
170 |         """
171 |         fromto_table = self.to_fromto_table()
172 |         return self.parse_fromto_table(fromto_table, order=order)
173 | 
174 |     def to_sectors(self) -> dict[str, float]:
175 |         """Convert matrix to sectors for Circos initialization
176 | 
177 |         >>> # Example usage
178 |         >>> matrix = Matrix(matrix_file)
179 |         >>> circos = Circos(matrix.to_sectors())
180 | 
181 |         Returns
182 |         -------
183 |         sectors : dict[str, float]
184 |             Sector dict (e.g. `{"A": 12, "B": 15, "C":20, ...}`)
185 |         """
186 |         sectors = {}
187 |         for row_name in self.row_names:
188 |             sectors[row_name] = self._name2size[row_name]
189 |         for col_name in self.col_names:
190 |             sectors[col_name] = self._name2size[col_name]
191 |         return sectors
192 | 
193 |     def to_links(
194 |         self,
195 |     ) -> list[tuple[tuple[str, float, float], tuple[str, float, float]]]:
196 |         """Convert matrix to links data for `circos.link()` method
197 | 
198 |         Returns
199 |         -------
200 |         link_target1 : tuple[str, float, float]
201 |             name1, start1, end1
202 |         link_target2 : tuple[str, float, float]
203 |             name2, start2, end2
204 | 
205 |         Examples
206 |         --------
207 |         >>> matrix = Matrix(matrix_file)
208 |         >>> circos = Circos(matrix.to_sectors())
209 |         >>> for link in matrix.to_links():
210 |         >>>    circos.link(*link)
211 |         """
212 |         return self._links
213 | 
214 |     def to_fromto_table(self) -> pd.DataFrame:
215 |         """Convert matrix to from-to table dataframe
216 | 
217 |         Returns
218 |         -------
219 |         fromto_table : pd.DataFrame
220 |             From-to table dataframe
221 |         """
222 |         fromto_table_data = []
223 |         for row_name in self.row_names:
224 |             for col_name in self.col_names:
225 |                 value = self.dataframe[col_name][row_name]
226 |                 if value > 0:
227 |                     fromto_table_data.append([row_name, col_name, value])
228 |         return pd.DataFrame(fromto_table_data, columns=["from", "to", "value"])
229 | 
230 |     def __str__(self) -> str:
231 |         return str(self.dataframe)
232 | 


--------------------------------------------------------------------------------
/src/pycirclize/utils/dataset.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import csv
  4 | from dataclasses import dataclass
  5 | from io import StringIO, TextIOWrapper
  6 | from pathlib import Path
  7 | from urllib.request import urlretrieve
  8 | 
  9 | from Bio import Entrez
 10 | 
 11 | from pycirclize import config
 12 | 
 13 | 
 14 | def load_prokaryote_example_file(
 15 |     filename: str,
 16 |     cache_dir: str | Path | None = None,
 17 |     overwrite_cache: bool = False,
 18 | ) -> Path:
 19 |     """Load pycirclize example Genbank or GFF file
 20 | 
 21 |     Load example file from <https://github.com/moshi4/pycirclize-data/>
 22 |     and cache file in local directory (Default: `~/.cache/pycirclize/`).
 23 | 
 24 |     List of example Genbank or GFF filename
 25 | 
 26 |     - `enterobacteria_phage.[gbk|gff]`
 27 |     - `mycoplasma_alvi.[gbk|gff]`
 28 |     - `escherichia_coli.[gbk|gff].gz`
 29 | 
 30 |     Parameters
 31 |     ----------
 32 |     filename : str
 33 |         Genbank or GFF filename (e.g. `enterobacteria_phage.gff`)
 34 |     cache_dir : str | Path | None, optional
 35 |         Output cache directory (Default: `~/.cache/pycirclize/`)
 36 |     overwrite_cache : bool, optional
 37 |         If True, overwrite cache file.
 38 |         Assumed to be used when cache file is corrupt.
 39 | 
 40 |     Returns
 41 |     -------
 42 |     file_path : Path
 43 |         Genbank or GFF file
 44 |     """
 45 |     # Check specified filename exists or not
 46 |     if filename not in config.PROKARYOTE_FILES:
 47 |         raise ValueError(f"{filename=} not found.")
 48 | 
 49 |     # Cache local directory
 50 |     if cache_dir is None:
 51 |         package_name = __name__.split(".")[0]
 52 |         cache_base_dir = Path.home() / ".cache" / package_name
 53 |         cache_dir = cache_base_dir / "prokaryote"
 54 |         cache_dir.mkdir(parents=True, exist_ok=True)
 55 |     else:
 56 |         cache_dir = Path(cache_dir)
 57 |         if not cache_dir.exists():
 58 |             raise ValueError(f"{cache_dir=} not exists.")
 59 | 
 60 |     # Download file
 61 |     file_url = config.GITHUB_DATA_URL + f"prokaryote/{filename}"
 62 |     file_path = cache_dir / filename
 63 |     if overwrite_cache or not file_path.exists():
 64 |         urlretrieve(file_url, file_path)
 65 | 
 66 |     return file_path
 67 | 
 68 | 
 69 | def load_eukaryote_example_dataset(
 70 |     name: str = "hg38",
 71 |     cache_dir: str | Path | None = None,
 72 |     overwrite_cache: bool = False,
 73 | ) -> tuple[Path, Path, list[ChrLink]]:
 74 |     """Load pycirclize eukaryote example dataset
 75 | 
 76 |     Load example file from <https://github.com/moshi4/pycirclize-data/>
 77 |     and cache file in local directory (Default: `~/.cache/pycirclize/`).
 78 | 
 79 |     List of dataset contents (download from UCSC)
 80 | 
 81 |     1. Chromosome BED file (e.g. `chr1 0 248956422`)
 82 |     2. Cytoband file (e.g. `chr1 0 2300000 p36.33 gneg`)
 83 |     3. Chromosome links (e.g. `chr1 1000 4321 chr3 8000 5600`)
 84 | 
 85 |     Parameters
 86 |     ----------
 87 |     name : str, optional
 88 |         Dataset name (`hg38`|`hs1`|`mm10`|`mm39`)
 89 |     cache_dir : str | Path | None, optional
 90 |         Output cache directory (Default: `~/.cache/pycirclize/`)
 91 |     overwrite_cache : bool
 92 |         If True, overwrite cache dataset.
 93 |         Assumed to be used when cache dataset is corrupt.
 94 | 
 95 |     Returns
 96 |     -------
 97 |     chr_bed_file : Path
 98 |         BED file
 99 |     cytoband_file : Path
100 |         Cytoband file
101 |     chr_links : list[ChrLink]
102 |         Chromosome links
103 |     """
104 |     # Check specified name dataset exists or not
105 |     if name not in config.EUKARYOTE_DATASET:
106 |         available_dataset = list(config.EUKARYOTE_DATASET.keys())
107 |         raise ValueError(f"{name=} dataset not found.\n{available_dataset=}")
108 | 
109 |     # Dataset cache local directory
110 |     if cache_dir is None:
111 |         package_name = __name__.split(".")[0]
112 |         cache_base_dir = Path.home() / ".cache" / package_name
113 |         cache_dir = cache_base_dir / "eukaryote" / name
114 |         cache_dir.mkdir(parents=True, exist_ok=True)
115 |     else:
116 |         cache_dir = Path(cache_dir)
117 |         if not cache_dir.exists():
118 |             raise ValueError(f"{cache_dir=} not exists.")
119 | 
120 |     # Download & cache dataset
121 |     eukaryote_files: list[Path] = []
122 |     chr_links: list[ChrLink] = []
123 |     for filename in config.EUKARYOTE_DATASET[name]:
124 |         file_url = config.GITHUB_DATA_URL + f"eukaryote/{name}/{filename}"
125 |         file_path = cache_dir / filename
126 |         if overwrite_cache or not file_path.exists():
127 |             urlretrieve(file_url, file_path)
128 |         if str(file_path).endswith("link.tsv"):
129 |             chr_links = ChrLink.load(file_path)
130 |         else:
131 |             eukaryote_files.append(file_path)
132 | 
133 |     return eukaryote_files[0], eukaryote_files[1], chr_links
134 | 
135 | 
def load_example_image_file(filename: str) -> Path:
    """Load example image file from local package data

    e.g. `python_logo.png`

    The given name is lowercased before it is compared with the `*.png`
    files found in the `example_data/images` directory next to this module.

    Parameters
    ----------
    filename : str
        Image file name

    Returns
    -------
    image_file_path : Path
        Image file path

    Raises
    ------
    FileNotFoundError
        If the lowercased `filename` is not one of the available images
    """
    image_dir = Path(__file__).parent / "example_data" / "images"
    image_filenames = [png_file.name for png_file in image_dir.glob("*.png")]

    lowered_name = filename.lower()
    if lowered_name not in image_filenames:
        raise FileNotFoundError(f"{filename=} is not found.\nAvailable filenames = {image_filenames}")  # fmt: skip  # noqa: E501
    return image_dir / lowered_name
158 | 
159 | 
def load_example_tree_file(filename: str) -> Path:
    """Load example phylogenetic tree file

    The name must exactly match one of the `*.nwk` files found in the
    `example_data/trees` directory next to this module.

    List of example tree filename

    - `small_example.nwk` (7 species)
    - `medium_example.nwk` (21 species)
    - `large_example.nwk` (190 species)
    - `alphabet.nwk` (26 species)

    Parameters
    ----------
    filename : str
        Target filename

    Returns
    -------
    tree_file : Path
        Tree file (Newick format)

    Raises
    ------
    FileNotFoundError
        If `filename` is not one of the available tree files
    """
    tree_dir = Path(__file__).parent / "example_data" / "trees"
    available_filenames = [nwk_file.name for nwk_file in tree_dir.glob("*.nwk")]
    if filename in available_filenames:
        return tree_dir / filename
    raise FileNotFoundError(f"{filename=} is invalid.\n{available_filenames=}")
187 | 
188 | 
def fetch_genbank_by_accid(
    accid: str,
    gbk_outfile: str | Path | None = None,
    email: str | None = None,
) -> StringIO:
    """Fetch genbank text by `Accession ID`

    The record is requested from the `nucleotide` database with
    `Entrez.efetch` (`rettype="gbwithparts"`, text mode).

    Parameters
    ----------
    accid : str
        Accession ID
    gbk_outfile : str | Path | None, optional
        If file path is set, write fetch data to file
    email : str | None, optional
        Email address to notify download limitation (Required for bulk download)

    Returns
    -------
    gbk_str_io : StringIO
        Genbank data

    Notes
    -----
    `Entrez.email` is a module-level setting and is overwritten on every
    call (with an empty string when `email` is None).

    Examples
    --------
    >>> gbk_fetch_data = fetch_genbank_by_accid("NC_002483")
    >>> gbk = Genbank(gbk_fetch_data)
    """
    Entrez.email = "" if email is None else email
    gbk_fetch_data: TextIOWrapper = Entrez.efetch(
        db="nucleotide",
        id=accid,
        rettype="gbwithparts",
        retmode="text",
    )
    # Always release the fetch handle, even if reading fails
    try:
        gbk_text = gbk_fetch_data.read()
    finally:
        gbk_fetch_data.close()

    if gbk_outfile is not None:
        with open(gbk_outfile, "w", encoding="utf-8") as f:
            f.write(gbk_text)

    return StringIO(gbk_text)
228 | 
229 | 
@dataclass
class ChrLink:
    """Chromosome Link DataClass

    One link between a query chromosome region and a reference
    chromosome region.
    """

    query_chr: str
    query_start: int
    query_end: int
    ref_chr: str
    ref_start: int
    ref_end: int

    @staticmethod
    def load(chr_link_file: str | Path) -> list[ChrLink]:
        """Load chromosome link file

        The file is read as tab-delimited text. The first six columns of
        each line are used as
        `query_chr, query_start, query_end, ref_chr, ref_start, ref_end`
        (start/end are converted to `int`). Blank lines are skipped.

        Parameters
        ----------
        chr_link_file : str | Path
            Chromosome link file

        Returns
        -------
        chr_link_list : list[ChrLink]
            Chromosome link list
        """
        chr_link_list: list[ChrLink] = []
        # `newline=""` lets the csv module handle line endings itself
        with open(chr_link_file, encoding="utf-8", newline="") as f:
            for row in csv.reader(f, delimiter="\t"):
                # csv.reader yields an empty list for a blank line
                if not row:
                    continue
                qchr, qstart, qend = row[0], int(row[1]), int(row[2])
                rchr, rstart, rend = row[3], int(row[4]), int(row[5])
                chr_link_list.append(ChrLink(qchr, qstart, qend, rchr, rstart, rend))
        return chr_link_list
263 | 


--------------------------------------------------------------------------------
/src/pycirclize/tooltip.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import textwrap
  4 | import uuid
  5 | from typing import TYPE_CHECKING, Any
  6 | 
  7 | import numpy as np
  8 | from matplotlib.colors import is_color_like, to_rgb
  9 | from matplotlib.patches import Patch, Rectangle
 10 | 
 11 | from pycirclize import config
 12 | from pycirclize.utils.plot import degrees, is_lower_loc, is_right_loc, select_textcolor
 13 | 
 14 | if TYPE_CHECKING:
 15 |     from collections.abc import Sequence
 16 | 
 17 |     from Bio.Phylo.BaseTree import Clade
 18 |     from Bio.SeqFeature import SeqFeature
 19 |     from matplotlib.backend_bases import MouseEvent
 20 |     from matplotlib.collections import Collection
 21 |     from matplotlib.projections import PolarAxes
 22 | 
 23 |     from pycirclize.parser.bed import BedRecord
 24 | 
 25 | 
 26 | def gen_gid(prefix: str | None = None) -> str:
 27 |     """Generate Group ID for tooltip
 28 | 
 29 |     Generate unique id using `uuid.uuid4()` function
 30 | 
 31 |     Parameters
 32 |     ----------
 33 |     prefix : str | None, optional
 34 |         Group ID prefix
 35 | 
 36 |     Returns
 37 |     -------
 38 |     gid : str
 39 |         Group ID
 40 |     """
 41 |     gid = uuid.uuid4().hex
 42 |     return f"{prefix}-{gid}" if prefix else gid
 43 | 
 44 | 
 45 | def to_feature_tooltip(feature: SeqFeature) -> str:
 46 |     """Convert feature to tooltip text"""
 47 |     start, end = int(feature.location.start), int(feature.location.end)  # type: ignore
 48 |     strand = "-" if feature.location.strand == -1 else "+"
 49 |     type = feature.type
 50 |     gene = str(feature.qualifiers.get("gene", ["na"])[0])
 51 |     product = str(feature.qualifiers.get("product", ["na"])[0])
 52 |     product = "\n".join(textwrap.wrap(product, width=40))
 53 |     return "\n".join(
 54 |         [
 55 |             f"location: {start:,} - {end:,} ({strand})",
 56 |             f"length: {end - start:,}",
 57 |             f"type: {type}",
 58 |             f"gene: {gene}",
 59 |             f"product: {product}",
 60 |         ]
 61 |     )
 62 | 
 63 | 
 64 | def to_link_tooltip(
 65 |     sector_region1: tuple[str, float, float],
 66 |     sector_region2: tuple[str, float, float],
 67 |     direction: int,
 68 | ) -> str:
 69 |     """Convert link data to tooltip text"""
 70 |     name1, start1, end1 = sector_region1
 71 |     size1 = abs(end1 - start1)
 72 |     name2, start2, end2 = sector_region2
 73 |     size2 = abs(end2 - start2)
 74 |     direction2symbol = {0: "-", 1: "-->", -1: "<--", 2: "<-->"}
 75 |     return textwrap.dedent(
 76 |         f"""
 77 |         Source
 78 |           {name1}: {size1:,}
 79 |         Target
 80 |           {name2}: {size2:,}
 81 |         Relation
 82 |           {name1} {direction2symbol[direction]} {name2}
 83 |         """
 84 |     )[1:-1]
 85 | 
 86 | 
 87 | def to_node_tooltip(node: Clade) -> str:
 88 |     """Convert tree node to tooltip text"""
 89 |     tooltip = f"Node: {node.name}\nLength: {node.branch_length}"
 90 |     if node.confidence is not None:
 91 |         tooltip += f"\nBootstrap: {node.confidence}"
 92 |     return tooltip
 93 | 
 94 | 
 95 | def to_cytoband_tooltip(rec: BedRecord) -> str:
 96 |     """Convert cytoband bed record to tooltip text"""
 97 |     return textwrap.dedent(
 98 |         f"""
 99 |         Cytoband: {rec.name} ({rec.chr})
100 |         Start: {rec.start:,}
101 |         End: {rec.end:,}
102 |         """
103 |     )[1:-1]
104 | 
105 | 
def set_patch_tooltip(
    ax: PolarAxes,
    patches: Sequence[Patch],
    gid2tooltip: dict[str, str],
) -> None:
    """Register a hover handler that shows a tooltip for patches

    Parameters
    ----------
    ax : PolarAxes
        Target polar axes
    patches : Sequence[Patch]
        Candidate patches (only patches with a group id are hover targets)
    gid2tooltip : dict[str, str]
        Patch group id & tooltip dict

    Note
    ----
        Testing every patch against the mouse position on each motion event
        is expensive, so the patches are grouped into angular segments once
        up front and only the segment under the mouse is searched.
    """
    ann_gid = gen_gid("patch-tooltip")
    # Patches without a group id can never be matched to a tooltip
    seg2patches = _segmentize_patches([p for p in patches if p.get_gid()])

    def hover(e: MouseEvent) -> None:
        # Clear the tooltip annotation drawn by a previous event
        for text in ax.texts:
            if text.get_gid() == ann_gid:
                text.remove()

        # Ignore events outside of the target axes
        if e.inaxes != ax:
            return

        # First patch in the mouse segment that contains the mouse position
        candidates = seg2patches[_get_mouse_segment_idx(e)]
        hit = next((p for p in candidates if p.contains(e)[0]), None)
        if hit is None:
            return

        tooltip = gid2tooltip.get(str(hit.get_gid()))
        if tooltip is None:
            return
        color = to_rgb(hit.get_facecolor())
        ax.annotate(**_build_tooltip_ann_kwargs(tooltip, ann_gid, e, color, ax))

    # Set hover event handler
    fig = ax.get_figure()
    fig.canvas.mpl_connect("motion_notify_event", hover)  # type: ignore
    # Canvas options are applied only when the canvas exposes the attribute
    canvas_options = {
        "toolbar_visible": False,
        "header_visible": False,
        "footer_visible": False,
        "resizable": True,
    }
    for attr, value in canvas_options.items():
        if hasattr(fig.canvas, attr):
            setattr(fig.canvas, attr, value)
172 | 
173 | 
# Number of angular segments that patches are grouped into for hover lookup
SEG_NUM = 360
# Divisor applied to an angle in degrees to obtain its segment index
SEG_DEG = 1
176 | 
177 | 
def _segmentize_patches(patches: list[Patch]) -> list[list[Patch]]:
    """Segmentize patches by patch radian position

    Each patch is registered in every segment overlapped by its angular
    extent (minimum to maximum angle of the patch, converted to degrees).

    Parameters
    ----------
    patches : list[Patch]
        Patches to be segmentized

    Returns
    -------
    seg_patches_list : list[list[Patch]]
        List of length `SEG_NUM`; item `i` holds the patches overlapping segment `i`
    """
    seg_patches_list: list[list[Patch]] = [[] for _ in range(SEG_NUM)]
    for p in patches:
        if isinstance(p, Rectangle):  # For track.bar() Rectangle Patch
            degs = [degrees(p.xy[0]), degrees(p.xy[0] + p.get_width())]
        else:
            rads = np.array(p.get_path().vertices)[:, 0]
            degs = [degrees(rad) for rad in rads]
        # All segments between the first and last touched one are overlapped
        first_idx = int(min(degs) // SEG_DEG)
        last_idx = int(max(degs) // SEG_DEG)
        # Map indices into the valid range: an angle at/after the end of the
        # circle belongs to the last segment, negative angles wrap around.
        # A set keeps each patch from being added twice to the same segment.
        indices = {
            SEG_NUM - 1 if idx >= SEG_NUM else idx % SEG_NUM
            for idx in range(first_idx, last_idx + 1)
        }
        for idx in indices:
            seg_patches_list[idx].append(p)
    return seg_patches_list
193 | 
194 | 
def _get_mouse_segment_idx(e: MouseEvent) -> int:
    """Return the index of the segment under the mouse

    The mouse x data value is converted with `degrees()` and floor-divided
    by `SEG_DEG`.
    """
    mouse_deg = degrees(float(e.xdata))  # type: ignore
    seg_idx = mouse_deg // SEG_DEG
    return int(seg_idx)
199 | 
200 | 
def set_collection_tooltip(ax: PolarAxes, coll: Collection, labels: list[str]) -> None:
    """Set collection tooltip annotation

    Parameters
    ----------
    ax : PolarAxes
        Target polar axes
    coll : Collection
        Plot collection
    labels : list[str]
        Tooltip labels (indexed by the collection member under the mouse)
    """
    gid = gen_gid("collection-tooltip")

    def hover(e: MouseEvent) -> None:
        # Remove previous tooltip annotation if exists
        # (iterate over a snapshot because `remove()` changes the axes texts)
        for ann in list(ax.texts):
            if ann.get_gid() == gid:
                ann.remove()

        # Check axes is target or not
        if e.inaxes != ax:
            return

        # Get hover target
        contains, ctx = coll.contains(e)
        if not contains:
            return
        hover_coll_idx = ctx["ind"][0]
        label = labels[hover_coll_idx]
        if label is None:
            return

        facecolors = coll.get_facecolor()
        if is_color_like(facecolors):
            # Single color shared by all members
            color = to_rgb(facecolors)  # type: ignore
        elif len(facecolors) > 0:
            # Fewer colors than members: index cyclically instead of
            # raising IndexError for members beyond the color list
            color = to_rgb(facecolors[hover_coll_idx % len(facecolors)])
        else:
            # No facecolor at all (e.g. unfilled markers): use white
            color = (1.0, 1.0, 1.0)

        ax.annotate(**_build_tooltip_ann_kwargs(label, gid, e, color, ax))

    # Set hover event handler
    fig = ax.get_figure()
    fig.canvas.mpl_connect("motion_notify_event", hover)  # type: ignore
243 | 
244 | 
def _build_tooltip_ann_kwargs(
    text: str,
    gid: str,
    e: MouseEvent,
    fc: tuple[float, float, float],
    ax: PolarAxes,
) -> dict[str, Any]:
    """Build keyword arguments for the tooltip annotation

    Parameters
    ----------
    text : str
        Tooltip text
    gid : str
        Group id set on the annotation
    e : MouseEvent
        Mouse event (pixel position is the annotated point)
    fc : tuple[float, float, float]
        Box facecolor, used only when `config.tooltip.fc` is None
    ax : PolarAxes
        Target polar axes

    Returns
    -------
    ann_kwargs : dict[str, Any]
        Keyword arguments for `ax.annotate()`
    """
    if config.tooltip.fc is None:
        box_fc = fc
    else:
        box_fc = to_rgb(config.tooltip.fc)
    text_color = select_textcolor(box_fc)
    theta, _ = float(e.xdata), float(e.ydata)  # type: ignore
    # Horizontal offset & alignment are chosen by `is_right_loc()`,
    # vertical alignment by `is_lower_loc()`
    on_right = is_right_loc(theta)
    margin = config.tooltip.margin
    box_props = {
        "fc": box_fc,
        "ec": text_color,
        "lw": config.tooltip.lw,
        "boxstyle": config.tooltip.boxstyle,
    }
    return {
        "text": text,
        "xy": (e.x, e.y),
        "xytext": (-margin if on_right else margin, 0),
        "xycoords": ax.transScale,
        "textcoords": "offset pixels",
        "size": config.tooltip.fontsize,
        "gid": gid,
        "ha": "right" if on_right else "left",
        "va": "bottom" if is_lower_loc(theta) else "top",
        "ma": "left",
        "color": text_color,
        "zorder": 999,
        "bbox": box_props,
    }
277 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # pyCirclize: Circular visualization in Python
  2 | 
  3 | ![Python3](https://img.shields.io/badge/Language-Python3-steelblue)
  4 | ![OS](https://img.shields.io/badge/OS-_Windows_|_Mac_|_Linux-steelblue)
  5 | ![License](https://img.shields.io/badge/License-MIT-steelblue)
  6 | [![Latest PyPI version](https://img.shields.io/pypi/v/pycirclize.svg)](https://pypi.python.org/pypi/pycirclize)
  7 | [![conda-forge](https://img.shields.io/conda/vn/conda-forge/pycirclize.svg?color=green)](https://anaconda.org/conda-forge/pycirclize)
  8 | [![CI](https://github.com/moshi4/pyCirclize/actions/workflows/ci.yml/badge.svg)](https://github.com/moshi4/pyCirclize/actions/workflows/ci.yml)
  9 | 
 10 | ## Table of contents
 11 | 
 12 | - [Overview](#overview)
 13 | - [Installation](#installation)
 14 | - [API Usage](#api-usage)
 15 | - [Code Example](#code-example)
 16 | - [Tooltip Option](#tooltip-option)
 17 | - [Star History](#star-history)
 18 | 
 19 | ## Overview
 20 | 
 21 | pyCirclize is a circular visualization python package implemented based on matplotlib.
 22 | This package was developed to make it easy to draw attractive circular figures, such as Circos plots and chord diagrams, in Python.
 23 | In addition, useful genome and phylogenetic tree visualization methods for the bioinformatics field are also implemented.
 24 | pyCirclize was inspired by [circlize](https://github.com/jokergoo/circlize) and [pyCircos](https://github.com/ponnhide/pyCircos).
 25 | More detailed documentation is available [here](https://moshi4.github.io/pyCirclize/).
 26 | 
 27 | ![pyCirclize_gallery.png](https://raw.githubusercontent.com/moshi4/pyCirclize/main/docs/images/pyCirclize_gallery.png)  
 28 | **Fig.1 pyCirclize example plot gallery**
 29 | 
 30 | ## Installation
 31 | 
 32 | `Python 3.10 or later` is required for installation.
 33 | 
 34 | **Install PyPI package:**
 35 | 
 36 |     pip install pycirclize
 37 | 
 38 | **Install conda-forge package:**
 39 | 
 40 |     conda install -c conda-forge pycirclize
 41 | 
 42 | ## API Usage
 43 | 
 44 | API usage is described in the following sections of the [documentation](https://moshi4.github.io/pyCirclize/).
 45 | 
 46 | - [Getting Started](https://moshi4.github.io/pyCirclize/getting_started/)
 47 | - [Plot API Example](https://moshi4.github.io/pyCirclize/plot_api_example/)
 48 | - [Chord Diagram](https://moshi4.github.io/pyCirclize/chord_diagram/)
 49 | - [Radar Chart](https://moshi4.github.io/pyCirclize/radar_chart/)
 50 | - [Circos Plot (Genomics)](https://moshi4.github.io/pyCirclize/circos_plot/)
 51 | - [Comparative Genomics](https://moshi4.github.io/pyCirclize/comparative_genomics/)
 52 | - [Phylogenetic Tree](https://moshi4.github.io/pyCirclize/phylogenetic_tree/)
 53 | - [Plot Tips](https://moshi4.github.io/pyCirclize/plot_tips/)
 54 | 
 55 | ## Code Example
 56 | 
 57 | ### 1. Circos Plot
 58 | 
 59 | ```python
 60 | from pycirclize import Circos
 61 | import numpy as np
 62 | np.random.seed(0)
 63 | 
 64 | # Initialize Circos sectors
 65 | sectors = {"A": 10, "B": 15, "C": 12, "D": 20, "E": 15}
 66 | circos = Circos(sectors, space=5)
 67 | 
 68 | for sector in circos.sectors:
 69 |     # Plot sector name
 70 |     sector.text(f"Sector: {sector.name}", r=110, size=15)
 71 |     # Create x positions & random y values
 72 |     x = np.arange(sector.start, sector.end) + 0.5
 73 |     y = np.random.randint(0, 100, len(x))
 74 |     # Plot lines
 75 |     track1 = sector.add_track((80, 100), r_pad_ratio=0.1)
 76 |     track1.xticks_by_interval(interval=1)
 77 |     track1.axis()
 78 |     track1.line(x, y)
 79 |     # Plot points 
 80 |     track2 = sector.add_track((55, 75), r_pad_ratio=0.1)
 81 |     track2.axis()
 82 |     track2.scatter(x, y)
 83 |     # Plot bars
 84 |     track3 = sector.add_track((30, 50), r_pad_ratio=0.1)
 85 |     track3.axis()
 86 |     track3.bar(x, y)
 87 | 
 88 | # Plot links 
 89 | circos.link(("A", 0, 3), ("B", 15, 12))
 90 | circos.link(("B", 0, 3), ("C", 7, 11), color="skyblue")
 91 | circos.link(("C", 2, 5), ("E", 15, 12), color="chocolate", direction=1)
 92 | circos.link(("D", 3, 5), ("D", 18, 15), color="lime", ec="black", lw=0.5, hatch="//", direction=2)
 93 | circos.link(("D", 8, 10), ("E", 2, 8), color="violet", ec="red", lw=1.0, ls="dashed")
 94 | 
 95 | circos.savefig("example01.png")
 96 | ```
 97 | 
 98 | ![example01.png](https://raw.githubusercontent.com/moshi4/pyCirclize/main/docs/images/example01.png)  
 99 | 
100 | ### 2. Circos Plot (Genomics)
101 | 
102 | ```python
103 | from pycirclize import Circos
104 | from pycirclize.utils import fetch_genbank_by_accid
105 | from pycirclize.parser import Genbank
106 | 
107 | # Download `NC_002483` E.coli plasmid genbank
108 | gbk_fetch_data = fetch_genbank_by_accid("NC_002483")
109 | gbk = Genbank(gbk_fetch_data)
110 | 
111 | # Initialize Circos instance with genome size
112 | sectors = gbk.get_seqid2size()
113 | space = 0 if len(sectors) == 1 else 2
114 | circos = Circos(sectors, space=space)
115 | circos.text(f"Escherichia coli K-12 plasmid F\n\n{gbk.name}", size=14)
116 | 
117 | seqid2features = gbk.get_seqid2features(feature_type="CDS")
118 | for sector in circos.sectors:
119 |     # Setup track for features plot
120 |     f_cds_track = sector.add_track((95, 100))
121 |     f_cds_track.axis(fc="lightgrey", ec="none", alpha=0.5)
122 |     r_cds_track = sector.add_track((90, 95))
123 |     r_cds_track.axis(fc="lightgrey", ec="none", alpha=0.5)
124 |     # Plot forward/reverse strand CDS
125 |     features = seqid2features[sector.name]
126 |     for feature in features:
127 |         if feature.location.strand == 1:
128 |             f_cds_track.genomic_features(feature, plotstyle="arrow", fc="salmon", lw=0.5)
129 |         else:
130 |             r_cds_track.genomic_features(feature, plotstyle="arrow", fc="skyblue", lw=0.5)
131 | 
132 |     # Plot 'gene' qualifier label if exists
133 |     labels, label_pos_list = [], []
134 |     for feature in features:
135 |         start = int(feature.location.start)
136 |         end = int(feature.location.end)
137 |         label_pos = (start + end) / 2
138 |         gene_name = feature.qualifiers.get("gene", [None])[0]
139 |         if gene_name is not None:
140 |             labels.append(gene_name)
141 |             label_pos_list.append(label_pos)
142 |             f_cds_track.annotate(label_pos, gene_name, label_size=6)
143 | 
144 |     # Plot xticks (interval = 10 Kb)
145 |     r_cds_track.xticks_by_interval(
146 |         10000, outer=False, label_formatter=lambda v: f"{v/1000:.1f} Kb"
147 |     )
148 | 
149 | circos.savefig("example02.png")
150 | ```
151 | 
152 | ![example02.png](https://raw.githubusercontent.com/moshi4/pyCirclize/main/docs/images/example02.png)  
153 | 
154 | ### 3. Chord Diagram
155 | 
156 | ```python
157 | from pycirclize import Circos
158 | import pandas as pd
159 | 
160 | # Create matrix dataframe (3 x 6)
161 | row_names = ["F1", "F2", "F3"]
162 | col_names = ["T1", "T2", "T3", "T4", "T5", "T6"]
163 | matrix_data = [
164 |     [10, 16, 7, 7, 10, 8],
165 |     [4, 9, 10, 12, 12, 7],
166 |     [17, 13, 7, 4, 20, 4],
167 | ]
168 | matrix_df = pd.DataFrame(matrix_data, index=row_names, columns=col_names)
169 | 
170 | # Initialize Circos instance for chord diagram plot
171 | circos = Circos.chord_diagram(
172 |     matrix_df,
173 |     space=5,
174 |     cmap="tab10",
175 |     label_kws=dict(size=12),
176 |     link_kws=dict(ec="black", lw=0.5, direction=1),
177 | )
178 | 
179 | circos.savefig("example03.png")
180 | ```
181 | 
182 | ![example03.png](https://raw.githubusercontent.com/moshi4/pyCirclize/main/docs/images/example03.png)  
183 | 
184 | ### 4. Phylogenetic Tree
185 | 
186 | ```python
187 | from pycirclize import Circos
188 | from pycirclize.utils import load_example_tree_file, ColorCycler
189 | from matplotlib.lines import Line2D
190 | 
191 | # Initialize Circos from phylogenetic tree
192 | tree_file = load_example_tree_file("large_example.nwk")
193 | circos, tv = Circos.initialize_from_tree(
194 |     tree_file,
195 |     r_lim=(30, 100),
196 |     leaf_label_size=5,
197 |     line_kws=dict(color="lightgrey", lw=1.0),
198 | )
199 | 
200 | # Define group-species dict for tree annotation
201 | # In this example, set minimum species list to specify group's MRCA node
202 | group_name2species_list = dict(
203 |     Monotremata=["Tachyglossus_aculeatus", "Ornithorhynchus_anatinus"],
204 |     Marsupialia=["Monodelphis_domestica", "Vombatus_ursinus"],
205 |     Xenarthra=["Choloepus_didactylus", "Dasypus_novemcinctus"],
206 |     Afrotheria=["Trichechus_manatus", "Chrysochloris_asiatica"],
207 |     Euarchontes=["Galeopterus_variegatus", "Theropithecus_gelada"],
208 |     Glires=["Oryctolagus_cuniculus", "Microtus_oregoni"],
209 |     Laurasiatheria=["Talpa_occidentalis", "Mirounga_leonina"],
210 | )
211 | 
212 | # Set tree line color & label color
213 | ColorCycler.set_cmap("tab10")
214 | group_name2color = {name: ColorCycler() for name in group_name2species_list.keys()}
215 | for group_name, species_list in group_name2species_list.items():
216 |     color = group_name2color[group_name]
217 |     tv.set_node_line_props(species_list, color=color, apply_label_color=True)
218 | 
219 | # Plot figure & set legend on center
220 | fig = circos.plotfig()
221 | _ = circos.ax.legend(
222 |     handles=[Line2D([], [], label=n, color=c) for n, c in group_name2color.items()],
223 |     labelcolor=group_name2color.values(),
224 |     fontsize=6,
225 |     loc="center",
226 |     bbox_to_anchor=(0.5, 0.5),
227 | )
228 | fig.savefig("example04.png")
229 | ```
230 | 
231 | ![example04.png](https://raw.githubusercontent.com/moshi4/pyCirclize/main/docs/images/example04.png)  
232 | 
233 | ### 5. Radar Chart
234 | 
235 | ```python
236 | from pycirclize import Circos
237 | import pandas as pd
238 | 
239 | # Create RPG jobs parameter dataframe (3 jobs, 7 parameters)
240 | df = pd.DataFrame(
241 |     data=[
242 |         [80, 80, 80, 80, 80, 80, 80],
243 |         [90, 20, 95, 95, 30, 30, 80],
244 |         [60, 90, 20, 20, 100, 90, 50],
245 |     ],
246 |     index=["Hero", "Warrior", "Wizard"],
247 |     columns=["HP", "MP", "ATK", "DEF", "SP.ATK", "SP.DEF", "SPD"],
248 | )
249 | 
250 | # Initialize Circos instance for radar chart plot
251 | circos = Circos.radar_chart(
252 |     df,
253 |     vmax=100,
254 |     marker_size=6,
255 |     grid_interval_ratio=0.2,
256 | )
257 | 
258 | # Plot figure & set legend on upper right
259 | fig = circos.plotfig()
260 | _ = circos.ax.legend(loc="upper right", fontsize=10)
261 | fig.savefig("example05.png")
262 | ```
263 | 
264 | ![example05.png](https://raw.githubusercontent.com/moshi4/pyCirclize/main/docs/images/example05.png)  
265 | 
266 | ## Tooltip Option
267 | 
268 | pyCirclize supports tooltip display in Jupyter using [ipympl](https://github.com/matplotlib/ipympl).
269 | To enable tooltips, install pycirclize together with ipympl and call the `circos.plotfig(tooltip=True)` method.
270 | The tooltip option has been tested in Jupyter notebooks on VS Code and JupyterLab.
271 | 
272 | ```shell
273 | pip install pycirclize[tooltip]
274 | # or
275 | conda install -c conda-forge pycirclize ipympl
276 | ```
277 | 
278 | > [!WARNING]
279 | > Interactive tooltip plots require a live Python kernel.
280 | > Be aware that tooltips are not permanently enabled in the notebook after plotting.
281 | 
282 | ![pyCirclize_tooltip.gif](https://raw.githubusercontent.com/moshi4/pyCirclize/main/docs/images/pyCirclize_tooltip.gif)  
283 | 
284 | ## Star History
285 | 
286 | [![Star History Chart](https://api.star-history.com/svg?repos=moshi4/pyCirclize&type=Date)](https://star-history.com/#moshi4/pyCirclize&Date)
287 | 


--------------------------------------------------------------------------------
/src/pycirclize/parser/genbank.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import bz2
  4 | import gzip
  5 | import warnings
  6 | import zipfile
  7 | from collections import defaultdict
  8 | from io import StringIO, TextIOWrapper
  9 | from pathlib import Path
 10 | from typing import TYPE_CHECKING
 11 | 
 12 | import numpy as np
 13 | from Bio import SeqIO, SeqUtils
 14 | from Bio.SeqFeature import Seq, SeqFeature, SimpleLocation
 15 | from Bio.SeqRecord import SeqRecord
 16 | 
 17 | if TYPE_CHECKING:
 18 |     from numpy.typing import NDArray
 19 | 
 20 | 
 21 | class Genbank:
 22 |     """Genbank Parser Class"""
 23 | 
 24 |     def __init__(
 25 |         self,
 26 |         gbk_source: str | Path | StringIO | TextIOWrapper | list[SeqRecord],
 27 |         *,
 28 |         name: str | None = None,
 29 |         min_range: None = None,
 30 |         max_range: None = None,
 31 |     ) -> None:
 32 |         """
 33 |         Parameters
 34 |         ----------
 35 |         gbk_source : str | Path | StringIO | TextIOWrapper | list[SeqRecord]
 36 |             Genbank file or source
 37 |             (`*.gz`, `*.bz2`, `*.zip` compressed file can be readable)
 38 |         name : str | None, optional
 39 |             name (If None, `file name` or `record name` is set)
 40 |         min_range : None, optional
 41 |             No longer used. Left for backward compatibility.
 42 |         max_range : None, optional
 43 |             No longer used. Left for backward compatibility.
 44 |         """
 45 |         self._gbk_source = gbk_source
 46 |         if isinstance(gbk_source, (str, Path, StringIO, TextIOWrapper)):
 47 |             self._records = self._parse_gbk_source(gbk_source)
 48 |         else:
 49 |             self._records = gbk_source
 50 | 
 51 |         # Set genbank name
 52 |         if name is not None:
 53 |             self._name = name
 54 |         elif isinstance(self._gbk_source, (str, Path)):
 55 |             gbk_file = Path(self._gbk_source)
 56 |             if gbk_file.suffix in (".gz", ".bz2", ".zip"):
 57 |                 self._name = gbk_file.with_suffix("").with_suffix("").name
 58 |             else:
 59 |                 self._name = gbk_file.with_suffix("").name
 60 |         elif isinstance(self._gbk_source, (StringIO, TextIOWrapper)):
 61 |             self._name = self._records[0].name
 62 |         else:
 63 |             raise ValueError("Failed to get genbank name.")
 64 | 
 65 |         if min_range or max_range:
 66 |             warnings.warn(
 67 |                 "min_range & max_range is no longer used in Genbank parser.",
 68 |                 stacklevel=2,
 69 |             )
 70 | 
 71 |         if len(self.records) == 0:
 72 |             raise ValueError(f"Failed to parse {gbk_source} as Genbank file.")
 73 | 
 74 |     ############################################################
 75 |     # Property
 76 |     ############################################################
 77 | 
 78 |     @property
 79 |     def name(self) -> str:
 80 |         """Name"""
 81 |         return self._name
 82 | 
 83 |     @property
 84 |     def records(self) -> list[SeqRecord]:
 85 |         """Genbank records"""
 86 |         return self._records
 87 | 
 88 |     @property
 89 |     def genome_seq(self) -> str:
 90 |         """Genome sequence (only first record)"""
 91 |         return str(self.records[0].seq)
 92 | 
 93 |     @property
 94 |     def genome_length(self) -> int:
 95 |         """Genome length (only first record)"""
 96 |         return len(self.genome_seq)
 97 | 
 98 |     @property
 99 |     def range_size(self) -> int:
100 |         """Same as `self.genome_length` (Left for backward compatibility)"""
101 |         return self.genome_length
102 | 
103 |     @property
104 |     def full_genome_seq(self) -> str:
105 |         """Full genome sequence (concatenate all records)"""
106 |         return "".join(str(r.seq) for r in self.records)
107 | 
108 |     @property
109 |     def full_genome_length(self) -> int:
110 |         """Full genome length (concatenate all records)"""
111 |         return len(self.full_genome_seq)
112 | 
113 |     ############################################################
114 |     # Public Method
115 |     ############################################################
116 | 
117 |     def calc_genome_gc_content(self, seq: str | None = None) -> float:
118 |         """Calculate genome GC content
119 | 
120 |         Parameters
121 |         ----------
122 |         seq : str | None, optional
123 |             Sequence for GC content calculation (Default: `self.genome_seq`)
124 | 
125 |         Returns
126 |         -------
127 |         gc_content : float
128 |             GC content
129 |         """
130 |         seq = self.genome_seq if seq is None else seq
131 |         gc_content = SeqUtils.gc_fraction(seq) * 100
132 |         return gc_content
133 | 
134 |     def calc_gc_skew(
135 |         self,
136 |         window_size: int | None = None,
137 |         step_size: int | None = None,
138 |         *,
139 |         seq: str | None = None,
140 |     ) -> tuple[NDArray[np.int64], NDArray[np.float64]]:
141 |         """Calculate GC skew in sliding window
142 | 
143 |         Parameters
144 |         ----------
145 |         window_size : int | None, optional
146 |             Window size (Default: `genome_size / 500`)
147 |         step_size : int | None, optional
148 |             Step size (Default: `genome_size / 1000`)
149 |         seq : str | None, optional
150 |             Sequence for GCskew calculation (Default: `self.genome_seq`)
151 | 
152 |         Returns
153 |         -------
154 |         pos_list : NDArray[np.int64]
155 |             Position list
156 |         gc_skew_list : NDArray[np.float64]
157 |             GC skew list
158 |         """
159 |         pos_list, gc_skew_list = [], []
160 |         seq = self.genome_seq if seq is None else seq
161 |         if window_size is None:
162 |             window_size = int(len(seq) / 500)
163 |         if step_size is None:
164 |             step_size = int(len(seq) / 1000)
165 |         if window_size == 0 or step_size == 0:
166 |             window_size, step_size = len(seq), int(len(seq) / 2)
167 |         pos_list = [*list(range(0, len(seq), step_size)), len(seq)]
168 |         for pos in pos_list:
169 |             window_start_pos = pos - int(window_size / 2)
170 |             window_end_pos = pos + int(window_size / 2)
171 |             window_start_pos = 0 if window_start_pos < 0 else window_start_pos
172 |             window_end_pos = len(seq) if window_end_pos > len(seq) else window_end_pos
173 | 
174 |             subseq = seq[window_start_pos:window_end_pos]
175 |             g = subseq.count("G") + subseq.count("g")
176 |             c = subseq.count("C") + subseq.count("c")
177 |             try:
178 |                 skew = (g - c) / float(g + c)
179 |             except ZeroDivisionError:
180 |                 skew = 0.0
181 |             gc_skew_list.append(skew)
182 | 
183 |         pos_list = np.array(pos_list).astype(np.int64)
184 |         gc_skew_list = np.array(gc_skew_list).astype(np.float64)
185 | 
186 |         return pos_list, gc_skew_list
187 | 
188 |     def calc_gc_content(
189 |         self,
190 |         window_size: int | None = None,
191 |         step_size: int | None = None,
192 |         *,
193 |         seq: str | None = None,
194 |     ) -> tuple[NDArray[np.int64], NDArray[np.float64]]:
195 |         """Calculate GC content in sliding window
196 | 
197 |         Parameters
198 |         ----------
199 |         window_size : int | None, optional
200 |             Window size (Default: `genome_size / 500`)
201 |         step_size : int | None, optional
202 |             Step size (Default: `genome_size / 1000`)
203 |         seq : str | None, optional
204 |             Sequence for GC content calculation (Default: `self.genome_seq`)
205 | 
206 |         Returns
207 |         -------
208 |         pos_list : NDArray[np.int64]
209 |             Position list
210 |         gc_content_list : NDArray[np.float64]
211 |             GC content list
212 |         """
213 |         pos_list, gc_content_list = [], []
214 |         seq = self.genome_seq if seq is None else seq
215 |         if window_size is None:
216 |             window_size = int(len(seq) / 500)
217 |         if step_size is None:
218 |             step_size = int(len(seq) / 1000)
219 |         if window_size == 0 or step_size == 0:
220 |             window_size, step_size = len(seq), int(len(seq) / 2)
221 |         pos_list = [*list(range(0, len(seq), step_size)), len(seq)]
222 |         for pos in pos_list:
223 |             window_start_pos = pos - int(window_size / 2)
224 |             window_end_pos = pos + int(window_size / 2)
225 |             window_start_pos = 0 if window_start_pos < 0 else window_start_pos
226 |             window_end_pos = len(seq) if window_end_pos > len(seq) else window_end_pos
227 | 
228 |             subseq = seq[window_start_pos:window_end_pos]
229 |             gc_content = SeqUtils.gc_fraction(subseq) * 100
230 |             gc_content_list.append(gc_content)
231 | 
232 |         pos_list = np.array(pos_list).astype(np.int64)
233 |         gc_content_list = np.array(gc_content_list).astype(np.float64)
234 | 
235 |         return pos_list, gc_content_list
236 | 
237 |     def get_seqid2seq(self) -> dict[str, str]:
238 |         """Get seqid & complete/contig/scaffold genome sequence dict
239 | 
240 |         Returns
241 |         -------
242 |         seqid2seq : dict[str, str]
243 |             seqid & genome sequence dict
244 |         """
245 |         return {str(rec.id): str(rec.seq) for rec in self.records}
246 | 
247 |     def get_seqid2size(self) -> dict[str, int]:
248 |         """Get seqid & complete/contig/scaffold genome size dict
249 | 
250 |         Returns
251 |         -------
252 |         seqid2size : dict[str, int]
253 |             seqid & genome size dict
254 |         """
255 |         return {seqid: len(seq) for seqid, seq in self.get_seqid2seq().items()}
256 | 
257 |     def get_seqid2features(
258 |         self,
259 |         feature_type: str | list[str] | None = "CDS",
260 |         target_strand: int | None = None,
261 |     ) -> dict[str, list[SeqFeature]]:
262 |         """Get seqid & features in target seqid genome dict
263 | 
264 |         Parameters
265 |         ----------
266 |         feature_type : str | list[str] | None, optional
267 |             Feature type (`CDS`, `gene`, `mRNA`, etc...)
268 |             If None, extract regardless of feature type.
269 |         target_strand : int | None, optional
270 |             Extract target strand. If None, extract regardless of strand.
271 | 
272 |         Returns
273 |         -------
274 |         seqid2features : dict[str, list[SeqFeature]]
275 |             seqid & features dict
276 |         """
277 |         if isinstance(feature_type, str):
278 |             feature_type = [feature_type]
279 | 
280 |         seqid2features = defaultdict(list)
281 |         for rec in self.records:
282 |             feature: SeqFeature
283 |             for feature in rec.features:
284 |                 # Ignore feature if parsing of location fails
285 |                 if feature.location is None:
286 |                     continue
287 |                 # Filter feature by type & strand
288 |                 strand = feature.location.strand
289 |                 if feature_type is not None and feature.type not in feature_type:
290 |                     continue
291 |                 if target_strand is not None and strand != target_strand:
292 |                     continue
293 |                 # Exclude feature which straddle genome start position
294 |                 if self._is_straddle_feature(feature):
295 |                     continue
296 |                 start = int(feature.location.start)  # type: ignore
297 |                 end = int(feature.location.end)  # type: ignore
298 |                 seqid2features[rec.id].append(
299 |                     SeqFeature(
300 |                         location=SimpleLocation(start, end, strand),
301 |                         type=feature.type,
302 |                         qualifiers=feature.qualifiers,
303 |                     ),
304 |                 )
305 |         return seqid2features
306 | 
307 |     def extract_features(
308 |         self,
309 |         feature_type: str | list[str] | None = "CDS",
310 |         *,
311 |         target_strand: int | None = None,
312 |         target_range: tuple[int, int] | None = None,
313 |     ) -> list[SeqFeature]:
314 |         """Extract features (only first record)
315 | 
316 |         Parameters
317 |         ----------
318 |         feature_type : str | list[str] | None, optional
319 |             Feature type (`CDS`, `gene`, `mRNA`, etc...)
320 |             If None, extract regardless of feature type.
321 |         target_strand : int | None, optional
322 |             Extract target strand. If None, extract regardless of strand.
323 |         target_range : tuple[int, int] | None, optional
324 |             Extract target range. If None, extract regardless of range.
325 | 
326 |         Returns
327 |         -------
328 |         features : list[SeqFeature]
329 |             Extracted features
330 |         """
331 |         seqid2features = self.get_seqid2features(feature_type, target_strand)
332 |         first_record_features = next(iter(seqid2features.values()))
333 |         if target_range:
334 |             target_features = []
335 |             for feature in first_record_features:
336 |                 start = int(feature.location.start)  # type: ignore
337 |                 end = int(feature.location.end)  # type: ignore
338 |                 if min(target_range) <= start <= end <= max(target_range):
339 |                     target_features.append(feature)
340 |             return target_features
341 |         else:
342 |             return first_record_features
343 | 
344 |     def write_cds_fasta(self, outfile: str | Path) -> None:
345 |         """Write CDS fasta file
346 | 
347 |         Parameters
348 |         ----------
349 |         outfile : str | Path
350 |             Output CDS fasta file
351 |         """
352 |         cds_records: list[SeqRecord] = []
353 |         counter = 0
354 |         seqid2cds_features = self.get_seqid2features(feature_type="CDS")
355 |         for seqid, cds_features in seqid2cds_features.items():
356 |             for cds_feature in cds_features:
357 |                 # Ignore no translation feature
358 |                 translation = cds_feature.qualifiers.get("translation", [None])[0]
359 |                 if translation is None:
360 |                     continue
361 |                 # Get feature location
362 |                 start = int(cds_feature.location.start)  # type: ignore
363 |                 end = int(cds_feature.location.end)  # type: ignore
364 |                 strand = -1 if cds_feature.location.strand == -1 else 1
365 |                 # Set feature id
366 |                 location_id = f"|{seqid}|{start}_{end}_{strand}|"
367 |                 protein_id = cds_feature.qualifiers.get("protein_id", [None])[0]
368 |                 if protein_id is None:
369 |                     feature_id = f"GENE{counter:06d}{location_id}"
370 |                 else:
371 |                     feature_id = f"GENE{counter:06d}_{protein_id}{location_id}"
372 |                 counter += 1
373 |                 # Add SeqRecord of CDS feature
374 |                 seq = Seq(translation)
375 |                 product = cds_feature.qualifiers.get("product", [""])[0]
376 |                 seq_record = SeqRecord(seq, feature_id, description=product)
377 |                 cds_records.append(seq_record)
378 |         # Write CDS file
379 |         SeqIO.write(cds_records, outfile, "fasta-2line")
380 | 
381 |     def write_genome_fasta(self, outfile: str | Path) -> None:
382 |         """Write genome fasta file
383 | 
384 |         Parameters
385 |         ----------
386 |         outfile : str | Path
387 |             Output genome fasta file
388 |         """
389 |         with open(outfile, "w", encoding="utf-8") as f:
390 |             for seqid, seq in self.get_seqid2seq().items():
391 |                 f.write(f">{seqid}\n{seq}\n")
392 | 
393 |     ############################################################
394 |     # Private Method
395 |     ############################################################
396 | 
397 |     def _parse_gbk_source(
398 |         self, gbk_source: str | Path | StringIO | TextIOWrapper
399 |     ) -> list[SeqRecord]:
400 |         """Parse genbank source
401 | 
402 |         Parameters
403 |         ----------
404 |         gbk_source : str | Path | StringIO | TextIOWrapper
405 |             Genbank file or source
406 | 
407 |         Returns
408 |         -------
409 |         list[SeqRecord]
410 |             Genbank SeqRecords
411 |         """
412 |         # Parse file
413 |         if isinstance(gbk_source, (str, Path)):
414 |             if Path(gbk_source).suffix == ".gz":
415 |                 with gzip.open(gbk_source, mode="rt", encoding="utf-8") as f:
416 |                     return list(SeqIO.parse(f, "genbank"))
417 |             elif Path(gbk_source).suffix == ".bz2":
418 |                 with bz2.open(gbk_source, mode="rt", encoding="utf-8") as f:
419 |                     return list(SeqIO.parse(f, "genbank"))
420 |             elif Path(gbk_source).suffix == ".zip":
421 |                 with zipfile.ZipFile(gbk_source) as z, z.open(z.namelist()[0]) as f:
422 |                     io = TextIOWrapper(f, encoding="utf-8")
423 |                     return list(SeqIO.parse(io, "genbank"))
424 |             else:
425 |                 with open(gbk_source, encoding="utf-8") as f:
426 |                     return list(SeqIO.parse(f, "genbank"))
427 |         # Parse TextIOWrapper
428 |         return list(SeqIO.parse(gbk_source, "genbank"))
429 | 
430 |     def _is_straddle_feature(self, feature: SeqFeature) -> bool:
431 |         """Check target feature straddle genome start position or not
432 | 
433 |         Parameters
434 |         ----------
435 |         feature : SeqFeature
436 |             Target feature
437 | 
438 |         Returns
439 |         -------
440 |         result : bool
441 |             Check result
442 |         """
443 |         strand = feature.location.strand
444 |         if strand == -1:
445 |             start = int(feature.location.parts[-1].start)  # type: ignore
446 |             end = int(feature.location.parts[0].end)  # type: ignore
447 |         else:
448 |             start = int(feature.location.parts[0].start)  # type: ignore
449 |             end = int(feature.location.parts[-1].end)  # type: ignore
450 |         return start > end
451 | 
452 |     def __str__(self) -> str:
453 |         text = f"{self.name}: {len(self.records)} records\n"
454 |         for num, (seqid, size) in enumerate(self.get_seqid2size().items(), 1):
455 |             text += f"{num:02d}. {seqid} ({size:,} bp)\n"
456 |         return text
457 | 


--------------------------------------------------------------------------------
/src/pycirclize/patches.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import math
  4 | 
  5 | import numpy as np
  6 | from matplotlib.patches import PathPatch
  7 | from matplotlib.path import Path
  8 | 
  9 | from pycirclize import config
 10 | 
 11 | 
 12 | class Line(PathPatch):
 13 |     """Linear Line Patch"""
 14 | 
 15 |     def __init__(
 16 |         self,
 17 |         rad_lim: tuple[float, float],
 18 |         r_lim: tuple[float, float],
 19 |         **kwargs,
 20 |     ) -> None:
 21 |         """
 22 |         Parameters
 23 |         ----------
 24 |         rad_lim : tuple[float, float]
 25 |             Radian limit region
 26 |         r_lim : tuple[float, float]
 27 |             Radius limit region
 28 |         **kwargs : dict, optional
 29 |             Patch properties (e.g. `ec="red", lw=1.0, ...`)
 30 |             <https://matplotlib.org/stable/api/_as_gen/matplotlib.patches.Patch.html>
 31 |         """
 32 |         # Default params: fc='none', color='black', linewidth=0.5
 33 |         kwargs.update(dict(fc="none"))
 34 |         if "ec" not in kwargs and "edgecolor" not in kwargs and "color" not in kwargs:
 35 |             kwargs.update(dict(ec="black"))
 36 |         if "lw" not in kwargs and "linewidth" not in kwargs:
 37 |             kwargs.update(dict(lw=0.5))
 38 | 
 39 |         # Set line path
 40 |         verts = list(zip(rad_lim, r_lim, strict=True))
 41 |         super().__init__(Path(verts), **kwargs)  # type: ignore
 42 | 
 43 | 
 44 | class ArcLine(PathPatch):
 45 |     """Arc Line Patch"""
 46 | 
 47 |     def __init__(
 48 |         self,
 49 |         rad_lim: tuple[float, float],
 50 |         r_lim: tuple[float, float],
 51 |         **kwargs,
 52 |     ) -> None:
 53 |         """
 54 |         Parameters
 55 |         ----------
 56 |         rad_lim : tuple[float, float]
 57 |             Radian limit region
 58 |         r_lim : tuple[float, float]
 59 |             Radius limit region
 60 |         **kwargs : dict, optional
 61 |             Patch properties (e.g. `ec="red", lw=1.0, ...`)
 62 |             <https://matplotlib.org/stable/api/_as_gen/matplotlib.patches.Patch.html>
 63 |         """
 64 |         # Default params: fc='none', color='black', linewidth=0.5
 65 |         kwargs.update(dict(fc="none"))
 66 |         if "ec" not in kwargs and "edgecolor" not in kwargs and "color" not in kwargs:
 67 |             kwargs.update(dict(ec="black"))
 68 |         if "lw" not in kwargs and "linewidth" not in kwargs:
 69 |             kwargs.update(dict(lw=0.5))
 70 | 
 71 |         # Calculate line path vertices
 72 |         if rad_lim[1] >= rad_lim[0]:
 73 |             rad_start, rad_end = rad_lim
 74 |             r_start, r_end = r_lim
 75 |         else:
 76 |             rad_start, rad_end = rad_lim[::-1]
 77 |             r_start, r_end = r_lim[::-1]
 78 |         if rad_start == rad_end:
 79 |             arc_rads = [rad_start, rad_end]
 80 |         else:
 81 |             step = config.ARC_RADIAN_STEP
 82 |             arc_rads = [*list(np.arange(rad_start, rad_end, step)), rad_end]
 83 |         arc_r_list = np.linspace(r_start, r_end, len(arc_rads), endpoint=True)
 84 | 
 85 |         # Set line path
 86 |         verts = list(zip(arc_rads, arc_r_list, strict=True))
 87 |         super().__init__(Path(verts), **kwargs)  # type: ignore
 88 | 
 89 | 
 90 | class ArcRectangle(PathPatch):
 91 |     """Arc Rectangle PathPatch"""
 92 | 
 93 |     def __init__(
 94 |         self,
 95 |         radr: tuple[float, float],
 96 |         width: float,
 97 |         height: float,
 98 |         **kwargs,
 99 |     ) -> None:
100 |         """
101 |         Parameters
102 |         ----------
103 |         radr : tuple[float, float]
104 |             Anchor point (rad=`radian`, r=`radius`)
105 |         width : float
106 |             Rectangle radian width
107 |         height : float
108 |             Rectangle radius height
109 |         **kwargs : dict, optional
110 |             Patch properties (e.g. `fc="red", ec="blue", lw=2.0, ...`)
111 |             <https://matplotlib.org/stable/api/_as_gen/matplotlib.patches.Patch.html>
112 |         """
113 |         if "lw" not in kwargs and "linewidth" not in kwargs:
114 |             kwargs.setdefault("lw", 0.0)
115 | 
116 |         min_rad, min_r = radr
117 |         max_rad, max_r = min_rad + width, min_r + height
118 |         arc_rads = np.arange(min_rad, max_rad, config.ARC_RADIAN_STEP)
119 |         arc_rads = np.append(arc_rads, max_rad)
120 | 
121 |         min_r_list = [min_r] * len(arc_rads)
122 |         bottom_arc_path = list(zip(arc_rads, min_r_list, strict=True))
123 | 
124 |         max_r_list = [max_r] * len(arc_rads)
125 |         upper_arc_path = list(zip(arc_rads[::-1], max_r_list, strict=True))
126 | 
127 |         arc_rect_path = Path(
128 |             bottom_arc_path + upper_arc_path + [bottom_arc_path[0]],
129 |             closed=True,
130 |         )
131 |         super().__init__(arc_rect_path, **kwargs)
132 | 
133 | 
class ArcArrow(PathPatch):
    """Arrow PathPatch whose shaft follows an arc"""

    def __init__(
        self,
        rad: float,
        r: float,
        drad: float,
        dr: float,
        head_length: float = np.pi / 90,
        shaft_ratio: float = 0.5,
        **kwargs,
    ) -> None:
        """
        Parameters
        ----------
        rad : float
            Radian base coordinate
        r : float
            Radius base coordinate
        drad : float
            Radian size (a negative value draws the arrow in reverse direction)
        dr : float
            Radius size
        head_length : float, optional
            Arrow head length (Radian unit). Capped to the arrow radian size.
        shaft_ratio : float, optional
            Arrow shaft ratio (0 - 1.0)
        **kwargs : dict, optional
            Patch properties (e.g. `fc="red", ec="blue", lw=1.0, ...`)
            <https://matplotlib.org/stable/api/_as_gen/matplotlib.patches.Patch.html>
        """
        # Default: no outline unless the caller asked for a line width
        if kwargs.keys().isdisjoint(("lw", "linewidth")):
            kwargs["lw"] = 0.0

        # Radius layout: the shaft is centered inside [r, r + dr]
        shaft_size = dr * shaft_ratio
        shaft_margin = (dr - shaft_size) / 2
        r_shaft_bottom = r + shaft_margin
        r_shaft_upper = r + dr - shaft_margin
        r_center = (r + (r + dr)) / 2

        # Radian layout: `sign` encodes the drawing direction
        sign = 1 if drad >= 0 else -1
        arrow_size = abs(drad)
        head_size = min(head_length, arrow_size)
        rad_head_base = rad + sign * (arrow_size - head_size)
        rad_tip = rad + sign * arrow_size

        # Sampled radians along the shaft (from base toward the arrow head)
        shaft_rads = np.arange(rad, rad_head_base, sign * config.ARC_RADIAN_STEP)
        bottom_shaft_edge = [(shaft_rad, r_shaft_bottom) for shaft_rad in shaft_rads]
        upper_shaft_edge = [(shaft_rad, r_shaft_upper) for shaft_rad in shaft_rads[::-1]]

        # Outline: bottom shaft -> arrow head -> upper shaft -> back to base
        vertices = [
            *bottom_shaft_edge,
            (rad_head_base, r_shaft_bottom),
            (rad_head_base, r),  # Arrow bottom tip point
            (rad_tip, r_center),  # Arrow center tip point
            (rad_head_base, r + dr),  # Arrow upper tip point
            (rad_head_base, r_shaft_upper),
            *upper_shaft_edge,
            (rad, r_shaft_bottom),
        ]
        super().__init__(Path(vertices, closed=True), **kwargs)  # type: ignore
209 | 
210 | 
class BezierCurveLink(PathPatch):
    """Bezier Curve Link PathPatch"""

    def __init__(
        self,
        rad_start1: float,
        rad_end1: float,
        r1: float,
        rad_start2: float,
        rad_end2: float,
        r2: float,
        height_ratio: float = 0.5,
        direction: int = 0,
        arrow_length_ratio: float = 0.05,
        **kwargs,
    ) -> None:
        """
        Parameters
        ----------
        rad_start1 : float
            Radian start1
        rad_end1 : float
            Radian end1
        r1 : float
            Radius position1
        rad_start2 : float
            Radian start2
        rad_end2 : float
            Radian end2
        r2 : float
            Radius position2
        height_ratio : float, optional
            Bezier curve height ratio parameter
        direction : int, optional
            `0`: Circular edge shape (Default)
            `1`: Directional(1 -> 2) arrow edge shape
            `-1`: Directional(1 <- 2) arrow edge shape
            `2`: Bidirectional arrow edge shape
        arrow_length_ratio : float, optional
            Arrow length ratio.
        **kwargs : dict, optional
            Patch properties (e.g. `lw=1.0, hatch="//", ...`)
            <https://matplotlib.org/stable/api/_as_gen/matplotlib.patches.Patch.html>

        Raises
        ------
        ValueError
            If `direction` is not one of the supported values
        """

        def arc_paths(
            rad1: float, rad2: float, r: float
        ) -> list[tuple[np.uint8, tuple[float, float]]]:
            # Sampled points along the arc (`rad2` itself is excluded).
            # If rad1 == rad2, the result is a blank list.
            step = config.ARC_RADIAN_STEP if rad1 <= rad2 else -config.ARC_RADIAN_STEP
            return [(Path.LINETO, (rad, r)) for rad in np.arange(rad1, rad2, step)]

        def arrow_paths(
            rad1: float, rad2: float, r_side: float, r_top: float
        ) -> list[tuple[np.uint8, tuple[float, float]]]:
            # Arrow head: side point -> top point at the middle radian -> side point
            return [
                (Path.LINETO, (rad1, r_side)),
                (Path.LINETO, ((rad1 + rad2) / 2, r_top)),
                (Path.LINETO, (rad2, r_side)),
            ]

        def bezier_paths(
            rad1: float, rad2: float, r1: float, r2: float, height_ratio: float = 0.5
        ) -> list[tuple[np.uint8, tuple[float, float]]]:
            rad_mid = (rad1 + rad2) / 2
            if height_ratio >= 0.5:
                # Control point on the opposite side of the circle center
                # (e.g. with MAX_R=100: ratio 0.50 => r 0, 0.75 => 25, 1.00 => 50)
                r_ctl_pos = config.MAX_R * (height_ratio - 0.5)
                rad_ctl_pos = rad_mid + math.pi
            else:
                # Control point on the same side as the link ends
                # (e.g. with MAX_R=100: ratio 0.25 => r 25, 0.00 => 50)
                r_ctl_pos = config.MAX_R * (0.5 - height_ratio)
                rad_ctl_pos = rad_mid
            return [
                (Path.LINETO, (rad1, r1)),
                (Path.CURVE3, (rad_ctl_pos, r_ctl_pos)),
                (Path.LINETO, (rad2, r2)),
            ]

        arrow_r1 = r1 * (1 - arrow_length_ratio)
        arrow_r2 = r2 * (1 - arrow_length_ratio)

        # Which link ends are drawn as an arrow head (instead of a plain arc)
        if direction == config.Direction.NONE:
            arrow_at_1, arrow_at_2 = False, False
        elif direction == config.Direction.FORWARD:
            arrow_at_1, arrow_at_2 = False, True
        elif direction == config.Direction.REVERSE:
            arrow_at_1, arrow_at_2 = True, False
        elif direction == config.Direction.BIDIRECTIONAL:
            arrow_at_1, arrow_at_2 = True, True
        else:
            raise ValueError(f"{direction=} is invalid value (0 or 1 or -1 or 2).")

        # An arrow end starts from the shortened radius and peaks at the full radius
        if arrow_at_1:
            edge_r1 = arrow_r1
            edge1 = arrow_paths(rad_start1, rad_end1, arrow_r1, r1)
        else:
            edge_r1 = r1
            edge1 = arc_paths(rad_start1, rad_end1, r1)
        if arrow_at_2:
            edge_r2 = arrow_r2
            edge2 = arrow_paths(rad_end2, rad_start2, arrow_r2, r2)
        else:
            edge_r2 = r2
            edge2 = arc_paths(rad_end2, rad_start2, r2)

        # Circos style plot order `start1 -> end1 -> end2 -> start2 -> start1`
        # http://circos.ca/documentation/tutorials/links/twists/images
        path_data = [
            (Path.MOVETO, (rad_start1, edge_r1)),
            *edge1,
            (Path.LINETO, (rad_end1, edge_r1)),
            *bezier_paths(rad_end1, rad_end2, edge_r1, edge_r2, height_ratio),
            (Path.LINETO, (rad_end2, edge_r2)),
            *edge2,
            (Path.LINETO, (rad_start2, edge_r2)),
            *bezier_paths(rad_start2, rad_start1, edge_r2, edge_r1, height_ratio),
            (Path.CLOSEPOLY, (rad_start1, edge_r1)),
        ]

        codes = [code for code, _ in path_data]
        verts = [vert for _, vert in path_data]
        bezier_curve_path = Path(verts, codes, closed=True)  # type: ignore
        super().__init__(bezier_curve_path, **kwargs)
353 | 
354 | 
class BezierCurveLine(PathPatch):
    """Bezier Curve Line PathPatch"""

    def __init__(
        self,
        rad1: float,
        r1: float,
        rad2: float,
        r2: float,
        height_ratio: float = 0.5,
        direction: int = 0,
        arrow_height: float = 3.0,
        arrow_width: float = 1.0,
        **kwargs,
    ) -> None:
        """
        Parameters
        ----------
        rad1 : float
            Radian position1
        r1 : float
            Radius position1
        rad2 : float
            Radian position2
        r2 : float
            Radius position2
        height_ratio : float, optional
            Bezier curve height ratio parameter
        direction : int, optional
            `0`: No edge shape (Default)
            `1`: Directional(1 -> 2) arrow edge shape
            `-1`: Directional(1 <- 2) arrow edge shape
            `2`: Bidirectional arrow edge shape
        arrow_height : float, optional
            Arrow height size (Radius unit)
        arrow_width : float, optional
            Arrow width size (Degree unit)
        **kwargs : dict, optional
            Patch properties (e.g. `lw=1.0, hatch="//", ...`)
            <https://matplotlib.org/stable/api/_as_gen/matplotlib.patches.Patch.html>
        """
        # A line has nothing to fill
        kwargs.update(fill=False)
        # Apply the default line width only if the caller set neither alias.
        # An unconditional `setdefault("lw", ...)` would pass `lw` together
        # with a user-supplied `linewidth`, letting the default override it.
        if "lw" not in kwargs and "linewidth" not in kwargs:
            kwargs["lw"] = 0.5

        def bezier_paths(
            rad1: float,
            rad2: float,
            r1: float,
            r2: float,
            height_ratio: float = 0.5,
        ) -> list[tuple[np.uint8, tuple[float, float]]]:
            if height_ratio >= 0.5:
                # Control point is placed on the opposite side (+pi) of the
                # middle radian, e.g. with MAX_R=100:
                # height_ratio: 0.50 => r_ctl_pos: 0
                # height_ratio: 0.75 => r_ctl_pos: 25
                # height_ratio: 1.00 => r_ctl_pos: 50
                r_ctl_pos = config.MAX_R * (height_ratio - 0.5)
                rad_ctl_pos = (rad1 + rad2) / 2 + math.pi
            else:
                # Control point is placed at the middle radian,
                # e.g. with MAX_R=100:
                # height_ratio: 0.25 => r_ctl_pos: 25
                # height_ratio: 0.00 => r_ctl_pos: 50
                r_ctl_pos = config.MAX_R * (0.5 - height_ratio)
                rad_ctl_pos = (rad1 + rad2) / 2
            return [
                (Path.LINETO, (rad1, r1)),
                (Path.CURVE3, (rad_ctl_pos, r_ctl_pos)),
                (Path.LINETO, (rad2, r2)),
            ]

        def arrow_line_paths(
            rad_pos: float,
            r_pos: float,
            arrow_rad_width: float,
            arrow_r_height: float,
        ) -> list[tuple[np.uint8, tuple[float, float]]]:
            # Open arrow head: two strokes meeting at the tip (rad_pos, r_pos),
            # starting `arrow_r_height` below the tip on either side of it
            arrow_r_pos = r_pos - arrow_r_height
            return [
                (Path.MOVETO, (rad_pos + (arrow_rad_width / 2), arrow_r_pos)),
                (Path.LINETO, (rad_pos, r_pos)),
                (Path.LINETO, (rad_pos - (arrow_rad_width / 2), arrow_r_pos)),
            ]

        # Arrow width is given in degrees; path coordinates are in radians
        arrow_rad_width = np.radians(arrow_width)
        path_data: list[tuple[np.uint8, tuple[float, float]]] = []
        # Arrow head at position1 (drawn first as its own sub-path)
        if direction in (config.Direction.REVERSE, config.Direction.BIDIRECTIONAL):
            path_data.extend(arrow_line_paths(rad1, r1, arrow_rad_width, arrow_height))
        # Bezier curve from position1 to position2
        path_data.append((Path.MOVETO, (rad1, r1)))
        path_data.extend(bezier_paths(rad1, rad2, r1, r2, height_ratio))
        path_data.append((Path.LINETO, (rad2, r2)))
        # Arrow head at position2
        if direction in (config.Direction.FORWARD, config.Direction.BIDIRECTIONAL):
            path_data.extend(arrow_line_paths(rad2, r2, arrow_rad_width, arrow_height))

        verts, codes = [p[1] for p in path_data], [p[0] for p in path_data]
        bezier_arrow_line_path = Path(verts, codes, closed=True)  # type: ignore
        super().__init__(bezier_arrow_line_path, **kwargs)
449 | 


--------------------------------------------------------------------------------
/src/pycirclize/sector.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import math
  4 | import textwrap
  5 | import warnings
  6 | from collections.abc import Callable, Sequence
  7 | from copy import deepcopy
  8 | from typing import TYPE_CHECKING, Any
  9 | 
 10 | import numpy as np
 11 | from PIL import Image, ImageOps
 12 | 
 13 | from pycirclize import config, utils
 14 | from pycirclize.patches import ArcLine, ArcRectangle, Line
 15 | from pycirclize.tooltip import gen_gid
 16 | from pycirclize.track import Track
 17 | from pycirclize.utils.plot import get_label_params_by_rad
 18 | 
 19 | if TYPE_CHECKING:
 20 |     from pathlib import Path
 21 | 
 22 |     from matplotlib.patches import Patch
 23 |     from matplotlib.projections.polar import PolarAxes
 24 | 
 25 | 
 26 | class Sector:
 27 |     """Circos Sector Class"""
 28 | 
 29 |     def __init__(
 30 |         self,
 31 |         name: str,
 32 |         size: float | tuple[float, float],
 33 |         rad_lim: tuple[float, float],
 34 |         clockwise: bool = True,
 35 |     ) -> None:
 36 |         """
 37 |         Parameters
 38 |         ----------
 39 |         name : str
 40 |             Sector name
 41 |         size : float | tuple[float, float]
 42 |             Sector size (or range)
 43 |         rad_lim : tuple[float, float]
 44 |             Sector radian limit region
 45 |         clockwise : bool, optional
 46 |             Sector coordinate direction (clockwise or anti-clockwise).
 47 |         """
 48 |         self._name = name
 49 |         if isinstance(size, Sequence):
 50 |             start, end = size[0], size[1]
 51 |         else:
 52 |             start, end = 0, size
 53 |         self._start = start
 54 |         self._end = end
 55 |         self._size = end - start
 56 |         self._rad_lim = rad_lim
 57 |         self._clockwise = clockwise
 58 |         self._tracks: list[Track] = []
 59 | 
 60 |         # Plot data and functions
 61 |         self._patches: list[Patch] = []
 62 |         self._gid2tooltip: dict[str, str] = {}
 63 |         self._plot_funcs: list[Callable[[PolarAxes], None]] = []
 64 | 
 65 |     ############################################################
 66 |     # Property
 67 |     ############################################################
 68 | 
    @property
    def name(self) -> str:
        """Sector name (as given at construction)"""
        return self._name

    @property
    def size(self) -> float:
        """Sector size (x coordinate), i.e. `end - start`"""
        return self._size

    @property
    def start(self) -> float:
        """Sector start position (x coordinate). `0` unless a range was given."""
        return self._start

    @property
    def end(self) -> float:
        """Sector end position (x coordinate)"""
        return self._end
 88 | 
 89 |     @property
 90 |     def center(self) -> float:
 91 |         """Sector center position (x coordinate)"""
 92 |         return (self.start + self.end) / 2
 93 | 
 94 |     @property
 95 |     def rad_size(self) -> float:
 96 |         """Sector radian size"""
 97 |         return max(self.rad_lim) - min(self.rad_lim)
 98 | 
    @property
    def rad_lim(self) -> tuple[float, float]:
        """Sector radian limit region (as given at construction)"""
        return self._rad_lim
103 | 
104 |     @property
105 |     def deg_size(self) -> float:
106 |         """Sector degree size"""
107 |         return max(self.deg_lim) - min(self.deg_lim)
108 | 
109 |     @property
110 |     def deg_lim(self) -> tuple[float, float]:
111 |         """Sector degree limit"""
112 |         return (math.degrees(self.rad_lim[0]), math.degrees(self.rad_lim[1]))
113 | 
    @property
    def clockwise(self) -> bool:
        """Sector coordinate direction (True: clockwise, False: anti-clockwise)"""
        return self._clockwise

    @property
    def tracks(self) -> list[Track]:
        """Tracks in sector (in the order they were added)"""
        return self._tracks

    @property
    def patches(self) -> list[Patch]:
        """Plot patches stored in this sector"""
        return self._patches

    @property
    def plot_funcs(self) -> list[Callable[[PolarAxes], None]]:
        """Plot functions stored in this sector (each one takes a `PolarAxes`)"""
        return self._plot_funcs
133 | 
134 |     ############################################################
135 |     # Public Method
136 |     ############################################################
137 | 
138 |     def add_track(
139 |         self,
140 |         r_lim: tuple[float, float],
141 |         *,
142 |         r_pad_ratio: float = 0,
143 |         name: str | None = None,
144 |     ) -> Track:
145 |         """Add track to sector
146 | 
147 |         Parameters
148 |         ----------
149 |         r_lim : tuple[float, float]
150 |             Radius limit region (0 - 100)
151 |         r_pad_ratio : float
152 |             Radius padding ratio for plot data (0 - 1.0)
153 |         name : str | None, optional
154 |             Track name. If None, `Track{track_idx}` is set.
155 | 
156 |         Returns
157 |         -------
158 |         track : Track
159 |             Track
160 |         """
161 |         name = f"Track{len(self.tracks) + 1:02d}" if name is None else name
162 |         if name in [t.name for t in self.tracks]:
163 |             raise ValueError(f"{name=} track is already exists.")
164 |         if not 0 <= min(r_lim) <= max(r_lim) <= 100:
165 |             warnings.warn(
166 |                 f"{r_lim=} is unexpected plot range (0 <= r <= 100).",
167 |                 stacklevel=2,
168 |             )
169 |         track = Track(name, r_lim, r_pad_ratio, self)
170 |         self._tracks.append(track)
171 |         return track
172 | 
173 |     def get_track(self, name: str) -> Track:
174 |         """Get track by name
175 | 
176 |         Parameters
177 |         ----------
178 |         name : str
179 |             Track name
180 | 
181 |         Returns
182 |         -------
183 |         track : Track
184 |             Target name track
185 |         """
186 |         name2track = {t.name: t for t in self.tracks}
187 |         if name not in name2track:
188 |             raise ValueError(f"{name=} track not exists.")
189 |         return name2track[name]
190 | 
191 |     def get_lowest_r(self) -> float:
192 |         """Get lowest radius position of sector from tracks data
193 | 
194 |         Returns
195 |         -------
196 |         lowest_r : float
197 |             Lowest radius position. If no tracks found, `lowest_r=100`.
198 |         """
199 |         if len(self.tracks) == 0:
200 |             return config.MAX_R
201 |         return min([min(t.r_lim) for t in self.tracks])
202 | 
203 |     def x_to_rad(self, x: float, ignore_range_error: bool = False) -> float:
204 |         """Convert x coordinate to radian in sector start-end range
205 | 
206 |         Parameters
207 |         ----------
208 |         x : float
209 |             X coordinate
210 |         ignore_range_error : bool
211 |             Ignore x coordinate range error
212 | 
213 |         Returns
214 |         -------
215 |         rad : float
216 |             Radian coordinate
217 |         """
218 |         # Check target x is in valid sector range
219 |         if not ignore_range_error:
220 |             # Apply relative torelance value to sector range to avoid
221 |             # unexpected invalid range error due to rounding errors (Issue #27, #67)
222 |             min_range = self.start - config.EPSILON
223 |             max_range = self.end + config.EPSILON
224 |             if not min_range <= x <= max_range:
225 |                 raise ValueError(f"{x=} is invalid range of '{self.name}' sector.\n{self}")  # fmt: skip  # noqa: E501
226 | 
227 |         if not self.clockwise:
228 |             x = (self.start + self.end) - x
229 |         size_ratio = self.rad_size / self.size if self.size != 0 else 0
230 |         x_from_start = x - self.start
231 |         rad_from_start = x_from_start * size_ratio
232 |         rad = min(self.rad_lim) + rad_from_start
233 |         return rad
234 | 
235 |     def axis(self, **kwargs) -> None:
236 |         """Plot axis
237 | 
238 |         By default, simple black axis params(`fc="none", ec="black", lw=0.5`) are set.
239 | 
240 |         Parameters
241 |         ----------
242 |         **kwargs : dict, optional
243 |             Patch properties (e.g. `fc="red", ec="blue", lw=0.5, ...`)
244 |             <https://matplotlib.org/stable/api/_as_gen/matplotlib.patches.Patch.html>
245 |         """
246 |         # Set default params
247 |         kwargs = utils.plot.set_axis_default_kwargs(**kwargs)
248 | 
249 |         # Axis facecolor placed behind other patches (zorder=0.99)
250 |         fc_behind_kwargs = {**kwargs, **config.AXIS_FACE_PARAM}
251 |         self.rect(self.start, self.end, config.R_LIM, **fc_behind_kwargs)
252 | 
253 |         # Axis edgecolor placed in front of other patches (zorder=1.01)
254 |         ec_front_kwargs = {**kwargs, **config.AXIS_EDGE_PARAM}
255 |         self.rect(self.start, self.end, config.R_LIM, **ec_front_kwargs)
256 | 
257 |     def text(
258 |         self,
259 |         text: str,
260 |         x: float | None = None,
261 |         r: float = 105,
262 |         *,
263 |         adjust_rotation: bool = True,
264 |         orientation: str = "horizontal",
265 |         ignore_range_error: bool = False,
266 |         **kwargs,
267 |     ) -> None:
268 |         """Plot text
269 | 
270 |         Parameters
271 |         ----------
272 |         text : str
273 |             Text content
274 |         x: float | None, optional
275 |             X position. If None, sector center x is set.
276 |         r : float, optional
277 |             Radius position. By default, outer position `r=105` is set.
278 |         adjust_rotation : bool, optional
279 |             If True, text rotation is auto set based on `x` and `orientation` params.
280 |         orientation : str, optional
281 |             Text orientation (`horizontal` or `vertical`)
282 |         ignore_range_error : bool, optional
283 |             If True, ignore x position range error
284 |             (ErrorCase: `not track.start <= x <= track.end`)
285 |         **kwargs : dict, optional
286 |             Text properties (e.g. `size=12, color="red", va="center", ...`)
287 |             <https://matplotlib.org/stable/api/_as_gen/matplotlib.axes.Axes.text.html>
288 |         """
289 |         # If value is None, center position is set.
290 |         x = self.center if x is None else x
291 |         rad = self.x_to_rad(x, ignore_range_error)
292 | 
293 |         if adjust_rotation:
294 |             # Set label proper alignment, rotation parameters by radian
295 |             params = utils.plot.get_label_params_by_rad(rad, orientation)
296 |             kwargs.update(params)
297 | 
298 |         if "ha" not in kwargs and "horizontalalignment" not in kwargs:
299 |             kwargs.update(dict(ha="center"))
300 |         if "va" not in kwargs and "verticalalignment" not in kwargs:
301 |             kwargs.update(dict(va="center"))
302 | 
303 |         def plot_text(ax: PolarAxes) -> None:
304 |             ax.text(rad, r, text, **kwargs)
305 | 
306 |         self._plot_funcs.append(plot_text)
307 | 
308 |     def line(
309 |         self,
310 |         *,
311 |         r: float | tuple[float, float],
312 |         start: float | None = None,
313 |         end: float | None = None,
314 |         arc: bool = True,
315 |         **kwargs,
316 |     ) -> None:
317 |         """Plot line
318 | 
319 |         Parameters
320 |         ----------
321 |         r : float | tuple[float, float]
322 |             Line radius position (0 - 100). If r is float, (r, r) is set.
323 |         start : float | None, optional
324 |             Start position (x coordinate). If None, `sector.start` is set.
325 |         end : float | None, optional
326 |             End position (x coordinate). If None, `sector.end` is set.
327 |         arc : bool, optional
328 |             If True, plot arc style line for polar projection.
329 |             If False, simply plot linear style line.
330 |         **kwargs : dict, optional
331 |             Patch properties (e.g. `color="red", lw=3, ...`)
332 |             <https://matplotlib.org/stable/api/_as_gen/matplotlib.patches.Patch.html>
333 |         """
334 |         start = self.start if start is None else start
335 |         end = self.end if end is None else end
336 |         rad_lim = (self.x_to_rad(start), self.x_to_rad(end))
337 |         r_lim = r if isinstance(r, Sequence) else (r, r)
338 |         LinePatch = ArcLine if arc else Line
339 |         self._patches.append(LinePatch(rad_lim, r_lim, **kwargs))
340 | 
341 |     def rect(
342 |         self,
343 |         start: float | None = None,
344 |         end: float | None = None,
345 |         r_lim: tuple[float, float] | None = None,
346 |         tooltip: str | None = None,
347 |         **kwargs,
348 |     ) -> None:
349 |         """Plot rectangle
350 | 
351 |         Parameters
352 |         ----------
353 |         start : float | None, optional
354 |             Start position (x coordinate). If None, `sector.start` is set.
355 |         end : float | None, optional
356 |             End position (x coordinate). If None, `sector.end` is set.
357 |         r_lim : tuple[float, float] | None, optional
358 |             Radius limit region. If None, (0, 100) is set.
359 |         tooltip : str | None, optional
360 |             Tooltip label
361 |         **kwargs : dict, optional
362 |             Patch properties (e.g. `fc="red", ec="blue", lw=1.0, ...`)
363 |             <https://matplotlib.org/stable/api/_as_gen/matplotlib.patches.Patch.html>
364 |         """
365 |         start = self.start if start is None else start
366 |         end = self.end if end is None else end
367 |         rad_rect_start = self.x_to_rad(start)
368 |         rad_rect_end = self.x_to_rad(end)
369 | 
370 |         r_lim = config.R_LIM if r_lim is None else r_lim
371 |         min_rad = min(rad_rect_start, rad_rect_end)
372 |         max_rad = max(rad_rect_start, rad_rect_end)
373 | 
374 |         radr = (min_rad, min(r_lim))
375 |         width = max_rad - min_rad
376 |         height = max(r_lim) - min(r_lim)
377 | 
378 |         if tooltip:
379 |             gid = gen_gid("rect")
380 |             self._gid2tooltip[gid] = tooltip
381 |             kwargs["gid"] = gid
382 | 
383 |         self._patches.append(ArcRectangle(radr, width, height, **kwargs))
384 | 
385 |     def raster(
386 |         self,
387 |         img: str | Path | Image.Image,
388 |         *,
389 |         size: float = 0.05,
390 |         x: float | None = None,
391 |         r: float = 105,
392 |         rotation: int | float | str | None = None,
393 |         border_width: int = 0,
394 |         label: str | None = None,
395 |         label_pos: str = "bottom",
396 |         label_margin: float = 0.1,
397 |         imshow_kws: dict[str, Any] | None = None,
398 |         text_kws: dict[str, Any] | None = None,
399 |     ) -> None:
400 |         """Plot raster image
401 | 
402 |         This method is experimental. API may change in the future release.
403 | 
404 |         Parameters
405 |         ----------
406 |         img : str | Path | Image
407 |             Image data (`File Path`|`URL`|`PIL Image`)
408 |         size : float, optional
409 |             Image size (ratio to overall figure size)
410 |         x : float | None, optional
411 |             X position. If None, sector center x position is set.
412 |         r : float, optional
413 |             Radius position
414 |         rotation : int | float | str | None, optional
415 |             Image rotation setting.
416 |             If `None`, no rotate image (default).
417 |             If `auto`, rotate image by auto set rotation.
418 |             If `int` or `float` value, rotate image by user-specified value.
419 |         border_width : int, optional
420 |             Border width in pixel. By default, 0 is set (no border shown).
421 |         label : str | None, optional
422 |             Image label. If None, no label shown.
423 |         label_pos : str, optional
424 |             Label plot position (`bottom` or `top`)
425 |         label_margin : float, optional
426 |             Label margin
427 |         imshow_kws : dict[str, Any] | None, optional
428 |             Axes.imshow properties
429 |             <https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.imshow.html>
430 |         text_kws : dict[str, Any] | None, optional
431 |             Text properties (e.g. `dict(size=10, color="red", ...`)
432 |             <https://matplotlib.org/stable/api/_as_gen/matplotlib.axes.Axes.text.html>
433 |         """
434 |         imshow_kws = {} if imshow_kws is None else deepcopy(imshow_kws)
435 |         text_kws = {} if text_kws is None else deepcopy(text_kws)
436 | 
437 |         # Load image data
438 |         im = utils.load_image(img)
439 | 
440 |         # Draw border on image
441 |         if border_width > 0:
442 |             im = ImageOps.expand(im, border=border_width, fill="black")
443 | 
444 |         # Rotate image
445 |         x = self.center if x is None else x
446 |         rad = self.x_to_rad(x)
447 |         if isinstance(rotation, (int, float)):
448 |             im = im.rotate(rotation, expand=True)
449 |             rotate_value = rotation
450 |         elif rotation == "auto":
451 |             rotate_value: float = get_label_params_by_rad(rad, "horizontal")["rotation"]
452 |             im = im.rotate(rotate_value, expand=True)
453 |         elif rotation is None:
454 |             rotate_value = 0
455 |         else:
456 |             raise ValueError(f"{rotation=} is invalid.")
457 | 
458 |         # Calculate x, y image set position
459 |         max_r_lim = config.MAX_R + config.R_PLOT_MARGIN
460 |         im_x: float = np.cos((np.pi / 2) - rad) * (r / max_r_lim)
461 |         im_y: float = np.sin((np.pi / 2) - rad) * (r / max_r_lim)
462 |         # Normalize (-1, 1) to (0, 1) axis range
463 |         im_x = (im_x + 1) / 2
464 |         im_y = (im_y + 1) / 2
465 | 
466 |         # TODO: Terrible code to be fixed in the future
467 |         # Approximate image size calculation logic, not complete
468 |         scale = 1 - (abs(abs(rotate_value) % 90 - 45) / 45)  # 0 - 1.0
469 |         size_ratio = 1 + (scale * (np.sqrt(2) - 1))
470 |         size = size * size_ratio
471 | 
472 |         def plot_raster(ax: PolarAxes) -> None:
473 |             # Set inset axes & plot raster image
474 |             bounds = (im_x - (size / 2), im_y - (size / 2), size, size)
475 |             axin = ax.inset_axes(bounds, transform=ax.transAxes)
476 |             axin.axis("off")
477 |             axin.imshow(im, **imshow_kws)  # type: ignore
478 | 
479 |             # Plot label
480 |             if label is not None:
481 |                 text_x = sum(axin.get_xlim()) / 2
482 |                 y_size = max(axin.get_ylim()) - min(axin.get_ylim())
483 |                 if label_pos == "bottom":
484 |                     text_y = max(axin.get_ylim()) + (y_size * label_margin)
485 |                     va = "top"
486 |                 elif label_pos == "top":
487 |                     text_y = min(axin.get_ylim()) - (y_size * label_margin)
488 |                     va = "bottom"
489 |                 else:
490 |                     raise ValueError(f"{label_pos=} is invalid ('top' or 'bottom').")
491 |                 axin.text(text_x, text_y, label, ha="center", va=va, **text_kws)
492 | 
493 |         self._plot_funcs.append(plot_raster)
494 | 
495 |     ############################################################
496 |     # Private Method
497 |     ############################################################
498 | 
499 |     def __str__(self) -> str:
500 |         min_deg_lim, max_deg_lim = min(self.deg_lim), max(self.deg_lim)
501 |         track_names = [t.name for t in self.tracks]
502 |         return textwrap.dedent(
503 |             f"""
504 |             # Sector = '{self.name}'
505 |             # Size = {self.size} ({self.start} - {self.end})
506 |             # Degree Size = {self.deg_size:.2f} ({min_deg_lim:.2f} - {max_deg_lim:.2f})
507 |             # Track List = {track_names}
508 |             """
509 |         )[1:]
510 | 


--------------------------------------------------------------------------------
/src/pycirclize/parser/gff.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import bz2
  4 | import gzip
  5 | import warnings
  6 | import zipfile
  7 | from collections import defaultdict
  8 | from dataclasses import dataclass
  9 | from io import TextIOWrapper
 10 | from pathlib import Path
 11 | from typing import Any, TextIO
 12 | 
 13 | from Bio.SeqFeature import CompoundLocation, SeqFeature, SimpleLocation
 14 | 
 15 | 
 16 | class Gff:
 17 |     """GFF Parser Class"""
 18 | 
 19 |     def __init__(
 20 |         self,
 21 |         gff_file: str | Path,
 22 |         *,
 23 |         name: str | None = None,
 24 |         target_seqid: str | None = None,
 25 |         min_range: None = None,
 26 |         max_range: None = None,
 27 |     ) -> None:
 28 |         """
 29 |         Parameters
 30 |         ----------
 31 |         gff_file : str | Path
 32 |             GFF file (`*.gz`, `*.bz2`, `*.zip` compressed file can be readable)
 33 |         name : str | None, optional
 34 |             name (If None, `file name` is set)
 35 |         target_seqid : str | None, optional
 36 |             Target seqid to be extracted. If None, only first seqid record is extracted.
 37 |         min_range : None, optional
 38 |             No longer used. Left for backward compatibility.
 39 |         max_range : None, optional
 40 |             No longer used. Left for backward compatibility.
 41 |         """
 42 |         self._gff_file = Path(gff_file)
 43 |         self._name = name
 44 |         self._records, start, end = self._parse_gff(gff_file, target_seqid)
 45 |         self._seq_region = (start, end)
 46 | 
 47 |         if min_range or max_range:
 48 |             warnings.warn(
 49 |                 "min_range & max_range is no longer used in Gff parser.", stacklevel=2
 50 |             )
 51 | 
 52 |     ############################################################
 53 |     # Property
 54 |     ############################################################
 55 | 
 56 |     @property
 57 |     def name(self) -> str:
 58 |         """Name"""
 59 |         if self._name is not None:
 60 |             return self._name
 61 |         if self._gff_file.suffix in (".gz", ".bz2", ".zip"):
 62 |             return self._gff_file.with_suffix("").with_suffix("").name
 63 |         else:
 64 |             return self._gff_file.with_suffix("").name
 65 | 
 66 |     @property
 67 |     def seq_region(self) -> tuple[int, int]:
 68 |         """GFF sequence-region start & end tuple
 69 | 
 70 |         If `##sequence-region` pragma is not found, seq_region=`(0, max_coords_value)`
 71 |         """
 72 |         return self._seq_region
 73 | 
 74 |     @property
 75 |     def records(self) -> list[GffRecord]:
 76 |         """GFF records (only target seqid)"""
 77 |         return self._records
 78 | 
 79 |     @property
 80 |     def all_records(self) -> list[GffRecord]:
 81 |         """All GFF records"""
 82 |         return self._all_records
 83 | 
 84 |     @property
 85 |     def target_seqid(self) -> str:
 86 |         """Target seqid"""
 87 |         return self._target_seqid
 88 | 
 89 |     @property
 90 |     def seqid_list(self) -> list[str]:
 91 |         """seqid list"""
 92 |         return self._seqid_list
 93 | 
 94 |     @property
 95 |     def genome_length(self) -> int:
 96 |         """Genome length (target seqid record)"""
 97 |         return max(self.seq_region)
 98 | 
 99 |     @property
100 |     def range_size(self) -> int:
101 |         """Same as `self.genome_length` (Left for backward compatibility)"""
102 |         return self.genome_length
103 | 
104 |     @property
105 |     def full_genome_length(self) -> int:
106 |         """Full genome length (concatenate all records)"""
107 |         return sum(list(self.get_seqid2size().values()))
108 | 
109 |     ############################################################
110 |     # Public Method
111 |     ############################################################
112 | 
113 |     def get_seqid2size(self) -> dict[str, int]:
114 |         """Get seqid & complete/contig/scaffold genome size dict
115 | 
116 |         By default, size is defined by `##sequence-region` pragma of target seqid.
117 |         If `##sequence-region` is not found, size is defined by max coordinate size in
118 |         target seqid features. This may differ from actual genome size.
119 | 
120 |         Returns
121 |         -------
122 |         seqid2size : dict[str, int]
123 |             seqid & genome size dict
124 |         """
125 |         return self._seqid2size
126 | 
127 |     def get_seqid2features(
128 |         self,
129 |         feature_type: str | list[str] | None = "CDS",
130 |         target_strand: int | None = None,
131 |     ) -> dict[str, list[SeqFeature]]:
132 |         """Get seqid & features in target seqid genome dict
133 | 
134 |         Parameters
135 |         ----------
136 |         feature_type : str | list[str] | None, optional
137 |             Feature type (`CDS`, `gene`, `mRNA`, etc...)
138 |             If None, extract regardless of feature type.
139 |         target_strand : int | None, optional
140 |             Extract target strand. If None, extract regardless of strand.
141 | 
142 |         Returns
143 |         -------
144 |         seqid2features : dict[str, list[SeqFeature]]
145 |             seqid & features dict
146 |         """
147 |         if isinstance(feature_type, str):
148 |             feature_type = [feature_type]
149 | 
150 |         gff_records = GffRecord.filter_records(
151 |             self.all_records,
152 |             feature_type=feature_type,
153 |             target_strand=target_strand,
154 |         )
155 |         seqid2features: dict[str, list[SeqFeature]] = {}
156 |         for seqid in self.seqid_list:
157 |             seqid2features[seqid] = []
158 |         for rec in gff_records:
159 |             seqid2features[rec.seqid].append(rec.to_seq_feature())
160 |         return seqid2features
161 | 
162 |     def extract_features(
163 |         self,
164 |         feature_type: str | list[str] | None = "CDS",
165 |         *,
166 |         target_strand: int | None = None,
167 |         target_range: tuple[int, int] | None = None,
168 |     ) -> list[SeqFeature]:
169 |         """Extract features
170 | 
171 |         If `target_seqid` is specified when the Gff instance initialized,
172 |         then the features of the target seqid are extracted.
173 |         Otherwise, extract the features of the seqid in the first row.
174 | 
175 |         Parameters
176 |         ----------
177 |         feature_type : str | list[str] | None, optional
178 |             Feature type (`CDS`, `gene`, `mRNA`, etc...)
179 |             If None, extract regardless of feature type.
180 |         target_strand : int | None, optional
181 |             Extract target strand. If None, extract regardless of strand.
182 |         target_range : tuple[int, int] | None, optional
183 |             Extract target range. If None, extract regardless of range.
184 | 
185 |         Returns
186 |         -------
187 |         features : list[SeqFeature]
188 |             Feature list
189 |         """
190 |         gff_records = GffRecord.filter_records(
191 |             self.records,
192 |             feature_type=feature_type,
193 |             target_strand=target_strand,
194 |             target_range=target_range,
195 |         )
196 |         return [rec.to_seq_feature() for rec in gff_records]
197 | 
198 |     def extract_exon_features(
199 |         self,
200 |         feature_type: str = "mRNA",
201 |         *,
202 |         target_strand: int | None = None,
203 |         target_range: tuple[int, int] | None = None,
204 |     ) -> list[SeqFeature]:
205 |         """Extract exon structure features
206 | 
207 |         Extract exons based on `parent feature` and `exon` ID-Parent relation
208 | 
209 |         Parameters
210 |         ----------
211 |         feature_type : str, optional
212 |             Feature type (e.g. `mRNA`, `ncRNA` , etc...)
213 |         target_strand : int | None, optional
214 |             Extract target strand. If None, extract regardless of strand.
215 |         target_range : tuple[int, int] | None, optional
216 |             Extract target range. If None, extract regardless of range.
217 | 
218 |         Returns
219 |         -------
220 |         features : list[SeqFeature]
221 |             Feature list
222 |         """
223 |         # Extract exon features by mRNA-exon relation
224 |         parent_id = None
225 |         parent_id2record: dict[str, GffRecord] = {}
226 |         parent_id2exons: dict[str, list[GffRecord]] = defaultdict(list)
227 |         for rec in self.records:
228 |             if rec.type == feature_type:
229 |                 parent_id = rec.attrs.get("ID", [None])[0]
230 |                 if parent_id is None:
231 |                     continue
232 |                 parent_id2record[parent_id] = rec
233 |             if (
234 |                 rec.type == "exon"
235 |                 and parent_id is not None
236 |                 and parent_id == rec.attrs["Parent"][0]
237 |             ):
238 |                 parent_id2exons[parent_id].append(rec)
239 | 
240 |         # Set exon features
241 |         exon_features: list[SeqFeature] = []
242 |         for parent_id, _ in parent_id2record.items():
243 |             parent_record = parent_id2record[parent_id]
244 |             exons = parent_id2exons[parent_id]
245 | 
246 |             parent_feature = parent_record.to_seq_feature()
247 |             if len(exons) == 1:
248 |                 exon_feature = parent_feature
249 |             elif len(exons) >= 2:
250 |                 exons = sorted(exons, key=lambda e: e.start)
251 |                 locs = [e.to_feature_location() for e in exons]
252 |                 exon_feature = SeqFeature(
253 |                     location=CompoundLocation(locs),
254 |                     type=parent_feature.type,
255 |                     id=parent_feature.id,
256 |                     qualifiers=parent_feature.qualifiers,
257 |                 )
258 |             else:
259 |                 # If no exon exists, skip feature extraction
260 |                 continue
261 | 
262 |             exon_features.append(exon_feature)
263 | 
264 |         # Filter exon features by target strand & range
265 |         filter_exon_features = []
266 |         for feature in exon_features:
267 |             if target_strand is not None and feature.strand != target_strand:
268 |                 continue
269 |             if target_range is not None:
270 |                 start, end = int(feature.location.start), int(feature.location.end)  # type: ignore
271 |                 min_range, max_range = min(target_range), max(target_range)
272 |                 if not min_range <= start <= end <= max_range:
273 |                     continue
274 |             filter_exon_features.append(feature)
275 | 
276 |         return filter_exon_features
277 | 
278 |     ############################################################
279 |     # Private Method
280 |     ############################################################
281 | 
282 |     def _parse_gff(
283 |         self,
284 |         gff_file: str | Path,
285 |         target_seqid: str | None,
286 |     ) -> tuple[list[GffRecord], int, int]:
287 |         """Parse GFF file
288 | 
289 |         Only parse target seqid record.
290 |         If target_record is None, only parse first seqid record.
291 | 
292 |         Parameters
293 |         ----------
294 |         gff_file : str | Path
295 |             GFF file
296 |         target_seqid : str | None
297 |             Target seqid to be extracted
298 | 
299 |         Returns
300 |         -------
301 |         gff_records : list[GffRecord]
302 |             GFF record list
303 |         start : int
304 |             Start position of target_seqid record
305 |         end : int
306 |             End position of target_seqid record
307 |         """
308 |         gff_file = Path(gff_file)
309 |         if gff_file.suffix == ".gz":
310 |             with gzip.open(gff_file, mode="rt", encoding="utf-8") as f:
311 |                 gff_records, start, end = self._parse_gff_textio(f, target_seqid)
312 |         elif gff_file.suffix == ".bz2":
313 |             with bz2.open(gff_file, mode="rt", encoding="utf-8") as f:
314 |                 gff_records, start, end = self._parse_gff_textio(f, target_seqid)
315 |         elif gff_file.suffix == ".zip":
316 |             with zipfile.ZipFile(gff_file) as z, z.open(z.namelist()[0]) as f:
317 |                 io = TextIOWrapper(f, encoding="utf-8")
318 |                 gff_records, start, end = self._parse_gff_textio(io, target_seqid)
319 |         else:
320 |             with open(gff_file, encoding="utf-8") as f:
321 |                 gff_records, start, end = self._parse_gff_textio(f, target_seqid)
322 | 
323 |         return gff_records, start, end
324 | 
325 |     def _parse_gff_textio(
326 |         self,
327 |         handle: TextIO,
328 |         target_seqid: str | None = None,
329 |     ) -> tuple[list[GffRecord], int, int]:
330 |         """Parse GFF file TextIO
331 | 
332 |         Parameters
333 |         ----------
334 |         handle : TextIO
335 |             GFF TextIO handle
336 |         target_seqid : str | None, optional
337 |             GFF target seqid
338 | 
339 |         Returns
340 |         -------
341 |         gff_records : list[GffRecord]
342 |             GFF record list
343 |         start : int
344 |             Start position of target_seqid record
345 |         end : int
346 |             End position of target_seqid record
347 |         """
348 |         # Parse GFF lines
349 |         gff_all_lines = handle.read().splitlines()
350 |         gff_record_lines = filter(GffRecord.is_gff_line, gff_all_lines)
351 |         gff_records = list(map(GffRecord.parse_gff_line, gff_record_lines))
352 |         if len(gff_records) == 0:
353 |             raise ValueError(f"Failed to parse '{self._gff_file}' as GFF file")
354 | 
355 |         # Get target seqid & GFF records
356 |         seqid_list = list(dict.fromkeys([rec.seqid for rec in gff_records]))
357 |         if target_seqid is None:
358 |             target_seqid = seqid_list[0]
359 |         if target_seqid not in seqid_list:
360 |             raise ValueError(f"Not found {target_seqid=} in '{self._gff_file}'")
361 |         target_gff_records = [rec for rec in gff_records if rec.seqid == target_seqid]
362 | 
363 |         # Try to get start-end region from '##sequence-region' annotation line
364 |         # If not found, (0, max_coordinate) is set as start-end
365 |         seqid2start_end: dict[str, tuple[int, int]] = {}
366 |         for seqid in seqid_list:
367 |             start, end = None, None
368 |             for line in gff_all_lines:
369 |                 if (
370 |                     line.startswith("##sequence-region")
371 |                     and len(line.split()) == 4
372 |                     and line.split()[1] == seqid
373 |                 ):
374 |                     # e.g. `##sequence-region NC_XXXXXX 1 10000` (seqid, start, end)
375 |                     start, end = line.split()[2:4]
376 |                     start, end = int(start) - 1, int(end)
377 |                     break
378 |             if start is None or end is None:
379 |                 seqid_gff_records = [rec for rec in gff_records if rec.seqid == seqid]
380 |                 start, end = 0, max([r.end for r in seqid_gff_records])
381 |             seqid2start_end[seqid] = (start, end)
382 |         seqid2size = {seqid: e - s for seqid, (s, e) in seqid2start_end.items()}
383 | 
384 |         # Set properties
385 |         self._target_seqid = target_seqid
386 |         self._seqid_list = seqid_list
387 |         self._all_records = gff_records
388 |         self._seqid2size = seqid2size
389 | 
390 |         return target_gff_records, *seqid2start_end[target_seqid]
391 | 
392 | 
@dataclass
class GffRecord:
    """GFF Record DataClass

    Holds the nine tab-separated columns of a single GFF record line, with
    numeric columns converted to Python types and the attributes column
    parsed into a `key -> list of values` dict.
    """

    seqid: str
    source: str
    type: str
    start: int  # 1-based coordinate
    end: int
    score: float | None  # None when the column is "." or empty
    strand: int  # 1 for "+", -1 for "-", 0 for anything else
    phase: int | None  # None when the column is "." or empty
    attrs: dict[str, list[str]]  # `key=v1,v2` is stored as {"key": ["v1", "v2"]}

    def is_within_range(self, min_range: int, max_range: int) -> bool:
        """Check within target range or not

        Parameters
        ----------
        min_range : int
            Min range
        max_range : int
            Max range

        Returns
        -------
        check_result : bool
            True only if the record lies entirely inside
            `min_range` - `max_range` (both bounds inclusive)
        """
        return min_range <= self.start <= self.end <= max_range

    def to_seq_feature(self) -> SeqFeature:
        """Convert GffRecord to SeqFeature (1-based to 0-based coordinate)

        Returns
        -------
        seq_feature : SeqFeature
            Feature whose id is the first `ID` attribute value
            (empty string if no `ID` attribute exists)
        """
        return SeqFeature(
            location=self.to_feature_location(),
            type=self.type,
            id=self.attrs.get("ID", [""])[0],
            qualifiers=self.attrs,
        )

    def to_feature_location(self) -> SimpleLocation:
        """Convert GffRecord to SimpleLocation (1-based to 0-based coordinate)

        Returns
        -------
        feature_location : SimpleLocation
            Simple location
        """
        # Only the start is shifted by one; the end value is passed unchanged
        return SimpleLocation(self.start - 1, self.end, self.strand)

    def to_gff_line(self) -> str:
        """Convert GffRecord to GFF record line

        Returns
        -------
        gff_line : str
            GFF record line (tab-separated, no trailing newline)
        """
        # Strand 0 is what `parse_gff_line` stores for any symbol other than
        # "+"/"-", so it is written back as "." instead of being turned into "+"
        strand_symbol = {1: "+", -1: "-"}.get(self.strand, ".")
        attrs_text = ";".join(f"{k}={','.join(v)}" for k, v in self.attrs.items())
        return "\t".join(
            (
                self.seqid,
                self.source,
                self.type,
                str(self.start),
                str(self.end),
                "." if self.score is None else str(self.score),
                strand_symbol,
                "." if self.phase is None else str(self.phase),
                attrs_text,
            )
        )

    @staticmethod
    def is_gff_line(line: str) -> bool:
        """Check GFF record line or not

        Parameters
        ----------
        line : str
            GFF line

        Returns
        -------
        check_result : bool
            False for lines starting with `#` and for lines with
            fewer than 9 tab-separated columns, True otherwise
        """
        return not (line.startswith("#") or len(line.split("\t")) < 9)

    @staticmethod
    def parse_gff_line(gff_line: str) -> GffRecord:
        """Parse GFF record line

        Parameters
        ----------
        gff_line : str
            GFF record line (a trailing line terminator is ignored)

        Returns
        -------
        gff_record : GffRecord
            GFF record
        """
        # Drop the line terminator so it cannot leak into the last attribute value
        gff_elms: list[Any] = gff_line.rstrip("\r\n").split("\t")[0:9]
        # start, end
        gff_elms[3], gff_elms[4] = int(gff_elms[3]), int(gff_elms[4])
        # score
        gff_elms[5] = None if gff_elms[5] in (".", "") else float(gff_elms[5])
        # strand
        if gff_elms[6] == "+":
            gff_elms[6] = 1
        elif gff_elms[6] == "-":
            gff_elms[6] = -1
        else:
            gff_elms[6] = 0
        # phase
        gff_elms[7] = None if gff_elms[7] in (".", "") else int(gff_elms[7])
        # attrs
        attr_dict: dict[str, list[str]] = {}
        for attr in str(gff_elms[8]).split(";"):
            # Entries without `=` (including empty ones) are skipped
            if "=" not in attr:
                continue
            # Split on the first `=` only, so values that contain `=` stay intact
            key, value = attr.split("=", 1)
            attr_dict[key] = value.split(",")
        gff_elms[8] = attr_dict

        return GffRecord(*gff_elms)

    @staticmethod
    def filter_records(
        gff_records: list[GffRecord],
        feature_type: str | list[str] | None = "CDS",
        target_strand: int | None = None,
        target_range: tuple[int, int] | None = None,
    ) -> list[GffRecord]:
        """Filter GFF records by feature_type, strand, range

        Parameters
        ----------
        gff_records : list[GffRecord]
            GFF records to be filtered
        feature_type : str | list[str] | None, optional
            Feature type (`CDS`, `gene`, `mRNA`, etc...). If None, no filter.
        target_strand : int | None, optional
            Target strand. If None, no filter.
        target_range : tuple[int, int] | None, optional
            Target range (the two values may be given in either order).
            If None, no filter.

        Returns
        -------
        filter_gff_records : list[GffRecord]
            Filtered GFF records (input order is kept)
        """
        if isinstance(feature_type, str):
            feature_type = [feature_type]

        # Normalize the range once instead of once per record
        range_bounds = None
        if target_range is not None:
            range_bounds = (min(target_range), max(target_range))

        filter_gff_records = []
        for rec in gff_records:
            if feature_type is not None and rec.type not in feature_type:
                continue
            if target_strand is not None and rec.strand != target_strand:
                continue
            if range_bounds is not None and not rec.is_within_range(*range_bounds):
                continue
            filter_gff_records.append(rec)
        return filter_gff_records
560 | 


--------------------------------------------------------------------------------