├── .DS_Store ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE.md └── workflows │ ├── docs.yaml │ ├── pypi.yml │ └── testpypi.yml ├── .gitignore ├── Makefile ├── README.md ├── cfutils ├── __init__.py ├── align.py ├── cli.py ├── count.py ├── parser.py ├── run.py ├── show.py └── utils.py ├── data ├── B5-M13R_B07.ab1 ├── B5-M13R_B07_vs_ref.pdf ├── B5-M13R_B07_vs_ref.tsv ├── data_file ├── matplotlib_example.png ├── plot.png └── ref.fa ├── docs ├── CNAME ├── _config.yml ├── api.md ├── cli.md ├── favicon.ico ├── features.md ├── index.md └── installation.md ├── pyproject.toml ├── test ├── __init__.py ├── __init__.py.py ├── test_advance.py ├── test_align.py ├── test_basic.py ├── test_parser.py ├── test_run.py └── test_show.py └── uv.lock /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/y9c/cfutils/a34e263d38ccba3821ff5c5b79df9988903c0ba6/.DS_Store -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: y9c 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | * treeio version: 2 | * Python version: 3 | * Operating System: 4 | 5 | ### Description 6 | 7 | Describe what you were trying to get done. 8 | Tell us what happened, what went wrong, and what you expected to happen. 9 | 10 | ### What I Did 11 | 12 | ``` 13 | Paste the command(s) you ran and the output. 14 | If there was a crash, please include the traceback here. 
15 | ``` 16 | -------------------------------------------------------------------------------- /.github/workflows/docs.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy Jekyll with GitHub Pages dependencies preinstalled 2 | 3 | on: 4 | # Runs on pushes targeting the default branch 5 | push: 6 | branches: 7 | - 'main' 8 | - 'dev' 9 | paths: 10 | - 'docs/**' 11 | 12 | # Allows you to run this workflow manually from the Actions tab 13 | workflow_dispatch: 14 | 15 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 16 | permissions: 17 | contents: read 18 | pages: write 19 | id-token: write 20 | 21 | # Allow one concurrent deployment 22 | concurrency: 23 | group: 'pages' 24 | cancel-in-progress: true 25 | 26 | jobs: 27 | # Build job 28 | build: 29 | runs-on: ubuntu-latest 30 | steps: 31 | - name: Checkout 32 | uses: actions/checkout@v3 33 | - name: Setup Pages 34 | uses: actions/configure-pages@v2 35 | - name: Build with Jekyll 36 | uses: actions/jekyll-build-pages@v1 37 | with: 38 | source: ./docs 39 | destination: ./_site 40 | - name: Upload artifact 41 | uses: actions/upload-pages-artifact@v1 42 | 43 | # Deployment job 44 | deploy: 45 | environment: 46 | name: github-pages 47 | url: ${{ steps.deployment.outputs.page_url }} 48 | runs-on: ubuntu-latest 49 | needs: build 50 | steps: 51 | - name: Deploy to GitHub Pages 52 | id: deployment 53 | uses: actions/deploy-pages@v1 54 | -------------------------------------------------------------------------------- /.github/workflows/pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distributions 📦 to PyPI 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v[0-9]+.[0-9]+.[0-9]+' 7 | 8 | jobs: 9 | build-n-publish: 10 | name: Build and publish Python 🐍 distributions 📦 to PyPI 11 | runs-on: ubuntu-18.04 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@master 15 | 16 | - name: Set 
up Python 3.8 17 | uses: actions/setup-python@v1 18 | with: 19 | python-version: 3.8 20 | 21 | - name: Install poetry 22 | run: >- 23 | curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python && 24 | source $HOME/.poetry/env 25 | 26 | - name: Build a source tarball 27 | run: >- 28 | $HOME/.poetry/bin/poetry build 29 | 30 | - name: Publish distribution 📦 to PyPI 31 | uses: pypa/gh-action-pypi-publish@master 32 | with: 33 | password: ${{ secrets.pypi_password }} 34 | -------------------------------------------------------------------------------- /.github/workflows/testpypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distributions 📦 to TestPyPI 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v[0-9]+.[0-9]+.[0-9]+*' 7 | 8 | jobs: 9 | build-n-publish: 10 | name: Build and publish Python 🐍 distributions 📦 to TestPyPI 11 | runs-on: ubuntu-18.04 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@master 15 | 16 | - name: Set up Python 3.8 17 | uses: actions/setup-python@v1 18 | with: 19 | python-version: 3.8 20 | 21 | - name: Install poetry 22 | run: >- 23 | curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python && 24 | source $HOME/.poetry/env 25 | 26 | - name: Build a source tarball 27 | run: >- 28 | $HOME/.poetry/bin/poetry build 29 | 30 | - name: Publish distribution 📦 to Test PyPI 31 | uses: pypa/gh-action-pypi-publish@master 32 | with: 33 | password: ${{ secrets.test_pypi_password }} 34 | repository_url: https://test.pypi.org/legacy/ 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | 
dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | 163 | # directory for temp file 164 | temp/ 165 | # tool cache 166 | .ruff_cache 167 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Improved Makefile for cfutils 2 | .PHONY: init dev test clean lock 3 | 4 | # Install dependencies (production only) 5 | init: 6 | uv pip install --no-deps 7 | 8 | # Install all dependencies (dev + prod) 9 | dev: 10 | uv pip install 11 | 12 | # Run all tests 13 | test: 14 | python -m unittest discover -s test 15 | 16 | # Remove Python cache and temp files 17 | clean: 18 | rm -rf __pycache__ */__pycache__ *.pyc *.pyo *.pyd temp/* test/__pycache__ cfutils/__pycache__ 19 | 20 | # Update lock file from pyproject.toml 21 | lock: 22 | uv pip compile 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Readthedocs](https://readthedocs.org/projects/cfutils/badge/?version=latest)](https://cfutils.readthedocs.io/en/latest/?badge=latest) 2 | [![Pypi Releases](https://img.shields.io/pypi/v/cfutils.svg)](https://pypi.python.org/pypi/cfutils) 3 | [![Downloads](https://static.pepy.tech/badge/cfutils)](https://pepy.tech/project/cfutils) 4 | 5 | **Chromatogram File Utils** 6 | 7 | For Sanger sequencing data visualizing, alignment, mutation calling, and trimming etc. 8 | 9 | ## Demo 10 | 11 | ![plot chromatogram with mutation](https://raw.githubusercontent.com/y9c/cfutils/master/data/plot.png) 12 | 13 | > command to generate the demo above 14 | 15 | ```bash 16 | cfutils mut --query ./data/B5-M13R_B07.ab1 --subject ./data/ref.fa --outdir ./data/ --plot 17 | ``` 18 | 19 | ## How to use? 20 | 21 | - You can have mutation detection and visualization in one step using the command line. 
22 | 23 | ```bash 24 | cfutils mut --help 25 | ``` 26 | 27 | - You can also embed the resulting matplotlib figures in your own plots by using cfutils as a Python module. 28 | 29 | An example: 30 | 31 | ```python 32 | import matplotlib.pyplot as plt 33 | import numpy as np 34 | 35 | from cfutils.parser import parse_abi 36 | from cfutils.show import plot_chromatograph 37 | 38 | seq = parse_abi("./data/B5-M13R_B07.ab1") 39 | peaks = seq.annotations["peak positions"][100:131] 40 | 41 | fig, axes = plt.subplots(2, 1, figsize=(12, 6), sharex=True) 42 | plot_chromatograph( 43 | seq, 44 | region=(100, 130), 45 | ax=axes[0], 46 | show_bases=True, 47 | show_positions=True, 48 | color_map=dict(zip("ATGC", ["C0", "C2", "C1", "C4"])), 49 | ) 50 | axes[1].bar(peaks, np.random.randn(len(peaks)), color="0.66") 51 | plt.show() 52 | ``` 53 | 54 | ![plot chromatogram in_matplotlib](https://raw.githubusercontent.com/y9c/cfutils/master/data/matplotlib_example.png) 55 | 56 | ## How to install? 57 | 58 | ### from PyPI 59 | 60 | _(use this way ONLY, if you don't know what's going on)_ 61 | 62 | ```bash 63 | pip install --user cfutils 64 | ``` 65 | 66 | ### manipulate the source code 67 | 68 | - clone from GitHub 69 | 70 | ```bash 71 | git clone git@github.com:y9c/cfutils.git 72 | ``` 73 | 74 | - install the dependencies 75 | 76 | ```bash 77 | make init 78 | ``` 79 | 80 | - run the unit tests 81 | 82 | ```bash 83 | make test 84 | ``` 85 | 86 | ## ChangeLog 87 | 88 | - Reverse complement the chromatogram file. 
(Inspired by SnapGene) 89 | - build as a Python package for PyPI 90 | - fix bug that highlighted the wrong base 91 | - replace blastn with a built-in Python aligner 92 | 93 | ## TODO 94 | 95 | - [ ] call mutation by alignment and plot Chromatogram graphic 96 | - [ ] add a doc 97 | - [x] change xaxis by peak location 98 | - [ ] fix bug where the chromatogram shifts position after trimming 99 | - [x] wrap as a cli app 100 | - [ ] return quality score in output 101 | - [ ] fix issue that selected base is not in the middle 102 | - [ ] fix plot_chromatograph rendering bug 103 | 104 | - [ ] add projection feature to make align and assemble possible 105 | -------------------------------------------------------------------------------- /cfutils/__init__.py: -------------------------------------------------------------------------------- 1 | # export function in show module? 2 | # from .show import plot_chromatograph 3 | -------------------------------------------------------------------------------- /cfutils/align.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright © 2019 yech 5 | # Distributed under terms of the MIT license. 6 | # 7 | # Created: 2019-05-27 20:19 8 | 9 | 10 | """align query sequence with ref. 
from dataclasses import dataclass
from typing import List, Optional, Tuple

import ssw
from .parser import SeqRecord

from .utils import get_logger

LOGGER = get_logger(__name__)


@dataclass
class SitePair:
    """One aligned column: a reference base paired with a chromatogram base.

    ``qual_site`` and ``qual_local`` start as ``None`` and are filled in by
    ``align_chromatograph``.
    """

    ref_pos: int
    ref_base: str
    cf_pos: int
    cf_base: str
    qual_site: Optional[int] = None
    qual_local: Optional[int] = None

    def __repr__(self):
        return f"{self.ref_base}({self.ref_pos})->{self.cf_base}({self.cf_pos})"


def run_align(reference: str, query: str) -> List[SitePair]:
    """Align ``query`` against ``reference`` and list every aligned column.

    Args:
        reference (str): The reference sequence.
        query (str): The query sequence.

    Returns:
        List[SitePair]: One entry per alignment column, gaps included.
    """
    alignment = ssw.Aligner().align(reference=reference, query=query)
    # NOTE(review): the unpacking below assumes ``alignment.alignment`` holds
    # (query row, match row, reference row) -- confirm against the ssw docs.
    query_row, _, ref_row = alignment.alignment
    cf_cursor = alignment.query_begin
    ref_cursor = alignment.reference_begin

    pairs = []
    for cf_base, ref_base in zip(query_row, ref_row):
        pairs.append(
            SitePair(
                ref_pos=ref_cursor,
                ref_base=ref_base,
                cf_pos=cf_cursor,
                cf_base=cf_base,
            )
        )
        # A gap ("-") consumes no base on that side, so its cursor stays put.
        if cf_base != "-":
            cf_cursor += 1
        if ref_base != "-":
            ref_cursor += 1
    return pairs


def get_quality(pos: int, query_record: SeqRecord, flank_base_num=0) -> Tuple[int, int]:
    """Return the phred quality at ``pos`` and the mean over its neighbourhood.

    ``pos`` is read as ``qual[pos - 1]``.  The neighbourhood spans
    ``flank_base_num`` bases on each side, clipped to the read, and its mean
    is truncated to an int.  With ``flank_base_num=0`` both values refer to
    the single site.
    """
    phred = query_record.letter_annotations["phred_quality"]
    window_start = max(0, pos - 1 - flank_base_num)
    window_end = min(len(phred), pos + flank_base_num)
    window = phred[window_start:window_end]
    site_quality = phred[pos - 1]
    local_quality = int(sum(window) / len(window))
    return site_quality, local_quality


def align_chromatograph(
    query_record: SeqRecord, subject_record: SeqRecord
) -> List[SitePair]:
    """Align the chromatogram read to the subject and attach quality scores.

    @return: list of SitePair about all sites
    """
    aligned = run_align(
        reference=str(subject_record.seq), query=str(query_record.seq)
    )
    LOGGER.info(f"{query_record.name}: Total aligned number: {len(aligned)}")
    for pair in aligned:
        # Local quality is averaged over 5 bases on each side of the site.
        scores = get_quality(pair.cf_pos, query_record, flank_base_num=5)
        pair.qual_site, pair.qual_local = scores
        LOGGER.debug(f"{pair}\tlocal:{pair.qual_local}\tsite:{pair.qual_site}")
    return aligned


def call_mutations(
    query_record: SeqRecord,
    subject_record: SeqRecord,
    report_all_sites: bool = False,
) -> List[SitePair]:
    """Align the read and return the sites to report.

    With ``report_all_sites`` every aligned site is returned; otherwise only
    sites whose reference and chromatogram bases differ.

    @return: list of SitePair about mutation sites
    """
    aligned = align_chromatograph(query_record, subject_record)

    if report_all_sites:
        for pair in aligned:
            LOGGER.debug(f"Site ({pair}) is reported!")
        return list(aligned)

    mismatches = [pair for pair in aligned if pair.ref_base != pair.cf_base]
    for pair in mismatches:
        LOGGER.debug(f"Site ({pair}) is with mutation!")
    LOGGER.info(f"{query_record.name}: Total mutation number: {len(mismatches)}")
    return mismatches
import click
from cfutils.run import report_mutation


@click.group()
@click.option("--debug/--no-debug", default=False)
def cli(debug):
    """Chromatogram File Utils."""
    # The docstring above is kept verbatim: click shows it as the help text.
    if not debug:
        return
    click.echo("Debug mode is on")


# call mutation
@cli.command()
@click.option("--query", prompt="QUERY (abi file): ", help="Query file in abi format")
@click.option(
    "--subject",
    prompt="SUBJECT (fasta file): ",
    help="Subject file in fasta format as ref",
)
@click.option("--outdir", default=None, required=False, help="Output directory")
@click.option("--outbase", default=None, required=False, help="Output basename")
@click.option(
    "--aligned/--mutated",
    default=False,
    help="Report all aligned sites or mutation sites only",
)
@click.option(
    "--plot/--no-plot",
    default=False,
    help="Generate figure of mutation in chromatogram.",
)
def mut(query, subject, outdir, outbase, aligned, plot):
    """do mutation calling, then report in tsv and pdf."""
    # Translate the CLI option names into report_mutation's keyword names.
    report_kwargs = {
        "query_ab1_file": query,
        "subject_fasta_file": subject,
        "output_dir": outdir,
        "file_basename": outbase,
        "report_all_sites": aligned,
        "report_mut_plot": plot,
    }
    report_mutation(**report_kwargs)
import re
import sys
from datetime import date


class CountMutations:
    """Count mutation lines per sequence in a mutations report.

    A report is a text file in which every line that does not start with
    ``"Q: "`` is a sequence definition line and every ``"Q: "`` line is one
    mutation belonging to the most recent definition line.
    """

    # Captures the text between the outermost parentheses of a field.
    _NAME_PATTERN = re.compile(r"\((.*)\)")

    def __init__(self):
        pass

    def _to_int(self, str):
        """Convert ``str`` to an int, falling back to 1 when it is not numeric.

        The fallback is 1 because ``_get_date`` uses the result as a month or
        day number, and ``date`` rejects 0.
        """
        # NOTE: the parameter keeps its original (builtin-shadowing) name so
        # that the method signature stays unchanged for existing callers.
        try:
            return int(str)
        except ValueError:
            return 1

    def _get_date(self, seq_def):
        """Extract the date from a ``|``-separated sequence definition.

        Year, month and day are read from the fourth-, third- and second-last
        fields.  A definition without any ``|`` yields ``date(1800, 1, 1)``.
        A missing or non-numeric year prints a message and exits the process
        with status -1.
        """
        parts = seq_def.split("|")

        # Probably no date if there is no | in the line
        if len(parts) == 1:
            return date(1800, 1, 1)

        try:
            year = int(parts[-4])
        except (ValueError, IndexError):
            # IndexError: fewer than four fields, so there is no year field.
            print("Failed on this definition line")
            sys.exit(-1)
        month = self._to_int(parts[-3])
        day = self._to_int(parts[-2])
        return date(year, month, day)

    def _get_gi(self, seq_def):
        """Return the second ``|``-separated field, or the whole line if none."""
        parts = seq_def.split("|")
        if len(parts) > 1:
            return parts[1]
        return seq_def

    def _get_name(self, seq_def):
        """Return the parenthesised name found in the third ``|`` field.

        Falls back to the whole definition line when the line has fewer than
        three fields or the third field contains no parentheses.
        """
        parts = seq_def.split("|")
        if len(parts) > 2:
            match = self._NAME_PATTERN.search(parts[2])
            if match is not None:
                return match.group(1)
        return seq_def

    def parse(self, mutations_file, cutoff_date):
        """Parse a mutations file generated by mutalign.py.

        Returns a list of ``[gi, date, name, count]`` rows, one per
        definition line.  ``count`` is the number of ``"Q: "`` lines that
        follow the definition, negated when the definition's date is earlier
        than ``cutoff_date``.  An empty file yields an empty list.
        """
        chart = []
        mut_count = 0
        inc = 1
        # ``with`` guarantees the file is closed even if parsing fails.
        with open(mutations_file) as fh:
            for line in fh:
                if line.startswith("Q: "):
                    # A mutation line belonging to the current record.
                    mut_count += inc
                    continue

                # A definition line: store the finished record's count ...
                if chart:
                    chart[-1][3] = mut_count

                # ... then open a new record with a fresh counter.
                record_date = self._get_date(line)
                inc = -1 if record_date < cutoff_date else 1
                name = self._get_name(line)
                gi = self._get_gi(line)
                chart.append([gi, record_date, name, 0])
                mut_count = 0

        # Store the count of the last record, if the file had any record.
        if chart:
            chart[-1][3] = mut_count

        return chart

    def get_chart(self, mutations_file, cutoff_date):
        """Print one ``gi,name,date,count`` line per parsed record."""
        for gi, dt, name, num in self.parse(mutations_file, cutoff_date):
            print("%s,%s,%s,%s" % (gi, name, dt, num))


def parse_date(dte):
    """Parse ``YYYY-MM-DD`` or ``YYYY/MM/DD`` into a ``date``.

    Returns ``None`` when the text does not split into exactly three fields
    on ``-`` (tried first) or ``/``, including when it has no separator.
    """
    if "-" in dte:
        fields = dte.split("-")
    elif "/" in dte:
        fields = dte.split("/")
    else:
        # No separator: previously this path crashed on an unbound local.
        return None

    if len(fields) != 3:
        return None
    year, month, day = (int(field) for field in fields)
    return date(year, month, day)
import datetime
import struct
from pathlib import Path

# dictionary for determining which tags goes into SeqRecord annotation
# each key is tag_name + tag_number
# if a tag entry needs to be added, just add its key and its key
# for the annotations dictionary as the value
_EXTRACT = {
    "TUBE1": "sample_well",
    "DySN1": "dye",
    "GTyp1": "polymer",
    "MODL1": "machine_model",
}
# list of tags that require preprocessing before use in creating
# seqrecords
_SPCTAGS = [
    "PBAS2",  # base-called sequence
    "PCON2",  # quality values of base-called sequence
    "SMPL1",  # sample id inputted before sequencing run
    "RUND1",  # run start date
    "RUND2",  # run finish date
    "RUNT1",  # run start time
    "RUNT2",  # run finish time
    # NOTE: The following are used for trace data
    "PLOC2",  # position of peaks
    "DATA1",  # channel1 raw data
    "DATA2",  # channel2 raw data
    "DATA3",  # channel3 raw data
    "DATA4",  # channel4 raw data
    "DATA9",  # channel1 analyzed data
    "DATA10",  # channel2 analyzed data
    "DATA11",  # channel3 analyzed data
    "DATA12",  # channel4 analyzed data
    "FWO_1",  # base order for channels
]
# dictionary for data unpacking format
_BYTEFMT = {
    1: "b",  # byte
    2: "s",  # char
    3: "H",  # word
    4: "h",  # short
    5: "i",  # long
    6: "2i",  # rational, legacy unsupported
    7: "f",  # float
    8: "d",  # double
    10: "h2B",  # date
    11: "4B",  # time
    12: "2i2b",  # thumb
    13: "B",  # bool
    14: "2h",  # point, legacy unsupported
    15: "4h",  # rect, legacy unsupported
    16: "2i",  # vPoint, legacy unsupported
    17: "4i",  # vRect, legacy unsupported
    18: "s",  # pString
    19: "s",  # cString
    20: "2i",  # tag, legacy unsupported
}
# header data structure (excluding 4 byte ABIF marker)
_HEADFMT = ">H4sI2H3I"
# directory data structure
_DIRFMT = ">4sI2H4I"
# every directory key worth decoding; built once for O(1) membership tests
_WANTED_TAGS = frozenset(_EXTRACT) | frozenset(_SPCTAGS)


class SeqRecord:
    """Sequence string plus id, name, description and annotation dicts.

    ``annotations`` holds per-record values; ``letter_annotations`` holds
    per-base sequences, which are sliced together with the sequence.
    """

    def __init__(self, seq, id="", name="", description="", annotations=None, letter_annotations=None):
        self.seq = str(seq)  # Ensure sequence is stored as a string
        self.id = id
        self.name = name
        self.description = description
        self.annotations = annotations if annotations is not None else {}
        self.letter_annotations = letter_annotations if letter_annotations is not None else {}

    def __getitem__(self, key):
        """Return a new record with sequence and per-base annotations indexed by ``key``."""
        new_seq = self.seq[key]
        new_annotations = self.annotations.copy()
        new_letter_annotations = {k: v[key] for k, v in self.letter_annotations.items()}
        return SeqRecord(new_seq, self.id, self.name, self.description, new_annotations, new_letter_annotations)

    def __len__(self):
        return len(self.seq)

    def __str__(self):
        return self.seq  # Return the sequence string for display


def abi_iterator(handle):
    """Yield the single SeqRecord stored in an ABI file.

    handle - binary file-like object positioned anywhere; it is rewound.

    Yields nothing for an empty file.  Raises ValueError when the handle
    reports a mode other than binary read, and IOError when the data does
    not start with the ``ABIF`` marker.  Tags absent from the file leave the
    sequence and sample id empty and the quality list empty.
    """

    # raise exception if handle mode is not 'rb'
    if hasattr(handle, "mode"):
        if set("rb") != set(handle.mode.lower()):
            raise ValueError("ABI files has to be opened in 'rb' mode.")

    # check if input file is a valid Abi file
    handle.seek(0)
    marker = handle.read(4)
    if not marker:
        # Empty file: finish the generator with a plain return.  Raising
        # StopIteration inside a generator is turned into RuntimeError.
        return
    if marker != b"ABIF":
        raise IOError("File should start ABIF, not %r" % marker)

    # dirty hack for handling time information
    times = {"RUND1": "", "RUND2": "", "RUNT1": "", "RUNT2": ""}

    # initialize annotations
    annot = dict.fromkeys(_EXTRACT.values())

    # defaults, so a file lacking these tags still yields a record
    seq = ""
    qual = []
    sample_id = ""

    # parse header and extract data from directories
    header = struct.unpack(_HEADFMT, handle.read(struct.calcsize(_HEADFMT)))

    for tag_name, tag_number, tag_data in _abi_parse_header(header, handle):
        key = tag_name + str(tag_number)

        # PBAS2 is base-called sequence
        if key == "PBAS2":
            seq = tag_data
        # PCON2 is quality values of base-called sequence
        elif key == "PCON2":
            qual = [ord(val) for val in tag_data]
        # PLOC2 is the location of peaks
        elif key == "PLOC2":
            annot["peak positions"] = [float(val) for val in tag_data]
        # DATA9-12 are the analyzed channels, stored as "channel 1".."channel 4"
        elif key in ["DATA9", "DATA10", "DATA11", "DATA12"]:
            annot["channel " + str(int(key[4:]) - 8)] = [float(val) for val in tag_data]
        # FWO_1 is the order of channels in bases
        elif key == "FWO_1":
            annot["channels"] = tag_data
        # SMPL1 is sample id entered before sequencing run
        elif key == "SMPL1":
            sample_id = tag_data
        elif key in times:
            times[key] = tag_data
        elif key in _EXTRACT:
            # extract sequence annotation as defined in _EXTRACT
            annot[_EXTRACT[key]] = tag_data

    # set time annotations
    annot["run_start"] = "%s %s" % (times["RUND1"], times["RUNT1"])
    annot["run_finish"] = "%s %s" % (times["RUND2"], times["RUNT2"])

    # use the file name as SeqRecord.name if available
    try:
        file_name = Path(handle.name).stem
    except (AttributeError, TypeError):
        # in-memory handles have no name; fd-opened files have an int name
        file_name = ""

    yield SeqRecord(
        seq,  # Use the sequence string directly
        id=sample_id,
        name=file_name,
        description="",
        annotations=annot,
        letter_annotations={"phred_quality": qual},
    )


def _abi_parse_header(header, handle):
    """Generator that returns directory contents.

    Yields ``(tag_name, tag_number, parsed_data)`` for every directory entry
    whose key is listed in ``_EXTRACT`` or ``_SPCTAGS``.
    """
    # header structure (after ABIF marker):
    # file version, tag name, tag number,
    # element type code, element size, number of elements
    # data size, data offset, handle (not file handle)
    head_elem_size = header[4]
    head_elem_num = header[5]
    head_offset = header[7]
    dir_size = struct.calcsize(_DIRFMT)

    for index in range(head_elem_num):
        start = head_offset + index * head_elem_size
        handle.seek(start)
        dir_entry = struct.unpack(_DIRFMT, handle.read(dir_size))
        tag_name = dir_entry[0].decode()
        tag_number = dir_entry[1]
        # only parse desired dirs
        if tag_name + str(tag_number) not in _WANTED_TAGS:
            continue
        elem_code = dir_entry[2]
        elem_num = dir_entry[4]
        data_size = dir_entry[5]
        data_offset = dir_entry[6]
        # if data size <= 4 bytes, data is stored inside the entry itself,
        # in the data-offset field, which starts 20 bytes into the entry
        if data_size <= 4:
            data_offset = start + 20
        handle.seek(data_offset)
        data = handle.read(data_size)
        yield tag_name, tag_number, _parse_tag_data(elem_code, elem_num, data)


def _abi_trim(seq_record: SeqRecord) -> SeqRecord:
    """Trims the sequence using Richard Mott's modified trimming algorithm.

    seq_record - SeqRecord object to be trimmed.

    Trimmed bases are determined from their segment score, which is a
    cumulative sum of each base's score. Base scores are calculated from
    their quality values.  Records of 20 bases or fewer are returned as is.

    More about the trimming algorithm:
    http://www.phrap.org/phredphrap/phred.html
    http://www.clcbio.com/manual/genomics/Quality_abif_trimming.html
    """
    segment = 20  # minimum sequence length
    cutoff = 0.05  # default cutoff value for calculating base score

    if len(seq_record) <= segment:
        return seq_record

    # calculate base score
    score_list = [
        cutoff - (10 ** (qual / -10.0))
        for qual in seq_record.letter_annotations["phred_quality"]
    ]

    # calculate cumulative score; negative running totals are reset to 0
    # first value is set to 0, because of the assumption that
    # the first base will always be trimmed out
    cumul_score = [0]
    trim_start = 0
    started = False
    for i in range(1, len(score_list)):
        score = cumul_score[-1] + score_list[i]
        if score < 0:
            cumul_score.append(0)
            continue
        cumul_score.append(score)
        if not started:
            # first index whose running total is not negative
            trim_start = i
            started = True

    # trim_finish = index of highest cumulative score,
    # marking the end of sequence segment with highest cumulative score
    trim_finish = cumul_score.index(max(cumul_score))
    new_record = seq_record[trim_start:trim_finish]
    new_record.annotations = seq_record.annotations.copy()
    return new_record


def _parse_tag_data(elem_code, elem_num, raw_data):
    """Returns single data value.

    elem_code - What kind of data
    elem_num - How many data points
    raw_data - bytes of the tag payload to unpack

    Returns None for an element code missing from ``_BYTEFMT``.  Raises
    ValueError when ``raw_data`` does not have the size the format needs.
    """
    if elem_code not in _BYTEFMT:
        return None

    # because '>1s' unpack differently from '>s'
    num = "" if elem_num == 1 else str(elem_num)
    fmt = ">" + num + _BYTEFMT[elem_code]

    # explicit check instead of ``assert``, which is stripped under -O
    expected_size = struct.calcsize(fmt)
    if len(raw_data) != expected_size:
        raise ValueError(
            "ABI tag data has %d bytes, expected %d for format %r"
            % (len(raw_data), expected_size, fmt)
        )
    data = struct.unpack(fmt, raw_data)

    # no need to use tuple if len(data) == 1
    # also if data is date / time
    if elem_code not in [10, 11] and len(data) == 1:
        data = data[0]

    # account for different data types
    if elem_code == 2:
        return data.decode()
    if elem_code == 10:
        return str(datetime.date(*data))
    if elem_code == 11:
        return str(datetime.time(*data[:3]))
    if elem_code == 13:
        return bool(data)
    if elem_code == 18:
        # pString: drop the first byte
        return data[1:].decode()
    if elem_code == 19:
        # cString: drop the last byte
        return data[:-1].decode()
    return data


def _rescale(seq, shift_to_zero):
    """Shared body of ``rescale_trace`` and ``trim_and_rescale_trace``.

    Trims the four channels to the span between the first and last peak,
    divides peak positions by the mean peak spacing (after subtracting the
    first peak when ``shift_to_zero`` is true), and stores the matching x
    coordinates under ``"trace_x"``.  Mutates and returns ``seq``.
    """
    traces = [seq.annotations["channel " + str(i)] for i in range(1, 5)]
    peaks = seq.annotations["peak positions"]
    first, last = peaks[0], peaks[-1]
    step = 1.0 * (last - first) / len(peaks)

    traces = [
        [t for (i, t) in enumerate(trace) if first <= i < last]
        for trace in traces
    ]
    if shift_to_zero:
        peaks = [(p - first) / step for p in peaks]
    else:
        peaks = [p / step for p in peaks]

    x = [1.0 * i / step for i in range(len(traces[0]))]

    seq.annotations["peak positions"] = peaks
    for i, trace in enumerate(traces, 1):
        seq.annotations["channel " + str(i)] = trace
    seq.annotations["trace_x"] = x
    return seq


def trim_and_rescale_trace(seq):
    """Trim traces to peak positions, shift to start from zero, and rescale."""
    return _rescale(seq, shift_to_zero=True)


def rescale_trace(seq: SeqRecord) -> SeqRecord:
    """Trim traces to peak positions and rescale, without shifting the peaks."""
    return _rescale(seq, shift_to_zero=False)


def parse_abi(filename: str) -> SeqRecord:
    """Parse an ABI file from Sanger sequencing.

    Raises ValueError when the file is empty and therefore holds no record.
    """
    with open(filename, "rb") as abifile:
        seq = next(abi_iterator(abifile), None)
    if seq is None:
        raise ValueError("No ABI record found in %r" % (filename,))

    return rescale_trace(seq)


def parse_fasta(filename: str) -> SeqRecord:
    """Read the first record of a FASTA file.

    Leading blank lines are skipped.  The id is the header line without its
    ``>``; a file without a header line gets an empty id and keeps all of
    its lines as sequence.  Reading stops at the next ``>`` header, so later
    records are ignored.  Raises ValueError when the file has no content.

    :param filename:
    :type filename: str
    :rtype: SeqRecord
    """
    seq_id = ""
    chunks = []
    started = False
    with open(filename, "r") as file:
        for raw_line in file:
            line = raw_line.strip()
            if not started:
                if not line:
                    continue
                started = True
                if line.startswith(">"):
                    seq_id = line[1:]
                    continue
            elif line.startswith(">"):
                # start of the next record: only the first one is returned
                break
            chunks.append(line)
    if not started:
        raise ValueError("No FASTA record found in %r" % (filename,))
    return SeqRecord("".join(chunks), id=seq_id)
9 | 10 | do some wrap functions 11 | """ 12 | 13 | import os 14 | from datetime import datetime 15 | from pathlib import Path 16 | from typing import List 17 | 18 | import matplotlib as mpl 19 | import matplotlib.pyplot as plt 20 | 21 | from cfutils.align import call_mutations 22 | from cfutils.parser import parse_abi, parse_fasta, SeqRecord 23 | from cfutils.show import annotate_mutation, highlight_base, plot_chromatograph 24 | 25 | from .utils import get_logger 26 | 27 | mpl.use("Agg", force=True) 28 | 29 | LOGGER = get_logger(__name__) 30 | 31 | 32 | def do_mutation_showing(query_record: SeqRecord, mutations: List, output_fig_file: str) -> None: 33 | """report mutations in pdf format.""" 34 | min_base_qual = 50 35 | min_local_qual = 20 36 | 37 | mutations = sorted(mutations, key=lambda x: x.cf_pos) 38 | flanking_size = 6 39 | windows_size = 30 40 | mutation_windows = [] 41 | start_pos = max(1, mutations[0].cf_pos - flanking_size) 42 | mutation_region = [] 43 | for idx, mut in enumerate(mutations): 44 | if mut.cf_pos + flanking_size <= start_pos + windows_size: 45 | mutation_region.append(mut) 46 | else: 47 | mutation_windows.append(mutation_region) 48 | start_pos = max(1, mutations[idx].cf_pos - flanking_size) 49 | mutation_region = [mut] 50 | mutation_windows.append(mutation_region) 51 | 52 | fig, axes = plt.subplots( 53 | len(mutation_windows), figsize=(20, 5 * len(mutation_windows)) 54 | ) 55 | for idx, mutation_region in enumerate(mutation_windows): 56 | if len(mutation_windows) == 1: 57 | ax = axes 58 | else: 59 | ax = axes[idx] 60 | region_start = max(1, mutation_region[0].cf_pos - flanking_size) 61 | plot_chromatograph( 62 | query_record, 63 | region=(region_start, region_start + windows_size), 64 | ax=ax, 65 | ) 66 | for mut in mutation_region: 67 | base_passed = ( 68 | mut.qual_site is not None 69 | and mut.qual_site >= min_base_qual 70 | and mut.qual_local is not None 71 | and mut.qual_local >= min_local_qual 72 | ) 73 | highlight_base(mut.cf_pos, 
query_record, ax, passed_filter=base_passed) 74 | annotate_mutation(mut, query_record, ax) 75 | fig.savefig(output_fig_file, bbox_inches="tight") 76 | 77 | 78 | def report_mutation( 79 | query_ab1_file, 80 | subject_fasta_file, 81 | output_dir=None, 82 | file_basename=None, 83 | report_all_sites=False, 84 | report_mut_plot=False, 85 | ): 86 | """reprot mutation within region.""" 87 | if output_dir is None: 88 | output_dir = os.path.join( 89 | os.getcwd(), 90 | "CFresult_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), 91 | ) 92 | os.makedirs(output_dir, exist_ok=True) 93 | 94 | if file_basename is None: 95 | file_basename = ( 96 | Path(query_ab1_file).stem + "_vs_" + Path(subject_fasta_file).stem 97 | ) 98 | 99 | query_record = parse_abi(query_ab1_file) 100 | subject_record = parse_fasta(subject_fasta_file) 101 | 102 | sites = call_mutations( 103 | query_record, 104 | subject_record, 105 | report_all_sites=report_all_sites, 106 | ) 107 | # save mutation / alignment to tsv file 108 | with open(os.path.join(output_dir, file_basename + ".tsv"), "w") as f_mut: 109 | header = [ 110 | "RefLocation", 111 | "RefBase", 112 | "CfLocation", 113 | "CfBase", 114 | "SiteQual", 115 | "LocalQual", 116 | ] 117 | f_mut.write("\t".join(header) + "\n") 118 | for site in sites: 119 | f_mut.write( 120 | f"{site.ref_pos}\t{site.ref_base}\t{site.cf_pos}\t{site.cf_base}\t{site.qual_site}\t{site.qual_local}\n" 121 | ) 122 | 123 | # do forget to filter mutation for plot 124 | if report_all_sites: 125 | mutations = [s for s in sites if s.ref_base != s.cf_base] 126 | LOGGER.info(f"{query_record.name}: Mutation number for plot: {len(mutations)}") 127 | else: 128 | mutations = sites 129 | 130 | # show mutation in pdf file 131 | if mutations and report_mut_plot: 132 | output_fig_file = os.path.join(output_dir, file_basename + ".pdf") 133 | do_mutation_showing(query_record, mutations, output_fig_file) 134 | -------------------------------------------------------------------------------- 
/cfutils/show.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright © 2019 yech 5 | # 6 | # Distributed under terms of the MIT license. 7 | 8 | """ 9 | Chromatogram File Utils. 10 | 11 | show alignment with matplotlib 12 | 13 | author: Fabio Zanini 14 | date: 09/12/13 15 | content: Plot functions for Sanger chromatographs. 16 | modified: By Ye Chang in 2018-05-14 17 | """ 18 | 19 | from collections import defaultdict 20 | from typing import Optional, Tuple 21 | 22 | import matplotlib as mpl 23 | import matplotlib.pyplot as plt 24 | from matplotlib.axes import Axes 25 | 26 | from .align import SitePair, align_chromatograph 27 | from .parser import SeqRecord # Import the custom SeqRecord class 28 | from .utils import get_logger, reverse_complement 29 | 30 | LOGGER = get_logger(__name__) 31 | 32 | 33 | def plot_chromatograph( 34 | seq: SeqRecord, 35 | region: Optional[Tuple[int, int]] = None, 36 | ax: Optional[Axes] = None, 37 | color_map: Optional[dict] = None, 38 | show_bases: bool = True, 39 | show_positions: bool = True, 40 | show_rc: bool = False, 41 | ) -> Axes: 42 | """ 43 | Plot Sanger chromatograph. 
44 | 45 | region: include both start and end (1-based) 46 | """ 47 | if ax is None: 48 | ax = plt.gca() 49 | # _, ax = plt.subplots(1, 1, figsize=(16, 6)) 50 | 51 | if seq is None: 52 | return ax 53 | 54 | if region is None: 55 | # turn into 0 based for better indexing 56 | region_start, region_end = 0, len(seq) 57 | else: 58 | region_start = max(region[0], 0) 59 | region_end = min(region[1], len(seq) - 1) 60 | 61 | _colors = defaultdict(lambda: "purple", {"A": "g", "C": "b", "G": "k", "T": "r"}) 62 | if color_map is not None: 63 | _colors.update(color_map) 64 | 65 | # Get signals 66 | peaks = seq.annotations["peak positions"] 67 | trace_x = seq.annotations["trace_x"] 68 | traces_y = [seq.annotations["channel " + str(i)] for i in range(1, 5)] 69 | bases = seq.annotations["channels"] 70 | 71 | xlim_left, xlim_right = peaks[region_start] - 1, peaks[region_end] + 0.5 72 | 73 | # Ensure seq is treated as a string 74 | sequence_str = seq.seq 75 | 76 | # subset peak and sequence 77 | # TODO: this might fix the bug 78 | peak_start = peaks[0] 79 | peak_zip = [ 80 | (p, s) 81 | for i, (p, s) in enumerate(zip(peaks, sequence_str)) 82 | if region_start <= i <= region_end 83 | ] 84 | peaks, sequence_str = list(zip(*peak_zip)) 85 | 86 | # subset trace_x and traces_y together 87 | trace_zip = [ 88 | (x + peak_start, *ys) 89 | for x, *ys in zip(trace_x, *traces_y) 90 | if xlim_left <= x <= xlim_right 91 | ] 92 | if not trace_zip: 93 | return ax 94 | trace_x, *traces_y = list(zip(*trace_zip)) 95 | 96 | # Plot traces 97 | trmax = max(map(max, traces_y)) 98 | for base in bases: 99 | chanel_index = bases.index(base) 100 | trace_y = [1.0 * ci / trmax for ci in traces_y[chanel_index]] 101 | if show_rc: 102 | base = reverse_complement(base) 103 | ax.plot(trace_x, trace_y, color=_colors[base], lw=2, label=base) 104 | ax.fill_between(trace_x, 0, trace_y, facecolor=_colors[base], alpha=0.125) 105 | 106 | # Plot bases at peak positions 107 | if show_bases: 108 | for i, peak in 
enumerate(peaks): 109 | b = reverse_complement(sequence_str[i]) if show_rc else sequence_str[i] 110 | ax.text( 111 | peak, 112 | -0.11, 113 | b, 114 | color=_colors[b], 115 | va="center", 116 | ha="center", 117 | alpha=0.66, 118 | fontsize="x-large", 119 | fontweight="bold", 120 | ) 121 | ax.set_ylim(bottom=-0.15, top=1.05) 122 | else: 123 | ax.set_ylim(bottom=-0.05, top=1.05) 124 | 125 | # peaks[0] - max(2, 0.02 * (peaks[-1] - peaks[0])), 126 | # right=peaks[-1] + max(2, 0.02 * (peaks[-1] - peaks[0])), 127 | ax.set_xlim(xlim_left + 0.5, xlim_right) 128 | 129 | if show_positions: 130 | ax.set_xticks(peaks) 131 | ax.set_xticklabels(list(range(region_start + 1, region_end + 2))) 132 | else: 133 | ax.set_xticks([]) 134 | 135 | if show_rc: 136 | ax.invert_xaxis() 137 | 138 | # hide y axis 139 | ax.set_yticklabels([]) 140 | ax.get_yaxis().set_visible(False) 141 | # hide border 142 | ax.spines["left"].set_visible(False) 143 | ax.spines["right"].set_visible(False) 144 | ax.spines["top"].set_visible(False) 145 | # hide grid 146 | ax.grid(False) 147 | # set legend 148 | ax.legend(loc="upper left", bbox_to_anchor=(0.95, 0.99)) 149 | return ax 150 | 151 | 152 | def show_reference( 153 | query_record: SeqRecord, 154 | subject_record: SeqRecord, 155 | ax: Axes, 156 | ref_central: Optional[int] = None, 157 | ) -> Axes: 158 | """ 159 | show the reference of the chromatograph. 
160 | 161 | design: if location is not proviode, do the alignment first 162 | @param seq: input SeqRecord of ref 163 | """ 164 | 165 | sitepairs = align_chromatograph(query_record, subject_record) 166 | sitepairs_indexing = {s.cf_pos: s for s in sitepairs} 167 | cf_sites = [int(i.get_text()) for i in ax.get_xticklabels()] 168 | matched_sitepairs = [sitepairs_indexing[pos] for pos in cf_sites] 169 | for i, peak in enumerate(ax.get_xticks()): 170 | ax.text( 171 | peak, 172 | 1.05, 173 | matched_sitepairs[i].ref_base, 174 | color="dimgrey", 175 | va="bottom", 176 | ha="center", 177 | alpha=0.85, 178 | fontsize="xx-large", 179 | fontweight="bold", 180 | clip_on=False, 181 | ) 182 | if ref_central is not None: 183 | ref_pos = matched_sitepairs[i].ref_pos - ref_central 184 | else: 185 | ref_pos = matched_sitepairs[i].ref_pos 186 | ax.text( 187 | peak, 188 | 1.12, 189 | ref_pos, 190 | color="dimgrey", 191 | va="bottom", 192 | ha="center", 193 | alpha=0.85, 194 | fontsize="medium", 195 | fontweight="normal", 196 | clip_on=False, 197 | ) 198 | return ax 199 | 200 | 201 | def highlight_base( 202 | pos_highlight: int, seq: SeqRecord, ax: Axes, passed_filter=True 203 | ) -> Axes: 204 | """ 205 | Highlight the area around a peak with a rectangle. 
206 | """ 207 | 208 | peaks = seq.annotations["peak positions"] 209 | peak = peaks[pos_highlight - 1] 210 | 211 | xmin, xmax = ax.get_xlim() 212 | if not xmin <= peak < xmax: 213 | raise ValueError("peak not within plot bounds") 214 | 215 | if pos_highlight == 1: 216 | xmin = -0.5 217 | else: 218 | xmin = 0.5 * (peaks[pos_highlight - 1] + peaks[pos_highlight - 2]) 219 | 220 | if pos_highlight == len(peaks): 221 | xmax = -0.5 222 | else: 223 | xmax = 0.5 * (peaks[pos_highlight - 1] + peaks[pos_highlight]) 224 | ymin, ymax = ax.get_ylim() 225 | 226 | if passed_filter: 227 | fcolor = "yellow" 228 | else: 229 | fcolor = "grey" 230 | rec = mpl.patches.Rectangle( 231 | (xmin, ymin), 232 | (xmax - xmin), 233 | (ymax - ymin), 234 | edgecolor="none", 235 | facecolor=fcolor, 236 | alpha=0.3, 237 | ) 238 | ax.add_patch(rec) 239 | return ax 240 | 241 | 242 | def annotate_mutation(mut: SitePair, seq: SeqRecord, ax) -> Axes: 243 | """ 244 | Annotate mutation pattern chromatograph position. 245 | """ 246 | peaks = seq.annotations["peak positions"] 247 | peak = peaks[mut.cf_pos - 1] 248 | ax.text( 249 | peak, 250 | 0.99, 251 | f"{mut.ref_base}{mut.ref_pos}{mut.cf_base}", 252 | color="c", 253 | fontsize="large", 254 | fontweight="bold", 255 | rotation=45, 256 | ha="center", 257 | va="center", 258 | ) 259 | return ax 260 | -------------------------------------------------------------------------------- /cfutils/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright © 2019 yech 5 | # Distributed under terms of the MIT license. 
#
# Created: 2019-05-27 21:19

"""shared function for package."""

import logging
import sys

# An explicit check instead of ``assert``: assertions are stripped when
# Python runs with -O, which would silently disable the guard.
if sys.version_info < (3, 6):
    raise RuntimeError("cfutils requires Python 3.6+!")


def get_logger(name: str) -> logging.Logger:
    """Return the logger called *name*, configuring it on first use.

    A logger without handlers gets one stream handler with a
    "time name level message" format and its level set to INFO; calling
    this again for the same name does not add a second handler.
    """
    logger: logging.Logger = logging.getLogger(name)
    if not logger.handlers:
        handler: logging.StreamHandler = logging.StreamHandler()
        formatter: logging.Formatter = logging.Formatter(
            "%(asctime)s %(name)-12s %(levelname)-8s %(message)s"
        )
        handler.setFormatter(formatter)
        logger.addHandler(handler)
        # NOTE(review): the whitespace-collapsed source does not show whether
        # the level was set inside or outside this ``if`` -- confirm.
        # logger.setLevel(logging.DEBUG)
        logger.setLevel(logging.INFO)
    return logger


LOGGER: logging.Logger = get_logger(__name__)


def evenchunks(string, chunksize=10):
    """Split *string* into consecutive pieces of *chunksize* characters.

    The last piece may be shorter; an empty string gives an empty list.
    """
    return [string[i : i + chunksize] for i in range(0, len(string), chunksize)]


def chunked_lines(string, chunksize=10, chunks_per_line=5, spacer=" "):
    """Format *string* as lines of *chunks_per_line* chunks joined by *spacer*.

    Returns a list of lines; an empty string gives an empty list.

    Raises ValueError if *chunks_per_line* is smaller than 1 (consuming the
    chunks zero at a time would never finish).
    """
    if chunks_per_line < 1:
        raise ValueError("chunks_per_line must be a positive integer")
    chunks = evenchunks(string, chunksize)
    return [
        spacer.join(chunks[i : i + chunks_per_line])
        for i in range(0, len(chunks), chunks_per_line)
    ]


def reverse_complement(dna):
    "Return the reverse complement of a DNA sequence."
57 | return dna.translate(str.maketrans("ATCG", "TAGC"))[::-1] 58 | -------------------------------------------------------------------------------- /data/B5-M13R_B07.ab1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/y9c/cfutils/a34e263d38ccba3821ff5c5b79df9988903c0ba6/data/B5-M13R_B07.ab1 -------------------------------------------------------------------------------- /data/B5-M13R_B07_vs_ref.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/y9c/cfutils/a34e263d38ccba3821ff5c5b79df9988903c0ba6/data/B5-M13R_B07_vs_ref.pdf -------------------------------------------------------------------------------- /data/B5-M13R_B07_vs_ref.tsv: -------------------------------------------------------------------------------- 1 | RefLocation RefBase CfLocation CfBase SiteQual LocalQual 2 | 20 A 66 N 52 48 3 | 21 C 67 N 52 47 4 | 22 G 68 N 52 45 5 | 23 G 69 N 45 45 6 | 24 A 70 N 35 45 7 | 25 G 71 N 35 45 8 | 26 A 72 N 35 43 9 | 27 C 73 N 35 42 10 | 28 C 74 N 52 41 11 | 29 G 75 N 52 40 12 | 30 A 76 N 52 40 13 | 31 A 77 N 35 40 14 | 32 G 78 N 44 41 15 | 33 G 79 N 35 42 16 | 34 A 80 a 35 41 17 | 35 G 81 g 35 41 18 | 36 A 82 a 40 41 19 | 60 A 106 T 55 53 20 | 61 C 107 G 55 54 21 | 169 A 215 G 55 54 22 | 170 A 216 G 55 54 23 | 172 A 218 G 55 55 24 | 177 T 223 C 55 55 25 | 391 G 437 A 55 55 26 | 592 A 638 C 55 53 27 | 1037 G 1083 - 30 24 28 | 1044 C 1089 - 38 25 29 | 1068 A 1112 - 30 27 30 | 1079 G 1122 - 30 22 31 | 1089 C 1131 - 16 15 32 | -------------------------------------------------------------------------------- /data/data_file: -------------------------------------------------------------------------------- 1 | some data 2 | 3 | by yc 4 | -------------------------------------------------------------------------------- /data/matplotlib_example.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/y9c/cfutils/a34e263d38ccba3821ff5c5b79df9988903c0ba6/data/matplotlib_example.png -------------------------------------------------------------------------------- /data/plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/y9c/cfutils/a34e263d38ccba3821ff5c5b79df9988903c0ba6/data/plot.png -------------------------------------------------------------------------------- /data/ref.fa: -------------------------------------------------------------------------------- 1 | >3k 2 | CGCCTCCCTCGCGCCATCAGNNNNNNNNNNNNNNagaAGCTTACTAACCAGCCAACTAGCTGGCTAGCAGGTAAACCTGCCAGCCTGCCGGCTCAGGTGAGCCAGTTAGTAGGCAAGTAAGCTCACCTGTAGGGGCTTTGGAGCAGGTATTGGAGTACAGGTGTAGGTTGGAGTTAGCCAGTAGGTTCACCTGATTACCCTGTTATCCCTACAGGTGAGCAGGCTAGCAAGTAGGTTCCAATGCCGGCTGGTAAGCATACCAACTCCAAAGTTCACCTGCAGGTGTAGGTACCTAGGCACCTGCACCTGGGCATAGGTGCTCCTAAGCTAGCAAACCGGTACCTATACTCAGGTGAGCTAGCAAGCTCAGGTGTAGGGATAACAGGGTAATAGCTAACCTACTAGTTGGCTAACCCCAACCAATACTTAGGAGCTGGCAGGCTAGTTTACTAGCTCAGGTGCAGGTGAGTAAGTACACCTGTGCCAGTAAGCACCTAAGCCAACCAGCCCAGGTGAGCCAACTTGCTGGCAAACCTACTGGTATACCATTACCCTGTTATCCCTAAGCTGGTAAGCTTACCCCTATACTCACCTGTGCCAGCCCAGGTGAGCAAGTTGGTATACCCACCTGCAGGTGAGTAGGCTAGTAAGCTAGCTAGTATGCTAGCTGGTTAGTTTGCCGGCTGGCTCCAAAACTAGTTGGTTGGCTCAGGTGTGCCGGTTTAGGGATAACAGGGTAATTGCTCCTACAGGTGAGTAGGCTTACCAGCTCAGGTGAGCAAGCTTGCTCCAATAGGTAGGTTGGAGCATGCCAGTTAGCTTTGGAGCTCAGGTGAGTTTGCCAGTAGGTAAACTAGTATACTTGCTAGCTGGCAAGCCGGTTAGTAGGCTCCTAATTACCCTGTTATCCCTACCAAAACCTGCCCCTAAGCTAGTATAGGAGCCGGTTAGCCAACCAGTACCAACCTAAGCACACCTGAGCTAGCAAACTAGTACCTATACTTGCCAGCAGGCTAGCTTACCAGTAAGTAGGCACAGGTGTGCCCCTAAGCCAGCTGGCAAGCTTAGGGATAACAGGGTAATGGCTGGCTTGCCAGCAGGTTTACCAACTAACCTAGGAACCAACTAACTTGCTCCAAAGCAAGCAAACTCACCTGGGCATGCCCCTAAGCTAGTAAACCCAGGTGAGCAGGTAGGTAAGTTTACCAGCCAACTTACCCAGGTGAACCAGTTCACCTGATTACCCTGTTATCCCTATGCTAGCATACTTGCTTGCCGGCATGCTTGCTAGTACCAAAACTAGCTGGTTGGCACAGGTGGGCTTGCTTAGGCACCTGAGCAGGCAGGCTAGTACCTAAGCCAACCGGCAAGTAAGTTAGTAGGCTCCAAAGTTCAGGTGTTGGAGTTAACTTAGGGAT
AACAGGGTAATAGTAGGTAGGTTAGCTGGTTAGTAAGCTTGCCTTGGAGCTTGCTAGTTTGCTAGTTTACCAACTAACCGGCAAGTTAACTTTGGCACCTGTTGGTAGGCCTAAGCTTGCCAGCCCACCTGAACCTGCCCAGGTGGGCACACCTGAGTATGCCTTGGATTACCCTGTTATCCCTAAGCACACCTGAGCAAGCTAGTACAGGTGCACCTGCAGGTGCCTACACCTGGGTAGGCTAACTCACCTGTGCCTGCCTGCTGGCACACCTGAACTGGTTGGCACCTATGCCAGCTTGCCAACCGGCTTAGGTAGGTACCAGCCGGTATACTAGCTAACTAACCTAGGGATAACAGGGTAATCACCTGAGTAAACCCCTAGGTAAGTACAGGTGTACCAGCTGGTTGGTTCCAACCTAAGCTTTGGTTGGTGCCGGCTGGTTTACCGGTATACTCCAACACCTGAGCTGGTACCTAGGCTTACTCACCTGCAGGTGGGCTGGTACCTATGCCAACCAACCATTACCCTGTTATCCCTACACCTGTTGGAGCTTTGGCACCTGAGCACACCTGGGCTGGCATGCTTAGGCACCTGGGTAGGCTTAGGCAGGTGAGCAGGCTAGCTGGTAGGTTAGCCGGTACACCTGAGTTTACTCAGGTGCCTAAGCTGGTTTAGGAGCTGGTATAGGGGCATTGGAGCATAGGGATAACAGGGTAATGGCTGGCAGGTTAACCAACTAACCAACTCCTAAGCCGGTAGGCTAGCTAGCATACCTGCTAGCCCCAACACCTGTACCAGCAGGCAAGCTGGCTCCTAAACTAGTACAGGTGAACCTGCCGGCTAGCTAGCTTAGGGGCTAGCCAGTAGGTTATTACCCTGTTATCCCTAAGCTAGCCTGCCAGCTCCTATGCTAGTTAGCAAGCTGGTAGGCTGGCTAGCCTGCCTACTTACCGGTTGGTAGGTAAACCCACCTGAGCATGCCGGTATGCCTAGGGGCTTGCCTGCCAGCCAACCTAGGTGCTGGCACCTATGCCTACTTAGGGATAACAGGGTAATAACTGGCTCCAACACCTGTACTAGCAAGCTTGCCAGCAAGTATAGGCACCTGAGCTAACTAGCTTAGGAACCCACCTGGGCATAGGAACCAGCTAGTTAGCTCCAAAGCTAACCCCTAGGTTGGTTTGCCAGCACACCTGTACTTACCCACCTGTACTATTACCCTGTTATCCCTAAGTTAACTCCTAAGCCCACCTGTACCAACCAGTAGGCATTGGAGTTGGCTGGTACCTAGGCTGGCTAGCCAGCTGGTAAGCAAGCAAGTTTACCCAGGTGGGCTCCTACAGGTGAGCTCCTAAGCTCACCTGGGTACCAAGGCTGGCAAGCAAGCCTAGGGATAACAGGGTAATAGCTGGCTAGTTGGTAGGCTAGCTTAGGGGCTGGCTAACCAGCAGGTAAGTAAGCACCAAAGCAGGTTGGTAAACCTTGGCAGGTGAGTTGGCTAGCTTTGGAACTAGCCAGTTTACCTAGGAACTAGTTCCTAAGCTAGTAGGTTAGTAtctacacaaggaacaaacactggatgtcactttcagttcaaattgtaacgctaatcactccgaacaggtcac 3 | -------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | cfutils.yech.science -------------------------------------------------------------------------------- /docs/_config.yml: 
-------------------------------------------------------------------------------- 1 | remote_theme: just-the-docs/just-the-docs 2 | ga_tracking: G-EFHY8WFMHK 3 | ga_tracking_anonymize_ip: true 4 | color_scheme: light 5 | favicon_ico: '/favicon.ico' 6 | footer_content: 'Copyright © 2021-2024, Chang Ye' 7 | 8 | callouts: 9 | warning: 10 | title: Warning 11 | color: red 12 | note: 13 | title: Note 14 | color: green 15 | 16 | plugins: 17 | - jekyll-spaceship 18 | -------------------------------------------------------------------------------- /docs/api.md: -------------------------------------------------------------------------------- 1 | # API Reference 2 | 3 | ## cfutils.run.report_mutation 4 | 5 | ```python 6 | def report_mutation( 7 | query_ab1_file: str, 8 | subject_fasta_file: str, 9 | output_dir: Optional[str] = None, 10 | file_basename: Optional[str] = None, 11 | report_all_sites: bool = False, 12 | report_mut_plot: bool = False, 13 | ) -> None: 14 | """Report mutation within region, optionally generate plot and TSV output.""" 15 | ``` 16 | 17 | - **query_ab1_file**: Path to the ABI file (Sanger sequencing data) 18 | - **subject_fasta_file**: Path to the reference FASTA file 19 | - **output_dir**: Output directory for results (optional) 20 | - **file_basename**: Output file basename (optional) 21 | - **report_all_sites**: If True, report all aligned sites; otherwise, only mutation sites 22 | - **report_mut_plot**: If True, generate a PDF plot of the mutation region 23 | 24 | --- 25 | 26 | ## cfutils.parser.parse_abi 27 | 28 | ```python 29 | def parse_abi(filename: str) -> SeqRecord: 30 | """Parse an ABI file and return a SeqRecord object.""" 31 | ``` 32 | 33 | --- 34 | 35 | ## cfutils.show.plot_chromatograph 36 | 37 | ```python 38 | def plot_chromatograph( 39 | seq: SeqRecord, 40 | region: Optional[Tuple[int, int]] = None, 41 | ax: Optional[matplotlib.axes.Axes] = None, 42 | show_bases: bool = True, 43 | show_positions: bool = True, 44 | color_map: Optional[dict] = 
None, 45 | ) -> matplotlib.axes.Axes: 46 | """Plot a chromatogram for a given sequence region.""" 47 | ``` 48 | 49 | --- 50 | 51 | For more details, see the source code and docstrings in each module. 52 | -------------------------------------------------------------------------------- /docs/cli.md: -------------------------------------------------------------------------------- 1 | # Command Line Interface (CLI) 2 | 3 | ## Detect and visualize mutations in one step 4 | 5 | ```bash 6 | cfutils mut --query ./data/B5-M13R_B07.ab1 --subject ./data/ref.fa --outdir ./data/ --plot 7 | ``` 8 | 9 | ## For help on CLI options 10 | 11 | ```bash 12 | cfutils mut --help 13 | ``` 14 | 15 | ## Running Tests 16 | 17 | To run all tests: 18 | 19 | ```bash 20 | make test 21 | ``` 22 | 23 | To clean up build and cache files: 24 | 25 | ```bash 26 | make clean 27 | ``` 28 | -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/y9c/cfutils/a34e263d38ccba3821ff5c5b79df9988903c0ba6/docs/favicon.ico -------------------------------------------------------------------------------- /docs/features.md: -------------------------------------------------------------------------------- 1 | # Features 2 | 3 | - Visualize Sanger sequencing chromatograms 4 | - Call and report mutations by alignment 5 | - Trim and rescale traces 6 | - CLI and Python API support 7 | - Export results as TSV and PDF 8 | - Highlight and annotate bases and mutations 9 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Home 3 | layout: home 4 | nav_order: 1 5 | description: "Chromatogram File Utils: Sanger sequencing visualization, alignment, mutation calling, and more." 
6 | permalink: / 7 | --- 8 | 9 | # Chromatogram File Utils 10 | {: .fs-9 } 11 | 12 | A toolkit for Sanger sequencing data: visualize chromatograms, call and report mutations, trim and rescale traces, and more. 13 | {: .fs-6 .fw-300 } 14 | 15 | [Get Started](installation.md){: .btn .btn-primary .fs-5 .mb-4 .mb-md-0 .mr-2 } 16 | [View on GitHub](https://github.com/y9c/cfutils){: .btn .fs-5 .mb-4 .mb-md-0 } 17 | 18 | --- 19 | 20 | {: .warning } 21 | > This documentation is for the latest development version of cfutils. For stable releases, see the [GitHub Releases](https://github.com/y9c/cfutils/releases). 22 | 23 | cfutils is a Python toolkit and CLI for Sanger sequencing data analysis. It supports: 24 | 25 | - Chromatogram visualization 26 | - Mutation calling and reporting 27 | - CLI and Python API 28 | - Export to PDF/TSV 29 | - Easy integration with bioinformatics workflows 30 | 31 | ## Quick links 32 | 33 | - [Installation](installation.md) 34 | - [CLI Usage](cli.md) 35 | - [API Reference](api.md) 36 | 37 | {: .note } 38 | > cfutils is open source and welcomes contributions! See the [GitHub repo](https://github.com/y9c/cfutils) for details. 39 | 40 | --- 41 | 42 | ## About the project 43 | 44 | cfutils is © 2019-{{ "now" | date: "%Y" }} by [Chang Ye](https://github.com/y9c). 45 | 46 | ### License 47 | 48 | cfutils is distributed under the [MIT license](https://github.com/y9c/cfutils/blob/master/LICENSE). 
49 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## From PyPI 4 | 5 | ```bash 6 | pip install --user cfutils 7 | ``` 8 | 9 | ## From source 10 | 11 | ```bash 12 | git clone git@github.com:y9c/cfutils.git 13 | cd cfutils 14 | make init 15 | ``` 16 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "cfutils" 3 | version = "0.0.0.dev62" 4 | description = "Chromatogram File Utils" 5 | authors = [{ name = "Ye Chang", email = "yech1990@gmail.com" }] 6 | requires-python = "~=3.9" 7 | readme = "README.md" 8 | license = "MIT" 9 | keywords = ["DNA", "mutation", "chromatogram", "biology"] 10 | dependencies = [ 11 | "click>=8.0.0,<9", 12 | "ssw>=0.4.1,<0.5", 13 | "matplotlib>=3.9.2,<4", 14 | "numpy>=2.0.2", 15 | ] 16 | 17 | [project.urls] 18 | Repository = "https://github.com/yech1990/cfutils" 19 | Documentation = "https://cf.readthedocs.io/" 20 | 21 | [project.scripts] 22 | cfutils = "cfutils.cli:cli" 23 | 24 | [build-system] 25 | requires = ["hatchling"] 26 | build-backend = "hatchling.build" 27 | 28 | [tool.hatch.build.targets.wheel] 29 | packages = ["cfutils"] 30 | 31 | [tool.black] 32 | line-length = 79 33 | 34 | [dependency-groups] 35 | dev = [ 36 | "pytest>=8.3.5", 37 | ] 38 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/y9c/cfutils/a34e263d38ccba3821ff5c5b79df9988903c0ba6/test/__init__.py -------------------------------------------------------------------------------- /test/__init__.py.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/y9c/cfutils/a34e263d38ccba3821ff5c5b79df9988903c0ba6/test/__init__.py.py -------------------------------------------------------------------------------- /test/test_advance.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Unit tests for cfutils advanced mutation reporting.""" 3 | 4 | import unittest 5 | import tempfile 6 | import os 7 | from cfutils.run import report_mutation 8 | 9 | class TestFunc(unittest.TestCase): 10 | """Test advanced mutation reporting in cfutils.""" 11 | 12 | def test_plot_mutation(self) -> None: 13 | """Test report_mutation with plot output enabled and check output files.""" 14 | with tempfile.TemporaryDirectory() as tmpdir: 15 | try: 16 | report_mutation( 17 | query_ab1_file="./data/B5-M13R_B07.ab1", 18 | subject_fasta_file="./data/ref.fa", 19 | output_dir=tmpdir, 20 | file_basename="test", 21 | report_all_sites=True, 22 | report_mut_plot=True, 23 | ) 24 | pdf_path = os.path.join(tmpdir, "test.pdf") 25 | tsv_path = os.path.join(tmpdir, "test.tsv") 26 | if not os.path.exists(tsv_path): 27 | print(f"Temp dir contents: {os.listdir(tmpdir)}") 28 | self.assertTrue(os.path.exists(tsv_path), f"Missing TSV: {tsv_path}") 29 | if os.path.exists(tsv_path): 30 | with open(tsv_path) as f: 31 | lines = f.readlines() 32 | # If there are mutations, PDF should exist 33 | if len(lines) > 1: 34 | self.assertTrue(os.path.exists(pdf_path), f"Missing PDF: {pdf_path} (but TSV has mutations)") 35 | else: 36 | if not os.path.exists(pdf_path): 37 | print(f"Warning: No mutations found, so PDF was not generated. 
TSV content: {lines}") 38 | except Exception as e: 39 | self.fail(f"report_mutation raised an exception: {e}") 40 | 41 | if __name__ == "__main__": 42 | unittest.main() 43 | -------------------------------------------------------------------------------- /test/test_align.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Unit tests for cfutils alignment functions.""" 3 | 4 | import unittest 5 | from cfutils.align import align_chromatograph, call_mutations 6 | from cfutils.parser import parse_abi, parse_fasta 7 | 8 | 9 | class TestAlignFunc(unittest.TestCase): 10 | """Test alignment and mutation calling in cfutils.align.""" 11 | 12 | def setUp(self) -> None: 13 | """Load test data for alignment tests.""" 14 | self.query_record = parse_abi("./data/B5-M13R_B07.ab1") 15 | self.subject_record = parse_fasta("./data/ref.fa") 16 | 17 | def test_align_chromatograph(self) -> None: 18 | """Test align_chromatograph returns a list of site pairs.""" 19 | sitepairs = align_chromatograph(self.query_record, self.subject_record) 20 | self.assertIsInstance(sitepairs, list) 21 | self.assertGreater(len(sitepairs), 0, "No site pairs found.") 22 | 23 | def test_call_mutations(self) -> None: 24 | """Test call_mutations returns a list of mutations.""" 25 | mutations = call_mutations(self.query_record, self.subject_record) 26 | self.assertIsInstance(mutations, list) 27 | # Optionally check mutation structure if known 28 | 29 | 30 | if __name__ == "__main__": 31 | unittest.main() 32 | -------------------------------------------------------------------------------- /test/test_basic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Basic environment and sanity tests for cfutils.""" 3 | 4 | import sys 5 | import unittest 6 | 7 | 8 | class TestBasicFunc(unittest.TestCase): 9 | """Test basic environment requirements for cfutils.""" 10 | 11 | def 
test_python_version(self) -> None: 12 | """Ensure Python version is >= 3.6.""" 13 | self.assertGreaterEqual(sys.version_info, (3, 6), "Python 3.6+ is required.") 14 | 15 | 16 | if __name__ == "__main__": 17 | unittest.main() 18 | -------------------------------------------------------------------------------- /test/test_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Unit tests for cfutils parser functions.""" 3 | 4 | import unittest 5 | from cfutils.parser import parse_abi, parse_fasta 6 | 7 | class TestParserFunc(unittest.TestCase): 8 | """Test parsing functions in cfutils.parser.""" 9 | 10 | def test_parse_abi(self) -> None: 11 | """Test parse_abi returns a SeqRecord with expected attributes.""" 12 | record = parse_abi("./data/B5-M13R_B07.ab1") 13 | self.assertIsNotNone(record) 14 | self.assertTrue(hasattr(record, "seq"), "SeqRecord missing 'seq' attribute.") 15 | 16 | def test_parse_fasta(self) -> None: 17 | """Test parse_fasta returns a SeqRecord with expected attributes.""" 18 | record = parse_fasta("./data/ref.fa") 19 | self.assertIsNotNone(record) 20 | self.assertTrue(hasattr(record, "seq"), "SeqRecord missing 'seq' attribute.") 21 | 22 | if __name__ == "__main__": 23 | unittest.main() -------------------------------------------------------------------------------- /test/test_run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Unit tests for cfutils run module.""" 3 | 4 | import unittest 5 | import tempfile 6 | import os 7 | from cfutils.run import report_mutation 8 | 9 | class TestRunFunc(unittest.TestCase): 10 | """Test report_mutation in cfutils.run.""" 11 | 12 | def test_report_mutation(self) -> None: 13 | """Test report_mutation creates output files without error.""" 14 | with tempfile.TemporaryDirectory() as tmpdir: 15 | try: 16 | report_mutation( 17 | query_ab1_file="./data/B5-M13R_B07.ab1", 18 | 
subject_fasta_file="./data/ref.fa", 19 | output_dir=tmpdir, 20 | file_basename="test", 21 | report_all_sites=True, 22 | report_mut_plot=False, 23 | ) 24 | tsv_path = os.path.join(tmpdir, "test.tsv") 25 | if not os.path.exists(tsv_path): 26 | print(f"Temp dir contents: {os.listdir(tmpdir)}") 27 | self.assertTrue(os.path.exists(tsv_path), f"Missing TSV: {tsv_path}") 28 | except Exception as e: 29 | self.fail(f"report_mutation raised an exception: {e}") 30 | 31 | if __name__ == "__main__": 32 | unittest.main() -------------------------------------------------------------------------------- /test/test_show.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Unit tests for cfutils show module.""" 3 | 4 | import unittest 5 | import matplotlib.pyplot as plt 6 | from cfutils.parser import parse_abi, parse_fasta 7 | from cfutils.show import highlight_base, plot_chromatograph, annotate_mutation 8 | from cfutils.align import SitePair 9 | 10 | class TestShowFunc(unittest.TestCase): 11 | """Test visualization functions in cfutils.show.""" 12 | 13 | def setUp(self) -> None: 14 | """Set up test data and figure for plotting tests.""" 15 | self.query_record = parse_abi("./data/B5-M13R_B07.ab1") 16 | self.subject_record = parse_fasta("./data/ref.fa") 17 | self.fig, self.ax = plt.subplots(1, 1, figsize=(15, 6)) 18 | 19 | def test_plot_chromatograph(self) -> None: 20 | """Test plot_chromatograph function runs without error.""" 21 | plot_chromatograph(self.query_record, region=(10, 30), ax=self.ax) 22 | self.assertTrue(True) 23 | 24 | def test_highlight_base(self) -> None: 25 | """Test highlight_base overlays highlight on chromatograph.""" 26 | plot_chromatograph(self.query_record, region=(10, 20), ax=self.ax) 27 | highlight_base(14, self.query_record, self.ax) 28 | self.assertTrue(True) 29 | 30 | def test_annotate_mutation(self) -> None: 31 | """Test annotate_mutation overlays mutation annotation.""" 32 | mutation = 
SitePair(ref_pos=10, ref_base='A', cf_pos=14, cf_base='T') 33 | annotate_mutation(mutation, self.query_record, self.ax) 34 | self.assertTrue(True) 35 | 36 | def tearDown(self) -> None: 37 | """Close the matplotlib figure after each test.""" 38 | plt.close(self.fig) 39 | 40 | if __name__ == "__main__": 41 | unittest.main() 42 | --------------------------------------------------------------------------------