├── .flake8 ├── .github └── workflows │ ├── python-package.yml │ └── python-publish.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CITATION.cff ├── LICENSE ├── README.md ├── docs ├── Makefile ├── api.rst ├── conf.py ├── index.rst ├── make.bat ├── prefix.csv └── spec.rst ├── pyproject.toml ├── src └── mavehgvs │ ├── __init__.py │ ├── exceptions.py │ ├── patterns │ ├── __init__.py │ ├── combined.py │ ├── dna.py │ ├── position.py │ ├── protein.py │ ├── rna.py │ └── util.py │ ├── position.py │ ├── py.typed │ ├── util.py │ └── variant.py └── tests ├── __init__.py ├── test_patterns ├── __init__.py ├── test_dna.py ├── test_protein.py ├── test_rna.py └── test_util.py ├── test_position.py ├── test_util.py └── test_variant.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | extend-ignore = E203 3 | max-line-length = 88 4 | max-complexity = 10 5 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ "main" ] 9 | pull_request: 10 | branches: [ "main" ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-22.04 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | python -m pip install flake8 pytest 31 | if [ -f 
requirements.txt ]; then pip install -r requirements.txt; fi 32 | - name: Install package 33 | run: | 34 | python -m pip install . 35 | - name: Lint with flake8 36 | run: | 37 | # stop the build if there are Python syntax errors or undefined names 38 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 39 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 40 | flake8 . --count --exit-zero --statistics 41 | - name: Test with pytest 42 | run: | 43 | pytest 44 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 
8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | - name: Set up Python 26 | uses: actions/setup-python@v3 27 | with: 28 | python-version: '3.x' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install hatch 33 | - name: Build package 34 | run: hatch build 35 | - name: Publish package 36 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 37 | with: 38 | user: __token__ 39 | password: ${{ secrets.PYPI_MAVEHGVS }} 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Idea 2 | .idea/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 23.1.0 4 | hooks: 5 | - id: black 6 | language_version: python3.11 7 | - repo: https://github.com/pycqa/flake8 8 | rev: 5.0.4 9 | hooks: 10 | - id: flake8 11 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
3 | authors: 4 | - family-names: "Rubin" 5 | given-names: "Alan F" 6 | orcid: "https://orcid.org/0000-0003-1474-605X" 7 | title: "mavehgvs" 8 | version: 0.4.0 9 | doi: 10.5281/zenodo.5148054 10 | date-released: 2021-07-30 11 | url: "https://github.com/VariantEffect/mavehgvs" 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018-2023, Alan F Rubin and Daniel Esposito 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.com/VariantEffect/mavehgvs.svg?branch=main)](https://travis-ci.com/VariantEffect/mavehgvs) 2 | [![Coverage Status](https://coveralls.io/repos/github/VariantEffect/mavehgvs/badge.svg?branch=main)](https://coveralls.io/github/VariantEffect/mavehgvs?branch=main) 3 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 4 | 5 | # mavehgvs 6 | mavehgvs is the Python reference implementation of the MAVE-HGVS variant representation standard, 7 | a strict subset of [HGVS](http://varnomen.hgvs.org/), used primarily for clinical genomics. 8 | 9 | ## The MAVE-HGVS Standard 10 | MAVE-HGVS is a strict subset of the [HGVS Sequence Variant Nomenclature](https://varnomen.hgvs.org/), version 20.05. 11 | HGVS nomenclature is comprehensive and very expressive and consequently includes a lot of syntax that is not needed to 12 | represent variants from Multiplexed Assay of Variant Effect (MAVE) data and makes the variant strings more challenging 13 | to parse. 
14 | 15 | While packages exist for parsing HGVS (most notably the 16 | [biocommons hgvs package](https://github.com/biocommons/hgvs/)), they are intended for use in human genetics and 17 | rely on sequence databases and reference sequence (called "target sequence" for MAVE-HGVS), which are not always 18 | available for or relevant for multiplexed assays. 19 | 20 | MAVE-HGVS is an attempt to define an easy-to-parse subset of the HGVS nomenclature that captures those variants that 21 | occur in MAVE datasets, while excluding many variant types that are unlikely to be found. Importantly, the 22 | mavehgvs implementation does not rely on external sequence databases or identifiers. 23 | 24 | ## Supported Variants 25 | MAVE-HGVS supports DNA, RNA, and protein variants. 26 | MAVE-HGVS supports a subset of HGVS variants including: 27 | 28 | * substitutions 29 | * deletions 30 | * duplications 31 | * insertions 32 | * frame shifts 33 | 34 | Many HGVS variants are unsupported including: 35 | 36 | * inversions 37 | * conversions 38 | * extensions 39 | * changes in methylation state 40 | * RNA fusion transcripts 41 | * mosaicism 42 | * chimerism 43 | * variants with uncertain consequence 44 | * variants in trans or unknown phase 45 | * complex variants (e.g. translocations) 46 | 47 | For further details, including example variants, see the specification in the package documentation. 48 | 49 | # Installation 50 | Install mavehgvs from pip using: 51 | 52 | ```bash 53 | pip3 install mavehgvs 54 | ``` 55 | 56 | To set up the package for development purposes, include the optional dependencies and 57 | install pre-commit: 58 | 59 | pip3 install mavehgvs[dev] 60 | pre-commit install 61 | 62 | # Feedback 63 | To report a problem or request a new feature with either the mavehgvs package or the MAVE-HGVS standard, 64 | please use the GitHub issue tracker. 
65 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _api-docs: 2 | 3 | mavehgvs API documentation 4 | ========================== 5 | 6 | Variant objects 7 | --------------- 8 | 9 | Each variant can be parsed into a variant object, which populates and exposes named 10 | fields for each piece of the variant string. 11 | 12 | .. automodule:: mavehgvs.position 13 | :members: 14 | :private-members: 15 | :special-members: 16 | 17 | .. automodule:: mavehgvs.variant 18 | :members: 19 | :private-members: 20 | :special-members: 21 | 22 | .. automodule:: mavehgvs.exceptions 23 | :members: 24 | 25 | Utility functions for handling variants 26 | --------------------------------------- 27 | 28 | .. automodule:: mavehgvs.util 29 | :members: 30 | 31 | Utility functions for regular expression patterns 32 | ------------------------------------------------- 33 | 34 | .. 
automodule:: mavehgvs.patterns.util 35 | :members: 36 | 37 | DNA pattern strings 38 | ------------------- 39 | 40 | .. automodule:: mavehgvs.patterns.dna 41 | :members: 42 | 43 | RNA pattern strings 44 | ------------------- 45 | 46 | .. automodule:: mavehgvs.patterns.rna 47 | :members: 48 | 49 | Protein pattern strings 50 | ----------------------- 51 | 52 | .. automodule:: mavehgvs.patterns.protein 53 | :members: 54 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | 16 | sys.path.insert(0, os.path.abspath("../src")) 17 | 18 | from mavehgvs import __version__ # noqa: E402 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = "MAVE-HGVS" 23 | copyright = "2018-2023, Alan F Rubin and Daniel Esposito" 24 | author = "Alan F Rubin and Daniel Esposito" 25 | 26 | # The full version, including alpha/beta/rc tags 27 | release = __version__ 28 | 29 | 30 | # -- General configuration --------------------------------------------------- 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 
35 | extensions = [ 36 | "sphinx.ext.autodoc", 37 | "sphinx.ext.napoleon", 38 | "sphinx.ext.intersphinx", 39 | "sphinx.ext.autosectionlabel", 40 | ] 41 | 42 | # Add any paths that contain templates here, relative to this directory. 43 | templates_path = ["_templates"] 44 | 45 | # List of patterns, relative to source directory, that match files and 46 | # directories to ignore when looking for source files. 47 | # This pattern also affects html_static_path and html_extra_path. 48 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 49 | 50 | 51 | # -- Options for HTML output ------------------------------------------------- 52 | 53 | # The theme to use for HTML and HTML Help pages. See the documentation for 54 | # a list of builtin themes. 55 | # 56 | html_theme = "nature" 57 | 58 | # Add any paths that contain custom static files (such as style sheets) here, 59 | # relative to this directory. They are copied after the builtin static files, 60 | # so a file named "default.css" will overwrite the builtin "default.css". 61 | html_static_path = ["_static"] 62 | 63 | 64 | # -- Extension configuration ------------------------------------------------- 65 | intersphinx_mapping = {"python": ("https://docs.python.org/3", None)} 66 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | MAVE-HGVS documentation 2 | ======================= 3 | 4 | MAVE-HGVS is a strict subset of the `HGVS sequence variant nomenclature `_ 5 | used by `MaveDB `_ and related tools to represent protein and DNA variants in 6 | Multiplexed Assays of Variant Effect (MAVE) datasets. 7 | 8 | This version of MAVE-HGVS is based on HGVS version 20.05. 9 | 10 | When citing, please refer to: 11 | 12 | #. Esposito, D., Weile J., *et al.* MaveDB: an open-source platform to distribute and interpret data from multiplexed assays of variant effect. *Genome Biol* **20**, 223 (2019). 
https://doi.org/10.1186/s13059-019-1845-6 13 | #. den Dunnen, J. T. *et al.* HGVS Recommendations for the Description of Sequence Variants: 2016 Update. *Hum Mutat* **37**, 564–569 (2016). https://doi.org/10.1002/humu.22981 14 | 15 | .. toctree:: 16 | :maxdepth: 2 17 | :caption: Contents: 18 | 19 | spec 20 | api 21 | 22 | Indices and tables 23 | ================== 24 | 25 | * :ref:`genindex` 26 | * :ref:`modindex` 27 | * :ref:`search` 28 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/prefix.csv: -------------------------------------------------------------------------------- 1 | "c", "coding DNA sequence" 2 | "g", "linear genomic DNA sequence" 3 | "m", "mitochondrial genomic DNA sequence" 4 | "n", "non-coding DNA sequence" 5 | "o", "circular genomic DNA sequence" 6 | "p", "protein sequence" 7 | "r", "RNA transcript sequence" 8 | -------------------------------------------------------------------------------- /docs/spec.rst: -------------------------------------------------------------------------------- 1 | .. _spec-docs: 2 | 3 | MAVE-HGVS specification 4 | ======================= 5 | 6 | MAVE-HGVS is a strict subset of the `HGVS Sequence Variant Nomenclature `_, version 20.05. 7 | HGVS nomenclature is comprehensive and very expressive and consequently includes a lot of syntax that is not needed to 8 | represent variants from Multiplexed Assay of Variant Effect (MAVE) data and makes the variant strings more challenging 9 | to parse. 10 | 11 | While packages exist for parsing HGVS (most notably the 12 | `biocommons hgvs package `_), they are intended for use in human genetics and 13 | rely on sequence databases and reference sequence (called "target sequence" for MAVE-HGVS), which are not always 14 | available for or relevant for multiplexed assays. 15 | 16 | MAVE-HGVS is an attempt to define an easy-to-parse subset of the HGVS nomenclature that captures those variants that 17 | occur in MAVE datasets, while excluding many variant types that are unlikely to be found. 
Importantly, the 18 | :ref:`corresponding implementation ` of MAVE-HGVS does not rely on external sequence databases or identifiers. 19 | 20 | Key differences between HGVS and MAVE-HGVS 21 | ------------------------------------------ 22 | 23 | Standard HGVS strings have the format :code:`reference:variant` (e.g. :code:`NM_001130145.3:c.832C>T`). 24 | MAVE-HGVS strings typically include the variant portion only and the reference (target) portion is inferred from the 25 | MAVE design. 26 | 27 | Target identifiers in MAVE-HGVS are optional, and would typically be used in cases where a mix of MAVE datasets are 28 | being analyzed jointly or for experimental designs that contain multiple target sequences. 29 | Target identifiers in MAVE-HGVS can contain any word characters, numbers, or the underscore. 30 | 31 | MAVE-HGVS does not distinguish between variants that have been observed experimentally and the predicted consequence of 32 | observed variants. 33 | Therefore, variants that contain :code:`()` to denote predicted consequences are considered invalid with one exception 34 | (see `Substitution`_ below). 35 | 36 | MAVE-HGVS supports position numberings that are relative to a transcript (e.g. :code:`c.85+12G>A` or :code:`n.*22del`). 37 | These positions are referred to here as using the extended position notation. 38 | Variants using the extended position notation should appear alongside variants with simple (integer only) position 39 | numbers relative to the target sequence, expressed using the appropriate genomic prefix. 40 | 41 | Like HGVS, MAVE-HGVS supports alleles (called multi-variants in MAVE-HGVS) that describe multiple variants in a single 42 | variant string. 43 | Multi-variants are represented as a semicolon-separated list of valid MAVE-HGVS variants. 
44 | 45 | MAVE-HGVS supports a subset of HGVS variants including: 46 | 47 | * substitutions 48 | * frame shifts 49 | * deletions 50 | * duplications 51 | * insertions 52 | 53 | Many HGVS variants are unsupported including: 54 | 55 | * inversions 56 | * extensions 57 | * changes in methylation state 58 | * RNA fusion transcripts 59 | * mosaicism 60 | * chimerism 61 | * variants with uncertain consequence 62 | * variants in trans or unknown phase 63 | * complex variants (e.g. translocations) 64 | 65 | Sequence prefixes and sequence types 66 | ------------------------------------ 67 | 68 | Similarly to HGVS, a MAVE-HGVS variant begins with a single prefix character that defines the sequence type. 69 | Supported sequence types are the same as for HGVS, and are listed in the following table: 70 | 71 | .. csv-table:: 72 | :file: ../docs/prefix.csv 73 | :header: "Prefix", "Description" 74 | :widths: 5, 20 75 | 76 | Typically MAVE variants are expressed relative to a coding, non-coding, or protein sequence. 77 | 78 | A notable exception is when the target sequence for the MAVE consists of both coding and non-coding sequences, 79 | such as when a full-length gene with introns is mutagenized and splice variants are assayed via saturation genome 80 | editing or other methods. 81 | In this case, it is appropriate to use one of the genomic sequence prefixes to describe changes using the contiguous 82 | region containing all mutagenized sequences as the target sequence. 83 | 84 | RNA variants are intended to be used when assaying the functional consequences to an RNA molecule, 85 | such as a tRNA or ribozyme. 86 | Variants that are measured at the DNA level should generally not use the RNA syntax. 87 | 88 | Equality 89 | -------- 90 | 91 | MAVE-HGVS allows variants to describe equality to the target in a variety of ways. 92 | 93 | Variants describing identity to the full target sequence (e.g. 
:code:`c.=`) are valid and are the intended way to 94 | specify identity to the target (wild-type) sequence. 95 | This replaces the `Enrich2 `_ :code:`_wt` variant syntax. 96 | 97 | Variants that describe identity to the reference (target) at a single position (e.g. :code:`c.44=`) 98 | or range of positions (e.g. :code:`c.1_3=`) are valid for coding and genomic sequences. 99 | These should only be used for special cases, such as in MITE-seq datasets where the scores and counts are 100 | reported separately for each wild-type codon. 101 | 102 | The target-identity variants :code:`c.=` and :code:`p.=` are only valid on their own and are considered invalid as 103 | part of multi-variants. 104 | The variants that describe nucleotide identity to part of the reference are also invalid as part of multi-variants. 105 | 106 | Variants that describe identity to the target at a single amino acid position (e.g. :code:`p.Cys22=`) are valid and 107 | are the preferred way to describe specific synonymous variants. 108 | 109 | The variant :code:`p.(=)` is used when summarizing the population of variants that are synonymous at the protein level 110 | but not target identical at the DNA level. 111 | This replaces the `Enrich2 `_ :code:`_sy` variant syntax. 112 | 113 | .. warning:: Many variants currently in MaveDB use only '=' as part of multi-variants and are therefore invalid 114 | MAVE-HGVS. 115 | Additionally, some MaveDB datasets have a one-to-one relationship between nucleotide and protein multi-variants 116 | resulting in duplicate protein variants in the multi-variant. 117 | This should also be considered invalid. 118 | 119 | Examples of valid equality variants: 120 | 121 | * c.= 122 | * c.22= 123 | * c.1_3= 124 | * g.123= 125 | * p.Cys22= 126 | * p.(=) 127 | 128 | Substitution 129 | ------------ 130 | 131 | .. note:: TODO: add some noncoding ('n.' variants) to the examples. 132 | 133 | MAVE-HGVS supports substitutions of a single nucleotide or amino acid. 
134 | 135 | MAVE-HGVS does not support extension variants, which extend an amino acid sequence to the N- or C- terminal end 136 | (e.g. :code:`p.Met1ext-4` for gain of an upstream start or :code:`p.Ter345Lysext5` for a new downstream termination 137 | codon). 138 | Variants that remove a termination codon should be written as standard substitution variants. 139 | Variants that result in an N-terminal extension are currently undefined, 140 | but have not been observed in the MAVE literature at the time of writing. 141 | 142 | Substitutions of more than one base at a time are covered under `Deletion-Insertion`_. 143 | 144 | Examples of valid substitutions: 145 | 146 | * g.48C>A 147 | * c.122-6T>A 148 | * c.*33G>C 149 | * p.Glu27Trp 150 | * p.Ter345Lys 151 | * r.22g>u 152 | * r.33+12a>c 153 | 154 | Examples of valid HGVS substitutions that are invalid in MAVE-HGVS: 155 | 156 | * g.48C>W 157 | * c.122=/T>A 158 | * p.(Glu27Trp) 159 | * p.*345Lys 160 | * p.Glu23Xaa 161 | * r.spl 162 | 163 | Frame Shift 164 | ----------- 165 | 166 | MAVE-HGVS supports a simplified syntax to describe frame shifts in protein variants. 167 | Multi-variants that include multiple frame shifts or a second variant after a frame shift are considered invalid. 168 | 169 | Because frame shift (and the related extension) variants are uncommon in MAVE datasets, MAVE-HGVS provides this minimal support. 170 | Extension variants (removal of a termination codon) should be expressed as a frame shift at the termination codon. 171 | 172 | Examples of valid frame shift variants: 173 | 174 | * p.Glu27fs 175 | * p.Asp125fs 176 | * p.Ter385fs 177 | 178 | Examples of valid HGVS frame shift variants that are invalid in MAVE-HGVS: 179 | 180 | * p.Arg12LysfsTer18 181 | * p.Arg12Lysfs*18 182 | * p.Glu27fs*? 183 | * p.(Glu27fs) 184 | 185 | Deletion 186 | -------- 187 | 188 | MAVE-HGVS supports deletions of specified nucleotides or amino acids. 
189 | 190 | Deletions of an unknown number of bases or amino acids are not supported. 191 | For example, deletions where the breakpoint is not known or where the deletion extends past the end of the target 192 | cannot be represented with uncertainty. 193 | To represent a deletion of a sequence including the start or end of the target, specify the deletion exactly as if it 194 | extended to the first or last position. 195 | 196 | Examples of valid deletions: 197 | 198 | * g.44del 199 | * c.78+5_78+10del 200 | * c.1_95del 201 | * p.Gly18del 202 | * p.Gln7_Asn19del 203 | * r.34_36del 204 | 205 | Examples of valid HGVS deletions that are invalid in MAVE-HGVS: 206 | 207 | * c.(78+1_79-1)_(124+1_125-1)del 208 | * g.(?_85)_(124\_?)del 209 | * c.122=/del 210 | * p.(Gly18del) 211 | * r.=/9_12del 212 | * r.(155_185)del 213 | 214 | Duplication 215 | ----------- 216 | 217 | MAVE-HGVS supports duplications of one or more nucleotides or amino acids. 218 | The syntax is the same as HGVS. 219 | 220 | Examples of valid duplications: 221 | 222 | * g.22_24dup 223 | * c.77dup 224 | * c.101+1_101+7dup 225 | * p.Pro12_Gly18dup 226 | * p.Cys5dup 227 | * r.12dup 228 | 229 | Examples of valid HGVS duplications that are invalid in MAVE-HGVS: 230 | 231 | * c.(78+1_79-1)_(124+1_125-1)dup 232 | * g.(?_85)_(124\_?)dup 233 | * c.122_125=//dup 234 | * p.(Cys5dup) 235 | 236 | Insertion 237 | --------- 238 | 239 | MAVE-HGVS supports insertions of a specified nucleotide or amino acid sequence. 240 | 241 | Insertions of a number of unspecified bases or amino acids or insertions using ambiguity characters (e.g. N or Xaa) 242 | are not supported. 243 | 244 | Insertions must be specified by listing the complete inserted sequence. 245 | Referring to the sequence that is inserted based on its position in the target sequence is not considered valid for 246 | MAVE-HGVS. 
247 | 248 | To describe an insertion at the end of the target sequence, use a :ref:`Deletion-Insertion` variant that deletes 249 | the last base or amino acid in the target and inserts the deleted symbol plus the insertion. 250 | 251 | Examples of valid insertions: 252 | 253 | * g.234_235insT 254 | * c.84_85insCTG 255 | * c.99+6_99+7insA 256 | * p.His7_Gln8insSer 257 | * p.Ala12_Pro13insGlyProCys 258 | * r.22_23insauc 259 | 260 | Examples of valid HGVS insertions that are invalid in MAVE-HGVS: 261 | 262 | * c.84_85ins100_125 263 | * g.234_235ins(10) 264 | * g.234_235ins(?) 265 | * c.(122_125)insG 266 | * p.(His7_Gln8insSer) 267 | * p.(His7_Gln8insX) 268 | * p.(Ala12_Pro13ins(2)) 269 | * r.(27_30)insu 270 | * r.74_74insnnn 271 | 272 | Deletion-Insertion 273 | ------------------ 274 | 275 | MAVE-HGVS supports deletion-insertions of a specified nucleotide or amino acid sequence. 276 | 277 | Deletion-insertions of a number of unspecified bases or amino acids or insertions using ambiguity characters 278 | (e.g. N or Xaa) are not supported. This includes deletion-insertions with uncertain breakpoints. 279 | 280 | Examples of valid deletion-insertions: 281 | 282 | * g.22delinsAACG 283 | * c.83_85delinsT 284 | * c.43-6_595+12delinsCTT 285 | * p.Ile71_Cys80delinsSer 286 | * p.His44delinsValProGlyGlu 287 | * r.92delinsgac 288 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "mavehgvs" 7 | dynamic = ["version"] 8 | description = "Regular expression-based validation of HGVS-style variant strings for Multiplexed Assays of Variant Effect." 
9 | readme = "README.md" 10 | license = "BSD-3-Clause" 11 | requires-python = ">=3.6" 12 | authors = [ 13 | { name = "Alan F Rubin", email = "alan.rubin@wehi.edu.au" }, 14 | ] 15 | classifiers = [ 16 | "Development Status :: 3 - Alpha", 17 | "Intended Audience :: Science/Research", 18 | "License :: OSI Approved :: BSD License", 19 | "Operating System :: OS Independent", 20 | "Programming Language :: Python :: 3", 21 | "Topic :: Scientific/Engineering :: Bio-Informatics", 22 | ] 23 | dependencies = [ 24 | "fqfa>=1.2.3", 25 | ] 26 | 27 | [project.urls] 28 | repository = "https://github.com/VariantEffect/mavehgvs" 29 | documentation = "https://www.mavedb.org/docs/mavehgvs" 30 | 31 | [project.optional-dependencies] 32 | dev = [ 33 | "black", 34 | "flake8", 35 | "pre-commit", 36 | "pytest", 37 | ] 38 | 39 | [tool.hatch.version] 40 | path = "src/mavehgvs/__init__.py" 41 | 42 | [tool.hatch.build.targets.wheel] 43 | packages = ["src/mavehgvs"] 44 | 45 | [tool.hatch.build.targets.sdist] 46 | exclude = [ 47 | "docs/", 48 | ".github/", 49 | ] 50 | 51 | [tool.setuptools.package-data] 52 | "mavehgvs" = ["py.typed"] 53 | -------------------------------------------------------------------------------- /src/mavehgvs/__init__.py: -------------------------------------------------------------------------------- 1 | from mavehgvs.exceptions import MaveHgvsParseError 2 | from mavehgvs.position import VariantPosition 3 | from mavehgvs.variant import Variant 4 | from mavehgvs.util import parse_variant_strings 5 | 6 | __version__ = "0.7.0" 7 | 8 | __all__ = [ 9 | "__version__", 10 | "Variant", 11 | "VariantPosition", 12 | "MaveHgvsParseError", 13 | "parse_variant_strings", 14 | ] 15 | -------------------------------------------------------------------------------- /src/mavehgvs/exceptions.py: -------------------------------------------------------------------------------- 1 | __all__ = ["MaveHgvsParseError"] 2 | 3 | 4 | class MaveHgvsParseError(Exception): 5 | """Exception to use when a 
MAVE-HGVS string is not valid.""" 6 | 7 | pass 8 | -------------------------------------------------------------------------------- /src/mavehgvs/patterns/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VariantEffect/mavehgvs/69476dde5391022e7c0eca32ecd1734e371436eb/src/mavehgvs/patterns/__init__.py -------------------------------------------------------------------------------- /src/mavehgvs/patterns/combined.py: -------------------------------------------------------------------------------- 1 | from mavehgvs.patterns.dna import dna_single_variant as dsv, dna_multi_variant as dmv 2 | from mavehgvs.patterns.rna import rna_single_variant as rsv, rna_multi_variant as rmv 3 | from mavehgvs.patterns.protein import ( 4 | pro_single_variant as psv, 5 | pro_multi_variant as pmv, 6 | ) 7 | 8 | any_variant = ( 9 | r"(?:(?P[a-zA-Z0-9_.-]+):)?" 10 | + r"(?P" 11 | + rf"(?P{r'|'.join([dsv, rsv, psv])})|" 12 | + rf"(?P{r'|'.join([dmv, rmv, pmv])})" 13 | + r")" 14 | ) 15 | -------------------------------------------------------------------------------- /src/mavehgvs/patterns/dna.py: -------------------------------------------------------------------------------- 1 | from fqfa.constants import DNA_BASES 2 | from mavehgvs.patterns.util import combine_patterns, remove_named_groups 3 | from mavehgvs.patterns.position import pos, pos_intron, pos_intron_utr 4 | 5 | dna_nt: str = rf"[{''.join(DNA_BASES)}]" 6 | """str: Pattern matching any uppercase DNA base. 7 | 8 | This does not include IUPAC ambiguity characters. 9 | """ 10 | 11 | dna_equal_c: str = ( 12 | rf"(?P(?:(?:(?P{pos_intron_utr})_(?P{pos_intron_utr}))|" 13 | + rf"(?P{pos_intron_utr}))?(?P=))" 14 | ) 15 | """str: Pattern matching DNA equality with numeric, intronic, or UTR positions. 
# NOTE(review): throughout this module the named-group identifiers (the
# "<name>" part of every "(?P<name>...)" opener) appear to have been stripped
# by markup mangling, leaving invalid "(?P" openers and empty .replace()
# arguments — restore the group names from the upstream source; TODO confirm.

dna_sub_c: str = (
    rf"(?P(?P{pos_intron_utr})(?P{dna_nt})>(?P{dna_nt}))"
)
"""str: Pattern matching a DNA substitution with numeric, intronic, or UTR positions.
"""

dna_del_c: str = (
    rf"(?P(?:(?:(?P{pos_intron_utr})_(?P{pos_intron_utr}))|"
    + rf"(?P{pos_intron_utr}))del)"
)
"""str: Pattern matching a DNA deletion with numeric, intronic, or UTR positions.
"""

dna_dup_c: str = (
    rf"(?P(?:(?:(?P{pos_intron_utr})_"
    + rf"(?P{pos_intron_utr}))|(?P{pos_intron_utr}))dup)"
)
"""str: Pattern matching a DNA duplication with numeric, intronic, or UTR positions.
"""

dna_ins_c: str = (
    rf"(?P(?P{pos_intron_utr})_"
    + rf"(?P{pos_intron_utr})ins(?P{dna_nt}+))"
)
"""str: Pattern matching a DNA insertion with numeric, intronic, or UTR positions.
"""

dna_delins_c: str = (
    rf"(?P(?:(?:(?P{pos_intron_utr})_"
    + rf"(?P{pos_intron_utr}))|(?P{pos_intron_utr}))"
    + rf"delins(?P{dna_nt}+))"
)
"""str: Pattern matching a DNA deletion-insertion with numeric, intronic, or UTR
positions.
"""

dna_equal_n: str = r"(?P(?P=))"
"""str: Pattern matching DNA equality with no position support.
"""

# The "_n" variants below are derived from the "_c" variants by swapping the
# coding position pattern (with UTR support) for the noncoding one and renaming
# the outermost capture group.
dna_sub_n: str = dna_sub_c.replace(pos_intron_utr, pos_intron).replace(
    "(?P", "(?P"
)
"""str: Pattern matching a DNA substitution with numeric or intron positions for
non-coding variants.
"""

dna_del_n: str = dna_del_c.replace(pos_intron_utr, pos_intron).replace(
    "(?P", "(?P"
)
"""str: Pattern matching a DNA deletion with numeric or intron positions for non-coding
variants.
"""

dna_dup_n: str = dna_dup_c.replace(pos_intron_utr, pos_intron).replace(
    "(?P", "(?P"
)
"""str: Pattern matching a DNA duplication with numeric or intron positions for
non-coding variants.
"""

dna_ins_n: str = dna_ins_c.replace(pos_intron_utr, pos_intron).replace(
    "(?P", "(?P"
)
"""str: Pattern matching a DNA insertion with numeric or intron positions for non-coding
variants.
"""

dna_delins_n: str = dna_delins_c.replace(pos_intron_utr, pos_intron).replace(
    "(?P", "(?P"
)
"""str: Pattern matching a DNA deletion-insertion with numeric or intron positions for
non-coding variants.
"""

# The "_gmo" variants are derived from the "_c" variants by restricting to
# plain numeric positions (no intron or UTR offsets) for genomic-style prefixes.
dna_equal_gmo: str = dna_equal_c.replace(pos_intron_utr, pos).replace(
    "(?P", "(?P"
)
"""str: Pattern matching DNA equality with only numeric positions for
genomic-style variants.
"""

dna_sub_gmo: str = dna_sub_c.replace(pos_intron_utr, pos).replace(
    "(?P", "(?P"
)
"""str: Pattern matching a DNA substitution with only numeric positions for
genomic-style variants.
"""

dna_del_gmo: str = dna_del_c.replace(pos_intron_utr, pos).replace(
    "(?P", "(?P"
)
"""str: Pattern matching a DNA deletion with only numeric positions for genomic-style
variants.
"""

dna_dup_gmo: str = dna_dup_c.replace(pos_intron_utr, pos).replace(
    "(?P", "(?P"
)
"""str: Pattern matching a DNA duplication with only numeric positions for genomic-style
variants.
"""

dna_ins_gmo: str = dna_ins_c.replace(pos_intron_utr, pos).replace(
    "(?P", "(?P"
)
"""str: Pattern matching a DNA insertion with only numeric positions for genomic-style
variants.
"""

dna_delins_gmo: str = dna_delins_c.replace(pos_intron_utr, pos).replace(
    "(?P", "(?P"
)
"""str: Pattern matching a DNA deletion-insertion with only numeric positions for
genomic-style variants.
"""

dna_variant_c: str = combine_patterns(
    [dna_equal_c, dna_sub_c, dna_del_c, dna_dup_c, dna_ins_c, dna_delins_c], None
)
"""str: Pattern matching any of the coding DNA variants.
"""

dna_variant_n: str = combine_patterns(
    [dna_equal_n, dna_sub_n, dna_del_n, dna_dup_n, dna_ins_n, dna_delins_n], None
)
"""str: Pattern matching any of the non-coding DNA variants.
"""

dna_variant_gmo: str = combine_patterns(
    [dna_equal_gmo, dna_sub_gmo, dna_del_gmo, dna_dup_gmo, dna_ins_gmo, dna_delins_gmo],
    None,
)
"""str: Pattern matching any of the genomic-style DNA variants.
"""

dna_single_variant: str = (
    rf"(?Pc\.{dna_variant_c})|"
    + rf"(?Pn\.{dna_variant_n})|"
    + rf"(?P[gmo]\.{dna_variant_gmo})"
)
"""str: Pattern matching any complete single DNA variant, including the prefix
character.
"""

dna_multi_variant: str = (
    r"(?Pc\."
    + rf"\[{remove_named_groups(dna_variant_c)}"
    + rf"(?:;{remove_named_groups(dna_variant_c)}){{1,}}\])|"
    + r"(?Pn\."
    + rf"\[{remove_named_groups(dna_variant_n)}"
    + rf"(?:;{remove_named_groups(dna_variant_n)}){{1,}}\])|"
    + r"(?P[gmo]\."
    + rf"\[{remove_named_groups(dna_variant_gmo)}"
    + rf"(?:;{remove_named_groups(dna_variant_gmo)}){{1,}}\])"
)
"""str: Pattern matching any complete DNA multi-variant, including the prefix character.

Named capture groups have been removed from the variant patterns because of
non-uniqueness.
Another application of the single-variant regular expressions is needed to recover the
named groups from each individual variant in the multi-variant.
"""
pos: str = r"[1-9][0-9]*"
"""str: Pattern matching a positive integer not starting with 0.

This pattern is used for sequence positions, as position 0 does not exist.
"""

pos_intron: str = rf"{pos}(?:[+-]{pos})?"
"""str: Pattern matching a position with optional intron component.

This pattern is used for sequence positions in an RNA or noncoding sequence.
"""

pos_intron_utr: str = rf"[*-]?{pos}(?:[+-]{pos})?"
"""str: Pattern matching a position with optional intron and UTR components.

This pattern is used for sequence positions in a coding sequence.
"""


from fqfa.constants import AA_CODES
from mavehgvs.patterns.util import combine_patterns, remove_named_groups
from mavehgvs.patterns.position import pos

# NOTE(review): the named-group identifiers (the "<name>" part of every
# "(?P<name>...)" opener) appear to have been stripped from the patterns below
# by markup mangling — restore them from the upstream source; TODO confirm.

amino_acid: str = rf"(?:{'|'.join(AA_CODES.values())})"
"""str: Pattern matching any amino acid or Ter.

This does not include ambiguous amino acids such as Glx and Xaa.
"""

aa_pos: str = rf"(?:{amino_acid}{pos})"
"""str: Pattern matching an amino acid code followed by a position.
"""

pro_equal: str = (
    rf"(?P(?:(?P{aa_pos})?(?P=))|(?P\(=\)))"
)
"""str: Pattern matching protein equality or synonymous variant.
"""

pro_sub: str = rf"(?P(?P{aa_pos})(?P{amino_acid}))"
"""str: Pattern matching a protein substitution.
"""

pro_fs: str = rf"(?P(?P{aa_pos})fs)"
"""str: Pattern matching a protein frameshift.
"""

pro_del: str = (
    rf"(?P(?:(?P{aa_pos})_(?P{aa_pos})del)|"
    + rf"(?:(?P{aa_pos})del))"
)
"""str: Pattern matching a protein deletion.
"""

pro_dup: str = (
    rf"(?P(?:(?P{aa_pos})_(?P{aa_pos})dup)|"
    + rf"(?:(?P{aa_pos})dup))"
)
"""str: Pattern matching a protein duplication.
"""

pro_ins: str = (
    rf"(?P(?P{aa_pos})_(?P{aa_pos})ins(?P{amino_acid}+))"
)
"""str: Pattern matching a protein insertion.
"""

pro_delins: str = (
    rf"(?P(?:(?:(?P{aa_pos})_(?P{aa_pos}))|"
    + rf"(?P{aa_pos}))delins(?P{amino_acid}+))"
)
"""str: Pattern matching a protein deletion-insertion.
"""

pro_variant: str = combine_patterns(
    [pro_equal, pro_sub, pro_fs, pro_del, pro_dup, pro_ins, pro_delins], None
)
"""str: Pattern matching any single protein variant event.
"""

pro_single_variant: str = rf"(?Pp\.{pro_variant})"
"""str: Pattern matching any complete protein variant, including the prefix character.
"""

pro_multi_variant: str = (
    rf"(?Pp\.\[{remove_named_groups(pro_variant)}"
    + rf"(?:;{remove_named_groups(pro_variant)}){{1,}}\])"
)
"""str: Pattern matching any complete protein multi-variant, including the prefix
character.

Named capture groups have been removed from the variant patterns because of
non-uniqueness.
Another application of the single-variant regular expressions is needed to recover the
named groups from each individual variant in the multi-variant.
"""


from fqfa.constants import RNA_BASES
from mavehgvs.patterns.util import combine_patterns, remove_named_groups
from mavehgvs.patterns.position import pos_intron

rna_nt: str = rf"[{''.join(RNA_BASES).lower()}]"
"""str: Pattern matching any lowercase RNA base.

This does not include IUPAC ambiguity characters.
"""

rna_equal: str = (
    rf"(?P(?:(?:(?P{pos_intron})_"
    + rf"(?P{pos_intron}))|(?P{pos_intron}))?(?P=))"
)
"""str: Pattern matching RNA equality with numeric or relative-to-transcript positions.
"""

rna_sub: str = (
    rf"(?P(?P{pos_intron})(?P{rna_nt})>(?P{rna_nt}))"
)
"""str: Pattern matching a RNA substitution with numeric or relative-to-transcript
positions.
"""

rna_del: str = (
    rf"(?P(?:(?:(?P{pos_intron})_(?P{pos_intron}))|"
    + rf"(?P{pos_intron}))del)"
)
"""str: Pattern matching a RNA deletion with numeric or relative-to-transcript
positions.
"""

rna_dup: str = (
    rf"(?P(?:(?:(?P{pos_intron})_(?P{pos_intron}))|"
    + rf"(?P{pos_intron}))dup)"
)
"""str: Pattern matching a RNA duplication with numeric or relative-to-transcript
positions.
"""

rna_ins: str = (
    rf"(?P(?P{pos_intron})_(?P{pos_intron})ins(?P{rna_nt}+))"
)
"""str: Pattern matching a RNA insertion with numeric or relative-to-transcript
positions.
"""

rna_delins: str = (
    rf"(?P(?:(?:(?P{pos_intron})_(?P{pos_intron}))|"
    + rf"(?P{pos_intron}))delins(?P{rna_nt}+))"
)
"""str: Pattern matching a RNA deletion-insertion with numeric or relative-to-transcript
positions.
"""
"""Helpers for assembling and transforming mavehgvs regular expression strings.
"""

import re
from typing import Optional, Sequence

# Matches the opener of a named capture group and captures the group's name.
_GROUP_NAME_RE = re.compile(r"\(\?P<(\w+)>")


def _prefix_group_names(pattern: str) -> str:
    """Prefix every named group after the first with the first group's name.

    For example, the second group in ``(?P<sub>(?P<pos>...))`` is renamed to
    ``sub_pos``.
    """
    matches = list(_GROUP_NAME_RE.finditer(pattern))
    prefix = f"{matches[0].group(1)}_"
    pieces = []
    cursor = 0
    for match in matches[1:]:
        name_start = match.start(1)
        pieces.append(pattern[cursor:name_start])
        pieces.append(prefix)
        cursor = name_start
    pieces.append(pattern[cursor:])
    return "".join(pieces)


def combine_patterns(patterns: Sequence[str], groupname: Optional[str] = None) -> str:
    """Join several pattern strings into a single alternation pattern.

    A regular expression cannot contain two groups with the same name, so every
    named group after the first in each input pattern is renamed with the first
    group's name as a prefix.
    For example, ``(?P<sub>(?P<pos>[1-9][0-9]*)...`` becomes
    ``(?P<sub>(?P<sub_pos>[1-9][0-9]*)...``.

    Each input pattern is assumed to be fully enclosed in parentheses.

    Parameters
    ----------
    patterns : Sequence[str]
        Sequence of pattern strings to combine.

    groupname : Optional[str]
        Name for the capture group surrounding the resulting pattern.
        When None, a non-capturing group is used instead.

    Returns
    -------
    str
        Pattern string that matches any of the input patterns, with named
        groups renamed as described above to attempt to ensure uniqueness
        across the combined pattern.

    """
    renamed = [_prefix_group_names(p) for p in patterns]
    alternation = "|".join(renamed)
    if groupname is None:
        return f"(?:{alternation})"
    return f"(?P<{groupname}>{alternation})"


def remove_named_groups(pattern: str, noncapturing: bool = True) -> str:
    """Replace each named group opener in a pattern with plain parentheses.

    Parameters
    ----------
    pattern : str
        The pattern string to strip named groups from.

    noncapturing : bool
        When True, named group openers become non-capturing ``(?:``;
        when False, they become regular capturing ``(``.

    Returns
    -------
    str
        The pattern string without named match groups.

    """
    return re.sub(r"\(\?P<\w+>", "(?:" if noncapturing else "(", pattern)
    fullmatch = re.compile(pos_with_groups, flags=re.ASCII).fullmatch
    """Callable[[str, int, int], Optional[Match[str]]]: fullmatch callable for parsing
    positions

    Returns an :py:obj:`re.Match` object if the full string matches one of the position
    groups in :py:data:`pos_with_groups`.
    """

    def __init__(self, pos_str: str) -> None:
        """Parse a position string into a VariantPosition object.

        Parameters
        ----------
        pos_str : str
            The string to convert to a VariantPosition object.

        Raises
        ------
        MaveHgvsParseError
            If the string is not a valid MAVE-HGVS position, or if it combines
            an amino acid with an intronic or UTR component.

        """
        try:
            # fullmatch returns None on failure, so .groupdict() raises
            # AttributeError for any string that is not a valid position
            gdict = VariantPosition.fullmatch(pos_str).groupdict()
        except AttributeError:
            raise MaveHgvsParseError(f"invalid variant position string '{pos_str}'")

        self.position = None
        self.amino_acid = None
        self.intronic_position = None
        self.utr = None

        if gdict["position"].startswith("*"):  # 3' UTR position
            self.utr = True
            self.position = int(gdict["position"][1:])
        else:
            if gdict["position"].startswith("-"):  # 5' UTR position
                self.utr = True
            self.position = int(gdict["position"])

        if gdict["position_aa"] is not None:
            self.amino_acid = gdict["position_aa"]

        if gdict["position_intron"] is not None:
            self.intronic_position = int(gdict["position_intron"])

        # protein positions cannot carry intronic or UTR components
        if self.amino_acid is not None and (
            self.intronic_position is not None or self.utr is not None
        ):
            raise MaveHgvsParseError("invalid variant")

    def __repr__(self) -> str:
        """The object representation is equivalent to the input string.

        Returns
        -------
        str
            The object representation.

        """
        # 3' UTR positions are prefixed with '*'; 5' UTR positions are already
        # negative so they render with their '-' sign
        if self.utr and self.position > 0:
            p = f"*{self.position}"
        else:
            p = f"{self.position}"

        if self.intronic_position is not None:
            if self.intronic_position > 0:
                return f"{p}+{self.intronic_position}"
            else:
                return f"{p}{self.intronic_position}"
        elif self.amino_acid is not None:
            return f"{self.amino_acid}{p}"
        else:
            return p

    def __lt__(self, other: "VariantPosition") -> bool:
        """Less than comparison operator.

        Other comparison operators will be filled in using
        :py:func:`functools.total_ordering`.

        Parameters
        ----------
        other : VariantPosition
            The other VariantPosition to compare to.

        Returns
        -------
        bool
            True if this position evaluates as strictly less than the other position;
            else False.

        """
        if self.utr == other.utr:
            if self.position == other.position:
                if (
                    self.intronic_position == other.intronic_position
                ):  # pragma: no cover
                    # this case is covered by __eq__
                    return False
                elif self.intronic_position is None:
                    # exonic base vs intronic offset anchored at the same base:
                    # the exonic base precedes positive (5') intronic offsets
                    return other.intronic_position > 0
                elif other.intronic_position is None:
                    return self.intronic_position < 0
                else:
                    return self.intronic_position < other.intronic_position
            else:
                return self.position < other.position
        else:  # 5' < non-UTR < 3'
            if self.utr:
                if self.position < 0:  # self is in 5' UTR
                    return True
                else:  # self is in 3' UTR
                    return False
            else:
                if other.position < 0:  # other is in 5' UTR
                    return False
                else:  # other is in 3' UTR
                    return True

    def __eq__(self, other: "VariantPosition") -> bool:
        """Equality comparison operator.

        Note that the amino acid portion of a protein position is not used in this
        comparison.

        Other comparison operators will be filled in using
        :py:func:`functools.total_ordering`.

        Parameters
        ----------
        other : VariantPosition
            The other VariantPosition to compare to.

        Returns
        -------
        bool
            True if this position is the same as the other position; else False.

        """
        return (self.position, self.intronic_position, self.utr) == (
            other.position,
            other.intronic_position,
            other.utr,
        )

    def __ne__(self, other: "VariantPosition") -> bool:
        """Not equal comparison operator.

        Note that the amino acid portion of a protein position is not used in this
        comparison.

        Other comparison operators will be filled in using
        :py:func:`functools.total_ordering`.

        Parameters
        ----------
        other : VariantPosition
            The other VariantPosition to compare to.

        Returns
        -------
        bool
            True if this position is not the same as the other position; else False.

        """
        return (self.position, self.intronic_position, self.utr) != (
            other.position,
            other.intronic_position,
            other.utr,
        )

    def is_utr(self) -> bool:
        """Return whether this is a UTR position.

        Returns
        -------
        bool
            True if the object describes a position in the UTR; else False.

        """
        return self.utr is not None

    def is_intronic(self) -> bool:
        """Return whether this is an intronic position.

        Returns
        -------
        bool
            True if the object describes a position in an intron; else False.

        """
        return self.intronic_position is not None

    def is_protein(self) -> bool:
        """Return whether this is a protein position.

        Returns
        -------
        bool
            True if the object describes a position with an amino acid component; else
            False.

        """
        return self.amino_acid is not None

    def is_extended(self) -> bool:
        """Return whether this position was described using the extended syntax.

        Returns
        -------
        bool
            True if the position was described using the extended syntax; else False.

        """
        return self.utr is not None or self.intronic_position is not None

    # string annotation in the type hint below is required for Python 3.6 compatibility
    def is_adjacent(self, other: "VariantPosition") -> bool:
        """Return whether this variant and another are immediately adjacent in sequence
        space.

        The following special cases are not handled correctly:

        * The special case involving the last variant in a transcript sequence and the
          first base in the 3' UTR will be evaluated as not adjacent, as the object does
          not have sequence length information.
        * The special case involving the two middle bases in an intron where the
          numbering switches from positive with respect to the 5' end of the intron to
          negative with respect to the 3' end of the intron will be evaluated as not
          adjacent, as the object does not have intron length information.
        * This ignores the special case where there is an intron between the last base
          of the 5' UTR and the first base of the coding sequence because it is not
          biologically relevant to the best of my knowledge.

        Parameters
        ----------
        other : VariantPosition
            The object to calculate adjacency to.

        Returns
        -------
        bool
            True if the positions describe adjacent bases in sequence space; else False.

        """
        if self.utr == other.utr:
            if self.intronic_position is None and other.intronic_position is None:
                return abs(self.position - other.position) == 1
            elif (
                self.position == other.position
            ):  # intronic positions can only be adjacent if relative to the same base
                if (
                    self.intronic_position is not None
                    and other.intronic_position is not None
                ):
                    return abs(self.intronic_position - other.intronic_position) == 1
                else:
                    # special case for first/last base of intron and
                    # corresponding first/last base of exon
                    return (
                        self.intronic_position == -1
                        or self.intronic_position == 1
                        or other.intronic_position == -1
                        or other.intronic_position == 1
                    )
            else:
                return False
        else:  # special case for last base of 5' utr and first base of non-UTR sequence
            return (self.position == -1 and other.position == 1) or (
                other.position == -1 and self.position == 1
            )
def parse_variant_strings(
    variants: Iterable[str],
    targetseq: Optional[str] = None,
    expected_prefix: Optional[str] = None,
) -> Tuple[List[Optional[Variant]], List[Optional[str]]]:
    """Parse an iterable of MAVE-HGVS strings into Variant objects or error
    messages.

    Parameters
    ----------
    variants : Iterable[str]
        Iterable of MAVE-HGVS strings to parse.

    targetseq : Optional[str]
        If provided, all variants will be validated for agreement with this sequence.
        See the documentation for :py:class:`Variant` for further details.

    expected_prefix : Optional[str]
        If provided, all variants will be expected to have the same single-letter
        prefix.
        Variants that do not have this prefix will be treated as invalid.

    Returns
    -------
    Tuple[List[Optional[Variant]], List[Optional[str]]]
        Returns a pair of parallel lists, each the same length as the input.
        The first list contains a Variant object for each string that was
        successfully parsed and None otherwise.
        The second list contains None for each successfully parsed string and
        the error message otherwise.

    Raises
    ------
    ValueError
        If ``expected_prefix`` is not one of the recognized prefix letters.

    """
    if expected_prefix is not None and expected_prefix not in tuple("cgmnopr"):
        raise ValueError("invalid expected prefix")

    parsed: List[Optional[Variant]] = []
    errors: List[Optional[str]] = []

    for variant_string in variants:
        try:
            variant = Variant(variant_string, targetseq=targetseq)
        except MaveHgvsParseError as error:
            parsed.append(None)
            errors.append(str(error))
            continue
        if expected_prefix is not None and variant.prefix != expected_prefix:
            parsed.append(None)
            errors.append("unexpected variant prefix")
        else:
            parsed.append(variant)
            errors.append(None)

    return parsed, errors
mavehgvs.patterns.combined import any_variant
from mavehgvs.exceptions import MaveHgvsParseError

__all__ = ["Variant"]

AA_3_TO_1 = {value: key for key, value in AA_CODES.items()}
"""Dict[str, str]: for converting three-letter amino acid codes to single-letter codes.
"""


class Variant:
    fullmatch = re.compile(any_variant, flags=re.ASCII).fullmatch
    """Callable[[str, int, int], Optional[Match[str]]]: fullmatch callable for parsing a
    single MAVE-HGVS variant

    Returns an :py:obj:`re.Match` object if the full string defines a valid MAVE-HGVS
    variant.
    Match groups in the result can be used to extract components of the variant.
    """

    VTYPES = (
        "equal",  # equality
        "sub",  # substitution
        "fs",  # frame shift
        "del",  # deletion
        "dup",  # duplication
        "ins",  # insertion
        "delins",  # deletion-insertion
    )
    """Tuple[str]: variant type tags used in MAVE-HGVS patterns and variant type names.
    """

    def __init__(  # noqa: max-complexity: 37
        self,
        s: Union[str, Mapping[str, Any], Sequence[Mapping[str, Any]]],
        targetseq: Optional[str] = None,
        relaxed_ordering: bool = False,
    ):
        """Convert a MAVE-HGVS variant string into a corresponding object with named
        fields.

        Parameters
        ----------
        s : Union[str, Mapping[str, Any], Sequence[Mapping[str, Any]]]
            MAVE-HGVS variant string to convert into an object, dictionary type object
            containing key-value pairs corresponding to a MAVE-HGVS object, or
            list/tuple of dictionary type objects for a variant with multiple events.

        targetseq : Optional[str]
            If provided, the variant will be validated for agreement with this sequence.
            Target sequence validation is not supported for variants using the extended
            position syntax.

            This must be an amino acid sequence for protein variants or a nucleotide
            sequence for coding/noncoding/genomic variants.
            DNA and amino acid sequences should be in uppercase, RNA in lowercase.

        relaxed_ordering : bool
            If True, variants that do not observe the 3-prime rule for variant position
            ordering are allowed.
            The object representation will observe the 3-prime rule, so it may differ
            from the input string in this case.

        Raises
        ------
        MaveHgvsParseError
            If the string (or the string built from the dictionary input) is not a
            valid MAVE-HGVS variant, or fails semantic/target-sequence validation.
        ValueError
            If ``s`` is not a string, Mapping, or sequence of Mappings.

        """
        # NOTE: the str check must precede the Sequence check, since str is itself
        # a Sequence. Dictionary inputs are converted to a variant string first and
        # then validated through the same regular expression path as string inputs.
        if isinstance(s, str):  # variant string to parse
            variant_string = s
        elif isinstance(s, Mapping):  # dictionary-style single variant
            variant_string = self._variant_dictionary_to_string(s, include_prefix=True)
        elif isinstance(s, Sequence):  # dictionary-style multi-variant
            if not all(isinstance(v, Mapping) for v in s):
                raise ValueError("multi-variant iterable must contain Mapping objects")
            try:
                all_prefixes = [v["prefix"] for v in s]
            except KeyError:
                raise MaveHgvsParseError("variant dictionary missing required keys")
            if len(set(all_prefixes)) != 1:
                raise MaveHgvsParseError(
                    "cannot combine variants with different prefixes"
                )
            multivariants = ";".join(
                self._variant_dictionary_to_string(v, include_prefix=False) for v in s
            )
            variant_string = f"{s[0]['prefix']}.[{multivariants}]"
        else:
            raise ValueError("can only create Variants from string or Mapping objects")

        variant_match = self.fullmatch(variant_string)
        if variant_match is None:
            raise MaveHgvsParseError("failed regular expression validation")
        else:
            match_dict = variant_match.groupdict()

            # set target id if present
            if match_dict["target_id"] is not None:
                self._target_id = match_dict["target_id"]
            else:
                self._target_id = None

            # set prefix and determine if this is a multi-variant
            # the prefix is the first character of the matched group (e.g. 'c', 'p')
            if match_dict["single_variant"] is not None:
                self.variant_count = 1
                self._prefix = match_dict["single_variant"][0]
            elif match_dict["multi_variant"] is not None:
                self.variant_count = len(variant_string.split(";"))
                self._prefix = match_dict["multi_variant"][0]
            else:  # pragma: no cover
                raise ValueError("invalid match type")

            if self.variant_count == 1:
                (
                    self._variant_types,
                    self._positions,
                    self._sequences,
                ) = self._process_string_variant(
                    match_dict, relaxed_ordering=relaxed_ordering
                )
            elif self.variant_count > 1:
                self._variant_types = list()
                self._positions = list()
                self._sequences = list()

                # format each individual variant event as a single variant and parse it
                # [3:-1] strips the leading "X.[" (prefix, dot, bracket) and the
                # trailing "]" from the multi-variant string
                for variant_substring in match_dict["multi_variant"][3:-1].split(";"):
                    groupdict = self.fullmatch(
                        f"{self._prefix}.{variant_substring}"
                    ).groupdict()
                    vt, p, s = self._process_string_variant(
                        groupdict, relaxed_ordering=relaxed_ordering
                    )
                    if vt == "equal":
                        raise MaveHgvsParseError(
                            "multi-variants cannot contain target-identical variants"
                        )

                    self._variant_types.append(vt)
                    self._positions.append(p)
                    self._sequences.append(s)

                # ensure that multiple variants aren't defined for the same positions
                # each element is either a single VariantPosition or a
                # (start, end) tuple, so all four pairings are checked
                for vp1, vp2 in itertools.combinations(self._positions, 2):
                    if isinstance(vp1, VariantPosition) and isinstance(
                        vp2, VariantPosition
                    ):  # both single position
                        if vp1 == vp2:
                            raise MaveHgvsParseError(
                                "multi-variant has multiple changes at same position"
                            )
                    elif isinstance(vp1, VariantPosition) and isinstance(vp2, Tuple):
                        if vp2[0] <= vp1 <= vp2[1]:
                            raise MaveHgvsParseError(
                                "multi-variant has overlapping changes"
                            )
                    elif isinstance(vp1, Tuple) and isinstance(vp2, VariantPosition):
                        if vp1[0] <= vp2 <= vp1[1]:
                            raise MaveHgvsParseError(
                                "multi-variant has overlapping changes"
                            )
                    elif isinstance(vp1, Tuple) and isinstance(vp2, Tuple):
                        if (
                            vp1[0] <= vp2[0] <= vp1[1]
                            or vp1[0] <= vp2[1] <= vp1[1]
                            or vp2[0] <= vp1[0] <= vp2[1]
                            or vp2[0] <= vp1[1] <= vp2[1]
                        ):
                            raise MaveHgvsParseError(
                                "multi-variant has overlapping changes"
                            )
                    else:  # pragma: no cover
                        raise ValueError("invalid position type")

                # re-order variants and validate
                def sort_key(x):
                    # sort by the single position, or the start of a position pair
                    if isinstance(x[1], VariantPosition):
                        return x[1]
                    elif isinstance(x[1], Tuple):
                        return x[1][0]
                    else:  # pragma: no cover
                        raise ValueError("invalid position type")

                variant_list = list(self.variant_tuples())
                ordered_list = sorted(variant_list, key=sort_key)
                if variant_list != ordered_list:
                    if relaxed_ordering:
                        self._variant_types = [x[0] for x in ordered_list]
                        self._positions = [x[1] for x in ordered_list]
                        self._sequences = [x[2] for x in ordered_list]
                    else:
                        raise MaveHgvsParseError("multi-variants not in sorted order")

                # make sure there is at most one frame shift
                if sum(x == "fs" for x in self._variant_types) > 1:
                    raise MaveHgvsParseError("maximum of one frame shift is permitted")

                # make sure the frame shift is last if present
                if any(x == "fs" for x in self._variant_types):
                    if self._variant_types[-1] != "fs":
                        raise MaveHgvsParseError(
                            "no variants are permitted to follow a frame shift"
                        )

            else:  # pragma: no cover
                raise ValueError("invalid variant count")

        if targetseq is not None:
            for vtype, pos, seq in self.variant_tuples():
                if self._prefix != "p" and vtype == "sub":
                    # nucleotide substitutions carry the reference base in seq[0]
                    self._target_validate(pos, seq[0], targetseq)
                elif (
                    pos is None and vtype == "equal"
                ):  # special case for full-length target identical variants
                    pass
                else:
                    self._target_validate(pos, None, targetseq)

    def variant_tuples(
        self,
    ) -> Generator[
        Tuple[
            str,
            Optional[Union[VariantPosition, Tuple[VariantPosition, VariantPosition]]],
            Optional[Union[str, Tuple[str, str]]],
        ],
        None,
        None,
    ]:
        """Generator that yields tuples containing the variant components.

        Yields
        ------
        Tuple
            Tuple of the variant type, position(s), and sequence(s) for each element in
            the variant.

        """
        if self.is_multi_variant():
            for vtype, pos, seq in zip(
                self._variant_types, self._positions, self._sequences
            ):
                yield vtype, pos, seq
        else:
            # single variant: the attributes are scalars, so yield them as one tuple
            yield self._variant_types, self._positions, self._sequences

    def _process_string_variant(  # noqa: max-complexity: 23
        self, match_dict: Dict[str, str], relaxed_ordering: bool
    ) -> Tuple[
        str,
        Optional[Union[VariantPosition, Tuple[VariantPosition, VariantPosition]]],
        Optional[Union[str, Tuple[str, str]]],
    ]:
        """Process the match dictionary from a single variant into its components.

        Parameters
        ----------
        match_dict : Dict[str, str]
            Match dictionary from the MAVE-HGVS regular expression.
        relaxed_ordering : bool
            If True, variants that do not observe the 3-prime rule for variant position
            ordering are allowed.

        Returns
        -------
        Tuple[str, Optional[Union[VariantPosition, Tuple[VariantPosition, \
        VariantPosition]]], Optional[Union[str, Tuple[str, str]]]]
            Returns a 3-tuple containing the variant type, optional position (or
            start/end positions), and optional before/after substitution sequences or
            inserted sequence.

        """
        variant_type = None
        positions = None
        sequences = None

        # determine which named groups to check
        # group names encode the sequence type and variant type (e.g. "dna_sub_c")
        if self._prefix == "p":
            pattern_group_tuples = [(f"pro_{t}", t) for t in self.VTYPES]
        elif self._prefix == "r":
            pattern_group_tuples = [(f"rna_{t}", t) for t in self.VTYPES if t != "fs"]
        elif self._prefix in tuple("cn"):
            pattern_group_tuples = [
                (f"dna_{t}_{self._prefix}", t) for t in self.VTYPES if t != "fs"
            ]
        elif self._prefix in tuple("gmo"):
            pattern_group_tuples = [
                (f"dna_{t}_gmo", t) for t in self.VTYPES if t != "fs"
            ]
        else:  # pragma: no cover
            raise ValueError("unexpected prefix")

        # set the variant type
        # exactly one of the candidate pattern groups should have matched
        vtype_set = False
        pattern_group = None
        for pg, vtype in pattern_group_tuples:
            if match_dict[pg] is not None:
                if vtype_set:  # pragma: no cover
                    raise ValueError(f"ambiguous match: '{pg}' and '{pattern_group}'")
                variant_type = vtype
                pattern_group = pg
                vtype_set = True

        # set the position and sequence
        if variant_type == "sub":
            positions = VariantPosition(match_dict[f"{pattern_group}_position"])
            if self._prefix == "p":
                # protein positions embed the reference amino acid
                sequences = (positions.amino_acid, match_dict[f"{pattern_group}_new"])
            elif self._prefix in tuple("gmocnr"):
                sequences = (
                    match_dict[f"{pattern_group}_ref"],
                    match_dict[f"{pattern_group}_new"],
                )
            else:  # pragma: no cover
                raise ValueError("unexpected prefix")
        elif variant_type in ("equal", "fs", "del", "dup", "ins", "delins"):
            # set position
            if (
                match_dict.get(f"{pattern_group}_position") is not None
            ):  # use get() since ins pattern doesn't have pos
                positions = VariantPosition(match_dict[f"{pattern_group}_position"])
            elif (
                match_dict.get(f"{pattern_group}_start") is not None
                and match_dict.get(f"{pattern_group}_end") is not None
            ):
                positions = (
                    VariantPosition(match_dict[f"{pattern_group}_start"]),
                    VariantPosition(match_dict[f"{pattern_group}_end"]),
                )
                # extra validation on positions
                if positions[0] >= positions[1]:
                    if relaxed_ordering:
                        positions = (positions[1], positions[0])
                    else:
                        raise MaveHgvsParseError(
                            "start position must be before end position"
                        )
                if variant_type == "ins":
                    if not positions[0].is_adjacent(positions[1]):
                        raise MaveHgvsParseError("insertion positions must be adjacent")
            else:  # pragma: no cover
                if variant_type != "equal":
                    raise MaveHgvsParseError("variant position not found")

            # set sequence if needed
            if variant_type in ("ins", "delins"):
                sequences = match_dict[f"{pattern_group}_seq"]
            elif variant_type == "equal":
                if (
                    match_dict[f"{pattern_group}_equal"] is not None
                ):  # special case for target identity
                    sequences = match_dict[f"{pattern_group}_equal"]
                elif match_dict["pro_equal_equal_sy"] is not None:
                    sequences = match_dict["pro_equal_equal_sy"]

        return variant_type, positions, sequences

    # TODO: API documentation for the dictionary objects
    @staticmethod
    def _variant_dictionary_to_string(  # noqa: max-complexity: 25
        vdict: Mapping[str, Any], include_prefix: bool
    ) -> str:
        """Convert a match dictionary from a single variant into a string for further
        validation.

        This method performs minimal validation of the values provided in the input, and
        instead converts it into a variant string that is validated using the regular
        expression based validators.

        Parameters
        ----------
        vdict : Mapping[str, Any]
            Key-value pairs describing a single variant.
        include_prefix: bool
            If True, the variant prefix and '.' will be included in the string; else it
            is omitted (for use with multi-variants).

        Returns
        -------
        str
            A string representing this variant.

        Raises
        ------
        MaveHgvsParseError
            If the dictionary does not have a valid set of keys.

        """
        try:
            variant_type = vdict["variant_type"]
            prefix = vdict["prefix"]
        except KeyError:
            raise MaveHgvsParseError("variant dictionary missing required keys")

        # each variant type requires an exact key set, which also depends on
        # whether the variant is protein ("p") or nucleotide
        if variant_type == "equal":
            expected_keys = ["variant_type", "prefix"]
            if prefix == "p":
                expected_keys.extend(["position", "target"])
            else:
                expected_keys.extend(["start_position", "end_position"])
            if sorted(vdict.keys()) != sorted(expected_keys):
                raise MaveHgvsParseError("variant dictionary contains invalid keys")
            if prefix == "p":
                variant_string = f"{vdict['target']}{vdict['position']}="
            elif vdict["start_position"] == vdict["end_position"]:
                variant_string = f"{vdict['start_position']}="
            else:
                variant_string = f"{vdict['start_position']}_{vdict['end_position']}="
        elif variant_type == "sub":
            if sorted(vdict.keys()) != sorted(
                ["variant_type", "prefix", "position", "target", "variant"]
            ):
                raise MaveHgvsParseError("variant dictionary contains invalid keys")
            if prefix == "p":
                variant_string = (
                    f"{vdict['target']}{vdict['position']}{vdict['variant']}"
                )
            else:
                variant_string = (
                    f"{vdict['position']}{vdict['target']}>{vdict['variant']}"
                )
        elif variant_type == "fs":
            if sorted(vdict.keys()) != sorted(
                ["variant_type", "prefix", "position", "target"]
            ):
                raise MaveHgvsParseError("variant dictionary contains invalid keys")
            if prefix == "p":
                variant_string = f"{vdict['target']}{vdict['position']}fs"
            else:
                raise MaveHgvsParseError(
                    "frame shifts are only supported for protein variants"
                )
        elif variant_type in ("del", "dup"):
            expected_keys = ["variant_type", "prefix", "start_position", "end_position"]
            if prefix == "p":
                expected_keys.extend(["start_target", "end_target"])
            if sorted(vdict.keys()) != sorted(expected_keys):
                raise MaveHgvsParseError("variant dictionary contains invalid keys")
            if prefix == "p":
                start = f"{vdict['start_target']}{vdict['start_position']}"
                end = f"{vdict['end_target']}{vdict['end_position']}"
            else:
                start = vdict["start_position"]
                end = vdict["end_position"]
            if start == end:
                # single-position form (e.g. "12del" instead of "12_12del")
                variant_string = f"{start}{variant_type}"
            else:
                variant_string = f"{start}_{end}{variant_type}"
        elif variant_type in ("ins", "delins"):
            expected_keys = [
                "variant_type",
                "prefix",
                "start_position",
                "end_position",
                "variant",
            ]
            if prefix == "p":
                expected_keys.extend(["start_target", "end_target"])
            if sorted(vdict.keys()) != sorted(expected_keys):
                raise MaveHgvsParseError("variant dictionary contains invalid keys")
            if prefix == "p":
                start = f"{vdict['start_target']}{vdict['start_position']}"
                end = f"{vdict['end_target']}{vdict['end_position']}"
            else:
                start = vdict["start_position"]
                end = vdict["end_position"]
            if start == end and variant_type == "delins":
                variant_string = f"{start}{variant_type}{vdict['variant']}"
            else:
                variant_string = f"{start}_{end}{variant_type}{vdict['variant']}"
        else:
            raise MaveHgvsParseError("invalid variant type")

        if include_prefix:
            return f"{vdict['prefix']}.{variant_string}"
        else:
            return variant_string

    def _format_component_variants(self) -> List[str]:  # noqa: max-complexity: 14
        """Format each of the component variants of this variant into a variant string.

        The result is a list of strings, each representing a single variant. If this
        variant is a single variant, the list will contain a single element equivalent
        to the input string. For multi-variants, the list will contain each component
        variant of the variant.

        Returns
        -------
        List[str]
            List of formatted component variants.

        """

        def format_variant(
            vtype: str,
            pos: Union[VariantPosition, Tuple[VariantPosition, VariantPosition]],
            seq: Optional[Union[str, Tuple[str, str]]],
        ) -> str:
            """Helper function for building variant strings.

            Parameters
            ----------
            vtype : str
                The variant type, as described by :py:obj:`Variant.VTYPES`
            pos : Union[VariantPosition, Tuple[VariantPosition, VariantPosition]]
                The position or pair of positions describing the variant.
            seq : Optional[Union[str, Tuple[str, str]]]
                The sequence or pair of sequences describing the variant.
                Only used for substitions, insertions, and deletion-insertions.

            Returns
            -------
            str
                A string representing this variant element.

            """
            if vtype == "sub":
                if self._prefix == "p":  # protein variant
                    return f"{pos}{seq[1]}"
                else:  # nucleotide variant
                    return f"{pos}{seq[0]}>{seq[1]}"
            elif vtype == "fs":
                return f"{pos}fs"
            elif vtype in ("del", "dup"):
                if isinstance(pos, tuple):
                    return f"{pos[0]}_{pos[1]}{vtype}"
                else:
                    return f"{pos}{vtype}"
            elif vtype in ("ins", "delins"):
                if isinstance(pos, tuple):
                    return f"{pos[0]}_{pos[1]}{vtype}{seq}"
                else:
                    return f"{pos}{vtype}{seq}"
            elif vtype == "equal":
                if pos is None:
                    # full-length target-identical variant (e.g. "c.=")
                    return f"{seq}"
                elif isinstance(pos, tuple):
                    return f"{pos[0]}_{pos[1]}{seq}"
                else:
                    return f"{pos}{seq}"
            else:  # pragma: no cover
                raise ValueError("invalid variant type")

        return [format_variant(*t) for t in self.variant_tuples()]

    def __eq__(self, other: "Variant") -> bool:
        """Equality comparison operator.

        Parameters
        ----------
        other : Variant
            The other Variant to compare to.

        Returns
        -------
        bool
            True if this variant is the same as the other position; else False.

        """
        # compare all components that define the variant's identity
        return (
            self._target_id,
            self.variant_count,
            self._prefix,
            self._variant_types,
            self._positions,
            self._sequences,
        ) == (
            other._target_id,
            other.variant_count,
            other._prefix,
            other._variant_types,
            other._positions,
            other._sequences,
        )

    def __repr__(self) -> str:
        """The object representation is equivalent to the input string.

        Returns
        -------
        str
            The object representation.

        """

        elements = self._format_component_variants()

        if self._target_id is not None:
            prefix = f"{self._target_id}:{self._prefix}"
        else:
            prefix = f"{self._prefix}"

        if self.is_multi_variant():
            return f"{prefix}.[{';'.join(elements)}]"
        else:
            return f"{prefix}.{elements[0]}"

    @staticmethod
    def _target_validate(
        pos: Union[VariantPosition, Tuple[VariantPosition, VariantPosition]],
        ref: Optional[str],
        target: str,
    ) -> None:
        """Determine whether the target portion of a variant matches the target
        sequence.

        Note that variants using extended syntax cannot be validated with this method.
        If an extended syntax variant is encountered, it will be interpreted as
        valid/matching.

        Parameters
        ----------
        pos : Union[VariantPosition, Tuple[VariantPosition, VariantPosition]]
            Single variant position or start/end tuple for an indel.
        ref : Optional[str]
            Reference base to validate for nucleotide substitutions.
            This should be None for amino acid substitutions, since the reference is
            included in the VariantPosition.
        target : str
            Target sequence. This must be an amino acid sequence for protein variants or
            a nucleotide sequence for coding/noncoding/genomic variants.
            RNA sequences should be in lowercase, DNA sequences should be in uppercase.

        Returns
        -------
        None

        Raises
        ------
        MaveHgvsParseError
            If the reference base or amino acid does not match the target at the given
            position
        MaveHgvsParseError
            If the position is outside the bounds of the target.

        """
        # normalize to a tuple so single positions and ranges share one code path
        if not isinstance(pos, tuple):
            pos = (pos,)

        if any(p.is_extended() for p in pos):
            # extended (intronic/UTR) positions can't be checked against the target
            return
        elif any(p.position > len(target) for p in pos):
            raise MaveHgvsParseError("variant coordinate out of bounds")
        else:
            if ref is not None and len(pos) == 1:  # nucleotide substitution
                if target[pos[0].position - 1] != ref:
                    raise MaveHgvsParseError("variant reference does not match target")
            elif pos[0].amino_acid is not None:  # protein variant
                for p in pos:
                    if target[p.position - 1] != AA_3_TO_1[p.amino_acid]:
                        raise MaveHgvsParseError(
                            "variant reference does not match target"
                        )
            else:
                return

    def is_target_identical(self) -> bool:
        """Return whether the variant describes the "wild-type" sequence or is the
        special synonymous variant.

        This is the variant described with only the equals sign (e.g. ``c.=``)
        or the uncertain equals protein variant (e.g. ``p.(=)``).

        Coding or genomic variants that specify an identical region (e.g. ``c.1_3=`` are
        also considered target identical.

        Synonymous protein variants (e.g. ``p.Leu12=``) are not considered target
        identical.

        Returns
        -------
        bool
            True if this variant describes the wild-type or target sequence; else False.

        """
        if self._variant_types == "equal":
            if self._prefix == "p":
                # protein "equal" with a position is synonymous, not target-identical
                return self._positions is None
            else:
                return True
        else:
            return False

    def is_synonymous(self) -> bool:
        """Return whether the variant describes a synonymous protein variant or is the
        special synonymous variant.

        Returns
        -------
        bool
            True if this variant describes a synonymous protein variant; else False.

        """
        return self._variant_types == "equal" and self._prefix == "p"

    def is_multi_variant(self) -> bool:
        """Return whether the variant is a multi-variant.

        A multi-variant is a single variant describing multiple events enclosed in '[]'.
        Multi-variants are referred to as alleles in the HGVS standard.

        Returns
        -------
        bool
            True if the variant is a multi-variant; else False.

        """
        return self.variant_count > 1

    @property
    def prefix(self) -> str:
        """The single-letter prefix for this variant.

        Returns
        -------
        str
            Single-letter prefix corresponding to the sequence type.

            See the following table for sequence type prefixes and their meanings:

            .. csv-table::
               :file: ../docs/prefix.csv
               :header: "Prefix", "Description"
               :widths: 5, 20

        """
        return self._prefix

    @property
    def variant_type(self) -> Union[str, List[str]]:
        """The type for this variant.

        Valid variant types are:

        * ``'equal'`` for target-identical or synonymous variants
        * ``'sub'`` for substitutions
        * ``'fs'`` for frame shifts
        * ``'del'`` for deletions
        * ``'dup'`` for duplications
        * ``'ins'`` for insertions
        * ``'delins'`` for deletion-insertions

        Returns
        -------
        Union[str, List[str]]
            String containing the variant type. Returns a list of strings for a
            multi-variant.

        """
        return self._variant_types

    def uses_extended_positions(self) -> bool:
        """Return whether the variant uses the extended position notation to describe
        intronic or UTR positions.

        Examples of variants using the extended position notation include:

        * c.122-6T>A
        * r.*33a>c
        * c.43-6_595+12delinsCTT

        This should always be false for variants with a genomic or protein prefix, as
        variants with these prefixes cannot use positions relative to a transcript under
        the MAVE-HGVS specification.

        Returns
        -------
        bool
            True if the variant (or any of the individual variants for a multi-variant)
            uses the extended position notation.

        """
        if self.is_multi_variant():
            # flatten single positions and (start, end) tuples into one list
            all_positions = list()
            for p in self.positions:
                if isinstance(p, tuple):
                    all_positions.extend(p)
                else:
                    all_positions.append(p)
            return any(p.is_extended() for p in all_positions)
        else:
            if self._positions is None:  # special case for target identity
                return False
            elif isinstance(self.positions, tuple):
                return any(p.is_extended() for p in self.positions)
            else:
                return self.positions.is_extended()

    @property
    def positions(
        self,
    ) -> Optional[
        Union[
            VariantPosition,
            Tuple[VariantPosition, VariantPosition],
            List[Union[VariantPosition, Tuple[VariantPosition, VariantPosition]]],
        ]
    ]:
        """The variant position as a single position or tuple containing start and end
        positions.

        Each position is an instance of :py:class:`mavehgvs.position.VariantPosition`.

        Returns
        -------
        Union[VariantPosition, Tuple[VariantPosition, VariantPosition], \
        List[Union[VariantPosition, Tuple[VariantPosition, VariantPosition]]]]
            Variant position or tuple of start/end positions.
            Returns a list of positions or start/end tuples for a multi-variant.

        """
        return self._positions

    @property
    def sequence(
        self,
    ) -> Optional[
        Union[str, Tuple[str, str], List[Optional[Union[str, Tuple[str, str]]]]]
    ]:
        """The sequence portion of the variant.

        This can be a tuple of target and new bases for a substitution, a single
        sequence for insertions or deletion-insertions, or the "=" character for
        variants that are identical to the target sequence.

        Returns
        -------
        Union[str, Tuple[str, str], List[Optional[Union[str, Tuple[str, str]]]]]]
            Tuple of ref/new bases for substitutions, string containing inserted
            sequence, or the "=" character.
            Returns None if the variant does not have a sequence component (deletion or
            duplication).
            Returns a list for a multi-variant, which may contain None values for
            deletions or duplications.

        """
        return self._sequences

    @property
    def target_id(self) -> Optional[str]:
        """The target identifier for the variant (if applicable).

        The target identifier precedes the prefix and is followed by a ``:``.
        For example in ``NM_001130145.3:c.832C>T`` the target identifier is
        "NM_001130145.3".

        Returns
        -------
        Optional[str]
            The target identifier, or None if it is not set.

        """
        return self._target_id

    def components(self) -> Tuple[str, ...]:
        """The component substrings of a variant.

        Returns
        -------
        Tuple[str, ...]
            List of component substrings for this variant.

        """
        if self.target_id is not None:
            prefix = f"{self.target_id}:{self.prefix}"
        else:
            prefix = f"{self.prefix}"

        return tuple(
            [f"{prefix}.{component}" for component in self._format_component_variants()]
        )
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VariantEffect/mavehgvs/69476dde5391022e7c0eca32ecd1734e371436eb/tests/__init__.py
--------------------------------------------------------------------------------
/tests/test_patterns/__init__.py:
--------------------------------------------------------------------------------
import itertools
from typing import Iterable, Iterator, Tuple


def build_multi_variants(
    valid_strings: Iterable[str],
    invalid_strings: Iterable[str],
    min_length: int = 2,
    max_length: int = 3,
) -> Tuple[Iterator, Iterator]:
    """Build iterators of valid and invalid multi-variant strings to test.

    Parameters
    ----------
    valid_strings : Iterable[str]
        Iterable containing all the valid single-variant strings.
    invalid_strings : Iterable[str]
        Iterable containing all the invalid single-variant strings.
    min_length : int
        Minimum length of multi-variants that will be generated.
    max_length : int
        Maximum length of multi-variants that will be generated.
        Note that increasing this value may massively increase test runtime.

    Returns
    -------
    Tuple[Iterator, Iterator]
        Returns iterators containing semicolon-separated multi-variant strings.

        The first iterator contains multi-variants from only valid_strings and the
        second iterator contains multi-variants that include at least one variant from
        invalid_strings.
33 | """ 34 | # create an iterable of permutations for each length and store them in lists 35 | valid_multivariants = list() 36 | invalid_multivariants = list() 37 | 38 | for i in range(min_length, max_length + 1): 39 | valid_multivariants.append( 40 | ";".join(x) for x in itertools.permutations(valid_strings, i) 41 | ) 42 | invalid_multivariants.append( 43 | ";".join(x) 44 | for x in itertools.permutations( 45 | itertools.chain(valid_strings, invalid_strings), i 46 | ) 47 | if any(y in x for y in invalid_strings) 48 | ) 49 | 50 | # combine the lists into single iterators and return 51 | return itertools.chain.from_iterable( 52 | valid_multivariants 53 | ), itertools.chain.from_iterable(invalid_multivariants) 54 | -------------------------------------------------------------------------------- /tests/test_patterns/test_dna.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import re 3 | from mavehgvs.patterns.dna import ( 4 | dna_equal_c, 5 | dna_equal_n, 6 | dna_equal_gmo, 7 | dna_sub_c, 8 | dna_sub_n, 9 | dna_sub_gmo, 10 | dna_del_c, 11 | dna_del_n, 12 | dna_del_gmo, 13 | dna_dup_c, 14 | dna_dup_n, 15 | dna_dup_gmo, 16 | dna_ins_c, 17 | dna_ins_n, 18 | dna_ins_gmo, 19 | dna_delins_c, 20 | dna_delins_n, 21 | dna_delins_gmo, 22 | dna_variant_c, 23 | dna_variant_n, 24 | dna_variant_gmo, 25 | dna_single_variant, 26 | dna_multi_variant, 27 | ) 28 | from . 
import build_multi_variants 29 | 30 | 31 | class TestDnaEqualC(unittest.TestCase): 32 | @classmethod 33 | def setUpClass(cls): 34 | cls.pattern = re.compile(dna_equal_c, flags=re.ASCII) 35 | 36 | cls.valid_strings = [ 37 | "=", 38 | "18=", 39 | "10_14=", 40 | "122-6=", 41 | "*24=", 42 | "19+22=", 43 | "19+22_88=", 44 | "-27+3=", 45 | ] 46 | 47 | cls.invalid_strings = ["=22", "(=)", "18(=)"] 48 | 49 | def test_valid_strings(self): 50 | for s in self.valid_strings: 51 | with self.subTest(s=s): 52 | self.assertIsNotNone( 53 | self.pattern.fullmatch(s), msg=f'failed to match "{s}"' 54 | ) 55 | 56 | def test_invalid_strings(self): 57 | for s in self.invalid_strings: 58 | with self.subTest(s=s): 59 | self.assertIsNone( 60 | self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"' 61 | ) 62 | 63 | 64 | class TestDnaEqualN(unittest.TestCase): 65 | @classmethod 66 | def setUpClass(cls): 67 | cls.pattern = re.compile(dna_equal_n, flags=re.ASCII) 68 | 69 | cls.valid_strings = ["="] 70 | 71 | cls.invalid_strings = [ 72 | "=22", 73 | "(=)", 74 | "18(=)", 75 | "-27+3=", 76 | "*24=", 77 | "18=", 78 | "10_14=", 79 | "122-6=", 80 | "19+22=", 81 | "19+22_88=", 82 | ] 83 | 84 | def test_valid_strings(self): 85 | for s in self.valid_strings: 86 | with self.subTest(s=s): 87 | self.assertIsNotNone( 88 | self.pattern.fullmatch(s), msg=f'failed to match "{s}"' 89 | ) 90 | 91 | def test_invalid_strings(self): 92 | for s in self.invalid_strings: 93 | with self.subTest(s=s): 94 | self.assertIsNone( 95 | self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"' 96 | ) 97 | 98 | 99 | class TestDnaEqualGMO(unittest.TestCase): 100 | @classmethod 101 | def setUpClass(cls): 102 | cls.pattern = re.compile(dna_equal_gmo, flags=re.ASCII) 103 | 104 | cls.valid_strings = ["=", "18=", "10_14="] 105 | 106 | cls.invalid_strings = [ 107 | "=22", 108 | "(=)", 109 | "18(=)", 110 | "122-6=", 111 | "*24=", 112 | "19+22=", 113 | "19+22_88=", 114 | "-27+3=", 115 | ] 116 | 117 | def test_valid_strings(self): 
class TestDnaSubC(unittest.TestCase):
    """Validate the coding-sequence (c.) DNA substitution pattern."""

    @classmethod
    def setUpClass(cls):
        # Compile once for the whole class; re.ASCII restricts matching to
        # ASCII-only character classes.
        cls.pattern = re.compile(dna_sub_c, flags=re.ASCII)

        cls.valid_strings = ["48C>A", "122-6T>A", "*24G>C", "19+22A>G", "-27+3T>C"]
        cls.invalid_strings = ["22g>u", "48C>W", "122=/T>A"]

    def test_valid_strings(self):
        """Each well-formed substitution must match the full string."""
        for variant in self.valid_strings:
            with self.subTest(s=variant):
                self.assertIsNotNone(
                    self.pattern.fullmatch(variant), msg=f'failed to match "{variant}"'
                )

    def test_invalid_strings(self):
        """Each malformed substitution must be rejected outright."""
        for variant in self.invalid_strings:
            with self.subTest(s=variant):
                self.assertIsNone(
                    self.pattern.fullmatch(variant),
                    msg=f'incorrectly matched "{variant}"',
                )
class TestDnaDelC(unittest.TestCase):
    """Validate the coding-sequence (c.) DNA deletion pattern."""

    @classmethod
    def setUpClass(cls):
        cls.pattern = re.compile(dna_del_c, flags=re.ASCII)

        cls.valid_strings = [
            "44del",
            "1_95del",
            "78+5_78+10del",
            "-25+1_-25+3del",
            "*17del",
        ]
        cls.invalid_strings = [
            "(78+1_79-1)_(124+1_125-1)del",
            "(?_85)_(124_?)del",
            "122=/del",
        ]

    def test_valid_strings(self):
        """Each supported deletion form must match the full string."""
        for variant in self.valid_strings:
            with self.subTest(s=variant):
                self.assertIsNotNone(
                    self.pattern.fullmatch(variant), msg=f'failed to match "{variant}"'
                )

    def test_invalid_strings(self):
        """Unsupported deletion forms must be rejected."""
        for variant in self.invalid_strings:
            with self.subTest(s=variant):
                self.assertIsNone(
                    self.pattern.fullmatch(variant),
                    msg=f'incorrectly matched "{variant}"',
                )
test_invalid_strings(self): 261 | for s in self.invalid_strings: 262 | with self.subTest(s=s): 263 | self.assertIsNone( 264 | self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"' 265 | ) 266 | 267 | 268 | class TestDnaDelGmo(unittest.TestCase): 269 | @classmethod 270 | def setUpClass(cls): 271 | cls.pattern = re.compile(dna_del_gmo, flags=re.ASCII) 272 | 273 | cls.valid_strings = ["44del", "1_95del"] 274 | 275 | cls.invalid_strings = [ 276 | "78+5_78+10del", 277 | "-25+1_-25+3del", 278 | "*17del", 279 | "(78+1_79-1)_(124+1_125-1)del", 280 | "(?_85)_(124_?)del", 281 | "122=/del", 282 | ] 283 | 284 | def test_valid_strings(self): 285 | for s in self.valid_strings: 286 | with self.subTest(s=s): 287 | self.assertIsNotNone( 288 | self.pattern.fullmatch(s), msg=f'failed to match "{s}"' 289 | ) 290 | 291 | def test_invalid_strings(self): 292 | for s in self.invalid_strings: 293 | with self.subTest(s=s): 294 | self.assertIsNone( 295 | self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"' 296 | ) 297 | 298 | 299 | class TestDnaDupC(unittest.TestCase): 300 | @classmethod 301 | def setUpClass(cls): 302 | cls.pattern = re.compile(dna_dup_c, flags=re.ASCII) 303 | 304 | cls.valid_strings = [ 305 | "22_24dup", 306 | "77dup", 307 | "101+1_101+7dup", 308 | "-25+1_-25+3dup", 309 | "*17dup", 310 | ] 311 | 312 | cls.invalid_strings = [ 313 | "(78+1_79-1)_(124+1_125-1)dup", 314 | "(?_85)_(124_?)dup", 315 | "122_125=//dup", 316 | ] 317 | 318 | def test_valid_strings(self): 319 | for s in self.valid_strings: 320 | with self.subTest(s=s): 321 | self.assertIsNotNone( 322 | self.pattern.fullmatch(s), msg=f'failed to match "{s}"' 323 | ) 324 | 325 | def test_invalid_strings(self): 326 | for s in self.invalid_strings: 327 | with self.subTest(s=s): 328 | self.assertIsNone( 329 | self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"' 330 | ) 331 | 332 | 333 | class TestDnaDupN(unittest.TestCase): 334 | @classmethod 335 | def setUpClass(cls): 336 | cls.pattern = 
re.compile(dna_dup_n, flags=re.ASCII) 337 | 338 | cls.valid_strings = ["22_24dup", "77dup", "101+1_101+7dup"] 339 | 340 | cls.invalid_strings = [ 341 | "(78+1_79-1)_(124+1_125-1)dup", 342 | "(?_85)_(124_?)dup", 343 | "122_125=//dup", 344 | "-25+1_-25+3dup", 345 | "*17dup", 346 | ] 347 | 348 | def test_valid_strings(self): 349 | for s in self.valid_strings: 350 | with self.subTest(s=s): 351 | self.assertIsNotNone( 352 | self.pattern.fullmatch(s), msg=f'failed to match "{s}"' 353 | ) 354 | 355 | def test_invalid_strings(self): 356 | for s in self.invalid_strings: 357 | with self.subTest(s=s): 358 | self.assertIsNone( 359 | self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"' 360 | ) 361 | 362 | 363 | class TestDnaDupGmo(unittest.TestCase): 364 | @classmethod 365 | def setUpClass(cls): 366 | cls.pattern = re.compile(dna_dup_gmo, flags=re.ASCII) 367 | 368 | cls.valid_strings = ["22_24dup", "77dup"] 369 | 370 | cls.invalid_strings = [ 371 | "(78+1_79-1)_(124+1_125-1)dup", 372 | "(?_85)_(124_?)dup", 373 | "122_125=//dup", 374 | "101+1_101+7dup", 375 | "-25+1_-25+3dup", 376 | "*17dup", 377 | ] 378 | 379 | def test_valid_strings(self): 380 | for s in self.valid_strings: 381 | with self.subTest(s=s): 382 | self.assertIsNotNone( 383 | self.pattern.fullmatch(s), msg=f'failed to match "{s}"' 384 | ) 385 | 386 | def test_invalid_strings(self): 387 | for s in self.invalid_strings: 388 | with self.subTest(s=s): 389 | self.assertIsNone( 390 | self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"' 391 | ) 392 | 393 | 394 | class TestDnaInsC(unittest.TestCase): 395 | @classmethod 396 | def setUpClass(cls): 397 | cls.pattern = re.compile(dna_ins_c, flags=re.ASCII) 398 | 399 | cls.valid_strings = [ 400 | "234_235insT", 401 | "84_85insCTG", 402 | "*84_*85insCTG", 403 | "99+6_99+7insA", 404 | "124+100_124-100insTTG", 405 | "124+101_124-100insTTG", 406 | ] 407 | 408 | cls.invalid_strings = ["84_85ins100_125", "234_235ins(10)", "234_235ins(?)"] 409 | 410 | def 
test_valid_strings(self): 411 | for s in self.valid_strings: 412 | with self.subTest(s=s): 413 | self.assertIsNotNone( 414 | self.pattern.fullmatch(s), msg=f'failed to match "{s}"' 415 | ) 416 | 417 | def test_invalid_strings(self): 418 | for s in self.invalid_strings: 419 | with self.subTest(s=s): 420 | self.assertIsNone( 421 | self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"' 422 | ) 423 | 424 | 425 | class TestDnaInsN(unittest.TestCase): 426 | @classmethod 427 | def setUpClass(cls): 428 | cls.pattern = re.compile(dna_ins_n, flags=re.ASCII) 429 | 430 | cls.valid_strings = [ 431 | "234_235insT", 432 | "84_85insCTG", 433 | "99+6_99+7insA", 434 | "124+100_124-100insTTG", 435 | "124+101_124-100insTTG", 436 | ] 437 | 438 | cls.invalid_strings = [ 439 | "84_85ins100_125", 440 | "234_235ins(10)", 441 | "234_235ins(?)", 442 | "*84_*85insCTG", 443 | ] 444 | 445 | def test_valid_strings(self): 446 | for s in self.valid_strings: 447 | with self.subTest(s=s): 448 | self.assertIsNotNone( 449 | self.pattern.fullmatch(s), msg=f'failed to match "{s}"' 450 | ) 451 | 452 | def test_invalid_strings(self): 453 | for s in self.invalid_strings: 454 | with self.subTest(s=s): 455 | self.assertIsNone( 456 | self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"' 457 | ) 458 | 459 | 460 | class TestDnaInsGmo(unittest.TestCase): 461 | @classmethod 462 | def setUpClass(cls): 463 | cls.pattern = re.compile(dna_ins_gmo, flags=re.ASCII) 464 | 465 | cls.valid_strings = ["234_235insT", "84_85insCTG"] 466 | 467 | cls.invalid_strings = [ 468 | "99+6_99+7insA", 469 | "84_85ins100_125", 470 | "234_235ins(10)", 471 | "234_235ins(?)", 472 | ] 473 | 474 | def test_valid_strings(self): 475 | for s in self.valid_strings: 476 | with self.subTest(s=s): 477 | self.assertIsNotNone( 478 | self.pattern.fullmatch(s), msg=f'failed to match "{s}"' 479 | ) 480 | 481 | def test_invalid_strings(self): 482 | for s in self.invalid_strings: 483 | with self.subTest(s=s): 484 | self.assertIsNone( 485 | 
class TestDnaDelinsN(unittest.TestCase):
    """Validate the non-coding (n.) DNA deletion-insertion (delins) pattern."""

    @classmethod
    def setUpClass(cls):
        cls.pattern = re.compile(dna_delins_n, flags=re.ASCII)

        cls.valid_strings = ["22delinsAACG", "83_85delinsT", "43-6_595+12delinsCTT"]

        # Fixed: the original list read `"234delinsW" "*788delinsA"` (missing
        # comma), which Python implicitly concatenated into the single bogus
        # string "234delinsW*788delinsA" — so "*788delinsA" was never actually
        # tested for rejection.  It must be rejected for n. variants (it is
        # valid only for c., per TestDnaDelinsC / TestDnaVariantN).
        cls.invalid_strings = ["84_85delinsAAN", "234delinsW", "*788delinsA"]

    def test_valid_strings(self):
        """Each well-formed n. delins variant must match the full string."""
        for s in self.valid_strings:
            with self.subTest(s=s):
                self.assertIsNotNone(
                    self.pattern.fullmatch(s), msg=f'failed to match "{s}"'
                )

    def test_invalid_strings(self):
        """Malformed or non-n. delins variants must be rejected."""
        for s in self.invalid_strings:
            with self.subTest(s=s):
                self.assertIsNone(
                    self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"'
                )
class TestDnaVariantC(unittest.TestCase):
    """Smoke-test the combined c. single-event pattern across all event types.

    The fixtures pool examples from every individual event test class above
    (equal, substitution, del, dup, ins, delins), so this presumably exercises
    the union pattern `dna_variant_c` — confirm against patterns/dna.py.
    """

    @classmethod
    def setUpClass(cls):
        # Compile once for the class; re.ASCII restricts matching to ASCII.
        cls.pattern = re.compile(dna_variant_c, flags=re.ASCII)

        # One or more representative valid strings from each event type,
        # including UTR (*/-) and intronic (+/-) positions allowed for c.
        cls.valid_strings = [
            "48C>A",
            "=",
            "22=",
            "4_6=",
            "122-6T>A",
            "*24G>C",
            "19+22A>G",
            "-27+3T>C",
            "44del",
            "1_95del",
            "78+5_78+10del",
            "-25+1_-25+3del",
            "*17del",
            "22_24dup",
            "77dup",
            "101+1_101+7dup",
            "-25+1_-25+3dup",
            "*17dup",
            "234_235insT",
            "84_85insCTG",
            "99+6_99+7insA",
            "22delinsAACG",
            "83_85delinsT",
            "43-6_595+12delinsCTT",
            "*788delinsA",
        ]

        # Invalid strings pooled from each event type's rejection fixtures
        # (lowercase bases, ambiguity codes, mosaic "=/" forms, uncertain
        # breakpoints, and numeric/length insertions).
        cls.invalid_strings = [
            "22g>u",
            "48C>W",
            "122=/T>A",
            "(78+1_79-1)_(124+1_125-1)del",
            "(?_85)_(124_?)del",
            "122=/del",
            "(78+1_79-1)_(124+1_125-1)dup",
            "(?_85)_(124_?)dup",
            "122_125=//dup",
            "84_85ins100_125",
            "234_235ins(10)",
            "234_235ins(?)",
            "84_85delinsAAN",
            "234delinsW",
        ]

    def test_valid_strings(self):
        """Each valid variant of any event type must match the full string."""
        for s in self.valid_strings:
            with self.subTest(s=s):
                self.assertIsNotNone(
                    self.pattern.fullmatch(s), msg=f'failed to match "{s}"'
                )

    def test_invalid_strings(self):
        """Each invalid variant of any event type must be rejected."""
        for s in self.invalid_strings:
            with self.subTest(s=s):
                self.assertIsNone(
                    self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"'
                )
cls.pattern = re.compile(dna_variant_n, flags=re.ASCII) 640 | 641 | cls.valid_strings = [ 642 | "48C>A", 643 | "=", 644 | "122-6T>A", 645 | "19+22A>G", 646 | "44del", 647 | "1_95del", 648 | "78+5_78+10del", 649 | "22_24dup", 650 | "77dup", 651 | "101+1_101+7dup", 652 | "234_235insT", 653 | "84_85insCTG", 654 | "99+6_99+7insA", 655 | "22delinsAACG", 656 | "83_85delinsT", 657 | "43-6_595+12delinsCTT", 658 | ] 659 | 660 | cls.invalid_strings = [ 661 | "22=", 662 | "1_3=", 663 | "22g>u", 664 | "48C>W", 665 | "122=/T>A", 666 | "(78+1_79-1)_(124+1_125-1)del", 667 | "(?_85)_(124_?)del", 668 | "122=/del", 669 | "(78+1_79-1)_(124+1_125-1)dup", 670 | "(?_85)_(124_?)dup", 671 | "122_125=//dup", 672 | "84_85ins100_125", 673 | "234_235ins(10)", 674 | "234_235ins(?)", 675 | "84_85delinsAAN", 676 | "234delinsW", 677 | "*24G>C", 678 | "-27+3T>C", 679 | "-25+1_-25+3del", 680 | "*17del", 681 | "-25+1_-25+3dup", 682 | "*17dup", 683 | "*788delinsA", 684 | ] 685 | 686 | def test_valid_strings(self): 687 | for s in self.valid_strings: 688 | with self.subTest(s=s): 689 | self.assertIsNotNone( 690 | self.pattern.fullmatch(s), msg=f'failed to match "{s}"' 691 | ) 692 | 693 | def test_invalid_strings(self): 694 | for s in self.invalid_strings: 695 | with self.subTest(s=s): 696 | self.assertIsNone( 697 | self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"' 698 | ) 699 | 700 | 701 | class TestDnaVariantGmo(unittest.TestCase): 702 | @classmethod 703 | def setUpClass(cls): 704 | cls.pattern = re.compile(dna_variant_gmo, flags=re.ASCII) 705 | 706 | cls.valid_strings = [ 707 | "48C>A", 708 | "=", 709 | "22=", 710 | "1_3=", 711 | "44del", 712 | "1_95del", 713 | "22_24dup", 714 | "77dup", 715 | "234_235insT", 716 | "84_85insCTG", 717 | "22delinsAACG", 718 | "83_85delinsT", 719 | ] 720 | 721 | cls.invalid_strings = [ 722 | "43-6_595+12delinsCTT", 723 | "*788delinsA", 724 | "99+6_99+7insA", 725 | "101+1_101+7dup", 726 | "-25+1_-25+3dup", 727 | "*17dup", 728 | "78+5_78+10del", 729 | 
"-25+1_-25+3del", 730 | "*17del", 731 | "*24G>C", 732 | "19+22A>G", 733 | "122-6T>A", 734 | "-27+3T>C", 735 | "22g>u", 736 | "48C>W", 737 | "122=/T>A", 738 | "(78+1_79-1)_(124+1_125-1)del", 739 | "(?_85)_(124_?)del", 740 | "122=/del", 741 | "(78+1_79-1)_(124+1_125-1)dup", 742 | "(?_85)_(124_?)dup", 743 | "122_125=//dup", 744 | "84_85ins100_125", 745 | "234_235ins(10)", 746 | "234_235ins(?)", 747 | "84_85delinsAAN", 748 | "234delinsW", 749 | ] 750 | 751 | def test_valid_strings(self): 752 | for s in self.valid_strings: 753 | with self.subTest(s=s): 754 | self.assertIsNotNone( 755 | self.pattern.fullmatch(s), msg=f'failed to match "{s}"' 756 | ) 757 | 758 | def test_invalid_strings(self): 759 | for s in self.invalid_strings: 760 | with self.subTest(s=s): 761 | self.assertIsNone( 762 | self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"' 763 | ) 764 | 765 | 766 | class TestDnaSingleVariant(unittest.TestCase): 767 | @classmethod 768 | def setUpClass(cls): 769 | cls.pattern = re.compile(dna_single_variant, flags=re.ASCII) 770 | 771 | cls.valid_strings = [ 772 | "48C>A", 773 | "=", 774 | "44del", 775 | "1_95del", 776 | "22_24dup", 777 | "77dup", 778 | "234_235insT", 779 | "84_85insCTG", 780 | "22delinsAACG", 781 | "83_85delinsT", 782 | ] 783 | 784 | cls.valid_strings_c_only = [ 785 | "*788delinsA", 786 | "-25+1_-25+3dup", 787 | "*17dup", 788 | "-25+1_-25+3del", 789 | "*17del", 790 | "*24G>C", 791 | "-27+3T>C", 792 | ] 793 | 794 | cls.valid_strings_cn_only = [ 795 | "43-6_595+12delinsCTT", 796 | "99+6_99+7insA", 797 | "101+1_101+7dup", 798 | "78+5_78+10del", 799 | "19+22A>G", 800 | "122-6T>A", 801 | ] 802 | 803 | cls.valid_strings_cgmo_only = ["22=", "4_6="] 804 | 805 | cls.invalid_strings = [ 806 | "22g>u", 807 | "48C>W", 808 | "122=/T>A", 809 | "(78+1_79-1)_(124+1_125-1)del", 810 | "(?_85)_(124_?)del", 811 | "122=/del", 812 | "(78+1_79-1)_(124+1_125-1)dup", 813 | "(?_85)_(124_?)dup", 814 | "122_125=//dup", 815 | "84_85ins100_125", 816 | "234_235ins(10)", 817 | 
"234_235ins(?)", 818 | "84_85delinsAAN", 819 | "234delinsW", 820 | ] 821 | 822 | def test_valid_strings(self): 823 | for p in "cngmo": 824 | for s in self.valid_strings: 825 | with self.subTest(s=s, p=p): 826 | v = f"{p}.{s}" 827 | self.assertIsNotNone( 828 | self.pattern.fullmatch(v), msg=f'failed to match "{v}"' 829 | ) 830 | for p in "cgmo": 831 | for s in self.valid_strings_cgmo_only: 832 | with self.subTest(s=s, p=p): 833 | v = f"{p}.{s}" 834 | self.assertIsNotNone( 835 | self.pattern.fullmatch(v), msg=f'failed to match "{v}"' 836 | ) 837 | for p in "cn": 838 | for s in self.valid_strings_cn_only: 839 | with self.subTest(s=s, p=p): 840 | v = f"{p}.{s}" 841 | self.assertIsNotNone( 842 | self.pattern.fullmatch(v), msg=f'failed to match "{v}"' 843 | ) 844 | for p in "c": 845 | for s in self.valid_strings_c_only: 846 | with self.subTest(s=s, p=p): 847 | v = f"{p}.{s}" 848 | self.assertIsNotNone( 849 | self.pattern.fullmatch(v), msg=f'failed to match "{v}"' 850 | ) 851 | 852 | def test_invalid_strings(self): 853 | for p in "cngmo": 854 | for s in self.invalid_strings: 855 | with self.subTest(s=s, p=p): 856 | v = f"{p}.{s}" 857 | self.assertIsNone( 858 | self.pattern.fullmatch(v), msg=f'incorrectly matched "{v}"' 859 | ) 860 | for p in "ngmo": 861 | for s in self.valid_strings_c_only: 862 | with self.subTest(s=s, p=p): 863 | v = f"{p}.{s}" 864 | self.assertIsNone( 865 | self.pattern.fullmatch(v), msg=f'incorrectly matched "{v}"' 866 | ) 867 | for p in "gmo": 868 | for s in self.valid_strings_cn_only: 869 | with self.subTest(s=s, p=p): 870 | v = f"{p}.{s}" 871 | self.assertIsNone( 872 | self.pattern.fullmatch(v), msg=f'incorrectly matched "{v}"' 873 | ) 874 | for p in "n": 875 | for s in self.valid_strings_cgmo_only: 876 | with self.subTest(s=s, p=p): 877 | v = f"{p}.{s}" 878 | self.assertIsNone( 879 | self.pattern.fullmatch(v), msg=f'incorrectly matched "{v}"' 880 | ) 881 | 882 | 883 | class TestDnaMultiVariant(unittest.TestCase): 884 | @classmethod 885 | def 
setUpClass(cls): 886 | cls.pattern = re.compile(dna_multi_variant, flags=re.ASCII) 887 | 888 | single_valid_strings = [ 889 | "48C>A", 890 | "=", 891 | "44del", 892 | "1_95del", 893 | "22_24dup", 894 | "77dup", 895 | "234_235insT", 896 | "84_85insCTG", 897 | "22delinsAACG", 898 | "83_85delinsT", 899 | ] 900 | 901 | single_valid_strings_c_only = [ 902 | "*788delinsA", 903 | "-25+1_-25+3dup", 904 | "*17dup", 905 | "-25+1_-25+3del", 906 | "*17del", 907 | "*24G>C", 908 | "-27+3T>C", 909 | ] 910 | 911 | single_valid_strings_cn_only = [ 912 | "43-6_595+12delinsCTT", 913 | "99+6_99+7insA", 914 | "101+1_101+7dup", 915 | "78+5_78+10del", 916 | "19+22A>G", 917 | "122-6T>A", 918 | ] 919 | 920 | single_valid_strings_cgmo_only = ["22=", "4_6="] 921 | 922 | single_invalid_strings = [ 923 | "22g>u", 924 | "48C>W", 925 | "122=/T>A", 926 | "(78+1_79-1)_(124+1_125-1)del", 927 | "(?_85)_(124_?)del", 928 | "122=/del", 929 | "(78+1_79-1)_(124+1_125-1)dup", 930 | "(?_85)_(124_?)dup", 931 | "122_125=//dup", 932 | "84_85ins100_125", 933 | "234_235ins(10)", 934 | "234_235ins(?)", 935 | "84_85delinsAAN", 936 | "234delinsW", 937 | ] 938 | 939 | cls.valid_strings, cls.invalid_strings = build_multi_variants( 940 | single_valid_strings, single_invalid_strings 941 | ) 942 | cls.valid_strings_c_only, cls.invalid_strings_ngmo = build_multi_variants( 943 | single_valid_strings_c_only, single_valid_strings_c_only 944 | ) 945 | cls.valid_strings_cn_only, cls.invalid_strings_gmo = build_multi_variants( 946 | single_valid_strings_cn_only, single_valid_strings_cn_only 947 | ) 948 | cls.valid_strings_cgmo_only, cls.invalid_strings_n = build_multi_variants( 949 | single_valid_strings_cgmo_only, single_valid_strings_cgmo_only 950 | ) 951 | 952 | def test_valid_strings(self): 953 | for p in "cngmo": 954 | for s in self.valid_strings: 955 | with self.subTest(s=s, p=p): 956 | v = f"{p}.[{s}]" 957 | self.assertIsNotNone( 958 | self.pattern.fullmatch(v), msg=f'failed to match "{v}"' 959 | ) 960 | for p in 
"cgmo": 961 | for s in self.valid_strings_cgmo_only: 962 | with self.subTest(s=s, p=p): 963 | v = f"{p}.[{s}]" 964 | self.assertIsNotNone( 965 | self.pattern.fullmatch(v), msg=f'failed to match "{v}"' 966 | ) 967 | for p in "cn": 968 | for s in self.valid_strings_cn_only: 969 | with self.subTest(s=s, p=p): 970 | v = f"{p}.[{s}]" 971 | self.assertIsNotNone( 972 | self.pattern.fullmatch(v), msg=f'failed to match "{v}"' 973 | ) 974 | for p in "c": 975 | for s in self.valid_strings_c_only: 976 | with self.subTest(s=s, p=p): 977 | v = f"{p}.[{s}]" 978 | self.assertIsNotNone( 979 | self.pattern.fullmatch(v), msg=f'failed to match "{v}"' 980 | ) 981 | 982 | def test_invalid_strings(self): 983 | for p in "cngmo": 984 | for s in self.invalid_strings: 985 | with self.subTest(s=s, p=p): 986 | v = f"{p}.[{s}]" 987 | self.assertIsNone( 988 | self.pattern.fullmatch(v), msg=f'incorrectly matched "{v}"' 989 | ) 990 | for p in "ngmo": 991 | for s in self.invalid_strings_ngmo: 992 | with self.subTest(s=s, p=p): 993 | v = f"{p}.[{s}]" 994 | self.assertIsNone( 995 | self.pattern.fullmatch(v), msg=f'incorrectly matched "{v}"' 996 | ) 997 | for p in "gmo": 998 | for s in self.invalid_strings_gmo: 999 | with self.subTest(s=s, p=p): 1000 | v = f"{p}.[{s}]" 1001 | self.assertIsNone( 1002 | self.pattern.fullmatch(v), msg=f'incorrectly matched "{v}"' 1003 | ) 1004 | for p in "n": 1005 | for s in self.invalid_strings_n: 1006 | with self.subTest(s=s, p=p): 1007 | v = f"{p}.[{s}]" 1008 | self.assertIsNone( 1009 | self.pattern.fullmatch(v), msg=f'incorrectly matched "{v}"' 1010 | ) 1011 | 1012 | 1013 | if __name__ == "__main__": 1014 | unittest.main() 1015 | -------------------------------------------------------------------------------- /tests/test_patterns/test_protein.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import re 3 | from mavehgvs.patterns.protein import ( 4 | pro_equal, 5 | pro_sub, 6 | pro_fs, 7 | pro_del, 8 | pro_dup, 
class TestProteinSub(unittest.TestCase):
    """Validate the protein substitution pattern."""

    @classmethod
    def setUpClass(cls):
        cls.pattern = re.compile(pro_sub, flags=re.ASCII)

        cls.valid_strings = ["Glu27Trp", "Ter345Lys"]
        cls.invalid_strings = [
            "22A>T",
            "Xaa12Arg",
            "Arg21Xaa",
            "Pro17*",
            "*345Lys",
            "(Glu27Trp)",
        ]

    def test_valid_strings(self):
        """Each well-formed substitution must match the full string."""
        for variant in self.valid_strings:
            with self.subTest(s=variant):
                self.assertIsNotNone(
                    self.pattern.fullmatch(variant), msg=f'failed to match "{variant}"'
                )

    def test_invalid_strings(self):
        """Each malformed substitution must be rejected outright."""
        for variant in self.invalid_strings:
            with self.subTest(s=variant):
                self.assertIsNone(
                    self.pattern.fullmatch(variant),
                    msg=f'incorrectly matched "{variant}"',
                )
class TestProteinDel(unittest.TestCase):
    """Validate the protein deletion pattern."""

    @classmethod
    def setUpClass(cls):
        cls.pattern = re.compile(pro_del, flags=re.ASCII)

        cls.valid_strings = ["Gly18del", "Gln7_Asn19del"]
        cls.invalid_strings = ["=del", "18del", "122_128del", "(Gly18del)"]

    def test_valid_strings(self):
        """Each well-formed deletion must match the full string."""
        for variant in self.valid_strings:
            with self.subTest(s=variant):
                self.assertIsNotNone(
                    self.pattern.fullmatch(variant), msg=f'failed to match "{variant}"'
                )

    def test_invalid_strings(self):
        """Each malformed deletion must be rejected outright."""
        for variant in self.invalid_strings:
            with self.subTest(s=variant):
                self.assertIsNone(
                    self.pattern.fullmatch(variant),
                    msg=f'incorrectly matched "{variant}"',
                )
| cls.valid_strings = [ 167 | "His7_Gln8insSer", 168 | "Ala12_Pro13insGlyProCys", 169 | ] 170 | 171 | cls.invalid_strings = [ 172 | "(His7_Gln8insSer)", 173 | "(His7_Gln8insX)", 174 | "(Ala12_Pro13ins(2))", 175 | "His7_Gln8ins?", 176 | "His7_Gln8insXaa", 177 | ] 178 | 179 | def test_valid_strings(self): 180 | for s in self.valid_strings: 181 | with self.subTest(s=s): 182 | self.assertIsNotNone( 183 | self.pattern.fullmatch(s), msg=f'failed to match "{s}"' 184 | ) 185 | 186 | def test_invalid_strings(self): 187 | for s in self.invalid_strings: 188 | with self.subTest(s=s): 189 | self.assertIsNone( 190 | self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"' 191 | ) 192 | 193 | 194 | class TestProteinDelins(unittest.TestCase): 195 | @classmethod 196 | def setUpClass(cls): 197 | cls.pattern = re.compile(pro_delins, flags=re.ASCII) 198 | 199 | cls.valid_strings = [ 200 | "Ile71_Cys80delinsSer", 201 | "His44delinsValProGlyGlu", 202 | ] 203 | 204 | cls.invalid_strings = ["(Ile71_Cys80delinsSer)", "Ile71_Cys80delinsXaa"] 205 | 206 | def test_valid_strings(self): 207 | for s in self.valid_strings: 208 | with self.subTest(s=s): 209 | self.assertIsNotNone( 210 | self.pattern.fullmatch(s), msg=f'failed to match "{s}"' 211 | ) 212 | 213 | def test_invalid_strings(self): 214 | for s in self.invalid_strings: 215 | with self.subTest(s=s): 216 | self.assertIsNone( 217 | self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"' 218 | ) 219 | 220 | 221 | class TestProteinVariant(unittest.TestCase): 222 | @classmethod 223 | def setUpClass(cls): 224 | cls.pattern = re.compile(pro_variant, flags=re.ASCII) 225 | 226 | cls.valid_strings = [ 227 | "=", 228 | "(=)", 229 | "Cys22=", 230 | "Glu27Trp", 231 | "Ter345Lys", 232 | "Glu27fs", 233 | "Gly18del", 234 | "Gln7_Asn19del", 235 | "Cys5dup", 236 | "Pro12_Gly18dup", 237 | "His7_Gln8insSer", 238 | "Ala12_Pro13insGlyProCys", 239 | "Ile71_Cys80delinsSer", 240 | "His44delinsValProGlyGlu", 241 | ] 242 | 243 | cls.invalid_strings = [ 244 | 
"=22", 245 | "Arg18(=)", 246 | "Cys-22", 247 | "==", 248 | "22A>T", 249 | "Xaa12Arg", 250 | "Arg21Xaa", 251 | "Pro17*", 252 | "*345Lys", 253 | "(Glu27Trp)", 254 | "=fs", 255 | "Arg12LysfsTer18", 256 | "Arg12Lysfs*18", 257 | "Glu27fs*?", 258 | "(Glu27fs)", 259 | "=del", 260 | "18del", 261 | "122_128del", 262 | "(Gly18del)", 263 | "=dup", 264 | "18dup", 265 | "122_128dup", 266 | "(Cys5dup)", 267 | "(His7_Gln8insSer)", 268 | "(His7_Gln8insX)", 269 | "(Ala12_Pro13ins(2))", 270 | "His7_Gln8ins?", 271 | "His7_Gln8insXaa", 272 | "(Ile71_Cys80delinsSer)", 273 | "Ile71_Cys80delinsXaa", 274 | ] 275 | 276 | def test_valid_strings(self): 277 | for s in self.valid_strings: 278 | with self.subTest(s=s): 279 | self.assertIsNotNone( 280 | self.pattern.fullmatch(s), msg=f'failed to match "{s}"' 281 | ) 282 | 283 | def test_invalid_strings(self): 284 | for s in self.invalid_strings: 285 | with self.subTest(s=s): 286 | self.assertIsNone( 287 | self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"' 288 | ) 289 | 290 | 291 | class TestProteinSingleVariant(unittest.TestCase): 292 | @classmethod 293 | def setUpClass(cls): 294 | cls.pattern = re.compile(pro_single_variant, flags=re.ASCII) 295 | 296 | cls.valid_strings = [ 297 | "=", 298 | "(=)", 299 | "Cys22=", 300 | "Glu27Trp", 301 | "Ter345Lys", 302 | "Glu27fs", 303 | "Gly18del", 304 | "Gln7_Asn19del", 305 | "Cys5dup", 306 | "Pro12_Gly18dup", 307 | "His7_Gln8insSer", 308 | "Ala12_Pro13insGlyProCys", 309 | "Ile71_Cys80delinsSer", 310 | "His44delinsValProGlyGlu", 311 | ] 312 | 313 | cls.invalid_strings = [ 314 | "=22", 315 | "Arg18(=)", 316 | "Cys-22", 317 | "==", 318 | "22A>T", 319 | "Xaa12Arg", 320 | "Arg21Xaa", 321 | "Pro17*", 322 | "*345Lys", 323 | "(Glu27Trp)", 324 | "=fs", 325 | "Arg12LysfsTer18", 326 | "Arg12Lysfs*18", 327 | "Glu27fs*?", 328 | "(Glu27fs)", 329 | "=del", 330 | "18del", 331 | "122_128del", 332 | "(Gly18del)", 333 | "=dup", 334 | "18dup", 335 | "122_128dup", 336 | "(Cys5dup)", 337 | "(His7_Gln8insSer)", 338 | 
"(His7_Gln8insX)", 339 | "(Ala12_Pro13ins(2))", 340 | "His7_Gln8ins?", 341 | "His7_Gln8insXaa", 342 | "(Ile71_Cys80delinsSer)", 343 | "Ile71_Cys80delinsXaa", 344 | ] 345 | 346 | def test_valid_strings(self): 347 | for s in self.valid_strings: 348 | with self.subTest(s=s): 349 | v = f"p.{s}" 350 | self.assertIsNotNone( 351 | self.pattern.fullmatch(v), msg=f'failed to match "{v}"' 352 | ) 353 | 354 | def test_invalid_strings(self): 355 | for s in self.invalid_strings: 356 | with self.subTest(s=s): 357 | v = f"p.{s}" 358 | self.assertIsNone( 359 | self.pattern.fullmatch(v), msg=f'incorrectly matched "{v}"' 360 | ) 361 | 362 | 363 | class TestProteinMultiVariant(unittest.TestCase): 364 | @classmethod 365 | def setUpClass(cls): 366 | cls.pattern = re.compile(pro_multi_variant, flags=re.ASCII) 367 | 368 | single_valid_strings = [ 369 | "=", 370 | "(=)", 371 | "Cys22=", 372 | "Glu27Trp", 373 | "Ter345Lys", 374 | "Glu27fs", 375 | "Gly18del", 376 | "Gln7_Asn19del", 377 | "Cys5dup", 378 | "Pro12_Gly18dup", 379 | "His7_Gln8insSer", 380 | "Ala12_Pro13insGlyProCys", 381 | "Ile71_Cys80delinsSer", 382 | "His44delinsValProGlyGlu", 383 | ] 384 | 385 | single_invalid_strings = [ 386 | "=22", 387 | "Arg18(=)", 388 | "Cys-22", 389 | "==", 390 | "22A>T", 391 | "Xaa12Arg", 392 | "Arg21Xaa", 393 | "Pro17*", 394 | "*345Lys", 395 | "(Glu27Trp)", 396 | "=fs", 397 | "Arg12LysfsTer18", 398 | "Arg12Lysfs*18", 399 | "Glu27fs*?", 400 | "(Glu27fs)", 401 | "=del", 402 | "18del", 403 | "122_128del", 404 | "(Gly18del)", 405 | "=dup", 406 | "18dup", 407 | "122_128dup", 408 | "(Cys5dup)", 409 | "(His7_Gln8insSer)", 410 | "(His7_Gln8insX)", 411 | "(Ala12_Pro13ins(2))", 412 | "His7_Gln8ins?", 413 | "His7_Gln8insXaa", 414 | "(Ile71_Cys80delinsSer)", 415 | "Ile71_Cys80delinsXaa", 416 | ] 417 | 418 | cls.valid_strings, cls.invalid_strings = build_multi_variants( 419 | single_valid_strings, single_invalid_strings 420 | ) 421 | 422 | def test_valid_strings(self): 423 | for s in self.valid_strings: 424 | with 
class TestRnaEqual(unittest.TestCase):
    """The ``rna_equal`` pattern accepts exactly the bare ``=`` token."""

    @classmethod
    def setUpClass(cls):
        cls.pattern = re.compile(rna_equal, flags=re.ASCII)

        cls.valid_strings = [
            "=",
        ]

        cls.invalid_strings = ["=22", "(=)", "=="]

    def test_valid_strings(self):
        """The lone '=' must fullmatch."""
        for case in self.valid_strings:
            with self.subTest(s=case):
                match = self.pattern.fullmatch(case)
                self.assertIsNotNone(match, msg=f'failed to match "{case}"')

    def test_invalid_strings(self):
        """Anything beyond the bare '=' must be rejected."""
        for case in self.invalid_strings:
            with self.subTest(s=case):
                match = self.pattern.fullmatch(case)
                self.assertIsNone(match, msg=f'incorrectly matched "{case}"')
class TestRnaDel(unittest.TestCase):
    """Exercise the ``rna_del`` pattern on RNA deletion strings."""

    @classmethod
    def setUpClass(cls):
        cls.pattern = re.compile(rna_del, flags=re.ASCII)

        cls.valid_strings = ["34_36del", "17del", "27_27+12del", "101+1_101+7del"]

        cls.invalid_strings = ["=del", "=/9_12del", "(155_185)del", "34_36"]

    def test_valid_strings(self):
        """Well-formed deletions must fullmatch the pattern."""
        for case in self.valid_strings:
            with self.subTest(s=case):
                match = self.pattern.fullmatch(case)
                self.assertIsNotNone(match, msg=f'failed to match "{case}"')

    def test_invalid_strings(self):
        """Malformed deletions must be rejected."""
        for case in self.invalid_strings:
            with self.subTest(s=case):
                match = self.pattern.fullmatch(case)
                self.assertIsNone(match, msg=f'incorrectly matched "{case}"')


class TestRnaDup(unittest.TestCase):
    """Exercise the ``rna_dup`` pattern on RNA duplication strings."""

    @classmethod
    def setUpClass(cls):
        cls.pattern = re.compile(rna_dup, flags=re.ASCII)

        cls.valid_strings = ["12dup", "2_24dup", "101+1_101+7dup", "12-24_12-12dup"]

        cls.invalid_strings = ["=dup", "(78+1_79-1)_(124+1_125-1)dup"]

    def test_valid_strings(self):
        """Well-formed duplications must fullmatch the pattern."""
        for case in self.valid_strings:
            with self.subTest(s=case):
                match = self.pattern.fullmatch(case)
                self.assertIsNotNone(match, msg=f'failed to match "{case}"')

    def test_invalid_strings(self):
        """Malformed duplications must be rejected."""
        for case in self.invalid_strings:
            with self.subTest(s=case):
                match = self.pattern.fullmatch(case)
                self.assertIsNone(match, msg=f'incorrectly matched "{case}"')
"(27_30)insu", 131 | "74_74insnnn", 132 | ] 133 | 134 | def test_valid_strings(self): 135 | for s in self.valid_strings: 136 | with self.subTest(s=s): 137 | self.assertIsNotNone( 138 | self.pattern.fullmatch(s), msg=f'failed to match "{s}"' 139 | ) 140 | 141 | def test_invalid_strings(self): 142 | for s in self.invalid_strings: 143 | with self.subTest(s=s): 144 | self.assertIsNone( 145 | self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"' 146 | ) 147 | 148 | 149 | class TestRnaDelins(unittest.TestCase): 150 | @classmethod 151 | def setUpClass(cls): 152 | cls.pattern = re.compile(rna_delins, flags=re.ASCII) 153 | 154 | cls.valid_strings = ["92delinsgac", "12_17delinsc"] 155 | 156 | cls.invalid_strings = ["234_235ins(10)", "(122_125)insg"] 157 | 158 | def test_valid_strings(self): 159 | for s in self.valid_strings: 160 | with self.subTest(s=s): 161 | self.assertIsNotNone( 162 | self.pattern.fullmatch(s), msg=f'failed to match "{s}"' 163 | ) 164 | 165 | def test_invalid_strings(self): 166 | for s in self.invalid_strings: 167 | with self.subTest(s=s): 168 | self.assertIsNone( 169 | self.pattern.fullmatch(s), msg=f'incorrectly matched "{s}"' 170 | ) 171 | 172 | 173 | class TestRnaVariant(unittest.TestCase): 174 | @classmethod 175 | def setUpClass(cls): 176 | cls.pattern = re.compile(rna_variant, flags=re.ASCII) 177 | 178 | cls.valid_strings = [ 179 | "=", 180 | "22g>u", 181 | "33+12a>c", 182 | "34_36del", 183 | "17del", 184 | "12dup", 185 | "2_24dup", 186 | "101+1_101+7dup", 187 | "22_23insauc", 188 | "17_18insa", 189 | "92delinsgac", 190 | "12_17delinsc", 191 | ] 192 | 193 | cls.invalid_strings = [ 194 | "=22", 195 | "(=)", 196 | "==", 197 | "spl", 198 | "33+12A>G", 199 | "22g>t", 200 | "=del", 201 | "=/9_12del", 202 | "(155_185)del", 203 | "=dup", 204 | "(78+1_79-1)_(124+1_125-1)dup", 205 | "(27_30)insu", 206 | "74_74insnnn", 207 | "234_235ins(10)", 208 | "(122_125)insg", 209 | ] 210 | 211 | def test_valid_strings(self): 212 | for s in self.valid_strings: 213 
class TestRnaSingleVariant(unittest.TestCase):
    """Exercise ``rna_single_variant`` on complete ``r.``-prefixed variants."""

    @classmethod
    def setUpClass(cls):
        cls.pattern = re.compile(rna_single_variant, flags=re.ASCII)

        # Variant bodies; the tests prepend the "r." prefix before matching.
        cls.valid_strings = [
            "=",
            "22g>u",
            "33+12a>c",
            "34_36del",
            "17del",
            "12dup",
            "2_24dup",
            "101+1_101+7dup",
            "22_23insauc",
            "17_18insa",
            "92delinsgac",
            "12_17delinsc",
        ]

        cls.invalid_strings = [
            "=22",
            "(=)",
            "==",
            "spl",
            "33+12A>G",
            "22g>t",
            "=del",
            "=/9_12del",
            "(155_185)del",
            "=dup",
            "(78+1_79-1)_(124+1_125-1)dup",
            "(27_30)insu",
            "74_74insnnn",
            "234_235ins(10)",
            "(122_125)insg",
        ]

    def test_valid_strings(self):
        """Prefixed valid variant bodies must fullmatch the pattern."""
        for case in self.valid_strings:
            with self.subTest(s=case):
                variant = f"r.{case}"
                self.assertIsNotNone(
                    self.pattern.fullmatch(variant), msg=f'failed to match "{variant}"'
                )

    def test_invalid_strings(self):
        """Prefixed invalid variant bodies must not match at all."""
        for case in self.invalid_strings:
            with self.subTest(s=case):
                variant = f"r.{case}"
                self.assertIsNone(
                    self.pattern.fullmatch(variant),
                    msg=f'incorrectly matched "{variant}"',
                )
class TestCombinePatterns(unittest.TestCase):
    """Tests for ``combine_patterns``, which joins regex alternatives.

    NOTE(review): the pattern literals below, e.g. ``"(?P(?P[1-9]))"``, are
    not valid named-group syntax — ``(?P`` must be followed by ``<name>``.
    This looks like extraction damage where angle-bracketed group names were
    stripped as markup; confirm against the repository and restore the
    original ``(?P<name>...)`` strings before trusting these tests.
    """

    def test_without_groupname(self):
        # Each tuple pairs the input patterns with the expected combined
        # pattern: alternatives joined by "|" inside a non-capturing group.
        pattern_tuples = [
            (
                ("(?P(?P[1-9]))", "(?P(?P[1-9]))"),
                "(?:(?P(?P[1-9]))|(?P(?P[1-9])))",
            )
        ]

        for p1, p2 in pattern_tuples:
            with self.subTest(p1=p1, p2=p2):
                self.assertEqual(combine_patterns(p1), p2)

    def test_with_groupname(self):
        # With a group name, the combined alternatives are wrapped in a
        # named group instead of a non-capturing one.
        pattern_tuples = [
            (
                ("(?P(?P[1-9]))", "(?P(?P[1-9]))"),
                "test",
                "(?P(?P(?P[1-9]))|(?P(?P[1-9])))",
            )
        ]

        for p1, g, p2 in pattern_tuples:
            with self.subTest(p1=p1, g=g, p2=p2):
                self.assertEqual(combine_patterns(p1, groupname=g), p2)
class TestRemoveNamedGroups(unittest.TestCase):
    """Tests for ``remove_named_groups``, which strips ``(?P<name>`` markers.

    NOTE(review): as in TestCombinePatterns above, the input literals such as
    ``"(?P(?P[1-9]))"`` are missing their angle-bracketed group names —
    almost certainly stripped during extraction. The expected outputs
    (``(?:...)`` for noncapturing, ``(...)`` for capturing) are consistent
    with named groups being removed, but the inputs should be restored from
    the repository before relying on these tests.
    """

    def test_noncapturing(self):
        # noncapturing=True: named groups become (?:...) groups.
        pattern_tuples = [("(?P(?P[1-9]))", "(?:(?:[1-9]))")]

        for p1, p2 in pattern_tuples:
            with self.subTest(p1=p1, p2=p2):
                self.assertEqual(remove_named_groups(p1, noncapturing=True), p2)

    def test_capturing(self):
        # noncapturing=False: named groups become plain capturing (...) groups.
        pattern_tuples = [("(?P(?P[1-9]))", "(([1-9]))")]

        for p1, p2 in pattern_tuples:
            with self.subTest(p1=p1, p2=p2):
                self.assertEqual(remove_named_groups(p1, noncapturing=False), p2)
self.assertFalse(v.is_extended()) 40 | 41 | v = VariantPosition("Cys92380") 42 | self.assertTupleEqual( 43 | (v.position, v.amino_acid, v.intronic_position, v.utr), 44 | (92380, "Cys", None, None), 45 | ) 46 | self.assertFalse(v.is_utr()) 47 | self.assertFalse(v.is_intronic()) 48 | self.assertTrue(v.is_protein()) 49 | self.assertFalse(v.is_extended()) 50 | 51 | def test_invalid_strings(self) -> None: 52 | position_strings = ( 53 | "08", 54 | "+12", 55 | "*-99", 56 | "A", 57 | "TCGA", 58 | "g", 59 | "*", 60 | "-", 61 | "+", 62 | "**6", 63 | "800 + 12", 64 | "-12*5", 65 | "Glu-12", 66 | "*5Trp", 67 | "Xyz12", 68 | "ALA12", 69 | ) 70 | for s in position_strings: 71 | with self.subTest(s=s): 72 | with self.assertRaises(MaveHgvsParseError): 73 | VariantPosition(s) 74 | 75 | def test_utr(self) -> None: 76 | v = VariantPosition("*8") 77 | self.assertTupleEqual( 78 | (v.position, v.amino_acid, v.intronic_position, v.utr), 79 | (8, None, None, True), 80 | ) 81 | self.assertTrue(v.is_utr()) 82 | self.assertFalse(v.is_intronic()) 83 | self.assertFalse(v.is_protein()) 84 | self.assertTrue(v.is_extended()) 85 | 86 | v = VariantPosition("-80") 87 | self.assertTupleEqual( 88 | (v.position, v.amino_acid, v.intronic_position, v.utr), 89 | (-80, None, None, True), 90 | ) 91 | self.assertTrue(v.is_utr()) 92 | self.assertFalse(v.is_intronic()) 93 | self.assertFalse(v.is_protein()) 94 | self.assertTrue(v.is_extended()) 95 | 96 | def test_intron(self) -> None: 97 | v = VariantPosition("122-6") 98 | self.assertTupleEqual( 99 | (v.position, v.amino_acid, v.intronic_position, v.utr), 100 | (122, None, -6, None), 101 | ) 102 | self.assertFalse(v.is_utr()) 103 | self.assertTrue(v.is_intronic()) 104 | self.assertFalse(v.is_protein()) 105 | self.assertTrue(v.is_extended()) 106 | 107 | v = VariantPosition("78+10") 108 | self.assertTupleEqual( 109 | (v.position, v.amino_acid, v.intronic_position, v.utr), (78, None, 10, None) 110 | ) 111 | self.assertFalse(v.is_utr()) 112 | 
class TestObjectRepresentation(unittest.TestCase):
    """repr() of a VariantPosition must round-trip its source string."""

    def test_repr(self) -> None:
        position_strings = (
            "8",
            "92380",
            "*8",
            "-80",
            "122-6",
            "78+10",
            "*89+67",
            "-127+6",
            "*73-105",
            "-45-1",
            "Cys234",
            "Ala9",
        )
        for source in position_strings:
            with self.subTest(s=source):
                # The parsed position must render back to the exact input.
                self.assertEqual(source, repr(VariantPosition(source)))
182 | sorted_position_strings = ( 183 | "-45-1", 184 | "-12", 185 | "8", 186 | "99", 187 | "99+88", 188 | "99+122", 189 | "100-12", 190 | "100", 191 | "101", 192 | "202-12", 193 | "202-1", 194 | "202", 195 | "*1", 196 | "*73-105", 197 | ) 198 | 199 | self.sorted_variants = [VariantPosition(p) for p in sorted_position_strings] 200 | 201 | # pairwise itertools recipe 202 | a, b = itertools.tee(self.sorted_variants) 203 | next(b, None) 204 | self.sorted_variant_pairs = zip(a, b) 205 | 206 | def test_eq(self) -> None: 207 | for v in self.sorted_variants: 208 | with self.subTest(v=v): 209 | self.assertEqual(v, v) 210 | 211 | def test_ne(self) -> None: 212 | for v1, v2 in self.sorted_variant_pairs: 213 | with self.subTest(v1=v1, v2=v2): 214 | self.assertNotEqual(v1, v2) 215 | 216 | def test_lt(self) -> None: 217 | for v1, v2 in self.sorted_variant_pairs: 218 | with self.subTest(v1=v1, v2=v2): 219 | self.assertLess(v1, v2) 220 | 221 | def test_sorting(self) -> None: 222 | for _ in range(10): 223 | with self.subTest(): 224 | shuffled_variants = self.sorted_variants.copy() 225 | while shuffled_variants == self.sorted_variants: 226 | random.shuffle(shuffled_variants) 227 | self.assertListEqual(self.sorted_variants, sorted(shuffled_variants)) 228 | 229 | 230 | # TODO: add amino acid variants 231 | class TestAdjacency(unittest.TestCase): 232 | def test_adjacent_pairs(self) -> None: 233 | adjacent_pairs = ( 234 | ("-45-2", "-45-1"), 235 | ("-45-1", "-45"), 236 | ("-12", "-13"), 237 | ("-1", "1"), 238 | ("8", "9"), 239 | ("202-1", "202"), 240 | ("99", "99+1"), 241 | ("99+88", "99+89"), 242 | ("100-12", "100-11"), 243 | ("100", "101"), 244 | ("*1", "*2"), 245 | ("*73-1", "*73"), 246 | ) 247 | for s1, s2 in adjacent_pairs: 248 | v1 = VariantPosition(s1) 249 | v2 = VariantPosition(s2) 250 | with self.subTest(v1=v1, v2=v2): 251 | self.assertTrue(v1.is_adjacent(v2)) 252 | with self.subTest(v1=v1, v2=v2): 253 | self.assertTrue(v2.is_adjacent(v1)) 254 | 255 | def 
test_not_adjacent_to_self(self) -> None: 256 | position_strings = ( 257 | "-45-1", 258 | "-12", 259 | "8", 260 | "99", 261 | "99+88", 262 | "99+122", 263 | "100-12", 264 | "100", 265 | "103", 266 | "202-12", 267 | "202-1", 268 | "205", 269 | "*1", 270 | "*12", 271 | "*73-105", 272 | ) 273 | variants = [VariantPosition(s) for s in position_strings] 274 | for v in variants: 275 | with self.subTest(v=v): 276 | self.assertFalse(v.is_adjacent(v)) 277 | 278 | def test_non_adjacent_pairs(self) -> None: 279 | position_strings = ( 280 | "-45-1", 281 | "-12", 282 | "8", 283 | "99", 284 | "99+88", 285 | "99+122", 286 | "100-12", 287 | "103", 288 | "202-12", 289 | "202-1", 290 | "205", 291 | "*1", 292 | "*12", 293 | "*73-105", 294 | ) 295 | variants = [VariantPosition(s) for s in position_strings] 296 | 297 | for v1, v2 in itertools.permutations(variants, 2): 298 | with self.subTest(v1=v1, v2=v2): 299 | self.assertFalse(v1.is_adjacent(v2)) 300 | 301 | 302 | if __name__ == "__main__": 303 | unittest.main() 304 | -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from mavehgvs.util import parse_variant_strings 4 | from mavehgvs.variant import Variant 5 | 6 | 7 | class TestParseVariantStrings(unittest.TestCase): 8 | def test_sets_error_strings_for_invalid_items(self) -> None: 9 | invalid_variant_strings = [ 10 | "g.Glu27Trp", 11 | "p.27Glu>Trp", 12 | "p.122-6T>A", 13 | "G>A", 14 | "22G>A", 15 | "G.44del", 16 | "a.78+5_78+10del", 17 | "77dup", 18 | "n.Pro12_Gly18dup", 19 | "g.22_23insauc", 20 | "g.25_24del", 21 | "g.25_24ins", 22 | "r.43-6_595+12delinsctt", 23 | "x.=", 24 | "c.(=)", 25 | ] 26 | 27 | for s in invalid_variant_strings: 28 | with self.subTest(s=s): 29 | valid, invalid = parse_variant_strings([s]) 30 | self.assertIsNone(valid[0]) 31 | self.assertIsInstance(invalid[0], str) 32 | 33 | def 
test_sets_variant_for_valid_items(self) -> None: 34 | valid_variant_strings = [ 35 | "p.Glu27Trp", 36 | "c.122-6T>A", 37 | "g.44del", 38 | "c.78+5_78+10del", 39 | "c.77dup", 40 | "p.Pro12_Gly18dup", 41 | "p.Ala12_Pro13insGlyProCys", 42 | "r.22_23insauc", 43 | "c.43-6_595+12delinsCTT", 44 | "p.Ile71_Cys80delinsSer", 45 | "p.=", 46 | "c.=", 47 | "p.(=)", 48 | ] 49 | 50 | for s in valid_variant_strings: 51 | with self.subTest(s=s): 52 | valid, invalid = parse_variant_strings([s]) 53 | self.assertIsInstance(valid[0], Variant) 54 | self.assertIsNone(invalid[0]) 55 | 56 | def test_validates_with_targetseq(self) -> None: 57 | targetseq = "ACGT" 58 | valid_variant_strings = ["c.1A>T", "c.3G>C", "c.[1A>T;3G>C]"] 59 | invalid_variant_strings = ["c.1C>T", "c.3T>C", "c.[1A>T;3T>C]", "c.5A>G"] 60 | 61 | for s in valid_variant_strings: 62 | with self.subTest(s=s, targetseq=targetseq): 63 | valid, invalid = parse_variant_strings([s], targetseq=targetseq) 64 | self.assertIsInstance(valid[0], Variant) 65 | self.assertIsNone(invalid[0]) 66 | 67 | for s in invalid_variant_strings: 68 | with self.subTest(s=s, targetseq=targetseq): 69 | valid, invalid = parse_variant_strings([s], targetseq=targetseq) 70 | self.assertIsNone(valid[0]) 71 | self.assertIsInstance(invalid[0], str) 72 | 73 | def test_validates_expected_prefix(self) -> None: 74 | valid_variant_strings = ["p.Glu27Trp", "c.122-6T>A", "r.22_23insauc"] 75 | 76 | for s in valid_variant_strings: 77 | p = s[0] 78 | with self.subTest(s=s, p=p): 79 | valid, invalid = parse_variant_strings([s], expected_prefix=p) 80 | self.assertIsInstance(valid[0], Variant) 81 | self.assertIsNone(invalid[0]) 82 | 83 | for s in valid_variant_strings: 84 | p = "g" 85 | with self.subTest(s=s, p=p): 86 | valid, invalid = parse_variant_strings([s], expected_prefix=p) 87 | self.assertIsNone(valid[0]) 88 | self.assertIsInstance(invalid[0], str) 89 | 90 | def test_valid_expected_prefixes_only(self) -> None: 91 | valid_prefixes = list("cgmnopr") 92 | 
invalid_prefixes = list("CGMNOPRx.4ab?") 93 | variant = "p.Glu27Trp" 94 | 95 | for p in valid_prefixes: 96 | with self.subTest(p=p): 97 | parse_variant_strings([variant], expected_prefix=p) 98 | 99 | for p in invalid_prefixes: 100 | with self.subTest(p=p): 101 | with self.assertRaises(ValueError): 102 | parse_variant_strings([variant], expected_prefix=p) 103 | -------------------------------------------------------------------------------- /tests/test_variant.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from mavehgvs.exceptions import MaveHgvsParseError 4 | from mavehgvs.variant import Variant 5 | from mavehgvs.position import VariantPosition 6 | 7 | 8 | class TestCreateSingleVariantFromString(unittest.TestCase): 9 | def test_invalid_raises_error(self) -> None: 10 | valid_variant_strings = [ 11 | "p.Glu27Trp", 12 | "c.122-6T>A", 13 | "g.44del", 14 | "c.78+5_78+10del", 15 | "c.77dup", 16 | "p.Pro12_Gly18dup", 17 | "p.Ala12_Pro13insGlyProCys", 18 | "r.22_23insauc", 19 | "c.43-6_595+12delinsCTT", 20 | "p.Ile71_Cys80delinsSer", 21 | "p.=", 22 | "c.=", 23 | "p.(=)", 24 | "c.1_3=", 25 | "c.12=", 26 | "g.88_99=", 27 | "c.43-6_595+12=", 28 | "p.Glu27fs", 29 | "NM_001301.4:c.122-6T>A", 30 | ] 31 | 32 | invalid_variant_strings = [ 33 | "g.Glu27Trp", 34 | "p.27Glu>Trp", 35 | "p.122-6T>A", 36 | "G>A", 37 | "22G>A", 38 | "G.44del", 39 | "a.78+5_78+10del", 40 | "77dup", 41 | "n.Pro12_Gly18dup", 42 | "p.Pro12_Gly18insGlyProAla", 43 | "g.22_23insauc", 44 | "g.25_24del", 45 | "g.25_24ins", 46 | "r.22_24insauc", 47 | "r.43-6_595+12delinsctt", 48 | "x.=", 49 | "c.(=)", 50 | "p.(Gly24=)", 51 | "p.Gly24(=)", 52 | "p.Arg12LysfsTer18", 53 | "p.Glu27fs*?", 54 | "NM_001301.4::c.122-6T>A", 55 | ] 56 | 57 | for s in valid_variant_strings: 58 | with self.subTest(s=s): 59 | Variant(s) # should pass 60 | 61 | for s in invalid_variant_strings: 62 | with self.subTest(s=s): 63 | with self.assertRaises(MaveHgvsParseError): 64 | 
Variant(s) 65 | 66 | def test_sub(self) -> None: 67 | variant_strings = [ 68 | "p.Glu27Trp", 69 | "p.Ter345Lys", 70 | "p.Cys22=", 71 | "g.48C>A", 72 | "c.122-6T>A", 73 | "c.*33G>C", 74 | "r.22g>u", 75 | "r.33+12a>c", 76 | "p.=", 77 | "p.(=)", 78 | "n.=", 79 | "c.1_3=", 80 | "c.12=", 81 | "g.88_99=", 82 | "c.43-6_595+12=", 83 | ] 84 | 85 | for s in variant_strings: 86 | with self.subTest(s=s): 87 | v = Variant(s) 88 | self.assertEqual(s, str(v)) 89 | 90 | def test_fs(self) -> None: 91 | variant_strings = ["p.Glu27fs"] 92 | 93 | for s in variant_strings: 94 | with self.subTest(s=s): 95 | v = Variant(s) 96 | self.assertEqual(s, str(v)) 97 | 98 | def test_del(self) -> None: 99 | variant_strings = [ 100 | "g.44del", 101 | "c.78+5_78+10del", 102 | "c.1_95del", 103 | "p.Gly18del", 104 | "p.Gln7_Asn19del", 105 | "r.34_36del", 106 | ] 107 | 108 | for s in variant_strings: 109 | with self.subTest(s=s): 110 | v = Variant(s) 111 | self.assertEqual(s, str(v)) 112 | 113 | def test_dup(self) -> None: 114 | variant_strings = [ 115 | "g.22_24dup", 116 | "c.77dup", 117 | "c.101+1_101+7dup", 118 | "p.Pro12_Gly18dup", 119 | "p.Cys5dup", 120 | "r.12dup", 121 | ] 122 | 123 | for s in variant_strings: 124 | with self.subTest(s=s): 125 | v = Variant(s) 126 | self.assertEqual(s, str(v)) 127 | 128 | def test_ins(self) -> None: 129 | variant_strings = [ 130 | "g.234_235insT", 131 | "c.84_85insCTG", 132 | "c.99+6_99+7insA", 133 | "p.His7_Gln8insSer", 134 | "p.Ala12_Pro13insGlyProCys", 135 | "r.22_23insauc", 136 | ] 137 | 138 | for s in variant_strings: 139 | with self.subTest(s=s): 140 | v = Variant(s) 141 | self.assertEqual(s, str(v)) 142 | 143 | def test_delins(self) -> None: 144 | variant_strings = [ 145 | "g.22delinsAACG", 146 | "c.83_85delinsT", 147 | "c.43-6_595+12delinsCTT", 148 | "p.Ile71_Cys80delinsSer", 149 | "p.His44delinsValProGlyGlu", 150 | "r.92delinsgac", 151 | ] 152 | 153 | for s in variant_strings: 154 | with self.subTest(s=s): 155 | v = Variant(s) 156 | self.assertEqual(s, 
str(v)) 157 | 158 | def test_target_identical(self) -> None: 159 | identical_variant_strings = [ 160 | *[f"{prefix}.=" for prefix in tuple("gmocnr")], 161 | "p.(=)", 162 | "c.1_3=", 163 | ] 164 | 165 | non_identical_variant_strings = [ 166 | "p.Ter345Lys", 167 | "p.Cys22=", 168 | "g.48C>A", 169 | "c.122-6T>A", 170 | "g.22delinsAACG", 171 | "c.83_85delinsT", 172 | ] 173 | 174 | for s in identical_variant_strings: 175 | with self.subTest(s=s): 176 | v = Variant(s) 177 | self.assertTrue(v.is_target_identical()) 178 | 179 | for s in non_identical_variant_strings: 180 | with self.subTest(s=s): 181 | v = Variant(s) 182 | self.assertFalse(v.is_target_identical()) 183 | 184 | def test_synonymous(self) -> None: 185 | synonymous_variant_strings = ["p.Gly24=", "p.=", "p.(=)"] 186 | 187 | nonsynonymous_variant_strings = ["p.Ter345Lys", "c.=", "g.48C>A"] 188 | 189 | for s in synonymous_variant_strings: 190 | with self.subTest(s=s): 191 | v = Variant(s) 192 | self.assertTrue(v.is_synonymous()) 193 | 194 | for s in nonsynonymous_variant_strings: 195 | with self.subTest(s=s): 196 | v = Variant(s) 197 | self.assertFalse(v.is_synonymous()) 198 | 199 | def test_relaxed_ordering(self): 200 | variant_tuples = [ 201 | ("c.78+10_78+5del", "c.78+5_78+10del"), 202 | ("c.80_77dup", "c.77_80dup"), 203 | ("p.Gly18_Pro12dup", "p.Pro12_Gly18dup"), 204 | ("p.Pro13_Ala12insGlyProCys", "p.Ala12_Pro13insGlyProCys"), 205 | ("r.23_22insauc", "r.22_23insauc"), 206 | ("c.595+12_43-6delinsCTT", "c.43-6_595+12delinsCTT"), 207 | ("p.Cys80_Ile71delinsSer", "p.Ile71_Cys80delinsSer"), 208 | ("c.3_1=", "c.1_3="), 209 | ("g.99_88=", "g.88_99="), 210 | ("c.595+12_43-6=", "c.43-6_595+12="), 211 | ] 212 | 213 | for v, s in variant_tuples: 214 | with self.subTest(v=v, s=s): 215 | self.assertEqual(str(Variant(v, relaxed_ordering=True)), s) 216 | 217 | 218 | class TestCreateMultiVariantFromString(unittest.TestCase): 219 | def test_creation(self): 220 | variant_strings = [ 221 | "p.[Glu27Trp;Ter345Lys]", 222 | 
"p.[Glu27Trp;Lys212fs]", 223 | "p.[Gly18del;Glu27Trp;Ter345Lys]", 224 | "p.[Gln7_Asn19del;Glu27Trp;Ter345Lys]", 225 | "c.[1_35del;78+5_78+10del;122T>A]", 226 | "NM_002002.3:c.[1_35del;78+5_78+10del;122T>A]", 227 | ] 228 | 229 | invalid_variant_strings = [ 230 | "p.[Glu27Trp;=;Ter345Lys]", 231 | "p.[(=);Gly18del;Glu27Trp;Ter345Lys]", 232 | "c.[12T>A;=;78+5_78+10del]", 233 | "c.[1_3=;12T>A;78+5_78+10del]", 234 | "p.[Glu27fs;Arg48Lys]", 235 | "p.[Glu27fs;Arg48fs]", 236 | "NM_002002.3::c.[1_35del;78+5_78+10del;122T>A]", 237 | "NM_002002.3:c.1_35del;78+5_78+10del;122T>A", 238 | ] 239 | 240 | for s in variant_strings: 241 | with self.subTest(s=s): 242 | v = Variant(s) 243 | self.assertEqual(s, str(v)) 244 | 245 | for s in invalid_variant_strings: 246 | with self.subTest(s=s): 247 | with self.assertRaises(MaveHgvsParseError): 248 | Variant(s) 249 | 250 | def test_ordering(self): 251 | variant_string_tuples = [ 252 | ("p.[Gly345Lys;Glu27Trp]", "p.[Glu27Trp;Gly345Lys]"), 253 | ("p.[Glu27Trp;Gly18del;Ter345Lys]", "p.[Gly18del;Glu27Trp;Ter345Lys]"), 254 | ("c.[122T>A;1_35del;78+5_78+10del]", "c.[1_35del;78+5_78+10del;122T>A]"), 255 | ] 256 | 257 | for s, _ in variant_string_tuples: 258 | with self.subTest(s=s): 259 | with self.assertRaises(MaveHgvsParseError): 260 | Variant(s, relaxed_ordering=False) 261 | 262 | for s, s_ordered in variant_string_tuples: 263 | with self.subTest(s=s): 264 | # Should pass creation 265 | Variant(s, relaxed_ordering=True) 266 | 267 | for s, s_ordered in variant_string_tuples: 268 | with self.subTest(s=s): 269 | v = Variant(s, relaxed_ordering=True) 270 | self.assertEqual(s_ordered, str(v)) 271 | 272 | def test_overlaps(self): 273 | invalid_variant_strings = [ 274 | "p.[Glu27Trp;Glu27Trp]", 275 | "p.[Glu27Trp;Glu27Tyr]", 276 | "p.[Pro27Trp;Glu27Tyr]", 277 | "p.[Gly18del;Gly18Tyr]", 278 | "p.[Gln7_Asn19del;Glu13Trp]", 279 | "p.[Glu13Trp;Gln7_Asn19del]", 280 | "p.[Gln7_Asn19del;Glu13Trp;Ter345Lys]", 281 | "c.[1_95del;78+5_78+10del;122T>A]", 282 | 
"c.[1_95del;22T>A]", 283 | "n.[22G>A;22G>T]", 284 | ] 285 | 286 | for s in invalid_variant_strings: 287 | with self.subTest(s=s): 288 | with self.assertRaises(MaveHgvsParseError): 289 | Variant(s) 290 | 291 | 292 | class TestCreateSingleVariantFromValues(unittest.TestCase): 293 | def test_equal(self): 294 | valid_dict_tuples = [ 295 | ( 296 | { 297 | "variant_type": "equal", 298 | "prefix": "p", 299 | "position": "27", 300 | "target": "Glu", 301 | }, 302 | "p.Glu27=", 303 | ), 304 | ( 305 | { 306 | "variant_type": "equal", 307 | "prefix": "c", 308 | "start_position": "12", 309 | "end_position": "12", 310 | }, 311 | "c.12=", 312 | ), 313 | ( 314 | { 315 | "variant_type": "equal", 316 | "prefix": "c", 317 | "start_position": "1", 318 | "end_position": "3", 319 | }, 320 | "c.1_3=", 321 | ), 322 | ] 323 | 324 | for d, s in valid_dict_tuples: 325 | with self.subTest(d=d, s=s): 326 | self.assertEqual(Variant(s), Variant(d)) 327 | 328 | def test_sub(self): 329 | valid_dict_tuples = [ 330 | ( 331 | { 332 | "variant_type": "sub", 333 | "prefix": "p", 334 | "position": 27, 335 | "target": "Glu", 336 | "variant": "Trp", 337 | }, 338 | "p.Glu27Trp", 339 | ), 340 | ( 341 | { 342 | "variant_type": "sub", 343 | "prefix": "c", 344 | "position": "122-6", 345 | "target": "T", 346 | "variant": "A", 347 | }, 348 | "c.122-6T>A", 349 | ), 350 | ] 351 | 352 | for d, s in valid_dict_tuples: 353 | with self.subTest(d=d, s=s): 354 | self.assertEqual(Variant(s), Variant(d)) 355 | 356 | def test_fs(self): 357 | valid_dict_tuples = [ 358 | ( 359 | { 360 | "variant_type": "fs", 361 | "prefix": "p", 362 | "position": 27, 363 | "target": "Glu", 364 | }, 365 | "p.Glu27fs", 366 | ), 367 | ] 368 | 369 | for d, s in valid_dict_tuples: 370 | with self.subTest(d=d, s=s): 371 | self.assertEqual(Variant(s), Variant(d)) 372 | 373 | def test_ins(self): 374 | valid_dict_tuples = [ 375 | ( 376 | { 377 | "variant_type": "ins", 378 | "prefix": "p", 379 | "start_position": 12, 380 | "start_target": "Ala", 381 | 
"end_position": 13, 382 | "end_target": "Pro", 383 | "variant": "GlyProCys", 384 | }, 385 | "p.Ala12_Pro13insGlyProCys", 386 | ), 387 | ( 388 | { 389 | "variant_type": "ins", 390 | "prefix": "r", 391 | "start_position": 22, 392 | "end_position": 23, 393 | "variant": "auc", 394 | }, 395 | "r.22_23insauc", 396 | ), 397 | ] 398 | 399 | for d, s in valid_dict_tuples: 400 | with self.subTest(d=d, s=s): 401 | self.assertEqual(Variant(s), Variant(d)) 402 | 403 | def test_del(self): 404 | valid_dict_tuples = [ 405 | ( 406 | { 407 | "variant_type": "del", 408 | "prefix": "g", 409 | "start_position": 44, 410 | "end_position": 44, 411 | }, 412 | "g.44del", 413 | ), 414 | ( 415 | { 416 | "variant_type": "del", 417 | "prefix": "c", 418 | "start_position": "78+5", 419 | "end_position": "78+10", 420 | }, 421 | "c.78+5_78+10del", 422 | ), 423 | ( 424 | { 425 | "variant_type": "del", 426 | "prefix": "p", 427 | "start_position": 33, 428 | "start_target": "Arg", 429 | "end_position": 33, 430 | "end_target": "Arg", 431 | }, 432 | "p.Arg33del", 433 | ), 434 | ] 435 | 436 | for d, s in valid_dict_tuples: 437 | with self.subTest(d=d, s=s): 438 | self.assertEqual(Variant(s), Variant(d)) 439 | 440 | def test_dup(self): 441 | valid_dict_tuples = [ 442 | ( 443 | { 444 | "variant_type": "dup", 445 | "prefix": "c", 446 | "start_position": 77, 447 | "end_position": 77, 448 | }, 449 | "c.77dup", 450 | ), 451 | ( 452 | { 453 | "variant_type": "dup", 454 | "prefix": "p", 455 | "start_position": 12, 456 | "start_target": "Pro", 457 | "end_position": 18, 458 | "end_target": "Gly", 459 | }, 460 | "p.Pro12_Gly18dup", 461 | ), 462 | ] 463 | 464 | for d, s in valid_dict_tuples: 465 | with self.subTest(d=d, s=s): 466 | self.assertEqual(Variant(s), Variant(d)) 467 | 468 | def test_delins(self): 469 | valid_dict_tuples = [ 470 | ( 471 | { 472 | "variant_type": "delins", 473 | "prefix": "c", 474 | "start_position": "43-6", 475 | "end_position": "595+12", 476 | "variant": "CTT", 477 | }, 478 | 
"c.43-6_595+12delinsCTT", 479 | ), 480 | ( 481 | { 482 | "variant_type": "delins", 483 | "prefix": "c", 484 | "start_position": "45", 485 | "end_position": "45", 486 | "variant": "AGA", 487 | }, 488 | "c.45delinsAGA", 489 | ), 490 | ( 491 | { 492 | "variant_type": "delins", 493 | "prefix": "p", 494 | "start_position": 71, 495 | "start_target": "Ile", 496 | "end_position": 80, 497 | "end_target": "Cys", 498 | "variant": "Ser", 499 | }, 500 | "p.Ile71_Cys80delinsSer", 501 | ), 502 | ( 503 | { 504 | "variant_type": "delins", 505 | "prefix": "p", 506 | "start_position": 50, 507 | "start_target": "Arg", 508 | "end_position": 50, 509 | "end_target": "Arg", 510 | "variant": "AlaGly", 511 | }, 512 | "p.Arg50delinsAlaGly", 513 | ), 514 | ] 515 | 516 | invalid_dicts = [ 517 | { 518 | "variant_type": "delins", 519 | "prefix": "p", 520 | "start_position": 50, 521 | "start_target": "Arg", 522 | "end_position": 50, 523 | "end_target": "Cys", 524 | "variant": "AlaGly", 525 | }, 526 | ] 527 | 528 | for d, s in valid_dict_tuples: 529 | with self.subTest(d=d, s=s): 530 | self.assertEqual(Variant(s), Variant(d)) 531 | 532 | for d in invalid_dicts: 533 | with self.subTest(d=d): 534 | with self.assertRaises(MaveHgvsParseError): 535 | Variant(d) 536 | 537 | def test_extra_keys(self): 538 | invalid_dicts = [ 539 | { 540 | "variant_type": "sub", 541 | "prefix": "p", 542 | "position": 27, 543 | "target": "Glu", 544 | "variant": "Trp", 545 | "bonus": "data", 546 | }, 547 | { 548 | "variant_type": "sub", 549 | "prefix": "c", 550 | "position": "122-6", 551 | "start_target": "T", 552 | "target": "T", 553 | "variant": "A", 554 | }, 555 | { 556 | "variant_type": "delins", 557 | "prefix": "p", 558 | "start_target": "Ile", 559 | "end_position": 80, 560 | "end_target": "Cys", 561 | "variant": "Ser", 562 | "position": "Ala", 563 | }, 564 | ] 565 | 566 | for d in invalid_dicts: 567 | with self.subTest(d=d): 568 | with self.assertRaises(MaveHgvsParseError): 569 | Variant(d) 570 | 571 | def 
test_missing_keys(self): 572 | invalid_dicts = [ 573 | {"prefix": "p", "position": 27, "target": "Glu", "variant": "Trp"}, 574 | {"variant_type": "sub", "position": "122-6", "target": "T", "variant": "A"}, 575 | { 576 | "variant_type": "delins", 577 | "prefix": "p", 578 | "start_target": "Ile", 579 | "end_position": 80, 580 | "end_target": "Cys", 581 | "variant": "Ser", 582 | }, 583 | ] 584 | 585 | for d in invalid_dicts: 586 | with self.subTest(d=d): 587 | with self.assertRaises(MaveHgvsParseError): 588 | Variant(d) 589 | 590 | def test_invalid_keys(self): 591 | invalid_dicts = [ 592 | { 593 | "variant_type": "equal", 594 | "prefix": "p", 595 | "start_position": "27", 596 | "end_position": "27", 597 | "target": "Glu", 598 | }, 599 | {"variant_type": "dup", "prefix": "c", "position": 77}, 600 | { 601 | "variant_type": "test", 602 | "prefix": "c", 603 | "start_position": 77, 604 | "end_position": 77, 605 | }, 606 | { 607 | "variant_type": "fs", 608 | "prefix": "c", 609 | "position": "12", 610 | "target": "T", 611 | }, 612 | ] 613 | 614 | for d in invalid_dicts: 615 | with self.subTest(d=d): 616 | with self.assertRaises(MaveHgvsParseError): 617 | Variant(d) 618 | 619 | def test_invalid_type(self): 620 | invalid_values = [1234, None, 5.55, ("p", "Ile", 80, "Cys")] 621 | 622 | for v in invalid_values: 623 | with self.subTest(v=v): 624 | with self.assertRaises(ValueError): 625 | Variant(v) 626 | 627 | 628 | class TestCreateMultiVariantFromValues(unittest.TestCase): 629 | def test_create_multivariant(self): 630 | valid_dict_tuples = [ 631 | ( 632 | [ 633 | { 634 | "variant_type": "sub", 635 | "prefix": "p", 636 | "position": 27, 637 | "target": "Glu", 638 | "variant": "Trp", 639 | }, 640 | { 641 | "variant_type": "delins", 642 | "prefix": "p", 643 | "start_position": 71, 644 | "start_target": "Ile", 645 | "end_position": 80, 646 | "end_target": "Cys", 647 | "variant": "Ser", 648 | }, 649 | ], 650 | "p.[Glu27Trp;Ile71_Cys80delinsSer]", 651 | ), 652 | ( 653 | [ 654 | { 655 
| "variant_type": "dup", 656 | "prefix": "c", 657 | "start_position": 77, 658 | "end_position": 77, 659 | }, 660 | { 661 | "variant_type": "sub", 662 | "prefix": "c", 663 | "position": "122-6", 664 | "target": "T", 665 | "variant": "A", 666 | }, 667 | ], 668 | "c.[77dup;122-6T>A]", 669 | ), 670 | ] 671 | 672 | invalid_dicts = [ 673 | [ 674 | { 675 | "variant_type": "sub", 676 | "position": 27, 677 | "target": "Glu", 678 | "variant": "Trp", 679 | }, 680 | { 681 | "variant_type": "delins", 682 | "prefix": "p", 683 | "start_position": 71, 684 | "start_target": "Ile", 685 | "end_position": 80, 686 | "end_target": "Cys", 687 | "variant": "Ser", 688 | }, 689 | ], 690 | [ 691 | { 692 | "variant_type": "sub", 693 | "prefix": "p", 694 | "position": 27, 695 | "target": "Glu", 696 | "variant": "Trp", 697 | }, 698 | { 699 | "variant_type": "sub", 700 | "prefix": "c", 701 | "position": "122-6", 702 | "target": "T", 703 | "variant": "A", 704 | }, 705 | ], 706 | ] 707 | 708 | for d, s in valid_dict_tuples: 709 | with self.subTest(d=d, s=s): 710 | self.assertEqual(Variant(s), Variant(d)) 711 | 712 | for d in invalid_dicts: 713 | with self.subTest(d=d): 714 | with self.assertRaises(MaveHgvsParseError): 715 | Variant(d) 716 | 717 | 718 | class TestTargetSequenceValidation(unittest.TestCase): 719 | def test_valid_dna_equal(self): 720 | variant_tuples = [("ACGT", "c.1_2="), ("ACGT", "c.4="), ("ACGT", "c.=")] 721 | 722 | for target, s in variant_tuples: 723 | with self.subTest(target=target, s=s): 724 | v = Variant(s, targetseq=target) 725 | self.assertEqual(s, str(v)) 726 | 727 | def test_invalid_dna_equal(self): 728 | variant_tuples = [("ACGT", "c.4_5="), ("ACGT", "c.10=")] 729 | 730 | for target, s in variant_tuples: 731 | with self.subTest(target=target, s=s): 732 | with self.assertRaises(MaveHgvsParseError): 733 | Variant(s, targetseq=target) 734 | 735 | def test_matching_dna_substitution(self): 736 | variant_tuples = [ 737 | ("ACGT", "c.1A>T"), 738 | ("ACGT", "c.3G>C"), 739 | 
("ACGT", "c.[1A>T;3G>C]"), 740 | ] 741 | 742 | for target, s in variant_tuples: 743 | with self.subTest(target=target, s=s): 744 | v = Variant(s, targetseq=target) 745 | self.assertEqual(s, str(v)) 746 | 747 | def test_nonmatching_dna_substitution(self): 748 | variant_tuples = [ 749 | ("ACGT", "c.1C>T"), 750 | ("ACGT", "c.3T>C"), 751 | ("ACGT", "c.[1A>T;3T>C]"), 752 | ("ACGT", "c.5A>G"), 753 | ] 754 | 755 | for target, s in variant_tuples: 756 | with self.subTest(target=target, s=s): 757 | with self.assertRaises(MaveHgvsParseError): 758 | Variant(s, targetseq=target) 759 | 760 | def test_valid_dna_del(self): 761 | variant_tuples = [("ACGT", "c.1_3del"), ("ACGT", "c.4del")] 762 | 763 | for target, s in variant_tuples: 764 | with self.subTest(target=target, s=s): 765 | v = Variant(s, targetseq=target) 766 | self.assertEqual(s, str(v)) 767 | 768 | def test_invalid_dna_del(self): 769 | variant_tuples = [ 770 | ("ACGT", "c.1_5del"), 771 | ("ACGT", "c.6_8del"), 772 | ("ACGT", "c.7del"), 773 | ] 774 | 775 | for target, s in variant_tuples: 776 | with self.subTest(target=target, s=s): 777 | with self.assertRaises(MaveHgvsParseError): 778 | Variant(s, targetseq=target) 779 | 780 | def test_valid_dna_dup(self): 781 | variant_tuples = [("ACGT", "c.1_3dup"), ("ACGT", "c.4dup")] 782 | 783 | for target, s in variant_tuples: 784 | with self.subTest(target=target, s=s): 785 | v = Variant(s, targetseq=target) 786 | self.assertEqual(s, str(v)) 787 | 788 | def test_invalid_dna_dup(self): 789 | variant_tuples = [ 790 | ("ACGT", "c.1_5dup"), 791 | ("ACGT", "c.6_8dup"), 792 | ("ACGT", "c.7dup"), 793 | ] 794 | 795 | for target, s in variant_tuples: 796 | with self.subTest(target=target, s=s): 797 | with self.assertRaises(MaveHgvsParseError): 798 | Variant(s, targetseq=target) 799 | 800 | def test_valid_dna_ins(self): 801 | variant_tuples = [("ACGT", "c.1_2insAAA"), ("ACGT", "c.3_4insT")] 802 | 803 | for target, s in variant_tuples: 804 | with self.subTest(target=target, s=s): 805 | v = 
Variant(s, targetseq=target) 806 | self.assertEqual(s, str(v)) 807 | 808 | def test_invalid_dna_ins(self): 809 | variant_tuples = [("ACGT", "c.4_5insA"), ("ACGT", "c.10_11insTCG")] 810 | 811 | for target, s in variant_tuples: 812 | with self.subTest(target=target, s=s): 813 | with self.assertRaises(MaveHgvsParseError): 814 | Variant(s, targetseq=target) 815 | 816 | def test_valid_dna_delins(self): 817 | variant_tuples = [("ACGT", "c.1_2delinsA"), ("ACGT", "c.4delinsTAAGC")] 818 | 819 | for target, s in variant_tuples: 820 | with self.subTest(target=target, s=s): 821 | v = Variant(s, targetseq=target) 822 | self.assertEqual(s, str(v)) 823 | 824 | def test_invalid_dna_delins(self): 825 | variant_tuples = [("ACGT", "c.4_5delinsA"), ("ACGT", "c.10_delinsTCG")] 826 | 827 | for target, s in variant_tuples: 828 | with self.subTest(target=target, s=s): 829 | with self.assertRaises(MaveHgvsParseError): 830 | Variant(s, targetseq=target) 831 | 832 | def test_valid_protein_equal(self): 833 | variant_tuples = [("RCQY", "p.Arg1="), ("RCQY", "p.Tyr4="), ("RCQY", "p.=")] 834 | 835 | for target, s in variant_tuples: 836 | with self.subTest(target=target, s=s): 837 | v = Variant(s, targetseq=target) 838 | self.assertEqual(s, str(v)) 839 | 840 | def test_invalid_protein_equal(self): 841 | variant_tuples = [("RCQY", "p.Trp5=")] 842 | 843 | for target, s in variant_tuples: 844 | with self.subTest(target=target, s=s): 845 | with self.assertRaises(MaveHgvsParseError): 846 | Variant(s, targetseq=target) 847 | 848 | def test_matching_protein_substitution(self): 849 | variant_tuples = [ 850 | ("RCQY", "p.Arg1Ala"), 851 | ("RCQY", "p.Gln3Trp"), 852 | ("RCQY", "p.[Arg1Ala;Gln3Trp]"), 853 | ] 854 | 855 | for target, s in variant_tuples: 856 | with self.subTest(target=target, s=s): 857 | v = Variant(s, targetseq=target) 858 | self.assertEqual(s, str(v)) 859 | 860 | def test_nonmatching_protein_substitution(self): 861 | variant_tuples = [ 862 | ("RCQY", "p.Cys1Ala"), 863 | ("RCQY", 
"p.Ala3Trp"), 864 | ("RCQY", "p.[Arg1Ala;Cys3Trp]"), 865 | ("RCQY", "p.Asp5Glu"), 866 | ] 867 | 868 | for target, s in variant_tuples: 869 | with self.subTest(target=target, s=s): 870 | with self.assertRaises(MaveHgvsParseError): 871 | Variant(s, targetseq=target) 872 | 873 | def test_matching_protein_fs(self): 874 | variant_tuples = [ 875 | ("RCQY", "p.Arg1fs"), 876 | ("RCQY", "p.Gln3fs"), 877 | ] 878 | 879 | for target, s in variant_tuples: 880 | with self.subTest(target=target, s=s): 881 | v = Variant(s, targetseq=target) 882 | self.assertEqual(s, str(v)) 883 | 884 | def test_nonmatching_protein_fs(self): 885 | variant_tuples = [ 886 | ("RCQY", "p.Cys1fs"), 887 | ("RCQY", "p.Ala3fs"), 888 | ("RCQY", "p.Asp5fs"), 889 | ] 890 | 891 | for target, s in variant_tuples: 892 | with self.subTest(target=target, s=s): 893 | with self.assertRaises(MaveHgvsParseError): 894 | Variant(s, targetseq=target) 895 | 896 | def test_matching_protein_indel(self): 897 | variant_tuples = [ 898 | ("RCQY", "p.Arg1del"), 899 | ("RCQY", "p.Arg1_Gln3dup"), 900 | ] 901 | 902 | for target, s in variant_tuples: 903 | with self.subTest(target=target, s=s): 904 | v = Variant(s, targetseq=target) 905 | self.assertEqual(s, str(v)) 906 | 907 | def test_nonmatching_protein_indel(self): 908 | variant_tuples = [ 909 | ("RCQY", "p.Cys1del"), 910 | ("RCQY", "p.Arg1_Asp3dup"), 911 | ("RCQY", "p.Asp5del"), 912 | ] 913 | 914 | for target, s in variant_tuples: 915 | with self.subTest(target=target, s=s): 916 | with self.assertRaises(MaveHgvsParseError): 917 | Variant(s, targetseq=target) 918 | 919 | def test_skips_extended(self): 920 | variant_tuples = [ 921 | ("ACGT", "c.1+3A>T"), 922 | ("ACGT", "c.*33G>C"), 923 | ("ACGT", "c.43-6_595+12delinsCTT"), 924 | ] 925 | 926 | for target, s in variant_tuples: 927 | with self.subTest(target=target, s=s): 928 | v = Variant(s, targetseq=target) 929 | self.assertEqual(s, str(v)) 930 | 931 | 932 | class TestMiscMethods(unittest.TestCase): 933 | def 
class TestMiscMethods(unittest.TestCase):
    """Tests for Variant helper methods: is_multi_variant,
    uses_extended_positions, and components."""

    def test_is_multi_variant(self):
        single_variant_strings = [
            "p.Glu27Trp",
            "c.122-6T>A",
            "g.44del",
            "c.78+5_78+10del",
            "c.77dup",
            "p.Pro12_Gly18dup",
            "p.Ala12_Pro13insGlyProCys",
            "r.22_23insauc",
            "c.43-6_595+12delinsCTT",
            "p.Ile71_Cys80delinsSer",
            "p.=",
        ]

        # Fix: this list was empty, so the assertTrue branch below never ran.
        # These strings mirror valid multi-variants used elsewhere in this
        # module's tests.
        multi_variant_strings = [
            "p.[Glu27Trp;Ile71_Cys80delinsSer]",
            "c.[77dup;122-6T>A]",
            "r.[22g>u;35del]",
        ]

        for s in single_variant_strings:
            with self.subTest(s=s):
                v = Variant(s)
                self.assertFalse(v.is_multi_variant())

        for s in multi_variant_strings:
            with self.subTest(s=s):
                v = Variant(s)
                self.assertTrue(v.is_multi_variant())

    def test_uses_extended_positions(self):
        non_extended_variant_strings = [
            "p.Glu27Trp",
            "g.44del",
            "c.77dup",
            "p.Pro12_Gly18dup",
            "p.Ala12_Pro13insGlyProCys",
            "r.22_23insauc",
            "r.22g>u",
            "p.Ile71_Cys80delinsSer",
            "p.=",
            "p.[Pro12_Gly18dup;Glu27Trp]",
            "r.[22g>u;35del]",
        ]

        extended_variant_strings = [
            "c.122-6T>A",
            "c.78+5_78+10del",
            "c.43-6_595+12delinsCTT",
            "c.*33G>C",
            "r.33+12a>c",
            "c.[12G>T;122-6T>A]",
            "c.[43-6_595+12delinsCTT;*33G>C]",
        ]

        for s in non_extended_variant_strings:
            with self.subTest(s=s):
                v = Variant(s)
                self.assertFalse(v.uses_extended_positions())

        for s in extended_variant_strings:
            with self.subTest(s=s):
                v = Variant(s)
                self.assertTrue(v.uses_extended_positions())

    def test_components(self):
        variant_strings = [
            ("p.[Glu27Trp;Ter345Lys]", ("p.Glu27Trp", "p.Ter345Lys")),
            ("p.[Glu27Trp;Lys212fs]", ("p.Glu27Trp", "p.Lys212fs")),
            (
                "p.[Gly18del;Glu27Trp;Ter345Lys]",
                ("p.Gly18del", "p.Glu27Trp", "p.Ter345Lys"),
            ),
            (
                "p.[Gln7_Asn19del;Glu27Trp;Ter345Lys]",
                ("p.Gln7_Asn19del", "p.Glu27Trp", "p.Ter345Lys"),
            ),
            (
                "c.[1_35del;78+5_78+10del;122T>A]",
                ("c.1_35del", "c.78+5_78+10del", "c.122T>A"),
            ),
            ("p.Glu27Trp", ("p.Glu27Trp",)),
            ("NP_002002.3:p.Glu27Trp", ("NP_002002.3:p.Glu27Trp",)),
            (
                "NP_002002.3:p.[Glu27Trp;Lys212fs]",
                ("NP_002002.3:p.Glu27Trp", "NP_002002.3:p.Lys212fs"),
            ),
        ]

        for s, expected_components in variant_strings:
            with self.subTest(s=s):
                v = Variant(s)
                # Fix: the original subset check
                # all(c in expected_components for c in v.components())
                # passed vacuously if components() returned too few (or zero)
                # items. assertCountEqual requires the same elements with the
                # same multiplicity, in any order.
                self.assertCountEqual(expected_components, v.components())
"c.[1_35del;78+5_78+10del;122T>A]", 1009 | ("c.1_35del", "c.78+5_78+10del", "c.122T>A"), 1010 | ), 1011 | ("p.Glu27Trp", ("p.Glu27Trp",)), 1012 | ("NP_002002.3:p.Glu27Trp", ("NP_002002.3:p.Glu27Trp",)), 1013 | ( 1014 | "NP_002002.3:p.[Glu27Trp;Lys212fs]", 1015 | ("NP_002002.3:p.Glu27Trp", "NP_002002.3:p.Lys212fs"), 1016 | ), 1017 | ] 1018 | 1019 | for s, expected_components in variant_strings: 1020 | with self.subTest(s=s): 1021 | v = Variant(s) 1022 | self.assertTrue(all([c in expected_components for c in v.components()])) 1023 | 1024 | 1025 | # TODO: multi-variant test cases 1026 | class TestMiscProperties(unittest.TestCase): 1027 | def test_prefix(self): 1028 | variant_tuples = [(prefix, f"{prefix}.=") for prefix in tuple("gmocnr")] 1029 | 1030 | for p, s in variant_tuples: 1031 | with self.subTest(p=p, s=s): 1032 | v = Variant(s) 1033 | self.assertEqual(p, v.prefix) 1034 | 1035 | def test_variant_type(self): 1036 | variant_tuples = [ 1037 | ("sub", "p.Glu27Trp"), 1038 | ("sub", "c.122-6T>A"), 1039 | ("fs", "p.Glu27fs"), 1040 | ("del", "g.44del"), 1041 | ("del", "c.78+5_78+10del"), 1042 | ("dup", "c.77dup"), 1043 | ("dup", "p.Pro12_Gly18dup"), 1044 | ("ins", "p.Ala12_Pro13insGlyProCys"), 1045 | ("ins", "r.22_23insauc"), 1046 | ("delins", "c.43-6_595+12delinsCTT"), 1047 | ("delins", "p.Ile71_Cys80delinsSer"), 1048 | ] 1049 | 1050 | for t, s in variant_tuples: 1051 | with self.subTest(t=t, s=s): 1052 | v = Variant(s) 1053 | self.assertEqual(t, v.variant_type) 1054 | 1055 | def test_position(self): 1056 | variant_tuples = [ 1057 | (VariantPosition("Glu27"), "p.Glu27Trp"), 1058 | (VariantPosition("Glu27"), "p.Glu27fs"), 1059 | (VariantPosition("122-6"), "c.122-6T>A"), 1060 | (VariantPosition("44"), "g.44del"), 1061 | ((VariantPosition("78+5"), VariantPosition("78+10")), "c.78+5_78+10del"), 1062 | (VariantPosition("77"), "c.77dup"), 1063 | ((VariantPosition("Pro12"), VariantPosition("Gly18")), "p.Pro12_Gly18dup"), 1064 | ( 1065 | (VariantPosition("Ala12"), 
VariantPosition("Pro13")), 1066 | "p.Ala12_Pro13insGlyProCys", 1067 | ), 1068 | ((VariantPosition("22"), VariantPosition("23")), "r.22_23insauc"), 1069 | ( 1070 | (VariantPosition("43-6"), VariantPosition("595+12")), 1071 | "c.43-6_595+12delinsCTT", 1072 | ), 1073 | ( 1074 | (VariantPosition("Ile71"), VariantPosition("Cys80")), 1075 | "p.Ile71_Cys80delinsSer", 1076 | ), 1077 | ] 1078 | 1079 | for p, s in variant_tuples: 1080 | with self.subTest(p=p, s=s): 1081 | v = Variant(s) 1082 | if isinstance(p, list): # multi-variant 1083 | self.assertEqual(len(p), len(v.positions)) 1084 | for q, vp in zip(p, v.positions): 1085 | if isinstance(q, tuple): 1086 | self.assertTupleEqual(q, vp) 1087 | else: 1088 | self.assertEqual(q, vp) 1089 | if isinstance(p, tuple): 1090 | self.assertTupleEqual(p, v.positions) 1091 | else: 1092 | self.assertEqual(p, v.positions) 1093 | 1094 | def test_sequence(self): 1095 | variant_tuples = [ 1096 | (("Glu", "Trp"), "p.Glu27Trp"), 1097 | (("T", "A"), "c.122-6T>A"), 1098 | (None, "p.Glu27fs"), 1099 | (None, "g.44del"), 1100 | (None, "c.78+5_78+10del"), 1101 | (None, "c.77dup"), 1102 | (None, "p.Pro12_Gly18dup"), 1103 | ("GlyProCys", "p.Ala12_Pro13insGlyProCys"), 1104 | ("auc", "r.22_23insauc"), 1105 | ("CTT", "c.43-6_595+12delinsCTT"), 1106 | ("Ser", "p.Ile71_Cys80delinsSer"), 1107 | ] 1108 | 1109 | for seq, s in variant_tuples: 1110 | with self.subTest(seq=seq, s=s): 1111 | v = Variant(s) 1112 | self.assertEqual(seq, v.sequence) 1113 | 1114 | def test_target_id(self): 1115 | variant_tuples = [ 1116 | (None, "p.Glu27Trp"), 1117 | (None, "c.122-6T>A"), 1118 | ("GeneX", "GeneX:p.Glu27Trp"), 1119 | ("YFG1", "YFG1:c.122-6T>A"), 1120 | ("ENST00000471181.7", "ENST00000471181.7:c.122-6T>A"), 1121 | ("NM_007294.4", "NM_007294.4:c.122-6T>A"), 1122 | ("NM_007294.4", "NM_007294.4:c.[122-6T>A;153C>T]"), 1123 | ] 1124 | 1125 | for t, s in variant_tuples: 1126 | with self.subTest(t=t, s=s): 1127 | v = Variant(s) 1128 | self.assertEqual(t, v.target_id) 1129 | 
1130 | for _, s in variant_tuples: 1131 | with self.subTest(s=s): 1132 | v = Variant(s) 1133 | self.assertEqual(s, str(v)) 1134 | 1135 | 1136 | if __name__ == "__main__": 1137 | unittest.main() 1138 | --------------------------------------------------------------------------------