├── .github └── workflows │ ├── package-build-test.yml │ └── publish-to-pypi.yml ├── .gitignore ├── .gitmodules ├── AUTHORS ├── CODE_OF_CONDUCT.md ├── LICENCE ├── README.md ├── assets └── decombinator-logo.png ├── decombinator-runner.py ├── pyproject.toml ├── src └── decombinator │ ├── __init__.py │ ├── __main__.py │ ├── collapse.py │ ├── decombine.py │ ├── io.py │ ├── pipeline.py │ └── translate.py └── tests ├── __init__.py ├── conftest.py ├── resources ├── 2023_01_31_alpha_TINY_1_Decombinator_Summary.csv ├── 2023_01_31_beta_TINY_1_Decombinator_Summary.csv ├── 2023_01_31_dcr_TINY_1_alpha_CDR3_Translation_Summary.csv ├── 2023_01_31_dcr_TINY_1_alpha_Collapsing_Summary.csv ├── 2023_01_31_dcr_TINY_1_beta_CDR3_Translation_Summary.csv ├── 2023_01_31_dcr_TINY_1_beta_Collapsing_Summary.csv ├── TINY_1.fq ├── TINY_2.fq ├── dcr_TINY_1_alpha.freq ├── dcr_TINY_1_alpha.n12 ├── dcr_TINY_1_alpha.tsv ├── dcr_TINY_1_beta.freq ├── dcr_TINY_1_beta.n12 └── dcr_TINY_1_beta.tsv ├── test_cli.py ├── test_collapse.py ├── test_decombine.py ├── test_pipeline.py └── test_subparsers.py /.github/workflows/package-build-test.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | 3 | name: package-build-test 4 | 5 | on: 6 | push: 7 | branches: [ "master" ] 8 | pull_request: 9 | branches: [ "master" ] 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ubuntu-latest 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: ["3.9", "3.10", "3.11", "3.12"] 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v3 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | python -m pip install flake8 pytest 30 | pip install .[dev] 31 | # - name: Lint with flake8 32 | # run: | 33 | # # stop the build if there are Python syntax errors or undefined names 34 | # flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 35 | # # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 36 | # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 37 | - name: Test with pytest 38 | run: | 39 | pytest 40 | -------------------------------------------------------------------------------- /.github/workflows/publish-to-pypi.yml: -------------------------------------------------------------------------------- 1 | name: publish-to-pypi-and-test-pypi 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | jobs: 9 | build: 10 | name: build-dist 📦 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: Set up Python 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: "3.12.x" 19 | - name: Install pypa/build 20 | run: >- 21 | python3 -m 22 | pip install 23 | build 24 | --user 25 | - name: Build a binary wheel and a source tarball 26 | run: python3 -m build 27 | - name: Store the distribution packages 28 | uses: actions/upload-artifact@v4 29 | with: 30 | name: python-package-distributions 31 | path: dist/ 32 | 33 | publish-to-pypi: 34 | name: >- 35 | publish-to-pypi 📢 36 | if: ${{startsWith(github.ref, 'refs/tags/') && !contains(github.ref, '.dev')}} # only publish to PyPI on tag pushes 37 | needs: 38 | - build 39 | runs-on: ubuntu-latest 40 | environment: 41 | name: pypi 42 | url: https://pypi.org/p/decombinator 43 | permissions: 44 | id-token: write # IMPORTANT: mandatory for trusted publishing 45 | 46 | steps: 47 | - name: download-dists 48 | uses: actions/download-artifact@v4 49 | with: 50 | name: python-package-distributions 51 | path: dist/ 52 | - name: publish-dist-to-pypi 53 | uses: pypa/gh-action-pypi-publish@release/v1 54 | 55 | github-release: 56 | name: >- 57 | signed-github-release ✍️ 58 | needs: 59 | - publish-to-pypi 60 | runs-on: ubuntu-latest 61 | 62 | permissions: 63 | contents: write # IMPORTANT: mandatory for making GitHub Releases 64 | id-token: write # IMPORTANT: mandatory for sigstore 65 | 66 | steps: 67 | - name: download-dists 68 | uses: actions/download-artifact@v4 69 | with: 70 | name: python-package-distributions 71 | path: dist/ 72 | - name: sign-with-sigstore 73 | uses: sigstore/gh-action-sigstore-python@v2.1.1 74 | with: 75 | inputs: >- 76 | ./dist/*.tar.gz 77 | ./dist/*.whl 78 | - name: Create GitHub Release 79 | env: 80 | GITHUB_TOKEN: ${{ github.token }} 81 | run: >- 82 | gh release create 83 | '${{ github.ref_name }}' 84 | --repo '${{ github.repository }}' 85 | --notes "" 86 | - name: Upload artifact signatures to GitHub Release 87 | env: 88 | GITHUB_TOKEN: ${{ github.token }} 89 | # Upload to GitHub Release using the `gh` CLI. 90 | # `dist/` contains the built packages, and the 91 | # sigstore-produced signatures and certificates. 92 | run: >- 93 | gh release upload 94 | '${{ github.ref_name }}' dist/** 95 | --repo '${{ github.repository }}' 96 | 97 | publish-to-testpypi: 98 | name: publish-to-testpypi 🔨 99 | if: ${{startsWith(github.ref, 'refs/tags/') && contains(github.ref, '.dev')}} # only publish to test-PyPI on dev tag pushes 100 | needs: 101 | - build 102 | runs-on: ubuntu-latest 103 | 104 | environment: 105 | name: test-pypi 106 | url: https://test.pypi.org/p/decombinator 107 | 108 | permissions: 109 | id-token: write # IMPORTANT: mandatory for trusted publishing 110 | 111 | steps: 112 | - name: download-dists 113 | uses: actions/download-artifact@v4 114 | with: 115 | name: python-package-distributions 116 | path: dist/ 117 | - name: publish-dist-to-testpypi 118 | uses: pypa/gh-action-pypi-publish@release/v1 119 | with: 120 | repository-url: https://test.pypi.org/legacy/ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | decombinator.egg-info 3 | .pytest_cache 4 | .decombinatorenv 5 | build/ 6 | .vscode 7 | .venv 8 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "tests/resources/Decombinator-Tags-FASTAs"] 2 | path = tests/resources/Decombinator-Tags-FASTAs 3 | url = https://github.com/innate2adaptive/Decombinator-Tags-FASTAs.git 4 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | The Decombinator Authors 2 | ------------------------ 3 | 4 | The following people (alphabetically sorted) have contributed to the current code of Decombinator. 5 | 6 | Benny Chain 7 | James M. Heather 8 | Katharine Best 9 | Matthew V. Cowley 10 | Mazlina Ismail 11 | Niclas Thomas 12 | Tahel Ronel 13 | Theres Oakes 14 | Thomas Peacock 15 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our 7 | community a harassment-free experience for everyone, regardless of age, body 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender 9 | identity and expression, level of experience, education, socio-economic status, 10 | nationality, personal appearance, race, caste, color, religion, or sexual 11 | identity and orientation. 12 | 13 | We pledge to act and interact in ways that contribute to an open, welcoming, 14 | diverse, inclusive, and healthy community. 15 | 16 | ## Our Standards 17 | 18 | Examples of behavior that contributes to a positive environment for our 19 | community include: 20 | 21 | * Demonstrating empathy and kindness toward other people 22 | * Being respectful of differing opinions, viewpoints, and experiences 23 | * Giving and gracefully accepting constructive feedback 24 | * Accepting responsibility and apologizing to those affected by our mistakes, 25 | and learning from the experience 26 | * Focusing on what is best not just for us as individuals, but for the overall 27 | community 28 | 29 | Examples of unacceptable behavior include: 30 | 31 | * The use of sexualized language or imagery, and sexual attention or advances of 32 | any kind 33 | * Trolling, insulting or derogatory comments, and personal or political attacks 34 | * Public or private harassment 35 | * Publishing others' private information, such as a physical or email address, 36 | without their explicit permission 37 | * Other conduct which could reasonably be considered inappropriate in a 38 | professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our standards of 43 | acceptable behavior and will take appropriate and fair corrective action in 44 | response to any behavior that they deem inappropriate, threatening, offensive, 45 | or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, or reject 48 | comments, commits, code, wiki edits, issues, and other contributions that are 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation 50 | decisions when appropriate. 51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also applies when 55 | an individual is officially representing the community in public spaces. 56 | Examples of representing our community include using an official email address, 57 | posting via an official social media account, or acting as an appointed 58 | representative at an online or offline event. 59 | 60 | ## Enforcement 61 | 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 63 | reported to the community leaders responsible for enforcement [here](m.cowley@ucl.ac.uk). 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series of 86 | actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or permanent 93 | ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within the 113 | community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.1, available at 119 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 120 | 121 | Community Impact Guidelines were inspired by 122 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 123 | 124 | For answers to common questions about this code of conduct, see the FAQ at 125 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 126 | [https://www.contributor-covenant.org/translations][translations]. 127 | 128 | [homepage]: https://www.contributor-covenant.org 129 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 130 | [Mozilla CoC]: https://github.com/mozilla/diversity 131 | [FAQ]: https://www.contributor-covenant.org/faq 132 | [translations]: https://www.contributor-covenant.org/translations 133 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2013 The Decombinator Authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /assets/decombinator-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/innate2adaptive/decombinator/113203ebf61d949b61fdec717e14ea736fe71ef9/assets/decombinator-logo.png -------------------------------------------------------------------------------- /decombinator-runner.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | """Convenience wrapper for running decombinator directly from source tree.""" 6 | 7 | 8 | from src.decombinator.pipeline import main 9 | 10 | 11 | if __name__ == "__main__": 12 | main() 13 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "decombinator" 3 | version = "5.0.0.dev0" 4 | description = "A fast and efficient tool for the analysis of T-cell receptor repertoire sequences produced by deep sequencing." 5 | readme = "README.md" 6 | license = {file = "LICENSE"} 7 | maintainers = [{name = "Matthew V. Cowley", email = "m.cowley@ucl.ac.uk"}] 8 | requires-python = ">=3.9" 9 | dependencies = [ 10 | "acora==2.4", 11 | "biopython==1.84", 12 | "contourpy==1.2.1", 13 | "coverage==7.5.4", 14 | "cycler==0.12.1", 15 | "fonttools==4.53.1", 16 | "igraph==0.11.6", 17 | "iniconfig==2.0.0", 18 | "joblib==1.4.2", 19 | "kiwisolver==1.4.5", 20 | "Levenshtein==0.25.1", 21 | "logomaker==0.8", 22 | "matplotlib==3.9.2", 23 | "networkx==3.2.1", 24 | "numpy==2.0.0", 25 | "packaging==24.1", 26 | "pandas==2.2.2", 27 | "pillow==10.4.0", 28 | "pluggy==1.5.0", 29 | "polyleven==0.8", 30 | "pyparsing==3.1.2", 31 | "pyrepseq==1.5", 32 | "python-dateutil==2.9.0.post0", 33 | "pytz==2024.1", 34 | "rapidfuzz==3.9.4", 35 | "regex==2024.5.15", 36 | "scikit-learn==1.5.1", 37 | "scipy==1.13.1", 38 | "seaborn==0.13.2", 39 | "six==1.16.0", 40 | "texttable==1.7.0", 41 | "threadpoolctl==3.5.0", 42 | "tidytcells==2.1.3", 43 | "tzdata==2024.1" 44 | ] 45 | keywords = ["decombinator", "TCR", "TCRseq", "repertoire", "sequence", "analysis"] 46 | classifiers = [ 47 | # How mature is this project? 48 | "Development Status :: 5 - Production/Stable", 49 | 50 | # Indicate who your project is intended for 51 | "Intended Audience :: Science/Research", 52 | "Topic :: Scientific/Engineering :: Bio-Informatics", 53 | "License :: OSI Approved :: MIT License", 54 | 55 | # Specify the Python versions you support here. 56 | "Programming Language :: Python :: 3.9", 57 | "Programming Language :: Python :: 3.10", 58 | "Programming Language :: Python :: 3.11", 59 | "Programming Language :: Python :: 3.12", 60 | ] 61 | 62 | [project.optional-dependencies] 63 | dev = [ 64 | "pytest==8.2.2", 65 | "pytest-cov==5.0.0", 66 | ] 67 | 68 | [build-system] 69 | requires = ["setuptools>=61.0"] 70 | build-backend = "setuptools.build_meta" 71 | 72 | [tool.setuptools.packages.find] 73 | where = ["src"] 74 | 75 | [tool.pytest.ini_options] 76 | pythonpath = [ 77 | "./src" 78 | ] 79 | 80 | [tool.black] 81 | line-length = 79 82 | 83 | [project.scripts] 84 | decombinator = "decombinator.pipeline:main" 85 | 86 | [project.urls] 87 | "Homepage" = "https://github.com/innate2adaptive/Decombinator" 88 | "Bug Tracker" = "https://github.com/innate2adaptive/Decombinator/issues" 89 | # Homepage = "https://example.com" 90 | # Documentation = "https://readthedocs.org" 91 | # Changelog = "https://github.com/me/spam/blob/master/CHANGELOG.md" 92 | -------------------------------------------------------------------------------- /src/decombinator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/innate2adaptive/decombinator/113203ebf61d949b61fdec717e14ea736fe71ef9/src/decombinator/__init__.py -------------------------------------------------------------------------------- /src/decombinator/__main__.py: -------------------------------------------------------------------------------- 1 | from .pipeline import main 2 | 3 | main() 4 | -------------------------------------------------------------------------------- /src/decombinator/io.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gzip 3 | from importlib import metadata 4 | import os 5 | import pandas as pd 6 | import typing 7 | 8 | 9 | def handle_clash( 10 | parser: argparse.ArgumentParser, 11 | argument_name: str, 12 | help_text: str, 13 | shortcut: str, 14 | **kwargs, 15 | ): 16 | present = 0 17 | for action in parser._actions: 18 | if argument_name in action.option_strings: 19 | present += 1 20 | 21 | if not present: 22 | if argument_name == "-in" or argument_name == "--infile": 23 | parser.add_argument( 24 | "-in", 25 | "--infile", 26 | type=str, 27 | required=True, 28 | help=help_text, 29 | ) 30 | else: 31 | parser.add_argument( 32 | shortcut, 33 | argument_name, 34 | help=help_text, 35 | **kwargs, 36 | ) 37 | elif present > 1: 38 | parser.error(f"The {argument_name} argument can only be used once.") 39 | 40 | 41 | def create_parser(): 42 | parser = argparse.ArgumentParser( 43 | description="Decombinator: A fast and efficient tool for the analysis" 44 | " of T-cell receptor repertoire sequences produced by deep sequencing." 45 | " Include a positional argument to run a specific command." 46 | " Please see https://github.com/innate2adaptive/Decombinator/ for details." 47 | ) 48 | 49 | # Add version information 50 | parser.add_argument( 51 | "-v", 52 | "--version", 53 | action="version", 54 | version=metadata.version("decombinator"), 55 | ) 56 | 57 | subparsers = parser.add_subparsers( 58 | dest="command", help="Available commands" 59 | ) 60 | subparsers.required = False 61 | 62 | # Create parser for the "pipeline" command 63 | pipeline_parser = subparsers.add_parser( 64 | "pipeline", help="Run the entire Decombinator pipeline" 65 | ) 66 | add_common_arguments(pipeline_parser) 67 | add_decombine_arguments(pipeline_parser) 68 | add_collapse_arguments(pipeline_parser) 69 | add_translate_arguments(pipeline_parser) 70 | 71 | # Create parser for the "decombine" command 72 | decombine_parser = subparsers.add_parser( 73 | "decombine", help="Decombine TCR reads" 74 | ) 75 | add_common_arguments(decombine_parser) 76 | add_decombine_arguments(decombine_parser) 77 | 78 | # Create parser for the "collapse" command 79 | collapse_parser = subparsers.add_parser( 80 | "collapse", help="Collapse barcodes" 81 | ) 82 | add_common_arguments(collapse_parser) 83 | add_collapse_arguments(collapse_parser) 84 | 85 | # Create parser for the "translate" command 86 | translate_parser = subparsers.add_parser( 87 | "translate", help="Translate Decombinator indexes" 88 | ) 89 | add_common_arguments(translate_parser) 90 | add_translate_arguments(translate_parser) 91 | 92 | return parser 93 | 94 | 95 | def add_common_arguments(parser: argparse.ArgumentParser): 96 | parser.add_argument( 97 | "-s", 98 | "--suppresssummary", 99 | action="store_true", 100 | help="Suppress the production of summary data log/file", 101 | ) 102 | parser.add_argument( 103 | "-dz", 104 | "--dontgzip", 105 | action="store_true", 106 | help="Stop the output FASTQ files automatically being compressed with gzip", 107 | ) 108 | parser.add_argument( 109 | "-dc", 110 | "--dontcount", 111 | action="store_true", 112 | help="Stop/Block printing the running count", 113 | ) 114 | parser.add_argument( 115 | "-op", 116 | "--outpath", 117 | type=str, 118 | help="Path to output directory, writes to directory script was called in by default", 119 | required=False, 120 | default="", 121 | ) 122 | parser.add_argument( 123 | "-c", 124 | "--chain", 125 | type=str, 126 | help="TCR chain (a/b/g/d)", 127 | ) 128 | parser.add_argument( 129 | "-pf", 130 | "--prefix", 131 | type=str, 132 | default="dcr_", 133 | help='Specify the prefix of the output DCR file. Default = "dcr_"', 134 | ) 135 | parser.add_argument( 136 | "-ds", 137 | "--dontsave", 138 | action="store_true", 139 | help="Don't save output files. For use when writing scripts which use the pipeline.", 140 | ) 141 | 142 | 143 | def add_decombine_arguments(parser: argparse.ArgumentParser): 144 | parser.add_argument( 145 | "-in", 146 | "--infile", 147 | type=str, 148 | required=True, 149 | help="Correctly demultiplexed/processed FASTQ file containing TCR reads", 150 | ) 151 | parser.add_argument( 152 | "-br", 153 | "--bc_read", 154 | type=str, 155 | required=True, 156 | help="Which read has bar code (R1,R2). If used, ensure read selected is present in the same directory as the file specified by -in.", 157 | ) 158 | parser.add_argument( 159 | "-dk", "--dontcheck", action="store_true", help="Skip the FASTQ check" 160 | ) 161 | parser.add_argument( 162 | "-ex", 163 | "--extension", 164 | type=str, 165 | default="n12", 166 | help='Specify the file extension of the output DCR file. Default = "n12"', 167 | ) 168 | parser.add_argument( 169 | "-or", 170 | "--orientation", 171 | type=str, 172 | default="reverse", 173 | help="Specify the orientation to search in (forward/reverse/both). Default = reverse", 174 | ) 175 | parser.add_argument( 176 | "-tg", 177 | "--tags", 178 | type=str, 179 | default="extended", 180 | help="Specify which Decombinator tag set to use (extended or original). Default = extended", 181 | ) 182 | parser.add_argument( 183 | "-sp", 184 | "--species", 185 | type=str, 186 | default="human", 187 | help="Specify which species TCR repertoire the data consists of (human or mouse). Default = human", 188 | ) 189 | parser.add_argument( 190 | "-N", 191 | "--allowNs", 192 | action="store_true", 193 | help="Whether to allow VJ rearrangements containing ambiguous base calls ('N'). Default = False", 194 | ) 195 | parser.add_argument( 196 | "-ln", 197 | "--lenthreshold", 198 | type=int, 199 | default=130, 200 | help="Acceptable threshold for inter-tag (V to J) sequence length. Default = 130", 201 | ) 202 | parser.add_argument( 203 | "-tfdir", 204 | "--tagfastadir", 205 | type=str, 206 | default="Decombinator-Tags-FASTAs", 207 | help='Path to folder containing TCR FASTA and Decombinator tag files, for offline analysis. Default = "Decombinator-Tags-FASTAs".', 208 | ) 209 | parser.add_argument( 210 | "-nbc", 211 | "--nobarcoding", 212 | action="store_true", 213 | help="Option to run Decombinator without barcoding, i.e. so as to run on data produced by any protocol.", 214 | ) 215 | parser.add_argument( 216 | "-bl", 217 | "--bclength", 218 | type=int, 219 | default=42, 220 | help="Length of barcode sequence, if applicable. Default is set to 42 bp.", 221 | ) 222 | 223 | 224 | def add_collapse_arguments(parser: argparse.ArgumentParser): 225 | handle_clash( 226 | parser, 227 | argument_name="--infile", 228 | shortcut="-in", 229 | help_text="File containing raw verbose Decombinator output, i.e. 5 part classifier plus barcode and inter-tag sequence and quality strings", 230 | ) 231 | parser.add_argument( 232 | "-mq", 233 | "--minbcQ", 234 | type=int, 235 | default=20, 236 | help="Minimum quality score that barcode nucleotides should be to for that rearrangement to be retained. Default = 20.", 237 | ) 238 | parser.add_argument( 239 | "-bm", 240 | "--bcQbelowmin", 241 | type=int, 242 | default=1, 243 | help="Number of nucleotides per barcode whose quality score are allowed to be below -mq and still be retained. Default = 1.", 244 | ) 245 | parser.add_argument( 246 | "-aq", 247 | "--avgQthreshold", 248 | type=int, 249 | default=30, 250 | help="Average quality threshold that barcode sequences must remain above for rearrangements to be retained. Default = 30", 251 | ) 252 | parser.add_argument( 253 | "-lv", 254 | "--percentlevdist", 255 | type=int, 256 | default=10, 257 | help="Percentage Levenshtein distance that is allowed to estimate whether two sequences within a barcode are derived from the same originator molecule. Default = 10", 258 | ) 259 | parser.add_argument( 260 | "-bc", 261 | "--bcthreshold", 262 | type=int, 263 | default=2, 264 | help="Number of sequence edits that are allowed to consider two barcodes to be derived from same originator during clustering. Default = 2.", 265 | ) 266 | handle_clash( 267 | parser, 268 | argument_name="--extension", 269 | shortcut="-ex", 270 | help_text="Specify the file extension of the output DCR file. Default = 'freq'", 271 | default="freq", 272 | type=str, 273 | required=False, 274 | ) 275 | handle_clash( 276 | parser, 277 | argument_name="--allowNs", 278 | shortcut="-N", 279 | help_text="Used to allow VJ rearrangements containing ambiguous base calls ('N')", 280 | action="store_true", 281 | ) 282 | handle_clash( 283 | parser, 284 | argument_name="--lenthreshold", 285 | shortcut="-ln", 286 | help_text="Acceptable threshold for inter-tag (V to J) sequence length", 287 | default=130, 288 | type=int, 289 | required=False, 290 | ) 291 | parser.add_argument( 292 | "-di", 293 | "--dontcheckinput", 294 | action="store_true", 295 | help="Override the input file sanity check", 296 | ) 297 | parser.add_argument( 298 | "-bd", 299 | "--barcodeduplication", 300 | action="store_true", 301 | help="Optionally output a file containing the final list of clustered barcodes, and their frequencies", 302 | ) 303 | parser.add_argument( 304 | "-pb", 305 | "--positionalbarcodes", 306 | action="store_true", 307 | help="Instead of inferring random barcode sequences from their context relative to spacer sequences, just take the sequence at the default positions. Useful to salvage runs when R2 quality is terrible.", 308 | ) 309 | parser.add_argument( 310 | "-ol", 311 | "--oligo", 312 | type=str, 313 | required=True, 314 | default="m13", 315 | help='Choose experimental oligo for correct identification of spacers ["M13", "I8", "I8_single", "NEBIO"] (default: M13)', 316 | ) 317 | parser.add_argument( 318 | "-wc", 319 | "--writeclusters", 320 | action="store_true", 321 | help="Write cluster data to separate cluster files", 322 | ) 323 | parser.add_argument( 324 | "-uh", 325 | "--UMIhistogram", 326 | action="store_true", 327 | help="Creates histogram of average UMI cluster sizes", 328 | ) 329 | 330 | 331 | def add_translate_arguments(parser: argparse.ArgumentParser): 332 | handle_clash( 333 | parser, 334 | argument_name="--infile", 335 | shortcut="-in", 336 | help_text="File containing 5 part classifier plus barcode and inter-tag sequence and quality strings", 337 | ) 338 | handle_clash( 339 | parser, 340 | argument_name="--species", 341 | shortcut="-sp", 342 | help_text="Specify which species TCR repertoire the data consists of (human or mouse). Default = human", 343 | default="human", 344 | type=str, 345 | required=False, 346 | ) 347 | handle_clash( 348 | parser, 349 | argument_name="--tags", 350 | shortcut="-tg", 351 | help_text="Specify which Decombinator tag set to use (extended or original). Default = extended", 352 | default="extended", 353 | type=str, 354 | required=False, 355 | ) 356 | parser.add_argument( 357 | "-npf", 358 | "--nonproductivefilter", 359 | action="store_true", 360 | help="Filter out non-productive reads from the output", 361 | ) 362 | handle_clash( 363 | parser, 364 | argument_name="--tagfastadir", 365 | shortcut="-tfdir", 366 | help_text="Path to folder containing TCR FASTA and Decombinator tag files, for offline analysis. Default = 'Decombinator-Tags-FASTAs'", 367 | default="Decombinator-Tags-FASTAs", 368 | type=str, 369 | required=False, 370 | ) 371 | handle_clash( 372 | parser, 373 | argument_name="--nobarcoding", 374 | shortcut="-nbc", 375 | help_text="Option to run CD3translator without barcoding, i.e. so as to run on data produced by any protocol.", 376 | action="store_true", 377 | ) 378 | 379 | 380 | def cli_args(): 381 | parser = create_parser() 382 | return vars(parser.parse_args()) 383 | 384 | 385 | def create_args_dict( 386 | infile: str, 387 | chain: str, 388 | bc_read: str, 389 | suppresssummary: bool = False, 390 | dontgzip: bool = False, 391 | dontcheck: bool = False, 392 | dontcount: bool = False, 393 | extension: str = "n12", 394 | prefix: str = "dcr_", 395 | orientation: str = "reverse", 396 | tags: str = "extended", 397 | species: str = "human", 398 | allowNs: bool = False, 399 | lenthreshold: int = 130, 400 | tagfastadir: str = "Decombinator-Tags-FASTAs", 401 | nobarcoding: bool = False, 402 | bclength: int = 42, 403 | minbcQ: int = 20, 404 | bcQbelowmin: int = 1, 405 | avgQthreshold: int = 30, 406 | percentlevdist: int = 10, 407 | bcthreshold: int = 2, 408 | dontcheckinput: bool = False, 409 | barcodeduplication: bool = False, 410 | positionalbarcodes: bool = False, 411 | oligo: str = "M13", 412 | writeclusters: bool = False, 413 | UMIhistogram: bool = False, 414 | nonproductivefilter: bool = False, 415 | outpath: str = None, 416 | dontsave: bool = False, 417 | command: str = None, 418 | ) -> dict: 419 | """ 420 | Creates a function argument dictionary to be used in Decombinator, 421 | Collapsinator, and CDR3translator. 422 | """ 423 | 424 | return { 425 | "infile": infile, 426 | "chain": chain, 427 | "bc_read": bc_read, 428 | "suppresssummary": suppresssummary, 429 | "dontgzip": dontgzip, 430 | "dontcheck": dontcheck, 431 | "dontcount": dontcount, 432 | "extension": extension, 433 | "prefix": prefix, 434 | "orientation": orientation, 435 | "tags": tags, 436 | "species": species, 437 | "allowNs": allowNs, 438 | "lenthreshold": lenthreshold, 439 | "tagfastadir": tagfastadir, 440 | "nobarcoding": nobarcoding, 441 | "bclength": bclength, 442 | "minbcQ": minbcQ, 443 | "bcQbelowmin": bcQbelowmin, 444 | "avgQthreshold": avgQthreshold, 445 | "percentlevdist": percentlevdist, 446 | "bcthreshold": bcthreshold, 447 | "dontcheckinput": dontcheckinput, 448 | "barcodeduplication": barcodeduplication, 449 | "positionalbarcodes": positionalbarcodes, 450 | "oligo": oligo, 451 | "writeclusters": writeclusters, 452 | "UMIhistogram": UMIhistogram, 453 | "nonproductivefilter": nonproductivefilter, 454 | "outpath": outpath, 455 | "dontsave": dontsave, 456 | "command": command, 457 | } 458 | 459 | 460 | def sort_permissions(fl): 461 | """ 462 | Need to ensure proper file permissions on output data. 463 | If users are running pipeline through Docker might otherwise require root access 464 | :param fl: The file to sort permissions on 465 | :return: Nothing: script edits permissions where appropriate, if possible 466 | """ 467 | 468 | if oct(os.stat(fl).st_mode)[4:] != "666": 469 | os.chmod(fl, 0o666) 470 | 471 | 472 | def write_out_intermediate(data: list, inputargs: dict, suffix: str): 473 | chain = inputargs["chain"] 474 | chainnams = {"a": "alpha", "b": "beta", "g": "gamma", "d": "delta"} 475 | filename_id = os.path.basename(inputargs["infile"]).split(".")[0] 476 | if inputargs["command"] in ["collapse", "translate"]: 477 | outfilename = inputargs["outpath"] + f"{filename_id}" + suffix 478 | else: 479 | outfilename = ( 480 | inputargs["outpath"] 481 | + inputargs["prefix"] 482 | + f"{filename_id}" 483 | + f"_{chainnams[chain.lower()]}" 484 | + suffix 485 | ) 486 | with open(outfilename, "w") as outfile: 487 | for line in data: 488 | outfile.write(", ".join(map(str, line)) + "\n") 489 | 490 | if not inputargs["dontgzip"]: 491 | print("Compressing intermediate output file to", outfilename + ".gz") 492 | 493 | with ( 494 | open(outfilename) as infile, 495 | gzip.open(outfilename + ".gz", "wt") as outfile, 496 | ): 497 | outfile.writelines(infile) 498 | os.unlink(outfilename) 499 | 500 | outfilenam = outfilename + ".gz" 501 | 502 | else: 503 | outfilenam = outfilename 504 | 505 | sort_permissions(outfilenam) 506 | 507 | 508 | def write_out_translated(data: pd.DataFrame, inputargs: dict): 509 | suffix = ".tsv" 510 | chain = inputargs["chain"] 511 | chainnams = {"a": "alpha", "b": "beta", "g": "gamma", "d": "delta"} 512 | filename_id = os.path.basename(inputargs["infile"]).split(".")[0] 513 | if inputargs["command"] in ["collapse", "translate"]: 514 | outfilename = inputargs["outpath"] + f"{filename_id}" + suffix 515 | else: 516 | outfilename = ( 517 | inputargs["outpath"] 518 | + inputargs["prefix"] 519 | + f"{filename_id}" 520 | + f"_{chainnams[chain.lower()]}" 521 | + suffix 522 | ) 523 | data.to_csv(f"{outfilename}", sep="\t", index=False) 524 | 525 | if not inputargs["dontgzip"]: 526 | print("Compressing pipeline output file to", outfilename + ".gz") 527 | 528 | with ( 529 | open(outfilename) as infile, 530 | gzip.open(outfilename + ".gz", "wt") as outfile, 531 | ): 532 | outfile.writelines(infile) 533 | os.unlink(outfilename) 534 | 535 | outfilenam = outfilename + ".gz" 536 | 537 | else: 538 | outfilenam = outfilename 539 | 540 | sort_permissions(outfilenam) 541 | -------------------------------------------------------------------------------- /src/decombinator/pipeline.py: -------------------------------------------------------------------------------- 1 | from .decombine import decombinator 2 | from .collapse import collapsinator 3 | from .translate import cdr3translator 4 | from .io import write_out_intermediate, write_out_translated, cli_args 5 | from datetime import datetime 6 | from typing import Optional, Any 7 | from importlib import metadata 8 | 9 | 10 | def run( 11 | args: Optional[dict[str, Any]] = None, 12 | cli_args: Optional[dict[str, Any]] = None, 13 | ): 14 | """ 15 | Run the Decombinator pipeline 16 | """ 17 | startTime = datetime.now() 18 | if not cli_args: 19 | input = args 20 | else: 21 | input = cli_args 22 | # Run pipline, ovewriting data after each function call to save memory 23 | data = decombinator(input) 24 | if not input["dontsave"]: 25 | write_out_intermediate(data, input, ".n12") 26 | print("Decombinator complete...") 27 | 28 | data = collapsinator(data=data, inputargs=input) 29 | if not input["dontsave"]: 30 | write_out_intermediate(data, input, ".freq") 31 | print("Collapsinator complete...") 32 | 33 | data = cdr3translator(data=data, inputargs=input) 34 | print("CDR3translator complete...") 35 | 36 | if not input["dontsave"]: 37 | write_out_translated(data, input) 38 | print(f"Pipeline complete in {datetime.now() - startTime}") 39 | 40 | 41 | def main(): 42 | input = cli_args() 43 | if input["command"] == "decombine": 44 | data = decombinator(input) 45 | write_out_intermediate(data, input, ".n12") 46 | elif input["command"] == "collapse": 47 | data = collapsinator(inputargs=input) 48 | write_out_intermediate(data, input, ".freq") 49 | elif input["command"] == "translate": 50 | data = cdr3translator(inputargs=input) 51 | write_out_translated(data, input) 52 | else: 53 | run(cli_args=input) 54 | 55 | 56 | # If called from the CLI 57 | if __name__ == "__main__": 58 | main() 59 | -------------------------------------------------------------------------------- /src/decombinator/translate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | CDR3translator 6 | https://innate2adaptive.github.io/Decombinator/ 7 | 8 | Take decombined data and translates/extracts the CDR3 sequences. 9 | In order to be classified as (potentially) productive, a rearrangement's CDR3s must be: 10 | in-frame 11 | lacking-stop codons 12 | run from a conserved cysteine to FGXG motif (or appropriate alternatives) 13 | 14 | The major change from v3 is that this version exports to the AIRRseq community tsv format, simplifying the process 15 | and crucially giving TCR gene name output in the raw format (in addition to the classic Decombinator fields). 16 | 17 | """ 18 | 19 | from __future__ import division 20 | from time import strftime 21 | import argparse 22 | import string 23 | import re 24 | import sys 25 | import collections as coll 26 | import os 27 | import urllib 28 | import warnings 29 | import gzip 30 | import pandas as pd 31 | from importlib import metadata 32 | 33 | 34 | # Supress Biopython translation warning when translating sequences where length % 3 != 0 35 | # TODO: Handle translation explicitly 36 | from Bio import BiopythonWarning 37 | from Bio.Seq import Seq 38 | from Bio import SeqIO 39 | 40 | # TODO Potentially add a flag to combine convergent recombinations into a single row? 41 | 42 | 43 | def findfile(filename): 44 | """ 45 | :param filename: Check whether input file exists or not 46 | :return: Nothing: script exits if given input file does not exist 47 | """ 48 | 49 | try: 50 | testopen = open(str(filename), "rt") 51 | testopen.close() 52 | except Exception: 53 | print("Cannot find the specified input file. Please try again") 54 | sys.exit() 55 | 56 | 57 | def read_tcr_file(species, tagset, gene, filetype, expected_dir_name): 58 | """ 59 | Reads in the associated data for the appropriate TCR locus from the ancillary files (hosted in own repo) 60 | :param species: human or mouse 61 | :param tagset: original or extended 62 | :param gene: V or J 63 | :param filetype: tag/fasta/translate/cdrs 64 | :param expected_dir_name: (by default) Decombinator-Tags-FASTAs 65 | :return: the opened file (either locally or remotely) 66 | """ 67 | # Define expected file name 68 | expected_file = ( 69 | species 70 | + "_" 71 | + tagset 72 | + "_" 73 | + "TR" 74 | + chain.upper() 75 | + gene.upper() 76 | + "." 77 | + filetype 78 | ) 79 | 80 | # First check whether the files are available locally (in pwd or in bundled directory) 81 | if os.path.isfile(expected_file): 82 | fl = expected_file 83 | 84 | elif os.path.isfile(expected_dir_name + os.sep + expected_file): 85 | fl = expected_dir_name + os.sep + expected_file 86 | 87 | else: 88 | try: 89 | fl = ( 90 | "https://raw.githubusercontent.com/innate2adaptive/Decombinator-Tags-FASTAs/master/" 91 | + expected_file 92 | ) 93 | urllib.request.urlopen(fl) # Request URL, see whether is found 94 | fl = urllib.request.urlretrieve(fl)[0] 95 | 96 | except Exception: 97 | print( 98 | "Cannot find following file locally or online:", expected_file 99 | ) 100 | print( 101 | "Please either run Decombinator with internet access, or point Decombinator to local copies " 102 | "of the tag and FASTA files with the '-tfdir' flag." 103 | ) 104 | sys.exit() 105 | 106 | # Return opened file, for either FASTA or tag file parsing 107 | return fl 108 | 109 | 110 | def sort_permissions(fl): 111 | """ 112 | Need to ensure proper file permissions on output data. 113 | If users are running pipeline through Docker might otherwise require root access 114 | :param fl: The file to sort permissions on 115 | :return: Nothing: script edits permissions where appropriate, if possible 116 | """ 117 | 118 | if oct(os.stat(fl).st_mode)[4:] != "666": 119 | os.chmod(fl, 0o666) 120 | 121 | 122 | def import_gene_information(inputargs): 123 | """ 124 | Obtains gene-specific information for translation 125 | Runs first: reads in V and J gene sequence and name data (from fasta files) 126 | and positions of conserved cysteine residues in V genes (from separate files) 127 | 128 | If files cannot be found in local directory, script looks for them online at GitHub 129 | 130 | NB that a number of psuedogenes have no officially designated conserved C (or indeed a 3' C at all) 131 | Where possible, the nearest suitable C residue is used, where not an arbitrary position of 0 is given 132 | Moot, as most psuedogenes contain a number of stop codons and thus cannot produce productive rearrangements 133 | 134 | First check that valid tag/species combinations have been used 135 | :param inputargs: command line (argparse) input arguments dictionary 136 | :return: Multiple items of TCR data: the V regions (sequence), J regions (sequence), V gene names, 137 | J gene names, V conserved C translate positions, V conserved position residue identidy, J conserved F 138 | translate position, J conserved position residue identity, V gene functionality, J gene functionality 139 | """ 140 | 141 | global chainnams, chain 142 | chain = inputargs["chain"] 143 | 144 | if inputargs["tags"] == "extended" and inputargs["species"] == "mouse": 145 | print( 146 | "Please note that there is currently no extended tag set for mouse TCR genes.\n" 147 | "Decombinator will now switch the tag set in use from 'extended' to 'original'.\n" 148 | "In future, consider editing the script to change the default, " 149 | "or use the appropriate flags (-sp mouse -tg original)." 150 | ) 151 | inputargs["tags"] = "original" 152 | 153 | if inputargs["tags"] == "extended" and (chain == "g" or chain == "d"): 154 | print( 155 | "Please note that there is currently no extended tag set for gamma/delta TCR genes.\n" 156 | "Decombinator will now switch the tag set in use from 'extended' to 'original'.\n" 157 | "In future, consider editing the script to change the default, or use the appropriate flags." 158 | ) 159 | inputargs["tags"] = "original" 160 | 161 | # Check species information 162 | if inputargs["species"] not in ["human", "mouse"]: 163 | print( 164 | "Species not recognised. Please select either 'human' (default) or 'mouse'.\n" 165 | "If mouse is required by default, consider changing the default value in the script." 166 | ) 167 | sys.exit() 168 | 169 | # Look for tag and V/J fasta and cysteine position files: if these cannot be found in the working directory, 170 | # source them from GitHub repositories 171 | # Note that fasta/tag files fit the pattern "species_tagset_gene.[fasta/tags]" 172 | # I.e. "[human/mouse]_[extended/original]_TR[A/B/G/D][V/J].[fasta/tags]" 173 | 174 | for gene in ["v", "j"]: 175 | # Get FASTA data 176 | fasta_file = read_tcr_file( 177 | inputargs["species"], 178 | inputargs["tags"], 179 | gene, 180 | "fasta", 181 | inputargs["tagfastadir"], 182 | ) 183 | globals()[gene + "_genes"] = list(SeqIO.parse(fasta_file, "fasta")) 184 | 185 | globals()[gene + "_regions"] = [ 186 | str(item.seq.upper()) for item in globals()[gene + "_genes"] 187 | ] 188 | globals()[gene + "_names"] = [ 189 | str(item.id.upper().split("|")[1]) 190 | for item in globals()[gene + "_genes"] 191 | ] 192 | 193 | # Get conserved translation residue sites and functionality data 194 | translation_file = open( 195 | read_tcr_file( 196 | inputargs["species"], 197 | inputargs["tags"], 198 | gene, 199 | "translate", 200 | inputargs["tagfastadir"], 201 | ), 202 | "rt", 203 | ) 204 | translate_data = [x.rstrip() for x in list(translation_file)] 205 | 206 | globals()[gene + "_translate_position"] = [ 207 | int(x.split(",")[1]) for x in translate_data 208 | ] 209 | globals()[gene + "_translate_residue"] = [ 210 | x.split(",")[2] for x in translate_data 211 | ] 212 | globals()[gene + "_functionality"] = [ 213 | x.split(",")[3] for x in translate_data 214 | ] 215 | 216 | if gene == "v": 217 | 218 | if inputargs["species"] == "human": 219 | # Get germline CDR data 220 | cdr_file = open( 221 | read_tcr_file( 222 | inputargs["species"], 223 | inputargs["tags"], 224 | gene, 225 | "cdrs", 226 | inputargs["tagfastadir"], 227 | ), 228 | "rt", 229 | ) 230 | cdr_data = [x.rstrip() for x in list(cdr_file)] 231 | cdr_file.close() 232 | v_cdr1 = [x.split(" ")[1] for x in cdr_data] 233 | v_cdr2 = [x.split(" ")[2] for x in cdr_data] 234 | else: 235 | # cdr_file only exists for human - CDR1 and CDR2 only written to output tsv 236 | # for human. Otherwise create empty lists fo v_cdr1 and v_cdr2, to write empty 237 | # fields to output tsv 238 | v_cdr1 = [""] * len(globals()[gene + "_genes"]) 239 | v_cdr2 = [""] * len(globals()[gene + "_genes"]) 240 | 241 | return ( 242 | v_regions, 243 | j_regions, 244 | v_names, 245 | j_names, 246 | v_translate_position, 247 | v_translate_residue, 248 | j_translate_position, 249 | j_translate_residue, 250 | v_functionality, 251 | j_functionality, 252 | v_cdr1, 253 | v_cdr2, 254 | ) 255 | 256 | 257 | def get_cdr3(dcr, headers, inputargs): 258 | """ 259 | Checks the productivity of a given DCR-assigned rearrangement. 260 | Note it requires certain items to be in memory: import_gene_information() must be run first 261 | :param dcr: the 5 part Decombinator identifier of a given sequence 262 | :param headers: the headers of the fields that will appear in the final output file (including empty ones) 263 | :return: a dictionary of the relevant output fields, for downstream transcription into the out file 264 | """ 265 | 266 | # NB: A productively rearranged receptor does not necessarily mean that it is the working receptor used in a cell! 267 | out_data = coll.defaultdict() 268 | for field in headers: 269 | out_data[field] = "" 270 | 271 | if inputargs["command"] == "translate": 272 | out_data["decombinator_id"] = ",".join(dcr) 273 | else: 274 | out_data["decombinator_id"] = ", ".join(dcr) 275 | out_data["rev_comp"] = "F" 276 | 277 | # CDR3-defining positions 278 | start_cdr3 = 0 279 | end_cdr3 = 0 280 | 281 | # 1. Rebuild whole nucleotide sequence from Decombinator assignment 282 | classifier_elements = dcr 283 | v = int(classifier_elements[0]) 284 | j = int(classifier_elements[1]) 285 | vdel = int(classifier_elements[2]) 286 | jdel = int(classifier_elements[3]) 287 | if inputargs["command"] == "translate": 288 | ins_nt = classifier_elements[4][1:] 289 | else: 290 | ins_nt = classifier_elements[4] 291 | 292 | # TODO remove 'split' if and when the gene names in the tag files get properly adjusted to be consistent 293 | out_data["v_call"] = v_names[v].split("*")[0] 294 | out_data["j_call"] = j_names[j].split("*")[0] 295 | 296 | if vdel == 0: 297 | v_used = v_regions[v] 298 | else: 299 | v_used = v_regions[v][:-vdel] 300 | 301 | j_used = j_regions[j][jdel:] 302 | 303 | out_data["sequence"] = "".join([v_used, ins_nt, j_used]) 304 | 305 | # 2. Translate 306 | with warnings.catch_warnings(): 307 | warnings.simplefilter("ignore", BiopythonWarning) 308 | out_data["sequence_aa"] = str(Seq(out_data["sequence"]).translate()) 309 | 310 | # 3. Check whether whole rearrangement is in frame 311 | if (len(out_data["sequence"]) - 1) % 3 == 0: 312 | out_data["productive"] = "T" 313 | out_data["vj_in_frame"] = "T" 314 | else: 315 | out_data["productive"] = "F" 316 | out_data["vj_in_frame"] = "F" 317 | 318 | # 4. Check for stop codons in the in-frame rearrangements 319 | if "*" in out_data["sequence_aa"]: 320 | out_data["productive"] = "F" 321 | out_data["stop_codon"] = "T" 322 | else: 323 | out_data["stop_codon"] = "F" 324 | 325 | # 5. Check for conserved cysteine in the V gene 326 | if ( 327 | out_data["sequence_aa"][v_translate_position[v] - 1] 328 | == v_translate_residue[v] 329 | ): 330 | start_cdr3 = v_translate_position[v] - 1 331 | out_data["conserved_c"] = "T" 332 | else: 333 | out_data["productive"] = "F" 334 | out_data["conserved_c"] = "F" 335 | 336 | # 5.5 Having found conserved cysteine, only need look downstream to find other end of CDR3 337 | downstream_c = out_data["sequence_aa"][start_cdr3:] 338 | 339 | # 6. Check for presence of FGXG motif (or equivalent) 340 | site = downstream_c[j_translate_position[j] : j_translate_position[j] + 4] 341 | 342 | if re.findall(j_translate_residue[j], site): 343 | end_cdr3 = len(downstream_c) + j_translate_position[j] + start_cdr3 + 1 344 | out_data["conserved_f"] = "T" 345 | else: 346 | out_data["productive"] = "F" 347 | out_data["conserved_f"] = "F" 348 | 349 | if out_data["productive"] == "T": 350 | out_data["junction_aa"] = out_data["sequence_aa"][start_cdr3:end_cdr3] 351 | out_data["junction"] = out_data["sequence"][ 352 | start_cdr3 * 3 : 3 * end_cdr3 353 | ] 354 | out_data["cdr1_aa"] = v_cdr1[v] 355 | out_data["cdr2_aa"] = v_cdr2[v] 356 | 357 | return out_data 358 | 359 | 360 | out_headers = [ 361 | "sequence_id", 362 | "v_call", 363 | "d_call", 364 | "j_call", 365 | "junction_aa", 366 | "duplicate_count", 367 | "sequence", 368 | "junction", 369 | "decombinator_id", 370 | "rev_comp", 371 | "productive", 372 | "sequence_aa", 373 | "cdr1_aa", 374 | "cdr2_aa", 375 | "vj_in_frame", 376 | "stop_codon", 377 | "conserved_c", 378 | "conserved_f", 379 | "sequence_alignment", 380 | "germline_alignment", 381 | "v_cigar", 382 | "d_cigar", 383 | "j_cigar", 384 | "av_UMI_cluster_size", 385 | ] 386 | 387 | 388 | def cdr3translator(inputargs: dict, data=None) -> list: 389 | """Function Wrapper for CDR3translator""" 390 | 391 | global counts 392 | counts = coll.Counter() 393 | 394 | print("Running CDR3Translator version", metadata.version("decombinator")) 395 | 396 | # Get chain information 397 | if not inputargs["chain"]: 398 | # If chain not given, try and infer from input file name 399 | chaincheck = [ 400 | x 401 | for x in ["alpha", "beta", "gamma", "delta"] 402 | if x in inputargs["infile"].lower() 403 | ] 404 | if len(chaincheck) == 1: 405 | chain = chaincheck[0][0] 406 | else: 407 | print( 408 | "TCR chain not recognised. Please choose from a/b/g/d (case-insensitive)." 409 | ) 410 | sys.exit() 411 | else: 412 | if inputargs["chain"].upper() in ["A", "ALPHA", "TRA", "TCRA"]: 413 | chain = "a" 414 | elif inputargs["chain"].upper() in ["B", "BETA", "TRB", "TCRB"]: 415 | chain = "b" 416 | elif inputargs["chain"].upper() in ["G", "GAMMA", "TRG", "TCRG"]: 417 | chain = "g" 418 | elif inputargs["chain"].upper() in ["D", "DELTA", "TRD", "TCRD"]: 419 | chain = "d" 420 | else: 421 | print( 422 | "TCR chain not recognised. Please choose from a/b/g/d (case-insensitive)." 423 | ) 424 | sys.exit() 425 | 426 | inputargs["chain"] = ( 427 | chain # Correct inputarg chain value so that import gene function gets correct input 428 | ) 429 | 430 | suffix = ".tsv" 431 | 432 | # Extract CDR3s # TODO create class object to hold globals 433 | global v_regions, j_regions, v_names, j_names, v_translate_position, v_translate_residue, j_translate_position, j_translate_residue, v_functionality, j_functionality, v_cdr1, v_cdr2 434 | ( 435 | v_regions, 436 | j_regions, 437 | v_names, 438 | j_names, 439 | v_translate_position, 440 | v_translate_residue, 441 | j_translate_position, 442 | j_translate_residue, 443 | v_functionality, 444 | j_functionality, 445 | v_cdr1, 446 | v_cdr2, 447 | ) = import_gene_information(inputargs) 448 | 449 | if inputargs["command"] == "translate": 450 | filename = inputargs["infile"] 451 | findfile(filename) 452 | if inputargs["infile"].endswith(".gz"): 453 | opener = gzip.open 454 | else: 455 | opener = open 456 | infile = opener(filename, "rt") 457 | 458 | else: 459 | infile = data 460 | 461 | counts["line_count"] = 0 462 | 463 | # Count non-productive rearrangments 464 | chainnams = {"a": "alpha", "b": "beta", "g": "gamma", "d": "delta"} 465 | 466 | print( 467 | "Translating", chainnams[chain], "chain CDR3s from", inputargs["infile"] 468 | ) 469 | 470 | filename_id = os.path.basename(inputargs["infile"]).split(".")[0] 471 | outfilename = filename_id + suffix 472 | 473 | out_data = [] 474 | 475 | for line in infile: 476 | 477 | counts["line_count"] += 1 478 | if inputargs["command"] == "translate": 479 | tcr_data = line.rstrip().split(",") 480 | tcr_data[5] = int(tcr_data[5]) 481 | tcr_data[6] = int(tcr_data[6]) 482 | in_dcr = tcr_data[:5] 483 | else: 484 | tcr_data = line 485 | in_dcr = tcr_data[:5] 486 | v = int(tcr_data[0]) 487 | j = int(tcr_data[1]) 488 | 489 | if inputargs["nobarcoding"]: 490 | use_freq = False 491 | frequency = 1 492 | av_UMI_cluster_size = "" 493 | 494 | else: 495 | if isinstance(tcr_data[5], int): 496 | frequency = tcr_data[5] 497 | else: 498 | print( 499 | "TCR frequency could not be detected. If using non-barcoded data," 500 | " please include the additional '-nbc' argument when running" 501 | " CDR3translator." 502 | ) 503 | sys.exit() 504 | 505 | if isinstance(tcr_data[6], (int, float)): 506 | av_UMI_cluster_size = tcr_data[6] 507 | else: 508 | av_UMI_cluster_size = "" 509 | 510 | cdr3_data = get_cdr3(in_dcr, out_headers, inputargs) 511 | cdr3_data["sequence_id"] = str(counts["line_count"]) 512 | 513 | cdr3_data["duplicate_count"] = frequency 514 | cdr3_data["av_UMI_cluster_size"] = av_UMI_cluster_size 515 | 516 | if cdr3_data["productive"] == "T": 517 | counts["prod_recomb"] += 1 518 | productivity = "P" 519 | out_data.append([cdr3_data[x] for x in out_headers]) 520 | else: 521 | productivity = "NP" 522 | counts["NP_count"] += 1 523 | if not inputargs["nonproductivefilter"]: 524 | out_data.append([cdr3_data[x] for x in out_headers]) 525 | 526 | # Count the number of number of each type of gene functionality (by IMGT definitions, based on prototypic) 527 | if inputargs["tags"] == "extended" and inputargs["species"] == "human": 528 | counts[productivity + "_" + "V-" + v_functionality[v]] += 1 529 | counts[productivity + "_" + "J-" + j_functionality[j]] += 1 530 | 531 | out_df = pd.DataFrame(out_data, columns=out_headers) 532 | 533 | print("CDR3 data written to dataframe") 534 | 535 | # Write data to summary file 536 | if not inputargs["suppresssummary"]: 537 | 538 | logpath = inputargs["outpath"] + f"Logs{os.sep}" 539 | 540 | # Check for directory and make summary file 541 | if not os.path.exists(logpath): 542 | os.makedirs(logpath) 543 | date = strftime("%Y_%m_%d") 544 | 545 | # Check for existing date-stamped file 546 | summaryname = ( 547 | logpath 548 | + date 549 | + "_" 550 | + "dcr_" 551 | + filename_id 552 | + f"_{chainnams[chain]}" 553 | + "_CDR3_Translation_Summary.csv" 554 | ) 555 | if not os.path.exists(summaryname): 556 | summaryfile = open(summaryname, "wt") 557 | else: 558 | # If one exists, start an incremental day stamp 559 | for i in range(2, 10000): 560 | summaryname = ( 561 | logpath 562 | + date 563 | + "_" 564 | + "dcr_" 565 | + filename_id 566 | + f"_{chainnams[chain]}" 567 | + "_CDR3_Translation_Summary" 568 | + str(i) 569 | + ".csv" 570 | ) 571 | if not os.path.exists(summaryname): 572 | summaryfile = open(summaryname, "wt") 573 | break 574 | 575 | inout_name = ( 576 | "_".join(f"{filename_id}".split("_")[:-1]) + f"_{chainnams[chain]}" 577 | ) 578 | 579 | # Generate string to write to summary file 580 | summstr = ( 581 | "Property,Value\nDirectory," 582 | + os.getcwd() 583 | + "\nInputFile," 584 | + inout_name 585 | + "\nOutputFile," 586 | + inout_name 587 | + "\nDateFinished," 588 | + date 589 | + "\nTimeFinished," 590 | + strftime("%H:%M:%S") 591 | + "\n\nInputArguments:,\n" 592 | ) 593 | for s in ["species", "chain", "tags", "dontgzip"]: 594 | summstr = summstr + s + "," + str(inputargs[s]) + "\n" 595 | 596 | summstr = ( 597 | summstr 598 | + "\nNumberUniqueDCRsInput," 599 | + str(counts["line_count"]) 600 | + "\nNumberUniqueDCRsProductive," 601 | + str(counts["prod_recomb"]) 602 | + "\nNumberUniqueDCRsNonProductive," 603 | + str(counts["NP_count"]) 604 | ) 605 | 606 | if inputargs["tags"] == "extended" and inputargs["species"] == "human": 607 | summstr = summstr + "\n\nFunctionalityOfGermlineGenesUsed," 608 | for p in ["P", "NP"]: 609 | for g in ["V", "J"]: 610 | for f in ["F", "ORF", "P"]: 611 | target = p + "_" + g + "-" + f 612 | summstr = ( 613 | summstr + "\n" + target + "," + str(counts[target]) 614 | ) 615 | 616 | print(summstr, file=summaryfile) 617 | summaryfile.close() 618 | sort_permissions(summaryname) 619 | 620 | del counts 621 | return out_df 622 | 623 | 624 | if __name__ == "__main__": 625 | print( 626 | "Calling CDR3translator from the shell has been depreciated as of Decombinator V4.3. \ 627 | Please check the README on how to update your script, or alternativley change branch to decombinator_v4.2 \ 628 | which retains this functionality." 629 | ) 630 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/innate2adaptive/decombinator/113203ebf61d949b61fdec717e14ea736fe71ef9/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import pytest 3 | 4 | 5 | @pytest.fixture(scope="session") 6 | def resource_location() -> pathlib.Path: 7 | return pathlib.Path("tests/resources") 8 | 9 | 10 | @pytest.fixture(scope="session") 11 | def chain_name() -> dict: 12 | return {"a": "alpha", "b": "beta"} 13 | -------------------------------------------------------------------------------- /tests/resources/2023_01_31_alpha_TINY_1_Decombinator_Summary.csv: -------------------------------------------------------------------------------- 1 | Property,Value 2 | Directory,/home/yutanagano/Projects/Decombinator-Test-Data 3 | InputFile,TINY_1.fq.gz 4 | OutputFile,dcr_TINY_1_alpha.n12.gz 5 | DateFinished,2023_01_31 6 | TimeFinished,14:59:14 7 | TimeTaken(Seconds),0.01 8 | 9 | InputArguments:, 10 | species,human 11 | chain,a 12 | extension,n12 13 | tags,extended 14 | dontgzip,False 15 | allowNs,False 16 | orientation,reverse 17 | lenthreshold,130 18 | bc_read,R2 19 | bclength,42 20 | 21 | NumberReadsInput,100 22 | NumberReadsDecombined,35 23 | PercentReadsDecombined,0.35 24 | 25 | ReadsAssignedUsingHalfTags:, 26 | V1error,0 27 | V2error,1 28 | J1error,0 29 | J2error,0 30 | 31 | ReadsFilteredOut:, 32 | AmbiguousBaseCall(DCR),0 33 | AmbiguousBaseCall(Barcode),0 34 | OverlongInterTagSeq,0 35 | ImpossibleDeletions,0 36 | OverlappingTagBoundaries,0 37 | 38 | ReadsFailedAssignment:, 39 | MultipleVtagMatches,0 40 | VTagAtEndRead,0 41 | VDeletionsUndetermined,0 42 | FoundV1HalfTagNotV2,7 43 | FoundV2HalfTagNotV1,9 44 | NoVDetected,49 45 | MultipleJTagMatches,0 46 | JDeletionsUndermined,0 47 | FoundJ1HalfTagNotJ2,0 48 | FoundJ2HalfTagNotJ1,0 49 | NoJDetected,0 50 | -------------------------------------------------------------------------------- /tests/resources/2023_01_31_beta_TINY_1_Decombinator_Summary.csv: -------------------------------------------------------------------------------- 1 | Property,Value 2 | Directory,/home/yutanagano/Projects/Decombinator-Test-Data 3 | InputFile,TINY_1.fq.gz 4 | OutputFile,dcr_TINY_1_beta.n12.gz 5 | DateFinished,2023_01_31 6 | TimeFinished,14:59:16 7 | TimeTaken(Seconds),0.01 8 | 9 | InputArguments:, 10 | species,human 11 | chain,b 12 | extension,n12 13 | tags,extended 14 | dontgzip,False 15 | allowNs,False 16 | orientation,reverse 17 | lenthreshold,130 18 | bc_read,R2 19 | bclength,42 20 | 21 | NumberReadsInput,100 22 | NumberReadsDecombined,42 23 | PercentReadsDecombined,0.42 24 | 25 | ReadsAssignedUsingHalfTags:, 26 | V1error,1 27 | V2error,1 28 | J1error,0 29 | J2error,0 30 | 31 | ReadsFilteredOut:, 32 | AmbiguousBaseCall(DCR),0 33 | AmbiguousBaseCall(Barcode),0 34 | OverlongInterTagSeq,0 35 | ImpossibleDeletions,0 36 | OverlappingTagBoundaries,0 37 | 38 | ReadsFailedAssignment:, 39 | MultipleVtagMatches,0 40 | VTagAtEndRead,0 41 | VDeletionsUndetermined,0 42 | FoundV1HalfTagNotV2,1 43 | FoundV2HalfTagNotV1,2 44 | NoVDetected,55 45 | MultipleJTagMatches,0 46 | JDeletionsUndermined,0 47 | FoundJ1HalfTagNotJ2,0 48 | FoundJ2HalfTagNotJ1,0 49 | NoJDetected,0 50 | -------------------------------------------------------------------------------- /tests/resources/2023_01_31_dcr_TINY_1_alpha_CDR3_Translation_Summary.csv: -------------------------------------------------------------------------------- 1 | Property,Value 2 | Directory,/home/yutanagano/Projects/Decombinator-Test-Data 3 | InputFile,dcr_TINY_1_alpha.freq.gz 4 | OutputFile,dcr_TINY_1_alpha.tsv.gz 5 | DateFinished,2023_01_31 6 | TimeFinished,14:59:17 7 | 8 | InputArguments:, 9 | species,human 10 | chain,a 11 | tags,extended 12 | dontgzip,False 13 | 14 | NumberUniqueDCRsInput,26 15 | NumberUniqueDCRsProductive,25 16 | NumberUniqueDCRsNonProductive,1 17 | 18 | FunctionalityOfGermlineGenesUsed, 19 | P_V-F,0 20 | P_V-ORF,0 21 | P_V-P,0 22 | P_J-F,0 23 | P_J-ORF,0 24 | P_J-P,0 25 | NP_V-F,0 26 | NP_V-ORF,0 27 | NP_V-P,0 28 | NP_J-F,0 29 | NP_J-ORF,0 30 | NP_J-P,0 31 | -------------------------------------------------------------------------------- /tests/resources/2023_01_31_dcr_TINY_1_alpha_Collapsing_Summary.csv: -------------------------------------------------------------------------------- 1 | Property,Value 2 | Version,4.0.4 3 | Directory,/home/yutanagano/Projects/Decombinator-Test-Data 4 | InputFile,dcr_TINY_1_alpha.n12.gz 5 | OutputFile,dcr_TINY_1_alpha.freq.gz 6 | DateFinished,2023_01_31 7 | TimeFinished,14:59:16 8 | TimeTaken(Seconds),0.0 9 | 10 | extension,freq 11 | dontgzip,False 12 | allowNs,False 13 | dontcheckinput,False 14 | barcodeduplication,False 15 | minbcQ,20 16 | bcQbelowmin,1 17 | bcthreshold,2 18 | lenthreshold,130 19 | percentlevdist,10 20 | avgQthreshold,30 21 | positionalbarcodes,False 22 | oligo,M13 23 | 24 | InputUncollapsedDCRLines,35 25 | UniqueDCRsPassingFilters,26 26 | TotalDCRsPassingFilters,30 27 | PercentDCRPassingFilters(withbarcode),0.857 28 | UniqueDCRsPostCollapsing,26 29 | TotalDCRsPostCollapsing,26 30 | PercentUniqueDCRsKept,1.0 31 | PercentTotalDCRsKept,0.867 32 | AverageInputTCRAbundance,1.154 33 | AverageOutputTCRAbundance,1.0 34 | AverageRNAduplication,1.154 35 | 36 | BarcodeFail_ContainedNs,0 37 | BarcodeFail_SpacersNotFound,0 38 | BarcodeFail_LowQuality,5 39 | -------------------------------------------------------------------------------- /tests/resources/2023_01_31_dcr_TINY_1_beta_CDR3_Translation_Summary.csv: -------------------------------------------------------------------------------- 1 | Property,Value 2 | Directory,/home/yutanagano/Projects/Decombinator-Test-Data 3 | InputFile,dcr_TINY_1_beta.freq.gz 4 | OutputFile,dcr_TINY_1_beta.tsv.gz 5 | DateFinished,2023_01_31 6 | TimeFinished,14:59:18 7 | 8 | InputArguments:, 9 | species,human 10 | chain,b 11 | tags,extended 12 | dontgzip,False 13 | 14 | NumberUniqueDCRsInput,35 15 | NumberUniqueDCRsProductive,34 16 | NumberUniqueDCRsNonProductive,1 17 | 18 | FunctionalityOfGermlineGenesUsed, 19 | P_V-F,0 20 | P_V-ORF,0 21 | P_V-P,0 22 | P_J-F,0 23 | P_J-ORF,0 24 | P_J-P,0 25 | NP_V-F,0 26 | NP_V-ORF,0 27 | NP_V-P,0 28 | NP_J-F,0 29 | NP_J-ORF,0 30 | NP_J-P,0 31 | -------------------------------------------------------------------------------- /tests/resources/2023_01_31_dcr_TINY_1_beta_Collapsing_Summary.csv: -------------------------------------------------------------------------------- 1 | Property,Value 2 | Version,4.0.4 3 | Directory,/home/yutanagano/Projects/Decombinator-Test-Data 4 | InputFile,dcr_TINY_1_beta.n12.gz 5 | OutputFile,dcr_TINY_1_beta.freq.gz 6 | DateFinished,2023_01_31 7 | TimeFinished,14:59:16 8 | TimeTaken(Seconds),0.0 9 | 10 | extension,freq 11 | dontgzip,False 12 | allowNs,False 13 | dontcheckinput,False 14 | barcodeduplication,False 15 | minbcQ,20 16 | bcQbelowmin,1 17 | bcthreshold,2 18 | lenthreshold,130 19 | percentlevdist,10 20 | avgQthreshold,30 21 | positionalbarcodes,False 22 | oligo,M13 23 | 24 | InputUncollapsedDCRLines,42 25 | UniqueDCRsPassingFilters,35 26 | TotalDCRsPassingFilters,38 27 | PercentDCRPassingFilters(withbarcode),0.905 28 | UniqueDCRsPostCollapsing,35 29 | TotalDCRsPostCollapsing,35 30 | PercentUniqueDCRsKept,1.0 31 | PercentTotalDCRsKept,0.921 32 | AverageInputTCRAbundance,1.086 33 | AverageOutputTCRAbundance,1.0 34 | AverageRNAduplication,1.086 35 | 36 | BarcodeFail_ContainedNs,0 37 | BarcodeFail_SpacersNotFound,1 38 | BarcodeFail_LowQuality,3 39 | -------------------------------------------------------------------------------- /tests/resources/TINY_1.fq: -------------------------------------------------------------------------------- 1 | @A00261:687:HMMCCDSX5:3:1101:24948:1047 1:N:0:AACCGCGGAT+CTAGCGCTGT 2 | TTCTCTGGTGGGAACACGTTTTTCAGGTCCTCTGGAAAGGGAAGAGGTAATGGGGCTAGGGTTGCTCTAAGAGCTGTCTGGTCCTGGTAGGGGCTCTGTGTATGTGTGAGAGAGAAGGCCGGGAAAGGACCATAATGAAGCACAATTATG 3 | + 4 | FFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFF:F:FFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 5 | @A00261:687:HMMCCDSX5:3:1101:27272:1063 1:N:0:AACCGCGGAT+CTAGCGCTGT 6 | TCTTTCGGTGGGAACACCTTGTTCAGGTCCTCTACAACGGTTAACCTGGTCCCCGAACCGAAGGTGTAGCCATAGTCCGTTGTCCCCACAATGTTACAGCTTTGTACAAAAACAGCCCCTCCCATGGGTCCGCCCCCAGAGCCTGGGAGA 7 | + 8 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 9 | @A00261:687:HMMCCDSX5:3:1101:5050:1094 1:N:0:AACCGCGGAT+CTAGCGCTGT 10 | AAAAACACGGCAGGGTCAGGGTTCTGGATATTTGGTTTAACTAGCACCCTGGTTCCTCCTCCAAAAGTTAGCTTGTTGCCTGCAGCTTTAGCCACCACACAGAGGTAGGTGGCTGAATCACTGAGCTTGGAGTCTCTGATGAGCAGGGAA 11 | + 12 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF::FFFFFFFFFF 13 | @A00261:687:HMMCCDSX5:3:1101:18331:1110 1:N:0:ACCCGCGGAT+CTAGCGCTGT 14 | TTATTTACGGCAGGGTCAGGGTTCTGGATATCAGGTAAAACAGTCAATTGTGTCCCAGATCCAAAGGTCAGTTGCCTTGCAGAACCAGAAAAGGCGCCCCTGACGATGCAATAGTACACAGCAGTGTCTCTCAGCGTAGCGTGGGGCAGG 15 | + 16 | FFFFFFFFFFFFFFFFFFF:FF:FFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFF:FFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFF 17 | @A00261:687:HMMCCDSX5:3:1101:8684:1125 1:N:0:AACCGCGGAT+CTAGCGCTGT 18 | CGCGGTGGTGGGAACACGTTTTTCAGGTCCTCGAGCACCAGGAGCCGCGTGCCTGGCCCGAAGTACTGGGTCTCTTGGGTCCCCGCGGGGGTAGTAGCACTGCAGATGTAGAAGCTGCTGTCTTCAGGATGGGCACTGGTCACTGTCAGA 19 | + 20 | FFFFFFFFFFFFF:FFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFF,FFFFFFFFFFFFFFFF::FFFFFFFFFFFFFFF:FFF,:FFFFFF:FFFF:FFFFF:FFFFFFFFFFFFFFF:FF 21 | @A00261:687:HMMCCDSX5:4:1101:8684:1125 1:N:0:AACCGCGGAT+CTAGCGCTGT 22 | CGCGGTGGTGGGAACACGTTTTTCAGGTCCTCGAGCACCAGGAGCCGCGTGCCTGGCCCGAAGTACTGGGTCTCTTGGGTCCCCGCGGGGGTAGTAGCACTGCAGATGTAGAAGCTGCTGTCTTCAGGATGGGCACTGGTCACTGTCAGA 23 | + 24 | FFFFFFFFFFFFF:FFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFF,FFFFFFFFFFFFFFFF::FFFFFFFFFFFFFFF:FFF,:FFFFFF:FFFF:FFFFF:FFFFFFFFFFFFFFF:FF 25 | @A00261:687:HMMCCDSX5:5:1101:8684:1125 1:N:0:AACCGCGGAT+CTAGCGCTGT 26 | CGCGGTGGTGGGAACACGTTTTTCAGGTCCTCGAGCACCAGGAGCCGCGTGCCTGGCCCGAAGTACTGGGTCTCTTGGGTCTCCGCGGGGGTAGTAGCACTGCAGATGTAGAAGCTGCTGTCTTCAGGATGGGCACTGGTCACTGTCAGA 27 | + 28 | FFFFFFFFFFFFF:FFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFF,FFFFFFFFFFFFFFFF::FFFFFFFFFFFFFFF:FFF,:FFFFFF:FFFF:FFFFF:FFFFFFFFFFFFFFF:FF 29 | @A00261:687:HMMCCDSX5:5:1101:8684:1125 1:N:0:AACCGCGGAT+CTAGCGCTGT 30 | CGCGGTGGTGGGAACACGTTTTTCAGGTCCTCGAGCACCAGGAGCCGCGTGCCTGGCCCGAAGTACTGGGTCTCTTGGGTCTTCGCGGGGGTAGTAGCACTGCAGATGTAGAAGCTGCTGTCTTCAGGATGGGCACTGGTCACTGTCAGA 31 | + 32 | FFFFFFFFFFFFF:FFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFF,FFFFFFFFFFFFFFFF::FFFFFFFFFFFFFFF:FFF,:FFFFFF:FFFF:FFFFF:FFFFFFFFFFFFFFF:FF 33 | @A00261:687:HMMCCDSX5:5:1101:8684:1125 1:N:0:AACCGCGGAT+CTAGCGCTGT 34 | CGCGGTGGTGGGAACACGTTTTTCAGGTCCTCGAGCACCAGGAGCCGCGTGCCTGGCCCGAAGTACTGGGTCTCTTGGGTCTTTGCGGGGGTAGTAGCACTGCAGATGTAGAAGCTGCTGTCTTCAGGATGGGCACTGGTCACTGTCAGA 35 | + 36 | FFFFFFFFFFFFF:FFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFF,FFFFFFFFFFFFFFFF::FFFFFFFFFFFFFFF:FFF,:FFFFFF:FFFF:FFFFF:FFFFFFFFFFFFFFF:FF 37 | @A00261:687:HMMCCDSX5:6:1101:8684:1125 1:N:0:AACCGCGGAT+CTAGCGCTGT 38 | CGCGGTGGTGGGAACACGTTTTTCAGGTCCTCGAGCACCAGGAGCCGCGTGCCTGGCCCGAAGTACTGGGTCTCTTGGGTCCCCGCGGGGGTAGTAGCACTGCAGATGTAGAAGCTGCTGTCTTCAGGATGGGCACTGGTCACTGTCAGA 39 | + 40 | FFFFFFFFFFFFF:FFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFF,FFFFFFFFFFFFFFFF::FFFFFFFFFFFFFFF:FFF,:FFFFFF:FFFF:FFFFF:FFFFFFFFFFFFFFF:FF 41 | @A00261:687:HMMCCDSX5:7:1101:8684:1125 1:N:0:AACCGCGGAT+CTAGCGCTGT 42 | CGCGGTGGTGGGAACACGTTTTTCAGGTCCTCGAGCACCAGGAGCCGCGTGCCTGGCCCGAAGTACTGGGTCTCTTGGGGGGGTAGGTCCCCGCTAGCACTGCAGATGTAGAAGCTGCTGTCTTCAGGATGGGCACTGGTCACTGTCAGA 43 | + 44 | FFFFFFFFFFFFF:FFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFF,FFFFFFFFFFFFFFFF::FFFFFFFFFFFFFFF:FFF,:FFFFFF:FFFF:FFFFF:FFFFFFFFFFFFFFF:FF 45 | @A00261:687:HMMCCDSX5:3:1101:29604:1125 1:N:0:AACCGCGGAT+CTAGCGCTGT 46 | AAAACCGGTGGGAACACCTTGTTCAGGTCCTCTACAACTGTGAGTCTGGTGCCTTGTCCAAAGAAAGCTTCAGCTGTCCCTCCGCTGCTGGCACAGAGATACATGGCCGAGTCCCCCTGCTCTGTGCGCTGGATCTCCAAGGTGGAGAAA 47 | + 48 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 49 | @A00261:687:HMMCCDSX5:3:1101:17300:1141 1:N:0:AACCGCGGAT+CTAGCGCTGT 50 | CCCCAAGGTGGGAACACGTTTTTCAGGTCCTCTAGCACGGTGAGCCGTGTCCCTGGCCCGAAGAACTGCTCAGTGTAGGAGTATTCCGCTAGGCTAGCACTGCAGATGTAGAAGCTGCTGTCTTCAGGATGGGCACTGGTCACTGTCAGA 51 | + 52 | FFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF::FF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFF:FF:FFFFFFFFFFFFFFFFFFFFFFFFFFFF 53 | @A00261:687:HMMCCDSX5:3:1101:26793:1141 1:N:0:AACCGCGGAT+CTAGCGCTGT 54 | AAAGGCGGTGGGAACACGTTTTTCAGGTCCTCTAGCACGGTGAGCCGTGTCCCTGGCCCGAAGAACTGCTCATTGTAGGAGAACCCCCCGTGAGGGCTGCTGGCACAGAAATAAACTCCAGAATCCTCCAGTTCTGCAGGCTGCACCTTC 55 | + 56 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFF 57 | @A00261:687:HMMCCDSX5:3:1101:28854:1141 1:N:0:AACCGCGGAT+CTAGCGCTGT 58 | AAAGAAGGTGGGAACACGTTTTTCAGGTCCTCCAGTACGGTCAGCCTAGAGCCTTCTCCAAAAAACAGCTCCCCTCACCCGCTAGTCCCGGCTGCTGGCGCAGAAATATACAGATGTCGGGGAGGAGGCAGCAGACTCCAGAGTGAGGGG 59 | + 60 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFF:FFFFF,:,,FF:FF,,,,:FF,,:FF,FFFFFF,:F:,FFFFF:,,,FF,FFFF 61 | @A00261:687:HMMCCDSX5:3:1101:22480:1157 1:N:0:CACCGCGGAT+CTAGCGCTGT 62 | AGAGGAGGTGGGAACACGTTTTTCAGGTCCTCTGTGACCGTGAGCCTGGTGCCCGGCCCGAAGTACTGCTCGTAGGAGACCCCTGGCTCCAAGCTGCTGGCGCAAAGATAAAGGGCCGAGTCCCCCAGCTCCAAGGTGCTCACATTCATC 63 | + 64 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFF::FFFFF 65 | @A00261:687:HMMCCDSX5:3:1101:28926:1172 1:N:0:CACCGCGGAT+CTAGCGCTGT 66 | AGTCGTGGTGGGAACACGTTTTTCAGGTCCTCGAGCACTGTCAGCCGGGTGCCTGGGCCAAAATACTGCGTATCTGTGGAAAAGGTGCCGCTAGGACTGCTGGCACAGAGATACAGGGCCGAGTCTTCTGGCTGCAGGGTGTGTAGGTGA 67 | + 68 | ,FFFF:FFFFFFFFFFFFFFFFFF:FF:FFF:FFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFF:FFFFFF 69 | @A00261:687:HMMCCDSX5:3:1101:28592:1188 1:N:0:AACCGCGGAT+CTAGCGCTGT 70 | AGAAAAACGGCAGGGTCAGGGTTCTGGATATCAGAAGAGATTAACAGTTCCACCATAGGAATAGTTGCATCCCAACTCCAGTGAGGCTGCTTCAGAGAGAATTACGTGGTGGTTATGCTGGCTCACAGACTGGGCTCTGGCATCTCTCAG 71 | + 72 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFF 73 | @A00261:687:HMMCCDSX5:3:1101:4417:1219 1:N:0:AACCGCGGAT+CTAGCGCTGT 74 | AAAAACACGGCAGGGTCAGGGTTCTGGATATTTGGTTTAACTAGCACCCTGGTTCCTCCTCCAAAAGTTAGCTTGTTGCCTGCAGCTTTAGCCACCACACAGAGGTAGGTGGCTGAATCACTGAGCTTGGAGTCTCTGATGAGCAGGGAA 75 | + 76 | FFFFFFFFFFFFFFFFFFFFFFFF:FFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFF, 77 | @A00261:687:HMMCCDSX5:3:1101:3794:1235 1:N:0:ACCCGCGGAT+CTAGCGCTGT 78 | CTCGGTACGGCAGGGTCAGGGTTCTGGATATTCGGATTTACTGCCAGGCTTGTTCCCAATCCCCAAATCAGCTTACGGTTGTTGCCAGCATTTAAGGTCTCCACAGCACAGAGGTAGATGGCCGAGTCTCCGGTCTGGGTGGCTGTGATG 79 | + 80 | FFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFF:FFFFF,FFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,::FFF:FFF:FFFF:F 81 | @A00261:687:HMMCCDSX5:3:1101:23737:1266 1:N:0:AACCGCGGAT+CGAGCGCTGT 82 | CCTCACGGTGGGAACACCTTGTTCAGGTCCTCCAAGACAGAGAGCTGGGTTCCACTGCCAAAAAACAGTTTTTCACTAACTGCCCCAAAACCCCTAGCACTGCAGATGTAGAAGCTGCTGTCTTCAGGATGGGCACTGGTCACTGTCAGA 83 | + 84 | FFFFFFFF,FFFF:FFFF,FF,:F,FF,F:FFFFFFFF,FFFFFFFFFFF:F:,F,FFFF,F::F::FF::::FFFF:FF:FFFFFF:FFFFFF::FF:FFFFFFF:FF,FF:FF:FF:F:F,FFFFF::FF:F,FFFF:F:FFF:F:FF 85 | @A00261:687:HMMCCDSX5:3:1101:29966:1282 1:N:0:AACCGCGGAT+CTAGCGCTGT 86 | TTTGCCGGTGGGAACACGTTTTTCAGGTCCTCTGTGACCGTGAGCCTGGTGCCCGGCCCGAAGTACTGCTCGTAGGAGCTGCCCCTTTCAACGCTGCAGAGATATATGCTGCTGTCTTCAGGGCTCATGTTGCTCACAGTCAGAGTTGAG 87 | + 88 | FFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFF 89 | @A00261:687:HMMCCDSX5:3:1101:20627:1297 1:N:0:ACCCGCGGAT+CTAGCGCTGT 90 | CGCGTGGTGGGAACACGTTTTTCAGGTCCTCCAGCACGGTCAGCCTGCTGCCGGCCCCGAAAGTCAGGACGTTGGCCCCAGCATCGGTGCTGCTGGCACAGAGATACAGGGCCGAGTCGTCCAGCTCCAAGGCGTTCACATTCAGCTCAG 91 | + 92 | FFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFF 93 | @A00261:687:HMMCCDSX5:3:1101:19027:1313 1:N:0:AACCGCGGAT+CTAGCGCTGT 94 | CGTGACACGGCAGGGTCAGGGTTCTGGATATTGGGTATGATGGTGAGTCTTGTTCCAGTCCCAAAGGTTAATTTCTCATTTCAAACAGCACAGAGGTAGGTGGCTGAGTCACCAGGCTGAGAAGCTGCAATGTATAAAGTACTACGTCCT 95 | + 96 | FFFFFFFFFFFFFFFFFFFFFFFF:FF:FF:FFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFF 97 | @A00261:687:HMMCCDSX5:3:1101:6876:1344 1:N:0:AACCGCGGAT+CTAGCGCTGT 98 | TTATCTACGGCAGGGTCAGGGTTCTGGATATTCGGATTTACTGCCAGGCTTGTTCCCAATCCCCAAATCAGCTTACGGTTGTTGCCAGCATTACACAGTCATAGAAAGCTTTACCAAAACCGACCAGGCCAATGTGTATCAGGAAATCCC 99 | + 100 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFF 101 | @A00261:687:HMMCCDSX5:3:1101:29170:1344 1:N:0:ACCCGCGGAT+CTAGCGCTGT 102 | TTTTTTACGGCAGGGTCAGGGTTCTGGATATCTGGCTTTATAATTAGCTTGGTCCCAGCGCCCCAGATTAACTGATAGTTGCTATCCATCACAGCACAGAGGTAAGAGGCAGAGTCTTTCATCTGGAGCTCCTTCAAAAGGAGGTAACTG 103 | + 104 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFF:FFFFFFFF:FFFFFFFFF,,FFFFFFF,FFFFFF,FFF,FFFFFFFFFFFFFF:F,FF:FFFFFF:F 105 | @A00261:687:HMMCCDSX5:3:1101:21260:1360 1:N:0:AACCGCGGAT+CTAGCGCTGT 106 | ATTTTTACGGCAGGGTCAGGGTTCTGGATATTTGCAATCACAGAAAGTCTTGTGCCCTTTCCAAAGACAAGAGGTGTGTTTCCTCCGAGTCGTCTCACAGCACAGAGGTAAGAGGCAGAGTCTTTCATCTGGAGCTCCTTCAAAAGGAGG 107 | + 108 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFF 109 | @A00261:687:HMMCCDSX5:3:1101:18611:1376 1:N:0:ACCCGCGGAT+CTAGCGCTGT 110 | CTTTTTGGTGGGAACACGTTTTTCAGGTCCTCGAGCACTGTCAGCCGGGTGCCTGGGCCAAAATACTGCGTATCTGTGCTCACAGCCTGGAGGCCCAGGACAAAAACCGGTGCTGCCCGCTCTGCTGTCCCAGACTCAGCTCGGGTCCTT 111 | + 112 | :FFFFFFFF,,FFF:FFF:F:FFFF,,,F:FF::FF:,:F:FFF,,,F,F,,F,::FFFFFFF:FF:FF,FFF:FFF,FFFFFF:FFFF,FF,FFFF:,FFFFFFFFFF:FFFFF:FFFFFFFFFF,:F:,F:F:FFFF:FF,FFFFF:F 113 | @A00261:687:HMMCCDSX5:3:1101:7446:1391 1:N:0:AACCGCGGAT+CTAGCGCTGT 114 | CCCCCTGGTGGGAAGGAGGGCCCGTTGGGAGGCCCAGCGGGCAGGAGGAACGGCTACCGAGGCTCCAGCTTAACGGTATTTGGAGGTCAGCACGGTGCTCACAGAAGCCAGGAACTTGTCCAGGGAGGCGTGCACCGCAGGGGTGAACTC 115 | + 116 | ,FF,F:FFFFF:F,FFFFFFFFFF:FFFFFFFFF::FFFF:FF:FF:FFFF:FFF:FFFF,FFFFFFF:FF:F,FFF:FF:FFF::FFFFFFFFF:FFF::FFFFFFFFFFF::F:FF:FFF:FF,FF:FFFFFFFFFFF:F:F:FFFFF 117 | @A00261:687:HMMCCDSX5:3:1101:8024:1391 1:N:0:AACCGCGGAT+CTAGCGCTGT 118 | ATAAGTGGTGGGAACACCTTGTTCAGGTCCTCGAGCACTGTCAGCCGGGTGCCTGGGCCAAAATACTGCGTATCTGCCCCCCCGAGTTGGCGGCTGGGGCCGAGATAAAGGGCTGTGTGTTCTTGCTGCAGGGCGGGTAGGTGGTGGTTT 119 | + 120 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFF,,,,,:F,:,,:,,:,,,:,,F,,,,,:,,F,:,F,F,,,,FF,F,F:,,F,:,,:::,F 121 | @A00261:687:HMMCCDSX5:3:1101:24641:1391 1:N:0:ACCCGCGGAT+CTAGCGCGGT 122 | TGTGCAGGTGGGAACACCTTGTTCAGGTCCTCTACAACTGTGAGTCTGGTGCCTTGTCCATAGAAAGCTTCAGTGGTCCCCCTGCTGCTGGCACAGAGATACATGGCCGAGTCCCCCTGCTCTGTGCGCTGGATCTCCAAGGTGGAGAAA 123 | + 124 | F:FFFFFF::FFFFFFF:FFF:FFF,FFFFF:FFFFFFF:F:FFF,FFFFFFFFFF,FFF,:F:F:FF:FF,FFFF::FF,FFF:FFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFF:F:FFFFFFFFFF:FFF,F,FFFFFFFFF: 125 | @A00261:687:HMMCCDSX5:3:1101:26585:1407 1:N:0:AACCGCGGAT+CTAGCGCTGT 126 | TTTGTTACGGCAGGGTCAGGGTTCTGGATATCAGGCCTGACCAGCAGTCTGGTCCCACTCCCGAAGATCAATTTATAGCTGCTACCCCCGCCCTCTCTCATTGCACAGAAGTACATTGCGGAGTCCCCCAGTTGTGAAGCGGAGATGACA 127 | + 128 | :FFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF:FF:FFFFFF,FFFFFFFFFFF:FFFFFFFFFFFFFFFFF, 129 | @A00261:687:HMMCCDSX5:3:1101:30653:1407 1:N:0:AACCGCGGAT+CTACCGCTGT 130 | CAATCAACGGCAGGGTCAGGGTTCTGGATATTGGGTTTCACAGATAACTCCGTTCCCTGTCCGAAGATAAGCTTTCCTCCCTGGTTCATGCTTGCTGCACAGAAGTAGGTGGCTGAGTCTCCAGGCTGGGAATCCATGATATGCAATGAG 131 | + 132 | FFFFFFFFFFFFFFFFFFFFFFFF:FFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F:FFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFF:FFFFF 133 | @A00261:687:HMMCCDSX5:3:1101:5638:1423 1:N:0:CACCGCGGAT+CTAGCGCTGT 134 | AATCTTGGTGGGAACACGTTTTTCAGGTCCTCTGTGACCGTGAGCCTGGTGCCCGGCCCGAAGTACTGCTCGTAGCTCCCGCTAGTCGTCTCGGTGCTGGCACAGAAGTACACAGATGTTTGGGAGGGAGCAGCCGACTCCAACCCCAGC 135 | + 136 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 137 | @A00261:687:HMMCCDSX5:3:1101:24713:1423 1:N:0:AACCGCGGAT+CTAGCGCTGT 138 | TTTTGTGGTGGGAACACCTTGTTCAGGTCCTCGAGCACCAGGAGCCGCGTGCCTGGCCCGAAGTACTGGGTCTCGCTCGGCTTCCCTCCCGTCCGTTTGCTGGCACAGAAGTACATGGCTGAGTCCTCCAGCTTTGTGGACCGGATCTTC 139 | + 140 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFF:F:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 141 | @A00261:687:HMMCCDSX5:3:1101:24379:1438 1:N:0:AACCGCGGAT+CTAGCGCTGT 142 | CGACGTGGTGGGAACACCTTGTTCAGGTCCTCTACAACTGTGAGTCTGGTGCCTTGTCCAAAGAAAGCTTCAGTGGCCCCCTGTTCCCAGGCACAGAGATAGAAGCCAGAGTCACTGAGAAGGAGCTTCTTAGAACTCAGGATGAACTGC 143 | + 144 | FFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFF 145 | @A00261:687:HMMCCDSX5:3:1101:30309:1438 1:N:0:ACCCGCGGAT+CTAGCGCTGT 146 | TTTTATACGGCAGGGTCAGGGTTCTGGATATAGGGCAGCACGGACAATCTGGTTCCGGGACCAAAGACAAAATTCTGACCATAGTTCCGGCTCACAACACAGAAGTACTCAGCCGCGTCGCTCATATGGGCTGAGGGTTTCGTCAGGTGG 147 | + 148 | FFFFFFFFFFFFFFFFFF:FFFFFFFFFF:FFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF:FFFFF::FFFFFFFFFFFFFFFFFFFFFFFFFFF 149 | @A00261:687:HMMCCDSX5:3:1101:8133:1454 1:N:0:AACCGCGGAT+CTAGCGCTGT 150 | CACACCGGTGGGAACACGTTTTTCAGGTCCTCTAGCACGGTGAGCCGTGTCCCTGGCCCGAAGAACTGCTCATTGTAATTAAACACCCGCTGCCCATCTAAGCTGCTGGCACAGAGATACACGGCCGAGTCCTCAAGCTTTGCAGGTTGG 151 | + 152 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 153 | @A00261:687:HMMCCDSX5:3:1101:24532:1454 1:N:0:AACCGCGGAT+CTAGCGCTGT 154 | AGTCTTACGGCAGGGTCAGGGTTCTGGATATTTGGTTTAACTAGCACCCTGGTTCCTCCTCCAAAAGTTAGCTTGTTGCCTGCAGCTTTGATCCCCCTCGCCCGATCCTCCACAGCACAGTAGTAAACAGCAGCATCTGCCTCCCGCACC 155 | + 156 | FFFFFFFFFFFFFFFFFFFFFFFFFFFF:F,FFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFF:FFFFFF:FF:FFFFFF 157 | @A00261:687:HMMCCDSX5:3:1101:28890:1454 1:N:0:AACCGCGGAT+CTAGCGCTGT 158 | TGAGGTACGGCAGGGTCAGGGTTCTGGATATTTGGTTTTACTGTCAGTCTGGTCCCTGCTCCAAAGCGCATGTCATTGAGCTCACTCAGAGCACAGAAGTACACCGCTGAGTCTGACACTTGAACTGAGCCTTTCTCCAAGTGGAAAGAA 159 | + 160 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 161 | @A00261:687:HMMCCDSX5:3:1101:4869:1470 1:N:0:AACCGCGGAT+CTAGCGCTGT 162 | CATGGTGGTGGGAACACCTTGTTCAGGTCCTCCAAGACAGAGAGCTGGGTTCCACTGCCAAAAAACAGTTCCTTTGCTGTCCCCCAGATTAAGCTGCTGGCACAGAGATACACGGCCGAGTCCTCCTGCTGTGTGCGCTGGATCGTCAGA 163 | + 164 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 165 | @A00261:687:HMMCCDSX5:3:1101:24542:1470 1:N:0:AACCGCGGAT+GTAGCGCTGT 166 | AGTCTTACGGCAGGGTCAGGGTTCTGGATATTTGGTTTAACTAGCACCCTGGTTCCTCCTCCAAAAGTTAGCTTGTTGCCTGCAGCTTTGATCCCCCTCGCCCGATCCTCCACAGCACAGTAGTAAACAGCAGCATCTGCCTCCCGCACC 167 | + 168 | F,,F::FF::FF::FFFFFFFFFFFF:F,::FFF:FFFFFFFFFFFFFFFFFFFFFFF:F:FFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FF:FFFFFFFFFFF:FFFFFFFFFFFFFFF: 169 | @A00261:687:HMMCCDSX5:3:1101:6777:1485 1:N:0:AACCGCGGAT+CTAGCGCTGT 170 | AACGACGGTGGGAACACGTTTTTCAGGTCCTCTAGCACGGTGAGCCGTGTCCCTGGCCCGAAGAACTGCTCGCGGCCCCCTGCGGCTTCAACGCTGCAGAGATATATGCTGCTGTCTTCAGGGCTCATGTTGCTCACAGTCAGAGTTGAG 171 | + 172 | FFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFF,FFFF:F:FFFFFFFFFFFFFFFF 173 | @A00261:687:HMMCCDSX5:3:1101:31331:1485 1:N:0:AACCGCGGAT+CTAGCGCTGT 174 | AAAGGTGGTGGGAACACGTTTTTCAGGTCCTCGAGCACTGTCAGCCGGGTGCCTGGGCCAAAATACTGCGTATCTGTGCTGCTAGTCCTCCCCAAGCTGCTGGCACAGAGATACATGGCCGAGTCCCCCTGCTCTGTGCGCTGGATCTCC 175 | + 176 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFF,:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 177 | @A00261:687:HMMCCDSX5:3:1101:18629:1501 1:N:0:AACCGCGGAT+CTAGCGCTGT 178 | AGAGAAGGTGGGAACACGTTTTTCAGGTCCTCCAGCACGGTCAGCCTGCTGCCGGCCCCGAAAGTCAGGACGTTGGCCCCAGCCTGGAGAGCAACGCTGCAGAGATATATGCTGCTGTCTTCAGGGCTCATGTTGCTCACAGTCAGAGTT 179 | + 180 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FF:FFFFFFFFFFF:F 181 | @A00261:687:HMMCCDSX5:3:1101:31448:1501 1:N:0:ACCCGCGGAT+CTAGCGCTGT 182 | AAAGGTGGTGGGAACACGTTTTTCAGGTCCTCGAGCACTGTCAGCCGGGTGCCTGGGCCAAAATACTGCGTATCTGTGCTGCTAGTCCTCCCCAAGCTGCTGGCACAGAGATACATGGCCGAGTCCCCCTGCTCTGTGCGCTGGATCTCC 183 | + 184 | F,FFFFF:FFFFF,FFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFF 185 | @A00261:687:HMMCCDSX5:3:1101:1768:1517 1:N:0:AACTGCGGAT+CTAGCGCTGT 186 | ATTGAAGGTGGGAACACGTTTTTCAGGTCCTCTGTGACCGTGAGCCTGGTGCCCGGCCCGAAGTACTGCTCGTAGTCTCCCGCTAGTCGAGCACTGCAGATGTAGAAGCTGCTGTCTTCAGGATGGGCACTGGTCACTGTCAGAGTGGAC 187 | + 188 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFF,FFFF 189 | @A00261:687:HMMCCDSX5:3:1101:9607:1532 1:N:0:AACCGCGGAT+CTAGCGCTGT 190 | CTCGCTGGTGGGAACACGTTTTTCAGGTCCTCTAGCACGGTGAGCCGTGTCCCTGGCCCGAAGAACTGCTCATTGTAGGCTCCTGTCCCGGGACTGCTGGCACAGAAGTACACAGATGTCTGGGAGGGAGCAGCCGACAGCAGCCTGAGC 191 | + 192 | FFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 193 | @A00261:687:HMMCCDSX5:3:1101:10854:1532 1:N:0:ACCCGCGGAT+CTAGCGCTGT 194 | CAAACAACGGCAGGGTCAGGGTTCTGGATATTTGGTATGACCGAGAGTTTGGTCCCCTTCCCGAAAGTGAGTTGGTAACTCCCAGCCCCAGGGGCCGGAGGGGCACAGAGGTATGTGGCTGAGTCTTCAGGCTGGGATCCTTTGATGTAC 195 | + 196 | FFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 197 | @A00261:687:HMMCCDSX5:3:1101:16866:1548 1:N:0:AACCGCGGAT+CTAGCGCTGT 198 | AGTGTTACGGCAGGGTCAGGGTTCTGGATATGGTATGTGGCTTCAAAACCTTTGTTGCTTCCCTTGTCATCAGCCTTCGTGGCTTTCAGGAGGAGCTGTAGACCTTCTCCAGGATATTGGACATACCAGAAAAGGGAAGGGTATCCTGTG 199 | + 200 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FF:FFFFFFFFFFFFFFFF 201 | @A00261:687:HMMCCDSX5:3:1101:26214:1548 1:N:0:AACCGCGGAT+CTAGCGCTGT 202 | ACTTGAGGTGGGAACACCTTGTTCAGGTCCTCTGGAAAGGGAAGAGGGGTTGGAGCCAGGGTTGCTCTGAGAGCTGTCTGGTTCTGGTAGGGGCTCTGTGTGTGTGAGAGAGAGCCCTGGGAATGGGCCGTAATGAAGCACAACTATGTC 203 | + 204 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFF:FFFFFFF 205 | @A00261:687:HMMCCDSX5:3:1101:30120:1548 1:N:0:AACCGCAGAT+CTAGCGCTGT 206 | CCAGTTGGTGGGAACACCTTGTTCAGGTCCTCCAAGACAGAGAGCTGGGTTCCACTGCCAAAAAACAGTTTTTCATTAGTTGCCGCGCGGGGGCTGCTGGCACAGAAATACAAAGCTGAGTCCCCCAGCTCCAGAGAGCTCAGGTTTAGT 207 | + 208 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFF 209 | @A00261:687:HMMCCDSX5:3:1101:7238:1564 1:N:0:AACCGCGGAT+CTAGCGCTGT 210 | TACTCCGGTGGGAACACGTTTTTCAGGTCCTCTAGCACGGTGAGCCGTGTCCCTGGCCCGAAGAACTGCTCATTGTGTCCCGCTAGACCCAAGCTGCTGGCACAGAGATAGAGGGCCGAGTCCCCCAGCAACAAGGCGTTCACATTCAGC 211 | + 212 | FFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFF:FFFFFFFFFFFFFF,FFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 213 | @A00261:687:HMMCCDSX5:3:1101:13964:1564 1:N:0:CACCGCGGAT+CTAGCGCTGT 214 | AGTGGTACGGCAGGGTCAGGGTTCTGGATATTTGGTATGACCACCACTTGGTTCCCCTTCCCAAAAGCGAGTCTGTTGTAAGGATCACCCACGAGGCAGTAGTACACAGCAGTGTCGCTCAGGGAAACCCGGGGCAGGCTCAGAGTGCTG 215 | + 216 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFF 217 | @A00261:687:HMMCCDSX5:3:1101:24777:1564 1:N:0:AACCGCGGAT+CTAGCGCTGT 218 | CCCCGTGGTGGGAACACGTTTTTCAGGTCCTCCAGTACGGTCAGCCTAGAGCCTTCTCCAAAAAACAGCTCCCGGCTAGTCACCTCACTGCTGGCACAGAAGTACATGGCTGAGTCCTCCAGCTTTGTGGACCGGATCTTCAGAGTGAAA 219 | + 220 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFF,FFFFFFFFFFFFFFFFF 221 | @A00261:687:HMMCCDSX5:3:1101:18195:1595 1:N:0:AACCGCGGAT+CTAGCGCTGT 222 | AGAGCAACGGCAGGGTCAGGGTTCGGGATATCAGAAGAGATTAACAGTTCCACCATAGGAATAGTTGCATCCCAACTCCAGTGAGGCTGCTTCAGAGAGAATTACGTGGTGGTTATGCTGGCTCACAGACTGGGCTCTGGCATCTCTCAG 223 | + 224 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 225 | @A00261:687:HMMCCDSX5:3:1101:18484:1626 1:N:0:AACCGCGGAT+CTAGCGCTGT 226 | AGAGAAGGTGGGAACACGTTTTTCAGGTCCTCCAGCACGGTCAGCCTGCTGCCGGCCCCGAAAGTCAGGACGTTGGCCCCAGCCTGGAGAGCAACGCTGCAGAGATATATGCTGCTGTCTTCAGGGCTCATGTTGCTCACAGTCAGAGTT 227 | + 228 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFF:,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFF 229 | @A00261:687:HMMCCDSX5:3:1101:11966:1642 1:N:0:AACCGCGGAT+CTAGCGCTGT 230 | TGCTGTGGTGGGAACACGTTTTTCAGGTCCTCTAGCACGGTGAGCCGTGTCCCTGGCCCGAAGAACTGCTCATTCCTCGGTGCTAGTCCTGGGCTGCTGGCGCAAAGATAAAGGGCCGAGTCCCCCAGCTCCAAGGTGCTCACATTCATC 231 | + 232 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 233 | @A00261:687:HMMCCDSX5:3:1101:26874:1658 1:N:0:AACCGCGGAT+CTAGCGCTGT 234 | ATATAAACGGCAGGGTCAGGGTTCTGGATATAGGCAGTTCAGGGTGACTGCTTTCCTCACTGGCATGGATACTGATGACTGGGCTTGAGTAACCTTCTGGGCCACACTTGATCCAGAGTAGCTGAAGGCCACAAATACACACAGCAGGCT 235 | + 236 | FF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFF,FFFFFFFFFFFFFFFFFFFFF:FF 237 | @A00261:687:HMMCCDSX5:3:1101:27597:1658 1:N:0:AACCGCGGAT+CTAGCGCTGT 238 | TTAGTAACGGCAGGGTCAGGGTTCTGGATATTTGGAAAGACTTGTAATCTGGTCCCAGTCCCAAAGATGAGCTTGTCGGTGTTATACCTCACAGCACAGAGGTAAGAGGCAGAGTCTTTCATCTGGAGCTCCTTCAAAAGGAGGTAACTG 239 | + 240 | FFFFFFFFFFFFFFFFFFFFFFFFFFFF:FF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFF,FFFFFFF,,FFF 241 | @A00261:687:HMMCCDSX5:3:1101:31901:1658 1:N:0:AACCGCGGAT+CTAGCGCTGT 242 | TTTTATACGGCAGGGTCAGGGTTCTGGATATAGGGCAGCACGGACAATCTGGTTCCGGGACCAAAGACAAAATTCTGACCATAGTTCCGGCTCACAACACAGAAGTACTCAGCCGCGTCGCTCATATGGGCTGAGGGTTTCGTCAGGTGG 243 | + 244 | ,FFFF:FFFFFFFFFFFFFFFFFFFF:FFFFFFFF:FF:FFFFFFFFFFFFF:FFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFF,FFFFFF:FFFF,F,,FFFF:FF,FF:FFF:F,F,FFF,FFFFFF,FFFFF:FFFFFFFFF 245 | @A00261:687:HMMCCDSX5:3:1101:27118:1673 1:N:0:AACCGCGGAT+CTAGCGCTGT 246 | ACTTGAGGTGGGAACACCTTGTTCAGGTCCTCTGGAAAGGGAAGAGGGGTTGGAGCCAGGGTTGCTCTGAGAGCTGTCTGGTTCTGGTAGGGGCTCTGTGTGTGTGAGAGAGAGCCCTGGGAATGGGCCGTAATGAAGCACAACTATGTC 247 | + 248 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F:FFFFFFFFFFFFFFFFF,FFF:FFFFFFFFFFFFF 249 | @A00261:687:HMMCCDSX5:3:1101:7283:1705 1:N:0:AACCGCGGAT+CTAGCGCTGT 250 | TTTTTTGGTGGGAACACCTTGTTCAGGTCCTCTAGGATGGAGAGTCGAGTCCCATCACCAAAATGCTGGGGCTGATTGCTATCGGCCCCTAGATTAGCACTGCAGATGTAGAAGCTGCTGTCTTCAGGATGGGCACTGGTCACTGTCAGA 251 | + 252 | FFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFF:FFFFFFF:FFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFF 253 | @A00261:687:HMMCCDSX5:3:1101:3332:1720 1:N:0:AACCGCGGAT+CTAGCGCTGT 254 | TGAAATACGGCAGGGTCAGGGTTCTGGATATAGGACTTGACTCTCAGAATGGTTCCTGCGCCAAAGACCAGCTTGTTTCCATATTCCACCGCAGCACAGAGGTAGGTGGCTGAGTCACCAGGCTGAGAAGCTGCAATGTATAAAGTACTA 255 | + 256 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFF:FFFFFFFFFFF:FFFFFFFFF 257 | @A00261:687:HMMCCDSX5:3:1101:24722:1720 1:N:0:AACCGCGGAT+CTAGCGCTGT 258 | TGTGTTACGGCAGGGTCAGGGTTCTGGATATCTGGTTGGACCAAGACTGTTCTTGCTACAGGCTGCTCCAGCGGTGGCAACAACAACACCATGGAGTGTCCCTTCTTCCTCACAGTTTTCCTTCAAACGCTGGCTTTGTGGAGGGCATTG 259 | + 260 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F:FFFFFFFFFFFFFFFFFFFFFF 261 | @A00261:687:HMMCCDSX5:3:1101:24975:1720 1:N:0:AACCGCGGAT+CTAGCGCTGT 262 | AACGGGGTGGGAACACGTTTTTCAGGTCCTCGAGCACCAGGAGCCGCGTGCCTGGCCCGAAGTACTGGGTCTCTTGCGCCCCGGTCGAGCTGCTGGCACAGAAATAAACTCCAGAATCCTCCAGTTCTGCAGGCTGCACCTTCAGAGTAG 263 | + 264 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFF::FFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFF:FFFFFFFFFFFF:FFF:FFF,FFFF,FF,FFF,FFFFFFFFFFFFFF:F 265 | @A00261:687:HMMCCDSX5:3:1101:14172:1736 1:N:0:AACCGCGGAT+CTAGCGCTGT 266 | CCTGGTACGGCAGGGTCAGGGTTCTGGATATTGAGTTCCACTTTTAGCTGAGTGCCTGTCCCAAAGGTGAGTTTGTTTCCTCCTCCCGTGAGTATCACAGTGTTTGATGCCTCACAATAAACTCCCTTTTGTCACTTCACCCCTCTTCTT 267 | + 268 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 269 | @A00261:687:HMMCCDSX5:3:1101:18041:1736 1:N:0:AACCGCGGAT+CTAGCGCTGT 270 | ATTCTTACGGCAGGGTCAGGGTTCTGGATATTTGGCTTCACAGTGAGCGTAGTCCCATCCCCAAAGGTTGATTTGCCTGGAATCTCTGCACAAAAGTAGACAGCTGAGTCTCCAGGTTGAGTAGCTGCAATTTGCAGAGAGAGATGTTTC 271 | + 272 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 273 | @A00261:687:HMMCCDSX5:3:1101:28800:1736 1:N:0:AACCGCGGAT+CTAGCGCTGT 274 | ACAACAGGTGGGAACACGTTTTTCAGGTCCTCGAGCACTGTCAGCCGGGTGCCTGGGCCAAAATACTGCGTATCTGTGCTCACAGCCTGGAGGCCCAGGACAAAAACCGGTGCTGCCCGCTCTGCTGTCCCAGACTCAGCTCGGGTCCTT 275 | + 276 | :FFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 277 | @A00261:687:HMMCCDSX5:3:1101:26350:1752 1:N:0:AACCGCGGAT+CTAGCGCTGT 278 | ATAAAAACGGCAGGGTCAGGGTTCTGGATATTTGGTTTAACTAGCACCCTGGTTCCTCCTCCAAAAGTTAGCTTGTTGCCTGCAGCCCCTGTCACGGCACAGAGGTAGGTGGCTGAATCACTGGGCTGGGAGTCTCTGATGAGCAGAGAA 279 | + 280 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 281 | @A00261:687:HMMCCDSX5:3:1101:3287:1767 1:N:0:AACCGCGGAT+CTAGCGCTGT 282 | TAAAACACGGCAGGGTCAGGGTTCTGGATATTTGCTTTAACAAATAGTCTTGTTCCTGCTCCAAAGATAGTTTTGAAGCCTCCTATACTTGCTGCACAGAAGTAGACAGCCGAGTCTTCAGGTTGGGTCTCTGTGATGTGCAGGGAGAAA 283 | + 284 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFF:F:FFFFFFFFFFF:FFFFF:FFFFFFFF::FFFFFFFF:FFFFFFFFFF:FFFFFFF:FFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 285 | @A00261:687:HMMCCDSX5:3:1101:26115:1783 1:N:0:AACCGCGGAT+CTAGCGCTGT 286 | AAAAAGGTGGGAACACGTTTTTCAGGTCCTCTGTGACCGTGAGCCTGGTGCCCGGCCCGAAGTACTGCTCGTAGGAGCTCCCGCTAAGGGTACTGGCACAGAAATACACAGCAGAGTCACCAAGCTCCAGGGAATTGATGTGAAGATTTA 287 | + 288 | :FFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFF:F:FFFFFFFFFFFFFFFFF:FF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:,FFFFFFFF:FF:FFF 289 | @A00261:687:HMMCCDSX5:3:1101:26295:1783 1:N:0:AACCGCGGAT+CTAGCGCTGT 290 | ATAGAAACGGCAGGGTCAGGGTTCTGGATATCAGGCCAGACAGTCAACTGAGTTCCTCTTCCAAAGTATAGCCTCCCCAGGGTTGAGCCTCTGTCGGCACAATGCTGGTCCCCTTTACATGAACCCAGCCCTAATGTTTCCCATCCCCTC 291 | + 292 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 293 | @A00261:687:HMMCCDSX5:3:1101:26874:1783 1:N:0:AACCGCGGAT+CTAGCGCTGT 294 | AAAAGAGGTGGGAACACGTTTTTCAGGTCCTCGAGCACCAGGAGCCGCGTGCCTGGCCCGAAGTACTGGGTCTCTTGTTGCATGGCGCCTAAGCTGCTGGCACAGAGATACATGACCGAGTCCCCCTGCTCTGTGCGCTGGATCTCCAAG 295 | + 296 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFF:FFFFFFFFFFFFFF 297 | @A00261:687:HMMCCDSX5:3:1101:7862:1799 1:N:0:AACCGCGGAT+CTAGCGCTGT 298 | TTTGTTGGTGGGAACACCTTGTTCAGGTCCTCTACAACTGTGAGTCTGGTGCCTTGTCCAAAGAAAGCTTCAGTGTTCGATTCCCTGACGCTGCTGGCACAGAAATACAAAGCTGAGTCCCCCAGCTCCAGAGAGCTCAGGTTTAGTTCA 299 | + 300 | FFFFFFFFFF:FFFFFFFFFFFFFFF:FFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FF:FFFFFF:FFFFFF:FFFFFFFFFFFFFFFFF:FFFFF:FFFFFFFFFFF,FF:FFFFFFFFFFFF:FFFFF 301 | @A00261:687:HMMCCDSX5:3:1101:2302:1814 1:N:0:AACCGCAGAT+CTAGCGCTGT 302 | ACAAAAACGGCAGGGTCAGGGTTCTGGATATCGAGCGTGACCTGAAGTCTTGTTCCAGTCCCAAAGGTGAGTTTACTGGCAGTGCCGGTTCTGGTCAGAGCACAGAAGTACACCGCTGAGTCTGACACTTGAACTGAGCCTTTCTCCAAG 303 | + 304 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 305 | @A00261:687:HMMCCDSX5:3:1101:6189:1814 1:N:0:AACCGCGGAT+CTAGCGCTGT 306 | AAAGGAACGGCAGGGTCAGGGTTCTGGATATACGGATGAACAATAAGGCTGGTTCCTCTTCCAAATGTAGGTATGTAGCTTCCAGCGTCCGTAGCACAGAAGTAAGAAGCAGTGTCTGCTGCCCGGGAAGCCGTGATCAACAAGGAACTG 307 | + 308 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFF:FF 309 | @A00261:687:HMMCCDSX5:3:1101:13096:1814 1:N:0:AACCGCGGAT+CTAGCGCTGT 310 | CCCCCTGGTGGGAACACGTTTTTCAGGTCCTCTAGCACGGTGAGCCGTGTCCCTGGCCCGAAGAACTGCTCATTGTATTCCGTCCCCACAATGTTACAGCTTTGTACAAAAACAGCCCCTCCCATGGGTCCGCCCCCAGAGCCTGGGAGA 311 | + 312 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 313 | @A00261:687:HMMCCDSX5:3:1101:29243:1814 1:N:0:ACCCGCGGAT+GTAGCGCTGT 314 | TTTTTTACGGCAGGGTCAGGGTTCTGGATATCTGGCTTTATAATTAGCTTGGTCCCAGCGCCCCAGATTAACTGATAGTTGCTATCCATCACAGCACAGAGGTAAGAGGCAGAGTCTTTCATCTGGAGCTCCTTCAAAAGGAGGTAACTG 315 | + 316 | F,FFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F:FFFFF:FFFFFFF:FFFFFFF:FFFFFFFFFF::FFFFFF:FFFFFFFFFFFFFFFF,FFFFFFF:FFFFFFFFF,FFFFFF 317 | @A00261:687:HMMCCDSX5:3:1101:29993:1830 1:N:0:AACCGCGGAT+CTAGCGCTGT 318 | TTTTTTACGGCAGGGTCAGGGTTCTGGATATCTGCTGCGTCCCCCGAGGTCCGGCTTCCTGCCTCCCATCTCTCTCCATTTCCTCTGGTTTTTGTCTTGAGGCTTACTGATTTCTCATGTTCAGCATCTCTCTTTCTCAAGGTCTCATTT 319 | + 320 | F:FFFFFFFFFFFFFFFFFFFFFFFFFF::,FFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFF,FFFFFFF:FFFFFFF,FFFFFFFFF:FF 321 | @A00261:687:HMMCCDSX5:3:1101:3296:1846 1:N:0:AACCGCGGAT+CTAGCGCTGT 322 | TTTACCACGGCAGGGTCAGGGTTCTGGATATTTGCTCTTACAGTTACTGTGGTTCCGGCTCCAAAGCTGAGCTTGTAGTCGCCTATGTCACCCACGAGGCAGTAGTACACAGCAGTGTCGCTCAGGGAAACCCCGGGCAGGCTCAGAGTG 323 | + 324 | FF:FFFF:FFFFFF:FFFF:F:,FFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF,F:FFFFFFF,FFFFFFFFFFFFFFF:FFFFFFFFFFFFFF:FFF:FFFFFFFF:FFF,FFFFFFF,,FF,,,FFF,F:FFF,FFF,:F:FF: 325 | @A00261:687:HMMCCDSX5:3:1101:3314:1846 1:N:0:ACCCGCGGAT+CTAGCGCTGT 326 | TAAATAACGGCAGGGTCAGGGTTCTGGATATTGGGGAGAATATGAAGTCGTGTCCCTTTTCCAAAGATGATCTTGTCTTCAGCTCCTGCACAGAGGTAGAGGCCTGTATCACCAGGCTGGGCTGCAGTGATGTGGAGAGAACTGTCCTTT 327 | + 328 | FFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFF:FFFFFF,FFFF:F,:FFFFFFFF,F,F,F::F:F:FFF:FFFFFFFF:FFF:FFFFFFFFFFFFFFFFFFFFF,F:FFFFFFFFFFF,F,FFFFFF 329 | @A00261:687:HMMCCDSX5:3:1101:9579:1861 1:N:0:AACCGCGGAT+CTAGCGCTGT 330 | AAAGGAGGTGGGAACACGTTTTTCAGGTCCTCGAGCACCAGGAGCCGCGTGCCTGGCCCGAAGTACTGGGTCTCTTGGGTGTGTCCAAACCTCGACTCACTGATGGCACAGAAGTACACAGATGTCTGGGAGCTGGTAGCGGACTCCAGA 331 | + 332 | :FFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F:FFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFF,FFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFF 333 | @A00261:687:HMMCCDSX5:3:1101:16396:1861 1:N:0:AACCGCGGAT+CTAGCGCTGT 334 | AGACAAGGTGGGAACACGTTTTTCAGGTCCTCTGTGACCGTGAGCCTGGTGCCCGGCCCGAAGTACTGCTCGTAGACCCTCCCGCTCTCACTGCTGGCACAGAAGTACACAGATGTTTGGGAGGGAGCAGCCGACTCCAACCCCAGCAGG 335 | + 336 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F 337 | @A00261:687:HMMCCDSX5:3:1101:19027:1877 1:N:0:AACCGCGGAT+CTAGCGCTGT 338 | AAAATAGGTGGGAACACGTTTTTCAGGTCCTCTAGCACGGTGAGCCGTGTCCCTGGCCCGAAGAACTGCTCATTGTAGTCCGCTAGTCCACTGGCACAGAGGTACATAGATGTCTGGTTGGTGCTGGCGGACTCCAGAATCAGGGAGAAG 339 | + 340 | FFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFF:FFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFF 341 | @A00261:687:HMMCCDSX5:3:1101:23095:1877 1:N:0:AACCGCGGAT+CTAGCGCTGT 342 | CTTTGTGGTGGGAACACGTTTTTCAGGTCCTCTGTGACCGTGAGCCTGGTGCCCGGCCCGAAGTACTGCTCGTAGGAGCTTACGACGCCTCTCGAGTAACTGCTGGCACAGAAGTACACAGATGTCTGGGAGGGAGCAGCCGACAGCAGC 343 | + 344 | F:FFF,:FFF:FF:FFF:FFFFF,FFFF,FFFFFF:FFF::FFF:::FFFFFFFF:FFF,FFFFFF:FFFFFFFF:FF:FFFFFF:F:F:FFF:FFFFFFFFFFFF:FFFFFFF:FFFFF::F,FFF:FFFFF:FF:FF:FFF:FFFFFF 345 | @A00261:687:HMMCCDSX5:3:1101:1099:1892 1:N:0:AACCGCGGAT+CTAGCGCTGT 346 | AGAGGAGGTGGGAACACGTTTTTCAGGTCCTCGAGCACTGTCAGCCGGGTGCCTGGGCCAAAATACTGCGTATCTGTGCTCACAGCCTGGAGGCCCAGGACAAAAACCGGTGCTGCCCGCTCTGCTGTCCCAGACTCAGCTCGGGTCCTT 347 | + 348 | :FFFFFFFFFFF:FFFFFFF:FFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 349 | @A00261:687:HMMCCDSX5:3:1101:2546:1892 1:N:0:AACCGCGGAT+CTAGCGCTGT 350 | TTCTCTGGTGGGAACACGTTTTTCAGGTCCTCTAGCACGGTGAGCCGTGTCCCTGGCCCGAAGAACTGCTCATTAAGCAGACCTAAACTGCTGGCACAGAAGTACACAGCTGAGTCCCTGGGTTCTGAGGGCTGGATCTTCAGAGTGGAG 351 | + 352 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 353 | @A00261:687:HMMCCDSX5:3:1101:9833:1892 1:N:0:AACCGCGGAT+CTAGCGCTGT 354 | AATGGAACGGCAGGGTCAGGGTTCTGGATATCTGGTCTAACACTCAGAGTTATTCCTTTTCCAAATGTCAGCTTACTATTGGCTCCAGTGCCCCCGCTTGCTGCACAGAAGTACACTGCAGAGTCTCCAGGCTGGGAGGGCACAATGTGC 355 | + 356 | :,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFF 357 | @A00261:687:HMMCCDSX5:3:1101:10999:1908 1:N:0:AACCGCGGAT+CTAGCGCTGT 358 | ATTTTTACGGCAGGGTCAGGGTTCTGGATATCAGAAGAGATACACTGAAACAGACGATGAGTAGTTGCACCTCAGCTCCACAGGGGCTTCTTCAAAGACAGGGACTTGGCTGTCAAGCTGGGTCACAGACTGGGCTCTGGTTCCTCCCAG 359 | + 360 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 361 | @A00261:687:HMMCCDSX5:3:1101:20437:1908 1:N:0:AACCGCGGAT+CTAGCGCTGT 362 | TTCTTAGGTGGGAACACGTTTTTCAGGTCCTCTGTGACCGTGAGCCTGGTGCCCGGCCCGAAGTACTGCTCGCCGCGCCCCGTAAGTTCACTGCTGGCACAGAGGTACATAGATGTCTGGTTGGTGCTGGCGGACTCCAGAATCAGGGAG 363 | + 364 | FFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFF:FFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 365 | @A00261:687:HMMCCDSX5:3:1101:8314:1924 1:N:0:AACCGCGGAT+CTAGCGCTGT 366 | TGAATTGGTGGGAACACCTTGTTCAGGTCCTCTACAACTGTGAGTCTGGTGCCTTGTCCAAAGAAAGCTTCAGTGTTCACACAGTGACAGGGGTCAAGGTGAAAATCACATTGAAGGCATAGAGAGGGGAGAGGGCCCTGGCTAGGATGG 367 | + 368 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,:FFFFF 369 | @A00261:687:HMMCCDSX5:3:1101:16848:1924 1:N:0:AACCGCGGAT+CTAGCGCTGT 370 | CGTGTTACGGCAGGGTCAGGGTTCTGGATATTTGGACTGACCAGAAGTCGGGTGCCAGTTCCAAATACAAGTTTCTGAAAGCCTGTCGACGGCCCTGCACAGAAGTAGGTTCCTACATCTGTAGTCTGGGTGGCTGTGATGTGCAGGGAG 371 | + 372 | FFFFFFFFFF:FFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFF:FFFFFFFFFF 373 | @A00261:687:HMMCCDSX5:3:1101:19090:1924 1:N:0:AACCGCGGAT+CTAGCGCTGT 374 | TTTTTTACGGCAGGGTCAGGGTTCTGGATATTTGCTCTTACAGTTACTGTGGTTCCGGCTCCAAAGCTGAGCTTGTAGTCGTTAGTAGGTGTGGCTGCACAGAAGTAGATAGCTGAGTCCCCAGTCTGGGTGTCTGCAATGCGCAGAGAC 375 | + 376 | FFFFFFFFFFF:FFFFFFFFFFFFFFFF:F:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFF:FFFFFFFFFFF::FFFFFFFFFF,::FF,FFFFFF:F:FFFFFFFFFF 377 | @A00261:687:HMMCCDSX5:3:1101:29903:1924 1:N:0:AACCGCGGAT+CTAGCGCTGT 378 | TTTGTTGGTGGGAACACCTTGTTCAGGTCCTCTAGGATGGAGAGTCGAGTCCCATCACCAAAATGCTGGGGCTGATTGGTCCCTGTCCCCACAATGTTACAGCTTTGTACAAAAACAGCCCCTCCCATGGGTCCGCCCCCAGAGCCTGGG 379 | + 380 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFF:FFFFFFFFFFFFFF:FF,FFF 381 | @A00261:687:HMMCCDSX5:3:1101:20871:1939 1:N:0:AACCGCGGAT+CTAGCGCTGT 382 | TTTTTTACGGCAGGGTCAGGGTTCTGGATATTTGGTTGCACTTGGAGTCTTGTTCCACTCCCAAAAGTAAGTGCTCTCCTGCCCGTGTCCCCCCCGTCCGTAGCACAGAAGTAAGAAGCAGTGTCTGCTGCCCGGGAAGCCGTGATCAAC 383 | + 384 | FFF:FFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFF 385 | @A00261:687:HMMCCDSX5:3:1101:5394:1971 1:N:0:AACCGCGGAT+CTAGCGCTGT 386 | AGAGGTGGTGGGAACACGTTTTTCAGGTCCTCTAGCACGGTGAGCCGTGTCCCTGGCCCGAAGAACTGCTCATGGGCCTGTGAGCCACTCATGGTTTGGCTGCTGGCACAGCGATACATGGCCGAGTCCCGCTGCTCTGTGCGCTGGATC 387 | + 388 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 389 | @A00261:687:HMMCCDSX5:3:1101:24542:1971 1:N:0:AACCGCGGAT+CTAGCGCTGT 390 | AATCTTGGTGGGAACACGTTTTTCAGGTCCTCGAGCACCAGGAGCCGCGTGCCTGGCCCGAAGTACTGGGTCTCCTGGCCCCCTCCCCATTCACTGCTGGCACAGAGGTACTGAGAGGTATGTGAGGGCCTGGCAGACTCCAGGGTCAGG 391 | + 392 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FF:FFFFFF:FFFFFFFFFFFFFFFFFFFF:,F,FFF,F:FFFFFFFFFFFFFFFFFFFFFFFFFFFFF 393 | @A00261:687:HMMCCDSX5:3:1101:26205:1971 1:N:0:AACCGCGGAT+CTAGCGCTGT 394 | CTCTCAGGTGGGAACACGTTTTTCAGGTCCTCTAGCACGGTGAGCCGTGTCCCTGGCCCGAAGAACTGCTCATTGTAGGAGCACAGTGGGAAGGGGCTGCCCAGAATTCCTTCTCCTTCCCCACCTCCCTTAGCACAGACAGAGGGCACA 395 | + 396 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFF::FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 397 | @A00261:687:HMMCCDSX5:3:1101:10881:1986 1:N:0:AACCGCGGAT+CTAGCGCTGT 398 | TTTTTTACGGCAGGGTCAGGGTTCTGGATATTTGGTTTTACATTGAGTTTGGTCCCAGATCCAAAGTAAAATTTGTTGAAGCTGTATCCGTTCACCACACAGAGGTAGGTGGCTGAATCACTGAGCTTGGAGTCTCTGATGAGCAGGGAA 399 | + 400 | FFFFFFFFFFFFFFFFFFFFFFF:FFF:F:FFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFF 401 | @A00261:687:HMMCCDSX5:3:1101:12165:1986 1:N:0:AACCGCGGAT+CTAGCGCTGT 402 | CGAATCACGGCAGGGTCAGGGTTCTGGATATAGGGCTGGATGATTAGATGAGTCCCTTTGCCAAAGGTGAGTCCGTCAGCACCTCCTCCTGAATACAAACCGTAAGCACAGAAATACATCGCGGCATCCCCCAGCTGTGAGTCTGAGATC 403 | + 404 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F:FFF:F 405 | @A00261:687:HMMCCDSX5:3:1101:24008:1986 1:N:0:ACCCGCGGAT+CTAGCGCTGT 406 | TAAAACACGGCAGGGTCAGGGTTCTGGATATAGGGAATAACGGTGAGTCTCGTTCCAGTCCCAAAGAAGAGGTTGTTTGCCCCAGTTCCTACCCCCTCCTCCACAGCACAGTAGTAAACAGCAGCATCTGCCTCCCGCACCTGGAGGATG 407 | + 408 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFF 409 | @A00261:687:HMMCCDSX5:3:1101:30942:2002 1:N:0:AACCGCGGAT+CTAGCGCTGT 410 | AAAACCGGTGGGAACACCTTGTTCAGGTCCTCTACAACTGTGAGTCTGGTGCCTTGTCCAAAGAAAGCTTCAGCTGTCCCTCCGCTGCTGGCACAGAGATACATGGCCGAGTCCCCCTGCTCTGTGCGCTGGATCTCCAAGGTGGAGAAA 411 | + 412 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 413 | @A00261:687:HMMCCDSX5:3:1101:26151:2033 1:N:0:AACCGCGGAT+CTAGCGCTGT 414 | AGTGTAGGTGGGAACACGTTTTTCAGGTCCTCGAGCACCAGGAGCCGCGTGCCTGGCCCGAAGTACTGGGCCCGTCCCACGTCGGACTCAACGCTGCAGAGATATATGCTGCTGTCTTCAGGGCTCATGTTGCTCACAGTCAGAGTTGAG 415 | + 416 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFF:FFFFFFF,FFFFFFFFFFFFFFFFFF:FFFFFFFFFFFF:FFFF:FF:FFF 417 | @A00261:687:HMMCCDSX5:3:1101:6144:2049 1:N:0:AACCGCGGAT+CTAGCGCTGG 418 | TTTTTTACGGCAGGGTCAGGGTTCTGGATATATGGGTGTACAGCCAGCCTGGTCCCTGCTCCAAAAATCAGCTTATTGTAGCCACCTTCACTCACAGCACAGAAGTACTCAGCCGCGTCGCTCATATGGGCTGAGGGTTTCGTCAGGTGG 419 | + 420 | FFFF:FFFFFFFFFFFFFFFFFFFFFFF,FF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFF,FFF 421 | @A00261:687:HMMCCDSX5:3:1101:15727:2080 1:N:0:AACCGCGGAT+CTAGCGCTGT 422 | TAAATCACGGCAGGGTCAGGGTTCTGGATATTTGGATGGACAGTCAAGATGGTCCCTTGTCCAAATGTCAGCTTTCCATAGCTAGTACCACCAGCGCGTCCCGAGCTTGCTGCACAGAAGTACACTGCAGAGTCTCCAGGCTGGGAGGGC 423 | + 424 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFF:FFFFFFFFF 425 | -------------------------------------------------------------------------------- /tests/resources/dcr_TINY_1_alpha.freq: -------------------------------------------------------------------------------- 1 | 3, 7, 4, 5, GCT, 1, 2 2 | 20, 11, 5, 4, GGGCGCCTT, 1, 1 3 | 28, 26, 1, 1, ACCTTA, 1, 1 4 | 15, 37, 4, 12, TT, 1, 1 5 | 1, 22, 4, 0, , 1, 2 6 | 1, 17, 2, 7, CGACTCGG, 1, 1 7 | 8, 2, 0, 7, CGGGGG, 1, 1 8 | 17, 12, 0, 8, TG, 1, 1 9 | 39, 14, 3, 5, CCGG, 1, 2 10 | 13, 7, 0, 1, TCGGGCGAGGGG, 1, 2 11 | 43, 32, 0, 7, GCT, 1, 1 12 | 18, 16, 4, 6, CCTCCGGCCC, 1, 1 13 | 32, 47, 2, 11, TCCTT, 1, 1 14 | 1, 23, 3, 3, G, 1, 1 15 | 15, 36, 5, 0, CGG, 1, 1 16 | 4, 7, 3, 8, CAGGG, 1, 1 17 | 43, 33, 4, 5, CCAGA, 1, 1 18 | 10, 46, 0, 10, CT, 1, 1 19 | 23, 44, 0, 4, GGGGC, 1, 1 20 | 19, 48, 0, 5, CCGTCG, 1, 1 21 | 35, 9, 6, 3, CCACACCTA, 1, 1 22 | 10, 39, 0, 1, GGGG, 1, 1 23 | 3, 10, 1, 5, GGATACAG, 1, 1 24 | 30, 34, 8, 0, CGGTT, 1, 1 25 | 13, 24, 0, 3, GGGGGTAGG, 1, 1 26 | 23, 41, 1, 5, TCGGGACGC, 1, 1 27 | -------------------------------------------------------------------------------- /tests/resources/dcr_TINY_1_alpha.n12: -------------------------------------------------------------------------------- 1 | 3, 7, 4, 5, GCT, A00261:687:HMMCCDSX5:3:1101:5050:1094, AAGCTCAGTGATTCAGCCACCTACCTCTGTGTGGTGGCTAAAGCTGCAGGCAACAAGCTAACTTTTGGAGGAGG, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTTTCCGGTCGTGATAAAGTG, FFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFF,FFFFF:FFF 2 | 20, 11, 5, 4, GGGCGCCTT, A00261:687:HMMCCDSX5:3:1101:18331:1110, ACGCTGAGAGACACTGCTGTGTACTATTGCATCGTCAGGGGCGCCTTTTCTGGTTCTGCAAGGCAACTGACCTTTGGATCTGG, FFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFF:FFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAACACCCTGGTTACCCGTCGTGATGCCTTT, FF,FFF,FFFF:,F,FFFFFF:FFFF::FFFFF:F,F,F::F 3 | 3, 7, 4, 5, GCT, A00261:687:HMMCCDSX5:3:1101:4417:1219, AAGCTCAGTGATTCAGCCACCTACCTCTGTGTGGTGGCTAAAGCTGCAGGCAACAAGCTAACTTTTGGAGGAGG, FFFFFFFFFFF:FFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTTTCCGGTCGTGATAAAGTG, FFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 4 | 28, 26, 1, 1, ACCTTA, A00261:687:HMMCCDSX5:3:1101:3794:1235, CAGACCGGAGACTCGGCCATCTACCTCTGTGCTGTGGAGACCTTAAATGCTGGCAACAACCGTAAGCTGATTTGGGGATTGGGA, F::,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFF,FFFFF:FFFFFFF:FFFFF, GTCGTGACTGGGAAAACCCTGGTTAGCCGTCGTGATGTGCTA, FFFFFFFFFFF:FF,FFFFF:FFFFFF:FFFF,FFF,FFFFF 5 | 15, 37, 4, 12, TT, A00261:687:HMMCCDSX5:3:1101:19027:1313, TCAGCCTGGTGACTCAGCCACCTACCTCTGTGCTGTTTGAAATGAGAAATTAACCTTTGGGACTGG, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCACTTTGTCGTGATACGATC, FFFFFFFFFFFFFFFFFFFFFFFF:FF:FFFFFFFFFFFFFF 6 | 1, 22, 4, 0, , A00261:687:HMMCCDSX5:3:1101:29170:1344, GAAGGAGCTCCAGATGAAAGACTCTGCCTCTTACCTCTGTGCTGTGATGGATAGCAACTATCAGTTAATCTGGGGCGCTGGGACCAA, FFFFFFFFFFFFF,FFF,FFFFFF,FFFFFFF,,FFFFFFFFF:FFFFFFFF:FFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTTGCCTGTCGTGATTTCTAT, FFFFFFFFFFFFFFFFFFFFF:FFFFF:FFFF::FFFFF,FF 7 | 1, 17, 2, 7, CGACTCGG, A00261:687:HMMCCDSX5:3:1101:21260:1360, GAAGGAGCTCCAGATGAAAGACTCTGCCTCTTACCTCTGTGCTGTGAGACGACTCGGAGGAAACACACCTCTTGTCTTTGGAAAGGGCAC, FFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCGCGCGGTCGTGATAATCAT, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FF 8 | 8, 2, 0, 7, CGGGGG, A00261:687:HMMCCDSX5:3:1101:26585:1407, TGGGGGACTCCGCAATGTACTTCTGTGCAATGAGAGAGGGCGGGGGTAGCAGCTATAAATTGATCTTCGGGAGTGGGAC, FFFFFFFFFF,FFFFFF:FF:FFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCTCAAAGTCGTGATCTCCTG, FFFFFFFFFFFFF:FFFFF:FF:FFFFFF:FFFF,:FFFFFF 9 | 17, 12, 0, 8, TG, A00261:687:HMMCCDSX5:3:1101:30653:1407, CAGCCTGGAGACTCAGCCACCTACTTCTGTGCAGCAAGCATGAACCAGGGAGGAAAGCTTATCTTCGGACAGGG, FFFFFF:FFFFFFFFFFFFFFFFF:F:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTTGCCAGTCGTGATGTCTAT, :FFF,FFFFFFFFFFFFFFFFFFF,FFFFFF:F,FFFFFFFF 10 | 39, 14, 3, 5, CCGG, A00261:687:HMMCCDSX5:3:1101:30309:1438, ATATGAGCGACGCGGCTGAGTACTTCTGTGTTGTGAGCCGGAACTATGGTCAGAATTTTGTCTTTGGTCCCGGAAC, FFFF::FFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGGTATTCGTCGTGATGGTCCG, ,FF,FF:FFFFFFFFF,FFFFF:FFFFFFFF:FFFF:FFFF, 11 | 13, 7, 0, 1, TCGGGCGAGGGG, A00261:687:HMMCCDSX5:3:1101:24532:1454, GGGAGGCAGATGCTGCTGTTTACTACTGTGCTGTGGAGGATCGGGCGAGGGGGATCAAAGCTGCAGGCAACAAGCTAACTTTTGGAGGAGG, F:FF:FFFFFF:FFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTTCCTCGTCGTGATGGCTAC, ::FFFFFFFFFFFFFFFFFFF:FFF:FFFFFFFFFF:F:FFF 12 | 43, 32, 0, 7, GCT, A00261:687:HMMCCDSX5:3:1101:28890:1454, AAGTGTCAGACTCAGCGGTGTACTTCTGTGCTCTGAGTGAGCTCAATGACATGCGCTTTGGAGCAGGGACCAGACT, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCCCTCAGTCGTGATATCTAG, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FF:FFFFF 13 | 13, 7, 0, 1, TCGGGCGAGGGG, A00261:687:HMMCCDSX5:3:1101:24542:1470, GGGAGGCAGATGCTGCTGTTTACTACTGTGCTGTGGAGGATCGGGCGAGGGGGATCAAAGCTGCAGGCAACAAGCTAACTTTTGGAGGAGG, FFFFFFFFFFF:FFFFFFFFFFF:FF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFF:F:FFFF, GTCGTGACTGGGAAAACCCTGGTTCCTCGTCGTGATGGCTAC, ,FF:FFFFF,FFFFFF:F:FFF:FFF:,FFFFFFFFFF:FF: 14 | 18, 16, 4, 6, CCTCCGGCCC, A00261:687:HMMCCDSX5:3:1101:10854:1532, TCCCAGCCTGAAGACTCAGCCACATACCTCTGTGCCCCTCCGGCCCCTGGGGCTGGGAGTTACCAACTCACTTTCGGGAA, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGAGTGACGTCGTGATTTGTAT, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 15 | 32, 47, 2, 11, TCCTT, A00261:687:HMMCCDSX5:3:1101:13964:1564, TCCCTGAGCGACACTGCTGTGTACTACTGCCTCGTGGGTGATCCTTACAACAGACTCGCTTTTGGGAAGGGGAAC, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTTGTGCGTCGTGATTACTTT, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 16 | 1, 23, 3, 3, G, A00261:687:HMMCCDSX5:3:1101:27597:1658, GAAGGAGCTCCAGATGAAAGACTCTGCCTCTTACCTCTGTGCTGTGAGGTATAACACCGACAAGCTCATCTTTGGGACTGGGACCA, FFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCTTTTAGTCGTGATATTCAA, FFFF::FFFFFFFFF::FFFFFFFFFFFFFFFFFFFFF:FFF 17 | 39, 14, 3, 5, CCGG, A00261:687:HMMCCDSX5:3:1101:31901:1658, ATATGAGCGACGCGGCTGAGTACTTCTGTGTTGTGAGCCGGAACTATGGTCAGAATTTTGTCTTTGGTCCCGGAAC, FFF,F,F:FFF:FF,FF:FFFF,,F,FFFF:FFFFFF,FFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFF:F, GTCGTGACTGGGAAAACCCTGGGTATTCGTCGTGATGGTCCG, FFFFFFFFFF:FFFF,FF:FFFFFFFFF:FFFFFFFFFFF,F 18 | 15, 36, 5, 0, CGG, A00261:687:HMMCCDSX5:3:1101:3332:1720, TCAGCCTGGTGACTCAGCCACCTACCTCTGTGCTGCGGTGGAATATGGAAACAAGCTGGTCTTTGGCGCAGGAACCAT, FFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTCTACCGTCGTGATTCTACG, FFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFF:F,FFFF 19 | 7, 15, 3, 11, TTC, A00261:687:HMMCCDSX5:3:1101:18041:1736, CAACCTGGAGACTCAGCTGTCTACTTTTGTGCAGAGATTCCAGGCAAATCAACCTTTGGGGATGGGACT, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFF, GTCGTGACTGGGAAAACCCTGGAGCGTAGTCGTGATCTGTAG, FFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFF:,F,:F 20 | 4, 7, 3, 8, CAGGG, A00261:687:HMMCCDSX5:3:1101:26350:1752, CTCATCAGAGACTCCCAGCCCAGTGATTCAGCCACCTACCTCTGTGCCGTGACAGGGGCTGCAGGCAACAAGCTAACTTTTGGAGGAGG, FFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTCTCTTGTCGTGATATGCAA, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 21 | 6, 49, 0, 9, TA, A00261:687:HMMCCDSX5:3:1101:3287:1767, CAACCTGAAGACTCGGCTGTCTACTTCTGTGCAGCAAGTATAGGAGGCTTCAAAACTATCTTTGGAGCAGGAA, FFFFFFFFFFFFFFFF:FFFFF:FFFFFFF:FFFFFFFFFF:FFFFFFFF::FFFFFFFF:FFFFF:FFFFFF, GTCGTGACTGGGAAAACCCTGGTTTCATGTCGTGATAACAAG, FFFFFFFFF:FFFFFFFFFFF,FFFFFFFFFF,FFFF,:F,F 22 | 43, 33, 4, 5, CCAGA, A00261:687:HMMCCDSX5:3:1101:2302:1814, AAGTGTCAGACTCAGCGGTGTACTTCTGTGCTCTGACCAGAACCGGCACTGCCAGTAAACTCACCTTTGGGACTGG, FFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTATTGCGTCGTGATCCATAT, FFFFFFFFFFFFF:FFFFF,FFF:FFFFFFFFFFFFFFFFF: 23 | 10, 46, 0, 10, CT, A00261:687:HMMCCDSX5:3:1101:6189:1814, CGGGCAGCAGACACTGCTTCTTACTTCTGTGCTACGGACGCTGGAAGCTACATACCTACATTTGGAAGAGGA, FFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGGAACATGTCGGATCCTCATG, FFFFFFFFFFFFFFFFFFFF:FFFFFF:FFFFFFFFFFFFFF 24 | 1, 22, 4, 0, , A00261:687:HMMCCDSX5:3:1101:29243:1814, GAAGGAGCTCCAGATGAAAGACTCTGCCTCTTACCTCTGTGCTGTGATGGATAGCAACTATCAGTTAATCTGGGGCGCTGGGACCAA, F:FFFFFFF,FFFFFFFFFFFFFFFF:FFFFFF::FFFFFFFFFF:FFFFFFF:FFFFFFF:FFFFF:F:FFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTTGCCTGTCGTGATTTCTAT, FFFFFFFFFFFFFFFF,:FFF,:::F:FFFFF:FFF,FFFFF 25 | 32, 9, 0, 7, TAGG, A00261:687:HMMCCDSX5:3:1101:3296:1846, TCCCTGAGCGACACTGCTGTGTACTACTGCCTCGTGGGTGACATAGGCGACTACAAGCTCAGCTTTGGAGCCGGAACCAC, ,FF,,FFFFFFF,FFF:FFFFFFFF:FFF:FFFFFFFFFFFFFF:FFFFFFFFFFFFFFF,FFFFFFF:F,FFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTGCACAGTCGTGATTCCGCG, ,,,F,:FF,:,:FFFFF,FF:,F:FFFF:FF:F,F::F,F:, 26 | 22, 19, 0, 11, CTGAA, A00261:687:HMMCCDSX5:3:1101:3314:1846, GCCCAGCCTGGTGATACAGGCCTCTACCTCTGTGCAGGAGCTGAAGACAAGATCATCTTTGGAAAAGGGACACG, FFFFFFFFFFFFFFFF:FFF:FFFFFFFF:FFF:F:F::F,F,F,FFFFFFFF:,F:FFFF,FFFFFF:FFFFF, GTCGTGACTGGGAAAACCCTGGCCACTAGTCGTGATTTAATG, FFFFFF:FFFFFFFFFFFFFFFFFF,FF:FF:FFFF,FFFFF 27 | 23, 44, 0, 4, GGGGC, A00261:687:HMMCCDSX5:3:1101:9833:1892, CAGCCTGGAGACTCTGCAGTGTACTTCTGTGCAGCAAGCGGGGGCACTGGAGCCAATAGTAAGCTGACATTTGGAAAAGGA, FFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFF:FFFFFF, GTCGTGACTGGGAAAACCCTGGCGTGACGTCGTGATTTTAAG, FFFFFFFFF:F:FFFFFFF:F,:FFFF:F,FFFFFFFFFFFF 28 | 19, 48, 0, 5, CCGTCG, A00261:687:HMMCCDSX5:3:1101:16848:1924, CACCCAGACTACAGATGTAGGAACCTACTTCTGTGCAGGGCCGTCGACAGGCTTTCAGAAACTTGTATTTGGAACTGGCAC, FFFFFFFFFF:FFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCACATGGTCGTGATGAGACC, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 29 | 35, 9, 6, 3, CCACACCTA, A00261:687:HMMCCDSX5:3:1101:19090:1924, CAGACTGGGGACTCAGCTATCTACTTCTGTGCAGCCACACCTACTAACGACTACAAGCTCAGCTTTGGAGCCGGAACCAC, ::,FFFFFFFFFF::FFFFFFFFFFF:FFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGACTCAAGTCGTGATTTATTC, FFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFF:FFFF 30 | 10, 39, 0, 1, GGGG, A00261:687:HMMCCDSX5:3:1101:20871:1939, CGGGCAGCAGACACTGCTTCTTACTTCTGTGCTACGGACGGGGGGGACACGGGCAGGAGAGCACTTACTTTTGGGAGTGGAAC, FFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FF, GTCGTGACTGGGAAAACCCTGGTACACCGTCGTGATAAATAT, :FFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFF:FFFFFFF 31 | 3, 10, 1, 5, GGATACAG, A00261:687:HMMCCDSX5:3:1101:10881:1986, AAGCTCAGTGATTCAGCCACCTACCTCTGTGTGGTGAACGGATACAGCTTCAACAAATTTTACTTTGGATCTGGGACCAAAC, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTTCCTAGTCGTGATTTTAGT, FFFFFFFFFFFFFFFFFFFFFF,FFFFFF:FFFFFFFFFFFF 32 | 30, 34, 8, 0, CGGTT, A00261:687:HMMCCDSX5:3:1101:12165:1986, CTGGGGGATGCCGCGATGTATTTCTGTGCTTACGGTTTGTATTCAGGAGGAGGTGCTGACGGACTCACCTTTGGCAA, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFF, GTCGTGACTGGGAAAACCCTGGAGATCTGTCGTGATTCGGGC, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 33 | 13, 24, 0, 3, GGGGGTAGG, A00261:687:HMMCCDSX5:3:1101:24008:1986, GGGAGGCAGATGCTGCTGTTTACTACTGTGCTGTGGAGGAGGGGGTAGGAACTGGGGCAAACAACCTCTTCTTTGGGACTGGAACG, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTTCAGCGTCGTGATTTGTGA, FFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFF:F,FFFF:FF 34 | 39, 28, 10, 8, CTGTGAGTGAA, A00261:687:HMMCCDSX5:3:1101:6144:2049, ATATGAGCGACGCGGCTGAGTACTTCTGTGCTGTGAGTGAAGGTGGCTACAATAAGCTGATTTTTGGAGCAGG, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTTATTTGTCGTGATTTCTTT, F:FFFF:FFFFFFFFFF:FFF:FFF,FFFFFFFF:FFFFF,F 35 | 23, 41, 1, 5, TCGGGACGC, A00261:687:HMMCCDSX5:3:1101:15727:2080, CAGCCTGGAGACTCTGCAGTGTACTTCTGTGCAGCAAGCTCGGGACGCGCTGGTGGTACTAGCTATGGAAAGCTGACATTTGG, FF:FFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTCTATCGTCGTGATACCCCT, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 36 | -------------------------------------------------------------------------------- /tests/resources/dcr_TINY_1_alpha.tsv: -------------------------------------------------------------------------------- 1 | sequence_id v_call d_call j_call junction_aa duplicate_count sequence junction decombinator_id rev_comp productive sequence_aa cdr1_aa cdr2_aa vj_in_frame stop_codon conserved_c conserved_f sequence_alignment germline_alignment v_cigar d_cigar j_cigar av_UMI_cluster_size 2 | 1 TRAV12-1 TRAJ17 CVVAKAAGNKLTF 1 CGGAAGGAGGTGGAGCAGGATCCTGGACCCTTCAATGTTCCAGAGGGAGCCACTGTCGCTTTCAACTGTACTTACAGCAACAGTGCTTCTCAGTCTTTCTTCTGGTACAGACAGGATTGCAGGAAAGAACCTAAGTTGCTGATGTCCGTATACTCCAGTGGTAATGAAGATGGAAGGTTTACAGCACAGCTCAATAGAGCCAGCCAGTATATTTCCCTGCTCATCAGAGACTCCAAGCTCAGTGATTCAGCCACCTACCTCTGTGTGGTGGCTAAAGCTGCAGGCAACAAGCTAACTTTTGGAGGAGGAACCAGGGTGCTAGTTAAACCAA TGTGTGGTGGCTAAAGCTGCAGGCAACAAGCTAACTTTT 3, 7, 4, 5, GCT F T RKEVEQDPGPFNVPEGATVAFNCTYSNSASQSFFWYRQDCRKEPKLLMSVYSSGNEDGRFTAQLNRASQYISLLIRDSKLSDSATYLCVVAKAAGNKLTFGGGTRVLVKP NSASQS VYSSGN T F T T 2 3 | 2 TRAV26-1 TRAJ22 CIVRGAFSGSARQLTF 1 GATGCTAAGACCACCCAGCCCCCCTCCATGGATTGCGCTGAAGGAAGAGCTGCAAACCTGCCTTGTAATCACTCTACCATCAGTGGAAATGAGTATGTGTATTGGTATCGACAGATTCACTCCCAGGGGCCACAGTATATCATTCATGGTCTAAAAAACAATGAAACCAATGAAATGGCCTCTCTGATCATCACAGAAGACAGAAAGTCCAGCACCTTGATCCTGCCCCACGCTACGCTGAGAGACACTGCTGTGTACTATTGCATCGTCAGGGGCGCCTTTTCTGGTTCTGCAAGGCAACTGACCTTTGGATCTGGGACACAATTGACTGTTTTACCTG TGCATCGTCAGGGGCGCCTTTTCTGGTTCTGCAAGGCAACTGACCTTT 20, 11, 5, 4, GGGCGCCTT F T DAKTTQPPSMDCAEGRAANLPCNHSTISGNEYVYWYRQIHSQGPQYIIHGLKNNETNEMASLIITEDRKSSTLILPHATLRDTAVYYCIVRGAFSGSARQLTFGSGTQLTVLP TISGNEY GLKNN T F T T 1 4 | 3 TRAV36/DV7 TRAJ38 CAVETLNAGNNRKLIW 1 GAAGACAAGGTGGTACAAAGCCCTCTATCTCTGGTTGTCCACGAGGGAGACACCGTAACTCTCAATTGCAGTTATGAAGTGACTAACTTTCGAAGCCTACTATGGTACAAGCAGGAAAAGAAAGCTCCCACATTTCTATTTATGCTAACTTCAAGTGGAATTGAAAAGAAGTCAGGAAGACTAAGTAGCATATTAGATAAGAAAGAACTTTCCAGCATCCTGAACATCACAGCCACCCAGACCGGAGACTCGGCCATCTACCTCTGTGCTGTGGAGACCTTAAATGCTGGCAACAACCGTAAGCTGATTTGGGGATTGGGAACAAGCCTGGCAGTAAATCCGA TGTGCTGTGGAGACCTTAAATGCTGGCAACAACCGTAAGCTGATTTGG 28, 26, 1, 1, ACCTTA F T EDKVVQSPLSLVVHEGDTVTLNCSYEVTNFRSLLWYKQEKKAPTFLFMLTSSGIEKKSGRLSSILDKKELSSILNITATQTGDSAIYLCAVETLNAGNNRKLIWGLGTSLAVNP VTNFRS LTSSGIE T F T T 1 5 | 4 TRAV21 TRAJ48 1 AAACAGGAGGTGACGCAGATTCCTGCAGCTCTGAGTGTCCCAGAAGGAGAAAACTTGGTTCTCAACTGCAGTTTCACTGATAGCGCTATTTACAACCTCCAGTGGTTTAGGCAGGACCCTGGGAAAGGTCTCACATCTCTGTTGCTTATTCAGTCAAGTCAGAGAGAGCAAACAAGTGGAAGACTTAATGCCTCGCTGGATAAATCATCAGGACGTAGTACTTTATACATTGCAGCTTCTCAGCCTGGTGACTCAGCCACCTACCTCTGTGCTGTTTGAAATGAGAAATTAACCTTTGGGACTGGAACAAGACTCACCATCATACCCA 15, 37, 4, 12, TT F F KQEVTQIPAALSVPEGENLVLNCSFTDSAIYNLQWFRQDPGKGLTSLLLIQSSQREQTSGRLNASLDKSSGRSTLYIAASQPGDSATYLCAV*NEKLTFGTGTRLTIIP T T T T 1 6 | 5 TRAV1-2 TRAJ33 CAVMDSNYQLIW 1 GGACAAAACATTGACCAGCCCACTGAGATGACAGCTACGGAAGGTGCCATTGTCCAGATCAACTGCACGTACCAGACATCTGGGTTCAACGGGCTGTTCTGGTACCAGCAACATGCTGGCGAAGCACCCACATTTCTGTCTTACAATGTTCTGGATGGTTTGGAGGAGAAAGGTCGTTTTTCTTCATTCCTTAGTCGGTCTAAAGGGTACAGTTACCTCCTTTTGAAGGAGCTCCAGATGAAAGACTCTGCCTCTTACCTCTGTGCTGTGATGGATAGCAACTATCAGTTAATCTGGGGCGCTGGGACCAAGCTAATTATAAAGCCAG TGTGCTGTGATGGATAGCAACTATCAGTTAATCTGG 1, 22, 4, 0, F T GQNIDQPTEMTATEGAIVQINCTYQTSGFNGLFWYQQHAGEAPTFLSYNVLDGLEEKGRFSSFLSRSKGYSYLLLKELQMKDSASYLCAVMDSNYQLIWGAGTKLIIKP TSGFNG NVLDGL T F T T 2 7 | 6 TRAV1-2 TRAJ29 CAVRRLGGNTPLVF 1 GGACAAAACATTGACCAGCCCACTGAGATGACAGCTACGGAAGGTGCCATTGTCCAGATCAACTGCACGTACCAGACATCTGGGTTCAACGGGCTGTTCTGGTACCAGCAACATGCTGGCGAAGCACCCACATTTCTGTCTTACAATGTTCTGGATGGTTTGGAGGAGAAAGGTCGTTTTTCTTCATTCCTTAGTCGGTCTAAAGGGTACAGTTACCTCCTTTTGAAGGAGCTCCAGATGAAAGACTCTGCCTCTTACCTCTGTGCTGTGAGACGACTCGGAGGAAACACACCTCTTGTCTTTGGAAAGGGCACAAGACTTTCTGTGATTGCAA TGTGCTGTGAGACGACTCGGAGGAAACACACCTCTTGTCTTT 1, 17, 2, 7, CGACTCGG F T GQNIDQPTEMTATEGAIVQINCTYQTSGFNGLFWYQQHAGEAPTFLSYNVLDGLEEKGRFSSFLSRSKGYSYLLLKELQMKDSASYLCAVRRLGGNTPLVFGKGTRLSVIA TSGFNG NVLDGL T F T T 1 8 | 7 TRAV14/DV4 TRAJ12 CAMREGGGSSYKLIF 1 GCCCAGAAGATAACTCAAACCCAACCAGGAATGTTCGTGCAGGAAAAGGAGGCTGTGACTCTGGACTGCACATATGACACCAGTGATCCAAGTTATGGTCTATTCTGGTACAAGCAGCCCAGCAGTGGGGAAATGATTTTTCTTATTTATCAGGGGTCTTATGACCAGCAAAATGCAACAGAAGGTCGCTACTCATTGAATTTCCAGAAGGCAAGAAAATCCGCCAACCTTGTCATCTCCGCTTCACAACTGGGGGACTCAGCAATGTACTTCTGTGCAATGAGAGAGGGCGGGGGTAGCAGCTATAAATTGATCTTCGGGAGTGGGACCAGACTGCTGGTCAGGCCTG TGTGCAATGAGAGAGGGCGGGGGTAGCAGCTATAAATTGATCTTC 8, 2, 0, 7, CGGGGG F T AQKITQTQPGMFVQEKEAVTLDCTYDTSDPSYGLFWYKQPSSGEMIFLIYQGSYDQQNATEGRYSLNFQKARKSANLVISASQLGDSAMYFCAMREGGGSSYKLIFGSGTRLLVRP TSDPSYG QGSYDQQN T F T T 1 9 | 8 TRAV23/DV6 TRAJ23 CAASMNQGGKLIF 1 GGCCAACAGAAGGAGAAAAGTGACCAGCAGCAGGTGAAACAAAGTCCTCAATCTTTGATAGTCCAGAAAGGAGGGATTTCAATTATAAACTGTGCTTATGAGAACACTGCGTTTGACTACTTTCCATGGTACCAACAATTCCCTGGGAAAGGCCCTGCATTATTGATAGCCATACGTCCAGATGTGAGTGAAAAGAAAGAAGGAAGATTCACAATCTCCTTCAATAAAAGTGCCAAGCAGTTCTCATTGCATATCATGGATTCCCAGCCTGGAGACTCAGCCACCTACTTCTGTGCAGCAAGCATGAACCAGGGAGGAAAGCTTATCTTCGGACAGGGAACGGAGTTATCTGTGAAACCCA TGTGCAGCAAGCATGAACCAGGGAGGAAAGCTTATCTTC 17, 12, 0, 8, TG F T GQQKEKSDQQQVKQSPQSLIVQKGGISIINCAYENTAFDYFPWYQQFPGKGPALLIAIRPDVSEKKEGRFTISFNKSAKQFSLHIMDSQPGDSATYFCAASMNQGGKLIFGQGTELSVKP NTAFDY IRPDVSE T F T T 1 10 | 9 TRAV8-2 TRAJ26 CVVSRNYGQNFVF 1 GCCCAGTCGGTGACCCAGCTTGACAGCCACGTCTCTGTCTCTGAAGGAACCCCGGTGCTGCTGAGGTGCAACTACTCATCTTCTTATTCACCATCTCTCTTCTGGTATGTGCAACACCCCAACAAAGGACTCCAGCTTCTCCTGAAGTACACATCAGCGGCCACCCTGGTTAAAGGCATCAACGGTTTTGAGGCTGAATTTAAGAAGAGTGAAACCTCCTTCCACCTGACGAAACCCTCAGCCCATATGAGCGACGCGGCTGAGTACTTCTGTGTTGTGAGCCGGAACTATGGTCAGAATTTTGTCTTTGGTCCCGGAACCAGATTGTCCGTGCTGCCCT TGTGTTGTGAGCCGGAACTATGGTCAGAATTTTGTCTTT 39, 14, 3, 5, CCGG F T AQSVTQLDSHVSVSEGTPVLLRCNYSSSYSPSLFWYVQHPNKGLQLLLKYTSAATLVKGINGFEAEFKKSETSFHLTKPSAHMSDAAEYFCVVSRNYGQNFVFGPGTRLSVLP SSYSPS YTSAATLV T F T T 2 11 | 10 TRAV2 TRAJ17 CAVEDRARGIKAAGNKLTF 1 AAGGACCAAGTGTTTCAGCCTTCCACAGTGGCATCTTCAGAGGGAGCTGTGGTGGAAATCTTCTGTAATCACTCTGTGTCCAATGCTTACAACTTCTTCTGGTACCTTCACTTCCCGGGATGTGCACCAAGACTCCTTGTTAAAGGCTCAAAGCCTTCTCAGCAGGGACGATACAACATGACCTATGAACGGTTCTCTTCATCGCTGCTCATCCTCCAGGTGCGGGAGGCAGATGCTGCTGTTTACTACTGTGCTGTGGAGGATCGGGCGAGGGGGATCAAAGCTGCAGGCAACAAGCTAACTTTTGGAGGAGGAACCAGGGTGCTAGTTAAACCAA TGTGCTGTGGAGGATCGGGCGAGGGGGATCAAAGCTGCAGGCAACAAGCTAACTTTT 13, 7, 0, 1, TCGGGCGAGGGG F T KDQVFQPSTVASSEGAVVEIFCNHSVSNAYNFFWYLHFPGCAPRLLVKGSKPSQQGRYNMTYERFSSSLLILQVREADAAVYYCAVEDRARGIKAAGNKLTFGGGTRVLVKP VSNAYN GSKP T F T T 2 12 | 11 TRAV9-2 TRAJ43 CALSELNDMRF 1 GGAAATTCAGTGACCCAGATGGAAGGGCCAGTGACTCTCTCAGAAGAGGCCTTCCTGACTATAAACTGCACGTACACAGCCACAGGATACCCTTCCCTTTTCTGGTATGTCCAATATCCTGGAGAAGGTCTACAGCTCCTCCTGAAAGCCACGAAGGCTGATGACAAGGGAAGCAACAAAGGTTTTGAAGCCACATACCGTAAAGAAACCACTTCTTTCCACTTGGAGAAAGGCTCAGTTCAAGTGTCAGACTCAGCGGTGTACTTCTGTGCTCTGAGTGAGCTCAATGACATGCGCTTTGGAGCAGGGACCAGACTGACAGTAAAACCAA TGTGCTCTGAGTGAGCTCAATGACATGCGCTTT 43, 32, 0, 7, GCT F T GNSVTQMEGPVTLSEEAFLTINCTYTATGYPSLFWYVQYPGEGLQLLLKATKADDKGSNKGFEATYRKETTSFHLEKGSVQVSDSAVYFCALSELNDMRFGAGTRLTVKP ATGYPS ATKADDK T F T T 1 13 | 12 TRAV24 TRAJ28 CAPPAPGAGSYQLTF 1 ATACTGAACGTGGAACAAAGTCCTCAGTCACTGCATGTTCAGGAGGGAGACAGCACCAATTTCACCTGCAGCTTCCCTTCCAGCAATTTTTATGCCTTACACTGGTACAGATGGGAAACTGCAAAAAGCCCCGAGGCCTTGTTTGTAATGACTTTAAATGGGGATGAAAAGAAGAAAGGACGAATAAGTGCCACTCTTAATACCAAGGAGGGTTACAGCTATTTGTACATCAAAGGATCCCAGCCTGAAGACTCAGCCACATACCTCTGTGCCCCTCCGGCCCCTGGGGCTGGGAGTTACCAACTCACTTTCGGGAAGGGGACCAAACTCTCGGTCATACCAA TGTGCCCCTCCGGCCCCTGGGGCTGGGAGTTACCAACTCACTTTC 18, 16, 4, 6, CCTCCGGCCC F T ILNVEQSPQSLHVQEGDSTNFTCSFPSSNFYALHWYRWETAKSPEALFVMTLNGDEKKKGRISATLNTKEGYSYLYIKGSQPEDSATYLCAPPAPGAGSYQLTFGKGTKLSVIP SSNFYA MTLNGDE T F T T 1 14 | 13 TRAV4 TRAJ7 CLVGDPYNRLAF 1 CTTGCTAAGACCACCCAGCCCATCTCCATGGACTCATATGAAGGACAAGAAGTGAACATAACCTGTAGCCACAACAACATTGCTACAAATGATTATATCACGTGGTACCAACAGTTTCCCAGCCAAGGACCACGATTTATTATTCAAGGATACAAGACAAAAGTTACAAACGAAGTGGCCTCCCTGTTTATCCCTGCCGACAGAAAGTCCAGCACTCTGAGCCTGCCCCGGGTTTCCCTGAGCGACACTGCTGTGTACTACTGCCTCGTGGGTGATCCTTACAACAGACTCGCTTTTGGGAAGGGGAACCAAGTGGTGGTCATACCAA TGCCTCGTGGGTGATCCTTACAACAGACTCGCTTTT 32, 47, 2, 11, TCCTT F T LAKTTQPISMDSYEGQEVNITCSHNNIATNDYITWYQQFPSQGPRFIIQGYKTKVTNEVASLFIPADRKSSTLSLPRVSLSDTAVYYCLVGDPYNRLAFGKGNQVVVIP NIATNDY GYKTK T F T T 1 15 | 14 TRAV1-2 TRAJ34 CAVRYNTDKLIF 1 GGACAAAACATTGACCAGCCCACTGAGATGACAGCTACGGAAGGTGCCATTGTCCAGATCAACTGCACGTACCAGACATCTGGGTTCAACGGGCTGTTCTGGTACCAGCAACATGCTGGCGAAGCACCCACATTTCTGTCTTACAATGTTCTGGATGGTTTGGAGGAGAAAGGTCGTTTTTCTTCATTCCTTAGTCGGTCTAAAGGGTACAGTTACCTCCTTTTGAAGGAGCTCCAGATGAAAGACTCTGCCTCTTACCTCTGTGCTGTGAGGTATAACACCGACAAGCTCATCTTTGGGACTGGGACCAGATTACAAGTCTTTCCAA TGTGCTGTGAGGTATAACACCGACAAGCTCATCTTT 1, 23, 3, 3, G F T GQNIDQPTEMTATEGAIVQINCTYQTSGFNGLFWYQQHAGEAPTFLSYNVLDGLEEKGRFSSFLSRSKGYSYLLLKELQMKDSASYLCAVRYNTDKLIFGTGTRLQVFP TSGFNG NVLDGL T F T T 1 16 | 15 TRAV21 TRAJ47 CAAVEYGNKLVF 1 AAACAGGAGGTGACGCAGATTCCTGCAGCTCTGAGTGTCCCAGAAGGAGAAAACTTGGTTCTCAACTGCAGTTTCACTGATAGCGCTATTTACAACCTCCAGTGGTTTAGGCAGGACCCTGGGAAAGGTCTCACATCTCTGTTGCTTATTCAGTCAAGTCAGAGAGAGCAAACAAGTGGAAGACTTAATGCCTCGCTGGATAAATCATCAGGACGTAGTACTTTATACATTGCAGCTTCTCAGCCTGGTGACTCAGCCACCTACCTCTGTGCTGCGGTGGAATATGGAAACAAACTGGTCTTTGGCGCAGGAACCATTCTGAGAGTCAAGTCCT TGTGCTGCGGTGGAATATGGAAACAAACTGGTCTTT 15, 36, 5, 0, CGG F T KQEVTQIPAALSVPEGENLVLNCSFTDSAIYNLQWFRQDPGKGLTSLLLIQSSQREQTSGRLNASLDKSSGRSTLYIAASQPGDSATYLCAAVEYGNKLVFGAGTILRVKS DSAIYN IQSSQRE T F T T 1 17 | 16 TRAV12-2 TRAJ17 CAVTGAAGNKLTF 1 CAGAAGGAGGTGGAGCAGAATTCTGGACCCCTCAGTGTTCCAGAGGGAGCCATTGCCTCTCTCAACTGCACTTACAGTGACCGAGGTTCCCAGTCCTTCTTCTGGTACAGACAATATTCTGGGAAAAGCCCTGAGTTGATAATGTTCATATACTCCAATGGTGACAAAGAAGATGGAAGGTTTACAGCACAGCTCAATAAAGCCAGCCAGTATGTTTCTCTGCTCATCAGAGACTCCCAGCCCAGTGATTCAGCCACCTACCTCTGTGCCGTGACAGGGGCTGCAGGCAACAAGCTAACTTTTGGAGGAGGAACCAGGGTGCTAGTTAAACCAA TGTGCCGTGACAGGGGCTGCAGGCAACAAGCTAACTTTT 4, 7, 3, 8, CAGGG F T QKEVEQNSGPLSVPEGAIASLNCTYSDRGSQSFFWYRQYSGKSPELIMFIYSNGDKEDGRFTAQLNKASQYVSLLIRDSQPSDSATYLCAVTGAAGNKLTFGGGTRVLVKP DRGSQS IYSNGD T F T T 1 18 | 17 TRAV9-2 TRAJ44 CALTRTGTASKLTF 1 GGAAATTCAGTGACCCAGATGGAAGGGCCAGTGACTCTCTCAGAAGAGGCCTTCCTGACTATAAACTGCACGTACACAGCCACAGGATACCCTTCCCTTTTCTGGTATGTCCAATATCCTGGAGAAGGTCTACAGCTCCTCCTGAAAGCCACGAAGGCTGATGACAAGGGAAGCAACAAAGGTTTTGAAGCCACATACCGTAAAGAAACCACTTCTTTCCACTTGGAGAAAGGCTCAGTTCAAGTGTCAGACTCAGCGGTGTACTTCTGTGCTCTGACCAGAACCGGCACTGCCAGTAAACTCACCTTTGGGACTGGAACAAGACTTCAGGTCACGCTCG TGTGCTCTGACCAGAACCGGCACTGCCAGTAAACTCACCTTT 43, 33, 4, 5, CCAGA F T GNSVTQMEGPVTLSEEAFLTINCTYTATGYPSLFWYVQYPGEGLQLLLKATKADDKGSNKGFEATYRKETTSFHLEKGSVQVSDSAVYFCALTRTGTASKLTFGTGTRLQVTL ATGYPS ATKADDK T F T T 1 19 | 18 TRAV17 TRAJ6 CATDAGSYIPTF 1 AGTCAACAGGGAGAAGAGGATCCTCAGGCCTTGAGCATCCAGGAGGGTGAAAATGCCACCATGAACTGCAGTTACAAAACTAGTATAAACAATTTACAGTGGTATAGACAAAATTCAGGTAGAGGCCTTGTCCACCTAATTTTAATACGTTCAAATGAAAGAGAGAAACACAGTGGAAGATTAAGAGTCACGCTTGACACTTCCAAGAAAAGCAGTTCCTTGTTGATCACGGCTTCCCGGGCAGCAGACACTGCTTCTTACTTCTGTGCTACGGACGCTGGAAGCTACATACCTACATTTGGAAGAGGAACCAGCCTTATTGTTCATCCGT TGTGCTACGGACGCTGGAAGCTACATACCTACATTT 10, 46, 0, 10, CT F T SQQGEEDPQALSIQEGENATMNCSYKTSINNLQWYRQNSGRGLVHLILIRSNEREKHSGRLRVTLDTSKKSSSLLITASRAADTASYFCATDAGSYIPTFGRGTSLIVHP TSINN IRSNERE T F T T 1 20 | 19 TRAV29/DV5 TRAJ56 CAASGGTGANSKLTF 1 AGTCAACAGAAGAATGATGACCAGCAAGTTAAGCAAAATTCACCATCCCTGAGCGTCCAGGAAGGAAGAATTTCTATTCTGAACTGTGACTATACTAACAGCATGTTTGATTATTTCCTATGGTACAAAAAATACCCTGCTGAAGGTCCTACATTCCTGATATCTATAAGTTCCATTAAGGATAAAAATGAAGATGGAAGATTCACTGTCTTCTTAAACAAAAGTGCCAAGCACCTCTCTCTGCACATTGTGCCCTCCCAGCCTGGAGACTCTGCAGTGTACTTCTGTGCAGCAAGCGGGGGCACTGGAGCCAATAGTAAGCTGACATTTGGAAAAGGAATAACTCTGAGTGTTAGACCAG TGTGCAGCAAGCGGGGGCACTGGAGCCAATAGTAAGCTGACATTT 23, 44, 0, 4, GGGGC F T SQQKNDDQQVKQNSPSLSVQEGRISILNCDYTNSMFDYFLWYKKYPAEGPTFLISISSIKDKNEDGRFTVFLNKSAKHLSLHIVPSQPGDSAVYFCAASGGTGANSKLTFGKGITLSVRP NSMFDY ISSIKDK T F T T 1 21 | 20 TRAV25 TRAJ8 CAGPSTGFQKLVF 1 GGACAACAGGTAATGCAAATTCCTCAGTACCAGCATGTACAAGAAGGAGAGGACTTCACCACGTACTGCAATTCCTCAACTACTTTAAGCAATATACAGTGGTATAAGCAAAGGCCTGGTGGACATCCCGTTTTTTTGATACAGTTAGTGAAGAGTGGAGAAGTGAAGAAGCAGAAAAGACTGACATTTCAGTTTGGAGAAGCAAAAAAGAACAGCTCCCTGCACATCACAGCCACCCAGACTACAGATGTAGGAACCTACTTCTGTGCAGGGCCGTCGACAGGCTTTCAGAAACTTGTATTTGGAACTGGCACCCGACTTCTGGTCAGTCCAA TGTGCAGGGCCGTCGACAGGCTTTCAGAAACTTGTATTT 19, 48, 0, 5, CCGTCG F T GQQVMQIPQYQHVQEGEDFTTYCNSSTTLSNIQWYKQRPGGHPVFLIQLVKSGEVKKQKRLTFQFGEAKKNSSLHITATQTTDVGTYFCAGPSTGFQKLVFGTGTRLLVSP TTLSN LVKSGEV T F T T 1 22 | 21 TRAV5 TRAJ20 CAATPTNDYKLSF 1 GGAGAGGATGTGGAGCAGAGTCTTTTCCTGAGTGTCCGAGAGGGAGACAGCTCCGTTATAAACTGCACTTACACAGACAGCTCCTCCACCTACTTATACTGGTATAAGCAAGAACCTGGAGCAGGTCTCCAGTTGCTGACGTATATTTTTTCAAATATGGACATGAAACAAGACCAAAGACTCACTGTTCTATTGAATAAAAAGGATAAACATCTGTCTCTGCGCATTGCAGACACCCAGACTGGGGACTCAGCTATCTACTTCTGTGCAGCCACACCTACTAACGACTACAAGCTCAGCTTTGGAGCCGGAACCACAGTAACTGTAAGAGCAA TGTGCAGCCACACCTACTAACGACTACAAGCTCAGCTTT 35, 9, 6, 3, CCACACCTA F T GEDVEQSLFLSVREGDSSVINCTYTDSSSTYLYWYKQEPGAGLQLLTYIFSNMDMKQDQRLTVLLNKKDKHLSLRIADTQTGDSAIYFCAATPTNDYKLSFGAGTTVTVRA DSSSTY IFSNMDM T F T T 1 23 | 22 TRAV17 TRAJ5 CATDGGDTGRRALTF 1 AGTCAACAGGGAGAAGAGGATCCTCAGGCCTTGAGCATCCAGGAGGGTGAAAATGCCACCATGAACTGCAGTTACAAAACTAGTATAAACAATTTACAGTGGTATAGACAAAATTCAGGTAGAGGCCTTGTCCACCTAATTTTAATACGTTCAAATGAAAGAGAGAAACACAGTGGAAGATTAAGAGTCACGCTTGACACTTCCAAGAAAAGCAGTTCCTTGTTGATCACGGCTTCCCGGGCAGCAGACACTGCTTCTTACTTCTGTGCTACGGACGGGGGGGACACGGGCAGGAGAGCACTTACTTTTGGGAGTGGAACAAGACTCCAAGTGCAACCAA TGTGCTACGGACGGGGGGGACACGGGCAGGAGAGCACTTACTTTT 10, 39, 0, 1, GGGG F T SQQGEEDPQALSIQEGENATMNCSYKTSINNLQWYRQNSGRGLVHLILIRSNEREKHSGRLRVTLDTSKKSSSLLITASRAADTASYFCATDGGDTGRRALTFGSGTRLQVQP TSINN IRSNERE T F T T 1 24 | 23 TRAV12-1 TRAJ21 CVVNGYSFNKFYF 1 CGGAAGGAGGTGGAGCAGGATCCTGGACCCTTCAATGTTCCAGAGGGAGCCACTGTCGCTTTCAACTGTACTTACAGCAACAGTGCTTCTCAGTCTTTCTTCTGGTACAGACAGGATTGCAGGAAAGAACCTAAGTTGCTGATGTCCGTATACTCCAGTGGTAATGAAGATGGAAGGTTTACAGCACAGCTCAATAGAGCCAGCCAGTATATTTCCCTGCTCATCAGAGACTCCAAGCTCAGTGATTCAGCCACCTACCTCTGTGTGGTGAACGGATACAGCTTCAACAAATTTTACTTTGGATCTGGGACCAAACTCAATGTAAAACCAA TGTGTGGTGAACGGATACAGCTTCAACAAATTTTACTTT 3, 10, 1, 5, GGATACAG F T RKEVEQDPGPFNVPEGATVAFNCTYSNSASQSFFWYRQDCRKEPKLLMSVYSSGNEDGRFTAQLNRASQYISLLIRDSKLSDSATYLCVVNGYSFNKFYFGSGTKLNVKP NSASQS VYSSGN T F T T 1 25 | 24 TRAV38-2/DV8 TRAJ45 CAYGLYSGGGADGLTF 1 GCTCAGACAGTCACTCAGTCTCAACCAGAGATGTCTGTGCAGGAGGCAGAGACCGTGACCCTGAGCTGCACATATGACACCAGTGAGAGTGATTATTATTTATTCTGGTACAAGCAGCCTCCCAGCAGGCAGATGATTCTCGTTATTCGCCAAGAAGCTTATAAGCAACAGAATGCAACAGAGAATCGTTTCTCTGTGAACTTCCAGAAAGCAGCCAAATCCTTCAGTCTCAAGATCTCAGACTCACAGCTGGGGGATGCCGCGATGTATTTCTGTGCTTACGGTTTGTATTCAGGAGGAGGTGCTGACGGACTCACCTTTGGCAAAGGGACTCATCTAATCATCCAGCCCT TGTGCTTACGGTTTGTATTCAGGAGGAGGTGCTGACGGACTCACCTTT 30, 34, 8, 0, CGGTT F T AQTVTQSQPEMSVQEAETVTLSCTYDTSESDYYLFWYKQPPSRQMILVIRQEAYKQQNATENRFSVNFQKAAKSFSLKISDSQLGDAAMYFCAYGLYSGGGADGLTFGKGTHLIIQP TSESDYY QEAYKQQN T F T T 1 26 | 25 TRAV2 TRAJ36 CAVEEGVGTGANNLFF 1 AAGGACCAAGTGTTTCAGCCTTCCACAGTGGCATCTTCAGAGGGAGCTGTGGTGGAAATCTTCTGTAATCACTCTGTGTCCAATGCTTACAACTTCTTCTGGTACCTTCACTTCCCGGGATGTGCACCAAGACTCCTTGTTAAAGGCTCAAAGCCTTCTCAGCAGGGACGATACAACATGACCTATGAACGGTTCTCTTCATCGCTGCTCATCCTCCAGGTGCGGGAGGCAGATGCTGCTGTTTACTACTGTGCTGTGGAGGAGGGGGTAGGAACTGGGGCAAACAACCTCTTCTTTGGGACTGGAACGAGACTCACCGTTATTCCCT TGTGCTGTGGAGGAGGGGGTAGGAACTGGGGCAAACAACCTCTTCTTT 13, 24, 0, 3, GGGGGTAGG F T KDQVFQPSTVASSEGAVVEIFCNHSVSNAYNFFWYLHFPGCAPRLLVKGSKPSQQGRYNMTYERFSSSLLILQVREADAAVYYCAVEEGVGTGANNLFFGTGTRLTVIP VSNAYN GSKP T F T T 1 27 | 26 TRAV29/DV5 TRAJ52 CAASSGRAGGTSYGKLTF 1 AGTCAACAGAAGAATGATGACCAGCAAGTTAAGCAAAATTCACCATCCCTGAGCGTCCAGGAAGGAAGAATTTCTATTCTGAACTGTGACTATACTAACAGCATGTTTGATTATTTCCTATGGTACAAAAAATACCCTGCTGAAGGTCCTACATTCCTGATATCTATAAGTTCCATTAAGGATAAAAATGAAGATGGAAGATTCACTGTCTTCTTAAACAAAAGTGCCAAGCACCTCTCTCTGCACATTGTGCCCTCCCAGCCTGGAGACTCTGCAGTGTACTTCTGTGCAGCAAGCTCGGGACGCGCTGGTGGTACTAGCTATGGAAAGCTGACATTTGGACAAGGGACCATCTTGACTGTCCATCCAA TGTGCAGCAAGCTCGGGACGCGCTGGTGGTACTAGCTATGGAAAGCTGACATTT 23, 41, 1, 5, TCGGGACGC F T SQQKNDDQQVKQNSPSLSVQEGRISILNCDYTNSMFDYFLWYKKYPAEGPTFLISISSIKDKNEDGRFTVFLNKSAKHLSLHIVPSQPGDSAVYFCAASSGRAGGTSYGKLTFGQGTILTVHP NSMFDY ISSIKDK T F T T 1 28 | -------------------------------------------------------------------------------- /tests/resources/dcr_TINY_1_beta.freq: -------------------------------------------------------------------------------- 1 | 15, 10, 4, 1, CTACCCCCGCGGGGAC, 2, 3 2 | 15, 10, 4, 1, CTACCCCCGCAAAGAC, 1, 1 3 | 43, 0, 5, 6, GGAGGGACAG, 1, 2 4 | 15, 6, 3, 9, CCTAGCGGAATACTCCTACAC, 1, 1 5 | 9, 6, 4, 0, CTCACGGGGGGTT, 1, 1 6 | 0, 7, 6, 8, CCGGGACTAGCGGGTGA, 1, 1 7 | 26, 12, 0, 0, AGCCAGGGGT, 1, 1 8 | 24, 8, 6, 2, TCCTAGCGGCACCTTTTC, 1, 1 9 | 20, 12, 2, 0, AGGGGCAG, 1, 1 10 | 27, 11, 4, 2, ACCGATG, 1, 1 11 | 31, 12, 7, 3, CCGAGACGACTAGCGGGAG, 1, 1 12 | 14, 10, 7, 5, AACGGACGGGAGGGAAGCCGAGC, 1, 1 13 | 22, 0, 5, 4, GAACAGGGGGC, 1, 1 14 | 4, 6, 0, 4, TGGGCAGCGGGTGTTTAAT, 1, 1 15 | 37, 3, 2, 12, ATCTGGGGGACAGCAAAGG, 1, 1 16 | 20, 6, 1, 10, CCGCAGGGGGCCGC, 1, 1 17 | 43, 8, 3, 0, GGGGAGGACTAGC, 1, 2 18 | 20, 11, 4, 2, CTCTCCAGG, 1, 2 19 | 15, 12, 5, 3, CGACTAGCGGGAGA, 1, 1 20 | 33, 6, 5, 2, CCCGGGACAGGAG, 1, 1 21 | 44, 3, 4, 0, CCCCGCGCGG, 1, 1 22 | 29, 6, 0, 5, GTCTAGCGGGAC, 1, 1 23 | 14, 7, 3, 9, GGTGACTAGCC, 1, 1 24 | 26, 6, 4, 7, CCAGGACTAGCACCGAGG, 1, 1 25 | 15, 4, 4, 0, ATCTAGGGGCCGA, 1, 1 26 | 9, 10, 5, 2, TCGACCGGGGCG, 1, 1 27 | 21, 12, 9, 0, TACCCTTAGCGGGAG, 1, 1 28 | 43, 10, 1, 2, GCGCCATGCAA, 1, 1 29 | 44, 0, 2, 1, CAGGGAATC, 1, 1 30 | 2, 10, 0, 1, GAGGTTTGGACACAC, 1, 1 31 | 31, 12, 3, 3, GAGCGGGAGGGT, 1, 1 32 | 19, 6, 9, 3, TGGACTAGCGGA, 1, 1 33 | 6, 6, 1, 7, GTCTGCTT, 1, 1 34 | 19, 12, 5, 6, GAACTTACGGGGCGCGG, 1, 1 35 | 40, 6, 5, 8, CAAACCATGAGTGGCTCACAGGCCC, 1, 1 36 | 17, 10, 1, 5, GGGGAGGGGGCCAG, 1, 1 37 | -------------------------------------------------------------------------------- /tests/resources/dcr_TINY_1_beta.n12: -------------------------------------------------------------------------------- 1 | 15, 10, 4, 1, CTACCCCCGCGGGGAC, A00261:687:HMMCCDSX5:3:1101:8684:1125, ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTACTACCCCCGCGGGGACCCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCT, FFFF:FFFF:FFFFFF:,FFF:FFFFFFFFFFFFFFF::FFFFFFFFFFFFFFFF,FFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTGCTAAGTCGTGATTTTTAT, FFFFF:FFFF:FFFFFFFFFFFFFFFFFFFFFFFFF,FFFFF 2 | 15, 10, 4, 1, CTACCCCCGCGGGGAC, A00261:687:HMMCCDSX5:4:1101:8684:1125, ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTACTACCCCCGCGGGGACCCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCT, FFFF:FFFF:FFFFFF:,FFF:FFFFFFFFFFFFFFF::FFFFFFFFFFFFFFFF,FFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTGCTAAGTCGTGATTTTTAT, FFFFF:FFFF:FFFFFFFFFFFFFFFFFFFFFFFFF,FFFFF 3 | 15, 10, 4, 1, CTACCCCCGCGGAGAC, A00261:687:HMMCCDSX5:5:1101:8684:1125, ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTACTACCCCCGCGGAGACCCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCT, FFFF:FFFF:FFFFFF:,FFF:FFFFFFFFFFFFFFF::FFFFFFFFFFFFFFFF,FFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTGCCAAGTCGTGATTTTTAT, FFFFF:FFFF:FFFFFFFFFFFFFFFFFFFFFFFFF,FFFFF 4 | 15, 10, 4, 1, CTACCCCCGCGAAGAC, A00261:687:HMMCCDSX5:5:1101:8684:1125, ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTACTACCCCCGCGAAGACCCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCT, FFFF:FFFF:FFFFFF:,FFF:FFFFFFFFFFFFFFF::FFFFFFFFFFFFFFFF,FFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTGCACAGTCGTGATTTTTAT, FFFFF:FFFF:FFFFFFFFFFFFFFFFFFFFFFFFF,FFFFF 5 | 15, 10, 4, 1, CTACCCCCGCAAAGAC, A00261:687:HMMCCDSX5:5:1101:8684:1125, ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTACTACCCCCGCAAAGACCCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCT, FFFF:FFFF:FFFFFF:,FFF:FFFFFFFFFFFFFFF::FFFFFFFFFFFFFFFF,FFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTGCGGGGTCGTGATTTTTAT, FFFFF:FFFF:FFFFFFFFFFFFFFFFFFFFFFFFF,FFFFF 6 | 15, 10, 4, 1, CTACCCCCGCGGGGAC, A00261:687:HMMCCDSX5:6:1101:8684:1125, ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTACTACCCCCGCGGGGACCCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCT, FFFF:FFFF:FFFFFF:,FFF:FFFFFFFFFFFFFFF::FFFFFFFFFFFFFFFF,FFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTCGATAGTCGTGATTTTTAT, FFFFF:FFFF:FFFFFFFFFFFFFFFFFFFFFFFFF,FFFFF 7 | 15, 10, 3, 1, CGGGGACCTACCCCC, A00261:687:HMMCCDSX5:7:1101:8684:1125, ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGCGGGGACCTACCCCCCCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCT, FFFF:FFFF:FFFFFF:,FFF:FFFFFFFFFFFFFFF::FFFFFFFFFFFFFFFF,FFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTGCTAAGTCGTGATTTTTAT, FFFFF:FFFF:FFFFFFFFFFFFFFFFFFFFFFFFF,FFFFF 8 | 43, 0, 5, 6, GGAGGGACAG, A00261:687:HMMCCDSX5:3:1101:29604:1125, GAGCAGGGGGACTCGGCCATGTATCTCTGTGCCAGCAGCGGAGGGACAGCTGAAGCTTTCTTTGGACAAGGCACCAGACTCAC, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCATTCTGTCGTGATTAAAAT, FFFFFFFFFFFFFFFFF:FFFF:FFFFFFF:FF:F:FFFFFF 9 | 15, 6, 3, 9, CCTAGCGGAATACTCCTACAC, A00261:687:HMMCCDSX5:3:1101:17300:1141, ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGCCTAGCGGAATACTCCTACACTGAGCAGTTCTTCGGGCCAGGGACACGGCTC, FFFFFFFF:FF:FFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FF::FFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGGTTGCCGTCGTGATATTAAT, FFFFFFFF:FFFFFFFFFFFFFF:FF::FFFF,F,FFFFFFF 10 | 9, 6, 4, 0, CTCACGGGGGGTT, A00261:687:HMMCCDSX5:3:1101:26793:1141, TGGAGGATTCTGGAGTTTATTTCTGTGCCAGCAGCCCTCACGGGGGGTTCTCCTACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTC, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTGTAATGTCGTGATTTCAAC, FFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFF: 11 | 0, 7, 6, 8, CCGGGACTAGCGGGTGA, A00261:687:HMMCCDSX5:3:1101:28854:1141, CCTCCTCCCCGACATCTGTATATTTCTGCGCCAGCAGCCGGGACTAGCGGGTGAGGGGAGCTGTTTTTTGGAGAAGGCTCTAGGCT, FFFFF,FF:,,FF:,,,,FF:FF,,:,FFFFF:FFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCCTTTCGTCGTGATATCCAT, FFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFF 12 | 26, 12, 0, 0, AGCCAGGGGT, A00261:687:HMMCCDSX5:3:1101:22480:1157, CTGGGGGACTCGGCCCTTTATCTTTGCGCCAGCAGCTTGGAGCCAGGGGTCTCCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACG, FFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTCCGTTGTCGTGATCCTTAT, :FFFFFFFFFFFFFFFFFFFFFFFFF::FFFFFF:FFFFFFF 13 | 24, 8, 6, 2, TCCTAGCGGCACCTTTTC, A00261:687:HMMCCDSX5:3:1101:28926:1172, AGAAGACTCGGCCCTGTATCTCTGTGCCAGCAGTCCTAGCGGCACCTTTTCCACAGATACGCAGTATTTTGGCCCAGGCACCCGGCTGA, FFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTTGGGTGTCGTGATCCTTTG, FFFFFFFFFFF,FFFFFFFF,FFF:FFFFFFFFFFF:F:FFF 14 | 15, 3, 3, 7, GGGTTTTGGGGCAGTTAG, A00261:687:HMMCCDSX5:3:1101:23737:1266, ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGGGGTTTTGGGGCAGTTAGTGAAAAACTGTTTTTTGGCAGTGGAACCCAGCT, ::FFFFF,F:F:FF:FF:FF,FF:FFFFFFF:FF::FFFFFF:FFFFFF:FF:FFFF::::FF::F::F,FFFF,F,:F:FFFFFFFF, GTCGTGACTGGGAAAACCCTGGTAGTCAGTCGTGATTGTCAT, F,FF,FFF:FFFF,FFFFF,FF:,FF,:FFFFFFF,,F:FF: 15 | 20, 12, 2, 0, AGGGGCAG, A00261:687:HMMCCDSX5:3:1101:29966:1282, GCCCTGAAGACAGCAGCATATATCTCTGCAGCGTTGAAAGGGGCAGCTCCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACG, FFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCCCTATGTCGTGATCTTTAC, FFFF:FFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFF 16 | 27, 11, 4, 2, ACCGATG, A00261:687:HMMCCDSX5:3:1101:20627:1297, CTGGACGACTCGGCCCTGTATCTCTGTGCCAGCAGCACCGATGCTGGGGCCAACGTCCTGACTTTCGGGGCCGGCAGCAGG, FFFFFFFFFFFFFFF:FFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCTCGCCGTCGTGATGTTTAT, :FFFFFFFFFFFFFF:FFFFFFFFFFFFF,FF:FFFF:F:FF 17 | 43, 0, 5, 4, AGGGGGAC, A00261:687:HMMCCDSX5:3:1101:24641:1391, GAGCAGGGGGACTCGGCCATGTATCTCTGTGCCAGCAGCAGGGGGACCACTGAAGCTTTCTATGGACAAGGCACCAGACTCAC, F:FFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFF:FFF,FF::FFFF,FF:FF:F:F:,FFF,FFFFFFFFFF,FFF:F:, GTCGTGACTGGGAAAACCCTGGTCCCCAGTCGTGATTTTGTT, FFFFFFFFFFF:FFFFF:F:FF,F:FF,FFFFFF,:FFFFFF 18 | 31, 12, 7, 3, CCGAGACGACTAGCGGGAG, A00261:687:HMMCCDSX5:3:1101:5638:1423, GAGTCGGCTGCTCCCTCCCAAACATCTGTGTACTTCTGTGCCAGCACCGAGACGACTAGCGGGAGCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACG, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTTTCGGTCGTGATATGAAAG, FFF:FFF:FFF:FFFFF,FFFFFFFFFFFFFFFF,FFFFFFF 19 | 14, 10, 7, 5, AACGGACGGGAGGGAAGCCGAGC, A00261:687:HMMCCDSX5:3:1101:24713:1423, TGGAGGACTCAGCCATGTACTTCTGTGCCAGCAAACGGACGGGAGGGAAGCCGAGCGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCT, FFFFFFFFFF:F:FFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCCTGAGGTCGTGATCGATTG, FFFFFFFFFFFFFFFFFFFFFFFFFFF:FFF:FFFFFF:FFF 20 | 22, 0, 5, 4, GAACAGGGGGC, A00261:687:HMMCCDSX5:3:1101:24379:1438, TTCTCAGTGACTCTGGCTTCTATCTCTGTGCCTGGGAACAGGGGGCCACTGAAGCTTTCTTTGGACAAGGCACCAGACTCAC, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTGTACTGTCGTGATTCAGAT, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 21 | 4, 6, 0, 4, TGGGCAGCGGGTGTTTAAT, A00261:687:HMMCCDSX5:3:1101:8133:1454, AAGCTTGAGGACTCGGCCGTGTATCTCTGTGCCAGCAGCTTAGATGGGCAGCGGGTGTTTAATTACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTC, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGAAAACCCTGGCCTTAAGTCGTGATTCATTGG, FFFFFFFFFFFFFFFFFFFFFFFF:F:FFFFFF,FFFFFFFF 22 | 37, 3, 2, 12, ATCTGGGGGACAGCAAAGG, A00261:687:HMMCCDSX5:3:1101:4869:1470, AGGAGGACTCGGCCGTGTATCTCTGTGCCAGCAGCTTAATCTGGGGGACAGCAAAGGAACTGTTTTTTGGCAGTGGAACCCAGCT, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFF:FFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCTTTGAGTCGTGATCATAAG, FF:FFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFF:FFFFFF 23 | 20, 6, 1, 10, CCGCAGGGGGCCGC, A00261:687:HMMCCDSX5:3:1101:6777:1485, GCCCTGAAGACAGCAGCATATATCTCTGCAGCGTTGAAGCCGCAGGGGGCCGCGAGCAGTTCTTCGGGCCAGGGACACGGCTC, FFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGGTTATAGTCGTGATCAAATC, FFFFFF:FFF,FFF:FFFFFFFF:F:F::FFF:F,FFFF:FF 24 | 43, 8, 3, 0, GGGGAGGACTAGC, A00261:687:HMMCCDSX5:3:1101:31331:1485, GAGCAGGGGGACTCGGCCATGTATCTCTGTGCCAGCAGCTTGGGGAGGACTAGCAGCACAGATACGCAGTATTTTGGCCCAGGCACCCGGCTGA, FFFFFFFFFFFFFFFFFFFFF:,FFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFF:FFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCCAAGGGTCGTGATCTCTAT, FFF:FFFFFFFFFFFFFFFFFFFF:FFFFF:FF:FFF:FF:F 25 | 20, 11, 4, 2, CTCTCCAGG, A00261:687:HMMCCDSX5:3:1101:18629:1501, GCCCTGAAGACAGCAGCATATATCTCTGCAGCGTTGCTCTCCAGGCTGGGGCCAACGTCCTGACTTTCGGGGCCGGCAGCAGG, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCGTCGAGTCGTGATGTCATG, FFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF::,:F 26 | 43, 8, 3, 0, GGGGAGGACTAGC, A00261:687:HMMCCDSX5:3:1101:31448:1501, GAGCAGGGGGACTCGGCCATGTATCTCTGTGCCAGCAGCTTGGGGAGGACTAGCAGCACAGATACGCAGTATTTTGGCCCAGGCACCCGGCTGA, FFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCCAAGGGTCGTGATCTCTAT, FFFFFFFFFFFFFFFFFF:FFFFFFFF:FF:F,FFFFFF:,F 27 | 15, 12, 5, 3, CGACTAGCGGGAGA, A00261:687:HMMCCDSX5:3:1101:1768:1517, ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTCGACTAGCGGGAGACTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACG, FF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCGTTGAGTCGTGATGGAGTG, FFFFFFFFFFFFFFFFFF:FFFFFFFF,FFFFFFFFFFFFF: 28 | 33, 6, 5, 2, CCCGGGACAGGAG, A00261:687:HMMCCDSX5:3:1101:9607:1532, CTGTCGGCTGCTCCCTCCCAGACATCTGTGTACTTCTGTGCCAGCAGTCCCGGGACAGGAGCCTACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTC, FFFFFFFFFFFFFFFFFFFFFFFFFF:F:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCATCTTGTCGTGATACCATA, FFFFFFFFFFFFFFFFFF:FFFFFFF,FFFFFFFFFFFFFFF 29 | 44, 3, 4, 0, CCCCGCGCGG, A00261:687:HMMCCDSX5:3:1101:30120:1548, CTGGGGGACTCAGCTTTGTATTTCTGTGCCAGCAGCCCCCGCGCGGCAACTAATGAAAAACTGTTTTTTGGCAGTGGAACCCAGCT, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTCCAAAGTCGTGATCTCTAG, FFFFFF:FFFFFFFFFFFFFFFFFFFF:FFFF:FFFFFFFFF 30 | 29, 6, 0, 5, GTCTAGCGGGAC, A00261:687:HMMCCDSX5:3:1101:7238:1564, GGGACTCGGCCCTCTATCTCTGTGCCAGCAGCTTGGGTCTAGCGGGACACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTC, FFFFFFFFFFFF:FFFFFFFFF,FFFFFFFFFFFFFF:FFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFF:FFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTCCTTTGTCGTATACCTACG, FFFF:FFFFFFFFF:FFFFFFFFFFFF:FFFFFFFFFF:FFF 31 | 14, 7, 3, 9, GGTGACTAGCC, A00261:687:HMMCCDSX5:3:1101:24777:1564, TGGAGGACTCAGCCATGTACTTCTGTGCCAGCAGTGAGGTGACTAGCCGGGAGCTGTTTTTTGGAGAAGGCTCTAGGCT, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTTCACTGTCGTGGATTCCTG, ,FFFFFFFF:FFFFF:FFFFFFFFFF:FFFFFF,FFFF:FFF 32 | 20, 11, 4, 2, CTCTCCAGG, A00261:687:HMMCCDSX5:3:1101:18484:1626, GCCCTGAAGACAGCAGCATATATCTCTGCAGCGTTGCTCTCCAGGCTGGGGCCAACGTCCTGACTTTCGGGGCCGGCAGCAGG, FFFFFFFFFFFFFFFFFFFF,:FFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCGTCGAGTCGTGATGTCATG, FF,FFFFFFFFFF:FFFFFFFFFFFFFF:FFFFFFFFFFFF: 33 | 26, 6, 4, 7, CCAGGACTAGCACCGAGG, A00261:687:HMMCCDSX5:3:1101:11966:1642, CTGGGGGACTCGGCCCTTTATCTTTGCGCCAGCAGCCCAGGACTAGCACCGAGGAATGAGCAGTTCTTCGGGCCAGGGACACGGCTC, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTCCCCTGTCGTGATGAAGAT, FFFFFFFFFFFFFFF:FFFFFFFFFFFFFF:F:FF,FFFFFF 34 | 15, 4, 4, 0, ATCTAGGGGCCGA, A00261:687:HMMCCDSX5:3:1101:7283:1705, ATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAATCTAGGGGCCGATAGCAATCAGCCCCAGCATTTTGGTGATGGGACTCGACTC, FFFFFFFF:FFFFFFFFFFFFFF:FFFFFFF:FFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGAATCTCGTCGTGATGGAGGC, :FFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFF 35 | 9, 10, 5, 2, TCGACCGGGGCG, A00261:687:HMMCCDSX5:3:1101:24975:1720, TGGAGGATTCTGGAGTTTATTTCTGTGCCAGCAGCTCGACCGGGGCGCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCT, F,FFF:FFF:FFFFFFFFFFFF:FFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFF::FFFFFFFFFFFFFF:FFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTATTACGTCGTGATTTGCTT, FFFFFFFFFFFFFF::FFFFFFF:FFF::FFFF:FFFFF,FF 36 | 21, 12, 9, 0, TACCCTTAGCGGGAG, A00261:687:HMMCCDSX5:3:1101:26115:1783, TTGGTGACTCTGCTGTGTATTTCTGTGCCAGTACCCTTAGCGGGAGCTCCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACG, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FF:FFFFFFFFFFFFFFFFF:F:FFFFFFFFFFFFFFFFFFFFFFFFFFF:FFF, GTCGTGACTGGGAAAACCCTGGCGTCATGTCGTGATTAAATG, FFFFFFFF,:FFFFFFFFFFF:FFFFF:FFFFFFFFFF:FFF 37 | 43, 10, 1, 2, GCGCCATGCAA, A00261:687:HMMCCDSX5:3:1101:26874:1783, GAGCAGGGGGACTCGGTCATGTATCTCTGTGCCAGCAGCTTAGGCGCCATGCAACAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCT, FFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFF, GTCATGACTGGGAAAACCCTGGCGTGGTCGTGATGTACAACC, FFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 38 | 44, 0, 2, 1, CAGGGAATC, A00261:687:HMMCCDSX5:3:1101:7862:1799, CTGGGGGACTCAGCTTTGTATTTCTGTGCCAGCAGCGTCAGGGAATCGAACACTGAAGCTTTCTTTGGACAAGGCACCAGACTCAC, FFFFFFFF:FFFFF:FFFFFFFFFFFFFFFFF:FFFFFF:FFFFFF:FF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F, GTCGTGACTGGGAAAACCCTGGCTCGTTGTCGTGATCGGAAC, FFF::FFFFFFFFFFFF,FFFFF:F::FFFFFFF:FFFFFFF 39 | 2, 10, 0, 1, GAGGTTTGGACACAC, A00261:687:HMMCCDSX5:3:1101:9579:1861, CCAGCTCCCAGACATCTGTGTACTTCTGTGCCATCAGTGAGTCGAGGTTTGGACACACCCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCT, FFFFFFFFFFFFFFF,FFFF,FFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFF:F:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTAACCGGTCGTGATGGAGAG, F:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFF 40 | 31, 12, 3, 3, GAGCGGGAGGGT, A00261:687:HMMCCDSX5:3:1101:16396:1861, GAGTCGGCTGCTCCCTCCCAAACATCTGTGTACTTCTGTGCCAGCAGTGAGAGCGGGAGGGTCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACG, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTTTGACGTCGTGATTGAATG, :FFFF:FFFFF:FFFFFFFF::FFFFFFFFFFF:FFFFFFFF 41 | 19, 6, 9, 3, TGGACTAGCGGA, A00261:687:HMMCCDSX5:3:1101:19027:1877, CCAACCAGACATCTATGTACCTCTGTGCCAGTGGACTAGCGGACTACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTC, FFFFFFFFFF:FFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCCACTCGTCGTGATTCTCTC, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F 42 | 33, 12, 0, 0, GAGAGGCGTCGTAAG, A00261:687:HMMCCDSX5:3:1101:23095:1877, CTGTCGGCTGCTCCCTCCCAGACATCTGTGTACTTCTGTGCCAGCAGTTACTCGAGAGGCGTCGTAAGCTCCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACG, FF:FFF:FF:FF:FFFFF:FFF,F::FFFFF:FFFFFFF:FFFFFFFFFFFF:FFF:F:F:FFFFFF:FF:FFFFFFFF:FFFFFF,FFF:FFFFFFFF:::FFF::F, GTCGTGACTGGGAAAACCCTGGGTTGTGGTGGCTCCCCTTTC, FFFF:,FF:FFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFF 43 | 6, 6, 1, 7, GTCTGCTT, A00261:687:HMMCCDSX5:3:1101:2546:1892, CCAGGGACTCAGCTGTGTACTTCTGTGCCAGCAGTTTAGGTCTGCTTAATGAGCAGTTCTTCGGGCCAGGGACACGGCTC, FFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGGTGAAAGTCGTGATGTTAAT, :FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFF 44 | 19, 12, 5, 6, GAACTTACGGGGCGCGG, A00261:687:HMMCCDSX5:3:1101:20437:1908, CCAACCAGACATCTATGTACCTCTGTGCCAGCAGTGAACTTACGGGGCGCGGCGAGCAGTACTTCGGGCCGGGCACCAGGCTCACG, FFFFFFFFFFFF:FFFFFFFFFFFF:FFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCTTCGCGTCGTGATTTATGG, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 45 | 40, 6, 5, 8, CAAACCATGAGTGGCTCACAGGCCC, A00261:687:HMMCCDSX5:3:1101:5394:1971, GAGCAGCGGGACTCGGCCATGTATCGCTGTGCCAGCAGCCAAACCATGAGTGGCTCACAGGCCCATGAGCAGTTCTTCGGGCCAGGGACACGGCTC, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F, GTCGTGACTGGGAAAACCCTGGCCCCCTGTCGTGATCAAATT, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFF 46 | 17, 10, 1, 5, GGGGAGGGGGCCAG, A00261:687:HMMCCDSX5:3:1101:24542:1971, CCTCACATACCTCTCAGTACCTCTGTGCCAGCAGTGAATGGGGAGGGGGCCAGGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCT, FFFFFF:F,FFF,F,:FFFFFFFFFFFFFFFFFFFF:FFFFFF:FF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCCACATGTCGTGATTTCTTT, FFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFF: 47 | 43, 0, 5, 6, GGAGGGACAG, A00261:687:HMMCCDSX5:3:1101:30942:2002, GAGCAGGGGGACTCGGCCATGTATCTCTGTGCCAGCAGCGGAGGGACAGCTGAAGCTTTCTTTGGACAAGGCACCAGACTCAC, FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGCATTCTGTCGTGATTAAAAT, F,:FF:FFFFFFFFFFFFFFFF:FFFFFFFF:FFFFFFFFFF 48 | 20, 10, 3, 9, GTCCGACGTGGGACGGG, A00261:687:HMMCCDSX5:3:1101:26151:2033, GCCCTGAAGACAGCAGCATATATCTCTGCAGCGTTGAGTCCGACGTGGGACGGGCCCAGTACTTCGGGCCAGGCACGCGGCTCCT, FFFFFFFFFFFFFFFFF,FFFFFFF:FFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFF, GTCGTGACTGGGAAAACCCTGGTTTTATGTCGTGATTTTAAC, FFFFFFFFFFFFF:FFFFFFFFF,F:,FFFFFFFFFFFFFFF 49 | -------------------------------------------------------------------------------- /tests/resources/dcr_TINY_1_beta.tsv: -------------------------------------------------------------------------------- 1 | sequence_id v_call d_call j_call junction_aa duplicate_count sequence junction decombinator_id rev_comp productive sequence_aa cdr1_aa cdr2_aa vj_in_frame stop_codon conserved_c conserved_f sequence_alignment germline_alignment v_cigar d_cigar j_cigar av_UMI_cluster_size 2 | 1 TRBV20-1 TRBJ2-5 CSATTPAGTQETQYF 2 GGTGCTGTCGTCTCTCAACATCCGAGCTGGGTTATCTGTAAGAGTGGAACCTCTGTGAAGATCGAGTGCCGTTCCCTGGACTTTCAGGCCACAACTATGTTTTGGTATCGTCAGTTCCCGAAACAGAGTCTCATGCTGATGGCAACTTCCAATGAGGGCTCCAAGGCCACATACGAGCAAGGCGTCGAGAAGGACAAGTTTCTCATCAACCATGCAAGCCTGACCTTGTCCACTCTGACAGTGACCAGTGCCCATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTACTACCCCCGCGGGGACCCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGCAGTGCTACTACCCCCGCGGGGACCCAAGAGACCCAGTACTTC 15, 10, 4, 1, CTACCCCCGCGGGGAC F T GAVVSQHPSWVICKSGTSVKIECRSLDFQATTMFWYRQFPKQSLMLMATSNEGSKATYEQGVEKDKFLINHASLTLSTLTVTSAHPEDSSFYICSATTPAGTQETQYFGPGTRLLVL DFQATT SNEGSKA T F T T 3 3 | 2 TRBV20-1 TRBJ2-5 CSATTPAKTQETQYF 1 GGTGCTGTCGTCTCTCAACATCCGAGCTGGGTTATCTGTAAGAGTGGAACCTCTGTGAAGATCGAGTGCCGTTCCCTGGACTTTCAGGCCACAACTATGTTTTGGTATCGTCAGTTCCCGAAACAGAGTCTCATGCTGATGGCAACTTCCAATGAGGGCTCCAAGGCCACATACGAGCAAGGCGTCGAGAAGGACAAGTTTCTCATCAACCATGCAAGCCTGACCTTGTCCACTCTGACAGTGACCAGTGCCCATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTACTACCCCCGCAAAGACCCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGCAGTGCTACTACCCCCGCAAAGACCCAAGAGACCCAGTACTTC 15, 10, 4, 1, CTACCCCCGCAAAGAC F T GAVVSQHPSWVICKSGTSVKIECRSLDFQATTMFWYRQFPKQSLMLMATSNEGSKATYEQGVEKDKFLINHASLTLSTLTVTSAHPEDSSFYICSATTPAKTQETQYFGPGTRLLVL DFQATT SNEGSKA T F T T 1 4 | 3 TRBV7-9 TRBJ1-1 CASSGGTAEAFF 1 GATACTGGAGTCTCCCAGAACCCCAGACACAAGATCACAAAGAGGGGACAGAATGTAACTTTCAGGTGTGATCCAATTTCTGAACACAACCGCCTTTATTGGTACCGACAGACCCTGGGGCAGGGCCCAGAGTTTCTGACTTACTTCCAGAATGAAGCTCAACTAGAAAAATCAAGGCTGCTCAGTGATCGGTTCTCTGCAGAGAGGCCTAAGGGATCTTTCTCCACCTTGGAGATCCAGCGCACAGAGCAGGGGGACTCGGCCATGTATCTCTGTGCCAGCAGCGGAGGGACAGCTGAAGCTTTCTTTGGACAAGGCACCAGACTCACAGTTGTAG TGTGCCAGCAGCGGAGGGACAGCTGAAGCTTTCTTT 43, 0, 5, 6, GGAGGGACAG F T DTGVSQNPRHKITKRGQNVTFRCDPISEHNRLYWYRQTLGQGPEFLTYFQNEAQLEKSRLLSDRFSAERPKGSFSTLEIQRTEQGDSAMYLCASSGGTAEAFFGQGTRLTVV SEHNR FQNEAQ T F T T 2 5 | 4 TRBV20-1 TRBJ2-1 CSASLAEYSYTEQFF 1 GGTGCTGTCGTCTCTCAACATCCGAGCTGGGTTATCTGTAAGAGTGGAACCTCTGTGAAGATCGAGTGCCGTTCCCTGGACTTTCAGGCCACAACTATGTTTTGGTATCGTCAGTTCCCGAAACAGAGTCTCATGCTGATGGCAACTTCCAATGAGGGCTCCAAGGCCACATACGAGCAAGGCGTCGAGAAGGACAAGTTTCTCATCAACCATGCAAGCCTGACCTTGTCCACTCTGACAGTGACCAGTGCCCATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGCCTAGCGGAATACTCCTACACTGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGCAGTGCTAGCCTAGCGGAATACTCCTACACTGAGCAGTTCTTC 15, 6, 3, 9, CCTAGCGGAATACTCCTACAC F T GAVVSQHPSWVICKSGTSVKIECRSLDFQATTMFWYRQFPKQSLMLMATSNEGSKATYEQGVEKDKFLINHASLTLSTLTVTSAHPEDSSFYICSASLAEYSYTEQFFGPGTRLTVL DFQATT SNEGSKA T F T T 1 6 | 5 TRBV14 TRBJ2-1 CASSPHGGFSYNEQFF 1 GAAGCTGGAGTTACTCAGTTCCCCAGCCACAGCGTAATAGAGAAGGGCCAGACTGTGACTCTGAGATGTGACCCAATTTCTGGACATGATAATCTTTATTGGTATCGACGTGTTATGGGAAAAGAAATAAAATTTCTGTTACATTTTGTGAAAGAGTCTAAACAGGATGAGTCCGGTATGCCCAACAATCGATTCTTAGCTGAAAGGACTGGAGGGACGTATTCTACTCTGAAGGTGCAGCCTGCAGAACTGGAGGATTCTGGAGTTTATTTCTGTGCCAGCAGCCCTCACGGGGGGTTCTCCTACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGCCCTCACGGGGGGTTCTCCTACAATGAGCAGTTCTTC 9, 6, 4, 0, CTCACGGGGGGTT F T EAGVTQFPSHSVIEKGQTVTLRCDPISGHDNLYWYRRVMGKEIKFLLHFVKESKQDESGMPNNRFLAERTGGTYSTLKVQPAELEDSGVYFCASSPHGGFSYNEQFFGPGTRLTVL SGHDN FVKESK T F T T 1 7 | 6 TRBV10-1 TRBJ2-2 1 GATGCTGAAATCACCCAGAGCCCAAGACACAAGATCACAGAGACAGGAAGGCAGGTGACCTTGGCGTGTCACCAGACTTGGAACCACAACAATATGTTCTGGTATCGACAAGACCTGGGACATGGGCTGAGGCTGATCCATTACTCATATGGTGTTCAAGACACTAACAAAGGAGAAGTCTCAGATGGCTACAGTGTCTCTAGATCAAACACAGAGGACCTCCCCCTCACTCTGGAGTCTGCTGCCTCCTCCCAGACATCTGTATATTTCTGCGCCAGCAGCCGGGACTAGCGGGTGAGGGGAGCTGTTTTTTGGAGAAGGCTCTAGGCTGACCGTACTGG 0, 7, 6, 8, CCGGGACTAGCGGGTGA F F DAEITQSPRHKITETGRQVTLACHQTWNHNNMFWYRQDLGHGLRLIHYSYGVQDTNKGEVSDGYSVSRSNTEDLPLTLESAASSQTSVYFCASSRD*RVRGAVFWRRL*ADRT F T T F 1 8 | 7 TRBV5-1 TRBJ2-7 CASSLEPGVSYEQYF 1 AAGGCTGGAGTCACTCAAACTCCAAGATATCTGATCAAAACGAGAGGACAGCAAGTGACACTGAGCTGCTCCCCTATCTCTGGGCATAGGAGTGTATCCTGGTACCAACAGACCCCAGGACAGGGCCTTCAGTTCCTCTTTGAATACTTCAGTGAGACACAGAGAAACAAAGGAAACTTCCCTGGTCGATTCTCAGGGCGCCAGTTCTCTAACTCTCGCTCTGAGATGAATGTGAGCACCTTGGAGCTGGGGGACTCGGCCCTTTATCTTTGCGCCAGCAGCTTGGAGCCAGGGGTCTCCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGCGCCAGCAGCTTGGAGCCAGGGGTCTCCTACGAGCAGTACTTC 26, 12, 0, 0, AGCCAGGGGT F T KAGVTQTPRYLIKTRGQQVTLSCSPISGHRSVSWYQQTPGQGLQFLFEYFSETQRNKGNFPGRFSGRQFSNSRSEMNVSTLELGDSALYLCASSLEPGVSYEQYFGPGTRLTVT SGHRS YFSETQ T F T T 1 9 | 8 TRBV4-2 TRBJ2-3 CASSPSGTFSTDTQYF 1 ACGGGAGTTACGCAGACACCAAGACACCTGGTCATGGGAATGACAAATAAGAAGTCTTTGAAATGTGAACAACATCTGGGGCATAACGCTATGTATTGGTACAAGCAAAGTGCTAAGAAGCCACTGGAGCTCATGTTTGTCTACAACTTTAAAGAACAGACTGAAAACAACAGTGTGCCAAGTCGCTTCTCACCTGAATGCCCCAACAGCTCTCACTTATTCCTTCACCTACACACCCTGCAGCCAGAAGACTCGGCCCTGTATCTCTGTGCCAGCAGTCCTAGCGGCACCTTTTCCACAGATACGCAGTATTTTGGCCCAGGCACCCGGCTGACAGTGCTCG TGTGCCAGCAGTCCTAGCGGCACCTTTTCCACAGATACGCAGTATTTT 24, 8, 6, 2, TCCTAGCGGCACCTTTTC F T TGVTQTPRHLVMGMTNKKSLKCEQHLGHNAMYWYKQSAKKPLELMFVYNFKEQTENNSVPSRFSPECPNSSHLFLHLHTLQPEDSALYLCASSPSGTFSTDTQYFGPGTRLTVL LGHNA YNFKEQ T F T T 1 10 | 9 TRBV29-1 TRBJ2-7 CSVERGSSYEQYF 1 AGTGCTGTCATCTCTCAAAAGCCAAGCAGGGATATCTGTCAACGTGGAACCTCCCTGACGATCCAGTGTCAAGTCGATAGCCAAGTCACCATGATGTTCTGGTACCGTCAGCAACCTGGACAGAGCCTGACACTGATCGCAACTGCAAATCAGGGCTCTGAGGCCACATATGAGAGTGGATTTGTCATTGACAAGTTTCCCATCAGCCGCCCAAACCTAACATTCTCAACTCTGACTGTGAGCAACATGAGCCCTGAAGACAGCAGCATATATCTCTGCAGCGTTGAAAGGGGCAGCTCCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGCAGCGTTGAAAGGGGCAGCTCCTACGAGCAGTACTTC 20, 12, 2, 0, AGGGGCAG F T SAVISQKPSRDICQRGTSLTIQCQVDSQVTMMFWYRQQPGQSLTLIATANQGSEATYESGFVIDKFPISRPNLTFSTLTVSNMSPEDSSIYLCSVERGSSYEQYFGPGTRLTVT SQVTM ANQGSEA T F T T 1 11 | 10 TRBV5-4 TRBJ2-6 CASSTDAGANVLTF 1 GAGACTGGAGTCACCCAAAGTCCCACACACCTGATCAAAACGAGAGGACAGCAAGTGACTCTGAGATGCTCTTCTCAGTCTGGGCACAACACTGTGTCCTGGTACCAACAGGCCCTGGGTCAGGGGCCCCAGTTTATCTTTCAGTATTATAGGGAGGAAGAGAATGGCAGAGGAAACTTCCCTCCTAGATTCTCAGGTCTCCAGTTCCCTAATTATAGCTCTGAGCTGAATGTGAACGCCTTGGAGCTGGACGACTCGGCCCTGTATCTCTGTGCCAGCAGCACCGATGCTGGGGCCAACGTCCTGACTTTCGGGGCCGGCAGCAGGCTGACCGTGCTGG TGTGCCAGCAGCACCGATGCTGGGGCCAACGTCCTGACTTTC 27, 11, 4, 2, ACCGATG F T ETGVTQSPTHLIKTRGQQVTLRCSSQSGHNTVSWYQQALGQGPQFIFQYYREEENGRGNFPPRFSGLQFPNYSSELNVNALELDDSALYLCASSTDAGANVLTFGAGSRLTVL SGHNT YYREEE T F T T 1 12 | 11 TRBV6-1 TRBJ2-7 CASTETTSGSYEQYF 1 AATGCTGGTGTCACTCAGACCCCAAAATTCCAGGTCCTGAAGACAGGACAGAGCATGACACTGCAGTGTGCCCAGGATATGAACCATAACTCCATGTACTGGTATCGACAAGACCCAGGCATGGGACTGAGGCTGATTTATTACTCAGCTTCTGAGGGTACCACTGACAAAGGAGAAGTCCCCAATGGCTACAATGTCTCCAGATTAAACAAACGGGAGTTCTCGCTCAGGCTGGAGTCGGCTGCTCCCTCCCAGACATCTGTGTACTTCTGTGCCAGCACCGAGACGACTAGCGGGAGCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGTGCCAGCACCGAGACGACTAGCGGGAGCTACGAGCAGTACTTC 31, 12, 7, 3, CCGAGACGACTAGCGGGAG F T NAGVTQTPKFQVLKTGQSMTLQCAQDMNHNSMYWYRQDPGMGLRLIYYSASEGTTDKGEVPNGYNVSRLNKREFSLRLESAAPSQTSVYFCASTETTSGSYEQYFGPGTRLTVT MNHNS SASEGT T F T T 1 13 | 12 TRBV2 TRBJ2-5 CASKRTGGKPSETQYF 1 GAACCTGAAGTCACCCAGACTCCCAGCCATCAGGTCACACAGATGGGACAGGAAGTGATCTTGCGCTGTGTCCCCATCTCTAATCACTTATACTTCTATTGGTACAGACAAATCTTGGGGCAGAAAGTCGAGTTTCTGGTTTCCTTTTATAATAATGAAATCTCAGAGAAGTCTGAAATATTCGATGATCAATTCTCAGTTGAAAGGCCTGATGGATCAAATTTCACTCTGAAGATCCGGTCCACAAAGCTGGAGGACTCAGCCATGTACTTCTGTGCCAGCAAACGGACGGGAGGGAAGCCGAGCGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGTGCCAGCAAACGGACGGGAGGGAAGCCGAGCGAGACCCAGTACTTC 14, 10, 7, 5, AACGGACGGGAGGGAAGCCGAGC F T EPEVTQTPSHQVTQMGQEVILRCVPISNHLYFYWYRQILGQKVEFLVSFYNNEISEKSEIFDDQFSVERPDGSNFTLKIRSTKLEDSAMYFCASKRTGGKPSETQYFGPGTRLLVL SNHLY FYNNEI T F T T 1 14 | 13 TRBV30 TRBJ1-1 CAWEQGATEAFF 1 TCTCAGACTATTCATCAATGGCCAGCGACCCTGGTGCAGCCTGTGGGCAGCCCGCTCTCTCTGGAGTGCACTGTGGAGGGAACATCAAACCCCAACCTATACTGGTACCGACAGGCTGCAGGCAGGGGCCTCCAGCTGCTCTTCTACTCCGTTGGTATTGGCCAGATCAGCTCTGAGGTGCCCCAGAATCTCTCAGCCTCCAGACCCCAGGACCGGCAGTTCATCCTGAGTTCTAAGAAGCTCCTTCTCAGTGACTCTGGCTTCTATCTCTGTGCCTGGGAACAGGGGGCCACTGAAGCTTTCTTTGGACAAGGCACCAGACTCACAGTTGTAG TGTGCCTGGGAACAGGGGGCCACTGAAGCTTTCTTT 22, 0, 5, 4, GAACAGGGGGC F T SQTIHQWPATLVQPVGSPLSLECTVEGTSNPNLYWYRQAAGRGLQLLFYSVGIGQISSEVPQNLSASRPQDRQFILSSKKLLLSDSGFYLCAWEQGATEAFFGQGTRLTVV GTSNPN SVGIG T F T T 1 15 | 14 TRBV11-2 TRBJ2-1 CASSLDGQRVFNYNEQFF 1 GAAGCTGGAGTTGCCCAGTCTCCCAGATATAAGATTATAGAGAAAAGGCAGAGTGTGGCTTTTTGGTGCAATCCTATATCTGGCCATGCTACCCTTTACTGGTACCAGCAGATCCTGGGACAGGGCCCAAAGCTTCTGATTCAGTTTCAGAATAACGGTGTAGTGGATGATTCACAGTTGCCTAAGGATCGATTTTCTGCAGAGAGGCTCAAAGGAGTAGACTCCACTCTCAAGATCCAGCCTGCAAAGCTTGAGGACTCGGCCGTGTATCTCTGTGCCAGCAGCTTAGATGGGCAGCGGGTGTTTAATTACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGCTTAGATGGGCAGCGGGTGTTTAATTACAATGAGCAGTTCTTC 4, 6, 0, 4, TGGGCAGCGGGTGTTTAAT F T EAGVAQSPRYKIIEKRQSVAFWCNPISGHATLYWYQQILGQGPKLLIQFQNNGVVDDSQLPKDRFSAERLKGVDSTLKIQPAKLEDSAVYLCASSLDGQRVFNYNEQFFGPGTRLTVL SGHAT FQNNGV T F T T 1 16 | 15 TRBV7-2 TRBJ1-4 CASSLIWGTAKELFF 1 GGAGCTGGAGTCTCCCAGTCCCCCAGTAACAAGGTCACAGAGAAGGGAAAGGATGTAGAGCTCAGGTGTGATCCAATTTCAGGTCATACTGCCCTTTACTGGTACCGACAGAGCCTGGGGCAGGGCCTGGAGTTTTTAATTTACTTCCAAGGCAACAGTGCACCAGACAAATCAGGGCTGCCCAGTGATCGCTTCTCTGCAGAGAGGACTGGGGGATCCGTCTCCACTCTGACGATCCAGCGCACACAGCAGGAGGACTCGGCCGTGTATCTCTGTGCCAGCAGCTTAATCTGGGGGACAGCAAAGGAACTGTTTTTTGGCAGTGGAACCCAGCTCTCTGTCTTGG TGTGCCAGCAGCTTAATCTGGGGGACAGCAAAGGAACTGTTTTTT 37, 3, 2, 12, ATCTGGGGGACAGCAAAGG F T GAGVSQSPSNKVTEKGKDVELRCDPISGHTALYWYRQSLGQGLEFLIYFQGNSAPDKSGLPSDRFSAERTGGSVSTLTIQRTQQEDSAVYLCASSLIWGTAKELFFGSGTQLSVL SGHTA FQGNSA T F T T 1 17 | 16 TRBV29-1 TRBJ2-1 CSVEAAGGREQFF 1 AGTGCTGTCATCTCTCAAAAGCCAAGCAGGGATATCTGTCAACGTGGAACCTCCCTGACGATCCAGTGTCAAGTCGATAGCCAAGTCACCATGATGTTCTGGTACCGTCAGCAACCTGGACAGAGCCTGACACTGATCGCAACTGCAAATCAGGGCTCTGAGGCCACATATGAGAGTGGATTTGTCATTGACAAGTTTCCCATCAGCCGCCCAAACCTAACATTCTCAACTCTGACTGTGAGCAACATGAGCCCTGAAGACAGCAGCATATATCTCTGCAGCGTTGAAGCCGCAGGGGGCCGCGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGCAGCGTTGAAGCCGCAGGGGGCCGCGAGCAGTTCTTC 20, 6, 1, 10, CCGCAGGGGGCCGC F T SAVISQKPSRDICQRGTSLTIQCQVDSQVTMMFWYRQQPGQSLTLIATANQGSEATYESGFVIDKFPISRPNLTFSTLTVSNMSPEDSSIYLCSVEAAGGREQFFGPGTRLTVL SQVTM ANQGSEA T F T T 1 18 | 17 TRBV7-9 TRBJ2-3 CASSLGRTSSTDTQYF 1 GATACTGGAGTCTCCCAGAACCCCAGACACAAGATCACAAAGAGGGGACAGAATGTAACTTTCAGGTGTGATCCAATTTCTGAACACAACCGCCTTTATTGGTACCGACAGACCCTGGGGCAGGGCCCAGAGTTTCTGACTTACTTCCAGAATGAAGCTCAACTAGAAAAATCAAGGCTGCTCAGTGATCGGTTCTCTGCAGAGAGGCCTAAGGGATCTTTCTCCACCTTGGAGATCCAGCGCACAGAGCAGGGGGACTCGGCCATGTATCTCTGTGCCAGCAGCTTGGGGAGGACTAGCAGCACAGATACGCAGTATTTTGGCCCAGGCACCCGGCTGACAGTGCTCG TGTGCCAGCAGCTTGGGGAGGACTAGCAGCACAGATACGCAGTATTTT 43, 8, 3, 0, GGGGAGGACTAGC F T DTGVSQNPRHKITKRGQNVTFRCDPISEHNRLYWYRQTLGQGPEFLTYFQNEAQLEKSRLLSDRFSAERPKGSFSTLEIQRTEQGDSAMYLCASSLGRTSSTDTQYFGPGTRLTVL SEHNR FQNEAQ T F T T 2 19 | 18 TRBV29-1 TRBJ2-6 CSVALQAGANVLTF 1 AGTGCTGTCATCTCTCAAAAGCCAAGCAGGGATATCTGTCAACGTGGAACCTCCCTGACGATCCAGTGTCAAGTCGATAGCCAAGTCACCATGATGTTCTGGTACCGTCAGCAACCTGGACAGAGCCTGACACTGATCGCAACTGCAAATCAGGGCTCTGAGGCCACATATGAGAGTGGATTTGTCATTGACAAGTTTCCCATCAGCCGCCCAAACCTAACATTCTCAACTCTGACTGTGAGCAACATGAGCCCTGAAGACAGCAGCATATATCTCTGCAGCGTTGCTCTCCAGGCTGGGGCCAACGTCCTGACTTTCGGGGCCGGCAGCAGGCTGACCGTGCTGG TGCAGCGTTGCTCTCCAGGCTGGGGCCAACGTCCTGACTTTC 20, 11, 4, 2, CTCTCCAGG F T SAVISQKPSRDICQRGTSLTIQCQVDSQVTMMFWYRQQPGQSLTLIATANQGSEATYESGFVIDKFPISRPNLTFSTLTVSNMSPEDSSIYLCSVALQAGANVLTFGAGSRLTVL SQVTM ANQGSEA T F T T 2 20 | 19 TRBV20-1 TRBJ2-7 CSARLAGDYEQYF 1 GGTGCTGTCGTCTCTCAACATCCGAGCTGGGTTATCTGTAAGAGTGGAACCTCTGTGAAGATCGAGTGCCGTTCCCTGGACTTTCAGGCCACAACTATGTTTTGGTATCGTCAGTTCCCGAAACAGAGTCTCATGCTGATGGCAACTTCCAATGAGGGCTCCAAGGCCACATACGAGCAAGGCGTCGAGAAGGACAAGTTTCTCATCAACCATGCAAGCCTGACCTTGTCCACTCTGACAGTGACCAGTGCCCATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTCGACTAGCGGGAGACTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGCAGTGCTCGACTAGCGGGAGACTACGAGCAGTACTTC 15, 12, 5, 3, CGACTAGCGGGAGA F T GAVVSQHPSWVICKSGTSVKIECRSLDFQATTMFWYRQFPKQSLMLMATSNEGSKATYEQGVEKDKFLINHASLTLSTLTVTSAHPEDSSFYICSARLAGDYEQYFGPGTRLTVT DFQATT SNEGSKA T F T T 1 21 | 20 TRBV6-5 TRBJ2-1 CASSPGTGAYNEQFF 1 AATGCTGGTGTCACTCAGACCCCAAAATTCCAGGTCCTGAAGACAGGACAGAGCATGACACTGCAGTGTGCCCAGGATATGAACCATGAATACATGTCCTGGTATCGACAAGACCCAGGCATGGGGCTGAGGCTGATTCATTACTCAGTTGGTGCTGGTATCACTGACCAAGGAGAAGTCCCCAATGGCTACAATGTCTCCAGATCAACCACAGAGGATTTCCCGCTCAGGCTGCTGTCGGCTGCTCCCTCCCAGACATCTGTGTACTTCTGTGCCAGCAGTCCCGGGACAGGAGCCTACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGTCCCGGGACAGGAGCCTACAATGAGCAGTTCTTC 33, 6, 5, 2, CCCGGGACAGGAG F T NAGVTQTPKFQVLKTGQSMTLQCAQDMNHEYMSWYRQDPGMGLRLIHYSVGAGITDQGEVPNGYNVSRSTTEDFPLRLLSAAPSQTSVYFCASSPGTGAYNEQFFGPGTRLTVL MNHEY SVGAGI T F T T 1 22 | 21 TRBV9 TRBJ1-4 CASSPRAATNEKLFF 1 GATTCTGGAGTCACACAAACCCCAAAGCACCTGATCACAGCAACTGGACAGCGAGTGACGCTGAGATGCTCCCCTAGGTCTGGAGACCTCTCTGTGTACTGGTACCAACAGAGCCTGGACCAGGGCCTCCAGTTCCTCATTCAGTATTATAATGGAGAAGAGAGAGCAAAAGGAAACATTCTTGAACGATTCTCCGCACAACAGTTCCCTGACTTGCACTCTGAACTAAACCTGAGCTCTCTGGAGCTGGGGGACTCAGCTTTGTATTTCTGTGCCAGCAGCCCCCGCGCGGCAACTAATGAAAAACTGTTTTTTGGCAGTGGAACCCAGCTCTCTGTCTTGG TGTGCCAGCAGCCCCCGCGCGGCAACTAATGAAAAACTGTTTTTT 44, 3, 4, 0, CCCCGCGCGG F T DSGVTQTPKHLITATGQRVTLRCSPRSGDLSVYWYQQSLDQGLQFLIQYYNGEERAKGNILERFSAQQFPDLHSELNLSSLELGDSALYFCASSPRAATNEKLFFGSGTQLSVL SGDLS YYNGEE T F T T 1 23 | 22 TRBV5-6 TRBJ2-1 CASSLGLAGHNEQFF 1 GACGCTGGAGTCACCCAAAGTCCCACACACCTGATCAAAACGAGAGGACAGCAAGTGACTCTGAGATGCTCTCCTAAGTCTGGGCATGACACTGTGTCCTGGTACCAACAGGCCCTGGGTCAGGGGCCCCAGTTTATCTTTCAGTATTATGAGGAGGAAGAGAGACAGAGAGGCAACTTCCCTGATCGATTCTCAGGTCACCAGTTCCCTAACTATAGCTCTGAGCTGAATGTGAACGCCTTGTTGCTGGGGGACTCGGCCCTCTATCTCTGTGCCAGCAGCTTGGGTCTAGCGGGACACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGCTTGGGTCTAGCGGGACACAATGAGCAGTTCTTC 29, 6, 0, 5, GTCTAGCGGGAC F T DAGVTQSPTHLIKTRGQQVTLRCSPKSGHDTVSWYQQALGQGPQFIFQYYEEEERQRGNFPDRFSGHQFPNYSSELNVNALLLGDSALYLCASSLGLAGHNEQFFGPGTRLTVL SGHDT YYEEEE T F T T 1 24 | 23 TRBV2 TRBJ2-2 CASSEVTSRELFF 1 GAACCTGAAGTCACCCAGACTCCCAGCCATCAGGTCACACAGATGGGACAGGAAGTGATCTTGCGCTGTGTCCCCATCTCTAATCACTTATACTTCTATTGGTACAGACAAATCTTGGGGCAGAAAGTCGAGTTTCTGGTTTCCTTTTATAATAATGAAATCTCAGAGAAGTCTGAAATATTCGATGATCAATTCTCAGTTGAAAGGCCTGATGGATCAAATTTCACTCTGAAGATCCGGTCCACAAAGCTGGAGGACTCAGCCATGTACTTCTGTGCCAGCAGTGAGGTGACTAGCCGGGAGCTGTTTTTTGGAGAAGGCTCTAGGCTGACCGTACTGG TGTGCCAGCAGTGAGGTGACTAGCCGGGAGCTGTTTTTT 14, 7, 3, 9, GGTGACTAGCC F T EPEVTQTPSHQVTQMGQEVILRCVPISNHLYFYWYRQILGQKVEFLVSFYNNEISEKSEIFDDQFSVERPDGSNFTLKIRSTKLEDSAMYFCASSEVTSRELFFGEGSRLTVL SNHLY FYNNEI T F T T 1 25 | 24 TRBV5-1 TRBJ2-1 CASSPGLAPRNEQFF 1 AAGGCTGGAGTCACTCAAACTCCAAGATATCTGATCAAAACGAGAGGACAGCAAGTGACACTGAGCTGCTCCCCTATCTCTGGGCATAGGAGTGTATCCTGGTACCAACAGACCCCAGGACAGGGCCTTCAGTTCCTCTTTGAATACTTCAGTGAGACACAGAGAAACAAAGGAAACTTCCCTGGTCGATTCTCAGGGCGCCAGTTCTCTAACTCTCGCTCTGAGATGAATGTGAGCACCTTGGAGCTGGGGGACTCGGCCCTTTATCTTTGCGCCAGCAGCCCAGGACTAGCACCGAGGAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGCGCCAGCAGCCCAGGACTAGCACCGAGGAATGAGCAGTTCTTC 26, 6, 4, 7, CCAGGACTAGCACCGAGG F T KAGVTQTPRYLIKTRGQQVTLSCSPISGHRSVSWYQQTPGQGLQFLFEYFSETQRNKGNFPGRFSGRQFSNSRSEMNVSTLELGDSALYLCASSPGLAPRNEQFFGPGTRLTVL SGHRS YFSETQ T F T T 1 26 | 25 TRBV20-1 TRBJ1-5 CSANLGADSNQPQHF 1 GGTGCTGTCGTCTCTCAACATCCGAGCTGGGTTATCTGTAAGAGTGGAACCTCTGTGAAGATCGAGTGCCGTTCCCTGGACTTTCAGGCCACAACTATGTTTTGGTATCGTCAGTTCCCGAAACAGAGTCTCATGCTGATGGCAACTTCCAATGAGGGCTCCAAGGCCACATACGAGCAAGGCGTCGAGAAGGACAAGTTTCTCATCAACCATGCAAGCCTGACCTTGTCCACTCTGACAGTGACCAGTGCCCATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAATCTAGGGGCCGATAGCAATCAGCCCCAGCATTTTGGTGATGGGACTCGACTCTCCATCCTAG TGCAGTGCTAATCTAGGGGCCGATAGCAATCAGCCCCAGCATTTT 15, 4, 4, 0, ATCTAGGGGCCGA F T GAVVSQHPSWVICKSGTSVKIECRSLDFQATTMFWYRQFPKQSLMLMATSNEGSKATYEQGVEKDKFLINHASLTLSTLTVTSAHPEDSSFYICSANLGADSNQPQHFGDGTRLSIL DFQATT SNEGSKA T F T T 1 27 | 26 TRBV14 TRBJ2-5 CASSSTGAQETQYF 1 GAAGCTGGAGTTACTCAGTTCCCCAGCCACAGCGTAATAGAGAAGGGCCAGACTGTGACTCTGAGATGTGACCCAATTTCTGGACATGATAATCTTTATTGGTATCGACGTGTTATGGGAAAAGAAATAAAATTTCTGTTACATTTTGTGAAAGAGTCTAAACAGGATGAGTCCGGTATGCCCAACAATCGATTCTTAGCTGAAAGGACTGGAGGGACGTATTCTACTCTGAAGGTGCAGCCTGCAGAACTGGAGGATTCTGGAGTTTATTTCTGTGCCAGCAGCTCGACCGGGGCGCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGTGCCAGCAGCTCGACCGGGGCGCAAGAGACCCAGTACTTC 9, 10, 5, 2, TCGACCGGGGCG F T EAGVTQFPSHSVIEKGQTVTLRCDPISGHDNLYWYRRVMGKEIKFLLHFVKESKQDESGMPNNRFLAERTGGTYSTLKVQPAELEDSGVYFCASSSTGAQETQYFGPGTRLLVL SGHDN FVKESK T F T T 1 28 | 27 TRBV3-1 TRBJ2-7 CASTLSGSSYEQYF 1 ACAGCTGTTTCCCAGACTCCAAAATACCTGGTCACACAGATGGGAAACGACAAGTCCATTAAATGTGAACAAAATCTGGGCCATGATACTATGTATTGGTATAAACAGGACTCTAAGAAATTTCTGAAGATAATGTTTAGCTACAATAATAAGGAGCTCATTATAAATGAAACAGTTCCAAATCGCTTCTCACCTAAATCTCCAGACAAAGCTCACTTAAATCTTCACATCAATTCCCTGGAGCTTGGTGACTCTGCTGTGTATTTCTGTGCCAGTACCCTTAGCGGGAGCTCCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGTGCCAGTACCCTTAGCGGGAGCTCCTACGAGCAGTACTTC 21, 12, 9, 0, TACCCTTAGCGGGAG F T TAVSQTPKYLVTQMGNDKSIKCEQNLGHDTMYWYKQDSKKFLKIMFSYNNKELIINETVPNRFSPKSPDKAHLNLHINSLELGDSAVYFCASTLSGSSYEQYFGPGTRLTVT LGHDT YNNKEL T F T T 1 29 | 28 TRBV7-9 TRBJ2-5 CASSLGAMQQETQYF 1 GATACTGGAGTCTCCCAGAACCCCAGACACAAGATCACAAAGAGGGGACAGAATGTAACTTTCAGGTGTGATCCAATTTCTGAACACAACCGCCTTTATTGGTACCGACAGACCCTGGGGCAGGGCCCAGAGTTTCTGACTTACTTCCAGAATGAAGCTCAACTAGAAAAATCAAGGCTGCTCAGTGATCGGTTCTCTGCAGAGAGGCCTAAGGGATCTTTCTCCACCTTGGAGATCCAGCGCACAGAGCAGGGGGACTCGGCCATGTATCTCTGTGCCAGCAGCTTAGGCGCCATGCAACAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGTGCCAGCAGCTTAGGCGCCATGCAACAAGAGACCCAGTACTTC 43, 10, 1, 2, GCGCCATGCAA F T DTGVSQNPRHKITKRGQNVTFRCDPISEHNRLYWYRQTLGQGPEFLTYFQNEAQLEKSRLLSDRFSAERPKGSFSTLEIQRTEQGDSAMYLCASSLGAMQQETQYFGPGTRLLVL SEHNR FQNEAQ T F T T 1 30 | 29 TRBV9 TRBJ1-1 CASSVRESNTEAFF 1 GATTCTGGAGTCACACAAACCCCAAAGCACCTGATCACAGCAACTGGACAGCGAGTGACGCTGAGATGCTCCCCTAGGTCTGGAGACCTCTCTGTGTACTGGTACCAACAGAGCCTGGACCAGGGCCTCCAGTTCCTCATTCAGTATTATAATGGAGAAGAGAGAGCAAAAGGAAACATTCTTGAACGATTCTCCGCACAACAGTTCCCTGACTTGCACTCTGAACTAAACCTGAGCTCTCTGGAGCTGGGGGACTCAGCTTTGTATTTCTGTGCCAGCAGCGTCAGGGAATCGAACACTGAAGCTTTCTTTGGACAAGGCACCAGACTCACAGTTGTAG TGTGCCAGCAGCGTCAGGGAATCGAACACTGAAGCTTTCTTT 44, 0, 2, 1, CAGGGAATC F T DSGVTQTPKHLITATGQRVTLRCSPRSGDLSVYWYQQSLDQGLQFLIQYYNGEERAKGNILERFSAQQFPDLHSELNLSSLELGDSALYFCASSVRESNTEAFFGQGTRLTVV SGDLS YYNGEE T F T T 1 31 | 30 TRBV10-3 TRBJ2-5 CAISESRFGHTQETQYF 1 GATGCTGGAATCACCCAGAGCCCAAGACACAAGGTCACAGAGACAGGAACACCAGTGACTCTGAGATGTCACCAGACTGAGAACCACCGCTATATGTACTGGTATCGACAAGACCCGGGGCATGGGCTGAGGCTGATCCATTACTCATATGGTGTTAAAGATACTGACAAAGGAGAAGTCTCAGATGGCTATAGTGTCTCTAGATCAAAGACAGAGGATTTCCTCCTCACTCTGGAGTCCGCTACCAGCTCCCAGACATCTGTGTACTTCTGTGCCATCAGTGAGTCGAGGTTTGGACACACCCAAGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGTGCCATCAGTGAGTCGAGGTTTGGACACACCCAAGAGACCCAGTACTTC 2, 10, 0, 1, GAGGTTTGGACACAC F T DAGITQSPRHKVTETGTPVTLRCHQTENHRYMYWYRQDPGHGLRLIHYSYGVKDTDKGEVSDGYSVSRSKTEDFLLTLESATSSQTSVYFCAISESRFGHTQETQYFGPGTRLLVL ENHRY SYGVKD T F T T 1 32 | 31 TRBV6-1 TRBJ2-7 CASSESGRVYEQYF 1 AATGCTGGTGTCACTCAGACCCCAAAATTCCAGGTCCTGAAGACAGGACAGAGCATGACACTGCAGTGTGCCCAGGATATGAACCATAACTCCATGTACTGGTATCGACAAGACCCAGGCATGGGACTGAGGCTGATTTATTACTCAGCTTCTGAGGGTACCACTGACAAAGGAGAAGTCCCCAATGGCTACAATGTCTCCAGATTAAACAAACGGGAGTTCTCGCTCAGGCTGGAGTCGGCTGCTCCCTCCCAGACATCTGTGTACTTCTGTGCCAGCAGTGAGAGCGGGAGGGTCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGTGCCAGCAGTGAGAGCGGGAGGGTCTACGAGCAGTACTTC 31, 12, 3, 3, GAGCGGGAGGGT F T NAGVTQTPKFQVLKTGQSMTLQCAQDMNHNSMYWYRQDPGMGLRLIYYSASEGTTDKGEVPNGYNVSRLNKREFSLRLESAAPSQTSVYFCASSESGRVYEQYFGPGTRLTVT MNHNS SASEGT T F T T 1 33 | 32 TRBV28 TRBJ2-1 CASGLADYNEQFF 1 GATGTGAAAGTAACCCAGAGCTCGAGATATCTAGTCAAAAGGACGGGAGAGAAAGTTTTTCTGGAATGTGTCCAGGATATGGACCATGAAAATATGTTCTGGTATCGACAAGACCCAGGTCTGGGGCTACGGCTGATCTATTTCTCATATGATGTTAAAATGAAAGAAAAAGGAGATATTCCTGAGGGGTACAGTGTCTCTAGAGAGAAGAAGGAGCGCTTCTCCCTGATTCTGGAGTCCGCCAGCACCAACCAGACATCTATGTACCTCTGTGCCAGTGGACTAGCGGACTACAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGTGGACTAGCGGACTACAATGAGCAGTTCTTC 19, 6, 9, 3, TGGACTAGCGGA F T DVKVTQSSRYLVKRTGEKVFLECVQDMDHENMFWYRQDPGLGLRLIYFSYDVKMKEKGDIPEGYSVSREKKERFSLILESASTNQTSMYLCASGLADYNEQFFGPGTRLTVL MDHEN SYDVKM T F T T 1 34 | 33 TRBV12-4 TRBJ2-1 CASSLGLLNEQFF 1 GATGCTGGAGTTATCCAGTCACCCCGGCACGAGGTGACAGAGATGGGACAAGAAGTGACTCTGAGATGTAAACCAATTTCAGGACACGACTACCTTTTCTGGTACAGACAGACCATGATGCGGGGACTGGAGTTGCTCATTTACTTTAACAACAACGTTCCGATAGATGATTCAGGGATGCCCGAGGATCGATTCTCAGCTAAGATGCCTAATGCATCATTCTCCACTCTGAAGATCCAGCCCTCAGAACCCAGGGACTCAGCTGTGTACTTCTGTGCCAGCAGTTTAGGTCTGCTTAATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGTTTAGGTCTGCTTAATGAGCAGTTCTTC 6, 6, 1, 7, GTCTGCTT F T DAGVIQSPRHEVTEMGQEVTLRCKPISGHDYLFWYRQTMMRGLELLIYFNNNVPIDDSGMPEDRFSAKMPNASFSTLKIQPSEPRDSAVYFCASSLGLLNEQFFGPGTRLTVL SGHDY FNNNVP T F T T 1 35 | 34 TRBV28 TRBJ2-7 CASSELTGRGEQYF 1 GATGTGAAAGTAACCCAGAGCTCGAGATATCTAGTCAAAAGGACGGGAGAGAAAGTTTTTCTGGAATGTGTCCAGGATATGGACCATGAAAATATGTTCTGGTATCGACAAGACCCAGGTCTGGGGCTACGGCTGATCTATTTCTCATATGATGTTAAAATGAAAGAAAAAGGAGATATTCCTGAGGGGTACAGTGTCTCTAGAGAGAAGAAGGAGCGCTTCTCCCTGATTCTGGAGTCCGCCAGCACCAACCAGACATCTATGTACCTCTGTGCCAGCAGTGAACTTACGGGGCGCGGCGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAG TGTGCCAGCAGTGAACTTACGGGGCGCGGCGAGCAGTACTTC 19, 12, 5, 6, GAACTTACGGGGCGCGG F T DVKVTQSSRYLVKRTGEKVFLECVQDMDHENMFWYRQDPGLGLRLIYFSYDVKMKEKGDIPEGYSVSREKKERFSLILESASTNQTSMYLCASSELTGRGEQYFGPGTRLTVT MDHEN SYDVKM T F T T 1 36 | 35 TRBV7-6 TRBJ2-1 CASSQTMSGSQAHEQFF 1 GGTGCTGGAGTCTCCCAGTCTCCCAGGTACAAAGTCACAAAGAGGGGACAGGATGTAGCTCTCAGGTGTGATCCAATTTCGGGTCATGTATCCCTTTATTGGTACCGACAGGCCCTGGGGCAGGGCCCAGAGTTTCTGACTTACTTCAATTATGAAGCCCAACAAGACAAATCAGGGCTGCCCAATGATCGGTTCTCTGCAGAGAGGCCTGAGGGATCCATCTCCACTCTGACGATCCAGCGCACAGAGCAGCGGGACTCGGCCATGTATCGCTGTGCCAGCAGCCAAACCATGAGTGGCTCACAGGCCCATGAGCAGTTCTTCGGGCCAGGGACACGGCTCACCGTGCTAG TGTGCCAGCAGCCAAACCATGAGTGGCTCACAGGCCCATGAGCAGTTCTTC 40, 6, 5, 8, CAAACCATGAGTGGCTCACAGGCCC F T GAGVSQSPRYKVTKRGQDVALRCDPISGHVSLYWYRQALGQGPEFLTYFNYEAQQDKSGLPNDRFSAERPEGSISTLTIQRTEQRDSAMYRCASSQTMSGSQAHEQFFGPGTRLTVL SGHVS FNYEAQ T F T T 1 37 | 36 TRBV25-1 TRBJ2-5 CASSEWGGGQETQYF 1 GAAGCTGACATCTACCAGACCCCAAGATACCTTGTTATAGGGACAGGAAAGAAGATCACTCTGGAATGTTCTCAAACCATGGGCCATGACAAAATGTACTGGTATCAACAAGATCCAGGAATGGAACTACACCTCATCCACTATTCCTATGGAGTTAATTCCACAGAGAAGGGAGATCTTTCCTCTGAGTCAACAGTCTCCAGAATAAGGACGGAGCATTTTCCCCTGACCCTGGAGTCTGCCAGGCCCTCACATACCTCTCAGTACCTCTGTGCCAGCAGTGAATGGGGAGGGGGCCAGGAGACCCAGTACTTCGGGCCAGGCACGCGGCTCCTGGTGCTCG TGTGCCAGCAGTGAATGGGGAGGGGGCCAGGAGACCCAGTACTTC 17, 10, 1, 5, GGGGAGGGGGCCAG F T EADIYQTPRYLVIGTGKKITLECSQTMGHDKMYWYQQDPGMELHLIHYSYGVNSTEKGDLSSESTVSRIRTEHFPLTLESARPSHTSQYLCASSEWGGGQETQYFGPGTRLLVL MGHDK SYGVNS T F T T 1 38 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | from decombinator import pipeline, io 2 | import pytest 3 | import pathlib 4 | import os 5 | import subprocess 6 | 7 | pytestmark = pytest.mark.usefixtures("resource_location", "chain_name") 8 | 9 | 10 | @pytest.fixture(params=["a", "b"], scope="module") 11 | def chain_type(request: pytest.FixtureRequest) -> str: 12 | return request.param 13 | 14 | 15 | @pytest.fixture(scope="module") 16 | def output_dir(tmp_path_factory: pytest.TempPathFactory) -> pathlib.Path: 17 | # Create a temporary output directory 18 | output_dir = tmp_path_factory.mktemp("output") 19 | return output_dir 20 | 21 | 22 | @pytest.fixture(scope="module") 23 | def race_pipeline( 24 | output_dir: pathlib.Path, resource_location: pathlib.Path, chain_type: str 25 | ) -> None: 26 | filename: str = "TINY_1.fq" 27 | process = subprocess.run( 28 | [ 29 | "./decombinator-runner.py", 30 | "pipeline", 31 | "-in", 32 | str((resource_location / filename).resolve()), 33 | "-br", 34 | "R2", 35 | "-bl", 36 | "42", 37 | "-ol", 38 | "M13", 39 | "-c", 40 | chain_type, 41 | "-op", 42 | f"{output_dir}{os.sep}", 43 | "-tfdir", 44 | "tests/resources/Decombinator-Tags-FASTAs", 45 | "-dz", 46 | ] 47 | ) 48 | print(f"running with {chain_type} chain") 49 | print(process.stdout) 50 | 51 | 52 | # @pytest.mark.filterwarnings("ignore::Bio.BiopythonWarning") 53 | def test_tsv_output( 54 | race_pipeline: None, 55 | output_dir: pathlib.Path, 56 | resource_location: pathlib.Path, 57 | chain_type: str, 58 | chain_name: dict, 59 | ) -> None: 60 | 61 | # Load reference tsv 62 | reference_file = f"dcr_TINY_1_{chain_name[chain_type]}.tsv" 63 | reference_path = resource_location / reference_file 64 | with open(reference_path, "r") as f: 65 | reference_data = f.read() 66 | 67 | # Load output file generated by pipeline.run 68 | output_file = f"dcr_TINY_1_{chain_name[chain_type]}.tsv" 69 | output_path = output_dir / output_file 70 | with open(output_path, "r") as f: 71 | output_data = f.read() 72 | 73 | # Perform comparison 74 | assert output_data == reference_data, "Output does not match reference data" 75 | 76 | 77 | def test_n12_output( 78 | race_pipeline: None, 79 | output_dir: pathlib.Path, 80 | resource_location: pathlib.Path, 81 | chain_type: str, 82 | chain_name: dict, 83 | ) -> None: 84 | 85 | # Load reference n12 86 | reference_file = f"dcr_TINY_1_{chain_name[chain_type]}.n12" 87 | reference_path = resource_location / reference_file 88 | with open(reference_path, "r") as f: 89 | reference_data = f.read() 90 | 91 | # Load output file generated by pipeline.run 92 | output_file = f"dcr_TINY_1_{chain_name[chain_type]}.n12" 93 | output_path = output_dir / output_file 94 | with open(output_path, "r") as f: 95 | output_data = f.read() 96 | 97 | # Perform comparison 98 | assert output_data == reference_data, "Output does not match reference data" 99 | 100 | 101 | def test_freq_output( 102 | race_pipeline: None, 103 | output_dir: pathlib.Path, 104 | resource_location: pathlib.Path, 105 | chain_type: str, 106 | chain_name: dict, 107 | ) -> None: 108 | 109 | # Load reference freq 110 | reference_file = f"dcr_TINY_1_{chain_name[chain_type]}.freq" 111 | reference_path = resource_location / reference_file 112 | with open(reference_path, "r") as f: 113 | reference_data = f.read() 114 | 115 | # Load output file generated by pipeline.run 116 | output_file = f"dcr_TINY_1_{chain_name[chain_type]}.freq" 117 | output_path = output_dir / output_file 118 | with open(output_path, "r") as f: 119 | output_data = f.read() 120 | 121 | # Perform comparison 122 | assert output_data == reference_data, "Output does not match reference data" 123 | 124 | 125 | def test_log_output( 126 | race_pipeline: None, 127 | output_dir: pathlib.Path, 128 | resource_location: pathlib.Path, 129 | chain_type: str, 130 | chain_name: dict, 131 | ) -> None: 132 | 133 | # Load reference logs 134 | reference_log_paths = [file for file in resource_location.glob("*")] 135 | 136 | reference_logs = [ 137 | file 138 | for file in reference_log_paths 139 | if (chain_name[chain_type] in file.name) 140 | ] 141 | 142 | # Load output logs 143 | output_log_paths = [file for file in output_dir.glob("**/*.csv")] 144 | 145 | output_logs = [ 146 | file 147 | for file in output_log_paths 148 | if (chain_name[chain_type] in file.name) 149 | ] 150 | 151 | # Perform comparison 152 | comparison_start = { 153 | "Decombinator_Summary.csv": 8, 154 | "Collapsing_Summary.csv": 9, 155 | "Translation_Summary.csv": 7, 156 | } 157 | for reference_log, output_log in zip( 158 | sorted(reference_logs), sorted(output_logs) 159 | ): 160 | 161 | with open(reference_log, "r") as f: 162 | comparison_label = "_".join(reference_log.name.split("_")[-2:]) 163 | print(comparison_label) 164 | reference_log_lines = f.readlines()[ 165 | comparison_start[comparison_label] 166 | ] 167 | 168 | with open(output_log, "r") as f: 169 | comparison_label = "_".join(output_log.name.split("_")[-2:]) 170 | print(comparison_label) 171 | output_log_lines = f.readlines()[comparison_start[comparison_label]] 172 | 173 | assert output_log_lines == reference_log_lines 174 | -------------------------------------------------------------------------------- /tests/test_collapse.py: -------------------------------------------------------------------------------- 1 | import collections as coll 2 | import pathlib 3 | 4 | import pytest 5 | 6 | from decombinator import collapse 7 | 8 | 9 | class TestClusterUMIs: 10 | def test_no_umis(self): 11 | with pytest.raises(ValueError): 12 | collapse.cluster_UMIs(coll.defaultdict(list), {}, 0, 0, False) 13 | 14 | @pytest.fixture 15 | def barcode_dcretc_list(self): 16 | return { 17 | "AAAA|0|AAAA": ["AAAA"], 18 | "GGGG|0|GGGG": ["GGGG"], 19 | "AAAG|0|AAAG": ["AAAG"], 20 | "AAAA|1|GGGG": ["GGGG"], 21 | } 22 | 23 | def test_create_cluster_structures(self, barcode_dcretc_list): 24 | num_initial_groups, barcode_dcretc_list, umi_protoseq_tuple = ( 25 | collapse.create_clustering_objs(barcode_dcretc_list) 26 | ) 27 | 28 | assert num_initial_groups == 4 29 | assert barcode_dcretc_list == [ 30 | ("AAAA|0|AAAA", ["AAAA"]), 31 | ("GGGG|0|GGGG", ["GGGG"]), 32 | ("AAAG|0|AAAG", ["AAAG"]), 33 | ("AAAA|1|GGGG", ["GGGG"]), 34 | ] 35 | assert umi_protoseq_tuple == [ 36 | ("AAAA", "AAAA"), 37 | ("GGGG", "GGGG"), 38 | ("AAAG", "AAAG"), 39 | ("AAAA", "GGGG"), 40 | ] 41 | 42 | def test_merge_order(self, barcode_dcretc_list): 43 | clusters = collapse.cluster_UMIs( 44 | barcode_dcretc_list, {"writeclusters": False}, 2, 25, True 45 | ) 46 | 47 | assert clusters == { 48 | "AAAA|0|AAAA": ["AAAA", "AAAG"], 49 | "GGGG|0|GGGG": ["GGGG"], 50 | "AAAA|1|GGGG": ["GGGG"], 51 | } 52 | 53 | 54 | class TestGetBarcodePositions: 55 | 56 | @pytest.fixture 57 | def counter(self): 58 | return coll.Counter() 59 | 60 | def test_m13(self, counter): 61 | m13 = "GTCGTGACTGGGAAAACCCTGG" 62 | i8 = "GTCGTGAT" 63 | bcseq = "GTCGTGACTGGGAAAACCCTGGTTTCCGGTCGTGATAAAGTG" 64 | inputargs = { 65 | "oligo": "m13", 66 | "allowNs": False, 67 | } 68 | 69 | assert collapse.get_barcode_positions(bcseq, inputargs, counter) == [ 70 | len(m13), 71 | len(m13) + 6, 72 | len(m13) + 6 + len(i8), 73 | len(m13) + 6 + len(i8) + 6, 74 | ] 75 | 76 | def test_i8(self, counter): 77 | i8 = "GTCGTGAT" 78 | bcseq = "GTCGTGATTTTCCGGTCGTGATAAAGTG" 79 | inputargs = { 80 | "oligo": "i8", 81 | "allowNs": False, 82 | } 83 | 84 | assert collapse.get_barcode_positions(bcseq, inputargs, counter) == [ 85 | len(i8), 86 | len(i8) + 6, 87 | len(i8) + 6 + len(i8), 88 | len(i8) + 6 + len(i8) + 6, 89 | ] 90 | 91 | def test_i8_single(self, counter): 92 | i8 = "ATCACGAC" 93 | bcseq = "GAAGCTATCACGACATCACTAC" 94 | inputargs = { 95 | "oligo": "i8_single", 96 | "allowNs": False, 97 | } 98 | 99 | assert collapse.get_barcode_positions(bcseq, inputargs, counter) == [ 100 | 0, 101 | 6, 102 | 6 + len(i8), 103 | 6 + len(i8) + 6, 104 | ] 105 | 106 | def test_nebio(self, counter): 107 | bcseq = "CGGGCTTGGTATCGGCCGATCTACGGG" 108 | inputargs = { 109 | "oligo": "nebio", 110 | "allowNs": False, 111 | "bclength": 18, 112 | } 113 | 114 | assert collapse.get_barcode_positions(bcseq, inputargs, counter) == [ 115 | 0, 116 | 18, 117 | ] 118 | 119 | 120 | class TestFindFirstSpacer: 121 | 122 | def test_m13(self): 123 | oligo = { 124 | "spcr1": "GTCGTGACTGGGAAAACCCTGG", 125 | } 126 | seq = "GTCGTGACTGGGAAAACCCTGGTTTCCGGTCGTGATAAAGTG" 127 | oligo_start = 0 128 | allowance = 10 129 | oligo_end = allowance + len(oligo["spcr1"]) 130 | 131 | assert collapse.findFirstSpacer( 132 | oligo, seq, oligo_start, oligo_end 133 | ) == [oligo["spcr1"]] 134 | 135 | def test_i8(self): 136 | oligo = { 137 | "spcr1": "GTCGTGAT", 138 | } 139 | seq = "GTCGTGATTTTCCGGTCGTGATAAAGTG" 140 | oligo_start = 0 141 | allowance = 10 142 | oligo_end = allowance + len(oligo["spcr1"]) 143 | 144 | assert collapse.findFirstSpacer( 145 | oligo, seq, oligo_start, oligo_end 146 | ) == [oligo["spcr1"]] 147 | 148 | def test_i8_single(self): 149 | oligo = { 150 | "spcr1": "ATCACGAC", 151 | } 152 | seq = "GAAGCTATCACGACATCACTAC" 153 | oligo_start = 0 154 | allowance = 10 155 | oligo_end = allowance + len(oligo["spcr1"]) 156 | 157 | assert collapse.findFirstSpacer( 158 | oligo, seq, oligo_start, oligo_end 159 | ) == [oligo["spcr1"]] 160 | 161 | def test_nebio(self): 162 | oligo = { 163 | "spcr1": "TACGGG", 164 | } 165 | seq = "CGGGCTTGGTATCGGCCGATCTACGGG" 166 | oligo_start = 18 167 | oligo_end = oligo_start + 10 168 | 169 | assert collapse.findFirstSpacer( 170 | oligo, seq, oligo_start, oligo_end 171 | ) == [oligo["spcr1"]] 172 | 173 | 174 | class TestReadInData: 175 | 176 | collapse.counts = coll.Counter() 177 | 178 | @pytest.fixture 179 | def blank_input(self): 180 | return [] 181 | 182 | @pytest.fixture 183 | def pipe_args(self): 184 | return {"command": "pipeline"} 185 | 186 | def test_no_dcr(self, blank_input, pipe_args): 187 | with pytest.raises(ValueError): 188 | collapse.read_in_data( 189 | blank_input, pipe_args, None, None, None, None 190 | ) 191 | 192 | 193 | class TestCheckDcrFile: 194 | 195 | @pytest.fixture(scope="class") 196 | def output_dir( 197 | self, tmp_path_factory: pytest.TempPathFactory 198 | ) -> pathlib.Path: 199 | output_dir = tmp_path_factory.mktemp("output") 200 | return output_dir 201 | 202 | @pytest.fixture 203 | def empty_filepath(self, output_dir: pathlib.Path) -> pathlib.Path: 204 | return output_dir / "empty.n12" 205 | 206 | @pytest.fixture 207 | def empty_file(self, empty_filepath: pathlib.Path) -> None: 208 | output = "" 209 | empty_filepath.write_text(output) 210 | 211 | def test_empty_n12(self, empty_file, empty_filepath: pathlib.Path) -> None: 212 | with pytest.raises(ValueError): 213 | collapse.check_dcr_file(empty_filepath, open) 214 | -------------------------------------------------------------------------------- /tests/test_decombine.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import time 3 | from typing import Any 4 | 5 | import pytest 6 | 7 | from decombinator import decombine 8 | 9 | 10 | class TestEmptyFq: 11 | """ 12 | Testing that empty FASTQ is handled gracefully with logging. 13 | """ 14 | 15 | @pytest.fixture(scope="class") 16 | def output_dir( 17 | self, tmp_path_factory: pytest.TempPathFactory 18 | ) -> pathlib.Path: 19 | output_dir = tmp_path_factory.mktemp("output") 20 | return output_dir 21 | 22 | @pytest.fixture 23 | def empty_filepath(self, output_dir: pathlib.Path) -> pathlib.Path: 24 | return output_dir / "empty_merge.fq" 25 | 26 | @pytest.fixture 27 | def empty_file(self, empty_filepath: pathlib.Path) -> None: 28 | output = "" 29 | empty_filepath.write_text(output) 30 | 31 | @pytest.fixture 32 | def pipe_args( 33 | self, output_dir: pathlib.Path, empty_filepath: pathlib.Path 34 | ) -> dict[str, Any]: 35 | return { 36 | "command": "pipeline", 37 | "infile": str(empty_filepath.resolve()), 38 | "dontcheck": False, 39 | "chain": "a", 40 | "outpath": str(output_dir.resolve()) + "/", 41 | "suppresssummary": False, 42 | "tags": "extended", 43 | "species": "human", 44 | "tagfastadir": "tests/resources/Decombinator-Tags-FASTAs", 45 | } 46 | 47 | @pytest.fixture 48 | def test_empty_fq( 49 | self, empty_file: None, pipe_args: dict[str, Any] 50 | ) -> None: 51 | with pytest.raises(ValueError): 52 | decombine.decombinator(pipe_args) 53 | 54 | @pytest.fixture 55 | def expected_log(self) -> str: 56 | return "OutputFile," + "empty_alpha" + "\nNumberReadsInput," + "0\n" 57 | 58 | @pytest.fixture 59 | def test_empty_log( 60 | self, output_dir: pathlib.Path, test_empty_fq: None, expected_log: str 61 | ) -> None: 62 | date = time.strftime("%Y_%m_%d") 63 | logfile = ( 64 | output_dir 65 | / "Logs" 66 | / f"{date}_alpha_empty_merge_Decombinator_Summary.csv" 67 | ) 68 | with logfile.open() as log: 69 | assert log.read() == expected_log 70 | 71 | @pytest.fixture 72 | def test_empty_fq_repeat( 73 | self, pipe_args: dict[str, Any], test_empty_log: None 74 | ) -> None: 75 | with pytest.raises(ValueError): 76 | decombine.decombinator(pipe_args) 77 | 78 | # TODO: Test this logic independently. Due to logging design in Decombinator 79 | # at present this must be tested in the empty fq case specifically 80 | def test_2nd_log( 81 | self, 82 | output_dir: pathlib.Path, 83 | test_empty_fq: None, 84 | expected_log: str, 85 | test_empty_fq_repeat: None, 86 | ) -> None: 87 | date = time.strftime("%Y_%m_%d") 88 | logfile = ( 89 | output_dir 90 | / "Logs" 91 | / f"{date}_alpha_empty_merge_Decombinator_Summary2.csv" 92 | ) 93 | with logfile.open() as log: 94 | assert log.read() == expected_log 95 | -------------------------------------------------------------------------------- /tests/test_pipeline.py: -------------------------------------------------------------------------------- 1 | from Bio import BiopythonWarning 2 | from decombinator import pipeline, io 3 | import pytest 4 | import pathlib 5 | import os 6 | 7 | pytestmark = pytest.mark.usefixtures("resource_location", "chain_name") 8 | 9 | 10 | @pytest.fixture(params=["a", "b"], scope="module") 11 | def chain_type(request: pytest.FixtureRequest) -> str: 12 | return request.param 13 | 14 | 15 | @pytest.fixture(scope="module") 16 | def output_dir(tmp_path_factory: pytest.TempPathFactory) -> pathlib.Path: 17 | # Create a temporary output directory 18 | output_dir = tmp_path_factory.mktemp("output") 19 | return output_dir 20 | 21 | 22 | @pytest.fixture(scope="module") 23 | def race_pipeline( 24 | output_dir: pathlib.Path, resource_location: pathlib.Path, chain_type: str 25 | ) -> dict: 26 | filename: str = "TINY_1.fq" 27 | args = io.create_args_dict( 28 | infile=str((resource_location / filename).resolve()), 29 | chain=chain_type, 30 | bc_read="R2", 31 | dontgzip=True, 32 | outpath=f"{output_dir}{os.sep}", 33 | tagfastadir="tests/resources/Decombinator-Tags-FASTAs", 34 | ) 35 | print(f"running with {chain_type} chain") 36 | pipeline.run(args) 37 | 38 | 39 | def test_tsv_output( 40 | race_pipeline: None, 41 | output_dir: pathlib.Path, 42 | resource_location: pathlib.Path, 43 | chain_type: str, 44 | chain_name: dict, 45 | ) -> None: 46 | 47 | # Load reference tsv 48 | reference_file = f"dcr_TINY_1_{chain_name[chain_type]}.tsv" 49 | reference_path = resource_location / reference_file 50 | with open(reference_path, "r") as f: 51 | reference_data = f.read() 52 | 53 | # Load output file generated by pipeline.run 54 | output_file = f"dcr_TINY_1_{chain_name[chain_type]}.tsv" 55 | output_path = output_dir / output_file 56 | with open(output_path, "r") as f: 57 | output_data = f.read() 58 | 59 | # Perform comparison 60 | assert output_data == reference_data, "Output does not match reference data" 61 | 62 | 63 | def test_n12_output( 64 | race_pipeline: None, 65 | output_dir: pathlib.Path, 66 | resource_location: pathlib.Path, 67 | chain_type: str, 68 | chain_name: dict, 69 | ) -> None: 70 | 71 | # Load reference n12 72 | reference_file = f"dcr_TINY_1_{chain_name[chain_type]}.n12" 73 | reference_path = resource_location / reference_file 74 | with open(reference_path, "r") as f: 75 | reference_data = f.read() 76 | 77 | # Load output file generated by pipeline.run 78 | output_file = f"dcr_TINY_1_{chain_name[chain_type]}.n12" 79 | output_path = output_dir / output_file 80 | with open(output_path, "r") as f: 81 | output_data = f.read() 82 | 83 | # Perform comparison 84 | assert output_data == reference_data, "Output does not match reference data" 85 | 86 | 87 | def test_freq_output( 88 | race_pipeline: None, 89 | output_dir: pathlib.Path, 90 | resource_location: pathlib.Path, 91 | chain_type: str, 92 | chain_name: dict, 93 | ) -> None: 94 | 95 | # Load reference freq 96 | reference_file = f"dcr_TINY_1_{chain_name[chain_type]}.freq" 97 | reference_path = resource_location / reference_file 98 | with open(reference_path, "r") as f: 99 | reference_data = f.read() 100 | 101 | # Load output file generated by pipeline.run 102 | output_file = f"dcr_TINY_1_{chain_name[chain_type]}.freq" 103 | output_path = output_dir / output_file 104 | with open(output_path, "r") as f: 105 | output_data = f.read() 106 | 107 | # Perform comparison 108 | assert output_data == reference_data, "Output does not match reference data" 109 | 110 | 111 | def test_log_output( 112 | race_pipeline: None, 113 | output_dir: pathlib.Path, 114 | resource_location: pathlib.Path, 115 | chain_type: str, 116 | chain_name: dict, 117 | ) -> None: 118 | 119 | # Load reference logs 120 | reference_log_paths = [file for file in resource_location.glob("*")] 121 | 122 | reference_logs = [ 123 | file 124 | for file in reference_log_paths 125 | if (chain_name[chain_type] in file.name) 126 | ] 127 | 128 | # Load output logs 129 | output_log_paths = [file for file in output_dir.glob("**/*.csv")] 130 | 131 | output_logs = [ 132 | file 133 | for file in output_log_paths 134 | if (chain_name[chain_type] in file.name) 135 | ] 136 | 137 | # Perform comparison 138 | comparison_start = { 139 | "Decombinator_Summary.csv": 8, 140 | "Collapsing_Summary.csv": 9, 141 | "Translation_Summary.csv": 7, 142 | } 143 | for reference_log, output_log in zip( 144 | sorted(reference_logs), sorted(output_logs) 145 | ): 146 | 147 | with open(reference_log, "r") as f: 148 | comparison_label = "_".join(reference_log.name.split("_")[-2:]) 149 | print(comparison_label) 150 | reference_log_lines = f.readlines()[ 151 | comparison_start[comparison_label] 152 | ] 153 | 154 | with open(output_log, "r") as f: 155 | comparison_label = "_".join(output_log.name.split("_")[-2:]) 156 | print(comparison_label) 157 | output_log_lines = f.readlines()[comparison_start[comparison_label]] 158 | 159 | assert output_log_lines == reference_log_lines 160 | -------------------------------------------------------------------------------- /tests/test_subparsers.py: -------------------------------------------------------------------------------- 1 | from decombinator import pipeline, io 2 | import pytest 3 | import pathlib 4 | import os 5 | import subprocess 6 | 7 | pytestmark = pytest.mark.usefixtures("resource_location", "chain_name") 8 | 9 | 10 | @pytest.fixture(params=["a", "b"], scope="module") 11 | def chain_type(request: pytest.FixtureRequest) -> str: 12 | return request.param 13 | 14 | 15 | @pytest.fixture(scope="module") 16 | def output_dir(tmp_path_factory: pytest.TempPathFactory) -> pathlib.Path: 17 | # Create a temporary output directory 18 | output_dir = tmp_path_factory.mktemp("output") 19 | return output_dir 20 | 21 | 22 | @pytest.fixture(scope="module") 23 | def cli_decombine( 24 | output_dir: pathlib.Path, resource_location: pathlib.Path, chain_type: str 25 | ) -> None: 26 | filename: str = "TINY_1.fq" 27 | process = subprocess.run( 28 | [ 29 | "./decombinator-runner.py", 30 | "decombine", 31 | "-in", 32 | str((resource_location / filename).resolve()), 33 | "-br", 34 | "R2", 35 | "-bl", 36 | "42", 37 | "-c", 38 | chain_type, 39 | "-op", 40 | f"{output_dir}{os.sep}", 41 | "-tfdir", 42 | "tests/resources/Decombinator-Tags-FASTAs", 43 | "-dz", 44 | ] 45 | ) 46 | print(f"running with {chain_type} chain") 47 | print(process.stdout) 48 | 49 | 50 | @pytest.fixture(scope="module") 51 | def cli_collapse( 52 | output_dir: pathlib.Path, 53 | resource_location: pathlib.Path, 54 | chain_name: dict, 55 | chain_type: str, 56 | ) -> None: 57 | filename: str = "dcr_TINY_1_" + chain_name[chain_type] + ".n12" 58 | process = subprocess.run( 59 | [ 60 | "./decombinator-runner.py", 61 | "collapse", 62 | "-in", 63 | str((resource_location / filename).resolve()), 64 | "-ol", 65 | "M13", 66 | "-c", 67 | chain_type, 68 | "-op", 69 | f"{output_dir}{os.sep}", 70 | "-dz", 71 | ] 72 | ) 73 | print(f"running with {chain_type} chain") 74 | print(process.stdout) 75 | 76 | 77 | @pytest.fixture(scope="module") 78 | def cli_translate( 79 | output_dir: pathlib.Path, 80 | resource_location: pathlib.Path, 81 | chain_name: dict, 82 | chain_type: str, 83 | ) -> None: 84 | filename: str = "dcr_TINY_1_" + chain_name[chain_type] + ".freq" 85 | process = subprocess.run( 86 | [ 87 | "./decombinator-runner.py", 88 | "translate", 89 | "-in", 90 | str((resource_location / filename).resolve()), 91 | "-c", 92 | chain_type, 93 | "-op", 94 | f"{output_dir}{os.sep}", 95 | "-dz", 96 | "-tfdir", 97 | "tests/resources/Decombinator-Tags-FASTAs", 98 | ] 99 | ) 100 | print(f"running with {chain_type} chain") 101 | print(process.stdout) 102 | 103 | 104 | def test_n12_output( 105 | cli_decombine: None, 106 | output_dir: pathlib.Path, 107 | resource_location: pathlib.Path, 108 | chain_type: str, 109 | chain_name: dict, 110 | ) -> None: 111 | 112 | # Load reference n12 113 | reference_file = f"dcr_TINY_1_{chain_name[chain_type]}.n12" 114 | reference_path = resource_location / reference_file 115 | with open(reference_path, "r") as f: 116 | reference_data = f.read() 117 | 118 | # Load output file generated by pipeline.run 119 | output_file = f"dcr_TINY_1_{chain_name[chain_type]}.n12" 120 | output_path = output_dir / output_file 121 | with open(output_path, "r") as f: 122 | output_data = f.read() 123 | 124 | # Perform comparison 125 | assert output_data == reference_data, "Output does not match reference data" 126 | 127 | 128 | def test_freq_output( 129 | cli_collapse: None, 130 | output_dir: pathlib.Path, 131 | resource_location: pathlib.Path, 132 | chain_type: str, 133 | chain_name: dict, 134 | ) -> None: 135 | 136 | # Load reference freq 137 | reference_file = f"dcr_TINY_1_{chain_name[chain_type]}.freq" 138 | reference_path = resource_location / reference_file 139 | with open(reference_path, "r") as f: 140 | reference_data = f.read() 141 | 142 | # Load output file generated by pipeline.run 143 | output_file = f"dcr_TINY_1_{chain_name[chain_type]}.freq" 144 | output_path = output_dir / output_file 145 | with open(output_path, "r") as f: 146 | output_data = f.read() 147 | 148 | # Perform comparison 149 | assert output_data == reference_data, "Output does not match reference data" 150 | 151 | 152 | # @pytest.mark.filterwarnings("ignore::Bio.BiopythonWarning") 153 | def test_tsv_output( 154 | cli_translate: None, 155 | output_dir: pathlib.Path, 156 | resource_location: pathlib.Path, 157 | chain_type: str, 158 | chain_name: dict, 159 | ) -> None: 160 | 161 | # Load reference tsv 162 | reference_file = f"dcr_TINY_1_{chain_name[chain_type]}.tsv" 163 | reference_path = resource_location / reference_file 164 | with open(reference_path, "r") as f: 165 | reference_data = f.read() 166 | 167 | # Load output file generated by pipeline.run 168 | output_file = f"dcr_TINY_1_{chain_name[chain_type]}.tsv" 169 | output_path = output_dir / output_file 170 | with open(output_path, "r") as f: 171 | output_data = f.read() 172 | 173 | # Perform comparison 174 | print(output_data) 175 | print(reference_data) 176 | assert output_data == reference_data, "Output does not match reference data" 177 | 178 | 179 | def test_log_output( 180 | output_dir: pathlib.Path, 181 | resource_location: pathlib.Path, 182 | chain_type: str, 183 | chain_name: dict, 184 | ) -> None: 185 | 186 | # Load reference logs 187 | reference_log_paths = [file for file in resource_location.glob("*")] 188 | 189 | reference_logs = [ 190 | file 191 | for file in reference_log_paths 192 | if (chain_name[chain_type] in file.name) 193 | ] 194 | 195 | # Load output logs 196 | output_log_paths = [file for file in output_dir.glob("**/*.csv")] 197 | 198 | output_logs = [ 199 | file 200 | for file in output_log_paths 201 | if (chain_name[chain_type] in file.name) 202 | ] 203 | 204 | # Perform comparison 205 | comparison_start = { 206 | "Decombinator_Summary.csv": 8, 207 | "Collapsing_Summary.csv": 9, 208 | "Translation_Summary.csv": 7, 209 | } 210 | for reference_log, output_log in zip( 211 | sorted(reference_logs), sorted(output_logs) 212 | ): 213 | 214 | with open(reference_log, "r") as f: 215 | comparison_label = "_".join(reference_log.name.split("_")[-2:]) 216 | print(comparison_label) 217 | reference_log_lines = f.readlines()[ 218 | comparison_start[comparison_label] 219 | ] 220 | 221 | with open(output_log, "r") as f: 222 | comparison_label = "_".join(output_log.name.split("_")[-2:]) 223 | print(comparison_label) 224 | output_log_lines = f.readlines()[comparison_start[comparison_label]] 225 | 226 | assert output_log_lines == reference_log_lines 227 | --------------------------------------------------------------------------------