├── confindr_src ├── __init__.py ├── wrappers │ ├── __init__.py │ └── mash.py ├── create_genus_specific_db.py ├── confindr.py └── database_setup.py ├── tests ├── fake_fastqs │ ├── test_1.fastq.gz │ ├── test_2.fastq.gz │ ├── test_R1.fastq.gz │ ├── test_R2.fastq.gz │ └── test_alone.fastq.gz ├── rmlst.fasta.fai └── real_fastqs │ ├── hiseq_precasava_R1.fastq │ ├── hiseq_precasava_R2.fastq │ ├── hiseq_precasava_multilane_R1.fastq │ ├── hiseq_precasava_multilane_R2.fastq │ ├── hiseq_precasava_sra_R1.fastq │ ├── hiseq_precasava_sra_R2.fastq │ ├── miseq_casava_R2.fastq │ ├── miseq_casava_multilane_R2.fastq │ ├── miseq_casava_R1.fastq │ ├── miseq_casava_multilane_R1.fastq │ ├── miseq_casava_sra_R2.fastq │ └── miseq_casava_sra_R1.fastq ├── requirements.txt ├── docs ├── performance.png ├── index.md └── install.md ├── refseq_sketch └── refseq.msh ├── .gitattributes ├── mkdocs.yml ├── setup.py ├── .travis.yml ├── LICENSE ├── .circleci └── config.yml ├── .gitignore ├── CHANGELOG.md └── README.md /confindr_src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /confindr_src/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fake_fastqs/test_1.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fake_fastqs/test_2.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fake_fastqs/test_R1.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/tests/fake_fastqs/test_R2.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fake_fastqs/test_alone.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | biopython==1.81 2 | pysam==0.21.0 3 | pytest==7.3.1 4 | numpy==1.24.3 -------------------------------------------------------------------------------- /docs/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OLC-Bioinformatics/ConFindr/HEAD/docs/performance.png -------------------------------------------------------------------------------- /refseq_sketch/refseq.msh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OLC-Bioinformatics/ConFindr/HEAD/refseq_sketch/refseq.msh -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | databases/rMLST_combined.fasta filter=lfs diff=lfs merge=lfs -text 2 | databases/RefSeqSketchesDefaults.msh filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: ConFindr 2 | site_url: https://olc-bioinformatics.github.io/ConFindr 3 | site_description: ConFindr Pipeline for detecting intra-species contamination in raw bacterial NGS reads. 
4 | repo_url: https://github.com/OLC-Bioinformatics/ConFindr 5 | theme: readthedocs 6 | 7 | nav: 8 | - Home: 'index.md' 9 | - Installation: 'install.md' 10 | - Usage: 'usage.md' 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup, find_packages 4 | 5 | setup( 6 | name="confindr", 7 | version="0.8.2", 8 | packages=find_packages(), 9 | entry_points={ 10 | 'console_scripts': [ 11 | 'confindr.py = confindr_src.confindr:main', 12 | 'confindr = confindr_src.confindr:main', 13 | 'confindr_database_setup = confindr_src.database_setup:main', 14 | 'confindr_create_db = confindr_src.create_genus_specific_db:main' 15 | ], 16 | }, 17 | author="Adam Koziol", 18 | author_email="adam.koziol@inspection.gc.ca", 19 | url="https://github.com/OLC-Bioinformatics/ConFindr", 20 | install_requires=['biopython', 21 | 'pysam', 22 | 'pytest', 23 | 'numpy', 24 | 'rauth'] 25 | ) 26 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - '3.6' 4 | - '3.7' 5 | - '3.8' 6 | cache: pip 7 | 8 | install: 9 | - wget -O confindr_integration.tar.gz https://ndownloader.figshare.com/files/14773226 && tar xf confindr_integration.tar.gz 10 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 11 | - bash miniconda.sh -b -p $HOME/miniconda 12 | - export PATH="$HOME/miniconda/bin:$PATH" 13 | - hash -r 14 | - conda config --set always_yes yes --set changeps1 no 15 | - conda update -q conda 16 | - conda info -a 17 | - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION -c conda-forge 18 | - source activate test-environment 19 | - conda install -c bioconda -c conda-forge bbmap biopython kma==1.2.0 mash minimap2 
pluggy pysam pytest rauth samtools 20 | - pip install -e . 21 | branches: 22 | only: 23 | - master 24 | 25 | script: 26 | - source activate test-environment 27 | - travis_wait 30 pytest tests/test_confindr.py -s -vv 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2020 Government of Canada 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | # orbs: 4 | # python: circleci/python@1.5.0 5 | 6 | jobs: 7 | confindr: 8 | docker: 9 | - image: ubuntu:22.04 10 | steps: 11 | - checkout 12 | - run: 13 | name: Setup Environment and Run Tests 14 | command: | 15 | apt update 16 | apt install -y wget coreutils curl gnupg 17 | wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" 18 | bash Miniforge3.sh -b -p "${HOME}/conda" 19 | source "${HOME}/conda/etc/profile.d/conda.sh" 20 | source "${HOME}/conda/etc/profile.d/mamba.sh" 21 | conda config --add channels bioconda 22 | conda config --set always_yes yes --set changeps1 no 23 | conda activate 24 | mamba create -n confindr bioconda::confindr=0.8.2 25 | source activate confindr 26 | wget https://figshare.com/ndownloader/files/41228577 -O test_samples.tar.gz && \ 27 | tar -xzvf test_samples.tar.gz && \ 28 | mv test_samples/ tests/ && \ 29 | rm test_samples.tar.gz 30 | python -m pytest tests/ -vvv 31 | workflows: 32 | build_and_test: 33 | jobs: 34 | - confindr: 35 | filters: 36 | branches: 37 | ignore: gh-pages -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Python Files 2 | *.pyc 3 | 4 | # Docs 5 | site/ 6 | 7 | # Access keys 8 | get/ 9 | 10 | .idea/ 11 | 12 | # Edited text files 13 | *~ 14 | 15 | # Byte-compiled / optimized / DLL files 16 | __pycache__/ 17 | *.py[cod] 18 | *$py.class 19 | 20 | # C extensions 21 | *.so 22 | 23 | # Distribution / packaging 24 | .Python 25 | env/ 26 | build/ 27 | develop-eggs/ 28 | dist/ 29 | downloads/ 30 | eggs/ 31 | .eggs/ 32 | lib/ 33 | lib64/ 34 | parts/ 35 | sdist/ 36 | var/ 37 | *.egg-info/ 38 | 
.installed.cfg 39 | *.egg 40 | 41 | # PyInstaller 42 | # Usually these files are written by a python script from a template 43 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 44 | *.manifest 45 | *.spec 46 | 47 | # Installer logs 48 | pip-log.txt 49 | pip-delete-this-directory.txt 50 | 51 | # Unit test / coverage reports 52 | htmlcov/ 53 | .tox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *,cover 60 | .hypothesis/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | target/ 82 | 83 | # IPython Notebook 84 | .ipynb_checkpoints 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # celery beat schedule file 90 | celerybeat-schedule 91 | 92 | # dotenv 93 | .env 94 | 95 | # virtualenv 96 | venv/ 97 | ENV/ 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | 102 | # pypi distributions 103 | dist/ 104 | 105 | # integration test files 106 | confindr_integration* 107 | 108 | # Rope project settings 109 | .ropeproject 110 | -------------------------------------------------------------------------------- /tests/rmlst.fasta.fai: -------------------------------------------------------------------------------- 1 | BACT000001_30 1674 15 1674 1675 2 | BACT000002_25 726 1705 726 727 3 | BACT000003_16 702 2447 702 703 4 | BACT000004_18 621 3165 621 622 5 | BACT000005_28 504 3802 504 505 6 | BACT000006_23 396 4322 396 397 7 | BACT000007_18 471 4734 471 472 8 | BACT000008_15 393 5221 393 394 9 | BACT000009_24 393 5630 393 394 10 | BACT000010_24 312 6039 312 313 11 | BACT000011_13 390 6367 390 391 12 | BACT000012_20 375 6773 375 376 13 | BACT000013_13 357 7164 357 358 14 | BACT000014_12 306 7537 306 307 15 | BACT000015_14 270 7859 270 271 16 | BACT000016_1279 249 8147 249 250 
17 | BACT000017_6 255 8411 255 256 18 | BACT000018_16 228 8682 228 229 19 | BACT000019_12 279 8926 279 280 20 | BACT000020_32 264 9221 264 265 21 | BACT000021_15 216 9501 216 217 22 | BACT000030_22 705 9733 705 706 23 | BACT000031_28 822 10454 822 823 24 | BACT000032_26 630 11292 630 631 25 | BACT000033_28 606 11938 606 607 26 | BACT000034_19 540 12560 540 541 27 | BACT000035_18 534 13116 534 535 28 | BACT000036_19 366 13666 366 367 29 | BACT000038_35 450 14048 450 451 30 | BACT000039_15 498 14514 498 499 31 | BACT000040_25 429 15028 429 430 32 | BACT000042_20 429 15473 429 430 33 | BACT000043_17 372 15918 372 373 34 | BACT000044_18 435 16306 435 436 35 | BACT000045_20 411 16757 411 412 36 | BACT000046_17 384 17184 384 385 37 | BACT000047_14 354 17584 354 355 38 | BACT000048_22 348 17954 348 349 39 | BACT000049_31 357 18318 357 358 40 | BACT000050_14 312 18691 312 313 41 | BACT000051_13 333 19019 333 334 42 | BACT000052_21 303 19368 303 304 43 | BACT000053_23 315 19687 315 316 44 | BACT000056_31 258 20018 258 259 45 | BACT000057_26 237 20292 237 238 46 | BACT000058_9 192 20544 192 193 47 | BACT000059_14 180 20752 180 181 48 | BACT000061_18 174 20948 174 175 49 | BACT000062_17 168 21138 168 169 50 | BACT000063_20 141 21322 141 142 51 | BACT000064_13 198 21479 198 199 52 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.8.2 - 2025-02-07 4 | 5 | This is a minor release. The most important changes include updates to URL schemes, improvements in handling temporary directories, and enhancements in read name processing. 6 | 7 | ### Added 8 | 9 | - Added checks to ensure temporary directories exist before attempting to remove them in `confindr_src/confindr.py` (#65). 10 | - Initialized dictionaries for summarizing base types with specific keys in `confindr_src/methods.py` to prevent key errors (#65). 
11 | - Added Pytest unit tests and test FASTQ files for ensuring proper parsing of paired-end Illumina FASTQ headers (#70). 12 | 13 | ### Changed 14 | 15 | - Improved logic for using temporary directories during database creation in `confindr_src/methods.py` (#65). Fixes #57. 16 | - Updated the installation guide to reflect changes in the PubMLST API key generation process in `docs/install.md` (#62, #71). 17 | - Corrected the URL for the example dataset in `docs/usage.md` to point to the latest version. 18 | 19 | ### Fixed 20 | 21 | - Updated URLs from HTTP to HTTPS in the `__init__` method of `confindr_src/database_setup.py` to ensure secure connections and allow for successful downloads of databases (#69). Fixes #67. 22 | - Improved addition of read direction suffixes to read names in `confindr_src/methods.py` to handle cases where the suffix already exists (#70). Fixes #63 and resolves #54. 23 | 24 | ## 0.8.1 - 2023-05-19 25 | 26 | This is a major release. 27 | 28 | ### Added 29 | 30 | - Integrated congruency of SNVs within paired-end reads into contamination calculations (a1744a4). 31 | - Base cutoff values are now dynamically calculated based upon gene-specific quality score, length, and depth of coverage, with a starting cutoff of 3 which can be changed using `--base_cutoff` (880445d, 84b7d91, c06d438). 32 | - Option to download rMLST databases using `-u/--unverified` within the `confindr_database_setup` command, for downloading databases behind a firewall and/or have a self-signed certificate. 33 | 34 | ### Changed 35 | 36 | - Refactored code by moving methods to `methods.py` (a7e9af6). 37 | - Improved `README.md` and MkDocs documentation with increased accuracy and readability. 38 | - Pytest tests now use downsampled samples from the originally published ConFindr benchmarking dataset, and instructions for running these tests have been added to the MkDocs (cc27c94). 
39 | - Enforced Phred33 encoding for `bbduk.sh` calls to support future development with Nanopore reads (#39) (ac3b976). 40 | 41 | ### Removed 42 | 43 | - Percentage contamination reporting; this was found to be unreliable and sometimes misleading (ec3ae7a). 44 | - `--cross_details` flag; analysis is now always continued after cross-genus contamination has been detected (ec3ae7a). 45 | 46 | ### Fixed 47 | 48 | - TypeError that occurred when using an older version of BioPython (#27, #30, #38, #41) (19d0d1d, 96e1c7d). 49 | - Error in `install.md` which suggested that rMLST databases are freely available to all users (a1ce7dc). 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![CircleCI](https://dl.circleci.com/status-badge/img/gh/OLC-Bioinformatics/ConFindr/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/OLC-Bioinformatics/ConFindr/tree/main) 2 | [![PyPI version](https://badge.fury.io/py/confindr.svg)](https://badge.fury.io/py/confindr) 3 | [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat-square)](http://bioconda.github.io/recipes/confindr/README.html) 4 | 5 | 6 | # ConFindr 7 | 8 | This program is designed to find bacterial intra-species contamination in raw Illumina data. It does this 9 | by looking for multiple alleles of core, single copy genes. 10 | 11 | For **complete instructions on installation and usage**, please visit [the ConFindr GitHub Pages site](https://olc-bioinformatics.github.io/ConFindr/). 12 | 13 | ## Important Note 14 | 15 | ConFindr has only been validated using rMLST databases. **Please use them if possible** (`--rmlst`). Complete installation instructions can be found [here](https://olc-bioinformatics.github.io/ConFindr/install/#downloading-confindr-databases). 
16 | 17 | ## Quickstart 18 | 19 | ### Installing ConFindr 20 | 21 | 1. Follow the instructions [here](https://bioconda.github.io/) to add the Bioconda channel to your list of conda channels, if it hasn't already been added. 22 | 23 | 2. Install ConFindr into a new conda environment named 'confindr': 24 | 25 | `conda create -n confindr -c bioconda confindr=0.8.2` 26 | 27 | 3. Activate the new conda environment: 28 | 29 | `conda activate confindr` 30 | 31 | ### Downloading and setting up the rMLST databases 32 | 33 | Instructions for downloading and setting up the rMLST databases can be found [here](https://olc-bioinformatics.github.io/ConFindr/install/#downloading-confindr-databases). 34 | 35 | ### Testing ConFindr 36 | 37 | 1. To obtain an example dataset, run the following command, which will create a folder named `test_samples` in your current working directory: 38 | 39 | ```bash 40 | wget https://figshare.com/ndownloader/files/41228577 -O test_samples.tar.gz && \ 41 | tar -xzvf test_samples.tar.gz && \ 42 | rm test_samples.tar.gz 43 | ``` 44 | 45 | 2. As of version `0.7.0` ConFindr can be run automatically on _Escherichia_, _Salmonella_, and _Listeria_ with no further 46 | work on your part, using core-gene databases (*experimental*). Simply run: 47 | 48 | `confindr -i test_samples -o test_out` 49 | 50 | 3. To use the *recommended* rMLST database (after installation): 51 | 52 | `confindr -i test_samples -o test_out --rmlst` 53 | 54 | The results for this analysis can be found within `test_out/confindr_report.csv`. 55 | More extensive tests can be performed by following the instructions [here](https://olc-bioinformatics.github.io/ConFindr/usage/#example-dataset). 56 | 57 | If you want to run ConFindr on genera other than the 3 listed above, you'll need to get access to and download the rMLST databases by following the instructions [here](https://olc-bioinformatics.github.io/ConFindr/install/#downloading-confindr-databases). 
58 | 59 | ## Running ConFindr in a Python Script 60 | 61 | If you want to run ConFindr from within a script instead of running from the command line, here's how: 62 | 63 | ```python 64 | from confindr_src import confindr 65 | 66 | # Find read files. 67 | paired_reads = confindr.find_paired_reads('path_to_fastq_folder', forward_id='_R1', reverse_id='_R2') 68 | # Run confindr. This assumes that you have already downloaded the databases. If you haven't, 69 | # you can run confindr.check_for_databases_and_download(database_location='path/where/you/want/to/download', tmpdir='a/tmp/dir') 70 | for pair in paired_reads: 71 | confindr.find_contamination(pair=pair, 72 | forward_id='_R1', # change if yours is different 73 | threads=4, 74 | output_folder='path/to/output', 75 | databases_folder='path/to/databases') 76 | 77 | ``` 78 | 79 | ## Reporting Issues 80 | 81 | If you have any problems installing or running ConFindr, or have a feature request, 82 | please open an issue here on GitHub. 83 | 84 | ## Citing ConFindr 85 | 86 | ConFindr has been published in PeerJ—if you use it in your work, please cite the following: 87 | 88 | ``` 89 | Low AJ, Koziol AG, Manninger PA, Blais B, Carrillo CD. 2019. ConFindr: rapid detection of intraspecies and cross-species contamination in bacterial whole-genome sequence data. PeerJ 7:e6995 https://doi.org/10.7717/peerj.6995 90 | ``` -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # What is ConFindr? 2 | 3 | ConFindr is a pipeline that can detect contamination in bacterial NGS data, both between and within species. It can do this with pretty darn good sensitivity - two samples mixed together with as few as 4 | 500 SNPs between them (> 99.9 percent identity!) can be identified. This allows for stringent quality control of NGS samples. 5 | 6 | ### How Does ConFindr Work? 
7 | 8 | ConFindr works by looking at conserved core genes - either using rMLST genes (53 genes are known to be single copy and conserved across all bacteria with some known exceptions, which ConFindr handles), or 9 | custom sets of genes derived from core-genome schemes. As the genes ConFindr looks at are single copy, any sample that has multiple alleles of one or more gene is likely to be contaminated. 10 | To identify the presence of multiple alleles in a sample, the following workflow is followed: 11 | 12 | 1. Use Mash to determine the genus of each sample so that genus-specific databases can be constructed 13 | and check for interspecies contamination. 14 | 2. Perform stringent quality trimming and bait out reads that contain rMLST gene sequence, using BBDuk. 15 | 3. Align reads back to the rMLST genes. 16 | 4. Look at the alignment to find `Contaminating SNVs` - those that have sites where more than one base is present, indicating 17 | that multiple alleles are present, indicating contamination. 18 | 19 | ### Limits of Detection 20 | 21 | #### Intraspecies 22 | 23 | The limit of detection for ConFindr is going to be a function of several factors, so it's hard to give 24 | exact information on what can and can't be detected. However, here are a few guidelines: 25 | 26 | - Two strains with identical rMLST genes won't ever have contamination detected between them. 27 | - ConFindr has a cutoff of finding 3 `Contaminating SNVs` found for calling a sample as contaminated, as 28 | there will occasionally be 1 false positive, and very rarely 2 false positive sites. In practice, this still lets you detect 29 | contamination essentially whenever two strains have different rMLST types, as >99 percent of types have 30 | at least 3 SNPs between them in _E. coli_, _S. enterica_, and _L. monocytogenes_. I haven't looked at other 31 | species in detail, but this very likely holds true for them too. 
32 | - The degree of contamination matters: things with very low levels of contamination (around 1 percent) probably 33 | won't ever be picked up under default parameters. Once you hit 5 percent contamination, whether or not it gets 34 | detected is a function of sequencing depth and distance between species. 35 | - Sequencing depth matters: `Contaminating SNVs` are found more reliably the more depth you have. ConFindr seems 36 | to work pretty well once you hit 40X or 50X, but trying to run it on a sample with only 10X coverage is unlikely to ever 37 | find anything (unless default parameters get changed). 38 | - Relatedness of the contaminant strain is the final major factor - with something very closely related to the 39 | strain you actually want, picking up contamination at 5 percent will be pretty difficult. 40 | 41 | 42 | The below graph shows the magnitude of contamination detected for several synthetic datasets in *Escherichia coli*. 43 | Strains were mixed together that were either identical (and so should have no contamination), not identical but 44 | have the same rMLST type (and so are contaminated, but beyond the limit of detection), two strains with the same serotype, 45 | and therefore very closely related, or two strains of differing serotypes. 46 | 47 | The black line on the graph represents our cutoff for contaminated samples - any sample with a magnitude above that 48 | can reliably be called contaminated. As can be seen, two different serotypes from the same species are reliably detected 49 | at contamination levels of 5 percent or higher, two strains of the same serotype are often detected at 10 percent 50 | contamination and almost always at 20 percent contamination, and two strains that have the same rMLST or are identical 51 | never have contamination detected. Results should be very similar for other species. 
52 | 53 | ![alt text](performance.png "ConFindr Performance") 54 | 55 | #### Interspecies 56 | 57 | ConFindr seems to reliably be able to detect interspecies contamination at levels of 5 percent or above, but its checks 58 | for interspecies contamination are not particularly rigorous. If you're very worried about interspecies contamination, 59 | it would be a good idea to put your samples through some sort of metagenomics software (such as Kraken or Kaiju). 60 | 61 | ### Feedback 62 | 63 | If you run into any issues installing or running ConFindr, have feature requests, want some help interpreting results, or 64 | anything else, feel free to open an issue on GitHub or send me an email at `adam.koziol@canada.ca`. 65 | -------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## System Requirements 4 | 5 | ConFindr has been tested with Debian-based Linux systems, 6 | but should in principle work on any flavour of Linux, as well as MacOSX. 7 | Windows isn't supported, but it may very well be installable via bioconda. If you have any success running 8 | ConFindr on a Windows machine, let me know! 9 | 10 | ConFindr should run on any regular desktop/laptop with 8 GB of RAM or more. 11 | 12 | ## Downloading ConFindr Databases 13 | 14 | As of ConFindr 0.7.0, databases for detecting contamination in _Escherichia_, _Listeria_, and _Salmonella_ derived from 15 | core-gene schemes are freely available and will be automatically downloaded by ConFindr when it runs. If you only want 16 | to run ConFindr on these three genera, nothing further is necessary. If you want to run ConFindr on any other genera, keep 17 | reading on how to get access to the necessary databases. 18 | 19 | ConFindr uses the ribosomal multi-locus sequence typing (rMLST) scheme to detect contamination in genera other 20 | than the ones listed above. 
These databases are available free for academic use, but you will need to jump through a few 21 | hoops before you can get access to them due to an associated [licence agreement](https://pubmlst.org/rmlst/rMLST_licence.pdf). 22 | Non-academic use will require a commercial licence. 23 | 24 | Here are the steps to getting databases downloaded: 25 | 26 | 1. Register for a PubMLST account if you do not already have one. Link to register is [here](https://pubmlst.org/bigsdb). 27 | Click on `Register for a site-wide account.` 28 | 29 | 2. Log in to your account at [https://pubmlst.org/bigsdb](https://pubmlst.org/bigsdb) and request access to `Ribosomal MLST genomes (pubmlst_rmlst_isolates)` and `Ribosomal MLST typing (pubmlst_rmlst_seqdef)` under 'Database registrations'. Additionally, create a PubMLST API key on the same page under 'API keys'. The generated client ID (consumer key) and client secret (consumer secret) will enable you to access the database programmatically. 30 | 31 | 3. Once you've gotten your consumer key and consumer secret, put them into a text file 32 | with the key on the first line and the secret on the second. It should look something like the below 33 | snippet: 34 | 35 | ``` 36 | efKXmqp2D0EBlMBkZaGC2lPf 37 | F$M+fQ2AFFB2YBDfF9fpHF^qSWJdmmN%L4Fxf5Gur3 38 | ``` 39 | 40 | 4. Install ConFindr as shown in the next section. 41 | 42 | 5. With ConFindr installed, use the command `confindr_database_setup` to have ConFindr download the latest version 43 | of the rMLST databases. This script takes two arguments - a `-s` where you give the path to the text file containing your consumer 44 | key and secret, and a `-o` to specify where you want the sequences downloaded. Only the `-s` is mandatory. If your output 45 | directory is not specified, ConFindr will first search for an environment variable called `CONFINDR_DB`, and if it can't 46 | find that it will automatically download to a folder called `.confindr_db` in your home directory. 
47 | 48 | ## Installing Using Conda (Recommended) 49 | 50 | 1. Follow the instructions [here](https://bioconda.github.io/) to add the Bioconda channel to your list of conda channels, if it hasn't already been added. 51 | 52 | 2. Install ConFindr into a new conda environment named 'confindr': 53 | 54 | `conda create -n confindr -c bioconda confindr=0.8.2` 55 | 56 | 3. Activate the new conda environment: 57 | 58 | `conda activate confindr` 59 | 60 | Typing `confindr -h` into the command-line will show the help menu for the program. See the [Usage](usage.md) section for instructions on how to use ConFindr, including a ConFindr run on an example dataset. 61 | 62 | ## Manual Installation 63 | 64 | ### Executable 65 | 66 | ConFindr can also be installed using `pip`. Use of a virtual environment for ConFindr is highly recommended. To create a virtualenv: 67 | 68 | 1. Create an empty directory (i.e. `mkdir ~/Virtual_Environments/ConFindr`). 69 | 2. Virtualenv that directory (`virtualenv -p /usr/bin/python3 ~/Virtual_Environments/ConFindr`). 70 | 3. Activate the virtualenv (`source ~/Virtual_Environments/ConFindr/bin/activate`). 71 | 4. Install ConFindr—this should also install any packages that ConFindr depends upon (`pip install confindr`). 72 | 73 | With this done, you'll need to make sure that any necessary dependencies are installed. 
74 | 75 | ### Dependencies 76 | 77 | Before using ConFindr when installed using `pip`, you'll need to download and add the following programs to your $PATH: 78 | 79 | - [BBMap (>=39.01)](https://jgi.doe.gov/data-and-tools/bbtools/) 80 | - [Mash (>=2.3)](https://github.com/marbl/Mash/releases) 81 | - [KMA (>=1.4.9)](https://bitbucket.org/genomicepidemiology/kma) 82 | - [Python (>=3.9.15)](https://www.python.org/downloads/) 83 | - [SAMtools (>=1.17)](https://github.com/samtools/samtools) 84 | - [pysam (>=0.21.0)](https://pypi.org/project/pysam/) 85 | 86 | If you want to run ConFindr in Nanopore mode (`-dt Nanopore`), you'll also need to install [minimap2](https://github.com/lh3/minimap2). 87 | 88 | Instructions on adding programs to your $PATH can be found [here](https://stackoverflow.com/questions/14637979/how-to-permanently-set-path-on-linux-unix). 89 | 90 | If ConFindr can't find these dependencies when you try to run it, you will see an error message and the program will quit. 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /confindr_src/wrappers/mash.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from subprocess import Popen, PIPE 3 | 4 | 5 | def run_subprocess(command): 6 | """ 7 | command is the command to run, as a string. 8 | runs a subprocess, returns stdout and stderr from the subprocess as strings. 
def kwargs_to_string(kwargs):
    """
    Given a set of kwargs, turns them into a string which can then be passed to a command.

    Every key/value pair is rendered as ' -key value' (one leading space, a single dash), in the
    iteration order of the mapping. A bare switch is expressed by passing '' as the value, which
    renders as ' -key ' (note the trailing space).

    :param kwargs: kwargs from a function call (a mapping of option name to option value).
    :return: outstr: A string, which is '' if no kwargs were given, and the kwargs in string format otherwise.
    """
    # Build the whole string in a single pass with join instead of growing it with repeated `+=`.
    return ''.join(' -{} {}'.format(name, value) for name, value in kwargs.items())
You specified 0 files.') 62 | cmd = 'mash sketch ' 63 | for arg in args: 64 | cmd += arg + ' ' 65 | cmd += '-o {} -p {} {}'.format(output_sketch, str(threads), options) 66 | out, err = run_subprocess(cmd) 67 | if returncmd: 68 | return out, err, cmd 69 | else: 70 | return out, err 71 | 72 | 73 | def dist(*args, output_file='distances.tab', threads=1, returncmd=False, **kwargs): 74 | """ 75 | Wrapper for mash dist. 76 | :param args: Files you want to find distances between. Can be 77 | :param output_file: Output file to write your distances to. Default distances.tab 78 | :param threads: Number of threads to run mash on. 79 | :param kwargs: Other arguments, in parameter='argument' format. If parameter is just a switch, do parameter='' 80 | :param returncmd: If true, will return the command used to call mash as well as out and err. 81 | :return: stdout and stderr from mash dist 82 | """ 83 | options = kwargs_to_string(kwargs) 84 | if len(args) == 0: 85 | raise ValueError('At least one file to sketch must be specified. You specified 0 files.') 86 | cmd = 'mash dist ' 87 | for arg in args: 88 | cmd += arg + ' ' 89 | cmd += ' -p {} {} > {}'.format(str(threads), options, output_file) 90 | out, err = run_subprocess(cmd) 91 | if returncmd: 92 | return out, err, cmd 93 | else: 94 | return out, err 95 | 96 | 97 | def screen(*args, output_file='screen.tab', threads=1, returncmd=False, **kwargs): 98 | """ 99 | Wrapper for mash screen. Requires mash v2.0 or higher. 100 | :param args: Files you want to screen. First argument must be a sketch. 101 | :param output_file: Output to write containment info to. 102 | :param threads: Number of threads to run mash on. 103 | :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value. 104 | :param kwargs: Other arguments, in parameter='argument' format. 
If parameter is just a switch, do parameter='' 105 | :return: stdout and stderr from mash screen 106 | """ 107 | options = kwargs_to_string(kwargs) 108 | cmd = 'mash screen ' 109 | for arg in args: 110 | cmd += arg + ' ' 111 | cmd += ' -p {} {} | sort -gr > {}'.format(str(threads), options, output_file) 112 | out, err = run_subprocess(cmd) 113 | if returncmd: 114 | return out, err, cmd 115 | else: 116 | return out, err 117 | 118 | 119 | def read_mash_output(result_file): 120 | """ 121 | :param result_file: Tab-delimited result file generated by mash dist. 122 | :return: mash_results: A list with each entry in the result file as an entry, with attributes reference, query, 123 | distance, pvalue, and matching_hash 124 | """ 125 | with open(result_file) as handle: 126 | lines = handle.readlines() 127 | mash_results = list() 128 | for line in lines: 129 | result = MashResult(line) 130 | mash_results.append(result) 131 | return mash_results 132 | 133 | 134 | def read_mash_screen(screen_result): 135 | """ 136 | :param screen_result: Tab-delimited result file generated by mash screen. 
137 | :return: results: A list with each line in the result file as an entry, with attributes identity, shared_hashes, 138 | median_multiplicity, pvalue, and query_id 139 | """ 140 | with open(screen_result) as handle: 141 | lines = handle.readlines() 142 | results = list() 143 | for line in lines: 144 | result = ScreenResult(line) 145 | results.append(result) 146 | return results 147 | -------------------------------------------------------------------------------- /tests/real_fastqs/hiseq_precasava_R1.fastq: -------------------------------------------------------------------------------- 1 | @HWI-D00430:125:HHFCJADXX:1:1101:1227:2085/1 2 | ATTCTGGTGGATGATTCCAGCGCAGGCGCGCAGTCAGGAATTGCGGCGGGGAGGGAAGGATTTTTTTTCTGCGCCGATCCCCATAAAAAACCCAGCGATCA 3 | + 4 | ?@@DDDDDHDADDDDGGHGHGG>@B?;@>@94:4::4::>@8?#### 13 | @HWI-D00430:125:HHFCJADXX:1:1101:5829:2107/1 14 | ATACTCATCTGTTTACCGGGCATACCATCCAGAGAAAATCGGGCCGCGACTTCCGCGACGCGTTCTGAACCTTTGGTAATAACGATAAACTGGACCACGGG 15 | + 16 | @@CFFFDFHHHHHJIIJJJJJJJIJJJJJJJJJJIJIIIJJIJJJJJHFDDDDDDDDDDDDD@BDDDEDDDDDDDD8ACEEDDDBDDD>ACDDDDDDDDD# 17 | @HWI-D00430:125:HHFCJADXX:1:1101:6639:2039/1 18 | CTTTAAAACCGTTAATGACGCTTATGGTCATGATATCGGCGATAAGCTACTGGTGGCGGTCACGCATCTGTCTCTTATACACATCTGACGCTGCCGACGAA 19 | + 20 | @@CFFFFFHHHHHJJJJJJJJJIJJJJHIJJJIJJIJJJIJGIGIGIGIHIJHHHHFFD?BBDDBBDDDCCACACCCC@@CC>@AC>ACBD<<@55 21 | @HWI-D00430:125:HHFCJADXX:1:1101:4309:2408/1 22 | TCCTAAAGGTAACTGCATCCATCATTCAGACGACATATTATCATGCTCAAAAAAGAATTCAGCCCCCTAATTAACCTGAGTTTAACTTATAAGTATCACTT 23 | + 24 | CCCFFFFFHFHHHJJJJJJJJJJJJJJJJJJJJJJJJJIJJJJJJJJJJJJJJJJJJIJJJJIJHHHFFFFEEEEEEDDDDEDEDDDDDDDCC5@DCDDC> 25 | @HWI-D00430:125:HHFCJADXX:1:1101:7031:2445/1 26 | GCTACGCTCGCCCTTCGGGCCGCCGCTAGCGGCGTTCAAAACGCTAACGCGTTTTGGCGAGCACTGCCCAGGTCCAAAATGGCAAGTAAAATAGCCCTAAT 27 | + 28 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJHHFDDDDDEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCA# 29 | @HWI-D00430:125:HHFCJADXX:1:1101:11147:2290/1 30 | 
GTTTATTGCTGGTAAATTCGTAGCCCTCTGTGGGCCAGTAGCCGACAAATTTAACGCCTTTGAGGGCAAGTTTGTCATGCAACATACCCAGCGCATCCAGG 31 | + 32 | @@CFFFFFHHHGHJJJJJJJJJJJJJJJJJJJIIJJJJJJJJJJHJIIIIJJGIGGGIIHHHHFFFFDDDEDEDDDEEDDDDDDDDDDDDDDDDDDDDDD< 33 | @HWI-D00430:125:HHFCJADXX:1:1101:12642:2441/1 34 | GCAATGGAGAGCATCACAACCACCACCGTCAAGCCGACGATCAGCAGCGGTAGCAGCGCAATCAGGTGTTGTGGAGTTATTGTCATGGCGATTTACGGCCT 35 | + 36 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJHHDFDEEEEDDDDDDDDDCDDDDDDDDDCDDEDDEEEEDDDDDDDDDEDDDB< 37 | @HWI-D00430:125:HHFCJADXX:1:1101:19285:2490/1 38 | ATACAGGAGCGCGAACATGTGTTATCCCTGGAACGGGAGCGTCAGCCGGAAATACAGGAACGCACGCTGGATGGCCCTTCGCTGGGATGGTGAAACCATGA 39 | + 40 | CCCFFFFFHHHHHJJJIJJHIIIJJJJJJJJJJJJJJIIHIHHHFFFFDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCDDDDD@CDCDDDDDDD 41 | @HWI-D00430:125:HHFCJADXX:1:1101:3301:2626/1 42 | ACCTTCAACCTGCCCATGGCTAGATCACCGGGTTTCGGGTCTATACCCTGCAACTTAACGCCCGGTTAAGACTCGGTTTCCCTACGGCTCCCCTATACGGT 43 | + 44 | @CCFFFFFHHHHHJJJJJJJJJJJJJJJJJJGDGHIGGIGIIJJJJJIIIIIHJJIJJHHHFFDDBDDDDDDCDDDBDDDDDDDDDDDDDDDDDDDCDDBA 45 | @HWI-D00430:125:HHFCJADXX:1:1101:12272:2744/1 46 | ATCATAGCCGAAACTATTAGCGCGGATATTACCTAACCCTTCTTCTGCCAAATTAGCAACATTAATTCTAGAGGGGTCTACCATCCCGGTTATACCGCTTA 47 | + 48 | @DCDCBBD<9 49 | @HWI-D00430:125:HHFCJADXX:1:1101:17746:2589/1 50 | ACGTGAATGTCCTCGGCAACGTTAACGCCAGCGGCACGCTGATGGACAACGGCGGCAACTCTAACCACCACTCTCACTAACCTGCAAATTGCTGCTGGATG 51 | + 52 | @CCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJJJJHHHHHFFFFDDDDDD>>(5&22?<<@>AC58??################ 57 | @HWI-D00430:125:HHFCJADXX:1:1101:3127:2825/1 58 | TTCTACCACGTTGGCGATAAACGAAATTTCGCAAATGTGCGGCTACCCGTCACTGCAATATTTCTATTCGGTGTTTAAAAAGGAGTACGTCACTACGCCGA 59 | + 60 | CCCFFFFFHHHHHIJJJJJJJJJJJJJJJJJIJJJJJIIJGJJJJJIJGHHFFFFFEEEDEFEFEEEEEDDBBBDDDDDDDDDDDCCDDDDDDDDDDBBDB 61 | @HWI-D00430:125:HHFCJADXX:1:1101:11048:2889/1 62 | ATCCCAGACTTTCCAGTAAAAACGCGCACGCGTTAGTGACCGCCGTATCATCATCCAGTAGATGAATTGTCGCCATCCCTGCCCCCATTTTCATGTAAGAA 63 | + 64 | 
CCCFFFFFHHHHHIJJJJJJIJIJJIGIJIJJIJJJIJIJJJHHFDDEDEEDDDDDDDDDDDEDDDEECDDDDDDDDDDDDBDDDDDDDEEEDDDDDDDDD 65 | @HWI-D00430:125:HHFCJADXX:1:1101:16489:2779/1 66 | CCTCACGGGCAGTATCATCATCAACGCCATTAAGCGCGCTGAAGATTTCCACCGTGGCAATTGGCTCCGGCTGGAAAAGCGTCAGCAATACCTGACAGCGG 67 | + 68 | ?<@FDDD8DDHHHAGHIJIHHJJIGIJJBGH@HIIEIIHGIECA>AHHHHEFFFECDCDDDCCC>AA?@BBBBB?CC?9<>@BDBC@CDCCCCD9?>C<<5 69 | @HWI-D00430:125:HHFCJADXX:1:1101:19971:2793/1 70 | GTTCTAATCTCTTTCTGAAACGGATCAAAAAGGTGAGCATTGCGCTCACCTTTTTATCTGTCTCTTATACACATCTGACGCTGCCGACGAAGAGGATAGTG 71 | + 72 | C@@FFFFFHHHHGHIJIJJJJJJJIGGGJFH:C9?DHGIHGGGIG6@GHGIEHFHIGIIGEGIGGIGGHDEHEDFFFFEDDBDDD@BDDDB@D@ 73 | @HWI-D00430:125:HHFCJADXX:1:1101:3668:3115/1 74 | GATTTACTCAGATTCAAAATCTTTGGTATGCCGCTACCGCTTTATGCCTTTGCATTAATTACTTTATTACTTTCTCATTTTTATAATGCTATACCGACTGT 75 | + 76 | =8+BDD;=:=AD?A:<,<<3+<<@A:;>AAA>A;AA?##### 77 | @HWI-D00430:125:HHFCJADXX:1:1101:6875:3120/1 78 | ATGCGATCCGCTCGCACCCGCGCCTGCGTCAGCTCAGGGTAGAACTCCAGTAGCCACGCAAAGCCACTTTTGGCTGTCTCTTATACACATCTGACGCTGCC 79 | + 80 | @@@FDDDFHHHHHJJJIJGIHIJJIGIIIIIGGIJJJJGHGHFFFFFFEDCEEDDDDDDBD@CCDDDDDDDCDDCDACDDCDCDDDEDDDDDDDDDD@BD? 
81 | @HWI-D00430:125:HHFCJADXX:1:1101:12357:3069/1 82 | GTTTACACCTACTCCTCTATCGCCAGCACGTTTGTGGTGACGGGGGGAATGTTTGGCGCCATGAGCCTGTACGGGTACACCACTAAACGCGATCTTAGCGG 83 | + 84 | @BCFFFFFHHHHHJJJJJJJJJJJJJJJJJIJJJHIIDGIJJJJJHD:BDDDEDDCDDDDDDDDDDDDDDDDDDDDDDCCDDDDDCDDDDD>B@DDDCDD> 85 | @HWI-D00430:125:HHFCJADXX:1:1101:16625:3083/1 86 | GATTTACTCACGGAGCTGAAGTCAGTCGAAGATACCAGCTGGCTGCAACTGTTTATTAAAAACACAGCACTGTGCAAACACGAAAGTGGACGTATACGGTG 87 | + 88 | CCCFFFFFHHHHHJJJJJIIJJJJJJJJJJJJJJJJJJJJJIJJJJJJJJJIIJJJJIJJJJJJHHHHFFFF@CEEEEEDDDDDDD@CCDDDEEEDCBBCCABB?@><@B?>ACCDDDECC@ACBBBB3AAB4> 101 | @HWI-D00430:125:HHFCJADXX:1:1101:3805:3740/1 102 | GAAATATTCCACGAAGCCGGCTGACATGGTGGCCGATCCTGAATGGTTCCTCGAACTGACCCGCCAGACGCACAAGCGCCGCTTTGTGGCGACCGGCGGTG 103 | + 104 | CCCFFFFFHHHHHJJJJJJJJJJIHIJJJHIJJJJJJJJJJJIIJJJJIIJHHHFFFFFEEEDDDDDDDDDDDDDBDDDDDBDDDDDDDDDDDDDD>BB## 105 | @HWI-D00430:125:HHFCJADXX:1:1101:4297:3536/1 106 | GTACACGCATCGGGTCGCGCGCCGCAAACGGAATAGCGCCTTCAGTGATAAAGCACAGCCCCAGCACCAACGCCGCTTTACCACCTTCCTGCTGCGCTTTG 107 | + 108 | CCCFFFFFHHHHHJHIJJJJJJJJJJJJHHHFFFFEEDDDDDDDDFDDFFDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD> 109 | @HWI-D00430:125:HHFCJADXX:1:1101:11456:3711/1 110 | GGATACGCCACCGGCTCCCTGATTTTGAAGATGAAGGTAATGCAAAAATACGCGCTTCACGCTTACCCAGTTATGGCCCTGATGGTCGCGACGCTGACAGG 111 | + 112 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJIJJJJJJHHIJJJJJJJJJJJJJJHFFDEEDEDDDDDDDDDDDEDDDDDDDDDDDDDDDDDDDDDDDDDD? 
113 | @HWI-D00430:125:HHFCJADXX:1:1101:18177:3726/1 114 | CCTGAAAACCCGTACAACATGCAGCCTGTCCTGTTGTGTGTGGTGAAGTAAGAAGCCTCTTTTGCATTTTTGCCTCACATCACGCCGGATGGCGGCTGTGC 115 | + 116 | @@CFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJIJJIJIJJJJFHHIIJJIIIJJJJJIJJIJJHHHGHHHFFFFEEDEEDDDDDDDDDBDDDDDD@BDDD 117 | @HWI-D00430:125:HHFCJADXX:1:1101:4322:3989/1 118 | CTCTGAATGATGTGGTAAACGCTGTCACTGATGCGGCTGCGCAGCATGTGCATTAACAAAGAAATAAATTTTTTGTGGTCAAGAAAGCGGGATAAAACGGG 119 | + 120 | CCCFFFFFHHHHHIJHIIJJJJJJJJGJJJJJJJJJJJJIJJJJJJJJEGIJJJJHHHHHFFFFFEECEEEEDDDDDDCDDDDDDDCDDDDBDDDDDDDD# 121 | @HWI-D00430:125:HHFCJADXX:1:1101:4703:3982/1 122 | GGTGGAAATCATGTACGCCCTGCGGCAGGGGGGGTGCGTTAAAGGGTAGAAAATGGCAAGGATCATCGCGCTGGATGGGGCACAGGGAGGGGGCGGCGCGC 123 | + 124 | 81+4A?DDBDADDCD?8? 21 | @HWI-D00430:125:HHFCJADXX:1:1101:4309:2408/2 22 | ATGAAATAGAGTGATAAGCGCTTTCGTACATACATCTCATTAGTACAACTGATAGTGTTATCATTTGATTGAGTGTAATCCGATAACAATTCTCTCCGCGT 23 | + 24 | CCCFFFFFHHHFFIJJJJJJJJJJJJHIJJJJJJJJJJJJJJJIIJJJJIJJJJJIJJJJJJJJJJJJJJJIJHHGIJHHHHDFDDEEEDCDEDDDDDDD8 25 | @HWI-D00430:125:HHFCJADXX:1:1101:7031:2445/2 26 | GTATGGTTATTCAACCTATCGCGGGAGCTAATTTAGAGCCTATCCCATTAGGGCTATTTTACTTGCCATTTTGGACCTGGGCAGTGCTCGCCAAAACGCGT 27 | + 28 | CBCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJHHHHHFFFFFFEEEEEEEEDDDDDDDDDDDDDDDEDDDDDDDDDDDD> 29 | @HWI-D00430:125:HHFCJADXX:1:1101:11147:2290/2 30 | ATCCTGTTCCATTCGCGCTCCCCACTTCACGCTGACTTTGGGTGCATTATGCCTCATTGTGCCGTGAATGCAGCGATTCTGTTACACTACCCGCAACGCTA 31 | + 32 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJJJIIIIIJJJJJJJJJFIIJHHHHHFFFDDEEEDDDDDDDDDEEDDDDDDDDDDBDDDDDDD< 33 | @HWI-D00430:125:HHFCJADXX:1:1101:12642:2441/2 34 | TGATTACCGTGATCTCGACCTTTGGGCTGGTATTCGCCTCTGTATACTCGCTGGCGATGCTGCATCGCGCTTACTTTGGTAAGGCGAAGAGCCAGATTGCC 35 | + 36 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJIIJJJJJJJJJJIIJJJJJIJJJIJHHHFFFFEDEEDDDDDDDCDDEDDDDDDDDDDDDDDDDDDDDDDDC 37 | @HWI-D00430:125:HHFCJADXX:1:1101:19285:2490/2 38 | GATTTACCATAATCCCTTAATTGTACGCACTGCTAAAACGCGTTCAGCGCGATCACGGCAGCAGACAGGTAAAAATGGCAACAAACCACCCGAAAAACTGC 39 | + 40 | 
CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJIJJJHHFFEFDDDDDDDDDCBCDDDDDDCDDDDDDDDDDDDDDDDDDDDDDCD 41 | @HWI-D00430:125:HHFCJADXX:1:1101:3301:2626/2 42 | AACGGGGAGGAGCCCAGAGCCTGAATCAGCATGTGTGTTAGTGGAAGCGTCTGGAAAGGCGCGCGATACAGGGTGACAGCCCCGTACACAAAAGCGCATGT 43 | + 44 | CCCFFFFFGHHHHJJJJJJJJJJJJJJJJJIJIIIJJJJJJIJIJJJJJJJJHHHHHFFFDDDDDDDDDDDDDCBDDDDDDDDDDDDDDDDD@DDDDDBD: 45 | @HWI-D00430:125:HHFCJADXX:1:1101:12272:2744/2 46 | CACTACAACACCATGGGTTATCAAGCCTTGGACGGATGACAATCAGTGGCTAACGGATGCCGCAGCGGTCGTTGCCACTTTAAAACAATCTGAAACTGATG 47 | + 48 | @B@FFFFFHHHHHJJJJGIJJJJJJJJJJJJJJJJGIIJJHHIJJJIJJJJJJIJJJHHHHFDCDDBDDDDDDDDDCCDDDDDDDDBDDCC>ACDDDDCCC 49 | @HWI-D00430:125:HHFCJADXX:1:1101:17746:2589/2 50 | CCCATAAAACCGACGTACAGCAAATAACGTCCGGTTGATACGCTGGCTCCAAAGGTAAACCGGGCGTTAGCGAAAAAACTAAAACTGACAGCGACAGCAAA 51 | + 52 | CCCFFFFFHHHHHJJIJJJJJJJIJJJJJJJJJJIJJJJJJJJJJJHHHHFFFFFEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDC 53 | @HWI-D00430:125:HHFCJADXX:1:1101:19043:2712/2 54 | ACTTTATCCCAACTGGTCGCCAGCGCGGGTGCGAAATCATCCCAGTAGTAAGCCAGGATGGTCGAACCGATGACATTGCCGAGGCCCACCCACCAGTAATG 55 | + 56 | @@@FFFFFGHHHHJGGHHEHIIIGGIJJDAGCHJHGFFFFDFDCEAEEFDE>A?C?BAADDDDDBDDDDBDDCCCDCCDCDDBB<8>??BD@D:AC# 57 | @HWI-D00430:125:HHFCJADXX:1:1101:3127:2825/2 58 | GCGCTACGCTATCCGGCTTACAAGGCGTCTGTAGGCTGGGTAAACTACAACAACGCTTCACTATGCTGGTCGCGATACTCCTTCGGCGTAGTGACGTACTC 59 | + 60 | CCCFFFFFHHHHHJJJJJJJJJIHJJJJJJJIJJJJJIJJEHFHIJJJJJJHHHHFFDDEEEDDDDEDCCDDDDDDDDCDDDDDDDDDDDDBCDDD@BDDD 61 | @HWI-D00430:125:HHFCJADXX:1:1101:11048:2889/2 62 | ATATCAGCATCAGGAACCAGACCGCGCCGGACGGTCTGTCGGGAACGGTGGTCACGATACATTTCTTACATGAAAATGGGGGCAGGGATGGCGACAATTCA 63 | + 64 | CCCFFFFFHGHHHJJJJJJJJJJJJJJJJJJJIJHHHHHHFFDDDDDDDDDDCCDDDDDDDEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD@DDDC: 65 | @HWI-D00430:125:HHFCJADXX:1:1101:16489:2779/2 66 | ACATAAAACTCAGCAGAAGCAACGATCCGCCCGCCCGCTCAGGATGCGTCGCCACCGCATTAAGCAGCGCGCTCAACTGCTGATTTACGTCCCGCTGCCAA 67 | + 68 | <@:ACACCDDDD@@D9>BDBBBC> 69 | @HWI-D00430:125:HHFCJADXX:1:1101:19971:2793/2 70 | 
ATAAAAAGGTGAGCGCAATGCTCACCTTTTTGATCCGTTTCAGAAAGAGATTAGAACCTGTCTCTTATACACATCTCCGAGCCCACGAGACAAGAGGCAAA 71 | + 72 | @@@DDDEDDADFHIIIBGHIIEHIIGHIHGIIEFGEGDHHIIEHGEHEIICEHGIIGHIFHIHGHEEEHC@DC@DECECBBBBBBBBBBBBCCB?BBCCC# 73 | @HWI-D00430:125:HHFCJADXX:1:1101:3668:3115/2 74 | CCATTACGTTGCTGATCGCATCAATTTCTTTCTGCGTAAATATGCCAGCATAGACAAAATAAGCTGCAACCAGAAATATCATAACCGGTGCGCCGCCAATA 75 | + 76 | CCCFFFFFHHHHHIJJIIJJJJJJJJJJJJJJJJIJIJIGHJJJIJJJJJJJJJJJJJJJJIIIJIHJHHHHFFFFFEEDDEEEDDDDDDBDDDDDDDDD@ 77 | @HWI-D00430:125:HHFCJADXX:1:1101:6875:3120/2 78 | CCAAAAGTGGCTTTGCGTGGCTACTGGAGTTCTACCCTGAGCTGACGCAGGCGCGGGTGCGAGCGGATCGCATCTGTCTCTTATACACATCTCCGAGCCCC 79 | + 80 | CCCFFDFFHHHHHJIIJFHIJGIJJJJIJFIHIJJJJJJJJJGGIIJJJJJJGIHFD28?=?55;B@;@BB2<9A::@@>@A4:::@@>>C@AC90005?# 81 | @HWI-D00430:125:HHFCJADXX:1:1101:12357:3069/2 82 | GCCGTATACGTATTGCAGCCAGAGGTCCCACTTGTCATTTATCACTCTTCTCATGATCACGGTGATAAGAATGGCAAGCGCAGGAAAATATTTCTAAGATG 83 | + 84 | @@CFDDFFHHHHHJJJJJJJJJJJJJJJJJJJJJIJJJJJJJJIJJJJJJJJIJJJIJJJJJGIIJJJJJJJJHHHHFFFDDDDDDDDDDFFFEEEEDDD@ 85 | @HWI-D00430:125:HHFCJADXX:1:1101:16625:3083/2 86 | GGTTCACGGGGTCTTTCCGTCTTGCCGCGGGTACACTGCATCTTCACAGCGAGTTCAATTTCACTGAGTCTCGGGTGGAGACAGCCTGGCCATCATTACGC 87 | + 88 | @CBFFFFFHHHHHJJJJJJJJJJJJJJJJJJHIJJJIHHGHHHFFFFFFDDBD>BCEEEEDDEDDDDDDDDDCDD<@B?ACBCCBDCADBDDDC>ADCC@@ 89 | @HWI-D00430:125:HHFCJADXX:1:1101:18210:3173/2 90 | ACCTTTAACAGTGCAGGGTGATTGTTATTCGTTCAACGAAAGATACTCATTAAATTGCTCAATACTTGGATAAGAGGATTGGGTGCCTTTCCCGGTGACGC 91 | + 92 | @@?DFFEFHFHFHGIHIGAFFHIJJIJJJIIIJJJIJJGGIJIIJJJJJJJGHHIJGEGIIJJIGIIJIEGHEE@EACDD@BFBCDDCDDDDDB=;=BDD5 93 | @HWI-D00430:125:HHFCJADXX:1:1101:2472:3467/2 94 | GTTCAGCTAAATGCGGGGAGTACCAGTGTGACGGTTAAACGCCCGGTCTCGGCCAGTCTGGCTAAGGCTTTTTTCTATATTGTGCTGTTGTCTATTCCTGT 95 | + 96 | CCCFFFFFHHHHHJJJJJJJHIJJJJJJJJJJJJIIJJJJJHHFFDDDDDDDDDDDCDDDDCCCDCDACCD:AB9@@@@CD>>@CDDDD>C@CDDEDDCC: 97 | @HWI-D00430:125:HHFCJADXX:1:1101:1731:3720/2 98 | 
GGATAAGGGGCTAAAAGTCGGCCAGCGCGTTGGAATCGGCTGGACGGCGCGCAGCTGCGGACACTGCGATGCCTGTATCAGCGGCAATCAAATTAACTGCC 99 | + 100 | @@C:>7?B8@CCCC@CCAC7@5@BCCCC>AA>CCACCC 101 | @HWI-D00430:125:HHFCJADXX:1:1101:3805:3740/2 102 | TCCCCGATCAACATGTCGGCATCGGTTTCCTGATCGAGCTTCAGTACGTCTTTGAGTGCACCGCCGGTCGCCACAAAGCGGCGCTTGTGCGTCTGGCGGGG 103 | + 104 | CCCFFFFFHHHHHJJIJJJJJJJJJIJJJJJJJJJJHIJJJJJJIIJJJJJHHHHHFFFFFFDDDDDDDDDDDDDDDDDDDDDDDDDBDDDDDDDDDDDD# 105 | @HWI-D00430:125:HHFCJADXX:1:1101:4297:3536/2 106 | GATGTGTACCGACATGGGCGGCCCGGTGAACAAAGCGGCGTATGCGTTTGGCGTTGGTCTGCTGAGTACGCAAACTTACGCGCCGATGGCGGCGATCATGG 107 | + 108 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJHHHHHFFDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDC 109 | @HWI-D00430:125:HHFCJADXX:1:1101:11456:3711/2 110 | ATATTACGCGGTCGACGATCTTCAAAAAGCGGCTGATAGCCATAATTAATCGGCTGCGCAGACTGAAATATGGAGCCATTCGCCACCGGTACCGGGCCAGG 111 | + 112 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJHHHHHHFFFFFFFEEDDDDDDDDDDDDDDDDDDDEEDDDDDDDDEEDDBDDDDBDDDDDDDDDDD 113 | @HWI-D00430:125:HHFCJADXX:1:1101:18177:3726/2 114 | GGGCAGGACAACATTTATTCCGATCTGACGGCTGGTCGTATTGATGCAGCGTTCCAGGACGAGGTCGCGGCCAGCGAAGGTTTCCTTAAACAGCCCGTCGG 115 | + 116 | CCCFFFFFHHHHHJJJJJJJJJIJJJJJJJJJJJJFHIHJJJJJJJJIIJIEHGHHHFFFDCDD?CDDDDDDDDDB@BDD?CCCDDDCDDDDDDDDDDDDD 117 | @HWI-D00430:125:HHFCJADXX:1:1101:4322:3989/2 118 | CTCCTTTTCACCGTTTTATCCCGCTTTCTTGACCACAAAAAATTTATTTCTTTGTTAATGCACATGCTGCGCAGCCGCATCAGTGACAGCGTTTACCACAT 119 | + 120 | CCCFFFFFHHHHHIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJIJJJJJJJHHHHHFFDDDDDDDDDDDDDDDDDDDDDDDDDDDD: 121 | @HWI-D00430:125:HHFCJADXX:1:1101:4703:3982/2 122 | AGTTCGGTCAGTTGTTCGCTTTGAATCATCAGCGACAGCGCGTTGCCCGGCCCGCAGCCGCCTTCCAGCGCAACGACGTTTTTCTCCGCCTGCGGGAAGTG 123 | + 124 | @@@FFFFDHDHHHGDEHIJJJIIIIIEGGHIIJIJJIGIEGG;AEEHHFDCDD@DDDDDDDDBDDDD>?BD;B@B@B@9BD99<:> 125 | @HWI-D00430:125:HHFCJADXX:1:1101:5755:4181/2 126 | GATCAATACGGCGCAGCGATTTCAACACTTTGACTTTTTCATCGCGAATACTGTCGGCGCTCAGGTCAGACGGCGGTGACATGGCAATCATGCAGAGAATT 127 | + 128 | 
CCCFFFFFHHHHHJIJJJIJJJJJJJJJJJJJJJJJJJJJJJJJJJJFFFEEEFEDDDDDDDDDD@CCDADDDDDD>BBDDDDDDDCCACCDDDDDDDDDD 129 | -------------------------------------------------------------------------------- /tests/real_fastqs/hiseq_precasava_multilane_R1.fastq: -------------------------------------------------------------------------------- 1 | @HWI-D00430:125:HHFCJADXX:1:1101:1227:2085/1 2 | ATTCTGGTGGATGATTCCAGCGCAGGCGCGCAGTCAGGAATTGCGGCGGGGAGGGAAGGATTTTTTTTCTGCGCCGATCCCCATAAAAAACCCAGCGATCA 3 | + 4 | ?@@DDDDDHDADDDDGGHGHGG>@B?;@>@94:4::4::>@8?#### 13 | @HWI-D00430:125:HHFCJADXX:1:1101:5829:2107/1 14 | ATACTCATCTGTTTACCGGGCATACCATCCAGAGAAAATCGGGCCGCGACTTCCGCGACGCGTTCTGAACCTTTGGTAATAACGATAAACTGGACCACGGG 15 | + 16 | @@CFFFDFHHHHHJIIJJJJJJJIJJJJJJJJJJIJIIIJJIJJJJJHFDDDDDDDDDDDDD@BDDDEDDDDDDDD8ACEEDDDBDDD>ACDDDDDDDDD# 17 | @HWI-D00430:125:HHFCJADXX:1:1101:6639:2039/1 18 | CTTTAAAACCGTTAATGACGCTTATGGTCATGATATCGGCGATAAGCTACTGGTGGCGGTCACGCATCTGTCTCTTATACACATCTGACGCTGCCGACGAA 19 | + 20 | @@CFFFFFHHHHHJJJJJJJJJIJJJJHIJJJIJJIJJJIJGIGIGIGIHIJHHHHFFD?BBDDBBDDDCCACACCCC@@CC>@AC>ACBD<<@55 21 | @HWI-D00430:125:HHFCJADXX:1:1101:4309:2408/1 22 | TCCTAAAGGTAACTGCATCCATCATTCAGACGACATATTATCATGCTCAAAAAAGAATTCAGCCCCCTAATTAACCTGAGTTTAACTTATAAGTATCACTT 23 | + 24 | CCCFFFFFHFHHHJJJJJJJJJJJJJJJJJJJJJJJJJIJJJJJJJJJJJJJJJJJJIJJJJIJHHHFFFFEEEEEEDDDDEDEDDDDDDDCC5@DCDDC> 25 | @HWI-D00430:125:HHFCJADXX:1:1101:7031:2445/1 26 | GCTACGCTCGCCCTTCGGGCCGCCGCTAGCGGCGTTCAAAACGCTAACGCGTTTTGGCGAGCACTGCCCAGGTCCAAAATGGCAAGTAAAATAGCCCTAAT 27 | + 28 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJHHFDDDDDEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCA# 29 | @HWI-D00430:125:HHFCJADXX:1:1101:11147:2290/1 30 | GTTTATTGCTGGTAAATTCGTAGCCCTCTGTGGGCCAGTAGCCGACAAATTTAACGCCTTTGAGGGCAAGTTTGTCATGCAACATACCCAGCGCATCCAGG 31 | + 32 | @@CFFFFFHHHGHJJJJJJJJJJJJJJJJJJJIIJJJJJJJJJJHJIIIIJJGIGGGIIHHHHFFFFDDDEDEDDDEEDDDDDDDDDDDDDDDDDDDDDD< 33 | @HWI-D00430:125:HHFCJADXX:1:1101:12642:2441/1 34 | 
GCAATGGAGAGCATCACAACCACCACCGTCAAGCCGACGATCAGCAGCGGTAGCAGCGCAATCAGGTGTTGTGGAGTTATTGTCATGGCGATTTACGGCCT 35 | + 36 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJHHDFDEEEEDDDDDDDDDCDDDDDDDDDCDDEDDEEEEDDDDDDDDDEDDDB< 37 | @HWI-D00430:125:HHFCJADXX:1:1101:19285:2490/1 38 | ATACAGGAGCGCGAACATGTGTTATCCCTGGAACGGGAGCGTCAGCCGGAAATACAGGAACGCACGCTGGATGGCCCTTCGCTGGGATGGTGAAACCATGA 39 | + 40 | CCCFFFFFHHHHHJJJIJJHIIIJJJJJJJJJJJJJJIIHIHHHFFFFDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCDDDDD@CDCDDDDDDD 41 | @HWI-D00430:125:HHFCJADXX:1:1101:3301:2626/1 42 | ACCTTCAACCTGCCCATGGCTAGATCACCGGGTTTCGGGTCTATACCCTGCAACTTAACGCCCGGTTAAGACTCGGTTTCCCTACGGCTCCCCTATACGGT 43 | + 44 | @CCFFFFFHHHHHJJJJJJJJJJJJJJJJJJGDGHIGGIGIIJJJJJIIIIIHJJIJJHHHFFDDBDDDDDDCDDDBDDDDDDDDDDDDDDDDDDDCDDBA 45 | @HWI-D00430:125:HHFCJADXX:1:1101:12272:2744/1 46 | ATCATAGCCGAAACTATTAGCGCGGATATTACCTAACCCTTCTTCTGCCAAATTAGCAACATTAATTCTAGAGGGGTCTACCATCCCGGTTATACCGCTTA 47 | + 48 | @DCDCBBD<9 49 | @HWI-D00430:125:HHFCJADXX:1:1101:17746:2589/1 50 | ACGTGAATGTCCTCGGCAACGTTAACGCCAGCGGCACGCTGATGGACAACGGCGGCAACTCTAACCACCACTCTCACTAACCTGCAAATTGCTGCTGGATG 51 | + 52 | @CCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJJJJHHHHHFFFFDDDDDD>>(5&22?<<@>AC58??################ 57 | @HWI-D00430:125:HHFCJADXX:1:1101:3127:2825/1 58 | TTCTACCACGTTGGCGATAAACGAAATTTCGCAAATGTGCGGCTACCCGTCACTGCAATATTTCTATTCGGTGTTTAAAAAGGAGTACGTCACTACGCCGA 59 | + 60 | CCCFFFFFHHHHHIJJJJJJJJJJJJJJJJJIJJJJJIIJGJJJJJIJGHHFFFFFEEEDEFEFEEEEEDDBBBDDDDDDDDDDDCCDDDDDDDDDDBBDB 61 | @HWI-D00430:125:HHFCJADXX:1:1101:11048:2889/1 62 | ATCCCAGACTTTCCAGTAAAAACGCGCACGCGTTAGTGACCGCCGTATCATCATCCAGTAGATGAATTGTCGCCATCCCTGCCCCCATTTTCATGTAAGAA 63 | + 64 | CCCFFFFFHHHHHIJJJJJJIJIJJIGIJIJJIJJJIJIJJJHHFDDEDEEDDDDDDDDDDDEDDDEECDDDDDDDDDDDDBDDDDDDDEEEDDDDDDDDD 65 | @HWI-D00430:125:HHFCJADXX:2:1101:16489:2779/1 66 | CCTCACGGGCAGTATCATCATCAACGCCATTAAGCGCGCTGAAGATTTCCACCGTGGCAATTGGCTCCGGCTGGAAAAGCGTCAGCAATACCTGACAGCGG 67 | + 68 | 
?<@FDDD8DDHHHAGHIJIHHJJIGIJJBGH@HIIEIIHGIECA>AHHHHEFFFECDCDDDCCC>AA?@BBBBB?CC?9<>@BDBC@CDCCCCD9?>C<<5 69 | @HWI-D00430:125:HHFCJADXX:2:1101:19971:2793/1 70 | GTTCTAATCTCTTTCTGAAACGGATCAAAAAGGTGAGCATTGCGCTCACCTTTTTATCTGTCTCTTATACACATCTGACGCTGCCGACGAAGAGGATAGTG 71 | + 72 | C@@FFFFFHHHHGHIJIJJJJJJJIGGGJFH:C9?DHGIHGGGIG6@GHGIEHFHIGIIGEGIGGIGGHDEHEDFFFFEDDBDDD@BDDDB@D@ 73 | @HWI-D00430:125:HHFCJADXX:2:1101:3668:3115/1 74 | GATTTACTCAGATTCAAAATCTTTGGTATGCCGCTACCGCTTTATGCCTTTGCATTAATTACTTTATTACTTTCTCATTTTTATAATGCTATACCGACTGT 75 | + 76 | =8+BDD;=:=AD?A:<,<<3+<<@A:;>AAA>A;AA?##### 77 | @HWI-D00430:125:HHFCJADXX:2:1101:6875:3120/1 78 | ATGCGATCCGCTCGCACCCGCGCCTGCGTCAGCTCAGGGTAGAACTCCAGTAGCCACGCAAAGCCACTTTTGGCTGTCTCTTATACACATCTGACGCTGCC 79 | + 80 | @@@FDDDFHHHHHJJJIJGIHIJJIGIIIIIGGIJJJJGHGHFFFFFFEDCEEDDDDDDBD@CCDDDDDDDCDDCDACDDCDCDDDEDDDDDDDDDD@BD? 81 | @HWI-D00430:125:HHFCJADXX:2:1101:12357:3069/1 82 | GTTTACACCTACTCCTCTATCGCCAGCACGTTTGTGGTGACGGGGGGAATGTTTGGCGCCATGAGCCTGTACGGGTACACCACTAAACGCGATCTTAGCGG 83 | + 84 | @BCFFFFFHHHHHJJJJJJJJJJJJJJJJJIJJJHIIDGIJJJJJHD:BDDDEDDCDDDDDDDDDDDDDDDDDDDDDDCCDDDDDCDDDDD>B@DDDCDD> 85 | @HWI-D00430:125:HHFCJADXX:2:1101:16625:3083/1 86 | GATTTACTCACGGAGCTGAAGTCAGTCGAAGATACCAGCTGGCTGCAACTGTTTATTAAAAACACAGCACTGTGCAAACACGAAAGTGGACGTATACGGTG 87 | + 88 | CCCFFFFFHHHHHJJJJJIIJJJJJJJJJJJJJJJJJJJJJIJJJJJJJJJIIJJJJIJJJJJJHHHHFFFF@CEEEEEDDDDDDD@CCDDDEEEDCBBCCABB?@><@B?>ACCDDDECC@ACBBBB3AAB4> 101 | @HWI-D00430:125:HHFCJADXX:2:1101:3805:3740/1 102 | GAAATATTCCACGAAGCCGGCTGACATGGTGGCCGATCCTGAATGGTTCCTCGAACTGACCCGCCAGACGCACAAGCGCCGCTTTGTGGCGACCGGCGGTG 103 | + 104 | CCCFFFFFHHHHHJJJJJJJJJJIHIJJJHIJJJJJJJJJJJIIJJJJIIJHHHFFFFFEEEDDDDDDDDDDDDDBDDDDDBDDDDDDDDDDDDDD>BB## 105 | @HWI-D00430:125:HHFCJADXX:2:1101:4297:3536/1 106 | GTACACGCATCGGGTCGCGCGCCGCAAACGGAATAGCGCCTTCAGTGATAAAGCACAGCCCCAGCACCAACGCCGCTTTACCACCTTCCTGCTGCGCTTTG 107 | + 108 | CCCFFFFFHHHHHJHIJJJJJJJJJJJJHHHFFFFEEDDDDDDDDFDDFFDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD> 109 | 
@HWI-D00430:125:HHFCJADXX:2:1101:11456:3711/1 110 | GGATACGCCACCGGCTCCCTGATTTTGAAGATGAAGGTAATGCAAAAATACGCGCTTCACGCTTACCCAGTTATGGCCCTGATGGTCGCGACGCTGACAGG 111 | + 112 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJIJJJJJJHHIJJJJJJJJJJJJJJHFFDEEDEDDDDDDDDDDDEDDDDDDDDDDDDDDDDDDDDDDDDDD? 113 | @HWI-D00430:125:HHFCJADXX:2:1101:18177:3726/1 114 | CCTGAAAACCCGTACAACATGCAGCCTGTCCTGTTGTGTGTGGTGAAGTAAGAAGCCTCTTTTGCATTTTTGCCTCACATCACGCCGGATGGCGGCTGTGC 115 | + 116 | @@CFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJIJJIJIJJJJFHHIIJJIIIJJJJJIJJIJJHHHGHHHFFFFEEDEEDDDDDDDDDBDDDDDD@BDDD 117 | @HWI-D00430:125:HHFCJADXX:2:1101:4322:3989/1 118 | CTCTGAATGATGTGGTAAACGCTGTCACTGATGCGGCTGCGCAGCATGTGCATTAACAAAGAAATAAATTTTTTGTGGTCAAGAAAGCGGGATAAAACGGG 119 | + 120 | CCCFFFFFHHHHHIJHIIJJJJJJJJGJJJJJJJJJJJJIJJJJJJJJEGIJJJJHHHHHFFFFFEECEEEEDDDDDDCDDDDDDDCDDDDBDDDDDDDD# 121 | @HWI-D00430:125:HHFCJADXX:2:1101:4703:3982/1 122 | GGTGGAAATCATGTACGCCCTGCGGCAGGGGGGGTGCGTTAAAGGGTAGAAAATGGCAAGGATCATCGCGCTGGATGGGGCACAGGGAGGGGGCGGCGCGC 123 | + 124 | 81+4A?DDBDADDCD?8? 
21 | @HWI-D00430:125:HHFCJADXX:1:1101:4309:2408/2 22 | ATGAAATAGAGTGATAAGCGCTTTCGTACATACATCTCATTAGTACAACTGATAGTGTTATCATTTGATTGAGTGTAATCCGATAACAATTCTCTCCGCGT 23 | + 24 | CCCFFFFFHHHFFIJJJJJJJJJJJJHIJJJJJJJJJJJJJJJIIJJJJIJJJJJIJJJJJJJJJJJJJJJIJHHGIJHHHHDFDDEEEDCDEDDDDDDD8 25 | @HWI-D00430:125:HHFCJADXX:1:1101:7031:2445/2 26 | GTATGGTTATTCAACCTATCGCGGGAGCTAATTTAGAGCCTATCCCATTAGGGCTATTTTACTTGCCATTTTGGACCTGGGCAGTGCTCGCCAAAACGCGT 27 | + 28 | CBCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJHHHHHFFFFFFEEEEEEEEDDDDDDDDDDDDDDDEDDDDDDDDDDDD> 29 | @HWI-D00430:125:HHFCJADXX:1:1101:11147:2290/2 30 | ATCCTGTTCCATTCGCGCTCCCCACTTCACGCTGACTTTGGGTGCATTATGCCTCATTGTGCCGTGAATGCAGCGATTCTGTTACACTACCCGCAACGCTA 31 | + 32 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJJJIIIIIJJJJJJJJJFIIJHHHHHFFFDDEEEDDDDDDDDDEEDDDDDDDDDDBDDDDDDD< 33 | @HWI-D00430:125:HHFCJADXX:1:1101:12642:2441/2 34 | TGATTACCGTGATCTCGACCTTTGGGCTGGTATTCGCCTCTGTATACTCGCTGGCGATGCTGCATCGCGCTTACTTTGGTAAGGCGAAGAGCCAGATTGCC 35 | + 36 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJIIJJJJJJJJJJIIJJJJJIJJJIJHHHFFFFEDEEDDDDDDDCDDEDDDDDDDDDDDDDDDDDDDDDDDC 37 | @HWI-D00430:125:HHFCJADXX:1:1101:19285:2490/2 38 | GATTTACCATAATCCCTTAATTGTACGCACTGCTAAAACGCGTTCAGCGCGATCACGGCAGCAGACAGGTAAAAATGGCAACAAACCACCCGAAAAACTGC 39 | + 40 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJIJJJHHFFEFDDDDDDDDDCBCDDDDDDCDDDDDDDDDDDDDDDDDDDDDDCD 41 | @HWI-D00430:125:HHFCJADXX:1:1101:3301:2626/2 42 | AACGGGGAGGAGCCCAGAGCCTGAATCAGCATGTGTGTTAGTGGAAGCGTCTGGAAAGGCGCGCGATACAGGGTGACAGCCCCGTACACAAAAGCGCATGT 43 | + 44 | CCCFFFFFGHHHHJJJJJJJJJJJJJJJJJIJIIIJJJJJJIJIJJJJJJJJHHHHHFFFDDDDDDDDDDDDDCBDDDDDDDDDDDDDDDDD@DDDDDBD: 45 | @HWI-D00430:125:HHFCJADXX:1:1101:12272:2744/2 46 | CACTACAACACCATGGGTTATCAAGCCTTGGACGGATGACAATCAGTGGCTAACGGATGCCGCAGCGGTCGTTGCCACTTTAAAACAATCTGAAACTGATG 47 | + 48 | @B@FFFFFHHHHHJJJJGIJJJJJJJJJJJJJJJJGIIJJHHIJJJIJJJJJJIJJJHHHHFDCDDBDDDDDDDDDCCDDDDDDDDBDDCC>ACDDDDCCC 49 | @HWI-D00430:125:HHFCJADXX:1:1101:17746:2589/2 50 | 
CCCATAAAACCGACGTACAGCAAATAACGTCCGGTTGATACGCTGGCTCCAAAGGTAAACCGGGCGTTAGCGAAAAAACTAAAACTGACAGCGACAGCAAA 51 | + 52 | CCCFFFFFHHHHHJJIJJJJJJJIJJJJJJJJJJIJJJJJJJJJJJHHHHFFFFFEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDC 53 | @HWI-D00430:125:HHFCJADXX:1:1101:19043:2712/2 54 | ACTTTATCCCAACTGGTCGCCAGCGCGGGTGCGAAATCATCCCAGTAGTAAGCCAGGATGGTCGAACCGATGACATTGCCGAGGCCCACCCACCAGTAATG 55 | + 56 | @@@FFFFFGHHHHJGGHHEHIIIGGIJJDAGCHJHGFFFFDFDCEAEEFDE>A?C?BAADDDDDBDDDDBDDCCCDCCDCDDBB<8>??BD@D:AC# 57 | @HWI-D00430:125:HHFCJADXX:1:1101:3127:2825/2 58 | GCGCTACGCTATCCGGCTTACAAGGCGTCTGTAGGCTGGGTAAACTACAACAACGCTTCACTATGCTGGTCGCGATACTCCTTCGGCGTAGTGACGTACTC 59 | + 60 | CCCFFFFFHHHHHJJJJJJJJJIHJJJJJJJIJJJJJIJJEHFHIJJJJJJHHHHFFDDEEEDDDDEDCCDDDDDDDDCDDDDDDDDDDDDBCDDD@BDDD 61 | @HWI-D00430:125:HHFCJADXX:1:1101:11048:2889/2 62 | ATATCAGCATCAGGAACCAGACCGCGCCGGACGGTCTGTCGGGAACGGTGGTCACGATACATTTCTTACATGAAAATGGGGGCAGGGATGGCGACAATTCA 63 | + 64 | CCCFFFFFHGHHHJJJJJJJJJJJJJJJJJJJIJHHHHHHFFDDDDDDDDDDCCDDDDDDDEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD@DDDC: 65 | @HWI-D00430:125:HHFCJADXX:2:1101:16489:2779/2 66 | ACATAAAACTCAGCAGAAGCAACGATCCGCCCGCCCGCTCAGGATGCGTCGCCACCGCATTAAGCAGCGCGCTCAACTGCTGATTTACGTCCCGCTGCCAA 67 | + 68 | <@:ACACCDDDD@@D9>BDBBBC> 69 | @HWI-D00430:125:HHFCJADXX:2:1101:19971:2793/2 70 | ATAAAAAGGTGAGCGCAATGCTCACCTTTTTGATCCGTTTCAGAAAGAGATTAGAACCTGTCTCTTATACACATCTCCGAGCCCACGAGACAAGAGGCAAA 71 | + 72 | @@@DDDEDDADFHIIIBGHIIEHIIGHIHGIIEFGEGDHHIIEHGEHEIICEHGIIGHIFHIHGHEEEHC@DC@DECECBBBBBBBBBBBBCCB?BBCCC# 73 | @HWI-D00430:125:HHFCJADXX:2:1101:3668:3115/2 74 | CCATTACGTTGCTGATCGCATCAATTTCTTTCTGCGTAAATATGCCAGCATAGACAAAATAAGCTGCAACCAGAAATATCATAACCGGTGCGCCGCCAATA 75 | + 76 | CCCFFFFFHHHHHIJJIIJJJJJJJJJJJJJJJJIJIJIGHJJJIJJJJJJJJJJJJJJJJIIIJIHJHHHHFFFFFEEDDEEEDDDDDDBDDDDDDDDD@ 77 | @HWI-D00430:125:HHFCJADXX:2:1101:6875:3120/2 78 | CCAAAAGTGGCTTTGCGTGGCTACTGGAGTTCTACCCTGAGCTGACGCAGGCGCGGGTGCGAGCGGATCGCATCTGTCTCTTATACACATCTCCGAGCCCC 79 | + 80 | 
CCCFFDFFHHHHHJIIJFHIJGIJJJJIJFIHIJJJJJJJJJGGIIJJJJJJGIHFD28?=?55;B@;@BB2<9A::@@>@A4:::@@>>C@AC90005?# 81 | @HWI-D00430:125:HHFCJADXX:2:1101:12357:3069/2 82 | GCCGTATACGTATTGCAGCCAGAGGTCCCACTTGTCATTTATCACTCTTCTCATGATCACGGTGATAAGAATGGCAAGCGCAGGAAAATATTTCTAAGATG 83 | + 84 | @@CFDDFFHHHHHJJJJJJJJJJJJJJJJJJJJJIJJJJJJJJIJJJJJJJJIJJJIJJJJJGIIJJJJJJJJHHHHFFFDDDDDDDDDDFFFEEEEDDD@ 85 | @HWI-D00430:125:HHFCJADXX:2:1101:16625:3083/2 86 | GGTTCACGGGGTCTTTCCGTCTTGCCGCGGGTACACTGCATCTTCACAGCGAGTTCAATTTCACTGAGTCTCGGGTGGAGACAGCCTGGCCATCATTACGC 87 | + 88 | @CBFFFFFHHHHHJJJJJJJJJJJJJJJJJJHIJJJIHHGHHHFFFFFFDDBD>BCEEEEDDEDDDDDDDDDCDD<@B?ACBCCBDCADBDDDC>ADCC@@ 89 | @HWI-D00430:125:HHFCJADXX:2:1101:18210:3173/2 90 | ACCTTTAACAGTGCAGGGTGATTGTTATTCGTTCAACGAAAGATACTCATTAAATTGCTCAATACTTGGATAAGAGGATTGGGTGCCTTTCCCGGTGACGC 91 | + 92 | @@?DFFEFHFHFHGIHIGAFFHIJJIJJJIIIJJJIJJGGIJIIJJJJJJJGHHIJGEGIIJJIGIIJIEGHEE@EACDD@BFBCDDCDDDDDB=;=BDD5 93 | @HWI-D00430:125:HHFCJADXX:2:1101:2472:3467/2 94 | GTTCAGCTAAATGCGGGGAGTACCAGTGTGACGGTTAAACGCCCGGTCTCGGCCAGTCTGGCTAAGGCTTTTTTCTATATTGTGCTGTTGTCTATTCCTGT 95 | + 96 | CCCFFFFFHHHHHJJJJJJJHIJJJJJJJJJJJJIIJJJJJHHFFDDDDDDDDDDDCDDDDCCCDCDACCD:AB9@@@@CD>>@CDDDD>C@CDDEDDCC: 97 | @HWI-D00430:125:HHFCJADXX:2:1101:1731:3720/2 98 | GGATAAGGGGCTAAAAGTCGGCCAGCGCGTTGGAATCGGCTGGACGGCGCGCAGCTGCGGACACTGCGATGCCTGTATCAGCGGCAATCAAATTAACTGCC 99 | + 100 | @@C:>7?B8@CCCC@CCAC7@5@BCCCC>AA>CCACCC 101 | @HWI-D00430:125:HHFCJADXX:2:1101:3805:3740/2 102 | TCCCCGATCAACATGTCGGCATCGGTTTCCTGATCGAGCTTCAGTACGTCTTTGAGTGCACCGCCGGTCGCCACAAAGCGGCGCTTGTGCGTCTGGCGGGG 103 | + 104 | CCCFFFFFHHHHHJJIJJJJJJJJJIJJJJJJJJJJHIJJJJJJIIJJJJJHHHHHFFFFFFDDDDDDDDDDDDDDDDDDDDDDDDDBDDDDDDDDDDDD# 105 | @HWI-D00430:125:HHFCJADXX:2:1101:4297:3536/2 106 | GATGTGTACCGACATGGGCGGCCCGGTGAACAAAGCGGCGTATGCGTTTGGCGTTGGTCTGCTGAGTACGCAAACTTACGCGCCGATGGCGGCGATCATGG 107 | + 108 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJHHHHHFFDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDC 109 | @HWI-D00430:125:HHFCJADXX:2:1101:11456:3711/2 
110 | ATATTACGCGGTCGACGATCTTCAAAAAGCGGCTGATAGCCATAATTAATCGGCTGCGCAGACTGAAATATGGAGCCATTCGCCACCGGTACCGGGCCAGG 111 | + 112 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJHHHHHHFFFFFFFEEDDDDDDDDDDDDDDDDDDDEEDDDDDDDDEEDDBDDDDBDDDDDDDDDDD 113 | @HWI-D00430:125:HHFCJADXX:2:1101:18177:3726/2 114 | GGGCAGGACAACATTTATTCCGATCTGACGGCTGGTCGTATTGATGCAGCGTTCCAGGACGAGGTCGCGGCCAGCGAAGGTTTCCTTAAACAGCCCGTCGG 115 | + 116 | CCCFFFFFHHHHHJJJJJJJJJIJJJJJJJJJJJJFHIHJJJJJJJJIIJIEHGHHHFFFDCDD?CDDDDDDDDDB@BDD?CCCDDDCDDDDDDDDDDDDD 117 | @HWI-D00430:125:HHFCJADXX:2:1101:4322:3989/2 118 | CTCCTTTTCACCGTTTTATCCCGCTTTCTTGACCACAAAAAATTTATTTCTTTGTTAATGCACATGCTGCGCAGCCGCATCAGTGACAGCGTTTACCACAT 119 | + 120 | CCCFFFFFHHHHHIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJIJJJJJJJHHHHHFFDDDDDDDDDDDDDDDDDDDDDDDDDDDD: 121 | @HWI-D00430:125:HHFCJADXX:2:1101:4703:3982/2 122 | AGTTCGGTCAGTTGTTCGCTTTGAATCATCAGCGACAGCGCGTTGCCCGGCCCGCAGCCGCCTTCCAGCGCAACGACGTTTTTCTCCGCCTGCGGGAAGTG 123 | + 124 | @@@FFFFDHDHHHGDEHIJJJIIIIIEGGHIIJIJJIGIEGG;AEEHHFDCDD@DDDDDDDDBDDDD>?BD;B@B@B@9BD99<:> 125 | @HWI-D00430:125:HHFCJADXX:2:1101:5755:4181/2 126 | GATCAATACGGCGCAGCGATTTCAACACTTTGACTTTTTCATCGCGAATACTGTCGGCGCTCAGGTCAGACGGCGGTGACATGGCAATCATGCAGAGAATT 127 | + 128 | CCCFFFFFHHHHHJIJJJIJJJJJJJJJJJJJJJJJJJJJJJJJJJJFFFEEEFEDDDDDDDDDD@CCDADDDDDD>BBDDDDDDDCCACCDDDDDDDDDD 129 | -------------------------------------------------------------------------------- /tests/real_fastqs/hiseq_precasava_sra_R1.fastq: -------------------------------------------------------------------------------- 1 | @SRR3712208.123 HWI-D00430:125:HHFCJADXX:1:1101:1227:2085/1 2 | ATTCTGGTGGATGATTCCAGCGCAGGCGCGCAGTCAGGAATTGCGGCGGGGAGGGAAGGATTTTTTTTCTGCGCCGATCCCCATAAAAAACCCAGCGATCA 3 | + 4 | ?@@DDDDDHDADDDDGGHGHGG>@B?;@>@94:4::4::>@8?#### 13 | @SRR3712208.239 HWI-D00430:125:HHFCJADXX:1:1101:5829:2107/1 14 | ATACTCATCTGTTTACCGGGCATACCATCCAGAGAAAATCGGGCCGCGACTTCCGCGACGCGTTCTGAACCTTTGGTAATAACGATAAACTGGACCACGGG 15 | + 16 | 
@@CFFFDFHHHHHJIIJJJJJJJIJJJJJJJJJJIJIIIJJIJJJJJHFDDDDDDDDDDDDD@BDDDEDDDDDDDD8ACEEDDDBDDD>ACDDDDDDDDD# 17 | @SRR3712208.249 HWI-D00430:125:HHFCJADXX:1:1101:6639:2039/1 18 | CTTTAAAACCGTTAATGACGCTTATGGTCATGATATCGGCGATAAGCTACTGGTGGCGGTCACGCATCTGTCTCTTATACACATCTGACGCTGCCGACGAA 19 | + 20 | @@CFFFFFHHHHHJJJJJJJJJIJJJJHIJJJIJJIJJJIJGIGIGIGIHIJHHHHFFD?BBDDBBDDDCCACACCCC@@CC>@AC>ACBD<<@55 21 | @SRR3712208.638 HWI-D00430:125:HHFCJADXX:1:1101:4309:2408/1 22 | TCCTAAAGGTAACTGCATCCATCATTCAGACGACATATTATCATGCTCAAAAAAGAATTCAGCCCCCTAATTAACCTGAGTTTAACTTATAAGTATCACTT 23 | + 24 | CCCFFFFFHFHHHJJJJJJJJJJJJJJJJJJJJJJJJJIJJJJJJJJJJJJJJJJJJIJJJJIJHHHFFFFEEEEEEDDDDEDEDDDDDDDCC5@DCDDC> 25 | @SRR3712208.693 HWI-D00430:125:HHFCJADXX:1:1101:7031:2445/1 26 | GCTACGCTCGCCCTTCGGGCCGCCGCTAGCGGCGTTCAAAACGCTAACGCGTTTTGGCGAGCACTGCCCAGGTCCAAAATGGCAAGTAAAATAGCCCTAAT 27 | + 28 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJHHFDDDDDEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCA# 29 | @SRR3712208.765 HWI-D00430:125:HHFCJADXX:1:1101:11147:2290/1 30 | GTTTATTGCTGGTAAATTCGTAGCCCTCTGTGGGCCAGTAGCCGACAAATTTAACGCCTTTGAGGGCAAGTTTGTCATGCAACATACCCAGCGCATCCAGG 31 | + 32 | @@CFFFFFHHHGHJJJJJJJJJJJJJJJJJJJIIJJJJJJJJJJHJIIIIJJGIGGGIIHHHHFFFFDDDEDEDDDEEDDDDDDDDDDDDDDDDDDDDDD< 33 | @SRR3712208.792 HWI-D00430:125:HHFCJADXX:1:1101:12642:2441/1 34 | GCAATGGAGAGCATCACAACCACCACCGTCAAGCCGACGATCAGCAGCGGTAGCAGCGCAATCAGGTGTTGTGGAGTTATTGTCATGGCGATTTACGGCCT 35 | + 36 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJHHDFDEEEEDDDDDDDDDCDDDDDDDDDCDDEDDEEEEDDDDDDDDDEDDDB< 37 | @SRR3712208.946 HWI-D00430:125:HHFCJADXX:1:1101:19285:2490/1 38 | ATACAGGAGCGCGAACATGTGTTATCCCTGGAACGGGAGCGTCAGCCGGAAATACAGGAACGCACGCTGGATGGCCCTTCGCTGGGATGGTGAAACCATGA 39 | + 40 | CCCFFFFFHHHHHJJJIJJHIIIJJJJJJJJJJJJJJIIHIHHHFFFFDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCDDDDD@CDCDDDDDDD 41 | @SRR3712208.1028 HWI-D00430:125:HHFCJADXX:1:1101:3301:2626/1 42 | ACCTTCAACCTGCCCATGGCTAGATCACCGGGTTTCGGGTCTATACCCTGCAACTTAACGCCCGGTTAAGACTCGGTTTCCCTACGGCTCCCCTATACGGT 43 | + 44 
| @CCFFFFFHHHHHJJJJJJJJJJJJJJJJJJGDGHIGGIGIIJJJJJIIIIIHJJIJJHHHFFDDBDDDDDDCDDDBDDDDDDDDDDDDDDDDDDDCDDBA 45 | @SRR3712208.1222 HWI-D00430:125:HHFCJADXX:1:1101:12272:2744/1 46 | ATCATAGCCGAAACTATTAGCGCGGATATTACCTAACCCTTCTTCTGCCAAATTAGCAACATTAATTCTAGAGGGGTCTACCATCCCGGTTATACCGCTTA 47 | + 48 | @DCDCBBD<9 49 | @SRR3712208.1334 HWI-D00430:125:HHFCJADXX:1:1101:17746:2589/1 50 | ACGTGAATGTCCTCGGCAACGTTAACGCCAGCGGCACGCTGATGGACAACGGCGGCAACTCTAACCACCACTCTCACTAACCTGCAAATTGCTGCTGGATG 51 | + 52 | @CCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJJJJHHHHHFFFFDDDDDD>>(5&22?<<@>AC58??################ 57 | @SRR3712208.1442 HWI-D00430:125:HHFCJADXX:1:1101:3127:2825/1 58 | TTCTACCACGTTGGCGATAAACGAAATTTCGCAAATGTGCGGCTACCCGTCACTGCAATATTTCTATTCGGTGTTTAAAAAGGAGTACGTCACTACGCCGA 59 | + 60 | CCCFFFFFHHHHHIJJJJJJJJJJJJJJJJJIJJJJJIIJGJJJJJIJGHHFFFFFEEEDEFEFEEEEEDDBBBDDDDDDDDDDDCCDDDDDDDDDDBBDB 61 | @SRR3712208.1598 HWI-D00430:125:HHFCJADXX:1:1101:11048:2889/1 62 | ATCCCAGACTTTCCAGTAAAAACGCGCACGCGTTAGTGACCGCCGTATCATCATCCAGTAGATGAATTGTCGCCATCCCTGCCCCCATTTTCATGTAAGAA 63 | + 64 | CCCFFFFFHHHHHIJJJJJJIJIJJIGIJIJJIJJJIJIJJJHHFDDEDEEDDDDDDDDDDDEDDDEECDDDDDDDDDDDDBDDDDDDDEEEDDDDDDDDD 65 | @SRR3712208.1696 HWI-D00430:125:HHFCJADXX:1:1101:16489:2779/1 66 | CCTCACGGGCAGTATCATCATCAACGCCATTAAGCGCGCTGAAGATTTCCACCGTGGCAATTGGCTCCGGCTGGAAAAGCGTCAGCAATACCTGACAGCGG 67 | + 68 | ?<@FDDD8DDHHHAGHIJIHHJJIGIJJBGH@HIIEIIHGIECA>AHHHHEFFFECDCDDDCCC>AA?@BBBBB?CC?9<>@BDBC@CDCCCCD9?>C<<5 69 | @SRR3712208.1780 HWI-D00430:125:HHFCJADXX:1:1101:19971:2793/1 70 | GTTCTAATCTCTTTCTGAAACGGATCAAAAAGGTGAGCATTGCGCTCACCTTTTTATCTGTCTCTTATACACATCTGACGCTGCCGACGAAGAGGATAGTG 71 | + 72 | C@@FFFFFHHHHGHIJIJJJJJJJIGGGJFH:C9?DHGIHGGGIG6@GHGIEHFHIGIIGEGIGGIGGHDEHEDFFFFEDDBDDD@BDDDB@D@ 73 | @SRR3712208.1842 HWI-D00430:125:HHFCJADXX:1:1101:3668:3115/1 74 | GATTTACTCAGATTCAAAATCTTTGGTATGCCGCTACCGCTTTATGCCTTTGCATTAATTACTTTATTACTTTCTCATTTTTATAATGCTATACCGACTGT 75 | + 76 | =8+BDD;=:=AD?A:<,<<3+<<@A:;>AAA>A;AA?##### 77 | @SRR3712208.1900 
HWI-D00430:125:HHFCJADXX:1:1101:6875:3120/1 78 | ATGCGATCCGCTCGCACCCGCGCCTGCGTCAGCTCAGGGTAGAACTCCAGTAGCCACGCAAAGCCACTTTTGGCTGTCTCTTATACACATCTGACGCTGCC 79 | + 80 | @@@FDDDFHHHHHJJJIJGIHIJJIGIIIIIGGIJJJJGHGHFFFFFFEDCEEDDDDDDBD@CCDDDDDDDCDDCDACDDCDCDDDEDDDDDDDDDD@BD? 81 | @SRR3712208.2028 HWI-D00430:125:HHFCJADXX:1:1101:12357:3069/1 82 | GTTTACACCTACTCCTCTATCGCCAGCACGTTTGTGGTGACGGGGGGAATGTTTGGCGCCATGAGCCTGTACGGGTACACCACTAAACGCGATCTTAGCGG 83 | + 84 | @BCFFFFFHHHHHJJJJJJJJJJJJJJJJJIJJJHIIDGIJJJJJHD:BDDDEDDCDDDDDDDDDDDDDDDDDDDDDDCCDDDDDCDDDDD>B@DDDCDD> 85 | @SRR3712208.2116 HWI-D00430:125:HHFCJADXX:1:1101:16625:3083/1 86 | GATTTACTCACGGAGCTGAAGTCAGTCGAAGATACCAGCTGGCTGCAACTGTTTATTAAAAACACAGCACTGTGCAAACACGAAAGTGGACGTATACGGTG 87 | + 88 | CCCFFFFFHHHHHJJJJJIIJJJJJJJJJJJJJJJJJJJJJIJJJJJJJJJIIJJJJIJJJJJJHHHHFFFF@CEEEEEDDDDDDD@CCDDDEEEDCBBCCABB?@><@B?>ACCDDDECC@ACBBBB3AAB4> 101 | @SRR3712208.2726 HWI-D00430:125:HHFCJADXX:1:1101:3805:3740/1 102 | GAAATATTCCACGAAGCCGGCTGACATGGTGGCCGATCCTGAATGGTTCCTCGAACTGACCCGCCAGACGCACAAGCGCCGCTTTGTGGCGACCGGCGGTG 103 | + 104 | CCCFFFFFHHHHHJJJJJJJJJJIHIJJJHIJJJJJJJJJJJIIJJJJIIJHHHFFFFFEEEDDDDDDDDDDDDDBDDDDDBDDDDDDDDDDDDDD>BB## 105 | @SRR3712208.2736 HWI-D00430:125:HHFCJADXX:1:1101:4297:3536/1 106 | GTACACGCATCGGGTCGCGCGCCGCAAACGGAATAGCGCCTTCAGTGATAAAGCACAGCCCCAGCACCAACGCCGCTTTACCACCTTCCTGCTGCGCTTTG 107 | + 108 | CCCFFFFFHHHHHJHIJJJJJJJJJJJJHHHFFFFEEDDDDDDDDFDDFFDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD> 109 | @SRR3712208.2873 HWI-D00430:125:HHFCJADXX:1:1101:11456:3711/1 110 | GGATACGCCACCGGCTCCCTGATTTTGAAGATGAAGGTAATGCAAAAATACGCGCTTCACGCTTACCCAGTTATGGCCCTGATGGTCGCGACGCTGACAGG 111 | + 112 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJIJJJJJJHHIJJJJJJJJJJJJJJHFFDEEDEDDDDDDDDDDDEDDDDDDDDDDDDDDDDDDDDDDDDDD? 
113 | @SRR3712208.2992 HWI-D00430:125:HHFCJADXX:1:1101:18177:3726/1 114 | CCTGAAAACCCGTACAACATGCAGCCTGTCCTGTTGTGTGTGGTGAAGTAAGAAGCCTCTTTTGCATTTTTGCCTCACATCACGCCGGATGGCGGCTGTGC 115 | + 116 | @@CFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJIJJIJIJJJJFHHIIJJIIIJJJJJIJJIJJHHHGHHHFFFFEEDEEDDDDDDDDDBDDDDDD@BDDD 117 | @SRR3712208.3126 HWI-D00430:125:HHFCJADXX:1:1101:4322:3989/1 118 | CTCTGAATGATGTGGTAAACGCTGTCACTGATGCGGCTGCGCAGCATGTGCATTAACAAAGAAATAAATTTTTTGTGGTCAAGAAAGCGGGATAAAACGGG 119 | + 120 | CCCFFFFFHHHHHIJHIIJJJJJJJJGJJJJJJJJJJJJIJJJJJJJJEGIJJJJHHHHHFFFFFEECEEEEDDDDDDCDDDDDDDCDDDDBDDDDDDDD# 121 | @SRR3712208.3133 HWI-D00430:125:HHFCJADXX:1:1101:4703:3982/1 122 | GGTGGAAATCATGTACGCCCTGCGGCAGGGGGGGTGCGTTAAAGGGTAGAAAATGGCAAGGATCATCGCGCTGGATGGGGCACAGGGAGGGGGCGGCGCGC 123 | + 124 | 81+4A?DDBDADDCD?8? 21 | @SRR3712208.638 HWI-D00430:125:HHFCJADXX:1:1101:4309:2408/2 22 | ATGAAATAGAGTGATAAGCGCTTTCGTACATACATCTCATTAGTACAACTGATAGTGTTATCATTTGATTGAGTGTAATCCGATAACAATTCTCTCCGCGT 23 | + 24 | CCCFFFFFHHHFFIJJJJJJJJJJJJHIJJJJJJJJJJJJJJJIIJJJJIJJJJJIJJJJJJJJJJJJJJJIJHHGIJHHHHDFDDEEEDCDEDDDDDDD8 25 | @SRR3712208.693 HWI-D00430:125:HHFCJADXX:1:1101:7031:2445/2 26 | GTATGGTTATTCAACCTATCGCGGGAGCTAATTTAGAGCCTATCCCATTAGGGCTATTTTACTTGCCATTTTGGACCTGGGCAGTGCTCGCCAAAACGCGT 27 | + 28 | CBCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJHHHHHFFFFFFEEEEEEEEDDDDDDDDDDDDDDDEDDDDDDDDDDDD> 29 | @SRR3712208.765 HWI-D00430:125:HHFCJADXX:1:1101:11147:2290/2 30 | ATCCTGTTCCATTCGCGCTCCCCACTTCACGCTGACTTTGGGTGCATTATGCCTCATTGTGCCGTGAATGCAGCGATTCTGTTACACTACCCGCAACGCTA 31 | + 32 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJJJIIIIIJJJJJJJJJFIIJHHHHHFFFDDEEEDDDDDDDDDEEDDDDDDDDDDBDDDDDDD< 33 | @SRR3712208.792 HWI-D00430:125:HHFCJADXX:1:1101:12642:2441/2 34 | TGATTACCGTGATCTCGACCTTTGGGCTGGTATTCGCCTCTGTATACTCGCTGGCGATGCTGCATCGCGCTTACTTTGGTAAGGCGAAGAGCCAGATTGCC 35 | + 36 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJIIJJJJJJJJJJIIJJJJJIJJJIJHHHFFFFEDEEDDDDDDDCDDEDDDDDDDDDDDDDDDDDDDDDDDC 37 | @SRR3712208.946 
HWI-D00430:125:HHFCJADXX:1:1101:19285:2490/2 38 | GATTTACCATAATCCCTTAATTGTACGCACTGCTAAAACGCGTTCAGCGCGATCACGGCAGCAGACAGGTAAAAATGGCAACAAACCACCCGAAAAACTGC 39 | + 40 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJIJJJHHFFEFDDDDDDDDDCBCDDDDDDCDDDDDDDDDDDDDDDDDDDDDDCD 41 | @SRR3712208.1028 HWI-D00430:125:HHFCJADXX:1:1101:3301:2626/2 42 | AACGGGGAGGAGCCCAGAGCCTGAATCAGCATGTGTGTTAGTGGAAGCGTCTGGAAAGGCGCGCGATACAGGGTGACAGCCCCGTACACAAAAGCGCATGT 43 | + 44 | CCCFFFFFGHHHHJJJJJJJJJJJJJJJJJIJIIIJJJJJJIJIJJJJJJJJHHHHHFFFDDDDDDDDDDDDDCBDDDDDDDDDDDDDDDDD@DDDDDBD: 45 | @SRR3712208.1222 HWI-D00430:125:HHFCJADXX:1:1101:12272:2744/2 46 | CACTACAACACCATGGGTTATCAAGCCTTGGACGGATGACAATCAGTGGCTAACGGATGCCGCAGCGGTCGTTGCCACTTTAAAACAATCTGAAACTGATG 47 | + 48 | @B@FFFFFHHHHHJJJJGIJJJJJJJJJJJJJJJJGIIJJHHIJJJIJJJJJJIJJJHHHHFDCDDBDDDDDDDDDCCDDDDDDDDBDDCC>ACDDDDCCC 49 | @SRR3712208.1334 HWI-D00430:125:HHFCJADXX:1:1101:17746:2589/2 50 | CCCATAAAACCGACGTACAGCAAATAACGTCCGGTTGATACGCTGGCTCCAAAGGTAAACCGGGCGTTAGCGAAAAAACTAAAACTGACAGCGACAGCAAA 51 | + 52 | CCCFFFFFHHHHHJJIJJJJJJJIJJJJJJJJJJIJJJJJJJJJJJHHHHFFFFFEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDC 53 | @SRR3712208.1373 HWI-D00430:125:HHFCJADXX:1:1101:19043:2712/2 54 | ACTTTATCCCAACTGGTCGCCAGCGCGGGTGCGAAATCATCCCAGTAGTAAGCCAGGATGGTCGAACCGATGACATTGCCGAGGCCCACCCACCAGTAATG 55 | + 56 | @@@FFFFFGHHHHJGGHHEHIIIGGIJJDAGCHJHGFFFFDFDCEAEEFDE>A?C?BAADDDDDBDDDDBDDCCCDCCDCDDBB<8>??BD@D:AC# 57 | @SRR3712208.1442 HWI-D00430:125:HHFCJADXX:1:1101:3127:2825/2 58 | GCGCTACGCTATCCGGCTTACAAGGCGTCTGTAGGCTGGGTAAACTACAACAACGCTTCACTATGCTGGTCGCGATACTCCTTCGGCGTAGTGACGTACTC 59 | + 60 | CCCFFFFFHHHHHJJJJJJJJJIHJJJJJJJIJJJJJIJJEHFHIJJJJJJHHHHFFDDEEEDDDDEDCCDDDDDDDDCDDDDDDDDDDDDBCDDD@BDDD 61 | @SRR3712208.1598 HWI-D00430:125:HHFCJADXX:1:1101:11048:2889/2 62 | ATATCAGCATCAGGAACCAGACCGCGCCGGACGGTCTGTCGGGAACGGTGGTCACGATACATTTCTTACATGAAAATGGGGGCAGGGATGGCGACAATTCA 63 | + 64 | CCCFFFFFHGHHHJJJJJJJJJJJJJJJJJJJIJHHHHHHFFDDDDDDDDDDCCDDDDDDDEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD@DDDC: 65 | 
@SRR3712208.1696 HWI-D00430:125:HHFCJADXX:1:1101:16489:2779/2 66 | ACATAAAACTCAGCAGAAGCAACGATCCGCCCGCCCGCTCAGGATGCGTCGCCACCGCATTAAGCAGCGCGCTCAACTGCTGATTTACGTCCCGCTGCCAA 67 | + 68 | <@:ACACCDDDD@@D9>BDBBBC> 69 | @SRR3712208.1780 HWI-D00430:125:HHFCJADXX:1:1101:19971:2793/2 70 | ATAAAAAGGTGAGCGCAATGCTCACCTTTTTGATCCGTTTCAGAAAGAGATTAGAACCTGTCTCTTATACACATCTCCGAGCCCACGAGACAAGAGGCAAA 71 | + 72 | @@@DDDEDDADFHIIIBGHIIEHIIGHIHGIIEFGEGDHHIIEHGEHEIICEHGIIGHIFHIHGHEEEHC@DC@DECECBBBBBBBBBBBBCCB?BBCCC# 73 | @SRR3712208.1842 HWI-D00430:125:HHFCJADXX:1:1101:3668:3115/2 74 | CCATTACGTTGCTGATCGCATCAATTTCTTTCTGCGTAAATATGCCAGCATAGACAAAATAAGCTGCAACCAGAAATATCATAACCGGTGCGCCGCCAATA 75 | + 76 | CCCFFFFFHHHHHIJJIIJJJJJJJJJJJJJJJJIJIJIGHJJJIJJJJJJJJJJJJJJJJIIIJIHJHHHHFFFFFEEDDEEEDDDDDDBDDDDDDDDD@ 77 | @SRR3712208.1900 HWI-D00430:125:HHFCJADXX:1:1101:6875:3120/2 78 | CCAAAAGTGGCTTTGCGTGGCTACTGGAGTTCTACCCTGAGCTGACGCAGGCGCGGGTGCGAGCGGATCGCATCTGTCTCTTATACACATCTCCGAGCCCC 79 | + 80 | CCCFFDFFHHHHHJIIJFHIJGIJJJJIJFIHIJJJJJJJJJGGIIJJJJJJGIHFD28?=?55;B@;@BB2<9A::@@>@A4:::@@>>C@AC90005?# 81 | @SRR3712208.2028 HWI-D00430:125:HHFCJADXX:1:1101:12357:3069/2 82 | GCCGTATACGTATTGCAGCCAGAGGTCCCACTTGTCATTTATCACTCTTCTCATGATCACGGTGATAAGAATGGCAAGCGCAGGAAAATATTTCTAAGATG 83 | + 84 | @@CFDDFFHHHHHJJJJJJJJJJJJJJJJJJJJJIJJJJJJJJIJJJJJJJJIJJJIJJJJJGIIJJJJJJJJHHHHFFFDDDDDDDDDDFFFEEEEDDD@ 85 | @SRR3712208.2116 HWI-D00430:125:HHFCJADXX:1:1101:16625:3083/2 86 | GGTTCACGGGGTCTTTCCGTCTTGCCGCGGGTACACTGCATCTTCACAGCGAGTTCAATTTCACTGAGTCTCGGGTGGAGACAGCCTGGCCATCATTACGC 87 | + 88 | @CBFFFFFHHHHHJJJJJJJJJJJJJJJJJJHIJJJIHHGHHHFFFFFFDDBD>BCEEEEDDEDDDDDDDDDCDD<@B?ACBCCBDCADBDDDC>ADCC@@ 89 | @SRR3712208.2153 HWI-D00430:125:HHFCJADXX:1:1101:18210:3173/2 90 | ACCTTTAACAGTGCAGGGTGATTGTTATTCGTTCAACGAAAGATACTCATTAAATTGCTCAATACTTGGATAAGAGGATTGGGTGCCTTTCCCGGTGACGC 91 | + 92 | @@?DFFEFHFHFHGIHIGAFFHIJJIJJJIIIJJJIJJGGIJIIJJJJJJJGHHIJGEGIIJJIGIIJIEGHEE@EACDD@BFBCDDCDDDDDB=;=BDD5 93 | @SRR3712208.2259 HWI-D00430:125:HHFCJADXX:1:1101:2472:3467/2 
94 | GTTCAGCTAAATGCGGGGAGTACCAGTGTGACGGTTAAACGCCCGGTCTCGGCCAGTCTGGCTAAGGCTTTTTTCTATATTGTGCTGTTGTCTATTCCTGT 95 | + 96 | CCCFFFFFHHHHHJJJJJJJHIJJJJJJJJJJJJIIJJJJJHHFFDDDDDDDDDDDCDDDDCCCDCDACCD:AB9@@@@CD>>@CDDDD>C@CDDEDDCC: 97 | @SRR3712208.2675 HWI-D00430:125:HHFCJADXX:1:1101:1731:3720/2 98 | GGATAAGGGGCTAAAAGTCGGCCAGCGCGTTGGAATCGGCTGGACGGCGCGCAGCTGCGGACACTGCGATGCCTGTATCAGCGGCAATCAAATTAACTGCC 99 | + 100 | @@C:>7?B8@CCCC@CCAC7@5@BCCCC>AA>CCACCC 101 | @SRR3712208.2726 HWI-D00430:125:HHFCJADXX:1:1101:3805:3740/2 102 | TCCCCGATCAACATGTCGGCATCGGTTTCCTGATCGAGCTTCAGTACGTCTTTGAGTGCACCGCCGGTCGCCACAAAGCGGCGCTTGTGCGTCTGGCGGGG 103 | + 104 | CCCFFFFFHHHHHJJIJJJJJJJJJIJJJJJJJJJJHIJJJJJJIIJJJJJHHHHHFFFFFFDDDDDDDDDDDDDDDDDDDDDDDDDBDDDDDDDDDDDD# 105 | @SRR3712208.2736 HWI-D00430:125:HHFCJADXX:1:1101:4297:3536/2 106 | GATGTGTACCGACATGGGCGGCCCGGTGAACAAAGCGGCGTATGCGTTTGGCGTTGGTCTGCTGAGTACGCAAACTTACGCGCCGATGGCGGCGATCATGG 107 | + 108 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJHHHHHFFDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDC 109 | @SRR3712208.2873 HWI-D00430:125:HHFCJADXX:1:1101:11456:3711/2 110 | ATATTACGCGGTCGACGATCTTCAAAAAGCGGCTGATAGCCATAATTAATCGGCTGCGCAGACTGAAATATGGAGCCATTCGCCACCGGTACCGGGCCAGG 111 | + 112 | CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJHHHHHHFFFFFFFEEDDDDDDDDDDDDDDDDDDDEEDDDDDDDDEEDDBDDDDBDDDDDDDDDDD 113 | @SRR3712208.2992 HWI-D00430:125:HHFCJADXX:1:1101:18177:3726/2 114 | GGGCAGGACAACATTTATTCCGATCTGACGGCTGGTCGTATTGATGCAGCGTTCCAGGACGAGGTCGCGGCCAGCGAAGGTTTCCTTAAACAGCCCGTCGG 115 | + 116 | CCCFFFFFHHHHHJJJJJJJJJIJJJJJJJJJJJJFHIHJJJJJJJJIIJIEHGHHHFFFDCDD?CDDDDDDDDDB@BDD?CCCDDDCDDDDDDDDDDDDD 117 | @SRR3712208.3126 HWI-D00430:125:HHFCJADXX:1:1101:4322:3989/2 118 | CTCCTTTTCACCGTTTTATCCCGCTTTCTTGACCACAAAAAATTTATTTCTTTGTTAATGCACATGCTGCGCAGCCGCATCAGTGACAGCGTTTACCACAT 119 | + 120 | CCCFFFFFHHHHHIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJIJJJJJJJHHHHHFFDDDDDDDDDDDDDDDDDDDDDDDDDDDD: 121 | @SRR3712208.3133 HWI-D00430:125:HHFCJADXX:1:1101:4703:3982/2 122 | 
def main():
    """Build a genus-specific core-gene database from candidate gene FASTA files.

    Command-line entry point. The steps, in order:

    1. Download the RefSeq assembly summary into the output folder.
    2. Download the complete RefSeq genomes for the requested genus.
    3. BLAST every candidate gene against those genomes and write hit reports.
    4. Select candidate genes from the report and drop the ones that look
       similar to each other or hit a genome more than once.
    5. Append the surviving gene files to ``<genus>_db_cgderived.fasta``.
    """
    logging.basicConfig(format='\033[92m \033[1m %(asctime)s \033[0m %(message)s ',
                        level=logging.INFO,
                        datefmt='%Y-%m-%d %H:%M:%S')
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', '--output_folder',
                        type=str,
                        required=True,
                        help='Folder to first store temporary files, and eventually store the created database.')
    parser.add_argument('-i', '--input_folder',
                        type=str,
                        required=True,
                        help='Folder with your input files to try to find core genes. Each gene should be in a '
                             'FASTA file. Expected extension is .fasta')
    parser.add_argument('-g', '--genus',
                        type=str,
                        required=True,
                        help='Name of genus you\'re creating a database for.')
    parser.add_argument('--desired_number_genes',
                        type=int,
                        default=50,
                        help='Minimum number of genes you want to find.')
    args = parser.parse_args()

    if not os.path.isdir(args.output_folder):
        os.makedirs(args.output_folder)
    # Steps to get this done:
    # 1) Get the RefSeq assembly summary (ftp://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/assembly_summary_refseq.txt)
    download_refseq_summary(args.output_folder)
    # 2) From the RefSeq assembly summary, download complete genomes for your genus of interest.
    download_refseq_genomes(args.output_folder,
                            os.path.join(args.output_folder, 'assembly_summary_refseq.txt'),
                            args.genus)
    # 3) BLAST each of the potential genes to be used against the RefSeq genomes of interest. We only want to keep
    # genes that both hit all genomes, and also hit only once per genome.
    find_hits_per_genome(args.input_folder, args.output_folder)
    # 4) BLAST the potential genes we've found against each other to make sure none of them are similar to each other.
    potential_genes = get_potential_genes(os.path.join(args.output_folder, 'gene_hit_report.csv'),
                                          args.desired_number_genes)
    genomes = sorted(glob.glob(os.path.join(args.output_folder, '*.fasta')))
    confirmed_genes = check_for_similar_genes(potential_genes, genomes)
    # 5) Concatenate the confirmed gene files into the final database. This is done in Python rather than with a
    # shell 'cat' so that paths containing spaces or shell metacharacters work, and so that an unreadable gene file
    # raises an error instead of being silently skipped.
    # NOTE(review): the database is written relative to the current working directory (as before), although the
    # --output_folder help text says the database ends up in the output folder - confirm which one is intended.
    database_file = args.genus + '_db_cgderived.fasta'
    for gene in confirmed_genes:
        # Append mode keeps the previous 'cat >>' semantics: existing content is preserved.
        with open(gene, 'rb') as gene_handle, open(database_file, 'ab') as database_handle:
            database_handle.write(gene_handle.read())
def _count_loose_blast_hits(query_fasta, blast_db, blast_file):
    """Run blastn and count hits with >= 70% identity or >= 50% query coverage.

    :param query_fasta: FASTA file used as the BLAST query.
    :param blast_db: Path of the nucleotide BLAST database to search.
    :param blast_file: Path the tabular BLAST output is written to (overwritten on every call).
    :return: Number of hits that pass the loose identity/coverage thresholds.
    :raises subprocess.CalledProcessError: if blastn exits with a non-zero status.
    """
    # Argument list (no shell) so that paths with spaces or shell metacharacters are passed through untouched.
    subprocess.check_call(['blastn',
                           '-query', query_fasta,
                           '-db', blast_db,
                           '-out', blast_file,
                           '-outfmt', '6 qseqid sseqid pident length qlen qstart qend sstart send evalue'])
    hits = 0
    with open(blast_file) as f:
        for line in f:
            if not line.strip():
                continue
            blast_result = BlastResult(line.rstrip())
            if blast_result.percent_identity >= 70 or blast_result.query_coverage >= 50:
                hits += 1
    return hits


def check_for_similar_genes(potential_genes, genomes):
    """Filter candidate genes down to ones that are distinct and single-copy.

    A gene is kept only if:

    * it has no loose BLAST hit (>= 70% identity or >= 50% query coverage) against ANY other candidate gene.
      Both members of a similar-looking pair are therefore dropped;
    * its first sequence produces at most one loose hit in every genome.

    A makeblastdb database is built here for each candidate gene. BLAST databases for ``genomes`` are not built
    here; main() runs find_hits_per_genome(), which creates them, before calling this function.

    :param potential_genes: List of paths to candidate gene FASTA files.
    :param genomes: List of paths to genome FASTA files to check for multiple hits.
    :return: List of gene FASTA paths that passed both checks, in input order.
    :raises subprocess.CalledProcessError: if a blastn run fails.
    """
    # For each of our potential genes make a blast DB.
    for potential_gene in potential_genes:
        subprocess.call(['makeblastdb', '-dbtype', 'nucl', '-in', potential_gene])
    # Then, blast each gene against all other genes. The gene is only confirmed once it has been compared against
    # every other gene; previously it was confirmed right after the first comparison that found nothing.
    confirmed_genes = list()
    with tempfile.TemporaryDirectory() as tmpdir:
        blast_file = os.path.join(tmpdir, 'blast_out.tsv')
        for gene1 in potential_genes:
            if gene1 in confirmed_genes:
                continue
            similar_genes_found = any(_count_loose_blast_hits(gene1, gene2, blast_file) > 0
                                      for gene2 in potential_genes
                                      if gene2 != gene1)
            if not similar_genes_found:
                confirmed_genes.append(gene1)
    # Also check that our confirmed genes only hit each genome once, with very loose settings.
    really_confirmed_genes = list()
    for confirmed_gene in confirmed_genes:
        # Only the first sequence of each gene file is used as the query.
        first_sequence = next(SeqIO.parse(confirmed_gene, 'fasta'), None)
        if first_sequence is None:
            logging.warning('No sequences found in {}, skipping it.'.format(confirmed_gene))
            continue
        with tempfile.TemporaryDirectory() as tmpdir:
            seqfile = os.path.join(tmpdir, 'sequence.fasta')
            blast_file = os.path.join(tmpdir, 'blast_out.tsv')
            SeqIO.write([first_sequence], seqfile, 'fasta')
            only_one_per_genome = all(_count_loose_blast_hits(seqfile, genome, blast_file) <= 1
                                      for genome in genomes)
        if only_one_per_genome:
            really_confirmed_genes.append(confirmed_gene)
    return really_confirmed_genes
def get_potential_genes(gene_report, desired_genes):
    """Choose candidate genes from the per-gene hit report.

    The report is a CSV with ``Gene`` and ``OneHitPerGenome`` columns, the latter being the proportion of genomes
    in which the gene had exactly one hit. Genes are ranked from highest to lowest proportion. Every gene with a
    proportion of 1 is selected; lower-ranked genes are added only while fewer than ``desired_genes`` genes have
    been selected.

    :param gene_report: Path to the gene hit report CSV.
    :param desired_genes: Number of genes to reach before genes with a proportion below 1 stop being added.
    :return: List of selected gene names, best proportion first.
    """
    with open(gene_report) as report_handle:
        # If a gene is listed more than once, its last row wins.
        proportion_by_gene = {record['Gene']: float(record['OneHitPerGenome'])
                              for record in csv.DictReader(report_handle)}
    ranked = list(proportion_by_gene.items())
    ranked.sort(key=lambda entry: entry[1], reverse=True)

    selected = []
    lowest_proportion = 1
    for gene_name, gene_proportion in ranked:
        if gene_proportion != 1:
            # Imperfect genes are only used to fill up to the requested number.
            if len(selected) >= desired_genes:
                continue
            lowest_proportion = gene_proportion
        selected.append(gene_name)
    logging.info('Found {} genes. Lowest proportion found was {}'.format(len(selected), lowest_proportion))
    return selected
def download_refseq_summary(output_folder):
    """Download the NCBI RefSeq assembly summary into ``output_folder``.

    The file is saved as ``assembly_summary_refseq.txt``.

    :param output_folder: Existing folder the summary is written to.
    :raises FileNotFoundError: if the summary file is not present after the download.
    """
    logging.info('Downloading RefSeq summary...')
    summary_file = os.path.join(output_folder, 'assembly_summary_refseq.txt')
    urllib.request.urlretrieve('ftp://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/assembly_summary_refseq.txt',
                               summary_file)
    # Explicit check rather than an assert, which is stripped when Python runs with -O.
    if not os.path.isfile(summary_file):
        raise FileNotFoundError('RefSeq assembly summary was not downloaded to {}'.format(summary_file))


def download_refseq_genomes(output_folder, assembly_summary, genus):
    """Download every complete RefSeq genome of ``genus`` listed in the assembly summary.

    Rows are used when the organism column (index 7) contains ``genus`` and does not mention phage, and the
    assembly level column (index 11) contains 'Complete'. Each genome is downloaded from the FTP folder in
    column 19, gunzipped to ``genome_<n>.fasta`` in ``output_folder``, and discarded again if the uncompressed
    file is smaller than 2,000,000 bytes.

    :param output_folder: Folder the genomes are written to.
    :param assembly_summary: Path to the tab-delimited RefSeq assembly summary.
    :param genus: Genus name to look for in the organism column (case-sensitive substring match).
    """
    logging.info('Downloading complete RefSeq genomes for {}. Depending on genus, this may take a while...'.format(genus))
    i = 0
    with open(assembly_summary) as f:
        for line in f:
            if line.startswith('#'):
                continue
            x = line.rstrip('\n').split('\t')
            # Blank or truncated rows do not have the columns we need.
            if len(x) < 20:
                continue
            organism = x[7]
            level = x[11]
            ftp_folder = x[19]
            if genus not in organism or 'PHAGE' in organism.upper() or 'Complete' not in level:
                continue
            # Rows without a usable FTP path cannot be turned into a download link.
            if ftp_folder in ('', 'na'):
                continue
            download_link = ftp_folder + '/' + ftp_folder.split('/')[-1] + '_genomic.fna.gz'
            output_file = os.path.join(output_folder, 'genome_{}.fasta.gz'.format(i + 1))
            genome_file = output_file[:-len('.gz')]
            urllib.request.urlretrieve(download_link, output_file)
            # System call to gzip since it's faster. Argument list (no shell) so odd characters in the path are
            # safe; -f overwrites a genome file left over from a previous run instead of refusing to decompress.
            if subprocess.call(['gunzip', '-f', output_file]) != 0:
                logging.warning('Could not decompress {}, skipping it.'.format(output_file))
                if os.path.isfile(output_file):
                    os.remove(output_file)
                continue
            # Make sure files are big enough to be genomes and aren't phage/plasmid/something else.
            if os.path.getsize(genome_file) < 2000000:
                os.remove(genome_file)
                continue
            i += 1
    logging.info('Done downloading! Got {} genomes.'.format(i))
def _count_core_gene_hits(seqfile, genome, blast_file):
    """Run blastn and count hits with >= 90% identity and >= 90% query coverage.

    :param seqfile: FASTA file holding the query sequence.
    :param genome: Genome FASTA whose BLAST database is searched.
    :param blast_file: Path the tabular BLAST output is written to (overwritten on every call).
    :return: Number of hits passing both thresholds.
    :raises subprocess.CalledProcessError: if blastn exits with a non-zero status.
    """
    # check_call so a failed BLAST stops the run; ignoring the failure could leave the output file from the
    # previous genome in place and count its hits again.
    subprocess.check_call(['blastn',
                           '-query', seqfile,
                           '-db', genome,
                           '-out', blast_file,
                           '-outfmt', '6 qseqid sseqid pident length qlen qstart qend sstart send evalue'])
    number_hits = 0
    with open(blast_file) as f:
        for line in f:
            if not line.strip():
                continue
            blast_result = BlastResult(line.rstrip())
            if blast_result.percent_identity >= 90 and blast_result.query_coverage >= 90:
                number_hits += 1
    return number_hits


def find_hits_per_genome(genes_folder, genomes_folder):
    """BLAST the first allele of every gene against every genome and write two reports.

    Both reports are (re)created in ``genomes_folder``:

    * ``genome_hit_report.csv`` - one row per gene with the number of hits in each genome;
    * ``gene_hit_report.csv`` - one row per gene with the proportion of genomes that had exactly one hit.

    If ``genomes_folder`` holds no ``*.fasta`` files, only the report headers are written and a warning is logged.

    :param genes_folder: Folder containing one ``*.fasta`` file per candidate gene.
    :param genomes_folder: Folder containing the genome ``*.fasta`` files; reports are written here too.
    :raises subprocess.CalledProcessError: if a blastn run fails.
    """
    genomes = sorted(glob.glob(os.path.join(genomes_folder, '*.fasta')))
    genome_hit_report_file = os.path.join(genomes_folder, 'genome_hit_report.csv')
    gene_report_file = os.path.join(genomes_folder, 'gene_hit_report.csv')
    with open(gene_report_file, 'w') as f:
        f.write('Gene,OneHitPerGenome\n')
    with open(genome_hit_report_file, 'w') as f:
        f.write(','.join(['Gene'] + genomes) + '\n')
    if not genomes:
        # Without genomes the per-gene proportion is undefined (it used to be a division by zero).
        logging.warning('No genome FASTA files found in {}, so no genes could be checked.'.format(genomes_folder))
        return
    # Make blast DBs for all of our genomes.
    logging.info('Creating BLAST databases for genomes of interest.')
    for genome in genomes:
        subprocess.call(['makeblastdb', '-dbtype', 'nucl', '-in', genome])
    # Now that Blast DBs are created, take the first allele from each gene file (it's assumed alleles are REALLY
    # similar), and BLAST it against each of the genomes.
    genes = sorted(glob.glob(os.path.join(genes_folder, '*.fasta')))
    for gene in genes:
        sequence = next(SeqIO.parse(gene, 'fasta'), None)
        if sequence is None:
            # Empty gene file: nothing to BLAST, so nothing is reported for it.
            continue
        hits_per_genome = dict()
        with tempfile.TemporaryDirectory() as tmpdir:
            seqfile = os.path.join(tmpdir, 'sequence.fasta')
            blast_file = os.path.join(tmpdir, 'blast_out.tsv')
            SeqIO.write([sequence], seqfile, 'fasta')
            for genome in genomes:
                hits_per_genome[genome] = _count_core_gene_hits(seqfile, genome, blast_file)
        genomes_with_one_hit = sum(1 for genome in genomes if hits_per_genome[genome] == 1)
        with open(genome_hit_report_file, 'a') as f:
            f.write(','.join([gene] + [str(hits_per_genome[genome]) for genome in genomes]) + '\n')
        with open(gene_report_file, 'a') as f:
            f.write('{},{}\n'.format(gene, genomes_with_one_hit/len(genomes)))
class BlastResult:
    """One parsed line of BLAST output in the custom tabular format used by this script.

    The expected column order is:
    qseqid, sseqid, pident, length, qlen, qstart, qend, sstart, send, evalue.
    """

    def __init__(self, blast_tabdelimited_line):
        """Split one BLAST output line on whitespace and convert each column to its type."""
        columns = blast_tabdelimited_line.rstrip().split()
        # Columns 0-1: query and subject sequence names.
        self.query_name, self.subject_name = columns[0], columns[1]
        # Column 2: percent identity.
        self.percent_identity = float(columns[2])
        # Columns 3-8: alignment length, query length, query start/end, subject start/end.
        (self.alignment_length,
         self.query_sequence_length,
         self.query_start_position,
         self.query_end_position,
         self.subject_start_position,
         self.subject_end_position) = (int(columns[index]) for index in range(3, 9))
        # Column 9: evalue.
        self.evalue = float(columns[9])
        # Alignment length expressed as a percentage of the query sequence length.
        self.query_coverage = self.alignment_length * 100.0 / self.query_sequence_length
def confindr(args):
    """Run ConFindr contamination detection on every sample in the input directory.

    Validates dependencies and options, creates the output directory, removes any report from a previous run,
    makes sure the databases are available, and then calls find_contamination() once per set of reads. A sample
    whose analysis raises subprocess.CalledProcessError is written to the report as failed and the run continues.

    :param args: Parsed command-line arguments. Attributes read here: data_type, base_fraction_cutoff, Xmx,
        cgmlst, output_name, min_matching_hashes, databases, input_directory, forward_id, reverse_id, fasta,
        threads, keep_files, quality_cutoff, base_cutoff, tmp, rmlst and verbosity.
    :raises SystemExit: with code 1 if a dependency is missing or an option is invalid.
    """
    # Check for dependencies. Illumina data needs bbmap.sh; any other data type needs minimap2 instead.
    all_dependencies_present = True
    if args.data_type == 'Illumina':
        dependencies = ['bbmap.sh', 'bbduk.sh', 'mash', 'kma']
    else:
        dependencies = ['bbduk.sh', 'mash', 'minimap2', 'kma']

    for dependency in dependencies:
        if dependency_check(dependency) is False:
            logging.error('Dependency {} not found. Please make sure it is installed and present'
                          ' on your $PATH.'.format(dependency))
            all_dependencies_present = False
    # SystemExit is raised directly below: quit() is only injected by the site module, so it is not guaranteed
    # to exist (for example under 'python -S' or in a frozen application).
    if not all_dependencies_present:
        logging.error('Could not find all necessary dependencies, quitting...')
        raise SystemExit(1)

    # Check that the base fraction specified actually makes sense.
    if check_valid_base_fraction(args.base_fraction_cutoff) is False:
        logging.error('Base fraction must be between 0 and 1 if specified. Input value was: {}'
                      .format(args.base_fraction_cutoff))
        raise SystemExit(1)

    # If user specified Xmx, make sure that they actually entered a value that will work. If not, the method will
    # tell them what they did wrong. Then quit.
    if args.Xmx:
        valid_xmx = check_acceptable_xmx(args.Xmx)
        if valid_xmx is False:
            raise SystemExit(1)

    # Don't yet have cgmlst support with Nanopore reads - don't let user do this.
    if args.cgmlst and args.data_type == 'Nanopore':
        logging.error('ERROR: cgMLST schemes not yet supported for Nanopore reads. Quitting...')
        raise SystemExit(1)

    if args.data_type == 'Nanopore':
        logging.warning('WARNING: Nanopore contamination detection is highly experimental. Any results should be taken '
                        'with several very large grains of salt. If you are going to try this, try setting -q to '
                        'somewhere in the range of 12-15, and only count things as contaminated that have at least '
                        '10 contaminating SNVs. Even then, results may be wonky. In particular, samples with lots of '
                        'depth will probably always show up as contaminated.')

    # Make the output directory.
    if not os.path.isdir(args.output_name):
        os.makedirs(args.output_name)
    # Remove any reports created by previous iterations of ConFindr
    try:
        os.remove(os.path.join(args.output_name, 'confindr_report.csv'))
    except FileNotFoundError:
        pass
    # Set the minimum number of matching hashes
    min_matching_hashes = args.min_matching_hashes
    # Check if databases necessary to run are present, and download them if they aren't
    check_for_databases_and_download(database_location=args.databases)

    # Figure out what pairs of reads, as well as unpaired reads, are present.
    paired_reads = find_paired_reads(args.input_directory,
                                     forward_id=args.forward_id,
                                     reverse_id=args.reverse_id)
    unpaired_reads = find_unpaired_reads(args.input_directory,
                                         forward_id=args.forward_id,
                                         reverse_id=args.reverse_id,
                                         find_fasta=args.fasta)
    # Consolidate read lists
    reads = sorted(paired_reads + unpaired_reads)
    # Process the reads, one sample at a time.
    for fastq in reads:
        # Unpaired samples are named after the file name up to the first '.', paired samples after the part of
        # the forward read's file name that precedes the forward identifier.
        if len(fastq) == 1:
            sample_name = os.path.split(fastq[0])[-1].split('.')[0]
        else:
            sample_name = os.path.split(fastq[0])[-1].split(args.forward_id)[0]
        logging.info('Beginning analysis of sample {}...'.format(sample_name))
        try:
            find_contamination(pair=fastq,
                               forward_id=args.forward_id,
                               threads=args.threads,
                               output_folder=args.output_name,
                               databases_folder=args.databases,
                               keep_files=args.keep_files,
                               quality_cutoff=args.quality_cutoff,
                               base_cutoff=args.base_cutoff,
                               base_fraction_cutoff=args.base_fraction_cutoff,
                               cgmlst_db=args.cgmlst,
                               xmx=args.Xmx,
                               tmpdir=args.tmp,
                               data_type=args.data_type,
                               use_rmlst=args.rmlst,
                               min_matching_hashes=min_matching_hashes,
                               fasta=args.fasta,
                               debug=args.verbosity)
        except subprocess.CalledProcessError:
            # If an external command fails, the traceback is logged and the sample is added to the report with a
            # note that it failed.
            multi_positions = 0
            genus = 'Error processing sample'
            write_output(output_report=os.path.join(args.output_name, 'confindr_report.csv'),
                         sample_name=sample_name,
                         multi_positions=multi_positions,
                         genus=genus,
                         total_gene_length=0,
                         database_download_date='ND')
            logging.warning('Encountered error when attempting to run ConFindr on sample '
                            '{sample}. Skipping...'.format(sample=sample_name))
            logging.warning('Error encountered was:\n{}'.format(traceback.format_exc()))
            # NOTE(review): nesting reconstructed from a flattened copy of the file - confirm that this cleanup
            # belongs to the failure path.
            if args.keep_files is False:
                sample_folder = os.path.join(args.output_name, sample_name)
                # The folder may not exist if the failure happened before it was created; removing a missing
                # folder would otherwise raise and abort the remaining samples.
                if os.path.isdir(sample_folder):
                    shutil.rmtree(sample_folder)
    if args.keep_files is False and args.tmp is not None:
        if os.path.isdir(args.tmp):
            shutil.rmtree(args.tmp)
    logging.info('Contamination detection complete!')
Skipping...'.format(sample=sample_name)) 117 | logging.warning('Error encountered was:\n{}'.format(traceback.format_exc())) 118 | if args.keep_files is False: 119 | shutil.rmtree(os.path.join(args.output_name, sample_name)) 120 | if args.keep_files is False and args.tmp is not None: 121 | if os.path.isdir(args.tmp): 122 | shutil.rmtree(args.tmp) 123 | logging.info('Contamination detection complete!') 124 | 125 | 126 | def main(): 127 | version = get_version() 128 | cpu_count = multiprocessing.cpu_count() 129 | parser = argparse.ArgumentParser() 130 | parser.add_argument('-i', '--input_directory', 131 | type=str, 132 | required=True, 133 | help='Folder that contains fastq files you want to check for contamination. ' 134 | 'Will find any file that contains .fq or .fastq in the filename.') 135 | parser.add_argument('-o', '--output_name', 136 | type=str, 137 | required=True, 138 | help='Base name for output/temporary directories.') 139 | parser.add_argument('-d', '--databases', 140 | type=str, 141 | default=os.environ.get('CONFINDR_DB', os.path.expanduser('~/.confindr_db')), 142 | help='Databases folder. To download these, you will need to get access to the rMLST databases. ' 143 | 'For complete instructions on how to do this, please see ' 144 | 'https://olc-bioinformatics.github.io/ConFindr/install/#downloading-confindr-databases') 145 | parser.add_argument('--rmlst', 146 | default=False, 147 | action='store_true', 148 | help='Activate to prefer using rMLST databases over core-gene derived databases. 
By default,' 149 | 'ConFindr will use core-gene derived databases where available.') 150 | parser.add_argument('-t', '--threads', 151 | type=int, 152 | default=cpu_count, 153 | help='Number of threads to run analysis with.') 154 | parser.add_argument('-tmp', '--tmp', 155 | type=str, 156 | help='If your ConFindr databases are in a location you don\'t have write access to, ' 157 | 'you can enter this option to specify a temporary directory to put genus-specific ' 158 | 'databases to.') 159 | parser.add_argument('-k', '--keep_files', 160 | default=False, 161 | action='store_true', 162 | help='By default, intermediate files are deleted. Activate this flag to keep intermediate ' 163 | 'files.') 164 | parser.add_argument('-q', '--quality_cutoff', 165 | type=int, 166 | default=20, 167 | help='Base quality needed to support a multiple allele call. Defaults to 20.') 168 | parser.add_argument('-b', '--base_cutoff', 169 | type=int, 170 | default=3, 171 | help='Number of bases necessary to support a multiple allele call, and automatically ' 172 | 'increments based upon gene-specific quality score, length and depth of coverage. ' 173 | 'Default is 3.') 174 | parser.add_argument('-bf', '--base_fraction_cutoff', 175 | type=float, 176 | default=0.05, 177 | help='Fraction of bases necessary to support a multiple allele call. Particularly useful when ' 178 | 'dealing with very high coverage samples. Default is 0.05.') 179 | parser.add_argument('-e', '--error_cutoff', 180 | type=float, 181 | default=1.0, 182 | help='Value to use for the calculated error cutoff when setting the base cutoff value. 
' 183 | 'Default is 1.0%%.') 184 | parser.add_argument('-fid', '--forward_id', 185 | type=str, 186 | default='_R1', 187 | help='Identifier for forward reads.') 188 | parser.add_argument('-rid', '--reverse_id', 189 | type=str, 190 | default='_R2', 191 | help='Identifier for reverse reads.') 192 | parser.add_argument('-v', '--version', 193 | action='version', 194 | version=version) 195 | parser.add_argument('-dt', '--data_type', 196 | choices=['Illumina', 'Nanopore'], 197 | default='Illumina', 198 | help='Type of input data. Default is Illumina, but can be used for Nanopore too. No PacBio ' 199 | 'support (yet).') 200 | parser.add_argument('-Xmx', '--Xmx', 201 | type=str, 202 | help='Very occasionally, parts of the pipeline that use the BBMap suite will have their memory ' 203 | 'reservation fail and request not enough, or sometimes negative, memory. If this happens ' 204 | 'to you, you can use this flag to override automatic memory reservation and use an amount ' 205 | 'of memory requested by you. -Xmx 20g will specify 20 gigs of RAM, and -Xmx 800m ' 206 | 'will specify 800 megs.') 207 | parser.add_argument('-cgmlst', '--cgmlst', 208 | type=str, 209 | help='Path to a cgMLST database to use for contamination detection instead of using the default' 210 | ' rMLST database. Sequences in this file should have headers in format ' 211 | '>genename_allelenumber. To speed up ConFindr runs, clustering the cgMLST database with ' 212 | 'CD-HIT before running ConFindr is recommended. This is highly experimental, results ' 213 | 'should be interpreted with great care.') 214 | parser.add_argument('--fasta', 215 | default=False, 216 | action='store_true', 217 | help='If activated, will look for FASTA files instead of FASTQ for unpaired reads.') 218 | parser.add_argument('-verbosity', '--verbosity', 219 | choices=['debug', 'info', 'warning'], 220 | default='info', 221 | help='Amount of output you want printed to the screen. 
Defaults to info, which should be good ' 222 | 'for most users.') 223 | parser.add_argument('-m', '--min_matching_hashes', 224 | default=150, 225 | type=int, 226 | help='Minimum number of matching hashes in a MASH screen in order for a genus to be considered ' 227 | 'present in a sample. Default is 150') 228 | args = parser.parse_args() 229 | # Setup the logger. TODO: Different colors for different levels. 230 | if args.verbosity == 'info': 231 | logging.basicConfig(format='\033[92m \033[1m %(asctime)s \033[0m %(message)s ', 232 | level=logging.INFO, 233 | datefmt='%Y-%m-%d %H:%M:%S') 234 | elif args.verbosity == 'debug': 235 | logging.basicConfig(format='\033[92m \033[1m %(asctime)s \033[0m %(message)s ', 236 | level=logging.DEBUG, 237 | datefmt='%Y-%m-%d %H:%M:%S') 238 | elif args.verbosity == 'warning': 239 | logging.basicConfig(format='\033[92m \033[1m %(asctime)s \033[0m %(message)s ', 240 | level=logging.WARNING, 241 | datefmt='%Y-%m-%d %H:%M:%S') 242 | 243 | logging.info('Welcome to {version}! 
Beginning analysis of your samples...'.format(version=version)) 244 | confindr(args) 245 | 246 | 247 | if __name__ == '__main__': 248 | main() 249 | -------------------------------------------------------------------------------- /tests/real_fastqs/miseq_casava_R2.fastq: -------------------------------------------------------------------------------- 1 | @M05722:45:000000000-LHCCL:1:1101:22781:2332 2:N:0:9 2 | GGCGACGCCAGACGAAATATTGGCTAATAAAAAACTGAGCGCAATAAAGCTTTTGCCGTATTTAATTCGCCGCAACCCGGCGGCAAACAGCAGCGAAGCGATAAAGACAATCGGGATATACCACATCAGAAAGCGACAGATATTCCACATCGACGTCCAGAAAAGCGCATCATTGACCACGCCAAGCAGGTTTTGTAGCCCCGAAAAACGGGGCGTACCGATAAACTGCCATTGGGTCACGCTCAGCCCC 3 | + 4 | CCCCCGCFGDGCFGGGGGGGGDFC@@E9FFFEDC@FG9FFFGEFFFEGGGG9FEGGGG@@FGCFDDFGGGC+>FFGFGGDGGGFGGGGGGGF=CECFGGGDDGBA,AFGCFGDEC@GDGGFDC@@E7B99D8BF88CGGFEGGGGFGFGGGG6C8;9CEDFC,8C<=DD<7FGF:+@CGFFEABBA>A;30;6=8:C66)4<9>>(41:9011,3719?F:(3(6794,1)604((- 5 | @M05722:45:000000000-LHCCL:1:1101:17124:2405 2:N:0:9 6 | GAGAATTTCATCGGCAATCGGCGGCGGTGAGCAGAGCACCACCTTTTTACAGCCGGCAATACGTGCCGGGGTAGCCAGCATTAACACCGTTGAAAACAGCGGGGCTGAACCGCCAGGAATATAGAGACCCACGGACGCGATTGGGCGGGTAACCTGCTGGCAGCGCACGCCCGGCTGGGTTTCGATATCCACCGGCGGCAGAATCTGCGCGGTGTGGAAGGGATCAATATTGCCCACCGCCGCCTGCATGG 7 | + 8 | CCCCC9FGGGGGG@GGGGGFGGGGGGEFEFGFGFGGGGEFGGGG@,EEFFCFGGGDEGGGGGGGGGFCECCECEGGGFGGBFGGGGGGGFGGCFFGGGGFECFFGGGGGGGG7EGGGCDFGGGCGGFEEF>EGGGGEGDGE5@FGGGGGCEGGFFGCGGGEG=3=DGDG373DDD)97>:5)@:>FFEFF@FFFB>F31(7:(690(8?14>: 9 | @M05722:45:000000000-LHCCL:1:1101:9672:2430 2:N:0:9 10 | CCCTTTACGGTACCGGGCAACTGCCGAAATTTGCCGGCGACATGTTCCATACTCGTCCGCTGGAAGAAGAAGCTGACAGCAGCCACTATGCGCTGATCCCAACGGCGGAAGTGCCGCTGACTAACCTCGTTCTCGATGACATCATCGCCGAAGACGATCTGCCGATCAAACTGATCTCACACACGCCGTGCTTCCTCACTGAATCGGGTTCCTACGGTCGTGACACGCGTGGTCTGATCCGTATGCACAC 11 | + 12 | CC>,@,,7>E7@E:*@EEC8C?>EGEEC99?DDEC47*::?+@+;D69A)58>FFB5A5:83).+.:A2<)61491<624),8:?<942016.(-2440,>):46A4(38(1))-.. 
13 | @M05722:45:000000000-LHCCL:1:1101:8972:2506 2:N:0:9 14 | GATCGTACGCGTACCGACGGAGTAGCCCGCTTCCATGGCGTCTAACGAACTCTGCGCAGAGACAACGGCCTGTTTGTAGGCGTTGATGCTGCTGATAGACGCGTTGACGTTGTTGAAGGAGGAACGCACGGTCTGCACTACGGAACGGTGCGCGCTTTCCAGCTGCTCGCTGGCGCCGACGAAGTTGTACTGCGCCTGTTTCACCTGCGAGTTCACCATCCCGCCCTGGTACAGCGGC 15 | + 16 | @8ACCGGGGGEGGGGFDCFFGGGGGG9FFGGGGGGGFEFFFCGGGFFF>>146>BDABB?0966>FFFFAF<7>B9 17 | @M05722:45:000000000-LHCCL:1:1101:6726:2518 2:N:0:9 18 | TCTTTGCTGCAGCCCGTTAATGAGTAGCCTTTAATCTGGACATTCTTGTAACCGCTATCGACAAGAACTTTTTTTGCTTCATTGGCATCGGTGCAACCAGATGCGGCAAATACGACCAATTCGAGTAACATCTTCACTTTCCCCCTCCCCCCCCCCTACTCCCCCCGCTCAATTCCCCCTTCCCCTTTTATGTCCTCTCCTCTTTCTATTTCTCCCCTTTTCTTCTTTATCTCCGCCCCCTCCCCATTCT 19 | + 20 | <@CCCCEEDF8EE,C+,,9,,9,94>DF,>==AD9D,+@@E6?CE>CE87*,,41,*@?*/)))*2++1*2)0))02)2./*+1+3+1).1((.(1/).))11.)1))/,(.)-65)-...)))...(,(-((,(-,((.)-- 21 | @M05722:45:000000000-LHCCL:1:1101:13069:3053 2:N:0:9 22 | GTACGTAATCATCTGCGGAGATGAATTCATCATAAAGCTTACGGGCAGGATTATGAATACGCGTATGCCAGTAGAAACGCGACCATTTCTGTTCTTTGGCTTCATTTAGCACGAAATGCATGAGTTGCCGGGCAATGCCCAGGCCACGAAATGACGGGTCCACGAACAGGTCTTCCAGATAACAGATGGGTTGGGTCACCCACGTGCCTTCGTGTAATACGCACAGGGCAAAACCGGTCACGCGCCCTT 23 | + 24 | CCCCCGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGG?@FGGGGGGGGGGGGGGGGGGDGGGDGGGGGGFFGGGGDFGFFFFFFFFFFFFFBFFFFFEEFDFFFFDD4C<@@FFDEEFFFFFFFF@@@F16637>>FFF??<11>F02 25 | @M05722:45:000000000-LHCCL:1:1101:8021:3120 2:N:0:9 26 | GTTGTGACGCGGGAAGGAAGCCACTTGGCGAACCGCACCGTTTGCCAGCGTCTGGATAACCAGCGCCGAACGGCCGCTTTCGAAGGTGACGTAAGCCAGTTTCGAGCCGTCCGGAGACCACGCCGGAGACATCAGCGGCTGCGGAGAACGGTGAACGACAAACTGGTTGTAACCATCGTAGTCAGAAACGCGCAGTTCGTACGGGAACTGACCGCCGTTGGTCTGCACGACGTAGGCAATACGGGTACGGA 27 | + 28 | CAB@CF:CEFFGDCFEGGGFGGGGGGGG9FF@CFDBFC@CC:FBFGF=FC:4BC=FGGGGGFFGGFGG<5EFCFEFCCG7F=FG@:FFC:EGGGGGGDFC,DFCGGGGDG>CC5EGCFEGGGGGGCEGGGGG7?DGCCDGGGGG4CDGFFFFFFFFFF3@DDDF?AFF6??06>:>B07A>?>FFFFB0:?::3( 29 | 
@M05722:45:000000000-LHCCL:1:1101:7487:3197 2:N:0:9 30 | GTTACTACCAGTTTAAACAGCGTCTGTAGACTATGATTGAGTAAACTTTGTCTCGTGGGGTTATCGCGATATGCGCCTTGTGCAGCTTTCTCGTCATTCGATTGCTTTTCCCTCACCTGAGGGCGCATTACGTGAACCAAACGGACTGCTGGCGTTAGGCGGCGATCTCGGCCCGGCGCGCCTGTTGATGGCCTATCAACGCGTGATCTTCCCGTGGTTTTCACCCGGCGACCCGATCCTCTGGTGGTCG 31 | + 32 | CCCCCGGGFGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDDGGGGGGGGEGGEFGGGGGGGGGGGGGGGFGGGGGGGFFGGGGGGGGGFGGGGGFCGFGGGGGGGGGGGD@FGGGGGGGGGGGGE8ECEGFGGEGGGDGGGGGGGGGGGGGGGCC*3;DGGFFFFFFFFF+;AFFF@3=C7@FF@(4:1A;FFFF>;6;F9BBF:AFF?)1 33 | @M05722:45:000000000-LHCCL:1:1101:7839:3227 2:N:0:9 34 | CCTTTCTGTACAGCAACGTTTTCTGCTGGCCGTACCAGCCGTACGAGGAAAATTATGAGACGCCTTCCCGTGTTTTTTGTCCTGGACTGTTCAGAGTCCATGATTGGTGAAAACCTGAAAAAAATGACTGATGGTCTGCAAATGATCGTCGGAGATTTAAGAAAGGATCCACACGCACTTGAAACAGCCTGGGTCTCGGTAATCGCATTTGCCGGTGTAGCCCGTACGATTGTACCTCTTCACGAAATTG 35 | + 36 | CCCCCCGFGFFGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGDGGGGGGGGGFGGGEGGGGGFGGEFFGGGGGGGGGFGGGGGGGGGGGDGFGEGGGGGFGEEFGGFDFCFGGGFGGGGFGGGGGGGGGGGCGGGDFGGGGGGGGGGFFFGGDCDGGGFFFFFBFFA59ACFFDCCFBADAECB@D2@?E4BFB=@F=EF3:>B>?B0(6;4?F75 37 | @M05722:45:000000000-LHCCL:1:1101:21825:3431 2:N:0:9 38 | TGTTTCATCATGCCTAAATGCTGGGTCACGTAAATATACGAGATCCCCTGTTTTTCCTGCAGTTCGAGCATCAGGTTAACCAGCTGCGAGCGCATCGACATATCCAGCGAGGCCAGCGCTTCATCACAGATGATCACTTTAGGACGTAGGATTAGCGCGCGCGCCAGCCCCATACGCTGTTTCTGGCCCGGCGCCAGCATATGCGGATAGT 39 | + 40 | 8A88,CEFFEDF9F9FFFA;DFDA3>9>FD<47566:11<(9 41 | @M05722:45:000000000-LHCCL:1:1101:15030:3498 2:N:0:9 42 | CATCAGCACCGCGCAGTTAAGCTCCTGGCGAAGCTGATTAATCAGATCGTATAGCGCCACCTGGCCGTTGACGTCAACGCCCTGGGTGGGTTCATCCAGCACCAGCAGCTGAGGCTTGTTTAACAGCGCACGCGCCAGCAGCACGCGCTGGGTTTCACCACCGGAGAGCTTCTGCATCGGTGCATCCTGCAGGTGCCCGGCCTGTACGCGCATCAGCGCGGGCAGAATATCTTCTTTGCGCGTTGCCGGGG 43 | + 44 | CCCCCGGGGGGGGGGGGGGGGD>EEGGGGGGGGGGD5EEGFEGG0;(24??BFFF;BFF(5<22)).-)<)4)3>>BBF59@FFGEC*>;;:EEGGEFEEGGFGDECF:+3+0**).08@FF))9>>)84?9)*54((7(-2(444((,3(-,(,(((462(((--40:4:666,81<0(311(( 49 | 
@M05722:45:000000000-LHCCL:1:1101:14547:3883 2:N:0:9 50 | ACGCTAAACAGCCGCTGCTGATTCGTACGCGCCGATTACTGGGGCTGTGGTGCTTTGCGTGGGCGACGATCCACCTGACCAGCTACAGTTTACTGGAGCTCGGCATCCGCAATGTGTCGCTATTAGGGCAGGAGATTGTTACCCGGCCGTATTTGTTGCTGGGTTTCGCCTGCTGGTTAATCCTGCTCGCGCTGGCGGTGACCTCTAC 51 | + 52 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGDGGGGGGGGGGGGGGGGFFFFFFFFFFAFFFAFFF>@FFFEFFFB< 53 | @M05722:45:000000000-LHCCL:1:1101:25476:3918 2:N:0:9 54 | TTTGAGAACAACCTGCGCATTATGGTCGTGGGCGACTTCGATGCCGATGGCGCCACCAGCACCGCGCTGAGCGTGCTGTCGCTGCGCGCCATGGGCTGTCGCGCCGTCGAGTATCTGGTGCCGAACCGTTTTGAAGACGGCTACGGTCTGAGCCCGGCAGTGGTCGATCAGGCCCACGCCCGCGGCCCGCAAATGATCATGACGGTCGATAACGGGATCTCTTCCCATCCGGGGGTCGATCTTGCCCATGA 55 | + 56 | @CCCCGGGGGDGGGG8FEGGG@FFGFFEG@CFFCEFGGGG?<C7CFGGE9=FDFADE+BC8B>CEGE5DC;EC*@C>F54D:?C79D,;8:*474=:+FGCDDG3))2)>>)<).)7)7)9<25=59A<>4>8<>B6(4)-4(((48>A>6A:<6(((2(-39?0(-(()-),.4(4- 57 | @M05722:45:000000000-LHCCL:1:1101:14095:3991 2:N:0:9 58 | CTACACCACCGATCCGCGCGTGGTGCCTGCGGCGAAACGTATTGATGAAATTGCCTTTGAAGAGGCCGCCGAGCTGGCGACCTTCGGCGCGAAGGTTCTGCATCCGGCCACGTTGCTGCCAGCCGTACGCAGCGACATTCCTGTGTTTGTCGGCTCCAGTAAAGATCCCAAAGCGGGCGGCACGCTGGTGTGCAACGAAACCGCCAATCCGCCGCTGTTCCGCGCGCTGGCGCTGCGCCGTAAGCAAACG 59 | + 60 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGFGGFGGGGGGGGGGGGDGGGGGGGGDGGGGGGGGGGGGGGFFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGCGGGGGGGGFGCGGFGGGGGCFCGGFGGFGGGGGFFFFFFFEDFF>BFFADB;B?9BF6BBFFFFF1?F>FFFFFBFFFF:1?BBFFBBF3??FFBBBFF 61 | @M05722:45:000000000-LHCCL:1:1101:19331:4173 2:N:0:9 62 | ACGCTGAAAGATGCCTGTAACGAAGCGCTGCGCGACTGGTCTGGCAGCTATGAAACCGCGCACTATATGCTCGGCACCGCAGCAGGCCCGCACCCGTTCCCAACCATCGTGCGTGAATTCCAGCGCATGATTGGTGAAGAGACCAAAGTGCAAATCCGTGAAAAAGAAGGTCGTCTGCCGGATGCGGTTATTGCCTGCGTCGGCGGCGGTTCTAACGCCATCGGTATGTTCGCC 63 | + 64 | 
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGEGGGGGGGGGGGGGGGGGGFDEGDFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEFGGGGGEGGGGGGGGGGGGGGGGFFFFDFFFFFFDFFFFFFFFFFFFBB>FBFFFFFDFFFFFF>FFFFFB?FFFFFFB 65 | @M05722:45:000000000-LHCCL:1:1101:23071:4268 2:N:0:9 66 | AACGTTGGCTATTCCGCGCAGATCCGCAGCGATGCCACCGCCTGGCGTATGTCGAACAGTAAAGCCAACCCGGTCGTTAATATTTACAACAACCATGATGTGAGCATGCCAGCGTACGCATCGGTGGGCGGTAACTATCACGATCCGCTGGTGACTGCGCGTAACCGTACTCAGGGCTGGCTGTTCAGCGATACCGTCGGATTCTTTGACGATACGCTGCTATTTACCGACGCGGCGCGCAAGCAGAAGGG 67 | + 68 | >7BC86BEGGGGACCFGGFGDCF@EC@CGGGGGGGGEED=CE4*CCFGGFC*CDFB45AF+;6@>3:A:;<(>690:1<<49:A)41:44?F0:>FA>)6)671(4141291(,-3-((47DG3C>@FF?FFBF:<757.,9>EF;(11)6AA>4< 73 | @M05722:45:000000000-LHCCL:1:1101:11189:4446 2:N:0:9 74 | CAACAAGGAGAGGTGAAATGAGCGTGATGACCTTAATTAATAGCGCGGTGGCGTGGTTTGCTTTTGCCGCGGTATTCGCCTTTTTACTGTCGCTGAAAAAATCGCTGAGCGGTGTGATTGCCGGTATCGGTGGCGCACTGGGTAGCCTGTGCGCGCTGCTGGCTGGCGTGCAGGTGCTGGTTCGTGGCTTCCCGGCGATGGGCAGCCTACGGCTGGTGCACTACAGCGTACTGGTCACCCCGCTGAATGCG 75 | + 76 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDCFGGGGGGGFFFFFFFFFFFFFFFFFFFFF:BFFFFFFF:?B>>FFBFFFFFFF?F7>081:A:A<9:>FFBBF>9?FAA1 77 | @M05722:45:000000000-LHCCL:1:1101:13328:4518 2:N:0:9 78 | GTCCCAGCAACCGCTGCAACCGTGGCTGCTGAACACGCGCCAGGGGGTGTAAAACTCGGCGTTCGGCAGGCTGAAGCCGCTAATCAGCACCACCGGTATTTTTGCCGCCCAGGCAAGCCACGACAGCCCGCTGGCCAGGCCGATAAAGAAGCTGGCGTGGCGAAGCAGATCAACCCGCGCCTGCAGCGGCAGGGCCCCGGTGCAGTCTTCTGCGCCGTAGGGGATGTGGTTCCAGACGAAGCCGTGCC 79 | + 80 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGDGGGGGGGGGGGGGFGGGGGGGGGGGGCEGDGGGGGGGFGGFGGGEGDDCEFGGGGGFGGGGGGGGGGGGFG4DGDGFGGGGFFFFFFFFFEEFFB@?:9DBFFF(:BFF>AFBB>6:?FF>BB?FFBBFFFFFBBFF:019:>F: 81 | @M05722:45:000000000-LHCCL:1:1101:24115:4677 2:N:0:9 82 | 
GCGCGCGGCGGGTATCGGGCAGGTGGTGTATCACCGCAGCCGTGATGCGCAGGCGGCTGGCGTGGCGTGGAGTGAGGCGGATATCAGCGCCATCAAACGCCTGGCGGATATGGGCTTTAAAGTCACCGTCACCGGTGGACTGGCGCTGGAAGATTTACCGCTGTTTAAAGG 83 | + 84 | CCCCCGEEEGGGGGGGGGGGDCGGFFGGFGGGGFGE@CEFGGGDGFGGGG7C=FGDGGGDGF:CEGGBFG:FFCFFCDEDGGGA=FFFGGEFEGCC 85 | @M05722:45:000000000-LHCCL:1:1101:11908:4786 2:N:0:9 86 | CGCCGGGACTCTGCGCCCGACACGCCATAGTAGTAGCGGTTAAAGCGGTCATCGTAATAATAGACACCCGCCGCGGGAATCACCGACAGCTTGCCCACCGGCAATACGCGGAACCAGGACAGCTCGCCAACCCAGCCATCGCTGTTGTCCAGAACATCCGCCGCCGCCGACAGCTTCAGGCTGCCCCACTTTTCATGGTGATACCATGCCATGCCCGCCATGGCGGTACTGTGACGCTTATCCAGCTGCTT 87 | + 88 | CEGEGGC@CEGFFDECEC:ECGGFEFCECE8CGG>CC>>EFF>FGDGGGGG@FGGFC78C6FCGGGGEDDDG<<3CD:CFGGFF45>F:5>>DDFFFFFAF)0942677A0>21:B96<2<612;1:AFF)6>B<>34.4:2 89 | @M05722:45:000000000-LHCCL:1:1101:14685:4816 2:N:0:9 90 | CATGAACGATTATCTGCCTGGCGAAACCGCCATCTGGCAGCGCATTGAAGGCACACTCAAGCAGGTGCTCGGCAGCTACGGTTACAGCGAAATCCGTTTGCCGATTGTAGAGCAGACCCCGTTATTCAAGCGCGCTATCGGTGAAGTCACCGACGTGGTTGAAAAAGAGATGTATACCTTTGAAGACCGCAACGGCGATAGCCTGACGCTGCGTCCGGAAGGTACTGCGGGCTGCGTACGCGCCGGCATCG 91 | + 92 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFFGGGGGGGGGGGGGFFFFFFFFFDEBFFFFFFFFFFFFEFFFFFFFBBFF>9>?>FFFFBFFBFB9B>FFFF:;FF9BFFFF9B>2 93 | @M05722:45:000000000-LHCCL:1:1101:9465:4932 2:N:0:9 94 | GTGATAGAAACCAACTGCCTGATTGATGCGCTGGGCACGCATCCGCTGGCCTTTGGCAAACTGCCGCCGCTGATGAACGGCCTGACCCAGCAGGTGAAAGATTTTGAGCGTCTGACCATTGATGCCGCCGTATATGGCGATAAGCAAAAAGCGCTGCTGGCGCTGGTCGCCAACCCGCTGGTCGCCGA 95 | + 96 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGCGGGGGGFGGGGGGGGGGGGFGGFGGGGGGGDGGGGFGFGGFGGGGGGFGGGGGGGGGGDGGGGG>9DGGCFGGGDG4CDG4CEFFFF>BFF 97 | @M05722:45:000000000-LHCCL:1:1101:5895:5333 2:N:0:9 98 | 
ATCCAGCGGCGTGGTTTATCAAACGGATAGTACAGCGCGGCGAACATCTGATGCTGGCCAACGTCGGAGGTCACATAAGCATCGCCGTGGGTCAGCCTCCAGATGGCCTCAATGACCGCCTGCGGCTTTATCTGTTCACTTTTTTCGTCATACTTCAGGCA 99 | + 100 | 8CC,AFGEE7FFGGGDE,,,D,AF9=DD@C+=7,8>EEG,@DC;F>@;B;5@>E@8EGGEFC,;DFGCCCFG 101 | @M05722:45:000000000-LHCCL:1:1101:6264:5384 2:N:0:9 102 | GTCCTGATTCGCGCTCAGATCGCCGGTGTTGCCGTTCAGTTCAAAGCCCGGTACACCGCGCATCTGAGAGAAATGATTCGCCAGCGCCCAGGCATCGACGCCCATTGCGTACAGGCGCGCCAGCGAGTAGTCGTTGTTGACCGCGCGTAGCGCCTGCTGCATCAGCGCCGGATTGCTGCCGGCCAGCATCGGGATTTCACTGTACTGCAAACCTTCCATTTCCAGACGGAAGTCCGGGCCCGCAGTGCC 103 | + 104 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGAAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGDGGGGDEFFFGGGCFGGGGGGGGGGGGFEGGGGGGGGGGGCE5=FDDGGFGGGGGGGGFGGGGGGDGGGGFGGFFDFFFFFFFFFEDEB7?FAB2>AFFFFFFFF?B?FFFFFFFAFBFF:BB?:F:>DBB;(39BBB>F(49 105 | @M05722:45:000000000-LHCCL:1:1101:26553:5391 2:N:0:9 106 | GGCTGGTGCTCAATTATGACCTCTATGCCTCACAGGAAAAAAGTACGCAAAATAGCGCCAGCGCCTACAGCGAATTGCGCGCGTTTAGCGGCATTGGCGTGCTCAGCTCCACGCAAATAGCGCGCTATAC 107 | + 108 | CCCCCGGEFFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGDFDEGGGGGGGGGGDFGGGGGFGCEGGGGGGGGEGGGGGGGGGGGDFDGCEGDBDFGGFEEFGGGGGGGGGGGGGGGGGG; 109 | @M05722:45:000000000-LHCCL:1:1101:5389:5526 2:N:0:9 110 | GTGGACAACCTGACCAACGCCGGTAACATTGTGTTTGTGCCGTCCCAGTGCACCTTCACGCCTCACACGCTTACCGCAACCAACCTTATTGGTAACGGCTCCACCATCACGCTCAATACCGTTCCGGTCTACAGCAGTTCACCGTTAGATAAAG 111 | + 112 | -A,6,C+,4+++++,3,,3,,733,3,+,6+3,6,,,3 113 | @M05722:45:000000000-LHCCL:1:1101:22262:5649 2:N:0:9 114 | CTCGCCAGATGCGCGGCGCAGCGGTGAACCGCCGCTTTTGGCAGCCCGGTCGAGCCGGAGGTCAGCGTCATGGAGGCCAGCCGTTCAGCCTGCCAGCCGACATCGTGGTACTGGCGCGCGCTACGCAGCCGCAGCACAGGCAAATCGTAGTGCGCCTCGCCGTCGAGATTAAGCACATGGCTGACGGTCAGCCCCGGCAGCAGGTCACTGACCAGAGCCGCGGGCAGACGCGGGTTGCGCGGTAAACTGCG 115 | + 116 | 
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGDGEGDGGGG+@FGGFGGGGGCECF7FEEGGCFFFFGGGGCGGFCGGGFGCFEFFGFCFGFGGGGGGGDGGGDGGGFFGGGGCGGGGEFGEGGGGGE@DGDEGGGGGGGGGF*FFGGGGCFF@>>)83>>>44;B0>6DF>B?((48>:A)8?BF6?6((,(49>09?BFF;;>04(4(,179676<)6DEFFFFFFFBE69?F?(:D?<:?>?FFB>:67:7:?F>B>>B20,119>FF>B;>21:)622(-7(( 121 | @M05722:45:000000000-LHCCL:1:1101:14226:5786 2:N:0:9 122 | CCCTGCCAGCCGTCAACAACCTGAATACCGAACTGCATGTCACCTTCTTCTTTCACCAGGTTGAGCGTCCCTTTACCGGTTAACTGAATCAGCGACGATTTGCCCTGCAGGTTATTCAGCGTCAGCACGCCGTTATCGAGATCGATGTCGGTGCTCATTTCATCCAGACGCGTAGCGTTGTCATCGTTCTCCTGCGCGCGCACGTCGGTGCTGCGCTCAACGGCCTGCTGCACCAGCTTCTGGAAGTTCAT 123 | + 124 | CCCCCGGGFGGFGG7FDFAFGFGGCFGGFCEGGGGGFFFGGGGFGGGEEDEGFGCFG9FCFACF,CCFGGGGFGEGGGDGGDCGFG9:CC8+>EGCG68CGGGGF?+8DE?>B:FG8+@=CEFDFEGGFGGGFD89DG>D77?C>C*@FAFFFFFFFFBFAE@@)656(31:@0(609<<)5(54<)) 125 | @M05722:45:000000000-LHCCL:1:1101:5108:6228 2:N:0:9 126 | GTGCCTGCTCGTCAACGTATCGGTTCGCGACGTCGGCCAGCGCCATTATTTCACGAATCGCTTTACCGAATTCACGGCTGTCCCACGCTTCGCCAATGCTGGTTGCCGCGTCGGTGTAGGTTTTGTACATCTCTGGGTCAGCCAGTTCAGCGGACAGCACGCCGTCGAAACGCTTAGCGATAAAGCCAGCGTTACGGGAAGCCCGGTTCACCACTGTGTTCACGATATCGGCGTTGACGCGCTGGATGAAA 127 | + 128 | <@,BCDFFCDFFF,FGDC@,C;+FC,@FCEGGGGGDEGGGD>FFDG9FCCG<+8CEFGG@FFGG9,7>9EC:>>FFFGD8FGCAF7E5E>EEC,CGCC5)//*/*1><35/C*+)).0:+1+8A=FA)0)4((.4,,(4((-1,(642<13)65-,))6(,(-(:(909>1(4<2:(29((...2)6 129 | -------------------------------------------------------------------------------- /tests/real_fastqs/miseq_casava_multilane_R2.fastq: -------------------------------------------------------------------------------- 1 | @M05722:45:000000000-LHCCL:1:1101:22781:2332 2:N:0:9 2 | GGCGACGCCAGACGAAATATTGGCTAATAAAAAACTGAGCGCAATAAAGCTTTTGCCGTATTTAATTCGCCGCAACCCGGCGGCAAACAGCAGCGAAGCGATAAAGACAATCGGGATATACCACATCAGAAAGCGACAGATATTCCACATCGACGTCCAGAAAAGCGCATCATTGACCACGCCAAGCAGGTTTTGTAGCCCCGAAAAACGGGGCGTACCGATAAACTGCCATTGGGTCACGCTCAGCCCC 3 | + 4 | 
CCCCCGCFGDGCFGGGGGGGGDFC@@E9FFFEDC@FG9FFFGEFFFEGGGG9FEGGGG@@FGCFDDFGGGC+>FFGFGGDGGGFGGGGGGGF=CECFGGGDDGBA,AFGCFGDEC@GDGGFDC@@E7B99D8BF88CGGFEGGGGFGFGGGG6C8;9CEDFC,8C<=DD<7FGF:+@CGFFEABBA>A;30;6=8:C66)4<9>>(41:9011,3719?F:(3(6794,1)604((- 5 | @M05722:45:000000000-LHCCL:1:1101:17124:2405 2:N:0:9 6 | GAGAATTTCATCGGCAATCGGCGGCGGTGAGCAGAGCACCACCTTTTTACAGCCGGCAATACGTGCCGGGGTAGCCAGCATTAACACCGTTGAAAACAGCGGGGCTGAACCGCCAGGAATATAGAGACCCACGGACGCGATTGGGCGGGTAACCTGCTGGCAGCGCACGCCCGGCTGGGTTTCGATATCCACCGGCGGCAGAATCTGCGCGGTGTGGAAGGGATCAATATTGCCCACCGCCGCCTGCATGG 7 | + 8 | CCCCC9FGGGGGG@GGGGGFGGGGGGEFEFGFGFGGGGEFGGGG@,EEFFCFGGGDEGGGGGGGGGFCECCECEGGGFGGBFGGGGGGGFGGCFFGGGGFECFFGGGGGGGG7EGGGCDFGGGCGGFEEF>EGGGGEGDGE5@FGGGGGCEGGFFGCGGGEG=3=DGDG373DDD)97>:5)@:>FFEFF@FFFB>F31(7:(690(8?14>: 9 | @M05722:45:000000000-LHCCL:1:1101:9672:2430 2:N:0:9 10 | CCCTTTACGGTACCGGGCAACTGCCGAAATTTGCCGGCGACATGTTCCATACTCGTCCGCTGGAAGAAGAAGCTGACAGCAGCCACTATGCGCTGATCCCAACGGCGGAAGTGCCGCTGACTAACCTCGTTCTCGATGACATCATCGCCGAAGACGATCTGCCGATCAAACTGATCTCACACACGCCGTGCTTCCTCACTGAATCGGGTTCCTACGGTCGTGACACGCGTGGTCTGATCCGTATGCACAC 11 | + 12 | CC>,@,,7>E7@E:*@EEC8C?>EGEEC99?DDEC47*::?+@+;D69A)58>FFB5A5:83).+.:A2<)61491<624),8:?<942016.(-2440,>):46A4(38(1))-.. 
13 | @M05722:45:000000000-LHCCL:1:1101:8972:2506 2:N:0:9 14 | GATCGTACGCGTACCGACGGAGTAGCCCGCTTCCATGGCGTCTAACGAACTCTGCGCAGAGACAACGGCCTGTTTGTAGGCGTTGATGCTGCTGATAGACGCGTTGACGTTGTTGAAGGAGGAACGCACGGTCTGCACTACGGAACGGTGCGCGCTTTCCAGCTGCTCGCTGGCGCCGACGAAGTTGTACTGCGCCTGTTTCACCTGCGAGTTCACCATCCCGCCCTGGTACAGCGGC 15 | + 16 | @8ACCGGGGGEGGGGFDCFFGGGGGG9FFGGGGGGGFEFFFCGGGFFF>>146>BDABB?0966>FFFFAF<7>B9 17 | @M05722:45:000000000-LHCCL:1:1101:6726:2518 2:N:0:9 18 | TCTTTGCTGCAGCCCGTTAATGAGTAGCCTTTAATCTGGACATTCTTGTAACCGCTATCGACAAGAACTTTTTTTGCTTCATTGGCATCGGTGCAACCAGATGCGGCAAATACGACCAATTCGAGTAACATCTTCACTTTCCCCCTCCCCCCCCCCTACTCCCCCCGCTCAATTCCCCCTTCCCCTTTTATGTCCTCTCCTCTTTCTATTTCTCCCCTTTTCTTCTTTATCTCCGCCCCCTCCCCATTCT 19 | + 20 | <@CCCCEEDF8EE,C+,,9,,9,94>DF,>==AD9D,+@@E6?CE>CE87*,,41,*@?*/)))*2++1*2)0))02)2./*+1+3+1).1((.(1/).))11.)1))/,(.)-65)-...)))...(,(-((,(-,((.)-- 21 | @M05722:45:000000000-LHCCL:1:1101:13069:3053 2:N:0:9 22 | GTACGTAATCATCTGCGGAGATGAATTCATCATAAAGCTTACGGGCAGGATTATGAATACGCGTATGCCAGTAGAAACGCGACCATTTCTGTTCTTTGGCTTCATTTAGCACGAAATGCATGAGTTGCCGGGCAATGCCCAGGCCACGAAATGACGGGTCCACGAACAGGTCTTCCAGATAACAGATGGGTTGGGTCACCCACGTGCCTTCGTGTAATACGCACAGGGCAAAACCGGTCACGCGCCCTT 23 | + 24 | CCCCCGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGG?@FGGGGGGGGGGGGGGGGGGDGGGDGGGGGGFFGGGGDFGFFFFFFFFFFFFFBFFFFFEEFDFFFFDD4C<@@FFDEEFFFFFFFF@@@F16637>>FFF??<11>F02 25 | @M05722:45:000000000-LHCCL:1:1101:8021:3120 2:N:0:9 26 | GTTGTGACGCGGGAAGGAAGCCACTTGGCGAACCGCACCGTTTGCCAGCGTCTGGATAACCAGCGCCGAACGGCCGCTTTCGAAGGTGACGTAAGCCAGTTTCGAGCCGTCCGGAGACCACGCCGGAGACATCAGCGGCTGCGGAGAACGGTGAACGACAAACTGGTTGTAACCATCGTAGTCAGAAACGCGCAGTTCGTACGGGAACTGACCGCCGTTGGTCTGCACGACGTAGGCAATACGGGTACGGA 27 | + 28 | CAB@CF:CEFFGDCFEGGGFGGGGGGGG9FF@CFDBFC@CC:FBFGF=FC:4BC=FGGGGGFFGGFGG<5EFCFEFCCG7F=FG@:FFC:EGGGGGGDFC,DFCGGGGDG>CC5EGCFEGGGGGGCEGGGGG7?DGCCDGGGGG4CDGFFFFFFFFFF3@DDDF?AFF6??06>:>B07A>?>FFFFB0:?::3( 29 | 
@M05722:45:000000000-LHCCL:1:1101:7487:3197 2:N:0:9 30 | GTTACTACCAGTTTAAACAGCGTCTGTAGACTATGATTGAGTAAACTTTGTCTCGTGGGGTTATCGCGATATGCGCCTTGTGCAGCTTTCTCGTCATTCGATTGCTTTTCCCTCACCTGAGGGCGCATTACGTGAACCAAACGGACTGCTGGCGTTAGGCGGCGATCTCGGCCCGGCGCGCCTGTTGATGGCCTATCAACGCGTGATCTTCCCGTGGTTTTCACCCGGCGACCCGATCCTCTGGTGGTCG 31 | + 32 | CCCCCGGGFGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDDGGGGGGGGEGGEFGGGGGGGGGGGGGGGFGGGGGGGFFGGGGGGGGGFGGGGGFCGFGGGGGGGGGGGD@FGGGGGGGGGGGGE8ECEGFGGEGGGDGGGGGGGGGGGGGGGCC*3;DGGFFFFFFFFF+;AFFF@3=C7@FF@(4:1A;FFFF>;6;F9BBF:AFF?)1 33 | @M05722:45:000000000-LHCCL:1:1101:7839:3227 2:N:0:9 34 | CCTTTCTGTACAGCAACGTTTTCTGCTGGCCGTACCAGCCGTACGAGGAAAATTATGAGACGCCTTCCCGTGTTTTTTGTCCTGGACTGTTCAGAGTCCATGATTGGTGAAAACCTGAAAAAAATGACTGATGGTCTGCAAATGATCGTCGGAGATTTAAGAAAGGATCCACACGCACTTGAAACAGCCTGGGTCTCGGTAATCGCATTTGCCGGTGTAGCCCGTACGATTGTACCTCTTCACGAAATTG 35 | + 36 | CCCCCCGFGFFGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGDGGGGGGGGGFGGGEGGGGGFGGEFFGGGGGGGGGFGGGGGGGGGGGDGFGEGGGGGFGEEFGGFDFCFGGGFGGGGFGGGGGGGGGGGCGGGDFGGGGGGGGGGFFFGGDCDGGGFFFFFBFFA59ACFFDCCFBADAECB@D2@?E4BFB=@F=EF3:>B>?B0(6;4?F75 37 | @M05722:45:000000000-LHCCL:1:1101:21825:3431 2:N:0:9 38 | TGTTTCATCATGCCTAAATGCTGGGTCACGTAAATATACGAGATCCCCTGTTTTTCCTGCAGTTCGAGCATCAGGTTAACCAGCTGCGAGCGCATCGACATATCCAGCGAGGCCAGCGCTTCATCACAGATGATCACTTTAGGACGTAGGATTAGCGCGCGCGCCAGCCCCATACGCTGTTTCTGGCCCGGCGCCAGCATATGCGGATAGT 39 | + 40 | 8A88,CEFFEDF9F9FFFA;DFDA3>9>FD<47566:11<(9 41 | @M05722:45:000000000-LHCCL:1:1101:15030:3498 2:N:0:9 42 | CATCAGCACCGCGCAGTTAAGCTCCTGGCGAAGCTGATTAATCAGATCGTATAGCGCCACCTGGCCGTTGACGTCAACGCCCTGGGTGGGTTCATCCAGCACCAGCAGCTGAGGCTTGTTTAACAGCGCACGCGCCAGCAGCACGCGCTGGGTTTCACCACCGGAGAGCTTCTGCATCGGTGCATCCTGCAGGTGCCCGGCCTGTACGCGCATCAGCGCGGGCAGAATATCTTCTTTGCGCGTTGCCGGGG 43 | + 44 | CCCCCGGGGGGGGGGGGGGGGD>EEGGGGGGGGGGD5EEGFEGG0;(24??BFFF;BFF(5<22)).-)<)4)3>>BBF59@FFGEC*>;;:EEGGEFEEGGFGDECF:+3+0**).08@FF))9>>)84?9)*54((7(-2(444((,3(-,(,(((462(((--40:4:666,81<0(311(( 49 | 
@M05722:45:000000000-LHCCL:1:1101:14547:3883 2:N:0:9 50 | ACGCTAAACAGCCGCTGCTGATTCGTACGCGCCGATTACTGGGGCTGTGGTGCTTTGCGTGGGCGACGATCCACCTGACCAGCTACAGTTTACTGGAGCTCGGCATCCGCAATGTGTCGCTATTAGGGCAGGAGATTGTTACCCGGCCGTATTTGTTGCTGGGTTTCGCCTGCTGGTTAATCCTGCTCGCGCTGGCGGTGACCTCTAC 51 | + 52 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGDGGGGGGGGGGGGGGGGFFFFFFFFFFAFFFAFFF>@FFFEFFFB< 53 | @M05722:45:000000000-LHCCL:1:1101:25476:3918 2:N:0:9 54 | TTTGAGAACAACCTGCGCATTATGGTCGTGGGCGACTTCGATGCCGATGGCGCCACCAGCACCGCGCTGAGCGTGCTGTCGCTGCGCGCCATGGGCTGTCGCGCCGTCGAGTATCTGGTGCCGAACCGTTTTGAAGACGGCTACGGTCTGAGCCCGGCAGTGGTCGATCAGGCCCACGCCCGCGGCCCGCAAATGATCATGACGGTCGATAACGGGATCTCTTCCCATCCGGGGGTCGATCTTGCCCATGA 55 | + 56 | @CCCCGGGGGDGGGG8FEGGG@FFGFFEG@CFFCEFGGGG?<C7CFGGE9=FDFADE+BC8B>CEGE5DC;EC*@C>F54D:?C79D,;8:*474=:+FGCDDG3))2)>>)<).)7)7)9<25=59A<>4>8<>B6(4)-4(((48>A>6A:<6(((2(-39?0(-(()-),.4(4- 57 | @M05722:45:000000000-LHCCL:1:1101:14095:3991 2:N:0:9 58 | CTACACCACCGATCCGCGCGTGGTGCCTGCGGCGAAACGTATTGATGAAATTGCCTTTGAAGAGGCCGCCGAGCTGGCGACCTTCGGCGCGAAGGTTCTGCATCCGGCCACGTTGCTGCCAGCCGTACGCAGCGACATTCCTGTGTTTGTCGGCTCCAGTAAAGATCCCAAAGCGGGCGGCACGCTGGTGTGCAACGAAACCGCCAATCCGCCGCTGTTCCGCGCGCTGGCGCTGCGCCGTAAGCAAACG 59 | + 60 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGFGGFGGGGGGGGGGGGDGGGGGGGGDGGGGGGGGGGGGGGFFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGCGGGGGGGGFGCGGFGGGGGCFCGGFGGFGGGGGFFFFFFFEDFF>BFFADB;B?9BF6BBFFFFF1?F>FFFFFBFFFF:1?BBFFBBF3??FFBBBFF 61 | @M05722:45:000000000-LHCCL:1:1101:19331:4173 2:N:0:9 62 | ACGCTGAAAGATGCCTGTAACGAAGCGCTGCGCGACTGGTCTGGCAGCTATGAAACCGCGCACTATATGCTCGGCACCGCAGCAGGCCCGCACCCGTTCCCAACCATCGTGCGTGAATTCCAGCGCATGATTGGTGAAGAGACCAAAGTGCAAATCCGTGAAAAAGAAGGTCGTCTGCCGGATGCGGTTATTGCCTGCGTCGGCGGCGGTTCTAACGCCATCGGTATGTTCGCC 63 | + 64 | 
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGEGGGGGGGGGGGGGGGGGGFDEGDFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEFGGGGGEGGGGGGGGGGGGGGGGFFFFDFFFFFFDFFFFFFFFFFFFBB>FBFFFFFDFFFFFF>FFFFFB?FFFFFFB 65 | @M05722:45:000000000-LHCCL:2:1101:23071:4268 2:N:0:9 66 | AACGTTGGCTATTCCGCGCAGATCCGCAGCGATGCCACCGCCTGGCGTATGTCGAACAGTAAAGCCAACCCGGTCGTTAATATTTACAACAACCATGATGTGAGCATGCCAGCGTACGCATCGGTGGGCGGTAACTATCACGATCCGCTGGTGACTGCGCGTAACCGTACTCAGGGCTGGCTGTTCAGCGATACCGTCGGATTCTTTGACGATACGCTGCTATTTACCGACGCGGCGCGCAAGCAGAAGGG 67 | + 68 | >7BC86BEGGGGACCFGGFGDCF@EC@CGGGGGGGGEED=CE4*CCFGGFC*CDFB45AF+;6@>3:A:;<(>690:1<<49:A)41:44?F0:>FA>)6)671(4141291(,-3-((47DG3C>@FF?FFBF:<757.,9>EF;(11)6AA>4< 73 | @M05722:45:000000000-LHCCL:2:1101:11189:4446 2:N:0:9 74 | CAACAAGGAGAGGTGAAATGAGCGTGATGACCTTAATTAATAGCGCGGTGGCGTGGTTTGCTTTTGCCGCGGTATTCGCCTTTTTACTGTCGCTGAAAAAATCGCTGAGCGGTGTGATTGCCGGTATCGGTGGCGCACTGGGTAGCCTGTGCGCGCTGCTGGCTGGCGTGCAGGTGCTGGTTCGTGGCTTCCCGGCGATGGGCAGCCTACGGCTGGTGCACTACAGCGTACTGGTCACCCCGCTGAATGCG 75 | + 76 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDCFGGGGGGGFFFFFFFFFFFFFFFFFFFFF:BFFFFFFF:?B>>FFBFFFFFFF?F7>081:A:A<9:>FFBBF>9?FAA1 77 | @M05722:45:000000000-LHCCL:2:1101:13328:4518 2:N:0:9 78 | GTCCCAGCAACCGCTGCAACCGTGGCTGCTGAACACGCGCCAGGGGGTGTAAAACTCGGCGTTCGGCAGGCTGAAGCCGCTAATCAGCACCACCGGTATTTTTGCCGCCCAGGCAAGCCACGACAGCCCGCTGGCCAGGCCGATAAAGAAGCTGGCGTGGCGAAGCAGATCAACCCGCGCCTGCAGCGGCAGGGCCCCGGTGCAGTCTTCTGCGCCGTAGGGGATGTGGTTCCAGACGAAGCCGTGCC 79 | + 80 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGDGGGGGGGGGGGGGFGGGGGGGGGGGGCEGDGGGGGGGFGGFGGGEGDDCEFGGGGGFGGGGGGGGGGGGFG4DGDGFGGGGFFFFFFFFFEEFFB@?:9DBFFF(:BFF>AFBB>6:?FF>BB?FFBBFFFFFBBFF:019:>F: 81 | @M05722:45:000000000-LHCCL:2:1101:24115:4677 2:N:0:9 82 | 
GCGCGCGGCGGGTATCGGGCAGGTGGTGTATCACCGCAGCCGTGATGCGCAGGCGGCTGGCGTGGCGTGGAGTGAGGCGGATATCAGCGCCATCAAACGCCTGGCGGATATGGGCTTTAAAGTCACCGTCACCGGTGGACTGGCGCTGGAAGATTTACCGCTGTTTAAAGG 83 | + 84 | CCCCCGEEEGGGGGGGGGGGDCGGFFGGFGGGGFGE@CEFGGGDGFGGGG7C=FGDGGGDGF:CEGGBFG:FFCFFCDEDGGGA=FFFGGEFEGCC 85 | @M05722:45:000000000-LHCCL:2:1101:11908:4786 2:N:0:9 86 | CGCCGGGACTCTGCGCCCGACACGCCATAGTAGTAGCGGTTAAAGCGGTCATCGTAATAATAGACACCCGCCGCGGGAATCACCGACAGCTTGCCCACCGGCAATACGCGGAACCAGGACAGCTCGCCAACCCAGCCATCGCTGTTGTCCAGAACATCCGCCGCCGCCGACAGCTTCAGGCTGCCCCACTTTTCATGGTGATACCATGCCATGCCCGCCATGGCGGTACTGTGACGCTTATCCAGCTGCTT 87 | + 88 | CEGEGGC@CEGFFDECEC:ECGGFEFCECE8CGG>CC>>EFF>FGDGGGGG@FGGFC78C6FCGGGGEDDDG<<3CD:CFGGFF45>F:5>>DDFFFFFAF)0942677A0>21:B96<2<612;1:AFF)6>B<>34.4:2 89 | @M05722:45:000000000-LHCCL:2:1101:14685:4816 2:N:0:9 90 | CATGAACGATTATCTGCCTGGCGAAACCGCCATCTGGCAGCGCATTGAAGGCACACTCAAGCAGGTGCTCGGCAGCTACGGTTACAGCGAAATCCGTTTGCCGATTGTAGAGCAGACCCCGTTATTCAAGCGCGCTATCGGTGAAGTCACCGACGTGGTTGAAAAAGAGATGTATACCTTTGAAGACCGCAACGGCGATAGCCTGACGCTGCGTCCGGAAGGTACTGCGGGCTGCGTACGCGCCGGCATCG 91 | + 92 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFFGGGGGGGGGGGGGFFFFFFFFFDEBFFFFFFFFFFFFEFFFFFFFBBFF>9>?>FFFFBFFBFB9B>FFFF:;FF9BFFFF9B>2 93 | @M05722:45:000000000-LHCCL:2:1101:9465:4932 2:N:0:9 94 | GTGATAGAAACCAACTGCCTGATTGATGCGCTGGGCACGCATCCGCTGGCCTTTGGCAAACTGCCGCCGCTGATGAACGGCCTGACCCAGCAGGTGAAAGATTTTGAGCGTCTGACCATTGATGCCGCCGTATATGGCGATAAGCAAAAAGCGCTGCTGGCGCTGGTCGCCAACCCGCTGGTCGCCGA 95 | + 96 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGCGGGGGGFGGGGGGGGGGGGFGGFGGGGGGGDGGGGFGFGGFGGGGGGFGGGGGGGGGGDGGGGG>9DGGCFGGGDG4CDG4CEFFFF>BFF 97 | @M05722:45:000000000-LHCCL:2:1101:5895:5333 2:N:0:9 98 | 
ATCCAGCGGCGTGGTTTATCAAACGGATAGTACAGCGCGGCGAACATCTGATGCTGGCCAACGTCGGAGGTCACATAAGCATCGCCGTGGGTCAGCCTCCAGATGGCCTCAATGACCGCCTGCGGCTTTATCTGTTCACTTTTTTCGTCATACTTCAGGCA 99 | + 100 | 8CC,AFGEE7FFGGGDE,,,D,AF9=DD@C+=7,8>EEG,@DC;F>@;B;5@>E@8EGGEFC,;DFGCCCFG 101 | @M05722:45:000000000-LHCCL:2:1101:6264:5384 2:N:0:9 102 | GTCCTGATTCGCGCTCAGATCGCCGGTGTTGCCGTTCAGTTCAAAGCCCGGTACACCGCGCATCTGAGAGAAATGATTCGCCAGCGCCCAGGCATCGACGCCCATTGCGTACAGGCGCGCCAGCGAGTAGTCGTTGTTGACCGCGCGTAGCGCCTGCTGCATCAGCGCCGGATTGCTGCCGGCCAGCATCGGGATTTCACTGTACTGCAAACCTTCCATTTCCAGACGGAAGTCCGGGCCCGCAGTGCC 103 | + 104 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGAAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGDGGGGDEFFFGGGCFGGGGGGGGGGGGFEGGGGGGGGGGGCE5=FDDGGFGGGGGGGGFGGGGGGDGGGGFGGFFDFFFFFFFFFEDEB7?FAB2>AFFFFFFFF?B?FFFFFFFAFBFF:BB?:F:>DBB;(39BBB>F(49 105 | @M05722:45:000000000-LHCCL:2:1101:26553:5391 2:N:0:9 106 | GGCTGGTGCTCAATTATGACCTCTATGCCTCACAGGAAAAAAGTACGCAAAATAGCGCCAGCGCCTACAGCGAATTGCGCGCGTTTAGCGGCATTGGCGTGCTCAGCTCCACGCAAATAGCGCGCTATAC 107 | + 108 | CCCCCGGEFFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGDFDEGGGGGGGGGGDFGGGGGFGCEGGGGGGGGEGGGGGGGGGGGDFDGCEGDBDFGGFEEFGGGGGGGGGGGGGGGGGG; 109 | @M05722:45:000000000-LHCCL:2:1101:5389:5526 2:N:0:9 110 | GTGGACAACCTGACCAACGCCGGTAACATTGTGTTTGTGCCGTCCCAGTGCACCTTCACGCCTCACACGCTTACCGCAACCAACCTTATTGGTAACGGCTCCACCATCACGCTCAATACCGTTCCGGTCTACAGCAGTTCACCGTTAGATAAAG 111 | + 112 | -A,6,C+,4+++++,3,,3,,733,3,+,6+3,6,,,3 113 | @M05722:45:000000000-LHCCL:2:1101:22262:5649 2:N:0:9 114 | CTCGCCAGATGCGCGGCGCAGCGGTGAACCGCCGCTTTTGGCAGCCCGGTCGAGCCGGAGGTCAGCGTCATGGAGGCCAGCCGTTCAGCCTGCCAGCCGACATCGTGGTACTGGCGCGCGCTACGCAGCCGCAGCACAGGCAAATCGTAGTGCGCCTCGCCGTCGAGATTAAGCACATGGCTGACGGTCAGCCCCGGCAGCAGGTCACTGACCAGAGCCGCGGGCAGACGCGGGTTGCGCGGTAAACTGCG 115 | + 116 | 
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGDGEGDGGGG+@FGGFGGGGGCECF7FEEGGCFFFFGGGGCGGFCGGGFGCFEFFGFCFGFGGGGGGGDGGGDGGGFFGGGGCGGGGEFGEGGGGGE@DGDEGGGGGGGGGF*FFGGGGCFF@>>)83>>>44;B0>6DF>B?((48>:A)8?BF6?6((,(49>09?BFF;;>04(4(,179676<)6DEFFFFFFFBE69?F?(:D?<:?>?FFB>:67:7:?F>B>>B20,119>FF>B;>21:)622(-7(( 121 | @M05722:45:000000000-LHCCL:2:1101:14226:5786 2:N:0:9 122 | CCCTGCCAGCCGTCAACAACCTGAATACCGAACTGCATGTCACCTTCTTCTTTCACCAGGTTGAGCGTCCCTTTACCGGTTAACTGAATCAGCGACGATTTGCCCTGCAGGTTATTCAGCGTCAGCACGCCGTTATCGAGATCGATGTCGGTGCTCATTTCATCCAGACGCGTAGCGTTGTCATCGTTCTCCTGCGCGCGCACGTCGGTGCTGCGCTCAACGGCCTGCTGCACCAGCTTCTGGAAGTTCAT 123 | + 124 | CCCCCGGGFGGFGG7FDFAFGFGGCFGGFCEGGGGGFFFGGGGFGGGEEDEGFGCFG9FCFACF,CCFGGGGFGEGGGDGGDCGFG9:CC8+>EGCG68CGGGGF?+8DE?>B:FG8+@=CEFDFEGGFGGGFD89DG>D77?C>C*@FAFFFFFFFFBFAE@@)656(31:@0(609<<)5(54<)) 125 | @M05722:45:000000000-LHCCL:2:1101:5108:6228 2:N:0:9 126 | GTGCCTGCTCGTCAACGTATCGGTTCGCGACGTCGGCCAGCGCCATTATTTCACGAATCGCTTTACCGAATTCACGGCTGTCCCACGCTTCGCCAATGCTGGTTGCCGCGTCGGTGTAGGTTTTGTACATCTCTGGGTCAGCCAGTTCAGCGGACAGCACGCCGTCGAAACGCTTAGCGATAAAGCCAGCGTTACGGGAAGCCCGGTTCACCACTGTGTTCACGATATCGGCGTTGACGCGCTGGATGAAA 127 | + 128 | <@,BCDFFCDFFF,FGDC@,C;+FC,@FCEGGGGGDEGGGD>FFDG9FCCG<+8CEFGG@FFGG9,7>9EC:>>FFFGD8FGCAF7E5E>EEC,CGCC5)//*/*1><35/C*+)).0:+1+8A=FA)0)4((.4,,(4((-1,(642<13)65-,))6(,(-(:(909>1(4<2:(29((...2)6 129 | -------------------------------------------------------------------------------- /tests/real_fastqs/miseq_casava_R1.fastq: -------------------------------------------------------------------------------- 1 | @M05722:45:000000000-LHCCL:1:1101:22781:2332 1:N:0:9 2 | GAGATCCAGCGCAGCATGACGCAGATGCTGGAGCGCGTCATTTTCAACAATGATGCCCCAGCGAAGGCGCTGGAACAGTCGCAGCAGGAAATTGATAAGCTGCTGGCGAAATAAGGGGCGCGTCATGGCTGGATATGATTCCCGTACCGGAGGCCTGCTGGCCTCCACATGGATCGGTTATTCATTGCTGTTCTGGTTTTATCCGCTGGCCTGGCTGGCGGTGCTGAGCGTGACCCAATGGCAGTTTATCG 3 | + 4 | CCCCCGGGGGGGGGGGGB:@F?BF?00:7?BFAAF)( 5 | @M05722:45:000000000-LHCCL:1:1101:17124:2405 1:N:0:9 6 | 
ACAAGGCCCTGCGTGAATACAGCGCAAAATTTGATAAAACGGAAGTCGCCGCACTGCGCGTCTCCGAGGATGAAATTCAGCAGGCAGGCGCGCGTCTGAGCGACGAGCTGAAGCAAGCCATGCAGGCGGCGGTGCGCAATATTGATACCTTCCACAACGCGCAGATTCTGCCGCCGGTGGATATCGAAACCCAGCCGGGCGTGCGCTGCCAGCAGGTTACCCGCCCAATCGCGTCCGTGGGTCTCTATATT 7 | + 8 | CCCCCGGCGGGGGGGGGGGDGGGGGGGDFGDCFGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGFFFEGGGGGGGDFGECFGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGDGGGGEGGGGG>CAFGFGGGGGGFGGGGGGGGGGGGG9:CFCEFGGGGGGGGG?F7FGGFCECEGGGGGGCGGGGGGGDGGGGG:4CGGGGFGFFE>B:@@FFFFEFBBFB>F3<:4:9942. 9 | @M05722:45:000000000-LHCCL:1:1101:9672:2430 1:N:0:9 10 | GTCGAGGATTAACCGATAAATTCCAGACCGTTCATATATGGACGCAGCACCTCTGGTACTTCAATGCGGCCATCAGCCTGCTGGTAGTTTTCCAGCACGGCAACCAGCGTACGCCCTACCGCCAGCACAGAACCGTTGAGGGTAGGGACCAGGCGGGTTGTCTTATCGGACTTGCTGCGTCAGCGAGCCTGCATGCGGCGCGCCTGGAAATCACAGACGTTAGAGCAAGTGGGATTCTCGCGGGACGTATT 11 | + 12 | CCCCCGCBECFG7F+3CFD7FF:+:FDEG*>;;DEFDFC>:89B?FFBC**/8AEEGGG8CCFGGCGGG55CG=DF**97CF*7*:**)07.)099FF>*9?7@)(.*.8?BF(1(-2(5. 13 | @M05722:45:000000000-LHCCL:1:1101:8972:2506 1:N:0:9 14 | GCCGCTGTACCAGGGCGGGATGGTGAACTCGCAGGTGAAACAGGCGCAGTACAACTTCGTCGGCGCCAGCGAGCAGCTGGAAAGCGCGCACCGTTCCGTAGTGCAGACCGTGCGTTCCTCCTTCAACAACGTCAACGCGTCTATCAGCAGCATCAACGCCTACAAACAGGCCGTTGTCTCTGCGCAGAGTTCGTTAGACGCCATGGAAGCGGGCTACTCCGTCGGTACGCGTACGATC 15 | + 16 | CCCCCGGGGGGFGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGFEFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGDGCGGGGGGGGGGGGGGGGGGGGGGGDDFGGGFGFFFFFFFFFEFFFF?:F 17 | @M05722:45:000000000-LHCCL:1:1101:6726:2518 1:N:0:9 18 | CTACCCGGTAGTTATTACGCCGATCCAACTGGCTCTCTCTGCAAAATCCTCCGCACCACCTCAACCACAGTCCACTACCAGCGAAACGGTCATAACTGCATAGCCAGCATGATGCGGTTTAACGCAGATTTTGAGTATGTGGATGGCGCGGAGTTAAAGCAGATATGGGCAGACATCGAAACAGCAGAGCACATTAAACGGCTACGCGCTATTCAGCGGGTGGCTTAGGTGGGTGGGATGGGGAAAGTGAA 19 | + 20 | 
CCCCCDFDGCFGGGGGGGGGGGCGGGGGGGGGGGGGGGEGGGGFFFGGGGGGGEFGGGGGGGGGGGGGGGFGGGGGGGGGGGGD7FGEEGGGGGCGGA@BFCFCCFCEFGGFGGGE7FGGFFEFE@ECGDFGF@DFGF9FCFFGGFFEGFECEGFGAC;FCC@FGFD98>:C5BFGGC6CBCC@FDC?CE6F>EFFFFCE45?=FCCGGGGGGFF77<)5C>:) 21 | @M05722:45:000000000-LHCCL:1:1101:13069:3053 1:N:0:9 22 | GTGGTAGAAGGGCGCGTGACCGGTTTTGCCCTGTGCGTATTACACGAAGGCACGTGGGTGACCCAACCCATCTGTTATCTGGAAGACCTGTTCGTGGACCCGTCATTTCGTGGCCTGGGCATTGCCCGGCAACTCATGCATTTCGTGCTAAATGAAGCCAAAGAACAGAAATGGTCGCGTTTCTACTGGCATACGCGTATTCATAATCCTGCCCGTAAGCTTTATGATGAATTCATCTCCGCAGATGATTA 23 | + 24 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGFGGGGGGGFGFFFFFFFFFFFFFFEFFFFFFFFB?AFFB4 25 | @M05722:45:000000000-LHCCL:1:1101:8021:3120 1:N:0:9 26 | GGCCACACTGCCAGTGATGAAGTGTTCGAGAAGCTGACCAGCATCAAGGGAGCATTCCGTACCCGTATTGCCTACGTCGTGCAGACCAACGGCGGTCAGTTCCCGTACGAACTGCGCGTTTCTGACTACGATGGTTACAACCAGTTTGTCGTTCACCGTTCTCCGCAGCCGCTGATGTCTCCGGCGTGGTCTCCGGACGGCTCGAAACTGGCTTACGTCACCTTCGAAAGCGGCCGTTCGGCGCTGGTTAT 27 | + 28 | CCCCCGGEGGGGGFGGGGGGFFGGGGGGGGGGEFGGGGGGFGGGGGGGFEGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGFGGGFGGGGEG=FGGGGEGGGGGFEGGGCGGGGGGGFFGGFCFEFGGGGGG7FFFGGGGGG8>FFCGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGG=EGGGGDGGGGGGGGGGGGGFDCF@GFFFFFFFFFFD>@FFFFFFBFF9>0;7>A5 29 | @M05722:45:000000000-LHCCL:1:1101:7487:3197 1:N:0:9 30 | GCGTAGTTGAGGGTGACATGATAGGGCGACTGGCGATGAAAACGCTTCATACTGCGGCTGAGGTGAAACTGTTCGGGCCACAAAACGGCACGGGGATCCGGCGACCACCAGAGGATCGGGTCGCCGGGTGAAAACCACGGGAAGATCCCGCGTTGATAGGCCATCAACAGGCGCGCCGGGCCGAGATCGCCGCCTAACGCCAGCAGTCCGTTTGGTTCACGTAATGCGCCCTCAGGTGAGGGAAAAGCAAT 31 | + 32 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGDGGGGGGGGGGGGGGGGGGGGGGFGGFGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGFCFGGGGGGFGGGGGGGFGGGGGG5BEDGGGGGGGGGGGGGGGGDGGGGGGGGGGCDFGGCGGGGGCDGDFGGFFF47>FFFFEFEFFFF>?A4(2(8:FFFF>FEFFFF9>?66?BF?>F 37 | 
@M05722:45:000000000-LHCCL:1:1101:21825:3431 1:N:0:9 38 | ACTATCCGCATATGCTGGCGCCGGGCCAGAAACAGCGTCTGGGGCTGGCGCGCGCGCTAATCCTACGTCCTAAAGTGATCATCTGTGATGAAGCGCTGGCCTCGCTGGATATGTCGATGCGCTCGCAGCTGGTTAACCTGATGCTCGAACTGCAGGAAAAACAGGGGATCTCGTATATTTACGTGACCCAGCATTTAGGCATGATGAAACA 39 | + 40 | @CCCCFGGGEFGGGCFADC6@CC@FFGGGGCDGGGGG@FGFG8FGGEGGGGGGGGGGGGGFCDEFGGCEFCFFDGGFGGEFFA@FFC9EBAFFFGGGGDGGGGGGGGGF9FFGGGGFEFCEGEGGGGGGGFGGGFGGFGGC=BCFCCFGGFFFDGGGCGFGGFG77CE8CFFGCE8EGGGGFGGGGFCGGGGGFGGGGG?6>B>?3 45 | @M05722:45:000000000-LHCCL:1:1101:21540:3758 1:N:0:9 46 | CGCCAATCAGCATCAGCAGGTATCAGTTAAAGCCGCTGTTACCGAACAGCGTTTTGTAGTTGCTGATAAGCCGGGTTTTCGGTGCGCCGGTCGGACGCGTTTCCGGCATCCAGCGCGCCATGCTGAAGGTGACGCCTGCGCAGAGCACCAGCAGGAAACCGTAGCAGGCGCGCCAGTTGATGAGTGTCTCCAGCACGCCGCCAATCAGAGGCGCCAGCAGCGGGCTCACCAGAATACCCATATTTAACAGA 47 | + 48 | CCCCCGGGGCFFGGGFGFGGGG,CFEC:CBFGGGG7FDFCG>E@>F@FGGGGGGDFGG<<=CFFCCFGGGGGG,,,@FGG+8>EG@ECDFG9*1:CEDEB559@FF;9AFG+CGGGGCC?GE=GGGCDEGGGF***1/))/7CD:C7@>BF>?7>F4@@FDFGGGGGGGGGGGGFFGGEEGGGGGGGGGGGGGG>GGGCFEEGG*CEGGGGGGGGGGGGGDGEGGGGGGGGGDFGGGGGGGG@FFGGEGGGGGCFCFGGGGGGDGGDGCDGGFGBBDDF@BEFFDBED?FF??BFF4@FFFFFFFF@ 65 | @M05722:45:000000000-LHCCL:1:1101:23071:4268 1:N:0:9 66 | GCGCGATCTCAATGCTCTGACCATAGTTTTTTGCCGTTTTCGGCGCATTGTCCGCCGGCTGAAGGGCTTCCGTGTGGTTGGCATACAGCGACACCGTTTGCCACGGCTTGTAGACCAGGCCAAAAGTGGGCATCCAGCGACTTTCGGTAAAGCTGGAGGAGGTGGTCTCCAGGCCCGTCTCGTTGCTGTAGTTACGAATCACCACCTTCTGATTGCGCGCCGCGGCGGTAAATAGCAGCGTATCGTCAAAG 67 | + 68 | C<<8FFFGGFFGGGGGFGEFBFGGFGDCCGGCFE,AFGGGG7BFGGGGGGGGGGFGGGGFGGGGGGGFGGGGGGFGGGFGDGGGEGFEC@DDFCGGGDG55CFGFGCGDFFGGFFFGEE57:DDFGFDGGGGGGCF=DGDGEGFEFFDEF>BEFFFFFFF?0,43:EFGGEGGGGGGG5AEGGGGFGGGDGFCFF7FGGGGGGGD5 73 | @M05722:45:000000000-LHCCL:1:1101:11189:4446 1:N:0:9 74 | GCTATCAGCCAGACCGCATTCAGCGGGGTGACCAGTACGCTGTAGTGCACCAGCCGTAGGCTGCCCATCGCCGGGAAGCCACGAACCAGCACCTGCACGCCAGCCAGCAGCGCGCACAGGCTACCCAGTGCGCCACCGATACCGGCAATCACACCGCTCAGCGATTTTTTCAGCGACAGTAAAAAGGCGAATACCGCGGCAAAAGCAAACCACGCCACCGCGCTATTAATTAAGGTCATCACGCTCATTT 75 | + 76 | 
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGGGFGCDGGGGGFGFFFFFFFFFFFFFFFFFFF9?FFFBFFFF 77 | @M05722:45:000000000-LHCCL:1:1101:13328:4518 1:N:0:9 78 | CCTTGGGGTTGACCCCACGGAAGCACCGCCGCGTCTTAATCTCAGCGCGCCGCGCACCATTGAACAGCCCTACGTTTGCATCGCCGTGCAGTCGACCTGCCAGGCCAAGTTCTGGAACAACGGCCACGGCTGGACGGAGGTGGTGGCCCACCTGAAATCGCTTGGCTATCGGGTGCTGTGTATTGACCGTGAGCCCACCACAGGGCACGGCTTCGTCTGGAACCACATCCCCTACGGCGCAGAAGACTTCA 79 | + 80 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGFGF@FFFFFFFFFFFFFFFFFFF>FFFFFFF) 81 | @M05722:45:000000000-LHCCL:1:1101:24115:4677 1:N:0:9 82 | CCTTTAAACAGCGGTAAATCTTCCAGCGCCAGTCCACCGGTGACGGTGACTTTAAAGCCCATATCCGCCAGGCGTTTGATGGCGCTGATATCCGCCTCACTCCACGCCACGCCAGCCGCCTGCGCATCACGGCTGCGGTGATACACCACCTGCCCGATACCCGCCGCGCGC 83 | + 84 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGEGGGDGGGGGCEEGGGGGGGGGGGGGGGC@@CFFG@@FGGFFFGGGGGGGGGGGG@?CGGA9FBFEECCFCE7CCFGGGGGGGGGF@>FC77FEGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGG@E@EEE 85 | @M05722:45:000000000-LHCCL:1:1101:11908:4786 1:N:0:9 86 | CTTTAAAACCCCACGTGATGAAGTCAGCCTGACCGCTTCCTGGATGCCGCTTACCTTTGACCCGTCGGATAACGACGATAGCGCCATGAAGCAGCTGGATAAGCGTCACAGTACCGCCATGGCGGGCATGGCATGGTATCACCATGAAAAGTGGGGCAGCCTGAAGCTGTCGGCGGCGGCGGATGTTCTGGACAACAGCGATGGCTGGGTTGGCGAGCTGTCCTGGTTCCGCGTATTGCCGGTGGGCAAG 87 | + 88 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGFGCGFFGGGGGGGGGGGGGGGGFGGGGGGGFGGGGGDGGGGGGGGGGCFCFGGFGGFGFGGGGGGGGGGGGGGGGGGGGGGGGEGDGGGGGGGGGGGGGGGGGGGGDCFGGGGGG@EEGGGGD@FGGCFGGCEEGGC=CGDGGGGGGGGFFEGFFGFCEDGDGFGGCDGG<:>+EEGF<+DEEF8F,5CFFGGG@FE@:FCGFDF9FFGEGB 101 | @M05722:45:000000000-LHCCL:1:1101:6264:5384 1:N:0:9 102 | 
ATTGTATGCCAGTTCCCGCAGCGCACAGGGCACTGCGGGCCCGGACTTCCGTCTGGAAATGGAAGGTTTGCAGTACAGTGAAATCCCGATGCTGGCCGGCAGCAATCCGGCGCTGATGCAGCAGGCGCTACGCGCGGTCAACAACGACTACTCGCTGGCGCGCCTGTACGCAATGGGCGTCGATGCCTGGGCGCTGGCGAATCATTTCTCTCAGATGCGCGGTGTACCGGGCTTTGAACTGAACGGCAACA 103 | + 104 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGGGGGGGGGFGGGGFGFFFFFFFFE>:@@FAAFDFD7>BF69>BB2 105 | @M05722:45:000000000-LHCCL:1:1101:26553:5391 1:N:0:9 106 | GTATAGCGCGCTATTTGCGTGGAGCTGAGCACGCCAATGCCGCTAAACGCGCGCAATTCGCTGTAGGCGCTGGCGCTATTTTGCGTACTTTTTTCCTGTGAGGCATAGAGGTCATAATTGAGCACCAGCC 107 | + 108 | CCCCCGDGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGE,CCGGGCGGGGGGGGGDGGGGGGGGGGGGEGGGGGGGGGFGGGGGGGGGGGGGGGGGG9?FFGGEEC8E8,,@FFGGGGGGGGGCGGGGGGGGGGCCCFC:+E,ABE:CE@FGF;C,@D7C>BA: 113 | @M05722:45:000000000-LHCCL:1:1101:22262:5649 1:N:0:9 114 | ACCATCCTCAGGCGGTGCTGGCCTGGCTGGCGCTGCTCCAGTGCGGCGCCCGCATTTTACCGCTCAACCCGCGTCTGCCCGCGTCTCTGGTCAGTGAACTGCTGCCGGGGCTGACCCTCAGCCATGTGCTTAATCTCGACGGCGAGGCGCACTACGATTTGCCTGTGCTGCGGCTGCGTAGCGCGCGCCAGTACCACGATGTCGGCTGGCAGGCTGAACGGCTGGCCTCCATGACGCTGACCTCCGGCTCG 115 | + 116 | CCC3BFFF@FFFB=:??EB>7:DFF>>024( 117 | @M05722:45:000000000-LHCCL:1:1101:8547:5781 1:N:0:9 118 | GTATGGATCTTCACCACCGGCGACGCGCAGGACGGCGAACGCCCGCTGGCGGTGCTGCTCGACGGTCAGTTCTGGGCCGAAAGCATGCCGGTATGGCCTGCGCTGACAGCGCTCACTCGCGAAGGCAAACTGCCTGCGGCGGTGTATGTGCTGATTGACGTGATTGATAACGCCCACCGCAGCGTCGAACTGCCGTGCAATCCCGACTTCTGGCTGGCGGTACAGCATGAGCTGCTTTCCCAGGTACGGA 119 | + 120 | CCCCCGGGGGGGGGGGGGGGGEGGEGGGGGGGGGGGGGGGGGGGGGGGGGGDGEGGGGGGGGGGGGGGGGGGGGGCFGGGGGGGGGFGGGDGGGGGGGGFFGGGGCGGGGFGGGCGFGFGGEGGCFFGFGGFGGG,FF@F>BB?F?F692A*6A<EDGDECE5CC5@FC=FFGGGFGCGGGEDCDCDD7DFFGG>FF=G=FFFFF:E>EFFFFFFF001;99 125 | @M05722:45:000000000-LHCCL:1:1101:5108:6228 1:N:0:9 126 | 
CCGAACGCGCCGGGCAAATATTTCTACGTCTGGCTGGACGCGCCGATTGGCTACATGGGCTCCTTCAAGAACCTGTGCGACAAACGCGGCGACACCACCAGCTTCGATGAATACTGGAAGAAAGACTCTACCGCCGAGCTGTATCACTTTATCGGCAAAGATATCGTCTACTTCCACAGCCTGTTCTGGCCTGCCATGCTGGAAGGCAGCAACTTCCGTAAGCCGACCAACCTGTTTGTGCACGGCTACGT 127 | + 128 | CCCCCFGGEEGGGGGGGGGGGGGGFGGGGGG?FGGEGGGGGGGGGGGCFGGGEFGDFD7FGCEFGGGGCDGGGFGFEGDGGGGGGGGGDF7FGGGGGGGGGGDGG@FCEFFDFGGGFGFGGFFGGDFGFFEGG>EGCCGDDGGGGGGGGFGGGGDFGGGGGGGGGEFG>FGGGFF;FGFFFFCGFGFCGCFGFGGGGFGGFFFGGG4:CGFFGGGG=3DFDF4F>DFFFFFFF@FB2??726<:;BB03<4 129 | -------------------------------------------------------------------------------- /tests/real_fastqs/miseq_casava_multilane_R1.fastq: -------------------------------------------------------------------------------- 1 | @M05722:45:000000000-LHCCL:1:1101:22781:2332 1:N:0:9 2 | GAGATCCAGCGCAGCATGACGCAGATGCTGGAGCGCGTCATTTTCAACAATGATGCCCCAGCGAAGGCGCTGGAACAGTCGCAGCAGGAAATTGATAAGCTGCTGGCGAAATAAGGGGCGCGTCATGGCTGGATATGATTCCCGTACCGGAGGCCTGCTGGCCTCCACATGGATCGGTTATTCATTGCTGTTCTGGTTTTATCCGCTGGCCTGGCTGGCGGTGCTGAGCGTGACCCAATGGCAGTTTATCG 3 | + 4 | CCCCCGGGGGGGGGGGGB:@F?BF?00:7?BFAAF)( 5 | @M05722:45:000000000-LHCCL:1:1101:17124:2405 1:N:0:9 6 | ACAAGGCCCTGCGTGAATACAGCGCAAAATTTGATAAAACGGAAGTCGCCGCACTGCGCGTCTCCGAGGATGAAATTCAGCAGGCAGGCGCGCGTCTGAGCGACGAGCTGAAGCAAGCCATGCAGGCGGCGGTGCGCAATATTGATACCTTCCACAACGCGCAGATTCTGCCGCCGGTGGATATCGAAACCCAGCCGGGCGTGCGCTGCCAGCAGGTTACCCGCCCAATCGCGTCCGTGGGTCTCTATATT 7 | + 8 | CCCCCGGCGGGGGGGGGGGDGGGGGGGDFGDCFGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGFFFEGGGGGGGDFGECFGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGDGGGGEGGGGG>CAFGFGGGGGGFGGGGGGGGGGGGG9:CFCEFGGGGGGGGG?F7FGGFCECEGGGGGGCGGGGGGGDGGGGG:4CGGGGFGFFE>B:@@FFFFEFBBFB>F3<:4:9942. 
9 | @M05722:45:000000000-LHCCL:1:1101:9672:2430 1:N:0:9 10 | GTCGAGGATTAACCGATAAATTCCAGACCGTTCATATATGGACGCAGCACCTCTGGTACTTCAATGCGGCCATCAGCCTGCTGGTAGTTTTCCAGCACGGCAACCAGCGTACGCCCTACCGCCAGCACAGAACCGTTGAGGGTAGGGACCAGGCGGGTTGTCTTATCGGACTTGCTGCGTCAGCGAGCCTGCATGCGGCGCGCCTGGAAATCACAGACGTTAGAGCAAGTGGGATTCTCGCGGGACGTATT 11 | + 12 | CCCCCGCBECFG7F+3CFD7FF:+:FDEG*>;;DEFDFC>:89B?FFBC**/8AEEGGG8CCFGGCGGG55CG=DF**97CF*7*:**)07.)099FF>*9?7@)(.*.8?BF(1(-2(5. 13 | @M05722:45:000000000-LHCCL:1:1101:8972:2506 1:N:0:9 14 | GCCGCTGTACCAGGGCGGGATGGTGAACTCGCAGGTGAAACAGGCGCAGTACAACTTCGTCGGCGCCAGCGAGCAGCTGGAAAGCGCGCACCGTTCCGTAGTGCAGACCGTGCGTTCCTCCTTCAACAACGTCAACGCGTCTATCAGCAGCATCAACGCCTACAAACAGGCCGTTGTCTCTGCGCAGAGTTCGTTAGACGCCATGGAAGCGGGCTACTCCGTCGGTACGCGTACGATC 15 | + 16 | CCCCCGGGGGGFGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGFEFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGDGCGGGGGGGGGGGGGGGGGGGGGGGDDFGGGFGFFFFFFFFFEFFFF?:F 17 | @M05722:45:000000000-LHCCL:1:1101:6726:2518 1:N:0:9 18 | CTACCCGGTAGTTATTACGCCGATCCAACTGGCTCTCTCTGCAAAATCCTCCGCACCACCTCAACCACAGTCCACTACCAGCGAAACGGTCATAACTGCATAGCCAGCATGATGCGGTTTAACGCAGATTTTGAGTATGTGGATGGCGCGGAGTTAAAGCAGATATGGGCAGACATCGAAACAGCAGAGCACATTAAACGGCTACGCGCTATTCAGCGGGTGGCTTAGGTGGGTGGGATGGGGAAAGTGAA 19 | + 20 | CCCCCDFDGCFGGGGGGGGGGGCGGGGGGGGGGGGGGGEGGGGFFFGGGGGGGEFGGGGGGGGGGGGGGGFGGGGGGGGGGGGD7FGEEGGGGGCGGA@BFCFCCFCEFGGFGGGE7FGGFFEFE@ECGDFGF@DFGF9FCFFGGFFEGFECEGFGAC;FCC@FGFD98>:C5BFGGC6CBCC@FDC?CE6F>EFFFFCE45?=FCCGGGGGGFF77<)5C>:) 21 | @M05722:45:000000000-LHCCL:1:1101:13069:3053 1:N:0:9 22 | GTGGTAGAAGGGCGCGTGACCGGTTTTGCCCTGTGCGTATTACACGAAGGCACGTGGGTGACCCAACCCATCTGTTATCTGGAAGACCTGTTCGTGGACCCGTCATTTCGTGGCCTGGGCATTGCCCGGCAACTCATGCATTTCGTGCTAAATGAAGCCAAAGAACAGAAATGGTCGCGTTTCTACTGGCATACGCGTATTCATAATCCTGCCCGTAAGCTTTATGATGAATTCATCTCCGCAGATGATTA 23 | + 24 | 
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGFGGGGGGGFGFFFFFFFFFFFFFFEFFFFFFFFB?AFFB4 25 | @M05722:45:000000000-LHCCL:1:1101:8021:3120 1:N:0:9 26 | GGCCACACTGCCAGTGATGAAGTGTTCGAGAAGCTGACCAGCATCAAGGGAGCATTCCGTACCCGTATTGCCTACGTCGTGCAGACCAACGGCGGTCAGTTCCCGTACGAACTGCGCGTTTCTGACTACGATGGTTACAACCAGTTTGTCGTTCACCGTTCTCCGCAGCCGCTGATGTCTCCGGCGTGGTCTCCGGACGGCTCGAAACTGGCTTACGTCACCTTCGAAAGCGGCCGTTCGGCGCTGGTTAT 27 | + 28 | CCCCCGGEGGGGGFGGGGGGFFGGGGGGGGGGEFGGGGGGFGGGGGGGFEGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGFGGGFGGGGEG=FGGGGEGGGGGFEGGGCGGGGGGGFFGGFCFEFGGGGGG7FFFGGGGGG8>FFCGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGG=EGGGGDGGGGGGGGGGGGGFDCF@GFFFFFFFFFFD>@FFFFFFBFF9>0;7>A5 29 | @M05722:45:000000000-LHCCL:1:1101:7487:3197 1:N:0:9 30 | GCGTAGTTGAGGGTGACATGATAGGGCGACTGGCGATGAAAACGCTTCATACTGCGGCTGAGGTGAAACTGTTCGGGCCACAAAACGGCACGGGGATCCGGCGACCACCAGAGGATCGGGTCGCCGGGTGAAAACCACGGGAAGATCCCGCGTTGATAGGCCATCAACAGGCGCGCCGGGCCGAGATCGCCGCCTAACGCCAGCAGTCCGTTTGGTTCACGTAATGCGCCCTCAGGTGAGGGAAAAGCAAT 31 | + 32 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGDGGGGGGGGGGGGGGGGGGGGGGFGGFGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGFCFGGGGGGFGGGGGGGFGGGGGG5BEDGGGGGGGGGGGGGGGGDGGGGGGGGGGCDFGGCGGGGGCDGDFGGFFF47>FFFFEFEFFFF>?A4(2(8:FFFF>FEFFFF9>?66?BF?>F 37 | @M05722:45:000000000-LHCCL:1:1101:21825:3431 1:N:0:9 38 | ACTATCCGCATATGCTGGCGCCGGGCCAGAAACAGCGTCTGGGGCTGGCGCGCGCGCTAATCCTACGTCCTAAAGTGATCATCTGTGATGAAGCGCTGGCCTCGCTGGATATGTCGATGCGCTCGCAGCTGGTTAACCTGATGCTCGAACTGCAGGAAAAACAGGGGATCTCGTATATTTACGTGACCCAGCATTTAGGCATGATGAAACA 39 | + 40 | @CCCCFGGGEFGGGCFADC6@CC@FFGGGGCDGGGGG@FGFG8FGGEGGGGGGGGGGGGGFCDEFGGCEFCFFDGGFGGEFFA@FFC9EBAFFFGGGGDGGGGGGGGGF9FFGGGGFEFCEGEGGGGGGGFGGGFGGFGGC=BCFCCFGGFFFDGGGCGFGGFG77CE8CFFGCE8EGGGGFGGGGFCGGGGGFGGGGG?6>B>?3 45 | @M05722:45:000000000-LHCCL:1:1101:21540:3758 1:N:0:9 46 | 
CGCCAATCAGCATCAGCAGGTATCAGTTAAAGCCGCTGTTACCGAACAGCGTTTTGTAGTTGCTGATAAGCCGGGTTTTCGGTGCGCCGGTCGGACGCGTTTCCGGCATCCAGCGCGCCATGCTGAAGGTGACGCCTGCGCAGAGCACCAGCAGGAAACCGTAGCAGGCGCGCCAGTTGATGAGTGTCTCCAGCACGCCGCCAATCAGAGGCGCCAGCAGCGGGCTCACCAGAATACCCATATTTAACAGA 47 | + 48 | CCCCCGGGGCFFGGGFGFGGGG,CFEC:CBFGGGG7FDFCG>E@>F@FGGGGGGDFGG<<=CFFCCFGGGGGG,,,@FGG+8>EG@ECDFG9*1:CEDEB559@FF;9AFG+CGGGGCC?GE=GGGCDEGGGF***1/))/7CD:C7@>BF>?7>F4@@FDFGGGGGGGGGGGGFFGGEEGGGGGGGGGGGGGG>GGGCFEEGG*CEGGGGGGGGGGGGGDGEGGGGGGGGGDFGGGGGGGG@FFGGEGGGGGCFCFGGGGGGDGGDGCDGGFGBBDDF@BEFFDBED?FF??BFF4@FFFFFFFF@ 65 | @M05722:45:000000000-LHCCL:2:1101:23071:4268 1:N:0:9 66 | GCGCGATCTCAATGCTCTGACCATAGTTTTTTGCCGTTTTCGGCGCATTGTCCGCCGGCTGAAGGGCTTCCGTGTGGTTGGCATACAGCGACACCGTTTGCCACGGCTTGTAGACCAGGCCAAAAGTGGGCATCCAGCGACTTTCGGTAAAGCTGGAGGAGGTGGTCTCCAGGCCCGTCTCGTTGCTGTAGTTACGAATCACCACCTTCTGATTGCGCGCCGCGGCGGTAAATAGCAGCGTATCGTCAAAG 67 | + 68 | C<<8FFFGGFFGGGGGFGEFBFGGFGDCCGGCFE,AFGGGG7BFGGGGGGGGGGFGGGGFGGGGGGGFGGGGGGFGGGFGDGGGEGFEC@DDFCGGGDG55CFGFGCGDFFGGFFFGEE57:DDFGFDGGGGGGCF=DGDGEGFEFFDEF>BEFFFFFFF?0,43:EFGGEGGGGGGG5AEGGGGFGGGDGFCFF7FGGGGGGGD5 73 | @M05722:45:000000000-LHCCL:2:1101:11189:4446 1:N:0:9 74 | GCTATCAGCCAGACCGCATTCAGCGGGGTGACCAGTACGCTGTAGTGCACCAGCCGTAGGCTGCCCATCGCCGGGAAGCCACGAACCAGCACCTGCACGCCAGCCAGCAGCGCGCACAGGCTACCCAGTGCGCCACCGATACCGGCAATCACACCGCTCAGCGATTTTTTCAGCGACAGTAAAAAGGCGAATACCGCGGCAAAAGCAAACCACGCCACCGCGCTATTAATTAAGGTCATCACGCTCATTT 75 | + 76 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGGGFGCDGGGGGFGFFFFFFFFFFFFFFFFFFF9?FFFBFFFF 77 | @M05722:45:000000000-LHCCL:2:1101:13328:4518 1:N:0:9 78 | CCTTGGGGTTGACCCCACGGAAGCACCGCCGCGTCTTAATCTCAGCGCGCCGCGCACCATTGAACAGCCCTACGTTTGCATCGCCGTGCAGTCGACCTGCCAGGCCAAGTTCTGGAACAACGGCCACGGCTGGACGGAGGTGGTGGCCCACCTGAAATCGCTTGGCTATCGGGTGCTGTGTATTGACCGTGAGCCCACCACAGGGCACGGCTTCGTCTGGAACCACATCCCCTACGGCGCAGAAGACTTCA 
79 | + 80 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGFGF@FFFFFFFFFFFFFFFFFFF>FFFFFFF) 81 | @M05722:45:000000000-LHCCL:2:1101:24115:4677 1:N:0:9 82 | CCTTTAAACAGCGGTAAATCTTCCAGCGCCAGTCCACCGGTGACGGTGACTTTAAAGCCCATATCCGCCAGGCGTTTGATGGCGCTGATATCCGCCTCACTCCACGCCACGCCAGCCGCCTGCGCATCACGGCTGCGGTGATACACCACCTGCCCGATACCCGCCGCGCGC 83 | + 84 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGEGGGDGGGGGCEEGGGGGGGGGGGGGGGC@@CFFG@@FGGFFFGGGGGGGGGGGG@?CGGA9FBFEECCFCE7CCFGGGGGGGGGF@>FC77FEGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGG@E@EEE 85 | @M05722:45:000000000-LHCCL:2:1101:11908:4786 1:N:0:9 86 | CTTTAAAACCCCACGTGATGAAGTCAGCCTGACCGCTTCCTGGATGCCGCTTACCTTTGACCCGTCGGATAACGACGATAGCGCCATGAAGCAGCTGGATAAGCGTCACAGTACCGCCATGGCGGGCATGGCATGGTATCACCATGAAAAGTGGGGCAGCCTGAAGCTGTCGGCGGCGGCGGATGTTCTGGACAACAGCGATGGCTGGGTTGGCGAGCTGTCCTGGTTCCGCGTATTGCCGGTGGGCAAG 87 | + 88 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGFGCGFFGGGGGGGGGGGGGGGGFGGGGGGGFGGGGGDGGGGGGGGGGCFCFGGFGGFGFGGGGGGGGGGGGGGGGGGGGGGGGEGDGGGGGGGGGGGGGGGGGGGGDCFGGGGGG@EEGGGGD@FGGCFGGCEEGGC=CGDGGGGGGGGFFEGFFGFCEDGDGFGGCDGG<:>+EEGF<+DEEF8F,5CFFGGG@FE@:FCGFDF9FFGEGB 101 | @M05722:45:000000000-LHCCL:2:1101:6264:5384 1:N:0:9 102 | ATTGTATGCCAGTTCCCGCAGCGCACAGGGCACTGCGGGCCCGGACTTCCGTCTGGAAATGGAAGGTTTGCAGTACAGTGAAATCCCGATGCTGGCCGGCAGCAATCCGGCGCTGATGCAGCAGGCGCTACGCGCGGTCAACAACGACTACTCGCTGGCGCGCCTGTACGCAATGGGCGTCGATGCCTGGGCGCTGGCGAATCATTTCTCTCAGATGCGCGGTGTACCGGGCTTTGAACTGAACGGCAACA 103 | + 104 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGGGGGGGGGFGGGGFGFFFFFFFFE>:@@FAAFDFD7>BF69>BB2 105 | @M05722:45:000000000-LHCCL:2:1101:26553:5391 1:N:0:9 106 | 
GTATAGCGCGCTATTTGCGTGGAGCTGAGCACGCCAATGCCGCTAAACGCGCGCAATTCGCTGTAGGCGCTGGCGCTATTTTGCGTACTTTTTTCCTGTGAGGCATAGAGGTCATAATTGAGCACCAGCC 107 | + 108 | CCCCCGDGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGE,CCGGGCGGGGGGGGGDGGGGGGGGGGGGEGGGGGGGGGFGGGGGGGGGGGGGGGGGG9?FFGGEEC8E8,,@FFGGGGGGGGGCGGGGGGGGGGCCCFC:+E,ABE:CE@FGF;C,@D7C>BA: 113 | @M05722:45:000000000-LHCCL:2:1101:22262:5649 1:N:0:9 114 | ACCATCCTCAGGCGGTGCTGGCCTGGCTGGCGCTGCTCCAGTGCGGCGCCCGCATTTTACCGCTCAACCCGCGTCTGCCCGCGTCTCTGGTCAGTGAACTGCTGCCGGGGCTGACCCTCAGCCATGTGCTTAATCTCGACGGCGAGGCGCACTACGATTTGCCTGTGCTGCGGCTGCGTAGCGCGCGCCAGTACCACGATGTCGGCTGGCAGGCTGAACGGCTGGCCTCCATGACGCTGACCTCCGGCTCG 115 | + 116 | CCC3BFFF@FFFB=:??EB>7:DFF>>024( 117 | @M05722:45:000000000-LHCCL:2:1101:8547:5781 1:N:0:9 118 | GTATGGATCTTCACCACCGGCGACGCGCAGGACGGCGAACGCCCGCTGGCGGTGCTGCTCGACGGTCAGTTCTGGGCCGAAAGCATGCCGGTATGGCCTGCGCTGACAGCGCTCACTCGCGAAGGCAAACTGCCTGCGGCGGTGTATGTGCTGATTGACGTGATTGATAACGCCCACCGCAGCGTCGAACTGCCGTGCAATCCCGACTTCTGGCTGGCGGTACAGCATGAGCTGCTTTCCCAGGTACGGA 119 | + 120 | CCCCCGGGGGGGGGGGGGGGGEGGEGGGGGGGGGGGGGGGGGGGGGGGGGGDGEGGGGGGGGGGGGGGGGGGGGGCFGGGGGGGGGFGGGDGGGGGGGGFFGGGGCGGGGFGGGCGFGFGGEGGCFFGFGGFGGG,FF@F>BB?F?F692A*6A<EDGDECE5CC5@FC=FFGGGFGCGGGEDCDCDD7DFFGG>FF=G=FFFFF:E>EFFFFFFF001;99 125 | @M05722:45:000000000-LHCCL:2:1101:5108:6228 1:N:0:9 126 | CCGAACGCGCCGGGCAAATATTTCTACGTCTGGCTGGACGCGCCGATTGGCTACATGGGCTCCTTCAAGAACCTGTGCGACAAACGCGGCGACACCACCAGCTTCGATGAATACTGGAAGAAAGACTCTACCGCCGAGCTGTATCACTTTATCGGCAAAGATATCGTCTACTTCCACAGCCTGTTCTGGCCTGCCATGCTGGAAGGCAGCAACTTCCGTAAGCCGACCAACCTGTTTGTGCACGGCTACGT 127 | + 128 | CCCCCFGGEEGGGGGGGGGGGGGGFGGGGGG?FGGEGGGGGGGGGGGCFGGGEFGDFD7FGCEFGGGGCDGGGFGFEGDGGGGGGGGGDF7FGGGGGGGGGGDGG@FCEFFDFGGGFGFGGFFGGDFGFFEGG>EGCCGDDGGGGGGGGFGGGGDFGGGGGGGGGEFG>FGGGFF;FGFFFFCGFGFCGCFGFGGGGFGGFFFGGG4:CGFFGGGG=3DFDF4F>DFFFFFFF@FB2??726<:;BB03<4 129 | -------------------------------------------------------------------------------- /confindr_src/database_setup.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from confindr_src.methods import download_cgmlst_derived_data, download_mash_sketch, index 3 | from rauth import OAuth1Session 4 | from Bio import SeqIO 5 | import argparse 6 | import datetime 7 | import logging 8 | import shutil 9 | import glob 10 | import ssl 11 | import csv 12 | import re 13 | import os 14 | 15 | class RmlstRest(object): 16 | 17 | def get_session_token(self): 18 | session_request = OAuth1Session(self.consumer_key, 19 | self.consumer_secret, 20 | access_token=self.access_token, 21 | access_token_secret=self.access_secret) 22 | url = self.test_rest_url + '/oauth/get_session_token' 23 | if self.unverified: 24 | r = session_request.get(url, verify=False) 25 | else: 26 | r = session_request.get(url) 27 | if r.status_code == 200: 28 | self.session_token = r.json()['oauth_token'] 29 | self.session_secret = r.json()['oauth_token_secret'] 30 | else: 31 | logging.error('ERROR: Couldn\'t get a session token for rMLST database download. Check that your consumer ' 32 | 'secret and access token files have valid credentials and try again.') 33 | quit(code=1) 34 | 35 | def get_loci_and_scheme_url(self): 36 | session = OAuth1Session(self.consumer_key, 37 | self.consumer_secret, 38 | access_token=self.session_token, 39 | access_token_secret=self.session_secret) 40 | if self.unverified: 41 | r = session.get(self.test_rest_url, verify=False) 42 | else: 43 | r = session.get(self.test_rest_url) 44 | if r.status_code == 200 or r.status_code == 201: 45 | if re.search('json', r.headers['content-type'], flags=0): 46 | decoded = r.json() 47 | else: 48 | decoded = r.text 49 | # Extract the URLs from the returned data 50 | self.loci = decoded['loci'] 51 | self.profile = decoded['schemes'] 52 | else: 53 | logging.error('ERROR: Could not find URLs for rMLST download, they may have moved. 
Please open an issue ' 54 | 'at https://github.com/OLC-Bioinformatics/ConFindr/issues and we\'ll get things sorted out.') 55 | quit(code=1) 56 | 57 | def download_loci(self): 58 | session = OAuth1Session(self.consumer_key, 59 | self.consumer_secret, 60 | access_token=self.session_token, 61 | access_token_secret=self.session_secret) 62 | if self.unverified: 63 | r = session.get(self.loci, verify=False) 64 | else: 65 | r = session.get(self.loci) 66 | if r.status_code == 200 or r.status_code == 201: 67 | if re.search('json', r.headers['content-type'], flags=0): 68 | decoded = r.json() 69 | else: 70 | decoded = r.text 71 | # Extract all the URLs in the decoded dictionary under the key 'loci' 72 | for locus_url in decoded['loci']: 73 | output_file = os.path.join(self.output_folder, '{}.tfa'.format(os.path.split(locus_url)[1])) 74 | logging.info('Downloading {}...'.format(os.path.split(locus_url)[1])) 75 | with open(output_file, 'w') as f: 76 | if self.unverified: 77 | download = session.get(locus_url + '/alleles_fasta', verify=False) 78 | else: 79 | download = session.get(locus_url + '/alleles_fasta') 80 | if download.status_code == 200 or download.status_code == 201: 81 | if re.search('json', download.headers['content-type'], flags=0): 82 | decoded = download.json() 83 | else: 84 | decoded = download.text 85 | with open(output_file, 'w') as locus_fasta: 86 | locus_fasta.write(decoded) 87 | 88 | else: 89 | logging.error('ERROR: Could not find URLs for rMLST download, they may have moved. 
Please open an issue ' 90 | 'at https://github.com/OLC-Bioinformatics/ConFindr/issues and we\'ll get things sorted out.') 91 | quit(code=1) 92 | 93 | def download_profile(self): 94 | profile_file = os.path.join(self.output_folder, 'profiles.txt') 95 | session = OAuth1Session(self.consumer_key, 96 | self.consumer_secret, 97 | access_token=self.session_token, 98 | access_token_secret=self.session_secret) 99 | if self.unverified: 100 | r = session.get(self.profile + '/1/profiles_csv', verify=False) 101 | else: 102 | r = session.get(self.profile + '/1/profiles_csv') 103 | logging.info('Downloading rMLST profiles...') 104 | if r.status_code == 200 or r.status_code == 201: 105 | if re.search('json', r.headers['content-type'], flags=0): 106 | decoded = r.json() 107 | else: 108 | decoded = r.text 109 | # Write the profile file to disk 110 | with open(profile_file, 'w') as profile: 111 | profile.write(decoded) 112 | 113 | def get_request_token(self): 114 | session = OAuth1Session(consumer_key=self.consumer_key, 115 | consumer_secret=self.consumer_secret) 116 | # Use the test URL in the GET request 117 | r = session.request(method='GET', 118 | url=self.request_token_url, 119 | params={'oauth_callback': 'oob'}) 120 | if r.status_code == 200: 121 | self.request_token = r.json()['oauth_token'] 122 | self.request_secret = r.json()['oauth_token_secret'] 123 | 124 | def get_access_token(self): 125 | authorize_url = self.test_web_url + '&page=authorizeClient&oauth_token=' + self.request_token 126 | print('Visit this URL in your browser: ' + authorize_url) 127 | verifier = input('Enter oauth_verifier from browser: ') 128 | session_request = OAuth1Session(consumer_key=self.consumer_key, 129 | consumer_secret=self.consumer_secret, 130 | access_token=self.request_token, 131 | access_token_secret=self.request_secret) 132 | # Perform a GET request with the appropriate keys and tokens 133 | if self.unverified: 134 | r = session_request.get(self.access_token_url, verify=False, 135 | 
params={ 136 | 'oauth_verifier': verifier 137 | }) 138 | else: 139 | r = session_request.get(self.access_token_url, 140 | params={ 141 | 'oauth_verifier': verifier 142 | }) 143 | # If the status code is '200' (OK), proceed 144 | if r.status_code == 200: 145 | # Save the JSON-decoded token secret and token 146 | self.access_token = r.json()['oauth_token'] 147 | self.access_secret = r.json()['oauth_token_secret'] 148 | 149 | def __init__(self, consumer_secret_file, output_folder, unverified=False): 150 | self.test_rest_url = 'https://rest.pubmlst.org/db/pubmlst_rmlst_seqdef' 151 | self.test_web_url = 'https://pubmlst.org/cgi-bin/bigsdb/bigsdb.pl?db=pubmlst_rmlst_seqdef' 152 | self.request_token_url = self.test_rest_url + '/oauth/get_request_token' 153 | self.access_token_url = self.test_rest_url + '/oauth/get_access_token' 154 | self.authorize_url = self.test_web_url + '&page=authorizeClient' 155 | self.output_folder = output_folder 156 | self.unverified = unverified 157 | 158 | # Get the consumer secret set up. 159 | if not os.path.isfile(consumer_secret_file): 160 | logging.error('ERROR: Could not find consumer secret file. Please make sure the file you specified ' 161 | '({}) exists and try again.'.format(consumer_secret_file)) 162 | quit(code=1) 163 | with open(consumer_secret_file) as f: 164 | lines = f.readlines() 165 | try: 166 | self.consumer_key = lines[0].rstrip() 167 | self.consumer_secret = lines[1].rstrip() 168 | except IndexError: 169 | logging.error('ERROR: Could not parse your consumer secret file. 
File should have supplied consumer key ' 170 | 'on first line, and consumer secret on the second line.') 171 | quit(code=1) 172 | 173 | self.session_secret = str() 174 | self.session_token = str() 175 | self.loci = str() 176 | self.profile = str() 177 | self.request_token = str() 178 | self.request_secret = str() 179 | self.access_token = str() 180 | self.access_secret = str() 181 | 182 | 183 | def create_gene_allele_file(profiles_file, gene_allele_file): 184 | genus_allele_info = dict() 185 | genera = set() 186 | with open(profiles_file) as tsvfile: 187 | reader = csv.DictReader(tsvfile, delimiter='\t') 188 | for row in reader: 189 | genus = row['genus'] 190 | # If the genus is uncertain e.g. Escherichia/Shigella, split on the /, and use Escherichia as the genus 191 | if '/' in genus: 192 | genus = genus.split('/')[0] 193 | genera.add(genus) 194 | if genus not in genus_allele_info: 195 | genus_allele_info[genus] = list() 196 | for i in range(1, 66): 197 | if i < 10: 198 | gene = 'BACT00000' + str(i) 199 | else: 200 | gene = 'BACT0000' + str(i) 201 | if gene in row: 202 | allele_number = row[gene] 203 | gene_allele = '{}_{}'.format(gene, allele_number) 204 | if allele_number != 'N' and gene_allele not in genus_allele_info[genus]: 205 | genus_allele_info[genus].append(gene_allele) 206 | with open(gene_allele_file, 'w') as f: 207 | for genus in genus_allele_info: 208 | f.write(str(genus) + ':') 209 | for allele in genus_allele_info[genus]: 210 | f.write(str(allele) + ',') 211 | f.write('\n') 212 | return genera 213 | 214 | 215 | def setup_confindr_database(output_folder, consumer_secret, index_databases=False, unverified=False): 216 | # Go through the REST API in order to get profiles downloaded. 
217 | rmlst_rest = RmlstRest(consumer_secret_file=consumer_secret, 218 | output_folder=output_folder, unverified=unverified) 219 | rmlst_rest.get_request_token() 220 | rmlst_rest.get_access_token() 221 | rmlst_rest.get_session_token() 222 | rmlst_rest.get_loci_and_scheme_url() 223 | rmlst_rest.download_loci() 224 | rmlst_rest.download_profile() 225 | 226 | # With the sequences downloaded, make a file of all rMLST sequences combined. 227 | logging.info('Combining rMLST files...') 228 | with open(os.path.join(output_folder, 'rMLST_combined.fasta'), 'w') as f: 229 | locus_files = sorted(glob.glob(os.path.join(output_folder, 'BACT*.tfa'))) 230 | for locus_file in locus_files: 231 | for record in SeqIO.parse(locus_file, 'fasta'): 232 | record.id = record.id.replace('-', '_') 233 | try: 234 | record.seq._data = record.seq._data.replace('-', '').replace('N', '') 235 | except TypeError: 236 | record.seq._data = record.seq._data.replace(b'-', b'').replace(b'N', b'') 237 | 238 | # If the entire FASTA sequence is encoded in byte-like 239 | # formatting (b' at the beginning and ' at the end of the 240 | # sequence), fix: 241 | if record.seq._data[0:2] == "b'" and record.seq._data[-1] == "'": 242 | record.seq._data = record.seq._data.replace("b'", "").replace("'", "") 243 | 244 | record.name = '' 245 | record.description = '' 246 | SeqIO.write(record, f, 'fasta') 247 | # Clean up individual file. 248 | try: 249 | os.remove(locus_file) 250 | except OSError: 251 | logging.warning('WARNING: Could not delete {}. This won\'t affect ConFindr performance, but ' 252 | ' you may want to delete it to save on disk space.'.format(locus_file)) 253 | 254 | logging.info('Assigning alleles to genera...') 255 | # Parse profiles so that we know what alleles are found with each genus. 
def main():
    """
    Command-line entry point: download the ConFindr databases into a fresh output folder.

    Parses the command-line options, deletes the output folder if it already exists and recreates it,
    then downloads the cgMLST-derived data, the rMLST data (only when a secret file was supplied) and the
    mash sketch. Finally the current UTC date is written to download_date.txt as year-month-day
    (not zero-padded).
    """
    logging.basicConfig(format='\033[92m \033[1m %(asctime)s \033[0m %(message)s ',
                        level=logging.INFO,
                        datefmt='%Y-%m-%d %H:%M:%S')
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', '--output_folder',
                        default=os.environ.get('CONFINDR_DB', os.path.expanduser('~/.confindr_db')),
                        help='Path to download databases to - if folder does not exist, will be created. If folder does'
                             ' exist, will be deleted and updated sequences downloaded. Defaults to ~/.confindr_db, or '
                             'the CONFINDR_DB environmental variable.')
    parser.add_argument('-s', '--secret_file',
                        type=str,
                        help='Path to consumer secret file for rMLST database.')
    parser.add_argument('-i', '--index_databases',
                        action='store_true',
                        help='Enable this option if you are installing the databases to a drive that will be read-only '
                             'after the installation. The script will create and index all the necessary genus-specific'
                             ' database files. Note that this is very slow for the rMLST database.')
    parser.add_argument('-u', '--unverified',
                        action='store_true',
                        help="Enable this option if you plan on running ConFindr behind a firewall and/or have a self- "
                             "signed certificate. Adds 'verify=False' during session requests.")
    args = parser.parse_args()

    # Always start from an empty folder: anything already in the output folder is removed.
    if os.path.isdir(args.output_folder):
        logging.info('Removing old databases...')
        shutil.rmtree(args.output_folder)
    os.makedirs(args.output_folder)

    if args.unverified:
        # Swap the default HTTPS context factory for the unverified one for the rest of the process.
        ssl._create_default_https_context = ssl._create_unverified_context

    download_cgmlst_derived_data(args.output_folder)
    if args.secret_file is None:
        logging.warning('WARNING: Without an rMLST secret file, data will only be downloaded for Escherichia, '
                        'Salmonella, and Listeria. See '
                        'https://olc-bioinformatics.github.io/ConFindr/install/#downloading-confindr-databases for '
                        'instructions on how to get access to rMLST databases so ConFindr can be used for other species'
                        ' as well')
    else:
        setup_confindr_database(output_folder=args.output_folder,
                                consumer_secret=args.secret_file,
                                index_databases=args.index_databases,
                                unverified=args.unverified)
    download_mash_sketch(args.output_folder)

    # Read the clock once so year, month and day all come from the same instant. Separate reads could
    # straddle midnight and produce a date that never existed. A timezone-aware now() is used because
    # utcnow() is deprecated.
    download_time = datetime.datetime.now(datetime.timezone.utc)
    with open(os.path.join(args.output_folder, 'download_date.txt'), 'w') as f:
        f.write('{}-{}-{}'.format(download_time.year, download_time.month, download_time.day))
    logging.info('Done downloading ConFindr databases!')


if __name__ == '__main__':
    main()
GGCGACGCCAGACGAAATATTGGCTAATAAAAAACTGAGCGCAATAAAGCTTTTGCCGTATTTAATTCGCCGCAACCCGGCGGCAAACAGCAGCGAAGCGATAAAGACAATCGGGATATACCACATCAGAAAGCGACAGATATTCCACATCGACGTCCAGAAAAGCGCATCATTGACCACGCCAAGCAGGTTTTGTAGCCCCGAAAAACGGGGCGTACCGATAAACTGCCATTGGGTCACGCTCAGCCCC 3 | + 4 | CCCCCGCFGDGCFGGGGGGGGDFC@@E9FFFEDC@FG9FFFGEFFFEGGGG9FEGGGG@@FGCFDDFGGGC+>FFGFGGDGGGFGGGGGGGF=CECFGGGDDGBA,AFGCFGDEC@GDGGFDC@@E7B99D8BF88CGGFEGGGGFGFGGGG6C8;9CEDFC,8C<=DD<7FGF:+@CGFFEABBA>A;30;6=8:C66)4<9>>(41:9011,3719?F:(3(6794,1)604((- 5 | @SRR31134600.173 M05722:45:000000000-LHCCL:1:1101:17124:2405/2 6 | GAGAATTTCATCGGCAATCGGCGGCGGTGAGCAGAGCACCACCTTTTTACAGCCGGCAATACGTGCCGGGGTAGCCAGCATTAACACCGTTGAAAACAGCGGGGCTGAACCGCCAGGAATATAGAGACCCACGGACGCGATTGGGCGGGTAACCTGCTGGCAGCGCACGCCCGGCTGGGTTTCGATATCCACCGGCGGCAGAATCTGCGCGGTGTGGAAGGGATCAATATTGCCCACCGCCGCCTGCATGG 7 | + 8 | CCCCC9FGGGGGG@GGGGGFGGGGGGEFEFGFGFGGGGEFGGGG@,EEFFCFGGGDEGGGGGGGGGFCECCECEGGGFGGBFGGGGGGGFGGCFFGGGGFECFFGGGGGGGG7EGGGCDFGGGCGGFEEF>EGGGGEGDGE5@FGGGGGCEGGFFGCGGGEG=3=DGDG373DDD)97>:5)@:>FFEFF@FFFB>F31(7:(690(8?14>: 9 | @SRR31134600.194 M05722:45:000000000-LHCCL:1:1101:9672:2430/2 10 | CCCTTTACGGTACCGGGCAACTGCCGAAATTTGCCGGCGACATGTTCCATACTCGTCCGCTGGAAGAAGAAGCTGACAGCAGCCACTATGCGCTGATCCCAACGGCGGAAGTGCCGCTGACTAACCTCGTTCTCGATGACATCATCGCCGAAGACGATCTGCCGATCAAACTGATCTCACACACGCCGTGCTTCCTCACTGAATCGGGTTCCTACGGTCGTGACACGCGTGGTCTGATCCGTATGCACAC 11 | + 12 | CC>,@,,7>E7@E:*@EEC8C?>EGEEC99?DDEC47*::?+@+;D69A)58>FFB5A5:83).+.:A2<)61491<624),8:?<942016.(-2440,>):46A4(38(1))-.. 
13 | @SRR31134600.239 M05722:45:000000000-LHCCL:1:1101:8972:2506/2 14 | GATCGTACGCGTACCGACGGAGTAGCCCGCTTCCATGGCGTCTAACGAACTCTGCGCAGAGACAACGGCCTGTTTGTAGGCGTTGATGCTGCTGATAGACGCGTTGACGTTGTTGAAGGAGGAACGCACGGTCTGCACTACGGAACGGTGCGCGCTTTCCAGCTGCTCGCTGGCGCCGACGAAGTTGTACTGCGCCTGTTTCACCTGCGAGTTCACCATCCCGCCCTGGTACAGCGGC 15 | + 16 | @8ACCGGGGGEGGGGFDCFFGGGGGG9FFGGGGGGGFEFFFCGGGFFF>>146>BDABB?0966>FFFFAF<7>B9 17 | @SRR31134600.249 M05722:45:000000000-LHCCL:1:1101:6726:2518/2 18 | TCTTTGCTGCAGCCCGTTAATGAGTAGCCTTTAATCTGGACATTCTTGTAACCGCTATCGACAAGAACTTTTTTTGCTTCATTGGCATCGGTGCAACCAGATGCGGCAAATACGACCAATTCGAGTAACATCTTCACTTTCCCCCTCCCCCCCCCCTACTCCCCCCGCTCAATTCCCCCTTCCCCTTTTATGTCCTCTCCTCTTTCTATTTCTCCCCTTTTCTTCTTTATCTCCGCCCCCTCCCCATTCT 19 | + 20 | <@CCCCEEDF8EE,C+,,9,,9,94>DF,>==AD9D,+@@E6?CE>CE87*,,41,*@?*/)))*2++1*2)0))02)2./*+1+3+1).1((.(1/).))11.)1))/,(.)-65)-...)))...(,(-((,(-,((.)-- 21 | @SRR31134600.638 M05722:45:000000000-LHCCL:1:1101:13069:3053/2 22 | GTACGTAATCATCTGCGGAGATGAATTCATCATAAAGCTTACGGGCAGGATTATGAATACGCGTATGCCAGTAGAAACGCGACCATTTCTGTTCTTTGGCTTCATTTAGCACGAAATGCATGAGTTGCCGGGCAATGCCCAGGCCACGAAATGACGGGTCCACGAACAGGTCTTCCAGATAACAGATGGGTTGGGTCACCCACGTGCCTTCGTGTAATACGCACAGGGCAAAACCGGTCACGCGCCCTT 23 | + 24 | CCCCCGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGG?@FGGGGGGGGGGGGGGGGGGDGGGDGGGGGGFFGGGGDFGFFFFFFFFFFFFFBFFFFFEEFDFFFFDD4C<@@FFDEEFFFFFFFF@@@F16637>>FFF??<11>F02 25 | @SRR31134600.693 M05722:45:000000000-LHCCL:1:1101:8021:3120/2 26 | GTTGTGACGCGGGAAGGAAGCCACTTGGCGAACCGCACCGTTTGCCAGCGTCTGGATAACCAGCGCCGAACGGCCGCTTTCGAAGGTGACGTAAGCCAGTTTCGAGCCGTCCGGAGACCACGCCGGAGACATCAGCGGCTGCGGAGAACGGTGAACGACAAACTGGTTGTAACCATCGTAGTCAGAAACGCGCAGTTCGTACGGGAACTGACCGCCGTTGGTCTGCACGACGTAGGCAATACGGGTACGGA 27 | + 28 | CAB@CF:CEFFGDCFEGGGFGGGGGGGG9FF@CFDBFC@CC:FBFGF=FC:4BC=FGGGGGFFGGFGG<5EFCFEFCCG7F=FG@:FFC:EGGGGGGDFC,DFCGGGGDG>CC5EGCFEGGGGGGCEGGGGG7?DGCCDGGGGG4CDGFFFFFFFFFF3@DDDF?AFF6??06>:>B07A>?>FFFFB0:?::3( 29 
| @SRR31134600.765 M05722:45:000000000-LHCCL:1:1101:7487:3197/2 30 | GTTACTACCAGTTTAAACAGCGTCTGTAGACTATGATTGAGTAAACTTTGTCTCGTGGGGTTATCGCGATATGCGCCTTGTGCAGCTTTCTCGTCATTCGATTGCTTTTCCCTCACCTGAGGGCGCATTACGTGAACCAAACGGACTGCTGGCGTTAGGCGGCGATCTCGGCCCGGCGCGCCTGTTGATGGCCTATCAACGCGTGATCTTCCCGTGGTTTTCACCCGGCGACCCGATCCTCTGGTGGTCG 31 | + 32 | CCCCCGGGFGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDDGGGGGGGGEGGEFGGGGGGGGGGGGGGGFGGGGGGGFFGGGGGGGGGFGGGGGFCGFGGGGGGGGGGGD@FGGGGGGGGGGGGE8ECEGFGGEGGGDGGGGGGGGGGGGGGGCC*3;DGGFFFFFFFFF+;AFFF@3=C7@FF@(4:1A;FFFF>;6;F9BBF:AFF?)1 33 | @SRR31134600.792 M05722:45:000000000-LHCCL:1:1101:7839:3227/2 34 | CCTTTCTGTACAGCAACGTTTTCTGCTGGCCGTACCAGCCGTACGAGGAAAATTATGAGACGCCTTCCCGTGTTTTTTGTCCTGGACTGTTCAGAGTCCATGATTGGTGAAAACCTGAAAAAAATGACTGATGGTCTGCAAATGATCGTCGGAGATTTAAGAAAGGATCCACACGCACTTGAAACAGCCTGGGTCTCGGTAATCGCATTTGCCGGTGTAGCCCGTACGATTGTACCTCTTCACGAAATTG 35 | + 36 | CCCCCCGFGFFGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGDGGGGGGGGGFGGGEGGGGGFGGEFFGGGGGGGGGFGGGGGGGGGGGDGFGEGGGGGFGEEFGGFDFCFGGGFGGGGFGGGGGGGGGGGCGGGDFGGGGGGGGGGFFFGGDCDGGGFFFFFBFFA59ACFFDCCFBADAECB@D2@?E4BFB=@F=EF3:>B>?B0(6;4?F75 37 | @SRR31134600.946 M05722:45:000000000-LHCCL:1:1101:21825:3431/2 38 | TGTTTCATCATGCCTAAATGCTGGGTCACGTAAATATACGAGATCCCCTGTTTTTCCTGCAGTTCGAGCATCAGGTTAACCAGCTGCGAGCGCATCGACATATCCAGCGAGGCCAGCGCTTCATCACAGATGATCACTTTAGGACGTAGGATTAGCGCGCGCGCCAGCCCCATACGCTGTTTCTGGCCCGGCGCCAGCATATGCGGATAGT 39 | + 40 | 8A88,CEFFEDF9F9FFFA;DFDA3>9>FD<47566:11<(9 41 | @SRR31134600.1028 M05722:45:000000000-LHCCL:1:1101:15030:3498/2 42 | CATCAGCACCGCGCAGTTAAGCTCCTGGCGAAGCTGATTAATCAGATCGTATAGCGCCACCTGGCCGTTGACGTCAACGCCCTGGGTGGGTTCATCCAGCACCAGCAGCTGAGGCTTGTTTAACAGCGCACGCGCCAGCAGCACGCGCTGGGTTTCACCACCGGAGAGCTTCTGCATCGGTGCATCCTGCAGGTGCCCGGCCTGTACGCGCATCAGCGCGGGCAGAATATCTTCTTTGCGCGTTGCCGGGG 43 | + 44 | CCCCCGGGGGGGGGGGGGGGGD>EEGGGGGGGGGGD5EEGFEGG0;(24??BFFF;BFF(5<22)).-)<)4)3>>BBF59@FFGEC*>;;:EEGGEFEEGGFGDECF:+3+0**).08@FF))9>>)84?9)*54((7(-2(444((,3(-,(,(((462(((--40:4:666,81<0(311(( 
49 | @SRR31134600.1334 M05722:45:000000000-LHCCL:1:1101:14547:3883/2 50 | ACGCTAAACAGCCGCTGCTGATTCGTACGCGCCGATTACTGGGGCTGTGGTGCTTTGCGTGGGCGACGATCCACCTGACCAGCTACAGTTTACTGGAGCTCGGCATCCGCAATGTGTCGCTATTAGGGCAGGAGATTGTTACCCGGCCGTATTTGTTGCTGGGTTTCGCCTGCTGGTTAATCCTGCTCGCGCTGGCGGTGACCTCTAC 51 | + 52 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGDGGGGGGGGGGGGGGGGFFFFFFFFFFAFFFAFFF>@FFFEFFFB< 53 | @SRR31134600.1373 M05722:45:000000000-LHCCL:1:1101:25476:3918/2 54 | TTTGAGAACAACCTGCGCATTATGGTCGTGGGCGACTTCGATGCCGATGGCGCCACCAGCACCGCGCTGAGCGTGCTGTCGCTGCGCGCCATGGGCTGTCGCGCCGTCGAGTATCTGGTGCCGAACCGTTTTGAAGACGGCTACGGTCTGAGCCCGGCAGTGGTCGATCAGGCCCACGCCCGCGGCCCGCAAATGATCATGACGGTCGATAACGGGATCTCTTCCCATCCGGGGGTCGATCTTGCCCATGA 55 | + 56 | @CCCCGGGGGDGGGG8FEGGG@FFGFFEG@CFFCEFGGGG?<C7CFGGE9=FDFADE+BC8B>CEGE5DC;EC*@C>F54D:?C79D,;8:*474=:+FGCDDG3))2)>>)<).)7)7)9<25=59A<>4>8<>B6(4)-4(((48>A>6A:<6(((2(-39?0(-(()-),.4(4- 57 | @SRR31134600.1442 M05722:45:000000000-LHCCL:1:1101:14095:3991/2 58 | CTACACCACCGATCCGCGCGTGGTGCCTGCGGCGAAACGTATTGATGAAATTGCCTTTGAAGAGGCCGCCGAGCTGGCGACCTTCGGCGCGAAGGTTCTGCATCCGGCCACGTTGCTGCCAGCCGTACGCAGCGACATTCCTGTGTTTGTCGGCTCCAGTAAAGATCCCAAAGCGGGCGGCACGCTGGTGTGCAACGAAACCGCCAATCCGCCGCTGTTCCGCGCGCTGGCGCTGCGCCGTAAGCAAACG 59 | + 60 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGFGGFGGGGGGGGGGGGDGGGGGGGGDGGGGGGGGGGGGGGFFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGCGGGGGGGGFGCGGFGGGGGCFCGGFGGFGGGGGFFFFFFFEDFF>BFFADB;B?9BF6BBFFFFF1?F>FFFFFBFFFF:1?BBFFBBF3??FFBBBFF 61 | @SRR31134600.1598 M05722:45:000000000-LHCCL:1:1101:19331:4173/2 62 | ACGCTGAAAGATGCCTGTAACGAAGCGCTGCGCGACTGGTCTGGCAGCTATGAAACCGCGCACTATATGCTCGGCACCGCAGCAGGCCCGCACCCGTTCCCAACCATCGTGCGTGAATTCCAGCGCATGATTGGTGAAGAGACCAAAGTGCAAATCCGTGAAAAAGAAGGTCGTCTGCCGGATGCGGTTATTGCCTGCGTCGGCGGCGGTTCTAACGCCATCGGTATGTTCGCC 63 | + 64 | 
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGEGGGGGGGGGGGGGGGGGGFDEGDFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEFGGGGGEGGGGGGGGGGGGGGGGFFFFDFFFFFFDFFFFFFFFFFFFBB>FBFFFFFDFFFFFF>FFFFFB?FFFFFFB 65 | @SRR31134600.1696 M05722:45:000000000-LHCCL:1:1101:23071:4268/2 66 | AACGTTGGCTATTCCGCGCAGATCCGCAGCGATGCCACCGCCTGGCGTATGTCGAACAGTAAAGCCAACCCGGTCGTTAATATTTACAACAACCATGATGTGAGCATGCCAGCGTACGCATCGGTGGGCGGTAACTATCACGATCCGCTGGTGACTGCGCGTAACCGTACTCAGGGCTGGCTGTTCAGCGATACCGTCGGATTCTTTGACGATACGCTGCTATTTACCGACGCGGCGCGCAAGCAGAAGGG 67 | + 68 | >7BC86BEGGGGACCFGGFGDCF@EC@CGGGGGGGGEED=CE4*CCFGGFC*CDFB45AF+;6@>3:A:;<(>690:1<<49:A)41:44?F0:>FA>)6)671(4141291(,-3-((47DG3C>@FF?FFBF:<757.,9>EF;(11)6AA>4< 73 | @SRR31134600.1842 M05722:45:000000000-LHCCL:1:1101:11189:4446/2 74 | CAACAAGGAGAGGTGAAATGAGCGTGATGACCTTAATTAATAGCGCGGTGGCGTGGTTTGCTTTTGCCGCGGTATTCGCCTTTTTACTGTCGCTGAAAAAATCGCTGAGCGGTGTGATTGCCGGTATCGGTGGCGCACTGGGTAGCCTGTGCGCGCTGCTGGCTGGCGTGCAGGTGCTGGTTCGTGGCTTCCCGGCGATGGGCAGCCTACGGCTGGTGCACTACAGCGTACTGGTCACCCCGCTGAATGCG 75 | + 76 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDCFGGGGGGGFFFFFFFFFFFFFFFFFFFFF:BFFFFFFF:?B>>FFBFFFFFFF?F7>081:A:A<9:>FFBBF>9?FAA1 77 | @SRR31134600.1900 M05722:45:000000000-LHCCL:1:1101:13328:4518/2 78 | GTCCCAGCAACCGCTGCAACCGTGGCTGCTGAACACGCGCCAGGGGGTGTAAAACTCGGCGTTCGGCAGGCTGAAGCCGCTAATCAGCACCACCGGTATTTTTGCCGCCCAGGCAAGCCACGACAGCCCGCTGGCCAGGCCGATAAAGAAGCTGGCGTGGCGAAGCAGATCAACCCGCGCCTGCAGCGGCAGGGCCCCGGTGCAGTCTTCTGCGCCGTAGGGGATGTGGTTCCAGACGAAGCCGTGCC 79 | + 80 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGDGGGGGGGGGGGGGFGGGGGGGGGGGGCEGDGGGGGGGFGGFGGGEGDDCEFGGGGGFGGGGGGGGGGGGFG4DGDGFGGGGFFFFFFFFFEEFFB@?:9DBFFF(:BFF>AFBB>6:?FF>BB?FFBBFFFFFBBFF:019:>F: 81 | @SRR31134600.2028 M05722:45:000000000-LHCCL:1:1101:24115:4677/2 82 | 
GCGCGCGGCGGGTATCGGGCAGGTGGTGTATCACCGCAGCCGTGATGCGCAGGCGGCTGGCGTGGCGTGGAGTGAGGCGGATATCAGCGCCATCAAACGCCTGGCGGATATGGGCTTTAAAGTCACCGTCACCGGTGGACTGGCGCTGGAAGATTTACCGCTGTTTAAAGG 83 | + 84 | CCCCCGEEEGGGGGGGGGGGDCGGFFGGFGGGGFGE@CEFGGGDGFGGGG7C=FGDGGGDGF:CEGGBFG:FFCFFCDEDGGGA=FFFGGEFEGCC 85 | @SRR31134600.2116 M05722:45:000000000-LHCCL:1:1101:11908:4786/2 86 | CGCCGGGACTCTGCGCCCGACACGCCATAGTAGTAGCGGTTAAAGCGGTCATCGTAATAATAGACACCCGCCGCGGGAATCACCGACAGCTTGCCCACCGGCAATACGCGGAACCAGGACAGCTCGCCAACCCAGCCATCGCTGTTGTCCAGAACATCCGCCGCCGCCGACAGCTTCAGGCTGCCCCACTTTTCATGGTGATACCATGCCATGCCCGCCATGGCGGTACTGTGACGCTTATCCAGCTGCTT 87 | + 88 | CEGEGGC@CEGFFDECEC:ECGGFEFCECE8CGG>CC>>EFF>FGDGGGGG@FGGFC78C6FCGGGGEDDDG<<3CD:CFGGFF45>F:5>>DDFFFFFAF)0942677A0>21:B96<2<612;1:AFF)6>B<>34.4:2 89 | @SRR31134600.2153 M05722:45:000000000-LHCCL:1:1101:14685:4816/2 90 | CATGAACGATTATCTGCCTGGCGAAACCGCCATCTGGCAGCGCATTGAAGGCACACTCAAGCAGGTGCTCGGCAGCTACGGTTACAGCGAAATCCGTTTGCCGATTGTAGAGCAGACCCCGTTATTCAAGCGCGCTATCGGTGAAGTCACCGACGTGGTTGAAAAAGAGATGTATACCTTTGAAGACCGCAACGGCGATAGCCTGACGCTGCGTCCGGAAGGTACTGCGGGCTGCGTACGCGCCGGCATCG 91 | + 92 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFFGGGGGGGGGGGGGFFFFFFFFFDEBFFFFFFFFFFFFEFFFFFFFBBFF>9>?>FFFFBFFBFB9B>FFFF:;FF9BFFFF9B>2 93 | @SRR31134600.2259 M05722:45:000000000-LHCCL:1:1101:9465:4932/2 94 | GTGATAGAAACCAACTGCCTGATTGATGCGCTGGGCACGCATCCGCTGGCCTTTGGCAAACTGCCGCCGCTGATGAACGGCCTGACCCAGCAGGTGAAAGATTTTGAGCGTCTGACCATTGATGCCGCCGTATATGGCGATAAGCAAAAAGCGCTGCTGGCGCTGGTCGCCAACCCGCTGGTCGCCGA 95 | + 96 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGCGGGGGGFGGGGGGGGGGGGFGGFGGGGGGGDGGGGFGFGGFGGGGGGFGGGGGGGGGGDGGGGG>9DGGCFGGGDG4CDG4CEFFFF>BFF 97 | @SRR31134600.2675 M05722:45:000000000-LHCCL:1:1101:5895:5333/2 98 | 
ATCCAGCGGCGTGGTTTATCAAACGGATAGTACAGCGCGGCGAACATCTGATGCTGGCCAACGTCGGAGGTCACATAAGCATCGCCGTGGGTCAGCCTCCAGATGGCCTCAATGACCGCCTGCGGCTTTATCTGTTCACTTTTTTCGTCATACTTCAGGCA 99 | + 100 | 8CC,AFGEE7FFGGGDE,,,D,AF9=DD@C+=7,8>EEG,@DC;F>@;B;5@>E@8EGGEFC,;DFGCCCFG 101 | @SRR31134600.2726 M05722:45:000000000-LHCCL:1:1101:6264:5384/2 102 | GTCCTGATTCGCGCTCAGATCGCCGGTGTTGCCGTTCAGTTCAAAGCCCGGTACACCGCGCATCTGAGAGAAATGATTCGCCAGCGCCCAGGCATCGACGCCCATTGCGTACAGGCGCGCCAGCGAGTAGTCGTTGTTGACCGCGCGTAGCGCCTGCTGCATCAGCGCCGGATTGCTGCCGGCCAGCATCGGGATTTCACTGTACTGCAAACCTTCCATTTCCAGACGGAAGTCCGGGCCCGCAGTGCC 103 | + 104 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGAAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGDGGGGDEFFFGGGCFGGGGGGGGGGGGFEGGGGGGGGGGGCE5=FDDGGFGGGGGGGGFGGGGGGDGGGGFGGFFDFFFFFFFFFEDEB7?FAB2>AFFFFFFFF?B?FFFFFFFAFBFF:BB?:F:>DBB;(39BBB>F(49 105 | @SRR31134600.2736 M05722:45:000000000-LHCCL:1:1101:26553:5391/2 106 | GGCTGGTGCTCAATTATGACCTCTATGCCTCACAGGAAAAAAGTACGCAAAATAGCGCCAGCGCCTACAGCGAATTGCGCGCGTTTAGCGGCATTGGCGTGCTCAGCTCCACGCAAATAGCGCGCTATAC 107 | + 108 | CCCCCGGEFFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGDFDEGGGGGGGGGGDFGGGGGFGCEGGGGGGGGEGGGGGGGGGGGDFDGCEGDBDFGGFEEFGGGGGGGGGGGGGGGGGG; 109 | @SRR31134600.2873 M05722:45:000000000-LHCCL:1:1101:5389:5526/2 110 | GTGGACAACCTGACCAACGCCGGTAACATTGTGTTTGTGCCGTCCCAGTGCACCTTCACGCCTCACACGCTTACCGCAACCAACCTTATTGGTAACGGCTCCACCATCACGCTCAATACCGTTCCGGTCTACAGCAGTTCACCGTTAGATAAAG 111 | + 112 | -A,6,C+,4+++++,3,,3,,733,3,+,6+3,6,,,3 113 | @SRR31134600.2992 M05722:45:000000000-LHCCL:1:1101:22262:5649/2 114 | CTCGCCAGATGCGCGGCGCAGCGGTGAACCGCCGCTTTTGGCAGCCCGGTCGAGCCGGAGGTCAGCGTCATGGAGGCCAGCCGTTCAGCCTGCCAGCCGACATCGTGGTACTGGCGCGCGCTACGCAGCCGCAGCACAGGCAAATCGTAGTGCGCCTCGCCGTCGAGATTAAGCACATGGCTGACGGTCAGCCCCGGCAGCAGGTCACTGACCAGAGCCGCGGGCAGACGCGGGTTGCGCGGTAAACTGCG 115 | + 116 | 
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGDGEGDGGGG+@FGGFGGGGGCECF7FEEGGCFFFFGGGGCGGFCGGGFGCFEFFGFCFGFGGGGGGGDGGGDGGGFFGGGGCGGGGEFGEGGGGGE@DGDEGGGGGGGGGF*FFGGGGCFF@>>)83>>>44;B0>6DF>B?((48>:A)8?BF6?6((,(49>09?BFF;;>04(4(,179676<)6DEFFFFFFFBE69?F?(:D?<:?>?FFB>:67:7:?F>B>>B20,119>FF>B;>21:)622(-7(( 121 | @SRR31134600.3133 M05722:45:000000000-LHCCL:1:1101:14226:5786/2 122 | CCCTGCCAGCCGTCAACAACCTGAATACCGAACTGCATGTCACCTTCTTCTTTCACCAGGTTGAGCGTCCCTTTACCGGTTAACTGAATCAGCGACGATTTGCCCTGCAGGTTATTCAGCGTCAGCACGCCGTTATCGAGATCGATGTCGGTGCTCATTTCATCCAGACGCGTAGCGTTGTCATCGTTCTCCTGCGCGCGCACGTCGGTGCTGCGCTCAACGGCCTGCTGCACCAGCTTCTGGAAGTTCAT 123 | + 124 | CCCCCGGGFGGFGG7FDFAFGFGGCFGGFCEGGGGGFFFGGGGFGGGEEDEGFGCFG9FCFACF,CCFGGGGFGEGGGDGGDCGFG9:CC8+>EGCG68CGGGGF?+8DE?>B:FG8+@=CEFDFEGGFGGGFD89DG>D77?C>C*@FAFFFFFFFFBFAE@@)656(31:@0(609<<)5(54<)) 125 | @SRR31134600.3540 M05722:45:000000000-LHCCL:1:1101:5108:6228/2 126 | GTGCCTGCTCGTCAACGTATCGGTTCGCGACGTCGGCCAGCGCCATTATTTCACGAATCGCTTTACCGAATTCACGGCTGTCCCACGCTTCGCCAATGCTGGTTGCCGCGTCGGTGTAGGTTTTGTACATCTCTGGGTCAGCCAGTTCAGCGGACAGCACGCCGTCGAAACGCTTAGCGATAAAGCCAGCGTTACGGGAAGCCCGGTTCACCACTGTGTTCACGATATCGGCGTTGACGCGCTGGATGAAA 127 | + 128 | <@,BCDFFCDFFF,FGDC@,C;+FC,@FCEGGGGGDEGGGD>FFDG9FCCG<+8CEFGG@FFGG9,7>9EC:>>FFFGD8FGCAF7E5E>EEC,CGCC5)//*/*1><35/C*+)).0:+1+8A=FA)0)4((.4,,(4((-1,(642<13)65-,))6(,(-(:(909>1(4<2:(29((...2)6 129 | -------------------------------------------------------------------------------- /tests/real_fastqs/miseq_casava_sra_R1.fastq: -------------------------------------------------------------------------------- 1 | @SRR31134600.123 M05722:45:000000000-LHCCL:1:1101:22781:2332/1 2 | GAGATCCAGCGCAGCATGACGCAGATGCTGGAGCGCGTCATTTTCAACAATGATGCCCCAGCGAAGGCGCTGGAACAGTCGCAGCAGGAAATTGATAAGCTGCTGGCGAAATAAGGGGCGCGTCATGGCTGGATATGATTCCCGTACCGGAGGCCTGCTGGCCTCCACATGGATCGGTTATTCATTGCTGTTCTGGTTTTATCCGCTGGCCTGGCTGGCGGTGCTGAGCGTGACCCAATGGCAGTTTATCG 3 | + 4 | CCCCCGGGGGGGGGGGGB:@F?BF?00:7?BFAAF)( 5 | @SRR31134600.173 M05722:45:000000000-LHCCL:1:1101:17124:2405/1 
6 | ACAAGGCCCTGCGTGAATACAGCGCAAAATTTGATAAAACGGAAGTCGCCGCACTGCGCGTCTCCGAGGATGAAATTCAGCAGGCAGGCGCGCGTCTGAGCGACGAGCTGAAGCAAGCCATGCAGGCGGCGGTGCGCAATATTGATACCTTCCACAACGCGCAGATTCTGCCGCCGGTGGATATCGAAACCCAGCCGGGCGTGCGCTGCCAGCAGGTTACCCGCCCAATCGCGTCCGTGGGTCTCTATATT 7 | + 8 | CCCCCGGCGGGGGGGGGGGDGGGGGGGDFGDCFGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGFFFEGGGGGGGDFGECFGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGDGGGGEGGGGG>CAFGFGGGGGGFGGGGGGGGGGGGG9:CFCEFGGGGGGGGG?F7FGGFCECEGGGGGGCGGGGGGGDGGGGG:4CGGGGFGFFE>B:@@FFFFEFBBFB>F3<:4:9942. 9 | @SRR31134600.194 M05722:45:000000000-LHCCL:1:1101:9672:2430/1 10 | GTCGAGGATTAACCGATAAATTCCAGACCGTTCATATATGGACGCAGCACCTCTGGTACTTCAATGCGGCCATCAGCCTGCTGGTAGTTTTCCAGCACGGCAACCAGCGTACGCCCTACCGCCAGCACAGAACCGTTGAGGGTAGGGACCAGGCGGGTTGTCTTATCGGACTTGCTGCGTCAGCGAGCCTGCATGCGGCGCGCCTGGAAATCACAGACGTTAGAGCAAGTGGGATTCTCGCGGGACGTATT 11 | + 12 | CCCCCGCBECFG7F+3CFD7FF:+:FDEG*>;;DEFDFC>:89B?FFBC**/8AEEGGG8CCFGGCGGG55CG=DF**97CF*7*:**)07.)099FF>*9?7@)(.*.8?BF(1(-2(5. 13 | @SRR31134600.239 M05722:45:000000000-LHCCL:1:1101:8972:2506/1 14 | GCCGCTGTACCAGGGCGGGATGGTGAACTCGCAGGTGAAACAGGCGCAGTACAACTTCGTCGGCGCCAGCGAGCAGCTGGAAAGCGCGCACCGTTCCGTAGTGCAGACCGTGCGTTCCTCCTTCAACAACGTCAACGCGTCTATCAGCAGCATCAACGCCTACAAACAGGCCGTTGTCTCTGCGCAGAGTTCGTTAGACGCCATGGAAGCGGGCTACTCCGTCGGTACGCGTACGATC 15 | + 16 | CCCCCGGGGGGFGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGFEFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGDGCGGGGGGGGGGGGGGGGGGGGGGGDDFGGGFGFFFFFFFFFEFFFF?:F 17 | @SRR31134600.249 M05722:45:000000000-LHCCL:1:1101:6726:2518/1 18 | CTACCCGGTAGTTATTACGCCGATCCAACTGGCTCTCTCTGCAAAATCCTCCGCACCACCTCAACCACAGTCCACTACCAGCGAAACGGTCATAACTGCATAGCCAGCATGATGCGGTTTAACGCAGATTTTGAGTATGTGGATGGCGCGGAGTTAAAGCAGATATGGGCAGACATCGAAACAGCAGAGCACATTAAACGGCTACGCGCTATTCAGCGGGTGGCTTAGGTGGGTGGGATGGGGAAAGTGAA 19 | + 20 | 
CCCCCDFDGCFGGGGGGGGGGGCGGGGGGGGGGGGGGGEGGGGFFFGGGGGGGEFGGGGGGGGGGGGGGGFGGGGGGGGGGGGD7FGEEGGGGGCGGA@BFCFCCFCEFGGFGGGE7FGGFFEFE@ECGDFGF@DFGF9FCFFGGFFEGFECEGFGAC;FCC@FGFD98>:C5BFGGC6CBCC@FDC?CE6F>EFFFFCE45?=FCCGGGGGGFF77<)5C>:) 21 | @SRR31134600.638 M05722:45:000000000-LHCCL:1:1101:13069:3053/1 22 | GTGGTAGAAGGGCGCGTGACCGGTTTTGCCCTGTGCGTATTACACGAAGGCACGTGGGTGACCCAACCCATCTGTTATCTGGAAGACCTGTTCGTGGACCCGTCATTTCGTGGCCTGGGCATTGCCCGGCAACTCATGCATTTCGTGCTAAATGAAGCCAAAGAACAGAAATGGTCGCGTTTCTACTGGCATACGCGTATTCATAATCCTGCCCGTAAGCTTTATGATGAATTCATCTCCGCAGATGATTA 23 | + 24 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGFGGGGGGGFGFFFFFFFFFFFFFFEFFFFFFFFB?AFFB4 25 | @SRR31134600.693 M05722:45:000000000-LHCCL:1:1101:8021:3120/1 26 | GGCCACACTGCCAGTGATGAAGTGTTCGAGAAGCTGACCAGCATCAAGGGAGCATTCCGTACCCGTATTGCCTACGTCGTGCAGACCAACGGCGGTCAGTTCCCGTACGAACTGCGCGTTTCTGACTACGATGGTTACAACCAGTTTGTCGTTCACCGTTCTCCGCAGCCGCTGATGTCTCCGGCGTGGTCTCCGGACGGCTCGAAACTGGCTTACGTCACCTTCGAAAGCGGCCGTTCGGCGCTGGTTAT 27 | + 28 | CCCCCGGEGGGGGFGGGGGGFFGGGGGGGGGGEFGGGGGGFGGGGGGGFEGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGFGGGFGGGGEG=FGGGGEGGGGGFEGGGCGGGGGGGFFGGFCFEFGGGGGG7FFFGGGGGG8>FFCGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGG=EGGGGDGGGGGGGGGGGGGFDCF@GFFFFFFFFFFD>@FFFFFFBFF9>0;7>A5 29 | @SRR31134600.765 M05722:45:000000000-LHCCL:1:1101:7487:3197/1 30 | GCGTAGTTGAGGGTGACATGATAGGGCGACTGGCGATGAAAACGCTTCATACTGCGGCTGAGGTGAAACTGTTCGGGCCACAAAACGGCACGGGGATCCGGCGACCACCAGAGGATCGGGTCGCCGGGTGAAAACCACGGGAAGATCCCGCGTTGATAGGCCATCAACAGGCGCGCCGGGCCGAGATCGCCGCCTAACGCCAGCAGTCCGTTTGGTTCACGTAATGCGCCCTCAGGTGAGGGAAAAGCAAT 31 | + 32 | 
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGDGGGGGGGGGGGGGGGGGGGGGGFGGFGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGFCFGGGGGGFGGGGGGGFGGGGGG5BEDGGGGGGGGGGGGGGGGDGGGGGGGGGGCDFGGCGGGGGCDGDFGGFFF47>FFFFEFEFFFF>?A4(2(8:FFFF>FEFFFF9>?66?BF?>F 37 | @SRR31134600.946 M05722:45:000000000-LHCCL:1:1101:21825:3431/1 38 | ACTATCCGCATATGCTGGCGCCGGGCCAGAAACAGCGTCTGGGGCTGGCGCGCGCGCTAATCCTACGTCCTAAAGTGATCATCTGTGATGAAGCGCTGGCCTCGCTGGATATGTCGATGCGCTCGCAGCTGGTTAACCTGATGCTCGAACTGCAGGAAAAACAGGGGATCTCGTATATTTACGTGACCCAGCATTTAGGCATGATGAAACA 39 | + 40 | @CCCCFGGGEFGGGCFADC6@CC@FFGGGGCDGGGGG@FGFG8FGGEGGGGGGGGGGGGGFCDEFGGCEFCFFDGGFGGEFFA@FFC9EBAFFFGGGGDGGGGGGGGGF9FFGGGGFEFCEGEGGGGGGGFGGGFGGFGGC=BCFCCFGGFFFDGGGCGFGGFG77CE8CFFGCE8EGGGGFGGGGFCGGGGGFGGGGG?6>B>?3 45 | @SRR31134600.1222 M05722:45:000000000-LHCCL:1:1101:21540:3758/1 46 | CGCCAATCAGCATCAGCAGGTATCAGTTAAAGCCGCTGTTACCGAACAGCGTTTTGTAGTTGCTGATAAGCCGGGTTTTCGGTGCGCCGGTCGGACGCGTTTCCGGCATCCAGCGCGCCATGCTGAAGGTGACGCCTGCGCAGAGCACCAGCAGGAAACCGTAGCAGGCGCGCCAGTTGATGAGTGTCTCCAGCACGCCGCCAATCAGAGGCGCCAGCAGCGGGCTCACCAGAATACCCATATTTAACAGA 47 | + 48 | CCCCCGGGGCFFGGGFGFGGGG,CFEC:CBFGGGG7FDFCG>E@>F@FGGGGGGDFGG<<=CFFCCFGGGGGG,,,@FGG+8>EG@ECDFG9*1:CEDEB559@FF;9AFG+CGGGGCC?GE=GGGCDEGGGF***1/))/7CD:C7@>BF>?7>F4@@FDFGGGGGGGGGGGGFFGGEEGGGGGGGGGGGGGG>GGGCFEEGG*CEGGGGGGGGGGGGGDGEGGGGGGGGGDFGGGGGGGG@FFGGEGGGGGCFCFGGGGGGDGGDGCDGGFGBBDDF@BEFFDBED?FF??BFF4@FFFFFFFF@ 65 | @SRR31134600.1696 M05722:45:000000000-LHCCL:1:1101:23071:4268/1 66 | GCGCGATCTCAATGCTCTGACCATAGTTTTTTGCCGTTTTCGGCGCATTGTCCGCCGGCTGAAGGGCTTCCGTGTGGTTGGCATACAGCGACACCGTTTGCCACGGCTTGTAGACCAGGCCAAAAGTGGGCATCCAGCGACTTTCGGTAAAGCTGGAGGAGGTGGTCTCCAGGCCCGTCTCGTTGCTGTAGTTACGAATCACCACCTTCTGATTGCGCGCCGCGGCGGTAAATAGCAGCGTATCGTCAAAG 67 | + 68 | C<<8FFFGGFFGGGGGFGEFBFGGFGDCCGGCFE,AFGGGG7BFGGGGGGGGGGFGGGGFGGGGGGGFGGGGGGFGGGFGDGGGEGFEC@DDFCGGGDG55CFGFGCGDFFGGFFFGEE57:DDFGFDGGGGGGCF=DGDGEGFEFFDEF>BEFFFFFFF?0,43:EFGGEGGGGGGG5AEGGGGFGGGDGFCFF7FGGGGGGGD5 73 | @SRR31134600.1842 
M05722:45:000000000-LHCCL:1:1101:11189:4446/1 74 | GCTATCAGCCAGACCGCATTCAGCGGGGTGACCAGTACGCTGTAGTGCACCAGCCGTAGGCTGCCCATCGCCGGGAAGCCACGAACCAGCACCTGCACGCCAGCCAGCAGCGCGCACAGGCTACCCAGTGCGCCACCGATACCGGCAATCACACCGCTCAGCGATTTTTTCAGCGACAGTAAAAAGGCGAATACCGCGGCAAAAGCAAACCACGCCACCGCGCTATTAATTAAGGTCATCACGCTCATTT 75 | + 76 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGGGFGCDGGGGGFGFFFFFFFFFFFFFFFFFFF9?FFFBFFFF 77 | @SRR31134600.1900 M05722:45:000000000-LHCCL:1:1101:13328:4518/1 78 | CCTTGGGGTTGACCCCACGGAAGCACCGCCGCGTCTTAATCTCAGCGCGCCGCGCACCATTGAACAGCCCTACGTTTGCATCGCCGTGCAGTCGACCTGCCAGGCCAAGTTCTGGAACAACGGCCACGGCTGGACGGAGGTGGTGGCCCACCTGAAATCGCTTGGCTATCGGGTGCTGTGTATTGACCGTGAGCCCACCACAGGGCACGGCTTCGTCTGGAACCACATCCCCTACGGCGCAGAAGACTTCA 79 | + 80 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGFGF@FFFFFFFFFFFFFFFFFFF>FFFFFFF) 81 | @SRR31134600.2028 M05722:45:000000000-LHCCL:1:1101:24115:4677/1 82 | CCTTTAAACAGCGGTAAATCTTCCAGCGCCAGTCCACCGGTGACGGTGACTTTAAAGCCCATATCCGCCAGGCGTTTGATGGCGCTGATATCCGCCTCACTCCACGCCACGCCAGCCGCCTGCGCATCACGGCTGCGGTGATACACCACCTGCCCGATACCCGCCGCGCGC 83 | + 84 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGEGGGDGGGGGCEEGGGGGGGGGGGGGGGC@@CFFG@@FGGFFFGGGGGGGGGGGG@?CGGA9FBFEECCFCE7CCFGGGGGGGGGF@>FC77FEGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGG@E@EEE 85 | @SRR31134600.2116 M05722:45:000000000-LHCCL:1:1101:11908:4786/1 86 | CTTTAAAACCCCACGTGATGAAGTCAGCCTGACCGCTTCCTGGATGCCGCTTACCTTTGACCCGTCGGATAACGACGATAGCGCCATGAAGCAGCTGGATAAGCGTCACAGTACCGCCATGGCGGGCATGGCATGGTATCACCATGAAAAGTGGGGCAGCCTGAAGCTGTCGGCGGCGGCGGATGTTCTGGACAACAGCGATGGCTGGGTTGGCGAGCTGTCCTGGTTCCGCGTATTGCCGGTGGGCAAG 87 | + 88 | 
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGFGCGFFGGGGGGGGGGGGGGGGFGGGGGGGFGGGGGDGGGGGGGGGGCFCFGGFGGFGFGGGGGGGGGGGGGGGGGGGGGGGGEGDGGGGGGGGGGGGGGGGGGGGDCFGGGGGG@EEGGGGD@FGGCFGGCEEGGC=CGDGGGGGGGGFFEGFFGFCEDGDGFGGCDGG<:>+EEGF<+DEEF8F,5CFFGGG@FE@:FCGFDF9FFGEGB 101 | @SRR31134600.2726 M05722:45:000000000-LHCCL:1:1101:6264:5384/1 102 | ATTGTATGCCAGTTCCCGCAGCGCACAGGGCACTGCGGGCCCGGACTTCCGTCTGGAAATGGAAGGTTTGCAGTACAGTGAAATCCCGATGCTGGCCGGCAGCAATCCGGCGCTGATGCAGCAGGCGCTACGCGCGGTCAACAACGACTACTCGCTGGCGCGCCTGTACGCAATGGGCGTCGATGCCTGGGCGCTGGCGAATCATTTCTCTCAGATGCGCGGTGTACCGGGCTTTGAACTGAACGGCAACA 103 | + 104 | CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGGGGGGGGGFGGGGFGFFFFFFFFE>:@@FAAFDFD7>BF69>BB2 105 | @SRR31134600.2736 M05722:45:000000000-LHCCL:1:1101:26553:5391/1 106 | GTATAGCGCGCTATTTGCGTGGAGCTGAGCACGCCAATGCCGCTAAACGCGCGCAATTCGCTGTAGGCGCTGGCGCTATTTTGCGTACTTTTTTCCTGTGAGGCATAGAGGTCATAATTGAGCACCAGCC 107 | + 108 | CCCCCGDGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGE,CCGGGCGGGGGGGGGDGGGGGGGGGGGGEGGGGGGGGGFGGGGGGGGGGGGGGGGGG9?FFGGEEC8E8,,@FFGGGGGGGGGCGGGGGGGGGGCCCFC:+E,ABE:CE@FGF;C,@D7C>BA: 113 | @SRR31134600.2992 M05722:45:000000000-LHCCL:1:1101:22262:5649/1 114 | ACCATCCTCAGGCGGTGCTGGCCTGGCTGGCGCTGCTCCAGTGCGGCGCCCGCATTTTACCGCTCAACCCGCGTCTGCCCGCGTCTCTGGTCAGTGAACTGCTGCCGGGGCTGACCCTCAGCCATGTGCTTAATCTCGACGGCGAGGCGCACTACGATTTGCCTGTGCTGCGGCTGCGTAGCGCGCGCCAGTACCACGATGTCGGCTGGCAGGCTGAACGGCTGGCCTCCATGACGCTGACCTCCGGCTCG 115 | + 116 | CCC3BFFF@FFFB=:??EB>7:DFF>>024( 117 | @SRR31134600.3126 M05722:45:000000000-LHCCL:1:1101:8547:5781/1 118 | GTATGGATCTTCACCACCGGCGACGCGCAGGACGGCGAACGCCCGCTGGCGGTGCTGCTCGACGGTCAGTTCTGGGCCGAAAGCATGCCGGTATGGCCTGCGCTGACAGCGCTCACTCGCGAAGGCAAACTGCCTGCGGCGGTGTATGTGCTGATTGACGTGATTGATAACGCCCACCGCAGCGTCGAACTGCCGTGCAATCCCGACTTCTGGCTGGCGGTACAGCATGAGCTGCTTTCCCAGGTACGGA 119 | + 120 | 
CCCCCGGGGGGGGGGGGGGGGEGGEGGGGGGGGGGGGGGGGGGGGGGGGGGDGEGGGGGGGGGGGGGGGGGGGGGCFGGGGGGGGGFGGGDGGGGGGGGFFGGGGCGGGGFGGGCGFGFGGEGGCFFGFGGFGGG,FF@F>BB?F?F692A*6A<EDGDECE5CC5@FC=FFGGGFGCGGGEDCDCDD7DFFGG>FF=G=FFFFF:E>EFFFFFFF001;99 125 | @SRR31134600.3540 M05722:45:000000000-LHCCL:1:1101:5108:6228/1 126 | CCGAACGCGCCGGGCAAATATTTCTACGTCTGGCTGGACGCGCCGATTGGCTACATGGGCTCCTTCAAGAACCTGTGCGACAAACGCGGCGACACCACCAGCTTCGATGAATACTGGAAGAAAGACTCTACCGCCGAGCTGTATCACTTTATCGGCAAAGATATCGTCTACTTCCACAGCCTGTTCTGGCCTGCCATGCTGGAAGGCAGCAACTTCCGTAAGCCGACCAACCTGTTTGTGCACGGCTACGT 127 | + 128 | CCCCCFGGEEGGGGGGGGGGGGGGFGGGGGG?FGGEGGGGGGGGGGGCFGGGEFGDFD7FGCEFGGGGCDGGGFGFEGDGGGGGGGGGDF7FGGGGGGGGGGDGG@FCEFFDFGGGFGFGGFFGGDFGFFEGG>EGCCGDDGGGGGGGGFGGGGDFGGGGGGGGGEFG>FGGGFF;FGFFFFCGFGFCGCFGFGGGGFGGFFFGGG4:CGFFGGGG=3DFDF4F>DFFFFFFF@FB2??726<:;BB03<4 129 | --------------------------------------------------------------------------------