├── extras
├── ani-bbh.png
├── maniac-logo.png
└── ani-fragment.png
├── .gitignore
├── env.yml
├── test
├── configs
│ ├── easy-cds-aa.yml
│ ├── easy-cds-nt.yml
│ ├── easy-fragment-based.yml
│ ├── advanced-fragment-based.yml
│ ├── advanced-cds-aa.yml
│ └── advanced-cds-nt.yml
└── data
│ ├── cds-aa-based.fasta
│ └── fragment-based.fasta
├── scoring
├── unit-scoring.out
└── blastn-scoring.out
├── scripts
├── split_fasta.py
├── sort.R
├── best_hits.sh
├── get_fasta_lengths.py
├── helpers.py
└── process_results.R
├── MANIAC
└── README.md
/extras/ani-bbh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioinf-mcb/MANIAC/HEAD/extras/ani-bbh.png
--------------------------------------------------------------------------------
/extras/maniac-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioinf-mcb/MANIAC/HEAD/extras/maniac-logo.png
--------------------------------------------------------------------------------
/extras/ani-fragment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioinf-mcb/MANIAC/HEAD/extras/ani-fragment.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | playground
2 | __pycache__
3 | RAFAL
4 | *.DS_Store
5 | .snakemake
6 | configs-jk
7 | nohup*
8 | test/configs/cds-nt-test*.yml
9 | test/data/cds-*-test*.fasta
10 | test/output
11 | sftp-config.json
12 |
--------------------------------------------------------------------------------
/env.yml:
--------------------------------------------------------------------------------
1 | name: MANIAC
2 | channels:
3 | - conda-forge
4 | - bioconda
5 | dependencies:
6 | - bash
7 | - mmseqs2
8 | - biopython
9 | - numpy
10 | - pathlib
11 | - pandas
12 | - r-base
13 | - r-essentials
14 | - r-arrow
15 | - datamash
16 | - pyopenssl
17 | - parallel
--------------------------------------------------------------------------------
/test/configs/easy-cds-aa.yml:
--------------------------------------------------------------------------------
1 | # input file is a fasta file with PROTEIN (CDS) sequences.
2 | # each record should follow convention: {GENOMEID}_CDS_{CDSID}
3 |
4 | INPUT_FILE: "test/data/cds-aa-based.fasta"
5 | OUTPUT_DIR: "test/output/EASY-CDS-AA-BASED"
6 | MODE: "CDS_AA" # [FRAGMENTS_NT | CDS_NT | CDS_AA]
7 | FAST: False
8 | MEMORY_GB: 16 #Declare available memory for MANIAC in GB
--------------------------------------------------------------------------------
/test/configs/easy-cds-nt.yml:
--------------------------------------------------------------------------------
1 | # input file is a fasta file with ORF ("nucleotide CDS") sequences.
2 | # each record should follow convention: {GENOMEID}_CDS_{CDSID}
3 |
4 | INPUT_FILE: "test/data/cds-nt-based.fasta"
5 | OUTPUT_DIR: "test/output/EASY-CDS-NT-BASED"
6 | MODE: "CDS_NT" # [FRAGMENTS_NT | CDS_NT | CDS_AA]
7 | FAST: False
8 | MEMORY_GB: 16 #Declare available memory for MANIAC in GB
9 |
--------------------------------------------------------------------------------
/test/configs/easy-fragment-based.yml:
--------------------------------------------------------------------------------
1 | # Input file should match the selected mode:
2 | # Here a fasta file with phage genome sequences will be used to run in fragment mode
3 |
4 | INPUT_FILE: "test/data/fragment-based.fasta"
5 | OUTPUT_DIR: "test/output/EASY-FRAGMENT-BASED"
6 | MODE: "FRAGMENTS_NT" # [FRAGMENTS_NT | CDS_NT | CDS_AA]
7 | FAST: False
8 | MEMORY_GB: 16 #Declare available memory for MANIAC in GB
9 |
--------------------------------------------------------------------------------
/scoring/unit-scoring.out:
--------------------------------------------------------------------------------
1 | # NUCL in 1/2 Bit
2 | # Background (precomputed optional): 0.2499975 0.2499975 0.2499975 0.2499975 0.00001
3 | # Lambda (precomputed optional): 0.6337314
4 | A C T G X
5 | A 1.0000 -1.0000 -1.0000 -1.0000 -1.0000
6 | C -1.0000 1.0000 -1.0000 -1.0000 -1.0000
7 | T -1.0000 -1.0000 1.0000 -1.0000 -1.0000
8 | G -1.0000 -1.0000 -1.0000 1.0000 -1.0000
9 | X -1.0000 -1.0000 -1.0000 -1.0000 -1.0000
--------------------------------------------------------------------------------
/scoring/blastn-scoring.out:
--------------------------------------------------------------------------------
1 | # NUCL in 1/2 Bit
2 | # Background (precomputed optional): 0.2499975 0.2499975 0.2499975 0.2499975 0.00001
3 | # Lambda (precomputed optional): 0.6337314
4 | A C T G X
5 | A 2.0000 -3.0000 -3.0000 -3.0000 -3.0000
6 | C -3.0000 2.0000 -3.0000 -3.0000 -3.0000
7 | T -3.0000 -3.0000 2.0000 -3.0000 -3.0000
8 | G -3.0000 -3.0000 -3.0000 2.0000 -3.0000
9 | X -3.0000 -3.0000 -3.0000 -3.0000 -3.0000
10 |
--------------------------------------------------------------------------------
/test/configs/advanced-fragment-based.yml:
--------------------------------------------------------------------------------
1 | # Input file should match the selected mode:
2 | # Here a fasta file with phage genome sequences will be used to run in fragment mode
3 |
4 | INPUT_FILE: "test/data/fragment-based.fasta"
5 | OUTPUT_DIR: "test/output/ADVANCED-FRAGMENT-BASED"
6 | MODE: "FRAGMENTS_NT" # [FRAGMENTS_NT | CDS_NT | CDS_AA]
7 | FAST: False
8 | MEMORY_GB: 16 #Declare available memory for MANIAC in GB
9 |
10 | DELETE_INTERMEDIATE_FILES: True
11 |
12 | FRAGMENT_SIZE: 1020
13 | IDENTITY: 0.3
14 | COVERAGE: 0.7
15 |
16 | MMSEQS_PARAMS:
17 | EVALUE: 1e-15
18 | SENSITIVITY: 7.5
19 | ZDROP: 40
20 | MAX_SEQS: 10000
21 | MAX_SEQ_LEN: 65000
22 | KMER: 11
23 | SEED_SUB_MATRIX: "scoring/blastn-scoring.out"
24 | SUB_MATRIX: "scoring/blastn-scoring.out"
--------------------------------------------------------------------------------
/test/configs/advanced-cds-aa.yml:
--------------------------------------------------------------------------------
1 | # input file is a fasta file with PROTEIN (CDS) sequences.
2 | # each record should follow convention: {GENOMEID}_CDS_{CDSID}
3 |
4 | INPUT_FILE: "test/data/cds-aa-based.fasta"
5 | OUTPUT_DIR: "test/output/ADVANCED-CDS-AA-BASED"
6 | MODE: "CDS_AA" # [FRAGMENTS_NT | CDS_NT | CDS_AA]
7 | FAST: False
8 | MEMORY_GB: 16 #Declare available memory for MANIAC in GB
9 |
10 | DELETE_INTERMEDIATE_FILES: True
11 |
12 | # BBH & homologous proteins definition
13 | HOMOLOGS:
14 | IDENTITY: 0.3
15 | COVERAGE: 0.7
16 |
17 | # conserved proteins definition
18 | CONSERVED:
19 | IDENTITY: 0.8
20 | COVERAGE: 0.5
21 |
22 | # mmseqs params
23 | MMSEQS_PARAMS:
24 | EVALUE: 1e-15
25 | SENSITIVITY: 7.5
26 | SEARCH_TYPE: 1
27 | ZDROP: 40
28 | MAX_SEQS: 10000
29 | MAX_SEQ_LEN: 65000
--------------------------------------------------------------------------------
/scripts/split_fasta.py:
--------------------------------------------------------------------------------
1 |
2 | from Bio import SeqIO
3 | from Bio.SeqRecord import SeqRecord
4 |
5 | INPUT_PATH = snakemake.input[0]
6 | OUTPUT_PATH = snakemake.output[0]
7 | FRAGMENT_SIZE = snakemake.params.FRAGMENT_SIZE
8 |
def split_into_fragments(seq, fragment_length = 1020):
    '''Yield consecutive, non-overlapping fragments of a sequence.

    Parameters:
    - seq: any sliceable sequence with a length (str, Bio.Seq.Seq, list, ...).
    - fragment_length (int): maximum number of units per fragment; must be >= 1.

    Yields:
    - Slices of ``seq`` of ``fragment_length`` units each; the last slice
      holds the remainder and may be shorter. An empty ``seq`` yields nothing.

    Raises:
    - ValueError: if ``fragment_length`` is smaller than 1 (raised when the
      generator is first advanced). A size of 0 previously looped forever.
    '''
    if fragment_length < 1:
        raise ValueError(f"fragment_length must be >= 1, got {fragment_length}")

    # Step through by index instead of re-slicing the remainder each round,
    # which copied the whole tail of the sequence on every iteration.
    for start in range(0, len(seq), fragment_length):
        yield seq[start:start + fragment_length]
14 |
# Stream every input record through the fragment generator and write each
# fragment as its own FASTA record named "<record id>_FRAGMENT_<index>".
with open(OUTPUT_PATH, "w") as fasta_out:
    for genome in SeqIO.parse(INPUT_PATH, "fasta"):
        pieces = split_into_fragments(genome.seq, FRAGMENT_SIZE)
        for index, piece in enumerate(pieces):
            piece_id = genome.id + "_FRAGMENT_" + str(index)
            piece_record = SeqRecord(piece, id=piece_id, description="")
            SeqIO.write(piece_record, fasta_out, "fasta")
--------------------------------------------------------------------------------
/test/configs/advanced-cds-nt.yml:
--------------------------------------------------------------------------------
1 | # input file is a fasta file with ORF ("nucleotide CDS") sequences.
2 | # each record should follow convention: {GENOMEID}_CDS_{CDSID}
3 |
4 | INPUT_FILE: "test/data/cds-nt-based.fasta"
5 | OUTPUT_DIR: "test/output/ADVANCED-CDS-NT-BASED"
6 | MODE: "CDS_NT" # [FRAGMENTS_NT | CDS_NT | CDS_AA]
7 | FAST: False
8 | MEMORY_GB: 16 #Declare available memory for MANIAC in GB
9 |
10 | DELETE_INTERMEDIATE_FILES: True
11 |
12 | # BBH & homologous proteins definition
13 | HOMOLOGS:
14 | IDENTITY: 0.3
15 | COVERAGE: 0.7
16 |
17 | # conserved proteins definition
18 | CONSERVED:
19 | IDENTITY: 0.8
20 | COVERAGE: 0.5
21 |
22 | # mmseqs params
23 | MMSEQS_PARAMS:
24 | EVALUE: 1e-15
25 | SENSITIVITY: 7.5
26 | ZDROP: 40
27 | MAX_SEQS: 10000
28 | MAX_SEQ_LEN: 65000
29 | KMER: 11
30 | SEED_SUB_MATRIX: "scoring/blastn-scoring.out"
31 | SUB_MATRIX: "scoring/blastn-scoring.out"
--------------------------------------------------------------------------------
/scripts/sort.R:
--------------------------------------------------------------------------------
#!/usr/bin/env Rscript

# Sort one chunk of search results and write it next to the input as
# "sorted_<input name>".
#
# Usage: sort.R <separator> <input_file>
#   separator  literal string that splits a target id (e.g. "_CDS");
#              the part before its first occurrence is stored in column V9
#   input_file headerless table; V1, V2 and V7 are used as sort/split columns

# Load the necessary library
library(data.table)
# Limit cpus for better parallel handling (several copies run side by side)
setDTthreads(1)

# Check command line arguments
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop("Usage: sort.R <separator> <input_file>", call. = FALSE)
}
separator <- args[1]    # Separator for splitting (e.g., "_CDS")
input_file <- args[2]   # Input file
input_name <- basename(input_file)
input_path <- dirname(input_file)
output_file <- file.path(input_path, paste0("sorted_", input_name))

# Read the input file into a data.table. The table has no header row, so say
# so explicitly rather than relying on auto-detection.
dt <- fread(input_file, header = FALSE)

# Create a new column (V9) holding the part of V2 before the separator.
# fixed = TRUE treats the separator as a literal string, not a regular
# expression, so separators containing characters such as "." or "|" split
# where the user expects.
dt[, V9 := sapply(strsplit(as.character(V2), separator, fixed = TRUE), `[`, 1)]

# Sort the data.table by V1, then V9, then V7
setorder(dt, V1, V9, V7)

# Write the sorted data.table to the output file (tab-separated, no header)
fwrite(dt, output_file, sep = "\t", col.names = FALSE)
--------------------------------------------------------------------------------
/scripts/best_hits.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Reduce the search results to one row per (column 1, column 9) group and
# re-split the result into chunk files.
#
# Steps:
#   1. split the input into chunks of CHUNKSIZE lines
#   2. sort every chunk with the R script at SORTPATH (run in parallel)
#   3. merge the sorted chunks and keep the first row of every group
#   4. write the result to the rule output and split it again into
#      "<CHUNK>NNNN.tsv" files, never cutting an id group in two
#
# All values come from the snakemake_input / snakemake_output /
# snakemake_params arrays provided by Snakemake.

# Cap the number of parallel sort jobs at MEMORY_GB / 3, and never above the
# number of available threads.
MEMORY_THIRD=$((snakemake_params[MEMORY_GB] * 1 / 3))
if [ "${snakemake_params[MMSEQS_THREADS]}" -le "$MEMORY_THIRD" ]; then
    JOBNB=${snakemake_params[MMSEQS_THREADS]}
else
    JOBNB=$MEMORY_THIRD
fi
# With MEMORY_GB < 3 the integer division yields 0, and GNU parallel reads
# -j0 as "run as many jobs as possible". Always run at least one job instead.
if [ "$JOBNB" -lt 1 ]; then
    JOBNB=1
fi

split -l "${snakemake_params[CHUNKSIZE]}" "${snakemake_input[0]}" "${snakemake_params[CHUNK]}"

ls "${snakemake_params[CHUNK]}"* | parallel --will-cite --silent -j"$JOBNB" Rscript "${snakemake_params[SORTPATH]}" "${snakemake_params[SEPARATOR]}" {} 1> /dev/null

# "sort -m" only merges: it requires every input to be ordered by the same
# rules it applies itself. The chunks were ordered by data.table, which sorts
# character columns in C-locale, so force the C locale here and split on the
# tab character the chunks are written with.
LC_ALL=C sort --parallel="${snakemake_params[MMSEQS_THREADS]}" -m -t $'\t' -k1,1 -k9,9 -k7,7g "${snakemake_params[INTERMEDIATE_FILES_DIR]}"/sorted_* > "${snakemake_params[MERGED]}"

rm "${snakemake_params[CHUNK]}"* "${snakemake_params[INTERMEDIATE_FILES_DIR]}"/sorted_*

# Keep the first row of each (column 1, column 9) group, then drop the helper
# column 9 that datamash prints as its second output field.
datamash -g 1,9 first 2 first 3 first 4 first 5 first 6 first 7 first 8 < "${snakemake_params[MERGED]}" | cut -f1,3-9 > "${snakemake_output[0]}"

rm "${snakemake_params[MERGED]}"

# Re-split the final table: start a new chunk file once chunk_size lines have
# been written AND the id prefix of column 1 changes, so rows sharing a prefix
# always end up in the same file.
awk -v chunk_size="${snakemake_params[CHUNKSIZE]}" -v prefix="${snakemake_params[CHUNK]}" -v separator="${snakemake_params[SEPARATOR]}" '
BEGIN {
    current_id = "";
    chunk_count = 0;
    line_count = 0;
}
{
    split($1, id_parts, separator);
    id = id_parts[1];
    if (line_count >= chunk_size && id != current_id) {
        close(output_file);
        chunk_count++;
        line_count = 0;
    }
    if (line_count == 0) {
        output_file = sprintf("%s%04d.tsv", prefix, chunk_count);
    }
    print $0 >> output_file;
    line_count++;
    current_id = id;
}' "${snakemake_output[0]}"
43 |
--------------------------------------------------------------------------------
/scripts/get_fasta_lengths.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 |
4 | from Bio import SeqIO
5 |
# Inputs and settings handed over by Snakemake.
FASTA_PATHS = snakemake.input[0]
OUTPUT_PATH = snakemake.output[0]
SEPARATOR = snakemake.params.SEPARATOR
CDS_BASED = snakemake.params.CDS_BASED

names = []
lengths = []

for seq_record in SeqIO.parse(FASTA_PATHS, "fasta"):

    name = seq_record.id
    length = len(seq_record.seq)  # length of this record (CDS or whole genome)

    # convert record ID depending on the pipeline mode
    if CDS_BASED:
        # A record id without the separator used to collapse to the empty
        # genome name "", silently merging unrelated records into one row.
        if SEPARATOR not in name:
            raise ValueError(
                f'Record "{seq_record.id}" does not contain the separator '
                f'"{SEPARATOR}"; cannot derive its genome ID.'
            )

        # genome ID = everything before the LAST separator in the record id
        name = name.rsplit(SEPARATOR, 1)[0]

        if '|' in name:  # WARNING
            print("WARNING!")
            print(f"{name} contains '|' character!")
            print("MANIAC handles that, but it potentially can lead to errors.")
            name = name.split('|')[-1]  # remove text preceding '|' character

    # in genome mode the record id is used as the genome ID unchanged

    # append
    names.append(name)
    lengths.append(length)


# create table: one row per genome with the summed record length and the
# number of records that contributed to it
length_table = pd.DataFrame({"genome": names, "length": lengths})
df_prots = length_table['genome'].value_counts()
prots_count = df_prots.to_dict()
length_table_final = length_table.groupby('genome').sum().reset_index()
length_table_final['n_prots'] = length_table_final['genome'].map(prots_count)

# save number ORFs per genome only in cds-based mode
if CDS_BASED:
    length_table_final.to_csv(OUTPUT_PATH, index = False)
else:
    # drop the trailing n_prots column
    length_table_final.iloc[:, :-1].to_csv(OUTPUT_PATH, index = False)
47 |
48 |
49 |
--------------------------------------------------------------------------------
/MANIAC:
--------------------------------------------------------------------------------
1 | """ MANIAC
2 |
3 | Tool for efficient comparison of DNA fragments optimized for phage genomes.
4 |
5 | Version: 1.0.0
6 | Authors: Jan Havranek, Wanangwa Ndovie, Janusz Koszucki, Jade Leconte, Rafal Mostowy
7 |
8 | """
9 |
10 | # modules
11 | import itertools
12 | from pathlib import Path
13 | from scripts.helpers import input_checkpoint, get_params
14 | from scripts.helpers import display_settings, format_mmseqs_params
15 |
16 | print('Running MANIAC!\n')
17 |
18 | # default config file
19 | configfile: "config.yml"
20 |
21 | # paths
22 | INPUT_FILE = config["INPUT_FILE"]
23 | OUTPUT_DIR = config["OUTPUT_DIR"]
24 | LOG_DIR = Path(OUTPUT_DIR, ".log")
25 |
26 | # intermediate directories
27 | INTERMEDIATE_FILES_DIR = Path(OUTPUT_DIR, "intermediate")
28 | MMSEQS_DIR = Path(INTERMEDIATE_FILES_DIR, "mmseqs2")
29 | MMSEQS_TEMP_DIR = Path(MMSEQS_DIR, "temp")
30 |
31 | MODE, FAST, CDS_BASED, FRAGMENT_SIZE, SEPARATOR, DELETE_INTERMEDIATE_FILES, HOMOLOGS_IDENTITY, HOMOLOGS_COVERAGE, CONSERVED_IDENTITY, CONSERVED_COVERAGE, MMSEQS_PARAMS, MEMORY_GB, MMSEQS_THREADS = get_params(config, workflow.cores, workflow.basedir)
32 |
33 | # input file checkpoint
34 | SEPARATOR = input_checkpoint(INPUT_FILE, SEPARATOR, CDS_BASED)
35 |
36 | # print settings to user
37 | display_settings(MODE, INPUT_FILE, OUTPUT_DIR, LOG_DIR, INTERMEDIATE_FILES_DIR,
38 | FRAGMENT_SIZE, CDS_BASED, SEPARATOR,
39 | MMSEQS_THREADS, MEMORY_GB, MMSEQS_PARAMS,
40 | HOMOLOGS_IDENTITY, HOMOLOGS_COVERAGE,
41 | CONSERVED_IDENTITY, CONSERVED_COVERAGE,
42 | FAST, DELETE_INTERMEDIATE_FILES)
43 |
44 | ### run MANIAC
45 | rule target:
46 | input: Path(OUTPUT_DIR, "genome-alignment.csv")
47 |
48 |
49 | rule split_genomes:
50 | input: Path(INPUT_FILE)
51 | output: Path(INTERMEDIATE_FILES_DIR, "split/INPUT_FILE")
52 | params: FRAGMENT_SIZE = FRAGMENT_SIZE
53 | script: "scripts/split_fasta.py"
54 |
# Build the MMseqs2 query database from the fragmented input produced by
# split_genomes. Output of the command goes to the file declared under `log`;
# the redirect previously appended an extra ".log", so the declared log file
# was never written and the real output landed in an undeclared
# "mmseqs_query_createdb.log.log".
rule make_query_db:
    input: Path(INTERMEDIATE_FILES_DIR, "split/INPUT_FILE")
    output: Path(MMSEQS_DIR, "query-db/query-db")
    log: Path(LOG_DIR, "mmseqs_query_createdb.log")
    shell: "mmseqs createdb {input} {output} > {log} 2>&1"
60 |
61 | ### fragment-based / CDS-based switch
62 | rule make_db:
63 | input: Path(INPUT_FILE)
64 | output: Path(MMSEQS_DIR, "cds-db/cds-db") if CDS_BASED else \
65 | Path(MMSEQS_DIR, "reference-db/reference-db")
66 | log: Path(LOG_DIR, "mmseqs_reference_createdb.log")
67 | shell: "mmseqs createdb {input} {output} > {log} 2>&1"
68 |
69 | rule get_total_cds_lengths:
70 | input: Path(INPUT_FILE)
71 | output: Path(INTERMEDIATE_FILES_DIR, "phage_lengths.csv")
72 | params:
73 | SEPARATOR=SEPARATOR,
74 | CDS_BASED=CDS_BASED
75 | script: "scripts/get_fasta_lengths.py"
76 |
77 | ### ANI calculation (Goris et al.) - genome fragments against whole genome
78 | rule mmseqs_qr_search:
79 | input:
80 | Path(MMSEQS_DIR, "query-db/query-db"),
81 | Path(MMSEQS_DIR, "reference-db/reference-db")
82 | output: Path(MMSEQS_DIR, "results-qr-db/results-qr-db.index")
83 | log: Path(LOG_DIR, "mmseqs_query_search.log")
84 | params:
85 | RESULTS_BASENAME = Path(MMSEQS_DIR, "results-qr-db/results-qr-db"),
86 | MMSEQS_TEMP_DIR = MMSEQS_TEMP_DIR,
87 | MMSEQS_PARAMS = MMSEQS_PARAMS
88 | threads: MMSEQS_THREADS
89 | shell:
90 | """
91 | mmseqs search {input} {params.RESULTS_BASENAME} {params.MMSEQS_TEMP_DIR} \
92 | --threads {threads} {params.MMSEQS_PARAMS} >> {log} 2>&1
93 | """
94 |
95 | ### CDS-based ANI calculation (BBH) [all-by-all CDS]
96 | rule mmseqs_cds_search:
97 | input: Path(MMSEQS_DIR, "cds-db/cds-db")
98 | output: Path(MMSEQS_DIR, "results-cds-db/results-cds-db.index")
99 | log: Path(LOG_DIR, "mmseqs_cds_search.log")
100 | params:
101 | RESULTS_BASENAME = Path(MMSEQS_DIR, "results-cds-db/results-cds-db"),
102 | MMSEQS_TEMP_DIR = MMSEQS_TEMP_DIR,
103 | MMSEQS_PARAMS = MMSEQS_PARAMS
104 | threads: MMSEQS_THREADS
105 | shell:
106 | """
107 | mmseqs search {input} {input} {params.RESULTS_BASENAME} {params.MMSEQS_TEMP_DIR} \
108 | --threads {threads} {params.MMSEQS_PARAMS} > {log} 2>&1
109 | """
110 |
111 | rule mmseqs_cds_convert:
112 | input:
113 | Path(MMSEQS_DIR, "cds-db/cds-db"),
114 | Path(MMSEQS_DIR, "results-cds-db/results-cds-db.index")
115 | output: Path(INTERMEDIATE_FILES_DIR, "1_search_results.tsv")
116 | log: Path(LOG_DIR, "mmseqs_cds_convert.log")
117 | params: RESULTS_BASENAME = Path(MMSEQS_DIR, "results-cds-db/results-cds-db")
118 | threads: MMSEQS_THREADS
119 | shell:
120 | """
121 | mmseqs convertalis {input[0]} {input[0]} \
122 | {params.RESULTS_BASENAME} {output} --threads {threads} \
123 | --format-output 'query,target,nident,alnlen,mismatch,pident,evalue,qlen' > {log} 2>&1
124 | """
125 |
126 | rule mmseqs_qr_convert:
127 | input:
128 | Path(MMSEQS_DIR, "query-db/query-db"),
129 | Path(MMSEQS_DIR, "reference-db/reference-db"),
130 | Path(MMSEQS_DIR, "results-qr-db/results-qr-db.index")
131 | output: Path(INTERMEDIATE_FILES_DIR, "1_search_results.tsv")
132 | log: Path(LOG_DIR, "mmseqs_qr_convert.log")
133 | params: RESULTS_BASENAME = Path(MMSEQS_DIR, "results-qr-db/results-qr-db")
134 | threads: MMSEQS_THREADS
135 | shell:
136 | """
137 | mmseqs convertalis {input[0]} {input[1]} \
138 | {params.RESULTS_BASENAME} {output} --threads {threads} \
139 | --format-output 'query,target,nident,alnlen,mismatch,pident,evalue,qlen' > {log} 2>&1
140 | """
141 |
142 | rule best_hits:
143 | input: Path(INTERMEDIATE_FILES_DIR, "1_search_results.tsv")
144 | output: Path(INTERMEDIATE_FILES_DIR, "2_significant_hits.tsv")
145 | log: Path(LOG_DIR, "best_hits.log")
146 | params:
147 | SORTPATH = Path(workflow.basedir, "scripts/sort.R"),
148 | SEPARATOR = SEPARATOR,
149 | CHUNK = Path(INTERMEDIATE_FILES_DIR, "chunk_"),
150 | MERGED = Path(INTERMEDIATE_FILES_DIR, "merged_chunks"),
151 | INTERMEDIATE_FILES_DIR = INTERMEDIATE_FILES_DIR,
152 | MMSEQS_THREADS = MMSEQS_THREADS,
153 | MEMORY_GB = MEMORY_GB,
154 | CHUNKSIZE = 30000000
155 | script: "scripts/best_hits.sh"
156 |
157 | rule process_results:
158 | input:
159 | Path(INTERMEDIATE_FILES_DIR, "2_significant_hits.tsv"),
160 | Path(INTERMEDIATE_FILES_DIR, "phage_lengths.csv")
161 | output: Path(OUTPUT_DIR, "genome-alignment.csv")
162 | log: Path(LOG_DIR, "process_results.log")
163 | params: COVERAGE = HOMOLOGS_COVERAGE,
164 | IDENTITY = HOMOLOGS_IDENTITY,
165 | MODE = MODE,
166 | CDS_BASED = CDS_BASED,
167 | SEPARATOR = SEPARATOR,
168 | CDS_ALIGNMENT_FILE = Path(OUTPUT_DIR, "cds-alignment.csv"),
169 | DELETE_INTERMEDIATE_FILES = DELETE_INTERMEDIATE_FILES,
170 | MMSEQS_TEMP_DIR = MMSEQS_TEMP_DIR,
171 | CONSERVED_IDENTITY = CONSERVED_IDENTITY,
172 | CONSERVED_COVERAGE = CONSERVED_COVERAGE,
173 | script: "scripts/process_results.R"
174 |
--------------------------------------------------------------------------------
/scripts/helpers.py:
--------------------------------------------------------------------------------
1 | import os
2 |
# Prefix that identifies the optional header row of a pair list.
HEADER_LINE_START = "Seq1,Seq2"

def read_pair_list(list_path):
    """Read a comma-separated list of genome pairs.

    Blank lines, lines starting with "#" and the header row (a line starting
    with HEADER_LINE_START) are ignored. From every other line the first two
    comma-separated fields are taken, stripped of surrounding whitespace.

    Parameters:
    - list_path: path of the pair list file.

    Returns:
    - (query_genomes, reference_genomes): two lists of equal length, in file
      order.

    Raises:
    - IndexError: if a data line holds fewer than two comma-separated fields.
    """
    pairs = []

    with open(list_path) as handle:
        for raw_line in handle:
            entry = raw_line.strip()

            is_blank = not entry
            if is_blank or entry.startswith("#") or entry.startswith(HEADER_LINE_START):
                continue

            fields = entry.split(",")
            pairs.append((fields[0].strip(), fields[1].strip()))

    query_genomes = [query for query, _ in pairs]
    reference_genomes = [reference for _, reference in pairs]
    return query_genomes, reference_genomes
21 |
22 |
def format_mmseqs_params(params):
    """Render a mapping of options as a command-line string.

    One-character keys become short options ("-k value"), longer keys become
    long options ("--key value"). Options are joined with single spaces in the
    mapping's iteration order.

    Parameters:
    - params (dict): option name -> value.

    Returns:
    - str: the formatted options; "" for an empty mapping.
    """
    rendered = []
    for name, setting in params.items():
        dashes = "-" if len(name) == 1 else "--"
        rendered.append(f"{dashes}{name} {setting}")
    return " ".join(rendered)
26 |
27 |
def input_checkpoint(INPUT_FILE, SEPARATOR, CDS_BASED):
    """Check the input file and settle on the separator to use.

    In CDS-based mode the first line of INPUT_FILE is inspected: if it
    contains SEPARATOR that value is kept; otherwise "_CDS" and then "_cds"
    are tried and the first one found replaces SEPARATOR (with a warning).
    Outside CDS-based mode the file is not read.

    Parameters:
    - INPUT_FILE: path of the input FASTA file.
    - SEPARATOR (str): separator configured by the user.
    - CDS_BASED (bool): whether the pipeline runs in a CDS-based mode.

    Returns:
    - str: the separator to use for the rest of the run.

    Raises:
    - SystemExit: with status 1 when none of the candidate separators occurs
      in the first line.
    """

    print('Processing input... ', end='')

    # verify separator for CDS BASED
    if CDS_BASED:
        print('Veryfiying separator... ', end='')
        with open(INPUT_FILE) as f:
            first_line = f.readline()

        if SEPARATOR in first_line:
            pass
        elif '_CDS' in first_line:
            print(f'\nWARNING: CHANGING SEPARATOR! "{SEPARATOR}" not found, but "_CDS" found!')
            SEPARATOR = '_CDS'
        elif '_cds' in first_line:
            print(f'\nWARNING: CHANGING SEPARATOR! "{SEPARATOR}" not found, but "_cds" found!')
            SEPARATOR = '_cds'
        else:
            print(f'\nFAILED! "{SEPARATOR}" nor "_CDS" nor "_cds" found in first header!')
            # A bare exit() ends the process with status 0, which reports
            # success to the caller; signal the failure explicitly.
            raise SystemExit(1)

    # TODO: checks that are planned but not implemented yet:
    #   - file does not exist / file is empty / file is not a fasta file
    #   - fasta file is corrupted (less than 10 characters | less than 2 proteins)
    #   - protein comparison requested but the file does not contain proteins
    #   - nucleotide comparison requested but the file does not contain nucleotides
    #   - extract phage identifiers and parse protein IDs [PHAGEID_"CDS"_NUMBER]
    #   - phage IDs / protein IDs are not unique

    print('Success!')

    return SEPARATOR
67 |
def get_params(config, cores, basedir, modes_available = ['FRAGMENTS_NT', 'CDS_NT', 'CDS_AA']):
    """ extract information about parameters from config file

    Parameters:
    - config (dict): parsed configuration.
    - cores: number of cores; passed through as MMSEQS_THREADS.
    - basedir: directory used to build the default scoring matrix paths.
    - modes_available (list): accepted values of config["MODE"] (read only,
      never modified here).

    Returns:
    - tuple: (MODE, FAST, CDS_BASED, FRAGMENT_SIZE, SEPARATOR,
      DELETE_INTERMEDIATE_FILES, HOMOLOGS_IDENTITY, HOMOLOGS_COVERAGE,
      CONSERVED_IDENTITY, CONSERVED_COVERAGE, MMSEQS_PARAMS, MEMORY_GB,
      MMSEQS_THREADS), where MMSEQS_PARAMS is the option string for the search.

    Raises:
    - SystemExit: with status 1 when MODE is not an accepted / supported mode.
    """

    # global params
    MODE = nested_get(config, ['MODE'])
    MMSEQS_THREADS = cores   # mmseqs threads (equivalent to CPU cores)
    FAST = config.get("FAST", False)
    DELETE_INTERMEDIATE_FILES = config.get("DELETE_INTERMEDIATE_FILES", True)
    MEMORY_GB = config.get("MEMORY_GB", 16)

    # checkpoint for mode
    modes_fstring = ' | '.join(modes_available)
    if MODE not in modes_available:
        print(f'\nFATAL ERROR! Incorrect mode "{MODE}"\t(FAST: {FAST}.\t Available modes: [{modes_fstring}]\nFAST: [True | False]')
        # A bare exit() would end with status 0 and report success.
        raise SystemExit(1)
    print(f'MODE: "{MODE}"\t[{modes_fstring}]\nFAST: {FAST}\t[True | False]')

    # Reachable only when a caller passes a custom modes_available that
    # contains a mode this function has no settings for.
    if MODE not in ('CDS_AA', 'CDS_NT', 'FRAGMENTS_NT'):
        print('FATAL MODE ERROR!')
        raise SystemExit(1)

    CDS_BASED = MODE in ('CDS_AA', 'CDS_NT')

    if CDS_BASED:
        FRAGMENT_SIZE = None
        SEPARATOR = config.get("SEPARATOR", "_CDS")

        # homologous proteins
        HOMOLOGS_IDENTITY = nested_get(config, ['HOMOLOGS', 'IDENTITY'], default=0.3)
        HOMOLOGS_COVERAGE = nested_get(config, ['HOMOLOGS', 'COVERAGE'], default=0.7)

        # conserved proteins
        CONSERVED_IDENTITY = nested_get(config, ['CONSERVED', 'IDENTITY'], default=0.8)
        CONSERVED_COVERAGE = nested_get(config, ['CONSERVED', 'COVERAGE'], default=0.5)
    else:
        SEPARATOR = config.get("SEPARATOR", "_FRAGMENT")
        FRAGMENT_SIZE = config.get("FRAGMENT_SIZE", 500)

        # filter significant hits (thresholds are top-level keys in this mode)
        HOMOLOGS_IDENTITY = nested_get(config, ['IDENTITY'], default=0.3)
        HOMOLOGS_COVERAGE = nested_get(config, ['COVERAGE'], default=0.7)

        CONSERVED_IDENTITY = None
        CONSERVED_COVERAGE = None

        # FAST overrides the configured fragment size
        if FAST:
            FRAGMENT_SIZE = 1020

    def mmseqs_option(key, default):
        """Look up one entry of the MMSEQS_PARAMS section of the config."""
        return nested_get(config, ['MMSEQS_PARAMS', key], default=default)

    # mmseqs params shared by every mode
    EVALUE = mmseqs_option('EVALUE', '1e-15')
    SEARCH_TYPE = 1 if MODE == 'CDS_AA' else 3   # fixed per mode, not read from config
    SENSITIVITY = mmseqs_option('SENSITIVITY', 7.5)
    ZDROP = mmseqs_option('ZDROP', 40)
    MAX_SEQS = mmseqs_option('MAX_SEQS', 10000)
    MAX_SEQ_LEN = mmseqs_option('MAX_SEQ_LEN', 65000)

    leading = f'--search-type {SEARCH_TYPE} -a --max-seqs {MAX_SEQS} --max-seq-len {MAX_SEQ_LEN} -s {SENSITIVITY} --mask 0 -e {EVALUE}'
    trailing = f'--zdrop {ZDROP} -c {HOMOLOGS_COVERAGE} --cov-mode 2'

    if MODE == 'CDS_AA':
        MMSEQS_PARAMS = f'{leading} {trailing}'
    else:
        # nucleotide modes additionally set k-mer size and scoring matrices
        default_matrix = os.path.join(basedir, 'scoring/blastn-scoring.out')
        KMER = mmseqs_option('KMER', 11)
        SEED_SUB_MATRIX = mmseqs_option('SEED_SUB_MATRIX', default_matrix)
        SUB_MATRIX = mmseqs_option('SUB_MATRIX', default_matrix)

        # FAST overrides the configured k-mer size
        if FAST:
            KMER = 15

        MMSEQS_PARAMS = f'{leading} -k {KMER} {trailing} --seed-sub-mat "{SEED_SUB_MATRIX}" --sub-mat "{SUB_MATRIX}"'

    return MODE, FAST, CDS_BASED, FRAGMENT_SIZE, SEPARATOR, DELETE_INTERMEDIATE_FILES, HOMOLOGS_IDENTITY, HOMOLOGS_COVERAGE, CONSERVED_IDENTITY, CONSERVED_COVERAGE, MMSEQS_PARAMS, MEMORY_GB, MMSEQS_THREADS
183 |
184 |
def display_settings(MODE, INPUT_FILE, OUTPUT_DIR, LOG_DIR, INTERMEDIATE_FILES_DIR, FRAGMENT_SIZE, CDS_BASED, SEPARATOR, MMSEQS_THREADS, MEMORY_GB, MMSEQS_PARAMS, HOMOLOGS_IDENTITY, HOMOLOGS_COVERAGE, CONSERVED_IDENTITY, CONSERVED_COVERAGE, FAST, DELETE_INTERMEDIATE_FILES):
    """ print settings to console

    Prints the paths, then a mode-specific parameter section ('CDS_AA' and
    'CDS_NT' share one layout, 'FRAGMENTS_NT' has its own), then the memory,
    core count and MMseqs2 option string. FAST is accepted but not printed.

    Raises:
    - SystemExit: with status 1 when MODE is none of the three known modes
      (the paths section has already been printed at that point).
    """

    print('\nPATHS:')
    print(f'Input file: {INPUT_FILE}')
    print(f'Output directory: {OUTPUT_DIR}')
    print(f'Log directory: {LOG_DIR}')
    print(f'Intermediate directory: {INTERMEDIATE_FILES_DIR}\n')

    if MODE in ('CDS_AA', 'CDS_NT'):
        print(f'PARAMETERS:')
        print(f'CDS based: {CDS_BASED}')
        print(f'Separator: {SEPARATOR}')
        print(f'Delete intermediate files and fragment/CDS alignments: {DELETE_INTERMEDIATE_FILES})\n')

        print('Homologs proteins definition:')
        print(f'Minimum identity: {HOMOLOGS_IDENTITY}')
        print(f'Minimum query & target coverage: {HOMOLOGS_COVERAGE}\n')

        print('Conserved proteins definition:')
        print(f'Minimum identity: {CONSERVED_IDENTITY}')
        print(f'Minimum query & target coverage: {CONSERVED_COVERAGE}\n')

    elif MODE == 'FRAGMENTS_NT':
        print(f'PARAMETERS ({MODE}):')
        print(f'Fragment size: {FRAGMENT_SIZE}')
        print(f'CDS based: {CDS_BASED}')

        print('DNA significant hits definition:')
        print(f'Minimum identity: {HOMOLOGS_IDENTITY}')
        print(f'Minimum query & target coverage: {HOMOLOGS_COVERAGE}\n')

    else:
        print('FATAL MODE ERROR!')
        # A bare exit() would end with status 0 and report success.
        raise SystemExit(1)

    print(f'Available memory: {MEMORY_GB}G')
    print(f'MMSEQS CPU cores: {MMSEQS_THREADS}')
    print(f'MMSEQS params: {MMSEQS_PARAMS}\n')
224 |
225 |
226 |
def nested_get(d, keys, default=None):
    """
    Get a value from a nested dictionary by following a list of keys.

    Parameters:
    - d (dict): The dictionary to search.
    - keys (list): A list of keys representing the path to the value.
    - default: A default value if the key path doesn't exist.

    Returns:
    - The value if found, else the default. A value that is present but set
      to None is returned as None. An empty key list returns ``d`` itself.
    """
    for key in keys:
        # Stop at the first missing step. Previously a missing key replaced
        # the current level with `default` and the walk went on, so a
        # dict-valued default was itself searched with the remaining keys.
        if not isinstance(d, dict) or key not in d:
            return default
        d = d[key]
    return d
245 |
246 |
--------------------------------------------------------------------------------
/scripts/process_results.R:
--------------------------------------------------------------------------------
1 |
## Post-processing script run through Snakemake (reads the injected `snakemake` object).
## It turns the mmseqs search results into a per-genome-pair table of ANI/AAI,
## alignment fractions and (CDS mode only) protein counts and wGRR, written to
## GENOME_ALIGNMENT, then removes intermediate/temporary files.

##Get params
# Inputs: [[1]] mmseqs results table, [[2]] table of genome lengths
INPUT_PATH = snakemake@input[[1]]
PHAGE_LENGTHS_PATH = snakemake@input[[2]]

# Filtering thresholds and run mode
COVERAGE_THR = snakemake@params[["COVERAGE"]]
IDENTITY_THR = snakemake@params[["IDENTITY"]]
CDS_BASED = snakemake@params[["CDS_BASED"]]
SEPARATOR = snakemake@params[["SEPARATOR"]]
MODE = snakemake@params[["MODE"]]

# Output locations
GENOME_ALIGNMENT = snakemake@output[[1]]
CDS_ALIGNMENT_FILE = snakemake@params[["CDS_ALIGNMENT_FILE"]]
MMSEQS_TEMP_DIR = snakemake@params[["MMSEQS_TEMP_DIR"]]

# Thresholds used to flag a CDS pair as "conserved" (CDS mode only)
CONSERVED_IDENTITY = snakemake@params[["CONSERVED_IDENTITY"]]
CONSERVED_COVERAGE = snakemake@params[["CONSERVED_COVERAGE"]]

# Intermediate files are taken to live in the directory of the first input
DELETE_INTERMEDIATE_FILES = snakemake@params[["DELETE_INTERMEDIATE_FILES"]]
INTERMEDIATE_FILES_DIR = dirname(snakemake@input[[1]])

# Load necessary libraries
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(arrow))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))


# Column names assigned to the (header-less) mmseqs results table
col.names <- c("query_fragment_id", "reference_fragment_id", "matches", "length", "mismatches", "pident", "evalue", "qlen")

# Two methods depending on the mode (fragment or CDS: fragment is processed by chunks while CDS have to be done all at once)

if(CDS_BASED){ #METHOD 1: CDS
  # Load mmseqs results
  writeLines("Loading mmseqs results table...")
  mmseqs_results <- fread(INPUT_PATH, header = F)
  setnames(mmseqs_results, col.names)

  # queries phage identifiers [from fragments/ORFs/proteins]
  # The genome id is the part of the fragment id before the first SEPARATOR (literal match, not a regex)
  writeLines("Curate phage identifiers...")
  mmseqs_results[, query_seq := sapply(strsplit(mmseqs_results[['query_fragment_id']], SEPARATOR, fixed = TRUE), `[`, 1)]
  mmseqs_results[, reference_seq := sapply(strsplit(mmseqs_results[['reference_fragment_id']], SEPARATOR, fixed = TRUE), `[`, 1)]

  writeLines("Filtering...")
  # Per-hit metrics; `gaps` is computed here but not referenced again in this script
  mmseqs_results[, gaps := length - matches - mismatches]
  # Aligned length excluding gaps
  mmseqs_results[, ani_alnlen := mismatches + matches]
  # Coverage and identity are both expressed relative to the full query length
  mmseqs_results[, ani_cov := ani_alnlen / qlen]
  mmseqs_results[, ani_pid := matches / qlen]
  # Rescale pident by 0.01 (percentage -> fraction)
  mmseqs_results[, pident := pident * 0.01]

  # Drop hits within the same genome and hits not strictly above both thresholds
  mmseqs_results <- mmseqs_results[query_seq != reference_seq & ani_cov > COVERAGE_THR & ani_pid > IDENTITY_THR]

  writeLines("Finding best bidirectional hits (BBH)...")
  # Perform an inner join to find bidirectional hits:
  # a row is kept only if the reverse (reference -> query) hit is also present.
  # Non-key columns of the forward hit get suffix .x, those of the reverse hit .y
  mmseqs_results <- merge(mmseqs_results, mmseqs_results,
                          by.x = c("query_fragment_id", "reference_fragment_id"),
                          by.y = c("reference_fragment_id", "query_fragment_id"))

  # Deduplicate by filtering where query_seq_x < reference_seq_x
  # (each reciprocal pair appears twice after the self-join; keep one orientation)
  mmseqs_results <- mmseqs_results[query_seq.x < reference_seq.x]

  # Calculate the average pident and ani_alnlen
  # (mean of the forward and reverse hit of each reciprocal pair)
  mmseqs_results[, `:=`(
    pident = round((pident.x + pident.y) / 2, 6),
    ani_alnlen = round((ani_alnlen.x + ani_alnlen.y) / 2, 6)
  )]

  # clean best hits table
  setnames(mmseqs_results, old = c("query_seq.x", "reference_seq.x"), new = c("query_seq", "reference_seq"))

  writeLines("Calculating ANI...")
  # Load genome lengths
  lengths_df <- fread(PHAGE_LENGTHS_PATH)

  # genome lengths (ANImm calculation)
  # Keyed on `genome` so lengths can be looked up by genome id below
  genome_length_df <- lengths_df[, .(genome, length)]
  setkey(genome_length_df, genome)

  # calculate ANI
  # Mean identity over all reciprocal CDS pairs of a genome pair
  ani <- mmseqs_results[, .(ANI = mean(pident)), by = .(query_seq, reference_seq)]

  # calculate different measures
  # Total aligned length per genome pair, then alignment fractions against
  # each genome length (len_1 = query genome, len_2 = reference genome)
  aligned_nucleotides <- mmseqs_results[, .(ani_alnlen = sum(ani_alnlen)), by = .(query_seq, reference_seq)]
  aligned_nucleotides[, len_1 := genome_length_df[query_seq, length]]
  aligned_nucleotides[, len_2 := genome_length_df[reference_seq, length]]
  aligned_nucleotides[, af_1 := round(ani_alnlen / len_1, 6)]
  aligned_nucleotides[, af_2 := round(ani_alnlen / len_2, 6)]
  aligned_nucleotides[, af_mean := round(2 * ani_alnlen / (len_1 + len_2), 6)]
  aligned_nucleotides[, af_min := round(ani_alnlen / pmin(len_1, len_2), 6)]
  aligned_nucleotides[, af_max := round(ani_alnlen / pmax(len_1, len_2), 6)]
  aligned_nucleotides[, af_jaccard := round(ani_alnlen / (len_1 + len_2 - ani_alnlen), 6)]
  # add measures and rename columns
  merged <- merge(ani, aligned_nucleotides, by = c("query_seq", "reference_seq"))
  # wgANI = ANI scaled by the mean alignment fraction
  merged[, wgANI := round(ANI * af_mean, 6)]
  setnames(merged, c("query_seq", "reference_seq", "ANI"), c("Seq1", "Seq2", "ANI"))

  writeLines("Calculating wGRR...")
  # Proteins number per genome (wGRR calculation)
  n_prot_df <- lengths_df[, .(Seq1 = genome, n_prots)]

  # Get number of BBH used to calculate mean ani
  # Grouping by all listed columns and dropping N yields the distinct combinations
  cds_alignment_cols <- c("query_seq", "reference_seq", "query_fragment_id", "reference_fragment_id", "pident.x", "ani_cov.x", "pident.y", "ani_cov.y")
  cds_alignment_df <- mmseqs_results[, .N, by = cds_alignment_cols][, cds_alignment_cols, with=FALSE] # Group by phages and ORFs

  # Rename columns
  setnames(cds_alignment_df, old = c("query_seq", "reference_seq", "query_fragment_id", "reference_fragment_id",
                                     "pident.x", "ani_cov.x", "pident.y", "ani_cov.y"),
           new = c("Seq1", "Seq2", "seq1_fragment_id", "seq2_fragment_id",
                   "seq1_fragment_pident", "seq1_fragment_cov",
                   "seq2_fragment_pident", "seq2_fragment_cov"))

  # Calculate homologous proteins per phage
  # (this helper column is replaced further down when cds_alignment_df is re-aggregated)
  cds_alignment_df[, cds_alignments_counts := 1]

  ### seq1
  seq1_homologous_prots_df <- cds_alignment_df[, .(seq1_n_prots_hom = .N), by = .(Seq1, Seq2)]

  ### seq2
  # NOTE(review): grouped on the same (Seq1, Seq2) pairs as above, so the seq1 and
  # seq2 counts are equal for every pair - confirm this is the intended definition
  seq2_homologous_prots_df <- cds_alignment_df[, .(seq2_n_prots_hom = .N), by = .(Seq2, Seq1)]

  # Merge
  homologous_prots_df <- merge(seq1_homologous_prots_df, seq2_homologous_prots_df, by = c("Seq1", "Seq2"), all = TRUE)

  # Calculate conserved proteins per phage
  # A CDS pair is conserved when both directions reach the conserved identity AND coverage thresholds (>=)
  filt_conserved_identity <- (cds_alignment_df$seq1_fragment_pident >= CONSERVED_IDENTITY) & (cds_alignment_df$seq2_fragment_pident >= CONSERVED_IDENTITY)
  filt_conserved_coverage <- (cds_alignment_df$seq1_fragment_cov >= CONSERVED_COVERAGE) & (cds_alignment_df$seq2_fragment_cov >= CONSERVED_COVERAGE)
  conserved_df <- cds_alignment_df[filt_conserved_identity & filt_conserved_coverage]

  # Calculate conserved proteins per phage for Seq1
  seq1_conserved_prots_df <- conserved_df[, .(seq1_n_prots_cons = .N), by = .(Seq1, Seq2)]

  # Calculate conserved proteins per phage for Seq2
  seq2_conserved_prots_df <- conserved_df[, .(seq2_n_prots_cons = .N), by = .(Seq2, Seq1)]

  # Merge the results for Seq1 and Seq2
  conserved_prots_df <- merge(seq1_conserved_prots_df, seq2_conserved_prots_df, by = c("Seq1", "Seq2"), all = TRUE)

  # Specify columns to save
  cols2save <- c("Seq1", "Seq2", "seq1_fragment_id", "seq2_fragment_id", "seq1_fragment_pident", "seq2_fragment_pident", "seq1_fragment_cov", "seq2_fragment_cov")

  # The per-CDS alignment table is only written when intermediate files are kept
  if (!DELETE_INTERMEDIATE_FILES) {
    writeLines("Saving CDS alignment file... ")
    fwrite(cds_alignment_df[, ..cols2save], CDS_ALIGNMENT_FILE, row.names = FALSE)
  }

  # Count BBH
  # From here on cds_alignment_df holds one row per genome pair with its BBH count
  cds_alignment_df <- cds_alignment_df[, .N, by = .(Seq1, Seq2)]
  setnames(cds_alignment_df, "N", "cds_alignments_counts")

  ### Add number of protein types to genome alignments

  # Total number of proteins
  genome_alignment_df <- merge(merged, n_prot_df, by = "Seq1", all.x = TRUE)
  setnames(genome_alignment_df, "n_prots", "seq1_n_prots")

  # Reuse the same lookup table for Seq2 by renaming its key column in place
  setnames(n_prot_df, "Seq1", "Seq2")
  genome_alignment_df <- merge(genome_alignment_df, n_prot_df, by = "Seq2", all.x = TRUE)
  setnames(genome_alignment_df, "n_prots", "seq2_n_prots")

  # Homologous proteins
  genome_alignment_df <- merge(genome_alignment_df, homologous_prots_df, by = c("Seq1", "Seq2"), all.x = TRUE)

  # Conserved proteins
  # Pairs without any conserved CDS come out of the left join as NA; set them to 0
  genome_alignment_df <- merge(genome_alignment_df, conserved_prots_df, by = c("Seq1", "Seq2"), all.x = TRUE)
  genome_alignment_df[, c("seq1_n_prots_cons", "seq2_n_prots_cons") := lapply(.SD, function(x) replace(x, is.na(x), 0)), .SDcols = c("seq1_n_prots_cons", "seq2_n_prots_cons")]
  # BBH hits number
  genome_alignment_df <- merge(genome_alignment_df, cds_alignment_df, by = c("Seq1", "Seq2"), all.x = TRUE)

  # Minimum number of proteins
  genome_alignment_df[, min_prots := pmin(seq1_n_prots, seq2_n_prots, na.rm = TRUE)]

  # Calculate wGRR
  # mean identity * number of BBH = summed identity; divided by the smaller protein count
  genome_alignment_df[, cds_alignments_ani_sum := round(ANI * cds_alignments_counts, 6)]
  genome_alignment_df[, wgrr := round(cds_alignments_ani_sum / min_prots, 3)]

  # In amino-acid mode the identity columns are reported as AAI / wgAAI
  if (MODE == "CDS_AA") {
    setnames(genome_alignment_df, old = c("ANI", "wgANI"), new = c("AAI", "wgAAI"))
  }
  # Save
  writeLines("Saving final table...")
  fwrite(genome_alignment_df, GENOME_ALIGNMENT, row.names = FALSE)

} else{ #METHOD 2: FRAGMENT
  writeLines("Starting post-processing...")
  # Chunked result files are looked up in the directory of INPUT_PATH
  files <- list.files(path=dirname(INPUT_PATH), pattern="chunk_.*.tsv", full.names=TRUE)
  # Regex matching SEPARATOR and everything after it (used to strip the fragment suffix)
  SEPARATOR2 <- paste(SEPARATOR,".*",sep="")

  lengths_df <- fread(PHAGE_LENGTHS_PATH)
  genome_length_df <- lengths_df[, .(genome, length)]
  setkey(genome_length_df, genome)

  # Process one chunk at a time; each chunk's summary is appended to the output file
  invisible(lapply(files, function(x) {
    mmseqs_results_csv <- open_dataset(sources = x,format='tsv', read_options = CsvReadOptions$create(column_names = col.names))
    # Drop self hits and hits not strictly above both thresholds, then
    # summarise per (query genome, reference) pair before collecting into memory.
    # Here reference_fragment_id is used directly as the reference genome id.
    mmseqs_results <- mmseqs_results_csv |>
      filter(sub(SEPARATOR2,"", query_fragment_id) != reference_fragment_id, (mismatches + matches) / qlen > COVERAGE_THR, matches / qlen > IDENTITY_THR) |>
      group_by(Seq1=sub(SEPARATOR2,"", query_fragment_id),Seq2=reference_fragment_id) |>
      summarize(ANI = mean(pident)* 0.01, ani_alnlen = sum(mismatches + matches)) |>
      collect()
    mmseqs_results <- data.table(mmseqs_results)
    # Same alignment-fraction measures as in CDS mode
    mmseqs_results[, len_1 := genome_length_df[Seq1, length]]
    mmseqs_results[, len_2 := genome_length_df[Seq2, length]]
    mmseqs_results[, af_1 := round(ani_alnlen / len_1, 6)]
    mmseqs_results[, af_2 := round(ani_alnlen / len_2, 6)]
    mmseqs_results[, af_mean := round(2 * ani_alnlen / (len_1 + len_2), 6)]
    mmseqs_results[, af_min := round(ani_alnlen / pmin(len_1, len_2), 6)]
    mmseqs_results[, af_max := round(ani_alnlen / pmax(len_1, len_2), 6)]
    mmseqs_results[, af_jaccard := round(ani_alnlen / (len_1 + len_2 - ani_alnlen), 6)]
    mmseqs_results[, wgANI := round(ANI * af_mean, 6)]
    mmseqs_results <- mmseqs_results[order(Seq1,Seq2)]
    fwrite(mmseqs_results, GENOME_ALIGNMENT, row.names = FALSE, append = TRUE)
    # The chunk file is deleted once processed, regardless of DELETE_INTERMEDIATE_FILES
    unlink(x)
  }))
}

# Remove intermediate files
if (DELETE_INTERMEDIATE_FILES) {
  writeLines('Removing intermediate files...')
  unlink(INTERMEDIATE_FILES_DIR, recursive = TRUE)
}

# Remove mmseqs temp dir
# Any error is re-raised unless its message mentions "no temporary mmseqs dir found"
tryCatch({
  unlink(MMSEQS_TEMP_DIR, recursive = TRUE)
}, error = function(e) {
  if (!grepl("no temporary mmseqs dir found", e$message, ignore.case = TRUE)) stop(e)
})

# End
writeLines("\n\nSuccess! Thank you for using MANIAC!\n\n")
231 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MANIAC
2 |

3 |
4 | ## 1. What is MANIAC?
5 | MANIAC stands for **M**Mseqs2-based **A**verage **N**ucleotide **I**dentity **A**ccurate **C**alculator. It is a bioinformatic pipeline, written using SnakeMake, for rapid and accurate calculation of average nucleotide identity (ANI) and Alignment Fraction (AF) between viral genomes. The goal of MANIAC is to provide a user-friendly and efficient tool for researchers in genomics, bioinformatics, and virology. MANIAC has been developed and optimised for bacteriophages but in principle can be used on any microbial genomes.
6 |
7 | Publication: https://doi.org/10.1101/2024.04.23.590796
8 | Contact: jade.leconte@uj.edu.pl
9 |
10 | ## 2. Features
11 | - High throughput: MANIAC can efficiently process large datasets (thousands) of viral genomes.
12 | - Accurate: Uses MMseqs2 to ensure accurate calculation of average nucleotide identity (ANI) and alignment fraction (AF).
13 | - Comprehensive: Provides analysis at both nucleotide and amino-acid level.
14 | - User-friendly: Easy-to-use Snakemake workflow.
15 | - Reproducible: Conda-based installation support ensures reproducibility.
16 |
17 | ## 3. ANI calculation
18 | ### Fragment mode
19 | 
20 |
21 | The standard and quickest way to calculate ANI is based on the approach proposed by Goris et al. for bacterial genomes [1]. Specifically, each query is chopped into short fragments of pre-defined length (by default 1020 nt). Then, each fragment is aligned with the subject and the best hit is found – but only if the query coverage is at least 70% and the sequence identity is at least 30% across the entire query length. ANI is then taken as the mean percentage identity of all aligned fragments, and the query AF is calculated as the ratio of the length of the aligned query genome (i.e., the summed length of all aligned fragments) to the full query length.
22 |
23 |
24 | ### Best-bidirectional hits or CDS mode
25 | 
26 |
27 | In addition to the standard, fragment-based ANI calculation, MANIAC can carry out the calculation using the best-bidirectional hits (BBH) approach if the user provides coding sequences (CDSs) for the input genomes, as either nucleotide or amino-acid sequences. The calculation is then carried out analogously to the fragment mode, with the following differences:
28 |
29 | 1. CDSs are used instead of fragments.
30 | 2. To calculate ANI and AF, only CDSs in the query and subject that are each other's best hits are considered.
31 |
32 |
33 | ## 4. Install
34 |
35 | ### Linux
36 |
37 | Create and activate a conda environment.
38 | ```
39 | conda create -n maniac -c conda-forge mamba python=3.9
40 | conda activate maniac
41 | mamba install -c conda-forge -c bioconda snakemake pandas biopython=1.79 mmseqs2 r-base r-essentials r-arrow datamash pyopenssl=24.2 parallel=20240922
42 | ```
43 |
44 | Clone MANIAC repository.
45 | ```
46 | git clone https://github.com/bioinf-mcb/MANIAC
47 | ```
48 |
49 | Test using example input data and configuration files in the `test` folder.
50 | ```
51 | snakemake --cores 8 --quiet --snakefile MANIAC --configfile test/configs/easy-fragment-based.yml
52 | snakemake --cores 8 --quiet --snakefile MANIAC --configfile test/configs/easy-cds-aa.yml
53 | snakemake --cores 8 --quiet --snakefile MANIAC --configfile test/configs/easy-cds-nt.yml
54 | ```
55 |
56 | ### macOS
57 |
58 | Install dependencies using [homebrew](https://brew.sh/)
59 | ```
60 | brew update
61 | brew install bash
62 | brew install coreutils
63 | brew install gnu-sed
64 | brew install gawk
65 | brew install parallel
66 | brew install datamash
67 | brew install mmseqs2
68 |
69 | export PATH="$(brew --prefix coreutils)/libexec/gnubin:$PATH"
70 | export PATH="$(brew --prefix gnu-sed)/libexec/gnubin:$PATH"
71 | export PATH="$(brew --prefix gawk)/libexec/gnubin:$PATH"
72 | export PATH="/usr/local/opt/mmseqs2/bin:$PATH"
73 | ```
74 |
75 | Create and activate a conda environment.
76 | ```
77 | conda create -n maniac -c conda-forge mamba python=3.9
78 | conda activate maniac
79 | mamba install -c conda-forge -c bioconda snakemake pandas biopython=1.79 r-base r-essentials r-arrow datamash pyopenssl=24.2
80 | ```
81 |
82 | Clone MANIAC repository.
83 | ```
84 | git clone https://github.com/bioinf-mcb/MANIAC
85 | ```
86 |
87 | Test using example input data and configuration files in the `test` folder.
88 | ```
89 | snakemake --cores 8 --quiet --snakefile MANIAC --configfile test/configs/easy-fragment-based.yml
90 | snakemake --cores 8 --quiet --snakefile MANIAC --configfile test/configs/easy-cds-aa.yml
91 | snakemake --cores 8 --quiet --snakefile MANIAC --configfile test/configs/easy-cds-nt.yml
92 | ```
93 |
94 | ### Windows
95 |
96 | To install MANIAC on Windows, you first need to install Windows Subsystem for Linux (WSL) and set it up. Once WSL is installed, follow the instructions for installing MANIAC on Linux.
97 |
98 | 1. Click the Start menu, type "PowerShell," right-click on Windows PowerShell, and select Run as administrator.
99 | 2. In the PowerShell window, enter the following command `wsl --install` to install WSL.
100 | 3. Restart your computer, choose Linux to launch and follow the on-screen instructions.
101 | 4. Once your Linux environment is ready, follow the [Linux](#linux) Debian-Based installation steps to install MANIAC.
102 |
103 |
104 | ### Dependencies details:
105 | MANIAC was successfully tested on Linux, macOS and Windows with the following dependency versions. Make sure the bash version used is 5.0 or above by running `bash --version` and that all dependencies were successfully installed with homebrew and/or conda.
106 | - python=3.9
107 | - bash=5.2.21
108 | - r-base=4.4.1
109 | - r-essentials=4.4
110 | - r-arrow=17.0.0
111 | - snakemake=7.32.4
112 | - pandas=2.2
113 | - biopython=1.79
114 | - mmseqs2=15.6
115 | - datamash=1.8
116 | - pyopenssl=24.2
117 | - parallel=20240922
118 |
119 | ## 5. Running MANIAC
120 | This section will guide you on how to prepare your input files, create a yaml configuration file, and run the MANIAC software. We'll also cover the types of output files you can expect from MANIAC.
121 |
122 | ### Input files
123 | MANIAC requires one of two types of input files:
124 |
125 | 1. Full genome files (for the fragment calculation),
126 | 2. Nucleotide or amino-acid coding-sequences (for the BBH calculation).
127 |
128 | Each file should be in FASTA format. The header convention for CDS input should be the genome name, followed by a `_CDS` string, followed by its unique suffix. For example, if genome named **XYZ_phageVp123** has three coding sequences, the input file headers could be
129 |
130 | `>XYZ_phageVp123_CDS1`, `>XYZ_phageVp123_CDS2` and `>XYZ_phageVp123_CDS5`
131 |
132 | Examples of input files are located in `test/data`.
133 |
134 | ### Configuration file
135 | MANIAC uses a yaml configuration file to set the workflow parameters. Here's an example of what a simple configuration file might look like:
136 |
137 | ```
138 | INPUT_FILE: "test/data/fragment-based.fasta"
139 | OUTPUT_DIR: "test/output/FRAGMENT-BASED"
140 | MODE: FRAGMENTS_NT
141 | FAST: False
142 | MEMORY_GB: 16
143 | ```
144 | Here are details of various parameters.
145 |
146 | #### Parameters: required
147 | * `INPUT_FILE`: full genome or CDS file
148 | * `OUTPUT_DIR`: directory where the output should be written
149 | * `MODE`: FRAGMENTS_NT requires full genomes as an input, while CDS_NT and CDS_AA use BBH to calculate ANI and require the input to be CDS (nucleotide or protein respectively) [FRAGMENTS_NT | CDS_NT | CDS_AA]
150 | * `FAST`: Enable Fast mode. Fast mode will automatically overwrite some parameters to prioritize speed over accuracy (KMER: 15, FRAGMENT_SIZE: 1020) [True/False]
151 | * `MEMORY_GB`: Declare the memory available for MANIAC in GB. Note: This will not actually limit the memory and is only used to optimize post-processing run speed. (default: `16`)
152 |
153 | #### Parameters: specific to fragment mode (optional)
154 | * `COVERAGE`: minimal query coverage used for filtering (default: `0.7`)
155 | * `IDENTITY`: minimal query identity used for filtering (default: `0.3`)
156 | * `FRAGMENT_SIZE`: length of the genome fragments to be used in search (default: `500`)
157 |
158 | #### Parameters: specific to BBH mode (optional)
159 | * `HOMOLOGS:` BBH & homologous CDS definition
160 | * `IDENTITY`: (default: `0.3`)
161 | * `COVERAGE`: (default: `0.7`)
162 | * `CONSERVED`: conserved CDS definition
163 | * `IDENTITY`: (default: `0.8`)
164 | * `COVERAGE`: (default: `0.5`)
165 |
166 | #### Parameters: others (optional)
167 | * `DELETE_INTERMEDIATE_FILES`: [True/False] (default: `True`)
168 | * `MEMORY_EFFICIENT`: mode used to run in a memory stringent manner. Only loads table columns that are important for the analysis and drops all columns that are not used for ANI calculation [True/False] (default: `True`)
169 | * `MMSEQS_PARAMS`: any additional parameters to be passed to MMseqs2 search, default values calibrated with Pyani
170 | * `EVALUE`: (default: `1e-15`)
171 | * `SENSITIVITY`: (default: `7.5`)
172 | * `ZDROP`: (default: `40`)
173 | * `MAX_SEQS`: (default: `10000`)
174 | * `MAX_SEQ_LEN`: (default: `65000`)
175 | * `KMER`: (default: `11`)
176 | * `SEED_SUB_MATRIX`: (default: `scoring/blastn-scoring.out`)
177 | * `SUB_MATRIX`: (default: `scoring/blastn-scoring.out`)
178 |
179 | #### Parameters: recommendations
180 | For full genome and nucleotide CDS mode, the alignment scoring matrix should be provided. Matrices for the blastn and unit-scoring modes are provided in the repository. Please note that the sensitivity parameter will not matter for nucleotide-based calculations, only k-mer size will. If FAST is enabled, k-mer size will be forced to 15.
181 |
182 | For amino-acid calculations, no scoring matrix has to be provided but a more sensitive search is recommended (such as `-s 7.5` or higher). Please refer to the original mmseqs publication [2].
183 |
184 | Examples of input files for different calculation modes are located in `test/configs`. A minimum working example is provided, as well as different examples with more complete sets of parameters for advanced users. **We strongly recommend against changing the mmseqs input parameters** as they have been optimised for different calculation modes.
185 |
186 |
187 | ### Running MANIAC
188 | After your input files are ready and your configuration file is set, you can run MANIAC as follows:
189 | ```
190 | snakemake --cores 8 --quiet --snakefile MANIAC --configfile your-path-to-configuration-file.yml
191 | ```
192 | where `your-path-to-configuration-file.yml` is the full path to your configuration file. The type of the configuration file will determine whether MANIAC runs in the fragment mode or the BBH mode. `cores` should be adapted to the machine you are using to run MANIAC.
193 |
194 | If MANIAC ran into an error because of an issue (such as wrong installation) that you were able to correct, please delete the output directory before trying to run the pipeline again.
195 |
196 | ### Maniac Output Description
197 |
198 | Maniac generates output files in the user-defined output directory. The `genome-alignment.csv` file contains the ANI results along with associated metrics. The file is a table with fields detailed below:
199 |
200 | | Metrics | Description |
201 | |----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
202 | | **ANI/AAI** | Average nucleotide or amino-acid identity between the query and reference sequences |
203 | | **len_1** | The length of the query sequence |
204 | | **len_2** | The length of the reference sequence |
205 | | **ani_alnlen** | The total length of aligned nucleotides between the query and reference sequences |
206 | | **af_1** | Alignment fraction of the query sequence calculated by dividing the aligned length by the total length of the query sequence |
207 | | **af_2** | Alignment fraction of the reference sequence calculated by dividing the aligned length by the total length of the reference sequence |
208 | | **af_min** | The minimum alignment fraction between the query and reference sequence calculated by dividing the aligned nucleotide length by the shorter sequence between the query and reference sequence |
209 | | **af_max** | The maximum alignment fraction between the query and reference sequence calculated by dividing the aligned nucleotide length by the longer sequence between the query and reference sequence |
210 | | **af_mean** | Mean alignment fraction between the query and reference sequences. It is calculated by averaging the alignment fractions of the query and reference sequences weighted by their lengths, i.e. `2 * ani_alnlen / (len_1 + len_2)`. Because the results of MANIAC are asymmetrical, users can also calculate a mean alignment fraction from the alignment fractions of the pair, i.e. `(af_1 + af_2) / 2` |
211 | | **af_jaccard** | The Jaccard index of the alignment fraction calculated as the ratio of the aligned length to the total length of the union of the query and reference sequences |
212 | | **wgANI/wgAAI** | wgANI/AAI is the whole genome ANI/AAI. It is calculated by multiplying ANI or AAI by the mean AF |
213 | | **seq1_n_prots** | Number of proteins or CDS in the query sequence |
214 | | **seq2_n_prots** | Number of proteins or CDS in the reference sequence |
215 | | **seq1_n_prots_hom** | Number of homologous proteins or CDS in the query sequence |
216 | | **seq2_n_prots_hom** | Number of homologous proteins or CDS in the reference sequence |
217 | | **seq1_n_prots_cons** | Number of conserved proteins or CDS in the query sequence |
218 | | **seq2_n_prots_cons** | Number of conserved proteins or CDS in the reference sequence |
219 | | **cds_alignments_counts** | Number of aligned proteins or CDS (best bidirectional hits) between query and reference sequences |
220 | | **cds_alignments_ani_sum** | Sum of nucleotide or amino-acid identities of aligned proteins or CDS between query and reference sequences |
221 | | **min_prots** | The minimum number of proteins or CDS between the query and reference sequences |
222 | | **wgrr** | wGRR is the weighted gene repertoire relatedness. It is calculated as the ratio of bi-directional best hits between the query and reference genomes weighted by the sequence identity of homologs (CDS or protein homologs for the CDS or protein mode respectively) |
223 |
224 |
225 | ## 6. References
226 | 1. Goris, J. et al. DNA-DNA hybridization values and their relationship to whole-genome sequence similarities. Int. J. Syst. Evol. Microbiol. 57, 81–91 (2007).
227 | 2. Steinegger, M. & Söding, J. MMseqs2 enables sensitive protein sequence searching for the analysis of massive data sets. Nat. Biotechnol. 35, 1026–1028 (2017).
228 | 3. Johannes Köster, Sven Rahmann, Snakemake—a scalable bioinformatics workflow engine. Bioinformatics, Volume 28, Issue 19, Pages 2520–2522 (2012).
229 | 4. Richardson N, Cook I, Crane N, Dunnington D, François R, Keane J, Moldovan-Grünfeld D, Ooms J, Wujciak-Jens J, Apache Arrow. arrow: Integration to 'Apache' 'Arrow'. R package version 17.0.0, https://github.com/apache/arrow (2024).
230 | 5. Tange, O. GNU Parallel 20240922 ('Gold Apollo AR924'). Zenodo. doi: 10.5281/zenodo.13826092 (2024)
231 |
--------------------------------------------------------------------------------
/test/data/cds-aa-based.fasta:
--------------------------------------------------------------------------------
1 | >PHAGE1_COMPLETE_CDS_1 core
2 | MRENYYYGQGKVFLAPRDNKRAFRWVGDVSTLKIAFSYEQQITKASRGGQLYQNEKIITG
3 | ASGSVSSTWHNFSVENLALLLGAQPVDEPFSFNQQYALPNGIVKGDIIALPHTTVFNVSI
4 | NGLNRGADYIVDRQFGTIEFLVTPETPELIAGYDHLFNQWLPFFSVKPQEFHLRFQGVNL
5 | AEDTAPVLLELYRVSVDPLATLEMISSGTDIAGMDMTSLILPDFNQQTGTAFSYFGRMQL
6 | IAPQSPQPPQIALTYDGRANYDGQYQYRGK
7 | >PHAGE1_COMPLETE_CDS_2 core
8 | MGGKGSKKVTVGYRYSWDVQAGLGRGPVNEIVSIMADKKTVFAGTPGQISSSTSVYIDKP
9 | GLFGGDDTGGEGGIQGQLDIMMGEPDQVPPASLLKLLTGLVPGFRGVVTTFFSGLVSCYS
10 | ASPKPWLYRVRRTTKGWDGDVWYPEKATIMLENSEAQLDDDSDLLSEQLANLRAIHAMNP
11 | AHILVECATNRDWGRQLTLADDLNLDSYRAVADTLYEEGFGLCFRYNRQDGLDTFVQQVL
12 | DHVGAVQYADLETGKLTLKLLRGDYRVDDLPLFTYDNGIIAVQDDDSATTTSNPNEIVVT
13 | WNDPVTNADGEVRAQNLGAIQNSGLNSSSVEYRAIPTHSLAARVAQRDLETAQSELTRLV
14 | IQFDRRGGILRPGDVFRVKLPDRNIDNMVLRVGKIEEGDTGVLTLTVVQDVFGLPSTSYS
15 | SGQQDSGWTPPDKSTRPVTTQRLIDLPYAVLAGTLSQADLSYLKPESGHLGVMAVAPTSL
16 | SINYQLQTRAAGAAFADRGQGDWTPSGTLTSPVGRLDTVLHVNIDSFPAVGDGLIVNDEI
17 | MRVDAVDRLAGTITVGRGCMDTLPSGHFAGDRCWAYQDALDSDGLEYLSGETVEVRLLTR
18 | TSAETLAESAAPVVTLTMSGRQARPYLPGNIRVNGVLYPDVVASTDKFTLAFSHRDRLLQ
19 | ADRLIDCTENSIGPEPGTEYVVKLIAQGTGAEVWSMATSDASIPIPYVTGGAGAGVHTLT
20 | LKSRRDGLLSLYTFRAGLPAGRYKAFPITVTMNLTIVDGHDWATATPEDTTTGAVPELHA
21 | IADAAGITVWYPPDLVSSGLSIPADDTEWPSGTWPTSPWSFGTHPVLAISQWTETSGPLT
22 | VLALQGDISALLLDSATGSAAAIEWDAGIYLEEMDITIFTTDGPVLNEGIISLKLTERSI
23 | GP
24 | >PHAGE1_COMPLETE_CDS_3 core-elongated
25 | MWITMEHIRAGGGCAWGLRTFFSRYNLDLQAFIRDGGIDSDMLAGTGDALAIKIVELAQQ
26 | TQKEAGA
27 | >PHAGE1_COMPLETE_CDS_4 core-elongated
28 | VDPFSWAAVAKFVATLVASYVLNSALAPKAKNSTPEAATEEDWNMPMPDEGTPQCVFFGD
29 | CWTADWFVLGYGNYRYQAIKK
30 | >PHAGE1_COMPLETE_CDS_5 single
31 | LGCGRQICCHACRIVCAEFRTGSQSQKLHAGSGHGGGLEYAHAR
32 | >PHAGE1_COMPLETE_CDS_6 core
33 | MSWSEFEYSVADGQPLTLYEFRLGDSLFWRYSNADKDIDFAGQVWEAQAISNSGLSSGSG
34 | DGMDITVPASNEVALLFRATPPSRAVRVRVMRWHATDTSGEFRVVWIGEISSVKREQIES
35 | CKLLTISLASTFSRVGLRLTFGRQCPYALYDHNCRVDPLQFAVSGVGVTALDGSSITASL
36 | PAGLASDWFSGGYIEFDRNGYTERRGLRTQDGNTLHLFGGTTGLQVGQSVTLYPGCDRTI
37 | ATCDSKFSNHLNYGGQPHMPGKSPYTIIKLF
38 | >PHAGE1_COMPLETE_CDS_7 core
39 | MTTLFPWLADPDWSRGVTETLEWKTDVLQSPTGAEQRISRRLSPRRTFEFTALVHDTGRQ
40 | RFENMLWQGCAGTWAMPVYPDIYALPAAVSSGATAFSIPTAGRDFSVGGTVLLKTSESPD
41 | ATSRMATIAGITGDTLQLGSPLTDSWPAGSLVYPVRPAVLTEPPSLSHLTDIVTTAQVRF
42 | RIAEHNAFSDAPMLTQYRGHPVLESETDWGESVSSCYQPLTRELDNGSSVPFRIDTAGRP
43 | FWRQTHSWFTANRPAQTSLRQLLWYLRGRQRPIWVPGQTLDFSPTSAISGNAVDVVEAGF
44 | TELGIHPGRRDICILLTDGTRHYRRITSVSLVSGAERLVLDGDAISAVQHQIVSISLMTL
45 | ARQDADSVSWEHVTDADGVARVATTFTGVRDELE
46 | >PHAGE1_COMPLETE_CDS_8 core
47 | MADNSTLNLMLKIRADLADASRALQGLAGDVEDVGSAATTSSQKLSTTARAQDSVAESAR
48 | GHAHAEQSASVAASQTGDAVQQAATDYAGYQAAIARTRAEMGSLQSGMDGTTADIDAQRA
49 | ALTALVNRIDPVVAAYGRLDDMQEQLTAFRGAGLVGDDDFEQYSSRLNELRLQVEKSAYA
50 | ATDAGRKEAAAAREAAQAEAQAAAAKEQFINLLREQAETMNLTTAELLQYKAAQLGISAE
51 | AAPFIQKITDQNAAMSKGGISAGQYAQAMRYLPMQITDVVTSLASGMPVWMVAIQQGGQI
52 | KDSFGGIGNTFKALTTLITPARIAMGGLVGIVAAAGIAAVSAMNDQDEFNRSIQKTGNYA
53 | GVTSGELEQMVQQGGQLRGNYSQVRDILNGLVSSGRFTGETLTSVAQAATLMAELSGQSA
54 | DEVVSSFLKMNDGVTSWAANTTQQYHFLDLETYQRIQSLEDQGRKEEAIEVASQAFKKAS
55 | EERLRTMEQQLNRAARAWNNVKIAATGAWESFKDKAGGALGLDAPADELTNKIKELEAKI
56 | AAAGSDTEVAMQPREYQESVKQYKADLAALKEKQQAEEKAIAAEAKRKQTDAESIAAAEK
57 | LQKLWKGNRSELEKEADAVEETRKNYETLWKSASGRDMLQSRGVTSTDGKNFSGGQWDID
58 | TKALDKSGQKAEQYNKQLQQTLNQKKAITELDRVEAEIRNGTLSSATKAQQDEARALAKK
59 | IDAANAANKATKEGQSLARQQETSNKNFVKQLEDQAAKRTQGAAATRAQEIATRNLTAEQ
60 | RRQAEAANAAITAQEFKGQNLQLQLEYMRDTGDTAGASMLELQNRVSDLRREFEASGNTE
61 | GLNWLDKLLPVAETKIRVDDLKKQLDDLFTWQSQQETSIQAQVQGGLLNEIQGRQRLVQL
62 | HQEVGDKIKGYLPQLKEMATAPGEAGDKIREMIRQLEEELGKLNQAGNELTQAFRDGLQS
63 | GIESSLMGLAKGTMNLRDAVKNLALTIINSMAQLAAQQLAQMATSSLIGSSGGIGGLLGS
64 | VFAADGGQVRGPGSTTSDSIPAMLSDQEFVTRAAVVQQPGALDFLHAFNRHGMAAVEGWL
65 | PRVRHASGGLAGIPAQNMPVPASVPETAMATPAAASLQPISLQQQLVLDPSEVYTAGAQT
66 | LAGQRQFITSLKAQVPTLKQWLGLNK
67 | >PHAGE1_COMPLETE_CDS_9 core
68 | MSKQKPADTEDELSVLLSTRNITIAGRGLVIREYTLVDMLQLGDKLDALTHSLAEVMQTP
69 | WPLIEEIEAVLRKHAGDIPELIACSVDQPVQWVALLPAGEGQSLIDWWWTQNRRFFMNAV
70 | VRLETIRATRAKLSASAASSQPSSGPDTTRAGSETTHSAS
71 | >PHAGE1_COMPLETE_CDS_10 core
72 | MAQLETYYYGQGKVFLARRLANGKPGAFRWVGDVSALSLALTVERLNHKESYSGRRGTVR
73 | SFVTNQDGTLTSTWHDLAPENLAVVLYGEQVVIPAGTITGELLPAGIVAGERYILDHQRV
74 | SDVVIGTLVEGTDYEVDYTYGAITFLTAQATAPSVNYEYAGSVNTTLFTQQPEDFYLRFE
75 | GINLAEGGAAKVLELYKISFSPASALALIQGDTSLAGLETTSTVLYDNARPDDPTIGRFG
76 | RVIDVAEPVA
77 | >PHAGE1_COMPLETE_CDS_11 core
78 | MPDVKLLQPHTHQGKRFAAGETITVTEAEEVWLRDHQLIGVATPVVSDTQGNRGKSKQQE
79 | PEDNGTA
80 | >PHAGE1_COMPLETE_CDS_12 core
81 | MSSEPFSISLIVERLQPLTPSPLSFLGTIVEYSKVQELSGFAVPGAYVLMGPERGVPGNG
82 | TRAQVAEAVFGVAVAVRNYGQGADGLTHEISPLVGQIRDQLIGWVPGKLATTGIQWLKGD
83 | ILDYDGGTLLWMDTFQVNHVIGGRRCPT
84 | >PHAGE1_COMPLETE_CDS_13 core-elongated
85 | MPYRHKRRRVTAVTWYISLAELADRPGAVELSQVTQLPGKPPARPELLDAVLRGEETTSW
86 | PPAEVAVALEVVERIGGAVEEAQNLIDGYLRQRGYTLPLVKVHPILSSWGRSVVRYKLHQ
87 | HRISDERTDPIVRDYRDAMKLMEQLANGKFSLGATDTQKPAGGPPMVDGPGRTFSMDSLK
88 | DFGK
89 | >PHAGE1_COMPLETE_CDS_14 core
90 | MAKAPFPIDPHLTAIAIGYRNLSLIADSVLPRVPVGKAEFKWWKFDLGQGFTVPTTTVGR
91 | TSQPNQIQFDAEDETSSTNDYALDAPVPQSDIDNAPANYDPLGRATERVSDIIMLDREVR
92 | TSKEVFNAANYPVGNKENLAAADQWDNDASKPIKKIVTALDKMIMRPNVAVLGRSTATAL
93 | RQNPSVVKAYNGTLGEDGLVPLDFLRGLLELDEIVVGSAFVNIARPGQKPVLVRAWANHA
94 | AFIYRNLLADTQGGVTFGFTAQFGSRVSGSIPDPDMGMRGGQRVRVGESVRELIVAQDCG
95 | YFFQNAVSA
96 | >PHAGE1_COMPLETE_CDS_15 core
97 | MNIPGLITCHKAEVALAARRMVTHGTVPDEITLAVDGSKLIIGVTTLVAASVGEPTDVVR
98 | SQLTPVIYGGDVVAGDPLTADAEGRAVPATAGQFYLGFAEYDGAEDDLGSVWIAPGQLAA
99 | AAAGGG
100 | >PHAGE1_COMPLETE_CDS_16 core
101 | MTTAMTTSTAKATLAVFAPGTHTAMDGRTITFTPEDCIDLANSYDPSVSEAPFVIGHPSL
102 | TAPAYGWAERLEFRDGIVYAAPRQVNPAFAKAFNAGSYKKRSLSIYQPDSPGNPKPGHYY
103 | ARHVGFLGAVPPGVKGLPDAQFAEACGDNGPLEFALPWEADNLASLFQSIRDWVIQEKSI
104 | EQADSIIPQWRIQSILDSATDDRKSSISPLAYAEENNVDPNQTTTVTAEELTRRETALAE
105 | REEKLRRDEEAAKQRDAKVRRDAVISYADGLVKAGSILPRQKNTVVEVLLSLDSTPLSFA
106 | DGDATVNKTPEELLRDVLSQRPKVVDFSEKTGADDDPVDFADASALATAAQNYQAEQAKQ
107 | GRTISMTDAVNHVKKGAQQ
108 | >PHAGE1_COMPLETE_CDS_17 single
109 | MTQSTLATRIIIRFNTVPGPVPPPRGPSVLSSVLFLSARGQITPSAQFVMMSP
110 | >PHAGE1_COMPLETE_CDS_18 core-elongated
111 | MSGVTLTFNAQDALSKLWDARADMMRPEPLLRSMGERLLEFHQQRFRDQTSPEGVKWKEL
112 | SWRYKQRKRKNRDQILTRDGYLRNTLRWQVNADELLFGTDRVYGAIHQFGGTIEIAARSQ
113 | QAYYRKKKDGELDNQFVRKNKSNFAQWHTIPAYKIKIEARPWLGVSKAEGATLIDMAKNY
114 | LQGAFN
115 | >PHAGE1_COMPLETE_CDS_19 core
116 | MAGNVSYGSLPFSEQFAFFRRKFNTKTDAWTDVYGSAHDNEFMVAGANRDALLADLRTAV
117 | EKSLDGGTLETFRKDFAAIVARYGWSYNGGFEWRSRVIYETNLRSSYMAGRYQQLMAMRD
118 | THPYWEYVHSDVVEHPRQEHLGWNGMVLRADDPWWIYHFPVNAWGCQCSVIARTEDDLKR
119 | IGKDGPDTAPPIKFITREIGQRSPGGPRTVIVPEGIDPGFEHTPGRSRVFSEVPPPRSQN
120 | PVSDGPFTPVAEPPATPAPLPSPRPAPVTEAETDPVDAFLQLFGATADRDAAFRDPTGQR
121 | LAIGHDMFASSNGQGQIPLTLAQALQLAETIRNPDEIWAQIVWLPEVQQSLVRRYYLARL
122 | QQDGGADPLSVVFATGRDGWTGNISTDDTLLQSLRQGIKLWSRED
123 | >PHAGE1_COMPLETE_CDS_20 core
124 | VSRKKQNRRVSSQPSTPRPELGREFASTGDGRDITRPWIGALALSDDSVLQHRGAPDLKI
125 | YREVLSDDEVKSAFSQRQDALISREIKVEAGGERPVDIEAADAMRQQIDALGFDRITRLM
126 | HYGVFYGYAVSELIYGIRDNLLWIDDIKVRDRRRFRFSPKGELRLLTPQNMMAGEPCEGP
127 | YFWSFSTGADHDDEPYGLGLAHWLYWPTFFKRNDIKFWLIFLDKFGMPTVAGKHPEGATP
128 | EQKRNLLALTRAISTDSGVIMPEGMTVELLSAARSGAADYQAMYNAMNEAIRRVTVGQIS
129 | SSGGAAKGIGGNESLQDKVLDSIVKADADVICESWNRGPGKWFTEFNFPDAAVPVVSRVF
130 | EEAEDLKDRAERDKTISETTGYRPTLATIKETYGGEWEPRPEQQVSVPRAAAPSSFAEHD
131 | PDHNDTSTLMAGRLNTELRPVMDGWINQIKALVDSAETADELRDGLTALIPDMSLDDYAR
132 | ILGEAMSAAALAGRNDLLEEMNGR
133 | >PHAGE1_COMPLETE_CDS_21 core
134 | MKPLASTIRTVEWDELPARAREIPFGFNPFADGVLMAHQVECLKYDVSILAIPKGRRTGI
135 | TFAWGLNSTLIAGAQKAAGGDNVYYIGDTKEKGLEFIGYVAKFARVIAAQQAQDVSAIEE
136 | FLFEDQDEQGNTRMIAAYRVRFASGFQVAALSSRPANIRGLQGVVIIDEAAFHADVQGVL
137 | DAATALLIWGGRIVVISSENGKNNPFHQFCKDIEEGRYGDDAAVLRITFDDAVTNGLYER
138 | VCAMKGEAATVEGKKTWYNRIRNAYGPRKAAMREELDAIPRDGNGICIPGVWIERAMPEE
139 | RPVIRLALDDDFIHMTEAERAAWGNDWIDRELRPVMAETLNPELRHVFGMDFARHRHFSS
140 | IVPMAIMQNLCRDVPFLLELNNVPSALQQQILFWLIEHLPRQSGGAMDATGPGMVLAEYT
141 | ADRYGRPRIAEITLNRKWYGFWMPKFTSLFEDSMIILPRDENTAQDLRAVENIDGVPMVA
142 | SLEKKDLKDPELVRHGDTAIAGCLANYAALNLATEIAFESTGERDIFRVLSGFGDSSSAG
143 | EFTDTGFGTVRGINDFGGFL
144 | >PHAGE1_COMPLETE_CDS_22 subcore
145 | MGRKSTIHRLEPDVRAHIERRLREDRMTLDELLSDIHEHFPGEETPSRSALGRYKQNFGL
146 | LVDRMRQQDQMARLLVSELGENPDERAGALMVQAVTTLTTHAAFTAQQEEDPDIDTVRHL
147 | ARAAKDVLQSRKASLDERREIERAARERLLREQEENLKETARAQGLSEDQVQFWRERVLG
148 | IK
149 | >PHAGE1_COMPLETE_CDS_23 subcore
150 | MSFSDYLRTDMRLVILRILSEMPSYSSNSSVIWSVLTRYGHSPSRDQVKSELRWLEEQGL
151 | VTVEDIETVLVARLTERGADVATGRAVVPGVKRPGAGG
152 | >PHAGE1_COMPLETE_CDS_24 single
153 | MVSSLDPQHTFTPELHLVFTQSLCPGGLFQVLFLLTQQALTRGTLNLAALVETGLTRLQD
154 | IFCCPGKVTHCVDIRIFFLLSGKGGVGGERGDSLNHQGTSAFVRVLSQFADQQTGHLVLL
155 | AHPVNQQAKVLFVATQRTAARGFFSGEVLMDIGQQLIERHAVFTQAALDMSPNVWFKAMD
156 | CRFTAHISLPRRDASRQARLHDRWPHQHRVRSTGQPAPSRYLQPSPALALQASAVLISPD
157 | RGSVNGHSVSVPTR
158 | >PHAGE1_COMPLETE_CDS_25 core
159 | MKFEELQFSWQVLQWAVLSAIGIYSWIVGRQSASNRELLELRTRLASIEAQVAQMPTQRQ
160 | VSILLEKLSSTEASITGMNNQMSGMASRLETINNYLLNTK
161 | >PHAGE1_COMPLETE_CDS_26 subcore-elongated
162 | MVFPAEMTVKCPPPEEADDNSMDASAVALKKLYDLYGICAGRHADLIHYIQNLQGK
163 | >PHAGE1_COMPLETE_CDS_27 core
164 | MTLTDGIKLLARYLLLAASGLGMIWFIWHQGYERGAGDVRLEVANQKTQEAGDALQKFID
165 | GARLLTAEANKAGNALAAQVAARQAADQQSTREIKDALKKTASQRVMCVFDDDVMRLLRE
166 | ARQRAATAAADGLSGGDDRKVPAPGGGGR
167 | >PHAGE1_COMPLETE_CDS_28 core
168 | MTRLALSVILLCLLSGCHPALAASIPVEARQYQRELTRNARVVWGLNAPVSTFAAQIHQE
169 | SQWNTRARSPVGAQGLAQFMPATASWIAGIYPDQLKDHQPYNPSWSMRALAQYNWWHWQR
170 | ITGTASDCERMAFVLSAYNGGLGWVQKDRRLASSRGMDASRYWNHVEKVNAGRSAANFRE
171 | NRGYPLKIIYTWQPLYLAAGWGPGECYDAD
172 | >PHAGE1_COMPLETE_CDS_29 core
173 | MSLLHKVRHQRLRNWIILAVALLAAIAVISPEQLGVTLYKLSLVSIAAILGYHLDRALFP
174 | YASPGSYLIDDWKENLGKPVPVNRNEPEFPVATGYELIFAAVLLRRALIVAAICLGVTMG
175 | L
176 | >PHAGE1_COMPLETE_CDS_30 core-elongated
177 | VTISFETLTELYRRMEFREECRAGSLSLSDQPDCDLISRLLDDPGEYGLSVESGTLSPGS
178 | SIALRIAPPRKGLGVVFATYNSLLSDPKNQCQEPAKYFILEAKFRYDDAQVPAFINNYRA
179 | ILKFVDLLKQTAAYFDGSTCQLVFLRKEVIKLSPLFSAETVQNLKRDHLDNLIACFNDDT
180 | HKDQKLDILIESIQAVCEDIDPQRMFAFLLDNVQRLHDKFLKGYRIYSSGFSYDKVMDQL
181 | RTAKVEEMGKIHKAFSDIQNHILGIPVASVIVATQFKEASVWSSQGITNTIILLGCMFAA
182 | TLIWLALSNQMQSMKALNEEIEYKKRQVNKEYSFIKDDVNGVFSSIKTRLETQKRTFWII
183 | RGVLIIGIITAVVVYCWYTKPVLYWIQFLVAKFFPLACS
184 | >PHAGE1_COMPLETE_CDS_31 core
185 | MTEQAIEVKHVVVHVLVKQQNGDASEELSPEERQVTEASLRLINDICAKYAGRTGKGYGY
186 | FEGDVDNYPMERMAGDYLDGADDFYQSSCRMMHHLTERSQRENMATGGFVLFSHIMIGHN
187 | EHLLIAIVNATIGSTVDADFTIKDSTYLDIAKLRMAGRIDLTAWKSDAERYISFLKGQGN
188 | VSNYFKQFLGCNDVLIAKRESEKLRDTLTAFAAEQNLEGAEKDTFLKTAFEHLNALSKAS
189 | EPLNLETFVNAVWPQAPEELSGKLASEELELSDGFVPDGRVIRALVSFKGKSKHWELKFD
190 | REGTTTGDIDYNPETNIITLRNVPDDFREMWLNEV
191 | >PHAGE1_COMPLETE_CDS_32 single
192 | VLDFNGLLSHWLSILLCLLFDDKQLSLCSEWELIYFFN
193 | >PHAGE1_COMPLETE_CDS_33 single
194 | VLAFLEKERPAGCANIPQASNTQHVPVSHIMAQSVSRDRFSLLHFHSMKKAYRI
195 | >PHAGE1_COMPLETE_CDS_34 core
196 | MKSQFLPVLPWMGGKRRLARHILPLFPSHTCYVEPFCGAAALYFMKEPSKVEVINDIHGE
197 | LINLYRVIKHHLDEFVRQFRWALVSRQIYRWMKDTPEETLTDIQRAARFFYLQKQAFGGK
198 | VADHTFGTTTTSAPRLNLLRIEEELSLAHLRLSRTTIEHLDWATCIKRYDRPHTLFYCDP
199 | PYLKTEGYGVEFGLEEYVRMAELARTISGKMVISVNDIEEMREAFSGLRTQKIDVRYSLQ
200 | TSGKAELKRELIISNF
201 | >PHAGE1_COMPLETE_CDS_35 core
202 | MKFLQIHTRFFMGKRYLLLLLSVSPLILSGCDGAQKAATEYVAQQMRDPSSAKFRDVQTV
203 | TDSADPYVQNICGFVNGKNLFGAYTGEKRFVVEIFTGPQKSTFTPLRFIVEGDDNQSRRA
204 | TKGSNKTPHPQTLFEEQYWNKRCLTDKRKPTYSGVSWKTPFELCREKILSSLEIKGDVPV
205 | VADNSINGDSILEWYGDNEFFVPTSTPNVMDLRRVRCVVNNASGEISEFNVF
206 | >PHAGE1_COMPLETE_CDS_36 core-elongated
207 | MCVRLWCRKTHQSILKCRKTHMIGLRIKEERERLSLTQQGLADAIEIAKRTLIDWEKGRT
208 | SPNAVQLSALSGLGVDVLYIVTGVRNASLPAISTTSSLTKRQSALLDNYENTDEAGKKII
209 | EATAAEVAQQIGLVKRSG
210 | >PHAGE1_COMPLETE_CDS_37 core
211 | MTAEQVKAQFRQRGITFTSWAEEHGYTRNEVYRVLNGQTKANYGKSHEIAVKLGLKSGNV
212 | AA
213 | >PHAGE1_COMPLETE_CDS_38 core
214 | MTKAITSTSGGRILRVLKALKGSSLSGRSNSELAKALDESPANINRALNTLIEEGLAQKL
215 | DNGRFALSVQVLQIAVAHSNEIARAQGRIDELNQRVIAGSR
216 | >PHAGE1_COMPLETE_CDS_39 core
217 | MARTKQQTSELTPDVTLNPELESTQNLMAIVSGQMNDERDLLNQLLGQAQMADAFEQFSR
218 | TVRTSKLAFVKENKLYRNLKGKKTPNGSEFLGTWDEFCSVLGISVDKADLDIANLTAFGE
219 | EALESMSRMGIGYRELRQFRRLPEDQKSALIEVAKEGDKAALLDLAEEMITKHAREKEEL
220 | KTDLEISRQMLAEKKEELGTMRNEKEELKARLVRRSTTETPDEEGVALETEVTGFKSGVI
221 | SAFFDLKSGFNALTEHTERTGINHTGMMAGLLDDLQAQFEELRQEFSLPEARETSVIPDW
222 | VKEAQQEDENNG
223 | >PHAGE1_COMPLETE_CDS_40 core
224 | MNPVLTQRLVAIAEAASAAGHGNKEAVYQAACEELCMSRATLLKKLNAVRLQKPRKQRSD
225 | AGDSALTRDEAMTISGTLMETIRGTGKRTLSVEKAINSLRDNGLIVSGRLDETTGEIVPL
226 | SASAIIRALRQYRLHPDQLRAPAPAVQLASRHPNHVWQLDASICVLYYLKNPAKGVKGDT
227 | GLRIMDEKEFNKNKPANVAKVVNDRVWSFEGTDHTTGWIYLEYRFGGETTENFTSVLINM
228 | MQERGGADVLHGVPKVLFTDPGAALKSPTMGNLCQALGIRLIAHKARNARATGSVEKARD
229 | ILERDFEHGLRFCRVESIDELNRLARLWRMKFNRSAIHSRYGMARTDKWLLITEEQLVKA
230 | PSVEVCREAAVSAPVSCKVDSFIRVRFRGRQYDVSAVPGVCVNDRVMVARNLYRDDQAQV
231 | VMTGEDGLKSFFLVDEVQKDEHGFAVDAPVIGESFKPLPQTVAQQHLDEVEQHVFGTASK
232 | EETEAARKGKSLPFGGRFNPYLDIERDDHPTYLPKRGQESQVRGPRIEQRPLSHVEAAKL
233 | LRERLTAAGHNWFPEHYAQLVSRFPEGVPAEDIDAIVQELAGNKAPRLSIVNGH
234 | >PHAGE1_COMPLETE_CDS_41 core
235 | MLVLKDLMKQHGIEQTEVAAAAAVSQPAVSQLINHGIWPKRRPEEVRQKIMTFLASRGLG
236 | QELSRAFDEVLTAEPASTSVPQQANNEEDENMLLAKQVLNPVTKKQFGIFRDPFADDAMQ
237 | SSEDVFTTPDIRYVREALYQTARHGGFMAVIGESGAGKSTLRRDLIERIHRENTPVIVIE
238 | PYIIAMEDNDNKGKTLKAASIAEAIISTIAPLEGVKRSQEARFRQLHRVLKDSSNAGYSH
239 | VLVIEEAHSLPLPTLKHLKRFFELEHGFKKLLSIVLIGQPELAMKLSERNQEVREVVQRC
240 | EVVELLPLDTELERFLEFKFKRAGKSVSDVLDNSAVDAIRARLSNNIGGRRGVVSLLYPL
241 | AISNLIIAAMNMAAQLGVPVVNADVIKGV
242 | >PHAGE1_COMPLETE_CDS_42 core
243 | MKTIPNVNKQLADLMTAISALNAMNTPVTSIMIYSGKPVIRVSRDSPCVSHFRGKKSGYT
244 | MTGIDHQGRYRQGEVEMYGCRVIWSESLLH
245 | >PHAGE1_COMPLETE_CDS_43 core
246 | MAIKIEILIALTPEGFIYCKMTGANTKDASESELATLESLKPVVNESVLNKLKQSGYRVA
247 | ADYLQPSGKSH
248 | >PHAGE1_COMPLETE_CDS_44 core
249 | MTTINQDEYMKDRKGRLVPISQISDYDLAMDSFVREQVAAAKAKNAELGEFKDRAFNECY
250 | AWLDLVAEKFGRTRGGAKGNVTFPTFDGSQQITIRVQETLTFGPELQIAKELFDECVTDW
251 | SKGANANLQAIVTDAFQVDKEGQLNTGRILSLRRVKIQDERWIKAMDAISESLQVAMSKT
252 | YINFREKDKSGKLVNIPLDIAAI
253 | >PHAGE1_COMPLETE_CDS_45 core
254 | MLTLAGFILLVSACGTDACDALPVTEDIYLNKQSCELVAEIIHERQPDAQLICGEVWREE
255 | ESNE
256 | >PHAGE1_COMPLETE_CDS_46 core
257 | MSESEKYLAKIKKLLNLARRSSNPHEAATALNQAQALMHKHKLSQTDIDLMDITSKASKG
258 | APSHAQSIPRYMALLGQLICRAIGVNCYYSFSRNYMNGQKQNTVIFYGPDERPEIAAYAF
259 | DVLSRQMVKARRTFISSLRKNIKPTTKTARADQFCEGWTEGAYQAIEPFVVTETEKTLMA
260 | NFLAKMKKEQELSDLKPREAKKCRGDQDAAEAGFNEGLKARLNHGVSGKDSARSLEYKP
261 | >PHAGE1_COMPLETE_CDS_47 core
262 | MKLTIDNIRLLLVLGCIVIWVVVAFIFYPAVVSVWDYFHKP
263 | >PHAGE1_COMPLETE_CDS_48 core
264 | MNKKLIAEASRFQSSQERKGFWCLILGTLTAALMLFFIFNMAYMA
265 | >PHAGE1_COMPLETE_CDS_49 subcore
266 | VKTIIVTLEIDVPDNATDKDISDWVDVEYGQCGSRKLDNPCMANGGDATELFRHYWKYEA
267 | >PHAGE1_COMPLETE_CDS_50 single
268 | MFLIMPPIRIFQTGWTLNMDSAEAASWITHAWLMVVMLLNYSAITGNMRPNL
269 | >PHAGE2_FRAGMENT_CDS_1 core
270 | MRENYYYGQGKVFLAPRDNKRAFRWVGDVSTLKIAFSYEQQITKASRGGQLYQNEKIITG
271 | ASGSVSSTWHNFSVENLALLLGAQPVDEPFSFNQQYALPNGIVKGDIIALPHTTVFNVSI
272 | NGLNRGADYIVDRQFGTIEFLVTPETPELIAGYDHLFNQWLPFFSVKPQEFHLRFQGVNL
273 | AEDTAPVLLELYRVSVDPLATLEMISSGTDIAGMDMTSLILPDFNQQTGTAFSYFGRMQL
274 | IAPQSPQPPQIALTYDGRANYDGQYQYRGK
275 | >PHAGE2_FRAGMENT_CDS_2 core
276 | MGGKGSKKVTVGYRYSWDVQAGLGRGPVNEIVSIMADKKTVFAGTPGQISSSTSVYIDKP
277 | GLFGGDDTGGEGGIQGQLDIMMGEPDQVPPASLLKLLTGLVPGFRGVVTTFFSGLVSCYS
278 | ASPKPWLYRVRRTTKGWDGDVWYPEKATIMLENSEAQLDDDSDLLSEQLANLRAIHAMNP
279 | AHILVECATNRDWGRQLTLADDLNLDSYRAVADTLYEEGFGLCFRYNRQDGLDTFVQQVL
280 | DHVGAVQYADLETGKLTLKLLRGDYRVDDLPLFTYDNGIIAVQDDDSATTTSNPNEIVVT
281 | WNDPVTNADGEVRAQNLGAIQNSGLNSSSVEYRAIPTHSLAARVAQRDLETAQSELTRLV
282 | IQFDRRGGILRPGDVFRVKLPDRNIDNMVLRVGKIEEGDTGVLTLTVVQDVFGLPSTSYS
283 | SGQQDSGWTPPDKSTRPVTTQRLIDLPYAVLAGTLSQADLSYLKPESGHLGVMAVAPTSL
284 | SINYQLQTRAAGAAFADRGQGDWTPSGTLTSPVGRLDTVLHVNIDSFPAVGDGLIVNDEI
285 | MRVDAVDRLAGTITVGRGCMDTLPSGHFAGDRCWAYQDALDSDGLEYLSGETVEVRLLTR
286 | TSAETLAESAAPVVTLTMSGRQARPYLPGNIRVNGVLYPDVVASTDKFTLAFSHRDRLLQ
287 | ADRLIDCTENSIGPEPGTEYVVKLIAQGTGAEVWSMATSDASIPIPYVTGGAGAGVHTLT
288 | LKSRRDGLLSLYTFRAGLPAGRYKAFPITVTMNLTIVDGHDWATATPEDTTTGAVPELHA
289 | IADAAGITVWYPPDLVSSGLSIPADDTEWPSGTWPTSPWSFGTHPVLAISQWTETSGPLT
290 | VLALQGDISALLLDSATGSAAAIEWDAGIYLEEMDITIFTTDGPVLNEGIISLKLTERSI
291 | GP
292 | >PHAGE2_FRAGMENT_CDS_3 core-elongated
293 | MWITMEHIRAGGGCAWGLRTFFSRYNLDLQAFIRDGGIDSDMLAGTGDALAIKIVELAQQ
294 | TQKEAGA
295 | >PHAGE2_FRAGMENT_CDS_4 core-elongated
296 | VDPFSWAAVAKFVATLVASYVLNSALAPKAKNSTPEAATEEDWNMPMPDEGTPQCVFFGD
297 | CWTADWFVLGYGNYRYQAIKK
298 | >PHAGE2_FRAGMENT_CDS_5 single
299 | LGCGRQICCHACRIVCAEFRTGSQSQKLHAGSGHGGGLEYAHAR
300 | >PHAGE2_FRAGMENT_CDS_6 core
301 | MSWSEFEYSVADGQPLTLYEFRLGDSLFWRYSNADKDIDFAGQVWEAQAISNSGLSSGSG
302 | DGMDITVPASNEVALLFRATPPSRAVRVRVMRWHATDTSGEFRVVWIGEISSVKREQIES
303 | CKLLTISLASTFSRVGLRLTFGRQCPYALYDHNCRVDPLQFAVSGVGVTALDGSSITASL
304 | PAGLASDWFSGGYIEFDRNGYTERRGLRTQDGNTLHLFGGTTGLQVGQSVTLYPGCDRTI
305 | ATCDSKFSNHLNYGGQPHMPGKSPYTIIKLF
306 | >PHAGE2_FRAGMENT_CDS_7 core
307 | MTTLFPWLADPDWSRGVTETLEWKTDVLQSPTGAEQRISRRLSPRRTFEFTALVHDTGRQ
308 | RFENMLWQGCAGTWAMPVYPDIYALPAAVSSGATAFSIPTAGRDFSVGGTVLLKTSESPD
309 | ATSRMATIAGITGDTLQLGSPLTDSWPAGSLVYPVRPAVLTEPPSLSHLTDIVTTAQVRF
310 | RIAEHNAFSDAPMLTQYRGHPVLESETDWGESVSSCYQPLTRELDNGSSVPFRIDTAGRP
311 | FWRQTHSWFTANRPAQTSLRQLLWYLRGRQRPIWVPGQTLDFSPTSAISGNAVDVVEAGF
312 | TELGIHPGRRDICILLTDGTRHYRRITSVSLVSGAERLVLDGDAISAVQHQIVSISLMTL
313 | ARQDADSVSWEHVTDADGVARVATTFTGVRDELE
314 | >PHAGE2_FRAGMENT_CDS_8 core
315 | MADNSTLNLMLKIRADLADASRALQGLAGDVEDVGSAATTSSQKLSTTARAQDSVAESAR
316 | GHAHAEQSASVAASQTGDAVQQAATDYAGYQAAIARTRAEMGSLQSGMDGTTADIDAQRA
317 | ALTALVNRIDPVVAAYGRLDDMQEQLTAFRGAGLVGDDDFEQYSSRLNELRLQVEKSAYA
318 | ATDAGRKEAAAAREAAQAEAQAAAAKEQFINLLREQAETMNLTTAELLQYKAAQLGISAE
319 | AAPFIQKITDQNAAMSKGGISAGQYAQAMRYLPMQITDVVTSLASGMPVWMVAIQQGGQI
320 | KDSFGGIGNTFKALTTLITPARIAMGGLVGIVAAAGIAAVSAMNDQDEFNRSIQKTGNYA
321 | GVTSGELEQMVQQGGQLRGNYSQVRDILNGLVSSGRFTGETLTSVAQAATLMAELSGQSA
322 | DEVVSSFLKMNDGVTSWAANTTQQYHFLDLETYQRIQSLEDQGRKEEAIEVASQAFKKAS
323 | EERLRTMEQQLNRAARAWNNVKIAATGAWESFKDKAGGALGLDAPADELTNKIKELEAKI
324 | AAAGSDTEVAMQPREYQESVKQYKADLAALKEKQQAEEKAIAAEAKRKQTDAESIAAAEK
325 | LQKLWKGNRSELEKEADAVEETRKNYETLWKSASGRDMLQSRGVTSTDGKNFSGGQWDID
326 | TKALDKSGQKAEQYNKQLQQTLNQKKAITELDRVEAEIRNGTLSSATKAQQDEARALAKK
327 | IDAANAANKATKEGQSLARQQETSNKNFVKQLEDQAAKRTQGAAATRAQEIATRNLTAEQ
328 | RRQAEAANAAITAQEFKGQNLQLQLEYMRDTGDTAGASMLELQNRVSDLRREFEASGNTE
329 | GLNWLDKLLPVAETKIRVDDLKKQLDDLFTWQSQQETSIQAQVQGGLLNEIQGRQRLVQL
330 | HQEVGDKIKGYLPQLKEMATAPGEAGDKIREMIRQLEEELGKLNQAGNELTQAFRDGLQS
331 | GIESSLMGLAKGTMNLRDAVKNLALTIINSMAQLAAQQLAQMATSSLIGSSGGIGGLLGS
332 | VFAADGGQVRGPGSTTSDSIPAMLSDQEFVTRAAVVQQPGALDFLHAFNRHGMAAVEGWL
333 | PRVRHASGGLAGIPAQNMPVPASVPETAMATPAAASLQPISLQQQLVLDPSEVYTAGAQT
334 | LAGQRQFITSLKAQVPTLKQWLGLNK
335 | >PHAGE2_FRAGMENT_CDS_9 core
336 | MSKQKPADTEDELSVLLSTRNITIAGRGLVIREYTLVDMLQLGDKLDALTHSLAEVMQTP
337 | WPLIEEIEAVLRKHAGDIPELIACSVDQPVQWVALLPAGEGQSLIDWWWTQNRRFFMNAV
338 | VRLETIRATRAKLSASAASSQPSSGPDTTRAGSETTHSAS
339 | >PHAGE2_FRAGMENT_CDS_10 core
340 | MAQLETYYYGQGKVFLARRLANGKPGAFRWVGDVSALSLALTVERLNHKESYSGRRGTVR
341 | SFVTNQDGTLTSTWHDLAPENLAVVLYGEQVVIPAGTITGELLPAGIVAGERYILDHQRV
342 | SDVVIGTLVEGTDYEVDYTYGAITFLTAQATAPSVNYEYAGSVNTTLFTQQPEDFYLRFE
343 | GINLAEGGAAKVLELYKISFSPASALALIQGDTSLAGLETTSTVLYDNARPDDPTIGRFG
344 | RVIDVAEPVA
345 | >PHAGE2_FRAGMENT_CDS_11 core
346 | MPDVKLLQPHTHQGKRFAAGETITVTEAEEVWLRDHQLIGVATPVVSDTQGNRGKSKQQE
347 | PEDNGTA
348 | >PHAGE2_FRAGMENT_CDS_12 core
349 | MSSEPFSISLIVERLQPLTPSPLSFLGTIVEYSKVQELSGFAVPGAYVLMGPERGVPGNG
350 | TRAQVAEAVFGVAVAVRNYGQGADGLTHEISPLVGQIRDQLIGWVPGKLATTGIQWLKGD
351 | ILDYDGGTLLWMDTFQVNHVIGGRRCPT
352 | >PHAGE2_FRAGMENT_CDS_13 core-elongated
353 | MPYRHKRRRVTAVTWYISLAELADRPGAVELSQVTQLPGKPPARPELLDAVLRGEETTSW
354 | PPAEVAVALEVVERIGGAVEEAQNLIDGYLRQRGYTLPLVKVHPILSSWGRSVVRYKLHQ
355 | HRISDERTDPIVRDYRDAMKLMEQLANGKFSLGATDTQKPAGGPPMVDGPGRTFSMDSLK
356 | DFGK
357 | >PHAGE2_FRAGMENT_CDS_14 core
358 | MAKAPFPIDPHLTAIAIGYRNLSLIADSVLPRVPVGKAEFKWWKFDLGQGFTVPTTTVGR
359 | TSQPNQIQFDAEDETSSTNDYALDAPVPQSDIDNAPANYDPLGRATERVSDIIMLDREVR
360 | TSKEVFNAANYPVGNKENLAAADQWDNDASKPIKKIVTALDKMIMRPNVAVLGRSTATAL
361 | RQNPSVVKAYNGTLGEDGLVPLDFLRGLLELDEIVVGSAFVNIARPGQKPVLVRAWANHA
362 | AFIYRNLLADTQGGVTFGFTAQFGSRVSGSIPDPDMGMRGGQRVRVGESVRELIVAQDCG
363 | YFFQNAVSA
364 | >PHAGE2_FRAGMENT_CDS_15 core
365 | MNIPGLITCHKAEVALAARRMVTHGTVPDEITLAVDGSKLIIGVTTLVAASVGEPTDVVR
366 | SQLTPVIYGGDVVAGDPLTADAEGRAVPATAGQFYLGFAEYDGAEDDLGSVWIAPGQLAA
367 | AAAGGG
368 | >PHAGE2_FRAGMENT_CDS_16 core
369 | MTTAMTTSTAKATLAVFAPGTHTAMDGRTITFTPEDCIDLANSYDPSVSEAPFVIGHPSL
370 | TAPAYGWAERLEFRDGIVYAAPRQVNPAFAKAFNAGSYKKRSLSIYQPDSPGNPKPGHYY
371 | ARHVGFLGAVPPGVKGLPDAQFAEACGDNGPLEFALPWEADNLASLFQSIRDWVIQEKSI
372 | EQADSIIPQWRIQSILDSATDDRKSSISPLAYAEENNVDPNQTTTVTAEELTRRETALAE
373 | REEKLRRDEEAAKQRDAKVRRDAVISYADGLVKAGSILPRQKNTVVEVLLSLDSTPLSFA
374 | DGDATVNKTPEELLRDVLSQRPKVVDFSEKTGADDDPVDFADASALATAAQNYQAEQAKQ
375 | GRTISMTDAVNHVKKGAQQ
376 | >PHAGE2_FRAGMENT_CDS_17 single
377 | MTQSTLATRIIIRFNTVPGPVPPPRGPSVLSSVLFLSARGQITPSAQFVMMSP
378 | >PHAGE2_FRAGMENT_CDS_18 core-elongated
379 | MSGVTLTFNAQDALSKLWDARADMMRPEPLLRSMGERLLEFHQQRFRDQTSPEGVKWKEL
380 | SWRYKQRKRKNRDQILTRDGYLRNTLRWQVNADELLFGTDRVYGAIHQFGGTIEIAARSQ
381 | QAYYRKKKDGELDNQFVRKNKSNFAQWHTIPAYKIKIEARPWLGVSKAEGATLIDMAKNY
382 | LQGAFN
383 | >PHAGE2_FRAGMENT_CDS_19 core
384 | MAGNVSYGSLPFSEQFAFFRRKFNTKTDAWTDVYGSAHDNEFMVAGANRDALLADLRTAV
385 | EKSLDGGTLETFRKDFAAIVARYGWSYNGGFEWRSRVIYETNLRSSYMAGRYQQLMAMRD
386 | THPYWEYVHSDVVEHPRQEHLGWNGMVLRADDPWWIYHFPVNAWGCQCSVIARTEDDLKR
387 | IGKDGPDTAPPIKFITREIGQRSPGGPRTVIVPEGIDPGFEHTPGRSRVFSEVPPPRSQN
388 | PVSDGPFTPVAEPPATPAPLPSPRPAPVTEAETDPVDAFLQLFGATADRDAAFRDPTGQR
389 | LAIGHDMFASSNGQGQIPLTLAQALQLAETIRNPDEIWAQIVWLPEVQQSLVRRYYLARL
390 | QQDGGADPLSVVFATGRDGWTGNISTDDTLLQSLRQGIKLWSRED
391 | >PHAGE2_FRAGMENT_CDS_20 core
392 | VSRKKQNRRVSSQPSTPRPELGREFASTGDGRDITRPWIGALALSDDSVLQHRGAPDLKI
393 | YREVLSDDEVKSAFSQRQDALISREIKVEAGGERPVDIEAADAMRQQIDALGFDRITRLM
394 | HYGVFYGYAVSELIYGIRDNLLWIDDIKVRDRRRFRFSPKGELRLLTPQNMMAGEPCEGP
395 | YFWSFSTGADHDDEPYGLGLAHWLYWPTFFKRNDIKFWLIFLDKFGMPTVAGKHPEGATP
396 | EQKRNLLALTRAISTDSGVIMPEGMTVELLSAARSGAADYQAMYNAMNEAIRRVTVGQIS
397 | SSGGAAKGIGGNESLQDKVLDSIVKADADVICESWNRGPGKWFTEFNFPDAAVPVVSRVF
398 | EEAEDLKDRAERDKTISETTGYRPTLATIKETYGGEWEPRPEQQVSVPRAAAPSSFAEHD
399 | PDHNDTSTLMAGRLNTELRPVMDGWINQIKALVDSAETADELRDGLTALIPDMSLDDYAR
400 | ILGEAMSAAALAGRNDLLEEMNGR
401 | >PHAGE2_FRAGMENT_CDS_21 core
402 | MKPLASTIRTVEWDELPARAREIPFGFNPFADGVLMAHQVECLKYDVSILAIPKGRRTGI
403 | TFAWGLNSTLIAGAQKAAGGDNVYYIGDTKEKGLEFIGYVAKFARVIAAQQAQDVSAIEE
404 | FLFEDQDEQGNTRMIAAYRVRFASGFQVAALSSRPANIRGLQGVVIIDEAAFHADVQGVL
405 | DAATALLIWGGRIVVISSENGKNNPFHQFCKDIEEGRYGDDAAVLRITFDDAVTNGLYER
406 | VCAMKGEAATVEGKKTWYNRIRNAYGPRKAAMREELDAIPRDGNGICIPGVWIERAMPEE
407 | RPVIRLALDDDFIHMTEAERAAWGNDWIDRELRPVMAETLNPELRHVFGMDFARHRHFSS
408 | IVPMAIMQNLCRDVPFLLELNNVPSALQQQILFWLIEHLPRQSGGAMDATGPGMVLAEYT
409 | ADRYGRPRIAEITLNRKWYGFWMPKFTSLFEDSMIILPRDENTAQDLRAVENIDGVPMVA
410 | SLEKKDLKDPELVRHGDTAIAGCLANYAALNLATEIAFESTGERDIFRVLSGFGDSSSAG
411 | EFTDTGFGTVRGINDFGGFL
412 | >PHAGE2_FRAGMENT_CDS_22 subcore
413 | MGRKSTIHRLEPDVRAHIERRLREDRMTLDELLSDIHEHFPGEETPSRSALGRYKQNFGL
414 | LVDRMRQQDQMARLLVSELGENPDERAGALMVQAVTTLTTHAAFTAQQEEDPDIDTVRHL
415 | ARAAKDVLQSRKASLDERREIERAARERLLREQEENLKETARAQGLSEDQVQFWRERVLG
416 | IK
417 | >PHAGE2_FRAGMENT_CDS_23 subcore
418 | MSFSDYLRTDMRLVILRILSEMPSYSSNSSVIWSVLTRYGHSPSRDQVKSELRWLEEQGL
419 | VTVEDIETVLVARLTERGADVATGRAVVPGVKRPGAGG
420 | >PHAGE2_FRAGMENT_CDS_24 single
421 | MVSSLDPQHTFTPELHLVFTQSLCPGGLFQVLFLLTQQALTRGTLNLAALVETGLTRLQD
422 | IFCCPGKVTHCVDIRIFFLLSGKGGVGGERGDSLNHQGTSAFVRVLSQFADQQTGHLVLL
423 | AHPVNQQAKVLFVATQRTAARGFFSGEVLMDIGQQLIERHAVFTQAALDMSPNVWFKAMD
424 | CRFTAHISLPRRDASRQARLHDRWPHQHRVRSTGQPAPSRYLQPSPALALQASAVLISPD
425 | RGSVNGHSVSVPTR
426 | >PHAGE2_FRAGMENT_CDS_25 core
427 | MKFEELQFSWQVLQWAVLSAIGIYSWIVGRQSASNRELLELRTRLASIEAQVAQMPTQRQ
428 | VSILLEKLSSTEASITGMNNQMSGMASRLETINNYLLNTK
429 | >PHAGE3_MOSAIC_CDS_1 core
430 | MRENYYYGQGKVFLAPRDNKRAFRWVGDVSTLKIAFSYEQQITKASRGGQLYQNEKIITG
431 | ASGSVSSTWHNFSVENLALLLGAQPVDEPFSFNQQYALPNGIVKGDIIALPHTTVFNVSI
432 | NGLNRGADYIVDRQFGTIEFLVTPETPELIAGYDHLFNQWLPFFSVKPQEFHLRFQGVNL
433 | AEDTAPVLLELYRVSVDPLATLEMISSGTDIAGMDMTSLILPDFNQQTGTAFSYFGRMQL
434 | IAPQSPQPPQIALTYDGRANYDGQYQYRGK
435 | >PHAGE3_MOSAIC_CDS_2 core
436 | MGGKGSKKVTVGYRYSWDVQAGLGRGPVNEIVSIMADKKTVFAGTPGQISSSTSVYIDKP
437 | GLFGGDDTGGEGGIQGQLDIMMGEPDQVPPASLLKLLTGLVPGFRGVVTTFFSGLVSCYS
438 | ASPKPWLYRVRRTTKGWDGDVWYPEKATIMLENSEAQLDDDSDLLSEQLANLRAIHAMNP
439 | AHILVECATNRDWGRQLTLADDLNLDSYRAVADTLYEEGFGLCFRYNRQDGLDTFVQQVL
440 | DHVGAVQYADLETGKLTLKLLRGDYRVDDLPLFTYDNGIIAVQDDDSATTTSNPNEIVVT
441 | WNDPVTNADGEVRAQNLGAIQNSGLNSSSVEYRAIPTHSLAARVAQRDLETAQSELTRLV
442 | IQFDRRGGILRPGDVFRVKLPDRNIDNMVLRVGKIEEGDTGVLTLTVVQDVFGLPSTSYS
443 | SGQQDSGWTPPDKSTRPVTTQRLIDLPYAVLAGTLSQADLSYLKPESGHLGVMAVAPTSL
444 | SINYQLQTRAAGAAFADRGQGDWTPSGTLTSPVGRLDTVLHVNIDSFPAVGDGLIVNDEI
445 | MRVDAVDRLAGTITVGRGCMDTLPSGHFAGDRCWAYQDALDSDGLEYLSGETVEVRLLTR
446 | TSAETLAESAAPVVTLTMSGRQARPYLPGNIRVNGVLYPDVVASTDKFTLAFSHRDRLLQ
447 | ADRLIDCTENSIGPEPGTEYVVKLIAQGTGAEVWSMATSDASIPIPYVTGGAGAGVHTLT
448 | LKSRRDGLLSLYTFRAGLPAGRYKAFPITVTMNLTIVDGHDWATATPEDTTTGAVPELHA
449 | IADAAGITVWYPPDLVSSGLSIPADDTEWPSGTWPTSPWSFGTHPVLAISQWTETSGPLT
450 | VLALQGDISALLLDSATGSAAAIEWDAGIYLEEMDITIFTTDGPVLNEGIISLKLTERSI
451 | GP
452 | >PHAGE3_MOSAIC_CDS_3 core-elongated
453 | MWITMEHIRAGGGCAWGLRTFFSRYNLDLQAFIRDGGIDSDMLAGTGDALAIKIVELAQQ
454 | TQKEAGA
455 | >PHAGE3_MOSAIC_CDS_4 core-elongated
456 | VDPFSWAAVAKFVATLVASYVLNSALAPKAKNSTPEAATEEDWNMPMPDEGTPQCVFFGD
457 | CWTADWFVLGYGNYRYQAIKK
458 | >PHAGE3_MOSAIC_CDS_5 single
459 | LGCGRQICCHACRIVCAEFRTGSQSQKLHAGSGHGGGLEYAHAR
460 | >PHAGE3_MOSAIC_CDS_6 core
461 | MSWSEFEYSVADGQPLTLYEFRLGDSLFWRYSNADKDIDFAGQVWEAQAISNSGLSSGSG
462 | DGMDITVPASNEVALLFRATPPSRAVRVRVMRWHATDTSGEFRVVWIGEISSVKREQIES
463 | CKLLTISLASTFSRVGLRLTFGRQCPYALYDHNCRVDPLQFAVSGVGVTALDGSSITASL
464 | PAGLASDWFSGGYIEFDRNGYTERRGLRTQDGNTLHLFGGTTGLQVGQSVTLYPGCDRTI
465 | ATCDSKFSNHLNYGGQPHMPGKSPYTIIKLF
466 | >PHAGE3_MOSAIC_CDS_7 core
467 | MTTLFPWLADPDWSRGVTETLEWKTDVLQSPTGAEQRISRRLSPRRTFEFTALVHDTGRQ
468 | RFENMLWQGCAGTWAMPVYPDIYALPAAVSSGATAFSIPTAGRDFSVGGTVLLKTSESPD
469 | ATSRMATIAGITGDTLQLGSPLTDSWPAGSLVYPVRPAVLTEPPSLSHLTDIVTTAQVRF
470 | RIAEHNAFSDAPMLTQYRGHPVLESETDWGESVSSCYQPLTRELDNGSSVPFRIDTAGRP
471 | FWRQTHSWFTANRPAQTSLRQLLWYLRGRQRPIWVPGQTLDFSPTSAISGNAVDVVEAGF
472 | TELGIHPGRRDICILLTDGTRHYRRITSVSLVSGAERLVLDGDAISAVQHQIVSISLMTL
473 | ARQDADSVSWEHVTDADGVARVATTFTGVRDELE
474 | >PHAGE3_MOSAIC_CDS_8 core
475 | MADNSTLNLMLKIRADLADASRALQGLAGDVEDVGSAATTSSQKLSTTARAQDSVAESAR
476 | GHAHAEQSASVAASQTGDAVQQAATDYAGYQAAIARTRAEMGSLQSGMDGTTADIDAQRA
477 | ALTALVNRIDPVVAAYGRLDDMQEQLTAFRGAGLVGDDDFEQYSSRLNELRLQVEKSAYA
478 | ATDAGRKEAAAAREAAQAEAQAAAAKEQFINLLREQAETMNLTTAELLQYKAAQLGISAE
479 | AAPFIQKITDQNAAMSKGGISAGQYAQAMRYLPMQITDVVTSLASGMPVWMVAIQQGGQI
480 | KDSFGGIGNTFKALTTLITPARIAMGGLVGIVAAAGIAAVSAMNDQDEFNRSIQKTGNYA
481 | GVTSGELEQMVQQGGQLRGNYSQVRDILNGLVSSGRFTGETLTSVAQAATLMAELSGQSA
482 | DEVVSSFLKMNDGVTSWAANTTQQYHFLDLETYQRIQSLEDQGRKEEAIEVASQAFKKAS
483 | EERLRTMEQQLNRAARAWNNVKIAATGAWESFKDKAGGALGLDAPADELTNKIKELEAKI
484 | AAAGSDTEVAMQPREYQESVKQYKADLAALKEKQQAEEKAIAAEAKRKQTDAESIAAAEK
485 | LQKLWKGNRSELEKEADAVEETRKNYETLWKSASGRDMLQSRGVTSTDGKNFSGGQWDID
486 | TKALDKSGQKAEQYNKQLQQTLNQKKAITELDRVEAEIRNGTLSSATKAQQDEARALAKK
487 | IDAANAANKATKEGQSLARQQETSNKNFVKQLEDQAAKRTQGAAATRAQEIATRNLTAEQ
488 | RRQAEAANAAITAQEFKGQNLQLQLEYMRDTGDTAGASMLELQNRVSDLRREFEASGNTE
489 | GLNWLDKLLPVAETKIRVDDLKKQLDDLFTWQSQQETSIQAQVQGGLLNEIQGRQRLVQL
490 | HQEVGDKIKGYLPQLKEMATAPGEAGDKIREMIRQLEEELGKLNQAGNELTQAFRDGLQS
491 | GIESSLMGLAKGTMNLRDAVKNLALTIINSMAQLAAQQLAQMATSSLIGSSGGIGGLLGS
492 | VFAADGGQVRGPGSTTSDSIPAMLSDQEFVTRAAVVQQPGALDFLHAFNRHGMAAVEGWL
493 | PRVRHASGGLAGIPAQNMPVPASVPETAMATPAAASLQPISLQQQLVLDPSEVYTAGAQT
494 | LAGQRQFITSLKAQVPTLKQWLGLNK
495 | >PHAGE3_MOSAIC_CDS_9 core
496 | MSKQKPADTEDELSVLLSTRNITIAGRGLVIREYTLVDMLQLGDKLDALTHSLAEVMQTP
497 | WPLIEEIEAVLRKHAGDIPELIACSVDQPVQWVALLPAGEGQSLIDWWWTQNRRFFMNAV
498 | VRLETIRATRAKLSASAASSQPSSGPDTTRAGSETTHSAS
499 | >PHAGE3_MOSAIC_CDS_10 core
500 | MAQLETYYYGQGKVFLARRLANGKPGAFRWVGDVSALSLALTVERLNHKESYSGRRGTVR
501 | SFVTNQDGTLTSTWHDLAPENLAVVLYGEQVVIPAGTITGELLPAGIVAGERYILDHQRV
502 | SDVVIGTLVEGTDYEVDYTYGAITFLTAQATAPSVNYEYAGSVNTTLFTQQPEDFYLRFE
503 | GINLAEGGAAKVLELYKISFSPASALALIQGDTSLAGLETTSTVLYDNARPDDPTIGRFG
504 | RVIDVAEPVA
505 | >PHAGE3_MOSAIC_CDS_11 core
506 | MPDVKLLQPHTHQGKRFAAGETITVTEAEEVWLRDHQLIGVATPVVSDTQGNRGKSKQQE
507 | PEDNGTA
508 | >PHAGE3_MOSAIC_CDS_12 core
509 | MSSEPFSISLIVERLQPLTPSPLSFLGTIVEYSKVQELSGFAVPGAYVLMGPERGVPGNG
510 | TRAQVAEAVFGVAVAVRNYGQGADGLTHEISPLVGQIRDQLIGWVPGKLATTGIQWLKGD
511 | ILDYDGGTLLWMDTFQVNHVIGGRRCPT
512 | >PHAGE3_MOSAIC_CDS_13 core-elongated
513 | MPYRHKRRRVTAVTWYISLAELADRPGAVELSQVTQLPGKPPARPELLDAVLRGEETTSW
514 | PPAEVAVALEVVERIGGAVEEAQNLIDGYLRQRGYTLPLVKVHPILSSWGRSVVRYKLHQ
515 | HRISDERTDPIVRDYRDAMKLMEQLANGKFSLGATDTQKPAGGPPMVDGPGRTFSMDSLK
516 | DFGK
517 | >PHAGE3_MOSAIC_CDS_14 core
518 | MAKAPFPIDPHLTAIAIGYRNLSLIADSVLPRVPVGKAEFKWWKFDLGQGFTVPTTTVGR
519 | TSQPNQIQFDAEDETSSTNDYALDAPVPQSDIDNAPANYDPLGRATERVSDIIMLDREVR
520 | TSKEVFNAANYPVGNKENLAAADQWDNDASKPIKKIVTALDKMIMRPNVAVLGRSTATAL
521 | RQNPSVVKAYNGTLGEDGLVPLDFLRGLLELDEIVVGSAFVNIARPGQKPVLVRAWANHA
522 | AFIYRNLLADTQGGVTFGFTAQFGSRVSGSIPDPDMGMRGGQRVRVGESVRELIVAQDCG
523 | YFFQNAVSA
524 | >PHAGE3_MOSAIC_CDS_15 core
525 | MNIPGLITCHKAEVALAARRMVTHGTVPDEITLAVDGSKLIIGVTTLVAASVGEPTDVVR
526 | SQLTPVIYGGDVVAGDPLTADAEGRAVPATAGQFYLGFAEYDGAEDDLGSVWIAPGQLAA
527 | AAAGGG
528 | >PHAGE3_MOSAIC_CDS_16 core
529 | MTTAMTTSTAKATLAVFAPGTHTAMDGRTITFTPEDCIDLANSYDPSVSEAPFVIGHPSL
530 | TAPAYGWAERLEFRDGIVYAAPRQVNPAFAKAFNAGSYKKRSLSIYQPDSPGNPKPGHYY
531 | ARHVGFLGAVPPGVKGLPDAQFAEACGDNGPLEFALPWEADNLASLFQSIRDWVIQEKSI
532 | EQADSIIPQWRIQSILDSATDDRKSSISPLAYAEENNVDPNQTTTVTAEELTRRETALAE
533 | REEKLRRDEEAAKQRDAKVRRDAVISYADGLVKAGSILPRQKNTVVEVLLSLDSTPLSFA
534 | DGDATVNKTPEELLRDVLSQRPKVVDFSEKTGADDDPVDFADASALATAAQNYQAEQAKQ
535 | GRTISMTDAVNHVKKGAQQ
536 | >PHAGE3_MOSAIC_CDS_17 single
537 | MTQSTLATRIIIRFNTVPGPVPPPRGPSVLSSVLFLSARGQITPSAQFVMMSP
538 | >PHAGE3_MOSAIC_CDS_18 core-elongated
539 | MSGVTLTFNAQDALSKLWDARADMMRPEPLLRSMGERLLEFHQQRFRDQTSPEGVKWKEL
540 | SWRYKQRKRKNRDQILTRDGYLRNTLRWQVNADELLFGTDRVYGAIHQFGGTIEIAARSQ
541 | QAYYRKKKDGELDNQFVRKNKSNFAQWHTIPAYKIKIEARPWLGVSKAEGATLIDMAKNY
542 | LQGAFN
543 | >PHAGE3_MOSAIC_CDS_19 core
544 | MAGNVSYGSLPFSEQFAFFRRKFNTKTDAWTDVYGSAHDNEFMVAGANRDALLADLRTAV
545 | EKSLDGGTLETFRKDFAAIVARYGWSYNGGFEWRSRVIYETNLRSSYMAGRYQQLMAMRD
546 | THPYWEYVHSDVVEHPRQEHLGWNGMVLRADDPWWIYHFPVNAWGCQCSVIARTEDDLKR
547 | IGKDGPDTAPPIKFITREIGQRSPGGPRTVIVPEGIDPGFEHTPGRSRVFSEVPPPRSQN
548 | PVSDGPFTPVAEPPATPAPLPSPRPAPVTEAETDPVDAFLQLFGATADRDAAFRDPTGQR
549 | LAIGHDMFASSNGQGQIPLTLAQALQLAETIRNPDEIWAQIVWLPEVQQSLVRRYYLARL
550 | QQDGGADPLSVVFATGRDGWTGNISTDDTLLQSLRQGIKLWSRED
551 | >PHAGE3_MOSAIC_CDS_20 core
552 | VSRKKQNRRVSSQPSTPRPELGREFASTGDGRDITRPWIGALALSDDSVLQHRGAPDLKI
553 | YREVLSDDEVKSAFSQRQDALISREIKVEAGGERPVDIEAADAMRQQIDALGFDRITRLM
554 | HYGVFYGYAVSELIYGIRDNLLWIDDIKVRDRRRFRFSPKGELRLLTPQNMMAGEPCEGP
555 | YFWSFSTGADHDDEPYGLGLAHWLYWPTFFKRNDIKFWLIFLDKFGMPTVAGKHPEGATP
556 | EQKRNLLALTRAISTDSGVIMPEGMTVELLSAARSGAADYQAMYNAMNEAIRRVTVGQIS
557 | SSGGAAKGIGGNESLQDKVLDSIVKADADVICESWNRGPGKWFTEFNFPDAAVPVVSRVF
558 | EEAEDLKDRAERDKTISETTGYRPTLATIKETYGGEWEPRPEQQVSVPRAAAPSSFAEHD
559 | PDHNDTSTLMAGRLNTELRPVMDGWINQIKALVDSAETADELRDGLTALIPDMSLDDYAR
560 | ILGEAMSAAALAGRNDLLEEMNGR
561 | >PHAGE3_MOSAIC_CDS_21 core
562 | MKPLASTIRTVEWDELPARAREIPFGFNPFADGVLMAHQVECLKYDVSILAIPKGRRTGI
563 | TFAWGLNSTLIAGAQKAAGGDNVYYIGDTKEKGLEFIGYVAKFARVIAAQQAQDVSAIEE
564 | FLFEDQDEQGNTRMIAAYRVRFASGFQVAALSSRPANIRGLQGVVIIDEAAFHADVQGVL
565 | DAATALLIWGGRIVVISSENGKNNPFHQFCKDIEEGRYGDDAAVLRITFDDAVTNGLYER
566 | VCAMKGEAATVEGKKTWYNRIRNAYGPRKAAMREELDAIPRDGNGICIPGVWIERAMPEE
567 | RPVIRLALDDDFIHMTEAERAAWGNDWIDRELRPVMAETLNPELRHVFGMDFARHRHFSS
568 | IVPMAIMQNLCRDVPFLLELNNVPSALQQQILFWLIEHLPRQSGGAMDATGPGMVLAEYT
569 | ADRYGRPRIAEITLNRKWYGFWMPKFTSLFEDSMIILPRDENTAQDLRAVENIDGVPMVA
570 | SLEKKDLKDPELVRHGDTAIAGCLANYAALNLATEIAFESTGERDIFRVLSGFGDSSSAG
571 | EFTDTGFGTVRGINDFGGFL
572 | >PHAGE3_MOSAIC_CDS_22 subcore
573 | MGRKSTIHRLEPDVRAHIERRLREDRMTLDELLSDIHEHFPGEETPSRSALGRYKQNFGL
574 | LVDRMRQQDQMARLLVSELGENPDERAGALMVQAVTTLTTHAAFTAQQEEDPDIDTVRHL
575 | ARAAKDVLQSRKASLDERREIERAARERLLREQEENLKETARAQGLSEDQVQFWRERVLG
576 | IK
577 | >PHAGE3_MOSAIC_CDS_23 subcore
578 | MSFSDYLRTDMRLVILRILSEMPSYSSNSSVIWSVLTRYGHSPSRDQVKSELRWLEEQGL
579 | VTVEDIETVLVARLTERGADVATGRAVVPGVKRPGAGG
580 | >PHAGE3_MOSAIC_CDS_24 single
581 | MVSSLDPQHTFTPELHLVFTQSLCPGGLFQVLFLLTQQALTRGTLNLAALVETGLTRLQD
582 | IFCCPGKVTHCVDIRIFFLLSGKGGVGGERGDSLNHQGTSAFVRVLSQFADQQTGHLVLL
583 | AHPVNQQAKVLFVATQRTAARGFFSGEVLMDIGQQLIERHAVFTQAALDMSPNVWFKAMD
584 | CRFTAHISLPRRDASRQARLHDRWPHQHRVRSTGQPAPSRYLQPSPALALQASAVLISPD
585 | RGSVNGHSVSVPTR
586 | >PHAGE3_MOSAIC_CDS_25 core
587 | MKFEELQFSWQVLQWAVLSAIGIYSWIVGRQSASNRELLELRTRLASIEAQVAQMPTQRQ
588 | VSILLEKLSSTEASITGMNNQMSGMASRLETINNYLLNTK
589 | >PHAGE3_MOSAIC_CDS_26 core
590 | MTPIKELGECVIGTGDREFFFRPSFRNMARIGEPEEIVQAFYDLCNDETTSFAQRAAEAY
591 | IRDEYSRLPDCVLRFMQSGLLSRKAIMAAHTVLTACCDDDIGDLVGWMKPGKSRKRGFVW
592 | RPGSMPPESMVIVAQNLMMHGIIGKAKVRKLQRYETNETTAEFRAADYIMAARNHFGISR
593 | EEAENLTMTEFAMMINAKYPNQNGFTREEYDTVMDEDDRRWQAMMESQANH
594 | >PHAGE3_MOSAIC_CDS_27 core
595 | MQGCANDTGKLIGKVAVLRMAFGCADTVPALSEWKRLGAMTTKGFDYSMNTVSSEADDTK
596 | GLVENLVNNMDFTISGEGEFRKKDKTTEVGAIAISKYIFDEVQAGRQPTVWVRFDFTGED
597 | AGTYIMGYFNTTSWSGDFGTSDISTFSGEWKVADADSVVFEVAPPALAFTTNLPTTKSVT
598 | AGSALNMSVVVEGGTSPYTYVWKKDGTVVSGQTTATFNKASAVSGDAGAYTCEVTDSSAT
599 | PVKITSASCTVTIS
600 | >PHAGE3_MOSAIC_CDS_28 core-elongated
601 | MTPPMYMRLKDLFVDEGLAAGFKVQWRQWRDTGKDTDQFIVFRPSGGTDITFDLGGDWYV
602 | MVDVISSKANPDAADAAVNAIVEYISAQSGADDCVGALRLVGNVPAPIPTEEGRLVTRLL
603 | VSCTYGE
604 | >PHAGE3_MOSAIC_CDS_29 core
605 | MGAKVRGIRQAKANLDRIIKDVQGRKVVRAIQSAMLIGSAQAALYTPIDTSTLINSQFRE
606 | IMANGTRVTGRVGYSANYAVYVHDPAVKQNFRRATARKEFLTKGFEDTRSQIDAVVKKEL
607 | SL
608 | >PHAGE3_MOSAIC_CDS_30 core
609 | LSSIASWSYTATATIWRRIRDADGSDTDGGGQPYGWEAPIAILCDYQGGLSAKIGDLGRE
610 | IVVKNTIWTEYATAREGDYILIGASTDAAPPDEADEIRQIVQFADTFERLADDFALITGV
611 | >PHAGE3_MOSAIC_CDS_31 core
612 | MSDKTSGGKIDDDATYGDAGDKSETIHVGAIHYDIEVSMAGRLHMEVRELIDVHAFELTD
613 | NGGFNYLFIWINDYGLKFMGVSLKTYEEAKEHLINYDREKITGPSGRCEQIPGLISAISR
614 | KIERFSL
615 | >PHAGE3_MOSAIC_CDS_32 core
616 | MAWVSVQQRLPRTFTRVWVITDTGEQTTAYVKSDGEWFINCDRIRATGAAVLRWRE
617 | >PHAGE3_MOSAIC_CDS_33 core
618 | MVTLDQAKEYLEGQGITIPDFVLQAFVDEANSIQDCLDAHYPASTALLIQLYLLALMGLG
619 | SGDKYISSQTAPSGASRSFRYQSFSDRWKSSVNLLRSLDKYGCASALIPADPTASPAFAG
620 | IWIGKGGCMCGDK
621 | >PHAGE3_MOSAIC_CDS_34 core
622 | MAKYEVVRPWNGVALGQVVELENLHPALKSNVRLMRGEAGGELSPATPEAGTDTKSRKEI
623 | IQARLTELGIEFKGNLGAEKLGELLPDGELEKLFPAE
624 | >PHAGE3_MOSAIC_CDS_35 core
625 | MYFSKETLATNARLGAHWNELWANRNMWNAQHDAMIAVNRAHMTPEMLACNAVGGFSRDF
626 | WAEIDNQILQLRDQEDGMEIINDLMGVQTVLSVGKTAKLYNVVGDIADDVSVSIDGQAPF
627 | SFDHTDYASDGDPIPVFTAGYGVNWRHAAGLNSVGVDLVLDSQMAKLKKVNKRRVAYYLS
628 | GDANIQVQGYPAQGMKNHRNTKKINLGSGAGGVNIDLTTATTEQIIEFFGNGAFGTTARA
629 | NKVSSYDVMWVSDEIWANLAKPYVVNGVISGNVLQAVLPFAPVKEIRPTFALSGNEFIAY
630 | VRRRDVISPLVGMAQGVIALPRPLPNVNYNFQIMSAEGLQITADDQGLSGVVYGANLA
631 | >PHAGE3_MOSAIC_CDS_36 core
632 | MARYRRVNIDGLSLYKTETRTTAADLLPGTAATINSSDKFAQATALTGRLYIIDVGYHQG
633 | LTITEAIPAGDSAVGNYVEEGRELALRCLPGAYKKDSPIKLGTAGQFTLATDDTDSVIGY
634 | SQDEYTIAASTTDFIRVRMRVGTVAAAGA
635 | >PHAGE3_MOSAIC_CDS_37 core-elongated
636 | MPMQVNITTKVNSQSIRRETYNGREHLVLPSYTLPANVVMNGGLYTQEEIDAHYQGLEGT
637 | LAPLGHPQVNGQFVSAFSPEGINAGHIGAWNRNVKKSGNRIYLEKWVDVARAGESEGGKE
638 | LLERVAAIERGEDVPPIHTSVAAFLDQLEPNEQQRATGADWVAKIHSMDHDAILLHEVGA
639 | >PHAGE3_MOSAIC_CDS_38 core-elongated
640 | MAAKKTKPPILPRNYQDPTGVDALERRAMKDFARRMNKIGKAYKAALDKIPSSLAVNARY
641 | EYQLNPTILSIILNDASYLVDQVLLDGNEYDLWFYEYVDLASEKGTGQSFYNLSQQSPVY
642 | AAGRESLASILASDPYQQRMALVHARVFEEMKGLTADVKRDMARVLTDGVGRGLNPRDIA
643 | RNLTDQTGIEKRRANRIARTEVTTALRRAKWDEDQEANDLYGLKTLLVHISALSPTTRHT
644 | HAVRHAHLYTNEEVRDWYSKDANSINCKCTQQSVLVDEEGKPIYPDTITKLKQEYKTMQA
645 | RGYAWAEK
646 | >PHAGE3_MOSAIC_CDS_39 core
647 | MTDKLTLAVNHALNDARMARARMGLMAPTMGLDNKRHSAWCEYGFPEQVTYENLYALYRR
648 | GGIAHGAVEKLVGKCWQTNPEIIEGDDADESEDETAWERKSKQVFTNRFWRSFAEADRRR
649 | LVGRYAGIILHVRDEKDWNLPVTKGRGLQKISVAWAGSLTVSEWDTGLNSKTYGQPKMWQ
650 | YAERLPNGSSRRVNIHPDRIFILGDYSDDAIGFLEPAYNAFVSLEKVEGGSGESFLKNAA
651 | RQLNVNFEKEIDFNNLASLYGVTVTELQDKFNEVAGEINRGNDVLMTTQGASVTPLVTSV
652 | ADPTATYNVNLQTAAAGVDIPTRILVGNQQAERSSTEDQKYFNSRCQSRRGDLSFEIEDF
653 | CDKLIDLQIIDAVSQKAVVWDDLNEQTGTEKLTNAKTMGEINQAMMGSGEEPAFSREEIR
654 | TAAGYENDGEIPLGEEDGSEEDEATDSTA
655 | >PHAGE3_MOSAIC_CDS_40 core
656 | MISGLHHFDAWLERNTWYKSHPSEARLFYLALEKVIAENPGVLIHQQYVRDYILTKKVST
657 | LADGTLIPTAQKYGLLAENISDYLLNTQ
658 | >PHAGE3_MOSAIC_CDS_41 core
659 | MTTAEQKAFARKVECEEDGLYYARYFFKQRTGGKMIVAPHHKVIQKTLDRVIEGEIQRLI
660 | INIPPGYTKTELATINMMGRGLALNCRARFMHLSYSHNLALLNSSTARGMIKSQAYQSMW
661 | PMALRDDADSKAMWWTEHGGGVYASSAAGQVTGFRAGHMEPGWQGALIIDDPVKPDDAYS
662 | ETVRDGVNNRFNETIKSRLAIETTPMIVIMQRIHYHDLSGYLLRGGSGEKWHHLNLPVII
663 | DNSQPYAAQYPENTHAIPIDHGLPDGWLWPFKHNESHRVSLFSHRRTAEAQYMQKPRRFN
664 | AEGALWTEVMISAARDLQIHHDKVRTVVAIDPQATNSDESDESGIVVASAYGAGDKKQYT
665 | VDGDYSGKFSPAGWAKKAMWAYEEHGADAIVIETNQGGDMAEETLRNAGFKGRIIRVHAS
666 | KGKYARAEPISALYEQGRVAHHGNLYTLENQLMEYVPATAKKSPDRLDAMVYALTELGGA
667 | QPMGMMIPKRLQGR
668 | >PHAGE3_MOSAIC_CDS_42 core
669 | MAALSTEVKAFIVQSLACFESPTKVIELVKAEYGIDVSRQQVSQYTPGNAMAAKLSQKWI
670 | DLFNATRKRFQNEIADIPIANKAYRLRVLDRMATNAEKMKNYGMTSQLIEQAAKEMGDAY
671 | TNKHKFEHSGPNGGAIQTITMSKEEYKSARQEMMEDDDC
672 | >PHAGE3_MOSAIC_CDS_43 core
673 | MADSTVIRPYPPVNFTGENWLPYTRLIPAPEIGEWVNQNILTEDGRIHNPDHAHLVDADV
674 | AFMWASGSFAKSGRIVLGQCEQVMMRSGGWQKSRMEQQMHEWFGRIPKFIITLAADYCEQ
675 | CNDLEFCALVEHELYHIAQATDDYGAPKFNKETGMPVLKLRGHDVEEFVGVVRRYGASKD
676 | VQEMVDAANRPAEVAHIDVARACGTCMLKLA
677 | >PHAGE3_MOSAIC_CDS_44 single
678 | VKLTGGYGLITVLSAILASNNTLKYNQYLRHRFCVVYTILTAFY
679 | >PHAGE3_MOSAIC_CDS_45 single
680 | LKLSHILHSEQLLLHFRRAHVEDIHQYLDLFVAPSRWKILPREYGYLESQFSFYIHQSFK
681 | ASCCAVI
682 | >PHAGE3_MOSAIC_CDS_46 subcore
683 | MTDEYKKKIGIPDNHTLEEVSSTWKGPRRGQDTDEYLLRELDENGEVVAHYEVYDSTSTY
684 | PPFGRSITYKKV
685 | >PHAGE3_MOSAIC_CDS_47 single
686 | MTKQYIVYLNMFNHFATRTILRRSTPVFWRRTAISAGAVQQG
687 | >PHAGE3_MOSAIC_CDS_48 single
688 | MSPPCKASDAGKDTDEDLQSDVETAQCLRQLRLDKYRWQAYYRAVSQ
689 | >PHAGE3_MOSAIC_CDS_49 core
690 | MSRLAVIISAVVILLLTCIFSWRSGWNSHADHVNALAAKKKEKAEKTIQPVEQKAAAATE
691 | EGKVIYRTITRDVVKYVQSPNRTVCQFDDDAVQLRQRAIDAANTIPGFDESSMQSK
692 | >PHAGE3_MOSAIC_CDS_50 core
693 | MTPSMRKKLIGVIAGGGGAIAIASVMLGNADGLEGRRYYAYQDVVGVWTVCDGHTGTDIR
694 | HGHRYTDRECDSLLKADLQKVASAIDPLIKVRIPDPTRAALYSFTYNVGSGAFASSTLLK
695 | KLNAGDVPGACKELQRWTYAGGKQWKGLITRREIEREVCEWGQK
696 |
697 |
--------------------------------------------------------------------------------
/test/data/fragment-based.fasta:
--------------------------------------------------------------------------------
1 | >PHAGE1_COMPLETE
2 | ATCAACGCCTGGCTCCTGTATCGTCCAGAGAAGGTTGCACTGGGGTAGTGGCTTATGACT
3 | GTTTAAGCCAAGGGGCATTGTACGCGGAATGCGCGTTTTTGTCGGTTTATTGTCTGTGCG
4 | ATTGACTGGGGCGCCCCTTCCTGTCCTGGCTAATCCACACGTGGATAATTTCGGCAAATT
5 | ATGATGGCGAGAAGAATATGCCATGGCTGGCGTTTAGGTAATGGTGGGAAATAGCGGTCA
6 | ACGTTTATATGGTGTCCCCTGCAGGAATCGAACCTGCAACTAGCCCTTAGGAGGGGCTCG
7 | TTATATCCATTTAACTAAGGGGACATAGAACTTACTGATTTTTAAGCGTTCAGCTTGTGT
8 | TGCATGTTATCCTATCATTCCCCGTACCATCAAGCATTTCATTCCTTTTATTTCCTTTCC
9 | GTTCGCATCGATTCGCTTAGAAAATCACTTCGTTCACTTGCCATTGCGTACACATTGAGT
10 | ACAGAATGCAGAATTTCAGTGTGTACAGGATACAGAGCCGTGGCCCTTAGTGATACCAAA
11 | CTCCGTAGCATCAATGCTAAGCCATACAGCGGCGCAGCTGAGGTCACAGATGGTGACGGG
12 | CTGAGTGTACGCATAACTCCCACAGGCACGATCACATTCCAGTTTCGTTATCGCTGGAAC
13 | GGTAAGCCCGTTCGCCTCTCCATTGGCCGCTATCCCGCTATGTCTCTCAAGGAGGCGCGC
14 | GTAGTCGTCGGTGAGATGCGCGAATTGTACCTCAAGGGACTAAACCCGAAAAAATATTTT
15 | GCCAAAGAAGATGGCGAGCTGACTCTAAAAGAGTGCCTGGATCAGTGGTGGGGCAAGTAT
16 | GTTGAAACGCTGAAGCCGAACACTCAGACGCTGTACAAGTCAGTTGTGTACAACACGATG
17 | TACACAGAATTCCCGGACGCTCCGGTAGTAAACATTCCTGCTTCTGCATGGGTGCGTTTC
18 | TTTGATAAGCAGGAAAAGAAGAACAGCAAAAAGGCCAGGGTGCTTCTTCTACAACTACGT
19 | TCTGTAATGAACTGGTGTATCAGCCGCCAGTTGATCCCATCGTGCGAGGTCCTGAAGCTT
20 | AGCGTTAAGACCATCGGAAAAAAACCTGATGTGGGTAGCCGTGTTCTCACGTATACCGAG
21 | TTGGCTAAAATCTGGCTGGCGCTGGAGAACAACAAGATCGTTACCTCCAACAAGGTGCTT
22 | CATCAATTGCTTTTGCTTTGGGGAGCCAGGCTATCAGAGCTGCGTCTAGCTACCGCCAGC
23 | GAATTCAATATGGATGATCTTATCTGGACGACGCCAGGAGAGCATTCCAAGATGGGTAAC
24 | GTTATCCGTCGACCGGTGTTCGATCAGGTGAAACCTTTTGTTGAAAGACTCCTCAATGCA
25 | GGAAACGATGTTTTGTTTCCTGGCCAGGAACTGGACAAGCCTATAGATCGCTCGTCAGCA
26 | AATCTCTATATGAAAAAATTAAGGGATAAAATTGATATACAAGAGTGGCGAACGCATGAC
27 | TTCAGGCGCTCACTAGTGACGAATTTATCAGGGGAAGGGGTTATGCCTCACGTCACCGAA
28 | AAGATGCTGGGCCATGAGTTGGGGGGAGTGATGGCGGTGTATAACAAACACGATTGGCTG
29 | GTGGAGCAGAAAGAAGCTTATGAACTTTATGCAGATAAAATTTTTTGGTATGCTAAAAAT
30 | ATAGTTAAATATTAGCTTGCAAAGTTACAAAGAAGGTTTGATTTTTCTGACAAGAAAACC
31 | AATGATACAGGGGTAATAATACCCCTTTTTTGAATCTTAGTTTTTTAATTTTCGTCAAAG
32 | TCATGTATGTCATATTTATATCCGGAATTGCTTACGGAAACATTGAAAATTGGAATGTCT
33 | TTTCTAATGCTATCTCTTATTATCTTCACAAGATTCAGGCTTGAGTGGCTGACTCGTCCA
34 | CCGACATAAATAGCTTGAATTGCCATCGGGGAATATTTTAATAATCTGTTTTTTCCAGAT
35 | TGTATATTTACCAGCCTGTATTCTTCCTCATACCACCAAGCATTATGTTTTTTAAACATA
36 | ATGTCTTCATCTTTTGCTTCGTTGAAGGTGTTAATTGTATGTGCGAGTGATGTATAAATA
37 | ACATCTTTATGAATGATTCGAGATTCTTTATTCTCATTAAGAGACTGTAAAAGTTTACCT
38 | TGGTCAAACATAATACATATTCCTGAGAATTGCGCAGCATAATGAGACCACATCAAGGTG
39 | CTTTCTTTAAAATTGATCTGATCTATTGGGTTGATTTTTGAGAAGCAACATACTCCAGAA
40 | TCATTAACTAGAGCAGAAATAGTGCCATTGACTTTGTTCACATTATTTGTCTCACCAAAT
41 | ATCATTTGAGCAGTTAATTCATATGTGCCTTCAAATGGGTCATTAAGATTATCAAAACCA
42 | GAAAACCATAAGCAACAATTACTCAATGAACTTAGAGAGTCCAAACTAATGCGTTGGTAT
43 | TTATATAGATTCATCGCACTTTCCAGTTATCTTTAATAATAATGTGGTTAATTATACATC
44 | AAAATGTTAAATAATCAAAAATGATTCTCATGAGATCATAAAGGAAAGTGCTTCTTCATT
45 | GTGTTTATTTTACATTAAAGGATTAATGATAATTAATACTGTCTGTTTATCCCTCCAGAT
46 | TCTATCCACTGTAATACTGCTTTACGGCTGTATCGGGTTGGATATGTAAGTACAGGTTTA
47 | GGGAAACCATGATCCTTTCGTAATCTCCAAACAGCAGTTTTCTTCTTACGCAGCAGGTCA
48 | AATACTTCTTTCTCTTCCATAAAGTCTGTAGAAGTCATAAGCACCTCATTCAAAATTACC
49 | GTTAAAAATACAGGTTCCACACCCACCGCGAGCCCCTTCAGTACAAACATCACAGCGGTC
50 | TACTTTTTTACGAGGTCGTTCTTTGATGTGCAGCCTTGGTTCCCCGTCTTTTGGCTCCGG
51 | CCATGAGCGCTGCTTGTTTACCGCCAGCTTTTCGATCATCGCTTGGGTAATCTGCTCATC
52 | AGTGATGCCTGCACGACGCTGCGCATCCCACAGCAGGAACTGCATATCAGCCCATTCCGA
53 | CAGGTCATCCGGTTCCGCTGCGGCCTCCATAGCCTCTTTTGCTAGGTGCTTCAGTGGGCC
54 | AATAGGGCCGACATTACCGAATGTTGCCTGTGACCACTCTGCGTGCTCGCGGCGTACCTG
55 | CTCTCGGTCCTTTGCTGGGTACGCTGGCGGATAATTTGCCAGCATCCAACTAATGACGTA
56 | GTCGGCCTTGAACCGCTCAACCGGAAATCCTTCATTCCAGTCACGGAAGTGATAGATAAC
57 | TTTTGCCAGCTCAGGTTGAAGCGCCACCGGATCGCTTTCCATTTCGGCCAGCGCGATGCT
58 | GGCCAACTCCTCGGCCTCTTCAGCTAGCAGCATTACGTTGCTTCCAGCTCCGTAGGTTTC
59 | ACGCCATAGTTTAATTTTTTCCAGTCGTTCTCTGGTTAACTGGTTATTGGTCATTTATTC
60 | AGCCCTACGCTGTTAGCATGGCAATAATGCTCACCATTCGGGCGGGTAGACGTTACACCA
61 | CAGCGCGGACATGGTTCAGGAAACGTTAGAACCAACTGTTCAACAACTGTGGAAACTTCC
62 | TGCTGTTCAGCGTCGGTAAGCTGGCGCTTAAGTTCAAACTCCAACGAATCGACGATTAAG
63 | AAGGCGAAGCCCTCAGTAGAACTGCGCATAGCATCAATGATTTTTTCTTTCGTCAGAGCA
64 | TCAGTCATGGCTGGCCTCCTCGAATAACACTTCACCCTCAATACCGCCGACCTGATAAAC
65 | GATCGATCCATCGTCCCGATATTCCACTGGTGCAGCGCTCCAGCCTTCGCCATTGGGATC
66 | GTCATCGTCGCCGACTTGGAAAAACCCGCCAGCCACTACACGGGCCGGATACATTTCACC
67 | ATCCGTCCAGTATCCTTCGGTATCTTTTAAGCATTTGACATTCATGCTGCCACCCATTGA
68 | TTGACCAGCCATATTCCCAGCGCAGAAAGCGATGCCGCTCCAACCAGAACTATTGCATCC
69 | AGGATTAATGTCTTTCGCAGAATGGGATCACGGTAAATTTTGAAAATCATTTACTAACAC
70 | CTCCCATTGTTGAATCAGATTCAATCGCGCGGCACCAATGCTATCGGCGTGGTAGGTCAC
71 | CTTTACAGAATGATGTTTGTGCGGACATTGCAAAGTCCCATAACGCATATAGGGGCTATT
72 | ACCATGCCAGGAGAATTGAGCTAAAGCCCCACAGGTAGGGCATTTGAGGATAGTTTCTGT
73 | CATGCGGCATCACTTTCTTCAGATGATGAGGTCTGTTCATTGTTTTGCTCGCCTTCCCCA
74 | GGTGCCGATGTTTCGTAGCGGAATTCCTGAAGAATCGACAGCACCTCGGCCTGCATTGCT
75 | GGTGGTACTTCAATGATCAGTCCGCCGCTGGTGGTCTCTTTGCATGATGAGATGATCTCC
76 | AGAAACTTCCGCGCCTTTCCTGCATTGAATTGTGGCTTGGCGATGCTCTTTGTGACTTTC
77 | GTTTTCCCGGCTGCTTCTGCTTTTTTCATCAGCCTGGCGGCTTCCCGGTCTGCATAAACG
78 | CCATGTTCACGAGAAATGCCAATCGCAATGGCATAGTTCATAGAGCCATCGCGAACCAGC
79 | TTTTTGATATACGGGGTGCATTCATGAAGCTGGAGATGTTGAAGGATATCGGACTCTGAA
80 | CGCTTAACTTTTGCGGCAATCTCTGCCGGGCTCCATCCCTGATTCTGAAGGCGGTGATAC
81 | GCCGCACCACGTTCAAGGGGAGTAAGTGCCAGCCCTTGCGAGCTAGTCACCATAAACGCG
82 | ATCTTATCGGCTTCAGTACCAACAAAATCTTTGCACTCAAGGCGCACGATATCGTGTCCC
83 | ATAGCAATAGCAGCGAGCGCACCGTGATAGCGGTGGTGGCCGTCGATCACCTTTACACCA
84 | CGCTCAGTAACTTCGACGGCCAGCGGAGGAATATATTCACCGGCAATAAACGCATCGCGG
85 | AATTCATCGACATGCGCCTGATTCAGTTCGCGAACGTTGTAGCCTTCTTCCGCATAAATT
86 | GAGGCGATCGGGACGTTATAAGTTTTACGGGTAGTTAACCCGGATTCTTTATCGTTATAG
87 | AGCTGGCCTAAGCTTGGCATATGGTCACCTTTTTGAATTAGGGAGTGCTTCGCTATGCGC
88 | CCCACCTGGAGGCGCATAAAACAACACACGGGATGGATGGGTTAGATGGAGCCTTCGTAG
89 | ATAGGCAGTTCATCGCCGAGCTGGTTTTCCATATCGGCTACGATCTCCTGGAAGGCATGC
90 | TCAATGATTTTTTTCGGCTCGATCAGCTCATACCAGAGGACCAGCTGACCGTCGCGCAGG
91 | CGGTAGCGAATTCGCGCATCAATCTGGTACGGTGCGCCATTATGGAAAGGCGCGATTGCC
92 | AGGCTGATTTTTTCTGGGATTTTGGTATTGCCTGAGCCGGATTTATCATCGCTGTACTGG
93 | AACTGACAGGTTCCGTCCTGTAGGCGCTTAACCGACTTGAACTCAACTTTCCTTGTCTCT
94 | TGGAAGGCGAGTACCATTTCCAGGAGATCGGTACCGGACGGGCCTTTATAGTTATCGCTA
95 | ATCGGCGCGATGTTCTGGATGTTGTTTTCCAGAAACTCAGCGAAGTCGATCTGATTCATC
96 | TTGTTACCATCAGACCCCACCCATGCTTTCCAGTCATCAGAGAAAGGACAGTCATAAACT
97 | GCTTTGTGCATTCCCCAGTGTGGGTTATCGGCGTCCTGGTGGAAGTCCAGCACCGCGACG
98 | ATCCGGGTTTTTGTCTTGTCGGCGAAAACAACAGAACGCGTATCACGGAATCGCTGGATA
99 | TATGCGATTAGCGAACCGGGGGAAATCAGGTTTGTATTCTGGCGAATACGAGACGGGGCA
100 | CTCTGGAGGCTTTCTAACGATTTGATATCGAAGCCATCCGGGACGACGACGGAAGGGATG
101 | TCGGTATTAGTTTTCAGCGTTGCAGCAACCAGATCGCGGATGTCGTGCACGGCAGAGCCT
102 | TCAATTTGAGACATTGAATAATTCCTTTAGATAGGTGAGTTGAAAAAGAGGGTGGGATTA
103 | CTGAGCCAGCTTAATAGGCGCAGCTTGCGGTGCTTGTTCGATAACTTTCAAATCCATCTG
104 | AACTTGCGCCGGGTCATCACGCAGCAGATCGCCATCAGCGGTAGAGAACATGATGGTATC
105 | GGCGCGGTCCAGTTCCGGGATCGTGCGGGTTACTTTTGGCGTGACCTTCATTGTGTTTTC
106 | GTCACGGGTATTCAGCATTGAACAGTTAAGGGTAAGGGTCACAGCTCCCTTTTTACCCGT
107 | TTCACGTACAGCCTTGATGACTTCGGCCAGCGCTTCGGTCAGCTCGGCATCGAGAGTGCC
108 | TTTGTTGATGTACGCCAACTGCTGGCTAAACGGCGTGGTATTTTTGGTTTCGGACATAAT
109 | TATCTCCAGTTATCAGCAAGGATCGCCTTTCTGGGTAAGAAGCCTGTACAGCCAGCTCTG
110 | CCGCCAGAAGCGAACGAATGATTTAGGGTTGCGAACAGCCTGCACACCACGAGGGACGCG
111 | CATCAGATCGCCGTAAGGTAAATTAACGTTACGGAAGGTCATATAAATCACCGATTAATT
112 | AGGTATCCGGCAGGAGTTGAACCCGCGCTGGGTTGGGCAGCCCAGCCAACACCGGGAGCG
113 | GACACATAGAAGAAAAAGGGCGGTTACCCATCAGAACATTATCCTCTTCCTCCTGTTTGA
114 | TTGGTGGAAGACCAGGTAACCGCCAAGATGTTAAGGACACTTCATATTGGAGGAATTACT
115 | CACATCGTAAGAATGTTTTTAGAGGTACTATTCAAAAATGTGTATAGTATATTAGTTGCA
116 | AACTAAAAAATAACGAGGTCGTTTTATGGTAAGCAAAGATAGTTTTCTTAAGAAATTAAA
117 | CCGGAATTCCGAAGCGCGGCTTTCAGATGAACAAAAAATAGAAGCCGCAAAGAGAGAAAT
118 | GATAGATAAAGCCACGCATGACTCAAGTTTTTACGCATCGGAAGTAAAGAGTCTAGTTTC
119 | TAATGTGGAAAAATGGATCTCTGAGTCATCAATACAAATAGTAAAACAAGAGATAGTTGT
120 | AAGTGAGTTTTTGAAGGATAACATCACGCCGGTAAGTTATAGAGTTTTACAATTTGCTCT
121 | TGTTTATGAAGGTTTAAATTTAATCTTTACTCCCCAAGGTTGTCTCAGATTTCAAAATAG
122 | TGGATTAATTGATATTTGTGTTGACTCCCCGAACCTAACCAAAGTATATGAAAATATAGC
123 | TTTGTTAGTTGATTCGGAATTTAAGTATCAGTGGTTCTTTTCTGATGATAAAAATACTTT
124 | TATTGTGAATGGAGATACATTTAGGGATTTTGTGCTCAAAACAATTGGGATTGAGTGATT
125 | CTATTAATTTTGAGCGAATATTATATTTCCACACGCCTTAGTAGTAAGGCGTGTCATCCG
126 | ACTTGCTTTACTCATTACTTGTTATGGATAAGTGGAGAATGATAGCATTCTGCCCGCCCT
127 | GTAAGTCGGTTTTTATTGCGAGAACATTTTGTACTTATCCAAACAAGCATCCTTCTGATG
128 | CATCAGACTTCGAAACATTCTAAGTAGAGAAAATGGCGCTAATAGCTCTTCACTCTGTGA
129 | AGAGAAAAAATACTAACAAGAAGAGAAAGGAGCTAAAAAGCGCCACAGACTTAATAACCT
130 | CTTACCGAAACGGTTACACAGGACGGTAATTTGGTGGTGCTTTCGGGCAAGGAAAAAAAT
131 | AACTTCCCGAAAGCTATCAACTCGCGCTTGCACATATCCTCCTGCCAGTGTTGCCCGTTC
132 | ACGTCTGTTGTCACAGAGCTAGTCTTCTCACCGACCGGATCGCACCCGGTGATACTCCGC
133 | ATTTGTGCGTAGGGGTCTAAACAGGTTTCATGTGCTGTTCCGACTTTGCTGATTGTAAAA
134 | GAGCGGTATTGCTTCAGCGGACCCCGTTCGTAACGTTTACGGTTGGGTATCTGTCCGCCG
135 | TTGATGGATAGATATTCCTACTTAAAGTAGAAAATATCAACAACCAAAAGTAGAAAATGT
136 | AGTGGTGTTTTATCATGTCATTGATTTAAAGTTGAATTTATTTTTACGCTTACAGTGTGT
137 | TATGGTTAAAAAAACACCAGTGAGGGTTGCAAAATGGAAAGCGATTGGGAAATTGAGCGG
138 | CCCGCATTCATAGCTGGCGAGGTCGGTGAAGCCGCAATCTGGCTAATACTGAAAGGCGAA
139 | GAGGTCAACCGAGCCGCGATGGCAGAATATTTGGAGAAGAAGCGCCGGGAGGTGGGCAAC
140 | ACGATCCACAAAGGTGTCTTGCGAGATGCAGCCAGGCTTGTGAGGGATGGAAAATTTTAG
141 | GGCTTACCCGGCGTAACATGCTCGCCGGGAAGTCAGCCTACGGATGAGATATTGTTGAGG
142 | CATTTCCCTGCTCAATAACACCAGACCTTACCTTGTAACTTCCTAATATTTTTATTGTTT
143 | TATTTCCCTTCATAGCATCAAAGATTATACCTATTTCATCTGGATCATTGATGAAACTAG
144 | TATCAACATAAATAGGAAAGGACGTTTCTCCAGTCGGTTCGTGGCAACTAAGAGTCAGCT
145 | TGTCCGCAGAGCGTTTGATGCTATCTATTAAAACCTCAAGAGTAAGCTGTTCAGATTTAG
146 | CTTTTTCAATAGGGTTTTTGATGATTTCTTGGATATCGTTGTGAGTCAGCTCAACTTTAT
147 | CCACACCCGTTATCGTTATGTGGGTGGCATCGGAAGCGCCCTTCAGGACGCCCGTAAATG
148 | CTTTTGCTGTATGTTCCTGAATGCCTTCTGCTCTTTCAATGGTATCTACTCCGGCATGCT
149 | CTTTAATTGCAGAAAGCATTCCATCTTTAAGAAGAGTCATCCTTTCGATTTCAGCTTTTT
150 | GCTTTACATCTTCTAATTGAATTTCTTGCTGCTTTTCTTCAATGCTAGCCTTGCTTTCAA
151 | GGTATGACGTGCCGATCCAGCCCCCGCCTAATACGGCCAGAGCAAACAGGAAACACATTG
152 | TCTTCTGTGTTGGGCTCATACCTTGGGTTACCTTTGCAAATGCTTCACCAAAAGCCTCAA
153 | ACAACTCTTTTAAAGAGGCGATTATCTCGGTGCAACCTGGATTAATGGTGAAAACCAACT
154 | CAGCAGCTTCACGATCTTCTATGGTTAATTTCTGTAAGTTTGAAGTTTTATACTTTATCA
155 | GAGTATAAACTTTATACATTTCTGTCTGAAACTCACAGATACCCTGAGCAAGACTTGATG
156 | GCAATGTACCATTGTATTTGTTTTCATCGCCATAGATTTTGAAATCAATGCTATTGAAAA
157 | AATTTAGCTTAATATTTTCAATGCGTATGTCTTCGCCTGCCATTAAGCTGTTAAAAACAG
158 | TTTCAAGGTCGCCAAGTGAGTTTATGTTGATTGGTAAGGCTGAAGCTTCCATGTTAAATC
159 | CTTGATTATAAACATATCTGCAAAGCTACAGACCCCGGTCGAAAGAGCCTCATAGCCGGT
160 | TATATTTTATTGATTCATGAATCAATGCCTTACCCATCACATAAAGGTTATCTTGGGTAT
161 | CTTCGTTGATGTACCACTTCTCATAAGCTGGGTTATCTGACAGAACCGCTAGCTTATCAC
162 | CCTGCATTTGCAGGCGCTTAACGTGGAAAGTCTTCCCATAAACAAATGAGTAGACGCCAT
163 | CAGTCTGAAAATTCCGAACCGATACATCAACGAACAGCCGATCGCCCGACACAAGGGTAG
164 | GGGCCATGCTATCGCCGTTAACGGTCATAACCTTAACATCTGCAGGATCACGATTTCCAA
165 | AGAGCACCCGCGCATGCTCTGTAGTGAACTCAATAGCGTAAAGCACTTCAACATAGTCGG
166 | AAATCATGTAGTTTCCGGGGCCTGCACTGACTGTTAAATCAAGTACTTCAACGCGATAAA
167 | CATCCTGCAATGGATTGCTGGCAATAGACTTCACCGCTTCACCCTCCGAGGGCTTACCAA
168 | CGCCATACTCAAGATATGCCGCGTCCACCCCCAAAACCGCAGAAAGCTTCCGCATTGTTG
169 | GAGTTCTTGGTTTTGCCGTCCCAAGCGTATAGCGCCTAACCATCTCATAGGTTACACCAG
170 | TCTTTTCTGATAACTGAGTGATTGAGATATCGCTCATAGAAATCAGCCCATTCAGGCGGG
171 | CGGCAAAATCTGGATACTTTTGTTCTTCTACCATAGGTAGAAGATTACGTAATTGGCAGT
172 | AATTCGTCATTTCTATTTTCAGTAGTTGATCTTTCTACTTTAAGTAGTATTATTCCTCTA
173 | TCAACTAGAGGAGTCTCTCATGTCCAATTCACCCACCGAAGCCGCGATTAAGGCAGCAGG
174 | GCGGTCTTTATCAGAGGTCGCGCGTAGCTTTGGCTTCAAGTCTACCCAGTCAGTAGCCAA
175 | CTGGGTCATTAACGATCAGGTCCCATCTGAGCGCGTTTTGCAGCTATGCGAACTTGGCGG
176 | GTGGATTGTTACTCCCCATCAGCTACGGCCTGATATCTATCCGAACCCCAATGATGGGCT
177 | CCCTGACAACAAGACTCAGGTTACTACTTCTGCTGCTTAATCATAACCACAACTCAAGAG
178 | GTGAAATCGTGGGTAATGAACCTGAATGGAAAGTAGAACGCCAGCCTGCATGGCTGGTGG
179 | CCGCGATAAGAAGAACTATTGCTGATTTATCGGGTGGATATGACGAAGCTGCTGAAATTC
180 | TCGGGGTTTATAAGTCCGATGACGTAACTCCGGCTACCGACCCATTGCATAACCGGCTTC
181 | GGACCAACGGCGATCAAATCTTCCCTCTGGGTTGGGCGATGGTTTTACAAGCTGCTGGTG
182 | GCTCAAATCACATCGCAAATGCCGTTGCTCGTCACTCCAATGGGCTCTTTGTACCGCTAG
183 | CTGATGTGGAAGATATCGATAACGCCGATATCAACCAGCGTCTGATGGAATCTATTGAAT
184 | GGATTGGCAGGCACTCGCAGTACATCCGAAAAGCTACTGCTGATGGTGTTATTGATGCTG
185 | CCGAACGCGCTCAAATTGAAGAGAACAGCTACCAGGTTATGACCAAATGGCAGGAGCACC
186 | TGACTCTGTTATTCCGGGTTTTCTGCGCCCCTGATGAGGTTTCCCGACCGCCAGACTAAT
187 | CAGTCTATTGCCCGGCTCACAGAAGTGAAGCAGGAGGGCTTATGTATCAGGACGAATATT
188 | TTCACGTGACTATGCCCACGGTTTTTGCTCGTGAGGACGCCCCGTGGATTAAAGAGCAAT
189 | TAGCAGCACTCCCGGCAGGTATGCGGGAAAAAATCGCGATGGCGTATGCGCAGGCGTACC
190 | AGGAGGCGTTCGACGCAGAACCGGTGTCATTCCGGCAGCAGAACGCAGCACGACGAACGG
191 | CAAACCGCCGATTGCGAGAGTTTTGCACGAGGTATACCCCAGCGGTTAGGGGATATACGT
192 | CGCTCCCACCCAGGGTTTGATTTTCTGAATCTGGGTTGGGGGAAAGGGGGCGGTGTTGGG
193 | TTTTAGCCCGAAGGGCTGGAACAGCTTTACCAGAAGAGAACGATCTAACAGAGAGATCAC
194 | TGTATGGGGTTGAAAACGTCGCTTGGAAGTTTAGACGTTTAGACATCCAAAAGGAGCCAA
195 | AATGATTTATTCAGACGCTAACGAAAAATGGGCCCCGGTTCCGGTTGAGCCGTATTCCAA
196 | AGCCTACGAAGTCAGCAACCTCGGACGGGTACGCAGTGTTCCGCGCCTGGCTAACTCTGA
197 | ATATTTTATTCGACACATTCACGGAGGTTTTCTTAAAGGCCGCCAGCGCAAAGACGGGAC
198 | CAAAACCGTTACGTTGTCGGTTCAGCGTCAGCGCACTAAGTTTGTCATCGCCGAGCTGGT
199 | GGCTATGGCCTTCGGGGAGGTTACTGCTAATGCTTAACATCCAGCCCCGCGAAAAACAGG
200 | TAGTCGCGTTAAACATGCTGCGCAGCGCCTGGAAACAGAATAACTCCTTCATGCTCTACG
201 | CCCCTGTAGGGTTCGGCAAAACAGCAATAGCCGCGCTGATCACTGATGGCTTTGTAAGCC
202 | GTGAAATGCGCGTAATGTTTGTGGCTCCGTATACGGTTCTGCTGGACCAGACCGCAGCCC
203 | GATTCATGGAATACGGCCTTCCTGGCGAAGAAATCAGTTATGTCTGGCGTGATCACCCGT
204 | CATACAACCCCACAGCTCTAATCCAGATTGCCAGTGCGGATACGCTGATTCGCCGTGAGT
205 | TCCCGGACAATATCGACCTGTTGATCGTTGATGAAGCCCACCTGAAGCGCAAAAAACTGC
206 | TGGAGGTTATCGACAATCTCACTCGCAACACAGCAACGAAGGTAATCGGCCTTTCCGGTA
207 | CGCCTTTCGCTAAGTTCATGGGCAATTACTACCAGCGCCTGATTAAGCCAACGACCATGA
208 | AGGAACTGATCGCCATTGGTGCATTGAGCAAATATGAGTTCTATGCACCGTCGCATCCTG
209 | ATCTGTCCAAAGTGGAAACGTCATACGTAGCAGGCTATGGCAGCGACTACAAAGAAAACC
210 | AGCTCAGCCAGGTAATGAGCGAAGCCAAGCTGGTAGGCGACATCGTGAAAAACTGGCTGG
211 | AGAACGGCGAAGACCGCCCGACGATTTGTTTTTGCGTAGATGTCGCTCACGCCAATTTTG
212 | TCACGGTTGAATTTGCCAGCGCTGGCGTGACGGTTGAAGTTATGACAGCCAGCACACCGC
213 | ACGACGAACGGCAGCTCACGATCCGCCGCTTCGAACAGGGCATAACCAAAATCATCATTA
214 | ACGTTGGTGTTCTGGTAGCCGGTTTTGATAGTGATGTTCGCTGCATTATCTTCGCCCGGC
215 | CAACAAAAAGCGAAATGCGCTGGATTCAGATTCTTGGGCGTGGCCTGCGTGCCGCTCCTG
216 | GTAAAGATCACTGCCTAATCTTCGACCACACAGGCACGGTTAATAAGCTGGGCTATCCCG
217 | ACGATATTGAATACGACTACCTCCCTTCATCGTCTGATGGCATGGAAGACGCGCCGCAGA
218 | GAGCCGTAAAGACCGATGAAGCGGAAAAACTGCCGAAAGAATGCAGCCAGTGCCACTACG
219 | TCAAACCAGCTGGGATTTACATCTGCCCGAAATGTGGTTTTAAACCGCTCGCCGGTGAAG
220 | ACGTGGAAACAGATAAATCCCGTGGACTGAAAAAGGTAAGCAAAGCGGAAGTCAAATATA
221 | CCGCTGAACAGAAGCAATCCTGGTGGTCTCAGATTCTGTTTTATCAGCGAACCCGTGCAG
222 | CGCAGGGACGCCCGGTCAGTGATGGCTGGTGTGCGCATACCTACAAACAAAAGTTTTCAG
223 | TATGGCCTCGGGGGTTACATCACACTCCGCAACAGATCACGCCTGAAGTAACGAATTTCA
224 | TCAAATCAAAACAGATCGCCTTTGCGAAGAGAAAAGAGAAAGAAGGAGATGCCGCATGAA
225 | TACCAAACAAGCTGCTATTGGTCGCTGGGCGGAAATTTACAAATACTATGGCCTCCCAGG
226 | TATTACCGGGAAAAGCCATCTCAAAGGAGAGTGCCCTCTTTGTGGCCGTAAAGGTAAATT
227 | TCGCTGTGATGATAAAGACGGCACCGGGTCATATATCTGCGTTTGTGGCTCTGGCGATGG
228 | CTGGGCGTTGCTGACTGCCAAGACTGGCAAAGAATTTAAGGTTTTGGCCTCGGAAATAGA
229 | CAGGCTGATCGGGAACCCCTACACCTCGGATCGGACCAGAGTAAATCCGGTGCGTACATC
230 | TCTGGCACAACAACGTGACAAAGTCAGTCGTAAGTTTTCGAAGCTCATCCCTCTCCGTGG
231 | TACCGGTGCAGATAGCTACCTGAAGGGGCGCGGTATTAACTCCCACCCAGCAGAGAGCAT
232 | CAAGTACTGCGATAAACAGCCAGTAGATGGAAAGAACCTCCAGGCTATTTATGCGCTGGC
233 | TACAGATGACCGCGGGGAACTGTGTTATTTGCACCGCACTCTGCTTGACGGTGATAAGAA
234 | GGCGCAAACAGGCGGCGCAGCCAAGAAGATGATGAAACTGCAGGAGGATAGCTATTTAGA
235 | GTATGCCAAATCCGTTGCTATTCGCATGTTCCCAATATCCTCAACGCTGGGAATTGCTGA
236 | AGGGATCGAAACGGCTCTGGCCTGCCACCAGATCACGAAGTGCAACACGTGGGCGACGAT
237 | GAATACCGCCTTCATGAAGAAATTCCGCGTTCCTGCCGGAGTAAAGAACCTCATCATTTT
238 | TGCTGACTCAGACGCCAACGCAGCAGGTCATGCCGCTGCTTTTGAATGCGCTGCTGCAAA
239 | TCTGCACGCAAAGAATGATCTGGAAAGTGTCTCCGTGCGCTGGCCTGCACAGGGGGACTT
240 | TAATGATCTGCTGCTTAACGGCTCAGAAGTATTCGAGTGGGTATTTCACCGGGGGATGAA
241 | ACAGTGAAGAAACCAGCGGCTGCAAAGGTGAAAACGTACAAACCGAAGAAGTGCGCCAGC
242 | TGTGGTGAAACCTTCACTCCGGCCCGCAACCTGCAAAAGGTTTGTGGCCCGCTCTGTGCT
243 | ATAGCCCACAACAGGGCGCTGAAACAAAAAAAAGCGGAGGCGGAACAGAAGGACAAGCTG
244 | AAGATGCGCAAAAAGGCTCTGCTTACCCGTGGCGACTACATCAAAAAAGCCCAGTCAGCC
245 | TTTAATGCCTTTATCCGTGAACGCGACGAGGGGAAACCATGCCCATCATGTGGCACTTAT
246 | CACCCACCTATGATCTTTGGCGGCCAGTGGGATTGCGGTCATTTCATGGGGGTAGGCGCT
247 | CGTCCTGAATTGCGCTTTGAAGAGAAGAATGCTTACCGGCAGTGCAAAGCCTGTAATGGT
248 | GGATCGGGTCGGTTCGCTGCAAAGAATGCCACTGTACATGCCCGCTACAGGGAGACGCTG
249 | ATCGAGTGGTATGGATTGCCGCTGGTGGAATGGCTGGAAGGCCCACACGAAGCGAAGCAT
250 | TACTCAAAAGAAGACCTGGAAAACATAGCGGCTAAATACCGCCGTAAAACTCGCGAACTG
251 | AAAAAGCAGAGGGCCGCATGAATTACGATCTTATCTACTGTGATCCGCCGTGGGAATATG
252 | GCAACCGAATTAGCAACGGCGCAGCCTGTAATCATTACAGCACAATGAGCATTGAAGACC
253 | TGAAACGGCTCCCTGTCTGGTCTCTGGCTGCTGATAACGCCGTACTGGCGATGTGGTATA
254 | CGGGGACCCATAACCGCGAGGCTGTAGAACTGGCTGAATCATGGGGTTTCCGGGTCAGAA
255 | CAATGAAAGGCTTTACCTGGGTAAAACTGAATCAGAACGCCGCTGACCGCTTCAACAAGG
256 | CACTAAACGCCGGAAAGCTGGTGGACTTCAATGATCTTCTTGAGATGCTGGACCGTGAGA
257 | CGCGCATGAACGGCGGCAATCATACCCGGAGCAATACAGAAGATGTCCTGATAGCGACCA
258 | GGGGAACCGGACTAACCCGCGCCAGCGCATCGGTAAAACAGGTTGTTCACACCTGCCTCG
259 | GTGAGCACAGCGCTAAACCGTGGGAAGTAAGGAACCGACTGGAGCAATTATACGGTGATG
260 | TGAAACGGATCGAACTATTCGCTCGGGAAGAGTGGAAAGGATGGGACCGCTGGGGAAATC
261 | AATGCAACAACAGTATCGAAATTATTACCGGACTGATTAAAGAGGTGAACCATGCAGCGT
262 | GATATTCAACTGGTACTCGAACGGTGGGGAACCTGGGCTATTAGTGAAGGCTCACAGGTT
263 | GACTGGTCACCAATTGCAGCGGGTTTTAAAGGCCTCCTGTTAAATACCTCAAAGTCTCGC
264 | GAGTCATGTTGTGACAATGATGGCCTTATTGTAGACGCTGCCGTAGGAATGCTTAAACGA
265 | GCTGGCCGGGATGATGAGTTAAATCTGGTGATGTTGCATTACATGCATAACGTTTCTAAA
266 | TCGACTATTGCCCGCTGGGAAAAATGTTCAGAGGGAAAAATACGTAACAGGTTAATGATA
267 | GCCGAAACGTTTATTGATGCCTGCATCATTATGAGTGGTGCCAGATTAGAAATGGATGAT
268 | TGGGCCCATAAAAAAGAAGTAGAGAAAGTTGCATAAAAGTCTATTCGTTACGAATTTTAT
269 | ATATTAATGTGTTAAGAGTGGTCACTTAGACACGAACTTAAATATTACAGAACCTCGCCA
270 | ATTGGCGGGGTTTTTTCATTTCAGGCCCTGACTAAAAGTTGCAGATTAACCGTGAAATGC
271 | ATGAGCCTGCGGCCTGAATTCTTTCCCCTCGTTCTGAGAGGATTCACAGCAATAGAGGGG
272 | GACCGATGTCCGAACCAATAACCGGCACAGGCTTAGCTGGTGGCGCTTTAACTGGGGCGA
273 | GTGTTTACGGGCTATTAACCGGTACAGACTACGGTGTTGTGTTCGGGGCATTTGCTGGTT
274 | CCGTCTTTTATATAGCGACAGCGGCCGATTTGAGCGCCCCACGACGGATGGCATATTTCG
275 | TTGTGTCCTATATTGCTGGAGTTCTGTGCTCCGGGCTGGTCGGTTCTAAGTTATCCGACC
276 | TTACCGGGTACAACGATAAGCCTCTGGATGCAATTGGTGCCGTAATCATTTCGGCATTGG
277 | CCGTAAAAATACTCACCTTCCTGAACAATCAGGATATTGGCTCGCTGGTGGCGCTAATAA
278 | CGCGCCGGGGAGGTTCCGGTGGAACTAAATGATCCTACTGCAACCATCAATGCGCTGTTA
279 | TGTGCTGGTGTCGTTGTTACGTTGATGTTCTATCGCCGCAGAGACTCACGTCATCGTAAG
280 | TGGGTGTCGCGGCTGGCATGGCTGATAACAGTGATATACAGCTCTGTGCCGTTGGCGTAT
281 | CTGTGCGGCATCTATCCCTATTCATCATGGCCCACCATTGCGGCCAATATCATGATCCTT
282 | GTTGTGCTGCTGAGCGTAAGAGGCAATGTAGCGCGGCTAGTTGATGCACTGAGGCACTAA
283 | TGAATCAAACACAATTCCAGAAGGCGGCTGGTATCAGCGCCGGGTTAGCTGCGCGCTGGT
284 | TTCCGCATATTGATGCTGCGATGAAAGAATTTTGCATCACTGCTCCACTCGACCAGGCGA
285 | TGTTTATTGCGCAGTGCGGCCACGAAAGTACTTCATTCACTCAACTGGTAGAAAGCTTCA
286 | ACTATAGCGTTGCCGGGCTGGCTGGTTTTGTGAAGGCAAAGCGTATCACGCAGGACCAGG
287 | CAAACTCACTCGGGCGCAAATCATCCGAGAAGGCGCTACCGCTTGAACGCCAGAAGGCAA
288 | TCGCAAATCTGGTCTACAGCAATCGCTACGGCAATAAGTCAGCAGGTGATGGCTGGAAAT
289 | ATCGCGGGCGAGGACTAAAGCAAATCACTTTCTTGGATAATTACCTGCGATGTGGTACTG
290 | CACTAAAGCTCGATTTAGTCAGCAACCCTGAACTGCTGGAGAAAGATATTAACGCAGCCC
291 | GCAGCGCCGCATGGTTCTATACCTCAAGCGGTTGTTTGAAATACCCCGGCGACTTAGTGC
292 | GCGTGACCCAGATTATCAACGGTGGTCAGAACGGTATTGATGACCGACGCGCCCGATTCC
293 | TGAAAGCTAAATCTGTTCTGATGTGAGGTCCTCATGGGCATTGAAATGATTATTGGTCTG
294 | GCAACTGCGTTGCTGGCCATTGTCGCTGGAGCATTTGGATTAGGCCATACGCGAGGTACT
295 | AATAAAGCGGAAGCCAAAGCCGATCAGCAGCGAATCGAAGATACCGCCGCTGCCAGCGTC
296 | GCAGCGGCGGAACGGAAAGAGGAAGCCACCAGAGAGGCCAGCAATGTACAACAGACTGTT
297 | AGCCATATGCCTGATGACGATGTTGATCGGGAGCTGCGCGAGCACTTCACCCGCCCCGGC
298 | AGTCGTTGATACCGCATGCAACTGGGTACGGATCATCTACCTGACTGACCACGATATCGA
299 | CGTTCTGGATAAGCAGACCAAACGCGACATTCTGGCGCACAACAAATCCGTGCAGGCTAA
300 | TTGCATGAAGCCCCTCTGATGAGGGGCTGTGATGATGACTTAATTTTATGATTTTCGAAA
301 | GATTCTTCCTAGTGCATATAAAGCGGCGCTTCGAGTATCACTACCACCTTCTTCAGCTGC
302 | TTTTTTTAGGCAGACAATGACTTCATTAGTTGCAGGAGCGCCTTCCCCTAAAGCTTTGAG
303 | TGCCGCTATTCTGGTAGATGCACCGCCGTAGCCAGCGTTATGTATCAGAAGATCAATTAT
304 | ATCTTGCGTCATAGATGTACTCGTTAACTATTGAAATAAGTGAACTGAAAAATTCATGGT
305 | TATGCGTAAAGATATTGTGTGTAAACGTCTATTTAAAAAGGAATACCTAGCTTCTTACAA
306 | ATTCTCGTAAGTAATTCACCTGCGTTGAAATAATCTTGGACTCCTACTGCAGCCCCACAA
307 | GATGCGCACTGGATAAACATTAGTCTATATCGTGAACCACTTACTACTGTTTCTTTCATT
308 | TCGAATCTTGTAGAGGAGCATTTTGGGCAAGTTGTTGTGGCCATAACAATCCTTTTCAGA
309 | GGTAATCAGCCATCCCCCAGATAGAGTTCGCCAGTGTCCCACCATTGACGGGCTGAGTAT
310 | TAAAATTAATCAATATTCATTAACCATGCAAATCATAAGTCATAAAGTATGCATAACGCG
311 | AACAGCATGAAGCAGAGAGGAAAGAACGATGCCCGCACTAATCCCCCGCGCCTGCCGTAA
312 | GCGTGGATGCGCAGGTACCACCACCGACCGCTCAGGCTACTGCGTGAAGCACAGAAATGA
313 | AGGCTGGCAGCAGCACCAACAGGGCAAGAGCCGACACGAGCGCGGCTATGGTAGCCAATG
314 | GGATATCAAACGTGCCCGCATCCTTCAACGTGACAATCACCTGTGTCAGAACTGTTTGCG
315 | TGACGGTCGCGCGGTAGCAGCCAAGACCGTTGACCACATCAGGGCTAAGGCTCATGGGGG
316 | GACCGATGACGATTCGAACCTCGAAAGCCTGTGCTGGCCGTGCCACAGAACAAAAACCGG
317 | GCGTGAACGTTTCAAATGATATCAATTCCCATTTGAATGGTGGCAGGGCGGGGGCGGGGT
318 | CAAATCCCTGATGGCAAAGGCCCAAAGGACCGCCGCCTAACCTTTTTTCACACCGCCGCA
319 | GGTTAGAAAACTTTTTTTGGGGTCCCCCATCCAATGATTAATAGGAGTTTTCGATTATGC
320 | CAGGACCACCGAAAACCCCGACACATCTGGCTTTAGTGAAGGGGAACCCATCCAAGCGCC
321 | CGATCAATAAGAACGAGCCAAAACCCCCGTCAGGGGTCCCCCCAATACCGAAACATTTCG
322 | ATAAACAGGGTAAGTACTGGTTCAAACGGATTGGTGAGGAACTTGATGCCGTCGGCGTGT
323 | TGACCACGCTTGATGCTAAAGCGCTGGAGTTGTTGATAGAAGCCTATGTTGAATACCGGC
324 | ATCACTGCGACACGCTTGATCGTGAAGGTTACACCTATGCCGTCTACAGCGAAGATGATT
325 | CAGATGAAGGAGGGGAGCGGGAAATCAGAATGATAAAACCGCACCCAGCAGCAGTCATGA
326 | AGGCTGATGCGTGGAAACGGATCAGAGCGATGCTGAGCGAATTCGGCATGACACCAGCCA
327 | GCCGATCAAAGGTTGGTGCAAAAGGCCCGGCAGAAGCCGACCCACTGGAAGAATTTCTTA
328 | AAAAGCGCAAATGATGAATGGCAACCGTTGCAGATGGATTCCGCTACGCCGAGCGCGTGG
329 | TATCTGGCGATATCGTTGCTGGCGAACTGGTGCGTCTTGCGTGCCAGCGGTTCTTTCATG
330 | ATTTAGAGCACGGCCCGGAGCGCGGTGTTTATTTTGATGAAGGCCGCGCCCAGCACGTTC
331 | TCGATTTTTATAACTTCGTCCCCCATGTGAAGGGGCACTTGACCGGCAAGCCGATCGAGT
332 | TGATGGACTGGCACACCTTCATCCTGATTAACCTTTTCGGGTTTGTCGTCCCGCTGATAG
333 | ATGAAATAACGTTTGAGAGCATTCTTGACGACGATGGCGACCCCATGTTTGTGCGTCGCT
334 | TTCGTACCGCCTATGACGAAGTAGCGCGTAAAAATGCAAAATCAACGCTTTCGTCTGGCA
335 | TCGGGCTTTATATGACTGGTGCCGACGGTGAGGGTGGTTCTGAGGTTTATTCCGCAGCAA
336 | CAACCAGGGATCAGGCCCGCATCGTGTTTGATGATGCGAAGCGCATGATTAAGCTGGCTC
337 | CGAAAACACTGGGCCGGTTGTTTGGTAGTAACAAGTTGAATATTCACCAGGAGCGGACGG
338 | GTTCAAAATTCGAACCTGTAGCCAGTGATGCGAATAACCTCGACGGCCTTAATATTCACT
339 | GCGGGATCGTTGATGAGCTGCACGCACATAAAACCCGTGACGTCTGGGAAGTTCTGGAAA
340 | CAGCGACCGGTGCGCGCCTGCAGTCCCTTATTTTTGCAATCACTACTGCGGGTTTTAATA
341 | AAGAAGGTATCTGCTACGAGCAACGTGATTATGCAATCAAGGTTCTGAAGAACTTTGATA
342 | ACCCTGACCCGCTTTCAATTAAGGATGACAGCTATTTTGCGCTGATTTATACCCTGGATG
343 | AGGGGGACGATCCTTTCGACGAGGCAAACTGGCCGAAAGCAAATCCCGGCCTGGGGATAT
344 | GTAAGCGTTGGGACGATATGCGCCGTCTGGCTAAAAAGGCGAAAGAGCAGGTGGCAGCGC
345 | GGGTCGGATTTTTTACCAAGCATCTCAATATCTGGGTGCAGGGTGAAAAAGCGTGGATGG
346 | ATATGTCGCGCTGGGAAAAATGCCGCGATACCTGGGATGACTCAACTACGGCCAGCTGGT
347 | CAATGTGGCTCGGCGTTGATCTTTCCAACAAAATTGATATTTCAGCTGCAGTTAAAGTCT
348 | GGCTTGCTCCAAATGGCGATGTTTATGTCCGCTCCAGATTCTGGATACCTGAAGGTCGGC
349 | TGGAAGCCTGTTCCAAGCAGCAGGCGGACCTTTACAGAAAATGGAATCTCGCTGGATTCC
350 | TTGAGTTTACCGATGGCGATGTCGTTGACCATGCAGTAATTAAAGAGGAAACGGTCGAAT
351 | GGGCGCGAGGTGACTCGCTGAACGAGTTTGCATACGACCCGTGGAGTGCCACTCAGTTTG
352 | CTTTGTCGGTAGCAGCTGAAGGTGTACCGATTGTTGAAGTCCCTCAGACGGTTAAAAACC
353 | TGTCTGAAGCAATGAAGGAGGTTGAGGCAAAAATTTACGCCGGGCGTTTTCATCACGATG
354 | GCAATCCGGTGATGACATGGATGATGTCAAACGTCACCGTCAAACCAGACAAAAACGAGA
355 | ATATTTTCCCCAACAAGGCCACGCCTGAAAACAAAATTGACGGTCCTGTCGCGATGTTTA
356 | TTGCGATGAGTCGCTTGCTTGTTAACGGTGGTGGTGAGGTTGACTTCCTGTCCACTATCG
357 | ATCCTGACGAAGACCTTTTACTTCTATGAAAACTCTAATCACTGATGTTATCGGGCTTAC
358 | CGGGTTCGGTTCGCTTGCTGCAGGCGTGTATCTCCAGTTCGGGCTGGCGATGTCTCTGAT
359 | GATGTCGGGAACCCTGCTACTCATTTATGCGCTGTTAGCGGCAATGAGGGGGAATAATGC
360 | TGCTTGATGCTCTTTTTCGCAGTGAACCACTGGAAAACCCGGCTACTCCGATCACGAGTG
361 | AATCGGCAGAAACCGATAACGTGTTTGCCCGAGACGTATTTGTCAGCCCGCAAACGGCAA
362 | TGAAGCTGGCTGCGGTGTATGCCTGTATTTACGTTATCTCTTCGAATATCGCTCAGATGC
363 | CACTGCACGTTATGCGGAAAACCAATAACAAGGTTGAAGCTGCCCGCGATCACCCTGTGT
364 | TTTACCTGGTTCACGATGAGCCGAATATGTGGCAGACCAGCTATAAGTGGCGTGAGTTAA
365 | AACAGCGTCATATTTTGGGCTGGGGGAATGGTTACACCTGGGTGAAGCGTTCCCGTCGTG
366 | GTGAAGTTTCCGGGCTGGAATGCTGCATGCCCTGGGAAACAACACTGCTTAACACGGGGG
367 | GGCGGTATACCTATGGCGTTTACAACGAAGAGGGGGCGTTTGCCGTCAATCCCGACGATA
368 | TGGTGCATATCCGGGCGCTGGGTAACAACCAGAAGATGGGGCTTAGCCCAATTATGCAGC
369 | ATGCCGAGACGATAGGCATGGGGATGAGCGGGCAGGCTTATACCAGTTCATTCTTCAACG
370 | GTAATGCGCGACCCGCTGGCATTATTTCGGTGAAAAACCAGCTGAATGAAGAAAGCTGGG
371 | GGCGTTTAAAAAGCATGTGGCAAAAAGCCACAGCTGCTTTGCGCAGCCAGGAGAATAAAA
372 | CAATGCTTCTCCCGGCAGAGCTGGATTACAAAGCGCTCACCGTTTCCCCGGTTGATGCCC
373 | AGATCATTGATATGTCGAAGCTGAATCGGTCGATGATTGCCGGGATATTTAATGTACCGG
374 | CGCACATGATTAACGATCTCGAAAAAGCCACTTTCTCAAATATTACGCAGCAGGCCATTC
375 | AGTTTGTCCGCTACACGATTATGCCGTGGGTAACGAACTGGGAACAGGAACTCAATCGCC
376 | GCCTGTTCACCCGTGCTGAACTGGCCGCCGGATATTACGTCAGGTTTAACCTGACAGGCC
377 | TGCTACGTGGGACCCCGCAGGAACGTGCTCAGTTCTACCACTTTGCGATCACTGATGGCT
378 | GGATGAGCCGCAATGAAGCGCGAGCCTTCGAAGACATGAATCCGGTAGATGGCCTGGATG
379 | AAATGCTGGTGAGCGTTAACGCCGCGAACCCCGCAGACGATTTTAAGGCACCTAAAACCG
380 | ACGAGGAAAAGCCCAATGAATGACCGTGAAACGCGCTGTTACAGCGGGGAGGTCAGAGCC
381 | GAGCAACGCACCGATGAACCTACCCGCATTCTGGGCTATGGCTCGGTGTTCAACAGCCGT
382 | TCTGAACCCCTGTGGGGATTCCGTGAAATCATCAAGCCCGGAGCATTTGACGATGTGCTG
383 | AATGATGATGTCCGCGGGCTGTTTAACCATGACCCCAACTTTATTCTCGGACGGAGCGCT
384 | GCCGGGACGCTATCCCTGTCTGTCGATGAGCGCGGCCTGCGTTACGACATTACAGCGCCG
385 | GATACGCAAACTATCCGTGATCTGGTGCTGGCGCCGATGATGCGCGGTGACATTAACCAG
386 | TCATCTTTTGCCTTCCGGGTATCCCATGACGGTGAAAATTGGTACCAGGACGATGAAGGG
387 | ATCGTTATTCGTGAAATATCGAAGTTTTCCCGGCTGTTTGATGTCAGTCCGGTGACTTAT
388 | CCCGCATATCAGGAGGCCGACTCCGGCGTCCGATCGATGAAAGCCTGGCAGGAGGCGCGC
389 | GACAGCGGTGCGCTAAAGAACGCCATTAATCAACGAATGGCGCGTGAGCGCCTGCTGACC
390 | CTTCTTAACGCGTAAGGAAAAATCATGAAACTGCATGAAATGAAGCAAAAACGTAACATC
391 | ATCGCCAAAGATATGCGTGCCCTGCATGACAAAATTGGCGATACACCCTGGACCGATGAG
392 | CAGCGTACTCAGTGGAACGCTGCAAAATCGGAGCTTGACGCTCTTGATGAGCGTATTGCA
393 | CGCGAAGAGGAACTGCGCCGCCAGGATCAGAACTATATCCACGAAAACGAGCCGGAACAG
394 | CGCCAGCAGCAGAATCGTGATCCAGCAAACCCGGAAGCACAGGCTAACGAACGCCGTGCT
395 | GCGGCGTTTAATGCGTTTTTGCGCCGTGGTCTTGGCGAGATGAGCGCTGAAGAACGCCAG
396 | GCTTTAAAGGAGCTGCGTGCTCAGGGCACGACGCCGGATGAAAAAGGGGGTTACACCGTA
397 | CCAACCCAGTTCCGAAATAAGATCGTCGAAGCACTGAAAGATTACGGTGGAATTGCCAGT
398 | GTGGCGCAAATTCTGAATACCGCCAACGGCCAGGACATTGACTGGGCAACCTCTGACGGT
399 | ACTACTGAAGAAGGTGAACTGCTGGGCGAAAACACTGAAACCAGTGAAGAAGACGTGTCT
400 | TTCGGCGGTGCAACGCTGGGGGCTAAAAAACTGTCCTCTAAAATCATTCGCGTATCCAAT
401 | GAACTGCTCCAGGACAGCGGCGTAGATATCGAGGCGTTCCTGGCCGCGCGTATCGCCACT
402 | CGCATCGGACGTGGTGAAGCGAAGTATCTGGTATTAGGGACCGGCACCGGCACCCCGCTG
403 | CAGCCTAAAGGGTTGGCTGCGTCGGTAACTGGCACCAAAAATACCGCAGCAGCGACCACC
404 | TTTACCTGGAAAGAGCTGAACGCACTGAAGCACTCTGTCGACCCGGCATACCGTAACGGT
405 | CCAAAGGTGCGCTGGGCCTTTAACGATGCAACGTTGCAGCTGGTGGAGGAAATGGAGGAC
406 | GGACAGGGCCGCCCGCTCTGGTTACCGAACATTATCGGTGGCGCACCTGCCACTGTTCTG
407 | CAGGTGCCGTATGTCGTTGACCAGGCTATTCCTGATATCGCGGCTGGTGCCAAATTTGCC
408 | TACTTCGGCGATTTTAACCGCTTTATCGTTCGTCGCGTCACTTACATGACGCTGAAACGA
409 | CTGGTTGAGCGCTACGCAGAGTACGATCAGACTGGCTTCCTGGCCTTCCACCGCTTCGAC
410 | TGCGTACTGGAAGATACTGGCGCGATTAAGGCGCTGGTGGGTAAACCGGCATCTGGCGGC
411 | TAAGGCAACAATCAGCTTCAACCTCCACCGCTCCGGCGGTTTTTTTATGCCCGCAGTTCG
412 | CTGCGGGCCAGGGAAAATACATGAGCACAACGATTGAGAAGTTACGGGCTCAGTGCCGGA
413 | TTGATATCGACGACACCACGGAAGATGAGGTGCTTACGCTCTATTATGGTGCTGCGCGCC
414 | GAAAGGCGGAGAACTTCATCAACCGCCATCTTTATGAAGACGAAGTGCCGGAAACTGATC
415 | CAGACGGGCTGGTGATTGCTGACGACATTCTCCTGGCGTTGATGCTGCTTGTCGGGCACT
416 | GGTATGAAAACAGAGAAGAGTCGTCAGACGCAGCAAAAACCAGCATCCCATTTGGCTTTA
417 | CATCACTGATAGAGCCGTACCGCTATATTCCGCTCTAGGAGGAATTATGCAGGCAGGACG
418 | ATTACGGCATCGCGTCACTATTCAGAACTTCACAATATCAAAAACACCTTCCGGCCAGCC
419 | GGTAGAAAGCTGGACTGATGGAAAAACTATCTGGGCCGAGGTTAAAGGGATCAGCGGTAG
420 | GGAGCTGCTAGCCGATGGCGTTGAGCGTGCTGATGCCACCATTCGCGTCTGGGTGCGTTT
421 | TCGTACAGACATCTCAGCTTCTTCCCGTTTGAAAGTACTGAATGGCCCATACAAAGATGC
422 | GGTCCTGAATGTCACTGGGCCTCCGGTTCCGGATATAAAAGGTACCCGGCTGGAAATTCT
423 | CTGCAAACAGGGGACCGAAAAATGATTGATGTGAATCTGGATTTTTCCGGGTTGCAGGAT
424 | ATTGCCCGCGATCTGCAAACGCTCAGCAAGGCCGAAAATAATAAAGTTCTCCGGGAGTCG
425 | ACCCGTGCTGGTGCCGAATTGCTCCGCGAGGAGGTGATTGATCGCGCTCCTGAGAAATCC
426 | GGAAAACTGAAGAAAAACGTTGTTGTCGTCACCCAGAAAAGTCGCCGTCGCGGTGAAATT
427 | TCATCTGGGGTGCATATTCGTGGCGTTAACCCGCGAACGGGGAACAGCGACAATACAATG
428 | AAGGCAAGCAACAAGCGGAATGCGTTTTACTGGCGCTTCGTGGAGTTGGGAACATCTACC
429 | GCGCCTGCACATCCGTTTGTTCGCCCAGCTTTTGATACCCGCATGGAAGAAGCTACGCAG
430 | GTGGCGATGCAGCGGATGAATCAGGCTATCGATGAGGTGTTATCAAAATGACAGAGGATG
431 | ATCTCTATGACCTGCTGTCGACGCTGGCAGACGGGCGGGTTTATCCGTATGTGGTACCGC
432 | TAGGCAGCGACGGACTTCCTGCAGTTTCCACTCCCTATGTCATTTTCTCGATACCGACTG
433 | ATGTTGCCGGGGATGTTTTCTGCGGCCAGGCAGAGTCGACACTGCGCATTCAGGTTGATG
434 | TATGGGCTGAAACGAATGACGAAGCCAGAGCGTTACGCCTGGACGCCCTGGCTCGCCTGC
435 | AGGTTCTTTCACCTGTCGAGGTGACAAAAATTCCTGGCTACGACACGACAACCCATCTTC
436 | ATCGGGCAACCCTCGAAATAACGGTCATTGCCTGACAAAAACCAATCCAATCCGACCGCC
437 | GCTGGCGGTTTTTTCATTTATGGAGGCTGCGATGTCAGCACTATTTGAACGTGCCCAAAA
438 | AACGGTAGTAATGATTACCTCTGTGCCGGTCACCGCGGCAGAGCTGGATACCGCAACCTG
439 | GTTAAACCTGAGTTGCACTATCAAACAGGCAAGCTTTACCGCTGGTCAGAAAAACGATAT
440 | TGACGTGACAACGCTCTGTTCGGATGAAACGGAAAATATCAACGGCCTTCCTGCTCCGTC
441 | TGAAATGTCACTTTCCGGTAACTTCTACCGCAACCCGGCGCAGGATGCACTTCGTGCAGC
442 | ATATGATAACGACGGGGTTTATGGGTTTAAGGTTATTTTCCCGTCTGGTAATGGATTCCT
443 | GATGCGCGCTGAGGTACGTCAGCACACCTGGGATTCTCAAACCAATGGCGTGGTTGCTGC
444 | AACGTTCTCGCTGCGTCTGAAAGGTAAACCCACCAATATTAACGCCCCAGGAGTTCTATC
445 | GTTTGCCACTGACCTTCCGGCGTCCCAAACGGTCGCGGCAGGAAGCGCCCTGACCATGGG
446 | CGTGGTCGTCCAGGGCGGTACGGCACCTTATACCTACGCCTGGAAAAAGGGCACCTCGAC
447 | GGTCAGCGGCCAGACCAGCGCAACGTTTACGAAAGCCAGCGCTGTATCCGGTGATGCCGG
448 | GGTTTATTCCTGCGTGGTTACTGATGCCGATGGCACTGTGATCACTTCTTCTGATTGCAC
449 | CGTCACAATCAATTAACGGAGCGCCGGGAGACCGGCGATAAAATTAATGTCAAAACCGAG
450 | TCTTAAAGCACTGGCACTGGCACCGATGGCGGGCTTTCGTAAAAAAGAAGTCTCCGTTCC
451 | GGAGTGGGATAACGCCAAAGTCATCATTCGTGAGCCATCAGCAGAAGCCTGGATTCGCTG
452 | GCAGGGCATTGCCAGCCCGGAACCACCCAAACTACCGGAAGGGCAGGAGCCCCAGGAGGC
453 | ACCAGAACTGACCCCTTCAGAACGAGCCTTCCGCACGATGCGGGCCGACGTCACGCTTTT
454 | CATCGATATTTTGCTGGATACCGACCTGCAGCCCGTCTTTACTGTCGATGACACCGAACA
455 | GGTTGAAGCGATCTATGGCCCTGTGCATTCCCGGCTGTTGAAGCAGGCACTTGATCTCAT
456 | TCGTGACGCGGATGATGCTAAAGCAAAGTAAAAATGCCTGGCATGCAGTTCCTGATGGCG
457 | CTGGCGCTCCGGATGGGCCGCACGCTGGGCGAACTGCGACAAACCATGACGGTTGGCGAA
458 | TTCAGGATGTGGGCTGAGTACGACCGTATCAGCCCAATCGGCGATATTCGCGGCGATATC
459 | CTCAATGCTCAGCTGGTATCTGCGGTTTACGGAGCGCAGGGCGTTAAAGTCACCATTGAA
460 | GATGCTCAGCTTCAGTGGAGCACAGAAGAGATTGAGGTAAACGACGGCGGCGATCCCTTT
461 | GCAGGGCTGGAAGCGGCACTGCTGGCTGCGTCAGCATAGCCAGTGATAATTCGTGTGGAT
462 | GCCACTCATAACAGGTGTTATGTTGTTTTTTTTGACACACGGAGTGCTTTAAATGACTAC
463 | TACTGGCTGGATATTATTATTTGTTTTTGCTCGCCTTATTGATCTTGTTATCTGGTATTT
464 | CCTGAACAGAGGAAGCGTAAGAGCTAATGATCAGATCGCTATGCTTAAAGAAATCTCTGA
465 | AAAGCAAAGTGCTCAAATTGATCTTCTGATTGCACTTGCTCATAAAAAAGAGGAACCAGA
466 | AAAAGATTATCTGGAAGAAGCAAGGAAAAAAGCTGGTTTAATTTAATAATATTGAAATCA
467 | TAAAAAAGCCCCACAATGTGGGGCTTTTTGTTTCTGAGGAAATGAAATGGCAACCCTGCG
468 | TGAACTTATCATTAAAGTTTCTGCTAACTCTCAGTCATTCCAGACCGAGATAGCCCGCGC
469 | GTCACGTATGGGGGCTGACTATTATAAGACAATGCAGAATGGCGGCAGGCAGGCTGCAGC
470 | TTCAGTTCGGGAAACTCGCCGTTCTGTTGCAGAGCTAACTGACCAGATGGAGTCAGCAAA
471 | GGCTACCGCACTTGGATTGACCGGGGCATTTGCTGGTGCTTTTGCTACGGGGCATTTGAT
472 | ATCCCTGGCTGATGAATGGAATTCAGTAAACGCCCGCTTAAAACAGGCATCTCAGTCAAC
473 | TGATGATTTTACCAGCTCTCAAAAACAGCTGATGGATATCAGCCAGAAAACGGGCACATC
474 | TTTTTCTGACAACGCTAATTTATTTTCCCGTTCAGCAGCCTCAATGCGGGAATATGGTTA
475 | CAGCTCTAGCCAGGTGCTGGATATTACTGAGGCTATTTCTACTGGTTTAAAACTTTCTGG
476 | CGCGAATGCTCAGGAGTCCAGTTCGGTCATCACTCAGTTTAGCCAGGCTCTGGCGCAGGG
477 | CGTGCTGAGAGGTGAAGAATTCAATGCAGTCAACGAGAGCGGCGACAGGGTTATACGGGC
478 | GCTTGCGGCAGGGATGGGGGTTGCGCGTAAAGACCTTAAATCTATGGCGGATCAGGGGCA
479 | GTTAACCATTGATAAAGTAGTGCCAGCCCTCATCAGCCAGCTTGGTAAGCTACGGAATGA
480 | ATATGGTGAATTGCCGCAGACTGTTTCATCGTCGGCAACAAAAGTTGAAAACGCTTTTAT
481 | GCAATGGGTCGGTGGAGCTAATGAAGCTAGTGGCGCCACAAATACCCTGACCGGATTACT
482 | TGATGGCGTAGCCAACAATATTGATCAGGTCGCCACTGCTGCCGGAGCGCTTGTTGCCGT
483 | TGGCGCAGCCCGATATTTGGGAAATATGGCTCTTGGTGCCAGCTCTGCAACGGCTGGGAT
484 | TATTAACGCCGCAAAAAGTGAAGTAGCTTTAGCTGAAGCCCAGGTCAGAGGGACGCAGGT
485 | TTCGACAGCTCGCGCGCGTGCTGCAGTTTATCGTGCCCAGCAGGCACTGGCAGCGGCGCG
486 | GGGTACAGACGCGCAGGCCGCCGCAGAAAAACGGCTCTCACTGGCGCAGGAGTCACTTAA
487 | CCGTAATATTCAGGCCAGAGTATCCGCTCAGACTGCGCTGAACTCGGTTACTGCTGTAGG
488 | TTCGCGGCTCATGGGGGGAGCATTAAGCCTCGTTGGCGGTATTCCAGGGCTGGTTCTGCT
489 | TGGTGCCGGTGCCTGGTACACGATGTACCAGAATCAGGAACAGGCCAGATTATCCGCTCA
490 | GGAATATGCAAACACCATTGATGCAGTCCGTGAAAAGACAAAATCAATGTCCCTGCCCGA
491 | AGTTTCTGATAATGAGACCAAAACCCGTCAGGCGCTGGAGGAGCAAAACCGTCTTGTTGA
492 | TGCACAGGCATCAAAAGTAAAAAGCCTGAAGGAAGAGATTGCGGGTTATCAGTATGTTCT
493 | GTCCAACCCCGGGCCGACAACCAGTGGCGGTTTCATGATAAACCACCTTACTTCGGTTGA
494 | AACGGTCACCCGTAGTCTGGAAGAAGCGACTTCCGCTCTGGCCGTTGAACAGGAGAGGCT
495 | GACTCAGATGCAGGCTAAGTCTGAGTCGATCCAGTCGGTACTGGAAGGGATAGAGAACAG
496 | GAGAATAGCATTAATCCGGCAGCAGGCCGCAGAACAGAATTCAGCATATCAATCGTTATT
497 | AATGATGAACGGTGAGCATACTGAATTTAACCGTTTGCTGGGTCTCGGAAATAATCTCCT
498 | CATGGCCCGGCAGGGGCTGGTAAACGCACCACTACGCTTACCACAGGTAGACCTGACAAC
499 | CCAGCAAACGGCTGCACTGGAAAAAAGCCGCCGTGACCTGGCGCTTTCAAAACTCAAAGG
500 | AGAGGACAAAGAACGCGCACGACTGGGTTACGCTGCGGATGACCTGGGGTTAACTAACGA
501 | TCCTCAGTTTCAGACCGGACGGCAGGAGTTTATTAATAACGGCCTGAATGAATGGAGAAA
502 | CAACCAGGAAAATAAACCCAAGCCAAAAGGGAGGCATGGGAAAACCGAGGCGGAGAAAAC
503 | CGAAGATACCTATACCCGGTTGATTAAACAGCAACGGGAGCAAATTGCTCTTTCCAGCCA
504 | AAACACTGAACTGGCAAAGATGAAATATCAGGTTACTCAGGGGGAATTATCTTCGCTTGA
505 | AAAATCCAAAAAGGAAACATTGCTACACAATGCTGCGCTTATTGATCAGAAAAATATCGC
506 | TGAACAGTTAAAAACATTCCGCGAAGGTCTGGCCGACAGTAATGCTGCCGCCCGGGAAAG
507 | GGGGAATATCGATTTACTCGGCGCGGGACAAGGGGATAAAGCCCGTGATCGAATGAAGGA
508 | AATGGCGGATATTCGCGCTGATTTTCTGAGGCAGCAGCGTGATTTACAGCGTGATTTCAG
509 | TCGTGGGCAGATTTCCGAAGACCTGTATAAAAAGCAAACGGAAGCGCTTAAAACGGCGCT
510 | TGCCGAACGCCTGGATATTCAGGAGGAGTATTACAAAAAAACCGATGAACAGCAGTCAGA
511 | CTGGCGCGCGGGAATCAGCGATTCCCTGATGAACTATGCCGATCAGGCTTCTGATCTGAG
512 | TTCAATGGCTGCCACTGCAACCAGCGAGATTCTGGATGCCACCACAAACTCTATCTCTAA
513 | CAACCTGACAAACGTCCTGACAGGCGCTGCTTCTTTTAAAGATGGGATGTCTAATATTTT
514 | CTCTTCCCTGGGCGAAACGGTGATTAAGACGCTGATCCAGATGGCAACACAGGCGTTAAT
515 | CACCAAAGCAATTATGGCGTCATTTGGCGGCGGAGCGGGTGGGTTGTTCGGTAGTCTTTT
516 | TGGCGGTGCCAGCGGTGCGGCAAGTAGTGGTACCGCTATTCAAAGCGCGGGAGCTAATTT
517 | TTCATTTAACGCTCTCGGAGGCGTTTACGATTCTCCGTCACTTTCTGCCTACAGCAATGG
518 | TGTTTACAGCACTCCCCAATATTTTGCGTTTGCGAAAGGGGCAGGTGTATTCGGCGAGGC
519 | CGGGCCCGAAGCCATCATGCCGCTTACCCGTGGCGCTGATGGTTCGCTGGGGGTTCGTGC
520 | GGTTGGACGTGAGTCACCGGCAGTCCAGGATGCTGCAAGGCAGATTGAGGCGCAACCACG
521 | AATCGCGGTCAGTGTTGATGCCCGTAGCACGTTTAGCGGGCAACCTGACGACGCAACAAT
522 | GCTGGCAGTAGATCGAAGGAATGCTGCACTGGAGCGACGCATCATCAACACACTCACTGC
523 | TGAAGTAAATAACCCCCAGAAGAAATTCGGACGCGCCATCTACTCCAATCTACAGCCCAA
524 | AAAACCAAGATAGACTGCCCGGAGGGAAAGTTAATGGCGGATATTATCTATCCGGATGAG
525 | TACCTGCCCATGCCTCTTATGGACGGGTACGGTTTTAAGCCCATCTCACCTTTACTGCGA
526 | ACGGAAATGACGTCCGGTCGAGCAAGGCAAAGGCGGCGATACACCTCAACACCCACCCAT
527 | GCCTCGGTTAAATGGATTTTTCAGACTGATGCGCTGGCGCAGGTGTTTGAGGCCTTTTTC
528 | AGGGACGCACTGAAAGACGGACAGTCCTGGTTCTATCTGAGGCTCCAGACTCCGATCGGG
529 | GTAAAGCCCTACAAAGCCAGGTTCATTGATATTTACGAAGGTCCGACACTTGTCGCGCCA
530 | AAATACTGGCAGTACAGCGCAACGCTGGAGTTATGGGAGCGTCCGTTACCGCCTACAGGA
531 | TGGGGGAATTACCCGGAATGGCTGGCTGGTCAGTCGTTACTGGATATTGCGCTAAACAGA
532 | GAGTGGCCTGAGCATGACAATTCTTGAGCAACTTTATGCAAGCAGCGGCTCTGAAGTCAT
533 | TCACGACACGCTGCAGATCACGGCAGGTGATCAGAACTACTGGCTTACCCGCGGGTGGGA
534 | CAATATTACTGCCTCGTTAGAAGACGGGCAGCAGGTAACGTTTGAAGGGTGTGCTATCGA
535 | TATAGCATTGCCTGCCAGGAATGCCGACGGAACGCAAGATCTGAAATTTTCCATCAGTAA
536 | CATCGATGGTGTCGTATCCGATACGATTGACAGAATTCTGGACGAAATGAAATCGGCAAC
537 | ACTGACTTTTCGGCGGTATATCTCCTCTGATTTATCTGCACCTGCGGCATCGCCTTACAC
538 | CCTTGATGTGAAATCCGGATCGTGGACGGCAACTGCGGTGCAGGTAACTGCCGGATATAT
539 | GAACATCCTTAAAACGGCCTGGCCGCGTAATCGTTATAACCTGGCTGAACATCCCGGTCT
540 | TCGTTATATGTCTTCCTGAGGTATTCACATGTTCCATTCTGATAAATACCTTTCGGTCAA
541 | ATGGCTGAAGGGCGGGCGCGTTTATCCTGAGCTCGACTGTTTCGGCATTATCAATGAAAT
542 | CCGCGGCGATCTCCTTCTCCCGTTATGGCCGGATTTTTCCGGCGTGACGAAAGATGAGGG
543 | AGGGCTCGATCGTGAGGCCAGGAAGTTTATGAAATCCCTCACACGCTGTGAGCCTTGTAT
544 | CGGGGCCGGGGTTGCTTGTTATTCAGGATCAACCGTGACGCATGTTGGTATCGTTGTTTT
545 | GCTGGATGGCCAGTTGCAGGTTGCCGAATGTAATCCGGGAACCAATGTCACCTTTCTACC
546 | TCTTCCGCGATTTGTCCGTCGGTTTAACCGTGTGGAGTTCTGGCAATGACGATAAGAATC
547 | TACCCTTCCCGGCTCCCCGGAGAACCGCTTGAAACTCATGAGCACGGCAATATTACGCTG
548 | CATCAATGGATGGTCAGAAATGTTCCTGGGTACAGCCAGGACAGATCGCACCCAGTTGCC
549 | GTTGAATTAAATGGCCGCACACTTCCTCCCGATGAGTGGCCGCTTTGCCAGTTGAGCCCT
550 | GACAGTGATGTCAGAATTTATCCTGTTCCCTACGGAACGGGGCTGGAAATTGCTGTCTGG
551 | GTTTCTGTTGCGATATCAGCTGCCAGCGCAGCCTACTCGTTGTTCTTCGGGCCGAAAGTC
552 | GATCTCGGTGGTTATTCATCGGGGAGTGGTCGCTCACTTGAGCTTAATCCAGCAAAAGCT
553 | AACACGGCAAAACTGGGTGACCCGATACGTGAGGTGTTTGGTCGATGCCGCATCTATCCT
554 | GATTATCTGGTGCAGCCGGTTACCCGTTTTGACCCCGATGATCCAACGCGAATGACGGTC
555 | GAAATGTTTCTTTGTGTCGGGCAGGGGAGATTTTCGTTTACGGGAGGAGATAAACGGATT
556 | GGAGAAACCCCGGCAGCCTCGCTGGGTGATGGTTTCAGCGATAAGGTGTACCAGCCAGGA
557 | GAAGACGTATCTTCTGATCCGCGAAGTGAAAACTGGTTCAACTCGACAGAAGTCGGCGGA
558 | ACATCAAGCGGAACAGGGCTGGATATGGCCCAGACCTCACCTGATTCCGACGATATTATC
559 | GCTGACAGCATGACGGTTTCTGGTGCATCCGTAACCTTTACAGGCCTTGATACGGATGAT
560 | GGTGACGATGACGACGAGGACGATAATTCTCTCCCGGACAGCTGGATAACGGGGGCCATA
561 | GTTGAAATTAAGGCGCCGACAAATTATCTGATCTCCACCTCTTCTGGTTACAGTGTTTTT
562 | GCCAGCTCATTGCTTACCGAACTTGCTCCCGTAGCGGGTATGCCGGTGACGCTGAGTTTC
563 | AACAGTGTTGATTATGACCTCGTCATTGCGTCCTATACCCCGGGTCAGGATGCTGTGCCT
564 | GGCGAGGGTGGCAGTGCAGCAAAAATTCAGGCCAGTGCGGCTCCCGTCACCTACGATTTT
565 | TCGAACAGCTCCAGTACGTTCATGATCACATGGCAGGGCACCACCTATACGGTGTCGCTG
566 | GTAGCGAACTACATCTCGATGTCGGGACTGCTGGCGGCTATCACCGAGGGGCTCACTGGC
567 | TCCGGCCTGGTCGCACGGGACAACGGCGGTACCGTACTGATAACCGAGGCGGCCAGTCCG
568 | TTCGTTGGTGGGGCAATCACATCCTCCTCGCTGCCTGCAGCCGTTTTCGGTGATGCCCCG
569 | GTTTACACCTCCGGCACGGCATCAACCGGCGGCAGCCCGGCGGTAACGGCAAACGTGACG
570 | CTTGCGTATAACAGCACTACGGGAACCGCATTCTCGGGCATGCCTGAAGGTGTGCAACGG
571 | CTTTCACTGGCTCACCGCGGGAATGAGTACCAGATCGTCTCTGCCGACGGCACAACGGCA
572 | ACAGTGGTGCGCCTGGTTAATGGGTCCGTTGATGAGTCGTGGCCGGGATTCACCGCCAGG
573 | ACGATGATCGACTATGAGGCCACTGGTCTTAACGACACGCTGAGCTGGCTGGGGCCGTTC
574 | CTGGTTTGCCCTGAAAATGAGACCGTGGATATGTTCGAGGTGAATTTCTCTTTCCCGAAC
575 | GGTATCTGCGGCTTTGACAGTAAGGGGAAAAAACGCATTCGCCATGTTGAGTGGGAGATT
576 | CAGTATCGCGTCTACGGTTCCGGATCGGGATGGGTGAGTCACCAGGGCGAGTACGCGCTG
577 | AAAAACATCAACGGGTTAGGTTTCACTGAGCGGATCACCCTCAGTTCTCCGGGGCTGGTG
578 | GAAGTTCGCTGTCGTCGACGCAATGAGCAGGGCAGTAATAACTCGCGCGACAATATGTAC
579 | TGGCAGGCTTTGAGAGGGAGGCTTCTGGCAAGACCGGTATCCTACTCAGGTGTAACAACC
580 | TGGGCAATTACCGTTGAAACCGGAGGGAAGCTGGCGGCACAGTCTGACAGGCGCGTCAGC
581 | GTGGTCGCTACCCGTGAATATGAGGGGGGAGGTAACAGAACTATAAGCGGCGCATTCCGT
582 | CATGTGGCAAATAGTCTTGGATTTAATGCTAATCAGCTCGACACCTCTGCAATAAATGCT
583 | CTTGAAACTGCCTGGTGGACGCCGAGGGGAGAATATTTTGACTATGAGGCAAGCAGCGAC
584 | AGTGCTTCAGCGAAAGATATTTTCGACAAAATCACCGAAGCAGGCATGAGCTATTTTTTG
585 | CTATCAGATGGGCTCTTATCTGCCGGGCGCGAAGGTATCAAAACCTGGACCGGGATCATC
586 | ACTCCCCAGGATACGGTAGAGGAAATGCAGACATCATTCAGGGCCCCTTCTGATGATGAT
587 | TATGATGGTGTTGACGTCACATATATTAATCCGGTTACCTGGGCGGAGGAAATCGTTCAG
588 | TGTCGGACAGCTGATAATCCTGTGCCACGCAAAGTGGAGTCGTACTCGCTGGGCATTGTA
589 | ATGACTGCAGATCGTGCTTACCGGATAGGTATGCGCAGGCTCATGAAATATCTGCACCAG
590 | CGCAGGACCTATGAATGCACAACTGAGCTTCTTGGCTGGTGCTATCAGTTTGGCGATCAC
591 | ATCATTCTTTCTGATGATATTCCGACGGGTAAAACAATCAGCTGTCTGATAGAAGGCGTG
592 | ACATTCGATGATGAAGTTATCACGTTAACAGTCACTGAGCTTCTTGACTGGAGCTATGCT
593 | AATCCGCGCTGCTGGATTCAGTTTCAGGGGGGGCGGCCGTCGACTCGTTTGCTAACGCCG
594 | ACACGTGTCGATGACTTCACCCTTACTATACCGTACAACGACGACCTGCACCCGGAAGAC
595 | TGGATTATGGATGATCCGGATGTTGAATTACCTCGCCTGTTGTTTTGTGACAGTGAGAAG
596 | GGGGCGCGGCACGGTATCGTTCAGGAAATTGTCCCGTCTGATGACTGTACTTGCCAGGTC
597 | ACAGCCCCGGAATATAAAGAAATCTTTTACGCATACGACGACGCTACATACCCTGGCGAC
598 | GTAGCTTAGCAATTTCAAAAAAATCAATTCACCCGCTTCGGCGGGTTTTTTCATTTTTGG
599 | AGCACAATGTATGGCCAACATCGAAAAACTTGGCTCGTCATCACCAGAGGTATTGCTTAA
600 | GAATGCAACTAACCTCGATAAGTTAGTCAATGGCCGGGAATCGGAATCATTACCTGATCG
601 | CTTTGGTGTACTGCGCAAAACTTGGCACGGCATGGAGATGATCTTCAACCGCTTTATAGA
602 | CTACATCACTGGTCGCGGCGAGCAGGCAGTTGCAGCTATCGGCTGGCAGGAGCTTGGCAA
603 | CTGGGCTGTTGGTCTGGCTGTAGATAATCGCCAGCAGATCGTCTACTACAATGGCTCCTG
604 | GTACAAATACCTTGGTGAGCTTGAACACGTCATTGCCGGAGATTCTCCTGAGAACGATGG
605 | CGGTGTGTGGTCGGCTGCAAACCCCACAGGGAAATGGTCGAACATCGGTGACGCGGCTCT
606 | TCGCTCAAACCTGGGTTCAGGCGAAGGCTTTGCTTTGGTGGGGCAGGTATCATCATTTAC
607 | TGCTCTGCGCTCTGTTGTTCCTTCATATGAAGGTCAAAGTATTTTATTGCGCGCCCATCC
608 | TGTCGGTTGGGCCGCAATGTCTCACGGTCCCGTTGGTGGCGGAGAATTTATTTCAAGGCG
609 | AGGCTCAGCTGAGGATGATGGTGGTTATATATGCGTGCCGACAGGTCAATCTGAGTACTA
610 | CTGGCAACGCATCCCAAAAAACCCCGGCAAAGTCTGCGCGACAGAGTTCGGTCTTTATGA
611 | TGGTGCTGCTCTTGACGACATCTGGACCAGTGCAATTAACTATTGCATTAAGAACTCCAT
612 | TGGTTACTTCTCAACCCCGTCACTGGGGCCTGCCGGGTATACGTTAGTTGGTGGGCTGGA
613 | GTTCATTAACTCAACAAACGGGCTTATCATTGAAGGCCCTGGCATGGGAACGAAGGGCAA
614 | CATTCCAGTAATTACCCATACCGGCGCAAACGTCGCCCTGACGTTTAAGCGTACTACGCA
615 | GGCTCAGAGCCTTTTTAATGCGGTCATTCTTAAGAATTTCACAGTAGTTGGGAATGCTCT
616 | CGCTACTGCTCTTGTCAGGTTTTCTGATTTTTATGGCGGTTCAGTATTTGACTCCGTAAT
617 | TCGAGATTACACTACTGGAACAGCAATAGATGTATACAACGACAAAGGATGGACTGAAGT
618 | TATCAGGGTAGATAATGTCGTCGTCAGGACGTCACAGCGTGGGATCTGGTTCCACTCAAA
619 | CCCTGCATCTACTGACGATAAAACGCTTTCGTTTTATGGCGCCAGCATTTCCAACTTTGG
620 | CTTCCAGCACGGAATCACTGCGGCGTCATATGGCATCTATGTCGGTGACGGCTCACGCGC
621 | AGATAATTTGTATAACTGCGATATCGACATGATGGGATGGTGGGAGGTCGGCGGTAACAG
622 | CACGGCACTTTATGCAGCTGATAAAGCGCGCGTTGATGGTTTTGCCAACTTCCGATACGA
623 | CGGTTTTGCCGCAAGCCCAATTACATCAAGCTCTCAACCATGCCGTCTTGTGAGGAAGGC
624 | TGGTCTGACAGGCTACATTAAGCTGAATTGTAAAAATTACAAGCACCAGGCTGGTCTTGG
625 | ATTAACTTCTGGAGTTACGCAATTAACGATCCGTCCATGGTTAGCCATTGCGGAGGCAGT
626 | TGCCGGTGTGGCAACCCCACACCCGACTCTTCCGGCTGAAAGCATAATCAGCGTGCCTGG
627 | AATGAAGTGCAAACTAACGGGGACTTTATTTAAAGGGCAGAACTCAGTTATCTCTGTCGT
628 | TGGAATGCCGCCATGGCACAGATATAAAGTTACTACCCGGTGTGATTTATCAAGCACCTC
629 | TCAGCAGCAATATATCGTAAATATTCCAAACGGAGCGAACGCTGGCATTACTACACGCAC
630 | CGACTCAGTTCCAGCTGTAACGACAACAACCACTATAAGCGGAGGGTTAGCAACCAGTAC
631 | CTCAACTGCTAAAAATAAAAACTTCGAGCCAGTATTTATTACTAATGCTGGAAATCTTCC
632 | TGATAATACATTTAGCGAAACAAATAAGCAGGGATTCGATATTCATCTGGATGGCACACA
633 | ACCAAACGTTATTAATGACGAGTATCCGGTATCCATTGAGATTGAAGCTATAGATTAAT
634 | >PHAGE2_TRUNCATED
635 | ATCAACGCCTGGCTCCTGTATCGTCCAGAGAAGGTTGCACTGGGGTAGTGGCTTATGACT
636 | GTTTAAGCCAAGGGGCATTGTACGCGGAATGCGCGTTTTTGTCGGTTTATTGTCTGTGCG
637 | ATTGACTGGGGCGCCCCTTCCTGTCCTGGCTAATCCACACGTGGATAATTTCGGCAAATT
638 | ATGATGGCGAGAAGAATATGCCATGGCTGGCGTTTAGGTAATGGTGGGAAATAGCGGTCA
639 | ACGTTTATATGGTGTCCCCTGCAGGAATCGAACCTGCAACTAGCCCTTAGGAGGGGCTCG
640 | TTATATCCATTTAACTAAGGGGACATAGAACTTACTGATTTTTAAGCGTTCAGCTTGTGT
641 | TGCATGTTATCCTATCATTCCCCGTACCATCAAGCATTTCATTCCTTTTATTTCCTTTCC
642 | GTTCGCATCGATTCGCTTAGAAAATCACTTCGTTCACTTGCCATTGCGTACACATTGAGT
643 | ACAGAATGCAGAATTTCAGTGTGTACAGGATACAGAGCCGTGGCCCTTAGTGATACCAAA
644 | CTCCGTAGCATCAATGCTAAGCCATACAGCGGCGCAGCTGAGGTCACAGATGGTGACGGG
645 | CTGAGTGTACGCATAACTCCCACAGGCACGATCACATTCCAGTTTCGTTATCGCTGGAAC
646 | GGTAAGCCCGTTCGCCTCTCCATTGGCCGCTATCCCGCTATGTCTCTCAAGGAGGCGCGC
647 | GTAGTCGTCGGTGAGATGCGCGAATTGTACCTCAAGGGACTAAACCCGAAAAAATATTTT
648 | GCCAAAGAAGATGGCGAGCTGACTCTAAAAGAGTGCCTGGATCAGTGGTGGGGCAAGTAT
649 | GTTGAAACGCTGAAGCCGAACACTCAGACGCTGTACAAGTCAGTTGTGTACAACACGATG
650 | TACACAGAATTCCCGGACGCTCCGGTAGTAAACATTCCTGCTTCTGCATGGGTGCGTTTC
651 | TTTGATAAGCAGGAAAAGAAGAACAGCAAAAAGGCCAGGGTGCTTCTTCTACAACTACGT
652 | TCTGTAATGAACTGGTGTATCAGCCGCCAGTTGATCCCATCGTGCGAGGTCCTGAAGCTT
653 | AGCGTTAAGACCATCGGAAAAAAACCTGATGTGGGTAGCCGTGTTCTCACGTATACCGAG
654 | TTGGCTAAAATCTGGCTGGCGCTGGAGAACAACAAGATCGTTACCTCCAACAAGGTGCTT
655 | CATCAATTGCTTTTGCTTTGGGGAGCCAGGCTATCAGAGCTGCGTCTAGCTACCGCCAGC
656 | GAATTCAATATGGATGATCTTATCTGGACGACGCCAGGAGAGCATTCCAAGATGGGTAAC
657 | GTTATCCGTCGACCGGTGTTCGATCAGGTGAAACCTTTTGTTGAAAGACTCCTCAATGCA
658 | GGAAACGATGTTTTGTTTCCTGGCCAGGAACTGGACAAGCCTATAGATCGCTCGTCAGCA
659 | AATCTCTATATGAAAAAATTAAGGGATAAAATTGATATACAAGAGTGGCGAACGCATGAC
660 | TTCAGGCGCTCACTAGTGACGAATTTATCAGGGGAAGGGGTTATGCCTCACGTCACCGAA
661 | AAGATGCTGGGCCATGAGTTGGGGGGAGTGATGGCGGTGTATAACAAACACGATTGGCTG
662 | GTGGAGCAGAAAGAAGCTTATGAACTTTATGCAGATAAAATTTTTTGGTATGCTAAAAAT
663 | ATAGTTAAATATTAGCTTGCAAAGTTACAAAGAAGGTTTGATTTTTCTGACAAGAAAACC
664 | AATGATACAGGGGTAATAATACCCCTTTTTTGAATCTTAGTTTTTTAATTTTCGTCAAAG
665 | TCATGTATGTCATATTTATATCCGGAATTGCTTACGGAAACATTGAAAATTGGAATGTCT
666 | TTTCTAATGCTATCTCTTATTATCTTCACAAGATTCAGGCTTGAGTGGCTGACTCGTCCA
667 | CCGACATAAATAGCTTGAATTGCCATCGGGGAATATTTTAATAATCTGTTTTTTCCAGAT
668 | TGTATATTTACCAGCCTGTATTCTTCCTCATACCACCAAGCATTATGTTTTTTAAACATA
669 | ATGTCTTCATCTTTTGCTTCGTTGAAGGTGTTAATTGTATGTGCGAGTGATGTATAAATA
670 | ACATCTTTATGAATGATTCGAGATTCTTTATTCTCATTAAGAGACTGTAAAAGTTTACCT
671 | TGGTCAAACATAATACATATTCCTGAGAATTGCGCAGCATAATGAGACCACATCAAGGTG
672 | CTTTCTTTAAAATTGATCTGATCTATTGGGTTGATTTTTGAGAAGCAACATACTCCAGAA
673 | TCATTAACTAGAGCAGAAATAGTGCCATTGACTTTGTTCACATTATTTGTCTCACCAAAT
674 | ATCATTTGAGCAGTTAATTCATATGTGCCTTCAAATGGGTCATTAAGATTATCAAAACCA
675 | GAAAACCATAAGCAACAATTACTCAATGAACTTAGAGAGTCCAAACTAATGCGTTGGTAT
676 | TTATATAGATTCATCGCACTTTCCAGTTATCTTTAATAATAATGTGGTTAATTATACATC
677 | AAAATGTTAAATAATCAAAAATGATTCTCATGAGATCATAAAGGAAAGTGCTTCTTCATT
678 | GTGTTTATTTTACATTAAAGGATTAATGATAATTAATACTGTCTGTTTATCCCTCCAGAT
679 | TCTATCCACTGTAATACTGCTTTACGGCTGTATCGGGTTGGATATGTAAGTACAGGTTTA
680 | GGGAAACCATGATCCTTTCGTAATCTCCAAACAGCAGTTTTCTTCTTACGCAGCAGGTCA
681 | AATACTTCTTTCTCTTCCATAAAGTCTGTAGAAGTCATAAGCACCTCATTCAAAATTACC
682 | GTTAAAAATACAGGTTCCACACCCACCGCGAGCCCCTTCAGTACAAACATCACAGCGGTC
683 | TACTTTTTTACGAGGTCGTTCTTTGATGTGCAGCCTTGGTTCCCCGTCTTTTGGCTCCGG
684 | CCATGAGCGCTGCTTGTTTACCGCCAGCTTTTCGATCATCGCTTGGGTAATCTGCTCATC
685 | AGTGATGCCTGCACGACGCTGCGCATCCCACAGCAGGAACTGCATATCAGCCCATTCCGA
686 | CAGGTCATCCGGTTCCGCTGCGGCCTCCATAGCCTCTTTTGCTAGGTGCTTCAGTGGGCC
687 | AATAGGGCCGACATTACCGAATGTTGCCTGTGACCACTCTGCGTGCTCGCGGCGTACCTG
688 | CTCTCGGTCCTTTGCTGGGTACGCTGGCGGATAATTTGCCAGCATCCAACTAATGACGTA
689 | GTCGGCCTTGAACCGCTCAACCGGAAATCCTTCATTCCAGTCACGGAAGTGATAGATAAC
690 | TTTTGCCAGCTCAGGTTGAAGCGCCACCGGATCGCTTTCCATTTCGGCCAGCGCGATGCT
691 | GGCCAACTCCTCGGCCTCTTCAGCTAGCAGCATTACGTTGCTTCCAGCTCCGTAGGTTTC
692 | ACGCCATAGTTTAATTTTTTCCAGTCGTTCTCTGGTTAACTGGTTATTGGTCATTTATTC
693 | AGCCCTACGCTGTTAGCATGGCAATAATGCTCACCATTCGGGCGGGTAGACGTTACACCA
694 | CAGCGCGGACATGGTTCAGGAAACGTTAGAACCAACTGTTCAACAACTGTGGAAACTTCC
695 | TGCTGTTCAGCGTCGGTAAGCTGGCGCTTAAGTTCAAACTCCAACGAATCGACGATTAAG
696 | AAGGCGAAGCCCTCAGTAGAACTGCGCATAGCATCAATGATTTTTTCTTTCGTCAGAGCA
697 | TCAGTCATGGCTGGCCTCCTCGAATAACACTTCACCCTCAATACCGCCGACCTGATAAAC
698 | GATCGATCCATCGTCCCGATATTCCACTGGTGCAGCGCTCCAGCCTTCGCCATTGGGATC
699 | GTCATCGTCGCCGACTTGGAAAAACCCGCCAGCCACTACACGGGCCGGATACATTTCACC
700 | ATCCGTCCAGTATCCTTCGGTATCTTTTAAGCATTTGACATTCATGCTGCCACCCATTGA
701 | TTGACCAGCCATATTCCCAGCGCAGAAAGCGATGCCGCTCCAACCAGAACTATTGCATCC
702 | AGGATTAATGTCTTTCGCAGAATGGGATCACGGTAAATTTTGAAAATCATTTACTAACAC
703 | CTCCCATTGTTGAATCAGATTCAATCGCGCGGCACCAATGCTATCGGCGTGGTAGGTCAC
704 | CTTTACAGAATGATGTTTGTGCGGACATTGCAAAGTCCCATAACGCATATAGGGGCTATT
705 | ACCATGCCAGGAGAATTGAGCTAAAGCCCCACAGGTAGGGCATTTGAGGATAGTTTCTGT
706 | CATGCGGCATCACTTTCTTCAGATGATGAGGTCTGTTCATTGTTTTGCTCGCCTTCCCCA
707 | GGTGCCGATGTTTCGTAGCGGAATTCCTGAAGAATCGACAGCACCTCGGCCTGCATTGCT
708 | GGTGGTACTTCAATGATCAGTCCGCCGCTGGTGGTCTCTTTGCATGATGAGATGATCTCC
709 | AGAAACTTCCGCGCCTTTCCTGCATTGAATTGTGGCTTGGCGATGCTCTTTGTGACTTTC
710 | GTTTTCCCGGCTGCTTCTGCTTTTTTCATCAGCCTGGCGGCTTCCCGGTCTGCATAAACG
711 | CCATGTTCACGAGAAATGCCAATCGCAATGGCATAGTTCATAGAGCCATCGCGAACCAGC
712 | TTTTTGATATACGGGGTGCATTCATGAAGCTGGAGATGTTGAAGGATATCGGACTCTGAA
713 | CGCTTAACTTTTGCGGCAATCTCTGCCGGGCTCCATCCCTGATTCTGAAGGCGGTGATAC
714 | GCCGCACCACGTTCAAGGGGAGTAAGTGCCAGCCCTTGCGAGCTAGTCACCATAAACGCG
715 | ATCTTATCGGCTTCAGTACCAACAAAATCTTTGCACTCAAGGCGCACGATATCGTGTCCC
716 | ATAGCAATAGCAGCGAGCGCACCGTGATAGCGGTGGTGGCCGTCGATCACCTTTACACCA
717 | CGCTCAGTAACTTCGACGGCCAGCGGAGGAATATATTCACCGGCAATAAACGCATCGCGG
718 | AATTCATCGACATGCGCCTGATTCAGTTCGCGAACGTTGTAGCCTTCTTCCGCATAAATT
719 | GAGGCGATCGGGACGTTATAAGTTTTACGGGTAGTTAACCCGGATTCTTTATCGTTATAG
720 | AGCTGGCCTAAGCTTGGCATATGGTCACCTTTTTGAATTAGGGAGTGCTTCGCTATGCGC
721 | CCCACCTGGAGGCGCATAAAACAACACACGGGATGGATGGGTTAGATGGAGCCTTCGTAG
722 | ATAGGCAGTTCATCGCCGAGCTGGTTTTCCATATCGGCTACGATCTCCTGGAAGGCATGC
723 | TCAATGATTTTTTTCGGCTCGATCAGCTCATACCAGAGGACCAGCTGACCGTCGCGCAGG
724 | CGGTAGCGAATTCGCGCATCAATCTGGTACGGTGCGCCATTATGGAAAGGCGCGATTGCC
725 | AGGCTGATTTTTTCTGGGATTTTGGTATTGCCTGAGCCGGATTTATCATCGCTGTACTGG
726 | AACTGACAGGTTCCGTCCTGTAGGCGCTTAACCGACTTGAACTCAACTTTCCTTGTCTCT
727 | TGGAAGGCGAGTACCATTTCCAGGAGATCGGTACCGGACGGGCCTTTATAGTTATCGCTA
728 | ATCGGCGCGATGTTCTGGATGTTGTTTTCCAGAAACTCAGCGAAGTCGATCTGATTCATC
729 | TTGTTACCATCAGACCCCACCCATGCTTTCCAGTCATCAGAGAAAGGACAGTCATAAACT
730 | GCTTTGTGCATTCCCCAGTGTGGGTTATCGGCGTCCTGGTGGAAGTCCAGCACCGCGACG
731 | ATCCGGGTTTTTGTCTTGTCGGCGAAAACAACAGAACGCGTATCACGGAATCGCTGGATA
732 | TATGCGATTAGCGAACCGGGGGAAATCAGGTTTGTATTCTGGCGAATACGAGACGGGGCA
733 | CTCTGGAGGCTTTCTAACGATTTGATATCGAAGCCATCCGGGACGACGACGGAAGGGATG
734 | TCGGTATTAGTTTTCAGCGTTGCAGCAACCAGATCGCGGATGTCGTGCACGGCAGAGCCT
735 | TCAATTTGAGACATTGAATAATTCCTTTAGATAGGTGAGTTGAAAAAGAGGGTGGGATTA
736 | CTGAGCCAGCTTAATAGGCGCAGCTTGCGGTGCTTGTTCGATAACTTTCAAATCCATCTG
737 | AACTTGCGCCGGGTCATCACGCAGCAGATCGCCATCAGCGGTAGAGAACATGATGGTATC
738 | GGCGCGGTCCAGTTCCGGGATCGTGCGGGTTACTTTTGGCGTGACCTTCATTGTGTTTTC
739 | GTCACGGGTATTCAGCATTGAACAGTTAAGGGTAAGGGTCACAGCTCCCTTTTTACCCGT
740 | TTCACGTACAGCCTTGATGACTTCGGCCAGCGCTTCGGTCAGCTCGGCATCGAGAGTGCC
741 | TTTGTTGATGTACGCCAACTGCTGGCTAAACGGCGTGGTATTTTTGGTTTCGGACATAAT
742 | TATCTCCAGTTATCAGCAAGGATCGCCTTTCTGGGTAAGAAGCCTGTACAGCCAGCTCTG
743 | CCGCCAGAAGCGAACGAATGATTTAGGGTTGCGAACAGCCTGCACACCACGAGGGACGCG
744 | CATCAGATCGCCGTAAGGTAAATTAACGTTACGGAAGGTCATATAAATCACCGATTAATT
745 | AGGTATCCGGCAGGAGTTGAACCCGCGCTGGGTTGGGCAGCCCAGCCAACACCGGGAGCG
746 | GACACATAGAAGAAAAAGGGCGGTTACCCATCAGAACATTATCCTCTTCCTCCTGTTTGA
747 | TTGGTGGAAGACCAGGTAACCGCCAAGATGTTAAGGACACTTCATATTGGAGGAATTACT
748 | CACATCGTAAGAATGTTTTTAGAGGTACTATTCAAAAATGTGTATAGTATATTAGTTGCA
749 | AACTAAAAAATAACGAGGTCGTTTTATGGTAAGCAAAGATAGTTTTCTTAAGAAATTAAA
750 | CCGGAATTCCGAAGCGCGGCTTTCAGATGAACAAAAAATAGAAGCCGCAAAGAGAGAAAT
751 | GATAGATAAAGCCACGCATGACTCAAGTTTTTACGCATCGGAAGTAAAGAGTCTAGTTTC
752 | TAATGTGGAAAAATGGATCTCTGAGTCATCAATACAAATAGTAAAACAAGAGATAGTTGT
753 | AAGTGAGTTTTTGAAGGATAACATCACGCCGGTAAGTTATAGAGTTTTACAATTTGCTCT
754 | TGTTTATGAAGGTTTAAATTTAATCTTTACTCCCCAAGGTTGTCTCAGATTTCAAAATAG
755 | TGGATTAATTGATATTTGTGTTGACTCCCCGAACCTAACCAAAGTATATGAAAATATAGC
756 | TTTGTTAGTTGATTCGGAATTTAAGTATCAGTGGTTCTTTTCTGATGATAAAAATACTTT
757 | TATTGTGAATGGAGATACATTTAGGGATTTTGTGCTCAAAACAATTGGGATTGAGTGATT
758 | CTATTAATTTTGAGCGAATATTATATTTCCACACGCCTTAGTAGTAAGGCGTGTCATCCG
759 | ACTTGCTTTACTCATTACTTGTTATGGATAAGTGGAGAATGATAGCATTCTGCCCGCCCT
760 | GTAAGTCGGTTTTTATTGCGAGAACATTTTGTACTTATCCAAACAAGCATCCTTCTGATG
761 | CATCAGACTTCGAAACATTCTAAGTAGAGAAAATGGCGCTAATAGCTCTTCACTCTGTGA
762 | AGAGAAAAAATACTAACAAGAAGAGAAAGGAGCTAAAAAGCGCCACAGACTTAATAACCT
763 | CTTACCGAAACGGTTACACAGGACGGTAATTTGGTGGTGCTTTCGGGCAAGGAAAAAAAT
764 | AACTTCCCGAAAGCTATCAACTCGCGCTTGCACATATCCTCCTGCCAGTGTTGCCCGTTC
765 | ACGTCTGTTGTCACAGAGCTAGTCTTCTCACCGACCGGATCGCACCCGGTGATACTCCGC
766 | ATTTGTGCGTAGGGGTCTAAACAGGTTTCATGTGCTGTTCCGACTTTGCTGATTGTAAAA
767 | GAGCGGTATTGCTTCAGCGGACCCCGTTCGTAACGTTTACGGTTGGGTATCTGTCCGCCG
768 | TTGATGGATAGATATTCCTACTTAAAGTAGAAAATATCAACAACCAAAAGTAGAAAATGT
769 | AGTGGTGTTTTATCATGTCATTGATTTAAAGTTGAATTTATTTTTACGCTTACAGTGTGT
770 | TATGGTTAAAAAAACACCAGTGAGGGTTGCAAAATGGAAAGCGATTGGGAAATTGAGCGG
771 | CCCGCATTCATAGCTGGCGAGGTCGGTGAAGCCGCAATCTGGCTAATACTGAAAGGCGAA
772 | GAGGTCAACCGAGCCGCGATGGCAGAATATTTGGAGAAGAAGCGCCGGGAGGTGGGCAAC
773 | ACGATCCACAAAGGTGTCTTGCGAGATGCAGCCAGGCTTGTGAGGGATGGAAAATTTTAG
774 | GGCTTACCCGGCGTAACATGCTCGCCGGGAAGTCAGCCTACGGATGAGATATTGTTGAGG
775 | CATTTCCCTGCTCAATAACACCAGACCTTACCTTGTAACTTCCTAATATTTTTATTGTTT
776 | TATTTCCCTTCATAGCATCAAAGATTATACCTATTTCATCTGGATCATTGATGAAACTAG
777 | TATCAACATAAATAGGAAAGGACGTTTCTCCAGTCGGTTCGTGGCAACTAAGAGTCAGCT
778 | TGTCCGCAGAGCGTTTGATGCTATCTATTAAAACCTCAAGAGTAAGCTGTTCAGATTTAG
779 | CTTTTTCAATAGGGTTTTTGATGATTTCTTGGATATCGTTGTGAGTCAGCTCAACTTTAT
780 | CCACACCCGTTATCGTTATGTGGGTGGCATCGGAAGCGCCCTTCAGGACGCCCGTAAATG
781 | CTTTTGCTGTATGTTCCTGAATGCCTTCTGCTCTTTCAATGGTATCTACTCCGGCATGCT
782 | CTTTAATTGCAGAAAGCATTCCATCTTTAAGAAGAGTCATCCTTTCGATTTCAGCTTTTT
783 | GCTTTACATCTTCTAATTGAATTTCTTGCTGCTTTTCTTCAATGCTAGCCTTGCTTTCAA
784 | GGTATGACGTGCCGATCCAGCCCCCGCCTAATACGGCCAGAGCAAACAGGAAACACATTG
785 | TCTTCTGTGTTGGGCTCATACCTTGGGTTACCTTTGCAAATGCTTCACCAAAAGCCTCAA
786 | ACAACTCTTTTAAAGAGGCGATTATCTCGGTGCAACCTGGATTAATGGTGAAAACCAACT
787 | CAGCAGCTTCACGATCTTCTATGGTTAATTTCTGTAAGTTTGAAGTTTTATACTTTATCA
788 | GAGTATAAACTTTATACATTTCTGTCTGAAACTCACAGATACCCTGAGCAAGACTTGATG
789 | GCAATGTACCATTGTATTTGTTTTCATCGCCATAGATTTTGAAATCAATGCTATTGAAAA
790 | AATTTAGCTTAATATTTTCAATGCGTATGTCTTCGCCTGCCATTAAGCTGTTAAAAACAG
791 | TTTCAAGGTCGCCAAGTGAGTTTATGTTGATTGGTAAGGCTGAAGCTTCCATGTTAAATC
792 | CTTGATTATAAACATATCTGCAAAGCTACAGACCCCGGTCGAAAGAGCCTCATAGCCGGT
793 | TATATTTTATTGATTCATGAATCAATGCCTTACCCATCACATAAAGGTTATCTTGGGTAT
794 | CTTCGTTGATGTACCACTTCTCATAAGCTGGGTTATCTGACAGAACCGCTAGCTTATCAC
795 | CCTGCATTTGCAGGCGCTTAACGTGGAAAGTCTTCCCATAAACAAATGAGTAGACGCCAT
796 | CAGTCTGAAAATTCCGAACCGATACATCAACGAACAGCCGATCGCCCGACACAAGGGTAG
797 | GGGCCATGCTATCGCCGTTAACGGTCATAACCTTAACATCTGCAGGATCACGATTTCCAA
798 | AGAGCACCCGCGCATGCTCTGTAGTGAACTCAATAGCGTAAAGCACTTCAACATAGTCGG
799 | AAATCATGTAGTTTCCGGGGCCTGCACTGACTGTTAAATCAAGTACTTCAACGCGATAAA
800 | CATCCTGCAATGGATTGCTGGCAATAGACTTCACCGCTTCACCCTCCGAGGGCTTACCAA
801 | CGCCATACTCAAGATATGCCGCGTCCACCCCCAAAACCGCAGAAAGCTTCCGCATTGTTG
802 | GAGTTCTTGGTTTTGCCGTCCCAAGCGTATAGCGCCTAACCATCTCATAGGTTACACCAG
803 | TCTTTTCTGATAACTGAGTGATTGAGATATCGCTCATAGAAATCAGCCCATTCAGGCGGG
804 | CGGCAAAATCTGGATACTTTTGTTCTTCTACCATAGGTAGAAGATTACGTAATTGGCAGT
805 | AATTCGTCATTTCTATTTTCAGTAGTTGATCTTTCTACTTTAAGTAGTATTATTCCTCTA
806 | TCAACTAGAGGAGTCTCTCATGTCCAATTCACCCACCGAAGCCGCGATTAAGGCAGCAGG
807 | GCGGTCTTTATCAGAGGTCGCGCGTAGCTTTGGCTTCAAGTCTACCCAGTCAGTAGCCAA
808 | CTGGGTCATTAACGATCAGGTCCCATCTGAGCGCGTTTTGCAGCTATGCGAACTTGGCGG
809 | GTGGATTGTTACTCCCCATCAGCTACGGCCTGATATCTATCCGAACCCCAATGATGGGCT
810 | CCCTGACAACAAGACTCAGGTTACTACTTCTGCTGCTTAATCATAACCACAACTCAAGAG
811 | GTGAAATCGTGGGTAATGAACCTGAATGGAAAGTAGAACGCCAGCCTGCATGGCTGGTGG
812 | CCGCGATAAGAAGAACTATTGCTGATTTATCGGGTGGATATGACGAAGCTGCTGAAATTC
813 | TCGGGGTTTATAAGTCCGATGACGTAACTCCGGCTACCGACCCATTGCATAACCGGCTTC
814 | GGACCAACGGCGATCAAATCTTCCCTCTGGGTTGGGCGATGGTTTTACAAGCTGCTGGTG
815 | GCTCAAATCACATCGCAAATGCCGTTGCTCGTCACTCCAATGGGCTCTTTGTACCGCTAG
816 | CTGATGTGGAAGATATCGATAACGCCGATATCAACCAGCGTCTGATGGAATCTATTGAAT
817 | GGATTGGCAGGCACTCGCAGTACATCCGAAAAGCTACTGCTGATGGTGTTATTGATGCTG
818 | CCGAACGCGCTCAAATTGAAGAGAACAGCTACCAGGTTATGACCAAATGGCAGGAGCACC
819 | TGACTCTGTTATTCCGGGTTTTCTGCGCCCCTGATGAGGTTTCCCGACCGCCAGACTAAT
820 | CAGTCTATTGCCCGGCTCACAGAAGTGAAGCAGGAGGGCTTATGTATCAGGACGAATATT
821 | TTCACGTGACTATGCCCACGGTTTTTGCTCGTGAGGACGCCCCGTGGATTAAAGAGCAAT
822 | TAGCAGCACTCCCGGCAGGTATGCGGGAAAAAATCGCGATGGCGTATGCGCAGGCGTACC
823 | AGGAGGCGTTCGACGCAGAACCGGTGTCATTCCGGCAGCAGAACGCAGCACGACGAACGG
824 | CAAACCGCCGATTGCGAGAGTTTTGCACGAGGTATACCCCAGCGGTTAGGGGATATACGT
825 | CGCTCCCACCCAGGGTTTGATTTTCTGAATCTGGGTTGGGGGAAAGGGGGCGGTGTTGGG
826 | TTTTAGCCCGAAGGGCTGGAACAGCTTTACCAGAAGAGAACGATCTAACAGAGAGATCAC
827 | TGTATGGGGTTGAAAACGTCGCTTGGAAGTTTAGACGTTTAGACATCCAAAAGGAGCCAA
828 | AATGATTTATTCAGACGCTAACGAAAAATGGGCCCCGGTTCCGGTTGAGCCGTATTCCAA
829 | AGCCTACGAAGTCAGCAACCTCGGACGGGTACGCAGTGTTCCGCGCCTGGCTAACTCTGA
830 | ATATTTTATTCGACACATTCACGGAGGTTTTCTTAAAGGCCGCCAGCGCAAAGACGGGAC
831 | CAAAACCGTTACGTTGTCGGTTCAGCGTCAGCGCACTAAGTTTGTCATCGCCGAGCTGGT
832 | GGCTATGGCCTTCGGGGAGGTTACTGCTAATGCTTAACATCCAGCCCCGCGAAAAACAGG
833 | TAGTCGCGTTAAACATGCTGCGCAGCGCCTGGAAACAGAATAACTCCTTCATGCTCTACG
834 | CCCCTGTAGGGTTCGGCAAAACAGCAATAGCCGCGCTGATCACTGATGGCTTTGTAAGCC
835 | GTGAAATGCGCGTAATGTTTGTGGCTCCGTATACGGTTCTGCTGGACCAGACCGCAGCCC
836 | GATTCATGGAATACGGCCTTCCTGGCGAAGAAATCAGTTATGTCTGGCGTGATCACCCGT
837 | CATACAACCCCACAGCTCTAATCCAGATTGCCAGTGCGGATACGCTGATTCGCCGTGAGT
838 | TCCCGGACAATATCGACCTGTTGATCGTTGATGAAGCCCACCTGAAGCGCAAAAAACTGC
839 | TGGAGGTTATCGACAATCTCACTCGCAACACAGCAACGAAGGTAATCGGCCTTTCCGGTA
840 | CGCCTTTCGCTAAGTTCATGGGCAATTACTACCAGCGCCTGATTAAGCCAACGACCATGA
841 | AGGAACTGATCGCCATTGGTGCATTGAGCAAATATGAGTTCTATGCACCGTCGCATCCTG
842 | ATCTGTCCAAAGTGGAAACGTCATACGTAGCAGGCTATGGCAGCGACTACAAAGAAAACC
843 | AGCTCAGCCAGGTAATGAGCGAAGCCAAGCTGGTAGGCGACATCGTGAAAAACTGGCTGG
844 | AGAACGGCGAAGACCGCCCGACGATTTGTTTTTGCGTAGATGTCGCTCACGCCAATTTTG
845 | TCACGGTTGAATTTGCCAGCGCTGGCGTGACGGTTGAAGTTATGACAGCCAGCACACCGC
846 | ACGACGAACGGCAGCTCACGATCCGCCGCTTCGAACAGGGCATAACCAAAATCATCATTA
847 | ACGTTGGTGTTCTGGTAGCCGGTTTTGATAGTGATGTTCGCTGCATTATCTTCGCCCGGC
848 | CAACAAAAAGCGAAATGCGCTGGATTCAGATTCTTGGGCGTGGCCTGCGTGCCGCTCCTG
849 | GTAAAGATCACTGCCTAATCTTCGACCACACAGGCACGGTTAATAAGCTGGGCTATCCCG
850 | ACGATATTGAATACGACTACCTCCCTTCATCGTCTGATGGCATGGAAGACGCGCCGCAGA
851 | GAGCCGTAAAGACCGATGAAGCGGAAAAACTGCCGAAAGAATGCAGCCAGTGCCACTACG
852 | TCAAACCAGCTGGGATTTACATCTGCCCGAAATGTGGTTTTAAACCGCTCGCCGGTGAAG
853 | ACGTGGAAACAGATAAATCCCGTGGACTGAAAAAGGTAAGCAAAGCGGAAGTCAAATATA
854 | CCGCTGAACAGAAGCAATCCTGGTGGTCTCAGATTCTGTTTTATCAGCGAACCCGTGCAG
855 | CGCAGGGACGCCCGGTCAGTGATGGCTGGTGTGCGCATACCTACAAACAAAAGTTTTCAG
856 | TATGGCCTCGGGGGTTACATCACACTCCGCAACAGATCACGCCTGAAGTAACGAATTTCA
857 | TCAAATCAAAACAGATCGCCTTTGCGAAGAGAAAAGAGAAAGAAGGAGATGCCGCATGAA
858 | TACCAAACAAGCTGCTATTGGTCGCTGGGCGGAAATTTACAAATACTATGGCCTCCCAGG
859 | TATTACCGGGAAAAGCCATCTCAAAGGAGAGTGCCCTCTTTGTGGCCGTAAAGGTAAATT
860 | TCGCTGTGATGATAAAGACGGCACCGGGTCATATATCTGCGTTTGTGGCTCTGGCGATGG
861 | CTGGGCGTTGCTGACTGCCAAGACTGGCAAAGAATTTAAGGTTTTGGCCTCGGAAATAGA
862 | CAGGCTGATCGGGAACCCCTACACCTCGGATCGGACCAGAGTAAATCCGGTGCGTACATC
863 | TCTGGCACAACAACGTGACAAAGTCAGTCGTAAGTTTTCGAAGCTCATCCCTCTCCGTGG
864 | TACCGGTGCAGATAGCTACCTGAAGGGGCGCGGTATTAACTCCCACCCAGCAGAGAGCAT
865 | CAAGTACTGCGATAAACAGCCAGTAGATGGAAAGAACCTCCAGGCTATTTATGCGCTGGC
866 | TACAGATGACCGCGGGGAACTGTGTTATTTGCACCGCACTCTGCTTGACGGTGATAAGAA
867 | GGCGCAAACAGGCGGCGCAGCCAAGAAGATGATGAAACTGCAGGAGGATAGCTATTTAGA
868 | GTATGCCAAATCCGTTGCTATTCGCATGTTCCCAATATCCTCAACGCTGGGAATTGCTGA
869 | AGGGATCGAAACGGCTCTGGCCTGCCACCAGATCACGAAGTGCAACACGTGGGCGACGAT
870 | GAATACCGCCTTCATGAAGAAATTCCGCGTTCCTGCCGGAGTAAAGAACCTCATCATTTT
871 | TGCTGACTCAGACGCCAACGCAGCAGGTCATGCCGCTGCTTTTGAATGCGCTGCTGCAAA
872 | TCTGCACGCAAAGAATGATCTGGAAAGTGTCTCCGTGCGCTGGCCTGCACAGGGGGACTT
873 | TAATGATCTGCTGCTTAACGGCTCAGAAGTATTCGAGTGGGTATTTCACCGGGGGATGAA
874 | ACAGTGAAGAAACCAGCGGCTGCAAAGGTGAAAACGTACAAACCGAAGAAGTGCGCCAGC
875 | TGTGGTGAAACCTTCACTCCGGCCCGCAACCTGCAAAAGGTTTGTGGCCCGCTCTGTGCT
876 | ATAGCCCACAACAGGGCGCTGAAACAAAAAAAAGCGGAGGCGGAACAGAAGGACAAGCTG
877 | AAGATGCGCAAAAAGGCTCTGCTTACCCGTGGCGACTACATCAAAAAAGCCCAGTCAGCC
878 | TTTAATGCCTTTATCCGTGAACGCGACGAGGGGAAACCATGCCCATCATGTGGCACTTAT
879 | CACCCACCTATGATCTTTGGCGGCCAGTGGGATTGCGGTCATTTCATGGGGGTAGGCGCT
880 | CGTCCTGAATTGCGCTTTGAAGAGAAGAATGCTTACCGGCAGTGCAAAGCCTGTAATGGT
881 | GGATCGGGTCGGTTCGCTGCAAAGAATGCCACTGTACATGCCCGCTACAGGGAGACGCTG
882 | ATCGAGTGGTATGGATTGCCGCTGGTGGAATGGCTGGAAGGCCCACACGAAGCGAAGCAT
883 | TACTCAAAAGAAGACCTGGAAAACATAGCGGCTAAATACCGCCGTAAAACTCGCGAACTG
884 | AAAAAGCAGAGGGCCGCATGAATTACGATCTTATCTACTGTGATCCGCCGTGGGAATATG
885 | GCAACCGAATTAGCAACGGCGCAGCCTGTAATCATTACAGCACAATGAGCATTGAAGACC
886 | TGAAACGGCTCCCTGTCTGGTCTCTGGCTGCTGATAACGCCGTACTGGCGATGTGGTATA
887 | CGGGGACCCATAACCGCGAGGCTGTAGAACTGGCTGAATCATGGGGTTTCCGGGTCAGAA
888 | CAATGAAAGGCTTTACCTGGGTAAAACTGAATCAGAACGCCGCTGACCGCTTCAACAAGG
889 | CACTAAACGCCGGAAAGCTGGTGGACTTCAATGATCTTCTTGAGATGCTGGACCGTGAGA
890 | CGCGCATGAACGGCGGCAATCATACCCGGAGCAATACAGAAGATGTCCTGATAGCGACCA
891 | GGGGAACCGGACTAACCCGCGCCAGCGCATCGGTAAAACAGGTTGTTCACACCTGCCTCG
892 | GTGAGCACAGCGCTAAACCGTGGGAAGTAAGGAACCGACTGGAGCAATTATACGGTGATG
893 | TGAAACGGATCGAACTATTCGCTCGGGAAGAGTGGAAAGGATGGGACCGCTGGGGAAATC
894 | AATGCAACAACAGTATCGAAATTATTACCGGACTGATTAAAGAGGTGAACCATGCAGCGT
895 | GATATTCAACTGGTACTCGAACGGTGGGGAACCTGGGCTATTAGTGAAGGCTCACAGGTT
896 | GACTGGTCACCAATTGCAGCGGGTTTTAAAGGCCTCCTGTTAAATACCTCAAAGTCTCGC
897 | GAGTCATGTTGTGACAATGATGGCCTTATTGTAGACGCTGCCGTAGGAATGCTTAAACGA
898 | GCTGGCCGGGATGATGAGTTAAATCTGGTGATGTTGCATTACATGCATAACGTTTCTAAA
899 | TCGACTATTGCCCGCTGGGAAAAATGTTCAGAGGGAAAAATACGTAACAGGTTAATGATA
900 | GCCGAAACGTTTATTGATGCCTGCATCATTATGAGTGGTGCCAGATTAGAAATGGATGAT
901 | TGGGCCCATAAAAAAGAAGTAGAGAAAGTTGCATAAAAGTCTATTCGTTACGAATTTTAT
902 | ATATTAATGTGTTAAGAGTGGTCACTTAGACACGAACTTAAATATTACAGAACCTCGCCA
903 | ATTGGCGGGGTTTTTTCATTTCAGGCCCTGACTAAAAGTTGCAGATTAACCGTGAAATGC
904 | ATGAGCCTGCGGCCTGAATTCTTTCCCCTCGTTCTGAGAGGATTCACAGCAATAGAGGGG
905 | GACCGATGTCCGAACCAATAACCGGCACAGGCTTAGCTGGTGGCGCTTTAACTGGGGCGA
906 | GTGTTTACGGGCTATTAACCGGTACAGACTACGGTGTTGTGTTCGGGGCATTTGCTGGTT
907 | CCGTCTTTTATATAGCGACAGCGGCCGATTTGAGCGCCCCACGACGGATGGCATATTTCG
908 | TTGTGTCCTATATTGCTGGAGTTCTGTGCTCCGGGCTGGTCGGTTCTAAGTTATCCGACC
909 | TTACCGGGTACAACGATAAGCCTCTGGATGCAATTGGTGCCGTAATCATTTCGGCATTGG
910 | CCGTAAAAATACTCACCTTCCTGAACAATCAGGATATTGGCTCGCTGGTGGCGCTAATAA
911 | CGCGCCGGGGAGGTTCCGGTGGAACTAAATGATCCTACTGCAACCATCAATGCGCTGTTA
912 | TGTGCTGGTGTCGTTGTTACGTTGATGTTCTATCGCCGCAGAGACTCACGTCATCGTAAG
913 | TGGGTGTCGCGGCTGGCATGGCTGATAACAGTGATATACAGCTCTGTGCCGTTGGCGTAT
914 | CTGTGCGGCATCTATCCCTATTCATCATGGCCCACCATTGCGGCCAATATCATGATCCTT
915 | GTTGTGCTGCTGAGCGTAAGAGGCAATGTAGCGCGGCTAGTTGATGCACTGAGGCACTAA
916 | TGAATCAAACACAATTCCAGAAGGCGGCTGGTATCAGCGCCGGGTTAGCTGCGCGCTGGT
917 | TTCCGCATATTGATGCTGCGATGAAAGAATTTTGCATCACTGCTCCACTCGACCAGGCGA
918 | TGTTTATTGCGCAGTGCGGCCACGAAAGTACTTCATTCACTCAACTGGTAGAAAGCTTCA
919 | ACTATAGCGTTGCCGGGCTGGCTGGTTTTGTGAAGGCAAAGCGTATCACGCAGGACCAGG
920 | CAAACTCACTCGGGCGCAAATCATCCGAGAAGGCGCTACCGCTTGAACGCCAGAAGGCAA
921 | TCGCAAATCTGGTCTACAGCAATCGCTACGGCAATAAGTCAGCAGGTGATGGCTGGAAAT
922 | ATCGCGGGCGAGGACTAAAGCAAATCACTTTCTTGGATAATTACCTGCGATGTGGTACTG
923 | CACTAAAGCTCGATTTAGTCAGCAACCCTGAACTGCTGGAGAAAGATATTAACGCAGCCC
924 | GCAGCGCCGCATGGTTCTATACCTCAAGCGGTTGTTTGAAATACCCCGGCGACTTAGTGC
925 | GCGTGACCCAGATTATCAACGGTGGTCAGAACGGTATTGATGACCGACGCGCCCGATTCC
926 | TGAAAGCTAAATCTGTTCTGATGTGAGGTCCTCATGGGCATTGAAATGATTATTGGTCTG
927 | GCAACTGCGTTGCTGGCCATTGTCGCTGGAGCATTTGGATTAGGCCATACGCGAGGTACT
928 | AATAAAGCGGAAGCCAAAGCCGATCAGCAGCGAATCGAAGATACCGCCGCTGCCAGCGTC
929 | GCAGCGGCGGAACGGAAAGAGGAAGCCACCAGAGAGGCCAGCAATGTACAACAGACTGTT
930 | AGCCATATGCCTGATGACGATGTTGATCGGGAGCTGCGCGAGCACTTCACCCGCCCCGGC
931 | AGTCGTTGATACCGCATGCAACTGGGTACGGATCATCTACCTGACTGACCACGATATCGA
932 | CGTTCTGGATAAGCAGACCAAACGCGACATTCTGGCGCACAACAAATCCGTGCAGGCTAA
933 | TTGCATGAAGCCCCTCTGATGAGGGGCTGTGATGATGACTTAATTTTATGATTTTCGAAA
934 |
--------------------------------------------------------------------------------