├── .travis.yml ├── INSTALL.pl ├── ProphET_standalone.pl ├── README.md ├── README_BROAD_USERS.md ├── README_INSTALLING_PERL_MODULES.md ├── UTILS.dir ├── extrair_ncbi_prophage_families.pl ├── fasta2line ├── fetch_genomes_based_on_taxid.pl ├── gff2gene_protein_seq.pl ├── gff2graph-from-scratch.pl ├── line2fasta ├── obtain_prot_with_annot_seq.pl ├── retrieve_proteins.SLOW.sh ├── retrieve_proteins.sh ├── script_remover_vazios.pl ├── split_genbank.pl ├── splitfasta.pl └── union.pl ├── config.dir ├── ABC_transporters_to_grep.txt ├── Prophages_names_sem_Claviviridae_Guttaviridae-TxID └── Prophages_names_sem_Claviviridae_Guttaviridae-TxID.SMOKE_TEST ├── expected_output_of_test ├── NC_005362.1.phage_1.fas ├── NC_005362.1.phage_2.fas └── phages_coords ├── test.fasta └── test.gff /.travis.yml: -------------------------------------------------------------------------------- 1 | language: perl 2 | perl: 3 | - "5.8" 4 | 5 | # Use this to prepare the system to install prerequisites or dependencies. 6 | before_install: 7 | - sudo apt-get update 8 | - sudo apt-get autoremove 9 | - sudo apt-get install libdb5.3++-dev 10 | - sudo apt-get install emboss 11 | - sudo apt-get install bedtools 12 | - sudo apt-get install libgd3 13 | - sudo apt-get install libgd-dev 14 | - sudo apt-get install libgd-perl 15 | - sudo apt-get install libgd-svg-perl 16 | - wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/legacy.NOTSUPPORTED/2.2.26/blast-2.2.26-x64-linux.tar.gz 17 | - tar -xvzf blast-2.2.26-x64-linux.tar.gz 18 | - PATH=$PATH:`pwd`/blast-2.2.26/bin 19 | - echo $PATH 20 | 21 | 22 | 23 | # Use this to install any prerequisites or dependencies necessary to run your build. 24 | install: 25 | - cpanm Bio::Perl 26 | - cpanm SVG 27 | - cpanm GD 28 | - cpanm DB_File 29 | - cpanm --force GD::SVG 30 | - cpanm Bio::Graphics 31 | - cpanm LWP::Simple 32 | - cpanm XML::Simple 33 | - cpanm Mozilla::CA 34 | - cpanm LWP::Protocol::https 35 | 36 | # Use this to prepare your build for testing 37 | # e.g. 
copy database configurations, environment variables, etc. 38 | before_script: true 39 | 40 | # Default is specific to project language 41 | # All commands must exit with code 0 on success. 42 | # Anything else is considered failure. 43 | script: 44 | - ./INSTALL.pl 45 | - travis_wait 30 ./ProphET_standalone.pl --fasta test.fasta --gff_in test.gff --outdir test 46 | # - diff -r test/NC_005362.1.phage_1.fas expected_output_of_test/NC_005362.1.phage_1.fas | wc 47 | # - diff -r test/NC_005362.1.phage_2.fas expected_output_of_test/NC_005362.1.phage_2.fas | wc 48 | #diff -r test/phages_coords expected_output_of_test/phages_coords 49 | - paste test/phages_coords expected_output_of_test/phages_coords | awk 'BEGIN{count = 0} $3 - $7 > 15000 || $7 - $3 > 15000 {count++} $4 - $8 > 15000 || $8 - $4 > 15000 {count++} {print $0} END {if( count != 0 ){ print count; exit 1 }}' 50 | 51 | after_success: true 52 | 53 | after_failure: true 54 | 55 | after_script: true 56 | -------------------------------------------------------------------------------- /INSTALL.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | 4 | use Pod::Usage; 5 | use Getopt::Long; 6 | use File::Path; 7 | use File::Copy; 8 | 9 | =head1 NAME 10 | 11 | INSTALL.pl 12 | 13 | =head1 SYNOPSIS 14 | 15 | INSTALL.pl 16 | [ --update_db_only ] | 17 | 18 | =head1 OPTIONS 19 | 20 | B<--update_db_only> - Only update the database of known prophages B<(Optional)>. 21 | 22 | B<--help> - prints the usage information. B<(Optional)> 23 | 24 | =head1 DESCRIPTION 25 | 26 | =head1 CONTACT 27 | Gustavo C. 
Cerqueira (2018)
cerca11@gmail.com
gcerqueira@pgdx.com

=cut

# ---------------------------------------------------------------------
# Installation driver.
#
# Unless --update_db_only is given, the script first
#   1. locates the third-party programs in PATH and records their
#      locations in config.dir/Third_party_programs_paths.log,
#   2. checks that the required Perl modules can be loaded,
#   3. clones GFFLib into UTILS.dir/GFFLib.
# It then always (re)builds the phage protein BLAST database inside
# PhrophET_phage_proteins_database.dir, working in a scratch directory.
#
# The script must be started from the ProphET root directory: every
# path below is relative to it (or to the scratch directory it enters).
# ---------------------------------------------------------------------

my ( $update_db_only, $phage_families_file );
my $help;

# Relative to the scratch directory entered further down.
my $config_dir                  = "../config.dir";
my $default_phage_families_file = "Prophages_names_sem_Claviviridae_Guttaviridae-TxID";

GetOptions(
    'update_db_only'        => \$update_db_only,
    'phage_families_file=s' => \$phage_families_file,
    'help'                  => \$help
);

if ( defined($help) ) {
    pod2usage( -verbose => 1, -exitval => 2 );
}

# Return value of the most recent system() call.
my $output;

# A plain conditional replaces the former "goto DOWNLOADING_DB", which
# jumped over several "my" declarations.
if ( !defined($update_db_only) ) {

    #-----------------------------------------
    print "Looking for required programs in the enviroment PATH...\n";
    my $config_file = "./config.dir/Third_party_programs_paths.log";

    # All programs are located BEFORE the configuration file is opened
    # for writing, so a missing program no longer leaves a truncated
    # configuration file behind.
    my $emboss_extractseq =
      locate_program( 'extractseq', "\nERROR: Unable to find \"extractseq\", EMBOSS suite\n" );
    print "\tFound EMBOSS extractseq: $emboss_extractseq\n";

    my $blastall =
      locate_program( 'blastall', "\nERROR: Unable to find \"blastall\", BLAST suite\n\n" );
    print "\tFound blastall: $blastall\n";

    my $formatdb =
      locate_program( 'formatdb', "\nERROR: Unable to find \"formatdb\", BLAST suite\n\n" );

    # The original message announced this path as "blastall".
    print "\tFound formatdb: $formatdb\n";

    my $bedtools = locate_program( 'bedtools', "\nERROR: Unable to find \"bedtools\"\n\n" );
    print "\tFound bedtools: $bedtools\n";

    #-----------------------------------------
    print "Saving program paths in $config_file ...\n";
    open( my $logs_fh, '>', $config_file )
      or die "Unable to write on config file: $config_file\n";
    print {$logs_fh} "Emboss_extractseq_path\t$emboss_extractseq\n";
    print {$logs_fh} "Blastall_path\t$blastall\n";
    print {$logs_fh} "Formatdb_path\t$formatdb\n";
    print {$logs_fh} "Bedtools_path\t$bedtools\n";

    # Buffered write errors only surface when the handle is closed.
    close($logs_fh)
      or die "Unable to write on config file: $config_file\n";

    #-----------------------------------------
    print "Looking for required Perl libraries...\n";

    foreach my $module ( 'Bio::Perl', 'LWP::Simple', 'XML::Simple', 'GD' ) {
        $output = system("perl -e 'use $module;'");
        die "\nERROR: Unable to find Perl module $module\n\n" if ($output);
    }

    #-----------------------------------------
    print "Downloading GFFLib ...\n";

    # Remove any previous copy so that "git clone" starts from scratch.
    rmtree("UTILS.dir/GFFLib") if ( -e "UTILS.dir/GFFLib" );
    $output = system("git clone https://github.com/gustavo11/GFFLib.git UTILS.dir/GFFLib");
    die "ERROR: Unable to download GFFLib from github\n\n" if ($output);
}

#-----------------------------------------
print "Creating database directory...\n";

my $database_dir = "PhrophET_phage_proteins_database.dir";

# An existing database is kept as a time-stamped backup.
if ( -e $database_dir ) {
    my $datestring = localtime();
    $datestring =~ s/ /_/g;
    my $src = $database_dir;
    my $dst = "$database_dir.$datestring.bak";

    move( $src, $dst )
      or die("ERROR: Unable to move directory $src to $dst!");
}

mkdir( $database_dir, 0755 )
  or die "ERROR: Unable to create directory $database_dir\n";

#-----------------------------------------
print "Creating database temp directory ...\n";
my $temp = "ProphET_install_temp.dir";

if ( -e $temp ) {
    rmtree($temp)
      or die("ERROR: Unable to remove directory $temp!");
}

mkdir( $temp, 0755 )
  or die "ERROR: Unable to create directory $temp.\n";

chdir "$temp"
  or die "ERROR: Unable to enter directory $temp\n";

#-----------------------------------------

print "Downloading Phage sequences ...\n";

my $eff_phage_families_file;
if ( defined($phage_families_file) ) {
    $eff_phage_families_file = $config_dir . "/" . $phage_families_file;
}
else {
    $eff_phage_families_file = $config_dir . "/" . $default_phage_families_file;
}

if ( !( -e $eff_phage_families_file ) ) {
    die "ERROR: Phage families file $eff_phage_families_file does not exist!";
}

$output = system("../UTILS.dir/extrair_ncbi_prophage_families.pl $eff_phage_families_file");
die "ERROR: Unable to execute extrair_ncbi_prophage_families.pl\n\n" if ($output);

`perl ../UTILS.dir/obtain_prot_with_annot_seq.pl $eff_phage_families_file > Phage_proteins_pre_raw.db`;

#-----------------------------------------
print "Formating sequences ...\n";

# Remove asterisks representing STOP codons
`sed s'/[*]//g' Phage_proteins_pre_raw.db > Phage_proteins_pre_raw_without_stop.db `;
`perl ../UTILS.dir/script_remover_vazios.pl Phage_proteins_pre_raw_without_stop.db > Phage_proteins_raw.db`;

# A database that failed to format makes every later step meaningless,
# so a non-zero exit is fatal here (previously only "could not be
# started" was detected).
`formatdb -p T -i Phage_proteins_raw.db`;
check_child_status( 'formatdb', 1 );

`../UTILS.dir/fasta2line Phage_proteins_raw.db > Phage_proteins_raw.line`;

#-----------------------------------------
print "Removing ABC-Transporters ...\n";

# Retrieve ABC transporter from database based on their annotation
`grep -wf ../config.dir/ABC_transporters_to_grep.txt Phage_proteins_raw.line | sort -u | awk '{print ">"\$2"\\n"\$1}' > ABC_transporters_seqs.fasta`;

# BLAST those ABC transporters against the rest of the database.
# A non-zero exit is only reported: the emptiness checks below decide
# whether the search result is usable.
`blastall -p blastp -d Phage_proteins_raw.db -i ABC_transporters_seqs.fasta -e 1e-5 -m8 -o ABC_trans_BLAST_matches`;
check_child_status( 'blastall', 0 );

# Retrieve the ID of matches against ABC transporters
`cat ABC_trans_BLAST_matches | awk '{print \$2}' | sort -u > IDs_Matches_com_ABC_transporters`;

# Remove those matches from the database
if ( -z "IDs_Matches_com_ABC_transporters" && -z "ABC_transporters_seqs.fasta" ) {

    # No ABC transporter was annotated at all: keep every protein.
    `awk '{print ">"\$2"\\n"\$1}' Phage_proteins_raw.line > Phage_proteins_without_ABC-t.db`;
}
elsif ( -z "IDs_Matches_com_ABC_transporters" ) {

    # Queries existed but produced no hit, not even against themselves.
    die "ERROR: Incomplete or corrupted BLAST search for ABC transporters!!\n";
}
else {
    `grep -vf IDs_Matches_com_ABC_transporters Phage_proteins_raw.line | awk '{print ">"\$2"\\n"\$1}' > Phage_proteins_without_ABC-t.db`;
}

#-----------------------------------------
print "Finalizing phage database...\n";
`cp Phage_proteins_without_ABC-t.db ../$database_dir`;
`cp phage_db.summary.stats ../$database_dir`;
chdir "../$database_dir"
  or die "ERROR: Unable to enter directory ../$database_dir\n";
`formatdb -p T -i Phage_proteins_without_ABC-t.db`;
check_child_status( 'formatdb', 1 );

#-----------------------------------------
chdir "../";

#`rm -rf ProphET_install_temp.dir`;
print "Installation completed!\n";

exit(0);

# Returns the path "which" reports for $program, without the trailing
# newline; dies with $error_message when "which" prints nothing.
sub locate_program {
    my ( $program, $error_message ) = @_;

    my $path = `which $program`;
    die $error_message if ( $path eq '' );
    chomp $path;

    return $path;
}

# Inspects $? immediately after a backtick command.
#   $program - name used in the messages
#   $fatal   - true: die on a non-zero status; false: only warn
# Always dies when the command could not be started at all ($? == -1).
sub check_child_status {
    my ( $program, $fatal ) = @_;

    my $status = $?;
    die "ERROR: Unable to execute $program!\n" if ( $status == -1 );
    return if ( $status == 0 );

    my $message = "$program finished with a non-zero wait status ($status)";
    die "ERROR: $message!\n" if ($fatal);
    warn "WARNING: $message\n";

    return;
}

-------------------------------------------------------------------------------- /ProphET_standalone.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use Pod::Usage; 5 | 6 | use Getopt::Long; 7 | use FindBin; 8 | 9 | use lib "$FindBin::Bin/UTILS.dir/GFFLib"; 10 | 11 | use GFFFile; 12 | 13 | 14 | =head1 NAME 15 | 16 | ProphET is a user friendly algorithm to identify prophages in bacterial genomes. 17 | 18 | =head1 SYNOPSIS 19 | 20 | usage: ProphET_standalone.pl --fasta_in --gff_in --outdir [--evalue_cutoff ] [--window_size ]> [--grid] [--help] 21 | 22 | 23 | =head1 OPTIONS 24 | 25 | B<--fasta_in> - Bacterial genome FASTA file 26 | 27 | B<--gff_in> - Bacterial GFF file 28 | 29 | B<--outdir> - output directory 30 | 31 | B<--grid> - Use UGER for BLAST jobs (Currently only works in the Broad Institute UGER grid system) B<(Optional)> 32 | 33 | B<--evalue_cutoff> - Sets the BLAST evalue cutoff. Please do not use scientific notation (default=0.00001) B<(Optional)> 34 | 35 | B<--window_size> - Sets the size of the sliding windown used to calculate gene density (minimum value = 1000, default = 10000) B<(Optional)> 36 | 37 | B<--help> - print this and some additional info. about FASTA and GFF input format B<(Optional)> 38 | 39 | =head1 DESCRIPTION 40 | 41 | B 44 | 45 | B 46 | 47 | FASTA: 48 | 49 | >NC_005362.1 50 | TTGTTTGATCTAGATAAATTTTGGCAATTTTTTAATGCTGAGATGAAAAAAAGCTACAGCACGGTTGCCT 51 | ATAATGCTTGGTTTAAAAATACTAAACCAATTTCCTTTAATAAAAAGACAAAAGAAATGATAATCGCTGT 52 | 53 | GFF: 54 | 55 | NC_005362.1 . gene 1 1365 . + . ID=LJ_RS00005;Name=LJ_RS00005; 56 | NC_005362.1 . mRNA 1 1365 . + . ID=LJ_RS00005.t01;Parent=LJ_RS00005; 57 | NC_005362.1 . exon 1 1365 . + . ID=LJ_RS00005.t01-E1;Parent=LJ_RS00005.t01; 58 | NC_005362.1 . CDS 1 1365 . + 0 ID=LJ_RS00005.p01;Parent=LJ_RS00005.t01; 59 | 60 | =head1 CONTACT 61 | 62 | Joao Luis R. Cunha (2017) 63 | jaumlrc@gmail.com 64 | 65 | Gustavo C. 
Cerqueira (2017)
cerca11@gmail.com

=cut

# ---------------------------------------------------------------------
# Main flow:
#   1. parse and validate the command line;
#   2. split the input GFF/FASTA into one pair per scaffold and BLAST
#      the predicted proteins against the phage protein database
#      (locally, or through the grid when --grid is given);
#   3. for each scaffold, turn the BLAST matches into coordinates and
#      run the prediction steps (sliding windows, grouping, trimming,
#      rendering, sequence extraction).
# ---------------------------------------------------------------------

my $fasta_in;    # Genome FASTA file
my $gff_in;      # GFF file
my $gff_trna;    # GFF file with tRNA coordinates
my $outdir;      # Output directory

my $evalue_cutoff;
my $window_size;

my $help;
my $grid;

GetOptions(
    'fasta_in=s'      => \$fasta_in,
    'gff_in=s'        => \$gff_in,
    'gff_trna=s'      => \$gff_trna,
    'outdir=s'        => \$outdir,
    'grid'            => \$grid,
    'evalue_cutoff=s' => \$evalue_cutoff,
    'window_size=s'   => \$window_size,
    'help!'           => \$help
);

if ( defined($evalue_cutoff) ) {
    if ( $evalue_cutoff !~ /^[0-9.]+$/ ) {
        pod2usage(
            -message => "Error: Parameter --evalue_cutoff should be a number !!!!\n\n",
            -verbose => 1,
            -exitval => 1,
            -output  => \*STDERR
        );
    }
}
else {
    $evalue_cutoff = 0.00001;
}

if ( defined($window_size) ) {
    if ( $window_size !~ /^[0-9]+$/ || $window_size < 1000 ) {
        pod2usage(
            -message => "Error: Parameter --window_size should be an integer value higher or equal to 1000 !!!!\n\n",
            -verbose => 1,
            -exitval => 1,
            -output  => \*STDERR
        );
    }
}
else {
    $window_size = 10000;
}

# "help!" also accepts --nohelp, which stores a defined 0; test the
# value, not definedness, so that --nohelp does not print the help.
if ($help) {
    pod2usage( -verbose => 2, -exitval => 0 );
}

# Mandatory parameters, reported in this order.
foreach my $required ( [ 'fasta_in', $fasta_in ], [ 'gff_in', $gff_in ], [ 'outdir', $outdir ] ) {
    my ( $option_name, $option_value ) = @{$required};
    next if ( defined($option_value) );

    pod2usage(
        -message => "Error: Parameter --$option_name is required !!!!\n\n",
        -verbose => 1,
        -exitval => 1,
        -output  => \*STDERR
    );
}

# Checking if the user has provided a separate GFF with tRNA coordinates
my $separate_gff_trna = 0;
if ( defined($gff_trna) ) {
    $separate_gff_trna = 1;
}

my $debug                       = 0;
my $blast_grid_output_directory = "$outdir/blast_grid";

# mkdir fails harmlessly when the directory already exists, so the
# outcome is verified with -d instead of the return value.
mkdir( "$outdir", 0755 );
die "ERROR: Unable to create output directory $outdir\n" unless ( -d $outdir );

# The final coordinates file is opened in append mode further down the
# pipeline (one append per scaffold). Remove a copy left by a previous
# run in the same directory so old predictions are not mixed in.
unlink("$outdir/phages_coords") if ( -e "$outdir/phages_coords" );

# Number of jobs issued when using the grid
my $number_of_jobs = 20;

# Setting path of some important directories
my $UTILS_DIR  = "$FindBin::Bin/UTILS.dir";
my $CONFIG_DIR = "$FindBin::Bin/config.dir";
my $OBA_DIR    = "/cil/shed/apps/internal/OBA/GridSubmissions";

# Retrieving from the configuration file the paths of auxiliary
# applications. printf is given an explicit "%s" format so that a "%"
# inside a path is not interpreted as a format directive.
my $EMBOSS_EXTRACTSEQ_PATH =
  `grep 'Emboss_extractseq_path' $CONFIG_DIR/Third_party_programs_paths.log | awk '{printf "%s", \$2}'`;

my $BLAST_PATH =
  `grep 'Blastall_path' $CONFIG_DIR/Third_party_programs_paths.log | awk '{printf "%s", \$2}'`;

my $BEDTOOLS_PATH =
  `grep 'Bedtools_path' $CONFIG_DIR/Third_party_programs_paths.log | awk '{printf "%s", \$2}'`;

# BLAST and bedtools are invoked through these paths below; an empty
# value would silently turn into a broken shell command.
foreach my $tool ( [ 'Blastall_path', $BLAST_PATH ], [ 'Bedtools_path', $BEDTOOLS_PATH ] ) {
    my ( $config_key, $tool_path ) = @{$tool};
    next if ( defined($tool_path) && $tool_path ne '' );

    die "ERROR: No value found for $config_key in $CONFIG_DIR/Third_party_programs_paths.log. "
      . "Please run INSTALL.pl first.\n";
}

# Database with prophage proteins
my $PROPHET_DB_DIR = "$FindBin::Bin/PhrophET_phage_proteins_database.dir";

my $usage = "ProphET_standalone.pl ";

#########
# Processing the input files and separating in one fasta per GFF

# Get the scaffold IDs from the gff
my $gff_handler = GFFFile::new($gff_in);
$gff_handler->read();
my @scaffold_ids = $gff_handler->get_chrom_names();

#################
# Print scaffolds in the GFF

print "\nProcessing the following scaffolds/chromosomes:\n";
chomp(@scaffold_ids);
foreach my $scaffold_id (@scaffold_ids) {
    print STDERR "$scaffold_id\n";
}

# One sequence per line, so that awk can select a scaffold by its ID.
`perl $UTILS_DIR/fasta2line $fasta_in > $outdir/fasta.line`;

print STDERR "\n";

# Array storing BLAST cmds and output files
my @cmds;
my %output_files;    # One set per each scaffold

# Iterate through each scaffold/chromosome
foreach my $scaff_chrom (@scaffold_ids) {

    print STDERR "Processing scaffold/chromosome: $scaff_chrom ...\n";

    my $intermediate_files_dir = "$outdir/$scaff_chrom";
    mkdir( "$intermediate_files_dir", 0755 );

    my $curr_fasta = "$intermediate_files_dir/$scaff_chrom.fasta";
    my $curr_gff   = "$intermediate_files_dir/$scaff_chrom.gff";

    # Generate a GFF and FASTA per each scaffold/chromosome.
    # awk reads the comment-free stream coming from grep ("-"); it used
    # to be handed $gff_in again, which made the grep filter a no-op.
    `grep -v "^#" $gff_in | awk 'id == \$1 {print \$0}' id=$scaff_chrom - > $curr_gff`;
    `awk 'id == \$2 {print ">"\$2"\\n"\$1}' id=$scaff_chrom $outdir/fasta.line > $curr_fasta`;

    ##########
    # Checking the input files
    # Checking all the gffs IDs matches the fasta sequence

    my $num_seqs = `grep -c '>' $curr_fasta | awk '{print \$1}'`;
    die "ERROR: The file $curr_fasta has either more than one sequence or no sequence." if ( $num_seqs != 1 );

    my $gff_ids_count =
      `awk '{print \$1}' $curr_gff | sort -u | wc | awk '{print \$1}'`;
    my $gff_ids_ids = `awk '{print \$1}' $curr_gff | sort -u`;
    if ( $gff_ids_count > 1 ) {
        die
          "ERROR: The input gff file has more than one genome id:\n" .
          "$gff_ids_ids Check if you are not submitting a file with plasmids as well as the genome file\n";
    }

    # Both values keep their trailing newline, so they compare equal
    # when the IDs are the same.
    my $fasta_only_id = `grep '>' $curr_fasta | sed s'/>//'`;
    my $gff_only_id   = `awk '{print \$1}' $curr_gff | sort -u`;
    if ( $fasta_only_id ne $gff_only_id ) {
        die
          "ERROR: The FASTA headers do not match the GFF sequence id:\n FASTA:$fasta_only_id GFF_SEQ_ID:$gff_only_id";
    }

    # Script to generate the proteins fasta based on the genome fasta and gff
    print STDERR "Generating file containing protein and gene sequence...\n";
    `$UTILS_DIR/./gff2gene_protein_seq.pl $curr_gff $curr_fasta 11 $intermediate_files_dir/$scaff_chrom.trans $intermediate_files_dir/$scaff_chrom.cds $intermediate_files_dir/$scaff_chrom.prot`;

    # BLAST predicted protein against our phage db
    if ($grid) {
        split_blast(
            $scaff_chrom,
            "$BLAST_PATH -p blastp -d $PROPHET_DB_DIR/Phage_proteins_without_ABC-t.db -e $evalue_cutoff -m8 -a8",
            "$intermediate_files_dir/$scaff_chrom.prot",
            \$number_of_jobs,
            \@cmds,
            \@{ $output_files{$scaff_chrom} },
            $outdir,
            $blast_grid_output_directory
        );

    }

    # BLAST locally
    else {
        print STDERR
          "BLASting protein sequences against phage proteins db (e-value <= $evalue_cutoff) ...\n";
        `$BLAST_PATH -p blastp -d $PROPHET_DB_DIR/Phage_proteins_without_ABC-t.db -i $intermediate_files_dir/$scaff_chrom.prot -e $evalue_cutoff -m8 -a8 -o $intermediate_files_dir/$scaff_chrom.blast`;
    }
}

`rm $outdir/fasta.line`;

if ($grid) {

    # Issue BLAST jobs in the grid and wait...
    execute_blast_on_grid($outdir);

    # Merge BLAST results
    foreach my $scaff_chrom (@scaffold_ids) {
        my $intermediate_files_dir = "$outdir/$scaff_chrom";
        my $curr_blast             = "$intermediate_files_dir/$scaff_chrom.blast";

        if ($debug) {
            foreach my $file ( @{ $output_files{$scaff_chrom} } ) {
                print "Files to merge: $file\n" if $debug;
            }
        }
        merge_blast( \@{ $output_files{$scaff_chrom} }, $curr_blast );
    }
}

foreach my $scaff_chrom (@scaffold_ids) {

    my $intermediate_files_dir = "$outdir/$scaff_chrom";

    my $curr_fasta                  = "$intermediate_files_dir/$scaff_chrom.fasta";
    my $curr_gff                    = "$intermediate_files_dir/$scaff_chrom.gff";
    my $curr_blast                  = "$intermediate_files_dir/$scaff_chrom.blast";
    my $curr_blast_best_matches     = "$intermediate_files_dir/$scaff_chrom.blastt-best-matches";
    my $curr_blast_best_matches_ids = "$intermediate_files_dir/$scaff_chrom.blastt-best-matches-ids";
    my $curr_blast_matches_coords   = "$intermediate_files_dir/$scaff_chrom.matches-coords";
    my $curr_blast_union            = "$intermediate_files_dir/$scaff_chrom.matches-coords-union";
    my $tRNAfile                    = "$intermediate_files_dir/$scaff_chrom.tRNA_file_coordinates";

    # Skip scaffold if BLAST result is empty. A result file that does
    # not exist at all is reported and skipped as well ("-z" alone is
    # false for a missing file).
    if ( !-e $curr_blast ) {
        warn "WARNING: BLAST output $curr_blast not found; skipping $scaff_chrom\n";
        next;
    }
    next if ( -z "$curr_blast" );

    # Extracting only BLAST best matches: append the query ID as a 13th
    # column, sort by query and descending bit score, keep the first
    # line of each query, then drop the helper column again.
    # (A stray "$1" shell operand after the awk program was removed.)
    print STDERR "Parsing results...\n";
    `cat $curr_blast | awk '{OFS="\t";print \$0, \$1 }' | sort -k 1,1 -k 12,12rn | uniq -f 12 | awk '{OFS="\t";print \$1, \$2, \$3, \$4, \$5, \$6, \$7, \$8, \$9, \$10, \$11, \$12 }' > $curr_blast_best_matches`;

    # Extracting the gene ID
    `cat $curr_blast_best_matches | awk '{print \$1}' > $curr_blast_best_matches_ids`;

    # Extracting the gene ID coordinates
    `grep -wf $curr_blast_best_matches_ids $curr_gff | grep 'CDS' | awk '{print \$1"\t"\$4"\t"\$5}' | sort -nk 3,3 > $curr_blast_matches_coords`;

    # Union command - There are some genes with overlap, so we use union so we do not count some positions twice
    print STDERR "Collapsing results...\n";
    `$UTILS_DIR/./union.pl --in $curr_blast_matches_coords --seg_name 1 --seg_start 2 --seg_end 3 > $curr_blast_union`;

    # Extracting tRNA coordinates from the GFF
    print STDERR "Extracting tRNA records...\n";
    my $gff_input_selector;

    if ($separate_gff_trna) {

        # NOTE(review): the separate tRNA GFF is not restricted to the
        # current scaffold here - confirm this is intended.
        $gff_input_selector = $gff_trna;
    }
    else {
        $gff_input_selector = $curr_gff;
    }
    `awk '\$3=="tRNA"{print \$0}' $gff_input_selector | awk '{print \$1"\t"\$4"\t"\$5}' > $tRNAfile`;

    my $blast_window_output = compute_density_sliding_windows( $scaff_chrom, $curr_blast, $curr_blast_union, $intermediate_files_dir, $window_size );

    my $blast_merged_final = group_consecutive_windows( $scaff_chrom, $blast_window_output, $intermediate_files_dir, $window_size );

    my $blast_merged_united = trim_phages( $scaff_chrom, $curr_blast_union, $blast_merged_final, $intermediate_files_dir );

    my ( $ref_arr_phages_found, $phages_curr_scaffold ) =
      trim_phages_by_trna_or_coding_gene( $scaff_chrom, $curr_blast_union, $blast_merged_united, $tRNAfile, $intermediate_files_dir, $outdir );

    render_phage( $scaff_chrom, $ref_arr_phages_found, $curr_fasta, $curr_gff, $phages_curr_scaffold, $curr_blast_union, $outdir );

    slice_out_phage_seq( $scaff_chrom, $ref_arr_phages_found, $curr_fasta, $outdir );

}

# Copy phage_db stats to results dir
`cp $PROPHET_DB_DIR/phage_db.summary.stats $outdir/phage_db.summary.stats`;

exit(0);

#########
# Sliding window and phage predictions:
#-Input: Coordinates with phage matches
#-Sliding window and intersect with phage matches - Windows of 10,000 with 1,000 increment
#-Estimates the Density
#  of nucleotides with phage matches in a given window
#-Output: Phage matches for each window
#
# Parameters:
#   $scaff_chrom            - scaffold/chromosome ID, used as file name prefix
#   $blast                  - BLAST output file (accepted for interface
#                             compatibility, not read here)
#   $blast_union            - tab-separated file (id, start, end) with the
#                             merged coordinates of genes matching phage proteins
#   $intermediate_files_dir - directory receiving every file written here
#   $window_size            - window length in bp
#
# Returns the path of "<scaffold>.blast.window.output", one line per window:
#   id <TAB> window start <TAB> window end <TAB> matched bases in the window
#
# Windows start at coordinate 1 and advance 1,000 bp at a time until one
# window length past the end of the last record of $blast_union.
# Dies when a file cannot be opened/written or when bedtools fails.

sub compute_density_sliding_windows {

    my ( $scaff_chrom, $blast, $blast_union, $intermediate_files_dir, $window_size ) = @_;

    print STDERR "Computing density of prophage genes for each sliding window (window size = $window_size bp)...\n";

    my $blast_window_output                = "$intermediate_files_dir/$scaff_chrom.blast.window.output";
    my $blast_log_matches                  = "$intermediate_files_dir/$scaff_chrom.blast.log.matches";
    my $blast_window                       = "$intermediate_files_dir/$scaff_chrom.blast.window";
    my $blast_intersect_entre_phage_janela = "$intermediate_files_dir/$scaff_chrom.entre.phage.janela";

    # Distance between the start of two consecutive windows
    my $step = 1000;

    open( my $union_fh, '<', $blast_union )
      or die "Unable to open file $blast_union!\n";
    my @genes = <$union_fh>;
    close($union_fh);

    # File with the number of bases with phage matches in a given window
    open( my $results_fh, '>', $blast_window_output )
      or die "Unable to write on file $blast_window_output!\n";

    # Log-file with the coordinates of matches in a given window
    open( my $log_fh, '>', $blast_log_matches )
      or die "Unable to write on file $blast_log_matches!\n";

    # Without any match there is no last coordinate to scan up to: leave
    # both output files empty instead of building a window from an
    # undefined record.
    if ( !@genes ) {
        close($log_fh);
        close($results_fh);
        return $blast_window_output;
    }

    # As we use a $window_size window, we stop the analysis $window_size
    # after the last match with a phage gene
    my $last_record = $genes[-1];
    chomp $last_record;
    my @last_pos   = split( /[\t]/, $last_record );
    my $id_genoma  = $last_pos[0];
    my $final_pos  = $last_pos[2];
    my $final_real = $final_pos + $window_size;

    # Sliding window to intersect with the phage matches coordinates
    for ( my $window_end = $window_size ; $window_end <= $final_real ; $window_end += $step ) {

        my $window_start = $window_end - ( $window_size - 1 );

        # The window file is overwritten at each iteration
        open( my $window_fh, '>', $blast_window )
          or die "Unable to write on file $blast_window!\n";
        print {$window_fh} "$id_genoma\t$window_start\t$window_end\n";
        close($window_fh)
          or die "Unable to write on file $blast_window!\n";

        # Intersect the current window with the phage-match coordinates.
        # A failing bedtools would otherwise look like "no matches".
        `$BEDTOOLS_PATH intersect -a $blast_union -b $blast_window > $blast_intersect_entre_phage_janela`;
        if ( $? != 0 ) {
            die "ERROR: Unable to execute bedtools intersect (wait status $?)!\n";
        }

        open( my $intersect_fh, '<', $blast_intersect_entre_phage_janela )
          or die "Unable to open file $blast_intersect_entre_phage_janela!\n";
        my @merged = <$intersect_fh>;
        close($intersect_fh);

        # Stores the coordinates with phage matches in a given window in the log file
        print {$log_fh} "#Window\t$window_start\t$window_end\n";

        # Total extent of the segments with phage matches in this window
        my $gene_content = 0;
        foreach my $segment (@merged) {
            chomp $segment;
            my ( undef, $initial, $final ) = split( /[\t]/, $segment );

            # Both coordinates are treated as inclusive
            $gene_content += ( $final + 1 ) - $initial;

            print {$log_fh} "$id_genoma\t$initial\t$final\n";
        }

        # Inter-window log separator
        print {$log_fh} "--------------------------------\n";

        # Number of phage-related nucleotides in this window
        print {$results_fh} "$id_genoma\t$window_start\t$window_end\t$gene_content\n";
    }

    close($log_fh);
    close($results_fh)
      or die "Unable to write on file $blast_window_output!\n";

    return $blast_window_output;
}

##########
# Group consecutive windows with phage matches
#-Input: Phage matches for each window
#-Group consecutive windows with phage matches given the phage content in a given window is higher than half of the window (5000b), generating
#  raw-clusters
#-Then, group raw-clusters with overlaps
#-Output - Raw phage coordinates prediction
#
# Parameters:
#   $scaff_chrom            - scaffold/chromosome ID, used as file name prefix
#   $blast_window_output    - tab-separated file with one line per window:
#                             id, window start, window end, matched bases
#   $intermediate_files_dir - directory receiving the files written here
#   $window_size            - window length in bp; a window belongs to a
#                             cluster when its matched bases exceed
#                             int( $window_size / 2 )
#
# Returns the path of "<scaffold>.blast.merged.final" (clusters after
# overlapping ones were merged by union.pl).
#
# Each cluster spans from the start of its first window to the end of its
# last window above the threshold. This now also holds for a run that
# reaches the last window of the file (the end used to be taken two
# windows too early, or from an unrelated record), and the window right
# after a cluster is evaluated like any other (it used to be skipped).

sub group_consecutive_windows {

    print STDERR "Grouping consecuting windows containing the putative prophage...\n";

    my ( $scaff_chrom, $blast_window_output, $intermediate_files_dir, $window_size ) = @_;

    my $half_window = int( $window_size / 2 );

    my $blast_merged       = "$intermediate_files_dir/$scaff_chrom.blast.merged";
    my $blast_merged_final = "$intermediate_files_dir/$scaff_chrom.blast.merged.final";

    # Each element: [ id, window start, window end, matched bases ]
    my @windows;
    open( my $input_fh, '<', $blast_window_output )
      or die "Unable to open file $blast_window_output!\n";
    while ( my $line = <$input_fh> ) {
        chomp $line;
        next if ( $line eq '' );
        push( @windows, [ split( /[\t]/, $line ) ] );
    }
    close($input_fh);

    open( my $temp_fh, '>', $blast_merged )
      or die "Unable to write on file $blast_merged!\n";

    # Cluster consecutive windows with "phage content" higher than $half_window
    my $k = 0;
    while ( $k <= $#windows ) {

        if ( $windows[$k][3] <= $half_window ) {
            $k++;
            next;
        }

        # Extend the cluster while the NEXT window is also above the threshold
        my $last_in_cluster = $k;
        while ( $last_in_cluster < $#windows
            && $windows[ $last_in_cluster + 1 ][3] > $half_window )
        {
            $last_in_cluster++;
        }

        my $id              = $windows[$k][0];
        my $ini_coordinates = $windows[$k][1];
        my $end_coordinates = $windows[$last_in_cluster][2];
        print {$temp_fh} "$id\t$ini_coordinates\t$end_coordinates\n";

        $k = $last_in_cluster + 1;
    }
    close($temp_fh)
      or die "Unable to write on file $blast_merged!\n";

    # Merge raw-clusters whose coordinates overlap
    `$UTILS_DIR/union.pl --in $blast_merged --seg_name 1 --seg_start 2 --seg_end 3 > $blast_merged_final`;
    warn "WARNING: union.pl finished with a non-zero wait status ($?)\n" if ( $? != 0 );

    return $blast_merged_final;
}

##########
# Trimming raw phages with less than 8 genes with phage matches
#-Input: Raw phage coordinates prediction
#-Check if the raw phage has at least 8 genes with phage matches
#-Output: raw phages that have at least 8 genes with phage matches
#
# Parameters:
#   $scaff_chrom            - scaffold/chromosome ID, used as file name prefix
#   $blast_union            - tab-separated file (id, start, end) with the
#                             coordinates of genes matching phage proteins
#   $blast_merged_final     - tab-separated file (id, start, end) with the
#                             raw phage regions
#   $intermediate_files_dir - directory receiving the file written here
#   $min_genes              - optional; minimum number of matching genes
#                             lying strictly inside a region for it to be
#                             kept (positive integer, default 8)
#
# Returns the path of "<scaffold>.blast.merged.united", holding the lines
# of $blast_merged_final that passed the filter, unchanged.

sub trim_phages {

    print STDERR "Trimming prophage...\n";

    my ( $scaff_chrom, $blast_union, $blast_merged_final, $intermediate_files_dir, $min_genes ) = @_;

    $min_genes = 8 unless ( defined($min_genes) );

    my $blast_merged_united = "$intermediate_files_dir/$scaff_chrom.blast.merged.united";

    open( my $merged_fh, '<', $blast_merged_final ) or die "couldnt open $blast_merged_final\n";
    my @merged = <$merged_fh>;
    close($merged_fh);

    open( my $blast_fh, '<', $blast_union ) or die "couldnt open $blast_union";
    my @blast = <$blast_fh>;
    close($blast_fh);

    # Parse the gene coordinates once instead of once per region
    my @gene_coords;
    foreach my $gene_line (@blast) {
        my $record = $gene_line;
        chomp $record;
        my ( undef, $B_start, $B_end ) = split( /[\t]/, $record );
        push( @gene_coords, [ $B_start, $B_end ] );
    }

    open( my $united_fh, '>', $blast_merged_united )
      or die "Unable to write on file $blast_merged_united!\n";

    foreach my $region_line (@merged) {
        my $record = $region_line;
        chomp $record;
        my ( undef, $P_start, $P_end ) = split( /[\t]/, $record );

        my $contador = 0;
        foreach my $gene (@gene_coords) {
            my ( $B_start, $B_end ) = @{$gene};

            # Only genes lying strictly inside the region are counted
            next unless ( $B_start > $P_start && $B_end < $P_end );

            $contador++;
            if ( $contador >= $min_genes ) {

                # The region line is written exactly as it was read
                print {$united_fh} $region_line;
                last;
            }
        }
    }
    close($united_fh)
      or die "Unable to write on file $blast_merged_united!\n";

    # The brace closing this sub is the first character of the next source line.
    return $blast_merged_united;
}


##########
# Trimming raw phage borders by tRNAs or by the last gene with a phage match
#
# Input : raw (merged) phage coordinates of one scaffold/chromosome
# Action: trims each raw phage to the first/last gene with a phage match that
#         lies inside it, then looks for tRNA genes within 3 kb of each trimmed
#         border and moves the border to the closest tRNA found
# Output: polished final phage predictions

# Reads a whole text file and returns its lines without trailing newlines.
sub _read_chomped_lines {
    my ($file) = @_;

    open( my $fh, '<', $file ) or die "Unable to open file $file!\n";
    my @lines = <$fh>;
    close($fh);
    chomp(@lines);

    return @lines;
}

# Parameters:
#   $scaff_chrom            - scaffold/chromosome name, used to name the
#                             intermediate files
#   $blast_union            - tab-delimited file: id, start, end of every gene
#                             with a phage match
#   $blast_merged_united    - tab-delimited file: scaffold, start, end of every
#                             raw phage
#   $tRNAfile               - tab-delimited file: id, start, end of every tRNA
#   $intermediate_files_dir - directory receiving the tRNA log and the
#                             per-scaffold coordinates file
#   $outdir                 - directory holding the cumulative 'phages_coords'
#                             file (opened in append mode)
# Returns:
#   ( reference to an array of { scaff_chrom, id, start, end } hashes,
#     path of the per-scaffold coordinates file )
sub trim_phages_by_trna_or_coding_gene {

    print STDERR "Trimming prophage based on tRNA or last gene in the last and first window ...\n";

    my ( $scaff_chrom, $blast_union, $blast_merged_united, $tRNAfile,
        $intermediate_files_dir, $outdir ) = @_;

    my $trna_log             = "$intermediate_files_dir/$scaff_chrom.trna.log";
    my $phages_coord         = "$outdir/phages_coords";
    my $phages_curr_scaffold = "$intermediate_files_dir/$scaff_chrom.phages_coords";

    my @phages  = _read_chomped_lines($blast_merged_united);
    my @matches = _read_chomped_lines($blast_union);
    my @tRNA    = _read_chomped_lines($tRNAfile);

    open( my $outlog_fh, '>', $trna_log )
      or die "Unable to write on file $trna_log!\n";

    open( my $phages_final_fh, '>>', $phages_coord )
      or die "Unable to write on file $phages_coord!\n";

    open( my $curr_scaffold_fh, '>', $phages_curr_scaffold )
      or die "Unable to write on file $phages_curr_scaffold!\n";

    # Maximum distance (bp) between a phage border and a tRNA for the tRNA
    # to be taken as the new border
    my $trna_window = 3000;

    my @phages_found;

    for my $n ( 0 .. $#phages ) {
        my ( $phage_scaff_chrom, $raw_initial, $raw_final ) =
          split( /\t/, $phages[$n] );
        my $phage_id = $n + 1;

        # Collect the genes with a phage match lying inside the raw phage
        my @inside_initial;
        my @inside_final;

        for my $match (@matches) {
            my ( undef, $match_initial, $match_final ) = split( /\t/, $match );

            if (   $match_final > $raw_initial
                && $match_final < $raw_final )
            {
                push( @inside_initial, $match_initial );
            }

            if (   $match_initial > $raw_initial
                && $match_initial < $raw_final )
            {
                push( @inside_final, $match_final );
            }
        }

        # Trim to the first/last matching gene. If no gene lies strictly
        # inside the raw phage, keep the raw border instead of an undefined
        # coordinate.
        my $phage_initial = @inside_initial ? $inside_initial[0] : $raw_initial;
        my $phage_final   = @inside_final   ? $inside_final[-1] : $raw_final;

        # Working copies of the borders, so they can be adjusted by a tRNA
        my $phage_begining = $phage_initial;
        my $phage_ending   = $phage_final;

        my $initial_minus = $phage_initial - $trna_window;
        my $initial_plus  = $phage_initial + $trna_window;
        my $final_minus   = $phage_final - $trna_window;
        my $final_plus    = $phage_final + $trna_window;

        # Smallest tRNA-to-border distance accepted so far, for each border
        my $best_begin_dist = $trna_window;
        my $best_end_dist   = $trna_window;

        # Checks for tRNA genes near phage borders
        for my $trna_line (@tRNA) {
            my ( undef, $trna_initial, $trna_final ) =
              split( /\t/, $trna_line );

            if (   $trna_initial > $initial_minus
                && $trna_final < $initial_plus )
            {
                if (   $trna_initial < $phage_initial
                    && $trna_final < $phage_initial )
                {
                    # tRNA entirely upstream of the start border
                    my $dist = $phage_initial - $trna_final;
                    if ( $best_begin_dist > $dist ) {
                        $best_begin_dist = $dist;
                        $phage_begining  = $trna_initial;
                    }
                }
                elsif ($trna_initial > $phage_initial
                    && $trna_final > $phage_initial )
                {
                    # tRNA entirely downstream of the start border
                    my $dist = $trna_initial - $phage_initial;
                    if ( $best_begin_dist > $dist ) {
                        $best_begin_dist = $dist;
                        $phage_begining  = $trna_initial;
                    }
                }
                elsif ($trna_initial < $phage_initial
                    && $trna_final > $phage_initial )
                {
                    # tRNA spans the start border: distance zero. Record it
                    # so that a farther tRNA cannot override this one later.
                    $best_begin_dist = 0;
                    $phage_begining  = $trna_initial;
                }

                print {$outlog_fh} "$trna_line\n";
            }
            elsif ($trna_initial > $final_minus
                && $trna_final < $final_plus )
            {
                if (   $trna_initial < $phage_final
                    && $trna_final < $phage_final )
                {
                    # tRNA entirely upstream of the end border
                    my $dist = $phage_final - $trna_final;
                    if ( $best_end_dist > $dist ) {
                        $best_end_dist = $dist;
                        $phage_ending  = $trna_final;
                    }
                }
                elsif ($trna_initial > $phage_final
                    && $trna_final > $phage_final )
                {
                    # tRNA entirely downstream of the end border
                    my $dist = $trna_initial - $phage_final;
                    if ( $best_end_dist > $dist ) {
                        $best_end_dist = $dist;
                        $phage_ending  = $trna_final;
                    }
                }
                elsif ($trna_initial < $phage_final
                    && $trna_final > $phage_final )
                {
                    # tRNA spans the end border: distance zero (see above)
                    $best_end_dist = 0;
                    $phage_ending  = $trna_final;
                }

                print {$outlog_fh} "$trna_line\n";
            }
        }

        push @phages_found,
          {
            scaff_chrom => $phage_scaff_chrom,
            id          => $phage_id,
            start       => $phage_begining,
            end         => $phage_ending
          };

        print {$phages_final_fh}
          "$phage_scaff_chrom\t$phage_id\t$phage_begining\t$phage_ending\n";
        print {$curr_scaffold_fh}
          "$phage_id\t$phage_begining\t$phage_ending\n";
    }

    # Buffered write errors only surface at close time
    close($outlog_fh)
      or die "Unable to write on file $trna_log!\n";
    close($phages_final_fh)
      or die "Unable to write on file $phages_coord!\n";
    close($curr_scaffold_fh)
      or die "Unable to write on file $phages_curr_scaffold!\n";

    return ( \@phages_found, $phages_curr_scaffold );
}

#########
# Generate the image file
#-Input: Final phage prediction, genome fasta, gff file
#-Generates the image output file
#-Output: Image output file
#
# Nothing is rendered when no phage was found. A failure of one of the
# external commands is reported on STDERR but does not abort the run.

sub render_phage {

    print STDERR "Rendering graph depicting prophage genes ...\n";

    my ( $scaff_chrom, $ref_arr_phages_found, $fasta, $gff, $phages_coords,
        $blast_union, $intermediate_files_dir ) = @_;

    my $gff_ultraformated = "$intermediate_files_dir/$scaff_chrom.gff_ultraformated";
    my $svg               = "$intermediate_files_dir/$scaff_chrom.svg";

    return if scalar( @{$ref_arr_phages_found} ) == 0;

    # Reduce the GFF to "<seq id> <start> <end>" of every CDS line
    my $format_cmd =
      qq{cat $gff | grep 'CDS' | awk '{print \$1"\t"\$4"\t"\$5}' > $gff_ultraformated};
    `$format_cmd`;
    warn "WARNING: command failed (exit status $?): $format_cmd\n" if $? != 0;

    my $render_cmd =
      "$UTILS_DIR/./gff2graph-from-scratch.pl $gff_ultraformated $fasta $svg $blast_union $phages_coords";
    `$render_cmd`;
    warn "WARNING: command failed (exit status $?): $render_cmd\n" if $? != 0;

    # The reduced GFF is only needed by the rendering script
    if ( -e $gff_ultraformated ) {
        unlink($gff_ultraformated)
          or warn "WARNING: unable to remove file $gff_ultraformated: $!\n";
    }

    return;
}

# Extracts the nucleotide sequence of every predicted phage with EMBOSS
# extractseq, writing one FASTA file per phage:
#   <outdir>/<scaffold>.phage_<id>.fas
#
# $scaff_chrom is accepted for interface compatibility only; the scaffold
# name of each phage is taken from the phage record itself.
sub slice_out_phage_seq {

    my ( $scaff_chrom, $ref_arr_phages_found, $fasta, $outdir ) = @_;

    foreach my $curr_phage ( @{$ref_arr_phages_found} ) {
        my $phage_scaff_chrom = $curr_phage->{scaff_chrom};
        my $id                = $curr_phage->{id};
        my $start             = $curr_phage->{start};
        my $end               = $curr_phage->{end};

        my $cmd =
          "$EMBOSS_EXTRACTSEQ_PATH -sequence $fasta -regions \"$start-$end\" -osdbname2 phage_${id}:${start}-${end} $outdir/$phage_scaff_chrom.phage_$id.fas";

        `$cmd`;
        warn "WARNING: command failed (exit status $?): $cmd\n" if $? != 0;
    }

    return;
}


#############
# Executing BLAST cmds if using the grid
#
# Writes every command queued in @cmds to <outdir>/blast.cmds, submits that
# file through run_cmds_on_grid.py and dies unless the submission reports
# "No failed commands".
sub execute_blast_on_grid {
    my ($outdir) = @_;

    print STDERR "BLASting protein sequences against phage proteins db...\n";

    my $cmds_file = "$outdir/blast.cmds";

    open( my $blast_cmds_fh, '>', $cmds_file )
      or die "Unable to write on file $outdir/blast.cmds!\n";

    foreach my $blast_cmd (@cmds) {
        print "Writing on file BLAST command: $blast_cmd" if $debug;
        print {$blast_cmds_fh} $blast_cmd;
    }

    # Buffered write errors only surface at close time
    close($blast_cmds_fh)
      or die "Unable to write on file $outdir/blast.cmds!\n";

    my $cmd = "$OBA_DIR/run_cmds_on_grid.py $cmds_file";
    print "$cmd\n" if $debug;

    my $output = `$cmd`;
    print $output if $debug && defined $output;

    # $output is undefined when the command could not be started at all
    if ( !defined $output || $output !~ /No failed commands/ ) {
        die
"\nERROR: Unable to complete succesfully all BLAST commands!! Please check for errors in the UGER log files\n\n";
    }

    print "No failed commands.\n";

    return;
}


# Splits a FASTA file into parts and queues one BLAST command per part.
#
# Parameters:
#   $batch_name       - prefix of the part files
#   $blast_cmd_base   - BLAST command line without input/output options
#   $input_file       - FASTA file to split
#   $ref_num_of_parts - reference to the requested number of parts; updated
#                       with the number of parts actually created
#   $refArrCmds       - array reference receiving the BLAST commands
#   $refArrOutFiles   - array reference receiving the BLAST output file names
#   $outdir           - not used by this routine
#   $blastDirectory   - directory receiving the part files and BLAST outputs
sub split_blast {
    my (
        $batch_name,       $blast_cmd_base, $input_file,
        $ref_num_of_parts, $refArrCmds,     $refArrOutFiles,
        $outdir,           $blastDirectory
    ) = @_;

    print "Spliting FASTA file $input_file...\n";

    my $cmd_split_fasta =
"$UTILS_DIR/splitfasta.pl --fasta_in $input_file --num_files $$ref_num_of_parts --pre $batch_name --out_dir $blastDirectory";

    print $cmd_split_fasta . "\n" if $debug;

    my $output = `$cmd_split_fasta`;
    $output = "" if !defined $output;

    # The number of files created is the last token printed by splitfasta.pl
    my @cols              = split " ", $output;
    my $num_files_created = pop @cols;

    # Without at least one part there is nothing to BLAST; stop here instead
    # of silently queueing zero commands.
    my ($parts_count) =
      defined $num_files_created ? $num_files_created =~ /^(\d+)/ : ();
    die "ERROR: Unable to split FASTA file $input_file (command: $cmd_split_fasta)\n"
      if !defined $parts_count || $parts_count < 1;

    print STDERR "Number of files created: $num_files_created\n" if $debug;

    # Adjusting the value of created files
    $$ref_num_of_parts = $num_files_created;

    print STDERR $output . "\n" if $debug;

    for my $file_index ( 1 .. $parts_count ) {
        my $outputFile = $blastDirectory . "/" . "$batch_name.$file_index.blast";
        my $inputFile  = $blastDirectory . "/" . "$batch_name.$file_index.fas";
        my $cmd        = $blast_cmd_base . " -i $inputFile -o $outputFile\n";

        push( @$refArrCmds,     $cmd );
        push( @$refArrOutFiles, $outputFile );
        print "Generating BLAST cmd: $cmd" if $debug;
    }

    return;
}

# Concatenates the BLAST output files listed in @$refArrOutFiles, in order,
# into $output_file. Done in Perl rather than through 'cat' so that an empty
# list cannot block on standard input and unusual file names are safe.
sub merge_blast {
    print "Merging BLAST files...\n";

    my ( $refArrOutFiles, $output_file ) = @_;

    print "cat " . join( " ", @{$refArrOutFiles} ) . " > $output_file\n"
      if $debug;

    open( my $out_fh, '>', $output_file )
      or die "Unable to write on file $output_file!\n";

    foreach my $part_file ( @{$refArrOutFiles} ) {
        open( my $in_fh, '<', $part_file )
          or die "Unable to open file $part_file!\n";
        while ( my $line = <$in_fh> ) {
            print {$out_fh} $line;
        }
        close($in_fh);
    }

    close($out_fh)
      or die "Unable to write on file $output_file!\n";

    return;
}
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |

ProphET, Prophage Estimation Tool: a standalone prophage sequence prediction tool with self-updating reference database.

3 | 4 | João L. Reis-Cunha1,2, Daniella C. Bartholomeu2, Ashlee M. Earl1, Gustavo C. Cerqueira1 5 | 6 | PMID: 31577829 PMCID: PMC6774505 DOI: 10.1371/journal.pone.0223364 7 | 8 | Manuscript Published in Plos One 9 | 10 | ------ 11 | 12 | 1 Broad Institute of Harvard and MIT, Cambridge, Massachusetts, United States (2017) 13 | 14 | 2 Instituto de Ciências Biológicas, Universidade Federal de Minas Gerais, Brazil 15 | 16 | ------ 17 |

Contact

18 | 19 | jaumlrc@gmail.com 20 | 21 | gustavo@broadinstitute.org 22 | 23 | ------ 24 | 25 | ProphET is an open source software developed to be used in the Linux platform. Users are free to bundle executables and modify the script. However, we do not guarantee the efficiency and precision of predictions if modifications were performed in the script and dependencies. 26 | 27 | ------ 28 | 29 | 30 |

Required libraries and programs:

31 | 32 | Broad users don't need to install any of the programs and libraries listed below. If you are a **Broadie**, please follow the instructions on [README_BROAD_USERS.md](README_BROAD_USERS.md) before installing and running ProphET. 33 | 34 | * EMBOSS suite 35 | 36 | * BEDTools suite 37 | 38 | * BLAST 39 | 40 | * Perl module Bio::Perl 41 | 42 | * Perl module SVG 43 | 44 | * Perl module GD 45 | 46 | * Perl module GD::SVG 47 | 48 | * Perl module Bio::Graphics 49 | 50 | * Perl module LWP::Simple 51 | 52 | * Perl module XML::Simple 53 | 54 | * Perl module Mozilla::CA 55 | 56 | * Perl module LWP::Protocol::https 57 | 58 | ------ 59 | 60 |

ProphET Dependencies:

61 | 62 | ProphET requires that BLAST (legacy), EMBOSS and BEDTools are installed and added to the environment variable PATH. 63 | 64 | **BLAST:** 65 | 66 | BLAST legacy can be downloaded from the following link: 67 | ftp://ftp.ncbi.nlm.nih.gov/blast/executables/legacy.NOTSUPPORTED/2.2.26/ 68 | 69 | Download BLAST legacy using wget for Linux: 70 | ``` 71 | $ wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/legacy.NOTSUPPORTED/2.2.26/blast-2.2.26-x64-linux.tar.gz 72 | ``` 73 | 74 | Unpack files using tar: 75 | ``` 76 | $ tar -xvzf blast-2.2.26-x64-linux.tar.gz 77 | ``` 78 | 79 | Change directory to where blastall and formatdb executables are: 80 | ``` 81 | $ cd blast-2.2.26/bin 82 | ``` 83 | 84 | Print the full path to current folder using pwd: 85 | ``` 86 | $ pwd -P 87 | ``` 88 | example output from pwd: 89 | ``` 90 | /data/usr/BLAST/blast-2.2.26/bin 91 | ``` 92 | 93 | Add BLAST binary folder to the $PATH environment variable: 94 | ``` 95 | PATH=<path_to_blast_bin_folder>:$PATH 96 | ``` 97 | example: 98 | ``` 99 | PATH=/data/usr/BLAST/blast-2.2.26/bin:$PATH 100 | ``` 101 | 102 | Test if blastall and formatdb commands are on the path: 103 | ``` 104 | $ blastall --> Will print the blastall help information 105 | $ formatdb --> will print "[formatdb] ERROR: No database name was specified" 106 | ``` 107 | 108 | **EMBOSS** 109 | 110 | EMBOSS suite installation instructions can be obtained from this link: 111 | http://emboss.sourceforge.net/download/ 112 | 113 | **BEDTools** 114 | 115 | BEDTools suite installation instructions can be obtained from this link: 116 | https://bedtools.readthedocs.io/en/latest/content/installation.html 117 | 118 | **Perl modules/libraries:** 119 | 120 | If the script fails and reports missing Perl modules/libraries, please follow the instructions on file [README_INSTALLING_PERL_MODULES.md](README_INSTALLING_PERL_MODULES.md) on how to install those. 
121 | 122 | 123 | **Adding third party programs to the $PATH environment variable** 124 | 125 | EMBOSS and BEDTools folders also have to be added to the $PATH environment variable prior to running the ProphET INSTALL.pl installation. This can be done using the following commands: 126 | ``` 127 | PATH=<path_to_EMBOSS_folder>:$PATH 128 | PATH=<path_to_BEDTools_folder>:$PATH 129 | ``` 130 | Example: 131 | ``` 132 | PATH=/home/bin/EMBOSS-6.3.1/emboss/:$PATH 133 | PATH=/home/bin/bedtools:$PATH 134 | ``` 135 | 136 | 137 | **ProphET was tested with the following versions of third-party dependencies:** 138 | 139 | ``` 140 | blast-legacy-2.2.26 141 | ftp://ftp.ncbi.nlm.nih.gov/blast/executables/legacy.NOTSUPPORTED/2.2.26/blast-2.2.26-x64-linux.tar.gz 142 | blast-legacy-2.2.9 143 | ftp://ftp.ncbi.nlm.nih.gov/blast/executables/legacy.NOTSUPPORTED/2.2.9/blast-2.2.9-amd64-linux.tar.gz 144 | bedtools v2.23.0 145 | https://github.com/arq5x/bedtools2/releases/download/v2.23.0/bedtools-2.23.0.tar.gz 146 | bedtools-v2.28.0 147 | https://github.com/arq5x/bedtools2/releases/download/v2.28.0/bedtools-2.28.0.tar.gz 148 | EMBOSS-6.3.1 149 | ftp://emboss.open-bio.org/pub/EMBOSS/old/6.3.1/EMBOSS-6.3.1.tar.gz 150 | EMBOSS-6.6.0 151 | ftp://emboss.open-bio.org/pub/EMBOSS/EMBOSS-6.6.0.tar.gz 152 | ``` 153 | 154 | ------ 155 | 156 |

ProphET installation:

157 | 158 | To install ProphET and download bacteriophage database please execute the following command from ProphET's home directory: 159 | ``` 160 | $ ./INSTALL.pl 161 | ``` 162 | 163 | This will search for required libraries, set the paths of required programs and download from Genbank (NCBI) all genomes associated to 16 families of bacteriophages 164 | (listed in [config.dir/Prophages_names_sem_Claviviridae_Guttaviridae-TxID](config.dir/Prophages_names_sem_Claviviridae_Guttaviridae-TxID) ). 165 | 166 | IMPORTANT: Please ensure that the third-party programs blastall, EMBOSS and BEDTools were added to the environment variable PATH, or the installation will crash. See “ProphET Dependencies” section for more instructions. 167 | 168 | 169 | Some warnings will be issued during the setup of ProphET DB. See some examples below: 170 | ``` 171 | Warning: bad /anticodon value '(pos:complement(13054..13056),aa:Met,seq:cat)' 172 | Warning: NC_022920: Bad value '(pos:complement(13054..13056),aa:Met,seq:cat)' for tag '/anticodon' 173 | ``` 174 | Those warnings refer to unexpected format for coordinates of tRNA features and they won't affect the execution. 175 | 176 | ------ 177 | 178 |

Testing installation:

179 | 180 | From ProphET's home directory execute the following command (the test GFF file contains both coding genes and tRNAs): 181 | ``` 182 | $ ./ProphET_standalone.pl --fasta test.fasta --gff_in test.gff --outdir test 183 | ``` 184 | 185 | The execution should take ~ 5 minutes. 186 | 187 | Two putative prophages should be reported and their coordinates indicated in the file *test/phages_coords*: 188 | ``` 189 | FORMAT: 190 | <scaffold> <#prophage> <start> <end> 191 | 192 | CONTENT: 193 | NC_005362.1 1 327710 378140 194 | NC_005362.1 2 1292553 1330556 195 | ``` 196 | 197 | Small differences between the coordinates reported above and the coordinates obtained by your first test run of ProphET are expected. 198 | This is due to changes in the database of known prophage proteins, which is updated on each installation of ProphET. 199 | 200 | 201 | The nucleotide sequence of each prophage can be found in: 202 | ``` 203 | test/NC_005362.1.phage_1.fas 204 | test/NC_005362.1.phage_2.fas 205 | ``` 206 | 207 | The program also renders a simple diagram depicting all coding genes in the bacterial genome, coding genes with significant matches to phage genes and the location of predicted prophages: 208 | ``` 209 | test/NC_005362.1.svg 210 | ``` 211 | 212 | ------ 213 | 214 |

Updating phage database:

215 | 216 | To update the bacteriophage database with the latest sequences deposited at Genbank, please execute the following command from ProphET's home directory: 217 | 218 | ``` 219 | $ ./INSTALL.pl --update_db_only 220 | ``` 221 | 222 | The current database will be backed up as 223 | ``` 224 | PhrophET_phage_proteins_database.dir..bak 225 | ``` 226 | 227 | and an updated database will be saved at: 228 | ``` 229 | PhrophET_phage_proteins_database.dir 230 | ``` 231 | 232 | All instances of the prophage DB (current and backups) include a file reporting the download date and stats.: `phage_db.summary.stats`. This file is copied to the results directory of every ProphET execution to enable auditing and reproducibility of results. 233 | 234 | 235 | ------ 236 |

Before running ProphET in your favorite bacterial genome

237 | 238 | * Check if the GFF file that will be provided to ProphET has the format specified by [The Sequence Ontology Consortium](https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md) 239 | 240 | * If your GFF does not meet those specifications, a converter is provided as part of GFFLib (package installed during ProphET setup): 241 | ``` 242 | GFFLib/gff_rewrite --input -output --add_missing_features 243 | ``` 244 | 245 | * The GFF converter will not work for all cases. If you happen to encounter one of those, please issue a ticket reporting that. 246 | 247 | * Check if all sequence IDs in the FASTA file (header of each sequence) match exactly the sequence ID field in the GFF file (first column of the GFF) and vice-versa. 248 | 249 | ------ 250 | 251 |

Usage:

252 | 253 | ``` 254 | ProphET_standalone.pl --fasta_in --gff_in --outdir 255 | [--grid] [--gff_trna ] [--help] 256 | 257 | Options: 258 | --fasta_in - Bacterial genome Fasta file 259 | 260 | --gff_in - Bacterial GFF file 261 | 262 | --gff_trna - Optional parameter, in case the tRNAs are reported in a 263 | separate GFF please provide it here <(Optional)> 264 | 265 | --outdir - output directory 266 | 267 | --grid - Use UGER for BLAST jobs (Currently only works in the Broad 268 | Institute UGER grid system) (Optional) 269 | 270 | --help - print this and some additional info. about FASTA and GFF input 271 | format (Optional) 272 | ``` 273 | 274 | -------------------------------------------------------------------------------- /README_BROAD_USERS.md: -------------------------------------------------------------------------------- 1 |

Setting up environment to run ProphET on Broad UNIX systems

2 | 3 | Broad users only need to load the following dot kits before installing and running ProphET 4 | 5 | ``` 6 | use EMBOSS 7 | use BLAST 8 | use BedTools 9 | use Perl-5.8 10 | use UGER 11 | ``` 12 | -------------------------------------------------------------------------------- /README_INSTALLING_PERL_MODULES.md: -------------------------------------------------------------------------------- 1 | 2 |

Installing CPAN:

3 | 4 | Perl modules can be installed using CPAN. Please first verify that CPAN is installed and configured by issuing the command below: 5 | 6 | ``$ perl -MCPAN -e shell`` 7 | 8 | 9 | * If the command above returns the prompt ```cpan[1]>``` or a similar prompt, then CPAN is already configured. Quit the cpan shell by typing: ```cpan[1]> quit``` 10 | 11 | * If the command returns ```CPAN requires configuration...``` then follow the steps for automatic configuration. Select the default option for every question. Quit CPAN after the configuration is done by typing: ```cpan[1]> quit``` 12 | 13 | * If the command returns: ```Can't locate CPAN.pm in @INC (@INC contains:... ``` then you will need administrative privileges to install CPAN, either using apt-get ```sudo apt-get install build-essential``` or yum ```sudo yum install perl-CPAN``` 14 | 15 | ------ 16 | 17 |

Installing Perl modules:

18 | 19 | Now you are ready to install the required Perl modules. Issue the following commands: 20 | 21 | ``` 22 | $ perl -MCPAN -e 'install Bio::Perl' 23 | $ perl -MCPAN -e 'install SVG' 24 | $ perl -MCPAN -e 'install GD' 25 | $ perl -MCPAN -e 'install GD::SVG' 26 | $ perl -MCPAN -e 'install Bio::Graphics' 27 | $ perl -MCPAN -e 'install LWP::Simple' 28 | $ perl -MCPAN -e 'install XML::Simple' 29 | $ perl -MCPAN -e 'install Mozilla::CA' 30 | $ perl -MCPAN -e 'install LWP::Protocol::https' 31 | ``` 32 | -------------------------------------------------------------------------------- /UTILS.dir/extrair_ncbi_prophage_families.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | 5 | # Given a tab delimited file having as the first column 6 | # a bacteriophage family and its respective 7 | # Taxonomy ID (NCBI) as the second column 8 | # this script retrieves from Genbank the proteome 9 | # of all species belonging to each family 10 | 11 | my $input = $ARGV[0]; 12 | open (INPUT, "$input"); 13 | my @N_seq = ; 14 | close(INPUT); 15 | 16 | # Add date and time to summary stats 17 | my $datestring = localtime(); 18 | `echo $datestring > phage_db.summary.stats`; 19 | 20 | for (my $i=0; $i<=$#N_seq; $i++) { 21 | my @temp1 = split (/\t/, $N_seq[$i]); 22 | my $name = $temp1[0]; 23 | my $Tx_id = $temp1[1]; 24 | 25 | print "Downloading $name from Genbank (NCBI) ...\n"; 26 | mkdir ("$name.dir", 0755); 27 | chdir "$name.dir"; 28 | my $output = system("../../UTILS.dir/retrieve_proteins.sh $name $Tx_id"); 29 | chdir "../"; 30 | 31 | chomp($Tx_id); 32 | # Add stats (#genomes #proteins) to phage_db.summary.stats 33 | `echo $name TaxId: $Tx_id >> phage_db.summary.stats`; 34 | `cat $name.dir/$Tx_id.ncbi_utils.log >> phage_db.summary.stats`; 35 | `cat $name.dir/$Tx_id.prot.log >> phage_db.summary.stats`; 36 | } 37 | -------------------------------------------------------------------------------- 
/UTILS.dir/fasta2line: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | 5 | use Bio::Seq; 6 | use Bio::SeqIO; 7 | use IO::Handle; 8 | 9 | my $usage = "\n\nv0.1\nfasta2line [--length]\n\n" . 10 | "Convert fasta file to one sequence per line file. " . 11 | "The output file is tab delimited and have the following format:\n" . 12 | "\t\t[]\n\n\n"; 13 | 14 | die $usage if( scalar( @ARGV ) > 2 ); 15 | 16 | my $inputFile = $ARGV[0]; 17 | 18 | my $inSeqIO = Bio::SeqIO->new(-file => $inputFile, '-format' => 'Fasta'); 19 | while ( my $inSeq = $inSeqIO->next_seq() ){ 20 | if( !defined( $inSeq->seq() ) ){ 21 | print STDERR $inSeq->id() . " have an empty string as sequence. Sequence discarded\n"; 22 | } else { 23 | print $inSeq->seq() . "\t" . $inSeq->id(); 24 | print "\t" . $inSeq->desc() if( defined( $inSeq->desc() ) ); 25 | print "\t" . $inSeq->length() . "\n" if( defined( $ARGV[1] ) && $ARGV[1] eq "--length" ); 26 | print "\n" if( ! 
defined( $ARGV[1] ) ); 27 | } 28 | } 29 | $inSeqIO->close(); 30 | -------------------------------------------------------------------------------- /UTILS.dir/fetch_genomes_based_on_taxid.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use LWP::Simple; 5 | use XML::Simple; 6 | 7 | #use Data::Dumper; 8 | 9 | my ( $name, $outname, $url, $xml, $out, $count, $query_key, $webenv, $ids ); 10 | my @genomeId; 11 | my @genomeId_nuccore; 12 | my $base = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/'; 13 | 14 | #my $limit = 'srcdb+refseq[prop]+AND+gene+in+chromosome[prop])'; 15 | my $limit = 'srcdb+refseq[prop]'; 16 | 17 | my $tax_id = $ARGV[0]; 18 | my $debug = 0; 19 | my $DOWNLOAD_INCREMENTS = 500; 20 | my $delay = 3; 21 | 22 | open( LOG, ">$tax_id.ncbi_utils.log" ) 23 | or die "ERROR: Unable to write on file $tax_id.ncbi_utils.log\n"; 24 | 25 | undef @genomeId; 26 | $query_key = $webenv = ''; 27 | $tax_id =~ s/ /+/g; 28 | 29 | # ESearch 30 | $url = 31 | $base 32 | . "esearch.fcgi?db=genome&term=txid" 33 | . $tax_id 34 | . "[Organism:exp]&usehistory=y"; 35 | print "URL esearch 1: " . $url . "\n" if $debug; 36 | sleep($delay); 37 | $xml = get($url); 38 | 39 | if ( $xml =~ /(\d+)<\/Count>/ ) { 40 | $count = $1; 41 | } 42 | 43 | print "Number of records in Genome database: $count\n"; 44 | 45 | if ( $count > 20 ) { 46 | $url = 47 | $base 48 | . "esearch.fcgi?db=genome&term==txid" 49 | . $tax_id 50 | . "[Organism:exp]&retmax=$count&usehistory=y"; 51 | print "URL esearch 2: " . $url . 
"\n" if $debug; 52 | sleep($delay); 53 | $xml = get($url); 54 | } 55 | while ( $xml =~ /(\d+?)<\/Id>/gs ) { 56 | my $curr_genome_id = $1; 57 | print "$curr_genome_id\n" if $debug; 58 | push( @genomeId, $curr_genome_id ); 59 | } 60 | 61 | #----------------------------------------------------------------------- 62 | # Converting genome Ids to nuccore 63 | 64 | my $num_genomeId = scalar(@genomeId); 65 | for ( my $ind = 0 ; $ind < $num_genomeId ; $ind += $DOWNLOAD_INCREMENTS ) { 66 | my $last = $ind + ( $DOWNLOAD_INCREMENTS - 1 ); 67 | $last = $num_genomeId - 1 if $last >= $num_genomeId; 68 | 69 | print "Converting genomeids " 70 | . ( $ind + 1 ) . " to " 71 | . ( $last + 1 ) . "...\n"; 72 | 73 | my $ids = join( ',', @genomeId[ $ind .. $last ] ); 74 | 75 | # ELink 76 | $url = $base 77 | . "elink.fcgi?dbfrom=genome&db=nuccore&id=$ids&term=$limit&usehistory=y"; 78 | print "URL elink: $url\n" if $debug; 79 | sleep($delay); 80 | $xml = get($url); 81 | 82 | # create object 83 | my $xmlIn = new XML::Simple( ForceArray => 1 ); 84 | 85 | # read XML file 86 | my $xmlContent = $xmlIn->XMLin($xml); 87 | 88 | foreach my $value ( 89 | @{ $xmlContent->{'LinkSet'}->[0]->{'LinkSetDb'}->[0]->{'Link'} } ) 90 | { 91 | my $curr_id = $value->{'Id'}->[0]; 92 | push( @genomeId_nuccore, $curr_id ); 93 | print $curr_id . "\n" if $debug; 94 | } 95 | 96 | } 97 | getc() if $debug; 98 | 99 | 100 | #----------------------------------------------------------------------- 101 | 102 | # Downloading genomes 103 | $num_genomeId = scalar(@genomeId_nuccore); 104 | 105 | 106 | print "Number of genomes under TaxID $tax_id: " . $num_genomeId . "\n"; 107 | print LOG "Number of genomes under TaxID $tax_id: " . $num_genomeId . 
"\n"; 108 | if ( -e "$tax_id.gb" ) { 109 | `rm $tax_id.gb`; 110 | } 111 | 112 | getc() if $debug; 113 | sleep($delay); 114 | 115 | for ( my $ind = 0 ; $ind < $num_genomeId ; $ind += $DOWNLOAD_INCREMENTS ) { 116 | my $last = $ind + ( $DOWNLOAD_INCREMENTS - 1 ); 117 | $last = $num_genomeId - 1 if $last >= $num_genomeId; 118 | 119 | print "Downloading genomes " 120 | . ( $ind + 1 ) . " to " 121 | . ( $last + 1 ) . "...\n"; 122 | 123 | my $ids = join( ',', @genomeId_nuccore[ $ind .. $last ] ); 124 | 125 | # EFetch 126 | $url = $base . "efetch.fcgi?db=nuccore&id=$ids&rettype=gb&retmode=text"; 127 | print "URL efetch: $url\n" if $debug; 128 | sleep($delay); 129 | $out = get($url); 130 | 131 | open( OUT, ">>$tax_id.gb" ); 132 | print OUT $out; 133 | close OUT; 134 | sleep(10); 135 | } 136 | 137 | close(LOG); 138 | 139 | getc() if $debug; 140 | 141 | 142 | -------------------------------------------------------------------------------- /UTILS.dir/gff2gene_protein_seq.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | 4 | use FindBin; 5 | use lib "$FindBin::Bin/GFFLib"; 6 | 7 | 8 | use GFFFile; 9 | use GFFUtils; 10 | use Bio::SeqIO; 11 | use Bio::Seq; 12 | use Bio::Tools::CodonTable; 13 | 14 | my $usage = 15 | "\nusage: gff2gene_protein_seq.pl " 16 | . " " 17 | . 
" [] [1=add gene name to header] \n\n"; 18 | 19 | $usage .= "Available codon tables:\n"; 20 | 21 | my $tables = Bio::Tools::CodonTable->tables; 22 | while ( my ( $id, $name ) = each %{$tables} ) { 23 | $usage .= "$id = $name\n"; 24 | } 25 | $usage .= "\n"; 26 | 27 | $usage .= "*File format:\n"; 28 | $usage .= "\\t\\n\n"; 29 | $usage .= "\n"; 30 | 31 | die $usage if ( scalar(@ARGV) != 6 && scalar(@ARGV) != 7 && scalar(@ARGV) != 8 ); 32 | 33 | my $gff_filename = $ARGV[0]; 34 | my $genome_filename = $ARGV[1]; 35 | my $codon_table_file_or_number = $ARGV[2]; 36 | my $out_trans = $ARGV[3]; 37 | my $out_cds = $ARGV[4]; 38 | my $out_pep = $ARGV[5]; 39 | my $transcript_buffer = $ARGV[6]; 40 | my $add_gene_name_to_header = $ARGV[7]; 41 | 42 | 43 | my $single_codon_table = -1; 44 | my %seq_codon_table; 45 | 46 | if ( defined $transcript_buffer ) { 47 | die "\n\nERROR: the length of transcript extension should be >= 1\n\n" 48 | if ( $transcript_buffer <= 0 ); 49 | } 50 | 51 | 52 | # Testing if codon table parameter is a number 53 | if( $codon_table_file_or_number =~ /^\d+$/ ){ 54 | print "\n\nINFO: Using a single codon table to all chromosomes: $codon_table_file_or_number\n\n"; 55 | $single_codon_table = $codon_table_file_or_number; 56 | 57 | }else{ 58 | print "\n\nINFO: Codon table FILE provided by user: $codon_table_file_or_number.\nUsing multiple codon tables\n"; 59 | 60 | open CODON, "$codon_table_file_or_number" or die "\n\nERROR: Not able to open file: $codon_table_file_or_number!!!\n\n"; 61 | my $cont_line = 1; 62 | while () { 63 | my $line = $_; 64 | chomp $line; 65 | 66 | my @cols = split "\t", $line; 67 | 68 | die "\n\nERROR: Error on line $cont_line: $line\n" 69 | . "Every line should have the following format:\n" 70 | . 
" \\t\n\n" 71 | if ( scalar(@cols) != 2 or $cols[0] eq "" or $cols[1] eq "" ); 72 | 73 | $seq_codon_table{ $cols[0] } = $cols[1]; 74 | 75 | print STDERR 76 | "Using codon table \'$cols[1]\' for chromosome \'$cols[0]\'\n"; 77 | 78 | $cont_line++; 79 | } 80 | close(CODON); 81 | 82 | print STDERR "\n"; 83 | 84 | 85 | } 86 | 87 | 88 | my $gffFile = GFFFile::new($gff_filename); 89 | 90 | print "Reading GFF file...\n"; 91 | $gffFile->read(); 92 | 93 | # Read FASTA file 94 | print "Reading FASTA file...\n"; 95 | my %fastaSeq; 96 | my @fastaSeqOrder; 97 | my $seq_in = Bio::SeqIO->new( 98 | '-file' => $genome_filename, 99 | '-format' => "fasta" 100 | ); 101 | 102 | while ( my $inseq = $seq_in->next_seq ) { 103 | $fastaSeq{ $inseq->id() } = $inseq->seq(); 104 | push( @fastaSeqOrder, $inseq->id() ); 105 | } 106 | 107 | $seq_in->close(); 108 | 109 | my $gffGenes = $gffFile->get_genes_hash(); 110 | 111 | # Ordering genes based on template name and start coord 112 | my @gffGenesArray = values %{$gffGenes}; 113 | GFFUtils::sort_gene_arrays( \@gffGenesArray, 0 ); 114 | 115 | print "Number of genes in the GFF file: " . scalar(@gffGenesArray) . 
"\n"; 116 | 117 | open OUT_TRAN, ">$out_trans" or die "Unable to open file $out_trans to write\n"; 118 | open OUT_CDS, ">$out_cds" or die "Unable to open file $out_cds to write\n"; 119 | open OUT_PEP, ">$out_pep" or die "Unable to open file $out_pep to write\n"; 120 | 121 | my $cont_trans = 0; 122 | my $cont_cds = 0; 123 | 124 | for my $currGene (@gffGenesArray) { 125 | my $gene_id = $currGene->get_id(); 126 | my $chrom = $currGene->get_chrom(); 127 | my $strand = $currGene->get_strand(); 128 | 129 | die "Unable to find $chrom in the FASTA file $genome_filename" 130 | if not defined $fastaSeq{$chrom}; 131 | 132 | my $fastaSeqStr = $fastaSeq{$chrom}; 133 | 134 | my $gffTranscripts = $currGene->get_transcripts_hash(); 135 | for my $currTranscript ( values %{$gffTranscripts} ) { 136 | my $transcript_id = $currTranscript->get_id(); 137 | $transcript_id = 138 | $currTranscript->get_id() . " " . $currTranscript->get_name() if ( $add_gene_name_to_header == 1 ); 139 | 140 | my $transcript_seq = ""; 141 | for my $currExon ( @{ $currTranscript->get_exon_array() } ) { 142 | $transcript_seq .= substr( 143 | $fastaSeqStr, 144 | $currExon->get_start() - 1, 145 | $currExon->get_end() - $currExon->get_start() + 1 146 | ); 147 | } 148 | 149 | if ( defined $transcript_buffer && $transcript_buffer >= 1 ) { 150 | my $curr_start = $currTranscript->get_start(); 151 | my $curr_end = $currTranscript->get_end(); 152 | 153 | # Setting effective start = start - buffer. 154 | # Taking care of genes near beginning of sequence 155 | my $effective_start = $curr_start - $transcript_buffer; 156 | $effective_start = 1 if $effective_start < 1; 157 | 158 | # Setting effective end = end - buffer. 
159 | # Taking care of genes near beginning of sequence 160 | my $effective_end = $curr_end + $transcript_buffer; 161 | $effective_end = length($fastaSeqStr) 162 | if $effective_end > length($fastaSeqStr); 163 | 164 | # Adding buffer sequence to original transcript sequence 165 | $transcript_seq = 166 | substr( $fastaSeqStr, $effective_start, $transcript_buffer - 1 ) 167 | . $transcript_seq 168 | . substr( $fastaSeqStr, $curr_end, $effective_end - $curr_end ); 169 | 170 | } 171 | 172 | my $cds_seq = ""; 173 | for my $currCDS ( @{ $currTranscript->get_CDS_array() } ) { 174 | $cds_seq .= substr( 175 | $fastaSeqStr, 176 | $currCDS->get_start() - 1, 177 | $currCDS->get_end() - $currCDS->get_start() + 1 178 | ); 179 | } 180 | 181 | if ( $strand eq "-" ) { 182 | $transcript_seq = reverse uc($transcript_seq); 183 | $transcript_seq =~ tr/AGCT/TCGA/; 184 | 185 | $cds_seq = reverse uc($cds_seq); 186 | $cds_seq =~ tr/AGCT/TCGA/; 187 | } 188 | 189 | # Tranlating CDS 190 | my $cdsSeqObj = Bio::Seq->new( 191 | -seq => $cds_seq, 192 | -display_id => "temp", 193 | -alphabet => "dna" 194 | ); 195 | 196 | my $codon_table; 197 | 198 | if ( $single_codon_table != -1 ) { 199 | $codon_table = $single_codon_table; 200 | } 201 | else { 202 | $codon_table = $seq_codon_table{$chrom}; 203 | die 204 | "Error. Using file to define the codon table of each chromosome. " 205 | . 
"But codon table for chromosome \'$chrom\' not found\n" 206 | if not defined $codon_table; 207 | } 208 | 209 | my $pepObj = $cdsSeqObj->translate( -codontable_id => $codon_table ); 210 | my $pep_seq = $pepObj->seq(); 211 | 212 | # Writing to files 213 | print OUT_TRAN ">$transcript_id\n$transcript_seq\n"; 214 | $cont_trans++; 215 | 216 | if ( scalar @{ $currTranscript->get_CDS_array() } != 0 ) { 217 | print OUT_CDS ">$transcript_id\n$cds_seq\n"; 218 | print OUT_PEP ">$transcript_id\n$pep_seq\n"; 219 | $cont_cds++; 220 | } 221 | 222 | } 223 | } 224 | close(OUT_TRAN); 225 | close(OUT_CDS); 226 | close(OUT_PEP); 227 | 228 | print "Number of transcripts sequences: $cont_trans\n"; 229 | print "Number of protein/CDS sequences: $cont_cds\n"; 230 | -------------------------------------------------------------------------------- /UTILS.dir/gff2graph-from-scratch.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | 4 | use FindBin; 5 | use lib "$FindBin::Bin/GFFLib"; 6 | 7 | use GD; 8 | use Bio::SeqIO; 9 | use Bio::Graphics; 10 | use Bio::SeqFeature::Generic; 11 | use Getopt::Std; 12 | use GFFFile; 13 | 14 | my $MAX_E_VALUE = 1000; 15 | my $DEFAULT_WIDTH = 5000; 16 | 17 | my $DEFAULT_COLOR = "blue"; 18 | my $LEFT_BORDER = 0; 19 | 20 | 21 | my $gff_file = $ARGV[0]; 22 | my $fasta_file = $ARGV[1]; 23 | my $outputFile = $ARGV[2]; 24 | my $Blast_matches = $ARGV[3]; 25 | my $Phage_coordinates = $ARGV[4]; 26 | 27 | ### Gerar a figura inicial e a Linha baseada no Fasta: 28 | 29 | my $width = $DEFAULT_WIDTH; ## Sets the width of figure 30 | my $overallMaxLength = 0; ## Stores the length of the largest scaffold. This will be used to define the ratio bp/pixel 31 | my %fasta_seq; ## Stores the length of each sequence. 
## Not being used currently, but it will be used in the next versions of ProphET
my @fasta_seqnames;

######################
# Opening FASTA file and Processing

my $inSeqIO = Bio::SeqIO->new( -file => $fasta_file, '-format' => 'Fasta' );
while ( my $inSeq = $inSeqIO->next_seq() ) {
    if ( !defined( $inSeq->seq() ) || length( $inSeq->seq() ) == 0 ) {
        print STDERR $inSeq->id()
          . " have an empty string as sequence. Sequence discarded\n";
    }
    else {
        my $seq_length = $inSeq->length();
        my $seq_name   = $inSeq->id();

        push( @fasta_seqnames, $seq_name );    ## Array with the names of the scaffolds
        $fasta_seq{$seq_name}{len} = $seq_length;

        $overallMaxLength = $seq_length if ( $overallMaxLength < $seq_length );
    }
}
$inSeqIO->close();

print STDERR "Largest sequence in the FASTA files has $overallMaxLength\n";


######################
# Generate the base panel in which all the widgets will be rendered
my $panel = Bio::Graphics::Panel->new(
    -length      => $overallMaxLength,    ## Sets the length in bp to the length of the largest scaffold
    -key_style   => 'between',
    -image_class => 'GD::SVG',
    -width       => $width,               ## sets the length in pixels
    -pad_left    => 40,
    -pad_right   => 200,
    -pad_top     => 40,
    -pad_bottom  => 40,
);

######################
# Adds the X-axis arrow indicating the coordinates on the scaffold
my $full_length =
  Bio::SeqFeature::Generic->new( -start => 1, -end => $overallMaxLength );
$panel->add_track(
    $full_length,
    -glyph   => 'arrow',
    -tick    => 2,
    -fgcolor => 'black',
    -double  => 4,
    -key     => "Genome_sequence",
    -height  => 15,
);

######################
# Helper shared by the three feature tracks below.
#
# Reads a tab-delimited table and adds one feature per row to $track.
# The first three columns of each row are used as name, start and end;
# lines starting with '#' are ignored, and blank or truncated rows
# (fewer than three columns) are skipped.
#
# A table that cannot be opened only produces a warning, so the figure is
# still rendered with an empty track instead of aborting the whole run.
#
# Returns the number of features added.
sub add_features_from_table {
    my ( $track, $table_file, $description ) = @_;

    if ( !defined $table_file ) {
        warn "No $description file was provided. Track left empty\n";
        return 0;
    }

    my $table_fh;
    if ( !open( $table_fh, '<', $table_file ) ) {
        warn "Unable to open $description file $table_file: $!. Track left empty\n";
        return 0;
    }

    my $num_features = 0;
    while ( my $line = <$table_fh> ) {
        chomp $line;
        next if $line =~ /^\#/;    # ignore comments

        my ( $name, $start, $end ) = split /\t+/, $line;
        next if !defined $end;     # blank or truncated row

        my $feature = Bio::SeqFeature::Generic->new(
            -display_name => $name,
            -start        => $start,
            -end          => $end,
        );
        $track->add_feature($feature);
        $num_features++;
    }
    close($table_fh);

    return $num_features;
}

######################
# Adds the track that will contain all the CDSs in the scaffold
my $cds_track = $panel->add_track(
    -glyph   => 'graded_segments',
    -label   => 0,
    -bgcolor => 'blue',
    -key     => "GFF CDSs:",
    -fgcolor => 'black',
    -bump    => +1,
    -height  => 8,
);
add_features_from_table( $cds_track, $gff_file, "GFF" );

######################
# Add track that will contain the BLAST results against ProphET database
my $blast_track = $panel->add_track(
    -glyph      => 'graded_segments',
    -label      => 0,
    -bgcolor    => 'green',
    -fgcolor    => 'black',
    -key        => "Blast Matches:",
    -font2color => 'red',
    -bump       => +1,
    -height     => 8,
);
add_features_from_table( $blast_track, $Blast_matches, "BLAST matches" );

######################
# Add track that will contain the boundaries of the prophage predictions
my $phage_track = $panel->add_track(
    -glyph   => 'graded_segments',
    -label   => 0,
    -bgcolor => 'cyan',
    -key     => "Predicted Phages:",
    -fgcolor => 'black',
    -bump    => +10,
    -height  => 8,
);
add_features_from_table( $phage_track, $Phage_coordinates, "phage coordinates" );

######################
# Render the panel as SVG. Failing to write the figure is fatal because the
# figure is the only product of this script.
open( my $svg_fh, '>', $outputFile )
  or die "Unable to open file $outputFile to write: $!\n";
print {$svg_fh} $panel->svg;
close($svg_fh)
  or die "Unable to finish writing file $outputFile: $!\n";

--------------------------------------------------------------------------------
/UTILS.dir/line2fasta:
--------------------------------------------------------------------------------
#!/usr/bin/env perl
use strict;
use warnings;

use Bio::Seq;
use Bio::SeqIO;
use IO::Handle;

my $usage = "\n\nv0.2\nline2fasta [<file>]\n\n" .
            "Translate back files created with the fasta2line script\n";

die $usage if( scalar( @ARGV ) > 1 );

my $inputFile = $ARGV[0];

# Read from the file given as argument or, when none is given, from STDIN.
my $in_fh;
if ( defined($inputFile) ) {
    open( $in_fh, '<', $inputFile )
      or die "Unable to open file $inputFile: $!\n";
}
else {
    $in_fh = \*STDIN;
}

# Each input line holds one record: the sequence, whitespace, then the
# identifier/description (everything after the first run of whitespace).
while ( my $line = <$in_fh> ) {
    chomp $line;
    my ( $seq, $id ) = ( $line =~ /([\w\W]+?)[\s\t]+([\w\W]+)/ );

    # Lines that do not hold both a sequence and an identifier (e.g. blank
    # lines) cannot be turned into a FASTA record.
    if ( !defined $seq ) {
        print STDERR "line2fasta: skipping line $. (no sequence/identifier pair)\n";
        next;
    }

    # Wrap the sequence at 60 characters per line. When the length is an
    # exact multiple of 60 the wrapping leaves a trailing newline, which
    # would become a blank line inside the FASTA output.
    $seq =~ s/(\w{60})/$1\n/g;
    $seq =~ s/\n\z//;

    print ">$id\n$seq\n";
}

close($in_fh) if defined($inputFile);

--------------------------------------------------------------------------------
/UTILS.dir/obtain_prot_with_annot_seq.pl:
--------------------------------------------------------------------------------
#!/usr/bin/env perl

use strict;
# Retrieves and concatenates the protein sequences retrieved from Genbank (NCBI)
#
# This script seems to be unnecessary after the latest modifications to
# retrieve_proteins.sh. I explain...
#
# Earlier retrieve_proteins.sh produced the proteome file of each family.
# But now, for the sake of improving speed, one single file with the proteome
# of all families is being generated

my $input = $ARGV[0];    # File containing the name of bacteriophage families
die "Usage: obtain_prot_with_annot_seq.pl <file with the names of the families>\n"
  if !defined $input;

open( my $input_fh, '<', $input )
  or die "Unable to open file $input: $!\n";
my @N_seq = <$input_fh>;
close($input_fh);

# For each family (first tab-delimited column of the input file) append the
# content of <family>.dir/all.prot.fas to the list of protein sequences.
#
# The file is read through its relative path instead of chdir-ing into the
# family directory: with an unchecked chdir, a missing directory made the
# script read from the wrong place and then "chdir ../" moved the process one
# level up for all the remaining families.
my @proteinseq = ();
for my $family_line (@N_seq) {
    chomp $family_line;
    next if $family_line =~ /^\s*$/;    # ignore blank lines

    my @temp1 = split( /\t/, $family_line );
    my $name  = $temp1[0];

    my $prot_file = "$name.dir/all.prot.fas";
    my $prot_fh;
    if ( !open( $prot_fh, '<', $prot_file ) ) {
        # Best effort: a family without proteome is reported and skipped
        print STDERR "Unable to open file $prot_file: $!\n";
        next;
    }
    push( @proteinseq, <$prot_fh> );
    close($prot_fh);
}

print @proteinseq;

--------------------------------------------------------------------------------
/UTILS.dir/retrieve_proteins.SLOW.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Arguments: name of the virus family and its NCBI taxonomy ID
family=$1
txid=$2


###################################
## Retrieving sequence from Genbank
echo "Retrieving representatives of virus family $family, TaxID $txid ..."

../../UTILS.dir/fetch_genomes_based_on_taxid.pl "$txid"

# Split genbank file
../../UTILS.dir/split_genbank.pl "$txid.gb"

# Number of genbank files
num_gb_files=`ls -l *.gbk | wc | awk '{print $1}'`
echo "Number of sequences retrieved from genbank: $num_gb_files"

## List organisms and definition associated to each genbank file
## (start from a clean file, entries are appended below)
rm -f gb_description.txt


echo "Generating a summary of each genome..."
# One line per genbank file: accession, organism, segment/chromosome, definition.
# The files are listed with a glob instead of parsing the output of "ls -l",
# whose column layout is not guaranteed.
for gb_seq in *.gbk
do
    [ -e "$gb_seq" ] || continue    # the glob matched nothing

    org=`awk '$1 == "ORGANISM" {print $0}' "$gb_seq" | sed 's/ORGANISM//' | sed 's/^ \+//' | sed 's/ /_/g'`
    def=`awk '$1 == "DEFINITION" {print $0}' "$gb_seq" | sed 's/DEFINITION//' | sed 's/^ \+//'`
    segment=`perl -lane 'print $seg if ( ($seg) = ( $_ =~ /\/segment=\"([\w\W]+?)\"/ ) )' "$gb_seq"`
    chromosome=`perl -lane 'print $seg if ( ($seg) = ( $_ =~ /\/chromosome=\"([\w\W]+?)\"/ ) )' "$gb_seq"`

    # Without a /segment qualifier fall back to /chromosome, and to "ND"
    # (not determined) when neither is present
    if [[ -z "$segment" ]]; then segment=${chromosome:-ND}; fi

    segment_print=`echo $segment | sed 's/[sS]egment //' | sed 's/ [sS]egment//'`
    gb=${gb_seq%.gbk}

    echo -e "$gb\t$org\t$segment_print\t$def" >> gb_description.txt
done

# Number of species
num_species=`awk '{print $2}' gb_description.txt | sort | uniq | wc | awk '{print $1}'`
echo "Number of species: $num_species"


##########################################################
# Extract mat_peptide (segments of a polyprotein) and CDS

echo "Extracting segments of polyproteins and coding sequences..."
for i in *.gbk
do
    [ -e "$i" ] || continue    # the glob matched nothing

    filename_wo_gb=${i%.gbk}
    echo "Working on $i ..."

    # Extract sense
    extractfeat "$i" -sense 1 -type 'mat_peptide|CDS' -describe 'product|protein_id|locus_tag' -outseq "$filename_wo_gb.sense" -auto

    # Tag the header of each feature with its strand
    perl -pe 's/(>[\W\w]+?) /$1 + /g' "$filename_wo_gb.sense" > "$filename_wo_gb.fasta"

    # Extract anti-sense
    extractfeat "$i" -sense -1 -type 'mat_peptide|CDS' -describe 'product|protein_id|locus_tag' -outseq "$filename_wo_gb.antisense" -auto

    perl -pe 's/(>[\W\w]+?) /$1 - /g' "$filename_wo_gb.antisense" >> "$filename_wo_gb.fasta"

done

#################################################
# Extract and report all features in the files
echo "Extracting all other protein coding features ..."
for i in *.gbk
do
    [ -e "$i" ] || continue    # the glob matched nothing

    filename_wo_gb=${i%.gbk}
    echo "Working on $i ..."

    # Extract every feature, regardless of type and strand
    extractfeat "$i" -describe 'product|protein_id|locus_tag' -outseq "$filename_wo_gb.all_features" -auto
done
grep ">" *.all_features | sed 's/>/ /' | awk '{print $2,$3}' > all_features
awk '{print $2}' all_features | sort | uniq > all_feature_types


#################################################
# Retrieving the product of each CDS or mat_peptide
echo "Retrieving the featured product for each CDS or mat_peptide ..."

# Start from a clean file, entries are appended below
rm -f feature_product.txt

for i in *.fasta
do
    grep ">" "$i" | perl -lane '$line = $_; ($feature) = ( $line =~ /^>([\w\W]+?)\s/ ); ($product) = ( $line =~ /product=\"(.+?)\"/ ); print "$feature\t$product"' >> feature_product.txt
done


##############################################
# Translate genes
echo "Translating genes ..."
for i in *.fasta; do transeq "$i" "$i.prot" -auto; done

# Combine all proteins
echo "Gathering all protein sequences in a single file ..."
cat *.prot > all.prot.fas

# Remove "*" STOP codons from each sequence
echo "Removing * representing STOP codons ..."
../../UTILS.dir/fasta2line all.prot.fas | perl -lane '$seq = $F[0]; $seq =~ s/\*$//; print $seq . "\t" . $F[1]' | ../../UTILS.dir/line2fasta > tmp
mv tmp all.prot.fas


# Number of CDS and mat_peptides
num_CDS_mat=`grep -c ">" all.prot.fas`
echo "Number of features: $num_CDS_mat"

--------------------------------------------------------------------------------
/UTILS.dir/retrieve_proteins.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Arguments: name of the virus family and its NCBI taxonomy ID
family=$1
txid=$2


###################################
## Retrieving sequence from Genbank
echo "Retrieving representatives of virus family $family, TaxID $txid ..."

../../UTILS.dir/fetch_genomes_based_on_taxid.pl "$txid"


##########################################################
# Extract mat_peptide (segments of a polyprotein) and CDS

echo "Extracting segments of polyproteins and coding sequences..."
extractfeat "$txid.gb" -sense 1 -type 'mat_peptide|CDS' -describe 'product|protein_id|locus_tag' -outseq "$txid.sense" -auto
# Tag the header of each feature with its strand
perl -pe 's/(>[\W\w]+?) /$1 + /g' "$txid.sense" > "$txid.fasta"

# Extract anti-sense
extractfeat "$txid.gb" -sense -1 -type 'mat_peptide|CDS' -describe 'product|protein_id|locus_tag' -outseq "$txid.antisense" -auto
perl -pe 's/(>[\W\w]+?) /$1 - /g' "$txid.antisense" >> "$txid.fasta"


#################################################
# Extract and report all features in the files
echo "Extracting all other protein coding features ..."
extractfeat "$txid.gb" -describe 'product|protein_id|locus_tag' -outseq "$txid.all_features" -auto
grep ">" "$txid.all_features" | sed 's/>/ /' | awk '{print $2,$3}' > all_features
awk '{print $2}' all_features | sort | uniq > all_feature_types


#################################################
# Retrieving the product of each CDS or mat_peptide
echo "Retrieving the featured product for each CDS or mat_peptide ..."

# Start from a clean file, entries are appended below
rm -f feature_product.txt

grep ">" "$txid.fasta" | perl -lane '$line = $_; ($feature) = ( $line =~ /^>([\w\W]+?)\s/ ); ($product) = ( $line =~ /product=\"(.+?)\"/ ); print "$feature\t$product"' >> feature_product.txt


##############################################
# Translate genes
echo "Translating genes ..."
transeq "$txid.fasta" "$txid.prot" -auto

# Remove "*" STOP codons from each sequence
echo "Removing * representing STOP codons ..."
../../UTILS.dir/fasta2line "$txid.prot" | perl -lane '$seq = $F[0]; $seq =~ s/\*$//; print $seq . "\t" . join " ", @F[1..$#F]' | ../../UTILS.dir/line2fasta > all.prot.fas

# Number of CDS and mat_peptides
num_CDS_mat=`grep -c ">" all.prot.fas`
echo "Number of coding features under TaxID $txid : $num_CDS_mat"
echo "Number of coding features under TaxID $txid : $num_CDS_mat" >> "$txid.prot.log"

--------------------------------------------------------------------------------
/UTILS.dir/script_remover_vazios.pl:
--------------------------------------------------------------------------------
#!/usr/bin/env perl

# Removes the empty records of a FASTA stream: a record is printed only when
# the line right after its header is not empty.

use strict;
use warnings;

# Read one FASTA record at a time
$/ = "\n>";
while ( my $record = <> ) {

    # Drop the record separator (and any other '>' in the record)
    $record =~ s/>//g;

    my ( $id, $seq ) = split( /\n/, $record );
    print ">$record" if ( defined($seq) && length($seq) );
}

--------------------------------------------------------------------------------
/UTILS.dir/split_genbank.pl:
--------------------------------------------------------------------------------
#!/usr/bin/env perl

# Splits a multi-record Genbank file in one file per record. Each output file
# is named after the first word following the LOCUS keyword plus a suffix
# (".gbk" unless another suffix is given as second argument).

use strict;
use warnings;

my $usage = "\n\nsplit_genbank.pl <genbank file> [<suffix>]\n\n";
die $usage if scalar( @ARGV ) != 1 && scalar( @ARGV ) != 2;

open( my $gb_fh, '<', $ARGV[0] )
  or die "Unable to open file $ARGV[0]: $!\n";

my $suffix = ".gbk";
$suffix = $ARGV[1] if $#ARGV == 1;

# Slurp the whole file
my $todoString = do { local $/; <$gb_fh> };
close($gb_fh);
$todoString = '' if !defined $todoString;

# A record starts with LOCUS at the beginning of a line. Anchoring the split
# avoids breaking a record that mentions the word LOCUS somewhere else.
my @record = split /^LOCUS/m, $todoString;

# Discard whatever comes before the first LOCUS line
shift @record;

foreach my $currRecord (@record) {
    my ($fileName) = ( $currRecord =~ /\s+(\w+)/ );
    if ( !defined $fileName ) {
        print STDERR "Unable to find the locus name of a record. Record discarded\n";
        next;
    }

    my $record_file = $fileName . $suffix;
    open( my $record_fh, '>', $record_file )
      or die "Unable to open file $record_file to write: $!\n";
    print {$record_fh} "LOCUS" . $currRecord;
    close($record_fh)
      or die "Unable to finish writing file $record_file: $!\n";
}

--------------------------------------------------------------------------------
/UTILS.dir/splitfasta.pl:
--------------------------------------------------------------------------------
#!/usr/bin/env perl

use strict;

use Bio::Seq;
use Bio::SeqIO;
use IO::Handle;

use Pod::Usage;
use Getopt::Long;
use FindBin;

=head1 NAME

Splits the fasta sequences in term number of sequences or number of files.

=head1 SYNOPSIS

usage: splitfasta.pl --fasta_in <fasta file> [ --num_seq_per_file <int> | --num_files <int> ] [--out_dir <dir>] --pre <prefix>

=head1 OPTIONS

B<--fasta_in> - FASTA input

B<--num_seq_per_file> - Number of sequences in each file B<(Optional)>

B<--num_files> - Number of file to be divided the original file B<(Optional)>

B<--pre> - prefix of the output files

B<--out_dir> - output directory B<(Optional)>

B<--help> - print this message B<(Optional)>

=head1 DESCRIPTION

The sequences of the input FASTA file are distributed, in their original
order, among files named <out_dir>/<pre>.<number>.fas. The split is driven
either by a number of sequences per file (--num_seq_per_file) or by a number
of files (--num_files); when neither is given, --num_seq_per_file 1 is assumed.

=head1 CONTACT

 Gustavo C. Cerqueira (2015)
 cerca11@gmail.com
 gustavo@broadinstitute.org

=cut


my $fasta_in;
my $num_seq_per_file;
my $num_files;
my $pre;
my $help;
my $output_dir;


# Numeric options are declared as integers so that a malformed value is
# rejected when parsing the command line
GetOptions(
    'fasta_in=s'         => \$fasta_in,
    'num_seq_per_file=i' => \$num_seq_per_file,
    'num_files=i'        => \$num_files,
    'out_dir=s'          => \$output_dir,
    'pre=s'              => \$pre,
    'help!'              => \$help
  )
  or pod2usage(
    -message => "Error: Invalid command line !!!!\n\n",
    -verbose => 1,
    -exitval => 1,
    -output  => \*STDERR
  );

if ( defined($help) ) {
    pod2usage( -verbose => 2, -exitval => 0 );
}


if ( not defined($fasta_in) ) {
    pod2usage(
        -message => "Error: Parameter --fasta_in is required !!!!\n\n",
        -verbose => 1,
        -exitval => 1,
        -output  => \*STDERR
    );
}

if ( not defined($pre) ) {
    pod2usage(
        -message => "Error: Parameter --pre is required !!!!\n\n",
        -verbose => 1,
        -exitval => 1,
        -output  => \*STDERR
    );
}

if ( defined($num_seq_per_file) && defined($num_files) ) {
    pod2usage(
        -message => "Error: Either use the parameter --num_seq_per_file or --num_files!!!!\n\n",
        -verbose => 1,
        -exitval => 1,
        -output  => \*STDERR
    );
}

# Zero or negative values make no sense, and --num_files 0 would lead to a
# division by zero further down
if (   ( defined($num_seq_per_file) && $num_seq_per_file < 1 )
    || ( defined($num_files) && $num_files < 1 ) )
{
    pod2usage(
        -message => "Error: --num_seq_per_file and --num_files must be greater than zero !!!!\n\n",
        -verbose => 1,
        -exitval => 1,
        -output  => \*STDERR
    );
}

if ( not defined($output_dir) ) {
    $output_dir = '.';
}


if ( not defined($num_seq_per_file) && not defined($num_files) ) {
    $num_seq_per_file = 1;
}


# Counting the number of sequences in the FASTA file
my $num_seq_in_orig_file = 0;
my $inSeqIO = Bio::SeqIO->new( -file => $fasta_in, '-format' => 'Fasta' );
while ( my $inSeq = $inSeqIO->next_seq() ) {
    $num_seq_in_orig_file++ if ( defined( $inSeq->seq() ) );
}
$inSeqIO->close();

# Calculate the number of sequences per each file
# given the number of files as result of split and the number of sequences
# in the original file
if ( defined($num_files) ) {
    $num_seq_per_file = int( $num_seq_in_orig_file / $num_files );
    $num_seq_per_file = 1 if $num_seq_per_file == 0;
}


# Generating files
my $count_seqs   = 1;
my $count_files  = 1;
my $seqs_written = 0;    # Number of seqs.
# already read from the original file and written in one of the segmented files

my $out_file = "$output_dir/$pre.$count_files.fas";

mkdir($output_dir) unless ( -d $output_dir );

open( my $out_fh, '>', $out_file )
  or die "Unable to open file $out_file to write: $!\n";
my $out_is_open = 1;

$inSeqIO = Bio::SeqIO->new( -file => $fasta_in, '-format' => 'Fasta' );
while ( my $inSeq = $inSeqIO->next_seq() ) {

    if ( !defined( $inSeq->seq() ) ) {
        print STDERR $inSeq->id() . " have an empty string as sequence. Sequence discarded\n";
    }
    else {
        print {$out_fh} ">" . $inSeq->id() . "\n" . $inSeq->seq() . "\n";
        $count_seqs++;
        $seqs_written++;
    }

    # $count_seqs starts at 1, so it is one ahead of the number of sequences
    # written in the current file: the file is full only when the counter
    # EXCEEDS the quota. (Testing with ">=" closed every file one sequence
    # short, e.g. a quota of 2 produced files with a single sequence.)
    next if $count_seqs <= $num_seq_per_file;

    # When the number of files is the directive of this script, the last file
    # stays open and receives all the remaining sequences
    next if defined($num_files) && $count_files == $num_files;

    # Max number of seqs. per file was reached: close the current file ...
    close($out_fh)
      or die "Unable to finish writing file $out_file: $!\n";
    $out_is_open = 0;

    # ... and open another one, unless there is nothing left to write
    last if ( $seqs_written == $num_seq_in_orig_file );

    $count_files++;
    $out_file = "$output_dir/$pre.$count_files.fas";
    open( $out_fh, '>', $out_file )
      or die "Unable to open file $out_file to write: $!\n";
    $out_is_open = 1;
    $count_seqs  = 1;
}

if ($out_is_open) {
    close($out_fh)
      or die "Unable to finish writing file $out_file: $!\n";
}
$inSeqIO->close();

print "Number of files created: $count_files\n";

--------------------------------------------------------------------------------
/UTILS.dir/union.pl:
--------------------------------------------------------------------------------
#!/usr/bin/env perl

use strict;
use warnings;
use Pod::Usage;
use Getopt::Long;

=head1 NAME

union.pl

=head1 SYNOPSIS

USAGE: union.pl --in <file> --seg_name <column> --seg_start <column> --seg_end <column>

=head1 DESCRIPTION

Segment operations - UNION

Generate the union of all segments described in a tab-delimited file.
The options --seg_name, --seg_start and --seg_end are the numbers (starting
from 1) of the columns holding the name, the start and the end of each
segment. Start and end may be given in any order. Ranges of the same segment
that overlap or share a coordinate are merged.

Input:

  seg1   4   5
  seg1   6  10
  seg1   7  12
  seg2   1   2
  seg1   1   2
  seg2   3   2

Output:

  seg1   1   2
  seg1   4   5
  seg1   6  12
  seg2   1   3

For testing purposes use as input file:
/seq/aspergillus1/gustavo/devel/tab_file/file_for_tests/union.pl.txt

=head1 AUTHOR

Gustavo C. Cerqueira 2013

=cut


my $in;
my $segment_col;
my $start_col;
my $end_col;

GetOptions( 'in=s'        => \$in,
            'seg_name=i'  => \$segment_col,
            'seg_start=i' => \$start_col,
            'seg_end=i'   => \$end_col );


# All the parameters are required; report the first one that is missing
my @required_params = (
    [ 'in',        $in ],
    [ 'seg_name',  $segment_col ],
    [ 'seg_start', $start_col ],
    [ 'seg_end',   $end_col ],
);

foreach my $param (@required_params) {
    my ( $param_name, $param_value ) = @{$param};
    next if defined($param_value);

    print STDERR "!!!! Parameter --$param_name is required !!!!\n\n";
    pod2usage( -verbose => 1, -exitval => 2 );
}


open( my $in_fh, '<', $in ) or die "Unable to open file $in\n";

# For each segment name, the list of boundaries ("start" or "end" events)
# of all its ranges
my %segVec;
my %sortedSegVec;

while ( my $line = <$in_fh> ) {
    chomp($line);
    next if $line =~ /^\s*$/;    # ignore blank lines

    my @cols = split "\t", $line;

    my $seg_name = $cols[ $segment_col - 1 ];
    my $start    = $cols[ $start_col - 1 ];
    my $end      = $cols[ $end_col - 1 ];

    # Ranges whose coordinates are given in reverse order are flipped
    ( $start, $end ) = ( $end, $start ) if $end <= $start;

    push @{ $segVec{$seg_name} }, { type => "start", coord => $start };
    push @{ $segVec{$seg_name} }, { type => "end",   coord => $end };
}

close($in_fh);


# Evaluating union
#
# Sweep over the boundaries of each segment in coordinate order, counting how
# many ranges are open. A merged range starts when the counter leaves zero and
# is reported when the counter returns to zero.
#
# Segments are processed in alphabetical order to make the output reproducible.
foreach my $seg_name ( sort keys %segVec ) {

    # When a "start" and an "end" share a coordinate the "start" comes first,
    # so ranges that touch each other are merged regardless of the order in
    # which they appear in the input file
    @{ $sortedSegVec{$seg_name} } = sort {
             $a->{coord} <=> $b->{coord}
          || ( $a->{type} eq "start" ? 0 : 1 ) <=> ( $b->{type} eq "start" ? 0 : 1 )
    } @{ $segVec{$seg_name} };

    my $union_start      = -1;
    my $collapsed_ranges = 0;    # number of ranges currently open

    foreach my $item ( @{ $sortedSegVec{$seg_name} } ) {
        my $type  = $item->{type};
        my $coord = $item->{coord};

        if ( $type eq "start" ) {
            $union_start = $coord if ( $collapsed_ranges == 0 );
            $collapsed_ranges++;
        }
        else {
            $collapsed_ranges--;
            my $union_end = $coord;
            print "$seg_name\t$union_start\t$union_end\n" if ( $collapsed_ranges == 0 );
        }
    }

}

--------------------------------------------------------------------------------
/config.dir/ABC_transporters_to_grep.txt:
--------------------------------------------------------------------------------
ABC transporter
ABC transport-like
ABC binding casette transporter
ABC-type phosphate transport system
--------------------------------------------------------------------------------
/config.dir/Prophages_names_sem_Claviviridae_Guttaviridae-TxID:
--------------------------------------------------------------------------------
Myoviridae 10662
Siphoviridae 10699
Podoviridae 10744
Lipothrixviridae 10477
Rudiviridae 157897
Ampullaviridae 573053
Bicaudaviridae 423358
Corticoviridae 10659
Cystoviridae 10877
Fuselloviridae 10474
Globuloviridae 292638
Inoviridae 10860
Leviviridae 11989
Microviridae 10841
Plasmaviridae 10472
Tectiviridae 10656
--------------------------------------------------------------------------------
/config.dir/Prophages_names_sem_Claviviridae_Guttaviridae-TxID.SMOKE_TEST: -------------------------------------------------------------------------------- 1 | Plasmaviridae 10472 2 | Tectiviridae 10656 3 | -------------------------------------------------------------------------------- /expected_output_of_test/NC_005362.1.phage_1.fas: -------------------------------------------------------------------------------- 1 | >phage_1:327710-367791:NC_005362.1 2 | CGGGATGTGGCTCAGCTTGGTAGAGCGCTACGTTCGGGACGTAGAAGTCGCAGGTTCAAA 3 | TCCTGTACTCTCCTTATTTGATTAAAAGAATGTTGATATAGCAACGTTTAGAACACTAAA 4 | AATACTAGAATGGTAACATTTTGGTAACATAAAGAAAAAATGCATCCGGAAGGGTGCATT 5 | TTCTTTATATATTTAATTGTTTTAATGCATTAACTGCATCTTCTTTATTTTTATTATTTA 6 | CATGCGTATAAATATCTAAAGTCATTTGGATATTTTCATGACCAAGCAGCATTTGAACAG 7 | TTTTAGGTTTTACATTAGTTTCCGCAATAAGTAGTGTTGCAAAAGTATGTCTAAAGCCAT 8 | GAACTGTTATATGACGTAAATCAGGTTGTGGTTCTTTTAAGTTGTGTGTTTTTGCATACT 9 | TAACTTTTTCCTCATGATCTTTAGCATAAATTGATTTAAGCCATTGATCTGGTTTGCTCA 10 | TTTTTAAATACGTTCCTTTAATTGTGTGAAAGAGCTTGTTAGAGATGATTTTTTGCTTTT 11 | GCTTATAATCTATCAAAACTTCTTTTAAACTGTCCGAGATGGGCAAAATACGCTTAGATT 12 | TAGGTGATTTAGGTGGCTGGATAATTGTTTTATTGTCTAAACCATATGCCAAGGTTTTAT 13 | TTATGGATAAAGTACCTGCTTTTAAATCAATATCTTGCCAAGTCAAAGCTAATGCTTCGG 14 | ATTTTCTAAGACCGGTAGCAGAAAGGAGCTTGAAGTAAGTATATTGAACTAATCCATACT 15 | CTTTAGCCACTTCAAGAAATTGATTTAGTTCCTCTCGTGTATAAACATTATGTTCGGTAT 16 | CTCTACGTGGTCGAGATGTCTTTTTAGGGATAATGATCCGCTTTAAAGGATTTTCAGACA 17 | CATAGTTTAACCGAATACCATATTCAAAAAGAGCATTAAATTCATTAATAGCATCCCTAT 18 | ATTTAACAATTTGTTTTGCTTTCCTATCAGCCCACTTTTGTAATTCAGCTACTGTAATCT 19 | TGTCAACGTATTGGTTACCAAAATCCGGAATAACATGGTTATCAAAGACCTGCTTATTTT 20 | TATTCGCAGTAGATTCTTTTACCTGAGTTTTATAGTTTTCAAACCATAGATCACGTAATT 21 | CAGAAACTTTAATCTGTTTTTGTTTTACAAAATTATCTGGTTTAGTTTGAGATAATTTGT 22 | TGAATACTGCTTCTGCTTCAGCATAGGTTTTAAAACCACGCCGATGAACTTGAACTCGGT 23 | CACCATTGCTGTTTTGACCCAGTGAAATAACAAATTCATATCGTTTCTTTCCAGATTTTA 24 | ATTTATAATCTTTGATACTGGTGTTTTTTCTTTTTGGCATAATAAAAAACCTCCTATTGT 25 | 
AGTCTAAATGTGAACTCTGCTACAATAATAGGGCAGAGTTCATATTTTTATGGCTTTGTT 26 | AATCGAATTTATGTTGAGCTATTGACGTAGCTCATTTCTTCCCACTGATGTTGGCGCATC 27 | GGTGGGATTTTTTATTTACTTAGATAAATTATCTACAGCATATTGTGCTTCTTCTGGAGT 28 | AAATTTGTCCCCAGAATTAGAAGTTAATTGATCTTTAATTTCTGCTGTAGACATATTCAT 29 | ATCTTTTTGGTAAGTTTTAGCACTTTCTAAAGCATTTTTATTCCAATCAGCTTTAACATG 30 | ATCAACAGCATATTTTGCAGCATCTTCTGGGAATTTATCTCCAGATTTGGAAATTAGTTG 31 | ATCATAAATTGCGGCCTTAGACATATGCATATCGTTTGCGTATGTTTCAGCGCTTCCTAA 32 | AGCAGCGGTAAATTCTCCCGAAACTTTAGGTTTAGTAGATGATTTCTGCGATTGAGCCTT 33 | TGAAGAAGTTTTTTCCACTTTATCAGAACTTGATGAAGAGTCGTCATCAGAGCCACTCCC 34 | GAGTGCACCACCAATGACTACAACTAAAACAACAACTACAATCCAAAACCATGCTCTTTT 35 | GTAGAATGGTTTTTGCTCTACATAAACTTTTCCATCTTCGCCGGTTATCTTCTTACTCAT 36 | ATTTTCCTCCCTGTGAGTATATAAAAATCAGCTTTTAGAGTCTTCAGTATCTTTGGACAA 37 | TGAACCTATAGAGTGAAACTTAAGGTGCTAAACTAATCTTCAGCAGCTTCTTTTTCCTTT 38 | GATACCGGATAAGGATGTTTTTTGTAGCCTTGTTTACCCCAAGGACAACCAATAGAGTAG 39 | GCGATCATTAATCCAATTCCAAAGCCACCTGCATTACTGATGCCACTTGCAAAAGCAAAA 40 | AGAACGTTCAATCCAATTATTATAAAGAAAATAATCTTAACAATTCGTTCCGCTTTTTTG 41 | TTTGGTGGAGTTTTTAATGTAACAAAATACCAGATTGCTAATCCTACACCCCAAGCAGAG 42 | AACCAATACAAGAAGCCTTGATAGTCTCTTACAATTCCAAACATAGCAACGCTACTAACA 43 | GATTGAGCTTCTACAAATCCAAAGAAAGCTCCAAGTGCCAAGAAAATAGCACCTACTAAT 44 | CTTCTAACATTCAAAGTGTTTTCTTTATTCACTTTTAAACCTCCATGATTTGCCTTAAGT 45 | TTAATGTCATTGGCATTGTGGACAATAAAATATTTTGGGATAAAAAGAGGCATGCAATAT 46 | ATTTCGAGAAGAAATACTCGCATGCCTCGCTCATCAATCTGAGCAAAAACCTAAAGACCA 47 | CTTTCATGGTAGACATTCCGCTGTCTATATAATAATTATTACACACGAATATTTTTATTC 48 | TAGTATTAAACAATCATGCAATTAATTCTTTAACAACGCTATCGCGTTTAGTAAATGGAA 49 | CTGATTTAATATCTTTTTCTGAAAATAGTTCTTCAATTTCAGGTTTAATTTGTACTTCCT 50 | TATCGTTTGAAGAAATATCATTTAGAAATACATAAAATGTGGAATTTTCACGTATTTCTC 51 | TTGTTTGAAGAATGTCAGTTAGCGCCGCTTTTGCGAATAATGAGTTATTTGGATTAGATA 52 | GAGTTTTTATAAAACGTGTAGGAATATCTTTATAGCCAGAAATTAAGTAATCAAAATTAA 53 | ATGTGATACCACTTGTACCGGGAATAGAGATATCTTCTGTAGCACGAATATGATTTTCTT 54 | CCAGAAATGATCCTACATCCTCAAAAAATAGGGACTTTGTAGTATTTTTTGAAAGCATGA 55 | 
ACATATCATTAACTGCTAGAATAGCTTGTAATAATCTGTTCTTTGCAGTTGGAAAATATT 56 | TATATTCTACAGTAGTAGTTAATTCTCCATCTTTTTCAGTAATACCAAACGCATTAAGTC 57 | TATTAGTAAAAATCCTTTTTCTATTTTTAGATCTTGAAATAGTTACTCCATTACTCTTTA 58 | AGTTATCTAGCGTCCAGCCATCATCAGTAAGAGTAATACTATTGTTTTTATTCTTGATAG 59 | CATACATAACTATTTCATCAGAAAATGAGTCTAAGAAAGGAACTTCAATTCTAATAACGT 60 | TTTGTGAAAGATCTTTAAACTTTGCTTCTTTTTTATACCAGTTAACGTAGTCAGTTAAGA 61 | GCTTGTCGGCTAGTGTCATTTTTATCAGTCCTTATATGAACGAACTGTAAAAACTTTTCA 62 | ATGGCATTTTCAAGTTCGTCATTTGAATTAAAAATATAATCATCTAAAGGATATGCATAT 63 | GAAACAATGCTGTTATTGACATATCTATATAAATGAATGTGATTTCCTTCTACAATTGTA 64 | CCATCATAGTTGTGATGTCTAGTTCCGTTTATGCACAGTCTAACTAAATGTGCATTATTT 65 | TGTTTAAAGCGAATATGGAGAGAGTACTTAGTATCTAGACGTCCTTTATATCCGTGCCAA 66 | ACATACTTTAAATTGCTACTTTCTCCTAATACAATGTGATCCTCATTGAATTTACCAAGT 67 | GGAACGTTAAGAGTAAAACTTTCAAAAAAATGTTTAGCTTCATCTATCAGTTCATCTACT 68 | TCATTATCGCTTAGCTTTTCTAAATCCATTAATTAATTCTCTCTATCCTTTTATTATTTA 69 | CAACTTACTAGTAAATTGAATTGCTTTATCTATGATTTTCTTTGAATACTCAAGCTTGTT 70 | CCTCAAGTAAGTAGTAGCAATCTCTAGGGATGCCGAAAGCTTGAGCAAAGGTGTACATAT 71 | TATAGAAGTAAATATCGTGTAAGTCGCAATATTCAGTTAAAAGATACAAGCTGAACTCAT 72 | TTGCTTCTCGTTCTAGGCGAATACTGTTTACCGAACTAGAATGGTATTCGCTTTTTTCGT 73 | GTCCTAGAAATAGATGGCCTAACTCATGACCAATAATAAAAGGAATTTCATCTATATTTT 74 | TCCAGTTAGCATTAATTACAGCGGTGCGATACTCAGGAAAGGCCTCAGATGGATCATCGT 75 | TTTCAGCAGCGTAAGAGAAGGTGACTCCATATTGGTGATCTAAAGCGTAATTCATTAACC 76 | ACCTAATTACTTGATTTCTAGCTGCTGTTTTCTGTTCAGGTGTTTGGTACATGGGCTATT 77 | GGTCCTTTTTTGGAAGATTATGTTTTTCTGCGTACTGTTGGGCAAGTAAGCGGAAAGTAT 78 | CGTTTAAATCCTCCGAGATAACTCCTTTGTAGGGCAGACCAAGATCTTCGTAACCAAGTT 79 | CTTTTTTTGAAGAAGGCATAGATAGGTCAAGCAACGAATCAGAAGAAATGTTAAAAGCTT 80 | TAGCTAATGCTTTTAATTCATCACTAGATACCTTACGAGTGCCTCGCTCTATTTTGTTTA 81 | AGGCAGTACGTTCTATGCCTGTTTTTTCTGATAGTTCTATTTGTGACATATTACGATCTT 82 | CACGTAAATTAATGATCTTTTTTGCAAGAGTTATTCTATCATCTATCATTTTATCCACCT 83 | TGTTTATTGCGCTTTTCGCAATAAATATAACACTTTTTGTGCTTTTTTAGTACTTTTTTT 84 | CTAAAATAGCACAAAAAAATGTTGACAGTGCGAAAATCGCAATGTATTATAATAAATGTA 85 | 
AATTGAATTGCGAAAATCGCAAGTTAAACAGAGGTGAGAAAATGACTAAAGCAATAAATC 86 | TTCCTTTGATTAAAGCTACCAGATTAAGACTGGGGTATACAAATGAAGAAATGGCGTCTG 87 | CCTTGGGATTAAGTGGTGCAGATAAGTATTATCGTAGAGAACAAGGGGAATACAATTTTA 88 | AAGCGACTGAATTACCAGCCTTATCACACGTTTTACATATTCCGCTTGAAAAAATTTTTA 89 | CTTAAATTTGTGCGATTATAGCACAATAGAGGGAGAAGAATTATGAAAGATTTAATTCAA 90 | ATAAAAGTAGAAAACGATCAGCAACTAGTAAGTGCACGAGATTTACATAAAGCTTTAGGG 91 | TTTAAAAAGAAATTCAGTGGATGGTGGGAGCAAAACCAAGATCAGTTTGAAAAAGGTATT 92 | GATTTCAACGAGGTACCAAAAGGTTACATCGTTGAAAGTGGCAATGGAACAACGAGAGCA 93 | TATGACGATTATTGGTTAACAGTTGATACCGCTAAAGAACTTTGCATGATGTCTAGAACT 94 | GAAAAAGGTAAGACAATCAGAAAATATTTCATTCAAGTCGAAAAGAATTGGAACTCACCA 95 | GAAATGGTAATGAACAGAGCGTTGCAAATCTCGAAGGCTAGGGTCGAAAAGCTGGAAGCA 96 | GATAATAAAAGCCTTTCTTTACAACTTGAAGAAAGCAATAAGAAGGCTAGCTATTTAGAC 97 | ATCATCTTAGGAACACCAGATGCATTAGCGATTACGCAGATTGCCGCTGATTATGGTTAT 98 | GGTGCAGTAAACTTCAACAAACTTCTAAAACAAGTTGGTATTCAGCATAAAGTTAATGGC 99 | CAATGGATTCTTTACAAGGTCTATATGGGTAAAGGCTATGTAGTAAGTCAAGCTTTCACT 100 | TTTAAAGATCATTTAGGTAAAGACAGAAGCAAGACAACTACTTATTGGACTCAGAAGGGG 101 | AGAAAGCTTATCTACGATGTGCTTAAAGATAACGATACTCTGCCGTTAATTGAACGTGAC 102 | GATATTGCATAGGGGACGTAATCATGAAAATGAAAAAGGATGATCTAACTATTAAAGCCT 103 | TTATTTATACCGAAAGGAACCACAATATGTCGTGGCAAAGAGTGGAAGAGGTATTACAAA 104 | TTAAAAATATAAGTTTAAAGAGTCTATCTATCAAATCCGGTCTTAACTATGAAAACCTTA 105 | GGAATTATCGATATAAGCACTATGAACCTACTTTCAAGACAATGTGTAAGATTGCTGACG 106 | CTTTGGGAGTTAGTCTTGATGAATTGAGAGGAGATAAGGATGAAGTTAACACTAGAAGGT 107 | AAATCGGAAGAAATTCAAGATATTCTAAGTTTTATCCATCTTCAGCAAGAAACTTATCTT 108 | GCTGTAAAGGGAGTAGATAGAGCTAGTAAGGAAAGTACTTTGATTCATTAAAAAGGAGAT 109 | GATTTCATGCAAATCGCAATCCCAGAACGAGTTATTGAAAGAGCTATTCAACAGAAGTAT 110 | TTTAACGTGACGGCATCAGCAAACTTCTTAGGAGTTAGTCCATCTACATTTAGAATTTGG 111 | CTGAAAAAGTTTGATTTTAAGCCAATTAGTATTGATGGCCAAATCTTATATGACAAGGAT 112 | GTCTTACAAAAATTTATGGAGGATCACAAACTATGAGTAAATGGATTAACCACAAGATCA 113 | ACGAATTCATGGGGACTGATTTCACAGTACGTGAAACAGAAATACTTGCGCTAGGTACCA 114 | TGTGTACAGCATTAGTAGCACTTATTTTTACGATGTATACAGCCATTTTTCCTAATATTT 115 | 
AAGTAAGGTGATGAAAAAATATGATTAAAAACAAAAAATACAGGGATCCATTTAAAGGTA 116 | TGAGCTTTAATGGAAACCCTGTATCAAAAAAAAGAATATATTGCAGCTTATGAAAACTAC 117 | GTTAAACGTTGTTCTGGCGAAGCCAAAGATTAGTTCTAAAGGAGGTGAGAAAGATGACAT 118 | TAGAAGCAAGATTAATCAGCAATAGCAACGCATTTTTTGCAAGACAAGATAAGTCACCGC 119 | TAGTAGCTGACGAATACGAAAAGCAATTCAAAATTGCGTTAATGCAACAAAAAAAGTCGT 120 | TACTGACTGGCATCAGTAACGACTAATTCAAAGGTTTAAAAATTTAATTTATCTATAAGG 121 | AGTATACCACATGAAAGAGTTTGATCCTAGAGATCTCTGGAAACTACAAGAAGTTAACGG 122 | AATGGTTTTAAGAGATATACATGGCATTGATGTAGCTATTGGAAAAGGCTTTGAATACAA 123 | GAATATTAAAGCTTTCATTGAAGTTTATACAACTGAATATGGTGTTAAAGATTTCATGGA 124 | AAAAATGGGCTTTGAAAATAGTGAAGACTTTACTAAATATTATTTTAAAGAGTTTCCCGA 125 | TGAATGTGATTGGTACGATGCTTGTTATTGGGCATTTAACGGGATATATGCTGACGATTT 126 | AGCACTAAAAGGATACGAGGAAGAAGCCTATTTAGATGCAGAAGATGCAAAAAGAGACCG 127 | CTTAGCAGGTAAATAAAGGAGACTACTATGAGAACTACTGTCACAAAAGAATTAATTAAA 128 | TTTGATAATGAAGAATTACCGATCAATTTAGAACCTGCAAAAATTTATTTTCCAAAATAT 129 | GAAGAACTAAAGAAGCGAGTTGATAGGCTGACCGATAGTTTAAAGGACTATGTAGTTACA 130 | GAAGACTCATACGAACAAGATAAAAAAGTAAGAGCTGAATTAAATCGCATTCAGAAAATG 131 | CTAAGTAAAAAAAGAATAGAAATTTTTAATGAAGCTGTCGAGCCTGCTAATGAATTTAAA 132 | CAACATGTATCTGGCTTGGAAAAGCAAATAAAGTGTGCATCGGACCGAATAAGTGAAGGA 133 | ATTAAACATTATACCGATAAAGAAAAAGATGCAAAATTTCAAGAAACAAAGCTTCGACTA 134 | GGAAAGCTTGCTCTTAAATATAATGTTTCAATTCGACTATTAGGAGATATTCGAGAAAAA 135 | GATAATCAATTCAATCATTGGCTTAATAAGTCTTGTTCTTGGAAAAGAATTGAAACTGAA 136 | GCTGAGAATTTCTTTAAATCAGAAGCTGAGAAAGCAACTGCTAAAGCAGATGCACAGAAA 137 | GTAATTATCAATAAAGCTAACAATCCATTATTTAAATCAGCACTACCGATTTCTCCTTAT 138 | TTAGAGATGTTGGATTACAAATCTCTACCAGACGTCTTAAATCAAATGGACAATGATTTA 139 | GAGAGTGTAAAGAAACAAAAAAAGCAACAGAAAAAAGCTATGCAAGACATAAAAAAACAT 140 | GGTGACCAGTATATAGACACTAATACTGGCGAGGTAGTTGATACAGTTCATAGTATGACC 141 | ATTAAATTTTCGGGCACTATAGAACAATTTAAAGCATTACTTCAGTATGCCGAAAGTAAC 142 | AACATTAGCTATGAGAGGGTGAAATAACGATGGAATTACTTAATCAGCCGACACCACCAA 143 | TCTCTAGTCCTCAGGCCGTCGGATTAATGAATATCCAAACTATAGTAGGTATGGATGCTA 144 | 
AACAATCTCTAGATGCCAAATTATCCTTATTAAATGACTTTGTAGAGATGAAAAGGGTAC 145 | TTGCCCAGCCTTCTAAAAGCAAAGATGGCTATGGCTATAAATATGCTGATTTGAACGATG 146 | TCTTGTCAGTCATTCAACAAGCAATCGGAGATTTAGATTTATCCTTCATTCAGCAACCAA 147 | TTAACAAAACAGCTAAAACAGGGGTGGAAAACTATGTATTTAATAGCAAAGGAGCAATCC 148 | TTGATTTTGGAAGTTACATGCTAGATATTACGAAGCCTCAAGCACAACAATATGGCTCAG 149 | CACTAACCTATTGTCGTCGGTATTCCATTAGTAGCATTTTTGGAATTGCATCTGAAGAAG 150 | ATACTGATGCTAAAGCTTTGCCCCAATATATGAGTCCTGAAGAAATTGATCGATTAACAC 151 | TTCCTTACAAGGGAAAACAAGTTTCATTAGCTAAGTTGTTTTCTCTAGGGTTAGCTGGTG 152 | ATTCAAAAGCTAAAGCTAAGTTACTTGATCGAGAAAATAATAATGTTACCAAGTTAGCAG 153 | TTAAAAGTATGACTGATATGTGGGATTTCTCAAAAGATATAGAGGCCATGAAGATTAATG 154 | AACAGACTGAAATTAAAGCTAGCAAAGATCAAGAAGAAAAAGCGAAGCAAGCTGCTTTGA 155 | ATAAGGTTCAAAAAGGTAAGAAAGATCCGTTTGAGGATAAGAAAGTAGAATCTGATTCTG 156 | ATCCTGAAGTAGACAAATTGTTTTAGGTGATAGTAATGGTAAAAATCAAAAAGGTATACG 157 | ACAAACATTACACTGTCATAAATAACTCGGTTCTCAATGATGTATCGCTTAAGTGTGAGG 158 | ATAAGGGGCTGTTCACATATCTATGGTCTCAAGCCGATGAGTGGGATTTTTATGCAAAAG 159 | AAGTAGCAAAGCATAGTGCTGATAGCGAAGATAAGATCTATAAAATACTTCGAAAACTAG 160 | AAGAACATGGCTATCTACTAAGAGAGCGACAAAGAAATAGTAAAGGGCAGCTTAAAGCTA 161 | ATAAGTGGTATTTATCTGAAACTCCTAAACAGAAATGGATAAATATTTTTAAAAAACGTT 162 | CCACTACAAATAAAAAGCCTATTCGGCAAAATCCAAAACAGGTAAAACCAGATCTGGAAA 163 | AACCTAATATGGAAAAAACGGATCTAACAAGTACTAACTGTAACAAATACTTATCTAAAC 164 | AAGTAAAGAACTTAAATAAATCTCTCTCTAAGGGAGAGAGGGAGAGAGATAAGGGAATAG 165 | TAAAAATGTTACTAAATTATCTCGACATCTTTGCTGAGCAGTGGCACAGCAAGCCAATTA 166 | TATTTTCGAAATCAGAGGTACAGAAGCTAATAAAAGCAGTGCATGGGAAAGATACCCAAA 167 | AACTTAAAGAAGCAGCCGAAAAGACCGTTGTTTACGGTGAACAGTATCCACAAGGTTACC 168 | TGCTTACTTGCATCAAGAACTTACCGGAGGAAGTAGTAAATGAAAAATAGACTTAAAGAG 169 | TTAGATTGGAGTTCTAAGATAATGAAACAAAATAATAAAAAAGATAATCCCTGTAAATTT 170 | TGTAATATCAACGACGATAACTATGGTAAGGACTTTTCCGAAGACGAATATGCTGAATTT 171 | AAATTGTATAGAGTGAAGGATGATTATTACATCTATAATTATTCAGCTTATAGCGATCCT 172 | ACTTCTGACTGGTTTGAAACATATTCTGGAAAGATTAATTATTGTCCTAAATGCGGAAGG 173 | 
AAGTTAGCTGAAAATGAAAATTAAATCAATTATTGAAACAACTAAATGTATGTATGCAGT 174 | ATCAAATGATGTAGATACTATTTCAGTTTCTGCAGGGATGGAAAAGTACAGACAAAGAAT 175 | TTATAAAGGCACAACTTCCAAACATTTTTGGAGAGGACGATGAAGATGAATAACGAATTA 176 | CAGCTATTCGACTTTGAAAACAATCAAATTAGAGTTTTAAAAATCAACAATGAGCCATGG 177 | TTTGTTGGAAAAGATTTAGCAAATGTTCTTGGATATTCTAATACTCAAAAAGCGATTAGA 178 | GATCACATTGATCCAGACGATTTAAGGGGTGAACGAATCGTTACCCCCTCTGGAAAACAA 179 | ATGACAATTATTACTAATGAATCTGGAATGTACAGTTTAATTCTTTCTAGTAAATTGCCA 180 | AGCGCAAAGAAGTTTAAGCGTTGGGTAACAAGCGAAGTGCTTCCAGCAATTCGAGAAGAT 181 | GGAGCATATATTACTGACAATAAGGCTATGCAATTGATGAGCGATCCTCAGGAGCTTGGC 182 | AATTTCTTGTTAACTATTGGCAATAGAGTTAAGGCATTAGAAGCTGAGAAAAAAGAGCTT 183 | AAAGACACCAATGCAAAACAAGCAGCAAAGATTGCTAGAGATGCTGATGATGTGGTTTTT 184 | GCTAAGGCTATTAGGTATAGCCATCATGCAATTCCTGTTGGCGAATTAGCTGAAATTTTA 185 | ACTCAAAACGGTTTTGTGATTGGAAGAAATCAACTATTCCAATTACTTAGAGAAGAAAAA 186 | TATCTTTCAAGTTTTAATCATAGCTGGAACGTGCCTATGACACAGATGGTTAAAAGAGGG 187 | CTATTTAGAATTACTCATAATTTGACCAGAGATGGCAGAGGATATTCTCAAACATGGGTA 188 | ACGCCTAAAGGTCAGAAACACATTATTAACAAAGCGTTAAGAGGAAGATTTGATGATACT 189 | TATCAAAAAGTTATGGTATCAACTTTGAACGTTTAGTGAGGTTAGAAAAATGAGCGAAAT 190 | TGAAAACGTAAAAAACGGATTAACTTATCTTTTAGATATTAATGATTTAGCAACTAGTGT 191 | GACAGTAACAGAGGGTGGCGAATCGCATACGATGAATTTAAAAGATTTAAAAGTGACAAA 192 | TACCGATGTTCTTTTACAAATTGCTGATTTATTAGGTATTGATATCAACAATTAGGAGGA 193 | GACTAGAAACGAATTAGAGGTAGGCAACGAAACAATGGTCATGATGTACTTAAATATTTT 194 | GAAATATGCTAAGCATCATTGTTCTGACAACGAAGATCCTTATGAAATTACTGACCGAGT 195 | ATTTACTGATGCGTTAGTAGCAAATAATAAAAACAATTAGAAGACGAGAGGGAGTGTAAG 196 | TGTGGTTCAGATGAATTTAGGGCTAATGCCTGATAAAAAATTGACTGCTAAGAACGTAAA 197 | AAGATTTTTGGTGGTTGATTTTCAGCAATATTTAGACTTAGCGGGTATGCATCGTAATCA 198 | ACTTACCAGTCCTCACCTGTCATTAGCTCCTGGTTCTACTAATAAAAATAATATTGAAAA 199 | TAATTTTATTGAGGATACGCAAAGAGACATGGATATTGCAGATCCAGCAAGAATTGTTTG 200 | TGCGGCTGTGTATAGAACTATGGAGGATTGTACTGACACCGAAGATAAGCCATACAAAAG 201 | AATATTAATTGATACATATATTAAAAAATTGCGAATTTTTGAAGTTGCAGCTAACACTTC 202 | 
ATTATCTACTAGCTCTGTCGATAAAAAGAAAATTGATGCACAGGTTCAGTTTGCTAATCG 203 | TTGGTTACATTGGGCAGATTTTTATGGATTAGAAGATTATCCTGATTTTAGAGTTATGAA 204 | AAAAGTGGGATAAATTAAAATTGTTACGTTTTTTGTATGTTTTATTTATGTTCTTGGGAG 205 | GTTGTTTAGATGTTAAATTGTTATCGTCGAAAGATTAGGTGGTAAGATCTTTCGACAACA 206 | CCTCGACGATGCTATGAGTGCGGTTTTTCGAACTTAAGAAATACTCCTTTTAGAATTAAT 207 | AGAAGATCTTGATTGTGTAAGTACAATTGACTGACCGTCCTTGGTAGGCGATAGCAGTCG 208 | TCCGATTCGACTGGCGGTTATAGCCTGTTTTCGCAGGCTAGGTGTCCCTAACAAAAGTAT 209 | TTTTCATTTTTCTTAATCCTAAAAATTACTTTTTTTATTGAGTAATCTGGTTCGCCCCCA 210 | GATTGCTCATTAGCCTAGGCAGGTAGTCATTAGTGACTATTATATATAATAGTTTTTTCG 211 | CCTAGGCACAACATCTCCATGCCATGACCCTCAACGGTTTCGAGGCGAGCGTAGAGTTAA 212 | GAGCTGGTTATCTGTTCGCAGTGGACCGCAACAGATGCCGTGGGTAGAACGGAACACAAT 213 | TTGGAATGAAAGAAGGCGATCTTCTTTCACGTATCTAGCAAGGTTCGACTCCTTGCCTGC 214 | TCATTGTCCGGCGGAAAACGGACGTAAAAATATAAAAAATATACCTGGTAAATACAGTAT 215 | ATTTGATGTTTTATTCGTAATAGTAATATATATATACACGAAAACTATATTACTTGTCGT 216 | TTAGTTTACAGCGTCTGGTTTAGCTCTGTACCTTAACAGAGCATTACAGGAGTGTAGCCA 217 | AATGGTACGGCATCGTGTTTGAGCCCGAGTTATGTTGGTTCGAGTCCAGCCACTCCTATT 218 | GATTAGCTTTGTTTGATTATCTTTAATCTCTGTTCGTTTCATCAAAGCTAATCTATTGGC 219 | TCGTTGGTCAAATGGTCAAGACGCCACCCTTTCACGGTGGAGTTACAAGTTCAATTCTTG 220 | TACGAGCTATGGGAACTAATTGAATTAATTCACCGCATTAAGAAATTCCTCTATATTATG 221 | TATTTAACACTTGAGGCTATTGCTCTGTGTTCGCTGTGCGGACATGCATCCCTTATAAGG 222 | ATGTCCTTAATAAAAAGAACATAGTTTGGCAAAGTGGTCCAGTAGCAACGACAGCGGACT 223 | GTAAATCCGCCACCGAAAGGTTACGTAGGTGCGAGTCCTACCTTTGCCATAAGGACACAC 224 | AGCGTACGAACAATGTGTGTCCTTTAATTTACAGCATCTGGTTTAGCTCTGTACCTAACA 225 | GAGCATCATGGCAACTTAGCTCAGTAGGTAGAGCGTTAGAGTGAAGTTCTAAGTCGTCGG 226 | TGGTTCGATCCCATCAGTTACCATAAGCTAAGAAATTATCTTGGCTATTCCTAATATTAT 227 | TCATGAAGAAGGGTGTAACCTTCTTTTTTTATTGATAAAATTATTGTGAATAATATTAGG 228 | AGGATTCGTATGGAAAATAAAGATGCTATAAAACTAGTTAAATTTAAAGATTTAAATTTA 229 | GAGGATCCTTTTTTCGATTCATTGAGAGATGATTATGAAGGATTTAATGATTGGTTTGAA 230 | AGAAAAGCACTTTCTGATAGTGACGCTTATGTGTTATTTAAATCTGATGGTATGCTTCAA 231 | 
GCTTTTTTATATTTGAAGCAAGAAAGAGATGAAGATACGACTATCTCTCCAAGTCTTCCT 232 | CAGTGTAATAAATTAAAAGTTGGTACTTTCAAGGTAGAAGCACATAAAACTTCTTTAGGT 233 | CAAAGATTTATGAACATTATTGTTCAAGCTTTAATTCGGTATCATTTTGATTTAACGTAT 234 | GTGACTTTTTATCCGAAGCAAGTTCAATTAAAGACTTTGTTTAAAAAATATGGATTTGAA 235 | AAATGGGGAATGAAAAATAATGAAGAGGTTTATTATAAGGATTTAAAAGTTAAAAATAGT 236 | ACATTTAAAGATTTCCCCCGTATTAATGAAAAAAGGCTAGTCAATAAATTTTTACTGGGA 237 | ATTTATCCAAAATATCATACTGAATTATTTCCTGATTCTAAATTATGCAATGAAAAAGAT 238 | TTTGTTAGAAAAGATATTTCGTTTTCAAACTCTATTATTAAGTGCTATTTAGGTGGTATG 239 | GAGGAAATGAGTAACTTTAAGCCTAATGATCTCATTGCAATATATAGGACAAAGGATAAT 240 | TGGGCTAATAGTGCATTCTATAATTCAGTTATAACATCAGTTTGTACAGTTATAGATACA 241 | CGTAATATTAATGACTTTACCGATTTTTCGTCTTTCAAAAGATATGTAGGAAAAGAAACA 242 | ATATTTTCAGAACAAGAACTTAAACGTTTCTATGAAACCAAAAGATATCCTTATATAGTA 243 | AAAATGCTTTATAATTTTCCTTTAAATAAGCGTATTACTAATGGTGAATTAAGAACTAAA 244 | TTAAATATTGTTCTTGACTATTGGGGATGTGCTGAATTAAACAACAATCAATTTAATGCG 245 | ATTTTAAAGTATGGTGAAATAAATGAAGATTTTATTATCAATTAAGCCTAAGTATGTAAG 246 | TAGTATCATGAATGGCACAAAAAAATTTGAATTTAGGCGAAAGATCTTTAAAAGAAAAGA 247 | TGTAGATACTGTAGTTGTGTATGCAACCAAGCCTATAGGAAAGGTTGTGGGAGAATTTGA 248 | AATAAAGCAAGTAATTTCAGATACTCCTAAGAGCGTATGGAATATGACAGCACAATATGC 249 | TGGTCTTGATAAATTAGATTTTTATAATTATTTTGAAGGATTAGATGAAGCATTTGCCAT 250 | TCAAATTTCTAAAGTTACGCCTTATAAAAAACAACTAGATTTGAGTGAACTAGAAGATGG 251 | AAATTTAAAAGCTCCTCAGTCATTTATATATTTAAATGGATAAATATATTATTAGCATCT 252 | AGTATTGAAGGTGATTAATATAAACAATATTAATTGGACAATAGTAGCTAGCATAGCTGC 253 | AGCTGTATCAGCATTTGCATCTTTAATTAGCATTATTATAAGTTACCATTGGAATAGAAA 254 | AACATATAAGGCAAATGTTGAAATTGAGCCAAAATTAGAAGCATTATATACTTTAAGAAA 255 | ATTAATTCCAGATTATATTGCTGAAATAAATTATGTTACTTATTTATACTGTAAGGCTGC 256 | AGCTAATCAAAATGATGAAAGACGAGCTAAGGAGAATATACTTCCAGACGGTGTAATTTG 257 | GGGTAATATTACTTTTGAAGATCACGATAGACAAATGGCAAAAACCAAGCTCGTTCACGA 258 | ACATTTAACAGCTATTCTTAGATTAGAAGGGGCTGCATTACTGTTAAAGGATGCTCAAGA 259 | GCTTTGGAATTGTCTAAGTTTAAGGAAAGAATATTATAAAGAAGCTACAAATGAATTCGT 260 | 
ATCTAAAAAAGAAAAAGAATTCAATCATTTGTTAAATGAAACAAGTAATAAATTAAATAA 261 | TGATTTTATAGAATATTATAAAAGTAAAATTGAACTTTATGAAAAAGGAAAGTCAGCATA 262 | AGCTGGCTTTTTATTTTACATTAAATTGAAAAAAGGTGGTGTGGTGATATGTCATGAGCA 263 | AAATAGAAGATGCAAAAGCAGATTACTTAGCAGGAATGAAGTACAAAGATATTGCTAAGA 264 | AATATGAAGTTGCATTAAGTACTGTTAAGTCATGGAAGACTAGAAATAAGTGGCAACGTA 265 | ATAATGCAACCAAAAGGAAAAGTATGCATACAAAACAAAAAAGTACGCGTACAAAACGAG 266 | AAAAGGTTGCACCATCATTACCATCACCAGAATTACCAAATAATGATGAACTTACTGATA 267 | AGCAAAGAGCCTTTTGCCTGTACTATTTGCAACGATATAATGCGACCTGGGCTTATCAAA 268 | AAGCTTATGGCGGAAACTATGAAACTGCATTGCGAGCAGGACCAAGATTGTTGGGAAATG 269 | TTGGAGTTAAAAATTACTTAGCTGAGCTAAAGAAACAACAATCGCAAGATCTATATGCGA 270 | CCGCAAATGACATATTGCTGCGCTATTTAAAGCAGGCGACTAGTGATGTTACCGATGTTC 271 | TGTCTTTTAAAACGAAAAAGCGCTTGGTTTATTACAAAATTCATGATAAAGATGGTCCAT 272 | ACGAGGATGCTGGAGGTAAGTTCAGGTATGAGCCTAAGATTGATCCAGAGACAGGTGAAC 273 | AAGCGTATTACTATGAACATTTAATTACTCTGAAAGATAGTGAAGATATTGATACGTCAA 274 | ATATTAAGAGTATTCGAATAGATAAGGGTGAGCCTGTAGTTGAAATGGAAGATAGGCAAA 275 | AGGCGATGCAGATTTTACTTGATCGCTTGCCTGAACCAGAAGTTAGTGATGACAGTACTA 276 | CTTCCTTGTTACGTGCTCTTAAAAATGGTATGGATAAGATTTGGAGTAAAGATAATGAAG 277 | ATAACTAGATTTAACTTTGTTCCGTTTTCTCGTAAGCAACTGCAAGTCTTAAGCTGGTGG 278 | AGTAATCCTCAAATTCTAAACCAGGAAGCTATTATTTGTGATGGTTCAGTTCGTGCTGGA 279 | AAGACTGTAGTAATGGCGCTCTCATATATATTGTGGTCCATGACTAATTTTTCAGGGCAA 280 | CAATTTGGAATGGCGGGAAAGACGATCGGATCTTTTAGACGCAATGTACTTCGCCCATTA 281 | AGAAGTATGTTAGAAAGCGAAGGGTATAACGTATATGATTCTCGCTCTGAGAATATGATA 282 | ACTATCAGCAAAAATGGGCATACAAACTTTTACTTCATCTTTGGTGGTAAAGATGAAGCA 283 | TCACAGGACCTGGTTCAAGGTATTACTTTGGCTGGGTTCTTTTTTGATGAAGTTGCCCTT 284 | ATGCCGCAATCATTTGTTAATCAAGCAACAGCTCGTTGTTCAGTGACAGGCTCTAAAATG 285 | TGGTTCAACTGTAATCCGTCTGGCCCGTTTCATTGGTTTAAACTAAATTGGATTGACCAG 286 | ATGAAAGATAAGCGTGCTTTACGCATCCACTTCACGATGCATGATAATCCATCCCTTGAC 287 | AGTGTAACTATTAACAGATACGAGCGTATGTATTCAGGAGTGTTTTATCAGAGATATATA 288 | CAAGGTCTATGGGTAATGTCGGAAGGAGTTATCTATGATAATTTTGATAAAGACACAATG 289 | 
GTAGTTAATGAATTACCCAATCATTTTGAAAAGTATTATGTATCTTGTGACTACGGTACA 290 | TTAAACCCTACTGCGTTCCTATTATGGGGACGCAATCATGGAGTTTGGTACTTGGTTAAG 291 | GAATATTACTACTCAGGAAGAACTACATCCCGTCAAAAGACAGATGAAGAGTATTGCCAT 292 | GATCTCAAAGAGTTTCTTGGAGATATTCGAGCAGAGATGATTATCGATCCATCTGCTGCT 293 | TCATTTAGTACAACTTTAAGACAGAATGGTTTTAAAGTTCGAAAGGCTAAAAACGATGTA 294 | TTAGATGGCATTAGAGTTACTCAAACTGCAATGAATGAAGGAAAAATAAAGTTTAGCATG 295 | AACTGTCCTAATCTGTTTAAGGAATTAGCTAGTTATGTTTGGGATGATAAAGCAGCAGAA 296 | CATGGTGAAGATAAACCTGTTAAACAGCATGACCATGCTTGTGATGCAATGAGATATTTC 297 | GTATACACAATTATTTACAAGAAAGTTACTGCAAAAGTTACTGTACGACCTAGAGTACGT 298 | GGCTTATAGAAGGAAGGTGTAAATGTGGCAGTTGTAATTGATAAAGATTTACTTGACGAT 299 | GTGAATGAACCCAATATTGAAGCAATTAACTATGCGATTAGAGAATTGCAAAATCGCAAA 300 | AAAAGGTTAGACAAACTGTCTGATTATTATAATGGTAAGCAAGAAATTGAAAAGCATGAG 301 | TTTGATAATGCTACCGTTGAAGCAGCTAATGTGATGGTTAATCATGCTAAATACATTACT 302 | GATATGAACGTGGGCTTTATGACTGGTAACCCCGTGAAATATGTTGCTGAAAAGGGGAAG 303 | AATATTGATGATATTCTAGAAGTTTTTAATCAAATTGATATCCATAAACACGATATTGAA 304 | CTAGAGAAAGATCTGTCGGTATTTGGGTATGGATATGAGTTGCTTTACCTTAAAAAAACT 305 | GATCCAATATCTGTTAGAGATGAACTAGGGAACGAGAAACTTACTCCAAATACTGAATTA 306 | AAAATTGAAGTAATTGACCCACGGGCAACTGTTGTGGTTTGTGATGATACAGTTGAGCAT 307 | GATCCTTTATTTGCTGTATTTACACAAGAAAAGAAGGACTTAGAGGGTAATACTAATGGT 308 | TATAGCATTACTGTTTACATGCCTCAGCGAATTGTTGAATATCGCACAAAAACAACTATG 309 | GAAGTTTCAGCAAATGATCCAATTGTTTATGATGGAGAGAATTTATTTGGTGCAGTTCCA 310 | ATAATTGAATTCAGAAACAACGAAGAAAGACAGGGAGATTTTGAACAGCTTATCTCTTTG 311 | ATTGATGCTTACAACCTACTACAAACAGACCGAATTTCTGACAAAGAAGCGTTTGTTGAT 312 | GCATTGCTGGTTACATTCGGATTTGGCCTAGGGGATGACAAGGATGATATTCAACGATTA 313 | AAACGTGGCGCAATCGAAGCTCCTCCTCGTGAAGAGGGAGCGGATATCGAATGGTTAACC 314 | AAGAGTTTTGACGAAACTCAAGTCAACTTACTTAGTCAATCTATTGAAAATGACATCCAC 315 | AAGATTTCATATGTGCCAAATATGAATGATGAGAAGTTCATGGGCAATGTTTCTGGTGAA 316 | GCAATGAAATTTAAGTTGTTTGGTTTAGAAAACTTGTTATCAATTAAACAGCGATACTTT 317 | TTTGATGGCTTACGTCGAAGATTGAAACTAATCCAAACTATTGTAAACATCAAGGGTGCT 318 | 
AATGATGATGCTAGTGGATGTAAGATCTCACTTGTAGCTAATATTCCTTCTAATTTATCT 319 | GATGTAGTTAATAACGTTAAAAATGCGGATGGAATAATTCCTAGAAAATATACTTATAGT 320 | TGGCTTCCTGATGTTGATAATCCTCAAGATGTCATTGATGAAATGAATCAACAAGATGCT 321 | GAAACAATTAAAAAGAATCAAGAAGCATTACGTGGGCAAGATCCAGATCGTTTAGAATTG 322 | GAGGATAAGCAAGATGATTCGAGTGAGAATGATAAAGAAGCCGGATCAAACCATAATCAG 323 | AGCCACCGGACACGCGCAGTATAGTGTTAAAGGATCTGATATTGTTTGTGCTTCATTTTC 324 | TACGCTTATTACTCATACTGTAAATAATTGTACGAAAGTAACTGCAACTGATAAGGATGG 325 | CGTATTAATTGCTACATTTTCAGATCCTAAAAGTATTGAAAATAAGACGCTATTAAATGC 326 | ATTTGAAAATACAGTTAACCAATTAATTGACCAATACGGGCAATACATTTGTTGGTGTTG 327 | ATGGTCTATGAAAGTTAATAAGAAGAAATTTACGTATTGGCAGTTACGTGATTTGCAAGA 328 | TGAGCAGAGAAATCAAGATGAAGCAACCGAAAGGTTAAAAATAATTAATAATGCATATCA 329 | AAAAGCACAATCATATTTAAGCGACGAGGTTCAAAAAATCTATCGTCGCTATTTTTATGC 330 | AGATATTTCAAAAGATGAAGTTGCAACTATCATGTCATCGCACATTTCGCCTTCTGAATT 331 | GGTTACTCTTAAAGCTTTATCGTCTAGTATCACGGACAAGGAAAGTAGGCAAGCAGTTGA 332 | TAATTACTTAAGTAGACTTGCAGCTAAGAGTAGAATTACACGACTTGAAGAAATGCAACT 333 | TAAAGCATACATTGCTGCAAAGTCAGCTGGTGCAACTGAATTAGATCAAAATGTTAAATT 334 | GCATACTGACATAATGAAACGTGCTTGGTCAGAGGCAGAAAAACAAAGTGCTGTTTATGA 335 | CACCACTAAGGATTACACATTACACTCTCCTCACTCTGTAGAAGTTAAACAGGATAAAAT 336 | AGTCATTAAAAATCCTGATACTGGTAAAGAAGTGGCTACTGTTCCGATGGATAAGGATGT 337 | TCCAAAGAGTAAAATCACTGAAATACCTAATCGCTATGTTGAAAAATCATTAGAAACTCG 338 | CTGGAAAGGAAAGAACTTTTCAGCGCGTATCTGGGGAAATACTGATAAATTAGCTGAACG 339 | ACTGCAGGAATTGTTTACGGTTAAAGAACTGAGTAATCTCCCTGAACGTGAAATGATTAA 340 | GCGAATTGAACAAGAATTTAATGTTGGTAAGTTTTATGCCAGTCGCTTGATTAGGACTGA 341 | AGCTAATTTCTTTTATTCTAAAATTAAGCTAGATAATTGGCGTAAACGTGGAGTTAAACA 342 | ATACCAATTGCTAGCAGTCATTGATAGTCGTACTAGTAAGATATGTAGATCAATAAATGG 343 | GAATATTTATAATGTTAAAGATGCAGTATTTGGTAAGAATGTTCCACCATTGCATCCCTT 344 | TTGTCGGACTGTTCCTGTAATTTATCTGGGTAATGCTAGAAGTGCTAATAATAAACCGGT 345 | AAAAAAGTAATTCGTAGACCTAAGCAAGTCATAAAACTGCATTGAAAACTAAATATACGT 346 | ATGTGGATTTGTTACTGCATATCTTGAAGTCACCGTGTAGAAATATGGGGTGGCTTTTTT 347 | 
CATGCAGTTAATTTGCAAATGTTCATGGGTAGAAAGGAATAATTATGGAAAATAAATTTT 348 | ATGAAGGTTTATTAAAACTTGATTTACAACGATTTGCTGATGAAGGACAAGGTGAGGGAG 349 | GTGATGACAGTAATAATGCGCAAGATAATGTAAAAGATGATAGTTCAACAGATGAACAGC 350 | CTTTTAAAACATTTAGTTCTCAATCAGAACTGGATTCTTTTGTAGATAAAAAATTAGCTA 351 | AGGCTTTAGATACGGCTCGCTCTAATTGGGAAAAAGAACAAAACGATAAGGCACAAAAAG 352 | CCAAAGATTTAAAGGAAATGTCCCCAGAAGAACGACAAGAATATGATTTAAAGCAACGTG 353 | AAAAAGCTTTAACTGATCGTGAAGCCGAAGTAACTAAGCGTGAAAATAAGAGTAAATTAG 354 | CCACACAATTAATTACAGATGGTTTGCCGGCAGAACTAGTGGATGTTTTTGACGATGTGC 355 | TAGCTGATGAGGATAAGATGACCAATACTTATCAAAAGGTTAGTGATGTTTTTCGAAATG 356 | CTGTTCATGACGCAGTTGAAACTCGACTAGCTCAAAGTGCAAGACCTCCAAAGAGCTTAG 357 | GTGATACTCAAACTAAAAAGTCTACTGGAGAAATGTTTGCTGAAAGAGCTAATGAAACTC 358 | AAAAGGTAAAAAATGATTTTTGGAATTAGAAAGGAATAGATATGTATACACAATTTCAAA 359 | ATGGCAAGCAGCTAAACTTTTTAGCTTCTGAAAAGTTCACTGCTTTCCCAGAAACAATTA 360 | ATAAAGATAATTACAACGTCCAAACAGATGACTTAGGACGTAAATATGTACCAGCGGGAA 361 | CGGTATATCCAACTAATGATGCAAAAGCTGTCGGAATCACTGTAAATGATGTGTACGTGT 362 | CAGAAGATGGATCTAATCAAATGGTTGCTGTTATGCGTGATGGTTGGGTGTTAAGTCAAA 363 | GATTGAATCCAACTCCAACAGCAGAGGCTATTAAGGCAATGTCAGCAATTCACTTTAAAG 364 | ATTTGGATACAACGGGAGTTGCATCTGTTAGTACCCTAAAGGACTAATAGAAGGGAGAAT 365 | ATAATTAATGAAAAATCAAAAACTACAATTAGATTTACAACGATTTGCAACACCTATTCT 366 | TGATATGTTTAGTCAAAATACGGTGCTTGACTATACCCGCAATCGTCAATATCCAGAAAT 367 | GTTAGGTGATACTTTATTCCCAGCGGTCAAAGTACCAACACTTGAAGTTGATATTTTGAA 368 | AGCTGGTAGTCGTGTACCAACAATCGCAAGTGTTTCAGCTTTTGATGCTGAAGCTGAAAT 369 | TGGCACTCGTGAAGCAAGTAAGATGACTGCTGAATTGGCATACGTGAAGCGCAAAATGCA 370 | AATTACTGAAGAAATGCTAATCAAATTGCAATCACCACGTAATACTGCAGAAGAAAACTA 371 | CTTAAAACAGTATGTTTTTGATGATATTGACGCTATGGTTCAAGCAGTTAAGGCACGTGG 372 | CGAAAAAATGACTATGGAAATGTTTGCTACTGGTAAAATTACTGATAAGAAAAATGGCAT 373 | TGCTATTGATTATGGCGTGCCAAAGAAACATCAAGAGACATTATCAGGTACCAAGACGTG 374 | GGATAAGAGTGATGCTTCTATTATTGATAACTTGCAAGATTGGTCAGATTCACTAGATGT 375 | TACACCAACCCGAGCTTTAACTTCTAAGAAAGTTTTACGTATATTAATGCGTAGTACTGA 376 | 
AATTAAAGAAGCTATCTTTGGTAAAGATACTGGACGTGTTGTTGGCCAAGCTGATTTAGA 377 | TCAATGGATGACAGCCCAAGGGTTACCAATTATTCGTGCTTATGATGGTAAGTATCGAGA 378 | TGAAGATTCAAGGGGCAATTTAACAACTAACTCTTACTTCCCAGAGGATCGCATCGTTCT 379 | TTTCAATGATGAAGTACCTGGACAAAAGATCTATGGTCCAACTCCAGAAGAAAATCGCTT 380 | AATTTCAAGCAATGCTCAAGTATCTAATGTTGGCAATATTATGGCCAAGATTTATGAAAC 381 | TAGTGAAGATCCAATTGGAACATGGATTTTAGCGTCCGCAACCATGCTTCCATCATTTGC 382 | TAGTGCGGATGATGTTTTCCAAGCTAAGGTTCTTTAATTAAGGGAGGTGCTTAATGTGGA 383 | TCAACTAACGGAAATAGTTTCAGCTTTAAGTACTCGATTAGAAAATGTTAATAATGCCTT 384 | ACTAACTGAACTAGTTAAAGAATCGATAGCACAGGTACTAGATTACACTGGTCAAAAGAA 385 | ATTAGTTGGTAGTATGGATATTTACGTTAAAAAACTAGCAGTTATTAATTACAATCGGTT 386 | AGGCATTGAAGGCGAAACACAACGTTCCGAAGGCGGTATAACTAACTATCTTGAGACTGG 387 | CATTCCAAAAGATATTCGACAAGGATTAAATAGTTATCGAATTGCTAAGGTGAAAAAGCT 388 | ATGAGATTAAAAGACAGTGATCTTATAACTGTTTATCTTAGAGAACCAATGAATACTCAA 389 | GATGATGAAGGTTATAGTATTTGTGGCTGGGGTGATCCACAACCAATTAGAATGAATGTT 390 | CAATCAGCTGGTGGTACGGTCAATGCTCAACTTTATGGGAAAGATATCAAGTACATCAAA 391 | ACATGTAAGTATCAAGGTGATTTACTTGCAGAGGGACATGGTGAAGGTTTTGGCGTTTGT 392 | CTAAAGGTTTCCAGTTCTAGTGATCCTGATTACAAGATTACGGCTATTCAGGAGTTTTCT 393 | ACTCATAAAAACGTTACTTTAGAACGTATAAAGAGGGATGAACAAAATGATTGAAGTTGA 394 | ACTTAAAGGAGTCAATGAATTAAGAGCAAAGCTAAAGAAATTACCGGATATAATGGCAAA 395 | AGCAACAGCTAATGCTCAAGAAAATGCAATTGAGCAGGCGGAAGCCTATGCAGTTGATGA 396 | GTTGCAATCCAGTATTAAATACTCCACCGGCGAACTTACTCGAAGCTTTAAACATGAAGT 397 | GAAGGTTGATGGAGATGAAGTTATTGGTCGTTGGTGGAACTCTTCTATGGTTGCTGTATT 398 | TCGTGAATTTGGGACTGGGTTAGTTGGTGAACGGTCACATAAGCAACTTCCTAAGAATGT 399 | TGCTATTATTTATAGACAGACGCCATGGTTCTTCCCAGTTGATTCAGTTGATCTTGATCT 400 | CACAAAAATATATGGCATTCCTAAGATTAAGATCAATGGAAAATATTTTTATAGAACTAC 401 | TGGACAGCCAGCAAGGCAATTTATGACTCCTGCTGCTAATAAAATGGCTAAGGAAGCACC 402 | AGAGATTATAAAAAGATCAATTGACCAAGAACTTCATGATAAATTGGGTGGTTAAATGGA 403 | AATCTATAATGTTAAAGCACTAGTGTTTAAGACTTTAAAGTCTATGCCGGAATTAAAACT 404 | AGTTTCACCATCTTATCCAGATAAATTTACAACATTCCCAGCAGCTATTTATTCAACTAG 405 | 
TCAATCTTCTTATATACGTAATGCACAACAAGAAGAGACAGACACTGAATGGAAGATAAC 406 | AATTGATTTATATAATGACCATGGATCATTAACTAATATAAAAGCAAAGCTCATTGCTAG 407 | ATTTTCAGCAATGGGCTTTTCTAATAGTGTTGGTGACCAAGATTTAAATGGAGTATCACG 408 | TGTGGTAATAGTTTTCGCAGGAATTGTAGATAATACTAGCCATCGTGTATATCAGAAAGG 409 | ATAAAAATGAACAACGTAAAATTTTATAGTGATTTATTAACATTAGACCTTCAACGTTTT 410 | GCAGTTGATAGCTCAGAAGGTCTTGCAGGAACAGGAACAACACTCGAAAGATCAGTAGAT 411 | GGATCATCTTGGGATGAAATTGCTGATATTAAAACCATCCCTGAATTGGGTGGAGACACT 412 | GAAAAAATCGATGTAACTACTTTGGCAGATGATAGACGTAAGCAAATTGAAGGTATTCAA 413 | AATGCTTCTAACGTTCAATTCCAAGCAGTATATAAGGGTGCTAGTTTTGCAAAAGCTCTG 414 | GCGCAGGCTGGTGATAGAAAGCAATACCAATGGAAAGTTACATATCCAGATGGAATGACA 415 | GCTACGATGAAAGGATCATACAACATTAAATTTGGCGCAGTATCAGTTAACGGCGCATTA 416 | GGTTACACAATCACTATTACTGTATCTGATGGACCTACATTTGCTGCTGCAGGAGCAGCT 417 | CATTAAATTGGTAAGTAATAGAAAGTGATAGGCGTGGGTTCGATTCCTACGTCTATTTTT 418 | ATACAACATGAATAGGAGAAATTATGGCAACAACAATTAAGAAAGCAACTAAAACAGTAC 419 | AGCTAGGTGATCTTGAACTTAATTTAAAACTAGGTGGCCGTGAGATTTTTAAAATTGAAC 420 | GTCGTCTAGGTAAATCAATGCTTTCACTTTTTATGGATTCACAAGGCGGAAATAAATTAC 421 | CTCCTGTAAATGAAATTCTTATCGTATTACAAGGAGCTAATCAAAATCACGGTGTAACAG 422 | ATAAACGAGTTATTGAAGCCTTTGAAAAGTATTTAGACGATGGTCATACCACTATGGATT 423 | TGTTCAATGAATTAATGGAATTATTTGATGAATCTGGTTTTTTCGGCAAGAAAAAGAAGA 424 | AGGCTACAAAGACCAATATGGAATCGGACGAAGTAACTCTAGATCCAGTGGAAGCAGAAA 425 | GCGAATTGATGTAGATGCAAATCATTTCGAGACAGTATCTGATCTATTTAGACATCTGTA 426 | TCCAATCGCAGTGGAATCAGGTATAGAGGCTGACCATTTTTGGGATATGGATTTCGGTGA 427 | AATTATGACCCAAATTTCAGCAAATGAAAAACGTCGATTGGAAGATTTACGAGCTAAAGC 428 | TTATATGGATCATCGCCTAAGTGAAATGATGGCTTTTGCTTTTAACGATCCAGCAAAAAT 429 | GCCCAAAGTTGAAGAAGCTTATCCATTTGTGAAAGATAATTTGGATCAAACAAATGAGAG 430 | GTCTGAGGAAGAACCCAATTGGAAAAAAGATCAGATTATTTTTATGCAGCAAGCCCAGAA 431 | AATCAAACAATTTAATAAAGATAAAGGAGGTGGAAATTAATGGATCTAGAAGAACTTGAG 432 | TTAAGATTTAGAGCAAATTATGGGGATGTCATTCAAAAGATTGATGAATTAACTAATCTT 433 | ATTGTGCAAAAAACAGGTGATATGCAATATAAGATGCAAAATAATTTAGATAAATTTCAG 434 | 
CAATCAATGAATGAGAGTACTTCAAGAGCTAATGAAAATGTTAAAGAAGAGATTCATCAA 435 | CGTAGTGAAGCGGAAGAAGCTAAACAAAAATTACTTGAGCAGACTCTTAATACTCAAAAT 436 | GAGGTAACAGATAAGATTGTTCAAGGAAATAAAGAACAAGCCGAAAGCTCTAAAGAAGCT 437 | GTTAATCAGTCAGAAAAGAGCCTGGATAGTTTAACAGCTCGCTTGCAAGAAGCTTCTAAT 438 | ATGCAACAACGAATTGCACAACAAACTAGTACAGCTCATGATGTAATAAAAGATATTTCT 439 | AGACCCAAATCACAAACGCAAGTAAAAGAACCATTAAAGTCCACTTCACAGAATGACTAC 440 | TCAAGCTTTGATAACTATCAAGAAAAACGAATTCAGAGTTATATGCCAAAGCGTCCTGTT 441 | GATTTAGGTATTGATGATGAAATACAAGCTGAAGCATCCAGAGCTAAAAAAGAAGTTGAT 442 | AGCTTAGTAACTCACATCAACGAGAAGATGGAACAGGCTCGATCAATGCAACGGAGAATT 443 | GCTACTTTAACAGCTAATCGAGACAATCTGGATATGAGCAAGCAGGGTAGTAAAGTTAAA 444 | GCAATGAGATTAGATGATCAAATTGCTGACGCTCAAATTAAGATGGAACGGTATCAGACT 445 | CAAGCTAAGGCTCTTGCACAGGAAATGTCGCAGGAGCTTGATACTGTTCCTAACTCATTA 446 | AGACGCATTGAGCGTGAAATGGATCAAACGGAAGGCAAGATTGAACGAATTAGACGTTCT 447 | ATTGCTGAAATGAGGGATAATGATGCGACTCTTGGCCGATCCTCGGGCACTAATAAAGAG 448 | ATTAAGAATGCTGAAGCTGAGTACAGAAGATTAGTTGCTAGAAGTAATGAGTTAGCAAAG 449 | GCATATACATACGTCAGTGCACGTGGAGATGAGTTAAGAAATAGTTCTTCACGAATTAAT 450 | ACTGAATTAGCTGAAGAAAACAGAAATGTCTCAAATCTTGGTTCTAAGTTTAGCAGGCTA 451 | AAAAATACTATTTCTAATGTTAATTCATCTCTTAGACGATTTGGTAATAGTGGGAATTCT 452 | TCTATACGGAAAGCTGGATCAGGCGTTTCAGTTTTAAGTGAACGACTAAGGGGCGTTCGA 453 | ATGGCAATTCGAATGTTAGCCAGTCAATTAATTGTGTTTACTCTGCTCTATCAAGGAATT 454 | ATGATGCTTGCCCAAGGTATGGGTGCAGCATTAATGACTAATCGACAATTTGCAAGTAGT 455 | TTTAACGCAATTAAGGTTAACTTGTTGACTGCTTTTTATCCAATTTATAGTTATGTTTTA 456 | CCTGCTATAAATGCTTTTATGAGTTCATTGCAAAAAGCAACTGCATGGATTGCTCAATTT 457 | ACTTCTGCGTTAACAGGAATGAGCTTGTCTAGTGCTAGAAGTGGTGCTCATGGTTTGTAC 458 | GATCAAGTACAGGCAATGAATGATACTTCAAAGGCTGCAAGTAAAGCTAATGAAGCTGTT 459 | AAAAAACAACAGCAAGAACAAGCTAAAGCAGTTCAACGTGCTAATCAGCAAATTGCTCAA 460 | GCTAATCGTCAAGGAGCGGCAGCCGTTGCAGCAGAAAATGAAAGAATAAAAGCATCTAAC 461 | GAACAAGCTAAAAAAGCTTTTGAAGATACAAAGAAAGCTAATGAAGACCTTCAAGCATCA 462 | CTCATGGGCTTTGATGAGCTGAACGTTTTAGATAACAGCAAAAACAATAACGATAATGGT 463 | 
AGTTTTGAAGCTCAACCACTAGAAAAATTTACTCCACAGCAAAAGCAGGATACACCTATT 464 | TTTGATGATCCCGGAATAGATGATGGTGAGACAGGTGGAAATCAGGGGCTTGATTGGAAT 465 | GTTCCATTAGTTGCATCCCAAAATGCTATTGATGCTGCTAATAAAGTTAAAAAGGTTTTA 466 | GGCGAAATCTTTGATCCAATGAAAAAAGCTTGGGATGAAAAAGGCCAAGCTGTTGTTGAT 467 | GCAGCTAAGTATTCATGGACTGAAATTAGGCGCTTATTAGGGGATGTAGGTAATTCTTTC 468 | TTACATGTTTGGGACAATGGAACAGGACAAAAAGTAATAGAAAATCTACTTCAACTTTTA 469 | GCCGATATGCTGAACGTCATTGGCGACATTTCACGAGCTTTTGCAGAAGCATGGGAAGAG 470 | AGTGGTCGAGGGACCAAGTTTATTCAGACTATCTTCAATTCTCTCAATAATGTTCTTGAA 471 | ACTATCCATCATATAGCTCAAGCGTTTCGTGATGCATGGAATACTGGTGATCTAGGCAAG 472 | AGAATTTTTGCTAATTTGCTTGATTTAGCTACTAAGTTAGTTGGGTTTGTTGGCGATATT 473 | GCTAAATCCTTTGATGAGGCATGGCAACATGGTAATGCCGGAACTAGACTATGGCAAGCT 474 | TGGCTAAATGCACTGAATAATATACTTGATACTTACAAGCATATAGTTACATCAATTGAT 475 | GAAGCATGGAAACACTCCAACTTAGGCGTTTCAATTTGGAGTCATATTATTCAAATAGTA 476 | ACTGGTGTGGGAAACACTATTGGCAATTTAGCTGGTCAATTTGATAAAGCGTGGCAACAT 477 | GGAAATGTTGGAACTTCTATCTTTAAAACTTTGCTCGGTATGGTAGATGACATGCTTGGA 478 | TCACTTGGTGATATGGCAACGTATACTGCAAACTGGGCTAAGAAGCTTGATTTTACCCCA 479 | TTGCTTCAGTCAATTGATAATCTGCTAAAAGCTATTCGACCAGTAACCAAAGATGTTTGG 480 | GATGGATTATCGTGGGCATACAAGAATGTCTTGCTTCCACTCGCACGTTTTACAATTACT 481 | AAGGTAATCCCTGAGTTCTTAAATGCCTTAGCTGCTGCTCTTAAAGTCGTACATAGTGTA 482 | ATCAAAGCGGCAGAACCCGTTCTAGAATGGTTCTTCGATAGTTTTATTAAACCATTGGCT 483 | AAAATAGCTGGTTTTGCAATTGTAGAGGCTTTAAAACTTCTAACTAAAGCACTTGAGGGC 484 | TTATCTGATTGGATAGATCATCATCAAACGGCCGTTAAGATCATGACTGCAACGTTGCTT 485 | ACGCTTTTAGGTATTAAGGTTGCTAAAGCTACTATTGCTGGAATTCAAAGCTTTACTGAT 486 | ACTCTGAAGATCCTTGCAATGCTCAAATTTGATAAATTGAAAGCTGCTGCAAAGTATGCT 487 | GATGATTTATTAGGAGTAGCAATTGAATTTGCAAAGCACCCAATAACTAATATTAAAGAA 488 | CTTGCAAAATTAAGTTTTGAAAATATCAAGGGTGGCTATAACCGCATCAAAGATCTTTGG 489 | GGTGAAGTAAATAAAGGCTGGCAGGATAGTAATCTTGCCAAGACCGATTTTCTTAAATCT 490 | GCTCGCTCTTCAATACAATCTGGCGAACCAATGAAGCTGGGGCAAAAACTTGGTACTGGC 491 | TTATCTGGTGCTATGATTGCTGTAACTTCAGGAATTGATATCTACAAAGGAATCAAAGCT 492 | 
AATAACAAAGAAGAAAAATTTGCAGATTTTGGTTCTGGAATTGGTGGTGCAGTTGGTGGT 493 | GCCATTGGACTCTGGTTCGGTGGCCCACTAGGGGCAGCCGTAGGACAACAGATTGGCTCA 494 | TTAATTGGTAAATGGGGCGGTGTTGGTGCCTCTAAATTTGGTGATGGTTGGTCTAAGTAT 495 | GGTAAAGGTAAGAAACCTAAAGACTGGGTTGAAGCCATAGGCTTTAAGGCCCATGAAATC 496 | TTAGATAACTTTACTTCTTGGGCTAAATCCGTTGGTAGAGATATTAATAACAATATTTCT 497 | AAAGGTAAAAAAGATGTTCAAACTGCTAGCTCTAATATCCATAAATGGTCCACTAATTTT 498 | ATTTCTGGTGCAAAGAAAGATATTAAATCTTGGGCACAAAATATCGGTTCTAACATTCAT 499 | AAAGATGTAGACAAAGGTAAGAAACTAGCTAAACAAGCTGGAGATAAAGTAAAAGAATGG 500 | TCTACTAGCTTTATTAGTGATGCTAAAAAGAAAGTTCATGATTGGTCTTCTCAAATTGGT 501 | TCTAATGTAAATAATAGCGTTGAAAATGGTCAAGCCATGGCTAAGAATGCTGGAACTAAA 502 | ATTAAAAATTGGACTACCGACTTTAGAGAATCAGCCAGTGGACTAGTAAGATCATGGGCC 503 | GAAAGACTAGGAGAACACATTAATAATGGCTCTGAATCATCTCGCTCTGGAGCCACTAAT 504 | GCCGGTAATAAAATTTCTCAATGGACAAGAAGTTTCTTTGGAGGAGCCAACCAAAGTGTT 505 | TCTAGTTGGGCCAGTGGTCTAGGCGGTCATGTTAATGACGGAATAGGTGGAGCTTATAAC 506 | TCTGCTAAAAATGCTGGTGAACGTCTAGGAAGCTGGGTTTCAAGCTTTAGACATGATACT 507 | TCAAGAAAACTTGGTTCATGGGCTGGCACTCTTGGGAGCACAATTGGGAATGGAATTACT 508 | GATGGACTGCACAACATTAGTCGAGCCGTTCGAAAAGTGGTTAATGCAATTGTTAAACCG 509 | GTGCAAAATGCTACTGATCAAGTTAGAAAAGGTATTAACTGGGTACTTGGTAAACTTGGT 510 | GGTGGGTCAATTGGCTGGGGATTCTTTGATTGGAATTCTTATGCGAATGGGACGAATAGC 511 | CACCCAGGTGGACTAGCCTTGGTTAATGATCAAGCCGGAGACACTTATCGGGAAAGTTAT 512 | GAATTACCTAACGGAGAACAAGGTTTATTCCCAGCTGAACGTAATTTGTTAACTTACTTG 513 | CCAGCTGGCACAAAAGTTAAAACTGCAACTGATACGGCTAATGAATTAGCAGGTATGGTT 514 | CCTAAATATGCTGGTGGTATAGGTAACTTTAATTTCGATTTTAGTGGTATCTTTAGCGGA 515 | ATTAGTAGCGCTCTTGGAAACCTAAATTTTGGGAATATATTTGATGGGGTTGGCAATTTT 516 | GTTGATGGTGTAATGGAAGAACTTGAAAAAGTTACTGATGATATAGCTCATCCAGAACGA 517 | TTAGTTAACTACATTATTGATAAATTTGTCACCTATGATTGGAGTTTAGGCGACGCTTCA 518 | TTAAAGTTTGCTAAAGGCGCTGTTCATCAAGAAAAGAAAGGCATGATGAATTGGGCTAAG 519 | AAAGTTATCAATCAATTTGGTGGTGCAACTCATCAAACAGGGCTGGGTGCGGAAGGTTGG 520 | CGTAGTGCTGTTAAGAAAGCATTGCGCAAGAATGGTCTACCTGCAACACCAGCATATGTA 521 | 
AATGCATGGGTTCGTCAAATCCAAACTGAGTCTGGTGGTAATGAACATGCTGTACAAGGC 522 | GGTTATACGGATATCAACACCTTAACAGGAGACTTGGCAAAAGGATTGTTGCAAACTATC 523 | TCTGCAACTTTTAACGCTTATAAATTCCCTGGTCACGGTAATATCTTTAATGGTTACGAC 524 | AACATGCTTGCAGCAATTAACTATGCTAAGCATCGCTACGGTTCAGACATGCTTGCTGTT 525 | ATCGGTCACGGACATGGCTATGAAGACGGTGGCTTAATTGCTAAACACGGTTTCTATGAG 526 | ATTAGCGAAAACGACAAGCCAGAAATGGTTGTTCCTTTAACTAATCGTGAGTTAGGTATG 527 | CGAAGAATTAATGAAGCTATTGCATTTATGAATCAGAATTTTGGTGGTGGCTTGCAAATG 528 | CCATCTTCCTTAAGCAGACGAACTGCTATTGATAGTTCAATTTATTCTGATACTCAATCT 529 | AATGACTCTACGTCAGTTCAGCGTGGAGGATTCAAAGAGATGAGTACAGAATTAGTAAAT 530 | GCTATAGTTCAAGCAATTCAAATGCAAAACTTTAATAGTAATAACGGAAAACCTATTGAT 531 | TTACACTTATCTGTCAAGATTGGTGATGAGTCATTTGGCGAACACGCTATTAAAGGTATT 532 | AATACTATCAATCAAAAGAATGGTAGAAACATGTTAAATCTTTAGAAGGAGGAAATGAGT 533 | GATTGTATTCTTTAAAAATTTCTGGGACAGTGGTTAATCCTGCCCCGCAAACTATGCAGG 534 | TAACAATTCAAGATATTGACGCAAAAGCAACGCGTGATGCGCAAGGATTATTGCACCGAG 535 | ATCGAGTAGCTACTAAAAGAAAAATTACATTGGCATTTGGTGCACTCACGGTTCCGGAAT 536 | GTGCAAAGATTTTAGGAGCAGTTAAAAGTGAATTTTTTATAGTAGATTACTTAGATCCAC 537 | AAGATGGAAATATGCGGTCAGGAACATTCTATGTGGGAGATCGAACAACACCTGTTTATT 538 | CCTTTGTAGATTCGGTCCCTGTTTGGAAAGGCTTATCCTTTGATTTGATAGAACAGTAGG 539 | AGGTGAGAAATATTGCTAACACAAACGAAAGAAGTTAGGGATGCTTGGCGAGCATCACAA 540 | CGAACGTTAGATATTAAAGTGGCAGTTAATGGGAAAACTTATACCGCTACCGATATCAAC 541 | AGTTTGAAATATGATTCAGGAGCTTATACTGGTGACACGTTTGCCATTGGCTCAACATAC 542 | TCAAATAGTGTTCAAATTGAATTTTCACATTTAATAGAGAATCTAAAACTTGGTATGGAA 543 | GTTTTACCAAGTATCGGGATTAAGACGTCTAGTGGCTATGTATATGAGCCGTTGGGCGTT 544 | TTTATTATCTCTAGTGAAATCAAGATGGATCGCAATAACAATCTTACTTCCATTAGTGCA 545 | AGTGATAGATTTTGCGGGTTAGAAGGATCCTATAAATCTAAGCTGGCTTATCCAGCAAAA 546 | GTTTTAGATGTAATTGCTGAAATCTGTGAACAATCAGGAGTCAAGGCTAATGTGGATGAT 547 | CTGGCTAGACTTCCACATCAAGCTGACTTACCTAGACCTATCACTGGTCAAACATATAGA 548 | AAGGCTTTAGGTTGGATTGCACAACTATATGCCGGGTATGCTACGTTCGATCGTAAAGGT 549 | TTATTTACGATTAGAACTATCGCAGAGCCTAATTATGAGTTAGATCCCAGTCAGTATGAA 550 | 
CAAGCTGGTTTAACTAAAAATGAAGCTCCATATAGAATTAGCGGTATTCAGTGTCAAACG 551 | ACAATTACCACTAAAACTAGGGATGGTGAAGATACTGATGAAACTAAGAATTATCAAGTT 552 | GGAGATACGAATGGATCTCAAATTAAGCTTGAAAACAATATTATGACGCCTGATAGGCTG 553 | ACTAATATTTGGGAGCAGATTAAAGATGTTAATTTCTATCCATTTAGTTTGAATTGGTTT 554 | GGAAATCCTGCAATTGAAGCGGGAGATTGGTTAAAACTACAAGACAAGCAAGGCAATAAG 555 | TTTATTGTTCCCAACAATAGCTACACACTTGATTTTAATGGAGGACTTTCAGCAACTTCT 556 | AAGGCAGATCAAACTTCTTCCACAGACTCTGTAATAGCTTGGGAGGGAACTTTCTCTCAA 557 | ACTATTAGAGAACTTCAAGGTCGTAAAGCACCAGATGGAACAGTGATTTTTCCGCCTAGT 558 | GTAACTGAACCACCTACGAATGCTAAACCTAACGATGTTTGGTTCAAGCAAAATGGTAAT 559 | TCAACAGAATTGTGGGTGTTTACTGAACAAGAAGATGGGACTAGAAAATGGATTAGGAGG 560 | GATTTAACTCCTGACGAGATTAAGAAACAAGTTCAAGAAGCACAAGATGGCTTGAAAGAT 561 | GCTAAGAAAGAAATAGCAGATAATCTTGCTAAAGCGGATAAAGATATTGCTGAACTTAAT 562 | GAAAGTATTAACAATCAAAAAGGTTCACTTGATGGTCTAAGTACTACAGTCAATACCGTT 563 | GTTATTCCTAAAGTAACCGATGTAACTAACCAAGTTTCTGATGCTATTAAAAAAGTAAAT 564 | GAACAGAAAAATATTGTTACTGGACTGCAAAATCAGGCTATTCAACAAGGAAAAGATATC 565 | TCTAAGATTACTTCTGATGTTCATGGTGTAACTGTTGATCTAGCTAATCTCAATGGGGAT 566 | GTAAACCAGACAAAAGCCACAGTTCAAGGCCTTCAATCAACGTTAGGCAATGCCCAAGGT 567 | GATATTGCCCAGATCAAAGTAGATGCGAAGAAATTGGAAACTAGTCTATCTGGTAAAGTT 568 | GATAATTCTACTTATGCAACTTTTGTTAATCAAACTAATCAAGCATTAAATGCTAAGTTG 569 | ATTGCTAGTGATCTGAATGGATATGCTAAAACTACTGATTTGCAAGCTACGGCTAATGGA 570 | TTGCAGTTTAATATCAATAGCGTAACGGATAAATTGAATAATCTAAGAATTGGAGGTAGA 571 | AATTTAGTTGGTGGGACTGACAAAGAATACGTAATGGGATTTGGAATTCCTAATACCGTT 572 | TGGAAAGATAATTTTGCTTATATCTCTTTACCGTTAATTAGTGGTGATGGAGGCGAAATT 573 | TTACCCCAAGGTGCAGGATTTTGGCATGTATTAAGTCCAGGAGAAGAATATACTCAAACC 574 | ATTTGGATCGAAACAGATGCTATCATAAAAAGTTTAAATGGTACATTTTTAACATGGCTA 575 | ACTATGGACGATGGGCATGATGTGCAAAAAGCATATATTCAAAGGATAGGCAATAACAGC 576 | TATAAAATTGTAGGTTCTTATACTTGGCCACAAAATAAAAAAGGCAATAGAGTTCGCTTA 577 | TTTGATATTTTTAGTTTGCCAAATTCGATTGACTTGAAAAGTGGAACATATCTAAAGTTT 578 | GGAAAACTAAAGTTAGAAGTGGGAAATGCTTCAACTGACTGGACTCCGGCTGTTGAAGAT 579 | 
ACTGAACACGATCTAGAAAGTTTGTCTGCACGCATAACTATTAATAGTCAACAATTTGGT 580 | TCTTACTATACTAAAGGTGAGTCTGACAATAGAACTAATACTGCAAAAAATGAAGTTATC 581 | AATTCAATTAAGAATGATAGTAATTGGCATGGATTAACCAATATACTAACGAATAGTGGC 582 | TTTATACAAACCGCAGATGGCTTTATACAAAAAGTACAGCAAACGATGCAACCAATTATT 583 | AATGAAAATAATAGTGGTGGAGTGAATTTATTAACTAATACATATACTTTGCGTAACTTT 584 | AATAATGCCGGTAATGCTGGAGAAGCTACTGGATTTGAAATTGATTCTGGAGATAGCCAT 585 | AAAAACGAATTAAATGCTGGTGAAATAAGCACAAATGTTTTTCATGTTTATGCCAAAGAT 586 | TTAAATCATGCTCCTGTTTACTTTGGTCAAAACTTTACACTACCCAAAGGAACTTGGACT 587 | CTAAGTTTTTTAGTTCGTCATAATGGAAGTAGTGAACAAAAAATCAGACTAAGCTTTTAT 588 | ACAGACGATTGTGCTAACAGAGGATGGATTCCTCTAGGAACATCTGATGAGATAGATAAC 589 | ATTTGGAAAAGACATCAGATAACTTTTACAACAACCCAGGAAATTTTTGAACAAAATCCG 590 | AGAGTACATATGCCAACTGATGATATAGTCCCCGGTGGCTCTTTGTATTTTGCAAATTTT 591 | AAATTAGAATCCGGATCAATTGCTACTACTTGGTGTCCTGCTCCCGAAGATTTAGCTAAG 592 | CAAGTAGCAGTTACTGAATTAGGTCAAACCATAAAAGGGCTTCAATCAACTGTTTCAAGT 593 | AACTACGGAAATCTACAGAGCCAGATAAGTCAAACTGCTGGAACTATTAGGAATGAAATA 594 | ACTGATAGAACCAATAATCTACAAAGTCAAATTACTCAAAATGCTAATAATATTAGTATT 595 | AAAGTAAATGCCGTGGGGGATTTATCTAACATATGTTTAAATCCAACATTTATTGATGGA 596 | AGTACTGAAGGTTGGGAAAACGTATTTAGTTCGTCTGGAGATCCCGGTAGTCCTACAAAG 597 | TTTTACGGGGGAGTTAATACAAGAAATGCGTTTTACGGAAATATGTTCTCAGTGGCGGCA 598 | GGGGATAAATATTTTTTCTCGGTCTTTGCTTGGCAAAATCAATCTACTAATCCTTTAAAT 599 | ATAGGATTTACTTATTTGCAAAAAGATGGAACTTGGAATTGGCAATCCGCTATAAATTTT 600 | GCTCCTAATGAAGGTGCGCGAACTAAATCAGGATCTATCACTATTCCTAAGGAAGCAGTT 601 | AAAGCCAGAATATGGGTAGAAATTGATTCATTTTCTAATTTTGGAAATTGGTGGTTTACC 602 | AATCTTATAGTTAGAAAGAATGATACTATCTCGCAAATTAATATGTCAGCTGGCACAACT 603 | TTAATCCAGAATGACAAAATATATATGGATGCAAGCTCAACCGTATTTTCTGGTAAAGCC 604 | TTCATTCCTGATGCAGCAATAACTAATATATCAGCTGATAAAATTAACACTGGTACACTC 605 | GATGCCGGCAGAATTAATGTAATTAACTTAAATGCAAACAACATCACTACTGGGACAATT 606 | AAGGGACAAAATAGTAGTTTTAATTTAACTGATGGAACGTTAACGGCTTTAAATCAGTAT 607 | AATGAGGGCGTTTTTATGAGAAATGGGAAACTTGAGTTTACCTCACGTCAAAGCTGGAAT 608 | 
AATAACTCCACAGCTACATATGGATATATTCAATCTTTACCCAAAATGTTTGCCTTAGGA 609 | TATAACTATGGTGGATTAGATATTAATGGTACTAGTGGGTTTGTACTGCATAATGATAAA 610 | ACGAAAATTAAGACTAGTAATAGTGGACAGAGTGGTTCAGCAGTTGAAGGAGGAAGCTAT 611 | ATTTTTGGATCTGATGGTCTTACTAGCATAAATAATGAAAATTCAGTGATTATTAATGCA 612 | GGTGCTCAATCATACAGTGAGTGGACGGCTGGAATAAGCTTATTATCAGGGAATGCATCT 613 | AGAAATGTTCCTCCAAGTATCACTTTAGGATCCTCTAACAGTCTCAATGATCGCTCTACA 614 | GTTACTATATCTGGAAGTCTTGCGGTATTAGGATCGAAGAATGCCGCTGTTCGAACATCT 615 | CAAGGATTAAGAGCTATTAACGCATATGAAACAGCCGAATATTACTTTGGAGATATAGGA 616 | AGGGCAAAGACTAACAGTAATGGGGCTGTTTGCATATACATGGATCCGTTATTTCTTGAA 617 | ACAGTCAATACAGGAGTTCCATATCATATATTTCTTACTAGTTATAGTGAGGCAAGAATT 618 | TGGGTTTCAGAGATGTATCCATCATATTTTGTAATTAGATCAGATAAGCCAAATGCTGAC 619 | TTTGTTTGGGAAATTAAAGCAAAGCGTAAAGGATATGAAAATGATAGATTAAAAATTATA 620 | GATCAGGAGAAAAAAGTAAATGAATAACAATAATGCAGTATCACAAAAATTAATTAATAA 621 | ACTAGCGGTTAGTGAGTATAACAATGCAGTTTTAGAAGCAAAAGTTGATGAGCTAAATCA 622 | AGAAGTAAATCAATTAAAGGCAGAAAAGGAGAATAAAGATGTTAAAAACAAGTAAATCAA 623 | TCGCAATTTCAGGTCGTTCAATGGTAGATGATAAGCAAGTAGCAACTTTTAACGCTAATA 624 | TTTATGAAGCTAATGCTTCAGGTGGTAGTGACAACATTAATATGATTATTACTGATCGGG 625 | ATTTATATGATGCAAACAAAGTTACTGTACGGAAGGATTTACAAGACTTTCAAACAAAAG 626 | TATGGTCAGCACAAGATGAAGTGATGGCAGGAAGTGATGAAAAGGCAAGTGAAGGTTAAT 627 | GAATGAGATAGCAAAACTATGGGAAAATATAAAACAAAATCCTCTGCATTTAATTATTGG 628 | GCTAATGATTTTTTTATCAGGAAGCTTTTTGTTACTTTGTGATGATTTCTTTCTTTGGCC 629 | GCCTGAATGGACCAATGTTTTTAATAATGATCTGATTGACGCTATTGCGATTGTGATAGG 630 | CGTCTTTTATTTTGCTTACGTTTTAGCAGGTGCTAATAGTCCACTAGCTAACATTTGTTT 631 | ATTAACGGCTAGTGCTTTCCTTTTAACAATTCTCATTGTTTTAGAAATTGGGTGCGTTTT 632 | AGCATTTCAAAATTATCCATTAGTAATGGCAGCAATTTATCAGTTCGGTGCATTGCTATT 633 | AGTTCAAGTATGTGCATCAAGATCACCAGGCTTAAAAAGAAAGTAGGGATAAGAGGTGCA 634 | AGAATGGATTGACTTTGTACAAAAAATTGCATTTCTTATTTCTAGCATTGTAGGACTTTT 635 | AACAATTTTCAATACTCTTAAGCAAACAGACTATTCAAATGCTAAGGAAAAGAAGGAAGA 636 | GCTAAAGGAAGATGTAGAGCTTTATAGAAAAAGATGGCTTCAGGCAGAAGAAGCCTACGA 637 | 
TAAACTTTTGAAAGACAATGAAAGGCTTAAAAGACAAGTTACTAGATTAGAAAGTAGGAA 638 | AAATAATGAATAATATTTTAGTTAATGTATCCGCTGTAATTGTTGCATTGGTATTGTCAT 639 | ACTTTGTATTTTGGTATCGAAATCATAAAATTGAAATTGATAAAAAAAGAGCTAAGGGAG 640 | ATGCTTTAGCTTTTGTGGTTGATACTTTGGGTCAAGTTGCTACCAATGTAGTTTACGATT 641 | TAAAGGATAATTCAGAAAAAGGTACTGAAAAGAAAGAACAAGCTAAAGTTAAAATTAAGA 642 | ATTTTTATGCTGACGCACATTTACCAGCACCTAGTGATGCCCAAGTTTCTGGTGCAATTG 643 | AAAAGGCTGTAGCTATAATGAAGATGTCAAGCGAAGGTGAAAAGAATGACTAAAGTAGAA 644 | AAAAGAAGTTATGGAGTAGATATATCCAGCTTTCAGGGGACTGATTTATTAGCTATGGGC 645 | AGATCAGGTGCTAAGTTTGCTATTGTAAAGACTTCTGAAGGACTGGACTATAGAAATCCA 646 | AAAGCCCAAGCTCAAATTAATAGCGCTAGAAACAATGGAATGATGGTTAGTGGATATCAC 647 | TATGCAAGATTTAGCAATAATGCTAATGTAGCAGCTCAAGAAGGAAACTATGCAGTTAGT 648 | TCAGCTAAACAAGTTGGCATTGAACCAGGTAGTTACCTAGCTTGTGATTATGAGCAAGGC 649 | AGTGGAAATGAAACTGGAGGTAATCGTGAAGAAAACACGACTGCGATTCTAGCATTTCTA 650 | AATACAATCGTTAGTGCAGGATATAAGCCTTTATTATATTCAGGTGCAGAACTTATGAAA 651 | AATAAGGTTAATACCGCTAAAATTTTGGCAAAATATCCTAATTGCTTATGGGTTGCCGCA 652 | TATCCTTATAAAGGAGCAACAAATACTGCTGATTTCAATTATTTCCCTTCTATGGAAGGC 653 | GTGATCATTTGGCAGTTTACAGATAATTGGCGTGGTATGAATGTTGATGGAAACATCAAC 654 | TTATTAGAATTAGAAACTAAAAAATCTGTGTCACAACCAGCTAACGCACCTGGAGAATCT 655 | ACTAGTTGGATTGATAATTTTGGAGATAAATGGGTTAAAGAGGATGGCACTTTTATCACT 656 | GATCGACCAATTAATTTGAGAAAAGGAGCCAAGCTCACTAGTGACATTATCACTACCTTA 657 | CCAGCTGGTTCAGTAATTAAATATGATGCTTATAGTAGACATGGTGGTTATGTCTGGATT 658 | AGACAGCCCCGTGAAAATGGCCAATACGGTTACTTAGTTTGTCGTCAAGGAAATACCCCG 659 | TGGGGAACATTTAAATAAAGAAAAAAGCCACTCTGGAGAAATTAATTCTCTGGAGTGGCT 660 | TTTTTGCATACAACAAATTTTCTATTATTTTAACTATGTTTTCTTATTACAATTTTACTT 661 | ACAAAATGTTATACTTAACTTGCAACAAGTTAGAAATTCTAAAATTCAATGCACTCAAAA 662 | AGCATTACCTGATCGTCACAAAAGGTAATGCTTTTTTATCGCTTAGTGCAACCTTCTAAG 663 | CGTTGAGGGTGGAGTAAGGCGACTATTAATGCTTTTTGTTGTGATGCCGATCATCCAACC 664 | AGTGATCGAATAGTGATTTCACTATTTCCACAAGAATGGGAGCTACAACTAAAGTTAGAA 665 | ATTCCTTCAATGAACTCACCTCCTTTCAAGAGGAGGCAATAGTCGCTGAAATAATTATAG 666 | 
CAAAATCAAAGTCGGAAAAGTGATTGAAAAAAGCAAGTAAATTAGCTGAACTTTTTAAAG 667 | ATTTTGGTTATGAAGCTTACTGGAAAGAAAAACACGAACTTGACTTTAGAGACGAGGATC 668 | TATTAGCAGTAATCGATTTAAAAGACGTTAGTCAACTAAAAGAGTTAGCTCTTGAAGGTA 669 | AAATTTACGTAGACTTTGAAAATCAAATAATTGTACCCACTAGCTATGAAGTAGATGACT 670 | AA 671 | -------------------------------------------------------------------------------- /expected_output_of_test/NC_005362.1.phage_2.fas: -------------------------------------------------------------------------------- 1 | >phage_2:1292553-1330556:NC_005362.1 2 | TTACTTAAAAGTTCCCCAAGCATCGTTGCCTACACGACCAACTAAATAACCATAGTGATT 3 | ACTTCCACGCGGTTGTTGTAGCCAAACTCTGCCTGCACTATCTCTAGCCCAAGCGTTGTA 4 | TTTAACTACTGACCCTGCTGGCAATGTGGTAATTAACATGCTTTCCGTGCTAGCACCCCA 5 | GCGTAAGTTAATTGCGCCACCAGTAATGAATGTGCCGTGTTCTTCATGCCAAGTCATACC 6 | TTGTACATCAGTCCAAGTCGTACCAGTTGGGTTAGCAGGAGTTGGGTTTGATGGTTGTGG 7 | TTTAAGAGATGGTGCATCAGACTTAAGATCAATTAAAGTGATATTAGCATCAACATCTAA 8 | GTCGTACCAATTGCTACTAAATTGCCAAATTGCAATATTATCCATTGATGGAAAATAACC 9 | AAAATCTGCTGTATCTTGACGATTCATAGTCTTATATGATGCAACCCATAAACAAGTCCC 10 | ATACTTAGCAGTGATCTTCTTAACATTTATGTTATTAGTTAATAAGGATTTTCCAGAATA 11 | TAACAGTGGCTTATATCCTGCTGCAACAATCACATCAAGAAAAGCTAATACAGCATTAGC 12 | ACTAGCTTCATAGCCTTGATTAGTTACATTACCACTACCAGTTTCCCAGTCGCAAGTTAG 13 | GTATGAACCCTTTGGCAATCCAACTGCTACAGCAGAGTTAACGGCATAATGACCTTCTTG 14 | TACTGCTACGCCACCATTGGCGCTAAAACGTGCATAGTGATATCCACTAACCATCATTCC 15 | ATTATCTTTAGCACTATTAATTTGAGCCTCTGCTTTAGGATTGCGATAATCTAGTCCTTC 16 | AGATACTTTCACAAACGCATATTTAGCACCAAGATTAGCATATTTAGCTAAATCTGTTCC 17 | TTGAAAACTTGAAACATCAACGCCATAACTTCTTTTTGCTACTTGCATCTACTTACCTCC 18 | TACTTCTTTACTTCTCTTAATTCTCCAACCACAGTAGGTTTAGGCTTATCTACTGTATCT 19 | GCAATTGATTGTGCTTGCTTCATAGCAGTGACCGACTTTTCAACCATACCTTTGACAAAG 20 | CTCATAGTTGGGTGTGGCAAGTGTGCCATATCTAAGAGCAGAAACAAGCCTTGAACGACA 21 | AAAGTTAGCTTATCTTCTCCGTCCCCTCCCCTCTTTTCTGCTTGGTAGACAAGGGGACTT 22 | ACAGACTGGGCAACAATCTTTTCAGCCTTAGCCAAAAGATCACCTTGTGCTGCTTTCTTG 23 | TCGATTGCAATTTTGTGTTTAGCATACACAGAAGCAACAACGACTGCTGCAACTGATACG 24 | 
ACTACGATGGATAGATCTAATAATTGGTTGACACTCATTATTTTGCCTCCTTTAAATCTT 25 | TAATACGTAACTTCAAAGCCTTGGCATACTTACCCATTGCTTGCTTTTGCTCGATTAGTA 26 | AAGCACGTTGGTTAACAGATAAAGTTTTCTTGTTTTGTTTTGACAAAAATTTTGACAACT 27 | TACCGCGCTTTGCATTTAGCTTATTTAACTCTTTTTCTAGTTTTTTAATCACGTTTTTTA 28 | CTCTTTCCATCAAAAAAGGCACCATTAGTTGGTGTCTTCTTTACATATTCATCTACTAAA 29 | ATCTGTTGTTGATCAACCTTACGTTGCAATTTCAGATTTTTTTCTTTTAGTCTATCTATC 30 | TCTTTCTCTTGTTCTTGGCATTTTTCTTGGTAGTGGTCACGCTCTTCTTTGAGAGCATCG 31 | AATACCCACTTCCAAACCGCAATCAAGAACGTACCTATACCACCAAGATAAGGCAGTGCC 32 | CTTAGTAAATCGCCTATAAAGCACCACCCCTATCTAATAATTATGCTTCTTACTTTTGCT 33 | AATGATAGAAAAATCAAAAAGTAAAATAATAACTTCTAGAAAACCGGCTGTAAACATATG 34 | TGGATGTCCGGATATATAACCGTGGCAGAATTCAGCAATTGCTTCAAATGCTAACAAGCC 35 | TGCAGTAATAATTAATAAATTGCGATTAGTAGAAATACTATTTTTATTGCTGATTGCCCA 36 | ACTAAAAAGCCAAACACCAATAGCAATGAAGATACCACCTACTAAGTCGTCATTTAAGAC 37 | CTCAACCGCAAAAGGTGGCCAGAAAAAGTAGAAGTCATTGCAGATAAGAATTAGCCCTAT 38 | ACCCATCATTGCAAAAGCCAGTATTTCATGTTGTGGGTGTTCAGAATGTAGGAGCTTTTT 39 | TAGCTTAAGTTTCATCTTACACCTTCTCATTTATAAATTTGCCAATTCTGTCTGCTAAAA 40 | GAATGTGTCCAGCTCTAGTTGGATGTAGAGAATCTGTCATGTACTTATCTTTGAAATCTT 41 | TGTTGAATGGGTAAAACATTGAGGAGTATAGATCTAGAACAGGAACAGCATACTTAGATG 42 | CACTTTTCTTAATTGCATCTACATAATCTTTGAGATACAATCCTTCCACATTTTTTCCGC 43 | TTGTATCATCTACACCATTGGCTGGTCTAGTAGTCCAATCTGTTTGTAATGGTGTGACGA 44 | GTATAATAGTTGCTCCAACATATTGAGATCTCAAATTAGTCAAAAGTGTATTTAACGCTC 45 | CGTAAAAAGTAGTAATATCTACACTCTTGTCGTCCCCTAATTGTCTCCCGTATACCCAGT 46 | CATTAACTCCTGCAAATATCACAATCACATCTGCATCTTGATCCATTCGAGAGTAAGAAC 47 | TTATTAGCGGCTCAATATTATCCCATTCAAATCTATCAATAGATTTTTGAGATATTGATG 48 | CACCTGCAACACCATAGTTCCTTATAAGATCTGCTCCAGTAACAGTCTTTAAATCACTTA 49 | CATATGATTTTGACGTATTAATTCCTTGACTAATACTATCCCCAAGAACATTAATTTTAT 50 | GTCCTTTTAATGGTTCAAAACTATTATATTTAACTCCATCTATAAAAATAGTTGGATTAC 51 | CATTTAGAACAATATGATTAGGATACCAATAAGCACCAATATTTCCCCAATTAAAACTGA 52 | TATTTTCATCTAAGCTTGATACTATTTTTTTTACATCAAAATCATAGTATAGCCATGCTC 53 | CTGATTTTATTTTTTCAATATTAATAGGATTAGAATTAATTTCTTCGGTAGTGTGACCAT 54 | 
AAGAAATGTATAAGTTATTAAAGCTCATAGTATTAGCTGCAGTATCTATTTGTACTGAAC 55 | CTAAGCCACTAATCATTGGAATTGTAGTGCTTGGTGTTATGTTCTCATTAGTAATAACAC 56 | CTAAAGTTTTAGGATTATTATTTATTTTTAGTTGGAGATTGGTTTTAATGTTCCACGAAG 57 | ATGGCTCGAATGAATCATACATCATACCTACACTTATAAGATTACTATCTTCATTTGGTG 58 | CACTAAATGTCTGTAATTTCCAAGTTGCATCATTGAAATAGATCCAAGTTCCTCTACTAT 59 | GACCTAGACCGACTAGAGCAACTTTTTTGTTAGAAATAGGCATCATTCTACTGCGATATA 60 | CTACTTGGCTTTCCTGTGGTAGGTTCATATACCACTGTTCATCATCGAAAGTAATATTTA 61 | TTGTCCCTTGACCAACAATCATTCCATCTCCAACTTGCAGAGATACATTTTTTGATGGAA 62 | TCCAAGTTTTAGGAATTACATTATTTTGAGGGTAAGCATCAGCTATTTTGGATCCTCGAA 63 | CGATACAAAAGTCCCGTTTTTTAAAAGTATCTGTATCTCTTAAAGATACAGGAATTTTAA 64 | TCGCAGTAATATCTTGCGGAATTGTTACTGTGATTAAAGATTCATTGGCATCCCCACCAG 65 | ATACATAAGTATCGTTTTTCCAAAAAGTGTAACTGTAAGTTAATTGATTCCAAGTAATCT 66 | GTTCACTCGGACTAACCCCAATGGATATTTCTCCCCAGTCGGAGTTTGGAATAAAAGTCC 67 | CATCAGCTTTATGAAATCCGCCTAGTCTGATATTTGATGATAATTCCGAGTATAAATTAG 68 | GCGCTTTCTCATCTTGGTATACGCCTTTTAAGTTTTCAATATCTACCGTCTCTGGAGCAA 69 | TGCCTGCACTTTGATAAATTCCTCCATCTTGCCATTGGCTGTCAGTCCAGATAGCTCTAT 70 | GACCATTAGATGTTACAACTAGTTGTTCTGTCCCATTGGGAAATTTTTCTTTTACTGAAT 71 | CTAAATTAGCAAGCATGATGGGTGTTGCAGAAATTTTTGACAATTGATTTTGAACCATAT 72 | TTTTCATAGATTCATTATTTTGGTTGAATTCACCTCGTTTGATGATGTCGTTTTTGTCAA 73 | TTTGATCTTGATAATTAATCAATCGATTTCCAATAATTCTTTCTTCAGCTCTAACTTCTT 74 | CAACCTCGGCATTCTTGGCAATTAAGGCATCACGAGTAGCCTGTGTCTCTTTAGTATAAG 75 | CATCTCTAGCTTCTTTGATTACTTGATCTGTTTTGTCTTCAATTTGACGGATTAGTTTTT 76 | CAACTCTATCACTATAGTACTTCTGAGTTAATCCAAACATGACATCATTGCCAAGTACTC 77 | TAAACCAAATGTCAATTGAAGTTAATCTGCGACCGTTCCCATCCATCAAGCCAATTTTTC 78 | CATAGAAAATGCCCTTCTGAGTAAACATTTGATCAGTCAGAATGTAATCAGCCCAGCCAC 79 | CATCTCGTGTATTAGCAGCACTACCTTGCCAATCACGATAATCGGCATCAGGAGCCATTA 80 | CGATATTGTCATCGTCGTCAGTATGCCACTGACCAACTGCACCCTCTATAAAAGGGATCA 81 | TGCCATTTACGTTCATCACACGTCCTTGGTCATACCACTGTACTGGCAAAATTTTGCCAT 82 | TATCAGCCACACGCATCTTAATGTAGTCATTAATTGATGCACTTGATTGACCAGCCTTAG 83 | AAATATCGGCTTTTAAGAAATATGGTTCTCCATGATTAACGCTTGTCATTGTCATAATCT 84 | 
ATACTTATCCCCTTTGCTTTCTTTGCATTATTGATATCGTTTTGACTATCAATTGATATT 85 | TCAGTAGCCCCATCGCCAAAGACGGCAAGATTGAGCTTGTCAATCTGTTTATTAATCGTT 86 | CCAATATCTCGTTTAACTTCTGCCACTTCATTAGGCAAAACTTCTAATGCTGTTACTCGC 87 | TTATCCAGATTACTTAAATCCTGCTCAAGCTTATCCACCTTTTGAGCTTGTGTCATGCAG 88 | TACTCAAGTCCCTGCGCCATAGCTTCACGGACATCAACGCCAAACTTTTTGTGGCGTATA 89 | GCGTCCGCAATTTGTTGTGGCGTTAACTGTTTGCCATCTATGTGGCTGTTGTCAAAATAA 90 | TTAGCCATCACTTTCTCCCTTCTGTTCTAATTTATCCAGCCTTTCTCCAAATGATTTCAG 91 | TTGTTCAACTGTCGCATACTTAATTGGTAGTTCTTCATCTAAGAACTTCTTAACCTTGGC 92 | ATAGTCTGTTAAGTGCTTATCTAGCTCATCTTTGATTTTTTGAGCATTAAAATCGCTCTG 93 | TTCTGAAACATGTTGAACTTTTTGATTAGTCTGATTATTGCTACTTTCAGCACTCATTGC 94 | ACCAGATGAAAGGTTAGCAATGGTCAAAGATTGCTGGGCTAAAGTTTTTTGAATAGATTG 95 | AAAACGAGCATTAATTTTCTTACCGTCCTCAATTTGATAGTCAGTTAAACTCTTAGCAAC 96 | ATCAGCGATCTTTAAGCTACTATTTCTTTCGTGAGCGAAATCAATATCTTTTTCGACTAC 97 | TTGAAGTAATTGGGCTGTTTTGGATACCTGAGGGTTGATGAACTGATATGAATCTCCAAC 98 | TCTAAACCGATCAAATTCAGGCAATTCTAATGCTGAAACTTCAAATGAGTTCTTAGTGAC 99 | GTGCTGTTTTTCATTAGCAAGCCATTTTTCGCCCATTTGTTTGATTTGAGCTGGGTCTTG 100 | CAAGTCATCAAAAATCTTCGTTCCTTCGATCACGCCAAACTGCCGTATTAAGTCAGCATC 101 | TTCAATGTAATCTTTGCCACCATTAACACTAGTGATGTTAAATCTAGGCTGTGGGCTTTC 102 | ATCTTCGTAAGTAGTCGAGGTACTATCTTTATCGCCCTTCTGACCGCCCTCTTTAATTAA 103 | CTTCAAAGGATCTAGCCAAGCCCAGTTAGGGTCAAAAGAATGACCTTTTGTGTAACCCTC 104 | ATACCAAGATTTTTTCGTAACCCCGATATGGACGTGACTAGTATCACGTATTCCAATTAC 105 | ATCGCCAGTCTTAATTGTTTGTCCTTGACTAACTTTGATGTTTCCACGACTTGAGAAAGC 106 | TTCTTGATAGACAATATCATAGCCGTCTCGTGAGTGAGTAACTACATACCAACCTATGTA 107 | GCCGTCAGAACCAATACGAGTTACTGTACCACCATGAATTGCATGTACTTCACTTCCAGG 108 | GTGGTCAATACTACCGAAATCAAGCCCATCATGAAAATTATTGGTTCTTCCATTACCTGC 109 | ATGAACGCCAAAAAGTTGTGCCTCCATGAAGTGACCTTCGCCAACGCTAGGAAACGGCCA 110 | ACCCCATGTATTATGTGATGTTTTATGAACTGTAGTAGTACTTACTTCTTTAAGGCGTGG 111 | ATTACCGTTAGGTGACCACGAATGATAGATCTGCCACTTATCAACTTCTGATTGCCAGTC 112 | GTCCATATTGAATAAGCATAAGAGCTGATCGAACCCAGCTTTCCAAGTAGTAAACGGTTT 113 | AACACAATAATGCTTAAAAGTTTCTTCAATGAATTGCAATAGCCCCGCACTTGGGTGACC 114 | 
AGCAGCTGCATTACTGTCTATTGTGTTAACAACAGTTTCAGATCCACCACTTTCGCCTTG 115 | TATCATGCGCTTAATCTTTTCAACATAATCATCAGTTATGTTTACGCCCATAAGCTTAGC 116 | AGCATATCTAATAGCAGGTCCCCAATCGCCATTAACAGCATGGGTTGTTCCTGAAATAGG 117 | AACACCATGTTCATCGAGTGTATCATCGTTTCCTAAATTAATCTTAGGCGATGGTTGAAC 118 | CTTACCAAGCGGGATTAATCGAGTGATTATCCCAGTAGGGTCAATGGTCTGCTTAGCCGA 119 | TAACATATTCTTACCTACAGAAATAGGAGTATCAGCCTTATGCGATACTCCTATTGTTTT 120 | TGTGTAGTCAATATAATTTGTCTGTTTATCGTGGTCATATCTAACTCGAATATAACCACC 121 | GCTTTTATTAATTAGCTGGGCTGTAATTGCTTCTTTAGTAGTCGGATAATCAATCTGCCT 122 | TTGAACAATACCTTCGCTGTCTGAATAATCACAGTTACCAAGTGTAAAGTGCTTATAAGT 123 | ATCATTCAACTGCTCATTATGCACCTTAATTAGTTTCTGTAAGTACTGCTTAGCACTTGA 124 | CCCAGAATCATTATCAAAGCGTTGAATGCTATCTAAAAGATAAGCATCAATAGCTTCAAA 125 | AACGTATTCACGAGTAAATCCACCATTTGAAGCCATCTCTTTGATTGGTTTAATCGCTCG 126 | TCCACGAAAGATAAGCGCATCATCTTCATAGACTTCAACGTGAGTAATTAACGGCTCTAC 127 | ATTGTCATAGAGAGGATTGTTCTGATTAACAGTGATATCAAGATCATCAATATCTGTTAA 128 | TTTAAGATTTAAACTTCCAGATACTAAGTTCCTTGTTGGAAGGCTCTGGTCATACACTAT 129 | ATAGCCATTCTTATCTGTCGGCTTGTCATATGCTATAATCCGATATCCCACTACATCATC 130 | TCCTCTCGTTTGAATTGGAAGTGGATTGTGCCATTGCCTGAAACAGTAAGATTAGTCACT 131 | CCAACTGGTAGCATTACACTAACCTTAGTTGCTGCAACTTGATCTAAATCAAGATTGATA 132 | TTTGAGCCTGTAACTTTCACCTTTCCTGTTACTGCAAAGCCTAGTTCTATCGGCTTAGAG 133 | CCGTAATTATCAAGGTCAGCCTTAACAGAATTGCCATTTACTTCAAAAGAAGTTTGCTGG 134 | CATTCCCAATTGTCAAAATTGATCTCATCCCAGATATCACTACCTTCATAGCTCCTAGCA 135 | ATTGCATAAGGATATGCTTTGAACTTAACCGTGGCTGTTAGAGTACCGTTACTTTCATCA 136 | TCAGAACACTCAATACTTTCAGATTTAGCAGAAAAACAAAATCCAGGATTAAAAGTATCA 137 | TCAAGCTCTCTATATCCTGCTGCTTGCATGATTAGGCGCTTGATTTCCTTTTCTTTCGCC 138 | TTCCTATATGAATAATCATCGTCAACTAAAAGAAGCTCGTAAGTGATCTCACGGGCTTCA 139 | AAAAATCTCTGATTATCTAAGTTAGAAAAGTCTTCCTCGCCTTGCGAATAAGGAACTGAA 140 | TAAGTGATCTCTTTTTCTTTAGGTGTTGGTGCATCCCTCTTAGTAAGCCACCAGCCTTTA 141 | TCAGCTGAATTGAAGTTAGCAAACGTAAAGCCTTCATCAGGAGCACGAACCATAACCGGC 142 | GAGGTTTCGATTGTGTCATGAAAAAGGCCATCATTCTCATATTGCCTATTAAACTGTGAA 143 | 
AAAATTCTATCTTCACGCATTATTTTGAAAACCTTTCATTTAATGAAATATCACTACCGA 144 | ATTTTTGATTATATACATCAGCAGTTGATCCAACTAATGCACCAGTATCCATTACTATTT 145 | GTTGCTTTCTATCAAGCTTGCTGCTAATATTTGCAATTAAACCAAGCAACTGATTATTAG 146 | GATTAATGCTAAGTTGACTACGAGTATCAAGAATAAATCCAGCTTCGCCAGCTAATGTAC 147 | TACGAACTCGTCCAATTAAATTCTGGGCACTCTTTTGCGCATCTTCTGTATATTCGTCAA 148 | TACCAGAAGCAATACCTTTAGCAAGCCATTGACCAACTTCTAATTTCATTACCCTTGAAG 149 | GTGAATGAATATCAGCTTTTCTTCTAGCAGCTGCAACTGCATTTTGAACTGCACTTTCTG 150 | CTGCTTGTCTAATAGCTCCTGATCCACTAGAGATACCATCAGCAATTCCTTCTGCCAAAT 151 | ATTGACCAGTTGAATACATATCGCTGTGATAAGATCTAGCTCCACTATTGGCTGAACTGC 152 | CTAAATCTTTTCCGGTTGATCTCATTGCACTGGATTTAGATTTTAGACCGCTCTGCATAG 153 | AACTTCCTAATGTTGTTCCAGTACTTTTAGCTGAAGACTTTTTACTGTTTAAGCCACTGA 154 | TTGCACTTGATCCTAATGAAGCAGCAGCAGAATGAACTGAATCCTTGCCACTTTTAATTC 155 | CAGAAGCATAATTACTTGCTACTGATTTACCAGCAGACTTAACTGAACTAGTTGATGACT 156 | TAGCACCATTAGCTGCCGCTTTAGGTAATTCCTTCGAAGCAGAAGAAACAGAACCCTTAC 157 | CAGATTTAACCCCTGAAGAGTATGCAGTGGCTGTCTTTTTACCTTCTGCTTTTGCTTTGC 158 | CTGTATTAGCTTTTAATCCTTTATCGGCAGCATCAGAAACTTTTTTAGCGGATTTTTGAA 159 | CATTGGGAGCTTGAGAGCTTAATCCATTAGAGATTTCTTTAGCTCGCTTTGTTCCTTCAT 160 | CGTTGGAACCTTTTCCCTTTCCGCCACCTTTACGTTTAGATAAGCCTTTATCGACTTTAT 161 | CACTAATATTTTCTCCGGCAGCTTCATAATTTCCTCGCTTAATAGCATCTAAGAAACTCT 162 | GCTTTCCGTCACTACCACCTTTAAACATCTTGGCTGGTAATTTACCCAAATCTTTCATAA 163 | CTTGGTTATTCATTAAGTTACCAGCTTTTTCTAAATCTCCGGAAGCTACAGCATTTAAGA 164 | AGTTTTGTGTTCCTTGTTTTCCAGCTGTTCCAAAGTACTGAGCCATTACCTGCATACGTT 165 | CCCACATCGTAGCTGAATGTTGATTAATTTCACTCAACATTTCTTCTAAGCCTTGAGTGG 166 | TACTAATTTTCATCTTATTTAATGCTTGTGTGAAAGTCTTATGAGTTAGCTGACCGGAAC 167 | GAGCCATTGACTCAACAATTTGAGTATTGTTATTGTCAATCTCTTGCAGTTTTTTCTGAT 168 | CGCTTTGTTTTAAAGCGTCTAATTGCATTTGATAGCCATTCTTTTCAGTTTCGGTTTTGG 169 | CACTAGCTAACTTTTGTTCTAAGTCCTTTTCACGTTGTGCATTAGCTTCATGTTCTGAAT 170 | ACAAAGCATTATTTTGTTGCTTTATTGCAGCGATCTGTCTGGTTGTATCTTCTTTAGTCC 171 | ACTTTTCATGATTATCAATCTTAAGTCTAAAAGCTCTTAATTCCATATTGGATTGATCAA 172 | 
TTAAAGCTTGTGCAACTGCCTCATTTTGATCTTTAAGGGCTTGCTTTTGCTCATCAGTTA 173 | ATTTCTGACCATTAGAAGCAACTCCAGATTGCAGCATTTTATTATTGTCATTCATAATTT 174 | GCTGCATTTGATCAGCGCCCTTAGATTGGTCGCTTAGCATCTCATTAATAGCAGCAGCTC 175 | GTAACTTCCGCTGATTTTCTCCGCTTTCTTTTGTTGCTTGCTCCATTAAGGCACTGGATT 176 | TCTTTTGAATCTCTTGCTGTTTAGAACTCCATGATTTTAATAAAGTTTCGCTGGTTGAAA 177 | CATATCCTTTCATCCCATCATCAGAACCAAAGGCTGACTTCATCGATTCGACGACTTGGT 178 | CAAAACTTTCTTTATTAGCCTTATTATCATTTTGCATAGATTTAGAGGCTTCAGACCACG 179 | CCTTTTGAAAATCTTTTGCTTTTTCTGCATCATCATCTGTCGATTTACCAACTTGCTTTA 180 | AAGCATTAACAGTTTTAGCAGTGGCACTATTATTAGCTAAGTTCTGCCATTGCTTACCAA 181 | TATCAGAAATTCCATCTTTAATAGCACCTACTGATCCTTTAGCACTTTCAGCGGCGCCCT 182 | TAAAGTCGCCTTTAAAGAACTTTCCGATAGCCTCTCCACCTTTTGCAATAGCGGTAATTA 183 | AAGCTTTAATTGCAATAACAATTGTGGAGATACCTGTGATTACAGTTCTAATAGTATCAA 184 | TCGCACCAGCGACTACCAACATAATCGCTGATAAAGCAACCAATAAAGTACCACCAATAA 185 | CTATTGCAATTCCTTGCAAAACTGGTTTCAGGGCTTCTAATACTACTTTTAAGGCATTGA 186 | TTATCGGACTAACTGTACTTTGCATACTAGAAAAGCTTTGACTAACACCAGAAACAAAAC 187 | TAGAAACAACTCCACGAATATTCATAAAGTTGTTTTTCCACATTACAGTAAAAGCCACAA 188 | CTGCAGCAGTTATTGCAGCAAGTACGATTCCAACTGGACCAAGTGCAGCAGTAAGCGATG 189 | CACCAAATGTTCGTGCAACACCCAAAACCCTACCTAACCCGCTAGTAATTGCAGCAATAG 190 | TATTTTGAAAAACGGAAAGCAACATAGGCCCAATTGATCTTAGAGAAGAAAATGAAGCTC 191 | TAATTCCAGTTCCCATTGTTTTAAAAATAGAAACTATTCTAGAGACAACGGTTCTAGCAA 192 | TCGTTTGCATTGAACTTCCTGCACTTAGAAGACTAGGTCCTATATTTCTAACTTGTGCGG 193 | TCGATTTTTCGACATTCGGACCTAATTTATTGAATTCTTCTTTGACAAGATTTAAACGAT 194 | TTCTCAAAGGATTTAATGAATTACTTAAAGCATTTATTCCGCTAATAGATTCAGTAGATT 195 | TAGTACTACTTTCTTCAATCTTAGTTAAAAGATTTGAAAGCTCCCTAGCAGTTGTCTGAT 196 | CTTGCTTTCCCAATAAAGAAATAGCATTTCTTGCCTTATTTATTGCAGTAGTTGATGCAT 197 | TACTTTTAGTTCCTAATTTATCAATAATTGCAACTGCCTCGCTAATAGCATTCTTTGCCG 198 | TTTCACCAGCAGGCCCCATTGTCTTGATATTATTAGACACCGCAAGTATACGAGCGCCAA 199 | AAGTTTGAAATTGAGTACCAATCCCAAAGAATTGAGTTAAACGAGCTGTAGCAGTACTGA 200 | TTCCACCAGCTAAACCTAAGAAAGGTTGAGCAATAGTATTAGAAATATTTATTGCAGTTT 201 | 
TAAATGCTAAAAATGCAGTAGTTGCACGTAATACTATTGGAATTAAAGCGGCTACTTTAG 202 | TGCTATTTTGCTGAATAAAGTTAGCTAATCCTACTAGAGCCTGTGTTATCTGCTGAACTA 203 | TTGCTCGAAATGTATTTAAAGCCGCGGCTTTATCGTTTTGCGCATACAGTAAGCCATAGG 204 | ATTGAACCACACTCTCAATTGCTGATTTCCATGCTGGAAAAATCTGCATAATAGCATCAC 205 | GTAAAATATTAAAAATATTTCTAATTGTGGCTACTGATCTTTTAATTTGATTGCCTAATG 206 | CATCAAGATTTTCTTTAATTTTTGAAGAACTCTTGTTAATCCAACCTGCAAGATCTACAC 207 | CAGTTGCCTTTTTTATCGTATTATCAATATCTTCAATGATATCCGCTAATCCGGCCTTGA 208 | CACGGTTCTTCATATTGGCTACTGCTGTACCAATACCCGCAGTAGCATTCAATGCTGCTT 209 | CATGAAAACCAGTTGCACCTTGATCGAGCTCAATAAAACGATCATTTAATTGTTTCATTG 210 | AAATCTGACCAGATTGTACTTTAGAATACAAATCTTGCATTGAACCAGATGCAATACCGA 211 | ATGATTTAGCTACTTTTTGCAAAGCATAAGGCATAGTTTCTTCAAGGGATCTCCATGACA 212 | TCATATCTACTTTTCCGGTAGAAAGCATTTGAGTATATTGGATTAAACCACGACTTGCGT 213 | CTTCAGCACTAGCCCCTGATGCAATAAAGGCATCATTCAACGCTAAAGTGGATTTAGCAG 214 | CTTTATTTACGTTATTTCCGACAGCCGGATAAAGTTTTTCAGTAGTAGAGGCAACTGTGT 215 | CTAATGCCGTCGGCAAGCCGTCAACACCTTTTTTCAGCATGTTAATTGCTGAGGTACTCT 216 | CTTTAATTGGTGACCCCATTATTTTCATAATTCTTGGAAAATTTTGTAGGGTATCAAACC 217 | GTGAGATCGCCCCATCTAAGGAGCTAGTCATTTCTCCTATTCCCTTAGAAATTAAAGAGA 218 | ATACTGCTGTACCTTTGGCCATCTCACCTAGACTCGCCGTAACTTGGTTTGTATCTTCAG 219 | ACATTCCTCGGATACGTGATCTCATTTGAGCAATAGCACTAGATATATTATCCCTAGCAC 220 | TAAACGTTGCAGTAACACCATAACTATCAGACATCAGTTACCTCCTTTCCGTTTTGCTAT 221 | TTGTTTCTTAGCTAATTGGCGTGCTCTTTCTGCTCTTTGTTGCAATTGAAGATATTTATT 222 | AACCTTCTTTTGAGACTTACCACTAATTGCCGATTTTTGCCGCTCTAAATCAATTCCAAT 223 | TTCTTTTAATGATCTATATTTGTAATATCCATCTTTATCAGTAATAGTTGTTAGTTGCTG 224 | AATAAATGGAATCTGATAAAAAAGACTCTCCGCTTCAATATATGCATAGCGGTGAGCCTT 225 | TAGTCTTTGTTTCCATTCAGTCAGAGTCATATTTCCTATTTCTTCACGAGTAAAAGTAGG 226 | ATAATACTGCATCATTGTAATAATGATTTCATCAAAAGTTATTCTTGATTTGCTAGTTTC 227 | TCCATTGGATTTTTGATCTTGTCGAATGCTTCTGTAATCAATGGATTGATAGTCTTCATC 228 | ATCCGTTTGGTAAGTGGTTCTGTCTTGAAAAAATCGATTAATACTTTATATAGATCTCCA 229 | CTATTCTTATCACGAATAACTGCTTTTTCGATGTCTTTTTCTTTGATGTTGTAGCGTTTT 230 | 
AATAGCTTAGTCAAAACATCAATTAAAGTAAAAGCATCTCCCAGATTTAGACCAGCCACA 231 | ATATTACTTAAAATTTCAGAATTTTCACCAATTTTATTGGGTACAAAACCAAAATCACGT 232 | AAGTTGTTTAAAGTTAGTTCCAAATTGTAGGTCTTACCGTTTAAATTAATCTTTTTCATT 233 | AATGCGTCTCACTTTCATTATTAGTATTAGAGCTAATTACGTTTGTTACAGTTGAATTAG 234 | TTGTTACTCGCAGGTTTATTTGTATCTTCAGGATAGATTTGACTGTATTCATCTAGACCC 235 | TTTTCTTCGGTAGCACTTGCGGTGGTATCACGGAAGAAGTTATTAGCAGTAGCCACATCT 236 | TCATCACTAACAGTTGCATACCCACTGACTAACTTACCATCAACATTCAATGTAGTTTTA 237 | ATTGTTTCGTTGTCTCCCACCTTATCAGGAGTTTCCCAACTTGAGAGATATCCAGAACCA 238 | TATTGAGCTAAATATTTACCTTTACTATCTGGTGATGGTTTATCAAAGTTGATTTCCCAG 239 | CAATCTACTAATTCACTATGCTTTGCAGCATAATGTAATAAACGATAAGTTGGAGAATCA 240 | GAAGCTAGAGCTTCAATATCAATAGTTGTAGTAATTTCGCCAGGATTGTTAACTGTACCA 241 | TCTTTAGTTTCAGTAGTAGCAACTTTCCCAGCTTCCTTAATAGTATGTGTAGTTTCTAAT 242 | GCCAAACGAGTAGCATTTTTCTTATCTCTATCTTTGGCCAATTTAAACATTAGAACTTTG 243 | CGAGTTCCACTTAATGCTTTTTGCATATTATTTTCCTTTCTATCTCAAATTAAAATCTAA 244 | AATTAATCGTCCATGCTGCAATTGAATACTATCTGCAGTTTCACCCAAAATATTTGATGA 245 | ACTGCCTGTCAATCCAAACAAATAATGATTAGTAGTAATGAATTTCGGGATTAATTGCTG 246 | AATTTGCTCCATAATAGTAGCCACTTCAGCTCGATAATTGTATTCAGCAAAAACATGAAT 247 | GGTTAAATTACTAGTCCCAGTGACAAAATCCTTAGCATAGATATCATTATTAATTTGATC 248 | TCCGACCATAATAAAAGGATATCCTTGGCTCTCGTCTGGTAAATAATCGAAAGTGGGATA 249 | CCCTGTCTTTTTGCAATATTCAATTACAGTTTTAAACAATTCTTCATATGGATTCATCTT 250 | GTTAAATCTCTTAAATCATCAATAAACTTCATCCGTTCTGACATAAATGGCACACCAACA 251 | TATTTTCTAGCTGTCCATAAATCTAGTTCCAAATTCAACATAAGCAGCATAATTTACTAA 252 | TCCACCACCATAAGTAACAGAACCAGTAAATCCATCTCTGCTAATTTGAATCTGTGCAGA 253 | TTGCTTTAAATGCCCCGTATCAACTGGAACAGCTTGTTGAGTAGCAGTCATTAAATTGGC 254 | TGTATGTTTCTTTACAACCTTTTTAACAGTATTCATATTTTGCTGATTAGCCAATGCTTC 255 | ATCTAAAGCATCTAATCCATCCCATGAAATACTATAATCTGCCATGATACTCAACTCCGA 256 | TCAATGTGGTTATTCTATCCGTTGTTCGTGCACTAGTTACCTCTAAGTTGCAAGGTAAAG 257 | AATTAGATTTAATGTAACCATTTCGAAAGTCCACTGGAATAGGCAATCTAACTACATAAG 258 | GTCTTTGCTTAGCATCACCAAAAATATTAATGGTTTGATCTAATCCCATCTGAGTGATGT 259 | 
TAGCTGGATAAGAACTAACTTTATTAGTTTGATGTTTTTCAATTAGCTCTACCATTTTGT 260 | CGTATCTCATATGGATTCACCCACATTCCTTTATTCATGCCCGTATGGTCTTTACGCCAC 261 | TGATTTATTTCATCTTCCCACTCATCAAAATCAGAGGAAGCAAAAGTTATAGACTCTCCA 262 | TCTTGTGAGTAGCTGGCCATACCTTCATTTTTAAAACGATTAAATTTCTTAACAGCTGCT 263 | CCACGAACTATAGCCAGCATTTCGGTTGGGATTACGCCAGTAATGGATAACCTTAATTTT 264 | AAAAAGTCAGTAGCATCTTCAAGATACAGCTTTAGAAGATCATCATGTTTAGTATCAGTC 265 | AATTGAAGAGCTGTTTTAAGTTTTTTGAGCTGATCATCTAATACATCATTCATTTTGATC 266 | TTCTTTCTTATCTTCCCCGCCTGGAGTAGGTTGCTTATTACCTTGCGGGGTTATGCTTTT 267 | TTTACTGATACAAAAATACCATCACGTTTATGCTTCTTAATAAATAAATCATGGTATAAA 268 | CGATTTTGATATAGATATCCATCGCCGTCAGTATGTTGACCTGGAGCGAATAAAAATACC 269 | GCATTTTCTTTCACAATTGAAATAACTGCTGGTTTTGCAACAACTAAGAAGTTAATCGCC 270 | TTAGCATCTTCAGTCGGCTTTGCACCATCAGTAAAATCGTACTTAGTCATGAATCTATTA 271 | GATTCATACACTTCAATCAATTGCACACCATCAATAGAAGTAATACGTGATTCAAGTGCA 272 | GTAGTTCCCACATTCTGATTAGTGATATTTCGAGTAAATTCTTTTGAACGCTCTAATGCA 273 | TCCATCACTTCACTAGACACATAACCTACTAAGTTTTGAGTACCATACTTACGTACTTTT 274 | CCAATACCTGTCTTCAATTGTGAGTAAGCATTTGTTTCGTCTAAAGTTTCCTCAGTCTTA 275 | TGGGTCTTAGCTAACAAAGTACCTTCAGTATCGGTACCATCAAGATTATCAAATGAAGTA 276 | GCGATTTTTGAAAAGCGATAAGAATCTAATTCTGGTTGAACGTGTTCAGTAATAAATACA 277 | TTAGAAATATTAGCCATTGCTAATTCATTATCTGTTTCATCTACATCTTGTCGATCAAGA 278 | TAGAATTCAACATCTCGGTCTTGTCCCATCGTATAAATAGTCTTTTCATCTGAAATTGTA 279 | CCTGAATTAAAGCCTTTTCCACGGGTATGATCTTTTAAACCACTAGTACTAATAGTCTTT 280 | AATGTAAAAGAACGTCCTCCATTGACCAAATCAACCTCTGGCGTTCCTAAAGCTGCAGTA 281 | AATAAACCTGCAGTAATCTTTTGATCTAATAAATTTCCATCTTTTGTTACATAGTTGAAA 282 | CCTCTAGTTTCTGCATCTGTTGGCATATTCTAAAATCCTTTCTATAAACCTAGAGCACGT 283 | TTCAAGTCTTCTTGCGGAGTTTTAAGCTTATTTCCTCCAATTTTAGGAGTTTGGCCTTGA 284 | AGAATTGAGTTTCTACCTTCCTCACGTGCTTTATTAACTAAATCAATAATTGCCTTGGCG 285 | TTAGAAACAGTTACCGTAGCATCATTGTTAACAACCATATTAAGAACATCGTCACCAATA 286 | TTCAGATCTGCCTTCTTAAATATCCCATTAACTTCTTTAATGTCATTTGAACGTTTAATT 287 | TGAGCATTTAAATCAGCATTCTGTTTTTTTAATGACTCAATTTCCTTAGTACGTTCATCT 288 | 
TCTTCAGTGATTTCCTTAGCGGATTTGCTACTCTTGCCCCTTAACTGCTCGATTTCTTTC 289 | ATCGCATCTTCGTATTTAGCTTTATAATCATTCTTTTGTGCTTGTTCTTTAGTGATTCGA 290 | TCAATATAAGGTTTTACAAGTGAATCGGCATCAATTTTTTTGAAATCATCTTTATTCTCT 291 | GAACTAGATGCTGCAACTTTATCTGGTTCAGTAGTAGAAACTGTTTCTTTCACATCATCT 292 | TTTGGCATAATTAAATGTCCTTTCTATAACTCACGTTTTAAAGTTTCGGGGAACTAACAC 293 | ATGGTGTTCTTTAATGCCTGCACACAAGGAAAAAGGCAAAATAAAAGACCATTGAACTTA 294 | ATCAATAGTCTTAAAACGTATTTTATTTCATTAACGGCAACCAATCTCTAACTTCTTTAA 295 | AGATCTTATATGCTTTTTTCATCATTGTATTTTCCTCTAGATATTGAAGGCCATCACTAG 296 | TTATATAAACTCTATCCATATTAGAAACTATAGTGCCTGTTTTAGTAGGAGTAAATTTCA 297 | GCCCTTTAATATAGCCATCATCCATCAACATCATTAAGGTTTTATAAAATTGATTATTGG 298 | ATATTTTAAATGTAGGAGCATCCAATATATCCAAATCGGGATTCTGTCCCTCTTCATAGC 299 | AATACTTCAAATAACTTAAAATTTTATAAGCTACAGTAAAATAGTCATTATGCGCCATTT 300 | ATTTAACCTTTCAAACCAGTTCAGCACTAGAATAATCAAACTTTCCTTTTATTAGTAAAT 301 | CAACAATTTTTTTCTTGTTAGCCTCTACGCTTTGAGTAGCTCTTCCTATTATTTCATGTT 302 | TTTTTCCATCATCAGTTAATAAATACTTATGACCATAAATATCATTAGTGCCTTGAAGTG 303 | CAACCCAAGATAGATTCTTTTCAATCTGTGGCGTGACAATTACCACTTTTAGTTTTTTAG 304 | CCATGATGAATCCTTGCTTTCTTAGTCTTCCAATGATTTAAATTCTGCTCTAATTCTTTA 305 | ATTTCATTATAGCTGAATTTCATGGCCTTAGCATTTTTTAGCAGATATTTTTGTGCTTCA 306 | ATTTCTAAATTTACAGTCTCTTCCTCGGAAAGCTCCCACATTGCTTTTTTAGCATTTTTA 307 | AGCTGTCTCGTATGATAAAGTTCTTCTTGAATTGCTGACCTTGTAGCCCAAGAAGATAAA 308 | GAAATATGTGTACTATCGAGAGTTGAAGCAGACGCACTTACACTACGAAGATGTCTATCA 309 | ACATCTTCACTCATATCCACTACTCCTCCACGACTAATAAATTTACGTAGTATTTTATTT 310 | AGGTCGTAAGAAGACACATATTTTGTCCCTGGTAAATGATAACTATTTTTATGAAATAAT 311 | TCAGATGCTGATTTCTTCTCATTATCGATTTTTTTAACATCATTGTCTTTATCGTCTACC 312 | CAATAAGCAGAAATAGCACATCTACAATTAGGATGAGTTTGTGCAGGGATGGTAGGCACC 313 | TTGTCATATTCATATACGCCACTCCCATGTCCGTTGTCCGCACTACCAATTGAAGAACAT 314 | AAAAGACAGGCTTTAGGTTCGTTTATCCATTTTACAAACCTATAACCTGCCTTTTGGATA 315 | CTTCCCCATTGAGCCATATAAAGTACTCTGGAAACTTCAGTATTAAGAAGCCTATCTGCA 316 | ACATATCTCCAATTATGGACAGTATCAGTCATTAAAGACTGCATTCTTTTGATATCAAAA 317 | 
TGAGCTAATCCAGTAGCAAGAACTTGTTCAAGATTAGCTCTTAACCCTACTTGATTAGCC 318 | CATAATCTACTTGACCAATTAGCTCCATTTACATCCGCAGTAACAACACCCTGAATAGCT 319 | GCATTATTAAGCATACTATCGTTTTTACCACTTATACCAAGGATACCTGATTGTCTTTTG 320 | GTTTCAGCAATGTATCTATCCACTAAAGTGTTTCCTGTAATAGATGCGGCATTCAAACTA 321 | GCTCTTGCAACTTCTAATGCAATGTTAGCTCTCAGATATTCTAGGCGGTTAATTCTCATA 322 | GTAGTGTTATAGACACGTAACTCCTGATTAACTGCTTCAGAATAGTCTTTATAAGTAACA 323 | TGATGACCTTCTTTTCGCATTTTAGCCGCTTGTTCAACTAATTTCTTAGCCTTACTTGAA 324 | TAAGCTTGAACATCCATTTGTGTCACTTGCTTTTTAGCTAATTCTAGGCCAACATTACTT 325 | CTTTCTGCCAAAGAAGATATCTCTGCTGCAATTTGCTGATTAATATTATTTAGCATCGTG 326 | TCATAATGCTTGTTCAATTGAGCAATAGCTTCTTGATCCTGTTCAATGTATTTTTCAATT 327 | GCTTCTTTTTCCATTGCTTGACGATCAAGCCAGTACTTACTACTCTTCTTCAGCATTATT 328 | ATCACGCTTTAGGATATCAATTGGCATACTCAACTGTTGCGTTTGCTTAATGGCATCTGC 329 | TTTTTCTTTGGCAATTTGTTTCATTTCTGCATCTGGTTCAATATCAGGGATCATTCCTAG 330 | TTGTGTCTTCTTAGAGACAATTCCTTCAGCATTCTTTGCATTATCAATTGCACTAGCCAT 331 | ATCTTCAGGAAGATTTCTAGTGAATTTAAAATCTAATTCACTCCATAGTTCTTGATCTTG 332 | CTTATTGTTAAACAATGTAGCAAGTACAATTCTATATAGCTGCATTAATGACTTATCAAA 333 | CTTGCGCTCTTTGCTATCTGCCTTATTTTTCATTGCAAATAGTTTATATTGAAGAGCAAC 334 | ACCACTAGAATTTCCAGCAAAATTTTTATCCTGGATATTAGGTACCATAGCCATCATAAA 335 | AATAAAATCTGTTAGATGCTGAATAAGATTTTCTTGCATTTGATCAGCATCTGGTTTGGC 336 | GATAAAACCTATTTGTGGATTTGTATCTGGATCAAGCTGGCTAACATATAATACACGATT 337 | GTTTTTAAAATCGAATTTAGGATTACCTTCATCATCTTCTGGAAGCTTAAAGCCAATCAT 338 | ATACATGTAGGCATTATCAAAGTATTCCACTTGATTAGCCTTCTGACTTATCACTTTATC 339 | TAATGCATTAATCAGCGTTTTAATAGAATCAAAAATTCCCTGACGTTCTTCATTTTCGAA 340 | GAACTCGACTGCAGGAACCAATCCATATGGATTTATTGCATAGCCGGCTGCATTAGTATC 341 | TTCTTCAATATCATAACCTTTAAATTTATAAAACTTGTCAGCATACTGAATAACTCCATA 342 | AGCATCTGTCCAATTGTTAGAATTATCAATCTGATAATGTACAAATGCTAAAGGCTGACG 343 | TTGTACTGTATCATCATAAATAATAAAAGCATGATTAGGGCTTGAATACATTAAGTGCGG 344 | CCTTGCATCTTCGCCTTGATAAATACTGGCAATAGAACGTCCAAAGATATCACACTGCTT 345 | ACTAATTTCATTAATGGTATCAAAAAAATTTTCTTGTCTATTCCAGCGGGCAATTTCATC 346 | 
AATTTTTGAGCTATCATTTAACAAAGCTAGTTTAGGCTCAATACCACAAAAGTATCCATT 347 | GTACACATCAACGACATATTTTGCTGAATTAACGACAATCCGATTATCTGCTCCAGTTTC 348 | CTTTTCTGGTGCGGTTAATATCTTATGTTTACCTAGATATAACTTCATATTCTCTCTATA 349 | TCTAGGTTTCAAAACTGTTTCGTTATATGCAATAAAACCTAATAATTCATTACTAGTTAA 350 | TTTTTCTCCCTTAGGAAAAATAAAGCTACTGTTTCCAGTAACTTTATCTCTTCCATAATT 351 | AATATCTTTCAAAATCTCACTCCCTACATGAATGATTTAAGTAGCGTCGCTTTCGTACTT 352 | CCATGACCATTCTCTTCCTCTACCGCATAACGAATAGCATCAATACAGTGGTTATATGCA 353 | TCGATCGGCCTATTTGTGTATTCATCGGTCTTCTTATCTTTCACGTATGTGTAATTCTGC 354 | AATTCTTCAATTGTCTTAACGCAACGATCATCAACCACCCATTTAAATTGCTGTAGAAAT 355 | TGAATTCCTTGAATTATTGAATCAGGACCTTTTAATGCTGGTCTAATTCGATAAATACCG 356 | TCTCTTTTCATTTCAGCAATTGATTTCTTTTCAGCACTATCAGCTGTTATTACTTCTTTT 357 | GAATAGCCCATATCCTTAATTACTTGTGCTAGCTGATTATTAAGCAAGCCTTTTTTAACA 358 | AATTCATCCATCACATAAAGTGTTTTATTCCTCATATCTAGCTTGATATGCATAAAAGCA 359 | CTTGGATCATTGATAAATCCATAATCAAGACCAAAATAATCATTTAACGCTAATAATTTA 360 | GGATCACGAGGATTAAGTCGCTTAGTTTCAAAGTAAGGAAAAACAAGTTTATCTAATGTT 361 | GCAAACTCACCTAATGTATAAATCTTGTAATAAGCTGGATTAGTATTTTTCAGTTCTTCA 362 | ATCGTTCTAATATTGTCTTCATCTAAAAATCTATTATCTTTGTAAGTTGATTGATGGATA 363 | GCTACTCTTGACCGATCATAATCTGCACTAGGATCAAACCAAGTCTGATAAGTCCAATTG 364 | AGTTTTGATACTGGGTTAAACATACAAAAAATTTGTCGCTGTTTATGCTTAGGTTCACGC 365 | AAACGCAAAGTAAGTTGAGTGTAGTCATTATGATTAAATTCACTTGCTTCTTCCATAACC 366 | ACATCAGACAGCCCCTTAATGGATTTAATCTTTTCTGGATCATCCATACCTTGAAATAAA 367 | AAAATCGCCCCATTTGGAAGGACGATTGTTTTATCAGATCTATTTACATGACAGTATTGA 368 | AGGATATTCCATCCACTTAAACACTCAGTTACGTCAGTAAAAATTGAATTTTTAACTGTT 369 | CTATCAACCTTACGTAGCCAAAGCACTTTTCTTGGAACATTCCAATGCTGCAATGATTTC 370 | AATACAACTTTTTGTACTACTCCATGACTTTTACCACTTGATGCACCGCCATACCACACT 371 | TCTGTTAAGTGCGAATAATCAAAAAGGTTGTTAAAGATCTGTTTATTAAATACTTTTGAG 372 | GGATGCGGAAAATTAAGTTGAATATTAACCATCATAATCACCAGGATCTATGTTTACTGT 373 | AATATCGGTTTGAGTTTCAACTTTATCGGTAAAAATCTTATAGTATTTACCAAGTAATTC 374 | TAAAGCCTTTAATTGGTCCTGATCTTCTGGAGTAACAGTCGTGACATTATCATATTCAAC 375 | 
TTTAGGCTTTTTCCCTTTATAAGAAATACGCTTAAAAACCTTTTCTCTAGGTTTTCCAAT 376 | TGCTAACTGGCTCAAATTATCCAGAACTCCATTAATATCAAGAATATCGTTAACTTTAGT 377 | TTGATGCATTGTTTCGCTAAGATATTCAGATACGTTATCATTTGCTATCAATCTTGATCC 378 | ATTAGCTCTCGCTTGATTATCACTTTTTACTTTATAAACAGCCTTATACGCTCTAGAAGC 379 | ATTGAAATCTTTTAAATATTCCTGACAAAACTTCTTCTGTTTTGGAGTTAGTTCTTTCAA 380 | ATTATCACCACCTTAATTTTTGCAAAATAAAAAGCCAGCTTATGCTGACTTAAAAATCAT 381 | AAATTTTCCAGACAAAACTTATATTGTTTATCGTAATCTTCAAATTCATCAATTATATTA 382 | ATACTAGATAATATCTCTCCATCTTTAGCTACTAAATTAATTAAAATATTTTGACCACCA 383 | ATCCCAACCCTTTCTTTAGTTTTTTTGTCGTCAATATTTTTTTCGATTTTCATTAGTTCT 384 | TTGAATATATGAATTTTATTATTTGAATCGCAATTAAGCAAATAATCTATCGCTTCTTCA 385 | CACTGAGGCTTTAATATCCAATTAGGATTTGAATAACTTGCGATCTCCTGAATATCAAAT 386 | TTATTAGTGCTCCTAAGAGCATAAGCATGTGTATTCATATCTTCATCAAAGCCGAAGATA 387 | TAAATTGTTGTGTCTGTATTTACTTTTTCGATATTATTTTTTAAAACATCTCGAATTATT 388 | AGACTAAATGTTTTAACTTCTTTCGATAAAATTGATCTAGATCTATCAAAAGCTAGTCTA 389 | ATAATTGAAAAATCTCCAGTTCCACACAATATGCTTTGAGTAAAAGGATAATATTCAAAT 390 | TTTTGAGTAAAGTAAGATGGAATAGTTTTTCCATTGTTGTTTTTTCTCGATGCTAAAGTA 391 | TCCATAGATAAAATAATACTATTAGGACTAATTTCAGCATTTAAAGCAGTCATAAAATCA 392 | CCTCAATAACAATTATAAAAAAATCAATTTCGAAATAATAGGATTCATTCTTTGAGATGA 393 | ATATAAATTTAAGCTAACTAAGTCCAACAATGGTAAATATTTTAGAACCACTGCTAAAAG 394 | TTTAATAATATTTCCATTAAAAAAGAACTGGCCTTTCGACTAACTCTTATATAATTATTT 395 | ATTCAATTTAAACTTTTCTGGATTATTTTTCATAATATCCAAAGCCGGCGGTTGAAGTAT 396 | ATACAAAATCGGAACATCATTTATATCTTCTATTTCTCCAAATTTAGTATATGATACATT 397 | CTTAGTTTTAACTATTACTTTAGATTGATATAATTCAAGAACATTAGGATCATTTTCATG 398 | TAAATAGCCTTTATATTGAGGAGGATTATTAAATAATTTCCTAACAATATTTAGTTTTTC 399 | TTCTGTGAGACCTAAAACATATTTTTTATATTCTTGCTGTTCTTTTTTCGTTTTATATCT 400 | TTTCCAAATTACGTCGCCAATATCAAGGCCAATCGAAATCCAAAAAATCAATGCTACAAA 401 | ATATATAAAAAGAAAACACATAGTTAATACTTTACCCACTGGCAATGTTTCAATATAAGA 402 | TCTTGACAACAAAACTATCGTACAAGTAATAGCTATTAGTAATCTTGTTTTCATGTTTGA 403 | CTTAAGAGCCTCCCATGCTGTCTTTACTAATTCTTCCAATTAATTACCTCCATTCGTTTC 404 | 
AAAAATAATTATAAAAAAGCCAAGTCTAATTAAAAGACTTGACTTTCTTGAAACGAACGG 405 | AATTTTAGTCCTAAGAAGGACTATGGGAAATACTGGAATCGAACCAAGCTTAATGTCTCA 406 | CCATTTAGAGATATTTCCCACTTGCTCTTTAACAAAATAATTTTGAAGGGACATTCTTAA 407 | AGAATGTGTGTATGCGCAGCTAACACAGAGCAATAGCCTCAAGTGTTAAATACATAATAT 408 | AGAGAGGAATTCTATTAATTCCCATAGCTCGTACAAGAATCGAACTTGTGTTACTGGTAT 409 | GAAAAACCAGTGTCTTAAACCACTTGACCAACGAGCCAGACTAGGAAAGCCGCATCTCTT 410 | ATGCTGCGCTTTTACGATTGTTGCAGTGTGTGCTTTACAGTTTAATGTAGGGGTTACATT 411 | AGTCACATGTTCAGTTCACCCACCTACCGCGAAAACAATCAACCTAATAACTGTAAAATT 412 | AAATGACTTATTTCGTGAATTTATCATTTAAATTACTTTACCAATTGTATGACAAGGTAA 413 | AGTTTTTGTTTTACGTCCGTTTTCCGCCGGACAATGAGCAGCGTGGGGATCGAACCCACT 414 | GTATATTGTGAAAGAAGATTCCTTCTTTCTAATTCCAAATTGTGTCATACTGCCCACGAC 415 | TATCGCGCAGTCCACTGAACGATAGCCAACTCTTAACTACAATACGCTCGCCGGGGCGTC 416 | GCATATCAGTCCTAGATCATAGCGTGACCTAGGTAAGTAGCAAATAGGAATCGAGCCTAA 417 | ATGCTACTGCACATCAATCATGAGACTATTTAACAGAAATTATAAATTTAACCAAGATGA 418 | TAAATTAATGATTGATGTGCTAGGAGATCTTTTCAGATCTAAAATTGTAAAAAATATGAA 419 | GGATTGGATCTTTTAACTTGAAAAACTCATATTACCGTGTCGGACTTTCTTACTTTCCGA 420 | CAATATCTATTTAAGCATAGATAGTTGTCAGTAAATTACCACCATATTCCCACAAAAATA 421 | CCCTATTTATATTTATGTAAATCAATAATAGGCTGGCAATTGTTCACTCTTTGCCAATAG 422 | TCAAAGCCATCAGCAAATTCAAACATTGCTTTCCTTCGTAAATAACTGAACTTGTTGTTT 423 | TCGCTATAAACTAAAGGTTTAACTTCATTCCATGGCCATTTACGTATAAATAGCCCTATC 424 | AAAATAATTTGAGAATTATGCGAGCAATGATGTATTGCATCACTAGTCGCTTTTACCATT 425 | GCCTCAGCATTCAATCCTCTAATAATCGCTTTTTCACTATTATTACTTCCATTACTATGA 426 | GCAGGAGCCGAACTTAATGTAGGAGACTTAAGATCTAATAATTCATGGCCACTCATCAAT 427 | AGCAATCGTTCCAAATCTTTGTTAAAGAATTTTGCTACTTCTTTTAGCGTCTCTTTTTCA 428 | TCCAATTCTGGAAATAAACTCACTGTATCAGCTCCTAGTTCTATTTTCTATCCTTCTAAG 429 | CCATAATCTCTATATGGTTCATCATGCGCTAAGTGATAGCGCCTGTATTCATTACTCATC 430 | TATGACATCCTTAAATTCATAATTCATTTCGTCTCTATCGTATTTTTCATAAATCATTAA 431 | AGCTGAAAACGCCAGTCCTGAATCCCCCATATCCCAGGGAGTAACTTGATATTTAATTTT 432 | TTCTATTGTCAATGGACCTTCTTTTTCAAGAAAATCATTGATGGCATCTTCTAAATATTT 433 | 
AGATGCACTTTCTTCAAATAACTTAACTTTCATCTTTACCACCTTGTCTTTCTTAAATCG 434 | TATCCTGTTCTAGCCACTTTTGTAGTCCAGAAGCATAATCCCTTATCCAGTCACTAATAA 435 | CTTTTCCGTCAGGCAAGTTTTCCCAATAATCCGGATCAAACTCTACAGCTATTTTGGTTT 436 | CTAAATCAGAACTCATGTGATAGCCCGTTCTATCATCCCAGCTTTCCAAAATAGAGTTCT 437 | TTAATTCTTGACGTTCTGATTTTGATAAATAGCAATCATCATTTTCATCGGATAATAGTA 438 | AATAATCATTAAGCTGTTCATCTAAAAGACTGTAATCATAACTCCATAATTCTTCTGCTG 439 | CTTTTACTTTAGAAACAAAGTAGCCTAAATCTTTACTGTATTCCGCAATATCTCGCAAAG 440 | TGTTTTGACTTGAATACCAGCAGAACACTGCAAATCCAAAATCACCACTAATGCTTAACG 441 | TTGAACTTGACCGATCGAAAAGGTAATTTATTCGATTTTTCCATTCATATTCAGGATCTT 442 | TACAAGCAAGATATTCTATTTTGTTATCAAAGCATTTGTATTCAAATACGTGGTTATTAA 443 | AGCTCATCTTAAAACTCCAACTTAACTCTCTTCCGTTTATTATCTGCTACCCATCTAGCA 444 | GTTACTTCTAACATTAATTTTTGAAAATCCTCATTTTGCCATGCTGACTTACCTAATAGT 445 | TGACTCCGTGCATATTCTTCATTCAAGGTGTCTGCAAGCTTCTTGCTGGTTAATTTGCTA 446 | TAGGCATCCCTTCGACTTTCTTTTTTCTCCTCCGCATAAAAATCATTAAAGGGATTGGCC 447 | AATGCTAGGTGCTTTCTTTTTGCTTGATAGCCGGCTGAAAATACCCCCCCGATTAAGTTA 448 | AGAGCATCTAGGTCTTCTAACGGTTTATGCTCATCAAGTTTCTTTACAATTGTTTTATCA 449 | ACATAGGCGGTCTTTAGATCACCTAGTACATCAGATGCTACTTTCATTTAATTTCTCTCC 450 | TTATATTTTCTGGCTATGAAGTTAATCCTTGCAGTTTCTAGCTGCATCTCTAAGCGTGAC 451 | ATAGCATATAGAACTCCTAAATTCAAAAGCGCAGTTATTGTACCTTTAGCTATTGCACGT 452 | TGCGCACCTTTTCTATATTGCTTTTGCAATTGGTTAATAGTCATTTTTGCTTGTTTTTTC 453 | TTCAACTTATCTCTTTTTCTCATTTTTAAATCCTTACACAAATCTCAAAAAGATCAAAAC 454 | TATATCAATGATTGTTAAGCAAGTTATTGGAACATAAGACTTTTGATCGTCTCTAATTAC 455 | ATCTATAAAAAATAAATTAATTAAAATAAAAACGCACCACCATTCGGCAGATAATTGTCC 456 | GCTCATTGTTTTTCTCCAAACTCTTTAAAGATTTTGTTTCTTTCTTCTACAGTAATTTGC 457 | GGTGTAGAAGATTGTTTAGGACGATATGTACTCCAATCAGTTGCCTTACGAACATATTTA 458 | TTTTGTTGATTAAAACGAGGCTTAATAGGTGTCATATCTAAATCATCATCGAATCGACCA 459 | TTAAACCATGTAGATCCATTAAGGGTATATTCTAGTGGGGTTTGTTTAAGCTTCAGAAAA 460 | CGAATGTATTTTTCTAGTTTTTCTTTCATAACCTCAAAAGTATTGTCCTTACTCTGCTTT 461 | CGCCAAGCTTTATAGTGCCTTAATGCAGCTTTCTTTCCTTGTTTTTTAGGATAGAGTGCC 462 | 
CATAACTGTTCGAATTCTGAGTCATAAATTTGGGTCGGAGATTCGGAAGAATCGACTATA 463 | TTATTTATACTATCCTTACCTAACCTATCCTTACCTAACCTATGCGGTCCATTTGACGTC 464 | CATTGGTTGTCCACTGGACGTCCATTATTTTCAATTGCTTTAACACCAGTCTTCTTTCCC 465 | GTATCTGCTCTTGCTTTAGCTTGCTGAATTTCAATGTCTGGTAGCATTTTGACAAGTAAG 466 | TCTCGATAGATAGAGTCAACTTTTCTATCGGACCGAATGCGGTTATTTTCTTTCCAATCA 467 | TTTATATAAGTAACCAGATCAGCATTTAAAACTGTTATAAGGCCTTTAGCTGCCAGAATT 468 | TTTAAATCATCTTCAGTAGAGCCGGTTTGTCGCATCACGTTATAGCCTTCAACCACTCCA 469 | TCATCGTCGGCATGTATGCCTAAATGAAAATATAAGGCCTGAGTTGACGGAGGCATTTTA 470 | AGAAAACGGGCGCTCTCTACTATTCTTTGTGAGAACATTCTACGTTGTGCCAATCATTTC 471 | ACCACCTAAAACAATTTATCTACTTCAGGATCTTTAGATTCTACTTTCTTATCTTCAAAT 472 | GGATCCTTTTTAACAGTTTGCCGAATTTTATCAATTGCAGCTTGTTTCTCCTTAGTTTCT 473 | TGTTTTACTGAATGATTTTGCTTAAACATTTCGGCAATTTGATGTACAGCCTGTGCATCT 474 | TGCGGTGTATGTGGTTCCTTCAGCCAGTCTTGTGCATCTTTAATGCCATCTTTCGCTTCT 475 | TGATAAAGATCGTATAAGTTAGCTTGTATGCCATTGTAATAGACTTTATAGTCCTCAAGT 476 | TCTTGTTTAGTTAAGATTTTAGGTTCGTAAATAGTCTTTTGATCTTGTGCATCATCGTCA 477 | TTATCTGCAGCGATCCCAAAGGCTCCACTAAGTGAATATCTTTTTGCATAACTGATAAGA 478 | CTGGCGGTTGCTTGTGCGTCCCAAGCTTTATTATTTTGAAATACCACCTTATTCGTTTTG 479 | AGCGTAAAGCCAGAAGAATCTACTAAAATTGTTTGTACTTCTACAGTATTATTTTCAGTT 480 | CTAATATCGAAAAAATAGCTAAATACGACATTGCCATCTTTATCTGTAACTTTTTTGATA 481 | CCATCCATTACTGCGGCATCAATGTCATTTAAATCAGCATAATTATAGTCATAGCTATAG 482 | GGTGTACCTTGCTTGGTTTTTCCAGAAACAGTAACTTTATGCGTTTTTTCAGGCTGTTTA 483 | ATATTAGCCTTCACTTGTGCATAATGCATTGCCCACTTAGCTCTGTCTTCTTCTTTTCCA 484 | TAGATTTCCATCTTAGTTCACCCTCTCATAGCTAATACCCCAGTCTCGAATGAAATTAGA 485 | TAGGGCTGTTAGCTGATCGGTAGTACCAGTAAGTTTAAGTGTGACGCTATGTACCTTATC 486 | AACTACTTCGCCTGTTTTGGCATCGACATACTTATCGCCATGTTGTTCAAGCGATTCTAT 487 | AGCTTTCTTACGATTTTCTTCTTGCTGTTTAGCCTGCTTAATTAAATATTCATGGTCATT 488 | ATCCATTTGAGTTAGTACGTCAGGTAGCGACTTATAATCAAGCATTTGAAGATATGGACT 489 | AGCAGTCATAGCAGGTTTAGTGTACTCACTAGCCTTAGTAGCAATAACTTGCTCAGCTTC 490 | TTTTCTAGCCTTTAGTCTTTCAGCAATTGCTTTAAATTGTTGTCTAGCTTCCTCCTCAAT 491 | 
TTTTCCCCAGCTAGCTGTCTTGTTGTCCCACTTTTCTTGGTAATCAAGCTCCTGTAAAGG 492 | TATATTGTATTCAACTGCAATCTTACCCAGTTGCAATAAATTCTGTTGATGTTTGTCTTT 493 | TTTAGCTTTATCATCAAAAGACTTAATTCCTTCACTGATGTGATCAACTGCTACCTTAAT 494 | TTTCAAATCAAGTGATTTAATTTGAGCAGTAAACTCATCAATAGGTTGACTAGCTTGTTG 495 | TGTAATTGATTTTCTTCGCTCATTTAAGGCTTTTCGAATGCGATTTAATTCTGCCCTAGT 496 | CTTTTTATCATAAGGATACGATTTAGAAGTTACTACATAGCTTTCCCATCCTTTAGCTAA 497 | ATCATCTACTTTAGCATTTAGTGACTCGTACCCAGGAAAGTCTATTTCAGCCTTTTTAAA 498 | ACTAATTGGGAAATTTTCATTTTCAAACGTAATCATGTCTTTTTTAGCTTCGTTAATCTT 499 | ATTCATGTTATAATTCCTTTGGTTTTGTATTTTTTGTAGTCGTTAAACTGTTGGCGCAGT 500 | TAACGGCTTTTTTGTTGTCATTAAATCTGTCTCAAATTGTTTTTGATATTCATCCGGTAT 501 | AAGCGGTTGTCTTGTTTCTCTAGTGAAGAAAGCATTACTGTTACTGATTAATTTTGTTTC 502 | TAATGTCATTTATCTCATCTCCCTAAATATTTGGAAAAATCGCTGTATACATCGTAAAAA 503 | TAAGTGCTACTAATGCTGTACACATGGTACCTAGCGTAAGTATTTCTGTTTCACGTACTG 504 | TGAAATCAGTACCCATGAATTCGTTGATCTTGTGGTTAATCCATTTACTCATAGTTTGTG 505 | ATCCTCCATAAACTTTTGTAAGACATCCTTGTCATATAAGATTTGGCCATCAATACTAAT 506 | TGGCTTAAAATCAAACTTTTTCAGCCAAATTCTAAATGTAGATGGGCTAACTCCTAAGAA 507 | GTTTGCTGATGCTGTCACGTTAAAATACTTCTGTTGAATAGCTCTTTCAATAACTCGTTC 508 | TGGGATTGCGATTTGCATGAAATCATCTCCTTTTTAATGAATCAAAATACTTTCCTTACT 509 | AGCTCTATCTACTCCTTTTACAGCAAGATAAGTTTCTTGCTGAAGATGGATAAAACTTAG 510 | AATATCTTGAATTTCTTCTGGTTTACCTTCTAGTGTCACCTTCATTTTTATCTCCTCTCA 511 | ATTCATCAAGACTAACGCCTAACGCATCAGCAATTTTGCACATAGTTTTGAATGTAGGCT 512 | CAGTCTTTAAATATCTATAATTTCTAATTGTTTGGTAATTTATTTTCGATTCATTTGATA 513 | ATTTACGTAAACTCCAATGCTTATTTTTTAAAATACGATCAACTGTTTGCCACGACATAT 514 | AGTGTTATCACTTTCCTATCTACTTTATATTGTGTTATATTTGATACATGATATTTTTTA 515 | GAAAGAAGAGACATAGTGGCAACTCAATATAAACAATCTATCTGCTTAAATGGTCATCAG 516 | ATTACTGATGAATTTAGTCCCAACGAAACCGTAACTGGCTATTGCGAACAATGTGGCGCT 517 | AAGCTTATTGATAGCTGTCCCCATTGCAAATATCCTATTGAAGGATTTTATTATCCAGAC 518 | GGAGTAGTATATTTACGAAGTCCAAATGATAAACTTCCAGTACCTAAATACTGTAAAAAA 519 | TGTGGCACACCATATCCTTGGACTAAAGATTCTTTAGATGCATTGAATGAAGTAATACAA 520 | 
TTGTCTAACCTATCTATTCAAGACAAAGAATCTTTACAGGCTTCAACTCCTGATCTTTTA 521 | GTTGACACACCTAGAACAAAAGTAGCTGTTTTAAAATGGAAAACAATAGGTAAATCCATA 522 | TTGAACTTAGCTCACGACATTATTGTTGAAGTAGCTAGTGAATCAATCACTAAAGCAATT 523 | TATGGAAACTAATCTATTTCCACACTGTTCACAGTAATTACCATCGTGAACGACATGATG 524 | ACAATTGCTGCATACAGCTAAATTCATCAACTTAAAGTATCTACTAGCAGCTTTTATAAA 525 | AATAAGCTGCTTTTCTTTTTGCTCTTTTTCCATTGCTTTTACCTACACTTCTACTTTTCT 526 | AAGCTTATTAACAGAATACTGTTGCCCCTTACCTGTTACCTTAGGTGTCTTGCTAATTGA 527 | TGTTGAACCGTTTGAATGGTTAATCGTTGTTTCTTTGATCTTGAACAAACCAAGATCCAT 528 | TGATTTCTGCGTTGGCATGTTCCAATCACTACCCTTACGATTGATCAGATATCCATGTTC 529 | TCGCATCCAAGCAAACAAACGATTAGCTCCAATATCTATTCCATTACCTCTTAAAATTTT 530 | GGCCAACTCGCCAATCAAAATTGTAGTGTGGCTAGTTGCTACTGAATCTGCAAACAATGC 531 | CTTAGGTTTCATTTCAGCAATTTCAATATCTTTTTGTTTAAGCTGATCCGCTGCTTGTTG 532 | AAGTAAGTCAGCTAACCCGGTCTTGTTACGGACTACATCGAATGCTTTTTCGTCAGTCAT 533 | ATATGCACCGTGTTTGCGAATTGCTGGAAGAACTTCTGAAGTAACCCATCGTTTGAACTT 534 | TTTAGCATTGGGAAGCTTACTTGATAAGATCAAACTGTACAATCCTGATTCGTTAATTAA 535 | TATTTGATCTCTCATCTGACCTGCGGTACTGATTTGGTACCTCAGCTTATCTTCCTCGTC 536 | AACGTGAGTATTTATATCTCTACTACCATTCTTATATCCAAGAATAGTTGCTACATCTTT 537 | ACCAACAAAATATGGTTCTTCATCAACTGTCAAAGTTCTAACTTGTTGATTTTCAAAATT 538 | AAATAATTGCAGTTTATTTTCCATTCAGTTTCTCCTCTCTTATTGTTCAAATTAATTTTG 539 | AACTTTTTCATCAAAAAAATATTCTGCAACTTTAGACCTATCCAAATCGAGTAAATCCAT 540 | TGCATGTGCAATTTCATTTGTTTTCCAACTAACTCGGTTATTTAGCTTATTAGAAAGACT 541 | ATGTTCAGATAAACCTAGTGCTTTTGCAAAATTATATTGAGTTCCATATTTTTCAATAAT 542 | GCGCCCATTCAATTTTCTAAAATCAAATGCCATACCTATCTCCTTTCTCTTTTGTTCAAA 543 | TTAATTTTGAACTTTACACAAACTATAATACACTCTTAAAAAACTCTGTCAACATTTTTC 544 | TTCAATTTTTTTGAACTAAAAATTGAACTAAGTTCAAGAACATCATATAATTAAAATATA 545 | AAATTATAAAAATATAAAGGATGTGAGATTAAGTGGAAAAGAATCAAAATGCAGATAAAG 546 | CAGAATTATACTCTGGAAATCGAGCTGCAGTTGGCGACCGTTTAAAAGAATACATGAAAG 547 | AACATAACCTAAAACAGGTAGATATTATTGAAAAAACTAAGCCTTTCTCAACTTCTAAGC 548 | TTAAGATTACAAAAACTGATTTAAGTCAATATGTAAATAACAAAACTGATCCTCGCTCCG 549 | 
ATAAGTTACATTTACTTGCAAAATCATTGGATGTTGATGAAGCATGGTTACTTGGCTTTG 550 | ATAAACACGCCCATAAAACTTCTCATCATAATTCCACTTCAAGGGATGAAAATGAAAGTC 551 | AACAAGCTGACTTAGACGATGGCGACACCATATTTACTTTTCAGGGGCGCCCTATTCCGC 552 | CAGAGGATTTAAAAATTATAAGAAGGTTGCTTAAAAACGATGATGAATAATTTAACAGTT 553 | ACTTATTTATTTAATTACGCAATGGCTCATGATATTCAATTTGAAGCTACTCACTTACTT 554 | CATCCTGGCACCCCTTCTTGTTGCAATACTACTAACCGAAAAATGGTCATTAATCTTAAT 555 | AATGACGAAGAAGGTTTACCGTTAGAAATAGCTCATGAAATTGGTCATATTTTTAATGGA 556 | GACAAAGGTAAATTTTATTATTGTGGTGATAGTCGTTCATCCCCTGTAGAAGTAAATGCA 557 | CATAAAACAGGTATTAAAATTCTTGCCAATTATTATTTTGAGGATATTCCTAAAGAAGAA 558 | TGGAATGTAGATAATTTTATGTATTATTACTGTATTCCACCCTCTTATAAAGATTGGACT 559 | ATTCAATATTTAAAATCGCTATAAGCAATAATTGTCCGCAATGACGTTAAACTAAAAATA 560 | TATAAGTATGGAGGATAAAATGAAAAAGAAACTTGTTATTGCAGTTACTAGTTTGGCTCT 561 | TTTAGGGATGGTGACTAGCGCTTGTTCGTCCAACAATAATTCAAGTAATAGTAGTAAATC 562 | ATCTTCATCAAAAGTAAATAAGCCTAAGAAAGTAGTTCCATCTGATAAAGCAAAAGATAG 563 | AACTTGGACTTATAAAAATAAAGTTTTCGATGCTGGAATTGAAACTTACAAATTTACTAA 564 | ATCTGAAATAAGAGATTCTGCTACAGAAGGTAAAAAATTATTAGTTCTTTATTGTGATGT 565 | AACTAATAATTCAAAGAAGGAACAAGATCCATCTAACGTATACATGGTAGTCCATGCTTA 566 | TCAAAAAAATGACACTTCTAATATTCAATTAGATCCTGGCTCAGCCGCATTAGATGATAA 567 | TGGCAATGATCCATTACAAACTTATTCTGATAATTTAAATAATAAATTGTTACCTGGTAA 568 | AACAACTAAGGCTTGTATTGCATATGAATTAAAAAATGACAATCCAGTAACTGTTAAATT 569 | CGAAAATTCAGATTTTGATACTATTGGTCAAGGAATCTATCAAATTAAATAAAAGCTGGT 570 | GATTGTTAATGCGATCATTCATTAGAAAACCATCATTAAAAAAATCTTTTTCTGCACGAA 571 | CTAAAGGCAGAGCCACTCGTTCAATGAAAAGAATGATTAACCCAACCTATGGTAAAAAAG 572 | GTACGGGAATGCTGACGAACCCTAAAAAGAGTATGTACAACAAAGTTTATTCACATACTT 573 | CAATTAGTGCCATGCCTAAAGGCAGCAAAAAGAAAATATCCCGTAGCCACTCAGATGATG 574 | GTTGTTGGGGATGTGGATGCTTAATTATTTTTATGATTATTGTATTTATCGTATGTTTGT 575 | TAGTTTAGGAGAAAAAATGGGATTTTTATCAAAAAGCAGAAAAGCTAGTGGGAAAAATTT 576 | ATATGATAGCGATGTTAAACCACGTTTAGTAGAAAAAGATGGTAATACTCACATTATTAT 577 | GATTAATAGCTTTAGTAAGTTTCTAGATCAGAACTTTGGCGTAGAAACAAAATATACAGA 578 | 
ACAAATTGGATCTGTAGTTGATTCGCTCCAAAACGATGGCTATGAAATTGTAGATATTAA 579 | ATTTGATTCATTAAAGGGTCAAGGTATCACCGGAACAGCTGAAGGATTTCACACTTTAAT 580 | TATGTATAAATAGATGCACAAATATAAGGTAAAACCAATGACTATACAAAATAAATACAA 581 | TGTTTCTGTAATATCATTTAAATTAACAGATTCTTCTGGAAAACAAATTTACACTCTTCC 582 | TTTTAACACAAATAAAAAGGAGTACAACAAGGTTGATTCTGAATGTGTAGTATATCTTCC 583 | GAATAACTTTATAGAAAATTTTCCAAATGCAGAAAAAGTCTTCTTCCATTTATCTACTTA 584 | TGAAAAAATCAGTTTTGATTCCGGTTTACTTTATCAAGAGACGGCTATTTCTGTTAAAGA 585 | GCTGCCAGCTAATGGATTACTTAAATTGAACTTTAATATGAATTTCCCTAGTGAATTTAA 586 | GCCTTATAAATTGGTTGGATACAGGTTTTCTGTTTCCGACAAAGATGCTATGCATCAAGT 587 | GACCGATGAAGATGAAAATGAGGGTGTTTTCTTCGATACAGTTATTCCTTCAGAGGTAAT 588 | TGACAATGGCTAAAATAACCACATTACCCACTGCAAAAGCATTTTATCCTAAAGACTTGA 589 | AGAAACCCTCAAGACTATCTGATAATAATGGTGGAGGTGAACCACCTATGGATAATAAAT 590 | ATGTAACTCATCAGGAATTAGAATTAAGCAACGAAAAACTCCTGCATCATATGGATAATA 591 | AATTCGCTGAGATGCAACAGCAGATGAATCAACAATTTAATGAAGTGGATAAACATTTCA 592 | ACGATCTTGAGTTAAAAGTCAATGATATTAAAAATACAGCAAATAATAACAAAGAAAAAA 593 | TAAACTGGTTATTATATACCGCTATTGGTGGAATTATTATTTCAGTAATCACTACAATCA 594 | TTTCTAATCTTTTAACAAAATAACTTAATCCATGAATTATAGCTATTTTTGAATGTCACA 595 | ACCAGAATAATTAGTATAGGAAAAAATGAAAAAGAAGTATGTTATTGCAGCAAGTTTATT 596 | AGTAGCACTGGGAATGTCTACTGCGGCTTGTTCATCAAATTCAAATAAGTCAAATTCAAC 597 | TGTTCAATCATCTAAAAGTGTGCATAAAAAGAAGTCAACCAAATTACCAAAGCCTGATTA 598 | TGCAACAGCTGAAGATGCCGAAGCAGCATTAAATGCAGGAAAAGTTCTTGAAGGTAAAAC 599 | TGTTCGTTTTAGAGTAAACGATTTAAGACCTAAAAGTGCTTTTGGATATAATTTAGAAAC 600 | TGGCGAGCATTTAAACTTTGTTAGTCCACATAATCCAAAAGTAAAAGTTGGCGATGTAGT 601 | TATTGTTAAAGTGAAAGAAGTAACGTCTACCCTAGGATCATTTGTTATTAAGTATTCAAG 602 | TCTGCGTAAAGTACCGTTAAATTCTTTATCTAAGGCAGAAAAAGAAAAATTAATTGGAAA 603 | GACTGATACAGAAAATAAATCAAACACTGAGACTAATTCTTCCAGTCAGCAATCTTCTTC 604 | CCAGCAAACTACTGCACAATCTTCTTCATCAACTGTTCAAAATAGTAACACAGCTAAGCA 605 | ACAATCTAAGCCTAAATCACAAGGCGAGATTAACAAGGAACTTGGCCACGATCCAAAAGG 606 | TGCCCCACTCTTACCAGGACAAGATCATGCTGCTGGCGCAAACGTAAATGGTGATCCTGA 607 | 
TCCTTGGGTACAGGGACAAATTGATTGGGCTATTCGTGAAGGTTACATGAATCCTGACGG 608 | AACGGACACTGAAAAAGGTAAACAATTATTACAACAGGGTTCTGATGATGAAGACGCAGA 609 | TTCCAATAATTCTAGTTACGATACAAACGATGATGATTCAAGTTATGATACGGACTATTA 610 | AAAAATAAACAAAAATCCCACTGATGCGCCAACATCAGTGGGATAAGGAAATGAGCTACG 611 | CCAATAGCTCAAATTAATGTTTTAAAATTAACAAAGCCATAAAAATATGAACTCTGCCCT 612 | CTTATTGTAGCAGAGTTCATATTTAGGCTACAATAGGAGGTTTTTTATTATGCCAAAAAG 613 | AAAAAATACAAGTATTAAAGATTATAAATTAAAATCTGGTAAGAAACGGTATATGTTTCA 614 | GATTTATTTAGGTACTAACAGCAATGGTAAGCCTATCATTACCCGCAGGCGTGGTTTTAA 615 | ATCTTATGCTGAAGCAGAAGCTGTATTCAATAAAATGTCACAAATTAAACCAGATAATTT 616 | TGTAAAACAAAAACAAATTAAAGTAGCTGAATTACGTGAACTGTGGTTTGAAAATTATAA 617 | GACCCAAGTAAAAGAATCCACTGCAAATAAAAATAAACAAGTCTTTGATAACCATGTTAT 618 | TCCCGCTTTTGGTAATCAGTATATTGACAAGATTACAGTGGCTGAATTACAAAAGTGGGC 619 | TGATAGAAAAGCAAAACAGATTGTTAAATATAGAGATGCTATTAATGAATTCAATGCCCT 620 | TTTTGAATACGGTATTCGCTTAAACTATGTTTCTGAAAATCCGTTAAAACGGATCATTAT 621 | TCCTAAGAAAACATCTCGACCACGTAGAGACACTGAACATAATGTTTATACCCAAGATGA 622 | ACTAAATCAATTCCTTGAAGTTGCAAAAGATTATGGATTGGTCCCGTATACTTACTTCAA 623 | GCTACTTTCTGCTACTGGTCTTAGAAAATCTGAAGCTCTCGCGTTAACATGGTCAGATAT 624 | TAACCTAACTGCCGGTACTTTATCAGTAAATAAAACCTTGGCATATGGTTTAAATGGTAA 625 | AACGATTATTCAGCCACCTAAATCACCAAAATCTAAACGAATTTTGCCGATCTCTGACGG 626 | TTTGAAAGAAGTTTTGATAGATTACAAGCGAAAGCAGAAAATCATCTCTAAAGAGCTATT 627 | TCACACTAATAAGGGAACTTATTTAAGAATAAGCAAACCGGATCAATGGTTAAAATCAAT 628 | TTACGCTAAAGACCACGAAGAAAAAGCTGAATATGCAAAAAAATATAACTTAAAAGAGCC 629 | ACAACCGGATTTACGTCATATTACAGTTCACGGTTTTAGACATACTTTTGCAACACTACT 630 | TATTGCGGAAACTAATGTAAAACCTAAAACAGTTCAGATGTTACTTGGTCATGAAAACAT 631 | TCAAATGACTTTAGACATTTATACTCATATTAACAAGAAAAATACAGAAGATGCAGTCAA 632 | TGCATTAAAGCAATTAAATATATAAACAAAAAAACACCTTTTTTAGCCATTTCTTAGCCA 633 | AAAAGGTGTTTTTATTACTTAAAACATTGCTATATCAACATTCTTTTATTCAAATAAGGA 634 | GAGTACAGGATTTGAACCTGCGCGCCGGTATAAGCCGGTTCGCCGGATTTCGAGTCCGGT 635 | GCATTACCACTCTGCCAACTCTCC 636 | -------------------------------------------------------------------------------- 
/expected_output_of_test/phages_coords: -------------------------------------------------------------------------------- 1 | NC_005362.1 1 327710 367791 2 | NC_005362.1 2 1292553 1330556 3 | --------------------------------------------------------------------------------