├── .gitignore ├── docs ├── FEY_2_chromo_alitv.jpg └── PRYMETIME_pipeline_description_2.jpg ├── docker └── install-karyoploteR.R ├── PRYMETIME ├── sorter2.sh ├── nucmer.sh ├── nucmer_long.sh ├── illumina_merge.sh ├── eng_sig_cmap.sh ├── flye.sh ├── medaka.sh ├── bowtie2.sh ├── racon_long.sh ├── racon.sh ├── eng_sig_alitv.sh ├── eng_sig_cmap.R ├── split.sh ├── pilon.sh ├── pilon_long.sh ├── filter_contigs.sh ├── unicycler_short.sh ├── alitv.yml ├── sorter2.py ├── blastn_parse.pl ├── racon_merge.py ├── nucmer4.py ├── nucmer_long.py ├── filter_contigs.py ├── unicycler_long.sh ├── unicycler.sh ├── CEN.fasta ├── eng_sig_genome_3.sh ├── PRYMETIME.sh ├── bp_search2gff.pl └── TELO_R.fasta ├── LICENSE ├── pilon ├── README.md └── Dockerfile /.gitignore: -------------------------------------------------------------------------------- 1 | # emacs backup files 2 | *~ 3 | -------------------------------------------------------------------------------- /docs/FEY_2_chromo_alitv.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emyounglab/prymetime/HEAD/docs/FEY_2_chromo_alitv.jpg -------------------------------------------------------------------------------- /docs/PRYMETIME_pipeline_description_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emyounglab/prymetime/HEAD/docs/PRYMETIME_pipeline_description_2.jpg -------------------------------------------------------------------------------- /docker/install-karyoploteR.R: -------------------------------------------------------------------------------- 1 | if (!requireNamespace("BiocManager", quietly = TRUE)) 2 | install.packages("BiocManager") 3 | 4 | BiocManager::install("karyoploteR") 5 | -------------------------------------------------------------------------------- /PRYMETIME/sorter2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 
#SBATCH -N 1 3 | #SBATCH -n 16 4 | 5 | EXECDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd)" 6 | cp "${EXECDIR}/sorter2.py" "$1" 7 | cd "$1" 8 | python3 sorter2.py 9 | -------------------------------------------------------------------------------- /PRYMETIME/nucmer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N 1 3 | #SBATCH -n 16 4 | #SBATCH -o nucmer_%j.out 5 | #SBATCH -e nucmer_%j.err 6 | #SBATCH -J nucmer 7 | 8 | EXECDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd)" 9 | cp "${EXECDIR}/nucmer4.py" "$1" 10 | cd "$1" 11 | python3 nucmer4.py 12 | -------------------------------------------------------------------------------- /PRYMETIME/nucmer_long.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N 1 3 | #SBATCH -n 16 4 | #SBATCH -o nucmer_%j.out 5 | #SBATCH -e nucmer_%j.err 6 | #SBATCH -J nucmer 7 | 8 | EXECDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd)" 9 | cp "${EXECDIR}/nucmer_long.py" "$1" 10 | cd "$1" 11 | python3 nucmer_long.py 12 | -------------------------------------------------------------------------------- /PRYMETIME/illumina_merge.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N 1 3 | #SBATCH -n 16 4 | #SBATCH --mem=160G 5 | #SBATCH -o illumina_merge_%j.out 6 | #SBATCH -e illumina_merge_%j.err 7 | #SBATCH -J illumina_merge 8 | 9 | EXECDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd)" 10 | python "${EXECDIR}/racon_merge.py" "$1" "$2" > "$3" 11 | -------------------------------------------------------------------------------- /PRYMETIME/eng_sig_cmap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -J eng_sig_figure 3 | 4 | EXECDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd)" 5 | 
PREFIX=$(basename "$1") 6 | 7 | cp "$EXECDIR"/eng_sig_cmap.R "$1" 8 | cd "$1" 9 | 10 | cp "$PREFIX"_final.bed genome.bed 11 | cp "$PREFIX"_cmap.txt cmap.txt 12 | Rscript eng_sig_cmap.R "$1" 13 | -------------------------------------------------------------------------------- /PRYMETIME/flye.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N 1 3 | #SBATCH -n 16 4 | 5 | 6 | if [[ "$LONG_READ_TYPE" == "pacbio" ]]; then 7 | 8 | echo "Long reads identified as pacbio hifi" 9 | flye --threads ${N_THREADS} --min-overlap 5000 --pacbio-hifi "$1" --meta -o "$2" 10 | 11 | else 12 | 13 | echo "Long reads identified as nanopore" 14 | flye --threads ${N_THREADS} --min-overlap 5000 --nano-raw "$1" --meta -o "$2" 15 | 16 | fi 17 | -------------------------------------------------------------------------------- /PRYMETIME/medaka.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N 1 3 | #SBATCH -n 16 4 | #SBATCH --mem-per-cpu=10000MB 5 | #SBATCH -o medaka_%j.out 6 | #SBATCH -e medaka_%j.err 7 | #SBATCH -J medaka 8 | 9 | # source $HOME/medaka/bin/activate 10 | #medaka_consensus -i ~/nanopore_6/fastq/demulti/BC08.fastq -d ~/nanopore_6/FEY_48/scaffolds.fasta -o ~/nanopore_6/FEY_48/FEY_48_Flye_All_med 11 | medaka_consensus -t ${N_THREADS} -i "$1" -d "$2" -o "$3" 12 | -------------------------------------------------------------------------------- /PRYMETIME/bowtie2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | log() { 4 | echo "[$( date '+%Y-%m-%d %H:%M:%S' ) $( basename ${BASH_SOURCE[0]} )]: $1" 5 | } 6 | 7 | cd "$4" 8 | 9 | log "Building index" 10 | bowtie2-build --threads ${N_THREADS} "$1" medaka-idx 11 | log "Done" 12 | 13 | log "Mapping reads" 14 | bowtie2 -x medaka-idx --threads ${N_THREADS} --no-unal -U "$2" -S "$3" 15 | log "Done" 16 | 17 | log "Removing index" 18 | rm medaka-idx*.bt2 19 | 
-------------------------------------------------------------------------------- /PRYMETIME/racon_long.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N 1 3 | #SBATCH -n 16 4 | #SBATCH --mem=160G 5 | #SBATCH -o racon_%j.out 6 | #SBATCH -e racon_%j.err 7 | #SBATCH -J racon 8 | # NOTE(review): only one input ("$1") is passed to racon here, whereas racon.sh passes three - confirm this is intended 9 | #racon ~/illumina_2/FEY48_ill_com.fastq ~/nanopore_6/FEY_48/FEY_48_Flye_All_med.sam ~/nanopore_6/FEY_48/FEY_48_Flye_All_med/consensus.fasta > ~/nanopore_6/FEY_48/FEY_48_Flye_All_med_rac.fasta 10 | racon --threads ${N_THREADS} "$1" > "$2" 11 | 12 | bwa index "$2" 13 | -------------------------------------------------------------------------------- /PRYMETIME/racon.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N 1 3 | #SBATCH -n 16 4 | #SBATCH --mem=160G 5 | #SBATCH -o racon_%j.out 6 | #SBATCH -e racon_%j.err 7 | #SBATCH -J racon 8 | 9 | #racon ~/illumina_2/FEY48_ill_com.fastq ~/nanopore_6/FEY_48/FEY_48_Flye_All_med.sam ~/nanopore_6/FEY_48/FEY_48_Flye_All_med/consensus.fasta > ~/nanopore_6/FEY_48/FEY_48_Flye_All_med_rac.fasta 10 | racon --threads ${N_THREADS} "$1" "$2" "$3" > "$4" 11 | 12 | bwa index "$4" 13 | -------------------------------------------------------------------------------- /PRYMETIME/eng_sig_alitv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N 1 3 | #SBATCH -n 16 4 | #SBATCH -J eng_sig_alitv 5 | # Usage: eng_sig_alitv.sh <output_dir> <reference_fasta>; <output_dir> must already hold <basename of output_dir>_final.fasta 6 | EXECDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd)" 7 | PREFIX=$(basename "$1") 8 | # stage alitv.yml and the reference inside "$1"; after the cd the staged copy is addressed by basename so a relative "$2" still works 9 | cp "$EXECDIR"/alitv.yml "$1" 10 | cp "$2" "$1" 11 | cd "$1" 12 | cp "$(basename "$2")" reference.fasta 13 | cp "$PREFIX"_final.fasta prymetime_assembly.fasta 14 | # NOTE(review): the AliTV location is hard-coded under $HOME - confirm it matches the installation 15 | perl ~/AliTV-perl-interface-1.0.6/bin/alitv.pl --project "$PREFIX" alitv.yml 16 | -------------------------------------------------------------------------------- /PRYMETIME/eng_sig_cmap.R: 
-------------------------------------------------------------------------------- 1 | args<-commandArgs(TRUE) 2 | 3 | name <- args[1] 4 | 5 | title <- paste(name," annotated genome",sep = "") 6 | 7 | library(chromoMap) 8 | 9 | library(htmltools) 10 | 11 | map <- chromoMap('genome.bed', 'cmap.txt', data_based_color_map = T, data_type = "categorical",legend = T, lg_x = 100, lg_y = 300, left_margin = 55, title = title) 12 | 13 | save_html(map, 'genome_annotations.html', background = "white", libdir = "lib") 14 | 15 | while (!is.null(dev.list())) dev.off() 16 | -------------------------------------------------------------------------------- /PRYMETIME/split.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N 1 3 | #SBATCH -n 16 4 | #SBATCH -o split_%j.out 5 | #SBATCH -e split_%j.err 6 | #SBATCH -J split 7 | 8 | #fail if there's a typo in variable names 9 | set -u 10 | #fail if any command fails 11 | set -e 12 | 13 | # arg1: output directory 14 | # arg2: circular contigs file 15 | 16 | if [[ -s "$2" ]]; then 17 | 18 | # if there are cir_rep_contigs 19 | cd "$1" 20 | mkdir -p unicycler 21 | seqkit seq -m 5500 cir_rep_contigs.fasta > cir_rep_contigs_trimmed.fasta 22 | awk '/^>/{s="unicycler/"++d".fasta"} {print > s}' cir_rep_contigs_trimmed.fasta 23 | 24 | else 25 | 26 | echo "WARNING: no circular contigs found; continuing with only linear contigs" >&2 27 | 28 | fi 29 | -------------------------------------------------------------------------------- /PRYMETIME/pilon.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N 1 3 | #SBATCH -n 16 4 | #SBATCH --mem=160G 5 | #SBATCH -o pilon_%j.out 6 | #SBATCH -e pilon_%j.err 7 | #SBATCH -J pilon 8 | 9 | 10 | # map illumina reads for pilon 11 | #bwa mem -t 14 ~/nanopore_6/FEY_48/FEY_48_Flye_All_med_rac.fasta ~/illumina_2/FEY48_ill_com.fastq | samtools view - -Sb | samtools sort - -@14 -o 
~/nanopore_6/FEY_48/FEY_48_Flye_All_med_rac_pil.bam 12 | bwa mem -t ${N_THREADS} "$1" "$2" | samtools view - -Sb | samtools sort - -@ ${N_THREADS} -o "$3" 13 | #samtools index ~/nanopore_6/FEY_48/FEY_48_Flye_All_med_rac_pil.bam 14 | samtools index -@ ${N_THREADS} "$3" 15 | 16 | # pilon polish 17 | #pilon -Xmx160G --genome ~/nanopore_6/FEY_48/FEY_48_Flye_All_med_rac.fasta --bam ~/nanopore_6/FEY_48/FEY_48_Flye_All_med_rac_pil.bam --output ~/nanopore_6/FEY_48/FEY_48_Flye_All_med_rac_pil 18 | #--threads listed as experimental for pilon 19 | pilon -Xmx160G --genome "$1" --bam "$3" --output "$4" 20 | -------------------------------------------------------------------------------- /PRYMETIME/pilon_long.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N 1 3 | #SBATCH -n 16 4 | #SBATCH --mem=160G 5 | #SBATCH -o pilon_%j.out 6 | #SBATCH -e pilon_%j.err 7 | #SBATCH -J pilon 8 | 9 | 10 | # map illumina reads for pilon 11 | #bwa mem -t 14 ~/nanopore_6/FEY_48/FEY_48_Flye_All_med_rac.fasta ~/illumina_2/FEY48_ill_com.fastq | samtools view - -Sb | samtools sort - -@14 -o ~/nanopore_6/FEY_48/FEY_48_Flye_All_med_rac_pil.bam 12 | #bwa mem -t ${N_THREADS} "$1" "$2" | samtools view - -Sb | samtools sort - -@ ${N_THREADS} -o "$3" 13 | #samtools index ~/nanopore_6/FEY_48/FEY_48_Flye_All_med_rac_pil.bam 14 | samtools index -@ ${N_THREADS} "$2" 15 | 16 | # pilon polish 17 | #pilon -Xmx160G --genome ~/nanopore_6/FEY_48/FEY_48_Flye_All_med_rac.fasta --bam ~/nanopore_6/FEY_48/FEY_48_Flye_All_med_rac_pil.bam --output ~/nanopore_6/FEY_48/FEY_48_Flye_All_med_rac_pil 18 | #--threads listed as experimental for pilon 19 | pilon -Xmx160G --genome "$1" --bam "$2" --output "$3" 20 | -------------------------------------------------------------------------------- /PRYMETIME/filter_contigs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --mem-per-cpu=1000MB 3 | 4 | #fail if 
there's a typo in variable names 5 | set -u 6 | #fail if any command fails 7 | set -e 8 | 9 | MIN_COVERAGE_DEPTH=50 10 | 11 | log() { 12 | echo "[$( date '+%Y-%m-%d %H:%M:%S' ) $( basename ${BASH_SOURCE[0]} )]: $1" 13 | } 14 | 15 | EXECDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd)" 16 | cd "$4" 17 | 18 | log "Building index" 19 | bowtie2-build --threads ${N_THREADS} "$1" flye-idx 20 | echo "Done" 21 | 22 | log "Mapping reads" 23 | bowtie2 -x flye-idx --threads ${N_THREADS} --no-unal -1 "$2" -2 "$3" -S - \ 24 | | samtools view --threads ${N_THREADS} -b - \ 25 | | samtools sort --threads ${N_THREADS} -m 5G - -o mapping_result_sorted.bam 26 | log "Done" 27 | 28 | log "Building index" 29 | samtools index -@ ${N_THREADS} mapping_result_sorted.bam 30 | log "Done" 31 | log "Filtering contigs" 32 | samtools mpileup mapping_result_sorted.bam | python3 "${EXECDIR}/filter_contigs.py" accept.fasta reject.fasta ${MIN_COVERAGE_DEPTH} 33 | log "Done" 34 | 35 | rm -f flye-idx*.bt2 36 | -------------------------------------------------------------------------------- /PRYMETIME/unicycler_short.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N 1 3 | #SBATCH -n 16 4 | #SBATCH -p long 5 | #SBATCH --mem-per-cpu=10000MB 6 | #SBATCH -t 7-00:00:00 7 | #SBATCH -o unicycler_%j.out 8 | #SBATCH -e unicycler_%j.err 9 | #SBATCH -J unicycler 10 | 11 | #fail if there's a typo in variable names 12 | set -u 13 | #fail if any command fails 14 | set -e 15 | 16 | PREFIX=$(basename "$3") 17 | 18 | cd "$3" 19 | mkdir -p unicycler 20 | 21 | cd unicycler 22 | 23 | echo "WARNING: only short contigs found, performing Unicycler only" 24 | 25 | unicycler --threads ${N_THREADS} -1 "$1" -2 "$2" -o "$PREFIX"_unicycler 26 | cat *_unicycler/assembly.fasta > ../unicycler_contigs.fasta 27 | 28 | cd ../ 29 | 30 | # seqkit has threads on by default 31 | seqkit seq unicycler_contigs.fasta -m 1000 > unicycler_contigs_filtered.fasta 
32 | seqkit rename unicycler_contigs_filtered.fasta | seqkit sort --by-length --reverse \ 33 | | awk '/^>/ {if (/circular=true/) \ 34 | {printf(">scaffold_%d_circ\n", ++i)} \ 35 | else {printf(">scaffold_%d\n", ++i)} next} \ 36 | { print }' > "$PREFIX"_final.fasta 37 | # awk takes no file operand above: it must consume the renamed, length-sorted stream from stdin, otherwise the seqkit stages are ignored 38 | -------------------------------------------------------------------------------- /PRYMETIME/alitv.yml: -------------------------------------------------------------------------------- 1 | --- 2 | genomes: 3 | - 4 | name: Reference 5 | sequence_files: 6 | - reference.fasta 7 | feature_files: 8 | eng_sig: 9 | - eng_sig.tsv 10 | telomere: 11 | - telomere.tsv 12 | centromere: 13 | - centromere.tsv 14 | mitochondrion: 15 | - mitochondrion.tsv 16 | - 17 | name: Prymetime 18 | sequence_files: 19 | - prymetime_assembly.fasta 20 | feature_files: 21 | eng_sig: 22 | - eng_sig.tsv 23 | telomere: 24 | - telomere.tsv 25 | centromere: 26 | - centromere.tsv 27 | mitochondrion: 28 | - mitochondrion.tsv 29 | features: 30 | eng_sig: 31 | color: "#FF0000" 32 | form: rect 33 | height: 30 34 | visible: 1 35 | telomere: 36 | color: "#0000FF" 37 | form: rect 38 | height: 30 39 | visible: 1 40 | centromere: 41 | color: "#008000" 42 | form: rect 43 | height: 30 44 | visible: 1 45 | mitochondrion: 46 | color: "#FFFF00" 47 | form: rect 48 | height: 30 49 | visible: 1 50 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Worcester Polytechnic Institute 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom 
the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /PRYMETIME/sorter2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Title: Sorting Circular and Linear Contigs 4 | Created on Tue Aug 13 2019 5 | 6 | @author: Eric 7 | @email: ericyoung7@gmail.com 8 | """ 9 | import glob, os 10 | import pandas as pd 11 | from Bio import SeqIO 12 | 13 | ### Make a dictionary of circular or linear from Flye output info file ### 14 | ########################################################################## 15 | df = pd.read_csv("assembly_info.txt",sep='\t') 16 | 17 | circ_D = {} 18 | 19 | for x in range(0, len(df.index)): 20 | circ_D[df.loc[x,"#seq_name"]] = df.loc[x,"circ."] 21 | 22 | ### Make a fasta file of only the circular contigs ### 23 | ###################################################### 24 | fasta_sequences = SeqIO.parse(open("assembly.fasta"),'fasta') 25 | 26 | cir_seqs = [x for x in fasta_sequences if circ_D[x.id] == "Y"] 27 | 28 | SeqIO.write(cir_seqs , "cir_contigs.fasta", "fasta") 29 | 30 | ### Make a fasta file of only the linear contigs ### 31 | #################################################### 32 | fasta_sequences = 
SeqIO.parse(open("assembly.fasta"),'fasta') 33 | 34 | lin_seqs = [x for x in fasta_sequences if circ_D[x.id] == 'N'] 35 | 36 | SeqIO.write(lin_seqs , "lin_contigs.fasta", "fasta") 37 | -------------------------------------------------------------------------------- /PRYMETIME/blastn_parse.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | use warnings; 5 | use Bio::SearchIO; 6 | 7 | #print usage statement if blast output filename is not provided on the command line 8 | my $usage = "\nUSAGE: $0 blast_file\n\n"; 9 | my $file = shift or die ($usage); 10 | 11 | #Import Blast output file as a BioPerl object 12 | my $SearchIO_obj = new Bio::SearchIO(-format => 'blast',-file => $file); 13 | 14 | #Print header line of the output 15 | print "Query\tHit\tQuery_Start\tQuery_End\tHit_Start\tHit_End\tStrand\tLength\tIdentity\tEvalue\n"; 16 | 17 | #loop through the blast file, going through each query ("Result"), each hit within each query, and each high-scoring pair (HSP) within each hit. 18 | #Extract and print key information including hit location, length, identity, and e-value. 
19 | while( my $result_obj = $SearchIO_obj->next_result ) { 20 | my $query_name = $result_obj->query_name; 21 | my $query_desc = $result_obj->query_description; 22 | while( my $hit_obj = $result_obj->next_hit ) { 23 | my $hit_name = $hit_obj->name; 24 | my $hit_desc = $hit_obj->description; 25 | while (my $hsp_obj = $hit_obj->next_hsp){ 26 | my $evalue = $hsp_obj->evalue; 27 | my $id = $hsp_obj->percent_identity; 28 | my $length = $hsp_obj->length('total'); 29 | my $query_start = $hsp_obj->start('query'); 30 | my $query_end = $hsp_obj->end('query'); 31 | my $hit_start = $hsp_obj->start('hit'); 32 | my $hit_end = $hsp_obj->end('hit'); 33 | my $strand = $hsp_obj->strand('hit'); 34 | print "$query_name $query_desc\t$hit_name $hit_desc\t$query_start\t$query_end\t$hit_start\t$hit_end\t$strand\t$length\t$id\t$evalue\n" 35 | } 36 | } 37 | } 38 | 39 | 40 | exit; 41 | -------------------------------------------------------------------------------- /PRYMETIME/racon_merge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | import sys 5 | 6 | def eprint(*args, **kwargs): 7 | print(*args, file=sys.stderr, **kwargs) 8 | 9 | def parse_file(file_name, read_set): 10 | line_id = 0 11 | name = '' 12 | data = '' 13 | qual = '' 14 | valid = False 15 | with (open(file_name)) as f: 16 | for line in f: 17 | if (line_id == 0): 18 | if (valid): 19 | if (len(name) == 0 or len(data) == 0 or len(data) != len(qual)): 20 | eprint('File is not in FASTQ format') 21 | sys.exit(1) 22 | valid = False 23 | if (name in read_set): 24 | print(name + '2') 25 | else: 26 | read_set.add(name) 27 | print(name + '1') 28 | print(data) 29 | print('+') 30 | print(qual) 31 | name = line.rstrip().split(' ')[0] 32 | data = '' 33 | qual = '' 34 | line_id = 1 35 | elif (line_id == 1): 36 | if (line[0] == '+'): 37 | line_id = 2 38 | else: 39 | data += line.rstrip() 40 | elif (line_id == 2): 41 | qual += 
line.rstrip() 42 | if (len(qual) >= len(data)): 43 | valid = True 44 | line_id = 0 45 | 46 | if (valid): 47 | if (len(name) == 0 or len(data) == 0 or len(data) != len(qual)): 48 | eprint(len(name), len(data), len(qual)) 49 | eprint('File is not in FASTQ format') 50 | sys.exit(1) 51 | if (name in read_set): 52 | print(name + '2') 53 | else: 54 | read_set.add(name) 55 | print(name + '1') 56 | print(data) 57 | print('+') 58 | print(qual) 59 | 60 | if __name__ == '__main__': 61 | 62 | read_set = set() 63 | 64 | if (len(sys.argv) > 1): 65 | parse_file(sys.argv[1], read_set) 66 | if (len(sys.argv) > 2): 67 | parse_file(sys.argv[2], read_set) 68 | -------------------------------------------------------------------------------- /PRYMETIME/nucmer4.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Title: Sending Contigs to Nucmer 4 | Created on Tue Aug 13 2019 5 | 6 | @author: Eric 7 | @email: ericyoung7@gmail.com 8 | """ 9 | import glob, os 10 | import pandas as pd 11 | from Bio import SeqIO 12 | from pymummer import nucmer 13 | from pathlib import Path 14 | 15 | path_to_file = "pilon.fasta" 16 | path = Path(path_to_file) 17 | 18 | short_contigs = [] 19 | contigs = [] 20 | 21 | if path.is_file(): 22 | 23 | for x in SeqIO.parse(open("pilon.fasta"),'fasta'): 24 | 25 | if len(x.seq) < 50000: 26 | short_contigs.append(x) 27 | SeqIO.write(x, "%(x)s.fasta" % {'x':x.id}, 'fasta') 28 | 29 | else: 30 | contigs.append(x) 31 | #print("long", x.id) 32 | 33 | for pathname in glob.glob("*.fasta"): 34 | basename = os.path.basename(pathname) 35 | 36 | for x in short_contigs: 37 | 38 | if x.id in basename : 39 | runner = nucmer.Runner(basename, basename, "%(x)s_out.coords" % {'x':x.id}, 40 | maxmatch=True, simplify=False, mincluster=2000, min_id=99, min_length=2000, coords_header=True) 41 | 42 | runner.run() 43 | 44 | # The below lines are for saving fasta files of the contigs if desired 45 | 
#SeqIO.write(short_contigs , "short_contigs.fasta", "fasta") 46 | #SeqIO.write(lin_contigs , "lin_contigs.fasta", "fasta") 47 | 48 | # The below lines are for visually checking which files are repetitive or not 49 | ''' 50 | for pathname in glob.glob("*.coords"): 51 | 52 | basename = os.path.basename(pathname) 53 | name = basename.split(".") 54 | 55 | df = pd.read_csv(basename) 56 | 57 | print(df) 58 | 59 | if len(df.index) > 1 : 60 | 61 | print(name[0], "morethan 1") 62 | ''' 63 | 64 | cir_path = "cir_contigs.fasta" 65 | path_cir = Path(cir_path) 66 | 67 | if path_cir.is_file(): 68 | 69 | cir_rep_contigs = [x for x in SeqIO.parse(open("cir_contigs.fasta"), 'fasta')] 70 | 71 | for x in short_contigs: 72 | if len(pd.read_csv("%(x)s_out.coords" % {'x': x.id}).index) > 4 : 73 | cir_rep_contigs.append(x) 74 | else: 75 | #print(x.id) 76 | contigs.append(x) 77 | 78 | SeqIO.write(cir_rep_contigs, "cir_rep_contigs.fasta", "fasta") 79 | 80 | SeqIO.write(contigs, "polished_contigs.fasta", "fasta") 81 | -------------------------------------------------------------------------------- /PRYMETIME/nucmer_long.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Title: Sending Contigs to Nucmer 4 | Created on Tue Aug 13 2019 5 | 6 | @author: Eric 7 | @email: ericyoung7@gmail.com 8 | """ 9 | import glob, os 10 | import pandas as pd 11 | from Bio import SeqIO 12 | from pymummer import nucmer 13 | from pathlib import Path 14 | 15 | path_to_file = "lin_contigs.fasta" 16 | path = Path(path_to_file) 17 | 18 | short_contigs = [] 19 | contigs = [] 20 | 21 | if path.is_file(): 22 | 23 | for x in SeqIO.parse(open("lin_contigs.fasta"),'fasta'): 24 | 25 | if len(x.seq) < 50000: 26 | short_contigs.append(x) 27 | SeqIO.write(x, "%(x)s.fasta" % {'x':x.id}, 'fasta') 28 | 29 | else: 30 | contigs.append(x) 31 | #print("long", x.id) 32 | 33 | for pathname in glob.glob("*.fasta"): 34 | basename = os.path.basename(pathname) 35 | 
36 | for x in short_contigs: 37 | 38 | if x.id in basename : 39 | runner = nucmer.Runner(basename, basename, "%(x)s_out.coords" % {'x':x.id}, 40 | maxmatch=True, simplify=False, mincluster=2000, min_id=99, min_length=2000, coords_header=True) 41 | 42 | runner.run() 43 | 44 | # The below lines are for saving fasta files of the contigs if desired 45 | #SeqIO.write(short_contigs , "short_contigs.fasta", "fasta") 46 | #SeqIO.write(lin_contigs , "lin_contigs.fasta", "fasta") 47 | 48 | # The below lines are for visually checking which files are repetitive or not 49 | ''' 50 | for pathname in glob.glob("*.coords"): 51 | 52 | basename = os.path.basename(pathname) 53 | name = basename.split(".") 54 | 55 | df = pd.read_csv(basename) 56 | 57 | print(df) 58 | 59 | if len(df.index) > 1 : 60 | 61 | print(name[0], "morethan 1") 62 | ''' 63 | 64 | cir_path = "cir_contigs.fasta" 65 | path_cir = Path(cir_path) 66 | 67 | if path_cir.is_file(): 68 | 69 | cir_rep_contigs = [x for x in SeqIO.parse(open("cir_contigs.fasta"), 'fasta')] 70 | 71 | for x in short_contigs: 72 | if len(pd.read_csv("%(x)s_out.coords" % {'x': x.id}).index) > 4 : 73 | cir_rep_contigs.append(x) 74 | else: 75 | #print(x.id) 76 | contigs.append(x) 77 | 78 | SeqIO.write(cir_rep_contigs, "cir_rep_contigs.fasta", "fasta") 79 | 80 | SeqIO.write(contigs, "polished_contigs.fasta", "fasta") 81 | -------------------------------------------------------------------------------- /PRYMETIME/filter_contigs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | from Bio import SeqIO 5 | 6 | def main(): 7 | # ensure we can write outputs and that all inputs are available 8 | with open(sys.argv[1], 'w') as f: 9 | pass 10 | with open(sys.argv[2], 'w') as f: 11 | pass 12 | THRESHOLD = int(sys.argv[3]) 13 | 14 | # compute coverage using data from stdin 15 | contig_reads = {} 16 | for line in sys.stdin: 17 | contig_name, _, _, read_count, _, _ = 
line.strip().split('\t') 18 | reads = contig_reads.get(contig_name, 0) 19 | contig_reads[contig_name] = reads + int(read_count) 20 | 21 | # accept contigs if they have average read count >= threshold 22 | # reject otherwise 23 | with open(sys.argv[1], 'w') as accept: 24 | with open(sys.argv[2], 'w') as reject: 25 | with open('assembly.fasta') as f: 26 | accept_batch = [] 27 | reject_batch = [] 28 | 29 | for record in SeqIO.parse(f, 'fasta'): 30 | contig_name = record.id 31 | reads = contig_reads.get(contig_name, 0) 32 | 33 | if reads == 0: 34 | # no reads were mapped for this contig, 35 | # no need to count seq length 36 | average_reads = 0 37 | else: 38 | average_reads = reads / len(record.seq) 39 | 40 | if average_reads >= THRESHOLD: 41 | print(f'Accepted {contig_name} because it has an average of {average_reads} reads') 42 | accept_batch.append(record) 43 | if len(accept_batch) >= 100: 44 | SeqIO.write(accept_batch, accept, 'fasta') 45 | accept_batch = [] 46 | else: 47 | print(f'Rejected {contig_name} because it has an average of {average_reads} reads') 48 | reject_batch.append(record) 49 | if len(reject_batch) >= 100: 50 | SeqIO.write(reject_batch, reject, 'fasta') 51 | reject_batch = [] 52 | if accept_batch: 53 | SeqIO.write(accept_batch, accept, 'fasta') 54 | if reject_batch: 55 | SeqIO.write(reject_batch, reject, 'fasta') 56 | 57 | if __name__ == '__main__': 58 | main() 59 | -------------------------------------------------------------------------------- /PRYMETIME/unicycler_long.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N 1 3 | #SBATCH -n 16 4 | #SBATCH -p long 5 | #SBATCH --mem-per-cpu=10000MB 6 | #SBATCH -t 7-00:00:00 7 | #SBATCH -o unicycler_%j.out 8 | #SBATCH -e unicycler_%j.err 9 | #SBATCH -J unicycler 10 | 11 | #fail if there's a typo in variable names 12 | set -u 13 | #fail if any command fails 14 | set -e 15 | 16 | PREFIX=$(basename "$2") 17 | cd "$2" 18 | 19 | if [[ -e 
"./cir_rep_contigs.fasta" ]]; then 20 | 21 | echo "cir_rep_contigs.fasta exists" 22 | 23 | if [[ -s "./cir_rep_contigs.fasta" ]]; then 24 | 25 | echo "circular repetitve contigs found, performing Unicycler" 26 | 27 | cd unicycler 28 | 29 | unicycler --threads ${N_THREADS} -l "$1" -o "$PREFIX"_unicycler 30 | 31 | cat *_unicycler/assembly.fasta > ../unicycler_contigs.fasta 32 | 33 | cd ../ 34 | 35 | cat unicycler_contigs.fasta polished_contigs.fasta > "$PREFIX"_comb.fasta 36 | # rename/sort/relabel below form one pipeline: awk reads stdin (no file operand) so the length-sorted order is what gets numbered 37 | # seqkit has threads on by default 38 | seqkit seq "$PREFIX"_comb.fasta -m 1000 > "$PREFIX"_comb_filtered.fasta 39 | seqkit rename "$PREFIX"_comb_filtered.fasta | seqkit sort --by-length --reverse \ 40 | | awk '/^>/ {if (/circular=true/) \ 41 | {printf(">scaffold_%d_circ\n", ++i)} \ 42 | else {printf(">scaffold_%d\n", ++i)} next} \ 43 | { print }' > "$PREFIX"_final.fasta 44 | else 45 | 46 | echo "WARNING: no circular contigs, treat only linear" 47 | seqkit seq polished_contigs.fasta -m 1000 > polished_contigs_filtered.fasta 48 | seqkit rename polished_contigs_filtered.fasta | seqkit sort --by-length --reverse \ 49 | | seqkit replace -p '.+' -r 'scaffold_{nr}' > "$PREFIX"_final.fasta 50 | 51 | fi 52 | 53 | else 54 | # already inside "$2" (cd near the top of the script); a second cd "$2" would fail under set -e for a relative path 55 | mkdir -p unicycler 56 | 57 | cd unicycler 58 | 59 | echo "WARNING: only linear contigs found, performing Unicycler only" 60 | 61 | unicycler --threads ${N_THREADS} -l "$1" -o "$PREFIX"_unicycler 62 | 63 | cat *_unicycler/assembly.fasta > ../unicycler_contigs.fasta 64 | 65 | cd ../ 66 | # as above: awk must read the sorted stream from stdin, not the unsorted file 67 | # seqkit has threads on by default 68 | seqkit seq unicycler_contigs.fasta -m 1000 > unicycler_contigs_filtered.fasta 69 | seqkit rename unicycler_contigs_filtered.fasta | seqkit sort --by-length --reverse \ 70 | | awk '/^>/ {if (/circular=true/) \ 71 | {printf(">scaffold_%d_circ\n", ++i)} \ 72 | else {printf(">scaffold_%d\n", ++i)} next} \ 73 | { print }' > "$PREFIX"_final.fasta 74 | 75 | fi 76 | 
-------------------------------------------------------------------------------- /pilon: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Wrapper script for Java Conda packages that ensures that the java runtime 4 | # is invoked with the right options. Adapted from the bash script (http://stackoverflow.com/questions/59895/can-a-bash-script-tell-what-directory-its-stored-in/246128#246128). 5 | # 6 | 7 | # 8 | # Program Parameters 9 | # 10 | import os 11 | import sys 12 | import subprocess 13 | from os import access, getenv, X_OK 14 | jar_file = 'pilon-1.23.jar' 15 | 16 | default_jvm_mem_opts = ['-Xms512m', '-Xmx16g'] 17 | 18 | # !!! End of parameter section. No user-serviceable code below this line !!! 19 | 20 | def real_dirname(path): 21 | """Return the symlink-resolved, canonicalized directory-portion of path.""" 22 | return os.path.dirname(os.path.realpath(path)) 23 | 24 | 25 | def java_executable(): 26 | """Return the executable name of the Java interpreter.""" 27 | java_home = getenv('JAVA_HOME') 28 | java_bin = os.path.join('bin', 'java') 29 | 30 | if java_home and access(os.path.join(java_home, java_bin), X_OK): 31 | return os.path.join(java_home, java_bin) 32 | else: 33 | return 'java' 34 | 35 | 36 | def jvm_opts(argv): 37 | """Construct list of Java arguments based on our argument list. 38 | 39 | The argument list passed in argv must not include the script name. 
40 | The return value is a 3-tuple lists of strings of the form: 41 | (memory_options, prop_options, passthrough_options) 42 | """ 43 | mem_opts = [] 44 | prop_opts = [] 45 | pass_args = [] 46 | 47 | for arg in argv: 48 | if arg.startswith('-D'): 49 | prop_opts.append(arg) 50 | elif arg.startswith('-XX'): 51 | prop_opts.append(arg) 52 | elif arg.startswith('-Xm'): 53 | mem_opts.append(arg) 54 | else: 55 | pass_args.append(arg) 56 | 57 | # In the original shell script the test coded below read: 58 | # if [ "$jvm_mem_opts" == "" ] && [ -z ${_JAVA_OPTIONS+x} ] 59 | # To reproduce the behaviour of the above shell code fragment 60 | # it is important to explictly check for equality with None 61 | # in the second condition, so a null envar value counts as True! 62 | 63 | if mem_opts == [] and getenv('_JAVA_OPTIONS') == None: 64 | mem_opts = default_jvm_mem_opts 65 | 66 | return (mem_opts, prop_opts, pass_args) 67 | 68 | 69 | def main(): 70 | java = java_executable() 71 | jar_dir = real_dirname(sys.argv[0]) 72 | (mem_opts, prop_opts, pass_args) = jvm_opts(sys.argv[1:]) 73 | 74 | if pass_args != [] and pass_args[0].startswith('eu'): 75 | jar_arg = '-cp' 76 | else: 77 | jar_arg = '-jar' 78 | 79 | jar_path = os.path.join(jar_dir, jar_file) 80 | 81 | java_args = [java]+ mem_opts + prop_opts + [jar_arg] + [jar_path] + pass_args 82 | 83 | if '--jar_dir' in sys.argv[1:]: 84 | print(jar_path) 85 | else: 86 | sys.exit(subprocess.call(java_args)) 87 | 88 | 89 | if __name__ == '__main__': 90 | main() 91 | -------------------------------------------------------------------------------- /PRYMETIME/unicycler.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N 1 3 | #SBATCH -n 16 4 | #SBATCH -p long 5 | #SBATCH --mem-per-cpu=10000MB 6 | #SBATCH -t 7-00:00:00 7 | #SBATCH -o unicycler_%j.out 8 | #SBATCH -e unicycler_%j.err 9 | #SBATCH -J unicycler 10 | 11 | #fail if there's a typo in variable names 12 | set -u 13 | #fail if 
any command fails 14 | set -e 15 | 16 | PREFIX=$(basename "$4") 17 | cd "$4" 18 | 19 | if [[ -e "./cir_rep_contigs.fasta" ]]; then 20 | 21 | echo "cir_rep_contigs.fasta exists" 22 | 23 | if [[ -s "./cir_rep_contigs.fasta" ]]; then 24 | 25 | echo "circular repetitve contigs found, performing Unicycler" 26 | 27 | cd unicycler 28 | 29 | for f in *.fasta; do 30 | minimap2 -t ${N_THREADS} -ax map-ont "$f" "$1" | samtools fastq --threads ${N_THREADS} -n -f 4 - > "$f"_nano_map.fastq 31 | 32 | bowtie2-build --threads ${N_THREADS} "$f" "${f}-idx" 33 | bowtie2 -x "${f}-idx" --threads ${N_THREADS} --no-unal -1 "$2" -2 "$3" | samtools fastq --threads ${N_THREADS} -n -f 2 -1 "$f"_ill_map_1.fastq -2 "$f"_ill_map_2.fastq - 34 | rm -f "${f}-idx"*.bt2 35 | 36 | if [[ -s "${f}"_ill_map_1.fastq ]]; then 37 | 38 | unicycler --threads ${N_THREADS} -1 "$f"_ill_map_1.fastq -2 "$f"_ill_map_2.fastq -l "$f"_nano_map.fastq -o "$f"_unicycler 39 | 40 | fi 41 | 42 | done 43 | 44 | cat *_unicycler/assembly.fasta > ../unicycler_contigs.fasta 45 | 46 | cd ../ 47 | 48 | cat unicycler_contigs.fasta polished_contigs.fasta > "$PREFIX"_comb.fasta 49 | 50 | # seqkit has threads on by default 51 | seqkit seq "$PREFIX"_comb.fasta -m 5000 > "$PREFIX"_comb_filtered.fasta 52 | seqkit rename "$PREFIX"_comb_filtered.fasta | seqkit sort --by-length --reverse \ 53 | | awk '/^>/ {if (/circular=true/) \ 54 | {printf(">scaffold_%d_circ\n", ++i)} \ 55 | else {printf(">scaffold_%d\n", ++i)} next} \ 56 | { print }' "$PREFIX"_comb_filtered.fasta > "$PREFIX"_final.fasta 57 | else 58 | 59 | echo "WARNING: no circular contigs, treat only linear" 60 | seqkit seq polished_contigs.fasta -m 5000 > polished_contigs_filtered.fasta 61 | seqkit rename polished_contigs_filtered.fasta | seqkit sort --by-length --reverse | seqkit replace -p '.+' -r 'scaffold_{nr}' > "$PREFIX"_final.fasta 62 | 63 | fi 64 | 65 | else 66 | cd "$4" 67 | mkdir -p unicycler 68 | 69 | cd unicycler 70 | 71 | echo "WARNING: only linear contigs found, 
performing Unicycler only" 72 | 73 | unicycler --threads ${N_THREADS} -1 "$2" -2 "$3" -l "$1" -o "$PREFIX"_unicycler 74 | 75 | cat *_unicycler/assembly.fasta > ../unicycler_contigs.fasta 76 | 77 | cd ../ 78 | 79 | # seqkit has threads on by default 80 | seqkit seq unicycler_contigs.fasta -m 5000 > unicycler_contigs_filtered.fasta 81 | seqkit rename unicycler_contigs_filtered.fasta | seqkit sort --by-length --reverse \ 82 | | awk '/^>/ {if (/circular=true/) \ 83 | {printf(">scaffold_%d_circ\n", ++i)} \ 84 | else {printf(">scaffold_%d\n", ++i)} next} \ 85 | { print }' unicycler_contigs_filtered.fasta > "$PREFIX"_final.fasta 86 | 87 | fi 88 | -------------------------------------------------------------------------------- /PRYMETIME/CEN.fasta: -------------------------------------------------------------------------------- 1 | >CEN10_S288C CEN10 SGD:S000006471 centromere Chromosome X centromere 2 | ATCACGTGTTAAATAATTAATTTACTTTAAAATTTATTTTTTAATATAAAATATTTATTC 3 | TTTTTATTTAAAAATAAAAAACACAAAAAAACAATGTTTATGATTTCCGAACCTAAATA 4 | >CEN8_S288C CEN8 SGD:S000006469 centromere Chromosome VIII centromere 5 | ATCACATGACTAATAATTCTTTTAATTTTAATTAATTTAATAAAATTAAAATAATATATA 6 | TACTAAATTGTTTATTAAAAATGATTAAACATTGGGTTTTGTGTTCCGAACTTAGAAA 7 | >CEN1_S288C CEN1 SGD:S000006463 centromere Chromosome I centromere 8 | GTCACATGACATAATAATAAATAATTTTAAAAATATAAAATATTTTTAATAGTTTTTAAA 9 | TATTTTACAGTTTATTTTTTAAATTTATTTATATGTTTTTGTTTTCCGAAGCAGTCAA 10 | >CEN16_S288C CEN16 SGD:S000006477 centromere Chromosome XVI centromere 11 | ATCACATGATATATTTTTTATTTTTAATTTTTTTTAATTATAAAAATAATTTTTTTCTTT 12 | AAATTAAACAAAAATAAAAAATTGTTTTTTGTTGGTTAAGATTTCCGAAAATAGAAA 13 | >CEN6_S288C CEN6 SGD:S000001896 centromere Chromosome VI centromere 14 | ATCACGTGCTATAAAAATAATTATAATTTAAATTTTTTAATATAAATATATAAATTAAAA 15 | ATAGAAAGTAAAAAAAGAAATTAAAGAAAAAATAGTTTTTGTTTTCCGAAGATGTAAA 16 | >CEN5_S288C CEN5 SGD:S000006467 centromere Chromosome V centromere 17 | ATCACGTGCTTTTTAAAAAATATAAATTTAATTTCATTTTCTATTTCAATATTTATTAAA 18 | 
TAAAAAATTTGAAAAATATATAAAAATTGTAGCAGTATTAGATTTCCGAAAAGAAAAA 19 | >CEN2_S288C CEN2 SGD:S000006464 centromere Chromosome II centromere 20 | ATCATGTGACTTATTTATTTAATTATTATTAAGTAAAAAAGATTTTCTATTTAAATTTAT 21 | TAATTAATTTTTTTTCTTAAATAATTATTTTATGTTTTTGTTTTCCGAAAAAGAAAA 22 | >CEN11_S288C CEN11 SGD:S000006472 centromere Chromosome XI centromere 23 | GTCACATGATAAAAACATATTTAAAATTTTAAAAAAATTAATTTTCAAAATAAATTTATT 24 | ATATTTTTTTAATTACATAATCATAAAAATAAATGTTCATGATTTCCGAACGTATAAA 25 | >CEN12_S288C CEN12 SGD:S000006473 centromere Chromosome XII centromere 26 | ATCACGTGTAATAAATATTATTAAAAAGTTTATTAAAATAAAATAATAATTTAAATTACT 27 | ATTTTTAAATAAGTTTTATTTTTTAATAACACTATTGTATTTGTTATCCGAACAATAAAA 28 | >CEN13_S288C CEN13 SGD:S000006474 centromere Chromosome XIII centromere 29 | ATCACATGACTACCTAACAAAATATTTATTTTTCTTTTTTAATATTTGAAAATACTAAAA 30 | TATTTTTGTTGTTTTTTGAAAAAAGGATTTTTAATGTGTATGCGTTCCGAACTTTAAAT 31 | >CEN7_S288C CEN7 SGD:S000006468 centromere Chromosome VII centromere 32 | ATCACGTGTTATATTTACTATATAAAAATTCAATAAATAAAAAGTTAGAAGATAAAAATT 33 | ATATTATACATATTTTTATTTTTATTATAATTTTTGTTTTTGCCTTCCGAAAAGAAAAT 34 | >CEN9_S288C CEN9 SGD:S000006470 centromere Chromosome IX centromere 35 | TTCACGTGAAAATTTTTATATTTTTAATTAAATTTTTATAATATTATAAATTATTATAAT 36 | ATTGATATTTAAAATTAAAAACAAATTATTAATGGTTTTGTTTTCCGAAATGTTTTT 37 | >CEN14_S288C CEN14 SGD:S000006475 centromere Chromosome XIV centromere 38 | GTCACGTGCAGCTTTTTAAAAATATTTTAAAACATTTTAAAAAATATACATTTTTTTATT 39 | ATTTTTTTATATATTAATGTTAAAATTTATTTATGTATTTGTCTTCCGAAAAGTAAAA 40 | >CEN4_S288C CEN4 SGD:S000006466 centromere Chromosome IV centromere 41 | GTCACATGCTTATAATCAACTTTTTTAAAAATTTAAAATACTTTTTTATTTTTTATTTTT 42 | AAACATAAATGAAATAATTTATTTATTGTTTATGATTACCGAAACATAAAA 43 | >CEN3_S288C CEN3 SGD:S000006465 centromere Chromosome III centromere 44 | GTCACATGATGATATTTGATTTTATTATATTTTTAAAAAAAGTAAAAAATAAAAAGTAGT 45 | TTATTTTTAAAAAATAAAATTTAAAATATTAGTGTATTTGATTTCCGAAAGTTAAAA 46 | >CEN15_S288C CEN15 SGD:S000006476 centromere Chromosome XV centromere 47 | 
ATCACGTGAACTTATTTTGCATTTAAAAAAAAGTAAAAACTATTTGCTAAAATATATTTT 48 | TTTAATTTTTAAAAATAATGTTTTAATTATTTAATGTATATGACTTCCGAAAAATATAT 49 | -------------------------------------------------------------------------------- /PRYMETIME/eng_sig_genome_3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N 1 3 | #SBATCH -n 16 4 | #SBATCH -J eng_sig_blast 5 | # Usage: eng_sig_genome_3.sh <outdir> <eng_sig.fasta> -- copies the bundled query FASTAs and blastn_parse.pl into <outdir>, then BLASTs them against <outdir>/<basename of outdir>_final.fasta 6 | EXECDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd)" 7 | 8 | cp "${EXECDIR}/blastn_parse.pl" "$1" 9 | cp "${EXECDIR}/TELO_L.fasta" "$1" 10 | cp "${EXECDIR}/TELO_R.fasta" "$1" 11 | cp "${EXECDIR}/CEN.fasta" "$1" 12 | cp "${EXECDIR}/MITO.fasta" "$1" 13 | 14 | PREFIX=$(basename "$1") 15 | cd "$1" 16 | 17 | makeblastdb -in "$PREFIX"_final.fasta -dbtype nucl 18 | 19 | #eng_sig blast: query file $2 against the assembly, keeping hits with >=98% identity and >=98% per-HSP query coverage 20 | blastn -task blastn -query "$2" -db "$PREFIX"_final.fasta -perc_identity 98 -qcov_hsp_perc 98 -out "$PREFIX"_blastn_eng_sig.txt 21 | 22 | perl blastn_parse.pl "$PREFIX"_blastn_eng_sig.txt > "$PREFIX"_blastn_eng_sig.parsed.txt 23 | 24 | bp_search2gff --input "$PREFIX"_blastn_eng_sig.txt --addid --version 3 --type hit -o "$PREFIX"_blastn_eng_sig.gff -f blast --method eng_sig 25 | 26 | #telomere L blast (at most one target sequence and one HSP per query) 27 | blastn -task blastn -query TELO_L.fasta -db "$PREFIX"_final.fasta -out "$PREFIX"_blastn_telo_L.txt -max_target_seqs 1 -max_hsps 1 28 | 29 | perl blastn_parse.pl "$PREFIX"_blastn_telo_L.txt > "$PREFIX"_blastn_telo_L.parsed.txt 30 | 31 | bp_search2gff --input "$PREFIX"_blastn_telo_L.txt --addid --version 3 --type hit -o "$PREFIX"_blastn_telo_L.gff -f blast --method telomere 32 | 33 | #telomere R blast (at most one target sequence and one HSP per query) 34 | blastn -task blastn -query TELO_R.fasta -db "$PREFIX"_final.fasta -out "$PREFIX"_blastn_telo_R.txt -max_target_seqs 1 -max_hsps 1 35 | 36 | perl blastn_parse.pl "$PREFIX"_blastn_telo_R.txt > "$PREFIX"_blastn_telo_R.parsed.txt 37 | 38 | bp_search2gff --input "$PREFIX"_blastn_telo_R.txt --addid --version 3 --type hit -o "$PREFIX"_blastn_telo_R.gff -f blast 
--method telomere 39 | 40 | #centromere blast (at most one target sequence and one HSP per query) 41 | blastn -task blastn -query CEN.fasta -db "$PREFIX"_final.fasta -out "$PREFIX"_blastn_cent.txt -max_target_seqs 1 -max_hsps 1 42 | 43 | perl blastn_parse.pl "$PREFIX"_blastn_cent.txt > "$PREFIX"_blastn_cent.parsed.txt 44 | 45 | bp_search2gff --input "$PREFIX"_blastn_cent.txt --addid --version 3 --type hit -o "$PREFIX"_blastn_cent.gff -f blast --method centromere 46 | 47 | #mitochondrion blast (at most one target sequence and one HSP per query) 48 | blastn -task blastn -query MITO.fasta -db "$PREFIX"_final.fasta -out "$PREFIX"_blastn_mito.txt -max_target_seqs 1 -max_hsps 1 49 | 50 | perl blastn_parse.pl "$PREFIX"_blastn_mito.txt > "$PREFIX"_blastn_mito.parsed.txt 51 | 52 | bp_search2gff --input "$PREFIX"_blastn_mito.txt --addid --version 3 --type hit -o "$PREFIX"_blastn_mito.gff -f blast --method mito 53 | 54 | #genome_bed: one line per sequence from the .fai index -- name, literal start 1, then .fai column 2 55 | samtools faidx "$PREFIX"_final.fasta 56 | 57 | awk 'BEGIN {FS="\t"}; {print $1 FS "1" FS $2}' "$PREFIX"_final.fasta.fai > "$PREFIX"_final.bed 58 | 59 | #alitv: convert each GFF to BED, then write BED columns 1-3, a constant -1, and column 4 as TSV 60 | 61 | #eng_sig alitv 62 | gff2bed < "$PREFIX"_blastn_eng_sig.gff > "$PREFIX"_blastn_eng_sig.bed 63 | 64 | awk -F'\t' -v OFS="\t" '{ print $1, $2, $3, -1, $4 }' "$PREFIX"_blastn_eng_sig.bed > eng_sig.tsv 65 | 66 | #telomere L alitv 67 | gff2bed < "$PREFIX"_blastn_telo_L.gff > "$PREFIX"_blastn_telo_L.bed 68 | 69 | awk -F'\t' -v OFS="\t" '{ print $1, $2, $3, -1, $4 }' "$PREFIX"_blastn_telo_L.bed > telomere_L.tsv 70 | 71 | #telomere R alitv 72 | gff2bed < "$PREFIX"_blastn_telo_R.gff > "$PREFIX"_blastn_telo_R.bed 73 | 74 | awk -F'\t' -v OFS="\t" '{ print $1, $2, $3, -1, $4 }' "$PREFIX"_blastn_telo_R.bed > telomere_R.tsv 75 | 76 | cat telomere_L.tsv telomere_R.tsv > telomere.tsv 77 | 78 | #centromere alitv 79 | gff2bed < "$PREFIX"_blastn_cent.gff > "$PREFIX"_blastn_cent.bed 80 | 81 | awk -F'\t' -v OFS="\t" '{ print $1, $2, $3, -1, $4 }' "$PREFIX"_blastn_cent.bed > centromere.tsv 82 | 83 | #mitochondrion alitv 84 | gff2bed < "$PREFIX"_blastn_mito.gff > "$PREFIX"_blastn_mito.bed 85 | 86 | awk 
-F'\t' -v OFS="\t" '{ print $1, $2, $3, -1, $4 }' "$PREFIX"_blastn_mito.bed > mitochondrion.tsv 87 | 88 | #chromomap: re-runs gff2bed on the same GFFs (rewriting the .bed files produced in the alitv section with identical commands), then writes BED columns 4, 1, 2, 3, 8 89 | 90 | #eng_sig chromomap 91 | gff2bed < "$PREFIX"_blastn_eng_sig.gff > "$PREFIX"_blastn_eng_sig.bed 92 | 93 | awk -F'\t' -v OFS="\t" '{ print $4, $1, $2, $3, $8 }' "$PREFIX"_blastn_eng_sig.bed > "$PREFIX"_blastn_eng_sig_cmap.txt 94 | 95 | #telomere L chromomap 96 | gff2bed < "$PREFIX"_blastn_telo_L.gff > "$PREFIX"_blastn_telo_L.bed 97 | 98 | awk -F'\t' -v OFS="\t" '{ print $4, $1, $2, $3, $8 }' "$PREFIX"_blastn_telo_L.bed > "$PREFIX"_blastn_telo_L_cmap.txt 99 | 100 | #telomere R chromomap 101 | gff2bed < "$PREFIX"_blastn_telo_R.gff > "$PREFIX"_blastn_telo_R.bed 102 | 103 | awk -F'\t' -v OFS="\t" '{ print $4, $1, $2, $3, $8 }' "$PREFIX"_blastn_telo_R.bed > "$PREFIX"_blastn_telo_R_cmap.txt 104 | 105 | #centromere chromomap 106 | gff2bed < "$PREFIX"_blastn_cent.gff > "$PREFIX"_blastn_cent.bed 107 | 108 | awk -F'\t' -v OFS="\t" '{ print $4, $1, $2, $3, $8 }' "$PREFIX"_blastn_cent.bed > "$PREFIX"_blastn_cent_cmap.txt 109 | 110 | #mitochondrion chromomap 111 | gff2bed < "$PREFIX"_blastn_mito.gff > "$PREFIX"_blastn_mito.bed 112 | 113 | awk -F'\t' -v OFS="\t" '{ print $4, $1, $2, $3, $8 }' "$PREFIX"_blastn_mito.bed > "$PREFIX"_blastn_mito_cmap.txt 114 | 115 | #combine all chromomap 116 | cat "$PREFIX"_blastn_eng_sig_cmap.txt "$PREFIX"_blastn_telo_L_cmap.txt "$PREFIX"_blastn_telo_R_cmap.txt "$PREFIX"_blastn_cent_cmap.txt "$PREFIX"_blastn_mito_cmap.txt > "$PREFIX"_cmap.txt 117 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Prymetime 2 | 3 | Prymetime is a de novo genome assembly pipeline that uses long reads from Oxford Nanopore Technologies and PacBio and short reads from Illumina. It was designed to produce high-quality genome assemblies from engineered yeast and bacteria strains. 
Prymetime relies on the long read de novo assembler Flye for linear contigs and the hybrid assembler Unicycler for circular contigs. Prymetime now also supports long-read-only or short-read-only assembly. 4 | 5 | All software requirements for Prymetime have been packaged together into a Docker image. Docker is available freely here: https://hub.docker.com/search?offering=community&type=edition 6 | 7 | Although it is possible to run the Prymetime Docker image on a desktop computer, we strongly recommend running the pipeline on a server. The memory requirements of Flye and Unicycler at the recommended 40X genome coverage for nanopore and Illumina reads are likely to exceed what a "normal" desktop computer can provide. 8 | 9 | Additionally, this Docker image can be wrapped in a Singularity image to run on HPCs for ease of use. 10 | 11 | ## Build Singularity image 12 | 13 | Build Singularity image 14 | ```shell 15 | singularity build prymetime docker://sjtrauber/prymetime:v2 16 | ``` 17 | Run Prymetime assembly pipeline 18 | ```shell 19 | singularity run \ 20 | -B ~/path/to/input:/input \ 21 | -B ~/path/to/output:/output \ 22 | ~/prymetime \ 23 | -long ~/path/to/nanopore.fastq \ 24 | -illumina_1 ~/path/to/illumina_1.fastq \ 25 | -illumina_2 ~/path/to/illumina_2.fastq \ 26 | -outdir ~/path/to/output \ 27 | -preferred_assembly short \ # indicates bacterial assembly, remove for yeast 28 | -read_type <short|long|assembly> \ # include if using only short or only long reads, or if inputting a pre-assembled genome for eng_sig identification 29 | -eng_sig ~/path/to/bacterial_signatures.fna \ # optional 30 | -ref_genome ~/path/to/output/GCF_001456255.1.fna # optional 31 | ``` 32 | The final genome assembly is written to the output directory as `<name of output directory>_final.fasta` (here, `output_final.fasta`). 33 | 34 | ## Build Docker image 35 | 36 | Download the Prymetime repository 37 | ```shell 38 | git clone https://github.com/emyounglab/prymetime.git 39 | ``` 40 | Build Docker image 41 | ```shell 42 | docker build --tag prymetime prymetime 43 | ``` 44 | Build time is around one hour on a desktop computer. 
45 | 46 | ## Run Docker image with data 47 | 48 | Mount a directory with the `-v` flag. The path before the `:` 49 | must be an absolute path to a file or directory on the host, and the path 50 | after the `:` is where it will be mounted inside the container. 51 | 52 | Run Prymetime assembly pipeline 53 | ```shell 54 | docker run -it --rm \ 55 | -v /path/to/input_dir:/input \ 56 | -v /path/to/output_dir:/output \ 57 | prymetime \ 58 | -long /input/my_long.fastq \ 59 | -illumina_1 /input/my_illumina_1.fastq \ 60 | -illumina_2 /input/my_illumina_2.fastq \ 61 | -outdir /output/my_directory \ 62 | -preferred_assembly short \ # indicates bacterial assembly, remove for yeast 63 | -read_type <short|long|assembly> \ # include if using only short or only long reads, or if inputting a pre-assembled genome for eng_sig identification 64 | -eng_sig ~/path/to/bacterial_signatures.fna \ # optional 65 | -ref_genome ~/path/to/output/GCF_001456255.1.fna # optional 66 | ``` 67 | The final genome assembly will be written to `/output/my_directory/my_directory_final.fasta`. 68 | 69 | The -eng_sig option will also produce a PDF displaying the engineering signatures that were found in the genome assembly; an example is shown below: 70 | 71 | ![FEY_2 engineering signatures](https://github.com/emyounglab/prymetime/blob/master/docs/FEY_2_chromo_alitv.jpg) 72 | 73 | The eng_sig_felix.fasta file (provided in the PRYMETIME folder) contains all engineering signatures used in this study. 74 | 75 | ## Run Docker image interactively 76 | 77 | The entrypoint script can be overridden for debugging using the 78 | `--entrypoint` argument to docker run. Using `/bin/bash` as the 79 | entrypoint starts an interactive shell when the docker image is 80 | run. Here is an example: 81 | 82 | ```shell 83 | docker run -it --rm \ 84 | -v $(realpath ../data):/input \ 85 | -v $(realpath output):/output \ 86 | --entrypoint /bin/bash \ 87 | prymetime 88 | ``` 89 | 90 | The run time of Prymetime depends strongly on the computer or server used and on the size of the read libraries. 
On a desktop computer with a small 10X genome coverage read library, Prymetime took approximately 7 hours. 91 | 92 | ### Detailed Prymetime genome assembly pipeline 93 | 94 | ![Prymetime_pipeline](https://github.com/emyounglab/prymetime/blob/master/docs/PRYMETIME_pipeline_description_2.jpg) 95 | 96 | ### Supporting software 97 | Prymetime utilizes the following software packages: 98 | * [Flye](https://github.com/fenderglass/Flye) 99 | * [Medaka](https://github.com/nanoporetech/medaka) 100 | * [Racon](https://github.com/lbcb-sci/racon) 101 | * [Pilon](https://github.com/broadinstitute/pilon) 102 | * [Unicycler](https://github.com/rrwick/Unicycler) 103 | * [Minimap2](https://github.com/lh3/minimap2) 104 | * [BWA](https://github.com/lh3/bwa) 105 | * [Samtools](https://github.com/samtools/samtools) 106 | * [Fastq-pair](https://github.com/linsalrob/fastq-pair) 107 | * [Mummer](https://github.com/mummer4/mummer) 108 | * [chromoMap](https://github.com/cran/chromoMap) 109 | * [AliTV](https://github.com/AliTVTeam/AliTV) 110 | 111 | ### Prymetime genome assemblies 112 | * [prymetime_genomes](https://github.com/emyounglab/prymetime_genomes) 113 | 114 | ### Publication 115 | 116 | Prymetime publication is available: https://www.nature.com/articles/s41467-021-21656-9 117 | -------------------------------------------------------------------------------- /PRYMETIME/PRYMETIME.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Entry point: parses the options below, then runs the per-step scripts that sit next to this file (EXECDIR). 3 | 4 | EXECDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd)" 5 | 6 | function usage { 7 | cat <<EOF 8 | Usage: $0 [-threads <n>] [-long <fastq>] [-illumina_1 <fastq>] [-illumina_2 <fastq>]\ 9 | [-outdir <dir>] [-eng_sig <fasta>] [-ref_genome <fasta>] [-preferred_assembly <short>]\ 10 | [-read_type <short|long|assembly>] [-long_read_type <type>] 11 | 12 | Processes fastq long reads plus illumina files 13 | 14 | -help Print Help 15 | -verbose Be Verbose 16 | -long Specify long read fastq file 17 | -illumina_1 Paired-end read 1 18 | -illumina_2 Paired-end read 2 19 | -outdir Specify an output directory 20 | -eng_sig 
Fasta file with engineering signatures 21 | -ref_genome Reference genome fasta file for comparison 22 | -preferred_assembly Tags if organism should be assembled with short-read preference 23 | -read_type Tags if providing only short or long reads or assembly to visualize edits 24 | -long_read_type Tags if long reads are Nanopore or HiFi PacBio 25 | -v Verbose 26 | EOF 27 | } 28 | 29 | VERBOSE=no 30 | 31 | while [ $# -gt 0 ]; do 32 | case "$1" in 33 | -help) usage; exit 0;; 34 | -v|-verbose) VERBOSE=yes;; 35 | -threads) shift;N_THREADS="$1";; 36 | -long) shift;IN_FASTQ_LONG="$1";; 37 | -illumina_1) shift;IN_FASTQ_ILLUMINA_1="$1";; 38 | -illumina_2) shift;IN_FASTQ_ILLUMINA_2="$1";; 39 | -outdir) shift;OUTDIR="$1";; 40 | -eng_sig) shift;ENG_SIG="$1";; 41 | -ref_genome) shift;REF_GENOME="$1";; 42 | -preferred_assembly) shift;SHORT="$1";; 43 | -read_type) shift;READ_TYPE="$1";; 44 | -long_read_type) shift;LONG_READ_TYPE="$1";; 45 | -) shift; break;; 46 | -*) 47 | usage; 48 | exit 1;; 49 | *) break;; # terminate while loop 50 | esac 51 | shift 52 | done 53 | 54 | 55 | if [[ -z "$IN_FASTQ_LONG" ]]; then 56 | echo "WARNING: Must specify a nanopore or hifi pacbio fastq file with -long <file>" >&2 57 | fi 58 | if [[ -z "$IN_FASTQ_ILLUMINA_1" ]]; then 59 | echo "WARNING: Must specify a merged illumina fastq file with -illumina_1 <file>" >&2 60 | fi 61 | if [[ -z "$IN_FASTQ_ILLUMINA_2" ]]; then 62 | echo "WARNING: Must specify a merged illumina fastq file with -illumina_2 <file>" >&2 63 | fi 64 | if [[ ! -f "$IN_FASTQ_LONG" ]]; then 65 | echo "WARNING: Long read file '$IN_FASTQ_LONG' does not exist" >&2 66 | fi 67 | if [[ ! -f "$IN_FASTQ_ILLUMINA_1" ]]; then 68 | echo "WARNING: Illumina file '$IN_FASTQ_ILLUMINA_1' does not exist" >&2 69 | fi 70 | if [[ ! 
-f "$IN_FASTQ_ILLUMINA_2" ]]; then 71 | echo "WARNING: Illumina file '$IN_FASTQ_ILLUMINA_2' does not exist" >&2 72 | fi 73 | 74 | if [[ -z "$OUTDIR" ]]; then 75 | OUTDIR=/output 76 | echo "WARNING: -outdir not specified; outputting to $OUTDIR" >&2 77 | else 78 | OUTDIR="$OUTDIR" 79 | fi 80 | 81 | if [[ -z "$N_THREADS" ]]; then 82 | N_THREADS=8 83 | echo "WARNING: -threads not specified; using $N_THREADS" >&2 84 | fi 85 | export N_THREADS 86 | 87 | if [[ "$VERBOSE" = "yes" ]]; then 88 | echo "Submitting jobs" >&2 89 | set -v 90 | set -x 91 | fi 92 | 93 | #fail if there's a typo in variable names 94 | #set -u 95 | #fail if any command fails 96 | set -e 97 | 98 | ### ENG SIGS ONLY 99 | 100 | if [[ "$READ_TYPE" == "assembly" ]]; then 101 | 102 | # if the ENG_SIG argument was provided, run the engineering-signature scripts (REF_GENOME is not checked here) 103 | if [[ ! -z "$ENG_SIG" ]]; then 104 | 105 | echo "engineered signatures found" 106 | 107 | "$EXECDIR"/eng_sig_genome_3.sh "$OUTDIR" "$ENG_SIG" 108 | "$EXECDIR"/eng_sig_cmap.sh "$OUTDIR" 109 | 110 | else echo "no engineered signatures found" 111 | fi 112 | exit 2 113 | fi 114 | # NOTE(review): every early-exit branch ends with 'exit 2' even on success, while the default path falls through with status 0 -- confirm callers expect that 115 | 116 | ### SHORT READS ONLY 117 | 118 | if [[ "$READ_TYPE" == "short" ]]; then 119 | echo "WARNING: only short reads detected, continuing without long reads" >&2 120 | 121 | "$EXECDIR"/unicycler_short.sh "$IN_FASTQ_ILLUMINA_1" \ 122 | "$IN_FASTQ_ILLUMINA_2" "$OUTDIR" 123 | 124 | # if the ENG_SIG argument was provided, run the engineering-signature scripts (REF_GENOME is not checked here) 125 | if [[ ! 
-z "$ENG_SIG" ]]; then 126 | 127 | echo "engineered signatures found" 128 | 129 | "$EXECDIR"/eng_sig_genome_3.sh "$OUTDIR" "$ENG_SIG" 130 | "$EXECDIR"/eng_sig_cmap.sh "$OUTDIR" 131 | 132 | else echo "no engineered signatures found" 133 | fi 134 | exit 2 135 | fi 136 | 137 | ### LONG READS ONLY 138 | 139 | if [[ "$READ_TYPE" == "long" ]]; then 140 | echo "WARNING: only long reads detected, continuing without short reads" >&2 141 | 142 | "$EXECDIR"/flye.sh "$IN_FASTQ_LONG" "$OUTDIR" 143 | 144 | "$EXECDIR"/sorter2.sh "$OUTDIR" 145 | 146 | if [[ -s "$OUTDIR/lin_contigs.fasta" ]]; then 147 | 148 | "$EXECDIR"/medaka.sh "$IN_FASTQ_LONG" "$OUTDIR/lin_contigs.fasta" "$OUTDIR/medaka" 149 | 150 | "$EXECDIR"/racon_long.sh "$OUTDIR/medaka/consensus.fasta" "$OUTDIR/racon.fasta" 151 | 152 | "$EXECDIR"/nucmer_long.sh "$OUTDIR" 153 | 154 | "$EXECDIR"/split.sh "$OUTDIR" "$OUTDIR/cir_rep_contigs.fasta" 155 | 156 | "$EXECDIR"/unicycler_long.sh "$IN_FASTQ_LONG" "$OUTDIR" 157 | 158 | # if the ENG_SIG argument was provided, run the engineering-signature scripts (REF_GENOME is not checked here) 159 | if [[ ! -z "$ENG_SIG" ]]; then 160 | 161 | echo "engineered signatures found" 162 | 163 | "$EXECDIR"/eng_sig_genome_3.sh "$OUTDIR" "$ENG_SIG" 164 | 165 | "$EXECDIR"/eng_sig_cmap.sh "$OUTDIR" 166 | 167 | else echo "no engineered signatures found" 168 | fi 169 | exit 2 170 | else 171 | #skip medaka, racon, and pilon if no linear contigs 172 | echo "WARNING: no linear contigs found; continuing with only circular contigs" >&2 173 | 174 | "$EXECDIR"/nucmer.sh "$OUTDIR" 175 | 176 | "$EXECDIR"/split.sh "$OUTDIR" "$OUTDIR/cir_rep_contigs.fasta" 177 | 178 | "$EXECDIR"/unicycler_long.sh "$IN_FASTQ_LONG" "$OUTDIR" 179 | 180 | # if the ENG_SIG argument was provided, run the engineering-signature scripts (REF_GENOME is not checked here) 181 | if [[ ! 
-z "$ENG_SIG" ]]; then 182 | 183 | echo "engineered signatures found" 184 | 185 | "$EXECDIR"/eng_sig_genome_3.sh "$OUTDIR" "$ENG_SIG" 186 | 187 | "$EXECDIR"/eng_sig_cmap.sh "$OUTDIR" 188 | 189 | else echo "no engineered signatures found" 190 | fi 191 | exit 2 192 | fi 193 | fi 194 | 195 | 196 | 197 | ### BOTH READS, EITHER PREFERENCE 198 | 199 | if [[ -z "$SHORT" ]]; then 200 | echo "Not tagged as short preferred, continuting to assemble with Flye" 201 | else 202 | echo "Tagged as short preferred assembly, skipping to Unicycler" 203 | 204 | "$EXECDIR"/unicycler.sh "$IN_FASTQ_LONG" "$IN_FASTQ_ILLUMINA_1" \ 205 | "$IN_FASTQ_ILLUMINA_2" "$OUTDIR" 206 | 207 | # if the ENG_SIG argument was provided, run the engineering-signature scripts (REF_GENOME is not checked here) 208 | if [[ ! -z "$ENG_SIG" ]]; then 209 | 210 | echo "engineered signatures found" 211 | 212 | "$EXECDIR"/eng_sig_genome_3.sh "$OUTDIR" "$ENG_SIG" 213 | 214 | "$EXECDIR"/eng_sig_cmap.sh "$OUTDIR" 215 | 216 | fi 217 | 218 | exit 2 219 | 220 | fi 221 | 222 | 223 | "$EXECDIR"/flye.sh "$IN_FASTQ_LONG" "$OUTDIR" 224 | 225 | "$EXECDIR"/sorter2.sh "$OUTDIR" 226 | 227 | if [[ -s "$OUTDIR/lin_contigs.fasta" ]]; then 228 | 229 | "$EXECDIR"/medaka.sh "$IN_FASTQ_LONG" "$OUTDIR/lin_contigs.fasta" "$OUTDIR/medaka" 230 | 231 | "$EXECDIR"/illumina_merge.sh "$IN_FASTQ_ILLUMINA_1" "$IN_FASTQ_ILLUMINA_2" "$OUTDIR/illumina_merge.fastq" 232 | 233 | "$EXECDIR"/bowtie2.sh "$OUTDIR/lin_contigs.fasta" "$OUTDIR/illumina_merge.fastq" "$OUTDIR/bowtie2.sam" "$OUTDIR" 234 | 235 | "$EXECDIR"/racon.sh "$OUTDIR/illumina_merge.fastq" "$OUTDIR/bowtie2.sam" \ 236 | "$OUTDIR/medaka/consensus.fasta" "$OUTDIR/racon.fasta" 237 | 238 | "$EXECDIR"/pilon.sh "$OUTDIR/racon.fasta" "$OUTDIR/illumina_merge.fastq" \ 239 | "$OUTDIR/pilon.bam" "$OUTDIR/pilon" "$OUTDIR" 240 | 241 | "$EXECDIR"/nucmer.sh "$OUTDIR" 242 | 243 | "$EXECDIR"/split.sh "$OUTDIR" "$OUTDIR/cir_rep_contigs.fasta" 244 | 245 | "$EXECDIR"/unicycler.sh "$IN_FASTQ_LONG" "$IN_FASTQ_ILLUMINA_1" \ 246 | "$IN_FASTQ_ILLUMINA_2" "$OUTDIR" 247 | 248 | # if the ENG_SIG argument was 
provided, run the engineering-signature scripts (REF_GENOME is not checked here) 249 | if [[ ! -z "$ENG_SIG" ]]; then 250 | 251 | echo "engineered signatures found" 252 | 253 | "$EXECDIR"/eng_sig_genome_3.sh "$OUTDIR" "$ENG_SIG" 254 | 255 | "$EXECDIR"/eng_sig_cmap.sh "$OUTDIR" 256 | 257 | else echo "no engineered signatures found" 258 | fi 259 | 260 | else 261 | #skip medaka, racon, and pilon if no linear contigs 262 | echo "WARNING: no linear contigs found; continuing with only circular contigs" >&2 263 | 264 | "$EXECDIR"/nucmer.sh "$OUTDIR" 265 | 266 | "$EXECDIR"/split.sh "$OUTDIR" "$OUTDIR/cir_rep_contigs.fasta" 267 | 268 | "$EXECDIR"/unicycler.sh "$IN_FASTQ_LONG" "$IN_FASTQ_ILLUMINA_1" \ 269 | "$IN_FASTQ_ILLUMINA_2" "$OUTDIR" 270 | 271 | # if the ENG_SIG argument was provided, run the engineering-signature scripts (REF_GENOME is not checked here) 272 | if [[ ! -z "$ENG_SIG" ]]; then 273 | 274 | echo "engineered signatures found" 275 | 276 | "$EXECDIR"/eng_sig_genome_3.sh "$OUTDIR" "$ENG_SIG" 277 | 278 | "$EXECDIR"/eng_sig_cmap.sh "$OUTDIR" 279 | 280 | else echo "no engineered signatures found" 281 | 282 | fi 283 | fi 284 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:focal as build 2 | 3 | #ENV CUDO_VISIBLE_DEVICES=-1 4 | 5 | # Set locale settings 6 | ENV LANGUAGE=en_US.en 7 | ENV LC_ALL=en_US.UTF-8 8 | ENV LANG=en_US.UTF-8 9 | RUN apt-get update && apt-get install -y locales && \ 10 | sed -i -e "s/# $LANG.*/$LANG UTF-8/" /etc/locale.gen && \ 11 | dpkg-reconfigure --frontend=noninteractive locales && \ 12 | update-locale LANG=$LANG 13 | 14 | # Generate locale settings 15 | RUN locale-gen en_US.UTF-8 16 | 17 | RUN apt-get -y update && \ 18 | DEBIAN_FRONTEND=noninteractive \ 19 | apt-get -y install \ 20 | autoconf \ 21 | automake \ 22 | cmake \ 23 | curl \ 24 | gcc \ 25 | git \ 26 | libbz2-dev \ 27 | libcurl4-gnutls-dev \ 28 | liblzma-dev \ 29 | libncurses5-dev \ 30 | libssl-dev \ 31 | libtool \ 32 | make 
\ 33 | wget \ 34 | apt-utils \ 35 | zip \ 36 | zlib1g-dev \ 37 | yasm \ 38 | build-essential \ 39 | python3-dev \ 40 | python3-pip \ 41 | zlib1g-dev \ 42 | libncursesw5-dev \ 43 | gfortran \ 44 | libreadline8 \ 45 | libreadline-dev \ 46 | libx11-dev \ 47 | libxt6 \ 48 | xorg-dev \ 49 | libpcre2-posix2 \ 50 | libpcre2-dev \ 51 | && \ 52 | apt-get clean 53 | 54 | # downgrade numpy for deprecated np.bool 55 | RUN pip3 install numpy==1.23.1 56 | 57 | # Install Flye 2.9 58 | RUN pip3 install git+https://github.com/fenderglass/Flye@2.9.2 59 | 60 | # Install Medaka (https://github.com/nanoporetech/medaka) 61 | 62 | # Medaka depends on bgzip, minimap2, samtools, bcftools, and tabix. Install 63 | # those dependencies. 64 | 65 | # Install minimap2 (https://github.com/lh3/minimap2) 66 | RUN curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 \ 67 | | tar -jxvf - -C /usr/local \ 68 | && ln -s /usr/local/minimap2-2.24_x64-linux/minimap2 /usr/local/bin 69 | 70 | # Install bowtie2 (http://bowtie-bio.sourceforge.net/bowtie2/index.shtml) 71 | RUN curl -L -o bowtie2-2.4.5-linux-x86_64.zip https://sourceforge.net/projects/bowtie-bio/files/bowtie2/2.4.5/bowtie2-2.4.5-linux-x86_64.zip/download \ 72 | && unzip bowtie2-2.4.5-linux-x86_64.zip -d /usr/local \ 73 | && ln -s /usr/local/bowtie2-2.4.5-linux-x86_64/bowtie2 /usr/local/bin \ 74 | && ln -s /usr/local/bowtie2-2.4.5-linux-x86_64/bowtie2-build /usr/local/bin 75 | 76 | # Install packages needed to build HTSlib and samtools 77 | RUN apt-get -y install \ 78 | autoconf \ 79 | automake \ 80 | make \ 81 | gcc \ 82 | zlib1g-dev \ 83 | libbz2-dev \ 84 | liblzma-dev \ 85 | libcurl4-gnutls-dev \ 86 | libssl-dev \ 87 | libncurses5-dev \ 88 | && \ 89 | apt-get clean 90 | 91 | # Install HTSlib for bgzip, tabix 92 | RUN wget https://github.com/samtools/htslib/releases/download/1.17/htslib-1.17.tar.bz2 \ 93 | && bunzip2 -c htslib-1.17.tar.bz2 | tar xf - \ 94 | && cd htslib-1.17 \ 95 | && ./configure \ 96 
| && make \ 97 | && make install 98 | 99 | # Install samtools 100 | RUN wget https://github.com/samtools/samtools/releases/download/1.17/samtools-1.17.tar.bz2 \ 101 | && bunzip2 -c samtools-1.17.tar.bz2 | tar xf - \ 102 | && cd samtools-1.17 \ 103 | && ./configure \ 104 | && make \ 105 | && make install 106 | 107 | # Install bcftools 108 | RUN wget https://github.com/samtools/bcftools/releases/download/1.17/bcftools-1.17.tar.bz2 \ 109 | && bunzip2 -c bcftools-1.17.tar.bz2 | tar xf - \ 110 | && cd bcftools-1.17 \ 111 | && ./configure \ 112 | && make \ 113 | && make install 114 | 115 | # Install idna 116 | RUN pip3 install idna 117 | 118 | # Install Cython 119 | RUN pip3 install --upgrade cython 120 | 121 | # Install Setuptools 122 | RUN pip3 install setuptools-scm==6.4.2 123 | 124 | # Install protobuf 125 | RUN pip3 install protobuf==4.22.1 126 | 127 | # Install Medaka 128 | RUN pip3 install medaka==1.7.3 129 | 130 | # Install racon 131 | RUN git clone --recursive https://github.com/isovic/racon.git racon \ 132 | && cd racon \ 133 | && mkdir build \ 134 | && cd build \ 135 | && cmake -DCMAKE_BUILD_TYPE=Release .. 
\ 136 | && make \ 137 | && make install 138 | 139 | # Install Unicycler 0.4.8 140 | RUN pip3 install git+https://github.com/rrwick/Unicycler.git@v0.5.0 141 | 142 | # Install fastq_pair 143 | RUN curl -s -L https://github.com/linsalrob/fastq-pair/archive/v1.0.tar.gz | tar xzf - \ 144 | && cd fastq-pair-1.0 \ 145 | && mkdir build \ 146 | && cd build \ 147 | && cmake ../ \ 148 | && make \ 149 | && make install 150 | 151 | # Install SPAdes for unicycler 152 | RUN wget http://cab.spbu.ru/files/release3.15.5/SPAdes-3.15.5-Linux.tar.gz \ 153 | && tar -xzf SPAdes-3.15.5-Linux.tar.gz \ 154 | && cd SPAdes-3.15.5-Linux \ 155 | && cp bin/* /usr/local/bin/ \ 156 | && cp -r share/* /usr/local/share/ 157 | 158 | # Install seqkit v0.12.0 in /usr/local/bin 159 | RUN curl -s -L \ 160 | https://github.com/shenwei356/seqkit/releases/download/v0.12.0/seqkit_linux_amd64.tar.gz \ 161 | | tar -xzf - -C /usr/local/bin 162 | 163 | # Install Blast+ v2.10.0 in /usr/local 164 | RUN curl -s -L \ 165 | https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.13.0/ncbi-blast-2.13.0+-x64-linux.tar.gz \ 166 | | tar -xzf - -C /usr/local \ 167 | && ln -s /usr/local/ncbi-blast-2.13.0+/bin/* /usr/local/bin 168 | 169 | # R 170 | RUN wget -c https://cloud.r-project.org/src/base/R-4/R-4.2.3.tar.gz \ 171 | && tar -zxf R-4.2.3.tar.gz \ 172 | && cd R-4.2.3 \ 173 | && ./configure \ 174 | && make -j9 \ 175 | && make install 176 | 177 | 178 | # ---------------------------------------------------------------------- 179 | # 180 | # Now construct the final docker image without all of the development 181 | # gunk so that it is a leaner docker image. 
182 | # 183 | # ---------------------------------------------------------------------- 184 | FROM ubuntu:focal 185 | 186 | # Install locales package 187 | 188 | # Set locale settings 189 | ENV LANGUAGE=en_US.en 190 | ENV LC_ALL=en_US.UTF-8 191 | ENV LANG=en_US.UTF-8 192 | RUN apt-get update && apt-get install -y locales && \ 193 | sed -i -e "s/# $LANG.*/$LANG UTF-8/" /etc/locale.gen && \ 194 | dpkg-reconfigure --frontend=noninteractive locales && \ 195 | update-locale LANG=$LANG 196 | 197 | # sorter needs pandas and biopython 198 | RUN apt-get -y update && \ 199 | DEBIAN_FRONTEND=noninteractive \ 200 | apt-get -y --no-install-recommends install \ 201 | bowtie2 \ 202 | build-essential \ 203 | bwa \ 204 | default-jdk-headless \ 205 | gfortran \ 206 | libbz2-dev \ 207 | libcurl4-gnutls-dev \ 208 | libjpeg9-dev \ 209 | liblzma-dev \ 210 | libmariadbclient-dev \ 211 | libpng-dev \ 212 | libssl-dev \ 213 | libxml2-dev \ 214 | bedops \ 215 | python3 \ 216 | python3-biopython \ 217 | python3-idna \ 218 | python3-pandas \ 219 | python3-pymummer \ 220 | python3-pip \ 221 | bedtools \ 222 | zlib1g \ 223 | && \ 224 | apt-get clean 225 | 226 | RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10 227 | 228 | # downgrade numpy for deprecated np.bool 229 | RUN pip3 install numpy==1.23.1 230 | 231 | COPY --from=build /usr/local /usr/local 232 | 233 | # Install AliTV 234 | 235 | # AliTV needs libyaml-perl libhash-merge-perl bioperl perl cpanm lastz 236 | RUN apt-get -y update && \ 237 | DEBIAN_FRONTEND=noninteractive \ 238 | apt-get -y --no-install-recommends install \ 239 | libyaml-perl \ 240 | libhash-merge-perl \ 241 | bioperl \ 242 | perl \ 243 | wget \ 244 | git \ 245 | cpanminus && \ 246 | apt-get clean 247 | 248 | #RUN cpan FindBin::Real 249 | #RUN cpan Log::Log4perl 250 | #RUN cpanm JSON 251 | #RUN cpanm Bio::Perl Bio::FeatureIO 252 | 253 | # Download, compile and install lastz 254 | #RUN wget https://github.com/lastz/lastz/archive/1.04.22.tar.gz && \ 
255 | # tar -xf 1.04.22.tar.gz && \ 256 | # cd lastz-1.04.22 && \ 257 | # make && \ 258 | # make install 259 | # remove -Werror from Makefile to fix compile errors 260 | # sed -i 's/-Werror //' src/Makefile && \ 261 | # make && \ 262 | # install -m 0755 src/lastz /usr/local/bin/ && \ 263 | # install -m 0755 src/lastz_D /usr/local/bin/ && \ 264 | # cd .. && rm -rf lastz-* 265 | 266 | #WORKDIR /app 267 | #COPY . /app 268 | #ENV PERL5LIB="/app/lib:${PERL5LIB}" 269 | 270 | # AliTV v1.0.6 install 271 | #RUN git clone https://github.com/AliTVTeam/AlitTV-perl-interface && \ 272 | # cd AliTV-perl-interface \ 273 | # cpanm --installdeps . 274 | 275 | #RUN chmod 755 AliTV-perl-interface-1.0.6/bin/alitv.pl 276 | 277 | # Install chromoMap 278 | RUN R -e "install.packages('chromoMap', repos = 'http://cran.us.r-project.org')" 279 | RUN R -e "install.packages('htmltools', repos = 'http://cran.us.r-project.org')" 280 | 281 | # pilon 282 | ADD https://github.com/broadinstitute/pilon/releases/download/v1.23/pilon-1.23.jar /usr/local/bin/ 283 | RUN chmod 755 /usr/local/bin/pilon-1.23.jar 284 | ADD pilon /usr/local/bin/ 285 | 286 | # Add the entrypoint script 287 | COPY PRYMETIME /usr/local/bin/prymetime 288 | 289 | # When the docker image is launched the resulting container runs the entrypoint script 290 | ENTRYPOINT ["/usr/local/bin/prymetime/PRYMETIME.sh"] 291 | -------------------------------------------------------------------------------- /PRYMETIME/bp_search2gff.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' 4 | if 0; # not running under some shell 5 | 6 | =head1 NAME 7 | 8 | bp_search2gff 9 | 10 | =head1 SYNOPSIS 11 | 12 | Usage: 13 | 14 | bp_search2gff [-o outputfile] [-f reportformat] [-i inputfilename] OR file1 file2 .. 15 | 16 | =head1 DESCRIPTION 17 | 18 | This script will turn a SearchIO report (BLAST, FASTP, SSEARCH, 19 | AXT, WABA) into GFF. 
20 | 21 | The options are: 22 | 23 | -i infilename - (optional) inputfilename, will read 24 | either ARGV files or from STDIN 25 | -o filename - the output filename [default STDOUT] 26 | -f format - search result format (blast, fasta,waba,axt) 27 | (ssearch is fasta format). default is blast. 28 | -t/--type seqtype - if you want to see query or hit information 29 | in the GFF report 30 | -s/--source - specify the source (will be algorithm name 31 | otherwise like BLASTN) 32 | --method - the method tag (primary_tag) of the features 33 | (default is similarity) 34 | --scorefunc - a string or a file that when parsed evaluates 35 | to a closure which will be passed a feature 36 | object and that returns the score to be printed 37 | --locfunc - a string or a file that when parsed evaluates 38 | to a closure which will be passed two 39 | features, query and hit, and returns the 40 | location (Bio::LocationI compliant) for the 41 | GFF3 feature created for each HSP; the closure 42 | may use the clone_loc() and create_loc() 43 | functions for convenience, see their PODs 44 | --onehsp - only print the first HSP feature for each hit 45 | -p/--parent - the parent to which HSP features should refer 46 | if not the name of the hit or query (depending 47 | on --type) 48 | --target/--notarget - whether to always add the Target tag or not 49 | -h - this help menu 50 | --version - GFF version to use (put a 3 here to use gff 3) 51 | --component - generate GFF component fields (chromosome) 52 | -m/--match - generate a 'match' line which is a container 53 | of all the similarity HSPs 54 | --addid - add ID tag in the absence of --match 55 | -c/--cutoff - specify an evalue cutoff 56 | 57 | Additionally specify the filenames you want to process on the 58 | command-line. If no files are specified then STDIN input is assumed. 
59 | You specify this by doing: bp_search2gff E<lt> file1 file2 file3 60 | 61 | =head1 AUTHOR 62 | 63 | Jason Stajich, jason-at-bioperl-dot-org 64 | 65 | =head1 Contributors 66 | 67 | Hilmar Lapp, hlapp-at-gmx-dot-net 68 | 69 | =cut 70 | 71 | use strict; 72 | use warnings; 73 | use Bio::Tools::GFF; 74 | use Getopt::Long; 75 | use Bio::SearchIO; 76 | use Bio::Location::Simple; # pre-declare to simplify $locfunc implementations 77 | use Bio::Location::Atomic; # pre-declare to simplify $locfunc implementations 78 | use Storable qw(dclone); # for cloning location objects 79 | use Bio::Factory::FTLocationFactory; 80 | 81 | my ( 82 | $output, # output file (if not stdout) 83 | $input, # name of the input file 84 | $format, # format of the input file, default is blast 85 | $type, # 'query' or 'hit' 86 | $cutoff, # cut-off value for e-value filter 87 | $sourcetag, # explicit source tag (will be taken from program 88 | # otherwise 89 | $methodtag, # primary tag (a.k.a. method), default 'similarity' 90 | $gffver, # GFF version (dialect) to write 91 | $scorefunc, # closure returning the score for a passed feature 92 | $locfunc, # closure returning a location object for a passed 93 | # query and hit feature 94 | $addid, # flag: whether to always add the ID for $match == 0 95 | $parent, # the name of the parent to use; if set and $match == 0 96 | # will always add the target 97 | $comp, # flag: whether to print a component feature 98 | $addtarget, # flag: whether to always add the Target tag, default 99 | # is true 100 | $match, # flag: whether to print match lines as containers 101 | $onehsp, # flag: whether to consider only the first HSP for a hit 102 | $quiet, # flag: run quietly 103 | $help # flag: show help screen 104 | ); 105 | 106 | # set defaults: 107 | $format = 'blast'; 108 | $type = 'query'; 109 | $gffver = 2; 110 | $methodtag = "similarity"; 111 | $addtarget = 1; 112 | 113 | GetOptions( 114 | 'i|input:s' => \$input, 115 | 'component' => \$comp, 116 | 'm|match' => 
\$match, 117 | 'o|output:s' => \$output, 118 | 'f|format:s' => \$format, 119 | 's|source:s' => \$sourcetag, 120 | 'method=s' => \$methodtag, 121 | 'addid' => \$addid, 122 | 'scorefunc=s' => \$scorefunc, 123 | 'locfunc=s' => \$locfunc, 124 | 'p|parent=s' => \$parent, 125 | 'target!' => \$addtarget, 126 | 'onehsp' => \$onehsp, 127 | 't|type:s' => \$type, 128 | 'c|cutoff:s' => \$cutoff, 129 | 'v|version:i' => \$gffver, 130 | 'q|quiet' => \$quiet, 131 | 'h|help' => sub { 132 | exec( 'perldoc', $0 ); 133 | exit(0); 134 | }, 135 | ); 136 | $type = lc($type); 137 | if ( $type =~ /target/ ) { $type = 'hit' } 138 | elsif ( $type ne 'query' && $type ne 'hit' ) { 139 | die("seqtype must be either 'query' or 'hit'"); 140 | } 141 | 142 | # custom or default function returning the score 143 | $scorefunc = 144 | defined($scorefunc) ? parse_code($scorefunc) : sub { shift->score }; 145 | 146 | # custom or default function returning the location 147 | $locfunc = defined($locfunc) ? parse_code($locfunc) : sub { shift->location }; 148 | 149 | # if --match is given then $addid needs to be disabled 150 | $addid = undef if $addid && $match; 151 | 152 | # if no input is provided STDIN will be used 153 | my $parser = new Bio::SearchIO( 154 | -format => $format, 155 | -verbose => $quiet ? -1 : 0, 156 | -file => $input 157 | ); 158 | 159 | my $out; 160 | if ( defined $output ) { 161 | $out = new Bio::Tools::GFF( 162 | -gff_version => $gffver, 163 | -file => ">$output" 164 | ); 165 | } 166 | else { 167 | $out = new Bio::Tools::GFF( -gff_version => $gffver ); # STDOUT 168 | } 169 | my ( %seen_hit, %seen ); 170 | my $other = $type eq 'query' ? 
'hit' : 'query'; 171 | 172 | while ( my $result = $parser->next_result ) { 173 | my $qname = $result->query_name; 174 | if ( $comp 175 | && $type eq 'query' 176 | && $result->query_length ) 177 | { 178 | $out->write_feature( 179 | Bio::SeqFeature::Generic->new( 180 | -start => 1, 181 | -end => $result->query_length, 182 | -seq_id => $qname, 183 | -source_tag => 'chromosome', 184 | -primary_tag => 'Component', 185 | -tag => { 186 | 'Sequence' => $qname 187 | } 188 | ) 189 | ); 190 | } 191 | while ( my $hit = $result->next_hit ) { 192 | next if ( defined $cutoff && $hit->significance > $cutoff ); 193 | my $acc = $qname; 194 | if ( $seen{ $qname . "-" . $hit->name }++ ) { 195 | $acc = $qname . "-" . $seen{ $qname . '-' . $hit->name }; 196 | } 197 | 198 | if ( $comp 199 | && $type eq 'hit' 200 | && $hit->length 201 | && !$seen_hit{ $hit->name }++ ) 202 | { 203 | $out->write_feature( 204 | Bio::SeqFeature::Generic->new( 205 | -start => 1, 206 | -end => $hit->length, 207 | -seq_id => $hit->name, 208 | -source_tag => 'chromosome', 209 | -primary_tag => 'Component', 210 | -tag => { 211 | 'Sequence' => $hit->name 212 | } 213 | ) 214 | ); 215 | } 216 | my ( %min, %max, $seqid, $name, $st ); 217 | while ( my $hsp = $hit->next_hsp ) { 218 | my $feature = new Bio::SeqFeature::Generic; 219 | my ( $proxyfor, $otherf ); 220 | if ( $type eq 'query' ) { 221 | ( $proxyfor, $otherf ) = ( $hsp->query, $hsp->hit ); 222 | $name ||= $hit->name; 223 | } 224 | else { 225 | ( $otherf, $proxyfor ) = ( $hsp->query, $hsp->hit ); 226 | $name ||= $acc; 227 | } 228 | $proxyfor->score( $hit->bits ) unless ( $proxyfor->score ); 229 | if ( ( $gffver == 3 ) && ( $match || $parent ) ) { 230 | $feature->add_tag_value( 'Parent', $parent || $name ); 231 | } 232 | 233 | $min{$type} = $proxyfor->start 234 | unless defined $min{$type} && $min{$type} < $proxyfor->start; 235 | $max{$type} = $proxyfor->end 236 | unless defined $max{$type} && $max{$type} > $proxyfor->end; 237 | $min{$other} = $otherf->start 238 
| unless defined $min{$other} && $min{$other} < $otherf->start; 239 | $max{$other} = $otherf->end 240 | unless defined $max{$other} && $max{$other} > $otherf->end; 241 | if ( $addtarget || $match ) { 242 | $feature->add_tag_value( 'Target', 'Sequence:' . $name ); 243 | $feature->add_tag_value( 'Target', $otherf->start ); 244 | $feature->add_tag_value( 'Target', $otherf->end ); 245 | } 246 | if ($addid) { 247 | $feature->add_tag_value( 'ID', $name ); 248 | } 249 | 250 | $feature->location( &$locfunc( $proxyfor, $otherf ) ); 251 | 252 | # strand for feature is always going to be product of 253 | # query & hit strands so that target can always be just 254 | # '+' 255 | $feature->strand( $proxyfor->strand * $otherf->strand ); 256 | if ($sourcetag) { 257 | $feature->source_tag($sourcetag); 258 | } 259 | else { 260 | $feature->source_tag( $proxyfor->source_tag ); 261 | } 262 | $feature->score( &$scorefunc($proxyfor) ); 263 | $feature->frame( $proxyfor->frame ); 264 | $feature->seq_id( $proxyfor->seq_id ); 265 | $feature->primary_tag($methodtag); 266 | 267 | # add annotation if encoded in the query description 268 | my $desc = $result->query_description; 269 | while ( $desc =~ /\/([^=]+)=(\S+)/g ) { 270 | $feature->add_tag_value( $1, $2 ); 271 | } 272 | $seqid ||= $proxyfor->seq_id; 273 | $out->write_feature($feature); 274 | $st ||= $sourcetag || $proxyfor->source_tag; 275 | last if $onehsp; 276 | } 277 | if ($match) { 278 | 279 | my $matchf = Bio::SeqFeature::Generic->new( 280 | -start => $min{$type}, 281 | -end => $max{$type}, 282 | -strand => $hit->strand($type) * $hit->strand($other), 283 | -primary_tag => 'match', 284 | -source_tag => $st, 285 | -score => $hit->bits, 286 | -seq_id => $seqid 287 | ); 288 | if ( $gffver == 3 ) { 289 | $matchf->add_tag_value( 'ID', $name ); 290 | } 291 | $matchf->add_tag_value( 'Target', "Sequence:$name" ); 292 | $out->write_feature($matchf); 293 | } 294 | } 295 | } 296 | 297 | sub parse_code { 298 | my $src = shift; 299 | my $code; 300 
| 301 | # file or subroutine? 302 | if ( -r $src ) { 303 | if ( !( ( $code = do $src ) && ( ref($code) eq "CODE" ) ) ) { 304 | die "error in parsing code block $src: $@" if $@; 305 | die "unable to read file $src: $!" if $!; 306 | die "failed to run $src, or it failed to return a closure"; 307 | } 308 | } 309 | else { 310 | $code = eval $src; 311 | die "error in parsing code block \"$src\": $@" if $@; 312 | die "\"$src\" fails to return a closure" 313 | unless ref($code) eq "CODE"; 314 | } 315 | return $code; 316 | } 317 | 318 | =head2 clone_loc 319 | 320 | Title : clone_loc 321 | Usage : my $l = clone_loc($feature->location); 322 | Function: Helper function to simplify the task of cloning locations 323 | for --locfunc closures. 324 | 325 | Presently simply implemented using Storable::dclone(). 326 | Example : 327 | Returns : A L<Bio::LocationI> object of the same type and with the 328 | same properties as the argument, but physically different. 329 | All structured properties will be cloned as well. 330 | Args : A L<Bio::LocationI> compliant object 331 | 332 | =cut 333 | 334 | sub clone_loc { 335 | return dclone(shift); 336 | } 337 | 338 | =head2 create_loc 339 | 340 | Title : create_loc 341 | Usage : my $l = create_loc("10..12"); 342 | Function: Helper function to simplify the task of creating locations 343 | for --locfunc closures. Creates a location from a feature- 344 | table formatted string. 345 | 346 | Example : 347 | Returns : A L<Bio::LocationI> object representing the location given 348 | as formatted string. 349 | Args : A GenBank feature-table formatted string. 
350 | 351 | =cut 352 | 353 | sub create_loc { 354 | return Bio::Factory::FTLocationFactory->from_string(shift); 355 | } 356 | -------------------------------------------------------------------------------- /PRYMETIME/TELO_R.fasta: -------------------------------------------------------------------------------- 1 | >TEL10R_S288C TEL10R SGD:S000028974 telomere Telomeric region on the right arm of Chromosome X 2 | TAAGTGTGTTTATGTATTATGGTTGAAGAATAGAATATTTTTATGTTTAGGTGATTTTAG 3 | TGGTGATTTTTTTGTAATATTGGCATAAGTGTATATAAATTGAGTGGTTAGTATACGGTG 4 | TAAAAGTGGTATAACGTATGTATTAAGAGCAGTTATACAATATTTGGGGCCGCCGAATGA 5 | GATATAGATATTAAAATGTGGATAATCGTGGGAGTTATGGGTAAATGGCACAGGGTATAG 6 | ACCGCTGAGGCAAGTGCCGTGCATAATGATGCGAGTGCATTTGGTACTGATGGTGAGAGA 7 | TGGGTGATGGAGTGGAATGTGAGAGTAGGGTAAGTTTGAGAGTGGTATATACTGTAGCAT 8 | CCGTGTGCATATGCCATATCAGTATACAAGTGCAAGTGAGTATGGCATGTGGTGGTGGGA 9 | TTAGAGTGGTAGGGTAAGTATATGTGTATTATTTACGATTATTTGTTAACGTTTCAATAT 10 | GGAGGGTAGAACAACAGTACAGTGAGTAGGACATGGTGGATGGTAGGGTAATAGTAGGGT 11 | AATGGTAGTGGAGTTGGATATGGGTAATTGGAGGGTAACGGTTATGGTGGACGGTGGATG 12 | GTAGTAGTAAGTAGAGAGATGGATGGTGGTTGGGAGTGGTATAATGAAATGGGACAGGGT 13 | AACGAGTGGGGAGGTAGGGTAATGGAGGGTACGTTAAGAGACAGGTTTATCAGGGTTGGA 14 | TTAGAATAGGGTTAGGGTAGTGTTAGGGTAGTGTTAGGGTAGTGTGGTGTGGTGTGTGGG 15 | TGTGGGTGTGGGTGTGTGTGTGGGTGTGGTGTGTGGGTGTGGTGTGTGGGTGTGGTGTGT 16 | GTGTGGGTGT 17 | >TEL11R_S288C TEL11R SGD:S000028978 telomere Telomeric region on the right arm of Chromosome XI 18 | AGAGTGTGTTTATGTATTATTGTCGAAAGATAGAATATTTTTATGTTTAGGTGATTTTGG 19 | TGGTGATTTTTTGGTTATATTAACATAAGTGTATATAAATTGAGTGGTTAGTATATGGTG 20 | TAAAAGTGGTATAACGTATGTATTAAGAGCATTTATACGATATTTGGGCCCGCCGAATGA 21 | GATATAGATATTAAAATGTGGATAATCGTGGGAATTATGCGTAAATGGCACAGGGTATAG 22 | ACCGCTGAGGCAAGTGCCGTGCATAATGATGTGGGTGCATTTGGTACTGATTTAATGAGA 23 | ATGGGCCATGGATTGGAGTGTGAGAGTAGGGTAAGTTTGAGAGTGGTATATACTGTAGCA 24 | TCCGTGTGCGTATGACCGATCAGAATACAAGTGAGGATGGCTATGGCATGTGGTAGTGGG 25 | ATTAGAGTGGTAGGGTAAGTATGTATGTATTATTTACGATCATTTGTTAACGTTTCATAT 26 | 
GGTGGGTAGAACAACAGTATGGTGAGTAACAGATGGCTGATGGTAGGGTAATGGCAGGGT 27 | AAGTGGTGGAGTTGGATATGGGTAATTGGAGGGTAACGGTTATGATGGATGGTGGTTGGG 28 | ATTGGTAGGGTTGAATGGCACAGGGTAACGAATGATGAGTTGGGTAATGGAGGTGTAAGT 29 | TGTGAGACAGGTTCATCAGGGTTGGAGTAGGGTAGTGTTAGGGTTGTGGGTGTGTGGGTG 30 | TGGTGTGGGTGTGGTGTGGGTGTGGGTGTGGTGTGGGTGTGGTGTGGGTGTGGTGTGGGT 31 | GTGTGGGTGTGGTGTGTGGGTGTGTGGTGTGTGGGTGTGTGTGGGTGTGGTGTGTGTGGG 32 | TGTGGTGTGTGTGGGTGTGGTGTGTGTGTGTGTGGGTGTGGTGTGTGTGTGGGTGTGGGT 33 | GTGGTGTGTGTGT 34 | >TEL15R_S288C TEL15R SGD:S000028997 telomere Telomeric region on the right arm of Chromosome XV 35 | TCAATATGTTTATGTATTATTGTTGAAGAATGGAATATTTTTATGTTTAGGTGATTTTGA 36 | TGGTGATTTTTTGGTTATATTAACATAAGTGTATATAAATTAAGTGGTTAGTATACGGTG 37 | TAAAAGTGGTATAACGTATGTATTAAGAGCAGTTATACAATATTTGGGGCCGCCGAATGA 38 | GATATAGATATTAAAATGTGGATAATCGTGGGAGTTATGCGTAAATGGCACAGGGTATAG 39 | ACCGCTGAGGCAAGTGCCGTTAATAATGATGTGAGTGCATTTAGTACTGATGAAGTGAGA 40 | GATGGGCCATGGAGTGGAATGTGAGAGTAGGGTAACTTGAGATGATATATACTGTAGCAT 41 | CCGTGTGCCTATGCCATATCAGTATACAAGTGAGGGTGGATACGGCATGTGGTGGTAGGG 42 | TAAGTATATGTGTATTATTTACGATCATTTGTTAACGTTTCAACATGGTGGGTAGAACAA 43 | CAGTATGGTGAGTAGCGGATGATGGATGGTAGGGTAATAGTAGGGTAAGTGGTGGTGGAG 44 | TTGGATATGGGTAATTGGAGGGTAACGGTTATGATGGGCGGTGGATGGTAGTAGTAAGTA 45 | GAGAGATGGATGGTGGTTGGGAGTGGTATGGTTGAGTGAGACAGGGTAACGAGTGGAGAG 46 | GTAGGGTAATGGAGGGTAAGTTGAGAGACAGGTTCATCATATATATGTCACTGTATTGCA 47 | TGCTGGATGGTGTTAGACAAGGCCGTAGGGACATATAGCATCTAGGAAGTAACCTTGTAC 48 | GAAAATAGGCAATATTTCCTGTTTAGGCGATTGTGACGCAGATTTTAGTCCAACGATCTA 49 | GCGTCAAGGAATTTTTTTATAGTGGGACATTGCACCAAGGAAGTAACTTGATACGTCGTG 50 | GGTGAATGGGTCTGTTTTCTTATTCGGCGGGGTAATACATTTTTGGGGGAAGTTTGTCTG 51 | TCTGACGCGCCATATGTAGGTACGCCAAAAAGGGCTCCTCTACTTCGAAGCGCGAGGTCG 52 | TATACCTAATAAGGAAATGTAATTTATAACTTTCTATTATATTGGTCTTTTCGAGAGCGG 53 | AAGAAGTTGTAGGCTAAGCGCAGGCTAAGCGTAGGTCCATATTTAAAGTATCCAAGAGAA 54 | TATCCACGAAGCGGCTGAGCAACGAACAGAATCCTGGTTCTCCTCGACTAAGCAGATAGT 55 | TAAGATACTGTGCACCATGGAAATTGAAAACGAAAGTACGTACCGACTACTTTATTTTTG 56 | 
CAGGCCGGAAATCAAGCGATGAATGAGACATCCTTCTGTTTTCTATGTTGGGACAGACAG 57 | TCGCTTATCTTAGTGAGATTTCTTATTAACTGAATTTTCTTTGCTGCTGCTGGAGATTTG 58 | CACCTGCATAGCGCAGATTCTGCTTCTTCTCAATAGAGTAGCTTAATTATTACATTCTTA 59 | GATGATGATAAGACGGAAACTGGACAATCTTTTGTTTATATTGATGGATTTCTTGTCAAA 60 | AAGCATAACAATCAACATACTATTGTTAATTTCGAAACTTACAAAAATAAAATGAAAGTT 61 | TCCGATAGGCGTAAGTTTGAAAAAGCAAACTTTGACGAGTTTGAGTCGGCTCTAAATAAC 62 | AAAAACGACTTGGTACATTGTCCCTCAATAACTTTATTTGAATCGATCCCCACGGAAGTG 63 | CGGTCATTCTACGAAGACGAAAAGTCTGGCCTAATCAAAGTGGTAAAATTCAGAACTGGT 64 | GCAATGGATAGGAAAAGGTCTTTTGAAAAAATTGTCATTTCCGTCATGGTCGGGAAAAAT 65 | GTACAAAAGTTCCTGACATTTGTTGAAGACGAACCAGATTTCCAGGGCGGACCAATCCCT 66 | TCAAAGTATCTTATTCCCAAGAAAATCAACTTGATGGTCTACACGTTGTTTCAAGTGCAT 67 | ACTTTGAAATTCAATAGAAAGGATTACGATACCCTTTCTCTTTTTTACCTCAACAGAGGA 68 | TACTATAATGAGTTGAGTTTCCGTGTCCTGGAACGTTGTCACGAAATAGCGAGTGCCAGG 69 | CCGAACGACAGCTCTACGATGCGTACTTTCACTGACTTTGTTTCTGGCGCACCTATTGTA 70 | AGGAGTCTTCAGAAAAGCACCATAAGGAAATATGGGTACAATTTGGCACCCTACATGTTC 71 | TTGTTACTACACGTAGATGAGCTATCGATTTTTTCTGCATACCAAGCAAGTTTACCTGGC 72 | GAAAAGAAAGTCGACACAGAGCGGCTGAAGCGTGATCTATGCCCACGTAAACCCATTGAG 73 | ATAAAGTACTTTTCACAGATATGTAACGATATGATGAACAAAAAAGACCGATTGGGTGAT 74 | ATTTTGCATATTATCTTGCGAGCATGTGCGCTCAATTTCGGGGCGGGTCCCCGTGGTGGC 75 | GCTGGTGACGAAGAGGATCGATCTATTACGAATGAAGAACCCATTATTCCCTCTGTGGAC 76 | GAGCATGGCTTGAAAGTATGTAAGTTGCGTAGTCCTAACACTCCACGAAGACTCAGAAAA 77 | ACACTAGATGCCGTGAAAGCTTTATTGGTGTCGTCTTGTGCTTGTACTGCAAGGGATTTA 78 | GATATATTTGATGACACCAACGGCGTTGCAATGTGGAAATGGATCAAAATTCTGTACCAC 79 | GAAGTAGCGCAGGAAACCACGCTGAAGGACTCTTATAGAATAACTTTGGTACCTTCTTCT 80 | GATGGTATATCAGTATGTGGAAAACTTTTTAATCGCGAGTATGTCCGCGGCTTTTACTTT 81 | GCATGCAAGGCTCAGTTCGATAACCTTTGGGGAGAGTTGAACAACTGCTTTTATATGCCT 82 | ACAGTGGTTGATATTGCCAGCCTCATTTTGCGTAATCGAGAAGTTTTGTTCAGAGAGCCA 83 | AAGCGAGGAATTGACGAGTATCTGGAAAACGATTCTTTTCTTCAAATGATACCTGTTAAA 84 | TATCGTGAAATTGTGCTGCCCAAGTTGAGAAGAGATACTAACAAAATGACCGCGGCTCTT 85 | AAAAATAAAGTCACTGTTGCAATTGACGAGCTTACGGTGCCACTTATGTGGATGGTCCAT 86 | 
TTTGCCGTAGGATACCCTTACCGTTATCCAGAGCTTCAGCTACTCGCTTTTGCCGGTCCT 87 | CAGCGCAACGTATACGTCGATGATACAACAAGACGCATCCAACTGTACACTGATTACAAC 88 | AAGAACGGTTCATCGGAGCCTCGACTTAAGACGCTTGACGGACTCACTTCAGATTACGTG 89 | TTTTATTTTGTCACTGTGCTAAGGCAAATGCAAATATGTGCGCTTGGTAACAGTTATGAC 90 | GCTTTTAATCATGATCCTTGGATGGATGTGGTGGGATTTGAGGATCCAGATCAAGTAACA 91 | AATCGAGACATTTCGAGGATAGTTTTGTATTCCTACATGTTTCTGAATACCGCGAAGGGC 92 | TGTCTGGTTGAATACGCAACTTTTCGGCAGTACATGAGGGAACTTCCGAAGAATGCACCT 93 | CAGAAGCTGAATTTTCGGGAGATGCGTCAGGGGTTGATTGCCCTAGGACGGCACTGCGTA 94 | GGTAGCAGATTTGAAACAGATTTGTACGAGTCGGCGACGAGTGAACTCATGGCCAATCAT 95 | TCCGTTCAAACAGGGCGAAATATTTACGGTGTGGATTCCTTTTCGTTAACTAGTGTCAGT 96 | GGGACGACCGCCACTTTATTGCAGGAACGAGCTTCCGAGCGCTGGATTCAATGGTTAGGC 97 | CTTGAAAGCGACTACCATTGTTCATTCTCTAGTACTCGGAATGCGGAAGACGTAGTGGCA 98 | GGTGAGGCGGCGAGTTCAGATCATGATCAAAAAATTTCAAGAGTAACGCGAAAAAGGCCC 99 | CGAGAGCCCAAGAGTACAAACGATATCCTCGTCGCAGGCCAGAAACTCTTTGGCAGCTCC 100 | TTTGAATTCAGGGACTTGCATCAGTTGCGCTTATGTCATGAAATATACATGGCAGACACA 101 | CCCTCTGTGGCAGTACAGGCCCCACCGGGCTATGGTAAGACGGAGTTATTTCATCTCCCC 102 | TTGATAGCACTGGCGTCTAAGGGCGACGTGAAATATGTGTCGTTTCTGTTTGTACCGTAC 103 | ACAGTGTTGCTTGCTAATTGCATGATCAGGTTGAGCCGATGCGGTTGCTTGAATGTGGCC 104 | CCTGTAAGAAACTTTATTGAAGAAGGTTGCGATGGCGTTACTGATTTATACGTGGGGATC 105 | TACGATGATCTTGCTAGCACTAATTTCACAGACAGGATAGCTGCGTGGGAGAATATTGTT 106 | GAGTGCACCTTTAGGACCAACAACGTAAAATTGGGTTACCTCATTGTAGATGAGTTTCAC 107 | AACTTTGAAACGGAGGTCTACCGGCAGTCGCAATTTGGGGGCATAACTAACCTTGATTTT 108 | GACGCTTTTGAGAAAGCAATCTTTTTGAGCGGCACAGCACCTGAGGCTGTAGCTGATGCT 109 | GCGTTGCAGCGTATTGGGCTTACGGGACTGGCCAAGAAGTCGATGGACATCAACGAGCTC 110 | AAACGGTCGGAAGATCTCAGCAGAGGTCTATCCAGCTATCCAACACGGATGTTTAATCTA 111 | ATCAAGGAGAAATCCGAGGTGCCTTTAGGGCATGTTCATAAAATTTGGAAGAAAGTGGAA 112 | TCACAGCCCGAAGAAGCACTGAAGCTTCTTTTAGCCCTCTTTGAAATTGAACCAGAGTCG 113 | AAGGCCATTGTAGTTGCAAGCACAACCAACGAAGTGGAAGAATTGGCCTGCTCTTGGAGA 114 | AAGTATTTTAGGGTGGTATGGATACACGGGAAGCTGGGTGCTGCAGAAAAGGTGTCTCGC 115 | ACAAAGGAGTTTGTCACTGACGGTAGCATGCGAGTTCTCATCGGAACGAAATTAGTGACT 116 | 
GAAGGAATTGACATTAAGCAATTGATGATGGTGATCATGCTTGATAATAGACTTAATATT 117 | ATTGAGCTCATTCAAGGCGTAGGGAGACTAAGAGATGGGGGCCTCTGTTATCTATTATCT 118 | AGAAAAAACAGTTGGGCGGCAAGGAATCGTAAGGGTGAATTACCACCGATTAAGGAAGGC 119 | TGTATAACCGAACAGGTACGCGAGTTCTATGGACTTGAATCAAAGAAAGGAAAAAAGGGC 120 | CAGCATGTTGGATGCTGTGGCTCCAGGACAGACCTGTCTGCTGACACAGTGGAACTGATA 121 | GAAAGAATGGACAGATTGGCTGAAAAACAGGCGACAGCTTCCATGTCGATCATTGCGTTA 122 | CCGTCTAGCTTCCAGGAGAGCAATAGCAGTGACAGGTGCAGAAAGTATTGCAGCAGTGAT 123 | GAGGACAGCGACACGTGCATTCATGGTAGTGCTAATGCCAGTACCAATGCGACTACCAAC 124 | TCCAGCACTAATGCTACTACCACTGCCAGCACCAACGTCAGGACTAGTGCTACTACCACT 125 | GCCAGCATCAACGTCAGGACTAGTGCGATTACCACTGAAAGTACCAACTCCAGCACTAAT 126 | GCTACTACCACTGCCAGCACCAACGTCAGGACTAGTGCTACTACCACTGCCAGCATCAAC 127 | GTCAGGACTAGTGCGACTACCACTGAAAGTACCAACTCCAACACTAGTGCTACTACCACC 128 | GAAAGTACCGACTCCAACACTAGTGCTACTACCACCGAAAGTACCGACTCCAACACTAGT 129 | GCTACTACCACTGCTAGCACCAACTCCAGCACTAATGCCACTACCACTGCTAGCACCAAC 130 | TCCAGCACTAATGCCACTACCACTGAAAGTACCAACGCTAGTGCCAAGGAGGACGCCAAT 131 | AAAGATGGCAATGCTGAGGATAATAGATTCCATCCAGTCACCGACATTAACAAAGAGTCG 132 | TATAAGCGGAAAGGGAGTCAAATGGTTTTGCTAGAGAGAAAGAAACTGAAAGCACAATTT 133 | CCCAATACTTCCGAGAATATGAATGTCTTACAGTTTCTTGGATTTCGGTCTGACGAAATT 134 | AAACATCTTTTCCTCTATGGTATTGACGTATACTTCTGCCCAGAGGGAGTATTCACACAA 135 | TACGGATTATGCAAGGGCTGTCAAAAGATGTTCGAGCTCTGTGTCTGTTGGGCTGGCCAG 136 | AAAGTATCGTATCGGAGGATGGCTTGGGAAGCACTAGCTGTGGAGAGAATGCTGCGAAAT 137 | GACGAGGAATACAAAGAATACTTGGAAGACATCGAGCCATATCATGGGGACCCTGTAGGA 138 | TATTTGAAATATTTTAGCGTAAAAAGGGGAGAGATCTACTCTCAGATACAGAGAAATTAT 139 | GCTTGGTACCTGGCCATTACTAGAAGAAGAGAAACAATTAGTGTATTGGATTCGACAAGA 140 | GGCAAGCAAGGGAGCCAAGTTTTCCGCATGTCTGGAAGGCAGATCAAAGAGTTGTATTAT 141 | AAAGTATGGAGCAACTTGCGTGAATCGAAGACAGAGGTGCTGCAGTACTTTTTGAACTGG 142 | GACGAGAAAAAGTGCCGGGAAGAATGGGAGGCAAAAGACGATACGGTCTTTGTGGAAGCG 143 | CTCGAGAAAGTTGGAGTTTTTCAGCGTTTGCGTTCCATGACGAGCGCTGGACTGCAGGGT 144 | CCGCAGTACGTCAAGCTGCAGTTTAGCAGGCATCATCGACAGTTGAGGAGCAGATATGAA 145 | 
TTAAGTCTAGGAATGCACTTGCGAGATCAGCTTGCGCTGGGAGTTACCCCATCTAAAGTG 146 | CCGCATTGGACGGCATTCCTGTCGATGCTGATAGGGCTGTTCTACAATAAAACATTTCGG 147 | CAGAAACTGGAATATCTTTTGGAGCAGATTTCGGAGGTGTGGTTGTTACCACATTGGCTT 148 | GATTTGGCAAACGTTGAAGTTCTCGCTGCAGATAACACGAGGGTACCGCTGTACATGCTG 149 | ATGGTAGCGGTTCACAAAGAGCTGGATAGCGATGATGTTCCAGACGGTAGATTTGATATA 150 | ATATTACTATGTAGAGATTCGAGCAGAGAAGTTGGAGAGTGAAGGAAATTGTTGTTACGA 151 | AAGTCAGTGATTATGTATTGTGTAGTATAGTATATTGTAAGAAATTTTTTTTTCTAGGGA 152 | ATATGCGTTTTGATGTAGTAGTATTTCACTGTTTTGATTTAGTGTTTGTTGCACGGCAGT 153 | AGCGAGAGACAAGTGGGAAAGAGTAGGATAAAAAGACAATCTATAAAAAGTAAACATAAA 154 | ATAAAGGTAGTAAGTAGCTTTTGGTTGAACATCCGGGTAAGAGACAACAGGGCTTGGAGG 155 | AGACGTACATGAGGGCTATTTAGGGCTATTTAGGGCTATGTAGAAGTGCTGTAGGGCTAA 156 | AGAACAGGGTTTCATTTTCATTTTTTTTTTTTAATTTCGGTCAGAAAGCCGGGTAAGGTA 157 | TGACAGCGAGAGTAGAGGTAGATGTGAGAGAGTGTGTGGGTGTGGTGTGT 158 | >TEL08R_S288C TEL08R SGD:S000028964 telomere Telomeric region on the right arm of Chromosome VIII 159 | TTAGTATGTTTATATCTTCTCATTAATGAATAGTTAATTTTTACGTTTAGGTGATTTTGG 160 | TGGTGATTTTTCTATAATATTGAAATAAGTGTATATAAATTGAGTGGTTAGTATATGGTG 161 | AAAAAGTGGTATAACGTATGTATTAAGAGCATTTATACGGTATTTGGGCCCGCCGAATGA 162 | GATATAGATATTAAAATGTGGATAATGATGGGCTTTATGGGTAAATGGCACAGGGTATAG 163 | ACCGCTGAGGCAAGTGCCGTTAATGGTGATGTGAGTGCACTTAGTACTGATTTAGTGAGA 164 | GAGATGGGCCATGGATTGGAGTGTGAGAGTAGGGTAACTTGAGAGTGGTATATACTGTAG 165 | CATCCGTGTGCGTATGCCATAGGATAATTAAAGGTGAGTATGGCATGTGGTGAGGTGGTA 166 | GTGATGGTGATATAGAGTGGTAGGGTAAGTGTATGTGCATTATTTACGATTATTTGTTAA 167 | CGTTTCAATATGGAGGGTAGAACAACAGTACAGTGAGTAGGACATGGTGGATGGTAGGGT 168 | AATAGCAGGGTAATGGTAGTGGAGTTGGATATGGGTAATTGGAGGGTAACGGTTATGATG 169 | GGCGGTGGATGGTAGTAGTAAGTAGAGAGATGGATGGTGGTTGGGAGTGGTATGGTTGAG 170 | TGGGGCAGGGTAACGAGTGGGGAGGTAGGGTAATGTGAGGGTAGGTTAAGAGACAGGTTA 171 | GATAGGGTGGTGTGTGGTGTGTGGGTGTGGTGTGGTGTGGGTGTGGGTGTGGTGTGTGGG 172 | TGTGGTGTGTGTGGGTGTGGGTGTGGTGTGGGTGTGGGTGTGTGTGGGTGTGTGTGTGTG 173 | GGTGTGTGTGGGTGTGGGTGTGGGTGTGGGTGTGGTGTGTGTATATATATGTCACTGTAT 174 | 
TGCATGCTGGATGGTGTTAGACAAGGCCGTAGGGACATATAGCATCTAGGAAGTAACCTT 175 | GTACGAAAATAGGCAATATTTCCTGTTTAGGCGATTGTGACGCAGATTTTAGTCCAACGA 176 | TCTAGCGTCAAGGAATTTTTTTATAGTGGGACATTGCACCAAGGAAGTAACTTGATACGT 177 | CGTGGGTGAATGGGTCTGTTTTCTTATTCGGCGGGGTAATACATTTTTGGGGGAAGTTTG 178 | TCTGTCTGACGCGCCATATGTAGGTACGCCAAAAAGGGCTCCTCTACTTCGAAGCGCGAG 179 | GTCGTATACCTAATAAGGAAATGTAATTTATAACTTTTTATTATATTGGTCTTTTCGAGA 180 | GCGGAACGTAGGTCCATGTTTAAAGTATCCAAGAGAATATCCACGAAGCGGCTGAGCAAC 181 | GAACAGAATCCTGGTTCTCCTCGACTAAGCAGATAGTTAAGATACTGTGCACCATGGAAA 182 | TTGAAAACGAAAGTACGTACCGACTACTTTATTTTTGCAGGCCGGAAATCAAGCGATGAA 183 | TGAGACATCCTTCTGTTTTCTATGTTGTGCTTGAAGGGGACAGACAGTCGCTTATCTTAG 184 | TGAGATTTCTTATTAACTGAATTTTCTTTGCTGCTGCTGGAGATTTGCACCTGCATAGCG 185 | CAGATTCTGCTTCTTCTCAATAGAGTAGCTTAATTATTACATTCTTAGATGATGATAAGA 186 | CGGAAACTGGACAATCTTTTGTTTATATTGATGGATTTCTTGTCAAAAAGCATAGCAATC 187 | AACATACTATTGTTAATTTCGAAACTTACAAAAATAAAATGAAAGTTTCCGATAGCGTAA 188 | GTTTGAAAAAGCAAACTTTGACGAGTTTGAGTCGGCTCTAAATAACAAAAACGACTTGGT 189 | ACATTGTCCCTCAATAACTTTATTTGAATCGATCCCCACGGAAGTGCGGTCATTCTACGA 190 | AGACGAAAAGTCTGGTCTAATCAAAGTGGTAAAATTCAGAACTGGTGCAATGGATAGGAA 191 | AAGGTCTTTTGAAAAAATTGTCGTTTCCGTCATGGTCGGGAAAAATGTACAAAAGTTCCT 192 | GACGTTTGTTGAAGACGAACCAGATTTCCAGGGCGGACCAATCCCTTCAAAGTATCTTAT 193 | TCCCAAGAAAATCAACTTGATGGTCTACACGTTGTTTCAAGTGCATACTTTGAAATTCAA 194 | TAGAAAGGATTACGATACCCTTTCTCTTTTTTACCTCAACAGAGGATACTATAATGAGTT 195 | GAGTTTCCGTGTCCTGGAACGTTGTTACGAAATAGCGAGTGCCAGGCCGAACGACAGCTC 196 | TACGATGCGTACTTTCACTGACTTTGTTTCTGGCACACCTATTGTAAGGAGTCTTCAGAA 197 | AAGCACCATAAGGAAATATGGATACAATTTGGCACCCTACATGTTCTTGTTACTACACGT 198 | AGATGAGCTATCGATTTTTTCTGCATACCAAGCAAGTTTACCTGGCGAAAAGAAAGTCGA 199 | CACAGAGCGGCTGAAGCGTGATCTATGCCCACGTAAACCCACTGAGATAAAGTACTTTTC 200 | ACAGATATGTAACGATATGATGAACAAAAAGGACCGATTGGGTGATGTTTTGCATGTGTG 201 | CTGCCCAAGTTGAGAAGAGATACTAACAAAATGACCGCGGCTCTCAAAAATAATTGACGA 202 | GCTTACGGTGATACGCTTACCGTTATCCAGAGCTACAGCGCAACGTATACGTCGACGATA 203 | 
CAACAAGAACGGTTCATCGGAGCCTCGACTAAAGACGCTTGACGGACTCACTTCCGAGCG 204 | CTGGATTCAATGGTTAGGCCTTGAAAGCGACTACCATTGTTCATTCTCTAGTACTCGGAA 205 | TGCGGAAGACGTAGTGGCAGGTGAGGCGGCGAGTTCAGATCATGATCAAAAAATTTCAAG 206 | AGTAACGCGAAAAAGGCCCCGAGAGCCCAAGAGTACAAACGATATCCTCGTCGCAGGCCG 207 | GAAACTCTTTGGCAGCTCCTTTGAATTCAGGGACTTGCATCAGTTGCGCTTATGTCATGA 208 | AATATACATGGCAGACACACCCTCTGTGGCAGTACAGGCCCCACCGGGCTATGGTAAGAC 209 | GGAGTTATTTCATCTCCCCTTGATAGCACTGGCATCTAAGGGCGACGTGAAATATGTGTC 210 | GTTTCTGTTTGTACCGTACACAGTGTTGCTTGCTAATTGCATGATCAGGTTGGGCCGACG 211 | CGGTTGCTTGAATGTGGCCCCTGTAAGAAACTTTATTGAAGAAGGTTGCGATGGCGTTAC 212 | TGATTTATACGTGGGGATCTACGATGATCTTGCTAGCACTAATTTCACAGACAGGATAGC 213 | TGCGTGGGAGAATATTGTTGAGTGCACCTTTAGGACCAACAACGTAAAATTGGGTTACCT 214 | CATTGTAGATGAGTTTCACAACTTTGAAACGGAGGTCTACCGGCAGTCGCAATTTGGGGG 215 | CATAACTAACCTTGATTTTGACGCTTTTGAGAAAGCAATCTTTTTGAGCGGCACAGCCCC 216 | TGAGGCTGTTGCTGATGCTGCGTTGCAGCGTATTGGGCTTACGGGACTGGCCAAGAAGTC 217 | GATGGACATCAACGAGCTCAAACGGTCGGAAGATCTCAGCAGAGGTCTATCCAGCTATCC 218 | AACACGGATGTTTAATCTAATCAAGGAGAAATCCGAGGTGCCTTTAGGGCATGTTCATAA 219 | AATTTGGAAGAAAGTGGAATCACAGCCCGAAGAAGCACTGAAGCTTCTTTTAGCCCTCTT 220 | TGAAATTGAACCAGAGTCGAAGGCCATTGTAGTTGCAAGCACAACCAACGAAGTGGAAGA 221 | ATTGGCCTGCTCTTGGAGAAAGTATTTTAGGGTGGTATGGATACACGGGAAGCTTGGGTG 222 | CTGCAGAAAAGGTGTCTCGCACAAAGGAGTTTGTCACTGACGGTAGCATGCAAGTTCTCA 223 | TCGGAACGAAATTAGTGACTGAAGGAATTGACATTAAGCAATTGATGATGGTGATCATGC 224 | TTGATAATAGACTTAATATTATTGAGCTCATTCAAGGCGTAGGGAGACTAAGAGATGGGG 225 | GCCTCTGTTATCTATTATCTAGAAAAAACAGTTGGGCGGCAAGGAATCGTAAGGGTGAAT 226 | TACCACCGATTAAGGAAGGCTGTATAACCGAACAGGTACGCGAGTTCTATGGACTTGAAT 227 | CAAAGAAAGGAAAAAAAGGGCCAGCATGTTGGATGCTGTGGCTCCAGGACAGACCTGTCT 228 | GCTGACACAGTGGAACTGATAGAAAGAATGGACAGATTGGCTGAAAATCAGGCGACAGCT 229 | TCCATGTCGATCGTTGCGTTACCGTCTAGCTTCCAGGAGAGCAATAGCAGTGACAGGTGC 230 | AGAAAGTATTGCAGCAGTGATGAGGACAGCGACACGTGCATTCATGGTAGTGCTAATGCC 231 | AGTACCAATGCGACTACCAACTCCAGCACTAATGCTACTACCACTGCCAGCATCAACGTC 232 | 
AGGACTAGTGCGACTACCACTGCCAGCATCAACGTCAGGACTAGTGCGACTACCACTGAA 233 | AGTACCAACTCCAACACTAATGCTACTACCACTGAAAGTACCAACTCCAGCACTAATGCT 234 | ACTACCACTGCCAGCATCAACGTCAGGACTAGTGCGACTACCACTGAAAGTACCAACTCC 235 | AACACTAGTGCTACTACCACCGAAAGTACCGACTCCAACACTAGTGCTACTACCACCGAA 236 | AGTACCGACTCCAACACTAGTGCTACTACCACTGCTAGCACCAACTCCAGCACTAATGCC 237 | ACTACCACTGCTAGCACCAACTCCAGCACTAATGCCACTACCACTGAAAGTACCAACGCT 238 | AGTGCCAAGGAGGACGCCAATAAAGATGGCAATGCTGAGGATAATAGATTCCATCCAGTC 239 | ACCGACATTAACAAAGAGTCGTATAAGCGGAAAGGGAGTCAAATGGTTTTGCTAGAGAGA 240 | AAGAAACTGAAAGCACAATTTCCCAATACTTCCGAGAATATGAATGTCTTACAGTTTCTT 241 | GGATTTCGGTCTGACGAAATTAAACATCTTTTCCTCTATGGTATTGACATATACTTCTGC 242 | CCAGAGGGAGTATTCACACAATACGGATTATGCAAGGGCTGTCAAAAGATGTTCGAGCTC 243 | TGTGTCTGTTGGGCTGGCCAGAAAGTATCGTATCGGAGGATGGCTTGGGAAGCACTAGCT 244 | GTGGAGAGAATGCTGCGAAATGACGAGGAATACAAAGAATACTTGGAAGACATCGAGCCA 245 | TATCATGGGGACCCTGTAGGGTATTTGAAATTTTTTAGCGTAAAAAGGGGAGAGATCTAC 246 | TCTCAGATACAGAGAAATTATGCTTGGTACCTGGCCATTACTAGAAGAAGAGAAACAATT 247 | AGTGTATTGGATTCGACAAGAGGCAAGCAAGGGAGCCAAGTTTTCCGCATGTCTGGAAGG 248 | CAGATCAAAGAGTTGTATTATAAAGTATGGAGCAACTTGCGTGAATCGAAGACAGAGGTG 249 | CTGCAGTACTTTTTGAACTGGGACGAAAAAAAGTGCCGGGAAGAATGGGAGGCAAAAGAC 250 | GATACGGTCTTTGTGGAAGCGCTCGAGAAAGTTGGAGTTTTTCAGCGTTTGCGTTCCATG 251 | ACGAGCGCTGGACTGCAGGGTCCGCAGTACGTCAAGCTGCAGTTTAGCAGGCATCATCGA 252 | CAGTTGAGGAGCAGATATGAATTAAGTCTAGGAATGCACTTGCGAGATCAGCTTGCGCTG 253 | GGAGTTACCCCATCTAAAGTGCCGCATTGGACGGCATTCCTGTCGATGCTGATAGGGCTG 254 | TTCTACAATAAAACATTTCGGCAGAAACTGGAATATCTTTTGGAGCAGATTTCGGAGGTG 255 | TGGTTGTTACCACATTGGGTTGATTTGGCAAACGTTGAAGTTCTCGCTGCAGATAACACG 256 | AGGGTACCGCTGTACATGCTGATGGTAGCGGTTCACAAAGAGCTGGATAGCGATGATGTT 257 | CCAGACGGTAGATTTGATATAATATTACTATGTAGAGATTCGAGCAGAGAAGTTGGAGAG 258 | TGAAGGAAATTGTTGTTACGAAAGTCAGTGATTATGTATTGTGTAGTATAGTATATTGTA 259 | AGAAATTTTTTTTTCTAGGGAATATGCGTTTTGATGTAGTAGTATTTCACTGTTTTGATT 260 | TAGTGTTTGTTGCACGGCAGTAGCGAGAGACAAGTGGGAAAGAGTAGGATAAAAAGACAA 261 | 
TCTATAAAAAGTAAACATAAAATAAAGGTAGTAAGTAGCTTTTGGTTGAACATCCGGGTA 262 | AGAGACAACAGGGCTTGGAGGAGACGTACATGAGGGCTATTTAGGGCTATTTAGGGCTAT 263 | GTAGAAGTGCTGTAGGGCTAAAGAACAGGGTTTCATTTTCATTTTTTTTTTTTAATTTCG 264 | GTCAGAAAGCCGGGTAAGGAGTGACAGCGAGAGTAAAGATAGATGTGAAAAGTGTGGGTG 265 | TGGTGTGTGGGTGTGGGTGTGTGTGTGGGTGTGGTGTGTGGGTGTGGGTGTGGGTGTGGG 266 | TGTGGTGTGGGTGTGGTGTGTGGGTGTGGTGTGTGGGTGTGGTGTGGGTGTGGTGTGGTG 267 | TGTGGGTGTGTGGGTGTGGTGTGGTGTGTGGGTGTGGTGTGGGTGTGGTGTGTGTGTGG 268 | >TEL02R_S288C TEL02R SGD:S000028940 telomere Telomeric region on the right arm of Chromosome II 269 | TAAGTGTGTTTATGTATTATGGTTGAAGGATAGAATATTTTTATGTTTAGGTGATTTTAG 270 | TGGTGATTTTTTTGTAATATTGGCATAAGTGTATATAAATTGAGTGGTTAGTATACGGTG 271 | TAAAAGTGGTATAACGTATGTATTAAGAGCTGTTATACAATACTTGGGGCCGCCGAATGA 272 | GATATAGATATTAAAATGTGGATAATCGTGGGAGTTATGCGTAAATGGCACAGGGTATAG 273 | ACCGCTGAGGCAAGTGCCGTGAATAATGATGTGAGTGCATTTGGTACTGATTTAGTGAGA 274 | ATGGGGCCATGGTGTGGAATATGAAAGTAGGGTAAGTTTGAGATGGTATATACTGTAGCA 275 | TCCGTGTGCGTATGACATATCAGTAGAAGTGAAGGTGAGTGTGGCAAGTGGCGGTGGTGG 276 | TAGTGGTGGTATAGAGTGGTAGGGTAAGTATGTATGTATTATTTACGATCATTTGTTAAC 277 | GTTTCAATATGGAGGGTAGAACAACAGTACAGTGAGTAGGACATGGTGGATGGTAGGGTA 278 | ATAGTAGGGTAAGTGGTAGTGGAGTTGGATATGGGTAATTGGAGGGTAACGGTTATGGTG 279 | GACGGTGGTTAGTGGTAAGTAGAGAGATGATGGATGGTGGTTGGGAGTGGTATGGTTGAA 280 | TGAGACAGGGTAACGAGTGGGGAGGTAGGGTAATGGAGGGTAGGTTTGGAGACAGGTTCA 281 | TCAGGGTTAGAATAGGGTACTGTTAGGATTGTGTTAGGGTGTGTGGGTGTGGGTGTGGTG 282 | TGTGTGGGTGTGGTGTGTGGGTGTGT 283 | >TEL04R_S288C TEL04R SGD:S000028948 telomere Telomeric region on the right arm of Chromosome IV 284 | TCAATATGTTTATGTATTATTGTTGAAGGATAGAATATTTTTATGTTTAGGTGATTTTGG 285 | TGGTGATTTTTCTGTAATATTGGCATAAGTGTATATAAATTGAGTGGTTAGTATACGGTG 286 | TAAAAGCGGTATAACGTATGTATTAAGAGCAGTTTTACAATATTTGGGGCCGCCGAATGA 287 | GATATAGATATTAAAATGTGGATAATCATGGGCGTTATGGGTAAATGGCACAGGGTATAG 288 | ACCGCTGAGGCAAGTGCCGTGTATAATGATGTGAGTGCATTTGTACTGATTTAGTGAGAG 289 | ATGGGCCATGGAGTGGAGTGGAATGTGAGAGTAGGGTAAGTTTGAGATGATATATACTGT 290 | 
AGCATCCGTGTGCGTATGGCATATCAGTATACAAGTGAAGGTGAGTATGGCATGTGGTGG 291 | TGGGATTAGAGTGGTAGAGTAAGTATGTGTGTATTATTTACGATCATTTGTTAGCGTTTC 292 | AATAGTGGTGGGTAGAACAATAGTATGGTGAGTAGTAGATGGGAGATGGTAGGGTAAGTG 293 | GTAGTGGAGTTGGATATGGGTAATTGGAGGGTAACGGTTATGGTGGACGGTCGGTTGGTG 294 | GTAGTACACAGGGAGATGGATGGTGGTTGGGGTGGTATAGTTGAATGAGTCAGGGTAACG 295 | AGTGGGGAGGTAGGGTAATGGAGGGTAATTTGAGAGACAGGTTGGTCAGGCTTGGCTTGT 296 | CTTAGTCTTAGGCTTAGGCTTCTCTCTGGTGTGGGTGTGTGGGTGTGTGGGTGTGGTGTG 297 | GGTGTGGGTGTGGTGTGGGTGTGGTGGGTGTGGTGTGGTGTGTGTGGGTGTGGTGTGGTG 298 | TGTATATATATGTCACTGTATTGCATGCTGGATGGTGTTAGACAAGGCCGTAGGGACATA 299 | TAGCATCTAGGAAGTAACCTTGTACGAAAATAGGCAATATTTCCTGTTTAGGCGATTGTG 300 | ACGCAGATTTTAGTCCAACGATCTAGCGTCAAGGAATTTTTTTATAGTGGGACATTGCAC 301 | CAAGGAAGTAACTTGATACGTCGTGGGTGAATGGGTCTGTTTTCTTATTCGGCGGGGTAA 302 | TACATTTTTGGGGGAAGTTTGTCTGTCTGACGCGCCATATGTAGGTACGCCAAAAAGGGC 303 | TCCTCTACTTCGAAGCGCGAGGTCGTATACCTAATAAGGAAATGTAATTTATAACTTTCT 304 | ATTATATTGGTCTTTTCGAGAGCGGAAGAAGTTGTAGGCTAAGCGCAGGCTAAGCGTAGG 305 | TCCATATTTAAAGTATCCAAGAGAATATCCACGAAGCGGCTGAGCAACGAACAGAATCCT 306 | GGTTCTCCTCGACTAAGCAGATAGTTAAGATACTGTGCACCATGGAAATTGAAAACGAAA 307 | GTACGTACCGACTACTTTATTTTTGCAGGCCGGAAATCAAGCGATGAATGAGACATCCTT 308 | CTGTTTTCTATGTTGGGACAGACAGTCGCTTATCTTAGTGAGATTTCTTATTAACTGAAT 309 | TTTCTTTGCTGCTGCTGGAGATTTGCACCTGCATAGCGCAGATTCTGCTTCTTCTCAATA 310 | GAGTAGCTTAATTATTACATTCTTAGATGATGATAAGACGGAAACTGGACAATCTTTTGT 311 | TTATATTGATGGATTTCTTGTCAAAAAGCATAACAATCAACATACTATTGTTAATTTCGA 312 | AACTTACAAAAATAAAATGAAAGTTTCCGATAGGCGTAAGTTTGAAAAAGCAAACTTTGA 313 | CGAGTTTGAGTCGGCTCTAAATAACAAAAACGACTTGGTACATTGTCCCTCAATAACTTT 314 | ATTTGAATCGATCCCCACGGAAGTGCGGTCATTCTACGAAGACGAAAAGTCTGGCCTAAT 315 | CAAAGTGGTAAAATTCAGAACTGGTGCAATGGATAGGAAAAGGTCTTTTGAAAAAATTGT 316 | CATTTCCGTCATGGTCGGGAAAAATGTACAAAAGTTCCTGACATTTGTTGAAGACGAACC 317 | AGATTTCCAGGGCGGACCAATCCCTTCAAAGTATCTTATTCCCAAGAAAATCAACTTGAT 318 | GGTCTACACGTTGTTTCAAGTGCATACTTTGAAATTCAATAGAAAGGATTACGATACCCT 319 | 
TTCTCTTTTTTACCTCAACAGAGGATACTATAATGAGTTGAGTTTCCGTGTCCTGGAACG 320 | TTGTCACGAAATAGCGAGTGCCAGGCCGAACGACAGCTCTACGATGCGTACTTTCACTGA 321 | CTTTGTTTCTGGCGCACCTATTGTAAGGAGTCTTCAGAAAAGCACCATAAGGAAATATGG 322 | GTACAATTTGGCACCCTACATGTTCTTGTTACTACACGTAGATGAGCTATCGATTTTTTC 323 | TGCATACCAAGCAAGTTTACCTGGCGAAAAGAAAGTCGACACAGAGCGGCTGAAGCGTGA 324 | TCTATGCCCACGTAAACCCATTGAGATAAAGTACTTTTCACAGATATGTAACGATATGAT 325 | GAACAAAAAAGACCGATTGGGTGATATTTTGCATATTATCTTGCGAGCATGTGCGCTCAA 326 | TTTCGGGGCGGGTCCCCGTGGTGGCGCTGGTGACGAAGAGGATCGATCTATTACGAATGA 327 | AGAACCCATTATTCCCTCTGTGGACGAGCATGGCTTGAAAGTATGTAAGTTGCGTAGTCC 328 | TAACACTCCACGAAGACTCAGAAAAACACTAGATGCCGTGAAAGCTTTATTGGTGTCGTC 329 | TTGTGCTTGTACTGCAAGGGATTTAGATATATTTGATGACACCAACGGCGTTGCAATGTG 330 | GAAATGGATCAAAATTCTGTACCACGAAGTAGCGCAGGAAACCACGCTGAAGGACTCTTA 331 | TAGAATAACTTTGGTACCTTCTTCTGATGGTATATCAGTATGTGGAAAACTTTTTAATCG 332 | CGAGTATGTCCGCGGCTTTTACTTTGCATGCAAGGCTCAGTTCGATAACCTTTGGGGAGA 333 | GTTGAACAACTGCTTTTATATGCCTACAGTGGTTGATATTGCCAGCCTCATTTTGCGTAA 334 | TCGAGAAGTTTTGTTCAGAGAGCCAAAGCGAGGAATTGACGAGTATCTGGAAAACGATTC 335 | TTTTCTTCAAATGATACCTGTTAAATATCGTGAAATTGTGCTGCCCAAGTTGAGAAGAGA 336 | TACTAACAAAATGACCGCGGCTCTTAAAAATAAAGTCACTGTTGCAATTGACGAGCTTAC 337 | GGTGCCACTTATGTGGATGGTCCATTTTGCCGTAGGATACCCTTACCGTTATCCAGAGCT 338 | TCAGCTACTCGCTTTTGCCGGTCCTCAGCGCAACGTATACGTCGATGATACAACAAGACG 339 | CATCCAACTGTACACTGATTACAACAAGAACGGTTCATCGGAGCCTCGACTTAAGACGCT 340 | TGACGGACTCACTTCAGATTACGTGTTTTATTTTGTCACTGTGCTAAGGCAAATGCAAAT 341 | ATGTGCGCTTGGTAACAGTTATGACGCTTTTAATCATGATCCTTGGATGGATGTGGTGGG 342 | ATTTGAGGATCCAGATCAAGTAACAAATCGAGACATTTCGAGGATAGTTTTGTATTCCTA 343 | CATGTTTCTGAATACCGCGAAGGGCTGTCTGGTTGAATACGCAACTTTTCGGCAGTACAT 344 | GAGGGAACTTCCGAAGAATGCACCTCAGAAGCTGAATTTTCGGGAGATGCGTCAGGGGTT 345 | GATTGCCCTAGGACGGCACTGCGTAGGTAGCAGATTTGAAACAGATTTGTACGAGTCGGC 346 | GACGAGTGAACTCATGGCCAATCATTCCGTTCAAACAGGGCGAAATATTTACGGTGTGGA 347 | TTCCTTTTCGTTAACTAGTGTCAGTGGGACGACCGCCACTTTATTGCAGGAACGAGCTTC 348 | 
CGAGCGCTGGATTCAATGGTTAGGCCTTGAAAGCGACTACCATTGTTCATTCTCTAGTAC 349 | TCGGAATGCGGAAGACGTAGTGGCAGGTGAGGCGGCGAGTTCAGATCATGATCAAAAAAT 350 | TTCAAGAGTAACGCGAAAAAGGCCCCGAGAGCCCAAGAGTACAAACGATATCCTCGTCGC 351 | AGGCCAGAAACTCTTTGGCAGCTCCTTTGAATTCAGGGACTTGCATCAGTTGCGCTTATG 352 | TCATGAAATATACATGGCAGACACACCCTCTGTGGCAGTACAGGCCCCACCGGGCTATGG 353 | TAAGACGGAGTTATTTCATCTCCCCTTGATAGCACTGGCGTCTAAGGGCGACGTGAAATA 354 | TGTGTCGTTTCTGTTTGTACCGTACACAGTGTTGCTTGCTAATTGCATGATCAGGTTGAG 355 | CCGATGCGGTTGCTTGAATGTGGCCCCTGTAAGAAACTTTATTGAAGAAGGTTGCGATGG 356 | CGTTACTGATTTATACGTGGGGATCTACGATGATCTTGCTAGCACTAATTTCACAGACAG 357 | GATAGCTGCGTGGGAGAATATTGTTGAGTGCACCTTTAGGACCAACAACGTAAAATTGGG 358 | TTACCTCATTGTAGATGAGTTTCACAACTTTGAAACGGAGGTCTACCGGCAGTCGCAATT 359 | TGGGGGCATAACTAACCTTGATTTTGACGCTTTTGAGAAAGCAATCTTTTTGAGCGGCAC 360 | AGCACCTGAGGCTGTAGCTGATGCTGCGTTGCAGCGTATTGGGCTTACGGGACTGGCCAA 361 | GAAGTCGATGGACATCAACGAGCTCAAACGGTCGGAAGATCTCAGCAGAGGTCTATCCAG 362 | CTATCCAACACGGATGTTTAATCTAATCAAGGAGAAATCCGAGGTGCCTTTAGGGCATGT 363 | TCATAAAATTTGGAAGAAAGTGGAATCACAGCCCGAAGAAGCACTGAAGCTTCTTTTAGC 364 | CCTCTTTGAAATTGAACCAGAGTCGAAGGCCATTGTAGTTGCAAGCACAACCAACGAAGT 365 | GGAAGAATTGGCCTGCTCTTGGAGAAAGTATTTTAGGGTGGTATGGATACACGGGAAGCT 366 | GGGTGCTGCAGAAAAGGTGTCTCGCACAAAGGAGTTTGTCACTGACGGTAGCATGCGAGT 367 | TCTCATCGGAACGAAATTAGTGACTGAAGGAATTGACATTAAGCAATTGATGATGGTGAT 368 | CATGCTTGATAATAGACTTAATATTATTGAGCTCATTCAAGGCGTAGGGAGACTAAGAGA 369 | TGGGGGCCTCTGTTATCTATTATCTAGAAAAAACAGTTGGGCGGCAAGGAATCGTAAGGG 370 | TGAATTACCACCGATTAAGGAAGGCTGTATAACCGAACAGGTACGCGAGTTCTATGGACT 371 | TGAATCAAAGAAAGGAAAAAAGGGCCAGCATGTTGGATGCTGTGGCTCCAGGACAGACCT 372 | GTCTGCTGACACAGTGGAACTGATAGAAAGAATGGACAGATTGGCTGAAAAACAGGCGAC 373 | AGCTTCCATGTCGATCATTGCGTTACCGTCTAGCTTCCAGGAGAGCAATAGCAGTGACAG 374 | GTGCAGAAAGTATTGCAGCAGTGATGAGGACAGCGACACGTGCATTCATGGTAGTGCTAA 375 | TGCCAGTACCAATGCGACTACCAACTCCAGCACTAATGCTACTACCACTGCCAGCACCAA 376 | CGTCAGGACTAGTGCTACTACCACTGCCAGCATCAACGTCAGGACTAGTGCGATTACCAC 377 | 
TGAAAGTACCAACTCCAGCACTAATGCTACTACCACTGCCAGCACCAACGTCAGGACTAG 378 | TGCTACTACCACTGCCAGCATCAACGTCAGGACTAGTGCGACTACCACTGAAAGTACCAA 379 | CTCCAACACTAGTGCTACTACCACCGAAAGTACCGACTCCAACACTAGTGCTACTACCAC 380 | CGAAAGTACCGACTCCAACACTAGTGCTACTACCACTGCTAGCACCAACTCCAGCACTAA 381 | TGCCACTACCACTGCTAGCACCAACTCCAGCACTAATGCCACTACCACTGAAAGTACCAA 382 | CGCTAGTGCCAAGGAGGACGCCAATAAAGATGGCAATGCTGAGGATAATAGATTCCATCC 383 | AGTCACCGACATTAACAAAGAGTCGTATAAGCGGAAAGGGAGTCAAATGGTTTTGCTAGA 384 | GAGAAAGAAACTGAAAGCACAATTTCCCAATACTTCCGAGAATATGAATGTCTTACAGTT 385 | TCTTGGATTTCGGTCTGACGAAATTAAACATCTTTTCCTCTATGGTATTGACGTATACTT 386 | CTGCCCAGAGGGAGTATTCACACAATACGGATTATGCAAGGGCTGTCAAAAGATGTTCGA 387 | GCTCTGTGTCTGTTGGGCTGGCCAGAAAGTATCGTATCGGAGGATGGCTTGGGAAGCACT 388 | AGCTGTGGAGAGAATGCTGCGAAATGACGAGGAATACAAAGAATACTTGGAAGACATCGA 389 | GCCATATCATGGGGACCCTGTAGGATATTTGAAATATTTTAGCGTAAAAAGGGGAGAGAT 390 | CTACTCTCAGATACAGAGAAATTATGCTTGGTACCTGGCCATTACTAGAAGAAGAGAAAC 391 | AATTAGTGTATTGGATTCGACAAGAGGCAAGCAAGGGAGCCAAGTTTTCCGCATGTCTGG 392 | AAGGCAGATCAAAGAGTTGTATTATAAAGTATGGAGCAACTTGCGTGAATCGAAGACAGA 393 | GGTGCTGCAGTACTTTTTGAACTGGGACGAGAAAAAGTGCCGGGAAGAATGGGAGGCAAA 394 | AGACGATACGGTCTTTGTGGAAGCGCTCGAGAAAGTTGGAGTTTTTCAGCGTTTGCGTTC 395 | CATGACGAGCGCTGGACTGCAGGGTCCGCAGTACGTCAAGCTGCAGTTTAGCAGGCATCA 396 | TCGACAGTTGAGGAGCAGATATGAATTAAGTCTAGGAATGCACTTGCGAGATCAGCTTGC 397 | GCTGGGAGTTACCCCATCTAAAGTGCCGCATTGGACGGCATTCCTGTCGATGCTGATAGG 398 | GCTGTTCTACAATAAAACATTTCGGCAGAAACTGGAATATCTTTTGGAGCAGATTTCGGA 399 | GGTGTGGTTGTTACCACATTGGCTTGATTTGGCAAACGTTGAAGTTCTCGCTGCAGATAA 400 | CACGAGGGTACCGCTGTACATGCTGATGGTAGCGGTTCACAAAGAGCTGGATAGCGATGA 401 | TGTTCCAGACGGTAGATTTGATATAATATTACTATGTAGAGATTCGAGCAGAGAAGTTGG 402 | AGAGTGAAGGAAATTGTTGTTACGAAAGTCAGTGATTATGTATTGTGTAGTATAGTATAT 403 | TGTAAGAAATTTTTTTTTCTAGGGAATATGCGTTTTGATGTAGTAGTATTTCACTGTTTT 404 | GATTTAGTGTTTGTTGCACGGCAGTAGCGAGAGACAAGTGGGAAAGAGTAGGATAAAAAG 405 | ACAATCTATAAAAAGTAAACATAAAATAAAGGTAGTAAGTAGCTTTTGG 406 | >TEL14R_S288C TEL14R SGD:S000028993 telomere Telomeric region on 
the right arm of Chromosome XIV 407 | TCAATATGTTTATTTCGTAAAGTTGAAAGATAAATAATTTTTATGTTTAGGTGATTTTGG 408 | TGGTGATTTTGTGGGTATATTGAAATAAGTGTGTATAAATTGAGTGGTTAGTATATGGTG 409 | CAAAAGTGGTATAACGTATGTATTAAGAGCAGTTATACAATATTTGGGGCCGCCGAATGA 410 | GATATAGATATTAAAATGTGGATAATCGTGGGAGTTATGGGTAAATGGCTCAGGGTATAG 411 | ACCGCTGAGGCAAGTGCCGTGCATAATGATGTGGGTGCATTTAGTACTGATTTAGTGAGA 412 | GATGGGTCATGGAGTGGAGTGCAATATGAGAGTAGGGTAAGTTGAGATGGTATATACTGT 413 | AGCATTCGTGTGCGTATGCCCCATCAATATAAGTGAAGGTGAGTATGGCATGTGGTGGTG 414 | GGATTAGAGTGGTAGGGTAAGTATATGTGTATTATTTACGATCATTTGTTAACGTTTCAA 415 | TATGGTGGGTGAACAACAGTATAGTGAGTAGCAGATGGGGGATGGTAGGGTAATGGCAGG 416 | GTAAGTGGTAGTGGGGTTGGATATGGGTAATTGGAGGGTAACGGTTATGGTGGGCGGTGG 417 | TTAGTGGTAAGTAGAGAGATGGATGGTGGTTGGAGTTGTAGAATGGAATGGAACAGGGTA 418 | ACGAGTGGGGAGGTAGGGTAATGGAGGGTACGTTAAGAGACAGGTTTATCAGGGTTGGAT 419 | TAGAATAGGGTTAGGGTAGTGTTAGGGTAGTGTTAGGGTAGTGTGTGGGTGTGGTGTGTG 420 | TGGGTGTGTGGGTGTGGGTGTGTGGGTGTGGGTGTGGTGTGTGGGTGTGGTGTGTGGGTG 421 | TGGTGTGGGTGTGGTGTGTGGGTGTGTGTGGGTGTGGTGTGGGTGTGGTGTGGTGTGTGG 422 | GTGTGGGTGTGGGTGTGGTGTGTGTGGGTGTGGTGTGGGTGTGTGGGTGTGGGTGTGGTG 423 | TGTGGTGTGTGTGTGGGTGTGGGTGTGGGTGTGGTGTGTGGGTGTGGGTGTGGTGTGTGG 424 | GTGTGGTGTGTGGGTGTGTGTGGGTGTGGTGTGGGT 425 | >TEL09R_S288C TEL09R SGD:S000028970 telomere Telomeric region on the right arm of Chromosome IX 426 | TCAAAGTGTTTATTTCGTAAAGTTGAAAGGTAAAATATTTTTATGTTTAGGTGATTTTAG 427 | TGGTAATTTTTCTGTAATATTGACATAAGTTTATAAAAATTGAGTGGTTAGTATATGGCG 428 | TAAAAGTGGTATAATGTATGCATTAAGAGCAGTTATACAATATTTGGAGCCGCTGAATGA 429 | GATATAGATATTAAAATGTGGATAATCGTGAGCTTTATGGGTAAATGGCACAGGGTATAG 430 | ACCGCTGAGGCAAGTACCGTGCACAATGATGTGAGTGCATTTGTACTGATTTAGTGAGAG 431 | ATGGGCCATGGAGTGGAGTGGAATGTGAGAGTAGGGTAAGTTTGAGAGTGGTATATACTG 432 | TAGCATCCGTGTGCGTATGCCCTATCAGTATACAATTAAAGGTGAGTATGGCATGTGGTG 433 | GTGGGATTAGAGTGGTAGGGTAAGTATGTGTGTATTATTTACGATCATTTGTTAACGTTT 434 | CAATATGGTGGGTAGAACAACAGTATAGTGAGTAGCAGATGGTGGATGGTAGGGTAATGG 435 | TAGGGTAAGTGGCAGTGGGGTTGGATATGGGTAATTGGAGGGTAACGGTTATGGTGGACG 436 | 
GTGGGTTGGTGGTAGTACGTAGAGAGATGGATTGTGGTTCGGAGTGGTATGGTTGAATGG 437 | AACAGGGTAACAAGTGGGGAGGAAGGGTAATGGAGGGTAAGTTGAGAGACAGGATGGTTA 438 | GGGTTAAAGTAGGGTAGTGTTAGGGTAGTGTGGTGTGTGGGTGTGGGTGTGGATGTGGTG 439 | TGGATGTGGTGTGGGTGTGGATGTGGGTGTGGTGTGTGTGT 440 | >TEL07R_S288C TEL07R SGD:S000028960 telomere Telomeric region on the right arm of Chromosome VII 441 | TGAGTGTGTTTATGTATTATTGTTGAAAAGTAGAATATTTTTATGTTTAGGTGATTTTGA 442 | TGATATTTTTATGTAATATTGACATAAGTGCATATAAATTGAGTGGTTAGTATATGGTGC 443 | AAAAGTGGTACAACATATGTATTAAGGGCATTTATACAATATTTGGGGCCGCCGAATGAG 444 | ATATAGATATTAAAATGTGGATAATCATGGGATTTATGGGTAAATGGCACAGGGTATAGA 445 | ACGCTGAGGCAAGCGCCGTGCATAATGATGTGAGTGCATCTAGTACTGATTTAGTGAGAA 446 | TGGGCCATGGAGTGGAGTGTGAGAGTAGGGTAACTTGAGATGGTATATACTGTAGCATCC 447 | GTGTGCGTATGCCGTATCAGTATACAAGTGAGGGTGAGTGTGGCATGTGGTGGTGGGATT 448 | AGAGTGGCAGGGTAAGTATGTGTGTATTATTTACGATCATTTGTTAACATTTCAATATGT 449 | TGGGTAGAACAACAGTATAGTGAGTAACAAGATGGGGCATGGTAGGGTAATGGCAGGGTA 450 | AGTGGTAGTGGAGTTGAATATGGGCAATTGGAGGGTAACAGGTGGTGGATGTGGGTGAGT 451 | GGTAGTAAGTAGAGAGATGGATGGTGGTTGGGGTGTGGTATAGTTGAATGAGACAGGGTA 452 | ACTTGTGGGGAGGTAGGGTAATGGAGGGTAAGTTGAGAGACAGGTTAAATCATATATATG 453 | TCACTGTATTGCATGCTGGATGGTGTTAGACAAGGCCGTAGGGACATATAGCATCTAGGA 454 | AGTAACCTTGTACGAAAATAGGCAATATTTCCTGTTTAGGCGATTGTGACGCAGATTTTA 455 | GTCCAACGATCTAGCGTCAAGGAATTTTTTTATAGTGGGACATTGCACCAAGGAAGTAAC 456 | TTGATACGTCGTGGGTGAATGGGTCTGTTTTCTTATTCGGCGGGGTAATACATTTTTGGG 457 | GGAAGTTTGTCTGTCTGACGCGCCATATGTAGGTACGCCAAAAAGGGCTCCTCTACTTCG 458 | AAGCGCGAGGTCGTATACCTAATAAGGAAATGTAATTTATAACTTTCTATTATATTGGTC 459 | TTTTCGAGAGCGGAAGAAGTTGTAGGCTAAGCGCAGGCTAAGCGTAGGTCCATATTTAAA 460 | GTATCCAAGAGAATATCCACGAAGCGGCTGAGCAACGAACAGAATCCTGGTTCTCCTCGA 461 | CTAAGCAGATAGTTAAGATACTGTGCACCATGGAAATTGAAAACGAACGTACGTACCGAC 462 | TACTTTATTTTTGCAGGCCGGAAATCAAGCGATGAATGAGACATCCTTCTGCTTTCTATG 463 | TTGTGCTTGAAGGGGACAGACAGTCGCTTATCTTAGTGAGATTTCTTACTAACTGAATTT 464 | ACTTTGCTGCTGCTAGAGATTTGCACCTGCATAGCGCAGATTCTGCATCTTCTCAATAGC 465 | 
TTAATTATTACATTCTTAGATGATGATAAGACGGAAACTGGACAATCTTTTGTTTATATT 466 | GATGGATTTCTTGTCAAAAAGCATAACAATCAACATACTATTGTTAATTTCGAAACTTAC 467 | AAAAATAAAATGAAAGTTTCCGATAGGCGTAAGTTTGAAAAAGCAAACTTTGACGAGTTT 468 | GAGTCGGCTCTAAATAACAAAAACGACTTGGTACATTGTCCCTCAATAACTTTATTTGAA 469 | TCGATCCCCACGGAAGTGCGGTCATTCTACGAAGACGAAAAGTCTGGTCTAATCAAAGTG 470 | GTAAAATTCAGAACTGGTGCAATGGATAGGAAAAGGTCTTTTGAAAAAATTGTCGTTTCC 471 | GTCATGGTCGGGAAAAATGTACAAAAGTTCCTGACGTTTGTTGAAGACGAACCAGATTTC 472 | CAGGGCGGACCAATCCCTTCAAAGTATCTTATTCCCAAGAAAATCAACTTGATGGTCTAC 473 | ACGTTGTTTCAAGTGCATACTTTGAAATTCAATAGAAAGGATTACGATACCCTTTCTCTT 474 | TTTTACCTCAACAGAGGATACTATAATGAGTTGAGTTTCCGTGTCCTGGAACGTTGTTAC 475 | GAAATAGCGAGTGCCAGGCCGAACGACAGCTCTACGATGCGTACTTTCACTGACTTTGTT 476 | TCTGGCACACCTATTGTAAGGGGTCTTCAGAAAAGCACCATAAGGAAATATGGATACAAT 477 | TTGGCACCCTACATGTTTTTGTTACTACACGTAGATGAGCTATCGATTTTTTCTGCATAC 478 | CAAGCAAGTTTACCTGGCGAAAAGAAAGTCGACACAGAGCGGCTGAAGCGTGATCTATGC 479 | CCACGTAAACCCACTGAGATAAAGTACTTTTCACAGATATGTAACGATATGATGAACAAA 480 | AAGGACCGATTGGGTGATATTTTGCATATTATCTTGCGAGCATGTGCGCTCAATTTCGGG 481 | GCGGGTCCCCGTGGTGGCGCTGGTGACGAAGAGGATCGATCCATTACGAATGAAGAACCC 482 | ATTATTCCCTCTGTGGACGAGCATGGCTTGAAAGTATGTAAGTTGCGCAGTCCTAACACT 483 | CCACGAAGACTCAGAAAAACACTAGATGCCGTGAAAGCTTTATTGGTGTCGTCTTGTGCT 484 | TGTACCGCAAGGGATTTAGATATATTTGATGACAACAACGGCGTTGCGATGTGGAAATGG 485 | ATCAAAATTCTGTACCACGAAGTAGCGCAGGAAACCGCGCTGAAGGACTCTTATAGAATA 486 | ACTTTGGTACCTTCTTCTGATGGTGTATCAGTATGTGGAAAACTGTTTAATCGCGAGTAT 487 | GTCCGCGGCTTTTACTTTGCATGCAAGGCTCAGTTTGATAACCTTTGGGAAGAATTGAAC 488 | GACTGCTTTTATATGCCTACAGTGGTTGATATTGCCAGCCTCATTTTGCGTAATCGAGAA 489 | GTTTTGTTCAGAGAGCCAAAGCGAGGAATTGACGAGTATCTGGAGAACGATTCTTTCCTT 490 | CAAATGATACCTGTTAAATATCGTGAAATTGTGCTGCCCAAGTTGAGAAGAGATACTAAC 491 | AAAATGACCGCGGCTCTTAAAAATAAAGTCACTGTTGCAATTGACGAGCTTACGGTGCCA 492 | CTTATGTGGATGATCCATTTTGCCGTAGGATACCCTTACCGTTATCCAGAGCTTCAGCTA 493 | CTCGCTTTTGCCGGTCCTCAGCGCAACGTATACGTCGATGATACAACAAGACGCATCCAA 494 | 
CTGTACACTGATTACAACAAGAACGGTTCATCGGAGCCTCGACTTAAGACGCTTGACGGA 495 | CTCACTTCAGATTACGTGTTTTATTTTGTCACTGTGCTAAGGCAAATGCAAATATGTGCG 496 | CTTGGTAACAGTTATGACGCTTTTAATCATGATCCTTGGATGGATGTGGTGGGATTTGAG 497 | GATCCAGATCAAGTAACAAATCGAGACATTTCGAGGATAGTTTTGTATTCCTACATGTTT 498 | CTGAATACCGCGAAGGGCTGTCTGGTTGAATACGCAACTTTTCGGCAGTACATGAGGGAA 499 | CTTCCGAAGAATGCACCTCAGAAGCTGAATTTTCGGGAGATGCGTCAGGGGTTGATTGCC 500 | CTAGGACGGCACTGCGTAGGTAGCAGATTTGAAACAGATTTGTACGAGTCGGCGACGAGT 501 | GAACTCATGGCCAATCATTCCGTTCAAACAGGGCGAAATATTTACGGTGTGGATTCCTTT 502 | TCGTTAACTAGTGTCAGTGGGACGACCGCCACTTTATTGCAGGAACGAGCTTCCGAGCGC 503 | TGGATTCAATGGTTAGGCCTTGAAAGCGACTACCATTGTTCATTCTCTAGTACTCGGAAT 504 | GCGGAAGACGTAGTGGCAGGTGAGGCGGCGAGTTCAGATCATCATCAAAAAATTTCAAGA 505 | GTAACGCGAAAAAGGCCCCGAGAGCCCAAGAGTACAAACGATATCCTCGTCGCAGGCCAG 506 | AAACTCTTTGGCAGCTCCTTTGAATTCAGGGACTTGCATCAGTTGCGCTTATGTCATGAA 507 | ATATACATGGCAGACACACCCTCTGTGGCAGTACAGGCCCCACCGGGCTATGGTAAGACG 508 | GAGTTATTTCATCTCCCCTTGATAGCACTGGCGTCTAAGGGCGACGTGAAATATGTGTCG 509 | TTTCTGTTTGTACCGTACACAGTGTTGCTTGCTAATTGCATGATCAGGTTGAGCCGATGC 510 | GGTTGCTTGAATGTGGCCCCTGTAAGAAACTTTATTGAAGAAGGTTGCGATGGCGTTACT 511 | GATTTATACGTGGGGATCTACGATGATCTTGCTAGCACTAATTTCACAGACAGGATAGCT 512 | GCGTGGGAGAATATTGTTGAGTGCACCTTTAGGACCAACAACGTAAAATTGGGTTACCTC 513 | ATTGTAGATGAGTTTCACAACTTTGAAACGGAGGTCTACCGGCAGTCGCAATTTGGGGGC 514 | ATAACTAACCTTGATTTTGACGCTTTTGAGAAAGCAATCTTTTTGAGCGGCACAGCACCT 515 | GAGGCTGTAGCTGATGCTGCGTTGCAGCGTATTGGGCTTACGGGACTGGCCAAGAAGTCG 516 | ATGGACATCAACGAGCTCAAACGGTCGGAAGATCTCAGCAGAGGTCTATCCAGCTATCCA 517 | ACACGGATGTTTAATCTAATCAAGGAGAAATCCGAGGTGCCTTTAGGGCATGTTCATAAA 518 | ATTTGGAAGAAAGTGGAATCACAGCCCGAAGAAGCACTGAAGCTTCTTTTAGCCCTCTTT 519 | GAAATTGAACCAGAGTCGAAGGCCATTGTAGTTGCAAGCACAACCAACGAAGTGGAAGAA 520 | TTGGCCTGCTCTTGGAGAAAGTATTTTAGGGTGGTATGGATACACGGGAAGCTGGGTGCT 521 | GCAGAAAAGGTGTCTCGCACAAAGGAGTTTGTCACTGACGGTAGCATGCGAGTTCTCATC 522 | GGAACGAAATTAGTGACTGAAGGAATTGACATTAAGCAATTGATGATGGTGATCATGCTT 523 | 
GATAATAGACTTAATATTATTGAGCTCATTCAAGGCGTAGGGAGACTAAGAGATGGGGGC 524 | CTCTGTTATCTATTATCTAGAAAAAACAGTTGGGCGGCAAGGAATCGTAAGGGTGAATTA 525 | CCACCGATTAAGGAAGGCTGTATAACCGAACAGGTACGCGAGTTCTATGGACTTGAATCA 526 | AAGAAAGGAAAAAAGGGCCAGCATGTTGGATGCTGTGGCTCCAGGACAGACCTGTCTGCT 527 | GACACAGTGGAACTGATAGAAAGAATGGACAGATTGGCTGAAAAACAGGCGACAGCTTCC 528 | ATGTCGATCGTTGCGTTACCGTCTAGCTTCCAGGAGAGCAATAGCAGTGACAGGTGCAGA 529 | AAGTATTGCAGCAGTGATGAGGACAGCGACACGTGCATTCATGGTAGTGCTAATGCCAGT 530 | ACCAATGCGACTACCAACTCCAGCACTAATGCTACTACCACTGCCAGCACCAACGTCAGG 531 | ACTAGTGCTACTACCACTGCCAGCATCAACGTCAGGACTAGTGCGACTACCACTGAAAGT 532 | ACCAACTCCAGCACTAATGCTACTACCACTGCCAGCACCAACGTCAGGACTAGTGCTACT 533 | ACCACTGCCAGCATCAACGTCAGGACTAGTGCGACTACCACTGAAAGTACCAACTCCAAC 534 | ACTAGTGCTACTACCACCGAAAGTACCGACTCCAACACTAGTGCTACTACCACCGAAAGT 535 | ACCGACTCCAACACTAGTGCTACTACCACTGCTAGCACCAACTCCAGCACTAATGCCACT 536 | ACCACTGCTAGCACCAACTCCAGCACTAATGCCACTACCACTGAAAGTACCAACGCTAGT 537 | GCCAAGGAGGACGCCAATAAAGATGGCAATGCTGAGGATAATAGATTCCATCCAGTCACC 538 | GACATTAACAAAGAGTCGTATAAGCGGAAAGGGAGTCAAATGGTTTTGCTAGAGAGAAAG 539 | AAACTGAAAGCACAATTTCCCAATACTTCCGAGAATATGAATGTCTTACAGTTTCTTGGA 540 | TTTCGGTCTGACGAAATTAAACATCTTTTCCTCTATGGTATTGACGTATACTTCTGCCCA 541 | GAGGGAGTATTCACACAATACGGATTATGCAAGGGCTGTCAAAAGATGTTCGAGCTCTGT 542 | GTCTGTTGGGCTGGCCAGAAAGTATCGTATCGGAGGATGGCTTGGGAAGCACTAGCTGTG 543 | GAGAGAATGCTGCGAAATGACGAGGAATACAAAGAATACTTGGAAGACATCGAGCCATAT 544 | CATGGGGACCCTGTAGGATATTTGAAATATTTTAGCGTAAAAAGGGGAGAGATCTACTCT 545 | CAGATACAGAGAAATTATGCTTGGTACCTGGCCATTACTAGAAGAAGAGAAACAATTAGT 546 | GTATTGGATTCGACAAGAGGCAAGCAAGGGAGCCAAGTTTTCCGCATGTCTGGAAGGCAG 547 | ATCAAAGAGTTGTATTATAAAGTATGGAGCAACTTGCGTGAATCGAAGACAGAGGTGCTG 548 | CAGTACTTTTTGAACTGGGACGAAAAAAAGTGCCGGGAAGAATGGGAGGCAAAAGACGAT 549 | ACGGTCTTTGTGGAAGCGCTCGAGAAAGTTGGAGTTTTTCAGCGTTTGCGTTCCATGACG 550 | AGCGCTGGACTGCAGGGTCCGCAGTACGTCAAGCTGCAGTTTAGCAGGCATCATCGACAG 551 | TTGAGGAGCAGATATGAATTAAGTCTAGGAATGCACTTGCGAGATCAGCTTGCGCTGGGA 552 | 
GTTACCCCATCTAAAGTGCCGCATTGGACGGCATTCCTGTCGATGCTGATAGGGCTGTTC 553 | TGCAATAAAACATTTCGGCAGAAACTGGAATATCTTTTGGAGCAGATTTCGGAGGTGTGG 554 | TTGTTACCACATTGGCTTGATTTGGCAAACGTTGAAGTTCTCGCTGCAGATAACACGAGG 555 | GTACCGCTGTACATGCTGATGGTAGCGGTTCACAAAGAGCTGGATAGCGATGATGTTCCA 556 | GACGGTAGATTTGATATATTATTATGTAGAGATTCGAGCAGAGAAGTTGGAGAGTGAAGG 557 | AAATTGTTGTTACGAAAGTCAGTGATTATGTATTGTGTAGTATAGTATATTGTAAGAAAT 558 | TTTTTTTTCTAGGGAATATGCGTTTTGATGTAGTAGTATTTCACTGTTTTGATTTAGTGT 559 | TTGTTGCACGGCAGTAGCGAGAGACAAGTGGGAAAGAGTAGGATAAAAAGACAATCTATA 560 | AAAAGTAAACATAAAATAAAGGTAGTAAGTAGCTTTTGGTTGAACATCCGGGTAAGAGAC 561 | AACAGGGCTTGGAGGAGACGTACATGAGGGCTATTTAGGGCTATTTAGGGCTATGTAGAA 562 | GTGCTGTAGGGCTAAAGAACAGGGTTTCATTTTCATTTTTTTTTTT 563 | >TEL03R_S288C TEL03R SGD:S000028944 telomere Telomeric region on the right arm of Chromosome III 564 | TCAAAGTGTTTATGTATTATGGTTGAAGAATAGAATATTTTTATGTTTAGGTGATTTTAG 565 | TGGTGATTTTTCTGTAATATTGACATAAGTGTATATAAATTAAGTGGTTAGTATACGGTG 566 | AAAAAGAGGTATAACGTATGTATTAAGGGAATTTATACGATATTTGGGCCCGCCGAATGA 567 | GATATAGATATTAAAATGTGGATAATCGTGGGCTTTATGGGTAAATGGCACAGGGTATAG 568 | ACCGCTGAGGCAAGTGCCGTGCATAATGATGTGGGTGCATTTGGTACTGATTTAGTGAGA 569 | ATGGGCCATGGATTGGAGTGTGAGAGTAGGGTAACTTGAGAGTGGTATATACTGTAGCAT 570 | CCGTGTGCGTATGCCCCATCAATATAAGTGAAGGTGAGTATGGCATGTGGTGGTGGTATA 571 | GAGTGGTAGGGTAAGTATGTATGTATTATTTACGATCATTTGTTAACGTTTCAATATGGT 572 | GGGTGAACAACAGTACAGTGAGTAGGACATGGTGGATGGTAGGGTAATAGTAGGGTAAGT 573 | GGTGGTGGAGTTGGATATGGGTAATTGGAGGGTAACGGTTATGATGGGCGGTGGATGGTG 574 | GTAGTAAGTAGAGAGATGGATGGTGGTTGGGAGTGGTATGGTTGAGTGGGGCAGGGTAAC 575 | GAGTGGGGAGGTAGGGTAATGTGAGGGTAGGTTTGGAGACAGGTAAAATCAGGGTTAGAA 576 | TAGGGTAGTGTTAGGGTAGTGTGTGGGTGTGGGTGTGTGGGTGTGGTGTGTGGGTGTGGT 577 | GTGTGTGGGTGTGGTGTGTGGGTGTGGGTGTGTGGGTGTGGTGGGTGTGGTGTGTGTG 578 | >TEL16R_S288C TEL16R SGD:S000029002 telomere Telomeric region on the right arm of Chromosome XVI 579 | TTAGTATGTTTATATCTTCTCATTAATGAATGGTTAATTTTTATGTTTAGGTGATTTTGG 580 | TGGTGATTTTGTGGTTATATTGACATAAGTGTGTATAAATTGAGTGGTTAGTATATGGTG 581 | 
CAGTTATACAATATTTGGAACCGCAAAATGAGATGTAGATATTAAAATGTGGATAATCAT 582 | GGGCTTTATGGGTGAATGGCACAGGGTATAGACCGCTGAGGCAAGTGCCGTGCATAATGA 583 | TGTGAGTGCATCTAGTACTGATTTAGTGAGAATGGGGCCATGGTGTGGAATATGAAAGTA 584 | GGGTAAGTTTGAGATGGTATATACTGTAGCATCCGTGTGCGTATGACATATCAGTAGAAG 585 | TGAAGGTGAGTGTGGCAAGTGGCGGTGGTGGTAGTGGTGGTATAGAGTGGTAGGCTCCTT 586 | TACTTCGAAGCGTGAGGTCGTATACCTAATAAGGAAATGTAATTTATAACTTTTTATTAT 587 | ATTGGTCTTTTCGAGAGCGGAAGAAGTTGTAGGCTAAGCGCAGGCTAAGCGTAGGTCCAT 588 | GTTTAAAGTATCCAAGAGAATATCCACGAAGCGGCTGAGCAACGAACAGAATCCTGGTTC 589 | TCCTCGACTAAGCAGATAGTTAAGATACTGTGCACCATGGAAATTGAAAACGAACGTACG 590 | TACCGACTACTTTATTTTTGCAGGCCGGAAATCAAGCGATGAATGAGACATCCTTCTGTT 591 | TTCTATGTTGTGCTTGAAGGGGACAGACAGTCGCTTATCTTAGTGAGATTGCTTACTAAC 592 | TGAATTTACTTTGCTGCTGCTAGAGATTTGCACCTGCATAGCGCAGATTCTGCATCTTCT 593 | CAATAGCTTAATTATTACATTCTCAGATGATGATAAGACGGAAACTGGACAATCTTTTGT 594 | TTATATTGATGGATTTCTTGTCAAAAAGCATAACAATCAACATACTATTGTTAATTTCGA 595 | AACTTACAAAAATAAAATGAAAGTTTCCGATAGGCGTAAGTTTGAAAAAGCAAACTTTGA 596 | CGAGTTTGAGTCGGCTCTAAATAACAAAAACGACTTGGTACATTGTCCCTCAATAACTTT 597 | ATTTGAATCGATCCCCACGGAAGTGCGGTCATTCTACGAAGACGAAAAGTCTGGCCTAAT 598 | CAAAGTGGTAAAATTCAGAACTGGTGCAATGGATAGGAAAAGGTCTTTTGAAAAAATTGT 599 | CATTTCCGTCATGGTCGGGAAAAATGTACAAAAGTTCCTGACATTTGTTGAAGACGAACC 600 | AGATTTCCAGGGCGGACCAATCCCTTCAAAGTATCTTATTCCCAAGAAAATCAACTTGAT 601 | GGTCTACACGTTGTTTCAAGTGCATACTTTGAAATTCAATAGAAAGGATTACGATACCCT 602 | TTCTCTTTTTTACCTCAACAGAGGATACTATAATGAGTTGAGTTTTCCGTGTCCTGGAAC 603 | GTTGTCACGAAATAGCGAGTGCCAGGCCGAACGACAGCTCTACGATGCGTACTTTCACTG 604 | ACTTTGTTTCTGGCGCACCTATTGTAAGGAGTCTTCAGAAAAGCACCATAAGGAAATATG 605 | GGTACAATTTGGCACCCCACATGTTTTTGTTACTACACGTAGATGAGCTATCGATTTTTT 606 | CTGCATACCAAGCAAGTTTACCTGGCGAAAAGAAAGTCGACACAGAGCGGCTGAAGCGTG 607 | ATCTATGCCCACGTAAACCCATTGAGATAAAGTACTTTTCACAGATATGTAACGATATGA 608 | TGAACAAAAAGGACCGATTGGGTGATGTTTTGCATGTGTGCTGCCCAAGTTGAGAAGAGA 609 | TACTAACAAAATGACCGCGGCTCTCAAAAATAATTGACGAGCTTACGGTGATACGCTTAC 610 | 
CGTTATCCAGAGCTACAGCGCAACGTATACGTCGACGATACAACAAGAACGGTTCATCGG 611 | AGCCTCGACTAAAGACGCTTGACGGACTCACTTCCGAGCGCTGGATTCAATGGTTAGGCC 612 | TTGAAAGCGACTACCATTGTTCATTCTCTAGTACTCGGAATGCGGAAGACGTAGTGGCAG 613 | GTGAGGCGGCGAGTTCAGATCATCATCAAAAAATTTCAAGAGTAACGCGAAAAAGGCCCC 614 | GAGAGCCCAAGAGTACAAACGATATCCTCGTCGCAGGCCAGAAACTCTTTGGCAGCTCCT 615 | TTGAATTCAGGGACTTGCATCAGTTGCGCTTATGTCATGAAATATACATGGCAGACACAC 616 | CCTCTGTGGCAGTACAGGCCCCACCGGGCTATGGTAAGACGGAGTTATTTCATCTCCCCT 617 | TGATAGCACTGGCGTCTAAGGGCGACGTGAAATATGTGTCGTTTCTGTTTGTACCGTACA 618 | CAGTGTTGCTTGCTAATTGCATGATCAGGTTGGGCCGATGCGGTTGCTTGAATGTGGCCC 619 | CTGTAAGAAACTTTATTGAAGAAGGTTGCGATGGCGTTACTGATTTATACGTGGGGATCT 620 | ACGATGATCTTGCTAGCACTAATTTCACAGACAGGATAGCTGCGTGGGAGAATATTGTTG 621 | AGTGCACCTTTAGGACCAACAACGTAAAATTGGGTTACCTCATTGTAGATGAGTTTCACA 622 | ACTTTGAAACGGAGGTCTACCGGCAGTCGCAATTTGGGGGCATAACTAACCTTGATTTTG 623 | ACGCTTTTGAGAAAGCAATCTTTTTGAGCGGCACAGCACCTGAGGCTGTAGCTGATGCTG 624 | CGTTGCAGCGTATTGGGCTTACGGGACTGGCCAAGAAGTCGATGGACATCAACGAGCTCA 625 | AACGGTCGGAAGATCTCAGCAGAGGTCTATCCAGCTATCCAACACGGATGTTTAATCTAA 626 | TCAAGGAGAAATCCGAGGTGCCTTTAGGGCATGTTCATAAAATTTGGAAGAAAGTGGAAT 627 | CACAGCCCGAAGAAGCACTGAAGCTTCTTTTAGCCCTCTTTGAAATTGAACCAGAGTCGA 628 | AGGCCATTGTAGTTGCAAGCACAACCAACGAAGTGGAAGAATTGGCCTGCTCTTGGAGAA 629 | AGTATTTTAGGGTGGTATGGATACACGGGAAGCTGGGTGCTGCAGAAAAGGTGTCTCGCA 630 | CAAAGGAGTTTGTCACTGACGGTAGCATGCGAGTTCTCATCGGAACGAAATTAGTGACTG 631 | AAGGAATTGACATTAAGCAATTGATGATGGTGATCATGCTTGATAATAGACTTAATATTA 632 | TTGAGCTCATTCAAGGCGTAGGGAGACTAAGAGATGGGGGCCTCTGTTATCTATTATCTA 633 | GAAAAAACAGTTGGGCGGCAAGGAATCGTAAGGGTGAATTACCACCGATTAAGGAAGGCT 634 | GTATAACCGAACAGGTACGCGAGTTCTATGGACTTGAATCAAAGAAAGGAAAAAAGGGCC 635 | AGCATGTTGGATGCTGTGGCTCCAGGACAGACCTGTCTGCTGACACAGTGGAACTGATAG 636 | AAAGAATGGACAGATTGGCTGAAAAACAGGCGACAGCTTCCATGTCGATCGTTGCGTTAC 637 | CGTCTAGCTTCCAGGAGAGCAATAGCAGTGACAGGTGCAGAAAGTATTGCAGCAGTGATG 638 | AGGACAGCAACACGTGCATTCATGGTAGTGCTAATGCCAGTACCAATGCGACTACCAACT 639 | 
CCAGCACTAATGCTACTACCACTGCCAGCACCAACGTCAGGACTAGTGCTACTACCACTG 640 | CCAGCATCAACGTCAGGACTAGTGCGACTACCACTGAAAGTACCAACTCCAGCACTAATG 641 | CTACTACCACTGCCAGCACCAACGTCAGGACTAGTGCTACTACCACTGCCAGCATCAACG 642 | TCAGGACTAGTGCGACTACCACTGAAAGTACCAACTCCAACACTAGTGCTACTACCACCG 643 | AAAGTACCGACTCCAACACTAGTGCTACTACCACTGAAAGTACCAACTCCAGCACTAATG 644 | CTACTACCACTGCCAGCATCAACGTCAGGACTAGTGCGACTACCACTGAAAGTACCAACT 645 | CCAACACTAATGCTACTACCACTGAAAGTACCAACTCCAGCACTAATGCTACTACCACTG 646 | AAGGTACCAACTCCAACACTAGTGCTACTACCACTGCTAGCACCAACTCCAGCACTAATG 647 | CTACTACCACTGAAAGTACCAACGCTAGTGCCAAGGAGGACGCCAATAAAGATGGCAATG 648 | CTGAGGATAATAGATTCCATCCAGTCACCGACATTAACAAAGAGTCGTATAAGCGGAAAG 649 | GGAGTCAAATGGTTTTGCTAGAGAGAAAGAAACTGAAAGCACAATTTCCCAATACTTCCG 650 | AGAATATGAATGTCTTACAGTTTCTTGGATTTCGGTCTGACGAAATTAAACATCTTTTCC 651 | TCTATGGTATTGACGTATACTTCTGCCCAGAGGGAGTATTCACACAATACGGATTATGCA 652 | AGGGCTGTCAAAAGATGTTCGAGCTCTGTGTCTGTTGGGCTGGCCAGAAAGTATCGTATC 653 | GGAGGATGGCTTGGGAAGCACTAGCTGTGGAGAGAATGCTGCGAAATGACGAGGAATACA 654 | AAGAATACTTGGAAGACATCGAGCCATATCATGGGGACCCTGTAGGATATTTGAAATATT 655 | TTAGCGTAAAAAGGGGAGAGATCTACTCTCAGATACAGAGAAATTATGCTTGGTACCTGG 656 | CCATTACTAGAAGAAGAGAAACAATTAGTGTATTGGATTCGACAAGAGGCAAGCAAGGGA 657 | GCCAAGTTTTCCGCATGTCTGGAAGGCAGATCAAAGAGTTGTATTATAAAGTATGGAGCA 658 | ACTTGCGTGAATCGAAGACAGAGGTGCTGCAGTACTTTTTGAACTGGGACGAAAAAAAGT 659 | GCCGGGAAGAATGGGAGGCAAAAGACGATACGGTCTTTGTGGAAGCGCTCGAGAAAGTTG 660 | GAGTTTTTCAGCGTTTGCGTTCCATGACGAGCGCTGGACTGCAGGGTCCGCAGTACGTCA 661 | AGCTGCAGTTTAGCAGGCATCATCGACAGTTGAGGAGCAGATATGAATTAAGTCTAGGAA 662 | TGCACTTGCGAGATCAGCTTGCGCTGGGAGTTACCCCATCTAAAGTGCCGCATTGGACGG 663 | CATTCCTGTCGATGCTGATAGGGCTGTTCTGCAATAAAACATTTCGGCAGAAACTGGAAT 664 | ATCTTTTGGAGCAGATTTCGGAGGTGTGGTTGTTACCACATTGGCTTGATTTGGCAAACG 665 | TTGAAGTTCTCGCTGCAGATAACACGAGGGTACCGCTGTACATGCTGATGGTAGCGGTTC 666 | ACAAAGAGCTGGATAGCGATGATGTTCCAGACGGTAGATTTGATATATTATTATGTAGAG 667 | ATTCGAGCAGAGAAGTTGGAGAGTGAAGGAAATTGTTGTTACGAAAGTCAGTGATTATGT 668 | 
ATTGTGTAGTATAGTATATTGTAAGAAATTTTTTTTTCTAGGGAATATGCGTTTTGATGT 669 | AGTAGTATTTCACTGTTTTGATTTAGTGTTTGTTGCACGGCAGTAGCGAGAGACAAGTGG 670 | GAAAGAGTAGGATAAAAAGACAATCTATAAAAAGTAAACATAAAATAAAGGTAGTAAGTA 671 | GCTTTTGGTTGAACATCCGGGTAAGAGACAACAGGGCTTGGAGGAGACGTACATGAGGGC 672 | TATTTAGGGCTATTTAGGGCTATGTAGAAGTGTTG 673 | >TEL13R_S288C TEL13R SGD:S000028989 telomere Telomeric region on the right arm of Chromosome XIII 674 | TCAATATGTTTATTTTGTAAAGTTGAAAGATAATTATTTTTATGTTTAGGTGATTTTGGT 675 | GTTGAATTTTCTGTAATATTAACATAAGAGTAATACATTGAGTGGTTAGTATATGGTGTA 676 | AAAGTGGTATAACGCATGTATTAAGAGCAGTTATACAATATTTGGGGCCGCTGAATGAGA 677 | TATAGATATTAAAATGTGGATAATCATGGGCTTTATGGGTAAATGGAACAGGGTATAGAC 678 | CACTGAGGCAAGTGCCGTGCATAATGATATGAGTGCATCTAGTACTGATTTAGTGAGAGA 679 | TGGGCCGTGGAGTGGAATGTGAGAGTAGGGTAAGTTGAGAGTGGTATATACTTGTAGCAT 680 | CCGTGTGCGTATGCCATATCAGTATACAAGTGAAGGTGAGTATGGCAAGTGGTGGTGGGA 681 | TTGGTATAAAGTGGTAGGGTAAGTATGTGTGTATTATTTACGATCATTTGTTAACGTTTC 682 | AATATGGTAGGTAGAAGAACAGTATGGTGAGTAGCAGATGGTGGATGGTAGAGGAATAGC 683 | AGGGTAAGTGGTAGTGGAGTTGGATATGGGTAATTGGAGGGTAACGGTTATGGTGCACGA 684 | TGGGTTGGTGGTAGCAAGTAGAGAGATGGATGGTGGTTGGAGCGGTATGGTTGAAGGGGA 685 | CAGGGTAACGAGTGGGGAGGTAGCGTAATGGAGGGTAAGTTAAGAGACATGCTAAATCAG 686 | GGTAAGAATAGGGTAGGGTTAGGGTAGGGTTAGGGTAGTGTTAGGGTGTGGGTGTGGTGT 687 | GTGTGTGTGGGTGTGGTGTGGGTGTGGGTGTGGTGTGTGGGTGTGGTGTGTGGGTGTGTG 688 | TGGGTGTGGTGTGGTGTGTGGGTGTGGTGTGGGTGTGGTGTGTGTGTGGGG 689 | >TEL06R_S288C TEL06R SGD:S000028957 telomere Telomeric region on the right arm of Chromosome VI 690 | TATAGTATGCTCACATTTTCTTATTGCTGAATAGTTCTTTTTTACGTTTAGCTGAGTTTA 691 | ACGGTGATTATTAGGTGGATTTTATATTAGTCTACATAAAAATAAGTGGTGGATATCTAC 692 | ATAAAATTGTCATAACGCGTAAACTAAAAATTATTTTTATGATCATTGAGGATCTATAAT 693 | CAACTATAGACATTAATGTATGGATAATCATGAGGATTATAGGTAAATGGCAAGGGTAAA 694 | AACCAGTGAGGCCATTTCCGTGTGTAGTGATCCGAACTCAGTTACTATTGATGGAAATGA 695 | GGACTGGGTCATGGGGCGCAATGGAGTGAAGTAATATATACTTTAGCATACGTGTGCGTA 696 | CGCCATATCAATATGCTAGTGAGGTGGTGTGGGTGTGGTGTGTGGGTGTGGTGTGTGGGT 697 | GTGGTGTGTGG 698 | 
>TEL05R_S288C TEL05R SGD:S000028953 telomere Telomeric region on the right arm of Chromosome V 699 | TGAGTGTGTTTATGTATTATCGTTGAAGGATAGAATATTTTTATGTTTAGGTGATTTTAG 700 | TGGTGATTTTTTTGTAATATTGACATAAGTGTATATAAATTGAGTGGATAGTAGATGGTG 701 | AAAAAGTGGTATAACGTATGTATTAAGGGCAGTTATACAATATTTGGGGCCGCCGAATGA 702 | GATATAGATATTAAAATGTGGATAATCGTGGGCTTTATGGGTAAATGGCACAGGGTATAA 703 | ACCGCTGAGGCAAGTGCCGTGCATAATGATGTGAGTGCATTTGTACTGATTTAGTGAGAG 704 | ATGGGCCATGGAGTGGAATGTGAGAGTAGGGTAAGTTTGAGAGTGGTATACTGTAGTAGC 705 | ATCCGTGTGCGTATGCCATATCAGTATACAAGTGAGGGTGAGTATGGCATGTGGTGGTGG 706 | GATTGGAGTGGTAGGGTAAGCACGTGTGTATTATTTACGATCATTTGTTAACGTTTCAAT 707 | ATGGTGGTAGAACAACAGTATAGTGAGTAGGACATGGTGGATGGTAGGGTAATGGTAGGG 708 | TAAGTGGTGGTGGAGTTGGATATGGGTAATTGGAGGGTAACGGTTATGGTGGACGGTGGG 709 | TTGGTGGTAGGAAGTAGAGGGATGGATGGTGGTTGGGGTGGTATGGTTGAATGGGACAGG 710 | GTAACGAGTGGACAGTAGGGTAATGGAGGGTAAGTTTGGAGACAGGTTGGCGAGGGTTAG 711 | ATTAGGATAGCATATCTATGTCACCTTATTGCATGCTGGATGGTGTTAGACAAGGCCGTA 712 | GGGACATATAGCATCTAGGAAGTAACCTTGTACGAAAATAGGCAATATTTCCTGTTTGAC 713 | GCAGATTTTAGCCCAAAGATCTAGCGTTAAGGAATTTTTTTATAGTGGGACATTGCAAAC 714 | CAAGGAAGTAACTTGATACGTCGTTGGTGAATGGGTCTGTTTTCTTATTCGGCGGGGTAA 715 | TACATTTTGAGGGAAGGTTGTCTGTCTGACGCGCCATATGTAGGTACGCCAAAAAGGGCT 716 | CCTTTACTTCGAAGCGCGAGGTCGTATACCTAATAAGGAAATGTAATTTATAACTTTTTA 717 | TTATATTGGTCTTTTCGAGAGCGGAAGAAGTTGTAGGCTAAGCGCAGGCTAAGCGTAGGT 718 | CCATGTTTAAAGTATCCAAGAGAATATCCACGAAGCGGCTGAGCAACGAACAGAATCCTG 719 | GTTCTCCTCGACTAAGCAGATAGTTAAGATACTGTGCACCATGGAAATTGAAAACGAACG 720 | TACGTACCGACTACTTTATTTTTGCAGGCCGGAAATCAAGCGATGAATGAGACATCCTTC 721 | TGTTTTCTATGTTGTGCTTGAAGGGGACAGACAGTCGCTTATCTTAGTGAGATTTTGTTT 722 | GCTTTTGCTGCACCTGCATAGCGCAGATTCTGCATCTTCTCAATAGCTTAATTATTACAT 723 | TCTTAGATGATGATAAGACGGAAACTGGAGAATCTTTTGTTTATATTGATGGATTTCTTG 724 | TCAAAAAGCATAACAATCAACATACTATTGTTAATTTCGAAACTTACAAAAATAAAATGA 725 | AAGTTTCCGATAGGCGTAAGTTTGAAAAAGCAAACTTTGACGAGTTTGAGTCGGCTCTAA 726 | ATAACAAAAACGACTTGGTACATTGTCCCTCAATAACTTTATTTGAATCGATCCCCACGG 727 | 
AAGTGCGGTCATTCTACGAAGACGAAAAGTCTGGCCTAATCAAAGTGGTAAAATTCAGAA 728 | CTGGTGCAATGGATAGGAAAAGGTCTTTTGAAAAAATTGTCATTTCCGTCATGGTCGGGA 729 | AAAATGTACAAAAGTTCCTGACGTTTGTTGAAGACGAACCAGATTTCCAGGGCGGACCAA 730 | TCCCTTCAAACAAACCACGTGATGGTCTACACGTTGTTTCAAGTGCATACTTTGAAATTC 731 | AATAGAAAGGATTACGATACCCTTTCTCTTTTTTACCTCAACAGAGGATACTATAATGAG 732 | TTGAGTTTCCGTGTCCTGGAACGTTGTCACGAAAAAGCGAGTGCTAGGCCGAACGACAGC 733 | TCTACGATGCGTACTTTCACTGACTTTGTTTCCGGCGCACCTATTGTAAGGAGTCTTCAG 734 | AAAAGTACCATAAGGAAATATGGGTACAATTTGGCACCCTACATGTTTTTGTTACTACAC 735 | GTAGATGAGCTATCGATTTTTTCTGCATACCAAGCAAGTTTACCTGGCGAAAAGAAAGTC 736 | GACACAGAGCGGCTGAAGCGTGATCTATGCCCACGTAAACCCACTGAGATAAAGTACTTT 737 | TCACAGATATGTAACGATATGATGAACAAAAAGGACCGATTGGGTGATATTTTGCATATT 738 | ATCTTGCGAGCATGTGCGCTCAATTTCGGGGCGGGTCCCCGTGGTGGCGCTGGTGACGAA 739 | GAGGATCGATCCATTACGAATGAAGAACCCATTATTCCCTCTGTGGACGAGCATGGCTTG 740 | AAAGTATGTAAGTTGCGCAGTCCTAACACTCCACGAAGACTCAGAAAAACACTAGATGCC 741 | GTGAAAGCTTTATTGGTGTCGTCTTGTGCTTGTACCGCAAGGGATTTAGATATATTTGAT 742 | GACAACAACGGCGTTGCGATGTGGAAATGGATCAAAATTCTGTACCACGAAGTAGCGCAG 743 | GAAACCGCGCTGAAGGACTCTTATAGAATAACTTTGGTACCTTCTTCTGATGGTGTATCA 744 | GTATGTGGAAAACTGTTTAATCGCGAGTATGTCCGCGGCTTTTACTTTGCATGCAAGGCT 745 | CAGTTTGATAACCTTTGGGAAGAATTGAACGACTGCTTTTATATGCCTACAGTGGTTGAT 746 | ATTGCCAGCCTCATTTTGCGTAATCGAGAAGTTTTGTTCAGAGAGCCAAAGCGAGGAATT 747 | GACGAGTATCTGGAGAACGATTCTTTCCTTCAAATGATACCTGTTAAATATCGTGAAATT 748 | GTGCTGCCCAAGTTGAGAAGAGATACTAACAAAATGACCGCGGCTCTTAAAAATAAAGTC 749 | ACTGTTGCAATTGACGAGCTTACGGTGCCACTTATGTGGATGATCCATTTTGCCGTAGGA 750 | TACCCTTACCGTTATCCAGAGCTTCAGCTACTCGCTTTTGCCGGTCCTCAGCGCAACGTA 751 | TACGTCGATGATACAACAAGACGCATCCAACTGTACACTGATTACAACAAGAACGGTTCA 752 | TCGGAGCCTCGACTTAAGACGCTTGACGGACTCACTTCAGATTACGTGTTTTATTTTGTC 753 | ACTGTGCTAAGGCAAATGCAAATATGTGCGCTTGGTAACAGTTATGACGCTTTTAATCAT 754 | GATCCTTGGATGGATGTGGTGGGATTTGAGGATCCAGATCAAGTAACAAATCGAGACATT 755 | TCGAGGATAGTTTTGTATTCCTACATGTTTCTGAATACCGCGAAGGGCTGTCTGGTTGAA 756 | 
TACGCAACTTTTCGGCAGTACATGAGGGAACTTCCGAAGAATGCACCTCAGAAGCTGAAT 757 | TTTCGGGAGATGCGTCAGGGGTTGATTGCCCTAGGACGGCACTGCGTAGGTAGCAGATTT 758 | GAAACAGATTTGTACGAGTCGGCGACGAGTGAACTCATGGCCAATCATTCCGTTCAAACA 759 | GGGCGAAATATTTACGGTGTGGATTCCTTTTCGTTAACTAGTGTCAGTGGGACGACCGCC 760 | ACTTTATTGCAGGAACGAGCTTCCGAGCGCTGGATTCAATGGTTAGGCCTTGAAAGCGAC 761 | TACCATTGTTCATTCTCTAGTACTCGGAATGCGGAAGACGTAGTGGCAGGTGAGGCGGCG 762 | AGTTCAGATCATCATCAAAAAATTTCAAGAGTAACGCGAAAAAGGCCCCGAGAGCCCAAG 763 | AGTACAAACGATATCCTCGTCGCAGGCCAGAAACTCTTTGGCAGCTCCTTTGAATTCAGG 764 | GACTTGCATCAGTTGCGCTTATGTCATGAAATATACATGGCAGACACACCCTCTGTGGCA 765 | GTACAGGCCCCACCGGGCTATGGTAAGACGGAGTTATTTCATCTCCCCTTGATAGCACTG 766 | GCGTCTAAGGGCGACGTGAAATATGTGTCGTTTCTGTTTGTACCGTACACAGTGTTGCTT 767 | GCTAATTGCATGATCAGGTTGAGCCGATGCGGTTGCTTGAATGTGGCCCCTGTAAGAAAC 768 | TTTATTGAAGAAGGTTGCGATGGCGTTACTGATTTATACGTGGGGATCTACGATGATCTT 769 | GCTAGCACTAATTTCACAGACAGGATAGCTGCGTGGGAGAATATTGTTGAGTGCACCTTT 770 | AGGACCAACAACGTAAAATTGGGTTACCTCATTGTAGATGAGTTTCACAACTTTGAAACG 771 | GAGGTCTACCGGCAGTCGCAATTTGGGGGCATAACTAACCTTGATTTTGACGCTTTTGAG 772 | AAAGCAATCTTTTTGAGCGGCACAGCACCTGAGGCTGTAGCTGATGCTGCGTTGCAGCGT 773 | ATTGGGCTTACGGGACTGGCCAAGAAGTCGATGGACATCAACGAGCTCAAACGGTCGGAA 774 | GATCTCAGCAGAGGTCTATCCAGCTATCCAACACGGATGTTTAATCTAATCAAGGAGAAA 775 | TCCGAGGTGCCTTTAGGGCATGTTCATAAAATTTGGAAGAAAGTGGAATCACAGCCCGAA 776 | GAAGCACTGAAGCTTCTTTTAGCCCTCTTTGAAATTGAACCAGAGTCGAAGGCCATTGTA 777 | GTTGCAAGCACAACCAACGAAGTGGAAGAATTGGCCTGCTCTTGGAGAAAGTATTTTAGG 778 | GTGGTATGGATACACGGGAAGCTGGGTGCTGCAGAAAAGGTGTCTCGCACAAAGGAGTTT 779 | GTCACTGACGGTAGCATGCGAGTTCTCATCGGAACGAAATTAGTGACTGAAGGAATTGAC 780 | ATTAAGCAATTGATGATGGTGATCATGCTTGATAATAGACTTAATATTATTGAGCTCATT 781 | CAAGGCGTAGGGAGACTAAGAGATGGGGGCCTCTGTTATCTATTATCTAGAAAAAACAGT 782 | TGGGCGGCAAGGAATCGTAAGGGTGAATTACCACCGATTAAGGAAGGCTGTATAACCGAA 783 | CAGGTACGCGAGTTCTATGGACTTGAATCAAAGAAAGGAAAAAAGGGCCAGCATGTTGGA 784 | TGCTGTGGCTCCAGGACAGACCTGTCTGCTGACACAGTGGAACTGATAGAAAGAATGGAC 785 | 
AGATTGGCTGAAAAACAGGCGACAGCTTCCATGTCGATCGTTGCGTTACCGTCTAGCTTC 786 | CAGGAGAGCAATAGCAGTGACAGGTGCAGAAAGTATTGCAGCAGTGATGAGGACAGCGAC 787 | ACGTGCATTCATGGTAGTGCTAATGCCAGTACCAATGCGACTACCAACTCCAGCACTAAT 788 | GCTACTACCACTGCCAGCACCAACGTCAGGACTAGTGCTACTACCACTGCCAGCATCAAC 789 | GTCAGGACTAGTGCGACTACCACTGAAAGTACCAACTCCAGCACTAATGCTACTACCACT 790 | GCCAGCACCAACGTCAGGACTAGTGCTACTACCACTGCCAGCATCAACGTCAGGACTAGT 791 | GCGACTACCACTGAAAGTACCAACTCCAACACTAGTGCTACTACCACCGAAAGTACCGAC 792 | TCCAACACTAGTGCTACTACCACCGAAAGTACCGACTCCAACACTAGTGCTACTACCACT 793 | GCTAGCACCAACTCCAGCACTAATGCCACTACCACTGCTAGCACCAACTCCAGCACTAAT 794 | GCCACTACCACTGAAAGTACCAACGCTAGTGCCAAGGAGGACGCCAATAAAGATGGCAAT 795 | GCTGAGGATAATAGATTCCATCCAGTCACCGACATTAACAAAGAGTCGTATAAGCGGAAA 796 | GGGAGTCAAATGGTTTTGCTAGAGAGAAAGAAACTGAAAGCACAATTTCCCAATACTTCC 797 | GAGAATATGAATGTCTTACAGTTTCTTGGATTTCGGTCTGACGAAATTAAACATCTTTTC 798 | CTCTATGGTATTGACGTATACTTCTGCCCAGAGGGAGTATTCACACAATACGGATTATGC 799 | AAGGGCTGTCAAAAGATGTTCGAGCTCTGTGTCTGTTGGGCTGGCCAGAAAGTATCGTAT 800 | CGGAGGATGGCTTGGGAAGCACTAGCTGTGGAGAGAATGCTGCGAAATGACGAGGAATAC 801 | AAAGAATACTTGGAAGACATCGAGCCATATCATGGGGACCCTGTAGGATATTTGAAATAT 802 | TTTAGCGTAAAAAGGGGAGAGATCTACTCTCAGATACAGAGAAATTATGCTTGGTACCTG 803 | GCCATTACTAGAAGAAGAGAAACAATTAGTGTATTGGATTCGACAAGAGGCAAGCAAGGG 804 | AGCCAAGTTTTCCGCATGTCTGGAAGGCAGATCAAAGAGTTGTATTATAAAGTATGGAGC 805 | AACTTGCGTGAATCGAAGACAGAGGTGCTGCAGTACTTTTTGAACTGGGACGAAAAAAAG 806 | TGCCGGGAAGAATGGGAGGCAAAAGACGATACGGTCTTTGTGGAAGCGCTCGAGAAAGTT 807 | GGAGTTTTTCAGCGTTTGCGTTCCATGACGAGCGCTGGACTGCAGGGTCCGCAGTACGTC 808 | AAGCTGCAGTTTAGCAGGCATCATCGACAGTTGAGGAGCAGATATGAATTAAGTCTAGGA 809 | ATGCACTTGCGAGATCAGCTTGCGCTGGGAGTTACCCCATCTAAAGTGCCGCATTGGACG 810 | GCATTCCTGTCGATGCTGATAGGGCTGTTCTGCAATAAAACATTTCGGCAGAAACTGGAA 811 | TATCTTTTGGAGCAGATTTCGGAGGTGTGGTTGTTACCACATTGGCTTGATTTGGCAAAC 812 | GTTGAAGTTCTCGCTGCAGATAACACGAGGGTACCGCTGTACATGCTGATGGTAGCGGTT 813 | CACAAAGAGCTGGATAGCGATGATGTTCCAGACGGTAGATTTGATATATTATTATGTAGA 814 | 
GATTCGAGCAGAGAAGTTGGAGAGTGAAGGAAATTGTTGTTACGAAAGTCAGTGATTATG 815 | TATTGTGTAGTATAGTATATTGTAAGAAATTTTTTTTTCTAGGGAATATGCGTTTTGATG 816 | TAGTAGTATTTCACTGTTTTGATTTAGTGTTTGTTGCACGGCAGTAGCGAGAGACAAGTG 817 | GGAAAGAGTAGGATAAAAAGACAATCTATAAAAAGTAAACATAAAATAAAGGTAGTAAGT 818 | AGCTTTTGGTTGAACATCCGGGTAAGAGACAACAGGGCTTGGAGGAGACGTACATGAGGG 819 | CTATTTAGGGCTATTTAGGGCTATGTAGAAGTGCTGTAGGGCTAAAGAACAGGGTTTCAT 820 | TTTCATTTTTTTTTTT 821 | >TEL01R_S288C TEL01R SGD:S000028937 telomere Telomeric region on the right arm of Chromosome I 822 | ATAGTGTGTTTATACCTTATTATTGATGATTAGTATATATTTTTATATTTAGGTGATTTT 823 | AGTGGAGATTATTTGGTGGTAATTACACTAGTATACATAAAATGGGTAGTGGATATTTGT 824 | ATAGAAAGGGCATTACGCATGGAGTTAAGAGTATTTACATGATAATTGGGGTTCCGTGAT 825 | TCATTATAGATAATAAAACGTGGATAATATTGGGTGTTATAGGTAAATGGGACAGGGTAT 826 | AGACCGCTGAGGCAAGTGCCGTGTATGGTGATGTGGTATGGTATCGAGTACCGATGGAGT 827 | GAGAGATGGCCTTGGTGTAGAGTATTATGGCGGGTAAGTTAGATGATGTATTGTTTACGT 828 | TATATTTGTTTAAATTGGATTTGTTTACATTAGATTTGTTTACATTTCAATATATCAATG 829 | GAGGGTATGTAGCATTATGGTAAGTAGCACGTGGTAGATGGGGATTGTAGGTGGATGGTA 830 | GGATGAGTGGTAGTGAGAGTTGGATAAGATATATTGGGCAGGGGATAGATGGTTGTTGGG 831 | GTGTGGTGATGGATAGTGAGTGGATAGTGAGTGGATGGATGGTGGAGTGGGGGAATGAGA 832 | CAGGGCATGGGGTGGTGAGGTAAGTGCCGTGGATTGTGATGATGGAGAGGGAGGGTAGTT 833 | GACATGGAGTTAGAATTGGGTCAGTGTTAGTGTTAGTGTTAGTATTAGGGTGTGGTGTGT 834 | GGGTGTGGTGTGGGTGTGGGTGTGGGTGTGGGTGTGGGTGTGGGTGTGGTGTGGTGTGTG 835 | GGTGTGGTGTGGGTGTGGTGTGTGTGGG 836 | >TEL12R_S288C TEL12R SGD:S000028982 telomere Telomeric region on the right arm of Chromosome XII 837 | TCAATATGTTTTTGTATTATTGTTGAAGAATAGAATATTTTTATGTTTAGGTGATTTTAG 838 | TGGTGATTTTTCTGTAATATTGGCATAAGTGTATTTAAATTGAGTGGTTAGTATATGGTG 839 | CAAAAGTAATATAACGTATGTATTAAGAGCATTTTTACAATATTCGAGGCCACTGAATGA 840 | GATATAGATATTAAAATGTGGATAATCATGGGATTTATGGGTAAATGGCACAGAGTATAG 841 | ACCGCTGAGGCAAGTGCCGTGCATAATGATGTGAGTGCATTTGGCACTGATTTAGTGAGA 842 | GATGGGCCATGGAGTGGAGTGGAATATGAGAGTAGGGTAAGTTGAGATGGTATATACTGT 843 | AGTATCCGTGTGCGTATGACATATCAGTATACAAGTGAAGGTGAGTGTGGCAAGTGGTGG 
844 | TGATGGTATAGAGTGATAGGGTAAGTATATGTGTATTGTTTACGATCATTTGTTAACGTT 845 | TCAATATGGTGGGTAGAACAACAGTATGGTGAGTAGTAGATGGGAGATGGTAGAGGAATG 846 | GCAGGGTAAGTGGTGGCGGAGTTGGATATGGGTAATTGGAGGGTAACGGTTATGGTGGAC 847 | GGTGGGTTGGTGGTAGGAAGTAGAGAGGTGGATGGTAGTTGGGAGTGGTATAGTTGAATG 848 | AGACAGGGTAACGAGTGGGGAGTAGGGTAATGGAGGGTAAGTTGAGAGACAGGTTGATCA 849 | GGGTTAGAGTAGGGTAGTGTTAGGGTTAGGTGTGTGTGTGTGTGTGGGTGTGGTGTGTGG 850 | GTGTGTGGGTGTGGTGTGGTGTGTGTGTGTGTGGTGTGTGGGTGTGGTGGGTGTGGTGTG 851 | GGTGTGGGTGTGGGTGTGGGTGTGGGTGTGGTGTGTATATCTATGTCACCTTATTGCATG 852 | CTGGATGGTGTTAGACAAGGCCGTAGGGACATATAGCATCTAGGAAGTAACCTTGTACGA 853 | AAATAGGCAATATTTCCTGTTTGACGCAGATTTTAGCCCAAAGATCTAGCGTCAAGGAAT 854 | TTTTTTATAGTGGGACATTGCAAACCAAGGAAGTAACTTGATACGTCGTTGGTGAATGGG 855 | TCTGTTTTCTTATTCGGCGGGGTAATACATTTTTGGGGGAAGTTTGTCTGTCTGACGCGC 856 | CATATGTAGGTACGCCAAAAAGGGCTCCTCTACTTCGAAGCGCGAGGTCGTATACCTAAT 857 | AAGGAAATGTAATTTATAACTTTTTATTATATTGGTCTTTTCGAGAGCGGAACGTAGGTC 858 | CATGTTTAAAGTATCCAAGAGAATATCCACGAAGCGGCTGAGCAACGAACAGAATCCTGG 859 | TTCTCCTCGACTAAGCAGATAGTTAAGATACTGTGCACCATGGAAATTGAAAACGAAAGT 860 | ACGTACCGACTACTTTATTTTTGCAGGCCGGAAATCAAGCGATGAATGAGACATCCTTCT 861 | GTTTTCTATGTTGTGCTTGAAGGGGACAGACAGTCGCTTATCTTAGTGAGATTTTGTTTG 862 | CTTTTGCTGCACCTGCATAGCGCAGATTCTGCATCTTCTCAATAGCTTAATTATTACATT 863 | CTTAGATGATGATAAGACGAAAACTGGACAATCTTTTGTTTATATTGATGGATTTCTTGT 864 | CAAAAAGCATAAAAATCAACATACTATTGTTAATTTCGAAACTTACAAAAATAAAATGAA 865 | AGTTTCCGATAGGCGTAAGTTTGAAAAAGCAAACTTTGACGAGTTTGAGTCGGCTCTAAA 866 | TAACAAAAACGACTTGGTACATTGTCCCTCAATAACTTTATTTGAATCGATCCCCACGGA 867 | AGTGCGGTCATTCTACGAAGACGAAAAGTCTGGCCTAATCAAAGTGGTAAAATTCAGAAC 868 | TGGTGCAATGGATAGGAAAAGGTCTTTTGAAAAAGTTGTCATTTCCGTCATGGTCGGGAA 869 | AAATGTAAAAAAGTTCCTGACGTTTGTTGAAGACGAACCAGATTTCCAGGGCGGACCAAT 870 | CCCTTCAAAGTATCTTGTTCCCAAGAAAATCAACTTGATGGTCTACACGTTGTTTCAAGT 871 | GCATACTTTGAAATTCAATAGAAAGGATTACGATACCCTTTCTCTTTTTTACCTCAACAG 872 | AGGATACTATAATGAGTTGAGTTTCCGTGTCCTGGAACGTTGTCACGAAATAGCGAGTGC 873 | 
TAGGCCGAACGACAGCTCTACGATGCGTACTTTCACTGACTTTGTTTCCGGCGCACCTAT 874 | TGTAAGGAGTCTTCAGAAAAGCACCATAAGGAAATATGGGTACAATTTGGCAGCCTACAC 875 | GTAGATGAGCTATGCAAGCAAGTTTACCTGGCGAAAAGAAAGTCGACACAGAGCGGCTGA 876 | AGCGTGATCTATGCCCACGTAAACCCATTGAGATAAAGTACTTTTCACAGATATGTAACG 877 | ATATGATGAACAAAAAGGACCGATTGGGTGATATTTTGCATATTATCTTGCGAGCATGTG 878 | CACTCAATTTCGGGGCGGGTCCCCGTGGTGGCGCTGGTGACGAAGAGGATCGATCTATTA 879 | CGAATGAAGAACCCATTATTCCCTCTGTGGACGAGCATGGCCTGAAAGTATGTAAGTTGC 880 | GCAGTCCTAACACTCCACGAAGACTCAGAAAAACACTAGATGCCGTGAAAGCTTTATTGG 881 | TGTCGTCTTGTGCTTGTACCGCAAGGGATTTAGATATATTTGATGACAACAACGGCGTTG 882 | CAATGTGGAAATGGATCAAAATTCTGTACCACGAAGTAGCGCAGGAAACCACGCTGAAGG 883 | ACTCTTATAGAATAACTTTGGTACCTTCTTCTGATGGTATATCAGTATGTGGAAAACTTT 884 | GGGGAGAGTTGAACAACTGCTTCCATATGCCAGCCTCATTTTGCGTAATCGAGAAGTTTT 885 | GTTCAGAGAACCGAAGCGAGGAATTGACGAGTATCTGGAAAACGATTCTTTTTTTCAAAT 886 | GATACCTGTTAAATATCGTGAAATTGTGCTGCCCAAGTTGAGAAGAGATACTAACAAAAT 887 | GACCGCGGCTCTTAAAAATAAAGTCGCTGTTGCAATTGACGAGCTTACGGTGCCACTTAT 888 | GTGGATGATCCATTTTGCCGTAGGATACCCTTACCGTTATCCAGAGCTTCAGCTACTCGC 889 | TTTTGCCGGTCCTCAGCGCAACGTATACGTCGATGATACAACAAGACGCATCCAACTGTA 890 | CACTGATTACAACAAGAACGGTTCATCGGAGCCTCGACTAAAGACGCTTGACGGACTCAC 891 | TTCAGATTACGTGTTTTATTTTGTCACTGTGCTAAGGCAAATGCAAATATGTGCGCTTGG 892 | TAACAGTTATGACGCTTTTAATCATGATCCTTGGATGGATGTGGTGGGATTTGAGGATCC 893 | AGATCAAGTAACAAATCGAGACATTTCGAGGATAGTTTTGTATTCCTACATGTTTCTGAA 894 | TACCGCGAAGGGCTGTCTGGTTGAATACGCAACTTTTCGGCAGTACATGAGGGAACTTCC 895 | GAAGAATGCACCTCAGAAGCTGAATTTTCGGGAGATGCGTCAGGGGTTGATTGCCCTAGG 896 | ACGGCACTGCGTAGGTAGCAGATTTGAAACAGATTTGTACGAGTCGGCGACGAGTGAACT 897 | CATGGCCAATCATTCCGTTCAAACAGGGCGAAATATTTACGGTGTGGATTCCTTTTCGTT 898 | AACTAGTGTCAGTGGGACGACCGCCACTTTATTGCAGGAACGAGCTTCCGAGCGCTGGAT 899 | TCAATGGTTAGGCCTTGAAAGCGACTACCATTGTTCATTCTCTAGTACTCGGAATGCGGA 900 | AGACGTAGTGGCAGGTGAGGCGGCGAGTTCAGATCATCATCAAAAAATTTCAAGAGTAAC 901 | GCGAAAAAGGCCCCGAGAGCCCAAGAGTACAAACGATATCCTCGTCGCAGGCCGGAAACT 902 | 
CTTTGGCAGCTCCTTTGAATTCAGGGACTTGCATCAGTTGCGCTTATGTCATGAAATATA 903 | CATGGCAGACACACCCTCTGTGGCAGTACAGGCCCCACCGGGCTATGGTAAGACGGAGTT 904 | ATTTCATCTCCCCTTGATAGCACTGGCGTCTAAGGGCGACGTGAAATATGTGTCGTTTCT 905 | GTTTGTACCGTACACAGTGTTGCTTGCTAATTGCATGATCAGGTTGAGCCGATGCGGTTG 906 | CTTGAATGTGGCCCCTGTAAGAAACTTTATTGAAGAAGGTTGCGATGGCGTTACTGATTT 907 | ATACGTGGGGATCTACGATGATCTTGCTAGCACTAATTTCACAGACAGGATAGCTGCGTG 908 | GGAGAATATTGTTGAGTGCACCTTTAGGACCAACAACGTAAAATTGGGTTACCTCATTGT 909 | AGATGAGTTTCACAACTTTGAAACGGAGGTCTACCGGCAGTCGCAATTTGGGGGCATAAC 910 | TAACCTTGATTTTGACGCTTTTGAGAAAGCAATCTTTTTGAGCGGCACAGCACCTGAGGC 911 | TGTAGCTGATGCTGCGTTGCAGCGTATTGGGCTTACGGGACTGGCCAAGAAGTCGATGGA 912 | CATCAACGAGCTCAAACGGTCGGAAGATCTCAGCAGAGGTCTATCCAGCTATCCAACACG 913 | GATGTTTAATCTAATCAAGGAGAAATCCGAGGTGCCTTTAGGGCATGTTCATAAAATTTG 914 | GAAGAAAGTGGAATCACAGCCCGAAGAAGCACTGAAGCTTCTTTTAGCCCTCTTTGAAAT 915 | TGAACCAGAGTCGAAGGCCATTGTAGTTGCAAGCACAACCAACGAAGTGGAAGAATTGGC 916 | CTGCTCTTGGAGAAAGTATTTTAGGGTGGTATGGATACACGGGAAGCTGGGTGCTGCAGA 917 | AAAGGTGTCTCGCACAAAGGAGTTTGTCACTGACGGTAGCATGCGAGTTCTCATCGGAAC 918 | GAAATTAGTGACTGAAGGAATTGACATTAAGCAATTGATGATGGTGATCATGCTTGATAA 919 | TAGACTTAATATTATTGAGCTCATTCAAGGCGTAGGGAGACTAAGAGATGGGGGCCTCTG 920 | TTATCTATTATCTAGAAAAAACAGTTGGGCGGCAAGGAATCGTAAGGGTGAATTACCACC 921 | GATTAAGGAAGGCTGTATAACCGAACAGGTACGCGAGTTCTATGGACTTGAATCAAAGAA 922 | AGGAAAAAAGGGCCAGCATGTTGGATGCTGTGGCTCCAGGACAGACCTGTCTGCTGACAC 923 | AGTGGAACTGATAGAAAGAATGGACAGATTGGCTGAAAAACAGGCGACAGCTTCCATGTC 924 | GATCATTGCGTTACCGTCTAGCTTCCAGGAGAGCAATAGCAGTGACAGGTGCAGAAAGTA 925 | TTGCAGCAGTGATGAGGACAGCGACACGTGCATTCATGGTAGTGCTAATGCCAGTACCAA 926 | TGCGACTACCAACTCCAGCACTAATGCTACTACCACTGCCAGCACCAACGTCAGGACTAG 927 | TGCTACTACCACTGCCAGCATCAACGTCAGGACTAGTGCGATTACCACTGAAAGTACCAA 928 | CTCCAGCACTAATGCTACTACCACTGCCAGCACCAACGTCAGGACTAGTGCTACTACCAC 929 | TGCCAGCATCAACGTCAGGACTAGTGCGACTACCACTGAAAGTACCAACTCCAACACTAG 930 | TGCTACTACCACCGAAAGTACCGACTCCAACACTAGTGCTACTACCACCGAAAGTACCGA 931 | 
CTCCAACACTAGTGCTACTACCACTGCTAGCACCAACTCCAGCACTAATGCCACTACCAC 932 | TGCTAGCACCAACTCCAGCACTAATGCCACTACCACTGAAAGTACCAACGCTAGTGCCAA 933 | GGAGGACGCCAATAAAGATGGCAATGCTGAGGATAATAGATTCCATCCAGTCACCGACAT 934 | TAACAAAGAGTCGTATAAGCGGAAAGGGAGTCAAATGGTTTTGCTAGAGAGAAAGAAACT 935 | GAAAGCACAATTTCCCAATACTTCCGAGAATATGAATGTCTTACAGTTTCTTGGATTTCG 936 | GTCTGACGAAATTAAACATCTTTTCCTCTATGGTATTGACGTATACTTCTGCCCAGAGGG 937 | AGTATTCACACAATACGGATTATGCAAGGGCTGTCAAAAGATGTTCGAGCTCTGTGTCTG 938 | TTGGGCTGGCCAGAAAGTATCGTATCGGAGGATGGCTTGGGAAGCACTAGCTGTGGAGAG 939 | AATGCTGCGAAATGACGAGGAATACAAAGAATACTTGGAAGACATCGAGCCATATCATGG 940 | GGACCCTGTAGGATATTTGAAATATTTTAGCGTAAAAAGGGGAGAGATCTACTCTCAGAT 941 | ACAGAGAAATTATGCTTGGTACCTGGCCATTACTAGAAGAAGAGAAACAATTAGTGTATT 942 | GGATTCGACAAGAGGCAAGCAAGGGAGCCAAGTTTTCCGCATGTCTGGAAGGCAGATCAA 943 | AGAGTTGTATTATAAAGTATGGAGCAACTTGCGTGAATCGAAGACAGAGGTGCTGCAGTA 944 | CTTTTTGAACTGGGACGAGAAAAAGTGCCGGGAAGAATGGGAGGCAAAAGACGATACGGT 945 | CTTTGTGGAAGCGCTCGAGAAAGTTGGAGTTTTTCAGCGTTTGCGTTCCATGACGAGCGC 946 | TGGACTGCAGGGTCCGCAGTACGTCAAGCTGCAGTTTAGCAGGCATCATCGACAGTTGAG 947 | GAGCAGATATGAATTAAGTCTAGGAATGCACTTGCGAGATCAGCTTGCGCTGGGAGTTAC 948 | CCCATCTAAAGTGCCGCATTGGACGGCATTCCTGTCGATGCTGATAGGGCTGTTCTACAA 949 | TAAAACATTTCGGCAGAAACTGGAATATCTTTTGGAGCAGATTTCGGAGGTGTGGTTGTT 950 | ACCACATTGGCTTGATTTGGCAAACGTTGAAGTTCTCGCTGCAGATAACACGAGGGTACC 951 | GCTGTACATGCTGATGGTAGCGGTTCACAAAGAGCTGGATAGCGATGATGTTCCAGACGG 952 | TAGATTTGATATAATATTACTATGTAGAGATTCGAGCAGAGAAGTTGGAGAGTGAAGGAA 953 | ATTGTTGTTACGAAAGTCAGTGATTATGTATTGTGTAGTATAGTATATTGTAAGAAATTT 954 | TTTTTTCTAGGGAATATGCGTTTTGATGTAGTAGTATTTCACTGTTTTGATTTAGTGTTT 955 | GTTGCACGGCAGTAGCGAGAGACAAGTGGGAAAGAGTAGGATAAAAAGACAATCTATAAA 956 | AAGTAAACATAAAATAAAGGTAGTAAGTAGCTTTTGGTTGAACATCCGGGTAAGAGACAA 957 | CAGGGCTTGGAGGAGACGTACATGAGGGCTATTTAGGGCTATTTAGGGCTATGTAGAAGT 958 | GCTGTAGGGCTAAAGAACAGGGTTTCATTTTCATTTTTTTTTTTTAATTTCGGTCAGAAA 959 | GCCGGGTAAGGTATGACAGCGAGAGTAGAGGTAGATGTGAGAGAGTGTGTGGGTATATAT 960 | 
ATGTCACTGTATTGCATGCTGGATGGTGTTAGACAAGGCCGTAGGGACATATAGCATCTA 961 | GGAAGTAACCTTGTACGAAAATAGGCAATATTTCCTGTTTAGGCGATTGTGACGCAGATT 962 | TTAGTCCAACGATCTAGCGTCAAGGAATTTTTTTATAGTGGGACATTGCACCAAGGAAGT 963 | AACTTGATACGTCGTGGGTGAATGGGTCTGTTTTCTTATTCGGCGGGGTAATACATTTTT 964 | GGGGGAAGTTTGTCTGTCTGACGCGCCATATGTAGGTACGCCAAAAAGGGCTCCTCTACT 965 | TCGAAGCGCGAGGTCGTATACCTAATAAGGAAATGTAATTTATAACTTTCTATTATATTG 966 | GTCTTTTCGAGAGCGGAAGAAGTTGTAGGCTAAGCGCAGGCTAAGCGTAGGTCCATATTT 967 | AAAGTATCCAAGAGAATATCCACGAAGCGGCTGAGCAACGAACAGAATCCTGGTTCTCCT 968 | CGACTAAGCAGATAGTTAAGATACTGTGCACCATGGAAATTGAAAACGAAAGTACGTACC 969 | GACTACTTTATTTTTGCAGGCCGGAAATCAAGCGATGAATGAGACATCCTTCTGTTTTCT 970 | ATGTTGGGACAGACAGTCGCTTATCTTAGTGAGATTTCTTATTAACTGAATTTTCTTTGC 971 | TGCTGCTGGAGATTTGCACCTGCATAGCGCAGATTCTGCTTCTTCTCAATAGAGTAGCTT 972 | AATTATTACATTCTTAGATGATGATAAGACGGAAACTGGACAATCTTTTGTTTATATTGA 973 | TGGATTTCTTGTCAAAAAGCATAACAATCAACATACTATTGTTAATTTCGAAACTTACAA 974 | AAATAAAATGAAAGTTTCCGATAGGCGTAAGTTTGAAAAAGCAAACTTTGACGAGTTTGA 975 | GTCGGCTCTAAATAACAAAAACGACTTGGTACATTGTCCCTCAATAACTTTATTTGAATC 976 | GATCCCCACGGAAGTGCGGTCATTCTACGAAGACGAAAAGTCTGGCCTAATCAAAGTGGT 977 | AAAATTCAGAACTGGTGCAATGGATAGGAAAAGGTCTTTTGAAAAAATTGTCATTTCCGT 978 | CATGGTCGGGAAAAATGTACAAAAGTTCCTGACATTTGTTGAAGACGAACCAGATTTCCA 979 | GGGCGGACCAATCCCTTCAAAGTATCTTATTCCCAAGAAAATCAACTTGATGGTCTACAC 980 | GTTGTTTCAAGTGCATACTTTGAAATTCAATAGAAAGGATTACGATACCCTTTCTCTTTT 981 | TTACCTCAACAGAGGATACTATAATGAGTTGAGTTTCCGTGTCCTGGAACGTTGTCACGA 982 | AATAGCGAGTGCCAGGCCGAACGACAGCTCTACGATGCGTACTTTCACTGACTTTGTTTC 983 | TGGCGCACCTATTGTAAGGAGTCTTCAGAAAAGCACCATAAGGAAATATGGGTACAATTT 984 | GGCACCCTACATGTTCTTGTTACTACACGTAGATGAGCTATCGATTTTTTCTGCATACCA 985 | AGCAAGTTTACCTGGCGAAAAGAAAGTCGACACAGAGCGGCTGAAGCGTGATCTATGCCC 986 | ACGTAAACCCATTGAGATAAAGTACTTTTCACAGATATGTAACGATATGATGAACAAAAA 987 | AGACCGATTGGGTGATATTTTGCATATTATCTTGCGAGCATGTGCGCTCAATTTCGGGGC 988 | GGGTCCCCGTGGTGGCGCTGGTGACGAAGAGGATCGATCTATTACGAATGAAGAACCCAT 989 | 
TATTCCCTCTGTGGACGAGCATGGCTTGAAAGTATGTAAGTTGCGTAGTCCTAACACTCC 990 | ACGAAGACTCAGAAAAACACTAGATGCCGTGAAAGCTTTATTGGTGTCGTCTTGTGCTTG 991 | TACTGCAAGGGATTTAGATATATTTGATGACACCAACGGCGTTGCAATGTGGAAATGGAT 992 | CAAAATTCTGTACCACGAAGTAGCGCAGGAAACCACGCTGAAGGACTCTTATAGAATAAC 993 | TTTGGTACCTTCTTCTGATGGTATATCAGTATGTGGAAAACTTTTTAATCGCGAGTATGT 994 | CCGCGGCTTTTACTTTGCATGCAAGGCTCAGTTCGATAACCTTTGGGGAGAGTTGAACAA 995 | CTGCTTTTATATGCCTACAGTGGTTGATATTGCCAGCCTCATTTTGCGTAATCGAGAAGT 996 | TTTGTTCAGAGAGCCAAAGCGAGGAATTGACGAGTATCTGGAAAACGATTCTTTTCTTCA 997 | AATGATACCTGTTAAATATCGTGAAATTGTGCTGCCCAAGTTGAGAAGAGATACTAACAA 998 | AATGACCGCGGCTCTTAAAAATAAAGTCACTGTTGCAATTGACGAGCTTACGGTGCCACT 999 | TATGTGGATGGTCCATTTTGCCGTAGGATACCCTTACCGTTATCCAGAGCTTCAGCTACT 1000 | CGCTTTTGCCGGTCCTCAGCGCAACGTATACGTCGATGATACAACAAGACGCATCCAACT 1001 | GTACACTGATTACAACAAGAACGGTTCATCGGAGCCTCGACTTAAGACGCTTGACGGACT 1002 | CACTTCAGATTACGTGTTTTATTTTGTCACTGTGCTAAGGCAAATGCAAATATGTGCGCT 1003 | TGGTAACAGTTATGACGCTTTTAATCATGATCCTTGGATGGATGTGGTGGGATTTGAGGA 1004 | TCCAGATCAAGTAACAAATCGAGACATTTCGAGGATAGTTTTGTATTCCTACATGTTTCT 1005 | GAATACCGCGAAGGGCTGTCTGGTTGAATACGCAACTTTTCGGCAGTACATGAGGGAACT 1006 | TCCGAAGAATGCACCTCAGAAGCTGAATTTTCGGGAGATGCGTCAGGGGTTGATTGCCCT 1007 | AGGACGGCACTGCGTAGGTAGCAGATTTGAAACAGATTTGTACGAGTCGGCGACGAGTGA 1008 | ACTCATGGCCAATCATTCCGTTCAAACAGGGCGAAATATTTACGGTGTGGATTCCTTTTC 1009 | GTTAACTAGTGTCAGTGGGACGACCGCCACTTTATTGCAGGAACGAGCTTCCGAGCGCTG 1010 | GATTCAATGGTTAGGCCTTGAAAGCGACTACCATTGTTCATTCTCTAGTACTCGGAATGC 1011 | GGAAGACGTAGTGGCAGGTGAGGCGGCGAGTTCAGATCATGATCAAAAAATTTCAAGAGT 1012 | AACGCGAAAAAGGCCCCGAGAGCCCAAGAGTACAAACGATATCCTCGTCGCAGGCCAGAA 1013 | ACTCTTTGGCAGCTCCTTTGAATTCAGGGACTTGCATCAGTTGCGCTTATGTCATGAAAT 1014 | ATACATGGCAGACACACCCTCTGTGGCAGTACAGGCCCCACCGGGCTATGGTAAGACGGA 1015 | GTTATTTCATCTCCCCTTGATAGCACTGGCGTCTAAGGGCGACGTGAAATATGTGTCGTT 1016 | TCTGTTTGTACCGTACACAGTGTTGCTTGCTAATTGCATGATCAGGTTGAGCCGATGCGG 1017 | TTGCTTGAATGTGGCCCCTGTAAGAAACTTTATTGAAGAAGGTTGCGATGGCGTTACTGA 1018 | 
TTTATACGTGGGGATCTACGATGATCTTGCTAGCACTAATTTCACAGACAGGATAGCTGC 1019 | GTGGGAGAATATTGTTGAGTGCACCTTTAGGACCAACAACGTAAAATTGGGTTACCTCAT 1020 | TGTAGATGAGTTTCACAACTTTGAAACGGAGGTCTACCGGCAGTCGCAATTTGGGGGCAT 1021 | AACTAACCTTGATTTTGACGCTTTTGAGAAAGCAATCTTTTTGAGCGGCACAGCACCTGA 1022 | GGCTGTAGCTGATGCTGCGTTGCAGCGTATTGGGCTTACGGGACTGGCCAAGAAGTCGAT 1023 | GGACATCAACGAGCTCAAACGGTCGGAAGATCTCAGCAGAGGTCTATCCAGCTATCCAAC 1024 | ACGGATGTTTAATCTAATCAAGGAGAAATCCGAGGTGCCTTTAGGGCATGTTCATAAAAT 1025 | TTGGAAGAAAGTGGAATCACAGCCCGAAGAAGCACTGAAGCTTCTTTTAGCCCTCTTTGA 1026 | AATTGAACCAGAGTCGAAGGCCATTGTAGTTGCAAGCACAACCAACGAAGTGGAAGAATT 1027 | GGCCTGCTCTTGGAGAAAGTATTTTAGGGTGGTATGGATACACGGGAAGCTGGGTGCTGC 1028 | AGAAAAGGTGTCTCGCACAAAGGAGTTTGTCACTGACGGTAGCATGCGAGTTCTCATCGG 1029 | AACGAAATTAGTGACTGAAGGAATTGACATTAAGCAATTGATGATGGTGATCATGCTTGA 1030 | TAATAGACTTAATATTATTGAGCTCATTCAAGGCGTAGGGAGACTAAGAGATGGGGGCCT 1031 | CTGTTATCTATTATCTAGAAAAAACAGTTGGGCGGCAAGGAATCGTAAGGGTGAATTACC 1032 | ACCGATTAAGGAAGGCTGTATAACCGAACAGGTACGCGAGTTCTATGGACTTGAATCAAA 1033 | GAAAGGAAAAAAGGGCCAGCATGTTGGATGCTGTGGCTCCAGGACAGACCTGTCTGCTGA 1034 | CACAGTGGAACTGATAGAAAGAATGGACAGATTGGCTGAAAAACAGGCGACAGCTTCCAT 1035 | GTCGATCATTGCGTTACCGTCTAGCTTCCAGGAGAGCAATAGCAGTGACAGGTGCAGAAA 1036 | GTATTGCAGCAGTGATGAGGACAGCGACACGTGCATTCATGGTAGTGCTAATGCCAGTAC 1037 | CAATGCGACTACCAACTCCAGCACTAATGCTACTACCACTGCCAGCACCAACGTCAGGAC 1038 | TAGTGCTACTACCACTGCCAGCATCAACGTCAGGACTAGTGCGATTACCACTGAAAGTAC 1039 | CAACTCCAGCACTAATGCTACTACCACTGCCAGCACCAACGTCAGGACTAGTGCTACTAC 1040 | CACTGCCAGCATCAACGTCAGGACTAGTGCGACTACCACTGAAAGTACCAACTCCAACAC 1041 | TAGTGCTACTACCACCGAAAGTACCGACTCCAACACTAGTGCTACTACCACCGAAAGTAC 1042 | CGACTCCAACACTAGTGCTACTACCACTGCTAGCACCAACTCCAGCACTAATGCCACTAC 1043 | CACTGCTAGCACCAACTCCAGCACTAATGCCACTACCACTGAAAGTACCAACGCTAGTGC 1044 | CAAGGAGGACGCCAATAAAGATGGCAATGCTGAGGATAATAGATTCCATCCAGTCACCGA 1045 | CATTAACAAAGAGTCGTATAAGCGGAAAGGGAGTCAAATGGTTTTGCTAGAGAGAAAGAA 1046 | ACTGAAAGCACAATTTCCCAATACTTCCGAGAATATGAATGTCTTACAGTTTCTTGGATT 1047 | 
TCGGTCTGACGAAATTAAACATCTTTTCCTCTATGGTATTGACGTATACTTCTGCCCAGA 1048 | GGGAGTATTCACACAATACGGATTATGCAAGGGCTGTCAAAAGATGTTCGAGCTCTGTGT 1049 | CTGTTGGGCTGGCCAGAAAGTATCGTATCGGAGGATGGCTTGGGAAGCACTAGCTGTGGA 1050 | GAGAATGCTGCGAAATGACGAGGAATACAAAGAATACTTGGAAGACATCGAGCCATATCA 1051 | TGGGGACCCTGTAGGATATTTGAAATATTTTAGCGTAAAAAGGGGAGAGATCTACTCTCA 1052 | GATACAGAGAAATTATGCTTGGTACCTGGCCATTACTAGAAGAAGAGAAACAATTAGTGT 1053 | ATTGGATTCGACAAGAGGCAAGCAAGGGAGCCAAGTTTTCCGCATGTCTGGAAGGCAGAT 1054 | CAAAGAGTTGTATTATAAAGTATGGAGCAACTTGCGTGAATCGAAGACAGAGGTGCTGCA 1055 | GTACTTTTTGAACTGGGACGAGAAAAAGTGCCGGGAAGAATGGGAGGCAAAAGACGATAC 1056 | GGTCTTTGTGGAAGCGCTCGAGAAAGTTGGAGTTTTTCAGCGTTTGCGTTCCATGACGAG 1057 | CGCTGGACTGCAGGGTCCGCAGTACGTCAAGCTGCAGTTTAGCAGGCATCATCGACAGTT 1058 | GAGGAGCAGATATGAATTAAGTCTAGGAATGCACTTGCGAGATCAGCTTGCGCTGGGAGT 1059 | TACCCCATCTAAAGTGCCGCATTGGACGGCATTCCTGTCGATGCTGATAGGGCTGTTCTA 1060 | CAATAAAACATTTCGGCAGAAACTGGAATATCTTTTGGAGCAGATTTCGGAGGTGTGGTT 1061 | GTTACCACATTGGCTTGATTTGGCAAACGTTGAAGTTCTCGCTGCAGATAACACGAGGGT 1062 | ACCGCTGTACATGCTGATGGTAGCGGTTCACAAAGAGCTGGATAGCGATGATGTTCCAGA 1063 | CGGTAGATTTGATATAATATTACTATGTAGAGATTCGAGCAGAGAAGTTGGAGAGTGAAG 1064 | GAAATTGTTGTTACGAAAGTCAGTGATTATGTATTGTGTAGTATAGTATATTGTAAGAAA 1065 | TTTTTTTTTCTAGGGAATATGCGTTTTGATGTAGTAGTATTTCACTGTTTTGATTTAGTG 1066 | TTTGTTGCACGGCAGTAGCGAGAGACAAGTGGGAAAGAGTAGGATAAAAAGACAATCTAT 1067 | AAAAAGTAAACATAAAATAAAGGTAGTAAGTAGCTTTTGGTTGAACATCCGGGTAAGAGA 1068 | CAACAGGGCTTGGAGGAGACGTACATGAGGGCTATTT 1069 | --------------------------------------------------------------------------------