├── Basics_dna_data_explore.py ├── DNA_sequence_classification.py ├── PX000098_PRESENTATION.jpeg ├── README.md ├── chimp_data.txt ├── dog_data.txt ├── example.fa ├── human_data.txt └── test.fasta /Basics_dna_data_explore.py: -------------------------------------------------------------------------------- 1 | #pip install biopython 2 | #data : https://www.kaggle.com/thomasnelson/examplefasta 3 | 4 | from Bio import SeqIO 5 | for sequence in SeqIO.parse('example.fa', "fasta"): 6 | print(sequence.id) 7 | print(sequence.seq) 8 | print(len(sequence)) 9 | 10 | #Ordinal encoding DNA sequence data¶ 11 | # function to convert a DNA sequence string to a numpy array 12 | # converts to lower case, changes any non 'acgt' characters to 'n' 13 | import numpy as np 14 | import re 15 | def string_to_array(seq_string): 16 | seq_string = seq_string.lower() 17 | seq_string = re.sub('[^acgt]', 'z', seq_string) 18 | seq_string = np.array(list(seq_string)) 19 | return seq_string 20 | 21 | # create a label encoder with 'acgtn' alphabet 22 | from sklearn.preprocessing import LabelEncoder 23 | label_encoder = LabelEncoder() 24 | label_encoder.fit(np.array(['a','c','g','t','z'])) 25 | 26 | # function to encode a DNA sequence string as an ordinal vector 27 | # returns a numpy vector with a=0.25, c=0.50, g=0.75, t=1.00, n=0.00 28 | def ordinal_encoder(my_array): 29 | integer_encoded = label_encoder.transform(my_array) 30 | float_encoded = integer_encoded.astype(float) 31 | float_encoded[float_encoded == 0] = 0.25 # A 32 | float_encoded[float_encoded == 1] = 0.50 # C 33 | float_encoded[float_encoded == 2] = 0.75 # G 34 | float_encoded[float_encoded == 3] = 1.00 # T 35 | float_encoded[float_encoded == 4] = 0.00 # anything else, lets say z 36 | return float_encoded 37 | 38 | seq_test = 'TTCAGCCAGTG' 39 | ordinal_encoder(string_to_array(seq_test)) 40 | 41 | 42 | 43 | #One-hot encoding DNA sequence data¶ 44 | # function to one-hot encode a DNA sequence string 45 | # non 'acgt' bases (n) are 0000 46 | # returns a L x 4 numpy array 47 | from sklearn.preprocessing import OneHotEncoder 48 | def one_hot_encoder(seq_string): 49 | int_encoded = label_encoder.transform(seq_string) 50 | onehot_encoder = OneHotEncoder(sparse=False, dtype=int) 51 | int_encoded = int_encoded.reshape(len(int_encoded), 1) 52 | onehot_encoded = onehot_encoder.fit_transform(int_encoded) 53 | onehot_encoded = np.delete(onehot_encoded, -1, 1) 54 | return onehot_encoded 55 | 56 | seq_test = 'GAATTCTCGAA' 57 | one_hot_encoder(string_to_array(seq_test)) 58 | 59 | #treat the DNA sequence as a language (text) and use various "language" #processing methods. 60 | def Kmers_funct(seq, size): 61 | return [seq[x:x+size].lower() for x in range(len(seq) - size + 1)] 62 | 63 | mySeq = 'GTGCCCAGGTTCAGTGAGTGACACAGGCAG' 64 | Kmers_funct(mySeq, size=7) 65 | 66 | words = Kmers_funct(mySeq, size=6) 67 | joined_sentence = ' '.join(words) 68 | joined_sentence 69 | 70 | mySeq1 = 'TCTCACACATGTGCCAATCACTGTCACCC' 71 | mySeq2 = 'GTGCCCAGGTTCAGTGAGTGACACAGGCAG' 72 | sentence1 = ' '.join(Kmers_funct(mySeq1, size=6)) 73 | sentence2 = ' '.join(Kmers_funct(mySeq2, size=6)) 74 | 75 | # Creating the Bag of Words model 76 | from sklearn.feature_extraction.text import CountVectorizer 77 | cv = CountVectorizer() 78 | X = cv.fit_transform([joined_sentence, sentence1, sentence2]).toarray() 79 | -------------------------------------------------------------------------------- /DNA_sequence_classification.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | #matplotlib inline 6 | 7 | human_dna = pd.read_table('human_data.txt') 8 | human_dna.head() 9 | 10 | chimp_dna = pd.read_table('chimp_data.txt') 11 | chimp_dna.head() 12 | 13 | dog_dna = pd.read_table('dog_data.txt') 14 | dog_dna.head() 15 | 16 | 17 | #Let's define a function to collect all possible overlapping k-mers of a specified length from any sequence string. 18 | # function to convert sequence strings into k-mer words, default size = 6 (hexamer words) 19 | def Kmers_funct(seq, size=6): 20 | return [seq[x:x+size].lower() for x in range(len(seq) - size + 1)] 21 | 22 | #Now we can convert our training data sequences into short overlapping k-mers of legth 6. Lets do that for each species of data we have using our getKmers function. 23 | human_dna['words'] = human_dna.apply(lambda x: Kmers_funct(x['sequence']), axis=1) 24 | human_dna = human_dna.drop('sequence', axis=1) 25 | chimp_dna['words'] = chimp_dna.apply(lambda x: Kmers_funct(x['sequence']), axis=1) 26 | chimp_dna = chimp_dna.drop('sequence', axis=1) 27 | dog_dna['words'] = dog_dna.apply(lambda x: Kmers_funct(x['sequence']), axis=1) 28 | dog_dna = dog_dna.drop('sequence', axis=1) 29 | 30 | #Now, our coding sequence data is changed to lowercase, split up into all possible k-mer words of length 6 and ready for the next step. 31 | 32 | #Since we are going to use scikit-learn natural language processing tools to do the k-mer counting, we need to now convert the lists of k-mers for each gene into #string sentences of words that the count vectorizer can use. We can also make a y variable to hold the class labels. 33 | human_texts = list(human_dna['words']) 34 | for item in range(len(human_texts)): 35 | human_texts[item] = ' '.join(human_texts[item]) 36 | #separate labels 37 | y_human = human_dna.iloc[:, 0].values # y_human for human_dna 38 | 39 | 40 | #Now let's do the same for chimp and dog. 41 | chimp_texts = list(chimp_dna['words']) 42 | for item in range(len(chimp_texts)): 43 | chimp_texts[item] = ' '.join(chimp_texts[item]) 44 | #separate labels 45 | y_chim = chimp_dna.iloc[:, 0].values # y_chim for chimp_dna 46 | 47 | dog_texts = list(dog_dna['words']) 48 | for item in range(len(dog_texts)): 49 | dog_texts[item] = ' '.join(dog_texts[item]) 50 | #separate labels 51 | y_dog = dog_dna.iloc[:, 0].values # y_dog for dog_dna 52 | 53 | #Now let's review how to use sklearn's "Natural Language" Processing tools to convert our k-mer words into uniform length numerical vectors that represent counts for #every k-mer in the vocabulary 54 | 55 | # Creating the Bag of Words model using CountVectorizer() 56 | # This is equivalent to k-mer counting 57 | # The n-gram size of 4 was previously determined by testing 58 | from sklearn.feature_extraction.text import CountVectorizer 59 | cv = CountVectorizer(ngram_range=(4,4)) 60 | X = cv.fit_transform(human_texts) 61 | X_chimp = cv.transform(chimp_texts) 62 | X_dog = cv.transform(dog_texts) 63 | 64 | #Let's see what we have... for human we have 4380 genes converted into uniform length feature vectors of 4-gram k-mer (length 6) counts. For chimp and dog we have the expected same number of features with 1682 and 820 genes respectively. 65 | 66 | print(X.shape) 67 | print(X_chimp.shape) 68 | print(X_dog.shape) 69 | 70 | human_dna['class'].value_counts().sort_index().plot.bar() 71 | 72 | chimp_dna['class'].value_counts().sort_index().plot.bar() 73 | 74 | dog_dna['class'].value_counts().sort_index().plot.bar() 75 | 76 | """ 77 | So now that we know how to transform our DNA sequences into uniform length numerical vectors in the form of k-mer counts and ngrams, we can now go ahead and build a classification model that can predict the DNA sequence function based only on the sequence itself. 78 | Here I will use the human data to train the model, holding out 20% of the human data to test the model. Then we can really challenge the model's generalizability by trying to predict sequence function in other species (the chimpanzee and dog). 79 | So below we will - 1: train/test spit. 2: Build simple multinomial naive Bayes classifier and 3: test the model performance.""" 80 | 81 | # Splitting the human dataset into the training set and test set 82 | from sklearn.model_selection import train_test_split 83 | X_train, X_test, y_train, y_test = train_test_split(X, 84 | y_human, 85 | test_size = 0.20, 86 | random_state=42) 87 | 88 | 89 | print(X_train.shape) 90 | print(X_test.shape) 91 | 92 | #A multinomial naive Bayes classifier will be created. I previously did some parameter tuning and found the ngram size of 4 (reflected in the Countvectorizer() instance) and a model alpha of 0.1 did the best. Just to keep it simple I won't show that code here. 93 | 94 | ### Multinomial Naive Bayes Classifier ### 95 | # The alpha parameter was determined by grid search previously 96 | from sklearn.naive_bayes import MultinomialNB 97 | classifier = MultinomialNB(alpha=0.1) 98 | classifier.fit(X_train, y_train) 99 | 100 | #Now let's make predictions on the human hold out test set and see how it performes on unseen data. 101 | 102 | y_pred = classifier.predict(X_test) 103 | 104 | """ 105 | Okay, so let's look at some model performce metrics like the confusion matrix, accuracy, precision, recall and f1 score. We are getting really good results on our unseen data, so it looks like our model did not overfit to the training data. In a real project I would go back and sample many more train test splits since we have a relatively small data set. 106 | 107 | """ 108 | from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score 109 | print("Confusion matrix\n") 110 | print(pd.crosstab(pd.Series(y_test, name='Actual'), pd.Series(y_pred, name='Predicted'))) 111 | def get_metrics(y_test, y_predicted): 112 | accuracy = accuracy_score(y_test, y_predicted) 113 | precision = precision_score(y_test, y_predicted, average='weighted') 114 | recall = recall_score(y_test, y_predicted, average='weighted') 115 | f1 = f1_score(y_test, y_predicted, average='weighted') 116 | return accuracy, precision, recall, f1 117 | accuracy, precision, recall, f1 = get_metrics(y_test, y_pred) 118 | print("accuracy = %.3f \nprecision = %.3f \nrecall = %.3f \nf1 = %.3f" % (accuracy, precision, recall, f1)) 119 | 120 | """ 121 | Now for the real test. Let's see how our model perfoms on the DNA sequences from other species. First we'll try the Chimpanzee, which we would expect to be very similar to human. Then we will try man's (and woman's) best friend, the Dog DNA sequences. 122 | """ 123 | #Make predictions for the Chimp and dog sequences¶ 124 | 125 | # Predicting the chimp, dog and worm sequences 126 | y_pred_chimp = classifier.predict(X_chimp) 127 | y_pred_dog = classifier.predict(X_dog) 128 | 129 | # performance on chimp genes 130 | print("Confusion matrix\n") 131 | print(pd.crosstab(pd.Series(y_chim, name='Actual'), pd.Series(y_pred_chimp, name='Predicted'))) 132 | accuracy, precision, recall, f1 = get_metrics(y_chim, y_pred_chimp) 133 | print("accuracy = %.3f \nprecision = %.3f \nrecall = %.3f \nf1 = %.3f" % (accuracy, precision, recall, f1)) 134 | 135 | # performance on dog genes 136 | print("Confusion matrix\n") 137 | print(pd.crosstab(pd.Series(y_dog, name='Actual'), pd.Series(y_pred_dog, name='Predicted'))) 138 | accuracy, precision, recall, f1 = get_metrics(y_dog, y_pred_dog) 139 | print("accuracy = %.3f \nprecision = %.3f \nrecall = %.3f \nf1 = %.3f" % (accuracy, precision, recall, f1)) 140 | -------------------------------------------------------------------------------- /PX000098_PRESENTATION.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nageshsinghc4/DNA-Sequence-Machine-learning/e87d56545df6d899099767635e3ca1213e2305be/PX000098_PRESENTATION.jpeg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DNA Sequencing using Machine learning 2 | ![Image](https://github.com/nageshsinghc4/DNA-Sequence-Machine-learning/blob/master/PX000098_PRESENTATION.jpeg) 3 | The double-helix is the correct chemical representation of DNA. But DNA is special. It’s a nucleotide made of four types of nitrogen bases: Adenine (A), Thymine (T), Guanine (G) and Cytosine. We always call them A, C, Gand T. 4 | 5 | A genome is a complete collection of DNA in an organism. All living species possess a genome, but they differ considerably in size. 6 | 7 | As a data-driven science, genomics extensively utilizes machine learning to capture dependencies in data and infer new biological hypotheses. Nonetheless, the ability to extract new insights from the exponentially increasing volume of genomics data requires more powerful machine learning models. By efficiently leveraging large data sets, deep learning has reconstructed fields such as computer vision and natural language processing. It has become the method of preference for many genomics modeling tasks, including predicting the influence of genetic variation on gene regulatory mechanisms such as DNA receptiveness and splicing. 8 | 9 | So here, we will understand DNA structure and how machine learning can be used to work with DNA sequence data. 10 | 11 | Pre requisits: 12 | 13 | 1. **Biopython** :is a collection of python modules that provide functions to deal with DNA, RNA & protein sequence. 14 | 15 | ```pip install biopython``` 16 | 17 | 2. **Squiggle** : a software tool that automatically generates interactive web-based two-dimensional graphical representations of raw DNA sequences. 18 | 19 | ```pip install Squiggle``` 20 | 21 | DNA sequence data usually are contained in a file format called “fasta” format. Fasta format is simply a single line prefixed by the greater than symbol that contains annotations and another line that contains the sequence: 22 | 23 | ***“AAGGTGAGTGAAATCTCAACACGAGTATGGTTCTGAGAGTAGCTCTGTAACTCTGAGG”*** 24 | 25 | In this repository, we are building a classification model that is trained on the human DNA sequence and can predict a gene family based on the DNA sequence of the coding sequence. To test the model, we will use the DNA sequence of humans, dogs, and chimpanzees and compare the accuracies. 26 | 27 | You can read this article to understand the project step by step from [www.theaidream.com](https://www.theaidream.com/post/demystify-dna-sequencing-with-machine-learning-and-python) or my [kaggle notebook](https://www.kaggle.com/nageshsingh/demystify-dna-sequencing-with-machine-learning) for implementation. 28 | -------------------------------------------------------------------------------- /example.fa: -------------------------------------------------------------------------------- 1 | >ENST00000435737.5 cds chromosome:GRCh38:3:112039620:112081269:1 gene:ENSG00000176040.13 gene_biotype:protein_coding transcript_biotype:nonsense_mediated_decay gene_symbol:TMPRSS7 description:transmembrane protease, serine 7 [Source:HGNC Symbol;Acc:HGNC:30846] 2 | ATGTTTCGCATCACCAACATTGAGTTTCTTCCCGAATACCGACAAAAGGAGTCCAGGGAA 3 | TTTCTTTCAGTGTCACGGACTGTGCAGCAAGTGATAAACCTGGTTTATACAACATCTGCC 4 | TTCTCCAAATTTTATGAGCAGTCTGTTGTTGCAGATGTCAGCAACAACAAAGGCGGCCTC 5 | CTTGTCCACTTTTGGATTGTTTTTGTCATGCCACGTGCCAAAGGCCACATCTTCTGTGAA 6 | GACTGTGTTGCCGCCATCTTGAAGGACTCCATCCAGACAAGCATCATAAACCGGACCTCT 7 | GTGGGGAGCTTGCAGGGACTGGCTGTGGACATGGACTCTGTGGTACTAAATGAAGTCCTG 8 | GGGCTGACTCTCATTGTCTGGATTGACTGA 9 | >ENST00000419127.5 cds chromosome:GRCh38:3:112039706:112081165:1 gene:ENSG00000176040.13 gene_biotype:protein_coding transcript_biotype:protein_coding gene_symbol:TMPRSS7 description:transmembrane protease, serine 7 [Source:HGNC Symbol;Acc:HGNC:30846] 10 | ATGTTTCGCATCACCAACATTGAGTTTCTTCCCGAATACCGACAAAAGGAGTCCAGGGAA 11 | TTTCTTTCAGTGTCACGGACTGTGCAGCAAGTGATAAACCTGGTTTATACAACATCTGCC 12 | TTCTCCAAATTTTATGAGCAGTCTGTTGTTGCAGATGTCAGCAACAACAAAGGCGGCCTC 13 | CTTGTCCACTTTTGGATTGTTTTTGTCATGCCACGTGCCAAAGGCCACATCTTCTGTGAA 14 | GACTGTGTTGCCGCCATCTTGAAGGACTCCATCCAGACAAGCATCATAAACCGGACCTCT 15 | GTGGGGAGCTTGCAGGGACTGGCTGTGGACATGGACTCTGTGGTACTAAATGACAAAGGC 16 | TGCTCTCAGTACTTCTATGCAGAGCATCTGTCTCTCCACTACCCGCTGGAGATTTCTGCA 17 | GCCTCAGGGAGGCTGATGTGTCACTTCAAGCTGGTGGCCATAGTGGGCTACCTGATTCGT 18 | CTCTCAATCAAGTCCATCCAAATCGAAGCCGACAACTGTGTCACTGACTCCCTGACCATT 19 | TACGACTCCCTTTTGCCCATCCGGAGCAGCATCTTGTACAGAATTTGTGAACCCACAAGA 20 | ACATTAATGTCATTTGTTTCTACAAATAATCTCATGTTGGTGACATTTAAGTCTCCTCAT 21 | ATACGGAGGCTCTCAGGAATCCGGGCATATTTTGAGGTCATTCCAGAACAAAAGTGTGAA 22 | AACACAGTGTTGGTCAAAGACATCACTGGCTTTGAAGGGAAAATTTCAAGCCCATATTAC 23 | CCGAGCTACTATCCTCCAAAATGCAAGTGTACCTGGAAATTTCAGACTTCTCTATCAACT 24 | CTTGGCATAGCACTGAAATTCTATAACTATTCAATAACCAAGAAGAGTATGAAAGGCTGT 25 | GAGCATGGATGGTGGGAAATTAATGAGCACATGTACTGTGGCTCCTACATGGATCATCAG 26 | ACAATTTTTCGAGTGCCCAGCCCTCTGGTTCACATTCAGCTCCAGTGCAGTTCAAGGCTT 27 | TCAGACAAGCCACTTTTGGCAGAATATGGCAGTTACAACATCAGTCAACCCTGCCCTGTT 28 | GGATCTTTTAGATGCTCCTCCGGTTTATGTGTCCCTCAGGCCCAGCGTTGTGATGGAGTA 29 | AATGACTGCTTTGATGAAAGTGATGAACTGTTTTGCGTGAGCCCTCAACCTGCCTGCAAT 30 | ACCAGCTCCTTCAGGCAGCATGGCCCTCTCATCTGTGATGGCTTCAGGGACTGTGAGAAT 31 | GGCCGGGATGAGCAAAACTGCACTCAAAGTATTCCATGCAACAACAGAACTTTTAAGTGT 32 | GGCAATGATATTTGCTTTAGGAAACAAAATGCAAAATGTGATGGGACAGTGGATTGTCCA 33 | GATGGAAGTGATGAAGAAGGCTGCACCTGCAGCAGGAGTTCCTCCGCCCTTCACCGCATC 34 | ATCGGAGGCACAGACACCCTGGAGGGGGGTTGGCCGTGGCAGGTCAGCCTCCACTTTGTT 35 | GGATCTGCCTACTGTGGTGCCTCAGTCATCTCCAGGGAGTGGCTTCTTTCTGCAGCCCAC 36 | TGTTTTCATGGAAACAGGCTGTCAGATCCCACACCATGGACTGCACACCTCGGGATGTAT 37 | GTTCAGGGGAATGCCAAGTTTGTCTCCCCGGTGAGAAGAATTGTGGTCCACGAGTACTAT 38 | AACAGTCAGACTTTTGATTATGATATTGCTTTGCTACAGCTCAGTATTGCCTGGCCTGAG 39 | ACCCTGAAACAGCTCATTCAGCCAATATGCATTCCTCCCACTGGTCAGAGAGTTCGCAGT 40 | GGGGAGAAGTGCTGGGTAACTGGCTGGGGGCGAAGACACGAAGCAGATAATAAAGGCTCC 41 | CTCGTTCTGCAGCAAGCGGAGGTAGAGCTCATTGATCAAACGCTCTGTGTTTCCACCTAC 42 | GGGATCATCACTTCTCGGATGCTCTGTGCAGGCATAATGTCAGGCAAGAGAGATGCCTGC 43 | AAAGGAGATTCGGGTGGACCTTTATCTTGTCGAAGAAAAAGTGATGGAAAATGGATTTTG 44 | ACTGGCATTGTTAGCTGGGGACATGGAAGTGGACGACCAAACTTTCCTGGTGTTTACACA 45 | AGGGTGTCAAACTTTGTTCCCTGGATTCATAAATATGTCCCTTCTCTTTTGTAA 46 | -------------------------------------------------------------------------------- /test.fasta: -------------------------------------------------------------------------------- 1 | >HSNFM 2 | CAGCTGCTTTAAGACAAGGGGTGGGGGAAGGGGAGGGAGGCAAGAAAAGATGAGGGTGGG 3 | GGAGGGGAAAAGAGGGAATGCAAGGGGAAGGAGGGAGGAGACGGGGAGAAGGAAAGATTG 4 | GAAGAAAAGGATCTCCGAGGAAGGGGCTGAGAGAAGGGCAGGGTGAACTGGACTAAAGGC 5 | CAGAGTAGGAAGGAGAAGAGGGGCCAAAAAAGAAGGGGATGAAATTAAGCACAGAAGATG 6 | GGTAAAGAAAAAAGTATCAGGGAAAGGGCAAAATAAGAGAAAGCCTTGAGGATAAGAGGG 7 | TAGAAGGCTAAAGAACAAGGGGACCACGGGGTCGGGGAAGCGCTGCCTGAACGGCGGGAC 8 | AGTGACAAAAGAAAGGGCGCTGGCGATATTCCGACCAAGGGAAACGCAATCGGGAGGTGA 9 | GAAATCGGGAGGTGAGAAATGGAAAGAAGGCGAATCCGCGGCTACAAGTAGCCTGGGACT 10 | GAAAGGGGACCTGGGGGAGGGGCTGGGCCCAGGGCAGAAAAGTCCAGGTTCCCATGCGGC 11 | CTGGGCCCACGTGGAGCGGGCGCTGAATCACCGTTCAGCCGCCCCCCTCCCCTCCTCCCC 12 | GACCGGTGCCCGCAGTCCCCGCCTCCTCGGCCGCCGCCTCCACGGGGCGGGCCCTGGCCC 13 | GGGACCAGCGCCGCGGCTATAAATGGGCTGCGGCGAGGCCGGCAGAACGCTGTGACAGCC 14 | ACACGCCCCAAGGCCTCCAAGATGAGCTACACGTTGGACTCGCTGGGCAACCCGTCCGCC 15 | TACCGGCGGGTAACCGAGACCCGCTCGAGCTTCAGCCGCGTCAGCGGCTCCCCGTCCAGT 16 | GGCTTCCGCTCGCAGTCGTGGTCCCGCGGCTCGCCCAGCACCGTGTCCTCCTCCTATAAG 17 | CGCAGCATGCTCGCCCCGCGCCTCGCTTACAGCTCGGCCATGCTCAGCTCCGCCGAGAGC 18 | AGCCTTGACTTCAGCCAGTCCTCGTCCCTGCTCAACGGCGGCTCCGGACCCGGCGGCGAC 19 | TACAAGCTGTCCCGCTCCAACGAGAAGGAGCAGCTGCAGGGGCTGAACGACCGCTTTGCC 20 | GGCTACATAGAGAAGGTGCACTACCTGGAGCAGCAGAATAAGGAGATTGAGGCGGAGATC 21 | CAGGCGCTGCGGCAGAAGCAGGCCTCGCACGCCCAGCTGGGCGACGCGTACGACCAGGAG 22 | ATCCGCGAGCTGCGCGCCACCCTGGAGATGGTGAACCACGAGAAGGCTCAGGTGCAGCTG 23 | GACTCGGACCACCTGGAGGAAGACATCCACCGGCTCAAGGAGCGCTTTGAGGAGGAGGCG 24 | CGGTTGCGGGACGACACTGAGGCGGCCATCCGGGCGCTGCGCAAAGACATCGAGGAGGCG 25 | TCGCTGGTCAAGGTGGAGCTGGACAAGAAGGTGCAGTCGCTGCAGGATGAGGTGGCCTTC 26 | CTGCGGAGCAACCACGAGGAGGAGGTGGCCGACCTTCTGGCCCAGATCCAGGCATCGCAC 27 | ATCACGGTGGAGCGCAAAGACTACCTGAAGACAGACATCTCGACGGCGCTGAAGGAAATC 28 | CGCTCCCAGCTCGAAAGCCACTCAGACCAGAATATGCACCAGGCCGAAGAGTGGTTCAAA 29 | TGCCGCTACGCCAAGCTCACCGAGGCGGCCGAGCAGAACAAGGAGGCCATCCGCTCCGCC 30 | AAGGAAGAGATCGCCGAGTACCGGCGCCAGCTGCAGTCCAAGAGCATCGAGCTAGAGTCG 31 | GTGCGCGGCACCAAGGAGTCCCTGGAGCGGCAGCTCAGCGACATCGAGGAGCGCCACAAC 32 | CACGACCTCAGCAGCTACCAGGTAGGAACCGCGGCCTCGGCCAGCCTCGGCCACGGCCAC 33 | GCCGCGCGCCCCCGACACTTGGGCTCGTGCCCAGGCGCCCTCTCCGCCGCGCTCCCTGGT 34 | GGCCGCTCGCTAGAGCACGCGCGCCGCAGACCTAGGGTATTTGCGGATCAGCGTCCTCGC 35 | CCATCTCATCCTCCACACTCCGCCCCCACCCACCTGCCCCAGCTGCTAAGGGTCTTGACC 36 | TTTTTCAGAAACGTGCATCTTTTCCCAGTTCTAATTTTGCACGCTTGCACGTTTAAAGCA 37 | GGAGGGATGAATTCGGTAGTGGATAAATCAGCAACTTTAGGATAGCTTATGCAGAAACGC 38 | GTGTATTCTCTACTTTTCCGGCAGTGATCGGAAGAGCTCTCAAAATTGGCTTCAGCCAAA 39 | GGGCTCAGATGGGAATGGCCAGGTCAGCCATGGAGTTTCCCCATGCATGTTTGTGTCCTG 40 | TTGAGACGTGTTCTAAGTCCACTGGTCTCCGTGCGTGATGTGCCCAGGAAGTGTCCTATT 41 | GTCTTACTGATCTTGTATCTTCATTTGAGAATCGCTTAGATTTAAAAGAAAAAGGGGGTG 42 | GGACGGGGGGCTGGGAGTCAGGTGTCAGCGAGGTTTGCAGAAGTGGAGGGAGACGGGAGG 43 | AGGCCAGGGGGAAGGGGTAGCAAGTGGTTTGCGAAGGAAGTTGCTGTTTGCAAGGATGAG 44 | TCTGGGGAGATTCTCTGTGTCTGTTTCAGGACACCATCCAGCAGCTGGAAAATGAGCTTC 45 | GGGGCACAAAGTGGGAAATGGCTCGTCATTTGCGCGAATACCAGGACCTCCTCAACGTCA 46 | AGATGGCTCTGGATATAGAAATCGCTGCGTACAGGTACGATGCTTACTACGTGCGTGGCC 47 | GGAACACTAACCGCAGTGCAGAGGCTGTTCCGGCAGAGCTTCCACCACTTAAGTTAAAGC 48 | AGGCAGGGTGCAGGCATCAACTCAGCACCTGGTTATCTTGCTTACTTAAAAAGAAATTAT 49 | TCTAAAGAATTGCAAGTGTAGTTTTATCTCTTTTTATGCAGCTTTAAAAGAATGAATACT 50 | AGTAGAAACAAAAGGTTTTTGAATTACACAAAGGAGGTGCAGATTAATCTCAATGCACAT 51 | GCTTAAACTTTTTATGGAAAAATGTTTTCAAATGCTGGAAGCATGAACAGAGTTTTGGTT 52 | TCTAATATTTCATCTAGTGGTTTCAGCTTTTCAAATGTATAATGTCAAGGACAAACACCA 53 | GGACGTTCTATTTCTCTGTTTCTCTGTTATATAGCTTACTATTGCCATCATCTGGCTGAG 54 | AATAGATATAGAATGATAGAATATAGATATAGTTCTTTTATATATGTAGATAATTTATAT 55 | GTATTATATTTTATGCTAGACTGTAGTATAAATTATATCAATATATCATGTATGATATTA 56 | ATCTAGATCTATAGATACACATATGTGCATATGCATATAAATCTAGATATATAGACACAA 57 | ATATATATGATCGTTTTATAGATAGTGAGATAGGTTATAGGTCTATTAACTGAAGTGACC 58 | TTGCTGTTGAGTAAGCGCAAAGGACAAAATCGTTGATTAAAATTTTTCTGCTACCAATAA 59 | GGTAGTTATAATATAACGAGAATAAATTGCATTTACAGAGCTATCTCTCTTTTCAGGAAA 60 | GCTGAATAACTACATTAAATAGACACTTTATGATAAAAATTATCAACAAATTTATAACTC 61 | GATACACCTGAAAATCTAAACGTTTAAGAAAGTGACTACTCTCAGAAAGGCTGTTTGGCT 62 | TTGGAGTTTGGGGGCGTTTTGTTTATGGCTTCTTGTTTTTTTGTTTTTGTTTTTGTTTTT 63 | TGCTATTTGGCCACTAACAAGTTTTTCAGCATATTCATGTTGTACCTAATGGATCTCTAC 64 | TGCAGGGCCAAGACTTAGTAGCTGGGTGTGGTTAGTGGACTATTGGGCAAGGTTAGTCAT 65 | TGTAGGGGGCAACTGTCTGGCAGTCCAGGAGAATCTTTCTCTGTCACTGAGTATAATGTA 66 | ATATGCCAGTAAGTGATAGCAGGTATTATAGTGAATTCATAGAATATTCTACTTATGTAA 67 | TTCTATTTATTCAAAGGTAGCTACCACAATACCCAGAATGTAATGAAGCTCAGAAGGCCT 68 | AGTGAAATTTTTACTATGTCTTATGTTCTTGGATTTTCTCCTTAGAAAACTCCTGGAGGG 69 | TGAAGAGACTAGATTTAGCACATTTGCAGGAAGCATCACTGGGCCACTGTATACACACCG 70 | ACCCCCAATCACAATATCCAGTAAGATTCAGAAAACCAAGGTGGAAGCTCCCAAGCTTAA 71 | GGTCCAACACAAATTTGTCGAGGAGATCATAGAGGAAACCAAAGTGGAGGATGAGAAGTC 72 | AGAAATGGAAGAGGCCCTGACAGCCATTACAGAGGAATTGGCCGCTTCCATGAAGGAAGA 73 | GAAGAAAGAAGCAGCAGAAGAAAAGGAAGAGGAACCCGAAGCTGAAGAAGAAGAAGTAGC 74 | TGCCAAAAAGTCTCCAGTGAAAGCAACTGCACCTGAAGTTAAAGAAGAGGAAGGGGAAAA 75 | GGAGGAAGAAGAAGGCCAGGAAGAAGAGGAGGAAGAAGATGAGGGAGCTAAGTCAGACCA 76 | AGCCGAAGAGGGAGGATCCGAGAAGGAAGGCTCTAGTGAAAAAGAGGAAGGTGAGCAGGA 77 | AGAAGGAGAAACAGAAGCTGAAGCTGAAGGAGAGGAAGCCGAAGCTAAAGAGGAAAAGAA 78 | AGTGGAGGAAAAGAGTGAGGAAGTGGCTACCAAGGAGGAGCTGGTGGCAGATGCCAAGGT 79 | GGAAAAGCCAGAAAAAGCCAAGTCTCCTGTGCCAAAATCACCAGTGGAAGAGAAAGGCAA 80 | GTCTCCTGTGCCCAAGTCACCAGTGGAAGAGAAAGGCAAGTCTCCTGTGCCCAAGTCACC 81 | AGTGGAAGAGAAAGGCAAGTCTCCTGTGCCGAAATCACCAGTGGAAGAGAAAGGCAAGTC 82 | TCCTGTGTCAAAATCACCAGTGGAAGAGAAAGCCAAATCTCCTGTGCCAAAATCACCAGT 83 | GGAAGAGGCAAAGTCAAAAGCAGAAGTGGGGAAAGGTGAACAGAAAGAGGAAGAAGAAAA 84 | GGAAGTCAAGGAAGCTCCCAAGGAAGAGAAGGTAGAGAAAAAGGAAGAGAAACCAAAGGA 85 | TGTGCCAGAGAAGAAGAAAGCTGAGTCCCCTGTAAAGGAGGAAGCTGTGGCAGAGGTGGT 86 | CACCATCACCAAATCGGTAAAGGTGCACTTGGAGAAAGAGACCAAAGAAGAGGGGAAGCC 87 | ACTGCAGCAGGAGAAAGAGAAGGAGAAAGCGGGAGGAGAGGGAGGAAGTGAGGAGGAAGG 88 | GAGTGATAAAGGTGCCAAGGGATCCAGGAAGGAAGACATAGCTGTCAATGGGGAGGTAGA 89 | AGGAAAAGAGGAGGTAGAGCAGGAGACCAAGGAAAAAGGCAGTGGGAGGGAAGAGGAGAA 90 | AGGCGTTGTCACCAATGGCCTAGACTTGAGCCCAGCAGATGAAAAGAAGGGGGGTGATAA 91 | AAGTGAGGAGAAAGTGGTGGTGACCAAAACGGTAGAAAAAATCACCAGTGAGGGGGGAGA 92 | TGGTGCTACCAAATACATCACTAAATCTGTAACCGTCACTCAAAAGGTTGAAGAGCATGA 93 | AGAGACCTTTGAGGAGAAACTAGTGTCTACTAAAAAGGTAGAAAAAGTCACTTCACACGC 94 | CATAGTAAAGGAAGTCACCCAGAGTGACTAAGATTTGAGTCCATTGCAAAAGGTTAAGCC 95 | ATATGACAATTTCAAAATGCATGTGATTGGCAGCTTCAAAACAGAACGGGTTCTCCCATG 96 | GGGGCTCCAGACATTGTATTTTACTTTGTGCAATATGAGGGGACTGCATGCAAGCTCAGG 97 | GTGCTCCCTCCTCAGTCTTTGGGGGATTCAAATGCATGATATTGTATGTACCTGGGAAAT 98 | TTGCCGATTTCCTAAGCTGTTGGAAGGGGGTCACTTAAGGGGGGATGTCTTGAGATGTAT 99 | TATGCAAAGTACCAACTGAGCCAAAAACAATAAATGAAACACAGAACTCAGCCTTAAGAA 100 | AGCTATATATGAATAATTATGTTTACCTCACTGGTGCATTTAAAATGGACTTTTGTTCAT 101 | GGGAGAACCTCGTTGACATGCACAGTTTGCAATCTTATGTTGATCGATGTTAAACGTCAC 102 | AGCAGTACTTGCTCAATAAAGGTCATATTGGAAACATAGTCAATTGCTGAGTCTTATGTC 103 | ATTTCTCTTTTTCTAATTTTTATTTATTTATTTTTATTTAGAGATGGGGTCTTGCTATGT 104 | GGCCTCAAGCAGTCCTCCCACCTCAGCCACCCAAAGTGCTGGGATTACAGGCATGAGCCA 105 | CCACGCCCAGCCTGTTATGCCATTTCAAAGTGAAATCTCCACTACCTGAAGCTTGC 106 | >HSODF2 107 | AAGGTGAGTGAAATCTCAACACGAGTATGGTTCTGAGAGTAGCTCTGTAACTCTGAGGAT 108 | GGTCTCTGGAGACCATGACTGTGTACAGTTCACACGGTAACCAGAAGACTATGACATACT 109 | TCAGAAGGTGGTGAGGTCATAGAACACAAGCTTTAAAGTAAGTGAATCATGTGTGCCTCA 110 | TTTATTTTTAAAAGCAACTTCTGAGAAGGGCTTAGAACAAATTTTTTCCCTGAGTGCCAT 111 | TTCCCAAAGGTACTCACAGAACAATCAGGTGTGACCATAATGGCTGCACTGAGTTGTCTC 112 | TTGGACAGTGTCAGAAGGGACATAAAGAAGGTGGACAGAGAACTAAGGCAACTGAGATGC 113 | ATCGACGAATTTAGCACACGGTGCCTGTGCGACTTGTATATGCACCCCTATTGCTGCTGT 114 | GACTTGCACCCATATCCGTACTGCTTGTGCTATTCCAAGCGATCACGCTCTTGCGGCCTG 115 | TGTGATCTCTACCCATGTTGCCTGTGTGATTATAAGCTTTACTGTCTGCGACCATCTCTC 116 | AGAAGTTTGGAGAGGAAAGCCATCAGAGCCATAGAAGATGAGAAGCGAGAGCTTGCCAAG 117 | TAAAATAACTTATTTTTAAATTTTTATAGTCGGTATATTAGCCTTATAAGTTGGAATAAG 118 | GAAAAATATGTGACAATCAATAGTTGAACAAAGATTAAAAGGGTTCAGGATAATGACCCT 119 | GGTGACTTAGGAAAGATTTAAGGCTTGCTATTCAAAGCTAGAAATCAAATATTGCTTCTC 120 | AAGCTGTGTCTGGATTCTGAGGTCTGAGCTCCCTCCCACCCTATCCCTTTTCTCAATTCC 121 | AGACTGAGAAGAACAACAAATAGAATTCTGGCTTCCTCCTGCTGTAGCAGTAACATTTTA 122 | GGATCGGTGAATGTATGCGGTTTTGAACCCGATCAAGTCAAAGTTCGAGTGAAGGATGGA 123 | AAGGTATGTGTGTCGGCTGAGCGGGAGAACAGGTACGACTGCCTTGGATCGAAAAAGTAC 124 | AGCTACATGAACATCTGCAAAGAGTTCAGCTTGCCGCCCTGTGTGGATGAGAAGGATGTA 125 | ACATACTCCTATGGGCTCGGCAGCTGTGTCAAGATCGAGTCTCCTTGCTACCCTTGCACT 126 | TCTCCTTGCAGCCCGTGCAGCCCGTGCAACCCCTGCAACCCCTGCAGCCCCTGCAACCCG 127 | TGCAGCCCATATGATCCTTGCAACCCGTGTTATCCCTGTGGAAGCCGATTTTCCTGTAGG 128 | AAGATGATTTTGTAAAGTGCGCATAGGAACCCATTACTTAATAGAAGTCAGTTACTCCAG 129 | CCAGGCAGCTCTCCCAATGTTTCTCCTCTCCTTCCCATGGCCCCTGTTGTTGAAGTACGT 130 | AGGAAACTGAATACATAACTGCAATCTGCTGGTGTTGTCTGAAAGTCTTT 131 | >HSPAT133 132 | TGCAGTTGTCTATCAAAATTTTACATATACTTTGGCACACAATTACACTCGTTGGAATTT 133 | ATCTCACAGACACATTCACATTTATATAGTCAAGGATGTTGACTCCAACAACATTGGTAT 134 | AAGTAAAAAGTTGGAAACTGCCTACATGCTCATGGATAGGGGATAAATCGACTAACTCAA 135 | GGTCCTCCCTTACAATGGAGTACTATACAGCCTTGAAAAAGAATGACAAGATCTTTATGT 136 | CAGGACATGGCACAATCTCTAAGTTATTTCAAGTGAAAAAATAATAATAATAATAATATT 137 | GCAGAACAATGTGTATGGTTGGATATACATAAATTCTTCTGAAAGAATACCCAAGAAACT 138 | TAAATGTGGCTACCTCAGGAGCTAGGAATGAGGTAGGTGGAGAGGAGGGACTTTTACTTT 139 | TCAATTTAGAATATACCCTGTTGTGCTAGTTGACTTTCCTCCTACTGTTAACAAGTATTA 140 | CAGTCCTTTTTAATTTAAATTATTTTTCCTAGCACAAATTTAAAAAAAAAAAACTTAAAA 141 | TGGAAAAACACACAGGCGTGAGCAAAAGAAATGAATGGCAATAATCTCTGTGGTTTACTG 142 | AGTGCGCCTGGACATTTATGTGCTCTCAGTTTCTCTCCACAGGAAATGCACAGGTGAGAA 143 | ACTGACGTTAAGGGGGACTGAGTGTCAAGCTAGTTAGTGGCAGAGGGCAGATTCAAACCC 144 | AACACGGTCCTCCCCTGCTGCCCCTCGGCCTCTGCCTCCAGGTGGGAAGCGCATCTACCG 145 | GACGGTCGGCCCGGTGAGGCGCAGCGCCCCAGACTGGCGCATCCGCGGCCCCAGCGCTCC 146 | ACGCCTGGGGAGCGCGCGCGCACGCAGCGGCGCGAGCCTGGCGGCGGCGGCGACAACAAC 147 | AACGTCACAGCTCGAGCTTTCCTTTTCGGAGTCCCCGGCACACATCCTGTGTCCATGTTT 148 | GGGCATTTACGTCACGGCGGCAGGGCCGGGGCCTCCCAAAATGGCAGTGGCCCGGGGAGT 149 | CGGAAGCCCGGAGCCAGCGCCGCCGCAGCTATATAAGTGGGGGGGCTGTGGGCTGGGGGA 150 | GCCCGGCAGCGCTTTGGAGAGGCGAGGAGCCGCCGCCCGAGGCCGGTGCGGGCGAGCGAG 151 | GGCGCCGCGGCTCCCCGACTCCTTTCCCAGAGGTGAGTGCCCGAAGCCAGGAGCCCGGCG 152 | CCCATAGGTCTGTGCGCTGCGGGGAACCCCTACCGCCAGCCTCCCCGCCACCCGCGCGCC 153 | CCCAAGCCCAGCGGGCGAGGCCCCGGGCGCCCCACAGCCGGCGCCGCGCCATGCTCCACC 154 | TTAGCGAGTTTTCCGAACCCGACGCGCTCCTCGTCAAGTCCACTGAAGGCTGTTGCGCCG 155 | AACCCAGCGCTGAATTGCCCCGGCTGCCTGCCAGGGACGCTCCCGCGGCCACCGGCTACC 156 | CTGGAGGTAAGGAGGGCGAGCAGGGGTGCTCAGACGACGACGGCGCAGCGCGGGGGCGCA 157 | CCATACCTGAGAACCAGGAGGGACTGGGACATTGGAGCTATGAGAATAGGGGCGATGGGA 158 | AGCTTAGGAGTCCTGGGGTGCGCACCCCCATTCCCACCCACACTGGGCCGCCAGCGCCTC 159 | CGCAGGAACCTGTGCGGTTATCGGAGCGCCCTTTTGCCTGTGCACGTGTGTTTTGCGTGT 160 | GCATGTTTCTATGTGCTCCTTGGGACGTATGCGGGCCCCTGCTGAATCAGAATGTGCAAA 161 | AGGCACTTTGTGTATATCCGTGGGCACCAAGAGTTTTGTAGGTAGGGGCTGTGGACTCAG 162 | GTGCACCCTTTGATGTGCCCAGAGCTGATTCCTGCTCCCTCCTCAGCAGGCGACTTCTTG 163 | AGCTGGGCTTTGAACAGCTGCGGCGCAAGTGGGGACTTAGCCGACTCCTGCTTCCTGGAG 164 | GGGCCTGCGCCCACACCCCCTCCCGGCCTCAGCTACAGCGGTAGCTTCTTCATTCAGGCA 165 | GTGCCCGAACACCCGCACGACCCGGAGGCACTCTTCAACCTCATGTCGGGCGTCTTAGGC 166 | CTGGCACCCTTCCCCGGTCCAGAGGCAGCAGCGTCCAGATCCCCGCTGGATGCCCCTTTT 167 | CCTGCGGGGTCCGATGCCTTGCTGCCGGGTCCGCCGGACCTTTACTCCCCGGATCTGGGC 168 | GCTGCCCCTTTCCCAGAGGCGTTCTGGGAGGCCTCGCCTTGCGCGGGTGCCCCCTCGCAG 169 | TGCCTGTATGAGCCTCAGCTCTCCCCGCCCGACGTCAAGCCCGGCCTCCGGCGGCCTCCC 170 | GCCTCGCCAGCGCTGGACGCTGTCTCTGCCTTCAAGGGTCCCTACGCGCCCTGGGAGCTG 171 | CTTTCTGTGGGGGCCCCAGGGAACTGTGGGTCACAGGGAGACTACCAGGCCGCCCCGGAG 172 | GCTCGTTTTCCCGTAATAGGGACCAAGATTGAGGACTTGCTGTCCATCAGCTGCCCTGCG 173 | GAACTGCCGGCCGTCCCAGCCAACAGACTCTATCCCAGCGGGGCCTATGACGCTTTCCCG 174 | CTGGCCCCGGGTGACTTAGGGGAGGGGGCTGAGGGCCTCCCTGGGCTCCTGACCCCTCCT 175 | AGTGGGGAGGGAGGGAGTAGCGGCGACGGCGGAGAGTTTCTGGCCAGTACGCAGCCTCAG 176 | CTTTCCCCGCTGGGCCTTCGCAGCGCCGCCGCGGCGGACTTCCCTAAACCTCTGGTGGCG 177 | GACATCCCTGGAAGCAGTGGCGTGGCTGCACCACCCGTGCCGCCGCCGCCGCCCACCCCT 178 | TTCCCCCAGGCCAAGGCGCGACGCAAGGGGCGCCGCGGCGGCAAATGCAGCACGCGCTGC 179 | TTCTGCCCGCGGCCGCACGCCAAGGCCTTCGCTTGCCCGGTGGAGAGTTGTGTGCGGAGC 180 | TTTGCGCGCTCCGACGAGCTCAATCGCCACCTGCGCATCCACACGGGCCACAAACCCTTC 181 | CAGTGCCGCATCTGCCTCCGCAACTTCAGCCGCAGCGACCACCTCACCAGCCACGTGCGC 182 | ACCCACACCGGCGAGAAGCCTTTTGCTTGCGACGTGTGCGGCCGCCGCTTCGCGCGCAGC 183 | GATGAGAAGAAACGGCACAGCAAGGTGCACCTCAAGCAGAAGGCGCGCGCCGAGGAGCGG 184 | CTCAAGGGCCTCGGCTTTTACTCGCTGGGCCTCTCCTTCGCTTCTCTCTGAGCAAGAGAT 185 | GGGTTTATGGGTTGGGGCGCCGCCGTTCGGCGCGCACGAGTTCCGGGCCGTTCCCCTCCC 186 | CGCTCTTCTTCCAACTCCTCCTCGCACGCCCGAGGGCCGGCCTCCGTCCCGCTTCCAGTT 187 | TCCTTGAAGCGCCCGCCGCACACGCCCTATTCAGCACCAGCTCGCCGGACAGTTCCCGCG 188 | GTCCAGGCGCTGTCACCCTTGTCAGCCGCGCTTTGGGGGAAGTCTTCTGAGACCACCCAG 189 | TGAATAGGCACTACCCTGGGATTCAAGACAGTCTTTTGTAACTGCACACGCCCCACGCCT 190 | TCCTCTATAACCCCCAGAGACAGGCTGGGGCAGGCCAAGGCGGTCTCGCGCGGGACTTTG 191 | TACAGCAGTGTCTTATCCAGCAGCCGATTGGATGTAACGTTTTGCTTTGGGTTTTTTTTC 192 | CTTTTGTTGTTGTTAATTTTTGTAAAGCAGACGCTACTCTCAAGCAGTTGACAAAACTGT 193 | TTATTTTTGCAATTAAAATTATTGTGCTAAAAGCTTACTGAATCTGCCATGTAAGCTCCT 194 | >HSPLAPL 195 | GAGTACAGGTGTCTTGTCACACAGTGAGCAGGGTTGGGGAGGCCCTCGGCGGAGATCGCA 196 | CACTCGACTATACCCAAAATCCCACCCTTCCCTGGGACACCTGGTCCCACCCTAAGCTGC 197 | CTTTCTCAGACCCCAGCCCCAGCCCAGCCCAGCCACACCCTGCCACTCCCTTCAGCCAGT 198 | GTGGCTTCAGGTCAAGAGGCTGGGCGGGGTCAAGGTGGTAACAAGGGGAGGGGCCAGGAC 199 | ACAGTTTTCCCTGATTTAAACCCAGGCAGCCTGGAGTGCAGCTCATACTCCATACCTGGA 200 | TTTCCGCCTCGCCGCTCTCCCGACAGCTTCCAGACATGCAGGGGCCCTGGGTGCTGCTCC 201 | TGCTGGGCCTGAGGCTACAGCTCTCCCTGGGCATCATCCCAGGTAATGAGGCTCCCCCAG 202 | CTGCCCCTACAGGGCACCCCCCAGCCCAGGCTGACCTGATCTTTGCTCTCCCCCTGGCCA 203 | GTTGAGGAGGAGAACCCGGACTTCTGGAACCGCCAGGCAGCCGAGGCCCTGGGTGCCGCC 204 | AAGAAGCTGCAGCCTGCACAGACAGCCGCCAAGAACCTCATCATCTTCCTGGGTGACGGT 205 | GAGTGAGCCAGGCCTTCCAGCCCCGCAGCCCTCACAGCCCCGGCGCCCGGACCCTCAGTG 206 | GTTCCAGGACAGCCCTGGGGAGCAAGCCTCACACACTTCTGCTCCTTCAGGGATGGGGGT 207 | GTCTACGGTGACAGCTGCCAGGATCCTAAAAGGGCAGAAGAAGGACAAACTGGGGCCTGA 208 | GACCTTCCTGGCCATGGACCGCTTCCCGTACGTGGCTCTGTCCAAGGTAAGTGCTGGGCT 209 | ACCTTAGAGTCCTCCAAGCAGAGAAGGGGAATCTGGCTATGGAGTGTGGTAGGAGGGAGG 210 | GACCCTAAACAGCTGGGGCTCCAATAAGGAGCTGGAGGCAGTTGGAATCCCAGAGGACAG 211 | AGATCAGGGTCTTGTTTGTCGTCCCCAGAGAAGAGCTCAGAGTGTCTCTGTCCCAGACAT 212 | ACAGTGTAGACAAGCATGTGCCAGACAGTGGAGCCACAGCCACGGCCTACCTGTGCGGGG 213 | TCAAGGGCAACTTCCAGACCATTGGCTTGAGTGCAGCCGCCCGCTTTAACCAGTGCAACA 214 | CGACACGCGGCAACGAGGTCATCTCCGTGATGAATCGGGCCAAGAAACGAGGTGACGTGG 215 | GGCCTCGTGTGGGGTCAGGGCCAGGTGACAGACCTCTATCTCATATCTGACCTCTATACC 216 | TCAGGAATCTGTGGAGTGGTAC 217 | >HSPRB3L 218 | CCTAATGTGATACTCTCTGTACACACACATCACGAGAGATGGAAATAGGCTATAAAAACA 219 | GCCAGAGATATTGATCATCATCACCGCTGGCTGTAGCTTCACTGCCCACACACACATACA 220 | CAGAAATAAATAGCAAAGTTGAAAGTGGACAATAGTCTTACCAAAAAGATTGAAACTCTA 221 | ATGACGTCTGAAGTTCAATTATGACACCGTGCTGATTGCTTGGACACAGTTCCTTTAAAA 222 | CCTTTGTTCAAGTCATAGTTGTACCTTTTAGAAATAAGGTACAAACAACATCCAACCCAA 223 | CCTGTTCCTCTGGGCTAGAGTCCCAGACAGAAATAAGGGATACACCTGACCTGCAGTAAG 224 | GAAAGCAGAACCCAGTCTCTGGGTGGTGAGGCCCACCCAGTGTGGAGCTCAAAGGTGCCA 225 | CTGTTCTGCTCCTCTTTATAAAGGGAGCTGCCATGGTTCTCCCAGCACAGAGTTGGGAGT 226 | GACTCCAGAGCCTCCAGCAAGATGCTACTGATTCTGCTGTCGGTGGCCCTGCTGGCCCTG 227 | AGCTCAGCTCAGAGCTTAAATGAAGGTAAGACAGAAGGAGGAGAAGATGTGGTGACTCTG 228 | CTTGGGGCTTAGGAGGTGATAGTTGTAATTATGGGGAAGAGAGGAGAATGAAAACACAGA 229 | TGGGGCTGCAGAGGTTTCATGCCTAGGATCAGGAGACCTGTTGTGCCCTCATTCCACACC 230 | AAGGGCTTCTAATTTATTTAATGTACAATGAAAATCCAATAAAGAATTTGTACCAGGGGA 231 | ATGAGAAGGTAAGATTTGCATTTATAGAGAGATAGAACTGTGCTGTGAAGGATGCAGTGG 232 | TGAATGCAAGGCAGATTCAGGAAAGTCCAGCTGTGAAGATCCTAAACTGATCTCAGTAAG 233 | TACACAGGGATGATGGTGGCCTTGCTCTACCTAGTGGATCAGCATTGATGATGGAGATAA 234 | ACACACATTAGAGATACTGCAGAGACAGAACTGGATAAAACACTTGCCTGTGTCTAACTA 235 | CAGATATAGAAATATCAGATTCAATCATTGTGATTTTTCTTTCCCCTACACACAGTATTT 236 | CAATGTGCTGGGAGTGGTATAGGTAAGATTGTATTGAAACAATTACTTCTGGTTACCGAA 237 | TTGAGAAAGCTTGTGAATATAGCAATGTATTTATAGGAGATGGAGGGCATAAGAATACCA 238 | AAATATCACATTGAAGTACCTGGCATGTGTAAACTAAATTAGCATTAAGTCTTGAAGGAT 239 | GCTAGGGAAGAAAAAAAGGGGCTCTTCTATATTGAGTTCATGGCTGTTGCTCTGTGTGGT 240 | AACAACCCTGCCTCCCCTTACACCTTCCTCCCCTTCCAGCAGCTTCACAGATGGTAGCTG 241 | ACGAGCTAACCTAGGGGATGCATGGGGTGTGGTGAGAAGTCCCCTTTCCCTGTAGAACAC 242 | CTGTGAATCTTGCAAAATTCGAGATGTAACCTTTCCCATCATCCTGTGCTTCTCTTCTAG 243 | ATGTCAGCCAGGAAGAATCTCCCTCCGTAATATCAGGTAAATCCCAATTCACTCTCAATC 244 | TGTTTTAACTATCTTCTTCTGCTTATGAATGGATCAGTTCTCCAGTGTCTTCTCACCAAC 245 | ATTTTCCTTTAGAAATTGATTAATATTAGTCCCCCTAATAATGCAGGCAATCTTCATGCA 246 | ACCTTGATTTTGGGGACCATGAGCAGGGCCACCAAATTGAATGGCAGAGATGCTTGGCTT 247 | AGATGAAAACGGGAGTGGGTTGACTTCCCTCCTGCCAGGAGTGCCTGCTGGGAGATGACA 248 | GACAAATGGCCAGTGTCCTTATTCTGACTCCTCCTTAGACTGAGAGCCCCTCAACTGCTT 249 | CCTTCTTCTCCAGCATTCCACTCCAGAGTTCTAGAGCTTCACTGAAAATGCAAAGAAATT 250 | AGTGTCTGGGTCTTATTTTTGTGCATTTCCCCATTTAGCTGCGTTACTGTAAAAATTTGC 251 | GGCAACTATTCAGTGAATGCCGTGTGTCCCCCACCTCCTCCAGGAAAGCCAGAAGGACGA 252 | CGCCCACAAGGAGGAAACCAGCCCCAACGTACCCCACCTCCTCTAGGAAAGCCAGAAGGA 253 | CGACCCCCACAAGGAGGCAACCAGTCCCAAGGTCCCCCACCTCGTCCAGGAAAGCCAGAA 254 | GGACCACCCCCACAAGGAGGAAACCAGTCCCAAGGTCCCCCACCTCGTCCGGGAAAGCCA 255 | GAAGGACAACCCCCACAAGGAGGAAACCAGTCCCAAGGTCCCCCACCTCGTCCGGGAAAG 256 | CCAGAAGGACCACCCCCACAAGGAGGAAACCAGTCCCAAGGTCCCCCGCCTCGTCCGGGA 257 | GAGCCAGAAGGACCACCCCCACAAGGAGGAAACCAGTCCCAAGGTCCCCCGCCTCATCCG 258 | GGAAAGCCAGAAGGACCACCCCCACAAGGAGGAAACCAGTCCCAAGGTCCCCCACCTCGT 259 | CCGGGAAAGCCAGAAGGACCACCCCCACAAGGAGGAAACCAGTCCCAAGGTCCCCCACCT 260 | CGTCCGGGAAAGCCAGAAGGACCACCCCCACAAGGAGGAAACCAGTCCCAAGGTCCCCCA 261 | CCTCGTCCGGGAAAGCCAGAAGGACCACCCCCACAAGGAGGAAACCAGTCCCAAGGTCCC 262 | CCACCTCGTCCAGGAAAGCCAGAAGGATCACCTTCACAAGGAGGCAACAAACCTCGAGGT 263 | CCCCCACCTCATCCAGGAAAGCCACAAGGACCACCCCCACAAGAAGGCAACAAACCTCAA 264 | CGTCCCCCTCCTCCAAGAAGGCCACAAGGACCACCCCCACCAGGAGGCAATCCCCAGCAG 265 | CCTCTGCCACCTCCCGCTGGAAAGCCCCAGGGACCACCTCCACCTCCTCAAGGGGGCAGA 266 | CCACACAGACCTCCCCAGGGACAGCCTCCCCAGTAATCAAGGTTCAATGACAGGTATGAT 267 | TCCAGTTTATTCTTCACCAAGTGCTCTACAGTGTTACAGTTCTCCAACTTTATTGTGCCA 268 | ATGAATCAACTAAAAACCCATTGACATTGTATTGTCCTAGAACCCATTTCTAAAGATTTG 269 | TATTCAGATACTCTGGAATAGGGTAAGAAGACCCTGTATTTCTAACAAACTCTTTAAGGA 270 | ACTCTGATGTTGAGAAACAACATACCATACAATCTGTCTTAAATTGTGTTGGCAATGAGG 271 | AAGTAGTACCATGTCCATTCTTGGTGCTCTGCTTTCGGTCCACAAACTCAGAGATATTGC 272 | ATTTAAATTTTTCACCTGAGCACTGTTTGCTCATTCCTGCCTCACGCCAGCCTCTCGAGT 273 | CCAGTATTCCTGCCAAATGGTCCCTGATCTTTCAGCAGCTAAATGGTGTCTCATTTTTTA 274 | CATACTTAGTTTTCAGTAAGTACATGATTAAGCTAACAAAAGATATCTAATGGAATGGAA 275 | AAATATGAAGTTAATTTTAAAGGCGTAGCTCATCCTACTCACCTTCCTTCCTTCAAAAAG 276 | CTACCACTGTTGACTTTATGGGATCTTCTCTTTGAAATATTTATGTGTGCATAGACATAC 277 | AGCATTCTTTTACCCTACCACTAATACCATAACTTATATGCAGGTATATATGGTAGTCAT 278 | TTAAAAATACACTTCTTGAAAATTTCCACATCAGTTTATGAAGCTAAATACATCTCTTCA 279 | GTGGTTTTCTGTTTGCTTTTACAATTTTATGCTACTCCATTGTGTGGCTGCACCATGATT 280 | TCTTTAATCAATCCCTGTCACTGGATACTGAGGGTGGTTTCATCTTATCACTATTATAAA 281 | ATAGGTTCTGATTATCATATGTGTAAATATATCCCTGAACAAATTCAACAGCAATGAGTC 282 | ATAACAACCTAAGGATGATCTTTTCTCTTCATCTTCTCAGCAACAATTTGGAGCACATTG 283 | TGTGCAAGGGCATCAAAAGAGTGAACACAGAAAAAATCAGGAAAGAATCACAGGAGGTTG 284 | AAGGGATTGGGGAGAGAGGATGGGATCTCATGTACTCTACTGCAGTAACACCAGTGAGGA 285 | ATTCGACATTTCCTGCCATGTCAAGTCTGCTCTATGAAATTCCTTGTTCTGTTTGTTTCA 286 | GGAAGTGAACAAGAAGATGACAGTGATTCAAATGATTCAAATGCCATGACATTGGAAGAA 287 | GGTGGTCATAGCTCTACCTTTAATATACCAATAAAACAAACAGCTTACAATTTCTGATTG 288 | TGGTGTCTCTTTCTCAGTGTTTGTGAATGTGGAGTGTGAGGACCAAGAACACATTATAAG 289 | AACATCTAGGACCCCTTCTCTTTGATGCTTCCAGGGAGATTCCCTCCTCTTTAATCCTAA 290 | TTTAGCCAGCTGCCATGAAAAATATTTTACTGTTTATCTCCTTCCCTGACTTCTATTTTT 291 | TTCTTTTTCTGAAATGGAGTCTTGCTCTATCACCTAGGTTGGAGTGCAGTGGCATAATCT 292 | TGGCTCACTACAACCTGCATCTCCTGGATTCAAGCTATTCTCCTACCTCACCCTTCACAG 293 | TAGCTGGGATTACAGG 294 | >HSPRB4S 295 | AAGCTTGAACAATAGTTGTAGCCCGAGCAGGCTGTGGGGCAGCTTTCTGTCATCTATGTG 296 | TGCCCAGGAGTGTGTTGTCTTCACACCATCACACAGGTAACAGAAACCATCTCTATCGCA 297 | TCAAAACTTTAGCCAATTGTGATACTATCTGTACACACACATCACAAGAGATGGAAATAG 298 | GCTCTATAAAGAGCCAGAGAAATTGACCATCTTCACTGCTGGCTGTAGCTTCACTGCCCC 299 | CACAAATATTACACAGAAATAAATAGTAGAGTGGAAAGCAGACATAGTCTTACCAAAAGG 300 | ATTGAAACTCTAATGATGTCTGAAGTTCAATTATGACACAGTGCTGATAGCTTGGACACA 301 | GTTCCTTTAGAACCTTTATTCAAGTCATAGTCGTACCTTTTAGAAATAAGGTACAAACAA 302 | CATCCAACCCACCCCACCCTCTGGGCTAGAGTCCCAAAGAGAAATAAGGGATACACCTGA 303 | CCTGCAGTAAGGAAAGCAGAACCCGATCTCTGAGGTGGTGAGGCCCACCCAGGGCTCAAA 304 | GGTGCCATTGTTTTGCTCCTCTTTATAAAGGGAGTTGCCACGTTCCTCCCAGCACAGAGT 305 | TGGGAGTGACTCCAGAGCCTCCAGCGAGATGCTGCTGATTCTGCTGTCAGTGGCCCTGCT 306 | GGCCTTGAGCTCAGCTGAGAGTTCAAGTGAAGGTAAAACAGAAGGGGGAAAAGATGCGGT 307 | GACTGCTTGGGACTTAGGAGGTGACAGTGGTAATTATGGGGAAGAGAGGAGAATGAAAAC 308 | ACAGATGGGGCTGCAGAGTTTTCATGCCTAGGATCAGGAGACCTGTTGTGCCCTCATTCC 309 | ACAATAAGGACTTCTAATTTATTTAATGTACAATGAAATCCAATAACGAATTTGTTCCAG 310 | GGGAATGAGAAGGTAAGATTTGAATTTATAGAGATAGAACTGTGCTGTGAAGGCTGCAGT 311 | GGAGAGTGCAAGGCAGATTCAGGGAAGTCCAGCTGTGAAGATCCTATACTGATCCCAGTA 312 | AGTACACAGGGATGATGGTGGCCTTGCTGTACGGACGGTCGGCATTGATGATAGAGATAC 313 | ATACACATCGGAGATACTGCGGAGACGGAACTGGATAGAACACTTATCTCTGTCTAACTA 314 | AAGATGTAGAAATATCAGAGCCAATCATTACAATTTTTCTCTCCCCTACATGCAGTATTT 315 | CAATGTGCTGGGAGTGGAATGGGTTAGATTGTATTGAAATGATTACTTCTGGTTACCCCT 316 | ATTGAGAAAACATGTGTATGTATGCAATATATTAACAGGAGATGGAGGGCATAAGAACAC 317 | CAAAATATCACATTGAAGTACCTGGCATGCATAAACTAAATAAGCATTAAGTCTCGAGGG 318 | ATGCTAGGGAGGAAAAAAAGGGGCTGTTCTATGTCGAACTCATTGCTGTTGCTCTGTGTG 319 | GTAACAACCCTGCCTCCTCTTACACCTTCCACCCCTTCCAGCACCTTCACAGATGGTGGC 320 | TGATGAGTTAACCTAGGGGATGCATGGGGTGTGGTGAGAAGACAATTTTCCCTGTAGAAC 321 | ACTTGTGAGTCTTGAAGATTTGAGATGTAACATTTCCCATCATCCTGTGCTTCTCTTCTA 322 | GATGTCAGCCAGGAAGAATCTCTCTTCCTAATATCAGGTAAATCCCAATTCATTCTCAAT 323 | CTGTTTTGACTCCCTTTTTCTGCTTACAAATGGGTCATTTCTCCAGTGTCTTCTTATCAA 324 | CACTTTCCTTTCAGGAATTGATTAATGTTATTGCCCCTAATGATATAGGCAATCTTCATG 325 | CAAACTTGATTCTGGGACCATGAGCAGGCCACCAAATGGAATGTCAGAGATGCTTGGGTT 326 | GGATGACAACAGGAGTGGGTTGACATCCCCCTGGGAGATGACAGACAAATGGCCAGTGTC 327 | CTTATTCTGACTCCTTCCTAGACTGGGCCTATTCTCCTCCTTAGACTGAGAGCCCCTCAA 328 | CTTCTCCCTTTTCCCCCAGCGTTCCACTCCAGAGTTCTAGGGCTTCACTGAAAATGCAAA 329 | GAAATTAGTATCTGGGTCTCATTTTTGTGCATTTCCCCATTTAGCTCCATTACTGTAAAA 330 | ATTTGTGGCAACTATTCAGTGAATGCCGTATGTCCCCCACCTCCTCCAGGAAAGCCAGAA 331 | GGACGACGCCCACAAGGAGGAAACCAGCCCCAACGTCCCCCACCTCCTCCAGGAAAGCCA 332 | CAAGGACCACCCCCACAAGGAGGAAACCAGTCCCAAGGTCCCCCACCTCCTCCAGGAAAG 333 | CCAGAAGGACGACCCCCACAAGGAGGCAACCAGTCCCAAGGTCCCCCACCTCATCCAGGA 334 | AAGCCAGAAAGACCACCCCCACAAGGAGGAAACCAGTCCCAAGGTACCCCACCTCCTCCA 335 | GGAAAGCCAGAAAGACCACCCCCACAAGGAGGCAACCAGTCCCACCGTCCCCCACCTCCT 336 | CCAGGAAAGCCAGAAAGACCACCCCCACAAGGAGGTAACCAGTCCCAAGGTCCCCCACCT 337 | CATCCAGGAAAGCCAGAAGGACCACCCCCACAGGAAGGAAACAAGTCCCGAAGTGCCCGA 338 | TCTCCTCCAGGAAAGCCACAAGGACCACCCCAACAAGAAGGCAACAAGCCTCAAGGTCCC 339 | CCACCTCCTGGAAAGCCACAAGGCCCACCCCCAGCAGGAGGCAATCCCCAGCAGCCTCAG 340 | GCACCTCCTGCTGGAAAGCCCCAGGGGCCACCTCCACCTCCTCAAGGGGGCAGGCCACCC 341 | AGACCTGCCCAGGGACAACAGCCTCCCCAGTAATCTAGGATTCAATGACAGGTATGATTC 342 | CACTTTATTATTCATCAGGACTCTAATTGCACAGTTCTCCAACTTTATTGTGCCAATGAA 343 | TCAACTAAAACCCATTGACATTGTATTGTCCTGGAACCCATTTCTAAAAATTTGTATTCA 344 | GATACTCTGGAATAGGGTAAGGGGACCCTGTATTTCTAACAAAATCGTTTAAGGAATTCT 345 | GATGTTGAGAAACAACATACCATATGATCTGTCTTAAATTGTGTTGGCAATGAGGAGGTA 346 | GTACCATGTTCATTCTTGGCGTTCTGTTTTCTATCCACTAACTCAGAGACCTCCCATTTA 347 | AAGTTTTCACCTGAGCACCATTTGCTCAGTCCTGCCTCACACCAGCCTCTCGAGTCCAGT 348 | ATTCCTGCCAAATGGTCCCTGATCTTTCAGCAGCTAAATGGCGTGTCACTTTTTAGATAC 349 | TTAACTTTTCAATACGTACATGATTAAGCTAACAAAAAATATCTAATGGAATGGAAAAAT 350 | ATGAAGCTAATTTTAAAGGCCTAACACATCCTACCCCACCTTCCTTCCTTCAAAAAGCTC 351 | CCAGTGGTTAACCTTATGGGATCTTTTCTTTGAAATATTTATGTGTGCATAGACATATAG 352 | CATTCTTTTACCCTACCACTAATGCCATAACTTATATGCAGGTATATATGTTAGTCATTT 353 | AAAAAATACATTTTTTTAAAATTTCCACATCAGTTTATGAAGGTCACTACATATCTTCAG 354 | TGGTTTTCTGTTTGCTTTTACATTTTTATACTACTCTATTGTGTAGCTGTGCCATGATTT 355 | CGTTAACCAATCCCTGTCACTGGACACTGAGGGTGGTTTTAGCTTCTCAGTATTATAGAA 356 | TATGTTCCAGTTACCATCTGTGTAAATATATCCCTGAACAAATTCAACAGCAATGAGTCA 357 | CAGCAACCTAAGGATGGTCTTTTCTCTTCATCTTCTAAGCCACAATTTGGAGCACATTGT 358 | GTGCAAGGGCATCAAAAGAGTGAATCTATGAACTTGCTTGTTTGTTTATTTCAGGAAGTG 359 | AATAAGAAGATATCAGTGAATTCAAATAATTCAATTGCTACAAATGCCGTGACATTGGAA 360 | CAAGGTCATCATAGCTCTAACTTTAATATACCAATAAAATAATCAGCTTGCAATTTCTGA 361 | TTGTGGTGTTCTTTCTCAGTGTTTGCGGAATGTGGAATGTGAGGACCAAGAACACATTAT 362 | AAGAACATCTAGGACCCCTTCTGTCTGATGCTTCCAAGGAGTTTCCCTTCTCTTTAATCC 363 | TAACTTAGCCAGCTGCCATGAAAAATGTTTTGCTGTTTATCTCTTTCCCTGACTTCAATT 364 | TTTTTTTCTTTTTCTGAGATGGAGTCTTGCTCTATCACCTAGGTTGGAATGCAGTGGCGT 365 | GATCTTGGCTAACTGCAACCTGCACCTCCAGGGTTCAAGCTATTCTCCTGCCTCACCCTT 366 | CACAGTAGCTGGGATTACAGGTTCCCACCATCACACCAGGGTAATCTTTGTAATTTTAGT 367 | TGAGATGTGTTTTCACCCTGTTGGCAGGCTAGGCTAGAAATTCTGACTTCAGGTCATCCG 368 | CTTGCCTTGGCCTCCAAATGTGTTGGGATTACAGGCATGAGCCACCACACCTAGCTCCTT 369 | TCTGACTTCTACAGCACAAATTGAAAATCTAAAATTATTTTCAGATTGTTTACTGATATT 370 | CCAGTAATTTTAAGGACAAAAACCACAACAAATGGAAAATAAGTCACAGAAACTAAAAGA 371 | AATCCTTATAATTTCTGAGTTTGGTTTCAAGGGAACAAACAGGGTTCTATGCTTCTTATT 372 | CCCAGAGCCCTCTCTATCCCATTGACCCTATTTTAACAGTGATCACTTCCCTCCCTCCCT 373 | ATGTTCCTCACCTTTCTTTAATGAAACCTGAATGGATTTCATCAAGGAGGCAGCATGACT 374 | TTTAGGAGCAAAGAATTGGGACACTCTCAGATTTTAGTTAAGACATAACTCTTTCTTGCT 375 | AGCCTGAACTCTTAAAAAGCTACTTGGTCTCTCAGAGCTTCAATTTCCTCATCTACAATG 376 | AGAAGAATCAAAACAACTACCTTAGAATATGGAGACTATTCAGATAACATATGTACCAAA 377 | AACCTTGCAGAGATTGGCATGTCTGCTTCTCAAGCAAGGAAGGTTCAATATTAGAAAACT 378 | GCCCCTGTGCCCACCGATAGCCTCAGATAATTCACTATGAATTTCAGAAATTTCAGAATA 379 | GAAGGATCTCACTGTAACCATCACCAAGTTGAGCAACCCACATTCAGTTCAATCCCAGTT 380 | CTCTGACTTCTCTCCTATTATCATAGTTGAAGGCTCCCTACCCCTATCTCTTATCTTTCC 381 | TCTTGATTCTGAACCACATTACCCAGTCAAGGATTTTGCTTCTGTATGTGACCCTTTTGT 382 | CTCCTGATTCTTACATCTATGCTCTATGGGATTACCTCAATCAGCAAAAGCCTGCTGAAA 383 | CATCACCCATTTTTACAGAGATTTTGCTAAGACTCTTAGTGTTTCTTTCCTACCCTATTA 384 | TTTGTCTGCCATTTTTATTGCAAAAGTTCTTGAAACATATGTATGTGATTATTTCCCCAT 385 | CCCCTCCCTTCCATTTTCTTTTTAAACACACATTAAAGATGCTTTTGTTCTTTCCACTCC 386 | AAGTCTGTCAAGGTCATCTACTGCCTGCATTCCACTCATTTCAGGAATCGATTATCAGTC 387 | CTGCATCTCCTGTGACCCCTTGGCAGTTTAACACCATTGATCCTACAATTCTTTTGGGAA 388 | CACTCTATCAATCTTTCCGGGAACCTCCCACTCTCTCCTCGGGTTTCTCCTACCTCTGCC 389 | TCTTCCCTCCTGTAACTACAAAGTTACTCCTCTTCCAACTCTATTTGGTCTTGGTAAATT 390 | ATTCATCTAATTAATTAAGGAAACTAGGATTTATTCTAGACTCTTCTCTTTTCCTCTTAC 391 | ATCACATTACATCTAGTCAAATCAGCTATGTTATCATTGTGAAATTCAAGCTT --------------------------------------------------------------------------------