├── README.md ├── datasets ├── rosalind_cons.txt ├── rosalind_dna.txt ├── rosalind_eval.txt ├── rosalind_gc.txt ├── rosalind_grph.txt ├── rosalind_hamm.txt ├── rosalind_kmer.txt ├── rosalind_kmp.txt ├── rosalind_lcs.txt ├── rosalind_lexf.txt ├── rosalind_lexv.txt ├── rosalind_long.txt ├── rosalind_mrna.txt ├── rosalind_orf.txt ├── rosalind_perm.txt ├── rosalind_prob.txt ├── rosalind_prot.txt ├── rosalind_prtm.txt ├── rosalind_revc.txt ├── rosalind_revp.txt ├── rosalind_rna.txt ├── rosalind_sign.txt ├── rosalind_spec.txt ├── rosalind_splc.txt ├── rosalind_sseq.txt └── rosalind_subs.txt ├── e001-dna.py ├── e002-rna.py ├── e003-revc.py ├── e004-gc.py ├── e005-hamm.py ├── e006-perm.py ├── e007-prob.py ├── e008-prot.py ├── e009-subs.py ├── e010-cons.py ├── e011-eval.py ├── e012-grph.py ├── e013-kmp.py ├── e014-lcs.py ├── e015-lexf.py ├── e016-mrna.py ├── e017-orf.py ├── e018-prtm.py ├── e019-rear.py ├── e020-revp.py ├── e021-sign.py ├── e022-splc.py ├── e023-kmer.py ├── e024-lexv.py ├── e025-long.py ├── e027-spec.py └── e028-sseq.py /README.md: -------------------------------------------------------------------------------- 1 | Rosalind-Problems 2 | ================= 3 | 4 | Solutions to problems from http://rosalind.info/ -------------------------------------------------------------------------------- /datasets/rosalind_cons.txt: -------------------------------------------------------------------------------- 1 | 
TGTCCCATTTGTATTGGTTTTAGTCAATCCTTGCACGAGCAATTGAAACTGGGGCTCGAGGAGAACCACGGCTTGTTATAAGAATACGTCCCCTATATAAAGCCAAAGAGTGTCTGAACGACGAACGTAAGCTGTACTACAAACTCTTCATAAGCGGATGGATCATCCTGTCCCCCACCACGTGGCGTTCATCATGTACGGTTTAATTATAAACAATTGAGTGATATGAGCGTGCCTCATGTAATCTTTTCATTGTGGTAACAGCTTCCAAAGGCCCGCAATCTTTGATGTCTGATACCTGCATTAACCAATACGCGATCAGGCCCAACCAACATGCTTGTCCGGGAGAACATTGCTTACGTAGGTGGGTCCCTAGAAACTCTGGCAGCGTATTCAACATCGTAAGCCGGTTTACTGAGTCCTGAACGCGAATATTCTTGGCTAGCTCTTTCTTTGATAGGTTCTCCGGCAGTCTTCATACAACACCTGAGCGCACAGAGTTTGCTAACGCTCGCATATCTGCAGCGAGGGCATTCCTGACAAACGATACGGGGTATGCCCCCCGCGTACCACCCTACAGGAATGTCTTTCGTACGCGCTGCCACATTAACCCGGCTTCCATGTTACGGGAACGCGGTTCAGTTTGATAAGTAACGTTACTTCCGACAGAAGTCTCACGATCAAGAACTAATGGCGCGTTGCCAACGTGGCGTGATCTGCTAATGCCTGTTCTTTCAGATTTGCACTTCTCGGATATGTCGAATCTAGCATAGGAGAGACAAATGGCATTATCTGACAGTGGAATCCCCCTATTGAATCACTAGGCGCAAGAGTCTTCGTAATAGGGTCAATCCGCGAAGTAGCAAGGTGTTGACGTACGTGTTTCAAAGGACAGAGAAATGTCGGTCAGTTAGGCCACG 2 | TCAAAGCAACCTAACAGCGATGCGGTTAAACGGCTGGTGACGGCTTCTGGTGCCGTACTGACACATAGGCTGCTTCCGTATACGAACCGAAGCCGAGGGTGATGCGCGCGCGCAGCGATCAACGGACCCCTGCCATCCGTCGCCCTTCAACTCAAGACCCAGCCAGTGTCCGTTCGGGAGCAGTCCAGCATCGGCCTATTAGACGTTGTTCTGTCTTTGCCACTGTCTGATCCGTTGTATGTGCATGGGCGGTTCCTATCATTAGGCGGTACGCAGATCAAAGGTGATGTGATGCTATCTGTAAGTAAGCGACTAAACCGTGGTAGTACGAATGGTATGGTCGCCTCCATCGAATCGGGAACGAAGGCGATTACTACTTAGCCCCAACCTTTTGGCACCCAGCACCGAAAGGCAACGAGATTTTAGAGATTGCTATCGTATAGGAAATTATGCTTGCCGCGTCACCCTAGTCAGTCTGCACAAATAACGAATCCATATGGGTAGGAAATATGGTAGAGGGATAAGGTTTCAACTCGAGACTGGGAATCAGAGCCGGCCTACAATTAGGCCTCGCCATGTAAGCACAGCGCAACAGCGGGGTTCCGGTGCGCTCCACCCCGGGCCGGCGTTACTCACATGTCGAGGTAGAGTAGCAGAACCACCTTCTGGTAACCGGCCCTTGGATAACAGTTGAAGTCGACAGTGAGGTTACTTGAGCCCAACAGGTGCGGGCTTCGGGGACTTCCCTTGTCTTCGGGCCAGCGGGCGACCCCGCCTGCTCCTATGAATAGGCCAATCCTGATGGCTCAAGATTTCCCGTCAAAATTCCCGCCCATCTCACCTTCCTGGAAGGTAGGGGTCTGGTTCTCTGGGCCTTGAGCCGCGTGAACGATTATTGCGGAGGGGGAGCGTGCGCAGTA 3 | 
AGGTGTGCAGGGGATTCCATCTTGCAGTGGCCGACCTCCGAACGCGGAATTGGAGCTCGCAAATGCTTGTGGGAGGATTTCGTAAGTGGGAACTTCTGTGCGGGATCCACCTGGTAGGGGTATGACCCGTCTGCTATTATTTAACACGCTACTATATGCCTAGGGGGCCTCGATTTCAAGCATTTCAGGTCATTTTCATCTGTGATCTTCCATCCGAATACCTCCACGTCTCTAGATCGTGGCAATGTATGGGCCGTTCATACCAACCCTGTGCCAGTCCTAACAATACATTTACGAGTCTGGCACGTCCTTGTAATGGTCCTGCTTTCATGGCTGTCGCGGCCTGGTAATTGTCCGTCAAGCTATTAACGGTCGCACGAAAGGAGCGCATGTGCTCCATGATAGGGTCAGATAACCCACCTTTGACATTCTGTATCTGACTTTGACTCCAACTCAAGGCCGTTATTTCCGGGGTTGAAACAGTAACACGGATATCAGATGCTGAGCCAGCCCTGCCTACGATACAGCACAGCCAGCTCCCCAGTCCTGACGTAAGATTCCAGGGGATCGCACAGCCAATGCTCACCGCATCACTAGACGTGGCATTCGCTCATCAGGAGTATATCCGCACTTTGCTAACCCTCCGTGCTGCCCTCATACTTTCGTCATAATTCGATCTTCATCCGGTTTTATTTCCAGTGGACGAAAGATGCTGCTTGCGCCGTACCTTAAACATTGTGCCTCCAGCGAAGCAGGATGGTCTGCTGAGTCCACTCCACGGGCAGAGAAGCGTGACGTACTTGGAGTTCCAATTCAACGCTAATTTGTGGCGTGGAGTCCGCGCGTCCGGGGAACCGAACAATTTTCCCGATCAACGGAGATGCTCATAAATTTATAGTTTGATCTAAAGGTTGTCAAAA 4 | GGGAGCCTAAGGACACCCTATGCGTTAGTTGCAGTGGTTCGACGCTAGAAAGGCTTTTCCAGATTAAGACTCCATGAACTTCTCGAATCCGTGGGGCTCCCGTCAACTGAAGAGATACGCGCTCCTGCGCGCAGTACATTGACCCCATAGTGCTCGTGGTCCGCGGTCAATCGACGTCTGGCTGATTGGAACGCGTAGTGTCAGGGGTACGCATGCCGACTCAGCGCTGCAACCGTAGCGCTTCCAAGGTAACCGTAAGTTTGTGGCCAAACGCACGTTTTATTCTCGTAATGGTCGTGCGACTCGATTGAATCACTAACCTTATGGGCTGCAAATGTCCAATTAACTCGCTCGCGATGGAAAGACCGAGAACCCTTCTTCAATGGTGAGTTGCGCGCAGCATAAGAAAAAGATGTCAGTCCGGTCTGTGCGATTTAGGGGATCTTCCCTGGGCAGTGTGTGTCGCATAGGGCTCGGCGTAGGATATTGTTGATAGTAATTCGGAAACGGGCGCTCGCTCAATACACGCTGAGATGAAGACTACAGAAACCTTGCGCGCCACGACTAAGGCTGGGGGTCCCAAACAGCTCGATGGAGCTTAGTCTCTGCCCGACTGTATTGACACGTTGTGCGTTGATTTGTAGGGGAGACGGTTATAAGCGATGCTGTAGCGGTACCGCCACACAGAGTAGCCACTATGCGTTTACGGATTATATACGAGGCAAGATCATCACTGTTTGTGACCGATTGCGGCTCCTCAGCCAGCCAATTGTTCTTCTAGCAAGGAGCATGGTCTGGCGGAATGGCGGGCTCAAAGGTTGGCCAATCAACTCATGTGAGCGTAATGATGAAGTTTCCACCAACACACACATTCTAACGCAAAACGATGCACACTTAGGAGCACGGATACACTTGGCATG 5 | 
CCAATGATGGTCCCCCTCGCAGCGAGGAGGTAGATATCCGGCACCTTAGGGTAAGGCGGGCCAGAGAGTTTCAGCTCACTGATGTGAGGAGTGAGTCAGAGCAATATCGGCTCATCATTCAAGGTCCAACATGTGCATGTGTTAGTATCATTTCTTCTGGGGATCTCAATCACGTCGCACATCTCGACGTCACGTTTTTACGCATGCAGAAAGGTAGTCCATAAAACTCAGCAACTCATTTAATGTGAAGGCACCGTACTCTTGAACGGGGTATAGAGATTGGAGGACCGCCGGGTGCGTTAGAACTATATAGGCACAAGGATGCGGCTTTATATCCTAAAGCCGGCGCGGTGAAGCTTATACAGCACCTGCATTGCTAGCCACATGAGTCGATTGCAAACCAAGGTTACGCAGCCGCGCAAAGTCATGCCACGTGAGCGGTAAAGGCCCCGCACACCCACATCCGATTGGGGAGCGTCTGATATCTGTTCCCTACTCCTGCCCGTCTCGTAGTGTGGTCCACTAATACCCCATAATGACTAGGGATACTATGTCGTATATCACTTATCCTAACCCATCTGGATACGACCTCCGAAACGATCCGGTTACCACGATATTAGGAAAGAGCTGTAGTGTCCCGTAAGTCAGAATTCCTGCATGTTTGAGTTCCAGATTTGTCATCGGCAATGCTACTGGAGAACGGCCAAGATACATCGGTAGCAAACTGCAGTACCAGGAGCGGCACGACGAGGGTAATTATCTGCTGTTTGTCATGTCGGCTCTCGACGAGCCTCACTCCAGTCGCCAGACCAACGTGGATAGGCGCCACGGTGAAGCCATAGAAACCATACGGATTTGAACCCTAGGCAATATCTTCAAATTTATGGAACACTGCCTAAGATTCTGTTATTTGATCGGCA 6 | ACGTAAGTACGATAAGATTGCTAACATGGATTGTTAAATCTCTCTCGGACGGTATGTAATACACAGTCGTAGATCGTTCTACTCCCCATCGCCTCTGCCTCTGAATACCACACTCAATCGCGGACCCTATATTAAAAAAATGTCTACCAGCGTACGTCACATTCGGTCTCTCTGTAATGAAGGGGCTTGCTTGCAAACCGTCGGCGGCGCCGGGAAGATCTGAACATACCGACTAGTCATTACTACGGGCCCTGCGCGCATTTAATGCTGATTGATAAACTATGTTATGGGTGATGATCTCGCGGAGGGGGTTATCCGCCTAGCTCCTAAGTAATAGGCACGCCCGTTTTGAAGCTTCGGGCCGCTTTCAGTCTGGCCACCTTGAAGAGACAATTGAGTTGGAGGAGTATTATGAACCTCTACCACGCAACCTCCGTTGTGTTTTGGAGACCTGGCCGCACTGCGTTACGGGGGCAAAAGTATGCTGAGTCATGGAGTCTCCCACACAGGCTTCACCTTAACCCTAGCCAAGTGGAGCAATTTTCCGGCGGGCTAATTGTTCTGGTGGTGCTACCAGGACGTATACATTGACTTAGTTGATAACTTAATCGCTTTCTCCTTCCGTGTATGGCGCCCAGTGTGCGTGCCGGAGTCACAGCCAACTCAAACAAGCTTTTCCTGGTCATGCGTCAACGCGTCTATTACACGCTCCATTTTCTGGGTGCCACCAGTATCGCGTGGTTTAAACCCATGTATGGTTGCTGCCGAGCTGCATTAGACCTTCGATGCCGCTAATGTGAGCTCGGTCAGTTAACCTACCCAAGTCTCACTCGGAAGTATATTACTGACCAGGTCAGCCGGAAGAAGAACTAGCTGTGCCAAAGGAGGCTCGCTTGGGCTGGAACCATAGCCCCGTATCT 7 | 
CAATCATGGATCGCTTCGCTGGGTGTGCGCAGCGAGTCTTCGACTGAGAGTGGTGGTTCCTTCAACGTTTTTTACTTTCCGCATCTATCTGGGTTCTCTTTAGTTGGCGTTCGAGGGTAATGTCGATTAGACGTCCAGCTCTTCCATAATGTTGGCGCACTCCGTGTGCTTATCATCAAAGTCCAAGTTTTCATGTATAAAAAAGATGTGCCGCGTGTCTCGCACGTATCTCCACTTTTAACTTTTGGCTTAAACGTTGGGATACTCGCTCAGTCCCTCCGACTCCTGTCAATTGAAGGCCTAAGGGGTCCATAATCTGCTTGTGTCCCCTTAGGGTAAAATTGATCTTAGGCCTGAATGCTGAGTCATCAAAACACGTTCGGACTACATGGATCCAAGAAATAAGGCGCATCTCTACGACTATTGACTCCACTCACGCCCCGCTCAAAGCAGCAACACTTTCTGCACGTATCTTCTGCTGAAGTATGTACCACCGCGAGAAAAGGGAGACGGTCTTAACTGTTCCACCTAGACATTACAAAGAATCTAGGCCCCGTCACTTACATAGAACTACCTACGTCGAGGGGCGGTAATGCCATCACTGGTGAAGCATAGGCATGTACTCTAGCCCGCTTTATTTGCCAATGAGGGTAGACACAAGGGATGGCGTGTTCGTCCCATGAACTTTGTTCTTTCCACTTAAAAATTAAGATACAGGGCGCCGTGTTAGTCCTTATGCGCTATCCCTTGTCACCCACTTGTGGTGCCCACAATCTGCGGGGGTGACCGTGATGCTCTTGCCTTAAGCTTCACCAATGGAATTAGCGGAATGGGAACCTAGTGTGTTATCTCACGAGTAAGACTTTTTGCAACAGATAATACTTCGAAGAATTTACAAACCCGATGGGCCTGCTGCCGTC 8 | ACATTGACTCACTTTTCGCGGAGAGACATTGGTTCCGTAATCTGTATATGTGGGAATAGCAACATTCTAACTTTGTGGCCATTACGCTGCCCTAGCCCTAGCGGCGTATGCTCAGACTCGTGGGTTAAGAACTTTTGTGCGCAAGGATATGCACGGACGTGCATTTCGCACCCCCCAACATGTATAGGTTATCGCAATTTCCCAGGCAGCTGGAATGCAGACTGCTCCACGTATGGGCACATGGCCGCGCTACCGAGCCTTTGACATTTATTTGCGCCTTCCCCCGTCCCTACGGACGATCAGTAAGACCGTAGCGAATCTCCCTGTACGCCACTATCAAAGGCCTAAGGAGGCATCACCTCCACAAGCCACGCAAGGTTAACGATAGTTTTAACTAACTATTTGACTTGTAGTTCCAGTCTGTCCTGGGAGGCCTAGTAGCTGTCTCACAAGGTTGCACATAGTTGTGTCCTTGGGGAGACCTGAGCTGGTATTACAGTTCGGCTGGGTCCACAGGACTATTGCGTTGTGGATTACGGCCGTTACGCCAAATAACGCCATACCGGCCATCATGTAAGCTATCGCCGTTCCTAGAGTCACCTTATTCTACCGGAGTTATCCGATCTAACCACGGACGAACCGGGCACTCTCTTGAACTTGTGAGAAATCGATAGTACAAGCGGCTCATTCTATCCTCGAGTGTTCAGCTATACTGTCTGCGATGGATGATTGCGCTTTTATCCATAAGCTTCGTAGATCAGGTAAGATTAGCCGCCCTGGCTTCGACCGTAAGAGAAATGCTTTAAGTGTGTTGCGAGTCGAGATCCACAGCAATCGGGTATACATGTCGAGGACATTGTACTTACCTTATACCCAGGTCACGCCCCCTGAAGATACATGCGCTTGGACACAATCGTAAT 9 | 
TACCCCGTCTTTCACAGGGCTTCCAACCTCAACAGGCGCGGCTCTTGGTAAATGGGATAATGTGCGATGGGCGTAGCGTCGAATAGCCGGCTAGTAAAGGGCAAAAGCATCTGGGCGAGCTTCGGTCGCTGCGAGGTCTCTGCAGCGCGTATTTGTATAAGCCTCCCACCGGCCCAGAATAGCCATATGGATGTGGCCGGGAAATGTAGTGTAAGCAGTGATATGCCTCCAATCCCTGCCCAAGGACTATCCTGTACCATTTTATCAAGCCAAACTATCCAGTAGGACAAGGCCCCATACGTAGCGAATACGTATCGAAGAGTTATCGCAAGCCCCATGTAAGATGACCATAGCAATGTGTTTCTTACCTGCCTTTGATTACAGGTACTCCCGATAAGAGTGAGCGAAACGGGATTGGCTAGACGTCAGCCTGGTACCGCGCCGCAAGCCTCAAGCTCGATGAGGTTAGAAGACGTACCTGTAGTCGATCGAACTCACTGTGGCCTGCCGAGTGACAATCAGCAAGAATCGCGAGATGGACGTTAGCCGTCACTAAAAGCGCTTGAAGCAGACGGGTCTCTCGTTCACACCATCGCCGATAACAATCAACCTGAGGAGTGCAGCATCGAGTATCTCCTGCGGTCGTGTGCCACTAGTTATAGAAGATTTACGGGTTTTTAGATATCCAGGCTATATGGCTTTGTAGGTTAGGTTGTATAGGGTAGATAATGAACTCGCGTGACGATAAAATTCACCCGCTTTACCGTGGCAATTTAGGTTCGTATTATTCGTTCCAATGTAGTCCCGCGCGCGCCCGTCAGTCGAAAAGCTTGCACGTTAGTACCACAGAATCCCCAACGTGAGAGCGTATTTAATCTATGAGCCGCTCAAGGACTCTTGGGTTTCGTGTGACGATGTAT 10 | TTTCTCGGTCACCTCGTCTATGTCGATATGGACTACCTTGAATATTCGGATTGAGACATCAGCCCGGTACATGGCCCAGTTGGGGTGCCAAGTCTAACTCGAGTTTTGTCGACAGCCAGACATTAAATAGAGCCCGTGGTCTGCCGCAATCCTTCCTTGGACAGCGCAGGCCTCCTTCACTATCGTTGGTCAGAAAATACCAAAGTAGCCCGCTACCATAGAACCCCCAGCCGGGCCAACCGTCCTTAGTCGAGTAGCATGGGAGCATATCGTTCGGGACGCTAGACCGAGTACAGCACCCCTCCTGACCGGCTCCTCGAACCTTCGGTGACCCTCGAATGGTTTTTCGCATCCTTGATGTTCTACATCCGCCGATTTCTAACGTGCTACAATGGTCCAAAGCTGCACCTCCGCCCCTAGCGGTAACGGGTGTGTAAGAGGTAGAAACGAATAGCATTGGTCCCTCCACATCAGACTATCTCAAGACAAGTGTGCAGATGGCAACCAAGCAGTTATTTAACGCAAGCCCGGCCGTGAACGAAATCCCCCTGCGGCATTTTTAACTGTCAGTCTTTATAGGTGCTATGACGAGGTATGTTGATGTGCGGAATCCCCAAGGAGCCAGGGTGGAGCTCAATTACACAAAGGAGAGTTCAGCTATGACGGATGCCTATATATAGACGCAATTCGATAGAAGTTCTAAACGCGCTGTAGGGTGTTCATGTGACACGTAACTTCGACAACATTGACGGCTACACATCCCTCTTTGTTGCTAATACGCCCGTTAATATACCCACGCCAGTATTCGAACTCAGCTGTGGGGAGTTTTGTCCATTAAAATCCTTCCGATCTTTGCACGGTCGTACCCCAGAGCTTGACATTTTCATATGACGGGGAGCATCACGCGTAGGAGGATCCAG -------------------------------------------------------------------------------- /datasets/rosalind_dna.txt: 
-------------------------------------------------------------------------------- 1 | TAGCGTAGGATGGAGCTTAGTTCGCAAGCCTAATTATCCTCGCCCGCTGACGTGATGAAGATAACTGCAACGCACAGCAGGATATAAACTAGCAACGCAAAATGGTGGGGCCATGCACTGTCTATCCCAGCTATATCTAATATGTTGGCCGTTTGTGGAAATGCATGATCTGGGTAATTTCTAGGAGAACTCTTAGTCCTCAAGACTATAAGGCGGCGAAATAATAGTAACAGTCTTCGTACCAATTGAGAATCAAGCTCCTCGACGTCGAAGATGGGGGTTTACACCCCTTGACCAGCGTCCCCGGCCGTTAATCTATCTATAGGTTCACGTGGGGCGAACAGCGCCGAGTGAGCTCTACCCAATGATCGGGTGCGGCTTTGCGACTCGTATTGGGCGATGCGCCGCACCTGGCCCTGGGGACATACGCATTGTTTCGAATAAGAGCATACGCTAGTACCCCATACGAATGTGTCCGTAAAGACTAGTCCTTCCTGCGCTAAGGACGGGATTTGTTGAAACCTACGCTGATTGGCGACCGAGTAATCTGGAGATTATGTTATGATTGTAAAGGGAACACATAAGCCCTTCGTTCTTTTGAGTACCTTAGCGAAAAGGTATCAGTCTACGCCCAACGCTATCTCATGGGGTATCCCGAATCCAATGCGCAGCCACCTATCGTACAAGGAGCACCCAAGCCGATATTCGTGGATGGATCCTCCTTGGTGTGTAACTCTGAATCATGGAATCCGTCTAAAGCCTGTACTGGGTTAATCACCCCCGGTAACTTGAGTTTCCTGTCCCTTGAACGTATCTAGAGTTAA -------------------------------------------------------------------------------- /datasets/rosalind_eval.txt: -------------------------------------------------------------------------------- 1 | 10 8783 2 | 0.000 0.054 0.123 0.179 0.211 0.278 0.345 0.377 0.405 0.453 0.512 0.569 0.601 0.670 0.704 0.784 0.814 0.859 0.935 1.000 -------------------------------------------------------------------------------- /datasets/rosalind_gc.txt: -------------------------------------------------------------------------------- 1 | >Rosalind_6783 2 | TAAAGTGTCACCGCGGTTCGATGATTGGCGGACCAATGGCTGCTTGCGCCTCGCGTAGCG 3 | GCGGACTGCTACCTGACTCGCTTGTCCGACTTTAAGTGCTAATGGCAGTGGAGGCCAATT 4 | CAGGTTACCTACGTATCGGATCGGCGAAAGCCCCAACTTCACAGTACGAGTTCGGCCAGT 5 | CTAGTCCATGCTATTTAACTGTTGGTTGAGTGCACATGCGAGCGAACTTAATATCTGCAA 6 | GATTGCACTCCTAAGGCGACTTGATTCGGCCATATAGCTTGCGGAGCGCGTGTGTTTAAC 7 | ATGGCCACGGTGGGGTTCGGAGTCGGCGCACAGCCGAAATAGCATCATTGACAAGCCCGC 8 | ATAGTAGTTCACTTTAAGATATCAAACTCGATTGAGTATTTCTTCCTACGGAGGGGGGGT 9 | GACGTCCGTGTGCATACGAGTCGTCTGATCCATTAACCCATCATCAAGAAGTTGCCATGC 10 | 
GTTACGGTGATGACTAATGGAGCCGCGACGGAGTGTATATGTGCGTAGATGCCCCGGCGC 11 | TTTGGTTAAAGTGGTAAGGCAACGACGTTCCTATCGCTACCGCGCCAGAGGATTTGGTGG 12 | TGTTCTTTGGGGTGCCCCTGATCCCGAGGACGCCAATCAGCGTTCATCATTGGAGGCTAA 13 | GGAATGTAAAAAACATGGGGGCAAGGTGGATGGGGGCCACGTGCTGTTTTCTCAAGTCAG 14 | CCCCATGCTACTAAAGTCCGATCCAATAGAATCGTGACTCCACTAGATAAATGACCGGTG 15 | TTAATAGTAAAAAAAGAATTTAGGCCGGGAAGCAGTGGCCTTGAGCAGGCAGATCCTCTG 16 | AACCCTCCTTTTAGGTGTAG 17 | >Rosalind_4665 18 | GAGGGCGGGGGCGTTTACGACTGTGACAGCGGGTCATGGTCCCTAGTAGAATCCTGGTAC 19 | CTCGACGAAGGAACGTAAGTCCGTTCCGCGTCTCCCCGAACCTGATGCTGTTGTGGACGG 20 | GACCCTATCGCTGCCCCTAATAGAGAGAGAGACTGACAGCCTACACCTCTGTTGGCGGGA 21 | CTATGTCAAAGCGCGCGCACCCGCTGTGGTAAGTGAGGTGCGTGCTTTTTATTTTGCGGT 22 | CGTTTTCTTGGAAGTCACCTTCTTAAGGGTTAACACCAATTGTGGCACTACTGTCATCCG 23 | GGCTAAAATGTAACCCACACTGGTGAGCCCTGCAGCTTACTGTAGGGTACTAGAATCCGT 24 | CGCGCAAGCCAGCACCGCTGTCGACCTTCGACCTTCACTTCCATACTGAGATAATATTCT 25 | GCATGCGATAAGGAGCTCCTCTCTTTTCTAGGGGAGCAGTCCGAATACTGGAATAGTACG 26 | TCGCCGTCCTTTACAAAGCTGCGATGGCCGTGTTTTTTGCTACTGTCTCCTCTCCCGCAA 27 | GGTCTTCTCCTCGGACGTTGAATTATACCAGAGAAGAACAAAGTGGGGTCGGTAACCCAA 28 | ACATTGAATATGCGCAGGAAAACTGCGCTGTGATGATAAACTGTCGTCTCACGATATAAG 29 | CCACAAGGCACCGTGTACTCTATAAAACATCATCGGGAGGTAATCCCCTCATGATCCCAC 30 | CATCCTGGACAAATGAGTCGGCATACGAACGAACTAGGTCAAGAGCCCTTGGTTAGCGAG 31 | GGGATGGTGTGATTAGTTCGCACCAAGTAGCAACGTATGTGGCTCTTAGATTTCGGTATG 32 | CTCGCTCTGTATTACTACCTCACCGTAGATGCCCTGAAGGA 33 | >Rosalind_6275 34 | CAGTGAACGAGAATGCCCAGTTCCTCTTAAAATAAACGCTTGTTTGCCCGGGAATAACGC 35 | TGGGCAAGAACAAGGCCCCCCCAACAAGGTTGTGTGAGCAGAGATATCCCGTGCGCCCGC 36 | CCGGTCCGAATCACCCGTACTGAAGGTTAATAATTGACGAGTGTCTAATAGTCCTCATCT 37 | GGCACATCGCAATACTCCACGACGAACTCACATTACATACGTTGTGGAGTTCTTGATTTC 38 | CAGGTTTGTGCTCGTCTCCCTAGATTCAGTCTACTGAGCAAAGGATCGTGTCTAACGAGG 39 | TTGAGACCACCACCTGTTAGAAAGCCTTGCAGAACCCTAGGTCTGTGTTCACTGGTGGAA 40 | TCACAGGCATTCCTGGTTGCGTTAGAGTTCCCAGGACTTACTTTATCTAGCGCCTCTGGT 41 | TTTTGTTGTTTCCGGCTGCTTAGAAGGTGGCCATGCTGCGAACTGTTACTAGTCTTCTCT 42 | 
GGGATCACCCAAGTTCTAGGGTAGCCGCTGACTGCAATGGTGATCTTACACCTAGCCCGT 43 | GTTGATAAGTTATTTGGCGCGGGCCCCTTTATCATCCGGGCCATGAAATGGTGCCATAAC 44 | CATAATCGTGGACGGTTGTGTTTGTTTGCCGTCGGTAACAGAATTCGTGATCAAAACCAC 45 | AAAGATAGTCCGCGTCTATAAGCTATTAAGGACGTTCTATAGCCGAGACTGTTCTCGACA 46 | GGCGACGGTCTCGACGGGAGCAAGGGACGCGATATCGGCTCAGGACATAGATTTAGAGGC 47 | TCCCGGTTCATGGCTAAGCGATCCTGGCGGTCCCTTTATTTGGCCGACTGTTCATAGTGG 48 | CCTAGAGAGACCAACTCTCCAACTAAACCCAATTCCAGTGGCTTCCAAAAGTTCCTAACG 49 | TATCTGAACGACATACGAAATTTCT 50 | >Rosalind_2564 51 | CGCACCGCGGGGAATCCGATGTGTACGCTGAATTTCGATCCTAATACTGAGAAAAGGCGG 52 | GTCACATTCTAAAAGTCGAGCCTGGTTAGATCCTTAAGTCATACCGTGGAGTCATCGGCT 53 | AAATTGGACAGTCTACACGGCCCCAGCAAAGCTGGACGGGGTGGTCCTATCCGGTTCATG 54 | TACATAAAACTCCTTTACAGTTGGCCAGAACCAGGGCGCGCGTCATGACGAAACACTACC 55 | GTACACCGACGATACTTAGTTCTAGACCAAATGCCCAAGCAGGGTCACGCCGTCATGGAT 56 | TTCACGGATTTGACAGGCCATGTACTCAGCGCGATAAAATCGAGAGCCTAAGTTTGTTAC 57 | GCAATTTGGCACTTCCTGTTGAGTCGCGGATGCCGATGTACAATCATAGCTTAGTACCTA 58 | TCTTCTGCCGCAGTAGTAATACTGTTCCAATCGAGTTTAGGCCTTCGTACGCAACGCACA 59 | AGCTACGGCACATTGATCGCTCCCCATTAAAAAAGATGGATGCTTGTCAAGACGTTTGCG 60 | CGGGACGGAAGAGCGTACCTAAAGCCAAGGAGGCCGTCATTGCTCCGTCATGTGGGATCA 61 | TCACTTGAGTTGAACGTGGATTGACGGACGTTGCCTGCGGGGGGGGGCCCCCGTATTACC 62 | CTCCAGCGTTGAAATGGATGATACGCTAGTAGCATCGGATGGCAGTGAATGGTCTGGGTG 63 | ACGAGGCAGCCTGTCATGGCGCACCACTCAACAAGTCGGACGGGACGCCATACACCTCCG 64 | AACCATGATACGAGCGGGGCGACCGATGAAAGACAGGCACTCGAGCCAGTGCAAAAAATT 65 | TCGCACGACTTATTTCCCAGTCTAAGATAGTTTTTCTCCTAGTACCTATAAGTTTGCCT 66 | >Rosalind_8047 67 | ATGGGTTTCTAGCGAGGCTTGGATTGGGAATGTATCGCCTTGAGAATTCCTTGCTTATTA 68 | CTCGTTGGTGCTCCTAGGAACACCTAAAAACATATACCCTAGACCGTCGGGCTTCTTACA 69 | ATCCTCGCCAGCACATCTGCATTATGACGTGTATTACTCGTATCTGTATTAGTACTTGGG 70 | CTTGAGGTCCATACAGATTAAGCGGATAGTTGAAAGGTCAGGGGTAAAGTTGTGTCTAGC 71 | GCCCGGATGCCTCCACTCTTTAACCACCTTAGGACCCAACGGTTAACCTTCGCGCCGGTT 72 | TCAAAAATGCACCTCCTCGGAGGTCCTTACTATATCTCGTTACAAATAAAGGATCTGGCA 73 | AACACTACGACCAAAGGCTGATATACAAATGTCTGGCGTTCCGTTGATCTATCTTCCTCC 74 | 
CTTGTGGCTTTACCCTCCCCCGTTGAAAACGAATCCTCGTAAAGATGGGGGTAGACCCAC 75 | CTTCTGCACATCCAAGATCTGTACGATATAGCGCAGGCCCTCAACTACGGCGCCCTATGA 76 | AGGCATCAGAACACCGTTTCGCTTGGAGGTGTGTGCTCGATCACAATCAAAAAAAGGATA 77 | AATAACACGATTGTGGGCACTTGCGAGTCTAAGCTCAATCACGGGTTCTCAGCGGCCTTA 78 | CTTTAACCTGCCTGTATCTGACAGGGATAACCAGGAGTAGTTGGCAGCGTCTAGCGCATT 79 | TATAACATTGAATCTTCGGTTCTGTCTGTTTATCTCCTTATGGCTAGCTAGATCAGGAGG 80 | GACAAAAAGCGGCTGAATTTTAATAACCTCTTGCCCTGACCACCATTTTACATTACGTTG 81 | GATGAATGACAGCAACTGCCGAGTAGTGGTGACAGAGGTCATTATTCACTTATCCGCGTC 82 | CGCTGGTAGCGATTAGGGGCA 83 | >Rosalind_9587 84 | GAGTCCTCGGGACAGGTCAGATATACATATCGAATAACGGAACGAACCGCTATGAAAGAT 85 | ATTCACACGAACCGCTTCACCCTACTCAAGGCTCAGATTCGAAGGCTTCTTTCTCAAAAT 86 | TGTTGGCGCTCTCGCCATTCTCGGCAGTTTGGACCTGTGTCTGGCTCGCGGCATAGCTAA 87 | CATCGCCGTGGTTTCTAAGCCCTTAGGTGACTGTATAACCGCTCGAATTGCGAATATAAG 88 | AGACCCCCGCATATGGTTAAACTTCAGGGATCGCCGCATTATAGCTCTCCCGCGAAGTAG 89 | ACGATGTCCCGCGCTGTCGGTCAATTTCTACAGGCCTGAATGTAGACCATCCCTACTGCT 90 | CGGGTCCGGGTTGCAGAGTTAGCGGAGGTTACTCGTACCATAACCCCTTTCCGGCGTGGT 91 | CAAAAGAGTTCCTTGTGCGCACTCTAAAGCTCAGGCTTAAACCAAATGCCGTACTAGCTG 92 | GTGAGTGCCTTCGCTACAGGATAAATTCGATCGGGGGGGGGGATGCTCGGGTCTTGCGTT 93 | GCTCAGCCTATTCAACCGATGTGTACATAGCACGGCAGAATCCTTCAATACCAAAGAAAG 94 | GTTGCCATCCCTCATGGCAACGCGTCGGTGACTGCTAGCATTTGAGATATAGGGGCTCGG 95 | ATCCGTATTTACTTATGTCCATGCGCACCTAGGACATCCGTCAGGCAGACAATGCAAGGG 96 | TATAGCCAAAAGAGTTCTGATTACCTGGCTGCCCAACCGATTCAATCTGTATTGAGGCGG 97 | ATTATCTTTGCGCTCTTAACTCTAGGCGGTGTGCGCATTCGATCGCCAACAAACAGAGGC 98 | AAGGGAGAGACGAGGCTAACCTTTCACGTAACAGATTATGTGGCAAATCAGAATCCAGCG 99 | CGACCTGCCCAGGGGGACAACCATGTGTAAACGCCTCACTCGATGGAAGGCAGAGGCAAA 100 | GTCGCTAGTATCA 101 | >Rosalind_1286 102 | AATCGCTCCCAGGACCGTCCCAGCTTTGAGCGGCACAGTTCGACGACGCTGTAAAACTAG 103 | GATGCAATGTAGAACCGGACCTTGTGTCAGACGGTCTTTGCAATCTCAGTGTGATCAAGC 104 | AAGCTTCGGACTGCCGGGTCGCCTGACTCCTACTCGCTGTAACGCAGACATTTACATAGC 105 | GTTCGCTGGACCCCACCAACATGACTATTGAATACTAGTATGCATGAGCATAACGTATCG 106 | 
CAGGACCCACATCTTAAGTAGTAAGGGAGCGACACACCTACGGACTCTTATGGTCCAACC 107 | GCTGCGGAGGGGTCTCTTACGTGCTTCGTCGGCTCCATGCAGCTCTTGCTTCTACAACTT 108 | TGGAGACGGGCGCCAGAGAATATGACTTATGTGCTTTGCCAACGCGGAATGTGGTACGAC 109 | GACCCCCCACCCCACATGACGTGAGGCTAAATACAAACCTGTTTTTATATACATAAGCAC 110 | ACTCGTGATCCTGGTCTGTTTTTCCGAATGCGTTGCCCGCGCGATAGCACAGGCGGTAAG 111 | GATCAGGAAGAGACATCTATCGAGATAATATAACCGGTAGATACTGACGGTACATCCTCT 112 | GGGCGCACGATTCTCGCACGAACGCGCCAAAGGCGATGACGTAGCTAATAAGCATAGGGA 113 | TGCGGGAAATGGCGCGGGGCTAGAAAGAATTAATGGACGATATGGCGAGTCAAGTCAGTT 114 | GATTCCTTGCTGTGATCGAGCGCCATGCGTACACAAACGCTTAAATGAGTGCGCAAGGGG 115 | CTGGGGGACCCGCGAGTGCCGTTAACTACAAACAGCATTTTCTCGCGCGCGTTAAGACGA 116 | TTCCTGTTCTACATTGAACGATGTGATTACAA 117 | >Rosalind_9708 118 | TACGCTACTGCTTAGCAGTAGCAGAAGCTACCAGCTAATAGTCGCTATCCAAAGGTGCGC 119 | TCAGGGTTACATCGTGCTTTCATCGAGCCAGTCCACGACGTATGGGGTAATTGGAAATAG 120 | TGCTTCGCATTACTACGTGACTGCAAACAAATGATAGTGAACCTCCCAGGTACATTCTGC 121 | TCCCTTGACTTACATTGGCTACGGGGCGCGGATTGGGATGTTCATTGGGGCCCCTGCGCA 122 | TTCCCCTATACGATATCCTCCCACGCCGGGGCTGACGGCCCTGTGCCAGGTCTATGAACC 123 | GGCTAATACTCTATAATTAATAAAGGATCGTTCAACTGTCGTTTGACGTCTAGAACATTG 124 | TGCATGCGTACCAGGTTTTTGAACTCTGCCCAGATGGACGAGGCGAATCCAGAACACGCT 125 | GTGAGGCGATTTTATTAAATAAATCAAACTATACGTGTACAATCCCAAAAGTTAGCATCG 126 | CCTGTTACCAGTTACACTGACCTTATTTACGGATGCACGTGGAGGTCTTAACAGACGGAA 127 | CGACATCTGGTCGACCCACTGACGATGCTAGTCGAGTGAAACCGATGCCGGTTCTAGTTC 128 | CACTCTCTCAAGACCAGTGGCGTTGAGGCACTACGTAGCACCAACACTAACAGACAGTTT 129 | CAAGGAGCTGATGGAGGTGACCCTATCCGCTGCTAGTACAATTGACAAGGCCGTGGGACC 130 | ATGCGTTGTAGAGTTGATCAGACTCTGTCATCATACTTCATGTGACTCCGAGGGGAGTCC 131 | ACTGCAGCTTTTAGCAGCTGGGT 132 | >Rosalind_1638 133 | GAGTAGCGATCGGACGATCCCATTTGACTTCCTGATGGAGAAGGGCGGATCGTTAATCTT 134 | CCCCGTCGCATGCCCCAAGGCGGTTATTTTTGAGCCTGCGGTGCATTCTGCTCCACAACA 135 | GAAACTCTCGGTTAGGCAGCCCGACCAAGCCTGTGACTGATTACTGATGCCACCTGTGTA 136 | TAGCTAATACTGTAACTTCAATCTTTAACCCGTTCGTCTACTAAAAAGCCGTGTAAAGTC 137 | TGCCATTACACATATCGTAATATAGGGTGATTAATGATTAACCGAAGCGTCTGGAGCATA 138 | 
ATCGTACTGTTACAGCGAAAAAATGACTCAAGTTGATCTTCTACCTACTTGGAGTAAAGT 139 | ACTGACGTCGGTCAGGGACTCCGACTAACTTGAGAAATCAAATGTGAAAGACAGCCCCCA 140 | TGTCGTCATGCAATCATTTATAGGCAGCGTCGTCTGCTTACTTAAAGTTGGAGACCTGGC 141 | CAGTTTTGGAAGCCACTGCTTGCACCGGCAGCGTGCATCGCGGTTGAGTCATGAAAGTTG 142 | GGTAACCGTCACACTCACGTCGGTTTGCGTAGTTTTGTAGTCAATTTCGCAGGCTGATCC 143 | GAACGATTTAGTCCTGCCAGGCTCTCCCTGGTGTGATTTAGTTCGCCGCCTGGCGACGGC 144 | ATACCATCGCATCAATTGCATGCTGATGAACCACCGTGAATCTGTTACCGCCGTAGTTCT 145 | TGATATATCCCGCGTACTTTAAGGCTTCAGTAAAAACCGTCGCTTCCCCTTTGGTCGGTA 146 | TGTCAATACCGTATTCCTTGTTCAATCGTGTATCCAAATCTTGGATCTATCCATAAGAGT 147 | TGATGACCCGTAGGTGTCATAGGCGTTACGTCT 148 | -------------------------------------------------------------------------------- /datasets/rosalind_grph.txt: -------------------------------------------------------------------------------- 1 | >Rosalind_8260 2 | CTTGGAGTAGGCACACCTATCTTGGGCGATTCTTATGCGTATGGTGTGACCTCGTCCCAA 3 | TGGAGGGTCCGCGCCTTGACATCC 4 | >Rosalind_3895 5 | ACACTCCAACGACAGTCACCCAGGGGCCATGGCCGTAAATACCGGTAATTGAAAAAACTA 6 | GGTAGAGGTGTTAGTCCACGG 7 | >Rosalind_4630 8 | GTCGCACGGACACCACCATCCACGTGCACATCGATAATGCGCTGTAACCTGGAGGCCGGT 9 | CAGTCTGGAAGGCGCTAGACCTAGAT 10 | >Rosalind_0142 11 | CAGAAGAGTGATAAGGGCATCCTGGTGTACCCCAAGTTTCAACTCCCGAATGGATCACAT 12 | TGATCCGTCTTTGGGGCCGTGGATAAAATGTTG 13 | >Rosalind_4822 14 | GTGAGTTTGGGGCCAGATGGATGGAACCACCCCATTCAAGACTGGTGCCTATAACCGAGA 15 | AAAGCTTCTTTCGCGAAATTCCCGTTAACACTATTG 16 | >Rosalind_0952 17 | ACTGTACGTACCCCCGCTGTAGTGCAGAATATCGTGTATGACATTCAGAGGAATCGTCGA 18 | CGAGGCATATCCTAGTAGTTAGCCTTAGAACAAT 19 | >Rosalind_1836 20 | GCACAGGAACTCAGGGATCCTCGAACTGCACAAACACTTGTCCCACAGCTTAACTGGTCT 21 | CCCATTGCTCCGCGAGTACAT 22 | >Rosalind_9442 23 | CTCATGTTGCCAGACTCCACTGGAGGACAGCAGCAGACGTTGCGCAATCGGATTACAGCA 24 | ATGTCCCGGTTTGGGCCTTTCGATAGC 25 | >Rosalind_2034 26 | TGTCCTTAGGCCGTACTATTCTTGTGTAGTCAGTACGTCAAGTTCAAAAGCAGTACGGTA 27 | ACCGGGAGGCACGTCACTAACCTTA 28 | >Rosalind_5803 29 | ATGCCCTATAAGGTAGAAGGCAAACGGAGCACGTTTGAAGTGTGCATGCATTAAGGTAAA 30 | 
TCAAGTAGAACTGTGCACGCTAAGTGTGTCCGATTGAG 31 | >Rosalind_2537 32 | CCATACCATCTACAAGGCCGACACATTTAATAAGACTCAAGTAGCTCCTAAACATGAAAC 33 | ACCATTCGGGTCTCACCCTGAGTATTTCGGTCATCG 34 | >Rosalind_7771 35 | GATTTGTATCCACGCCTCTTCGTACCCTCCGTGCCGACAATACCATACACTAATTTGGAT 36 | GCTGAGTCTCCAGGCTGCGGGCTGTTCGAAATCTAAGTAG 37 | >Rosalind_7832 38 | GCACACAGGTTATTTTAGAGCTTCTCGTGTTGGTGGACTGGAGTTAACCTTCTCGGCAGG 39 | CGTCGCGTTATTTAGAAAAGGCTGCCTATAAAATACTG 40 | >Rosalind_9942 41 | GAGTACAACAACCAGGTGAAATGATGGCAAGGGCAACGCCTGAATCCTACGACTTTTCTG 42 | CGCATTTAGTGGAGACGTTAGTGTACTTGATGCAATCCGT 43 | >Rosalind_4427 44 | GGGGCGTAAGGAAGCCGTTCCGGTGGAGGGAGCGATGGTGTCGATACCACCACTTTATCT 45 | ACAGCACTCGTGGGGGAGCG 46 | >Rosalind_7260 47 | ATGGCCTGTCCTGTCCTGGCAGGATTCGTTACGCCTACTTAGCCGCGTTATTGGTGATGA 48 | CTTCCCCCAGCAGAATGTTCCCGTGG 49 | >Rosalind_8448 50 | AGCTAGGTCTGAAAGCGGTGCTGTTGCCACATGCCTCGGCGGTCTCTAATCGCGTATTAT 51 | GGATGACCACTGTAGCACTGGCGGCAGTTCACGCC 52 | >Rosalind_7936 53 | TCAAGCTGTCTCGGCTTCCAGACGTAGTCTTAACTGCAGGCTGACTATATTACCCGTTTT 54 | CTAGGGTTCGAGCATCCGACCGGATTCA 55 | >Rosalind_5961 56 | GCTCCGGGTTTACTATATGTTTAACTTATACAGATGCTACCCGCTCCAGCAGTTACCTCT 57 | TTGGAAAAAGTTTCTTTAAGTTCGCACAGAACAC 58 | >Rosalind_8804 59 | CCTGGTCCGATCTCTCATGCTGCGCTCGCCCGCTTCTTACGGATCATTTTAGTATCTCGA 60 | AGACGCCAAGGACGGTGAACGTT 61 | >Rosalind_2097 62 | GCGAACCCATCGAATCTTACACCTTTATGTGCACCACCCATAGTCAACGATTAGCTATCG 63 | ATTGCATAGCGGACTGACCAACATGGTCTTCGCGAAATT 64 | >Rosalind_2275 65 | GTTCTGCAGAATGTAAGACTTACCTGTCCGTTGAAAGACACAGCCATTAGTAGTGAATGG 66 | GCTGTGGTGATCACGTGGTCTC 67 | >Rosalind_4576 68 | CTGTTAACACCGTAAGGGTGGCTGTAGGACGCAGTTCTGATGGGTAATGCTCCAGATGTA 69 | TTGCGCAGGGGGAATCCCTTCAGCAAAG 70 | >Rosalind_0466 71 | CTATGTAAACTCCCGGGTTAAGGTTAGATTAGCATGCTGCTCAAGGACGCCTTTTTCAGC 72 | CAATCGATCAGTGAGCGGCGCAGATCGTAAGCTT 73 | >Rosalind_2307 74 | ATGGGCACGGGTCAGGTTTCGACGCTGCCCCAATTCGTTTCTCGAGATGCGTGCCTTAGG 75 | TGCCGAGACGATGGCCCGGATACCCTCTAGGGCCATACT 76 | >Rosalind_5982 77 | CCCCTCAACAGCGTAATGCTTTTCGCTTATGCGCCATAAGGAGCGGCTTGCCGTAACGCG 78 | 
GCACTCAAATATCGACCCTTGTTTAGAGGGAACG 79 | >Rosalind_2057 80 | TTGAGTCGCCCATCCATAAAAAAAGCAATCGACGAGCAACGTACAACTTAATGACACTGC 81 | ATCGTTATTGGTGTTCTCCCTAGCTCTCTCG 82 | >Rosalind_4776 83 | ACGAATTTGCGGTTGACGGCAATATCGCGGAATTAACGCGGGGTCAGTCTTCTGACATTT 84 | GACATACATTTAAGCAGATG 85 | >Rosalind_1127 86 | CCGTCGATCATGGGTTGAGACTCACATGTAGCACACCTTAAACGACCTCAGGCAAATTCA 87 | CCTGCAAAGAATCGGCGCACCG 88 | >Rosalind_1711 89 | AGGCTTGGACTAAAGCACATTCACACCGTGTAGGACGAAAAGTTGCCCGGATATCAGTAA 90 | GCAGCCAGCTTTTCATGGGGTACGGCAG 91 | >Rosalind_5566 92 | ACGCTCGTCACTCTCCTATTGTAGGGTATGGGCGTCTACAAGTTTAAAGCTATAGCTACT 93 | CTTTGGCTTTAGCCGGTGACGGCATGTTTCGTAGT 94 | >Rosalind_0384 95 | GCTCTATTCGGCTTCCTGCGGGGAACAACGCAATTCGCACGAGACACCAGTTGGCACCGC 96 | TCTTGAACTATGCACGGGACATGTGGTCCTCATCGTT 97 | >Rosalind_4682 98 | CTGGTAATTTAAGGACCTCCGCGACGAGTTTAGTCATGTGGAAAAAAGTTCGGCTTAGGG 99 | AACGTAACAGAGCTATGACCCAGGACGGGGAGT 100 | >Rosalind_1211 101 | GATGAAGATTTGGAACGTGTGAGTTCGATAAATAGACTCTGCCGGCTAAATAGTAAATCA 102 | TCACTCGCGAAAACATCAGCTCCCGTTCTACGCTGCTAGT 103 | >Rosalind_7944 104 | GTGGCTACGATCTTTCGCACCCAGTTGATCACTCTACACTCGATCGAACTACCTGCTTCT 105 | CCGCCAGCAGCTGGACTGCTCCGTCAGCCCT 106 | >Rosalind_3838 107 | TAACGCGGTGGTCGGCATACGTTATGTTCAGGCAGGGACCTTTACTTATGGATAATGTGC 108 | AATTTCGGCAATGTCTGCACCC 109 | >Rosalind_7737 110 | TTCTCTCCGATTGGATGTAGCCACTACATGAACACCTGTCCTGGTGAGTCCAGAATCCGT 111 | CCTGACCTCCGAATCAGATTTCCCAGCTCG 112 | >Rosalind_3517 113 | GAATTCAGGCTGTAATACTGGTCTTCTTCCCACTTAGCGCTACGTTCACGTGAATTGGGC 114 | TTTTTTCGGAAGAGACAGGACGGGCCGGATGGACGAG 115 | >Rosalind_7965 116 | ACCTTGATGCTATGCAAGGCGGTTGCAGACCGCTCGAGCACTTATTCGTCACGGATCCGC 117 | ATATACCTAGCCGATCCTGGTGCGGACTTCCCGCTA 118 | >Rosalind_2950 119 | CTTAAAGGCTCCCGTGGTCGGTGCTGCTAAACGCCCCATCTCTTGCTGCCCGGTCCCCTA 120 | TATGGGGTTCATTTACTAGAGCCTG 121 | >Rosalind_8010 122 | AACCGTAAATGGTCTTTTAATGGGAGAGCACAGGACCGAAGAAGTTTTTTTGGCGCAACT 123 | CGTGGCGTCTTTTGAACCGAACGGTCGCATTTGT 124 | >Rosalind_0757 125 | ACAGCACATCAAGGCTAGTCGGTAACAAGACGGCACCCCCCATTGCTCCGTATCTCAAAC 126 | 
GTTCTCTTGTTTGAGGGGGTCCCCTTATCATAT 127 | >Rosalind_1008 128 | GCACTAATCATACATAAGTGAACTTGCACTCGCTAAACGGCCCAAACTAAAAGTATGCAC 129 | ATAGGAGACCTACTGGATACGTGTTGGGGG 130 | >Rosalind_4509 131 | ATCTCCTTTGCTCAAGGAGCCAAGTGACCTAATCAGGGACTTCGTTGGGATATTCTACGA 132 | GGAAGGTAACTCTAATAAGTC 133 | >Rosalind_2069 134 | CCGGCTTATACCTAGCGTTTTAACCGGTTGACGCGATGCCAAGGGTAGCCGGGGGGTCTT 135 | GTCCGGTAGCGGTACGGTTAGGACTCG 136 | >Rosalind_2662 137 | CTACATATTACTCCTATGAGTTTTAATTTTAGCCCGTTATACATTTATTCGAGACGGTAG 138 | TGTGGGGTGTTAACGAGATATCTATGATGGTCCTTTATG 139 | >Rosalind_5554 140 | GCGCGTCCGTCACTCCCGCGTACCTTACTAGAGGTTACCTAGTTTAGTACTACAGTGCAT 141 | TGAGTAATAAGAACCGGCGAATGTAGGGAGCGCTGCTG 142 | >Rosalind_6786 143 | GCTTACCAGTGGCAACCGTAGTTACCGCCTAAGAATAGCGTGTTTATACGAACCTGCACC 144 | TCTAGTACCTTTACTCGATTTCTCAC 145 | >Rosalind_7850 146 | TGTATACGGCTACAGGCCGCAACAGCGGACTCGTATTGATATCTTTGCGGGTACGAGACG 147 | GTTTCATGACATTTCGGCAA 148 | >Rosalind_7491 149 | CGGTATAGACACCTGAATTAACTTAGTCAATCCGGCACCCGCAATGTCTAGTCTCTGGCA 150 | CTACGGGGATAGGCTGTAAGCCACCC 151 | >Rosalind_2149 152 | TCGTATGGGATAGGGCAGGCCCAGGAATACTCGCCCGCATTAGACTCATACAGTTGGCCG 153 | GATTCTGCCAAATAGAAATAGAGAAAGATGT 154 | >Rosalind_5749 155 | ATATCATCGAACCTGGAAAACGGCTCACTTGCAGTAAAGTGTTACCAACGACCCTGTTGC 156 | GACACTTCGTTGCGGCTAGTGAAAAACATGACA 157 | >Rosalind_2498 158 | AGGCACCCGGGGAACTTCTGAACAGAACTACAGGAACTCTTCTCAGAAGTACCGCGGAGC 159 | GTAGCTGAGAAGGGGGTCACTTTGGTCGATATTAAATAGT 160 | >Rosalind_4920 161 | GATCTCTCCTGGTCTCTGCGAGTCGTGTTGTAGGTTCTAAGTTCTGGCAATGGCGTACCG 162 | TATTGCCTTGGGCCCCTCATCACACGCCATCGCTCAC 163 | >Rosalind_0705 164 | CGTCACTAAACAAAGTGTCACCAGGTATTGGCCGTCTAGTTGTCCTCGACTATTATCCGA 165 | AACGACACACCGGGTGTGCGTCGAACCAGTA 166 | >Rosalind_6308 167 | GCGCTTACCCCACGCTATTTTGGAGGGACGATTACTAGTGAAGATATGCTGGGAGCCGCA 168 | GTAACGTCTCGCGAGTGCCGATCTT 169 | >Rosalind_9053 170 | TCACCAAGCTAACACTTGCGGCTTGTTTAGTCTAAATAACGGGTCGCGAGCGCTAATAGG 171 | CAAGTACTCGATTCTGCCTCCAGGGTTGC 172 | >Rosalind_5839 173 | CCCGGTTATTTCGCGTCCGGCGAGTCACGGAGCCAACATCCCGATGAGTGACTATCCCGG 
174 | AGCGATTGAGTATAGTACAAAGTTGCCACTCCTGGAT 175 | >Rosalind_9137 176 | CTATCCCGCGAGATAATTCAAACCTGAAACCGACCGGCCACGTTAGCAATCGGAGTGGGT 177 | GGATTCAGCCGATATGATCGGTTTTTCCG 178 | >Rosalind_9717 179 | ACTATAAGTCGTCACTGAGGTCTATAGAACGGTCATCCTCGGACGTAAAGAACGGAACGG 180 | TGTGCACTGGACTTAAGGGTGATTATCTA 181 | >Rosalind_0457 182 | TAGCAGTCCTAGGCCTGCCGCGGCAACGTGTGAGGTGATTAGGAATAACCCACAGCTCAA 183 | CGTCGGGGTCTACAACGGTTGCTTCA 184 | >Rosalind_7084 185 | ATAAAAGCAATGGAACGTAACCCGATATAGGGTTAATGGCGGCTGAGTGGTCTTGGAGAG 186 | AGCAGCGATCCATCTTGGCT 187 | >Rosalind_4023 188 | GTACGCACTGTTGGGGGGATAACAAACGATGTCCACATAGAAACCTTCGTAGAGAAGTAT 189 | GAGTCGATCGAGTGTAGACCGCCGTGTTG 190 | >Rosalind_7164 191 | AACAGCCCAAAGGGTTGCCGGTAGAACTAAGGAAATTACGATAGCCCATGACTCCATGCC 192 | AACAGTCGAACTCAGGTAGCTATGAGATCGAGTGCCA 193 | >Rosalind_7827 194 | AAGTACCAGGTCACCGGAGCTCGTGTCGGCGACAGCCGATAAAAAACCTCTGTGTCCGAA 195 | GACTGCAGGTTAGTATCTGCAAGACGTCT 196 | >Rosalind_8335 197 | CCCACTGCTTCCTGGGAAGGATCGGTTCGGGATCAGTGTATCGAACGCCTTCGACCGAGA 198 | AAACTCCAATTGGGGCGCTCTATATTTGGTTAATTT 199 | >Rosalind_0364 200 | TTCTATGATGGGAGGTTCAAATATACCAGGGGTTACCCTAGAGTTGTCCTCTGATACCCC 201 | AGTCCACCTATACGCTCGGGTGTGCCTGATCGAAT 202 | >Rosalind_5937 203 | TAGTTGCATAACCCACCCTACAATGTCGGTGCCTGAAAATGCCCGTGGGATTCATAGCTC 204 | AGGGCGGGAGATATCTTCGTGTAAGGCCT 205 | >Rosalind_4368 206 | CTGCTTGGGAAGTTCAGTCCGGATGACATAGCCGTAATGATGGTCCCGCAGAGAACTATC 207 | TCGTTACTGGGCCTAAGCGGGCATGAGCGTATAAAC 208 | >Rosalind_0745 209 | TGGACGTCCTATGGGGCATTTATCACAGCGTCATTGGCCACGCCCGTAACAGTGGCAAGG 210 | ACAAAGACCTAAGGGGCAATTAGCTTTTTATTTGTT 211 | >Rosalind_0996 212 | TCTGGGTGATGTTGCCCTTAGCATTAAATAGAATCCTTAACTACTTGACCCGTCGGAATC 213 | TCTCCAGAGGTCGACCACGGTATGGCGAAAAAGACGTG 214 | >Rosalind_5131 215 | CAAATATATCCTCAAGTAAAGACGCATCCAGATCTACTCGTAAGACTCGCACCCCCCGAC 216 | CCGGGATAGGCTTTCGTAGCGCGTCTAA 217 | >Rosalind_3322 218 | AACCATGATTGATCACTGTCGAAAAAGGTCTTTGCAGCTTTACTCACGGTTCCTGGTTAA 219 | GTCATGCCTCACCTCCACGCGCTTGGACGCACTCTC 220 | >Rosalind_5321 221 | 
CGGCGAACACCTGCCTCCGACTCCTTGTTCCAGGTGCTTGCCGGCTGACAAGCACGGAGG 222 | CACTATGCTGGGCGGAGGATCACT 223 | >Rosalind_6081 224 | TGCGTACTAAACACATTCTAAAGCGTGCATGCTACGTATGGGGACAGCGCAACAGTGACA 225 | GCTACCTTATCCGGACAGTCCTAGA 226 | >Rosalind_4767 227 | GAGTAGTGGGCATACCACGTGAGGGGTGATCTCCGGAAGTTAGTAGTAGGCCCCTAATAA 228 | GTTTGTGCCGGCGGCCAGGTCA 229 | >Rosalind_0260 230 | GACGGTTTGTCTAGAAATCAGCCTGGTGCTCCCGTGGGTCTGACGAAATCCGAAGATAAT 231 | CACTTTTGAGAGCACGGGCCA 232 | >Rosalind_5121 233 | CTGCCAATCGGGTATGAGTAGGCACTTAGGATTCCATACCTCTAACCGCAGCTGCGTTTG 234 | ATGCTTCGGTCATGTTCTAA 235 | >Rosalind_6703 236 | GTTGTACCCGTATGCTTGCAGGAAGTACCTGGCACCCACTTGTCGAGAATATAAGTGGTG 237 | ACCTAGGATGGTCAGCAGGTGGCACGGACGGGGGG 238 | >Rosalind_4516 239 | TTATAAATCCTCCCCTCGTTAGGCTGTGCAGGTATAAACGACTTATTCGTGCGATACAAG 240 | TGGCATGACGGTAAGCATCACCCGTGAGGA 241 | >Rosalind_9615 242 | ACGGCTAGGTCGAGGTCAGCTTAAGCTTTGCGCGGATGTGCTCCACAATTGTAAGCTTGG 243 | CCAACCGACAGTCCTGATTCGTTTC 244 | >Rosalind_6188 245 | ATAAATTATGTGTCGACGTGCTGCCTCTTTCAAATGCAGGAAGTAGCCGATACAAAGGGC 246 | AGGACCCACCAGGGTCTCGTTCTGATTGGTTGATGCCC 247 | >Rosalind_7538 248 | GCGTTACGGAGTGTCGTTAGGAATCTAAGGCATCAAAATTAGGGGGCTCTCCACTATATT 249 | AGTAGGCCTGCGGGAGGTGAATAGGCGCGGGAC 250 | >Rosalind_1712 251 | CATGTCATAGTAACTGCCTAAAAAAAACGTGATAGAGCAGAACGTTTACCCCTGCGAACT 252 | CGGAAACTGCAACTATGTTTCGAAGTCGGTTACGAT 253 | >Rosalind_9515 254 | TGCGTGTTATAGCGCCTTCCCGGGAAGCGACATTGCGCATAAAACGCAGGGTGATGAGCC 255 | GAAGCCTTTGCTACAGGCAGGGACCAGGT 256 | >Rosalind_1437 257 | AATTGCACGTAACCCGACCCTGACTCGCTCTCACTATCTATGGTAGGGGTGAGGGTATTG 258 | ATCAAGGTAGGCATAGGGAGGTTTATGAG 259 | >Rosalind_2699 260 | CGTGCTGAGAAGTAGCCTGGTAAAAGCTGGGTGGTGCTAATAGGGCTACTTTAGTTGGCT 261 | AAAATTGGCATATTGATTCGCCTTGGCCC 262 | >Rosalind_1009 263 | CTCCTCGTTCGAATCCGCCAGTTTGTAGGTTCTAGGACACGTCCAGGAGTGGTCAAACCT 264 | ACCCAGCTAAATGGCTCCATTGGA 265 | >Rosalind_4672 266 | CCTACGTAACGCAAGGAGAGGAAGGTTTAAATCCGAATCGCGGAGATCAGAGCGTTGTGA 267 | AGCATCGCTAAATCATGAAAACAATGCAGGCTC 268 | >Rosalind_9160 269 | 
TCCCGTAGCACGCCTCTCGGCCTCACAAAGACGGCACGTCCACGACAACTCCTGACCGAG 270 | TCCCGCTTAGCCGGTTGCTCGCTTAGGCGGCATATT 271 | >Rosalind_9397 272 | CTTCAAATCGATCGCTTACCCCCCTTTAGAACTCCAATGTCGAACTAAATTAGCGCTAGA 273 | AACGTATGACGCATACCCTGTCCCCATGTCAGG 274 | >Rosalind_9551 275 | AAGAGCGAAACTATCTAATCGTGAGGACACCGTGTCTTCTCGCGAGATCTCAAGCCGTTC 276 | CACAGATGTCCACTGGTTTCCGCCCTCCAAACAACCCC 277 | >Rosalind_1481 278 | GAGAATGGTGTTAGTTATGCCGTTAGCGAACTACAGTGTTATAGCCGGAAGATATCCGAT 279 | TTAGGGGTATCAGCCGGCACGTATAATGTATTC 280 | >Rosalind_1690 281 | ACATGTTAGTTCACCCACTCGAGTGGGGATGTATTTCGACCCGATCGGGTCCTGAGCGTC 282 | GTCCGACAAAGCATAAATCA 283 | >Rosalind_1611 284 | CGCATTCTCCCGCGGGTGCCCGTGTCTTACTAAAAACACGTATCCGTTTCATTAATATCG 285 | TTCACTACTATTATGTACCGAAGCTCTT 286 | >Rosalind_9311 287 | TGGTTATCGGATAGTATTAGATGGTCGAGCAGTGGTTAGTGTTATACCCAATAACCCCCG 288 | GACGACCTAGCATGTTGGGGGGGGTATTTCC 289 | >Rosalind_8666 290 | ACAGGTCTAATAAAAGTTGGTGATCTACGGGGCAATGAATGCGGAATATTGCCTGCATTA 291 | ACTAGGCTTTGATACTGGATGGTCTCTGTCTTGCGCCA 292 | >Rosalind_3514 293 | TCCTATGAGAAGAATTCAGCCATCTGTTGAACGGCTTCATTTTGGTCCGCGGGCTTAAAT 294 | ATCACTTCCATCATATCATC 295 | >Rosalind_9989 296 | GTAGGGTGCGCACTCGAATCGCTAAAGGTATGCATACTGAAGCAGCTTCCCTAAGAGATA 297 | GAGCAACCATCCGGCTATGTAGTGAATCGTAATC 298 | >Rosalind_8668 299 | TAAAAACGAGACGCAACCCGCGGGGTTGCTTCGGCGAAGATGTACAATGCGGTGGGGAGG 300 | CTTATTACAGGTCCGCGGAATCCAC 301 | -------------------------------------------------------------------------------- /datasets/rosalind_hamm.txt: -------------------------------------------------------------------------------- 1 | 
CATTATAACCCGATCGGTTCTAGTCTAGTCTCAGGAATACCACATAGGTCCCCAGGTGACCCGTTGCTAGTAGGAAAAAGGAGAGGAGGGGAATACGTAGACTGTCCTTAAGAATTGTTTGTCTTCCTTGGACTGTTTTTTACGGTCCCTTGTCCAGAGGCAATTCCCCATCGCGAGCAGCCTGTACGCAAGCCGATCTAATGTAGCTAAAAAGGCAGTACGCTTAACTTCCTAACGTCATCGGTGGTTGATAAAAGGACGGGGCTCTTAAAACTGCCAGTACATTCACCCAGCGGCGCGTACCACTCAAGACTGTCTCGATGAGTTAACCTCACGTCGGACCCATCGCGACCACTGGAAACTCAGTCATCTAGCAGACCGCGCGTGTAGCTGATGTCCGAATTCTAGGTGTATCAAAAGGGTTGTAATACGGGGGCCACCTACCACGACTAAAATGAGCCGTTGTCGCGCGCTTTCTCCTGGAGTCCTGGGCATGGAATTTAGGAGCACAGGATTTCTACGTCCCAGCCTTGTCTGCACTAGAACACTATATAACGATGAAAATACGTGGCACTTCCTCGCTAAAGTTTCCAGCCTTGACAATGCTCAAGTGACTCCAGATGCGCGAATGAAGCCCTATACATATATGGGGTAAGTCCTAGCCCACGTGGAGCTGCCTTATAGTTTGAGGGCGGTCACCCTGACCAGTGTAAGCTTTCTGCAACCACTATACAAGCGTTCTTATATCTGGCAATAGCCATCGATCTTGCTATTGTCGGCAACGTGCGCACTGTTCGACATAAGTTCAAAATGCTTCGCATCAAGAGTGAATACGCTGATCCACTGTCGATCAATGTCATCGTAAGTGGCACGCTAGTGGAGAAACACCCTTCTCGTGCCATATAGGCTCCCAGATCAGTACCGATCGCAACCTAGATTGTGGCGAGAGCAGGCCCGAGTAGCTGGGCATAGGCCCAA 2 | TTTTGAAATGTGAACTATGGTCGTCGTGTCTAACCTCTACCATATGTGACCTGAAAGGACCCTAGACATGGGGTAAGTAGGGTATCAACTGACGAATTCCTCTTAAACTAACGCTTACCGGAGTTCGAGACCCGCTATGGTATGGTACCGGAGCGAATCGTATCTTCCCTGCACGTGTGGCTTGGCCCCGCGACAATATTATGTTACTTATATGCGGTACCTAGTAACCTAATAACGGTGTGCTTAGGTTAAAAAAGGACGGTATTCTTCAATATTCTATCACTTATACCTAGAGGAGTTTACCGTTAAATAAGGTGTAGGATGTTCAACCAGCAGTTTTCCCCTCCTGCAACACAGCCAGCTCTTTCGCATGGTCCTCCAGGTCGGGGTGGGACCACCTAAGATTACGTACAGATCTAGAACATTCATACGTGCAACTCATAGTAGAACTTAATGAAGTTAATGTCGGGCTCTTCGTTCGAGTCACTCGACAATGGAATTAGGGAACAACGTATTGCTTCGCGCCTGTCATGTTTGCCAAATAAAGTAATGCCACTTTGCCGATAGGACGCACTTCCACTCACTCATGGACTGCCTTGACCTTGATAGATGAATGCGAGATACTGGAAGCGTGGTTTATTTTAGTATACGGTGACCCGTAGCGAAGACCAAGGCGGTGTAAAGTTTGTGGCAGAGCAGCCAGTACAAAGTATGACTTGTTTTACTACCAGCGAGGCTTTGCTACGCCACCCAAGCGCGTTCCTTCTGGAAAGGTCGATGCCCGGAGGCGATGACCGAAAAAAGTGCGTGTCTTGTCGTATCCAGGGCGCATACATCACTCGCGTGTGGATGGAAGGGGACGTATCTGTATCGTTATTCCGCCAAAAACCATCGCGCTCCATATGTATGTCCTGATCAGGACAGATGGGTTTGTTGCTGGTCGCAAGCGTGGGCTCGGTAAGGCGAGCATTGGGCGAT 
-------------------------------------------------------------------------------- /datasets/rosalind_lexf.txt: -------------------------------------------------------------------------------- 1 | P F Y Q Z 2 | 4 -------------------------------------------------------------------------------- /datasets/rosalind_lexv.txt: -------------------------------------------------------------------------------- 1 | R Q H D W K X V A B O 2 | 4 3 | -------------------------------------------------------------------------------- /datasets/rosalind_long.txt: -------------------------------------------------------------------------------- 1 | AAAGACACTTATAGGTCCCCTGCCCAGGAGGGCATGAATCCCTAGGTATAGTCTTTCCATTCTTCTGTTTACGATGAATCATAGTTATGACAATTGTCCGTATTCAGCTGGATATCCTTAACCGGGGGAAAGTTTATCTCGTATTACGTTGACGTCTGTTATGTAGAAATCCGAGTCTTAAATGTCGCATACACTCGCGTGCTAAGGGACGACAGCGTGTCGGCCGTGCTAATACCCAACAGGCGTTACCCCTCACAAAGCGTACAGGTTGGTGCTGTATTCAGGATCCGTGAGGCCGTAGCATGGTTCACAAGCCTTGTGTTAACGTGAAGTCGCACACGAGCCTGCTCGTCTTCGAGGCCCTGTGCGATTAGCGCAGCTATTCATCCATCCTGTTTACAGTACCGGTGGAGGCCGAGGACCATATTACTGGCTGGGTTTCGTATAGCCCCTTATCCGTGTGCATTATTCGGTTCTGACGGTTCTCAGATAGGCCGCGGGACTCGAGACATAATAGATTACCACGTACTTCCGGCAGCCGCAAAGTTCCCTCCAGCAGCAAGCTATTGCTCATGGCATATCGCGGCTCAGCGAGTGGGTTCTCCGACCAAACAAACTCCAGTAAATGATTCTCCACGCAAATAAACAGTATCTCTGGGATATCGGTATCTAAATTAGAGTCATTCGCATTATATCAGTCGCATCCCTCTGGGCAAGCATCTGTCCAATAGTACCGCGACCGGCAAGTCCTCAGCGTCGTGGGTCGACTCTCTGTCGAAATACCACCGGCGACACCACCTTCGACTGTGACACTGTAGAATGAGGCCGAAACACACAATCAACCTGTCGCGCGCCAGTCACGTTTTCTAACCGTGTACGACATGGGCGGGTATAGGGCAATTACAGGGCGCTGGTCATGTGTCCTCGTGATTTCGCTTACAAAAAGTTGTCGGTCATCCGCCTCTTACAAGAGGCCTATAACCGACTATCCAATATGGCG 2 | 
ATAATTTTCTCGTGTATTGAGTTATGCGGCTGGTAATTTACGTCTACCATCAAGCACAGCGCCGCGCACTTCCCATATCCTTGTTTGGTAGAACATTCCTCATTGATCTCGACAGTTAACTAGCAAAGGACTAGTAGGTGATTTGGGCTCGGGACGCGCCAGGCTTCTACGACACCGTAGGATGCTCGAGGCCCCATAGGCTGAACCCCCCGGGCCGTTATACTAATAGGAACCGCGACTGATGTTTAGGGAGCCGTCTCTGAATAGCGAGCGGGACCCTCCAATTATTTTCGTGAAAGACTATCCGAAATACGACGGGTTACTTATATGGTCTATTGAGACCTCGGAATTTTTTACAGTTTCTCATGAGTATTGCCTTCTACGCTAGAACAGTAATGACACTAGTATACCTGTCACCAGAGCGCTAAAACCGGTGAACCACTACACTTTGAACTATATGGCTAGTCCGCATGACCAGCTCTGACAGGGTAGCTCGTTTGTGCTGATGTGAAAGCGACTGGCAGATTTGACGCGGTCTGAAACTGGCCCATGCTCGGAAGGCAACCCTACTGTGGGGCCAGATTTTTCCAGTTTTCACTTTTGATTCATGAAGTCAGCCTCATAACTGTGTACGGGTGGGGGTGCTTTACATTTCCCGAAGTTATCGAAGCTGCAGTCTATATAAATGTTAGTTTGTCTTGAAATAACCGCCTTGCGTGTTCCGTGTCATATGTGAGGCATCCTATGACGGCCAGCACCACTGTGGATCGCGCTAGCGCCTTGAGAATCAGGCCCACGTCCCCACTGGGGAACTTTCCTCCGATAGATAATCGACACGCGGGTAGCCGGGTACTTCAGCCGATCAAGTAACCGCCTGCATCCACTAAGGAAGGCAGTGAAGCAAACTAGCAATGAAGGGGGATGCATAAGAGGTCGGTTCCGAATGTGCAGTTAAGATCAGACTACGGCCGAACCATGTTCAGCTGACCGAACATAAGCA 3 | 
GCTCTACGCCTTCGGTAAGTCCGCACATGTCAGCCAGTCAAAAACGTCGTTGTAATCAAACGGTAACCCGTATGACTACATTCCATAAGTAAAGCTGCGCTGGCAATTATAGATAATAGACCGGTGCATTACATTGGCTTGTATGTGTGTGTCGTATCTTGTCGATTCGTCTGAAAGGCAGCTAATCAGCTCCCCACAAAGATATCCCGATATGGGGCCAAGGATAGGCCGGCGTGGAGACACGAACCGTGAGCTAAGAGCAAACTCCGTATATTAGATCGTGACGAGTCGGATACGAGTTGATGAGCGGTGTTCTCGCACACCACTTGAGGGTGATTGCTCACACAGGTCCCCAGTAGAATGGGAGACGTCTTTACCTCCCTCGTTCACTTCGGGGCAGACACCTTATAAGCCCACGGAGCTTTAGGTTCCGCCGCTTTAATTTTCTCTCACATGGGAAGCTCCTCGCCCGACAATAGGTCTATCTTCTTTCAGCGCTTGCCCATCCTACGCAGCTATTAGACTATCGTGTCGTAAAAGTGTAAATGGGACCGTCATGACCGTCAGTGATGAACCAGTCACACCGCAGAGGAAATCGAGTTAACGGGGGGATAGAACAAAGAAGATGTCTTATTTCCTCGGAGAATCTGCAACGCAACTATTGACGCGGCCGAGGATGCGGTTAAATTCATCATTTTGATAACCGAGAAAGCCGATCATTATACTTGTGCCGACCGGATATATTAGATGTGGCGCTCGTCACGTTGCCCAGCCACAGCATCACGTACTGATGACAATTCAATGTCGTGTAAACGGTCCAGCACTCCCCTTCTCTGGAATGCTGGGATAGTCCACGACCCATAGAAAAAACCGGGTGACCCCACGCGGGACAGACACATCCCAATAACACTCACTGCTCATGCTTATCAGTCCCTATTTAAATAAAATGGGCCTGTGTGGCACGACACTCCTATTCGAGGTACGACACGGGTCACGGTAA 4 | 
TTGGGGGGCTATGATTAGGCTTGATGGAGAAAGAGCAATAGCCGGTTTTGTCCTTAAACCGGACAGGAAGCTCTGGGCTTATCAGCAACACTGATGTATGTTAGGGACTTACGCTAAGGTCGTCGGTCGACCCAGCATACACCCATAATAACGCTAACCCGAACCCGTCCGACCCAACTAGCTGTTCTAGTCGCGCGCCGGCTCGGGGACCGGCATAATAGATTTTCGGTCTTTGGTAATTTTACCAGCTTTCGAACTAGCATCTTCCATACTGAATGAGGTAAATTGAGGTCCGGAAGGCGTGAAACCTGGACGTAGTATCGCCTGTGTAGGAATCTATTGCTCCCATCGAAGGTGAGGGGATGAGAAGGTATAAGAAGTTAATGGTCGGGCCGATAAGAAATTATCAGTTTTAGCTCACCACCTTAAAGATACTTTTCTGGTTTGTCACGTCTGCATAGCGTATGGTTACTAGCGAGGTGTTAGTCCTGCTCTTCGAATGTTAGCATGCCCTTTACCTTGGACGGCCAGCGTAGTCCGTTCTCCCACCCACTAGAGGCTGAATTTTATGTGCCTAAAAGTAATTGTTGCCGTTCAGAGGGCCGGCTTCGGACAAGACTAGCTCCATGAGATGTCTCGGACGCGCATGCGGGTTGACGTATAGCGATGTTCTCGCACATTATTAACCCGTGGACCAGAGATAGATACACTGTTAGAGTTACCAAATTTAATACAAGCCAGGGCCACCCACTTCCGTCGGAGGGACATGTTGTTAGTTCAATAGGAATGGGGAGAGATGCAACTTCACTAACTGGTTACGCGAGATTCCCCACTCCGGCTGGCTTCTCGAGTCCCAACCCTGCGGAAGCTCTACGCCAATATAATTCAGGCAATGCTCCCAATCCGACCCTCGCAGCGCAGGTCTTAGTAAACAACTGCTACACTTGATGTAACGTGCGCGGTGTGATATTCTAGGCAATCATAATACAAGTTACCATTG 5 | 
TGTCCCCCCTCAACGGAATGGTCAGTTGTTGCCCGACGTTATTCAGGAGAACAGAGAGGTGGACAGATAGCTCTCTTGATGCACAGGTGGAGCTGTAGTGCGAATTCGCGCATTTAGACAGTATAAGGCTATGAAGTGACTAATCCGGCCACAAATCGTGAAGTGCTTACTCTCTAACGGTGAGGGACTAGGTAGCATACACGCAATGCGTACTTCACAGGTATCGCGTCAGAGGGTCAACTCGCCCCCTCACCTGGATCCCAGTGCACCGGCAGAGTAGGTACTGACTGCTAGAACTAGTCGCTTATATCGTCAAGTCCTCGGTTGACGGCTCAGAGGCGGCGGATACGGGTGCACGAGACCTTCCGGCTCGGATGGGATGGATCTGGTCCTGGTTTCTACTTGTCGCAGTTTTCGTCAAACATAAAGGCCAATAGGAACCCGAGATTTTTGAACTGGACGAGACATATCCAACCTGAATATACGGAACATGTTTTCCTCCGCAGAATACCCCATACATTAAGCGCGAAGTCAAGACGTAGGTTCTTCGGTTTGGCCCAATTCAGGCTACCCAGGACGTGCCTGGCGATTGGATAAACCCGATTTCGTGCTAGCCGGCGGTAAATGACACCTCCCATGAATACTACAAACCATGGGCTTCAGAGCAAACCACATTACGTTGGTTTGAGTAGCTTTCAAAGGCAGCGCTGGCAGTACAGAATTAGTAGTCGTGGGCTGCAGTGCAGAACCCCAAAATAGATCAGTACGGCCCAGGGTAAATAGGAGGTAATTTGGTTCTTGTACGGAATCCTGATTTAACGCTCGTTCTTGCGAAGCTCGATTTACGAAGCCGTGTTACAAAGACTCTTGGGGAAATAGTGAGATCAATATCATACACTGCATTTATTAGTCGCTGTGTGAAGTTATGCCTATGGGTCCCTCAAGTGAGCAAGTACAAGTAAAGTGCTCCCTCGTGTGTTCGTAGATACTTCTTCTGTGC 6 | 
GGATGATACCATAATTGATTAGGATGCCGCGACTATAGCTGTGAATGATGCTTAAGTGGACCCCTGAGCTTACGTGGCCCCCCCTTCGATTTGCTAGTCGTAGTTTCCTCCCAGGCGCCGCACGTAGGGCATTACACTCCATCAATTAGAGCCTCTGACTTATATGATTAATTTAAGCCGAGGGAATGCCTGTACCTTTGTCTCCCGCACATGGATGCGCTAATGGATTAGGCAGTAGCGTTAAAAAGCTGCCTGGACACCCCTCTGTTCTCGAGTGTAGCACTCCTGTAACAATTATCGGACTAGTAGCAGGGGTCTATCTCGTGCCGAAAGTTTCGAGATTCGTGTATTCCCCCGATCCTGCTGGGCTGCAAACTGTCCTTAGTCCTCGTCGCTTTACTACATCTTATTTCGCGTTATTTTTAAGGGCTAGAACGAACTTCAAGATAGTCAACGGGGGGGTTTCTTTCTCGCAGAAGAGCCCAGATGATGTACGACTACGTGTCACAAGACCAGTTTTTTAAACGCTGCGTCGAGGTACAACCAGAGCGCGTTACCACTGCTGTCCGCACCCTACTGATGTGCGCGGCCAGTTCAACGTGAGCGATAGTGCGACGGGCGCAACGAACTTTCGTTGTAACGTTTGGAGATTTTTACAATATTACCCGTTTCGTACGTCGTAGGGGTCCAGCTTTCCTGCCAATAAATTCGCATTAGGCACGGCAATAGATATGTCGTGATGATCCACACAAACAGAAAACACTTTGCTGAAAGAATTACCTAATCCTGTGCCTAGATATATGCTACCCGTGTTCATGTAAGGTGGGTTGCCTATGTTTGAGCGACTCTCGACTATCAGTACAGAAGCGTTGGTCGTCTGTCCTTGGGACTAAGTACATTGGACTCGCGTCTAACCCTGGGCGACTGCTCAAGAAAGTCGCGTCCTCACCTTCATTAGTATCAATCAGTCCACTTACAGGACACTTTTCATTATGAGATC 7 | 
TATAAGAACAGCAAGTAACGAGCCCCGATGCATTCGTCCAACGCACACTAAATACTCAGGGATCAGGAGACATGAAGTCCAAGGGCGCTGTTTATAACAGAAAGACCGTGCGTATAAACTGCGTTAAAACGGGTAGAAGACACCCATTGTGCATCTTCAAGTTTGGACAAGTTCTGCCCGCTAATTGGACACGAAAACTTGTAATTTCGTTGCAACACCTATGACCGACTGTATTGAGCGCGTTATTTACCTTGTCATGGGTCCAAATAATCAGGACAAGCATTCGTACGCGATAAATAGGCTCTATCTACCGTGCCCGCAAAGTGGCAGAGAATTGGCGATAGTAGAACGCGTTCAATGATAAGAAGTCCATAAGGGCCTAAGACAGAAGCGACGTTGTAATAGTACGAGATCGAACGAGAATGCTTCATACTGAGCGAACTGCAAAGAAGCCCCTATTCTGCCACGTTCAATACGGGTACGAAGGTACTGTACGCAACATTTCCACCCCCTTGGGAGGTCTTATATTGCTCCACCTCTCAATATGCTTGAGGGGCCTCGAAAGGTGATGAGGGATGGTGAAGCGTTCGGGTTTCGATCTTAATGTCAGCGGCTGAGATCGAATACAGGCGCGACCGCATGGTTCAGTGTCAAGTTGCGAGATTAGTGACTCCCGTTTCGGTGCCAGTCTTAGTCCCGTGAAGAACAGCGCCACGATTTCCCAGTCAGACTTCTTGAACCAGAGTTATAACCAAGAAGCATCCTTTAGGGTGTATACCCTCTACCGGTCTTGTTTACTCACTTTTCTGCTATTAGTGATACATTAGGATGCTGCCGACTTAAGCTTCATCTATTTCAGGCGTGCAGACTTCTCAATCTCCACCGGTGAGGCTCAACAGAGTTTTAGACTACTTGAGGGTTGTAGACGTGGATTCCCCTGGACGTGCTTGGGAACGACAGTGGATTAAGAGAGTTATCAATAGTAAGTTGTTATATGCGT 8 | 
GGCGCTGGTCATGTGTCCTCGTGATTTCGCTTACAAAAAGTTGTCGGTCATCCGCCTCTTACAAGAGGCCTATAACCGACTATCCAATATGGCGGCCTTTACTGTGCTCTGTCATCCCACCTCCCCGAGATCCGACTGTCCTAGTGAGCCCCGTCGGGCGTGGAAATTATCGCGCCTTCTAATCTAAGACTTGGCGCCGTCAGAGAGGACCTGATCAGGTGACCTTTTCCGCAAAAAATGTCTTCATGAGCTTTAGAAACCTCACTGATCTGCGCGCACGAAAAAGAGGACGTTGCGGGACTAGAAAAAATAAGAGTTCACAGGGCAGTGGCATGTTCCCGAGAAAATTGCCGGTTTATGCTTTGGCAGGTTGATGTTTTTTACTTGTGAGTCTCCCAGGTGATTGCACCGCTTGCAAACAGTATTTGTAAATCCAGCACAATACAGTGATAACCGAATAGTAGATGTTCCAATCAGAAGCGGCAGGTTGGGCCTGCCCCCCATCTAATCAGCCTTCCTTGGTACGGTGAGGCAACAAGGGTCATTAGGTCAGTACAATGCCACATCTCAGCCGAGCGGACAGCTCAAGCACACGGAGGCACCTTGACTGTCTCGGAGCATGCTCTAAAGTATCTTCGAAGACGCTCGGCGACTAGCGATTCGAACTGATTTAGTCGGCTTTAAGAAGCATGGCGGTCCTAGCGTAAGGTAGGCCCTGTGCTAAATAGCGGACACTTTTCCGCAGGGTGCGTCGCGAGTTGGATCATGAGTCTGACGTCTACCCGGATCGACACAATTACATAATAGCTCAGCCGGAATTATCGTTCATAGTCTAAGTAGGATCAAATCTGCGGGGAATACTTGTGCCCGATATTCATAATAAGCTTACTGCCCCCTGAGACCGTACGTTAGACAAGCCTGTGGTACACAGGGCGGGAGGCGGTCCTCTGGCCTCCAGCACTTAGTCTATTACCAGATGATTCACGAATTAACATGCCTG 9 | 
TGGAGACACCTGAGGGTGGAGGGTCTCGCCCCACGAGCTTCTGCTGGATTACTCCGTGTAATATAAAATCCTTGCACCACTTTAGATAAAGTTGGATTGTGTAAAGACATCTCGATTTGTTTTTCGGTATACATGTAGCGGGTACTAGAAGCACTAGTTCCCGTTGATACTCCAGTGCCGACCGCGTCGGTCCATTAATTACAATTGGCCTCGCGGCTCGCCATTAGTCATGAGGTGAGAACCCACAACTAGGCCGATATTCCCTTCTTTTGCGCGAGGCTCACGACCAATGGTGGCTTATCAGACATTTCCAGTCTCGATAGAAGCAGTCCGACCCAGGACTATCATTACCCCTCAGTGGGGTAGTAGAGTGCGCAATTTTGGTCAGTATCTCTAATTGATTTTTGGTCAGCTTGTGCAGATCAATACCCGGTTGCTATGCAGGCTAACATGGTGGCCCGGTAACGCCGCACCGCCTTGGCTGGGCCTTAAATTTGGGAGAGAAAAAACCTCCTGTCTTAAAGTCTCTCTACAGCTTGGACAACAACCGTTACCACGGACCCGTAAATGCTCCGTATGGAACGCACTTTTAGGACCGTTGGCGACATAGGAGCTAAATAAAATTACCTACGACGCGTACCTTCATCATTTCGGTGAAGCATGTTGATGGCCTACAAATGACGTACGTATATCTCAGTCATAAGATTGAGGGAAAACTTCACGAGCCTCCTCCAGCATCAACACCAAAGGGGTTTTACCGTGTAAAAATATATTTCAGATAGGGACCCCTGATGCCTCCAGACCCTGAGTACCGCAGAGACCACCTTAGGGCGTCGTCAGTGATGTCAGAGACAGGTGATCACGCAATCGTTATAATCGAAGAGCGATCTAATTTGGTTTATAATCCTTGTCCCCCCTCAACGGAATGGTCAGTTGTTGCCCGACGTTATTCAGGAGAACAGAGAGGTGGACAGATAGCTCTCTTGATGCACAGGTGGAG 10 | 
CACTAGCAGTAAACCGATAACTTCACAGGGAGGCAAAAACTAATCTTATAGTTGGACTTTAGGCCTTCTGCGTGCACGGAACAAACGAGAAACAAATTTATGAAGGCCCAATCTAACTCCGAAAGCGGATCAGTGGTCCGTTGTACAAGTACAATCCCGCCTGAATGCTCTGCAACTGCTTCCGGCGAGGTAGACCTCGCAGAGAAGTCTTGGCCCCATCTATGGGCAGAGTTGTCAGCTGCTATGCGCAGTGGTTAGGCTTAGATATATGCCTGGTCACCGGTGACTACCTAGTAGTAAAAAAGCTGAGCTAGAAGTGTGGGAGTATTCCGTGTGACAGGACCTTCCCCTGATTCTTGATAATGTGTCGACAACCGTCGAGGTTACTCCATTAGTTTAAGAAAAATGACCGATGATGAGAAATCAAGGCGGTCGCGACAGAGGTTATGGTACGGGATTTTGGCCCTCATCGGCGCGTATGTATCGCAACACTTGAACGATTGTGTTATAAAAGCCTGGGGCACTACCACACGGAGTTATCCTATAATATGCCCAAAGCTTGCAGACGCGGGAGCGCGTCCTTTCGGGGCACCTTGGAGACGTTCCGTGGGTTATGTACAGGGAGACCGAGTTACCACAATACGGTGGTCAGTGACACAGTCGGGACTTCACCCGGCTGTATAAAGGTTAGAGTAAGCGAATGCACTAGTACGGGTGAGGCTCGCGCATTTCAATTATCATGAAGAACCTAGGGTCCTCTAGATATCGCAGATCATCCTTGTGTGATCGCCGGTTTCCGCAACTACCGCAACTTGGTCCTGACCCTCTTGGCGAACTTCAATACTGACCCGGCCGCAACGCTTACTCAAGGCTTCAAACACTTCAGATCTCCGATTTAGTGTTAATAGTGTAGGGAAGACTTTTTGAATCGGATTGCGGTCGGAATCATCCGTCAGGATACTCCGCGGGTACGGTCATCGGTTCTGCAAGGGAGTACGAA 11 | 
CCCAGCCACAGCATCACGTACTGATGACAATTCAATGTCGTGTAAACGGTCCAGCACTCCCCTTCTCTGGAATGCTGGGATAGTCCACGACCCATAGAAAAAACCGGGTGACCCCACGCGGGACAGACACATCCCAATAACACTCACTGCTCATGCTTATCAGTCCCTATTTAAATAAAATGGGCCTGTGTGGCACGACACTCCTATTCGAGGTACGACACGGGTCACGGTAATTCATGTACAATTAGAGGGTGTTAGACTCGCATGTATGACCGGAGAGCTAGGGCGCGTGTGCCAACTAACCACTAACCTCTGGGTAACTATCCAGAACCCCTCCAATCGGACGTCGAAGTTGCGCTGACGTCGGTTATCGGTCACACCGATCAGACGATATTCTACATAGTGCGGTTTATAGTAATTCCCTAGACATACACGCATACTTTCTGCCCCCTCATGAGGTATTTCAAGCACGCGTTTCGTGTGACCGGGCCTGAGTGGTCGATCAAACTGGTTGCCCTAGAGATCGGCTACATAAAATCTGCGGGTAGGTCGGCGGGTTTCTGTGTGTCACGCACAACCTCAGAAATAAAAGCTAGGGTATAGAAACTGCAAATACAAAAGGACGACAAGGGCGTGAGCAAACGTACATTCTAGCCTCAGCAGGGCTCGTGGATGAGCTGCCCTATGTATGTATTTGGGTACTACAGTCCCGCGTATTGAAGAATATACCATCTTCTTGTGAAGCTTATTTTCTCTTCCCTTATCAAGAACGAGCGCTTGCATCCAATATCCTTGCAATATCCACCAATTGCTACCTAAGAGGTCGGATGTGAGTGCGTGCGGCTTCTCCAGGACTGGGTAAAACTAACTACATACACCGCATACAGTAGTTCCCCGCGACAACGCGCTCCTGCGTCAATATGGGGTGTTTACGTACAACTGAGCCGTAGAGCACTTTCGCAGTAGGGATACCCACGCATGTACCCCGCTCTCGAGCGGC 12 | 
AACTGGTCTGGTCATTACGTTCCCCCTCGGGTGCGGGCTGAGGGGTTCCGATCTTGACTCATCGCGCTCGATCCCTGGTAGGACATACTAGGTACGTTGGGTGCGGGAATGCGTAGCAATCTTTCCTTAGGGACGACTGTAGACGTACCCTCACTCCCAACAGACATCAATAAGAATAGTGGCAACGCAGTACGCTATATGGATGCACAAATGAGAGAAGTTAAAAGTGCTTCCGCTCAGCCAGACGCGGGCGGACGCGATTCAATCTTACATCGGAACCCTAGTGACGAGTCACCCCGTTCTGGCTTTACTTAGTCTTGTGGCGGGCAAAGTGATTGTCTAACAATTAACAACCCCGCTGGCGAGGGGGATGATACCATAATTGATTAGGATGCCGCGACTATAGCTGTGAATGATGCTTAAGTGGACCCCTGAGCTTACGTGGCCCCCCCTTCGATTTGCTAGTCGTAGTTTCCTCCCAGGCGCCGCACGTAGGGCATTACACTCCATCAATTAGAGCCTCTGACTTATATGATTAATTTAAGCCGAGGGAATGCCTGTACCTTTGTCTCCCGCACATGGATGCGCTAATGGATTAGGCAGTAGCGTTAAAAAGCTGCCTGGACACCCCTCTGTTCTCGAGTGTAGCACTCCTGTAACAATTATCGGACTAGTAGCAGGGGTCTATCTCGTGCCGAAAGTTTCGAGATTCGTGTATTCCCCCGATCCTGCTGGGCTGCAAACTGTCCTTAGTCCTCGTCGCTTTACTACATCTTATTTCGCGTTATTTTTAAGGGCTAGAACGAACTTCAAGATAGTCAACGGGGGGGTTTCTTTCTCGCAGAAGAGCCCAGATGATGTACGACTACGTGTCACAAGACCAGTTTTTTAAACGCTGCGTCGAGGTACAACCAGAGCGCGTTACCACTGCTGTCCGCACCCTACTGATGTGCGCGGCCAGTTCAACGTGAGCGATAGTGCGACGGGCGCAACGAACTTT 13 | 
CTTGCAAACAGTATTTGTAAATCCAGCACAATACAGTGATAACCGAATAGTAGATGTTCCAATCAGAAGCGGCAGGTTGGGCCTGCCCCCCATCTAATCAGCCTTCCTTGGTACGGTGAGGCAACAAGGGTCATTAGGTCAGTACAATGCCACATCTCAGCCGAGCGGACAGCTCAAGCACACGGAGGCACCTTGACTGTCTCGGAGCATGCTCTAAAGTATCTTCGAAGACGCTCGGCGACTAGCGATTCGAACTGATTTAGTCGGCTTTAAGAAGCATGGCGGTCCTAGCGTAAGGTAGGCCCTGTGCTAAATAGCGGACACTTTTCCGCAGGGTGCGTCGCGAGTTGGATCATGAGTCTGACGTCTACCCGGATCGACACAATTACATAATAGCTCAGCCGGAATTATCGTTCATAGTCTAAGTAGGATCAAATCTGCGGGGAATACTTGTGCCCGATATTCATAATAAGCTTACTGCCCCCTGAGACCGTACGTTAGACAAGCCTGTGGTACACAGGGCGGGAGGCGGTCCTCTGGCCTCCAGCACTTAGTCTATTACCAGATGATTCACGAATTAACATGCCTGTTCCGCCCACCGGACATGCAGCCGATCCCGTCAACGTGAATTTGCGCTGACTTCCGGATCTTCAGCCCCACAATCGTCACATGTACTAGGTGCGATCGACCGAATCGATTGCATCAAGATATATAATTTTCTCGTGTATTGAGTTATGCGGCTGGTAATTTACGTCTACCATCAAGCACAGCGCCGCGCACTTCCCATATCCTTGTTTGGTAGAACATTCCTCATTGATCTCGACAGTTAACTAGCAAAGGACTAGTAGGTGATTTGGGCTCGGGACGCGCCAGGCTTCTACGACACCGTAGGATGCTCGAGGCCCCATAGGCTGAACCCCCCGGGCCGTTATACTAATAGGAACCGCGACTGATGTTTAGGGAGCCGTCTCTGAATAGCGAGCGGGACCCTCCAATTATT 14 | 
CTAGACATACACGCATACTTTCTGCCCCCTCATGAGGTATTTCAAGCACGCGTTTCGTGTGACCGGGCCTGAGTGGTCGATCAAACTGGTTGCCCTAGAGATCGGCTACATAAAATCTGCGGGTAGGTCGGCGGGTTTCTGTGTGTCACGCACAACCTCAGAAATAAAAGCTAGGGTATAGAAACTGCAAATACAAAAGGACGACAAGGGCGTGAGCAAACGTACATTCTAGCCTCAGCAGGGCTCGTGGATGAGCTGCCCTATGTATGTATTTGGGTACTACAGTCCCGCGTATTGAAGAATATACCATCTTCTTGTGAAGCTTATTTTCTCTTCCCTTATCAAGAACGAGCGCTTGCATCCAATATCCTTGCAATATCCACCAATTGCTACCTAAGAGGTCGGATGTGAGTGCGTGCGGCTTCTCCAGGACTGGGTAAAACTAACTACATACACCGCATACAGTAGTTCCCCGCGACAACGCGCTCCTGCGTCAATATGGGGTGTTTACGTACAACTGAGCCGTAGAGCACTTTCGCAGTAGGGATACCCACGCATGTACCCCGCTCTCGAGCGGCTGGGAACAACTCGAGATATTAGTATCAGACGCCGAGGTTGATAGATATATATGCCTCACTGGCAGGTCTGGGTGGGTTTGGGTGTAGATCCCTCTCTTTGGCACGTCTGGATTCCAAGGGGATATTATTACTGTGTCACACGTATAATACTGTCTCCCCGCGGGGACTCATAATATCTGGAGTAGACTGCACCTGCATGATATACTACAGTCAAAACCGCTACGGAATGTTCCATGCGGTGCCCACAGGTCATACCACTACGCTGCGTGTTAATGTGTCGCCTCATAACTTATGCAATTGTCTTATGACACCTGGGTTACAAAGCGTTTGTGGTGGCCAACGTTTGACAGAGCTTACGAGCTTAGTTAGGCCTCGCACCTAGTCATTGTGCGATAGGGACCCTCCAAGATTCATCGCCTCAT 15 | 
GCCTAATTTCCTTGGTGCTCGTCTCCGGATTTTTCAGATAATTCCGTTTTGATCGAGGATCCCAAGGCCACGTAGCGCGGAGGTTCCCCTTTAGTCCCGCGGGGTAAATTATGTTCTTCGAAACTAAGTCTAATTGTCACCGGATTTTGATCTCCGGACTAGTCTACACCGCTGCATCGTGGATTTGCTTATTTGTCGTTCCCTTCGAATCCGTTATCGTTATCTGTCTCAGTGGGATTCCGAGGTCCTTGTTCCTTCTGCACAGGATATTCGTTGAAACACAGCATTCACGAGAGTACTCGGTTCACACGCATTGGCTAGCCCGCCGGGGGATGCGGTTACTACCTGGCGGCTATCGCTAATTCCATGCCCCATCCGTCCCTGAAAGCCTCGTGAGGTTTGACGGGTAAGAGTTATTAGCTAGAAGCCTTTTCCCTACGATGAGCGCGCTCCCGTGCCAGCTCTATGGACATGAAGGCTACGATCTTAGGTCAGCGGAAAACCACCCGGTGACCTCGTTCGATTCCATGACCTAGCTAACCGCGCTTGAGTTTAGAACATAGACTACCTCAATTGCTTCTGCGTCCGAATTGCTCCACCGCCCTTGAAAAGCTCTACGCCTTCGGTAAGTCCGCACATGTCAGCCAGTCAAAAACGTCGTTGTAATCAAACGGTAACCCGTATGACTACATTCCATAAGTAAAGCTGCGCTGGCAATTATAGATAATAGACCGGTGCATTACATTGGCTTGTATGTGTGTGTCGTATCTTGTCGATTCGTCTGAAAGGCAGCTAATCAGCTCCCCACAAAGATATCCCGATATGGGGCCAAGGATAGGCCGGCGTGGAGACACGAACCGTGAGCTAAGAGCAAACTCCGTATATTAGATCGTGACGAGTCGGATACGAGTTGATGAGCGGTGTTCTCGCACACCACTTGAGGGTGATTGCTCACACAGGTCCCCAGTAGAATGGGAGACGTCTTTACCTCCCTCGTTCA 16 | 
AGTATGCTGCTAGTTGGATTCAAAGCTGATTCAACGAACTACCAAGTCACATGCGCATGTGGAGACTAACGTTCGACCCATGTTCTTTGCGCTAAGCAGGTATTTGACACCCACAACTCGCTTATGGCTAAAAACCAACAGTAGTGGATTTTACTGGAGATATTAGACATTACATGTTTGGCGTCTGCCTAATGAGGGGCGCTAGGATCGTTGCAAAACTGGTGCTGACTTGGACGCAATTACAATAGAGGAGTAGTTGATTCCCGAATGTATATGAATGCCAATGCGGTGCCAACTAGAGTGACTGCGTACCCCAAAGGTCCGATAGCTTATACTCGGCTCATAGCAACATATAGTAAGCACTATCTGTGCGAACATAATAAGCCGGTCTCCTAGTGGTCAGCTAGAGCCACTTCTTAGACCTGCCTGTCCAAAATATACACATGGTTCTCGTTTAATTACATTCCTATTACCGCGCAATCACTCTTGAAGAAGCTACGAGCCATCACTGGAGGTGTCATGTCGTATGGCAAATTCAAGTTGAGAGACCGTATATACTCACCCCTCATTTGAGTAAGTTATACAGGGCGCTCATGGGGTAAAACGCCGGGCTTGTAGGTGGTCGTAATTTGCCCGACGATCAATAGTCTTTGGGAGCATCTAGTTGATGAGAGGGTCTGCCCGTTCTTGTGTCGGGGCGCAGCGCTACAGGTATCGATCGGTGAAAATGCTAGACCAATCTTCCTAGTGTAAGCTTCAGTCGTCCACGCATGTAATGGCAATGCGGCGGCTAACTCATGATTCCAATACGGTTACGCCTGTCGACCCACCTCATTACTCGCCAATAAAGAATTGGAATATATGTCAAGTTCTCTTACTGGGGGAAATTCTGGTACAACCAACTCAGGTACGCCAGTACACTCACTCCTGCAGACTCCCCGATGTCCGACAGAGCACTCGCTGAGTAGGGTCTGTGGCACGAGTCTTATGTAACCTATCT 17 | 
GATATTCGTTGAAACACAGCATTCACGAGAGTACTCGGTTCACACGCATTGGCTAGCCCGCCGGGGGATGCGGTTACTACCTGGCGGCTATCGCTAATTCCATGCCCCATCCGTCCCTGAAAGCCTCGTGAGGTTTGACGGGTAAGAGTTATTAGCTAGAAGCCTTTTCCCTACGATGAGCGCGCTCCCGTGCCAGCTCTATGGACATGAAGGCTACGATCTTAGGTCAGCGGAAAACCACCCGGTGACCTCGTTCGATTCCATGACCTAGCTAACCGCGCTTGAGTTTAGAACATAGACTACCTCAATTGCTTCTGCGTCCGAATTGCTCCACCGCCCTTGAAAAGCTCTACGCCTTCGGTAAGTCCGCACATGTCAGCCAGTCAAAAACGTCGTTGTAATCAAACGGTAACCCGTATGACTACATTCCATAAGTAAAGCTGCGCTGGCAATTATAGATAATAGACCGGTGCATTACATTGGCTTGTATGTGTGTGTCGTATCTTGTCGATTCGTCTGAAAGGCAGCTAATCAGCTCCCCACAAAGATATCCCGATATGGGGCCAAGGATAGGCCGGCGTGGAGACACGAACCGTGAGCTAAGAGCAAACTCCGTATATTAGATCGTGACGAGTCGGATACGAGTTGATGAGCGGTGTTCTCGCACACCACTTGAGGGTGATTGCTCACACAGGTCCCCAGTAGAATGGGAGACGTCTTTACCTCCCTCGTTCACTTCGGGGCAGACACCTTATAAGCCCACGGAGCTTTAGGTTCCGCCGCTTTAATTTTCTCTCACATGGGAAGCTCCTCGCCCGACAATAGGTCTATCTTCTTTCAGCGCTTGCCCATCCTACGCAGCTATTAGACTATCGTGTCGTAAAAGTGTAAATGGGACCGTCATGACCGTCAGTGATGAACCAGTCACACCGCAGAGGAAATCGAGTTAACGGGGGGATAGAACAAAGAAGATGTCTTATTTCCTCGGAGAATCTGCAAC 18 | 
TAATTTTCTCTCACATGGGAAGCTCCTCGCCCGACAATAGGTCTATCTTCTTTCAGCGCTTGCCCATCCTACGCAGCTATTAGACTATCGTGTCGTAAAAGTGTAAATGGGACCGTCATGACCGTCAGTGATGAACCAGTCACACCGCAGAGGAAATCGAGTTAACGGGGGGATAGAACAAAGAAGATGTCTTATTTCCTCGGAGAATCTGCAACGCAACTATTGACGCGGCCGAGGATGCGGTTAAATTCATCATTTTGATAACCGAGAAAGCCGATCATTATACTTGTGCCGACCGGATATATTAGATGTGGCGCTCGTCACGTTGCCCAGCCACAGCATCACGTACTGATGACAATTCAATGTCGTGTAAACGGTCCAGCACTCCCCTTCTCTGGAATGCTGGGATAGTCCACGACCCATAGAAAAAACCGGGTGACCCCACGCGGGACAGACACATCCCAATAACACTCACTGCTCATGCTTATCAGTCCCTATTTAAATAAAATGGGCCTGTGTGGCACGACACTCCTATTCGAGGTACGACACGGGTCACGGTAATTCATGTACAATTAGAGGGTGTTAGACTCGCATGTATGACCGGAGAGCTAGGGCGCGTGTGCCAACTAACCACTAACCTCTGGGTAACTATCCAGAACCCCTCCAATCGGACGTCGAAGTTGCGCTGACGTCGGTTATCGGTCACACCGATCAGACGATATTCTACATAGTGCGGTTTATAGTAATTCCCTAGACATACACGCATACTTTCTGCCCCCTCATGAGGTATTTCAAGCACGCGTTTCGTGTGACCGGGCCTGAGTGGTCGATCAAACTGGTTGCCCTAGAGATCGGCTACATAAAATCTGCGGGTAGGTCGGCGGGTTTCTGTGTGTCACGCACAACCTCAGAAATAAAAGCTAGGGTATAGAAACTGCAAATACAAAAGGACGACAAGGGCGTGAGCAAACGTACATTCTAGCCTCAGCAGGGCTCGTGG 19 | 
GCAGTACAGAATTAGTAGTCGTGGGCTGCAGTGCAGAACCCCAAAATAGATCAGTACGGCCCAGGGTAAATAGGAGGTAATTTGGTTCTTGTACGGAATCCTGATTTAACGCTCGTTCTTGCGAAGCTCGATTTACGAAGCCGTGTTACAAAGACTCTTGGGGAAATAGTGAGATCAATATCATACACTGCATTTATTAGTCGCTGTGTGAAGTTATGCCTATGGGTCCCTCAAGTGAGCAAGTACAAGTAAAGTGCTCCCTCGTGTGTTCGTAGATACTTCTTCTGTGCAATACGCACTGTAAAGCGAGCGTCATTCTTACATGAAGGTAGAAGGCATGAGAGTGGTCTTTCAATATTTATACATGGGTACGGAGGCATCGAACGCGTCCCTCAGCGGAGTTGTAGATGAAGTTAGGTTAAACCGGATAAGTTAGTACTTGCTAGTTATGTTTCTTATCCTTTGTCAGAACCCGCTGATTCCCTCACTTGCAAGCGTCGATCTGTGTTGAAAGAGTATACCGGTGTGTAAGATAAACCAGCCCACTCAGATCCCGCGACGATGACTATATTGTGCCAGGACGGCCTTACGTTCCTTTAAATGCGTAGATATTCATTACATCGGTGAATTACTAAAGCGGGTGAGCCTACATGACATCCCAAGACTCTTTTGTGGGGTACGCAAAAAGGGAGGTTTACATCCTCATAGTGAGCATCAGGTGAGGTCTAATATGTTGCTGGAGGCGCTATTCGTGACGGAGTAATTCAATGCGCATTCTAGCGTCGTCGGACGTACTAATGACCCCCGGTTTGCACGCGCGTGGTTCTCACTAACGGTCATCCTATAAGAACAGCAAGTAACGAGCCCCGATGCATTCGTCCAACGCACACTAAATACTCAGGGATCAGGAGACATGAAGTCCAAGGGCGCTGTTTATAACAGAAAGACCGTGCGTATAAACTGCGTTAAAACGGGTAGAAGACACCCATTGTGCATCTTC 20 | 
AGAGGGGTGTCCGCCTGGGCCTCTTGAATTATAAGCTCTCCATCTCACTTATATAACTCGGCAGATCGGCTTGCGGAGTGCGGGTGTCCCTGATATAGCTTAAGGAGGGGGAGCGCTTCACTACGTTCATCCCTTACCTCCTAAAGATAGGGATCTTCTTCGTTATCTCTCACGTGCGCCATGACGCTTGCACTGATCCCCAACAAGGTCGTACCCAGCAAGTGGGCTGGCGGGCACAGAGACAGGAAATAGCTACACTCTCGGCACGGACGACGTCAGTCTCTTCCAACAAGAGTTCCACTAAATAAAGCTACAGAGGCACAAATCCATCACTTGAGTTATTAGGTCGCAACGGAACTGTTCGCCCAAAGACCGAATAAGCTGACCCCACTTGACCCTATGTGCATACGTCTGTATTATATACTGGGGGAGAGTCACGGGAGGTCACCATATGGAGTTACATTCAAAAGGGCGTGTATCATATGCTATCCAGCTGGTAGACGGAACGATCGCAGGCTCTAAAGTCTCATTCTTTAGATTTTCAGCGGCGGAGCAAGCCTAGCTTCAAAATCGTCATCAATGGAGACACCTGAGGGTGGAGGGTCTCGCCCCACGAGCTTCTGCTGGATTACTCCGTGTAATATAAAATCCTTGCACCACTTTAGATAAAGTTGGATTGTGTAAAGACATCTCGATTTGTTTTTCGGTATACATGTAGCGGGTACTAGAAGCACTAGTTCCCGTTGATACTCCAGTGCCGACCGCGTCGGTCCATTAATTACAATTGGCCTCGCGGCTCGCCATTAGTCATGAGGTGAGAACCCACAACTAGGCCGATATTCCCTTCTTTTGCGCGAGGCTCACGACCAATGGTGGCTTATCAGACATTTCCAGTCTCGATAGAAGCAGTCCGACCCAGGACTATCATTACCCCTCAGTGGGGTAGTAGAGTGCGCAATTTTGGTCAGTATCTCTAATTGATTTTTGGTCAGCTTGTGCA 21 | 
GTTGGTTCTTACAGGAGTACGCTTTTACGCAGATATCCCGCGAAAGACATGCACGGGTTCGCATCAAGATATACACAGTATTGGCTCGTGATAACCCGTGTTCGGTTGAAACACTTACCTGAAGAACGGACCCAACTTACGCATCAAACAAGTGAGCGTGCTCTTACACATCTTCTGGCAACCATTACACAGTGGGCGATTATCCAGAACATTAAAGACTCCTTGCAGTCTAGGTAACAGGTACGATTGACATGACTGAGCTTCTACCGTAGCAGCTCTGGGCGTAGTCGCATTTCCCGCGTAGTGGAGTTCACTATGCACGTCTCCCATCATATTCAGCTCAAAACAAAACCGAGCGCCCGAGGCATGCTATTCAACGAGATTTTTTTAATCCTGTTTCTGATCCACTCATAGACATTGGCACAGAGTAGTCGATATGCTGAGTTAATTAGGCTCGCCCCTATTTACAGTATAATCCGAGGCCTGATGGAACAAGAGGGATGGGGCCAATCGGACTGGAGGCCGCTCGCTCTCGGCCTCATGTTGGAGGCTCACTGTGAGGTTTAAATGACTACCACGGTGTGTAGACTGGATCCGGACCCCCCGTGCTGGAACACTCCCTGGAAATACGATGTTACGAACCCACAACCTATTGTCACAGGCACCCGCAAGCGTAGCCCTAATTTTATGAACGCGATCACGAGGGTCTTGATTGAACCGGAAACGAGAAAGACTGCCTCAATCGAGTACGTCCGTTATTTTCTTCCCCGGGTCGGACCCACTAGACGATAATGCGAGCCGGGTACCTGGAACGCAGGGCCTTTGCCACCTGTTGGAGCTTACAGGAAGTTACTAGGAGTCTATCAATCCTGCGTCCCCCGAGCTTTTCATGCTGTTAGCCCTCCCACCCAGGCCTCGCCTCAGCCCCCTATAAGGGACTATAAGCACGACACCCACAGGGTTTTATTGTATTAATGCAGAATGTGGATGCGTAGCTGTG 22 | 
CAGGTTGGTGCTGTATTCAGGATCCGTGAGGCCGTAGCATGGTTCACAAGCCTTGTGTTAACGTGAAGTCGCACACGAGCCTGCTCGTCTTCGAGGCCCTGTGCGATTAGCGCAGCTATTCATCCATCCTGTTTACAGTACCGGTGGAGGCCGAGGACCATATTACTGGCTGGGTTTCGTATAGCCCCTTATCCGTGTGCATTATTCGGTTCTGACGGTTCTCAGATAGGCCGCGGGACTCGAGACATAATAGATTACCACGTACTTCCGGCAGCCGCAAAGTTCCCTCCAGCAGCAAGCTATTGCTCATGGCATATCGCGGCTCAGCGAGTGGGTTCTCCGACCAAACAAACTCCAGTAAATGATTCTCCACGCAAATAAACAGTATCTCTGGGATATCGGTATCTAAATTAGAGTCATTCGCATTATATCAGTCGCATCCCTCTGGGCAAGCATCTGTCCAATAGTACCGCGACCGGCAAGTCCTCAGCGTCGTGGGTCGACTCTCTGTCGAAATACCACCGGCGACACCACCTTCGACTGTGACACTGTAGAATGAGGCCGAAACACACAATCAACCTGTCGCGCGCCAGTCACGTTTTCTAACCGTGTACGACATGGGCGGGTATAGGGCAATTACAGGGCGCTGGTCATGTGTCCTCGTGATTTCGCTTACAAAAAGTTGTCGGTCATCCGCCTCTTACAAGAGGCCTATAACCGACTATCCAATATGGCGGCCTTTACTGTGCTCTGTCATCCCACCTCCCCGAGATCCGACTGTCCTAGTGAGCCCCGTCGGGCGTGGAAATTATCGCGCCTTCTAATCTAAGACTTGGCGCCGTCAGAGAGGACCTGATCAGGTGACCTTTTCCGCAAAAAATGTCTTCATGAGCTTTAGAAACCTCACTGATCTGCGCGCACGAAAAAGAGGACGTTGCGGGACTAGAAAAAATAAGAGTTCACAGGGCAGTGGCATGTTCCCGAGAAAATTGCCGGTTTA 23 | 
AACAAGAGTTCCACTAAATAAAGCTACAGAGGCACAAATCCATCACTTGAGTTATTAGGTCGCAACGGAACTGTTCGCCCAAAGACCGAATAAGCTGACCCCACTTGACCCTATGTGCATACGTCTGTATTATATACTGGGGGAGAGTCACGGGAGGTCACCATATGGAGTTACATTCAAAAGGGCGTGTATCATATGCTATCCAGCTGGTAGACGGAACGATCGCAGGCTCTAAAGTCTCATTCTTTAGATTTTCAGCGGCGGAGCAAGCCTAGCTTCAAAATCGTCATCAATGGAGACACCTGAGGGTGGAGGGTCTCGCCCCACGAGCTTCTGCTGGATTACTCCGTGTAATATAAAATCCTTGCACCACTTTAGATAAAGTTGGATTGTGTAAAGACATCTCGATTTGTTTTTCGGTATACATGTAGCGGGTACTAGAAGCACTAGTTCCCGTTGATACTCCAGTGCCGACCGCGTCGGTCCATTAATTACAATTGGCCTCGCGGCTCGCCATTAGTCATGAGGTGAGAACCCACAACTAGGCCGATATTCCCTTCTTTTGCGCGAGGCTCACGACCAATGGTGGCTTATCAGACATTTCCAGTCTCGATAGAAGCAGTCCGACCCAGGACTATCATTACCCCTCAGTGGGGTAGTAGAGTGCGCAATTTTGGTCAGTATCTCTAATTGATTTTTGGTCAGCTTGTGCAGATCAATACCCGGTTGCTATGCAGGCTAACATGGTGGCCCGGTAACGCCGCACCGCCTTGGCTGGGCCTTAAATTTGGGAGAGAAAAAACCTCCTGTCTTAAAGTCTCTCTACAGCTTGGACAACAACCGTTACCACGGACCCGTAAATGCTCCGTATGGAACGCACTTTTAGGACCGTTGGCGACATAGGAGCTAAATAAAATTACCTACGACGCGTACCTTCATCATTTCGGTGAAGCATGTTGATGGCCTACAAATGACGTACGTATATCTCAGTCATAAGATT 24 | 
GCTCCAGAGATACTCATGGTAAATTATGACGATCGACAAAGAGTGAGCTATTAAAGCGCAAGTTTTTCCGTATATGGGGAAATGAATGTTTGCACAAAAGTACGAATATGCTGCGTATAGAGCGTTATCACAGTCGCCGTTTTCAGAGGACCGCGCGGGGCGCGTTCATACTCGGGTGGGGAGGTATGCCCATTTATTGCGCTTATGTAACGAGGTGTTTAATTTTACCCTAGTAAACGTATCGGGGCGCACCCATTCACAATACCTCACGGCACTCGGCCCTAAATTTGCGTAATAGGGATTAGTCGTTCAAAAATTCGAGTAACTTATAGTTCTTAAACGGGTCCTCGATTTGGGGGGCTATGATTAGGCTTGATGGAGAAAGAGCAATAGCCGGTTTTGTCCTTAAACCGGACAGGAAGCTCTGGGCTTATCAGCAACACTGATGTATGTTAGGGACTTACGCTAAGGTCGTCGGTCGACCCAGCATACACCCATAATAACGCTAACCCGAACCCGTCCGACCCAACTAGCTGTTCTAGTCGCGCGCCGGCTCGGGGACCGGCATAATAGATTTTCGGTCTTTGGTAATTTTACCAGCTTTCGAACTAGCATCTTCCATACTGAATGAGGTAAATTGAGGTCCGGAAGGCGTGAAACCTGGACGTAGTATCGCCTGTGTAGGAATCTATTGCTCCCATCGAAGGTGAGGGGATGAGAAGGTATAAGAAGTTAATGGTCGGGCCGATAAGAAATTATCAGTTTTAGCTCACCACCTTAAAGATACTTTTCTGGTTTGTCACGTCTGCATAGCGTATGGTTACTAGCGAGGTGTTAGTCCTGCTCTTCGAATGTTAGCATGCCCTTTACCTTGGACGGCCAGCGTAGTCCGTTCTCCCACCCACTAGAGGCTGAATTTTATGTGCCTAAAAGTAATTGTTGCCGTTCAGAGGGCCGGCTTCGGACAAGACTAGCTCCATGAGATGTCTCGGACGCGCAT 25 | 
CTGAAATAGTATCACAGTTTCCAAACAGATCACCACGATAGTAATACCGTAGGCCAACCTCTTCCTTAACGAATGTCCGTGTAAAAATTTTTCAGAGTGCGTCTCGCGATAACCTTCGGCCCTTTGTCTATAAGTTACCCGAATGCACGTGCGAGCTCCGGAGGCCAGCACACTCTGACTGTCATTTCCGGGCCGTCACTCAACATAACGCAATTCATTATGAACTCACTAACCGCTGCAACTTCCAAAACATCGTAGTTATTTTGTTCATACGCAGCCCTCCCTATATCATTAGGTGACAAATGAGGTATTTCGTGTGATATGTCGGTGTATCGGACCACTAGCAGTAAACCGATAACTTCACAGGGAGGCAAAAACTAATCTTATAGTTGGACTTTAGGCCTTCTGCGTGCACGGAACAAACGAGAAACAAATTTATGAAGGCCCAATCTAACTCCGAAAGCGGATCAGTGGTCCGTTGTACAAGTACAATCCCGCCTGAATGCTCTGCAACTGCTTCCGGCGAGGTAGACCTCGCAGAGAAGTCTTGGCCCCATCTATGGGCAGAGTTGTCAGCTGCTATGCGCAGTGGTTAGGCTTAGATATATGCCTGGTCACCGGTGACTACCTAGTAGTAAAAAAGCTGAGCTAGAAGTGTGGGAGTATTCCGTGTGACAGGACCTTCCCCTGATTCTTGATAATGTGTCGACAACCGTCGAGGTTACTCCATTAGTTTAAGAAAAATGACCGATGATGAGAAATCAAGGCGGTCGCGACAGAGGTTATGGTACGGGATTTTGGCCCTCATCGGCGCGTATGTATCGCAACACTTGAACGATTGTGTTATAAAAGCCTGGGGCACTACCACACGGAGTTATCCTATAATATGCCCAAAGCTTGCAGACGCGGGAGCGCGTCCTTTCGGGGCACCTTGGAGACGTTCCGTGGGTTATGTACAGGGAGACCGAGTTACCACAATACGGTGGTCAGTGACACAGTC 26 | 
TTCAGTACTTGCGACCATTGACGATATCTCCTGATCCATGAATAACGACGCTCATCCCAGTGTGGTAACAAGAGAGTGGGCAGAGCGTTCAAACATGCTTAATAAGTGCTCAATACGAACGAAGCCCTCTGCGATGGTTACCAAGGATGGCTGATACTTCTTGCAAGAATCTCTGAACAATACCCGCAGGTCGACAAAAACGGCCTCCGAAACAGGAGCGTCGGTAGAGTTATGTTGTGGCACCTAATACTCGAGGGTATTCTTTGAAGCTAACGCTGGGTCATTAAAACATGGCTTTAAGCCGGTCGGCACTTCTTTGAGTGCGTCCAGGCACGATATTATACGTACCAATAGTAAGTTCAAACTGGTCTGGTCATTACGTTCCCCCTCGGGTGCGGGCTGAGGGGTTCCGATCTTGACTCATCGCGCTCGATCCCTGGTAGGACATACTAGGTACGTTGGGTGCGGGAATGCGTAGCAATCTTTCCTTAGGGACGACTGTAGACGTACCCTCACTCCCAACAGACATCAATAAGAATAGTGGCAACGCAGTACGCTATATGGATGCACAAATGAGAGAAGTTAAAAGTGCTTCCGCTCAGCCAGACGCGGGCGGACGCGATTCAATCTTACATCGGAACCCTAGTGACGAGTCACCCCGTTCTGGCTTTACTTAGTCTTGTGGCGGGCAAAGTGATTGTCTAACAATTAACAACCCCGCTGGCGAGGGGGATGATACCATAATTGATTAGGATGCCGCGACTATAGCTGTGAATGATGCTTAAGTGGACCCCTGAGCTTACGTGGCCCCCCCTTCGATTTGCTAGTCGTAGTTTCCTCCCAGGCGCCGCACGTAGGGCATTACACTCCATCAATTAGAGCCTCTGACTTATATGATTAATTTAAGCCGAGGGAATGCCTGTACCTTTGTCTCCCGCACATGGATGCGCTAATGGATTAGGCAGTAGCGTTAAAAAGCTGCCTGGACACCCCTCTGTTC 27 | 
TGGTCGGGCCGATAAGAAATTATCAGTTTTAGCTCACCACCTTAAAGATACTTTTCTGGTTTGTCACGTCTGCATAGCGTATGGTTACTAGCGAGGTGTTAGTCCTGCTCTTCGAATGTTAGCATGCCCTTTACCTTGGACGGCCAGCGTAGTCCGTTCTCCCACCCACTAGAGGCTGAATTTTATGTGCCTAAAAGTAATTGTTGCCGTTCAGAGGGCCGGCTTCGGACAAGACTAGCTCCATGAGATGTCTCGGACGCGCATGCGGGTTGACGTATAGCGATGTTCTCGCACATTATTAACCCGTGGACCAGAGATAGATACACTGTTAGAGTTACCAAATTTAATACAAGCCAGGGCCACCCACTTCCGTCGGAGGGACATGTTGTTAGTTCAATAGGAATGGGGAGAGATGCAACTTCACTAACTGGTTACGCGAGATTCCCCACTCCGGCTGGCTTCTCGAGTCCCAACCCTGCGGAAGCTCTACGCCAATATAATTCAGGCAATGCTCCCAATCCGACCCTCGCAGCGCAGGTCTTAGTAAACAACTGCTACACTTGATGTAACGTGCGCGGTGTGATATTCTAGGCAATCATAATACAAGTTACCATTGACGTTCGCGAGACCGCTACTAAGCTACCTGGGATGGGCTTCGGTAGCTCAGCCTACCCTGCACACAAAAAGTCGGTTCTCAGATTGTACCTGCTGCAATCGTCCACAGTCCTAGATATCCGCACATCCATCAGTTTCGTTCTAGGCTGAGTAGTACTGCTGATAGAATTCCCATTCGCGAGTCACAGTTGCGACTATTGGTTGGGAGTTGCCGTCCAGGAGGAACCCCGCTGAAATAGTATCACAGTTTCCAAACAGATCACCACGATAGTAATACCGTAGGCCAACCTCTTCCTTAACGAATGTCCGTGTAAAAATTTTTCAGAGTGCGTCTCGCGATAACCTTCGGCCCTTTGTCTATAAGTTACCCGAATGCACGTGCGAG 28 | 
AGGTATCGATCGGTGAAAATGCTAGACCAATCTTCCTAGTGTAAGCTTCAGTCGTCCACGCATGTAATGGCAATGCGGCGGCTAACTCATGATTCCAATACGGTTACGCCTGTCGACCCACCTCATTACTCGCCAATAAAGAATTGGAATATATGTCAAGTTCTCTTACTGGGGGAAATTCTGGTACAACCAACTCAGGTACGCCAGTACACTCACTCCTGCAGACTCCCCGATGTCCGACAGAGCACTCGCTGAGTAGGGTCTGTGGCACGAGTCTTATGTAACCTATCTAGATAGAGCCCGTGGGGAACTAATGCTCCAGAGATACTCATGGTAAATTATGACGATCGACAAAGAGTGAGCTATTAAAGCGCAAGTTTTTCCGTATATGGGGAAATGAATGTTTGCACAAAAGTACGAATATGCTGCGTATAGAGCGTTATCACAGTCGCCGTTTTCAGAGGACCGCGCGGGGCGCGTTCATACTCGGGTGGGGAGGTATGCCCATTTATTGCGCTTATGTAACGAGGTGTTTAATTTTACCCTAGTAAACGTATCGGGGCGCACCCATTCACAATACCTCACGGCACTCGGCCCTAAATTTGCGTAATAGGGATTAGTCGTTCAAAAATTCGAGTAACTTATAGTTCTTAAACGGGTCCTCGATTTGGGGGGCTATGATTAGGCTTGATGGAGAAAGAGCAATAGCCGGTTTTGTCCTTAAACCGGACAGGAAGCTCTGGGCTTATCAGCAACACTGATGTATGTTAGGGACTTACGCTAAGGTCGTCGGTCGACCCAGCATACACCCATAATAACGCTAACCCGAACCCGTCCGACCCAACTAGCTGTTCTAGTCGCGCGCCGGCTCGGGGACCGGCATAATAGATTTTCGGTCTTTGGTAATTTTACCAGCTTTCGAACTAGCATCTTCCATACTGAATGAGGTAAATTGAGGTCCGGAAGGCGTGAAACCTGGACGTAGTATCGCCTGTGTAGG 29 | 
GCCTTGGCTGGGCCTTAAATTTGGGAGAGAAAAAACCTCCTGTCTTAAAGTCTCTCTACAGCTTGGACAACAACCGTTACCACGGACCCGTAAATGCTCCGTATGGAACGCACTTTTAGGACCGTTGGCGACATAGGAGCTAAATAAAATTACCTACGACGCGTACCTTCATCATTTCGGTGAAGCATGTTGATGGCCTACAAATGACGTACGTATATCTCAGTCATAAGATTGAGGGAAAACTTCACGAGCCTCCTCCAGCATCAACACCAAAGGGGTTTTACCGTGTAAAAATATATTTCAGATAGGGACCCCTGATGCCTCCAGACCCTGAGTACCGCAGAGACCACCTTAGGGCGTCGTCAGTGATGTCAGAGACAGGTGATCACGCAATCGTTATAATCGAAGAGCGATCTAATTTGGTTTATAATCCTTGTCCCCCCTCAACGGAATGGTCAGTTGTTGCCCGACGTTATTCAGGAGAACAGAGAGGTGGACAGATAGCTCTCTTGATGCACAGGTGGAGCTGTAGTGCGAATTCGCGCATTTAGACAGTATAAGGCTATGAAGTGACTAATCCGGCCACAAATCGTGAAGTGCTTACTCTCTAACGGTGAGGGACTAGGTAGCATACACGCAATGCGTACTTCACAGGTATCGCGTCAGAGGGTCAACTCGCCCCCTCACCTGGATCCCAGTGCACCGGCAGAGTAGGTACTGACTGCTAGAACTAGTCGCTTATATCGTCAAGTCCTCGGTTGACGGCTCAGAGGCGGCGGATACGGGTGCACGAGACCTTCCGGCTCGGATGGGATGGATCTGGTCCTGGTTTCTACTTGTCGCAGTTTTCGTCAAACATAAAGGCCAATAGGAACCCGAGATTTTTGAACTGGACGAGACATATCCAACCTGAATATACGGAACATGTTTTCCTCCGCAGAATACCCCATACATTAAGCGCGAAGTCAAGACGTAGGTTCTTCGGTTTGGCCCAATTCAG 30 | 
CTCACTTGCAAGCGTCGATCTGTGTTGAAAGAGTATACCGGTGTGTAAGATAAACCAGCCCACTCAGATCCCGCGACGATGACTATATTGTGCCAGGACGGCCTTACGTTCCTTTAAATGCGTAGATATTCATTACATCGGTGAATTACTAAAGCGGGTGAGCCTACATGACATCCCAAGACTCTTTTGTGGGGTACGCAAAAAGGGAGGTTTACATCCTCATAGTGAGCATCAGGTGAGGTCTAATATGTTGCTGGAGGCGCTATTCGTGACGGAGTAATTCAATGCGCATTCTAGCGTCGTCGGACGTACTAATGACCCCCGGTTTGCACGCGCGTGGTTCTCACTAACGGTCATCCTATAAGAACAGCAAGTAACGAGCCCCGATGCATTCGTCCAACGCACACTAAATACTCAGGGATCAGGAGACATGAAGTCCAAGGGCGCTGTTTATAACAGAAAGACCGTGCGTATAAACTGCGTTAAAACGGGTAGAAGACACCCATTGTGCATCTTCAAGTTTGGACAAGTTCTGCCCGCTAATTGGACACGAAAACTTGTAATTTCGTTGCAACACCTATGACCGACTGTATTGAGCGCGTTATTTACCTTGTCATGGGTCCAAATAATCAGGACAAGCATTCGTACGCGATAAATAGGCTCTATCTACCGTGCCCGCAAAGTGGCAGAGAATTGGCGATAGTAGAACGCGTTCAATGATAAGAAGTCCATAAGGGCCTAAGACAGAAGCGACGTTGTAATAGTACGAGATCGAACGAGAATGCTTCATACTGAGCGAACTGCAAAGAAGCCCCTATTCTGCCACGTTCAATACGGGTACGAAGGTACTGTACGCAACATTTCCACCCCCTTGGGAGGTCTTATATTGCTCCACCTCTCAATATGCTTGAGGGGCCTCGAAAGGTGATGAGGGATGGTGAAGCGTTCGGGTTTCGATCTTAATGTCAGCGGCTGAGATCGAATACAGGCGCGACCGCAT 31 | 
GAGGTTTAAATGACTACCACGGTGTGTAGACTGGATCCGGACCCCCCGTGCTGGAACACTCCCTGGAAATACGATGTTACGAACCCACAACCTATTGTCACAGGCACCCGCAAGCGTAGCCCTAATTTTATGAACGCGATCACGAGGGTCTTGATTGAACCGGAAACGAGAAAGACTGCCTCAATCGAGTACGTCCGTTATTTTCTTCCCCGGGTCGGACCCACTAGACGATAATGCGAGCCGGGTACCTGGAACGCAGGGCCTTTGCCACCTGTTGGAGCTTACAGGAAGTTACTAGGAGTCTATCAATCCTGCGTCCCCCGAGCTTTTCATGCTGTTAGCCCTCCCACCCAGGCCTCGCCTCAGCCCCCTATAAGGGACTATAAGCACGACACCCACAGGGTTTTATTGTATTAATGCAGAATGTGGATGCGTAGCTGTGTGAAAGGGTGCATCTCTACGGGTAGCGACTTGACGAGGAGGAAGGTGGAACTTGATTAGCCAGCAATCTCTATTTTCGGACACCGGTCGCGTGTCACAAATAATCACTGAACTAACTTGTCCCTATGGCGGTCAAATGAGGGGATGCGCCGAAGTCGATGCTTAACGTCACAGAAGGTAGTGGCTAAGCCCTCGATACGACCGTTGTTACGAGTAGAACATTGTTGAAGCAACTGACCGTACTCTGCATGAGCGGTTGCTGCGACACCCACTTACACCTCACGCCCACACGGCTAGCAGTGGCTTCTAAGAGACCCAGTGTGCAGCTGGAAAGGTTTTTTTTTCGGAAATACGGTTATCGGACGCACTGGTGGCTTAAGCTCATACTGATGATGCGACCGAGCTTCGCTCCTAAGAAAACCAGACGATATATCGCGCCACTCACAGCAAGTGTCCGGAGCGGCCCAATACCTGGACCTTGCCCGCCCACTCGGCGAACACTAATGGACTCTTCATCTCTGCTTAACCTACTCACGTAATGCGCGCGGACAGGGAGCGG 32 | 
TCGAGTCCCAACCCTGCGGAAGCTCTACGCCAATATAATTCAGGCAATGCTCCCAATCCGACCCTCGCAGCGCAGGTCTTAGTAAACAACTGCTACACTTGATGTAACGTGCGCGGTGTGATATTCTAGGCAATCATAATACAAGTTACCATTGACGTTCGCGAGACCGCTACTAAGCTACCTGGGATGGGCTTCGGTAGCTCAGCCTACCCTGCACACAAAAAGTCGGTTCTCAGATTGTACCTGCTGCAATCGTCCACAGTCCTAGATATCCGCACATCCATCAGTTTCGTTCTAGGCTGAGTAGTACTGCTGATAGAATTCCCATTCGCGAGTCACAGTTGCGACTATTGGTTGGGAGTTGCCGTCCAGGAGGAACCCCGCTGAAATAGTATCACAGTTTCCAAACAGATCACCACGATAGTAATACCGTAGGCCAACCTCTTCCTTAACGAATGTCCGTGTAAAAATTTTTCAGAGTGCGTCTCGCGATAACCTTCGGCCCTTTGTCTATAAGTTACCCGAATGCACGTGCGAGCTCCGGAGGCCAGCACACTCTGACTGTCATTTCCGGGCCGTCACTCAACATAACGCAATTCATTATGAACTCACTAACCGCTGCAACTTCCAAAACATCGTAGTTATTTTGTTCATACGCAGCCCTCCCTATATCATTAGGTGACAAATGAGGTATTTCGTGTGATATGTCGGTGTATCGGACCACTAGCAGTAAACCGATAACTTCACAGGGAGGCAAAAACTAATCTTATAGTTGGACTTTAGGCCTTCTGCGTGCACGGAACAAACGAGAAACAAATTTATGAAGGCCCAATCTAACTCCGAAAGCGGATCAGTGGTCCGTTGTACAAGTACAATCCCGCCTGAATGCTCTGCAACTGCTTCCGGCGAGGTAGACCTCGCAGAGAAGTCTTGGCCCCATCTATGGGCAGAGTTGTCAGCTGCTATGCGCAGTGGTTAGGCTTAGATATATGCCTGGTCA 33 | 
ACACTTTGAACTATATGGCTAGTCCGCATGACCAGCTCTGACAGGGTAGCTCGTTTGTGCTGATGTGAAAGCGACTGGCAGATTTGACGCGGTCTGAAACTGGCCCATGCTCGGAAGGCAACCCTACTGTGGGGCCAGATTTTTCCAGTTTTCACTTTTGATTCATGAAGTCAGCCTCATAACTGTGTACGGGTGGGGGTGCTTTACATTTCCCGAAGTTATCGAAGCTGCAGTCTATATAAATGTTAGTTTGTCTTGAAATAACCGCCTTGCGTGTTCCGTGTCATATGTGAGGCATCCTATGACGGCCAGCACCACTGTGGATCGCGCTAGCGCCTTGAGAATCAGGCCCACGTCCCCACTGGGGAACTTTCCTCCGATAGATAATCGACACGCGGGTAGCCGGGTACTTCAGCCGATCAAGTAACCGCCTGCATCCACTAAGGAAGGCAGTGAAGCAAACTAGCAATGAAGGGGGATGCATAAGAGGTCGGTTCCGAATGTGCAGTTAAGATCAGACTACGGCCGAACCATGTTCAGCTGACCGAACATAAGCAGGGGTTAGTACCGGCGGAGATAACTTTTCAATACCCCTCGAAACCTGAAACGCCGGGCTAGCCACACCATTGGTTAGTGCTCCTATGAGGTCCCGTCACGACCGTCCTTCCTGCAGTGTATACTCTGGAGTACTATTTTATAGCAAGAGTCCAGTTACTACGTAAGCCGCAGCCGGTTAGTGAATTGTACTTGTCACATCTCGTGTCTGAATAGTCTGACTCGGTCACTGTGAAAGGCTAAGGACCGGCCAGAAGAAGAGCTCCCATCAATCCCGAATAAGGATCAAGTCACGTTAGTTCGCTATATGCCGTCAACCGGAGCTAACTATCGGCCGATTGTTCAGCGCTGGTTTGCGCTTACGTCTCCGAGCTCGGGATACGTTGGGGCCGTGTTTTGATTCACGATCGGTTGACACCGTTCCGGGACAACCTAGACTGCGTCT 34 | 
TTATCGGACTAGTAGCAGGGGTCTATCTCGTGCCGAAAGTTTCGAGATTCGTGTATTCCCCCGATCCTGCTGGGCTGCAAACTGTCCTTAGTCCTCGTCGCTTTACTACATCTTATTTCGCGTTATTTTTAAGGGCTAGAACGAACTTCAAGATAGTCAACGGGGGGGTTTCTTTCTCGCAGAAGAGCCCAGATGATGTACGACTACGTGTCACAAGACCAGTTTTTTAAACGCTGCGTCGAGGTACAACCAGAGCGCGTTACCACTGCTGTCCGCACCCTACTGATGTGCGCGGCCAGTTCAACGTGAGCGATAGTGCGACGGGCGCAACGAACTTTCGTTGTAACGTTTGGAGATTTTTACAATATTACCCGTTTCGTACGTCGTAGGGGTCCAGCTTTCCTGCCAATAAATTCGCATTAGGCACGGCAATAGATATGTCGTGATGATCCACACAAACAGAAAACACTTTGCTGAAAGAATTACCTAATCCTGTGCCTAGATATATGCTACCCGTGTTCATGTAAGGTGGGTTGCCTATGTTTGAGCGACTCTCGACTATCAGTACAGAAGCGTTGGTCGTCTGTCCTTGGGACTAAGTACATTGGACTCGCGTCTAACCCTGGGCGACTGCTCAAGAAAGTCGCGTCCTCACCTTCATTAGTATCAATCAGTCCACTTACAGGACACTTTTCATTATGAGATCGGATAGCGTTTATATGGCAGACTAGCAATGAATTCACCTGGGTTCCCATACGAGCTAAACCCCCTGATGTATAAGTACTCACGAGTTATTAACCACGACGGGCAAAGAGCCGGTTAATACGGGTGCCTTGTCAATCGTCTTCTGATGCTGGCACGCACTTCCGGTGGGTCCTAGGCCCGCAGTTGGTTCTTACAGGAGTACGCTTTTACGCAGATATCCCGCGAAAGACATGCACGGGTTCGCATCAAGATATACACAGTATTGGCTCGTGATAACCCGTGTTCGGTTGAAACA 35 | 
TTATCGTTCATAGTCTAAGTAGGATCAAATCTGCGGGGAATACTTGTGCCCGATATTCATAATAAGCTTACTGCCCCCTGAGACCGTACGTTAGACAAGCCTGTGGTACACAGGGCGGGAGGCGGTCCTCTGGCCTCCAGCACTTAGTCTATTACCAGATGATTCACGAATTAACATGCCTGTTCCGCCCACCGGACATGCAGCCGATCCCGTCAACGTGAATTTGCGCTGACTTCCGGATCTTCAGCCCCACAATCGTCACATGTACTAGGTGCGATCGACCGAATCGATTGCATCAAGATATATAATTTTCTCGTGTATTGAGTTATGCGGCTGGTAATTTACGTCTACCATCAAGCACAGCGCCGCGCACTTCCCATATCCTTGTTTGGTAGAACATTCCTCATTGATCTCGACAGTTAACTAGCAAAGGACTAGTAGGTGATTTGGGCTCGGGACGCGCCAGGCTTCTACGACACCGTAGGATGCTCGAGGCCCCATAGGCTGAACCCCCCGGGCCGTTATACTAATAGGAACCGCGACTGATGTTTAGGGAGCCGTCTCTGAATAGCGAGCGGGACCCTCCAATTATTTTCGTGAAAGACTATCCGAAATACGACGGGTTACTTATATGGTCTATTGAGACCTCGGAATTTTTTACAGTTTCTCATGAGTATTGCCTTCTACGCTAGAACAGTAATGACACTAGTATACCTGTCACCAGAGCGCTAAAACCGGTGAACCACTACACTTTGAACTATATGGCTAGTCCGCATGACCAGCTCTGACAGGGTAGCTCGTTTGTGCTGATGTGAAAGCGACTGGCAGATTTGACGCGGTCTGAAACTGGCCCATGCTCGGAAGGCAACCCTACTGTGGGGCCAGATTTTTCCAGTTTTCACTTTTGATTCATGAAGTCAGCCTCATAACTGTGTACGGGTGGGGGTGCTTTACATTTCCCGAAGTTATCGAAGCTGCAGTCTATATAAATGTTAGTTTG 36 | 
GCGAACTGCAAAGAAGCCCCTATTCTGCCACGTTCAATACGGGTACGAAGGTACTGTACGCAACATTTCCACCCCCTTGGGAGGTCTTATATTGCTCCACCTCTCAATATGCTTGAGGGGCCTCGAAAGGTGATGAGGGATGGTGAAGCGTTCGGGTTTCGATCTTAATGTCAGCGGCTGAGATCGAATACAGGCGCGACCGCATGGTTCAGTGTCAAGTTGCGAGATTAGTGACTCCCGTTTCGGTGCCAGTCTTAGTCCCGTGAAGAACAGCGCCACGATTTCCCAGTCAGACTTCTTGAACCAGAGTTATAACCAAGAAGCATCCTTTAGGGTGTATACCCTCTACCGGTCTTGTTTACTCACTTTTCTGCTATTAGTGATACATTAGGATGCTGCCGACTTAAGCTTCATCTATTTCAGGCGTGCAGACTTCTCAATCTCCACCGGTGAGGCTCAACAGAGTTTTAGACTACTTGAGGGTTGTAGACGTGGATTCCCCTGGACGTGCTTGGGAACGACAGTGGATTAAGAGAGTTATCAATAGTAAGTTGTTATATGCGTCCCGAGACGAGATTTCGAATGGGTCTGCATGTAATTGGTGCCCCAGCCTTAAAAGACGCCAGCGCATGGTCTACGTCAGAAAGTGAGCCGCCCGCTGTACATCGGATACTACTGAGTTGGGACTTGCTGTTTTCAAAGAAATACGTTCAAAATTAGATGATGTGCCGATGGCACGCAGTATGCTGCTAGTTGGATTCAAAGCTGATTCAACGAACTACCAAGTCACATGCGCATGTGGAGACTAACGTTCGACCCATGTTCTTTGCGCTAAGCAGGTATTTGACACCCACAACTCGCTTATGGCTAAAAACCAACAGTAGTGGATTTTACTGGAGATATTAGACATTACATGTTTGGCGTCTGCCTAATGAGGGGCGCTAGGATCGTTGCAAAACTGGTGCTGACTTGGACGCAATTACAATAGAGGAGTAGTTGA 37 | 
TATTGCTCATGGCATATCGCGGCTCAGCGAGTGGGTTCTCCGACCAAACAAACTCCAGTAAATGATTCTCCACGCAAATAAACAGTATCTCTGGGATATCGGTATCTAAATTAGAGTCATTCGCATTATATCAGTCGCATCCCTCTGGGCAAGCATCTGTCCAATAGTACCGCGACCGGCAAGTCCTCAGCGTCGTGGGTCGACTCTCTGTCGAAATACCACCGGCGACACCACCTTCGACTGTGACACTGTAGAATGAGGCCGAAACACACAATCAACCTGTCGCGCGCCAGTCACGTTTTCTAACCGTGTACGACATGGGCGGGTATAGGGCAATTACAGGGCGCTGGTCATGTGTCCTCGTGATTTCGCTTACAAAAAGTTGTCGGTCATCCGCCTCTTACAAGAGGCCTATAACCGACTATCCAATATGGCGGCCTTTACTGTGCTCTGTCATCCCACCTCCCCGAGATCCGACTGTCCTAGTGAGCCCCGTCGGGCGTGGAAATTATCGCGCCTTCTAATCTAAGACTTGGCGCCGTCAGAGAGGACCTGATCAGGTGACCTTTTCCGCAAAAAATGTCTTCATGAGCTTTAGAAACCTCACTGATCTGCGCGCACGAAAAAGAGGACGTTGCGGGACTAGAAAAAATAAGAGTTCACAGGGCAGTGGCATGTTCCCGAGAAAATTGCCGGTTTATGCTTTGGCAGGTTGATGTTTTTTACTTGTGAGTCTCCCAGGTGATTGCACCGCTTGCAAACAGTATTTGTAAATCCAGCACAATACAGTGATAACCGAATAGTAGATGTTCCAATCAGAAGCGGCAGGTTGGGCCTGCCCCCCATCTAATCAGCCTTCCTTGGTACGGTGAGGCAACAAGGGTCATTAGGTCAGTACAATGCCACATCTCAGCCGAGCGGACAGCTCAAGCACACGGAGGCACCTTGACTGTCTCGGAGCATGCTCTAAAGTATCTTCGAAGACGCTCGGCGACTAGCG 38 | 
GGCGGATACGGGTGCACGAGACCTTCCGGCTCGGATGGGATGGATCTGGTCCTGGTTTCTACTTGTCGCAGTTTTCGTCAAACATAAAGGCCAATAGGAACCCGAGATTTTTGAACTGGACGAGACATATCCAACCTGAATATACGGAACATGTTTTCCTCCGCAGAATACCCCATACATTAAGCGCGAAGTCAAGACGTAGGTTCTTCGGTTTGGCCCAATTCAGGCTACCCAGGACGTGCCTGGCGATTGGATAAACCCGATTTCGTGCTAGCCGGCGGTAAATGACACCTCCCATGAATACTACAAACCATGGGCTTCAGAGCAAACCACATTACGTTGGTTTGAGTAGCTTTCAAAGGCAGCGCTGGCAGTACAGAATTAGTAGTCGTGGGCTGCAGTGCAGAACCCCAAAATAGATCAGTACGGCCCAGGGTAAATAGGAGGTAATTTGGTTCTTGTACGGAATCCTGATTTAACGCTCGTTCTTGCGAAGCTCGATTTACGAAGCCGTGTTACAAAGACTCTTGGGGAAATAGTGAGATCAATATCATACACTGCATTTATTAGTCGCTGTGTGAAGTTATGCCTATGGGTCCCTCAAGTGAGCAAGTACAAGTAAAGTGCTCCCTCGTGTGTTCGTAGATACTTCTTCTGTGCAATACGCACTGTAAAGCGAGCGTCATTCTTACATGAAGGTAGAAGGCATGAGAGTGGTCTTTCAATATTTATACATGGGTACGGAGGCATCGAACGCGTCCCTCAGCGGAGTTGTAGATGAAGTTAGGTTAAACCGGATAAGTTAGTACTTGCTAGTTATGTTTCTTATCCTTTGTCAGAACCCGCTGATTCCCTCACTTGCAAGCGTCGATCTGTGTTGAAAGAGTATACCGGTGTGTAAGATAAACCAGCCCACTCAGATCCCGCGACGATGACTATATTGTGCCAGGACGGCCTTACGTTCCTTTAAATGCGTAGATATTCATTACATCGGTGAATT 39 | 
TTATGGTACGGGATTTTGGCCCTCATCGGCGCGTATGTATCGCAACACTTGAACGATTGTGTTATAAAAGCCTGGGGCACTACCACACGGAGTTATCCTATAATATGCCCAAAGCTTGCAGACGCGGGAGCGCGTCCTTTCGGGGCACCTTGGAGACGTTCCGTGGGTTATGTACAGGGAGACCGAGTTACCACAATACGGTGGTCAGTGACACAGTCGGGACTTCACCCGGCTGTATAAAGGTTAGAGTAAGCGAATGCACTAGTACGGGTGAGGCTCGCGCATTTCAATTATCATGAAGAACCTAGGGTCCTCTAGATATCGCAGATCATCCTTGTGTGATCGCCGGTTTCCGCAACTACCGCAACTTGGTCCTGACCCTCTTGGCGAACTTCAATACTGACCCGGCCGCAACGCTTACTCAAGGCTTCAAACACTTCAGATCTCCGATTTAGTGTTAATAGTGTAGGGAAGACTTTTTGAATCGGATTGCGGTCGGAATCATCCGTCAGGATACTCCGCGGGTACGGTCATCGGTTCTGCAAGGGAGTACGAAATGCTACTTTCGGCAAGGTCCGCGAACCGTTGTCTTGGGTCTCCGCCTTCGTTGCGGGATGGGTCCACTGATCAGTAAGTGGTTCCAGCCCATGTAACCCTGGGACCTAAGTTATTGCACAGCTGTCACCTGATCGGGGAGAAGGGTGGGGATCGATCCAACGGGAGTTGGCATTACACGCCTAATTTCCTTGGTGCTCGTCTCCGGATTTTTCAGATAATTCCGTTTTGATCGAGGATCCCAAGGCCACGTAGCGCGGAGGTTCCCCTTTAGTCCCGCGGGGTAAATTATGTTCTTCGAAACTAAGTCTAATTGTCACCGGATTTTGATCTCCGGACTAGTCTACACCGCTGCATCGTGGATTTGCTTATTTGTCGTTCCCTTCGAATCCGTTATCGTTATCTGTCTCAGTGGGATTCCGAGGTCCTTGTTCCTTCTGCACAG 40 | 
CAGCGCTGGTTTGCGCTTACGTCTCCGAGCTCGGGATACGTTGGGGCCGTGTTTTGATTCACGATCGGTTGACACCGTTCCGGGACAACCTAGACTGCGTCTAGTTTGTGTAAAGTCGCATACTTGACTTGTAGAGAGGGGGCCGGTTATCCGACTTTGAAGCCTGAAGAGAGGACTTACATCGGGAGGGGGCGTGAACCATCTTTCGATATGTCCTTTTATAGGTGTGTTCTCGCGCTGGCCAGACATTCCTTGCTCGTCCTACTTGGCGGGCTGGGTTAGATGTGGGGTGGGCCGAATTTCGCCCTCACTCGTTTTAAATGGATACTTTGACACCTAGGTGCGTGGACTGAGCTCTCGAAGGCTTGATAGGGTTACGCGGCCTTGTCCCGCTGTTGCTTTTAGGCTTTACGCAAAATACAAGGTCATGAAATAGATGATCTGATGTCCCTAAGCTTAACTACGCAACGCAAAAAATTCGAAGCAACAACTCTTTCGAAGATCAGTCTGAATGTAGCTGATGATGAGAGAGCTGGCGGAGTACCGGCTAGAGAGCGTGCACTGTACCGCGCGCGAGACCATTTAACTCAGACCTAATTGGGCCATATGGAATTGGGCTATGACCGAGATCCCGCGGCGGGCGCTGTCCTTTCTCCTGTCGACTCCGCGGAAAATGAGGCCAGAGTAACAGGAACTTCTATGAGTGACTACTAAAGTGTTTATAATGCTAAGGGCATTTGTTTGCGTCGGGCGGAGCTGGCCGAGAGATGAGCACCATATCGGTAATGTCCGACACCTTCCGCACGACGAATGGTACAAGAGGGGGACTGCTCATTACTTCACATGTTAAGAAAAGGACAGTTGGCCCGGACATAAATTTGGAGAACATACGACCTTAGGTTATCGACAACGTCCTGAAGTAGATACATGGCCTGTGGCTCGCTATTAGTAGTCTTTGGATGTTCTTTTCACAAAGACGTTATGGCTAATGTCCGATTCC 41 | 
TGATGATGCGACCGAGCTTCGCTCCTAAGAAAACCAGACGATATATCGCGCCACTCACAGCAAGTGTCCGGAGCGGCCCAATACCTGGACCTTGCCCGCCCACTCGGCGAACACTAATGGACTCTTCATCTCTGCTTAACCTACTCACGTAATGCGCGCGGACAGGGAGCGGATACGGACCCCAGTACCCCTAATCGGGAGCCTGACGTAAGCTATACTGTTGACACAGCGGCAGAGCAATTTGTGATCCGCCAGCAGGACTGGGGCGCGCCGTATTAAAAGCTAATCGTCGATAAAAGGGATCAAGGAGAAACCGTTTTGAAAGAGGCCTGTGACATGTGCGCTCGAGCTAGTTCTTACCCCTACCGCAAAGACACTTATAGGTCCCCTGCCCAGGAGGGCATGAATCCCTAGGTATAGTCTTTCCATTCTTCTGTTTACGATGAATCATAGTTATGACAATTGTCCGTATTCAGCTGGATATCCTTAACCGGGGGAAAGTTTATCTCGTATTACGTTGACGTCTGTTATGTAGAAATCCGAGTCTTAAATGTCGCATACACTCGCGTGCTAAGGGACGACAGCGTGTCGGCCGTGCTAATACCCAACAGGCGTTACCCCTCACAAAGCGTACAGGTTGGTGCTGTATTCAGGATCCGTGAGGCCGTAGCATGGTTCACAAGCCTTGTGTTAACGTGAAGTCGCACACGAGCCTGCTCGTCTTCGAGGCCCTGTGCGATTAGCGCAGCTATTCATCCATCCTGTTTACAGTACCGGTGGAGGCCGAGGACCATATTACTGGCTGGGTTTCGTATAGCCCCTTATCCGTGTGCATTATTCGGTTCTGACGGTTCTCAGATAGGCCGCGGGACTCGAGACATAATAGATTACCACGTACTTCCGGCAGCCGCAAAGTTCCCTCCAGCAGCAAGCTATTGCTCATGGCATATCGCGGCTCAGCGAGTGGGTTCTCCGACCAAACAAACTCCAGTAAATGATT 42 | 
GAGTTCACTATGCACGTCTCCCATCATATTCAGCTCAAAACAAAACCGAGCGCCCGAGGCATGCTATTCAACGAGATTTTTTTAATCCTGTTTCTGATCCACTCATAGACATTGGCACAGAGTAGTCGATATGCTGAGTTAATTAGGCTCGCCCCTATTTACAGTATAATCCGAGGCCTGATGGAACAAGAGGGATGGGGCCAATCGGACTGGAGGCCGCTCGCTCTCGGCCTCATGTTGGAGGCTCACTGTGAGGTTTAAATGACTACCACGGTGTGTAGACTGGATCCGGACCCCCCGTGCTGGAACACTCCCTGGAAATACGATGTTACGAACCCACAACCTATTGTCACAGGCACCCGCAAGCGTAGCCCTAATTTTATGAACGCGATCACGAGGGTCTTGATTGAACCGGAAACGAGAAAGACTGCCTCAATCGAGTACGTCCGTTATTTTCTTCCCCGGGTCGGACCCACTAGACGATAATGCGAGCCGGGTACCTGGAACGCAGGGCCTTTGCCACCTGTTGGAGCTTACAGGAAGTTACTAGGAGTCTATCAATCCTGCGTCCCCCGAGCTTTTCATGCTGTTAGCCCTCCCACCCAGGCCTCGCCTCAGCCCCCTATAAGGGACTATAAGCACGACACCCACAGGGTTTTATTGTATTAATGCAGAATGTGGATGCGTAGCTGTGTGAAAGGGTGCATCTCTACGGGTAGCGACTTGACGAGGAGGAAGGTGGAACTTGATTAGCCAGCAATCTCTATTTTCGGACACCGGTCGCGTGTCACAAATAATCACTGAACTAACTTGTCCCTATGGCGGTCAAATGAGGGGATGCGCCGAAGTCGATGCTTAACGTCACAGAAGGTAGTGGCTAAGCCCTCGATACGACCGTTGTTACGAGTAGAACATTGTTGAAGCAACTGACCGTACTCTGCATGAGCGGTTGCTGCGACACCCACTTACACCTCACGCCCACACGGCTAGCAGTGGCTTC 43 | 
ACCGCTACGGAATGTTCCATGCGGTGCCCACAGGTCATACCACTACGCTGCGTGTTAATGTGTCGCCTCATAACTTATGCAATTGTCTTATGACACCTGGGTTACAAAGCGTTTGTGGTGGCCAACGTTTGACAGAGCTTACGAGCTTAGTTAGGCCTCGCACCTAGTCATTGTGCGATAGGGACCCTCCAAGATTCATCGCCTCATTGGAGAGTGTCGCCATTGTTCGCCTATTACCCGAGCAAGGAACGCAAGTACCGGATCTCATCGGCTAATAAGTGTCCGGAAGTAGCTTTTAGAACGCAACGGAACGCTTGATCGCGAAATTTCAGTACTTGCGACCATTGACGATATCTCCTGATCCATGAATAACGACGCTCATCCCAGTGTGGTAACAAGAGAGTGGGCAGAGCGTTCAAACATGCTTAATAAGTGCTCAATACGAACGAAGCCCTCTGCGATGGTTACCAAGGATGGCTGATACTTCTTGCAAGAATCTCTGAACAATACCCGCAGGTCGACAAAAACGGCCTCCGAAACAGGAGCGTCGGTAGAGTTATGTTGTGGCACCTAATACTCGAGGGTATTCTTTGAAGCTAACGCTGGGTCATTAAAACATGGCTTTAAGCCGGTCGGCACTTCTTTGAGTGCGTCCAGGCACGATATTATACGTACCAATAGTAAGTTCAAACTGGTCTGGTCATTACGTTCCCCCTCGGGTGCGGGCTGAGGGGTTCCGATCTTGACTCATCGCGCTCGATCCCTGGTAGGACATACTAGGTACGTTGGGTGCGGGAATGCGTAGCAATCTTTCCTTAGGGACGACTGTAGACGTACCCTCACTCCCAACAGACATCAATAAGAATAGTGGCAACGCAGTACGCTATATGGATGCACAAATGAGAGAAGTTAAAAGTGCTTCCGCTCAGCCAGACGCGGGCGGACGCGATTCAATCTTACATCGGAACCCTAGTGACGAGTCACCCCGTTCTGGCTTTAC 44 | 
GCAGATCATCCTTGTGTGATCGCCGGTTTCCGCAACTACCGCAACTTGGTCCTGACCCTCTTGGCGAACTTCAATACTGACCCGGCCGCAACGCTTACTCAAGGCTTCAAACACTTCAGATCTCCGATTTAGTGTTAATAGTGTAGGGAAGACTTTTTGAATCGGATTGCGGTCGGAATCATCCGTCAGGATACTCCGCGGGTACGGTCATCGGTTCTGCAAGGGAGTACGAAATGCTACTTTCGGCAAGGTCCGCGAACCGTTGTCTTGGGTCTCCGCCTTCGTTGCGGGATGGGTCCACTGATCAGTAAGTGGTTCCAGCCCATGTAACCCTGGGACCTAAGTTATTGCACAGCTGTCACCTGATCGGGGAGAAGGGTGGGGATCGATCCAACGGGAGTTGGCATTACACGCCTAATTTCCTTGGTGCTCGTCTCCGGATTTTTCAGATAATTCCGTTTTGATCGAGGATCCCAAGGCCACGTAGCGCGGAGGTTCCCCTTTAGTCCCGCGGGGTAAATTATGTTCTTCGAAACTAAGTCTAATTGTCACCGGATTTTGATCTCCGGACTAGTCTACACCGCTGCATCGTGGATTTGCTTATTTGTCGTTCCCTTCGAATCCGTTATCGTTATCTGTCTCAGTGGGATTCCGAGGTCCTTGTTCCTTCTGCACAGGATATTCGTTGAAACACAGCATTCACGAGAGTACTCGGTTCACACGCATTGGCTAGCCCGCCGGGGGATGCGGTTACTACCTGGCGGCTATCGCTAATTCCATGCCCCATCCGTCCCTGAAAGCCTCGTGAGGTTTGACGGGTAAGAGTTATTAGCTAGAAGCCTTTTCCCTACGATGAGCGCGCTCCCGTGCCAGCTCTATGGACATGAAGGCTACGATCTTAGGTCAGCGGAAAACCACCCGGTGACCTCGTTCGATTCCATGACCTAGCTAACCGCGCTTGAGTTTAGAACATAGACTACCTCAATTGCTTCTGCGTCCG 45 | 
CGGGTAGCGACTTGACGAGGAGGAAGGTGGAACTTGATTAGCCAGCAATCTCTATTTTCGGACACCGGTCGCGTGTCACAAATAATCACTGAACTAACTTGTCCCTATGGCGGTCAAATGAGGGGATGCGCCGAAGTCGATGCTTAACGTCACAGAAGGTAGTGGCTAAGCCCTCGATACGACCGTTGTTACGAGTAGAACATTGTTGAAGCAACTGACCGTACTCTGCATGAGCGGTTGCTGCGACACCCACTTACACCTCACGCCCACACGGCTAGCAGTGGCTTCTAAGAGACCCAGTGTGCAGCTGGAAAGGTTTTTTTTTCGGAAATACGGTTATCGGACGCACTGGTGGCTTAAGCTCATACTGATGATGCGACCGAGCTTCGCTCCTAAGAAAACCAGACGATATATCGCGCCACTCACAGCAAGTGTCCGGAGCGGCCCAATACCTGGACCTTGCCCGCCCACTCGGCGAACACTAATGGACTCTTCATCTCTGCTTAACCTACTCACGTAATGCGCGCGGACAGGGAGCGGATACGGACCCCAGTACCCCTAATCGGGAGCCTGACGTAAGCTATACTGTTGACACAGCGGCAGAGCAATTTGTGATCCGCCAGCAGGACTGGGGCGCGCCGTATTAAAAGCTAATCGTCGATAAAAGGGATCAAGGAGAAACCGTTTTGAAAGAGGCCTGTGACATGTGCGCTCGAGCTAGTTCTTACCCCTACCGCAAAGACACTTATAGGTCCCCTGCCCAGGAGGGCATGAATCCCTAGGTATAGTCTTTCCATTCTTCTGTTTACGATGAATCATAGTTATGACAATTGTCCGTATTCAGCTGGATATCCTTAACCGGGGGAAAGTTTATCTCGTATTACGTTGACGTCTGTTATGTAGAAATCCGAGTCTTAAATGTCGCATACACTCGCGTGCTAAGGGACGACAGCGTGTCGGCCGTGCTAATACCCAACAGGCGTTACCCCTCACAAAGCGT 46 | 
CCAATTGCTACCTAAGAGGTCGGATGTGAGTGCGTGCGGCTTCTCCAGGACTGGGTAAAACTAACTACATACACCGCATACAGTAGTTCCCCGCGACAACGCGCTCCTGCGTCAATATGGGGTGTTTACGTACAACTGAGCCGTAGAGCACTTTCGCAGTAGGGATACCCACGCATGTACCCCGCTCTCGAGCGGCTGGGAACAACTCGAGATATTAGTATCAGACGCCGAGGTTGATAGATATATATGCCTCACTGGCAGGTCTGGGTGGGTTTGGGTGTAGATCCCTCTCTTTGGCACGTCTGGATTCCAAGGGGATATTATTACTGTGTCACACGTATAATACTGTCTCCCCGCGGGGACTCATAATATCTGGAGTAGACTGCACCTGCATGATATACTACAGTCAAAACCGCTACGGAATGTTCCATGCGGTGCCCACAGGTCATACCACTACGCTGCGTGTTAATGTGTCGCCTCATAACTTATGCAATTGTCTTATGACACCTGGGTTACAAAGCGTTTGTGGTGGCCAACGTTTGACAGAGCTTACGAGCTTAGTTAGGCCTCGCACCTAGTCATTGTGCGATAGGGACCCTCCAAGATTCATCGCCTCATTGGAGAGTGTCGCCATTGTTCGCCTATTACCCGAGCAAGGAACGCAAGTACCGGATCTCATCGGCTAATAAGTGTCCGGAAGTAGCTTTTAGAACGCAACGGAACGCTTGATCGCGAAATTTCAGTACTTGCGACCATTGACGATATCTCCTGATCCATGAATAACGACGCTCATCCCAGTGTGGTAACAAGAGAGTGGGCAGAGCGTTCAAACATGCTTAATAAGTGCTCAATACGAACGAAGCCCTCTGCGATGGTTACCAAGGATGGCTGATACTTCTTGCAAGAATCTCTGAACAATACCCGCAGGTCGACAAAAACGGCCTCCGAAACAGGAGCGTCGGTAGAGTTATGTTGTGGCACCTAATACTCGAGGGTATTC 47 | 
GTATATGAATGCCAATGCGGTGCCAACTAGAGTGACTGCGTACCCCAAAGGTCCGATAGCTTATACTCGGCTCATAGCAACATATAGTAAGCACTATCTGTGCGAACATAATAAGCCGGTCTCCTAGTGGTCAGCTAGAGCCACTTCTTAGACCTGCCTGTCCAAAATATACACATGGTTCTCGTTTAATTACATTCCTATTACCGCGCAATCACTCTTGAAGAAGCTACGAGCCATCACTGGAGGTGTCATGTCGTATGGCAAATTCAAGTTGAGAGACCGTATATACTCACCCCTCATTTGAGTAAGTTATACAGGGCGCTCATGGGGTAAAACGCCGGGCTTGTAGGTGGTCGTAATTTGCCCGACGATCAATAGTCTTTGGGAGCATCTAGTTGATGAGAGGGTCTGCCCGTTCTTGTGTCGGGGCGCAGCGCTACAGGTATCGATCGGTGAAAATGCTAGACCAATCTTCCTAGTGTAAGCTTCAGTCGTCCACGCATGTAATGGCAATGCGGCGGCTAACTCATGATTCCAATACGGTTACGCCTGTCGACCCACCTCATTACTCGCCAATAAAGAATTGGAATATATGTCAAGTTCTCTTACTGGGGGAAATTCTGGTACAACCAACTCAGGTACGCCAGTACACTCACTCCTGCAGACTCCCCGATGTCCGACAGAGCACTCGCTGAGTAGGGTCTGTGGCACGAGTCTTATGTAACCTATCTAGATAGAGCCCGTGGGGAACTAATGCTCCAGAGATACTCATGGTAAATTATGACGATCGACAAAGAGTGAGCTATTAAAGCGCAAGTTTTTCCGTATATGGGGAAATGAATGTTTGCACAAAAGTACGAATATGCTGCGTATAGAGCGTTATCACAGTCGCCGTTTTCAGAGGACCGCGCGGGGCGCGTTCATACTCGGGTGGGGAGGTATGCCCATTTATTGCGCTTATGTAACGAGGTGTTTAATTTTACCCTAGTAAACGTATCGG 48 | 
AGTTTTAGACTACTTGAGGGTTGTAGACGTGGATTCCCCTGGACGTGCTTGGGAACGACAGTGGATTAAGAGAGTTATCAATAGTAAGTTGTTATATGCGTCCCGAGACGAGATTTCGAATGGGTCTGCATGTAATTGGTGCCCCAGCCTTAAAAGACGCCAGCGCATGGTCTACGTCAGAAAGTGAGCCGCCCGCTGTACATCGGATACTACTGAGTTGGGACTTGCTGTTTTCAAAGAAATACGTTCAAAATTAGATGATGTGCCGATGGCACGCAGTATGCTGCTAGTTGGATTCAAAGCTGATTCAACGAACTACCAAGTCACATGCGCATGTGGAGACTAACGTTCGACCCATGTTCTTTGCGCTAAGCAGGTATTTGACACCCACAACTCGCTTATGGCTAAAAACCAACAGTAGTGGATTTTACTGGAGATATTAGACATTACATGTTTGGCGTCTGCCTAATGAGGGGCGCTAGGATCGTTGCAAAACTGGTGCTGACTTGGACGCAATTACAATAGAGGAGTAGTTGATTCCCGAATGTATATGAATGCCAATGCGGTGCCAACTAGAGTGACTGCGTACCCCAAAGGTCCGATAGCTTATACTCGGCTCATAGCAACATATAGTAAGCACTATCTGTGCGAACATAATAAGCCGGTCTCCTAGTGGTCAGCTAGAGCCACTTCTTAGACCTGCCTGTCCAAAATATACACATGGTTCTCGTTTAATTACATTCCTATTACCGCGCAATCACTCTTGAAGAAGCTACGAGCCATCACTGGAGGTGTCATGTCGTATGGCAAATTCAAGTTGAGAGACCGTATATACTCACCCCTCATTTGAGTAAGTTATACAGGGCGCTCATGGGGTAAAACGCCGGGCTTGTAGGTGGTCGTAATTTGCCCGACGATCAATAGTCTTTGGGAGCATCTAGTTGATGAGAGGGTCTGCCCGTTCTTGTGTCGGGGCGCAGCGCTACAGGTATCGATCGGT 49 | 
CCTGCCAATAAATTCGCATTAGGCACGGCAATAGATATGTCGTGATGATCCACACAAACAGAAAACACTTTGCTGAAAGAATTACCTAATCCTGTGCCTAGATATATGCTACCCGTGTTCATGTAAGGTGGGTTGCCTATGTTTGAGCGACTCTCGACTATCAGTACAGAAGCGTTGGTCGTCTGTCCTTGGGACTAAGTACATTGGACTCGCGTCTAACCCTGGGCGACTGCTCAAGAAAGTCGCGTCCTCACCTTCATTAGTATCAATCAGTCCACTTACAGGACACTTTTCATTATGAGATCGGATAGCGTTTATATGGCAGACTAGCAATGAATTCACCTGGGTTCCCATACGAGCTAAACCCCCTGATGTATAAGTACTCACGAGTTATTAACCACGACGGGCAAAGAGCCGGTTAATACGGGTGCCTTGTCAATCGTCTTCTGATGCTGGCACGCACTTCCGGTGGGTCCTAGGCCCGCAGTTGGTTCTTACAGGAGTACGCTTTTACGCAGATATCCCGCGAAAGACATGCACGGGTTCGCATCAAGATATACACAGTATTGGCTCGTGATAACCCGTGTTCGGTTGAAACACTTACCTGAAGAACGGACCCAACTTACGCATCAAACAAGTGAGCGTGCTCTTACACATCTTCTGGCAACCATTACACAGTGGGCGATTATCCAGAACATTAAAGACTCCTTGCAGTCTAGGTAACAGGTACGATTGACATGACTGAGCTTCTACCGTAGCAGCTCTGGGCGTAGTCGCATTTCCCGCGTAGTGGAGTTCACTATGCACGTCTCCCATCATATTCAGCTCAAAACAAAACCGAGCGCCCGAGGCATGCTATTCAACGAGATTTTTTTAATCCTGTTTCTGATCCACTCATAGACATTGGCACAGAGTAGTCGATATGCTGAGTTAATTAGGCTCGCCCCTATTTACAGTATAATCCGAGGCCTGATGGAACAAGAGGGATGGGGCCAATCGG 50 | 
GAATGTGCAGTTAAGATCAGACTACGGCCGAACCATGTTCAGCTGACCGAACATAAGCAGGGGTTAGTACCGGCGGAGATAACTTTTCAATACCCCTCGAAACCTGAAACGCCGGGCTAGCCACACCATTGGTTAGTGCTCCTATGAGGTCCCGTCACGACCGTCCTTCCTGCAGTGTATACTCTGGAGTACTATTTTATAGCAAGAGTCCAGTTACTACGTAAGCCGCAGCCGGTTAGTGAATTGTACTTGTCACATCTCGTGTCTGAATAGTCTGACTCGGTCACTGTGAAAGGCTAAGGACCGGCCAGAAGAAGAGCTCCCATCAATCCCGAATAAGGATCAAGTCACGTTAGTTCGCTATATGCCGTCAACCGGAGCTAACTATCGGCCGATTGTTCAGCGCTGGTTTGCGCTTACGTCTCCGAGCTCGGGATACGTTGGGGCCGTGTTTTGATTCACGATCGGTTGACACCGTTCCGGGACAACCTAGACTGCGTCTAGTTTGTGTAAAGTCGCATACTTGACTTGTAGAGAGGGGGCCGGTTATCCGACTTTGAAGCCTGAAGAGAGGACTTACATCGGGAGGGGGCGTGAACCATCTTTCGATATGTCCTTTTATAGGTGTGTTCTCGCGCTGGCCAGACATTCCTTGCTCGTCCTACTTGGCGGGCTGGGTTAGATGTGGGGTGGGCCGAATTTCGCCCTCACTCGTTTTAAATGGATACTTTGACACCTAGGTGCGTGGACTGAGCTCTCGAAGGCTTGATAGGGTTACGCGGCCTTGTCCCGCTGTTGCTTTTAGGCTTTACGCAAAATACAAGGTCATGAAATAGATGATCTGATGTCCCTAAGCTTAACTACGCAACGCAAAAAATTCGAAGCAACAACTCTTTCGAAGATCAGTCTGAATGTAGCTGATGATGAGAGAGCTGGCGGAGTACCGGCTAGAGAGCGTGCACTGTACCGCGCGCGAGACCATTTAACTCAGACCTAATTG 51 | -------------------------------------------------------------------------------- /datasets/rosalind_mrna.txt: -------------------------------------------------------------------------------- 1 | 
MQDLMRDGWPHLAPQNWDVTYNPSKLTYYPHQPSWVEPDLWHQFFQEDVKTTYGSLPSECGYWFPYTCVDMYKDEQGGQMTISKVEGRYNSPINVRAYPYICNYVKRIWMNNLQRRSPWWMEHELCSSVKMGVDYEHSNCATCCGIGARLMGCYVQWHEGMKKNVPPQFGPNTYWMWLQPPMCRVGCRPAYRKGIHTTCPINICSMPRPEWPMIPKKDPMYWLEPERVQEWCIRHTNKYHKIVIINQETYQSVIIYSCREDSLTRPPLPNCVQLEEPKKIMPIVSFFQHMGFYISYTSCDDKQAAWAWQRDHEFHARAHCRIDWHVNVGFTNIFCNDNVCQEQYGANYAQCSAQWINRPYCGVAQQLFNQFTTRFPYVSECFMIKVQFMFISDDAHRGQRSVATLNCDIKMTERRKSIMHRMGHKMVIVEHRIFAKRSQIYLEGFFHDLDAMNWYKMMRSYNYHMYHINAEANHESMYHQIHICHGSEPLACVCMYSQNRKDQKRSMTLQMPKRSKDTSATGKMFNPTRVNYTKDTLMHWTQIEWEWCDPQFAWDECNMYTYNRSVFSHMFTNGINRLNNNQKQHPNDYYQIVTHNYTCMQLGSIRPYETASQHWWRLKQQVQERVKRWEHHVWYINVMPKLKSIPVIFTFDPPLWRDNLACMQFCAIAEKAWTKRMMMFFREPCYTLMYGSAYVKGKSQERESDKQHHWSELGLFAQFARYCAQERKFERKTQEYMASMSQTVFRIMEMTPWGHWCVFTMDLMSILCMRFEWQEGWVRGPTKLMNMCYNMWVCGMNFLVYDVSECQTPNDDVCQNETNKKMYFHTNDWGHQENSAGRGEPMRWELSQQINMGAPMGEEYNEVNIHVGQQDTTNWHIPEAWECKSENQCSVEYHHLTIKQRVFHCVVYVVVRYQWKNHARKVPALQTHHDPCKMWYGGTVCLRVPVGTMEGRMQCVNYAFIAMVRCRCLWDFPAWCDQSAVPPPYYKRMFNGQLHSQSHD 2 | -------------------------------------------------------------------------------- /datasets/rosalind_orf.txt: -------------------------------------------------------------------------------- 1 | 
GGTCACTTGGAGGCTTAACTGAAGGGCGAGTGATTGGTCTAATCATTAGCGATCAAGATCGATATGCCTTCGGCACCATATGAATGTAAGGTCCTCAAGTGAAATCCTAAGAACGAATGCCCCTACTTCAAAACCTTAGACTCAAACCGTCCGAGTTGCCTAGCCGGAATCCGTCTGTGTTCAGGTTGCGCTGTATCACCTAGCCATCATGAATAGGCCTACGGAGGAATGCGGCGGTGCTTCGATGGCGCGAGTAGATGCGTACTCAGATTACACATATGGATCGCTGATAATTGAGCACAAAAAGGCCCCTTCAACAGGATGATCCCTATCGAGCTACAAGCCTTGATACATGTGTGAAAAAAAGATTTAGCCTGAACGTACTCGGGGGATTCGTAGAAATCGAGCCCAAAGGGGTCCCGATAACACTCAATCAGGCATTTTAGTTGGATGAGCGGGTCGGACCTCGTTACCCACCACTAGCTAGTGGTGGGTAACGAGGTCCGACCCGCTCATTGCCATTCGCGTTCAACGCAAGTCCTAGAGTGGAACATAGGCTCAGGGGTTTTAGCCAGCACGAATGCTTCACAGTACCGGCTAGCACCCTATGAAAGAGCGTATGTACTATGAGTCCGTGTTCTCAAATTATCGTATTTATGTGTTCAATGTCGTCGGGCTTGCGGAATGTTGCAGAGGTAGCGGAAGAGAGGGGCCTCTGATGCCGTCAACAACACGCGCCCCACGCTGCGAGACTCGATTTCAGCAGAACACTTCCTTGGTTGTGAGCCATCCATGCCAAGCCTAGATTAGTGAAAAGGCCTACAACGCGTCCGAACTGCCTACTTTGGTGGAGCAAAATGCGGTCGCACCGTGGAGGCGGGGAAACACGGTCCCTCGCAATGAGATAATATCAGTGTCACAGTATCATCGGCGTTTTCAGCGCTAGCTGGCAATAAAGATAGAAGG 2 | -------------------------------------------------------------------------------- /datasets/rosalind_perm.txt: -------------------------------------------------------------------------------- 1 | 7 -------------------------------------------------------------------------------- /datasets/rosalind_prob.txt: -------------------------------------------------------------------------------- 1 | 0.000 0.073 0.162 0.199 0.236 0.312 0.385 0.391 0.480 0.522 0.556 0.614 0.697 0.771 0.818 0.849 0.941 1.000 -------------------------------------------------------------------------------- /datasets/rosalind_prot.txt: -------------------------------------------------------------------------------- 1 | 
AUGAGACCGGGAGCGGCUGCCUUCGUUCACACUAAGACUCACUGUUGCCAAUGUGAUGGAUCCGCCCGUUACUACCACGGCUCGCCAGCGUGCUUAGUAACACCUGGCUGGCAGUUUGUAGCUCCGUCCUAUUCAGGUGGAUUCUGCCGUAAACAAUGUGGUCCUGAGCGUGAAUACGAUCGUUCACAGAGGAACUGCCCAGAUCGUAGAGAACUACGGAUUAUGCGGAACACCGUAAACCAAGAUGUUUGUCUAUUGAUCAUACAGGCGAUUCGUUGCUAUCCUAUAAUAUACGGCGAAGGCAGUGUGAUCUCUAAGCGCGGACGGACUACCGAAUUGGUCUAUCAGAAUUUAUUGAACAAAAAUCACAUCUGCCUCUCCACUACACGCAGUCUUAGUCUAGUGUGUUACUGCCCGAUGGUUAGUAUUACACCAUUGCUUCAUCAAAGCAUCAUGAAGGAAUGCGCCAUGCCCCGCUUGAGCAUUGAUGUUAAGCGGUCACCUUUAAAGAGGCGCGUAUCUGUAUUCUUCGCGCGACGUAAUGGUGAGUUUGAUCGUCGACAAGCAUAUACCCAGUCGGAAGUCACUCAACAUCCCCCCCCGUUCAUAAAGCGGAAACCAGCCUGUGUAGUAAAGUGUAACUUUCUCAUUGCGAUUGUAAGUCCAAGGGAUAUCAAUCCCCGGGGUAGCGUUCCGCACAAUUAUGACGUAACCACCUUGGGGACGCUCAGUUGGAUUAUGAGCAGGACCCAUGAAGGACAGCUUCUUAACCAGCGACGAUGUCCUCAUUUUUACAUAAGUAUAUGGGAAUCCAGGUGGGACCUGAUAAGGCUCUUGGUUCUAGUUCUCUUGCCACGCCAACUCCGUCAUAAGCCCAACAAUCGAGUUACCAGCACGCGGGUAUACCGAGAUUCUGGGCGUAGCUUUGCCAAGAUCGUAAGUAGGUACGCUUUCUUGCAUAUCAAUCAAGGUGGUAUAUCGGGACACGGCUCCGCCCGCAUGGGGUUUCGGUCCUUUGCAGUCACGCACCCCCUACCGGUUAGGGGAAACGGAGUUCUUCUAUACCUUCCCGUUAGAUGUGAACGUCUAAUUUCGCUUGUCCAGACUGAAUGCCCAGACAGCAUUUUCCGCUUUGACCCUGGCUCUGCAUGUCAGUUCGCCAAUACCGUAAGUUACAUGCAGAUCUCUAAUUGCUUGCCACGGAUAGAUCUUCAUGCAUCAUGUUAUGUCGGGACUGGACAUGUAGAAGAUAUGCGCUAUACAUCCCAAAUAGAUUGUGAGGCCGUGAAGUUGAUGUCUGCUCGCAUUGCCGACCAAUUCUUUAGCUAUACCAAGGCGUUUACCUACUUGAGAGGCUUGCCCACCGGCUUCGUAUGUGGGUCUUUACUUAUGUAUACAUUGCCGCUCCGUUAUGGUAUGGGGCACCGCAACCUAGACACGACUGGCCGCCUGUUCAUACUAAAUCCUGCUUUCUGCGCGCCGACGGUUGCGGAUAUUAAUUGUACGAGACAAAUCAUCGCGUCGACUGCCAUCUUUAGUGCAAACGAUUUACCGCUGGUACUUGGUGGACUUUUCCCCAUUAGCUCACGGUGUCCAAAAUCAAAAAAUAUCAUCUCACUGAUCCCGCCGCAUAUACGAUUGGCCCGUUGGCGCGUUUCUAGUGCACUCCUAUACUACCUCCGUAAACGUAAGUACGUCACUCCUAGGGUUGCUGUGAAUGUGUGCCGUUUGUUGACGUGUCAACCAUGCCUGCCCUCUAACCGACGGUGUACGCAUGCUAUGUGCCUGCGUCCAAUCAAAACAUCUCAACAGACUAAUCUGAAUCUUAUGACGAUGUUCGCCCUGACGCGCGUGGGCCAUACAUGUCUGUUUCCGCGUGAAUUACACGACGUGCGGGUGGUCGUUGCUUCCCUCCGAAUGCCGCAUUACGCGCUUCGUGUAAGUGUUAAAUCCUCUCGUUUGCUCCCGCAACAGCCGGGACGGGAAAUAUUCAUUGCCGA
GUGCGCGUAUGCUGUCGAAAUCUCGCGAAUCUCCAAGAUGGCCCCAUGUCGGCUUAUGCAUACACCGGGAGGGCGGCCCAUUUGUUAUCCGAAUCUUAACCGGGGCGUACUGACGGUAGCUACGACUCGUUGCAAGGGGUUCCGGUAUGUGAAUUACAGACAGACUCGCUGCUCUCUAUCGCUUAUCAGGCCACUGAGCGUGAGCCGUCGUGGGUGUAAAAUAGAACAUGGUACGCGCGACGGCCGUGCCUUGAUUCGCGCCGGAUCCUUGGUUGAAAGAGUGAGUUUGCGCGAAUGGUGUUUUGUGAACGCGCGAUGGGACCUGUUCCAGAGUCAGGCUAAGUGCAACAUAGUUUAUGUUAAGCUCACUAUUCAAGCCUACUGGCUUGUUGGAGUGCUACAGUCUGAUACAAGCGUCAUAUUGGCUACUGUAGACACUCAAUCUUGCCGGCCCUUUUACCAACGGACGGGUAGGACAAGAUAUCCACAGUCAUACCGCACCACGCUCGCACGAUAUAAAGGCAGAUGUAUGCGUGUGGCUGUUGGACCAGAAUGUCCAACUUUCCUGAAUAUCUACAUCGCCUGUAUUCCUAACACUGGUAUCCUUAUGCAAGAAGGUUUGAUUGGGUACAUGCGAGACUUGUGUCUCUACAGUUGUUUACGUUCUAUCACCAGAAAGGUGCAAAUUACGAACCGCGCGGCGAUGCUUUCUUUCCAUCGCAACACACUGAUCAGAGCGAUUGUCUUCGCGACGGGCGAGUGCUCACACCGACACGCUAAUCAUCAGGUAUUCGUCUCGCUAAGGUUACCAGGUAAAUGCAAACUACCACAUUUAAGCCAUGGAUACGGACUACCAUUUGUCGGUAGCCAAUCACCCUGUCAGAGGUUACGUGGUCUCCCACUGGACGAGUCUAGCUAUGAGCUGUCCUUCGAGGAUAGCUUAUGUAAAUUCCAGACCGCGGGAGUUCAGCUGUUCCUCAUAAUAUGUAAAAAACUGAAAGACACUCGUAGCGCUCAAAAAACCAAACUUCUUUUCGCCGCUGCAUUUGUAUUAGAGAUGAGUGUCAGGAAGUCCUCAGUCAGAGGAGCAAGAAAUGAACUCAUAACCCAUGAUUCACGCCGCUUACAACCGGAGAUGACGGGUAUCCGUGAACACGGUGAUGUGCGGACCGGGUUGGGUUGUAACGGAUUACGCCUUGGGAUGCGUUCGUAUUACAACUCAAUGACCCUACAGCUAUAUUGCAUAAGGGUAUACGUCCAUGCGCCAACAAAAUCGCGCAGACCUCUAUCGCGGGACUUUUUCGUGUCUAUUCCGGGUGAUAGGAUGCCCGAUACGGACGAACGAUAUUUAUAUAGAUGGCCACUCAAGCCCGCCAUGUACCUACUCCCUAUAAGAGGCCUUAAAAUUUGGCAUGGUCGUGAAUGGUAUUGGAUUAAGGAACGCCGUGGGUCUCGGGAACGCAACGAGUAUAUUCUUACACUUCUUGGCACUCCCACCUGCAUUGAACGGGACGCGCCCAAACUCGAUCAAAGAGCAGAACUCUCCGCGCCCGUCAUAGCUCACAUUUCGCCACGAACCACAGAUGGUUUAAUAAAUACUAGCCUACGGCAGACUGAAUGGUUCGGGCCCAAGUGGCCUAUCGUGGGCCCGGCAACCCUGGCGAUCAGGGCGGGAUCGGAGACUACAUAUAAAAGACGAACGCACUACGCCACCAGCCUACUUGUGAUUAACUCGGCUGGUGAAGCUCACGUACGAACGUCGCACCGCGGGUGUGUCCGCAGGACCUCUUGCCCCUGUGGCAUCUAUCGAGUGGUUAUGCACCGCACGCGAAUCCCCGCCUUCCAUCCGCAACACGAUGGGCUCUAUCUUUAUUACCUCGGUCGACGUUCUCUCAAUAUUGGGCGCACGAGGCGUGCAUGCCUUGAGGUGCAAAAUCACGUAGAUGUGAGUUCUCUUUUGUCUCGCACUCCGCCCCGAUGUCUAGAACAAUCUUUCAGAC
ACCGCGUCGUUAAAUAUUUACUACAUUUCUUGAUUGAGGAUGGUCGGACGCCUCAGAGUUCGUCCCCUCCCCCUACUUGCCUGCUUUAUUUUGGCAAAGCAAUGUCCCUGAAGACUGCAUACUUAAAUGGCCGGGGUCUCAUGUUCAUCCACCCCUCAUGGGUAGCGCUCACGACAAAGUUUUUAUCAAAGAGUCCCUUGUCACAACAGAUGAUGCCGAUGCGACAAAUAAUGGCACGUUCGUGGGCGUUGCAGAGGAGGGGCUUGUCCCACGGGACGCGAAGGGCCCCCGGUCUUAGUCGGAACGGAAUACAGUACUCUGCCCAUACCUGUCGCGGCACUUAUGUCCCGAAUACGACACUCAUCCGCUGCAGUGUCCGCUCAAUUCAUUCUAGCUAUUGGGGAUCAAGGCCAUCCACGACAAGGGGGCAAACAGUCCUCAGUGACCUACCCUUGCCUACCGGCCAUGGCGGUACAAUGCCGCCCUGUUUUCAGCGUAAGUGCGCAGAAGCCAGAGCACAGCAACAGACAGCGGCGAUUCUGGUGCACAACCAACUCUUGGGUACGAUCCUGAGAGAGGAAGACACACUGUAUGUUGUCCUAGAGGUGAGGAGGCGUCGUAGGAGCAGACGGGCUUAUUUUGUCCAACACGGGGGAGGUCGUACUUCAUUUAGAUGUAGGUGGGUUCGAAACCGGGAGGAACCUAGUGCUCUAUCUCCGGCUUGGGAGCCUAAAGAUGUCGGUUAUGGCGUAGUAUGGCGGAUUCGAACAACUUUCGAAAAAACCAGAGCGAGGGGAGAGUGUGUAUUGGCAUUGGGACCUUUUUUUAAGGUACUGCCGGGCAUGCAGAAACCCGAAUCUGGUCCGUAUAGGACUCGUAGACGGUAUCUAGCGAUAAUUAAUUCGUUCGGAAGCAUGAAGAAGGUCGGAAGUGCGAAUGAGGCGGCGUCGUCUGCCGAGACACAGUGUACCCGCUUACGGCGGGUCUGCAAAGGUUGUACUUAUCGCUUCUCGUCGCACUUAUGCAUGCAGAUCGAUUCACCCGACCGGUUCUUGGAGGGAAGCGAACCGGAUCGAGACUGGAGAGAUCCGGGUAAAGAACACGUAAAGCAGUCCAGAUGCUACGGCAAACCACCGUCAUAUAGGUUGUGGCUAUCGGGGAUGCGCCCUAUUAGGGCUUUUGUGGCCGGGAAAAUGGUAAAUCCUUUACGGGCCGGGACGAUCCGUGCGCUGUGGACACCCUGCAGCAGGAGCCAUAUUGUACUAUUUGCGUCCAUAUAUUCAUGCCACUAUACUUUAAGCCUUGCAACGCUAAUGAGUGUCGGUUUCAUUUUGCUGUUACAGUCCACAUUCUUUUUUGAAUUCAGCACACUGAUUUGCGAGCUGGCGACCAUUGGUCUUGUCGGGCUGGGUUUAACCACAUCAAGGAUCCGAUGUCAACGCGAUAUCUGGUCCAAGUCUAUUGAGGGUCUCUCGAGAUCCAACUCGUAUGCGGCAACUAAACAAGGUCCAAUUGAAUGCCGCCGGAAUUGCCUUGUUGUGGCCAGCAAUGCUAGAGUUGUUAAGUGUCAGGCGGAUAAUCGCAAAUAUGUAUUAAUUAACGACCGCUGGGUCGCCAGAGAUCGAUUUAAACCAAAAUUACCCCGGUCGGCUGGAAGCGGGGAGCAAUCACGGGGCCCCCAGAGGCGGCUGAAGCCUCUGAUAGGCGUCUGCGAUGGUGUCAAAACGCGGUUUUCUCAUGCGACUGUCGUAAAUAGCUUGCCUGAGAGAGAAGAUAUAAUCAGCUACAAGACAUCACGCUGCGCUCUUGGCUACUAUCCACUCGGGGGUGCUCCUUACCUGCAAGCUUUUGACAGCGCGACCAUGCCUUACCUCAGAAUCAAGAAACGAUAUGAAACCCGGCCAUGUGACUGGCAUUUGUCUGCGCGCAAAUGUAUAGGAAUAUUUCACGCAAAAUCUCGUUCGGUUAUGCGGGUGUCGUCUAUACUGUACUUGCCGUCC
CGAAGACGUACUGAUCCCCUCCACCGAACGAGCCUCCGAUGCUUUCUUUCCUUACCGCCAAAAUCCGAGCCGGUCAGCCACCUAGCUAUCGUGAUUGCUACGGUUCACACGGUUAUCCCACCUAUUACUCCGGAGGUAGGUAUGGAAACUCCGGAAGUGGGCCCAACUGCUGGAGAGCAAGAUAGCACGAUCUUGCUCCGUAUAGCCCGGCACGGCCUCUGUAUCCUCAUAUGCGCGAUGGGACUGAGGUGCAGGGAGGGUCAAGCGGCUUUGGGCAACACAAACCCCUAUAUGAAAACAUCCCUCUACACUGUAGGGCCUUUGCAAACGGUUCUACAUCAAUGUCCGCUGAAACAUUUUCAAAACUCCAUGAUAACCCACACAGAAUGUACCUAUAACCGGAUACACGUUUAUCAAGAGGAAACGGUAGAUUCGUUCGCUGUCCGACUUCCCCUACGGGUCUCUCGGGGCAAUCACUGUAUCUGCGUCAGUUGCCGGGGACGAACGCACUCCCGCUGGAGCCACUUACGGUGCACCGCGCACAGGGCAAGACUCGGUUUAAGAGACAUAACCUCAAUUGCUUUUACCGACGGGCCGGAGUUUUGGCAGUUCUCCGGUCCCUGGCCGAUCUUAUUUCGAUGUACACCCUUUCACGCCUGGCCCGGAACAACAGGACAUAGAGUCCCCGAGUCAAUUCGUGCUAAAAUACAGUCGACGUGUGACGUAAACAAGUGGAAAAAUGUCGUAUCUUUUGGACGCGUACGUAAAUUUCGUCUGUAUAGUCACCGGUCAACCCACCCGUGGGCUAAGGAAUCGGGGCUACUCCCUAAGCUACCGUUCGGGCGCGUGCGCGGGAAGCUAUGCAUCCACAUCUGGGCUGUACGCCAUUCCAACUAUAAAGACGAUACGGCGUUCAAACAGUCGUUAAGUUGUAAUUGCAUUCCGAAGGGCCGCGAUUGCGCCUGCAAUAAAAUAGACGCGAGCCCGGUGGUUUCCGUCGAAUUCAGGGGGACACUCGCCGAAAGUGAUCGAAAUCGUUACCAAGCAAUGACUAGAGAGAAGUACAGUAACCUGGCCCCGCCCUGUCUCGCGCUCUGUCUCGAGGUGUCGCAGGACCUACCCCUGCGGAUCAACAUGUUGUACCGAGUGCAGACUUGUUGUGCGUUCAGGAGUGCAUUAAUUGUAGUUAUCCCAGUACGAGGCUCAGGCGGAACGCAGGUGGGGUACACGAUUCUAACGGACAACUUCCGACCAUCAAUCCAGCGGGAAACUGACUUGAAAACCUACGAUGAGCAGCUAAGUAUCCCCGGAGAAGGGGGACGCACGGUUCUUAGUCAGUUACAAAACCGGAAGGACGCGACGUGGGCAGAAGAAUCAUAUUGUAAUGGUUGCUUUUUACCCCACGACCACCGAGGCGAACGUCGCGCUUUGAGAGUGCAAGUGACCCCUCGGUCUCGCGGGGAUCGGGGAUCGCACAGAUGUCAACAUGACCGGCGUAUUCAAUAUGUUAAUCACCCCAUCCGCUUACGAAGGAAUCAAAGGCUUUCCGACCGGGGUGCCUCGUGGCGUCAAUUCGGCUUGCUGCUCAGUUAUCCCACUAAAACCUUAUACUCGACGGCAUCCCGGACUGGUCUUAUCCAACAGACGAGCACUUCAGGGCUUAACGGUGGGUCUAGCGCACACCCUACGCUCGCGAUCAACUUCAACAAAGCCUUGCUUCUCACCAUCCUCAGAAUAGAUUAUGAAGCCAUGUGUGCCACAAGCCCACUUGAGUGCCUCCUGACGGGGGUAUUCCGAAUCUUUAGGCAGUUGAUCCGUGUUGGCCCUUCGGCGGUUCGCCUAAAUAACGACCAUACAAUCAUUACGCGUAGUAAGCGACGCGUAGUCUUCACACUGACAACAGUGCUCUGUUUGGAGGCGCUCUGGACCCGGGUUAAACCUAGACACGGCGGACCGAUAGGCCUAAAGAAACCCACUGAUGUUGUAACGCUGGCGGGGAG
GGGCAGAUGUCGCCUAUCCUUAUUCAGAGACACACACUGGGAACGCUUAAAUCACCAUUGUGUUAUCGCUUGGUAUAACGAGUCAAACUUUCAAUUAGCAAAAAUCGCCAGCAACGACAUACUUACUAAACGGGGGCUUAGAAUCCUGUCCCACCUCGAUUUACUUUCACGCCUGUACGUGCUAGCCAGCUGGGCGAAAAGACGAUCUUGCCUUUCGCAUGAAUACUGUUUCAGGCCGUAUGGGAAAGAAUGUGGCGGCCAGUACAACACUUACAUAGUAAUCAGCCAUGCUACACCGGUACGGGCACGAAUUAAAAUUCAGUUAAUUAACGCCAGAGCAGUUCCAGUUUCUACCCUGAUUUGCGUAAGAAAUGAUUGGCUUCGACGUGACAAUUCGCCUUGCGUCGCCAUAAAUCUAUUUCGACGGCCGUCUGCCCAACGCGGUUGGCGGCGUACUAAUAACCGGUCCCAUCAUUGCAGGCGCUCCGCCACAGAGUCAAGUAUGACAUGGUUCUCGGAUCGCUGCAUUAUUAUGCCCGUUACUUCGGCUAUAUCCAUAAAAGAUUCCGAUUCUAAUAUGUUUGAUCUGUAUUGUGACACGAUAGGAGUAGGUAACCGCCACGAAGACGUUAGAGAGCCCUCGAUACGGGUCGAGGACCUUCGAACUGUAGACACAUACUGUGAGUCCGGCUGGGUGUAUGAACCCCUAACCAGAUCGGGAAACUUCAGACUUUUCGGGGGGCUGAAUCCGUUCGGAUUGUCUUCCACAGGAGUAUGUAGUGGAGGGAUAAUGGAUCUUUCAGAGCGUAACUUCCAACCAGCCGUGAAGUUCGUAGCAAGACCACCAUCCAGUACGCCUGCAAAGAUGGGUGGUCCCGCUGGUGUCUGGUCGUUGGGUUGGAAACGCGCAGACCUCCCUUUAGAUUACUUUUUACUCGAACUUGGGCAGGACCCUCCACCUCGAUCGCCUCUGGGCGCUCAUGAGUGUGUUCGGUCCAGCCGGUUUGUAGAUUCAAGGAACUUAGGGUCUGGGCGGCGUACUGCCGUAUUUCAUACGAGGGCGGGUCUAGUAGCGUCGACUGUCCUCCCCGCGCUCAACGUGGAUUAUGGCGAGGUCAUUACAGUGCAGCUUCAGCGCGUGGUGACGGGAUCCUUCGAACCAUAUUGUACGGUAAGGAACGUAUUUCACAUGAUACGCAACAUUGCACUCCGUCAAUGUGUUGUUUUAGGUGGACUGCCUCAAUGCGAUACGGGACUAUCUCUAGUACCUCACGGGAUCGGGAGGCAGGCAGCUGCAUACUGGAAAAUUAAAAUCCCUGUCUUCUCCAUUAUUAGGGAGGGAGCUUGCUUUAAUGUGUAUUUCCCUAGCUCUCCCCCAUUGGUCUCUGAUGGAACGGAGUCAGGGAAUCGUAUCUCAUUACUGAAGAAAAUUACAAAAAUGGCCCAGGAUCCGUUCUACAGAGAUCCAACUCCGCUUUUUGUAACGAUAAGAGGCAGGAAAGGGGCGGCAGCCAGAAUGUCUAAACAAUGUGCCUAUAAGAAUCCGGCUUGCCCUGCGUCGCAUCAAUUCACGGCUAACCCUUUGUUUGGUGCGGUCUCCAGCUCUAAGAGGACCUCCCUCCUUACGGGCGGUGCUAUUGAGCCGAUUCACGGCGAGCGGUGCGGCCAGCCGUGCCUUCGACGGACAAAUUCUCGUGGGGAACGACGCCGCGCAAAUGGGCCUCAUCCCAGCGGCGACAUACUAAAGUAUCCUAAACCACAAAACAGGGGCUCGCAUUCCAAAUGGACUGCACUUAAUCCCCGCGUCGCGAUCGAAAGAAGUAACUCGGAUACGGGCGCGUCGACUGCUUUGGGCACUUUAAAUACGGCGCCUAUCCGGGAAAGCUACUCACUCUGCCGGUAG -------------------------------------------------------------------------------- 
/datasets/rosalind_prtm.txt: -------------------------------------------------------------------------------- 1 | ACPATICQFGCFEFEHGVQTQCWLHKGFKNAWRYHQPRWYLAMDCYYTQQVGDEVYTCNIMAGSPVMHLHFLWKHQKKHEFQLDYLTGLTMIWPWMMMSNRDVKKCDSIFHHEWMVPIWHKMRNWWRNHCKCPTTKHEPYMQNQELMIEKISSQCRATWMQKDMGVSEHETIPHNCIGVRALATSITGKRGCVASLFMCQAYRICWQLEHWRGFIEYYESMGNFWAMKHVLKKAYKPERQPIPRWCSLDIGRMEKEHFINGWGHEVCNMIKIILWWDEVDLCQISSAKGQGIVPTYEQQEYTSIWPHLPGAHRKADKEVGNCITGQWETEQWWCFGWNYFMAMLSRKQFMDIGKPDNRPDNLCIWTKIYSYGMDNRMTNIRFQEEWGFLNMVACVRSDWCPLHFHDLCWTLRVSIYKVAQELVQREPPDVVSRTIRPGQLNTPGLQYEADKQSGGFIPNSCYKWDIKTIQYHASDYCMFIWVWYVRHHNGYDVMRYCWLLQPGPCQHLKFIQCVHLNADCRHYSMMERSKVRLFGFDGYHILEQNNIWVMGKGYPFHRAIDICRAFSDCAYGWWVWCHRFKCDSPWIHFWDRVLQMRWSVSGVNHHEKTLNHKEEAEQMGAADKTHYQTFQYSKATSISCVRQWKSMPNFFDIYPQGHRQRYMWTTHTYFGAHFWERTDDFRAHHEEAKRRLENFAYGRCCSWVNRGYCYIRKKTPFDFIDDNAGVEECEINGPFNHEQIPLYQLMGDFMKSRGSINCWLLRRWWKCGTEAWIQGGWTHAHRELYICMMYTRKWLLKMVNTATYFYLNKYAKSKDAGLVCLTMQMVGQWVSHG 2 | -------------------------------------------------------------------------------- /datasets/rosalind_revc.txt: -------------------------------------------------------------------------------- 1 | 
GGTTGTCTTCACGACAACACCGCGCTGGCCAAGGTATTAAATAGGCGCATCACTGCTTCTCCCATGACATTGAAAAGGTTAACTTCCCCCATCCAACGTCAGTCTCCATTCTTTCAGTCGGGGGCAAATATGACCGGAGTTAAAGGACTCGAGCTATTGCAGAGCCACGGACGAGAGCAATTCACCAGAGCATAGCAAAAGTGCCGGCCGGCCTACGACCCGGAGACCGTGAAGTTGCGTCCCATTGCTGTTGCATTACAACCGCTCGCGAACGCATCAGTTGGTATATATATCTAAAAGCGTGGCGTGGCCTTGGTACTATTCGGAGAGATCCTAGTTATGCATGAGCAAGCTCGCCGTACGGGTCTGCCACGGTGGAGTTGAGTTTGTAGGCGATAGCCTAGCCGACGGGCTGACTGCGCCAAGAGACTTTCTCCTGATTGAGCTTATGATTGAAAGGCAACGACACAATCATATCGCCGGCGAGTAGACGTCTACTAAAGTTACAGTATTATTTAGCGGCTGAGGAGTTCACACCCTGTCCCAGGTGCGGACAATACTGCTATCTAGGGGCACATGGGTGATGGCATCGGACCGTATTGGATGGGAGGAAATTGGTTCCTTTAGACTAAAGGGTCGTTTCAGCGACAAATATTCAACGTCCCTCTTTTCGGATTTCATTTGCTCATGCAGATCGTGACTACTTACCTGCGACCCGGTAGACATGTGAAGGCTGTTTGTCTAATATCCTTGCATGAGGGTCGACGTATATTGGTCTACCAGGTTCAAGACAGCGGACTATGTATAATCTTCTGGTGTTCCATTAACTCACCCTCAGACCCGACGAAATCCAGCCAACCCTGGGAACTGATATCCCCACAAAATTTCTC -------------------------------------------------------------------------------- /datasets/rosalind_revp.txt: -------------------------------------------------------------------------------- 1 | TAGCCTTCGCTGGAGCAGAAATAAAAGGAAGTCCATACACTGAAGTCCATTGGAGCATTACTCGGTTCAGCTTATAGGACAATGACAGCGCAGCTAACCGCCATCATCCCGACTGATTGTTTCGCCTTAAGAGGCTACTTGGCGTGAACGAACGATCCTGCTCTCTCTTTGCCGAGATAGTCTGCCGCCGTTGCTCCCTCGTGGGATCACGCGGAGGTGAGAGTAGGCTTTTCGTTGAAGGGTTTAAAAGTATATTCGTCTTAGGCCGTAGGAGTCTCCAGGGTTGAGACTTCCCCGTCCTACCTGAGCGAACTCTTCCGTGATCCGGATCAGGATAAACTGAGGTTGGATGTTGTGGGTTCCGGGTGGTTTAAGCAGCCCATCAAAGGCCAGGATGCGTATCTGAAAGGCGATTAGCCCTGAGATGTATTGATCGATCCAACATTTTCGCGATCAGTTCTATTCGCTCCTAGTTTCCGTAACGCCACGTCCTGAGGAATCCCGCAATACAATGCGCTTTAGGATCCGGCGGATGGTCAGGGAGTTATAAATTTGGGGTTATCGCAGAGTCACATGCCATCGTTTCGACAGCAAGACTAGTGAAGAACCTAGTAGTGACCCCCATCGCAAGTCCAATAACCTATCCTGGAAGCAGTGATTCCAGTCATCGTGAATGCTAGTGCTATCCGCGGTTATACACCCTTCACCTATGTTCGCGACAGTAGCGCGCAAAGGTCAAGAGGGGCTCAGTGCGACGAGCGGGGCGGTCATATCCAGACCCAGATTCCCGCGCCGAATGGAAAGCTGTGTACGCGTCCTAGCCGTGGAGCGAAGGTCATTGCGAGATGCTCCCCACCTTTCATTCGGGACGCCCGCCTGCACTGTGTCGAAGGCGGTAGC 2 | 
-------------------------------------------------------------------------------- /datasets/rosalind_rna.txt: -------------------------------------------------------------------------------- 1 | GAACTAAGGGCTGGGTTATGCTCCCTAGTTCCAACCTGCGGCTAAGAATCATTAGGTAGATAGTGCGGTAAGGGTGTAACATTACCGCCTTTAATCTTCTCTCCGTAGGGTTAAGGCTGGGTTAAGAGTACGACTATGATCCAGGGCGACTGATGGGTGTTTCGGGGACGCCCTGTTGAGCAAGGCTGAATTAATGCGAGTGTAACTTAGTATGTCGCAACTCGCTGTGCCATCCGTAAGGCGTTACGCACAGGTTATGCATCTTGCACGGCCTCTATGGAGAGACTTGTCACGTTATTTATGACAACACTGGGGCAGATATACCCTCACCGAGTGCCCGCCCTGTTCACTACAACCATCGACGCGATCTCGAACTTGATACTAACTCTGGGCGACGTACAAGGTCGGGGTCACACCGCCCGGGGCATACATTAGTGAGAGCTAACCCACATATTTCATTGGTGTCCCGATACTCCGCCGCTATTATGTATAATTTGTCACACCGCTGGCCTAGACCACACCACACACTGACTTAATGCTTGCACCTCTCCTAAAACAAGTGCACATGAGGGTAATATCGCGACTTTCTTCATGAACATCATCGATGTCACATCTCAATAGTGCTTCTGAATCGCTAGCCTCTGGTCGGAGGATACTGATAAGGCGCTTAACCAAGGCATTCAGTGAGTTAGCGGGTCGTCTTGCGCGCCAATCCCTTACAAATTTTTCTGGCTTAGCCCTGAAGCCTTCTCTACATATAGGGGTATGTGAGTCGCAGGTAAGCCTTCTTCTAGGGTAAGGTCAGACAATCTGTGTATTATATCGTAAGGTAGGCAAGCCCGTTGCTTCGTGCCTTTATCATTGACATATTCAAAATTGGCGGGACTCAAGCTTGCGTGGAC -------------------------------------------------------------------------------- /datasets/rosalind_sign.txt: -------------------------------------------------------------------------------- 1 | 3 2 | -------------------------------------------------------------------------------- /datasets/rosalind_spec.txt: -------------------------------------------------------------------------------- 1 | 4034.62705316 2 | 4131.67981316 3 | 4218.71184316 4 | 4331.79590316 5 | 4462.83639316 6 | 4577.86333316 7 | 4678.91101316 8 | 4781.92020316 9 | 4909.97878316 10 | 4981.01589316 11 | 5068.04792316 12 | 5139.08503316 13 | 5240.13271316 14 | 5396.23382316 15 | 5552.33493316 16 | 5609.35639316 17 | 5696.38842316 18 | 5824.48338316 19 | 5925.53106316 20 | 6088.59439316 21 | 6235.66280316 22 | 6363.72138316 23 | 6466.73057316 24 | 6537.76768316 25 | 6665.86264316 26 | 6796.90313316 27 | 6867.94024316 28 | 
6998.98073316 29 | 7185.06004316 30 | 7284.12845316 31 | 7371.16048316 32 | 7470.22889316 33 | 7598.32385316 34 | 7735.38276316 35 | 7848.46682316 36 | 8011.53015316 37 | 8139.58873316 38 | 8276.64764316 39 | 8379.65683316 40 | 8516.71574316 41 | 8644.77432316 42 | 8743.84273316 43 | 8880.90164316 44 | 9011.94213316 45 | 9175.00546316 46 | 9338.06879316 47 | 9466.16375316 48 | 9652.24306316 49 | 9780.33802316 50 | 9909.38061316 51 | 10022.4646732 52 | 10159.5235832 53 | 10258.5919932 54 | 10357.6604032 55 | 10504.7288132 56 | 10617.8128732 57 | 10718.8605532 58 | 10881.9238832 59 | 11009.9824632 60 | 11166.0835732 61 | 11322.1846832 62 | 11451.2272732 63 | 11550.2956832 64 | 11678.3542632 65 | 11841.4175932 66 | 12004.4809232 67 | 12133.5235132 68 | 12319.6028232 69 | 12420.6505032 70 | 12533.7345632 71 | 12620.7665932 72 | 12735.7935332 73 | 12848.8775932 74 | 13034.9569032 75 | 13191.0580132 76 | 13328.1169232 77 | 13443.1438632 78 | 13580.2027732 79 | 13708.2977332 80 | 13836.3563132 81 | 13967.3968032 82 | 14068.4444832 83 | 14125.4659432 84 | 14256.5064332 85 | 14371.5333732 86 | 14484.6174332 87 | 14647.6807632 88 | 14760.7648232 89 | 14873.8488832 90 | 14986.9329432 91 | 15114.9915232 92 | 15216.0392032 93 | 15303.0712332 94 | 15418.0981732 95 | 15581.1615032 96 | 15696.1884432 97 | 15859.2517732 98 | 15956.3045332 99 | 16071.3314732 100 | 16172.3791532 101 | -------------------------------------------------------------------------------- /datasets/rosalind_splc.txt: -------------------------------------------------------------------------------- 1 | 
ATGACCCGCTCCCCACCCGTCGTGCAGACCCAGAGTGACCAGGTCCAGCAAACGGACACATGTTCACAGGCTTGATGATACCGTCAGCCGAAGATGTTCCCTGGGTTTATTTTCCTCAGGCTGTATACTACTCAACTGCCTAAGCATGAAAGGCAATCTTACTTTGGTATTTCGCTACAGTACCGTGCTCCGCAAATTACTCTTACCATGGTCGGTACACAAGTACTACCCGCTCGCCTCCGGTGATCTACATTGTATCTAAGAGGTAGCCCTCGGGTAATAACCTTTGATCCAGAATCTCGATATTGCCGGCTCTGGCGCAGCGCCTAGGGACGATCCAACCCCCGAGCATGCTAATCGAGACCGTTCGCGCGCAAGCGTCCAGGTGTGATGTTTGATTACTATAGTAAAATTAAAACTAGAAATGGATAGGGTTGCGGCTCGGCCACAGTTCAGATCTCCATGGAAAGCTGCAGATTCCCACCAAATCACGCAGACGCCCAGCGCTCTAATCGAACAAGTCACGATGTTAGCAATCGCGCTCGCAAGGGACTCTTTGTTACAGCATACTTTAGCGGAACCCCTAACTCTAGTAGATGCGGACCCCATTCTAAGAGCGCATTTGATTACAAGACCTCTTTTAGGCAACGTAGCGCCTATACCACCACGAAGGAGTGCGCTGCCTGGAAGCTATTCCGGCTTTCGCGGTATGGTGGTACGCTTAATACGTGTACTGACCACTATATACAAGCTCCCACTTGTGGCGAGTTTAAAGGCGATTGCGCTAGACCGCTTACCACGCCAGGGTTACTCACCCCGTTGCGGAGGGTCTTACCTGCCTGGCCGGTAAAACGAACCTATCTAATTCACTCTCACCCCCCCGTTCCTAAGGAGAGGGCGAACACAGCTAGGCGCTACGCGCGGCCGGTTTTTCTCGCGGGGCATGGCTGGCGACGTAAGTGATTACTATAGGCGGCCCTGCGAGACGGACTAG 2 | AGGGTTGCGGCTCG 3 | GGTGATCTACATTGTAT 4 | AACTGCCTAAGCATGAAAGGCAATC 5 | CGCTCTAATCGAACAAGTCACGATGTTAGCAATCGCGCTCGCAAGGGAC 6 | GACACATGTTCACAGGCTTGATGATACCGTC 7 | CAACGTAGCGCCTATACCACCACGAAGGAGTGCGCTG 8 | CGGCTCTGGCGCAGCGCCTAGGGACGATCCA 9 | GCTAGGCGCTACGCGCGGCCGGTTTTTCTCGCGG 10 | GGCGAGTTTAAAGGCGATTGCGCT 11 | GTACGCTTAATACGTGT 12 | GCAAATTACTCTTACCATGGTCGGTACACAAGTAC 13 | CAGGTGTGATGTTTGATT 14 | CCCATTCTAAGAGCGCATTT 15 | GCCTGGCCGGTA 16 | -------------------------------------------------------------------------------- /datasets/rosalind_sseq.txt: -------------------------------------------------------------------------------- 1 | 
TGGACGCCCTTCGTTATTGCTTTTCCCCAAGGACGTGTTCGTTAGCACCACTGACCCTATCACCGCGGACCTGTCTGAAACGATCAGTACTTTTTTGAACAGGCGATCTGAACGAAGAGGACATAGTTATTTTTTTACGCTTAGCAAGTACTCTCTCTGTAGTACGATCACGCTCTGAATGCTCATAGGGTTATCCCTGAAGACGGTTTAAGTAAAAGCCATGCCGACTAGCCGTTAGGAATATTCGTACCTGAGCTACCGCCAGATCGACATGACCCTATATTGTGCGTATGAGCTAGCAGGACCGGATCGACATACCGTTGGGCTCTATCGTACGCAGTGGTTTATGTTATCGGGTAGATCAAGCGGTCGCGGGCAGTGCGTCAGTTATTAATAAAATGAACCCGCCGTCGGTGCCATCATTCATGAAGTCTATATAATGGTACTTGATAGAGTTGATGCATATCTAGACAATGCTTTGCCGGTGGCGGCTGGATTGCTGACGGCGCAAGTTTTGTTCCCTGTAGGAACTGTGCTGCCAGATTCAGCGCTGCCATACCTACGCAAGCTATCACCATGATGGCTTTAGAGACCATCTTCTCGAGGCTGTTGATCAGGTGGATGGAACCTTACTTTTTAGTCCAGAAGTTCTTCTCGGAATCTTCATTCTCTTTGCTTCCGGAACCAGTCGTGAAAGAGTCAGGACTAGTAGCCCAGGGGTGCAGACGCCAGGCGGTAGATCGCGGGCCGTTGGACCCGTTAGGAGCTTGTCATACGCCATCAACGACCACTATACTCCTCACACTATTAACTTGAACATGCTTTCTAAAATCATTGTCTCAGCGTTTCCGAAGTGAGAGCAGCGAGATAGATTTATCGACTCCGGTTGGCTGAGTGTTCGTGGCGGCTGAACATCGATCGGTTGGTCCATGTTCGTGGCAGCCCAGACTAGCTCGGCCGTATCCCAACATGAGGCGTG 2 | CGTTGATTCGGCACGGGCCATAGCTCCCCAG 3 | -------------------------------------------------------------------------------- /datasets/rosalind_subs.txt: -------------------------------------------------------------------------------- 1 | 
GGTCGCGTTGACGCGCTAGTAGCGCTAGGAGCGCTAGCACGCGCGCTAGTGCGCGCTAGGCGCTAGGTCCGCGCTAGTGGCGCTAGAGCGCGCTAGGCGCTAGGCGCTAGCACAAAAGCGCTAGTGCGCTAGCTGCGCTAGCCGGCGCTAGTAGTACTTCGGCGCTAGAGCGCTAGTTGCGCTAGCCGCGCTAGGCGCTAGTCACTCTTGCGCTAGGGCGCTAGGCGCTAGGCGGCGCTAGCAGAGAATGCGCTAGTAACCTCCGCGCTAGAGACGACGGCGCTAGGCGCTAGGCGCTAGGCCGGGCGCTAGCTGCGCTAGGCGCTAGTGCGCTAGAGCGCTAGGGCGCTAGGCGCTAGCGAACGCGCTAGTGCGCGCTAGGCGCTAGTAAGCGCTAGAGCGCTAGGAGCGCTAGTTCAGCGCTAGGTGCGCTAGGTACCTTGCGCTAGGCTGCGCTAGGCGCGCTAGACTAGCGCTAGAGCGCTAGGCGCTAGCTACGCGCTAGGCGCTAGGCGCTAGGGGCGCTAGGCGCTAGGCGCTAGAGTCACTGTTCAGCGCTAGACCGCGCTAGGCGCTAGGCGCTAGAAGGGTCAAGCGCTAGTGCGCTAGAGTGTTGCGCTAGGGCGCTAGGCGCTAGGGAAGCGCTAGGGGCGCTAGAAGCGCTAGCGCGCTAGTAGCGCTAGTAACCTGAACATGCGCTAGGCGCTAGGCGCTAGGGCGCTAGACCAGGGGGCGCTAGGCGCTAGTGCGCTAGGGCGCTAGTTGTGACGCGCTAGATGCGCTAGCGTCAGAGGCGCTAGAAACTGCGCTAGCCCGCGCTAGCTTAGCGCGCTAGTTGTTTCGCGCTAG 2 | GCGCTAGGC -------------------------------------------------------------------------------- /e001-dna.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Counting Nucleotides 4 | # ==================== 5 | #  6 | # A string is simply an ordered collection of symbols selected from some 7 | # alphabet and formed into a word; the length of a string is the number of 8 | # symbols that it contains. 9 | #  10 | # An example of a DNA string (whose alphabet contains the symbols A, C, G, 11 | # and T) is "ATGCTTCAGAAAGGTCTTACG". 12 | #  13 | # Given: A DNA string s of length at most 1000 nt. 14 | #  15 | # Return: Four integers separated by space corresponding to the number of 16 | # times that the symbols A, C, G, and T occur in s. 
#
# Sample Dataset
# --------------
# AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC
#
# Sample Output
# -------------
# 20 12 17 21


def count_basie(s):
    """Count the occurrences of each nucleotide in a DNA string.

    Args:
        s: DNA string over the alphabet A, C, G, T.

    Returns:
        A list of four integers: the number of times 'A', 'C', 'G' and 'T'
        occur in s, in that order.  A base that never occurs is reported
        as 0, so the result always has exactly four entries; symbols
        outside the DNA alphabet are not counted.
    """
    # Walk the fixed alphabet rather than the symbols actually seen, so a
    # missing base cannot silently shift the remaining columns.
    return [s.count(base) for base in 'ACGT']


if __name__ == "__main__":

    small_dataset = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
    large_dataset = "TAGCGTAGGATGGAGCTTAGTTCGCAAGCCTAATTATCCTCGCCCGCTGACGTGATGAAGATAACTGCAACGCACAGCAGGATATAAACTAGCAACGCAAAATGGTGGGGCCATGCACTGTCTATCCCAGCTATATCTAATATGTTGGCCGTTTGTGGAAATGCATGATCTGGGTAATTTCTAGGAGAACTCTTAGTCCTCAAGACTATAAGGCGGCGAAATAATAGTAACAGTCTTCGTACCAATTGAGAATCAAGCTCCTCGACGTCGAAGATGGGGGTTTACACCCCTTGACCAGCGTCCCCGGCCGTTAATCTATCTATAGGTTCACGTGGGGCGAACAGCGCCGAGTGAGCTCTACCCAATGATCGGGTGCGGCTTTGCGACTCGTATTGGGCGATGCGCCGCACCTGGCCCTGGGGACATACGCATTGTTTCGAATAAGAGCATACGCTAGTACCCCATACGAATGTGTCCGTAAAGACTAGTCCTTCCTGCGCTAAGGACGGGATTTGTTGAAACCTACGCTGATTGGCGACCGAGTAATCTGGAGATTATGTTATGATTGTAAAGGGAACACATAAGCCCTTCGTTCTTTTGAGTACCTTAGCGAAAAGGTATCAGTCTACGCCCAACGCTATCTCATGGGGTATCCCGAATCCAATGCGCAGCCACCTATCGTACAAGGAGCACCCAAGCCGATATTCGTGGATGGATCCTCCTTGGTGTGTAACTCTGAATCATGGAATCCGTCTAAAGCCTGTACTGGGTTAATCACCCCCGGTAACTTGAGTTTCCTGTCCCTTGAACGTATCTAGAGTTAA"

    counts = count_basie(large_dataset)

    # A single parenthesised argument prints identically on Python 2 and 3.
    print(' '.join(map(str, counts)))


# --------------------------------------------------------------------------------
# /e002-rna.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python

# RNA Transcription
# =================
#
# An RNA string is formed from the alphabet containing A, C, G, and U.
#
# Given a DNA string t corresponding to a coding strand, its transcribed RNA
# string u is formed by replacing all occurrences of T with U.
#
# Given: A DNA string t of length at most 1000 nucleotides.
#
# Return: The transcribed RNA string of t.
#
# Sample Dataset
# --------------
# GATGGAACTTGACTACGTAAATT
#
# Sample Output
# -------------
# GAUGGAACUUGACUACGUAAAUU


def transcribe_rna(t):
    """Transcribe a DNA coding strand into RNA.

    Args:
        t: DNA string.

    Returns:
        A copy of t in which every 'T' is replaced by 'U'; all other
        characters are left untouched.
    """
    return t.replace('T', 'U')


if __name__ == "__main__":

    small_dataset = "GATGGAACTTGACTACGTAAATT"
    large_dataset = "GAACTAAGGGCTGGGTTATGCTCCCTAGTTCCAACCTGCGGCTAAGAATCATTAGGTAGATAGTGCGGTAAGGGTGTAACATTACCGCCTTTAATCTTCTCTCCGTAGGGTTAAGGCTGGGTTAAGAGTACGACTATGATCCAGGGCGACTGATGGGTGTTTCGGGGACGCCCTGTTGAGCAAGGCTGAATTAATGCGAGTGTAACTTAGTATGTCGCAACTCGCTGTGCCATCCGTAAGGCGTTACGCACAGGTTATGCATCTTGCACGGCCTCTATGGAGAGACTTGTCACGTTATTTATGACAACACTGGGGCAGATATACCCTCACCGAGTGCCCGCCCTGTTCACTACAACCATCGACGCGATCTCGAACTTGATACTAACTCTGGGCGACGTACAAGGTCGGGGTCACACCGCCCGGGGCATACATTAGTGAGAGCTAACCCACATATTTCATTGGTGTCCCGATACTCCGCCGCTATTATGTATAATTTGTCACACCGCTGGCCTAGACCACACCACACACTGACTTAATGCTTGCACCTCTCCTAAAACAAGTGCACATGAGGGTAATATCGCGACTTTCTTCATGAACATCATCGATGTCACATCTCAATAGTGCTTCTGAATCGCTAGCCTCTGGTCGGAGGATACTGATAAGGCGCTTAACCAAGGCATTCAGTGAGTTAGCGGGTCGTCTTGCGCGCCAATCCCTTACAAATTTTTCTGGCTTAGCCCTGAAGCCTTCTCTACATATAGGGGTATGTGAGTCGCAGGTAAGCCTTCTTCTAGGGTAAGGTCAGACAATCTGTGTATTATATCGTAAGGTAGGCAAGCCCGTTGCTTCGTGCCTTTATCATTGACATATTCAAAATTGGCGGGACTCAAGCTTGCGTGGAC"

    # A single parenthesised argument prints identically on Python 2 and 3.
    print(transcribe_rna(large_dataset))


# --------------------------------------------------------------------------------
# /e003-revc.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python

# Reverse Complement
# ==================
#
# In a DNA string, the complement of A is T, and the complement of C is G.
#
# The reverse complement of a DNA string s is the string sc formed by reversing
# the symbols of s, then taking the complement of each symbol (e.g., the reverse
# complement of GTCA is TGAC).
#
# Given: A DNA string s of length at most 1000 bp.
#
# Return: The reverse complement of s.
#
# Sample Dataset
# --------------
# AAAACCCGGT
#
# Sample Output
# -------------
# ACCGGGTTTT


def reverse_complement(s):
    """Return the reverse complement of a DNA string.

    Args:
        s: DNA string over the alphabet A, C, G, T.

    Returns:
        The symbols of s in reverse order, each replaced by its complement
        (A<->T, C<->G).

    Raises:
        KeyError: if s contains a symbol other than A, C, G or T.
    """
    complements = {'A': 'T', 'C': 'G', 'T': 'A', 'G': 'C'}

    # Complement while walking backwards; no intermediate list is needed.
    return ''.join(complements[c] for c in reversed(s))


if __name__ == "__main__":

    small_dataset = "AAAACCCGGT"
    large_dataset = "GGTTGTCTTCACGACAACACCGCGCTGGCCAAGGTATTAAATAGGCGCATCACTGCTTCTCCCATGACATTGAAAAGGTTAACTTCCCCCATCCAACGTCAGTCTCCATTCTTTCAGTCGGGGGCAAATATGACCGGAGTTAAAGGACTCGAGCTATTGCAGAGCCACGGACGAGAGCAATTCACCAGAGCATAGCAAAAGTGCCGGCCGGCCTACGACCCGGAGACCGTGAAGTTGCGTCCCATTGCTGTTGCATTACAACCGCTCGCGAACGCATCAGTTGGTATATATATCTAAAAGCGTGGCGTGGCCTTGGTACTATTCGGAGAGATCCTAGTTATGCATGAGCAAGCTCGCCGTACGGGTCTGCCACGGTGGAGTTGAGTTTGTAGGCGATAGCCTAGCCGACGGGCTGACTGCGCCAAGAGACTTTCTCCTGATTGAGCTTATGATTGAAAGGCAACGACACAATCATATCGCCGGCGAGTAGACGTCTACTAAAGTTACAGTATTATTTAGCGGCTGAGGAGTTCACACCCTGTCCCAGGTGCGGACAATACTGCTATCTAGGGGCACATGGGTGATGGCATCGGACCGTATTGGATGGGAGGAAATTGGTTCCTTTAGACTAAAGGGTCGTTTCAGCGACAAATATTCAACGTCCCTCTTTTCGGATTTCATTTGCTCATGCAGATCGTGACTACTTACCTGCGACCCGGTAGACATGTGAAGGCTGTTTGTCTAATATCCTTGCATGAGGGTCGACGTATATTGGTCTACCAGGTTCAAGACAGCGGACTATGTATAATCTTCTGGTGTTCCATTAACTCACCCTCAGACCCGACGAAATCCAGCCAACCCTGGGAACTGATATCCCCACAAAATTTCTC"

    # A single parenthesised argument prints identically on Python 2 and 3.
    print(reverse_complement(large_dataset))


# --------------------------------------------------------------------------------
# /e004-gc.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python

# GC Content
# ==========
#
# DNA strings must be labeled when they
# are consolidated into a database. A
# commonly used method of string labeling is called FASTA format. In this
# format, the string is introduced by a line that begins with ">", followed by
# some information naming and characterizing the string. Subsequent lines
# contain the string itself; the next line starting with ">" indicates the label
# of the next string.
#
# In Rosalind's implementation, a string in FASTA format will be labeled by the
# ID "Rosalind_xxxx", where "xxxx" denotes a four-digit code between 0000 and
# 9999.
#
# Given: At most 10 DNA strings in FASTA format (of length at most 1 kbp each).
#
# Return: The ID of the string having the highest GC-content, followed by the
# GC-content of that string. The GC-content should have an accuracy of 4 decimal
# places (see the note below on decimal accuracy).
#
# Sample Dataset
# --------------
# >Rosalind_6404
# CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
# TCCCACTAATAATTCTGAGG
# >Rosalind_5959
# CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
# ATATCCATTTGTCAGCAGACACGC
# >Rosalind_0808
# CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
# TGGGAACCTGCGGGCAGTAGGTGGAAT
#
# Sample Output
# -------------
# Rosalind_0808
# 60.919540%


def parse_fasta(s):
    """Parse FASTA-formatted text into a mapping of label to sequence.

    Args:
        s: text holding zero or more records, each introduced by '>'.

    Returns:
        A dict mapping each record's label (the first whitespace-delimited
        token of its header line) to its sequence, i.e. all following
        lines of the record joined together with whitespace removed.
        Records without a label are skipped.
    """
    results = {}

    for record in s.split('>'):
        lines = record.strip().splitlines()
        header = lines[0].split() if lines else []
        if not header:
            # Empty or whitespace-only record: nothing to label.
            continue

        # Only the lines after the header carry bases; words following the
        # label on the header line are description, not sequence.
        results[header[0]] = ''.join(
            ''.join(line.split()) for line in lines[1:])

    return results


def gc_content(s):
    """Return the percentage of symbols in s that are 'G' or 'C'.

    Args:
        s: DNA string.

    Returns:
        A float between 0 and 100; 0.0 for an empty string.
    """
    if not s:
        # Avoid dividing by zero for records that carry no bases.
        return 0.0

    return 100 * (float(s.count('G') + s.count('C')) / len(s))


if __name__ == "__main__":

    small_dataset = """
>Rosalind_6404
CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
TCCCACTAATAATTCTGAGG
>Rosalind_5959
CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
ATATCCATTTGTCAGCAGACACGC
>Rosalind_0808
CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
TGGGAACCTGCGGGCAGTAGGTGGAAT
"""

    with open('datasets/rosalind_gc.txt') as f:
        large_dataset = f.read()

    results = parse_fasta(large_dataset)
    results = dict((k, gc_content(v)) for k, v in results.items())

    highest_k = None
    highest_v = 0

    for k, v in results.items():
        if v > highest_v:
            highest_k = k
            highest_v = v

    # Single parenthesised arguments print identically on Python 2 and 3.
    print(highest_k)
    print('%f%%' % highest_v)


# --------------------------------------------------------------------------------
# /e005-hamm.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python

# Counting Point Mutations
# ========================
#
# Given two strings s and t of equal length, the Hamming distance between s and
# t, denoted dH(s,t), is the number of corresponding symbols that differ in s
# and t. See Figure 2.
#
# Given: Two DNA strings s and t of equal length (not exceeding 1 kbp).
#
# Return: The Hamming distance dH(s,t).
#
# Sample Dataset
# --------------
# GAGCCTACTAACGGGAT
# CATCGTAATGACGGCCT
#
# Sample Output
# -------------
# 7


def hamming_distance(s, t):
    """Count the positions at which s and t differ.

    Args:
        s: first string; every one of its positions is compared.
        t: second string, expected to be as long as s.  Symbols of t
            beyond the length of s are ignored.

    Returns:
        The number of indices i for which s[i] != t[i].

    Raises:
        IndexError: if s is longer than t.
    """
    # Index into t (rather than zip) so a too-short t is reported instead
    # of being silently truncated.
    return sum(1 for i, c in enumerate(s) if c != t[i])


if __name__ == "__main__":

    small_dataset = """
GAGCCTACTAACGGGAT
CATCGTAATGACGGCCT
"""

    with open('datasets/rosalind_hamm.txt') as f:
        large_dataset = f.read()

    s, t = large_dataset.split()
    dist = hamming_distance(s, t)

    # A single parenthesised argument prints identically on Python 2 and 3.
    print(dist)


# --------------------------------------------------------------------------------
# /e006-perm.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# coding=utf-8

# Enumerating Gene Orders
# =======================
#
# A permutation of length n is some ordering of the positive integers {1,2,…,n}.
# For example, π=(5,3,2,1,4) is a permutation of length 5.
#
# Given: A positive integer n≤7.
#
# Return: The total number of permutations of length n, followed by a list of
# all such permutations (in any order).
#
# Sample Dataset
# --------------
# 3
#
# Sample Output
# -------------
# 6
# 1 2 3
# 1 3 2
# 2 1 3
# 2 3 1
# 3 1 2
# 3 2 1


def fac(n):
    """Return n! for a non-negative integer n.

    Args:
        n: non-negative integer.

    Returns:
        The product 1 * 2 * ... * n; 1 when n is 0 (the empty product).

    Raises:
        ValueError: if n is negative.
    """
    if n < 0:
        raise ValueError("factorial is not defined for negative numbers")

    result = 1
    for i in range(2, n + 1):
        result *= i

    return result


def permutations(n):
    """Generate every permutation of the integers 1..n.

    Permutations are produced in lexicographic order, starting from the
    sorted arrangement, by repeatedly stepping to the next permutation.

    Args:
        n: length of the permutations.

    Yields:
        A fresh list for each permutation, so callers may keep or modify
        the yielded lists.
    """
    # A real list is required: the arrangement is modified in place.
    a = list(range(1, n + 1))

    while True:
        yield a[:]

        # Largest index k whose element is smaller than its successor.
        k = None
        for i in range(len(a) - 1):
            if a[i] < a[i + 1]:
                k = i

        if k is None:
            # Fully descending: this was the last permutation.
            break

        # Largest index l after k holding a value greater than a[k].
        l = None
        for i in range(k + 1, len(a)):
            if a[k] < a[i]:
                l = i

        a[k], a[l] = a[l], a[k]
        # The tail after k is descending; reversing it gives the smallest
        # arrangement that follows the new prefix.
        a[k + 1:] = reversed(a[k + 1:])


if __name__ == "__main__":

    small_dataset = 3
    large_dataset = 7

    n = large_dataset

    # Single parenthesised arguments print identically on Python 2 and 3.
    print(fac(n))
    for p in permutations(n):
        print(' '.join(map(str, p)))


# --------------------------------------------------------------------------------
# /e007-prob.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# coding=utf-8

# Introduction to Probability
# ===========================
#
# Just as a string is an ordered collection of symbols, an array is an abstract
# structure ordering a collection of objects (numbers, strings, other arrays,
# etc.). A string could therefore be viewed as a specific case of array. We let
# A[k] denote the k-th value of A.
#
# In a random string, each symbol is selected randomly from an alphabet based on
# some underlying distribution in which every symbol has a symbol frequency, or
# its own fixed chance of being drawn at any time.
#
# GC-content gives us a natural way to form a realistic random DNA string for a
# given species. If the GC-content is x, then we make the symbol frequencies of
# C and G equal to x/2 and set the symbol frequencies of A and T equal to (1−x)/2.
# In other words, if the GC-content is 40%, then as we construct the string, we
# have a 20% chance of the next added symbol being 'C', a 20% chance that it is
# 'G', a 30% chance that it is 'A', and a 30% chance that it is 'T'.
#
# Given: An array A containing at most 20 real numbers between 0 and 1,
# inclusively.
#
# Return: An array B in which B[i] represents the probability (to an accuracy of
# three decimal places) that for the GC-content in A[i], two randomly chosen
# symbols will be the same.
#
# Sample Dataset
# --------------
# 0.23 0.31 0.75
#
# Sample Output
# -------------
# 0.322900 0.286100 0.312500


def probabilities(s):
    """Compute, per GC-content, the chance that two random symbols match.

    Args:
        s: whitespace-separated GC-content values, each parseable by
            float().

    Returns:
        A list with one float per input value x: the probability that two
        independently drawn symbols are equal when C and G each have
        frequency x/2 and A and T each have frequency (1 - x)/2.

    Raises:
        ValueError: if a token of s cannot be converted to float.
    """
    results = []

    for x in map(float, s.split()):
        gc = x / 2.0          # frequency of C, and of G
        at = (1 - x) / 2.0    # frequency of A, and of T
        # Two draws match when both are the same one of the four symbols.
        results.append(2 * (gc * gc) + 2 * (at * at))

    return results


if __name__ == "__main__":

    small_dataset = "0.23 0.31 0.75"

    with open('datasets/rosalind_prob.txt') as f:
        large_dataset = f.read()

    results = probabilities(large_dataset)

    # Fixed six-decimal formatting matches the sample output and does not
    # depend on how the running interpreter abbreviates floats.
    print(' '.join('%f' % p for p in results))


# --------------------------------------------------------------------------------
# /e008-prot.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# coding=utf-8

# Protein Translation
# ===================
#
# The 20 commonly occurring amino acids are abbreviated by using 20 letters from
# the English alphabet (all letters except for B, J, O, U, X, and Z). Protein
# strings are constructed from these 20 symbols. Henceforth, the term genetic
# string will incorporate protein strings along with DNA strings and RNA
# strings.
#
# The RNA codon table dictates the details regarding the encoding of specific
# codons into the amino acid alphabet.
#
# Given: An RNA string s corresponding to a strand of mRNA (of length at most 10
# kbp).
#
# Return: The protein string encoded by s.
#
# Sample Dataset
# --------------
# AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA
#
# Sample Output
# -------------
# MAMAPRTEINSTRING


# Maps each RNA codon to its one-letter amino acid, or to 'Stop' for the
# three codons that terminate translation.
RNA_CODON_TABLE = {
    'UUU': 'F',    'CUU': 'L', 'AUU': 'I', 'GUU': 'V',
    'UUC': 'F',    'CUC': 'L', 'AUC': 'I', 'GUC': 'V',
    'UUA': 'L',    'CUA': 'L', 'AUA': 'I', 'GUA': 'V',
    'UUG': 'L',    'CUG': 'L', 'AUG': 'M', 'GUG': 'V',
    'UCU': 'S',    'CCU': 'P', 'ACU': 'T', 'GCU': 'A',
    'UCC': 'S',    'CCC': 'P', 'ACC': 'T', 'GCC': 'A',
    'UCA': 'S',    'CCA': 'P', 'ACA': 'T', 'GCA': 'A',
    'UCG': 'S',    'CCG': 'P', 'ACG': 'T', 'GCG': 'A',
    'UAU': 'Y',    'CAU': 'H', 'AAU': 'N', 'GAU': 'D',
    'UAC': 'Y',    'CAC': 'H', 'AAC': 'N', 'GAC': 'D',
    'UAA': 'Stop', 'CAA': 'Q', 'AAA': 'K', 'GAA': 'E',
    'UAG': 'Stop', 'CAG': 'Q', 'AAG': 'K', 'GAG': 'E',
    'UGU': 'C',    'CGU': 'R', 'AGU': 'S', 'GGU': 'G',
    'UGC': 'C',    'CGC': 'R', 'AGC': 'S', 'GGC': 'G',
    'UGA': 'Stop', 'CGA': 'R', 'AGA': 'R', 'GGA': 'G',
    'UGG': 'W',    'CGG': 'R', 'AGG': 'R', 'GGG': 'G',
}


def protein_string(mrna):
    """Translate an mRNA string into the protein string it encodes.

    Whitespace (such as the trailing newline of a string read from a file)
    is ignored.  Translation proceeds codon by codon from the start and
    ends at the first stop codon; an incomplete codon left over at the end
    is ignored.

    Args:
        mrna: RNA string over the alphabet A, C, G, U.

    Returns:
        The amino acids encoded before the first stop codon, as a string.

    Raises:
        KeyError: if a complete codon is not in RNA_CODON_TABLE.
    """
    mrna = ''.join(mrna.split())
    symbols = []

    # Stop the range before any trailing partial codon.
    for i in range(0, len(mrna) - len(mrna) % 3, 3):
        symbol = RNA_CODON_TABLE[mrna[i:i + 3]]
        if symbol == 'Stop':
            break
        symbols.append(symbol)

    return ''.join(symbols)


if __name__ == "__main__":

    small_dataset = "AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA"

    with open('datasets/rosalind_prot.txt') as f:
        large_dataset = f.read()

    # A single parenthesised argument prints identically on Python 2 and 3.
    print(protein_string(large_dataset))


# --------------------------------------------------------------------------------
# /e009-subs.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# coding=utf-8

# Finding a Motif in DNA
# ======================
#
# Given two strings s = s1 s2 ... sn and t = t1 t2 ... tm where m ≤ n, t is a
# substring of s if t is contained as a contiguous collection of symbols in s.
#
# The position of a symbol in a string is the total number of symbols found to
# its left, including itself (e.g., the positions of all occurrences of U in
# AUGCUUCAGAAAGGUCUUACG are 2, 5, 6, 15, 17, and 18). The symbol at position i
# of s is denoted by s[i]. For that matter, a substring of s can be represented
# as s[j:k], where j and k represent the starting and ending positions of the
# substring in s.
#
# The location of a substring is its beginning position; note that t will have
# multiple locations in s if it occurs more than once as a substring of s (see
# the Sample sections below).
#
# Given: Two DNA strings s and t (each of length at most 1 kbp).
#
# Return: All locations of t as a substring of s.
#
# Sample Dataset
# --------------
# ACGTACGTACGTACGT
# GTA
#
# Sample Output
# -------------
# 3 7 11


def locations(s_and_t):
    """Find every (possibly overlapping) occurrence of t in s.

    Args:
        s_and_t: text holding exactly two whitespace-separated strings,
            first s and then the motif t.

    Returns:
        A list of the 1-based start positions of t in s, in increasing
        order.

    Raises:
        ValueError: if s_and_t does not contain exactly two strings.
    """
    s, t = s_and_t.split()
    l = len(t)

    # The last window that can hold t starts at len(s) - l, so the range
    # must include that index.
    return [i + 1 for i in range(len(s) - l + 1) if s[i:i + l] == t]


if __name__ == "__main__":

    small_dataset = "ACGTACGTACGTACGT\nGTA"

    with open('datasets/rosalind_subs.txt') as f:
        large_dataset = f.read()

    results = locations(large_dataset)

    # A single parenthesised argument prints identically on Python 2 and 3.
    print(' '.join(map(str, results)))


# --------------------------------------------------------------------------------
# /e010-cons.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# coding=utf-8

# Consensus and Profile
# =====================
#
# A matrix is a rectangular table of values divided into rows and columns. An
# m×n matrix has m rows and n columns.
Given a matrix A, we write A[i,j] to 9 | # indicate the value found at the intersection of row i and column j. You may 10 | # choose to think of A as a collection of m arrays, each of length n. 11 | # 12 | # Say that we have a collection of DNA strings, all having the same length n. 13 | # Their profile matrix is a 4×n matrix P in which P[1,j] represents the number of 14 | # times that 'A' occurs in the j-th position of one of the strings, P[2,j] 15 | # represents the number of times that 'C' occurs in the j-th position, and so on 16 | # (see below). 17 | # 18 | # A consensus string c is a string of length n formed from our collection by 19 | # taking the most common symbol at each position; the j-th symbol of c therefore 20 | # corresponds to the symbol having the maximum value in the j-th column of the 21 | # profile matrix. Of course, there may be more than one most common symbol, 22 | # leading to multiple possible consensus strings. 23 | # 24 | # Given: A collection of at most 10 DNA strings of equal length (at most 1 kbp). 25 | # 26 | # Return: A consensus string and profile matrix for the collection. (If several 27 | # possible consensus strings exist, then you may return any one of them.) 
#
# Sample Dataset
# --------------
# ATCCAGCT
# GGGCAACT
# ATGGATCT
# AAGCAACC
# TTGGAACT
# ATGCCATT
# ATGGCACT
#
# Sample Output
# -------------
# ATGCAACT
# A: 5 1 0 0 5 5 0 0
# C: 0 0 1 4 2 0 6 1
# G: 1 1 6 3 0 1 0 0
# T: 1 5 0 0 0 1 1 6


def profile(matrix):
    """Count, per column, how often each of A, C, G and T occurs.

    ``matrix`` is the whitespace-separated collection of strings. The
    result maps each symbol to a list of per-column counts whose length is
    taken from the first string; empty input yields four empty lists.
    A symbol other than A/C/G/T raises KeyError.
    """
    strings = matrix.split()

    width = len(strings[0]) if strings else 0
    # One independent list per symbol so the counters never alias.
    results = dict((symbol, [0] * width) for symbol in 'ACGT')

    for s in strings:
        for i, c in enumerate(s):
            results[c][i] += 1

    return results


def consensus(profile):
    """Build a consensus string from a profile as returned by profile().

    For every column the symbol with the highest count is taken. Symbols
    are examined in sorted order, so a tie always resolves to the
    alphabetically first symbol instead of depending on dict ordering.
    """
    keys = sorted(profile)
    if not keys:
        return ''

    width = len(profile[keys[0]])

    # max() returns the first maximal key, which gives the stable tie-break.
    return ''.join(max(keys, key=lambda k: profile[k][i])
                   for i in range(width))


if __name__ == "__main__":

    small_dataset = """
ATCCAGCT
GGGCAACT
ATGGATCT
AAGCAACC
TTGGAACT
ATGCCATT
ATGGCACT
"""

    with open('datasets/rosalind_cons.txt') as dataset_file:
        large_dataset = dataset_file.read()

    p = profile(large_dataset)
    c = consensus(p)

    print(c)
    for k in sorted(p):
        print("%s: %s" % (k, ' '.join(map(str, p[k]))))


# --------------------------------------------------------------------------------
# /e011-eval.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# coding=utf-8

# Introduction to Expected Value
# ==============================
#
# Given a finite collection of events x1,x2,…,xn, where xi has probability pi,
# the expected value for the number of events occurring is given by p1+p2+⋯+pn.
9 | # 10 | # As a simple example, if you were to place 4 bets on sporting events this 11 | # weekend, and the probability of each of your bets winning is 0.4, 0.7, 0.8, 12 | # and 0.5, then the expected value for the number of bets you will win is 0.4 + 13 | # 0.7 + 0.8 + 0.5 = 2.4. Notice that this value is a decimal even though you can 14 | # only win a whole number of your bets, and also that the expected value only 15 | # quantifies an average case scenario: it says nothing about the actual outcome 16 | # of your gambling habit. 17 | # 18 | # Given: A positive integer m (m≤10), a positive integer n (n≤10,000), and an 19 | # array A of length at most 20 containing real numbers between 0 and 1, 20 | # inclusively. 21 | # 22 | # Return: An array B in which B[i] represents the expected number of substring 23 | # matches of a random string of length m inside a random string of length n, 24 | # where both are formed from GC-content A[i] (see “Introduction to 25 | # Probability”). Each value in B should be accurate to three decimal places. 
#
# Sample Dataset
# --------------
# 2 10
# 0.32 0.42 0.81
#
# Sample Output
# -------------
# 0.717748 0.591669 1.078067


from math import pow


def symbol_match_probability(gc):
    """Return the probability that two random symbols are equal.

    With GC-content ``gc``, G and C each have probability gc / 2 and A and
    T each have probability (1 - gc) / 2; the result is the sum of the four
    squared symbol probabilities.
    """
    a = gc / 2.0
    b = (1 - gc) / 2.0
    return (a * a) + (a * a) + (b * b) + (b * b)


def string_match_probability(gc, l):
    """Return the probability that two random strings of length l match."""
    return pow(symbol_match_probability(gc), l)


def expected_matches(m, n, gc):
    """Return the expected number of matches of a length-m string in a length-n string.

    Both strings are random with GC-content ``gc``. The expectation is the
    number of start positions times the per-position match probability.
    """
    # A pattern longer than the text has no start position at all; without
    # the clamp the expectation would come out negative.
    positions = max(0, 1 + (n - m))
    probability = string_match_probability(gc, m)
    return positions * probability


def calculate_expected(input):
    """Parse "m n gc1 gc2 ..." and return one expectation per GC-content."""
    bits = input.split()

    m = int(bits[0])
    n = int(bits[1])

    return [expected_matches(m, n, float(gc)) for gc in bits[2:]]


if __name__ == "__main__":
    small_dataset = "2 10\n0.32 0.42 0.81"
    with open('datasets/rosalind_eval.txt') as dataset_file:
        large_dataset = dataset_file.read()

    results = calculate_expected(large_dataset)

    print(' '.join(map(str, results)))


# --------------------------------------------------------------------------------
# /e012-grph.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# coding=utf-8

# Overlap Graphs
# ==============
#
# A graph whose nodes have all been labeled can be represented by an adjacency
# list, in which each row of the list contains the two node labels corresponding
# to a unique edge.
#
# A directed graph (or digraph) is a graph containing directed edges, each of
# which has an orientation. That is, a directed edge is represented by an arrow
# instead of simply a segment; the starting and ending nodes of an edge form its
# tail and head, respectively. The directed edge with tail v and head w is
# represented by (v,w) (but not by (w,v)).
# A directed loop is a directed edge of
# the form (v,v).
#
# For a collection of strings and a positive integer k, the overlap graph for
# the strings is a directed graph Ok in which each string is represented by a
# node, and string s is connected to string t with a directed edge if and only
# if there is a length k suffix of s that matches a length k prefix of t.
# Directed loops are not allowed in the overlap graph.
#
# Given: A collection of DNA strings in FASTA format having total length at most
# 10 kbp.
#
# Return: The adjacency list corresponding to O3.
#
# Sample Dataset
# --------------
# >Rosalind_0498
# AAATAAA
# >Rosalind_2391
# AAATTTT
# >Rosalind_2323
# TTTTCCC
# >Rosalind_0442
# AAATCCC
# >Rosalind_5013
# GGGTGGG
#
# Sample Output
# -------------
# Rosalind_0498 Rosalind_2391
# Rosalind_0498 Rosalind_0442
# Rosalind_2391 Rosalind_2323


def parse_fasta(fasta):
    """Parse FASTA text into a list of (label, sequence) tuples.

    The label is the first token of each header line; everything else on
    the header line is ignored rather than being glued onto the sequence.
    The sequence is the record's remaining lines with all whitespace
    removed. Records appear in input order.
    """
    results = []

    for record in fasta.strip().split('>'):
        lines = record.strip().splitlines()
        if not lines:
            # The text before the first '>' (and any empty record).
            continue

        label = lines[0].split()[0]
        sequence = ''.join(''.join(lines[1:]).split())
        results.append((label, sequence))

    return results


def overlap_graph(fasta, n):
    """Return the edges (label_s, label_t) of the overlap graph O_n.

    An edge is emitted when the last n symbols of s equal the first n
    symbols of t and the two labels differ. Edges are ordered by s, then
    by t, both in input order.
    """
    results = []

    dna = parse_fasta(fasta)

    for k1, v1 in dna:
        for k2, v2 in dna:
            # A string shorter than n has no length-n prefix; slicing would
            # silently shrink the overlap and report a false edge.
            if len(v2) < n:
                continue
            if k1 != k2 and v1.endswith(v2[:n]):
                results.append((k1, k2))

    return results


if __name__ == "__main__":

    small_dataset = """
>Rosalind_0498
AAATAAA
>Rosalind_2391
AAATTTT
>Rosalind_2323
TTTTCCC
>Rosalind_0442
AAATCCC
>Rosalind_5013
GGGTGGG
"""

    with open('datasets/rosalind_grph.txt') as dataset_file:
        large_dataset = dataset_file.read()

    for edge in overlap_graph(large_dataset, 3):
        print("%s %s" % (edge[0], edge[1]))
# --------------------------------------------------------------------------------
# /e013-kmp.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# coding=utf-8

# Speeding Up Motif Finding
# =========================
#
# A prefix of a string s having length n is a substring s[1:j]; a suffix of s is
# a substring s[k:n].
#
# The failure array of s is an array P of length n for which P[k] is the length
# of the longest substring s[j:k] that is equal to some prefix s[1:k−j+1], where
# j cannot equal 1 (otherwise, P[k] would always equal k). By convention,
# P[1]=0.
#
# Given: A DNA string s (of length at most 100 kbp).
#
# Return: The failure array of s.
#
# Sample Dataset
# --------------
# CAGTAAGCAGGGACTG
#
# Sample Output
# -------------
# 0 0 0 0 0 0 0 1 2 3 0 0 0 1 0 0


def kmp_preprocess(s):
    """Return the failure array of s as a list of ints.

    Whitespace in ``s`` is discarded first, so the trailing newline of a
    dataset file is not counted as a symbol. An empty string gives [].
    """
    s = ''.join(s.split())

    border = -1
    # table[k] is the border length of the first k symbols; the leading -1
    # is a sentinel that lets the fallback loop run off the left edge.
    table = [border]

    for c in s:
        # Fall back to shorter borders until one can be extended by c.
        while border >= 0 and s[border] != c:
            border = table[border]
        border += 1
        table.append(border)

    return table[1:]


if __name__ == "__main__":

    small_dataset = "CAGTAAGCAGGGACTG"
    with open('datasets/rosalind_kmp.txt') as dataset_file:
        large_dataset = dataset_file.read().strip()

    results = kmp_preprocess(large_dataset)

    print(' '.join(map(str, results)))


# --------------------------------------------------------------------------------
# /e014-lcs.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# coding=utf-8

# Finding a Shared Motif
# ======================
#
# A common substring of a collection of strings is a substring of every member
# of the collection. We say that a common substring is a longest common
# substring if a longer common substring of the collection does not exist.
# For example, CG is a common substring of ACGTACGT and AACCGGTATA, whereas
# GTA is a longest common substring. Note that multiple longest common
# substrings may exist.
#
# Given: A collection of k DNA strings (of length at most 1 kbp each; k≤100).
#
# Return: A longest common substring of the collection. (If multiple solutions
# exist, you may return any single solution.)
#
# Sample Dataset
# --------------
# GATTACA
# TAGACCA
# ATACA
#
# Sample Output
# -------------
# AC


def lcs(strings):
    """Return one longest substring shared by all whitespace-separated strings.

    Candidates are taken from the shortest string, since no common
    substring can be longer than it. Returns '' when the input is empty or
    nothing is shared.
    """
    # Order by length (not alphabetically) so the candidate source really is
    # the shortest string.
    ordered = sorted(strings.split(), key=len)
    if not ordered:
        return ''

    short_string = ordered[0]
    other_strings = ordered[1:]

    l = len(short_string)
    m = ''
    for i in range(l):
        # Only windows strictly longer than the current best are of interest,
        # tried longest first so the first hit is the best one for this start.
        for j in range(l, i + len(m), -1):
            candidate = short_string[i:j]

            if all(candidate in other for other in other_strings):
                m = candidate
                break

    return m


if __name__ == "__main__":

    small_dataset = "GATTACA\nTAGACCA\nATACA"
    with open('datasets/rosalind_lcs.txt') as dataset_file:
        large_dataset = dataset_file.read()

    print(lcs(large_dataset))


# --------------------------------------------------------------------------------
# /e015-lexf.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# coding=utf-8

# Enumerating k-mers Lexicographically
# ====================================
#
# Assume that an alphabet 𝒜 has a predetermined order; that is, we write the
# alphabet as a permutation 𝒜=(a1,a2,…,ak), where a1
#
# NOTE(review): the text is cut off at this point in the dump. What follows is
# the surviving residue, kept verbatim with its line-number gutter because the
# definitions it belongs to cannot be reconstructed from this view.
#
#   1: 77 | print ' '.join([str(0 if x == 10 else x) for x in a]), ':', j
#   78 | print ' '.join(([' '] * i) + (['-'] * (j - i)))
#   79 | a[i:j] = reversed(a[i:j])
#   80 | r += 1
#   81 |
#   82 | print r
#   83 |
#   84 |
#   85 |
#   86 | def result(s):
#   87 | pairs = s.strip().split("\n\n")
#   88 | pairs = [handle_pair(p) for p in pairs]
#   89 |
#   90 |
for a, b in pairs: 91 | reversal_distance(a, b) 92 | 93 | 94 | if __name__ == "__main__": 95 | 96 | small_dataset = """ 97 | 1 2 3 4 5 6 7 8 9 10 98 | 3 1 5 2 7 4 9 6 10 8 99 | 100 | 3 10 8 2 5 4 7 1 6 9 101 | 5 2 3 1 7 4 10 8 6 9 102 | 103 | 8 6 7 9 4 1 3 10 2 5 104 | 8 2 7 6 9 1 5 3 10 4 105 | 106 | 3 9 10 4 1 8 6 7 5 2 107 | 2 9 8 5 1 7 3 4 6 10 108 | 109 | 1 2 3 4 5 6 7 8 9 10 110 | 1 2 3 4 5 6 7 8 9 10 111 | """ 112 | # large_dataset = open('datasets/rosalind_rear.txt').read().strip() 113 | 114 | result(small_dataset) 115 | 116 | -------------------------------------------------------------------------------- /e020-revp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | # Locating Restriction Sites 5 | # ========================== 6 | # 7 | # A DNA string is a reverse palindrome if it is equal to its reverse complement. 8 | # For instance, GCATGC is a reverse palindrome because its reverse complement is 9 | # GCATGC. 10 | # 11 | # Given: A DNA string of length at most 1 kbp. 12 | # 13 | # Return: The position and length of every reverse palindrome in the string 14 | # having length between 4 and 8. 
#
# Sample Dataset
# --------------
# TCAATGCATGCGGGTCTATATGCAT
#
# Sample Output
# -------------
# 4 6
# 5 4
# 6 6
# 7 4
# 17 4
# 18 4
# 20 6
# 21 4


def reverse_complement(s):
    """Return the reverse complement of the DNA string s.

    Raises KeyError for any symbol other than A, C, G or T.
    """
    complements = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}
    return ''.join([complements[c] for c in reversed(s)])


def reverse_palindromes(s, min_len=4, max_len=8):
    """Return (position, length) for every reverse palindrome in s.

    A reverse palindrome is a substring equal to its own reverse
    complement. Positions are 1-based. Lengths from ``min_len`` to
    ``max_len`` inclusive are examined; the defaults reproduce the
    original 4..8 window. Results are ordered by position, then length.
    """
    results = []

    total = len(s)

    for start in range(total):
        # Never ask for a window that runs past the end of s.
        longest = min(max_len, total - start)

        for size in range(min_len, longest + 1):
            window = s[start:start + size]

            if window == reverse_complement(window):
                results.append((start + 1, size))

    return results


if __name__ == "__main__":

    small_dataset = "TCAATGCATGCGGGTCTATATGCAT"
    with open('datasets/rosalind_revp.txt') as dataset_file:
        large_dataset = dataset_file.read().strip()

    results = reverse_palindromes(large_dataset)

    print("\n".join([' '.join(map(str, r)) for r in results]))


# --------------------------------------------------------------------------------
# /e021-sign.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# coding=utf-8

# Enumerating Oriented Gene Orderings
# ===================================
#
# A signed permutation of length n is some ordering of the positive integers
# {1,2,…,n} in which each integer is then provided with either a positive or
# negative sign (for the sake of simplicity, we omit the positive sign). For
# example, π=(5,−3,−2,1,4) is a signed permutation of length 5.
#
# Given: A positive integer n≤6.
#
# Return: The total number of signed permutations of length n, followed by a list
# of all such permutations (you may list the signed permutations in any order).
#
# Sample Dataset
# --------------
# 2
#
# Sample Output
# -------------
# 8
# -1 -2
# -1 2
# 1 -2
# 1 2
# -2 -1
# -2 1
# 2 -1
# 2 1


from itertools import permutations, product


def merge_product(product):
    """Apply a tuple of signs to a tuple of numbers.

    ``product`` is a pair (numbers, signs); each sign is '-' or '+'.
    Returns the list of signed integers in the same order.
    """
    numbers, signs = product
    # Negate arithmetically instead of building a string and re-parsing it.
    return [-number if sign == '-' else number
            for number, sign in zip(numbers, signs)]


def result(n):
    """Return every signed permutation of 1..n as a list of int lists.

    Permutations vary slowest and sign patterns fastest, with '-' ordered
    before '+'. A real list is returned so callers can take len() of it on
    Python 3 as well as Python 2.
    """
    numbers = permutations(range(1, n + 1))
    signs = list(product('-+', repeat=n))

    return [merge_product(pair) for pair in product(numbers, signs)]


if __name__ == "__main__":

    small_dataset = 2
    with open('datasets/rosalind_sign.txt') as dataset_file:
        large_dataset = int(dataset_file.read().strip())

    results = result(large_dataset)

    print(len(results))
    for r in results:
        print(' '.join(map(str, r)))


# --------------------------------------------------------------------------------
# /e022-splc.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# coding=utf-8

# RNA Splicing
# ============
#
# After identifying the exons and introns of an RNA string, we only need to
# delete the introns and concatenate the exons to form a new string ready for
# translation.
#
# Given: A DNA string s (of length at most 1 kbp) and a collection of substrings
# of s acting as introns.
#
# Return: A protein string resulting from transcribing and translating the exons
# of s. (Note: Only one solution will exist for the dataset provided.)
#
# Sample Dataset
# --------------
# ATGGTCTACATAGCTGACAAACAGCACGTAGCAATCGGTCGAATCTCGAGAGGCATATGGTCACATGATCGGTCGAGCGTGTTTCAAAGTTTGCGCCTAG
# ATCGGTCGAA
# ATCGGTCGAGCGTGT
#
# Sample Output
# -------------
# MVYIADKQHVASREAYGHMFKVCA


DNA_CODON_TABLE = {
    'TTT': 'F', 'CTT': 'L', 'ATT': 'I', 'GTT': 'V',
    'TTC': 'F', 'CTC': 'L', 'ATC': 'I', 'GTC': 'V',
    'TTA': 'L', 'CTA': 'L', 'ATA': 'I', 'GTA': 'V',
    'TTG': 'L', 'CTG': 'L', 'ATG': 'M', 'GTG': 'V',
    'TCT': 'S', 'CCT': 'P', 'ACT': 'T', 'GCT': 'A',
    'TCC': 'S', 'CCC': 'P', 'ACC': 'T', 'GCC': 'A',
    'TCA': 'S', 'CCA': 'P', 'ACA': 'T', 'GCA': 'A',
    'TCG': 'S', 'CCG': 'P', 'ACG': 'T', 'GCG': 'A',
    'TAT': 'Y', 'CAT': 'H', 'AAT': 'N', 'GAT': 'D',
    'TAC': 'Y', 'CAC': 'H', 'AAC': 'N', 'GAC': 'D',
    'TAA': '-', 'CAA': 'Q', 'AAA': 'K', 'GAA': 'E',
    'TAG': '-', 'CAG': 'Q', 'AAG': 'K', 'GAG': 'E',
    'TGT': 'C', 'CGT': 'R', 'AGT': 'S', 'GGT': 'G',
    'TGC': 'C', 'CGC': 'R', 'AGC': 'S', 'GGC': 'G',
    'TGA': '-', 'CGA': 'R', 'AGA': 'R', 'GGA': 'G',
    'TGG': 'W', 'CGG': 'R', 'AGG': 'R', 'GGG': 'G'
}


def result(s):
    """Splice the introns out of a DNA string and translate what remains.

    ``s`` holds whitespace-separated strings: the DNA string first, then
    the introns. Each intron is removed in the order given (every
    occurrence of it), then codons are translated with DNA_CODON_TABLE
    until the first stop codon. Codons missing from the table, including a
    trailing partial codon, are skipped. Empty input gives ''.
    """
    lines = s.split()
    if not lines:
        return ''

    dna = lines[0]
    for intron in lines[1:]:
        dna = dna.replace(intron, '')

    protein = []
    for i in range(0, len(dna), 3):
        amino_acid = DNA_CODON_TABLE.get(dna[i:i + 3])

        if amino_acid == '-':
            # Stop codon: translation ends here.
            break

        if amino_acid:
            protein.append(amino_acid)

    return ''.join(protein)


if __name__ == "__main__":

    small_dataset = """
ATGGTCTACATAGCTGACAAACAGCACGTAGCAATCGGTCGAATCTCGAGAGGCATATGGTCACATGATCGGTCGAGCGTGTTTCAAAGTTTGCGCCTAG
ATCGGTCGAA
ATCGGTCGAGCGTGT
"""
    with open('datasets/rosalind_splc.txt') as dataset_file:
        large_dataset = dataset_file.read().strip()

    print(result(large_dataset))
# --------------------------------------------------------------------------------
# /e023-kmer.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# coding=utf-8

# k-Mer Composition
# =================
#
# For a fixed positive integer k, order all possible k-mers taken from an
# underlying alphabet lexicographically.
#
# Then the k-mer composition of a string s can be represented by an array A for
# which A[m] denotes the number of times that the mth k-mer (with respect to the
# lexicographic order) appears in s.
#
# Given: A DNA string s in FASTA format (having length at most 100 kbp).
#
# Return: The 4-mer composition of s.
#
# Sample Dataset
# --------------
# >Rosalind_6431
# CTTCGAAAGTTTGGGCCGAGTCTTACAGTCGGTCTTGAAGCAAAGTAACGAACTCCACGG
# CCCTGACTACCGAACCAGTTGTGAGTACTCAACTGGGTGAGAGTGCAGTCCCTATTGAGT
# TTCCGAGACTCACCGGGATTTTCGATCCAGCCTCAGTCCAGTCTTGTGGCCAACTCACCA
# AATGACGTTGGAATATCCCTGTCTAGCTCACGCAGTACTTAGTAAGAGGTCGCTGCAGCG
# GGGCAAGGAGATCGGAAAATGTGCTCTATATGCGACTAAAGCTCCTAACTTACACGTAGA
# CTTGCCCGTGTTAAAAACTCGGCTCACATGCTGTCTGCGGCTGGCTGTATACAGTATCTA
# CCTAATACCCTTCAGTTCGCCGCACAAAAGCTGGGAGTTACCGCGGAAATCACAG
#
# Sample Output
# -------------
# 4 1 4 3 0 1 1 5 1 3 1 2 2 1 2 0 1 1 3 1 2 1 3 1 1 1 1 2 2 5 1 3 0 2 2 1 1 1 1 3 1 0 0 1 5 5 1 5 0 2 0 2 1 2 1 1 1 2 0 1 0 0 1 1 3 2 1 0 3 2 3 0 0 2 0 8 0 0 1 0 2 1 3 0 0 0 1 4 3 2 1 1 3 1 2 1 3 1 2 1 2 1 1 1 2 3 2 1 1 0 1 1 3 2 1 2 6 2 1 1 1 2 3 3 3 2 3 0 3 2 1 1 0 0 1 4 3 0 1 5 0 2 0 1 2 1 3 0 1 2 2 1 1 0 3 0 0 4 5 0 3 0 2 1 1 3 0 3 2 2 1 1 0 2 1 0 2 2 1 2 0 2 2 5 2 2 1 1 2 1 2 2 2 2 1 1 3 4 0 2 1 1 0 1 2 2 1 1 1 5 2 0 3 2 1 1 2 2 3 0 3 0 1 3 1 2 3 0 2 1 2 2 1 2 3 0 1 2 3 1 1 3 1 0 1 1 3 0 2 1 2 2 0 2 1 1


from itertools import product


def parse_fasta(fasta):
    """Parse FASTA text into a list of (label, sequence) tuples.

    The label is the first token of each header line; everything else on
    the header line is ignored rather than being glued onto the sequence.
    The sequence is the record's remaining lines with all whitespace
    removed. Records appear in input order.
    """
    results = []

    for record in fasta.strip().split('>'):
        lines = record.strip().splitlines()
        if not lines:
            # The text before the first '>' (and any empty record).
            continue

        label = lines[0].split()[0]
        sequence = ''.join(''.join(lines[1:]).split())
        results.append((label, sequence))

    return results


def possible_kmers(k):
    """Return every DNA k-mer, enumerated over the alphabet order A, T, G, C."""
    return [''.join(x) for x in product('ATGC', repeat=k)]


def kmer_composition(s, k):
    """Count the occurrences of every possible k-mer in s.

    Returns a dict with an entry (possibly 0) for each k-mer from
    possible_kmers(k). A window containing a symbol outside A/C/G/T raises
    KeyError.
    """
    kmers = dict.fromkeys(possible_kmers(k), 0)

    # len(s) - k + 1 windows; the range is empty when s is shorter than k.
    for i in range(len(s) - (k - 1)):
        kmers[s[i:i + k]] += 1

    return kmers


def result(s, k=4):
    """Return the k-mer composition of the first FASTA record in s.

    The counts are listed in lexicographic order of the k-mers. ``k``
    defaults to 4. Input without any record is treated as an empty
    sequence.
    """
    fastas = parse_fasta(s)
    sequence = fastas[0][1] if fastas else ''
    k_comp = kmer_composition(sequence, k)

    return [k_comp[kmer] for kmer in sorted(k_comp)]


if __name__ == "__main__":

    small_dataset = """
>Rosalind_6431
CTTCGAAAGTTTGGGCCGAGTCTTACAGTCGGTCTTGAAGCAAAGTAACGAACTCCACGG
CCCTGACTACCGAACCAGTTGTGAGTACTCAACTGGGTGAGAGTGCAGTCCCTATTGAGT
TTCCGAGACTCACCGGGATTTTCGATCCAGCCTCAGTCCAGTCTTGTGGCCAACTCACCA
AATGACGTTGGAATATCCCTGTCTAGCTCACGCAGTACTTAGTAAGAGGTCGCTGCAGCG
GGGCAAGGAGATCGGAAAATGTGCTCTATATGCGACTAAAGCTCCTAACTTACACGTAGA
CTTGCCCGTGTTAAAAACTCGGCTCACATGCTGTCTGCGGCTGGCTGTATACAGTATCTA
CCTAATACCCTTCAGTTCGCCGCACAAAAGCTGGGAGTTACCGCGGAAATCACAG
"""
    with open('datasets/rosalind_kmer.txt') as dataset_file:
        large_dataset = dataset_file.read().strip()

    print(' '.join(map(str, result(large_dataset))))


# --------------------------------------------------------------------------------
# /e024-lexv.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# coding=utf-8

# Ordering Strings of Varying Length Lexicographically
# ====================================================
#
# Say that we have strings s=s1s2⋯sm and t=t1t2⋯tn with mLext if s>Lext′
# (e.g., APPLET
#
# NOTE(review): the text is garbled and cut off in this dump from the line
# above onwards. The surviving residue is kept verbatim with its line-number
# gutter because the definition it belongs to cannot be reconstructed from
# this view.
#
#   0: 69 | for c in alphabet:
#   70 | res.append(acc + c)
#   71 | alpha_combs(alphabet, n - 1, acc + c, res)
72 | return res 73 | 74 | 75 | def result(s): 76 | bits = s.split() 77 | alphabet = bits[:-1] 78 | length = int(bits[-1]) 79 | return alpha_combs(alphabet, length) 80 | 81 | 82 | if __name__ == "__main__": 83 | 84 | small_dataset = "D N A\n3" 85 | large_dataset = open('datasets/rosalind_lexv.txt').read().strip() 86 | 87 | print "\n".join(result(large_dataset)) 88 | 89 | 90 | -------------------------------------------------------------------------------- /e025-long.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | # Genome Assembly as Shortest Superstring 5 | # ======================================= 6 | # 7 | # Given a collection of strings, a larger string containing every one of the 8 | # smaller strings as a substring is called a superstring. 9 | # 10 | # By the assumption of parsimony, a shortest possible superstring over a 11 | # collection of reads serves as a candidate chromosome. 12 | # 13 | # Given: At most 50 DNA strings of equal length not exceeding 1 kbp (which 14 | # represent reads deriving from the same strand of a single linear chromosome). 15 | # 16 | # The dataset is guaranteed to satisfy the following condition: there exists a 17 | # unique way to reconstruct the entire chromosome from these reads by gluing 18 | # together pairs of reads that overlap by more than half their length. 19 | # 20 | # Return: A shortest superstring containing all the given strings (thus 21 | # corresponding to a reconstructed chromosome). 
#
# Sample Dataset
# --------------
# ATTAGACCTG
# CCTGCCGGAA
# AGACCTGCCG
# GCCGGAATAC
#
# Sample Output
# -------------
# ATTAGACCTGCCGGAATAC


def find_overlaps(arr, acc=''):
    """Glue the reads in arr onto acc and return the resulting superstring.

    When ``acc`` is empty the first read seeds it. The reads are then
    scanned in order and the first one found to overlap ``acc`` is merged
    in, either in front of it or behind it, after which the scan restarts.
    Only overlaps longer than half the read's length are accepted.

    The caller's list is left untouched. Raises ValueError when reads
    remain but none of them overlaps the string built so far.
    """
    remaining = list(arr)  # work on a copy; do not drain the caller's list

    while remaining:
        if not acc:
            acc = remaining.pop(0)
            continue

        merged = _merge_first_overlap(acc, remaining)
        if merged is None:
            raise ValueError(
                "no remaining read overlaps the assembled string by more "
                "than half its length")
        acc = merged

    return acc


def _merge_first_overlap(acc, remaining):
    """Merge the first overlapping read into acc, removing it from remaining.

    Returns the extended string, or None when no read overlaps.
    """
    for i, read in enumerate(remaining):
        length = len(read)

        # p symbols of the read stick out, so the overlap is length - p.
        # Integer division keeps range() valid on Python 3.
        for p in range(length // 2):
            q = length - p

            if acc.startswith(read[p:]):
                remaining.pop(i)
                return read[:p] + acc

            if acc.endswith(read[:q]):
                remaining.pop(i)
                return acc + read[q:]

    return None


if __name__ == "__main__":

    small_dataset = """
ATTAGACCTG
CCTGCCGGAA
AGACCTGCCG
GCCGGAATAC
"""
    with open('datasets/rosalind_long.txt') as dataset_file:
        large_dataset = dataset_file.read().strip()

    print(find_overlaps(large_dataset.split()))


# --------------------------------------------------------------------------------
# /e027-spec.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# coding=utf-8

# Inferring Protein from Spectrum
# ===============================
#
# The prefix spectrum of a weighted string is the collection of all its prefix
# weights.
#
# Given: A list L of n (n≤100) positive real numbers.
#
# Return: A protein string of length n−1 whose prefix spectrum is equal to L (if
# multiple solutions exist, you may output any one of them). Consult the
# monoisotopic mass table.
#
# Sample Dataset
# --------------
# 3524.8542
# 3710.9335
# 3841.974
# 3970.0326
# 4057.0646
#
# Sample Output
# -------------
# WMQS


MONOISOTOPIC_MASS_TABLE = {
    'A': 71.03711,
    'C': 103.00919,
    'D': 115.02694,
    'E': 129.04259,
    'F': 147.06841,
    'G': 57.02146,
    'H': 137.05891,
    'I': 113.08406,
    'K': 128.09496,
    'L': 113.08406,
    'M': 131.04049,
    'N': 114.04293,
    'P': 97.05276,
    'Q': 128.05858,
    'R': 156.10111,
    'S': 87.03203,
    'T': 101.04768,
    'V': 99.06841,
    'W': 186.07931,
    'Y': 163.06333,
}


def protein_string(s, tolerance=0.01):
    """Infer a protein string from a whitespace-separated prefix spectrum.

    Each difference between consecutive weights is mapped to the amino
    acid in MONOISOTOPIC_MASS_TABLE whose mass is closest to it. Comparing
    by distance instead of looking up a rounded float keeps tiny
    measurement or floating-point differences from breaking the match.
    Amino acids sharing a mass (I and L) resolve to the alphabetically
    first one.

    Raises KeyError when the closest mass is further than ``tolerance``
    from a difference. Fewer than two weights give ''.
    """
    prefix_spectrum = [float(weight) for weight in s.split()]
    # Sorted so that equal-mass ties are broken the same way on every run.
    masses = sorted(MONOISOTOPIC_MASS_TABLE.items())

    result = []
    for i in range(1, len(prefix_spectrum)):
        delta = prefix_spectrum[i] - prefix_spectrum[i - 1]

        symbol, mass = min(masses, key=lambda item: abs(item[1] - delta))
        if abs(mass - delta) > tolerance:
            raise KeyError(delta)

        result.append(symbol)

    return ''.join(result)


if __name__ == "__main__":

    small_dataset = """
3524.8542
3710.9335
3841.974
3970.0326
4057.0646
"""
    with open('datasets/rosalind_spec.txt') as dataset_file:
        large_dataset = dataset_file.read().strip()

    print(protein_string(large_dataset))


# --------------------------------------------------------------------------------
# /e028-sseq.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# coding=utf-8

# Finding a Spliced Motif
# =======================
#
# A subsequence of a string is a collection of symbols contained in order
# (though not necessarily contiguously) in the string (e.g., ACG is a
# subsequence of TATGCTAAGATC).
# The indices of a subsequence are the positions
# in the string at which the symbols of the subsequence appear; thus, the
# indices of ACG in TATGCTAAGATC can be represented by (2, 5, 9).
#
# As a substring can have multiple locations, a subsequence can have multiple
# collections of indices, and the same index can be reused in more than one
# appearance of the subsequence; for example, ACG is a subsequence of AACCGGTT
# in 8 different ways.
#
# Given: Two DNA strings s and t (each of length at most 1 kbp).
#
# Return: One collection of indices of s in which the symbols of t appear as a
# subsequence of s. If multiple solutions exist, you may return any one.
#
# Sample Dataset
# --------------
# ACGTACGTGACG
# GTA
#
# Sample Output
# -------------
# 3 8 10


def subsequence_indices(s):
    """Return 1-based indices at which t appears as a subsequence of s.

    The argument holds the two strings separated by whitespace (s first,
    then t). Matching is greedy: each symbol of t is paired with its
    earliest possible position in s. If s runs out before t is fully
    matched, only the indices found so far are returned.
    Raises ValueError (from the unpacking) unless exactly two tokens are
    present.
    """
    text, motif = s.split()

    indices = []
    matched = 0  # number of motif symbols placed so far

    for position, symbol in enumerate(text, 1):
        if matched == len(motif):
            break
        if symbol == motif[matched]:
            indices.append(position)
            matched += 1

    return indices


if __name__ == "__main__":

    small_dataset = "ACGTACGTGACG\nGTA"
    with open('datasets/rosalind_sseq.txt') as dataset_file:
        large_dataset = dataset_file.read().strip()

    result = subsequence_indices(large_dataset)

    print(' '.join(map(str, result)))


# --------------------------------------------------------------------------------