├── Calculating_Protein_Mass.py ├── Complementing_a_Strand_of_DNA.py ├── Computing_GC_Content.py ├── Consensus_and_Profile.py ├── Counting_DNA_Nucleotides.py ├── Counting_Point_Mutations.py ├── Enumerating_Gene_Orders.py ├── Enumerating_Oriented_Gene_Orderings.py ├── Enumerating_k-mers_Lexicographically.py ├── Finding_a_Motif_in_DNA.py ├── Inferring_mRNA_from_Protein.py ├── Mortal_Fibonacci_Rabbits.py ├── Ordering_Strings_of_Varying_Length_Lexicographically.py ├── Overlap_Graphs.py ├── Partial_Permutations.py ├── README.md ├── RNA_Splicing.py ├── Rabbits_and_Recurrence_Relations.py ├── Transcribing_DNA_into_RNA.py ├── Transitions_and_Transversions.py ├── Translating_RNA_into_Protein.py ├── rosalind_grph.txt └── rosalind_splc.txt /Calculating_Protein_Mass.py: -------------------------------------------------------------------------------- 1 | protein = input() 2 | 3 | d = { 4 | 'A': 71.03711, 5 | 'C': 103.00919, 6 | 'D': 115.02694, 7 | 'E': 129.04259, 8 | 'F': 147.06841, 9 | 'G': 57.02146, 10 | 'H': 137.05891, 11 | 'I': 113.08406, 12 | 'K': 128.09496, 13 | 'L': 113.08406, 14 | 'M': 131.04049, 15 | 'N': 114.04293, 16 | 'P': 97.05276, 17 | 'Q': 128.05858, 18 | 'R': 156.10111, 19 | 'S': 87.03203, 20 | 'T': 101.04768, 21 | 'V': 99.06841, 22 | 'W': 186.07931, 23 | 'Y': 163.06333, 24 | } 25 | 26 | S = 0 27 | for i in protein: 28 | S += d[i] 29 | 30 | print(round(S, 3)) 31 | -------------------------------------------------------------------------------- /Complementing_a_Strand_of_DNA.py: -------------------------------------------------------------------------------- 1 | DNA = input() 2 | reverse_complement = '' 3 | 4 | for i in DNA: 5 | if i == 'A': 6 | reverse_complement += 'T' 7 | elif i == 'T': 8 | reverse_complement += 'A' 9 | elif i == 'C': 10 | reverse_complement += 'G' 11 | else: 12 | reverse_complement += 'C' 13 | 14 | reverse_complement_output = ''.join(reversed(reverse_complement)) 15 | print(reverse_complement_output) 16 | -------------------------------------------------------------------------------- /Computing_GC_Content.py: -------------------------------------------------------------------------------- 1 | user_name = input() 2 | d = {} 3 | 4 | while user_name[0] != '': 5 | d[user_name] = '' 6 | DNA = input() 7 | if user_name != '>': 8 | while DNA[0] != '>': 9 | d[user_name] += DNA 10 | DNA = input() 11 | user_name = DNA 12 | else: 13 | break 14 | 15 | 16 | def make_pocent(str): 17 | sum = 0 18 | for i in str: 19 | if i == 'C' or i == 'G': 20 | sum += 1 21 | return round(100*sum/len(str), 7) 22 | 23 | 24 | print(d) 25 | del d['>'] 26 | 27 | MAX = 0 28 | for j in d: 29 | if make_pocent(d[j]) > MAX: 30 | MAX = make_pocent(d[j]) 31 | name = j 32 | 33 | print(name[1:]) 34 | print(MAX) 35 | -------------------------------------------------------------------------------- /Consensus_and_Profile.py: -------------------------------------------------------------------------------- 1 | OUT = { 2 | 'A': [], 3 | 'C': [], 4 | 'G': [], 5 | 'T': [] 6 | } 7 | 8 | user_name = input() 9 | d = {} 10 | 11 | while user_name[0] != '': 12 | d[user_name] = '' 13 | DNA = input() 14 | if user_name != '>': 15 | while DNA[0] != '>': 16 | d[user_name] += DNA 17 | DNA = input() 18 | user_name = DNA 19 | else: 20 | break 21 | 22 | print(d) 23 | del d['>'] 24 | 25 | FASTA = [] 26 | for i in d: 27 | FASTA.append(d[i]) 28 | 29 | print(FASTA) 30 | 31 | for i in range(len(FASTA[0])): 32 | OUT['A'].append(0) 33 | OUT['C'].append(0) 34 | OUT['G'].append(0) 35 | OUT['T'].append(0) 36 | 37 | print(OUT) 38 | 39 | for i in range(len(FASTA)): 40 | for j in range(len(FASTA[0])): 41 | if FASTA[i][j] == 'A': 42 | OUT['A'][j] += 1 43 | elif FASTA[i][j] == 'C': 44 | OUT['C'][j] += 1 45 | elif FASTA[i][j] == 'G': 46 | OUT['G'][j] += 1 47 | elif FASTA[i][j] == 'T': 48 | OUT['T'][j] += 1 49 | 50 | print(OUT) 51 | 52 | for i in range(len(FASTA[0])): 53 | if OUT['A'][i] == max(OUT['A'][i], OUT['C'][i], OUT['G'][i], OUT['T'][i]): 54 | print('A', end='') 55 | elif OUT['C'][i] == max(OUT['A'][i], OUT['C'][i], OUT['G'][i], OUT['T'][i]): 56 | print('C', end='') 57 | elif OUT['G'][i] == max(OUT['A'][i], OUT['C'][i], OUT['G'][i], OUT['T'][i]): 58 | print('G', end='') 59 | elif OUT['T'][i] == max(OUT['A'][i], OUT['C'][i], OUT['G'][i], OUT['T'][i]): 60 | print('T', end='') 61 | print() 62 | 63 | print('A:', end=' ') 64 | for i in range(len(FASTA[0])): 65 | print(OUT['A'][i], end=' ') 66 | print() 67 | 68 | print('C:', end=' ') 69 | for i in range(len(FASTA[0])): 70 | print(OUT['C'][i], end=' ') 71 | print() 72 | 73 | print('G:', end=' ') 74 | for i in range(len(FASTA[0])): 75 | print(OUT['G'][i], end=' ') 76 | print() 77 | 78 | print('T:', end=' ') 79 | for i in range(len(FASTA[0])): 80 | print(OUT['T'][i], end=' ') 81 | print() 82 | -------------------------------------------------------------------------------- /Counting_DNA_Nucleotides.py: -------------------------------------------------------------------------------- 1 | DNA = input() 2 | A_count = 0 3 | C_count = 0 4 | G_count = 0 5 | T_count = 0 6 | 7 | for i in DNA: 8 | if i == 'A': 9 | A_count += 1 10 | elif i == 'C': 11 | C_count += 1 12 | elif i == 'G': 13 | G_count += 1 14 | else: 15 | T_count += 1 16 | 17 | print(A_count, C_count, G_count, T_count) 18 | -------------------------------------------------------------------------------- /Counting_Point_Mutations.py: -------------------------------------------------------------------------------- 1 | first_DNA = input() 2 | second_DNA = input() 3 | intersections = 0 4 | 5 | for i in range(len(first_DNA)): 6 | if first_DNA[i] != second_DNA[i]: 7 | intersections += 1 8 | 9 | print(intersections) 10 | -------------------------------------------------------------------------------- /Enumerating_Gene_Orders.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import math 3 | 4 | permutations = [] 5 | n = int(input()) 6 | 7 | for i in range(1, n + 1): 8 | permutations.append(i) 9 | print(math.factorial(n)) 10 | for i in itertools.permutations(permutations): 11 | for j in range(n): 12 | print(i[j], end=' ') 13 | print() 14 | -------------------------------------------------------------------------------- /Enumerating_Oriented_Gene_Orderings.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from collections import Counter 3 | 4 | n = int(input()) 5 | permutations = [] 6 | count = [] 7 | 8 | for i in range(1, n + 1): 9 | permutations.append(-i) 10 | permutations.append(i) 11 | 12 | for i in itertools.permutations(permutations, n): 13 | test = [] 14 | SUM = 0 15 | print(i) 16 | for j in range(n): 17 | test.append(abs(i[j])) 18 | print(test) 19 | c_test = Counter(test) 20 | for q in c_test: 21 | print(c_test[q]) 22 | if c_test[q] >= 2: 23 | continue 24 | else: 25 | SUM += 1 26 | if SUM == n: 27 | count.append(i) 28 | 29 | f = open('text.txt', 'w') 30 | f.write(str(len(count))) 31 | f.write('\n') 32 | for i in count: 33 | for j in i: 34 | f.write(str(j) + ' ') 35 | f.write('\n') 36 | -------------------------------------------------------------------------------- /Enumerating_k-mers_Lexicographically.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | 4 | symbols_connection = input().split() 5 | k = int(input()) 6 | 7 | for i in itertools.product(symbols_connection, repeat=k): 8 | for j in range(k): 9 | print(i[j], end='') 10 | print() 11 | -------------------------------------------------------------------------------- /Finding_a_Motif_in_DNA.py: -------------------------------------------------------------------------------- 1 | first_DNA = input() 2 | second_DNA = input() 3 | substring_numbers = [] 4 | 5 | 6 | def find_line(fl, sl): 7 | if fl.find(sl) != -1: 8 | if len(substring_numbers) != 0: 9 | substring_numbers.append(fl.find(sl) + 1 + substring_numbers[-1]) 10 | else: 11 | substring_numbers.append(fl.find(sl) + 1) 12 | find_line(fl[fl.find(sl) + 1:], sl) 13 | else: 14 | return 15 | 16 | 17 | find_line(first_DNA, second_DNA) 18 | 19 | print(substring_numbers) 20 | -------------------------------------------------------------------------------- /Inferring_mRNA_from_Protein.py: -------------------------------------------------------------------------------- 1 | protein = input() 2 | count_RNA = 1 3 | 4 | amino_acids = 'FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG' 5 | for i in protein: 6 | s = 0 7 | for j in amino_acids: 8 | if i == j: 9 | s += 1 10 | if s != 0: 11 | count_RNA *= s 12 | 13 | print(count_RNA*3 % 1000000) 14 | -------------------------------------------------------------------------------- /Mortal_Fibonacci_Rabbits.py: -------------------------------------------------------------------------------- 1 | n, m = map(int, input().split()) 2 | MORTAL_FIB = [1, 1] 3 | 4 | for i in range(2, n + 1): 5 | if i < m: 6 | MORTAL_FIB.append(MORTAL_FIB[i - 1] + MORTAL_FIB[i - 2]) 7 | elif i == m: 8 | MORTAL_FIB.append(MORTAL_FIB[i - 1] + MORTAL_FIB[i - 2] - 1) 9 | else: 10 | MORTAL_FIB.append(MORTAL_FIB[i-1] + MORTAL_FIB[i-2] - MORTAL_FIB[i-m-1]) 11 | 12 | print(MORTAL_FIB[n-1]) 13 | -------------------------------------------------------------------------------- /Ordering_Strings_of_Varying_Length_Lexicographically.py: -------------------------------------------------------------------------------- 1 | line = input().split() 2 | N = int(input()) 3 | 4 | for i in line: 5 | for j in ([' '] + line): 6 | for p in ([' '] + line): 7 | if i != ' ' and not (j == ' ' and p != ' '): 8 | print(i + j + p) 9 | -------------------------------------------------------------------------------- /Overlap_Graphs.py: -------------------------------------------------------------------------------- 1 | NAME_DNA = {} 2 | CHILDRENS = {} 3 | N = 3 4 | 5 | # Read file 6 | with open('rosalind_grph.txt', 'r') as file: 7 | lines = file.readlines() 8 | 9 | # Fill NAME_DNA: key=DNA_STRING, value=STRING_NAME 10 | for i in range(2, len(lines), 3): 11 | NAME_DNA[lines[i-1][:-1]+lines[i][:-1]] = lines[i-2][1:-1] 12 | 13 | for key in NAME_DNA.keys(): 14 | for dif_key in NAME_DNA.keys(): 15 | if key != dif_key and key[(-1*N):] == dif_key[:N]: 16 | if NAME_DNA[key] not in CHILDRENS: 17 | CHILDRENS[NAME_DNA[key]] = [NAME_DNA[dif_key]] 18 | else: 19 | CHILDRENS[NAME_DNA[key]].append(NAME_DNA[dif_key]) 20 | 21 | for key in CHILDRENS.keys(): 22 | for value in CHILDRENS[key]: 23 | print(key, value) 24 | -------------------------------------------------------------------------------- /Partial_Permutations.py: -------------------------------------------------------------------------------- 1 | n, k = map(int, input().split()) 2 | 3 | permutations_number = 1 4 | for i in range(n-k+1, n+1): 5 | permutations_number *= i 6 | 7 | print(permutations_number % 1000000) 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ROSALIND 2 | Solutions for some tasks from Bioinformatics Stronghold 3 | http://rosalind.info/problems/list-view/ 4 | 5 | My account: 6 | http://rosalind.info/users/artem_dav/ 7 | -------------------------------------------------------------------------------- /RNA_Splicing.py: -------------------------------------------------------------------------------- 1 | 2 | def rna_to_protein(rna: str): 3 | bases = ['U', 'C', 'A', 'G'] 4 | codons = [a+b+c for a in bases for b in bases for c in bases] 5 | amino_acids = 'FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG' 6 | codon_table = dict(zip(codons, amino_acids)) 7 | SOLUTION = '' 8 | 9 | for i in range(2, len(rna), 3): 10 | if codon_table[rna[i-2] + rna[i - 1] + rna[i]] != '*': 11 | SOLUTION += codon_table[rna[i-2] + rna[i - 1] + rna[i]] 12 | else: 13 | break 14 | 15 | return SOLUTION 16 | 17 | 18 | def dna_to_rna(dna: str): 19 | rna = '' 20 | for i in dna: 21 | if i == 'T': 22 | rna += 'U' 23 | else: 24 | rna += i 25 | return rna 26 | 27 | 28 | NAME_DNA = [] 29 | 30 | # Read file 31 | with open('rosalind_splc.txt', 'r') as file: 32 | lines = file.readlines() 33 | 34 | # Fill NAME_DNA: key=DNA_STRING, value=STRING_NAME 35 | for i in range(1, len(lines), 2): 36 | NAME_DNA.append(lines[i][:-1]) 37 | 38 | DNA = NAME_DNA[0] 39 | 40 | for intron in NAME_DNA[1:]: 41 | DNA = DNA.replace(intron, '') 42 | 43 | print(rna_to_protein(dna_to_rna(DNA))) 44 | -------------------------------------------------------------------------------- /Rabbits_and_Recurrence_Relations.py: -------------------------------------------------------------------------------- 1 | n, k = map(int, input().split()) 2 | 3 | FIB = [1, 1] 4 | for i in range(2, n + 1): 5 | FIB.append(FIB[i-1] + (FIB[i-2])*k) 6 | 7 | print(FIB[n-1]) 8 | -------------------------------------------------------------------------------- /Transcribing_DNA_into_RNA.py: -------------------------------------------------------------------------------- 1 | DNA = input() 2 | RNA = '' 3 | for i in DNA: 4 | if i == 'T': 5 | RNA += 'U' 6 | else: 7 | RNA += i 8 | 9 | print(RNA) 10 | -------------------------------------------------------------------------------- /Transitions_and_Transversions.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def transitions(s1: str, s2: str): 4 | k = 0 5 | for i in range(len(s1)): 6 | if (s1[i] == "A" and s2[i] == "G") or (s1[i] == "G" and s2[i] == "A") or (s1[i] == "C" and s2[i] == "T") or (s1[i] == "T" and s2[i] == "C"): 7 | k += 1 8 | 9 | return k 10 | 11 | 12 | def transversions(s1: str, s2: str): 13 | k = 0 14 | 15 | for i in range(len(s1)): 16 | if (s1[i] in ("A", "G") and s2[i] in ("T", "C")) or (s1[i] in ("T", "C") and s2[i] in ("A", "G")): 17 | k += 1 18 | 19 | return k 20 | 21 | 22 | s1 = input() 23 | s2 = input() 24 | 25 | print(transitions(s1, s2)/transversions(s1, s2)) 26 | -------------------------------------------------------------------------------- /Translating_RNA_into_Protein.py: -------------------------------------------------------------------------------- 1 | # really cool code 2 | RNA = input() 3 | 4 | 5 | def rna_to_protein(rna: str): 6 | bases = ['U', 'C', 'A', 'G'] 7 | codons = [a+b+c for a in bases for b in bases for c in bases] 8 | amino_acids = 'FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG' 9 | codon_table = dict(zip(codons, amino_acids)) 10 | SOLUTION = '' 11 | 12 | for i in range(0, len(rna), 3): 13 | if codon_table[rna[i] + rna[i + 1] + rna[i + 2]] != '*': 14 | SOLUTION += codon_table[rna[i] + rna[i + 1] + rna[i + 2]] 15 | else: 16 | break 17 | 18 | return SOLUTION 19 | 20 | 21 | print(rna_to_protein(RNA)) 22 | -------------------------------------------------------------------------------- /rosalind_grph.txt: -------------------------------------------------------------------------------- 1 | >Rosalind_7448 2 | CTAGGTGGGTAAAGAAATTAATCATATTTTAGGCGCCTTTGGTCTGCATTGTCTGGTGGG 3 | GTGCCGTTCACCATTCCTTTAGTC 4 | >Rosalind_5897 5 | GGTTTTATTCCTTAGCGATTCAAATTCTGCCTTGGCCCACCCAGGACTCAGGAAAATCTG 6 | CTATAACGCGGTGAGCTCCGCTC 7 | >Rosalind_6794 8 | CGACGGACCTGGCTAATCCTGTACGACACACAGGAGTGCGCGGAGACTCGCGCACGATAG 9 | CCAGGTGGATCGCGAGAGGGA 10 | >Rosalind_8826 11 | CTTATCGATATTAATGGGGCTCGCCCTAGGAGCTCCCTAGGTGGTAGATGGATTACCACC 12 | GACCACACGAAGTCGCGAGTGGCAACCTTA 13 | >Rosalind_2021 14 | ATGAATGGTGGTGGAGTATTACGCGTACGCTCGGGCAACACCCATTAGACCGTCTACGTA 15 | CTCGTTTTGGGCGCTGTTGGCG 16 | >Rosalind_3958 17 | ACATACAAAAACTCTGTGAGAATGGAAGACGGAAAATGTCCAAGCTCACACTTTTCCCTA 18 | GTCGGTTTACTCGCGGACCAATGTAAACCAACTACACAAC 19 | >Rosalind_1338 20 | CTTGTGTACATGGCGCGACCTAGTATTCAGAGCCATCTACCCCATTAGGTTCCCAACAAG 21 | AAGGCTCAGCTCGAATCTTTTTACAATACAAACTCAT 22 | >Rosalind_4132 23 | GGCGGACGCTCGCATATATACTCTTGGATCGTTCTTGTAGACTTGCGATAGTGTTATGAA 24 | GCGTGCATCCCGTGGCGCCAAGATG 25 | >Rosalind_8338 26 | GGCGTTATATAGTAAAACACACGTAATCTCTTCCCCTTACTGACGAGTAGAGGGTCTGAC 27 | ATGGTCATATACCTCTCATAGTCTGTGAGGGGGCCAG 28 | >Rosalind_2974 29 | CTGCGTCGCCTCGTAGCCGTTCCTCATGGGGCAGTCGCCCCAATAGGTGTACGTGTATCC 30 | GTGCGAGTACGTCGTTCCAGCGCAA 31 | >Rosalind_3390 32 | CAATTTTGAACCGACAAAACTGGCGGGGTGAAGAACCTTACTCGATACCATTCCGGTCAG 33 | TTCCCCGTAGCCGCGTAGGAAATAATGCACGGGCTCTGCT 34 | >Rosalind_9288 35 | TCGGCAAACGCCTTAGAATCTAAGTTACGACAACTGATCTGCCATCCCGGAATCGCCCTC 36 | GTTAGAGAGTGCAAGACTCGATAAA 37 | >Rosalind_0135 38 | GCTCGAGCAAGAGCGTCCAATTCGCTAGCTCATCCTGTTAGTGGCGAAACGGAGTGAAGT 39 | GCACGCGTCAGCGAAGCTCA 40 | >Rosalind_6735 41 | GGCTTGTGCCTCCCGACACGTCCCCTGAGGGACAAGTCCAACAAACAACTTATAGTAGAG 42 | ACTCGCTGTAGCTAGCGGCGTCAGATCCTTCGATAAGCG 43 | >Rosalind_0004 44 | CTTCAAATCATCTGCGTTGTGAGATAATAACATTGACAATTTTGTTCTGAAGTATGAGAG 45 | TACGAACCACTTATCCCGACGTAGGAG 46 | >Rosalind_6953 47 | AGCTCATAGCATACCCTTCGCCTTTGATACTCCTTTTTCAACGAGGGGCATTAGCACGCC 48 | CAACTGTCTGACGCCTTAAGATGCTAG 49 | >Rosalind_0176 50 | GTTCTGAGGGCGTTTGTAGCAGTAGTCTTGCTAGCCGACACATAGCCTCACTCCGAAAAA 51 | CAAAACGCAAGCCTCCTTATTGC 52 | >Rosalind_2009 53 | TGGGCATGGGCTGTACCGCAACGACCAGCCATAGTATATTTATCCGGCACCGCTTCGCCT 54 | GCCCGAATCCGATCAGGCCACTCCTTGGTCCAAGCGAAC 55 | >Rosalind_6472 56 | TAGTTGGAATGAGGAGGCTCCCGTGCGAGGATCAGGAGCACGAAACACTGGTCTCAGCAA 57 | TGACGACTCTACTAACGCCTCCCTTTTCGTTCGT 58 | >Rosalind_5134 59 | CGGAAAAGCCTGCAGCCACAGGGTTGTATAAATACCGACATTTTGGGAGGCTCTTTTGAC 60 | CCCTGATCGAAAGATTACACCGTA 61 | >Rosalind_6595 62 | AGAGTTAACTATGTTAGTCGAGACGGCTGCTGGTACGGATGCGCCTACGGGGCGGACTGG 63 | TGACGCCGTGCCGCGAAATC 64 | >Rosalind_9174 65 | TGAGGGATTTCGCGTTTTCCGTTGTTACTCTGCCCCTCCCACCGGTGAGACGACAGAGCT 66 | AACTTGGGAGCTGACTCGTGT 67 | >Rosalind_5424 68 | GGGCGTTTGTCAACTACCTGCATCGGGGCCGAGACCCGACTATGCATACAGGCGTTACAT 69 | CCCGCCTGGGCTCGGGCATCCTGTTGAATCGCGACCTGG 70 | >Rosalind_1443 71 | ACGCGTGCTAAGATCCTGCTTGCGGTGACTTGGATCTATGAATCCAACCTCGCGGGGCCG 72 | TGGTCCCACTTGGCCGTACCACGAAT 73 | >Rosalind_7029 74 | CCAGTGAGAACGAGCTAAACCCGCCTTCGCTGTACCTTTTATTTGTGGCGTATTAGGTCC 75 | TAGCCTATAGTCCGGTCCTTCAGCTGGGCTATGCAGC 76 | >Rosalind_8643 77 | TCCCTGTAAAGCGATTAAAAATGTCAGTATTCGACCTAATATTGATTCCGTTTTGCGTGT 78 | TAACCATTGGGCGCCTCCGAACACG 79 | >Rosalind_5035 80 | GAGAAGTTTCTAAACTACAGGATCTGTATAAAATTTGGATCGAACGCGGGCGTCCAACAC 81 | TCGGTCTGGTAGTAATACCAACAATCTTTGAGCGCG 82 | >Rosalind_7534 83 | TTGACGTGTGGCGCGCTTCCGCGGTGGCAAGGTCCCCCCATCCTCACCGGCTTCCTAACC 84 | ACGTATATTACTAATCGAGTGAAGTGG 85 | >Rosalind_7913 86 | AAGAATGATATGCATCTCCTGAGCCGCGTGTATCGAAACATGAAGAAAGAGGACCCCCAG 87 | GAGGATCAGATTAACACGCGTGAGACTGCCAGTCGAA 88 | >Rosalind_6724 89 | CGTCGCGCGCACTCTCGGACGCGGACTGCTACGTGAGAGCCCTGATACGGAATTCCGTAA 90 | CATCCACCTTGTTAGTGACATGTTCGGCGG 91 | >Rosalind_2851 92 | TCATCATACTTAAGGGTAGCCTTCACCCACAAGTAGCATGTACGAGTGGCTGACACTCCA 93 | TCGCGTCAACAAAACATGTAAATGCAGGTA 94 | >Rosalind_9039 95 | TAAACGCACTAACAATGTGTAGGCGCGTACACATTAACCTACGCACATTTACCATACTGC 96 | TAGTCGTTGCACACCGGTCAGTTGAACGTA 97 | >Rosalind_1379 98 | GGACTGCTGCCTAGGGGCCGTCATGTACCTCGCTTGATACAGAGGACTGGTACGAACACT 99 | GACAGCTCAGTCAGGCTCTTACA 100 | >Rosalind_3337 101 | ACCTGCTTACCATAGGCCCCGAGGCACAGGCGATCCGGCCACACACGATCTGGAAATATG 102 | ATCAGGGTGGCTCAAGGTACCAGTTGGATCTAG 103 | >Rosalind_4477 104 | ACTAATAAACGCTCCCTCACCAGCTTTGATATGAGATGAGGGTCGGAGCCTGAGTGGTGA 105 | ATACAATCCTACATAGACGACG 106 | >Rosalind_7347 107 | TTAAGGGGTGTTTCGTTTGGAATCTTCCACTTTGGTGATATTGCAAATAGCAACTTTGGG 108 | GTCTACATTGCCTCGGTGAGATCGTT 109 | >Rosalind_7805 110 | CGTTTTGAAACGCTGACTGCGGCAATGCAAAAATCGGGACCAACGATCTTTCGTCGAAAC 111 | CGCGAGAGACCGGAGTCCTAT 112 | >Rosalind_7628 113 | TTCTTCTTCTAGAGAAAACCTGGACACGGCATGAGCGTAAGGAAGATCAGATGCAAGCTC 114 | GGATCCTACGCATCACGGGAGTAAGTAGAGAACC 115 | >Rosalind_8811 116 | AAGGTTAATCAAGCGAAGGCTTACGGCTGGTGCCGGGCATGCCCAGCAATATGTTCCGCG 117 | ACATAGGTCATCGTCTACAC 118 | >Rosalind_6720 119 | CAACTTGAGCTGGCATGAGCCATCTTGAGACCTGAGCTTGGCGGGTGAGCGGGACCCTGC 120 | GGGGATGCTGAGGAATTTAA 121 | >Rosalind_4087 122 | CGTCTACTGGGTGATATCAGTTTATGGATATTAGTCAGGGCCAGTAGCACGTTCAACCGA 123 | TAATAGCATTCCTGTCAAGAGAAGGTGCC 124 | >Rosalind_3719 125 | TAGTTCAAACCGGCACACGCAGAGTACGAAGTTAATTGGCCACAAACTCCGTAGCGAGGT 126 | AGTGAAAGGGTCTAACCAGGTCCTTAACCG 127 | >Rosalind_3136 128 | TCGTCTACGGTCGTCGAGTACACACCCTATAAGTTAATCACTATCCAGGGCCTAGGCAAC 129 | CGAAGTCAACGTATGATCTATTAGCTGCTAGGTC 130 | >Rosalind_0403 131 | GTCTGTCGGTACGGAGCTGGATCCTCTAGTCAGAGCTCATTCACCTTTGTACAGCACGAA 132 | AGGGCGAAAGTGGCGCGTATGAGGG 133 | >Rosalind_7168 134 | CGGCTCATGCACAACGGGTACCAAAAGAATCGGTCGCCCATGACGTTGGCGCGCCGGGAA 135 | GGCAACTTCGGCCACAAGTCAATTGCAA 136 | >Rosalind_9081 137 | ACAATCCGTTCTCTTATAGTACTAATCGTTCCTCGTCATTGCCGATTAAACTAAAGTACT 138 | CGCTCTCCCCGTTGCGACAGCATCGAAT 139 | >Rosalind_7258 140 | CCAGAGACGTCACTGCTCATAATAAATGAACGAACTCATGGAATCCTCTCTTGGCCTGAC 141 | GACTCTCTTGATATTCAGCGTTATAGGGCCA 142 | >Rosalind_8793 143 | ACCAGCGCTTCGCACTGGTGGAGCATCTTCTGCGCCGTGTCCGAGACTTGGATGCCCATG 144 | GTACTACGGTCCGAATCCTCAAAC 145 | >Rosalind_0960 146 | CGTGGAAACACGCTCTGGAACCGACCTCGCAACCAAGCTGGCGCGATCCGAGGCCTGTAT 147 | GCTTATAGACAGCCCCCGTACGACGGTTTTCGGGAG 148 | >Rosalind_3657 149 | CTTAGCCTTTTTCTGTGCTGAAACATAACCCAGAGAGGGAAGGTATGAGTGACCTTTGTG 150 | AATTGGATTTACACGGACCCCTTGAACAGGA 151 | >Rosalind_0968 152 | TTTAAAATGCGTTCTCTAGACGAATAACAGGTAGTTCAGCTGGGAAATGCTTTAGTGGTA 153 | CCCCCCGGCCTTGACATATGACTACGCCTAATAAGATT 154 | >Rosalind_5736 155 | ATAATCTTCGCGGCACACAACCACGACAATCTTAATTTTAGTGACAACCAGGAATTGTGC 156 | CGAAGTGCGGTACTATTGTATAAGGATATTCA 157 | >Rosalind_3967 158 | GCCTACTTATCACTATCTTCACGTCAACCTCAGGGCGCGCGCTGGCTATTAGACGGTACT 159 | TCCACTATATTGTATGGCGTTTCAA 160 | >Rosalind_1997 161 | TCTGGAGGAAGTGAGCCAGACGGCTCTGCACCAGAGCCATCCGGCCGCTTACGGCCGGAG 162 | CCTAATGACGTGGCCAGGGCTGTTTGTTTAGA 163 | >Rosalind_4977 164 | CCCGCTGGAACGCACCAAGGTTTCTTTACTATATTAGCGAGAAAATTCAACGCAAACTGA 165 | TATCAGCTAGTAAAAACTACAAGGGCAGGCGA 166 | >Rosalind_0440 167 | CACCTGGCCGTCTTCCCATAAAAGCGTTTTAAATTTCTTTGTATTTTCCGTTGACTACCT 168 | GTGTTATCCTGCTGATTCTACCTGTTCAGGCTTTAA 169 | >Rosalind_1331 170 | GTGCTCTCCATTTTACGTTCGTCCATGGTCCTTGTAGCCAGCCTCAAAAGGGTCTAACGC 171 | ATGGTCAGAGCTGCTGGTGTATTCCA 172 | >Rosalind_0408 173 | AGGAAGCTACTTGAACGTGGGTACTTGAACGCATGCACACTACGGTTAGGCGAGCAAACG 174 | TGCCTGGCCTTAAGTTACAACACGGAGACAAGG 175 | >Rosalind_8465 176 | GGAGGCCTTGCGCAGCAAATGGTTCCCACAAGCCTCAGCCGAGGAGCACCGTTAGCTGCT 177 | CCAAGTCAGCCAGTGCATAGCCTAC 178 | >Rosalind_7643 179 | AAGAGCAGCCGAACGGTACTTTCGTAGCTAGTTTTTCTCCGAACAAGTAAAAAGGCTCAT 180 | GTGCACGCGTTCGGCGGAACCTAAGGCCACC 181 | >Rosalind_7414 182 | GGGGTTTTTCAGACTCTTCTCCGTGTCTAGATGGTGGAGTTTTGGTCTCTCCGCGGGCCG 183 | AGTATCGTGACCTGGTTATTTCAG 184 | >Rosalind_9680 185 | GATTGGGTTTTTAATTTGGGTGTCTTTTCATAGAACGCGCACACTCCAACACTGGCGAGA 186 | AAAATCACAGTGAAGCCATCCTACTGCCCCAAAGT 187 | >Rosalind_5154 188 | GGACTTCACTCACTCTCTAAGCACACACTTTAGTACCATGCGTTCCCCTCCTTCCAACCG 189 | ATGCCCAAAGGCGAAAAAGCGCAGGATAAAGT 190 | >Rosalind_2284 191 | AAACCAGTGTTCTACATTGGGGTCCTATTGGCATTTGTTTTAAAGGTACGGCAATGCATG 192 | GGAACCGTGAGTCTCATTCAACCGGAATATCAAACGT 193 | >Rosalind_9291 194 | GACACCTTGAGTAATATGTAACATTTAACCCCGCTCAAAGCAATGCGTACGAGCACCCCC 195 | GTGGATGTTCCTGTCGCAGCGTAGTGTACACATGACGA 196 | >Rosalind_5465 197 | TTGATTATCAGGCTTTCGCAAGATAGCGGCGTCCCTACCGTAATAAACCTATAGCCAGAT 198 | CGAGGAAATCGCTCCACTGTGTCATCGATGTGTGT 199 | >Rosalind_5347 200 | CAGCCACCCTCCTGTGTGATTACCCAGAGATACGTGTGCATGTGGGTAGGTCTTACTAAC 201 | TTAATGGAATTTCCTTCCGTAACTCC 202 | >Rosalind_0878 203 | GCGTTGCTGGGGAGCCTTTTCGCCTCAAGCAAGCCCAACGCCAACCCGTTAAGCCCCTGT 204 | AAGGACTACAAGGACCTAACGACCGGCCTCTGGA 205 | >Rosalind_3980 206 | ACCTTCACGTTCCCTATTAGCGACTGTCGACATAGAAATTCTTTAATATACGCTATCAGA 207 | CACGTCATAAAATGCTAAGGATGGAGCTAAAG 208 | >Rosalind_8601 209 | CCTTGGTTACTTATTGGTTATGGTATGTCAATCCACAGTCAGCAATGGTCAGTTTGGTGA 210 | CTAGGGCCAATTCGATCGTTATTCGGTTTGAGATA 211 | >Rosalind_5914 212 | AGACGCAGAGATGCCCTGAGCTATTAATGCGACATTGTAACACTGGTTGCGGTACTCCGT 213 | TCCCCGTATGTCCCTGTCGTCTGCTGTACGCTAACCAG 214 | >Rosalind_7402 215 | CCGGCATGGGGGGGGCTAATGTGATACATAGTTCGTGCTCCGCGCGGGGGAGTGGGGATC 216 | TCTTATGGGTCAGGAGTAGTTC 217 | >Rosalind_8779 218 | TTCTTCTATGAAATGGTTGATCACTTCATTCATCGAATAATGGCTTGAAATAGGGACGCG 219 | TTGCGAAGCGGGCCGATCGACG 220 | >Rosalind_7598 221 | GTTACGGCGACGGTGTGTTCATCAGAGACGACGCTATAGGGGGTGTTTTTAATCTTTGGC 222 | GTAATACACGTGAGAATGCAACCATTGTGTG 223 | >Rosalind_5989 224 | TTCGCCTTTACAACGTGCATTTGATCACGCAGTCTAGAGAAGAGACATGACCCGCGAAAT 225 | GGACGATGCCGACCGACATAACTATGAT 226 | >Rosalind_1565 227 | CGCACCACTCCCTCATGCGCTGGTCCGCAAACCTAGGTGGTTGCTGCGAAATCCCCCCTT 228 | GGACTGGCCCTTTCGCTCGTTGCTCTGTA 229 | >Rosalind_5738 230 | ATAGACTTGTCCCGTTAAATCAAGCTCGGGCAGGAATCATGAAGACTGTGGATAGACACA 231 | ACATGGCCCGTTGATCAATAAGGGG 232 | >Rosalind_4737 233 | CACCCTCTGCGCCTATCCAACTTAACGGCTCGACGACTTGGATGCGCAGAGGTTTCCATA 234 | ATAACGAGTGGCGTGGCGGAGGTGA 235 | >Rosalind_7372 236 | GTTCACCTTCTCTCCGCGACTGGGCCGATTTGGGAGAACAGATTGTCCACCGCGCTAAGA 237 | ATGCACATGATGTATTTTTTCGT 238 | >Rosalind_7420 239 | GACCCCCACACAGAAGACCGTTAGATGTAATAACGCACGCCGGGGACCAAAGGCTCCAGC 240 | TGATCATCTTCGTGCCAGAGTGACGTAGCCC 241 | >Rosalind_5936 242 | ATAGCCTCGAAAGTGATAATGAGTTTGATCCATAGACCTACTCTAACTGAAGGGAGCCCT 243 | CTCCAAAGCATGCTGAGCAGCTATGGTGGTTACAG 244 | >Rosalind_3830 245 | ACCGCTAAATTTTCGTGCAAACTAGTGCTATCACCTTTCTCGAAGGCGAAAACCCACCGC 246 | TAGTACAACGCTATATGAACAAGACCAGTCATCT 247 | >Rosalind_0194 248 | TGTCCCAAATGTTAATTAGCGGCTGCACGCCCCACTGAGACAGACGGGCCCACGAACTAC 249 | CACGGCTCTTCGCAGACATAG 250 | >Rosalind_8182 251 | CTTGACCTTTCGTGGGGGGCGCTATAGACCAACGGTTGTTACTTTGTACCTCACCGCGAG 252 | GACCGCTTGACGTCGCGCCAGACCC 253 | >Rosalind_1208 254 | GGTTGGAAACAATCCCCTAGTCACGCGTGGGTTTCTGGTCATCAGCAATTTAATAGGAAC 255 | AACACCAATGAAACGTGGATATCTTCG 256 | >Rosalind_3721 257 | ATACACTTTATTAAGCAGTCTCCATAATACGGGTTTGACGGGGGGTACAGTGGTGAGAGA 258 | CCTGTCTTTGAGCGAGAGCACGTCCCTTTTCGATTAACAC 259 | >Rosalind_7334 260 | AGCAGGTTAGCCATTCTATGCTTTGGTAGAAGCTACTGCGTACTCCATAACTCTTCTTTG 261 | TCGACTTTATCTGCCATAAACCCCCTCGGTACCACG 262 | >Rosalind_2868 263 | CGTTCACCCCTCACACGAGGGGTAGGTAGTCCCAAACCCATTCATTTTGATCAGCGTCTC 264 | CTGTGTCTCGTGGAACGCGC 265 | >Rosalind_1600 266 | TAGAAATATAAGTTGGGATTAATACGGGCACCCGAGCGGCCATTCACCGTAGAGTCTTAT 267 | GCAGAGGGAGTGTTTTATTTTTTCGCACTGATTTCGTCA 268 | >Rosalind_8354 269 | ATACAAAGAAACAGATGACTGACAGTTTATCACGGATCCCCCGTTCCCCTCGAGCTATCA 270 | TGTCAGAAACGCTGTTGAATG 271 | >Rosalind_1628 272 | ACCTAATGGTCCCACTTTGCGGGGTTTTGTTATGGTGTCTTCGACGGCTTACATGAGAGG 273 | CGGGTTTCACCATAGCCGCCCCCTCTAGGGTAAAGACG 274 | >Rosalind_7493 275 | GGGTTTATTGGCGTCTCCGAACCATTGCATCTAGACGAGCACGTCAAGCAGAGTGCACGT 276 | GCAAGTCCTGGAATATATAGATACATTACATCGCGAA 277 | >Rosalind_2217 278 | GTTATCCACTCCGCTGGGGCGTGCAGCGCCTATCAACGTATGTCTAGAGTCACTATATAT 279 | ACGGACAGACACCTCTGCAGCCGTGGTACCTTCGACA 280 | >Rosalind_9096 281 | CTTAGTCATGACTTGGTACTCCCTGCAACTTCAACTAAGGCTTTAGAGCCGTCATCGAAG 282 | GCAGGATTAATTATGTGAACCACGAG 283 | >Rosalind_9853 284 | GACACCAAAAAACCCTTGGGCACCGCAATTTACCTCCTAATGAAGTTCTAAGTCCCACAT 285 | GTCAGACGGCGGTCGCGGAAGAGTTGATGATCGAT 286 | >Rosalind_8901 287 | CTCCACGTCCGATCGCGCCCTGTTGACCGCAAAATTGTACACGTCACCCCAGTAGCTGAA 288 | GGCACCCTGACCCCCTTATA 289 | >Rosalind_4330 290 | CGGACTTGCCTTCTTCACACTCTTACCAAATCCTATAGGGGCCGCGAGTAAATAATGCGG 291 | GCAATTCAGGTCCGACCTGGTGGCTTCTCC 292 | >Rosalind_2343 293 | AGCTGCGTTAGACGCGGAACGGCCCTACCCTTCACCATGAAAACGTTGGAACAGAGTACA 294 | GATCCGTTCCGGGCTGCAGTTGTGGACCAATAATA 295 | >Rosalind_0302 296 | GGACAGCCGCCTTATAAATTCGCCCCTCGGTCCTAGCCTTTTGGTCCGGGCCGACGCGAA 297 | AGAAGGGTTGAAGGGGTAAGGTA 298 | >Rosalind_7332 299 | ACTGATAAAAAACCTTAGATACCTGGACGCCTCAAACTTAGAAAGTGGTACTTTCAGACA 300 | GATATGGTCGCACGCTTATTGGGCCAAGTCACTGT 301 | -------------------------------------------------------------------------------- /rosalind_splc.txt: -------------------------------------------------------------------------------- 1 | >Rosalind_0641 2 | ATGAGCCAAATTAACGCAGGGTATTGGCGCACCACCAAGAGGGTTCCTGTTTCAAGTCGCGCACCGAGACGCTGATACACGGTACCACTAAAAGAATTCGTAGACTAAGGGCGGTCAGTGGTGATGTATGCGACTAGCCTAGATTATCTACATGAATGAGGTTGCTATGGCACACACCTCGATGGTATTTATGGCCGACGGTCCCTTGCAGGGATAGCGAAGCAGCAAATGTCAGTGTGACACCTCGGAACAGCCTAGGAAACAGCCGAGCTCCCATTGTGGAAAGATTGCCGGTCTACAACCTCAGATTGTTACACTTTGGGCACTACCGTCTTTAATCGTGGGGCACCAGTTACAGTTAGACGCCCGTCTCTCCCCCGTAGGGTTCTGGCAATGGACAGCCCCGTTGTCGGGTGCATCTTGCATTGCCTGTCTGCATAGGCGATCGCTCTTCAACAACTCTAATGAGGAATAATACATTAATTCGTAGACAAGCTTAAAGTTGGTATTGAGTTCGACTCTCTCGCGACTGGAGCGACAGAATAATTACCGTGCTCTCTTCCCGCGCTATAGACTGCCTTTAAATTAAACGGGGGAGAAAGTGACGGGCGAGTATGATACACTTGTACTTAGATGTTTGCATCATTAGGACGATGTTGGTTGTGATGTGACTCTGCCCAGACCGTAAAATGGTAAACTACTAACCACCCGCCTACGCGAAAGTTCGACGATTGATGCATCTCGCAGTAATTTCGTTCGTGAGGAGAGCCTCTCGCGGAAGCCGGCCTCGCACATCAGGCACCGTCCATATCTTCGGATCCCGCAGCCCCGCTAGCGGAAAGAACCAGTTGTTCGTCCAATGCAAAATTGCTGCGCTTAAGGCAGTCTTTGGGTCTCACACACCGCGCTTGATAGACTGGGTACCACAACACTCTAGTTCGTGCCGAATCGTTACCCGCCTAAGTTTATGCTTACGGCCCTAG 3 | >Rosalind_1137 4 | CGCCCGTCTCTCCCCCGTAGGGTTCTGGCAATGGACAGCCCCGTTGTCGG 5 | >Rosalind_9199 6 | GACGGTCCCTTGCAGGGATAGCGAAGCA 7 | >Rosalind_7080 8 | AGACAAGCTTAAAGTTGGTATTGAGTTCGACTCTCTCGCGACTG 9 | >Rosalind_6256 10 | GTTGTGATGTGACTCTGC 11 | >Rosalind_7700 12 | AACTACTAACCACCCGCCTACGCGAAAGTTCGACG 13 | >Rosalind_7475 14 | ATAGGCGATCGCTCTTCAACAACTCTAATGAGG 15 | >Rosalind_9439 16 | TATAGACTGCCTTTAAATTAAACGGGGGAGAAAGTGA 17 | >Rosalind_0087 18 | GTGGAAAGATTGCCGGTCTACAACCTCA 19 | >Rosalind_6986 20 | GAGGAGAGCCTCTCGCGGAAGCCGGCCTCGCACATCAGG 21 | >Rosalind_8275 22 | GCTTGATAGACTGGGTACCACAACACTCTAGT 23 | >Rosalind_8429 24 | CGGATCCCGCAGCCCCGCTAGCGGAAAGAACCAGTTGTTCGTCCAAT 25 | >Rosalind_6180 26 | AGGGCGGTCAGTGGTGATGTATGCGACTAGCCTAGATTATC 27 | >Rosalind_5136 28 | AGTCGCGCACCGAGACGCTG 29 | --------------------------------------------------------------------------------