├── codon-table-grouped.csv
├── vaccine_dict.json
├── 3rd-gc.py
├── README.md
├── 3rd-gc.go
├── vaccine-s.fasta
├── ncov-s.fasta
├── part-2-reverse-engineering-source-code-of-the-biontech-pfizer-vaccine.md
├── combined-codons-s-protein.csv
├── side-by-side.csv
└── reverse-engineering-source-code-of-the-biontech-pfizer-vaccine.md
/codon-table-grouped.csv:
--------------------------------------------------------------------------------
1 | aminoacid,codon
2 | A,GCA
3 | A,GCC
4 | A,GCG
5 | A,GCT
6 | C,TGC
7 | C,TGT
8 | D,GAC
9 | D,GAT
10 | E,GAA
11 | E,GAG
12 | F,TTC
13 | F,TTT
14 | G,GGA
15 | G,GGC
16 | G,GGG
17 | G,GGT
18 | H,CAC
19 | H,CAT
20 | I,ATA
21 | I,ATC
22 | I,ATT
23 | K,AAA
24 | K,AAG
25 | L,CTA
26 | L,CTC
27 | L,CTG
28 | L,CTT
29 | L,TTA
30 | L,TTG
31 | M,ATG
32 | N,AAC
33 | N,AAT
34 | P,CCA
35 | P,CCC
36 | P,CCG
37 | P,CCT
38 | Q,CAA
39 | Q,CAG
40 | R,AGA
41 | R,AGG
42 | R,CGA
43 | R,CGC
44 | R,CGG
45 | R,CGT
46 | s,TAA
47 | s,TAG
48 | s,TGA
49 | S,AGC
50 | S,AGT
51 | S,TCA
52 | S,TCC
53 | S,TCG
54 | S,TCT
55 | T,ACA
56 | T,ACC
57 | T,ACG
58 | T,ACT
59 | V,GTA
60 | V,GTC
61 | V,GTG
62 | V,GTT
63 | W,TGG
64 | Y,TAC
65 | Y,TAT
66 |
--------------------------------------------------------------------------------
/vaccine_dict.json:
--------------------------------------------------------------------------------
1 | {"ATG": "ATG", "TTT": "TTC", "GTT": "GTG", "CTT": "CTG", "TTA": "CTG", "TTG": "CTG", "CCA": "CCC", "CTA": "CTG", "GTC": "GTG", "TCT": "AGC", "AGT": "AGC", "CAG": "CAG", "TGT": "TGC", "AAT": "AAC", "ACA": "ACC", "ACC": "ACC", "AGA": "AGA", "ACT": "ACC", "CAA": "CAG", "CCC": "CCC", "CCT": "CCT", "GCA": "GCC", "TAC": "TAC", "TTC": "TTC", "CGT": "AGA", "GGT": "GGC", "TAT": "TAC", "GAC": "GAC", "AAA": "AAG", "TCC": "AGC", "TCA": "AGC", "CAT": "CAC", "TGG": "TGG", "GCT": "GCC", "ATA": "ATC", "GGG": "GGA", "AAG": "AAG", "AGG": "CGG", "GAT": "GAC", "AAC": "AAC", "GAG": "GAG", "GGC": "GGC", "ATT": "ATC", "TCG": "AGC", "GAA": "GAG", "CAC": "CAC", "GCG": "GCC", "TGC": "TGC", "GGA": "GGC", "GTG": "GTG", "ACG": "ACC", "CTC": "CTG", "GTA": "GTG", "ATC": "ATC", "GCC": "GCC", "AGC": "AGC", "CTG": "CTG", "CGG": "CGG", "CGC": "CGG", "TAA": "TGA"}
2 |
--------------------------------------------------------------------------------
/3rd-gc.py:
--------------------------------------------------------------------------------
1 | import csv
2 |
3 |
def read_csv(filename):
    """Return the data rows of the CSV file ``filename``, skipping the header.

    The first row is treated as a header and discarded. Every remaining row
    is returned as a list of strings, in file order. An empty file yields an
    empty list.
    """
    # newline='' lets the csv module do its own line-ending handling, so that
    # line breaks inside quoted fields are preserved.
    with open(filename, 'rt', newline='') as fp:
        reader = csv.reader(fp, delimiter=',')
        next(reader, None)  # drop the header row, if there is one
        return list(reader)
13 |
14 |
# Build the codon -> amino acid lookup from the grouped codon table
# (column 0 is the amino acid letter, column 1 the codon).
codons = read_csv('codon-table-grouped.csv')

c2s = {}
for c in codons:
    c2s[c[1]] = c[0]
print(c2s)


# Each row is (abspos, virus codon, vaccine codon).
virvac = read_csv("side-by-side.csv")

matches = 0

for element in virvac:

    vir = element[1]
    vac = element[2]

    print(f'{vir} v {vac}, amino: {c2s[vir]} == {c2s[vac]}.')

    # Base case: keep the virus codon unchanged.
    our = vir

    if vir[2] == 'G' or vir[2] == 'C':
        print('codon ended on G or C already, not doing anything')

    else:
        prop = vir[:2] + "G"
        print(f'Attempting G substitution, new candidate {prop}')

        if c2s[vir] == c2s[prop]:
            print('amino acid still the same, done!')
            our = prop
        else:
            # Build the C candidate before reporting it, so the message shows
            # the codon that is actually being tried.
            prop = vir[:2] + "C"
            print(f'Oops, amino acid changed. Trying C, new candidate {prop}')

            if c2s[vir] == c2s[prop]:
                print('Amino acid still the same, done!')
                our = prop

    if vac == our:
        print('Matched the vaccine!')
        matches += 1
    else:
        print('No Match.')

# Percentage of codons for which the heuristic reproduced the vaccine codon.
if virvac:
    print(100 * matches / len(virvac))
else:
    print('side-by-side.csv contains no codon rows')
63 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | The big BNT162b2 archive
2 | ------------------------
3 | All vaccine data here is sourced from this [World Health
4 | Organization
5 | document](https://mednet-communities.net/inn/db/media/docs/11889.doc).
6 |
7 | This describes the RNA contents of the BNT162b2 SARS-CoV-2 vaccine. We
8 | should all be very grateful that BioNTech has shared this data with us. And
9 | of course we should also be grateful to the many many researchers that
10 | worked for decades to bring the state of the art to the point that such a
11 | vaccine could be developed. It is marvelous.
12 |
13 | This GitHub repository is a companion to [Reverse Engineering the source code of the BioNTech/Pfizer SARS-CoV-2
14 | Vaccine](https://berthub.eu/articles/posts/reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/)
15 | and [part
16 | 2](https://berthub.eu/articles/posts/part-2-reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/).
17 |
18 | In part 2 we find the challenge: Can we find an algorithm that turns the
19 | viral RNA into the vaccine RNA?
20 |
21 | If so that would help explain how the vaccine is designed. It would also be
22 | useful for other researchers to turn viral RNA into RNA that gets converted
23 | into proteins efficiently.
24 |
25 | Details are in [part 2](https://berthub.eu/articles/posts/part-2-reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/).
26 |
27 | Data files
28 | ----------
29 |
30 | * [ncov-s.fasta](ncov-s.fasta): the unprocessed RNA of the virus S protein
31 | * [vaccine-s.fasta](vaccine-s.fasta): the unprocessed RNA of the vaccine S protein
32 | * [side-by-side.csv](side-by-side.csv): the two files aligned, with viral and vaccine codons side by side
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/3rd-gc.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/csv"
5 | "fmt"
6 | "log"
7 | "os"
8 | )
9 |
10 |
// readCsvFile reads the CSV file at filePath and returns all of its records,
// header row included. If the file cannot be opened or parsed, the error is
// logged and the program exits.
//
// Adapted from https://stackoverflow.com/questions/24999079/reading-csv-file-in-go
func readCsvFile(filePath string) [][]string {
	f, err := os.Open(filePath)
	if err != nil {
		// Fatalf rather than Fatal: Fatal formats like fmt.Print, which puts
		// no space between a string operand and the error that follows it.
		log.Fatalf("Unable to read input file %s: %v", filePath, err)
	}
	defer f.Close()

	records, err := csv.NewReader(f).ReadAll()
	if err != nil {
		log.Fatalf("Unable to parse file as CSV for %s: %v", filePath, err)
	}

	return records
}
27 |
28 |
29 |
30 | func main() {
31 | // Read the codon/amino acid table
32 | codons := readCsvFile("codon-table-grouped.csv")[1:]
33 |
34 | c2s:= make(map[string]string)
35 | for _, element := range codons {
36 | c2s[element[1]]=element[0]
37 | }
38 |
39 |
40 | // read the codons
41 | virvac := readCsvFile("side-by-side.csv")[1:]
42 |
43 | matches := 0.0
44 | for _, element := range virvac {
45 | vir:=element[1]
46 | vac:=element[2]
47 | var our string
48 | var prop string
49 | fmt.Printf("%s v %s, amino: %s == %s. ",
50 | vir, vac,
51 | c2s[vir], c2s[vac])
52 |
53 | // base case, don't do anything
54 | our = vir
55 |
56 | // don't do anything if codon ends on G or C already
57 | if(vir[2] == 'G' || vir[2] =='C') {
58 | fmt.Printf("Codon ended on G or C already, not doing anything.")
59 | } else {
60 | prop = vir[:2]+"G"
61 | fmt.Printf("Attempting G substitution, new candidate '%s'. ", prop)
62 | if(c2s[vir] == c2s[prop]) {
63 | fmt.Printf("Amino acid still the same, done!")
64 | our = prop
65 | } else {
66 | fmt.Printf("Oops, amino acid changed. Trying C, new candidate '%s'. ", prop)
67 | prop = vir[:2]+"C"
68 | if(c2s[vir] == c2s[prop]) {
69 | fmt.Printf("Amino acid still the same, done!")
70 | our=prop
71 | }
72 |
73 | }
74 |
75 | }
76 |
77 | fmt.Printf(" ")
78 | if(vac == our) {
79 | fmt.Printf("Matched the vaccine!\n")
80 | matches++
81 | } else {
82 | fmt.Printf("No match.\n")
83 | }
84 | }
85 | fmt.Printf("%.1f%%\n", 100.0*matches/float64(len(virvac)))
86 |
87 | }
88 |
--------------------------------------------------------------------------------
/vaccine-s.fasta:
--------------------------------------------------------------------------------
1 | >xxx BNT162b2
2 | ATGTTCGTGTTCCTGGTGCTGCTGCCTCTGGTGTCCAGCCAGTGTG
3 | TGAACCTGACCACCAGAACACAGCTGCCTCCAGCCTACACCAACAGCTTT
4 | ACCAGAGGCGTGTACTACCCCGACAAGGTGTTCAGATCCAGCGTGCTGCA
5 | CTCTACCCAGGACCTGTTCCTGCCTTTCTTCAGCAACGTGACCTGGTTCC
6 | ACGCCATCCACGTGTCCGGCACCAATGGCACCAAGAGATTCGACAACCCC
7 | GTGCTGCCCTTCAACGACGGGGTGTACTTTGCCAGCACCGAGAAGTCCAA
8 | CATCATCAGAGGCTGGATCTTCGGCACCACACTGGACAGCAAGACCCAGA
9 | GCCTGCTGATCGTGAACAACGCCACCAACGTGGTCATCAAAGTGTGCGAG
10 | TTCCAGTTCTGCAACGACCCCTTCCTGGGCGTCTACTACCACAAGAACAA
11 | CAAGAGCTGGATGGAAAGCGAGTTCCGGGTGTACAGCAGCGCCAACAACT
12 | GCACCTTCGAGTACGTGTCCCAGCCTTTCCTGATGGACCTGGAAGGCAAG
13 | CAGGGCAACTTCAAGAACCTGCGCGAGTTCGTGTTTAAGAACATCGACGG
14 | CTACTTCAAGATCTACAGCAAGCACACCCCTATCAACCTCGTGCGGGATC
15 | TGCCTCAGGGCTTCTCTGCTCTGGAACCCCTGGTGGATCTGCCCATCGGC
16 | ATCAACATCACCCGGTTTCAGACACTGCTGGCCCTGCACAGAAGCTACCT
17 | GACACCTGGCGATAGCAGCAGCGGATGGACAGCTGGTGCCGCCGCTTACT
18 | ATGTGGGCTACCTGCAGCCTAGAACCTTCCTGCTGAAGTACAACGAGAAC
19 | GGCACCATCACCGACGCCGTGGATTGTGCTCTGGATCCTCTGAGCGAGAC
20 | AAAGTGCACCCTGAAGTCCTTCACCGTGGAAAAGGGCATCTACCAGACCA
21 | GCAACTTCCGGGTGCAGCCCACCGAATCCATCGTGCGGTTCCCCAATATC
22 | ACCAATCTGTGCCCCTTCGGCGAGGTGTTCAATGCCACCAGATTCGCCTC
23 | TGTGTACGCCTGGAACCGGAAGCGGATCAGCAATTGCGTGGCCGACTACT
24 | CCGTGCTGTACAACTCCGCCAGCTTCAGCACCTTCAAGTGCTACGGCGTG
25 | TCCCCTACCAAGCTGAACGACCTGTGCTTCACAAACGTGTACGCCGACAG
26 | CTTCGTGATCCGGGGAGATGAAGTGCGGCAGATTGCCCCTGGACAGACAG
27 | GCAAGATCGCCGACTACAACTACAAGCTGCCCGACGACTTCACCGGCTGT
28 | GTGATTGCCTGGAACAGCAACAACCTGGACTCCAAAGTCGGCGGCAACTA
29 | CAATTACCTGTACCGGCTGTTCCGGAAGTCCAATCTGAAGCCCTTCGAGC
30 | GGGACATCTCCACCGAGATCTATCAGGCCGGCAGCACCCCTTGTAACGGC
31 | GTGGAAGGCTTCAACTGCTACTTCCCACTGCAGTCCTACGGCTTTCAGCC
32 | CACAAATGGCGTGGGCTATCAGCCCTACAGAGTGGTGGTGCTGAGCTTCG
33 | AACTGCTGCATGCCCCTGCCACAGTGTGCGGCCCTAAGAAAAGCACCAAT
34 | CTCGTGAAGAACAAATGCGTGAACTTCAACTTCAACGGCCTGACCGGCAC
35 | CGGCGTGCTGACAGAGAGCAACAAGAAGTTCCTGCCATTCCAGCAGTTTG
36 | GCCGGGATATCGCCGATACCACAGACGCCGTTAGAGATCCCCAGACACTG
37 | GAAATCCTGGACATCACCCCTTGCAGCTTCGGCGGAGTGTCTGTGATCAC
38 | CCCTGGCACCAACACCAGCAATCAGGTGGCAGTGCTGTACCAGGACGTGA
39 | ACTGTACCGAAGTGCCCGTGGCCATTCACGCCGATCAGCTGACACCTACA
40 | TGGCGGGTGTACTCCACCGGCAGCAATGTGTTTCAGACCAGAGCCGGCTG
41 | TCTGATCGGAGCCGAGCACGTGAACAATAGCTACGAGTGCGACATCCCCA
42 | TCGGCGCTGGAATCTGCGCCAGCTACCAGACACAGACAAACAGCCCTCGG
43 | AGAGCCAGAAGCGTGGCCAGCCAGAGCATCATTGCCTACACAATGTCTCT
44 | GGGCGCCGAGAACAGCGTGGCCTACTCCAACAACTCTATCGCTATCCCCA
45 | CCAACTTCACCATCAGCGTGACCACAGAGATCCTGCCTGTGTCCATGACC
46 | AAGACCAGCGTGGACTGCACCATGTACATCTGCGGCGATTCCACCGAGTG
47 | CTCCAACCTGCTGCTGCAGTACGGCAGCTTCTGCACCCAGCTGAATAGAG
48 | CCCTGACAGGGATCGCCGTGGAACAGGACAAGAACACCCAAGAGGTGTTC
49 | GCCCAAGTGAAGCAGATCTACAAGACCCCTCCTATCAAGGACTTCGGCGG
50 | CTTCAATTTCAGCCAGATTCTGCCCGATCCTAGCAAGCCCAGCAAGCGGA
51 | GCTTCATCGAGGACCTGCTGTTCAACAAAGTGACACTGGCCGACGCCGGC
52 | TTCATCAAGCAGTATGGCGATTGTCTGGGCGACATTGCCGCCAGGGATCT
53 | GATTTGCGCCCAGAAGTTTAACGGACTGACAGTGCTGCCTCCTCTGCTGA
54 | CCGATGAGATGATCGCCCAGTACACATCTGCCCTGCTGGCCGGCACAATC
55 | ACAAGCGGCTGGACATTTGGAGCAGGCGCCGCTCTGCAGATCCCCTTTGC
56 | TATGCAGATGGCCTACCGGTTCAACGGCATCGGAGTGACCCAGAATGTGC
57 | TGTACGAGAACCAGAAGCTGATCGCCAACCAGTTCAACAGCGCCATCGGC
58 | AAGATCCAGGACAGCCTGAGCAGCACAGCAAGCGCCCTGGGAAAGCTGCA
59 | GGACGTGGTCAACCAGAATGCCCAGGCACTGAACACCCTGGTCAAGCAGC
60 | TGTCCTCCAACTTCGGCGCCATCAGCTCTGTGCTGAACGATATCCTGAGC
61 | AGACTGGACCCTCCTGAGGCCGAGGTGCAGATCGACAGACTGATCACAGG
62 | CAGACTGCAGAGCCTCCAGACATACGTGACCCAGCAGCTGATCAGAGCCG
63 | CCGAGATTAGAGCCTCTGCCAATCTGGCCGCCACCAAGATGTCTGAGTGT
64 | GTGCTGGGCCAGAGCAAGAGAGTGGACTTTTGCGGCAAGGGCTACCACCT
65 | GATGAGCTTCCCTCAGTCTGCCCCTCACGGCGTGGTGTTTCTGCACGTGA
66 | CATATGTGCCCGCTCAAGAGAAGAATTTCACCACCGCTCCAGCCATCTGC
67 | CACGACGGCAAAGCCCACTTTCCTAGAGAAGGCGTGTTCGTGTCCAACGG
68 | CACCCATTGGTTCGTGACACAGCGGAACTTCTACGAGCCCCAGATCATCA
69 | CCACCGACAACACCTTCGTGTCTGGCAACTGCGACGTCGTGATCGGCATT
70 | GTGAACAATACCGTGTACGACCCTCTGCAGCCCGAGCTGGACAGCTTCAA
71 | AGAGGAACTGGACAAGTACTTTAAGAACCACACAAGCCCCGACGTGGACC
72 | TGGGCGATATCAGCGGAATCAATGCCAGCGTCGTGAACATCCAGAAAGAG
73 | ATCGACCGGCTGAACGAGGTGGCCAAGAATCTGAACGAGAGCCTGATCGA
74 | CCTGCAAGAACTGGGGAAGTACGAGCAGTACATCAAGTGGCCCTGGTACA
75 | TCTGGCTGGGCTTTATCGCCGGACTGATTGCCATCGTGATGGTCACAATC
76 | ATGCTGTGTTGCATGACCAGCTGCTGTAGCTGCCTGAAGGGCTGTTGTAG
77 | CTGTGGCAGCTGCTGCAAGTTCGACGAGGACGATTCTGAGCCCGTGCTGA
78 | AGGGCGTGAAACTGCACTACACATGA
79 |
--------------------------------------------------------------------------------
/ncov-s.fasta:
--------------------------------------------------------------------------------
1 | >lcl|NC_045512.2_cds_YP_009724390.1_3 [gene=S] [locus_tag=GU280_gp02] [db_xref=GeneID:43740568] [protein=surface glycoprotein] [protein_id=YP_009724390.1] [location=21563..25384] [gbkey=CDS]
2 | ATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCAAT
3 | TACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGT
4 | TTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATGTC
5 | TCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTT
6 | CCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCT
7 | ACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTT
8 | TTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCGA
9 | ATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAA
10 | AAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATT
11 | AATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTA
12 | ACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGG
13 | TTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAAT
14 | GAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGA
15 | AATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGT
16 | TAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTT
17 | TATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCAT
18 | TTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGC
19 | AGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGAT
20 | TATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTA
21 | AGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGA
22 | TATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTT
23 | CCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTT
24 | CTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAA
25 | ATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTG
26 | CCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGA
27 | TTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAACCA
28 | GGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACT
29 | CCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTG
30 | AACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCA
31 | GACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGT
32 | GCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCA
33 | CAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAAC
34 | TGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAATA
35 | GCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAA
36 | TTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATT
37 | TATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGC
38 | CTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTT
39 | TGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGAC
40 | CTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGA
41 | GTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAA
42 | TTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACA
43 | AGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATC
44 | CTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTT
45 | TGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTAC
46 | TAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATG
47 | TCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGA
48 | ACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTC
49 | AAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACA
50 | TTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTG
51 | AATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGG
52 | TGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCC
53 | AAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCAT
54 | GGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTAT
55 | GACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGAC
56 | TCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACATAA
57 |
--------------------------------------------------------------------------------
/part-2-reverse-engineering-source-code-of-the-biontech-pfizer-vaccine.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Reverse Engineering Source Code of the Biontech Pfizer Vaccine: Part 2"
3 | date: 2020-12-31T12:22:03+01:00
4 | draft: false
5 | images:
6 | - dna-codon-table.png
7 | ---
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 | All BNT162b2 vaccine data on this page is sourced from this [World Health
16 | Organization
17 | document](https://mednet-communities.net/inn/db/media/docs/11889.doc).
18 |
19 | > This is a living page, shared already so people can get going! But
20 | > check back frequently for updates.
21 |
22 | In short: the vaccine mRNA has been optimized by the manufacturer by
23 | changing bits of RNA from (say) `UUU` to `UUC`, and people would like to
24 | understand the logic behind these changes. This challenge is quite close to what
25 | cryptologists and reverse engineering people encounter regularly. On this
26 | page, you'll find all the details you need to get cracking to reverse
27 | engineer just HOW the vaccine has been optimized.
28 |
29 | I thought this would just be a fun puzzle, but I have just been informed that
30 | figuring out the optimization procedure & documenting it is tremendously
31 | important for researchers around the world, as this would help them design
32 | code for proteins and vaccines.
33 |
34 | So, if you want to help vaccine research, do read on!
35 |
36 | The leader board
37 | ----------------
38 | Here are the current best entrants to the optimization algorithm (average of 20 runs):
39 |
40 |
41 | Name Codon Match Nucleotide Match Author Comment
42 |
43 | codon mapping
44 | 79.51 %
45 | 91.52 %
46 | Harry Harpel
47 | A simple static codon mapping
48 |
49 |
50 | most-frequent.py
51 | 78.57 %
52 | 91.08 %
53 | Seo Sanghyeon
54 | Codon frequency optimization using python_codon_tables
55 |
56 |
57 | dnachisel
58 | 76.99 %
59 | 91.06 %
60 | Erik Brauer
61 | DNAChisel algorithm
62 |
63 |
64 | dnachisel
65 | 76.89 %
66 | 90.89 %
67 | Pedro José Pereira Vieito
68 | DNAChisel algorithm
69 |
70 |
71 | remap
72 | 71.11 %
73 | 88.59 %
74 | Howard Chu
75 | Map every codon to an amino acid, pick the best codon for that amino acid
76 |
77 |
78 | 3rd-cg.py
79 | 60.83 %
80 | 85.11 %
81 | Peter Kuhar
82 | If third position is already 'G' or 'C', no change. Otherwise replace third position by a C, if protein still matches, done. Otherwise try a G.
83 |
84 |
85 | 3rd-gc.go
86 | 53.06 %
87 | 81.55 %
88 | bert hubert
89 | If third position is already 'G' or 'C', no change. Otherwise replace third position by a G, if protein still matches, done. Otherwise try a C.
90 |
91 |
92 | dnachisel
93 | 46.33 %
94 | 79.48 %
95 | Naomi Jacobs
96 | DNAChisel algorithm
97 |
98 |
99 | NOP
100 | 27.63 %
101 | 72.23 %
102 |
103 | Does not do any optimization at all
104 |
105 |
106 |
107 | Please send updates to bert@hubertnet.nl or
108 | [@PowerDNS_Bert](https://twitter.com/PowerDNS_Bert).
109 |
110 |
111 | BioNTech
112 | --------
113 | We should all be very grateful that BioNTech has shared this data with us.
114 | And of course we should also be grateful to the many many researchers and
115 | lab workers that worked for decades to bring the state of the art to the
116 | point that such a vaccine could be developed. It is marvelous.
117 |
118 | Because it is so marvelous, I want to understand everything about the
119 | vaccine. I wrote a page [Reverse Engineering the source code of the BioNTech/Pfizer SARS-CoV-2
120 | Vaccine](https://berthub.eu/articles/posts/reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/)
121 | that describes in some detail what is in the mRNA of the vaccine. It helps
122 | to read this page before continuing, I promise you it will be interesting.
123 |
124 | The post left open some questions however, and this is where it gets
125 | fascinating.
126 |
127 | The codon optimization
128 | ----------------------
129 | The vaccine contains RNA code for a very *slightly* modified copy of the
130 | SARS-CoV-2 S protein.
131 |
132 | The RNA code of the vaccine itself however is *highly* modified from the viral original!
133 | This has been done by the manufacturer, based on their understanding of
134 | nature.
135 |
136 | And from what we understand, these modifications make the vaccine **much
137 | much more** effective. It would be a lot of fun to understand these
138 | modifications. It might for example explain why the Moderna vaccine needs
139 | 100 micrograms and the BioNTech vaccine only 30 micrograms.
140 |
141 | Here is the beginning of the S protein in both the virus and the BNT162b2
142 | vaccine RNA code. Exclamation marks denote differences.
143 |
144 | ```
145 | Virus: AUG UUU GUU UUU CUU GUU UUA UUG CCA CUA GUC UCU AGU CAG UGU GUU
146 | Vaccine: AUG UUC GUG UUC CUG GUG CUG CUG CCU CUG GUG UCC AGC CAG UGU GUU
147 | ! ! ! ! ! ! ! ! ! ! ! ! !
148 | ```
149 |
150 | RNA is a string (literally) of RNA characters, `A`, `C`, `G` and `U`. There is no
151 | physical framing on there, but it makes sense to analyse it in groups of
152 | three.
153 |
154 | Each group (called a codon) maps to an amino acid (denoted by a capital
155 | letter). A string of amino acids is a protein. Here is what that looks
156 | like:
157 |
158 | ```
159 | Virus: AUG UUU GUU UUU CUU GUU UUA UUG CCA CUA GUC UCU AGU CAG UGU GUU
160 | M F V F L V L L P L V S S Q C V
161 | Vaccine: AUG UUC GUG UUC CUG GUG CUG CUG CCU CUG GUG UCC AGC CAG UGU GUU
162 | ! ! ! ! ! ! ! ! ! ! ! ! !
163 | ```
164 |
165 | Here we can see that while the codons are different, the amino acid version
166 | is the same. There are 4×4×4 = 64 codons but only 20 amino acids. This means you
167 | can typically change every codon into one of two others, and still code for
168 | the same amino acid.
169 |
170 | So in the second codon, `UUU` was changed to `UUC`. This is a net addition
171 | of one 'C' to the vaccine. The third codon changed from `GUU` to `GUG`, which is
172 | a net addition of one `G`.
173 |
174 | **It is known that a higher fraction of `G` and `C` characters improves the
175 | efficiency of an mRNA vaccine**.
176 |
177 | Now, if that was all there was to it, this could be the end of this page.
178 | "The algorithm is change codons so we get more G and C in there". But then
179 | we meet the 9th codon which changes `CCA` to `CCU`.
180 |
181 | Throughout the ~4000 characters of the vaccine, this happens many times.
182 |
183 | Our challenge
184 | -------------
185 | The goal is: find an algorithm that modifies the 'wild type' RNA code into
186 | the BNT162b2 one. Because everyone would like to understand how to turn
187 | viral RNA into an effective vaccine. The algorithm does not need to
188 | reproduce the _exact_ RNA code of course, but it would be super nice if it
189 | came up with something very similar, while also being brief.
190 |
191 | To help you, I have provided the data in a number of forms, as described on
192 | [the GitHub page](https://github.com/berthubert/bnt162b2).
193 |
194 | > Note that in these files the `U` mentioned above appears as a `T`. `U` and
195 | > `T` are the RNA and DNA manifestations of the same information.
196 |
197 | The easiest place to start might be the
198 | '[side-by-side.csv](https://github.com/berthubert/bnt162b2/blob/master/side-by-side.csv)'
199 | file. This lists the original and modified version of each codon, side by
200 | side:
201 |
202 | ```
203 | abspos,codonOrig,codonVaccine
204 | 0,ATG,ATG
205 | 3,TTT,TTC
206 | 6,GTT,GTG
207 | ...
208 | 3813,TAC,TAC
209 | 3816,ACA,ACA
210 | 3819,TAA,TGA
211 | ```
212 |
213 | There is also an equivalency table that shows which codons can be
214 | interchanged without changing the amino acid output. Please find this in
215 | [codon-table-grouped.csv](https://github.com/berthubert/bnt162b2/blob/master/codon-table-grouped.csv).
216 | There is also a visual version
217 | [here](https://en.wikipedia.org/wiki/DNA_and_RNA_codon_tables#Standard_DNA_codon_table).
218 |
219 | A sample algorithm
220 | ------------------
221 | On the [GitHub repository](https://github.com/berthubert/bnt162b2) you can
222 | find
223 | [3rd-gc.go](https://github.com/berthubert/bnt162b2/blob/master/3rd-gc.go).
224 |
225 | This implements a simple strategy that works like this:
226 |
227 | * If a virus codon already ended on G or C, copy it to the vaccine mRNA
228 | * If not, replace last nucleotide in codon by a G, see if the amino acid
229 | still matches, if so, copy to the vaccine mRNA
230 | * Try the same with a C
231 | * Otherwise copy as is
232 |
233 | Or in `golang`:
234 |
235 | ```
236 | // base case, don't do anything
237 | our = vir
238 |
239 | // don't do anything if codon ends on G or C already
240 | if(vir[2] == 'G' || vir[2] =='C') {
241 | fmt.Printf("Codon ended on G or C already, not doing anything.")
242 | } else {
243 | prop = vir[:2]+"G"
244 | fmt.Printf("Attempting G substitution, new candidate '%s'. ", prop)
245 | if(c2s[vir] == c2s[prop]) {
246 | fmt.Printf("Amino acid still the same, done!")
247 | our = prop
248 | } else {
249 | fmt.Printf("Oops, amino acid changed. Trying C, new candidate '%s'. ", prop)
250 | prop = vir[:2]+"C"
251 | if(c2s[vir] == c2s[prop]) {
252 | fmt.Printf("Amino acid still the same, done!")
253 | our=prop
254 | }
255 |
256 | }
257 |
258 | }
259 | ```
260 |
261 | This achieves a rather poor 53.1% match with the BioNTech RNA vaccine, but
262 | it is a start.
263 |
264 | When you design your algorithm, be sure to only base your choices on the
265 | virus RNA. Do not peek into the BioNTech RNA!
266 |
267 | If you have achieved a score beyond 53.1% please email a link to your code
268 | to bert@hubertnet.nl (or [@PowerDNS_Bert](https://twitter.com/PowerDNS_Bert))
269 | and I'll put it on the leader board at the top of this page!
270 |
271 |
272 | Things that will help
273 | ---------------------
274 | As with every form of reverse engineering or cryptanalysis, it helps to
275 | understand what we are looking at.
276 |
277 | GC ratio
278 | --------
279 | We know that one goal of the 'codon optimization' is to get more `C`s and
280 | `G`s into the vaccine version of the RNA. However, there is also a limit to
281 | that. In DNA, which is also used to manufacture the vaccine, `G` and `C`
282 | bind together strongly, to the point that if you put too many of these
283 | 'nucleotides' in there, the DNA will no longer be replicated efficiently.
284 |
285 | So some modifications may actually happen to manage *down* the GC percentage of a
286 | stretch of DNA if it was getting too high.
287 |
288 | I [tweeted about this](https://twitter.com/PowerDNS_Bert/status/1344036143961169920) earlier.
289 |
290 | Codon optimization
291 | ------------------
292 | Some codons are rare in human DNA, or in certain cells. It may be that some
293 | codons are replaced by other ones simply because they are more frequently
294 | used by some cells.
295 |
296 | I [tweeted about this](https://twitter.com/PowerDNS_Bert/status/1344400081802448897)
297 | earlier.
298 |
299 | RNA folding
300 | -----------
301 | We've been looking at codons up to here. The RNA itself however does not
302 | know about codons, there are no markers that say where a codon begins and
303 | ends. The first codon on a protein however is always ATG (or AUG in RNA).
304 |
305 | RNA curls up into a shape. This shape might help evade the immune system or
306 | it might improve translation into amino acids. This only depends on the
307 | sequence of RNA nucleotides and not on specific codons.
308 |
309 | You can submit RNA sequences to [this server of the Institute for
310 | Theoretical Chemistry at the University of
311 | Vienna](http://rna.tbi.univie.ac.at/cgi-bin/RNAWebSuite/RNAfold.cgi) and it
312 | will fold RNA for you. This is a very advanced server that does meticulous
313 | calculations.
314 |
315 | This [Wikipedia
316 | page](https://en.wikipedia.org/wiki/Nucleic_acid_structure_prediction)
317 | describes how this works.
318 |
319 | It may be that some optimizations improve folding.
320 |
321 | I am also told that this paper by Moderna (another mRNA vaccine
322 | manufacturer) may be relevant:
323 | [mRNA structure regulates protein expression through changes in functional
324 | half-life](https://www.pnas.org/content/116/48/24075).
325 |
--------------------------------------------------------------------------------
/combined-codons-s-protein.csv:
--------------------------------------------------------------------------------
1 | orig,cvac,bntc
2 | AUG,AUG,AUG
3 | UUU,UUC,UUC
4 | GUU,GUG,GUG
5 | UUU,UUC,UUC
6 | CUU,CUG,CUG
7 | GUU,GUC,GUG
8 | UUA,CUC,CUG
9 | UUG,CUG,CUG
10 | CCA,CCC,CCU
11 | CUA,CUG,CUG
12 | GUC,GUG,GUG
13 | UCU,AGC,UCC
14 | AGU,UCC,AGC
15 | CAG,CAG,CAG
16 | UGU,UGC,UGU
17 | GUU,GUG,GUG
18 | AAU,AAC,AAC
19 | CUU,CUC,CUG
20 | ACA,ACC,ACC
21 | ACC,ACC,ACC
22 | AGA,CGC,AGA
23 | ACU,ACG,ACA
24 | CAA,CAG,CAG
25 | UUA,CUG,CUG
26 | CCC,CCG,CCU
27 | CCU,CCC,CCA
28 | GCA,GCC,GCC
29 | UAC,UAC,UAC
30 | ACU,ACC,ACC
31 | AAU,AAC,AAC
32 | UCU,AGC,AGC
33 | UUC,UUC,UUU
34 | ACA,ACC,ACC
35 | CGU,CGG,AGA
36 | GGU,GGC,GGC
37 | GUU,GUC,GUG
38 | UAU,UAC,UAC
39 | UAC,UAC,UAC
40 | CCU,CCC,CCC
41 | GAC,GAC,GAC
42 | AAA,AAG,AAG
43 | GUU,GUG,GUG
44 | UUC,UUC,UUC
45 | AGA,CGC,AGA
46 | UCC,UCC,UCC
47 | UCA,AGC,AGC
48 | GUU,GUG,GUG
49 | UUA,CUG,CUG
50 | CAU,CAC,CAC
51 | UCA,UCC,UCU
52 | ACU,ACC,ACC
53 | CAG,CAG,CAG
54 | GAC,GAC,GAC
55 | UUG,CUC,CUG
56 | UUC,UUC,UUC
57 | UUA,CUG,CUG
58 | CCU,CCC,CCU
59 | UUC,UUC,UUC
60 | UUU,UUC,UUC
61 | UCC,AGC,AGC
62 | AAU,AAC,AAC
63 | GUU,GUC,GUG
64 | ACU,ACG,ACC
65 | UGG,UGG,UGG
66 | UUC,UUC,UUC
67 | CAU,CAC,CAC
68 | GCU,GCC,GCC
69 | AUA,AUC,AUC
70 | CAU,CAC,CAC
71 | GUC,GUG,GUG
72 | UCU,UCC,UCC
73 | GGG,GGG,GGC
74 | ACC,ACC,ACC
75 | AAU,AAC,AAU
76 | GGU,GGC,GGC
77 | ACU,ACC,ACC
78 | AAG,AAG,AAG
79 | AGG,CGG,AGA
80 | UUU,UUC,UUC
81 | GAU,GAC,GAC
82 | AAC,AAC,AAC
83 | CCU,CCG,CCC
84 | GUC,GUG,GUG
85 | CUA,CUG,CUG
86 | CCA,CCC,CCC
87 | UUU,UUC,UUC
88 | AAU,AAC,AAC
89 | GAU,GAC,GAC
90 | GGU,GGG,GGG
91 | GUU,GUC,GUG
92 | UAU,UAC,UAC
93 | UUU,UUC,UUU
94 | GCU,GCG,GCC
95 | UCC,AGC,AGC
96 | ACU,ACC,ACC
97 | GAG,GAG,GAG
98 | AAG,AAG,AAG
99 | UCU,UCC,UCC
100 | AAC,AAC,AAC
101 | AUA,AUC,AUC
102 | AUA,AUC,AUC
103 | AGA,CGC,AGA
104 | GGC,GGC,GGC
105 | UGG,UGG,UGG
106 | AUU,AUC,AUC
107 | UUU,UUC,UUC
108 | GGU,GGG,GGC
109 | ACU,ACG,ACC
110 | ACU,ACC,ACA
111 | UUA,CUC,CUG
112 | GAU,GAC,GAC
113 | UCG,AGC,AGC
114 | AAG,AAG,AAG
115 | ACC,ACC,ACC
116 | CAG,CAG,CAG
117 | UCC,UCC,AGC
118 | CUA,CUG,CUG
119 | CUU,CUG,CUG
120 | AUU,AUC,AUC
121 | GUU,GUG,GUG
122 | AAU,AAC,AAC
123 | AAC,AAC,AAC
124 | GCU,GCC,GCC
125 | ACU,ACC,ACC
126 | AAU,AAC,AAC
127 | GUU,GUG,GUG
128 | GUU,GUC,GUC
129 | AUU,AUC,AUC
130 | AAA,AAG,AAA
131 | GUC,GUG,GUG
132 | UGU,UGC,UGC
133 | GAA,GAG,GAG
134 | UUU,UUC,UUC
135 | CAA,CAG,CAG
136 | UUU,UUC,UUC
137 | UGU,UGC,UGC
138 | AAU,AAC,AAC
139 | GAU,GAC,GAC
140 | CCA,CCC,CCC
141 | UUU,UUC,UUC
142 | UUG,CUC,CUG
143 | GGU,GGC,GGC
144 | GUU,GUG,GUC
145 | UAU,UAC,UAC
146 | UAC,UAC,UAC
147 | CAC,CAC,CAC
148 | AAA,AAG,AAG
149 | AAC,AAC,AAC
150 | AAC,AAC,AAC
151 | AAA,AAG,AAG
152 | AGU,AGC,AGC
153 | UGG,UGG,UGG
154 | AUG,AUG,AUG
155 | GAA,GAG,GAA
156 | AGU,UCC,AGC
157 | GAG,GAG,GAG
158 | UUC,UUC,UUC
159 | AGA,CGG,CGG
160 | GUU,GUC,GUG
161 | UAU,UAC,UAC
162 | UCU,AGC,AGC
163 | AGU,UCC,AGC
164 | GCG,GCC,GCC
165 | AAU,AAC,AAC
166 | AAU,AAC,AAC
167 | UGC,UGC,UGC
168 | ACU,ACG,ACC
169 | UUU,UUC,UUC
170 | GAA,GAG,GAG
171 | UAU,UAC,UAC
172 | GUC,GUG,GUG
173 | UCU,AGC,UCC
174 | CAG,CAG,CAG
175 | CCU,CCC,CCU
176 | UUU,UUC,UUC
177 | CUU,CUG,CUG
178 | AUG,AUG,AUG
179 | GAC,GAC,GAC
180 | CUU,CUG,CUG
181 | GAA,GAG,GAA
182 | GGA,GGC,GGC
183 | AAA,AAG,AAG
184 | CAG,CAG,CAG
185 | GGU,GGG,GGC
186 | AAU,AAC,AAC
187 | UUC,UUC,UUC
188 | AAA,AAG,AAG
189 | AAU,AAC,AAC
190 | CUU,CUC,CUG
191 | AGG,CGC,CGC
192 | GAA,GAG,GAG
193 | UUU,UUC,UUC
194 | GUG,GUG,GUG
195 | UUU,UUC,UUU
196 | AAG,AAG,AAG
197 | AAU,AAC,AAC
198 | AUU,AUC,AUC
199 | GAU,GAC,GAC
200 | GGU,GGC,GGC
201 | UAU,UAC,UAC
202 | UUU,UUC,UUC
203 | AAA,AAG,AAG
204 | AUA,AUC,AUC
205 | UAU,UAC,UAC
206 | UCU,UCC,AGC
207 | AAG,AAG,AAG
208 | CAC,CAC,CAC
209 | ACG,ACC,ACC
210 | CCU,CCG,CCU
211 | AUU,AUC,AUC
212 | AAU,AAC,AAC
213 | UUA,CUG,CUC
214 | GUG,GUC,GUG
215 | CGU,CGG,CGG
216 | GAU,GAC,GAU
217 | CUC,CUG,CUG
218 | CCU,CCC,CCU
219 | CAG,CAG,CAG
220 | GGU,GGG,GGC
221 | UUU,UUC,UUC
222 | UCG,AGC,UCU
223 | GCU,GCC,GCU
224 | UUA,CUC,CUG
225 | GAA,GAG,GAA
226 | CCA,CCC,CCC
227 | UUG,CUG,CUG
228 | GUA,GUG,GUG
229 | GAU,GAC,GAU
230 | UUG,CUG,CUG
231 | CCA,CCC,CCC
232 | AUA,AUC,AUC
233 | GGU,GGC,GGC
234 | AUU,AUC,AUC
235 | AAC,AAC,AAC
236 | AUC,AUC,AUC
237 | ACU,ACC,ACC
238 | AGG,CGC,CGG
239 | UUU,UUC,UUU
240 | CAA,CAG,CAG
241 | ACU,ACC,ACA
242 | UUA,CUC,CUG
243 | CUU,CUG,CUG
244 | GCU,GCC,GCC
245 | UUA,CUG,CUG
246 | CAU,CAC,CAC
247 | AGA,CGG,AGA
248 | AGU,UCC,AGC
249 | UAU,UAC,UAC
250 | UUG,CUC,CUG
251 | ACU,ACG,ACA
252 | CCU,CCG,CCU
253 | GGU,GGG,GGC
254 | GAU,GAC,GAU
255 | UCU,AGC,AGC
256 | UCU,AGC,AGC
257 | UCA,UCC,AGC
258 | GGU,GGC,GGA
259 | UGG,UGG,UGG
260 | ACA,ACC,ACA
261 | GCU,GCG,GCU
262 | GGU,GGC,GGU
263 | GCU,GCC,GCC
264 | GCA,GCC,GCC
265 | GCU,GCC,GCU
266 | UAU,UAC,UAC
267 | UAU,UAC,UAU
268 | GUG,GUG,GUG
269 | GGU,GGG,GGC
270 | UAU,UAC,UAC
271 | CUU,CUG,CUG
272 | CAA,CAG,CAG
273 | CCU,CCC,CCU
274 | AGG,CGC,AGA
275 | ACU,ACC,ACC
276 | UUU,UUC,UUC
277 | CUA,CUG,CUG
278 | UUA,CUC,CUG
279 | AAA,AAG,AAG
280 | UAU,UAC,UAC
281 | AAU,AAC,AAC
282 | GAA,GAG,GAG
283 | AAU,AAC,AAC
284 | GGA,GGC,GGC
285 | ACC,ACC,ACC
286 | AUU,AUC,AUC
287 | ACA,ACG,ACC
288 | GAU,GAC,GAC
289 | GCU,GCG,GCC
290 | GUA,GUC,GUG
291 | GAC,GAC,GAU
292 | UGU,UGC,UGU
293 | GCA,GCC,GCU
294 | CUU,CUG,CUG
295 | GAC,GAC,GAU
296 | CCU,CCC,CCU
297 | CUC,CUG,CUG
298 | UCA,AGC,AGC
299 | GAA,GAG,GAG
300 | ACA,ACC,ACA
301 | AAG,AAG,AAG
302 | UGU,UGC,UGC
303 | ACG,ACC,ACC
304 | UUG,CUC,CUG
305 | AAA,AAG,AAG
306 | UCC,UCC,UCC
307 | UUC,UUC,UUC
308 | ACU,ACC,ACC
309 | GUA,GUG,GUG
310 | GAA,GAG,GAA
311 | AAA,AAG,AAG
312 | GGA,GGG,GGC
313 | AUC,AUC,AUC
314 | UAU,UAC,UAC
315 | CAA,CAG,CAG
316 | ACU,ACG,ACC
317 | UCU,AGC,AGC
318 | AAC,AAC,AAC
319 | UUU,UUC,UUC
320 | AGA,CGG,CGG
321 | GUC,GUG,GUG
322 | CAA,CAG,CAG
323 | CCA,CCC,CCC
324 | ACA,ACC,ACC
325 | GAA,GAG,GAA
326 | UCU,UCC,UCC
327 | AUU,AUC,AUC
328 | GUU,GUC,GUG
329 | AGA,CGC,CGG
330 | UUU,UUC,UUC
331 | CCU,CCG,CCC
332 | AAU,AAC,AAU
333 | AUU,AUC,AUC
334 | ACA,ACC,ACC
335 | AAC,AAC,AAU
336 | UUG,CUG,CUG
337 | UGC,UGC,UGC
338 | CCU,CCC,CCC
339 | UUU,UUC,UUC
340 | GGU,GGC,GGC
341 | GAA,GAG,GAG
342 | GUU,GUG,GUG
343 | UUU,UUC,UUC
344 | AAC,AAC,AAU
345 | GCC,GCC,GCC
346 | ACC,ACC,ACC
347 | AGA,CGG,AGA
348 | UUU,UUC,UUC
349 | GCA,GCC,GCC
350 | UCU,AGC,UCU
351 | GUU,GUC,GUG
352 | UAU,UAC,UAC
353 | GCU,GCC,GCC
354 | UGG,UGG,UGG
355 | AAC,AAC,AAC
356 | AGG,CGC,CGG
357 | AAG,AAG,AAG
358 | AGA,CGG,CGG
359 | AUC,AUC,AUC
360 | AGC,UCC,AGC
361 | AAC,AAC,AAU
362 | UGU,UGC,UGC
363 | GUU,GUG,GUG
364 | GCU,GCG,GCC
365 | GAU,GAC,GAC
366 | UAU,UAC,UAC
367 | UCU,AGC,UCC
368 | GUC,GUG,GUG
369 | CUA,CUG,CUG
370 | UAU,UAC,UAC
371 | AAU,AAC,AAC
372 | UCC,UCC,UCC
373 | GCA,GCC,GCC
374 | UCA,AGC,AGC
375 | UUU,UUC,UUC
376 | UCC,UCC,AGC
377 | ACU,ACG,ACC
378 | UUU,UUC,UUC
379 | AAG,AAG,AAG
380 | UGU,UGC,UGC
381 | UAU,UAC,UAC
382 | GGA,GGC,GGC
383 | GUG,GUC,GUG
384 | UCU,AGC,UCC
385 | CCU,CCC,CCU
386 | ACU,ACC,ACC
387 | AAA,AAG,AAG
388 | UUA,CUC,CUG
389 | AAU,AAC,AAC
390 | GAU,GAC,GAC
391 | CUC,CUG,CUG
392 | UGC,UGC,UGC
393 | UUU,UUC,UUC
394 | ACU,ACC,ACA
395 | AAU,AAC,AAC
396 | GUC,GUG,GUG
397 | UAU,UAC,UAC
398 | GCA,GCC,GCC
399 | GAU,GAC,GAC
400 | UCA,UCC,AGC
401 | UUU,UUC,UUC
402 | GUA,GUG,GUG
403 | AUU,AUC,AUC
404 | AGA,CGC,CGG
405 | GGU,GGG,GGA
406 | GAU,GAC,GAU
407 | GAA,GAG,GAA
408 | GUC,GUC,GUG
409 | AGA,CGG,CGG
410 | CAA,CAG,CAG
411 | AUC,AUC,AUU
412 | GCU,GCC,GCC
413 | CCA,CCC,CCU
414 | GGG,GGC,GGA
415 | CAA,CAG,CAG
416 | ACU,ACC,ACA
417 | GGA,GGG,GGC
418 | AAG,AAG,AAG
419 | AUU,AUC,AUC
420 | GCU,GCC,GCC
421 | GAU,GAC,GAC
422 | UAU,UAC,UAC
423 | AAU,AAC,AAC
424 | UAU,UAC,UAC
425 | AAA,AAG,AAG
426 | UUA,CUG,CUG
427 | CCA,CCG,CCC
428 | GAU,GAC,GAC
429 | GAU,GAC,GAC
430 | UUU,UUC,UUC
431 | ACA,ACG,ACC
432 | GGC,GGC,GGC
433 | UGC,UGC,UGU
434 | GUU,GUG,GUG
435 | AUA,AUC,AUU
436 | GCU,GCG,GCC
437 | UGG,UGG,UGG
438 | AAU,AAC,AAC
439 | UCU,AGC,AGC
440 | AAC,AAC,AAC
441 | AAU,AAC,AAC
442 | CUU,CUC,CUG
443 | GAU,GAC,GAC
444 | UCU,UCC,UCC
445 | AAG,AAG,AAA
446 | GUU,GUC,GUC
447 | GGU,GGG,GGC
448 | GGU,GGC,GGC
449 | AAU,AAC,AAC
450 | UAU,UAC,UAC
451 | AAU,AAC,AAU
452 | UAC,UAC,UAC
453 | CUG,CUG,CUG
454 | UAU,UAC,UAC
455 | AGA,CGC,CGG
456 | UUG,CUG,CUG
457 | UUU,UUC,UUC
458 | AGG,CGC,CGG
459 | AAG,AAG,AAG
460 | UCU,AGC,UCC
461 | AAU,AAC,AAU
462 | CUC,CUC,CUG
463 | AAA,AAG,AAG
464 | CCU,CCC,CCC
465 | UUU,UUC,UUC
466 | GAG,GAG,GAG
467 | AGA,CGG,CGG
468 | GAU,GAC,GAC
469 | AUU,AUC,AUC
470 | UCA,UCC,UCC
471 | ACU,ACC,ACC
472 | GAA,GAG,GAG
473 | AUC,AUC,AUC
474 | UAU,UAC,UAU
475 | CAG,CAG,CAG
476 | GCC,GCC,GCC
477 | GGU,GGC,GGC
478 | AGC,AGC,AGC
479 | ACA,ACC,ACC
480 | CCU,CCC,CCU
481 | UGU,UGC,UGU
482 | AAU,AAC,AAC
483 | GGU,GGG,GGC
484 | GUU,GUG,GUG
485 | GAA,GAG,GAA
486 | GGU,GGC,GGC
487 | UUU,UUC,UUC
488 | AAU,AAC,AAC
489 | UGU,UGC,UGC
490 | UAC,UAC,UAC
491 | UUU,UUC,UUC
492 | CCU,CCC,CCA
493 | UUA,CUG,CUG
494 | CAA,CAG,CAG
495 | UCA,UCC,UCC
496 | UAU,UAC,UAC
497 | GGU,GGG,GGC
498 | UUC,UUC,UUU
499 | CAA,CAG,CAG
500 | CCC,CCG,CCC
501 | ACU,ACC,ACA
502 | AAU,AAC,AAU
503 | GGU,GGC,GGC
504 | GUU,GUG,GUG
505 | GGU,GGG,GGC
506 | UAC,UAC,UAU
507 | CAA,CAG,CAG
508 | CCA,CCC,CCC
509 | UAC,UAC,UAC
510 | AGA,CGG,AGA
511 | GUA,GUG,GUG
512 | GUA,GUC,GUG
513 | GUA,GUG,GUG
514 | CUU,CUG,CUG
515 | UCU,AGC,AGC
516 | UUU,UUC,UUC
517 | GAA,GAG,GAA
518 | CUU,CUC,CUG
519 | CUA,CUG,CUG
520 | CAU,CAC,CAU
521 | GCA,GCC,GCC
522 | CCA,CCC,CCU
523 | GCA,GCC,GCC
524 | ACU,ACG,ACA
525 | GUU,GUG,GUG
526 | UGU,UGC,UGC
527 | GGA,GGC,GGC
528 | CCU,CCC,CCU
529 | AAA,AAG,AAG
530 | AAG,AAG,AAA
531 | UCU,UCC,AGC
532 | ACU,ACC,ACC
533 | AAU,AAC,AAU
534 | UUG,CUG,CUC
535 | GUU,GUC,GUG
536 | AAA,AAG,AAG
537 | AAC,AAC,AAC
538 | AAA,AAG,AAA
539 | UGU,UGC,UGC
540 | GUC,GUG,GUG
541 | AAU,AAC,AAC
542 | UUC,UUC,UUC
543 | AAC,AAC,AAC
544 | UUC,UUC,UUC
545 | AAU,AAC,AAC
546 | GGU,GGC,GGC
547 | UUA,CUC,CUG
548 | ACA,ACC,ACC
549 | GGC,GGG,GGC
550 | ACA,ACC,ACC
551 | GGU,GGC,GGC
552 | GUU,GUG,GUG
553 | CUU,CUG,CUG
554 | ACU,ACC,ACA
555 | GAG,GAG,GAG
556 | UCU,AGC,AGC
557 | AAC,AAC,AAC
558 | AAA,AAG,AAG
559 | AAG,AAG,AAG
560 | UUU,UUC,UUC
561 | CUG,CUG,CUG
562 | CCU,CCG,CCA
563 | UUC,UUC,UUC
564 | CAA,CAG,CAG
565 | CAA,CAG,CAG
566 | UUU,UUC,UUU
567 | GGC,GGG,GGC
568 | AGA,CGC,CGG
569 | GAC,GAC,GAU
570 | AUU,AUC,AUC
571 | GCU,GCC,GCC
572 | GAC,GAC,GAU
573 | ACU,ACG,ACC
574 | ACU,ACC,ACA
575 | GAU,GAC,GAC
576 | GCU,GCG,GCC
577 | GUC,GUC,GUU
578 | CGU,CGC,AGA
579 | GAU,GAC,GAU
580 | CCA,CCC,CCC
581 | CAG,CAG,CAG
582 | ACA,ACC,ACA
583 | CUU,CUG,CUG
584 | GAG,GAG,GAA
585 | AUU,AUC,AUC
586 | CUU,CUC,CUG
587 | GAC,GAC,GAC
588 | AUU,AUC,AUC
589 | ACA,ACC,ACC
590 | CCA,CCC,CCU
591 | UGU,UGC,UGC
592 | UCU,AGC,AGC
593 | UUU,UUC,UUC
594 | GGU,GGC,GGC
595 | GGU,GGG,GGA
596 | GUC,GUG,GUG
597 | AGU,UCC,UCU
598 | GUU,GUG,GUG
599 | AUA,AUC,AUC
600 | ACA,ACG,ACC
601 | CCA,CCG,CCU
602 | GGA,GGC,GGC
603 | ACA,ACC,ACC
604 | AAU,AAC,AAC
605 | ACU,ACC,ACC
606 | UCU,AGC,AGC
607 | AAC,AAC,AAU
608 | CAG,CAG,CAG
609 | GUU,GUC,GUG
610 | GCU,GCC,GCA
611 | GUU,GUG,GUG
612 | CUU,CUG,CUG
613 | UAU,UAC,UAC
614 | CAG,CAG,CAG
615 | GAU,GAC,GAC
616 | GUU,GUG,GUG
617 | AAC,AAC,AAC
618 | UGC,UGC,UGU
619 | ACA,ACC,ACC
620 | GAA,GAG,GAA
621 | GUC,GUC,GUG
622 | CCU,CCC,CCC
623 | GUU,GUG,GUG
624 | GCU,GCC,GCC
625 | AUU,AUC,AUU
626 | CAU,CAC,CAC
627 | GCA,GCC,GCC
628 | GAU,GAC,GAU
629 | CAA,CAG,CAG
630 | CUU,CUG,CUG
631 | ACU,ACG,ACA
632 | CCU,CCC,CCU
633 | ACU,ACC,ACA
634 | UGG,UGG,UGG
635 | CGU,CGG,CGG
636 | GUU,GUG,GUG
637 | UAU,UAC,UAC
638 | UCU,UCC,UCC
639 | ACA,ACC,ACC
640 | GGU,GGC,GGC
641 | UCU,AGC,AGC
642 | AAU,AAC,AAU
643 | GUU,GUC,GUG
644 | UUU,UUC,UUU
645 | CAA,CAG,CAG
646 | ACA,ACC,ACC
647 | CGU,CGC,AGA
648 | GCA,GCC,GCC
649 | GGC,GGG,GGC
650 | UGU,UGC,UGU
651 | UUA,CUC,CUG
652 | AUA,AUC,AUC
653 | GGG,GGC,GGA
654 | GCU,GCG,GCC
655 | GAA,GAG,GAG
656 | CAU,CAC,CAC
657 | GUC,GUG,GUG
658 | AAC,AAC,AAC
659 | AAC,AAC,AAU
660 | UCA,UCC,AGC
661 | UAU,UAC,UAC
662 | GAG,GAG,GAG
663 | UGU,UGC,UGC
664 | GAC,GAC,GAC
665 | AUA,AUC,AUC
666 | CCC,CCC,CCC
667 | AUU,AUC,AUC
668 | GGU,GGG,GGC
669 | GCA,GCC,GCU
670 | GGU,GGC,GGA
671 | AUA,AUC,AUC
672 | UGC,UGC,UGC
673 | GCU,GCC,GCC
674 | AGU,AGC,AGC
675 | UAU,UAC,UAC
676 | CAG,CAG,CAG
677 | ACU,ACG,ACA
678 | CAG,CAG,CAG
679 | ACU,ACC,ACA
680 | AAU,AAC,AAC
681 | UCU,UCC,AGC
682 | CCU,CCG,CCU
683 | CGG,CGG,CGG
684 | CGG,CGC,AGA
685 | GCA,GCC,GCC
686 | CGU,CGG,AGA
687 | AGU,AGC,AGC
688 | GUA,GUG,GUG
689 | GCU,GCG,GCC
690 | AGU,UCC,AGC
691 | CAA,CAG,CAG
692 | UCC,AGC,AGC
693 | AUC,AUC,AUC
694 | AUU,AUC,AUU
695 | GCC,GCC,GCC
696 | UAC,UAC,UAC
697 | ACU,ACC,ACA
698 | AUG,AUG,AUG
699 | UCA,UCC,UCU
700 | CUU,CUG,CUG
701 | GGU,GGC,GGC
702 | GCA,GCC,GCC
703 | GAA,GAG,GAG
704 | AAU,AAC,AAC
705 | UCA,AGC,AGC
706 | GUU,GUC,GUG
707 | GCU,GCC,GCC
708 | UAC,UAC,UAC
709 | UCU,UCC,UCC
710 | AAU,AAC,AAC
711 | AAC,AAC,AAC
712 | UCU,AGC,UCU
713 | AUU,AUC,AUC
714 | GCC,GCC,GCU
715 | AUA,AUC,AUC
716 | CCC,CCC,CCC
717 | ACA,ACC,ACC
718 | AAU,AAC,AAC
719 | UUU,UUC,UUC
720 | ACU,ACG,ACC
721 | AUU,AUC,AUC
722 | AGU,UCC,AGC
723 | GUU,GUG,GUG
724 | ACC,ACC,ACC
725 | ACA,ACC,ACA
726 | GAA,GAG,GAG
727 | AUU,AUC,AUC
728 | CUA,CUG,CUG
729 | CCA,CCC,CCU
730 | GUG,GUG,GUG
731 | UCU,AGC,UCC
732 | AUG,AUG,AUG
733 | ACC,ACC,ACC
734 | AAG,AAG,AAG
735 | ACA,ACG,ACC
736 | UCA,AGC,AGC
737 | GUA,GUC,GUG
738 | GAU,GAC,GAC
739 | UGU,UGC,UGC
740 | ACA,ACC,ACC
741 | AUG,AUG,AUG
742 | UAC,UAC,UAC
743 | AUU,AUC,AUC
744 | UGU,UGC,UGC
745 | GGU,GGG,GGC
746 | GAU,GAC,GAU
747 | UCA,UCC,UCC
748 | ACU,ACC,ACC
749 | GAA,GAG,GAG
750 | UGC,UGC,UGC
751 | AGC,AGC,UCC
752 | AAU,AAC,AAC
753 | CUU,CUC,CUG
754 | UUG,CUG,CUG
755 | UUG,CUG,CUG
756 | CAA,CAG,CAG
757 | UAU,UAC,UAC
758 | GGC,GGC,GGC
759 | AGU,UCC,AGC
760 | UUU,UUC,UUC
761 | UGU,UGC,UGC
762 | ACA,ACC,ACC
763 | CAA,CAG,CAG
764 | UUA,CUC,CUG
765 | AAC,AAC,AAU
766 | CGU,CGC,AGA
767 | GCU,GCG,GCC
768 | UUA,CUG,CUG
769 | ACU,ACG,ACA
770 | GGA,GGG,GGG
771 | AUA,AUC,AUC
772 | GCU,GCC,GCC
773 | GUU,GUG,GUG
774 | GAA,GAG,GAA
775 | CAA,CAG,CAG
776 | GAC,GAC,GAC
777 | AAA,AAG,AAG
778 | AAC,AAC,AAC
779 | ACC,ACC,ACC
780 | CAA,CAG,CAA
781 | GAA,GAG,GAG
782 | GUU,GUG,GUG
783 | UUU,UUC,UUC
784 | GCA,GCC,GCC
785 | CAA,CAG,CAA
786 | GUC,GUC,GUG
787 | AAA,AAG,AAG
788 | CAA,CAG,CAG
789 | AUU,AUC,AUC
790 | UAC,UAC,UAC
791 | AAA,AAG,AAG
792 | ACA,ACG,ACC
793 | CCA,CCC,CCU
794 | CCA,CCG,CCU
795 | AUU,AUC,AUC
796 | AAA,AAG,AAG
797 | GAU,GAC,GAC
798 | UUU,UUC,UUC
799 | GGU,GGC,GGC
800 | GGU,GGG,GGC
801 | UUU,UUC,UUC
802 | AAU,AAC,AAU
803 | UUU,UUC,UUC
804 | UCA,AGC,AGC
805 | CAA,CAG,CAG
806 | AUA,AUC,AUU
807 | UUA,CUG,CUG
808 | CCA,CCC,CCC
809 | GAU,GAC,GAU
810 | CCA,CCC,CCU
811 | UCA,UCC,AGC
812 | AAA,AAG,AAG
813 | CCA,CCC,CCC
814 | AGC,AGC,AGC
815 | AAG,AAG,AAG
816 | AGG,CGG,CGG
817 | UCA,UCC,AGC
818 | UUU,UUC,UUC
819 | AUU,AUC,AUC
820 | GAA,GAG,GAG
821 | GAU,GAC,GAC
822 | CUA,CUC,CUG
823 | CUU,CUG,CUG
824 | UUC,UUC,UUC
825 | AAC,AAC,AAC
826 | AAA,AAG,AAA
827 | GUG,GUG,GUG
828 | ACA,ACC,ACA
829 | CUU,CUG,CUG
830 | GCA,GCC,GCC
831 | GAU,GAC,GAC
832 | GCU,GCC,GCC
833 | GGC,GGC,GGC
834 | UUC,UUC,UUC
835 | AUC,AUC,AUC
836 | AAA,AAG,AAG
837 | CAA,CAG,CAG
838 | UAU,UAC,UAU
839 | GGU,GGC,GGC
840 | GAU,GAC,GAU
841 | UGC,UGC,UGU
842 | CUU,CUC,CUG
843 | GGU,GGG,GGC
844 | GAU,GAC,GAC
845 | AUU,AUC,AUU
846 | GCU,GCG,GCC
847 | GCU,GCC,GCC
848 | AGA,CGC,AGG
849 | GAC,GAC,GAU
850 | CUC,CUG,CUG
851 | AUU,AUC,AUU
852 | UGU,UGC,UGC
853 | GCA,GCC,GCC
854 | CAA,CAG,CAG
855 | AAG,AAG,AAG
856 | UUU,UUC,UUU
857 | AAC,AAC,AAC
858 | GGC,GGC,GGA
859 | CUU,CUG,CUG
860 | ACU,ACC,ACA
861 | GUU,GUG,GUG
862 | UUG,CUC,CUG
863 | CCA,CCG,CCU
864 | CCU,CCC,CCU
865 | UUG,CUG,CUG
866 | CUC,CUG,CUG
867 | ACA,ACC,ACC
868 | GAU,GAC,GAU
869 | GAA,GAG,GAG
870 | AUG,AUG,AUG
871 | AUU,AUC,AUC
872 | GCU,GCC,GCC
873 | CAA,CAG,CAG
874 | UAC,UAC,UAC
875 | ACU,ACC,ACA
876 | UCU,AGC,UCU
877 | GCA,GCC,GCC
878 | CUG,CUC,CUG
879 | UUA,CUG,CUG
880 | GCG,GCG,GCC
881 | GGU,GGG,GGC
882 | ACA,ACC,ACA
883 | AUC,AUC,AUC
884 | ACU,ACG,ACA
885 | UCU,UCC,AGC
886 | GGU,GGC,GGC
887 | UGG,UGG,UGG
888 | ACC,ACC,ACA
889 | UUU,UUC,UUU
890 | GGU,GGG,GGA
891 | GCA,GCC,GCA
892 | GGU,GGC,GGC
893 | GCU,GCC,GCC
894 | GCA,GCC,GCU
895 | UUA,CUG,CUG
896 | CAA,CAG,CAG
897 | AUA,AUC,AUC
898 | CCA,CCC,CCC
899 | UUU,UUC,UUU
900 | GCU,GCG,GCU
901 | AUG,AUG,AUG
902 | CAA,CAG,CAG
903 | AUG,AUG,AUG
904 | GCU,GCC,GCC
905 | UAU,UAC,UAC
906 | AGG,CGG,CGG
907 | UUU,UUC,UUC
908 | AAU,AAC,AAC
909 | GGU,GGC,GGC
910 | AUU,AUC,AUC
911 | GGA,GGG,GGA
912 | GUU,GUC,GUG
913 | ACA,ACC,ACC
914 | CAG,CAG,CAG
915 | AAU,AAC,AAU
916 | GUU,GUG,GUG
917 | CUC,CUC,CUG
918 | UAU,UAC,UAC
919 | GAG,GAG,GAG
920 | AAC,AAC,AAC
921 | CAA,CAG,CAG
922 | AAA,AAG,AAG
923 | UUG,CUG,CUG
924 | AUU,AUC,AUC
925 | GCC,GCC,GCC
926 | AAC,AAC,AAC
927 | CAA,CAG,CAG
928 | UUU,UUC,UUC
929 | AAU,AAC,AAC
930 | AGU,AGC,AGC
931 | GCU,GCC,GCC
932 | AUU,AUC,AUC
933 | GGC,GGC,GGC
934 | AAA,AAG,AAG
935 | AUU,AUC,AUC
936 | CAA,CAG,CAG
937 | GAC,GAC,GAC
938 | UCA,UCC,AGC
939 | CUU,CUG,CUG
940 | UCU,AGC,AGC
941 | UCC,UCC,AGC
942 | ACA,ACC,ACA
943 | GCA,GCC,GCA
944 | AGU,AGC,AGC
945 | GCA,GCG,GCC
946 | CUU,CUC,CUG
947 | GGA,GGG,GGA
948 | AAA,AAG,AAG
949 | CUU,CUG,CUG
950 | CAA,CAG,CAG
951 | GAU,GAC,GAC
952 | GUG,GUG,GUG
953 | GUC,GUC,GUC
954 | AAC,AAC,AAC
955 | CAA,CAG,CAG
956 | AAU,AAC,AAU
957 | GCA,GCC,GCC
958 | CAA,CAG,CAG
959 | GCU,GCC,GCA
960 | UUA,CUG,CUG
961 | AAC,AAC,AAC
962 | ACG,ACG,ACC
963 | CUU,CUC,CUG
964 | GUU,GUG,GUC
965 | AAA,AAG,AAG
966 | CAA,CAG,CAG
967 | CUU,CUG,CUG
968 | AGC,UCC,UCC
969 | UCC,AGC,UCC
970 | AAU,AAC,AAC
971 | UUU,UUC,UUC
972 | GGU,GGC,GGC
973 | GCA,GCC,GCC
974 | AUU,AUC,AUC
975 | UCA,UCC,AGC
976 | AGU,AGC,UCU
977 | GUU,GUG,GUG
978 | UUA,CUG,CUG
979 | AAU,AAC,AAC
980 | GAU,GAC,GAU
981 | AUC,AUC,AUC
982 | CUU,CUC,CUG
983 | UCA,UCC,AGC
984 | CGU,CGC,AGA
985 | CUU,CUG,CUG
986 | GAC,GAC,GAC
987 | AAA,CCG,CCU
988 | GUU,CCC,CCU
989 | GAG,GAG,GAG
990 | GCU,GCC,GCC
991 | GAA,GAG,GAG
992 | GUG,GUG,GUG
993 | CAA,CAG,CAG
994 | AUU,AUC,AUC
995 | GAU,GAC,GAC
996 | AGG,CGG,AGA
997 | UUG,CUG,CUG
998 | AUC,AUC,AUC
999 | ACA,ACC,ACA
1000 | GGC,GGG,GGC
1001 | AGA,CGC,AGA
1002 | CUU,CUC,CUG
1003 | CAA,CAG,CAG
1004 | AGU,AGC,AGC
1005 | UUG,CUG,CUC
1006 | CAG,CAG,CAG
1007 | ACA,ACC,ACA
1008 | UAU,UAC,UAC
1009 | GUG,GUG,GUG
1010 | ACU,ACC,ACC
1011 | CAA,CAG,CAG
1012 | CAA,CAG,CAG
1013 | UUA,CUG,CUG
1014 | AUU,AUC,AUC
1015 | AGA,CGG,AGA
1016 | GCU,GCG,GCC
1017 | GCA,GCC,GCC
1018 | GAA,GAG,GAG
1019 | AUC,AUC,AUU
1020 | AGA,CGC,AGA
1021 | GCU,GCC,GCC
1022 | UCU,AGC,UCU
1023 | GCU,GCC,GCC
1024 | AAU,AAC,AAU
1025 | CUU,CUC,CUG
1026 | GCU,GCC,GCC
1027 | GCU,GCG,GCC
1028 | ACU,ACC,ACC
1029 | AAA,AAG,AAG
1030 | AUG,AUG,AUG
1031 | UCA,UCC,UCU
1032 | GAG,GAG,GAG
1033 | UGU,UGC,UGU
1034 | GUA,GUC,GUG
1035 | CUU,CUG,CUG
1036 | GGA,GGC,GGC
1037 | CAA,CAG,CAG
1038 | UCA,AGC,AGC
1039 | AAA,AAG,AAG
1040 | AGA,CGG,AGA
1041 | GUU,GUG,GUG
1042 | GAU,GAC,GAC
1043 | UUU,UUC,UUU
1044 | UGU,UGC,UGC
1045 | GGA,GGC,GGC
1046 | AAG,AAG,AAG
1047 | GGC,GGG,GGC
1048 | UAU,UAC,UAC
1049 | CAU,CAC,CAC
1050 | CUU,CUG,CUG
1051 | AUG,AUG,AUG
1052 | UCC,UCC,AGC
1053 | UUC,UUC,UUC
1054 | CCU,CCC,CCU
1055 | CAG,CAG,CAG
1056 | UCA,AGC,UCU
1057 | GCA,GCC,GCC
1058 | CCU,CCG,CCU
1059 | CAU,CAC,CAC
1060 | GGU,GGC,GGC
1061 | GUA,GUG,GUG
1062 | GUC,GUC,GUG
1063 | UUC,UUC,UUU
1064 | UUG,CUC,CUG
1065 | CAU,CAC,CAC
1066 | GUG,GUG,GUG
1067 | ACU,ACG,ACA
1068 | UAU,UAC,UAU
1069 | GUC,GUC,GUG
1070 | CCU,CCC,CCC
1071 | GCA,GCC,GCU
1072 | CAA,CAG,CAA
1073 | GAA,GAG,GAG
1074 | AAG,AAG,AAG
1075 | AAC,AAC,AAU
1076 | UUC,UUC,UUC
1077 | ACA,ACC,ACC
1078 | ACU,ACC,ACC
1079 | GCU,GCC,GCU
1080 | CCU,CCC,CCA
1081 | GCC,GCG,GCC
1082 | AUU,AUC,AUC
1083 | UGU,UGC,UGC
1084 | CAU,CAC,CAC
1085 | GAU,GAC,GAC
1086 | GGA,GGG,GGC
1087 | AAA,AAG,AAA
1088 | GCA,GCC,GCC
1089 | CAC,CAC,CAC
1090 | UUU,UUC,UUU
1091 | CCU,CCC,CCU
1092 | CGU,CGC,AGA
1093 | GAA,GAG,GAA
1094 | GGU,GGC,GGC
1095 | GUC,GUG,GUG
1096 | UUU,UUC,UUC
1097 | GUU,GUG,GUG
1098 | UCA,UCC,UCC
1099 | AAU,AAC,AAC
1100 | GGC,GGG,GGC
1101 | ACA,ACC,ACC
1102 | CAC,CAC,CAU
1103 | UGG,UGG,UGG
1104 | UUU,UUC,UUC
1105 | GUA,GUC,GUG
1106 | ACA,ACG,ACA
1107 | CAA,CAG,CAG
1108 | AGG,CGG,CGG
1109 | AAU,AAC,AAC
1110 | UUU,UUC,UUC
1111 | UAU,UAC,UAC
1112 | GAA,GAG,GAG
1113 | CCA,CCG,CCC
1114 | CAA,CAG,CAG
1115 | AUC,AUC,AUC
1116 | AUU,AUC,AUC
1117 | ACU,ACC,ACC
1118 | ACA,ACC,ACC
1119 | GAC,GAC,GAC
1120 | AAC,AAC,AAC
1121 | ACA,ACC,ACC
1122 | UUU,UUC,UUC
1123 | GUG,GUG,GUG
1124 | UCU,AGC,UCU
1125 | GGU,GGC,GGC
1126 | AAC,AAC,AAC
1127 | UGU,UGC,UGC
1128 | GAU,GAC,GAC
1129 | GUU,GUG,GUC
1130 | GUA,GUC,GUG
1131 | AUA,AUC,AUC
1132 | GGA,GGC,GGC
1133 | AUU,AUC,AUU
1134 | GUC,GUG,GUG
1135 | AAC,AAC,AAC
1136 | AAC,AAC,AAU
1137 | ACA,ACG,ACC
1138 | GUU,GUG,GUG
1139 | UAU,UAC,UAC
1140 | GAU,GAC,GAC
1141 | CCU,CCC,CCU
1142 | UUG,CUG,CUG
1143 | CAA,CAG,CAG
1144 | CCU,CCC,CCC
1145 | GAA,GAG,GAG
1146 | UUA,CUG,CUG
1147 | GAC,GAC,GAC
1148 | UCA,UCC,AGC
1149 | UUC,UUC,UUC
1150 | AAG,AAG,AAA
1151 | GAG,GAG,GAG
1152 | GAG,GAG,GAA
1153 | UUA,CUC,CUG
1154 | GAU,GAC,GAC
1155 | AAA,AAG,AAG
1156 | UAU,UAC,UAC
1157 | UUU,UUC,UUU
1158 | AAG,AAG,AAG
1159 | AAU,AAC,AAC
1160 | CAU,CAC,CAC
1161 | ACA,ACC,ACA
1162 | UCA,AGC,AGC
1163 | CCA,CCC,CCC
1164 | GAU,GAC,GAC
1165 | GUU,GUC,GUG
1166 | GAU,GAC,GAC
1167 | UUA,CUG,CUG
1168 | GGU,GGG,GGC
1169 | GAC,GAC,GAU
1170 | AUC,AUC,AUC
1171 | UCU,UCC,AGC
1172 | GGC,GGC,GGA
1173 | AUU,AUC,AUC
1174 | AAU,AAC,AAU
1175 | GCU,GCC,GCC
1176 | UCA,AGC,AGC
1177 | GUU,GUG,GUC
1178 | GUA,GUG,GUG
1179 | AAC,AAC,AAC
1180 | AUU,AUC,AUC
1181 | CAA,CAG,CAG
1182 | AAA,AAG,AAA
1183 | GAA,GAG,GAG
1184 | AUU,AUC,AUC
1185 | GAC,GAC,GAC
1186 | CGC,CGC,CGG
1187 | CUC,CUG,CUG
1188 | AAU,AAC,AAC
1189 | GAG,GAG,GAG
1190 | GUU,GUC,GUG
1191 | GCC,GCC,GCC
1192 | AAG,AAG,AAG
1193 | AAU,AAC,AAU
1194 | UUA,CUC,CUG
1195 | AAU,AAC,AAC
1196 | GAA,GAG,GAG
1197 | UCU,UCC,AGC
1198 | CUC,CUG,CUG
1199 | AUC,AUC,AUC
1200 | GAU,GAC,GAC
1201 | CUC,CUG,CUG
1202 | CAA,CAG,CAA
1203 | GAA,GAG,GAA
1204 | CUU,CUC,CUG
1205 | GGA,GGG,GGG
1206 | AAG,AAG,AAG
1207 | UAU,UAC,UAC
1208 | GAG,GAG,GAG
1209 | CAG,CAG,CAG
1210 | UAU,UAC,UAC
1211 | AUA,AUC,AUC
1212 | AAA,AAG,AAG
1213 | UGG,UGG,UGG
1214 | CCA,CCG,CCC
1215 | UGG,UGG,UGG
1216 | UAC,UAC,UAC
1217 | AUU,AUC,AUC
1218 | UGG,UGG,UGG
1219 | CUA,CUG,CUG
1220 | GGU,GGC,GGC
1221 | UUU,UUC,UUU
1222 | AUA,AUC,AUC
1223 | GCU,GCC,GCC
1224 | GGC,GGC,GGA
1225 | UUG,CUG,CUG
1226 | AUU,AUC,AUU
1227 | GCC,GCG,GCC
1228 | AUA,AUC,AUC
1229 | GUA,GUG,GUG
1230 | AUG,AUG,AUG
1231 | GUG,GUG,GUC
1232 | ACA,ACC,ACA
1233 | AUU,AUC,AUC
1234 | AUG,AUG,AUG
1235 | CUU,CUC,CUG
1236 | UGC,UGC,UGU
1237 | UGU,UGC,UGC
1238 | AUG,AUG,AUG
1239 | ACC,ACC,ACC
1240 | AGU,AGC,AGC
1241 | UGC,UGC,UGC
1242 | UGU,UGC,UGU
1243 | AGU,UCC,AGC
1244 | UGU,UGC,UGC
1245 | CUC,CUG,CUG
1246 | AAG,AAG,AAG
1247 | GGC,GGG,GGC
1248 | UGU,UGC,UGU
1249 | UGU,UGC,UGU
1250 | UCU,AGC,AGC
1251 | UGU,UGC,UGU
1252 | GGA,GGC,GGC
1253 | UCC,UCC,AGC
1254 | UGC,UGC,UGC
1255 | UGC,UGC,UGC
1256 | AAA,AAG,AAG
1257 | UUU,UUC,UUC
1258 | GAU,GAC,GAC
1259 | GAA,GAG,GAG
1260 | GAC,GAC,GAC
1261 | GAC,GAC,GAU
1262 | UCU,AGC,UCU
1263 | GAG,GAG,GAG
1264 | CCA,CCC,CCC
1265 | GUG,GUC,GUG
1266 | CUC,CUG,CUG
1267 | AAA,AAG,AAG
1268 | GGA,GGG,GGC
1269 | GUC,GUG,GUG
1270 | AAA,AAG,AAA
1271 | UUA,CUC,CUG
1272 | CAU,CAC,CAC
1273 | UAC,UAC,UAC
1274 | ACA,ACG,ACA
1275 | UAA,UGA,UGA
1276 |
1277 |
--------------------------------------------------------------------------------
/side-by-side.csv:
--------------------------------------------------------------------------------
1 | abspos,codonOrig,codonVaccine
2 | 0,ATG,ATG
3 | 3,TTT,TTC
4 | 6,GTT,GTG
5 | 9,TTT,TTC
6 | 12,CTT,CTG
7 | 15,GTT,GTG
8 | 18,TTA,CTG
9 | 21,TTG,CTG
10 | 24,CCA,CCT
11 | 27,CTA,CTG
12 | 30,GTC,GTG
13 | 33,TCT,TCC
14 | 36,AGT,AGC
15 | 39,CAG,CAG
16 | 42,TGT,TGT
17 | 45,GTT,GTG
18 | 48,AAT,AAC
19 | 51,CTT,CTG
20 | 54,ACA,ACC
21 | 57,ACC,ACC
22 | 60,AGA,AGA
23 | 63,ACT,ACA
24 | 66,CAA,CAG
25 | 69,TTA,CTG
26 | 72,CCC,CCT
27 | 75,CCT,CCA
28 | 78,GCA,GCC
29 | 81,TAC,TAC
30 | 84,ACT,ACC
31 | 87,AAT,AAC
32 | 90,TCT,AGC
33 | 93,TTC,TTT
34 | 96,ACA,ACC
35 | 99,CGT,AGA
36 | 102,GGT,GGC
37 | 105,GTT,GTG
38 | 108,TAT,TAC
39 | 111,TAC,TAC
40 | 114,CCT,CCC
41 | 117,GAC,GAC
42 | 120,AAA,AAG
43 | 123,GTT,GTG
44 | 126,TTC,TTC
45 | 129,AGA,AGA
46 | 132,TCC,TCC
47 | 135,TCA,AGC
48 | 138,GTT,GTG
49 | 141,TTA,CTG
50 | 144,CAT,CAC
51 | 147,TCA,TCT
52 | 150,ACT,ACC
53 | 153,CAG,CAG
54 | 156,GAC,GAC
55 | 159,TTG,CTG
56 | 162,TTC,TTC
57 | 165,TTA,CTG
58 | 168,CCT,CCT
59 | 171,TTC,TTC
60 | 174,TTT,TTC
61 | 177,TCC,AGC
62 | 180,AAT,AAC
63 | 183,GTT,GTG
64 | 186,ACT,ACC
65 | 189,TGG,TGG
66 | 192,TTC,TTC
67 | 195,CAT,CAC
68 | 198,GCT,GCC
69 | 201,ATA,ATC
70 | 204,CAT,CAC
71 | 207,GTC,GTG
72 | 210,TCT,TCC
73 | 213,GGG,GGC
74 | 216,ACC,ACC
75 | 219,AAT,AAT
76 | 222,GGT,GGC
77 | 225,ACT,ACC
78 | 228,AAG,AAG
79 | 231,AGG,AGA
80 | 234,TTT,TTC
81 | 237,GAT,GAC
82 | 240,AAC,AAC
83 | 243,CCT,CCC
84 | 246,GTC,GTG
85 | 249,CTA,CTG
86 | 252,CCA,CCC
87 | 255,TTT,TTC
88 | 258,AAT,AAC
89 | 261,GAT,GAC
90 | 264,GGT,GGG
91 | 267,GTT,GTG
92 | 270,TAT,TAC
93 | 273,TTT,TTT
94 | 276,GCT,GCC
95 | 279,TCC,AGC
96 | 282,ACT,ACC
97 | 285,GAG,GAG
98 | 288,AAG,AAG
99 | 291,TCT,TCC
100 | 294,AAC,AAC
101 | 297,ATA,ATC
102 | 300,ATA,ATC
103 | 303,AGA,AGA
104 | 306,GGC,GGC
105 | 309,TGG,TGG
106 | 312,ATT,ATC
107 | 315,TTT,TTC
108 | 318,GGT,GGC
109 | 321,ACT,ACC
110 | 324,ACT,ACA
111 | 327,TTA,CTG
112 | 330,GAT,GAC
113 | 333,TCG,AGC
114 | 336,AAG,AAG
115 | 339,ACC,ACC
116 | 342,CAG,CAG
117 | 345,TCC,AGC
118 | 348,CTA,CTG
119 | 351,CTT,CTG
120 | 354,ATT,ATC
121 | 357,GTT,GTG
122 | 360,AAT,AAC
123 | 363,AAC,AAC
124 | 366,GCT,GCC
125 | 369,ACT,ACC
126 | 372,AAT,AAC
127 | 375,GTT,GTG
128 | 378,GTT,GTC
129 | 381,ATT,ATC
130 | 384,AAA,AAA
131 | 387,GTC,GTG
132 | 390,TGT,TGC
133 | 393,GAA,GAG
134 | 396,TTT,TTC
135 | 399,CAA,CAG
136 | 402,TTT,TTC
137 | 405,TGT,TGC
138 | 408,AAT,AAC
139 | 411,GAT,GAC
140 | 414,CCA,CCC
141 | 417,TTT,TTC
142 | 420,TTG,CTG
143 | 423,GGT,GGC
144 | 426,GTT,GTC
145 | 429,TAT,TAC
146 | 432,TAC,TAC
147 | 435,CAC,CAC
148 | 438,AAA,AAG
149 | 441,AAC,AAC
150 | 444,AAC,AAC
151 | 447,AAA,AAG
152 | 450,AGT,AGC
153 | 453,TGG,TGG
154 | 456,ATG,ATG
155 | 459,GAA,GAA
156 | 462,AGT,AGC
157 | 465,GAG,GAG
158 | 468,TTC,TTC
159 | 471,AGA,CGG
160 | 474,GTT,GTG
161 | 477,TAT,TAC
162 | 480,TCT,AGC
163 | 483,AGT,AGC
164 | 486,GCG,GCC
165 | 489,AAT,AAC
166 | 492,AAT,AAC
167 | 495,TGC,TGC
168 | 498,ACT,ACC
169 | 501,TTT,TTC
170 | 504,GAA,GAG
171 | 507,TAT,TAC
172 | 510,GTC,GTG
173 | 513,TCT,TCC
174 | 516,CAG,CAG
175 | 519,CCT,CCT
176 | 522,TTT,TTC
177 | 525,CTT,CTG
178 | 528,ATG,ATG
179 | 531,GAC,GAC
180 | 534,CTT,CTG
181 | 537,GAA,GAA
182 | 540,GGA,GGC
183 | 543,AAA,AAG
184 | 546,CAG,CAG
185 | 549,GGT,GGC
186 | 552,AAT,AAC
187 | 555,TTC,TTC
188 | 558,AAA,AAG
189 | 561,AAT,AAC
190 | 564,CTT,CTG
191 | 567,AGG,CGC
192 | 570,GAA,GAG
193 | 573,TTT,TTC
194 | 576,GTG,GTG
195 | 579,TTT,TTT
196 | 582,AAG,AAG
197 | 585,AAT,AAC
198 | 588,ATT,ATC
199 | 591,GAT,GAC
200 | 594,GGT,GGC
201 | 597,TAT,TAC
202 | 600,TTT,TTC
203 | 603,AAA,AAG
204 | 606,ATA,ATC
205 | 609,TAT,TAC
206 | 612,TCT,AGC
207 | 615,AAG,AAG
208 | 618,CAC,CAC
209 | 621,ACG,ACC
210 | 624,CCT,CCT
211 | 627,ATT,ATC
212 | 630,AAT,AAC
213 | 633,TTA,CTC
214 | 636,GTG,GTG
215 | 639,CGT,CGG
216 | 642,GAT,GAT
217 | 645,CTC,CTG
218 | 648,CCT,CCT
219 | 651,CAG,CAG
220 | 654,GGT,GGC
221 | 657,TTT,TTC
222 | 660,TCG,TCT
223 | 663,GCT,GCT
224 | 666,TTA,CTG
225 | 669,GAA,GAA
226 | 672,CCA,CCC
227 | 675,TTG,CTG
228 | 678,GTA,GTG
229 | 681,GAT,GAT
230 | 684,TTG,CTG
231 | 687,CCA,CCC
232 | 690,ATA,ATC
233 | 693,GGT,GGC
234 | 696,ATT,ATC
235 | 699,AAC,AAC
236 | 702,ATC,ATC
237 | 705,ACT,ACC
238 | 708,AGG,CGG
239 | 711,TTT,TTT
240 | 714,CAA,CAG
241 | 717,ACT,ACA
242 | 720,TTA,CTG
243 | 723,CTT,CTG
244 | 726,GCT,GCC
245 | 729,TTA,CTG
246 | 732,CAT,CAC
247 | 735,AGA,AGA
248 | 738,AGT,AGC
249 | 741,TAT,TAC
250 | 744,TTG,CTG
251 | 747,ACT,ACA
252 | 750,CCT,CCT
253 | 753,GGT,GGC
254 | 756,GAT,GAT
255 | 759,TCT,AGC
256 | 762,TCT,AGC
257 | 765,TCA,AGC
258 | 768,GGT,GGA
259 | 771,TGG,TGG
260 | 774,ACA,ACA
261 | 777,GCT,GCT
262 | 780,GGT,GGT
263 | 783,GCT,GCC
264 | 786,GCA,GCC
265 | 789,GCT,GCT
266 | 792,TAT,TAC
267 | 795,TAT,TAT
268 | 798,GTG,GTG
269 | 801,GGT,GGC
270 | 804,TAT,TAC
271 | 807,CTT,CTG
272 | 810,CAA,CAG
273 | 813,CCT,CCT
274 | 816,AGG,AGA
275 | 819,ACT,ACC
276 | 822,TTT,TTC
277 | 825,CTA,CTG
278 | 828,TTA,CTG
279 | 831,AAA,AAG
280 | 834,TAT,TAC
281 | 837,AAT,AAC
282 | 840,GAA,GAG
283 | 843,AAT,AAC
284 | 846,GGA,GGC
285 | 849,ACC,ACC
286 | 852,ATT,ATC
287 | 855,ACA,ACC
288 | 858,GAT,GAC
289 | 861,GCT,GCC
290 | 864,GTA,GTG
291 | 867,GAC,GAT
292 | 870,TGT,TGT
293 | 873,GCA,GCT
294 | 876,CTT,CTG
295 | 879,GAC,GAT
296 | 882,CCT,CCT
297 | 885,CTC,CTG
298 | 888,TCA,AGC
299 | 891,GAA,GAG
300 | 894,ACA,ACA
301 | 897,AAG,AAG
302 | 900,TGT,TGC
303 | 903,ACG,ACC
304 | 906,TTG,CTG
305 | 909,AAA,AAG
306 | 912,TCC,TCC
307 | 915,TTC,TTC
308 | 918,ACT,ACC
309 | 921,GTA,GTG
310 | 924,GAA,GAA
311 | 927,AAA,AAG
312 | 930,GGA,GGC
313 | 933,ATC,ATC
314 | 936,TAT,TAC
315 | 939,CAA,CAG
316 | 942,ACT,ACC
317 | 945,TCT,AGC
318 | 948,AAC,AAC
319 | 951,TTT,TTC
320 | 954,AGA,CGG
321 | 957,GTC,GTG
322 | 960,CAA,CAG
323 | 963,CCA,CCC
324 | 966,ACA,ACC
325 | 969,GAA,GAA
326 | 972,TCT,TCC
327 | 975,ATT,ATC
328 | 978,GTT,GTG
329 | 981,AGA,CGG
330 | 984,TTT,TTC
331 | 987,CCT,CCC
332 | 990,AAT,AAT
333 | 993,ATT,ATC
334 | 996,ACA,ACC
335 | 999,AAC,AAT
336 | 1002,TTG,CTG
337 | 1005,TGC,TGC
338 | 1008,CCT,CCC
339 | 1011,TTT,TTC
340 | 1014,GGT,GGC
341 | 1017,GAA,GAG
342 | 1020,GTT,GTG
343 | 1023,TTT,TTC
344 | 1026,AAC,AAT
345 | 1029,GCC,GCC
346 | 1032,ACC,ACC
347 | 1035,AGA,AGA
348 | 1038,TTT,TTC
349 | 1041,GCA,GCC
350 | 1044,TCT,TCT
351 | 1047,GTT,GTG
352 | 1050,TAT,TAC
353 | 1053,GCT,GCC
354 | 1056,TGG,TGG
355 | 1059,AAC,AAC
356 | 1062,AGG,CGG
357 | 1065,AAG,AAG
358 | 1068,AGA,CGG
359 | 1071,ATC,ATC
360 | 1074,AGC,AGC
361 | 1077,AAC,AAT
362 | 1080,TGT,TGC
363 | 1083,GTT,GTG
364 | 1086,GCT,GCC
365 | 1089,GAT,GAC
366 | 1092,TAT,TAC
367 | 1095,TCT,TCC
368 | 1098,GTC,GTG
369 | 1101,CTA,CTG
370 | 1104,TAT,TAC
371 | 1107,AAT,AAC
372 | 1110,TCC,TCC
373 | 1113,GCA,GCC
374 | 1116,TCA,AGC
375 | 1119,TTT,TTC
376 | 1122,TCC,AGC
377 | 1125,ACT,ACC
378 | 1128,TTT,TTC
379 | 1131,AAG,AAG
380 | 1134,TGT,TGC
381 | 1137,TAT,TAC
382 | 1140,GGA,GGC
383 | 1143,GTG,GTG
384 | 1146,TCT,TCC
385 | 1149,CCT,CCT
386 | 1152,ACT,ACC
387 | 1155,AAA,AAG
388 | 1158,TTA,CTG
389 | 1161,AAT,AAC
390 | 1164,GAT,GAC
391 | 1167,CTC,CTG
392 | 1170,TGC,TGC
393 | 1173,TTT,TTC
394 | 1176,ACT,ACA
395 | 1179,AAT,AAC
396 | 1182,GTC,GTG
397 | 1185,TAT,TAC
398 | 1188,GCA,GCC
399 | 1191,GAT,GAC
400 | 1194,TCA,AGC
401 | 1197,TTT,TTC
402 | 1200,GTA,GTG
403 | 1203,ATT,ATC
404 | 1206,AGA,CGG
405 | 1209,GGT,GGA
406 | 1212,GAT,GAT
407 | 1215,GAA,GAA
408 | 1218,GTC,GTG
409 | 1221,AGA,CGG
410 | 1224,CAA,CAG
411 | 1227,ATC,ATT
412 | 1230,GCT,GCC
413 | 1233,CCA,CCT
414 | 1236,GGG,GGA
415 | 1239,CAA,CAG
416 | 1242,ACT,ACA
417 | 1245,GGA,GGC
418 | 1248,AAG,AAG
419 | 1251,ATT,ATC
420 | 1254,GCT,GCC
421 | 1257,GAT,GAC
422 | 1260,TAT,TAC
423 | 1263,AAT,AAC
424 | 1266,TAT,TAC
425 | 1269,AAA,AAG
426 | 1272,TTA,CTG
427 | 1275,CCA,CCC
428 | 1278,GAT,GAC
429 | 1281,GAT,GAC
430 | 1284,TTT,TTC
431 | 1287,ACA,ACC
432 | 1290,GGC,GGC
433 | 1293,TGC,TGT
434 | 1296,GTT,GTG
435 | 1299,ATA,ATT
436 | 1302,GCT,GCC
437 | 1305,TGG,TGG
438 | 1308,AAT,AAC
439 | 1311,TCT,AGC
440 | 1314,AAC,AAC
441 | 1317,AAT,AAC
442 | 1320,CTT,CTG
443 | 1323,GAT,GAC
444 | 1326,TCT,TCC
445 | 1329,AAG,AAA
446 | 1332,GTT,GTC
447 | 1335,GGT,GGC
448 | 1338,GGT,GGC
449 | 1341,AAT,AAC
450 | 1344,TAT,TAC
451 | 1347,AAT,AAT
452 | 1350,TAC,TAC
453 | 1353,CTG,CTG
454 | 1356,TAT,TAC
455 | 1359,AGA,CGG
456 | 1362,TTG,CTG
457 | 1365,TTT,TTC
458 | 1368,AGG,CGG
459 | 1371,AAG,AAG
460 | 1374,TCT,TCC
461 | 1377,AAT,AAT
462 | 1380,CTC,CTG
463 | 1383,AAA,AAG
464 | 1386,CCT,CCC
465 | 1389,TTT,TTC
466 | 1392,GAG,GAG
467 | 1395,AGA,CGG
468 | 1398,GAT,GAC
469 | 1401,ATT,ATC
470 | 1404,TCA,TCC
471 | 1407,ACT,ACC
472 | 1410,GAA,GAG
473 | 1413,ATC,ATC
474 | 1416,TAT,TAT
475 | 1419,CAG,CAG
476 | 1422,GCC,GCC
477 | 1425,GGT,GGC
478 | 1428,AGC,AGC
479 | 1431,ACA,ACC
480 | 1434,CCT,CCT
481 | 1437,TGT,TGT
482 | 1440,AAT,AAC
483 | 1443,GGT,GGC
484 | 1446,GTT,GTG
485 | 1449,GAA,GAA
486 | 1452,GGT,GGC
487 | 1455,TTT,TTC
488 | 1458,AAT,AAC
489 | 1461,TGT,TGC
490 | 1464,TAC,TAC
491 | 1467,TTT,TTC
492 | 1470,CCT,CCA
493 | 1473,TTA,CTG
494 | 1476,CAA,CAG
495 | 1479,TCA,TCC
496 | 1482,TAT,TAC
497 | 1485,GGT,GGC
498 | 1488,TTC,TTT
499 | 1491,CAA,CAG
500 | 1494,CCC,CCC
501 | 1497,ACT,ACA
502 | 1500,AAT,AAT
503 | 1503,GGT,GGC
504 | 1506,GTT,GTG
505 | 1509,GGT,GGC
506 | 1512,TAC,TAT
507 | 1515,CAA,CAG
508 | 1518,CCA,CCC
509 | 1521,TAC,TAC
510 | 1524,AGA,AGA
511 | 1527,GTA,GTG
512 | 1530,GTA,GTG
513 | 1533,GTA,GTG
514 | 1536,CTT,CTG
515 | 1539,TCT,AGC
516 | 1542,TTT,TTC
517 | 1545,GAA,GAA
518 | 1548,CTT,CTG
519 | 1551,CTA,CTG
520 | 1554,CAT,CAT
521 | 1557,GCA,GCC
522 | 1560,CCA,CCT
523 | 1563,GCA,GCC
524 | 1566,ACT,ACA
525 | 1569,GTT,GTG
526 | 1572,TGT,TGC
527 | 1575,GGA,GGC
528 | 1578,CCT,CCT
529 | 1581,AAA,AAG
530 | 1584,AAG,AAA
531 | 1587,TCT,AGC
532 | 1590,ACT,ACC
533 | 1593,AAT,AAT
534 | 1596,TTG,CTC
535 | 1599,GTT,GTG
536 | 1602,AAA,AAG
537 | 1605,AAC,AAC
538 | 1608,AAA,AAA
539 | 1611,TGT,TGC
540 | 1614,GTC,GTG
541 | 1617,AAT,AAC
542 | 1620,TTC,TTC
543 | 1623,AAC,AAC
544 | 1626,TTC,TTC
545 | 1629,AAT,AAC
546 | 1632,GGT,GGC
547 | 1635,TTA,CTG
548 | 1638,ACA,ACC
549 | 1641,GGC,GGC
550 | 1644,ACA,ACC
551 | 1647,GGT,GGC
552 | 1650,GTT,GTG
553 | 1653,CTT,CTG
554 | 1656,ACT,ACA
555 | 1659,GAG,GAG
556 | 1662,TCT,AGC
557 | 1665,AAC,AAC
558 | 1668,AAA,AAG
559 | 1671,AAG,AAG
560 | 1674,TTT,TTC
561 | 1677,CTG,CTG
562 | 1680,CCT,CCA
563 | 1683,TTC,TTC
564 | 1686,CAA,CAG
565 | 1689,CAA,CAG
566 | 1692,TTT,TTT
567 | 1695,GGC,GGC
568 | 1698,AGA,CGG
569 | 1701,GAC,GAT
570 | 1704,ATT,ATC
571 | 1707,GCT,GCC
572 | 1710,GAC,GAT
573 | 1713,ACT,ACC
574 | 1716,ACT,ACA
575 | 1719,GAT,GAC
576 | 1722,GCT,GCC
577 | 1725,GTC,GTT
578 | 1728,CGT,AGA
579 | 1731,GAT,GAT
580 | 1734,CCA,CCC
581 | 1737,CAG,CAG
582 | 1740,ACA,ACA
583 | 1743,CTT,CTG
584 | 1746,GAG,GAA
585 | 1749,ATT,ATC
586 | 1752,CTT,CTG
587 | 1755,GAC,GAC
588 | 1758,ATT,ATC
589 | 1761,ACA,ACC
590 | 1764,CCA,CCT
591 | 1767,TGT,TGC
592 | 1770,TCT,AGC
593 | 1773,TTT,TTC
594 | 1776,GGT,GGC
595 | 1779,GGT,GGA
596 | 1782,GTC,GTG
597 | 1785,AGT,TCT
598 | 1788,GTT,GTG
599 | 1791,ATA,ATC
600 | 1794,ACA,ACC
601 | 1797,CCA,CCT
602 | 1800,GGA,GGC
603 | 1803,ACA,ACC
604 | 1806,AAT,AAC
605 | 1809,ACT,ACC
606 | 1812,TCT,AGC
607 | 1815,AAC,AAT
608 | 1818,CAG,CAG
609 | 1821,GTT,GTG
610 | 1824,GCT,GCA
611 | 1827,GTT,GTG
612 | 1830,CTT,CTG
613 | 1833,TAT,TAC
614 | 1836,CAG,CAG
615 | 1839,GAT,GAC
616 | 1842,GTT,GTG
617 | 1845,AAC,AAC
618 | 1848,TGC,TGT
619 | 1851,ACA,ACC
620 | 1854,GAA,GAA
621 | 1857,GTC,GTG
622 | 1860,CCT,CCC
623 | 1863,GTT,GTG
624 | 1866,GCT,GCC
625 | 1869,ATT,ATT
626 | 1872,CAT,CAC
627 | 1875,GCA,GCC
628 | 1878,GAT,GAT
629 | 1881,CAA,CAG
630 | 1884,CTT,CTG
631 | 1887,ACT,ACA
632 | 1890,CCT,CCT
633 | 1893,ACT,ACA
634 | 1896,TGG,TGG
635 | 1899,CGT,CGG
636 | 1902,GTT,GTG
637 | 1905,TAT,TAC
638 | 1908,TCT,TCC
639 | 1911,ACA,ACC
640 | 1914,GGT,GGC
641 | 1917,TCT,AGC
642 | 1920,AAT,AAT
643 | 1923,GTT,GTG
644 | 1926,TTT,TTT
645 | 1929,CAA,CAG
646 | 1932,ACA,ACC
647 | 1935,CGT,AGA
648 | 1938,GCA,GCC
649 | 1941,GGC,GGC
650 | 1944,TGT,TGT
651 | 1947,TTA,CTG
652 | 1950,ATA,ATC
653 | 1953,GGG,GGA
654 | 1956,GCT,GCC
655 | 1959,GAA,GAG
656 | 1962,CAT,CAC
657 | 1965,GTC,GTG
658 | 1968,AAC,AAC
659 | 1971,AAC,AAT
660 | 1974,TCA,AGC
661 | 1977,TAT,TAC
662 | 1980,GAG,GAG
663 | 1983,TGT,TGC
664 | 1986,GAC,GAC
665 | 1989,ATA,ATC
666 | 1992,CCC,CCC
667 | 1995,ATT,ATC
668 | 1998,GGT,GGC
669 | 2001,GCA,GCT
670 | 2004,GGT,GGA
671 | 2007,ATA,ATC
672 | 2010,TGC,TGC
673 | 2013,GCT,GCC
674 | 2016,AGT,AGC
675 | 2019,TAT,TAC
676 | 2022,CAG,CAG
677 | 2025,ACT,ACA
678 | 2028,CAG,CAG
679 | 2031,ACT,ACA
680 | 2034,AAT,AAC
681 | 2037,TCT,AGC
682 | 2040,CCT,CCT
683 | 2043,CGG,CGG
684 | 2046,CGG,AGA
685 | 2049,GCA,GCC
686 | 2052,CGT,AGA
687 | 2055,AGT,AGC
688 | 2058,GTA,GTG
689 | 2061,GCT,GCC
690 | 2064,AGT,AGC
691 | 2067,CAA,CAG
692 | 2070,TCC,AGC
693 | 2073,ATC,ATC
694 | 2076,ATT,ATT
695 | 2079,GCC,GCC
696 | 2082,TAC,TAC
697 | 2085,ACT,ACA
698 | 2088,ATG,ATG
699 | 2091,TCA,TCT
700 | 2094,CTT,CTG
701 | 2097,GGT,GGC
702 | 2100,GCA,GCC
703 | 2103,GAA,GAG
704 | 2106,AAT,AAC
705 | 2109,TCA,AGC
706 | 2112,GTT,GTG
707 | 2115,GCT,GCC
708 | 2118,TAC,TAC
709 | 2121,TCT,TCC
710 | 2124,AAT,AAC
711 | 2127,AAC,AAC
712 | 2130,TCT,TCT
713 | 2133,ATT,ATC
714 | 2136,GCC,GCT
715 | 2139,ATA,ATC
716 | 2142,CCC,CCC
717 | 2145,ACA,ACC
718 | 2148,AAT,AAC
719 | 2151,TTT,TTC
720 | 2154,ACT,ACC
721 | 2157,ATT,ATC
722 | 2160,AGT,AGC
723 | 2163,GTT,GTG
724 | 2166,ACC,ACC
725 | 2169,ACA,ACA
726 | 2172,GAA,GAG
727 | 2175,ATT,ATC
728 | 2178,CTA,CTG
729 | 2181,CCA,CCT
730 | 2184,GTG,GTG
731 | 2187,TCT,TCC
732 | 2190,ATG,ATG
733 | 2193,ACC,ACC
734 | 2196,AAG,AAG
735 | 2199,ACA,ACC
736 | 2202,TCA,AGC
737 | 2205,GTA,GTG
738 | 2208,GAT,GAC
739 | 2211,TGT,TGC
740 | 2214,ACA,ACC
741 | 2217,ATG,ATG
742 | 2220,TAC,TAC
743 | 2223,ATT,ATC
744 | 2226,TGT,TGC
745 | 2229,GGT,GGC
746 | 2232,GAT,GAT
747 | 2235,TCA,TCC
748 | 2238,ACT,ACC
749 | 2241,GAA,GAG
750 | 2244,TGC,TGC
751 | 2247,AGC,TCC
752 | 2250,AAT,AAC
753 | 2253,CTT,CTG
754 | 2256,TTG,CTG
755 | 2259,TTG,CTG
756 | 2262,CAA,CAG
757 | 2265,TAT,TAC
758 | 2268,GGC,GGC
759 | 2271,AGT,AGC
760 | 2274,TTT,TTC
761 | 2277,TGT,TGC
762 | 2280,ACA,ACC
763 | 2283,CAA,CAG
764 | 2286,TTA,CTG
765 | 2289,AAC,AAT
766 | 2292,CGT,AGA
767 | 2295,GCT,GCC
768 | 2298,TTA,CTG
769 | 2301,ACT,ACA
770 | 2304,GGA,GGG
771 | 2307,ATA,ATC
772 | 2310,GCT,GCC
773 | 2313,GTT,GTG
774 | 2316,GAA,GAA
775 | 2319,CAA,CAG
776 | 2322,GAC,GAC
777 | 2325,AAA,AAG
778 | 2328,AAC,AAC
779 | 2331,ACC,ACC
780 | 2334,CAA,CAA
781 | 2337,GAA,GAG
782 | 2340,GTT,GTG
783 | 2343,TTT,TTC
784 | 2346,GCA,GCC
785 | 2349,CAA,CAA
786 | 2352,GTC,GTG
787 | 2355,AAA,AAG
788 | 2358,CAA,CAG
789 | 2361,ATT,ATC
790 | 2364,TAC,TAC
791 | 2367,AAA,AAG
792 | 2370,ACA,ACC
793 | 2373,CCA,CCT
794 | 2376,CCA,CCT
795 | 2379,ATT,ATC
796 | 2382,AAA,AAG
797 | 2385,GAT,GAC
798 | 2388,TTT,TTC
799 | 2391,GGT,GGC
800 | 2394,GGT,GGC
801 | 2397,TTT,TTC
802 | 2400,AAT,AAT
803 | 2403,TTT,TTC
804 | 2406,TCA,AGC
805 | 2409,CAA,CAG
806 | 2412,ATA,ATT
807 | 2415,TTA,CTG
808 | 2418,CCA,CCC
809 | 2421,GAT,GAT
810 | 2424,CCA,CCT
811 | 2427,TCA,AGC
812 | 2430,AAA,AAG
813 | 2433,CCA,CCC
814 | 2436,AGC,AGC
815 | 2439,AAG,AAG
816 | 2442,AGG,CGG
817 | 2445,TCA,AGC
818 | 2448,TTT,TTC
819 | 2451,ATT,ATC
820 | 2454,GAA,GAG
821 | 2457,GAT,GAC
822 | 2460,CTA,CTG
823 | 2463,CTT,CTG
824 | 2466,TTC,TTC
825 | 2469,AAC,AAC
826 | 2472,AAA,AAA
827 | 2475,GTG,GTG
828 | 2478,ACA,ACA
829 | 2481,CTT,CTG
830 | 2484,GCA,GCC
831 | 2487,GAT,GAC
832 | 2490,GCT,GCC
833 | 2493,GGC,GGC
834 | 2496,TTC,TTC
835 | 2499,ATC,ATC
836 | 2502,AAA,AAG
837 | 2505,CAA,CAG
838 | 2508,TAT,TAT
839 | 2511,GGT,GGC
840 | 2514,GAT,GAT
841 | 2517,TGC,TGT
842 | 2520,CTT,CTG
843 | 2523,GGT,GGC
844 | 2526,GAT,GAC
845 | 2529,ATT,ATT
846 | 2532,GCT,GCC
847 | 2535,GCT,GCC
848 | 2538,AGA,AGG
849 | 2541,GAC,GAT
850 | 2544,CTC,CTG
851 | 2547,ATT,ATT
852 | 2550,TGT,TGC
853 | 2553,GCA,GCC
854 | 2556,CAA,CAG
855 | 2559,AAG,AAG
856 | 2562,TTT,TTT
857 | 2565,AAC,AAC
858 | 2568,GGC,GGA
859 | 2571,CTT,CTG
860 | 2574,ACT,ACA
861 | 2577,GTT,GTG
862 | 2580,TTG,CTG
863 | 2583,CCA,CCT
864 | 2586,CCT,CCT
865 | 2589,TTG,CTG
866 | 2592,CTC,CTG
867 | 2595,ACA,ACC
868 | 2598,GAT,GAT
869 | 2601,GAA,GAG
870 | 2604,ATG,ATG
871 | 2607,ATT,ATC
872 | 2610,GCT,GCC
873 | 2613,CAA,CAG
874 | 2616,TAC,TAC
875 | 2619,ACT,ACA
876 | 2622,TCT,TCT
877 | 2625,GCA,GCC
878 | 2628,CTG,CTG
879 | 2631,TTA,CTG
880 | 2634,GCG,GCC
881 | 2637,GGT,GGC
882 | 2640,ACA,ACA
883 | 2643,ATC,ATC
884 | 2646,ACT,ACA
885 | 2649,TCT,AGC
886 | 2652,GGT,GGC
887 | 2655,TGG,TGG
888 | 2658,ACC,ACA
889 | 2661,TTT,TTT
890 | 2664,GGT,GGA
891 | 2667,GCA,GCA
892 | 2670,GGT,GGC
893 | 2673,GCT,GCC
894 | 2676,GCA,GCT
895 | 2679,TTA,CTG
896 | 2682,CAA,CAG
897 | 2685,ATA,ATC
898 | 2688,CCA,CCC
899 | 2691,TTT,TTT
900 | 2694,GCT,GCT
901 | 2697,ATG,ATG
902 | 2700,CAA,CAG
903 | 2703,ATG,ATG
904 | 2706,GCT,GCC
905 | 2709,TAT,TAC
906 | 2712,AGG,CGG
907 | 2715,TTT,TTC
908 | 2718,AAT,AAC
909 | 2721,GGT,GGC
910 | 2724,ATT,ATC
911 | 2727,GGA,GGA
912 | 2730,GTT,GTG
913 | 2733,ACA,ACC
914 | 2736,CAG,CAG
915 | 2739,AAT,AAT
916 | 2742,GTT,GTG
917 | 2745,CTC,CTG
918 | 2748,TAT,TAC
919 | 2751,GAG,GAG
920 | 2754,AAC,AAC
921 | 2757,CAA,CAG
922 | 2760,AAA,AAG
923 | 2763,TTG,CTG
924 | 2766,ATT,ATC
925 | 2769,GCC,GCC
926 | 2772,AAC,AAC
927 | 2775,CAA,CAG
928 | 2778,TTT,TTC
929 | 2781,AAT,AAC
930 | 2784,AGT,AGC
931 | 2787,GCT,GCC
932 | 2790,ATT,ATC
933 | 2793,GGC,GGC
934 | 2796,AAA,AAG
935 | 2799,ATT,ATC
936 | 2802,CAA,CAG
937 | 2805,GAC,GAC
938 | 2808,TCA,AGC
939 | 2811,CTT,CTG
940 | 2814,TCT,AGC
941 | 2817,TCC,AGC
942 | 2820,ACA,ACA
943 | 2823,GCA,GCA
944 | 2826,AGT,AGC
945 | 2829,GCA,GCC
946 | 2832,CTT,CTG
947 | 2835,GGA,GGA
948 | 2838,AAA,AAG
949 | 2841,CTT,CTG
950 | 2844,CAA,CAG
951 | 2847,GAT,GAC
952 | 2850,GTG,GTG
953 | 2853,GTC,GTC
954 | 2856,AAC,AAC
955 | 2859,CAA,CAG
956 | 2862,AAT,AAT
957 | 2865,GCA,GCC
958 | 2868,CAA,CAG
959 | 2871,GCT,GCA
960 | 2874,TTA,CTG
961 | 2877,AAC,AAC
962 | 2880,ACG,ACC
963 | 2883,CTT,CTG
964 | 2886,GTT,GTC
965 | 2889,AAA,AAG
966 | 2892,CAA,CAG
967 | 2895,CTT,CTG
968 | 2898,AGC,TCC
969 | 2901,TCC,TCC
970 | 2904,AAT,AAC
971 | 2907,TTT,TTC
972 | 2910,GGT,GGC
973 | 2913,GCA,GCC
974 | 2916,ATT,ATC
975 | 2919,TCA,AGC
976 | 2922,AGT,TCT
977 | 2925,GTT,GTG
978 | 2928,TTA,CTG
979 | 2931,AAT,AAC
980 | 2934,GAT,GAT
981 | 2937,ATC,ATC
982 | 2940,CTT,CTG
983 | 2943,TCA,AGC
984 | 2946,CGT,AGA
985 | 2949,CTT,CTG
986 | 2952,GAC,GAC
987 | 2955,AAA,CCT
988 | 2958,GTT,CCT
989 | 2961,GAG,GAG
990 | 2964,GCT,GCC
991 | 2967,GAA,GAG
992 | 2970,GTG,GTG
993 | 2973,CAA,CAG
994 | 2976,ATT,ATC
995 | 2979,GAT,GAC
996 | 2982,AGG,AGA
997 | 2985,TTG,CTG
998 | 2988,ATC,ATC
999 | 2991,ACA,ACA
1000 | 2994,GGC,GGC
1001 | 2997,AGA,AGA
1002 | 3000,CTT,CTG
1003 | 3003,CAA,CAG
1004 | 3006,AGT,AGC
1005 | 3009,TTG,CTC
1006 | 3012,CAG,CAG
1007 | 3015,ACA,ACA
1008 | 3018,TAT,TAC
1009 | 3021,GTG,GTG
1010 | 3024,ACT,ACC
1011 | 3027,CAA,CAG
1012 | 3030,CAA,CAG
1013 | 3033,TTA,CTG
1014 | 3036,ATT,ATC
1015 | 3039,AGA,AGA
1016 | 3042,GCT,GCC
1017 | 3045,GCA,GCC
1018 | 3048,GAA,GAG
1019 | 3051,ATC,ATT
1020 | 3054,AGA,AGA
1021 | 3057,GCT,GCC
1022 | 3060,TCT,TCT
1023 | 3063,GCT,GCC
1024 | 3066,AAT,AAT
1025 | 3069,CTT,CTG
1026 | 3072,GCT,GCC
1027 | 3075,GCT,GCC
1028 | 3078,ACT,ACC
1029 | 3081,AAA,AAG
1030 | 3084,ATG,ATG
1031 | 3087,TCA,TCT
1032 | 3090,GAG,GAG
1033 | 3093,TGT,TGT
1034 | 3096,GTA,GTG
1035 | 3099,CTT,CTG
1036 | 3102,GGA,GGC
1037 | 3105,CAA,CAG
1038 | 3108,TCA,AGC
1039 | 3111,AAA,AAG
1040 | 3114,AGA,AGA
1041 | 3117,GTT,GTG
1042 | 3120,GAT,GAC
1043 | 3123,TTT,TTT
1044 | 3126,TGT,TGC
1045 | 3129,GGA,GGC
1046 | 3132,AAG,AAG
1047 | 3135,GGC,GGC
1048 | 3138,TAT,TAC
1049 | 3141,CAT,CAC
1050 | 3144,CTT,CTG
1051 | 3147,ATG,ATG
1052 | 3150,TCC,AGC
1053 | 3153,TTC,TTC
1054 | 3156,CCT,CCT
1055 | 3159,CAG,CAG
1056 | 3162,TCA,TCT
1057 | 3165,GCA,GCC
1058 | 3168,CCT,CCT
1059 | 3171,CAT,CAC
1060 | 3174,GGT,GGC
1061 | 3177,GTA,GTG
1062 | 3180,GTC,GTG
1063 | 3183,TTC,TTT
1064 | 3186,TTG,CTG
1065 | 3189,CAT,CAC
1066 | 3192,GTG,GTG
1067 | 3195,ACT,ACA
1068 | 3198,TAT,TAT
1069 | 3201,GTC,GTG
1070 | 3204,CCT,CCC
1071 | 3207,GCA,GCT
1072 | 3210,CAA,CAA
1073 | 3213,GAA,GAG
1074 | 3216,AAG,AAG
1075 | 3219,AAC,AAT
1076 | 3222,TTC,TTC
1077 | 3225,ACA,ACC
1078 | 3228,ACT,ACC
1079 | 3231,GCT,GCT
1080 | 3234,CCT,CCA
1081 | 3237,GCC,GCC
1082 | 3240,ATT,ATC
1083 | 3243,TGT,TGC
1084 | 3246,CAT,CAC
1085 | 3249,GAT,GAC
1086 | 3252,GGA,GGC
1087 | 3255,AAA,AAA
1088 | 3258,GCA,GCC
1089 | 3261,CAC,CAC
1090 | 3264,TTT,TTT
1091 | 3267,CCT,CCT
1092 | 3270,CGT,AGA
1093 | 3273,GAA,GAA
1094 | 3276,GGT,GGC
1095 | 3279,GTC,GTG
1096 | 3282,TTT,TTC
1097 | 3285,GTT,GTG
1098 | 3288,TCA,TCC
1099 | 3291,AAT,AAC
1100 | 3294,GGC,GGC
1101 | 3297,ACA,ACC
1102 | 3300,CAC,CAT
1103 | 3303,TGG,TGG
1104 | 3306,TTT,TTC
1105 | 3309,GTA,GTG
1106 | 3312,ACA,ACA
1107 | 3315,CAA,CAG
1108 | 3318,AGG,CGG
1109 | 3321,AAT,AAC
1110 | 3324,TTT,TTC
1111 | 3327,TAT,TAC
1112 | 3330,GAA,GAG
1113 | 3333,CCA,CCC
1114 | 3336,CAA,CAG
1115 | 3339,ATC,ATC
1116 | 3342,ATT,ATC
1117 | 3345,ACT,ACC
1118 | 3348,ACA,ACC
1119 | 3351,GAC,GAC
1120 | 3354,AAC,AAC
1121 | 3357,ACA,ACC
1122 | 3360,TTT,TTC
1123 | 3363,GTG,GTG
1124 | 3366,TCT,TCT
1125 | 3369,GGT,GGC
1126 | 3372,AAC,AAC
1127 | 3375,TGT,TGC
1128 | 3378,GAT,GAC
1129 | 3381,GTT,GTC
1130 | 3384,GTA,GTG
1131 | 3387,ATA,ATC
1132 | 3390,GGA,GGC
1133 | 3393,ATT,ATT
1134 | 3396,GTC,GTG
1135 | 3399,AAC,AAC
1136 | 3402,AAC,AAT
1137 | 3405,ACA,ACC
1138 | 3408,GTT,GTG
1139 | 3411,TAT,TAC
1140 | 3414,GAT,GAC
1141 | 3417,CCT,CCT
1142 | 3420,TTG,CTG
1143 | 3423,CAA,CAG
1144 | 3426,CCT,CCC
1145 | 3429,GAA,GAG
1146 | 3432,TTA,CTG
1147 | 3435,GAC,GAC
1148 | 3438,TCA,AGC
1149 | 3441,TTC,TTC
1150 | 3444,AAG,AAA
1151 | 3447,GAG,GAG
1152 | 3450,GAG,GAA
1153 | 3453,TTA,CTG
1154 | 3456,GAT,GAC
1155 | 3459,AAA,AAG
1156 | 3462,TAT,TAC
1157 | 3465,TTT,TTT
1158 | 3468,AAG,AAG
1159 | 3471,AAT,AAC
1160 | 3474,CAT,CAC
1161 | 3477,ACA,ACA
1162 | 3480,TCA,AGC
1163 | 3483,CCA,CCC
1164 | 3486,GAT,GAC
1165 | 3489,GTT,GTG
1166 | 3492,GAT,GAC
1167 | 3495,TTA,CTG
1168 | 3498,GGT,GGC
1169 | 3501,GAC,GAT
1170 | 3504,ATC,ATC
1171 | 3507,TCT,AGC
1172 | 3510,GGC,GGA
1173 | 3513,ATT,ATC
1174 | 3516,AAT,AAT
1175 | 3519,GCT,GCC
1176 | 3522,TCA,AGC
1177 | 3525,GTT,GTC
1178 | 3528,GTA,GTG
1179 | 3531,AAC,AAC
1180 | 3534,ATT,ATC
1181 | 3537,CAA,CAG
1182 | 3540,AAA,AAA
1183 | 3543,GAA,GAG
1184 | 3546,ATT,ATC
1185 | 3549,GAC,GAC
1186 | 3552,CGC,CGG
1187 | 3555,CTC,CTG
1188 | 3558,AAT,AAC
1189 | 3561,GAG,GAG
1190 | 3564,GTT,GTG
1191 | 3567,GCC,GCC
1192 | 3570,AAG,AAG
1193 | 3573,AAT,AAT
1194 | 3576,TTA,CTG
1195 | 3579,AAT,AAC
1196 | 3582,GAA,GAG
1197 | 3585,TCT,AGC
1198 | 3588,CTC,CTG
1199 | 3591,ATC,ATC
1200 | 3594,GAT,GAC
1201 | 3597,CTC,CTG
1202 | 3600,CAA,CAA
1203 | 3603,GAA,GAA
1204 | 3606,CTT,CTG
1205 | 3609,GGA,GGG
1206 | 3612,AAG,AAG
1207 | 3615,TAT,TAC
1208 | 3618,GAG,GAG
1209 | 3621,CAG,CAG
1210 | 3624,TAT,TAC
1211 | 3627,ATA,ATC
1212 | 3630,AAA,AAG
1213 | 3633,TGG,TGG
1214 | 3636,CCA,CCC
1215 | 3639,TGG,TGG
1216 | 3642,TAC,TAC
1217 | 3645,ATT,ATC
1218 | 3648,TGG,TGG
1219 | 3651,CTA,CTG
1220 | 3654,GGT,GGC
1221 | 3657,TTT,TTT
1222 | 3660,ATA,ATC
1223 | 3663,GCT,GCC
1224 | 3666,GGC,GGA
1225 | 3669,TTG,CTG
1226 | 3672,ATT,ATT
1227 | 3675,GCC,GCC
1228 | 3678,ATA,ATC
1229 | 3681,GTA,GTG
1230 | 3684,ATG,ATG
1231 | 3687,GTG,GTC
1232 | 3690,ACA,ACA
1233 | 3693,ATT,ATC
1234 | 3696,ATG,ATG
1235 | 3699,CTT,CTG
1236 | 3702,TGC,TGT
1237 | 3705,TGT,TGC
1238 | 3708,ATG,ATG
1239 | 3711,ACC,ACC
1240 | 3714,AGT,AGC
1241 | 3717,TGC,TGC
1242 | 3720,TGT,TGT
1243 | 3723,AGT,AGC
1244 | 3726,TGT,TGC
1245 | 3729,CTC,CTG
1246 | 3732,AAG,AAG
1247 | 3735,GGC,GGC
1248 | 3738,TGT,TGT
1249 | 3741,TGT,TGT
1250 | 3744,TCT,AGC
1251 | 3747,TGT,TGT
1252 | 3750,GGA,GGC
1253 | 3753,TCC,AGC
1254 | 3756,TGC,TGC
1255 | 3759,TGC,TGC
1256 | 3762,AAA,AAG
1257 | 3765,TTT,TTC
1258 | 3768,GAT,GAC
1259 | 3771,GAA,GAG
1260 | 3774,GAC,GAC
1261 | 3777,GAC,GAT
1262 | 3780,TCT,TCT
1263 | 3783,GAG,GAG
1264 | 3786,CCA,CCC
1265 | 3789,GTG,GTG
1266 | 3792,CTC,CTG
1267 | 3795,AAA,AAG
1268 | 3798,GGA,GGC
1269 | 3801,GTC,GTG
1270 | 3804,AAA,AAA
1271 | 3807,TTA,CTG
1272 | 3810,CAT,CAC
1273 | 3813,TAC,TAC
1274 | 3816,ACA,ACA
1275 | 3819,TAA,TGA
1276 |
--------------------------------------------------------------------------------
/reverse-engineering-source-code-of-the-biontech-pfizer-vaccine.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Reverse Engineering the source code of the BioNTech/Pfizer SARS-CoV-2 Vaccine"
3 | date: 2020-12-25T20:12:20+01:00
4 | draft: false
5 | images:
6 | - bnt162b2.png
7 | ---
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 | **Translations**: [ελληνικά](https://berthub.eu/articles/posts/greek-reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/)
16 | / [عربى](https://docs.google.com/document/d/17IEvUBHZnx-Yf-sPoGzih_pAr4eemBmXUplOd0WtWk4/edit)
17 | / [中文](https://mp.weixin.qq.com/s/b0Mw8uKLYuXHJ5Bj3t2Dwg) ([Weixin video](https://mp.weixin.qq.com/s/3z3L0ZtI_JcdlXLB_ZH4lQ), [Youtube video](https://www.youtube.com/watch?v=G75j4qKexN0&feature=youtu.be))
18 | / [粵文](https://medium.com/@it9gamelog/reverse-engineering-biontech-pfizer-bnt162b2-2ce758508fb4)
19 | / [bahasa Indonesia](https://berthub.eu/articles/posts/merekayasa-balik-kode-sumber-vaksin-sars-cov-2-biontech-pfizer/)
20 | / [český](https://benedikz.space/articles/reverse-engineering-zdrojoveho-kodu-vakciny-biontech-pfizer.html)
21 | / [Català](https://www.webscatalunya.com/blog-disseny-web/programacio/enginyeria-inversa-del-codi-font-de-la-vacuna-de-biontech-pfizer-per-la-sars-cov-2/)
22 | / [český](https://benedikz.space/articles/reverse-engineering-zdrojoveho-kodu-vakciny-biontech-pfizer.html)
23 | / [Deutsch](https://berthub.eu/articles/posts/german-reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/)
24 | / [Español](https://berthub.eu/articles/posts/ingenieria_inversa_del_codigo_fuente_de_la_vacuna_de_biontech_pfizer_para_el_sars-cov-2/)
25 | / [2فارسی](https://virgool.io/@afeizi/%D9%85%D9%87%D9%86%D8%AF%D8%B3%DB%8C-%D9%85%D8%B9%DA%A9%D9%88%D8%B3-%DA%A9%D9%8F%D8%AF-%D9%85%D9%86%D8%A8%D8%B9%D9%8D-%D9%88%D8%A7%DA%A9%D8%B3%D9%86-%D9%85%D8%B4%D8%AA%D8%B1%DA%A9-%D8%A8%DB%8C%D9%88%D8%A7%D9%86%D8%AA%DA%A9-%D9%88-%D9%BE%DB%8C-%D9%81%D8%A7%DB%8C%D8%B2%D8%B1-yk6ti7m1aabg)
26 | / [فارسی](https://docs.google.com/document/d/1zNoxsxP-vW5Odv6QZUZYJ4FxAFK1EktQnjwQ5AuCTzc/edit)
27 | / [Français](https://renaudguerin.net/posts/explorons-le-code-source-du-vaccin-biontech-pfizer-sars-cov-2/)
28 | / [עִברִית](https://github.com/chilik/Hebrew-ReversingSARS-CoV-2mRNAVaccine/blob/main/%D7%94%D7%A0%D7%93%D7%A1%D7%94%20%D7%9C%D7%90%D7%97%D7%95%D7%A8%20%D7%A9%D7%9C%20%D7%A7%D7%95%D7%93%20%D7%94%D7%9E%D7%A7%D7%95%D7%A8%20%D7%A9%D7%9C%20%D7%94%D7%97%D7%99%D7%A1%D7%95%D7%9F%20BioNTech%20-%20Pfizer%20SARS-CoV-2.pdf)
29 | / [Hrvatski](https://docs.google.com/document/d/1BODRitAvGuDYGZCHU5LY-AkNhs9_1cVDubdRvz-cSPY/edit)
30 | / [Italiano](https://berthub.eu/articles/posts/italian-reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/)
31 | / [Magyar](https://www.covid1001.hu/vakcina-forraskod-visszafejtese/)
32 | / [Nederlands](https://berthub.eu/articles/posts/dutch-reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/)
33 | / [日本語](https://note.com/yubais/n/n349ab986da42)
34 | / [日本語 2](https://msakai.github.io/bnt162b2/reverse-engineering-source-code-of-the-biontech-pfizer-vaccine.ja/)
35 | / [नेपाली](https://onedrive.live.com/view.aspx?resid=9C571BA15BC4287D!15298&ithint=file%2cdocx&authkey=!ALATa2b8xetI7lQ)
36 | / [Polskie](https://randomseed.pl/rna/reverse-engineering-kodu-zrodlowego-szczepionki-biontech-pfizer-covid-sars-cov-2/)
37 | / [русский](https://localcrew.ru/reversepfizer)
38 | / [Português](https://docs.google.com/document/d/1pDo40DXcpXjzqAUfhFfup50-IQ2Qct-mhLnmRpjFZWM/edit)
39 | / [Română](https://www.astarostech.com/read/sarscov2-ro/vaccine-mrna.html)
40 | / [Slovensky](https://dennikn.sk/blog/2205850/ako-funguje-zdrojovy-kod-vakciny-sars-cov-2/)
41 | / [Slovenščina](https://berthub.eu/articles/posts/reverzni-inženiring-izvorne-kode-cepiva-biontech-pfizer-proti-sars-cov-2/)
42 | / [Srpski](https://berthub.eu/articles/posts/naliza-izvornog-k%C3%B4da-vakcine-biontech-pfizer-sars-cov-2/)
43 | / [Türk](https://berthub.eu/articles/posts/biontech-pfizer-mrna-a%C5%9F%C4%B1lar%C4%B1n%C4%B1n-kaynak-kodu/)
44 | / [український](https://texty.org.ua/articles/102631/rekonstrukciya-vyhidnoho-kodu-vakcyny-biontechpfizer-sars-cov-2/)
45 | / [Markdown for translating](https://raw.githubusercontent.com/berthubert/bnt162b2/master/reverse-engineering-source-code-of-the-biontech-pfizer-vaccine.md)
46 | / [**Fun video by LlamaExplains**](https://www.youtube.com/watch?v=RntuQ_BULho&feature=youtu.be)
47 | / [Video version by Giff Ransom](https://www.youtube.com/watch?v=JfwlKMZrY0U&feature=emb_logo)
48 |
49 | Welcome! In this post, we'll be taking a character-by-character look at the
50 | source code of the BioNTech/Pfizer SARS-CoV-2 mRNA vaccine.
51 |
52 | > Update: The other up and coming vaccines are described in
53 | > [The Genetic Code and Proteins of the Other Covid-19
54 | > Vaccines](https://berthub.eu/articles/posts/genetic-code-of-covid-19-vaccines/).
55 |
56 | > *I want to thank the large cast of people who spent time previewing this
57 | > article for legibility and correctness. All mistakes remain mine though,
58 | > but I would love to hear about them quickly at bert@hubertnet.nl or
59 | > [@PowerDNS_Bert](https://twitter.com/PowerDNS_Bert)*
60 |
61 | Now, these words may be somewhat jarring - the vaccine is a liquid that gets
62 | injected in your arm. How can we talk about source code?
63 |
64 | This is a good question, so let's start off with a small part of the very
65 | source code of the BioNTech/Pfizer vaccine, also known as
66 | [BNT162b2](https://en.wikipedia.org/wiki/Tozinameran), also
67 | known as Tozinameran [also known as
68 | Comirnaty](https://twitter.com/PowerDNS_Bert/status/1342109138965422083).
69 |
70 |
71 | {{< figure src="/articles/bnt162b2.png" caption="First 500 characters of the BNT162b2 mRNA. Source: [World Health Organization](/articles/11889.doc)">}}
72 |
73 |
74 | The BNT162b2 mRNA vaccine has this digital code at its heart. It is 4284
75 | characters long, so it would fit in a bunch of tweets. At the very
76 | beginning of the vaccine production process, someone uploaded this code to a
77 | DNA printer (yes), which then converted the bytes on disk to actual DNA
78 | molecules.
79 |
80 |
81 | {{< figure src="/articles/kilobaser.jpg" caption="A [Kilobaser](https://kilobaser.com/) Express DNA Machine" >}}
82 |
83 |
84 | Out of such a machine come tiny amounts of DNA, which after a lot of
85 | biological and chemical processing end up as RNA (more about which later) in
86 | the vaccine vial. A 30 microgram dose turns out to actually contain 30
87 | micrograms of RNA. In addition, there is a clever lipid (fatty) packaging
88 | system that gets the mRNA into our cells.
89 |
90 | > Update: Derek Lowe of the famous [In the pipeline blog](https://blogs.sciencemag.org/pipeline/)
91 | > over at Science has written a comprehensive post "[RNA Vaccines And Their
92 | > Lipids](https://blogs.sciencemag.org/pipeline/archives/2021/01/11/rna-vaccines-and-their-lipids)"
93 | > which neatly explains the lipid and delivery parts of the vaccines that I
94 | > am not competent to describe. Luckily Derek is!
95 |
96 | > Update 2:
97 | > Jonas Neubert and Cornelia Scheitz have written [this awesome page](https://blog.jonasneubert.com/2021/01/10/exploring-the-supply-chain-of-the-pfizer-biontech-and-moderna-covid-19-vaccines/)
98 | > with loads of detail on how the vaccines actually get produced and
99 | > distributed. Recommended!
100 |
101 | RNA is the volatile 'working memory' version of DNA. DNA is like the flash
102 | drive storage of biology. DNA is very durable, internally redundant and
103 | very reliable. But much like computers do not execute code directly from a
104 | flash drive, before something happens, code gets copied to a faster,
105 | more versatile yet far more fragile system.
106 |
107 | For computers, this is RAM, for biology it is RNA. The resemblance is
108 | striking. Unlike flash memory, RAM degrades very quickly unless lovingly
109 | tended to. The reason the Pfizer/BioNTech mRNA vaccine must be stored in the
110 | deepest of deep freezers is the same: RNA is a fragile flower.
111 |
112 | Each RNA character weighs on the order of 0.53·10⁻²¹ grams, meaning
113 | there are around 6·10¹⁶ characters in a single 30 microgram vaccine dose.
114 | Expressed in bytes, this is around 14 petabytes, although it must be said
115 | this consists of around [13,000 billion
116 | repetitions](https://docs.google.com/spreadsheets/d/1vc6p9IXQVRpVQntcI1tCdSMLNDuT8fl8rags0gDxMZA/edit?usp=sharing) of the same 4284
117 | characters. The actual informational content of the vaccine is just over a
118 | kilobyte. [SARS-CoV-2 itself](https://www.ncbi.nlm.nih.gov/projects/sviewer/?id=NC_045512&tracks=[key:sequence_track,name:Sequence,display_name:Sequence,id:STD649220238,annots:Sequence,ShowLabel:false,ColorGaps:false,shown:true,order:1][key:gene_model_track,name:Genes,display_name:Genes,id:STD3194982005,annots:Unnamed,Options:ShowAllButGenes,CDSProductFeats:true,NtRuler:true,AaRuler:true,HighlightMode:2,ShowLabel:true,shown:true,order:9]&v=1:29903&c=null&select=null&slim=0) weighs in at around 7.5 kilobytes.
119 |
120 | > Update: In the original post these numbers were off. [Here is a
121 | > spreadsheet](https://docs.google.com/spreadsheets/d/1vc6p9IXQVRpVQntcI1tCdSMLNDuT8fl8rags0gDxMZA/edit?usp=sharing)
122 | > with the correct calculations.
123 |
124 | The briefest bit of background
125 | ------------------------------
126 | DNA is a digital code. Unlike computers, which use 0 and 1, life uses A, C, G
127 | and U/T (the 'nucleotides', 'nucleosides' or 'bases').
128 |
129 | In computers we store the 0 and 1 as the presence or absence of a charge, or
130 | as a current, as a magnetic transition, or as a voltage, or as a modulation
131 | of a signal, or as a change in reflectivity. Or in short, the 0 and 1 are
132 | not some kind of abstract concept - they live as electrons and in many other
133 | physical embodiments.
134 |
135 | In nature, A, C, G and U/T are molecules, stored as chains in DNA (or RNA).
136 |
137 | In computers, we group 8 bits into a byte, and the byte is the typical unit
138 | of data being processed.
139 |
140 | Nature groups 3 nucleotides into a codon, and this codon is the typical unit
141 | of processing. A codon contains 6 bits of information (2 bits per DNA
142 | character, 3 characters = 6 bits). This means there are 2⁶ = 64 different codon values.
143 |
144 | Pretty digital so far. When in doubt, [head to the WHO
145 | document](/articles/11889.doc) with the
146 | digital code to see for yourself.
147 |
148 | > *Some further reading is [available
149 | > here](https://berthub.eu/articles/posts/what-is-life/) - this link ('What
150 | > is life') might help make sense of the rest of this page. Or, if you like
151 | > video, I have [two hours for you](https://berthub.eu/dna).*
152 |
153 | So what does that code DO?
154 | --------------------------
155 | The idea of a vaccine is to teach our immune system how to fight a pathogen,
156 | without us actually getting ill. Historically this has been done by
157 | injecting a weakened or incapacitated (attenuated) virus, plus an 'adjuvant'
158 | to scare our immune system into action. This was a decidedly analogue
159 | technique involving billions of eggs (or insects). It also required a lot
160 | of luck and loads of time. Sometimes a different (unrelated) virus was also
161 | used.
162 |
163 | An mRNA vaccine achieves the same thing ('educate our immune system') but in
164 | a laser-like way. And I mean this in both senses - very narrow but also
165 | very powerful.
166 |
167 | So here is how it works. The injection contains volatile genetic material
168 | that describes the famous SARS-CoV-2 'Spike' protein. Through clever
169 | chemical means, the vaccine manages to get this genetic material into some of
170 | our cells.
171 |
172 | These then dutifully start producing SARS-CoV-2 Spike proteins in large
173 | enough quantities that our immune system springs into action. Confronted
174 | with Spike proteins, and (importantly) tell-tale signs that cells have been
175 | taken over, our immune system develops a powerful response against multiple
176 | aspects of the Spike protein AND the production process.
177 |
178 | And this is what gets us to the 95% effective vaccine.
179 |
180 | The source code!
181 | ----------------
182 | [Let's start at the very beginning, a very good place
183 | to start](https://youtu.be/jp0opnxQ4rY?t=8). The WHO document has this
184 | helpful picture:
185 |
186 |
187 | {{< figure src="/articles/vaccine-toc.png" >}}
188 |
189 |
190 | This is a sort of table of contents. We'll start with the 'cap', actually
191 | depicted as a little hat.
192 |
193 | Much like you can't just plonk opcodes in a file on a computer and run it,
194 | the biological operating system requires headers, has linkers and things
195 | like calling conventions.
196 |
197 | The code of the vaccine starts with the following two nucleotides:
198 |
199 | ```
200 | GA
201 | ```
202 |
203 | This can be compared very much to every [DOS and Windows executable starting
204 | with MZ](https://en.wikipedia.org/wiki/DOS_MZ_executable), or UNIX scripts starting with
205 | [`#!`](https://en.wikipedia.org/wiki/Shebang_(Unix)). In both life and
206 | operating systems, these two characters are not executed in any way. But
207 | they have to be there because otherwise nothing happens.
208 |
209 | The mRNA 'cap' [has a number of
210 | functions](https://en.wikipedia.org/wiki/Five-prime_cap#Function). For one, it marks code as coming
211 | from the nucleus. In our case of course it doesn't, our code comes from a
212 | vaccination. But we don't need to tell the cell that. The cap makes our code
213 | look legit, which protects it from destruction.
214 |
215 | The initial two `GA` nucleotides are also chemically slightly different from
216 | the rest of the RNA. In this sense, the `GA` has some out-of-band
217 | signaling on it.
218 |
219 | The "five-prime untranslated region"
220 | ------------------------------------
221 | Some lingo here. RNA molecules can only be read in one direction.
222 | Confusingly, the part where the reading begins is called the 5' or
223 | 'five-prime'. The reading stops at the 3' or three-prime end.
224 |
225 | Life consists of proteins (or things made by proteins). And these proteins
226 | are described in RNA. When RNA gets converted into proteins, this is called
227 | translation.
228 |
229 | Here we have the 5' untranslated region ('UTR'), so this bit does not end up
230 | in the protein:
231 |
232 | ```
233 | GAAΨAAACΨAGΨAΨΨCΨΨCΨGGΨCCCCACAGACΨCAGAGAGAACCCGCCACC
234 | ```
235 |
236 | Here we encounter our first surprise. The normal RNA characters are A, C, G
237 | and U. U is also known as 'T' in DNA. But here we find a Ψ, what is going
238 | on?
239 |
240 | This is one of the exceptionally clever bits about the vaccine. Our body
241 | runs a powerful antivirus system ("the original one"). For this reason,
242 | cells are extremely unenthusiastic about foreign RNA and try very hard to
243 | destroy it before it does anything.
244 |
245 | This is somewhat of a problem for our vaccine - it needs to sneak past our
246 | immune system. Over many years of experimentation, it was found that if the
247 | U in RNA is replaced by a slightly modified molecule, our immune system
248 | loses interest. For real.
249 |
250 | So in the BioNTech/Pfizer vaccine, every U has been replaced by
251 | 1-methyl-3'-pseudouridylyl, denoted by Ψ. The really clever bit is that
252 | [although this replacement Ψ placates (calms) our immune
253 | system](https://pubmed.ncbi.nlm.nih.gov/16111635/), it is
254 | accepted as a normal U by relevant parts of the cell.
255 |
256 | In computer security we also know this trick - it sometimes is possible to
257 | transmit a slightly corrupted version of a message that confuses firewalls and
258 | security solutions, but that is still accepted by the backend servers -
259 | which can then get hacked.
260 |
261 | We are now reaping the benefits of fundamental scientific research performed
262 | in the past. The
263 | [discoverers](https://twitter.com/PennMedicine/status/1341766354232365059)
264 | of this Ψ technique had to fight to get
265 | [their](https://www.statnews.com/2020/11/10/the-story-of-mrna-how-a-once-dismissed-idea-became-a-leading-technology-in-the-covid-vaccine-race/)
266 | work funded and then accepted. We should all be very grateful, and I am sure
267 | the [Nobel prizes will arrive in due
268 | course](https://twitter.com/PowerDNS_Bert/status/1329861047168225281).
269 |
270 | > Many people have asked, could viruses also use the Ψ technique to beat our
271 | > immune systems? In short, this is extremely unlikely. Life simply does
272 | > not have the machinery to build 1-methyl-3'-pseudouridylyl nucleotides.
273 | > Viruses rely on the machinery of life to reproduce themselves, and this
274 | > facility is simply not there. The mRNA vaccines quickly degrade in the
275 | > human body, and there is no possibility of the Ψ-modified RNA
276 | > replicating with the Ψ still in there. "[No, Really, mRNA Vaccines Are Not Going To Affect Your
277 | > DNA](https://www.deplatformdisease.com/blog/no-really-mrna-vaccines-are-not-going-to-affect-your-dna)"
278 | > is also a good read.
279 |
280 | Ok, back to the 5' UTR. What do these 52 characters do? As with everything in
281 | nature, almost nothing has one clear function.
282 |
283 | When our cells need to *translate* RNA into proteins, this is done using a
284 | machine called the ribosome. The ribosome is like a 3D printer for
285 | proteins. It ingests a strand of RNA and based on that it emits a string of
286 | amino acids, which then fold into a protein.
287 |
288 |
289 |
290 |
291 |
292 |
293 | Source: [Wikipedia user Bensaccount](https://commons.wikimedia.org/wiki/File:Protein_translation.gif)
294 |
295 |
296 |
297 | This is what we see happening above. The black ribbon at the bottom is RNA.
298 | The ribbon appearing in the green bit is the protein being formed. The
299 | things flying in and out are amino acids plus adaptors to make them fit on
300 | RNA.
301 |
302 | This ribosome needs to physically sit on the RNA strand for it to get to
303 | work. Once seated, it can start forming proteins based on further RNA it
304 | ingests. From this, you can imagine that it can't yet read the part where
305 | it first lands. This is just one of the functions of the UTR: the
306 | ribosome landing zone. The UTR provides 'lead-in'.
307 |
308 | In addition to this, the UTR also contains metadata: when should translation
309 | happen? And how much? For the vaccine, they took the most 'right now' UTR
310 | they could find, taken from the [alpha globin
311 | gene](https://www.tandfonline.com/doi/full/10.1080/15476286.2018.1450054).
312 | This gene is known to robustly produce a lot of proteins. In previous
313 | years, scientists had already found ways to optimize this UTR even further
314 | (according to the WHO document), so this is not quite the alpha globin UTR.
315 | It is better.
316 |
317 | The S glycoprotein signal peptide
318 | ---------------------------------
319 | As noted, the goal of the vaccine is to get the cell to produce copious
320 | amounts of the Spike protein of SARS-CoV-2. Up to this point, we have mostly
321 | encountered metadata and "calling convention" stuff in the vaccine source
322 | code. But now we enter the actual viral protein territory.
323 |
324 | We still have one layer of metadata to go however. Once the ribosome (from the
325 | splendid animation above) has made a protein, that protein still needs to go
326 | somewhere. This is encoded in the "S glycoprotein signal peptide (extended leader
327 | sequence)".
328 |
329 | The way to see this is that at the beginning of the protein there is a sort
330 | of address label - encoded as part of the protein itself. In this specific
331 | case, the signal peptide says that this protein should exit the cell via the
332 | "endoplasmic reticulum". Even Star Trek lingo is not as fancy as this!
333 |
334 | The "signal peptide" is not very long, but when we look at the code, there
335 | are differences between the viral and vaccine RNA:
336 |
337 | (Note that for comparison purposes, I have replaced the fancy modified Ψ by a
338 | regular RNA U)
339 |
340 | ```
341 | 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
342 | Virus: AUG UUU GUU UUU CUU GUU UUA UUG CCA CUA GUC UCU AGU CAG UGU GUU
343 | Vaccine: AUG UUC GUG UUC CUG GUG CUG CUG CCU CUG GUG UCC AGC CAG UGU GUG
344 | ! ! ! ! ! ! ! ! ! ! ! ! ! !
345 | ```
346 |
347 | So what is going on? I have not accidentally listed the RNA in groups of 3
348 | letters. Three RNA characters make up a codon. And every codon encodes for a
349 | specific amino acid. The signal peptide in the vaccine consists of *exactly*
350 | the same amino acids as in the virus itself.
351 |
352 | So how come the RNA is different?
353 |
354 | There are 4³=64 different codons, since there are 4 RNA characters, and
355 | there are three of them in a codon. Yet there are only 20 different
356 | amino acids. This means that multiple codons encode for the same amino acid.
357 |
358 | Life uses the following nearly universal table for mapping RNA codons to
359 | amino acids:
360 |
361 |
362 | {{< figure src="/articles/rna-codon-table.png" caption="[The RNA codon table](https://en.wikipedia.org/wiki/DNA_and_RNA_codon_tables) (Wikipedia)" >}}
363 |
364 |
365 | In this table, we can see that the modifications in the vaccine (UUU ->
366 | UUC) are all *synonymous*. The vaccine RNA code is different, but the same
367 | amino acids and the same protein come out.
368 |
369 | If we look closely, we see that the majority of the changes happen in the
370 | third codon position, noted with a '3' above. And if we check the universal
371 | codon table, we see that this third position indeed often does not matter
372 | for which amino acid is produced.
373 |
374 | So, the changes are synonymous, but then why are they there? Looking
375 | closely, we see that all changes *except one* lead to more Cs and Gs.
376 |
377 | So why would you do that? As noted above, our immune system takes a very dim
378 | view of 'exogenous' RNA, RNA code coming from outside the cell. To evade
379 | detection, the 'U' in the RNA was already replaced by a Ψ.
380 |
381 | However, it turns out that RNA with [a higher
382 | amount](https://www.nature.com/articles/nrd.2017.243) of Gs and Cs is
383 | also [converted more efficiently into
384 | proteins](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1463026/).
385 |
386 | And this has been achieved in the vaccine RNA by replacing many characters
387 | with [Gs and Cs](https://www.embopress.org/doi/full/10.15252/embr.201948220) wherever this was possible.
388 |
389 | > I'm slightly fascinated by the *one* change that did not lead to an
390 | > additional C or G, the CCA -> CCU modification. If anyone knows the reason,
391 | > please let me know! Note that I'm aware that some codons are more common
392 | > than others in the human genome, but [I also read that this does not
393 | > influence translation speed a
394 | > lot](https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1006024).
395 | > UPDATE: A number of readers [have pointed
396 | > out](https://twitter.com/noort_zuit/status/1348924353921081344) that this
397 | > change could prevent a "hairpin" in the RNA. You can try this out yourself on
398 | > the [RNAFold service](http://rna.tbi.univie.ac.at/cgi-bin/RNAWebSuite/RNAfold.cgi).
399 | >
400 | > This [marvelous article by Chelsea Voss
401 | > ](https://csvoss.com/a-mechanists-guide-to-the-coronavirus-genome) goes
402 | > into great depth on the RNA shape and contents of SARS-CoV-2.
403 |
404 | The actual Spike protein
405 | ------------------------
406 | The next 3777 characters of the vaccine RNA are similarly 'codon optimized'
407 | to add a lot of C's and G's. In the interest of space I won't list all
408 | the code here, but we are going to zoom in on one exceptionally special
409 | bit. This is the bit that makes it work, the part that will actually help us
410 | return to life as normal:
411 |
412 | ```
413 | * *
414 | L D K V E A E V Q I D R L I T G
415 | Virus: CUU GAC AAA GUU GAG GCU GAA GUG CAA AUU GAU AGG UUG AUC ACA GGC
416 | Vaccine: CUG GAC CCU CCU GAG GCC GAG GUG CAG AUC GAC AGA CUG AUC ACA GGC
417 | L D P P E A E V Q I D R L I T G
418 | ! !!! !! ! ! ! ! ! ! !
419 | ```
420 |
421 | Here we see the usual synonymous RNA changes. For example, in the first
422 | codon we see that CUU is changed into CUG. This adds another 'G' to the
423 | vaccine, which we know helps enhance protein production. Both CUU
424 | and CUG encode for the amino acid 'L' or Leucine, so nothing changed in the
425 | protein.
426 |
427 | When we compare the entire Spike protein in the vaccine, all changes are
428 | synonymous like this.. except for two, and this is what we see here.
429 |
430 | The third and fourth codons above represent actual changes. The K and V
431 | amino acids there are both replaced by 'P' or Proline. For 'K' this required
432 | three changes ('!!!') and for 'V' it required only two ('!!').
433 |
434 | **It turns out that these two changes enhance the vaccine efficiency
435 | enormously**.
436 |
437 | So what is happening here? If you look at a real SARS-CoV-2 particle, you
438 | can see the Spike protein as, well, a bunch of spikes:
439 |
440 |
441 | {{< figure src="/articles/sars-em.jpg" caption="[SARS virus particles](https://en.wikipedia.org/wiki/Severe_acute_respiratory_syndrome_coronavirus) (Wikipedia)" >}}
442 |
443 |
444 | The spikes are mounted on the virus body ('the nucleocapsid protein'). But
445 | the thing is, our vaccine is only generating the spikes itself, and we're
446 | not mounting them on any kind of virus body.
447 |
448 | It turns out that, unmodified, freestanding Spike proteins collapse into a
449 | different structure. If injected as a vaccine, this would indeed cause our
450 | bodies to develop immunity.. but only against the collapsed spike protein.
451 |
452 | And the real SARS-CoV-2 shows up with the spiky Spike. The vaccine would not
453 | work very well in that case.
454 |
455 | So what to do? In [2017 it was described how putting a double Proline
456 | substitution in just the right
457 | place](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5584442/) would make the
458 | SARS-CoV-1 and MERS
459 | S proteins take up their 'pre-fusion' configuration, even without being part of
460 | the whole virus. This works [because Proline is a very rigid amino
461 | acid](https://cen.acs.org/pharmaceuticals/vaccines/tiny-tweak-behind-COVID-19/98/i38). It
462 | acts as a kind of splint, stabilising the protein in the state we need to
463 | show to the immune system.
464 |
465 | The [people](https://twitter.com/goodwish916) that
466 | [discovered](https://twitter.com/KizzyPhD) this should be walking
467 | around high-fiving themselves incessantly. Unbearable amounts of smugness
468 | should be emanating from them. [And it would all be well
469 | deserved](https://twitter.com/McLellan_Lab/status/1291077489566142464).
470 |
471 | > Update! I have been contacted by the [McLellan
472 | > lab](https://twitter.com/McLellan_Lab/status/1291077489566142464), one of the
473 | > groups behind the Proline discovery. They tell me the high-fiving is
474 | > subdued because of the ongoing pandemic, but they are pleased to have
475 | > contributed to the vaccines. They also stress the importance of many other
476 | > groups, workers and volunteers.
477 |
478 | The end of the protein, next steps
479 | ----------------------------------
480 | If we scroll through the rest of the source code, we encounter some small
481 | modifications at the end of the Spike protein:
482 |
483 | ```
484 | V L K G V K L H Y T s
485 | Virus: GUG CUC AAA GGA GUC AAA UUA CAU UAC ACA UAA
486 | Vaccine: GUG CUG AAG GGC GUG AAA CUG CAC UAC ACA UGA UGA
487 | V L K G V K L H Y T s s
488 | ! ! ! ! ! ! ! !
489 | ```
490 |
491 | At the end of a protein we find a 'stop' codon, denoted here by a lowercase
492 | 's'. This is a polite way of saying that the protein should end here. The
493 | original virus uses the UAA stop codon, the vaccine uses two UGA stop
494 | codons, perhaps just for good measure.
495 |
496 | The 3' Untranslated Region
497 | --------------------------
498 | Much like the ribosome needed some lead-in at the 5' end, where we found the
499 | 'five prime untranslated region', at the end of a protein coding region we find a similar
500 | construct called the 3' UTR.
501 |
502 | Many words could be written about the 3' UTR, but here I quote [what the
503 | Wikipedia
504 | says](https://en.wikipedia.org/wiki/Three_prime_untranslated_region): "The 3'-untranslated region plays a crucial role in gene
505 | expression by influencing the localization, stability, export, and
506 | translation efficiency of an mRNA .. **despite our current understanding of
507 | 3'-UTRs, they are still relative mysteries**".
508 |
509 | What we do know is that certain 3'-UTRs are very successful at promoting
510 | protein expression. According to the WHO document, the BioNTech/Pfizer
511 | vaccine 3'-UTR was picked from "the amino-terminal enhancer of split (AES)
512 | mRNA and the mitochondrial encoded 12S ribosomal RNA to confer RNA stability
513 | and high total protein expression". To which I say, well done.
514 |
515 |
516 | {{< figure src="/articles/vaccine.jpg" >}}
517 |
518 |
519 |
520 | The AAAAAAAAAAAAAAAAAAAAAA end of it all
521 | ----------------------------------------
522 | The very end of mRNA is polyadenylated. This is a fancy way of saying it
523 | ends on a lot of AAAAAAAAAAAAAAAAAAA. Even mRNA has had enough of 2020 it
524 | appears.
525 |
526 | mRNA can be reused many times, but as this happens, it also loses some of
527 | the A's at the end. Once the A's run out, the mRNA is no longer functional
528 | and gets discarded. In this way, the 'poly-A' tail is protection from
529 | degradation.
530 |
531 | Studies have been done to find out what the optimal number of A's at the end
532 | is for mRNA vaccines. I read in the open literature that this peaked at 120
533 | or so.
534 |
535 | The BNT162b2 vaccine ends with:
536 |
537 | ```
538 | ****** ****
539 | UAGCAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAGCAUAU GACUAAAAAA AAAAAAAAAA
540 | AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAA
541 | ```
542 |
543 | This is 30 A's, then a "10 nucleotide linker" (GCAUAUGACU), followed by another 70
544 | A's.
545 |
546 | There are various theories why this linker is there. Some people tell me it
547 | has to do with DNA plasmid stability. I have also received this from an
548 | actual expert:
549 |
550 | "The 10-nucleotide linker within the poly(A) tail makes it easier to stitch
551 | together the synthetic DNA fragments that become the template for transcribing
552 | the mRNA. It also reduces slipping by T7 RNA polymerase so that the
553 | transcribed mRNA is more uniform in length".
554 |
555 | The article "[Segmented poly(A) tails significantly reduce recombination of plasmid DNA without affecting mRNA translation efficiency or
556 | half-life](https://rnajournal.cshlp.org/content/25/4/507.long)" also has a
557 | compelling description of how a linker can benefit efficacy.
558 |
559 | Summarising
560 | -----------
561 | With this, we now know the exact mRNA contents of the BNT162b2 vaccine, and
562 | for most parts we understand why they are there:
563 |
564 | * The CAP to make sure the RNA looks like regular mRNA
565 | * A known successful and optimized 5' untranslated region (UTR)
566 | * A codon optimized signal peptide to send the Spike protein to the right
567 | place (amino acids copied 100% from the original virus)
568 | * A codon optimized version of the original spike, with two 'Proline'
569 | substitutions to make sure the protein appears in the right form
570 | * A known successful and optimized 3' untranslated region
571 | * A poly-A tail with a 'linker' in there
572 |
573 | The codon optimization adds a lot of G and C to the mRNA. Meanwhile, using Ψ
574 | (1-methyl-3'-pseudouridylyl) instead of U helps evade our immune system, so
575 | the mRNA stays around long enough so we can actually help train the immune
576 | system.
577 |
578 | Further reading/viewing
579 | -----------------------
580 | If you like this work, [you can hire
581 | me](https://berthub.eu/articles/posts/hire-me-semi-popular-science/) to
582 | write about your scientific/technical/medical product as well!
583 |
584 | In 2017 I held a two hour presentation on DNA, which you can [view
585 | here](https://berthub.eu/dna). Like this page it is aimed at computer
586 | people.
587 |
588 | In addition, I've been maintaining a page on '[DNA for
589 | programmers](https://berthub.eu/articles/posts/amazing-dna/)' since 2001.
590 |
591 | You might also enjoy [this introduction to our amazing immune
592 | system](https://berthub.eu/articles/posts/immune-system/).
593 |
594 | Finally, [this listing of my blog posts](https://berthub.eu/articles) has quite some
595 | DNA, SARS-CoV-2 and COVID related material.
596 |
597 | As an update, the other up and coming vaccines are described in [The Genetic
598 | Code and Proteins of the Other Covid-19 Vaccines](https://berthub.eu/articles/posts/genetic-code-of-covid-19-vaccines/)
599 |
600 | As a further update, there is now also a post [describing the CureVac mRNA
601 | vaccine](https://berthub.eu/articles/posts/curevac-vaccine-and-wonders-of-biology/).
602 | The CureVac vaccine consists of mRNA that has not been modified, but instead
603 | has taken a leaf out of other parts of biology in hopes of making things
604 | work, and the post touches on those.
605 |
606 |
607 |
--------------------------------------------------------------------------------