"""Extract the blaOXA records from a FASTA file of resistance genes.

Reads every record from ``infilename``, keeps those whose id contains the
substring "blaOXA", writes them to ``outfilename`` in FASTA format and
prints how many records were kept.
"""
from Bio import SeqIO

infilename = "resistance.fasta"
outfilename = "resistance.blaOXA.fasta"

# SeqIO.parse yields one SeqRecord per FASTA entry; keep only the records
# whose id mentions blaOXA.
blaOXA_records = [
    seq_record
    for seq_record in SeqIO.parse(infilename, "fasta")
    if "blaOXA" in seq_record.id
]
count = len(blaOXA_records)

SeqIO.write(blaOXA_records, outfilename, "fasta")
# Parenthesised form so the script runs on both Python 2 and Python 3
# (the bare ``print count`` statement is a SyntaxError on Python 3).
print(count)
"prompt_number": 2 34 | }, 35 | { 36 | "cell_type": "code", 37 | "collapsed": false, 38 | "input": [ 39 | "from Bio.Seq import Seq\n", 40 | "simple_seq = Seq(\"GATC\")\n", 41 | "from Bio.SeqRecord import SeqRecord\n", 42 | "simple_seq_r = SeqRecord(simple_seq)\n", 43 | "print simple_seq_r.id" 44 | ], 45 | "language": "python", 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "output_type": "stream", 50 | "stream": "stdout", 51 | "text": [ 52 | "\n" 53 | ] 54 | } 55 | ], 56 | "prompt_number": 3 57 | }, 58 | { 59 | "cell_type": "code", 60 | "collapsed": false, 61 | "input": [ 62 | "print simple_seq_r" 63 | ], 64 | "language": "python", 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "output_type": "stream", 69 | "stream": "stdout", 70 | "text": [ 71 | "ID: \n", 72 | "Name: \n", 73 | "Description: \n", 74 | "Number of features: 0\n", 75 | "Seq('GATC', Alphabet())\n" 76 | ] 77 | } 78 | ], 79 | "prompt_number": 11 80 | }, 81 | { 82 | "cell_type": "code", 83 | "collapsed": false, 84 | "input": [ 85 | "from Bio import SeqIO\n", 86 | "for record in SeqIO.parse(\"example.fasta\", \"fasta\") :\n", 87 | " print record.id" 88 | ], 89 | "language": "python", 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "output_type": "stream", 94 | "stream": "stdout", 95 | "text": [ 96 | "Sequence#1\n", 97 | "Sequence#2\n" 98 | ] 99 | } 100 | ], 101 | "prompt_number": 4 102 | }, 103 | { 104 | "cell_type": "code", 105 | "collapsed": false, 106 | "input": [ 107 | "from Bio.Seq import Seq\n", 108 | "from Bio.Alphabet import IUPAC\n", 109 | "from Bio.SeqRecord import SeqRecord\n", 110 | "from Bio import SeqIO\n", 111 | "\n", 112 | "# Create some Seq objects\n", 113 | "seq1 = Seq(\"GATCAGATTA\", IUPAC.unambiguous_dna)\n", 114 | "seq2 = Seq(\"GTGCAGTATA\", IUPAC.unambiguous_dna)\n", 115 | "\n", 116 | "# Put them into SeqRecord objects\n", 117 | "record1 = SeqRecord(seq1, id=\"Sequence 1\")\n", 118 | "record2 = SeqRecord(seq2, id=\"Sequence 2\")\n", 119 | "\n", 120 | "# Write the SeqRecords 
to a fasta file\n", 121 | "list_of_SeqRecords = [record1, record2]\n", 122 | "SeqIO.write(list_of_SeqRecords, \"example_output.fasta\", \"fasta\")" 123 | ], 124 | "language": "python", 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "metadata": {}, 129 | "output_type": "pyout", 130 | "prompt_number": 5, 131 | "text": [ 132 | "2" 133 | ] 134 | } 135 | ], 136 | "prompt_number": 5 137 | }, 138 | { 139 | "cell_type": "code", 140 | "collapsed": false, 141 | "input": [], 142 | "language": "python", 143 | "metadata": {}, 144 | "outputs": [] 145 | } 146 | ], 147 | "metadata": {} 148 | } 149 | ] 150 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Python for Bioinformatics 2 | ========================= 3 | 4 | Some examples of using Python for Bioinformatics 5 | To get the data go to your shell and type "git clone https://github.com/hdashnow/python_for_bioinformatics.git" 6 | 7 | Biopython 8 | --------- 9 | 10 | ###Sequence objects 11 | http://biopython.org/DIST/docs/tutorial/Tutorial.html#sec17 12 | 13 | ```python 14 | from Bio.Seq import Seq 15 | my_seq = Seq("AGTACACTGGT") 16 | ``` 17 | 18 | However, Biopython doesn't know if your sequence is DNA. Seq objects can be DNA, RNA or protein. We can use the IUPAC standards to define what kind of sequence this is. 19 | 20 | ```python 21 | from Bio.Seq import Seq 22 | from Bio.Alphabet import IUPAC 23 | my_seq = Seq("AGTACACTGGT", IUPAC.unambiguous_dna) 24 | ``` 25 | 26 | Now that Biopython knows we are dealing with DNA, you can use some handy functions: 27 | - `.complement()` 28 | - `.reverse_complement()` 29 | - `.transcribe()` 30 | 31 | And many more. 
32 | 33 | **Try this** 34 | Find the reverse complement of this sequence: 35 | > ACAAGATGCCATTGTCCCCCGGCCTCCTG 36 | 37 | ###SeqRecord objects 38 | http://biopython.org/DIST/docs/tutorial/Tutorial.html#sec32 39 | 40 | The SeqRecord class is a more sophisticated way to represent a sequence. It has a number of attributes, but the most useful are usually: 41 | - .seq 42 | - The sequence itself, typically a Seq object. 43 | - .id 44 | - The primary ID used to identify the sequence – a string. In most cases this is something like an accession number. 45 | 46 | You can create a SeqRecord object by hand like this: 47 | 48 | ```python 49 | from Bio.Seq import Seq 50 | simple_seq = Seq("GATC") 51 | from Bio.SeqRecord import SeqRecord 52 | simple_seq_r = SeqRecord(simple_seq) 53 | print simple_seq_r.id 54 | ``` 55 | 56 | Note the output: 57 | ```python 58 | '' 59 | ``` 60 | The id is not set for the sequence record. You can see a summary of what values have been created for the SeqRecord object: 61 | ```python 62 | print simple_seq_r 63 | ``` 64 | **Try this** 65 | Add more information to the `simple_seq_r` record. 66 | 67 | ###Fasta 68 | 69 | Usually, however, you would obtain a SeqRecord object by reading in a file, such as a FASTA file. 
70 | http://en.wikipedia.org/wiki/FASTA_format 71 | 72 | Here is an example DNA sequence in FASTA format: 73 | 74 | > \>AB000263 |acc=AB000263|descr=Homo sapiens mRNA for prepro cortistatin like peptide, complete cds.|len=368 75 | > ACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGGCCACGGCCACCGCTGCCCTGCC 76 | > CCTGGAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAATAAGGAAAAGCAGC 77 | > CTCCTGACTTTCCTCGCTTGGTGGTTTGAGTGGACCTCCCAGGCCAGTGCCGGGCCCCTCATAGGAGAGG 78 | > AAGCTCGGGAGGTGGCCAGGCGGCAGGAAGGCGCACCCCCCCAGCAATCCGCGCGCCGGGACAGAATGCC 79 | > CTGCAGGAACTTCTTCTGGAAGACCTTCTCCTCCTGCAAATAAAACCTCACCCATGAATGCTCACGCAAG 80 | > TTTAATTACAGACCTGAA 81 | 82 | ###Reading and writing sequencing files 83 | http://biopython.org/DIST/docs/tutorial/Tutorial.html#sec51 84 | 85 | Bio.SeqIO.parse() takes a file handle (or filename) and format string, and returns a SeqRecord iterator. You can try this with the [example.fasta](https://raw.githubusercontent.com/hdashnow/python_for_bioinformatics/master/example.fasta) file. 
86 | 87 | ```python 88 | from Bio import SeqIO 89 | for record in SeqIO.parse("example.fasta", "fasta") : 90 | print record.id 91 | ``` 92 | 93 | For writing records to a file use the function Bio.SeqIO.write(), which takes a SeqRecord iterator (or list of SeqRecords), output handle (or filename) and a format string: 94 | 95 | ```python 96 | from Bio.Seq import Seq 97 | from Bio.Alphabet import IUPAC 98 | from Bio.SeqRecord import SeqRecord 99 | from Bio import SeqIO 100 | 101 | # Create some Seq objects 102 | seq1 = Seq("GATCAGATTA", IUPAC.unambiguous_dna) 103 | seq2 = Seq("GTGCAGTATA", IUPAC.unambiguous_dna) 104 | 105 | # Put them into SeqRecord objects 106 | record1 = SeqRecord(seq1, id="Sequence 1") 107 | record2 = SeqRecord(seq2, id="Sequence 2") 108 | 109 | # Write the SeqRecords to a fasta file 110 | list_of_SeqRecords = [record1, record2] 111 | SeqIO.write(list_of_SeqRecords, "example_output.fasta", "fasta") 112 | ``` 113 | 114 | **Try these** 115 | 116 | [resistance_sample.fasta](https://raw.githubusercontent.com/hdashnow/python_for_bioinformatics/master/resistance_sample.fasta) contains 10 genes known to cause resistance to antibiotics in bacteria. 117 | Write a script that prints the id of every sequence in this file. Then edit it so that it also prints the GC content of each sequence (the proportion of bases that are G or C). 118 | Note: you can treat a Seq object like a string and manipulate it in the usual ways. 119 | 120 | [resistance.fasta](https://raw.githubusercontent.com/hdashnow/python_for_bioinformatics/master/resistance.fasta) contains the full set of bacterial resistance genes. 121 | Write a script that reads this file, then writes a new fasta file containing only those sequences with "blaOXA" in the sequence id (there should be 202). If you get stuck, the solution is in [read_fasta.py](read_fasta.py). 122 | 123 | 124 | ####Want more bioinformatics problems? 
Try these resources: 125 | * http://rosalind.info/ 126 | * http://biopython.org/DIST/docs/tutorial/Tutorial.html 127 | 128 | And just in case you ever need to read a BAM file or write a pipeline: 129 | * http://pysam.readthedocs.org/en/latest/ 130 | * https://code.google.com/p/bpipe/ 131 | 132 | 133 | ####So you want to be a bioinformatician? Stay informed and get involved! 134 | * http://combine.org.au/ 135 | * http://www.vlsci.org.au/ 136 | * http://parkville-bioinformatics-journal-club.blogspot.com.au/ 137 | * https://www.facebook.com/groups/BGSA.melbourne/ 138 | * http://australianbioinformatics.net/ 139 | * http://www.iscb.org/ 140 | -------------------------------------------------------------------------------- /resistance_sample.fasta: -------------------------------------------------------------------------------- 1 | >344__blaOXA__blaOXA-181__1 blaOXA-181_1_HM992946; HM992946; betalactamase 2 | ATGCGTGTATTAGCCTTATCGGCTGTGTTTTTGGTGGCATCGATTATCGGAATGCCAGCG 3 | GTAGCAAAGGAATGGCAAGAAAACAAAAGTTGGAATGCTCACTTTACTGAACATAAATCA 4 | CAGGGCGTAGTTGTGCTCTGGAATGAGAATAAGCAGCAAGGATTTACCAATAATCTTAAA 5 | CGGGCGAACCAAGCATTTTTACCCGCATCTACCTTTAAAATTCCCAATAGCTTGATCGCC 6 | CTCGATTTGGGCGTGGTTAAGGATGAACACCAAGTCTTTAAGTGGGATGGACAGACGCGT 7 | GATATCGCCGCTTGGAATCGTGACCATGACTTAATTACCGCGATGAAGTACTCAGTTGTG 8 | CCTGTTTATCAAGAATTTGCCCGCCAAATTGGTGAGGCACGTATGAGTAAAATGCTGCAC 9 | GCCTTCGATTATGGCAATGAGGATATCTCGGGCAATGTAGACAGTTTTTGGCTCGATGGT 10 | GGTATTCGCATTTCGGCTACCCAGCAAATCGCTTTTTTACGCAAGCTGTATCACAACAAG 11 | CTGCACGTTTCTGAGCGTAGTCAGCGCATCGTGAAACAAGCCATGCTGACCGAAGCCAAT 12 | GGCGACTATATTATTCGGGCTAAAACGGGATACTCGACTAGAATCGAACCTAAGATTGGC 13 | TGGTGGGTTGGTTGGGTTGAACTTGATGATAATGTGTGGTTTTTTGCGATGAATATGGAT 14 | ATGCCCACATCGGATGGTTTAGGGCTGCGCCAAGCCATCACAAAAGAAGTGCTCAAACAG 15 | GAGAAAATTATTCCCTAG 16 | >0__oqxB__oqxB__2 oqxB_1_EU370913; EU370913; quinolone 17 | ATGGACTTTTCCCGCTTTTTTATCGACAGGCCGATTTTCGCCGCGGTGCTGTCGATTTTA 18 | ATTTTTATCACCGGGTTAATCGCTATCCCACTGCTGCCGGTGAGCGAATATCCGGATGTC 19 | 
GTCCCGCCGAGCGTCCAGGTGCGCGCGGAGTATCCCGGCGCCAACCCGAAAGTGATTGCC 20 | GAGACCGTGGCGACGCCGCTGGAGGAAGCGATCAACGGCGTTGAAAACATGATGTACATG 21 | AAATCGGTCGCCGGCTCCGACGGCGTGCTGGTCACCACCGTCACCTTCCGCCCGGGTACC 22 | GACCCGGATCAGGCGCAGGTTCAGGTGCAGAACCGCGTCGCGCAGGCCGAAGCGCGTCTG 23 | CCGGAGGATGTACGCCGTCTGGGGATCACCACCCAGAAGCAGTCTCCGACGCTGACCCTG 24 | GTGGTGCATCTGTTCTCCCCCGGCGGGAAGTACGACTCGCTGTATATGCGCAACTACGCC 25 | ACGCTGAAAGTGAAGGATGAGCTGGCGCGCCTGCCCGGCGTCGGCCAGATCCAGATTTTT 26 | GGCTCCGGTGAATATGCGATGCGCGTCTGGCTGGATCCCAATAAGGTCGCTGCCCGCGGT 27 | CTGACGGCCTCGGATGTGGTGACGGCGATGCAGGAGCAAAACGTCCAGGTGTCTGCCGGA 28 | CAGCTTGGCGCCGAGCCGCTGCCGCAGGAGAGCGATTTCCTGATCTCCATTAACGCCCAG 29 | GGCCGTCTGCATACCGAAGAAGAGTTTGGCAATATCATTCTGAAAACGGCGCAGGATGGC 30 | TCGCTGGTCCGCCTGCGCGACGTGGCGCGCATCGAGATGGGTTCCGGTAGCTATGCGCTG 31 | CGCTCCCAGCTCAACAATAAGGATGCGGTCGGGATCGGTATCTTCCAGTCACCCGGCGCT 32 | AACGCCATCGATCTGTCGAACGCGGTACGCGCCAAAATGGCCGAGCTGGCCACCCGCTTC 33 | CCGGAAGATATGCAATGGGCGGCGCCGTACGACCCGACGGTTTTCGTCCGCGACTCCATC 34 | CGCGCGGTGGTGCAGACGCTGCTGGAGGCGGTAGTGCTGGTGGTGCTGGTAGTGATCCTG 35 | TTCCTGCAGACCTGGCGCGCGTCGATTATCCCGTTGATCGCTGTGCCGGTATCGGTGGTG 36 | GGTACCTTCAGCATTCTCTATCTGCTGGGCTTCTCGCTGAATACCCTGAGCCTGTTCGGG 37 | CTGGTACTGGCTATCGGTATCGTGGTGGACGACGCCATCGTGGTGGTGGAGAACGTCGAG 38 | CGTAATATCGAAGAGGGGCTTGCGCCGCTTGCCGCGGCGCATCAGGCGATGCGTGAGGTC 39 | TCCGGGCCGATTATCGCCATTGCGCTGGTGCTGTGTGCGGTGTTCGTGCCGATGGCGTTT 40 | CTCTCCGGGGTCACCGGCCAGTTCTACAAACAGTTCGCGGTGACCATCGCCATCTCGACG 41 | GTGATCTCGGCCATCAACTCGCTGACGCTCTCCCCGGCGCTGGCGGCCCTGCTGTTAAAG 42 | CCGCACGGCGCGAAGAAAGACCTCCCTACCCGGCTGATCGATCGCCTGTTTGGCTGGATT 43 | TTCCGTCCGTTTAACCGCTTTTTCCTGCGCAGCTCGAACGGCTATCAGGGACTGGTAGGC 44 | AAAACGCTTGGACGCCGTGGCGCAGTGTTTGCGGTGTACCTGCTGCTGCTCTGCGCCGCT 45 | GGGGTGATGTTTAAAGTCGTCCCCGGCGGGTTTATTCCCACCCAGGATAAGCTGTATCTC 46 | ATTGGCGGCGTGAAGATGCCGGAAGGGTCGTCGCTGGCGCGCACCGACGCGGTGATCCGC 47 | AAAATGAGCGAGATCGGGATGAATACCGAAGGGGTCGACTATGCGGTCGCTTTCCCGGGG 48 | CTTAACGCGCTGCAGTTCACCAACACGCCGAATACCGGGACGGTCTTTTTTGGCCTGAAA 49 | 
CCGTTCGACCAGCGCAAACACACGGCGGCGGAAATTAACGCGGAGATCAACGCCAAAATC 50 | GCGCAAATCCAGCAGGGCTTTGGCTTTTCCATCCTGCCGCCGCCGATTTTAGGTCTGGGT 51 | CAGGGTTCCGGCTACTCCCTGTACATCCAGGATCGCGGAGGGCTGGGCTATGGCGCGCTG 52 | CAAAGCGCGGTGAATGCGATGTCCGGGGCGATTATGCAGACGCCGGGGATGCACTTCCCG 53 | ATCTCGACTTACCAGGCTAACGTGCCGCAGCTGGACGTGCAGGTCGATCGCGATAAGGCG 54 | AAAGCGCAGGGGGTATCGCTAACCGATCTGTTCGGTACGCTGCAGACCTATCTCGGCTCG 55 | TCTTATGTCAATGACTTTAACCAGTTCGGGCGTACCTGGCGCGTGATGGCCCAGGCTGAC 56 | GGACCATACCGCGAGAGCGTGGAAGATATCGCCAATCTGCGCACCCGCAATAATCAGGGC 57 | GAAATGGTACCGATCGGCAGTATGGTGAATATCAGTACCACCTACGGGCC 58 | >346__aph(3')-IIa__aph(3')-IIa__3 aph(3')-IIa_1_X57709; X57709; aminoglycosides 59 | ATGATTGAACAAGATGGATTGCACGCAGGTTCTCCGGCCGCTTGGGTGGAGAGGCTATTC 60 | GGCTATGACTGGGCACAACAGACAATCGGCTGCTCTGATGGCGCCGTGTTCCGGCTGTCA 61 | GCGCAGGGGCGCCCGGTTCTTTTTGTCAAGACCGACCTGTCCGGTGCCCTGAATGAACTG 62 | CAGGACGAGGCAGCGCGGCTATCGTGGCTGGCCACGACGGGCGTTCCTTGCGCAGCTGTG 63 | CTCGACGTTGTAACTGAAGCGGGAAGGGACTGGCTGCTATTGGGCGAAGTGCCGGGGCAG 64 | GATCTCCTGTCATCTCACCTTGCTCCTGCCGAGAAAGTATCCATCATGGCTGATGCAATG 65 | CGGCGGCTGCATACGCTTGATCCGGCTACCTGCCCATTCGACCACCAAGCGAAACATCGC 66 | ATCGAGCGAGCACGTACTCGGATGGAAGCCGGTCTTGTCGATCAGGATGATCTGGACGAA 67 | GAGCATCAGGGGCTCGCGCCAGCCGAACTGTTCGCCAGGCTCAAGGCGCGCATGCCCGAC 68 | GGCGAGGATCTCGTCGTGACCCATGGCGATGCCTGCTTGCCGAATATCATGGTGGAAAAT 69 | GGCCGCTTTTCTGGATTCATCGACTGTGGCCGGCTGGGTGTGGCGGACCGCTATCAGGAC 70 | ATAGCGTTGGCTACCCGTGATATTGCTGAAGAGCTTGGCGGCGAATGGGCTGACCGCTTC 71 | CTCGTGCTTTACGGTATCGCCGCTCCCGATTCGCAGCGCATCGCCTTCTATCGCCTTCTT 72 | GACGAGTTCTTCTGA 73 | >347__aph(3')-III__aph(3')-III__4 aph(3')-III_1_M26832; M26832; aminoglycosides 74 | ATGGCTAAAATGAGAATATCACCGGAATTGAAAAAACTGATCGAAAAATACCGCTGCGTA 75 | AAAGATACGGAAGGAATGTCTCCTGCTAAGGTATATAAGCTGGTGGGAGAAAATGAAAAC 76 | CTATATTTAAAAATGACGGACAGCCGGTATAAAGGGACCACCTATGATGTGGAACGGGAA 77 | AAGGACATGATGCTATGGCTGGAAGGAAAGCTGCCTGTTCCAAAGGTCCTGCACTTTGAA 78 | CGGCATGATGGCTGGAGCAATCTGCTCATGAGTGAGGCCGATGGCGTCCTTTGCTCGGAA 79 | 
GAGTATGAAGATGAACAAAGCCCTGAAAAGATTATCGAGCTGTATGCGGAGTGCATCAGG 80 | CTCTTTCACTCCATCGACATATCGGATTGTCCCTATACGAATAGCTTAGACAGCCGCTTA 81 | GCCGAATTGGATTACTTACTGAATAACGATCTGGCCGATGTGGATTGCGAAAACTGGGAA 82 | GAAGACACTCCATTTAAAGATCCGCGCGAGCTGTATGATTTTTTAAAGACGGAAAAGCCC 83 | GAAGAGGAACTTGTCTTTTCCCACGGCGACCTGGGAGACAGCAACATCTTTGTGAAAGAT 84 | GGCAAAGTAAGTGGCTTTATTGATCTTGGGAGAAGCGGCAGGGCGGACAAGTGGTATGAC 85 | ATTGCCTTCTGCGTCCGGTCGATCAGGGAGGATATCGGGGAAGAACAGTATGTCGAGCTA 86 | TTTTTTGACTTACTGGGGATCAAGCCTGATTGGGAGAAAATAAAATATTATATTTTACTG 87 | GATGAATTGTTTTAG 88 | >340__aadA__aadA7__5 aadA7_1_AF224733; AF224733; aminoglycosides 89 | ATGAGTGAAAAAGTGCCCGCCGAGATTTCGGTGCAACTATCACAAGCACTCAACGTCATC 90 | GGGCGCCACTTGGAGTCGACGTTGCTGGCCGTGCATTTGTACGGCTCCGCACTGGATGGC 91 | GGATTGAAACCGTACAGTGATATTGATTTGCTGGTGACTGTAGCTGCACCGCTCAATGAT 92 | GCCGTGCGGCAAGCCCTGCTCGTCGATCTCTTGGAGGTTTCAGCTTCCCCTGGCCAAAAC 93 | AAGGCACTCCGCGCCTTGGAAGTGACCATCGTCGTGCACAGTGACATCGTACCTTGGCGT 94 | TATCCGGCCAGGCGGGAACTGCAGTTCGGAGAGTGGCAGCGCAAAGACATCCTTGCGGGC 95 | ATCTTCGAGCCCGCCACAACCGATTCTGACTTGGCGATTCTGCTAACAAAGGCAAAGCAA 96 | CATAGCGTCGTCTTGGCAGGTTCAGCAGCGAAGGATCTCTTCAGCTCAGTCCCAGAAAGC 97 | GATCTATTCAAGGCACTGGCCGATACTCTGAAGCTATGGAACTCGCCGCCAGATTGGGCG 98 | GGCGATGAGCGGAATGTAGTGCTTACTTTGTCTCGTATCTGGTACACCGCAGCAACCGGC 99 | AAGATCGCGCCAAAGGATGTTGCTGCCACTTGGGCAATGGCACGCTTGCCAGCTCAACAT 100 | CAGCCCATCCTGTTGAATGCCAAGCGGGCTTATCTTGGGCAAGAAGAAGATTATTTGCCC 101 | GCTCGTGCGGATCAGGTGGCGGCGCTCATTAAATTCGTGAAGTATGAAGCAGTTAAACTG 102 | CTTGGTGCCAGCCAATGA 103 | >341__aadA__aadA13__6 aadA13_1_AY713504; AY713504; aminoglycosides 104 | ATGAGGGACTCAGTGACCGCCGAAATTTCGACGCAACTATCCAAGGTGCTTAGTGTTATC 105 | GAGCACCATCTGGAACCGACGTTGCTTGCCGTACATTTGTACGGCTCCGCAGTGGATGGC 106 | GGCCTGAAGCCATACAGTGATATTGATTTGCTGGTTACTGTGACCGCAAGGCTTGATGAC 107 | ACAACGCGGCGAGCTTTGTTCAACGATCTTTTGGAGGTTTCGGCTTTCCCAGGCGAGAGT 108 | GAGATTCTCCGCGCTATAGAAGTCACCATTGTCGTGCACGAAGACATTATGCCGTGGCGT 109 | TATCCAGCCAAGCGCGAACTGCAATTTGGAGAATGGCAGCGCAATGACATTCTTGCGGGT 110 | 
ATCTTCGAGCCAGCCACGATCGACATCGATCTGGCTATCTTGCTAACGAAAGCGAGAGAA 111 | CATAGCGTGGCTTTGGTAGGTCCGGCGGCGGAGGAACTCTTTGATCCAGTTCCTGAACAA 112 | GATCTAATCAAGGCGCTGAATGAAACCTTGAAGCTATGGAACTCGCAGCCCGACTGGGCC 113 | GGCGATGAGCGAAATGTAGTGCTTACGTTGTCCCGCATTTGGTACAGCGCAGCAACTGGT 114 | AAAATCGCGCCGAAGGATGTCGCTGCCAACTGGGCAATGGAACATCTACCTGCCCAGCAT 115 | CAGTCTGTCTTGCTTGAAGCTAGACAGGCTTATCTTGGGCAAGAGGAAGATCGCTCGGTC 116 | TTGCGCGCAGATAAGTTGGAAGAATTTATTCACTTCATGAAAAGCGAGATCACCAAGGTG 117 | CTCGGCAATGATGTCTAA 118 | >342__blaOXA__blaOXA-54__7 blaOXA-54_1_AY500137; AY500137; betalactamase 119 | ATGCGTGTGTTAGCCTTATCGGCTGTATTAGTGGTGGCATCGATTGTTGGCATGCCGGCG 120 | ATGGCAAACGAATGGCAGGAAAAACCGAGTTGGAATACTCATTTTTCGGAACATAAAGCG 121 | CAGGGTGTGATAGTGCTTTGGAACGAGAACAAACAGCAAGGATTTACCAATAATCTTAAG 122 | CGGGCAAACCAAGCATTTTTACCCGCATCGACCTTTAAAATCCCCAATAGCTTGATTGCC 123 | TTGGATTTAGGTGTCGTGAAGGATGAGCATCAAGTCTTTAAATGGGATGGACAGACTCGG 124 | GATATCGCGGCGTGGAATCGCGACCATGACTTAATCACTGCGATGAAATACTCGGTCGTG 125 | CCCGTGTATCAAGAGTTTGCGCGCCAAATTGGGCAGGCGCGCATGAGTAAAATGTTGCAC 126 | GCATTTGATTATGGCAATGAAGATATTTCGGGCAATCTAGACAGCTTTTGGCTCGATGGC 127 | GGCATTCGGATTTCGGCAACGGAGCAAGTCGCATTTCTACGAAAGCTGTATCATAACAAG 128 | TTGCATGTATCAGAACGCAGTCAGCGTATCGTCAAGCAAGCCATGCTTACCGAGGCTAAT 129 | AGTGACTACATAATCCGCGCTAAAACCGGATACTCGACCAGAATTGAGCCTCAGATCGGT 130 | TGGTGGGTCGGTTGGGTTGAACTCGATGATAATGTGTGGTTCTTCGCGATGAATATGGAT 131 | ATGCCTACGGCTGATGGTTTAGGGCTACGTCAAGCCATCACTAAAGAAGTGCTTAAACAG 132 | GAAAAGATAATTCCATAG 133 | >343__blaOXA__blaOXA-163__8 blaOXA-163_1_HQ700343; HQ700343; betalactamase 134 | ATGCGTGTATTAGCCTTATCGGCTGTGTTTTTGGTGGCATCGATTATCGGAATGCCTGCG 135 | GTAGCAAAGGAATGGCAAGAAAACAAAAGTTGGAATGCTCACTTTACTGAACATAAATCA 136 | CAGGGCGTAGTTGTGCTCTGGAATGAGAATAAGCAGCAAGGATTTACCAATAATCTTAAA 137 | CGGGCGAACCAAGCATTTTTACCCGCATCTACCTTTAAAATTCCCAATAGCTTGATCGCC 138 | CTCGATTTGGGCGTGGTTAAGGATGAACACCAAGTCTTTAAGTGGGATGGACAGACGCGC 139 | GATATCGCCACTTGGAATCGCGATCATAATCTAATCACCGCGATGAAATATTCAGTTGTG 140 | 
CCTGTTTATCAAGAATTTGCCCGCCAAATTGGCGAGGCACGTATGAGCAAGATGCTACAT 141 | GCTTTCGATTATGGTAATGAGGACATTTCGGGCAATGTAGACAGTTTCTGGCTCGACGGT 142 | GGTATTCGAATTTCGGCCACGGAGCAAATCAGCTTTTTAAGAAAGCTGTATCACAATAAG 143 | TTACACGTATCGGAGCGCAGCCAGCGTATTGTCAAACAAGCCATGCTGACCGAAGCCAAT 144 | GGCGACTATATTATTCGGGCTAAAACTGGATACGATACTAAGATTGGCTGGTGGGTCGGT 145 | TGGGTTGAACTTGATGATAATGTGTGGTTTTTTGCGATGAATATGGATATGCCCACATCG 146 | GATGGTTTAGGGCTGCGCCAAGCCATCACAAAAGAAGTGCTCAAACAGGAAAAAATTATT 147 | CCCTAG 148 | >343__blaOXA__blaOXA-48__9 blaOXA-48_1_HM755942; HM755942; betalactamase 149 | AGAAGGAAGAAACAAAGTTGGAATGCTCACTTTACTGAACATAAATCACAGGGCGTAGTT 150 | GTGCTCTGGAATGAGAATAAGCAGCAAGGATTTACCAATAATCTTAAACGGGCGAACCAA 151 | GCATTTTTACCCGCATCTACCTTTAAAATTCCCAATAGCTTGATCGCCCTCGATTTGGGC 152 | GTGGTTAAGGATGAACACCAAGTCTTTAAGTGGGATGGACAGACGCGCGATATCGCCACT 153 | TGGAATCGCGATCATAATCTAATCACCGCGATGAAATATTCAGTTGTGCCTGTTTATCAA 154 | GAATTTGCCCGCCAAATTGGCGAGGCACGTATGAGCAAGATGCTACATGCTTTCGATTAT 155 | GGTAATGAGGACATTTCGGGCAATGTAGACAGTTTCTGGCTCGATGGTGGTATTCGAATT 156 | TCGGCCACTGAGCAAATCAGCTTTTTAAGAAAGCTGTATCACAATAAGTTACACGTATCG 157 | GAGCGCAGCCAGCGTATTGTCAAACAAGCCATGCTGACCGAAGCCAATGGCGACTATATT 158 | ATTCGGGCTAAAACGGGATACTCGACTAGAATCGAACCTAAGATTGGCTGGTGGGTCGGT 159 | TGGGTTGAACTTGATGATAATGTGTGGTTTTTTGCGATGAATATGGATATGCCCACATCG 160 | GATGGTTTAGGGCTGCGCCAAGCCATCACAA 161 | >343__blaOXA__blaOXA-48_2__10 blaOXA-48_2_AY236073; AY236073; betalactamase 162 | ATGCGTGTATTAGCCTTATCGGCTGTGTTTTTGGTGGCATCGATTATCGGAATGCCTGCG 163 | GTAGCAAAGGAATGGCAAGAAAACAAAAGTTGGAATGCTCACTTTACTGAACATAAATCA 164 | CAGGGCGTAGTTGTGCTCTGGAATGAGAATAAGCAGCAAGGATTTACCAATAATCTTAAA 165 | CGGGCGAACCAAGCATTTTTACCCGCATCTACCTTTAAAATTCCCAATAGCTTGATCGCC 166 | CTCGATTTGGGCGTGGTTAAGGATGAACACCAAGTCTTTAAGTGGGATGGACAGACGCGC 167 | GATATCGCCACTTGGAATCGCGATCATAATCTAATCACCGCGATGAAATATTCAGTTGTG 168 | CCTGTTTATCAAGAATTTGCCCGCCAAATTGGCGAGGCACGTATGAGCAAGATGCTACAT 169 | GCTTTCGATTATGGTAATGAGGACATTTCGGGCAATGTAGACAGTTTCTGGCTCGACGGT 170 | 
GGTATTCGAATTTCGGCCACGGAGCAAATCAGCTTTTTAAGAAAGCTGTATCACAATAAG 171 | TTACACGTATCGGAGCGCAGCCAGCGTATTGTCAAACAAGCCATGCTGACCGAAGCCAAT 172 | GGTGACTATATTATTCGGGCTAAAACTGGATACTCGACTAGAATCGAACCTAAGATTGGC 173 | TGGTGGGTCGGTTGGGTTGAACTTGATGATAATGTGTGGTTTTTTGCGATGAATATGGAT 174 | ATGCCCACATCGGATGGTTTAGGGCTGCGCCAAGCCATCACAAAAGAAGTGCTCAAACAG 175 | GAAAAAATTATTCCCTAG 176 | --------------------------------------------------------------------------------