├── kappagate ├── version.py ├── __init__.py ├── predict_assembly_accuracy.py ├── tools.py └── reporting.py ├── docs └── title.png ├── MANIFEST.in ├── examples ├── success_rate_facts.png ├── plotting_interactions.png ├── plotting_interactions.py ├── success_rate_facts.py └── basic_success_rate_prediction.py ├── .travis.yml ├── .gitignore ├── setup.py ├── pypi-readme.rst ├── LICENCE.txt ├── tests ├── test_basics.py └── data │ └── records │ ├── partA.gb │ ├── partB.gb │ ├── partC.gb │ └── assembled_construct.gb ├── README.rst └── ez_setup.py /kappagate/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.3" 2 | -------------------------------------------------------------------------------- /docs/title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/kappagate/HEAD/docs/title.png -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt 2 | include *.rst 3 | recursive-include examples *.txt *.py 4 | include ez_setup.py 5 | -------------------------------------------------------------------------------- /examples/success_rate_facts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/kappagate/HEAD/examples/success_rate_facts.png -------------------------------------------------------------------------------- /examples/plotting_interactions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/kappagate/HEAD/examples/plotting_interactions.png -------------------------------------------------------------------------------- /.travis.yml: 
""" kappagate/__init__.py """

# __all__ = []

# Public API of the package: the predictor, the slots helpers, and the
# reporting/plotting functions are all re-exported at top level.
from .predict_assembly_accuracy import predict_assembly_accuracy
from .tools import (overhangs_list_to_slots, parts_records_to_slots,
                    construct_record_to_slots, load_record)
from .reporting import (plot_colony_picking_graph,
                        min_trials_for_one_success,
                        average_trials_until_success,
                        plot_circular_interactions,
                        success_rate_facts)
from .version import __version__
"""Packaging script for kappagate."""
import ez_setup
ez_setup.use_setuptools()

from setuptools import setup, find_packages

# Execute version.py to obtain __version__ without importing the package.
version_namespace = {}
with open('kappagate/version.py') as version_file:
    exec(version_file.read(), version_namespace)
__version__ = version_namespace['__version__']

with open('pypi-readme.rst') as readme_file:
    long_description = readme_file.read()

setup(
    name='kappagate',
    version=__version__,
    author='Zulko',
    url='https://github.com/Edinburgh-Genome-Foundry/kappagate',
    description='Predict valid-clone-rates in Golden Gate DNA assemblies',
    long_description=long_description,
    license='MIT',
    keywords="DNA assembly synthetic biology golden gate",
    # `exclude` expects a sequence of patterns; a bare string would be
    # unpacked character by character ('d', 'o', 'c', 's').
    packages=find_packages(exclude=['docs']),
    install_requires=['topkappy', 'networkx', 'tatapov', 'matplotlib',
                      'dnacauldron', 'proglog', 'flametree', 'biopython',
                      'snapgene_reader'])
29 | -------------------------------------------------------------------------------- /LICENCE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | [OSI Approved License] 3 | 4 | The MIT License (MIT) 5 | 6 | Copyright (c) 2018 Edinburgh Genome Foundry 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 
"""Basic tests for kappagate: predictions, plots and slots extraction."""
import os

import matplotlib
matplotlib.use("Agg")
from kappagate import (overhangs_list_to_slots, predict_assembly_accuracy,
                       plot_colony_picking_graph, success_rate_facts,
                       plot_circular_interactions, load_record,
                       parts_records_to_slots, construct_record_to_slots)
import flametree

# Locate the Genbank fixtures relative to this file rather than to the
# current working directory, so the tests can be launched from anywhere.
RECORDS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           'data', 'records')

records_dict = {
    name: load_record(os.path.join(RECORDS_DIR, name + '.gb'),
                      id=name, topology='circular')
    for name in ("partA", "partB", "partC", "assembled_construct")
}


def test_basic_success_rate_prediction():
    """Check the predicted success rate on three sets of overhangs."""
    # First set: expected to give a very high success rate.
    overhangs = ['GGAG', 'GGCA', 'TCGC', 'CAGT', 'TCCA',
                 'GAAT', 'AGTA', 'TCTT', 'CAAA', 'GCAC',
                 'AACG', 'GTCT', 'CCAT']
    slots = overhangs_list_to_slots(overhangs)
    success_rate, _, _ = predict_assembly_accuracy(
        slots, initial_quantities=5000)
    assert success_rate > 0.95

    # Second set (twice as many junctions): slightly lower success rate.
    overhangs = ['GGAG', 'GATA', 'GGCA', 'GGTC', 'TCGC',
                 'GAGG', 'CAGT', 'GTAA', 'TCCA', 'CACA',
                 'GAAT', 'ATAG', 'AGTA', 'ATCA', 'TCTT',
                 'AGGT', 'CAAA', 'AAGC', 'GCAC', 'CAAC',
                 'AACG', 'CGAA', 'GTCT', 'TCAG', 'CCAT']
    slots = overhangs_list_to_slots(overhangs)
    success_rate, _, _ = predict_assembly_accuracy(
        slots, initial_quantities=5000)
    assert 0.8 < success_rate < 0.92

    # Third set: expected to give a low success rate.
    overhangs = ['GGAG', 'GGTC', 'AGCA', 'CAGT', 'GGTA',
                 'GAAT', 'GGTT', 'TCTT', 'GGTG', 'GCAC',
                 'AGCG', 'GTCT', 'CCAT']
    slots = overhangs_list_to_slots(overhangs)
    success_rate, _, _ = predict_assembly_accuracy(
        slots, initial_quantities=5000)
    assert 0.2 < success_rate < 0.4


def test_plot_circular_interactions():
    """Smoke test: the circular interactions plot runs without error."""
    overhangs = ['TAGG', 'GACT', 'GGAC', 'CAGC',
                 'GGTC', 'GCGT', 'TGCT', 'GGTA',
                 'CGTC', 'CTAC', 'GCAA', 'CCCT']
    slots = overhangs_list_to_slots(overhangs)
    plot_circular_interactions(
        slots, annealing_data=('25C', '01h'), rate_limit=200)


def test_success_rate_facts():
    """Smoke test: the colony picking graph runs on a predicted rate."""
    overhangs = ['TAGG', 'GACT', 'GGAC', 'CAGC',
                 'GGTC', 'GCGT', 'TGCT', 'GGTA',
                 'CGTC', 'CTAC', 'GCAA', 'CCCT']
    slots = overhangs_list_to_slots(overhangs)
    predicted_rate, _, _ = predict_assembly_accuracy(slots, duration=10)
    plot_colony_picking_graph(success_rate=predicted_rate)


def test_parts_records_to_slots():
    """Slots deduced from the three part records match the expected order."""
    records = [records_dict[n] for n in ["partA", "partB", "partC"]]
    slots = parts_records_to_slots(records, enzyme='auto')
    assert slots == [('backbone-left', 'LEFT', 'ATTG'),
                     ('partA', 'ATTG', 'GGCT'),
                     ('partB', 'GGCT', 'GGGC'),
                     ('partC', 'GGGC', 'GGCA'),
                     ('backbone-right', 'GGCA', 'RIGHT')]


def test_construct_record_to_slots():
    """Slots deduced from the assembled construct record are as expected."""
    record = records_dict['assembled_construct']
    slots = construct_record_to_slots(record, backbone_annotations='receptor')
    assert slots == [('backbone-left', 'LEFT', 'ATTG'),
                     ('p001', 'ATTG', 'GGCT'),
                     ('p002', 'GGCT', 'GGGC'),
                     ('p003', 'GGGC', 'GGCA'),
                     ('backbone-right', 'GGCA', 'RIGHT')]
9 | FEATURES Location/Qualifiers 10 | annotation 22..1001 11 | /label="pA" 12 | cds 1123..2022 13 | /label="AmpR" 14 | rep._origin 3023..3522 15 | /label="RepO_1" 16 | ORIGIN 17 | 1 cgtctcaatt gtaagataga acagggatat agctcgttgc atccataaat tgcccaccgt 18 | 61 agggagaacg ccgttggaaa atgagagtta ggcgatgagc ggttgtcgcc cgcacagaca 19 | 121 aaggatgcct ttataaatag tagcggcctt gtctgcacca gacgtttgtg ggtcgacgtg 20 | 181 ctattctagc caaaagcaaa atgtatcata ctacaggcaa taggccctat gctgaccaaa 21 | 241 cttggggacc cttgcgcctt ctctcttacg cataacatgc ataggtacct ctactcatca 22 | 301 ggcagggcgg cgtgaggtca ctatggctca agatgtgtac gactaaagaa aggtttatgc 23 | 361 tccttcccca aggacgcatt tgggactgct acttgcccct agcgaattca ctaggatttt 24 | 421 tgtagaacca tgagcgccct atccgatagc acagagacaa tgctacaagc aactgtgcat 25 | 481 gcgctcgatc gccgtgcatt aatacgtatt atagcgtatc gtgtacgcta atatcttagt 26 | 541 gcaccgcacg ctggttggat acaattccgt gaaataattc ctgcttacac aggggctttc 27 | 601 tggcgcatgg ctgtgtctgg atgtttgtga taagaggctc catgaacccg gcgggaaatg 28 | 661 aggggaaacc ccggggagca acgacactaa gcctggcagt tgttcagata ggacgctttt 29 | 721 gtcagtgggc ggttttgcat ccactaacta tcataaacga cacaacgtcg aatgatcatc 30 | 781 ggcggtttgc atgtagagac agctcctgca acaccatacc ggctcgtgga attacgctgt 31 | 841 gcgccggccc tcattcgatt gtgtccagcg tcggcgcgaa tgatgattga gatgtgcttc 32 | 901 ggctcagagc gtgcacaggt agttctcccc ccttacctag ctgatccagc aattccacgc 33 | 961 tggcccgcgc gtccttgggt aacactggat ctgtacccaa atacaccgtc gggctagaga 34 | 1021 cgctccacct gcgtctatcg tacagtgcta aaatggcagc agatagtgaa acttccgcta 35 | 1081 agctagcccc tcagggtaca ctgcaccgag gcgtgctgtc aatacttgat taagtcgggt 36 | 1141 tgtcggggac ctgccgtcac gcttccgagt gtatatccgg atttagttga cgtcatacag 37 | 1201 aggcactaag aagaataaac gcttacctcc agcaatcgtg tagtgtcagg tgtacgttct 38 | 1261 cccttgcgcc gttcggcaag cgtccggtgt cgggctgcaa ggaataaatc tttatggacc 39 | 1321 agaggggctt gtttcctcat atgggtgcgt gcacttatac gattcaaagg tggatatggc 40 | 1381 cgcataacac gtagccaggc tatagtcccg cggcctaatt ccttcgagtg cgggtgcctg 41 | 1441 tttttgtttt 
tcctttacga cacgaaccgc tctaacctgc tctatttcgc cacgttccag 42 | 1501 tgaacctctt agcctaccgc ccacgtacgg tgggacgcgt cgagcagtta aggtactgtg 43 | 1561 gagaaatcgt tcaatattag aaaacaggcg gtgtacgaat tactgtgtcc cggtgttgcc 44 | 1621 cgtttaacgg ctgccgtggt cataccgtga ggcaccacga ggggatgcta cgcaacatgc 45 | 1681 gaggtgtaat cagcagggaa gatcccgggg atcgaaagcg gtccgcgatt tgcgaccgat 46 | 1741 atgcataagg tgtcatttat attacaccta cgacactggt accggctcac agccaaatgc 47 | 1801 acagtctcaa gatagaattc gcaaaaatta ggcttatcgg gcgcttactc tttgttacac 48 | 1861 ttctggtctg tgagtgacgc cctgtgtccc atcactgcat gtgaggatgc gtgtactgca 49 | 1921 cctgtgaatt acagacgttt ccctaccagg gcgctacgtt ataatgttcg gttgcaaccc 50 | 1981 tctatagggt gatctcgaca taccactatg gtttgcgcgt ttcgggtgct tttggcgagg 51 | 2041 gccaggtacg gcggtgagac agcgtatttt gcgaataggg tattcggacg cacggtgtat 52 | 2101 agctcttgga aaacgactct tcgacgggcg tgtagaagta gccaagcagt tcggttgtta 53 | 2161 tcactaaatg cggaccatgt gcagccgttt aggagaggag tctgcaatgg atcagctggt 54 | 2221 cagattgtac cactcggttg agcctaaggc acttcaggtg ctcggattgt aatgaaccct 55 | 2281 caggttacgc cgttgcgctg agatcccaca agccataagc aatgagaatc gagcagctat 56 | 2341 cgaattacgc agcacttgct gatgaactgg gtgcacctaa agcgcccgat aaaacggagg 57 | 2401 gagtgaggta actactacat tgcgagtagt gcgaggggcc tccgagacta cattagagca 58 | 2461 gccatgccga atacgacaaa atctatagcc ttgcgaacgt aggggactga tttatggaca 59 | 2521 acaggaccga aacaagttgg agccacaacg gagctatgga cgttttcact attgatgggt 60 | 2581 ggcttgcgtc cgcatcccag gtttctctgg ttaattttca tgacgattct ttgtggtacg 61 | 2641 atccaggcat aagatagggc acctaacccg atactgtgaa cttatatgac ggctgaacat 62 | 2701 gccctaatag tctgccaaaa gggaaaccta taacggctct aaacgggccc cgggtacgta 63 | 2761 acagcaatgt tcctgtccac caaggcgttt ttagtaaccg ccgatcgatc tcgatcgaat 64 | 2821 tccgttttac gccccagatc gcagctccct tgacagtgtc agttgaatcc ccattctagg 65 | 2881 tttcctgata aaactaagta cggggacgca tcagtcgaaa gcaatcacag ataatagata 66 | 2941 tcttgagcag aaagacgtct tcacgaatca ggagcgagtg tcccctttga aagtgcgggc 67 | 3001 aacaataggt gtgaggtttc acggttgctt 
def slots_to_agents_and_rules(slots, annealing_data=('25C', '01h'),
                              corrective_factor=1.0):
    """Build the Topkappy agents and binding rules describing an assembly.

    One agent is created per slot, with its (left, right) overhangs as sites.
    For every ordered pair of agents, a binding rule is created between the
    right site of the first agent and each site of the second agent, as long
    as both sites are plain DNA overhangs and the annealing rate is non-zero.

    Parameters
    ----------

    slots
      A list [(slot_name, left_overhang, right_overhang), ...]

    annealing_data
      Either a pandas dataframe or a couple (temperature, duration) indicating
      an experimental dataset from Potapov et al. 2018

    corrective_factor
      Exponent applied to each annealing rate. It decreases (when <1) or
      increases (>1) the differences in affinity in the dataset.

    Returns
    -------

    agents, rules
      Lists of Topkappy agents and rules, ready to be fed to a KappaModel
    """
    if isinstance(annealing_data, tuple):
        temperature, incubation = annealing_data
        annealing_data = tatapov.annealing_data[temperature][incubation]

    agents = [KappaAgent(name, (left, right)) for name, left, right in slots]

    # Only sites made exclusively of A/T/G/C letters can anneal; other site
    # names are ignored when creating rules.
    nucleotides = set('ATGC')
    dna_overhangs = {
        overhang
        for _, left, right in slots
        for overhang in (left, right)
        if set(overhang) <= nucleotides
    }

    rules = []
    for first, second in itertools.product(agents, agents):
        _, first_right = first.sites
        second_left, second_right = second.sites
        candidates = (('left', second_left), ('right', second_right))
        for side, second_site in candidates:
            if first_right not in dna_overhangs:
                continue
            if second_site not in dna_overhangs:
                continue
            # The dataset is looked up with the left site reverse-complemented,
            # and with the right site as is.
            if side == 'left':
                partner = tatapov.reverse_complement(second_site)
            else:
                partner = second_site
            rate = annealing_data[first_right][partner]
            if rate == 0:
                continue
            # NOTE(review): the label says "<first>-left" although the site
            # used on the first agent is its right one; kept unchanged.
            rule_name = '%s-left.%s-%s' % (first.name, second.name, side)
            unbound = [
                KappaSiteState(first.name, first_right, '.'),
                KappaSiteState(second.name, second_site, '.')
            ]
            bound = [
                KappaSiteState(first.name, first_right, '1'),
                KappaSiteState(second.name, second_site, '1')
            ]
            rules.append(KappaRule(rule_name, unbound, '->', bound,
                                   rate=rate ** corrective_factor))
    return agents, rules
def predict_assembly_accuracy(slots, duration=1000, initial_quantities=1000,
                              corrective_factor=1.0,
                              annealing_data=('25C', '01h')):
    """Predict the accuracy of the assembly (proportion of good clones).

    A Kappa model is built from the slots, simulated, and the complexes
    present in the final snapshot are analyzed. Only complexes containing
    both the first and the last slot are considered, and the score is the
    proportion of these whose slots appear in the expected order (or in the
    exact reverse order).

    Parameters
    ----------

    slots
      A list [(slot_name, left_overhang, right_overhang), ...]

    annealing_data
      Either a pandas dataframe or a couple (temperature, duration) indicating
      an experimental dataset from Potapov et al. 2018

    duration
      Virtual duration of the Kappa complexation simulation experiments.
      A large number ensures that the experiment comes to equilibrium, and
      does not necessarily make the simulation take longer. So keep it large.

    initial_quantities
      Either a dict {slot_name: initial_quantity} or an integer in case all
      agents start the simulation with the same initial quantity. The higher
      the initial quantities, the less noisy the results of the simulation.

    corrective_factor
      A factor that can be applied to decrease (when <1) or increase (>1)
      the differences in affinity in the dataset.

    Returns
    -------

    proportion, other_constructs, simulation_results
      Where proportion is the proportion of good clones, other_constructs is
      a dict {parts_tuple: proportion} giving the proportion of each
      construct found among the complexes containing both the first and the
      last slot, and simulation_results is the topkappy simulation results
      object.
    """
    agents, rules = slots_to_agents_and_rules(
        slots, annealing_data=annealing_data,
        corrective_factor=corrective_factor)
    if isinstance(initial_quantities, int):
        # NOTE(review): this dict is keyed by agent objects while the
        # documented dict form is keyed by slot name -- confirm which key
        # type KappaModel expects.
        initial_quantities = {a: initial_quantities for a in agents}
    model = KappaModel(
        agents=agents,
        rules=rules,
        initial_quantities=initial_quantities,
        duration=duration,
        snapshot_times={'end': duration}
    )
    simulation_results = model.get_simulation_results()
    expected_slots_order = tuple(pos for pos, _, _ in slots)
    # The last slot is the final element of the expected order. The previous
    # index of 1 selected the second slot instead.
    first_slot, last_slot = expected_slots_order[0], expected_slots_order[-1]
    snapshots = simulation_results['snapshots']
    # Read the 'end' snapshot when present, otherwise the 'deadlock' one.
    end_time = 'end' if 'end' in snapshots else 'deadlock'
    end_agents = snapshots[end_time]['snapshot_agents']
    # Keep only the complexes containing both extremities of the construct.
    filtered_agents = [
        (freq, snapshot_agent_nodes_to_graph(nodes, with_ports=False))
        for freq, nodes in end_agents
        if any(node['node_type'] == first_slot for node in nodes)
        and any(node['node_type'] == last_slot for node in nodes)
    ]
    n_filtered_agents = sum(fa[0] for fa in filtered_agents)
    filtered_agents_with_slots = {
        linear_graph_to_nodes_list(graph, node_name='node_name'):
        1.0 * freq / n_filtered_agents
        for freq, graph in filtered_agents
    }
    # A construct read in the reverse direction is the same construct.
    score = (filtered_agents_with_slots.get(expected_slots_order, 0) +
             filtered_agents_with_slots.get(expected_slots_order[::-1], 0))
    return score, filtered_agents_with_slots, simulation_results
9 | FEATURES Location/Qualifiers 10 | annotation 22..2001 11 | /label="pB" 12 | cds 2123..3022 13 | /label="AmpR" 14 | rep._origin 4023..4522 15 | /label="RepO_1" 16 | ORIGIN 17 | 1 cgtctcaggc ttgattgtta actaccgtat ggcggttacg atcgagtgac actattatta 18 | 61 gaccgctact ccaagtccca acttttcatc gccatacaga agagaacgag aaagtcgaag 19 | 121 agttatagcg tgtacacttc tgattagcta acgaatcgat gcgtacagca tactcgagca 20 | 181 actttatggg tcgtctgtct ccttgtaccc cgcttgacga ggatgtgttt gtaggcggct 21 | 241 tggatatcgg cagtgtatcc gcatggtcaa acactttagc ccgaccggtg ctcaaggtcg 22 | 301 aaatcctaat atagattggg ataggacccg tccattacta ggcgacattc gcagtcacga 23 | 361 gcccgcacgg ggtgcctcaa cacggtgaca ctcaattcgc cggcactgag tacgacgatc 24 | 421 gtgattgcca taccgtcatg ttttgtgtta cccctgataa gaagcgcaga tatggcacgg 25 | 481 gggcagaggg cggagtagac ccatgcaaaa ctctagcctc tgggcagcat gatcgacgta 26 | 541 gaacgccgtt gtgactcact agtttcagtt atgatgttca gccaagaatc ctccattttg 27 | 601 acgtgaacct tctcccttta caccttgaag ctgcaatatt aagtcgaccc tttaagttgc 28 | 661 cttgtcttgg tctagagccc gcagctcacc gataaaatga gcagtcaatt ttagcggcgg 29 | 721 ttgtccaggt tacatgcact ttgggctgga accgcgttac gtttggagta cttagaagtg 30 | 781 tcgtaggcat aggagagccg aaacttgata ttgggtctag ttagtgctct accccgcttc 31 | 841 ggatccgtaa tagtgatgtt gcataagacg cttgcctcaa ggcgacatac ggtggcggtt 32 | 901 gctggaaatg acgccccacc ccgtgataat ttggggacga gccgcacgat aacttctgcc 33 | 961 agcgaccgcc cctcccacgt ccgatggtga ttccgaattt tgattacatg ccagacacgc 34 | 1021 tttaatctcg ccaccacatt ggcaccccca ccaagtaatg tatgtaagaa gcagctgcgc 35 | 1081 cccaagttac gtgccgtttc cgatgggagc agcaataatg tcgatagcaa ccacccccct 36 | 1141 ctctactgac gaagcaacac cggggacacc gtcaccttgg cttcatttcg tattcattca 37 | 1201 cctacgcaca accgcctgac cgttcgttat cacgagccta tccccttcaa gcaatttccc 38 | 1261 ccccttgttc agcagcctac actatacgtt ggtacactgt gtgcgacaga tatttggtgc 39 | 1321 ccgtcttatt ctattctgag agagtttaat agccaaaaga tatgagtagg aacgggaggt 40 | 1381 acccgactcg cgttgaagca ctcttaagac ctctcttagc acttcgaagg gtaggtgcgt 41 | 1441 tcaaagaacc 
tctgctttta actaagatca aaaggattgc gtacaacttc cccgttaact 42 | 1501 ctccagagga tccgtagaag tctacaccaa aatacgtttg gtttcgccaa ggtgactatc 43 | 1561 ggtacttgtg cacgtaacag ggtaaagaat tgaaagcctc aaattgtacg acgttacgcc 44 | 1621 cggacggtcg ccggtgatag ggtgacgcat ttgcgtttgt catatcattt gggattagac 45 | 1681 ggagctggtg tacctctatg gacgttacaa aaccttgtgg taacagcctg ggtgccatcc 46 | 1741 acctagtccg aagtttcctt ctaaccgcgg catcagatgg atatgtacct tatggcgctt 47 | 1801 cccgggaagt ccaattcttc gtgttgaagg gacagctcta caggcctggc cacttgtggc 48 | 1861 gcatctaagt agtctaggtg catgattctc tcccttgggg gcgctgtcgt gggtcctgcc 49 | 1921 gagccacacc ctaacctcac gtcgcaagac ttggtaaata gctttttttt ttatgcccaa 50 | 1981 tctatttacg taacaatcga catcagagga ggggcagaga cgctccacct gcgtctatcg 51 | 2041 tacagtgcta aaatggcagc agatagtgaa acttccgcta agctagcccc tcagggtaca 52 | 2101 ctgcaccgag gcgtgctgtc aatacttgat taagtcgggt tgtcggggac ctgccgtcac 53 | 2161 gcttccgagt gtatatccgg atttagttga cgtcatacag aggcactaag aagaataaac 54 | 2221 gcttacctcc agcaatcgtg tagtgtcagg tgtacgttct cccttgcgcc gttcggcaag 55 | 2281 cgtccggtgt cgggctgcaa ggaataaatc tttatggacc agaggggctt gtttcctcat 56 | 2341 atgggtgcgt gcacttatac gattcaaagg tggatatggc cgcataacac gtagccaggc 57 | 2401 tatagtcccg cggcctaatt ccttcgagtg cgggtgcctg tttttgtttt tcctttacga 58 | 2461 cacgaaccgc tctaacctgc tctatttcgc cacgttccag tgaacctctt agcctaccgc 59 | 2521 ccacgtacgg tgggacgcgt cgagcagtta aggtactgtg gagaaatcgt tcaatattag 60 | 2581 aaaacaggcg gtgtacgaat tactgtgtcc cggtgttgcc cgtttaacgg ctgccgtggt 61 | 2641 cataccgtga ggcaccacga ggggatgcta cgcaacatgc gaggtgtaat cagcagggaa 62 | 2701 gatcccgggg atcgaaagcg gtccgcgatt tgcgaccgat atgcataagg tgtcatttat 63 | 2761 attacaccta cgacactggt accggctcac agccaaatgc acagtctcaa gatagaattc 64 | 2821 gcaaaaatta ggcttatcgg gcgcttactc tttgttacac ttctggtctg tgagtgacgc 65 | 2881 cctgtgtccc atcactgcat gtgaggatgc gtgtactgca cctgtgaatt acagacgttt 66 | 2941 ccctaccagg gcgctacgtt ataatgttcg gttgcaaccc tctatagggt gatctcgaca 67 | 3001 taccactatg gtttgcgcgt ttcgggtgct 
tttggcgagg gccaggtacg gcggtgagac 68 | 3061 agcgtatttt gcgaataggg tattcggacg cacggtgtat agctcttgga aaacgactct 69 | 3121 tcgacgggcg tgtagaagta gccaagcagt tcggttgtta tcactaaatg cggaccatgt 70 | 3181 gcagccgttt aggagaggag tctgcaatgg atcagctggt cagattgtac cactcggttg 71 | 3241 agcctaaggc acttcaggtg ctcggattgt aatgaaccct caggttacgc cgttgcgctg 72 | 3301 agatcccaca agccataagc aatgagaatc gagcagctat cgaattacgc agcacttgct 73 | 3361 gatgaactgg gtgcacctaa agcgcccgat aaaacggagg gagtgaggta actactacat 74 | 3421 tgcgagtagt gcgaggggcc tccgagacta cattagagca gccatgccga atacgacaaa 75 | 3481 atctatagcc ttgcgaacgt aggggactga tttatggaca acaggaccga aacaagttgg 76 | 3541 agccacaacg gagctatgga cgttttcact attgatgggt ggcttgcgtc cgcatcccag 77 | 3601 gtttctctgg ttaattttca tgacgattct ttgtggtacg atccaggcat aagatagggc 78 | 3661 acctaacccg atactgtgaa cttatatgac ggctgaacat gccctaatag tctgccaaaa 79 | 3721 gggaaaccta taacggctct aaacgggccc cgggtacgta acagcaatgt tcctgtccac 80 | 3781 caaggcgttt ttagtaaccg ccgatcgatc tcgatcgaat tccgttttac gccccagatc 81 | 3841 gcagctccct tgacagtgtc agttgaatcc ccattctagg tttcctgata aaactaagta 82 | 3901 cggggacgca tcagtcgaaa gcaatcacag ataatagata tcttgagcag aaagacgtct 83 | 3961 tcacgaatca ggagcgagtg tcccctttga aagtgcgggc aacaataggt gtgaggtttc 84 | 4021 acggttgctt gatcagtagc tgttcactct ctcgcaggcc tacatagtga ggtggtgaga 85 | 4081 ttgttcaggt ctgtctcctg cgcggttgtc gagccccgtc gggcacgcta acaaggacag 86 | 4141 gaattcgggt cgtcaaataa caatttacgt catcgcatcc ctcaagggta aggcgtgatc 87 | 4201 tccatctcgg ttcccaattg gtcattgtga tagccacact gcagttaccg aaggcgattt 88 | 4261 tatactaccc tgataagtgc tcgcagcccc tcctccctaa taatgtaacg acgagcatta 89 | 4321 acccaggagt atttctcacg ttgctcgtgc ccagccaagg cgatcaacgc agaccggata 90 | 4381 gcgttagcta gcccacgggt aaaagcgatt gttgtactct ttcatatgaa tacagcactc 91 | 4441 gcgaatgcaa gcctacgtcg gctcgattct cttcctcacc acactggagg cgatgatcta 92 | 4501 gcccgtcttt caccccgtac ctttatactc ggccgcgtat ttctccaccg ggagctctgc 93 | 4561 ctaccaaagc gtcgtacgcc caggaagccg tttcctttat cctccctcat 
gtctcaacgc 94 | 4621 gcgagcatcg gagagtacgc gaaatcgact gtctggccaa cactggacag acgcgcgaga 95 | 4681 cactgataaa cccccatttt cgtggggtta gtaaatcggt ggcttcaaac ggttcgatcg 96 | 4741 ctcgcacgag cattagtcag gagtatgatt gcgcatcatc tctactccgt actaggttct 97 | 4801 gtagctcagg atctccgggg ggatcaagct tttatgtccc gcgggattag aagatatcca 98 | 4861 tccatatgtt tactttgatg cgggtccgag ctctctatga tgctgctttg acaaaatgtg 99 | 4921 gaaagcctca gcgaccgctc tggggtcaaa gctctacgac acgttgatgc taaacaagtt 100 | 4981 ggactactaa gcaaatgaag cggaagagat actgagcgct ct 101 | // 102 | -------------------------------------------------------------------------------- /tests/data/records/partC.gb: -------------------------------------------------------------------------------- 1 | LOCUS . 5022 bp DNA UNK 01-JAN-1980 2 | DEFINITION . 3 | ACCESSION 4 | VERSION 5 | KEYWORDS . 6 | SOURCE . 7 | ORGANISM . 8 | . 9 | FEATURES Location/Qualifiers 10 | annotation 22..2001 11 | /label="pC" 12 | cds 2123..3022 13 | /label="AmpR" 14 | rep._origin 4023..4522 15 | /label="RepO_1" 16 | ORIGIN 17 | 1 cgtctcaggg caaagattac gatagaaaaa cactcgacac agagtagtat gataaaagtt 18 | 61 gggacggcat gctggactcg agaagtgtcg atccggcaat ccagatacgg caatggtacc 19 | 121 ggtcacacct cattctaaga gcagtcaggg gctgtcaggt ctacagggca ccggccctga 20 | 181 ggcccttggg ctagggtctg tcacccctat ctgcactgac tttaaagtaa ccgagcaata 21 | 241 agacaatcaa tcgcagggct ctggatcggg gtccgaaggc ggggtgcagt gtctccctaa 22 | 301 ttgttaagct caaaagagca gccactcgtt ctcgatcctt agtccatcgc cgtgaggtac 23 | 361 aagcagtaca cccacggaag gcccggcatt gaagatgcaa gaaatagagc actggggtga 24 | 421 cgacggcatg ctaaggtaag aaggcaaatt ctggaggact gacttcccca caagcggcgt 25 | 481 aagctcggat aagattggga taaacggacc acggcggctg gatcttcacc cctttgcacg 26 | 541 gtatgaaaac ggatcaggat tctcttccac ctattgcacc tgctgagaag agagcgctgg 27 | 601 ggggacgtta cttatcccaa tcgcgacgtc acggacaaca tcgtgttaca ggtcaggctc 28 | 661 gaaatcctag gcagctggta tccgacgcgc aagttatcta ccaaataagt taggcgatcg 29 | 721 aaatcatagg ctgtttagtg cttttcttgg tatcatcctg aacttctgtc 
acctccgagg 30 | 781 ccgaagcaac tattatgagg acgactcgat gattgtccgg agatgttgca ccccggcaag 31 | 841 cctagaaaac cccggtcctt tgagttttaa tctaatagta gaagttctcg gaaatcagag 32 | 901 ctggagactc ggtgacgtta tagaaatgtt cgcacgaaga gcgcgacatg tcctacttca 33 | 961 caagatgctt tctaggaggg gatctggatt ctcaagccct acacatggtc cgtggtgtat 34 | 1021 gttggatcgt agtgctttta ccgactatgt cgcgttaaat tgtctttcat ttctacaatt 35 | 1081 cggggagcat ggggccatgt gacttaggtt cagaaggtgc gagcgggcgt actacgtttg 36 | 1141 gtggtcgtgt ataatcagaa gaatcggcga tatccgctag caagcaaagt gtccttgacg 37 | 1201 cttgtggaaa tccaactatt tcctgcctct caagtgttta gaaaaatatc ccctggagga 38 | 1261 gatatcgctc ctctcttctg cgaccaacac cagttcaatt ccgcagtgca gcagtcctat 39 | 1321 tccgcatgga tcacgctgac gagatgcggg gttgagacta ccccaaaaga tatgttgaga 40 | 1381 taaattacca ggatggataa tctggtaccg gtgcgaaagg gtggttacac cagagtcgcc 41 | 1441 tgatgagctt atgcgcaatt tcaagtccct atgttttact atgaaagtac gaccgagccg 42 | 1501 ctagcctgag tcaaatagcg cagaatttgg gtcgtggaga tcgcgcttgt taacctggtc 43 | 1561 taggcgcatc tgagtcataa cggggttata acgactgtca acgtgtgacc gcatatcctg 44 | 1621 caaggcgaac aaataacgcc ccgcccaaaa tattacattt aagtagttat ctaactcccg 45 | 1681 gcatgcgggg catttggggg cagcgtgccc attccgcccc tatgcctgtt tcaacacgca 46 | 1741 accgtagaga ttggctagga tgggttttca cgagccaact gccacattag acatagaatt 47 | 1801 ccgataagta taagagatat cagcgaggtt cagggtagtt tggaacgaca gccggttcag 48 | 1861 aagttcagta tggtcattca tcatccgttg tgttgagtcc tttagatcgt ggctttatca 49 | 1921 tgcatgtctc ccgccagccc tcaaactatg tcgtcacgcg catgtttaca ttatatggaa 50 | 1981 caaaacgaga ggtagaagcc aatcactatc cggcaagaga cgctccacct gcgtctatcg 51 | 2041 tacagtgcta aaatggcagc agatagtgaa acttccgcta agctagcccc tcagggtaca 52 | 2101 ctgcaccgag gcgtgctgtc aatacttgat taagtcgggt tgtcggggac ctgccgtcac 53 | 2161 gcttccgagt gtatatccgg atttagttga cgtcatacag aggcactaag aagaataaac 54 | 2221 gcttacctcc agcaatcgtg tagtgtcagg tgtacgttct cccttgcgcc gttcggcaag 55 | 2281 cgtccggtgt cgggctgcaa ggaataaatc tttatggacc agaggggctt gtttcctcat 56 | 2341 
atgggtgcgt gcacttatac gattcaaagg tggatatggc cgcataacac gtagccaggc 57 | 2401 tatagtcccg cggcctaatt ccttcgagtg cgggtgcctg tttttgtttt tcctttacga 58 | 2461 cacgaaccgc tctaacctgc tctatttcgc cacgttccag tgaacctctt agcctaccgc 59 | 2521 ccacgtacgg tgggacgcgt cgagcagtta aggtactgtg gagaaatcgt tcaatattag 60 | 2581 aaaacaggcg gtgtacgaat tactgtgtcc cggtgttgcc cgtttaacgg ctgccgtggt 61 | 2641 cataccgtga ggcaccacga ggggatgcta cgcaacatgc gaggtgtaat cagcagggaa 62 | 2701 gatcccgggg atcgaaagcg gtccgcgatt tgcgaccgat atgcataagg tgtcatttat 63 | 2761 attacaccta cgacactggt accggctcac agccaaatgc acagtctcaa gatagaattc 64 | 2821 gcaaaaatta ggcttatcgg gcgcttactc tttgttacac ttctggtctg tgagtgacgc 65 | 2881 cctgtgtccc atcactgcat gtgaggatgc gtgtactgca cctgtgaatt acagacgttt 66 | 2941 ccctaccagg gcgctacgtt ataatgttcg gttgcaaccc tctatagggt gatctcgaca 67 | 3001 taccactatg gtttgcgcgt ttcgggtgct tttggcgagg gccaggtacg gcggtgagac 68 | 3061 agcgtatttt gcgaataggg tattcggacg cacggtgtat agctcttgga aaacgactct 69 | 3121 tcgacgggcg tgtagaagta gccaagcagt tcggttgtta tcactaaatg cggaccatgt 70 | 3181 gcagccgttt aggagaggag tctgcaatgg atcagctggt cagattgtac cactcggttg 71 | 3241 agcctaaggc acttcaggtg ctcggattgt aatgaaccct caggttacgc cgttgcgctg 72 | 3301 agatcccaca agccataagc aatgagaatc gagcagctat cgaattacgc agcacttgct 73 | 3361 gatgaactgg gtgcacctaa agcgcccgat aaaacggagg gagtgaggta actactacat 74 | 3421 tgcgagtagt gcgaggggcc tccgagacta cattagagca gccatgccga atacgacaaa 75 | 3481 atctatagcc ttgcgaacgt aggggactga tttatggaca acaggaccga aacaagttgg 76 | 3541 agccacaacg gagctatgga cgttttcact attgatgggt ggcttgcgtc cgcatcccag 77 | 3601 gtttctctgg ttaattttca tgacgattct ttgtggtacg atccaggcat aagatagggc 78 | 3661 acctaacccg atactgtgaa cttatatgac ggctgaacat gccctaatag tctgccaaaa 79 | 3721 gggaaaccta taacggctct aaacgggccc cgggtacgta acagcaatgt tcctgtccac 80 | 3781 caaggcgttt ttagtaaccg ccgatcgatc tcgatcgaat tccgttttac gccccagatc 81 | 3841 gcagctccct tgacagtgtc agttgaatcc ccattctagg tttcctgata aaactaagta 82 | 3901 cggggacgca tcagtcgaaa 
gcaatcacag ataatagata tcttgagcag aaagacgtct 83 | 3961 tcacgaatca ggagcgagtg tcccctttga aagtgcgggc aacaataggt gtgaggtttc 84 | 4021 acggttgctt gatcagtagc tgttcactct ctcgcaggcc tacatagtga ggtggtgaga 85 | 4081 ttgttcaggt ctgtctcctg cgcggttgtc gagccccgtc gggcacgcta acaaggacag 86 | 4141 gaattcgggt cgtcaaataa caatttacgt catcgcatcc ctcaagggta aggcgtgatc 87 | 4201 tccatctcgg ttcccaattg gtcattgtga tagccacact gcagttaccg aaggcgattt 88 | 4261 tatactaccc tgataagtgc tcgcagcccc tcctccctaa taatgtaacg acgagcatta 89 | 4321 acccaggagt atttctcacg ttgctcgtgc ccagccaagg cgatcaacgc agaccggata 90 | 4381 gcgttagcta gcccacgggt aaaagcgatt gttgtactct ttcatatgaa tacagcactc 91 | 4441 gcgaatgcaa gcctacgtcg gctcgattct cttcctcacc acactggagg cgatgatcta 92 | 4501 gcccgtcttt caccccgtac ctttatactc ggccgcgtat ttctccaccg ggagctctgc 93 | 4561 ctaccaaagc gtcgtacgcc caggaagccg tttcctttat cctccctcat gtctcaacgc 94 | 4621 gcgagcatcg gagagtacgc gaaatcgact gtctggccaa cactggacag acgcgcgaga 95 | 4681 cactgataaa cccccatttt cgtggggtta gtaaatcggt ggcttcaaac ggttcgatcg 96 | 4741 ctcgcacgag cattagtcag gagtatgatt gcgcatcatc tctactccgt actaggttct 97 | 4801 gtagctcagg atctccgggg ggatcaagct tttatgtccc gcgggattag aagatatcca 98 | 4861 tccatatgtt tactttgatg cgggtccgag ctctctatga tgctgctttg acaaaatgtg 99 | 4921 gaaagcctca gcgaccgctc tggggtcaaa gctctacgac acgttgatgc taaacaagtt 100 | 4981 ggactactaa gcaaatgaag cggaagagat actgagcgct ct 101 | // 102 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. raw:: html 2 | 3 |

4 | kappagate logo 5 |

6 |

7 | 8 | .. image:: https://travis-ci.org/Edinburgh-Genome-Foundry/kappagate.svg?branch=master 9 | :target: https://travis-ci.org/Edinburgh-Genome-Foundry/kappagate 10 | :alt: Travis CI build status 11 | 12 | .. image:: https://coveralls.io/repos/github/Edinburgh-Genome-Foundry/kappagate/badge.svg?branch=master 13 | :target: https://coveralls.io/github/Edinburgh-Genome-Foundry/kappagate?branch=master 14 | 15 | 16 | Kappagate is a Python library to predict the percentage of good clones (carrying 17 | a correct version of the desired assembly) when assembling DNA with a method 18 | relying on 4bp overhangs (e.g. Golden Gate assembly, OGAB, etc.). 19 | 20 | Using Kappagate, you can get an estimation of how difficult the 21 | assembly will be, and how many clones should be tested to find a correct one. 22 | 23 | Kappagate uses the exhaustive relative overhang affinity tables provided 24 | in Potapov et al. 2018 (ACS Syn. Bio.). In this publication the authors show 25 | that the rate of valid clones can be predicted using 26 | in-vitro experiments focused on the overhangs present in the assembly. 27 | 28 | Kappagate attempts to predict clone validity rates without any overhang-subset-specific 29 | experiment, using computer simulations instead. It simulates the temporal evolution 30 | of the DNA fragments ligation reaction using the Kappa biological modeling system. 31 | At the end of the cloning simulation, Kappagate returns the ratio between "good" 32 | constructs (with all expected parts in the right order) and bad circular assembly-forming 33 | constructs (which may produce bad clones after transformation and plating). 34 | 35 | This is an experimental piece of software, useful to us, but coming with no warranty. 36 | 37 | Examples 38 | -------- 39 | 40 | 41 | ..
code:: python 42 | 43 | from kappagate import predict_assembly_accuracy, overhangs_list_to_slots 44 | 45 | # FIRST TEST ON 12 WELL-DESIGNED OVERHANGS 46 | 47 | overhangs= ['GGAG', 'GGCA', 'TCGC', 'CAGT', 'TCCA', 48 | 'GAAT', 'AGTA', 'TCTT', 'CAAA', 'GCAC', 49 | 'AACG', 'GTCT', 'CCAT'] 50 | slots = overhangs_list_to_slots(overhangs) 51 | predicted_rate, _, _ = predict_assembly_accuracy(slots) 52 | 53 | print (predicted_rate) 54 | # >>> 0.987 55 | 56 | This means that 98.7% of clones will carry a valid assembly. It is really 57 | not far from the experimental observation in Potapov et al., which was 58 | 99.2% +- 0.6% (1 std). 59 | Let's have a look at a few more sets: 60 | 61 | .. code:: python 62 | 63 | overhangs = ['GGAG', 'GATA', 'GGCA', 'GGTC', 'TCGC', 64 | 'GAGG', 'CAGT', 'GTAA', 'TCCA', 'CACA', 65 | 'GAAT', 'ATAG', 'AGTA', 'ATCA', 'TCTT', 66 | 'AGGT', 'CAAA', 'AAGC', 'GCAC', 'CAAC', 67 | 'AACG', 'CGAA', 'GTCT', 'TCAG', 'CCAT'] 68 | slots = overhangs_list_to_slots(overhangs) 69 | predicted_rate, _, _ = predict_assembly_accuracy(slots) 70 | print (predicted_rate) 71 | # >>> 0.846 72 | # In Potapov 2018: 84% +/- 5% 73 | 74 | .. code:: python 75 | 76 | overhangs= ['GGAG', 'GGTC', 'AGCA', 'CAGT', 'GGTA', 77 | 'GAAT', 'GGTT', 'TCTT', 'GGTG', 'GCAC', 78 | 'AGCG', 'GTCT', 'CCAT'] 79 | slots = overhangs_list_to_slots(overhangs) 80 | predicted_rate, _, _ = predict_assembly_accuracy(slots) 81 | print (predicted_rate) 82 | # >>> 0.33 83 | # In Potapov 2018: 45% +/- 5% 84 | 85 | Moar examples !! 86 | ---------------- 87 | 88 | Plotting interactions 89 | ~~~~~~~~~~~~~~~~~~~~~ 90 | 91 | To plot the parts circularly with their interaction: 92 | 93 | .. 
code:: python 94 | 95 | from kappagate import overhangs_list_to_slots, plot_circular_interactions 96 | overhangs = ['TAGG', 'GACT', 'GGAC', 'CAGC', 97 | 'GGTC', 'GCGT', 'TGCT', 'GGTA', 98 | 'CGTC', 'CTAC', 'GCAA', 'CCCT'] 99 | slots = overhangs_list_to_slots(overhangs) 100 | ax = plot_circular_interactions( 101 | slots, annealing_data=('25C', '01h'), rate_limit=200) 102 | ax.figure.savefig("test.png", bbox_inches='tight') 103 | 104 | The unwanted overhang interactions appear in red in the resulting figure: 105 | 106 | .. raw:: html 107 | 108 |

109 | 110 |

111 | 112 | Colony picking statistics 113 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 114 | 115 | To convert the predicted success rate into decisions regarding how many colonies 116 | to pick, and when to stop picking colonies: 117 | 118 | .. code:: python 119 | 120 | from kappagate import (overhangs_list_to_slots, predict_assembly_accuracy, 121 | plot_colony_picking_graph, success_rate_facts) 122 | 123 | overhangs = ['TAGG', 'GACT', 'GGAC', 'CAGC', 124 | 'GGTC', 'GCGT', 'TGCT', 'GGTA', 125 | 'CGTC', 'CTAC', 'GCAA', 'CCCT'] 126 | slots = overhangs_list_to_slots(overhangs) 127 | predicted_rate, _, _ = predict_assembly_accuracy(slots) 128 | ax = plot_colony_picking_graph(success_rate=predicted_rate) 129 | ax.figure.savefig("success_rate_facts.png", bbox_inches='tight') 130 | 131 | print (success_rate_facts(predicted_rate, plain_text=True)) 132 | 133 | Result: 134 | 135 | .. code:: raw 136 | 137 | The valid colony rate is 47.7%. Expect 1.9 clones in average 138 | until success. Pick 5 clones or more for 95% chances of at 139 | least one success. If no success after 8 clones, there is 140 | likely another problem (p-value=0.01). 141 | 142 | .. raw:: html 143 | 144 |

145 | 146 |

147 | 148 | Installation 149 | ------------- 150 | 151 | You can install kappagate through PIP 152 | 153 | .. code:: 154 | 155 | sudo pip install kappagate 156 | 157 | Alternatively, you can unzip the sources in a folder and type 158 | 159 | .. code:: 160 | 161 | sudo python setup.py install 162 | 163 | License = MIT 164 | -------------- 165 | 166 | Kappagate is an open-source software originally written at the `Edinburgh Genome Foundry `_ by `Zulko `_ and `released on Github `_ under the MIT licence (Copyright 2018 Edinburgh Genome Foundry). 167 | 168 | Everyone is welcome to contribute ! 169 | 170 | More biology software 171 | --------------------- 172 | 173 | .. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/Edinburgh-Genome-Foundry.github.io/master/static/imgs/logos/egf-codon-horizontal.png 174 | :target: https://edinburgh-genome-foundry.github.io/ 175 | 176 | Kappagate is part of the `EGF Codons `_ synthetic biology software suite for DNA design, manufacturing and validation. 177 | -------------------------------------------------------------------------------- /kappagate/tools.py: -------------------------------------------------------------------------------- 1 | import os 2 | import networkx as nx 3 | from Bio import SeqIO 4 | from dnacauldron import ( 5 | RestrictionLigationMix, 6 | autoselect_enzyme, 7 | list_overhangs_from_record_annotations, 8 | generate_type2s_restriction_mix, 9 | ) 10 | from dnacauldron.biotools import reverse_complement 11 | from snapgene_reader import snapgene_file_to_seqrecord 12 | 13 | 14 | def parts_records_to_slots(parts_records, enzyme="auto"): 15 | """Return slots from parts records, ready to feed to other methods. 16 | 17 | Parameters 18 | ---------- 19 | parts_records 20 | A list of Biopython records of the parts of the assembly. Do NOT 21 | include the backbone. 22 | 23 | enzyme 24 | Name of a Type-2S enzyme or "auto" to select automatically based 25 | on restriction sites in the records sequences. 
26 | 27 | 28 | Returns 29 | ------- 30 | slots 31 | A list [(slot_name, left_overhang, right_overhang), ...] ready to be fed 32 | to the other Kappagate methods. 33 | 34 | """ 35 | 36 | if enzyme == "auto": 37 | enzyme = autoselect_enzyme(parts_records) 38 | # mix = RestrictionLigationMix(parts_records, enzymes=[enzyme]) 39 | mix = generate_type2s_restriction_mix(parts=parts_records, enzyme=enzyme) 40 | 41 | slots_parts = mix.compute_slots() 42 | graph = mix.slots_graph(with_overhangs=False) 43 | slots = [ 44 | (list(slots_parts[(s1, s2)])[0], s1, s2) 45 | for s1, s2 in linear_graph_to_nodes_list(graph) 46 | ] 47 | for i in list(range(len(slots) - 1)): 48 | name, left, right = slots[i + 1] 49 | if slots[i][2] != left: 50 | if slots[i][2] == reverse_complement(right): 51 | slots[i + 1] = ( 52 | name, 53 | reverse_complement(right), 54 | reverse_complement(left), 55 | ) 56 | else: 57 | name, left, right = slots[i] 58 | slots[i] = ( 59 | name, 60 | reverse_complement(right), 61 | reverse_complement(left), 62 | ) 63 | return ( 64 | [("backbone-left", "LEFT", slots[0][1])] 65 | + slots 66 | + [("backbone-right", slots[-1][2], "RIGHT")] 67 | ) 68 | 69 | 70 | def _find_backbone_center(record, backbone_annotations=()): 71 | """Find an annotation from the backbone, return the index of its center""" 72 | record.features = [f for f in record.features if f.location is not None] 73 | for feature in record.features: 74 | for qualifier in feature.qualifiers.values(): 75 | qualifiers = str(qualifier) 76 | if any([ann in qualifiers for ann in backbone_annotations]): 77 | return int((feature.location.start + feature.location.end) / 2) 78 | raise ValueError( 79 | "Could not find any of the following in record %s: %s" 80 | % (record.id, ", ".join(backbone_annotations)) 81 | ) 82 | 83 | 84 | def construct_record_to_slots(record, backbone_annotations=()): 85 | """Return slots from a construct record, ready to feed to other methods. 
def linear_graph_to_nodes_list(graph, node_name=None):
    """Return the nodes of a linear graph, in path order, as a tuple.

    The two nodes of degree 1 are taken as the ends of the path, and the
    first simple path found between them gives the ordering.

    Parameters
    ----------
    graph
      A networkx graph with exactly two nodes of degree 1 (unpacking the
      end nodes raises a ValueError otherwise).

    node_name
      Optional node attribute key. When given, the returned tuple holds
      ``graph.nodes[n][node_name]`` for each node instead of the node itself.

    Returns
    -------
    nodes
      A tuple of nodes (or of node attributes) ordered along the path.
    """
    endpoints = []
    for node in graph.nodes:
        if graph.degree[node] == 1:
            endpoints.append(node)
    first_end, last_end = endpoints
    all_paths = list(nx.all_simple_paths(graph, first_end, last_end))
    path = all_paths[0]
    if node_name is not None:
        return tuple([graph.nodes[node][node_name] for node in path])
    return tuple(path)
def set_record_topology(record, topology, pass_if_already_set=False):
    """Store ``topology`` under the "topology" key of ``record.annotations``.

    Parameters
    ----------
    record
      An object with an ``annotations`` dict (e.g. a Biopython record).

    topology
      The value to store, e.g. "linear" or "circular".

    pass_if_already_set
      If True, an existing topology that is not None is left untouched.
    """
    annotations = record.annotations
    current = annotations.get("topology", None)
    if pass_if_already_set and current is not None:
        return
    annotations["topology"] = topology
[None, "", "", ".", " "]: 198 | id = os.path.splitext(os.path.basename(filename))[0] 199 | record.name = id.replace(" ", "_")[:20] 200 | record.id = id 201 | elif id is not None: 202 | record.id = id 203 | record.name = id.replace(" ", "_")[:20] 204 | return record 205 | -------------------------------------------------------------------------------- /kappagate/reporting.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import networkx as nx 4 | import itertools 5 | 6 | from .predict_assembly_accuracy import slots_to_agents_and_rules 7 | from .tools import overhangs_list_to_slots, linear_graph_to_nodes_list 8 | 9 | 10 | def plot_colony_picking_graph(success_rate=None, ax=None): 11 | """Plot a generic graph of colonies to pick in different scenarios. 12 | 13 | Parameters 14 | ---------- 15 | 16 | success_rate 17 | If provided (betweem 0 and 1), it will be highlighted on the map 18 | 19 | ax 20 | A matplotlib ax (one is created and returned if none is provided) 21 | 22 | Returns 23 | ------- 24 | 25 | ax 26 | The matplotlib ax of the plot 27 | """ 28 | if ax is None: 29 | fig, ax = plt.subplots(1, figsize=(15, 5)) 30 | ax.set_title("Number of colonies to pick to get at least one " 31 | "good clone with certainty X%\n", 32 | fontdict=dict(size=16)) 33 | ax.set_xlabel("Proportion of good clones", fontdict=dict(size=16)) 34 | ax.set_ylabel("Certainty level", fontdict=dict(size=16)) 35 | ax.set_yscale('log') 36 | # ax.set_xscale('log') 37 | 38 | # ax.invert_yaxis() 39 | 40 | X = np.linspace(0.05, 0.999, 1000) 41 | Y = np.linspace(0.001, 0.500, 1000)[::-1] 42 | XX, YY = np.meshgrid(X, Y) 43 | ZZ = np.log(YY)/np.log(1-XX) 44 | levels = (list(range(1, 10)) + 45 | list(range(10, 20, 2)) + 46 | list(range(20, 30, 5)) + 47 | list(range(30, 55, 10))) 48 | # ax.contourf(XX, YY, ZZ, levels=levels) 49 | 50 | xticks = np.arange(0.1, 1.01, 0.1) 51 | yticks = [0.5, 0.2, 0.1, 0.05, 0.01] 
def min_trials_for_one_success(success_rate, certainty):
    """Return the minimal number of trials to be X% certain of one success.

    Parameters
    ----------
    success_rate
      Probability (between 0 and 1) that a single trial succeeds.

    certainty
      Desired probability (e.g. 0.95) of getting at least one success.

    Returns
    -------
    trials
      1 if ``success_rate`` is 1, infinity if it is 0, otherwise
      ``ceil(log(1 - certainty) / log(1 - success_rate))``.
    """
    if success_rate == 1:
        return 1
    if success_rate == 0:
        # np.inf rather than the np.Inf alias, which NumPy 2.0 removed.
        return np.inf
    return np.ceil(np.log(1 - certainty) / np.log(1 - success_rate))


def average_trials_until_success(success_rate):
    """Return the average number of trials before a success is encountered.

    Returns infinity when ``success_rate`` is 0, else ``1 / success_rate``.
    """
    if success_rate == 0:
        # np.inf rather than the np.Inf alias, which NumPy 2.0 removed.
        return np.inf
    return 1.0 / success_rate
def plot_circular_interactions(slots, annealing_data=('25C', '01h'),
                               corrective_factor=1.0, rate_limit=200, ax=None):
    """Plot the slots circularly, show the strength of overhangs interactions.

    Parameters
    ----------
    slots
      A list [(slot_name, left_overhang, right_overhang), ...]

    annealing_data
      Either a pandas dataframe or a couple (temperature, duration) indicating
      an experimental dataset from Potapov et al. 2018

    corrective_factor
      A factor that can be applied to decrease (when <1) or increase (>1)
      the differences in affinity in the dataset.

    rate_limit
      Any interaction with a "rate" (value in the annealing_data) below this
      value will not be shown.

    ax
      A matplotlib ax (one is created and returned if none is provided)

    Returns
    -------
    ax
      The matplotlib ax of the plot

    """
    agents, rules = slots_to_agents_and_rules(
        slots, annealing_data=annealing_data,
        corrective_factor=corrective_factor)

    # First graph: used only to order the (slot_name, overhang) nodes and
    # compute their positions. Each slot links its left and right overhang
    # nodes; nodes of different slots sharing the same overhang are linked
    # too, and these pairs are recorded as the expected interactions.
    graph = nx.Graph([((s[0], s[1]), (s[0], s[2])) for s in slots])
    expected_interactions = []
    for n1, n2 in itertools.combinations(list(graph.nodes()), 2):
        if n1[1] == n2[1]:
            graph.add_edge(n1, n2)
            expected_interactions.append((n1, n2))
    ordered_nodes = linear_graph_to_nodes_list(graph)
    # Spread the nodes on the unit circle (6.28 ~ 2*pi). One extra angle is
    # generated; zip() drops it, so the last node does not land on angle 0
    # again.
    positions = {
        node: (np.sin(a), np.cos(a))
        for (node, a) in zip(ordered_nodes,
                             np.linspace(0, 6.28, len(ordered_nodes) + 1))
    }

    # Second graph: the one actually drawn, built from the agents and rules.
    graph = nx.Graph()

    for a in agents:
        # add_edge takes two nodes -- presumably each agent has exactly two
        # sites (its two overhangs); TODO confirm against
        # slots_to_agents_and_rules.
        graph.add_edge(*((a.name, site) for site in a.sites), rate=1.0,
                       is_slot=True, slot_name=a.name)
    for rule in rules:
        # Interactions weaker than rate_limit are not plotted.
        if rule.rate < rate_limit:
            continue
        graph.add_edge(*((rule.reactants[i].agent, rule.reactants[i].site)
                         for i in range(2)),
                       rate=rule.rate, is_interaction=True)

    if ax is None:
        fig, ax = plt.subplots(1, figsize=(len(slots), len(slots)))
    ax.axis("off")
    # ax.set_aspect('equal')
    for (n1, n2, data) in graph.edges(data=True):
        (x1, y1), (x2, y2) = positions[n1], positions[n2]
        xmiddle, ymiddle = np.array((0.5 * (x1 + x2), 0.5 * (y1 + y2)))
        if data.get('is_interaction', False):
            # Expected interactions are drawn in grey, all others in bold red.
            if any(i in expected_interactions for i in [(n1, n2), (n2, n1)]):
                color, weight = 'grey', 'normal'
            else:
                color, weight = 'red', 'bold'
            ax.plot([x1, x2], [y1, y2], c=color, ls=':')
            # A node interacting with itself has its label pushed outwards
            # so it does not sit on top of the node's own label.
            factor = 1.2 if n1 == n2 else 1.0
            ax.text(factor * xmiddle, factor * ymiddle, "%d" % data['rate'],
                    ha='center', va='center',
                    fontdict=dict(color=color, weight=weight, size=10),
                    bbox=dict(boxstyle='round',
                              facecolor='#ffffffcc',
                              edgecolor='white'), zorder=1000)
        if data.get('is_slot', False):
            # Slots: a thick blue segment, plus a thin line leading outwards
            # to the slot's name.
            ax.plot([x1, x2], [y1, y2], c='b', ls='-', lw=6, alpha=0.3)
            ax.plot([xmiddle, 1.2 * xmiddle], [ymiddle, 1.3 * ymiddle],
                    c='b', ls='-', alpha=0.3, lw=2)
            ax.text(1.3 * xmiddle, 1.3 * ymiddle, data['slot_name'],
                    ha='center', va='center',
                    fontdict=dict(color='blue', weight='bold'),
                    bbox=dict(boxstyle='round',
                              facecolor='white',
                              edgecolor='white'), zorder=500)

    # Label every node with its overhang (second element of the node tuple).
    for node in graph.nodes():
        x, y = positions[node]
        ax.text(x, y, node[1], ha='center', va='center',
                bbox=dict(boxstyle='round',
                          facecolor='white',
                          edgecolor='white'))
    return ax
16 | """ 17 | import os 18 | import shutil 19 | import sys 20 | import tempfile 21 | import tarfile 22 | import optparse 23 | import subprocess 24 | 25 | from distutils import log 26 | 27 | try: 28 | from site import USER_SITE 29 | except ImportError: 30 | USER_SITE = None 31 | 32 | DEFAULT_VERSION = "0.9.6" 33 | DEFAULT_URL = "https://pypi.python.org/packages/source/s/setuptools/" 34 | 35 | def _python_cmd(*args): 36 | args = (sys.executable,) + args 37 | return subprocess.call(args) == 0 38 | 39 | def _install(tarball, install_args=()): 40 | # extracting the tarball 41 | tmpdir = tempfile.mkdtemp() 42 | log.warn('Extracting in %s', tmpdir) 43 | old_wd = os.getcwd() 44 | try: 45 | os.chdir(tmpdir) 46 | tar = tarfile.open(tarball) 47 | _extractall(tar) 48 | tar.close() 49 | 50 | # going in the directory 51 | subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0]) 52 | os.chdir(subdir) 53 | log.warn('Now working in %s', subdir) 54 | 55 | # installing 56 | log.warn('Installing Setuptools') 57 | if not _python_cmd('setup.py', 'install', *install_args): 58 | log.warn('Something went wrong during the installation.') 59 | log.warn('See the error message above.') 60 | # exitcode will be 2 61 | return 2 62 | finally: 63 | os.chdir(old_wd) 64 | shutil.rmtree(tmpdir) 65 | 66 | 67 | def _build_egg(egg, tarball, to_dir): 68 | # extracting the tarball 69 | tmpdir = tempfile.mkdtemp() 70 | log.warn('Extracting in %s', tmpdir) 71 | old_wd = os.getcwd() 72 | try: 73 | os.chdir(tmpdir) 74 | tar = tarfile.open(tarball) 75 | _extractall(tar) 76 | tar.close() 77 | 78 | # going in the directory 79 | subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0]) 80 | os.chdir(subdir) 81 | log.warn('Now working in %s', subdir) 82 | 83 | # building an egg 84 | log.warn('Building a Setuptools egg in %s', to_dir) 85 | _python_cmd('setup.py', '-q', 'bdist_egg', '--dist-dir', to_dir) 86 | 87 | finally: 88 | os.chdir(old_wd) 89 | shutil.rmtree(tmpdir) 90 | # returning the result 91 | log.warn(egg) 92 | if 
def _do_download(version, download_base, to_dir, download_delay):
    """Make a setuptools egg available in ``to_dir`` and put it on sys.path.

    The egg is downloaded and built only if it is not already present.
    It is then inserted at the front of ``sys.path``, and setuptools is
    imported and told to bootstrap-install from that egg.
    """
    major, minor = sys.version_info[0], sys.version_info[1]
    egg_name = 'setuptools-%s-py%d.%d.egg' % (version, major, minor)
    egg = os.path.join(to_dir, egg_name)
    egg_is_missing = not os.path.exists(egg)
    if egg_is_missing:
        tarball = download_setuptools(version, download_base,
                                      to_dir, download_delay)
        _build_egg(egg, tarball, to_dir)
    # The egg must be on the path before setuptools is imported.
    sys.path.insert(0, egg)
    import setuptools
    setuptools.bootstrap_install_from = egg
def download_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL,
                        to_dir=os.curdir, delay=15):
    """Download the setuptools source archive and return its real path.

    `version` should be a valid setuptools version number for which a
    ``setuptools-<version>.tar.gz`` archive is available under the
    `download_base` URL (which should end with a '/'). `to_dir` is the
    directory where the archive is saved. Nothing is downloaded if the
    archive is already there. `delay` is accepted for compatibility but is
    not used by this function.
    """
    # making sure we use the absolute path
    to_dir = os.path.abspath(to_dir)
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib2 import urlopen
    archive_name = "setuptools-%s.tar.gz" % version
    url = download_base + archive_name
    saveto = os.path.join(to_dir, archive_name)
    if os.path.exists(saveto):
        # Avoid repeated downloads
        return os.path.realpath(saveto)
    response = output = None
    try:
        log.warn("Downloading %s", url)
        response = urlopen(url)
        # Read/write all in one block, so we don't create a corrupt file
        # if the download is interrupted.
        payload = response.read()
        output = open(saveto, "wb")
        output.write(payload)
    finally:
        if response:
            response.close()
        if output:
            output.close()
    return os.path.realpath(saveto)
def _build_install_args(options):
    """
    Translate parsed options into extra arguments for
    'python setup.py install' on the setuptools package.

    Returns a new list: empty unless `options.user_install` is set, in
    which case it holds '--user'. Exits with status 1 when '--user' is
    requested on an interpreter older than Python 2.6.
    """
    if not options.user_install:
        return []
    if sys.version_info < (2, 6):
        log.warn("--user requires Python 2.6 or later")
        raise SystemExit(1)
    return ['--user']
def main(version=DEFAULT_VERSION):
    """Install or upgrade setuptools and EasyInstall.

    Parses the command line, downloads the setuptools tarball for `version`
    from the download base chosen on the command line, and installs it with
    the install arguments derived from the parsed options.

    `version` is the setuptools version to download. Returns whatever
    `_install` returns.
    """
    options = _parse_args()
    # Forward `version`: it used to be dropped here, so the default version
    # was downloaded no matter what the caller asked for.
    tarball = download_setuptools(version=version,
                                  download_base=options.download_base)
    return _install(tarball, _build_install_args(options))
ccttaccttc gtgccggcgc gctcctcttg ctaccatcac gcttcaaatc gggtggaaga 47 | 361 ccaactgatg aatgagggtg cagaccccag tggagcataa tacgatgcgc ccacgatact 48 | 421 ctagagccac gcgtcgacgt tcgtaagatg tcccgggccc gtgggtggcc acatcggccg 49 | 481 cccatccttg tgtcttccga taagtactcc ctttcaagga ctctccgata ccggcttaga 50 | 541 acacgaacgg acaacgcttt cagcgctgag aaatgacggc tgcttcaacc agaacgatac 51 | 601 agccttcttt gctagatagc ctctagtgtc agccaccaag tccgccacct aatccttcgc 52 | 661 cgagggtcaa agcttctgac catccaattg ttaggcactc ttggaacctt gcaagtacct 53 | 721 gatcatgcag aaaatggact tgcccggagc atattcatta taagtaccat tcacggcagg 54 | 781 ttatagtatg cgttgacgcg gggtgatacg gtaatgcaag taaccatcgg gccgcggtat 55 | 841 gcacatagaa gggtgtttga cttggtttct tacagtgcaa ctgcatatgt ggaccgccat 56 | 901 tccaagtgaa aggatacgaa gggaggtttc ggactgctat ccagagcatg gaaagccttc 57 | 961 aaggtggtgc gcactggaat agtgactcgg ggattgtatg tcgagccgcg tccagtcttt 58 | 1021 cctatactta tgcgagcaaa tagaccgagt ggaacttgat tgcatcagcc gaatcacgat 59 | 1081 gaaaactcag cttacaattc gtaagcactt caaatatatg cccggaaaat cgccgtccag 60 | 1141 tgacttgtaa tagctccagg gtcagtagcg ctgccctggc cgaaccaggt gacggttacg 61 | 1201 tggacctcac agtatcaatg ttgcagtgcg ccaccagatt aagctggccg tgtcatggtg 62 | 1261 aacaaccgtg ataaaagaac gggagtatga gagtgccacc caagagtggt actccaggat 63 | 1321 tcggaaaggg gagggggctt ttcctgctta acaaccttcc tcggcacggt ctaagggtct 64 | 1381 cactcgtcct aaactctacc atcatgcagc taggcgaaag tataacttta gggtctctca 65 | 1441 ggatgagcta tctgtaccac ggtgatgaga taagtagggg gtgcgctagg gtcgttcgtg 66 | 1501 ttttcatcgt ctatagtacc tatttctcgt cttccattag cgcatacggc catctcgcgg 67 | 1561 cggctacgtc agccagcgtc aaccccaagt cactaatttc cgttattata ggtgtagctc 68 | 1621 ataacttctt ctgccgacca agccaaaagc tttgcggcag ctgcaaggcc acgttaggct 69 | 1681 ctcggacggc agatatttat taataacacc actgggttcc agctcttaac gcacttccct 70 | 1741 tcactggatc acctgtacgc ccataagcta agcctgaagc cacggcctca atcgtggaca 71 | 1801 ttactcttag caaatccaca ttaccatgtc accaatcccc gaaccccgcg ccgacctaag 72 | 1861 gcggggtacc tcgtgcacgg cagcatcgag 
ggaaatatat tacaaaagac gcggcgatcc 73 | 1921 aacggttgat aaaaacagtt ccgtggaagg cggacgtgtg cttagcgcat gcttctcacc 74 | 1981 gagtgacagc accgattcca tctcgctagc acgccgatga gcctcgctgt gggactatgg 75 | 2041 tccagcctat gcttaactag gcagaacaga gagtatggtg gtcctcggta aggtcgattt 76 | 2101 ggtggcactc gcaagatacg cgggttatta gtagagcgca gtcagggccc atcgccagca 77 | 2161 tttgcatgct cgccgtacgc tatccttgta ctacggttcc tcagtggcta taaaggaatg 78 | 2221 tcacaactcc attagcccct tttgttcagc gcttagtcat ggtgggactg actcaccgag 79 | 2281 actacatacg gattgtgccc gcagagacct gcctttcatg ctggggtttt gtaagcagct 80 | 2341 ccacggggta gtgtcaccat gccttcgacc tatgcttatg aaacccgagg taatgcggca 81 | 2401 ggatcccgtg gatcctacgc ttagcctgtt taagccatcg ttctcctcgg cccccagcca 82 | 2461 gcgcgtattc cgcgcattcc tccgagtgcg tatcgcgcaa accacaagat acctctcaac 83 | 2521 gactagcggt gaaccaccaa gcttgacata cgtgcccctt cggaaacgtt attagtggtc 84 | 2581 ctgagtggta cgtcccgagt atacggggca gcgatagtaa cgctacaaat agctaggcga 85 | 2641 ggttgcgata cctgaatgaa tatccaacag tggcaacgag gcgcttagaa agggaaacta 86 | 2701 cataccagtt caatccggga tgacttcaaa agggtatgtt tgttttgaag cgaagctttg 87 | 2761 gttgtcattc agttcctaga gtttacggcg ctgatatggc tctaggagtc cagtgaattc 88 | 2821 gtcagcataa gccccacatc gtaggaaatt gcccggtcgt cggtctagca aatgctcggt 89 | 2881 aaactgggag tgtactaaca ccatgtcgtg gtaacccggt ggaccggcta ctagagacat 90 | 2941 atttattcat gctcagttga acctgtttcg agggggccga gttaacataa agtgattagt 91 | 3001 cacaattgta agatagaaca gggatatagc tcgttgcatc cataaattgc ccaccgtagg 92 | 3061 gagaacgccg ttggaaaatg agagttaggc gatgagcggt tgtcgcccgc acagacaaag 93 | 3121 gatgccttta taaatagtag cggccttgtc tgcaccagac gtttgtgggt cgacgtgcta 94 | 3181 ttctagccaa aagcaaaatg tatcatacta caggcaatag gccctatgct gaccaaactt 95 | 3241 ggggaccctt gcgccttctc tcttacgcat aacatgcata ggtacctcta ctcatcaggc 96 | 3301 agggcggcgt gaggtcacta tggctcaaga tgtgtacgac taaagaaagg tttatgctcc 97 | 3361 ttccccaagg acgcatttgg gactgctact tgcccctagc gaattcacta ggatttttgt 98 | 3421 agaaccatga gcgccctatc cgatagcaca gagacaatgc tacaagcaac 
tgtgcatgcg 99 | 3481 ctcgatcgcc gtgcattaat acgtattata gcgtatcgtg tacgctaata tcttagtgca 100 | 3541 ccgcacgctg gttggataca attccgtgaa ataattcctg cttacacagg ggctttctgg 101 | 3601 cgcatggctg tgtctggatg tttgtgataa gaggctccat gaacccggcg ggaaatgagg 102 | 3661 ggaaaccccg gggagcaacg acactaagcc tggcagttgt tcagatagga cgcttttgtc 103 | 3721 agtgggcggt tttgcatcca ctaactatca taaacgacac aacgtcgaat gatcatcggc 104 | 3781 ggtttgcatg tagagacagc tcctgcaaca ccataccggc tcgtggaatt acgctgtgcg 105 | 3841 ccggccctca ttcgattgtg tccagcgtcg gcgcgaatga tgattgagat gtgcttcggc 106 | 3901 tcagagcgtg cacaggtagt tctcccccct tacctagctg atccagcaat tccacgctgg 107 | 3961 cccgcgcgtc cttgggtaac actggatctg tacccaaata caccgtcggg cttgattgtt 108 | 4021 aactaccgta tggcggttac gatcgagtga cactattatt agaccgctac tccaagtccc 109 | 4081 aacttttcat cgccatacag aagagaacga gaaagtcgaa gagttatagc gtgtacactt 110 | 4141 ctgattagct aacgaatcga tgcgtacagc atactcgagc aactttatgg gtcgtctgtc 111 | 4201 tccttgtacc ccgcttgacg aggatgtgtt tgtaggcggc ttggatatcg gcagtgtatc 112 | 4261 cgcatggtca aacactttag cccgaccggt gctcaaggtc gaaatcctaa tatagattgg 113 | 4321 gataggaccc gtccattact aggcgacatt cgcagtcacg agcccgcacg gggtgcctca 114 | 4381 acacggtgac actcaattcg ccggcactga gtacgacgat cgtgattgcc ataccgtcat 115 | 4441 gttttgtgtt acccctgata agaagcgcag atatggcacg ggggcagagg gcggagtaga 116 | 4501 cccatgcaaa actctagcct ctgggcagca tgatcgacgt agaacgccgt tgtgactcac 117 | 4561 tagtttcagt tatgatgttc agccaagaat cctccatttt gacgtgaacc ttctcccttt 118 | 4621 acaccttgaa gctgcaatat taagtcgacc ctttaagttg ccttgtcttg gtctagagcc 119 | 4681 cgcagctcac cgataaaatg agcagtcaat tttagcggcg gttgtccagg ttacatgcac 120 | 4741 tttgggctgg aaccgcgtta cgtttggagt acttagaagt gtcgtaggca taggagagcc 121 | 4801 gaaacttgat attgggtcta gttagtgctc taccccgctt cggatccgta atagtgatgt 122 | 4861 tgcataagac gcttgcctca aggcgacata cggtggcggt tgctggaaat gacgccccac 123 | 4921 cccgtgataa tttggggacg agccgcacga taacttctgc cagcgaccgc ccctcccacg 124 | 4981 tccgatggtg attccgaatt ttgattacat gccagacacg 
ctttaatctc gccaccacat 125 | 5041 tggcaccccc accaagtaat gtatgtaaga agcagctgcg ccccaagtta cgtgccgttt 126 | 5101 ccgatgggag cagcaataat gtcgatagca accacccccc tctctactga cgaagcaaca 127 | 5161 ccggggacac cgtcaccttg gcttcatttc gtattcattc acctacgcac aaccgcctga 128 | 5221 ccgttcgtta tcacgagcct atccccttca agcaatttcc cccccttgtt cagcagccta 129 | 5281 cactatacgt tggtacactg tgtgcgacag atatttggtg cccgtcttat tctattctga 130 | 5341 gagagtttaa tagccaaaag atatgagtag gaacgggagg tacccgactc gcgttgaagc 131 | 5401 actcttaaga cctctcttag cacttcgaag ggtaggtgcg ttcaaagaac ctctgctttt 132 | 5461 aactaagatc aaaaggattg cgtacaactt ccccgttaac tctccagagg atccgtagaa 133 | 5521 gtctacacca aaatacgttt ggtttcgcca aggtgactat cggtacttgt gcacgtaaca 134 | 5581 gggtaaagaa ttgaaagcct caaattgtac gacgttacgc ccggacggtc gccggtgata 135 | 5641 gggtgacgca tttgcgtttg tcatatcatt tgggattaga cggagctggt gtacctctat 136 | 5701 ggacgttaca aaaccttgtg gtaacagcct gggtgccatc cacctagtcc gaagtttcct 137 | 5761 tctaaccgcg gcatcagatg gatatgtacc ttatggcgct tcccgggaag tccaattctt 138 | 5821 cgtgttgaag ggacagctct acaggcctgg ccacttgtgg cgcatctaag tagtctaggt 139 | 5881 gcatgattct ctcccttggg ggcgctgtcg tgggtcctgc cgagccacac cctaacctca 140 | 5941 cgtcgcaaga cttggtaaat agcttttttt tttatgccca atctatttac gtaacaatcg 141 | 6001 acatcagagg aggggcaaag attacgatag aaaaacactc gacacagagt agtatgataa 142 | 6061 aagttgggac ggcatgctgg actcgagaag tgtcgatccg gcaatccaga tacggcaatg 143 | 6121 gtaccggtca cacctcattc taagagcagt caggggctgt caggtctaca gggcaccggc 144 | 6181 cctgaggccc ttgggctagg gtctgtcacc cctatctgca ctgactttaa agtaaccgag 145 | 6241 caataagaca atcaatcgca gggctctgga tcggggtccg aaggcggggt gcagtgtctc 146 | 6301 cctaattgtt aagctcaaaa gagcagccac tcgttctcga tccttagtcc atcgccgtga 147 | 6361 ggtacaagca gtacacccac ggaaggcccg gcattgaaga tgcaagaaat agagcactgg 148 | 6421 ggtgacgacg gcatgctaag gtaagaaggc aaattctgga ggactgactt ccccacaagc 149 | 6481 ggcgtaagct cggataagat tgggataaac ggaccacggc ggctggatct tcaccccttt 150 | 6541 gcacggtatg aaaacggatc aggattctct 
tccacctatt gcacctgctg agaagagagc 151 | 6601 gctgggggga cgttacttat cccaatcgcg acgtcacgga caacatcgtg ttacaggtca 152 | 6661 ggctcgaaat cctaggcagc tggtatccga cgcgcaagtt atctaccaaa taagttaggc 153 | 6721 gatcgaaatc ataggctgtt tagtgctttt cttggtatca tcctgaactt ctgtcacctc 154 | 6781 cgaggccgaa gcaactatta tgaggacgac tcgatgattg tccggagatg ttgcaccccg 155 | 6841 gcaagcctag aaaaccccgg tcctttgagt tttaatctaa tagtagaagt tctcggaaat 156 | 6901 cagagctgga gactcggtga cgttatagaa atgttcgcac gaagagcgcg acatgtccta 157 | 6961 cttcacaaga tgctttctag gaggggatct ggattctcaa gccctacaca tggtccgtgg 158 | 7021 tgtatgttgg atcgtagtgc ttttaccgac tatgtcgcgt taaattgtct ttcatttcta 159 | 7081 caattcgggg agcatggggc catgtgactt aggttcagaa ggtgcgagcg ggcgtactac 160 | 7141 gtttggtggt cgtgtataat cagaagaatc ggcgatatcc gctagcaagc aaagtgtcct 161 | 7201 tgacgcttgt ggaaatccaa ctatttcctg cctctcaagt gtttagaaaa atatcccctg 162 | 7261 gaggagatat cgctcctctc ttctgcgacc aacaccagtt caattccgca gtgcagcagt 163 | 7321 cctattccgc atggatcacg ctgacgagat gcggggttga gactacccca aaagatatgt 164 | 7381 tgagataaat taccaggatg gataatctgg taccggtgcg aaagggtggt tacaccagag 165 | 7441 tcgcctgatg agcttatgcg caatttcaag tccctatgtt ttactatgaa agtacgaccg 166 | 7501 agccgctagc ctgagtcaaa tagcgcagaa tttgggtcgt ggagatcgcg cttgttaacc 167 | 7561 tggtctaggc gcatctgagt cataacgggg ttataacgac tgtcaacgtg tgaccgcata 168 | 7621 tcctgcaagg cgaacaaata acgccccgcc caaaatatta catttaagta gttatctaac 169 | 7681 tcccggcatg cggggcattt gggggcagcg tgcccattcc gcccctatgc ctgtttcaac 170 | 7741 acgcaaccgt agagattggc taggatgggt tttcacgagc caactgccac attagacata 171 | 7801 gaattccgat aagtataaga gatatcagcg aggttcaggg tagtttggaa cgacagccgg 172 | 7861 ttcagaagtt cagtatggtc attcatcatc cgttgtgttg agtcctttag atcgtggctt 173 | 7921 tatcatgcat gtctcccgcc agccctcaaa ctatgtcgtc acgcgcatgt ttacattata 174 | 7981 tggaacaaaa cgagaggtag aagccaatca ctatcc 175 | // 176 | --------------------------------------------------------------------------------